### TextureDiffusion

In [None]:
import os
import torch
import numpy as np
from PIL import Image
import torch.nn.functional as F
from diffusers import DDIMScheduler
from TextureDiffusion.diffuser_utils import TextureDiffusionPipeline
from TextureDiffusion.TextureDiffusion import MutualSelfAttentionControlMaskAuto
from TextureDiffusion.TextureDiffusion_utils import regiter_attention_editor_diffusers, register_conv_control_efficient
from torchvision.io import read_image
from pytorch_lightning import seed_everything
import warnings
warnings.filterwarnings("ignore")


# Pin CUDA work to the first GPU, but only when one is present: calling
# torch.cuda.set_device on a machine without CUDA raises, which would make
# the CPU fallback used when choosing `device` unreachable.
if torch.cuda.is_available():
    torch.cuda.set_device(0)  # set the GPU device

#### Model Construction

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Identifier of the pretrained weights handed to `from_pretrained`.
model_path = "CompVis/stable-diffusion-v1-4"

# DDIM scheduler shared by inversion and sampling.
scheduler = DDIMScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    clip_sample=False,
    set_alpha_to_one=False,
)

#### Real editing with TextureDiffusion

In [None]:
def load_image(image_path, device):
    """Load an image file as a normalized 512x512 RGB batch of size one.

    Args:
        image_path: Path of the image file, passed to ``read_image``.
        device: Torch device the resulting tensor is moved to.

    Returns:
        Float tensor of shape (1, 3, 512, 512) with values in [-1, 1].
    """
    # read_image is expected to yield a (C, H, W) uint8 tensor (torchvision).
    image = read_image(image_path)
    if image.shape[0] < 3:
        # Grayscale (optionally with alpha): replicate the luminance channel
        # so the result always carries three colour channels.
        image = image[:1].repeat(3, 1, 1)
    # Drop any alpha channel, add the batch dimension and map 0..255 to [-1, 1].
    image = image[:3].unsqueeze(0).float() / 127.5 - 1.  # [-1, 1]
    # Resize to the fixed 512x512 resolution used by the rest of the notebook.
    image = F.interpolate(image, (512, 512))
    return image.to(device)

In [None]:
# Samples to edit. Each row is
# [image path, source prompt, target object word, texture prompt].
dataset = [
    ["data/basket.jpg", "A mug and a basket on the table", "basket", "Gold"],
    ["data/horse.jpg", "a horse running in the sunset", "horse", "Cloud"],
    ["data/dog.jpg", "a dog sitting on the ground in front of fence", "dog", "Stone"],
]

# Self-attention query insertion is performed from this step up to step 50.
attention_step = 10
# Self-attention query insertion is performed from this layer up to layer 15.
attention_layer = 10
# ResNet-block feature insertion is performed for steps 0 .. resnet_step.
resnet_step = 50

# Root folder under which a per-run sample directory is created.
out_dir = "./workdir/"
# When False no mask directory is handed to the attention editor.
mask_save = False
# Seed passed to seed_everything.
seed = 1

# Make sure the root output folder exists before inspecting it.
os.makedirs(out_dir, exist_ok=True)

# Start numbering at the current entry count, then advance to the first
# unused "sample_<n>" name. The bare count can point at an existing run
# (e.g. after an older run was deleted or a stray file was added), and
# reusing that directory would silently overwrite earlier results.
sample_count = len(os.listdir(out_dir))
while os.path.exists(os.path.join(out_dir, f"sample_{sample_count}")):
    sample_count += 1
out_dir = os.path.join(out_dir, f"sample_{sample_count}")
os.makedirs(out_dir, exist_ok=True)

# Seed once, before any sample is processed.
seed_everything(seed)

for image_path, source_prompt, target_object, texture in dataset:
    print('-------------------------------------------------------------------------')
    print(source_prompt)

    # A pipeline is instantiated for every sample before the attention and
    # conv controls are registered on it.
    model = TextureDiffusionPipeline.from_pretrained(model_path, scheduler=scheduler)
    model = model.to(device)

    # First prompt describes the source image, second one is the texture.
    prompts = [source_prompt, texture]
    source_image = load_image(image_path, device)

    # Invert the source image with an empty prompt, keeping the intermediate
    # latents so they can be supplied as reference during sampling.
    start_code, latents_list = model.invert(
        source_image,
        "",
        guidance_scale=7.5,
        num_inference_steps=50,
        return_intermediates=True,
    )
    # One copy of the inverted start code per prompt.
    start_code = start_code.expand(len(prompts), -1, -1, -1)

    # Position of the target word in the whitespace-split prompt, shifted by
    # one (presumably to skip a start-of-text token -- TODO confirm).
    words = source_prompt.split()
    index = words.index(target_object) + 1

    # Masks are only written out when mask_save is switched on.
    mask_save_dir = None if mask_save is False else os.path.join(out_dir, source_prompt)

    editor = MutualSelfAttentionControlMaskAuto(
        start_step=attention_step,
        start_layer=attention_layer,
        ref_token_idx=[index],
        mask_save_dir=mask_save_dir,
    )
    regiter_attention_editor_diffusers(model, editor)

    # Inject the features in the resnet blocks for steps 0 .. resnet_step - 1.
    conv_injection_t = list(range(resnet_step))
    register_conv_control_efficient(model, conv_injection_t)

    image_TextureDiffusion = model(
        prompts,
        latents=start_code,
        guidance_scale=7.5,
        ref_intermediate_latents=latents_list,
    )

    # Source image: [-1, 1] -> [0, 255] uint8, channels last.
    source_np = source_image[0].permute(1, 2, 0).detach().cpu().numpy()
    source_np = ((source_np * 0.5 + 0.5) * 255).astype(np.uint8)

    # Last generated image (presumably the texture-prompt result, values
    # assumed to be in [0, 1] -- TODO confirm): scale to uint8, channels last.
    edited_np = image_TextureDiffusion[-1].permute(1, 2, 0).detach().cpu().numpy()
    edited_np = (edited_np * 255).astype(np.uint8)

    # Place source and edited result side by side and write them as one PNG.
    out_image = Image.fromarray(np.concatenate((source_np, edited_np), axis=1))
    out_image.save(os.path.join(out_dir, f"{source_prompt}_{texture}.png"))

    print("Syntheiszed images are saved in", out_dir)