## Editing Examples V1.0


In [None]:
import os
import sys
import json
import time
import torch
import importlib
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt
from PIL import Image
from diffusers import StableDiffusionPipeline, DDIMScheduler, DiffusionPipeline
from ptp import invutils, frq_ptputils

### Stable Diffusion Version
We recommend using one of the following Stable Diffusion versions:

- "CompVis/stable-diffusion-v1-4"
- "runwayml/stable-diffusion-v1-5"


### Hyperparameter Sets
- We mainly modify two sets of hyperparameters: TS = [t_M, t_{M-1}, ..., t_0] and FS = [s_N, s_{N-1}, ..., s_0]. TS are the timesteps at which the filter size changes, and FS are the filter sizes. Combined, they form the filter sequence [t:981, s_N, t_M, s_{N-1}, t_{M-1}, ..., t_0, s_0], so FS has one more entry than TS (N = M + 1), as in the examples below.
- Of course, you are free to tune the hyperparameter that controls the "Clearing low" procedure, which aims to further eliminate small values left after frequency truncation.


### Principle for Hyperparameter Selection
- Generally, depending on the editing type, we recommend trying an earlier response period and a smaller filter size (allowing more low-frequency components (LFC) of the guidance) for larger shape changes.
- For larger color changes, we recommend adopting the two-stage method and considering the edit-friendly inversion technique, which blends extra noise into the latents. Code for this part will be added soon.

In [None]:
# Set up the device and the backbone model.
# Use the first CUDA device when available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
# Number of DDIM steps; shared by the inversion and by every editing call below.
NUM_INFER_STEPS = 50
scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012,
                          beta_schedule="scaled_linear", clip_sample=False,
                          set_alpha_to_one=False, steps_offset=1)
model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", scheduler=scheduler).to(DEVICE)
model.scheduler.set_timesteps(NUM_INFER_STEPS)

# Single source of truth for the seed: the CPU generator is derived from
# g_seed so the two can never drift apart when the seed is changed.
g_seed = 8888
g_cpu = torch.Generator().manual_seed(g_seed)


# specify the image path and save path
def set_img(iname="11102.jpg", idir='./DATASET', sdir='./outputs/test'):
    """Resolve the input image path and make sure the output directory exists.

    Args:
        iname: File name of the image inside ``idir``.
        idir: Directory that holds the input images (it is not created here).
        sdir: Directory for the results; created (with parents) if missing.

    Returns:
        A ``(save_dir, img_path)`` tuple: ``sdir`` unchanged and the joined
        ``idir``/``iname`` path.
    """
    source_path = os.path.join(idir, iname)
    # exist_ok=True keeps repeated runs of the same cell from failing.
    os.makedirs(sdir, exist_ok=True)
    return sdir, source_path

# Compute the inverted latents for editing. Fixed-point iteration is used.
def get_latents(iname, ipath, sdir):
    """Invert an image into the latents used as the starting point for editing.

    The image is encoded to a latent, decoded back, and that VAE round-trip is
    saved to ``sdir`` so any difference introduced by the VAE itself can be
    inspected. The latent is then inverted with
    ``invutils.ddim_inversion_null_fixpt`` using the empty-prompt embedding.

    Args:
        iname: File name of the image; its stem names the saved reconstruction.
        ipath: Path of the image file to load.
        sdir: Directory that receives ``<stem>vae-rec.png``. It must already
            exist (``set_img`` creates it).

    Returns:
        The ``(xT, xts)`` pair returned by ``invutils.ddim_inversion_null_fixpt``
        (presumably the final inverted latent and the intermediate latents,
        given ``save_all=True`` -- TODO confirm against ``invutils``).
    """
    img_np = invutils.load_512(ipath)
    img_latn = invutils.img2latn(img_np, model, DEVICE)
    img_latn_rec = invutils.latn2img(img_latn, model)[0]
    # splitext instead of iname[:-4]: same result for three-letter extensions
    # such as ".jpg", but also correct for ".jpeg" or extension-less names.
    stem = os.path.splitext(iname)[0]
    Image.fromarray(img_latn_rec).save(os.path.join(sdir, stem + "vae-rec.png"))
    # Empty prompt -> unconditional text embedding used during inversion.
    uncond_emb = invutils.encode_text("", model)
    xT, xts = invutils.ddim_inversion_null_fixpt(img_latn, model, uncond_emb, save_all=True, FP_STEPS=5,
                                                    INV_STEPS=NUM_INFER_STEPS,)

    return xT, xts


### Changing of Objects

The target prompt should follow the principle mentioned in the paper.

- For cases of changing a single object, use a simple prompt that only describes the target (effect).
- For cases of changing an object where the editing effect also affects the background, use a target prompt that contains a description of the surroundings.

In [None]:
# Object change: edit the image towards the target prompt "a marmot".
iname = "11102.jpg"
save_dir, img_path = set_img(iname)
xT, xts = get_latents(iname, img_path, save_dir)
prompt = ["a marmot"]
res_latents = frq_ptputils.frq_img_gen(
    model,
    prompt,
    g_seed=g_seed,
    latent=xT,
    mod_guidance_frq=True,
    save_dir=save_dir,
    guidance_scale=7.5,
    # TS: timesteps at which the filter size changes; FS: the filter sizes.
    TS=(801, 781, 581),
    FS=(32, 32, 10, 10),
    HS=(32, 32, 32, 32, 32, 32),
    filter_shape='sq',
    # Optional switches left disabled for this example:
    #   remove=True, generate_mask=True, gen_MSK_THRS=0.025,
    #   guide_mask=True, reverse_mask=True
    num_infer_steps=NUM_INFER_STEPS,
    clear_low=True,
    record_time=True,
)

In [None]:
# Object change: edit the image towards the target prompt "a crown on the hair".
iname = "11207.jpg"
save_dir, img_path = set_img(iname)
xT, xts = get_latents(iname, img_path, save_dir)
prompt = ["a crown on the hair"]
res_latents = frq_ptputils.frq_img_gen(
    model,
    prompt,
    g_seed=g_seed,
    latent=xT,
    mod_guidance_frq=True,
    save_dir=save_dir,
    guidance_scale=7.5,
    # TS: timesteps at which the filter size changes; FS: the filter sizes.
    TS=(801, 681, 581),
    FS=(32, 32, 2, 10),
    HS=(32, 32, 32, 32, 32, 32),
    filter_shape='sq',
    # Optional switches left disabled for this example:
    #   remove=True, generate_mask=True, gen_MSK_THRS=0.025,
    #   guide_mask=True, reverse_mask=True
    num_infer_steps=NUM_INFER_STEPS,
    clear_low=True,
    record_time=True,
)

In [None]:
# Object change: edit the image towards the target prompt "hat".
iname = "11201.jpg"
save_dir, img_path = set_img(iname)
xT, xts = get_latents(iname, img_path, save_dir)
prompt = ["hat"]
res_latents = frq_ptputils.frq_img_gen(
    model,
    prompt,
    g_seed=g_seed,
    latent=xT,
    mod_guidance_frq=True,
    save_dir=save_dir,
    guidance_scale=7.5,
    # TS: timesteps at which the filter size changes; FS: the filter sizes.
    TS=(801, 781, 581),
    FS=(32, 32, 32, 20),
    HS=(32, 32, 32, 32, 32, 32),
    filter_shape='sq',
    # Optional switches left disabled for this example:
    #   remove=True, generate_mask=True, gen_MSK_THRS=0.025,
    #   guide_mask=True, reverse_mask=True
    num_infer_steps=NUM_INFER_STEPS,
    clear_low=True,
    record_time=True,
)

### Adding of Objects

Adding objects is difficult for the current version of our method, and even for P2P with blending. In most cases, these methods end up changing part of the image into the target object.


In [None]:
# Object addition: target prompt "a woman surrounded by snakes and roses".
iname = "21207.jpg"
save_dir, img_path = set_img(iname)
xT, xts = get_latents(iname, img_path, save_dir)
prompt = ["a woman surrounded by snakes and roses"]
res_latents = frq_ptputils.frq_img_gen(
    model,
    prompt,
    g_seed=g_seed,
    latent=xT,
    mod_guidance_frq=True,
    save_dir=save_dir,
    guidance_scale=7.5,
    # TS: timesteps at which the filter size changes; FS: the filter sizes.
    TS=(801, 781, 581),
    FS=(32, 32, 10, 10),
    HS=(32, 32, 32, 32, 32, 32),
    filter_shape='sq',
    # Optional switches left disabled for this example:
    #   remove=True, generate_mask=True, gen_MSK_THRS=0.025,
    #   guide_mask=True, reverse_mask=True
    num_infer_steps=NUM_INFER_STEPS,
    clear_low=True,
    record_time=True,
)

### Changing of Shape and Poses

- For pose changes, drastic changes often happen early in the generation process, with some steps being crucial during the response period.
- For shape changes, although large alterations happen, the response period seems to start later and last longer.

In [None]:
# Pose change: edit the image towards the target prompt "a greyhound jumping".
# NOTE(review): "21207.jpg" is also the input of the "snakes and roses"
# example -- confirm this is the intended image for a greyhound prompt.
iname = "21207.jpg"
save_dir, img_path = set_img(iname)
xT, xts = get_latents(iname, img_path, save_dir)
prompt = ["a greyhound jumping"]
res_latents = frq_ptputils.frq_img_gen(
    model,
    prompt,
    g_seed=g_seed,
    latent=xT,
    mod_guidance_frq=True,
    save_dir=save_dir,
    guidance_scale=7.5,
    # TS: timesteps at which the filter size changes; FS: the filter sizes.
    # The response period is early here (timesteps close to the start).
    TS=(961, 881, 781),
    FS=(32, 4, 8, 16),
    HS=(32, 32, 32, 32, 32, 32),
    filter_shape='sq',
    # Optional switches left disabled for this example:
    #   remove=True, generate_mask=True, gen_MSK_THRS=0.025,
    #   guide_mask=True, reverse_mask=True
    num_infer_steps=NUM_INFER_STEPS,
    clear_low=True,
    record_time=True,
)

In [None]:
# Shape change: edit the image towards the target prompt "a red heart in the snow".
iname = "12408.jpg"
save_dir, img_path = set_img(iname)
xT, xts = get_latents(iname, img_path, save_dir)
prompt = ["a red heart in the snow"]
res_latents = frq_ptputils.frq_img_gen(
    model,
    prompt,
    g_seed=g_seed,
    latent=xT,
    mod_guidance_frq=True,
    save_dir=save_dir,
    guidance_scale=7.5,
    # TS: timesteps at which the filter size changes; FS: the filter sizes.
    # Four switching timesteps here, hence five filter sizes.
    TS=(901, 801, 601, 501),
    FS=(32, 2, 2, 32, 32),
    HS=(32, 32, 32, 32, 32, 32),
    filter_shape='sq',
    # Optional switches left disabled for this example:
    #   remove=True, generate_mask=True, gen_MSK_THRS=0.025,
    #   guide_mask=True, reverse_mask=True
    num_infer_steps=NUM_INFER_STEPS,
    clear_low=True,
    record_time=True,
)

### Changing of Materials

- The response period usually lies in the later part of the generation process.

In [None]:
# Material change: edit the image towards the target prompt "a bronze horse".
iname = "72100.jpg"
save_dir, img_path = set_img(iname)
xT, xts = get_latents(iname, img_path, save_dir)
prompt = ["a bronze horse"]
res_latents = frq_ptputils.frq_img_gen(
    model,
    prompt,
    g_seed=g_seed,
    latent=xT,
    mod_guidance_frq=True,
    save_dir=save_dir,
    guidance_scale=7.5,
    # TS: timesteps at which the filter size changes; FS: the filter sizes.
    TS=(681, 581, 481),
    FS=(32, 20, 8, 1),
    HS=(32, 32, 32, 32, 32, 32),
    filter_shape='sq',
    # Optional switches left disabled for this example:
    #   remove=True, generate_mask=True, gen_MSK_THRS=0.025,
    #   guide_mask=True, reverse_mask=True
    num_infer_steps=NUM_INFER_STEPS,
    clear_low=True,
    record_time=True,
)

In [None]:
# Material change: edit the image towards the target prompt "a plastic butterfly".
iname = "71102.jpg"
save_dir, img_path = set_img(iname)
xT, xts = get_latents(iname, img_path, save_dir)
prompt = ["a plastic butterfly"]
res_latents = frq_ptputils.frq_img_gen(
    model,
    prompt,
    g_seed=g_seed,
    latent=xT,
    mod_guidance_frq=True,
    save_dir=save_dir,
    guidance_scale=7.5,
    # TS: timesteps at which the filter size changes; FS: the filter sizes.
    TS=(781, 681, 581),
    FS=(32, 2, 4, 4),
    HS=(32, 32, 32, 32, 32, 32),
    filter_shape='sq',
    # Optional switches left disabled for this example:
    #   remove=True, generate_mask=True, gen_MSK_THRS=0.025,
    #   guide_mask=True, reverse_mask=True
    num_infer_steps=NUM_INFER_STEPS,
    clear_low=True,
    record_time=True,
)

### Changing of Styles

- For different styles, the hyperparameter sets differ greatly
- In most cases, a full description of the original image is needed.

In [None]:
# Style change: the target prompt is a full description of the image in the
# new (kids crayon drawing) style.
iname = "91102.jpg"
save_dir, img_path = set_img(iname)
xT, xts = get_latents(iname, img_path, save_dir)
prompt = ["kids crayon drawing of a colorful cat with bubbles and stars on it"]
res_latents = frq_ptputils.frq_img_gen(
    model,
    prompt,
    g_seed=g_seed,
    latent=xT,
    mod_guidance_frq=True,
    save_dir=save_dir,
    guidance_scale=7.5,
    # TS: timesteps at which the filter size changes; FS: the filter sizes.
    TS=(801, 781, 581),
    FS=(4, 4, 2, 2),
    HS=(32, 32, 32, 32, 32, 32),
    filter_shape='sq',
    # Optional switches left disabled for this example:
    #   remove=True, generate_mask=True, gen_MSK_THRS=0.025,
    #   guide_mask=True, reverse_mask=True
    num_infer_steps=NUM_INFER_STEPS,
    clear_low=True,
    record_time=True,
)

In [None]:
# Style change: the target prompt is a full description of the image in the
# new (cartoon painting) style.
iname = "92206.jpg"
save_dir, img_path = set_img(iname)
xT, xts = get_latents(iname, img_path, save_dir)
prompt = ["a cartoon painting of a woman in white sitting on a bench"]
res_latents = frq_ptputils.frq_img_gen(
    model,
    prompt,
    g_seed=g_seed,
    latent=xT,
    mod_guidance_frq=True,
    save_dir=save_dir,
    guidance_scale=7.5,
    # TS: timesteps at which the filter size changes; FS: the filter sizes.
    TS=(881, 781, 581),
    FS=(6, 4, 4, 4),
    HS=(32, 32, 32, 32, 32, 32),
    filter_shape='sq',
    # Optional switches left disabled for this example:
    #   remove=True, generate_mask=True, gen_MSK_THRS=0.025,
    #   guide_mask=True, reverse_mask=True
    num_infer_steps=NUM_INFER_STEPS,
    clear_low=True,
    record_time=True,
)

### More examples will be added later