Att-Map, seed lock
IrisRainbowNeko committed Jan 5, 2023
1 parent fbc8570 commit b761e39
Showing 8 changed files with 183 additions and 30 deletions.
24 changes: 22 additions & 2 deletions README.md
@@ -51,17 +51,37 @@ Remember to check the option below, otherwise the preview is wrong.
Fill the trained positive and negative embeddings into txt2img to generate images with the DreamArtist prompt.
![](imgs/gen.jpg)

### Attention Mask
The Attention Mask can strengthen or weaken the learning intensity in selected local areas.
It is a grayscale image whose grayscale values map to learning intensities as shown in the following table.

| grayscale | 0% | 25% | 50% | 75% | 100% |
|-----------|----|-----|------|------|------|
| intensity | 0% | 50% | 100% | 300% | 500% |

The Attention Mask lives in the same folder as the training image and is named after it with an `_att` suffix.
You can choose whether to enable the Attention Mask during training.
![](imgs/att_map.jpg)
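
As a minimal sketch, the table's mapping matches the piecewise rule applied in `dataset.py` (see the diff further down); the function name here is illustrative:

```python
import numpy as np

# Grayscale (0-255) -> learning intensity, mirroring dataset.py:
# values up to 127 scale linearly to 0..1, values above to 1..5.
def att_intensity(gray: np.ndarray) -> np.ndarray:
    gray = gray.astype(float)
    out = np.empty_like(gray)
    low = gray <= 127
    out[low] = gray[low] / 127.
    out[~low] = (gray[~low] - 127) / 128. * 4 + 1
    return out

print(att_intensity(np.array([0, 64, 127, 191, 255])))
# -> [0.  0.504  1.  3.  5.]  (0%, ~50%, 100%, 300%, 500%)
```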

Since the VAE contains a self-attention operation, it may change the distribution of features.
The ***Process Att-Map*** tab can therefore superimpose the VAE's self-attention map onto the original Att-Map.
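
A minimal sketch of that superimposition, mirroring the `att_hook` added to `cptuning.py` in this commit (shapes are illustrative):

```python
import torch

b, h, w = 1, 8, 8                                          # illustrative latent size
attn = torch.softmax(torch.randn(b, h * w, h * w), dim=2)  # VAE self-attention weights
v = torch.rand(b, 1, h * w)                                # flattened user Att-Map

# redistribute the mask through the attention weights, then blend the
# result back onto the original mask and clamp to [0, 1]
proc = (torch.bmm(v, attn.permute(0, 2, 1)) - v.mean() + v).clamp(0., 1.)
att_proc = proc.view(b, 1, h, w)
```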

### Dynamic CFG
Dynamic CFG can improve performance, especially when the dataset is large (>20 images).
For example, the CFG scale can grow linearly from 1.5 to 3.0 (`1.5-3.0`), follow a cosine over a 0-π/2 cycle (`1.5-3.0:cos`), or over a -π/2-0 cycle (`1.5-3.0:cos2`).
You can also supply a custom non-linear function, such as `2.5-3.5:torch.sqrt(rate)`, where `rate` is a variable running from 0 to 1.
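
A sketch of how such a schedule can be evaluated (the real parser is `get_cfg_range` in `cptuning.py`; the exact cosine shapes below are assumptions based on the cycle descriptions above, and `math` stands in for `torch` to keep the example dependency-free):

```python
import math

def dyn_cfg(lo: float, hi: float, rate: float, mode: str = "linear") -> float:
    """CFG scale at training progress `rate` in [0, 1]."""
    if mode == "linear":    # "1.5-3.0"
        f = rate
    elif mode == "cos":     # "1.5-3.0:cos", cosine over a 0..pi/2 cycle (assumed)
        f = 1 - math.cos(rate * math.pi / 2)
    elif mode == "cos2":    # "1.5-3.0:cos2", cosine over a -pi/2..0 cycle (assumed)
        f = math.cos((rate - 1) * math.pi / 2)
    else:                   # custom expression of `rate`, e.g. "math.sqrt(rate)"
        f = eval(mode, {"math": math, "rate": rate})
    return lo + (hi - lo) * f

print(dyn_cfg(1.5, 3.0, 0.5))                      # 2.25, halfway through training
print(dyn_cfg(2.5, 3.5, 0.25, "math.sqrt(rate)"))  # 3.0
```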

## Tested models (EMA version required):
+ Stable Diffusion v1.4
+ Stable Diffusion v1.5
+ animefull-latest
+ Anything v3.0
+ momoko-e

For the same dataset, trained embeddings can be transferred between different models.

## Pre-trained embeddings:



[Download](https://github.com/7eu7d7/DreamArtist-stable-diffusion/releases/tag/embeddings_v2)

| Name | Model | Image | embedding length <br> (Positive, Negative) | iter | lr | cfg scale |
Binary file added imgs/att_map.jpg
Binary file modified imgs/create.jpg
Binary file modified imgs/train.jpg
89 changes: 81 additions & 8 deletions scripts/dream_artist/cptuning.py
@@ -2,6 +2,8 @@
import sys
import traceback

import cv2
import numpy as np
import torch
import tqdm
import html
@@ -24,6 +26,8 @@
caption_image_overlay)
from .convnext_discriminator import XPDiscriminator
import json
from torchvision import transforms
import random

class Embedding:
def __init__(self, vec, name, step=None):
@@ -335,9 +339,18 @@ def get_cfg_range(cfg_text:str):
else:
return float(cfg_text), float(cfg_text), dy_cfg_f

def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height,
cfg_scale, classifier_path, use_negative, use_rec, rec_loss_w, neg_lr_w, ema_w, ema_rep_step, ema_w_neg, ema_rep_step_neg, adam_beta1, adam_beta2, fw_pos_only, accumulation_steps,
def set_seed(seed):
    torch.manual_seed(seed)  # CPU RNG
    torch.cuda.manual_seed(seed)  # GPU RNG
    torch.backends.cudnn.deterministic = True  # force deterministic cuDNN kernels
    np.random.seed(seed)  # numpy RNG
    random.seed(seed)  # python random (also used by torchvision transforms)
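    # (assumption, not in the original code) stricter run-to-run determinism
    # would also set torch.backends.cudnn.benchmark = False, so cuDNN does not
    # autotune its kernel selection between runs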

def train_embedding(embedding_name, seed, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height,
cfg_scale, classifier_path, use_negative, use_att_map, use_rec, rec_loss_w, neg_lr_w, ema_w, ema_rep_step, ema_w_neg, ema_rep_step_neg, adam_beta1, adam_beta2, fw_pos_only, accumulation_steps,
unet_train, unet_lr):
set_seed(seed)

save_embedding_every = save_embedding_every or 0
create_image_every = create_image_every or 0
validate_train_inputs(embedding_name, learn_rate, batch_size, data_root, template_file, steps, save_embedding_every, create_image_every, log_directory, name="embedding")
@@ -414,9 +427,8 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc
'size': [training_width, training_height],
'neg': use_negative,
'rec': use_rec,
'seed': seed,
'prompt_len': embedding.vec.shape,
'ema': [ema_w, ema_w_neg],
'ema_steps': [ema_rep_step, ema_rep_step_neg],
}
if use_negative:
hyper_param['prompt_len_neg'] = embedding_neg.vec.shape
@@ -515,9 +527,12 @@ def lr_lambda_cos(current_step):
c_in = cond_model([entry.cond_text for entry in entries])

x = torch.stack([entry.latent for entry in entries]).to(devices.device)
att_mask = torch.stack([(entry.att_mask if entry.att_mask is not None else torch.ones_like(entry.latent)) for entry in entries]).to(devices.device)

output = shared.sd_model(x, c_in, scale=(cfg_l, cfg_h), att_mask=att_mask, dy_cfg_f=dy_cfg_f)
if use_att_map:
att_mask = torch.stack([(entry.att_mask if entry.att_mask is not None else torch.ones_like(entry.latent)) for entry in entries]).to(devices.device)
output = shared.sd_model(x, c_in, scale=(cfg_l, cfg_h), att_mask=att_mask, dy_cfg_f=dy_cfg_f)
else:
output = shared.sd_model(x, c_in, scale=(cfg_l, cfg_h), att_mask=None, dy_cfg_f=dy_cfg_f)

if disc is not None or use_rec:
x_samples_ddim = shared.sd_model.decode_first_stage.__wrapped__(shared.sd_model, output[2]) # forward with grad
@@ -660,6 +675,10 @@ def lr_lambda_cos(current_step):
last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False)
last_saved_image += f", prompt: {preview_text}"

# re-seed: generating the preview via p advances the RNG state
seed += 1
set_seed(seed)

shared.state.job_no = embedding.step

shared.state.textinfo = f"""
@@ -706,7 +725,6 @@ def save_embedding(embedding, checkpoint, embedding_name, filename, remove_cache
embedding_neg.name = embedding_name+'-neg'
embedding_neg.save(f'{filename[:-3]}-neg.pt')

#torch.save({f'part{i}':layer.state_dict() for i,layer in enumerate(unet_layers)}, f'{filename[:-3]}-unet.ckpt')
except:
embedding.sd_checkpoint = old_sd_checkpoint
embedding.sd_checkpoint_name = old_sd_checkpoint_name
@@ -719,5 +737,60 @@ def save_embedding(embedding, checkpoint, embedding_name, filename, remove_cache
embedding_neg.name = old_embedding_name+'-neg'
embedding_neg.cached_checksum = old_cached_checksum

#torch.save({f'part{i}': layer.state_dict() for i, layer in enumerate(unet_layers)}, f'{filename[:-3]}-unet.ckpt')
raise

def proc_att(data_root, training_width, training_height):
shared.sd_model.first_stage_model.to(devices.device)

shared.state.textinfo = "Processing Att-Map"
shared.state.job_count = 0

att_map=[None]
att_proc=[None]

with torch.autocast("cuda"):
ds = DA_dataset.DataAtt(data_root=data_root, width=training_width, height=training_height)

def att_hook(module, x, output):
h_ = x[0]
h_ = module.norm(h_)
q = module.q(h_)
k = module.k(h_)
v = att_map[0]  # substitute the user Att-Map for the value projection

# compute attention
b, c, h, w = q.shape
q = q.reshape(b, c, h * w)
q = q.permute(0, 2, 1) # b,hw,c
k = k.reshape(b, c, h * w) # b,c,hw
w_ = torch.bmm(q, k) # b,hw,hw w[b,i,j]=sum_c q[b,i,c]k[b,c,j]
w_ = w_ * (int(c) ** (-0.5))
w_ = torch.nn.functional.softmax(w_, dim=2)

# attend to values
v = v.reshape(b, 1, h * w)
w_ = w_.permute(0, 2, 1)  # b,hw,hw (first hw of k, second of q)
# redistribute the mask through the attention weights, then blend with
# the original mask: proc[b,c,j] = sum_i v[b,c,i] w_[b,i,j] - mean(v) + v
proc = (torch.bmm(v, w_) - v.mean() + v).clamp(0., 1.)
att_proc[0] = proc.view(b, 1, h, w)
#att_proc[0] = v.view(b, 1, h, w)

hook = shared.sd_model.first_stage_model.encoder.mid.attn_1.register_forward_hook(att_hook)

pbar = tqdm.tqdm(enumerate(ds), total=len(ds))
import torch.nn.functional as F
for i, entries in pbar:
with torch.autocast("cuda"):
timg = torch.cat([entry.timg for entry in entries]).to(devices.device)
att_map[0]=torch.stack([(entry.att_mask if entry.att_mask is not None else torch.ones_like(entry.latent)) for entry in entries]).to(devices.device)
#att_map[0] = F.interpolate(timg, scale_factor=1/8, mode='bicubic')
shared.sd_model.encode_first_stage(timg)

att_map_new = att_proc[0].detach().cpu().squeeze(0)#.permute(1,2,0).numpy().astype(np.uint8)
#att_map_new = cv2.resize(att_map_new, (0, 0), fx=8, fy=8, interpolation=cv2.INTER_CUBIC)
att_map_new = transforms.ToPILImage()(att_map_new)
#att_map_new = att_map_new.resize((int(att_map_new.size[0]*8), int(att_map_new.size[1]*8)), PIL.Image.BICUBIC)
att_map_new.save(entries[0].filename[:entries[0].filename.rfind('.')]+'_att_proc'+entries[0].filename[entries[0].filename.rfind('.'):])

shared.state.job_no = i

hook.remove()
2 changes: 1 addition & 1 deletion scripts/dream_artist/dataset.py
@@ -127,7 +127,7 @@ def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_to
print(att_path)
att_mask = Image.open(att_path).convert('L').resize((self.width//8, self.height//8), PIL.Image.BICUBIC)
np_mask = np.array(att_mask).astype(float)[:,:,None]
np_mask[np_mask<=127+0.1]=(np_mask[np_mask<=127+0.1]/127.)*0.99+0.01
np_mask[np_mask<=127+0.1]=(np_mask[np_mask<=127+0.1]/127.)#*0.99+0.01
np_mask[np_mask>127]=((np_mask[np_mask>127]-127)/128.)*4+1

torchdata = torch.from_numpy(np_mask).to(device=device, dtype=torch.float32)
26 changes: 22 additions & 4 deletions scripts/dream_artist/ui.py
@@ -7,11 +7,13 @@
from modules import sd_hijack, shared


def create_embedding(name, initialization_text, nvpt, overwrite_old, use_negative, nvpt_neg):
def create_embedding(name, initialization_text, initialization_text_neg, nvpt, overwrite_old, nvpt_neg, seed):
dream_artist.cptuning.set_seed(seed)

filename = dream_artist.cptuning.create_embedding(name, nvpt, overwrite_old, init_text=initialization_text)
if use_negative:
dream_artist.cptuning.create_embedding(name+'-neg', nvpt_neg, overwrite_old, init_text=initialization_text)
filename=f'{filename} and {filename[:-3]}-neg.pt'
filename_neg = dream_artist.cptuning.create_embedding(name+'-neg', nvpt_neg, overwrite_old, init_text=initialization_text_neg)

filename=f'{filename} and {filename_neg}'

sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings()

@@ -40,3 +42,19 @@ def train_embedding(*args):
if not apply_optimizations:
sd_hijack.apply_optimizations()


def proc_att(*args):

apply_optimizations = shared.opts.training_xattention_optimizations
try:
if not apply_optimizations:
sd_hijack.undo_optimizations()

dream_artist.cptuning.proc_att(*args)

return "process finish", ""
except Exception:
raise
finally:
if not apply_optimizations:
sd_hijack.apply_optimizations()
