Att-Map, seed lock
IrisRainbowNeko committed Jan 5, 2023
1 parent fbc8570 commit b761e39
Showing 8 changed files with 183 additions and 30 deletions.
24 changes: 22 additions & 2 deletions README.md
@@ -51,17 +51,37 @@ Remember to check the option below, otherwise the preview is wrong.
Fill the trained positive and negative embeddings into txt2img to generate images with the DreamArtist prompt.
![](imgs/gen.jpg)

### Attention Mask
The Attention Mask can strengthen or weaken the learning intensity in selected local areas.
It is a grayscale image whose grayscale values map to learning intensities as shown in the following table.

| grayscale | 0% | 25% | 50% | 75% | 100% |
|-----------|----|-----|------|------|------|
| intensity | 0% | 50% | 100% | 300% | 500% |

The Attention Mask lives in the same folder as the training image and is named after it with an `_att` suffix.
You can choose whether to enable the Attention Mask during training.
![](imgs/att_map.jpg)
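
As a minimal sketch, the table's mapping matches the piecewise rule applied in `dataset.py` (see the diff further down); the function name here is illustrative:

```python
import numpy as np

# Grayscale (0-255) -> learning intensity, mirroring dataset.py:
# values up to 127 scale linearly to 0..1, values above to 1..5.
def att_intensity(gray: np.ndarray) -> np.ndarray:
    gray = gray.astype(float)
    out = np.empty_like(gray)
    low = gray <= 127
    out[low] = gray[low] / 127.
    out[~low] = (gray[~low] - 127) / 128. * 4 + 1
    return out

print(att_intensity(np.array([0, 64, 127, 191, 255])))
# -> [0.  0.504  1.  3.  5.]  (0%, ~50%, 100%, 300%, 500%)
```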

Since the VAE contains a self-attention operation, it may change the distribution of features.
The ***Process Att-Map*** tab can therefore superimpose the VAE's self-attention map onto the original Att-Map.
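
A minimal sketch of that superimposition, mirroring the `att_hook` added to `cptuning.py` in this commit (shapes are illustrative):

```python
import torch

b, h, w = 1, 8, 8                                          # illustrative latent size
attn = torch.softmax(torch.randn(b, h * w, h * w), dim=2)  # VAE self-attention weights
v = torch.rand(b, 1, h * w)                                # flattened user Att-Map

# redistribute the mask through the attention weights, then blend the
# result back onto the original mask and clamp to [0, 1]
proc = (torch.bmm(v, attn.permute(0, 2, 1)) - v.mean() + v).clamp(0., 1.)
att_proc = proc.view(b, 1, h, w)
```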

### Dynamic CFG
Dynamic CFG can improve performance, especially when the dataset is large (>20 images).
For example, the CFG scale can grow linearly from 1.5 to 3.0 (`1.5-3.0`), follow a cosine over a 0-π/2 cycle (`1.5-3.0:cos`), or over a -π/2-0 cycle (`1.5-3.0:cos2`).
You can also supply a custom non-linear function, such as `2.5-3.5:torch.sqrt(rate)`, where `rate` is a variable running from 0 to 1.
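
A sketch of how such a schedule can be evaluated (the real parser is `get_cfg_range` in `cptuning.py`; the exact cosine shapes below are assumptions based on the cycle descriptions above, and `math` stands in for `torch` to keep the example dependency-free):

```python
import math

def dyn_cfg(lo: float, hi: float, rate: float, mode: str = "linear") -> float:
    """CFG scale at training progress `rate` in [0, 1]."""
    if mode == "linear":    # "1.5-3.0"
        f = rate
    elif mode == "cos":     # "1.5-3.0:cos", cosine over a 0..pi/2 cycle (assumed)
        f = 1 - math.cos(rate * math.pi / 2)
    elif mode == "cos2":    # "1.5-3.0:cos2", cosine over a -pi/2..0 cycle (assumed)
        f = math.cos((rate - 1) * math.pi / 2)
    else:                   # custom expression of `rate`, e.g. "math.sqrt(rate)"
        f = eval(mode, {"math": math, "rate": rate})
    return lo + (hi - lo) * f

print(dyn_cfg(1.5, 3.0, 0.5))                      # 2.25, halfway through training
print(dyn_cfg(2.5, 3.5, 0.25, "math.sqrt(rate)"))  # 3.0
```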

## Tested models (EMA version required):
+ Stable Diffusion v1.4
+ Stable Diffusion v1.5
+ animefull-latest
+ Anything v3.0
+ momoko-e

For the same dataset, trained embeddings can be transferred between different models.

## Pre-trained embeddings:



[Download](https://github.com/7eu7d7/DreamArtist-stable-diffusion/releases/tag/embeddings_v2)

| Name | Model | Image | embedding length <br> (Positive, Negative) | iter | lr | cfg scale |
Binary file added imgs/att_map.jpg
Binary file modified imgs/create.jpg
Binary file modified imgs/train.jpg
89 changes: 81 additions & 8 deletions scripts/dream_artist/cptuning.py
@@ -2,6 +2,8 @@
import sys
import traceback

import cv2
import numpy as np
import torch
import tqdm
import html
@@ -24,6 +26,8 @@
caption_image_overlay)
from .convnext_discriminator import XPDiscriminator
import json
from torchvision import transforms
import random

class Embedding:
def __init__(self, vec, name, step=None):
@@ -335,9 +339,18 @@ def get_cfg_range(cfg_text:str):
else:
return float(cfg_text), float(cfg_text), dy_cfg_f

def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height,
cfg_scale, classifier_path, use_negative, use_rec, rec_loss_w, neg_lr_w, ema_w, ema_rep_step, ema_w_neg, ema_rep_step_neg, adam_beta1, adam_beta2, fw_pos_only, accumulation_steps,
def set_seed(seed):
    torch.manual_seed(seed)  # CPU RNG
    torch.cuda.manual_seed(seed)  # GPU RNG
    torch.backends.cudnn.deterministic = True  # force deterministic cuDNN kernels
    np.random.seed(seed)  # numpy RNG
    random.seed(seed)  # python random (also used by torchvision transforms)
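    # (assumption, not in the original code) stricter run-to-run determinism
    # would also set torch.backends.cudnn.benchmark = False, so cuDNN does not
    # autotune its kernel selection between runs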

def train_embedding(embedding_name, seed, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height,
cfg_scale, classifier_path, use_negative, use_att_map, use_rec, rec_loss_w, neg_lr_w, ema_w, ema_rep_step, ema_w_neg, ema_rep_step_neg, adam_beta1, adam_beta2, fw_pos_only, accumulation_steps,
unet_train, unet_lr):
set_seed(seed)

save_embedding_every = save_embedding_every or 0
create_image_every = create_image_every or 0
validate_train_inputs(embedding_name, learn_rate, batch_size, data_root, template_file, steps, save_embedding_every, create_image_every, log_directory, name="embedding")
@@ -414,9 +427,8 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc
'size': [training_width, training_height],
'neg': use_negative,
'rec': use_rec,
'seed': seed,
'prompt_len': embedding.vec.shape,
'ema': [ema_w, ema_w_neg],
'ema_steps': [ema_rep_step, ema_rep_step_neg],
}
if use_negative:
hyper_param['prompt_len_neg'] = embedding_neg.vec.shape
@@ -515,9 +527,12 @@ def lr_lambda_cos(current_step):
c_in = cond_model([entry.cond_text for entry in entries])

x = torch.stack([entry.latent for entry in entries]).to(devices.device)
att_mask = torch.stack([(entry.att_mask if entry.att_mask is not None else torch.ones_like(entry.latent)) for entry in entries]).to(devices.device)

output = shared.sd_model(x, c_in, scale=(cfg_l, cfg_h), att_mask=att_mask, dy_cfg_f=dy_cfg_f)
if use_att_map:
att_mask = torch.stack([(entry.att_mask if entry.att_mask is not None else torch.ones_like(entry.latent)) for entry in entries]).to(devices.device)
output = shared.sd_model(x, c_in, scale=(cfg_l, cfg_h), att_mask=att_mask, dy_cfg_f=dy_cfg_f)
else:
output = shared.sd_model(x, c_in, scale=(cfg_l, cfg_h), att_mask=None, dy_cfg_f=dy_cfg_f)

if disc is not None or use_rec:
x_samples_ddim = shared.sd_model.decode_first_stage.__wrapped__(shared.sd_model, output[2]) # forward with grad
@@ -660,6 +675,10 @@ def lr_lambda_cos(current_step):
last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False)
last_saved_image += f", prompt: {preview_text}"

# re-seed: generating the preview via p advances the RNG state
seed += 1
set_seed(seed)

shared.state.job_no = embedding.step

shared.state.textinfo = f"""
@@ -706,7 +725,6 @@ def save_embedding(embedding, checkpoint, embedding_name, filename, remove_cache
embedding_neg.name = embedding_name+'-neg'
embedding_neg.save(f'{filename[:-3]}-neg.pt')

#torch.save({f'part{i}':layer.state_dict() for i,layer in enumerate(unet_layers)}, f'{filename[:-3]}-unet.ckpt')
except:
embedding.sd_checkpoint = old_sd_checkpoint
embedding.sd_checkpoint_name = old_sd_checkpoint_name
@@ -719,5 +737,60 @@ def save_embedding(embedding, checkpoint, embedding_name, filename, remove_cache
embedding_neg.name = old_embedding_name+'-neg'
embedding_neg.cached_checksum = old_cached_checksum

#torch.save({f'part{i}': layer.state_dict() for i, layer in enumerate(unet_layers)}, f'{filename[:-3]}-unet.ckpt')
raise

def proc_att(data_root, training_width, training_height):
shared.sd_model.first_stage_model.to(devices.device)

shared.state.textinfo = "Processing Att-Map"
shared.state.job_count = 0

att_map=[None]
att_proc=[None]

with torch.autocast("cuda"):
ds = DA_dataset.DataAtt(data_root=data_root, width=training_width, height=training_height)

def att_hook(module, x, output):
h_ = x[0]
h_ = module.norm(h_)
q = module.q(h_)
k = module.k(h_)
v = att_map[0]  # substitute the user Att-Map for the value projection

# compute attention
b, c, h, w = q.shape
q = q.reshape(b, c, h * w)
q = q.permute(0, 2, 1) # b,hw,c
k = k.reshape(b, c, h * w) # b,c,hw
w_ = torch.bmm(q, k) # b,hw,hw w[b,i,j]=sum_c q[b,i,c]k[b,c,j]
w_ = w_ * (int(c) ** (-0.5))
w_ = torch.nn.functional.softmax(w_, dim=2)

# attend to values
v = v.reshape(b, 1, h * w)
w_ = w_.permute(0, 2, 1)  # b,hw,hw (first hw of k, second of q)
# redistribute the mask through the attention weights, then blend with
# the original mask: proc[b,c,j] = sum_i v[b,c,i] w_[b,i,j] - mean(v) + v
proc = (torch.bmm(v, w_) - v.mean() + v).clamp(0., 1.)
att_proc[0] = proc.view(b, 1, h, w)
#att_proc[0] = v.view(b, 1, h, w)

hook = shared.sd_model.first_stage_model.encoder.mid.attn_1.register_forward_hook(att_hook)

pbar = tqdm.tqdm(enumerate(ds), total=len(ds))
import torch.nn.functional as F
for i, entries in pbar:
with torch.autocast("cuda"):
timg = torch.cat([entry.timg for entry in entries]).to(devices.device)
att_map[0]=torch.stack([(entry.att_mask if entry.att_mask is not None else torch.ones_like(entry.latent)) for entry in entries]).to(devices.device)
#att_map[0] = F.interpolate(timg, scale_factor=1/8, mode='bicubic')
shared.sd_model.encode_first_stage(timg)

att_map_new = att_proc[0].detach().cpu().squeeze(0)#.permute(1,2,0).numpy().astype(np.uint8)
#att_map_new = cv2.resize(att_map_new, (0, 0), fx=8, fy=8, interpolation=cv2.INTER_CUBIC)
att_map_new = transforms.ToPILImage()(att_map_new)
#att_map_new = att_map_new.resize((int(att_map_new.size[0]*8), int(att_map_new.size[1]*8)), PIL.Image.BICUBIC)
att_map_new.save(entries[0].filename[:entries[0].filename.rfind('.')]+'_att_proc'+entries[0].filename[entries[0].filename.rfind('.'):])

shared.state.job_no = i

hook.remove()
2 changes: 1 addition & 1 deletion scripts/dream_artist/dataset.py
@@ -127,7 +127,7 @@ def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_to
print(att_path)
att_mask = Image.open(att_path).convert('L').resize((self.width//8, self.height//8), PIL.Image.BICUBIC)
np_mask = np.array(att_mask).astype(float)[:,:,None]
np_mask[np_mask<=127+0.1]=(np_mask[np_mask<=127+0.1]/127.)*0.99+0.01
np_mask[np_mask<=127+0.1]=(np_mask[np_mask<=127+0.1]/127.)#*0.99+0.01
np_mask[np_mask>127]=((np_mask[np_mask>127]-127)/128.)*4+1

torchdata = torch.from_numpy(np_mask).to(device=device, dtype=torch.float32)
26 changes: 22 additions & 4 deletions scripts/dream_artist/ui.py
@@ -7,11 +7,13 @@
from modules import sd_hijack, shared


def create_embedding(name, initialization_text, nvpt, overwrite_old, use_negative, nvpt_neg):
def create_embedding(name, initialization_text, initialization_text_neg, nvpt, overwrite_old, nvpt_neg, seed):
dream_artist.cptuning.set_seed(seed)

filename = dream_artist.cptuning.create_embedding(name, nvpt, overwrite_old, init_text=initialization_text)
if use_negative:
dream_artist.cptuning.create_embedding(name+'-neg', nvpt_neg, overwrite_old, init_text=initialization_text)
filename=f'{filename} and {filename[:-3]}-neg.pt'
filename_neg = dream_artist.cptuning.create_embedding(name+'-neg', nvpt_neg, overwrite_old, init_text=initialization_text_neg)

filename=f'{filename} and {filename_neg}'

sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings()

@@ -40,3 +42,19 @@ def train_embedding(*args):
if not apply_optimizations:
sd_hijack.apply_optimizations()


def proc_att(*args):

apply_optimizations = shared.opts.training_xattention_optimizations
try:
if not apply_optimizations:
sd_hijack.undo_optimizations()

dream_artist.cptuning.proc_att(*args)

return "process finish", ""
except Exception:
raise
finally:
if not apply_optimizations:
sd_hijack.apply_optimizations()
