In [1]:
import torch
import utils
from absl import logging
import os
import wandb
import libs.autoencoder
import clip
from libs.clip import FrozenCLIPEmbedder
from libs.caption_decoder import CaptionDecoder
from torch.utils.data import DataLoader
from libs.schedule import stable_diffusion_beta_schedule, Schedule, LSimple_T2I
import argparse
import yaml
import datetime
from pathlib import Path
from libs.data import PersonalizedBase
from libs.uvit_multi_post_ln_v1 import UViT

uvit attention mode is xformers


In [2]:
def get_args(argv=None):
    """Build the argument parser and parse the run options.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            the original behavior. Pass an explicit list (for example ``[]``)
            when calling from a notebook or a test, where ``sys.argv`` holds
            arguments that do not belong to this parser.

    Returns:
        argparse.Namespace with the attributes ``data``, ``outdir``,
        ``logdir`` and ``nnet_path``.
    """
    parser = argparse.ArgumentParser()

    # Key arguments: input data directory and model output directory.
    parser.add_argument('-d', '--data', type=str,
                        default="train_data/boy1", help="datadir")
    # NOTE(review): "model_ouput" looks like a typo for "model_output"; it is
    # left unchanged because existing output directories may rely on it.
    parser.add_argument('-o', "--outdir", type=str,
                        default="model_ouput/boy1", help="output of model")

    # Logging directory and the network checkpoint to resume from.
    parser.add_argument("--logdir", type=str, default="logs",
                        help="the dir to put logs")
    parser.add_argument("--nnet_path", type=str,
                        default="models/uvit_v1.pth", help="nnet path to resume")

    return parser.parse_args(argv)

In [2]:
# Build the config object from configs.unidiffuserv1 and set the checkpoint
# path and the device used by the cells below.
from configs.unidiffuserv1 import get_config

device = "cpu"

config = get_config()
config.nnet_path = "models/uvit_v1.pth"

In [3]:
# Create the training state from `config` on `device`, with UViT passed as the
# network class.
# NOTE(review): presumably this also loads weights from config.nnet_path —
# confirm in utils.initialize_train_state.
train_state = utils.initialize_train_state(config, device, uvit_class=UViT)

In [4]:
# Bare expression as the last line of the cell: the notebook displays the repr
# of the network held by the training state (its module tree).
train_state.nnet

UViT(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(4, 1536, kernel_size=(2, 2), stride=(2, 2))
  )
  (time_img_embed): Identity()
  (time_text_embed): Identity()
  (text_embed): Linear(in_features=64, out_features=1536, bias=True)
  (text_out): Linear(in_features=1536, out_features=64, bias=True)
  (clip_img_embed): Linear(in_features=512, out_features=1536, bias=True)
  (clip_img_out): Linear(in_features=1536, out_features=512, bias=True)
  (pos_drop): Dropout(p=0.0, inplace=False)
  (in_blocks): ModuleList(
    (0): Block(
      (norm2): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=1536, out_features=4608, bias=False)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=1536, out_features=1536, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm3): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
      (mlp): Mlp(
 

In [6]:
# Print every (module, descendant) name pair of the network.
# named_modules() already walks the whole subtree (the root entry, whose name
# is '', lists nested names such as 'patch_embed.proj'), so the inner loop
# repeats each descendant once per ancestor, using names relative to it.
for parent_name, parent_module in train_state.nnet.named_modules():
    for relative_name, _descendant in parent_module.named_modules():
        print(parent_name, relative_name)

 
 patch_embed
 patch_embed.proj
 time_img_embed
 time_text_embed
 text_embed
 text_out
 clip_img_embed
 clip_img_out
 pos_drop
 in_blocks
 in_blocks.0
 in_blocks.0.norm2
 in_blocks.0.attn
 in_blocks.0.attn.qkv
 in_blocks.0.attn.attn_drop
 in_blocks.0.attn.proj
 in_blocks.0.attn.proj_drop
 in_blocks.0.drop_path
 in_blocks.0.norm3
 in_blocks.0.mlp
 in_blocks.0.mlp.fc1
 in_blocks.0.mlp.act
 in_blocks.0.mlp.fc2
 in_blocks.0.mlp.drop
 in_blocks.1
 in_blocks.1.norm2
 in_blocks.1.attn
 in_blocks.1.attn.qkv
 in_blocks.1.attn.attn_drop
 in_blocks.1.attn.proj
 in_blocks.1.attn.proj_drop
 in_blocks.1.drop_path
 in_blocks.1.norm3
 in_blocks.1.mlp
 in_blocks.1.mlp.fc1
 in_blocks.1.mlp.act
 in_blocks.1.mlp.fc2
 in_blocks.1.mlp.drop
 in_blocks.2
 in_blocks.2.norm2
 in_blocks.2.attn
 in_blocks.2.attn.qkv
 in_blocks.2.attn.attn_drop
 in_blocks.2.attn.proj
 in_blocks.2.attn.proj_drop
 in_blocks.2.drop_path
 in_blocks.2.norm3
 in_blocks.2.mlp
 in_blocks.2.mlp.fc1
 in_blocks.2.mlp.act
 in_blocks.2.mlp.fc

In [7]:
import torch
import torch.nn as nn

class MLP(nn.Module):
    """Three stacked linear layers (10 -> 20 -> 30 -> 1), no activations in between."""

    def __init__(self):
        super().__init__()
        # Layers are created in order fc1, fc2, fc3.
        self.fc1 = nn.Linear(10, 20)
        self.fc2 = nn.Linear(20, 30)
        self.fc3 = nn.Linear(30, 1)

    def forward(self, x):
        """Pass ``x`` through fc1, fc2 and fc3 in sequence and return the result."""
        for layer in (self.fc1, self.fc2, self.fc3):
            x = layer(x)
        return x

# Demo: freeze fc1 and fc3, optimize only fc2, and inspect fc2's gradient.

# Seed the RNG so the initial weights, the random input/target and therefore
# the printed gradient are the same on every run.
torch.manual_seed(0)

# Instantiate the model.
model = MLP()

# Freeze the parameters of the first and third layers.
for param in model.fc1.parameters():
    param.requires_grad = False

for param in model.fc3.parameters():
    param.requires_grad = False

# Optimizer that updates only the second layer's parameters.
optimizer = torch.optim.SGD(model.fc2.parameters(), lr=0.01)

# Random input and target. The input is named `inputs` rather than `input`
# so the builtin input() is not shadowed for the rest of the kernel session.
inputs = torch.randn(1, 10)
target = torch.randn(1, 1)

# Forward pass.
output = model(inputs)

# Compute the loss.
criterion = nn.MSELoss()
loss = criterion(output, target)

# Backward pass.
loss.backward()

# Update the weights.
optimizer.step()

# Print the gradient of the second layer's weights.
print(model.fc2.weight.grad)

tensor([[-0.0080, -0.0124, -0.1049, -0.0064,  0.0541,  0.0085,  0.0928, -0.0229,
         -0.0246, -0.0421,  0.0960,  0.1239,  0.0411,  0.1166, -0.1277,  0.0453,
         -0.0259,  0.0103,  0.1019,  0.1341],
        [ 0.0103,  0.0160,  0.1350,  0.0082, -0.0696, -0.0109, -0.1194,  0.0295,
          0.0316,  0.0542, -0.1236, -0.1596, -0.0529, -0.1501,  0.1644, -0.0583,
          0.0333, -0.0132, -0.1312, -0.1726],
        [-0.0017, -0.0027, -0.0230, -0.0014,  0.0118,  0.0019,  0.0203, -0.0050,
         -0.0054, -0.0092,  0.0210,  0.0271,  0.0090,  0.0255, -0.0279,  0.0099,
         -0.0057,  0.0022,  0.0223,  0.0293],
        [ 0.0045,  0.0071,  0.0595,  0.0036, -0.0307, -0.0048, -0.0526,  0.0130,
          0.0139,  0.0239, -0.0545, -0.0703, -0.0233, -0.0662,  0.0725, -0.0257,
          0.0147, -0.0058, -0.0578, -0.0761],
        [-0.0021, -0.0033, -0.0275, -0.0017,  0.0142,  0.0022,  0.0243, -0.0060,
         -0.0064, -0.0110,  0.0252,  0.0325,  0.0108,  0.0306, -0.0335,  0.0119,
      