# .vscode

## launch.json

```json
{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Current File (WSL)",
            "type": "debugpy",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal",
            "cwd": "${workspaceFolder}",    // 这里会自动使用当前工作区的根目录
            "env": { "PYTHONPATH": "${workspaceFolder}" }
        }
    ]
}
```

## settings.json

```json
{
    "python.pythonPath": "/root/miniconda/envs/cv/bin/python",
    "python.analysis.extraPaths": [
    "${workspaceFolder}",
    "${workspaceFolder}/src"
    ]
    
}
```

上面的两个配置中，第一个（launch.json）只在调试时生效，第二个只影响写代码时的代码提示。因此这两个配置都无法让程序在直接运行时识别到项目根目录，我们还需要在 .zshrc 中这样设置：
```bash
export PYTHONPATH="$PYTHONPATH:/mnt/d/Projects/real_time_style_transformation"
```


# configs

## rtst.yaml

```yaml
device: auto   # 可选值：auto、cuda、cpu
experiment: 0
batch_size: 16
lr: 1e-2            # 学习率  
epochs: 50          # 迭代次数 
a: 1 # 三种损失的比例
b: 1e-5
c: 1e-6
style_pic: 'data/starrynight.jpg'
content_layers: ['21']
style_layers: ['0', '5', '10', '19', '28']
weights_path: 'weights/'
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
pic_size: 256
```

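这个配置由后面的 src.utils.cfg 读取，读取后的内容是一个字典。其中的数值在使用前需要显式转换为 int/float（例如 `1e-2` 这种不带小数点的科学计数法会被 PyYAML 解析成字符串）。另外，后面的 train 还会读取 `freq`、`load`、`save` 三个键，上面的示例配置中没有列出，实际的配置文件中需要补充。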

# data

## StyleDataset

In [None]:
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
import glob
from src.utils.cfg import cfg

# Preprocessing and batching parameters, taken from the shared config dict.
mean, std = cfg['mean'], cfg['std']
batch_size = cfg['batch_size']
# Converted explicitly because config values are not guaranteed to be ints.
pic_size = int(cfg['pic_size'])

class StyleDataset(Dataset):
    """Dataset over the ``*.JPEG`` files found directly inside a directory.

    Every item is the image opened as RGB and passed through
    ``Resize(pic_size)`` -> ``CenterCrop(pic_size)`` -> ``ToTensor`` ->
    ``Normalize(mean, std)``, using the module-level ``pic_size``, ``mean``
    and ``std``.
    """

    def __init__(self, root):
        """Collect the image paths under *root* and build the transform."""
        self.pics = glob.glob(f"{root}/*.JPEG")
        preprocessing = [
            transforms.Resize(pic_size),
            transforms.CenterCrop(pic_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ]
        self.tf = transforms.Compose(preprocessing)

    def __len__(self):
        """Return the number of image files that were found."""
        return len(self.pics)

    def __getitem__(self, i):
        """Return the preprocessed tensor of the *i*-th image."""
        path = self.pics[i]
        rgb = Image.open(path).convert('RGB')
        return self.tf(rgb)

# Only a random subset of each split is used to keep training affordable.
# The requested sizes are capped at the number of images actually found, so
# random_split is never handed a negative length when a folder holds fewer
# images than requested.
train_full = StyleDataset("data/train")
train_subset_len = min(20000, len(train_full))
train_ds, _ = random_split(train_full, [train_subset_len, len(train_full) - train_subset_len])

val_full = StyleDataset("data/val")
val_subset_len = min(1000, len(val_full))
val_ds, _ = random_split(val_full, [val_subset_len, len(val_full) - val_subset_len])


# Loaders imported by the training script.
TrainLoader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
ValidateLoader = DataLoader(val_ds, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)



读取必要参数后，首先建立一个数据集类，只需实现 `__len__` 和 `__getitem__`，后者负责对图片做预处理转换。
由于显卡性能不足，这里只随机划分出一个子集来训练，子集大小在代码中指定，后续可能放到参数文件中。
最后建立数据加载器。

# models

## rtst.py

最初的网络实现有一个错误：上采样函数不是原地修改张量，而是返回一个新的张量，所以必须把返回值重新赋值给变量（即写成 `x = upsample(x)` 的形式）。

# src

## fe_model

In [None]:
import torch
from utils.cfg import cfg
from torchvision.models import vgg19
from utils.cfg import cfg
#feature extracting model
from torchvision.models.feature_extraction import create_feature_extractor

# Feature-extracting model: the `features` part of VGG-19 loaded with the
# "IMAGENET1K_V1" weights, switched to eval mode and moved to the configured
# device. Its parameters are frozen because it is only used to compute losses.
device = cfg['device']
fe_model = vgg19(weights="IMAGENET1K_V1").features.eval().to(device=device)
for param in fe_model.parameters():
    param.requires_grad_(False)

content_layers = cfg['content_layers']
style_layers = cfg['style_layers']

# Every requested node is returned under its own name, so the extractor's
# output dict is keyed by the layer names listed in the config.
_return_nodes = {name: name for name in (*content_layers, *style_layers)}
extractor = create_feature_extractor(fe_model, return_nodes=_return_nodes).to(device=device)


准备特征提取器（FE）。它的输出是一个字典：键是层的编号（字符串，例如 '21'），值是该层输出的特征张量。

## train

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from fe_model import extractor
from data.StyleDataset import TrainLoader, ValidateLoader
from models.rtst import TransformerNet
from torch.optim import Adam
from tqdm import tqdm
from torchmetrics import TotalVariation
from utils.log import log_info
from utils.cfg import cfg
from utils.weights import load_model, save_model
from utils.gram import gram_matrix
from utils.save import save_pics
import logging

# Network to be trained and the run parameters read from the config.
RTST = TransformerNet()
device = cfg['device']
epochs = int(cfg['epochs'])
style_pic = cfg['style_pic']
# NOTE(review): 'freq' does not appear in the rtst.yaml shown in this
# document - confirm that the real config file defines it.
freq = int(cfg['freq'])

# Weights of the content (a), style (b) and total-variation (c) loss terms.
a, b, c = (float(cfg[key]) for key in ('a', 'b', 'c'))

content_layers = cfg['content_layers']
style_layers = cfg['style_layers']

lr = float(cfg['lr'])
optimizer = Adam(RTST.parameters(), lr=lr)
loss_func = F.mse_loss
loss_tv = TotalVariation(reduction='sum').to(device=device)  # total variation loss
# Running index used to name the pictures saved during training.
pic_num = 0

def _style_transfer_loss(generated, content_feats, style_grams, FE, loss_fn, loss_tv,
                         content_layers, style_layers, a, b, c):
    """Return ``a*content + b*style + c*total_variation`` for one batch.

    ``FE(generated)`` is evaluated here without a local ``no_grad`` so that,
    during training, the content and style terms back-propagate through the
    feature extractor into the network that produced ``generated``.
    """
    gen_feats = FE(generated)

    Lc = 0  # content loss
    for layer in content_layers:
        Lc = Lc + loss_fn(gen_feats[layer], content_feats[layer])

    Ls = 0  # style loss, summed over all style layers
    for layer in style_layers:
        Ls = Ls + loss_fn(gram_matrix(gen_feats[layer]), style_grams[layer])

    Ltv = loss_tv(generated)  # total variation loss
    return a * Lc + b * Ls + c * Ltv


def train(
    RTST: nn.Module,
    FE:nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader,
    optimizer: torch.optim.Optimizer,
    loss_fn,
    loss_tv,
    device: torch.device,
    content_layers,
    style_layers,
    style_pic,
    epochs: int,
    scheduler,
    log_fn,
    a,
    b,
    c,
    freq):
    """Train ``RTST`` with a content + style + total-variation loss.

    Args:
        RTST: network being trained; moved to ``device`` in place.
        FE: feature extractor; called on a batch it must return a mapping
            indexable by every name in ``content_layers`` / ``style_layers``.
        train_loader: iterable of input batches (tensors).
        val_loader: iterable of validation batches; validation is skipped
            when it is falsy.
        optimizer: optimizer over ``RTST``'s parameters.
        loss_fn: callable ``(prediction, target) -> loss``.
        loss_tv: callable returning the total-variation loss of an image batch.
        device: device the batches are moved to.
        content_layers: feature names used for the content loss.
        style_layers: feature names used for the style loss.
        style_pic: style image tensor that is fed to ``FE``.
        epochs: number of epochs to run.
        scheduler: optional LR scheduler, stepped once per epoch.
        log_fn: callable ``(epoch=, loss=, mode=, place=)`` used for logging.
        a, b, c: weights of the content, style and total-variation terms.
        freq: every ``freq`` steps the loss is logged and pictures are saved.
    """
    global pic_num

    RTST.to(device)

    # The style picture never changes, so its features and Gram matrices are
    # computed once instead of on every step. They are constants of the loss,
    # hence no gradient tracking.
    with torch.no_grad():
        style_feats = FE(style_pic)
        style_grams = {layer: gram_matrix(style_feats[layer]) for layer in style_layers}

    # Counts optimisation steps across epochs so that step-level log entries
    # get a unique, increasing x value instead of restarting at 0 every epoch.
    global_step = 0

    for epoch in range(1, epochs+1):
        RTST.train()
        epoch_loss = 0.0
        seen_steps = 0
        for step, batch in enumerate(tqdm(train_loader)):
            inputs = batch.to(device)

            optimizer.zero_grad()
            outputs = RTST(inputs)

            # Only the content targets are constants. The features of
            # `outputs` are computed inside the helper with autograd enabled;
            # wrapping them in no_grad would cut the graph and leave the
            # total-variation term as the only source of gradient.
            with torch.no_grad():
                targets = FE(inputs)
            LT = _style_transfer_loss(outputs, targets, style_grams, FE, loss_fn, loss_tv,
                                      content_layers, style_layers, a, b, c)

            if step % freq == 0:
                log_fn(epoch=global_step, loss=LT.item(), mode='train', place='step')
                # NOTE(review): save_pic, as shown in this document, writes
                # each image to both origin_{i}.png and generate_{i}.png, so
                # the second call below overwrites the files of the first.
                save_pics(inputs, pic_num)
                n = save_pics(outputs.detach(), pic_num)
                pic_num += n

            LT.backward()
            optimizer.step()

            epoch_loss = epoch_loss + LT.detach()
            seen_steps += 1
            global_step += 1
        if scheduler:
            scheduler.step()
        if seen_steps:
            # Report the mean over the epoch rather than the last batch only.
            log_fn(epoch=epoch, loss=float(epoch_loss / seen_steps), mode='train', place='epoch')

        if val_loader:
            RTST.eval()
            total_loss = 0.0
            val_steps = 0
            with torch.no_grad():
                for batch in tqdm(val_loader):
                    inputs = batch.to(device)
                    outputs = RTST(inputs)
                    targets = FE(inputs)
                    LT = _style_transfer_loss(outputs, targets, style_grams, FE, loss_fn, loss_tv,
                                              content_layers, style_layers, a, b, c)
                    total_loss += LT.item()
                    val_steps += 1
            if val_steps:
                # Mean validation loss over all batches (the accumulated sum,
                # not just the last batch, is what gets averaged).
                log_fn(epoch=epoch, loss=total_loss / val_steps, mode='val', place='epoch')

# Identifiers handed to load_model / save_model, read from the config.
load = int(cfg['load'])
save = int(cfg['save'])
# Loading existing weights is currently disabled:
#load_model(RTST, load)
train(
    RTST=RTST,
    FE=extractor,
    train_loader=TrainLoader,
    val_loader=ValidateLoader,
    optimizer=optimizer,
    loss_fn=loss_func,
    loss_tv=loss_tv,
    device=device,
    content_layers=content_layers,
    style_layers=style_layers,
    style_pic=style_pic,
    epochs=epochs,
    scheduler=None,
    log_fn=log_info,
    a=a,
    b=b,
    c=c,
    freq=freq,
)
save_model(RTST, save)

# Flush and close every logging handler before the script exits.
logging.shutdown()

首先导入所需的工具：数据加载器、特征提取器、网络、优化器、日志、参数、图片保存（用于可视化）以及损失函数。
训练过程：把输入送入网络得到输出，然后对输出、输入和风格图片分别提取特征；用输出的特征分别与输入的特征（内容损失）和风格图片的特征（风格损失）计算损失，其中风格损失是多层的，逐层计算后相加；再计算输出图像的全变分（平滑）损失；最后把三种损失按一定比例加权求和并进行优化。
最后考虑权重的加载和存储。

## utils

### cfg

In [None]:
import yaml
import torch
from PIL import Image
from torchvision import transforms
# Load the experiment configuration; other modules import `cfg` from here.
with open("configs/rtst.yaml", "r", encoding='utf-8') as f:
    cfg = yaml.safe_load(f)

# Honour the `device` entry of the config ("auto", "cuda" or "cpu").
# "auto", or a missing/empty entry, picks CUDA when it is available.
_requested_device = str(cfg.get('device') or 'auto').strip().lower()
if _requested_device == 'auto':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
else:
    device = torch.device(_requested_device)
cfg['device'] = device

mean = cfg['mean']
std = cfg['std']
pic_size = int(cfg['pic_size'])

# Replace the style picture path in the config by the preprocessed image
# tensor so that consumers can feed it to the feature extractor directly.
_style_transform = transforms.Compose([
    transforms.Resize(pic_size),
    transforms.CenterCrop(pic_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])
# The context manager closes the image file once it has been converted.
with Image.open(cfg['style_pic']) as _style_file:
    style_pic = _style_transform(_style_file.convert('RGB'))

# Add a leading batch dimension and move the tensor to the selected device.
cfg['style_pic'] = style_pic.unsqueeze(0).to(device)


这个文件的作用是从参数文件中读取数据并放在 cfg 中，供其他文件读取。
同时这里还放入了一些额外的参数，例如设备和风格图片张量。由于 cfg 中包含张量，后面记录日志时用 yaml 的序列化函数会出问题，因此改用 pprint。

### log

In [None]:
import logging
import yaml
from src.utils.cfg import cfg
from logging.handlers import RotatingFileHandler
from src.utils.tensorboard import writer
from pprint import pformat

experiment = cfg['experiment']

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Rotating log file: at most 10 MB per file, 5 backups are kept.
handler = RotatingFileHandler(
    f'logs/experiment{experiment}.log',
    maxBytes=10 * 1024 * 1024,
    backupCount=5,
)
# Every record goes to the log file and to the console stream.
for _sink in (handler, logging.StreamHandler()):
    logger.addHandler(_sink)

# Record the configuration of this run before training starts.
logger.info("current configuration\n%s", pformat(cfg))
logger.info('start training')




def log_info(epoch, loss, mode, place):
    """Write one loss value to the text log and to the TensorBoard writer.

    Args:
        epoch: x value of the record (an epoch or step index).
        loss: loss value; objects exposing ``.item()`` (e.g. scalar tensors)
            are converted to a plain Python number first.
        mode: ``"train"`` or ``"val"``; any other value is logged as an error
            and no scalar is written.
        place: granularity label (the training loop in this document passes
            ``'step'`` or ``'epoch'``), used in the message and the scalar tag.
    """
    # A plain number keeps the log line readable (no tensor repr) and does
    # not hold on to the autograd graph.
    if hasattr(loss, 'item'):
        loss = loss.item()

    info = f"mode: {mode}\n{place}: {epoch}\nloss: {loss}\n\n"
    logger.info(info)
    if mode in ("train", "val"):
        writer.add_scalar(f'{place}/{mode}/loss', loss, epoch)
    else:
        logger.error("writer mode error!!!")
    # Flush instead of close: closing the writer after every record forces it
    # to be re-opened on the next call.
    # NOTE(review): assumes `writer` is a SummaryWriter, which provides flush().
    writer.flush()
        




这里在开始时把参数配置写入日志。日志文件使用 RotatingFileHandler：单个文件最大 10MB，最多保留 5 个备份，超出后最旧的备份会被覆盖。
然后是一个用来记录数据的函数，它同时写入 TensorBoard 和日志文件，日志内容也会同时打印到终端。

### save

In [None]:
from torchvision import transforms
from torchvision.utils import save_image
# Inverse of Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
# the "means" are -mean/std and the "stds" are 1/std, rounded to two decimals.
_DENORMALIZE = transforms.Normalize((-2.12, -2.04, -1.80), (4.37, 4.46, 4.44))
_STORE_DIR = 'results/store'


def save_pic(target, i, kind=None):
    """Denormalise one image tensor and save it as PNG in ``results/store``.

    Args:
        target: normalised image tensor; size-1 dimensions are squeezed away
            before saving (assumes a single image, e.g. ``(1, C, H, W)``).
        i: index used in the file name.
        kind: ``None`` (default) keeps the historical behaviour and writes the
            image to both ``origin_{i}.png`` and ``generate_{i}.png``. Pass
            ``'origin'`` or ``'generate'`` to write only ``{kind}_{i}.png`` so
            that saving the generated image does not overwrite the original.
    """
    import os

    # save_image does not create missing directories.
    os.makedirs(_STORE_DIR, exist_ok=True)

    img = target.detach().clone().squeeze()
    img = _DENORMALIZE(img).clamp(0, 1)

    kinds = ('origin', 'generate') if kind is None else (kind,)
    for prefix in kinds:
        save_image(img, f'{_STORE_DIR}/{prefix}_{i}.png')


def save_pics(target, i, kind=None):
    """Save every image of a 4-D batch, numbering them from *i*.

    Args:
        target: batch of normalised images; must have exactly four dimensions.
        i: index of the first image; image ``j`` is saved with index ``i + j``.
        kind: forwarded to :func:`save_pic`.

    Returns:
        The number of images in the batch.
    """
    n, _, _, _ = target.shape
    for j in range(n):
        save_pic(target[j:j+1], i + j, kind)
    return n



保存原图和生成的图片，便于查看训练效果。