In [1]:
import os
import torch
import argparse
import itertools
import numpy as np
from tqdm import tqdm
import torch.optim as optim
from torchvision.utils import save_image
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.distributed import get_rank, init_process_group, destroy_process_group, all_gather, get_world_size
from torch import Tensor
from torchvision import transforms
from torch.utils.data import DataLoader,Dataset
from glob import glob
from torch.utils.data.distributed import DistributedSampler
import random
from conditionDiffusion.unet import Unet
from conditionDiffusion.embedding import ConditionalEmbedding
from conditionDiffusion.utils import get_named_beta_schedule
from conditionDiffusion.diffusion import GaussianDiffusion
from conditionDiffusion.Scheduler import GradualWarmupScheduler
from PIL import Image
print(f"GPUs used:\t{torch.cuda.device_count()}")
device = torch.device("cuda",4)
print(f"Device:\t\t{device}")
import pytorch_model_summary as tms

GPUs used:	8
Device:		cuda:4


In [2]:
class_list=['유형1','유형2']
params={'image_size':1024,
        'lr':1e-5,
        'beta1':0.5,
        'beta2':0.999,
        'batch_size':4,
        'epochs':1000,
        'n_classes':None,
        'data_path':'../../data/origin_type/BRNT/',
        'image_count':5000,
        'inch':3,
        'modch':32,
        'outch':3,
        'chmul':[1,2,4,8,16,32,32],
        'numres':2,
        'dtype':torch.float32,
        'cdim':10,
        'useconv':False,
        'droprate':0.1,
        'T':1000,
        'w':1.8,
        'v':0.3,
        'multiplier':2.5,
        'threshold':0.1,
        'ddim':True,
        }


In [3]:
trans = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

def transback(data:Tensor) -> Tensor:
    return data / 2 + 0.5

class CustomDataset(Dataset):
    """COCO Custom Dataset compatible with torch.utils.data.DataLoader."""
    def __init__(self,parmas, images,label):
        
        self.images = images
        self.args=parmas
        self.label=label
        
    def trans(self,image):
        if random.random() > 0.5:
            transform = transforms.RandomHorizontalFlip(1)
            image = transform(image)
            
        if random.random() > 0.5:
            transform = transforms.RandomVerticalFlip(1)
            image = transform(image)
            
        return image
    
    def __getitem__(self, index):
        image=self.images[index]
        label=self.label[index]
        image = self.trans(image)
        return image,label
    
    def __len__(self):
        return len(self.images)


image_label=[]
image_path=[]
for i in tqdm(range(len(class_list))):
    image_list=glob(params['data_path']+class_list[i]+'/*.jpeg')
    for j in range(len(image_list)):
        image_path.append(image_list[j])
        image_label.append(i)
        
train_images=torch.zeros((len(image_path),params['inch'],params['image_size'],params['image_size']))
for i in tqdm(range(len(image_path))):
    train_images[i]=trans(Image.open(image_path[i]).convert('RGB').resize((params['image_size'],params['image_size'])))
train_dataset=CustomDataset(params,train_images,image_label)
dataloader=DataLoader(train_dataset,batch_size=params['batch_size'],shuffle=True)

100%|██████████| 2/2 [00:00<00:00,  9.30it/s]
 15%|█▍        | 551/3722 [01:48<05:06, 10.34it/s]

In [None]:
net = Unet(in_ch = params['inch'],
            mod_ch = params['modch'],
            out_ch = params['outch'],
            ch_mul = params['chmul'],
            num_res_blocks = params['numres'],
            cdim = params['cdim'],
            use_conv = params['useconv'],
            droprate = params['droprate'],
            dtype = params['dtype']
            ).to(device)
cemblayer = ConditionalEmbedding(len(class_list), params['cdim'], params['cdim']).to(device)
betas = get_named_beta_schedule(num_diffusion_timesteps = params['T'])
diffusion = GaussianDiffusion(
                    dtype = params['dtype'],
                    model = net,
                    betas = betas,
                    w = params['w'],
                    v = params['v'],
                    device = device
                )
optimizer = torch.optim.AdamW(
                itertools.chain(
                    diffusion.model.parameters(),
                    cemblayer.parameters()
                ),
                lr = params['lr'],
                weight_decay = 1e-4
            )

cosineScheduler = optim.lr_scheduler.CosineAnnealingLR(
                            optimizer = optimizer,
                            T_max = params['epochs']/100,
                            eta_min = 0,
                            last_epoch = -1
                        )
warmUpScheduler = GradualWarmupScheduler(
                        optimizer = optimizer,
                        multiplier = params['multiplier'],
                        warm_epoch = params['epochs'] // 10,
                        after_scheduler = cosineScheduler,
                        last_epoch = 0
                    )
checkpoint=torch.load(f'../../model/conditionDiff/details/BRNT/ckpt_111_checkpoint.pt')
diffusion.model.load_state_dict(checkpoint['net'])
cemblayer.load_state_dict(checkpoint['cemblayer'])
optimizer.load_state_dict(checkpoint['optimizer'])
warmUpScheduler.load_state_dict(checkpoint['scheduler'])



AttributeError: 'dict' object has no attribute 'load_state_dict'

In [None]:
for epc in range(110,params['epochs']):
    diffusion.model.train()
    cemblayer.train()
    total_loss=0
    steps=0
    with tqdm(dataloader, dynamic_ncols=True) as tqdmDataLoader:
        for img, lab in tqdmDataLoader:
            b = img.shape[0]
            optimizer.zero_grad()
            x_0 = img.to(device)
            lab = lab.to(device)
            cemb = cemblayer(lab)
            cemb[np.where(np.random.rand(b)<params['threshold'])] = 0
            loss = diffusion.trainloss(x_0, cemb = cemb)
            loss.backward()
            optimizer.step()
            steps+=1
            total_loss+=loss.item()
            tqdmDataLoader.set_postfix(
                ordered_dict={
                    "epoch": epc + 1,
                    "loss: ": total_loss/steps,
                    "batch per device: ":x_0.shape[0],
                    "img shape: ": x_0.shape[1:],
                    "LR": optimizer.state_dict()['param_groups'][0]["lr"]
                }
            )
    warmUpScheduler.step()
    if (epc) % 10 == 0:
        diffusion.model.eval()
        cemblayer.eval()
        # generating samples
        # The model generate 80 pictures(8 per row) each time
        # pictures of same row belong to the same class
        all_samples = []
        each_device_batch =10
        with torch.no_grad():
            lab = torch.ones(len(class_list), each_device_batch // len(class_list)).type(torch.long) \
            * torch.arange(start = 0, end = len(class_list)).reshape(-1, 1)
            lab = lab.reshape(-1, 1).squeeze()
            lab = lab.to(device)
            cemb = cemblayer(lab)
            genshape = (each_device_batch , 3, params['image_size'], params['image_size'])
            if params['ddim']:
                generated = diffusion.ddim_sample(genshape, 50, 0, 'linear', cemb = cemb)
            else:
                generated = diffusion.sample(genshape, cemb = cemb)
            img = transback(generated)
            img = img.reshape(len(class_list), each_device_batch // len(class_list), 3, params['image_size'], params['image_size']).contiguous()
            all_samples.append(img)
            samples = torch.concat(all_samples, dim = 1).reshape(each_device_batch, 3,params['image_size'], params['image_size'])

        save_image(samples,f'../../result/Detail/BRNT/generated_{epc+1}_pict.png', nrow = each_device_batch // len(class_list))
        # save checkpoints
        checkpoint = {
                            'net':diffusion.model.state_dict(),
                            'cemblayer':cemblayer.state_dict(),
                            'optimizer':optimizer.state_dict(),
                            'scheduler':warmUpScheduler.state_dict()
                        }
        torch.save(checkpoint, f'../../model/conditionDiff/details/BRNT/ckpt_{epc+1}_checkpoint.pt')
    torch.cuda.empty_cache()
    

100%|██████████| 931/931 [24:30<00:00,  1.58s/it, epoch=11, loss: =0.419, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1e-5]


Start generating(ddim)...


100%|██████████| 50/50 [01:32<00:00,  1.86s/it]


ending sampling process(ddim)...


100%|██████████| 931/931 [24:29<00:00,  1.58s/it, epoch=12, loss: =0.135, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.01e-5]
100%|██████████| 931/931 [24:30<00:00,  1.58s/it, epoch=13, loss: =0.0923, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.03e-5]
100%|██████████| 931/931 [24:33<00:00,  1.58s/it, epoch=14, loss: =0.078, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.04e-5] 
100%|██████████| 931/931 [24:43<00:00,  1.59s/it, epoch=15, loss: =0.0631, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.06e-5]
100%|██████████| 931/931 [24:34<00:00,  1.58s/it, epoch=16, loss: =0.056, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.08e-5] 
100%|██████████| 931/931 [24:37<00:00,  1.59s/it, epoch=17, loss: =0.05, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.09e-5]  
100%|██████████| 931/931 [24:36<00:00,  1.59s/it, epoch=18, loss: =0.0451, batch per device: =2, img 

Start generating(ddim)...


100%|██████████| 50/50 [03:17<00:00,  3.95s/it]


ending sampling process(ddim)...


100%|██████████| 931/931 [24:51<00:00,  1.60s/it, epoch=22, loss: =0.0365, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.17e-5]
100%|██████████| 931/931 [24:39<00:00,  1.59s/it, epoch=23, loss: =0.0361, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.18e-5]
100%|██████████| 931/931 [24:45<00:00,  1.60s/it, epoch=24, loss: =0.0345, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.2e-5]
100%|██████████| 931/931 [24:50<00:00,  1.60s/it, epoch=25, loss: =0.0361, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.21e-5]
100%|██████████| 931/931 [24:52<00:00,  1.60s/it, epoch=26, loss: =0.0337, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.23e-5] 
100%|██████████| 931/931 [24:39<00:00,  1.59s/it, epoch=27, loss: =0.0315, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.24e-5]
100%|██████████| 931/931 [24:32<00:00,  1.58s/it, epoch=28, loss: =0.0339, batch per device: =2, img

Start generating(ddim)...


100%|██████████| 50/50 [01:48<00:00,  2.16s/it]


ending sampling process(ddim)...


100%|██████████| 931/931 [24:48<00:00,  1.60s/it, epoch=32, loss: =0.0298, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.32e-5]
100%|██████████| 931/931 [24:31<00:00,  1.58s/it, epoch=33, loss: =0.031, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.33e-5] 
100%|██████████| 931/931 [24:39<00:00,  1.59s/it, epoch=34, loss: =0.0291, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.35e-5]
100%|██████████| 931/931 [24:20<00:00,  1.57s/it, epoch=35, loss: =0.0282, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.36e-5]
100%|██████████| 931/931 [24:40<00:00,  1.59s/it, epoch=36, loss: =0.0302, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.38e-5]
100%|██████████| 931/931 [24:37<00:00,  1.59s/it, epoch=37, loss: =0.027, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.39e-5] 
100%|██████████| 931/931 [24:37<00:00,  1.59s/it, epoch=38, loss: =0.0282, batch per device: =2, img

Start generating(ddim)...


100%|██████████| 50/50 [01:49<00:00,  2.19s/it]


ending sampling process(ddim)...


100%|██████████| 931/931 [24:41<00:00,  1.59s/it, epoch=42, loss: =0.0291, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.47e-5]
100%|██████████| 931/931 [24:40<00:00,  1.59s/it, epoch=43, loss: =0.0287, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.48e-5]
100%|██████████| 931/931 [24:48<00:00,  1.60s/it, epoch=44, loss: =0.0265, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.5e-5]
100%|██████████| 931/931 [24:37<00:00,  1.59s/it, epoch=45, loss: =0.025, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.51e-5] 
100%|██████████| 931/931 [24:45<00:00,  1.60s/it, epoch=46, loss: =0.028, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.52e-5] 
100%|██████████| 931/931 [24:28<00:00,  1.58s/it, epoch=47, loss: =0.0272, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.54e-5]
100%|██████████| 931/931 [24:56<00:00,  1.61s/it, epoch=48, loss: =0.0279, batch per device: =2, img 

Start generating(ddim)...


100%|██████████| 50/50 [01:46<00:00,  2.12s/it]


ending sampling process(ddim)...


100%|██████████| 931/931 [24:49<00:00,  1.60s/it, epoch=52, loss: =0.0259, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.62e-5]
100%|██████████| 931/931 [24:38<00:00,  1.59s/it, epoch=53, loss: =0.0277, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.63e-5]
100%|██████████| 931/931 [24:41<00:00,  1.59s/it, epoch=54, loss: =0.0266, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.65e-5]
100%|██████████| 931/931 [24:48<00:00,  1.60s/it, epoch=55, loss: =0.0261, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.66e-5]
100%|██████████| 931/931 [24:36<00:00,  1.59s/it, epoch=56, loss: =0.0252, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.68e-5]
100%|██████████| 931/931 [24:41<00:00,  1.59s/it, epoch=57, loss: =0.0271, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.69e-5]
100%|██████████| 931/931 [24:31<00:00,  1.58s/it, epoch=58, loss: =0.0261, batch per device: =2, img

Start generating(ddim)...


100%|██████████| 50/50 [01:50<00:00,  2.20s/it]


ending sampling process(ddim)...


100%|██████████| 931/931 [24:42<00:00,  1.59s/it, epoch=62, loss: =0.0262, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.77e-5]
100%|██████████| 931/931 [24:41<00:00,  1.59s/it, epoch=63, loss: =0.0265, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.78e-5]
100%|██████████| 931/931 [24:53<00:00,  1.60s/it, epoch=64, loss: =0.0247, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.79e-5]
100%|██████████| 931/931 [24:33<00:00,  1.58s/it, epoch=65, loss: =0.0259, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.81e-5]
100%|██████████| 931/931 [24:37<00:00,  1.59s/it, epoch=66, loss: =0.0253, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.82e-5]
100%|██████████| 931/931 [24:27<00:00,  1.58s/it, epoch=67, loss: =0.024, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.84e-5] 
100%|██████████| 931/931 [24:33<00:00,  1.58s/it, epoch=68, loss: =0.0238, batch per device: =2, img

Start generating(ddim)...


100%|██████████| 50/50 [01:48<00:00,  2.18s/it]


ending sampling process(ddim)...


100%|██████████| 931/931 [24:31<00:00,  1.58s/it, epoch=72, loss: =0.0273, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.92e-5]
100%|██████████| 931/931 [24:51<00:00,  1.60s/it, epoch=73, loss: =0.0247, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.93e-5]
100%|██████████| 931/931 [24:26<00:00,  1.58s/it, epoch=74, loss: =0.0242, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.94e-5]
100%|██████████| 931/931 [24:47<00:00,  1.60s/it, epoch=75, loss: =0.0256, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.96e-5]
100%|██████████| 931/931 [24:40<00:00,  1.59s/it, epoch=76, loss: =0.0249, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.98e-5]
100%|██████████| 931/931 [24:36<00:00,  1.59s/it, epoch=77, loss: =0.0262, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=1.99e-5]
100%|██████████| 931/931 [24:23<00:00,  1.57s/it, epoch=78, loss: =0.0253, batch per device: =2, img

Start generating(ddim)...


100%|██████████| 50/50 [01:51<00:00,  2.24s/it]


ending sampling process(ddim)...


100%|██████████| 931/931 [24:38<00:00,  1.59s/it, epoch=82, loss: =0.0261, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=2.07e-5]
100%|██████████| 931/931 [24:36<00:00,  1.59s/it, epoch=83, loss: =0.0247, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=2.08e-5]
100%|██████████| 931/931 [24:35<00:00,  1.58s/it, epoch=84, loss: =0.0235, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=2.09e-5]
100%|██████████| 931/931 [24:41<00:00,  1.59s/it, epoch=85, loss: =0.0247, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=2.11e-5]
100%|██████████| 931/931 [24:43<00:00,  1.59s/it, epoch=86, loss: =0.025, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=2.13e-5] 
100%|██████████| 931/931 [24:33<00:00,  1.58s/it, epoch=87, loss: =0.0256, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=2.14e-5]
100%|██████████| 931/931 [24:31<00:00,  1.58s/it, epoch=88, loss: =0.0254, batch per device: =2, img

Start generating(ddim)...


100%|██████████| 50/50 [01:50<00:00,  2.22s/it]


ending sampling process(ddim)...


100%|██████████| 931/931 [24:57<00:00,  1.61s/it, epoch=92, loss: =0.0257, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=2.21e-5]
100%|██████████| 931/931 [24:20<00:00,  1.57s/it, epoch=93, loss: =0.0242, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=2.23e-5]
100%|██████████| 931/931 [24:47<00:00,  1.60s/it, epoch=94, loss: =0.0267, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=2.25e-5]
100%|██████████| 931/931 [24:38<00:00,  1.59s/it, epoch=95, loss: =0.0234, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=2.26e-5]
100%|██████████| 931/931 [24:48<00:00,  1.60s/it, epoch=96, loss: =0.0245, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=2.28e-5]
100%|██████████| 931/931 [24:37<00:00,  1.59s/it, epoch=97, loss: =0.0237, batch per device: =2, img shape: =torch.Size([3, 1024, 1024]), LR=2.29e-5]
100%|██████████| 931/931 [24:53<00:00,  1.60s/it, epoch=98, loss: =0.0247, batch per device: =2, img

Start generating(ddim)...


100%|██████████| 50/50 [01:48<00:00,  2.17s/it]


ending sampling process(ddim)...
