In [None]:
'''
kaggle - https://www.kaggle.com/code/leejin11/dcgan-code
'''

In [None]:
import time

import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.utils import save_image

from PIL import Image
from IPython.display import display, clear_output

In [None]:
### Network classes

In [None]:
class ConvTransBlock(nn.Module):
    '''Up-sampling unit: ConvTranspose2d, optionally BatchNorm2d, then an activation.

    Args:
        in_dim (int): number of input channels
        out_dim (int): number of output channels
        kernel_size, stride, padding: keyword-only, forwarded unchanged to nn.ConvTranspose2d
        norm (bool): if True, nn.BatchNorm2d(out_dim) is placed right after the transposed conv
        activate (str): 'ReLU' or 'Tanh'; anything else fails the assertion
    '''
    def __init__(self, in_dim, out_dim, *, kernel_size, stride, padding, norm=False, activate='ReLU'):
        super().__init__()

        # The transposed convolution is always the first stage.
        stages = [
            nn.ConvTranspose2d(
                in_dim,
                out_dim,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
            )
        ]

        # Batch normalization is optional (the output layer of a generator skips it).
        stages += [nn.BatchNorm2d(out_dim)] if norm else []

        # Only the two activations below are supported.
        assert activate in ['ReLU', 'Tanh'], 'Wrong Activation functionn Type'
        if activate == 'Tanh':
            stages.append(nn.Tanh())
        elif activate == 'ReLU':
            stages.append(nn.ReLU(inplace=True))

        # Chain the stages; the attribute name fixes the state_dict key prefix ("conv.<idx>.*").
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        # x: [b, c, h, w] -> [b, out_dim, h', w']
        return self.conv(x)

class ConvBlock(nn.Module):
    '''Down-sampling unit: Conv2d, optionally BatchNorm2d, then an activation.

    Args:
        in_dim (int): number of input channels
        out_dim (int): number of output channels
        kernel_size, stride, padding: keyword-only, forwarded unchanged to nn.Conv2d
        norm (bool): if True, nn.BatchNorm2d(out_dim) is placed right after the convolution
        activate (str): 'LeakyReLU' (negative slope 0.2) or 'Sigmoid'; anything else fails the assertion
    '''
    def __init__(self, in_dim, out_dim, *, kernel_size, stride, padding, norm=False, activate='LeakyReLU'):
        super().__init__()

        # The convolution is always the first stage.
        stages = [
            nn.Conv2d(
                in_dim,
                out_dim,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
            )
        ]

        # Batch normalization is optional.
        stages += [nn.BatchNorm2d(out_dim)] if norm else []

        # Only the two activations below are supported.
        assert activate in ['LeakyReLU', 'Sigmoid'], 'Wrong Activation functionn Type'
        if activate == 'Sigmoid':
            stages.append(nn.Sigmoid())
        elif activate == 'LeakyReLU':
            stages.append(nn.LeakyReLU(0.2, inplace=True))

        # Chain the stages; the attribute name fixes the state_dict key prefix ("conv.<idx>.*").
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        # x: [b, c, h, w] -> [b, out_dim, h', w']
        return self.conv(x)

class GeneratorAgent(nn.Module):
    '''Generator network (G): maps a latent vector to an image.

    Args:
        latent_dim (int): size of the latent vector
        img_shape (tuple): shape of the loaded image data; only img_shape[0]
            (the channel count) is used to size the output layer
    Return:
        z (torch.Tensor): generated images, [b, img_shape[0], 28, 28]
    '''
    def __init__(self, latent_dim, img_shape):
        super().__init__()
        self.img_shape = img_shape

        # One row per ConvTransBlock (kernel_size is 4 everywhere):
        # (in channels, out channels, stride, padding, batch norm?, activation)
        stage_specs = [
            (latent_dim, 256, 1, 0, True, 'ReLU'),       # [b, latent_dim, 1, 1] -> [b, 256, 4, 4]
            (256, 128, 1, 0, True, 'ReLU'),              # -> [b, 128, 7, 7]
            (128, 64, 2, 1, True, 'ReLU'),               # -> [b, 64, 14, 14]
            (64, img_shape[0], 2, 1, False, 'Tanh'),     # -> [b, img_shape[0], 28, 28]
        ]

        # Blocks are created in table order, so their indices inside self.model follow the table.
        self.model = nn.Sequential(*[
            ConvTransBlock(c_in, c_out, kernel_size=4, stride=step, padding=pad, norm=use_norm, activate=act)
            for c_in, c_out, step, pad, use_norm, act in stage_specs
        ])

    def forward(self, z):
        # z: [b, latent_dim]; append two singleton spatial axes -> [b, latent_dim, 1, 1]
        latent_map = z.view(*z.shape, 1, 1)
        # Up-sample to the final image
        return self.model(latent_map)

class DiscriminatorAgent(nn.Module):
    '''Discriminator network (D): scores how "real" an image looks.

    Args:
        img_shape (tuple): shape of the images to judge; only img_shape[0]
            (the channel count) is used to size the first convolution
    Return:
        valid (torch.Tensor): Sigmoid score per image, [b, 1] for 28x28 inputs
    '''
    def __init__(self, img_shape):
        super(DiscriminatorAgent, self).__init__()

        # Model built from ConvBlocks
        self.model = nn.Sequential(
            # Input [b, 1, 28, 28]
            ConvBlock(int(img_shape[0]), 64, kernel_size=4, stride=2, padding=1, norm=False, activate='LeakyReLU'), # [b, 64, 14, 14]
            ConvBlock(64, 128, kernel_size=4, stride=2, padding=1, norm=True, activate='LeakyReLU'), # [b, 128, 7, 7]
            ConvBlock(128, 256, kernel_size=4, stride=1, padding=0, norm=True, activate='LeakyReLU'), # [b, 256, 4, 4]
            ConvBlock(256, 1, kernel_size=4, stride=1, padding=0, norm=False, activate='Sigmoid'), # [b, 1, 1, 1]
        )

    def forward(self, img):
        # Input [b, 1, 28, 28]
        valid = self.model(img) # [b, 1, 1, 1]
        # Drop the two singleton spatial axes one at a time (last axis first).
        # A tuple `dim` for squeeze is only accepted from PyTorch 2.0 on; chaining
        # single-axis squeezes gives the same result on every version.
        valid = valid.squeeze(3).squeeze(2) # [b, 1]
        return valid

In [None]:
### Main

In [None]:
# prepare data
 # Normalize pixel values to the [-1, 1] range
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])
 # Load the built-in MNIST training split
train_data = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

img_shape = tuple(train_data[0][0].shape)  # shape of one transformed image tensor
latent_dim = 100

# set GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Build the models
G_model = GeneratorAgent(latent_dim, img_shape).to(device)
D_model = DiscriminatorAgent(img_shape).to(device)

# Hyper Params
batch_size = 64
learning_rate = 1e-3
epoch = 30

G_optimizer = torch.optim.Adam(G_model.parameters(), lr=learning_rate, betas=(0.5, 0.999))
D_optimizer = torch.optim.Adam(D_model.parameters(), lr=learning_rate, betas=(0.5, 0.999))
loss = nn.BCELoss()

# Create the dataloader
dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

# Fixed latent batch reused for every end-of-epoch sample grid, so that the saved
# images of different epochs show the same latent points and are directly comparable.
fixed_noise = torch.randn(batch_size, latent_dim, device=device)


# Training
for e in range(1, epoch+1):
    print(f'\n########### START epoch : {e} ###########')
    # Switch to train mode (eval mode is set at the end of every epoch)
    G_model.train()
    D_model.train()
    start_time = time.time()
    
    for i,(train_imgs, _) in enumerate(dataloader):
        train_imgs = train_imgs.to(device) # move the images to the GPU or CPU
        batch = len(train_imgs) # actual size of this batch (the last one may be smaller)

        # Targets: valid = 1, fake = 0, one per image in the batch
        valid_labels = torch.ones(batch, 1, device=device)
        fake_labels = torch.zeros(batch, 1, device=device)

        # Train the Generator
        G_optimizer.zero_grad()
        z = torch.randn(batch, latent_dim, device=device) # random latent vectors
        fake_imgs = G_model(z) # generation
        # The generator is rewarded when D labels its images as valid.
        # This backward pass also leaves gradients on D's parameters; they are
        # cleared by D_optimizer.zero_grad() before the discriminator update.
        g_loss = loss(D_model(fake_imgs), valid_labels)
        g_loss.backward()
        G_optimizer.step()

        # Train the Discriminator
        D_optimizer.zero_grad()
        real_loss = loss(D_model(train_imgs), valid_labels) # loss for recognizing real images
        # detach() keeps this loss from back-propagating into the generator
        fake_loss = loss(D_model(fake_imgs.detach()), fake_labels) # loss for recognizing fakes
        
        d_loss = real_loss + fake_loss
        
        d_loss.backward()
        D_optimizer.step()

        # Print and log progress every 100 iterations (time = seconds since the previous log line)
        if i % 100 == 0 and i != 0:
            info = f'epoch : {e:2d}    iter : {i:5d}    g_loss : {g_loss.item():.4f}    time : {time.time()-start_time:5.3f}'
            print(info)
            with open('output.txt', 'a') as f:
                f.write(info + '\n')
            start_time = time.time()
    # test
    # Actually run the generator in eval mode and without autograd. Previously the
    # last *training* batch was saved, which was produced in train mode and is
    # smaller than batch_size whenever the dataset size is not a multiple of it.
    G_model.eval()
    D_model.eval()
    with torch.no_grad():
        sample_imgs = G_model(fixed_noise)
    save_image(sample_imgs, f'output{e}.jpg', nrow=int(batch_size**0.5), normalize=True)

In [None]:
# 결과 확인

In [None]:
# Play back the per-epoch sample grids as a simple animation.
# The files are read from the same working-directory-relative path the training
# loop writes them to, and the range follows `epoch` instead of a hard-coded count.
for img_num in range(1, epoch + 1):
    # The context manager closes the image file once it has been displayed.
    with Image.open(f'output{img_num}.jpg') as img:
        clear_output(wait=True)  # replace the previous frame only when the next one is ready
        display(img)
    time.sleep(0.5)