<a href="https://colab.research.google.com/github/MLLSCKR/Paper_Implementation/blob/main/DCGAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#DCGAN(Deep Convolutional Generative Adversarial Network)

### GAN의 문제점
1. Generator or Discriminator 둘 중 하나가 수렴하지 않으면 과적합이 발생할 수 있다.
2. 때때로 generator는 소수의 sample 종류만 생성한다. 이를 mode collapse(모드 붕괴)라고 한다.

### DCGAN이 사용한 기술
위의 GAN의 문제를 해결하고 안정적인 학습을 보장하기 위해서 DCGAN은 아래의 3가지 기술을 사용한다.
1. Fully Connected Layer를 제거하고 Convolutional Layer만 사용한다.
2. Pooling layer를 사용하는 대신 Strided Convolution을 사용하여 Down-Sampling 수행
3. Hidden Layer 간에 tanh 대신 ReLU/leakyReLU 활성화 함수 사용

In [1]:
from google.colab import drive
drive.mount('/content/drive')

FOLDERNAME = "pytorch_practice"
assert FOLDERNAME is not None, "[!] Enter the foldername."

import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))

%cd /content/drive/My\ Drive/pytorch_practice/GAN/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/pytorch_practice/GAN


In [2]:
import os
import sys

import numpy as np
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils

In [3]:
data_path = './Data/mnist'
out_path = 'output'
log_file = os.path.join(out_path, 'log.txt')
batch_size = 128
image_channel = 1

z_dim = 100

g_hidden = 64
x_dim = 64
d_hidden = 64
epoch_num = 25

real_label = 1
fake_label = 0

lr = 2e-4
seed = 1

In [4]:
device = torch.device("cuda")

### Generator Network

In [5]:
class Generator(nn.Module):
  def __init__(self):
    super(Generator, self).__init__()
    self.main = nn.Sequential(
        #1st layer
        nn.ConvTranspose2d(z_dim, g_hidden * 8, 4, 1, 0, bias = False),
        nn.BatchNorm2d(g_hidden * 8),
        nn.ReLU(True),

        #2nd layer
        nn.ConvTranspose2d(g_hidden * 8, g_hidden * 4, 4, 2, 1, bias = False),
        nn.BatchNorm2d(g_hidden * 4),
        nn.ReLU(True),

        #3rd layer
        nn.ConvTranspose2d(g_hidden * 4, g_hidden * 2, 4, 2, 1, bias = False),
        nn.BatchNorm2d(g_hidden * 2),
        nn.ReLU(True),

        #4th layer
        nn.ConvTranspose2d(g_hidden * 2, g_hidden, 4, 2, 1, bias = False),
        nn.BatchNorm2d(g_hidden),
        nn.ReLU(True),

        #output layer
        nn.ConvTranspose2d(g_hidden, image_channel, 4, 2, 1, bias = False),
        nn.Tanh()

    )

  def forward(self, input):
    return self.main(input)

In [6]:
def weights_init(m):
  classname = m.__class__.__name__
  if classname.find('Conv') != -1:
    m.weight.data.normal_(0.0, 0.02)
  
  elif classname.find('BatchNorm') != -1:
    m.weight.data.normal_(1.0, 0.02)
    m.bias.data.fill_(0)

In [7]:
netG = Generator().to(device)
netG.apply(weights_init)
print(netG)

Generator(
  (main): Sequential(
    (0): ConvTranspose2d(100, 512, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU(inplace=True)
    (9): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU(inplace=True)
    (12): ConvTranspose2d(64, 1, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (13): Tanh()
  )
)


In [8]:
from torchsummary import summary

summary(netG, input_size = (100, 1, 1))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
   ConvTranspose2d-1            [-1, 512, 4, 4]         819,200
       BatchNorm2d-2            [-1, 512, 4, 4]           1,024
              ReLU-3            [-1, 512, 4, 4]               0
   ConvTranspose2d-4            [-1, 256, 8, 8]       2,097,152
       BatchNorm2d-5            [-1, 256, 8, 8]             512
              ReLU-6            [-1, 256, 8, 8]               0
   ConvTranspose2d-7          [-1, 128, 16, 16]         524,288
       BatchNorm2d-8          [-1, 128, 16, 16]             256
              ReLU-9          [-1, 128, 16, 16]               0
  ConvTranspose2d-10           [-1, 64, 32, 32]         131,072
      BatchNorm2d-11           [-1, 64, 32, 32]             128
             ReLU-12           [-1, 64, 32, 32]               0
  ConvTranspose2d-13            [-1, 1, 64, 64]           1,024
             Tanh-14            [-1, 1,

### Discriminator Network

In [9]:
class Discriminator(nn.Module):
  def __init__(self):
    super(Discriminator, self).__init__()
    self.main = nn.Sequential(
        #1st layer
        nn.Conv2d(image_channel, d_hidden, 4, 2, 1, bias = False),
        nn.LeakyReLU(0.2, inplace = True),

        #2nd layer
        nn.Conv2d(d_hidden, d_hidden * 2, 4, 2, 1, bias = False),
        nn.BatchNorm2d(d_hidden * 2),
        nn.LeakyReLU(0.2, inplace = True),

        #3rd layer
        nn.Conv2d(d_hidden * 2, d_hidden * 4, 4, 2, 1, bias = False),
        nn.BatchNorm2d(d_hidden * 4),
        nn.LeakyReLU(0.2, inplace = True),

        #4th layer
        nn.Conv2d(d_hidden * 4, d_hidden * 8, 4, 2, 1, bias = False),
        nn.BatchNorm2d(d_hidden * 8),
        nn.LeakyReLU(0.2, inplace = True),

        #output layer
        nn.Conv2d(d_hidden*8, 1, 4, 1, 0, bias = False),
        nn.Sigmoid()
    )
  
  def forward(self, input):
    return self.main(input).view(-1, 1).squeeze(1)

    """
    squeeze method
      Returns a tensor with all the dimensions of input of size 1 removed.
      For example, if input is of shape: (A \times 1 \times B \times C \times 1 \times D)(A×1×B×C×1×D) then the out tensor will be of shape: (A \times B \times C \times D)(A×B×C×D).
    """

In [10]:
netD = Discriminator().to(device)
netD.apply(weights_init)
print(netD)

Discriminator(
  (main): Sequential(
    (0): Conv2d(1, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2, inplace=True)
    (5): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2, inplace=True)
    (8): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (9): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.2, inplace=True)
    (11): Conv2d(512, 1, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (12): Sigmoid()
  )
)


In [11]:
summary(netD, input_size = (1, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           1,024
         LeakyReLU-2         [-1, 64, 112, 112]               0
            Conv2d-3          [-1, 128, 56, 56]         131,072
       BatchNorm2d-4          [-1, 128, 56, 56]             256
         LeakyReLU-5          [-1, 128, 56, 56]               0
            Conv2d-6          [-1, 256, 28, 28]         524,288
       BatchNorm2d-7          [-1, 256, 28, 28]             512
         LeakyReLU-8          [-1, 256, 28, 28]               0
            Conv2d-9          [-1, 512, 14, 14]       2,097,152
      BatchNorm2d-10          [-1, 512, 14, 14]           1,024
        LeakyReLU-11          [-1, 512, 14, 14]               0
           Conv2d-12            [-1, 1, 11, 11]           8,192
          Sigmoid-13            [-1, 1, 11, 11]               0
Total params: 2,763,520
Trainable param

### Learning and Evaluation

In [12]:
criterion = nn.BCELoss()

optimizerD = optim.Adam(netD.parameters(), lr = lr, betas = (0.5, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr = lr, betas = (0.5, 0.999))

In [13]:
dataset = dset.MNIST(root = data_path, download = True, 
                     transform = transforms.Compose([
                         transforms.Resize(x_dim),
                         transforms.ToTensor(),
                         transforms.Normalize((0.5,), (0.5,))
                     ]))
assert dataset

In [14]:
dataloader = torch.utils.data.DataLoader(dataset, batch_size = batch_size, shuffle = True, num_workers = 4)

  cpuset_checked))


In [35]:
### Learning!!!

viz_noise = torch.randn(batch_size, z_dim, 1, 1, device = device)

for epoch in range(epoch_num):
  for i, data in enumerate(dataloader):
    x_real = data[0].to(device)
    r_label = torch.full((x_real.size(0),), real_label, device = device).float()
    f_label = torch.full((x_real.size(0),), fake_label, device = device).float()

    # update D with real data
    netD.zero_grad()
    y_real = netD(x_real)

    loss_D_real = criterion(y_real, r_label)
    loss_D_real.backward()

    # update D with fake data
    z_noise = torch.randn(x_real.size(0), z_dim, 1, 1, device = device)
    x_fake = netG(z_noise)
    y_fake = netD(x_fake.detach())
    loss_D_fake = criterion(y_fake, f_label)
    loss_D_fake.backward()
    optimizerD.step()

    # update G with fake data
    netG.zero_grad()
    y_fake_r = netD(x_fake)
    loss_G = criterion(y_fake_r, r_label)
    loss_G.backward()
    optimizerG.step()

    if i % 100 == 0:
      print('Epoch {} [{}/{}] loss_D_real: {:.4f} loss_D_fake: {:.4f} loss_G: {:.4f}'.format(
          epoch, i, len(dataloader), loss_D_real.mean().item(),
          loss_D_fake.mean().item(), loss_G.mean().item()
      ))

Epoch 0 [0/469] loss_D_real: 0.0029 loss_D_fake: 0.0009 loss_G: 9.0288
Epoch 0 [100/469] loss_D_real: 0.1162 loss_D_fake: 0.0003 loss_G: 10.1925
Epoch 0 [200/469] loss_D_real: 0.0493 loss_D_fake: 0.2732 loss_G: 5.9532
Epoch 0 [300/469] loss_D_real: 0.6122 loss_D_fake: 0.1446 loss_G: 2.0974
Epoch 0 [400/469] loss_D_real: 0.1040 loss_D_fake: 0.0904 loss_G: 3.1931
Epoch 1 [0/469] loss_D_real: 0.1060 loss_D_fake: 0.0372 loss_G: 3.2244
Epoch 1 [100/469] loss_D_real: 0.1059 loss_D_fake: 0.0300 loss_G: 3.0169
Epoch 1 [200/469] loss_D_real: 0.2238 loss_D_fake: 0.6994 loss_G: 2.4287
Epoch 1 [300/469] loss_D_real: 0.2864 loss_D_fake: 0.1179 loss_G: 1.7449
Epoch 1 [400/469] loss_D_real: 0.1974 loss_D_fake: 0.1779 loss_G: 2.3498
Epoch 2 [0/469] loss_D_real: 0.4885 loss_D_fake: 0.0925 loss_G: 1.3059
Epoch 2 [100/469] loss_D_real: 0.1484 loss_D_fake: 0.1983 loss_G: 2.6069
Epoch 2 [200/469] loss_D_real: 0.1991 loss_D_fake: 0.3089 loss_G: 2.1731
Epoch 2 [300/469] loss_D_real: 0.1238 loss_D_fake: 0.222

In [15]:
### Learning!!!

viz_noise = torch.randn(batch_size, z_dim, 1, 1, device = device)

for epoch in range(epoch_num):
  for i, data in enumerate(dataloader):
    x_real = data[0].to(device)
    r_label = torch.full((x_real.size(0),), real_label, device = device).float()
    f_label = torch.full((x_real.size(0),), fake_label, device = device).float()

    # update D with real data
    netD.zero_grad()
    y_real = netD(x_real)

    loss_D_real = criterion(y_real, r_label)
    loss_D_real.backward()

    # update D with fake data
    z_noise = torch.randn(x_real.size(0), z_dim, 1, 1, device = device)
    x_fake = netG(z_noise)
    y_fake = netD(x_fake.detach())
    loss_D_fake = criterion(y_fake, f_label)
    loss_D_fake.backward()
    optimizerD.step()

    # update G with fake data
    netG.zero_grad()
    y_fake_r = netD(x_fake)
    loss_G = criterion(y_fake_r, r_label)
    loss_G.backward()
    optimizerG.step()

    if i % 100 == 0:
      print('Epoch {} [{}/{}] loss_D_real: {:.4f} loss_D_fake: {:.4f} loss_G: {:.4f}'.format(
          epoch, i, len(dataloader), loss_D_real.mean().item(),
          loss_D_fake.mean().item(), loss_G.mean().item()
      ))
      
      vutils.save_image(x_real, os.path.join(out_path, 'real_samples_{}.png'.format(epoch)), normalize = True)
      
      with torch.no_grad():
        viz_sample = netG(viz_noise)
        vutils.save_image(viz_sample, os.path.join(out_path, 'fake_samples_{}.png'.format(epoch)), normalize = True)

Epoch 0 [0/469] loss_D_real: 0.7623 loss_D_fake: 0.8161 loss_G: 4.0889
Epoch 0 [100/469] loss_D_real: 0.0001 loss_D_fake: 0.0000 loss_G: 37.4320
Epoch 0 [200/469] loss_D_real: 0.0000 loss_D_fake: 0.0000 loss_G: 36.6530
Epoch 0 [300/469] loss_D_real: 0.0115 loss_D_fake: 0.6389 loss_G: 6.1193
Epoch 0 [400/469] loss_D_real: 0.1142 loss_D_fake: 0.2773 loss_G: 3.0660
Epoch 1 [0/469] loss_D_real: 5.6465 loss_D_fake: 0.0009 loss_G: 0.0120
Epoch 1 [100/469] loss_D_real: 0.0904 loss_D_fake: 0.1984 loss_G: 3.0611
Epoch 1 [200/469] loss_D_real: 0.0640 loss_D_fake: 0.4349 loss_G: 3.8013
Epoch 1 [300/469] loss_D_real: 0.0446 loss_D_fake: 0.6368 loss_G: 3.9069
Epoch 1 [400/469] loss_D_real: 0.1867 loss_D_fake: 0.5579 loss_G: 2.5529
Epoch 2 [0/469] loss_D_real: 0.2262 loss_D_fake: 0.5700 loss_G: 1.9528
Epoch 2 [100/469] loss_D_real: 0.1565 loss_D_fake: 0.3080 loss_G: 2.4902
Epoch 2 [200/469] loss_D_real: 0.8986 loss_D_fake: 0.0398 loss_G: 0.9957
Epoch 2 [300/469] loss_D_real: 0.0806 loss_D_fake: 0.17