# Generative Adversarial Networks (GANs)

## Table of Contents

1. Basics
2. GANs Paper Explained
3. PyTorch GANs Architecture
4. Implement GANs for Creating MNIST
5. [Implement DCGANs for Creating CIFAR10](#fifth-bullet)

# 5. Implement DCGANs for Creating CIFAR10 <a class="anchor" id="fifth-bullet"></a>

Deep Convolutional Generative Adversarial Networks (DCGANs)

In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable

In [None]:
# Setting hyperparameters
batchSize = 64  # number of images per mini-batch
imageSize = 64  # size of the generated images (64x64).

# Creating the transformations
transform = transforms.Compose([
    # `transforms.Scale` is deprecated and no longer available in current
    # torchvision releases; `transforms.Resize` is its replacement.
    transforms.Resize(imageSize),
    transforms.ToTensor(),
    # Per-channel (x - 0.5) / 0.5, so the real images share the [-1, 1]
    # range of the generator's Tanh output (assuming ToTensor yields [0, 1]).
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Download the training set and apply the transformations.
dataset = dset.CIFAR10(root='.', download=True, transform=transform)
# Get the batches of the images of the training set
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batchSize,
                                         shuffle=True, num_workers=2)



Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./cifar-10-python.tar.gz to .


In [None]:
# weights_init re-initialises the parameters of one layer, chosen by class name.
def weights_init(net):
    """Initialise a single layer in place, based on its class name.

    Meant to be handed to ``Module.apply`` (see ``netG.apply(weights_init)``).

    * class name contains ``Conv``      -> weight ~ N(0.0, 0.02)
    * class name contains ``BatchNorm`` -> weight ~ N(1.0, 0.02), bias = 0
    * anything else                     -> left untouched

    Args:
        net: the module (layer) to initialise.
    """
    layer_type = type(net).__name__

    if 'Conv' in layer_type:
        net.weight.data.normal_(0.0, 0.02)
        return

    if 'BatchNorm' in layer_type:
        net.weight.data.normal_(1.0, 0.02)
        net.bias.data.fill_(0)


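`nn.ConvTranspose2d`: Transposed convolution (often loosely called "inverse convolution" or "deconvolution"). A regular CNN takes an image as input and downsamples it to a vector; a transposed-convolution network goes the other way, taking a vector as input and upsampling it to an image. G uses transposed convolutions because its role is to generate fake images.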

In [None]:
# Defining the generator
class G(nn.Module):
    """DCGAN generator: maps a latent noise tensor to an image.

    Five transposed convolutions, each of the first four followed by
    BatchNorm + ReLU, and a final Tanh. With a ``(N, nz, 1, 1)`` input the
    spatial size goes 1 -> 4 -> 8 -> 16 -> 32 -> 64.

    Args:
        nz: number of channels of the input noise tensor (default 100).
        ngf: base number of generator feature maps; the hidden layers use
            ``ngf * 8, ngf * 4, ngf * 2, ngf`` channels (default 64).
        nc: number of channels of the generated image (default 3).

    The defaults reproduce the original fixed architecture, so ``G()``
    builds the same layers with the same ``state_dict`` keys.
    """

    def __init__(self, nz=100, ngf=64, nc=3):
        super(G, self).__init__()
        self.main = nn.Sequential(
            # Projection of the noise: kernel 4, stride 1, no padding.
            nn.ConvTranspose2d(in_channels=nz, out_channels=ngf * 8, kernel_size=4,
                               stride=1, padding=0, bias=False),
            nn.BatchNorm2d(ngf * 8),  # normalize the features along the batch dimension
            nn.ReLU(True),  # inplace
            # Each following block (kernel 4, stride 2, padding 1) doubles
            # the spatial size and halves the number of channels.
            nn.ConvTranspose2d(in_channels=ngf * 8, out_channels=ngf * 4, kernel_size=4,
                               stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            nn.ConvTranspose2d(in_channels=ngf * 4, out_channels=ngf * 2, kernel_size=4,
                               stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            nn.ConvTranspose2d(in_channels=ngf * 2, out_channels=ngf, kernel_size=4,
                               stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            nn.ConvTranspose2d(in_channels=ngf, out_channels=nc, kernel_size=4,
                               stride=2, padding=1, bias=False),
            nn.Tanh()  # keep the output between -1 and +1
        )

    def forward(self, input):
        """Run the noise tensor ``input`` through the network and return the images."""
        return self.main(input)

# Instantiate the generator with its default architecture
netG = G() 
netG.apply(weights_init) # run weights_init over the generator to initialize all the weights.

G(
  (main): Sequential(
    (0): ConvTranspose2d(100, 512, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU(inplace=True)
    (9): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU(inplace=True)
    (12): ConvTranspose2d(64, 3, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (13): Tanh()
  )
)

`nn.LeakyReLU`: $f(x) = \max(0,x) + negative\_slope * \min(0,x)$. D works better with LeakyReLU than a normal ReLU.

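Note that the number of output channels of the hidden layers in D grows from layer to layer (64 → 128 → 256 → 512) while the spatial size shrinks. This is the opposite of G, where the number of channels shrinks (512 → 256 → 128 → 64) while the spatial size grows.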

In [None]:
# Defining the discriminator
class D(nn.Module):
    """DCGAN discriminator: maps an image to a single "is real" score.

    Four stride-2 convolutions with LeakyReLU(0.2) (BatchNorm on all but the
    first), then a final 4x4 convolution and a Sigmoid. With a 64x64 input
    the spatial size goes 64 -> 32 -> 16 -> 8 -> 4 -> 1.

    Args:
        nc: number of channels of the input image; must match the
            generator's output channels (default 3).
        ndf: base number of discriminator feature maps; the hidden layers
            use ``ndf, ndf * 2, ndf * 4, ndf * 8`` channels (default 64).

    The defaults reproduce the original fixed architecture, so ``D()``
    builds the same layers with the same ``state_dict`` keys.
    """

    def __init__(self, nc=3, ndf=64):
        super(D, self).__init__()
        self.main = nn.Sequential(
            # No BatchNorm on the first block.
            nn.Conv2d(in_channels=nc, out_channels=ndf, kernel_size=4,
                      stride=2, padding=1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),  # negative slope = 0.2
            # Each following block (kernel 4, stride 2, padding 1) halves
            # the spatial size and doubles the number of channels.
            nn.Conv2d(in_channels=ndf, out_channels=ndf * 2, kernel_size=4,
                      stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 2),  # normalize the features along the batch dimension
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(in_channels=ndf * 2, out_channels=ndf * 4, kernel_size=4,
                      stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(in_channels=ndf * 4, out_channels=ndf * 8, kernel_size=4,
                      stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # Collapse the remaining 4x4 map to a single value per image.
            nn.Conv2d(in_channels=ndf * 8, out_channels=1, kernel_size=4,
                      stride=1, padding=0, bias=False),
            nn.Sigmoid()  # keep the output between 0 and 1
        )

    def forward(self, input):
        """Score the images in ``input``; returns a flattened 1-D tensor."""
        output = self.main(input)
        return output.view(-1)

# Instantiate the discriminator with its default architecture
netD = D() 
netD.apply(weights_init) # run weights_init over the discriminator to initialize all the weights


D(
  (main): Sequential(
    (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2, inplace=True)
    (5): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2, inplace=True)
    (8): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (9): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.2, inplace=True)
    (11): Conv2d(512, 1, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (12): Sigmoid()
  )
)

The training implementation contains two steps:

1. To update the weights of the D: Train the D by giving it a batch of real images with the target set to one (real), and then by giving it a batch of fake images with the target set to zero (fake). The fake images are created by the generator from random noise.

2. To update the weights of the G: Feed the fake image to the D to get an output (a value between 0 and 1). Set a new target to 1 (real), and then compute the loss between the output of the D and the new target (always 1). We will backpropagate this error into the G.


Note:

* `fake` is a torch variable because the output of a torch model is also a torch variable. So it contains not only the tensor of the generated images (pixel values between -1 and 1), but also the information needed to compute gradients with respect to the weights of G. When we update D, we do not use those gradients: the gradient of the loss with respect to the weights of G plays no part in D's optimization step. So we detach `fake` from the graph before feeding it to D. This saves some memory and speeds up the computation. In step 2, when we update G, we pass `fake` without detaching, because there we do need those gradients.

In [None]:
# Training the DCGANs

numEpochs = 25     # number of passes over the training set
noiseDim = 100     # channels of the noise tensor fed to G (must match G's first layer)
sampleEvery = 100  # save sample images every this many steps

criterion = nn.BCELoss()  # binary cross entropy (targets are either 0 or 1)
# betas: coefficients used for computing running averages of the gradient and its square.
optimizerD = optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))

for epoch in range(numEpochs):

    for i, data in enumerate(dataloader, 0):

        ### Step 1 : Train D ###

        netD.zero_grad()  # Zero the gradients with respect to the weights.

        # Plain tensors are used throughout: wrapping them in `Variable` is
        # deprecated and has no effect in current PyTorch.
        real, _ = data  # labels are not needed
        batch = real.size(0)  # the last batch may be smaller than batchSize
        realTarget = torch.ones(batch)   # target 1 = real
        fakeTarget = torch.zeros(batch)  # target 0 = fake

        # Train D on real images
        output = netD(real)  # Output is between 0 and 1.
        errD_real = criterion(output, realTarget)

        # Train D on fake images generated from G
        noise = torch.randn(batch, noiseDim, 1, 1)  # Random input (noise) of the G.
        fake = netG(noise)  # G generates some fake images.
        # `.detach()`: D's update does not need gradients flowing back into G.
        output = netD(fake.detach())
        errD_fake = criterion(output, fakeTarget)

        # Backpropagating the total error
        errD = errD_real + errD_fake  # Total error.
        errD.backward()  # Backpropagate the loss error.
        optimizerD.step()  # Update D's weights with Adam.

        ### Step 2: Train G ###

        netG.zero_grad()  # Zero the gradients with respect to the weights.
        # Do not detach here: the gradients must flow through D back into G.
        output = netD(fake)
        # G is rewarded when D labels its fakes as real, hence the all-ones target.
        errG = criterion(output, realTarget)
        errG.backward()
        optimizerG.step()  # Update G's weights with Adam.

        ### 3rd Step: Printing and saving ###

        # `.item()` extracts the Python float from the 0-dim loss tensors.
        print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f' % (epoch, numEpochs, i, len(dataloader), errD.item(), errG.item()))  # Print losses of the D and G.
        if i % sampleEvery == 0:  # Every `sampleEvery` steps:
            vutils.save_image(real, '%s/real_samples.png' % ".", normalize=True)  # Save the real images.
            # Preview only: no_grad avoids building an autograd graph that is never used.
            with torch.no_grad():
                fake = netG(noise)  # Regenerate the fakes with the freshly updated G.
            vutils.save_image(fake, '%s/fake_samples_epoch_%03d.png' % (".", epoch), normalize=True)  # Save the generated fake images.

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[6/25][215/782] Loss_D: 0.2558 Loss_G: 5.0468
[6/25][216/782] Loss_D: 0.1389 Loss_G: 4.4549
[6/25][217/782] Loss_D: 0.1857 Loss_G: 3.3671
[6/25][218/782] Loss_D: 0.0967 Loss_G: 3.8697
[6/25][219/782] Loss_D: 1.0872 Loss_G: 8.7437
[6/25][220/782] Loss_D: 2.3229 Loss_G: 2.2508
[6/25][221/782] Loss_D: 0.7488 Loss_G: 3.3362
[6/25][222/782] Loss_D: 0.7358 Loss_G: 2.3009
[6/25][223/782] Loss_D: 0.8468 Loss_G: 6.8381
[6/25][224/782] Loss_D: 2.1058 Loss_G: 0.7371
[6/25][225/782] Loss_D: 1.9039 Loss_G: 7.5653
[6/25][226/782] Loss_D: 0.9998 Loss_G: 2.2322
[6/25][227/782] Loss_D: 0.5030 Loss_G: 3.9101
[6/25][228/782] Loss_D: 0.3710 Loss_G: 3.6902
[6/25][229/782] Loss_D: 0.2724 Loss_G: 3.7632
[6/25][230/782] Loss_D: 0.2961 Loss_G: 4.1897
[6/25][231/782] Loss_D: 0.4764 Loss_G: 2.6042
[6/25][232/782] Loss_D: 0.3858 Loss_G: 3.9581
[6/25][233/782] Loss_D: 0.2632 Loss_G: 4.1515
[6/25][234/782] Loss_D: 0.3514 Loss_G: 3.8092
[6/25][235/782]