# DC-GAN (Unsupervised) from scratch

## GANs provide an attractive alternative to maximum likelihood techniques, in part because they lack a heuristic cost function. But GANs are unstable to train, often producing nonsensical outputs.
One of the main constraints of neural networks is that they are largely black-box models. 

## In order to develop DC-GAN
1. An all-convolutional net, which replaces deterministic spatial pooling functions with strided convolutions, allowing the network to learn its own spatial downsampling. This approach is used in both the generator (where it lets the network learn its own spatial upsampling) and the discriminator.
2. Fully connected layers are eliminated. 
3. Batch normalization, which stabilises learning by normalizing the input to each unit to have zero mean and unit variance. This helps deal with training problems that arise due to poor initialization and helps gradient flow in deeper models. It also prevents the generator from collapsing all samples to a single point.
4. Use ReLU activation in generator for all layers except for the output, which uses Tanh.
5. Use LeakyReLU activation in the discriminator for all layers.

In [1]:
import pandas as pd
import numpy as np
import torch 
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm.auto import tqdm
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import make_grid
import matplotlib.pyplot as plt
torch.manual_seed(0)



<torch._C.Generator at 0x7efe353693f0>

`(tensor + 1) / 2` rescales the images from the [-1, 1] range (the range of the Tanh output and of the normalized training data) back to [0, 1] so that they are displayed correctly.
Function for visualizing images: 
given a tensor of images, 
the number of images, and
the size per image,
it plots the images in a uniform grid.

In [2]:
def show_tensor_images(tensor, RowsXCols=25, size=(1, 28, 28)):
    """Plot the first ``RowsXCols`` images of a batch as a single grid.

    Parameters:
        tensor: batch of images; every value is shifted by +1 and halved
            before plotting (which maps [-1, 1] onto [0, 1]).
        RowsXCols: number of images taken from the front of the batch.
        size: per-image shape; accepted but not used by this function.
    """
    rescaled = (tensor + 1) / 2
    on_cpu = rescaled.detach().cpu()
    # The grid is laid out with five images per row.
    image_grid = make_grid(on_cpu[:RowsXCols], nrow=5)
    # Move the first axis to the end before handing the grid to imshow.
    channels_last = image_grid.permute(1, 2, 0).squeeze()
    plt.imshow(channels_last)
    plt.show()

# Building discriminator
 Discriminator Class
 Values:
    im_chan: the number of channels of the input image, a scalar
          (MNIST is black-and-white, so 1 channel is the default)
    hidden_dim: the inner dimension, a scalar
 Input: 64*64 images with 1 channel.
 For each convolution: $\text{output} = \lfloor (\text{input} + 2 \times \text{padding} - \text{kernel\_size}) / \text{stride} \rfloor + 1$
 so after layer 1: $(64 + 2 \times 1 - 4)/2 + 1 = 32$
 after layer 2: 16
 after layer 3: 8
 after layer 4: 4
 after layer 5 (padding 0): $(4 - 4)/2 + 1 = 1$

 forward function - completes a forward pass of the discriminator: given an image tensor,
    returns a tensor with one fake/real prediction per image.
    Parameters:
        image: an image tensor with dimensions (batch_size, im_chan, 64, 64)
   

In [3]:
class Discriminator(nn.Module):
    """DCGAN discriminator: strided convolutions that reduce an image to one logit.

    The network outputs a raw, unbounded score (logit) per image. No Sigmoid
    is applied here because this file trains with ``nn.BCEWithLogitsLoss``,
    which applies the sigmoid itself; adding another one would squash the
    prediction twice.

    Values:
        im_chan: number of channels of the input image (1 for MNIST).
        hidden_dim: base number of feature maps; doubled by each block.

    For 64x64 inputs the spatial size goes 64 -> 32 -> 16 -> 8 -> 4 -> 1.
    """

    def __init__(self, im_chan=1, hidden_dim=64):
        super(Discriminator, self).__init__()
        self.disc = nn.Sequential(
            nn.Conv2d(im_chan, hidden_dim, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(hidden_dim),
            nn.LeakyReLU(0.2),
            self.disc_block(hidden_dim, hidden_dim * 2, 4, 2, 1),
            self.disc_block(hidden_dim * 2, hidden_dim * 4, 4, 2, 1),
            self.disc_block(hidden_dim * 4, hidden_dim * 8, 4, 2, 1),
            # Final layer: a 4x4 kernel without padding collapses the 4x4 map
            # to 1x1. Its output is left as a raw logit (no activation).
            nn.Conv2d(hidden_dim * 8, 1, kernel_size=4, stride=2, padding=0),
        )

    def disc_block(self, input_channels, output_channels, kernel_size, stride, padding):
        """Return one hidden block: convolution -> batch norm -> LeakyReLU(0.2).

        Parameters:
            input_channels: channels of the incoming feature map.
            output_channels: channels produced by the convolution.
            kernel_size: convolution kernel size.
            stride: convolution stride.
            padding: zero-padding added on each side.
        """
        return nn.Sequential(
            nn.Conv2d(input_channels, output_channels, kernel_size, stride, padding),
            nn.BatchNorm2d(output_channels),
            nn.LeakyReLU(0.2, inplace=True),
        )

    def forward(self, image):
        """Score a batch of images.

        Parameters:
            image: image tensor of shape (batch, im_chan, height, width).

        Returns:
            Tensor with one row per image holding the flattened logits;
            shape (batch, 1) for 64x64 inputs.
        """
        disc_pred = self.disc(image)
        return disc_pred.view(len(disc_pred), -1)


Calling a model instance directly (e.g. `disc(image)`) invokes its `__call__` method, which runs `forward()` together with any registered hooks; this is why the models are called by instance name rather than through `.forward()`. This behaviour is inherited from nn.Module.

# making Generator
The generator takes random noise, i.e. a 100-dimensional vector, which is upscaled to a 4*4 feature map with 1024 channels.
After that it upscales to 8*8 with 512 channels, and keeps going until it reaches a 64*64 image with `im_chan` channels (1 for MNIST; 3 would give RGB).
The base feature width is 64 (`hidden_dim`), so the 1024 channels of the first upscaling step are 16 X 64.
$\text{output} = (\text{input} - 1) \times \text{stride} - 2 \times \text{padding} + \text{kernel\_size} + \text{output\_padding}$

In [4]:
class Generator(nn.Module):
    """DCGAN generator: turns a noise vector into an image with transposed convolutions.

    Values:
        z_dim: length of the input noise vector.
        im_chan: number of channels of the generated image.
        hidden_dim: base number of feature maps; the first stage uses 16x this.
    """

    def __init__(self, z_dim=100, im_chan=1, hidden_dim=64):
        super().__init__()
        self.z_dim = z_dim
        # (in_channels, out_channels, padding) of every hidden stage; each one
        # uses a 4x4 kernel with stride 2.
        hidden_specs = [
            (z_dim, hidden_dim * 16, 0),           # 1x1   -> 4x4
            (hidden_dim * 16, hidden_dim * 8, 1),  # 4x4   -> 8x8
            (hidden_dim * 8, hidden_dim * 4, 1),   # 8x8   -> 16x16
            (hidden_dim * 4, hidden_dim * 2, 1),   # 16x16 -> 32x32
        ]
        stages = [
            self.gen_block(c_in, c_out, 4, 2, pad)
            for c_in, c_out, pad in hidden_specs
        ]
        # Output stage: 32x32 -> 64x64, Tanh instead of BatchNorm + ReLU.
        stages.append(
            self.gen_block(
                hidden_dim * 2, im_chan,
                kernel_size=4, stride=2, padding=1, final_layer=True,
            )
        )
        self.gen = nn.Sequential(*stages)

    def gen_block(self, input_channels, output_channels, kernel_size, stride, padding, final_layer=False):
        """Return one generator stage.

        Hidden stages are transposed convolution -> batch norm -> ReLU; the
        final stage is transposed convolution -> Tanh.
        """
        upsample = nn.ConvTranspose2d(
            input_channels, output_channels, kernel_size, stride, padding
        )
        if final_layer:
            return nn.Sequential(upsample, nn.Tanh())
        return nn.Sequential(
            upsample,
            nn.BatchNorm2d(output_channels),
            nn.ReLU(inplace=True),
        )

    def unsqueeze_noise(self, noise):
        """Reshape noise of shape (n_samples, z_dim) to (n_samples, z_dim, 1, 1).

        Parameters:
            noise: a noise tensor with dimensions (n_samples, z_dim)
        """
        n_samples = len(noise)
        return noise.view(n_samples, self.z_dim, 1, 1)

    def forward(self, noise):
        """Generate images from a batch of noise vectors of shape (n_samples, z_dim)."""
        return self.gen(self.unsqueeze_noise(noise))

def get_noise(n_samples, z_dim, device='cpu'):
    """Draw a batch of standard-normal noise vectors.

    Parameters:
        n_samples: number of noise vectors to draw.
        z_dim: length of each noise vector.
        device: device on which the tensor is created.

    Returns:
        Tensor of shape (n_samples, z_dim).
    """
    noise_shape = (n_samples, z_dim)
    return torch.randn(noise_shape, device=device)


# Next thing is weight initialization

In [5]:
# ---- Hyperparameters ----
z_dim = 100          # length of the generator's noise vector
batch_size = 128
display_step = 500   # report losses / show samples every this many steps
# A learning rate of 0.0002 works well on DCGAN
lr = 0.0002
# Adam beta coefficients
beta_1 = 0.5
beta_2 = 0.999

# ---- Device and loss ----
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Loss on raw scores: the sigmoid is applied inside the criterion.
criterion = nn.BCEWithLogitsLoss()

# ---- Data: MNIST resized to 64x64 and normalized with mean 0.5 / std 0.5 ----
transform = transforms.Compose(
    [
        transforms.Resize(64),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)
dataloader = DataLoader(
    MNIST(root="dataset/", download=True, transform=transform),
    batch_size=batch_size,
    shuffle=True,
)

# ---- Models (generator is built first) and their Adam optimizers ----
gen = Generator(z_dim).to(device)
disc = Discriminator().to(device)
gen_opt = optim.Adam(gen.parameters(), lr=lr, betas=(beta_1, beta_2))
disc_opt = optim.Adam(disc.parameters(), lr=lr, betas=(beta_1, beta_2))

def weights_init(m):
    """Initialise one module in the DCGAN style; meant for ``module.apply``.

    * ``Conv2d`` / ``ConvTranspose2d``: weights drawn from N(0, 0.02).
    * ``BatchNorm2d``: scale drawn from N(1, 0.02), bias set to 0. The scale
      is centred on 1 (not 0) so that freshly initialised layers pass the
      normalised activations through instead of multiplying them by ~0.
    * Any other module is left untouched.

    Parameters:
        m: the module to initialise (modified in place).
    """
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        torch.nn.init.normal_(m.weight, 0.0, 0.02)
    elif isinstance(m, nn.BatchNorm2d):
        torch.nn.init.normal_(m.weight, 1.0, 0.02)
        torch.nn.init.constant_(m.bias, 0)
# Run weights_init over every sub-module of both networks (generator first).
gen, disc = gen.apply(weights_init), disc.apply(weights_init)

# Training-loop bookkeeping.
n_epochs = 20
cur_step = 0
# Running loss averages, reset after every report.
mean_generator_loss = mean_discriminator_loss = 0

In [6]:
# Alternating GAN training: one discriminator step, then one generator step,
# for every batch of real images.
for epoch in range(n_epochs):
    # Dataloader returns the batches; the labels are not used.
    for real, _ in tqdm(dataloader):
        cur_batch_size = len(real)
        real = real.to(device)

        ## Update discriminator ##
        disc_opt.zero_grad()
        fake_noise = get_noise(cur_batch_size, z_dim, device=device)
        fake = gen(fake_noise)
        # detach() keeps this step from back-propagating into the generator.
        disc_fake_pred = disc(fake.detach())
        disc_fake_loss = criterion(disc_fake_pred, torch.zeros_like(disc_fake_pred))
        disc_real_pred = disc(real)
        disc_real_loss = criterion(disc_real_pred, torch.ones_like(disc_real_pred))
        disc_loss = (disc_fake_loss + disc_real_loss) / 2

        # Keep track of the average discriminator loss
        mean_discriminator_loss += disc_loss.item() / display_step
        # Update gradients. The graph is not retained: the generator step
        # below runs its own forward pass and never reuses this one.
        disc_loss.backward()
        # Update optimizer
        disc_opt.step()

        ## Update generator ##
        gen_opt.zero_grad()
        fake_noise_2 = get_noise(cur_batch_size, z_dim, device=device)
        fake_2 = gen(fake_noise_2)
        disc_fake_pred = disc(fake_2)
        # The generator is rewarded when the discriminator labels fakes as real.
        gen_loss = criterion(disc_fake_pred, torch.ones_like(disc_fake_pred))
        gen_loss.backward()
        gen_opt.step()

        # Keep track of the average generator loss
        mean_generator_loss += gen_loss.item() / display_step

        ## Visualization code ##
        if cur_step % display_step == 0 and cur_step > 0:
            print(f"Step {cur_step}: Generator loss: {mean_generator_loss}, discriminator loss: {mean_discriminator_loss}, epoch :{epoch}")
            # Show generated samples next to real ones using the plotting
            # helper defined in this file.
            show_tensor_images(fake)
            show_tensor_images(real)
            mean_generator_loss = 0
            mean_discriminator_loss = 0
        cur_step += 1

Widget Javascript not detected.  It may not be installed or enabled properly. Reconnecting the current kernel may help.


KeyboardInterrupt: 