# Final Project - GAN Experiment

Date: 20230508

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim                  # optimization
import torchvision                           # image loading
import torchvision.transforms as transforms  # image transformation
import matplotlib.pyplot as plt
from tqdm import tqdm

%matplotlib inline
import numpy as np
import pandas as pd

torch.__version__

'2.0.1+cu117'

Check GPU CUDA capability here: https://developer.nvidia.com/cuda-gpus

In [2]:
# Report whether PyTorch can see a usable CUDA device in this environment.
torch.cuda.is_available()

True

### Data Initialization

Data Size: 28 $\times$ 28

In [3]:
# Data normalization
# For a GAN it is better to scale images to (-1, 1) rather than (0, 1),
# because the generator uses Tanh() as its final activation, whose output range is (-1, 1).
# ToTensor() yields values in [0, 1]; Normalize with mean = 0.5 and std = 0.5
# (the second argument is the standard deviation, not the variance)
# maps them to [-1, 1] via (x - mean) / std.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)) 
])

In [4]:
# Load the built-in MNIST dataset
# NOTE(review): `root` is used as a directory for the downloaded files, even though
# the name ends in ".csv"; no CSV file is read or written here.
train_dataset = torchvision.datasets.MNIST(
    root = './mnist_train.csv',                # local download/cache directory
    train = True,                             # use the training split only
    transform = transform,                     # defined in the cell above
    download = True                            # fetch the files if they are not already under root
)

# Wrap the dataset in a DataLoader that yields mini-batches of 64 images.
dataloader = torch.utils.data.DataLoader(
    train_dataset, 
    batch_size=64, 
    shuffle=True                              # reshuffle the samples at every epoch
    # ,num_workers=2
)

In [5]:
# Pull a single batch to check the image tensor layout: [batch, channels, height, width].
# The labels (second element of the batch) are not needed and are discarded.
imgs, _ = next(iter(dataloader))
imgs.shape

torch.Size([64, 1, 28, 28])

In [6]:
# Use the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Generator

In [7]:
# Generator
# Input: random noise (normally distributed random numbers) with length of 100
# Output: generated image, which has the same size as the input image with size of [1, 28, 28]
class Generator(nn.Module):
    """Fully connected generator that maps a noise vector to an image.

    The network widens the noise through three hidden layers (256, 512, 1024
    units, each followed by ReLU) and projects to one value per pixel. The
    final Tanh keeps every output value in [-1, 1].

    Args:
        latent_dim: Length of the input noise vector. Defaults to 100.
        img_shape: Shape of one generated image as (channels, height, width).
            Defaults to (1, 28, 28).

    The defaults reproduce the original hard-coded architecture, and the layer
    layout of ``self.model`` is unchanged, so ``state_dict`` keys stay the same.
    """

    def __init__(self, latent_dim: int = 100, img_shape=(1, 28, 28)):
        import math  # local import: only needed to compute the pixel count

        super().__init__()
        self.latent_dim = latent_dim
        self.img_shape = tuple(img_shape)
        n_pixels = math.prod(self.img_shape)  # 1 * 28 * 28 = 784 by default

        self.model = nn.Sequential(
            nn.Linear(latent_dim, 256),  # Linear 1: noise -> 256
            nn.ReLU(),
            nn.Linear(256, 512),         # Linear 2: 256 -> 512
            nn.ReLU(),
            nn.Linear(512, 1024),        # Linear 3: 512 -> 1024
            nn.ReLU(),
            nn.Linear(1024, n_pixels),   # Linear 4: 1024 -> one value per pixel
            nn.Tanh()                    # squash to (-1, 1)
        )

    def forward(self, x):
        """Generate images from noise.

        Args:
            x: Noise tensor whose last dimension is ``latent_dim``.

        Returns:
            Tensor of shape ``(batch, *img_shape)``.
        """
        return self.model(x).view(-1, *self.img_shape)

### Discriminator

In [8]:
# Discriminator
# Input: image with size of [1, 28, 28]
# Output: probability values [0,1] for binary classification
class Discriminator(nn.Module):
    """Fully connected discriminator that scores an image as real or fake.

    The image is flattened in ``forward`` and passed through three hidden
    layers (1024, 512, 256 units, each followed by LeakyReLU with slope 0.2).
    A final Linear + Sigmoid produces one value in [0, 1] per image.

    Args:
        img_shape: Shape of one input image as (channels, height, width).
            Defaults to (1, 28, 28), i.e. 784 input features.

    The default reproduces the original hard-coded architecture, and the layer
    layout of ``self.model`` is unchanged, so ``state_dict`` keys stay the same.
    """

    def __init__(self, img_shape=(1, 28, 28)):
        import math  # local import: only needed to compute the pixel count

        super().__init__()
        self.img_shape = tuple(img_shape)
        in_features = math.prod(self.img_shape)  # 784 by default

        self.model = nn.Sequential(
            nn.Linear(in_features, 1024),  # expand the flattened image to a wider hidden layer first
            nn.LeakyReLU(0.2),
            nn.Linear(1024, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 1),
            nn.Sigmoid()                   # squash to (0, 1)
        )

    def forward(self, x):
        """Score a batch of images.

        Args:
            x: Tensor whose first dimension is the batch and whose remaining
                dimensions multiply to the number of input features.

        Returns:
            Tensor of shape ``(batch, 1)`` with values in [0, 1].
        """
        # flatten() copies when needed, so non-contiguous inputs (e.g. a
        # transposed batch) and empty batches work; view() raised on both.
        return self.model(x.flatten(start_dim=1))
    
# nn.LeakyReLU(a): if x > 0, f(x) = x; if x < 0, f(x) = a * x, where a is a small positive slope (0.2 here).
# LeakyReLU is recommended for the discriminator: with plain ReLU, f(x) = 0 for x < 0, so the gradient there is 0
# and the affected units stop learning. The small negative slope keeps gradients flowing, which mitigates the vanishing-gradient problem and stabilizes training.

### Plot

In [9]:
import os

def save_image(model, test_input, epoch, out_dir=".", nrow=4):
    """Run ``model`` on ``test_input`` and save the outputs as one image grid.

    The grid is written to ``<out_dir>/epoch_<epoch>.png``. Saving is
    best-effort: any failure is reported with a printed message and is not
    raised, so a failed snapshot does not abort a long training run.

    Args:
        model: Callable mapping ``test_input`` to a batch of image tensors.
        test_input: Input passed to ``model`` (e.g. a fixed noise batch).
        epoch: Value used in the output file name and the log message.
        out_dir: Directory to write into; created if missing. Defaults to the
            current working directory, matching the original behavior.
        nrow: Number of images per grid row. Defaults to 4.

    Returns:
        None.
    """
    try:
        # Inference only: disable autograd so no graph is built, even when the
        # caller is not already inside a no_grad context.
        with torch.no_grad():
            generated_images = model(test_input).detach().cpu()
        os.makedirs(out_dir, exist_ok=True)
        path = os.path.join(out_dir, f'epoch_{epoch}.png')
        # normalize=True rescales the image values into [0, 1] before writing.
        torchvision.utils.save_image(generated_images, path, nrow=nrow, normalize=True)
        print(f"Image saved for epoch {epoch}")
    except Exception as e:  # deliberate best-effort: report and keep going
        print(f"Error in save_image: {e}")

In [10]:
# possible dead kernel when creating plots

#def plot_image(model, test_input):
#    prediction = np.squeeze(model(test_input).detach().cpu().numpy())
#    fig = plt.figure(figsize = (4,4))
#    for i in range(16): # prediction.size(0)
#        plt.subplot(4, 4, i+1)
#        plt.imshow((prediction[i]+1)/2) # recover (-1, 1) to (0, 1) to make prediction plotable
#        plt.axis("off")
#    plt.show()

In [11]:
# Fixed batch of 16 noise vectors (length 100, matching the generator's input size).
# The same noise is passed to save_image after every epoch, so the saved grids
# show how the outputs for identical inputs change during training.
test_input = torch.randn(16, 100, device=device)

### Generator and Discriminator Initialization

In [12]:
# Instantiate both networks and move their parameters to the selected device.
G = Generator().to(device)
D = Discriminator().to(device)

In [13]:
# Use BCELoss (binary cross-entropy) for the real/fake classification, since the discriminator ends in Sigmoid().
# If the discriminator stopped at Linear(256, 1) without Sigmoid(), BCEWithLogitsLoss() would be needed instead.
criterion = nn.BCELoss() # loss function
optimizer_G = optim.Adam(G.parameters(), lr=0.0002, betas=(0.5, 0.999)) # updates G's parameters only
optimizer_D = optim.Adam(D.parameters(), lr=0.0002, betas=(0.5, 0.999)) # updates D's parameters only

### GAN Training Algorithm

In [14]:
# Per-epoch average losses, one entry appended per epoch.
# NOTE(review): the entries are 0-dim tensors on `device` (the result of
# accumulating loss tensors), not Python floats; convert with .item() before
# plotting or serializing.
G_loss = []
D_loss = []
num_epochs = 50

for epoch in range(num_epochs):
    # Running sums of the batch losses for this epoch.
    d_epoch_loss = 0
    g_epoch_loss = 0
    count = len(dataloader)  # number of batches per epoch, used for averaging

    for step, (img, _) in enumerate(dataloader):  # labels are not used
        img = img.to(device)
        size = img.size(0)  # actual batch size (the last batch may be smaller)
        # One noise vector per real image; 100 matches the generator's input size.
        random_noise = torch.randn(size, 100, device=device)

        # ---- Discriminator update ----
        optimizer_D.zero_grad()

        # Real images should be classified as 1.
        real_output = D(img)  
        d_real_loss = criterion(real_output, torch.ones_like(real_output))   
        d_real_loss.backward()
        gen_img = G(random_noise)

        # Fake images should be classified as 0. detach() cuts the graph so
        # this backward pass does not compute gradients for G.
        fake_output = D(gen_img.detach())
        d_fake_loss = criterion(fake_output, torch.zeros_like(fake_output))  
        d_fake_loss.backward()

        # The sum is only used for logging; each term was already
        # backpropagated above and the gradients accumulated in D.
        d_loss = d_real_loss + d_fake_loss
        optimizer_D.step()

        # ---- Generator update ----
        optimizer_G.zero_grad()
        # Score the same fakes with the just-updated D, this time without
        # detach() so gradients flow back into G. The target is 1: G is
        # trained to make D classify its images as real.
        fake_output = D(gen_img)
        g_loss = criterion(fake_output, torch.ones_like(fake_output))      
        g_loss.backward()
        # Only G's parameters are stepped here; the gradients this backward
        # pass leaves on D are cleared by optimizer_D.zero_grad() next step.
        optimizer_G.step()

        # Accumulate the batch losses without tracking gradients.
        with torch.no_grad():
            d_epoch_loss += d_loss
            g_epoch_loss += g_loss

    with torch.no_grad():  # average the summed losses over the number of batches
        d_epoch_loss /= count
        g_epoch_loss /= count
        D_loss.append(d_epoch_loss)
        G_loss.append(g_epoch_loss)
        print(f'Epoch: {epoch}, Discriminator Loss: {d_epoch_loss}, Generator Loss: {g_epoch_loss}')
        # Save a grid of images generated from the fixed test noise.
        save_image(G, test_input, epoch)
        #plot_image(G, test_input)
        
    torch.cuda.empty_cache() # Release cached, currently unused GPU memory after each epoch

Epoch: 0, Discriminator Loss: 0.7023011445999146, Generator Loss: 2.8939623832702637
Image saved for epoch 0
Epoch: 1, Discriminator Loss: 0.41959118843078613, Generator Loss: 3.8927013874053955
Image saved for epoch 1
Epoch: 2, Discriminator Loss: 0.3332814574241638, Generator Loss: 3.9070262908935547
Image saved for epoch 2
Epoch: 3, Discriminator Loss: 0.30224746465682983, Generator Loss: 3.759636402130127
Image saved for epoch 3
Epoch: 4, Discriminator Loss: 0.314322829246521, Generator Loss: 3.45082426071167
Image saved for epoch 4
Epoch: 5, Discriminator Loss: 0.4873389005661011, Generator Loss: 2.6328206062316895
Image saved for epoch 5
Epoch: 6, Discriminator Loss: 0.7652638554573059, Generator Loss: 1.8584436178207397
Image saved for epoch 6
Epoch: 7, Discriminator Loss: 0.7930213809013367, Generator Loss: 1.8457943201065063
Image saved for epoch 7
Epoch: 8, Discriminator Loss: 0.8019097447395325, Generator Loss: 1.835387110710144
Image saved for epoch 8
Epoch: 9, Discriminato