In [33]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
 

In [34]:
# Importing the necessary module for building neural networks in PyTorch
import torch.nn as nn

# Define the Discriminator class, which inherits from nn.Module
class Discriminator(nn.Module):
    # Initialization function to set up layers of the discriminator
    def __init__(self, img_dim):
        super().__init__()  # Call the parent class's initialization
        # Creating the neural network structure (Sequential model)
        self.disc = nn.Sequential(
            # First layer: Fully connected (Linear) layer that takes input of size img_dim (flattened image)
            nn.Linear(img_dim, 128),
            
            # Activation function: LeakyReLU with a small negative slope (0.1)
            # LeakyReLU allows a small gradient when the input is negative, helping with the vanishing gradient problem
            nn.LeakyReLU(0.1),
            
            # Second layer: Fully connected layer that reduces the output to a single value
            nn.Linear(128, 1),
            
            # Sigmoid function: Squashes the output value between 0 and 1
            # This final output will represent the probability of the input being a real or fake image
            nn.Sigmoid(),
        )

    # Forward function: Defines how the input moves through the network
    def forward(self, x):
        # Pass the input (x) through the defined layers and return the output
        return self.disc(x)


In [35]:
# Import the necessary module for building neural networks in PyTorch
import torch.nn as nn

# Define the Generator class, which inherits from nn.Module
class Generator(nn.Module):
    # Initialization function to set up the layers of the generator
    def __init__(self, z_dim, img_dim):
        super().__init__()  # Call the parent class's initialization
        # Creating the neural network structure (Sequential model)
        self.gen = nn.Sequential(
            # First layer: Fully connected (Linear) layer that maps the noise (z_dim) to 256 features
            nn.Linear(z_dim, 256),
            
            # Activation function: LeakyReLU with a small negative slope (0.1)
            # This adds non-linearity and helps the generator learn complex patterns
            nn.LeakyReLU(0.1),
            
            # Second layer: Fully connected layer that maps 256 features to the final image dimension (img_dim)
            nn.Linear(256, img_dim),
            
            # Tanh activation function: Squashes the output values between -1 and 1
            # This is useful because image pixel values are typically normalized in this range
            nn.Tanh(),
        )

    # Forward function: Defines how the input moves through the network
    def forward(self, x):
        # Pass the input (x) through the defined layers and return the output (generated image)
        return self.gen(x)


In [36]:
# Set device to GPU if available, otherwise use CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
# "cuda" means you're using an NVIDIA GPU for faster computation if available, otherwise, the model will run on the CPU.

# Learning rate for the optimizers
lr = 3e-4  # 0.0003 learning rate, controls how fast the model learns

# Latent dimension for the noise vector input to the generator (size of random noise)
z_dim = 64  # This is the size of the noise vector input to the generator

# Image dimensions for MNIST dataset (28x28 grayscale images, so channels = 1)
image_dim = 28 * 28 * 1  # 28x28 pixels, multiplied by 1 channel (grayscale)

# Batch size for training
batch_size = 32  # Number of images processed in one iteration

# Number of training epochs
num_epochs = 50  # Number of full passes through the dataset during training

# Initialize the discriminator and generator, and move them to the chosen device (GPU or CPU)
disc = Discriminator(image_dim).to(device)
gen = Generator(z_dim, image_dim).to(device)
# The discriminator and generator networks are moved to the GPU (if available) for faster computation.

# Create a fixed noise vector to observe the progress of the generator during training
fixed_noise = torch.randn((batch_size, z_dim)).to(device)
# This noise vector will be used to generate images from the generator after every epoch to track training progress.

# Apply transformations to preprocess the MNIST images
transforms = transforms.Compose(
    [
        transforms.ToTensor(),  # Convert images to PyTorch tensors
        transforms.Normalize((0.1307), (0.3081))  # Normalize pixel values with mean=0.1307 and std=0.3081 (values from MNIST dataset)
    ]
)
# The `transforms` will be applied to every image in the dataset to convert them to tensors and normalize their pixel values.


In [37]:
# Load the MNIST dataset (a dataset of handwritten digits)
datasets = datasets.MNIST(
    root = r"C:\Users\Gaurav\OneDrive\Desktop\Text2Image\experiments\GANS\saved_datasets",         # Directory where the dataset will be saved
    transform=transforms,     # Apply any necessary transformations to the images (e.g., converting to tensors)
    download=True             # Download the dataset if it's not already available in the directory
)

# Create a DataLoader to efficiently load the dataset during training
loader = DataLoader(
    datasets,                # The dataset to load (MNIST in this case)
    batch_size=batch_size,    # Number of samples in each batch during training
    shuffle=True              # Randomly shuffle the data at each epoch to ensure variety
)

# Create an Adam optimizer for the discriminator
# This will update the weights of the discriminator based on gradients
opt_disc = optim.Adam(
    disc.parameters(),        # The parameters of the discriminator model that need to be optimized
    lr=lr                     # Learning rate for the optimizer, which controls how fast the model learns
)

# Create an Adam optimizer for the generator
# This will update the weights of the generator based on gradients
opt_gen = optim.Adam(
    gen.parameters(),         # The parameters of the generator model that need to be optimized
    lr=lr                     # Learning rate for the optimizer, same as for the discriminator
)

# Define the loss function: Binary Cross-Entropy Loss (BCELoss)
# BCELoss is typically used for binary classification problems, such as determining if an image is real or fake
criterion = nn.BCELoss()

# Create a SummaryWriter for logging the outputs of the generator (fake images) in TensorBoard
writer_fake = SummaryWriter(r"C:\Users\Gaurav\OneDrive\Desktop\Text2Image\experiments\GANS\saved_datasets\logs\fake")

# Create a SummaryWriter for logging the outputs of the generator (real images) in TensorBoard
writer_real = SummaryWriter(r"C:\Users\Gaurav\OneDrive\Desktop\Text2Image\experiments\GANS\saved_datasets\logs\real")

# Initialize a variable to keep track of the current training step
step = 0


In [39]:
 
# Outer loop for number of epochs (full passes through the dataset)
for epoch in range(num_epochs):
    
    # Inner loop for each batch of data in the dataset
    for batch_idx, (real, _) in enumerate(loader):
        # Flatten the real images (28x28) into a vector of size 784 (28*28=784) and move them to the device (GPU/CPU)
        real = real.view(-1, 784).to(device)
        batch_size = real.shape[0]  # Get the current batch size (in case it's less in the last batch)

        # ----- Training the Discriminator -----
        # Goal: Maximize log(D(real)) + log(1 - D(G(z)))
        
        # Generate random noise input for the generator
        noise = torch.randn((batch_size, z_dim)).to(device)
        
        # Generate fake images using the generator from the noise
        fake = gen(noise)
        
        # Get discriminator output for real images
        disc_real = disc(real).view(-1)
        # Calculate the discriminator loss for real images (want D(real) to be close to 1)
        lossD_real = criterion(disc_real, torch.ones_like(disc_real))  # Compare with labels of 1s for real images
        
        # Get discriminator output for fake images
        disc_fake = disc(fake).view(-1)
        # Calculate the discriminator loss for fake images (want D(fake) to be close to 0)
        lossD_fake = criterion(disc_fake, torch.zeros_like(disc_fake))  # Compare with labels of 0s for fake images
        
        # Total discriminator loss is the average of losses for real and fake images
        lossD = (lossD_real + lossD_fake) / 2
        
        # Zero the gradients of the discriminator before backpropagation
        disc.zero_grad()
        # Perform backpropagation on the discriminator to compute the gradients
        lossD.backward(retain_graph=True)  # retain_graph=True is needed because the generator will use the same graph
        # Update the discriminator's weights
        opt_disc.step()

        # ----- Training the Generator -----
        # Goal: Maximize log(D(G(z))) <--> Minimize log(1 - D(G(z))) (equivalent formulation)

        # Get the discriminator's output for the fake images generated by the generator
        output = disc(fake).view(-1)
        # Generator loss (want D(fake) to be close to 1, i.e., trick the discriminator)
        lossG = criterion(output, torch.ones_like(output))  # Compare with labels of 1s to maximize D(G(z))
        
        # Zero the gradients of the generator before backpropagation
        gen.zero_grad()
        # Perform backpropagation on the generator to compute the gradients
        lossG.backward()
        # Update the generator's weights
        opt_gen.step()

        # ----- Logging and Displaying Progress -----
        # Log and print the losses every epoch
        if batch_idx == 0:
            print(
                f"Epoch: [{epoch}/{num_epochs}] "
                f"Loss D: {lossD:.4f}, Loss G: {lossG:.4f}"
            )
            
            # ----- Save and Log Fake and Real Images -----
            # Use the fixed noise vector to generate images from the generator (for consistent observation)
            with torch.no_grad():  # Disable gradient calculation for validation/generation
                fake = gen(fixed_noise).reshape(-1, 1, 28, 28)  # Reshape to (batch, channels, height, width)
                data = real.reshape(-1, 1, 28, 28)  # Reshape real images for comparison
                
                # Create a grid of fake images for visualization
                img_grid_fake = torchvision.utils.make_grid(fake, normalize=True)
                # Create a grid of real images for comparison
                img_grid_real = torchvision.utils.make_grid(data, normalize=True)

                # Add the fake images to TensorBoard for visualization
                writer_fake.add_image(
                    "MNIST Fake Images", img_grid_fake, global_step=step
                )

                # Add the real images to TensorBoard for comparison
                writer_real.add_image(
                    "MNIST Real Images", img_grid_real, global_step=step
                )


Epoch: [0/50] Loss D: 0.6865, Loss G: 0.7074
Epoch: [1/50] Loss D: 0.2375, Loss G: 1.8154
Epoch: [2/50] Loss D: 0.0747, Loss G: 2.9379
Epoch: [3/50] Loss D: 0.1117, Loss G: 3.9032
Epoch: [4/50] Loss D: 0.0802, Loss G: 4.4818
Epoch: [5/50] Loss D: 0.0128, Loss G: 4.9220
Epoch: [6/50] Loss D: 0.0294, Loss G: 4.6298
Epoch: [7/50] Loss D: 0.0286, Loss G: 5.9081
Epoch: [8/50] Loss D: 0.0680, Loss G: 5.5225
Epoch: [9/50] Loss D: 0.0602, Loss G: 4.9175
Epoch: [10/50] Loss D: 0.0135, Loss G: 5.2558
Epoch: [11/50] Loss D: 0.0096, Loss G: 5.2096
Epoch: [12/50] Loss D: 0.0172, Loss G: 5.1460
Epoch: [13/50] Loss D: 0.0140, Loss G: 5.1825
Epoch: [14/50] Loss D: 0.1026, Loss G: 5.1217
Epoch: [15/50] Loss D: 0.0177, Loss G: 5.7563
Epoch: [16/50] Loss D: 0.0197, Loss G: 6.9718
Epoch: [17/50] Loss D: 0.0356, Loss G: 5.7064
Epoch: [18/50] Loss D: 0.0130, Loss G: 5.7985
Epoch: [19/50] Loss D: 0.0038, Loss G: 6.2371
Epoch: [20/50] Loss D: 0.0009, Loss G: 8.2554
Epoch: [21/50] Loss D: 0.0787, Loss G: 7.197