# Imports and Setup

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np



In [2]:
# Seed PyTorch's global RNG so that weight initialisation, noise sampling and
# DataLoader shuffling start from the same state on every fresh kernel run.
# NOTE: some GPU kernels are non-deterministic, so bit-exact results are only
# expected on CPU.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Generator Model

In [3]:
# Define the Generator model
# The Generator model transforms random noise into realistic images using a series of layers and activation functions.
class Generator(nn.Module):
    """Fully connected generator that maps latent noise to 28x28 images.

    The network widens the noise vector through three ReLU hidden layers
    (256, 512, 1024 units) and ends with a Tanh layer of 28 * 28 units, so
    every output pixel lies in [-1, 1].

    Args:
        z_dim: Length of the latent noise vector expected by the first layer.
            Defaults to 100, the same default used by ``generate_noise``.
    """

    def __init__(self, z_dim=100):
        super().__init__()
        self.z_dim = z_dim

        # Fully connected layers to generate an image from random noise
        self.fc1 = nn.Linear(z_dim, 256)      # First layer (z_dim -> 256)
        self.fc2 = nn.Linear(256, 512)        # Second layer (256 -> 512)
        self.fc3 = nn.Linear(512, 1024)       # Third layer (512 -> 1024)
        self.fc4 = nn.Linear(1024, 28 * 28)   # Output layer (1024 -> 28x28)

        # Activation functions
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()  # Squashes the output to [-1, 1]

    def forward(self, z):
        """Generate images from a batch of noise vectors.

        Args:
            z: Tensor whose last dimension equals ``z_dim``.

        Returns:
            Tensor reshaped to ``(-1, 1, 28, 28)`` with values in [-1, 1].
        """
        # Apply layers and activation functions sequentially
        x = self.relu(self.fc1(z))  # First hidden layer
        x = self.relu(self.fc2(x))  # Second hidden layer
        x = self.relu(self.fc3(x))  # Third hidden layer
        x = self.tanh(self.fc4(x))  # Output layer with Tanh activation

        # Reshape the flat 784-vector into a single-channel 28x28 image
        return x.view(-1, 1, 28, 28)



# Discriminator Model

In [4]:
# Define the Discriminator model
# The Discriminator model classifies images as real or fake. 
# It takes an image as input, passes it through a series of layers and activation functions, and outputs a probability of the image being real.
class Discriminator(nn.Module):
    """Fully connected classifier that scores an image as real or fake.

    The input is flattened to 28 * 28 values, passed through three
    LeakyReLU(0.2) hidden layers (1024, 512, 256 units) and reduced to a
    single Sigmoid output per image.
    """

    def __init__(self):
        super().__init__()
        # Dense stack narrowing from the flattened image down to one score
        self.fc1 = nn.Linear(784, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 1)

        # Non-linearities: leaky slope of 0.2 for hidden layers, sigmoid on the output
        self.leaky_relu = nn.LeakyReLU(0.2)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one value in [0, 1] per image, shaped ``(-1, 1)``."""
        # Flatten each image into a 784-element row vector
        features = x.view(-1, 784)

        # The three hidden layers all share the same activation
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            features = self.leaky_relu(hidden_layer(features))

        # Final projection to a single sigmoid-squashed score
        return self.sigmoid(self.fc4(features))



# Noise Generator

In [5]:
# Generate random noise
# This code generates random numbers to be used as input for the Generator model.
def generate_noise(batch_size, z_dim=100):
    """Sample standard-normal noise of shape ``(batch_size, z_dim)``.

    The tensor is created directly on the module-level ``device``.
    """
    latent_shape = (batch_size, z_dim)
    return torch.randn(latent_shape, device=device)


# Training Function

In [6]:
def train_gan(generator, discriminator, train_loader, optimizer_g, optimizer_d, criterion,
              epochs=20, log_every=100, save_every=5):
    """Train a GAN by alternating discriminator and generator updates.

    For every batch the discriminator is updated first (real batch, then a
    detached fake batch), after which the generator is updated on the same
    fake batch using "real" labels.

    Relies on the module-level ``device``, ``generate_noise`` and
    ``save_generated_images``.

    Args:
        generator: Model mapping noise to images.
        discriminator: Model mapping images to a ``(batch, 1)`` score.
        train_loader: Iterable yielding ``(images, labels)`` pairs; the labels
            are ignored.
        optimizer_g: Optimizer holding the generator's parameters.
        optimizer_d: Optimizer holding the discriminator's parameters.
        criterion: Loss called as ``criterion(prediction, target)`` with
            targets of shape ``(batch, 1)`` filled with ones or zeros.
        epochs: Number of passes over ``train_loader``.
        log_every: Print the losses every this many steps; 0 or None disables
            logging.
        save_every: Call ``save_generated_images`` every this many epochs;
            0 or None disables saving.
    """
    for epoch in range(epochs):  # Loop through the number of epochs

        for i, (real_images, _) in enumerate(train_loader):  # Iterate over the dataset

            real_images = real_images.to(device)  # Move real images to the training device
            batch_size = real_images.size(0)  # The last batch may be smaller than the rest
            labels_real = torch.ones(batch_size, 1, device=device)  # Targets for real images
            labels_fake = torch.zeros(batch_size, 1, device=device)  # Targets for fake images

            # ---- Discriminator step: real images ----
            optimizer_d.zero_grad()  # Also clears gradients left by the previous generator step
            output_real = discriminator(real_images)
            d_loss_real = criterion(output_real, labels_real)
            d_loss_real.backward()  # Gradients accumulate until optimizer_d.step()

            # ---- Discriminator step: fake images ----
            noise = generate_noise(batch_size)
            fake_images = generator(noise)
            # detach() keeps this loss from back-propagating into the generator
            output_fake = discriminator(fake_images.detach())
            d_loss_fake = criterion(output_fake, labels_fake)
            d_loss_fake.backward()

            optimizer_d.step()  # Update the discriminator's weights
            d_loss = d_loss_real + d_loss_fake  # Total discriminator loss (for logging)

            # ---- Generator step ----
            optimizer_g.zero_grad()
            # Re-score the same fakes with the freshly updated discriminator
            output_fake_for_g = discriminator(fake_images)
            # Real labels: the generator is rewarded when the fakes are scored as real
            g_loss = criterion(output_fake_for_g, labels_real)
            g_loss.backward()
            optimizer_g.step()  # Update the generator's weights

            # Print the progress
            if log_every and i % log_every == 0:
                print(f"Epoch [{epoch+1}/{epochs}], Step [{i+1}/{len(train_loader)}], "
                      f"D Loss: {d_loss.item()}, G Loss: {g_loss.item()}")

        # Save generated images every `save_every` epochs
        if save_every and (epoch + 1) % save_every == 0:
            save_generated_images(generator, epoch)


# Training the Discriminator: real images are fed to the discriminator and a loss is computed. Fake images are then generated, detached from the generator's graph, and scored to compute a second loss. Both gradients are accumulated before the discriminator's weights are updated so it gets better at telling real from fake.
# Training the Generator: the same fake images are scored again by the updated discriminator, this time with "real" labels. The goal is to trick the discriminator into classifying them as real, so the generator's weights are updated to produce more realistic images.


# Every 100 steps, it prints the current epoch, step number, discriminator loss, and generator loss.
# Every 5 epochs, it saves a set of images generated by the current generator model.

# Save Generated Images

In [7]:
def save_generated_images(generator, epoch, num_images=16):
    """Sample images from the generator and save them as one PNG grid.

    The file is written to ``generated_images_epoch_{epoch+1}.png`` in the
    current working directory. Noise comes from the module-level
    ``generate_noise``.

    Args:
        generator: Model mapping noise to images with values in [-1, 1] and a
            channel-first layout (the output is transposed to channel-last).
        epoch: Zero-based epoch index; the file name uses ``epoch + 1``.
        num_images: Number of samples to draw. The grid is the smallest
            near-square layout that fits them (4x4 for the default 16);
            unused cells are left blank.

    Raises:
        ValueError: If ``num_images`` is smaller than 1.
    """
    if num_images < 1:
        raise ValueError("num_images must be at least 1")

    noise = generate_noise(num_images)  # Generate random noise
    # Sampling only: no autograd graph is needed for the forward pass
    with torch.no_grad():
        fake_images = generator(noise)
    fake_images = fake_images.cpu().numpy()  # Move images to CPU as a NumPy array
    fake_images = fake_images * 0.5 + 0.5  # Rescale to [0, 1] from [-1, 1]
    fake_images = np.transpose(fake_images, (0, 2, 3, 1))  # (N, C, H, W) -> (N, H, W, C)

    # Derive the grid from num_images instead of assuming exactly 16 samples
    n_cols = int(np.ceil(np.sqrt(num_images)))
    n_rows = int(np.ceil(num_images / n_cols))

    # squeeze=False keeps `axes` two-dimensional even for a 1x1 grid
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols, n_rows), squeeze=False)
    for idx, ax in enumerate(axes.flat):
        if idx < num_images:
            ax.imshow(fake_images[idx], cmap='gray')  # Show image
        ax.axis('off')  # Hide axis (also blanks any unused cell)

    plt.tight_layout()  # Adjust layout
    plt.savefig(f"generated_images_epoch_{epoch+1}.png")  # Save image
    plt.close(fig)  # Release the figure so repeated calls do not pile up memory
    
    
# This function plots a grid of generated images and saves the plot as a PNG file; train_gan calls it every 5 epochs.

# Main Script

In [None]:
# Build both networks and place them on the selected device (GPU if available)
generator = Generator().to(device)
discriminator = Discriminator().to(device)


# Loss and optimizers: binary cross-entropy on the discriminator's output,
# and one Adam optimizer per network with identical hyper-parameters.
criterion = nn.BCELoss()
optimizer_g = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizer_d = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))


# MNIST preprocessing: convert each image to a tensor, then shift/scale with
# mean 0.5 and std 0.5 so pixels land in [-1, 1], the range of the generator's Tanh.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])


# Training split of MNIST, downloaded to the current directory when missing.
# batch_size is the number of examples per training step; shuffle=True
# re-orders the samples at the start of every epoch.
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(root='.', train=True, download=True, transform=transform),
    batch_size=64,
    shuffle=True,
)


# Train the GAN on the MNIST dataset
train_gan(
    generator,
    discriminator,
    train_loader,
    optimizer_g,
    optimizer_d,
    criterion,
    epochs=5,
)


# This code sets up and trains a GAN to generate images similar to those in the MNIST dataset.


Epoch [1/5], Step [1/938], D Loss: 1.3938807249069214, G Loss: 0.6938387751579285
Epoch [1/5], Step [101/938], D Loss: 0.9779897928237915, G Loss: 1.3167881965637207
Epoch [1/5], Step [201/938], D Loss: 0.5127428770065308, G Loss: 1.7473915815353394
Epoch [1/5], Step [301/938], D Loss: 0.3738122582435608, G Loss: 3.528465747833252
Epoch [1/5], Step [401/938], D Loss: 0.2638869285583496, G Loss: 2.525359869003296
Epoch [1/5], Step [501/938], D Loss: 1.6985667943954468, G Loss: 5.8629374504089355
Epoch [1/5], Step [601/938], D Loss: 0.6152112483978271, G Loss: 4.264281749725342
Epoch [1/5], Step [701/938], D Loss: 0.12252391874790192, G Loss: 3.8571293354034424
Epoch [1/5], Step [801/938], D Loss: 0.23493826389312744, G Loss: 2.3631832599639893
Epoch [1/5], Step [901/938], D Loss: 0.22982898354530334, G Loss: 5.735939979553223
Epoch [2/5], Step [1/938], D Loss: 0.42187952995300293, G Loss: 2.026096820831299
Epoch [2/5], Step [101/938], D Loss: 0.3406374752521515, G Loss: 4.85422611236572

# Conclusion :

In [None]:
# This vanilla GAN implementation in PyTorch is a straightforward way to start experimenting with GANs on the MNIST dataset.
# You can improve the model by using a more sophisticated architecture (for example, convolutional layers), adding techniques such as batch normalization, or using a deeper network.