In [2]:
a = """
Idea in GANs - 

Two probability distribution -
Pr - Distribution from real images
Pg - Distribution that comes out from the generator implicit distribution


Here we want these distribution Pg, Pr to be very similar in order to generate realistic looking images


Q - How we define a distance between two probability distribution Pg, Pr

-> There are some comman ways from statistics

1) Kullback-leibler (KL) divergence
2) Jensen-Shannon (JS) divergence
3) Wasserstein Distance

Formula - 

max Ex~Pr[f(x)] - Ex~Pthita[f(x)] ||f||L<=1
        |               |             |  
    Real Data       Generator   Constraint on discriminator


Discriminator wants to seperate these as much as possible (maximize loss)

Generator wants to put these as much closer to each other (Minimize)


"""

print(a)


Idea in GANs - 

Two probability distribution -
Pr - Distribution from real images
Pg - Distribution that comes out from the generator implicit distribution


Here we want these distribution Pg, Pr to be very similar in order to generate realistic looking images


Q - How we define a distance between two probability distribution Pg, Pr

-> There are some comman ways from statistics

1) Kullback-leibler (KL) divergence
2) Jensen-Shannon (JS) divergence
3) Wasserstein Distance

Formula - 

max Ex~Pr[f(x)] - Ex~Pthita[f(x)] ||f||L<=1
        |               |             |  
    Real Data       Generator   Constraint on discriminator


Discriminator wants to seperate these as much as possible (maximize loss)

Generator wants to put these as much closer to each other (Minimize)





In [2]:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter



In [3]:

# Import necessary PyTorch modules for building neural networks
import torch
import torch.nn as nn

# Define the Discriminator class that inherits from nn.Module
class Discriminator(nn.Module):
    def __init__(self, channels_img, features_d):
        # Call the __init__ method of nn.Module (the parent class)
        super(Discriminator, self).__init__()

        # Define the sequential model (a linear stack of layers) for the discriminator network
        self.disc = nn.Sequential(
            # First convolutional layer
            # Input shape: (N, channels_img, 64, 64), where N is the batch size, channels_img is the number of input channels
            nn.Conv2d(
                channels_img,  # Number of input channels (e.g., 3 for RGB images)
                features_d,     # Number of output channels (depth) for this layer (controls the feature maps)
                kernel_size=4,  # Size of the convolutional filter (4x4)
                stride=2,       # How much the filter moves across the image (2 pixels at a time)
                padding=1       # Padding added around the image to maintain size after convolution
            ),
            # Apply LeakyReLU activation to add non-linearity
            # LeakyReLU allows a small, non-zero gradient when the unit is not active
            nn.LeakyReLU(0.2),  # Negative slope of 0.2 to avoid vanishing gradients for negative values

            # Second block: Convolution + BatchNorm + LeakyReLU
            # This block will downsample the image and learn more complex features
            self._block(features_d, features_d * 2, 4, 2, 1),  # Increases depth (number of feature maps)
            
            # Third block: Same structure with more filters (features_d * 2 to features_d * 4)
            self._block(features_d * 2, features_d * 4, 4, 2, 1),
            
            # Fourth block: Same structure with even more filters (features_d * 4 to features_d * 8)
            self._block(features_d * 4, features_d * 8, 4, 2, 1),
            
            # Final block: Convolution to reduce the output to a single value (the "real" or "fake" prediction)
            # Output shape will be (N, 1, 1, 1), where N is the batch size, 1 represents the final output
            nn.Conv2d(
                features_d * 8,  # Input depth (number of feature maps)
                1,               # Output depth is 1 (single prediction for real or fake)
                kernel_size=4,    # 4x4 filter
                stride=2,         # Stride of 2 for downsampling
                padding=1         # Padding to ensure proper dimensionality
            ),
            
            # Sigmoid activation function: It squashes the output between 0 and 1
            # This is useful for binary classification (real or fake)
            nn.Sigmoid(),
        )

    # A helper method to create a block of Conv2D -> BatchNorm -> LeakyReLU layers
    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            # Convolutional layer
            nn.Conv2d(
                in_channels,   # Input depth (from the previous layer)
                out_channels,  # Output depth (number of feature maps this layer produces)
                kernel_size,   # Filter size (e.g., 4x4)
                stride,        # Stride for downsampling
                padding,       # Padding to keep the image size consistent
                bias=False     # No bias term as BatchNorm takes care of it
            ),
            # Batch normalization helps stabilize training by normalizing the output of the Conv layer
            nn.BatchNorm2d(out_channels),
            # Apply LeakyReLU activation function
            nn.LeakyReLU(0.2),
        )
    
    # Forward method: Defines how the input tensor (image) flows through the network layers
    def forward(self, x):
        # Pass the input 'x' (image) through the 'disc' network (the sequential layers)
        return self.disc(x)
    
    


# ______________________________________________________________________________________________________________________________________





class Generator(nn.Module):
    def __init__(self, channels_noise, channels_img, features_g):
        super(Generator, self).__init__()
        self.net = nn.Sequential(
            # Input: N x channels_noise x 1 x 1
            self._block(channels_noise, features_g * 16, 4, 1, 0),  # img: 4x4
            self._block(features_g * 16, features_g * 8, 4, 2, 1),  # img: 8x8
            self._block(features_g * 8, features_g * 4, 4, 2, 1),  # img: 16x16
            self._block(features_g * 4, features_g * 2, 4, 2, 1),  # img: 32x32
            nn.ConvTranspose2d(
                features_g * 2, channels_img, kernel_size=4, stride=2, padding=1
            ),
            # Output: N x channels_img x 64 x 64
            nn.Tanh(),
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.ConvTranspose2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                bias=False,
            ),
            # nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def forward(self, x):
        return self.net(x)


def initialize_weights(model):
    # Initializes weights according to the DCGAN paper
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.BatchNorm2d)):
            nn.init.normal_(m.weight.data, 0.0, 0.02)


def test():
    N, in_channels, H, W = 8, 3, 64, 64
    noise_dim = 100
    x = torch.randn((N, in_channels, H, W))
    disc = Discriminator(in_channels, 8)
    assert disc(x).shape == (N, 1, 1, 1), "Discriminator test failed"
    gen = Generator(noise_dim, in_channels, 8)
    z = torch.randn((N, noise_dim, 1, 1))
    assert gen(z).shape == (N, in_channels, H, W), "Generator test failed"
    print("Success, tests passed!")


    test()

In [4]:
"""
Training of DCGAN network on MNIST dataset with Discriminator
and Generator imported from models.py
"""

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# Hyperparameters etc.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LEARNING_RATE = 2e-4  # could also use two lrs, one for gen and one for disc
BATCH_SIZE = 128
IMAGE_SIZE = 64
CHANNELS_IMG = 1
NOISE_DIM = 100
NUM_EPOCHS = 5
FEATURES_DISC = 64
FEATURES_GEN = 64

transforms = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.5 for _ in range(CHANNELS_IMG)], [0.5 for _ in range(CHANNELS_IMG)]
        ),
    ]
)

# If you train on MNIST, remember to set channels_img to 1
dataset = r"C:\Users\Gaurav\OneDrive\Desktop\Text2Image\data\Extracted\archive (4)\flickr30k_images\flickr30k_images"


# comment mnist above and uncomment below if train on CelebA
# dataset = datasets.ImageFolder(root="celeb_dataset", transform=transforms)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
gen = Generator(NOISE_DIM, CHANNELS_IMG, FEATURES_GEN).to(device)
disc = Discriminator(CHANNELS_IMG, FEATURES_DISC).to(device)
initialize_weights(gen)
initialize_weights(disc)

opt_gen = optim.Adam(gen.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))
opt_disc = optim.Adam(disc.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))
criterion = nn.BCELoss()

fixed_noise = torch.randn(32, NOISE_DIM, 1, 1).to(device)
writer_real = SummaryWriter(f"logs/real")
writer_fake = SummaryWriter(f"logs/fake")
step = 0

gen.train()
disc.train()

for epoch in range(NUM_EPOCHS):
    # Target labels not needed! <3 unsupervised
    for batch_idx, (real, _) in enumerate(dataloader):
        real = real.to(device)
        noise = torch.randn(BATCH_SIZE, NOISE_DIM, 1, 1).to(device)
        fake = gen(noise)

        ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        disc_real = disc(real).reshape(-1)
        loss_disc_real = criterion(disc_real, torch.ones_like(disc_real))
        disc_fake = disc(fake.detach()).reshape(-1)
        loss_disc_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        loss_disc = (loss_disc_real + loss_disc_fake) / 2
        disc.zero_grad()
        loss_disc.backward()
        opt_disc.step()

        ### Train Generator: min log(1 - D(G(z))) <-> max log(D(G(z))
        output = disc(fake).reshape(-1)
        loss_gen = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        loss_gen.backward()
        opt_gen.step()

        # Print losses occasionally and print to tensorboard
        if batch_idx % 100 == 0:
            print(
                f"Epoch [{epoch}/{NUM_EPOCHS}] Batch {batch_idx}/{len(dataloader)} \
                  Loss D: {loss_disc:.4f}, loss G: {loss_gen:.4f}"
            )

            with torch.no_grad():
                fake = gen(fixed_noise)
                # take out (up to) 32 examples
                img_grid_real = torchvision.utils.make_grid(real[:32], normalize=True)
                img_grid_fake = torchvision.utils.make_grid(fake[:32], normalize=True)

                writer_real.add_image("Real", img_grid_real, global_step=step)
                writer_fake.add_image("Fake", img_grid_fake, global_step=step)

            step += 1

ValueError: too many values to unpack (expected 2)