In [None]:
# Switch the kernel's working directory to the project folder.
# NOTE(review): hard-coded absolute path — presumably a mounted Google Drive
# location; confirm or adjust it for your environment.
%cd /content/drive/MyDrive/genai/con_gan

/content/drive/MyDrive/genai/con_gan


In [None]:
# Create a project folder and move there
!mkdir cgan
!cd cgan

# Create and activate a Python environment using venv
!python3 -m venv venv
!source venv/bin/activate

# We should always upgrade pip as it's usually old version
# that has older information about libraries
!pip install --upgrade pip

# We install required libraries under the virtual environment
!pip install torch torchvision matplotlib tqdm

mkdir: cannot create directory ‘cgan’: File exists
Error: Command '['/content/drive/MyDrive/genai/con_gan/venv/bin/python3', '-m', 'ensurepip', '--upgrade', '--default-pip']' returned non-zero exit status 1.
/bin/bash: line 1: venv/bin/activate: No such file or directory
Collecting pip
  Downloading pip-25.1.1-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-25.1.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m38.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.1.1
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-non

In [None]:
import torch
import torch.nn as nn
from torch.nn import functional as F

# Two example class labels: the digits 1 and 3
labels = torch.tensor([1, 3], dtype=torch.long)

# One-hot encode them over the 10 digit classes
encoded = F.one_hot(labels, 10)

print(encoded)

tensor([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]])


In [None]:
# Converts conditions (class labels) into feature vectors
class Condition(nn.Module):
    """Embed integer class labels as 784-dimensional feature vectors.

    Args:
        alpha: Slope argument passed to the LeakyReLU activation.
    """

    def __init__(self, alpha: float):
        super().__init__()

        # One-hot (10) => linear => batch norm => leaky ReLU (784)
        layers = [
            nn.Linear(10, 784),
            nn.BatchNorm1d(784),
            nn.LeakyReLU(alpha),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, labels: torch.Tensor):
        # One-hot encode the labels, then cast to float for the linear layer
        one_hot = F.one_hot(labels, num_classes=10).float()

        # One-hot vectors => feature vectors
        return self.fc(one_hot)

In [None]:
# Reshape helper
class Reshape(nn.Module):
    """Module wrapper that reshapes its input to (-1, *shape)."""

    def __init__(self, *shape):
        super().__init__()

        # Trailing dimensions; the leading one is inferred at call time
        self.shape = shape

    def forward(self, x):
        target_shape = (-1, *self.shape)
        return x.reshape(target_shape)

In [None]:
# Generator network
class Generator(nn.Module):
    """Conditional generator: turns class labels plus random noise into 1 x 28 x 28 images.

    Args:
        sample_size: Length of the random vector drawn for each label.
        alpha: Slope argument passed to every LeakyReLU in the network.
    """

    def __init__(self, sample_size: int, alpha: float):
        super().__init__()

        # sample_size => 784
        self.fc = nn.Sequential(
            nn.Linear(sample_size, 784),
            nn.BatchNorm1d(784),
            nn.LeakyReLU(alpha))

        # 784 => 16 x 7 x 7
        self.reshape = Reshape(16, 7, 7)

        # 16 x 7 x 7 => 32 x 14 x 14
        self.conv1 = nn.Sequential(
            nn.ConvTranspose2d(16, 32,
                               kernel_size=5, stride=2, padding=2,
                               output_padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(alpha))

        # 32 x 14 x 14 => 1 x 28 x 28
        self.conv2 = nn.Sequential(
            nn.ConvTranspose2d(32, 1,
                               kernel_size=5, stride=2, padding=2,
                               output_padding=1, bias=False),
            nn.Sigmoid())

        # Random value sample size
        self.sample_size = sample_size

        # To convert labels into feature vectors
        self.cond = Condition(alpha)

    def forward(self, labels: torch.Tensor):
        """Generate one image per label.

        Args:
            labels: Integer class labels; one image is produced for each entry.

        Returns:
            The output of the final Sigmoid, one 1 x 28 x 28 image per label.
        """
        # Labels as feature vectors
        c = self.cond(labels)

        # Batch size is the number of labels
        batch_size = len(labels)

        # Generate random inputs on the same device as the labels; a bare
        # torch.randn() always allocates on the CPU, which breaks the x + c
        # sum (and self.fc) as soon as the model and labels live on a GPU.
        z = torch.randn(batch_size, self.sample_size, device=labels.device)

        # Inputs are the sum of random-input features and label features
        x = self.fc(z)        # => 784
        x = self.reshape(x+c) # => 16 x 7 x 7
        x = self.conv1(x)     # => 32 x 14 x 14
        x = self.conv2(x)     # => 1 x 28 x 28
        return x

In [None]:
# First steps of Generator.forward, run outside the class.
# `self` only exists inside a method (using it here raises NameError), so the
# same steps are executed against a concrete Generator instance instead.
demo_generator = Generator(sample_size=100, alpha=0.01)

c = demo_generator.cond(labels)
z = torch.randn(len(labels), demo_generator.sample_size)
x = demo_generator.fc(z)
x = demo_generator.reshape(x+c)

NameError: name 'self' is not defined

In [None]:
# Same walkthrough of Generator.forward, step by step. `self` is undefined at
# module level, so a concrete Generator instance stands in for it.
demo_generator = Generator(sample_size=100, alpha=0.01)

# Label feature vectors (784)
c = demo_generator.cond(labels)

# Random value vectors, projected to 784 features
z = torch.randn(len(labels), demo_generator.sample_size)
x = demo_generator.fc(z)

# Element-wise addition and reshape from 784 into 16x7x7
x = demo_generator.reshape(x+c)

NameError: name 'self' is not defined

In [None]:
# Build a Generator with demo settings: sample_size=100, alpha=0.01
generator = Generator(100, 0.01)

# Run a forward pass conditioned on the labels
output = generator(labels)

# Show the shape of the generated batch
print(output.shape)

torch.Size([2, 1, 28, 28])


In [None]:
# Demo Generator: 100 random inputs per label, LeakyReLU alpha of 0.01
generator = Generator(100, 0.01)

# Forward pass: one generated image per label
output = generator(labels)

# Print the shape of the generated images
print(output.shape)

torch.Size([2, 1, 28, 28])


In [None]:
# Discriminator network
class Discriminator(nn.Module):
    """Conditional discriminator: scores (image, label) pairs and returns the BCE loss.

    Args:
        alpha: Slope argument passed to every LeakyReLU in the network.
    """

    def __init__(self, alpha: float):
        super().__init__()

        # Both convolutions share the same geometry
        conv_kwargs = dict(kernel_size=5, stride=2, padding=2, bias=False)

        # 1 x 28 x 28 => 32 x 14 x 14
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, **conv_kwargs),
            nn.LeakyReLU(alpha))

        # 32 x 14 x 14 => 16 x 7 x 7
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 16, **conv_kwargs),
            nn.BatchNorm2d(16),
            nn.LeakyReLU(alpha))

        # 16 x 7 x 7 => 784 => 1
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(784, 784),
            nn.BatchNorm1d(784),
            nn.LeakyReLU(alpha),
            nn.Linear(784, 1))

        # Label features reshaped to match the conv output: 784 => 16 x 7 x 7
        self.cond = nn.Sequential(
            Condition(alpha),
            Reshape(16, 7, 7))

    def forward(self, images: torch.Tensor,
                      labels: torch.Tensor,
                      targets: torch.Tensor):
        # Label features (16 x 7 x 7)
        label_features = self.cond(labels)

        # Image features: 1 x 28 x 28 => 32 x 14 x 14 => 16 x 7 x 7
        image_features = self.conv2(self.conv1(images))

        # Image features + label features => a single logit per sample
        logits = self.fc(image_features + label_features)

        # Loss of the logits against the given targets
        return F.binary_cross_entropy_with_logits(logits, targets)

In [None]:
# Train loop (discriminator only)
# NOTE(review): this cell uses `dataloader`, `generator`, `discriminator` and
# `d_optimizer`, which are created in later cells of this notebook — run those
# cells first.
from tqdm import tqdm
for epoch in range(100):

    d_losses = []
    g_losses = []

    for images, labels in tqdm(dataloader):

        #===============================
        # Discriminator Network Training
        #===============================

        # Actual size of this batch. Kept in its own name so the global
        # `batch_size` setting is not overwritten by a smaller final batch.
        n_images = images.size(0)

        # Define target tensors for real and fake labels for the current batch
        real_targets = torch.ones(n_images, 1)
        fake_targets = torch.zeros(n_images, 1)

        # Images from MNIST are considered as real
        d_loss = discriminator(images, labels, real_targets)

        # Images from Generator are considered as fake. Detach them so the
        # discriminator update does not backpropagate through the generator.
        fake_images = generator(labels).detach()
        d_loss += discriminator(fake_images, labels, fake_targets)

        # Discriminator parameter update
        d_optimizer.zero_grad()
        d_loss.backward()
        d_optimizer.step()

        # Keep the loss for logging
        d_losses.append(d_loss.item())

100%|██████████| 938/938 [01:13<00:00, 12.73it/s]
 21%|██▏       | 201/938 [00:15<00:57, 12.73it/s]


KeyboardInterrupt: 

In [None]:
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Preprocessing pipeline: convert images to PyTorch tensors
transform = transforms.Compose([transforms.ToTensor()])

# MNIST training split, downloaded into ./data
train_dataset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform)

# Shuffled mini-batches for training
batch_size = 64 # You can adjust the batch size
dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

print("MNIST dataset loaded and DataLoader created.")

100%|██████████| 9.91M/9.91M [00:00<00:00, 11.8MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 369kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 3.18MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 5.94MB/s]

MNIST dataset loaded and DataLoader created.





In [None]:
# Fresh networks, using the same demo settings as before
generator = Generator(100, 0.01)
discriminator = Discriminator(0.01)

# Fixed-size target tensors: ones for real, zeros for fake
real_targets = torch.ones((batch_size, 1))
fake_targets = torch.zeros((batch_size, 1))

# One Adam optimizer per network (the generator's is needed later)
d_optimizer = torch.optim.Adam(
    discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))
g_optimizer = torch.optim.Adam(
    generator.parameters(), lr=0.0002, betas=(0.5, 0.999))

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid
from tqdm import tqdm

# Shared settings
batch_size  = 64

# Generator settings
sample_size = 100   # Length of the random input vector
g_alpha     = 0.01  # LeakyReLU alpha
g_lr        = 1e-4  # Learning rate

# Discriminator settings
d_alpha     = 0.01  # LeakyReLU alpha
d_lr        = 1e-4  # Learning rate

# MNIST training images as tensors. drop_last=True discards the final
# incomplete batch, so every batch holds exactly `batch_size` samples.
transform = transforms.ToTensor()
dataset = datasets.MNIST(
    root='./data', train=True, download=True, transform=transform)
dataloader = DataLoader(dataset, batch_size=batch_size, drop_last=True)

# Converts conditions (class labels) into feature vectors
class Condition(nn.Module):
    """Embed integer class labels as 784-dimensional feature vectors.

    Args:
        alpha: Slope argument passed to the LeakyReLU activation.
    """

    def __init__(self, alpha: float):
        super().__init__()

        # One-hot (10) => linear => batch norm => leaky ReLU (784)
        layers = [
            nn.Linear(10, 784),
            nn.BatchNorm1d(784),
            nn.LeakyReLU(alpha),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, labels: torch.Tensor):
        # One-hot encode the labels, then cast to float for the linear layer
        one_hot = F.one_hot(labels, num_classes=10).float()

        # One-hot vectors => feature vectors
        return self.fc(one_hot)

# Reshape helper
class Reshape(nn.Module):
    """Module wrapper that reshapes its input to (-1, *shape)."""

    def __init__(self, *shape):
        super().__init__()

        # Trailing dimensions; the leading one is inferred at call time
        self.shape = shape

    def forward(self, x):
        target_shape = (-1, *self.shape)
        return x.reshape(target_shape)

# Generator network
class Generator(nn.Module):
    """Conditional generator: turns class labels plus random noise into 1 x 28 x 28 images.

    Args:
        sample_size: Length of the random vector drawn for each label.
        alpha: Slope argument passed to every LeakyReLU in the network.
    """

    def __init__(self, sample_size: int, alpha: float):
        super().__init__()

        # sample_size => 784
        self.fc = nn.Sequential(
            nn.Linear(sample_size, 784),
            nn.BatchNorm1d(784),
            nn.LeakyReLU(alpha))

        # 784 => 16 x 7 x 7
        self.reshape = Reshape(16, 7, 7)

        # 16 x 7 x 7 => 32 x 14 x 14
        self.conv1 = nn.Sequential(
            nn.ConvTranspose2d(16, 32,
                               kernel_size=5, stride=2, padding=2,
                               output_padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(alpha))

        # 32 x 14 x 14 => 1 x 28 x 28
        self.conv2 = nn.Sequential(
            nn.ConvTranspose2d(32, 1,
                               kernel_size=5, stride=2, padding=2,
                               output_padding=1, bias=False),
            nn.Sigmoid())

        # Random value sample size
        self.sample_size = sample_size

        # To convert labels into feature vectors
        self.cond = Condition(alpha)

    def forward(self, labels: torch.Tensor):
        """Generate one image per label.

        Args:
            labels: Integer class labels; one image is produced for each entry.

        Returns:
            The output of the final Sigmoid, one 1 x 28 x 28 image per label.
        """
        # Labels as feature vectors
        c = self.cond(labels)

        # Batch size is the number of labels
        batch_size = len(labels)

        # Generate random inputs on the same device as the labels; a bare
        # torch.randn() always allocates on the CPU, which breaks the x + c
        # sum (and self.fc) as soon as the model and labels live on a GPU.
        z = torch.randn(batch_size, self.sample_size, device=labels.device)

        # Inputs are the sum of random-input features and label features
        x = self.fc(z)        # => 784
        x = self.reshape(x+c) # => 16 x 7 x 7
        x = self.conv1(x)     # => 32 x 14 x 14
        x = self.conv2(x)     # => 1 x 28 x 28
        return x

# Discriminator network
class Discriminator(nn.Module):
    """Conditional discriminator: scores (image, label) pairs and returns the BCE loss.

    Args:
        alpha: Slope argument passed to every LeakyReLU in the network.
    """

    def __init__(self, alpha: float):
        super().__init__()

        # Both convolutions share the same geometry
        conv_kwargs = dict(kernel_size=5, stride=2, padding=2, bias=False)

        # 1 x 28 x 28 => 32 x 14 x 14
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, **conv_kwargs),
            nn.LeakyReLU(alpha))

        # 32 x 14 x 14 => 16 x 7 x 7
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 16, **conv_kwargs),
            nn.BatchNorm2d(16),
            nn.LeakyReLU(alpha))

        # 16 x 7 x 7 => 784 => 1
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(784, 784),
            nn.BatchNorm1d(784),
            nn.LeakyReLU(alpha),
            nn.Linear(784, 1))

        # Label features reshaped to match the conv output: 784 => 16 x 7 x 7
        self.cond = nn.Sequential(
            Condition(alpha),
            Reshape(16, 7, 7))

    def forward(self, images: torch.Tensor,
                      labels: torch.Tensor,
                      targets: torch.Tensor):
        # Label features (16 x 7 x 7)
        label_features = self.cond(labels)

        # Image features: 1 x 28 x 28 => 32 x 14 x 14 => 16 x 7 x 7
        image_features = self.conv2(self.conv1(images))

        # Image features + label features => a single logit per sample
        logits = self.fc(image_features + label_features)

        # Loss of the logits against the given targets
        return F.binary_cross_entropy_with_logits(logits, targets)

# To save grid images
def save_image_grid(epoch: int, images: torch.Tensor, ncol: int):
    """Tile `images` into one grid and write it to generated_<epoch>.jpg."""
    # Grid => channel-last => NumPy array, ready for imshow
    grid = make_grid(images, ncol).permute(1, 2, 0).cpu().numpy()

    plt.imshow(grid)

    # Hide the tick marks on both axes
    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks([])

    plt.savefig(f'generated_{epoch:03d}.jpg')
    plt.close()

# Targets: ones mark real images, zeros mark fake ones
real_targets = torch.ones((batch_size, 1))
fake_targets = torch.zeros((batch_size, 1))

# The two networks
generator = Generator(sample_size, g_alpha)
discriminator = Discriminator(d_alpha)

# One Adam optimizer per network
d_optimizer = torch.optim.Adam(params=discriminator.parameters(), lr=d_lr)
g_optimizer = torch.optim.Adam(params=generator.parameters(), lr=g_lr)

# Train loop
# real_targets / fake_targets have a fixed batch_size rows, so every batch
# from the dataloader must contain exactly batch_size samples.
for epoch in range(100):

    d_losses = []
    g_losses = []

    for images, labels in tqdm(dataloader):

        #===============================
        # Discriminator Network Training
        #===============================

        # Images from MNIST are considered as real
        d_loss = discriminator(images, labels, real_targets)

        # Images from Generator are considered as fake. Detach them so the
        # discriminator update does not backpropagate through the generator.
        fake_images = generator(labels).detach()
        d_loss += discriminator(fake_images, labels, fake_targets)

        # Discriminator parameter update
        d_optimizer.zero_grad()
        d_loss.backward()
        d_optimizer.step()

        #===============================
        # Generator Network Training
        #===============================

        # Images from Generator should be as real as ones from MNIST
        g_loss = discriminator(generator(labels), labels, real_targets)

        # Generator parameter update
        g_optimizer.zero_grad()
        g_loss.backward()
        g_optimizer.step()

        # Keep losses for logging
        d_losses.append(d_loss.item())
        g_losses.append(g_loss.item())

    # Print loss
    print(epoch, np.mean(d_losses), np.mean(g_losses))

    # Save generated images: digits 0-9 repeated 8 times (80 samples).
    # No gradients are needed for sampling, so skip building the graph.
    sample_labels = torch.arange(10).repeat(8)
    with torch.no_grad():
        samples = generator(sample_labels)
    save_image_grid(epoch, samples, ncol=10)

100%|██████████| 9.91M/9.91M [00:00<00:00, 18.2MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 487kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.56MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.83MB/s]
100%|██████████| 937/937 [02:01<00:00,  7.68it/s]


0 0.4963098222125429 1.7084557141158472


100%|██████████| 937/937 [02:00<00:00,  7.77it/s]


1 0.2569355297686578 2.3792391250838336


100%|██████████| 937/937 [02:01<00:00,  7.71it/s]


2 0.17819844387479628 2.813231841731606


100%|██████████| 937/937 [02:00<00:00,  7.80it/s]


3 0.15889845961026983 3.0266903645200944


100%|██████████| 937/937 [02:00<00:00,  7.80it/s]


4 0.16792171006364273 3.050145267040778


100%|██████████| 937/937 [02:00<00:00,  7.75it/s]


5 0.1669741017125714 3.10327550403082


100%|██████████| 937/937 [02:01<00:00,  7.72it/s]


6 0.16216869391858768 3.1686880087267246


100%|██████████| 937/937 [02:00<00:00,  7.81it/s]


7 0.18060065294628846 3.1072985856230257


100%|██████████| 937/937 [02:00<00:00,  7.79it/s]


8 0.1799257197432228 3.135843713611778


100%|██████████| 937/937 [01:59<00:00,  7.85it/s]


9 0.18435387411567292 3.1197136108750594


100%|██████████| 937/937 [02:01<00:00,  7.74it/s]


10 0.1874175486110127 3.1577125598170586


100%|██████████| 937/937 [01:59<00:00,  7.83it/s]


11 0.19691263835006487 3.1178593583397065


100%|██████████| 937/937 [01:59<00:00,  7.82it/s]


12 0.19729078627860405 3.156176245963179


100%|██████████| 937/937 [02:00<00:00,  7.78it/s]


13 0.20933424263174785 3.113224070634506


100%|██████████| 937/937 [02:05<00:00,  7.48it/s]


14 0.19389755192154118 3.117226991353193


100%|██████████| 937/937 [02:06<00:00,  7.43it/s]


15 0.20002170466482957 3.2310159007920274


100%|██████████| 937/937 [02:02<00:00,  7.67it/s]


16 0.20148196573127677 3.1738102312657976


100%|██████████| 937/937 [02:00<00:00,  7.76it/s]


17 0.19771901885864573 3.2474424919171168


100%|██████████| 937/937 [02:04<00:00,  7.54it/s]


18 0.18925735747626524 3.2895487858620625


100%|██████████| 937/937 [02:02<00:00,  7.64it/s]


19 0.18701724355032504 3.320430235903444


100%|██████████| 937/937 [02:13<00:00,  7.02it/s]


20 0.17125521647160438 3.427141616922051


 33%|███▎      | 306/937 [00:46<01:15,  8.41it/s]