The Case Context (E-commerce example)

The company wants to create synthetic product mockups (like shirts, shoes, bags) before manufacturing.

They need to control which product appears in the generated image — i.e., not random but chosen (“generate shoes only”).

 What You Have to Do

Implement a CGAN using Fashion-MNIST
(Fashion-MNIST is a free dataset of 28×28 grayscale images of clothes, shoes, bags, etc.)

Train the CGAN so that if you give it noise + label (“shoe”), it will generate a shoe-like image.

Compare your CGAN with a normal GAN (no labels) to show how much better conditional generation is.

Analyze training stability: GANs are tricky to train (they can collapse or produce noise). You have to explain:

Did you use label smoothing?

Did you use batch normalization?

How did you balance generator vs. discriminator training?

 Expected Deliverables (What you submit)

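Working CGAN code, with training logs printed so that teachers can follow the discriminator and generator losses over the epochs (in a GAN these losses typically oscillate rather than decrease steadily).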

Visual results: e.g. a grid of generated images for each label (“all shoes in one row, all bags in another”).

Discussion/comparison:

Show how a plain GAN produces random clothes but a CGAN can be told “only shoes”.

Talk about convergence: which model was more stable? What tricks did you use to avoid mode collapse?

Optionally, include a short report summarizing your findings.

 Key Idea in One Sentence

A CGAN is like a GAN with an extra steering wheel — you don’t just generate random fashion images, you steer it to produce a specific category like “shoes” or “bags.”

In [None]:
# CGAN on Fashion-MNIST (PyTorch) - Colab-ready
# Run this in Google Colab (GPU recommended)

# 1) Install / imports
!pip install -q torch torchvision tqdm matplotlib
import os, math, random, time
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
from torchvision.utils import make_grid, save_image
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

# 2) Hyperparams
# -- data geometry
img_size = 28                  # Fashion-MNIST images are 28x28
nc = 1                         # image channels (grayscale)
n_classes = 10                 # Fashion-MNIST labels

# -- model sizes
nz = 100                       # noise dim
embed_dim = 50                 # width of the learned label embedding
ngf = ndf = 64                 # base channel width of generator / discriminator

# -- optimisation
batch_size = 128
lr, beta1 = 2e-4, 0.5          # Adam learning rate and first-moment decay
n_epochs = 10

# -- outputs: per-epoch sample grids are written here
sample_dir = "cgan_samples"
os.makedirs(sample_dir, exist_ok=True)

# 3) Dataset + loader
# Normalize((0.5,), (0.5,)) computes (x - 0.5) / 0.5 per pixel, so the images
# end up in [-1, 1] -- the same range as the generator's Tanh output.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
trainset = torchvision.datasets.FashionMNIST(root="./data", train=True, download=True, transform=transform)
# Pinned host memory only speeds up host->GPU copies; on a CPU-only run it is
# useless (and newer PyTorch versions warn about it), so request it only when
# CUDA is actually available.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=torch.cuda.is_available(),
)

# 4) Models: conditional via label embedding -> concat with noise / image
class Generator(nn.Module):
    """Conditional generator: maps (noise, class label) to a 28x28 image.

    The label is looked up in a learned embedding, concatenated with the
    noise vector, projected to a ``(ngf*4, 7, 7)`` feature map and upsampled
    twice (7 -> 14 -> 28). The final ``Tanh`` keeps pixel values in [-1, 1].

    Args:
        nz: dimensionality of the input noise vector.
        embed_dim: size of the label embedding.
        n_classes: number of distinct class labels.
        ngf: base number of generator feature maps.
        nc: number of output image channels. Defaults to 1 (grayscale), which
            matches the value the module-level ``nc`` had when this class
            read it as a global.
    """

    def __init__(self, nz, embed_dim, n_classes, ngf, nc=1):
        super().__init__()
        self.label_emb = nn.Embedding(n_classes, embed_dim)
        self.net = nn.Sequential(
            # input: nz + embed_dim
            nn.Linear(nz + embed_dim, ngf*4*7*7),
            nn.BatchNorm1d(ngf*4*7*7),
            nn.ReLU(True),
            # reshape (N, ngf*4*7*7) -> (N, ngf*4, 7, 7); built-in layer, so
            # the class no longer depends on a helper defined elsewhere
            nn.Unflatten(1, (ngf*4, 7, 7)),
            nn.ConvTranspose2d(ngf*4, ngf*2, 4, 2, 1, bias=False), # 14x14
            nn.BatchNorm2d(ngf*2),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf*2, ngf, 4, 2, 1, bias=False),   # 28x28
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            nn.Conv2d(ngf, nc, 3, 1, 1),
            nn.Tanh()
        )

    def forward(self, z, labels):
        """Generate one image per (noise row, label) pair.

        Args:
            z: noise tensor of shape ``(N, nz)``.
            labels: integer class indices of shape ``(N,)``.

        Returns:
            Tensor of shape ``(N, nc, 28, 28)`` with values in [-1, 1].
        """
        le = self.label_emb(labels)
        # condition by concatenating the label embedding to the noise vector
        x = torch.cat([z, le], dim=1)
        return self.net(x)

class Discriminator(nn.Module):
    """Conditional discriminator: scores (image, class label) pairs.

    The label embedding is projected to an ``img_size x img_size`` plane and
    concatenated to the image as an extra channel. Two stride-2 convolutions
    shrink the map to ``img_size // 4`` per side, and a final convolution with
    that kernel size collapses it to a single value per sample.

    The output is a raw logit (no sigmoid), so pair it with a loss that
    applies the sigmoid itself, e.g. ``nn.BCEWithLogitsLoss``.

    Args:
        n_classes: number of distinct class labels.
        embed_dim: size of the label embedding.
        ndf: base number of discriminator feature maps.
        nc: number of image channels. Defaults to 1 (grayscale).
        img_size: side length of the square input images. Defaults to 28.
            These two defaults match the module-level values the class used
            to read as globals.

    Raises:
        ValueError: if ``img_size`` is smaller than 4 (the final convolution
            would need a zero-sized kernel).
    """

    def __init__(self, n_classes, embed_dim, ndf, nc=1, img_size=28):
        super().__init__()
        if img_size < 4:
            raise ValueError(f"img_size must be at least 4, got {img_size}")
        self.img_size = img_size
        self.label_emb = nn.Embedding(n_classes, embed_dim)
        # we'll expand label embedding to an image-sized channel and concat with input
        self.project = nn.Sequential(
            nn.Linear(embed_dim, img_size*img_size),
        )
        self.net = nn.Sequential(
            nn.Conv2d(nc+1, ndf, 4, 2, 1), # img_size // 2  (14x14 for 28)
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf, ndf*2, 4, 2, 1), # img_size // 4  (7x7 for 28)
            nn.BatchNorm2d(ndf*2),
            nn.LeakyReLU(0.2, inplace=True),
            # kernel covers the whole remaining map -> 1x1
            nn.Conv2d(ndf*2, 1, img_size // 4, 1, 0),
        )

    def forward(self, img, labels):
        """Return one real/fake logit per sample.

        Args:
            img: image batch of shape ``(N, nc, img_size, img_size)``.
            labels: integer class indices of shape ``(N,)``.

        Returns:
            Tensor of shape ``(N,)`` holding unnormalised logits.
        """
        le = self.label_emb(labels)
        proj = self.project(le).view(-1, 1, self.img_size, self.img_size)
        x = torch.cat([img, proj], dim=1)
        out = self.net(x)
        return out.view(-1)

# helper view layer
class View(nn.Module):
    """Layer wrapper around ``Tensor.view`` so a reshape can sit inside ``nn.Sequential``.

    Args:
        shape: iterable of target dimensions, unpacked into ``Tensor.view``
            on every forward pass.
    """

    def __init__(self, shape):
        super().__init__()
        self.shape = shape

    def forward(self, x):
        """Return ``x`` viewed with the stored target shape."""
        target_dims = self.shape
        return x.view(*target_dims)

# instantiate
# Build both networks, then move their parameters to the selected device.
netG = Generator(nz=nz, embed_dim=embed_dim, n_classes=n_classes, ngf=ngf)
netG = netG.to(device)
netD = Discriminator(n_classes=n_classes, embed_dim=embed_dim, ndf=ndf)
netD = netD.to(device)

# init weights
def weights_init(m):
    """Re-initialise a single module in place; meant for ``Module.apply``.

    Conv2d, ConvTranspose2d and Linear weights are drawn from N(0, 0.02)
    (their biases are left untouched). BatchNorm1d/2d weights are drawn from
    N(1, 0.02) and their biases are zeroed. Any other module is ignored.
    """
    weighted_layers = (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)
    norm_layers = (nn.BatchNorm1d, nn.BatchNorm2d)

    if isinstance(m, weighted_layers):
        nn.init.normal_(m.weight.data, mean=0.0, std=0.02)
    elif isinstance(m, norm_layers):
        nn.init.normal_(m.weight.data, mean=1.0, std=0.02)
        nn.init.zeros_(m.bias.data)

# Re-initialise every submodule of both networks (generator first, then discriminator).
netG.apply(weights_init)
netD.apply(weights_init)

# 5) Losses + optim
# BCEWithLogitsLoss applies the sigmoid itself, so the network outputs fed to
# it must be raw logits.
criterion = nn.BCEWithLogitsLoss()
# One Adam optimizer per network, both with the same learning rate and betas.
optimizerD, optimizerG = (
    optim.Adam(net.parameters(), lr=lr, betas=(beta1, 0.999))
    for net in (netD, netG)
)

# labels for BCE
real_label, fake_label = 1.0, 0.0

# fixed noise for visualization (10 classes x 8 samples each)
fixed_noise = torch.randn(n_classes*8, nz, device=device)
# labels laid out as 0 x8, 1 x8, ..., so each block of 8 samples shares a class
fixed_labels = torch.arange(n_classes, device=device).repeat_interleave(8)

# 6) Training loop
# Each step: update D on one real batch and one fake batch, then update G once
# using the same fake batch (1:1 D/G update ratio).
iter_count = 0
# One-sided label smoothing: D is pushed towards 0.9 instead of 1.0 on real
# images, while fake targets stay at 0 and G still aims for 1.
smooth_real_target = 0.9
for epoch in range(n_epochs):
    pbar = tqdm(trainloader, desc=f"Epoch {epoch+1}/{n_epochs}")
    # Running loss sums are kept on `device` so that accumulating them every
    # step does not force a host/GPU synchronisation.
    epoch_lossD = torch.zeros((), device=device)
    epoch_lossG = torch.zeros((), device=device)
    n_batches = 0
    for i, (imgs, labels) in enumerate(pbar):
        # non_blocking lets the copy overlap with compute when the loader
        # provides pinned memory; otherwise it behaves like a normal copy
        imgs = imgs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        bsize = imgs.size(0)
        # Train D
        netD.zero_grad()
        # real
        out_real = netD(imgs, labels)
        # label smoothing (helps stability): real target slightly less than 1.0
        real_targets = torch.full((bsize,), smooth_real_target, device=device)
        lossD_real = criterion(out_real, real_targets)
        lossD_real.backward()
        # fake
        noise = torch.randn(bsize, nz, device=device)
        rand_labels = torch.randint(0, n_classes, (bsize,), device=device)
        fake = netG(noise, rand_labels)
        # detach so the D update does not backpropagate into G
        out_fake = netD(fake.detach(), rand_labels)
        fake_targets = torch.zeros(bsize, device=device)
        lossD_fake = criterion(out_fake, fake_targets)
        lossD_fake.backward()
        optimizerD.step()
        # Train G
        netG.zero_grad()
        # re-score the same fakes with the freshly updated D, this time
        # keeping the graph so gradients reach G
        out_fake2 = netD(fake, rand_labels)
        # generator wants discriminator to predict real (1)
        lossG = criterion(out_fake2, torch.ones(bsize, device=device))
        lossG.backward()
        optimizerG.step()

        epoch_lossD += (lossD_real + lossD_fake).detach()
        epoch_lossG += lossG.detach()
        n_batches += 1

        iter_count += 1
        if i % 200 == 0:
            pbar.set_postfix({'lossD_real': lossD_real.item(), 'lossD_fake': lossD_fake.item(), 'lossG': lossG.item()})

    # Epoch-level averages: the progress bar only shows single-batch
    # snapshots, which are too noisy to judge the training trend.
    denom = max(n_batches, 1)  # guard against an empty loader
    mean_lossD = (epoch_lossD / denom).item()
    mean_lossG = (epoch_lossG / denom).item()
    print(f"Epoch {epoch+1}/{n_epochs} mean lossD: {mean_lossD:.4f} | mean lossG: {mean_lossG:.4f}")

    # Save sample grid after each epoch
    # eval() makes BatchNorm use its running statistics while sampling
    netG.eval()
    with torch.no_grad():
        samples = netG(fixed_noise, fixed_labels).cpu()
    netG.train()
    # unnormalize from [-1,1] to [0,1]; nrow=8 puts the 8 samples of each class on one row
    grid = make_grid((samples+1)/2, nrow=8, padding=2)
    sample_path = os.path.join(sample_dir, f"epoch_{epoch+1:03d}.png")
    save_image(grid, sample_path)
    print("Saved sample:", sample_path)

print("Training finished. Samples in", sample_dir)


Device: cuda


Epoch 1/10: 100%|██████████| 469/469 [00:17<00:00, 26.78it/s, lossD_real=0.601, lossD_fake=0.641, lossG=1.41]


Saved sample: cgan_samples/epoch_001.png


Epoch 2/10: 100%|██████████| 469/469 [00:15<00:00, 30.29it/s, lossD_real=0.928, lossD_fake=0.369, lossG=1.12]


Saved sample: cgan_samples/epoch_002.png


Epoch 3/10: 100%|██████████| 469/469 [00:15<00:00, 30.61it/s, lossD_real=0.613, lossD_fake=0.677, lossG=1.45]


Saved sample: cgan_samples/epoch_003.png


Epoch 4/10: 100%|██████████| 469/469 [00:15<00:00, 29.73it/s, lossD_real=0.822, lossD_fake=0.484, lossG=1.1]


Saved sample: cgan_samples/epoch_004.png


Epoch 5/10: 100%|██████████| 469/469 [00:15<00:00, 30.98it/s, lossD_real=0.556, lossD_fake=0.355, lossG=1.43]


Saved sample: cgan_samples/epoch_005.png


Epoch 6/10: 100%|██████████| 469/469 [00:15<00:00, 30.34it/s, lossD_real=0.604, lossD_fake=0.335, lossG=1.58]


Saved sample: cgan_samples/epoch_006.png


Epoch 7/10: 100%|██████████| 469/469 [00:15<00:00, 30.61it/s, lossD_real=0.743, lossD_fake=0.289, lossG=1.26]


Saved sample: cgan_samples/epoch_007.png


Epoch 8/10: 100%|██████████| 469/469 [00:16<00:00, 29.26it/s, lossD_real=0.63, lossD_fake=0.326, lossG=1.73]


Saved sample: cgan_samples/epoch_008.png


Epoch 9/10: 100%|██████████| 469/469 [00:16<00:00, 28.79it/s, lossD_real=0.476, lossD_fake=0.773, lossG=1.81]


Saved sample: cgan_samples/epoch_009.png


Epoch 10/10: 100%|██████████| 469/469 [00:15<00:00, 30.42it/s, lossD_real=0.739, lossD_fake=0.268, lossG=1.7]

Saved sample: cgan_samples/epoch_010.png
Training finished. Samples in cgan_samples



