# Experiment 1: Architecture Variations
This notebook tests how changing the number of layers and filter sizes in the Generator and Discriminator affects image quality and training stability.

In [None]:
import torch
from models.gen_variant1 import GeneratorVariant
from models.disc_variant1 import DiscriminatorVariant
from utils.mnist_loader import get_mnist_loader
from torch import nn, optim
import wandb
import matplotlib.pyplot as plt


In [None]:
# --- Run configuration -------------------------------------------------------
# Seed PyTorch's random number generators so that weight initialisation and
# noise sampling start from the same state on every Restart & Run All.
# NOTE: seeding alone does not guarantee bit-identical results on GPU; some
# CUDA kernels are non-deterministic (see the PyTorch reproducibility notes).
SEED = 42
torch.manual_seed(SEED)

# Hyperparameters, named once here and recorded on the wandb run below.
LEARNING_RATE = 0.0002
ADAM_BETAS = (0.5, 0.999)
BATCH_SIZE = 64

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Start the tracking run; `config` stores the hyperparameters with the run so
# different architecture variants can be compared on equal terms later.
wandb.init(
    project="dcgan-mnist",
    name="arch-variation",
    config={
        "seed": SEED,
        "learning_rate": LEARNING_RATE,
        "adam_betas": ADAM_BETAS,
        "batch_size": BATCH_SIZE,
        "device": device,
    },
)

# Architecture variants under test, moved to the selected device.
generator = GeneratorVariant().to(device)
discriminator = DiscriminatorVariant().to(device)

# Binary cross-entropy on the discriminator's output.
# NOTE(review): BCELoss expects probabilities in [0, 1], so this presumably
# relies on DiscriminatorVariant ending in a sigmoid -- confirm in the model.
criterion = nn.BCELoss()

# One Adam optimizer per network, sharing the same learning rate and betas.
optimizer_G = optim.Adam(generator.parameters(), lr=LEARNING_RATE, betas=ADAM_BETAS)
optimizer_D = optim.Adam(discriminator.parameters(), lr=LEARNING_RATE, betas=ADAM_BETAS)

# Project helper that builds the MNIST loader; yields (images, labels) pairs
# as consumed by the training loop.
dataloader = get_mnist_loader(batch_size=BATCH_SIZE)


In [None]:
# Training loop: for every mini-batch, update the generator once and then the
# discriminator once; after each epoch, log the epoch-average losses and one
# generated sample to wandb.
num_epochs = 25
latent_dim = 100  # channel count of the noise tensor fed to the generator

for epoch in range(num_epochs):
    # Running totals so the logged losses describe the whole epoch rather than
    # only its final mini-batch.
    g_loss_total = 0.0
    d_loss_total = 0.0
    num_batches = 0

    for real_imgs, _ in dataloader:
        real_imgs = real_imgs.to(device)
        batch_size = real_imgs.size(0)

        # Targets are created directly on the training device (no CPU
        # allocation followed by a copy).
        # assumes the discriminator returns a (batch, 1) tensor -- TODO confirm
        real = torch.ones(batch_size, 1, device=device)
        fake = torch.zeros(batch_size, 1, device=device)

        # Train Generator: it is rewarded when the discriminator labels its
        # samples as real.
        # assumes the generator accepts (batch, latent_dim, 1, 1) noise -- TODO confirm
        z = torch.randn(batch_size, latent_dim, 1, 1, device=device)
        gen_imgs = generator(z)
        loss_G = criterion(discriminator(gen_imgs), real)
        optimizer_G.zero_grad()
        # This backward pass also leaves gradients on the discriminator's
        # parameters; optimizer_D.zero_grad() below clears them before the
        # discriminator's own update.
        loss_G.backward()
        optimizer_G.step()

        # Train Discriminator: real images should score 1, generated images 0.
        # detach() keeps this loss from back-propagating into the generator.
        loss_real = criterion(discriminator(real_imgs), real)
        loss_fake = criterion(discriminator(gen_imgs.detach()), fake)
        loss_D = (loss_real + loss_fake) / 2
        optimizer_D.zero_grad()
        loss_D.backward()
        optimizer_D.step()

        g_loss_total += loss_G.item()
        d_loss_total += loss_D.item()
        num_batches += 1

    # Without this guard an empty loader would surface as a NameError on
    # loss_G / gen_imgs at the logging call.
    if num_batches == 0:
        raise RuntimeError("dataloader yielded no batches; nothing to train on")

    wandb.log({
        "G Loss": g_loss_total / num_batches,
        "D Loss": d_loss_total / num_batches,
        # First sample of the epoch's last batch, detached from the autograd
        # graph before it is handed to wandb.
        "Generated": [wandb.Image(gen_imgs[0].detach().cpu())],
    })


## Observations
- Adding layers improved the detail of the generated digits compared with the baseline.
- Training was slightly slower than the baseline but more stable.
- Generator loss decreased more consistently than in the baseline.