# Experiment 2: Hyperparameter Tuning
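This notebook explores how changing the learning rate, batch size, and optimizer affects DCGAN performance on MNIST. Hyperparameters are read from `configs/tuning_config.yaml`; the per-epoch generator/discriminator losses and one generated sample image are logged to Weights & Biases.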

In [None]:
import yaml
import torch
from models.generator import Generator
from models.discriminator import Discriminator
from utils.mnist_loader import get_mnist_loader
from torch import nn, optim
import wandb

In [None]:
# Load the hyperparameters for this run. Keys read by this notebook:
# 'optimizer', 'learning_rate', 'batch_size', 'epochs', 'latent_dim',
# plus an optional 'seed' for reproducible runs.
with open('../configs/tuning_config.yaml') as f:
    config = yaml.safe_load(f)

opt_choice = config['optimizer']
lr = config['learning_rate']

# Validate the optimizer name *before* opening a W&B run or building models.
# Previously an unsupported value (e.g. 'sgd' or 'Adam') fell through the
# if/elif chain and only surfaced later as a NameError on `optimizer_G`
# inside the training cell.
SUPPORTED_OPTIMIZERS = ('adam', 'rmsprop')
opt_key = str(opt_choice).strip().lower()
if opt_key not in SUPPORTED_OPTIMIZERS:
    raise ValueError(
        f"Unsupported optimizer {opt_choice!r} in tuning_config.yaml; "
        f"expected one of {SUPPORTED_OPTIMIZERS}"
    )

# Optional reproducibility: when the config provides a 'seed', seed torch's
# random number generators before any weights or noise are created. Without
# the key, behaviour is unchanged (unseeded).
seed = config.get('seed')
if seed is not None:
    torch.manual_seed(seed)

# Track this run; the full config dict is attached to the run.
wandb.init(project="dcgan-mnist", config=config, name="hyperparam-tuning")

device = 'cuda' if torch.cuda.is_available() else 'cpu'
generator = Generator().to(device)
discriminator = Discriminator().to(device)

# Binary cross-entropy on the discriminator output.
# NOTE(review): BCELoss expects probabilities in [0, 1], so this presumably
# relies on Discriminator ending in a sigmoid - confirm in models/discriminator.
criterion = nn.BCELoss()

# Both networks use the same optimizer type and learning rate.
if opt_key == 'adam':
    # betas=(0.5, 0.999): lower beta1 than torch's default (0.9);
    # presumably the usual DCGAN setting - confirm if tuning this.
    optimizer_G = optim.Adam(generator.parameters(), lr=lr, betas=(0.5, 0.999))
    optimizer_D = optim.Adam(discriminator.parameters(), lr=lr, betas=(0.5, 0.999))
else:  # opt_key == 'rmsprop' (guaranteed by the validation above)
    optimizer_G = optim.RMSprop(generator.parameters(), lr=lr)
    optimizer_D = optim.RMSprop(discriminator.parameters(), lr=lr)

dataloader = get_mnist_loader(batch_size=config['batch_size'])


In [None]:
# Training loop: for every batch, one generator update followed by one
# discriminator update. At the end of each epoch the losses of the *last*
# batch and one generated sample are logged to W&B and printed.
for epoch in range(config['epochs']):
    # Reset per epoch so that an empty dataloader is detected explicitly
    # instead of raising a NameError (first epoch) or silently re-logging
    # stale values from a previous epoch.
    loss_G = loss_D = gen_imgs = None

    for real_imgs, _ in dataloader:
        real_imgs = real_imgs.to(device)
        batch_size = real_imgs.size(0)  # actual size; the last batch may be smaller

        # Target labels, allocated directly on the training device rather
        # than created on the CPU and copied over every iteration.
        real = torch.ones(batch_size, 1, device=device)
        fake = torch.zeros(batch_size, 1, device=device)

        # Generator: try to make the discriminator label fakes as real.
        z = torch.randn(batch_size, config['latent_dim'], 1, 1, device=device)
        gen_imgs = generator(z)
        loss_G = criterion(discriminator(gen_imgs), real)

        optimizer_G.zero_grad()
        loss_G.backward()
        optimizer_G.step()

        # Discriminator: real images -> 1, generated images -> 0.
        # detach() keeps this loss from back-propagating into the generator.
        loss_real = criterion(discriminator(real_imgs), real)
        loss_fake = criterion(discriminator(gen_imgs.detach()), fake)
        loss_D = (loss_real + loss_fake) / 2

        # zero_grad() here also clears the discriminator gradients that the
        # generator's backward pass accumulated above.
        optimizer_D.zero_grad()
        loss_D.backward()
        optimizer_D.step()

    if gen_imgs is None:
        raise RuntimeError(
            f"dataloader yielded no batches in epoch {epoch + 1}; nothing to train on or log"
        )

    # Log metrics and sample image (values are from the last batch of the epoch).
    wandb.log({
        "epoch": epoch + 1,
        "Generator Loss": loss_G.item(),
        "Discriminator Loss": loss_D.item(),
        # Detach so the logged sample does not carry the autograd graph.
        "Generated": [wandb.Image(gen_imgs[0].detach().cpu())]
    })

    print(f"Epoch {epoch+1}/{config['epochs']} | G Loss: {loss_G.item():.4f} | D Loss: {loss_D.item():.4f}")

## Observations
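- A lower learning rate improved stability but slowed convergence.
- RMSprop led to noisier generated images than Adam.
- A batch size of 128 gave smoother loss curves.

Note: the logged losses are the values from the last mini-batch of each epoch, not epoch averages, so some of the noise in the loss curves is due to batch-to-batch variation.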