Implementing a DCGAN (Deep Convolutional Generative Adversarial Network) from the paper 'Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks' (2016).

<u>Architecture guidelines for stable Deep Convolutional GANs:</u><br>
* Replace any pooling layers with strided convolutions (Discriminator) and fractional-strided convolutions (generator).
* Use batchnorm in both the Generator and the Discriminator.(*)
* Remove fully connected hidden layers for deeper architectures.
* Use ReLU activation in Generator for all layers except for the output, which uses Tanh.
* Use LeakyReLU activation in the discriminator for all layers.<br>

(*) Following the paper, I did not use batchnorm in the first layer of the Discriminator or in the last layer of the Generator.
<br>

### Here I train the DCGAN on the MNIST dataset, using the Discriminator and Generator imported from DCGAN_models.ipynb. Later I try the DCGAN on the CelebA dataset.

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter

import import_ipynb # import another nbs like modules .py
from DCGAN_models import Discriminator, Generator, initialize_weights

# Pick the best available backend instead of hard-coding Apple's MPS, so the
# notebook also runs on CUDA machines and on CPU-only hosts.
if torch.backends.mps.is_available():  # Apple-silicon GPU (macOS)
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Training configuration. Per the original author's note, these
# hyperparameters are taken from the DCGAN paper.
Z_DIM = 100                        # channel size of the noise fed to the generator
IMAGE_SIZE = 64                    # target size handed to transforms.Resize
CHANNELS_IMG = 1                   # MNIST dataset
FEATURES_DISC = FEATURES_GEN = 64  # size arguments for Discriminator / Generator
BATCH_SIZE = 128
LEARNING_RATE = 0.0002
NUM_EPOCHS = 4

# Preprocessing pipeline: resize to IMAGE_SIZE, convert to a tensor, then
# normalize every channel with mean 0.5 / std 0.5 (maps [0, 1] to [-1, 1],
# the same range as a Tanh generator output).
# NOTE: the pipeline is bound to `transform` (singular) so that the imported
# `transforms` module is not overwritten and remains usable afterwards.
transform = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize([0.5] * CHANNELS_IMG, [0.5] * CHANNELS_IMG),
    ]
)

# MNIST training split; downloaded into dataset_GAN/ on first use.
dataset = datasets.MNIST(root="dataset_GAN/", train=True, transform=transform, download=True)
loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Build the two networks (discriminator first, then generator) and move them
# to the selected device.
disc = Discriminator(CHANNELS_IMG, FEATURES_DISC).to(device)
gen = Generator(Z_DIM, CHANNELS_IMG, FEATURES_GEN).to(device)

# Apply the custom weight initialization to both networks, in the same order.
for network in (disc, gen):
    initialize_weights(network)

# Adam settings (learning rate and betas) are taken from the paper, per the
# original author's note. Both optimizers share the same configuration.
adam_betas = (0.5, 0.999)
opt_gen = optim.Adam(gen.parameters(), lr=LEARNING_RATE, betas=adam_betas)
opt_disc = optim.Adam(disc.parameters(), lr=LEARNING_RATE, betas=adam_betas)

# A fixed batch of 32 noise vectors, reused for every logged preview grid so
# successive previews are comparable.
fixed_noise = torch.randn(32, Z_DIM, 1, 1).to(device)

# Binary cross-entropy: loss = -w_n * [y_n * log(x_n) + (1 - y_n) * log(1 - x_n)]
criterion = nn.BCELoss()
# TensorBoard: one writer for real image grids, one for generated ones.
writer_real = SummaryWriter("runs/GAN_MNIST/real_DCGAN_MNIST")
writer_fake = SummaryWriter("runs/GAN_MNIST/fake_DCGAN_MNIST")
step = 0  # global_step used when logging image grids

# Switch both networks to training mode (generator first, as before).
for network in (gen, disc):
    network.train()

# Alternating GAN training: for every batch, update the discriminator once,
# then the generator once. Every 100 batches the losses are printed and a grid
# of real / generated images is written to TensorBoard.
for epoch in range(NUM_EPOCHS):
    for batch_idx, (real, _) in enumerate(loader):  # labels are not used
        real = real.to(device)
        # Generate as many fakes as there are real images: the last batch of
        # an epoch can be smaller than BATCH_SIZE.
        noise = torch.randn(real.size(0), Z_DIM, 1, 1).to(device)
        fake = gen(noise)

        ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        disc_real = disc(real).reshape(-1)
        loss_disc_real = criterion(disc_real, torch.ones_like(disc_real))
        # detach() keeps this backward pass out of the generator: no generator
        # gradients are computed here and retain_graph is not needed.
        disc_fake = disc(fake.detach()).reshape(-1)
        loss_disc_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        loss_disc = (loss_disc_real + loss_disc_fake) / 2
        disc.zero_grad()
        loss_disc.backward()
        opt_disc.step()

        ### Train Generator: min log(1 - D(G(z))) <--> max log(D(G(z)))
        # Re-score the (non-detached) fakes with the just-updated
        # discriminator; targets are "real" so the generator learns to fool it.
        output = disc(fake).reshape(-1)
        loss_gen = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        loss_gen.backward()
        opt_gen.step()

        ### Print losses and log image grids to TensorBoard
        if batch_idx % 100 == 0:
            # Adjacent f-strings instead of a backslash continuation, which
            # embedded the source indentation in the printed line.
            print(
                f"Epoch [{epoch}/{NUM_EPOCHS}] Batch {batch_idx}/{len(loader)} "
                f"Loss D: {loss_disc.item():.4f}, Loss G: {loss_gen.item():.4f}"
            )
            with torch.no_grad():
                fake = gen(fixed_noise)
                # take out (up to) 32 examples
                img_grid_real = torchvision.utils.make_grid(
                    real[:32], normalize=True
                )
                img_grid_fake = torchvision.utils.make_grid(
                    fake[:32], normalize=True
                )

                writer_real.add_image("Real", img_grid_real, global_step=step)
                writer_fake.add_image("Fake", img_grid_fake, global_step=step)

            step += 1

Epoch [0/4] Batch 0/469                    Loss D: 0.6887, Loss G: 0.7882
Epoch [0/4] Batch 100/469                    Loss D: 0.0143, Loss G: 4.1600
Epoch [0/4] Batch 200/469                    Loss D: 0.0165, Loss G: 4.2444
Epoch [0/4] Batch 300/469                    Loss D: 0.7081, Loss G: 2.2838
Epoch [0/4] Batch 400/469                    Loss D: 0.5191, Loss G: 1.5857
Epoch [1/4] Batch 0/469                    Loss D: 0.5029, Loss G: 1.4505
Epoch [1/4] Batch 100/469                    Loss D: 0.5557, Loss G: 0.9646
Epoch [1/4] Batch 200/469                    Loss D: 0.5465, Loss G: 1.0950
Epoch [1/4] Batch 300/469                    Loss D: 0.5544, Loss G: 0.8873
Epoch [1/4] Batch 400/469                    Loss D: 0.5427, Loss G: 1.0445
Epoch [2/4] Batch 0/469                    Loss D: 0.5567, Loss G: 1.0079
Epoch [2/4] Batch 100/469                    Loss D: 0.5304, Loss G: 1.8033
Epoch [2/4] Batch 200/469                    Loss D: 0.6865, Loss G: 0.8245
Epoch [2/4] Batch 

### IMPLEMENTING DCGAN ON CELEBA DATASET

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter

import import_ipynb # import another nbs like modules .py
from DCGAN_models import Discriminator, Generator, initialize_weights

# Choose the compute device at runtime rather than hard-coding "mps":
# prefer Apple's MPS backend, then CUDA, and fall back to the CPU so the cell
# runs on any machine.
if torch.backends.mps.is_available():  # macOS
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Training configuration. Per the original author's note, these
# hyperparameters are taken from the DCGAN paper.
Z_DIM = 100                        # channel size of the noise fed to the generator
IMAGE_SIZE = 64                    # target size handed to transforms.Resize
CHANNELS_IMG = 3                   # celebA dataset
FEATURES_DISC = FEATURES_GEN = 64  # size arguments for Discriminator / Generator
BATCH_SIZE = 128
LEARNING_RATE = 0.0002
NUM_EPOCHS = 4

# Preprocessing pipeline for CelebA.
# Resize() with an int only scales the SHORTER edge to IMAGE_SIZE and keeps
# the aspect ratio, so non-square photos would stay non-square. CenterCrop
# then cuts an IMAGE_SIZE x IMAGE_SIZE square so real images have a fixed
# square shape (presumably the same shape the Generator produces -- confirm
# against DCGAN_models).
# NOTE: the pipeline is bound to `transform` (singular) so that the imported
# `transforms` module is not overwritten and remains usable afterwards.
transform = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.CenterCrop(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize([0.5] * CHANNELS_IMG, [0.5] * CHANNELS_IMG),
    ]
)

# ImageFolder expects the images inside at least one sub-directory of root;
# the folder-derived labels are ignored during training.
dataset = datasets.ImageFolder(root="./dataset_GAN/celeba/", transform=transform)
loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Build the two networks (discriminator first, then generator) and move them
# to the selected device.
disc = Discriminator(CHANNELS_IMG, FEATURES_DISC).to(device)
gen = Generator(Z_DIM, CHANNELS_IMG, FEATURES_GEN).to(device)

# Apply the custom weight initialization to both networks, in the same order.
for network in (disc, gen):
    initialize_weights(network)

# Adam settings (learning rate and betas) are taken from the paper, per the
# original author's note. Both optimizers share the same configuration.
adam_betas = (0.5, 0.999)
opt_gen = optim.Adam(gen.parameters(), lr=LEARNING_RATE, betas=adam_betas)
opt_disc = optim.Adam(disc.parameters(), lr=LEARNING_RATE, betas=adam_betas)

# A fixed batch of 32 noise vectors, reused for every logged preview grid so
# successive previews are comparable.
fixed_noise = torch.randn(32, Z_DIM, 1, 1).to(device)

# Binary cross-entropy: loss = -w_n * [y_n * log(x_n) + (1 - y_n) * log(1 - x_n)]
criterion = nn.BCELoss()
# TensorBoard: one writer for real image grids, one for generated ones.
writer_real = SummaryWriter("runs/GAN_CelebA/real_DCGAN")
writer_fake = SummaryWriter("runs/GAN_CelebA/fake_DCGAN")
step = 0  # global_step used when logging image grids

# Switch both networks to training mode (generator first, as before).
for network in (gen, disc):
    network.train()

# Alternating GAN training on CelebA: for every batch, update the
# discriminator once, then the generator once. Every 750 batches the losses
# are printed and a grid of real / generated images is written to TensorBoard.
for epoch in range(NUM_EPOCHS):
    for batch_idx, (real, _) in enumerate(loader):  # labels are not used
        real = real.to(device)
        # Generate as many fakes as there are real images: the last batch of
        # an epoch can be smaller than BATCH_SIZE.
        noise = torch.randn(real.size(0), Z_DIM, 1, 1).to(device)
        fake = gen(noise)

        ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        disc_real = disc(real).reshape(-1)
        loss_disc_real = criterion(disc_real, torch.ones_like(disc_real))
        # detach() keeps this backward pass out of the generator: no generator
        # gradients are computed here and retain_graph is not needed.
        disc_fake = disc(fake.detach()).reshape(-1)
        loss_disc_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        loss_disc = (loss_disc_real + loss_disc_fake) / 2
        disc.zero_grad()
        loss_disc.backward()
        opt_disc.step()

        ### Train Generator: min log(1 - D(G(z))) <--> max log(D(G(z)))
        # Re-score the (non-detached) fakes with the just-updated
        # discriminator; targets are "real" so the generator learns to fool it.
        output = disc(fake).reshape(-1)
        loss_gen = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        loss_gen.backward()
        opt_gen.step()

        ### Print losses and log image grids to TensorBoard
        if batch_idx % 750 == 0:
            # Adjacent f-strings instead of a backslash continuation, which
            # embedded the source indentation in the printed line.
            print(
                f"Epoch [{epoch}/{NUM_EPOCHS}] Batch {batch_idx}/{len(loader)} "
                f"Loss D: {loss_disc.item():.4f}, Loss G: {loss_gen.item():.4f}"
            )
            with torch.no_grad():
                fake = gen(fixed_noise)
                # take out (up to) 32 examples
                img_grid_real = torchvision.utils.make_grid(
                    real[:32], normalize=True
                )
                img_grid_fake = torchvision.utils.make_grid(
                    fake[:32], normalize=True
                )

                writer_real.add_image("Real celebA", img_grid_real, global_step=step)
                writer_fake.add_image("Fake celebA", img_grid_fake, global_step=step)

            step += 1