In [1]:
# We implement DCGAN on the SVHN dataset, available at http://ufldl.stanford.edu/housenumbers/. The dataset is a collection of 32x32 color images of house numbers and is distributed as three splits: train, test and extra. A GAN is trained without labels, so this notebook combines all three splits into a single training set.

In [2]:
# Run identifier: it is embedded in the TensorBoard log directories and in the
# trained_models/<model_name> folder where the weights are saved.
model_name = "dcgan1"

# Imports

In [21]:

import os

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchvision.transforms as transformations
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

In [4]:
import PIL.Image as Image
import torchvision.models as models
#numpy
import numpy as np

In [5]:
import tqdm
from ignite.metrics.gan import FID

# Hyperparameters and constants: for dataset and training


In [6]:
# Hyperparameters etc.
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LEARNING_RATE = 2e-4  # could also use two lrs, one for gen and one for disc
BATCH_SIZE = 3200  # samples per optimisation step (the logged run shows 198 batches per epoch)
IMAGE_SIZE = 64  # images are resized to this size before being fed to the networks
CHANNELS_IMG = 3  # colour channels of the images
NOISE_DIM = 100  # channel count of the (N, NOISE_DIM, 1, 1) latent tensor given to the generator
NUM_EPOCHS = 5  # full passes over the dataset
FEATURES_DISC = 64  # base channel width passed to Discriminator(features_d=...)
FEATURES_GEN = 64  # base channel width passed to Generator(features_g=...)

# preparing Dataset

### we will use SVHN dataset for this example
### we will combine the train, test and extra datasets to make a bigger dataset


In [7]:
# Preprocessing pipeline applied to every image: resize, convert to a tensor and
# map each channel from [0, 1] to [-1, 1] (the range of the generator's Tanh output).
#
# The pipeline is built through the `transformations` alias on purpose: this
# statement rebinds the name `transforms` from the torchvision module to the
# Compose object, so writing `transforms.Compose(...)` here would raise
# AttributeError as soon as the cell is executed a second time.
transforms = transformations.Compose(
    [
        transformations.Resize(IMAGE_SIZE),
        transformations.ToTensor(),
        transformations.Normalize([0.5] * CHANNELS_IMG, [0.5] * CHANNELS_IMG),
    ]
)

In [8]:
#get the dataset

# SVHN is distributed as three splits. Labels are not used for GAN training, so
# all of them are concatenated into one pool of images.
# The directory name matches the one shown in the download log of this cell.
SVHN_ROOT = "dataset_svhm"
SVHN_SPLITS = ("train", "test", "extra")

dataset = torch.utils.data.ConcatDataset(
    [
        # `transforms` is the preprocessing pipeline (a Compose object) built in the previous cell.
        datasets.SVHN(root=SVHN_ROOT, split=split, transform=transforms, download=True)
        for split in SVHN_SPLITS
    ]
)
loader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)


Using downloaded and verified file: dataset_svhm/train_32x32.mat
Using downloaded and verified file: dataset_svhm/test_32x32.mat
Using downloaded and verified file: dataset_svhm/extra_32x32.mat


In [9]:
# Quick sanity check of the dataset: number of samples, then the tensor shape
# and the label of the first sample.
first_image, first_label = dataset[0]
for summary_value in (len(dataset), first_image.shape, first_label):
    print(summary_value)

630420
torch.Size([3, 64, 64])
1


# Model

## generator

In [10]:


class Generator(nn.Module):
    """DCGAN generator: maps a latent tensor to an image through transposed convolutions.

    Args:
        channels_noise: number of channels of the latent input.
        channels_img: number of channels of the generated image.
        features_g: base channel width; the hidden stages use 16x, 8x, 4x and 2x this value.

    For a latent input of spatial size 1x1 the stages produce 4x4, 8x8, 16x16,
    32x32 and finally 64x64 feature maps. The output passes through Tanh, so
    values lie in [-1, 1].
    """

    def __init__(self, channels_noise, channels_img, features_g):
        super().__init__()
        widths = [features_g * multiplier for multiplier in (16, 8, 4, 2)]

        # First stage expands the latent tensor (stride 1, no padding) ...
        stages = [self.generator_block_architecture(channels_noise, widths[0], 4, 1, 0)]
        # ... and every following stage doubles the spatial resolution.
        for width_in, width_out in zip(widths, widths[1:]):
            stages.append(self.generator_block_architecture(width_in, width_out, 4, 2, 1))

        # Output layer: no batch norm, bias enabled, Tanh squashing.
        to_image = nn.ConvTranspose2d(
            widths[-1], channels_img, kernel_size=4, stride=2, padding=1
        )
        self.net = nn.Sequential(*stages, to_image, nn.Tanh())

    def generator_block_architecture(self, in_channels, out_channels, kernel_size, stride, padding):
        """Build one hidden stage: bias-free ConvTranspose2d -> BatchNorm2d -> ReLU."""
        upsample = nn.ConvTranspose2d(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            bias=False,
        )
        return nn.Sequential(upsample, nn.BatchNorm2d(out_channels), nn.ReLU())

    def forward(self, x):
        """Run the latent tensor `x` through the network and return the generated images."""
        return self.net(x)


## discriminator

In [11]:
class Discriminator(nn.Module):
    """DCGAN discriminator: strided convolutions ending in a sigmoid score.

    Args:
        channels_img: number of channels of the input images.
        features_d: base channel width; the hidden stages use 1x, 2x, 4x and 8x this value.

    Every convolution uses stride 2, so a 64x64 input is reduced to a single
    1x1 score per image, squashed into [0, 1] by the final Sigmoid.
    """

    def __init__(self, channels_img, features_d):
        super().__init__()
        widths = [features_d * multiplier for multiplier in (1, 2, 4, 8)]

        # Input layer: plain convolution (with bias) and no batch norm.
        stem = [
            nn.Conv2d(channels_img, widths[0], kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
        ]
        # Three downsampling stages, each doubling the channel count.
        downsamplers = [
            self.dicriminator_block_architecture(width_in, width_out, 4, 2, 1)
            for width_in, width_out in zip(widths, widths[1:])
        ]
        # Output layer: collapse the remaining 4x4 map into one score.
        head = [
            nn.Conv2d(widths[-1], 1, kernel_size=4, stride=2, padding=0),
            nn.Sigmoid(),
        ]
        self.disc = nn.Sequential(*stem, *downsamplers, *head)

    def dicriminator_block_architecture(self, in_channels, out_channels, kernel_size, stride, padding):
        """Build one hidden stage: bias-free Conv2d -> BatchNorm2d -> LeakyReLU(0.2)."""
        downsample = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            bias=False,
        )
        return nn.Sequential(downsample, nn.BatchNorm2d(out_channels), nn.LeakyReLU(0.2))

    def forward(self, x):
        """Score the image batch `x`; returns the sigmoid output of the network."""
        return self.disc(x)


--------

# Initialization : Model , Loss , Optimizer, data loader

### model

In [12]:
def initialize_weights(model):
    """Initialise `model` in place following the reference DCGAN scheme.

    * Conv2d / ConvTranspose2d weights are drawn from N(0, 0.02).
    * BatchNorm2d scale parameters are drawn from N(1, 0.02) and the shift
      parameters are set to 0, so every normalisation layer starts close to
      the identity instead of scaling its input by a value near zero.

    Args:
        model: any nn.Module; all of its sub-modules are visited.
    """
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            nn.init.normal_(m.weight, 0.0, 0.02)
        elif isinstance(m, nn.BatchNorm2d):
            # affine=False layers have no learnable scale/shift to initialise
            if m.weight is not None:
                nn.init.normal_(m.weight, 1.0, 0.02)
            if m.bias is not None:
                nn.init.zeros_(m.bias)

### data loader

In [13]:
# Shuffled batches for the training loop
dataloader = DataLoader(dataset, shuffle=True, batch_size=BATCH_SIZE)

# Build both networks on the selected device ...
gen = Generator(
    channels_noise=NOISE_DIM, channels_img=CHANNELS_IMG, features_g=FEATURES_GEN
).to(device)
disc = Discriminator(channels_img=CHANNELS_IMG, features_d=FEATURES_DISC).to(device)

# ... and re-initialise their weights, generator first.
for network in (gen, disc):
    initialize_weights(network)

### optimizer, loss

In [14]:
# Both networks use Adam with the same learning rate and betas.
adam_settings = {"lr": LEARNING_RATE, "betas": (0.5, 0.999)}
opt_gen = optim.Adam(gen.parameters(), **adam_settings)
opt_disc = optim.Adam(disc.parameters(), **adam_settings)

# Binary cross-entropy on the discriminator's sigmoid output
criterion = nn.BCELoss()

### tensorboard

In [15]:

# Latent batch drawn once and reused at the end of every epoch, so the logged
# sample grids come from the same latent inputs each time.
fixed_noise = torch.randn(100, NOISE_DIM, 1, 1).to(device)
# TensorBoard writers: scalars (losses, FID, gradient norms) go to writer_loss,
# image grids of real / generated samples go to writer_real / writer_fake.
# NOTE(review): scalars are written under "runs/" while images are written
# under "logs/" -- confirm that the two different root directories are intended.
writer_loss = SummaryWriter(f"runs/"+model_name+"/loss")
writer_real = SummaryWriter(f"logs/"+model_name+"/real")
writer_fake = SummaryWriter(f"logs/"+model_name+"/fake")

2022-10-03 12:41:05.533295: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


----

### initialize FID wrapper

In [16]:
# FID metric from pytorch-ignite, constructed with its default settings.
# The training loop feeds it (real, fake) image batches, reads the score and
# resets it again, so every reported value covers a single batch.
fid_score = FID()

In [17]:
#interpolate function to resize images to 299,299,3  which is the input size of inception network
def interpolate(batch):
    arr = []
    for img in batch:
        pil_img = transformations.ToPILImage()(img)
        resized_img = pil_img.resize((299,299), Image.BILINEAR)
        arr.append(transformations.ToTensor()(resized_img))
    return torch.stack(arr)

# Training

In [18]:
# Put both networks into training mode before the loop starts.
# disc.train() is the last expression of the cell and returns the module
# itself, which is why the Discriminator architecture is displayed below.
gen.train()
disc.train()

Discriminator(
  (disc): Sequential(
    (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.2)
    (2): Sequential(
      (0): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2)
    )
    (3): Sequential(
      (0): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2)
    )
    (4): Sequential(
      (0): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2)
    )
    (5): Conv2d(512, 1, kernel_size=(4, 4), stride=(2, 2))
    (6): Sigmoid()
  )
)

In [22]:
def mean_input_grad_norms(outputs, inputs):
    """Return, for each tensor in `inputs`, the batch-mean L2 norm of d(sum(outputs))/d(input).

    Args:
        outputs: 1-D tensor of discriminator scores connected to every tensor in
            `inputs` through an autograd graph.
        inputs: tuple of tensors that require grad (leaf or intermediate), each
            with the batch dimension first.

    Returns:
        A list of floats, one per entry of `inputs`.
    """
    grads = torch.autograd.grad(
        outputs=outputs,
        inputs=inputs,
        grad_outputs=torch.ones_like(outputs),
    )
    return [g.reshape(g.shape[0], -1).norm(2, dim=1).mean().item() for g in grads]


# Discriminator weights whose gradient norm is tracked, mapped to their TensorBoard tag.
DISC_LAYER_GRAD_TAGS = {
    "disc.0.weight": "Disc Gradient w.r.t 1st layer DCGAN",
    "disc.2.0.weight": "Disc Gradient w.r.t 2nd layer DCGAN",
}
disc_params = dict(disc.named_parameters())

step = 0        # TensorBoard step for per-epoch values
batch_step = 0  # TensorBoard step for per-batch values; runs on across batches and epochs

for epoch in range(NUM_EPOCHS):
    # Running sums of the batch losses, averaged at the end of the epoch
    total_loss_gen = 0.0
    total_loss_disc = 0.0

    # Target labels not needed! <3 unsupervised
    for batch_idx, (real, _) in enumerate(dataloader):
        real = real.to(device)
        # Use the actual batch size: the last batch of an epoch can be smaller than BATCH_SIZE
        noise = torch.randn(real.shape[0], NOISE_DIM, 1, 1).to(device)
        fake = gen(noise)

        ### Train Discriminator
        disc_real = disc(real).reshape(-1)
        loss_disc_real = criterion(disc_real, torch.ones_like(disc_real))
        disc_fake = disc(fake.detach()).reshape(-1)
        loss_disc_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        loss_disc = (loss_disc_real + loss_disc_fake) / 2
        disc.zero_grad()
        loss_disc.backward()
        # Record the weight-gradient norms now: the generator's backward pass
        # below also accumulates into the discriminator's .grad buffers.
        disc_layer_grad_norms = {
            tag: disc_params[name].grad.norm()
            for name, tag in DISC_LAYER_GRAD_TAGS.items()
        }
        opt_disc.step()

        ### Train Generator:
        output = disc(fake).reshape(-1)
        loss_gen = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        loss_gen.backward()
        opt_gen.step()

        total_loss_gen += loss_gen.item()
        total_loss_disc += loss_disc.item()

        # Print losses occasionally and print to tensorboard
        if batch_idx % 10 == 0:
            print(
                f"Epoch [{epoch}/{NUM_EPOCHS}] Batch {batch_idx}/{len(dataloader)} \
                  Loss D: {loss_disc:.4f}, loss G: {loss_gen:.4f}"
            )

            with torch.no_grad():
                # Batch losses
                writer_loss.add_scalar("Generator loss Batch", loss_gen.item(), global_step=batch_step)
                writer_loss.add_scalar("Discriminator loss Batch", loss_disc.item(), global_step=batch_step)

                # FID of this single batch: update, read, then reset the metric
                real_images_fid = interpolate(real)
                fake_images_fid = interpolate(fake.detach())
                fid_score.update((real_images_fid, fake_images_fid))
                computed_fid_score = fid_score.compute()
                print("FID score: ", computed_fid_score)
                writer_loss.add_scalar("FID Score DCGAN", computed_fid_score, global_step=batch_step)
                fid_score.reset()

            batch_step += 1

    # ---- end-of-epoch logging (based on the last batch of the epoch) ----

    with torch.no_grad():
        fixed_fake = gen(fixed_noise)
        # take out upto 100 examples
        img_grid_real = torchvision.utils.make_grid(real[:100], normalize=True)
        img_grid_fake = torchvision.utils.make_grid(fixed_fake[:100], normalize=True)

    writer_real.add_image("Real", img_grid_real, global_step=step)
    writer_fake.add_image("Fake", img_grid_fake, global_step=step)

    # Average losses over the epoch
    avg_loss_gen = total_loss_gen / len(dataloader)
    avg_loss_disc = total_loss_disc / len(dataloader)
    writer_loss.add_scalar("Generator loss Epoch", avg_loss_gen, global_step=step)
    writer_loss.add_scalar("Discriminator loss Epoch", avg_loss_disc, global_step=step)

    # Input-gradient diagnostics. The graphs of the training step were freed by
    # backward() and its inputs do not require grad, so the scores are
    # recomputed here on copies that do. These extra forward passes run with
    # the networks in whatever mode they are currently in.
    real_probe = real.detach().clone().requires_grad_(True)
    real_scores = disc(real_probe).reshape(-1)
    (real_grad_norm,) = mean_input_grad_norms(real_scores, (real_probe,))
    writer_loss.add_scalar("Gradient norm Disc Real DCGAN", real_grad_norm, global_step=step)

    noise_probe = noise.detach().clone().requires_grad_(True)
    fake_probe = gen(noise_probe)
    fake_scores = disc(fake_probe).reshape(-1)
    # One graph serves both: gradient w.r.t. the generated image and w.r.t. the latent noise
    fake_grad_norm, noise_grad_norm = mean_input_grad_norms(
        fake_scores, (fake_probe, noise_probe)
    )
    writer_loss.add_scalar("Gradient norm Disc Fake DCGAN", fake_grad_norm, global_step=step)
    writer_loss.add_scalar("Gradient norm Generator DCGAN", noise_grad_norm, global_step=step)

    # Norm of the discriminator-loss gradient for the first two conv weights (last batch)
    for tag, grad_norm in disc_layer_grad_norms.items():
        writer_loss.add_scalar(tag, grad_norm.item(), global_step=step)

    step += 1

    # Save both networks after every epoch, creating the folders when missing
    model_dir = os.path.join("trained_models", model_name)
    os.makedirs(model_dir, exist_ok=True)
    torch.save(gen.state_dict(), os.path.join(model_dir, "gen.pth"))
    torch.save(disc.state_dict(), os.path.join(model_dir, "disc.pth"))
    

Epoch [0/5] Batch 0/198                   Loss D: 0.6551, loss G: 0.8385


  resized_img = pil_img.resize((299,299), Image.BILINEAR)


FID score:  0.15315825285524987
Epoch [0/5] Batch 10/198                   Loss D: 0.3857, loss G: 1.3355


KeyboardInterrupt: 

In [None]:
# Close the TensorBoard writers (images first, then scalars)
for writer in (writer_real, writer_fake, writer_loss):
    writer.close()