## Downloading dataset from Kaggle

To use a Kaggle dataset, the [Kaggle Official API](https://github.com/Kaggle/kaggle-api) is a convenient choice. To use this API you need API credentials, which you can get by following a few simple steps.

1. Sign in to [https://kaggle.com/](https://kaggle.com), then click on your profile picture in the top right and select "Account" from the menu.

2. Scroll down to the "API" section and click "Create New API Token". This will download a file `kaggle.json` with the following contents:

```
{"username":"YOUR_KAGGLE_USERNAME","key":"YOUR_KAGGLE_KEY"}
```

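3. Save the generated file `kaggle.json` in the same directory as this file. Note that the Kaggle API looks for `kaggle.json` in `~/.kaggle/` by default, so if authentication fails, either move the file there or set the `KAGGLE_CONFIG_DIR` environment variable to this directory.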

In [None]:
# Create a Kaggle API client and authenticate it with the `kaggle.json`
# credentials described above.
from kaggle.api.kaggle_api_extended import KaggleApi
api = KaggleApi()
api.authenticate()

In [None]:
# Download the anime-face dataset from Kaggle; the next cell expects the
# archive as `animefacedataset.zip` in the working directory.
api.dataset_download_files('splcher/animefacedataset')

In [None]:
from zipfile import ZipFile

# Extract the downloaded archive into the current working directory.
# The context manager closes the archive even if extraction fails part-way.
with ZipFile('animefacedataset.zip') as zf:
    zf.extractall()

In [None]:
#imports

import torch
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torchvision.transforms as T
from torchvision.utils import make_grid

import os
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Side length (in pixels) that images are resized/cropped to, and the number
# of images per batch.
image_size = 64
batch_size = 128

# Stats for normalizing images: (per-channel mean, per-channel std).
# With mean 0.5 and std 0.5, values in [0, 1] are mapped to [-1, 1].
stats = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5) 

# Folder the dataset archive was extracted to.
DATA_DIR = './animefacedataset'

# Each image is resized, center-cropped to image_size x image_size, converted
# to a tensor and normalized with `stats`.
train_ds = ImageFolder(DATA_DIR, transform = T.Compose([
    T.Resize(image_size),
    T.CenterCrop(image_size),
    T.ToTensor(),
    T.Normalize(*stats)
]))

# Shuffled batches of `batch_size` images, loaded with 3 workers and pinned memory.
train_dl = DataLoader(train_ds, batch_size, shuffle=True, num_workers=3, pin_memory = True)

In [None]:
# Images are normalized when the training dataset is created, so they have to
# be denormalized again before they can be visualized.
def denorm(img_tensors):
    """Undo the normalization using the first-channel mean and std from `stats`."""
    mean, std = stats[0][0], stats[1][0]
    return img_tensors * std + mean

def show_images(images, nmax=64):
    """Display up to `nmax` images of a normalized batch as a grid with 8 images per row.

    Args:
        images: batch tensor of normalized images; it may live on the CPU or
            on the GPU.
        nmax: maximum number of images taken from the start of the batch.
    """
    fig, ax = plt.subplots(figsize=(8,8))
    ax.set_xticks([])
    ax.set_yticks([])
    # Copy the selected images to host memory first: matplotlib cannot read
    # tensors that live on the GPU (e.g. batches from a device-wrapped loader).
    selected = images.detach()[:nmax].cpu()
    # make_grid returns (C, H, W); imshow expects (H, W, C).
    ax.imshow(make_grid(denorm(selected), nrow=8).permute(1,2,0))
    
    
def show_batch(dl, nmax =64):
    """Show the first batch yielded by `dl` as an image grid.

    Does nothing when `dl` yields no batch at all. Each batch is expected to
    be an `(images, labels)` pair; the labels are ignored.
    """
    try:
        first = next(iter(dl))
    except StopIteration:
        return
    images, _ = first
    show_images(images, nmax)

In [None]:
# Preview one batch of training images.
show_batch(train_dl)

PyTorch can run code on the CPU as well as on a GPU, so we can place our data on either of them. We first check whether a GPU is available and, if it is, move the data to the GPU.

In [None]:
def get_default_device():
    '''
    Return the CUDA device if a GPU is available, otherwise the CPU device.

    '''

    use_gpu = torch.cuda.is_available()
    return torch.device('cuda' if use_gpu else 'cpu')
    
    
def to_device(data, device):
    '''
    Move a tensor or network to the selected device.

    Lists and tuples are handled element by element (recursively) and are
    always returned as a list.

    '''

    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    return [to_device(item, device) for item in data]

In [None]:
# A simple class to move data to selected device.
class DeviceDataLoader():
    '''
    Wrap a dataloader so that every batch it yields is first moved to a device

    '''

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        # Same number of batches as the wrapped dataloader
        return len(self.dl)

    def __iter__(self):
        '''
        Yield the batches of the wrapped dataloader, each moved to the selected device

        '''
        yield from (to_device(batch, self.device) for batch in self.dl)

In [None]:
# Pick the device used for training; the bare `device` expression shows it
# as the cell output.
device = get_default_device()
device

In [None]:
# Wrap the training loader so each batch is moved to `device` as it is iterated.
train_dl = DeviceDataLoader(train_dl, device)

A GAN is divided into two main parts, the Discriminator and the Generator. The function of the latter is to create images, and the function of the former is to differentiate between real and generated images.

In [None]:
import torch.nn as nn

In [None]:
# Number of channels of the latent tensor that is fed to the generator.
latent_size = 128 

# In simple terms, the latent size is the number of dimensions of the latent
# space, where each dimension can learn a different feature of the image to be
# generated. In this case, features like eyes, hair, nose, etc.

# Generator: turns a (latent_size x 1 x 1) latent tensor into a (3 x 64 x 64)
# image through five transposed convolutions. The first four are followed by
# batch norm and ReLU; the last one is followed by Tanh, so outputs lie in [-1, 1].
generator = nn.Sequential(
    # input: latent_size x 1 x 1

    nn.ConvTranspose2d(latent_size, 512, kernel_size=4, stride=1, padding=0, bias=False),
    nn.BatchNorm2d(512),
    nn.ReLU(True),
    # output: 512 x 4 x 4

    nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(256),
    nn.ReLU(True),
    # output: 256 x 8 x 8

    nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(128),
    nn.ReLU(True),
    # output: 128 x 16 x 16

    nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(64),
    nn.ReLU(True),
    # output: 64 x 32 x 32

    nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1, bias=False),
    nn.Tanh()
    # output: 3 x 64 x 64
)



In [None]:
# Discriminator: maps a (3 x 64 x 64) image to a single value in (0, 1) per
# image. Four stride-2 convolutions (each followed by batch norm and
# LeakyReLU) halve the spatial size; a final convolution reduces it to 1 x 1.
discriminator = nn.Sequential(
    # input: 3 x 64 x 64

    nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(64),
    nn.LeakyReLU(0.2, inplace=True),
    # output: 64 x 32 x 32

    nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(128),
    nn.LeakyReLU(0.2, inplace=True),
    # output: 128 x 16 x 16

    nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(256),
    nn.LeakyReLU(0.2, inplace=True),
    # output: 256 x 8 x 8

    nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(512),
    nn.LeakyReLU(0.2, inplace=True),
    # output: 512 x 4 x 4

    nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0, bias=False),
    # output: 1 x 1 x 1

    nn.Flatten(),
    nn.Sigmoid()) 

# Sigmoid is the last layer because the discriminator's job is to decide
# whether an image is real or fake; for that binary classification a sigmoid
# output gives good results.

# Research has shown that LeakyReLU works better for the discriminator and
# ReLU works better for the generator. Feel free to experiment with other
# activation functions and see whether they give you better results.


In [None]:
# Move both networks to the selected device.
discriminator = to_device(discriminator, device)
generator = to_device(generator, device)

## Discriminator training 

The main thing you have to understand about a GAN is how the discriminator and the generator are trained. To train the discriminator, real images are first given as input and the loss is calculated using the `binary_cross_entropy` function.
Then some fake images from the generator are given as input and the loss is calculated again. At the end, the gradients are calculated from the combined loss of the real and fake images.

In [None]:
def train_discriminator(real_images, opt_discriminator):
    """Run one optimization step of the discriminator.

    The discriminator is scored on `real_images` (target 1) and on a freshly
    generated batch of fake images (target 0). The two binary cross-entropy
    losses are summed, back-propagated and applied with `opt_discriminator`.

    Args:
        real_images: batch of real images; expected to be on `device`, where
            the targets are created.
        opt_discriminator: optimizer that updates the discriminator.

    Returns:
        Tuple `(loss, real_score, fake_score)`: the summed loss as a float, and
        the mean discriminator output on the real and on the fake batch.
    """
    # Set discriminator gradient to zero
    opt_discriminator.zero_grad()

    # Pass real images through discriminator
    real_preds = discriminator(real_images)
    real_targets = torch.ones(real_images.size(0), 1, device=device)
    real_loss = F.binary_cross_entropy(real_preds, real_targets)
    real_score = torch.mean(real_preds).item()

    # Generate fake images. Only the discriminator is updated in this step, so
    # the generator runs without gradient tracking: this avoids back-propagating
    # through the generator and filling its gradients for nothing.
    latent = torch.randn(batch_size, latent_size, 1, 1, device=device)
    with torch.no_grad():
        fake_images = generator(latent)

    # Pass fake images through discriminator
    fake_targets = torch.zeros(fake_images.size(0), 1, device=device) # As they are fake images, target is "0"
    fake_preds = discriminator(fake_images)
    fake_loss = F.binary_cross_entropy(fake_preds, fake_targets)
    fake_score = torch.mean(fake_preds).item()

    # Update discriminator weights
    loss = real_loss + fake_loss
    loss.backward()
    opt_discriminator.step()
    return loss.item(), real_score, fake_score

## Generator training

Generator training is a bit trickier than discriminator training. For the discriminator, we give images from the generator as input and can train it directly from the resulting loss. The generator, however, has to produce images, so we have to use the discriminator as part of the generator's loss function.

The trick is that, even though we pass fake images to the discriminator, we use one as the target. For the discriminator, we took one as the target for real images and zero for fake images. But here we want the generator to produce images that look real. So if we take one as the target, the loss will be high for images that the discriminator does not consider real, and thus the generator will learn to produce images that are close to real ones.

In [None]:
def train_generator(opt_generator):
    """Run one optimization step of the generator and return its loss as a float.

    A fresh batch of fake images is scored by the discriminator against the
    target 1 ("real"), so the loss is low only when the discriminator is fooled.
    """
    # Set generator gradients to zero
    opt_generator.zero_grad()

    # Generate fake images and let the discriminator score them
    noise = torch.randn(batch_size, latent_size, 1, 1, device=device)
    scores = discriminator(generator(noise))

    # Try to fool the discriminator: every fake image is labelled as real
    want_real = torch.ones(batch_size, 1, device=device)
    gen_loss = F.binary_cross_entropy(scores, want_real)

    # Update generator weights
    gen_loss.backward()
    opt_generator.step()

    return gen_loss.item()

In [None]:
from torchvision.utils import save_image

# Directory that receives the generated sample grids; created if it is missing.
sample_dir = 'generated'
os.makedirs(sample_dir, exist_ok = True)

In [None]:
def save_images(index, latent_tensors, show=True):
    """Generate images from `latent_tensors`, save them as one grid and optionally show it.

    Args:
        index: number embedded in the output file name
            (`generated_image_no_{index}.png` inside `sample_dir`).
        latent_tensors: latent batch passed to the generator.
        show: when true, also display the grid with matplotlib.
    """
    # Only the values are needed here, not gradients, so the generator runs
    # without building an autograd graph.
    with torch.no_grad():
        fake_images = denorm(generator(latent_tensors))
    fake_img_name = f'generated_image_no_{index}.png'
    save_image(fake_images, os.path.join(sample_dir, fake_img_name), nrow=8)

    print('Saving', fake_img_name)

    if show:
        fig, ax = plt.subplots(figsize=(8,8))
        ax.set_xticks([])
        ax.set_yticks([])
        # Show the denormalized images, i.e. the same values that were written
        # to disk; the raw generator output comes from Tanh and lies in [-1, 1].
        ax.imshow(make_grid(fake_images.cpu(), nrow=8).permute(1,2,0))

In [None]:
# Fixed batch of 64 latent tensors, passed to save_images for every sample grid.
fixed_latent = torch.randn(64, latent_size, 1, 1, device=device)

In [None]:
# Save and display a sample grid under index 0.
save_images(0, fixed_latent)

In [None]:
from tqdm.notebook import tqdm
import torch.nn.functional as F

In [None]:
def fit(epochs, lr, start_idx=1):
    """Train the discriminator and the generator for `epochs` epochs.

    After every epoch the losses and scores of the epoch's last batch are
    recorded and printed, and a sample grid generated from `fixed_latent` is
    saved.

    Args:
        epochs: number of passes over `train_dl`.
        lr: learning rate used for both Adam optimizers.
        start_idx: index used in the file name of the first saved sample grid;
            every following epoch uses the next index.

    Returns:
        Tuple `(losses_generator, losses_discriminator, real_scores,
        fake_scores)` of four lists with one entry per epoch.
    """
    torch.cuda.empty_cache()

    # Losses and scores
    losses_generator = []
    losses_discriminator = []
    real_scores = []
    fake_scores = []

    # Create optimizers
    opt_discriminator = torch.optim.Adam(discriminator.parameters(), lr=lr)
    opt_generator = torch.optim.Adam(generator.parameters(), lr = lr)

    for epoch in range(epochs):
        for real_images, _ in tqdm(train_dl):
            # Train discriminator and generator
            loss_discriminator, real_score, fake_score = train_discriminator(real_images, opt_discriminator)
            loss_generator = train_generator(opt_generator)

        # Record losses and scores of the last batch of this epoch
        losses_generator.append(loss_generator)
        losses_discriminator.append(loss_discriminator)
        real_scores.append(real_score)
        fake_scores.append(fake_score)

        # Log losses and scores
        print(f"Epoch No.: {epoch+1}/{epochs}, loss_generator: {loss_generator:.3f}, loss_discriminator: {loss_discriminator: .3f}, real_score: {real_score: .3f}, fake_score: {fake_score: .3f}")

        # Save generated images. The index advances with the epoch so that
        # every epoch gets its own file instead of overwriting the same one.
        save_images(index=epoch + start_idx, latent_tensors=fixed_latent, show=False)

    return losses_generator, losses_discriminator, real_scores, fake_scores

In [None]:
# Training hyperparameters: learning rate for both optimizers and number of epochs.
lr = 0.0002
epochs = 200

# Run the training; `history` holds the four per-epoch lists returned by fit.
history = fit(epochs, lr)

In [None]:
# Unpack the per-epoch losses and scores returned by fit.
losses_generator, losses_discriminator, real_scores, fake_scores = history

In [None]:
# Save the weights (state dicts) of the generator and the discriminator to disk.
torch.save(generator.state_dict(), 'Generator.pth')
torch.save(discriminator.state_dict(), 'Discriminator.pth')

Let's visualize what kind of anime faces have been generated by the generator.

In [None]:
from IPython.display import Image

# Display the sample grid saved under index 1.
Image('./generated/generated_image_no_1.png')

In [None]:
# Display the sample grid saved under index 10.
Image('./generated/generated_image_no_10.png')

In [None]:
# Display the sample grid saved under index 20.
Image('./generated/generated_image_no_20.png')

It's good to see that the trained network gives good results. It is even more interesting to see how the images evolve from epoch 1 onward. We can make a video out of all the generated images using OpenCV.

In [None]:
import cv2

vid_fname = 'Anime_face_GAN.avi'


def _sample_order(path):
    """Sort key that orders sample grids by the number at the end of their file name."""
    stem = os.path.splitext(os.path.basename(path))[0]
    suffix = stem.rsplit('_', 1)[-1]
    # Names without a numeric suffix sort after the numbered ones, alphabetically.
    return (0, int(suffix), path) if suffix.isdecimal() else (1, 0, path)


# Collect the saved sample grids.
img_dir = [
    os.path.join(sample_dir, img)
    for img in os.listdir(sample_dir)
    if img.startswith('generated')
]

# Sort numerically: a plain string sort would put ..._10.png before ..._2.png.
img_dir.sort(key=_sample_order)

# One frame per second. NOTE(review): (530, 530) presumably matches the saved
# grids (8 images of 64 px per row plus 2 px padding) -- confirm if the image
# size, grid layout or padding changes.
vid = cv2.VideoWriter(vid_fname,cv2.VideoWriter_fourcc(*'MP4V'), 1, (530,530))

try:
    for img_name in img_dir:
        frame = cv2.imread(img_name)
        # imread returns None for files it cannot read; skip those.
        if frame is not None:
            vid.write(frame)
finally:
    # Always finalize the video file, even if writing a frame fails.
    vid.release()


We can also visualize how the losses change over time, which helps with debugging the training process. For a GAN, we expect the generator's loss to decrease over time without the discriminator's loss getting too high.

In [None]:
# Per-epoch loss curves: discriminator first, then generator (legend order).
for loss_curve in (losses_discriminator, losses_generator):
    plt.plot(loss_curve, '-')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['Discriminator', 'Generator'])
plt.title('Losses');

In [None]:
# Per-epoch mean discriminator outputs: real batch first, then fake batch (legend order).
for score_curve in (real_scores, fake_scores):
    plt.plot(score_curve, '-')
plt.xlabel('epoch')
plt.ylabel('score')
plt.legend(['Real', 'Fake'])
plt.title('Scores');