- Install or upgrade the `opendatasets` Python package, which is used to download the dataset from Kaggle.

In [None]:
!pip install opendatasets --upgrade --quiet

In [None]:
import opendatasets as od

- Since this is a Kaggle dataset, you will need a Kaggle API token to download it.

In [None]:
dataset_url='https://www.kaggle.com/splcher/animefacedataset'
od.download(dataset_url)

- Prints a list of files/folders inside the directory `./animefacedataset`.

In [None]:
import os

data_dir="./animefacedataset"
print(os.listdir(data_dir))

* Let us load this dataset using the `ImageFolder` class from `torchvision`. We will also resize and crop the
images to `64px * 64px`, and normalize the pixel values with a mean & standard deviation of `0.5` for each channel.
This will ensure that pixel values are in the range `(-1, 1)`.

In [None]:
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torchvision.transforms as T

In [None]:
# Side length, in pixels, that every image is resized and center-cropped to.
image_size=64
# Number of images per batch served by the DataLoader.
batch_size=64
# Per-channel (means, stds) handed to T.Normalize; with 0.5 / 0.5 a pixel
# value in [0, 1] is mapped to [-1, 1].
stats=((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

- Implementing an image preprocessing pipeline.

In [None]:
# Preprocessing applied to every image, in order: resize, center-crop to a
# square of `image_size` pixels, convert to a tensor, then normalize each
# channel with the mean/std pairs stored in `stats`.
_preprocess_steps = [
    T.Resize(image_size),
    T.CenterCrop(image_size),
    T.ToTensor(),
    T.Normalize(*stats),
]
tf = T.Compose(_preprocess_steps)

In [None]:
# Dataset of every image found under `data_dir`, preprocessed with `tf`.
training_ds = ImageFolder(root=data_dir, transform=tf)

# Shuffled batches of `batch_size` images, loaded by two worker processes
# into pinned host memory.
training_dl = DataLoader(
    dataset=training_ds,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)

In [None]:
import torch
from torchvision.utils import make_grid
import matplotlib.pyplot as plt
%matplotlib inline

- To reverse the normalization applied during preprocessing for visualization.

In [None]:
def denorm(img_tensors):
  """Undo the normalization applied during preprocessing.

  Only the first channel's mean and std from the module-level `stats` are
  read, so the same scale and shift are applied to every element.
  """
  mean = stats[0][0]
  std = stats[1][0]
  return img_tensors * std + mean

- To visualize a batch of images.

In [None]:
def show_batch(dl):
  """Display the images of the first batch produced by `dl`, if there is one."""
  first_batch = next(iter(dl), None)
  if first_batch is None:
    return
  images, _ = first_batch  # the second element of each batch is not needed
  show_image(images)

def show_image(i):
  """Plot a batch of normalized images as a grid with 8 images per row.

  Args:
    i: batch of image tensors in the normalized value range (a data batch
      or generator output). It may live on any device and may be attached
      to an autograd graph.
  """
  fig, ax = plt.subplots(figsize=(8, 8))
  ax.set_xticks([])
  ax.set_yticks([])
  # Matplotlib converts its input to NumPy, which raises for tensors that
  # require grad or live on the GPU, so cut the graph and copy to host first.
  images = denorm(i.detach().cpu())
  grid = make_grid(images, nrow=8)
  # Move channels last for imshow and clamp to the valid display range.
  ax.imshow(grid.permute(1, 2, 0).clamp(0, 1))

In [None]:
show_batch(training_dl)

* Changing runtime to GPU

In [None]:
torch.cuda.is_available()

In [None]:
def default_device():
  """Return the CUDA device when CUDA is available, otherwise the CPU device."""
  device_name = "cuda" if torch.cuda.is_available() else "cpu"
  return torch.device(device_name)

In [None]:
device=default_device()
device

In [None]:
def to_device(data, device):
  """Move `data` to `device`, recursing into lists and tuples.

  A list or tuple (at any nesting depth) comes back as a list of moved
  elements; anything else is moved with `.to(device, non_blocking=True)`.
  """
  if not isinstance(data, (list, tuple)):
    return data.to(device, non_blocking=True)
  moved = []
  for item in data:
    moved.append(to_device(item, device))
  return moved

In [None]:
# Peek at a single batch: report its shape and the device it lives on before
# and after it is moved with to_device().
sample_batch = next(iter(training_dl), None)
if sample_batch is not None:
  images, _ = sample_batch
  print(images.shape)
  print(images.device)
  images = to_device(images, device)
  print(images.device)

In [None]:
class deviceDataLoader():
  """Wrap a data loader so every batch is moved to `device` as it is iterated."""

  def __init__(self, dl, device):
    self.dl, self.device = dl, device

  def __len__(self):
    """Return the number of batches of the wrapped loader."""
    return len(self.dl)

  def __iter__(self):
    """Yield the wrapped loader's batches, each passed through `to_device`."""
    yield from (to_device(batch, self.device) for batch in self.dl)

In [None]:
device=default_device()

- Wrap the data loader so that each batch is moved to the GPU (if available) as it is iterated.

In [None]:
training_dl=deviceDataLoader(training_dl, device)

## Discriminator network

In [None]:
import torch.nn as nn

In [None]:
def _down_block(in_channels, out_channels):
  """Conv2d (4x4, stride 2, no bias) -> BatchNorm2d -> LeakyReLU(0.2); halves H and W."""
  return (
      nn.Conv2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1, bias=False),
      nn.BatchNorm2d(out_channels),
      nn.LeakyReLU(0.2, inplace=True),
  )


# Binary classifier over images. The shape comments assume 3 x 64 x 64 input.
discriminator = nn.Sequential(
    # 3 x 64 x 64 -> 64 x 32 x 32
    *_down_block(3, 64),
    # -> 128 x 16 x 16
    *_down_block(64, 128),
    # -> 256 x 8 x 8
    *_down_block(128, 256),
    # -> 512 x 4 x 4
    *_down_block(256, 512),
    # -> 1 x 1 x 1: a single score per image
    nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0, bias=False),
    # Flatten to (batch, 1) and squash the score into (0, 1).
    nn.Flatten(),
    nn.Sigmoid(),
)

In [None]:
discriminator=to_device(discriminator, device)

In [None]:
latent_size=64

## Generator network

In [None]:
def _up_block(in_channels, out_channels, stride, padding):
  """ConvTranspose2d (4x4 kernel, no bias) -> BatchNorm2d -> in-place ReLU."""
  return (
      nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4, stride=stride, padding=padding, bias=False),
      nn.BatchNorm2d(out_channels),
      nn.ReLU(True),
  )


# Maps a latent vector to an image. The shape comments assume an input of
# latent_size x 1 x 1.
generator = nn.Sequential(
    # latent_size x 1 x 1 -> 512 x 4 x 4
    *_up_block(latent_size, 512, stride=1, padding=0),
    # -> 256 x 8 x 8
    *_up_block(512, 256, stride=2, padding=1),
    # -> 128 x 16 x 16
    *_up_block(256, 128, stride=2, padding=1),
    # -> 64 x 32 x 32
    *_up_block(128, 64, stride=2, padding=1),
    # -> 3 x 64 x 64, squashed into (-1, 1) by tanh
    nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1, bias=False),
    nn.Tanh(),
)

In [None]:
# Sanity check: push a batch of random latent vectors through the (still
# untrained) generator and look at what comes out.
xb = torch.randn(batch_size, latent_size, 1, 1)
print(xb.shape)
# No gradients are needed for a visual check, and matplotlib cannot convert a
# tensor that requires grad to NumPy, so run the forward pass without autograd.
with torch.no_grad():
  fake_image = generator(xb)
print(fake_image.shape)
show_image(fake_image)

In [None]:
generator=to_device(generator, device)

In [None]:
import torch.nn.functional as F

The function `train_d` trains the discriminator to:
- Output 1 for real images and 0 for fake images from generator.
- Compute total loss and update discriminator parameters.

In [None]:
def train_d(real_images, opt_d):
  """Run one optimization step of the discriminator.

  Args:
    real_images: batch of real images; the targets are created on the
      module-level `device`, so the batch is expected to be there as well.
    opt_d: optimizer holding the discriminator's parameters.

  Returns:
    A tuple `(loss, real_score, fake_score)`: the summed binary
    cross-entropy loss as a float, and the mean discriminator output on the
    real batch and on the generated batch.
  """
  opt_d.zero_grad()

  # Real images should be scored as 1.
  real_predictions = discriminator(real_images)
  real_targets = torch.ones(real_images.size(0), 1, device=device)
  real_loss = F.binary_cross_entropy(real_predictions, real_targets)
  real_score = torch.mean(real_predictions).item()

  # The generator is not updated in this step, so produce the fakes without
  # recording a graph; otherwise backward() would also walk the generator.
  latent = torch.randn(batch_size, latent_size, 1, 1, device=device)
  with torch.no_grad():
    fake_images = generator(latent)

  # Generated images should be scored as 0.
  fake_predictions = discriminator(fake_images)
  fake_targets = torch.zeros(fake_images.size(0), 1, device=device)
  fake_loss = F.binary_cross_entropy(fake_predictions, fake_targets)
  fake_score = torch.mean(fake_predictions).item()

  loss = real_loss + fake_loss
  loss.backward()
  opt_d.step()

  return loss.item(), real_score, fake_score

The function `train_g` trains the generator to:
- Generate fake images that can fool the discriminator.
- Maximize the discriminator's output.
- Compute loss against target label 1 and update generator parameters accordingly.

In [None]:
def train_g(opt_g):
  """Run one optimization step of the generator and return its loss as a float.

  A fresh batch of `batch_size` latent vectors is turned into images, scored
  by the discriminator, and the generator is updated so those scores move
  towards 1.
  """
  opt_g.zero_grad()

  noise = torch.randn(batch_size, latent_size, 1, 1, device=device)
  scores = discriminator(generator(noise))

  # Label the fakes as real (1): the loss is small when the discriminator is fooled.
  wanted = torch.ones(batch_size, 1, device=device)
  g_loss = F.binary_cross_entropy(scores, wanted)

  g_loss.backward()
  opt_g.step()

  return g_loss.item()

In [None]:
from torchvision.utils import save_image

- Let us create a directory to save the generated images.

In [None]:
save_dir='gen'
os.makedirs(save_dir, exist_ok=True)

- The function `save_sample` generates images from the given latent vectors, saves them to disk as a grid, and optionally displays that grid.

In [None]:
def save_sample(idx, latent_tensor, show=True):
  """Generate images from `latent_tensor`, save them as a grid, optionally show it.

  Args:
    idx: number inserted into the output file name `generated_img_<idx>.png`.
    latent_tensor: batch of latent vectors fed to the generator.
    show: when true, also display the grid with matplotlib.

  The file is written into the module-level `save_dir` with 8 images per row.
  """
  # Pure inference: skip autograd bookkeeping and denormalize only once.
  with torch.no_grad():
    fake_image = denorm(generator(latent_tensor))
  fake_fname = "generated_img_{}.png".format(idx)
  save_image(fake_image, os.path.join(save_dir, fake_fname), nrow=8)
  print("Saving", fake_fname)
  if show:
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.set_xticks([])
    ax.set_yticks([])
    # imshow needs a host tensor with channels last.
    ax.imshow(make_grid(fake_image.cpu(), nrow=8).permute(1, 2, 0))

In [None]:
# One fixed batch of 64 latent vectors. fit() passes it to save_sample() after
# every epoch, so each saved grid is generated from the same inputs.
fixed_latent=torch.randn(64, latent_size, 1, 1, device=device)

In [None]:
save_sample(0, fixed_latent)

In [None]:
from tqdm import tqdm

- To train the model for a given number of epochs, track losses and scores, and save generated image samples at each epoch.

In [None]:
def fit(epoch, lr, start_idx=1):
  """Train the discriminator and generator for `epoch` epochs.

  Args:
    epoch: number of passes over `training_dl`.
    lr: learning rate used for both Adam optimizers.
    start_idx: number given to the first saved sample image; later epochs
      count up from it.

  Returns:
    Four lists with one entry per epoch: generator loss, discriminator loss,
    real score and fake score. Each entry is the value from the last batch
    of that epoch.
  """
  torch.cuda.empty_cache()

  # New optimizers are created on every call.
  adam_betas = (0.5, 0.999)
  opt_d = torch.optim.Adam(discriminator.parameters(), lr=lr, betas=adam_betas)
  opt_g = torch.optim.Adam(generator.parameters(), lr=lr, betas=adam_betas)

  losses_g, losses_d = [], []
  real_scores, fake_scores = [], []

  for epoch_idx in range(epoch):
    for real_batch, _ in tqdm(training_dl):
      # Alternate: one discriminator step, then one generator step.
      loss_d, real_score, fake_score = train_d(real_batch, opt_d)
      loss_g = train_g(opt_g)

    # Only the final batch's metrics are recorded for the epoch.
    losses_g.append(loss_g)
    losses_d.append(loss_d)
    real_scores.append(real_score)
    fake_scores.append(fake_score)

    print(f"Epoch : {epoch_idx+1}, loss_g : {loss_g:.3f}, loss_d : {loss_d:.3f}, real_score : {real_score:.3f}, fake_score : {fake_score:.3f}")

    save_sample(epoch_idx + start_idx, fixed_latent, show=False)

  return losses_g, losses_d, real_scores, fake_scores

- Start the training

In [None]:
# Learning rate handed to both Adam optimizers inside fit().
lr=0.0002
# Number of epochs trained per call to fit().
epoch=10
# Flat list that collects the four per-epoch lists returned by every fit()
# call (generator losses, discriminator losses, real scores, fake scores).
history=[]

In [None]:
history+=fit(epoch, lr)

In [None]:
history+=fit(epoch, lr, 11)

In [None]:
history+=fit(epoch, lr/2, 21)

In [None]:
history+=fit(epoch, lr/2, 31)

- Make a video of the saved images to view the changes throughout the epochs.

In [None]:
import cv2
import os
import re

sample_dir="gen"
files=[os.path.join(sample_dir, f) for f in os.listdir(sample_dir) if f.endswith('.png')]


def _frame_index(path):
  """Return the number just before '.png' in the file name, or -1 if there is none."""
  match = re.search(r"(\d+)\.png$", os.path.basename(path))
  return int(match.group(1)) if match else -1


# The sample names are not zero-padded, so a plain string sort would place
# generated_img_10.png before generated_img_2.png. Order by the number instead.
files.sort(key=lambda path: (_frame_index(path), path))

# One frame per second, every frame scaled to 640 x 640.
out=cv2.VideoWriter('video.avi', cv2.VideoWriter_fourcc(*'MP4V'), 1, (640, 640))

try:
  for i in files:
    img=cv2.imread(i)
    if img is None:
      # cv2.imread returns None for unreadable files; skip them instead of
      # crashing inside cv2.resize.
      continue
    out.write(cv2.resize(img, (640, 640)))
finally:
  # Always finalize the video file, even if a frame fails.
  out.release()

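Extract the generator and discriminator losses from `history`, to which every call to `fit` appended four lists (generator losses, discriminator losses, real scores, fake scores), then plot both loss curves across the training epochs.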

In [None]:
# history is a flat list in which every fit() call contributed four
# consecutive per-epoch lists; the first two of each group are the generator
# and discriminator losses.
losses_g, losses_d = [], []
for group_start in range(0, len(history), 4):
  g_run = history[group_start]
  d_run = history[group_start + 1]
  losses_g += g_run
  losses_d += d_run

# Draw both curves on the same axes, generator first.
for curve in (losses_g, losses_d):
  plt.plot(curve)
plt.xlabel("Number of epochs")
plt.ylabel("Loss")
plt.legend(["Generator", "Discriminator"])
plt.show()