# DCGAN - Supplemental Media and Figures

This notebook contains supplemental media and figures. It currently includes the following:

- Training Progress Video 
- Training Progress GIF
- Training Loss
- CPU Generated Samples

--------

In [None]:
# Install the `ffmpeg` package from PyPI - it is probably not on the kernel by default, and a
# later cell runs `import ffmpeg`.
# NOTE(review): matplotlib's `to_html5_video()` (used for Figure 2.1.2) relies on an ffmpeg
# *executable* being available; confirm that this pip package provides what is needed on this
# kernel, or install the binary separately.
!pip3 install ffmpeg

In [None]:
# General Deps
import os
import re
import datetime
import numpy as np

# Plotting + Video
import ffmpeg
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML

# Torch
import torch
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils

# DCGAN
import gaudi_dcgan as dcgan

In [None]:
# Start timestamp (UTC) for plots; captured once and applied all the way through the notebook
# so every figure/video saved by one run shares the same suffix.
# The separators ":", "-", " " and "." are replaced with "_" to keep the value filename-safe,
# e.g. "2024_01_31_12_00_00_123456".
# - The pattern is a raw string so "\." is not flagged as an invalid escape sequence.
# - `datetime.utcnow()` is deprecated; an aware "now" with tzinfo dropped renders identically.
PLOT_DTTM = re.sub(
    r"[-:. ]",
    "_",
    str(datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)),
)

# Epoch whose checkpoint is loaded and evaluated by the cells below.
EPOCH = 64

In [None]:
# Init model-checkpoint and training configs - see gaudi_dcgan.py for parameter descriptions.
# For clarity, every argument is spelled out explicitly rather than relying on defaults.
# NOTE(review): the original note says these are the default values, but `model_name` is a
# custom value - confirm the remaining values against the defaults in gaudi_dcgan.py.

# Checkpoint config: `model_dir`/`model_name` form the root under which this notebook reads
# checkpoints and writes its `figures/` and `videos/` output.
# NOTE(review): the *_frequency values presumably control save/log/progress cadence during
# training - confirm units (epochs vs. iterations) in gaudi_dcgan.py.
model_cfg = dcgan.ModelCheckpointConfig(
    model_name="msls_dcgan_ml_p3_8xlarge_001", # Custom model name to identify Gaudi- vs GPU-trained runs
    model_dir="/efs/trained_model",
    save_frequency=1,
    log_frequency=50,
    gen_progress_frequency=250,
)

# Training config: `batch_size` and `img_size` are reused below for the DataLoader batch size
# and the image crop/resize.
# NOTE(review): nc/nz/ngf/ndf presumably follow the usual DCGAN naming (image channels, latent
# size, generator/discriminator feature-map widths) and lr/beta1/beta2 are optimizer settings -
# confirm in gaudi_dcgan.py.
train_cfg = dcgan.TrainingConfig(
    batch_size=128,
    img_size=64,
    nc=3,
    nz=100,
    ngf=64,
    ndf=64,
    lr=0.0002,
    beta1=0.5,
    beta2=0.999,
)

In [None]:
# Create the per-model `figures` and `videos` output directories if they do not exist yet.
# `exist_ok=True` makes the call idempotent and removes the check-then-create race of a
# separate `os.path.exists` test (e.g. when two notebooks share the same filesystem).
for _subdir in ("figures", "videos"):
    os.makedirs(
        f"{model_cfg.model_dir}/{model_cfg.model_name}/{_subdir}", exist_ok=True
    )

## Get Model Checkpoint For Evaluation
---------

In [None]:
# Create the discriminator (D) and generator (G) networks together with their optimizers.
netD, optimD = train_cfg.get_net_D()
netG, optimG = train_cfg.get_net_G()

# Locate the checkpoint saved for this model name at EPOCH, then load its parameters into
# the networks/optimizers. The call also hands back the values the figures below rely on
# (`losses` for Figure 1.1, `img_list` for Figures 2.1.x).
checkpoint_path = f"{model_cfg.model_dir}/{model_cfg.model_name}/checkpoint_{EPOCH}.pt"
(cur_epoch, losses, fixed_noise, img_list) = dcgan.instantiate_from_checkpoint(
    netD,
    netG,
    optimD,
    optimG,
    checkpoint_path,
)

## Figure 1.1 - `G` and `D` Training Losses

-----------

In [None]:
# Figure 1.1 - plot the training losses of the Generator (G) and Discriminator (D) that were
# restored from the checkpoint, then save the figure next to the model.

plt.figure(figsize=(12, 6))
plt.title(f"Generator and Discriminator Loss During Training - {model_cfg.model_name}")
plt.plot(losses["_G"], label="G")
plt.plot(losses["_D"], label="D")
plt.xlabel("Iterations")
plt.ylabel("Loss")
plt.legend()

# Save BEFORE show(): with notebook backends show() renders and releases the current figure,
# so a savefig() issued afterwards writes an empty image.
plt.savefig(
    f"{model_cfg.model_dir}/{model_cfg.model_name}/figures/train_loss_{PLOT_DTTM}.png"
)
plt.show()

## Figure 2.1.1 - Final Images of Fixed Noise Sample vs Real Images

Get a batch of real images from the dataloader and compare the final generated images vs. the real images. Do they hold up to human scrutiny? Note that this figure uses fixed noise saved as part of the model checkpoint and will **NOT** generate new images from `G` on subsequent runs.

--------

In [None]:
# The ImageFolder/DataLoader pair reads from the directory of images and applies the
# transformation pipeline below at load time. See the `Data and Transformations` section
# for details.
real_img_transform = transforms.Compose(
    [
        transforms.RandomAffine(degrees=0, translate=(0.3, 0.0)),
        transforms.CenterCrop(train_cfg.img_size * 4),
        transforms.Resize(train_cfg.img_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)
dataset = dset.ImageFolder(root="/efs/sample_images", transform=real_img_transform)

# NOTE: This step can be slow while the images are processed (esp. on a new EFS);
# anecdotally, around 8-10 min to load 1MM images (~30GB total).
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=train_cfg.batch_size,
    shuffle=False,
    num_workers=8,
    pin_memory=True,
    timeout=0,
)

# Only one batch of real images is needed for the side-by-side comparison.
real_batch = next(iter(dataloader))

# Figure 2.1.1 - side-by-side comparison of real images (left) and the generator's final
# saved progress grid (right).
plt.figure(figsize=(15, 15))

# Left panel: a grid of up to 64 real images from the dataloader batch. The batch is sliced
# before the device transfer so only the images that are actually plotted get moved.
plt.subplot(1, 2, 1)
plt.axis("off")
plt.title("Real Images")
plt.imshow(
    np.transpose(
        vutils.make_grid(
            real_batch[0][:64].to(train_cfg.dev), padding=5, normalize=True
        ).cpu(),
        (1, 2, 0),  # channels-first grid -> channels-last, as imshow expects
    )
)

# Right panel: the fake images from the final saved progress step of `G`.
plt.subplot(1, 2, 2)
plt.axis("off")
plt.title("Generated Images")
plt.imshow(np.transpose(img_list[-1], (1, 2, 0)))

# Save BEFORE show(): with notebook backends show() renders and releases the current figure,
# so a savefig() issued afterwards writes an empty image.
plt.savefig(
    f"{model_cfg.model_dir}/{model_cfg.model_name}/figures/compare_{PLOT_DTTM}.png"
)
plt.show()

## Figure 2.1.2 - Training Progress Sequence on Fixed Noise

At each epoch (or at some fixed interval, e.g. every second or third epoch) during training, the model saved `G`'s output for a fixed series of inputs, `Z`. This figure shows how the images generated from `Z` evolved as the model trained.

----------

In [None]:
# 2.1.2 - Training Progress Sequence Saved as video && GIF

# Build one animation frame per saved progress grid. `fig` and `ims` were previously used
# without being defined anywhere in the notebook, which raises NameError on a fresh kernel.
# Each entry of `img_list` is transposed to channels-last for imshow, the same way the final
# grid is plotted in Figure 2.1.1.
fig = plt.figure(figsize=(8, 8))
plt.axis("off")
ims = [[plt.imshow(np.transpose(img, (1, 2, 0)), animated=True)] for img in img_list]

ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=150, blit=True)

# GIF writer; the model checkpoint config is attached as metadata alongside a title.
writergif = animation.PillowWriter(
    fps=10, metadata=dict({"title": f"{model_cfg.model_name}"}, **model_cfg.__dict__)
)

# Save GIF
ani.save(
    f"{model_cfg.model_dir}/{model_cfg.model_name}/videos/progress_{PLOT_DTTM}.gif",
    writer=writergif,
)

# Save Video (an HTML file embedding the HTML5 <video> markup produced by matplotlib)
with open(
    f"{model_cfg.model_dir}/{model_cfg.model_name}/videos/progress_{PLOT_DTTM}.html", "w",
) as fi:
    print(ani.to_html5_video(), file=fi)

# Show Video - Note: this can get quite large if there are too many frames or too many fixed
# noise images saved along with the model's progress checkpoints...
# Kept as the last expression so the notebook renders it as the cell output.
HTML(ani.to_jshtml())

## Figure 3.1 - Generate New Samples From CPU

This figure generates novel images from `G` by creating a new input vector `Z` and feeding it through the network. Because `Z` is regenerated on each run, these results will vary between executions.

Being able to run "inference" quickly is important. Ideally, `G` can generate samples using just the CPU, which allows us to serve new images from an inexpensive instance.

-----------

In [None]:
# Figure 3.1 - Generate New Samples From CPU

plt.figure(figsize=(15, 15))

# Create a new TrainingConfig which will generate noise on the CPU
train_cfg_cpu_only = dcgan.TrainingConfig(dev=torch.device("cpu"))

# Generate samples from `G` (again, on CPU) using the checkpoint saved at EPOCH
imgs = dcgan.generate_fake_samples(
    n_samples=4, train_cfg=train_cfg_cpu_only, model_cfg=model_cfg, as_of_epoch=EPOCH
)

# Plot results. The samples stay on the CPU: previously they were moved to `train_cfg.dev`
# (the training device) and straight back, which defeats the CPU-only purpose of this figure.
plt.imshow(
    np.transpose(
        vutils.make_grid(imgs.cpu(), padding=2, normalize=True),
        (1, 2, 0),  # channels-first grid -> channels-last, as imshow expects
    )
)

# Save Result
plt.savefig(
    f"{model_cfg.model_dir}/{model_cfg.model_name}/figures/novel_samples_{PLOT_DTTM}.png"
)