In [None]:
# This notebook is a wrapper around gaudi_dcgan.py and is used to render outputs from
# the model (e.g. generated images, loss plots, etc.)

# NOTE: On Sagemaker, use `conda_amazonei_pytorch_latest_p37` (OR `conda_pytorch_p36`)

# General Deps
import random
import os
import datetime
import ffmpeg
import re

import numpy as np
import matplotlib.pyplot as plt

import matplotlib.animation as animation
from IPython.display import HTML

# Torch Deps
import torch
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils

# DCGAN
import gaudi_dcgan as dcgan

In [None]:
## Sample Usage on Command Line - See Notes on Running on DL1
#
# ! python3 run_gaudi_dcgan.py \
#    --dataroot "/efs/images/" \
#    --seed 215 \
#    --name msls_2022_01_24_001 \
#    --s_epoch 0 \
#    --n_epoch 16

In [None]:
# Seed both the Python and the torch RNGs with the same value so that
# reruns of this notebook start from the same random state.
for seed_rng in (random.seed, torch.manual_seed):
    seed_rng(215)

# Configuration objects built with the DCGAN module's default values:
#   model_cfg - checkpoint / model naming settings (model_dir, model_name, ...)
#   train_cfg - training settings (img_size, batch_size, dev, ...)
model_cfg = dcgan.ModelCheckpointConfig()
train_cfg = dcgan.TrainingConfig()

In [None]:
%%time

# Root folder of the training images; it is read through an ImageFolder dataset.
dataroot = "/efs/images/"

# Per-image preprocessing, applied in the order listed. See Section
# `Data and Translations` for the discussion of why this sequence is used.
channel_stat = (0.5, 0.5, 0.5)  # passed as both the per-channel mean and std
preprocess = transforms.Compose(
    [
        # Random translation only (no rotation); translate=(0.2, 0.0)
        transforms.RandomAffine(degrees=0, translate=(0.2, 0.0)),
        # Crop a centered square 4x the target size, then shrink to the target size
        transforms.CenterCrop(train_cfg.img_size * 4),
        transforms.Resize(train_cfg.img_size),
        transforms.ToTensor(),
        transforms.Normalize(channel_stat, channel_stat),
    ]
)

dataset = dset.ImageFolder(root=dataroot, transform=preprocess)

In [None]:
%%time

# Create the dataloader with Similar Params to Habana
dataloader = torch.utils.data.DataLoader(
    dataset,
    shuffle=False,
    num_workers=8,
    pin_memory=True,
    timeout=0,
    batch_size=train_cfg.batch_size,
)

# Plot the first 16 images of one training batch as a grid
real_batch = next(iter(dataloader))
plt.figure(figsize=(16, 16))
plt.axis("off")
plt.title("Training Images")
plt.imshow(
    np.transpose(
        # Slice BEFORE the device transfer so only the 16 displayed images
        # are copied to train_cfg.dev instead of the whole batch.
        vutils.make_grid(
            real_batch[0][:16].to(train_cfg.dev), padding=2, normalize=True
        ).cpu(),
        # make_grid returns channels-first; imshow wants channels-last
        (1, 2, 0),
    )
)

# Create the figures directory if needed. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test.
figures_dir = f"{model_cfg.model_dir}/{model_cfg.model_name}/figures"
os.makedirs(figures_dir, exist_ok=True)

plt.savefig(f"{figures_dir}/train_samples.png")

In [None]:
# Start (or resume) the training run: 16 epochs, beginning at epoch 0.
# Later cells read result["losses"] and result["img_list"] for plotting.
run_kwargs = {"n_epochs": 16, "st_epoch": 0}
result = dcgan.start_or_resume_training_run(
    dataloader, train_cfg, model_cfg, **run_kwargs
)

In [None]:
# Timestamp (UTC) shared by every artifact saved from here on. The characters
# ":", "-", " " and "." are replaced with "_" so it can be embedded in file names.
# The pattern is a raw string: "\." in a plain string is an invalid escape sequence.
plots_execution_dttm = re.sub(r":|-| |\.", "_", str(datetime.datetime.utcnow()))

# Plot the Losses Over Time
plt.figure(figsize=(10, 5))
plt.title(f"Generator and Discriminator Loss During Training - {model_cfg.model_name}")
plt.plot(result["losses"]["_G"], label="G")
plt.plot(result["losses"]["_D"], label="D")
plt.xlabel("Iterations")
plt.ylabel("Loss")
plt.legend()

# Create the figures directory if needed. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test.
figures_dir = f"{model_cfg.model_dir}/{model_cfg.model_name}/figures"
os.makedirs(figures_dir, exist_ok=True)

# Save BEFORE plt.show(): once the figure has been shown it is no longer the
# current figure, so a later savefig() would write an empty image.
plt.savefig(f"{figures_dir}/train_loss_{plots_execution_dttm}.png")
plt.show()

In [None]:
# Animate the image grids stored in result["img_list"] and export the animation.

fig = plt.figure(figsize=(8, 8))
plt.axis("off")

# Build one single-artist frame per entry of result["img_list"]
# (one entry per epoch, per the original notebook's note).
ims = []
for grid in result["img_list"]:
    # Stored grids are channels-first; imshow expects channels-last.
    frame = plt.imshow(np.transpose(grid, (1, 2, 0)), animated=True)
    ims.append([frame])

ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=1000, blit=True)

# Interactive JS/HTML rendering of the training-progress sequence
content = HTML(ani.to_jshtml())

# GIF writer; its metadata is the model name as "title" plus every attribute
# found on model_cfg.
gif_metadata = dict({"title": f"{model_cfg.model_name}"}, **model_cfg.__dict__)
writergif = animation.PillowWriter(fps=10, metadata=gif_metadata)

# Make sure the videos directory exists before writing into it
videos_dir = f"{model_cfg.model_dir}/{model_cfg.model_name}/videos"
if not os.path.exists(videos_dir):
    os.makedirs(videos_dir)

# Both exports share the same path stem and differ only in extension
progress_stem = f"{videos_dir}/progress_{plots_execution_dttm}"

# 1) animated GIF
ani.save(f"{progress_stem}.gif", writer=writergif)

# 2) HTML file containing the HTML5 <video> markup
with open(f"{progress_stem}.html", "w") as fi:
    print(ani.to_html5_video(), file=fi)

In [None]:
# Grab a batch of real images from the dataloader and compare the final Generated images vs the Real
# images. Do they hold up against human discretion?

real_batch = next(iter(dataloader))

plt.figure(figsize=(15, 15))

# Left panel: up to 64 real images from the batch
plt.subplot(1, 2, 1)
plt.axis("off")
plt.title("Real Images")
plt.imshow(
    np.transpose(
        # Slice BEFORE the device transfer so only the displayed images are
        # copied to train_cfg.dev instead of the whole batch.
        vutils.make_grid(
            real_batch[0][:64].to(train_cfg.dev), padding=5, normalize=True
        ).cpu(),
        # make_grid returns channels-first; imshow wants channels-last
        (1, 2, 0),
    )
)

# Right panel: the fake images from the final epoch
# NOTE: This uses the fixed noise from the trained model and is not
# variable between executions
plt.subplot(1, 2, 2)
plt.axis("off")
plt.title("Generated Images")
plt.imshow(np.transpose(result["img_list"][-1], (1, 2, 0)))

# Create the figures directory if needed. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test.
figures_dir = f"{model_cfg.model_dir}/{model_cfg.model_name}/figures"
os.makedirs(figures_dir, exist_ok=True)

# Save BEFORE plt.show(): once the figure has been shown it is no longer the
# current figure, so a later savefig() would write an empty image.
plt.savefig(f"{figures_dir}/compare_{plots_execution_dttm}.png")
plt.show()

In [None]:
# Generate a few sample images; this is using randomly generated noise
# and results should be variable across multiple runs...

plt.figure(figsize=(15, 15))

# Ask the DCGAN module for 16 samples from the epoch-4 checkpoint.
# Per the original note, each sample is expected to be shaped [3, 64, 64]
# (TODO confirm against dcgan.generate_fake_samples).
imgs = dcgan.generate_fake_samples(
    n_samples=16, train_cfg=train_cfg, model_cfg=model_cfg, as_of_epoch=4
)

plt.imshow(
    np.transpose(
        vutils.make_grid(imgs.to(train_cfg.dev), padding=2, normalize=True).cpu(),
        # make_grid returns channels-first; imshow wants channels-last
        (1, 2, 0),
    )
)

# Create the figures directory if needed. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test.
figures_dir = f"{model_cfg.model_dir}/{model_cfg.model_name}/figures"
os.makedirs(figures_dir, exist_ok=True)

# NOTE: the file name prefix was previously misspelled "generaed_"; anything
# that globbed for the old spelling must be updated to "generated_".
plt.savefig(f"{figures_dir}/generated_{plots_execution_dttm}.png")

In [None]:
# Experiment: Ideally a Generator Net can use a CPU to (slowly) generate samples; being able to move
# the G network off GPU/HPU allows us to serve imgs off an inexpensive webserver. In this case we
# generate the noise AND the images on the CPU.

# Same training config as before, except that the device is forced to the CPU
train_cfg_cpu_only = dcgan.TrainingConfig(dev=torch.device("cpu"))

imgs = dcgan.generate_fake_samples(
    n_samples=4, train_cfg=train_cfg_cpu_only, model_cfg=model_cfg, as_of_epoch=4
)

plt.imshow(
    np.transpose(
        # Use the CPU-only config's device here. The previous code moved the
        # samples to train_cfg.dev (the training accelerator), which defeats
        # the CPU-only experiment and fails on hosts without that device.
        vutils.make_grid(
            imgs.to(train_cfg_cpu_only.dev), padding=2, normalize=True
        ).cpu(),
        # make_grid returns channels-first; imshow wants channels-last
        (1, 2, 0),
    )
)