# Colab FAQ

For a basic overview of the features offered in Colab notebooks, check out: [Overview of Colaboratory Features](https://colab.research.google.com/notebooks/basic_features_overview.ipynb)

You need to use the Colab GPU for this assignment by selecting:

> **Runtime**   →   **Change runtime type**   →   **Hardware Accelerator: GPU**

## Setup PyTorch
All files are stored in the `/content/csc421/a4/` folder.


In [2]:
######################################################################
# Setup python environment and change the current working directory
######################################################################
# Install the third-party packages that the notebook imports further down.
!pip install torch torchvision
!pip install imageio

!pip install matplotlib

# Create the assignment folder (no error if it already exists) and make it
# the working directory for every following cell.
%mkdir -p /content/csc421/a4/
%cd /content/csc421/a4

/content/csc421/a4
/content/csc421/a4


# Helper code

## Utility functions

In [0]:
import os

import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.nn import Parameter
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms

from six.moves.urllib.request import urlretrieve
import tarfile

import imageio
from urllib.error import URLError
from urllib.error import HTTPError


def get_file(fname,
             origin,
             untar=False,
             extract=False,
             archive_format='auto',
             cache_dir='data'):
    """Downloads a file into a cache directory unless it is already there.

        Arguments:
            fname: Name of the file inside cache_dir. When untar is set, this is
                the name of the extracted entry and the archive itself is
                stored as fname + '.tar.gz'.
            origin: URL the file is fetched from when it is not cached yet.
            untar: If true, treat the download as a tar archive and extract it
                into cache_dir.
            extract: Unused; kept so that existing callers keep working.
            archive_format: Unused; kept so that existing callers keep working.
            cache_dir: Directory that holds the downloads (created if missing).

        Returns:
            The path of the extracted entry if untar is true, otherwise the
            path of the downloaded file.

        Raises:
            Exception: If the download fails. A partially written file is
                removed before the error propagates.
    """
    datadir = os.path.join(cache_dir)
    os.makedirs(datadir, exist_ok=True)

    if untar:
        untar_fpath = os.path.join(datadir, fname)
        fpath = untar_fpath + '.tar.gz'
    else:
        fpath = os.path.join(datadir, fname)

    print(fpath)
    if not os.path.exists(fpath):
        print('Downloading data from', origin)

        error_msg = 'URL fetch failure on {}: {} -- {}'
        try:
            try:
                urlretrieve(origin, fpath)
            # HTTPError is a subclass of URLError, so it has to be handled
            # first; otherwise the URLError clause would swallow it and the
            # HTTP status code would never be reported.
            except HTTPError as e:
                raise Exception(error_msg.format(origin, e.code, e.msg)) from e
            except URLError as e:
                raise Exception(error_msg.format(origin, e.errno, e.reason)) from e
        except (Exception, KeyboardInterrupt):
            # Never leave a truncated download behind: it would be mistaken
            # for a valid cached file on the next call.
            if os.path.exists(fpath):
                os.remove(fpath)
            raise

    if untar:
        if not os.path.exists(untar_fpath):
            print('Extracting file.')
            # NOTE(review): extractall() trusts the member paths stored in the
            # archive; only use this with archives from a trusted origin.
            with tarfile.open(fpath) as archive:
                archive.extractall(datadir)
        return untar_fpath

    return fpath


class AttrDict(dict):
    """A dict whose entries can also be read and written as attributes."""

    def __init__(self, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)
        # Using the dict itself as the instance namespace makes ``d.key`` and
        # ``d['key']`` two views of the same storage.
        self.__dict__ = self

                
def to_var(tensor, cuda=True):
    """Wraps a Tensor in a Variable, optionally placing it on the GPU.

        Arguments:
            tensor: A Tensor object.
            cuda: A boolean flag indicating whether to use the GPU.

        Returns:
            A Variable object. It is on the GPU if cuda==True and a CUDA device
            is available; otherwise it stays on the CPU.
    """
    # The model-building helpers in this file only move models to the GPU when
    # torch.cuda.is_available(); apply the same guard here so that inputs and
    # models end up on the same device on a CPU-only runtime.
    if cuda and torch.cuda.is_available():
        tensor = tensor.cuda()
    return Variable(tensor)

    
def to_data(x):
    """Returns the values of a tensor/variable as a numpy array.

    The tensor is copied to host memory first when a CUDA device is available.
    """
    host = x.cpu() if torch.cuda.is_available() else x
    return host.data.numpy()


def create_dir(directory):
    """Makes `directory` (including missing parents) unless the path already exists.
    """
    if os.path.exists(directory):
        return
    os.makedirs(directory)


def gan_checkpoint(iteration, G, D, opts):
    """Writes the state dicts of generator G and discriminator D to
    opts.checkpoint_dir as 'G.pkl' and 'D.pkl'.

    `iteration` is accepted but not used: each call overwrites the same files.
    """
    for model, filename in ((G, 'G.pkl'), (D, 'D.pkl')):
        target = os.path.join(opts.checkpoint_dir, filename)
        torch.save(model.state_dict(), target)


def cyclegan_checkpoint(iteration, G_XtoY, G_YtoX, D_X, D_Y, opts):
    """Writes the state dicts of both generators and both discriminators to
    opts.checkpoint_dir, one '<name>.pkl' file per model.

    `iteration` is accepted but not used: each call overwrites the same files.
    """
    named_models = (
        ('G_XtoY.pkl', G_XtoY),
        ('G_YtoX.pkl', G_YtoX),
        ('D_X.pkl', D_X),
        ('D_Y.pkl', D_Y),
    )
    for filename, model in named_models:
        torch.save(model.state_dict(), os.path.join(opts.checkpoint_dir, filename))


def load_checkpoint(opts):
    """Rebuilds the two CycleGAN generators and two discriminators and restores
    their weights from the '<name>.pkl' files found in opts.load.

    Returns the tuple (G_XtoY, G_YtoX, D_X, D_Y); the models are moved to the
    GPU when one is available.
    """
    names = ('G_XtoY', 'G_YtoX', 'D_X', 'D_Y')
    paths = [os.path.join(opts.load, name + '.pkl') for name in names]

    models = [
        CycleGenerator(conv_dim=opts.g_conv_dim, init_zero_weights=opts.init_zero_weights),
        CycleGenerator(conv_dim=opts.g_conv_dim, init_zero_weights=opts.init_zero_weights),
        DCDiscriminator(conv_dim=opts.d_conv_dim),
        DCDiscriminator(conv_dim=opts.d_conv_dim),
    ]

    for model, path in zip(models, paths):
        # Load onto the CPU regardless of the device the checkpoint was saved from.
        state = torch.load(path, map_location=lambda storage, loc: storage)
        model.load_state_dict(state)

    if torch.cuda.is_available():
        for model in models:
            model.cuda()
        print('Models moved to GPU.')

    G_XtoY, G_YtoX, D_X, D_Y = models
    return G_XtoY, G_YtoX, D_X, D_Y


def merge_images(sources, targets, opts):
    """Creates a grid consisting of pairs of columns, where the first column in
    each pair contains source images and the second column in each pair
    contains the images generated by the CycleGAN from the corresponding
    images in the first column.

        Arguments:
            sources: Array of shape (N, 3, h, w) holding the input images.
            targets: Array of the same shape holding the generated images.
            opts: Options object; only opts.batch_size is read. The grid has
                int(sqrt(batch_size)) rows and as many source/target column pairs.

        Returns:
            An array of shape (rows * h, rows * w * 2, 3). At most rows ** 2
            image pairs are placed; unused cells stay zero.
    """
    _, _, h, w = sources.shape
    row = int(np.sqrt(opts.batch_size))
    merged = np.zeros([3, row * h, row * w * 2])
    for idx, s, t in zip(range(row ** 2), sources, targets):
        i, j = divmod(idx, row)
        top = i * h
        # Horizontal offsets are measured in image widths (the original used
        # the height here, which only works for square images).
        left = 2 * j * w
        merged[:, top:top + h, left:left + w] = s
        merged[:, top:top + h, left + w:left + 2 * w] = t
    return merged.transpose(1, 2, 0)


def generate_gif(directory_path, keyword=None):
    """Builds an animated GIF from the .png files in `directory_path`.

    Files are taken in sorted name order. When `keyword` is given, only files
    whose name contains it are used and the result is saved as
    'anim_<keyword>.gif'; otherwise it is saved as 'anim.gif'.
    """
    frames = []
    for entry in sorted(os.listdir(directory_path)):
        if not entry.endswith(".png"):
            continue
        if keyword is not None and keyword not in entry:
            continue
        frame_path = os.path.join(directory_path, entry)
        print("adding image {}".format(frame_path))
        frames.append(imageio.imread(frame_path))

    gif_name = 'anim_{}.gif'.format(keyword) if keyword else 'anim.gif'
    imageio.mimsave(os.path.join(directory_path, gif_name), frames)


def create_image_grid(array, ncols=None):
    """Tiles a batch of images into one grid image.

        Arguments:
            array: numpy array of shape (num_images, channels, height, width).
            ncols: Number of grid columns. Defaults to int(sqrt(num_images)).

        Returns:
            An array of shape (nrows * height, ncols * width, channels) with the
            same dtype as `array`, filled row by row. Images that do not fill a
            complete row are dropped. For single-channel input the result is
            squeezed.
    """
    num_images, channels, cell_h, cell_w = array.shape
    if not ncols:
        ncols = int(np.sqrt(num_images))
    # Integer floor division; the original went through `np.math`, an
    # undocumented alias that is no longer available in current NumPy.
    nrows = num_images // ncols
    result = np.zeros((cell_h * nrows, cell_w * ncols, channels), dtype=array.dtype)
    for idx in range(nrows * ncols):
        i, j = divmod(idx, ncols)
        # Each image is stored channels-first; the grid is channels-last.
        result[i * cell_h:(i + 1) * cell_h, j * cell_w:(j + 1) * cell_w, :] = array[idx].transpose(1, 2, 0)

    if channels == 1:
        result = result.squeeze()
    return result


def gan_save_samples(G, fixed_noise, iteration, opts):
    """Runs the generator on `fixed_noise`, tiles the outputs into a grid and
    writes it to opts.sample_dir as 'sample-<iteration>.png'.
    """
    samples = to_data(G(fixed_noise))
    grid = create_image_grid(samples)

    out_name = 'sample-{:06d}.png'.format(iteration)
    path = os.path.join(opts.sample_dir, out_name)
    imageio.imwrite(path, grid)
    print('Saved {}'.format(path))


def cyclegan_save_samples(iteration, fixed_Y, fixed_X, G_YtoX, G_XtoY, opts):
    """Translates the fixed X and Y batches with both generators and writes
    one side-by-side grid per direction (X->Y and Y->X) to opts.sample_dir.
    """
    fake_X = G_YtoX(fixed_Y)
    fake_Y = G_XtoY(fixed_X)

    # (file name, real images, translated images) for each direction.
    jobs = (
        ('sample-{:06d}-X-Y.png'.format(iteration), to_data(fixed_X), to_data(fake_Y)),
        ('sample-{:06d}-Y-X.png'.format(iteration), to_data(fixed_Y), to_data(fake_X)),
    )
    for filename, real, fake in jobs:
        merged = merge_images(real, fake, opts)
        path = os.path.join(opts.sample_dir, filename)
        imageio.imwrite(path, merged)
        print('Saved {}'.format(path))

## Data loader

In [0]:
def get_emoji_loader(emoji_type, opts):
    """Creates training and test data loaders.

        Arguments:
            emoji_type: Name of the sub-folder of data/emojis with the training
                images; the test images are read from 'Test_<emoji_type>'.
            opts: Options object; image_size, batch_size and num_workers are read.

        Returns:
            A (train_dloader, test_dloader) tuple. Only the training loader
            shuffles its data.
    """
    transform = transforms.Compose([
                    # transforms.Scale was deprecated and later removed from
                    # torchvision; Resize is its direct replacement.
                    transforms.Resize(opts.image_size),
                    transforms.ToTensor(),
                    # Map each channel from [0, 1] to [-1, 1].
                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                ])

    train_path = os.path.join('data/emojis', emoji_type)
    test_path = os.path.join('data/emojis', 'Test_{}'.format(emoji_type))

    train_dataset = datasets.ImageFolder(train_path, transform)
    test_dataset = datasets.ImageFolder(test_path, transform)

    train_dloader = DataLoader(dataset=train_dataset, batch_size=opts.batch_size, shuffle=True, num_workers=opts.num_workers)
    test_dloader = DataLoader(dataset=test_dataset, batch_size=opts.batch_size, shuffle=False, num_workers=opts.num_workers)

    return train_dloader, test_dloader

## Training and evaluation code

In [0]:
def print_models(G_XtoY, G_YtoX, D_X, D_Y):
    """Prints each model under a titled banner.

    When G_YtoX is truthy all four CycleGAN models are printed; otherwise only
    the first generator and first discriminator are printed, labelled G and D.
    """
    rule = "---------------------------------------"

    def show(title, model):
        print(title)
        print(rule)
        print(model)
        print(rule)

    if G_YtoX:
        sections = [
            ("                 G_XtoY                ", G_XtoY),
            ("                 G_YtoX                ", G_YtoX),
            ("                  D_X                  ", D_X),
            ("                  D_Y                  ", D_Y),
        ]
    else:
        sections = [
            ("                 G                     ", G_XtoY),
            ("                  D                    ", D_X),
        ]

    for title, model in sections:
        show(title, model)


def create_model(opts):
    """Instantiates the networks selected by the options.

    With opts.Y set, returns the CycleGAN tuple (G_XtoY, G_YtoX, D_X, D_Y);
    with opts.Y None, returns the plain GAN pair (G, D). The models are printed
    and moved to the GPU when one is available.
    """
    if opts.Y is not None:
        ### CycleGAN
        G_XtoY = CycleGenerator(conv_dim=opts.g_conv_dim, init_zero_weights=opts.init_zero_weights)
        G_YtoX = CycleGenerator(conv_dim=opts.g_conv_dim, init_zero_weights=opts.init_zero_weights)
        D_X = DCDiscriminator(conv_dim=opts.d_conv_dim)
        D_Y = DCDiscriminator(conv_dim=opts.d_conv_dim)

        print_models(G_XtoY, G_YtoX, D_X, D_Y)

        if torch.cuda.is_available():
            for model in (G_XtoY, G_YtoX, D_X, D_Y):
                model.cuda()
            print('Models moved to GPU.')
        return G_XtoY, G_YtoX, D_X, D_Y

    ### GAN
    G = DCGenerator(noise_size=opts.noise_size, conv_dim=opts.g_conv_dim, spectral_norm=opts.spectral_norm)
    D = DCDiscriminator(conv_dim=opts.d_conv_dim, spectral_norm=opts.spectral_norm)

    print_models(G, None, D, None)

    if torch.cuda.is_available():
        for model in (G, D):
            model.cuda()
        print('Models moved to GPU.')
    return G, D


def train(opts):
    """Entry point for training: builds the data loaders, makes sure the
    checkpoint and sample directories exist, and runs the matching training loop.

    Returns (G, D) for a plain GAN (opts.Y is None) and
    (G_XtoY, G_YtoX, D_X, D_Y) for a CycleGAN.
    """
    # Data loaders for domain X, and for domain Y when one is configured.
    dataloader_X, test_dataloader_X = get_emoji_loader(emoji_type=opts.X, opts=opts)
    if opts.Y:
        dataloader_Y, test_dataloader_Y = get_emoji_loader(emoji_type=opts.Y, opts=opts)

    # Output directories.
    for out_dir in (opts.checkpoint_dir, opts.sample_dir):
        create_dir(out_dir)

    # Run the training loop that matches the configuration.
    if opts.Y is not None:
        G_XtoY, G_YtoX, D_X, D_Y = cyclegan_training_loop(
            dataloader_X, dataloader_Y, test_dataloader_X, test_dataloader_Y, opts)
        return G_XtoY, G_YtoX, D_X, D_Y

    G, D = gan_training_loop(dataloader_X, test_dataloader_X, opts)
    return G, D


def print_opts(opts):
    """Prints the values of all command-line arguments.

    Every attribute of `opts` with a truthy value is printed as a centred
    'key: value' line; falsy values (None, 0, False, '') are skipped.
    """
    print('=' * 80)
    print('Opts'.center(80))
    print('-' * 80)
    for key, value in opts.__dict__.items():
        if value:
            # Format the string form of the value: lists, tuples and dicts
            # reject the '<30' alignment spec, and a bare bool would be
            # rendered as the integer 1.
            print('{:>30}: {:<30}'.format(key, str(value)).center(80))
    print('=' * 80)


# Your code for generators and discriminators

## Helper modules

In [0]:
def sample_noise(batch_size, dim):
    """
    Draw a batch of uniform random noise vectors.

    Input:
    - batch_size: Integer giving the batch size of noise to generate.
    - dim: Integer giving the dimension of noise to generate.

    Output:
    - A PyTorch Tensor of shape (batch_size, dim, 1, 1) containing uniform
      random noise in the range (-1, 1).
    """
    # torch.rand samples from [0, 1); rescale to [-1, 1).
    uniform = torch.rand(batch_size, dim) * 2 - 1
    noise = to_var(uniform)
    # Append two singleton spatial axes so the noise can feed a conv layer.
    return noise.unsqueeze(2).unsqueeze(3)
  

def upconv(in_channels, out_channels, kernel_size, stride=2, padding=2, batch_norm=True, spectral_norm=False):
    """Builds an nn.Sequential that upsamples by `stride` (skipped when
    stride <= 1), applies a bias-free stride-1 convolution (optionally wrapped
    in SpectralNorm) and, if requested, batch normalization.
    """
    upsample = [nn.Upsample(scale_factor=stride)] if stride > 1 else []

    conv_layer = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1, padding=padding, bias=False)
    core = SpectralNorm(conv_layer) if spectral_norm else conv_layer

    norm = [nn.BatchNorm2d(out_channels)] if batch_norm else []
    return nn.Sequential(*upsample, core, *norm)


def conv(in_channels, out_channels, kernel_size, stride=2, padding=2, batch_norm=True, init_zero_weights=False, spectral_norm=False):
    """Builds an nn.Sequential holding a bias-free convolution (optionally
    wrapped in SpectralNorm) followed, if requested, by batch normalization.

    With init_zero_weights the kernel is re-initialised with small Gaussian
    values (standard normal scaled by 0.001).
    """
    conv_layer = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
    if init_zero_weights:
        conv_layer.weight.data = torch.randn(out_channels, in_channels, kernel_size, kernel_size) * 0.001

    stack = [SpectralNorm(conv_layer) if spectral_norm else conv_layer]
    if batch_norm:
        stack.append(nn.BatchNorm2d(out_channels))
    return nn.Sequential(*stack)
  

class ResnetBlock(nn.Module):
    """Residual block: adds the output of one 3x3 conv (+ batch norm) layer to
    its input. Channel count and spatial size are preserved.
    """

    def __init__(self, conv_dim):
        super().__init__()
        self.conv_layer = conv(in_channels=conv_dim, out_channels=conv_dim, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        residual = self.conv_layer(x)
        return x + residual

## DCGAN

## Spectral Norm class

In [0]:
def l2normalize(v, eps=1e-12):
    """Scales `v` to unit L2 norm; `eps` keeps the division finite for a zero vector."""
    scale = v.norm() + eps
    return v / scale


class SpectralNorm(nn.Module):
    """Wraps a module and rescales one of its weights on every forward pass.

    The weight called `name` is replaced by three parameters on the wrapped
    module: `<name>_bar` (the trainable raw weight) and `<name>_u` / `<name>_v`
    (vectors maintained by power iteration, not trained). Before each forward
    call the raw weight is divided by sigma = u^T W v, where W is the weight
    reshaped to (first dimension, everything else).
    """
    def __init__(self, module, name='weight', power_iterations=1):
        """
        Arguments:
            module: The module whose weight is normalized.
            name: Attribute name of the weight on `module`.
            power_iterations: Number of power-iteration steps per forward pass.
        """
        super(SpectralNorm, self).__init__()
        self.module = module
        self.name = name
        self.power_iterations = power_iterations
        # Only create the helper parameters if the module does not carry them yet.
        if not self._made_params():
            self._make_params()

    def _update_u_v(self):
        """Refines u and v by power iteration and installs the rescaled weight."""
        u = getattr(self.module, self.name + "_u")
        v = getattr(self.module, self.name + "_v")
        w = getattr(self.module, self.name + "_bar")

        height = w.data.shape[0]
        # The u/v updates work on .data, i.e. outside of autograd.
        for _ in range(self.power_iterations):
            v.data = l2normalize(torch.mv(torch.t(w.view(height,-1).data), u.data))
            u.data = l2normalize(torch.mv(w.view(height,-1).data, v.data))

        # sigma = torch.dot(u.data, torch.mv(w.view(height,-1).data, v.data))
        # sigma is computed from w itself (not w.data), unlike the variant above.
        sigma = u.dot(w.view(height, -1).mv(v))
        # Store the rescaled weight under the original attribute name, which
        # _make_params removed from the module's registered parameters.
        setattr(self.module, self.name, w / sigma.expand_as(w))

    def _made_params(self):
        """Returns True if the module already has the _u, _v and _bar attributes."""
        try:
            u = getattr(self.module, self.name + "_u")
            v = getattr(self.module, self.name + "_v")
            w = getattr(self.module, self.name + "_bar")
            return True
        except AttributeError:
            return False

    def _make_params(self):
        """Replaces the module's weight parameter by the _u, _v and _bar parameters."""
        w = getattr(self.module, self.name)

        # View the weight as a matrix: first dimension x everything else.
        height = w.data.shape[0]
        width = w.view(height, -1).data.shape[1]

        # u and v start as normalized Gaussian vectors and are excluded from training.
        u = Parameter(w.data.new(height).normal_(0, 1), requires_grad=False)
        v = Parameter(w.data.new(width).normal_(0, 1), requires_grad=False)
        u.data = l2normalize(u.data)
        v.data = l2normalize(v.data)
        w_bar = Parameter(w.data)

        # Unregister the original parameter; _update_u_v sets this attribute
        # to the rescaled weight on every forward pass instead.
        del self.module._parameters[self.name]

        self.module.register_parameter(self.name + "_u", u)
        self.module.register_parameter(self.name + "_v", v)
        self.module.register_parameter(self.name + "_bar", w_bar)

    def forward(self, *args):
        """Refreshes the rescaled weight, then runs the wrapped module."""
        self._update_u_v()
        return self.module.forward(*args)

In [8]:
# Sanity check: a stride-2 upconv with a 1x1 kernel turns a 1x1 input into a
# 2x2 output (see the printed shape below).
x = torch.randn(32, 100, 1, 1)
layer = upconv(100, 128, kernel_size=1, stride=2, padding=0)
layer(x).shape

torch.Size([32, 128, 2, 2])

### GAN generator

In [0]:
class DCGenerator(nn.Module):
    """DCGAN generator: maps a noise vector to a 3 x 32 x 32 image.

        Arguments:
            noise_size: Number of channels of the input noise.
            conv_dim: Base channel width; the layers use conv_dim*4, conv_dim*2
                and conv_dim channels in turn.
            spectral_norm: If true, wrap the convolutions of the three
                upsampling stages in SpectralNorm.
    """
    def __init__(self, noise_size, conv_dim, spectral_norm=False):
        super(DCGenerator, self).__init__()

        self.conv_dim = conv_dim

        # First stage, used in place of a linear layer: upsampling the 1x1
        # noise by 3 and convolving with a 4x4 kernel and padding 2 gives a
        # 4x4 feature map. The channel counts follow the constructor arguments
        # (they were previously hard-coded to 100 and 128, which silently
        # ignored noise_size and only matched conv_dim=32).
        self.linear_bn = upconv(noise_size, self.conv_dim*4, kernel_size=4, stride=3, padding=2, batch_norm=True)
        # Each following stage doubles the spatial size: 4 -> 8 -> 16 -> 32.
        self.upconv1 = upconv(self.conv_dim*4, self.conv_dim*2, 5, stride=2, padding=2, batch_norm=True, spectral_norm=spectral_norm)
        self.upconv2 = upconv(self.conv_dim*2, self.conv_dim, 5, stride=2, padding=2, batch_norm=True, spectral_norm=spectral_norm)
        self.upconv3 = upconv(self.conv_dim, 3, 5, stride=2, padding=2, batch_norm=False, spectral_norm=spectral_norm)

    def forward(self, z):
        """Generates an image given a sample of random noise.

            Input
            -----
                z: BS x noise_size x 1 x 1   -->  BSx100x1x1 (during training)

            Output
            ------
                out: BS x channels x image_width x image_height  -->  BSx3x32x32 (during training)

            Raises
            ------
                ValueError: if the output is not BS x 3 x 32 x 32.
        """
        batch_size = z.size(0)
        out = F.relu(self.linear_bn(z)).view(-1, self.conv_dim*4, 4, 4)    # BS x conv_dim*4 x 4 x 4
        out = F.relu(self.upconv1(out))  # BS x conv_dim*2 x 8 x 8
        out = F.relu(self.upconv2(out))  # BS x conv_dim x 16 x 16
        out = F.tanh(self.upconv3(out))  # BS x 3 x 32 x 32

        out_size = out.size()
        if out_size != torch.Size([batch_size, 3, 32, 32]):
            raise ValueError("expect {} x 3 x 32 x 32, but get {}".format(batch_size, out_size))
        return out


### GAN discriminator

In [0]:
class DCDiscriminator(nn.Module):
    """Defines the architecture of the discriminator network.
       Note: Both discriminators D_X and D_Y have the same architecture in this assignment.

        Arguments:
            conv_dim: Base channel width; the layers use conv_dim, conv_dim*2
                and conv_dim*4 channels in turn.
            spectral_norm: If true, wrap every convolution in SpectralNorm.
    """
    def __init__(self, conv_dim=64, spectral_norm=False):
        super(DCDiscriminator, self).__init__()

        # Three stride-2 convolutions halve the spatial size each time
        # (32 -> 16 -> 8 -> 4); the last one reduces 4x4 to a single score.
        self.conv1 = conv(in_channels=3, out_channels=conv_dim, kernel_size=5, stride=2, spectral_norm=spectral_norm)
        self.conv2 = conv(in_channels=conv_dim, out_channels=conv_dim*2, kernel_size=5, stride=2, spectral_norm=spectral_norm)
        self.conv3 = conv(in_channels=conv_dim*2, out_channels=conv_dim*4, kernel_size=5, stride=2, spectral_norm=spectral_norm)
        self.conv4 = conv(in_channels=conv_dim*4, out_channels=1, kernel_size=5, stride=2, padding=1, batch_norm=False, spectral_norm=spectral_norm)

    def forward(self, x):
        """Scores a batch of images.

            Input
            -----
                x: BS x 3 x 32 x 32

            Output
            ------
                out: tensor of shape (BS,) with one unbounded score per image

            Raises
            ------
                ValueError: if the network does not produce exactly one score
                    per image (e.g. for inputs that are not 32 x 32).
        """
        batch_size = x.size(0)  # Input shape: BS x 3 x 32 x 32

        out = F.relu(self.conv1(x))      # BS x conv_dim x 16 x 16
        out = F.relu(self.conv2(out))    # BS x conv_dim*2 x 8 x 8
        out = F.relu(self.conv3(out))    # BS x conv_dim*4 x 4 x 4

        # Flatten everything but the batch axis, then drop the trailing
        # singleton. A bare squeeze() would also remove the batch axis when
        # BS == 1 and make the size check below reject a valid batch.
        out = self.conv4(out).reshape(batch_size, -1).squeeze(1)
        out_size = out.size()
        if out_size != torch.Size([batch_size,]):
            raise ValueError("expect {} x 1, but get {}".format(batch_size, out_size))
        return out

### GAN training loop

In [0]:
def gan_training_loop(dataloader, test_dataloader, opts):
    """Runs the training loop.
        * Saves checkpoint every opts.checkpoint_every iterations
        * Saves generated samples every opts.sample_every iterations

        Arguments:
            dataloader: Loader yielding (images, labels) training batches; the
                labels are ignored.
            test_dataloader: Not used by this loop; kept for interface
                compatibility with the caller.
            opts: Options object. Reads lr, beta1, beta2, noise_size,
                train_iters, d_train_iters, gradient_penalty, log_step,
                sample_every, checkpoint_every, sample_dir (and checkpoint_dir
                through gan_checkpoint).

        Returns:
            The (G, D) pair. On KeyboardInterrupt the models trained so far are
            returned and no loss plot is written.
    """

    # Create generators and discriminators
    G, D = create_model(opts)

    # Create optimizers for the generator and the discriminator; the
    # discriminator uses twice the generator's learning rate.
    g_optimizer = optim.Adam(G.parameters(), opts.lr, [opts.beta1, opts.beta2])
    d_optimizer = optim.Adam(D.parameters(), opts.lr * 2., [opts.beta1, opts.beta2])

    train_iter = iter(dataloader)

    # Noise that is held constant throughout training, so that the saved
    # samples show how the generator's output for the same input evolves.
    fixed_noise = sample_noise(100, opts.noise_size)  # 100 x noise_size x 1 x 1

    iter_per_epoch = len(dataloader)
    total_train_iters = opts.train_iters

    losses = {"iteration": [], "D_fake_loss": [], "D_real_loss": [], "G_loss": []}

    gp_weight = 10

    try:
        for iteration in range(1, opts.train_iters + 1):

            # Reset data_iter for each epoch. The reset happens before the
            # last batch of an epoch is drawn, so the iterator never runs dry.
            if iteration % iter_per_epoch == 0:
                train_iter = iter(dataloader)

            # Use the built-in next(): DataLoader iterators in current PyTorch
            # no longer provide a .next() method.
            real_images, _ = next(train_iter)
            real_images = to_var(real_images)

            ###########################################
            ###       TRAIN THE DISCRIMINATOR       ###
            ###########################################

            for d_i in range(opts.d_train_iters):
                d_optimizer.zero_grad()

                # 1. Compute the discriminator loss on real images
                #    (least-squares loss with target 1)
                D_real_loss = torch.mean((D(real_images) - 1) ** 2) / 2

                # 2. Sample noise
                noise = sample_noise(real_images.shape[0], opts.noise_size)

                # 3. Generate fake images from the noise
                fake_images = G(noise)

                # 4. Compute the discriminator loss on the fake images
                #    (least-squares loss with target 0)
                D_fake_loss = torch.mean(D(fake_images) ** 2) / 2

                # ---- Gradient Penalty ----
                if opts.gradient_penalty:
                    # One mixing coefficient per sample, created on the same
                    # device as the images instead of assuming CUDA.
                    alpha = torch.rand(real_images.shape[0], 1, 1, 1)
                    alpha = alpha.expand_as(real_images).to(real_images.device)
                    interp_images = Variable(alpha * real_images.data + (1 - alpha) * fake_images.data, requires_grad=True)
                    D_interp_output = D(interp_images)

                    gradients = torch.autograd.grad(outputs=D_interp_output, inputs=interp_images,
                                                    grad_outputs=torch.ones_like(D_interp_output),
                                                    create_graph=True, retain_graph=True)[0]
                    gradients = gradients.view(real_images.shape[0], -1)
                    # The small constant keeps the sqrt differentiable at 0.
                    gradients_norm = torch.sqrt(torch.sum(gradients ** 2, dim=1) + 1e-12)

                    # NOTE(review): this penalises the gradient norm itself; the
                    # WGAN-GP formulation penalises (norm - 1) ** 2 -- confirm
                    # which one is intended.
                    gp = gp_weight * gradients_norm.mean()
                else:
                    gp = 0.0

                # --------------------------
                # 5. Compute the total discriminator loss
                D_total_loss = D_real_loss + D_fake_loss + gp

                D_total_loss.backward()
                d_optimizer.step()

            ###########################################
            ###          TRAIN THE GENERATOR        ###
            ###########################################

            g_optimizer.zero_grad()

            # 1. Sample noise
            noise = sample_noise(real_images.shape[0], opts.noise_size)

            # 2. Generate fake images from the noise
            fake_images = G(noise)

            # 3. Compute the generator loss
            G_loss = torch.mean((D(fake_images) - 1)**2)

            G_loss.backward()
            g_optimizer.step()

            # Print the log info
            if iteration % opts.log_step == 0:
                losses['iteration'].append(iteration)
                losses['D_real_loss'].append(D_real_loss.item())
                losses['D_fake_loss'].append(D_fake_loss.item())
                losses['G_loss'].append(G_loss.item())
                print('Iteration [{:4d}/{:4d}] | D_real_loss: {:6.4f} | D_fake_loss: {:6.4f} | G_loss: {:6.4f}'.format(
                    iteration, total_train_iters, D_real_loss.item(), D_fake_loss.item(), G_loss.item()))

            # Save the generated samples
            if iteration % opts.sample_every == 0:
                gan_save_samples(G, fixed_noise, iteration, opts)

            # Save the model parameters
            if iteration % opts.checkpoint_every == 0:
                gan_checkpoint(iteration, G, D, opts)

    except KeyboardInterrupt:
        print('Exiting early from training.')
        return G, D

    # Plot the logged losses next to the generated samples.
    plt.figure()
    plt.plot(losses['iteration'], losses['D_real_loss'], label='D_real')
    plt.plot(losses['iteration'], losses['D_fake_loss'], label='D_fake')
    plt.plot(losses['iteration'], losses['G_loss'], label='G')
    plt.legend()
    plt.savefig(os.path.join(opts.sample_dir, 'losses.png'))
    plt.close()
    return G, D

## CycleGAN

### CycleGAN generator

In [0]:
class CycleGenerator(nn.Module):
    """Image-to-image generator used for both translation directions
    (G_XtoY and G_YtoX share this architecture): a two-layer convolutional
    encoder, one residual block, and a two-layer upsampling decoder.
    """
    def __init__(self, conv_dim=64, init_zero_weights=False):
        super().__init__()
        encoded_dim = conv_dim * 2

        # Encoder: extracts features from the input image.
        self.conv1 = conv(in_channels=3, out_channels=conv_dim, kernel_size=5, init_zero_weights=init_zero_weights)
        self.conv2 = conv(in_channels=conv_dim, out_channels=encoded_dim, kernel_size=5, init_zero_weights=init_zero_weights)

        # Transformation applied to the encoded features.
        self.resnet_block = ResnetBlock(encoded_dim)

        # Decoder: builds the output image back up from the features.
        self.upconv1 = upconv(in_channels=encoded_dim, out_channels=conv_dim, kernel_size=5)
        self.upconv2 = upconv(in_channels=conv_dim, out_channels=3, kernel_size=5, batch_norm=False)

    def forward(self, x):
        """Translates a batch of images.

            Input
            -----
                x: BS x 3 x 32 x 32

            Output
            ------
                out: BS x 3 x 32 x 32

            Raises
            ------
                ValueError: if the output is not BS x 3 x 32 x 32.
        """
        batch_size = x.size(0)

        # conv1 and conv2 each halve the spatial size, the residual block
        # keeps it, and upconv1 doubles it again; all are followed by a ReLU.
        out = x
        for stage in (self.conv1, self.conv2, self.resnet_block, self.upconv1):
            out = F.relu(stage(out))

        # The last stage restores the input resolution and squashes to (-1, 1).
        out = F.tanh(self.upconv2(out))

        out_size = out.size()
        expected = torch.Size([batch_size, 3, 32, 32])
        if out_size != expected:
            raise ValueError("expect {} x 3 x 32 x 32, but get {}".format(batch_size, out_size))

        return out

### CycleGAN training loop

In [0]:
def cyclegan_training_loop(dataloader_X, dataloader_Y, test_dataloader_X, test_dataloader_Y, opts):
    """Runs the CycleGAN training loop.
        * Saves checkpoint every opts.checkpoint_every iterations
        * Saves generated samples every opts.sample_every iterations
        * Prints the losses every opts.log_step iterations

    Each iteration performs four optimizer steps: the discriminators on real
    images, the discriminators on generated images, the generators on the
    Y -> X -> Y cycle, and the generators on the X -> Y -> X cycle. All
    adversarial terms are least-squares losses; the cycle term is the absolute
    reconstruction error summed per image and averaged over the batch, weighted
    by opts.lambda_cycle.

    Arguments
    ---------
        dataloader_X, dataloader_Y: training loaders yielding (images, labels)
            batches for the two domains; the labels are ignored.
        test_dataloader_X, test_dataloader_Y: loaders whose first batch is held
            fixed and used for the periodically saved samples.
        opts: options object; this function reads train_iters, lr, beta1, beta2,
            lambda_cycle, log_step, sample_every and checkpoint_every, and passes
            the whole object on to the model/sample/checkpoint helpers.

    Returns
    -------
        (G_XtoY, G_YtoX, D_X, D_Y). A KeyboardInterrupt stops training early and
        still returns the models in their current state.
    """

    # Create generators and discriminators
    G_XtoY, G_YtoX, D_X, D_Y = create_model(opts)

    g_params = list(G_XtoY.parameters()) + list(G_YtoX.parameters())  # Get generator parameters
    d_params = list(D_X.parameters()) + list(D_Y.parameters())  # Get discriminator parameters

    # One optimizer drives both generators, another drives both discriminators
    g_optimizer = optim.Adam(g_params, opts.lr, [opts.beta1, opts.beta2])
    d_optimizer = optim.Adam(d_params, opts.lr, [opts.beta1, opts.beta2])

    iter_X = iter(dataloader_X)
    iter_Y = iter(dataloader_Y)

    test_iter_X = iter(test_dataloader_X)
    test_iter_Y = iter(test_dataloader_Y)

    # Get some fixed data from domains X and Y for sampling. These are images that are held
    # constant throughout training, that allow us to inspect the model's performance.
    # The builtin next() is used because DataLoader iterators in newer PyTorch
    # releases no longer expose a .next() method.
    fixed_X = to_var(next(test_iter_X)[0])
    fixed_Y = to_var(next(test_iter_Y)[0])

    # An "epoch" is bounded by the shorter of the two training loaders.
    iter_per_epoch = min(len(dataloader_X), len(dataloader_Y))

    try:
        for iteration in range(1, opts.train_iters + 1):

            # Restart both training iterators every iter_per_epoch iterations so
            # neither one is advanced past its last batch.
            if iteration % iter_per_epoch == 0:
                iter_X = iter(dataloader_X)
                iter_Y = iter(dataloader_Y)

            # Labels are not used by CycleGAN training, so they are discarded.
            images_X, _ = next(iter_X)
            images_X = to_var(images_X)

            images_Y, _ = next(iter_Y)
            images_Y = to_var(images_Y)

            # ============================================
            #            TRAIN THE DISCRIMINATORS
            # ============================================

            # --- Step 1: real images should be scored as 1 ---
            d_optimizer.zero_grad()

            D_X_loss = torch.mean((D_X(images_X) - 1)**2)
            D_Y_loss = torch.mean((D_Y(images_Y) - 1)**2)

            d_real_loss = D_X_loss + D_Y_loss
            d_real_loss.backward()
            d_optimizer.step()

            # --- Step 2: generated images should be scored as 0 ---
            d_optimizer.zero_grad()

            # The fakes are detached: this step only updates the discriminators,
            # so back-propagating through the generators would be wasted work
            # (their gradients are zeroed before the generator step anyway).
            fake_X = G_YtoX(images_Y).detach()
            D_X_loss = torch.mean(D_X(fake_X)**2)

            fake_Y = G_XtoY(images_X).detach()
            D_Y_loss = torch.mean(D_Y(fake_Y)**2)

            d_fake_loss = D_X_loss + D_Y_loss
            d_fake_loss.backward()
            d_optimizer.step()

            # =========================================
            #            TRAIN THE GENERATORS
            # =========================================

            # --- Y -> X -> Y cycle ---
            g_optimizer.zero_grad()

            # Adversarial term: the fake X should be scored as real (1) by D_X
            fake_X = G_YtoX(images_Y)
            g_loss = torch.mean((D_X(fake_X) - 1)**2)

            # Cycle term: mapping the fake X back should reproduce the original Y.
            # L1 error summed over channel/height/width, then averaged over the batch.
            reconstructed_Y = G_XtoY(fake_X)
            cycle_consistency_loss = torch.mean(torch.sum(torch.abs(images_Y - reconstructed_Y), [1, 2, 3]))

            g_loss += opts.lambda_cycle * cycle_consistency_loss

            g_loss.backward()
            g_optimizer.step()

            # --- X -> Y -> X cycle ---
            g_optimizer.zero_grad()

            # Adversarial term: the fake Y should be scored as real (1) by D_Y
            fake_Y = G_XtoY(images_X)
            g_loss = torch.mean((D_Y(fake_Y) - 1)**2)

            # Cycle term: mapping the fake Y back should reproduce the original X
            reconstructed_X = G_YtoX(fake_Y)
            cycle_consistency_loss = torch.mean(torch.sum(torch.abs(images_X - reconstructed_X), [1, 2, 3]))

            g_loss += opts.lambda_cycle * cycle_consistency_loss

            g_loss.backward()
            g_optimizer.step()

            # Print the log info.
            # NOTE: d_Y_loss / d_X_loss are the discriminator losses on *fake*
            # images, and g_loss is the loss of the X -> Y -> X cycle only
            # (the last values assigned above).
            if iteration % opts.log_step == 0:
                print('Iteration [{:5d}/{:5d}] | d_real_loss: {:6.4f} | d_Y_loss: {:6.4f} | d_X_loss: {:6.4f} | '
                    'd_fake_loss: {:6.4f} | g_loss: {:6.4f}'.format(
                      iteration, opts.train_iters, d_real_loss.item(), D_Y_loss.item(),
                      D_X_loss.item(), d_fake_loss.item(), g_loss.item()))

            # Save the generated samples
            if iteration % opts.sample_every == 0:
                cyclegan_save_samples(iteration, fixed_Y, fixed_X, G_YtoX, G_XtoY, opts)

            # Save the model parameters
            if iteration % opts.checkpoint_every == 0:
                cyclegan_checkpoint(iteration, G_XtoY, G_YtoX, D_X, D_Y, opts)

    except KeyboardInterrupt:
        # Allow the user to stop a long run and keep the partially trained models.
        print('Exiting early from training.')

    return G_XtoY, G_YtoX, D_X, D_Y


# Training


## Download dataset

In [14]:
######################################################################
# Fetch the emoji translation dataset archive (get_file is the helper
# defined in the "Utility functions" section above)
######################################################################
data_fpath = get_file(
    fname='emojis',
    origin='http://www.cs.toronto.edu/~jba/emojis.tar.gz',
    untar=True,
)

data/emojis.tar.gz
Downloading data from http://www.cs.toronto.edu/~jba/emojis.tar.gz
Extracting file.


In [0]:
# Suppress every warning of category UserWarning from this point on
import warnings
warnings.simplefilter(action="ignore", category=UserWarning)

## DCGAN

In [18]:
SEED = 11

# Seed NumPy and PyTorch (plus the CUDA generator when a GPU is present)
# so that repeated runs of this cell are reproducible.
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)


# Options for the DCGAN run; they are copied into an AttrDict and handed to train().
args_dict = dict(
    image_size=32,
    g_conv_dim=32,
    d_conv_dim=64,
    noise_size=100,
    num_workers=0,
    train_iters=20000,
    X='Windows',  # options: 'Windows' / 'Apple'
    Y=None,
    lr=0.000003,
    beta1=0.5,
    beta2=0.999,
    batch_size=32,
    checkpoint_dir='results/checkpoints_gan',
    sample_dir='results/samples_gan',
    load=None,
    log_step=200,
    sample_every=200,
    checkpoint_every=1000,
    spectral_norm=False,
    gradient_penalty=True,
    d_train_iters=1,
)
args = AttrDict()
args.update(args_dict)

# Show the configuration, train, then stitch the saved samples into a GIF.
print_opts(args)
G, D = train(args)

generate_gif("results/samples_gan")

                                      Opts                                      
--------------------------------------------------------------------------------
                             image_size: 32                                     
                             g_conv_dim: 32                                     
                             d_conv_dim: 64                                     
                             noise_size: 100                                    
                            train_iters: 20000                                  
                                      X: Windows                                
                                     lr: 3e-06                                  
                                  beta1: 0.5                                    
                                  beta2: 0.999                                  
                             batch_size: 32                                     
                         che



Iteration [ 200/20000] | D_real_loss: 0.0875 | D_fake_loss: 0.0430 | G_loss: 0.5162
Saved results/samples_gan/sample-000200.png




Iteration [ 400/20000] | D_real_loss: 0.1009 | D_fake_loss: 0.0652 | G_loss: 0.4229
Saved results/samples_gan/sample-000400.png




Iteration [ 600/20000] | D_real_loss: 0.1029 | D_fake_loss: 0.0730 | G_loss: 0.4079
Saved results/samples_gan/sample-000600.png




Iteration [ 800/20000] | D_real_loss: 0.1210 | D_fake_loss: 0.0809 | G_loss: 0.3593
Saved results/samples_gan/sample-000800.png




Iteration [1000/20000] | D_real_loss: 0.1293 | D_fake_loss: 0.0882 | G_loss: 0.3426
Saved results/samples_gan/sample-001000.png




Iteration [1200/20000] | D_real_loss: 0.1211 | D_fake_loss: 0.0895 | G_loss: 0.3377
Saved results/samples_gan/sample-001200.png




Iteration [1400/20000] | D_real_loss: 0.1334 | D_fake_loss: 0.0907 | G_loss: 0.3345
Saved results/samples_gan/sample-001400.png




Iteration [1600/20000] | D_real_loss: 0.1380 | D_fake_loss: 0.0949 | G_loss: 0.3294
Saved results/samples_gan/sample-001600.png




Iteration [1800/20000] | D_real_loss: 0.1376 | D_fake_loss: 0.0910 | G_loss: 0.3248
Saved results/samples_gan/sample-001800.png




Iteration [2000/20000] | D_real_loss: 0.1452 | D_fake_loss: 0.0920 | G_loss: 0.3277
Saved results/samples_gan/sample-002000.png




Iteration [2200/20000] | D_real_loss: 0.1437 | D_fake_loss: 0.0946 | G_loss: 0.3163
Saved results/samples_gan/sample-002200.png




Iteration [2400/20000] | D_real_loss: 0.1324 | D_fake_loss: 0.1027 | G_loss: 0.2962
Saved results/samples_gan/sample-002400.png




Iteration [2600/20000] | D_real_loss: 0.1301 | D_fake_loss: 0.1088 | G_loss: 0.2860
Saved results/samples_gan/sample-002600.png




Iteration [2800/20000] | D_real_loss: 0.1426 | D_fake_loss: 0.0995 | G_loss: 0.3121
Saved results/samples_gan/sample-002800.png




Iteration [3000/20000] | D_real_loss: 0.1463 | D_fake_loss: 0.0974 | G_loss: 0.3103
Saved results/samples_gan/sample-003000.png




Iteration [3200/20000] | D_real_loss: 0.1423 | D_fake_loss: 0.1017 | G_loss: 0.2998
Saved results/samples_gan/sample-003200.png




Iteration [3400/20000] | D_real_loss: 0.1447 | D_fake_loss: 0.1009 | G_loss: 0.3088
Saved results/samples_gan/sample-003400.png




Iteration [3600/20000] | D_real_loss: 0.1404 | D_fake_loss: 0.1043 | G_loss: 0.3015
Saved results/samples_gan/sample-003600.png




Iteration [3800/20000] | D_real_loss: 0.1459 | D_fake_loss: 0.1005 | G_loss: 0.3034
Saved results/samples_gan/sample-003800.png




Iteration [4000/20000] | D_real_loss: 0.1373 | D_fake_loss: 0.1078 | G_loss: 0.2950
Saved results/samples_gan/sample-004000.png




Iteration [4200/20000] | D_real_loss: 0.1417 | D_fake_loss: 0.1031 | G_loss: 0.3008
Saved results/samples_gan/sample-004200.png




Iteration [4400/20000] | D_real_loss: 0.1340 | D_fake_loss: 0.1102 | G_loss: 0.2791
Saved results/samples_gan/sample-004400.png




Iteration [4600/20000] | D_real_loss: 0.1487 | D_fake_loss: 0.0993 | G_loss: 0.3106
Saved results/samples_gan/sample-004600.png




Iteration [4800/20000] | D_real_loss: 0.1383 | D_fake_loss: 0.1073 | G_loss: 0.2966
Saved results/samples_gan/sample-004800.png




Iteration [5000/20000] | D_real_loss: 0.1443 | D_fake_loss: 0.1008 | G_loss: 0.3055
Saved results/samples_gan/sample-005000.png




Iteration [5200/20000] | D_real_loss: 0.1421 | D_fake_loss: 0.1031 | G_loss: 0.2997
Saved results/samples_gan/sample-005200.png




Iteration [5400/20000] | D_real_loss: 0.1336 | D_fake_loss: 0.1117 | G_loss: 0.2763
Saved results/samples_gan/sample-005400.png




Iteration [5600/20000] | D_real_loss: 0.1376 | D_fake_loss: 0.1088 | G_loss: 0.2836
Saved results/samples_gan/sample-005600.png




Iteration [5800/20000] | D_real_loss: 0.1317 | D_fake_loss: 0.1133 | G_loss: 0.2864
Saved results/samples_gan/sample-005800.png




Iteration [6000/20000] | D_real_loss: 0.1259 | D_fake_loss: 0.1216 | G_loss: 0.2601
Saved results/samples_gan/sample-006000.png




Iteration [6200/20000] | D_real_loss: 0.1369 | D_fake_loss: 0.1084 | G_loss: 0.2875
Saved results/samples_gan/sample-006200.png




Iteration [6400/20000] | D_real_loss: 0.1456 | D_fake_loss: 0.0998 | G_loss: 0.2973
Saved results/samples_gan/sample-006400.png




Iteration [6600/20000] | D_real_loss: 0.1304 | D_fake_loss: 0.1163 | G_loss: 0.2688
Saved results/samples_gan/sample-006600.png




Iteration [6800/20000] | D_real_loss: 0.1263 | D_fake_loss: 0.1198 | G_loss: 0.2634
Saved results/samples_gan/sample-006800.png




Iteration [7000/20000] | D_real_loss: 0.1359 | D_fake_loss: 0.1116 | G_loss: 0.2780
Saved results/samples_gan/sample-007000.png




Iteration [7200/20000] | D_real_loss: 0.1414 | D_fake_loss: 0.1099 | G_loss: 0.2749
Saved results/samples_gan/sample-007200.png




Iteration [7400/20000] | D_real_loss: 0.1305 | D_fake_loss: 0.1170 | G_loss: 0.2689
Saved results/samples_gan/sample-007400.png




Iteration [7600/20000] | D_real_loss: 0.1314 | D_fake_loss: 0.1160 | G_loss: 0.2745
Saved results/samples_gan/sample-007600.png




Iteration [7800/20000] | D_real_loss: 0.1275 | D_fake_loss: 0.1192 | G_loss: 0.2571
Saved results/samples_gan/sample-007800.png




Iteration [8000/20000] | D_real_loss: 0.1250 | D_fake_loss: 0.1244 | G_loss: 0.2589
Saved results/samples_gan/sample-008000.png




Iteration [8200/20000] | D_real_loss: 0.1248 | D_fake_loss: 0.1212 | G_loss: 0.2634
Saved results/samples_gan/sample-008200.png




Iteration [8400/20000] | D_real_loss: 0.1268 | D_fake_loss: 0.1209 | G_loss: 0.2675
Saved results/samples_gan/sample-008400.png




Iteration [8600/20000] | D_real_loss: 0.1281 | D_fake_loss: 0.1175 | G_loss: 0.2642
Saved results/samples_gan/sample-008600.png




Iteration [8800/20000] | D_real_loss: 0.1329 | D_fake_loss: 0.1148 | G_loss: 0.2630
Saved results/samples_gan/sample-008800.png




Iteration [9000/20000] | D_real_loss: 0.1270 | D_fake_loss: 0.1183 | G_loss: 0.2651
Saved results/samples_gan/sample-009000.png




Iteration [9200/20000] | D_real_loss: 0.1304 | D_fake_loss: 0.1171 | G_loss: 0.2703
Saved results/samples_gan/sample-009200.png




Iteration [9400/20000] | D_real_loss: 0.1283 | D_fake_loss: 0.1183 | G_loss: 0.2680
Saved results/samples_gan/sample-009400.png




Iteration [9600/20000] | D_real_loss: 0.1247 | D_fake_loss: 0.1220 | G_loss: 0.2581
Saved results/samples_gan/sample-009600.png




Iteration [9800/20000] | D_real_loss: 0.1251 | D_fake_loss: 0.1216 | G_loss: 0.2570
Saved results/samples_gan/sample-009800.png




Iteration [10000/20000] | D_real_loss: 0.1245 | D_fake_loss: 0.1224 | G_loss: 0.2591
Saved results/samples_gan/sample-010000.png




Iteration [10200/20000] | D_real_loss: 0.1314 | D_fake_loss: 0.1166 | G_loss: 0.2729
Saved results/samples_gan/sample-010200.png




Iteration [10400/20000] | D_real_loss: 0.1263 | D_fake_loss: 0.1189 | G_loss: 0.2595
Saved results/samples_gan/sample-010400.png




Iteration [10600/20000] | D_real_loss: 0.1296 | D_fake_loss: 0.1188 | G_loss: 0.2579
Saved results/samples_gan/sample-010600.png




Iteration [10800/20000] | D_real_loss: 0.1268 | D_fake_loss: 0.1202 | G_loss: 0.2603
Saved results/samples_gan/sample-010800.png




Iteration [11000/20000] | D_real_loss: 0.1235 | D_fake_loss: 0.1235 | G_loss: 0.2531
Saved results/samples_gan/sample-011000.png




Iteration [11200/20000] | D_real_loss: 0.1261 | D_fake_loss: 0.1208 | G_loss: 0.2635
Saved results/samples_gan/sample-011200.png




Iteration [11400/20000] | D_real_loss: 0.1316 | D_fake_loss: 0.1157 | G_loss: 0.2683
Saved results/samples_gan/sample-011400.png




Iteration [11600/20000] | D_real_loss: 0.1296 | D_fake_loss: 0.1180 | G_loss: 0.2670
Saved results/samples_gan/sample-011600.png




Iteration [11800/20000] | D_real_loss: 0.1266 | D_fake_loss: 0.1205 | G_loss: 0.2624
Saved results/samples_gan/sample-011800.png




Iteration [12000/20000] | D_real_loss: 0.1274 | D_fake_loss: 0.1195 | G_loss: 0.2644
Saved results/samples_gan/sample-012000.png




Iteration [12200/20000] | D_real_loss: 0.1266 | D_fake_loss: 0.1191 | G_loss: 0.2611
Saved results/samples_gan/sample-012200.png




Iteration [12400/20000] | D_real_loss: 0.1262 | D_fake_loss: 0.1201 | G_loss: 0.2605
Saved results/samples_gan/sample-012400.png




Iteration [12600/20000] | D_real_loss: 0.1300 | D_fake_loss: 0.1176 | G_loss: 0.2596
Saved results/samples_gan/sample-012600.png




Iteration [12800/20000] | D_real_loss: 0.1291 | D_fake_loss: 0.1193 | G_loss: 0.2572
Saved results/samples_gan/sample-012800.png




Iteration [13000/20000] | D_real_loss: 0.1255 | D_fake_loss: 0.1222 | G_loss: 0.2600
Saved results/samples_gan/sample-013000.png




Iteration [13200/20000] | D_real_loss: 0.1258 | D_fake_loss: 0.1218 | G_loss: 0.2556
Saved results/samples_gan/sample-013200.png




Iteration [13400/20000] | D_real_loss: 0.1248 | D_fake_loss: 0.1225 | G_loss: 0.2573
Saved results/samples_gan/sample-013400.png




Iteration [13600/20000] | D_real_loss: 0.1282 | D_fake_loss: 0.1188 | G_loss: 0.2600
Saved results/samples_gan/sample-013600.png




Iteration [13800/20000] | D_real_loss: 0.1256 | D_fake_loss: 0.1213 | G_loss: 0.2596
Saved results/samples_gan/sample-013800.png




Iteration [14000/20000] | D_real_loss: 0.1303 | D_fake_loss: 0.1181 | G_loss: 0.2620
Saved results/samples_gan/sample-014000.png




Iteration [14200/20000] | D_real_loss: 0.1266 | D_fake_loss: 0.1208 | G_loss: 0.2622
Saved results/samples_gan/sample-014200.png




Iteration [14400/20000] | D_real_loss: 0.1271 | D_fake_loss: 0.1216 | G_loss: 0.2540
Saved results/samples_gan/sample-014400.png




Iteration [14600/20000] | D_real_loss: 0.1246 | D_fake_loss: 0.1228 | G_loss: 0.2526
Saved results/samples_gan/sample-014600.png




Iteration [14800/20000] | D_real_loss: 0.1264 | D_fake_loss: 0.1220 | G_loss: 0.2562
Saved results/samples_gan/sample-014800.png




Iteration [15000/20000] | D_real_loss: 0.1237 | D_fake_loss: 0.1238 | G_loss: 0.2534
Saved results/samples_gan/sample-015000.png




Iteration [15200/20000] | D_real_loss: 0.1292 | D_fake_loss: 0.1186 | G_loss: 0.2600
Saved results/samples_gan/sample-015200.png




Iteration [15400/20000] | D_real_loss: 0.1274 | D_fake_loss: 0.1212 | G_loss: 0.2606
Saved results/samples_gan/sample-015400.png




Iteration [15600/20000] | D_real_loss: 0.1264 | D_fake_loss: 0.1213 | G_loss: 0.2570
Saved results/samples_gan/sample-015600.png




Iteration [15800/20000] | D_real_loss: 0.1255 | D_fake_loss: 0.1216 | G_loss: 0.2596
Saved results/samples_gan/sample-015800.png




Iteration [16000/20000] | D_real_loss: 0.1261 | D_fake_loss: 0.1221 | G_loss: 0.2568
Saved results/samples_gan/sample-016000.png




Iteration [16200/20000] | D_real_loss: 0.1226 | D_fake_loss: 0.1251 | G_loss: 0.2495
Saved results/samples_gan/sample-016200.png




Iteration [16400/20000] | D_real_loss: 0.1275 | D_fake_loss: 0.1203 | G_loss: 0.2557
Saved results/samples_gan/sample-016400.png




Iteration [16600/20000] | D_real_loss: 0.1260 | D_fake_loss: 0.1225 | G_loss: 0.2599
Saved results/samples_gan/sample-016600.png




Iteration [16800/20000] | D_real_loss: 0.1229 | D_fake_loss: 0.1250 | G_loss: 0.2518
Saved results/samples_gan/sample-016800.png




Iteration [17000/20000] | D_real_loss: 0.1235 | D_fake_loss: 0.1245 | G_loss: 0.2552
Saved results/samples_gan/sample-017000.png




Iteration [17200/20000] | D_real_loss: 0.1241 | D_fake_loss: 0.1243 | G_loss: 0.2492
Saved results/samples_gan/sample-017200.png




Iteration [17400/20000] | D_real_loss: 0.1235 | D_fake_loss: 0.1251 | G_loss: 0.2536
Saved results/samples_gan/sample-017400.png




Iteration [17600/20000] | D_real_loss: 0.1231 | D_fake_loss: 0.1252 | G_loss: 0.2522
Saved results/samples_gan/sample-017600.png




Iteration [17800/20000] | D_real_loss: 0.1246 | D_fake_loss: 0.1232 | G_loss: 0.2528
Saved results/samples_gan/sample-017800.png




Iteration [18000/20000] | D_real_loss: 0.1254 | D_fake_loss: 0.1229 | G_loss: 0.2552
Saved results/samples_gan/sample-018000.png




Iteration [18200/20000] | D_real_loss: 0.1240 | D_fake_loss: 0.1238 | G_loss: 0.2579
Saved results/samples_gan/sample-018200.png




Iteration [18400/20000] | D_real_loss: 0.1246 | D_fake_loss: 0.1228 | G_loss: 0.2486
Saved results/samples_gan/sample-018400.png




Iteration [18600/20000] | D_real_loss: 0.1265 | D_fake_loss: 0.1221 | G_loss: 0.2602
Saved results/samples_gan/sample-018600.png




Iteration [18800/20000] | D_real_loss: 0.1229 | D_fake_loss: 0.1257 | G_loss: 0.2522
Saved results/samples_gan/sample-018800.png




Iteration [19000/20000] | D_real_loss: 0.1253 | D_fake_loss: 0.1232 | G_loss: 0.2535
Saved results/samples_gan/sample-019000.png




Iteration [19200/20000] | D_real_loss: 0.1246 | D_fake_loss: 0.1234 | G_loss: 0.2529
Saved results/samples_gan/sample-019200.png




Iteration [19400/20000] | D_real_loss: 0.1263 | D_fake_loss: 0.1225 | G_loss: 0.2529
Saved results/samples_gan/sample-019400.png




Iteration [19600/20000] | D_real_loss: 0.1250 | D_fake_loss: 0.1240 | G_loss: 0.2540
Saved results/samples_gan/sample-019600.png




Iteration [19800/20000] | D_real_loss: 0.1238 | D_fake_loss: 0.1248 | G_loss: 0.2493
Saved results/samples_gan/sample-019800.png




Iteration [20000/20000] | D_real_loss: 0.1247 | D_fake_loss: 0.1235 | G_loss: 0.2495
Saved results/samples_gan/sample-020000.png
adding image results/samples_gan/losses.png
adding image results/samples_gan/sample-000200.png
adding image results/samples_gan/sample-000400.png
adding image results/samples_gan/sample-000600.png
adding image results/samples_gan/sample-000800.png
adding image results/samples_gan/sample-001000.png
adding image results/samples_gan/sample-001200.png
adding image results/samples_gan/sample-001400.png
adding image results/samples_gan/sample-001600.png
adding image results/samples_gan/sample-001800.png
adding image results/samples_gan/sample-002000.png
adding image results/samples_gan/sample-002200.png
adding image results/samples_gan/sample-002400.png
adding image results/samples_gan/sample-002600.png
adding image results/samples_gan/sample-002800.png
adding image results/samples_gan/sample-003000.png
adding image results/samples_gan/sample-003200.png
adding imag

In [19]:
# Shell escape: recursively (-r) archive the whole results directory into a
# zip file in the current working directory.
!zip -r ./samples_dcgan_gp_even_smaller_alpha_1malpha_upconv.zip /content/csc421/a4/results/

  adding: content/csc421/a4/results/ (stored 0%)
  adding: content/csc421/a4/results/samples_gan/ (stored 0%)
  adding: content/csc421/a4/results/samples_gan/sample-007400.png (deflated 0%)
  adding: content/csc421/a4/results/samples_gan/sample-006800.png (deflated 0%)
  adding: content/csc421/a4/results/samples_gan/sample-018800.png (deflated 0%)
  adding: content/csc421/a4/results/samples_gan/sample-010800.png (deflated 0%)
  adding: content/csc421/a4/results/samples_gan/sample-013200.png (deflated 0%)
  adding: content/csc421/a4/results/samples_gan/sample-009800.png (deflated 0%)
  adding: content/csc421/a4/results/samples_gan/sample-003600.png (deflated 0%)
  adding: content/csc421/a4/results/samples_gan/sample-008200.png (deflated 0%)
  adding: content/csc421/a4/results/samples_gan/sample-017600.png (deflated 0%)
  adding: content/csc421/a4/results/samples_gan/sample-008400.png (deflated 0%)
  adding: content/csc421/a4/results/samples_gan/sample-001200.png (deflated 0%)
  adding: 

## CycleGAN

In [0]:
SEED = 4  # alternative seed that was also tried: 42

# Seed NumPy and PyTorch (plus the CUDA generator when a GPU is present)
# so that repeated runs of this cell are reproducible.
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)


# Options for the CycleGAN run; they are copied into an AttrDict and handed to train().
args_dict = dict(
    image_size=32,
    g_conv_dim=32,
    d_conv_dim=32,
    init_zero_weights=False,
    num_workers=0,
    train_iters=5000,
    X='Apple',
    Y='Windows',
    lambda_cycle=1,  # alternative value that was also tried: 0.015
    lr=0.0003,
    beta1=0.3,
    beta2=0.999,
    batch_size=32,
    checkpoint_dir='results/checkpoints_cyclegan',
    sample_dir='results/samples_cyclegan',
    load=None,
    log_step=200,
    sample_every=200,
    checkpoint_every=1000,
)
args = AttrDict()
args.update(args_dict)


# Show the configuration, train, then build one GIF per translation direction.
print_opts(args)
G_XtoY, G_YtoX, D_X, D_Y = train(args)
generate_gif("results/samples_cyclegan", keyword='X-Y')
generate_gif("results/samples_cyclegan", keyword='Y-X')

                                      Opts                                      
--------------------------------------------------------------------------------
                             image_size: 32                                     
                             g_conv_dim: 32                                     
                             d_conv_dim: 32                                     
                            train_iters: 5000                                   
                                      X: Apple                                  
                                      Y: Windows                                
                           lambda_cycle: 1                                      
                                     lr: 0.0003                                 
                                  beta1: 0.3                                    
                                  beta2: 0.999                                  
                            



Iteration [  200/ 5000] | d_real_loss: 0.0935 | d_Y_loss: 0.0194 | d_X_loss: 0.0078 | d_fake_loss: 0.0272 | g_loss: 470.8833
Saved results/samples_cyclegan/sample-000200-X-Y.png
Saved results/samples_cyclegan/sample-000200-Y-X.png




Iteration [  400/ 5000] | d_real_loss: 0.0405 | d_Y_loss: 0.0127 | d_X_loss: 0.0146 | d_fake_loss: 0.0273 | g_loss: 332.1463
Saved results/samples_cyclegan/sample-000400-X-Y.png
Saved results/samples_cyclegan/sample-000400-Y-X.png




Iteration [  600/ 5000] | d_real_loss: 0.0159 | d_Y_loss: 0.0137 | d_X_loss: 0.0243 | d_fake_loss: 0.0380 | g_loss: 359.1939
Saved results/samples_cyclegan/sample-000600-X-Y.png
Saved results/samples_cyclegan/sample-000600-Y-X.png




Iteration [  800/ 5000] | d_real_loss: 0.0598 | d_Y_loss: 0.0109 | d_X_loss: 0.0026 | d_fake_loss: 0.0135 | g_loss: 321.1346
Saved results/samples_cyclegan/sample-000800-X-Y.png
Saved results/samples_cyclegan/sample-000800-Y-X.png




Iteration [ 1000/ 5000] | d_real_loss: 0.0127 | d_Y_loss: 0.0088 | d_X_loss: 0.0222 | d_fake_loss: 0.0310 | g_loss: 372.3481
Saved results/samples_cyclegan/sample-001000-X-Y.png
Saved results/samples_cyclegan/sample-001000-Y-X.png




Iteration [ 1200/ 5000] | d_real_loss: 0.0076 | d_Y_loss: 0.0115 | d_X_loss: 0.0017 | d_fake_loss: 0.0132 | g_loss: 314.7178
Saved results/samples_cyclegan/sample-001200-X-Y.png
Saved results/samples_cyclegan/sample-001200-Y-X.png




Iteration [ 1400/ 5000] | d_real_loss: 0.0066 | d_Y_loss: 0.0026 | d_X_loss: 0.0030 | d_fake_loss: 0.0056 | g_loss: 335.8820
Saved results/samples_cyclegan/sample-001400-X-Y.png
Saved results/samples_cyclegan/sample-001400-Y-X.png




Iteration [ 1600/ 5000] | d_real_loss: 0.0096 | d_Y_loss: 0.0029 | d_X_loss: 0.0036 | d_fake_loss: 0.0065 | g_loss: 313.2387
Saved results/samples_cyclegan/sample-001600-X-Y.png
Saved results/samples_cyclegan/sample-001600-Y-X.png




Iteration [ 1800/ 5000] | d_real_loss: 0.0332 | d_Y_loss: 0.0226 | d_X_loss: 0.0034 | d_fake_loss: 0.0260 | g_loss: 267.6780
Saved results/samples_cyclegan/sample-001800-X-Y.png
Saved results/samples_cyclegan/sample-001800-Y-X.png




Iteration [ 2000/ 5000] | d_real_loss: 0.0076 | d_Y_loss: 0.0052 | d_X_loss: 0.0063 | d_fake_loss: 0.0114 | g_loss: 247.4308
Saved results/samples_cyclegan/sample-002000-X-Y.png
Saved results/samples_cyclegan/sample-002000-Y-X.png




Iteration [ 2200/ 5000] | d_real_loss: 0.0114 | d_Y_loss: 0.0020 | d_X_loss: 0.0018 | d_fake_loss: 0.0037 | g_loss: 261.9733
Saved results/samples_cyclegan/sample-002200-X-Y.png
Saved results/samples_cyclegan/sample-002200-Y-X.png




Iteration [ 2400/ 5000] | d_real_loss: 0.0095 | d_Y_loss: 0.0048 | d_X_loss: 0.0027 | d_fake_loss: 0.0075 | g_loss: 258.7817
Saved results/samples_cyclegan/sample-002400-X-Y.png
Saved results/samples_cyclegan/sample-002400-Y-X.png




Iteration [ 2600/ 5000] | d_real_loss: 0.0140 | d_Y_loss: 0.0016 | d_X_loss: 0.0083 | d_fake_loss: 0.0099 | g_loss: 264.2756
Saved results/samples_cyclegan/sample-002600-X-Y.png
Saved results/samples_cyclegan/sample-002600-Y-X.png




Iteration [ 2800/ 5000] | d_real_loss: 0.0162 | d_Y_loss: 0.0069 | d_X_loss: 0.0149 | d_fake_loss: 0.0218 | g_loss: 271.2504
Saved results/samples_cyclegan/sample-002800-X-Y.png
Saved results/samples_cyclegan/sample-002800-Y-X.png




Iteration [ 3000/ 5000] | d_real_loss: 0.0035 | d_Y_loss: 0.0065 | d_X_loss: 0.0061 | d_fake_loss: 0.0126 | g_loss: 266.3051
Saved results/samples_cyclegan/sample-003000-X-Y.png
Saved results/samples_cyclegan/sample-003000-Y-X.png




Iteration [ 3200/ 5000] | d_real_loss: 0.0159 | d_Y_loss: 0.0055 | d_X_loss: 0.0011 | d_fake_loss: 0.0066 | g_loss: 265.4001
Saved results/samples_cyclegan/sample-003200-X-Y.png
Saved results/samples_cyclegan/sample-003200-Y-X.png




Iteration [ 3400/ 5000] | d_real_loss: 0.0026 | d_Y_loss: 0.0029 | d_X_loss: 0.0007 | d_fake_loss: 0.0036 | g_loss: 248.2113
Saved results/samples_cyclegan/sample-003400-X-Y.png
Saved results/samples_cyclegan/sample-003400-Y-X.png




Iteration [ 3600/ 5000] | d_real_loss: 0.0019 | d_Y_loss: 0.0013 | d_X_loss: 0.0008 | d_fake_loss: 0.0021 | g_loss: 221.5398
Saved results/samples_cyclegan/sample-003600-X-Y.png
Saved results/samples_cyclegan/sample-003600-Y-X.png




Iteration [ 3800/ 5000] | d_real_loss: 0.0034 | d_Y_loss: 0.0038 | d_X_loss: 0.0010 | d_fake_loss: 0.0048 | g_loss: 241.2918
Saved results/samples_cyclegan/sample-003800-X-Y.png
Saved results/samples_cyclegan/sample-003800-Y-X.png




Iteration [ 4000/ 5000] | d_real_loss: 0.0073 | d_Y_loss: 0.0005 | d_X_loss: 0.0015 | d_fake_loss: 0.0020 | g_loss: 228.1084
Saved results/samples_cyclegan/sample-004000-X-Y.png
Saved results/samples_cyclegan/sample-004000-Y-X.png




Iteration [ 4200/ 5000] | d_real_loss: 0.0082 | d_Y_loss: 0.0006 | d_X_loss: 0.0013 | d_fake_loss: 0.0019 | g_loss: 245.4667
Saved results/samples_cyclegan/sample-004200-X-Y.png
Saved results/samples_cyclegan/sample-004200-Y-X.png




Iteration [ 4400/ 5000] | d_real_loss: 0.0079 | d_Y_loss: 0.0021 | d_X_loss: 0.0031 | d_fake_loss: 0.0051 | g_loss: 216.5164
Saved results/samples_cyclegan/sample-004400-X-Y.png
Saved results/samples_cyclegan/sample-004400-Y-X.png




Iteration [ 4600/ 5000] | d_real_loss: 0.0015 | d_Y_loss: 0.0002 | d_X_loss: 0.0007 | d_fake_loss: 0.0008 | g_loss: 260.6619
Saved results/samples_cyclegan/sample-004600-X-Y.png
Saved results/samples_cyclegan/sample-004600-Y-X.png




Iteration [ 4800/ 5000] | d_real_loss: 0.0060 | d_Y_loss: 0.0022 | d_X_loss: 0.0018 | d_fake_loss: 0.0040 | g_loss: 236.0978
Saved results/samples_cyclegan/sample-004800-X-Y.png
Saved results/samples_cyclegan/sample-004800-Y-X.png




Iteration [ 5000/ 5000] | d_real_loss: 0.0012 | d_Y_loss: 0.0028 | d_X_loss: 0.0002 | d_fake_loss: 0.0030 | g_loss: 236.6309
Saved results/samples_cyclegan/sample-005000-X-Y.png
Saved results/samples_cyclegan/sample-005000-Y-X.png
adding image results/samples_cyclegan/sample-000200-X-Y.png
adding image results/samples_cyclegan/sample-000400-X-Y.png
adding image results/samples_cyclegan/sample-000600-X-Y.png
adding image results/samples_cyclegan/sample-000800-X-Y.png
adding image results/samples_cyclegan/sample-001000-X-Y.png
adding image results/samples_cyclegan/sample-001200-X-Y.png
adding image results/samples_cyclegan/sample-001400-X-Y.png
adding image results/samples_cyclegan/sample-001600-X-Y.png
adding image results/samples_cyclegan/sample-001800-X-Y.png
adding image results/samples_cyclegan/sample-002000-X-Y.png
adding image results/samples_cyclegan/sample-002200-X-Y.png
adding image results/samples_cyclegan/sample-002400-X-Y.png
adding image results/samples_cyclegan/sample-0026

In [0]:
# Shell escape: recursively (-r) archive the CycleGAN sample directory into a
# zip file in the current working directory.
!zip -r ./samples_cyclegan_Q3_lambda1.zip /content/csc421/a4/results/samples_cyclegan

updating: content/csc421/a4/results/samples_cyclegan/ (stored 0%)
updating: content/csc421/a4/results/samples_cyclegan/sample-000800-Y-X.png (deflated 0%)
updating: content/csc421/a4/results/samples_cyclegan/sample-004400-X-Y.png (deflated 0%)
updating: content/csc421/a4/results/samples_cyclegan/sample-002200-X-Y.png (deflated 0%)
updating: content/csc421/a4/results/samples_cyclegan/sample-000600-Y-X.png (deflated 0%)
updating: content/csc421/a4/results/samples_cyclegan/sample-004000-Y-X.png (deflated 0%)
updating: content/csc421/a4/results/samples_cyclegan/sample-004200-Y-X.png (deflated 0%)
updating: content/csc421/a4/results/samples_cyclegan/sample-003000-Y-X.png (deflated 0%)
updating: content/csc421/a4/results/samples_cyclegan/sample-002000-Y-X.png (deflated 0%)
updating: content/csc421/a4/results/samples_cyclegan/sample-000400-X-Y.png (deflated 0%)
updating: content/csc421/a4/results/samples_cyclegan/sample-001400-Y-X.png (deflated 0%)
updating: content/csc421/a4/results/samples_