<a href="https://colab.research.google.com/github/TheodorSergeev/optml_gan/blob/main/dcgan.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Adapted from https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html

# Initialisation

In [1]:
try:
    import google.colab
    IN_COLAB = True
except:
    IN_COLAB = False

if IN_COLAB:  
    from google.colab import drive
    drive.mount('/content/drive')

    # packages to generate requirement.txt
    %pip install nbconvert
    %pip install pipreqs
    # for Frechet inception distance
    %pip install pytorch-fid

    %cd drive/My Drive/optml_gan2
    PATH = './' 
else:
    PATH = './' 

In [2]:
from __future__ import print_function

import argparse
import os
import random
import time

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data

import torchvision
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
from datetime import datetime
import json
import pickle
from scipy import linalg
from torch.nn.functional import adaptive_avg_pool2d
%matplotlib inline

try:
    from tqdm import tqdm
except ImportError:
    # If tqdm is not available, provide a mock version of it
    def tqdm(x):
        return x

# Generate Requirements

In [3]:
# # converts notebook to .py file for pipreqs
# !jupyter nbconvert --output-dir="./" --to script dcgan.ipynb 

# # creates the requirement.txt file
# !pipreqs --force
# os.remove('./dcgan.py')  # deletes the .py file

# Source code

## Data handling

In [4]:
# there are problems with downloading CelebA
# see https://stackoverflow.com/questions/65528568/how-do-i-load-the-celeba-dataset-on-google-colab-using-torch-vision-without-ru

def get_dataset(name, image_size, dataroot):
    # torchvision dataset
    dataset = None

    # number of channels in the training images (3 for color, 1 for grayscale)
    nc = None

    if name == 'cifar10':
        nc = 3

        transform = transforms.Compose([
            transforms.Resize(image_size),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5) # todo: Why do we use these means and stds ? 
        )])

        dataset = torchvision.datasets.CIFAR10(dataroot, download=True, 
                                            train=True,  transform=transform)
    elif name == 'mnist':
        nc = 1

        transform = transforms.Compose([
            transforms.Resize(image_size),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            transforms.Normalize((0.5), (0.5)) # todo: Why do we use these means and stds and not the mean and std of the dataset?
        ])

        dataset = torchvision.datasets.MNIST(dataroot, download=True, 
                                            train=True,  transform=transform)
    else:
        raise ValueError("Unknown dataset name")
    
    return dataset, nc

In [5]:
def plot_img(dataloader, dataset_name):
    # Plot some training images
    real_batch = next(iter(dataloader))
    plt.figure(figsize=(8,8))
    plt.axis("off")
    plt.title("Training Images")
    plt.imshow(np.transpose(vutils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu(),(1,2,0)))
    plt.savefig(PATH + 'img/training_images_' + dataset_name, format="png", dpi=400)
    return

## Utils

In [6]:
def count_parameters(model):
    # from https://discuss.pytorch.org/t/how-do-i-check-the-number-of-parameters-of-a-model/4325/9
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def create_repo_paths(PATH):
    data_path = PATH + '/data'
    generated_data_path = PATH + '/generated_data'
    img_path = PATH + '/img'
    src_path = PATH + '/src'
    os.makedirs(data_path, exist_ok=True)
    os.makedirs(generated_data_path, exist_ok=True)
    os.makedirs(img_path, exist_ok=True)
    os.makedirs(src_path, exist_ok=True)
    return
# Paths to load and save the models


def generate_paths(PATH, extra_word, loss_name, lrD, lrG, beta1, iter_dis, iter_gen, grad_penalty_coef, create_dir):

    param_str = loss_name + 'Loss_' + 'lrd' + \
        str(lrD) + '_lrg' + str(lrG) + '_b1' + 'b' + str(beta1)
    param_str = param_str + '_itd' + \
        str(iter_dis) + '_itg' + str(iter_gen) + \
        '_gpv' + str(grad_penalty_coef) + '_'

    # + str(datetime.date(datetime.now())).replace('-', '_') + "_"
    experiment_path = PATH + "generated_data/" + extra_word + param_str

    models_path = experiment_path + '/models/'

    stats_path = experiment_path + '/stat.pickle'

    if create_dir:
        os.makedirs(models_path, exist_ok=True)
        os.makedirs(experiment_path, exist_ok=True)

    return experiment_path, stats_path, models_path


def model_paths(experiment_path, epoch, models_path):

    model_name_G = 'model_G_' + str(epoch)
    save_path_G = models_path + model_name_G

    model_name_D = 'model_D_'+str(epoch)
    save_path_D = models_path + model_name_D

    return save_path_G, save_path_D


def set_seeds(manualSeed=123):
    # Set random seed for reproducibility

    # manualSeed = random.randint(1, 10000) # use if you want new results
    print("Random Seed: ", manualSeed)
    random.seed(manualSeed)
    torch.manual_seed(manualSeed)
    return


## Model

In [7]:
# Custom weights initialisation called on netG and netD
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

In [8]:
def init_net(model, device):
    # Create the generator
    net = model.to(device)

    # Handle multi-gpu if desired
    if (device.type == 'cuda') and (ngpu > 1):
        net = nn.DataParallel(net, list(range(ngpu)))

    # Apply the weights_init function to randomly initialise all weights
    #  to mean=0, stdev=0.02.
    # net.apply(weights_init)

    # Print the model
    print(net)
    return net

## Losses

In [9]:
# stability constant
EPS = 1e-15


# KL-divergence
def loss_gen_kl(dis_output, eps=EPS):
    return - torch.log(dis_output + eps).mean()


def loss_dis_kl(dis_output_real, dis_output_fake, eps=EPS):
    return - (torch.log(dis_output_real + eps)).mean() - (torch.log(1. - dis_output_fake + eps)).mean()


# Wasserstein distance
# Requires special output of the network + weight clipping / grad penalty
def loss_gen_wasser(dis_output, eps=EPS):
    return - dis_output.mean()


def loss_dis_wasser(dis_output_real, dis_output_fake, eps=EPS):
    return - (dis_output_real.mean() - dis_output_fake.mean())


# Hinge loss
def loss_gen_hinge(dis_output, eps=EPS):
    return - dis_output.mean()


def loss_dis_hinge(dis_output_real, dis_output_fake, eps=EPS):
    return torch.nn.ReLU()(1.0 - dis_output_real).mean() + torch.nn.ReLU()(1.0 + dis_output_fake).mean()


loss_dict = {
    "kl": (loss_dis_kl, loss_gen_kl),
    "wass": (loss_dis_wasser, loss_gen_wasser),
    "hinge": (loss_dis_hinge, loss_gen_hinge)
}


## FID

https://www.kaggle.com/code/ibtesama/gan-in-pytorch-with-fid/notebook  
https://github.com/mseitzer/pytorch-fid   
https://github.com/mseitzer/pytorch-fid/blob/master/src/pytorch_fid/fid_score.py
Currently uses the Kaggle stuff since calculating FID on the repo is run from the command line

In [10]:
import torchvision.models as models


class InceptionV3(nn.Module):
    """Pretrained InceptionV3 network returning feature maps"""

    # Index of default block of inception to return,
    # corresponds to output of final average pooling
    DEFAULT_BLOCK_INDEX = 3

    # Maps feature dimensionality to their output blocks indices
    BLOCK_INDEX_BY_DIM = {
        64: 0,   # First max pooling features
        192: 1,  # Second max pooling featurs
        768: 2,  # Pre-aux classifier features
        2048: 3  # Final average pooling features
    }

    def __init__(self,
                 output_blocks=[DEFAULT_BLOCK_INDEX],
                 resize_input=True,
                 normalize_input=True,
                 requires_grad=False):

        super(InceptionV3, self).__init__()

        self.resize_input = resize_input
        self.normalize_input = normalize_input
        self.output_blocks = sorted(output_blocks)
        self.last_needed_block = max(output_blocks)

        assert self.last_needed_block <= 3, \
            'Last possible output block index is 3'

        self.blocks = nn.ModuleList()

        inception = models.inception_v3(pretrained=True)

        # Block 0: input to maxpool1
        block0 = [
            inception.Conv2d_1a_3x3,
            inception.Conv2d_2a_3x3,
            inception.Conv2d_2b_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2)
        ]
        self.blocks.append(nn.Sequential(*block0))

        # Block 1: maxpool1 to maxpool2
        if self.last_needed_block >= 1:
            block1 = [
                inception.Conv2d_3b_1x1,
                inception.Conv2d_4a_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2)
            ]
            self.blocks.append(nn.Sequential(*block1))

        # Block 2: maxpool2 to aux classifier
        if self.last_needed_block >= 2:
            block2 = [
                inception.Mixed_5b,
                inception.Mixed_5c,
                inception.Mixed_5d,
                inception.Mixed_6a,
                inception.Mixed_6b,
                inception.Mixed_6c,
                inception.Mixed_6d,
                inception.Mixed_6e,
            ]
            self.blocks.append(nn.Sequential(*block2))

        # Block 3: aux classifier to final avgpool
        if self.last_needed_block >= 3:
            block3 = [
                inception.Mixed_7a,
                inception.Mixed_7b,
                inception.Mixed_7c,
                nn.AdaptiveAvgPool2d(output_size=(1, 1))
            ]
            self.blocks.append(nn.Sequential(*block3))

        for param in self.parameters():
            param.requires_grad = requires_grad

    def forward(self, inp):
        """Get Inception feature maps
        Parameters
        ----------
        inp : torch.autograd.Variable
            Input tensor of shape Bx3xHxW. Values are expected to be in
            range (0, 1)
        Returns
        -------
        List of torch.autograd.Variable, corresponding to the selected output
        block, sorted ascending by index
        """
        outp = []
        x = inp

        if self.resize_input:
            x = F.interpolate(x,
                              size=(299, 299),
                              mode='bilinear',
                              align_corners=False)

        if self.normalize_input:
            x = 2 * x - 1  # Scale from range (0, 1) to range (-1, 1)

        for idx, block in enumerate(self.blocks):
            x = block(x)
            if idx in self.output_blocks:
                outp.append(x)

            if idx == self.last_needed_block:
                break

        return outp

In [11]:
def get_activations(isreal, dataloader, num_samples, model, dims=2048, device='cpu'):
    """Calculates the activations of the pool_3 layer for all images.
    Params:
    -- files       : List of image files paths
    -- model       : Instance of inception model
    -- batch_size  : Batch size of images for the model to process at once.
                     Make sure that the number of samples is a multiple of
                     the batch size, otherwise some samples are ignored. This
                     behavior is retained to match the original FID score
                     implementation.
    -- dims        : Dimensionality of features returned by Inception
    -- device      : Device to run calculations
    -- num_workers : Number of parallel dataloader workers
    Returns:
    -- A numpy array of dimension (num images, dims) that contains the
       activations of the given tensor when feeding inception with the
       query tensor.
    """
    model.eval()

    pred_arr = np.empty((num_samples, dims))

    start_idx = 0

    for batch in tqdm(dataloader):
        # if isreal:
        # batch = batch
        batch = batch[0].to(device)
        batch = batch.repeat_interleave(repeats=3, dim=1) # broadcast the 1 grayscale channel to 3 channels
        with torch.no_grad():
            pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.size(2) != 1 or pred.size(3) != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred = pred.squeeze(3).squeeze(2).cpu().numpy()

        pred_arr[start_idx:start_idx + pred.shape[0]] = pred
        
        start_idx = start_idx + pred.shape[0]

    return pred_arr

In [12]:
def calculate_activation_statistics(isreal, dataloader, num_samples, model, dims=2048,
                                    device='cpu'):
    """Calculation of the statistics used by the FID.
    Params:

    -- model       : Instance of inception model
    -- batch_size  : The images numpy array is split into batches with
                     batch size batch_size. A reasonable batch size
                     depends on the hardware.
    -- dims        : Dimensionality of features returned by Inception
    -- device      : Device to run calculations
    -- num_workers : Number of parallel dataloader workers
    Returns:
    -- mu    : The mean over samples of the activations of the pool_3 layer of
               the inception model.
    -- sigma : The covariance matrix of the activations of the pool_3 layer of
               the inception model.
    """

    act = get_activations(isreal,dataloader, num_samples, model, dims, device)
    mu = np.mean(act, axis=0)
    sigma = np.cov(act, rowvar=False)
    return mu, sigma

In [13]:
def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Numpy implementation of the Frechet Distance.
    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
    and X_2 ~ N(mu_2, C_2) is
            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
    """

    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)

    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)

    assert mu1.shape == mu2.shape, \
        'Training and test mean vectors have different lengths'
    assert sigma1.shape == sigma2.shape, \
        'Training and test covariances have different dimensions'

    diff = mu1 - mu2

    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        print(msg)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError('Imaginary component {}'.format(m))
        covmean = covmean.real

    tr_covmean = np.trace(covmean)

    return (diff.dot(diff) + np.trace(sigma1) +
            np.trace(sigma2) - 2 * tr_covmean)


In [15]:
def calculate_frechet(device, real_dataloader, fake_dataloader, inception_model) :
    mu_1, std_1 = calculate_activation_statistics(True, real_dataloader, num_samples,
         inception_model, device=device)
    mu_2, std_2 = calculate_activation_statistics(False, fake_dataloader, num_samples,
         inception_model, device=device)

    """get Frechet distance"""
    fid_value = calculate_frechet_distance(mu_1, std_1, mu_2, std_2)
    return fid_value


## Training

In [16]:
def init_optimizers(optimizer_name, netD, netG, lrD, lrG, beta1, nz, device,
                    momentumD, momentumG):

    # Create batch of latent vectors that we will use to visualize
    #  the progression of the generator
    fixed_noise = torch.randn(64, nz, 1, 1, device=device)

    # Establish convention for real and fake labels during training
    real_label = 1.
    fake_label = 0.

    if optimizer_name == 'adam':
        # Setup Adam optimisers for both G and D
        optimizerD = optim.Adam(
            netD.parameters(), lr=lrD, betas=(beta1, 0.999))
        optimizerG = optim.Adam(
            netG.parameters(), lr=lrG, betas=(beta1, 0.999))
    if optimizer_name == 'sgd':
        optimizerD = optim.SGD(netD.parameters(), lr=lrD,
                               momentum=momentumD, dampening=0, weight_decay=0)
        optimizerG = optim.SGD(netG.parameters(), lr=lrG,
                               momentum=momentumG, dampening=0, weight_decay=0)
    if optimizer_name == 'rmsprop':
        optimizerD = optim.RMSprop(netD.parameters(
        ), lr=lrD, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
        optimizerG = optim.RMSprop(netG.parameters(
        ), lr=lrG, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)

    return fixed_noise, real_label, fake_label, optimizerD, optimizerG


def init_losses(loss_type):
    if loss_type not in loss_dict.keys():
        raise Exception("Unknown loss type")

    return loss_dict[loss_type]


In [17]:
from torch.autograd import Variable

def gradient_penalty(device, discriminator, data_gen, data_real, lambda_reg=0.1):
    alpha = torch.rand(data_real.shape[0], 1).to(device)
    dims_to_add = len(data_real.size()) - 2
    for i in range(dims_to_add):
        alpha = alpha.unsqueeze(-1)

    interpolates = (alpha * data_real + ((1. - alpha) * data_gen)).to(device)

    interpolates = Variable(interpolates, requires_grad=True)

    disc_interpolates = discriminator(interpolates)
    grad_outputs = torch.ones(disc_interpolates.size()).to(device)

    gradients = torch.autograd.grad(
        outputs=disc_interpolates, inputs=interpolates, grad_outputs=grad_outputs,
        create_graph=True, retain_graph=True, only_inputs=True
    )[0]

    grad_penalty_coef = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * lambda_reg

    return grad_penalty_coef

In [18]:
def discriminator_step(optimizerD, f_loss_dis, netD, netG, data, device, real_label, fake_label, gp_coef):
    netD.zero_grad()

    real_cpu = data[0].to(device)
    b_size = real_cpu.size(0)
    label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
    output_real = netD(real_cpu).view(-1)

    noise = torch.randn(b_size, nz, 1, 1, device=device)
    fake = netG(noise)
    label.fill_(fake_label)
    output_fake = netD(fake.detach()).view(-1)

    errD = f_loss_dis(output_real, output_fake)

    if gp_coef != 0.0:
        errD += gp_coef * gradient_penalty(device, netD, fake, real_cpu)

    errD.backward()
    optimizerD.step()

    D_x = output_real.mean().item()
    D_G_z1 = output_fake.mean().item()

    return D_x, D_G_z1, errD, label, fake, real_cpu


def generator_step(optimizerG, f_loss_gen, netD, netG, label, fake, real_label):
    netG.zero_grad()
    output = netD(fake).view(-1)
    
    errG = f_loss_gen(output)
    errG.backward()
    
    D_G_z2 = output.mean().item()

    optimizerG.step()
    return D_G_z2, errG

In [19]:
class Training:
    def __init__(self, loss_name, netD, netG, device, real_label, fake_label, dataloader, num_epochs,
                 fixed_noise, lrD, lrG, beta1, experiment_prefix, save_models, 
                 PATH, save_stats, create_dir, iter_per_epoch_dis, iter_per_epoch_gen, grad_penalty_coef,
                 optimizerD, optimizerG,
                 save_epochs=10):
        self.optimizerD, self.optimizerG = optimizerD, optimizerG

        self.loss_name = loss_name
        self.netD, self.netG = netD, netG
        self.device = device
        self.real_label, self.fake_label = real_label, fake_label
        self.dataloader = dataloader
        self.num_epochs = num_epochs
        self.fixed_noise = fixed_noise
        self.iter_per_epoch_dis, self.iter_per_epoch_gen = iter_per_epoch_dis, iter_per_epoch_gen
        self.grad_penalty_coef = grad_penalty_coef

        self.save_models = save_models
        self.PATH = PATH
        self.experiment_prefix = experiment_prefix
        self.loss_name = loss_name
        self.lrD = lrD
        self.lrG = lrG
        self.beta1 = beta1
        self.create_dir = create_dir
        self.save_stats = save_stats
        self.save_epochs = save_epochs
        self.experiment_path, self.stats_path, self.models_path = generate_paths(self.PATH, self.experiment_prefix, 
                                                          self.loss_name, self.lrD, self.lrG, 
                                                          self.beta1, self.iter_per_epoch_dis, self.iter_per_epoch_gen, 
                                                          self.grad_penalty_coef, self.create_dir)
        # self.inception_model = inception_model

    def _output_training_stats(self, epoch, i, size, errD, errG, D_x, D_G_z1, D_G_z2, t0):
        if i == size:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f t: %2.3f'
                % (epoch, self.num_epochs, i, len(self.dataloader),
                    errD, errG, D_x, D_G_z1, D_G_z2, time.time()-t0))
        elif i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                % (epoch, self.num_epochs, i, len(self.dataloader),
                    errD, errG, D_x, D_G_z1, D_G_z2))
    
    def _save_gen_output(self, iters, epoch, i):
        if iters % 500 == 0 or (epoch == self.num_epochs-1) and (i == len(self.dataloader)-1):
            with torch.no_grad():
                fake = self.netG(self.fixed_noise).detach().cpu()
            self.img_list.append(vutils.make_grid(fake, padding=2, normalize=True))
            # maybe remove this if we have memory problems
            self.img_list_nogrid.append(fake)

    def train(self):
        f_loss_dis, f_loss_gen = init_losses(self.loss_name)

        G_losses, D_losses = [], []
        self.img_list = []
        self.img_list_nogrid = []

        iters = 0

        print("Starting Training Loop...")

        D_x, D_G_z1, errD, label, fake = None, None, None, None, None
        D_G_z2, errG = None, None

        for epoch in range(self.num_epochs):
            t0 = time.time()

            for i, data in enumerate(self.dataloader, 0):
                for _ in range(self.iter_per_epoch_dis):
                    D_x, D_G_z1, errD, label, fake, real_cpu = discriminator_step(self.optimizerD,
                        f_loss_dis, self.netD, self.netG, data, self.device, 
                        self.real_label, self.fake_label, self.grad_penalty_coef
                    )

                for _ in range(self.iter_per_epoch_gen):
                    D_G_z2, errG = generator_step(self.optimizerG,
                        f_loss_gen, self.netD, self.netG, label, fake, self.real_label
                    )
                
                # Save Losses for plotting later
                G_losses.append(errG.item())
                D_losses.append(errD.item())

                size = len(self.dataloader) - 1
                self._output_training_stats(epoch, i, size, errD.item(), errG.item(), D_x, D_G_z1, D_G_z2, t0)
                
                # Check how the generator is doing by saving G's output on fixed_noise
                self._save_gen_output(iters, epoch, i)
                
                iters += 1

            # save the model every self.save_epochs epochs
            if self.save_models and (epoch % self.save_epochs == self.save_epochs - 1):
                self.save_path_G, self.save_path_D = model_paths(self.experiment_path, epoch, self.models_path)
                save_models(self.netG, self.netD, self.save_path_G, self.save_path_D)

        stats = {
            'img_list' : self.img_list,
            'img_list_nogrid' : self.img_list_nogrid,
            'G_losses' : G_losses,
            'D_losses' : D_losses  
        }
        # save stats at the end of training
        if self.save_stats:
            pickle_save(stats, self.stats_path)

        return stats

## Visualisation

In [20]:
def plot_loss(G_losses, D_losses, save = False):
    plt.figure(figsize=(10,5))
    plt.title("Generator and Discriminator Loss During Training")
    plt.plot(G_losses, label="G")
    plt.plot(D_losses, label="D")
    plt.xlabel("iterations")
    plt.ylabel("Loss")
    plt.legend()

    if save == True:
      plt.savefig(PATH + 'img/loss', format="png",dpi=400)

    plt.show()
    return

In [21]:
def plot_realvsfake(dataloader, device, img_list, save = False):
    # Grab a batch of real images from the dataloader
    real_batch = next(iter(dataloader))

    # Plot the real images
    plt.figure(figsize=(15,15))
    plt.subplot(1,2,1)
    plt.axis("off")
    plt.title("Real Images")
    plt.imshow(np.transpose(vutils.make_grid(real_batch[0].to(device)[:64], padding=5, normalize=True).cpu(),(1,2,0)))

    # Plot the fake images from the last epoch
    plt.subplot(1,2,2)
    plt.axis("off")
    plt.title("Fake Images")
    plt.imshow(np.transpose(img_list[-1],(1,2,0)))
    if save == True:
        plt.savefig(PATH + 'img/real_vs_fake', format="png",dpi=400)
    plt.show()
    return

## Serialization

In [22]:
def save_models(netG ,netD, save_path_G, save_path_D):
    torch.save(netG.state_dict(), save_path_G)
    torch.save(netD.state_dict(), save_path_D)
    print('GAN saved')
    return

In [23]:
def load_models(ngpu, Discriminator, Generator, save_path_G, save_path_D):
  
    netD = init_net(Discriminator(ngpu, nc, loss_name), device)
    netD.load_state_dict(torch.load(save_path_D))
    netD.eval()

    netG = init_net(Generator(ngpu, nc, nz), device)
    netG.load_state_dict(torch.load(save_path_G))
    netG.eval()
    
    print('GAN loaded')
    return netD, netG

In [24]:
def save_dict(dict, dict_path):
    with open(dict_path, 'w') as file:
        file.write(json.dumps(dict))  
    return

def read_dict(dict_path):
    with open(dict_path) as f:
        data = f.read()
    data = json.loads(data)
    return data

def pickle_save(something, path):
    with open(path, 'wb') as handle:
        pickle.dump(something, handle, protocol=pickle.HIGHEST_PROTOCOL)

def pickle_load (path):
    with open(path, 'rb') as handle:
        something = pickle.load(handle)
    return something

## Architectures

https://keras.io/examples/generative/conditional_gan/

In [25]:
import torch.nn.functional as F

class Generator(nn.Module):
    def __init__(self, ngpu, nc, nz):
        super(Generator, self).__init__()       
        self.fc1 = nn.Linear(nz, 256)
        self.fc2 = nn.Linear(self.fc1.out_features, self.fc1.out_features*2)
        self.fc3 = nn.Linear(self.fc2.out_features, self.fc2.out_features*2)
        self.fc4 = nn.Linear(self.fc3.out_features, nc * 28 * 28)
        return    

    # forward method
    def forward(self, x): 
        x = x.reshape([x.shape[0], -1])

        x = F.leaky_relu(self.fc1(x), 0.2)
        x = F.leaky_relu(self.fc2(x), 0.2)
        x = F.leaky_relu(self.fc3(x), 0.2)
        x = torch.tanh(self.fc4(x))
        x = x.reshape((-1, nc, 28, 28))
        return x
    

class Discriminator(nn.Module):
    def __init__(self, ngpu, nc, loss):
        super(Discriminator, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 1024)
        self.fc2 = nn.Linear(self.fc1.out_features, self.fc1.out_features//2)
        self.fc3 = nn.Linear(self.fc2.out_features, self.fc2.out_features//2)

        self.fc4 = nn.Linear(self.fc3.out_features, 1)

        if loss == "kl":
            # for KL - discriminator is a classifier
            self.act = torch.sigmoid
        else:
            # for Wasserstein and hinge - discriminator is a critic
            self.act = lambda x: x

        return    

    def forward(self, x):
        x = x.reshape([x.shape[0], -1])
        x = F.leaky_relu(self.fc1(x), 0.2)
        x = F.dropout(x, 0.3)
        x = F.leaky_relu(self.fc2(x), 0.2)
        x = F.dropout(x, 0.3)
        x = F.leaky_relu(self.fc3(x), 0.2)
        x = F.dropout(x, 0.3)
        x = self.act(self.fc4(x))
        return x

# Hyperparameter optimisation (gridsearch)

In [24]:
def set_loss_params(loss_name):
    
    iter_dis, iter_gen, grad_penalty_coef = 1, 1, 0.0

    if loss_name == "wass":
        iter_dis, grad_penalty_coef = 5, 10.0

    return iter_dis, iter_gen, grad_penalty_coef

In [25]:
def run_experiment(ngpu, device, dataset, workers,
                   batch_size, shuffle, num_epochs, plot, lrD, lrG, beta1, nz, 
                   loss_name, experiment_prefix, save_stats, create_dir, 
                   iter_per_epoch_dis, iter_per_epoch_gen, grad_penalty_coef, 
                   save_epochs, save_models, momentumD, momentumG, optimizer_name): 


    netG = init_net(Generator(ngpu, nc, nz), device)
    netD = init_net(Discriminator(ngpu, nc, loss_name), device)
    
    # Create the dataloader
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                            shuffle=True, num_workers=workers)
     
    fixed_noise, real_label, fake_label, optimizerD, optimizerG = init_optimizers(optimizer_name, netD, netG, lrD, lrG, beta1, nz, device, momentumD, momentumG)

    experiment_prefix = experiment_prefix + optimizer_name +'_mG'+str(momentumD) +'_mD'+str(momentumG) + '_'

    gan_training = Training(loss_name, netD, netG, device, real_label, fake_label, 
                            dataloader, num_epochs, fixed_noise, 
                            lrD, lrG, beta1, experiment_prefix, save_models, PATH, save_stats, create_dir,
                            iter_per_epoch_dis, iter_per_epoch_gen, grad_penalty_coef, optimizerD, optimizerG, save_epochs=save_epochs)

    stats = gan_training.train()
    
    if plot:
        plot_loss(G_losses, D_losses, save = False)
        plot_realvsfake(dataloader, device, img_list, save = False)
    
    return stats, dataloader, netG, netD

In [26]:
def grid_search(ngpu, device, dataset, workers, 
                experiment_prefix, batch_size_list, shuffle_list, 
                num_epochs_list, loss_name_list, optimizer_name_list, 
                beta1_list, lr_list, momentums_list, plot, save_stats, create_dir
                , save_epochs, save_models, manualSeed):

    # all_stats = [] 

    # TO DO change for loops to zip 
    for batch_size in batch_size_list:
        for shuffle in shuffle_list:
            for num_epochs in num_epochs_list:
                for loss_name in loss_name_list:

                    iter_per_epoch_dis, iter_per_epoch_gen, grad_penalty_coef = set_loss_params(loss_name)
                    for optimizer_name in optimizer_name_list:
                        for beta1 in beta1_list:
                            for lr in lr_list:
                                for (momentumD, momentumG) in momentums_list : 
                                    lrD = lr
                                    lrG = lr
                                    # set seed before every experiment
                                    set_seeds(manualSeed = manualSeed)

                                    

                                    print('====================PARAMETERS===================')
                                    print('batch_size =', batch_size)
                                    print('shuffle =', shuffle)
                                    print('num_epoch =', num_epochs)
                                    print('loss_name =', loss_name)
                                    print('optimizer_name =', optimizer_name)
                                    print('beta1 =', beta1)
                                    print('lr =', lr)
                                    print('iter_per_epoch_dis =', iter_per_epoch_dis)
                                    print('iter_per_epoch_gen =', iter_per_epoch_gen)
                                    print('grad_penalty_coef =', grad_penalty_coef)

                                    stats, dataloader, netG, netD = run_experiment(ngpu, device, dataset, workers,
                                                                                  batch_size, shuffle, num_epochs, plot, lrD, lrG, beta1, nz, 
                                                                                  loss_name, experiment_prefix, save_stats, create_dir, 
                                                                                  iter_per_epoch_dis, iter_per_epoch_gen, grad_penalty_coef, 
                                                                                  save_epochs, save_models, momentumD, momentumG, optimizer_name)

    # all_stats.append(stats)
    
    return #all_stats

In [27]:
# Root directory for dataset
dataroot = PATH + "data/"

# Dataset name
dataset_name = 'mnist' # 'cifar10' or 'mnist'

# Number of workers for dataloader
workers = 2

# Spatial size of training images. All images will be resized to this size using a transformer.
image_size = 28 # 28 for mnist, 64 for others

# Size of z latent vector (i.e. size of generator input)
nz = 128

# Number of GPUs available. Use 0 for CPU mode.
ngpu = 1

In [28]:
create_repo_paths(PATH)

In [29]:
dataset, nc = get_dataset(dataset_name, image_size, dataroot)

# Decide which device we want to run on
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

In [None]:
grid_search(ngpu, device, dataset, workers,
                        experiment_prefix = '', # and an extra word at the begining to the save path of the models and stats
                        batch_size_list = [128],
                        shuffle_list = [True],
                        num_epochs_list = [300],
                        loss_name_list = ['wass'], # wass, hinge
                        optimizer_name_list = ['adam','wass','rmsprop'], # 'adam' 'sgd' 'rmsprop'
                        beta1_list = [0.9], # 0.9 == default # Beta1 hyperparam for Adam optimizers
                        lr_list = [1e-1,1e-2,1e-3,1e-4, 1e-5, 1e-6, 1e-7], # [1,1e-3,2e-4,1e-5,1e-6]
                        momentums_list = [(0,0)], # [(momentumD, momentumG)]
                        plot = False,
                        save_stats = True, # save the stats to disk
                        create_dir = True, # create the directories to save files
                        save_epochs = 10, # save the model every save_epochs epochs
                        save_models = True, # save the models to disk
                        manualSeed = 123 # keep at 123
                        ) # grad_penalty_coef

# Training example

###########
 TO DO : run one training iteration with the examples and 

## Parameters

In [None]:
# Root directory for dataset
dataroot = PATH + "data/"

# Dataset name
dataset_name = 'mnist' # 'cifar10' or 'mnist'

# Number of workers for dataloader
workers = 2

# Batch size during training
batch_size = 128

# Spatial size of training images. All images will be resized to this size using a transformer.
image_size = 28 # 28 for mnist, 64 for others

# Size of z latent vector (i.e. size of generator input)
nz = 128

# Number of GPUs available. Use 0 for CPU mode.
ngpu = 1

In [None]:
# Number of training epochs
num_epochs = 3

# Learning rate for optimizers
lrD = 2e-4
lrG = 2e-4

# Beta1 hyperparam for Adam optimizers
beta1 = 0.9 # 0.9 == default

In [None]:
dataset, nc = get_dataset(dataset_name, image_size, dataroot)

# Create the dataloader
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                         shuffle=True, num_workers=workers)

# Decide which device we want to run on
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

#print(dataset)
#plot_img(dataloader, dataset_name)

In [None]:
loss_name = "wass" # wass, hinge
iter_dis, iter_gen, grad_penalty_coef = 1, 1, 0.0

if loss_name == "wass":
    iter_dis, grad_penalty_coef = 5, 10.0

netG = init_net(Generator(ngpu, nc, nz), device)
print('Generator parameters', count_parameters(netG))

netD = init_net(Discriminator(ngpu, nc, loss_name), device)
print('Discriminator parameters', count_parameters(netD))

## Run

In [None]:
fixed_noise, real_label, fake_label, optimizerD, optimizerG = init_optimizers(netD, netG, lrD, lrG, beta1, nz, device)
experiment_prefix = '' # and extra word to add the automatically generate one if you really need it, ideally keep empty
gan_training = Training(loss_name, netD, netG, device, real_label, fake_label, 
                        dataloader, num_epochs, fixed_noise, 
                        grad_penalty_coef, lrD, lrG, beta1, experiment_prefix, save_models, PATH, save_stats = True, create_dir=True,
                        iter_per_epoch_dis=1, iter_per_epoch_gen=1, grad_penalty_coef=0.0)

stats = gan_training.train()

In [None]:
img_list = stats['img_list']
G_losses = stats['G_losses']
D_losses = stats['D_losses']

In [None]:
!python -m pytorch_fid path/to/dataset1 path/to/dataset2 


def bla(folderlist):
    for item in folderlist:
        !python -m pytorch_fid item[1] item[2]

# Visualisation

In [None]:
plot_loss(G_losses, D_losses, save = False)

In [None]:
plot_realvsfake(dataloader, device, img_list, save = False)

## G’s progression



In [None]:
#%%capture
fig = plt.figure(figsize=(8,8))
plt.axis("off")
ims = [[plt.imshow(np.transpose(i,(1,2,0)), animated=True)] for i in img_list]
ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=1000, blit=True)

HTML(ani.to_jshtml())

# Serialisation

In [None]:
epoch = 999999999
experiment_prefix = 'tteeesst'
experiment_path, stats_path, models_path = generate_paths(PATH, experiment_prefix, loss_name, lrD, lrG, beta1, iter_dis, iter_gen, grad_penalty_coef, create_dir=True)
save_path_G, save_path_D = model_paths(experiment_path, epoch, models_path)

print(experiment_path)
print(stats_path)
print(save_path_G)
print(save_path_D)

In [None]:
save_models(netG ,netD, save_path_G, save_path_D)

In [None]:
pickle_save(stats, stats_path)

In [None]:
# load model
netD, netG = load_models(ngpu, Discriminator, Generator, save_path_G, save_path_D)

stats = pickle_load(stats_path)

# Metrics

In [26]:
# Root directory for dataset
dataroot = PATH + "data/"

# Dataset name
dataset_name = 'mnist' # 'cifar10' or 'mnist'

# Number of workers for dataloader
workers = 2

# Spatial size of training images. All images will be resized to this size using a transformer.
image_size = 28 # 28 for mnist, 64 for others

# Size of z latent vector (i.e. size of generator input)
nz = 128

# Number of GPUs available. Use 0 for CPU mode.
ngpu = 1
create_repo_paths(PATH)
dataset, nc = get_dataset(dataset_name, image_size, dataroot)

# Decide which device we want to run on
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

In [27]:
loss_name = 'wass'
netG = init_net(Generator(ngpu, nc, nz), device)
netD = init_net(Discriminator(ngpu, nc, loss_name), device)

Generator(
  (fc1): Linear(in_features=128, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=1024, bias=True)
  (fc4): Linear(in_features=1024, out_features=784, bias=True)
)
Discriminator(
  (fc1): Linear(in_features=784, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=1, bias=True)
)


In [28]:
def sample_gen_dataset(n_samples, batch_size, netG, nz, workers,shuffle=True):

    with torch.no_grad():
      noise = torch.randn(n_samples, nz, 1, 1, device=device)
      fake = netG(noise)

    fake_dataset = torch.utils.data.TensorDataset(fake)
    fake_dataloader = torch.utils.data.DataLoader(fake_dataset, batch_size=batch_size,
                                         shuffle=shuffle, num_workers=workers)
    return fake_dataloader

In [41]:
# Create the dataloader

batch_size_eval = 10 # 128
num_samples = 10 # 1000
set_seeds(manualSeed=123)
which = torch.ones(len(dataset)).multinomial(num_samples, replacement=True)
dataset_subset = torch.utils.data.Subset(dataset, which)

real_dataloader = torch.utils.data.DataLoader(dataset_subset, batch_size=batch_size_eval,
                                         shuffle=False, num_workers=workers) # shuffle=False for reproducibility

Random Seed:  123


In [42]:
# Load inception model
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[2048]
inception_model = InceptionV3([block_idx])
inception_model = inception_model.to(device)

In [51]:
# take first batch from the dataloader to get 500 samples :

def calculate_fid(num_samples, real_dataloader, batch_size_eval, device, inception_model, netG, nz, workers):
    with torch.no_grad():
        # sample the generator (and output a dataset from that)
        fake_dataloader = sample_gen_dataset(num_samples, batch_size_eval, netG, nz, workers, shuffle=True)

        t_frechet = time.time()
        frechet_dist = calculate_frechet(device, real_dataloader, fake_dataloader, inception_model) 
        print('frechet dist:', frechet_dist,'| time to calculate :',time.time()-t_frechet,'s')
        
    return frechet_dist

In [44]:
# for i in range(5):
# sample_batch = next(iter(dataloader))
frechet_dist = calculate_fid(num_samples, real_dataloader, batch_size_eval, device, inception_model, netG, nz, workers)
# print()

100%|██████████| 1/1 [00:02<00:00,  2.42s/it]
100%|██████████| 1/1 [00:02<00:00,  2.48s/it]


frechet dist: 397.35060522269424 | time to calculate : 11.557352542877197 s


In [85]:
# Load inception model
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[2048]
inception_model = InceptionV3([block_idx])
inception_model = inception_model.to(device)

# Init empty G and D
netG = init_net(Generator(ngpu, nc, nz), device)
netD = init_net(Discriminator(ngpu, nc, loss_name), device)

# Init paths
create_repo_paths(PATH)
generated_data_path = PATH + 'generated_data/'
generated_data_path 

# Create a sample of the mnist dataset
batch_size_eval = 10 # 128
num_samples = 10 # 1000
set_seeds(manualSeed=123)
which = torch.ones(len(dataset)).multinomial(num_samples, replacement=True)
dataset_subset = torch.utils.data.Subset(dataset, which)

real_dataloader = torch.utils.data.DataLoader(dataset_subset, batch_size=batch_size_eval,
                                         shuffle=False, num_workers=workers) # shuffle=False for reproducibility

Generator(
  (fc1): Linear(in_features=128, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=1024, bias=True)
  (fc4): Linear(in_features=1024, out_features=784, bias=True)
)
Discriminator(
  (fc1): Linear(in_features=784, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=1, bias=True)
)
Random Seed:  123


In [100]:
def load_G(ngpu, Generator, save_path_G, device):

    netG = init_net(Generator(ngpu, nc, nz), device)
    netG.load_state_dict(torch.load(save_path_G),map_location=torch.device('cpu') )
    netG.eval()
    return netG

In [101]:
walk_list = os.walk(generated_data_path)
folder_list = next(walk_list)[1]
calc_fid = True
FID_list = []
n_repitions = 1
which_iterations = [0,50,100,150,200,250,290] 
# [0,10,20,30,40,50,60,70,80,90,100,110,120,
# 130,140,150,160,170,180,190,200,210,220,230,240,250,260,270,280,290]
desired_optimizer = 'sgd' # 'adam' 'rsmprop' 'sgd'
desired_learning_rates = [1e-1,1e-2,1e-3,1e-4, 1e-5, 1e-6, 1e-7]


for folder in tqdm(folder_list):
    param_list = folder_list[0].split('_')
    optimizer_name = param_list[0]
    loss_name = param_list[3][:-4]
    lr = param_list[4][3:]

    # stats_path = generated_data_path + folder + '/stat.pickle'
    # stats = pickle_load(stats_path)
    # img_list = stats['img_list'] # 8x8 images fake generatred images in one picture
    # G_losses = stats['G_losses'] 
    # D_losses = stats['D_losses'] 
    # img_list_nogrid  = stats['img_list_nogrid'] # 64 fake generatred images in a list
    
    inner_folder_path = generated_data_path+folder
    for file in os.listdir(inner_folder_path+'/models/'):
        model_type = file[:7]
        if model_type == 'model_G':
            number = int(file[8:-4])
            if number in which_iterations:
                print(number)
                print(inner_folder_path+'/models/'+file)
                net_G = load_G(ngpu, Generator, inner_folder_path+'/models/'+file,device)

  
    netD = init_net(Discriminator(ngpu, nc, loss_name), device)
    netD.load_state_dict(torch.load(save_path_D))
    netD.eval()
    

  0%|          | 0/2 [00:00<?, ?it/s]

0
./generated_data/adam_mG0_mD0_wassLoss_lrd0.01_lrg0.01_b1b0.9_itd5_itg1_gpv10.0_/models/model_G_0.zip
Generator(
  (fc1): Linear(in_features=128, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=1024, bias=True)
  (fc4): Linear(in_features=1024, out_features=784, bias=True)
)





RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

In [54]:
walk_list = os.walk(generated_data_path)
folder_list = next(walk_list)[1]
calc_fid = True
FID_list = []
n_repitions = 1

for folder in tqdm(folder_list):
    print(generated_data_path+folder[0])
    walk_list_2 = os.walk(generated_data_path+folder[0])
    folder_list_2 = next(walk_list_2)[1]
    print(folder_list_2)
    param_list = folder_list[0].split('_')

    optimizer_name = param_list[0]
    momentumG = param_list[1][2:]
    momentD = param_list[2][2:]
    loss_name = param_list[3][:-4]
    lrd = param_list[4][3:]
    lrg = param_list[5][3:]
    beta1_val = param_list[6][3:]
    iteration_dis = param_list[7][3:]
    iteration_gen = param_list[8][3:]
    gradient_penalty_val = param_list[9][3:]
    stats_path = generated_data_path + folder + '/stat.pickle'
    params = [optimizer_name, momentumG, momentD, loss_name, lrd, 
                lrg, beta1_val, iteration_dis, iteration_gen, 
                gradient_penalty_val, stats_path]
    stats = pickle_load(stats_path)

    img_list = stats['img_list'] # 8x8 images fake generatred images in one picture
    G_losses = stats['G_losses'] 
    D_losses = stats['D_losses'] 
    img_list_nogrid  = stats['img_list_nogrid'] # 64 fake generatred images in a list

    # netG = 
    # if calc_fid:
    #     fid_list = []
    #     for i in range(n_repitions):
    #             frechet_dist = calculate_fid(num_samples, real_dataloader, batch_size_eval, 
    #             device, inception_model, netG, nz, workers)
    #             fid_list.append(frechet_dist)

    model_paths_ = generated_data_path + '/models'
    # debug prints
    print(img_list_nogrid[0].shape)
    print(img_list[0].shape)
    print(stats_path)
    print(model_paths)
    print(folder)
    print(param_list)
    print('optimizer_name:', optimizer_name)
    print('momentumG:', momentumG)
    print('momentD:', momentD)
    print('loss_name:', loss_name)
    print('lrd:', lrd)
    print('lrg:', lrg)
    print('beta1_val:', beta1_val)
    print('iteration_dis:', iteration_dis)
    print('iteration_gen:', iteration_gen)
    print('gradient_penalty', gradient_penalty_val)


  0%|          | 0/2 [00:00<?, ?it/s]

['models']


 50%|█████     | 1/2 [00:00<00:00,  2.39it/s]

torch.Size([64, 1, 28, 28])
torch.Size([3, 242, 242])
./generated_data/adam_mG0_mD0_wassLoss_lrd0.01_lrg0.01_b1b0.9_itd5_itg1_gpv10.0_/stat.pickle
<function model_paths at 0x000001C17674FF78>
adam_mG0_mD0_wassLoss_lrd0.01_lrg0.01_b1b0.9_itd5_itg1_gpv10.0_
['adam', 'mG0', 'mD0', 'wassLoss', 'lrd0.01', 'lrg0.01', 'b1b0.9', 'itd5', 'itg1', 'gpv10.0', '']
optimizer_name: adam
momentumG: 0
momentD: 0
loss_name: wass
lrd: 0.01
lrg: 0.01
beta1_val: 0.9
iteration_dis: 5
iteration_gen: 1
gradient_penalty 10.0
['models']


Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "c:\Users\Younes\.conda\envs\optmlgan\lib\multiprocessing\queues.py", line 232, in _feed
    close()
  File "c:\Users\Younes\.conda\envs\optmlgan\lib\multiprocessing\connection.py", line 177, in close
    self._close()
  File "c:\Users\Younes\.conda\envs\optmlgan\lib\multiprocessing\connection.py", line 277, in _close
    _CloseHandle(self._handle)
OSError: [WinError 6] The handle is invalid

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\Younes\.conda\envs\optmlgan\lib\threading.py", line 926, in _bootstrap_inner
    self.run()
  File "c:\Users\Younes\.conda\envs\optmlgan\lib\threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "c:\Users\Younes\.conda\envs\optmlgan\lib\multiprocessing\queues.py", line 263, in _feed
    queue_sem.release()
ValueError: semaphore or lock released too many times

100%|███

torch.Size([64, 1, 28, 28])
torch.Size([3, 242, 242])
./generated_data/adam_mG0_mD0_wassLoss_lrd0.1_lrg0.1_b1b0.9_itd5_itg1_gpv10.0_/stat.pickle
<function model_paths at 0x000001C17674FF78>
adam_mG0_mD0_wassLoss_lrd0.1_lrg0.1_b1b0.9_itd5_itg1_gpv10.0_
['adam', 'mG0', 'mD0', 'wassLoss', 'lrd0.01', 'lrg0.01', 'b1b0.9', 'itd5', 'itg1', 'gpv10.0', '']
optimizer_name: adam
momentumG: 0
momentD: 0
loss_name: wass
lrd: 0.01
lrg: 0.01
beta1_val: 0.9
iteration_dis: 5
iteration_gen: 1
gradient_penalty 10.0





In [None]:
# Root directory for dataset
dataroot = PATH + "data/"

# Dataset name
dataset_name = 'mnist' # 'cifar10' or 'mnist'

# Number of workers for dataloader
workers = 2

# Spatial size of training images. All images will be resized to this size using a transformer.
image_size = 28 # 28 for mnist, 64 for others

# Size of z latent vector (i.e. size of generator input)
nz = 128

# Number of GPUs available. Use 0 for CPU mode.
ngpu = 1

In [None]:
create_repo_paths(PATH)

In [None]:
dataset, nc = get_dataset(dataset_name, image_size, dataroot)

# Decide which device we want to run on
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

In [53]:
for a,b,c in os.walk(generated_data_path):
    print(a)
    print(b)
    print(c)
    print('_____')

./generated_data/
['adam_mG0_mD0_wassLoss_lrd0.01_lrg0.01_b1b0.9_itd5_itg1_gpv10.0_', 'adam_mG0_mD0_wassLoss_lrd0.1_lrg0.1_b1b0.9_itd5_itg1_gpv10.0_']
['ReadMe.md']
_____
./generated_data/adam_mG0_mD0_wassLoss_lrd0.01_lrg0.01_b1b0.9_itd5_itg1_gpv10.0_
['models']
['stat.pickle']
_____
./generated_data/adam_mG0_mD0_wassLoss_lrd0.01_lrg0.01_b1b0.9_itd5_itg1_gpv10.0_\models
[]
['model_D_0.zip', 'model_D_10.zip', 'model_D_20.zip']
_____
./generated_data/adam_mG0_mD0_wassLoss_lrd0.1_lrg0.1_b1b0.9_itd5_itg1_gpv10.0_
['models']
['stat.pickle']
_____
./generated_data/adam_mG0_mD0_wassLoss_lrd0.1_lrg0.1_b1b0.9_itd5_itg1_gpv10.0_\models
[]
['model_D_0.zip', 'model_D_10.zip', 'model_D_20.zip']
_____
