<a href="https://colab.research.google.com/github/TheodorSergeev/optml_gan/blob/main/dcgan.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Adapted from https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html

# Initialisation

In [1]:
try:
    import google.colab
    IN_COLAB = True
except:
    IN_COLAB = False

if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

    # packages to generate requirement.txt
    %pip install nbconvert
    %pip install pipreqs
    # for Frechet inception distance
    %pip install pytorch-fid

    %cd drive/My Drive/optml_gan2
    PATH = './'
else:
    PATH = './'

In [2]:
from __future__ import print_function

import time

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.utils.data

import torchvision.utils as vutils

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
from scipy import linalg
from torch.nn.functional import adaptive_avg_pool2d
%matplotlib inline

In [3]:
%load_ext autoreload
%autoreload 2

# Source code

In [4]:
from src.data_handling import *
from src.utils import *
from src.model import *
from src.losses import *
from src.fid import *

loss_dict = {
    "kl": (loss_dis_kl, loss_gen_kl),
    "wass": (loss_dis_wasser, loss_gen_wasser),
    "hinge": (loss_dis_hinge, loss_gen_hinge)
}

# FID

from src.training import *
from src.visualisation import *
from src.serialisation import *

# https://keras.io/examples/generative/conditional_gan/
from src.architectures import *

from src.gridsearch import *

# Hyperparameter optimisation (gridsearch)

In [5]:
# Root directory for dataset
dataroot = PATH + "data/"

# Dataset name
dataset_name = 'mnist'  # 'cifar10' or 'mnist'

# Number of workers for dataloader
workers = 2

# Spatial size of training images. All images will be resized to this size using a transformer
image_size = 28  # 28 for mnist, 64 for others

# Size of z latent vector (i.e. size of generator input)
nz = 128

# Number of GPUs available. Use 0 for CPU mode.
ngpu = 1

In [6]:
create_repo_paths(PATH)

In [7]:
dataset, nc = get_dataset(dataset_name, image_size, dataroot)

# Decide which device we want to run on
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

In [None]:
grid_search(ngpu, device, dataset, workers,
            experiment_prefix='',           # add an extra word at the begining to the save path of the models and stats
            batch_size_list=[128],
            shuffle_list=[True],
            num_epochs_list=[300],
            loss_name_list=['wass'],        # wass, hinge
            optimizer_name_list=['adam', 'sgd', 'rmsprop'],   # 'adam' 'sgd' 'rmsprop'
            beta1_list=[0.9],               # 0.9 == default # Beta1 hyperparam for Adam optimizers
            lr_list=[1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7],
            momentums_list=[(0, 0)],        # [(momentumD, momentumG)]
            plot=False,
            save_stats=True,                # save the stats to disk
            create_dir=True,                # create the directories to save files
            save_epochs=10,                 # save the model every save_epochs epochs
            save_models=True,               # save the models to disk
            manualSeed=123,                 # keep at 123
            nc=nc, nz=nz
            )

# Training example

## Parameters

In [None]:
# Root directory for dataset
dataroot = PATH + "data/"

# Dataset name
dataset_name = 'mnist'  # 'cifar10' or 'mnist'

# Number of workers for dataloader
workers = 2

# Batch size during training
batch_size = 128

# Spatial size of training images. All images will be resized to this size using a transformer.
image_size = 28  # 28 for mnist, 64 for others

# Size of z latent vector (i.e. size of generator input)
nz = 128

# Number of GPUs available. Use 0 for CPU mode.
ngpu = 1

In [None]:
# Number of training epochs
num_epochs = 3

# Learning rate for optimizers
lrD = 2e-4
lrG = 2e-4

# Beta1 hyperparam for Adam optimizers
beta1 = 0.9  # 0.9 == default

In [None]:
dataset, nc = get_dataset(dataset_name, image_size, dataroot)

# Create the dataloader
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                         shuffle=True, num_workers=workers)

# Decide which device we want to run on
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

In [None]:
loss_name = "wass"  # wass, hinge
iter_dis, iter_gen, grad_penalty_coef = 1, 1, 0.0

if loss_name == "wass":
    iter_dis, grad_penalty_coef = 5, 10.0

netG = init_net(Generator(ngpu, nc, nz), device, ngpu)
print('Generator parameters', count_parameters(netG))

netD = init_net(Discriminator(ngpu, nc, loss_name), device, ngpu)
print('Discriminator parameters', count_parameters(netD))

## Run

In [None]:
fixed_noise, real_label, fake_label, optimizerD, optimizerG = init_optimizers(netD, netG, lrD, lrG, beta1, nz, device)
experiment_prefix = ''  # add extra word to add the automatically generate one if you really need it, ideally keep empty
gan_training = Training(loss_name, netD, netG, device, real_label, fake_label,
                        dataloader, num_epochs, fixed_noise,
                        grad_penalty_coef, lrD, lrG, beta1, experiment_prefix, save_models, PATH, save_stats=True, create_dir=True,
                        iter_per_epoch_dis=1, iter_per_epoch_gen=1, grad_penalty_coef=0.0)

stats = gan_training.train()

In [None]:
img_list = stats['img_list']
G_losses = stats['G_losses']
D_losses = stats['D_losses']

# Visualisation

In [None]:
plot_loss(G_losses, D_losses, PATH, save=False)

In [None]:
plot_realvsfake(dataloader, device, img_list, PATH, save=False)

## G’s progression



In [None]:
fig = plt.figure(figsize=(8, 8))
plt.axis("off")
ims = [[plt.imshow(np.transpose(i, (1, 2, 0)), animated=True)] for i in img_list]
ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=1000, blit=True)

HTML(ani.to_jshtml())

# Serialisation

In [None]:
epoch = 999999999
experiment_prefix = ''
experiment_path, stats_path, models_path = generate_paths(PATH, experiment_prefix, loss_name, lrD, lrG, beta1, iter_dis, iter_gen, grad_penalty_coef, create_dir=True)
save_path_G, save_path_D = model_paths(experiment_path, epoch, models_path)

print(experiment_path)
print(stats_path)
print(save_path_G)
print(save_path_D)

In [None]:
save_models(netG, netD, save_path_G, save_path_D)

In [None]:
pickle_save(stats, stats_path)

In [None]:
# Load model
netD, netG = load_models(ngpu, Discriminator, Generator, save_path_G, save_path_D, nc, nz, loss_name, device)

stats = pickle_load(stats_path)

# Metrics

In [5]:
# Root directory for dataset
dataroot = PATH + "data/"

# Dataset name
dataset_name = 'mnist' # 'cifar10' or 'mnist'

# Number of workers for dataloader
workers = 2

# Spatial size of training images. All images will be resized to this size using a transformer.
image_size = 28 # 28 for mnist, 64 for others

# Size of z latent vector (i.e. size of generator input)
nz = 128

# Number of GPUs available. Use 0 for CPU mode.
ngpu = 1
create_repo_paths(PATH)
dataset, nc = get_dataset(dataset_name, image_size, dataroot)

# Decide which device we want to run on
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

In [6]:
loss_name = 'wass'
netG = init_net(Generator(ngpu, nc, nz), device, ngpu)
netD = init_net(Discriminator(ngpu, nc, loss_name), device, ngpu)

Generator(
  (fc1): Linear(in_features=128, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=1024, bias=True)
  (fc4): Linear(in_features=1024, out_features=784, bias=True)
)
Discriminator(
  (fc1): Linear(in_features=784, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=1, bias=True)
)


In [7]:
def sample_gen_dataset(n_samples, batch_size, netG, nz, workers,shuffle=True):

    with torch.no_grad():
      noise = torch.randn(n_samples, nz, 1, 1, device=device)
      fake = netG(noise)

    fake_dataset = torch.utils.data.TensorDataset(fake)
    fake_dataloader = torch.utils.data.DataLoader(fake_dataset, batch_size=batch_size,
                                         shuffle=shuffle, num_workers=workers)
    return fake_dataloader

In [8]:
# Create the dataloader

batch_size_eval = 10 # 128
num_samples = 10 # 1000
set_seeds(manualSeed=123)
which = torch.ones(len(dataset)).multinomial(num_samples, replacement=True)
dataset_subset = torch.utils.data.Subset(dataset, which)

real_dataloader = torch.utils.data.DataLoader(dataset_subset, batch_size=batch_size_eval,
                                         shuffle=False, num_workers=workers) # shuffle=False for reproducibility

Random Seed:  123


In [9]:
# Load inception model
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[2048]
inception_model = InceptionV3([block_idx])
inception_model = inception_model.to(device)

In [10]:
# take first batch from the dataloader to get 500 samples :

def calculate_fid(num_samples, real_dataloader, batch_size_eval, device, inception_model, netG, nz, workers):
    with torch.no_grad():
        # sample the generator (and output a dataset from that)
        fake_dataloader = sample_gen_dataset(num_samples, batch_size_eval, netG, nz, workers, shuffle=True)

        t_frechet = time.time()
        frechet_dist = calculate_frechet(device, real_dataloader, fake_dataloader, inception_model,num_samples=num_samples) 
        print('frechet dist:', frechet_dist,'| time to calculate :',time.time()-t_frechet,'s')
        
    return frechet_dist

In [15]:
# for i in range(5):
# sample_batch = next(iter(dataloader))
frechet_dist = calculate_fid(num_samples, real_dataloader, batch_size_eval, device, inception_model, netG, nz, workers)
# print()

100%|██████████| 1/1 [00:02<00:00,  2.57s/it]
100%|██████████| 1/1 [00:02<00:00,  2.37s/it]


frechet dist: 396.6785385793526 | time to calculate : 11.724458456039429 s


In [16]:
# Load inception model
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[2048]
inception_model = InceptionV3([block_idx])
inception_model = inception_model.to(device)

# Init empty G and D
netG = init_net(Generator(ngpu, nc, nz), device, ngpu)
netD = init_net(Discriminator(ngpu, nc, loss_name), device, ngpu)

# Init paths
create_repo_paths(PATH)
generated_data_path = PATH + 'generated_data/'
generated_data_path 

# Create a sample of the mnist dataset
batch_size_eval = 10 # 128
num_samples = 10 # 1000
set_seeds(manualSeed=123)
which = torch.ones(len(dataset)).multinomial(num_samples, replacement=True)
dataset_subset = torch.utils.data.Subset(dataset, which)

real_dataloader = torch.utils.data.DataLoader(dataset_subset, batch_size=batch_size_eval,
                                         shuffle=False, num_workers=workers) # shuffle=False for reproducibility

Generator(
  (fc1): Linear(in_features=128, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=1024, bias=True)
  (fc4): Linear(in_features=1024, out_features=784, bias=True)
)
Discriminator(
  (fc1): Linear(in_features=784, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=1, bias=True)
)
Random Seed:  123


In [20]:
def load_G(ngpu, Generator, save_path_G, device):

    netG = init_net(Generator(ngpu, nc, nz), device,ngpu)
    netG.load_state_dict(torch.load(save_path_G),map_location=torch.device('cpu') )
    netG.eval()
    return netG

In [21]:
walk_list = os.walk(generated_data_path)
folder_list = next(walk_list)[1]
calc_fid = True
FID_list = []
n_repitions = 1
which_iterations = [0,50,100,150,200,250,290] 
# [0,10,20,30,40,50,60,70,80,90,100,110,120,
# 130,140,150,160,170,180,190,200,210,220,230,240,250,260,270,280,290]
desired_optimizer = 'sgd' # 'adam' 'rsmprop' 'sgd'
desired_learning_rates = [1e-1,1e-2,1e-3,1e-4, 1e-5, 1e-6, 1e-7]


for folder in tqdm(folder_list):
    param_list = folder_list[0].split('_')
    optimizer_name = param_list[0]
    loss_name = param_list[3][:-4]
    lr = param_list[4][3:]

    # stats_path = generated_data_path + folder + '/stat.pickle'
    # stats = pickle_load(stats_path)
    # img_list = stats['img_list'] # 8x8 images fake generatred images in one picture
    # G_losses = stats['G_losses'] 
    # D_losses = stats['D_losses'] 
    # img_list_nogrid  = stats['img_list_nogrid'] # 64 fake generatred images in a list
    
    inner_folder_path = generated_data_path+folder
    for file in os.listdir(inner_folder_path+'/models/'):
        model_type = file[:7]
        if model_type == 'model_G':
            number = int(file[8:-4])
            if number in which_iterations:
                print(number)
                print(inner_folder_path+'/models/'+file)
                net_G = load_G(ngpu, Generator, inner_folder_path+'/models/'+file,device)

  
    netD = init_net(Discriminator(ngpu, nc, loss_name), device,ngpu)
    netD.load_state_dict(torch.load(save_path_D))
    netD.eval()
    

  0%|          | 0/2 [00:00<?, ?it/s]

0
./generated_data/adam_mG0_mD0_wassLoss_lrd0.01_lrg0.01_b1b0.9_itd5_itg1_gpv10.0_/models/model_G_0.zip
Generator(
  (fc1): Linear(in_features=128, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=1024, bias=True)
  (fc4): Linear(in_features=1024, out_features=784, bias=True)
)





RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

In [None]:
walk_list = os.walk(generated_data_path)
folder_list = next(walk_list)[1]
calc_fid = True
FID_list = []
n_repitions = 1

for folder in tqdm(folder_list):
    print(generated_data_path+folder[0])
    walk_list_2 = os.walk(generated_data_path+folder[0])
    folder_list_2 = next(walk_list_2)[1]
    print(folder_list_2)
    param_list = folder_list[0].split('_')

    optimizer_name = param_list[0]
    momentumG = param_list[1][2:]
    momentD = param_list[2][2:]
    loss_name = param_list[3][:-4]
    lrd = param_list[4][3:]
    lrg = param_list[5][3:]
    beta1_val = param_list[6][3:]
    iteration_dis = param_list[7][3:]
    iteration_gen = param_list[8][3:]
    gradient_penalty_val = param_list[9][3:]
    stats_path = generated_data_path + folder + '/stat.pickle'
    params = [optimizer_name, momentumG, momentD, loss_name, lrd, 
                lrg, beta1_val, iteration_dis, iteration_gen, 
                gradient_penalty_val, stats_path]
    stats = pickle_load(stats_path)

    img_list = stats['img_list'] # 8x8 images fake generatred images in one picture
    G_losses = stats['G_losses'] 
    D_losses = stats['D_losses'] 
    img_list_nogrid  = stats['img_list_nogrid'] # 64 fake generatred images in a list

    # netG = 
    # if calc_fid:
    #     fid_list = []
    #     for i in range(n_repitions):
    #             frechet_dist = calculate_fid(num_samples, real_dataloader, batch_size_eval, 
    #             device, inception_model, netG, nz, workers)
    #             fid_list.append(frechet_dist)

    model_paths_ = generated_data_path + '/models'
    # debug prints
    print(img_list_nogrid[0].shape)
    print(img_list[0].shape)
    print(stats_path)
    print(model_paths)
    print(folder)
    print(param_list)
    print('optimizer_name:', optimizer_name)
    print('momentumG:', momentumG)
    print('momentD:', momentD)
    print('loss_name:', loss_name)
    print('lrd:', lrd)
    print('lrg:', lrg)
    print('beta1_val:', beta1_val)
    print('iteration_dis:', iteration_dis)
    print('iteration_gen:', iteration_gen)
    print('gradient_penalty', gradient_penalty_val)


NameError: name 'tqdm' is not defined

In [None]:
# Root directory for dataset
dataroot = PATH + "data/"

# Dataset name
dataset_name = 'mnist' # 'cifar10' or 'mnist'

# Number of workers for dataloader
workers = 2

# Spatial size of training images. All images will be resized to this size using a transformer.
image_size = 28 # 28 for mnist, 64 for others

# Size of z latent vector (i.e. size of generator input)
nz = 128

# Number of GPUs available. Use 0 for CPU mode.
ngpu = 1

In [None]:
create_repo_paths(PATH)

In [None]:
dataset, nc = get_dataset(dataset_name, image_size, dataroot)

# Decide which device we want to run on
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

In [None]:
for a,b,c in os.walk(generated_data_path):
    print(a)
    print(b)
    print(c)
    print('_____')

./generated_data/
['adam_mG0_mD0_wassLoss_lrd0.01_lrg0.01_b1b0.9_itd5_itg1_gpv10.0_', 'adam_mG0_mD0_wassLoss_lrd0.1_lrg0.1_b1b0.9_itd5_itg1_gpv10.0_']
['ReadMe.md']
_____
./generated_data/adam_mG0_mD0_wassLoss_lrd0.01_lrg0.01_b1b0.9_itd5_itg1_gpv10.0_
['models']
['stat.pickle']
_____
./generated_data/adam_mG0_mD0_wassLoss_lrd0.01_lrg0.01_b1b0.9_itd5_itg1_gpv10.0_\models
[]
['model_D_0.zip', 'model_D_10.zip', 'model_D_20.zip']
_____
./generated_data/adam_mG0_mD0_wassLoss_lrd0.1_lrg0.1_b1b0.9_itd5_itg1_gpv10.0_
['models']
['stat.pickle']
_____
./generated_data/adam_mG0_mD0_wassLoss_lrd0.1_lrg0.1_b1b0.9_itd5_itg1_gpv10.0_\models
[]
['model_D_0.zip', 'model_D_10.zip', 'model_D_20.zip']
_____
