In [None]:
!pip install comet_ml



In [None]:
!pip install pytorch_lightning



In [None]:
# Used for TPU
# Uncomment if you want to use TPU
# ------------------------------------------------------------------
import collections
from datetime import datetime, timedelta
import os
import requests
import threading

# Pair of version strings: `wheels` is substituted into the wheel file names below,
# `server` is the XRT version requested from the TPU in update_server_xrt().
_VersionConfig = collections.namedtuple('_VersionConfig', 'wheels,server')
VERSION = "xrt==1.15.0"  #@param ["xrt==1.15.0", "torch_xla==nightly"]
# Look up the (wheels, server) pair for the selected VERSION.
# For the nightly build the server version is "XRT-dev" + yesterday's date (YYYYMMDD).
CONFIG = {
    'xrt==1.15.0': _VersionConfig('1.15', '1.15.0'),
    'torch_xla==nightly': _VersionConfig('nightly', 'XRT-dev{}'.format(
        (datetime.today() - timedelta(1)).strftime('%Y%m%d'))),
}[VERSION]
# NOTE(review): the bucket and wheel names below are defined but not used anywhere
# in the visible notebook cells -- presumably left over from an install step; confirm.
DIST_BUCKET = 'gs://tpu-pytorch/wheels'
TORCH_WHEEL = 'torch-{}-cp36-cp36m-linux_x86_64.whl'.format(CONFIG.wheels)
TORCH_XLA_WHEEL = 'torch_xla-{}-cp36-cp36m-linux_x86_64.whl'.format(CONFIG.wheels)
TORCHVISION_WHEEL = 'torchvision-{}-cp36-cp36m-linux_x86_64.whl'.format(CONFIG.wheels)

# Update TPU XRT version
def update_server_xrt():
  """Ask the Colab TPU server to switch to the XRT version in ``CONFIG.server``.

  The TPU host is taken from the ``COLAB_TPU_ADDR`` environment variable
  (``host:port``; only the host part is used). When that variable is missing
  or empty the function prints a notice and returns without doing anything,
  instead of raising ``KeyError``.

  Returns:
    None. Progress and the HTTP response are printed.
  """
  tpu_addr = os.environ.get('COLAB_TPU_ADDR')
  if not tpu_addr:
    # No TPU address available: nothing to update.
    print('COLAB_TPU_ADDR is not set; skipping server-side XRT update.')
    return

  print('Updating server-side XRT to {} ...'.format(CONFIG.server))
  url = 'http://{TPU_ADDRESS}:8475/requestversion/{XRT_VERSION}'.format(
      TPU_ADDRESS=tpu_addr.split(':')[0],
      XRT_VERSION=CONFIG.server,
  )
  print('Done updating server-side XRT: {}'.format(requests.post(url)))

# Run the XRT update in a background thread so the cell returns immediately.
# The thread is never joined in the visible cells.
update = threading.Thread(target=update_server_xrt)
update.start()

Updating server-side XRT to 1.15.0 ...


Exception in thread Thread-4:
Traceback (most recent call last):
  File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-3-947a1487c49e>", line 23, in update_server_xrt
    TPU_ADDRESS=os.environ['COLAB_TPU_ADDR'].split(':')[0],
  File "/usr/lib/python3.6/os.py", line 669, in __getitem__
    raise KeyError(key) from None
KeyError: 'COLAB_TPU_ADDR'


In [None]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torchvision  
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST

from pytorch_lightning.callbacks import ModelCheckpoint
import pytorch_lightning as pl
from pytorch_lightning import loggers

import numpy as np
from numpy.random import choice

from PIL import Image

from skimage import transform as sktransform

import os
from pathlib import Path
import shutil

from collections import OrderedDict




In [None]:
# Let's hope this fixes the bug where my PC crashes after reconnecting
from IPython.display import clear_output 

In [None]:
# custom weights initialization called on netG and netD
def weights_init(m):
    """Initialise one module's parameters; meant to be passed to ``Module.apply``.

    * class name contains ``Conv``      -> weight ~ Normal(mean=0.0, std=0.02)
    * class name contains ``BatchNorm`` -> weight ~ Normal(mean=1.0, std=0.02), bias = 0
    * anything else                     -> left untouched
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

In [None]:
# randomly flip some labels
def noisy_labels(y, p_flip=0.05):  # flip labels with 5% probability
    """Randomly invert a fraction of the labels in ``y`` (in place).

    Args:
        y: array/tensor of labels indexed along its first axis. It is modified
            in place and also returned.
        p_flip: fraction of entries to flip; ``int(p_flip * y.shape[0])``
            entries are selected (capped at the number of entries).

    Returns:
        ``y`` itself, with each selected entry ``v`` replaced by ``1 - v``.
    """
    n_labels = y.shape[0]
    # determine the number of labels to flip
    n_select = min(int(p_flip * n_labels), n_labels)
    if n_select <= 0:
        return y
    # Sample WITHOUT replacement: with the default replace=True duplicate indices
    # are drawn, so fewer than n_select distinct labels would actually be flipped.
    flip_ix = choice(n_labels, size=n_select, replace=False)
    # invert the labels in place
    y[flip_ix] = 1 - y[flip_ix]
    return y

In [None]:
class AddGaussianNoise(object):
    """Callable transform that adds Gaussian noise to a tensor.

    Calling the instance returns ``tensor + noise * std + mean`` where ``noise``
    is standard-normal with the same shape as ``tensor``.

    Args:
        mean: constant offset added to every element.
        std: scale applied to the standard-normal noise.
    """

    def __init__(self, mean=0.0, std=0.1):
        self.std = std
        self.mean = mean

    def __call__(self, tensor):
        # Create the noise on the tensor's own device; a CPU-only noise tensor
        # cannot be added to a tensor that lives on another device.
        noise = torch.randn(tensor.size(), device=tensor.device)
        return tensor + noise * self.std + self.mean

    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)

In [None]:
def resize2d(img, size):
    """Downscale ``img`` to ``size`` with adaptive average pooling.

    Args:
        img: input tensor accepted by ``F.adaptive_avg_pool2d``.
        size: target output size (int or pair), forwarded unchanged.

    Returns:
        The pooled tensor, detached from the autograd graph.
    """
    # `Variable(..., volatile=True)` is legacy API that no longer has any effect
    # (it only emits a warning); torch.no_grad() is the supported replacement
    # and yields a result that does not track gradients.
    with torch.no_grad():
        return F.adaptive_avg_pool2d(img, size)

In [None]:
def get_valid_labels(img):
  """Return soft "real" labels, one per sample in ``img``.

  Args:
    img: tensor whose first dimension is the batch size.

  Returns:
    Tensor of shape ``(img.shape[0], 1, 1, 1)`` with values drawn uniformly
    between 0.8 and 1.1, created on the same device as ``img``.
  """
  # device=img.device keeps the labels on the same device as the batch.
  return (0.8 - 1.1) * torch.rand(img.shape[0], 1, 1, 1, device=img.device) + 1.1  # soft labels

In [None]:
def get_unvalid_labels(img):
  """Return soft "fake" labels, one per sample in ``img``, with some flipped.

  Args:
    img: tensor whose first dimension is the batch size.

  Returns:
    Tensor of shape ``(img.shape[0], 1, 1, 1)`` with values drawn uniformly
    between 0.0 and 0.3, created on the same device as ``img`` and then passed
    through ``noisy_labels`` (which inverts a fraction of the entries).
  """
  # device=img.device keeps the labels on the same device as the batch.
  soft_fake = (0.0 - 0.3) * torch.rand(img.shape[0], 1, 1, 1, device=img.device) + 0.3  # soft labels
  return noisy_labels(soft_fake)

In [None]:
class Generator(pl.LightningModule):
    """DCGAN-style generator that also emits intermediate images (multi-scale gradient).

    Six transposed-convolution stages upsample a ``(latent_dim, 1, 1)`` input:
    1x1 -> 4x4 -> 8x8 -> 16x16 -> 32x32 -> 64x64 -> 128x128. After each of the
    first five stages a 1x1 convolution converts the feature map into an
    ``nc``-channel image at that resolution.

    Args:
        ngf: base number of generator feature maps.
        nc: number of image channels produced.
        latent_dim: number of channels of the latent input.
    """

    def __init__(self, ngf, nc, latent_dim):
        super(Generator, self).__init__()
        self.ngf = ngf
        self.latent_dim = latent_dim
        self.nc = nc

        self.fc0 = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(latent_dim, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.LeakyReLU(0.2, inplace=True)
        )

        self.fc1 = nn.Sequential(
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.LeakyReLU(0.2, inplace=True)
        )

        self.fc2 = nn.Sequential(
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.LeakyReLU(0.2, inplace=True)
        )

        self.fc3 = nn.Sequential(
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.LeakyReLU(0.2, inplace=True)
        )

        self.fc4 = nn.Sequential(
            # state size. (ngf) x 32 x 32
            # Output must stay at ngf channels: the BatchNorm2d(ngf) below and
            # fc4_r (Conv2d(ngf, nc)) both expect ngf channels.
            nn.ConvTranspose2d(ngf, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.LeakyReLU(0.2, inplace=True)
        )

        self.fc5 = nn.Sequential(
            # state size. (ngf) x 64 x 64
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
            nn.Tanh()
        )

        # state size. (nc) x 128 x 128

        # For Multi-Scale Gradient
        # Converting the intermediate layers into images
        self.fc0_r = nn.Conv2d(ngf * 8, self.nc, 1)
        self.fc1_r = nn.Conv2d(ngf * 4, self.nc, 1)
        self.fc2_r = nn.Conv2d(ngf * 2, self.nc, 1)
        self.fc3_r = nn.Conv2d(self.ngf, self.nc, 1)
        self.fc4_r = nn.Conv2d(self.ngf, self.nc, 1)

    def forward(self, input):
        """Generate images from latent input.

        Args:
            input: latent tensor of shape ``(N, latent_dim, 1, 1)``.

        Returns:
            Tuple ``(x_5, x_0_r, x_1_r, x_2_r, x_3_r, x_4_r)``: the final
            128x128 image followed by the intermediate images at 4x4, 8x8,
            16x16, 32x32 and 64x64.
        """
        x_0 = self.fc0(input)
        x_1 = self.fc1(x_0)
        x_2 = self.fc2(x_1)
        x_3 = self.fc3(x_2)
        x_4 = self.fc4(x_3)
        x_5 = self.fc5(x_4)  # final stage (was mistakenly fc4 applied twice)

        # For Multi-Scale Gradient
        # Converting the intermediate layers into images
        x_0_r = self.fc0_r(x_0)
        x_1_r = self.fc1_r(x_1)
        x_2_r = self.fc2_r(x_2)
        x_3_r = self.fc3_r(x_3)
        x_4_r = self.fc4_r(x_4)  # each stage uses its own 1x1 conv

        return x_5, x_0_r, x_1_r, x_2_r, x_3_r, x_4_r


In [None]:
class Discriminator(pl.LightningModule):
    """DCGAN-style discriminator with multi-scale-gradient inputs.

    Five stride-2 convolutions reduce a 128x128 image to 4x4; after each one
    the matching lower-resolution image (``nc`` channels) is concatenated onto
    the feature map before the next stage. A final 4x4 convolution plus
    sigmoid yields one score per sample.

    Args:
        ndf: base number of discriminator feature maps.
        nc: number of channels of every input image.
    """

    def __init__(self, ndf, nc):
        super(Discriminator, self).__init__()
        self.nc = nc
        self.ndf = ndf

        self.fc0 = nn.Sequential(
            # input is (nc) x 128 x 128
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True)
        )

        # "+ nc" in the layers below: an nc-channel image is concatenated onto
        # the feature map before each stage (multi-scale gradient).
        self.fc1 = nn.Sequential(
            # state size. (ndf + nc) x 64 x 64
            nn.Conv2d(ndf + nc, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True)
        )

        self.fc2 = nn.Sequential(
            # state size. (ndf*2 + nc) x 32 x 32
            nn.Conv2d(ndf * 2 + nc, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True)
        )

        self.fc3 = nn.Sequential(
            # state size. (ndf*4 + nc) x 16 x 16
            nn.Conv2d(ndf * 4 + nc, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True)
        )

        self.fc4 = nn.Sequential(
            # state size. (ndf*8 + nc) x 8 x 8
            # Stride 2 / padding 1 gives a 4x4 map, matching the 4x4 image that
            # is concatenated afterwards; BatchNorm width matches the conv output.
            nn.Conv2d(ndf * 8 + nc, ndf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf),
            nn.LeakyReLU(0.2, inplace=True)
        )

        self.fc5 = nn.Sequential(
            # state size. (ndf + nc) x 4 x 4
            nn.Conv2d(ndf + nc, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

        # state size. 1 x 1 x 1

    def forward(self, input, detach_or_not):
        """Score a multi-scale image pyramid.

        Args:
            input: sequence of six tensors
                ``(full, x_0_r, x_1_r, x_2_r, x_3_r, x_4_r)``: the 128x128
                image followed by its 4x4, 8x8, 16x16, 32x32 and 64x64 versions.
            detach_or_not: when true, every input tensor is detached so no
                gradient flows back into whatever produced them.

        Returns:
            Tensor of shape ``(N, 1, 1, 1)`` with sigmoid scores.
        """
        # When we train in combination with the generator we use multi-scale gradient.
        full, x_0_r, x_1_r, x_2_r, x_3_r, x_4_r = input
        if detach_or_not:
            # Detach ALL scales; detaching only the full-resolution image would
            # still let gradients reach the producer through the other five.
            full, x_0_r, x_1_r, x_2_r, x_3_r, x_4_r = (
                t.detach() for t in (full, x_0_r, x_1_r, x_2_r, x_3_r, x_4_r))

        x_0 = self.fc0(full)

        x_0 = torch.cat((x_0, x_4_r), dim=1)  # Concat Multi-Scale Gradient (64x64)
        x_1 = self.fc1(x_0)

        x_1 = torch.cat((x_1, x_3_r), dim=1)  # Concat Multi-Scale Gradient (32x32)
        x_2 = self.fc2(x_1)

        x_2 = torch.cat((x_2, x_2_r), dim=1)  # Concat Multi-Scale Gradient (16x16)
        x_3 = self.fc3(x_2)

        x_3 = torch.cat((x_3, x_1_r), dim=1)  # Concat Multi-Scale Gradient (8x8)
        x_4 = self.fc4(x_3)

        x_4 = torch.cat((x_4, x_0_r), dim=1)  # Concat Multi-Scale Gradient (4x4)
        x_5 = self.fc5(x_4)

        # Return the final score (previously the pre-fc5 feature map was returned).
        return x_5

In [None]:
class DCGAN(pl.LightningModule):
    """LightningModule that trains a Generator/Discriminator pair.

    Args:
        hparams: namespace providing ``ngf``, ``nc``, ``latent_dim``, ``ndf``,
            ``level_of_noise``, ``epochs``, ``lr_gen``, ``lr_dis``, ``b1``,
            ``b2``, ``image_size``, ``num_workers``, ``batch_size`` and
            ``save_model_every_epoch``.
        logger: logger whose ``experiment`` offers ``log_image`` and
            ``log_asset_folder`` (only the Comet logger is supported).
        checkpoint_folder: directory used as staging area for checkpoints.
        experiment_name: prefix of the checkpoint file names.
    """

    # Resolutions of the intermediate images the discriminator expects next to
    # the full-size image (order: x_0_r .. x_4_r).
    _PYRAMID_SIZES = (4, 8, 16, 32, 64)

    def __init__(self, hparams, logger, checkpoint_folder, experiment_name):
        super().__init__()
        self.hparams = hparams
        self.logger = logger  # only compatible with comet_logger at the moment
        self.checkpoint_folder = checkpoint_folder
        self.experiment_name = experiment_name

        # networks
        self.generator = Generator(ngf=hparams.ngf, nc=hparams.nc, latent_dim=hparams.latent_dim)
        self.discriminator = Discriminator(ndf=hparams.ndf, nc=hparams.nc)
        self.generator.apply(weights_init)
        self.discriminator.apply(weights_init)

        # cache for generated images
        self.generated_imgs = None
        self.last_imgs = None

        # For experience replay
        self.exp_replay_dis = torch.tensor([])

        # creating checkpoint folder
        dirpath = Path(self.checkpoint_folder)
        if not dirpath.exists():
            os.makedirs(dirpath, 0o755)

        # For multiple-gradient
        self.trans4 = transforms.Resize((4, 4))
        self.trans8 = transforms.Resize((8, 8))
        self.trans16 = transforms.Resize((16, 16))
        self.trans32 = transforms.Resize((32, 32))
        self.transPil = transforms.ToPILImage()

    def forward(self, z):
        """Run the generator on latent input ``z``."""
        return self.generator(z)

    def adversarial_loss(self, y_hat, y):
        """Binary cross-entropy between predictions ``y_hat`` and targets ``y``."""
        return F.binary_cross_entropy(y_hat, y)

    def training_step(self, batch, batch_nb, optimizer_idx):
        """One optimisation step for the generator (idx 0) or discriminator (idx 1).

        Returns:
            OrderedDict with ``loss``, ``progress_bar`` and ``log`` entries, or
            ``None`` for any other ``optimizer_idx``.
        """
        # For adding Instance noise for more visit: https://www.inference.vc/instance-noise-a-trick-for-stabilising-gan-training/
        std_gaussian = max(0, self.hparams.level_of_noise - (
                    (self.hparams.level_of_noise * 2) * (self.current_epoch / self.hparams.epochs)))
        AddGaussianNoiseInst = AddGaussianNoise(std=std_gaussian)  # the noise decays over time

        imgs, _ = batch
        imgs = AddGaussianNoiseInst(imgs)  # Adding instance noise to real images
        self.last_imgs = imgs

        # train generator
        if optimizer_idx == 0:
            # sample noise on the same device as the batch
            z = torch.randn(imgs.shape[0], self.hparams.latent_dim, 1, 1, device=imgs.device)

            # generate images
            self.generated_imgs = self(z)

            # The generator is rewarded when its output is labelled "valid".
            # [0] is the image of the last layer.
            g_loss = self.adversarial_loss(self.discriminator(self.generated_imgs, False),
                                           get_valid_labels(self.generated_imgs[0]))

            tqdm_dict = {'g_loss': g_loss}
            log = {'g_loss': g_loss, "std_gaussian": std_gaussian}
            output = OrderedDict({
                'loss': g_loss,
                'progress_bar': tqdm_dict,
                'log': log
            })
            return output

        # train discriminator
        if optimizer_idx == 1:
            # Measure discriminator's ability to classify real from generated samples.
            # The discriminator unpacks SIX tensors: the full image plus one
            # downscaled copy per entry of _PYRAMID_SIZES (the 64x64 copy was missing).
            real_pyramid = [imgs] + [resize2d(imgs, size) for size in self._PYRAMID_SIZES]
            # how well can it label as real?
            real_loss = self.adversarial_loss(self.discriminator(real_pyramid, False),
                                              get_valid_labels(imgs))

            # how well can it label as fake?; [0] is the image of the last layer
            fake_loss = self.adversarial_loss(self.discriminator(self.generated_imgs, True),
                                              get_unvalid_labels(self.generated_imgs[0]))

            # discriminator loss is the average of these
            d_loss = (real_loss + fake_loss) / 2

            tqdm_dict = {'d_loss': d_loss}
            log = {'d_loss': d_loss, "std_gaussian": std_gaussian}
            output = OrderedDict({
                'loss': d_loss,
                'progress_bar': tqdm_dict,
                'log': log
            })
            return output

    def configure_optimizers(self):
        """Create one Adam optimizer per network (generator first) and no schedulers."""
        lr_gen = self.hparams.lr_gen
        lr_dis = self.hparams.lr_dis
        b1 = self.hparams.b1
        b2 = self.hparams.b2

        opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr_gen, betas=(b1, b2))
        opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr_dis, betas=(b1, b2))
        return [opt_g, opt_d], []

    def backward(self, trainer, loss, optimizer, optimizer_idx: int) -> None:
        """Backpropagate ``loss`` while keeping the autograd graph alive.

        NOTE(review): retain_graph is presumably needed because the
        discriminator step reuses ``self.generated_imgs`` produced during the
        generator step -- confirm before removing.
        """
        loss.backward(retain_graph=True)

    def train_dataloader(self):
        """Build the training DataLoader over the image folder.

        Images are resized to ``image_size`` x ``image_size``, converted to
        tensors and normalised with mean 0.5 / std 0.5.
        """
        transform = transforms.Compose([transforms.Resize((self.hparams.image_size, self.hparams.image_size)),
                                        transforms.ToTensor(),
                                        transforms.Normalize([0.5], [0.5])
                                        ])

        train_dataset = torchvision.datasets.ImageFolder(
            root="./drive/My Drive/datasets/flower_dataset_miscrosoft/flower_dataset/",
            transform=transform
        )
        return DataLoader(train_dataset, num_workers=self.hparams.num_workers, shuffle=True, batch_size=self.hparams.batch_size)

    def _save_and_upload_checkpoint(self):
        """Save a checkpoint for the current epoch and upload the checkpoint folder."""
        # Use the trainer/logger attached to this module rather than notebook
        # globals, so the module also works when those globals do not exist.
        self.trainer.save_checkpoint(
            self.checkpoint_folder + "/" + self.experiment_name + "_epoch_" + str(self.current_epoch) + ".ckpt")
        self.logger.experiment.log_asset_folder(self.checkpoint_folder, step=self.current_epoch)

    def on_epoch_end(self):
        """Log a 2x2 grid of generated samples and periodically save/upload a checkpoint."""
        # sample on whatever device the generator's parameters live on
        device = next(self.generator.parameters()).device
        z = torch.randn(4, self.hparams.latent_dim, 1, 1, device=device)

        # log sampled images; no autograd graph is needed for logging
        with torch.no_grad():
            sample_imgs = self.generator(z)[0]
        sample_imgs = sample_imgs.view(-1, self.hparams.nc, self.hparams.image_size, self.hparams.image_size)
        grid = torchvision.utils.make_grid(sample_imgs, nrow=2)
        self.logger.experiment.log_image(grid.permute(1, 2, 0).cpu(), f'generated_images_epoch{self.current_epoch}',
                                         step=self.current_epoch)

        # save model
        if self.current_epoch % self.hparams.save_model_every_epoch == 0:
            self._save_and_upload_checkpoint()

            # Deleting the folder where we saved the model so that we dont upload a thing twice
            dirpath = Path(self.checkpoint_folder)
            if dirpath.exists() and dirpath.is_dir():
                shutil.rmtree(dirpath)

            # creating checkpoint folder
            access_rights = 0o755
            os.makedirs(dirpath, access_rights)

    def on_train_end(self):
        """Save and upload a final checkpoint when training finishes."""
        self._save_and_upload_checkpoint()

In [None]:
from argparse import Namespace

# Hyper-parameters of the run; converted to a Namespace below so they can be
# read as attributes (hparams.batch_size, ...).
args = {
    'batch_size': 32,
    'lr_gen': 0.0003, # generator learning rate (separate from lr_dis to allow TTUR); tested value: 0.0002
    'lr_dis': 0.0003, # discriminator learning rate (separate from lr_gen to allow TTUR); tested value: 0.0002
    'b1': 0.5, # Adam beta1 (momentum); tested value (DCGAN paper): 0.5
    'b2': 0.999, # Adam beta2; tested value (DCGAN paper): 0.999
    'latent_dim': 256, # size of the latent vector; tested value which worked (in V4_1): 100
    'nc': 3, # number of color channels
    'ndf': 32, # number of discriminator features
    'ngf': 32, # number of generator features
    'epochs': 5000, # the maximum number of epochs the algorithm should run
    'save_model_every_epoch': 1000, # how often (in epochs) we save our model
    'image_size': 128,
    'num_workers': 3,
    'level_of_noise': 0.5,  # how much instance noise we introduce (std); tested values: 0.15 and 0.1
    'experience_save_per_batch': 1, # this value should be very low; tested value which works: 1 (NOTE(review): not read by any visible cell)
    'experience_batch_size': 50 # this value shouldn't be too high; tested value which works: 50 (NOTE(review): not read by any visible cell)
}
hparams = Namespace(**args)

In [None]:
# Parameters
# Experiment metadata: names, checkpoint staging folder and Comet tags.
experiment_name = "DCGAN_V5_2"
# NOTE(review): train_dataloader() reads a flower image folder, not MNIST -- confirm this name.
dataset_name = "Mnist"
checkpoint_folder = "DCGAN/"
# NOTE(review): image_size is 128 and the dataset path points to flowers, so the
# "GHIBLI", "OVERFIT" and "64x64" tags look stale -- confirm before the next run.
tags = ["DCGAN", "GHIBLI", "OVERFIT", "64x64"]
dirpath = Path(checkpoint_folder)

In [None]:
# init logger
# Credentials are read from the environment (COMET_API_KEY / COMET_REST_API_KEY)
# so they are never stored in the notebook; both fall back to "" when unset,
# which is what was hard-coded before.
comet_logger = loggers.CometLogger(
    api_key=os.environ.get("COMET_API_KEY", ""),
    rest_api_key=os.environ.get("COMET_REST_API_KEY", ""),
    project_name="gan-testing-models",
    experiment_name=experiment_name,
    # experiment_key="222a685177474cb9b358b5ee642564dc"  # used for resuming trained id can be found in comet.ml
)

INFO:lightning:CometLogger will be initialized in online mode
COMET INFO: Experiment is live on comet.ml https://www.comet.ml/luposx/gan-testing-models/df8a0e79360643eaac79ec198aee6229



In [None]:
#defining net
# Build the LightningModule from the hyper-parameters, logger and checkpoint settings.
net = DCGAN(hparams, comet_logger, checkpoint_folder, experiment_name)

#logging
# Record the textual model description, the tags and the dataset name on the experiment.
comet_logger.experiment.set_model_graph(str(net))
comet_logger.experiment.add_tags(tags=tags)
comet_logger.experiment.log_dataset_info(dataset_name)

In [None]:
clear_output(wait=True)

# Trainer for at most args["epochs"] epochs, logging to Comet.
trainer = pl.Trainer(# resume_from_checkpoint="DCGAN_V4_2_GHIBLI_epoch_999.ckpt",
                     logger=comet_logger, 
                     max_epochs=args["epochs"]
                     )

clear_output(wait=True)

try:
    trainer.fit(net)
finally:
    # Always close the Comet experiment, even when training fails or is
    # interrupted, so it is not left open.
    clear_output(wait=True)
    comet_logger.experiment.end()

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/luposx/gan-testing-models/df8a0e79360643eaac79ec198aee6229

COMET INFO: -----------------------------------
COMET INFO: Comet.ml ExistingExperiment Summary
COMET INFO: -----------------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.ml/luposx/gan-testing-models/df8a0e79360643eaac79ec198aee6229
COMET INFO: -----------------------------------
COMET INFO: Uploading stats to Comet before program termination (may take several seconds)
