# Train

Colab notebook to test pix2pix training, based on the code found in https://github.com/mrzhu-cool/pix2pix-pytorch

## Imports and parameters

### Imports

In [None]:
# Accessing the files and preparing the dataset
from google.colab import drive
from os import listdir
from os.path import join
import os

# Treating the images
from PIL import Image
import numpy as np
import random
import torch
import torch.utils.data as data
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt

# Dealing with GPUs
import torch.backends.cudnn as cudnn

# Defining the networks
import torch.nn as nn
from torch.nn import init
import functools
from torch.optim import lr_scheduler
import torch.optim as optim

# Training
from math import log10
import time
import math

# Tensorboard
from torch.utils.tensorboard import SummaryWriter
import datetime

### Parameters

In [None]:
import argparse


def _str2bool(value):
    """
    Converts a command-line string into a real boolean.

    argparse's `type=bool` calls bool() on the raw string, so every non-empty
    value (including "False" or "0") is parsed as True. This converter accepts
    the usual true/false spellings and rejects anything else.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in ('true', 't', 'yes', 'y', '1'):
        return True
    if normalized in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('boolean value expected, got %r' % (value,))


# Training settings
parser = argparse.ArgumentParser(description='pix2pix-pytorch-implementation')
# In the original code, dataset is required. We don't need it for the Inria Aerial Image Labelling Dataset
parser.add_argument('--dataset', required=False, help='facades')
parser.add_argument('--batch_size', type=int, default=1, help='training batch size')
parser.add_argument('--test_batch_size', type=int, default=1, help='testing batch size')
parser.add_argument('--direction', type=str, default='a2b', help='a2b or b2a')
parser.add_argument('--input_nc', type=int, default=3, help='input image channels')
parser.add_argument('--output_nc', type=int, default=3, help='output image channels')
parser.add_argument('--ngf', type=int, default=64, help='generator filters in first conv layer')
parser.add_argument('--ndf', type=int, default=64, help='discriminator filters in first conv layer')
# Training epochs are defined by range(opt.epoch_count, opt.niter + opt.niter_decay + 1)
# So, originally, the training script runs epochs 1 to 200, which takes too long at the beginning
# niter and niter_decay are changed to shorten the amount of time during development
parser.add_argument('--epoch_count', type=int, default=1, help='the starting epoch count')
parser.add_argument('--niter', type=int, default=100, help='# of iter at starting learning rate') # 100
parser.add_argument('--niter_decay', type=int, default=100, help='# of iter to linearly decay learning rate to zero') # 100
parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate for adam') # 0.0002
parser.add_argument('--lr_policy', type=str, default='lambda', help='learning rate policy: lambda|step|plateau|cosine')
parser.add_argument('--lr_decay_iters', type=int, default=50, help='multiply by a gamma every lr_decay_iters iterations')
parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
parser.add_argument('--cuda', action='store_true', help='use cuda?')
parser.add_argument('--threads', type=int, default=1, help='number of threads for data loader to use')
parser.add_argument('--seed', type=int, default=123, help='random seed to use. Default=123')
parser.add_argument('--lamb', type=int, default=10, help='weight on L1 term in objective') # 10
# Activate or deactivate the use of Tensorboard.
# Parsed with _str2bool so that "--tb_active=False" really disables it.
parser.add_argument('--tb_active', type=_str2bool, default=True, help='should tensorboard be used') # Deactivate for deep trainings
# Which original image should be stored in Tensorboard.
# Inria satellite images are 5000x5000 and consume much CPU and memory, so only
# one image is saved to avoid using too many resources
parser.add_argument('--tb_image', type=str, default='vienna1.tif', help='image to store in tensorboard')
# Number of images saved to tensorboard. Only tb_image will be saved, so the progress
# of generated images can be seen through epochs. 5 images in 100 epochs means one
# tb_image will be saved every 20 epochs.
parser.add_argument('--tb_number_img', type=int, default=5, help='number of images saved to tensorboard')
# Level of debug (cell output)
parser.add_argument('--debug', type=int, default=0, help='level of debug from 0 (no debug) to 2 (verbose)')
# Number of iteration messages per epoch. They have the form
# ===> Epoch[{}]({}/{}): Loss_D: {:.4f} Loss_G: {:.4f}
parser.add_argument('--iter_messages', type=int, default=4, help='number of output messages per epoch')
# Number of epochs to save a checkpoint
parser.add_argument('--checkpoint_epochs', type=int, default=50, help='number of epochs to save a checkpoint')
# Stop training after checkpoint is saved. Useful in long trainings
parser.add_argument('--stop_after_checkpoint', type=_str2bool, default=True, help='stop training after a checkpoint has been saved')

# As stated in https://stackoverflow.com/questions/48796169/how-to-fix-ipykernel-launcher-py-error-unrecognized-arguments-in-jupyter
# an explicit list (at least an empty one) must be passed to simulate a script execution.
# With no argument, parse_args reads sys.argv[1:], which in a notebook holds the
# kernel launcher's own arguments and makes argparse fail with "unrecognized arguments"
training_args = ['--cuda',
                 '--epoch_count=151',
                 '--niter=250',
                 '--niter_decay=250',
                 '--lr=0.002',
                 '--lamb=1',
                 '--direction=a2b',
                 '--batch_size=5',
                 '--checkpoint_epochs=25',
                 '--threads=0',
                 '--debug=1']
opt = parser.parse_args(training_args)

# Local (Colab VM) dataset layout. Each split has the same three sub-directories:
#   gt     -> ground-truth masks
#   images -> satellite images
#   log    -> TensorBoard logs
train_dir = 'dataset/train'
train_gt_dir, train_images_dir, train_tensorboard_dir = [
    '{}/{}'.format('dataset/train', sub_dir) for sub_dir in ('gt', 'images', 'log')
]

test_dir = 'dataset/test'
test_gt_dir, test_images_dir, test_tensorboard_dir = [
    '{}/{}'.format('dataset/test', sub_dir) for sub_dir in ('gt', 'images', 'log')
]

In [None]:
# Fail early when a GPU was requested but none is visible to PyTorch
if opt.cuda and not torch.cuda.is_available():
    raise Exception("No GPU found, please run without --cuda")

# Let cuDNN pick the fastest convolution algorithms for the (fixed) input size
cudnn.benchmark = True

# Seed every random number generator used by the notebook.
# DatasetFromFolder draws its random crop offsets and horizontal flips from
# Python's `random` module, so seeding torch alone does not make the data
# augmentation repeatable.
random.seed(opt.seed)
torch.manual_seed(opt.seed)
if opt.cuda:
    torch.cuda.manual_seed(opt.seed)

# Device on which networks, losses and batches are placed
device = torch.device("cuda:0" if opt.cuda else "cpu")

### Debug function

In [None]:
def print_debug(level, text):
    """
    Prints `text` prefixed with "  [DEBUG] " when `level` is lower than or
    equal to the debug level configured in opt.debug. Otherwise does nothing.

    Parameters
    ----------
    level: verbosity level of this message
    text: message to print (must be a string)
    """
    # Messages more verbose than the configured level are dropped
    if level > opt.debug:
        return
    print("  [DEBUG] " + text)

## Accessing the dataset

### Defining the dataset class

In [None]:
# from utils import is_image_file, load_img
def is_image_file(filename):
    """
    Returns True when `filename` ends with one of the supported image
    extensions. The comparison is case-sensitive.
    """
    # str.endswith accepts a tuple of suffixes and matches any of them
    return filename.endswith((".png", ".jpg", ".jpeg", ".tif", ".tiff"))

# Shared all-zeros placeholder with the size of an INRIA Aerial Image Dataset image
# (3 x 5000 x 5000). It is allocated once, on the CPU, and reused to save memory
# and CPU time. Only valid with that dataset.
zeros_tensor = torch.zeros(3, 5000, 5000, device='cpu')

class DatasetFromFolder(data.Dataset):
    """
    Paired dataset of ground-truth masks and satellite images adapted to the
    https://project.inria.fr/aerialimagelabeling/ images.

    `image_dir` must contain two sub-directories holding files with the same names:
    - gt: mask (ground truth) images, referred to as "a"
    - images: satellite images, referred to as "b"

    Every item is a tuple (input, target, filename, original_target) whose
    input/target order depends on `direction` ("a2b" or anything else for b2a).

    Example of use:
    train_ds = DatasetFromFolder("/content/drive/MyDrive/Colab Notebooks/AIDL/Project/train", "a2b")
    """

    # Images are resized to LOAD_SIZE x LOAD_SIZE and then randomly cropped
    # to CROP_SIZE x CROP_SIZE
    LOAD_SIZE = 286
    CROP_SIZE = 256

    def __init__(self, image_dir, direction="a2b"):
        """
        Parameters
        ----------
        image_dir: directory containing the "gt" and "images" sub-directories
        direction: "a2b" to return (mask, satellite image); any other value
                   returns (satellite image, mask)
        """
        super(DatasetFromFolder, self).__init__()
        self.direction = direction
        self.a_path = join(image_dir, "gt")  # mask (ground truth) images. Originally "a"
        self.b_path = join(image_dir, "images")  # satellite images. Originally "b"
        # listdir() returns files in arbitrary order: sort them so that an index
        # always refers to the same file from one run to the next
        self.image_filenames = sorted(x for x in listdir(self.a_path) if is_image_file(x))

        # Kept for compatibility; __getitem__ applies ToTensor and Normalize
        # separately because the random crop happens between both steps
        transform_list = [transforms.ToTensor(),
                          # Even if masks have only one channel, they're converted to RGB in __getitem__
                          transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
        self.transform = transforms.Compose(transform_list)

    def __getitem__(self, index):
        """
        Loads, augments and normalizes the pair of images at position `index`.

        Returns
        -------
        (input, target, filename, original_target) where input and target are
        3 x CROP_SIZE x CROP_SIZE tensors normalized to [-1, 1] and
        original_target is the full-size, non-normalized target image when
        TensorBoard is active and the file is opt.tb_image, or the shared
        zeros_tensor placeholder otherwise.
        """
        filename = self.image_filenames[index]
        print_debug(2, "DatasetFromFolder __getitem__: getting item {} corresponding to file {}".format(index, filename))

        # convert() returns a new, fully loaded image, so the files can be
        # closed right away instead of staying open until garbage collection
        with Image.open(join(self.a_path, filename)) as image_a:
            a = image_a.convert('RGB')
        with Image.open(join(self.b_path, filename)) as image_b:
            b = image_b.convert('RGB')

        # Only the image tracked in TensorBoard is converted at full size:
        # converting every original image is way too slow and they take too
        # much memory to be kept around
        if opt.tb_active and filename == opt.tb_image:
            a_original = transforms.ToTensor()(a)
            b_original = transforms.ToTensor()(b)
            print_debug(2, "DatasetFromFolder __getitem__: {} detected".format(opt.tb_image))
        else:
            # Data loaders always expect the same number of elements and the
            # same shapes when batch_size > 1, otherwise stacking fails with
            # "RuntimeError: stack expects each tensor to be equal size, but got [1] at entry 0 and [3, 5000, 5000] at entry 7"
            a_original = zeros_tensor
            b_original = zeros_tensor

        # Revision pending: resizing from 5000x5000 to 286x286 can lead to learning problems
        load_size = (self.LOAD_SIZE, self.LOAD_SIZE)
        a = transforms.ToTensor()(a.resize(load_size, Image.BICUBIC))
        b = transforms.ToTensor()(b.resize(load_size, Image.BICUBIC))

        # Same random crop for both images so that they stay aligned
        max_offset = max(0, self.LOAD_SIZE - self.CROP_SIZE - 1)
        w_offset = random.randint(0, max_offset)
        h_offset = random.randint(0, max_offset)
        a = a[:, h_offset:h_offset + self.CROP_SIZE, w_offset:w_offset + self.CROP_SIZE]
        b = b[:, h_offset:h_offset + self.CROP_SIZE, w_offset:w_offset + self.CROP_SIZE]

        # After converting to RGB, even masks have 3 channels.
        # The inverse transformation is simply min((x * 0.5) + 0.5, 1), clipping
        # the values to avoid odd colours. TensorBoard seems to handle the
        # clipping already, but the denormalization has to be done by us.
        normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        a = normalize(a)
        b = normalize(b)

        # Random horizontal flip (along the width axis), applied to both images
        if random.random() < 0.5:
            a = torch.flip(a, dims=[2])
            b = torch.flip(b, dims=[2])

        if self.direction == "a2b":
            return a, b, filename, b_original # Adding the original target image
        else:
            return b, a, filename, a_original # Adding the original target image

    def __len__(self):
        """Number of image pairs, taken from the files found in the "gt" directory."""
        return len(self.image_filenames)

### Connecting to Google Drive

In [None]:
# Mount Google Drive at /content/drive (Colab only) so that the dataset stored
# there can be copied to the local filesystem
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Copying files from Drive to CoLab machine

According to this [article](https://enjoymachinelearning.com/posts/colab-with-google-drive/), reading files from the local storage in Google Colab is faster than doing so from Google Drive. Since all the training files are read once per epoch, it makes sense to copy them and work in the local filesystem.

In [None]:
# Create directories where training and test images will be copied from Drive
os.makedirs(name = train_gt_dir, exist_ok=True)
os.makedirs(name = train_images_dir, exist_ok=True)
# os.makedirs(name = train_tensorboard_dir, exist_ok=True)
os.makedirs(name = test_gt_dir, exist_ok=True)
os.makedirs(name = test_images_dir, exist_ok=True)
# os.makedirs(name = test_tensorboard_dir, exist_ok=True)

# Just in case, all files are deleted
!rm "{train_gt_dir}"/*
!rm "{train_images_dir}"/*
!rm "{test_gt_dir}"/*
!rm "{test_images_dir}"/*

# Copy files ending in 0, 1, 2 or 3 to train directories. Shell commands don't accept python variables?
!cp /content/drive/MyDrive/"Colab Notebooks"/AIDL/Project/train/gt/*[0-5].tif "{train_gt_dir}"
# !cp /content/drive/MyDrive/"Colab Notebooks"/AIDL/Project/train/gt/*1.tif "{train_gt_dir}"
!cp /content/drive/MyDrive/"Colab Notebooks"/AIDL/Project/train/images/*[0-5].tif "{train_images_dir}"
# !cp /content/drive/MyDrive/"Colab Notebooks"/AIDL/Project/train/images/*1.tif "{train_images_dir}"

# Copy files ending in 9 as test images
!cp /content/drive/MyDrive/"Colab Notebooks"/AIDL/Project/train/gt/*9.tif "{test_gt_dir}"
!cp /content/drive/MyDrive/"Colab Notebooks"/AIDL/Project/train/images/*9.tif "{test_images_dir}"

rm: cannot remove 'dataset/train/gt/*': No such file or directory
rm: cannot remove 'dataset/train/images/*': No such file or directory
rm: cannot remove 'dataset/test/gt/*': No such file or directory
rm: cannot remove 'dataset/test/images/*': No such file or directory


### Creating the data loaders

In [None]:
# Both datasets use the same direction: a2b is "gt" to "images", b2a is "images" to "gt"
train_set = DatasetFromFolder(train_dir, opt.direction)
test_set  = DatasetFromFolder(test_dir, opt.direction)
# Training batches are reshuffled on every epoch
training_data_loader = DataLoader(dataset=train_set, num_workers=opt.threads, batch_size=opt.batch_size, shuffle=True)
# The test loader uses its own batch size (--test_batch_size) and a fixed order,
# so that evaluation results are comparable between epochs
testing_data_loader = DataLoader(dataset=test_set, num_workers=opt.threads, batch_size=opt.test_batch_size, shuffle=False)

## Defining the networks

### get_norm_layer

In [None]:
def get_norm_layer(norm_type='instance'):
    """
    Returns the factory used to build normalization layers.

    Parameters
    ----------
    norm_type: 'batch', 'instance', 'switchable' or 'none'

    Returns a callable that receives the number of channels, or None when
    norm_type is 'none'.

    Raises NotImplementedError for any other norm_type.
    """
    if norm_type == 'batch':
        return functools.partial(nn.BatchNorm2d, affine=True)
    if norm_type == 'instance':
        return functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
    if norm_type == 'switchable':
        # NOTE(review): SwitchNorm2d is not defined in the visible part of the
        # notebook - confirm it exists before using this option
        return SwitchNorm2d
    if norm_type == 'none':
        return None
    raise NotImplementedError('normalization layer [%s] is not found' % norm_type)

### get_scheduler

In [None]:
def get_scheduler(optimizer, opt):
    """
    Creates the learning rate scheduler selected by opt.lr_policy.

    Parameters
    ----------
    optimizer: optimizer whose learning rate will be scheduled
    opt: options; reads lr_policy and, depending on the policy, epoch_count,
         niter, niter_decay and lr_decay_iters

    Returns the scheduler.

    Raises NotImplementedError when opt.lr_policy is not one of
    lambda, step, plateau or cosine.
    """
    if opt.lr_policy == 'lambda':
        def lambda_rule(epoch):
            # Factor 1.0 until niter epochs have passed (counting from
            # epoch_count), then it decreases linearly over niter_decay epochs
            lr_l = 1.0 - max(0, epoch + opt.epoch_count - opt.niter) / float(opt.niter_decay + 1)
            return lr_l
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    elif opt.lr_policy == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.1)
    elif opt.lr_policy == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5)
    elif opt.lr_policy == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=opt.niter, eta_min=0)
    else:
        # The exception must be raised, not returned: a returned exception
        # object would be used as a scheduler and fail much later
        raise NotImplementedError('learning rate policy [%s] is not implemented' % opt.lr_policy)
    return scheduler

### update_learning_rate

In [None]:
# update learning rate (called once every epoch)
def update_learning_rate(scheduler, optimizer):
    """
    Advances `scheduler` one step and prints the learning rate of the first
    parameter group of `optimizer`. Meant to be called once per epoch.
    """
    scheduler.step()
    first_group = optimizer.param_groups[0]
    print('learning rate = %.7f' % first_group['lr'])

### init_weights

In [None]:
def init_weights(net, init_type='normal', gain=0.02):
    """
    Initializes in place the weights of every module in `net`.

    - Conv* and Linear* modules: weights follow `init_type`
      ('normal', 'xavier', 'kaiming' or 'orthogonal') and biases are set to 0.
    - BatchNorm2d modules: weights ~ N(1.0, gain) and biases set to 0. Layers
      created with affine=False have no weight/bias and are left untouched.

    Raises NotImplementedError when `init_type` is unknown and the network
    contains at least one Conv/Linear module.
    """
    def init_func(m):
        classname = m.__class__.__name__
        if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
            if init_type == 'normal':
                init.normal_(m.weight.data, 0.0, gain)
            elif init_type == 'xavier':
                init.xavier_normal_(m.weight.data, gain=gain)
            elif init_type == 'kaiming':
                init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
            elif init_type == 'orthogonal':
                init.orthogonal_(m.weight.data, gain=gain)
            else:
                raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
            if hasattr(m, 'bias') and m.bias is not None:
                init.constant_(m.bias.data, 0.0)
        elif classname.find('BatchNorm2d') != -1:
            # weight and bias are None when the layer was built with affine=False
            if getattr(m, 'weight', None) is not None:
                init.normal_(m.weight.data, 1.0, gain)
            if getattr(m, 'bias', None) is not None:
                init.constant_(m.bias.data, 0.0)

    print('initialize network with %s' % init_type)
    # apply() visits every submodule recursively, and the network itself
    net.apply(init_func)

### init_net

In [None]:
def init_net(net, init_type='normal', init_gain=0.02, gpu_id='cuda:0'):
    """
    Moves `net` to the device `gpu_id`, initializes its weights with
    init_weights and returns the network.
    """
    # nn.Module.to() moves the parameters in place
    net.to(gpu_id)
    init_weights(net, init_type=init_type, gain=init_gain)
    return net

### define_G

In [None]:
def define_G(input_nc, output_nc, ngf, norm='batch', use_dropout=False, init_type='normal', init_gain=0.02, gpu_id='cuda:0'):
    """
    Builds the generator: a ResnetGenerator with 9 residual blocks, moved to
    `gpu_id` and with its weights initialized.

    Parameters
    ----------
    input_nc, output_nc: number of input and output image channels
    ngf: number of filters of the first convolution
    norm: normalization type accepted by get_norm_layer
    use_dropout: whether residual blocks include dropout
    init_type, init_gain: weight initialization settings passed to init_net
    gpu_id: device where the network is placed
    """
    generator = ResnetGenerator(
        input_nc,
        output_nc,
        ngf,
        norm_layer=get_norm_layer(norm_type=norm),
        use_dropout=use_dropout,
        n_blocks=9,
    )
    return init_net(generator, init_type, init_gain, gpu_id)

### Class ResnetGenerator

In [None]:
# Defines the generator that consists of Resnet blocks between a few
# downsampling/upsampling operations.
class ResnetGenerator(nn.Module):
    """
    Generator made of an input convolution, two downsampling stages,
    `n_blocks` residual blocks, two upsampling stages and an output convolution.
    """

    def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=9, padding_type='reflect'):
        assert(n_blocks >= 0)
        super(ResnetGenerator, self).__init__()
        self.input_nc = input_nc
        self.output_nc = output_nc
        self.ngf = ngf

        # Convolutions only get a bias term when the normalization is
        # InstanceNorm2d (given directly or wrapped in functools.partial)
        norm_class = norm_layer.func if type(norm_layer) == functools.partial else norm_layer
        use_bias = norm_class == nn.InstanceNorm2d

        # Encoder: ngf -> 2*ngf -> 4*ngf channels
        self.inc = Inconv(input_nc, ngf, norm_layer, use_bias)
        self.down1 = Down(ngf, ngf * 2, norm_layer, use_bias)
        self.down2 = Down(ngf * 2, ngf * 4, norm_layer, use_bias)

        # Bottleneck: n_blocks residual blocks working on 4*ngf channels
        self.resblocks = nn.Sequential(*[
            ResBlock(ngf * 4, padding_type=padding_type, norm_layer=norm_layer,
                     use_dropout=use_dropout, use_bias=use_bias)
            for _ in range(n_blocks)
        ])

        # Decoder: 4*ngf -> 2*ngf -> ngf channels
        self.up1 = Up(ngf * 4, ngf * 2, norm_layer, use_bias)
        self.up2 = Up(ngf * 2, ngf, norm_layer, use_bias)

        self.outc = Outconv(ngf, output_nc)

    def forward(self, input):
        # DTT Are there no skip connections? (encoder outputs are not reused by the decoder)
        features = self.inc(input)
        features = self.down1(features)
        features = self.down2(features)
        features = self.resblocks(features)
        features = self.up1(features)
        features = self.up2(features)
        return self.outc(features)

### Class Inconv

In [None]:
class Inconv(nn.Module):
    """
    Input stage: reflection padding of 3 pixels, 7x7 convolution,
    normalization and ReLU. The spatial size is preserved.
    """

    def __init__(self, in_ch, out_ch, norm_layer, use_bias):
        super(Inconv, self).__init__()
        layers = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(in_ch, out_ch, kernel_size=7, padding=0, bias=use_bias),
            norm_layer(out_ch),
            nn.ReLU(True),
        ]
        self.inconv = nn.Sequential(*layers)

    def forward(self, x):
        return self.inconv(x)

### Class Down

In [None]:
class Down(nn.Module):
    """
    Downsampling stage: 3x3 convolution with stride 2, normalization and ReLU.
    """

    def __init__(self, in_ch, out_ch, norm_layer, use_bias):
        super(Down, self).__init__()
        strided_conv = nn.Conv2d(in_ch, out_ch, kernel_size=3,
                                 stride=2, padding=1, bias=use_bias)
        self.down = nn.Sequential(strided_conv, norm_layer(out_ch), nn.ReLU(True))

    def forward(self, x):
        return self.down(x)

### Class ResBlock

In [None]:
# Define a Resnet block
class ResBlock(nn.Module):
    """
    Residual block: two 3x3 convolutions (the first followed by ReLU and
    optional dropout) whose result is added to the input and passed through
    a ReLU.
    """

    def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias):
        super(ResBlock, self).__init__()
        self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, use_dropout, use_bias)

    @staticmethod
    def _padding(padding_type):
        """
        Returns (padding layers to insert before a convolution, padding
        argument of that convolution) for the given padding type.
        """
        if padding_type == 'reflect':
            return [nn.ReflectionPad2d(1)], 0
        if padding_type == 'replicate':
            return [nn.ReplicationPad2d(1)], 0
        if padding_type == 'zero':
            return [], 1
        raise NotImplementedError('padding [%s] is not implemented' % padding_type)

    def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, use_bias):
        """
        Builds the sequential body of the block. Raises NotImplementedError
        when padding_type is not 'reflect', 'replicate' or 'zero'.
        """
        layers = []

        # First convolution: padding, conv, norm, ReLU (+ dropout)
        pad_layers, p = self._padding(padding_type)
        layers.extend(pad_layers)
        layers.append(nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias))
        layers.append(norm_layer(dim))
        layers.append(nn.ReLU(True))
        if use_dropout:
            layers.append(nn.Dropout(0.5))

        # Second convolution: padding, conv, norm (no activation here)
        pad_layers, p = self._padding(padding_type)
        layers.extend(pad_layers)
        layers.append(nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias))
        layers.append(norm_layer(dim))

        return nn.Sequential(*layers)

    def forward(self, x):
        # DTT 'x +' === skip connection!!
        residual = self.conv_block(x)
        activation = nn.ReLU(True)
        return activation(x + residual)

### Class Up

In [None]:
class Up(nn.Module):
    """
    Upsampling stage: 3x3 transposed convolution with stride 2 (doubles the
    spatial size), normalization and ReLU.
    """

    def __init__(self, in_ch, out_ch, norm_layer, use_bias):
        super(Up, self).__init__()
        # Alternative left by the original author: nn.Upsample(scale_factor=2,
        # mode='nearest') followed by a 3x3 nn.Conv2d with stride 1 and padding 1
        transposed_conv = nn.ConvTranspose2d(in_ch, out_ch,
                                             kernel_size=3, stride=2,
                                             padding=1, output_padding=1,
                                             bias=use_bias)
        self.up = nn.Sequential(transposed_conv, norm_layer(out_ch), nn.ReLU(True))

    def forward(self, x):
        return self.up(x)

### Class Outconv

In [None]:
class Outconv(nn.Module):
    """
    Output stage: reflection padding of 3 pixels, 7x7 convolution and Tanh,
    so the output values lie in [-1, 1]. The spatial size is preserved.
    """

    def __init__(self, in_ch, out_ch):
        super(Outconv, self).__init__()
        layers = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(in_ch, out_ch, kernel_size=7, padding=0),
            nn.Tanh(),
        ]
        self.outconv = nn.Sequential(*layers)

    def forward(self, x):
        return self.outconv(x)

### define_D

In [None]:
def define_D(input_nc, ndf, netD,
             n_layers_D=3, norm='batch', use_sigmoid=False, init_type='normal', init_gain=0.02, gpu_id='cuda:0'):
    """
    Builds the discriminator, moves it to `gpu_id` and initializes its weights.

    Parameters
    ----------
    input_nc: number of input channels
    ndf: number of filters of the first convolution
    netD: 'basic' (NLayerDiscriminator with 3 layers), 'n_layers'
          (NLayerDiscriminator with n_layers_D layers) or 'pixel' (PixelDiscriminator)
    n_layers_D: number of layers, only used when netD is 'n_layers'
    norm: normalization type accepted by get_norm_layer
    use_sigmoid: whether a Sigmoid is appended to the discriminator
    init_type, init_gain: weight initialization settings passed to init_net
    gpu_id: device where the network is placed

    Raises NotImplementedError when netD is not one of the supported names.
    """
    norm_layer = get_norm_layer(norm_type=norm)

    if netD == 'basic':
        net = NLayerDiscriminator(input_nc, ndf, n_layers=3, norm_layer=norm_layer, use_sigmoid=use_sigmoid)
    elif netD == 'n_layers':
        net = NLayerDiscriminator(input_nc, ndf, n_layers_D, norm_layer=norm_layer, use_sigmoid=use_sigmoid)
    elif netD == 'pixel':
        net = PixelDiscriminator(input_nc, ndf, norm_layer=norm_layer, use_sigmoid=use_sigmoid)
    else:
        # Report the requested name (netD), not the still-unset network
        raise NotImplementedError('Discriminator model name [%s] is not recognized' % netD)

    return init_net(net, init_type, init_gain, gpu_id)

### Class NLayerDiscriminator

In [None]:
# Defines the PatchGAN discriminator with the specified arguments.
class NLayerDiscriminator(nn.Module):
    """
    PatchGAN discriminator: a stack of 4x4 convolutions that outputs a
    one-channel map of scores instead of a single value per image.
    """

    def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, use_sigmoid=False):
        super(NLayerDiscriminator, self).__init__()

        # Convolutions followed by a normalization only get a bias term when
        # that normalization is InstanceNorm2d
        norm_class = norm_layer.func if type(norm_layer) == functools.partial else norm_layer
        use_bias = norm_class == nn.InstanceNorm2d

        kw, padw = 4, 1

        # First layer: strided convolution without normalization
        layers = [
            nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw),
            nn.LeakyReLU(0.2, True),
        ]

        # Intermediate strided layers: channels double each time, capped at 8 * ndf
        mult = 1
        for n in range(1, n_layers):
            prev_mult, mult = mult, min(2**n, 8)
            layers.append(nn.Conv2d(ndf * prev_mult, ndf * mult,
                                    kernel_size=kw, stride=2, padding=padw, bias=use_bias))
            layers.append(norm_layer(ndf * mult))
            layers.append(nn.LeakyReLU(0.2, True))

        # Last feature layer uses stride 1
        prev_mult, mult = mult, min(2**n_layers, 8)
        layers.append(nn.Conv2d(ndf * prev_mult, ndf * mult,
                                kernel_size=kw, stride=1, padding=padw, bias=use_bias))
        layers.append(norm_layer(ndf * mult))
        layers.append(nn.LeakyReLU(0.2, True))

        # One-channel score map
        layers.append(nn.Conv2d(ndf * mult, 1, kernel_size=kw, stride=1, padding=padw))

        if use_sigmoid:
            layers.append(nn.Sigmoid())

        self.model = nn.Sequential(*layers)

    def forward(self, input):
        return self.model(input)

### Class PixelDiscriminator

In [None]:
class PixelDiscriminator(nn.Module):
    """
    Discriminator made only of 1x1 convolutions: every pixel gets its own
    score and the spatial size is preserved.
    """

    def __init__(self, input_nc, ndf=64, norm_layer=nn.BatchNorm2d, use_sigmoid=False):
        super(PixelDiscriminator, self).__init__()

        # Bias terms are only used when the normalization is InstanceNorm2d
        norm_class = norm_layer.func if type(norm_layer) == functools.partial else norm_layer
        use_bias = norm_class == nn.InstanceNorm2d

        layers = [
            nn.Conv2d(input_nc, ndf, kernel_size=1, stride=1, padding=0),
            nn.LeakyReLU(0.2, True),
            nn.Conv2d(ndf, ndf * 2, kernel_size=1, stride=1, padding=0, bias=use_bias),
            norm_layer(ndf * 2),
            nn.LeakyReLU(0.2, True),
            nn.Conv2d(ndf * 2, 1, kernel_size=1, stride=1, padding=0, bias=use_bias),
        ]
        if use_sigmoid:
            layers.append(nn.Sigmoid())

        self.net = nn.Sequential(*layers)

    def forward(self, input):
        return self.net(input)

### Class GANLoss

In [None]:
class GANLoss(nn.Module):
    """
    GAN objective that builds the target tensor itself, so callers only say
    whether the prediction should be considered real or fake.

    Parameters
    ----------
    use_lsgan: True to use MSELoss (least squares GAN), False to use BCELoss
    target_real_label: value of the target for real predictions
    target_fake_label: value of the target for fake predictions
    """

    def __init__(self, use_lsgan=True, target_real_label=1.0, target_fake_label=0.0):
        super(GANLoss, self).__init__()
        # Buffers follow the module when it is moved with .to(device)
        self.register_buffer('real_label', torch.tensor(target_real_label))
        self.register_buffer('fake_label', torch.tensor(target_fake_label))
        if use_lsgan:
            self.loss = nn.MSELoss()
        else:
            self.loss = nn.BCELoss()

    def get_target_tensor(self, input, target_is_real):
        """Returns the real or fake label expanded to the shape of `input`."""
        if target_is_real:
            target_tensor = self.real_label
        else:
            target_tensor = self.fake_label
        return target_tensor.expand_as(input)

    def forward(self, input, target_is_real):
        """
        Computes the loss between `input` and a target filled with the real or
        fake label. Defined as forward() rather than __call__() so that
        instances are invoked through nn.Module's regular call machinery;
        `criterion(input, target_is_real)` keeps working unchanged.
        """
        target_tensor = self.get_target_tensor(input, target_is_real)
        return self.loss(input, target_tensor)


### Creating the networks

In [None]:
# Creating the networks from scratch.
# The discriminator receives the input and the output image concatenated,
# hence input_nc + output_nc channels
net_g = define_G(opt.input_nc, opt.output_nc, opt.ngf,
                 norm='batch', use_dropout=False,
                 init_type='normal', init_gain=0.02, gpu_id=device)
net_d = define_D(opt.input_nc + opt.output_nc, opt.ndf, netD='basic', gpu_id=device)

# Loss functions, placed on the same device as the networks
criterionGAN = GANLoss().to(device)
criterionL1 = nn.L1Loss().to(device)
criterionMSE = nn.MSELoss().to(device)

# Setup optimizers: one Adam optimizer and one scheduler per network,
# both configured with the same hyperparameters
adam_betas = (opt.beta1, 0.999)
optimizer_g = optim.Adam(net_g.parameters(), lr=opt.lr, betas=adam_betas)
optimizer_d = optim.Adam(net_d.parameters(), lr=opt.lr, betas=adam_betas)
net_g_scheduler = get_scheduler(optimizer_g, opt)
net_d_scheduler = get_scheduler(optimizer_d, opt)

initialize network with normal
initialize network with normal


## Auxiliary functions

### denormalize_image & show_image

In [None]:
def denormalize_image(image_tensor):
    """
    Denormalizes an image coming from the network, usually a generated image,
    so that it can be displayed.

    Parameters
    ----------
    image_tensor: channels-first tensor of one image with values in [-1, 1]

    Returns a channels-last uint8 numpy array with values in [0, 255].
    """
    def trace(template, array):
        # Level-2 debug message reporting the shape at each step
        print_debug(2, template.format(array.shape))

    trace("denormalize_image image tensor shape: {}", image_tensor)
    # cpu() to avoid error "can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first."
    image_numpy = image_tensor.cpu().data.float().numpy()
    trace("denormalize_image image_numpy shape: {}", image_numpy)

    # The inverse transformation is simply min((x * 0.5) + 0.5, 1), clipping
    # the values to avoid odd colours. TensorBoard seems to handle the clipping
    # already, but the denormalization has to be done by us.
    channels_last = np.transpose(image_numpy, (1, 2, 0))
    image_numpy = (channels_last + 1) / 2.0 * 255.0
    trace("denormalize_image image_numpy shape: {} after transposing", image_numpy)

    image_numpy = image_numpy.clip(0, 255)
    trace("denormalize_image image_numpy shape: {} after clipping", image_numpy)

    image_numpy = image_numpy.astype(np.uint8)
    trace("denormalize_image image_numpy shape: {} after converting to uint8", image_numpy)

    return image_numpy

def show_image(image_tensor):
    """
    Shows an image coming from the network.

    Parameters
    ----------
    image_tensor: tensor of one normalized image, as accepted by denormalize_image
    """
    # Denormalize first so that the image is displayable
    imshow(Image.fromarray(denormalize_image(image_tensor)))

### Show list/tuple of images in a grid

In [None]:
# Based on utils.py save_img and the last answer in
# https://stackoverflow.com/questions/46615554/how-to-display-multiple-images-in-one-figure-correctly/46616645#46616645
# Plots several figures in a tile
def show_images_grid(images_tuple, nrows=1, ncols=1):
    """
    Shows several images coming from a DataLoader based on DatasetFromFolder
    in a tile.

    Parameters
    ----------
    images_tuple: tuple of tensors representing images
    ncols : number of columns of subplots wanted in the display
    nrows : number of rows of subplots wanted in the figure

    Raises IndexError when there are more images than cells in the grid.
    """
    if len(images_tuple) > nrows * ncols:
        raise IndexError("show_images_grid: {} images do not fit in a {}x{} grid".format(
            len(images_tuple), nrows, ncols))

    # squeeze=False always returns a 2D array of Axes. Without it, a 1x1 grid
    # (the default) returns a single Axes object, which has no ravel()
    fig, axeslist = plt.subplots(ncols=ncols, nrows=nrows, figsize=(15,15), squeeze=False)
    axes = axeslist.ravel()

    # Hide the axes of every cell, including those left without an image
    for axis in axes:
        axis.set_axis_off()

    for axis, image_tensor in zip(axes, images_tuple):
        # First, denormalize image to allow it to be printable
        image_numpy = denormalize_image(image_tensor)
        # No cmap is passed: the argument used to be plt.jet(), which returns
        # None and changes the global default colormap as a side effect
        axis.imshow(Image.fromarray(image_numpy))

    plt.tight_layout() # optional

### setup_tensorboard_writer

In [None]:
def setup_tensorboard_writer(tensorboard_dir, model=None):
    """
    Creates a SummaryWriter that logs to a new sub-directory of
    tensorboard_dir named after the current date and time (YYYYMMDD-HHMMSS).
    If a model/net is provided, its graph is added to the writer.

    Returns a reference to the writer
    """
    # A new directory per call keeps the data of every run separated
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    writer = SummaryWriter(log_dir="/".join((tensorboard_dir, timestamp)))

    if model is None:
        return writer

    # Adding the model to TensorBoard
    # Apparently, TensorBoard only accepts one model per writer.
    # The graph is traced with a random batch of one 3x256x256 image
    sample_input = torch.randn([1,3,256,256]).to(device)
    writer.add_graph(model, input_to_model=sample_input)
    return writer

### save_iteration_tensorboard

In [None]:
def save_iteration_tensorboard(writer, epoch, iteration, loss_d, loss_g, loss_g_gan, loss_g_l1,
                               real_a, real_b, fake_b, batch):
    """
    Logs the losses of one training iteration to TensorBoard and, on selected
    epochs, the images of the sample identified by opt.tb_image.

    Reads the notebook-level globals opt and training_data_loader.

    Parameters
    ----------
    writer : TensorBoard writer receiving the scalars and images
    epoch : current epoch number
    iteration : iteration counter within the epoch, added to the global step
    loss_d, loss_g, loss_g_gan, loss_g_l1 : scalar losses exposing .item()
    real_a, real_b, fake_b : batched images, indexed by position in the batch
                             (logged as mask, satellite image and generated image)
    batch : batch as yielded by the data loader. batch[2] is searched for
            opt.tb_image and batch[3] is logged as the original image
            (presumably file names and unnormalized images; verify against
            the dataset class)
    """
    # Global step: the stride per epoch is the number of files in the dataset
    tensorboard_step = len(training_data_loader.dataset.image_filenames) * (epoch - opt.epoch_count) + iteration
    writer.add_scalar('Loss/D', loss_d.item(), global_step=tensorboard_step)
    writer.add_scalar('Loss/G', loss_g.item(), global_step=tensorboard_step)
    writer.add_scalar('Loss/G GAN', loss_g_gan.item(), global_step=tensorboard_step)
    writer.add_scalar('Loss/G L1', loss_g_l1.item(), global_step=tensorboard_step)

    # DTT Decide whether saving images to tensorboard
    # Training iterates over range(opt.epoch_count, opt.niter + opt.niter_decay + 1),
    # whose upper bound is exclusive: the last epoch actually run is
    # opt.niter + opt.niter_decay
    last_epoch = opt.niter + opt.niter_decay
    # (last_epoch + 1) / opt.tb_number_img gives the number of epochs
    # that should pass before an image is saved. At least should be 1
    epochs_to_pass = max(1, (last_epoch + 1) // opt.tb_number_img)

    # it only saves the image if it corresponds to the defined opt.tb_image
    tracked_image_in_batch = opt.tb_image in batch[2]
    # either a periodic epoch or the last epoch of training
    is_image_epoch = (epoch % epochs_to_pass == 0) or (epoch == last_epoch)

    if is_image_epoch and tracked_image_in_batch:
        print_debug(2, "save_iteration_tensorboard: saving {} to TensorBoard. Is in? {}. Batch: {}".format(opt.tb_image, tracked_image_in_batch, batch[2]))

        batch_index = batch[2].index(opt.tb_image)
        # DTT Write images to TensorBoard at the end of each epoch
        writer.add_image(str(epoch)+'/1 Mask', real_a[batch_index], epoch)
        writer.add_image(str(epoch)+'/2 Normalized satellite image', real_b[batch_index], epoch)
        writer.add_image(str(epoch)+'/3 Generated satellite image', fake_b[batch_index], epoch)
        # denormalize_image output is logged with channels last (HWC)
        writer.add_image(str(epoch)+'/4 Denormalized generated satellite image', denormalize_image(fake_b[batch_index]), epoch, dataformats='HWC')
        writer.add_image(str(epoch)+'/5 Original satellite image', batch[3][batch_index].squeeze(dim=0), epoch)
    else:
        print_debug(2, "save_iteration_tensorboard: won't save any image ({})".format(batch[2]))


### save_checkpoint

In [None]:
def save_checkpoint(epoch, net_g, net_d):
    """
    Saves the discriminator and generator every opt.checkpoint_epochs epochs.

    The whole network objects are written with torch.save to
    checkpoint/netG_model_epoch_<epoch>.pth and
    checkpoint/netD_model_epoch_<epoch>.pth (the directory is created if
    needed).

    Parameters
    ----------
    epoch : current epoch number
    net_g : generator network
    net_d : discriminator network

    Returns
    -------
    bool : True when a checkpoint was saved and opt.stop_after_checkpoint is
           set, meaning training should stop. False otherwise, including
           the epochs in which no checkpoint is saved.
    """
    # Nothing to save in this epoch: training goes on
    if epoch % opt.checkpoint_epochs != 0:
        return False

    os.makedirs(name = "checkpoint", exist_ok=True)
    net_g_model_out_path = "checkpoint/netG_model_epoch_{}.pth".format(epoch)
    net_d_model_out_path = "checkpoint/netD_model_epoch_{}.pth".format(epoch)
    torch.save(net_g, net_g_model_out_path)
    torch.save(net_d, net_d_model_out_path)
    print("Checkpoint for epoch {} saved".format(epoch))

    return bool(opt.stop_after_checkpoint)

## Loading previously trained checkpoint

In [None]:
# Lists the files matching net*100.pth in the Drive log folder
# (presumably the checkpoints of epoch 100 -- TODO confirm)
# NOTE(review): the following cell loads its checkpoints from
# train/trainedModels, not from train/log -- confirm which folder is intended
!ls drive/MyDrive/"Colab Notebooks"/AIDL/Project/train/log/net*100.pth


ls: cannot access 'drive/MyDrive/Colab Notebooks/AIDL/Project/train/log/net*100.pth': No such file or directory


In [None]:
# Restore the generator and the discriminator saved at epoch 150.
# Each network is first deserialized onto `device` and then explicitly moved
# to it.
net_g = torch.load(
    'drive/MyDrive/Colab Notebooks/AIDL/Project/train/trainedModels/netG_model_epoch_150.pth',
    map_location=torch.device(device),
)
net_g = net_g.to(device)

net_d = torch.load(
    'drive/MyDrive/Colab Notebooks/AIDL/Project/train/trainedModels/netD_model_epoch_150.pth',
    map_location=torch.device(device),
)
net_d = net_d.to(device)

## Training function

In [None]:
# Trains the generator (net_g) and the discriminator (net_d) for the epochs in
# range(opt.epoch_count, opt.niter + opt.niter_decay + 1). After each epoch the
# average PSNR over testing_data_loader is reported and save_checkpoint decides
# whether a checkpoint is written and whether training must stop.
if opt.tb_active:
    writer_train = setup_tensorboard_writer(train_tensorboard_dir, model=net_g)

output_images = []
start_time = time.time()

# DTT Let's print just some iteration messages per epoch
#     Iterations go from 1 to ceiling(len(train_set) / batch_size)
#     The interval is clamped to 1: when an epoch has fewer iterations than
#     opt.iter_messages the integer division gives 0 and the modulo used in
#     the loop would raise ZeroDivisionError
iterations_per_message = max(1, math.ceil(len(train_set) / opt.batch_size) // opt.iter_messages)

# Training function
for epoch in range(opt.epoch_count, opt.niter + opt.niter_decay + 1):
    epoch_start_time = time.time()
    # train
    for iteration, batch in enumerate(training_data_loader, 1):
        # forward
        # DTT a: masks, b: satellite image
        real_a, real_b = batch[0].to(device), batch[1].to(device)
        # DTT fake_b: generated satellite image from generator
        #     ** Code from original pix2pix implementation:
        #     ** self.fake_B = self.netG(self.real_A)  # G(A)
        fake_b = net_g(real_a)

        ######################
        # (1) Update D network
        ######################

        optimizer_d.zero_grad()

        # train with fake
        # DTT Concatenates the real mask with generated image
        #     ** Code from original pix2pix implementation:
        #     ** fake_AB = torch.cat((self.real_A, self.fake_B), 1)  # we use conditional GANs; we need to feed both input and output to the discriminator
        fake_ab = torch.cat((real_a, fake_b), 1)

        # DTT Discriminator's prediction stating if the couple of images are
        #     (real, real) or (real, fake)
        #     detach() to avoid calculating gradients for the generator
        #     ** Code from original pix2pix implementation:
        #     ** pred_fake = self.netD(fake_AB.detach())
        #     ** # Fake; stop backprop to the generator by detaching fake_B
        pred_fake = net_d.forward(fake_ab.detach())

        # DTT Calculated losses were extremely big. Debug message to see why
        print_debug(2, "Train: pred_fake's shape {}, min {} and max {}".format(
            pred_fake.shape, pred_fake.min(), pred_fake.max()
        ))

        # DTT Loss when a generated image is fed. Should classify it as False
        #     ** Code from original pix2pix implementation:
        #     ** self.loss_D_fake = self.criterionGAN(pred_fake, False)
        loss_d_fake = criterionGAN(pred_fake, False)

        # train with real
        # DTT Concatenates the same real mask with its corresponding real image
        #     ** Code from original pix2pix implementation:
        #     ** real_AB = torch.cat((self.real_A, self.real_B), 1)
        real_ab = torch.cat((real_a, real_b), 1)
        # DTT Discriminator's prediction. Now calculating gradients
        #     ** Code from original pix2pix implementation:
        #     ** pred_real = self.netD(real_AB)
        pred_real = net_d.forward(real_ab)
        # DTT Discriminator should predict True with a real mask + image couple
        #     ** Code from original pix2pix implementation:
        #     ** self.loss_D_real = self.criterionGAN(pred_real, True)
        loss_d_real = criterionGAN(pred_real, True)

        # Combined D loss
        # DTT D's loss is the mean between its capacity to detect a generated image
        #     and its capacity to detect a real image
        #     ** Code from original pix2pix implementation:
        #     ** # combine loss and calculate gradients
        #     ** self.loss_D = (self.loss_D_fake + self.loss_D_real) * 0.5
        loss_d = (loss_d_fake + loss_d_real) * 0.5

        loss_d.backward()

        optimizer_d.step()

        ######################
        # (2) Update G network
        ######################

        # DTT In the pix2pix original implementation, discriminator's gradients
        #     are deactivated
        #     ** self.set_requires_grad(self.netD, False)  # D requires no gradients when optimizing G

        optimizer_g.zero_grad()

        # First, G(A) should fake the discriminator
        fake_ab = torch.cat((real_a, fake_b), 1)
        pred_fake = net_d.forward(fake_ab)
        loss_g_gan = criterionGAN(pred_fake, True)

        # Second, G(A) = B
        loss_g_l1 = criterionL1(fake_b, real_b) * opt.lamb
        loss_g = loss_g_gan + loss_g_l1
        loss_g.backward()
        optimizer_g.step()

        # DTT Print the progress message every iterations_per_message iterations
        if iteration % iterations_per_message == 0:
            print("===> Epoch[{}]({}/{}): Loss_D: {:.4f} Loss_G: {:.4f}".format(
                epoch, iteration, len(training_data_loader), loss_d.item(), loss_g.item()))

        # DTT Logging the same data for TensorBoard analysis
        if opt.tb_active:
            save_iteration_tensorboard(writer_train, epoch, iteration, loss_d, loss_g, loss_g_gan, loss_g_l1,
                               real_a, real_b, fake_b, batch)

    # Only execute if a minimum epochs are expected
    if (opt.niter + opt.niter_decay + 1) > opt.checkpoint_epochs:
        update_learning_rate(net_g_scheduler, optimizer_g)
        update_learning_rate(net_d_scheduler, optimizer_d)

    # test
    avg_psnr = 0
    # No backward pass is done here: torch.no_grad() avoids building the
    # autograd graph for every test batch
    with torch.no_grad():
        for batch in testing_data_loader:
            input, target = batch[0].to(device), batch[1].to(device)

            prediction = net_g(input)
            mse = criterionMSE(prediction, target)
            psnr = 10 * log10(1 / mse.item())
            avg_psnr += psnr
    print("===> Avg. PSNR: {:.4f} dB".format(avg_psnr / len(testing_data_loader)))

    if opt.tb_active:
        # DTT I log the same data for TensorBoard analysis
        writer_train.add_scalar('Avg. PSNR', avg_psnr / len(testing_data_loader), epoch)
        time_spent = time.time() - epoch_start_time
        print_debug(1, "Train: time spent in epoch {} is {}".format(epoch, time_spent))
        writer_train.add_scalar('Time spent', time_spent, epoch)

    #checkpoint
    # The result is not stored in a variable called `exit`, which would shadow
    # the interpreter's own exit helper for the rest of the session
    stop_training = save_checkpoint(epoch, net_g, net_d)
    if stop_training:
        print("Ending training as stop_after_checkpoint is set to True")
        break

if opt.tb_active:
    writer_train.close()

print("\nTraining ended. It took {} seconds".format(time.time() - start_time))
print("Arguments used: {}".format(training_args))

===> Epoch[151](5/23): Loss_D: 0.2396 Loss_G: 0.6186
===> Epoch[151](10/23): Loss_D: 0.2452 Loss_G: 0.5928
===> Epoch[151](15/23): Loss_D: 0.2147 Loss_G: 0.6151
===> Epoch[151](20/23): Loss_D: 0.2105 Loss_G: 0.6141
learning rate = 0.0020000
learning rate = 0.0020000
===> Avg. PSNR: 8.1232 dB
  [DEBUG] Train: time spent in epoch 151 is 357.28951358795166
===> Epoch[152](5/23): Loss_D: 0.2018 Loss_G: 0.6396
===> Epoch[152](10/23): Loss_D: 0.2221 Loss_G: 0.5794
===> Epoch[152](15/23): Loss_D: 0.2529 Loss_G: 0.6416
===> Epoch[152](20/23): Loss_D: 0.2125 Loss_G: 0.6634
learning rate = 0.0020000
learning rate = 0.0020000
===> Avg. PSNR: 8.3307 dB
  [DEBUG] Train: time spent in epoch 152 is 333.3474531173706
===> Epoch[153](5/23): Loss_D: 0.2330 Loss_G: 0.6917
===> Epoch[153](10/23): Loss_D: 0.2194 Loss_G: 0.6565
===> Epoch[153](15/23): Loss_D: 0.2339 Loss_G: 0.6787
===> Epoch[153](20/23): Loss_D: 0.2189 Loss_G: 0.5972
learning rate = 0.0020000
learning rate = 0.0020000
===> Avg. PSNR: 8.1475

### Training results

#### Epochs 126-150 (batch size of 10)
```
===> Avg. PSNR: 8.0881 dB
  [DEBUG] Train: time spent in epoch 149 is 356.96931982040405
===> Epoch[150](3/12): Loss_D: 0.2416 Loss_G: 0.6463
===> Epoch[150](6/12): Loss_D: 0.2202 Loss_G: 0.6578
===> Epoch[150](9/12): Loss_D: 0.2300 Loss_G: 0.6615
===> Epoch[150](12/12): Loss_D: 0.2092 Loss_G: 0.6119
learning rate = 0.0020000
learning rate = 0.0020000
===> Avg. PSNR: 7.9952 dB
  [DEBUG] Train: time spent in epoch 150 is 363.01134037971497
Checkpoint for epoch 150 saved
Ending training as stop_after_checkpoint is set to True

Training ended. It took 8968.985772371292 seconds
Arguments used: ['--cuda', '--epoch_count=126', '--niter=250', '--niter_decay=250', '--lr=0.002', '--lamb=1', '--direction=a2b', '--batch_size=10', '--checkpoint_epochs=25', '--threads=0', '--debug=1']
```



#### Epochs 151-175 (batch size of 5)
```
===> Avg. PSNR: 8.0505 dB
  [DEBUG] Train: time spent in epoch 174 is 354.73154282569885
===> Epoch[175](5/23): Loss_D: 0.2337 Loss_G: 0.5743
===> Epoch[175](10/23): Loss_D: 0.2201 Loss_G: 0.6518
===> Epoch[175](15/23): Loss_D: 0.2399 Loss_G: 0.6031
===> Epoch[175](20/23): Loss_D: 0.2157 Loss_G: 0.6706
learning rate = 0.0020000
learning rate = 0.0020000
===> Avg. PSNR: 7.9869 dB
  [DEBUG] Train: time spent in epoch 175 is 341.47743701934814
Checkpoint for epoch 175 saved
Ending training as stop_after_checkpoint is set to True

Training ended. It took 8649.769093751907 seconds
Arguments used: ['--cuda', '--epoch_count=151', '--niter=250', '--niter_decay=250', '--lr=0.002', '--lamb=1', '--direction=a2b', '--batch_size=5', '--checkpoint_epochs=25', '--threads=0', '--debug=1']
```

## Launch TensorBoard

In [None]:
# Load extension
%reload_ext tensorboard
# Run TensorBoard on training directory: takes some seconds
# It doesn't work with python variables: %tensorboard --logdir dir_log_train  <-- fails
# NOTE(review): a bare name is handed to the magic as literal text. IPython
# normally expands {name} or $name inside magic arguments, so passing the
# variable wrapped in braces may work -- TODO confirm
# NOTE(review): the hard-coded path below presumably has to match the
# directory given to setup_tensorboard_writer -- verify
%tensorboard --logdir "dataset/train/log"