#### Imports

In [1]:
import time
import os
import numpy as np
import torch
from torch.autograd import Variable
from collections import OrderedDict
from subprocess import call
import fractions
def lcm(a, b):
    """Return the least common multiple of the integers ``a`` and ``b``.

    Returns 0 when either argument is 0. The result is always an ``int``:
    floor division is used so that values derived from it (e.g. a logging
    interval used in modulo comparisons) do not silently become floats.
    """
    # fractions.gcd was deprecated in Python 3.5 and removed in 3.9;
    # math.gcd is the supported replacement.
    from math import gcd
    return abs(a * b) // gcd(a, b) if a and b else 0

#from options.train_options import TrainOptions
#from data.data_loader import CreateDataLoader
#from models.models import create_model
#import util.util as util
#from util.visualizer import Visualizer

#### Input Config

In [None]:
# Training configuration, expressed as a plain dict of option name -> value.
# NOTE(review): the training cell further down reads options as attributes
# (e.g. opt.checkpoints_dir), which a plain dict does not support - confirm how
# this dict is meant to be handed to the training code.
opt = {
    'name': 'label2city',
    'gpu_ids': [0],
    'checkpoints_dir': '/vol2/reflective_shadow/checkpoints_v8/',
    'model': 'pix2pixHD',
    'norm': 'instance',
    'use_dropout': True,
    'data_type': 32,
    'verbose': False,
    'fp16': False,
    'local_rank': 0,

    # Input/Output sizes
    'batchSize': 1,
    'loadSize': 1024,
    'fineSize': 512,
    'label_nc': 35,
    'input_nc': 3,
    'output_nc': 3,

    # for setting inputs
    'dataroot': './datasets/cityscapes/',
    # scaling and cropping of images at load time
    # [resize_and_crop|crop|scale_width|scale_width_and_crop]
    'resize_or_crop': 'scale_width',
    'serial_batches': False,
    'no_flip': False,
    'nThreads': 2,
    'max_dataset_size': float("inf"),

    # for displays
    'display_winsize': 512,  # display window size
    'tf_log': False,  # if specified, use tensorboard logging. Requires tensorflow installed

    # for generator
    'netG': 'global',  # selects model to use for netG
    'ngf': 64,  # number of gen filters in first conv layer
    'n_downsample_global': 4,  # number of downsampling layers in netG
    'n_blocks_global': 9,  # number of residual blocks in the global generator network
    'n_blocks_local': 3,  # number of residual blocks in the local enhancer network
    'n_local_enhancers': 1,  # number of local enhancers to use
    'niter_fix_global': 0,  # number of epochs that we only train the outmost local enhancer

    # for instance-wise features
    'no_instance': True,  # if specified, do *not* add instance map as input
    'instance_feat': False,  # if specified, add encoded instance features as input
    'label_feat': False,  # if specified, add encoded label features as input
    'feat_num': 3,  # vector length for encoded features
    'load_features': False,  # if specified, load precomputed feature maps
    'n_downsample_E': 4,  # number of downsampling layers in encoder
    'nef': 16,  # number of encoder filters in the first conv layer
    'n_clusters': 10,  # number of clusters for features

    # for displays
    'display_freq': 100,  # frequency of showing training results on screen
    'print_freq': 100,  # frequency of showing training results on console
    'save_latest_freq': 1000,  # frequency of saving the latest results
    'save_epoch_freq': 10,  # frequency of saving checkpoints at the end of epochs
    'no_html': False,  # do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/
    'debug': False,  # only do one epoch and displays at each iteration

    # for training
    'continue_train': False,  # continue training: load the latest model
    'load_pretrain': '',  # load the pretrained model from the specified location
    'which_epoch': 'latest',  # which epoch to load? set to latest to use latest cached model
    'phase': 'train',  # train, val, test, etc
    'niter': 100,  # number of iter at starting learning rate
    'niter_decay': 100,  # number of iter to linearly decay learning rate to zero
    'beta1': 0.5,  # momentum term of adam
    'lr': 0.0002,  # initial learning rate for adam

    # for discriminators
    'num_D': 2,  # number of discriminators to use
    'n_layers_D': 3,  # only used if which_model_netD==n_layers
    'ndf': 64,  # number of discrim filters in first conv layer
    'lambda_feat': 10.0,  # weight for feature matching loss
    'no_ganFeat_loss': False,  # if specified, do *not* use discriminator feature matching loss
    'no_vgg_loss': False,  # if specified, do *not* use VGG feature matching loss
    'no_lsgan': False,  # do *not* use least square GAN, if false, use vanilla GAN
    'pool_size': 0,  # the size of image buffer that stores previously generated images
}

#### Load Data and Train

In [None]:


# Build the options object and decide where training (re)starts.
# NOTE(review): the import of TrainOptions is commented out in the imports cell,
# and this assignment replaces the `opt` dict built in the "Input Config" cell -
# confirm which of the two configuration sources is intended.
opt = TrainOptions().parse()

# Progress file holding "<epoch>,<iteration>"; it is written with np.savetxt
# by the training loop whenever a checkpoint is saved.
iter_path = os.path.join(opt.checkpoints_dir, opt.name, 'iter.txt')
if opt.continue_train:
    try:
        start_epoch, epoch_iter = np.loadtxt(iter_path, delimiter=',', dtype=int)
    except (OSError, ValueError):
        # Missing/unreadable file (OSError) or malformed/empty contents
        # (ValueError): fall back to a fresh start. Anything else - notably
        # KeyboardInterrupt - is allowed to propagate instead of being swallowed.
        start_epoch, epoch_iter = 1, 0
    print('Resuming from epoch %d at iteration %d' % (start_epoch, epoch_iter))
else:
    start_epoch, epoch_iter = 1, 0

# Make the console-logging interval a common multiple of the batch size
# (the step counter in the training loop advances by opt.batchSize).
opt.print_freq = lcm(opt.print_freq, opt.batchSize)
if opt.debug:
    # Debug run: log/display on every step, a single epoch with no decay
    # phase, and a dataset capped at 10 entries.
    opt.display_freq = opt.print_freq = opt.niter = 1
    opt.niter_decay = 0
    opt.max_dataset_size = 10

# Data pipeline.
data_loader = CreateDataLoader(opt)
dataset = data_loader.load_data()
dataset_size = len(data_loader)
print('#training images = %d' % dataset_size)

# Model, visualizer and the two optimizers.
model = create_model(opt)
visualizer = Visualizer(opt)
if not opt.fp16:
    # Optimizers are read through `.module`, i.e. from the object wrapped by `model`.
    optimizer_G = model.module.optimizer_G
    optimizer_D = model.module.optimizer_D
else:
    # Mixed precision: hand the model and both optimizers to apex amp first,
    # then wrap the returned model in DataParallel.
    from apex import amp
    model, (optimizer_G, optimizer_D) = amp.initialize(
        model, [model.optimizer_G, model.optimizer_D], opt_level='O1')
    model = torch.nn.DataParallel(model, device_ids=opt.gpu_ids)

# Global step count implied by the (possibly resumed) epoch/iteration.
total_steps = (start_epoch - 1) * dataset_size + epoch_iter

# Phase of each periodic action relative to the starting step count, so that
# the loop's `total_steps % freq == delta` checks stay aligned after a resume.
display_delta, print_delta, save_delta = (
    total_steps % opt.display_freq,
    total_steps % opt.print_freq,
    total_steps % opt.save_latest_freq,
)

# Main training loop: runs epochs start_epoch .. opt.niter + opt.niter_decay
# (inclusive). Each batch gets one forward pass followed by a generator step and
# then a discriminator step; errors, sample images and checkpoints are emitted
# whenever total_steps hits the corresponding frequency phase (*_delta).
for epoch in range(start_epoch, opt.niter + opt.niter_decay + 1):
    epoch_start_time = time.time()
    # For every epoch after the first one executed here, wrap the in-epoch
    # counter back into [0, dataset_size).
    if epoch != start_epoch:
        epoch_iter = epoch_iter % dataset_size
    # NOTE: enumerate's `start` only offsets the index `i` (which is not used
    # below); it does not skip batches that were consumed before a resume.
    for i, data in enumerate(dataset, start=epoch_iter):
        # Restart the timer at the beginning of each print interval.
        if total_steps % opt.print_freq == print_delta:
            iter_start_time = time.time()
        # Both counters advance by the batch size, not by 1.
        total_steps += opt.batchSize
        epoch_iter += opt.batchSize

        # whether to collect output images
        save_fake = total_steps % opt.display_freq == display_delta

        ############## Forward Pass ######################
        # `infer=save_fake`: presumably asks the model to also return the
        # generated image only on display steps - confirm against the model.
        losses, generated = model(Variable(data['label']), Variable(data['inst']), 
            Variable(data['image']), Variable(data['feat']), infer=save_fake)

        # Reduce every loss entry with torch.mean; plain int entries are passed
        # through unchanged (presumably placeholders for disabled losses, and the
        # mean presumably averages per-device values - confirm against the model).
        losses = [ torch.mean(x) if not isinstance(x, int) else x for x in losses ]
        loss_dict = dict(zip(model.module.loss_names, losses))

        # calculate final loss scalar
        # Discriminator: average of the fake and real terms.
        loss_D = (loss_dict['D_fake'] + loss_dict['D_real']) * 0.5
        # Generator: GAN term plus the feature-matching and VGG terms when those
        # keys are present (0 otherwise).
        loss_G = loss_dict['G_GAN'] + loss_dict.get('G_GAN_Feat',0) + loss_dict.get('G_VGG',0)

        ############### Backward Pass ####################
        # update generator weights
        optimizer_G.zero_grad()
        if opt.fp16:                                
            # `amp` is only imported by the setup code when opt.fp16 is set.
            with amp.scale_loss(loss_G, optimizer_G) as scaled_loss: scaled_loss.backward()                
        else:
            loss_G.backward()          
        optimizer_G.step()

        # update discriminator weights
        optimizer_D.zero_grad()
        if opt.fp16:                                
            with amp.scale_loss(loss_D, optimizer_D) as scaled_loss: scaled_loss.backward()                
        else:
            loss_D.backward()        
        optimizer_D.step()        

        ############## Display results and errors ##########
        ### print out errors
        if total_steps % opt.print_freq == print_delta:
            # Convert tensor losses to Python numbers for logging; ints are kept as-is.
            errors = {k: v.data.item() if not isinstance(v, int) else v for k, v in loss_dict.items()}            
            # Seconds elapsed since the timer was last restarted, divided by opt.print_freq.
            t = (time.time() - iter_start_time) / opt.print_freq
            visualizer.print_current_errors(epoch, epoch_iter, errors, t)
            visualizer.plot_current_errors(errors, total_steps)
            #call(["nvidia-smi", "--format=csv", "--query-gpu=memory.used,memory.free"]) 

        ### display output images
        if save_fake:
            # Only the first sample of the batch (index 0) is visualised.
            # NOTE(review): `util` is used here but its import appears to be
            # commented out in the imports cell - confirm it is in scope.
            visuals = OrderedDict([('input_label', util.tensor2label(data['label'][0], opt.label_nc)),
                                   ('synthesized_image', util.tensor2im(generated.data[0])),
                                   ('real_image', util.tensor2im(data['image'][0]))])
            visualizer.display_current_results(visuals, epoch, total_steps)

        ### save latest model
        if total_steps % opt.save_latest_freq == save_delta:
            print('saving the latest model (epoch %d, total_steps %d)' % (epoch, total_steps))
            model.module.save('latest')            
            # Record (epoch, in-epoch iteration) so a later run can resume from here.
            np.savetxt(iter_path, (epoch, epoch_iter), delimiter=',', fmt='%d')

        # Stop the epoch once the in-epoch counter has reached the dataset size.
        if epoch_iter >= dataset_size:
            break
       
    # end of epoch 
    # NOTE: iter_end_time is assigned but not read anywhere in this block.
    iter_end_time = time.time()
    print('End of epoch %d / %d \t Time Taken: %d sec' %
          (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time))

    ### save model for this epoch
    if epoch % opt.save_epoch_freq == 0:
        print('saving the model at the end of epoch %d, iters %d' % (epoch, total_steps))        
        model.module.save('latest')
        model.module.save(epoch)
        # Progress file points at the start of the next epoch (iteration 0).
        np.savetxt(iter_path, (epoch+1, 0), delimiter=',', fmt='%d')

    ### instead of only training the local enhancer, train the entire network after certain iterations
    # Triggered exactly once, at epoch == opt.niter_fix_global, and only when that option is non-zero.
    if (opt.niter_fix_global != 0) and (epoch == opt.niter_fix_global):
        model.module.update_fixed_params()

    ### linearly decay learning rate after certain iterations
    # Called once per epoch for every epoch beyond opt.niter.
    if epoch > opt.niter:
        model.module.update_learning_rate()