In [None]:
from google.colab import drive

# Attach the user's Google Drive to the Colab VM so the project files are reachable.
mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


In [None]:
import os

# Run everything relative to the project folder so the local `models` / `utils`
# packages imported further down can be resolved.
project_dir = '/content/drive/MyDrive/BiasMitigation/LNTL'
os.chdir(project_dir)
print('Changed the local path to....', os.getcwd())

Changed the local path to.... /content/drive/MyDrive/BiasMitigation/LNTL


In [None]:
# Local scripts
! pip install import_ipynb

import import_ipynb

from models.Deeplab import deeplabv3
from models.SegNet import segnet
from models import biashead
from utils.utils_LNTL import logger_setting, Timer
# Python
import time
import os
import math
import numpy as np
import numpy as np
import pickle
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
# Torch
import torch
from torch import nn
from torch import optim
from torch.autograd import Variable
# Colab 
from google.colab.patches import cv2_imshow


class GradReverse(torch.autograd.Function):
    """Autograd function that is the identity going forward and flips gradients going back.

    The backward pass negates the incoming gradient and scales it by 0.1.
    """

    @staticmethod
    def forward(ctx, x):
        # A same-shape view: values are passed through untouched.
        passthrough = x.view_as(x)
        return passthrough

    @staticmethod
    def backward(ctx, grad_output):
        # Reverse the sign first, then apply the fixed 0.1 scale.
        reversed_grad = torch.neg(grad_output)
        return reversed_grad * 0.1

def grad_reverse(x):
    """Pass ``x`` through ``GradReverse`` (identity forward, reversed gradient backward)."""
    reversed_x = GradReverse.apply(x)
    return reversed_x



class Trainer(object):
    """Train and evaluate the segmentation network together with three colour-bias heads.

    Two training schemes are available, selected by ``option.train_baseline``:
    a plain segmentation baseline (``_train_step_baseline``) and the two-stage
    adversarial scheme in ``_train_step`` that additionally trains one bias
    head per RGB channel.

    Args:
        option: run configuration object. Attributes read by this class:
            ``network_type``, ``meta_dir``, ``cuda``, ``lr``, ``momentum``,
            ``weight_decay``, ``lr_decay_rate``, ``lr_decay_period``,
            ``is_train``, ``use_pretrain``, ``checkpoint``, ``log_step``,
            ``max_step``, ``save_step``, ``train_baseline``, ``exp_name``,
            ``save_dir`` and ``debug``.
        path: run directory; checkpoints are written to ``<path>/checkpoint``.
    """

    # Channel count of the feature map handed to the bias heads, per backbone.
    _BIAS_INPUT_CHANNELS = {'Deeplab': 1280, 'Segnet': 64}

    def __init__(self, option, path):
        self.option = option
        self.path = path

        self._build_model()
        self._set_optimizer()
        self.logger = logger_setting(option.exp_name, option.save_dir, path, option.debug)

    def _build_model(self):
        """Create the segmentation net, the three bias heads and both loss functions.

        Raises:
            ValueError: if ``option.network_type`` is not a known backbone.
        """
        self.n_color_cls = 8    # number of colour bins per channel (256 / 32)

        # The bias head input width depends on where the fork is placed in the backbone.
        network_type = self.option.network_type
        if network_type not in self._BIAS_INPUT_CHANNELS:
            raise ValueError(
                "Unknown network_type %r; expected one of %s"
                % (network_type, sorted(self._BIAS_INPUT_CHANNELS)))
        bias_input_channels = self._BIAS_INPUT_CHANNELS[network_type]

        # NOTE(review): DeepLabV3 is instantiated even when network_type is
        # 'Segnet'; only the bias head width changes. Confirm whether the
        # SegNet model should be built in that case.
        self.net = deeplabv3.DeepLabV3()
        self.pred_net_r = biashead.BiasPredictor(input_ch=bias_input_channels, num_classes=self.n_color_cls)
        self.pred_net_g = biashead.BiasPredictor(input_ch=bias_input_channels, num_classes=self.n_color_cls)
        self.pred_net_b = biashead.BiasPredictor(input_ch=bias_input_channels, num_classes=self.n_color_cls)

        # NOTE: pickle executes arbitrary code on load; only point meta_dir at trusted files.
        with open(os.path.join(self.option.meta_dir, "class_weights.pkl"), "rb") as file:
            class_weights = np.array(pickle.load(file))
        # Kept on the CPU here: the weight is a buffer of the loss module, so
        # `self.loss.cuda()` below moves it when (and only when) CUDA is enabled.
        class_weights = torch.from_numpy(class_weights).float()

        # Weighted so that rare classes count as much as frequent ones.
        self.loss = nn.CrossEntropyLoss(weight=class_weights)
        self.color_loss = nn.CrossEntropyLoss()

        if self.option.cuda:
            self.net.cuda()
            self.pred_net_r.cuda()
            self.pred_net_g.cuda()
            self.pred_net_b.cuda()
            self.loss.cuda()
            self.color_loss.cuda()

    def _set_optimizer(self):
        """Create one SGD optimiser and one step-decay LR scheduler per network."""
        def make_sgd(params):
            return optim.SGD(params, lr=self.option.lr, momentum=self.option.momentum,
                             weight_decay=self.option.weight_decay)

        # Only parameters that require gradients are optimised for the main net.
        self.optim = make_sgd(filter(lambda p: p.requires_grad, self.net.parameters()))
        self.optim_r = make_sgd(self.pred_net_r.parameters())
        self.optim_g = make_sgd(self.pred_net_g.parameters())
        self.optim_b = make_sgd(self.pred_net_b.parameters())

        # TODO: last_epoch should be the last step of loaded model
        def lr_lambda(step):
            return self.option.lr_decay_rate ** (step // self.option.lr_decay_period)

        self.scheduler = optim.lr_scheduler.LambdaLR(self.optim, lr_lambda=lr_lambda, last_epoch=-1)
        self.scheduler_r = optim.lr_scheduler.LambdaLR(self.optim_r, lr_lambda=lr_lambda, last_epoch=-1)
        self.scheduler_g = optim.lr_scheduler.LambdaLR(self.optim_g, lr_lambda=lr_lambda, last_epoch=-1)
        self.scheduler_b = optim.lr_scheduler.LambdaLR(self.optim_b, lr_lambda=lr_lambda, last_epoch=-1)

    @staticmethod
    def _weights_init(m):
        """Initialise one module in place; intended for ``nn.Module.apply``.

        Modules whose class name contains ``Conv`` get weights drawn from
        N(0, sqrt(2 / n)) with n = prod(kernel_size) * out_channels. Modules
        whose class name contains ``BatchNorm`` get weight 1 and bias 0.
        Modules matching by name but lacking the relevant attributes (e.g. a
        wrapper block, or batch norm with ``affine=False``) are left untouched.
        """
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            kernel_size = getattr(m, 'kernel_size', None)
            weight = getattr(m, 'weight', None)
            if kernel_size is None or weight is None:
                return
            # Product over all kernel dimensions so 1-D / 3-D convs work too.
            n = m.out_channels
            for k in kernel_size:
                n *= k
            weight.data.normal_(0, math.sqrt(2. / n))
        elif classname.find('BatchNorm') != -1:
            weight = getattr(m, 'weight', None)
            bias = getattr(m, 'bias', None)
            if weight is not None:
                weight.data.fill_(1.0)
            if bias is not None:
                bias.data.zero_()

    def _initialization(self):
        """Randomly initialise the main net, then optionally load the pre-trained checkpoint."""
        self.net.apply(self._weights_init)

        if self.option.is_train and self.option.use_pretrain:
            if self.option.checkpoint is not None:
                self._load_model()
            else:
                print("Pre-trained model not provided")

    def _mode_setting(self, is_train=True):
        """Put the main net and all three bias heads into train or eval mode."""
        for module in (self.net, self.pred_net_r, self.pred_net_g, self.pred_net_b):
            if is_train:
                module.train()
            else:
                module.eval()

    @staticmethod
    def _prepare_targets(label_imgs):
        """Return segmentation labels as int64 without a singleton channel axis.

        Only a size-1 axis at position 1 of a 4-D tensor is removed, so a
        batch of size 1 keeps its batch dimension (a bare ``torch.squeeze``
        would drop it).
        """
        label_imgs = label_imgs.long()
        if label_imgs.dim() == 4 and label_imgs.size(1) == 1:
            label_imgs = label_imgs.squeeze(1)
        return label_imgs

    @staticmethod
    def _neg_entropy(probs, eps=1e-12):
        """Return mean over the batch of sum_c p*log(p), i.e. the negative entropy.

        ``probs`` is clamped inside the log so an exact 0 contributes 0
        instead of NaN.
        """
        return torch.mean(torch.sum(probs * torch.log(torch.clamp(probs, min=eps)), 1))

    def _zero_all_grads(self):
        """Clear the gradients held by all four optimisers."""
        self.optim.zero_grad()
        self.optim_r.zero_grad()
        self.optim_g.zero_grad()
        self.optim_b.zero_grad()

    def _train_step(self, data_loader, step):
        """Run one epoch of the two-stage adversarial training scheme.

        Stage 1 updates the main net on the segmentation loss plus a
        (lambda-weighted) negative-entropy term on the bias heads' softmax
        outputs. Stage 2 re-runs the net, sends the bias features through
        gradient reversal into the bias heads and updates the main net and the
        heads on the colour classification loss.

        Args:
            data_loader: iterable of ``(images, bias_labels, label_imgs)`` batches.
            step: current epoch index, used for logging only.
        """
        _lambda = 0.01    # weight of the entropy term; should we make this an option?
        num_batches = len(data_loader)

        for i, (images, bias_labels, label_imgs) in enumerate(data_loader):
            images = self._get_variable(images)
            bias_labels = self._get_variable(bias_labels)
            label_imgs = self._get_variable(self._prepare_targets(label_imgs))

            # ---- Stage 1: segmentation loss + entropy of the bias predictions ----
            self._zero_all_grads()
            pred_map, _, bias_fork = self.net(images)

            # The heads return (pre-softmax output, softmax output); the softmax is used here.
            _, pseudo_pred_r = self.pred_net_r(bias_fork)
            _, pseudo_pred_g = self.pred_net_g(bias_fork)
            _, pseudo_pred_b = self.pred_net_b(bias_fork)

            loss_pred = self.loss(pred_map, label_imgs)
            loss_pred_ps_color = (self._neg_entropy(pseudo_pred_r)
                                  + self._neg_entropy(pseudo_pred_g)
                                  + self._neg_entropy(pseudo_pred_b)) / 3.
            loss = loss_pred + loss_pred_ps_color * _lambda

            loss.backward()
            self.optim.step()

            # ---- Stage 2: colour classification through gradient reversal ----
            self._zero_all_grads()
            pred_map, _, bias_fork = self.net(images)
            # The heads are built for bias_fork's channel count (see _build_model),
            # so the reversed features must come from bias_fork, not pred_map.
            feat_color = grad_reverse(bias_fork)

            # This time the pre-softmax output is used, as CrossEntropyLoss expects.
            pred_r, _ = self.pred_net_r(feat_color)
            pred_g, _ = self.pred_net_g(feat_color)
            pred_b, _ = self.pred_net_b(feat_color)

            # bias_labels[:, c] selects the ground-truth bins of colour channel c.
            loss_pred_r = self.color_loss(pred_r, bias_labels[:, 0])
            loss_pred_g = self.color_loss(pred_g, bias_labels[:, 1])
            loss_pred_b = self.color_loss(pred_b, bias_labels[:, 2])
            loss_pred_color = loss_pred_r + loss_pred_g + loss_pred_b

            loss_pred_color.backward()
            self.optim.step()
            self.optim_r.step()
            self.optim_g.step()
            self.optim_b.step()

            if i % self.option.log_step == 0:
                msg = "[TRAIN] cls loss : %.6f, rgb : %.6f, MI : %.6f  (epoch %d.%02d)" \
                       % (loss_pred.item(), loss_pred_color.item() / 3., loss_pred_ps_color.item(),
                          step, int(100 * i / num_batches))
                self.logger.info(msg)

    def _train_step_baseline(self, data_loader, step):
        """Run one epoch of plain segmentation training (no bias heads).

        Args:
            data_loader: iterable of ``(images, bias_labels, label_imgs)``
                batches; ``bias_labels`` is ignored here.
            step: current epoch index, used for logging only.

        Returns:
            list with one 0-d numpy array (the loss value) per processed batch.
        """
        batch_losses = []
        num_batches = len(data_loader)

        for i, (images, _, label_imgs) in enumerate(data_loader):
            images = self._get_variable(images)
            label_imgs = self._get_variable(self._prepare_targets(label_imgs))

            self.optim.zero_grad()
            pred_map, _, _ = self.net(images)

            # Cross entropy between the pre-softmax class map and the label image.
            loss_pred = self.loss(pred_map, label_imgs)
            batch_losses.append(loss_pred.detach().cpu().numpy())

            loss_pred.backward()
            self.optim.step()

            # TODO: print elapsed time for iteration
            if i % self.option.log_step == 0:
                msg = "[TRAIN] cls loss : %.6f (epoch %d.%02d)" \
                    % (loss_pred.item(), step, int(100 * i / num_batches))
                self.logger.info(msg)

        return batch_losses

    def _validate(self, data_loader, reload_checkpoint=True):
        """Evaluate the main net on ``data_loader`` and log mean loss and accuracy.

        Args:
            data_loader: iterable of ``(images, bias_labels, label_imgs)`` batches.
            reload_checkpoint: when True (default, the stand-alone evaluation
                path) the weights are loaded from ``option.checkpoint`` and
                the process exits if none is configured. Pass False to
                evaluate the weights currently in memory, e.g. from inside
                the training loop.

        The networks are left in eval mode; callers that continue training
        must switch back with ``_mode_setting(is_train=True)``.
        """
        if reload_checkpoint:
            if self.option.checkpoint is None:
                print("No trained model for evaluation provided")
                import sys
                sys.exit()
            # Loading replaces every parameter, so no prior re-initialisation is needed.
            self._load_model()
        self._mode_setting(is_train=False)

        total_num_correct = 0
        total_num_labels = 0
        total_num_test = 0
        total_loss = 0.
        with torch.no_grad():
            for images, _, label_imgs in data_loader:
                images = self._get_variable(images)
                label_imgs = self._get_variable(self._prepare_targets(label_imgs))

                pred_map, _, _ = self.net(images)
                loss = self.loss(pred_map, label_imgs)

                batch_size = images.shape[0]
                total_num_correct += self._num_correct(pred_map, label_imgs, topk=1).item()
                # Accuracy is per label element (per pixel for label maps,
                # per sample for one label per image).
                total_num_labels += label_imgs.numel()
                total_loss += loss.item() * batch_size
                total_num_test += batch_size

        if total_num_test == 0:
            self.logger.info("EVALUATION skipped: the data loader yielded no batches")
            return

        avg_loss = total_loss / total_num_test
        avg_acc = total_num_correct / float(total_num_labels)
        msg = "EVALUATION LOSS  %.4f, ACCURACY : %.4f (%d/%d)" % \
                        (avg_loss, avg_acc, int(total_num_correct), total_num_labels)
        self.logger.info(msg)

    def _num_correct(self, outputs, labels, topk=1):
        """Count label elements whose class is among the top-k scores along dim 1.

        Works for ``(N, C)`` scores with ``(N,)`` labels as well as for dense
        ``(N, C, ...)`` scores with ``(N, ...)`` labels.

        Returns:
            0-d integer tensor with the number of hits.
        """
        _, preds = outputs.topk(k=topk, dim=1)
        # Give labels a size-1 class axis so they broadcast against the k predictions.
        targets = labels.reshape(preds.size(0), 1, *preds.shape[2:])
        return preds.eq(targets).sum()

    def _accuracy(self, outputs, labels):
        """Return top-1 accuracy in percent as a 1-element float tensor.

        The denominator is the number of label elements, which equals the
        batch size when there is one label per sample.
        """
        _, preds = outputs.topk(k=1, dim=1)
        targets = labels.reshape(preds.size(0), 1, *preds.shape[2:])
        correct = preds.eq(targets).reshape(-1).float().sum(0, keepdim=True)
        return correct.mul_(100.0 / labels.numel())

    def _save_model(self, step):
        """Write the main net and its optimiser state to ``<path>/checkpoint``.

        NOTE: the bias heads and their optimisers are not part of the checkpoint.
        """
        checkpoint_dir = self.path + '/checkpoint'
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)

        torch.save({
            'step': step,
            'optim_state_dict': self.optim.state_dict(),
            'net_state_dict': self.net.state_dict()
        }, os.path.join(checkpoint_dir, 'checkpoint_epoch_%04d.pth' % step))
        print('Checkpoint saved. Epoch : %d' % step)

    def _load_model(self):
        """Restore the main net and its optimiser from ``option.checkpoint``."""
        # Map to CPU when CUDA is disabled so GPU-saved checkpoints still load.
        map_location = None if self.option.cuda else 'cpu'
        ckpt = torch.load(self.option.checkpoint, map_location=map_location)
        self.net.load_state_dict(ckpt['net_state_dict'])  # keys as written by _save_model
        self.optim.load_state_dict(ckpt['optim_state_dict'])

    # TODO: add a per-epoch loss plotting helper (an earlier commented-out draft was unfinished).

    def train(self, train_loader, val_loader=None):
        """Run the full training loop for ``option.max_step`` epochs.

        Validation (when ``val_loader`` is given) and checkpointing happen at
        epoch 1, at every multiple of ``option.save_step`` and at the last epoch.
        """
        self._initialization()
        if self.option.checkpoint is not None:
            self._load_model()

        self._mode_setting(is_train=True)
        timer = Timer(self.logger, self.option.max_step)  # created as before; not used yet
        start_epoch = 0
        last_step = self.option.max_step - 1

        for step in range(start_epoch, self.option.max_step):
            if self.option.train_baseline:
                batch_losses = self._train_step_baseline(train_loader, step)
                if batch_losses:
                    self.logger.info("[TRAIN] epoch %d mean cls loss : %.6f"
                                     % (step, float(np.mean(batch_losses))))
            else:
                self._train_step(train_loader, step)  # LNTL procedure
            self.scheduler.step()
            self.scheduler_r.step()
            self.scheduler_g.step()
            self.scheduler_b.step()

            if step == 1 or step % self.option.save_step == 0 or step == last_step:
                if val_loader is not None:
                    # Evaluate the in-memory weights; reloading the checkpoint
                    # here would discard the training done so far.
                    self._validate(val_loader, reload_checkpoint=False)
                    self._mode_setting(is_train=True)
                self._save_model(step)

    def _get_variable(self, inputs):
        """Return ``inputs`` moved to the GPU when ``option.cuda`` is set, else unchanged."""
        if self.option.cuda:
            return inputs.cuda()
        return inputs

importing Jupyter notebook from /content/drive/My Drive/BiasMitigation/LNTL/models/SegNet/segnet.ipynb
importing Jupyter notebook from /content/drive/My Drive/BiasMitigation/LNTL/models/BiasPredictor.ipynb
importing Jupyter notebook from /content/drive/My Drive/BiasMitigation/LNTL/utils/utils_LNTL.ipynb
