In [1]:
# system imports
import os
import time
import sys 
import datetime
import import_ipynb

# common matrix manipulation
import numpy as np

# plotting, Image showing, Image string operations
import matplotlib.pyplot as plt

# import Image
from PIL import Image

# Image loading from disk
import cv2

# Progress bar
from tqdm import tqdm

# Pytorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torchvision import transforms
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR


In [None]:
# model, loss, utils 
from model import MonodepthModel
from ipynb.fs.full.loss import SSIM_loss, disparity_smoothness, LR_disparity_consistency
from ipynb.fs.full.utils import scale_pyramid
from dataset import KITTIDataset

weight_file_location = "/home/sur/MonoDepth1_Implementation/monodepth_weight.pth"
image_save_path = "/home/sur/MonoDepth1_Implementation/train_img/"

if __name__ == '__main__':

    dataset = KITTIDataset()
    net = MonodepthModel()
    net.to(torch.device("cuda:0"))
    
    resume_trining = True;
    
    if resume_trining:
        print("\n Loading previous weights from ", weight_file_location)
        net.load_state_dict(torch.load(weight_file_location))
    
    # configure loss
    appearance_matching_loss_weight = 1
    LR_loss_weight = 1
    disparity_smoothness_loss_weight = 0.05
    alpha_appearance_matching_loss = 0.75
    
    learning_rate = 0.0001
    epochs = 200
    batch = 4
    
    TrainLoader = torch.utils.data.DataLoader(dataset, batch_size = batch, shuffle = True, num_workers = 0)
    resume_trining = True;

    print("\n \nTraining with the following loss parmeters:")
    print("appearance_matching_loss_weight: ",appearance_matching_loss_weight)
    print("LR_loss_weight: ", LR_loss_weight)
    print("disparity_smoothness_loss_weight: ", disparity_smoothness_loss_weight)
    print("alpha_appearance_matching_loss: ", alpha_appearance_matching_loss)
    print("\n")
    
    is_gpu_available = torch.cuda.is_available()

    if is_gpu_available:
        loss_function = nn.L1Loss().cuda()
    else:
        loss_function = nn.L1Loss()

    optimizer = optim.Adam(net.parameters(), lr = learning_rate)
    scheduler = StepLR(optimizer, step_size=15, gamma=0.1)
    current_datetime = datetime.datetime.now()
    print("Training Started @ ", current_datetime.strftime("%Y-%m-%d %H:%M:%S"))
    for epoch in range(1, epochs):     
        for batch_data in tqdm(TrainLoader):
            # retrieve stereo images
            left_original = batch_data["left_img"]
            right_original = batch_data["right_img"]
            
            # send to CUDA device
            if is_gpu_available: 
                left = left_original.type(torch.FloatTensor).cuda()
                right = right_original.type(torch.FloatTensor).cuda()
            else:
                left = left_original.type(torch.FloatTensor)
                right = right_original.type(torch.FloatTensor)

            # generate pyramid
            left_pyramid = scale_pyramid(left,4)
            right_pyramid = scale_pyramid(right,4)

            output = net.forward(left)

            # collect disparities from the model
            left_disp = [output[i][:, 0, :, :] for i in range(4)]
            right_disp = [output[i][:, 1, :, :] for i in range(4)]

            # reconsturct corresponding images using disparities
            right_reconstuct = [LR_disparity_consistency(left_pyramid[i], right_disp[i]) for i in range(4)]
            left_reconstuct = [LR_disparity_consistency(right_pyramid[i], left_disp[i]) for i in range(4)]
            
            """
            calculate L1 loss
            """
            # TODO: Put weighted loss for pyramid : error in smaller image should contribute more
            left_L1loss = [loss_function(left_pyramid[i], left_reconstuct[i]) for i in range(4)]
            right_L1loss = [loss_function(right_pyramid[i], right_reconstuct[i]) for i in range(4)]
            if is_gpu_available:
                total_L1_loss = torch.FloatTensor([0]).cuda()
                total_SSIM_loss = torch.FloatTensor([0]).cuda()
            else:
                total_L1_loss = torch.FloatTensor([0])
                total_SSIM_loss = torch.FloatTensor([0])
            for i in range(4): 
                total_L1_loss += (left_L1loss[i] + right_L1loss[i])
            total_L1_loss /= 4 

            """
            calculate SSIM loss
            """
            left_SSIM_loss = [torch.mean(SSIM_loss(left_pyramid[i], left_reconstuct[i])) for i in range(4)] #Reconstructed Image and Original Image 
            right_SSIM_loss = [torch.mean(SSIM_loss(right_pyramid[i], right_reconstuct[i])) for i in range(4)]
            for i in range(4): 
                total_SSIM_loss += (left_SSIM_loss[i] + right_SSIM_loss[i])
            total_SSIM_loss /= 4
            
            """
            Total apparance matching loss
            """
            appearance_matching_loss = (alpha_appearance_matching_loss * total_SSIM_loss) + (1- alpha_appearance_matching_loss)*total_L1_loss

            # append axis of channel to treat disparities as images
            left_disp[0] = left_disp[0].view([-1, 1, 256, 512])
            left_disp[1] = left_disp[1].view([-1, 1, 128, 256])
            left_disp[2] = left_disp[2].view([-1, 1, 64, 128])
            left_disp[3] = left_disp[3].view([-1, 1, 32, 64])

            right_disp[0] = right_disp[0].view([-1, 1, 256, 512])
            right_disp[1] = right_disp[1].view([-1, 1, 128, 256])
            right_disp[2] = right_disp[2].view([-1, 1, 64, 128])
            right_disp[3] = right_disp[3].view([-1, 1, 32, 64])

            """
            Calculate L-R consistency loss
            """
            reconstruct_left = [LR_disparity_consistency(right_disp[i], left_disp[i]) for i in range(4)]
            reconstruct_right = [LR_disparity_consistency(left_disp[i], right_disp[i]) for i in range(4)]    
            LR_loss_left = [torch.mean(left_disp[i]-reconstruct_left[i]) for i in range(4)]
            LR_loss_right = [torch.mean(right_disp[i]-reconstruct_right[i]) for i in range(4)]
            if is_gpu_available:
                total_LR_loss = torch.FloatTensor([0]).cuda()
            else:
                total_LR_loss = torch.FloatTensor([0])
            for i in range(4): 
                total_LR_loss += LR_loss_left[i] + LR_loss_right[i] 
            total_LR_loss /= 4

            """
            Disparity smoothness loss
            """
            disparity_smoothnesss_loss_left = disparity_smoothness(left_pyramid,left_disp)
            disparity_smoothness_loss_right = disparity_smoothness(right_pyramid,right_disp)
            disparity_smoothness_loss = sum(disparity_smoothnesss_loss_left + disparity_smoothness_loss_right)
 
            loss = (appearance_matching_loss_weight * appearance_matching_loss+ \
                LR_loss_weight * total_LR_loss + \
                disparity_smoothness_loss_weight * disparity_smoothness_loss)/batch   

            loss.backward()  
            optimizer.step()
            net.zero_grad()
            scheduler.step() 

        #TO DO: Query same image and see how it evolves over epochs
        print("Epoch : ", epoch, " Loss: ", loss)
        rgb = right_disp[0][0].detach().cpu().numpy()
        fig = plt.figure(1)
        plt.imshow(rgb[0],cmap='plasma')
        plt.savefig(image_save_path + str(epoch)) 
        torch.save(net.state_dict(), weight_file_location)

importing Jupyter notebook from model.ipynb
importing Jupyter notebook from dataset.ipynb
Loading Dataset: from ->  /home/sur/MonoDepth1_Implementation/dataset/training
Loading Dataset: COMPLETE! took  0.040547847747802734  seconds
Total Stereo images acquired:  4200
Loading Dataset: from ->  /home/sur/MonoDepth1_Implementation/dataset/training
Loading Dataset: COMPLETE! took  0.029474496841430664  seconds
Total Stereo images acquired:  4200

 Loading previous weights from  /home/sur/MonoDepth1_Implementation/monodepth_weight.pth


  0%|          | 0/1050 [00:00<?, ?it/s]


 
Training with the following loss parmeters:
appearance_matching_loss_weight:  1
LR_loss_weight:  1
disparity_smoothness_loss_weight:  0.05
alpha_appearance_matching_loss:  0.75


Training Started @  2020-09-07 05:16:33


100%|██████████| 1050/1050 [08:19<00:00,  2.10it/s]


Epoch :  1  Loss:  tensor([4.2430], device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1050/1050 [06:56<00:00,  2.52it/s]


Epoch :  2  Loss:  tensor([3.7667], device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1050/1050 [06:29<00:00,  2.69it/s]


Epoch :  3  Loss:  tensor([3.9368], device='cuda:0', grad_fn=<DivBackward0>)


  6%|▌         | 62/1050 [00:23<05:36,  2.93it/s]