# Intro to ML Capstone Project 
* Professor: Lerrel Pinto
* Made by Sangwon Baek
* December 10th 2022
* Kaggle Site URL:
https://www.kaggle.com/competitions/csci-ua-473-intro-to-machine-learning-fall22/overview
* Batchsize 32 was used
* ResNet 152 / LR : 0.001 (1e-3)
* Applied normalization for RGB by calculating them / for depth(divided by 1000 first) used CV2 normalizer minmax
* Test/Valid split (8:2), implemented earlystopping (patience=5, stepLR on every epoch with Gamma 0.8 step 1)
* Did (targets) ground truth * 100 before passing to the loss function (meters to mm) then divided by 100 before submission

In [1]:
import os 
import pandas as pd 
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torchvision import models
from torchvision.models import resnet50, ResNet50_Weights, resnet18, ResNet18_Weights, resnet152, ResNet152_Weights

from torchsummary import summary

import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt
import numpy as np
import cv2
import pickle as pkl 

### Preprocessing and loading dataset

In [2]:
class LazyLoadDataset(Dataset):
    def __init__(self, path, train=True, transform=None):
        self.transform = transform
        self.train = train
        self.transform_0 = None
        self.transform_1 = None
        self.transform_2 = None
        
        if self.transform is not None:
            self.transform_0 = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize(240),
            transforms.CenterCrop(224),
            transforms.Normalize([0.4352, 0.4170, 0.3960], [0.1992, 0.1987, 0.2111])
            ])
            self.transform_1 = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize(240),
            transforms.CenterCrop(224),
            transforms.Normalize([0.5008, 0.4879, 0.4697], [0.2276, 0.2252, 0.2417])
            ])
            self.transform_2 = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize(240),
            transforms.CenterCrop(224),
            transforms.Normalize([0.5193, 0.4820, 0.4412], [0.2293, 0.2288, 0.2465])
            ])
        path = path + ("train/" if train else "test/")
        
        self.pathX = path+"X/"
        self.pathY = path+"Y/"
        
        self.data = os.listdir(self.pathX)
    
    def __getitem__(self, idx):
        f = self.data[idx]
        
        #Read rgb images
        img0 = cv2.imread(self.pathX + f + '/rgb/0.png')
        img1 = cv2.imread(self.pathX + f + '/rgb/1.png')
        img2 = cv2.imread(self.pathX + f + '/rgb/2.png')
        
        #read depth images
        depth = np.load(self.pathX + f + '/depth.npy')        
        depth = depth/1000
        
        #Convert RGB and depth images to tensor
        if self.transform is not None:
            img0 = self.transform_0(img0)
            img1 = self.transform_1(img1)
            img2 = self.transform_2(img2)
        
        #Perform transformation on Depth image
        depth = cv2.normalize(depth, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)  
            
        #read field ID & Y
        field_id = pkl.load(open(self.pathX + f + '/field_id.pkl', 'rb'))
        
        if self.train==True:
            Y = np.load(self.pathY + f + '.npy')
            return (img0, img1, img2, depth, field_id), Y
        if self.train==False:
            return (img0, img1, img2, depth, field_id)
        
    def __len__(self):
        return len(self.data)

### Explore data shape

In [3]:
#Transform to tensor
transform = transforms.ToTensor()

#Lazy Load the dataset
dataset = LazyLoadDataset('../lazydata/',train=True, transform=transform)


In [4]:
#Define train/validation size (8:2)
train_size = int(len(dataset) * 0.8)
validation_size = len(dataset)-train_size

#Randomly split dataset into train and validation dataset with specified size above
train_dataset, validation_dataset = random_split(dataset, [train_size, validation_size])

#Create train/validation dataloader with batch_size of 64, 32 respectively
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=32, shuffle=True)


In [5]:
print("Train set size: {} \nTrain Loader size: {}".format(len(train_dataset),len(train_dataloader)))
print("Validation set size: {} \nValidation Loader size: {}".format(len(validation_dataset),len(validation_dataloader)))

Train set size: 2716 
Train Loader size: 85
Validation set size: 680 
Validation Loader size: 22


In [6]:
for i, ((img0, img1, img2, depth, field_id), labels) in enumerate(train_dataloader):
    print(i)
    # print(depth[0])
    print("img0 shape{}".format(img0.size()))
    print("img1 shape{}".format(img1.size()))
    print("img2 shape{}".format(img2.size()))
    print("depth shape{}".format(depth.shape))
    print("field id {}".format(field_id))
    print("labels size {}".format(labels.size()))
    break

0
img0 shapetorch.Size([32, 3, 224, 224])
img1 shapetorch.Size([32, 3, 224, 224])
img2 shapetorch.Size([32, 3, 224, 224])
depth shapetorch.Size([32, 3, 224, 224])
field id ('3617', '3175', '2547', '2973', '2878', '1370', '1478', '1040', '4005', '1758', '251', '1856', '2774', '160', '3816', '1977', '1231', '698', '3339', '516', '892', '1049', '3471', '2712', '2818', '2187', '3036', '502', '1943', '1749', '710', '1781')
labels size torch.Size([32, 12])


### Set device to cuda & Define parameters

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 12
lr = 1e-3
epochs = 50

### Train 1st img 

In [8]:
def train_1(epoch, model, optimizer, scheduler):
    # Early stopping Parameters
    Best_Val_loss = None
    patience = 5
    trigger_times = 0
    for epoch in range(0, epochs):
        model.train()
        for batch_idx, ((img0, img1, img2, depth, field_id), target) in enumerate(train_dataloader): 
            #Stack all three RGB 
            img_all = torch.stack((img0[:,0,:,:],img0[:,1,:,:],img0[:,2,:,:]),1)
            data = img_all
            data = data.to(device)

            #Send to Device
            target = target.to(device)
            
            #multiply GT by 100
            target = target*100
            
            #Run necessary train implementations
            output = model(data)
            
            #Zero gradients
            optimizer.zero_grad()

            #forward and backward propagation
            loss_function = nn.MSELoss()
            #Root Mean Squared Error (RMSE) 
            train_loss = torch.sqrt(loss_function(output.float(), target.float()))
            train_loss.backward()
            optimizer.step()

            #Show progress
            if batch_idx % 28 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_dataloader.dataset),
                    100. * batch_idx / len(train_dataloader), train_loss.item()))
   
        #To help model improving apply scheduler to reduce learning rate
        scheduler.step()
        curr_lr = scheduler.get_last_lr()
        print("current lr{}".format(curr_lr))     
        
        #Early Stopping
        Curr_Val_loss = validation_1(model)
        print('Valid Epoch: {} Current Validation Loss: {:.6F}'.format(epoch, Curr_Val_loss))
        
        if Curr_Val_loss < 0.242:
            print ("Best model obtained! Set Patience = 2")
            patience = 2 
        
        if Best_Val_loss is None:
            Best_Val_loss = Curr_Val_loss - 0.0001
        #Model didn't improve so add trigger time
        if Curr_Val_loss > Best_Val_loss:
            trigger_times += 1
            print('trigger times:', trigger_times)
            if trigger_times >= patience:
                print('Early stopping!\nStart to test process.')
                return model

        #Reset to trigger_time 0 and update best validation loss
        else:
            trigger_times = 0
            Best_Val_loss = Curr_Val_loss
            print('trigger times: 0 Best_Val_loss: {:.6F}'.format(Best_Val_loss)) 

    return model

In [9]:
def validation_1(model):
    model.eval()
    loss_total = 0
    with torch.no_grad():
        for batch_idx, ((img0, img1, img2, depth, field_id), target) in enumerate(validation_dataloader):
            #Stack all three RGB and depth images
            img_all = torch.stack((img0[:,0,:,:],img0[:,1,:,:],img0[:,2,:,:]),1)
            data = img_all
            data = data.to(device)
            
            #Send to Device
            target = target.to(device)
            
            #multiply GT by 100
            target = target*100
            
            output = model(data)
            loss_function = nn.MSELoss()
            loss = torch.sqrt(loss_function(output.float(), target.float()))
            loss_total += loss.item()
    return loss_total / len(validation_dataloader)

In [10]:
model_1 = models.resnet152(weights=ResNet152_Weights.DEFAULT)
model_1.conv1 = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
model_1.fc = nn.Linear(2048, num_classes)
model_1 = model_1.cuda()
optimizer = torch.optim.Adam(model_1.parameters(), lr=lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.8)
model_1 = train_1(epochs, model_1, optimizer, scheduler)

current lr[0.0008]
Valid Epoch: 0 Current Validation Loss: 0.797618
trigger times: 1
current lr[0.00064]
Valid Epoch: 1 Current Validation Loss: 0.446688
trigger times: 0 Best_Val_loss: 0.446688
current lr[0.0005120000000000001]
Valid Epoch: 2 Current Validation Loss: 0.445030
trigger times: 0 Best_Val_loss: 0.445030
current lr[0.0004096000000000001]
Valid Epoch: 3 Current Validation Loss: 0.366323
trigger times: 0 Best_Val_loss: 0.366323
current lr[0.0003276800000000001]
Valid Epoch: 4 Current Validation Loss: 0.296331
trigger times: 0 Best_Val_loss: 0.296331
current lr[0.0002621440000000001]
Valid Epoch: 5 Current Validation Loss: 0.315297
trigger times: 1
current lr[0.00020971520000000012]
Valid Epoch: 6 Current Validation Loss: 0.299679
trigger times: 2
current lr[0.0001677721600000001]
Valid Epoch: 7 Current Validation Loss: 0.269646
trigger times: 0 Best_Val_loss: 0.269646
current lr[0.00013421772800000008]
Valid Epoch: 8 Current Validation Loss: 0.256158
trigger times: 0 Best_Va

In [11]:
#Save my CNN model
torch.save(model_1.state_dict(), "../model/model_img1.pt")