# Capstone Project
### Author: Leo Xu

In [None]:
import cv2
import os
import torch
import pickle as pkl
import numpy as np
import pandas as pd
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.optim as optim
import matplotlib.pyplot as plt

In [None]:
# Use the first CUDA GPU when PyTorch can see one; otherwise run on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

## Lazy loading the data

In [None]:
class LazyLoadDataset(Dataset):
    """Dataset that reads a sample's files from disk each time it is indexed.

    Files read, relative to ``path`` (``<split>`` is ``train`` or ``test``)::

        <split>/X/<sample>/rgb/0.png, 1.png, 2.png
        <split>/X/<sample>/depth.npy
        <split>/X/<sample>/field_id.pkl
        train/Y/<sample>.npy            (only read when ``final`` is False)

    Args:
        path: Root directory holding the ``train/`` and ``test/`` folders.
            A trailing separator is accepted but not required.
        final: When True, read the ``test/`` split and return inputs only.
        transform: Optional callable applied to each of the three images.
    """

    def __init__(self, path, final = False, transform = None):
        self.transform = transform
        self.final = final
        split = 'test' if final else 'train'
        # The trailing '' keeps a separator at the end of both attributes,
        # matching the format the string-concatenated paths used to have.
        self.pathX = os.path.join(path, split, 'X', '')
        self.pathY = os.path.join(path, split, 'Y', '')
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> sample mapping reproducible across runs and machines.
        self.X = sorted(os.listdir(self.pathX))

    def __getitem__(self, index):
        """Return the inputs (and, for the training split, the target) of one sample.

        Returns:
            ``(img0, img1, img2, depth, field_id)`` when ``final`` is True,
            otherwise ``((img0, img1, img2, depth, field_id), Y * 1000)``.
        """
        sample = self.X[index]
        sample_dir = os.path.join(self.pathX, sample)
        images = [
            cv2.imread(os.path.join(sample_dir, 'rgb', name))
            for name in ('0.png', '1.png', '2.png')
        ]
        if self.transform is not None:
            images = [self.transform(image) for image in images]
        img0, img1, img2 = images

        # Depth values are divided by 1000 on load.
        depth = np.load(os.path.join(sample_dir, 'depth.npy')) / 1000

        # NOTE(review): pickle can execute arbitrary code while loading; only
        # use this on dataset files from a trusted source.
        # The context manager closes the handle, which a bare open() leaked.
        with open(os.path.join(sample_dir, 'field_id.pkl'), 'rb') as handle:
            field_id = pkl.load(handle)

        inputs = (img0, img1, img2, depth, field_id)
        if self.final:
            return inputs

        # Targets are multiplied by 1000 here; the submission code divides the
        # model's predictions by 1000 to undo this scaling.
        Y = np.load(os.path.join(self.pathY, sample + '.npy'))
        return inputs, Y * 1000

    def __len__(self):
        """Return the number of sample folders found under ``pathX``."""
        return len(self.X)

In [None]:
# Training split with the images converted to tensors but not normalized;
# it is the input for the channel statistics computed from `data0` below.
data0 = LazyLoadDataset(path='./', transform=transforms.ToTensor())

## Computing the per-channel mean and standard deviation of the dataset's RGB images

In [None]:
# Pull the whole un-normalized training split in a single batch. The order of
# the samples does not affect a mean or a standard deviation, so no shuffling.
dataloader = DataLoader(data0, batch_size=len(data0), shuffle=False)
(img0, img1, img2, depth, field_id), Y = next(iter(dataloader))

# Stack the three images of every sample on a new axis 1, giving
# (samples, image, channel, height, width). This builds the same tensor as
# viewing each batch with an extra axis and concatenating, without
# hard-coding the number of samples or the image size.
temp = torch.stack([img0, img1, img2], dim=1)

# Reduce over every axis except the channel axis (2): one value per channel.
mean = temp.mean([0, 1, 3, 4])
std = temp.std([0, 1, 3, 4])

Normalize the data and split it into training and test sets

In [None]:
# Same training split as before, now standardized per channel with the
# `mean` and `std` measured on the un-normalized images.
normalize = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])
data = LazyLoadDataset('./', transform = normalize)

# Seed the split so the same samples are held out on every run. Later cells
# resume training from saved checkpoints; with an unseeded split, a restarted
# session could evaluate such a checkpoint on samples it was trained on.
split_generator = torch.Generator().manual_seed(0)
train_data, test_data = torch.utils.data.random_split(data, [0.8,0.2], generator=split_generator)
train_size = len(train_data)
test_size = len(test_data)

train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
# Evaluation only aggregates a loss over the held-out set, so shuffling it
# adds nothing; a fixed order keeps evaluation runs comparable.
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

## CNN based on AlexNet

In [None]:
class CNN(nn.Module):
    """Six grouped convolutions followed by a three-layer fully connected head.

    Every convolution uses ``groups=4``, so ``input_channels`` must be
    divisible by 4.

    Args:
        input_channels: Number of channels of the input tensor.
        fc_feature: Width of the two hidden fully connected layers.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_channels, fc_feature, output_size):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(
            input_channels, 64, kernel_size=11, stride=4, padding=2, groups=4
        )
        self.conv2 = nn.Conv2d(64, 192, kernel_size=5, padding=2, groups=4)
        self.conv3 = nn.Conv2d(192, 192, kernel_size=1, groups=4)
        self.conv4 = nn.Conv2d(192, 384, kernel_size=3, padding=1, groups=4)
        self.conv5 = nn.Conv2d(384, 256, kernel_size=3, padding=1, groups=4)
        self.conv6 = nn.Conv2d(256, 256, kernel_size=3, padding=1, groups=4)
        # Parameter-free layers, reused at several points of the forward pass.
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
        # Fixes the spatial size at 6x6, which is what fc1 expects.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.flatten = nn.Flatten()
        self.dropout1 = nn.Dropout(p=0.3)
        self.dropout2 = nn.Dropout(p=0.5)
        # Fully connected regression head.
        self.fc1 = nn.Linear(256 * 6 * 6, fc_feature)
        self.fc2 = nn.Linear(fc_feature, fc_feature)
        self.fc3 = nn.Linear(fc_feature, output_size)

    def forward(self, x):
        """Map a ``(batch, input_channels, H, W)`` tensor to ``(batch, output_size)``."""
        act, pool = self.relu, self.maxpool

        # Stage 1: conv1 -> pool.
        feats = pool(act(self.conv1(x)))

        # Stage 2: conv2, conv3 -> pool.
        for conv in (self.conv2, self.conv3):
            feats = act(conv(feats))
        feats = pool(feats)

        # Stage 3: conv4, conv5, conv6 -> pool.
        for conv in (self.conv4, self.conv5, self.conv6):
            feats = act(conv(feats))
        feats = pool(feats)

        # Head: fixed-size pooling, flatten, two dropout-regularized layers.
        flat = self.flatten(self.avgpool(feats))
        hidden = act(self.fc1(self.dropout1(flat)))
        hidden = act(self.fc2(self.dropout2(hidden)))
        return self.fc3(hidden)


## Train and Test functions

In [None]:
def train(epoch, model, optimizer):
    """Run one pass of MSE training over the global ``train_loader``.

    Args:
        epoch: Epoch number; only used in the progress message.
        model: Network to train; switched to training mode here.
        optimizer: Optimizer that updates ``model``'s parameters.
    """
    model.train()
    criterion = nn.MSELoss()
    for batch_idx, (inputs, target) in enumerate(train_loader):
        img0, img1, img2, depth, _ = inputs
        # The three images and the depth array are joined along dim 1 to form
        # the network input.
        batch = torch.cat((img0, img1, img2, depth), dim=1).to(device)
        target = target.to(device)

        optimizer.zero_grad()
        prediction = model(batch)
        loss = criterion(prediction.float(), target.float())
        loss.backward()
        optimizer.step()

        # Report progress on every tenth batch only.
        if batch_idx % 10 != 0:
            continue
        seen = batch_idx * len(batch)
        percent = 100. * batch_idx / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, len(train_loader.dataset), percent, loss.item()))

In [None]:
def test(model):
    """Evaluate ``model`` on the global ``test_loader``.

    The squared error is summed over every predicted value of every sample and
    divided by the number of values, so the result is the mean squared error
    of the whole held-out set. (Adding up per-batch means and dividing by the
    sample count, as before, understated the error by roughly the batch size
    and weighted a smaller final batch differently from the others.)

    Args:
        model: Network to evaluate; switched to evaluation mode here.

    Returns:
        The mean squared error over the held-out set, as a Python float.
    """
    model.eval()
    criterion = nn.MSELoss(reduction='sum')
    squared_error = 0.0
    num_values = 0
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for (img0, img1, img2, depth, field_id), target in test_loader:
            data = torch.cat((img0, img1, img2, depth), dim=1).to(device)
            target = target.to(device).float()
            pred = model(data).float()
            squared_error += criterion(pred, target).item()
            num_values += target.numel()
    loss = squared_error / num_values
    print('Test set: Average MSE: {:.5f}'.format(loss))
    return loss

## Training and Tuning the model

First iteration

In [None]:
# The network input is the three images and the depth array concatenated on
# dim 1 (see train/test); presumably 3 x 3 image channels plus 3 depth
# channels -- TODO confirm the depth shape.
input_channels = 12
fc_features = 4096
output_size = 12
model_cnn = CNN(input_channels, fc_features, output_size)
model_cnn.to(device)
optimizer = optim.Adam(model_cnn.parameters(), lr = 0.0001)

# torch.save does not create missing directories.
os.makedirs('models', exist_ok=True)

# Train for 30 epochs, recording the held-out loss and saving a checkpoint
# after each one so that the best epoch can be reloaded afterwards.
test_loss=[]
for epoch in range(0, 30):
    train(epoch, model_cnn, optimizer)
    test_loss.append(test(model_cnn))
    SAVE_PATH='models/epoch'+str(epoch)+".pt"
    torch.save(model_cnn.state_dict(),SAVE_PATH)

# Index of the epoch with the lowest held-out loss (displayed by the notebook).
min_epoch=test_loss.index(min(test_loss))
min_epoch

Adjusting the learning rate for more accurate convergence

#### Note: the following cell was run several times with different learning rates and model parameters

In [None]:
# Resume from a saved checkpoint and continue with a smaller learning rate.
model_cnn = CNN(input_channels, fc_features, output_size)
# map_location lets a checkpoint saved on one device (e.g. a GPU) load on
# whatever `device` is in this session.
model_cnn.load_state_dict(torch.load('models/epoch7.pt', map_location=device))
model_cnn.to(device)
optimizer = optim.Adam(model_cnn.parameters(), lr = 0.00001)

# torch.save does not create missing directories.
os.makedirs('models', exist_ok=True)

# NOTE(review): the checkpoints written below reuse the names
# models/epoch0.pt ... models/epoch9.pt, so they overwrite the files of the
# previous run -- including the checkpoint loaded above. Copy any checkpoint
# worth keeping before re-running this cell.
test_loss=[]
for epoch in range(0, 10):
    train(epoch, model_cnn, optimizer)
    test_loss.append(test(model_cnn))
    SAVE_PATH='models/epoch'+str(epoch)+".pt"
    torch.save(model_cnn.state_dict(),SAVE_PATH)

# Index of the epoch with the lowest held-out loss (displayed by the notebook).
min_epoch=test_loss.index(min(test_loss))
min_epoch

## Loading the final model and producing data for submission

Loading the best model

In [None]:
# Rebuild the architecture and load the selected weights.
# NOTE(review): no cell in this notebook writes 'models/final.pt'; presumably
# it is a manually chosen copy of one of the epoch checkpoints -- confirm.
model = CNN(input_channels, fc_features, output_size)
# map_location lets a checkpoint saved on one device (e.g. a GPU) load on
# whatever `device` is in this session.
model.load_state_dict(torch.load('models/final.pt', map_location=device))

Loading submission dataset

In [None]:
# Submission split, pre-processed with the same `normalize` transform as the
# training data.
final_data = LazyLoadDataset('./', final = True, transform = normalize)
# Inference gains nothing from shuffling; a fixed order keeps the rows of the
# submission file in the same order on every run.
final_loader = DataLoader(final_data, batch_size=64, shuffle=False)

Write the predictions to a CSV file

In [None]:
outfile = 'submission.csv'
titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
     'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']

model.eval()
# Put the model on the same device the batches are sent to; a hard-coded
# .cuda() fails on machines without a GPU.
model.to(device)
preds = []
file_ids = []

# Gradients are not needed for inference; without no_grad every stored output
# would keep its autograd graph alive.
with torch.no_grad():
    for img0, img1, img2, depth, field_id in final_loader:
        data = torch.cat((img0, img1, img2, depth), dim=1).to(device)
        preds.append(model(data).cpu())
        # Depending on what field_id.pkl holds, the batched ids arrive either
        # as a tensor or as a sequence of Python objects; unwrap 0-d tensors
        # so the CSV contains plain values -- TODO confirm the id type.
        file_ids.extend(fid.item() if torch.is_tensor(fid) else fid for fid in field_id)

# Undo the x1000 scaling that the dataset applies to the training targets.
preds = torch.cat(preds, dim=0).numpy() / 1000.0

# One row per sample: the id first, then the 12 predicted values.
df = pd.DataFrame(preds, columns=titles[1:])
df.insert(0, titles[0], file_ids)
# to_csv opens and closes the file itself, so no separate handle is opened.
df.to_csv(outfile, index = False)
print("Written to csv file {}".format(outfile))