# ML Final Project For Kaggle (cleaned version)

In [1]:
import cv2
import os
import torch
import pickle as pkl
import numpy as np
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchvision import transforms
import torch.optim as optim

In [2]:
# lazy loading data
class LazyLoadDataset(Dataset):
    """Dataset that reads every sample from disk only when it is indexed.

    Directory layout read by this class (relative to ``path``)::

        {train|test}/X/<sample>/rgb/0.png, 1.png, 2.png
        {train|test}/X/<sample>/depth.npy
        {train|test}/X/<sample>/field_id.pkl
        train/Y/<sample>.npy

    Args:
        path: Root directory. It must end with a path separator because the
            sub-paths are built by plain string concatenation.
        train: If True, use the ``train/`` split and return targets as well;
            otherwise use the ``test/`` split and return inputs only.
        transform: Optional callable applied to each of the three RGB images.
    """

    def __init__(self, path, train = True, transform = None):
        self.transform = transform
        self.train = train
        split_dir = path + ('train/' if train else 'test/')

        self.pathX = split_dir + 'X/'
        self.pathY = split_dir + 'Y/'

        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> sample mapping reproducible across runs and machines.
        self.data = sorted(os.listdir(self.pathX))

    def _load_rgb(self, image_path):
        """Read one image with OpenCV and apply the optional transform.

        Raises:
            FileNotFoundError: if OpenCV cannot read ``image_path``
                (``cv2.imread`` signals failure by returning ``None``).
        """
        # NOTE: cv2.imread yields channels in BGR order; normalization
        # statistics must be computed through this same loader.
        img = cv2.imread(image_path)
        if img is None:
            raise FileNotFoundError('Could not read image: ' + image_path)
        if self.transform is not None:
            img = self.transform(img)
        return img

    def __getitem__(self, index):
        """Load the sample stored at position ``index``.

        Returns:
            ``((img0, img1, img2, depth, field_id), Y)`` for the training
            split, or just ``(img0, img1, img2, depth, field_id)`` for the
            test split.
        """
        f = self.data[index]
        sample_dir = self.pathX + f

        # X: the three RGB views
        img0 = self._load_rgb(sample_dir + '/rgb/0.png')
        img1 = self._load_rgb(sample_dir + '/rgb/1.png')
        img2 = self._load_rgb(sample_dir + '/rgb/2.png')

        # Depth map, scaled down by 1000
        # (presumably millimetres -> metres; confirm against the data spec)
        depth = np.load(sample_dir + '/depth.npy') / 1000

        # Sample identifier. The context manager closes the file handle.
        # NOTE(security): pickle must only be used on trusted dataset files.
        with open(sample_dir + '/field_id.pkl', 'rb') as fh:
            field_id = pkl.load(fh)

        # Y: targets exist only for the training split
        if self.train:
            Y = np.load(self.pathY + f + '.npy')
            return (img0, img1, img2, depth, field_id), Y
        return (img0, img1, img2, depth, field_id)

    def __len__(self):
        """Return the number of sample entries found in the X directory."""
        return len(self.data)

## Calculate Mean and Standard Deviation for normalization

In [3]:
# transform = transforms.Compose([transforms.ToTensor(), ])

# dataset = LazyLoadDataset('./lazydata/', transform = transform)
# train_dataloader = DataLoader(dataset, batch_size=len(dataset), shuffle=False)

# def get_mean_std(loader):
#     (img0, img1, img2, depth, field_id), Y = next(iter(loader))
#     img0_mean = img0.mean([0,2,3])
#     img0_std = img0.std([0,2,3])
#     img1_mean = img1.mean([0,2,3])
#     img1_std = img1.std([0,2,3])
#     img2_mean = img2.mean([0,2,3])
#     img2_std = img2.std([0,2,3])
#     mean, std = (img0_mean + img1_mean + img2_mean) / 3, (img0_std + img1_std + img2_std) / 3
#     return mean, std

# mean, std = get_mean_std(train_dataloader)
# print("mean and std: \n", mean, std)

In [4]:
# Per-channel normalization statistics, hard-coded so they do not have to be
# recomputed on every run.
# Earlier values (kept for reference, currently unused):
# mean = torch.Tensor([0.4851, 0.4623, 0.4356])
# std = torch.Tensor([0.2195, 0.2181, 0.2339])

# Statistics of img0 only
std = torch.tensor([0.1997, 0.1991, 0.2120], dtype=torch.float32)
mean = torch.tensor([0.4352, 0.4170, 0.3960], dtype=torch.float32)

In [5]:
# Convert each image to a tensor, then standardize it with the
# per-channel mean/std defined above
transform_with_normalization = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Training split, read lazily from disk and served in shuffled mini-batches
dataset = LazyLoadDataset('./lazydata/', transform=transform_with_normalization)
train_dataloader = DataLoader(dataset, shuffle=True, batch_size=64)

In [6]:
# Use the first CUDA GPU when one is available, otherwise run on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

## CNN Architecture (Slightly modified from AlexNet)

In [7]:
class CNN(nn.Module):
    """AlexNet-style convolutional network with a linear output layer.

    Five convolution layers (with ReLU and three max-pool stages) are followed
    by adaptive average pooling to 6x6 and three fully connected layers with
    dropout. The final layer has no activation.

    Args:
        input_channels: Number of channels of the input image tensor.
        conv_feature: Number of feature maps produced by the last convolution
            and therefore fed (times 6*6) into the first linear layer.
        fc_feature: Width of the two hidden fully connected layers.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_channels, conv_feature, fc_feature, output_size):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, 64, kernel_size=11, stride=4, padding=2)
        self.conv2 = nn.Conv2d(64, 192, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
        # The last convolution must emit exactly `conv_feature` maps, because
        # fc1 is sized from that argument. A hard-coded 256 here would make
        # every other conv_feature value fail with a shape mismatch.
        self.conv5 = nn.Conv2d(256, conv_feature, kernel_size=3, padding=1)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
        # Adaptive pooling fixes the spatial size at 6x6 regardless of the
        # input resolution, so fc1's input width is always conv_feature*6*6.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(conv_feature * 6 * 6, fc_feature)
        self.fc2 = nn.Linear(fc_feature, fc_feature)
        self.fc3 = nn.Linear(fc_feature, output_size)

    def forward(self, x):
        """Map a batch of images to ``output_size`` values per sample.

        Args:
            x: Tensor of shape (batch, input_channels, height, width).

        Returns:
            Tensor of shape (batch, output_size).
        """
        # Convolutional feature extractor
        x = self.relu(self.conv1(x))
        x = self.maxpool(x)
        x = self.relu(self.conv2(x))
        x = self.maxpool(x)
        x = self.relu(self.conv3(x))
        x = self.relu(self.conv4(x))
        x = self.relu(self.conv5(x))
        x = self.maxpool(x)
        x = self.avgpool(x)
        x = self.flatten(x)
        # Fully connected head; dropout is only active in training mode
        x = self.dropout(x)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [8]:
def train(epoch, model, optimizer):
    """
    Run one optimization pass over the module-level ``train_dataloader``.

    Only the first image of every sample (img0) is fed to the model. The
    targets are multiplied by 1000 before the MSE loss is computed, and a
    progress line is printed every 10 batches.

    Args:
        epoch (int): index of the current epoch (used for logging only)
        model (nn.Module): network to optimize
        optimizer (torch.optim): optimizer that updates the model parameters
    """
    model.train()
    criterion = nn.MSELoss()
    num_batches = len(train_dataloader)
    num_samples = len(train_dataloader.dataset)

    for step, ((img0, _img1, _img2, _depth, _field_id), target) in enumerate(train_dataloader):
        # Alternative input with all three views plus depth (disabled):
        # data = torch.cat((img0, img1, img2, depth), dim=1).to(device)
        data = img0.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        prediction = model(data)

        # Mean squared error against the targets scaled by 1000
        scaled_target = target.float() * 1000.0
        loss = criterion(prediction.float(), scaled_target)
        loss.backward()
        optimizer.step()

        if step % 10 != 0:
            continue
        seen = step * len(data)
        percent = 100. * step / num_batches
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, num_samples, percent, loss.item()))

In [9]:
# Hyper-parameters
input_channels = 3    # channels of the input image (img0 only)
conv_features = 256   # number of feature maps fed to the linear layers
fc_features = 4096    # width of the hidden fully connected layers
output_size = 12      # values predicted per sample

# Learning rate under test (0.0001 is also the value noted as optimal)
lr = 0.0001

# Build the network, move it to the selected device, attach the optimizer
model_cnn = CNN(input_channels, conv_features, fc_features, output_size)
model_cnn.to(device)
optimizer = optim.Adam(model_cnn.parameters(), lr=lr)

# Optional learning-rate schedule (disabled):
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

# Train for 20 epochs
for epoch in range(20):
    train(epoch, model_cnn, optimizer)
    # scheduler.step()



## Get Submission.csv

In [10]:
# Test split: same preprocessing as training, but no targets are returned
test_dataset = LazyLoadDataset('./lazydata/', train = False, transform = transform_with_normalization)
# Inference gains nothing from shuffling; keeping the dataset order makes the
# row order of the generated submission file reproducible.
test_dataloader = DataLoader(test_dataset, batch_size=64 * 2, shuffle=False)

In [11]:
import pandas as pd

def predict(model, outfile='submission.csv', dataloader=None):
    """Run inference and write the predictions to a CSV submission file.

    Only the first image of every sample (img0) is fed to the model. The
    model outputs are divided by 1000 to undo the x1000 target scaling that
    ``train`` applies before computing the loss.

    Args:
        model (nn.Module): trained network producing 12 values per sample.
        outfile (str): path of the CSV file to write.
        dataloader: iterable yielding ``(img0, img1, img2, depth, field_id)``
            batches; defaults to the module-level ``test_dataloader``.

    The CSV has an ``ID`` column followed by ``FINGER_POS_1`` ...
    ``FINGER_POS_12``.
    """
    titles = ['ID'] + ['FINGER_POS_{}'.format(i) for i in range(1, 13)]
    loader = test_dataloader if dataloader is None else dataloader

    # Send the inputs to wherever the model's weights live, so the function
    # works for CPU and GPU models alike (CPU for parameter-free models).
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    pred = []
    file_ids = []

    # Gradients are not needed for inference; disabling autograd saves
    # memory and time.
    with torch.no_grad():
        for img0, _img1, _img2, _depth, field_id in loader:
            # Alternative input with all views plus depth (disabled):
            # data = torch.cat((img0, img1, img2, depth), dim=1).to(device)
            output = model(img0.to(model_device))
            pred.append(output.cpu().numpy())
            # Numeric ids are collated into a tensor; convert them to plain
            # Python values so they are written as numbers, not "tensor(..)".
            if torch.is_tensor(field_id):
                field_id = field_id.tolist()
            file_ids.extend(field_id)

    pred = np.concatenate(pred) / 1000.0

    df = pd.concat([pd.DataFrame(file_ids), pd.DataFrame(pred)], axis = 1)
    df.columns = titles
    df.to_csv(outfile, index = False)
    print("Written to csv file {}".format(outfile))

In [12]:
# Generate the submission file from the trained model
predict(model_cnn)

Written to csv file submission.csv
