In [None]:
import torch
import numpy as np
import os
import cv2
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets,transforms
from torch.utils.data import Dataset, DataLoader
import pickle
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
class LazyLoadDataset(Dataset):
    """Dataset that reads one sample folder from disk each time it is indexed.

    Files read for every sample (relative to ``path``)::

        <train|test>/X/<sample>/rgb/0.png, 1.png, 2.png
        <train|test>/X/<sample>/depth.npy
        <train|test>/X/<sample>/field_id.pkl
        train/Y/<sample>.npy                  (training split only)

    Args:
        path (str): dataset root. The split name is appended by plain string
            concatenation, so the value must end with a path separator.
        training (bool): when True read the ``train/`` split and return
            ``(inputs, target)``; otherwise read ``test/`` and return only
            the inputs.
    """

    # Per-channel statistics applied to every image after ToTensor().
    # NOTE(review): cv2.imread returns channels in BGR order -- confirm these
    # statistics were computed in the same channel order.
    MEAN = (0.4851, 0.4623, 0.4356)
    STD = (0.2195, 0.2181, 0.2339)

    def __init__(self, path, training = True):
        self.train = training
        self.path = path + ('train/' if training else 'test/')
        # os.listdir order is arbitrary; sorting keeps index -> sample stable
        # between runs and machines.
        self.data = sorted(os.listdir(self.path + 'X/'))
        # Built once here instead of on every __getitem__ call.
        self.normalizer = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(self.MEAN, self.STD),
        ])

    def _load_image(self, image_path):
        """Read one image with OpenCV and return it as a normalised tensor.

        Raises:
            FileNotFoundError: if OpenCV could not read ``image_path``
                (cv2.imread signals failure by returning None).
        """
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError('Could not read image: ' + image_path)
        return self.normalizer(image)

    def __getitem__(self, index):
        """Load sample ``index``.

        Returns:
            ``(a, b, c, depth, field_id)`` for the test split, or
            ``((a, b, c, depth, field_id), Y)`` for the training split, where
            ``a``/``b``/``c`` are the normalised image tensors, ``depth`` is a
            float32 numpy array and ``Y`` is the array stored for the sample.
        """
        sample_id = self.data[index]
        sample_dir = self.path + 'X/' + sample_id

        a = self._load_image(sample_dir + '/rgb/0.png')
        b = self._load_image(sample_dir + '/rgb/1.png')
        c = self._load_image(sample_dir + '/rgb/2.png')

        # Rescale depth by 1/1000 (presumably millimetres -> metres; TODO
        # confirm). The division yields float64 for integer or float64 input,
        # so cast to float32 to match the image tensors and the model weights.
        depth = (np.load(sample_dir + '/depth.npy') / 1000).astype(np.float32)

        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only use this loader on trusted data.
        with open(sample_dir + '/field_id.pkl', 'rb') as handle:
            field_id = pickle.load(handle)

        if self.train:
            Y = np.load(self.path + 'Y/' + sample_id + '.npy')
            return (a, b, c, depth, field_id), Y
        return (a, b, c, depth, field_id)

    def __len__(self):
        """Number of sample folders found under ``X/``."""
        return len(self.data)

In [None]:
# Training split, read lazily from disk; batches of 32 are reshuffled each epoch.
dataset = LazyLoadDataset(path='11/', training=True)
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

In [None]:
# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
def train(epoch, model, optimizer):
    """
    Run one optimisation pass over the notebook-level ``train_loader``.

    Args:
        epoch (int): index of the current epoch (only used in the log line)
        model (nn.Module): network being optimised
        optimizer (torch.optim): optimiser that updates the model parameters
    """
    model.train()
    criterion = nn.MSELoss()
    num_samples = len(train_loader.dataset)
    num_batches = len(train_loader)

    for batch_idx, (inputs, target) in enumerate(train_loader):
        # The fifth input (the field id) is not used for training.
        a, b, c, depth, _ = inputs

        # Stack the three images and the depth map along the channel axis.
        data = torch.cat((a, b, c, depth), dim=1).to(device)
        target = target.to(device)

        optimizer.zero_grad()
        prediction = model(data)
        loss = criterion(prediction.float(), target.float())
        loss.backward()
        optimizer.step()

        # Report progress every tenth batch.
        if batch_idx % 10 != 0:
            continue
        seen = batch_idx * len(data)
        percent = 100. * batch_idx / num_batches
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, num_samples, percent, loss.item()))



In [None]:
# AlexNet-style CNN: four grouped convolutions followed by three fully connected layers
class CNN(nn.Module):
    """AlexNet-style regressor built from four grouped convolutions and three
    fully connected layers.

    Every convolution uses ``groups=4``, so its input and output channel
    counts must be divisible by 4.

    Args:
        input_channels (int): channels of the input tensor (divisible by 4).
        conv_feature (int): number of feature maps produced by the last
            convolution (divisible by 4); the first fully connected layer
            consumes ``conv_feature * 6 * 6`` values.
        fc_feature (int): width of the two hidden fully connected layers.
        output_size (int): number of values predicted per sample.
    """

    def __init__(self, input_channels, conv_feature, fc_feature, output_size):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, 72, kernel_size=11, stride=4, padding=2, groups=4)
        self.conv2 = nn.Conv2d(72, 192, kernel_size=5, stride=2, padding=2, groups=4)
        self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1, groups=4)
        # The output width follows conv_feature so it always matches fc1;
        # it used to be fixed at 256, which broke any other conv_feature.
        self.conv4 = nn.Conv2d(384, conv_feature, kernel_size=3, padding=1, groups=4)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
        # Adaptive pooling fixes the spatial size at 6x6 regardless of the
        # input resolution, which is what fc1's input width relies on.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(conv_feature * 6 * 6, fc_feature)
        self.fc2 = nn.Linear(fc_feature, fc_feature)
        self.fc3 = nn.Linear(fc_feature, output_size)

    def forward(self, x):
        """Map a ``(batch, input_channels, H, W)`` tensor to
        ``(batch, output_size)`` predictions."""
        # Convolutional feature extractor.
        x = self.relu(self.conv1(x))
        x = self.maxpool(x)
        x = self.relu(self.conv2(x))
        x = self.maxpool(x)
        x = self.relu(self.conv3(x))
        x = self.relu(self.conv4(x))
        x = self.maxpool(x)
        x = self.avgpool(x)
        x = self.flatten(x)
        # Fully connected head; dropout precedes each hidden layer.
        x = self.dropout(x)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [None]:
# Network hyper-parameters.
in_channels = 12      # presumably 3 images x 3 channels + 3 depth channels -- TODO confirm
conv_features = 256   # number of feature maps
fc_features = 4096
output_size = 12

# Build the CNN, move it to the selected device and attach an Adam optimiser.
model_cnn = CNN(in_channels, conv_features, fc_features, output_size)
model_cnn.to(device)
optimizer = optim.Adam(model_cnn.parameters(), lr=1e-4)

test_accuracy = []
# Train for 20 epochs.
for epoch in range(20):
    train(epoch, model_cnn, optimizer)


In [None]:
# Test split: LazyLoadDataset takes `training` (not `train`) and has no
# `transform` argument -- normalisation is applied inside the dataset itself.
test_dataset = LazyLoadDataset('11/', training=False)
# No shuffling at inference time, so the submission rows follow dataset order.
test_loader = DataLoader(test_dataset, batch_size=64 * 2, shuffle=False)
def predict(model, loader=None, outfile='submission.csv', target_scale=1.0):
    """Run ``model`` over the test batches and write the predictions to a CSV.

    Args:
        model (nn.Module): trained network; it is switched to eval mode.
        loader (iterable, optional): yields ``(img0, img1, img2, depth,
            field_id)`` batches. Defaults to the notebook-level
            ``test_loader``.
        outfile (str): path of the CSV file to write.
        target_scale (float): factor the training targets were multiplied by;
            predictions are divided by it before being written. The default of
            1.0 matches ``train``, which regresses the targets unscaled (this
            function used to divide by 1000 unconditionally).
    """
    if loader is None:
        loader = test_loader

    titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']

    model.eval()
    pred = []
    file_ids = []

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for img0, img1, img2, depth, field_id in loader:
            data = torch.cat((img0, img1, img2, depth), dim=1).to(device)
            output = model(data)
            pred.append(output.cpu().numpy())
            # Numeric ids arrive as a tensor after collation; unwrap them so
            # the CSV holds plain values rather than "tensor(..)" strings.
            if torch.is_tensor(field_id):
                field_id = field_id.tolist()
            file_ids.extend(field_id)

    pred = np.concatenate(pred) / target_scale

    df = pd.DataFrame(pred, columns=titles[1:])
    df.insert(0, titles[0], file_ids)
    # to_csv opens and closes the file itself; no separate handle is needed.
    df.to_csv(outfile, index = False)
    print("Written to csv file {}".format(outfile))
# Write the CNN's test-set predictions to the submission CSV.
predict(model_cnn)