# Intro to ML Final Project
Daniel Sun

In [115]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import numpy as np
import os
import cv2
import pickle as pkl
from torchvision.models import resnet50, ResNet50_Weights
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [117]:
# Compute device shared by the whole notebook: the GPU when CUDA is usable,
# otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [119]:
class LazyLoadDataset(Dataset):
  """Dataset that reads each sample from disk only when it is indexed.

  Directory layout read by this class::

      <path>/<train|test>/X/<sample>/rgb/0.png, 1.png, 2.png
      <path>/<train|test>/X/<sample>/depth.npy
      <path>/<train|test>/Y/<sample>.npy

  Args:
      path: root folder containing the ``train`` and ``test`` splits
          (a trailing slash is optional).
      train: selects the ``train`` split when True, else ``test``.
      transform: optional callable applied independently to each of the
          three RGB images (the depth array is returned untouched).

  Each item is ``((img0, img1, img2, depth), Y)`` where ``Y`` is the stored
  target multiplied by ``TARGET_SCALE``.
  """

  # Targets are multiplied by this factor when loaded; predictions have to be
  # divided by the same factor before they are reported.
  TARGET_SCALE = 1000

  def __init__(self, path, train=True, transform=None):
    self.transform = transform
    self.train = train

    split_dir = os.path.join(path, "train" if train else "test")
    # The empty last component keeps the trailing separator the attributes
    # have always had, so `pathX + name` style code keeps working.
    self.pathX = os.path.join(split_dir, "X", "")
    self.pathY = os.path.join(split_dir, "Y", "")

    # os.listdir returns entries in arbitrary order; sorting makes the
    # index -> sample mapping reproducible across runs and machines.
    self.data = sorted(os.listdir(self.pathX))

  def _read_image(self, file_path):
    """Read one image with OpenCV and apply the optional transform."""
    # NOTE(review): cv2.imread yields BGR channel order; the channel order is
    # left as-is here — confirm it matches what the rest of the pipeline
    # (normalisation statistics, test tensors) expects.
    img = cv2.imread(file_path)
    if img is None:
      # cv2.imread signals failure by returning None instead of raising.
      raise FileNotFoundError("Could not read image: {}".format(file_path))
    if self.transform is not None:
      img = self.transform(img)
    return img

  def __getitem__(self, idx):
    f = self.data[idx]
    sample_dir = os.path.join(self.pathX, f)

    # Read in view order 0, 1, 2 so that random transforms consume the RNG in
    # a fixed order.
    img0 = self._read_image(os.path.join(sample_dir, "rgb", "0.png"))
    img1 = self._read_image(os.path.join(sample_dir, "rgb", "1.png"))
    img2 = self._read_image(os.path.join(sample_dir, "rgb", "2.png"))

    depth = np.load(os.path.join(sample_dir, "depth.npy"))

    # field_id.pkl used to be unpickled here through an unclosed file handle,
    # but the value was never returned; the dead load has been dropped.

    Y = np.load(os.path.join(self.pathY, f + ".npy")) * self.TARGET_SCALE
    return (img0, img1, img2, depth), Y

  def __len__(self):
    return len(self.data)

In [121]:
# Preprocessing pipelines keyed by split name. Both end with ToTensor followed
# by a per-channel Normalize using the same mean / std values.
#
# NOTE(review): LazyLoadDataset calls the transform separately on each RGB
# view and never on the depth array, so the random rotation / crop / flip of
# the 'train' pipeline differs between views and is not mirrored on depth or
# on the regression target — confirm this is intended.
data_transforms = dict(
    train=transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomRotation(degrees=45),
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
    test=transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
)

In [123]:
# Both splits live under the same root folder; each one gets the matching
# preprocessing pipeline from `data_transforms`.
train_dataset = LazyLoadDataset(path="./lazydata/", train=True, transform=data_transforms["train"])
test_dataset = LazyLoadDataset(path="./lazydata/", train=False, transform=data_transforms["test"])

In [124]:
# Shuffle the training samples every epoch so SGD does not see the same fixed
# sequence of mini-batches each time; the test loader keeps a stable order.
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False)

In [126]:

# Sanity check: pull one batch, print the shape of every input and of the
# channel-wise concatenation that is fed to the network, then stop.
for batch_idx, (data, target) in enumerate(train_loader):
  print(batch_idx)
  img0, img1, img2, depth = data
  print(img0.shape)
  print(img1.shape)
  print(img2.shape)  # previously printed img1 a second time

  print(depth.shape)
  # Stack the three RGB views and the depth array along the channel axis.
  concate = torch.cat((img0, img1, img2, depth), dim=1)
  print(concate.shape)
  break

0
torch.Size([2, 3, 224, 224])
torch.Size([2, 3, 224, 224])
torch.Size([2, 3, 224, 224])
torch.Size([2, 3, 224, 224])
torch.Size([2, 12, 224, 224])


In [127]:
def train(epoch, model, optimizer, permute_pixels=None, permutation_order=None,
          loader=None, target_device=None):
    """
    Train the model for one epoch with an MSE regression loss.

    Every batch is expected to be ``((img0, img1, img2, depth), target)``; the
    four inputs are concatenated along dim 1 before being fed to the model.

    Args:
        epoch (int): current epoch (only used in the progress message)
        model (nn.Module): model to train
        optimizer (torch.optim): optimizer to use
        permute_pixels (function): function to permute the pixels (default: None)
        permutation_order (1D torch array): order of the permutation (default: None)
        loader (iterable): batches to train on; defaults to the notebook-level
            ``train_loader``. Must expose ``.dataset`` and ``len()`` for the
            progress message.
        target_device (torch.device): device to train on; defaults to the
            notebook-level ``device``.

    Returns:
        float | None: mean batch loss over the epoch, or None if the loader
        produced no batches.
    """
    if loader is None:
        loader = train_loader
    if target_device is None:
        target_device = device

    # Loop-invariant setup: move the model once and build the loss once
    # instead of repeating both for every batch.
    model = model.to(target_device)
    model.train()
    loss_fn = nn.MSELoss()

    loss_sum = torch.zeros((), device=target_device)
    num_batches = 0
    for batch_idx, ((img0, img1, img2, depth), target) in enumerate(loader):
        # Concatenate on the channel axis and force float32: torch.cat promotes
        # to the widest input dtype, which would not match float32 weights.
        concate = torch.cat((img0, img1, img2, depth), dim=1)
        data = concate.to(target_device, dtype=torch.float32)
        target = target.to(target_device, dtype=torch.float32)

        # permute pixels
        if permute_pixels is not None:
            data = permute_pixels(data, permutation_order)

        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output.float(), target)
        loss.backward()
        optimizer.step()

        # Accumulate on-device; .item() is only called when something is
        # actually reported, so there is no extra host sync per batch.
        loss_sum += loss.detach()
        num_batches += 1
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(loader.dataset),
                100. * batch_idx / len(loader), loss.item()))

    if num_batches == 0:
        return None
    return (loss_sum / num_batches).item()


In [128]:
def get_n_params(model):
    """Return the total number of scalar entries over all parameters of ``model``."""
    # numel() is the product of a tensor's dimensions (1 for a 0-dim tensor).
    return sum(param.numel() for param in model.parameters())

In [129]:
# ImageNet-pretrained ResNet-50 adapted at both ends:
#   * the classifier is replaced by a 12-output linear regression head;
#   * the stem convolution takes 12 input channels (three RGB views plus the
#     3-channel depth array, concatenated on the channel axis).
model = resnet50(weights=ResNet50_Weights.DEFAULT)

# Read the feature width from the existing head instead of hard-coding 2048.
model.fc = nn.Linear(model.fc.in_features, 12)

# Pretrained stem filters, shape (64, 3, 7, 7).
weight = model.conv1.weight.detach().clone()
model.conv1 = nn.Conv2d(12, 64, kernel_size=7, stride=2, padding=3, bias=False)
with torch.no_grad():
    # Tile the pretrained RGB filters over all four 3-channel groups.
    # Previously only channels 0-3 were initialised from the checkpoint and
    # the remaining eight input channels kept their random initialisation.
    model.conv1.weight.copy_(weight.repeat(1, 4, 1, 1))

# Move to the device only after the replacement layers exist; otherwise the
# new conv1 / fc would be left on the CPU.
model = model.float().to(device)

In [130]:
# SGD with momentum over every model parameter.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
print('Number of parameters: {}'.format(get_n_params(model)))

# Kept for parity with the original cell; nothing below appends to it.
test_accuracy = []
# Three passes over the training loader.
for epoch in range(3):
    train(epoch, model, optimizer)

Number of parameters: 23560844


In [131]:
import pickle
import pandas as pd

# Run the trained model over the held-out tensors and write one CSV row per
# sample: the sample ID followed by the 12 predicted values.
outfile = 'submission.csv'

titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']
preds = []

# NOTE(review): torch.load unpickles the file, so only load trusted files.
test_data = torch.load('./test/test/testX.pt')
file_ids = test_data[-1]
rgb_data = test_data[0]
depth = test_data[1]
# NOTE(review): the tensors are fed to the model exactly as stored; confirm
# they are preprocessed the same way as the training images.

# Use the notebook-wide `device` rather than a hard-coded 'cuda' so the cell
# also runs on CPU-only machines.
model.to(device)
model.eval()

# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    for i, data in enumerate(rgb_data):
        # Please remember to modify this loop, input and output based on your model/architecture
        data = data.view(-1, 224, 224)              # fold the RGB views into the channel axis
        data = torch.cat((data, depth[i]), dim=0)   # append the depth channels
        data = torch.unsqueeze(data, 0)             # add the batch dimension
        # Training targets were multiplied by 1000 in LazyLoadDataset, so the
        # predictions are scaled back before being written out.
        output = model(data.to(device, dtype=torch.float32)) / 1000
        preds.append(output[0].cpu().numpy())

# pd.concat's `names` argument only labels hierarchical index levels and had
# no effect here; the column labels are assigned explicitly instead.
df = pd.concat([pd.DataFrame(file_ids), pd.DataFrame.from_records(preds)], axis=1)
df.columns = titles
# to_csv opens and closes the file itself; the extra open(outfile, 'w') handle
# that was never used or closed has been removed.
df.to_csv(outfile, index = False)
print("Written to csv file {}".format(outfile))

Written to csv file submission.csv
