In [None]:
import os
import pandas as pd
import numpy as np
import pickle
import torch
from torchvision import transforms
import torchvision.models as m
import torch.nn as nn
import cv2

In [None]:
# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
class ImageDataset:
    """Map-style dataset yielding three camera images plus a depth stack per sample.

    Sample ``idx`` is read from ``<path1>/<idx>/rgb/{0,1,2}.png`` and
    ``<path1>/<idx>/depth.npy``; its target is read from ``<path2>/<idx>.npy``.
    Samples are addressed by integer index, so the sample directories are
    expected to be named ``0 .. len(self) - 1``.

    Args:
        type: ``"train"`` reads inputs from ``./lazydata/train/X``; any other
            value reads them from ``./lazydata/test/X``.
        transform: Optional callable applied to every image returned by
            ``cv2.imread``. Its output is passed to ``transforms.Normalize``,
            so it must return a channel-first float tensor; with the default
            ``pixel_scale`` the values must lie in ``[0, 1]`` (e.g. a pipeline
            ending in ``ToTensor``). When ``None``, the raw arrays are returned
            without normalisation.
        pixel_scale: Divisor applied to the per-camera channel statistics before
            they are handed to ``Normalize``. The statistics are expressed on
            the 0-255 pixel scale, so the default of ``255.0`` matches a
            transform that outputs ``[0, 1]``. Pass ``1.0`` if the transform
            keeps values in ``[0, 255]``.
    """

    # Per-camera (mean, std) of each image channel. The magnitudes (~100-130)
    # show these are on the 0-255 pixel scale, not the 0-1 scale of ToTensor.
    _CAMERA_STATS = (
        ([100.9770, 106.3326, 110.9824], [54.0573, 50.7763, 50.9337]),
        ([119.7678, 124.4105, 127.7085], [61.7536, 57.5833, 58.3284]),
        ([112.5017, 122.9047, 132.4158], [63.1012, 58.5006, 58.6347]),
    )

    # (mean, std) used to standardise depth[0], depth[1] and depth[2] after the
    # raw file has been divided by 1000.
    _DEPTH_STATS = ((0.6558, 0.4239), (0.8711, 0.6199), (1.2106, 1.1016))

    def __init__(self, type="train", transform=None, pixel_scale=255.0):
        self.transform = transform
        if type == "train":
            self.path1 = "./lazydata/train/X"
        else:
            self.path1 = "./lazydata/test/X"
        # NOTE(review): both splits read targets from the *train* directory;
        # for the test split this is presumably a placeholder because no test
        # labels exist - confirm before relying on test-split targets.
        self.path2 = "./lazydata/train/Y"
        # Skip the Finder metadata file macOS drops into directories.
        self.data = [f for f in os.listdir(self.path1) if f != ".DS_Store"]

        # Built once here instead of on every __getitem__ call. The statistics
        # are rescaled so they match the value range produced by `transform`.
        self._normalizers = [
            transforms.Normalize(
                mean=[value / pixel_scale for value in mean],
                std=[value / pixel_scale for value in std],
            )
            for mean, std in self._CAMERA_STATS
        ]

    def __len__(self):
        """Return the number of entries found in the input directory."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``((image0, image1, image2, depth), Y)`` where each image is the
            transformed and normalised camera frame (or the raw array when no
            transform is set), ``depth`` is the standardised depth array
            multiplied by 300, and ``Y`` is the stored target multiplied by
            1000.

        Raises:
            FileNotFoundError: If one of the three camera images cannot be read.
        """
        sample_dir = os.path.join(self.path1, str(idx))

        images = []
        for cam in range(3):
            image_path = os.path.join(sample_dir, "rgb/{}.png".format(cam))
            image = cv2.imread(image_path)
            # cv2.imread signals failure by returning None rather than raising.
            if image is None:
                raise FileNotFoundError(
                    "could not read image: {}".format(image_path)
                )
            if self.transform:
                image = self._normalizers[cam](self.transform(image))
            images.append(image)

        depth = np.load(os.path.join(sample_dir, "depth.npy")) / 1000
        for view, (mean, std) in enumerate(self._DEPTH_STATS):
            # Standardise each depth view, then scale by 300 as the original
            # pipeline did (the model is trained against this scale).
            depth[view] = (depth[view] - mean) / std * 300

        # Targets are scaled up by 1000; predictions must be divided by 1000
        # again before they are reported.
        Y = np.load(os.path.join(self.path2, str(idx) + ".npy")) * 1000

        return (images[0], images[1], images[2], depth), Y

In [None]:
# Per-image preprocessing: convert the loaded array to a PIL image, collapse it
# to grayscale replicated over 3 channels, apply a random contrast factor drawn
# from [0.7, 0.8], and finish with a float tensor.
transformations = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Grayscale(num_output_channels=3),
        transforms.ColorJitter(brightness=0, contrast=(0.7, 0.8), saturation=0, hue=0),
        transforms.ToTensor(),
    ]
)

# The whole training directory is used for training. No held-out split is
# created in this cell, so no `test_dataset` / `test_loader` exists afterwards.
train_dataset = ImageDataset(type="train", transform=transformations)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
)
print(len(train_dataset))

3396


In [None]:
def train(epoch, model, optimizer, loader=None, target_device=None):
    """Run one optimisation pass over the training data and report the mean loss.

    Each batch is expected to be ``((image0, image1, image2, depth), target)``.
    The four inputs are cast to float32 and concatenated along dimension 1
    before being fed to the model. The loss is the MSE between the model output
    and the target.

    Args:
        epoch: Epoch number; only used in the printed progress line.
        model: Module to train. It is moved to ``target_device``.
        optimizer: Optimizer that updates ``model``'s parameters.
        loader: Iterable of batches. Defaults to the notebook-level
            ``train_loader``.
        target_device: Device to run on. Defaults to the notebook-level
            ``device``.

    Returns:
        The mean of the per-batch losses over the pass as a Python float, or
        ``nan`` when ``loader`` yields no batches.
    """
    if loader is None:
        loader = train_loader
    if target_device is None:
        target_device = device

    # Both of these are loop-invariant, so do them once per pass.
    model = model.to(target_device)
    criterion = nn.MSELoss()

    model.train()
    running_loss = 0.0
    num_batches = 0
    for data, target in loader:
        # Cast every component first: the depth array may arrive as float64,
        # which would otherwise promote the whole input and not match the
        # model's float32 weights.
        inputs = torch.cat(
            (data[0].float(), data[1].float(), data[2].float(), data[3].float()), 1
        ).to(target_device)
        target = target.to(target_device).float()

        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output.float(), target)
        loss.backward()
        optimizer.step()

        # .item() keeps only the number; accumulating the tensor itself would
        # keep every batch's autograd graph alive.
        running_loss += loss.item()
        num_batches += 1

    # Average over the batches actually seen instead of a hard-coded count.
    mean_loss = running_loss / num_batches if num_batches else float("nan")
    print("epoch = {}, {}".format(epoch, mean_loss))
    return mean_loss

In [None]:
def test(model):
    model.eval()
    correct = 0
    for batch_idx, (data, target) in enumerate(test_loader):
        data=torch.cat((data[0],data[1],data[2],data[3]), 1)
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss = torch.nn.MSELoss()
        mse = loss(output, target)
        correct += mse.item()
    accuracy = correct / output.size(0)
    print(accuracy)
    return accuracy

In [None]:
import matplotlib.pyplot as plt

# Start from a ResNet-50 initialised with torchvision's default pretrained weights.
model = m.resnet50(weights=m.ResNet50_Weights.DEFAULT)
model.eval()
model.float()

# Replace the classifier head with a 12-output linear layer (one per regression target).
model.fc = nn.Linear(in_features=2048, out_features=12)

# Replace the stem convolution so the network accepts 12 input channels instead
# of 3. This new layer is randomly initialised; the pretrained stem weights are
# discarded.
# NOTE(review): 12 presumably = 3 images x 3 channels + 3 depth views - confirm.
model.conv1 = nn.Conv2d(
    in_channels=12,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)

model = model.to(device)

# Adam is used here; SGD(lr=0.01, momentum=0.9) was the earlier alternative.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [None]:
# Train for 70 epochs; train() prints its own progress line.
for epoch in range(70):
    train(epoch, model, optimizer)

In [None]:
# Persist only the parameters/buffers (state_dict), not the module object itself.
torch.save(model.state_dict(), f="model_resnet.pt")

In [None]:
# Build the submission file: one row per test sample holding its ID followed by
# the 12 predicted values.
outfile = 'submission.csv'
titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']

# NOTE: torch.load unpickles the file - only load files from a trusted source.
t_data = torch.load('./csci-ua-473-intro-to-machine-learning-fall22/test/test/testX.pt')
# NOTE(review): assumes the last entry holds the sample IDs in the same order as
# the dataset indices 0..N-1 - confirm against how testX.pt was produced.
file_ids = t_data[-1]

# Inference must be deterministic, so do not reuse the training pipeline's
# random contrast jitter (factor drawn from [0.7, 0.8]); pin the factor to the
# midpoint of that range instead.
eval_transformations = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Grayscale(3),
    transforms.ColorJitter(contrast=(0.75, 0.75)),
    transforms.ToTensor(),
])

model.eval()
t_dataset = ImageDataset(type="test", transform=eval_transformations)
t_loader = torch.utils.data.DataLoader(t_dataset, batch_size=1, shuffle=False)

preds = []
# No gradients are needed for prediction; this avoids building autograd graphs.
with torch.no_grad():
    for data, _ in t_loader:
        # Cast each component so a float64 depth array cannot promote the input
        # away from the model's float32 weights.
        data = torch.cat(
            (data[0].float(), data[1].float(), data[2].float(), data[3].float()), 1
        ).to(device)
        # Targets were multiplied by 1000 for training; undo that scaling here.
        output = model(data) / 1000
        preds.append(output[0].cpu().numpy())

# A length mismatch would otherwise be padded with NaN rows by pd.concat.
if len(file_ids) != len(preds):
    raise ValueError(
        "number of ids ({}) does not match number of predictions ({})".format(
            len(file_ids), len(preds)
        )
    )

df = pd.concat([pd.DataFrame(file_ids), pd.DataFrame.from_records(preds)], axis=1)
df.columns = titles
df.to_csv(outfile, index=False)
print("Written to csv file {}".format(outfile))