# Intro to ML Capstone Project 
* Professor: Lerrel Pinto
* Made by Sangwon Baek
* December 7th 2022
* Kaggle Site URL:
https://www.kaggle.com/competitions/csci-ua-473-intro-to-machine-learning-fall22/overview

In [1]:
import os 
import pandas as pd 
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torchvision import models
from torchvision.models import resnet50, ResNet50_Weights, resnet18, ResNet18_Weights

from torchsummary import summary

import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt
import numpy as np
import cv2
import pickle as pkl 

### Preprocess the data

In [2]:
class PreLazyLoadDataset(Dataset):
    """Dataset that reads one sample from disk per access, with one shared image transform.

    Every entry of ``<path>/<split>/X/`` is treated as a sample directory
    holding ``rgb/0.png``, ``rgb/1.png``, ``rgb/2.png``, ``depth.npy`` and
    ``field_id.pkl``. The target is read from ``<path>/<split>/Y/<name>.npy``.
    """

    def __init__(self, path, train=True, transform=None):
        """Index the sample directories of the chosen split.

        Args:
            path: Root directory containing the ``train/`` and ``test/``
                folders. A trailing separator is optional.
            train: Use the ``train/`` split when True, otherwise ``test/``.
            transform: Optional callable applied to each of the three images.
        """
        self.transform = transform

        split = "train" if train else "test"

        # The trailing "" keeps a directory separator at the end, so the
        # string concatenations in __getitem__ work whether or not the caller
        # passed a root path ending in a separator.
        self.pathX = os.path.join(path, split, "X", "")
        self.pathY = os.path.join(path, split, "Y", "")

        # os.listdir returns entries in arbitrary order; sorting makes the
        # idx -> sample mapping reproducible across runs and machines.
        self.data = sorted(os.listdir(self.pathX))

    @staticmethod
    def _read_image(file_path):
        """Read an image with OpenCV, raising instead of returning None on failure."""
        img = cv2.imread(file_path)
        if img is None:
            raise FileNotFoundError("Could not read image: " + file_path)
        return img

    def __getitem__(self, idx):
        """Load sample ``idx`` from disk.

        Returns:
            ``((img0, img1, img2, depth, field_id), Y)`` where the images have
            been passed through ``self.transform`` (if one was given) and
            ``depth`` has been min-max scaled to [0, 1] as float32.

        Raises:
            FileNotFoundError: If one of the three images cannot be read.
        """
        f = self.data[idx]
        sample_dir = self.pathX + f

        # Read the three camera images.
        # NOTE: cv2.imread yields channels in BGR order, not RGB.
        img0 = self._read_image(sample_dir + '/rgb/0.png')
        img1 = self._read_image(sample_dir + '/rgb/1.png')
        img2 = self._read_image(sample_dir + '/rgb/2.png')

        # Apply the shared transform to each image
        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        # Read the depth array; presumably stored in millimetres, so /1000
        # gives metres -- TODO confirm the unit against the dataset description.
        depth = np.load(sample_dir + '/depth.npy')
        depth = depth/1000

        # Min-max scale the depth values into [0, 1] as float32
        depth = cv2.normalize(depth, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

        # Read field ID & Y. The context manager closes the file handle.
        # NOTE: pickle can execute arbitrary code; only load trusted dataset files.
        with open(sample_dir + '/field_id.pkl', 'rb') as fh:
            field_id = pkl.load(fh)
        Y = np.load(self.pathY + f + '.npy')

        return (img0, img1, img2, depth, field_id), Y

    def __len__(self):
        """Return the number of sample directories found in the split."""
        return len(self.data)

In [3]:
# Shared transform handed to both dataset classes below: converts a loaded image array to a tensor
transform_first = transforms.ToTensor()

### Explore data shape

In [4]:
#Get per-channel mean and standard deviation of the three camera images (normalization values)
def get_mean_std(dataset):
    """Print per-channel mean and std for each of the three images in a dataset.

    The dataset is traversed exactly once, which matters for lazily loaded
    datasets where every access reads from disk; it also means a one-shot
    iterable is accepted. For each sample and each image, the per-channel
    mean and std over the two spatial axes are collected, and those
    per-sample values are then averaged across samples. The reported STD is
    therefore the mean of per-image stds, not the std over all pixels.

    Args:
        dataset: Iterable yielding ``((img0, img1, img2, depth, field_id), Y)``
            where each image is a tensor with at least three channels on
            axis 0. Depth, field id and Y are ignored.

    Returns:
        None. Results are printed, one "Mean" and one "STD" line per image.
    """
    # One list of per-sample channel statistics for each of the three images.
    sample_means = ([], [], [])
    sample_stds = ([], [], [])

    for (image_0, image_1, image_2, _depth, _field_id), _label in dataset:
        for view, image in enumerate((image_0, image_1, image_2)):
            pixels = image.numpy()
            sample_means[view].append(np.mean(pixels, axis=(1, 2)))
            sample_stds[view].append(np.std(pixels, axis=(1, 2)))

    for view in range(3):
        # Average each of the first three channels across all samples.
        mean_per_channel = [
            np.mean([m[channel] for m in sample_means[view]]) for channel in range(3)
        ]
        std_per_channel = [
            np.mean([s[channel] for s in sample_stds[view]]) for channel in range(3)
        ]

        print("Img_{} Mean: [{:.4F}, {:.4F}, {:.4F}]".format(view, *mean_per_channel))
        print("Img_{} STD: [{:.4F}, {:.4F}, {:.4F}]".format(view, *std_per_channel))

In [5]:
class LazyLoadDataset(Dataset):
    """Dataset that reads one sample from disk per access and normalizes each image.

    Every entry of ``<path>/<split>/X/`` is treated as a sample directory
    holding ``rgb/0.png``, ``rgb/1.png``, ``rgb/2.png``, ``depth.npy`` and
    ``field_id.pkl``. The target is read from ``<path>/<split>/Y/<name>.npy``.
    """

    # (mean, std) per image, in the channel order produced by cv2.imread.
    # These match the statistics printed by get_mean_std for this dataset.
    _VIEW_STATS = (
        ([0.4352, 0.4170, 0.3960], [0.1992, 0.1987, 0.2111]),
        ([0.5008, 0.4879, 0.4697], [0.2276, 0.2252, 0.2417]),
        ([0.5193, 0.4820, 0.4412], [0.2293, 0.2288, 0.2465]),
    )

    def __init__(self, path, train=True, transform=None):
        """Index the sample directories and build the per-image pipelines.

        Args:
            path: Root directory containing the ``train/`` and ``test/``
                folders. A trailing separator is optional.
            train: Use the ``train/`` split when True, otherwise ``test/``.
            transform: Acts only as an on/off switch. When it is not None the
                built-in per-image pipelines are applied; the callable itself
                is stored but never called.
        """
        self.transform = transform
        self.transform_0 = None
        self.transform_1 = None
        self.transform_2 = None

        if self.transform is not None:
            self.transform_0, self.transform_1, self.transform_2 = (
                self._build_pipeline(mean, std) for mean, std in self._VIEW_STATS
            )

        split = "train" if train else "test"

        # The trailing "" keeps a directory separator at the end, so the
        # string concatenations in __getitem__ work whether or not the caller
        # passed a root path ending in a separator.
        self.pathX = os.path.join(path, split, "X", "")
        self.pathY = os.path.join(path, split, "Y", "")

        # os.listdir returns entries in arbitrary order; sorting makes the
        # idx -> sample mapping reproducible across runs and machines.
        self.data = sorted(os.listdir(self.pathX))

    @staticmethod
    def _build_pipeline(mean, std):
        """Return the tensor -> resize(240) -> center-crop(224) -> normalize pipeline."""
        return transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize(240),
            transforms.CenterCrop(224),
            transforms.Normalize(mean, std),
        ])

    @staticmethod
    def _read_image(file_path):
        """Read an image with OpenCV, raising instead of returning None on failure."""
        img = cv2.imread(file_path)
        if img is None:
            raise FileNotFoundError("Could not read image: " + file_path)
        return img

    def __getitem__(self, idx):
        """Load sample ``idx`` from disk.

        Returns:
            ``((img0, img1, img2, depth, field_id), Y)`` where each image has
            gone through its own pipeline (if enabled) and ``depth`` has been
            min-max scaled to [0, 1] as float32.

        Raises:
            FileNotFoundError: If one of the three images cannot be read.
        """
        f = self.data[idx]
        sample_dir = self.pathX + f

        # Read the three camera images.
        # NOTE: cv2.imread yields channels in BGR order, not RGB.
        img0 = self._read_image(sample_dir + '/rgb/0.png')
        img1 = self._read_image(sample_dir + '/rgb/1.png')
        img2 = self._read_image(sample_dir + '/rgb/2.png')

        # Read the depth array; presumably stored in millimetres, so /1000
        # gives metres -- TODO confirm the unit against the dataset description.
        depth = np.load(sample_dir + '/depth.npy')
        depth = depth/1000

        # Each image has its own normalization statistics
        if self.transform is not None:
            img0 = self.transform_0(img0)
            img1 = self.transform_1(img1)
            img2 = self.transform_2(img2)

        # Min-max scale the depth values into [0, 1] as float32
        depth = cv2.normalize(depth, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

        # Read field ID & Y. The context manager closes the file handle.
        # NOTE: pickle can execute arbitrary code; only load trusted dataset files.
        with open(sample_dir + '/field_id.pkl', 'rb') as fh:
            field_id = pkl.load(fh)
        Y = np.load(self.pathY + f + '.npy')

        return (img0, img1, img2, depth, field_id), Y

    def __len__(self):
        """Return the number of sample directories found in the split."""
        return len(self.data)

In [6]:
#Lazily load the training split (train defaults to True) with only the ToTensor transform,
#so that raw per-image statistics can be computed from it
preliminary_dataset = PreLazyLoadDataset('../lazydata/', transform=transform_first)

In [7]:
#Pull the first sample: a, b, c are the three images, d is the depth array, e is the field id, y is the target
(a,b,c,d,e), y = preliminary_dataset[0]

In [8]:
#Print per-image channel statistics; the printed values are the ones hard-coded in LazyLoadDataset's Normalize steps
get_mean_std(preliminary_dataset)

Img_0 Mean: [0.4352, 0.4170, 0.3960]
Img_0 STD: [0.1992, 0.1987, 0.2111]
Img_1 Mean: [0.5008, 0.4879, 0.4697]
Img_1 STD: [0.2276, 0.2252, 0.2417]
Img_2 Mean: [0.5193, 0.4820, 0.4412]
Img_2 STD: [0.2293, 0.2288, 0.2465]


In [9]:
#Build the training dataset. The transform argument only switches the built-in per-image
#pipelines on; LazyLoadDataset never calls transform_first itself
dataset = LazyLoadDataset('../lazydata/', transform=transform_first)

In [10]:
#Define train/validation size (8:2); validation takes the remainder so the two sizes always sum to len(dataset)
train_size = int(len(dataset) * 0.8)
validation_size = len(dataset)-train_size

#Randomly split dataset into train and validation dataset with specified size above
#NOTE: no generator/seed is passed, so the split is not reproducible between runs
train_dataset, validation_dataset = random_split(dataset, [train_size, validation_size])

#Create train/validation dataloader with batch_size of 3
#NOTE(review): the validation loader is shuffled as well -- confirm this is intended
train_dataloader = DataLoader(train_dataset, batch_size=3, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=3, shuffle=True)


In [11]:
# Report how many samples and how many batches each split contains
train_samples, train_batches = len(train_dataset), len(train_dataloader)
validation_samples, validation_batches = len(validation_dataset), len(validation_dataloader)
print(f"Train set size: {train_samples} \nTrain Loader size: {train_batches}")
print(f"Validation set size: {validation_samples} \nValidation Loader size: {validation_batches}")

Train set size: 2716 
Train Loader size: 906
Validation set size: 680 
Validation Loader size: 227


In [12]:
# Inspect the first training batch only (the loop breaks after one iteration)
# to check the shapes of the collated tensors.
for i, ((img0, img1, img2, depth, field_id), labels) in enumerate(train_dataloader):
    print(i)
    # print(depth[0])
    print("img0 shape{}".format(img0.size()))
    print("img1 shape{}".format(img1.size()))
    print("img2 shape{}".format(img2.size()))
    print("depth shape{}".format(depth.shape))
    print("field id {}".format(field_id))
    print("labels size {}".format(labels.size()))
    break

0
img0 shapetorch.Size([3, 3, 224, 224])
img1 shapetorch.Size([3, 3, 224, 224])
img2 shapetorch.Size([3, 3, 224, 224])
depth shapetorch.Size([3, 3, 224, 224])
field id ('3917', '779', '3283')
labels size torch.Size([3, 12])
