# Assignment 3 - Transfer Learning

### Name: Anirudh Swaminathan
### PID: A53316083
### Email ID: aswamina@eng.ucsd.edu

#### Notebook created by Anirudh Swaminathan from ECE department majoring in Intelligent Systems, Robotics and Control for the course ECE285 Machine Learning for Image Processing for Fall 2019

## Getting Started

In [None]:
%matplotlib notebook

import os
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
import torch.utils.data as td
import torchvision as tv
import pandas as pd
from PIL import Image
from matplotlib import pyplot as plt

In [None]:
# Pick the compute device: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

## Data Loader

#### Question 1

In [None]:
# Root directory of the birds dataset; handed to BirdsDataset as its root_dir argument.
dataset_root_dir = '/datasets/ee285f-public/caltech_ucsd_birds/'

In [None]:
# Report which account and which machine this notebook is running on,
# via getpass.getuser() and socket.gethostname().
import getpass
import socket

user, hostname = getpass.getuser(), socket.gethostname()
# one value per line: user name first, then host name
print(user, hostname, sep='\n')

We have created the variable $dataset\_root\_dir$ and made it point to the directory of the Birds dataset.

#### Question 2

In [None]:
class BirdsDataset(td.Dataset):
    """Birds dataset serving cropped, resized and normalized images with labels.

    The annotations are read from ``<root_dir>/<mode>.csv``. Each row provides
    the image path relative to ``<root_dir>/CUB_200_2011/images`` (column
    ``file_path``), a bounding box (columns ``x1``, ``y1``, ``x2``, ``y2``)
    and a label (column ``class``).

    Args:
        root_dir: directory containing the CSV files and the ``CUB_200_2011``
            folder.
        mode: split name; selects which ``<mode>.csv`` file is loaded.
        image_size: target size handed to ``tv.transforms.Resize``. It is
            captured when the dataset is constructed.
    """

    # Bounding-box columns in the order PIL's ``Image.crop`` expects its box:
    # (left, upper, right, lower).
    _BBOX_COLUMNS = ['x1', 'y1', 'x2', 'y2']

    def __init__(self, root_dir, mode="train", image_size=(224, 224)):
        super(BirdsDataset, self).__init__()
        self.image_size = image_size
        self.mode = mode

        # data is a pandas DataFrame with one row per image
        self.data = pd.read_csv(os.path.join(root_dir, "%s.csv" % mode))
        self.images_dir = os.path.join(root_dir, "CUB_200_2011/images")

        # The pipeline only depends on image_size, so it is built once here
        # rather than on every __getitem__ call.
        self.transform = tv.transforms.Compose([
            # resize the image to image_size
            tv.transforms.Resize(self.image_size),

            # convert to torch tensor
            tv.transforms.ToTensor(),

            # normalize each channel with mean 0.5 and std 0.5,
            # i.e. map values from [0, 1] to [-1, 1]
            tv.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

    def __len__(self):
        """Return the number of rows (samples) in the annotation table."""
        return len(self.data)

    def __repr__(self):
        return "BirdsDataset(mode={}, image_size={})".format(self.mode, self.image_size)

    def __getitem__(self, idx):
        """Return ``(x, d)``: the transformed image tensor and its class label.

        The image is converted to RGB, cropped to the bounding box of the
        ``idx``-th row, and then resized, converted to a tensor and normalized.
        """
        # fetch the row once; every column needed below comes from it
        row = self.data.iloc[idx]
        img_path = os.path.join(self.images_dir, row['file_path'])

        # Select the box coordinates by column label. Integer positions such as
        # bbox[0] on a label-indexed Series are deprecated in recent pandas.
        box = tuple(row[self._BBOX_COLUMNS])

        # convert() loads the pixel data, so the file can be closed right after
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')
        img = img.crop(box)

        # apply the transform on the cropped image
        x = self.transform(img)

        # label stored in the class column of the idx-th row
        d = row['class']
        return x, d

    def number_of_classes(self):
        """Return the largest label in the ``class`` column plus one."""
        return self.data['class'].max() + 1

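Completed the torchvision transform pipeline built with $tv.transforms.Compose()$. <br>
I resize the cropped image to the requested image size using the $tv.transforms.Resize()$ function. Then the image is converted to a torch tensor using the $tv.transforms.ToTensor()$ function. Finally, I normalize each channel with mean 0.5 and standard deviation 0.5 using the $tv.transforms.Normalize()$ function, which maps the pixel values from $[0, 1]$ to $[-1, 1]$. <br>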


#### Question 3

In [None]:
def myimshow(image, ax=plt):
    """Display a normalized channel-first image tensor.

    Args:
        image: 3-D torch tensor whose first axis is moved last before
            plotting. Values are mapped with ``(v + 1) / 2`` and then clipped
            to ``[0, 1]``, so inputs are expected to lie in ``[-1, 1]``.
        ax: object providing ``imshow`` and ``axis`` (the ``pyplot`` module by
            default; a matplotlib ``Axes`` also works).

    Returns:
        The value returned by ``ax.imshow``.
    """
    # detach() first: Tensor.numpy() raises on tensors that require grad
    image = image.detach().to('cpu').numpy()
    # channel-first (C, H, W) -> channel-last (H, W, C), the layout imshow expects
    image = np.transpose(image, (1, 2, 0))
    # undo the [-1, 1] normalization and clamp anything that falls outside [0, 1]
    image = np.clip((image + 1) / 2, 0, 1)
    h = ax.imshow(image)
    ax.axis('off')
    return h

In [None]:
# Build the training split (mode defaults to "train") as a BirdsDataset instance.
train_set = BirdsDataset(dataset_root_dir)

# Fetch the sample at index 10: the image tensor and its class label.
x, d_x = train_set[10]

# Show the retrieved image with myimshow.
myimshow(x)