**Install requirements, Import libraries**

In [0]:
# install packets (first time need env restart)
!pip3 install 'torch==1.3.1'
!pip3 install 'torchvision==0.4.2'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'
!pip3 install --upgrade 'pillow'

# import libraries
import os
import logging
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn
import torchvision
from torchvision import transforms
from torchvision.models import alexnet
from PIL import Image
from tqdm import tqdm
from torchvision.datasets import VisionDataset
from PIL import Image
import os.path
import sys
import matplotlib.pyplot as plt
from copy import deepcopy

# time finished task function
def donetime(start=None):
  if start != None:
    if ((time.time() - start)/60) < 60:
      print('DONE (time {:4.2f} m)\n'.format((time.time() - start)/60))
    else:
      print('DONE (time {:4.2f} h)\n'.format((time.time() - start)/3600))
  else:
    print('\nDONE')
  return

donetime()

Requirement already up-to-date: pillow in /usr/local/lib/python3.6/dist-packages (6.2.1)

DONE


**Set Arguments, Clone git repository**


In [0]:
# set default parameters
DEVICE = 'cuda'
NUM_CLASSES = 101
BATCH_SIZE = 256 
MOMENTUM = 0.9    #rho  
WEIGHT_DECAY = 5e-5  
GAMMA = 0.1        
LOG_FREQUENCY = 10
DATA_DIR = 'Homework2-Caltech101/101_ObjectCategories'

# clone git repo, errer if interrupted while cloning github repo!
if not os.path.isdir('./Homework2-Caltech101'):
  print('Cloning github repository')
  !git clone https://github.com/MachineLearning2020/Homework2-Caltech101.git

# simple status bar 
def statusBar(actual, finish): #no other prints!
  print(end="\r", flush=True)
  print('[ ', end = '') 
  for i in range(actual):
    print('###', end = '')
  if actual != (finish -1):
    print('#>-', end = '')
  for i in range(finish - actual - 2):
    print('---', end = '')
  print(' ]', end = '')
  return

donetime() 

Cloning github repository
Cloning into 'Homework2-Caltech101'...
remote: Enumerating objects: 9256, done.[K
remote: Total 9256 (delta 0), reused 0 (delta 0), pack-reused 9256[K
Receiving objects: 100% (9256/9256), 129.48 MiB | 30.26 MiB/s, done.
Resolving deltas: 100% (4/4), done.
Checking out files: 100% (9149/9149), done.

DONE


**Custom Dataset Class** *1A 1B*

In [0]:
# proper image loader
def pil_loader(path):
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')

# dataset class
class Caltech(VisionDataset):
    # initialization
    def __init__(self, root, split='train', transform=None, target_transform=None):
        super(Caltech, self).__init__(root, transform=transform, target_transform=target_transform)

        self.split = split

        self.dataset = [] # (PILimage, labelID)
        self.labels = [] # (label) ID by index
        
        if self.split == 'train':
          text = open("./Homework2-Caltech101/train.txt","r")
        else:
          if self.split == 'test':
            text = open("./Homework2-Caltech101/test.txt","r")
          else:
            print("ERROR")

        readText = text.readlines()
        for iterText in readText:
          label, image = os.path.split(iterText)
          if not label == 'BACKGROUND_Google': 
            if not label in self.labels:
              self.labels.append(label)
            self.dataset.append((pil_loader(DATA_DIR + '/' + iterText.replace('\n','')), self.labels.index(label)))
        print(split, "correctly loaded")
    # getter
    def __getitem__(self, index):
        image, label = self.dataset[index]
        if self.transform is not None:
            image = self.transform(image)
        return image, label
    # get lenght
    def __len__(self):
        length = len(self.dataset)
        return length

donetime()


DONE


**Prepare Transformations, Dataset and Dataloader** *2A*


In [0]:
# define transformations (default) 2C
#train_transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ]) # validation has same transformations of train
#test_transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ])

# define transformations with standard alexnet mean and standard deviation 3B
#train_transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ])
#test_transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ])

# define transformations with processing 4A
#train_transform = transforms.Compose([transforms.RandomHorizontalFlip(0.5), transforms.RandomVerticalFlip(0.5), transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ])
train_transform = transforms.Compose([transforms.RandomGrayscale(0.5), transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ])
#train_transform = transforms.Compose([transforms.RandomGrayscale(0.1), transforms.RandomHorizontalFlip(0.5), transforms.RandomVerticalFlip(0.5), transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ])
test_transform = transforms.Compose([transforms.CenterCrop(224), transforms.Resize(256), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ])

# create datasets
tosplit_dataset = Caltech(DATA_DIR,'train',train_transform, None)

train_indexes = [idx for idx in range(len(tosplit_dataset)) if idx % 2]
validation_indexes = [idx for idx in range(len(tosplit_dataset)) if not idx % 2]
validation_dataset = Subset(tosplit_dataset, validation_indexes)
train_dataset = Subset(tosplit_dataset, train_indexes)

test_dataset = Caltech(DATA_DIR,'test',test_transform,None)

print("TRAIN", train_dataset.__len__(), "tuple")
print("VALIDATION", validation_dataset.__len__(), "tuple")
print("TEST", test_dataset.__len__(), "tuple\n")

# prepare dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

donetime()

train correctly loaded
test correctly loaded
TRAIN 2892 tuple
VALIDATION 2892 tuple
TEST 2893 tuple


DONE


**Main function Train Validation Test, Output Function**


In [0]:
#train model
def functionTrainValidationTest(net, LRn, NUM_EPOCHSn, STEP_SIZEn, type=None):
  print('STARTING TRAIN-VALIDATION-TEST {} (LR {}, NUM_EPOCHS {}, STEP_SIZE {})\n'.format(type, LRn, NUM_EPOCHSn, STEP_SIZEn))

  
  criterion = nn.CrossEntropyLoss()
  if type != '3D' and type != '3E':
    parameters_to_optimize = net.parameters()
  else:
    if type == '3D': # 3D optimize only fully connected layers
      parameters_to_optimize = net.classifier.parameters() 
    if type == '3E': # 3E optimize only convolutional layers
      parameters_to_optimize = net.features.parameters()
  optimizer = optim.SGD(parameters_to_optimize, lr=LRn, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZEn, gamma=GAMMA)

  # By default, everything is loaded to cpu
  net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

  cudnn.benchmark # Calling this optimizes runtime

  current_step = 0

  best_accuracy = 0

  scores = [] #(epoch, accuracy, actualLOSS)
  best_accuracy = 0

  # Start iterating over the epochs
  for epoch in range(NUM_EPOCHSn):
    #print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))

    # Iterate over the dataset
    for images, labels in train_dataloader:
      # Bring data over the device of choice
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)

      net.train() # Sets module in training mode

      # PyTorch, by default, accumulates gradients after each backward pass
      # We need to manually set the gradients to zero before starting a new iteration
      optimizer.zero_grad() # Zero-ing the gradients

      # Forward pass to the network
      outputs = net(images)

      # Compute loss based on output and ground truth
      loss = criterion(outputs, labels)

      actualLOSS = loss.item()

      '''# Log loss
      if current_step % LOG_FREQUENCY == 0:
        print('Step {}, Loss {}'.format(current_step, loss.item())) '''

      # Compute gradients for each layer and update weights
      loss.backward()  # backward pass: computes gradients
      optimizer.step() # update weights based on accumulated gradients

      current_step += 1

    # Step the scheduler
    scheduler.step() 

    # VALIDATION each epoch

    net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
    net.train(False) # Set Network to evaluation mode

    running_corrects = 0
    for images, labels in validation_dataloader:
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)

      # Forward Pass
      outputs = net(images)

      # Get predictions
      _, preds = torch.max(outputs.data, 1)

      # Update Corrects
      running_corrects += torch.sum(preds == labels.data).data.item()

    # Calculate Accuracy
    accuracy = running_corrects / float(len(validation_dataset))
    scores.append((epoch, accuracy, actualLOSS))
    #print('Validation Accuracy: {}, epoch {}, loss {}'.format(accuracy, epoch,  actualLOSS))
    
    statusBar(epoch, NUM_EPOCHSn)

    # save best net
    if accuracy >= best_accuracy: # if equal take last
      best_accuracy = accuracy
      best_net = deepcopy(net)

  print("\n\nVALIDATION Final Results (epoch, accuracy, loss):")
  for tup in scores:
    print(tup)
  x = []
  y = []
  # accuracy per epoch graph
  for tup in scores:
    x.append(tup[0])
    y.append(tup[1])
  print('')
  plt.plot(x, y)
  plt.ylabel('accuracy')
  plt.xlabel('epoch')
  plt.show()
  x = []
  y = []
  # loss per epoch graph
  for tup in scores:
    x.append(tup[0])
    y.append(tup[2])
  print('')
  plt.plot(x, y)
  plt.ylabel('loss')
  plt.xlabel('epoch')
  plt.show()
  print('\nBest model accuracy: {}\n'.format(best_accuracy))

  # TEST

  best_net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
  best_net.train(False) # Set Network to evaluation mode

  running_corrects = 0
  for images, labels in test_dataloader:
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = best_net(images)

    # Get predictions
    _, preds = torch.max(outputs.data, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels.data).data.item()

  # Calculate Accuracy
  accuracy = running_corrects / float(len(test_dataset))
  print('TEST Accuracy Final Result: {}\n'.format(accuracy))
  
  return

donetime()


DONE


***TRAIN-VALIDATION-TEST***

**Train and Validation tuning LR, NUM_EPOCH, STEP_SIZE** *2C*


In [0]:
startTime = time.time()

LR = 0.01
NUM_EPOCHS = 70
STEP_SIZE = 20

alnet = alexnet()
alnet.classifier[6] = nn.Linear(4096, NUM_CLASSES)

#functionTrainValidationTest(alnet, LR, NUM_EPOCHS, STEP_SIZE, '2C')

donetime(startTime)

# 0.001 30  20    0.09
# 0.005 50  40    0.36
# 0.01  50  40    0.52

# 0.01  40  20    0.30
# 0.01  40  30    0.41
# 0.01  50  20    0.34
# 0.05  50  20    0.40
# 0.1   60  20    0.01
# 0.1   70  20    0.32

DONE (time 0.01 m)



**Test on AlexNet pretrained with ImageNet database** *3C*

In [0]:
startTime = time.time()

LR = 0.005
NUM_EPOCHS = 50
STEP_SIZE = 40

net = alexnet(pretrained=True)

# functionTrainValidationTest(net, LR, NUM_EPOCHS, STEP_SIZE, '3C')

donetime(startTime)

# 0.01  50  40  0.01
# 0.001 50  40  0.66 
# 0.001 60  20  0.58
# 0.005 60  20  0.68 
# 0.005 50  40  0.71

Downloading: "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth" to /root/.cache/torch/checkpoints/alexnet-owt-4df8aa71.pth
100%|██████████| 233M/233M [00:01<00:00, 151MB/s]


DONE (time 0.05 m)



**Test on AlexNet pretrained only fully connected and only convolutional layers** *3D 3E*

In [0]:
net = alexnet(pretrained=True)

LR = 0.005
NUM_EPOCHS = 50
STEP_SIZE = 40

net = alexnet(pretrained=True)

#functionTrainValidationTest(net, LR, NUM_EPOCHS, STEP_SIZE, '3D')

net = alexnet(pretrained=True)

#functionTrainValidationTest(net, LR, NUM_EPOCHS, STEP_SIZE, '3E')

donetime(startTime)

# 0.005 50  40  0.84

# 0.005 50  40  0.54
# 0.001 50  40  0.41

DONE (time 0.11 m)



**Test on AlexNet pretrained with training transformations** *4A*

In [0]:
startTime = time.time()

LR = 0.005
NUM_EPOCHS = 50
STEP_SIZE = 40

net = alexnet(pretrained=True)

functionTrainValidationTest(net, LR, NUM_EPOCHS, STEP_SIZE, '4A')

donetime(startTime)

# 0.005 50  40  orizontalFlip verticalFlip  0.69 
# 0.005 50  40  grayScale                   0.60
# 0.005 50  40  all                         0.60