### Imports and dataset class


**Install requirements**

In [0]:
!pip3 install 'torch==1.3.1'
#!pip3 install 'torch==1.4.0'
!pip3 install 'torchvision==0.5.0'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'

**Import libraries**

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet, vgg16, vgg16_bn, resnet18, vgg11

from PIL import Image
from tqdm import tqdm

from torchvision.datasets import VisionDataset

from PIL import Image

import os
import os.path
import sys

from sklearn.model_selection import train_test_split

**dataset class**

In [0]:
# Clone github repository with data
# (skipped when a previous run already created ./Caltech101)
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/MachineLearning2020/Homework2-Caltech101.git
  !mv 'Homework2-Caltech101' 'Caltech101'

# NOTE(review): this imported Caltech is shadowed by the `class Caltech`
# defined further down in this same cell, so the repository version is never used.
from Caltech101.caltech_dataset import Caltech

def pil_loader(path):
    """Load the image stored at `path` and return it converted to RGB mode."""
    # The file is opened explicitly (instead of handing the path to PIL) to avoid
    # a ResourceWarning, see https://github.com/python-pillow/Pillow/issues/835
    with open(path, 'rb') as handle:
        return Image.open(handle).convert('RGB')


class Caltech(VisionDataset):
    """Caltech-101 dataset indexed through the repository's split files.

    The split file "Caltech101/<split>.txt" lists one relative image path per
    line ("<class_name>/<file>"). Entries of the "BACKGROUND_Google" class are
    skipped. Each remaining class gets an integer label in order of first
    appearance in the split file.

    Args:
        root: forwarded to VisionDataset. NOTE(review): the split file and the
            images are still resolved from the hard-coded "Caltech101/" folder,
            not from `root` — kept as in the original; confirm before changing.
        split: name of the split file to read ('train' or 'test').
        transform: optional callable applied to the PIL image.
        target_transform: optional callable applied to the integer label.

    Attributes:
        images: dict mapping sample index -> relative image path.
        labels: dict mapping class name -> integer label.
        N: number of distinct classes minus one (-1 when the split is empty).
    """

    def __init__(self, root, split='train', transform=None, target_transform=None):
        super(Caltech, self).__init__(root, transform=transform, target_transform=target_transform)

        self.split = split  # split files are called 'train.txt' and 'test.txt'

        self.images = {}  # index -> path
        self.labels = {}  # class name -> label index
        self.N = -1       # number of different classes - 1

        file_path = "Caltech101/" + self.split + ".txt"
        # `with` guarantees the handle is released even if parsing fails.
        with open(file_path, "r") as split_file:
            for raw_line in split_file:
                path = raw_line.rstrip("\n")
                if not path:
                    # A blank line would otherwise become a bogus class.
                    continue

                class_name = path.split("/")[0]
                if class_name == "BACKGROUND_Google":
                    continue

                # Membership test (rather than "differs from the previous line")
                # keeps the mapping stable even if a class is not contiguous.
                if class_name not in self.labels:
                    self.N += 1
                    self.labels[class_name] = self.N

                self.images[len(self.images)] = path

    def __getitem__(self, index):
        """Return the `(image, label)` pair stored at position `index`.

        Raises:
            IndexError: if `index` is not a valid sample position.
        """
        if index not in self.images:
            raise IndexError("Caltech index out of range: {}".format(index))

        path = self.images[index]
        image = pil_loader("Caltech101/101_ObjectCategories/" + path)
        label = self.labels[path.split("/")[0]]

        # Applies preprocessing when accessing the image
        if self.transform is not None:
            image = self.transform(image)

        # target_transform was accepted by the constructor but never applied.
        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label

    def __len__(self):
        """Return the number of samples kept from the split file."""
        return len(self.images)

In [0]:
def divide_set(file_path, test_size=0.5, random_state=269317):
  """Split the samples listed in a split file into train / validation indexes.

  Lines are numbered in file order, skipping blank lines and entries of the
  "BACKGROUND_Google" class, so the indexes line up with the positions used by
  the Caltech dataset built from the same file. The split is stratified on the
  class name (first path component).

  Args:
    file_path: path of the split file (one "<class_name>/<file>" per line).
    test_size: share (or count) of samples sent to the validation side,
      passed to train_test_split. Defaults to the original 0.5.
    random_state: seed passed to train_test_split for a reproducible split.

  Returns:
    (train_indexes, val_indexes): two lists of integer sample positions.
  """
  y = []  # class name of every kept sample, in file order

  with open(file_path, "r") as split_file:
    for raw_line in split_file:
      path = raw_line.rstrip("\n")
      if not path:
        continue
      class_name = path.split("/")[0]
      if class_name != "BACKGROUND_Google":
        y.append(class_name)

  X = list(range(len(y)))  # sample positions 0..len-1

  # train_test_split returns the split of X followed by the split of y;
  # only the index lists are needed here.
  train_indexes, val_indexes, _, _ = train_test_split(
      X, y, stratify=y, test_size=test_size, random_state=random_state)

  return train_indexes, val_indexes

In [0]:
#to plot the histogram
from collections import Counter

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Count the images of every class over both split files.
# A Counter starts missing keys at 0, so a class that only shows up in
# test.txt no longer raises KeyError.
# BACKGROUND_Google is skipped here as well, to describe the same classes
# that the Caltech dataset and divide_set actually use.
labels = Counter()
for file_path in ("Caltech101/train.txt", "Caltech101/test.txt"):
  with open(file_path, "r") as f:
    for path in f:
      path = path.rstrip("\n")
      if not path:
        continue
      class_name = path.split("/")[0]
      if class_name == "BACKGROUND_Google":
        continue
      labels[class_name] += 1

# Per-class sample counts, in order of first appearance
v = np.array(list(labels.values()))
print(np.mean(v))

ax = sns.distplot(v, kde=False, rug=False, color='r');
ax.set_xlabel("N° of samples")
ax.set_ylabel("N° of classes")
plt.title("Class size distribution")
plt.savefig("fig1")

### AlexNet base

**Set Arguments**

In [0]:
DEVICE = 'cuda' # 'cuda' or 'cpu'; every tensor and the network are moved here with .to(DEVICE)

NUM_CLASSES = 101    # Output size of the replaced final layer. The split files also list an extra
                     # BACKGROUND_Google class, which the dataset class filters out

BATCH_SIZE = 256     # Higher batch sizes allow for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, the learning rate should change by the same factor to have comparable results
LR = 0.001             # The initial learning rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default
NUM_EPOCHS = 41      # Total number of training epochs (iterations over the dataset)
STEP_SIZE = 10       # How many epochs before decreasing the learning rate (step-down policy)
GAMMA = 0.1          # Multiplicative factor for the learning rate step-down
LOG_FREQUENCY = 10   # The training loss is printed every LOG_FREQUENCY optimisation steps

**Define Data Preprocessing**

In [0]:
# Preprocessing applied to the training images
train_transform = transforms.Compose([
    transforms.Resize(256),      # shorter side of the PIL image -> 256 px
    transforms.CenterCrop(224),  # central 224x224 patch, the input size fed to AlexNet in this notebook
    transforms.ToTensor(),       # PIL Image -> torch.Tensor
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),  # per-channel normalisation
])

# Preprocessing applied to the validation / test images
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

**Prepare Dataset**

In [0]:
DATA_DIR = 'Caltech101/101_ObjectCategories'

# Prepare Pytorch train/test Datasets.
# The 'train' split is indexed twice so that the validation subset is served
# through eval_transform instead of silently inheriting train_transform
# (both pipelines are identical in this section, so the numbers do not change).
train_val_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
val_source_dataset = Caltech(DATA_DIR, split='train', transform=eval_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

# Stratified 50/50 split of the sample positions of train.txt
train_indexes, val_indexes = divide_set("Caltech101/train.txt")

# split the train set into training and validation subsets
train_dataset = Subset(train_val_dataset, train_indexes)
val_dataset = Subset(val_source_dataset, val_indexes)

# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

Train Dataset: 2892
Valid Dataset: 2892
Test Dataset: 2893


**Prepare Dataloaders**

In [0]:
# Batched iteration over the three datasets, loaded by 4 worker processes.
# Training batches are shuffled and the last incomplete batch is dropped;
# validation and test are read in order and in full.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

**Prepare Network**

In [0]:
# AlexNet built without pretrained weights
net = alexnet()

# The stock classifier ends with 1000 outputs (one per ImageNet class);
# swap that last fully connected layer for one with NUM_CLASSES outputs.
# (nn.Linear is a fully connected layer, nn.Conv2d a convolutional one.)
net.classifier[6] = nn.Linear(in_features=4096, out_features=NUM_CLASSES)
# Reading torchvision.models.alexnet's source code is strongly suggested

**Prepare Training**

In [0]:
# Loss: cross entropy, the usual choice for classification
criterion = nn.CrossEntropyLoss()

# Parameters handed to the optimizer: here, every parameter of AlexNet.
# Sub-modules expose their own subsets, e.g. net.classifier.parameters()
# for the fully connected layers.
parameters_to_optimize = net.parameters()

# Optimizer: SGD with momentum, updates the weights from the gradients of the loss
optimizer = optim.SGD(
    parameters_to_optimize,
    lr=LR,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)

# Scheduler: step-down policy, multiplies the learning rate by GAMMA every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=STEP_SIZE,
    gamma=GAMMA,
)

**Train**

In [0]:
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

# The flag has to be assigned: a bare `cudnn.benchmark` expression only reads
# it and changes nothing.
cudnn.benchmark = True

current_step = 0

# Start iterating over the epochs
for epoch in range(NUM_EPOCHS):
  # Read the learning rate from the optimizer itself: this is the value the
  # coming updates use. scheduler.get_lr() called outside step() can report an
  # extra GAMMA factor on decay epochs in recent PyTorch versions.
  current_lr = [group['lr'] for group in optimizer.param_groups]
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

  net.train() # Sets module in training mode (nothing changes it inside the epoch)

  # Iterate over the dataset
  for images, labels in train_dataloader:
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # PyTorch accumulates gradients across backward passes,
    # so they are reset before every new iteration
    optimizer.zero_grad()

    # Forward pass and loss against the ground truth
    outputs = net(images)
    loss = criterion(outputs, labels)

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on the gradients

    current_step += 1

  # Step the scheduler once per epoch
  scheduler.step()

**Validation**

In [0]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.eval() # Set Network to evaluation mode (same as net.train(False))

running_corrects = 0
# Evaluation needs no gradients: no_grad() skips building the autograd graph,
# which saves memory and time without changing the predictions.
with torch.no_grad():
  for images, labels in tqdm(val_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = net(images)

    # Get predictions: index of the highest score per sample
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()

# Calculate Accuracy
accuracy = running_corrects / float(len(val_dataset))
print('Validation Accuracy: {}'.format(accuracy))

**Test**

In [0]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.eval() # Set Network to evaluation mode (same as net.train(False))

running_corrects = 0
# Evaluation needs no gradients: no_grad() skips building the autograd graph,
# which saves memory and time without changing the predictions.
with torch.no_grad():
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = net(images)

    # Get predictions: index of the highest score per sample
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()

# Calculate Accuracy
accuracy = running_corrects / float(len(test_dataset))

print('Test Accuracy: {}'.format(accuracy))

100%|██████████| 12/12 [00:12<00:00,  1.08s/it]

Test Accuracy: 0.22779122018665746





### Transfer learning

**set parameters**

In [0]:
# Hyperparameters for the transfer-learning runs
DEVICE = 'cuda'      # device the network and the batches are moved to
NUM_CLASSES = 101    # output size of the replaced final layer
BATCH_SIZE = 256     # batch size of the three DataLoaders
LR = 0.0001          # initial learning rate passed to SGD
MOMENTUM = 0.9       # SGD momentum
WEIGHT_DECAY = 5e-5  # SGD weight decay (regularization)
NUM_EPOCHS = 20      # number of passes over the training subset
STEP_SIZE = 20       # StepLR period; equal to NUM_EPOCHS, so the step-down only happens after the last epoch
GAMMA = 0.1          # StepLR multiplicative factor
LOG_FREQUENCY = 10   # the training loss is printed every LOG_FREQUENCY steps

**Prepare Net**

In [18]:
# Same resize / crop pipeline for both phases, normalised with the
# per-channel mean and std listed below.
train_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# AlexNet with pretrained weights; only the last layer is replaced so that it
# outputs NUM_CLASSES scores
net = alexnet(pretrained=True)
net.classifier[6] = nn.Linear(in_features=4096, out_features=NUM_CLASSES)

# Loss, optimizer (over every parameter of the network) and step-down scheduler
criterion = nn.CrossEntropyLoss()
parameters_to_optimize = net.parameters()
optimizer = optim.SGD(
    parameters_to_optimize,
    lr=LR,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=STEP_SIZE,
    gamma=GAMMA,
)

Downloading: "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth" to /root/.cache/torch/checkpoints/alexnet-owt-4df8aa71.pth


HBox(children=(FloatProgress(value=0.0, max=244418560.0), HTML(value='')))




**Prepare Dataset**

In [0]:
DATA_DIR = 'Caltech101/101_ObjectCategories'

# The 'train' split is indexed twice so that the validation subset is served
# through eval_transform instead of silently inheriting train_transform
# (both pipelines are identical in this section, so the numbers do not change).
train_val_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
val_source_dataset = Caltech(DATA_DIR, split='train', transform=eval_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

# Stratified 50/50 split of the sample positions of train.txt
train_indexes, val_indexes = divide_set("Caltech101/train.txt")

train_dataset = Subset(train_val_dataset, train_indexes)
val_dataset = Subset(val_source_dataset, val_indexes)

# Training batches are shuffled and the last incomplete one is dropped;
# validation and test are read in order and in full.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

**Train**

In [20]:
net = net.to(DEVICE)

# The flag has to be assigned: a bare `cudnn.benchmark` expression only reads
# it and changes nothing.
cudnn.benchmark = True
current_step = 0

for epoch in range(NUM_EPOCHS):
  # Read the learning rate from the optimizer itself: this is the value the
  # coming updates use. scheduler.get_lr() called outside step() can report an
  # extra GAMMA factor on decay epochs in recent PyTorch versions.
  current_lr = [group['lr'] for group in optimizer.param_groups]
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

  net.train() # training mode; nothing changes it inside the epoch

  for images, labels in train_dataloader:
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    optimizer.zero_grad() # gradients accumulate across backward passes, reset them
    outputs = net(images)

    loss = criterion(outputs, labels)

    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    loss.backward()  # compute gradients
    optimizer.step() # update weights

    current_step += 1

  scheduler.step() # one scheduler step per epoch

Starting epoch 1/20, LR = [0.0001]




Step 0, Loss 4.877261161804199
Step 10, Loss 4.597565174102783
Starting epoch 2/20, LR = [0.0001]
Step 20, Loss 4.0673604011535645
Starting epoch 3/20, LR = [0.0001]
Step 30, Loss 3.643270969390869
Starting epoch 4/20, LR = [0.0001]
Step 40, Loss 3.6092190742492676
Starting epoch 5/20, LR = [0.0001]
Step 50, Loss 3.378148317337036
Starting epoch 6/20, LR = [0.0001]
Step 60, Loss 3.2080416679382324
Starting epoch 7/20, LR = [0.0001]
Step 70, Loss 2.903881788253784
Starting epoch 8/20, LR = [0.0001]
Step 80, Loss 3.0390055179595947
Starting epoch 9/20, LR = [0.0001]
Step 90, Loss 2.8257811069488525
Starting epoch 10/20, LR = [0.0001]
Step 100, Loss 2.5244038105010986
Starting epoch 11/20, LR = [0.0001]
Step 110, Loss 2.653352975845337
Step 120, Loss 2.40788197517395
Starting epoch 12/20, LR = [0.0001]
Step 130, Loss 2.2199981212615967
Starting epoch 13/20, LR = [0.0001]
Step 140, Loss 2.371262550354004
Starting epoch 14/20, LR = [0.0001]
Step 150, Loss 2.1613168716430664
Starting epoch 1

**Validation**

In [21]:
net = net.to(DEVICE)
net.eval() # evaluation mode (same as net.train(False))

running_corrects = 0
# Evaluation needs no gradients: no_grad() skips building the autograd graph,
# which saves memory and time without changing the predictions.
with torch.no_grad():
  for images, labels in tqdm(val_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    outputs = net(images)

    # Predicted class: index of the highest score per sample
    _, preds = torch.max(outputs, 1)

    running_corrects += torch.sum(preds == labels).item()

accuracy = running_corrects / float(len(val_dataset))
print('Validation Accuracy: {}'.format(accuracy))

100%|██████████| 12/12 [00:16<00:00,  1.40s/it]

Validation Accuracy: 0.6587136929460581





**Test**

In [0]:
net = net.to(DEVICE)
net.eval() # evaluation mode (same as net.train(False))

running_corrects = 0
# Evaluation needs no gradients: no_grad() skips building the autograd graph,
# which saves memory and time without changing the predictions.
with torch.no_grad():
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    outputs = net(images)

    # Predicted class: index of the highest score per sample
    _, preds = torch.max(outputs, 1)

    running_corrects += torch.sum(preds == labels).item()

accuracy = running_corrects / float(len(test_dataset))
print('Test Accuracy: {}'.format(accuracy))

100%|██████████| 12/12 [00:09<00:00,  1.22it/s]

Test Accuracy: 0.48772900103698585





#### Train and test with the best hyperparameters



In [0]:
# Pretrained AlexNet whose last layer is replaced to output NUM_CLASSES scores
net = alexnet(pretrained=True)
net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

# Only the classifier is handed to the optimizer, so the convolutional
# features never change. Turning off their gradients avoids back-propagating
# through layers that are not updated; the trained weights are unaffected.
for frozen_param in net.features.parameters():
  frozen_param.requires_grad = False

criterion = nn.CrossEntropyLoss()
parameters_to_optimize = net.classifier.parameters()  #only fully connected layer
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

**train**

In [0]:
net = net.to(DEVICE)

# The flag has to be assigned: a bare `cudnn.benchmark` expression only reads
# it and changes nothing.
cudnn.benchmark = True
current_step = 0

for epoch in range(NUM_EPOCHS):
  # Read the learning rate from the optimizer itself: this is the value the
  # coming updates use. scheduler.get_lr() called outside step() can report an
  # extra GAMMA factor on decay epochs in recent PyTorch versions.
  current_lr = [group['lr'] for group in optimizer.param_groups]
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

  net.train() # training mode; nothing changes it inside the epoch

  for images, labels in train_dataloader:
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    optimizer.zero_grad() # gradients accumulate across backward passes, reset them
    outputs = net(images)

    loss = criterion(outputs, labels)

    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    loss.backward()  # compute gradients
    optimizer.step() # update weights

    current_step += 1

  scheduler.step() # one scheduler step per epoch

Starting epoch 1/50, LR = [0.01]




Step 0, Loss 4.960520267486572
Step 10, Loss 1.8881919384002686
Starting epoch 2/50, LR = [0.01]
Step 20, Loss 0.5519865155220032
Starting epoch 3/50, LR = [0.01]
Step 30, Loss 0.2731695771217346
Starting epoch 4/50, LR = [0.01]
Step 40, Loss 0.12358658760786057
Starting epoch 5/50, LR = [0.01]
Step 50, Loss 0.07788202911615372
Starting epoch 6/50, LR = [0.01]
Step 60, Loss 0.058573056012392044
Starting epoch 7/50, LR = [0.01]
Step 70, Loss 0.07298222929239273
Starting epoch 8/50, LR = [0.01]
Step 80, Loss 0.037475813180208206
Starting epoch 9/50, LR = [0.01]
Step 90, Loss 0.03557687625288963
Starting epoch 10/50, LR = [0.01]
Step 100, Loss 0.028908926993608475
Starting epoch 11/50, LR = [0.01]
Step 110, Loss 0.025699324905872345
Step 120, Loss 0.026386786252260208
Starting epoch 12/50, LR = [0.01]
Step 130, Loss 0.01895269751548767
Starting epoch 13/50, LR = [0.01]
Step 140, Loss 0.01287444494664669
Starting epoch 14/50, LR = [0.01]
Step 150, Loss 0.02579144760966301
Starting epoch 15

**test**

In [0]:
net = net.to(DEVICE)
net.eval() # evaluation mode (same as net.train(False))

running_corrects = 0
# Evaluation needs no gradients: no_grad() skips building the autograd graph,
# which saves memory and time without changing the predictions.
with torch.no_grad():
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    outputs = net(images)

    # Predicted class: index of the highest score per sample
    _, preds = torch.max(outputs, 1)
    running_corrects += torch.sum(preds == labels).item()

accuracy = running_corrects / float(len(test_dataset))

print('Test Accuracy: {}'.format(accuracy))

100%|██████████| 12/12 [00:12<00:00,  1.02s/it]

Test Accuracy: 0.8427238161078465





In [0]:
# Pretrained AlexNet whose last layer is replaced to output NUM_CLASSES scores
net = alexnet(pretrained=True)
net.classifier[6] = nn.Linear(in_features=4096, out_features=NUM_CLASSES)

criterion = nn.CrossEntropyLoss()

# Only the convolutional part (net.features) is handed to the optimizer
parameters_to_optimize = net.features.parameters()
optimizer = optim.SGD(
    parameters_to_optimize,
    lr=LR,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=STEP_SIZE,
    gamma=GAMMA,
)

**train**

In [0]:
net = net.to(DEVICE)
# The flag has to be assigned: a bare `cudnn.benchmark` expression only reads
# it and changes nothing.
cudnn.benchmark = True
current_step = 0

for epoch in range(NUM_EPOCHS):
  # Read the learning rate from the optimizer itself: this is the value the
  # coming updates use. scheduler.get_lr() called outside step() can report an
  # extra GAMMA factor on decay epochs in recent PyTorch versions.
  current_lr = [group['lr'] for group in optimizer.param_groups]
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

  net.train() # training mode; nothing changes it inside the epoch

  for images, labels in train_dataloader:
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    optimizer.zero_grad() # gradients accumulate across backward passes, reset them
    outputs = net(images)
    loss = criterion(outputs, labels)

    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    loss.backward()  # compute gradients
    optimizer.step() # update weights

    current_step += 1

  scheduler.step() # one scheduler step per epoch

Starting epoch 1/50, LR = [0.01]




Step 0, Loss 4.875702381134033
Step 10, Loss 4.388437747955322
Starting epoch 2/50, LR = [0.01]
Step 20, Loss 3.9669642448425293
Starting epoch 3/50, LR = [0.01]
Step 30, Loss 3.824765920639038
Starting epoch 4/50, LR = [0.01]
Step 40, Loss 3.469757080078125
Starting epoch 5/50, LR = [0.01]
Step 50, Loss 3.130557060241699
Starting epoch 6/50, LR = [0.01]
Step 60, Loss 3.257209062576294
Starting epoch 7/50, LR = [0.01]
Step 70, Loss 2.9369053840637207
Starting epoch 8/50, LR = [0.01]
Step 80, Loss 2.7822089195251465
Starting epoch 9/50, LR = [0.01]
Step 90, Loss 2.8881897926330566
Starting epoch 10/50, LR = [0.01]
Step 100, Loss 2.372316598892212
Starting epoch 11/50, LR = [0.01]
Step 110, Loss 2.6702818870544434
Step 120, Loss 2.7272233963012695
Starting epoch 12/50, LR = [0.01]
Step 130, Loss 2.590404987335205
Starting epoch 13/50, LR = [0.01]
Step 140, Loss 2.4208436012268066
Starting epoch 14/50, LR = [0.01]
Step 150, Loss 2.3179149627685547
Starting epoch 15/50, LR = [0.01]
Step 16

**test**

In [0]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.eval() # Set Network to evaluation mode (same as net.train(False))

running_corrects = 0
# Evaluation needs no gradients: no_grad() skips building the autograd graph,
# which saves memory and time without changing the predictions.
with torch.no_grad():
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = net(images)

    # Get predictions: index of the highest score per sample
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()

# Calculate Accuracy
accuracy = running_corrects / float(len(test_dataset))

print('Test Accuracy: {}'.format(accuracy))

100%|██████████| 12/12 [00:12<00:00,  1.04s/it]

Test Accuracy: 0.5651572761838921





# Data augmentation

**Set Arguments**

In [0]:
# Hyperparameters for the data-augmentation run
DEVICE = 'cuda'      # device the network and the batches are moved to
NUM_CLASSES = 101    # output size of the replaced final layer
BATCH_SIZE = 256     # batch size of the three DataLoaders
LR = 0.0001          # initial learning rate passed to SGD
MOMENTUM = 0.9       # SGD momentum
WEIGHT_DECAY = 5e-5  # SGD weight decay (regularization)
NUM_EPOCHS = 20      # number of passes over the training subset
STEP_SIZE = 20       # StepLR period; equal to NUM_EPOCHS, so the step-down only happens after the last epoch
GAMMA = 0.1          # StepLR multiplicative factor
LOG_FREQUENCY = 10   # the training loss is printed every LOG_FREQUENCY steps

**Prepare Net**

In [0]:
# Training pipeline: three random augmentations (each applied with
# probability 0.5) followed by the deterministic resize / crop / normalise steps
train_transform = transforms.Compose([
    transforms.RandomGrayscale(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.Resize(256),      # shorter side of the PIL image -> 256 px
    transforms.CenterCrop(224),  # central 224x224 patch, the input size fed to AlexNet in this notebook
    transforms.ToTensor(),       # PIL Image -> torch.Tensor
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Evaluation / test pipeline: no random augmentation
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Pretrained AlexNet whose last layer is replaced to output NUM_CLASSES scores
net = alexnet(pretrained=True)
net.classifier[6] = nn.Linear(in_features=4096, out_features=NUM_CLASSES)

# Loss, optimizer (over every parameter of the network) and step-down scheduler
criterion = nn.CrossEntropyLoss()
parameters_to_optimize = net.parameters()
optimizer = optim.SGD(
    parameters_to_optimize,
    lr=LR,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=STEP_SIZE,
    gamma=GAMMA,
)

**Prepare Dataset**

In [0]:
DATA_DIR = 'Caltech101/101_ObjectCategories'

# The 'train' split is indexed twice: once with the augmenting
# train_transform and once with eval_transform for the validation subset.
train_dataset = Caltech(DATA_DIR, split='train', transform=train_transform)
val_dataset = Caltech(DATA_DIR, split='train', transform=eval_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

# Stratified 50/50 split of the sample positions of train.txt
train_indexes, val_indexes = divide_set("Caltech101/train.txt")

# Restrict each view of the 'train' split to its own half
train_dataset = Subset(train_dataset, train_indexes)
val_dataset = Subset(val_dataset, val_indexes)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

**Train**

In [26]:
net = net.to(DEVICE)
# The flag has to be assigned: a bare `cudnn.benchmark` expression only reads
# it and changes nothing.
cudnn.benchmark = True
current_step = 0

for epoch in range(NUM_EPOCHS):
  # Read the learning rate from the optimizer itself: this is the value the
  # coming updates use. scheduler.get_lr() called outside step() can report an
  # extra GAMMA factor on decay epochs in recent PyTorch versions.
  current_lr = [group['lr'] for group in optimizer.param_groups]
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

  net.train() # training mode; nothing changes it inside the epoch

  for images, labels in train_dataloader:
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    optimizer.zero_grad() # gradients accumulate across backward passes, reset them

    outputs = net(images)
    loss = criterion(outputs, labels)

    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    loss.backward()  # compute gradients
    optimizer.step() # update weights

    current_step += 1

  scheduler.step() # one scheduler step per epoch

Starting epoch 1/20, LR = [0.0001]




Step 0, Loss 5.0486955642700195
Step 10, Loss 4.716411590576172
Starting epoch 2/20, LR = [0.0001]
Step 20, Loss 4.288062572479248
Starting epoch 3/20, LR = [0.0001]
Step 30, Loss 4.198020935058594
Starting epoch 4/20, LR = [0.0001]
Step 40, Loss 3.902456045150757
Starting epoch 5/20, LR = [0.0001]
Step 50, Loss 3.5178685188293457
Starting epoch 6/20, LR = [0.0001]
Step 60, Loss 3.360595226287842
Starting epoch 7/20, LR = [0.0001]
Step 70, Loss 3.4114186763763428
Starting epoch 8/20, LR = [0.0001]
Step 80, Loss 3.4172277450561523
Starting epoch 9/20, LR = [0.0001]
Step 90, Loss 3.317321300506592
Starting epoch 10/20, LR = [0.0001]
Step 100, Loss 3.090567111968994
Starting epoch 11/20, LR = [0.0001]
Step 110, Loss 2.975327730178833
Step 120, Loss 2.9421327114105225
Starting epoch 12/20, LR = [0.0001]
Step 130, Loss 2.636490821838379
Starting epoch 13/20, LR = [0.0001]
Step 140, Loss 2.744166612625122
Starting epoch 14/20, LR = [0.0001]
Step 150, Loss 2.7109386920928955
Starting epoch 15

**Validation**

In [27]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.eval() # Set Network to evaluation mode (same as net.train(False))

running_corrects = 0
# Evaluation needs no gradients: no_grad() skips building the autograd graph,
# which saves memory and time without changing the predictions.
with torch.no_grad():
  for images, labels in tqdm(val_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)
    outputs = net(images)
    # Predicted class: index of the highest score per sample
    _, preds = torch.max(outputs, 1)
    running_corrects += torch.sum(preds == labels).item()

accuracy = running_corrects / float(len(val_dataset))
print('Validation Accuracy: {}'.format(accuracy))

100%|██████████| 12/12 [00:15<00:00,  1.33s/it]

Validation Accuracy: 0.5404564315352697





**Test**

In [0]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.eval() # Set Network to evaluation mode (same as net.train(False))

running_corrects = 0
# Evaluation needs no gradients: no_grad() skips building the autograd graph,
# which saves memory and time without changing the predictions.
with torch.no_grad():
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = net(images)

    # Get predictions: index of the highest score per sample
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()

# Calculate Accuracy
accuracy = running_corrects / float(len(test_dataset))

print('Test Accuracy: {}'.format(accuracy))

100%|██████████| 12/12 [00:11<00:00,  1.06it/s]

Test Accuracy: 0.39751123401313515





# Other network (ResNet-18)

**Set Arguments**

In [0]:
# Hyperparameters for the ResNet-18 run
DEVICE = 'cuda'      # device the network and the batches are moved to
NUM_CLASSES = 101    # output size of the replaced final layer
BATCH_SIZE = 64      # batch size of the three DataLoaders
LR = 0.1             # initial learning rate passed to SGD
MOMENTUM = 0.9       # SGD momentum
WEIGHT_DECAY = 5e-5  # SGD weight decay (regularization)
NUM_EPOCHS = 20      # number of passes over the training subset
STEP_SIZE = 20       # StepLR period; equal to NUM_EPOCHS, so the step-down only happens after the last epoch
GAMMA = 0.1          # StepLR multiplicative factor
LOG_FREQUENCY = 10   # the training loss is printed every LOG_FREQUENCY steps

**Prepare  Net**

In [0]:
# Preprocessing applied to the training images
train_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Preprocessing applied to the validation / test images
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Pretrained ResNet-18 whose final fully connected layer is replaced to
# output NUM_CLASSES scores
net = resnet18(pretrained=True)
net.fc = nn.Linear(in_features=512, out_features=NUM_CLASSES)

# Alternative backbone that was tried:
#net = vgg11(pretrained=True)
#net.classifier[6] = nn.Linear(4096, NUM_CLASSES)

# Loss, optimizer (over every parameter of the network) and step-down scheduler
criterion = nn.CrossEntropyLoss()
parameters_to_optimize = net.parameters()
optimizer = optim.SGD(
    parameters_to_optimize,
    lr=LR,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=STEP_SIZE,
    gamma=GAMMA,
)

**Prepare Dataset**

In [0]:
DATA_DIR = 'Caltech101/101_ObjectCategories'

# The 'train' split is indexed twice so that the validation subset is served
# through eval_transform instead of silently inheriting train_transform
# (both pipelines are identical in this section, so the numbers do not change).
train_val_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
val_source_dataset = Caltech(DATA_DIR, split='train', transform=eval_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

# Stratified 50/50 split of the sample positions of train.txt
train_indexes, val_indexes = divide_set("Caltech101/train.txt")

# split the train set into training and validation subsets
train_dataset = Subset(train_val_dataset, train_indexes)
val_dataset = Subset(val_source_dataset, val_indexes)

# Training batches are shuffled and the last incomplete one is dropped;
# validation and test are read in order and in full.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

**Train**

In [7]:
net = net.to(DEVICE)
# The flag has to be assigned: a bare `cudnn.benchmark` expression only reads
# it and changes nothing.
cudnn.benchmark = True
current_step = 0

for epoch in range(NUM_EPOCHS):
  # Read the learning rate from the optimizer itself: this is the value the
  # coming updates use. scheduler.get_lr() called outside step() can report an
  # extra GAMMA factor on decay epochs in recent PyTorch versions.
  current_lr = [group['lr'] for group in optimizer.param_groups]
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

  net.train() # training mode; nothing changes it inside the epoch

  for images, labels in train_dataloader:
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    optimizer.zero_grad() # gradients accumulate across backward passes, reset them
    outputs = net(images)

    loss = criterion(outputs, labels)

    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    loss.backward()  # compute gradients
    optimizer.step() # update weights

    current_step += 1

  scheduler.step() # one scheduler step per epoch

Starting epoch 1/20, LR = [0.1]




Step 0, Loss 4.707266330718994
Step 10, Loss 5.475186824798584
Step 20, Loss 8.89398193359375
Step 30, Loss 6.075498580932617
Step 40, Loss 4.70463752746582
Starting epoch 2/20, LR = [0.1]
Step 50, Loss 4.72268533706665
Step 60, Loss 4.764501094818115
Step 70, Loss 5.711373805999756
Step 80, Loss 5.067417144775391
Starting epoch 3/20, LR = [0.1]
Step 90, Loss 4.206470966339111
Step 100, Loss 4.1449875831604
Step 110, Loss 4.236044883728027
Step 120, Loss 4.193132400512695
Step 130, Loss 4.253264427185059
Starting epoch 4/20, LR = [0.1]
Step 140, Loss 3.9865050315856934
Step 150, Loss 3.9328885078430176
Step 160, Loss 4.390455722808838
Step 170, Loss 4.192368507385254
Starting epoch 5/20, LR = [0.1]
Step 180, Loss 3.7515127658843994
Step 190, Loss 3.7768361568450928
Step 200, Loss 3.997812271118164
Step 210, Loss 3.8569769859313965
Step 220, Loss 3.9684817790985107
Starting epoch 6/20, LR = [0.1]
Step 230, Loss 4.0500168800354
Step 240, Loss 3.6425962448120117
Step 250, Loss 4.082515716

**Validation**

In [8]:
net = net.to(DEVICE)
net.eval() # evaluation mode (same as net.train(False))

running_corrects = 0
# Evaluation needs no gradients: no_grad() skips building the autograd graph,
# which saves memory and time without changing the predictions.
with torch.no_grad():
  for images, labels in tqdm(val_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)
    outputs = net(images)
    # Predicted class: index of the highest score per sample
    _, preds = torch.max(outputs, 1)
    running_corrects += torch.sum(preds == labels).item()

accuracy = running_corrects / float(len(val_dataset))
print('Validation Accuracy: {}'.format(accuracy))

100%|██████████| 46/46 [00:12<00:00,  3.55it/s]

Validation Accuracy: 0.3921161825726141





**Test**

In [0]:
net = net.to(DEVICE)
net.eval() # evaluation mode (same as net.train(False))

running_corrects = 0
# Evaluation needs no gradients: no_grad() skips building the autograd graph,
# which saves memory and time without changing the predictions.
with torch.no_grad():
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)
    outputs = net(images)
    # Predicted class: index of the highest score per sample
    _, preds = torch.max(outputs, 1)
    running_corrects += torch.sum(preds == labels).item()

accuracy = running_corrects / float(len(test_dataset))

print('Test Accuracy: {}'.format(accuracy))

100%|██████████| 46/46 [00:09<00:00,  4.76it/s]

Test Accuracy: 0.9370895264431386



