
**Install requirements**

In [1]:
!pip3 install 'torch==1.3.1'
!pip3 install 'torchvision==0.5.0'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'

Collecting torch==1.3.1
  Using cached https://files.pythonhosted.org/packages/88/95/90e8c4c31cfc67248bf944ba42029295b77159982f532c5689bcfe4e9108/torch-1.3.1-cp36-cp36m-manylinux1_x86_64.whl
[31mERROR: torchvision 0.5.0 has requirement torch==1.4.0, but you'll have torch 1.3.1 which is incompatible.[0m
Installing collected packages: torch
  Found existing installation: torch 1.4.0
    Uninstalling torch-1.4.0:
      Successfully uninstalled torch-1.4.0
Successfully installed torch-1.3.1
Collecting torch==1.4.0
  Using cached https://files.pythonhosted.org/packages/24/19/4804aea17cd136f1705a5e98a00618cb8f6ccc375ad8bfa437408e09d058/torch-1.4.0-cp36-cp36m-manylinux1_x86_64.whl
Installing collected packages: torch
  Found existing installation: torch 1.3.1
    Uninstalling torch-1.3.1:
      Successfully uninstalled torch-1.3.1
Successfully installed torch-1.4.0


**Import libraries**

In [2]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet

from PIL import Image
from tqdm import tqdm

**Set Arguments**

In [46]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 102 # 101 + 1: There is am extra Background class that should be removed 

BATCH_SIZE = 256     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 0.01            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 40      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

**Define Data Preprocessing**

In [23]:
# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Normalizes tensor with mean and standard deviation
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))                                    
])

**Prepare Dataset**

In [24]:
import random

# Clone github repository with data
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/PierGiorgioMingoia/Homework2-Caltech101.git
  !mv 'Homework2-Caltech101' 'Caltech101'

DATA_DIR = 'Caltech101/101_ObjectCategories'
from Caltech101.caltech_dataset import Caltech

# Prepare Pytorch train/test Datasets
train_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)
print(len(train_dataset))
indexes = list(range(5784))
indexes= random.sample(indexes, len(indexes))
print(indexes)
train_indexes = indexes[:int(len(indexes)/2)]# split the indices for your train split
val_indexes = indexes[int(len(indexes)/2):]# split the indices for your val split



val_dataset = Subset(train_dataset, val_indexes)
train_dataset = Subset(train_dataset, train_indexes)


# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

5784
[2390, 1803, 2473, 1241, 3846, 2500, 5000, 1647, 97, 4881, 3470, 4451, 3445, 946, 4442, 3823, 530, 1433, 1422, 3613, 4281, 1589, 5415, 3323, 1289, 4087, 937, 2684, 518, 478, 5090, 2837, 1542, 826, 4015, 5678, 892, 2032, 5693, 1829, 3604, 4508, 5165, 1204, 3253, 3299, 2491, 1337, 5598, 2813, 4270, 2893, 4211, 4762, 4470, 3148, 3248, 4902, 5701, 5710, 1496, 16, 1392, 5222, 4064, 3554, 712, 2327, 1833, 1742, 3634, 3754, 5484, 3688, 873, 2960, 3914, 1288, 4977, 2248, 1544, 2384, 3907, 3621, 1891, 3412, 5652, 5161, 3405, 408, 788, 213, 992, 3059, 4837, 3216, 392, 3921, 4402, 5676, 5565, 2639, 4864, 5228, 4236, 5550, 702, 5392, 3905, 5018, 2273, 5523, 3643, 2641, 5047, 3140, 768, 3410, 3169, 4603, 316, 1953, 973, 431, 2616, 3245, 2607, 2649, 143, 2095, 4491, 517, 1303, 744, 4457, 2959, 2627, 4726, 3338, 3041, 3385, 2603, 698, 3490, 1716, 4833, 465, 2360, 1968, 5083, 4692, 4392, 2806, 5648, 979, 3172, 4804, 1176, 4760, 152, 3144, 2160, 652, 1147, 2290, 4117, 2510, 3576, 5140, 514, 4689, 

**Prepare Dataloaders**

In [25]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

**Prepare Network**

In [26]:
net = alexnet() # Loading AlexNet model

# AlexNet has 1000 output neurons, corresponding to the 1000 ImageNet's classes
# We need 101 outputs for Caltech-101
net.classifier[6] = nn.Linear(4096, NUM_CLASSES) # nn.Linear in pytorch is a fully connected layer
                                                 # The convolutional layer is nn.Conv2d

# We just changed the last layer of AlexNet with a new fully connected layer with 101 outputs
# It is strongly suggested to study torchvision.models.alexnet source code

**Prepare Training**

In [27]:
# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;) 
parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet

# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

**Train**

In [28]:
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

cudnn.benchmark # Calling this optimizes runtime

current_step = 0
# Start iterating over the epochs
for epoch in range(NUM_EPOCHS):
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))

  # Iterate over the dataset
  for images, labels in train_dataloader:
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    net.train() # Sets module in training mode

    # PyTorch, by default, accumulates gradients after each backward pass
    # We need to manually set the gradients to zero before starting a new iteration
    optimizer.zero_grad() # Zero-ing the gradients

    # Forward pass to the network
    outputs = net(images)

    # Compute loss based on output and ground truth
    loss = criterion(outputs, labels)

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    # Compute gradients for each layer and update weights
    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on accumulated gradients

    current_step += 1

  # Step the scheduler
  scheduler.step() 

Starting epoch 1/50, LR = [0.01]
Step 0, Loss 4.623413562774658
Step 10, Loss 4.611185550689697
Starting epoch 2/50, LR = [0.01]
Step 20, Loss 4.5904035568237305
Starting epoch 3/50, LR = [0.01]
Step 30, Loss 4.564298629760742
Starting epoch 4/50, LR = [0.01]
Step 40, Loss 4.509694576263428
Starting epoch 5/50, LR = [0.01]
Step 50, Loss 4.481357097625732
Starting epoch 6/50, LR = [0.01]
Step 60, Loss 4.3375372886657715
Starting epoch 7/50, LR = [0.01]
Step 70, Loss 4.141532897949219
Starting epoch 8/50, LR = [0.01]
Step 80, Loss 4.138078212738037
Starting epoch 9/50, LR = [0.01]
Step 90, Loss 4.221368312835693
Starting epoch 10/50, LR = [0.01]
Step 100, Loss 4.249381065368652
Starting epoch 11/50, LR = [0.01]
Step 110, Loss 4.1118364334106445
Step 120, Loss 3.988600730895996
Starting epoch 12/50, LR = [0.01]
Step 130, Loss 3.964048147201538
Starting epoch 13/50, LR = [0.01]
Step 140, Loss 3.895540714263916
Starting epoch 14/50, LR = [0.01]
Step 150, Loss 3.8449621200561523
Starting epo

**Validation**

In [10]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(val_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = net(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(val_dataset))

print('Validation Accuracy: {}'.format(accuracy))

100%|██████████| 12/12 [00:07<00:00,  1.60it/s]

Validation Accuracy: 0.29564315352697096





**Test**

In [11]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(test_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = net(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(test_dataset))

print('Test Accuracy: {}'.format(accuracy))

100%|██████████| 12/12 [00:07<00:00,  1.54it/s]

Test Accuracy: 0.29934324230902176





In [12]:
torch.save(net.state_dict(), "model.pth")

In [13]:
# For freezeing some layers and retrain
#for param in net.parameters():
    #param.requires_grad = False


In [14]:
# For data augmentation aggiungere trasformazioni

# Step 3 Transfer Learning


In [57]:
AlexNet_model = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True)
AlexNet_model.eval()
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), 
                                      normalize # Normalizes tensor with mean and standard deviation
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                     normalize                                    
])

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


In [58]:
print(AlexNet_model.eval())

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [59]:
for param in AlexNet_model.features.parameters():
    param.requires_grad = False

In [60]:
# Prepare Pytorch train/test Datasets
train_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)
print(len(train_dataset))
indexes = list(range(5784))
indexes= random.sample(indexes, len(indexes))
print(indexes)
train_indexes = indexes[:int(len(indexes)/2)]# split the indices for your train split
val_indexes = indexes[int(len(indexes)/2):]# split the indices for your val split



val_dataset = Subset(train_dataset, val_indexes)
train_dataset = Subset(train_dataset, train_indexes)


# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

5784
[5025, 3989, 4338, 2486, 4076, 1662, 1230, 5559, 3233, 46, 949, 5290, 1021, 308, 5062, 2300, 314, 3642, 2576, 5651, 2693, 4737, 1536, 3496, 5746, 5588, 5326, 3449, 5688, 4756, 4568, 3194, 1412, 3436, 1648, 4930, 1484, 96, 2750, 3983, 5221, 1817, 4369, 4680, 5120, 706, 479, 1983, 2846, 4589, 456, 1489, 694, 5706, 4437, 2003, 5077, 741, 1539, 472, 3317, 4323, 4968, 3729, 1173, 2216, 127, 2213, 5762, 4043, 287, 5402, 3718, 274, 5139, 762, 3318, 3235, 5151, 352, 1868, 4424, 759, 2662, 4738, 1415, 4385, 1731, 5173, 5427, 4034, 3272, 3752, 1055, 3714, 1944, 3331, 3289, 5629, 4592, 1857, 1390, 4335, 1756, 3924, 3435, 5412, 4371, 4772, 4691, 983, 3866, 1464, 5683, 1126, 3809, 4936, 3731, 1882, 688, 5121, 792, 129, 4479, 3748, 3094, 3473, 5314, 4610, 4759, 2829, 1564, 3677, 4070, 41, 2898, 1802, 2599, 4786, 2287, 4352, 3179, 3072, 1933, 3759, 2222, 1773, 2038, 93, 1795, 975, 3371, 4291, 5029, 1543, 1905, 323, 3140, 4284, 4455, 976, 3715, 2286, 4221, 61, 1195, 1531, 5748, 2514, 2974, 1371, 

In [61]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [63]:
AlexNet_model.classifier[6] = nn.Linear(4096, NUM_CLASSES)

In [64]:
# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;) 
parameters_to_optimize = AlexNet_model.parameters() # In this case we optimize over all the parameters of AlexNet

# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

In [65]:
# By default, everything is loaded to cpu
AlexNet_model = AlexNet_model.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

cudnn.benchmark # Calling this optimizes runtime

current_step = 0
# Start iterating over the epochs
for epoch in range(NUM_EPOCHS):
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))

  # Iterate over the dataset
  for images, labels in train_dataloader:
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    AlexNet_model.train() # Sets module in training mode

    # PyTorch, by default, accumulates gradients after each backward pass
    # We need to manually set the gradients to zero before starting a new iteration
    optimizer.zero_grad() # Zero-ing the gradients

    # Forward pass to the network
    outputs = AlexNet_model(images)

    # Compute loss based on output and ground truth
    loss = criterion(outputs, labels)

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    # Compute gradients for each layer and update weights
    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on accumulated gradients

    current_step += 1

  # Step the scheduler
  scheduler.step() 

Starting epoch 1/40, LR = [0.01]
Step 0, Loss 4.954519271850586
Step 10, Loss 1.656557559967041
Starting epoch 2/40, LR = [0.01]
Step 20, Loss 0.4971584677696228
Starting epoch 3/40, LR = [0.01]
Step 30, Loss 0.2320249229669571
Starting epoch 4/40, LR = [0.01]
Step 40, Loss 0.13505080342292786
Starting epoch 5/40, LR = [0.01]
Step 50, Loss 0.06703073531389236
Starting epoch 6/40, LR = [0.01]
Step 60, Loss 0.048042476177215576
Starting epoch 7/40, LR = [0.01]
Step 70, Loss 0.05130879953503609
Starting epoch 8/40, LR = [0.01]
Step 80, Loss 0.04810072109103203
Starting epoch 9/40, LR = [0.01]
Step 90, Loss 0.02084573730826378
Starting epoch 10/40, LR = [0.01]
Step 100, Loss 0.019974827766418457
Starting epoch 11/40, LR = [0.01]
Step 110, Loss 0.019663618877530098
Step 120, Loss 0.020432494580745697
Starting epoch 12/40, LR = [0.01]
Step 130, Loss 0.02202167734503746
Starting epoch 13/40, LR = [0.01]
Step 140, Loss 0.02116966061294079
Starting epoch 14/40, LR = [0.01]
Step 150, Loss 0.0124

In [66]:
AlexNet_model = AlexNet_model.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
AlexNet_model.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(val_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = AlexNet_model(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(val_dataset))

print('Validation Accuracy: {}'.format(accuracy))

100%|██████████| 12/12 [00:07<00:00,  1.60it/s]

Validation Accuracy: 0.8385200553250346





In [None]:
# For possible data augmentation we can use different transformation
torchvision.transforms.RandomHorizontalFlip(p=0.5)
torchvision.transforms.functional.adjust_gamma(img: torch.Tensor, gamma: float, gain: float = 1)
torchvision.transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0)