<a href="https://colab.research.google.com/github/TYZQ/temporary/blob/master/lab2_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


**Install requirements**

In [None]:
!pip3 install 'torch==1.3.1'
!pip3 install 'torchvision==0.5.0'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm' 

**Import libraries**

In [None]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
import torchvision.models as models

from PIL import Image
from tqdm import tqdm

import copy
import pandas as pd

# Visualizations
import matplotlib.pyplot as plt




**Set Arguments**

In [None]:
# Run settings and hyperparameters read by the cells of this notebook.
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 101    # 101 + 1: There is an extra Background class that should be removed

BATCH_SIZE = 128
#BATCH_SIZE = 256     # Higher batch sizes allow for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 1e-2            # The initial Learning Rate (size of each weight update: a larger LR takes bigger steps)
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 30      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10   # The training loss is printed every LOG_FREQUENCY optimization steps

**Define Data Preprocessing**

In [None]:
# Per-channel mean / standard deviation of ImageNet, used by every pipeline below.
# (A plain [0.5, 0.5, 0.5] / [0.5, 0.5, 0.5] normalization was the alternative tried.)
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]

# Pipeline attached to the training split.
train_transform = transforms.Compose([
    transforms.Resize(256),      # shorter side of the PIL image becomes 256
    transforms.CenterCrop(224),  # central square patch; 224 because torchvision's AlexNet
                                 # needs a 224x224 input - keep this in mind when changing
                                 # the transformations, otherwise you get an error
    transforms.ToTensor(),       # PIL Image -> torch.Tensor
    transforms.Normalize(_IMAGENET_MEAN, _IMAGENET_STD),
])

# Pipeline attached to the test split: same steps as the training one.
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(_IMAGENET_MEAN, _IMAGENET_STD),
])

# Augmented pipeline, intended for the training set ONLY.
# RandomRotation(degrees=15) and ColorJitter() were other candidates.
augmentation_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.Grayscale(num_output_channels=3),  # grayscale, replicated on 3 channels
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_IMAGENET_MEAN, _IMAGENET_STD),
])


**Prepare Dataset**

In [None]:
#import shutil
#shutil.rmtree('./Caltech101', ignore_errors=True)

# Clone github repository with data
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/lindaludovisi/ellelle.git
  !mv 'ellelle' 'Caltech101'


DATA_DIR = 'Caltech101/101_ObjectCategories' #ObjectCategories is the directory containing all images
from Caltech101.caltech_dataset import Caltech #Caltech is a class in caltech_dataset.py


# Prepare Pytorch train/test Datasets
train_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)


#Create a list of all the indexes of the original train dataset
original_indexes=list(range(len(train_dataset)))

train_indexes = []
val_indexes = []
#Divide train dataset into train/val
#The idea here is to assign indexes divisible by 2 to val_dataset and the others to train_dataset
#The proportion between categories is maintained because the dataset is sorted by alphabetical order
for index in original_indexes:
  if ( (index %2) == 0) :  
    val_indexes.append(index)
  else:                    
    train_indexes.append(index)

val_dataset = Subset(train_dataset, val_indexes)
train_dataset = Subset(train_dataset, train_indexes)

#Perform data augmentation for train_dataset
#train_dataset.dataset.transform = augmentation_transform

# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))



**Prepare Dataloaders**

In [None]:
# Dataloaders iterate over pytorch datasets and transparently provide useful
# functions (e.g. parallelization and shuffling).
# Settings common to the three loaders:
_loader_common = dict(batch_size=BATCH_SIZE, num_workers=4)

# Training: reshuffled at every epoch, the trailing incomplete batch is dropped
train_dataloader = DataLoader(train_dataset, shuffle=True, drop_last=True, **_loader_common)

# Validation / test: fixed order, every sample is kept
val_dataloader = DataLoader(val_dataset, shuffle=False, **_loader_common)
test_dataloader = DataLoader(test_dataset, shuffle=False, **_loader_common)



**Prepare Network**

In [None]:
def set_parameter_requires_grad(model, feature_extracting):
  """Freeze a group of parameters of `model` so they are not updated in training.

  When fine-tuning the whole network, every `.requires_grad` can be left at its
  default of True; this helper switches it off for one group selected by name.

  Args:
    model: module whose `named_parameters()` are inspected.
    feature_extracting: 'conv' freezes every parameter whose name contains
      'features'; 'fc' freezes every parameter whose name contains
      'classifier' and prints each frozen name; any other value freezes nothing.

  Returns:
    None after freezing a group, True when nothing was frozen.
  """
  if feature_extracting == 'conv':
    frozen_tag, echo_names = 'features', False
  elif feature_extracting == 'fc':
    frozen_tag, echo_names = 'classifier', True
  else:
    return True

  for param_name, tensor in model.named_parameters():
    if frozen_tag not in param_name:
      continue
    tensor.requires_grad = False
    if echo_names:
      print(param_name)


def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
  """Build a torchvision classifier whose last layer outputs `num_classes` scores.

  Args:
    model_name: "vgg" (VGG11 with batch norm) or "alexnet".
    num_classes: output size of the replaced final fully connected layer.
    feature_extract: forwarded to `set_parameter_requires_grad` to choose which
      group of layers is frozen.
    use_pretrained: passed as `pretrained=` to the torchvision constructor.

  Returns:
    (model, input_size): the network and the expected image side (224).
    For any other `model_name` the result is (None, 0).

  NOTE(review): the two branches freeze at different moments. For "vgg" the
  freeze happens before the final layer is replaced, so the new layer keeps its
  default requires_grad; for "alexnet" it happens after, so with
  feature_extract='fc' the new layer is frozen too. Confirm this is intended.
  """
  if model_name == "vgg":
    # VGG11_bn
    backbone = models.vgg11_bn(pretrained=use_pretrained)
    set_parameter_requires_grad(backbone, feature_extract)
    head_inputs = backbone.classifier[6].in_features
    backbone.classifier[6] = nn.Linear(head_inputs, num_classes)
    return backbone, 224

  if model_name == "alexnet":
    # Alexnet
    backbone = models.alexnet(pretrained=use_pretrained)
    head_inputs = backbone.classifier[6].in_features
    # nn.Linear in pytorch is a fully connected layer
    # (the convolutional layer is nn.Conv2d)
    backbone.classifier[6] = nn.Linear(head_inputs, num_classes)
    set_parameter_requires_grad(backbone, feature_extract)
    return backbone, 224

  # Unrecognised architecture name: nothing is built
  return None, 0


# Configurations that can be swapped in (only one should be active):
#   AlexNet, not pre-trained
#net, input_size = initialize_model("alexnet", NUM_CLASSES, False, use_pretrained=False)
#   AlexNet, pre-trained
#net, input_size = initialize_model("alexnet", NUM_CLASSES, feature_extract=False, use_pretrained=True)
#   AlexNet, pre-trained, conv layers frozen
#net, input_size = initialize_model("alexnet", NUM_CLASSES, feature_extract='conv', use_pretrained=True)
#   AlexNet, pre-trained, fc layers frozen
#net, input_size = initialize_model("alexnet", NUM_CLASSES, feature_extract='fc', use_pretrained=True)

# Active configuration: VGG, pre-trained, conv layers frozen
net, input_size = initialize_model(
    model_name="vgg",
    num_classes=NUM_CLASSES,
    feature_extract='conv',
    use_pretrained=True,
)

# Show the layers of the model that was just instantiated
print(net)



**Prepare Training**

In [None]:
# Loss: cross entropy, the usual choice for classification
criterion = nn.CrossEntropyLoss()

# Parameters handed to the optimizer: only those still requiring gradients.
# (To optimize everything regardless, net.parameters() could be passed instead.)
trainable_named = [entry for entry in net.named_parameters() if entry[1].requires_grad]

print("Params to learn:")
for entry in trainable_named:
  print("\t", entry[0])

parameters_to_optimize = [entry[1] for entry in trainable_named]

# Optimizer: updates the weights based on the loss. SGD with momentum is used.
optimizer = optim.SGD(
    parameters_to_optimize,
    lr=LR,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)
# Adam was tried as an alternative:
#optimizer = optim.Adam(parameters_to_optimize, lr=LR, weight_decay=WEIGHT_DECAY )

# Scheduler: changes the learning rate during training. The step(-down) policy
# multiplies the learning rate by GAMMA every STEP_SIZE epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

**Train and Validation**

In [None]:
# Train for NUM_EPOCHS epochs, validating after each one, and keep the weights
# of the epoch with the best validation accuracy.

# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

# Let cuDNN benchmark its algorithms and pick the fastest for the input shapes.
# The flag must be assigned: merely reading the attribute has no effect.
cudnn.benchmark = True

history = []          # one [train_loss, valid_loss] pair per epoch
val_acc_history = []  # validation accuracy of each epoch
best_acc = 0
# Start from the current weights so `best_net` is always defined, even if no
# epoch ever scores above an accuracy of 0.
best_net = copy.deepcopy(net.state_dict())
current_step = 0

# Start iterating over the epochs
for epoch in range(NUM_EPOCHS):
  # Read the learning rate straight from the optimizer: this is the value the
  # coming updates will use (scheduler.get_lr() can report a different number
  # on the epochs where the decay is applied in newer PyTorch releases).
  current_lr = [group['lr'] for group in optimizer.param_groups]
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

  # Sums of per-sample losses for this epoch, averaged at the end of the epoch
  train_loss = 0.0
  valid_loss = 0.0
  # Samples actually seen in training: with drop_last=True this can be smaller
  # than the dataset size
  train_samples = 0

  #
  #   TRAINING
  #

  net.train() # Training mode, set once per epoch (validation switches it off)

  for images, labels in train_dataloader:
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Clear the gradients left by the previous step
    optimizer.zero_grad()

    # Forward pass, loss, backpropagation and weight update
    outputs = net(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # criterion returns the batch mean: scale it back to a sum over the batch
    batch_count = images.size(0)
    train_loss += loss.item() * batch_count
    train_samples += batch_count

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Train Loss {}'.format(current_step, loss.item()))

    current_step += 1

  # Step the scheduler once per epoch
  scheduler.step()

  #
  #   VALIDATION
  #

  net.eval() # Set Network to evaluation mode

  running_corrects = 0
  # No gradients are needed here: skip building the autograd graph
  with torch.no_grad():
    for images, labels in tqdm(val_dataloader):
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)

      # Forward Pass
      outputs = net(images)

      # Validation loss, scaled from batch mean to batch sum
      loss = criterion(outputs, labels)
      valid_loss += loss.item() * images.size(0)

      # Predicted class = index of the largest output
      _, preds = torch.max(outputs, 1)

      # Update Corrects
      running_corrects += torch.sum(preds == labels).item()

  # Calculate Accuracy
  accuracy = running_corrects / float(len(val_dataset))

  print('Validation Accuracy: {}'.format(accuracy))

  # Append the value of the accuracy to the list
  val_acc_history.append(accuracy)

  # Keep a copy of the weights of the best epoch so far
  if (accuracy > best_acc):
    best_acc = accuracy
    best_net = copy.deepcopy(net.state_dict())

  # Average losses per sample (guarding against an epoch without any batch)
  train_loss = train_loss / max(train_samples, 1)
  valid_loss = valid_loss / len(val_dataloader.dataset)

  # Save train and validation loss
  history.append([train_loss, valid_loss])

# At the end, load the best model weights
net.load_state_dict(best_net)

print(history)

**Validation**

In [None]:
# Re-evaluate the network (the best weights were loaded at the end of training)
# on the validation set, then plot the loss curves and print a model summary.
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.eval() # Set Network to evaluation mode

running_corrects = 0
# Inference only: disable autograd so no graph is built for the forward passes
with torch.no_grad():
  for images, labels in tqdm(val_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass in order to get logits/output
    outputs = net(images)

    # Get predictions from the maximum value
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()

# Calculate Accuracy
accuracy = running_corrects / float(len(val_dataset))

print('Validation Accuracy performed on the best model: {}'.format(accuracy))


# Turn the per-epoch [train_loss, valid_loss] pairs into a DataFrame
history = pd.DataFrame( history, columns=['train_loss', 'valid_loss'])

# Plot train_loss vs valid_loss
plt.figure(figsize=(8, 6))
for c in ['train_loss', 'valid_loss']:
  plt.plot(history[c], label=c)

plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Average Loss')
# NOTE(review): the title names SGD explicitly; update it by hand if another
# optimizer is selected.
plt.title(f'Training and Validation Losses using SGD optimizer, LR={LR}, epochs={NUM_EPOCHS}')

from torchsummary import summary
# Follow DEVICE instead of assuming a GPU: the summary builds its dummy input
# on the device named here, which has to match where the network lives.
summary(
        net, input_size=(3, 224, 224), batch_size=BATCH_SIZE, device=torch.device(DEVICE).type)

**Test**

In [None]:
# Measure the accuracy of the network on the test set.
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.eval() # Set Network to evaluation mode

running_corrects = 0
# Inference only: disable autograd so no graph is built for the forward passes
with torch.no_grad():
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = net(images)

    # Get predictions
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()

# Calculate Accuracy
accuracy = running_corrects / float(len(test_dataset))

print('Test Accuracy: {}'.format(accuracy))