In [17]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import random_split
from torch.utils.data.dataloader import DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import time

Make the dataset compatible with the input of the AlexNet-style model: resize every image to 227×227, convert it to a tensor and normalize each channel.

In [18]:
# Preprocessing applied to every image: resize to the 227x227 input the network
# is sized for, convert to a tensor, then normalize each of the three channels.
# NOTE(review): the mean/std values match the commonly used ImageNet channel
# statistics, not statistics computed from CIFAR-10 - confirm this is intended.
transform_custom = transforms.Compose(
    [
        transforms.Resize(size=(227, 227)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

Loading the dataset

In [19]:
# Download CIFAR-10 into data/ (if it is not already there) and apply the same
# preprocessing pipeline to both the training and the test split.
train_data = CIFAR10(root='data/', train=True, transform=transform_custom, download=True)
test_data = CIFAR10(root='data/', train=False, transform=transform_custom, download=True)

In [20]:
# Sanity check: report how many samples each split contains.
print(f"Train data size: {len(train_data)}")
print(f"Test data size: {len(test_data)}")

The training set is split into a training subset and a validation subset.

In [21]:
# Fixed seed so the train/validation split is the same on every run.
torch.manual_seed(30)
validation_size = 15000

# This cell rebinds `train_data`, so running it a second time would otherwise
# split the already-reduced subset again. Recover the full training set first
# so the cell is idempotent.
if isinstance(train_data, torch.utils.data.Subset):
    full_train_data = train_data.dataset
else:
    full_train_data = train_data

# Both subsets must be non-empty, otherwise training or validation breaks later.
if not 0 < validation_size < len(full_train_data):
    raise ValueError(
        f"validation_size must be between 1 and {len(full_train_data) - 1}, got {validation_size}"
    )

train_size = len(full_train_data) - validation_size
train_data, validation_data = random_split(full_train_data, [train_size, validation_size])

In [22]:
# Mini-batch loaders for the three splits (64 samples per batch). Only the
# training loader reshuffles its data; validation and test keep a fixed order.
train_dl = DataLoader(dataset=train_data, shuffle=True, batch_size=64)
validation_dl = DataLoader(dataset=validation_data, shuffle=False, batch_size=64)
test_dl = DataLoader(dataset=test_data, shuffle=False, batch_size=64)

# CNN

A medium-depth network with 3 convolutional and 2 fully connected layers. The activation function (ReLU, tanh or sigmoid) is selected by name when the model is constructed.

In [23]:
class Custom_AlexNet(nn.Module):
    """AlexNet-style CNN with three convolutional and two fully connected layers.

    The layer sizes are chosen for 3-channel 227x227 inputs: with that input
    size the feature map after the last pooling step is 256 x 6 x 6 = 9216
    values, which is exactly what ``fc1`` consumes. The network returns 10 raw
    class scores (logits) per sample; no softmax is applied.

    Args:
        activation_fn: name of the non-linearity applied after every conv layer
            and after ``fc1``. One of "relu", "tanh" or "sigmoid".

    Raises:
        ValueError: if ``activation_fn`` is not one of the supported names.
    """

    # Supported activation names mapped to the callables implementing them.
    _ACTIVATIONS = {
        "relu": F.relu,
        "tanh": torch.tanh,
        "sigmoid": torch.sigmoid,
    }

    def __init__(self, activation_fn):
        super(Custom_AlexNet, self).__init__()
        # Fail fast on a misspelled name instead of letting forward() return
        # None and crash later inside the loss computation.
        self._resolve_activation(activation_fn)
        self.activation_fn = activation_fn

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=0)
        # The same (parameter-free) pooling layer is reused after every conv layer.
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=384, kernel_size=5, stride=1, padding=2)
        self.conv3 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(in_features=9216, out_features=4096)
        self.fc2 = nn.Linear(in_features=4096, out_features=10)

    @classmethod
    def _resolve_activation(cls, name):
        """Return the activation callable registered under ``name`` or raise ValueError."""
        fn = cls._ACTIVATIONS.get(name)
        if fn is None:
            supported = ", ".join(sorted(cls._ACTIVATIONS))
            raise ValueError(
                f"Unsupported activation function {name!r}; expected one of: {supported}"
            )
        return fn

    def forward_activation_fn(self, x, fn):
        """Run the full network on batch ``x`` using ``fn`` as the non-linearity.

        Args:
            x: input batch; the first dimension is treated as the batch size.
            fn: callable applied element-wise after each conv layer and ``fc1``.

        Returns:
            The output of ``fc2`` (10 values per sample).
        """
        x = fn(self.conv1(x))
        x = self.maxpool(x)
        x = fn(self.conv2(x))
        x = self.maxpool(x)
        x = fn(self.conv3(x))
        x = self.maxpool(x)
        # Flatten everything except the batch dimension for the linear layers.
        x = x.reshape(x.shape[0], -1)
        x = fn(self.fc1(x))
        # No activation on the last layer: it produces raw class scores.
        x = self.fc2(x)
        return x

    def forward(self, x):
        """Apply the network with the activation selected by ``self.activation_fn``.

        Raises:
            ValueError: if ``self.activation_fn`` was changed to an unsupported name.
        """
        # Looked up on every call so a later change of `activation_fn` is honoured.
        fn = self._resolve_activation(self.activation_fn)
        return self.forward_activation_fn(x, fn)

# Training and Validation

Use CUDA (API for GPU), if available, to speed up the execution.

In [24]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [25]:
# Display which device was selected.
device

In [26]:
def train_validate(model, num_epoch, algo, momentum=0, learning_rate=0.001):
    """Train ``model`` and report validation accuracy after every epoch.

    Uses the notebook-level ``train_dl`` and ``validation_dl`` loaders and the
    notebook-level ``device``. The model is moved to ``device`` and is left in
    evaluation mode when the function returns. Progress is printed; nothing is
    returned.

    Args:
        model: the ``nn.Module`` to train.
        num_epoch: number of passes over ``train_dl``.
        algo: optimizer name, either "Adam" or "SGD".
        momentum: momentum for SGD (ignored by Adam); 0 means no momentum.
        learning_rate: step size passed to the optimizer.

    Raises:
        ValueError: if ``algo`` is neither "Adam" nor "SGD".
    """
    # Reject an unknown optimizer up front. Silently returning here would leave
    # the model untrained while later cells still report a score for it.
    if algo not in ("Adam", "SGD"):
        raise ValueError(f"Unsupported optimizer {algo!r}; expected 'Adam' or 'SGD'")

    model = model.to(device=device)
    criterion = nn.CrossEntropyLoss()

    if algo == "Adam":
        # Adam is a widely used adaptive learning rate algorithm.
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    else:
        # Plain SGD; with the default momentum of 0 no momentum is applied.
        optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    # Record the time taken for training and validation.
    begin = time.time()
    for epoch in range(num_epoch):
        print("\nTraining:")
        # Make sure training-mode behaviour is active for any layer that has one.
        model.train()
        epoch_loss = 0.0
        for data, targets in train_dl:
            # Send the batch to the selected device (GPU if available).
            data = data.to(device=device)
            targets = targets.to(device=device)

            # Reset the gradients accumulated by the previous step.
            optimizer.zero_grad()

            # Forward propagation
            output = model(data)
            loss = criterion(output, targets)
            # Backward propagation
            loss.backward()
            # Parameter update
            optimizer.step()
            epoch_loss += loss.item()
        # Mean of the per-batch losses of this epoch.
        print(f"Loss in epoch {epoch} = {epoch_loss/len(train_dl)}")

        model.eval()
        with torch.no_grad():
            print("Validation:")
            correct = 0
            total = 0
            for data, targets in validation_dl:
                data = data.to(device=device)
                targets = targets.to(device=device)
                output = model(data)
                # The predicted class is the index of the largest score.
                predictions = output.argmax(dim=1)
                # .item() keeps `correct` a plain int, so the log line shows a
                # number instead of a tensor repr.
                correct += (predictions == targets).sum().item()
                total += predictions.size(0)
            print(f"{correct} correct out of {total}. Accuracy = {correct / total * 100:.2f}%")
    print(f"Training and validation time: {time.time() - begin} seconds")

# Predict

In [27]:
def predict_score(model):
    """Print the accuracy of ``model`` on the test set.

    Iterates over the notebook-level ``test_dl`` loader on the notebook-level
    ``device`` and prints the number of correct predictions and the accuracy.
    The model is evaluated in eval mode without gradient tracking; its previous
    train/eval mode is restored afterwards. Nothing is returned.

    Args:
        model: a trained ``nn.Module`` that already lives on ``device``.
    """
    print("\nPrediction:")
    correct = 0
    total = 0

    was_training = model.training
    model.eval()
    try:
        # Inference only: without no_grad() every batch would build an autograd
        # graph that is never used.
        with torch.no_grad():
            for data, targets in test_dl:
                data = data.to(device=device)
                targets = targets.to(device=device)
                output = model(data)
                # The predicted class is the index of the largest score.
                predictions = output.argmax(dim=1)
                # .item() keeps `correct` a plain int so it prints as a number.
                correct += (predictions == targets).sum().item()
                total += predictions.size(0)
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    print(f"{correct} correct out of {total}. Accuracy = {correct / total * 100:.2f}%")

In [28]:
# Number of training epochs passed to every train_validate call below.
num_epoch = 5

# Adam

## ReLU

In [29]:
# Build the ReLU variant; the bare `model` expression displays its layers.
model = Custom_AlexNet("relu") 
model

In [30]:
# Train the ReLU model created in the previous cell with the Adam optimizer.
train_validate(model, num_epoch=num_epoch, algo="Adam")

In [31]:
# Test-set accuracy of the ReLU model trained with Adam.
predict_score(model)

## Tanh

In [1]:
# Fresh model with tanh activations, trained with the Adam optimizer.
model = Custom_AlexNet("tanh") 
train_validate(model, num_epoch=num_epoch, algo="Adam")

In [None]:
# Test-set accuracy of the tanh model trained with Adam.
predict_score(model)

## Sigmoid

In [None]:
# Fresh model with sigmoid activations, trained with the Adam optimizer.
model = Custom_AlexNet("sigmoid") 
train_validate(model, num_epoch=num_epoch, algo="Adam")

In [None]:
# Test-set accuracy of the sigmoid model trained with Adam.
predict_score(model)

# SGD - Without Momentum

## ReLU

In [None]:
# Fresh ReLU model trained with SGD; momentum stays at its default of 0.
model = Custom_AlexNet("relu") 
train_validate(model, num_epoch=num_epoch, algo="SGD")

In [None]:
# Test-set accuracy of the ReLU model trained with SGD without momentum.
predict_score(model)

## Tanh

In [None]:
# Fresh tanh model trained with SGD; momentum stays at its default of 0.
model = Custom_AlexNet("tanh") 
train_validate(model, num_epoch=num_epoch, algo="SGD")

In [None]:
# Test-set accuracy of the tanh model trained with SGD without momentum.
predict_score(model)

## Sigmoid

In [None]:
# Fresh sigmoid model trained with SGD; momentum stays at its default of 0.
model = Custom_AlexNet("sigmoid") 
train_validate(model, num_epoch=num_epoch, algo="SGD")

In [None]:
# Test-set accuracy of the sigmoid model trained with SGD without momentum.
predict_score(model)

# SGD - With Momentum

## ReLU

In [None]:
# Fresh ReLU model trained with SGD using a momentum of 0.9.
model = Custom_AlexNet("relu") 
train_validate(model, num_epoch=num_epoch, algo="SGD", momentum=0.9)

In [None]:
# Test-set accuracy of the ReLU model trained with SGD and momentum.
predict_score(model)

## Tanh

In [None]:
# Fresh tanh model trained with SGD using a momentum of 0.9.
model = Custom_AlexNet("tanh") 
train_validate(model, num_epoch=num_epoch, algo="SGD", momentum=0.9)

In [None]:
# Test-set accuracy of the tanh model trained with SGD and momentum.
predict_score(model)

## Sigmoid

In [None]:
# Fresh sigmoid model trained with SGD using a momentum of 0.9.
model = Custom_AlexNet("sigmoid") 
train_validate(model, num_epoch=num_epoch, algo="SGD", momentum=0.9)

In [None]:
# Test-set accuracy of the sigmoid model trained with SGD and momentum.
predict_score(model)

# References
- https://analyticsindiamag.com/implementing-alexnet-using-pytorch-as-a-transfer-learning-model-in-multi-class-classification/
- https://medium.com/analytics-vidhya/alexnet-a-simple-implementation-using-pytorch-30c14e8b6db2
- https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1