# **CIFAR10 AND CIFAR100 CLASSIFICATION USING ALEXNET**

### Importing libraries

In [3]:
# Imports and Setup
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, SubsetRandomSampler
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

### Set device configuration

In [4]:
# Set device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Data loading and transformation

In [5]:
# Image augmentation and transformation for training
train_transform = transforms.Compose([
    transforms.Resize((256, 256)), 
    transforms.RandomCrop(224),    
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Transformation for validation and test
valid_test_transform = transforms.Compose([
    transforms.Resize((227, 227)), 
    transforms.ToTensor(), 
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Prepare the dataset for training and validation
data_directory = './data_path_cifar10'

# Load CIFAR-10 dataset once
cifar10_dataset = datasets.CIFAR10(root=data_directory, download=True, transform=None) 

# Apply different transforms to train, validation, and test splits
train_data = datasets.CIFAR10(root=data_directory, train=True, download=False, transform=train_transform)
valid_data = datasets.CIFAR10(root=data_directory, train=True, download=False, transform=valid_test_transform)
test_data = datasets.CIFAR10(root=data_directory, train=False, download=False, transform=valid_test_transform)

# Split the training data for validation
valid_split, shuffle, random_seed = 0.1, True, 42
data_size = len(train_data)
indices = list(range(data_size))
split = int(np.floor(valid_split * data_size))

if shuffle:
    np.random.seed(random_seed)
    np.random.shuffle(indices)

train_indices, valid_indices = indices[split:], indices[:split]

# Creating the data loader for training and validation
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(valid_indices)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=64, sampler=valid_sampler)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64)

print(f'Training samples: {len(train_indices)}')
print(f'Validation samples: {len(valid_indices)}')
print(f'Testing samples: {len(test_data)}')


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data_path_cifar10/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 47467551.86it/s]


Extracting ./data_path_cifar10/cifar-10-python.tar.gz to ./data_path_cifar10
Training samples: 45000
Validation samples: 5000
Testing samples: 10000


In [6]:
# Get a single batch from the train_loader
data_iter = iter(train_loader)
features, targets = next(data_iter)

# Print the shapes of the features and targets
print(f'Shape of features: {features.shape}')
print(f'Shape of targets: {targets.shape}')


Shape of features: torch.Size([64, 3, 224, 224])
Shape of targets: torch.Size([64])


### Model implementation

In [7]:
class AlexNet(nn.Module):
    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()

        # NB:CIFAR-10 is resized to 224x224
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),  # N x 96 x 55 x 55
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # N x 96 x 27 x 27

            nn.Conv2d(96, 256, kernel_size=5, padding=2),  # N x 256 x 27 x 27
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # N x 256 x 13 x 13

            nn.Conv2d(256, 384, kernel_size=3, padding=1),  # N x 384 x 13 x 13
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 384, kernel_size=3, padding=1),  # N x 384 x 13 x 13
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, padding=1),  # N x 256 x 13 x 13
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)  # N x 256 x 6 x 6
        )

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),

            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),

            nn.Linear(4096, num_classes)
        )

#         self._initialize_weights()

#     def _initialize_weights(self):
#         for layers in self.modules():
#             if isinstance(layers, nn.Conv2d):
#                 nn.init.normal_(layers.weight, mean=0, std=0.01)
#                 if layers.bias is not None:
#                     nn.init.constant_(layers.bias, 0)

#             elif isinstance(layers, nn.Linear):
#                 nn.init.normal_(layers.weight, mean=0, std=0.01)
#                 if layers.bias is not None:
#                     nn.init.constant_(layers.bias, 1)


    def forward(self, x):
        x = self.conv_layers(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

### Train, valid and test function

In [8]:
def train(model, criterion, train_loader, valid_loader, optimizer, lr_scheduler, num_epochs):
    model = model.to(device)
    ema_loss = None
    best_acc = 0
    losses = []
    training_accuracies = []
    validation_accuracies = []

    print('Training Starting... ')

    for epoch in range(num_epochs):
        model.train()
        correct = 0
        
        for batch_idx, (features, target) in enumerate(train_loader):
            features, target = features.to(device), target.to(device)
            
            optimizer.zero_grad()
            output = model(features) # pass input feature for prediction
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            
            with torch.no_grad():
                pred = output.argmax(dim=1)
                correct += pred.eq(target).sum().item()
            
            if ema_loss is None:
                ema_loss = loss.item()
            else:
                ema_loss += (loss.item() - ema_loss) * 0.01

        train_score = correct / len(train_loader.dataset)
        _, _, valid_score = test(model, valid_loader, mode='Validation')
        
        losses.append(ema_loss)
        training_accuracies.append(train_score)
        validation_accuracies.append(valid_score)
        
        if valid_score > best_acc:
            best_acc = valid_score

        print(f'Epoch: {epoch + 1} \tLoss: {ema_loss:.4f} \tTraining Accuracy: {train_score:.4f} \tValidation Accuracy: {valid_score:.4f}')

        lr_scheduler.step()

    return losses, training_accuracies, validation_accuracies

def test(model, data_loader, mode='Test'):
    model.eval()
    correct_top1 = 0
    correct_top5 = 0
    correct = 0
    total = 0

    print(f'------{mode} Loop --------')
          
    with torch.no_grad():
        for features, target in data_loader:
            features, target = features.to(device), target.to(device)
            outputs = model(features)
            pred = outputs.argmax(dim=1)
            correct += pred.eq(target).sum().item()
            total += target.size(0)

            _, pred_top1 = outputs.topk(1, dim=1, largest=True, sorted=True)
            correct_top1 += (pred_top1.view(-1) == target).sum().item()

            _, pred_top5 = outputs.topk(5, dim=1, largest=True, sorted=True)
            correct_top5 += (pred_top5 == target.view(-1, 1)).sum().item()
            
    top1_error = 1 - correct_top1 / total
    top5_error = 1 - correct_top5 / total
    accuracy = correct / total
          
    return top1_error, top5_error, accuracy

### Train model

In [9]:
# We start training the model using this code block.

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
NUM_CLASSES = 10 # classes

model10 = AlexNet(num_classes=NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()

learning_rate, weight_decay, momentum = 0.01, 0.0005, 0.9
optimizer = torch.optim.SGD(model10.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)

# Approach for learning rate decay
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Train the model
num_epochs = 25
losses, training_accuracies, validation_accuracies = train(
    model10, criterion, train_loader, valid_loader, optimizer, lr_scheduler, num_epochs
)

# Save the model weights
torch.save(model10.state_dict(), 'alexnet_10_weights_plain.pth')
torch.save(model10, 'alexnet_10_model_plain.pth') # save the entire model
print(f'\nModel weights for CIFAR10 saved successfully!')

Training Starting... 
------Validation Loop --------
Epoch: 1 	Loss: 1.7466 	Training Accuracy: 0.2011 	Validation Accuracy: 0.3948
------Validation Loop --------
Epoch: 2 	Loss: 1.3825 	Training Accuracy: 0.4079 	Validation Accuracy: 0.5156
------Validation Loop --------
Epoch: 3 	Loss: 1.1507 	Training Accuracy: 0.5063 	Validation Accuracy: 0.5998
------Validation Loop --------
Epoch: 4 	Loss: 0.9674 	Training Accuracy: 0.5779 	Validation Accuracy: 0.6780
------Validation Loop --------
Epoch: 5 	Loss: 0.8380 	Training Accuracy: 0.6223 	Validation Accuracy: 0.7024
------Validation Loop --------
Epoch: 6 	Loss: 0.7614 	Training Accuracy: 0.6553 	Validation Accuracy: 0.7340
------Validation Loop --------
Epoch: 7 	Loss: 0.7107 	Training Accuracy: 0.6760 	Validation Accuracy: 0.7816
------Validation Loop --------
Epoch: 8 	Loss: 0.6269 	Training Accuracy: 0.6962 	Validation Accuracy: 0.7976
------Validation Loop --------
Epoch: 9 	Loss: 0.6126 	Training Accuracy: 0.7131 	Validation Accur

### Plots

In [10]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Create a subplot with two rows and one column
fig = make_subplots(rows=2, cols=1, subplot_titles=("Accuracy", "Loss"))

# Add training accuracy to the first subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=training_accuracies, mode='lines', name='Train Accuracy'), row=1, col=1)

# Add validation accuracy to the first subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=validation_accuracies, mode='lines', name='Valid Accuracy'), row=1, col=1)

# Add training loss to the second subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=losses, mode='lines', name='Loss'), row=2, col=1)

# Update the layout with the title
fig.update_layout(title='Model Accuracy and Loss - Plain (CIFAR10)')

# Show the figure
fig.show()
# Save the imnage
plt.savefig('accuracy_plot_Plain_cifar10.png')

<Figure size 640x480 with 0 Axes>

### Model evaluation

In [11]:
# Evaluate the Model
model10 = AlexNet(num_classes=NUM_CLASSES).to(device)
model10.load_state_dict(torch.load('/kaggle/working/alexnet_10_weights_plain.pth'))
model10.eval()

# Evaluate the model on the test set
top1_error, top5_error, test_accuracy = test(model10, test_loader, mode='Test')
print(f"Top-1 Error: {top1_error * 100:.2f}%,  Top-5 Error: {top5_error * 100:.2f}%,  Test Accuracy: {test_accuracy*100:.2f}%")

------Test Loop --------
Top-1 Error: 13.08%,  Top-5 Error: 0.61%,  Test Accuracy: 86.92%


# **CIFAR100 CLASSIFICATIOIN ALEXNET ARCHITECTURE**

### Data loading and transformation

In [12]:
# Image augmentation and transformation for training
train_transform = transforms.Compose([
    transforms.Resize((256, 256)), 
    transforms.RandomCrop(224),    
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Transformation for validation and test
valid_test_transform = transforms.Compose([
    transforms.Resize((224, 224)), 
    transforms.ToTensor(), 
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Prepare the dataset for training and validation
data_directory = './data_path_cifar100'

# Load CIFAR-10 dataset once
cifar100_dataset = datasets.CIFAR100(root=data_directory, download=True, transform=None) 

# Apply different transforms to train, validation, and test splits
train_data = datasets.CIFAR100(root=data_directory, train=True, download=False, transform=train_transform)
valid_data = datasets.CIFAR100(root=data_directory, train=True, download=False, transform=valid_test_transform)
test_data = datasets.CIFAR100(root=data_directory, train=False, download=False, transform=valid_test_transform)

# Split the training data for validation
valid_split, shuffle, random_seed = 0.1, True, 42
data_size = len(train_data)
indices = list(range(data_size))
split = int(np.floor(valid_split * data_size))

if shuffle:
    np.random.seed(random_seed)
    np.random.shuffle(indices)

train_indices, valid_indices = indices[split:], indices[:split]

# Creating the data loader for training and validation
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(valid_indices)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=64, sampler=valid_sampler)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64)

print(f'Training samples: {len(train_indices)}')
print(f'Validation samples: {len(valid_indices)}')
print(f'Testing samples: {len(test_data)}')


Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data_path_cifar100/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:03<00:00, 49153680.18it/s]


Extracting ./data_path_cifar100/cifar-100-python.tar.gz to ./data_path_cifar100
Training samples: 45000
Validation samples: 5000
Testing samples: 10000


### Model Implementation

In [13]:
class AlexNet(nn.Module):
    def __init__(self, num_classes=100):
        super(AlexNet, self).__init__()

        # CIFAR-10 is resized to 224x224
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),  # N x 96 x 55 x 55
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # N x 96 x 27 x 27

            nn.Conv2d(96, 256, kernel_size=5, padding=2),  # N x 256 x 27 x 27
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # N x 256 x 13 x 13

            nn.Conv2d(256, 384, kernel_size=3, padding=1),  # N x 384 x 13 x 13
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 384, kernel_size=3, padding=1),  # N x 384 x 13 x 13
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, padding=1),  # N x 256 x 13 x 13
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)  # N x 256 x 6 x 6
        )

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),

            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),

            nn.Linear(4096, num_classes)
        )

#         self._initialize_weights()

#     def _initialize_weights(self):
#         for layer in self.modules():
#             if isinstance(layer, nn.Conv2d):
#                 nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
#                 if layer.bias is not None:
#                     nn.init.constant_(layer.bias, 0)
#             elif isinstance(layer, nn.Linear):
#                 nn.init.normal_(layer.weight, 0, 0.01)
#                 nn.init.constant_(layer.bias, 1)

    def forward(self, x):
        x = self.conv_layers(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


### Model Training

In [14]:
# Training the model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
NUM_CLASSES = 100  # Class labels

model100 = AlexNet(num_classes=NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()

learning_rate, weight_decay, momentum = 0.01, 0.0005, 0.9
optimizer = torch.optim.SGD(model100.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)

# Approach for learning rate decay
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)

# Train the model
num_epochs = 25
losses, training_accuracies, validation_accuracies = train(
    model100, criterion, train_loader, valid_loader, optimizer, lr_scheduler, num_epochs
)

# Save the model weights
torch.save(model100.state_dict(), 'alexnet_100_weights_plain.pth') # save the model weights
torch.save(model100, 'alexnet_model_100_plain.pth') # save the entire model
print(f'\nModel weights for CIFAR10 saved successfully!')

Training Starting... 
------Validation Loop --------
Epoch: 1 	Loss: 4.5717 	Training Accuracy: 0.0091 	Validation Accuracy: 0.0236
------Validation Loop --------
Epoch: 2 	Loss: 4.0962 	Training Accuracy: 0.0409 	Validation Accuracy: 0.0738
------Validation Loop --------
Epoch: 3 	Loss: 3.7386 	Training Accuracy: 0.0874 	Validation Accuracy: 0.1382
------Validation Loop --------
Epoch: 4 	Loss: 3.3929 	Training Accuracy: 0.1394 	Validation Accuracy: 0.1602
------Validation Loop --------
Epoch: 5 	Loss: 3.1329 	Training Accuracy: 0.1896 	Validation Accuracy: 0.2324
------Validation Loop --------
Epoch: 6 	Loss: 2.8218 	Training Accuracy: 0.2383 	Validation Accuracy: 0.3294
------Validation Loop --------
Epoch: 7 	Loss: 2.6245 	Training Accuracy: 0.2831 	Validation Accuracy: 0.3570
------Validation Loop --------
Epoch: 8 	Loss: 2.4154 	Training Accuracy: 0.3245 	Validation Accuracy: 0.3826
------Validation Loop --------
Epoch: 9 	Loss: 2.2700 	Training Accuracy: 0.3559 	Validation Accur

### Plots

In [17]:
# Create a subplot with two rows and one column
fig = make_subplots(rows=2, cols=1, subplot_titles=("Accuracy", "Loss"))

# Add training accuracy to the first subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=training_accuracies, mode='lines', name='Train Accuracy'), row=1, col=1)

# Add validation accuracy to the first subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=validation_accuracies, mode='lines', name='Valid Accuracy'), row=1, col=1)

# Add training loss to the second subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=losses, mode='lines', name='Loss'), row=2, col=1)

# Update the layout with the title
fig.update_layout(title='Model Accuracy and Loss - Plain (CIFAR100)')

# Show the figure
fig.show()
# Save the imnage
plt.savefig('accuracy_plot_Plain_cifar100.png')

<Figure size 640x480 with 0 Axes>

### Model evaluation

In [18]:
# Evaluate the Model
model100 = AlexNet(num_classes=NUM_CLASSES).to(device)
model100.load_state_dict(torch.load('/kaggle/working/alexnet_100_weights_plain.pth'))
model100.eval()

# Evaluate the model on the test set
top1_error, top5_error, test_accuracy = test(model100, test_loader, mode='Test')
print(f"Top-1 Error: {top1_error * 100:.2f}%,  Top-5 Error: {top5_error * 100:.2f}%,  Test Accuracy: {test_accuracy*100:.2f}%")

------Test Loop --------
Top-1 Error: 42.92%,  Top-5 Error: 16.02%,  Test Accuracy: 57.08%
