# **CIFAR10 AND CIFAR100 CLASSIFICATION USING ALEXNET WITH CUTMIX**

Importing libraries

In [1]:
# Imports and Setup
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, SubsetRandomSampler
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import os
import numpy as np
import warnings
import plotly.graph_objs as go
from plotly.subplots import make_subplots
warnings.filterwarnings('ignore')

Setting device configuration

In [2]:
# Set device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Data loading and transformation

In [3]:
# Image augmentation and transformation for training
train_transform = transforms.Compose([
    transforms.Resize((256, 256)), 
    transforms.RandomCrop(224),    
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Transformation for validation and test
valid_test_transform = transforms.Compose([
    transforms.Resize((227, 227)), 
    transforms.ToTensor(), 
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Prepare the dataset for training and validation
data_directory = './data_path_cifar10'

# Load CIFAR-10 dataset once
cifar10_dataset = datasets.CIFAR10(root=data_directory, download=True, transform=None) 

# Apply different transforms to train, validation, and test splits
train_data = datasets.CIFAR10(root=data_directory, train=True, download=False, transform=train_transform)
valid_data = datasets.CIFAR10(root=data_directory, train=True, download=False, transform=valid_test_transform)
test_data = datasets.CIFAR10(root=data_directory, train=False, download=False, transform=valid_test_transform)

# Split the training data for validation
valid_split, shuffle, random_seed = 0.1, True, 42
data_size = len(train_data)
indices = list(range(data_size))
split = int(np.floor(valid_split * data_size))

if shuffle:
    np.random.seed(random_seed)
    np.random.shuffle(indices)

train_indices, valid_indices = indices[split:], indices[:split]

# Creating the data loader for training and validation
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(valid_indices)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=64, sampler=valid_sampler)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64)

print(f'Training samples: {len(train_indices)}')
print(f'Validation samples: {len(valid_indices)}')
print(f'Testing samples: {len(test_data)}')


Files already downloaded and verified
Training samples: 45000
Validation samples: 5000
Testing samples: 10000


Checking shape of features and target

In [4]:
# Get a single batch from the train_loader
data_iter = iter(train_loader)
features, targets = next(data_iter)

# Print the shapes of the features and targets
print(f'Shape of features: {features.shape}')
print(f'Shape of targets: {targets.shape}')


Shape of features: torch.Size([64, 3, 224, 224])
Shape of targets: torch.Size([64])


AlexNet implementation from scratch

In [5]:
class AlexNet(nn.Module):
    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()

        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),  # N x 96 x 55 x 55
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # N x 96 x 27 x 27

            nn.Conv2d(96, 256, kernel_size=5, padding=2),  # N x 256 x 27 x 27
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # N x 256 x 13 x 13

            nn.Conv2d(256, 384, kernel_size=3, padding=1),  # N x 384 x 13 x 13
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 384, kernel_size=3, padding=1),  # N x 384 x 13 x 13
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, padding=1),  # N x 256 x 13 x 13
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)  # N x 256 x 6 x 6
        )

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),

            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),

            nn.Linear(4096, num_classes)
        )

#         self._initialize_weights()

#     def _initialize_weights(self):
#         for layers in self.modules():
#             if isinstance(layers, nn.Conv2d):
#                 nn.init.normal_(layers.weight, mean=0, std=0.01)
#                 if layers.bias is not None:
#                     nn.init.constant_(layers.bias, 0)

#             elif isinstance(layers, nn.Linear):
#                 nn.init.normal_(layers.weight, mean=0, std=0.01)
#                 if layers.bias is not None:
#                     nn.init.constant_(layers.bias, 1)


    def forward(self, x):
        x = self.conv_layers(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

CutMix Implementation

In [6]:
def cutmix_data(x, y, alpha=1.0):
    '''Returns mixed inputs, pairs of targets, and lambda'''
    lam = np.random.beta(alpha, alpha)
    rand_index = torch.randperm(x.size()[0]).cuda()
    target_a = y
    target_b = y[rand_index]
    bbx1, bby1, bbx2, bby2 = rand_bbox(x.size(), lam)
    x[:, :, bbx1:bbx2, bby1:bby2] = x[rand_index, :, bbx1:bbx2, bby1:bby2]
    lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (x.size()[-1] * x.size()[-2]))
    return x, target_a, target_b, lam

def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)
    cx = np.random.randint(W)
    cy = np.random.randint(H)
    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)
    return bbx1, bby1, bbx2, bby2

def cutmix_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

Training function

In [7]:
def train_with_cutmix(model, criterion, train_loader, valid_loader, optimizer, lr_scheduler, num_epochs, alpha=1.0, cutmix_prob=0.5):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    ema_loss = None
    best_acc = 0
    losses = []
    training_accuracies = []
    validation_accuracies = []

    print('----- Training Loop with CutMix -----')

    for epoch in range(num_epochs):
        model.train()
        correct = 0
        total = 0
        for batch_idx, (features, targets) in enumerate(train_loader):
            features, targets = features.to(device), targets.to(device)

            # Apply CutMix with a probability
            r = np.random.rand(1)
            if r < cutmix_prob:
                inputs, targets_a, targets_b, lam = cutmix_data(features, targets, alpha)
            else:
                inputs, targets_a, targets_b = features, targets, targets
                lam = 1

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = cutmix_criterion(criterion, outputs, targets_a, targets_b, lam)
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                _, predicted = torch.max(outputs.data, 1)
                total += targets.size(0)
                correct += (lam * predicted.eq(targets_a.data).cpu().sum().float() +
                            (1 - lam) * predicted.eq(targets_b.data).cpu().sum().float()).item()

            if ema_loss is None:
                ema_loss = loss.item()
            else:
                ema_loss += (loss.item() - ema_loss) * 0.01

        train_score = correct / total
        _, _, valid_score = test(model, valid_loader, mode='Validation')

        losses.append(ema_loss)
        training_accuracies.append(train_score)
        validation_accuracies.append(valid_score)

        if valid_score > best_acc:
            best_acc = valid_score

        print(f'Epoch: {epoch + 1} \tLoss: {ema_loss:.4f} \tTraining Accuracy: {train_score:.4f} \tValidation Accuracy: {valid_score:.4f}')

        lr_scheduler.step()

    return losses, training_accuracies, validation_accuracies

def test(model, data_loader, mode='Test'):
    model.eval()
    correct_top1 = 0
    correct_top5 = 0
    correct = 0
    total = 0

    print(f'------{mode} Loop --------')
          
    with torch.no_grad():
        for features, target in data_loader:
            features, target = features.to(device), target.to(device)
            outputs = model(features)
            pred = outputs.argmax(dim=1)
            correct += pred.eq(target).sum().item()
            total += target.size(0)

            _, pred_top1 = outputs.topk(1, dim=1, largest=True, sorted=True)
            correct_top1 += (pred_top1.view(-1) == target).sum().item()

            _, pred_top5 = outputs.topk(5, dim=1, largest=True, sorted=True)
            correct_top5 += (pred_top5 == target.view(-1, 1)).sum().item()
            
    top1_error = 1 - correct_top1 / total
    top5_error = 1 - correct_top5 / total
    accuracy = correct / total
          
    return top1_error, top5_error, accuracy

Training model

In [8]:
# Use the updated data loaders
NUM_CLASSES = 10  # Adjust as needed
model10 = AlexNet(num_classes=NUM_CLASSES)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model10.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
num_epochs = 25
alpha = 1.0  # CutMix interpolation coefficient
cutmix_prob = 0.5  # Probability of applying CutMix

# Call the train function with CutMix augmentation
losses, training_accuracies, validation_accuracies = train_with_cutmix(
    model10, criterion, train_loader, valid_loader, optimizer, lr_scheduler, num_epochs, alpha, cutmix_prob
)

# Save the model
torch.save(model10.state_dict(), 'alexnet_10_cutmix_weights.pth')
torch.save(model10, 'alexnet_10_model_cutmix.pth') # save the entire model
print('\nCutMix Model weights saved successfully!')

----- Training Loop with CutMix -----
------Validation Loop --------
Epoch: 1 	Loss: 2.0665 	Training Accuracy: 0.1700 	Validation Accuracy: 0.3090
------Validation Loop --------
Epoch: 2 	Loss: 1.7618 	Training Accuracy: 0.3213 	Validation Accuracy: 0.4506
------Validation Loop --------
Epoch: 3 	Loss: 1.6278 	Training Accuracy: 0.3941 	Validation Accuracy: 0.4666
------Validation Loop --------
Epoch: 4 	Loss: 1.4804 	Training Accuracy: 0.4591 	Validation Accuracy: 0.5964
------Validation Loop --------
Epoch: 5 	Loss: 1.3884 	Training Accuracy: 0.5060 	Validation Accuracy: 0.6662
------Validation Loop --------
Epoch: 6 	Loss: 1.2447 	Training Accuracy: 0.5620 	Validation Accuracy: 0.6860
------Validation Loop --------
Epoch: 7 	Loss: 1.1987 	Training Accuracy: 0.5820 	Validation Accuracy: 0.7248
------Validation Loop --------
Epoch: 8 	Loss: 1.1411 	Training Accuracy: 0.6158 	Validation Accuracy: 0.7388
------Validation Loop --------
Epoch: 9 	Loss: 1.1107 	Training Accuracy: 0.6304 	

Plots for accuracies and loss

In [9]:
# Create a subplot with two rows and one column
fig = make_subplots(rows=2, cols=1, subplot_titles=("Accuracy", "Loss"))

# Add training accuracy to the first subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=training_accuracies, mode='lines', name='Train Accuracy'), row=1, col=1)

# Add validation accuracy to the first subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=validation_accuracies, mode='lines', name='Valid Accuracy'), row=1, col=1)

# Add training loss to the second subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=losses, mode='lines', name='Loss'), row=2, col=1)

# Update the layout with the title
fig.update_layout(title='Model Accuracy and Loss - CutMix (CIFAR10)')

# Show the figure
fig.show()
# Save the imnage
plt.savefig('accuracy_plot_cutmix_cifar10.png')

<Figure size 640x480 with 0 Axes>

Model evaluation

In [10]:
# Evaluate the Model
NUM_CLASSES = 10  # Adjust as needed
model10 = AlexNet(num_classes=NUM_CLASSES).to(device)
model10.load_state_dict(torch.load('/kaggle/working/alexnet_10_cutmix_weights.pth'))
model10.eval()

# Evaluate the model on the test set
top1_error, top5_error, test_accuracy = test(model10, test_loader, mode='Test')
print(f"Top-1 Error: {top1_error * 100:.2f}%,  Top-5 Error: {top5_error * 100:.2f}%, Test Accuracy: {test_accuracy*100:.2f}%")

------Test Loop --------
Top-1 Error: 15.17%,  Top-5 Error: 0.67%, Test Accuracy: 84.83%


# **CIFAR100 CLASSIFICATIOIN ALEXNET ARCHITECTURE**

Data loading and transformation

In [11]:
# Image augmentation and transformation for training
train_transform = transforms.Compose([
    transforms.Resize((256, 256)), 
    transforms.RandomCrop(224),    
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Transformation for validation and test
valid_test_transform = transforms.Compose([
    transforms.Resize((224, 224)), 
    transforms.ToTensor(), 
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Prepare the dataset for training and validation
data_directory = './data_path_cifar100'

# Load CIFAR-10 dataset once
cifar100_dataset = datasets.CIFAR100(root=data_directory, download=True, transform=None) 

# Apply different transforms to train, validation, and test splits
train_data = datasets.CIFAR100(root=data_directory, train=True, download=False, transform=train_transform)
valid_data = datasets.CIFAR100(root=data_directory, train=True, download=False, transform=valid_test_transform)
test_data = datasets.CIFAR100(root=data_directory, train=False, download=False, transform=valid_test_transform)

# Split the training data for validation
valid_split, shuffle, random_seed = 0.1, True, 42
data_size = len(train_data)
indices = list(range(data_size))
split = int(np.floor(valid_split * data_size))

if shuffle:
    np.random.seed(random_seed)
    np.random.shuffle(indices)

train_indices, valid_indices = indices[split:], indices[:split]

# Creating the data loader for training and validation
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(valid_indices)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=128, sampler=valid_sampler)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=128)

print(f'Training samples: {len(train_indices)}')
print(f'Validation samples: {len(valid_indices)}')
print(f'Testing samples: {len(test_data)}')


Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data_path_cifar100/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:04<00:00, 37386111.50it/s]


Extracting ./data_path_cifar100/cifar-100-python.tar.gz to ./data_path_cifar100
Training samples: 45000
Validation samples: 5000
Testing samples: 10000


alexnet model

In [12]:
class AlexNet(nn.Module):
    def __init__(self, num_classes=100):
        super(AlexNet, self).__init__()

        # CIFAR-10 is resized to 224x224
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),  # N x 96 x 55 x 55
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # N x 96 x 27 x 27

            nn.Conv2d(96, 256, kernel_size=5, padding=2),  # N x 256 x 27 x 27
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # N x 256 x 13 x 13

            nn.Conv2d(256, 384, kernel_size=3, padding=1),  # N x 384 x 13 x 13
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 384, kernel_size=3, padding=1),  # N x 384 x 13 x 13
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, padding=1),  # N x 256 x 13 x 13
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)  # N x 256 x 6 x 6
        )

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),

            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),

            nn.Linear(4096, num_classes)
        )

        self._initialize_weights()

    def _initialize_weights(self):
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 1)

    def forward(self, x):
        x = self.conv_layers(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


Training model

In [13]:
# Use the updated data loaders
NUM_CLASSES = 100  # Adjust as needed
model100 = AlexNet(num_classes=NUM_CLASSES)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model100.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
num_epochs = 25
alpha = 1.0  # CutMix interpolation coefficient
cutmix_prob = 0.5  # Probability of applying CutMix

# Call the train function with CutMix augmentation
losses, training_accuracies, validation_accuracies = train_with_cutmix(
    model100, criterion, train_loader, valid_loader, optimizer, lr_scheduler, num_epochs, alpha, cutmix_prob
)

# Save the model
torch.save(model100.state_dict(), 'alexnet_100_cutmix_weights.pth')
torch.save(model100, 'alexnet_100_model_cutmix.pth') # save the entire model
print('\nCutMix Model weights saved successfully!')

----- Training Loop with CutMix -----
------Validation Loop --------
Epoch: 1 	Loss: 4.5797 	Training Accuracy: 0.0132 	Validation Accuracy: 0.0432
------Validation Loop --------
Epoch: 2 	Loss: 4.2146 	Training Accuracy: 0.0488 	Validation Accuracy: 0.0852
------Validation Loop --------
Epoch: 3 	Loss: 3.9806 	Training Accuracy: 0.0825 	Validation Accuracy: 0.1282
------Validation Loop --------
Epoch: 4 	Loss: 3.7981 	Training Accuracy: 0.1153 	Validation Accuracy: 0.1874
------Validation Loop --------
Epoch: 5 	Loss: 3.6666 	Training Accuracy: 0.1415 	Validation Accuracy: 0.2152
------Validation Loop --------
Epoch: 6 	Loss: 3.5754 	Training Accuracy: 0.1633 	Validation Accuracy: 0.2584
------Validation Loop --------
Epoch: 7 	Loss: 3.4702 	Training Accuracy: 0.1875 	Validation Accuracy: 0.2978
------Validation Loop --------
Epoch: 8 	Loss: 3.3036 	Training Accuracy: 0.2136 	Validation Accuracy: 0.3034
------Validation Loop --------
Epoch: 9 	Loss: 3.2190 	Training Accuracy: 0.2255 	

Plots for accuracies and loss

In [14]:
# Create a subplot with two rows and one column
fig = make_subplots(rows=2, cols=1, subplot_titles=("Accuracy", "Loss"))

# Add training accuracy to the first subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=training_accuracies, mode='lines', name='Train Accuracy'), row=1, col=1)

# Add validation accuracy to the first subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=validation_accuracies, mode='lines', name='Valid Accuracy'), row=1, col=1)

# Add training loss to the second subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=losses, mode='lines', name='Loss'), row=2, col=1)

# Update the layout with the title
fig.update_layout(title='Model Accuracy and Loss - CutMix (CIFAR100)')

# Show the figure
fig.show()

# Save the image
plt.savefig('accuracy_plot_cutmix_cifar100.png')

<Figure size 640x480 with 0 Axes>

Model evaluation

In [15]:
# Evaluate the Model
NUM_CLASSES = 100  # Adjust as needed
model100 = AlexNet(num_classes=NUM_CLASSES).to(device)
model100.load_state_dict(torch.load('/kaggle/working/alexnet_100_cutmix_weights.pth'))
model100.eval()

# Evaluate the model on the test set
top1_error, top5_error, test_accuracy = test(model100, test_loader, mode='Test')
print(f"Top-1 Error: {top1_error * 100:.2f}%,  Top-5 Error: {top5_error * 100:.2f}%, Test Accuracy: {test_accuracy*100:.2f}%")

------Test Loop --------
Top-1 Error: 54.34%,  Top-5 Error: 24.13%, Test Accuracy: 45.66%
