# **CIFAR10 AND CIFAR100 CLASSIFICATION USING ALEXNET**

### Importing Librarie

In [1]:
# Imports and Setup
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, SubsetRandomSampler
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import numpy as np
import warnings
from torch.autograd import Variable
warnings.filterwarnings('ignore')


### Device configuration

In [2]:
# Set device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Data loading and transformation

In [3]:
# Image augmentation and transformation for training
train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Transformation for validation and test
valid_test_transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Prepare the dataset for training and validation
data_directory = './data_path_cifar10'

# Load CIFAR-10 dataset once
cifar10_dataset = datasets.CIFAR10(root=data_directory, download=True, transform=None)

# Apply different transforms to train, validation, and test splits
train_data = datasets.CIFAR10(root=data_directory, train=True, download=False, transform=train_transform)
valid_data = datasets.CIFAR10(root=data_directory, train=True, download=False, transform=valid_test_transform)
test_data = datasets.CIFAR10(root=data_directory, train=False, download=False, transform=valid_test_transform)

# Split the training data for validation
valid_split, shuffle, random_seed = 0.1, True, 42
data_size = len(train_data)
indices = list(range(data_size))
split = int(np.floor(valid_split * data_size))

if shuffle:
    np.random.seed(random_seed)
    np.random.shuffle(indices)

train_indices, valid_indices = indices[split:], indices[:split]

# Creating the data loader for training and validation
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(valid_indices)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=64, sampler=valid_sampler)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64)

print(f'Training samples: {len(train_indices)}')
print(f'Validation samples: {len(valid_indices)}')
print(f'Testing samples: {len(test_data)}')


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data_path_cifar10/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 46807007.77it/s]


Extracting ./data_path_cifar10/cifar-10-python.tar.gz to ./data_path_cifar10
Training samples: 45000
Validation samples: 5000
Testing samples: 10000


### Get features and target size

In [4]:
# Get a single batch from the train_loader
data_iter = iter(train_loader)
features, targets = next(data_iter)

# Print the shapes of the features and targets
print(f'Shape of features: {features.shape}')
print(f'Shape of targets: {targets.shape}')


Shape of features: torch.Size([64, 3, 224, 224])
Shape of targets: torch.Size([64])


### Model implementaion

In [5]:
class AlexNet(nn.Module):
    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()

        # NB:CIFAR-10 is resized to 224x224
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),  # N x 96 x 55 x 55
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # N x 96 x 27 x 27

            nn.Conv2d(96, 256, kernel_size=5, padding=2),  # N x 256 x 27 x 27
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # N x 256 x 13 x 13

            nn.Conv2d(256, 384, kernel_size=3, padding=1),  # N x 384 x 13 x 13
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 384, kernel_size=3, padding=1),  # N x 384 x 13 x 13
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, padding=1),  # N x 256 x 13 x 13
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)  # N x 256 x 6 x 6
        )

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),

            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),

            nn.Linear(4096, num_classes)
        )

#         self._initialize_weights()

#     def _initialize_weights(self):
#         for layers in self.modules():
#             if isinstance(layers, nn.Conv2d):
#                 nn.init.normal_(layers.weight, mean=0, std=0.01)
#                 if layers.bias is not None:
#                     nn.init.constant_(layers.bias, 0)

#             elif isinstance(layers, nn.Linear):
#                 nn.init.normal_(layers.weight, mean=0, std=0.01)
#                 if layers.bias is not None:
#                     nn.init.constant_(layers.bias, 1)


    def forward(self, x):
        x = self.conv_layers(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

### Implementing MixUp

In [6]:
def mixup_data(x, y, alpha=1.0, use_cuda=True):
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)


### Train and evaluation function

In [7]:
# train function for mixup
def train_with_mixup(model, criterion, train_loader, valid_loader, optimizer, lr_scheduler, num_epochs, alpha=1.0):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    ema_loss = None
    best_acc = 0
    losses = []
    training_accuracies = []
    validation_accuracies = []

    print('----- Training Loop with Mixup -----')

    for epoch in range(num_epochs):
        model.train()
        correct = 0
        total = 0
        for batch_idx, (features, targets) in enumerate(train_loader):
            features, targets = features.to(device), targets.to(device)
            inputs, targets_a, targets_b, lam = mixup_data(features, targets, alpha, use_cuda=True)
            inputs, targets_a, targets_b = map(Variable, (inputs, targets_a, targets_b))

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                _, predicted = torch.max(outputs.data, 1)
                total += targets.size(0)
                correct += (lam * predicted.eq(targets_a.data).cpu().sum().float() +
                            (1 - lam) * predicted.eq(targets_b.data).cpu().sum().float()).item()

            if ema_loss is None:
                ema_loss = loss.item()
            else:
                ema_loss += (loss.item() - ema_loss) * 0.01

        train_score = correct / total
        _, _, valid_score = test(model, valid_loader, mode='Validation')

        losses.append(ema_loss)
        training_accuracies.append(train_score)
        validation_accuracies.append(valid_score)

        if valid_score > best_acc:
            best_acc = valid_score

        print(f'Epoch: {epoch + 1} \tLoss: {ema_loss:.4f} \tTraining Accuracy: {train_score:.4f} \tValidation Accuracy: {valid_score:.4f}')

        lr_scheduler.step()

    return losses, training_accuracies, validation_accuracies

def test(model, data_loader, mode='Test'):
    model.eval()
    correct_top1 = 0
    correct_top5 = 0
    correct = 0
    total = 0

    print(f'------{mode} Loop --------')

    with torch.no_grad():
        for features, target in data_loader:
            features, target = features.to(device), target.to(device)
            outputs = model(features)
            pred = outputs.argmax(dim=1)
            correct += pred.eq(target).sum().item()
            total += target.size(0)

            _, pred_top1 = outputs.topk(1, dim=1, largest=True, sorted=True)
            correct_top1 += (pred_top1.view(-1) == target).sum().item()

            _, pred_top5 = outputs.topk(5, dim=1, largest=True, sorted=True)
            correct_top5 += (pred_top5 == target.view(-1, 1)).sum().item()

    top1_error = 1 - correct_top1 / total
    top5_error = 1 - correct_top5 / total
    accuracy = correct / total

    return top1_error, top5_error, accuracy

### Train model

In [8]:
NUM_CLASSES = 10
model10 = AlexNet(num_classes=NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()

learning_rate, weight_decay, momentum = 0.01, 0.0005, 0.9
optimizer = torch.optim.SGD(model10.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)

lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)
num_epochs = 25
alpha = 1.0  # Mixup interpolation coefficient

# Train
losses, training_accuracies, validation_accuracies = train_with_mixup(
    model10, criterion, train_loader, valid_loader, optimizer, lr_scheduler, num_epochs, alpha
)

# Save the model
torch.save(model10.state_dict(), 'alexnet_10_mixup_weights.pth')
torch.save(model10, 'alexnet_10_model_mixup.pth') # save the entire model
print('\ntMixup Model weights saved successfully!')

----- Training Loop with Mixup -----
------Validation Loop --------
Epoch: 1 	Loss: 2.1170 	Training Accuracy: 0.1583 	Validation Accuracy: 0.3024
------Validation Loop --------
Epoch: 2 	Loss: 1.8914 	Training Accuracy: 0.2916 	Validation Accuracy: 0.4254
------Validation Loop --------
Epoch: 3 	Loss: 1.7700 	Training Accuracy: 0.3578 	Validation Accuracy: 0.5194
------Validation Loop --------
Epoch: 4 	Loss: 1.6805 	Training Accuracy: 0.4153 	Validation Accuracy: 0.5612
------Validation Loop --------
Epoch: 5 	Loss: 1.5915 	Training Accuracy: 0.4548 	Validation Accuracy: 0.6396
------Validation Loop --------
Epoch: 6 	Loss: 1.5191 	Training Accuracy: 0.4968 	Validation Accuracy: 0.6408
------Validation Loop --------
Epoch: 7 	Loss: 1.4816 	Training Accuracy: 0.5116 	Validation Accuracy: 0.7180
------Validation Loop --------
Epoch: 8 	Loss: 1.4408 	Training Accuracy: 0.5364 	Validation Accuracy: 0.7500
------Validation Loop --------
Epoch: 9 	Loss: 1.4426 	Training Accuracy: 0.5436 	V

### Plots

In [9]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Create a subplot with two rows and one column
fig = make_subplots(rows=2, cols=1, subplot_titles=("Accuracy", "Loss"))

# Add training accuracy to the first subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=training_accuracies, mode='lines', name='Train Accuracy'), row=1, col=1)

# Add validation accuracy to the first subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=validation_accuracies, mode='lines', name='Valid Accuracy'), row=1, col=1)

# Add training loss to the second subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=losses, mode='lines', name='Loss'), row=2, col=1)

# Update the layout with the title
fig.update_layout(title='Model Accuracy and Loss - MixUp (CIFAR10)')

# Show the figure
fig.show()
# Save the imnage
plt.savefig('accuracy_plot_MixUp_cifar10.png')

<Figure size 640x480 with 0 Axes>

### Model evaluation

In [11]:
# Evaluate the Model
NUM_CLASSES = 10  # Adjust as needed
model10 = AlexNet(num_classes=NUM_CLASSES).to(device)
model10.load_state_dict(torch.load('/kaggle/working/alexnet_10_mixup_weights.pth'))
model10.eval()

# Evaluate the model on the test set
top1_error, top5_error, test_accuracy = test(model10, test_loader, mode='Test')
print(f"Top-1 Error: {top1_error * 100:.2f}%,  Top-5 Error: {top5_error * 100:.2f}%, Test Accuracy: {test_accuracy*100:.2f}%")

------Test Loop --------
Top-1 Error: 13.86%,  Top-5 Error: 0.86%, Test Accuracy: 86.14%


# **CIFAR100 CLASSIFICATIOIN ALEXNET ARCHITECTURE**

### Data loading and transformation

In [12]:
# Image augmentation and transformation for training
train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Transformation for validation and test
valid_test_transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Prepare the dataset for training and validation
data_directory = './data_path_cifar100'

# Load CIFAR-10 dataset once
cifar100_dataset = datasets.CIFAR100(root=data_directory, download=True, transform=None)

# Apply different transforms to train, validation, and test splits
train_data = datasets.CIFAR100(root=data_directory, train=True, download=False, transform=train_transform)
valid_data = datasets.CIFAR100(root=data_directory, train=True, download=False, transform=valid_test_transform)
test_data = datasets.CIFAR100(root=data_directory, train=False, download=False, transform=valid_test_transform)

# Split the training data for validation
valid_split, shuffle, random_seed = 0.1, True, 42
data_size = len(train_data)
indices = list(range(data_size))
split = int(np.floor(valid_split * data_size))

if shuffle:
    np.random.seed(random_seed)
    np.random.shuffle(indices)

train_indices, valid_indices = indices[split:], indices[:split]

# Creating the data loader for training and validation
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(valid_indices)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=64, sampler=valid_sampler)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64)

print(f'Training samples: {len(train_indices)}')
print(f'Validation samples: {len(valid_indices)}')
print(f'Testing samples: {len(test_data)}')


Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data_path_cifar100/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:03<00:00, 49318976.64it/s]


Extracting ./data_path_cifar100/cifar-100-python.tar.gz to ./data_path_cifar100
Training samples: 45000
Validation samples: 5000
Testing samples: 10000


### Model implementation

In [13]:
class AlexNet(nn.Module):
    def __init__(self, num_classes=100):
        super(AlexNet, self).__init__()

        # CIFAR-10 is resized to 224x224
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),  # N x 96 x 55 x 55
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # N x 96 x 27 x 27

            nn.Conv2d(96, 256, kernel_size=5, padding=2),  # N x 256 x 27 x 27
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # N x 256 x 13 x 13

            nn.Conv2d(256, 384, kernel_size=3, padding=1),  # N x 384 x 13 x 13
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 384, kernel_size=3, padding=1),  # N x 384 x 13 x 13
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, padding=1),  # N x 256 x 13 x 13
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)  # N x 256 x 6 x 6
        )

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),

            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),

            nn.Linear(4096, num_classes)
        )

        self._initialize_weights()

    def _initialize_weights(self):
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 1)

    def forward(self, x):
        x = self.conv_layers(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


### Train model

In [14]:
NUM_CLASSES = 100
model100 = AlexNet(num_classes=NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()

learning_rate, weight_decay, momentum = 0.01, 0.0005, 0.9
optimizer = torch.optim.SGD(model100.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)

lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)
num_epochs = 25
alpha = 1.0  # Mixup interpolation coefficient

# Train
losses, training_accuracies, validation_accuracies = train_with_mixup(
    model100, criterion, train_loader, valid_loader, optimizer, lr_scheduler, num_epochs, alpha
)

# Save the model
torch.save(model100.state_dict(), 'alexnet_100_mixup_weights.pth')
torch.save(model100, 'alexnet_100_model_mixup.pth') # save the entire model
print('\ntMixup Model weights saved successfully!')

----- Training Loop with Mixup -----
------Validation Loop --------
Epoch: 1 	Loss: 4.5917 	Training Accuracy: 0.0099 	Validation Accuracy: 0.0228
------Validation Loop --------
Epoch: 2 	Loss: 4.3122 	Training Accuracy: 0.0323 	Validation Accuracy: 0.0612
------Validation Loop --------
Epoch: 3 	Loss: 4.1443 	Training Accuracy: 0.0621 	Validation Accuracy: 0.1072
------Validation Loop --------
Epoch: 4 	Loss: 3.9950 	Training Accuracy: 0.0929 	Validation Accuracy: 0.1616
------Validation Loop --------
Epoch: 5 	Loss: 3.8399 	Training Accuracy: 0.1246 	Validation Accuracy: 0.2010
------Validation Loop --------
Epoch: 6 	Loss: 3.6961 	Training Accuracy: 0.1510 	Validation Accuracy: 0.2372
------Validation Loop --------
Epoch: 7 	Loss: 3.5962 	Training Accuracy: 0.1749 	Validation Accuracy: 0.2874
------Validation Loop --------
Epoch: 8 	Loss: 3.5191 	Training Accuracy: 0.1977 	Validation Accuracy: 0.3004
------Validation Loop --------
Epoch: 9 	Loss: 3.4447 	Training Accuracy: 0.2200 	V

### Plots

In [15]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Create a subplot with two rows and one column
fig = make_subplots(rows=2, cols=1, subplot_titles=("Accuracy", "Loss"))

# Add training accuracy to the first subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=training_accuracies, mode='lines', name='Train Accuracy'), row=1, col=1)

# Add validation accuracy to the first subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=validation_accuracies, mode='lines', name='Valid Accuracy'), row=1, col=1)

# Add training loss to the second subplot
fig.add_trace(go.Scatter(x=list(range(num_epochs)), y=losses, mode='lines', name='Loss'), row=2, col=1)

# Update the layout with the title
fig.update_layout(title='Model Accuracy and Loss - MixUp (CIFAR100)')

# Show the figure
fig.show()
# Save the imnage
plt.savefig('accuracy_plot_MixUp_cifar100.png')

<Figure size 640x480 with 0 Axes>

### Model evaluation

In [17]:
# Evaluate the Model
NUM_CLASSES = 100  # Adjust as needed
model100 = AlexNet(num_classes=NUM_CLASSES).to(device)
model100.load_state_dict(torch.load('/kaggle/working/alexnet_100_mixup_weights.pth'))
model100.eval()

# Evaluate the model on the test set
top1_error, top5_error, test_accuracy = test(model100, test_loader, mode='Test')
print(f"Top-1 Error: {top1_error * 100:.2f}%,  Top-5 Error: {top5_error * 100:.2f}%, Test Accuracy: {test_accuracy*100:.2f}%")

------Test Loop --------
Top-1 Error: 44.28%,  Top-5 Error: 17.32%, Test Accuracy: 55.72%
