# AlexNet on CIFAR-10 (PyTorch)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10

In [None]:
class Alexnet(nn.Module):
    """AlexNet-style CNN: five convolutional layers plus a three-layer classifier.

    The classifier is sized for a 256x6x6 feature map, which is what the
    convolutional stack produces for 3x227x227 inputs
    (227 -> 55 -> 27 -> 13 -> 6 along each spatial axis).

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.net = nn.Sequential(
            # First convolutional layer
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),

            # Second convolutional layer
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),

            # Third convolutional layer
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),

            # Fourth convolutional layer
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),  # 13*13*384
            nn.ReLU(),

            # Fifth convolutional layer
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),  # 13*13*256
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=(256 * 6 * 6), out_features=4096),  # 6*6*256 -> 4096
            nn.ReLU(),

            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),

            nn.Linear(in_features=4096, out_features=num_classes),
        )

    def init_bias(self):
        """Re-initialise the convolutional layers of ``self.net``.

        Every conv weight is drawn from N(0, 0.01). The biases of the 2nd, 4th
        and 5th conv layers are set to 1 and the remaining conv biases to 0.
        The classifier layers are left at their existing initialisation.
        """
        # 1-based positions of the conv layers whose bias starts at 1;
        # these correspond to self.net[4], self.net[10] and self.net[12].
        unit_bias_convs = {2, 4, 5}
        conv_layers = [layer for layer in self.net if isinstance(layer, nn.Conv2d)]
        for position, conv in enumerate(conv_layers, start=1):
            nn.init.normal_(conv.weight, mean=0, std=0.01)
            nn.init.constant_(conv.bias, 1 if position in unit_bias_convs else 0)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        x = self.net(x)
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 256*6*6)`` this never folds extra spatial positions into
        # the batch axis: a wrongly sized input fails at the first Linear layer.
        x = torch.flatten(x, start_dim=1)
        return self.classifier(x)








In [None]:
# Values shared by the training and evaluation pipelines.
IMAGE_SIZE = (227, 227)  # spatial size every image is resized to
NORM_MEAN = [0.4914, 0.4822, 0.4465]  # per-channel mean passed to Normalize
NORM_STD = [0.2470, 0.2435, 0.2616]  # per-channel std passed to Normalize
DATA_ROOT = './data'
BATCH_SIZE = 64

# Training pipeline: resize, random augmentation, then tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Evaluation pipeline: the same resize and normalisation, without augmentation.
test_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# CIFAR-10 splits; the data is downloaded into DATA_ROOT if it is not already there.
train_dataset = CIFAR10(root=DATA_ROOT, train=True, transform=train_transform, download=True)
test_dataset = CIFAR10(root=DATA_ROOT, train=False, transform=test_transform, download=True)

# Only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Use the GPU when CUDA is available; otherwise run on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Build the 10-class model, then move its parameters onto the selected device.
alexnet = Alexnet(num_classes=10)
alexnet = alexnet.to(device)

In [None]:
# Print the module tree to inspect the layers and their indices inside each Sequential.
print(alexnet)

Alexnet(
  (net): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): ReLU()
    (6): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2)
    (7): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): Re

In [None]:
# Apply the model's custom conv-layer initialisation (Alexnet.init_bias) before training.
alexnet.init_bias()

In [None]:
# Cross-entropy over the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()

In [None]:
# Adam over every model parameter with a learning rate of 1e-4.
optimizer = optim.Adam(alexnet.parameters(), lr=1e-4)

In [None]:
# Multiply the learning rate by 0.1 after every 30 calls to scheduler.step().
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

In [None]:
num_epochs = 90


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    Args:
        model: Network to run.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable; assumed to return the mean loss of a batch.
        device: Device each batch is moved to before the forward pass.
        optimizer: If given, the model is put in training mode and updated
            after every batch. If ``None``, the model is put in eval mode and
            gradient tracking is disabled.

    Returns:
        The per-sample mean loss over the whole pass and the accuracy in percent.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(is_training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            if is_training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()

            batch_size = labels.size(0)
            # Weight each batch mean by its size so a smaller final batch
            # does not skew the reported average.
            loss_sum += loss.item() * batch_size
            _, predicted = outputs.max(1)
            n_correct += predicted.eq(labels).sum().item()
            n_seen += batch_size

    return loss_sum / n_seen, 100. * n_correct / n_seen


for epoch in range(num_epochs):
    # training stats
    train_loss, train_acc = _run_epoch(alexnet, train_loader, criterion, device, optimizer)
    print(f'Epoch [{epoch+1}/{num_epochs}] | Loss: {train_loss:.4f} | Acc: {train_acc:.2f}%')

    # Advance the learning-rate schedule once per completed training epoch.
    scheduler.step()

    # validation
    val_loss, val_acc = _run_epoch(alexnet, test_loader, criterion, device)
    print(f'Validation | Loss: {val_loss:.4f} | Acc: {val_acc:.2f}%')


Epoch [1/90] | Loss: 1.9015 | Acc: 28.68%
Validation | Loss: 1.5398 | Acc: 41.77%
Epoch [2/90] | Loss: 1.5080 | Acc: 45.06%
Validation | Loss: 1.2480 | Acc: 54.73%
Epoch [3/90] | Loss: 1.2925 | Acc: 53.96%
Validation | Loss: 1.0892 | Acc: 61.81%
Epoch [4/90] | Loss: 1.1319 | Acc: 59.84%
Validation | Loss: 0.9564 | Acc: 66.16%
Epoch [5/90] | Loss: 1.0043 | Acc: 64.57%
Validation | Loss: 0.9195 | Acc: 67.67%
Epoch [6/90] | Loss: 0.9211 | Acc: 67.75%
Validation | Loss: 0.8162 | Acc: 71.65%
Epoch [7/90] | Loss: 0.8432 | Acc: 70.27%
Validation | Loss: 0.7225 | Acc: 75.06%
Epoch [8/90] | Loss: 0.7777 | Acc: 72.59%
Validation | Loss: 0.6809 | Acc: 75.82%
Epoch [9/90] | Loss: 0.7227 | Acc: 74.72%
Validation | Loss: 0.6663 | Acc: 77.21%
Epoch [10/90] | Loss: 0.6732 | Acc: 76.46%
Validation | Loss: 0.6119 | Acc: 79.06%
Epoch [11/90] | Loss: 0.6331 | Acc: 77.86%
Validation | Loss: 0.5876 | Acc: 79.64%
Epoch [12/90] | Loss: 0.5952 | Acc: 79.25%
Validation | Loss: 0.5681 | Acc: 80.86%
Epoch [13/90]