In [None]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler


# Device configuration: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [None]:
def get_train_valid_loader(data_dir,
                           batch_size,
                           augment,
                           random_seed,
                           valid_size=0.1,
                           shuffle=True):
    """Build CIFAR-10 training and validation loaders from one split of the training set.

    Both loaders read the CIFAR-10 training split (``train=True``). A
    ``valid_size`` fraction of its indices is held out for validation and the
    remaining indices are used for training. Every image is resized to
    227x227 and normalised with the per-channel mean/std defined below.

    Args:
        data_dir: Root directory handed to ``datasets.CIFAR10`` (called with
            ``download=True``).
        batch_size: Number of samples per batch for both loaders.
        augment: When true, training images receive a padded random crop and
            a random horizontal flip before being resized. Validation images
            are never augmented.
        random_seed: Seed passed to ``np.random.seed`` before the indices are
            shuffled (only used when ``shuffle`` is true).
        valid_size: Fraction of the training split held out for validation;
            must lie in [0, 1].
        shuffle: Whether to shuffle the indices before splitting them.

    Returns:
        tuple: ``(train_loader, valid_loader)``.

    Raises:
        ValueError: If ``valid_size`` is outside [0, 1].
    """
    if not 0 <= valid_size <= 1:
        raise ValueError(
            'valid_size should be in the range [0, 1], got {}'.format(valid_size))

    # Spatial size every pipeline must produce so that train and validation
    # batches have the same shape.
    image_size = (227, 227)

    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transforms
    valid_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        normalize,
    ])
    if augment:
        train_transform = transforms.Compose([
            # Augment at the native 32x32 resolution, then resize. Without the
            # resize the augmented batches would stay 32x32 and would not
            # match the 227x227 validation/test batches.
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.Resize(image_size),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.Resize(image_size),
            transforms.ToTensor(),
            normalize,
        ])

    # load the dataset: two views of the same training split that differ only
    # in the transform applied to each image
    train_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=train_transform,
    )

    valid_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=valid_transform,
    )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        # Seeding makes the train/validation partition repeatable.
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    # The first `split` indices go to validation, the rest to training, so the
    # two subsets are disjoint.
    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)


def get_test_loader(data_dir,
                    batch_size,
                    shuffle=True):
    """Build a loader over the CIFAR-10 test split (``train=False``).

    Images are resized to 227x227 and normalised with the same per-channel
    CIFAR-10 statistics that the training/validation pipeline in this file
    uses, so the test inputs are on the same scale as the training inputs.

    Args:
        data_dir: Root directory handed to ``datasets.CIFAR10`` (called with
            ``download=True``).
        batch_size: Number of samples per batch.
        shuffle: Forwarded to ``DataLoader(shuffle=...)``.

    Returns:
        torch.utils.data.DataLoader: Loader over the test split.
    """
    # Same mean/std as the training/validation pipeline; the ImageNet
    # statistics used previously shifted the test inputs relative to training.
    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transform
    transform = transforms.Compose([
        transforms.Resize((227, 227)),
        transforms.ToTensor(),
        normalize,
    ])

    dataset = datasets.CIFAR10(
        root=data_dir, train=False,
        download=True, transform=transform,
    )

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle
    )

    return data_loader


# CIFAR10 dataset
# Train/validation loaders: no augmentation, split seeded with 1, batches of 64.
train_loader, valid_loader = get_train_valid_loader(
    data_dir='./data',
    batch_size=64,
    augment=False,
    random_seed=1,
)

# Test loader over the held-out CIFAR10 test split, same batch size.
test_loader = get_test_loader(
    data_dir='./data',
    batch_size=64,
)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [None]:
def evaluate(model, data_loader, optimizer_name, number_of_images, learning_rate, type_images):
    """
    Evaluate the model's classification accuracy on the given data loader.

    The model is switched to evaluation mode for the duration of the call
    (so Dropout is disabled and BatchNorm uses its running statistics) and
    its previous training/evaluation mode is restored before returning.
    A one-line accuracy summary is printed.

    Args:
        model (nn.Module): The model to evaluate.
        data_loader: Iterable yielding ``(images, labels)`` batches, e.g. a
            ``torch.utils.data.DataLoader``.
        optimizer_name (str): Label used as the prefix of the printed line.
        number_of_images: Image count shown in the printed line (display only).
        learning_rate: Learning rate shown in the printed line (display only).
        type_images (str): Split name shown in the printed line, e.g.
            ``'validation'`` or ``'test'``.

    Returns:
        float: The accuracy in percent; ``0.0`` when the loader yields no
        samples.

    """
    was_training = model.training
    model.eval()
    total = 0
    correct = 0
    try:
        with torch.no_grad():
            for images, labels in data_loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                # Index of the largest logit along the class dimension.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Hand the model back in whatever mode the caller had it in, so a
        # training loop that calls this between epochs keeps training.
        model.train(was_training)

    # Guard against an empty loader instead of dividing by zero.
    accuracy = 100 * correct / total if total else 0.0
    print(f'{optimizer_name}(learning rate = {learning_rate})'
          f' - Accuracy of the network on the {number_of_images} '
          f'{type_images} images: {accuracy} %')
    return accuracy

In [None]:
def run_model(learning_rate, optimizer_name, optimizer, model):
    """Train ``model`` and report validation/test accuracy after every epoch.

    Relies on notebook-level globals: ``num_epochs``, ``train_loader``,
    ``valid_loader``, ``test_loader``, ``criterion``, ``device`` and the
    ``evaluate`` function.

    Args:
        learning_rate: Learning rate, forwarded to ``evaluate`` for display
            only; the step size actually used is whatever ``optimizer`` was
            built with.
        optimizer_name (str): Label used as the prefix of every printed line.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called once per
            mini-batch.
        model (nn.Module): Network to train; updated in place.
    """
    # Derived from the loader itself rather than from a separately maintained
    # global, so the function does not depend on another cell having run.
    steps_per_epoch = len(train_loader)

    for epoch in range(num_epochs):
        # Make sure Dropout/BatchNorm are in training mode for every epoch,
        # regardless of the mode the model was left in.
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Mean mini-batch loss over the epoch (NaN for an empty loader).
        mean_loss = running_loss / steps_per_epoch if steps_per_epoch else float('nan')
        print(optimizer_name + ' - Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
            epoch + 1, num_epochs, steps_per_epoch, steps_per_epoch, mean_loss))

        # Validation / test: report the number of samples each loader
        # actually draws instead of hard-coded counts.
        evaluate(model, valid_loader, optimizer_name,
                 len(valid_loader.sampler), learning_rate, 'validation')
        evaluate(model, test_loader, optimizer_name,
                 len(test_loader.sampler), learning_rate, 'test')

In [None]:
class AlexNet(nn.Module):
    """AlexNet-style classifier with batch normalisation after every convolution.

    Five convolutional stages (``layer1`` .. ``layer5``) are followed by three
    fully connected stages (``fc``, ``fc1``, ``fc2``). The first fully
    connected layer takes 9216 flattened features, i.e. 256 channels of 6x6,
    which is what the convolutional stack produces for 227x227 inputs.

    Args:
        num_classes: Size of the final output layer.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size, stride, padding, pool):
        """Conv -> BatchNorm -> ReLU, optionally followed by a 3x3/stride-2 max-pool."""
        modules = [
            nn.Conv2d(in_channels, out_channels,
                      kernel_size=kernel_size, stride=stride, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        if pool:
            modules.append(nn.MaxPool2d(kernel_size=3, stride=2))
        return nn.Sequential(*modules)

    def __init__(self, num_classes=10):
        super().__init__()

        # Convolutional feature extractor.
        self.layer1 = self._conv_stage(3, 96, kernel_size=11, stride=4, padding=0, pool=True)
        self.layer2 = self._conv_stage(96, 256, kernel_size=5, stride=1, padding=2, pool=True)
        self.layer3 = self._conv_stage(256, 384, kernel_size=3, stride=1, padding=1, pool=False)
        self.layer4 = self._conv_stage(384, 384, kernel_size=3, stride=1, padding=1, pool=False)
        self.layer5 = self._conv_stage(384, 256, kernel_size=3, stride=1, padding=1, pool=True)

        # Classifier head: two dropout-regularised hidden layers, then the
        # output layer producing one score per class.
        self.fc = nn.Sequential(nn.Dropout(0.5), nn.Linear(9216, 4096), nn.ReLU())
        self.fc1 = nn.Sequential(nn.Dropout(0.5), nn.Linear(4096, 4096), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(4096, num_classes))

    def forward(self, x):
        """Run the convolutional stages, flatten per sample, then apply the classifier head."""
        features = x
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            features = stage(features)
        # Collapse everything except the leading (batch) dimension.
        flat = features.reshape(features.size(0), -1)
        return self.fc2(self.fc1(self.fc(flat)))

In [None]:
class AlexNetModified(nn.Module):
    """Ablated variant of ``AlexNet``: no batch normalisation and no dropout.

    The convolution, pooling and linear layer shapes are the same as in
    ``AlexNet``; only the ``BatchNorm2d`` layers after each convolution and
    the ``Dropout`` layers in front of the first two linear layers are left
    out.

    Args:
        num_classes: Size of the final output layer.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size, stride, padding, pool):
        """Conv -> ReLU, optionally followed by a 3x3/stride-2 max-pool."""
        modules = [
            nn.Conv2d(in_channels, out_channels,
                      kernel_size=kernel_size, stride=stride, padding=padding),
            nn.ReLU(),
        ]
        if pool:
            modules.append(nn.MaxPool2d(kernel_size=3, stride=2))
        return nn.Sequential(*modules)

    def __init__(self, num_classes=10):
        super().__init__()

        # Convolutional feature extractor (no normalisation layers).
        self.layer1 = self._conv_stage(3, 96, kernel_size=11, stride=4, padding=0, pool=True)
        self.layer2 = self._conv_stage(96, 256, kernel_size=5, stride=1, padding=2, pool=True)
        self.layer3 = self._conv_stage(256, 384, kernel_size=3, stride=1, padding=1, pool=False)
        self.layer4 = self._conv_stage(384, 384, kernel_size=3, stride=1, padding=1, pool=False)
        self.layer5 = self._conv_stage(384, 256, kernel_size=3, stride=1, padding=1, pool=True)

        # Classifier head (no dropout).
        self.fc = nn.Sequential(nn.Linear(9216, 4096), nn.ReLU())
        self.fc1 = nn.Sequential(nn.Linear(4096, 4096), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(4096, num_classes))

    def forward(self, x):
        """Run the convolutional stages, flatten per sample, then apply the classifier head."""
        features = x
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            features = stage(features)
        # Collapse everything except the leading (batch) dimension.
        flat = features.reshape(features.size(0), -1)
        return self.fc2(self.fc1(self.fc(flat)))

In [None]:
# Hyper-parameters for the baseline run.
num_classes, num_epochs = 10, 10
# NOTE(review): batch_size is not read by any visible cell; the loaders above
# are built with a literal 64.
batch_size = 64
learning_rate = 0.005

# Baseline network, moved onto the selected device.
model = AlexNet(num_classes=num_classes).to(device)

# Loss and optimizer: cross-entropy minimised with plain SGD.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)


In [None]:
# (a)
# Number of mini-batches in one pass over the training loader.
total_step = len(train_loader)

# (a) SGD optimizer, learning rate = 0.005: trains the model/optimizer pair
# created in the configuration cell.
run_model(learning_rate, '(a) SGD Optimizer', optimizer, model)

# (a) Adam optimizer, learning rate = 0.00005, on a freshly initialised AlexNet.
learning_rate = 0.00005
model = AlexNet(num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
run_model(learning_rate, '(a) Adam Optimizer', optimizer, model)

(a) SGD Optimizer - Epoch [1/10], Step [704/704], Loss: 1.5785
(a) SGD Optimizer(learning rate = 0.005) - Accuracy of the network on the 5000 validation images: 42.9 %
(a) SGD Optimizer(learning rate = 0.005) - Accuracy of the network on the 10000 test images: 42.81 %
(a) SGD Optimizer - Epoch [2/10], Step [704/704], Loss: 1.1545
(a) SGD Optimizer(learning rate = 0.005) - Accuracy of the network on the 5000 validation images: 50.94 %
(a) SGD Optimizer(learning rate = 0.005) - Accuracy of the network on the 10000 test images: 50.58 %
(a) SGD Optimizer - Epoch [3/10], Step [704/704], Loss: 0.9500
(a) SGD Optimizer(learning rate = 0.005) - Accuracy of the network on the 5000 validation images: 63.78 %
(a) SGD Optimizer(learning rate = 0.005) - Accuracy of the network on the 10000 test images: 63.35 %
(a) SGD Optimizer - Epoch [4/10], Step [704/704], Loss: 0.8295
(a) SGD Optimizer(learning rate = 0.005) - Accuracy of the network on the 5000 validation images: 61.28 %
(a) SGD Optimizer(lear

In [None]:
# (b)
# Adam optimizer with learning rate = 0.005, on a freshly initialised AlexNet.
learning_rate = 0.005
model = AlexNet(num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
run_model(learning_rate, '(b) Adam Optimizer', optimizer, model)

(b) Adam Optimizer - Epoch [1/10], Step [704/704], Loss: 4.2880
(b) Adam Optimizer(learning rate = 0.005) - Accuracy of the network on the 5000 validation images: 12.92 %
(b) Adam Optimizer(learning rate = 0.005) - Accuracy of the network on the 10000 test images: 12.84 %
(b) Adam Optimizer - Epoch [2/10], Step [704/704], Loss: 2.2498
(b) Adam Optimizer(learning rate = 0.005) - Accuracy of the network on the 5000 validation images: 16.36 %
(b) Adam Optimizer(learning rate = 0.005) - Accuracy of the network on the 10000 test images: 16.4 %
(b) Adam Optimizer - Epoch [3/10], Step [704/704], Loss: 2.1538
(b) Adam Optimizer(learning rate = 0.005) - Accuracy of the network on the 5000 validation images: 15.04 %
(b) Adam Optimizer(learning rate = 0.005) - Accuracy of the network on the 10000 test images: 15.98 %
(b) Adam Optimizer - Epoch [4/10], Step [704/704], Loss: 2.1350
(b) Adam Optimizer(learning rate = 0.005) - Accuracy of the network on the 5000 validation images: 16.8 %
(b) Adam Opt

In [None]:
# (c)
# Adam optimizer with learning rate = 0.00005 on AlexNetModified, i.e. AlexNet
# with the batch-norm and dropout lines commented out.
learning_rate = 0.00005
model = AlexNetModified(num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
run_model(learning_rate, '(c) Adam Optimizer', optimizer, model)

(c) Adam Optimizer - Epoch [1/10], Step [704/704], Loss: 1.5490
(c) Adam Optimizer(learning rate = 5e-05) - Accuracy of the network on the 5000 validation images: 55.64 %
(c) Adam Optimizer(learning rate = 5e-05) - Accuracy of the network on the 10000 test images: 55.41 %
(c) Adam Optimizer - Epoch [2/10], Step [704/704], Loss: 1.0797
(c) Adam Optimizer(learning rate = 5e-05) - Accuracy of the network on the 5000 validation images: 65.32 %
(c) Adam Optimizer(learning rate = 5e-05) - Accuracy of the network on the 10000 test images: 65.87 %
(c) Adam Optimizer - Epoch [3/10], Step [704/704], Loss: 0.8387
(c) Adam Optimizer(learning rate = 5e-05) - Accuracy of the network on the 5000 validation images: 71.84 %
(c) Adam Optimizer(learning rate = 5e-05) - Accuracy of the network on the 10000 test images: 70.58 %
(c) Adam Optimizer - Epoch [4/10], Step [704/704], Loss: 0.6641
(c) Adam Optimizer(learning rate = 5e-05) - Accuracy of the network on the 5000 validation images: 74.5 %
(c) Adam Op