In [None]:
from IPython.display import clear_output

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import numpy as np
import matplotlib.pyplot as plt

from torch.optim import Adam, SGD, RMSprop

from google.colab import drive

drive.mount("/content/drive")



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Data Augmentation

In [22]:
# Load and preprocess data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees = 10),
    transforms.ColorJitter( brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])


train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

val_size = 5000
train_size = len(train_dataset) - val_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4, persistent_workers=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4, persistent_workers=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4, persistent_workers=True)


Files already downloaded and verified
Files already downloaded and verified




In [23]:
num_epochs = 50
lr = 1e-4

train_losses = []
val_losses = []

criterion = nn.CrossEntropyLoss()  # multi-class


In [24]:
class Inception(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(Inception, self).__init__()

        self.branch1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU()
        )

        self.branch2 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU()
        )

        self.branch3 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU()
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, out_channels, kernel_size=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU()
        )

        self.out_channels = out_channels * 4

        if in_channels != self.out_channels:
            self.residual = nn.Conv2d(in_channels, self.out_channels, kernel_size=1)
        else:
            self.residual = nn.Identity()

    def forward(self, x):
        branch1_out = self.branch1(x)
        branch2_out = self.branch2(x)
        branch3_out = self.branch3(x)
        branch4_out = self.branch4(x)

        inception_out = torch.cat([branch1_out, branch2_out, branch3_out, branch4_out], 1)
        residual_out = self.residual(x)

        return F.relu(inception_out + residual_out)
class Cifar10Classifier(nn.Module):
    def __init__(self):
        super(Cifar10Classifier, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.inception1 = Inception(64, 64)
        self.inception2 = Inception(256, 128)
        self.inception3 = Inception(512, 256)
        self.inception4 = Inception(1024, 512)
        self.conv2 = nn.Sequential(
            nn.Conv2d(2048, 1024, kernel_size=3, padding=1),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(1024 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.inception1(x)
        x = self.inception2(x)
        x = self.inception3(x)
        x = self.inception4(x)
        x = self.conv2(x)
        x = F.adaptive_avg_pool2d(x, (4, 4))
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model10 = Cifar10Classifier().to(device)
print(model10)

Cifar10Classifier(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (inception1): Inception(
    (branch1): Sequential(
      (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (branch2): Sequential(
      (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU()
    )
    (branch3): Sequential(
      (0): Conv2d(64, 64, kernel_size=(1, 1),

In [25]:
model10.load_state_dict(torch.load("/content/drive/MyDrive/saved_models/model16.pth"))

<All keys matched successfully>

In [None]:
# %%time
optimizers = {
    'Adam': Adam,
    'RMSprop': RMSprop
}
lr = 1e-4
results = {}
num_epochs = 50
criterion = nn.CrossEntropyLoss()  # multi-class

for opt_name, opt_func in optimizers.items():
  torch.save(model10.state_dict(),"/content/drive/MyDrive/saved_models/model16_changedOptimizer.pth")
  optimizer = opt_func (model10.parameters(), lr=lr)
  scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, threshold=0.0001, threshold_mode='abs')
  for epoch_no in range(num_epochs):

    model10.train() 

    epoch_weighted_loss = 0

    for batch_X, batch_y in train_loader:

      batch_X = batch_X.to(device)
      batch_y = batch_y.to(device)

      batch_y_probs = model10(batch_X) 

      loss = criterion(batch_y_probs, batch_y)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      epoch_weighted_loss += (len(batch_y)*loss.item())

    epoch_loss = epoch_weighted_loss/len(train_loader.dataset)
    train_losses.append(epoch_loss)
    scheduler.step(loss)
    torch.save(model10.state_dict(),"/content/drive/MyDrive/saved_models/model16.pth")
    # validation time

    model10.eval()  
    correctly_labelled = 0

    with torch.no_grad():

      val_epoch_weighted_loss = 0

      for val_batch_X, val_batch_y in val_loader:

        val_batch_X = val_batch_X.to(device)
        val_batch_y = val_batch_y.to(device)

        val_batch_y_probs = model10(val_batch_X)

        loss = criterion(val_batch_y_probs, val_batch_y)
        val_epoch_weighted_loss += (len(val_batch_y)*loss.item())

        val_batch_y_pred = val_batch_y_probs.argmax(dim=1)  

        correctly_labelled += (val_batch_y_pred == val_batch_y).sum().item() 

    val_epoch_loss = val_epoch_weighted_loss/len(val_loader.dataset)
    val_losses.append(val_epoch_loss)

    print(f'Epoch: {epoch_no}, train_loss={epoch_loss}, val_loss={val_epoch_loss}. labelled {correctly_labelled}/{len(val_loader.dataset)} correctly ({correctly_labelled/len(val_loader.dataset)*100}% accuracy)')

  print(f'Training complete on device {device}.')


Epoch: 0, train_loss=0.17680582208103604, val_loss=0.135933628821373. labelled 4761/5000 correctly (95.22% accuracy)
Epoch: 1, train_loss=0.16765926986005572, val_loss=0.13776738399267197. labelled 4758/5000 correctly (95.16% accuracy)
Epoch: 2, train_loss=0.1524897727502717, val_loss=0.16279858412742615. labelled 4715/5000 correctly (94.3% accuracy)
Epoch: 3, train_loss=0.14581695112834375, val_loss=0.16887922039031983. labelled 4689/5000 correctly (93.78% accuracy)
Epoch: 4, train_loss=0.14258659785456126, val_loss=0.18064341720342636. labelled 4675/5000 correctly (93.5% accuracy)
Epoch: 5, train_loss=0.13846262357234954, val_loss=0.2100191987991333. labelled 4661/5000 correctly (93.22% accuracy)
Epoch: 6, train_loss=0.12766707763448618, val_loss=0.175384697920084. labelled 4719/5000 correctly (94.38% accuracy)
Epoch: 7, train_loss=0.11916795507205857, val_loss=0.23303988707065582. labelled 4645/5000 correctly (92.9% accuracy)
Epoch: 8, train_loss=0.11899313571386867, val_loss=0.1893