In [1]:
from IPython.display import clear_output

In [None]:
%pip install torch torchvision torchsummary
%pip install matplotlib
%pip install pillow

clear_output()

In [2]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader

from PIL import Image

from torchvision import transforms
from torchvision.datasets import CIFAR10
import pandas as pd

from time import sleep

%matplotlib inline

## Let's download Cifar10 images

In [3]:
# We're only using this to download dataset. we will manually create a dataset from downloaded files
data = CIFAR10(root='./datasets', train=True, download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./datasets\cifar-10-python.tar.gz


  0%|▎                                                                 | 753664/170498071 [00:17<1:06:38, 42454.59it/s]


KeyboardInterrupt: 

In [None]:
data

In [None]:
class myDataset(Dataset):

  def __init__(self,data,transform,train=True):
    self.dataset = data
    self.transform=transform
    self.train = train

    self.imgdf_list = self.extract_image_and_create_list()


  def extract_image_and_create_list(self):

    dataset_path = "cifar_images"
    if not os.path.isdir(dataset_path):
      os.mkdir(dataset_path)
    dataset_len = len(self.dataset)

    image_path_list = []
    image_label_list = []
    if self.train:
      start_idx = 0
      end_idx = 30000
    else:
      start_idx = 30000
      end_idx = dataset_len

    for i in range(start_idx,end_idx):
      img,label = self.dataset[i]

      save_image_path = dataset_path+"/image_"+str(i)+".png"

      img.save(save_image_path)

      image_path_list.append(save_image_path)
      image_label_list.append(label)

    imgdf = pd.DataFrame({"image_path":image_path_list, "image_label":image_label_list})
    print(imgdf)
    return imgdf


  def __getitem__(self, index):
    df_row = self.imgdf_list.iloc[index]

    image_path = df_row["image_path"]
    label = df_row["image_label"]

    image = Image.open(image_path)

    image = self.transform(image)


    return image,label


  def __len__(self):
    return len(self.imgdf_list)





In [None]:
mytranform = transforms.Compose([ transforms.ToTensor(),
                                 transforms.Normalize(std=(0.5, 0.5, 0.5),
                                                      mean=(0.5, 0.5, 0.5)) ])

train_dataset = myDataset(data=data, transform=mytranform,train=True)
val_dataset = myDataset(data=data, transform=mytranform,train=False)


                         image_path  image_label
0          cifar_images/image_0.png            6
1          cifar_images/image_1.png            9
2          cifar_images/image_2.png            9
3          cifar_images/image_3.png            4
4          cifar_images/image_4.png            1
...                             ...          ...
29995  cifar_images/image_29995.png            2
29996  cifar_images/image_29996.png            5
29997  cifar_images/image_29997.png            3
29998  cifar_images/image_29998.png            7
29999  cifar_images/image_29999.png            1

[30000 rows x 2 columns]
                         image_path  image_label
0      cifar_images/image_30000.png            0
1      cifar_images/image_30001.png            6
2      cifar_images/image_30002.png            0
3      cifar_images/image_30003.png            2
4      cifar_images/image_30004.png            7
...                             ...          ...
19995  cifar_images/image_49995.png        

In [None]:
batch_size = 64

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)  # no need to shuffle validation data

In [None]:
class Cifar10Classifier(nn.Module):

  def __init__(self):

    super(Cifar10Classifier, self).__init__()
    self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1,stride=1)
    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
    self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

    self.relu = nn.ReLU()  # Relu isn't learnable. no need to intialize different relu objects for each layer
    self.pool = nn.MaxPool2d(2, 2)  # pool isn't learnable to no need to initialize different pool layers unless we want to change window size

    self.fc1 = nn.Linear(128 * 4 * 4, 512)
    self.fc2 = nn.Linear(512, 10)

    self.softmax = nn.Softmax(dim=-1)

  def forward(self, x: torch.Tensor):
    single_input = False
    if x.ndim == 3:  # 3 dimensions mean [C, H, W] instead of [B, C, H, W] so we're dealing with a single image
      x = x.unsqueeze(dim=0)  # convert [C, H, W] to [1, C, H, W] where 1 will act as batch size

      # keep track of whether input was one (non-batch) image.
      # If so, we want to convert it back to the same format after inference for consistency purposes
      single_input = True
    x = self.pool(self.relu(self.conv1(x)))
    x = self.pool(self.relu(self.conv2(x)))
    x = self.pool(self.relu(self.conv3(x)))
    x = x.flatten(start_dim=1)
    x = self.relu(self.fc1(x))
    x = self.fc2(x)
    x = self.softmax(x)


    if single_input:
      x = x.squeeze(dim=0)  # or x = x[0]

    return x

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # checks if machine supports cuda and if it does, we use that, otherwise cpu
model = Cifar10Classifier().to(device)

In [None]:
from torch.optim import Adam
num_epochs = 30
lr = 1e-4

train_losses = []
val_losses = []

optimizer = Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()  # multi-class

model.to(device)  # we need to send all input tensors as well as our model to this device. by default they are on cpu

print(f'Using device {device}')

Using device cpu


In [None]:
%%time
for epoch_no in range(num_epochs):

  model.train()  # convert to train model. This turns out train-specific layers in the model (if you dont know about them, an example of them is dropout. more on this later)

  epoch_weighted_loss = 0

  for batch_X, batch_y in train_loader:

    batch_X = batch_X.to(device)
    batch_y = batch_y.to(device)

    batch_y_probs = model(batch_X)  # outputs [N, 10] where each [:, 10] is probabilities for class (0-9)

    loss = criterion(batch_y_probs, batch_y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    epoch_weighted_loss += (len(batch_y)*loss.item())

  epoch_loss = epoch_weighted_loss/len(train_loader.dataset)
  train_losses.append(epoch_loss)


  # validation time

  model.eval()  # take model to evaluation mode. turn off train-only layers
  correctly_labelled = 0

  with torch.no_grad():

    val_epoch_weighted_loss = 0

    for val_batch_X, val_batch_y in val_loader:

      val_batch_X = val_batch_X.to(device)
      val_batch_y = val_batch_y.to(device)

      val_batch_y_probs = model(val_batch_X)

      loss = criterion(val_batch_y_probs, val_batch_y)
      val_epoch_weighted_loss += (len(val_batch_y)*loss.item())

      val_batch_y_pred = val_batch_y_probs.argmax(dim=1)  # convert probailities to labels by picking the label (index) with the highest prob

      correctly_labelled += (val_batch_y_pred == val_batch_y).sum().item()  # item converts tensor to float/int/list

  val_epoch_loss = val_epoch_weighted_loss/len(val_loader.dataset)
  val_losses.append(val_epoch_loss)

  print(f'Epoch: {epoch_no}, train_loss={epoch_loss}, val_loss={val_epoch_loss}. labelled {correctly_labelled}/{len(val_loader.dataset)} correctly ({correctly_labelled/len(val_loader.dataset)*100}% accuracy)')

print(f'Training complete on device {device}.')

Epoch: 0, train_loss=2.163019166692098, val_loss=2.1003595169067384. labelled 7183/20000 correctly (35.915% accuracy)
Epoch: 1, train_loss=2.0722880416870115, val_loss=2.0555192184448243. labelled 8128/20000 correctly (40.64% accuracy)
Epoch: 2, train_loss=2.0347116586049396, val_loss=2.0234809391021726. labelled 8728/20000 correctly (43.64% accuracy)
Epoch: 3, train_loss=2.0094847787221273, val_loss=2.022584180831909. labelled 8688/20000 correctly (43.44% accuracy)
Epoch: 4, train_loss=1.992315344619751, val_loss=2.006229560470581. labelled 9091/20000 correctly (45.455% accuracy)
Epoch: 5, train_loss=1.9745855347951253, val_loss=1.9889001819610597. labelled 9432/20000 correctly (47.160000000000004% accuracy)
Epoch: 6, train_loss=1.9611418661117554, val_loss=1.9802022159576416. labelled 9555/20000 correctly (47.775% accuracy)
Epoch: 7, train_loss=1.9462307881037395, val_loss=1.9757911838531494. labelled 9665/20000 correctly (48.325% accuracy)
Epoch: 8, train_loss=1.934156399790446, val