In [2]:
from torchvision import datasets, transforms

import shutil
import torch

import torchvision
import torch.nn as nn

from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as func
from torchvision import datasets, transforms
from torchvision.io import read_image
from torch.optim import lr_scheduler
import torch.optim as optim

import os
import torch
from torchvision import transforms
from PIL import Image

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

# Pick the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"GPU: {device}")

GPU: cuda


In [3]:
# Mount Google Drive at /content/drive (Colab-only helper) so that files
# stored on Drive can be read through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [4]:
# Root folder (on the mounted Google Drive) that holds the images
folder_path = '/content/drive/MyDrive/ImageNet'

import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

# Per-image preprocessing, applied in this order:
#   1. resize to 227x227 (the input size the network below is written for)
#   2. convert the image to a tensor
#   3. normalize each of the three channels with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.Resize(size=(227, 227)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])


In [5]:
# Build the dataset from `folder_path`, applying `transform` to every image
# it loads. The bare `dataset` expression displays a summary of the dataset.
dataset = ImageFolder(root=folder_path, transform=transform)
dataset

Dataset ImageFolder
    Number of datapoints: 4418
    Root location: /content/drive/MyDrive/ImageNet
    StandardTransform
Transform: Compose(
               Resize(size=(227, 227), interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
               Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
           )

In [6]:
# Define the dataset class
class ImageNetDataset(Dataset):
    """Pass-through wrapper that exposes an indexable dataset as a torch `Dataset`.

    Each item of the wrapped object must unpack into exactly two values,
    which are returned as an `(image, label)` tuple.
    """

    def __init__(self, dataset):
        # NOTE(review): the attribute name `octmnist` looks like a leftover from
        # another project; it simply holds the wrapped dataset.
        self.octmnist = dataset

    def __len__(self):
        """Number of samples in the wrapped dataset."""
        wrapped = self.octmnist
        return len(wrapped)

    def __getitem__(self, index):
        """Return the `(image, label)` pair stored at `index`."""
        sample = self.octmnist[index]
        image, label = sample
        return (image, label)

# Wrap the dataset exactly once. The guard keeps this cell idempotent:
# re-running it no longer nests one ImageNetDataset inside another.
if not isinstance(dataset, ImageNetDataset):
    dataset = ImageNetDataset(dataset)

In [7]:
# 70 / 15 / 15 split; the test split absorbs the rounding remainder so the
# three sizes always add up to len(dataset).
data_size = len(dataset)
train_size = int(0.7 * data_size)
val_size = int(0.15 * data_size)
test_size = data_size - train_size - val_size

# A seeded generator makes the split reproducible: without it every run
# (or kernel restart) shuffles different samples into train/val/test.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=split_generator,
)

In [8]:
# Sanity check of the split sizes. Note the tuple order: (train, test, val).
len(train_dataset), len(test_dataset), len(val_dataset)

(3092, 664, 662)

In [9]:
batch_size = 32

# Only the training split needs shuffling. Evaluation metrics are aggregated
# over the whole split, so val/test are iterated in a fixed order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=1)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=1)


In [10]:
# Implementing AlexNet
class ImageNetCNN(nn.Module):
    """AlexNet-style CNN for 3x227x227 inputs.

    Five convolutional layers with three max-pools feed three fully
    connected layers; the last one outputs `num_classes` raw scores
    (logits, no softmax).

    The BatchNorm and Dropout layers are created and registered, but
    `forward` does not apply them (they were disabled in the original
    implementation and that behaviour is kept).

    Args:
        num_classes: size of the final classification layer. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # [227x227x3] INPUT

        self.conv1 = nn.Conv2d(3, 96, kernel_size=(11, 11), stride=4, padding=0)  # [55x55x96] CONV1: 96 11x11 filters at stride 4, pad 0
        self.pool1 = nn.MaxPool2d(kernel_size=(3, 3), stride=2)  # [27x27x96] MAX POOL1: 3x3 filters at stride 2
        self.norm1 = nn.BatchNorm2d(96)  # [27x27x96] NORM1: Normalization layer

        self.conv2 = nn.Conv2d(96, 256, kernel_size=(5, 5), stride=1, padding=2)  # [27x27x256] CONV2: 256 5x5 filters at stride 1, pad 2
        self.drop1 = nn.Dropout(0.3)

        self.pool2 = nn.MaxPool2d(kernel_size=(3, 3), stride=2)  # [13x13x256] MAX POOL2: 3x3 filters at stride 2
        self.norm2 = nn.BatchNorm2d(256)  # [13x13x256] NORM2: Normalization layer

        self.conv3 = nn.Conv2d(256, 384, kernel_size=(3, 3), stride=1, padding=1)  # [13x13x384] CONV3: 384 3x3 filters at stride 1, pad 1
        self.norm3 = nn.BatchNorm2d(384)

        self.conv4 = nn.Conv2d(384, 384, kernel_size=(3, 3), stride=1, padding=1)  # [13x13x384] CONV4: 384 3x3 filters at stride 1, pad 1
        # No trailing comma here: a comma would store a 1-tuple, which
        # nn.Module does not register as a submodule.
        self.norm4 = nn.BatchNorm2d(384)

        self.conv5 = nn.Conv2d(384, 256, kernel_size=(3, 3), stride=1, padding=1)  # [13x13x256] CONV5: 256 3x3 filters at stride 1, pad 1
        self.norm5 = nn.BatchNorm2d(256)

        self.pool3 = nn.MaxPool2d(kernel_size=(3, 3), stride=2)  # [6x6x256] MAX POOL3: 3x3 filters at stride 2

        self.relu = nn.ReLU()

        self.flat = nn.Flatten()
        self.drop2 = nn.Dropout(0.5)

        self.fc6 = nn.Linear(9216, 4096)         # [4096] FC6: 4096 neurons (9216 = 6 * 6 * 256)
        self.fc7 = nn.Linear(4096, 4096)         # [4096] FC7: 4096 neurons
        self.fc8 = nn.Linear(4096, num_classes)  # [num_classes] FC8: class scores

    def forward(self, x):
        """Map a batch of 3x227x227 images to `num_classes` logits per image."""
        # Convolutional feature extractor (normalization/dropout not applied).
        x = self.pool1(self.relu(self.conv1(x)))
        x = self.pool2(self.relu(self.conv2(x)))
        x = self.relu(self.conv3(x))
        x = self.relu(self.conv4(x))
        x = self.pool3(self.relu(self.conv5(x)))

        # Classifier head.
        x = self.flat(x)
        x = self.relu(self.fc6(x))
        x = self.relu(self.fc7(x))
        x = self.fc8(x)
        return x


In [11]:
def val_model(model, loss_fn, loader=None):
    """Evaluate `model` on a held-out loader.

    Args:
        model: network that maps an input batch to per-class logits.
        loss_fn: criterion called as `loss_fn(logits, integer_targets)`,
            e.g. `nn.CrossEntropyLoss()`. It receives raw logits because
            CrossEntropyLoss applies log-softmax itself.
        loader: iterable of `(inputs, labels)` batches. Defaults to the
            notebook-level `val_loader`, so the test split is not touched
            while monitoring training.

    Returns:
        `(accuracy_percent, mean_loss)`: accuracy in percent rounded to two
        decimals, and the per-sample mean loss rounded to four decimals.
        For an empty loader this is `(0.0, nan)`.
    """
    if loader is None:
        loader = val_loader

    # Run on whatever device the model lives on; fall back to the
    # notebook-level `device` for a parameter-free model.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device(device)

    correct = 0
    count = 0
    loss_sum = 0.0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, labels in loader:
                inputs = inputs.to(target_device)
                labels = labels.to(target_device).long()

                logits = model(inputs)
                loss = loss_fn(logits, labels)

                n = labels.size(0)
                # Assumes loss_fn returns the batch mean (the default reduction);
                # weighting by batch size gives a true per-sample average even
                # when the last batch is smaller.
                loss_sum += loss.item() * n
                correct += (logits.argmax(dim=1) == labels).sum().item()
                count += n
    finally:
        # Restore the caller's train/eval mode so training can continue.
        model.train(was_training)

    if count == 0:
        return 0.0, float("nan")

    return round(100.0 * correct / count, 2), round(loss_sum / count, 4)


In [15]:
def train_model_imagenet(model, optimizer, loss_fn, n_epochs=50, early_stopping = False, plot_metrics = False):
    """Train `model` on the notebook-level `train_loader`, validating each epoch.

    Args:
        model: network that maps an input batch to per-class logits.
        optimizer: optimizer already bound to `model.parameters()`.
        loss_fn: criterion called as `loss_fn(logits, integer_targets)`,
            e.g. `nn.CrossEntropyLoss()`. It receives raw logits because
            CrossEntropyLoss applies log-softmax itself.
        n_epochs: number of full passes over `train_loader`.
        early_stopping: accepted for backward compatibility; currently unused.
        plot_metrics: accepted for backward compatibility; currently unused.

    Returns:
        The same `model` instance, trained in place.
    """
    # Run on whatever device the model lives on rather than hard-coding CUDA;
    # fall back to the notebook-level `device` for a parameter-free model.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device(device)

    for epoch in range(n_epochs):
        # Re-enter training mode every epoch in case validation switched to eval.
        model.train()

        loss_sum = 0.0
        len_labels = 0
        correct_train_pred = 0
        print("Epoch started")
        for inputs, labels in train_loader:
            inputs = inputs.to(target_device)
            labels = labels.to(target_device).long()

            logits = model(inputs)
            loss = loss_fn(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            n = labels.size(0)
            # Assumes loss_fn returns the batch mean (the default reduction);
            # weighting by batch size yields a per-sample epoch average.
            loss_sum += loss.item() * n
            correct_train_pred += (logits.argmax(dim=1) == labels).sum().item()
            len_labels += n

        if len_labels:
            train_acc = round((correct_train_pred / len_labels) * 100, 4)
            train_loss = round(loss_sum / len_labels, 4)
        else:
            # Empty training loader: report neutral values instead of dividing by zero.
            train_acc, train_loss = 0.0, float("nan")

        val_acc, val_loss = val_model(model, loss_fn)

        print(f'epoch {epoch + 1}, training accuracy: {train_acc}, training loss: {train_loss}, Validation Accuracy: {val_acc}, Validation loss: {val_loss}')

    return model



In [None]:
# Optimizer settings: SGD with learning rate 1e-2, momentum 0.9 and
# L2 weight decay 5e-4.
# The learning rate is meant to be adjusted manually when validation accuracy
# plateaus; no scheduler is attached in this cell.
n_epochs = 20

imagenet_model = ImageNetCNN().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    imagenet_model.parameters(),
    lr=1e-2,
    momentum=0.9,
    weight_decay=5e-4,
)

imagenet_model = train_model_imagenet(imagenet_model, optimizer, loss_fn, n_epochs=n_epochs)


Epoch started
