In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torchvision import transforms
import torch.nn.functional as F
from datasets import load_dataset
import matplotlib.pyplot as plt
import pandas as pd

### Check if cuda is available

In [2]:
torch.cuda.is_available()

True

### Cifar10 dataset 

In [3]:
cifar = load_dataset("cifar10",)
i = cifar["train"]["img"][0]
print(i.size)

(32, 32)


Images in cifar10 has small size 32 x 32 which is faster for training. Moreover, the dataset has 10 classes and we can see that both train and test datasets are balanced with equal number of smaples per class

In [None]:
df_train = pd.DataFrame(cifar["train"])
df_train.groupby(by="label").count()

In [None]:
df_test = pd.DataFrame(cifar["test"])
df_test.groupby(by="label").count()

Names of classes

In [None]:
labels = cifar["train"].features["label"].names
label2id, id2label = dict(), dict()
for i, label in enumerate(labels):
    label2id[label] = str(i)
    id2label[str(i)] = label
    
print(labels)

### The Model Architecture

In [4]:
class NaturalSceneClassification(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            nn.Conv2d(3, 32, kernel_size = 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(32,64, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
        
            nn.Conv2d(64, 128, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.Conv2d(128 ,128, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            
            nn.Conv2d(128, 256, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.Conv2d(256,256, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            
            nn.Flatten(),
            nn.Linear(4096,1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512,10)
        )
    
    def forward(self, xb):
        return self.network(xb)

### Dataset Class

In [5]:
transform = transforms.Compose([
    # you can add other transformations in this list
     transforms.Resize((32,32)),
    transforms.ToTensor()
])

class Dataset(Dataset):
    
    # Constructor 
    def __init__(self, X_data, Y_data, transform=transform):
        self.len = len(X_data)
        self.x = X_data
        self.y = Y_data
        self.transform = transform
             
    # Getter
    def __getitem__(self, index):
        x = self.x[index] 
        y = self.y[index]
        if self.transform:
            x = self.transform(x)     
        return x, y
    
    # Get Length
    def __len__(self):
        return self.len


In [None]:
xx = pd.DataFrame(cifar["train"][10000:-1])
xx.groupby("label").count()
#yy = pd.DataFrame(cifar["train"])
#yy.shape


In [6]:
dataset_val = Dataset( X_data = cifar["train"]["img"][0:10000], Y_data = cifar["train"]["label"][0:10000])
val_loader = DataLoader(dataset=dataset_val, batch_size=128, shuffle=True)
dataset_train = Dataset( X_data = cifar["train"]["img"][10000:-1], Y_data = cifar["train"]["label"][10000:-1])
train_loader = DataLoader(dataset=dataset_train, batch_size=128, shuffle=True)
dataset_test = Dataset( X_data = cifar["test"]["img"], Y_data = cifar["test"]["label"])
test_loader = DataLoader(dataset=dataset_test, batch_size=128, shuffle=True)

Preview an image from the dataset

In [None]:
img,label = dataset_train[110]

def display_img(img,label):
    print(f"Label : {label}")
    plt.imshow(img.permute(1,2,0))

#display the first image in the dataset
display_img(img, label)

Choosing the Optimizer and the Cost function

In [7]:
model = NaturalSceneClassification()
model.to("cuda:0")
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_new = nn.CrossEntropyLoss()

### Training the model

In [8]:
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))




for epoch in range(10):

    train_losses = []
    train_acc = []
    val_losses = []
    val_acc = []
    
    model.train()
    for x,y in train_loader:

        optimizer.zero_grad()
        x = x.to("cuda:0")
        y = y.to("cuda:0")
        output = model(x)
        loss = loss_new(output, y)
        acc = accuracy(output, y)
        
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())
        train_acc.append(acc.item())
        

    print("Train epoch loss", sum(train_losses)/len(train_losses), "acc", sum(train_acc)/len(train_acc))
    
    model.eval()
    with torch.no_grad():
        for x,y in val_loader:

            
            x = x.to("cuda:0")
            y = y.to("cuda:0")
            output = model(x)
            loss = loss_new(output, y)
            acc = accuracy(output, y)
            

            val_losses.append(loss.item())
            val_acc.append(acc.item())
            

    print("Val epoch loss", sum(val_losses)/len(val_losses), "acc", sum(val_acc)/len(val_acc))
    



Train epoch loss 1.7866771556317997 acc 0.3274265144960568
Val epoch loss 1.49209642712074 acc 0.4464003164556962
Train epoch loss 1.333000196435581 acc 0.5129820066519057
Val epoch loss 1.2590207797062547 acc 0.5355023734177216
Train epoch loss 1.0888102648738094 acc 0.6044887704209397
Val epoch loss 1.0342649425132364 acc 0.6237143987341772
Train epoch loss 0.9271895883563227 acc 0.6679459690285948
Val epoch loss 0.9539303364633005 acc 0.667128164556962
Train epoch loss 0.7829874957712314 acc 0.7203807242381306
Val epoch loss 0.9745298963558825 acc 0.6640625
Train epoch loss 0.6716162053921733 acc 0.7640188397310032
Val epoch loss 0.816970946290825 acc 0.7217167721518988
Train epoch loss 0.5734268170766557 acc 0.7977882214247609
Val epoch loss 0.8156805642043488 acc 0.7321993670886076
Train epoch loss 0.4755494545062129 acc 0.832061558676223
Val epoch loss 0.8282793496228471 acc 0.734375
Train epoch loss 0.37939704159578197 acc 0.8645868990748835
Val epoch loss 0.8606839828853365 acc

#### Evaluating the model on the test data

In [9]:
model.eval()
with torch.no_grad():
    
    test_losses = []
    test_acc = []
    
    for x,y in test_loader:
        
        x = x.to("cuda:0")
        y = y.to("cuda:0")
        output = model(x)
        
        loss = loss_new(output, y)
        acc = accuracy(output, y)
        
        test_losses.append(loss.item())
        test_acc.append(acc.item())

    print("epoch loss", sum(test_losses)/len(test_losses), "acc", sum(test_acc)/len(test_acc))

epoch loss 0.9665000317971918 acc 0.7389240506329114


Saving the model

In [None]:
torch.save(model.state_dict(), 'model.pth')