In [1]:
%%bash

# Pin the extra packages so that "Restart & Run All" stays reproducible:
#   * torchinfo is fixed to the version shown in the recorded cell output;
#   * torchmetrics is held below 0.11, the release from which
#     `torchmetrics.functional.accuracy` requires a `task` argument, which the
#     two-argument call used later in this notebook does not supply.
pip install "torchinfo==1.7.0" "torchmetrics<0.11"

Collecting torchinfo
  Downloading torchinfo-1.7.0-py3-none-any.whl (22 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.7.0




In [2]:
import glob
import pathlib
import pickle
import requests
import tarfile
import time

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import model_selection
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils import data
import torchinfo
import torchmetrics
from torchvision import models, transforms

# Load the data

In [3]:
%%bash

ls /kaggle/input/sdaiakaust-ai-bootcamp-august

README.md
sample-submission.csv
test.csv
train.csv


In [4]:
DATA_DIR = pathlib.Path("/kaggle/input/sdaiakaust-ai-bootcamp-august/")

# the training CSV holds the feature columns together with a "labels" column;
# every value is parsed as an unsigned 8-bit integer
train_df = pd.read_csv(DATA_DIR / "train.csv", dtype="uint8")

# separate the label column from the remaining (feature) columns
_train_target = train_df["labels"]
_train_features = train_df.drop(columns="labels")

# the test CSV is parsed the same way and is used as features only
test_features = pd.read_csv(DATA_DIR / "test.csv", dtype="uint8")

In [5]:
_train_features.head()

Unnamed: 0,p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,...,p1014,p1015,p1016,p1017,p1018,p1019,p1020,p1021,p1022,p1023
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
_train_target.head()

0    19
1    23
2     9
3    27
4    23
Name: labels, dtype: uint8

In [7]:
test_features.head()

Unnamed: 0,p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,...,p1014,p1015,p1016,p1017,p1018,p1019,p1020,p1021,p1022,p1023
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
# hold out 10% of the labelled data as a validation set (seeded for repeatability)
_seed = 42
_random_state = np.random.RandomState(_seed)

(train_features,
 val_features,
 train_target,
 val_target) = model_selection.train_test_split(
    _train_features,
    _train_target - 1,  # shift the labels down by one so they cover 0 - 27
    test_size=0.1,
    random_state=_random_state,
)

# Define the fitting loop

In [9]:
def accuracy(output, target):
    """Return the fraction of samples whose highest-scoring class equals the target.

    Parameters
    ----------
    output : torch.Tensor
        Per-class scores with one row per sample; the predicted class is the
        argmax over dimension 1.
    target : torch.Tensor
        Integer class index for each sample.

    Returns
    -------
    torch.Tensor
        Zero-dimensional float tensor holding the accuracy of this batch.
    """
    # Computed with plain tensor operations rather than the two-argument
    # ``torchmetrics.functional.accuracy(output, target)`` call: newer
    # torchmetrics releases require an additional ``task`` argument, so that
    # call fails on a fresh, unpinned install.
    predicted = output.argmax(dim=1)
    return (predicted == target).float().mean()


def partial_fit(model_fn, loss_fn, X_batch, y_batch, opt):
    """Run one optimisation step on a single mini-batch.

    The model output for ``X_batch`` is scored against ``y_batch`` with
    ``loss_fn``, the loss is back-propagated, ``opt`` applies the update and
    the gradients are cleared afterwards. Nothing is returned.
    """
    # forward pass: predictions first, then the loss they incur
    predictions = model_fn(X_batch)
    batch_loss = loss_fn(predictions, y_batch)

    # backward pass followed by the parameter update
    batch_loss.backward()
    opt.step()

    # clear the gradients so the next batch starts from zero
    opt.zero_grad()
    

def validate(model_fn, loss_fn, data_loader):
    """Evaluate a model on every batch of ``data_loader`` without tracking gradients.

    Parameters
    ----------
    model_fn : callable
        Maps a batch of features to per-class scores.
    loss_fn : callable
        Called as ``loss_fn(output, y)``; must return a tensor.
    data_loader : iterable
        Yields ``(X, y)`` batches. It must yield at least one batch, because
        the per-batch results are combined with ``torch.stack``.

    Returns
    -------
    tuple of torch.Tensor
        ``(avg_accuracy, avg_loss)``: the plain (unweighted) means of the
        per-batch accuracies and per-batch losses.

    Notes
    -----
    The function does not change the model's train/eval mode; the caller is
    expected to do that.
    """
    batch_accs = []
    batch_losses = []

    with torch.no_grad():
        for X, y in data_loader:
            # one forward pass per batch, shared by both metrics (the model
            # used to be evaluated twice per batch)
            output = model_fn(X)
            batch_accs.append(accuracy(output, y))
            batch_losses.append(loss_fn(output, y))

        avg_accuracy = torch.stack(batch_accs).mean()
        avg_loss = torch.stack(batch_losses).mean()

    return avg_accuracy, avg_loss


def fit(model_fn,
        loss_fn,
        train_data_loader,
        opt, lr_scheduler,
        val_data_loader=None,
        number_epochs=1):
    """Train ``model_fn`` for ``number_epochs`` epochs.

    Each epoch puts the model in training mode, runs ``partial_fit`` on every
    batch of ``train_data_loader``, switches the model to evaluation mode,
    prints the validation accuracy and loss when ``val_data_loader`` is given,
    and finally advances ``lr_scheduler`` by one step.
    """
    run_validation = val_data_loader is not None

    for epoch in range(number_epochs):
        # --- training phase ---
        model_fn.train()
        for X_batch, y_batch in train_data_loader:
            partial_fit(model_fn, loss_fn, X_batch, y_batch, opt)

        # --- evaluation phase (the mode switch happens even without validation data) ---
        model_fn.eval()
        if run_validation:
            val_acc, val_loss = validate(model_fn, loss_fn, val_data_loader)
            print(f"Training epoch: {epoch}, Validation accuracy: {val_acc}, Validation loss: {val_loss}")

        # --- learning-rate schedule advances once per epoch ---
        lr_scheduler.step()


In [10]:
class CustomDataset(data.Dataset):
    """Dataset of 32x32 single-channel images stored as flat rows of a DataFrame.

    Parameters
    ----------
    features : pandas.DataFrame
        One row per image; each row is reshaped to ``(1, 32, 32)`` and then
        transposed so that samples have shape ``(32, 32, 1)``.
    target : pandas.Series, optional
        One label per row of ``features``. When omitted the dataset is
        unlabeled and yields features only.
    transforms : callable, optional
        Applied to each sample's feature array when it is fetched.
    """

    def __init__(self, features, target=None, transforms = None):
        # (N, 1024) -> (N, 1, 32, 32) -> (N, 32, 32, 1)
        self._data = (features.to_numpy()
                              .reshape(-1, 1, 32, 32)
                              .transpose(0, 2, 3, 1))
        self._target = target.to_numpy() if target is not None else target
        self._transforms = transforms

    def __getitem__(self, index):
        """Return ``(X, y)`` for labeled data, or the 1-tuple ``(X,)`` without labels."""
        X = self._data[index]
        if self._transforms is not None:
            X = self._transforms(X)

        # Without labels there is nothing to index; indexing ``None`` used to
        # raise a TypeError even though the constructor allows ``target=None``.
        if self._target is None:
            return (X,)
        return X, self._target[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self._data)


class LambdaLayer(nn.Module):
    """Module wrapper that lets an arbitrary callable be used as a layer."""

    def __init__(self, f):
        """Store ``f``, the callable applied in ``forward``."""
        super(LambdaLayer, self).__init__()
        self._f = f

    def forward(self, X):
        """Return ``f(X)`` for the callable given at construction."""
        wrapped = self._f
        return wrapped(X)


class WrappedDataLoader:
    """Iterable that applies a function to every batch of an underlying loader."""

    def __init__(self, data_loader, f):
        """Remember the loader to wrap and the per-batch function ``f``."""
        self._f = f
        self._data_loader = data_loader

    def __len__(self):
        """Return the length reported by the wrapped loader."""
        return len(self._data_loader)

    def __iter__(self):
        """Yield ``f(*batch)`` for each batch produced by the wrapped loader."""
        yield from (self._f(*fields) for fields in self._data_loader)

# Define your model

In [11]:
class LeNet5(nn.Module):
    """LeNet-5 style network for single-channel 32x32 images.

    Two 5x5 convolutions, each followed by ``tanh`` and 2x2 average pooling,
    feed two fully connected layers. The output is a tensor of raw scores of
    shape ``(batch, num_classes)``.

    Parameters
    ----------
    num_classes : int, optional
        Width of the output layer. The default of 84 reproduces the layer size
        that was previously hard-coded.
        NOTE(review): the notebook shifts its labels to the range 0 - 27, so
        28 outputs would appear to be sufficient -- confirm before changing
        the default.
    """

    def __init__(self, num_classes=84):
        super().__init__()
        self._conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0)
        self._conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0)
        # 400 = 16 channels * 5 * 5 remaining after the second pooling step
        self._dense1 = nn.Linear(400, 120)
        self._dense2 = nn.Linear(120, num_classes)

    def forward(self, X):
        """Return the output-layer scores for a batch of images."""
        # the network is hard-wired to 1-channel 32x32 input
        X = X.view(-1, 1, 32, 32)
        # torch.tanh replaces the deprecated F.tanh
        X = F.avg_pool2d(torch.tanh(self._conv1(X)), 2)  # 32 -> 28 -> 14
        X = F.avg_pool2d(torch.tanh(self._conv2(X)), 2)  # 14 -> 10 -> 5
        X = X.view(X.size(0), -1)
        X = torch.tanh(self._dense1(X))
        X = self._dense2(X)
        return X

In [12]:
model_fn = LeNet5()

In [13]:
torchinfo.summary(model_fn, input_size=(64, 1, 32, 32))



Layer (type:depth-idx)                   Output Shape              Param #
LeNet5                                   [64, 84]                  --
├─Conv2d: 1-1                            [64, 6, 28, 28]           156
├─Conv2d: 1-2                            [64, 16, 10, 10]          2,416
├─Linear: 1-3                            [64, 120]                 48,120
├─Linear: 1-4                            [64, 84]                  10,164
Total params: 60,856
Trainable params: 60,856
Non-trainable params: 0
Total mult-adds (M): 27.02
Input size (MB): 0.26
Forward/backward pass size (MB): 3.33
Params size (MB): 0.24
Estimated Total Size (MB): 3.84

# Define your DataLoaders

In [14]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# move the tensor from the CPU to the GPU
_to_device = lambda X, y: (X.to(device), y.to(device))

In [15]:
# cross-entropy is the training criterion (same loss function as last time)
loss_fn = F.cross_entropy

# per-sample preprocessing, executed on the CPU when a sample is fetched
_transforms = transforms.Compose([transforms.ToTensor()])

# training data: shuffled mini-batches of 64, each batch moved to `device`
_train_dataset = CustomDataset(train_features, train_target, _transforms)
_train_data_loader = data.DataLoader(
    _train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)
train_data_loader = WrappedDataLoader(_train_data_loader, _to_device)

# validation data: fixed order, batches of 128, each batch moved to `device`
_val_dataset = CustomDataset(val_features, val_target, _transforms)
_val_data_loader = data.DataLoader(
    _val_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)
val_data_loader = WrappedDataLoader(_val_data_loader, _to_device)

# SGD with momentum; the scheduler multiplies the learning rate by 0.9 on
# every `step()` call and reports each adjustment
opt = optim.SGD(model_fn.parameters(), lr=1e-2, momentum=0.9)
lr_scheduler = optim.lr_scheduler.ExponentialLR(opt, gamma=0.9, verbose=True)


Adjusting learning rate of group 0 to 1.0000e-02.


# Train your model

In [16]:
# place the model on the same device the wrapped data loaders send batches to
_ = model_fn.to(device)

# ten epochs of training, reporting validation metrics after each one
fit(
    model_fn=model_fn,
    loss_fn=loss_fn,
    train_data_loader=train_data_loader,
    opt=opt,
    lr_scheduler=lr_scheduler,
    val_data_loader=val_data_loader,
    number_epochs=10,
)

Training epoch: 0, Validation accuracy: 0.125, Validation loss: 3.0532400608062744
Adjusting learning rate of group 0 to 9.0000e-03.
Training epoch: 1, Validation accuracy: 0.3068181872367859, Validation loss: 2.245680809020996
Adjusting learning rate of group 0 to 8.1000e-03.
Training epoch: 2, Validation accuracy: 0.41690343618392944, Validation loss: 1.88618803024292
Adjusting learning rate of group 0 to 7.2900e-03.
Training epoch: 3, Validation accuracy: 0.5063920617103577, Validation loss: 1.6350936889648438
Adjusting learning rate of group 0 to 6.5610e-03.
Training epoch: 4, Validation accuracy: 0.5617898106575012, Validation loss: 1.4477553367614746
Adjusting learning rate of group 0 to 5.9049e-03.
Training epoch: 5, Validation accuracy: 0.5660511255264282, Validation loss: 1.3338687419891357
Adjusting learning rate of group 0 to 5.3144e-03.
Training epoch: 6, Validation accuracy: 0.6292613744735718, Validation loss: 1.211959719657898
Adjusting learning rate of group 0 to 4.7830

# Make predictions using the test data

In [17]:
def predict(model_fn, data_loader):
    """Return the predicted class index for every sample served by ``data_loader``.

    Parameters
    ----------
    model_fn : callable
        Maps a batch of features to per-class scores; the prediction is the
        argmax over dimension 1.
    data_loader : iterable
        Yields 1-tuples ``(X,)`` of feature batches (no targets).

    Returns
    -------
    torch.Tensor
        One-dimensional tensor with one predicted class per sample, in the
        order the samples were served.

    Notes
    -----
    Gradient tracking is disabled while predicting. The function does not
    change the model's train/eval mode.
    """
    batch_preds = []

    with torch.no_grad():
        for X, in data_loader:
            batch_preds.append(model_fn(X).argmax(dim=1))

    # Concatenate rather than stack: batches may differ in size (a short final
    # batch made ``torch.stack`` fail), and the result is one flat prediction
    # per sample instead of a (num_batches, batch_size) grid.
    predictions = torch.cat(batch_preds)

    return predictions


In [18]:
# build the test DataLoader by hand: every 1024-value row becomes a 32x32x1
# array, is converted with ToTensor, and the stacked result is placed on
# `device`; samples are then served one at a time, in order
_test_features = test_features.to_numpy().reshape(-1, 1, 32, 32).transpose(0, 2, 3, 1)
_test_features_tensor = torch.stack(list(map(transforms.ToTensor(), _test_features)))
_test_dataset = data.TensorDataset(_test_features_tensor.to(device))
test_data_loader = data.DataLoader(_test_dataset, shuffle=False, batch_size=1)


In [19]:
# the model was trained on labels shifted down by one, so add 1 to get back
# to the original label values
_test_predictions = predict(model_fn, test_data_loader) + 1

# bring the predictions back to the CPU as a flat NumPy array
test_predictions = _test_predictions.cpu().numpy().flatten()

# Generate your submission file

In [20]:
# one row per test sample: its DataFrame index as "Id" and the predicted
# label as "Category", written without the extra pandas index column
_ = pd.DataFrame(
    {"Id": test_features.index, "Category": test_predictions}
).to_csv("submission.csv", index=False)