In [1]:
import torch 
import torch.nn as nn

import torch.nn.functional as F
import matplotlib.pylab as plt
import numpy as np

import os
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torch.optim as optim

# from PIL import Image
import torchvision.transforms as transforms
import pandas as pd

torch.manual_seed(2)

# https://www.kaggle.com/competitions/digit-recognizer/overview

<torch._C.Generator at 0x1faad7156d0>

In [2]:
class MNIST_data:
    def __init__(self, train_path, test_path, batch_size, train_split_ratio):
        self.train_path = train_path
        self.test_path = test_path
        self.batch_size = batch_size
        self.train_split_ratio = train_split_ratio

    class MNIST_train(Dataset):
        def __init__(self, pixels, labels):
            self.pixels = torch.tensor(pixels, dtype=torch.float32).view(-1, 1, 28, 28) / 255.0  # Normalize to [0, 1]
            self.labels = torch.tensor(labels, dtype=torch.long)

        def __len__(self):
            return len(self.labels)

        def __getitem__(self, idx):
            return self.pixels[idx], self.labels[idx]

    class MNIST_test(Dataset):
        def __init__(self, pixels, test_df):
            self.pixels = torch.tensor(pixels, dtype=torch.float32).view(-1, 1, 28, 28) / 255.0  # Normalize to [0, 1]
            self.test_df = test_df

        def __len__(self):
            return len(self.test_df)

        def __getitem__(self, idx):
            return self.pixels[idx]


    def make_train(self, batch_size=None):
        batch_size = batch_size or self.batch_size
        train_df = pd.read_csv(self.train_path)
        labels = train_df['label'].values
        pixels = train_df.drop(columns=['label']).values
        mnist_dataset = self.MNIST_train(pixels, labels)

        train_size = int(self.train_split_ratio * len(mnist_dataset))
        val_size = len(mnist_dataset) - train_size
        train_dataset, val_dataset = random_split(mnist_dataset, [train_size, val_size])

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,drop_last=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

        return train_loader, val_loader
    
    def make_test(self, batch_size=None):
        batch_size = batch_size or self.batch_size # should not be necessary as test datas are not used in the grid_search
        test_df = pd.read_csv(self.test_path)
        pixels = test_df.values

        mnist_dataset_test = self.MNIST_test(pixels, test_df)
        test_loader = DataLoader(mnist_dataset_test, batch_size=batch_size, shuffle=False, drop_last=False)
        return test_loader


In [13]:
class MNIST:
    def __init__(self, mnist_data):
        self.mnist_data = mnist_data

        # loaders :
        self.train_loader, self.val_loader = mnist_data.make_train()
        self.test_loader = mnist_data.make_test()

    def train(self, model, criterion, optimizer, epochs=10, verbose = 0):
        output = {'training_loss': []}  
        for epoch in range(epochs):
            if verbose : print(str(epoch) + " / " + str(epochs))
            for i, (image, pred) in enumerate(self.train_loader):
                optimizer.zero_grad()
                z = model(image)
                loss = criterion(z, pred)
                loss.backward()
                optimizer.step()
                output['training_loss'].append(loss.data.item())
        return output
    

    def evaluation(self, model):
        model.eval()
        count = 0
        for img, label in self.val_loader:
            for i in range(len(label)):
                if model(img[i]).argmax() == label[i] :
                    count = count+1
        return count/(len(self.val_loader)*len(label))
    
    

    def submit(self, model):
        f = open("submission.csv", "a")
        f.write("ImageId,Label\n")
        i = 1
        for x in self.test_loader:
            batch_pred = model(x)
            for elt in batch_pred:
                f.write(str(str(i) + "," + str(elt.argmax().numpy()) + "\n"))
                i = i + 1
        f.close()
        print("File created, ready to submit.")
    

In [18]:
class MNIST_gridSearch:
    def __init__(self, model, mnist: MNIST, criterions, optimizers,
                 epochs = [10],
                 learning_rates = [0.001],
                 batch_sizes = [32]):
        self.model = model
        self.mnist = mnist
        self.criterions = criterions
        self.optimizers = optimizers
        self.epochs = epochs
        self.learning_rates = learning_rates
        self.batch_sizes = batch_sizes
        self.total_iterations = len(criterions) * len(optimizers) * len(learning_rates) * len(batch_sizes) * len(epochs)

    def gridSearch(self, verbose = 0) :
        max = [0, 0, 0, 0, 0, 0] # opt, crit, epoch, l_rate, batch_size, score
        iteration = 0
        for batch_size in self.batch_sizes:
            self.mnist.train_loader, self.mnist.val_loader = self.mnist.mnist_data.make_train(batch_size)
            for optimizer in self.optimizers:
                for criterion in self.criterions:
                    for epoch in self.epochs:
                        for l_rate in self.learning_rates:
                            iteration += 1
                            optim = optimizer(self.model.parameters(), lr=l_rate)
                            self.mnist.train(self.model, criterion(), optim, epoch)

                            score = self.mnist.evaluation(self.model)

                            if verbose >= 1 : print(f"Iteration {iteration} / {self.total_iterations} : score : {score}")
                            if score > max[5] :
                                max = [optimizer, criterion, epoch, l_rate, batch_size, score]
        
        return max

In [19]:
class PaulNet(nn.Module):
    def __init__(self):
        super(PaulNet, self).__init__()

        self.conv1 = nn.Conv2d(1, 8, kernel_size=5, stride = 1, padding = 0)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.fc1 = nn.Linear(12*12*8, 56)
        self.fc2 = nn.Linear(56, 10)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = self.pool1(out)
        out = out.view(-1, 12*12*8)

        out = F.relu(self.fc1(out))
        out = self.fc2(out)
        # out = F.softmax(out, dim=1)

        return out

In [20]:
# learning_rate = 0.001
# epochs = 10
# model = PaulNet()

# optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
# criterion = nn.CrossEntropyLoss()

# data = MNIST_data('train.csv', 'test.csv', 32, 0.8)
# mnist = MNIST(data)

# mnist.train(model, criterion, optimizer, epochs)
# mnist.evaluation(model)

In [25]:
batch_sizes = [16, 32, 64]
lrs = [0.01, 0.007, 0.0005]
epochs = [1, 2, 3]
model = PaulNet()
criterions = [nn.CrossEntropyLoss]
optimizers = [optim.AdamW]

data = MNIST_data('train.csv', 'test.csv', 32, 0.8)
mnist = MNIST(data)
grid_search = MNIST_gridSearch(model, mnist, criterions, optimizers, epochs, lrs, batch_sizes)

best = grid_search.gridSearch(verbose = 1)
best

Iteration 1 / 27 : score : 0.9589285714285715
Iteration 2 / 27 : score : 0.9720238095238095
Iteration 3 / 27 : score : 0.9809523809523809
Iteration 4 / 27 : score : 0.9673809523809523
Iteration 5 / 27 : score : 0.9769047619047619
Iteration 6 / 27 : score : 0.9830952380952381
Iteration 7 / 27 : score : 0.9780952380952381
Iteration 8 / 27 : score : 0.976547619047619
Iteration 9 / 27 : score : 0.9832142857142857
Iteration 10 / 27 : score : 0.9847328244274809
Iteration 11 / 27 : score : 0.9846135496183206
Iteration 12 / 27 : score : 0.9921278625954199
Iteration 13 / 27 : score : 0.9834208015267175
Iteration 14 / 27 : score : 0.9824666030534351
Iteration 15 / 27 : score : 0.9899809160305344
Iteration 16 / 27 : score : 0.9767414122137404
Iteration 17 / 27 : score : 0.9763835877862596
Iteration 18 / 27 : score : 0.9862833969465649
Iteration 19 / 27 : score : 0.9934398854961832
Iteration 20 / 27 : score : 0.9921278625954199
Iteration 21 / 27 : score : 0.9945133587786259
Iteration 22 / 27 : sco

[torch.optim.adamw.AdamW,
 torch.nn.modules.loss.CrossEntropyLoss,
 1,
 0.0005,
 64,
 0.9945133587786259]

In [None]:
# [torch.optim.adamw.AdamW,
#  torch.nn.modules.loss.CrossEntropyLoss,
#  1,
#  0.0005,
#  64,
#  0.9945133587786259]