In [1]:


import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
for dirname, _, filenames in os.walk('digit-recognizer/'):
    for filename in filenames:
        print(os.path.join(dirname, filename))


digit-recognizer/train.csv
digit-recognizer/test.csv
digit-recognizer/sample_submission.csv


In [2]:
df_train = pd.read_csv("digit-recognizer/train.csv")

In [3]:
x, y = df_train.drop(columns = ["label"]), df_train["label"]

In [4]:
x

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
from sklearn.model_selection import train_test_split

In [6]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size = 0.1)

In [7]:
x_train.shape, x_val.shape, x_test.shape

((30240, 784), (3360, 784), (8400, 784))

In [8]:
import torch
from torch.utils.data import Dataset, DataLoader


class MNISTDataset(Dataset):
    def __init__(self, x, y):
        self.x = torch.Tensor(x.values.reshape(-1, 1, 28, 28))
        self.y = torch.LongTensor(y.values)
    def __len__(self):
        return len(self.x)
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]
    
datasets = {}
datasets["train"] = MNISTDataset(x_train, y_train)
datasets["val"] = MNISTDataset(x_val, y_val)
datasets["test"] = MNISTDataset(x_test, y_test)

In [None]:
sample_x, sample_y = datasets["train"][0]
sample_x, sample_y

In [10]:
dataloaders = {}
BATCH_SIZE = 64
dataloaders["train"] = DataLoader(datasets["train"], batch_size=BATCH_SIZE, shuffle=True)
dataloaders["val"] = DataLoader(datasets["val"], batch_size=BATCH_SIZE, shuffle=False)
dataloaders["test"] = DataLoader(datasets["test"], batch_size=BATCH_SIZE, shuffle=False)

In [11]:
import torch
import torch.nn as nn

class MNISTModel(nn.Module):
    def __init__(self):
        super().__init__()
        
        in_channels = [1, 32, 64]
        out_channels = [32, 64, 128]
        kernel_sizes = [3, 3, 3]
        strides = [1, 1, 1]
        paddings = [1, 1, 1]
        
        self.layer0 = nn.Sequential(
            nn.Conv2d(
                in_channels = in_channels[0], 
                out_channels = out_channels[0], 
                kernel_size = kernel_sizes[0], 
                stride = strides[0], 
                padding = paddings[0]),
            nn.Conv2d(
                in_channels = out_channels[0], 
                out_channels = out_channels[0], 
                kernel_size = kernel_sizes[0], 
                stride = strides[0], 
                padding = paddings[0]),
            nn.BatchNorm2d(out_channels[0]),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2),
        )
        
        self.layer1 = nn.Sequential(
            nn.Conv2d(
                in_channels = in_channels[1], 
                out_channels = out_channels[1], 
                kernel_size = kernel_sizes[1], 
                stride = strides[1], 
                padding = paddings[1]),
            nn.Conv2d(
                in_channels = out_channels[1], 
                out_channels = out_channels[1], 
                kernel_size = kernel_sizes[1], 
                stride = strides[1], 
                padding = paddings[1]),
            nn.BatchNorm2d(out_channels[1]),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2),
        )
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(
                in_channels = in_channels[2], 
                out_channels = out_channels[2], 
                kernel_size = kernel_sizes[2], 
                stride = strides[2], 
                padding = paddings[2]),
            nn.Conv2d(
                in_channels = out_channels[2], 
                out_channels = out_channels[2], 
                kernel_size = kernel_sizes[2], 
                stride = strides[2], 
                padding = paddings[2]),
            nn.BatchNorm2d(out_channels[2]),
            nn.ReLU(),
        )
        
        self.linear0 = nn.Linear(128*7*7, 512)
        self.linear1 = nn.Linear(512, 256)
        self.linear2 = nn.Linear(256, 10)
        
        
    def forward(self, x):
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.reshape(-1, 128*7*7)
        x = self.linear0(x)
        x = self.linear1(x)
        out = self.linear2(x)
        return out

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [13]:
model = MNISTModel()
model.to(device)

MNISTModel(
  (layer0): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer1): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_

In [14]:
sample_x, sample_y = next(iter(dataloaders["train"]))

In [20]:
from torch import optim
EPOCH = 2
optimizer = optim.Adam(model.parameters(), lr = 1e-03)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0)
loss_fn = nn.CrossEntropyLoss()

In [21]:
from tqdm import tqdm


min_val_loss = 10000000

PATH = "best_model.pth"

for e in range(1, EPOCH+1):
    print(f"EPOCH {e}")
    
    model.train()
    train_loss = 0
    for X, y in tqdm(dataloaders["train"]):
        X, y = X.to(device), y.to(device)
        y_pred = model(X)
        
        optimizer.zero_grad()
        loss = loss_fn(y_pred, y)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
    train_loss /= len(dataloaders["train"])
    
    
    with torch.no_grad():
        model.eval()
        val_loss = 0.0
        for X, y in tqdm(dataloaders["val"]):
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            loss = loss_fn(y_pred, y)
            val_loss += loss.item()
        val_loss /= len(dataloaders["val"])
    
    print(f"train loss: {train_loss}, val loss: {val_loss}")
    
    if val_loss < min_val_loss:
        print("saving best model...")
        min_val_loss = val_loss
        torch.save(model.state_dict(), PATH)
        
    scheduler.step()
    print("lr: ", optimizer.param_groups[0]['lr'])

EPOCH 1


100%|██████████| 473/473 [00:43<00:00, 10.75it/s]
100%|██████████| 53/53 [00:01<00:00, 39.24it/s]


train loss: 0.11612252498968026, val loss: 0.11720772627038213
saving best model...
lr:  0.0009755282581475768
EPOCH 2


100%|██████████| 473/473 [00:46<00:00, 10.27it/s]
100%|██████████| 53/53 [00:01<00:00, 38.52it/s]


train loss: 0.06837405225835017, val loss: 0.07479441874959278
saving best model...
lr:  0.0009045084971874736


In [22]:
best_model = MNISTModel()
best_model.load_state_dict(torch.load(PATH))
best_model.to(device)

MNISTModel(
  (layer0): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer1): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_

In [24]:
best_model.eval()

with torch.no_grad():
    correct = 0
    total = 0
    for X, y in tqdm(dataloaders["test"]):
        X, y = X.to(device), y.to(device)
        y_pred = best_model(X)
        y_pred = torch.argmax(y_pred, dim = 1)
        
        correct += torch.sum(y_pred == y)
        total += len(y)
    print(correct/total)

100%|██████████| 132/132 [00:03<00:00, 38.90it/s]

tensor(0.9805, device='cuda:0')





In [26]:
X_final = pd.read_csv("digit-recognizer/test.csv")

In [27]:
X_final = torch.Tensor(X_final.values.reshape(-1, 1, 28, 28)).to(device)

In [28]:
y_final = []

for i in range(0, len(X_final), 1000):
    if i + 1000 >= len(X_final):
        small_y_final = torch.argmax(best_model(X_final[i:]), dim = 1)
    else:
        small_y_final = torch.argmax(best_model(X_final[i:i+1000]), dim = 1)
    y_final.append(small_y_final)
    
y_final = torch.cat(y_final)

In [33]:
y_final

tensor([2, 0, 9,  ..., 3, 9, 2], device='cuda:0')

In [34]:
df_submission = pd.read_csv("digit-recognizer/sample_submission.csv")

In [35]:
df_submission["Label"] = y_final.to("cpu")

In [36]:
df_submission.to_csv("submission4.csv", index = False)

In [37]:
df_submission

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9
