In [36]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [37]:
# Import necessary libraries
import torch
import torch.nn as nn
import random
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from PIL import Image

In [38]:
# Define transform function
train_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307),(0.3081))
])

test_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307),(0.3081))
])


In [39]:
# Define model
class My_model(nn.Module):
    def __init__(self):
        super(My_model, self).__init__()
        
        self.nn_series = nn.Sequential(                   #output (C, H, W)
            nn.Conv2d(1, 32, 3, stride=1, padding=1),     #output (32, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),                        #output (32, 14, 14)
            
            nn.Conv2d(32, 64, 3, stride=1, padding=1),    #output (64, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),                        #output (64, 7, 7)
        )
        
        self.fn_series = nn.Sequential(
            nn.Linear(64*7*7, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )
    
    def forward(self, data):
        out = self.nn_series(data)
        out = out.view(out.size()[0],-1)
        
        return self.fn_series(out)


In [40]:
# Hyper parameters
seed = 111
learning_rate = 1e-3
batch_size = 64
epochs = 20
device = "cuda" if torch.cuda.is_available() else "cpu"
path = "./mnist"
data_split = 0.3

# Fix random seed and determinitic
random.seed(seed)
np.random.seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [41]:
# Set loss function and optimizer.
model = My_model().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), learning_rate, weight_decay=1e-5)

In [42]:
# Download MNIST data and split the data into training and validation sets.
train_data = datasets.MNIST(root=path, train=True, transform=train_tf, download=True)
valid_length = int(len(train_data)*data_split)
train_length = len(train_data)-valid_length
train_set, valid_set = random_split(train_data, [train_length, valid_length], torch.Generator().manual_seed(seed))

train_loader = DataLoader(train_set, batch_size, shuffle=True, pin_memory=True)
valid_loader = DataLoader(valid_set, batch_size, shuffle=True, pin_memory=True)

In [43]:
# Training process
best_acc = 0
best_loss = torch.inf

for epoch in range(epochs):
    
    # Training stage
    model.train()
    total_loss = 0
    acc = []
    for image, target in train_loader:
        optimizer.zero_grad()
        
        predict = model(image.to(device))
        loss = loss_fn(predict, target.to(device))
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        acc.append(( predict.argmax(-1) == target.to(device) ).float().mean())
        
    mean_train_acc = sum(acc)/len(acc)
    mean_train_loss = total_loss/len(train_loader)
    
    # Valid stage
    model.eval()
    total_loss = 0
    acc = []
    with torch.no_grad():
        for image, target in valid_loader:
            predict = model(image.to(device))
            loss = loss_fn(predict, target.to(device))
            
            total_loss += loss.item()
            acc.append(( predict.argmax(-1) == target.to(device) ).float().mean())
        
        mean_valid_acc = sum(acc)/len(acc)
        mean_valid_loss = total_loss/len(valid_loader)
    
    if best_acc < mean_valid_acc:
        best_acc = mean_valid_acc
        torch.save(model.state_dict(), path + "/model.ckpt")
        print(f'epoch: {epoch+1}/{epochs} => train loss: {mean_train_loss}, train acc: {mean_train_acc}/ valid loss: {mean_valid_loss}, valid acc: {mean_valid_acc} => Best accuracy !!')
    else:
        print(f'epoch: {epoch+1}/{epochs} => train loss: {mean_train_loss}, train acc: {mean_train_acc}/ valid loss: {mean_valid_loss}, valid acc: {mean_valid_acc}')
    

epoch: 1/20 => train loss: 0.13453978600151995, train acc: 0.9574771523475647/ valid loss: 0.05285190119996753, valid acc: 0.983543872833252 => Best accuracy !!
epoch: 2/20 => train loss: 0.0427317079573925, train acc: 0.986182451248169/ valid loss: 0.06884920044103637, valid acc: 0.9793328642845154
epoch: 3/20 => train loss: 0.03000703252191213, train acc: 0.9904157519340515/ valid loss: 0.04651563716676674, valid acc: 0.9871453642845154 => Best accuracy !!
epoch: 4/20 => train loss: 0.020170041908725494, train acc: 0.993008017539978/ valid loss: 0.05507111665410905, valid acc: 0.9852614998817444
epoch: 5/20 => train loss: 0.01748364122712683, train acc: 0.9941495656967163/ valid loss: 0.04692623305277105, valid acc: 0.987311601638794 => Best accuracy !!
epoch: 6/20 => train loss: 0.011680142875066576, train acc: 0.9961234927177429/ valid loss: 0.0525284823672098, valid acc: 0.9874777793884277 => Best accuracy !!
epoch: 7/20 => train loss: 0.011971633606933032, train acc: 0.9961710572

In [44]:
# Testing process
test_data = datasets.MNIST(root="./mnist", train=False, transform=test_tf, download=True)
test_loader = DataLoader(test_data, batch_size=64, pin_memory=True)


total_loss = 0
acc = []
output_list = []
target_list = []
model.eval()

with torch.no_grad():
    for image, target in test_loader:
        predict = model(image.to(device))
        output_list += predict.argmax(-1).tolist()
        target_list += target.tolist()

        loss = loss_fn(predict, target.to(device))
        
        total_loss += loss.item()
        acc.append(( predict.argmax(-1) == target.to(device) ).float().mean())

mean_test_acc = sum(acc)/len(acc)
mean_test_loss = total_loss/len(valid_loader)

print(f'test loss: {mean_test_loss}, test acc: {mean_test_acc}')

# Save predict value
df = pd.DataFrame()
df["ID"] = [ i for i in range(1, len(output_list)+1)]
df["Predict Number"] = output_list
df["Target Number"] = target_list

df.to_csv("./predict.csv", index = False)

test loss: 0.023500364182168717, test acc: 0.990744411945343
