In [1]:
import torch
import math
import numpy as np
# Notebook-wide tensor settings.
# `dtype` is not referenced by any of the cells visible here; it may be used elsewhere.
dtype = torch.float
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
import pandas as pd
from PIL import Image
import utils
from torch.utils.data import DataLoader
import model
import importlib
from train import trainmodel
from sklearn.model_selection import train_test_split


In [2]:
# Download the Kaggle "digit-recognizer" competition data with opendatasets.
# Per the recorded output, the files live in ./digit-recognizer and the download
# is skipped when they are already present.
import opendatasets as od
# NOTE(review): pandas is already imported as `pd` in the first cell; this plain
# `import pandas` looks redundant.
import pandas

od.download("https://www.kaggle.com/competitions/digit-recognizer/data")

Skipping, found downloaded files in "./digit-recognizer" (use force=True to force download)


In [3]:
# Read the train/test CSVs and turn them into NumPy arrays.
file = './digit-recognizer/train.csv'
testfile = './digit-recognizer/test.csv'
df, dftest = pd.read_csv(file), pd.read_csv(testfile)

# Targets come from the `label` column; features are every column after the first
# (assumes `label` is the first column of train.csv -- confirm against the file).
y = df['label'].to_numpy(copy=True)
x = df.iloc[:, 1:].to_numpy(copy=True)
# The test CSV is used as-is: every column is treated as pixel data.
x_test = dftest.to_numpy(copy=True)

In [4]:
# Hold out a fixed fraction of the labelled rows for evaluation.
# The fixed seed makes the split identical on every run.
EVAL_FRACTION = 0.2
SPLIT_SEED = 1
x_train, x_eval, y_train, y_eval = train_test_split(
    x, y, test_size=EVAL_FRACTION, random_state=SPLIT_SEED
)

In [5]:
# Debug helper: open one digit image in the system image viewer (not needed for training)
def pixtoimg(index, data=None):
  """Render one flattened 28x28 image and open it with PIL's viewer.

  Args:
    index: Row index of the image to show.
    data: Optional indexable collection of flat pixel rows (784 values each).
      Defaults to the notebook-level ``x_test`` array, which is what this
      helper always used; pass ``x_train`` or ``x_eval`` to inspect those.

  Returns:
    None. The image is displayed as a side effect; no label is shown.

  Raises:
    ValueError: If the selected row cannot be reshaped to 28x28.
    IndexError: If ``index`` is out of range for the chosen array.
  """
  # Resolve the default at call time so a re-run of the loading cell is picked up.
  source = x_test if data is None else data
  pixels = np.asarray(source[index]).reshape(28, 28)
  # PIL needs 8-bit values to build a greyscale image from a 2-D array.
  img = Image.fromarray(pixels.astype('uint8'))
  img.show()
##################################################################################



In [6]:
# Setting parameters for the dataloaders.
# Training batches are reshuffled every epoch; evaluation uses the same batch size
# but a fixed order, since shuffling adds nothing when only measuring accuracy.
parameters = {'batch_size':32, 'shuffle':True}
eval_parameters = {**parameters, 'shuffle': False}

# Wrap the NumPy splits in the project's dataset class (defined in utils.py).
dataset = utils.digitdataset(x_train, y_train)
dataseteval = utils.digitdataset(x_eval, y_eval)

training_generator = DataLoader(dataset, **parameters)
eval_generator = DataLoader(dataseteval, **eval_parameters)


In [7]:
# Build the network from the project's model module.
# The argument is presumably the number of output classes (digits 0-9) --
# confirm against model.ConvNeuralNet.
NUM_CLASSES = 10
cnn = model.ConvNeuralNet(NUM_CLASSES)

In [8]:
# Train the network; the recorded output prints loss and eval accuracy once per epoch.
NUM_EPOCHS = 20
trainmodel(training_generator, eval_generator, cnn, NUM_EPOCHS)

Epoch [1/20], Loss: 0.2332
Epoch [1/20], Eval: 95.9762 %
Epoch [2/20], Loss: 0.0700
Epoch [2/20], Eval: 96.3452 %
Epoch [3/20], Loss: 0.0453
Epoch [3/20], Eval: 97.5238 %
Epoch [4/20], Loss: 0.0354
Epoch [4/20], Eval: 98.0238 %
Epoch [5/20], Loss: 0.0280
Epoch [5/20], Eval: 98.0238 %
Epoch [6/20], Loss: 0.0192
Epoch [6/20], Eval: 98.3214 %
Epoch [7/20], Loss: 0.0195
Epoch [7/20], Eval: 98.3214 %
Epoch [8/20], Loss: 0.0170
Epoch [8/20], Eval: 97.9881 %
Epoch [9/20], Loss: 0.0138
Epoch [9/20], Eval: 98.4167 %
Epoch [10/20], Loss: 0.0109
Epoch [10/20], Eval: 98.3690 %
Epoch [11/20], Loss: 0.0133
Epoch [11/20], Eval: 98.3095 %
Epoch [12/20], Loss: 0.0152
Epoch [12/20], Eval: 98.3214 %
Epoch [13/20], Loss: 0.0091
Epoch [13/20], Eval: 98.2381 %
Epoch [14/20], Loss: 0.0132
Epoch [14/20], Eval: 98.5476 %
Epoch [15/20], Loss: 0.0090
Epoch [15/20], Eval: 98.2262 %
Epoch [16/20], Loss: 0.0100
Epoch [16/20], Eval: 98.1429 %
Epoch [17/20], Loss: 0.0098
Epoch [17/20], Eval: 98.3214 %
Epoch [18/20], 

In [48]:
# Making the submission file: one predicted digit per test image.
# Switch to inference behaviour (e.g. disables dropout / uses batch-norm running
# statistics, if the model has such layers) and skip autograd bookkeeping.
# NOTE(review): inputs are fed as raw pixel rows exactly as before; confirm this
# matches whatever preprocessing utils.digitdataset applies during training.
cnn.eval()
predicted_labels = []
with torch.no_grad():
    for pix in x_test:
        prediction = cnn(torch.from_numpy(pix).type(torch.float32).to(device))
        predicted_labels.append(torch.argmax(prediction).item())

# Build the frame once instead of appending row by row; ImageId is 1-based.
results = pd.DataFrame({
    'ImageId': list(range(1, len(predicted_labels) + 1)),
    'Label': predicted_labels,
})

    

In [50]:
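# Uncomment to write the predictions to submission.csv (left disabled so re-running the notebook does not overwrite an existing file).
#results.to_csv('submission.csv', encoding='utf-8', index=False)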