In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found in it.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_path in input_file_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Start: settings

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

%matplotlib inline
# Report the installed PyTorch release.
print(f"Pytorch version:[{torch.__version__!s}]")


In [None]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(f"device:[{device!s}].")

# Load data

In [None]:
# Load the competition's train, test and sample-submission CSV files, in that order.
train, test, sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/digit-recognizer/train.csv',
        '/kaggle/input/digit-recognizer/test.csv',
        '/kaggle/input/digit-recognizer/sample_submission.csv',
    )
)

In [None]:
# Show the (rows, columns) shape of both raw DataFrames, one per line.
print(
    f"Training data size is {train.shape}",
    f"Testing data size is {test.shape}",
    sep="\n",
)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Separate the label column (digits 0-9) from the pixel columns.
is_pixel_column = train.columns != "label"
targets_numpy = train["label"].values
features_numpy = train.loc[:, is_pixel_column].values / 255  # normalization

# Hold out 20% of the labelled rows; random_state pins the split.
(features_train, features_test,
 targets_train, targets_test) = train_test_split(
    features_numpy,
    targets_numpy,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Wrap the NumPy splits as tensors. Features keep the dtype of the NumPy arrays;
# targets are cast to long (int64).
featuresTrain = torch.from_numpy(features_train)
featuresTest = torch.from_numpy(features_test)

targetsTrain = torch.from_numpy(targets_train).long()
targetsTest = torch.from_numpy(targets_test).long()


In [None]:
# Derive an epoch count from a target number of iterations:
# epochs = n_iters / (batches per epoch), truncated to an integer.
batch_size = 100
n_iters = 10000
batches_per_epoch = len(features_train) / batch_size
num_epochs = int(n_iters / batches_per_epoch)

In [None]:
# Pair each feature tensor with its target tensor as a dataset.
# NOTE(review): this rebinds `train` and `test`, which held the raw DataFrames
# until now, so earlier cells that use them cannot be re-run after this one.
train = torch.utils.data.TensorDataset(featuresTrain, targetsTrain)
test = torch.utils.data.TensorDataset(featuresTest, targetsTest)

# Mini-batch loaders over both datasets, iterated in fixed order (no shuffling).
loader_options = dict(batch_size=batch_size, shuffle=False)
train_loader = torch.utils.data.DataLoader(train, **loader_options)
test_loader = torch.utils.data.DataLoader(test, **loader_options)

---

In [None]:
from torchvision import datasets,transforms
# torchvision MNIST train and test splits stored under ./data/ (download enabled),
# each with its own ToTensor transform.
mnist_train, mnist_test = (
    datasets.MNIST(root='./data/', train=is_train_split,
                   transform=transforms.ToTensor(), download=True)
    for is_train_split in (True, False)
)

In [None]:
# Shuffled mini-batch iterators over the two MNIST splits.
BATCH_SIZE = 256
iter_options = dict(batch_size=BATCH_SIZE, shuffle=True, num_workers=1)
train_iter = torch.utils.data.DataLoader(mnist_train, **iter_options)
test_iter = torch.utils.data.DataLoader(mnist_test, **iter_options)
print("Done.")

# Build the model

In [None]:
class MnistCNN(nn.Module):
    """Convolutional classifier built as one ``nn.Sequential`` (``self.net``).

    Layer order:
      * 5x5 conv -> ReLU -> BatchNorm2d, twice; 2x2 max-pool; Dropout2d
      * 3x3 conv -> ReLU -> BatchNorm2d, twice; 2x2 max-pool; Dropout2d
      * 3x3 conv -> BatchNorm2d -> Dropout2d
      * Flatten -> Linear(256) -> ReLU -> BatchNorm1d -> Dropout -> Linear(ydim)

    Every convolution pads by ``kernel_size // 2``, so only the two max-pools
    change the spatial size (each halves it, rounding down).

    Args:
        name: Free-form label stored on the instance.
        xdim: Input shape as ``(channels, height, width)``.
        ydim: Number of output scores (classes).
    """

    def __init__(self, name='cnn', xdim=(1, 28, 28), ydim=10):
        super(MnistCNN, self).__init__()
        self.name = name
        self.xdim = list(xdim)  # private copy, so the caller's sequence is never shared
        self.ydim = ydim

        self.cdim = 64
        self.layers = []
        in_channels, height, width = self.xdim

        def conv(channels_in, ksize):
            # Convolution that keeps the spatial size for odd kernel sizes.
            return nn.Conv2d(in_channels=channels_in, out_channels=self.cdim,
                             kernel_size=ksize, padding=ksize // 2)

        #----------------------------------------------------------------
        # Stage 1: two 5x5 convolutions, then pool + dropout.
        self.ksize = 5
        self.layers.append(conv(in_channels, self.ksize))
        self.layers.append(nn.ReLU(True))
        self.layers.append(nn.BatchNorm2d(self.cdim))

        self.layers.append(conv(self.cdim, self.ksize))
        self.layers.append(nn.ReLU(True))
        self.layers.append(nn.BatchNorm2d(self.cdim))

        self.layers.append(nn.MaxPool2d(kernel_size=(2, 2)))  # max-pooling
        self.layers.append(nn.Dropout2d(p=0.25))  # channel-wise dropout

        #----------------------------------------------------------------
        # Stage 2: two 3x3 convolutions, then pool + dropout.
        self.ksize = 3
        self.layers.append(conv(self.cdim, self.ksize))
        self.layers.append(nn.ReLU(True))
        self.layers.append(nn.BatchNorm2d(self.cdim))

        self.layers.append(conv(self.cdim, self.ksize))
        self.layers.append(nn.ReLU(True))
        self.layers.append(nn.BatchNorm2d(self.cdim))

        self.layers.append(nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)))  # max-pooling
        self.layers.append(nn.Dropout2d(p=0.25))  # channel-wise dropout

        #----------------------------------------------------------------
        # Stage 3: one more 3x3 convolution (no activation in this stage).
        self.layers.append(conv(self.cdim, self.ksize))
        self.layers.append(nn.BatchNorm2d(self.cdim))
        self.layers.append(nn.Dropout2d(p=0.25))

        #----------------------------------------------------------------
        # Dense head. Two 2x2 pools shrink each spatial side by a factor of 4
        # (floor), which gives 64 * 7 * 7 = 3136 features for a 28x28 input.
        flat_dim = self.cdim * (height // 4) * (width // 4)
        self.layers.append(nn.Flatten())
        self.layers.append(nn.Linear(flat_dim, 256))
        self.layers.append(nn.ReLU(True))  # activation
        self.layers.append(nn.BatchNorm1d(256))
        # Element-wise dropout: the tensor is 2-D (batch, features) here, and
        # Dropout2d is meant for inputs that have channel and spatial dimensions.
        self.layers.append(nn.Dropout(p=0.25))

        #----------------------------------------------------------------
        self.layers.append(nn.Linear(256, self.ydim))

        # Register all layers in order under names like "conv2d_00", "relu_01", ...
        self.net = nn.Sequential()
        for l_idx, layer in enumerate(self.layers):
            layer_name = "%s_%02d" % (type(layer).__name__.lower(), l_idx)
            self.net.add_module(layer_name, layer)
        # init_param() is not called here; callers may invoke it explicitly.

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the final layer's output."""
        return self.net(x)

    def init_param(self):
        """Re-initialise weights in place.

        Conv and Linear layers get Kaiming-normal weights and zero biases;
        BatchNorm layers get weight 1 and bias 0.
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):  # init conv / dense
                nn.init.kaiming_normal_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):  # init BN
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
                
# Instantiate the classifier on the selected device, together with its
# cross-entropy loss and an Adam optimizer (lr = 1e-3).
loss = nn.CrossEntropyLoss()
C = MnistCNN(name='cnn')
C = C.to(device)
optm = optim.Adam(params=C.parameters(), lr=1e-3)
print("Done.")

print(C)

In [None]:
from torchsummary import summary 
# Run torchsummary on the model for a 1-channel 28x28 input.
summary(C, input_size=(1, 28, 28))

In [None]:
# List every trainable parameter (name, shape, first five values) and count them.
np.set_printoptions(precision=3)
n_param = 0
for p_idx, (param_name, param) in enumerate(C.named_parameters()):
    if not param.requires_grad:
        continue  # parameters without gradients are neither printed nor counted
    param_numpy = param.detach().cpu().numpy()  # to numpy array
    flat_values = param_numpy.reshape(-1)
    n_param += flat_values.size
    print(f"[{p_idx:d}] name:[{param_name!s}] shape:[{param_numpy.shape!s}].")
    print(f"    val:{flat_values[:5]!s}")
print(f"Total number of parameters:[{n_param:,d}].")

In [None]:
def func_eval(model,data_iter,device):
    """Return the classification accuracy of ``model`` over ``data_iter``.

    The model is switched to eval mode for the duration of the call (this
    affects Dropout and BatchNorm) and is put back into whichever mode it was
    in beforehand, so evaluating a model that is already in eval mode does not
    silently flip it to train mode.

    Args:
        model: Module called on tensors reshaped with ``view(-1, 1, 28, 28)``;
            its output is arg-maxed over dimension 1.
        data_iter: Iterable yielding ``(inputs, targets)`` batches.
        device: Device that inputs and targets are moved to.

    Returns:
        Fraction of correctly predicted samples as a float, or ``nan`` when
        ``data_iter`` yields no samples.
    """
    was_training = model.training
    model.eval() # evaluate (affects DropOut and BN)
    n_total,n_correct = 0,0
    try:
        with torch.no_grad():
            for batch_in,batch_out in data_iter:
                y_trgt = batch_out.to(device)
                model_pred = model(batch_in.view(-1,1,28,28).to(device))
                _,y_pred = torch.max(model_pred,1)
                n_correct += (y_pred==y_trgt).sum().item()
                n_total += batch_in.size(0)
    finally:
        model.train(was_training) # restore the caller's mode, even on error
    if n_total == 0:
        return float("nan") # accuracy is undefined without samples
    return n_correct/n_total
print ("Done")

In [None]:
# Train C on train_iter for EPOCHS epochs, reporting the mean batch loss and
# the train/test accuracy every `print_every` epochs and after the last epoch.
print ("Start training.")
#C.init_param() # initialize parameters
C.train() # to train mode 
EPOCHS,print_every = 10,1
for epoch in range(EPOCHS):
    loss_val_sum = 0.0
    for batch_in,batch_out in train_iter:
        # Forward path (call the module itself rather than .forward so that
        # any registered hooks run)
        y_pred = C(batch_in.view(-1,1,28,28).to(device))
        loss_out = loss(y_pred,batch_out.to(device))
        # Update
        optm.zero_grad()      # reset gradient 
        loss_out.backward()      # backpropagate
        optm.step()      # optimizer update
        # Accumulate a plain Python float: adding the loss tensor itself would
        # keep every batch's autograd history alive for the whole epoch.
        loss_val_sum += loss_out.item()
    loss_val_avg = loss_val_sum/len(train_iter)
    # Print
    if ((epoch%print_every)==0) or (epoch==(EPOCHS-1)):
        train_accr = func_eval(C,train_iter,device)
        test_accr = func_eval(C,test_iter,device)
        print ("epoch:[%d] loss:[%.3f] train_accr:[%.3f] test_accr:[%.3f]."%
               (epoch,loss_val_avg,train_accr,test_accr))
print ("Done")

In [150]:
# Reload the competition test CSV, divide the pixel values by 255 and wrap
# them in a single-tensor dataset.
# NOTE(review): this rebinds both `test` and `features_numpy`.
test = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')

is_feature_column = test.columns != "label"
features_numpy = torch.from_numpy(
    test.loc[:, is_feature_column].values / 255  # normalization
)

test = torch.utils.data.TensorDataset(features_numpy)

In [153]:
# Predict a class index for every row of `test` and collect them in `a`.
C.eval()  # inference mode (affects Dropout and BatchNorm)
ans = []
# Feed the rows in batches with autograd disabled: no gradient history is
# recorded, and each forward pass covers up to 256 rows instead of one.
with torch.no_grad():
    for (batch,) in torch.utils.data.DataLoader(test, batch_size=256, shuffle=False):
        outputs = C(batch.float().view(-1,1,28,28).to(device))
        prediction = torch.max(outputs, 1)[1]  # index of the highest score per row
        ans.extend(prediction.cpu().tolist())

a = pd.Series(ans,name="Label")

In [178]:
#a = pd.DataFrame(ans,columns=["Label"])

In [174]:
# Build the submission table: a 1-based ImageId column next to the predicted Label.
# The id range follows the number of predictions instead of a hard-coded row count.
submission = pd.concat([pd.Series(range(1, len(a) + 1), name="ImageId"), a], axis=1)

In [175]:
# Preview the first rows of the predictions.
a.head()

Unnamed: 0,Label,ImageId
0,2,1
1,0,2
2,9,3
3,0,4
4,3,5


In [177]:
# Write the ImageId/Label table rather than the bare label Series, so the CSV
# contains both columns.
submission.to_csv("cnn_mnist_submission.csv",index=False)