# MNIST Digit Recognizer

## Resource 
[Notebook1](https://www.kaggle.com/code/juiyangchang/cnn-with-pytorch-0-995-accuracy): The data loader is too complicated

[Notebook2](https://www.kaggle.com/code/sdelecourt/cnn-with-pytorch-for-mnist): Simple1.0

[Notebook3](https://www.kaggle.com/code/ateplyuk/pytorch): Simple1.05

[Notebook4](https://www.kaggle.com/code/gustafsilva/cnn-digit-recognizer-pytorch): Simple1.99

## Table of Contents:
[Import](#import)

[Load and Display](#load)

[Pytorch Fast Food](#pytorch)

[Data Loader](#loader)

[Transformation](#transformation)

[CNN](#cnn)

[Train & Evaluation](#train)

[Prediction](#predict)

<a id='import'></a>
## Import Library 

In [84]:
import pandas as pd
import random 
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.nn.functional as F

<a id='load'></a>
## Load data

In [85]:
# Load the Kaggle "digit-recognizer" CSV files; the paths are relative to the notebook's working directory.
train_data=pd.read_csv("../input/digit-recognizer/train.csv")
test_data=pd.read_csv("../input/digit-recognizer/test.csv")

In [86]:
# Preview the first rows of the training frame.
train_data.head()

In [87]:
# Dataset sizes: number of rows in the training and test frames.
n_train=len(train_data)
n_test=len(test_data)

In [88]:
# Display one randomly chosen training image.
n=random.randint(0,n_train-1)  # randint is inclusive on both ends, hence n_train-1
i_img=train_data.iloc[n,1:].to_numpy()  # skip column 0; assumes it is the "label" column — TODO confirm
plt.imshow(i_img.reshape((28,28)))  # the remaining values are laid out as a 28x28 image

In [89]:
# Is the dataset balanced? Bar chart of the number of samples per label.
labels=train_data["label"]
plt.bar(labels.value_counts().index,labels.value_counts())

<a id='pytorch'></a>
## Pytorch Basic

### Resource:
[Notebook](https://www.kaggle.com/code/kanncaa1/pytorch-tutorial-for-deep-learning-lovers)

- Tensor

    torch.Tensor() Vs torch.tensor()
    
    .shape / .type
    
- Variable : 
    
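    The difference between a Variable and a Tensor is that a Variable accumulates gradients. (Since PyTorch 0.4 the two are merged: a plain tensor created with `requires_grad=True` does the same.)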

- NumPy <-> Tensor
    
    torch.from_numpy()
    
    tensor.numpy()

In [90]:
# Build a tensor from a nested Python list and print its shape.
array = [[1,2,3],[4,5,6]]
tensor= torch.Tensor(array)  # torch.Tensor(...) always yields the default float dtype; torch.tensor(...) infers it from the data
print(tensor.shape)

In [91]:
# Tensor factory helpers.
torch.ones((2,3))  # all-ones tensor of shape (2, 3)
torch.rand((2,3))  # uniform random values in [0, 1), shape (2, 3)

In [92]:
# Round trip: Python list -> NumPy array -> tensor -> NumPy array.
array = [[1,2,3],[4,5,6]]
np_array=np.array(array)
torch_array=torch.from_numpy(np_array)  # shares memory with np_array (no copy)
torch_array.numpy()

In [93]:
# A few basic tensor operations; none of them modifies `tensor` in place.
tensor=torch.ones((3,3))
tensor.div(tensor)  # element-wise division
tensor.view((9,1)).shape  # view returns a reshaped tensor without copying the data
tensor.mean()

In [94]:
# Autograd demo: compute o = 1/2 * sum(x**2) and its gradient with respect to x.
tensor = torch.Tensor([2,4])
# NOTE: Variable is deprecated since PyTorch 0.4; tensors carry requires_grad themselves.
x = Variable(tensor, requires_grad = True)

y = x**2
print(" y =  ",y)

o = (1/2)*sum(y)
print(" o =  ",o)

# backward
o.backward() # calculates gradients

print("gradients: ",x.grad)  # d(o)/dx = x, so the printed gradient equals x

<a id='loader'></a>
## Data Loader

- Train test split 
- Numpy -> Long Float Tensor -> Dataloader(Batch)
    
    Long tensor
    
    Float type 
    
    Reshape

In [95]:
# Separate the target column from the pixel columns.
y=train_data['label'].values
# Name the dropped column by keyword: the positional form drop(labels, 1)
# was removed in pandas 2.0 and raises TypeError there.
x=train_data.drop(columns=['label']).values
# Hold out 15% of the labelled rows for evaluation.
x_train, x_test, y_train, y_test=train_test_split(x,y,test_size=0.15)
type(x_train)

In [96]:
# DataFrame values (NumPy arrays) -> torch tensors.
# Labels stay int64; images end up as float32 in (N, 1, 28, 28) layout.
torch_y_train = torch.from_numpy(y_train).long()
torch_y_test = torch.from_numpy(y_test).long()
torch_x_train = torch.from_numpy(x_train).long().view(-1, 1, 28, 28).float()
torch_x_test = torch.from_numpy(x_test).long().view(-1, 1, 28, 28).float()

In [97]:
# pytorch dataset
# Pair each image tensor with its label so that a DataLoader can batch them together.
train = torch.utils.data.TensorDataset(torch_x_train,torch_y_train)
test = torch.utils.data.TensorDataset(torch_x_test,torch_y_test)


In [98]:
# data loader
BATCH_SIZE=32
# Reshuffle the training set every epoch so the mini-batches differ between
# epochs; evaluation order is irrelevant, so the test loader stays unshuffled.
train_loader = torch.utils.data.DataLoader(train, batch_size = BATCH_SIZE, shuffle = True)
test_loader = torch.utils.data.DataLoader(test, batch_size = BATCH_SIZE, shuffle = False)

In [99]:
# Unlabelled test set: same tensor layout as the training images.
x_submission = test_data.values
torch_x_submission = (
    torch.from_numpy(x_submission).long().view(-1, 1, 28, 28).float()
)
# Not shuffled, so predictions keep the row order of test_data.
submission_loader = torch.utils.data.DataLoader(
    torch_x_submission,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

<a id='cnn'></a>
## CNN

### Questions:
- Why is the input size of fc1 64*3*3?

In [100]:
class CNN(nn.Module):
    """Three-convolution CNN for 1x28x28 images; outputs log-probabilities over 10 classes.

    Spatial size through the network (5x5 kernels, no padding):
    28 -> conv1 -> 24 -> conv2 -> 20 -> pool -> 10 -> conv3 -> 6 -> pool -> 3,
    so the flattened feature vector fed to ``fc1`` has 64 * 3 * 3 = 576 values.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        # Fully connected classifier head.
        self.fc1 = nn.Linear(in_features=64 * 3 * 3, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return log-softmax scores of shape (batch, 10) for input of shape (batch, 1, 28, 28)."""
        active = self.training  # dropout is applied only in training mode

        h = F.relu(self.conv1(x))
        h = F.dropout(F.relu(F.max_pool2d(self.conv2(h), 2)), p=0.5, training=active)
        h = F.dropout(F.relu(F.max_pool2d(self.conv3(h), 2)), p=0.5, training=active)

        # Flatten to (batch, 576) for the linear layers.
        h = h.view(-1, self.fc1.in_features)
        h = F.dropout(F.relu(self.fc1(h)), p=0.5, training=active)
        return F.log_softmax(self.fc2(h), dim=1)
cnn=CNN()
print(cnn)


# Smoke test: push one batch through the network and check the output shape.
it = iter(train_loader)
X_batch, y_batch = next(it)
# Call the module itself rather than .forward() so nn.Module hooks are honoured.
print(cnn(X_batch).shape)

<a id="train"></a>
## Train

- Mini-batch gradient descent

In [101]:
# train fit function:
def fit(model,train_loader,epochs=5):
    """Train ``model`` on ``train_loader`` with Adam and cross-entropy loss.

    Every 100 batches a progress line is printed with the loss of the current
    batch and the running accuracy of the current epoch.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to per-class scores.
        train_loader: iterable of ``(x_batch, y_batch)`` pairs that supports
            ``len()`` and exposes ``.dataset`` (e.g. a ``DataLoader``).
        epochs: number of passes over ``train_loader``; defaults to 5, the
            value that used to be hard-coded.
    """
    optimizer=torch.optim.Adam(model.parameters())
    # CrossEntropyLoss applies log-softmax internally; that is a no-op on
    # outputs that already are log-probabilities, so both kinds of model work.
    error=nn.CrossEntropyLoss()
    model.train()
    for epoch in range(epochs):
        correct=0
        seen=0
        for batch_id,(x_batch,y_batch) in enumerate(train_loader):
            optimizer.zero_grad()
            output=model(x_batch)
            loss=error(output,y_batch)
            loss.backward()
            optimizer.step()

            predicted=output.detach().argmax(dim=1)
            correct+=(predicted==y_batch).sum().item()
            # Count the samples actually processed instead of assuming a
            # global batch size: the last batch may be smaller.
            seen+=y_batch.size(0)
            if batch_id%100==0:
                print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
                    epoch,batch_id*len(x_batch),len(train_loader.dataset),
                    100.*batch_id/len(train_loader),loss.item(),100.*correct/seen))

In [102]:
# Train the CNN on the training split.
fit(cnn,train_loader)

In [103]:
# Evaluation:
def evaluate(model,loader=None):
    """Print the classification accuracy of ``model`` on a labelled data loader.

    The model is switched to evaluation mode while scoring (so dropout is
    disabled) and put back into its previous mode afterwards.

    Args:
        model: ``nn.Module`` returning per-class scores of shape (batch, classes).
        loader: iterable of ``(inputs, labels)`` batches. Defaults to the
            module-level ``test_loader``.
    """
    if loader is None:
        loader=test_loader
    was_training=model.training
    model.eval()  # without this, dropout keeps randomly perturbing the predictions
    correct=0
    total=0
    try:
        with torch.no_grad():  # scoring needs no gradients
            for x_batch,y_batch in loader:
                predicted=model(x_batch).argmax(dim=1)
                correct+=(predicted==y_batch).sum().item()
                total+=y_batch.size(0)
    finally:
        model.train(was_training)  # leave the model in the mode it came in
    # Divide by the samples actually seen: the last batch may be smaller than
    # the batch size. An empty loader reports 0% instead of dividing by zero.
    accuracy=100.*correct/total if total else 0.0
    print("Test accuracy:{:.3f}% ".format(accuracy))

In [104]:
# Report accuracy on the held-out split.
evaluate(cnn)

<a id='predict'></a>

## Predict 

- torch.cat -> Batch
- Use dictionary to create dataframe


In [105]:
def prediction(model,loader=None):
    """Return the predicted class index for every sample of an unlabelled loader.

    The model is switched to evaluation mode while predicting (so dropout is
    disabled) and put back into its previous mode afterwards.

    Args:
        model: ``nn.Module`` returning per-class scores of shape (batch, classes).
        loader: iterable of input batches. Defaults to the module-level
            ``submission_loader``.

    Returns:
        1-D ``LongTensor`` of predictions in loader order; empty when the
        loader yields no batches.
    """
    if loader is None:
        loader=submission_loader
    was_training=model.training
    model.eval()  # without this, dropout keeps randomly perturbing the predictions
    batches=[]
    try:
        with torch.no_grad():  # inference needs no gradients
            for x_batch in loader:
                batches.append(model(x_batch).argmax(dim=1))
    finally:
        model.train(was_training)  # leave the model in the mode it came in
    if not batches:
        return torch.LongTensor()
    # Concatenate once at the end instead of re-copying the result per batch.
    return torch.cat(batches,0)

In [106]:
# Predict a label for every image served by the submission loader.
y_pred=prediction(cnn)

In [107]:
# Submission columns: 1-based image id and the predicted digit.
submission_dict = {
    "ImageId": np.arange(1, n_test + 1),
    "Label": y_pred.numpy(),
}

In [108]:
# Build the submission table; the dictionary keys become the column names.
Submission = pd.DataFrame(submission_dict)

In [109]:
# Sanity-check the first rows before writing the file.
Submission.head()

In [110]:
# index=False keeps the DataFrame index out of the CSV, leaving only ImageId and Label.
Submission.to_csv('submission.csv', index=False)