# MNIST: Shallow, Deep, and CNN
Author: Yinjie Xu
* Replace every `None` and `pass` placeholder with your own code to proceed  

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import os
import torch
print(torch.__version__)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(root, name) for name in names)
    for path in full_paths:
        print(path)

# Any results you write to the current directory are saved as output.

## One method for loading Data

In [None]:
# Read both CSV splits into DataFrames.
mnist_test = pd.read_csv("../input/mnist-in-csv/mnist_test.csv")
mnist_train = pd.read_csv("../input/mnist-in-csv/mnist_train.csv")

# Column 0 is used as the target (Y); every remaining column is an input (X).
train_Y, train_X = mnist_train.iloc[:, 0].values, mnist_train.iloc[:, 1:].values
test_Y, test_X = mnist_test.iloc[:, 0].values, mnist_test.iloc[:, 1:].values

# Report the array shapes of the training split, then of the test split.
for features, labels in ((train_X, train_Y), (test_X, test_Y)):
    print(features.shape, labels.shape)

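## Another method - using `torch.utils.data.Dataset`
Each sample returned by the dataset below is an `(image, label)` tuple, so a batch produced by the `DataLoader` is a pair `(images, labels)` of stacked tensors.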

You will need to use:  
https://pytorch.org/docs/stable/data.html

In [None]:
#create self-defining data
class MNISTDataset(torch.utils.data.Dataset):
    """MNIST samples read from a CSV file.

    Every row is one sample: column 0 is used as the label and the remaining
    columns (784 values) are reshaped into a 28x28 float32 image.
    """

    def __init__(self, csv_file, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file holding labels and pixels.
            transform (callable, optional): Optional transform applied to the
                normalized 28x28 image before it is returned.
        """
        self.dataframe = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows of the csv file)."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``.

        The image is standardized per sample (zero mean, unit variance). A
        constant image has zero standard deviation, so it is only centered;
        dividing by zero would turn the whole sample into NaN.
        """
        if torch.is_tensor(index):
            index = index.tolist()

        image = self.dataframe.iloc[index, 1:].values.astype('float32')
        label = self.dataframe.iloc[index, 0]
        image = image.reshape((28, 28))

        centered = image - image.mean()
        std = image.std()
        image = centered / std if std > 0 else centered

        # Honor the user-supplied transform instead of silently ignoring it.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# 1. Create a dataset for both the training data (trainDataset) and the test data (testDataset).
# 2. Use torch.utils.data.DataLoader to group the samples into batches. You can think of it as a packaging process.
# Example: torch.utils.data.DataLoader(dataset=trainDataset, batch_size=10, shuffle=True)
trainDataset = None
trainDataloader = None
testDataset = None
testDataloader = None

<details>
<summary>Solution</summary>

```
trainDataset = MNISTDataset("../input/mnist-in-csv/mnist_train.csv")
trainDataloader = torch.utils.data.DataLoader(trainDataset, batch_size=10, shuffle=True)
testDataset = MNISTDataset("../input/mnist-in-csv/mnist_test.csv")
testDataloader = torch.utils.data.DataLoader(testDataset, batch_size=10000)
```
</details>  

In [None]:
#Run the code below to inspect your data
# Draw one training sample as a small heat map and print its label.
i = 1
plt.figure(figsize=(3, 3))
plt.gca().invert_yaxis()
image, label = trainDataset[i]
plt.pcolormesh(image)
print(label)

# Helper Functions: fit(), val()

In [None]:
#Create a fitting network
def fit(model, epochs):
    """Train ``model`` on ``trainDataloader`` for ``epochs`` passes (exercise template).

    Replace each ``pass`` with the step described in its comment. Until the
    placeholders are filled in, no training happens and the printed loss
    stays at 0.

    Args:
        model: The network to train.
        epochs: Number of passes over ``trainDataloader``.
    """
    pass  # Define the optimizer, e.g. SGD
    pass  # Define the loss function
    for epoch in range(epochs):
        overall_loss = 0
        for i, (X, Y) in enumerate(trainDataloader):
            pass  # Forward propagation: compute the model's predictions for X
            pass  # Compute the loss between the predictions and Y
            pass  # Accumulate the loss into overall_loss
            pass  # Back propagation: compute the gradients of the loss
            pass  # Update the parameters with the optimizer
            pass  # Don't forget to clear the gradients
            # Report the average loss of the last 1000 batches, then restart the running sum.
            if i % 1000 == 999:
                print(f'in epoch {epoch}, batch {i+1}, loss = {overall_loss / 1000}')
                overall_loss = 0.
            
def val(model):
    """Evaluate ``model`` on ``testDataloader`` and print its accuracy.

    Correct predictions and sample counts are accumulated over every batch,
    so the reported accuracy is correct for any batch size and any test-set
    size. Gradient tracking is disabled because no backward pass is needed.

    Args:
        model: Callable mapping a batch ``X`` to per-class scores with the
            class axis at dimension 1.

    Returns:
        None. Prints ``accuracy = <value>`` once; prints nothing when the
        loader yields no samples.
    """
    right_count = 0
    total = 0
    with torch.no_grad():
        for X, Y in testDataloader:
            Y_pred = model(X).argmax(dim=1)
            right_count += (Y == Y_pred).sum().item()
            total += len(Y)
    if total:
        print("accuracy =", right_count / total)

<details>
<summary>Solution</summary>

```
def fit(model, epochs):
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    loss_fn = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        overall_loss = 0
        for i, (X, Y) in enumerate(trainDataloader):
            Y_pred = model(X)
            loss = loss_fn(Y_pred, Y)
            overall_loss += loss.item()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            if i % 1000 == 999:
                print(f'in epoch {epoch}, batch {i+1}, loss = {overall_loss / 1000}')
                overall_loss = 0.
```
</details>  

## Shallow Network 88%

In [None]:
# Shallow baseline: flatten each 28x28 image and map it to 10 class scores with one linear layer.
shallow_layers = [nn.Flatten(), nn.Linear(784, 10)]
model = nn.Sequential(*shallow_layers)

# Train for a single epoch, then report the test accuracy.
fit(model, epochs=1)
val(model)

## Deep Network 95.24%

You will need to use:  
https://pytorch.org/docs/stable/nn.html#sequential  
https://pytorch.org/docs/stable/nn.html#flatten  
https://pytorch.org/docs/stable/nn.html#linear    
https://pytorch.org/docs/stable/nn.html#relu  

In [None]:
model = nn.Sequential(
    # Exercise: replace each `pass` with the layer described above it, separating the layers with commas.
    # First you need a flatten layer
    pass
    # Then you need to create the linear transformation from the input layer to the hidden layer
    pass
    # Then the activation function
    pass
    # Then the linear transformation from the hidden layer to the output layer
    pass
)
fit(model, 1)
val(model)

<details>
<summary>Solution</summary>

```
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 64),
    nn.ReLU(),
    nn.Linear(64, 10))
fit(model, 1)
val(model)
```
</details>  

## CNN 97.85%

You will need to use:  
https://pytorch.org/docs/stable/nn.html#module  
https://pytorch.org/docs/stable/nn.html#conv2d  
https://pytorch.org/docs/stable/nn.html#adaptivemaxpool2d  

A CNN architecture generally consists of the following layers:
* Convolutional layer: used for feature extraction and feature mapping
* ReLU layer: used to increase nonlinearity
* Pooling layer: downsamples (sparsifies) the feature map, reducing its size while keeping the most important feature information

In [None]:
class Preprocess(nn.Module):
    """Parameter-free layer that inserts a new axis of size 1 at position 1."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` with an extra dimension inserted at index 1."""
        with_channel = x.unsqueeze(dim=1)
        return with_channel

In [None]:
# A CNN architecture generally consists of the following layers:
# Convolutional layer: used for feature extraction and feature mapping
# ReLU layer: used to increase nonlinearity
# Pooling layer: downsamples the feature map, reducing its size while keeping the most important feature information
# Fully connected layer
# Exercise: replace the `pass` below with your own CNN `model`.

pass


<details>
<summary>Solution</summary>

```
model = nn.Sequential(
    Preprocess(),
    nn.Conv2d(1, 32, 3),# 26 * 26 * 32
    nn.ReLU(),
    nn.MaxPool2d(2),# 13 * 13 * 32
    nn.Conv2d(32, 16, 5),# 9 * 9 * 16
    nn.ReLU(),
    nn.MaxPool2d(3),# 3 * 3 * 16
    nn.Conv2d(16, 10, 3),# 1 * 1 * 10
    nn.Flatten())
fit(model, 1)
val(model)
```
</details>  

# Well done!
Check out our [github repo](https://github.com/AIwaffle/AIwaffle) for more information