# Classifying Handwritten Digits Using A Neural Network
## Dataset: MNIST Handwritten Digits via Kaggle

In [91]:
# Import required libraries
import pandas as pd
import numpy as np

### Import and clean data

In [92]:
data = pd.read_csv('train.csv')

In [93]:
# Split data into train and test sets
data = data.reindex(np.random.permutation(data.index))
split = int(data.shape[0]*.75)
train_data = data.iloc[:split]
test_data = data.iloc[split:]

In [94]:
# Format the data into a list of tuples (x,y) where x is the input and y is the desired output
def format_data(df):
    out = []
    for i in range(len(df)):
        arr = np.asarray([df.iloc[i,1:]]).T
        val = np.zeros(10)
        val[df.iloc[i,0]] = 1
        val = np.reshape(val,(10,1))
        out.append((arr,val))
    return out

In [95]:
# Format the train and test sets
train = format_data(train_data)
test = format_data(test_data)

### Construct model

In [96]:
class Net(object):
    def __init__(self,sizes):
        # Initialize random weights and biases based on the given layer sizes
        self.n_layers = len(sizes)
        self.weights = [np.random.randn(r,c) for r,c in zip(sizes[1:],sizes[:-1])]
        self.biases = [np.random.randn(r,1) for r in sizes[1:]]
    
    def sigmoid(self,x):
        # Nonlinear activation function
        return 1/(1+np.exp(-x))
    
    def sigmoid_deriv(self,x):
        # Derivative of the nonlinear activation function
        return self.sigmoid(x) * (1-self.sigmoid(x))
    
    def forward(self,a):
        # Forward propagation through the network
        for w,b in zip(self.weights,self.biases):
            a = self.sigmoid(np.dot(w,a) + b)
        return a
    
    def train(self,data,epochs,alpha):
        # Trains the network
        for e in range(epochs):
            # Loop through every pair of inputs and outputs once per epoch
            for x,y in data:
                a = x
                zs = []
                activations = [a]
                
                # Feedforward
                for w,b in zip(self.weights,self.biases):
                    # Store z vector
                    z = np.dot(w,a)+b
                    zs.append(z)
                    
                    # Store activation vector
                    a = self.sigmoid(z)
                    activations.append(a)
                               
                # Find derivatives of the weights and biases of the last layer
                dz = activations[-1] - y
                dw = np.dot(dz,activations[-2].T)
                
                # Update weights and biases of last layer
                self.weights[-1] -= alpha*dw
                self.biases[-1] -= alpha*dz
                
                # Update weights and biases of previous layers
                for i in range(2,len(zs)+1):
                    dz = np.dot(self.weights[-i+1].T,dz)*self.sigmoid_deriv(zs[-i])
                    dw = np.dot(dz,activations[-i-1].T)
                    
                    self.weights[-i] -= alpha*dw
                    self.biases[-i] -= alpha*dz
                
            print('Epoch',e+1)
            print('Accuracy:',self.accuracy(data))
                    
    def cost(self,data):
        # Find the cost of the network
        cost = np.zeros(data[0][1].shape)
        for x,y in data:
            ypred = self.forward(x)
            cost += (ypred-y)**2
        return np.sum(cost)/len(data)
    
    def accuracy(self,data):
        # Find the percentage of correctly classified outputs
        count = 0
        for x,y in data:
            if np.argmax(self.forward(x)) == np.argmax(y):
                count += 1
                
        return count/len(data)

In [97]:
n = Net([784,300,10])

### Train model

In [98]:
n.train(train,30,0.001)

Epoch 1
Accuracy: 0.6543174603174603
Epoch 2
Accuracy: 0.741015873015873
Epoch 3
Accuracy: 0.7740317460317461
Epoch 4
Accuracy: 0.7913015873015873
Epoch 5
Accuracy: 0.8141269841269841
Epoch 6
Accuracy: 0.823015873015873
Epoch 7
Accuracy: 0.8301904761904761
Epoch 8
Accuracy: 0.8408571428571429
Epoch 9
Accuracy: 0.8522539682539683
Epoch 10
Accuracy: 0.8564761904761905
Epoch 11
Accuracy: 0.8561587301587301
Epoch 12
Accuracy: 0.8635555555555555
Epoch 13
Accuracy: 0.8644126984126984
Epoch 14
Accuracy: 0.870984126984127
Epoch 15
Accuracy: 0.8726031746031746
Epoch 16
Accuracy: 0.8739047619047619
Epoch 17
Accuracy: 0.8790793650793651
Epoch 18
Accuracy: 0.8828571428571429
Epoch 19
Accuracy: 0.8808253968253968
Epoch 20
Accuracy: 0.8828888888888888
Epoch 21
Accuracy: 0.8845079365079365
Epoch 22
Accuracy: 0.8854603174603175
Epoch 23
Accuracy: 0.8890476190476191
Epoch 24
Accuracy: 0.893079365079365
Epoch 25
Accuracy: 0.8934603174603175
Epoch 26
Accuracy: 0.892
Epoch 27
Accuracy: 0.8950476190476191


### Evaluate model performance

In [99]:
# Find training set metrics
print('Training Set')
print('Cost:',n.cost(train))
print('Accuracy:',n.accuracy(train))

Training Set
Cost: 0.185380395753
Accuracy: 0.9006984126984127


In [100]:
# Find test set metrics
print('Test Set')
print('Cost:',n.cost(test))
print('Accuracy:',n.accuracy(test))

Test Set
Cost: 0.193956183987
Accuracy: 0.8961904761904762


The model performs similarly on the training and test sets, showing that the model is not overfitting to the training set

### Make predictions on unseen data

In [101]:
pred_data = pd.read_csv('test.csv')

In [102]:
def predict(data,model):
    out = pd.DataFrame()
    ids = []
    labels = []
    for i in range(len(data)):
        x = np.asarray([data.iloc[i,:]]).T
        y = np.argmax(model.forward(x))
        labels.append(y)
        ids.append(i+1)
        
    out['ImageId'] = pd.Series(ids)   
    out['Label'] = pd.Series(labels)
    out.set_index('ImageId',inplace = True)
        
    return out

In [103]:
predictions = predict(pred_data,n)

In [104]:
predictions.to_csv('predictions.csv')