## The Network Class

In [None]:
import numpy as np
import random

class Network(object):
    """Fully connected feed-forward network with sigmoid units, trained by
    mini-batch stochastic gradient descent.

    Attributes:
        num_layers: Number of layers, including the input layer.
        sizes: The layer widths passed to the constructor.
        biases: One ``(width, 1)`` column array per non-input layer.
        weights: One ``(width, previous_width)`` array per non-input layer.
    """

    def __init__(self, sizes):
        """Create a network with standard-normal random weights and biases.

        Args:
            sizes: Sequence of layer widths, input layer first
                (e.g. ``[784, 30, 10]``).
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # Biases are drawn before weights; keep this order so that a seeded
        # np.random state yields the same initial parameters as before.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [
            np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])
        ]

    def feedforward(self, a):
        """Return the output-layer activations for the input column ``a``.

        Args:
            a: Input activations; must be compatible with
                ``np.dot(self.weights[0], a)``.
        """
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, alpha, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            training_data: Iterable of ``(x, y)`` pairs. It is copied into a
                list first, so the caller's sequence is not reordered by the
                per-epoch shuffle and one-shot iterables are accepted.
            epochs: Number of full passes over the training data.
            mini_batch_size: Number of examples per gradient step.
            alpha: Learning rate.
            test_data: Optional iterable of ``(x, y)`` pairs. When given, the
                number of correct predictions is printed after every epoch.
        """
        training_data = list(training_data)
        # Identity check: `!= None` would compare element-wise (and then fail
        # in a boolean context) if an array-like were passed.
        if test_data is not None:
            test_data = list(test_data)
            n_test = len(test_data)
        n = len(training_data)

        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k + mini_batch_size]
                for k in range(0, n, mini_batch_size)
            ]

            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, alpha)

            if test_data is not None:
                print("Epoch {0} : {1}/{2}".format(j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} Complete".format(j))

    def update_mini_batch(self, mini_batch, alpha):
        """Apply one gradient-descent step computed from ``mini_batch``.

        Args:
            mini_batch: Non-empty sequence of ``(x, y)`` pairs.
            alpha: Learning rate; the summed gradient is scaled by
                ``alpha / len(mini_batch)``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Sum the per-example gradients over the whole mini-batch.
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        step = alpha / len(mini_batch)
        self.weights = [w - step * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - step * nb for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return the cost gradient for a single training example.

        Args:
            x: Input column vector.
            y: Label, forwarded unchanged to ``cost_derivative``.

        Returns:
            A tuple ``(nabla_b, nabla_w)`` of lists whose entries have the
            same shapes as ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass: remember every weighted input z and every activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass: start with the error at the output layer.
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].T)

        # Negative indices walk backwards from the output: l = 2 is the
        # second-to-last layer, l = 3 the one before it, and so on.
        for l in range(2, self.num_layers):
            sp = sigmoid_prime(zs[-l])
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Return how many examples in ``test_data`` are classified correctly.

        The prediction for an input is the index of the largest output
        activation.

        Args:
            test_data: Iterable of ``(x, y)`` pairs where ``y`` is the class
                index, given as a scalar or as a size-1 array.
        """
        # .item() unwraps size-1 arrays explicitly; calling int() on a
        # non-0-d boolean array is deprecated in recent NumPy releases.
        return sum(
            int(np.argmax(self.feedforward(x)) == np.asarray(y).item())
            for x, y in test_data
        )

    def cost_derivative(self, output_activations, y):
        """Return ``output_activations - target`` for class index ``y``.

        ``target`` is a one-hot array with the same shape as
        ``output_activations`` (rather than a fixed 10 rows), so the method
        works for any output-layer width.

        Args:
            output_activations: Output-layer activations as a column array.
            y: Index of the correct class (an int or a size-1 integer array).
        """
        target = np.zeros(output_activations.shape)
        target[y] = 1
        return output_activations - target

def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)), applied element-wise."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

def sigmoid_prime(z):
    """Return the derivative of the sigmoid function evaluated at z."""
    s = sigmoid(z)
    return s * (1 - s)

## Loading the Data CSV Files

In [None]:
import pandas as pd
## read the training and test CSV files into DataFrames
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (r'FILE PATH\train.csv', r'FILE PATH\test.csv')
)

## Creating the Training Data List
### A list of (x, y) tuples, one per training example

In [83]:
## number of examples in the training data
m = train_data.shape[0]

## labels come from the 'label' column, pixel values from every column after the first
y = train_data['label'].values
## dividing by 255 normalizes the pixel values
x = train_data.iloc[:, 1:].values / 255

## one (x, y) tuple per example; each row of x becomes a column vector
## and each label becomes a (1, 1) array
data = [
    (pixels.reshape(-1, 1), label.reshape(-1, 1))
    for pixels, label in zip(x, y)
]

## Running the Network
### Creating a Network object and calling its SGD() method

In [84]:
## A neural net with 784 inputs, one hidden layer of 30 neurons and 10 outputs
net = Network(sizes=[784, 30, 10])
## test_data is left at its default (None), so only epoch completion is printed
net.SGD(data, epochs=20, mini_batch_size=10, alpha=3.0)

Epoch 0 Complete
Epoch 1 Complete
Epoch 2 Complete
Epoch 3 Complete
Epoch 4 Complete
Epoch 5 Complete
Epoch 6 Complete
Epoch 7 Complete
Epoch 8 Complete
Epoch 9 Complete
Epoch 10 Complete
Epoch 11 Complete
Epoch 12 Complete
Epoch 13 Complete
Epoch 14 Complete
Epoch 15 Complete
Epoch 16 Complete
Epoch 17 Complete
Epoch 18 Complete
Epoch 19 Complete


## Doing Predictions For The Test Set

In [85]:
## number of examples in the test set
m_test = test_data.shape[0]

## convert the test_data dataframe to a numpy array and normalize the images
x_test = test_data.values / 255

## one prediction per test example: each row is reshaped into a column vector
## (feedforward expects a column, not a rank-1 array), passed through the
## trained network, and the index of the largest output is kept
predictions = [
    np.argmax(net.feedforward(image.reshape(-1, 1)))
    for image in x_test
]

## Making A CSV File of our Predictions

In [86]:
## ImageId is 1-based, one row per test example in the same order as the predictions
ddf = pd.DataFrame({
    'ImageId': list(range(1, m_test + 1)),
    'label': predictions,
})
ddf.to_csv(r'FILE PATH\kagle_submission.csv', index=False)