In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

In [None]:
from sklearn.datasets import fetch_openml
# Ask for pandas containers explicitly: the cells below call `.to_numpy()`,
# which only exists on DataFrame/Series objects, and the default value of
# `as_frame` has not been the same across scikit-learn releases.
mnist = fetch_openml('mnist_784', as_frame=True, cache=False)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# MNIST pixel intensities are stored as 0-255. Rescale them to [0, 1] so the
# sigmoid units are not pushed into saturation (where their gradient is
# effectively zero) on the very first forward pass.
PIXEL_MAX = 255.0
X = mnist.data.astype('float32') / PIXEL_MAX
y = mnist.target.astype('int64')

### Converting pandas dataframe to numpy array

In [None]:
# np.asarray accepts pandas objects and ndarrays alike, so this cell can be
# re-run safely (a second `.to_numpy()` call on an ndarray would raise).
X = np.asarray(X)
y = np.asarray(y)

In [None]:
# Sanity check: dimensions of the feature matrix, then of the label vector.
for array in (X, y):
    print(array.shape)

(70000, 784)
(70000,)


### Splitting

In [None]:
# First hold out 20% of all samples as the test set ...
X_rest, X_test, y_rest, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# ... then take 25% of what is left as the validation set, which gives a
# 60/20/20 train/validation/test split overall.
X_train, X_val, y_train, y_val = train_test_split(
    X_rest, y_rest, test_size=0.25, random_state=1
)

In [None]:
# Shapes of every split, plus the element types of the training arrays.
split_report = (
    X_train.shape, type(X_train[0]),
    y_train.shape, type(y_train[0]),
    X_test.shape, y_test.shape,
    X_val.shape, y_val.shape,
)
print(*split_report, sep='\n')

(42000, 784)
<class 'numpy.ndarray'>
(42000,)
<class 'numpy.int64'>
(14000, 784)
(14000,)
(14000, 784)
(14000,)


### Initialization

In [None]:
# Seed both generators up front so that the weight initialisation (NumPy) and
# the mini-batch shuffling done later with `random.shuffle` give the same
# results on every Restart & Run All.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# Units per layer, from the input layer to the output layer.
layer_sizes = [784, 64, 10]
num_layers = len(layer_sizes)

# One bias column vector per non-input layer and one weight matrix of shape
# (units in layer, units in previous layer) per pair of adjacent layers, all
# drawn from a standard normal distribution.
biases = [np.random.randn(n_units, 1) for n_units in layer_sizes[1:]]
weights = [
    np.random.randn(n_out, n_in)
    for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])
]

### Activation Functions

In [None]:
def sigmoid(Z):
    """Element-wise logistic function 1 / (1 + exp(-Z)).

    The value is computed in a numerically stable form: `exp` is only ever
    applied to non-positive numbers, so inputs with a large negative value
    no longer produce overflow warnings.

    Parameters
    ----------
    Z : scalar or ndarray
        Pre-activation value(s).

    Returns
    -------
    scalar or ndarray
        Logistic of Z, with the same shape as Z.
    """
    decay = np.exp(-np.abs(Z))  # lies in (0, 1], so it cannot overflow
    result = np.where(Z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # np.where returns a 0-d array for scalar input; indexing with () unwraps
    # it to a scalar and leaves arrays of higher rank unchanged.
    return result[()]

def ReLU(Z):
    """Rectified linear unit: element-wise maximum of 0 and Z."""
    rectified = np.maximum(0, Z)
    return rectified

def sigmoid_derivative(Z):
    """Derivative of the logistic function at Z: s(Z) * (1 - s(Z))."""
    activation = sigmoid(Z)
    return activation * (1 - activation)

### Forward Propagation

In [None]:
def forward_propagation(x):
    """Feed one sample through the network, recording every layer.

    The input is reshaped into a column vector before use.

    Returns
    -------
    (zs, activations)
        `zs` holds the weighted input of each non-input layer;
        `activations` holds the input column followed by the sigmoid
        output of each of those layers.
    """
    column = np.reshape(x, (-1, 1))
    activations = [column]
    zs = []
    for bias, weight in zip(biases, weights):
        weighted_input = np.dot(weight, activations[-1]) + bias
        zs.append(weighted_input)
        activations.append(sigmoid(weighted_input))
    return zs, activations

### SGD

In [None]:
def SGD(training_data, epochs, mini_batch_size, eta, test_data=None):
    """Train the network with mini-batch stochastic gradient descent.

    Parameters
    ----------
    training_data : iterable of (x, y) tuples
        Training samples. The caller's object is left untouched: shuffling
        is done on a private copy.
    epochs : int
        Number of full passes over the training data.
    mini_batch_size : int
        Number of samples handed to `update_mini_batch` at a time; the last
        batch of an epoch may be smaller.
    eta : float
        Learning rate, forwarded to `update_mini_batch`.
    test_data : optional
        When truthy, the zero-based epoch index is printed after each epoch
        as a progress marker.
    """
    # Copy once so that shuffling does not reorder the caller's list.
    samples = list(training_data)
    m_train = len(samples)

    for epoch in range(epochs):
        random.shuffle(samples)
        for start in range(0, m_train, mini_batch_size):
            update_mini_batch(samples[start:start + mini_batch_size], eta)

        if test_data:
            # Progress marker only; no accuracy is computed here.
            print(epoch)

In [None]:
def update_mini_batch(mini_batch, eta):
    """Apply one gradient-descent step computed from a single mini-batch.

    The per-sample gradients returned by `backprop` are summed over the
    batch, and the global `weights` and `biases` are rebound to new lists
    moved against the batch-averaged gradient, scaled by `eta`.
    """
    global weights, biases

    weight_grad_total = [np.zeros(w.shape) for w in weights]
    bias_grad_total = [np.zeros(b.shape) for b in biases]

    for sample, label in mini_batch:
        bias_grad, weight_grad = backprop(sample, label)
        for layer, (gb, gw) in enumerate(zip(bias_grad, weight_grad)):
            bias_grad_total[layer] = bias_grad_total[layer] + gb
            weight_grad_total[layer] = weight_grad_total[layer] + gw

    # Learning rate divided by batch size turns the summed gradient into an
    # averaged one.
    step = eta / len(mini_batch)
    weights = [w - step * total for w, total in zip(weights, weight_grad_total)]
    biases = [b - step * total for b, total in zip(biases, bias_grad_total)]

### Evaluate

In [21]:
def forward_propagation_simp(x):
    """Run a forward pass and return only the output-layer activations.

    Parameters
    ----------
    x : array-like
        One input sample. It is reshaped into a column vector first; a 1-D
        sample would otherwise broadcast against the column-vector biases
        and turn every layer's output into a matrix.

    Returns
    -------
    ndarray
        Column vector of output activations, one row per output unit.
    """
    x = np.reshape(x, (-1, 1))
    for b, w in zip(biases, weights):
        x = sigmoid(np.dot(w, x) + b)
    return x

def evaluate(test_data):
    """Count the samples in `test_data` that the network classifies correctly.

    Parameters
    ----------
    test_data : iterable of (x, y) tuples
        `x` is one input sample and `y` its integer class label.

    Returns
    -------
    int
        Number of samples whose predicted class equals the label. The
        predicted class is the index of the largest output activation.
    """
    correct = 0
    for x, label in test_data:
        # Pass a column vector so the result has one row per output unit
        # regardless of how the sample is stored.
        output = forward_propagation_simp(np.reshape(x, (-1, 1)))
        predicted = int(np.argmax(output))
        correct += int(predicted == label)
    return correct

### Backward propagation

In [None]:
def backprop(x, y):
    """Compute the quadratic-cost gradient for a single training sample.

    Parameters
    ----------
    x : array-like
        Input sample; `forward_propagation` reshapes it into a column.
    y : int or array-like
        Target. A scalar class label is expanded to a one-hot column when
        the output layer has more than one unit, so that it can be compared
        with the output activations unit by unit. A vector target is
        reshaped into a column and used as given.

    Returns
    -------
    (b_deriv, w_deriv)
        Lists of ndarrays with the same shapes as `biases` and `weights`,
        holding the partial derivatives of the cost for this sample.
    """
    b_deriv = [np.zeros(b.shape) for b in biases]
    w_deriv = [np.zeros(w.shape) for w in weights]

    zs, activations = forward_propagation(x)
    output = activations[-1]

    target = np.asarray(y)
    if target.ndim == 0 and output.shape[0] > 1:
        # Subtracting a raw class index from every output unit is
        # meaningless; encode the label as a one-hot column instead.
        one_hot = np.zeros(output.shape)
        one_hot[int(target), 0] = 1.0
        target = one_hot
    else:
        target = target.reshape(-1, 1)

    # Error of the output layer (element-wise product).
    delta = (output - target) * sigmoid_derivative(zs[-1])
    b_deriv[-1] = delta
    w_deriv[-1] = np.dot(delta, activations[-2].T)

    # Propagate the error backwards through the remaining layers. zs[i] and
    # weights[i] belong to the same layer, and activations[i] is that
    # layer's input.
    for i in range(len(zs) - 2, -1, -1):
        delta = np.dot(weights[i + 1].T, delta) * sigmoid_derivative(zs[i])
        b_deriv[i] = delta
        w_deriv[i] = np.dot(delta, activations[i].T)

    return b_deriv, w_deriv

### Main

In [None]:
# Pair every sample with its label so that both can be shuffled and sliced
# together during training.
training_data = [(sample, label) for sample, label in zip(X_train, y_train)]
test_data = [(sample, label) for sample, label in zip(X_test, y_test)]

In [None]:
# Confirm the container type, the element type and the label type of the
# prepared test set.
first_pair = test_data[0]
print(type(test_data), type(first_pair), type(first_pair[1]), sep='\n')

<class 'list'>
<class 'tuple'>
<class 'numpy.int64'>


In [None]:
# Hyperparameters of the training run.
EPOCHS = 30
MINI_BATCH_SIZE = 10
LEARNING_RATE = 3.0

SGD(training_data, EPOCHS, MINI_BATCH_SIZE, LEARNING_RATE, test_data=test_data)

  


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
