In [197]:
#init libraries + data
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import math

data = pd.read_csv('sample_data/mnist_train_small.csv')

data = np.array(data)
m, n = data.shape
np.random.shuffle(data)

data_dev = data[0:1000].T
Y_dev = data_dev[0]
X_dev = data_dev[1:n]

data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]

In [198]:
#inits weights and biases + activation functions
w_1 = np.random.rand(10, 784)
w_2 = np.random.rand(10, 10)
b_1 = np.random.rand(10)
#w_1 = np.zeros((10,784))
#w_2 = np.zeros((10,10))
#b_1 = np.zeros(10)

def relu(x):
    return (x>0) * x
    
def drelu(x):
    return 1 * (x > 0)

def sigmoid(x):
    return 1/(1 + (math.e)**(-x))

def dsigmoid(x):
    return sigmoid(x) * (1-sigmoid(x))
  
print(dsigmoid(199))

0.0


In [201]:
def feedforward(activation, w, b, a):
  z_out = np.matmul(w,a)
  z_out += b.reshape(10)
  n = z_out.size
  a_out = np.copy(z_out)
  if activation == "sigmoid":
    for i in range(n):
      a_out[i] = sigmoid(z_out[i])
  elif activation == "relu":
    for i in range(n):
      a_out[i] = relu(z_out[i])
  elif activation == "none":
    return z_out,a_out
  else:
    return "Unrecognized activation function"
  return z_out,a_out
      
def cost(a, i):
  temp = np.zeros(10)
  temp[int(Y_train[i])] = 1
  return np.sum((a-temp)**2)
  
def backprop(activation,z,a,w,dz):
  dw = np.matmul(dz.reshape((dz.size,1)),a.reshape((1,a.size)))
  dz_out = np.matmul(dz.reshape((1,dz.size)),w)
  n = dz_out.size
  if activation == "sigmoid":
    for x in range(n):
      dz_out[0][x] = dz_out[0][x] * dsigmoid(z[x])
      #print(dsigmoid(z[x]))
  elif activation == "relu":
    for x in range(n):
      dz_out[0][x] = dz_out[0][x] * drelu(z[x])
  else:
    return dz_out.reshape(n),dw
  return dz_out.reshape(n),dw
  
feedforward("sigmoid",np.ones((10,784)),np.zeros(10),np.ones(784))

(array([784., 784., 784., 784., 784., 784., 784., 784., 784., 784.]),
 array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]))

In [194]:
def sigmoid_epoch(alpha,w_1, w_2, b_1):
    dz_2 = np.ones(10)
    for i in range(189):
        z_1,a_1 = feedforward("sigmoid",w_1,b_1,X_train[:,i])
        z_2,a_2 = feedforward("sigmoid",w_2,np.zeros(10),a_1) 
        #solution array
        y_2 = np.zeros(10) 
        y_2[int(Y_train[i])] = 1
        #dw_2
        da_2 = (a_2-y_2)
        for x in range(10):
          dz_2[x] = da_2[x] * dsigmoid(z_2[x])
        dz_1, dw_2 = backprop("sigmoid",z_1,a_1,w_2,dz_2)

        dw_1 = backprop("none",X_train[:,i],X_train[:,i],w_1,dz_1)[1]
        w_1 -= alpha * dw_1
        w_2 -= alpha * dw_2
        b_1 -= alpha * dz_1
    return w_1,w_2,z_1
  
def relu_epoch(alpha,w_1, w_2, b_1):
    dz_2 = np.ones(10)
    for i in range(189):
        z_1,a_1 = feedforward("relu",w_1,b_1,X_train[:,i])
        z_2,a_2 = feedforward("relu",w_2,np.zeros(10),a_1) 
        #solution array
        y_2 = np.zeros(10) 
        y_2[int(Y_train[i])] = 1
        #dw_2
        da_2 = (a_2-y_2)
        for x in range(10):
          dz_2[x] = da_2[x] * drelu(z_2[x])
        dz_1, dw_2 = backprop("relu",z_1,a_1,w_2,dz_2)

        dw_1 = backprop("none",X_train[:,i],X_train[:,i],w_1,dz_1)[1]
        w_1 -= alpha * dw_1
        w_2 -= alpha * dw_2
        b_1 -= alpha * dz_1
    return w_1,w_2,z_1

In [195]:
#training
for i in range(19):
    w_1, w_2, b_1 = relu_epoch(0.1, w_1, w_2, b_1)
    a_1 = feedforward("relu",w_1,b_1,X_train[:,2])[1]
    a_2 = feedforward("relu",w_2,np.zeros(10),a_1)[1] 
    print(cost(a_2,2))

1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0


In [196]:
#accuracy
def accuracy(w_1,w_2,b_1):
    n = 0
    for i in range(18999):
        a_1 = feedforward("sigmoid",w_1,b_1,X_train[:,i])[1]
        a_2 = feedforward("sigmoid",w_2,np.zeros(10),a_1)[1] 
        max = 0
        index = 0
        for x in range(10):
            if a_2[x] > max:
                max = a_2[x]
                index = x
        if Y_train[i] == x:
            n += 1
    return n / 18999
for i in range(10):
    a_1 = feedforward("sigmoid",w_1,b_1,X_train[:,i])[1]
    a_2 = feedforward("sigmoid",w_2,np.zeros(10),a_1)[1] 
    print(a_2,Y_train[i])

[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5] 6
[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5] 7
[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5] 2
[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5] 4
[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5] 9
[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5] 1
[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5] 0
[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5] 0
[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5] 8
[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5] 1
