In [93]:
#init libraries + data
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import math

data = pd.read_csv('sample_data/mnist_train_small.csv')
#data = pd.read_csv('train.csv')

data = np.array(data)
m, n = data.shape

data_dev = data[0:1000].T
Y_dev = data_dev[0]
X_dev = data_dev[1:n]

data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255

In [94]:
#activation functions
w_1 = np.random.rand(10, 784)
w_2 = np.random.rand(10, 10)
b_1 = np.random.rand(10)

def relu(x):
    return max(x,0)
def drelu(x):
    return x > 0
def leaky_relu(x):
  if x > 0:
    return x
  else:
    return 0.01 * x
def dleaky_relu(x):
  if x > 0:
    return 1
  else:
    return 0.01
def sigmoid(x):
    return 1/(1 + (math.e)**(-x))
def dsigmoid(x):
    return sigmoid(x) * (1-sigmoid(x))

In [95]:
def feedforward(activation, w, b, a):
  z_out = np.matmul(w,a)
  z_out += b.reshape(b.size)
  n = z_out.size
  a_out = np.copy(z_out)
  if activation == "sigmoid":
    for i in range(n):
      a_out[i] = sigmoid(z_out[i])
  elif activation == "relu":
    for i in range(n):
      a_out[i] = relu(z_out[i])
  elif activation == "leaky_relu":
    for i in range(n):
      a_out[i] = leaky_relu(z_out[i])
  elif activation == "none":
    return z_out,a_out
  else:
    return "Unrecognized activation function"
  return z_out,a_out
      
def cost(a, i):
  temp = np.zeros(10)
  temp[int(Y_train[i])] = 1
  return np.sum((a-temp)**2)
  
def backprop(activation,z,a,w,dz):
  dw = np.matmul(dz.reshape((dz.size,1)),a.reshape((1,a.size)))
  #print(dw)
  dz_out = np.matmul(dz.reshape((1,dz.size)),w)
  n = dz_out.size
  if activation == "sigmoid":
    for x in range(n):
      dz_out[0][x] = dz_out[0][x] * dsigmoid(z[x])
  elif activation == "relu":
    for x in range(n):
      dz_out[0][x] = dz_out[0][x] * drelu(z[x])
  elif activation == "leaky_relu":
    for x in range(n):
      dz_out[0][x] = dz_out[0][x] * dleaky_relu(z[x])
  return dz_out.reshape(n),dw


In [96]:
def leaky_relu_epoch(alpha,w_1, w_2,X_train,Y_train):
  dz_2 = np.ones(10)
  for i in range(Y_train.size):
    #feedforward
    z_1,a_1 = feedforward("leaky_relu",w_1,np.zeros(10),X_train[:,i])
    z_2,a_2 = feedforward("leaky_relu",w_2,np.zeros(10),a_1) 
    
    #solution array
    y_2 = np.zeros(10) 
    y_2[int(Y_train[i])] = 1

    #derivatives
    da_2 = a_2-y_2
    for x in range(10):
      dz_2[x] = da_2[x] * dleaky_relu(z_2[x])
    dz_1, dw_2 = backprop("leaky_relu",z_1,a_1,w_2,dz_2)
    dz_0, dw_1 = backprop("none",X_train[:,i],X_train[:,i],w_1,dz_1)
    
    #update
    w_1 -= alpha * dw_1
    w_2 -= alpha * dw_2
  return w_1,w_2

def epoch(alpha,w_1, w_2,X_train,Y_train):
  for i in range(Y_train.size):
    #feedforward
    z_1,a_1 = feedforward("none",w_1,np.zeros(10),X_train[:,i])
    z_2,a_2 = feedforward("none",w_2,np.zeros(10),a_1) 
    
    #solution array
    y_2 = np.zeros(10) 
    y_2[int(Y_train[i])] = 1

    #derivatives
    dz_2 = a_2-y_2
    dz_1, dw_2 = backprop("none",z_1,a_1,w_2,dz_2)
    dz_0, dw_1 = backprop("none",X_train[:,i],X_train[:,i],w_1,dz_1)
    
    #update
    w_1 -= alpha * dw_1
    w_2 -= alpha * dw_2
  return w_1,w_2

In [113]:
def accuracy(w_1,w_2,b_1,X_train,Y_train):
  n = 0
  for i in range(Y_train.size):
    z_1,a_1 = feedforward("leaky_relu",w_1,b_1,X_train[:,i])
    z_2,a_2 = feedforward("leaky_relu",w_2,np.zeros(10),a_1)
    if a_2.argmax() == Y_train[i]:
      n += 1
  return n / Y_train.size

print(accuracy(w_1,w_2,b_1,X_train,Y_train))

0.8973630191062687


In [None]:
for i in range(10):
    w_1, w_2 = leaky_relu_epoch(0.01, w_1, w_2,X_train,Y_train)
    if i % 1 == 0:
      z_1,a_1 = feedforward("leaky_relu",w_1,np.zeros(10),X_train[:,0])
      z_2,a_2 = feedforward("leaky_relu",w_2,np.zeros(10),a_1)
      print(a_2,Y_train[0])
      #print(a_1,a_2)
      plt.scatter(i,accuracy(w_1,w_2,np.zeros(10),X_train,Y_train))
    
plt.show()

[-0.00598555 -0.01376745 -0.00347155 -0.00580747 -0.00093011 -0.00078477
 -0.00087191 -0.0019968  -0.00173508  0.53413685] 9
[-0.00597399 -0.01382599 -0.00342899 -0.00588496 -0.00092842 -0.00080515
 -0.00086972 -0.00196385 -0.00176004  0.53481386] 9
[-0.00599511 -0.01382651 -0.00341593 -0.00592721 -0.00094224 -0.00085374
 -0.00083232 -0.00199043 -0.00180762  0.53632002] 9
[-0.00598048 -0.01385873 -0.00340575 -0.00597178 -0.00089299 -0.00085016
 -0.00082738 -0.00195207 -0.0018478   0.53503448] 9
[-0.00597227 -0.01419566 -0.00359967 -0.00640631 -0.00091944 -0.00098117
 -0.00078882 -0.00183954 -0.00200835  0.52730002] 9
[-0.00596471 -0.01391492 -0.00343043 -0.00629227 -0.00091787 -0.00096824
 -0.00075433 -0.00197509 -0.00191733  0.53403531] 9
[-0.00596338 -0.0139534  -0.00350327 -0.00639543 -0.00098937 -0.00094083
 -0.00077055 -0.00202271 -0.00197359  0.53631018] 9
