In [23]:
import math
import numpy as np

class Params:
    '''Plain container for the parameters of a network with one hidden layer
    and one output layer.'''

    def __init__(self, W1, W2, b1, b2):
        '''Store the four parameter arrays unchanged.

        W1 -- weights from the input to the hidden layer
        W2 -- weights from the hidden layer to the output layer
        b1 -- bias paired with W1
        b2 -- bias paired with W2
        '''
        # Weights first, then biases; nothing is copied or validated here.
        self.W1, self.W2 = W1, W2
        self.b1, self.b2 = b1, b2

def init_matrix(row, col):
    '''Return a (row, col) array drawn uniformly from [-1/sqrt(col), 1/sqrt(col)).

    The bound shrinks as col grows. init_model passes the layer's input size
    as col, so this is presumably the usual fan-in scaling heuristic that
    keeps pre-activations small for wide layers.
    '''
    bound = 1.0 / np.sqrt(col)
    return np.random.uniform(low=-bound, high=bound, size=(row, col))

def init_model(hidden_dim, input_dim, output_dim):
    '''Build a freshly initialised Params for a network with hidden_dim hidden units.

    W1 is (hidden_dim, input_dim) and W2 is (output_dim, hidden_dim), both
    drawn by init_matrix; b1 and b2 are zero row vectors of shape
    (1, hidden_dim) and (1, output_dim).
    '''
    # Keyword arguments are evaluated in the order written, so W1 is still
    # drawn before W2 and the random stream is consumed in the same order.
    return Params(
        W1=init_matrix(hidden_dim, input_dim),
        W2=init_matrix(output_dim, hidden_dim),
        b1=np.zeros((1, hidden_dim)),
        b2=np.zeros((1, output_dim)),
    )

def sigmoid(x):
    '''Logistic sigmoid 1 / (1 + exp(-x)), evaluated without overflow.

    x may be a scalar or an array-like; the result has the same shape.

    The direct formula calls exp(-x), which overflows (RuntimeWarning) for
    large negative x. Here exp is only ever applied to -|x|, which lies in
    (0, 1], and the two algebraically equivalent forms are selected by sign:
        x >= 0:  1      / (1 + exp(-x))
        x <  0:  exp(x) / (1 + exp(x))
    '''
    x = np.asarray(x)
    z = np.exp(-np.abs(x))
    # The numerator is 1 for non-negative x and exp(x) (== z) for negative x.
    return np.where(x >= 0, 1.0, z) / (1.0 + z)

def softmax(a):
    """Numerically stable softmax over the last axis of ``a``.

    For the 2-D (examples x classes) input used in this notebook, each row of
    the result is non-negative and sums to 1. 1-D input (a single example) and
    higher-rank batches are also accepted; the normalisation is always over
    the last axis.
    """
    a = np.asarray(a)

    # Subtracting the per-row maximum does not change the softmax but keeps
    # every exponent <= 0, so exp() cannot overflow.
    shifted = a - np.max(a, axis=-1, keepdims=True)
    exps = np.exp(shifted)

    # keepdims lets the normalisation factor broadcast against exps directly,
    # with no need to tile and transpose it.
    return exps / np.sum(exps, axis=-1, keepdims=True)

def output_act(w,x,b):
    '''Output layer: affine pre-activation x . w^T + b followed by softmax.'''
    logits = np.dot(x, w.T) + b
    return softmax(logits)

def hidden_pre(w,x,b):
    '''Hidden-layer pre-activation: x . w^T + b.

    w is transposed before the product, so with the (hidden_dim, input_dim)
    layout produced by init_model each row of x maps to one row of hidden
    pre-activations. b is added by NumPy broadcasting.
    '''
    weighted_inputs = np.dot(x, w.T)
    return weighted_inputs + b

def hidden_act(ha):
    '''Hidden-layer activation: element-wise sigmoid of the pre-activation ha.

    Returns an array with the same shape as ha. The shape is no longer
    printed on every call; inspect np.shape(ha) at the call site if needed.
    '''
    return sigmoid(ha)

def onehot(m, y):
    '''One-hot encode the labels y into a zero array of shape m.

    m -- shape tuple of the result; m[0] is the number of rows
    y -- array of class labels, one per row (cast to int before indexing)

    Row i of the result has a 1 in column y[i] and 0 elsewhere.
    '''
    encoded = np.zeros(m)
    row_idx = np.arange(m[0])
    col_idx = y.astype(int)
    encoded[row_idx, col_idx] = 1
    return encoded

def L(os,y):
    '''Per-example cross-entropy loss: -log of the probability assigned to the
    target class.

    os -- 2-D array of class probabilities, one row per example
    y  -- array-like of target class labels, one per row (cast to int)

    Returns a 1-D array with one loss value per row of os.
    '''
    os = np.asarray(os)
    targets = np.asarray(y).astype(int)
    rows = np.arange(os.shape[0])
    # Pick out the target probability directly instead of multiplying
    # -log(os) by a one-hot mask: when a non-target probability underflows to
    # exactly 0, the mask computes 0 * inf = nan and poisons the whole row.
    return -np.log(os[rows, targets])

def compute_fprop(w1,w2,b1,b2,x,y):
    '''Run one forward pass through the network and evaluate the loss.

    Returns a 4-tuple (os, ha, hs, l):
        os -- output-layer activations (softmax probabilities)
        ha -- hidden-layer pre-activations
        hs -- hidden-layer activations
        l  -- per-example loss of os against the labels y
    '''
    hidden_preact = hidden_pre(w1, x, b1)
    hidden_out = hidden_act(hidden_preact)
    probs = output_act(w2, hidden_out, b2)
    losses = L(probs, y)
    return probs, hidden_preact, hidden_out, losses

def compute_predictions(fprop):
    '''Return the predicted class index for each example.

    fprop is the tuple returned by compute_fprop; only its first element (the
    output activations) is used, and the index of the largest value in each
    row is returned.
    '''
    class_probs = fprop[0]
    return np.argmax(class_probs, axis=1)

## Forward pass with hand-picked initial parameters and inputs

In [24]:
# Hand-picked parameters: every row of each weight matrix is identical, so all
# hidden units (and both output units) compute the same value.
W1 = np.tile(np.array([1.0, 2.0, 3.0]), (4, 1))        # shape (4, 3)
W2 = np.tile(np.array([1.0, 2.0, 3.0, 4.0]), (2, 1))   # shape (2, 4)
b1 = np.full(4, 2.0)
b2 = np.ones(2)

# Two identical all-ones examples, both labelled as class 1.
x = np.ones((2, 3))
y = np.ones(2, dtype=int)

val = compute_fprop(W1, W2, b1, b2, x, y)

(2, 4)


In [28]:
# Predicted class per example. The two rows of W2 are identical, so both
# classes receive the same probability and argmax resolves the tie to index 0.
compute_predictions(val)

array([0, 0])