In [65]:
import numpy as np

In [66]:
def sigmoid(input):
    return 1/(1 + np.exp(-input))

#relu activation
def relu(input):
    return np.maximum(input, 0)

In [67]:
#derivate of a sigmoid w.r.t. input
def d_sigmoid(d_init, out):
    sig = sigmoid(out)
    return d_init * sig * (1 - sig)

#derivate of a relu w.r.t. input
def d_relu(d_init, out):
    d = np.array(d_init, copy = True)
    d[out <= 0] = 0.
    return d

In [68]:
def init(layers=[4, 5, 1]):
    np.random.seed(42)

    params_w = {}
    params_b = {}

    for index in range(len(layers)-1):

        layer_num = index + 1
        in_layer_size = layers[index]
        out_layer_size = layers[index + 1]

        params_w['weight' + str(layer_num)] = np.random.randn(out_layer_size, in_layer_size) * 0.1
        params_b['bias' + str(layer_num)] = np.random.randn(out_layer_size, 1) * 0.1

    return params_w, params_b


In [69]:
def one_layer_forward_pass(input_activations, weights, bias, activation='R'):
    output = np.dot(weights, input_activations) + bias

    if activation is 'R':
        activation_next = relu(output)
    elif activation is 'S':
        activation_next = sigmoid(output)
    else:
        raise Exception('Nahh!')

    return activation_next, output


In [70]:
def forward_pass(train_X, params_w, params_b, layers=[4, 5, 1], activate=['R', 'S']):

    num_layers = len(layers) - 1

    activation_dict = {}
    output_dict = {}

    curr_act = train_X

    for index in range(num_layers):

        layer_index = index + 1
        prev_act = curr_act      

        curr_weight = params_w["weight" + str(layer_index)]
        curr_bias = params_b["bias" + str(layer_index)]

        curr_act, curr_out = one_layer_forward_pass(prev_act, curr_weight, curr_bias, activate[index])

        activation_dict["act" + str(index)] = prev_act
        output_dict["out" + str(layer_index)] = curr_out

    return curr_act, activation_dict, output_dict


In [71]:
#binary negative log likelihood loss
def cross_entropy_loss(y_pred, train_Y):
    num_samples = y_pred.shape[1]
    cost = -1 / num_samples * (np.dot(train_Y, np.log(y_pred).T) + np.dot(1 - train_Y, np.log(1 - y_pred).T))
    return np.squeeze(cost)

#convert probabilities to class prediction with threshold 0.5
def get_class_from_probs(probabilities):
    class_ = np.copy(probabilities)
    class_[class_ > 0.5] = 1
    class_[class_ <= 0.5] = 0
    return class_

#accuracy of predictions (0 to 1)
def accuracy_metric(y_pred, train_Y):
    y_pred_class = get_class_from_probs(y_pred)
    return (y_pred_class == train_Y).all(axis=0).mean()


In [72]:
def one_layer_backward_pass(curr_grad, curr_weight, curr_bias, curr_out, prev_act, activation='R'):
    
    num = prev_act.shape[1]

    #find out what we are differentiating
    if activation is 'R':
        d_act_func = d_relu
    elif activation is 'S':
        d_act_func = d_sigmoid
    else:
        raise Exception('Nahh!')

    #derivative of activation function
    d_curr_out = d_act_func(curr_grad, curr_out)

    #derivative of weight matrix
    d_curr_weight = np.dot(d_curr_out, prev_act.T) / num #shape = (num_current_layer, num_prev_layer)

    #derivative of bias matrix
    d_curr_bias = np.sum(d_curr_out, axis=1, keepdims=True) / num

    #derivative of input activations from previous layer
    d_prev_act = np.dot(curr_weight.T, d_curr_out) #shape = (num_prev_layer, 1)

    return d_prev_act, d_curr_weight, d_curr_bias


In [73]:
def backward_pass(y_pred, train_Y, activation_dict, output_dict, params_w, params_b, layers=[4, 5, 1], activate=['R', 'S']):

    gradients = {}

    num_samples = train_Y.shape[0]

    train_Y = train_Y.reshape(y_pred.shape)

    #derivative of binary cross entropy function w.r.t. predictions
    d_prev_act = - (np.divide(train_Y, y_pred) - np.divide(1 - train_Y, 1 - y_pred))

    num_layers = len(layers) - 1
    layer_num = [x + 1 for x in range(num_layers)]
    layer_num.reverse()

    activate_ = activate
    activate_.reverse()

    for index, layer_num in enumerate(layer_num):

        activation = activate_[layer_num-1]

        d_curr_act = d_prev_act

        prev_act = activation_dict['act' + str(layer_num - 1)] #activations are one index behind
        curr_out = output_dict['out' + str(layer_num)]

        curr_weight = params_w['weight' + str(layer_num)]
        curr_bias = params_b['bias' + str(layer_num)]

        d_prev_act, d_curr_weight, d_curr_bias = one_layer_backward_pass(d_curr_act, curr_weight, curr_bias, curr_out, prev_act, activation)

        gradients["d_weight" + str(layer_num)] = d_curr_weight
        gradients["d_bias" + str(layer_num)] = d_curr_bias
    
    return gradients

In [74]:
def param_updates(params_w, params_b, gradients, lr, layers=[4, 5, 1]):

    for index in range(len(layers) - 1):
        #gradient descent
        params_w["weight" + str(index + 1)] -= lr * gradients["d_weight" + str(index + 1)]        
        params_b["bias" + str(index + 1)] -= lr * gradients["d_bias" + str(index + 1)]

    return params_w, params_b


In [81]:
def train(train_X, train_Y, epochs, lr, layers=[4, 5, 1], activate=['R', 'S']):
    # initiation of neural netowrk parameters
    params_w, params_b = init(layers)

    losses = []
    accuracies = []
    
    # performing calculations for subsequent iterations
    for i in range(epochs):
        # step forward
        y_pred, activations, outputs = forward_pass(train_X, params_w, params_b, layers, activate)
        
        # monitor loss and accuracy and keep a record of them.
        loss = cross_entropy_loss(y_pred, train_Y)
        losses.append(loss)
        accuracy = accuracy_metric(y_pred, train_Y)
        accuracies.append(accuracy)
        
        # back prop to calculate the gradients
        gradients = backward_pass(y_pred, train_Y, activations, outputs, params_w, params_b)

        # update the weights and biases
        params_w, params_b = param_updates(params_w, params_b, gradients, lr)
        
        print('Loss for epoch {} : {}, accuracy is {}'.format(i+1, loss, accuracy))

#     y_pred, activations, outputs = forward_pass(train_X, params_w, params_b, layers, activate)
    
    return params_w, params_b

In [82]:
def test(val_X, val_Y, layers=[4, 5, 1], activate=['R', 'S']):
    # initiation of neural net parameters
    params_w, params_b = init(layers)

    accuracies = []

    # step forward
    y_pred, activations, outputs = forward_pass(val_X, params_w, params_b, layers, activate)
    
    # calculating metrics and saving them in history
    accuracy = accuracy_metric(y_pred, val_Y)
    accuracies.append(accuracy)
    
    print('Accuracy is {}'.format(accuracy))

In [83]:
def train_val_split(X, Y, train_percent=0.8):

    '''
        Function takes in the training data as input and returns
        a training validation split based on a given percentage.
    '''

    num_points = X.shape[0]

    train_size = int(num_points * 100 * train_percent // 100)

    inds = np.arange(num_points)
    np.random.shuffle(inds)

    train_inds = inds[:train_size]
    val_inds = inds[train_size: ]

    train_X = X[train_inds, :]
    val_X = X[val_inds, :]

    train_Y = Y[train_inds]
    val_Y = Y[val_inds]

    return train_X, train_Y, val_X, val_Y


In [84]:
def parse_txt(fname, num_features=4, num_targets=1, num_points=1372):
    
    '''
        Read data from a text file and generate arrays 
        ready to be fed into the network as inputs.
        Each line in the text file is separated by a
        newline, and represents a data point.
        Features in a line are separated by blank space 
        and the last data point is the target.
    '''

    X = np.empty((num_points, num_features), dtype=float)
    Y = np.empty(num_points, dtype=int)

    with open(fname) as f:
        for index, line in enumerate(f):
            line = line.rstrip('\n')
            data = line.split(',')


            X[index, :] = np.asarray(data[:-1])
            Y[index] = np.asarray(data[num_features])

    return X, Y

In [85]:
epochs = 100
lr = 0.1

X, Y = parse_txt('data/data.txt')
train_X, train_Y, val_X, val_Y = train_val_split(X, Y)

params_w, params_b = train(train_X.T, train_Y.T, epochs, lr)


Loss for epoch 1 : nan, accuracy is 0.5624430264357339
Loss for epoch 2 : nan, accuracy is 0.0
Loss for epoch 3 : nan, accuracy is 0.0
Loss for epoch 4 : nan, accuracy is 0.0
Loss for epoch 5 : nan, accuracy is 0.0
Loss for epoch 6 : nan, accuracy is 0.0
Loss for epoch 7 : nan, accuracy is 0.0
Loss for epoch 8 : nan, accuracy is 0.0
Loss for epoch 9 : nan, accuracy is 0.0
Loss for epoch 10 : nan, accuracy is 0.0
Loss for epoch 11 : nan, accuracy is 0.0
Loss for epoch 12 : nan, accuracy is 0.0
Loss for epoch 13 : nan, accuracy is 0.0
Loss for epoch 14 : nan, accuracy is 0.0
Loss for epoch 15 : nan, accuracy is 0.0
Loss for epoch 16 : nan, accuracy is 0.0
Loss for epoch 17 : nan, accuracy is 0.0
Loss for epoch 18 : nan, accuracy is 0.0
Loss for epoch 19 : nan, accuracy is 0.0
Loss for epoch 20 : nan, accuracy is 0.0
Loss for epoch 21 : nan, accuracy is 0.0
Loss for epoch 22 : nan, accuracy is 0.0
Loss for epoch 23 : nan, accuracy is 0.0
Loss for epoch 24 : nan, accuracy is 0.0
Loss for e

  # Remove the CWD from sys.path while we load stuff.
  # This is added back by InteractiveShellApp.init_path()
  if __name__ == '__main__':
