In [3]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict,Counter
from random import sample

In [6]:
class NeuralNet:
    def __init__(self,nodes,lamb=0.0,alpha=0.1,eps=0.0):
        '''
        Constructor for neural net
        nodes - list detailing number of nodes in each layer
        lamb - regularization
        alpha - learning rate
        eps - cost function stopping condition
        '''
        self.nodes = nodes
        self.lamb = lamb
        self.alpha = alpha
        self.weights = []
        self.eps = eps
        #initialize weights for each layer, include bias
        for i in range(len(nodes)-1):
            self.weights.append(np.random.normal(0,1,(nodes[i]+1,nodes[i+1])))
    
    def get_sigmoid(self, x):
        return 1 / (1+np.exp(-x))
    
    def deriv_sigmoid(self, x):
        return x * (1-x)

    def train(self, features, targs, for_exam=False):
        prev_cost = -math.inf
        gradients = [0]*len(self.weights)
        num_inst = len(targs)
        keep_learn = True
        count = 1

        while(keep_learn):
            J = 0
            for instance,target in zip(features,targs):
                #iterate through layers, vectorize forward pass
                activations = [np.atleast_2d(instance)]
                for i in range(len(self.weights)-1):
                    this_a = self.get_sigmoid(self.weights[i].dot(activations[i].T))
                    activations.append(np.insert(this_a,0,1))
                activations.append(self.get_sigmoid(activations[len(self.weights)-1].dot(self.weights[len(self.weights)-1])))
                guess = activations[-1]

                #accumulate sum loss
                cost = np.sum((-target).dot(math.log(guess)) - (1-target).dot(math.log(1-guess)))
                J += cost

                #begin backwards propogation
                error = guess - target
                delta_inst = [error]

                #get delta values for all weights on current instance
                for i in range(len(self.weights)-1, 0, -1):
                    this_del = (self.weights[i].T*(delta_inst[-1])) * self.deriv_sigmoid(activations[i])
                    delta_inst.append(this_del[0][1:])

                #reverse delta values
                delta_inst = delta_inst[::-1]

                #accumulate gradients
                for i in range(len(self.weights)-1,-1,-1):
                    gradients[i] += (delta_inst[i]*(activations[i].T)).T

                #print for examples
                if for_exam:
                    print(f'OUTPUTS FOR INSTANCE {count}')
                    print(f'activations: ')
                    for i in range(len(activations)):
                        print(f'a{i+1}: {activations[i]}')
                    print()
                    print(f'prediction: {guess}')
                    print(f'expected: {target}')
                    print(f'cost J: {cost}')
                    print()
                    print('delta for this instance: ')
                    for i in range(len(delta_inst)):
                        print(f'delta {i+2}: {delta_inst[i]}')
                    print()
                    print('gradients for this instance: ')
                    for i in range(len(self.weights)):
                        print_del = (delta_inst[i]*(activations[i].T)).T
                        print(f'theta {i+1}: {print_del}')
                    #print(f'gradients for this instance: {gradients}')
                    print()
                    count += 1
            
            #regularize weights and update
            for i in range(len(self.weights)-1,-1,-1):
                P = self.lamb * (self.weights[i]).T
                gradients[i] = gradients[i] + P
                gradients[i] = gradients[i] / num_inst
                learn_diff = self.alpha * gradients[i]
                self.weights[i] = self.weights[i] - learn_diff.T

            J /= num_inst
            curr_s = 0
            for i in range(len(self.weights)):
                curr_s += np.sum(self.weights[i][1:]**2)

            #curr_s = np.sum(self.weights[1:]**2)
            curr_s *= (self.lamb/(2*num_inst))
            new_cost = J + curr_s

            #if improvement in cost is less than epsilon, stop
            if new_cost - prev_cost < self.eps:
                keep_learn = False

            prev_cost = new_cost

            if for_exam:
                print('regularized gradients: ')
                for i in range(len(gradients)):
                    print(f'theta {i+1}: {gradients[i]}')
                keep_learn = False

    #def calculate_loss(self,data,targets):
        #predictions = aaaa

    def predict(self,instance):
        pred = [np.ones(len(instance)),instance]

        for i in len(self.weights):
            pred = self.sigmoid(np.dot(pred,self.weights[i]))
        
        return pred
    
    #def calc_cost(self,guess,target,)

'''
TODO: 
    - Implement calculate_loss
    - Debug using sample data
    - Swag, nae nae, and finesse
'''



'\nTODO: \n    - Implement calculate_loss\n    - Debug using sample data\n    - Swag, nae nae, and finesse\n'

In [8]:
#function to train on backprop_example1.txt
#outputs to stdout, if full output cannot be viewed when calling this function, increase number of lines shown in output to 100
def train_on_first():
	train_nn = NeuralNet([1,2,1],eps=0.001)
	train_nn.weights[0] = np.array([[0.40000,0.10000 ],[0.30000,0.20000 ]])
	train_nn.weights[1] = np.array([[0.7],[0.5],[0.6]])
	train_set_1 = {'x': [0.13000,0.42000], 'y': [0.90000,0.23000]}
	#NOTE: X values are preprocessed to include bias term (1) as first element
	X = np.array([[1,0.13000],[1,0.42000]])
	Y = np.array([[0.90000],[0.23000]])
	train_df = pd.DataFrame(data=train_set_1)
	train_df.insert(0,'bias',np.ones)
	train_nn.train(X,Y,True)

train_on_first()

OUTPUTS FOR INSTANCE 1
activations: 
a1: [[1.   0.13]]
a2: [1.        0.601807  0.5807858]
a3: [0.79402743]

prediction: [0.79402743]
expected: [0.9]
cost J: 0.36557477431084995

delta for this instance: 
delta 2: [-0.01269739 -0.01548092]
delta 3: [-0.10597257]

gradients for this instance: 
theta 1: [[-0.01269739 -0.00165066]
 [-0.01548092 -0.00201252]]
theta 2: [-0.10597257 -0.06377504 -0.06154737]

OUTPUTS FOR INSTANCE 2
activations: 
a1: [[1.   0.42]]
a2: [1.         0.60873549 0.59483749]
a3: [0.79596607]

prediction: [0.79596607]
expected: [0.23]
cost J: 1.2763768066887786

delta for this instance: 
delta 2: [0.06739994 0.08184068]
delta 3: [0.56596607]

gradients for this instance: 
theta 1: [[0.06739994 0.02830797]
 [0.08184068 0.03437309]]
theta 2: [0.56596607 0.34452363 0.33665784]

regularized gradients: 
theta 1: [[0.02735127 0.01332866]
 [0.03317988 0.01618028]]
theta 2: [[0.22999675 0.1403743  0.13755523]]


In [10]:
#function to train on backprop_example2.txt
#outputs to stdout, if full output cannot be viewed when calling this function, increase number of lines shown in output to 100
def train_on_sec():
	train_nn = NeuralNet([2,4,3,2],eps=0.001)
	train_nn.weights[0] = np.array([[0.42000,0.15000,0.40000],[0.72000,0.10000,0.54000],[0.01000,0.19000,0.42000],[0.30000,0.35000,0.68000]])
	train_nn.weights[1] = np.array([[0.21000,0.67000,0.14000,0.96000,0.87000],[0.87000,0.42000,0.20000,0.32000,0.89000],[0.03000,0.56000,0.80000,0.69000,0.09000]])
	train_nn.weights[2] = np.array([[0.04000,0.87000,0.42000,0.53000],[0.17000,0.10000,0.95000,0.69000 ]])
	train_set_1 = {'x': [0.13000,0.42000], 'y': [0.90000,0.23000]}
	#NOTE: X values are preprocessed to include bias term (1) as first element
	X = np.array([[1,0.32000,0.68000],[1,0.83000,0.02000]])
	Y = np.array([[0.75000,0.98000],[0.75000,0.28000]])
	train_df = pd.DataFrame(data=train_set_1)
	train_df.insert(0,'bias',np.ones)
	train_nn.train(X,Y,True)

train_on_sec()

ValueError: shapes (4,) and (2,4) not aligned: 4 (dim 0) != 2 (dim 0)