In [87]:
import numpy as np

def sigmoid(x):
    '''Sigmoid function'''
    return 1 / (1+np.exp(-x))

def mse(y_true, y_pred):
    '''Mean Squared Error'''
    return ((y_true - y_pred) ** 2).mean()

def sigmoid_derv(x):
    '''Derivative of sigmoid function'''
    return sigmoid(x)*(1-sigmoid(x))

class NeuralNetwork:
    def __init__(self):
        '''Initialize weights and biases with random values'''
        # weights
        for i in range(1,9):
            setattr(self, 'w'+str(i), np.random.normal())
            
        # biases
        self.b1 = np.random.normal()
        self.b2 = np.random.normal()
        self.b3 = np.random.normal()
    
    def feedforward(self, inputs):
        h1 = sigmoid( self.w1*inputs[0] + self.w3*inputs[1] + self.w5*inputs[2] + self.b1)
        h2 = sigmoid( self.w2*inputs[0] + self.w4*inputs[1] + self.w6*inputs[2] + self.b2)
        o1 = sigmoid( self.w7*h1 + self.w8*h2 + self.b3)
        print("running inside now", o1)
        return o1
    
    def train(self, train_data, ground_truth):
        ''' 
        train_data is a n x 3 numpy array where n is the number of samples in the dataset
        ground_truth is a numpy array with n elems.
        All data units in ground_truth represent the true values of the data
        '''
        learning_rate = 0.05
        epochs = 1000 # total number of iterations to loop through the entire dataset
        
        for epoch in range(epochs):
            for x, y_true in zip(train_data, ground_truth):
                
                '''Compute feedforwards for all neurons'''
                add_h1 = self.w1*train_data[0] + self.w3*train_data[1] + self.w5*train_data[2] + self.b1
                h1 = sigmoid(add_h1)
                add_h2 = self.w2*train_data[0] + self.w4*train_data[1] + self.w6*train_data[2] + self.b2
                h2 = sigmoid(add_h2)
                add_o1 = self.w7*h1 + self.w8*h2 + self.b3
                o1 = sigmoid(add_o1)
                y_pred = o1
                
                '''Computing Partial derivates'''
                # derv_L_ypred represents partial derivative of dL / dy_pred
                derv_L_ypred = -2*(1-y_pred)
                
                # Neuron h1
                derv_h1_w1 = train_data[0] * sigmoid_derv(add_h1)
                derv_h1_w3 = train_data[1] * sigmoid_derv(add_h1)
                derv_h1_w5 = train_data[2] * sigmoid_derv(add_h1)
                derv_h1_b1 = sigmoid_derv(add_h1)
                
                # Neuron h2
                derv_h2_w2 = train_data[0] * sigmoid_derv(add_h2)
                derv_h2_w4 = train_data[1] * sigmoid_derv(add_h2)
                derv_h2_w6 = train_data[2] * sigmoid_derv(add_h2)
                derv_h2_b2 = sigmoid_derv(add_h2)
                
                # Neuron o1
                derv_ypred_h1 = self.w7 * sigmoid_derv(add_o1)
                derv_ypred_h2 = self.w8 * sigmoid_derv(add_o1)
                
                derv_ypred_w7 = h1 * sigmoid_derv(add_o1)
                derv_ypred_w8 = h2 * sigmoid_derv(add_o1)
                derv_ypred_b3 = sigmoid_derv(add_o1)
                
                '''Doing SGD updates'''
                # Neuron h1
                self.w1 -= learning_rate * derv_L_ypred * derv_ypred_h1 * derv_h1_w1
                self.w3 -= learning_rate * derv_L_ypred * derv_ypred_h1 * derv_h1_w3
                self.w5 -= learning_rate * derv_L_ypred * derv_ypred_h1 * derv_h1_w5
                self.b1 -= learning_rate * derv_L_ypred * derv_ypred_h1 * derv_h1_b1
                
                # Neuron h2
                self.w2 -= learning_rate * derv_L_ypred * derv_ypred_h2 * derv_h2_w2
                self.w4 -= learning_rate * derv_L_ypred * derv_ypred_h2 * derv_h2_w4
                self.w6 -= learning_rate * derv_L_ypred * derv_ypred_h2 * derv_h2_w6
                self.b2 -= learning_rate * derv_L_ypred * derv_ypred_h2 * derv_h2_b2
                
                # Neuron o1
                self.w7 = learning_rate * derv_L_ypred * derv_ypred_w7
                self.w8 = learning_rate * derv_L_ypred * derv_ypred_w8
                self.b3 = learning_rate * derv_L_ypred * derv_ypred_b3
                
                '''Calclating the loss for groups of ten epochs'''
                if epoch % 10 == 0:
                    y_prediction = np.apply_along_axis(self.feedforward, 1, train_data)
                    print(y_prediction, ground_truth, len(ground_truth), len(y_prediction))
                    loss = mse(ground_truth, y_prediction)
                    print("Epoch {} loss: {}".format(epoch, loss))
                    
    

In [88]:
train_data = np.array([[103, 2, 10], [91, -9, 10], [88, -14, 12], [108, -8, -12], [97, -4, -2], [115, 17, -18]])
ground_truth = np.array([1, 0, 0, 1, 0, 1])

neural1 = NeuralNetwork()

print( np.apply_along_axis(neural1.feedforward, 1, train_data) )


neural1.train(train_data, ground_truth)

running inside now 0.18875352890043964
running inside now 0.18875352890043964
running inside now 0.18875352890043964
running inside now 0.18875352890043964
running inside now 0.18875352890043964
running inside now 0.18875352890043964
[0.18875353 0.18875353 0.18875353 0.18875353 0.18875353 0.18875353]
running inside now [0.49378919 0.49676476 0.49378919]
running inside now [0.49378919 0.49676476 0.49378919]
running inside now [0.49378919 0.49676476 0.49378919]
running inside now [0.49378919 0.49676476 0.49378919]
running inside now [0.49378919 0.49676476 0.49378919]
running inside now [0.49378919 0.49676476 0.49378919]
[[0.49378919 0.49676476 0.49378919]
 [0.49378919 0.49676476 0.49378919]
 [0.49378919 0.49676476 0.49378919]
 [0.49378919 0.49676476 0.49378919]
 [0.49378919 0.49676476 0.49378919]
 [0.49378919 0.49676476 0.49378919]] [1 0 0 1 0 1] 6 6


ValueError: operands could not be broadcast together with shapes (6,) (6,3) 