In [22]:
import numpy as np
from numpy.random import RandomState

In [2]:
def sigmoid(x):
    # Sigmoid activation function: f(x) = 1 / (1 + e^(-x))
    return 1 / (1 + np.exp(-x))

# Derived sigmoid function
def deriv_sigmoid(x):
    # Derivative of sigmoid: f'(x) = f(x) * (1 - f(x))
    fx = sigmoid(x)
    return fx * (1 - fx)

# Squared error function
def mse_loss(y_true, y_pred):
  # y_true and y_pred are numpy arrays of the same length.
  return ((y_true - y_pred) ** 2).mean()


In [110]:
class Neural_Network:
    ''' This neural network has :
        - 2 inputs (x1, x2) (1 training example)
        - a hidden layer with 2 neurons (h1, h2)
        - an output layer with 1 neuron(o1)
    '''
    
    def __init__(self):
        # Initializing Weights
        # 3 neurons total, 2 features = 6 weights
        self.w1 = np.random.normal()
        self.w2 = np.random.normal()
        self.w3 = np.random.normal()
        self.w4 = np.random.normal()
        self.w5 = np.random.normal()
        self.w6 = np.random.normal()
        
        # Biases
        # 3 neurons = 3 biases
        self.b1 = np.random.normal()
        self.b2 = np.random.normal()
        self.b3 = np.random.normal()
    
    def feedforward(self, x):
        h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.b1)
        h2 = sigmoid(self.w3 * x[0]+ self.w4 * x[1] + self.b2)
        o1 = sigmoid(self.w5 * h1 + self.w6 * h2 + self.b3)
        return o1
   
    def train(self, data, all_true_y):
        '''
        - data is (n x m) numpy matrix. n = # of samples. m = features
        in our case, n = 1, m = 2
        - all_true_y = array with n elements of labels
        '''
        
        learning_rate = 0.1
        epochs = 10000 # number of iterations
        for epoch in range(epochs):
            for x, y_true in zip(data, all_true_y):
#                 print(self.w1,self.w2,self.w3,self.w4,self.w5,self.w6)
#                 print("x:", x)
#                 print("y_true:", y_true)
#                 x: [1 2]
#                 y_true: 1
#                 x: [1 2]
#                 y_true: 1
                weighted_sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
                h1_out = sigmoid(weighted_sum_h1)
                
                weighted_sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
                h2_out = sigmoid(weighted_sum_h2)
                
                weighted_sum_o1 = self.w5 * h1_out + self.w6 * h2_out + self.b3
                o1 = sigmoid(weighted_sum_o1)
                
                y_pred = o1
                
                # --- Calculate partial derivatives
                
                # We eventually need d_L__d_w1 -> "Partial L / Partial w1"
                # but we need to find the other derivatives that makes it up
                # 1st
                
                d_L__d_ypred = -2 * (y_true - y_pred)
                
                # Back propagation
                
                # Neuron o1
                d_ypred__d_w5 = h1_out * deriv_sigmoid(weighted_sum_o1)
                d_ypred__d_w6 = h2_out * deriv_sigmoid(weighted_sum_o1)
                d_ypred__d_b3 = deriv_sigmoid(weighted_sum_o1)
                
                d_y_pred__d_h1 = self.w5 * deriv_sigmoid(weighted_sum_o1)
                d_y_pred__d_h2 = self.w6 * deriv_sigmoid(weighted_sum_o1)
                
                
                # Neuron h1
                d_h1__d_w1 = x[0] * deriv_sigmoid(weighted_sum_h1)
                d_h1__d_w2 = x[1] * deriv_sigmoid(weighted_sum_h1)
                d_h1__d_b1 = deriv_sigmoid(weighted_sum_h1)
                
                # Neuron h2
                d_h2__d_w3 = x[0] * deriv_sigmoid(weighted_sum_h2)
                d_h2__d_w4 = x[1] * deriv_sigmoid(weighted_sum_h2)
                d_h2__d_b2 = deriv_sigmoid(weighted_sum_h2)
                
                # ---- updating weights and biases
                # Neuron h1
                self.w1 -= learning_rate * d_L__d_ypred * d_y_pred__d_h1 * d_h1__d_w1
                self.w2 -= learning_rate * d_L__d_ypred * d_y_pred__d_h1 * d_h1__d_w2
                self.b1 -= learning_rate * d_L__d_ypred * d_y_pred__d_h1 * d_h1__d_b1
                
                # Neuron h2
                
                self.w3 -= learning_rate * d_L__d_ypred * d_y_pred__d_h2 * d_h2__d_w3
                self.w4 -= learning_rate * d_L__d_ypred * d_y_pred__d_h2 * d_h2__d_w4
                self.b2 -= learning_rate * d_L__d_ypred * d_y_pred__d_h2 * d_h2__d_b2
                
                # Neuron o1
                
                self.w5 -= learning_rate * d_L__d_ypred * d_ypred__d_w5
                self.w6 -= learning_rate * d_L__d_ypred * d_ypred__d_w6
                self.b3 -= learning_rate * d_L__d_ypred * d_ypred__d_b3
                
                # --- calculate total loss at the end of each epoch
                loss = mse_loss(all_true_y, y_pred)
                final_coeff = [self.w1, self.w2,self.w3,self.w4,self.w5,self.w6]
                print(loss)
        return final_coeff

In [111]:
x = np.array([[1,2]])

y = np.array([1])

temp = Neural_Network()

coeff = temp.train(x, y)


0.23016168552071226
0.2200416905622807
0.21041451445374942
0.20126735315796382
0.19258530547400757
0.18435186315703778
0.17654935274444047
0.16915932541824882
0.16216289410098167
0.15554101917328986
0.14927474576875255
0.14334539662591822
0.13773472505568068
0.13242503280983423
0.1273992576035714
0.12264103482865077
0.11813473765933413
0.11386549935089359
0.10981922109900993
0.10598256839551229
0.1023429584006258
0.098888540466179
0.09560817159479607
0.09249138830978086
0.08952837613933322
0.08670993768525079
0.08402746004761667
0.0814728822097993
0.07903866284884287
0.07671774892143712
0.0745035452817587
0.07238988551145425
0.07037100408109663
0.06844150991410396
0.0665963613862331
0.06483084276447527
0.06314054206692991
0.06152133030868035
0.059969342086726815
0.058480957448728756
0.057052784984904376
0.055681646079302066
0.05436456025528897
0.05309873155007574
0.05188153585408519
0.050710509152702955
0.04958333661020743
0.04849784243829951
0.04745198049450092
0.046443825558663734
0.

In [112]:
coeff

[-0.08390104145790185,
 1.2289277466033934,
 -1.077627807612466,
 -0.5874597194631103,
 2.2140989778506506,
 0.22543039229971984]