In [1]:
import numpy as np

In [2]:
def sigmoid(x): # Activation function: f(x) = 1/(1 + e^(-x))
    
    return 1 / (1 + np.exp(-x))

def deriv_sigmoid(x): # Derivative of sigmoid: f'(x) = f(x) * (1 - f(x))
    fx = sigmoid(x)
    
    return fx * (1 - fx)

In [3]:
class Neuron:
    
    def __init__(self, weights, bias):
        self.weights = weights
        self.bias = bias
        
    def feedforward(self, inputs): # weight inputs, add bias, activation function
        total = np.dot(self.weights, inputs) + self.bias
        return sigmoid(total)

In [4]:
weights = np.array([0, 1])
bias = 4

neuron = Neuron(weights, bias)

x = np.array([2, 3])

print(neuron.feedforward(x))

0.9990889488055994


In [5]:
class Network:
    '''
    Neural network with:
        - 2 inputs
        - a hidden layer with 2 neurons (h1, h2)
        - an output layer with 1 neuron (o1)
    Each neuron would have same weights and bias
    '''
    
    def __init__(self, weights, bias):
        self.h1 = Neuron(weights, bias)
        self.h2 = Neuron(weights, bias)
        
        self.o1 = Neuron(weights, bias)
        
    def feedforward(self, x):
        out_h1 = self.h1.feedforward(x)
        out_h2 = self.h2.feedforward(x)
        
        # outputs of h1 and h2 are fed into o1
        out_o1 = self.o1.feedforward(np.array([out_h1, out_h2]))
        
        return out_o1

In [6]:
weights = np.array([0, 1])
bias = 0

network = Network(weights, bias)

x = np.array([2, 3])

print(network.feedforward(x))

0.7216325609518421


In [7]:
def mse_loss(y_true, y_pred): # y_true and y_pred are numpy arrays of the same length
    
    return ((y_true - y_pred) ** 2).mean() 

In [8]:
y_true = np.array([1, 0, 0, 1])
y_pred = np.array([0, 0, 0, 0])

print(mse_loss(y_true, y_pred))

0.5


In [13]:
# Here we go!

class NeuralNetwork:
    
    def __init__(self, weights=None, biases=None):
        if not weights:
            self.w1 = np.random.normal()
            self.w2 = np.random.normal()
            self.w3 = np.random.normal()
            self.w4 = np.random.normal()
            self.w5 = np.random.normal()
            self.w6 = np.random.normal()
        else:
            self.w1 = weights[0]
            self.w2 = weights[1]
            self.w3 = weights[2]
            self.w4 = weights[3]
            self.w5 = weights[4]
            self.w6 = weights[5]
        
        if not biases:
            self.b1 = np.random.normal()
            self.b2 = np.random.normal()
            self.b3 = np.random.normal()
        else:
            self.b1 = biases[0]
            self.b2 = biases[1]
            self.b3 = biases[2]
        
        print("Initialized weights:", self.w1, self.w2, self.w3, self.w4, self.w5, self.w6, "\n")
        print("Initialized biases:", self.b1, self.b2, self.b3, "\n")
    
    def feedforward(self, x):
        # x is a numpy array with 2 elements
        h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.b1)
        h2 = sigmoid(self.w3 * x[0] + self.w4 * x[1] + self.b2)
        
        o1 = sigmoid(self.w5 * h1 + self.w6 * h2 + self.b3)
        
        return o1
    
    def train(self, data, y_trues):
        '''
        - data is a (n x 2) numpy array, n = # of samples in the dataset
        - y_trues is a numpy array with n elements
        '''
        learn_rate = 0.1
        epochs = 1000 # number of times to loop through the entire dataset
        
        for epoch in range(epochs):
            for x, y_true in zip(data, y_trues):
                
                # --- feedforward step
                sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
                h1 = sigmoid(sum_h1)
                
                sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
                h2 = sigmoid(sum_h2)
                
                sum_o1 = self.w5 * h1 + self.w6 * h2 + self.b3
                o1 = sigmoid(sum_o1)
                
                y_pred = o1
                
                # --- partial derivatives
                dL_dypred = -2 * (y_true - y_pred)
                
                # neuron o1
                dypred_dw5 = h1 * deriv_sigmoid(sum_o1)
                dypred_dw6 = h2 * deriv_sigmoid(sum_o1)
                dypred_db3 = deriv_sigmoid(sum_o1)
                
                dypred_dh1 = self.w5 * deriv_sigmoid(sum_o1)
                dypred_dh2 = self.w6 * deriv_sigmoid(sum_o1)
                
                # neuron h1
                dh1_dw1 = x[0] * deriv_sigmoid(sum_h1)
                dh1_dw2 = x[1] * deriv_sigmoid(sum_h1)
                dh1_db1 = deriv_sigmoid(sum_h1)
                
                # neuron h2
                dh2_dw3 = x[0] * deriv_sigmoid(sum_h2)
                dh2_dw4 = x[1] * deriv_sigmoid(sum_h2)
                dh2_db2 = deriv_sigmoid(sum_h2)
                
                # --- updation of weights and biases
                # neuron h1
                self.w1 -= learn_rate * dL_dypred * dypred_dh1 * dh1_dw1
                self.w2 -= learn_rate * dL_dypred * dypred_dh1 * dh1_dw2
                self.b1 -= learn_rate * dL_dypred * dypred_dh1 * dh1_db1
                
                # neuron h2
                self.w3 -= learn_rate * dL_dypred * dypred_dh2 * dh2_dw3
                self.w4 -= learn_rate * dL_dypred * dypred_dh2 * dh2_dw4
                self.b2 -= learn_rate * dL_dypred * dypred_dh2 * dh2_db2
                
                # neuron o1
                self.w5 -= learn_rate * dL_dypred * dypred_dw5
                self.w6 -= learn_rate * dL_dypred * dypred_dw5
                self.b3 -= learn_rate * dL_dypred * dypred_db3
            
            # --- calculation of total loss
            if epoch % 10 == 0:
                y_preds = np.apply_along_axis(self.feedforward, 1, data)
                loss = mse_loss(y_trues, y_preds)
                
                print("Epoch %d loss: %.5f" % (epoch, loss))

In [14]:
# define dataset
data = np.array([
    [-2, -1],
    [25, 6],
    [17, 4],
    [-15, -6]
])

y_trues = np.array([
    1,
    0,
    0,
    1
])

In [15]:
# let's train!

network = NeuralNetwork()
network.train(data, y_trues)

Initialized weights: -1.1201684756891748 -0.7271361958196407 -0.8514273569225397 -0.9851633412279298 0.6786072887572642 0.45157429471042543 

Initialized biases: 0.3075893422381531 -0.13905118149692813 -0.18398712413481405 

Epoch 0 loss: 0.14015
Epoch 10 loss: 0.10915
Epoch 20 loss: 0.08708
Epoch 30 loss: 0.07113
Epoch 40 loss: 0.05935
Epoch 50 loss: 0.05047
Epoch 60 loss: 0.04361
Epoch 70 loss: 0.03820
Epoch 80 loss: 0.03386
Epoch 90 loss: 0.03032
Epoch 100 loss: 0.02738
Epoch 110 loss: 0.02491
Epoch 120 loss: 0.02282
Epoch 130 loss: 0.02102
Epoch 140 loss: 0.01947
Epoch 150 loss: 0.01811
Epoch 160 loss: 0.01692
Epoch 170 loss: 0.01586
Epoch 180 loss: 0.01492
Epoch 190 loss: 0.01408
Epoch 200 loss: 0.01332
Epoch 210 loss: 0.01263
Epoch 220 loss: 0.01201
Epoch 230 loss: 0.01144
Epoch 240 loss: 0.01092
Epoch 250 loss: 0.01045
Epoch 260 loss: 0.01001
Epoch 270 loss: 0.00960
Epoch 280 loss: 0.00923
Epoch 290 loss: 0.00888
Epoch 300 loss: 0.00855
Epoch 310 loss: 0.00825
Epoch 320 loss: 0.

In [16]:
# let's predict!

emily = np.array([-7, -3])
frank = np.array([20, 2])

print("Emily: %.3f" % network.feedforward(emily))
print("Frank: %.3f" % network.feedforward(frank))

Emily: 0.965
Frank: 0.055
