In [17]:
import numpy as np

def sigmoid(x):
  '''
  Logistic sigmoid activation: f(x) = 1 / (1 + e^(-x)).

  x may be a scalar or a numpy array; np.exp is applied element-wise, so the
  result has the same shape as x.
  '''
  exp_neg_x = np.exp(-x)
  return 1 / (1 + exp_neg_x)

def deriv_sigmoid(x):
  '''
  Derivative of the sigmoid at x: f'(x) = f(x) * (1 - f(x)).

  The sigmoid is evaluated once and reused for both factors.
  '''
  activation = sigmoid(x)
  return activation * (1 - activation)

def mse_loss(y_true, y_pred):
  '''
  Mean squared error between targets and predictions.

  - y_true and y_pred are array-likes of the same length (numpy arrays or
    plain sequences of numbers).
  - Returns the mean of the squared element-wise differences.
  '''
  # Convert to float before subtracting: with unsigned-integer inputs the
  # difference (and its square) would otherwise wrap around, and plain Python
  # lists do not support element-wise subtraction at all.
  errors = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
  return np.mean(errors ** 2)

class OurNeuralNetwork:
  '''
  A neural network with:
    - 2 inputs
    - a hidden layer with 2 neurons (h1, h2)
    - an output layer with 1 neuron (o1)
  Every neuron applies sigmoid to a weighted sum of its inputs plus a bias:
    - h1 uses w1, w2, b1
    - h2 uses w3, w4, b2
    - o1 uses w5, w6, b3 (its inputs are h1 and h2)
  *** DISCLAIMER ***:
  The code below is intended to be simple and educational, NOT optimal.
  Real neural net code looks nothing like this. DO NOT use this code.
  Instead, read/run it to understand how this specific network works.
  '''
  def __init__(self):
    # All parameters are independent draws from np.random.normal(), taken in
    # the order w1..w6 and then b1..b3 (numpy's global random state is used).
    # Weights
    self.w1 = np.random.normal()
    self.w2 = np.random.normal()
    self.w3 = np.random.normal()
    self.w4 = np.random.normal()
    self.w5 = np.random.normal()
    self.w6 = np.random.normal()

    # Biases
    self.b1 = np.random.normal()
    self.b2 = np.random.normal()
    self.b3 = np.random.normal()

  def _forward(self, x):
    # Single forward pass that also exposes the pre-activation sums, which
    # train() needs for the sigmoid derivatives.
    # Returns (sum_h1, h1, sum_h2, h2, sum_o1, o1).
    sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
    h1 = sigmoid(sum_h1)

    sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
    h2 = sigmoid(sum_h2)

    sum_o1 = self.w5 * h1 + self.w6 * h2 + self.b3
    o1 = sigmoid(sum_o1)
    return sum_h1, h1, sum_h2, h2, sum_o1, o1

  def feedforward(self, x):
    '''
    Return the network output o1 for a single sample.
    - x is a numpy array with 2 elements.
    '''
    return self._forward(x)[-1]

  def train(self, data, all_y_trues, learn_rate=0.1, epochs=1000, log_every=10):
    '''
    Fit the weights and biases with per-sample gradient descent on the
    squared error (y_true - y_pred) ** 2.

    - data is a (n x 2) numpy array, n = # of samples in the dataset.
    - all_y_trues is a numpy array with n elements.
      Elements in all_y_trues correspond to those in data.
    - learn_rate is the step size applied to every parameter update.
    - epochs is the number of times to loop through the entire dataset.
    - log_every: the predictions and the MSE loss over the whole dataset are
      printed whenever epoch % log_every == 0; pass 0 or None to print nothing.

    Raises ValueError if data and all_y_trues differ in length (zip would
    otherwise silently drop the unmatched samples).
    '''
    if len(data) != len(all_y_trues):
      raise ValueError(
        "data has %d samples but all_y_trues has %d elements"
        % (len(data), len(all_y_trues))
      )

    for epoch in range(epochs):
      for x, y_true in zip(data, all_y_trues):
        # --- Do a feedforward (we'll need these values later)
        sum_h1, h1, sum_h2, h2, sum_o1, o1 = self._forward(x)
        y_pred = o1

        # --- Calculate partial derivatives.
        # --- Naming: d_L_d_w1 represents "partial L / partial w1"
        d_L_d_ypred = -2 * (y_true - y_pred)

        # Each sigmoid derivative is needed several times below; evaluate it
        # once per neuron instead of once per use.
        ds_o1 = deriv_sigmoid(sum_o1)
        ds_h1 = deriv_sigmoid(sum_h1)
        ds_h2 = deriv_sigmoid(sum_h2)

        # Neuron o1
        d_ypred_d_w5 = h1 * ds_o1
        d_ypred_d_w6 = h2 * ds_o1
        d_ypred_d_b3 = ds_o1

        d_ypred_d_h1 = self.w5 * ds_o1
        d_ypred_d_h2 = self.w6 * ds_o1

        # Neuron h1
        d_h1_d_w1 = x[0] * ds_h1
        d_h1_d_w2 = x[1] * ds_h1
        d_h1_d_b1 = ds_h1

        # Neuron h2
        d_h2_d_w3 = x[0] * ds_h2
        d_h2_d_w4 = x[1] * ds_h2
        d_h2_d_b2 = ds_h2

        # --- Update weights and biases
        # All partials above were computed from the pre-update parameters, so
        # the order of the updates below does not matter.
        # Neuron h1
        self.w1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1
        self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2
        self.b1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_b1

        # Neuron h2
        self.w3 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w3
        self.w4 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w4
        self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_b2

        # Neuron o1
        self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_w5
        self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_w6
        self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_b3

      # --- Report predictions and total loss every log_every epochs
      if log_every and epoch % log_every == 0:
        y_preds = np.apply_along_axis(self.feedforward, 1, data)
        print(y_preds)
        loss = mse_loss(all_y_trues, y_preds)
        print("Epoch %d loss: %.3f" % (epoch, loss))

# Training set: one row per person, two features per row.
# NOTE(review): judging by the Emily/Frank comments below, the features look
# like (weight - 135 lb, height - 66 in) - confirm against the data source.
data = np.array([[-2, -1], [25, 6], [17, 4], [-15, -6]])  # Alice, Bob, Charlie, Diana

# Target labels, aligned row-for-row with data.
# NOTE(review): presumably 1 = female, 0 = male (see the F/M notes below) - confirm.
all_y_trues = np.array([1, 0, 0, 1])  # Alice, Bob, Charlie, Diana

# Two unseen samples to score once training is done.
emily = np.array([-7, -3])  # 128 pounds, 63 inches
frank = np.array([20, 2])   # 155 pounds, 68 inches

# Create a randomly initialised network and fit it to the four samples.
network = OurNeuralNetwork()
network.train(data, all_y_trues)

# Score the unseen samples. The values quoted on the right come from an earlier
# run; the initial weights are random, so a re-run will print different numbers.
print("Emily: %.3f" % network.feedforward(emily))  # 0.951 - F
print("Frank: %.3f" % network.feedforward(frank))  # 0.039 - M


[0.37391917 0.33498617 0.33498612 0.39110908]
Epoch 0 loss: 0.247
[0.43976673 0.29151475 0.2915147  0.46317028]
Epoch 10 loss: 0.193
[0.5078354  0.26546344 0.26546341 0.53460459]
Epoch 20 loss: 0.150
[0.56718365 0.24385547 0.24385547 0.59546734]
Epoch 30 loss: 0.117
[0.61552888 0.22433862 0.22433864 0.64420418]
Epoch 40 loss: 0.094
[0.65426562 0.20690999 0.20691002 0.68269304]
Epoch 50 loss: 0.076
[0.68547774 0.19164872 0.19164876 0.71331223]
Epoch 60 loss: 0.064
[0.710963   0.17842148 0.17842151 0.73803233]
Epoch 70 loss: 0.054
[0.73208592 0.16698083 0.16698086 0.75831741]
Epoch 80 loss: 0.046
[0.74984771 0.15705776 0.15705779 0.77522513]
Epoch 90 loss: 0.041
[0.76498147 0.14840545 0.14840549 0.78951983]
Epoch 100 loss: 0.036
[0.77802919 0.14081315 0.14081318 0.80176003]
Epoch 110 loss: 0.032
[0.78939705 0.13410656 0.13410659 0.81235991]
Epoch 120 loss: 0.029
[0.79939402 0.1281435  0.12814352 0.82163151]
Epoch 130 loss: 0.026
[0.80825854 0.12280835 0.12280837 0.82981351]
Epoch 140 los

In [18]:
# y_preds = np.apply_along_axis(feedforward, 1, data)

In [19]:
# Baseline: outputs of a freshly initialised (untrained) network for every row
# of the dataset.
nn = OurNeuralNetwork()
y_preds = np.array([nn.feedforward(sample) for sample in data])
y_preds

array([0.53600237, 0.4761111 , 0.47083297, 0.51021671])

In [20]:
# Stand-alone copies of the network parameters. Each value is a separate
# np.random.normal() draw, taken in the order w1..w6 and then b1..b3.

# Weights applied to the two inputs
w1, w2, w3, w4 = (np.random.normal() for _ in range(4))
# Weights applied by the output neuron
w5, w6 = (np.random.normal() for _ in range(2))

# Weights grouped per layer
wg_in = [w1, w2, w3, w4]
wg_out = [w5, w6]

# Biases
b1, b2, b3 = (np.random.normal() for _ in range(3))

In [21]:
# Manual forward pass using the stand-alone weights and biases.
# h1, h2 and o1 are overwritten on every iteration, so after the loop they hold
# the values for the LAST row of data only; y_true is unpacked but not used.
for x, y_true in zip(data, all_y_trues):
    h1 = sigmoid(x[0] * w1 + x[1] * w2 + b1)
    h2 = sigmoid(x[0] * w3 + x[1] * w4 + b2)
    o1 = sigmoid(h1 * w5 + h2 * w6 + b3)
o1

0.28381597057510705

In [22]:
# Feed Forward
def feedforward(wg_in, wg_out, x, biases=None):
    '''
    Forward pass of the 2-2-1 sigmoid network for a single sample.

    - wg_in: the four weights applied to the inputs; wg_in[0], wg_in[1] feed
      h1 and wg_in[2], wg_in[3] feed h2.
    - wg_out: the two weights the output neuron applies to h1 and h2.
    - x: the sample; only x[0] and x[1] are read.
    - biases: optional (b1, b2, b3) triple for h1, h2 and the output neuron.
      When omitted, the notebook-level variables b1, b2 and b3 are used, which
      is what this function always did before the parameter existed.

    Returns the output neuron's activation o1.
    '''
    if biases is None:
        # Backward-compatible default: fall back to the notebook globals.
        biases = (b1, b2, b3)
    bias_h1, bias_h2, bias_o1 = biases

    h1 = sigmoid(wg_in[0] * x[0] + wg_in[1] * x[1] + bias_h1)
    h2 = sigmoid(wg_in[2] * x[0] + wg_in[3] * x[1] + bias_h2)
    o1 = sigmoid(wg_out[0] * h1 + wg_out[1] * h2 + bias_o1)
    return o1

In [23]:
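# np.apply_along_axis calls func1d(row, *args), i.e. each row of the array is the
# FIRST argument, so the weights have to be bound before x:
# y_preds = np.apply_along_axis(lambda x: feedforward(wg_in, wg_out, x), 1, data)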
# y_preds