<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#A-neural-network-in-numpy" data-toc-modified-id="A-neural-network-in-numpy-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>A neural network in numpy</a></span></li></ul></div>

In [1]:
# Imports 
import numpy as np

## A neural network in numpy

2 inputs, 1 hidden layer with 5 nodes, 1 output

In [12]:
# Create some mock data
# Seed the global NumPy RNG first so that Restart & Run All regenerates the same
# samples (and, further down, the same randomly initialised parameters).
np.random.seed(42)
n_samples = 1000
features = np.random.rand(n_samples, 2)
# Target is 10 * x0 * x1 plus uniform noise in [-0.15, 0.15)
targets = 10 * features[:, 0] * features[:, 1] + 0.3 * np.random.rand(n_samples) - 0.15
features.shape, targets.shape

((1000, 2), (1000,))

In [13]:
features[:3], targets[:3]

(array([[ 0.87719419,  0.59026528],
        [ 0.04778934,  0.33822739],
        [ 0.35290574,  0.13613203]]),
 array([ 5.03090619,  0.14209776,  0.42142432]))

In [85]:
# Hidden-layer parameters, drawn uniformly from [0, 1):
# w1 maps the 2 input features to 5 hidden units; b1 holds one bias per hidden unit.
w1 = np.random.rand(2, 5)
b1 = np.random.rand(5)

In [15]:
# Hidden-layer pre-activation: features (n_samples, 2) times w1 (2, 5), with b1
# broadcast across the sample axis.
hidden = np.dot(features, w1) + b1
hidden.shape

(1000, 5)

In [16]:
def relu(z):
    """Rectified linear unit: element-wise maximum of ``z`` and zero."""
    rectified = np.maximum(0, z)
    return rectified

In [86]:
# Apply the non-linearity. This overwrites the pre-activation stored in `hidden`;
# re-running the cell is harmless because relu(relu(z)) == relu(z).
hidden = relu(hidden)
hidden.shape

(1000, 5)

In [87]:
# Output-layer parameters, drawn uniformly from [0, 1):
# w2 maps the 5 hidden units to a single output; b2 is that output's bias.
w2 = np.random.rand(5, 1)
b2 = np.random.rand(1)

In [19]:
# Network output: a linear read-out of the hidden activations (no activation
# function is applied to the output layer).
output = np.dot(hidden, w2) + b2
output.shape

(1000, 1)

In [22]:
output[:3]

array([[ 3.23453558],
       [ 2.06376869],
       [ 2.31335353]])

In [20]:
def forward_pass(features, weights, biases):
    """Compute the network's predictions for ``features``.

    ``weights[0]`` / ``biases[0]`` parameterise the ReLU hidden layer and
    ``weights[1]`` / ``biases[1]`` the linear output layer.
    """
    w_hidden, b_hidden = weights[0], biases[0]
    activations = relu(np.dot(features, w_hidden) + b_hidden)
    w_out, b_out = weights[1], biases[1]
    prediction = np.dot(activations, w_out) + b_out
    return prediction

In [23]:
forward_pass(features, [w1, w2], [b1, b2])[:3]

array([[ 3.23453558],
       [ 2.06376869],
       [ 2.31335353]])

In [35]:
def loss(predictions, targets):
    """Mean squared error between predictions and targets.

    Parameters
    ----------
    predictions : array_like
        Network outputs. Singleton axes are squeezed away so that an
        ``(n, 1)`` column lines up with ``(n,)`` targets instead of
        broadcasting to an ``(n, n)`` matrix.
    targets : array_like
        Ground-truth values, shaped like the squeezed predictions.

    Returns
    -------
    float
        The average of the squared residuals, as a single scalar.
    """
    residuals = np.squeeze(predictions) - targets
    # Average over all samples; a square is already non-negative, so no abs() is needed.
    return np.mean(residuals ** 2)

In [36]:
loss(output, targets)

1.600690458970484

In [38]:
def dloss(predictions, targets):
    """Residual of the squeezed predictions against the targets.

    This is proportional to the derivative of the squared error with respect
    to the predictions (the constant factor is not included).
    """
    residual = np.squeeze(predictions) - targets
    return residual

In [78]:
def drelu(z):
    """Derivative of ReLU: 1.0 where ``z`` is strictly positive, 0.0 elsewhere."""
    is_positive = z > 0
    # Multiplying by a float turns the boolean mask into 0.0 / 1.0 values.
    return 1. * is_positive

In [47]:
# Error signal at the output layer: prediction minus target.
# targets is reshaped to a column first; subtracting the flat (n,) vector from
# the (n, 1) output would broadcast to an (n, n) matrix instead.
delta2 = output - targets.reshape(output.shape)
delta2.shape

(1000, 1)

In [48]:
# Gradient for w2: hidden activations times output error. The matrix product
# contracts the sample axis, i.e. it sums the per-sample contributions.
dw2 = np.dot(hidden.T, delta2)
dw2.shape

(5, 1)

In [49]:
# Gradient for b2: the output error summed over all samples.
db2 = np.sum(delta2, axis=0)
db2.shape

(1,)

In [55]:
# Back-propagate the output error through w2, then zero it for inactive units.
# Whether `hidden` holds pre- or post-ReLU values here, the mask is the same,
# because relu(z) > 0 exactly when z > 0.
delta1 = np.dot(delta2, w2.T)  * drelu(hidden)
delta1.shape

(1000, 5)

In [57]:
# Gradient for w1: input features times hidden-layer error, summed over samples.
dw1 = np.dot(features.T, delta1)
dw1.shape

(2, 5)

In [58]:
# Gradient for b1: the hidden-layer error summed over all samples.
db1 = np.sum(delta1, axis=0)
db1.shape

(5,)

In [114]:
import pdb

In [131]:
def backward_pass(features, targets, weights, biases, lr=0.0001):
    """Run one full-batch gradient-descent step on the two-layer network.

    Prints the mean squared error measured *before* the update, then adjusts
    the parameter arrays in place.

    Parameters
    ----------
    features : ndarray
        Input samples, one row per sample.
    targets : ndarray
        One target per sample; must be reshapeable to the shape of the
        network output.
    weights : list of ndarray
        ``[w1, w2]``: hidden-layer and output-layer weights. Modified in place.
    biases : list of ndarray
        ``[b1, b2]``: hidden-layer and output-layer biases. Modified in place.
    lr : float, optional
        Step size. Gradients are summed (not averaged) over the samples and
        omit the constant factor of the squared-error derivative, so the
        effective step grows with the number of samples.

    Returns
    -------
    None
    """
    # Forward pass (the hidden activations are kept; the gradients need them)
    hidden = relu(np.dot(features, weights[0]) + biases[0])
    outp = np.dot(hidden, weights[1]) + biases[1]

    # Compute loss (named `mse` so it does not shadow the notebook's `loss` function)
    mse = np.mean((np.squeeze(outp) - targets) ** 2)
    print('Loss:', mse)

    # Compute Ds
    # Reshape against this call's own output rather than the notebook-level
    # `output` array, so the function works on a fresh kernel and for any batch size.
    delta2 = outp - targets.reshape(outp.shape)
    dw2 = np.dot(hidden.T, delta2)
    db2 = np.sum(delta2, axis=0)
    # `hidden` is post-ReLU here; drelu still gives the right mask because
    # relu(z) > 0 exactly when z > 0.
    delta1 = np.dot(delta2, weights[1].T) * drelu(hidden)
    dw1 = np.dot(features.T, delta1)
    db1 = np.sum(delta1, axis=0)

    # Update parameters (in place, so every reference to these arrays sees the change)
    weights[0] -= lr * dw1
    biases[0] -= lr * db1
    weights[1] -= lr * dw2
    biases[1] -= lr * db2

In [174]:
# Re-initialise every parameter with fresh uniform [0, 1) values so that
# training starts from scratch.
w1 = np.random.rand(2, 5)
b1 = np.random.rand(5)
w2 = np.random.rand(5, 1)
b2 = np.random.rand(1)

In [175]:
# Bundle the parameters in layer order ([0] = hidden layer, [1] = output layer),
# which is how forward_pass and backward_pass index them.
# The lists hold references to the arrays, and backward_pass updates them in
# place, so w1, b1, w2 and b2 also reflect the trained values afterwards.
weights = [w1, w2]
biases = [b1, b2]
weights[0]

array([[ 0.20578793,  0.78753153,  0.55609003,  0.04458742,  0.22208434],
       [ 0.6765867 ,  0.52794629,  0.24290937,  0.3831397 ,  0.96958867]])

In [203]:
# Ten full-batch gradient steps. backward_pass prints the loss measured before
# each update and modifies `weights` / `biases` in place, so re-running this
# cell continues training from the current parameters rather than starting over.
for i in range(10):
    backward_pass(features, targets, weights, biases, lr=0.0001)

Loss: 0.198957801803
Loss: 0.198787412709
Loss: 0.198621072284
Loss: 0.198459425615
Loss: 0.198301056583
Loss: 0.198143815494
Loss: 0.197988257048
Loss: 0.197832674504
Loss: 0.197679744971
Loss: 0.197529295423


In [204]:
forward_pass(features, [w1, w2], [b1, b2])[:3]

array([[ 5.20619023],
       [ 0.07565037],
       [ 0.35724453]])

In [205]:
output[:3]

array([[ 3.23453558],
       [ 2.06376869],
       [ 2.31335353]])