In [115]:
import numpy as np
import matplotlib.pyplot as plt

In [116]:
np.random.seed(1)

# Forward pass

In [117]:
# create a single input example (lower-case x)
x = np.random.random((1,1))
x, x.shape

(array([[0.417022]]), (1, 1))

In [118]:
np.random.seed(1)

In [119]:
# normalizing a value

n_examples = 100000
d0 = np.random.random((n_examples,1))  # before norm
d1 = 2*np.random.random((n_examples,1))  # after scaling (now between 0 and 2.0)
d2 = 2*np.random.random((n_examples,1))-1  # after shifting - final norm result

print("min, mean, max")
print([np.around(d, decimals=3) for d in [d0.min(), d0.mean(), d0.max()]])
print([np.around(d, decimals=3) for d in [d1.min(), d1.mean(), d1.max()]])
print([np.around(d, decimals=3) for d in [d2.min(), d2.mean(), d2.max()]])


min, mean, max
[0.0, 0.499, 1.0]
[0.0, 1.0, 2.0]
[-1.0, -0.001, 1.0]


In [120]:
np.random.seed(1)

In [121]:
# create normalized input - single example with single node
x = 2*np.random.random((1,1))-1
x, x.shape

(array([[-0.16595599]]), (1, 1))

In [122]:
# create single node weight
w = 2*np.random.random((1,1))-1
w, w.shape

(array([[0.44064899]]), (1, 1))

In [123]:
# create single node bias
b = 2*np.random.random((1,1))-1
b, b.shape

(array([[-0.99977125]]), (1, 1))

In [124]:
# forward (linear) pass
y = x*w +b
y, y.shape

(array([[-1.07289959]]), (1, 1))

In [125]:
# define activation function
sigmoid = lambda x: 1/(1+np.exp(-x))

In [126]:
# forward (non-linear) pass
y = sigmoid(x*w +b)
y, y.shape

(array([[0.25485205]]), (1, 1))

In [127]:
# when we will use vectors and matrices
# the mul (*) operation is replaced with dot product
y = sigmoid(x.dot(w) +b)
y, y.shape

(array([[0.25485205]]), (1, 1))

In [128]:
'''
example with multiple input nodes (features) output nodes (classes/...)
still using a single example
'''
n_input = 3
n_output = 2

n_examples = 1

In [129]:
np.random.seed(1)

In [130]:
x = 2 * np.random.random((n_examples, n_input)) - 1
x, x.shape

(array([[-0.16595599,  0.44064899, -0.99977125]]), (1, 3))

In [131]:
# weights - now use upper-case W because is a matrix
# weights shape should match the input and output of the x*W result
W = 2 * np.random.random((n_input, n_output)) - 1
W, W.shape

(array([[-0.39533485, -0.70648822],
        [-0.81532281, -0.62747958],
        [-0.30887855, -0.20646505]]),
 (3, 2))

In [132]:
# bias is with the shape of a single output example
b = 2 * np.random.random((1, n_output)) - 1
b, b.shape

(array([[ 0.07763347, -0.16161097]]), (1, 2))

In [133]:
# forward pass
y = sigmoid(x.dot(W) +b)
y, y.shape

(array([[0.52317797, 0.47141983]]), (1, 2))

In [134]:
# multiple examples
n_input = 3
n_output = 2

n_examples = 5

In [135]:
np.random.seed(1)

In [136]:
# inputs are now in matrix, so use upper-case X
# each row is an example
X = 2 * np.random.random((n_examples, n_input)) - 1
X, X.shape

(array([[-0.16595599,  0.44064899, -0.99977125],
        [-0.39533485, -0.70648822, -0.81532281],
        [-0.62747958, -0.30887855, -0.20646505],
        [ 0.07763347, -0.16161097,  0.370439  ],
        [-0.5910955 ,  0.75623487, -0.94522481]]),
 (5, 3))

In [137]:
# weights shape does not depend on the number of examples
# only on the sape of a single input and output example
W = 2 * np.random.random((n_input, n_output)) - 1
W, W.shape

(array([[ 0.34093502, -0.1653904 ],
        [ 0.11737966, -0.71922612],
        [-0.60379702,  0.60148914]]),
 (3, 2))

In [138]:
np.random.seed(1)

In [139]:
# bias is with the shape of a single output example - still lower-case b because is a vector
b = 2 * np.random.random((1, n_output)) - 1
b, b.shape

(array([[-0.16595599,  0.44064899]]), (1, 2))

In [154]:
# forward pass
# the same code of single example - dot product "magic"
# the dot product is applied to each x example in X

# output is also a matrix - so now use upper-case Y

y0 = sigmoid(X[0].dot(W) +b)
Y = sigmoid(X.dot(W) +b)
print(y0, y0.shape)
print(Y, Y.shape)

[[0.8227841  0.22028919]] (1, 2)
[[0.8227841  0.22028919]
 [0.77049384 0.4279916 ]
 [0.69232262 0.45725323]
 [0.67268437 0.48826675]
 [0.80130784 0.19975338]] (5, 2)


In [155]:
# Adding a hidden layer
n_input = 3
n_hidden = 4
n_output = 2

n_examples = 5

In [156]:
np.random.seed(1)

In [157]:
X = 2 * np.random.random((n_examples, n_input)) - 1
X, X.shape


(array([[-0.16595599,  0.44064899, -0.99977125],
        [-0.39533485, -0.70648822, -0.81532281],
        [-0.62747958, -0.30887855, -0.20646505],
        [ 0.07763347, -0.16161097,  0.370439  ],
        [-0.5910955 ,  0.75623487, -0.94522481]]),
 (5, 3))

In [158]:
# hidden layer shape depend on its number of nodes
# AND on the number of examples

H = 2 * np.random.random((n_examples, n_input)) - 1
H, H.shape

(array([[ 0.34093502, -0.1653904 ,  0.11737966],
        [-0.71922612, -0.60379702,  0.60148914],
        [ 0.93652315, -0.37315164,  0.38464523],
        [ 0.7527783 ,  0.78921333, -0.82991158],
        [-0.92189043, -0.66033916,  0.75628501]]),
 (5, 3))

In [159]:
# now there are multiple weight matrices and bias vectors

# input to hidden
Wih = 2 * np.random.random((n_input, n_hidden)) - 1
bih = 2 * np.random.random((1, n_hidden)) - 1

# hidden to output
Who = 2 * np.random.random((n_hidden, n_output)) - 1
bho = 2 * np.random.random((1, n_output)) - 1

In [163]:
# forward pass through all layers
H = sigmoid(X.dot(Wih) + bih)
Y = sigmoid(H.dot(Who) + bho)

Y, Y.shape

(array([[0.49028916, 0.15646898],
        [0.48967728, 0.15585518],
        [0.44143544, 0.15017967],
        [0.32213702, 0.16615516],
        [0.50568541, 0.14845111]]),
 (5, 2))

# Backpropagation

In [174]:
sigmoid = lambda x: 1/(1+np.exp(-x))
sigmoid_backward = lambda x: sigmoid(x) * (1-sigmoid(x))

In [175]:
np.random.seed(1)

In [176]:
n_input = 3
n_hidden = 4
n_output = 2

n_examples = 5

In [177]:
# for simplicity we won't use biases this time

X = np.random.random((n_examples, n_input)) > 0.5
H = 2 * np.random.random((n_examples, n_input)) - 1
Y = np.zeros((n_examples, n_output))  # the actual outputs

# input to hidden
Wih = 2 * np.random.random((n_input, n_hidden)) - 1

# hidden to output
Who = 2 * np.random.random((n_hidden, n_output)) - 1

In [178]:
# forward pass
H = sigmoid(X.dot(Wih))
Yp = sigmoid(H.dot(Who))  # output predictions

In [None]:
error = Y - Yp  # per example