Mental model of neural network

In [None]:
input_layer = [0.2, 0.5, 0.4, 0.7]
# We randomly initialize the weights (values) for the hidden layer... We will
# need to "train" to make these weights give us the output layers we desire. We
# will cover that shortly!
hidden_layer = [0.98, 0.4, 0.86, -0.08]

output_neuron = 0
# This is really matrix multiplication. We explicitly _do not_ use a
# matrix/tensor, because they add overhead to understanding what happens here
# unless you work with them every day--which you probably don't. More on using
# matrices later.
for index, input_neuron in enumerate(input_layer):
    output_neuron += input_neuron * hidden_layer[index]
print(output_neuron)
# => 0.68

0.6839999999999999


STEP 1: CREATING A DATASET


In [None]:
import random

rectangles = []
rectangle_average = []

for i in range(0, 1000):
    # Generate a 2x2 rectangle [0.1, 0.8, 0.6, 1.0]
    rectangle = [round(random.random(), 1),
                 round(random.random(), 1),
                 round(random.random(), 1),
                 round(random.random(), 1)]
    rectangles.append(rectangle)
    # Take the _actual_ average for our training dataset!
    rectangle_average.append(sum(rectangle) / 4)

STEP 2: MEAN SQUARED ERROR

In [None]:
# Take the average of all the differences squared!
# This calculates how "wrong" our predictions are.
# This is called our "loss".
def mean_squared_error(actual, expected):
    error_sum = 0
    for a, b in zip(actual, expected):
        error_sum += (a - b) ** 2
    return error_sum / len(actual)

print(mean_squared_error([1.], [2.]))
# => 1.0
print(mean_squared_error([1.], [3.]))
# => 4.0

1.0
4.0


implementing train

In [None]:
def model(rectangle, hidden_layer):
    output_neuron = 0.
    for index, input_neuron in enumerate(rectangle):
        output_neuron += input_neuron * hidden_layer[index]
    return output_neuron

def train(rectangles, hidden_layer):
  outputs = []
  for rectangle in rectangles:
      output = model(rectangle, hidden_layer)
      outputs.append(output)
  return outputs

hidden_layer = [0.98, 0.4, 0.86, -0.08]
outputs = train(rectangles, hidden_layer)

print(outputs[0:10])
# [1.472, 0.7, 1.369, 0.8879, 1.392, 1.244, 0.644, 1.1179, 0.474, 1.54]
print(rectangle_average[0:10])
# [0.575, 0.45, 0.549, 0.35, 0.525, 0.475, 0.425, 0.65, 0.4, 0.575]
mean_squared_error(outputs, rectangle_average)
# 0.4218

[0.9359999999999999, 1.026, 1.016, 1.424, 1.248, 1.186, 0.41200000000000003, 1.316, 0.716, 1.774]
[0.55, 0.525, 0.425, 0.55, 0.675, 0.5249999999999999, 0.22500000000000003, 0.5, 0.44999999999999996, 0.725]


0.4345181889999996

updating hidden layer with gradient descent

In [None]:
mean_squared_error(
  train(rectangles, hidden_layer),
  rectangle_average
)

0.4345181889999996

introducing gradient descent

In [1]:
def model(rectangle, hidden_layer):
    output_neuron = 0.
    for index, input_neuron in enumerate(rectangle):
        output_neuron += input_neuron * hidden_layer[index]
    return output_neuron

def train(rectangles, hidden_layer):
  outputs = []
  for rectangle in rectangles:
      output = model(rectangle, hidden_layer)
      outputs.append(output)

  mean_squared_error(outputs, rectangle_average)

  # We go through all the weights in the hidden layer. These correspond to all
  # the weights of the function we're trying to minimize the value of: our
  # model, respective of its loss (how wrong it is).
  # 
  # For each of the weights, we want to increase/decrease it based on the slope.
  # Exactly like we showed in the one-weight example above with just x. Now
  # we just have 4 values instead of 1! Big models have billions.
  for index, _ in enumerate(hidden_layer):
    learning_rate = 0.1
    # But... how do we get the slope/derivative?!
    hidden_layer[index] -= learning_rate * hidden_layer[index].slope

  return outputs

hidden_layer = [0.98, 0.4, 0.86, -0.08]
train(rectangles, hidden_layer)

NameError: ignored

calculating slope

In [None]:
import torch

# A tensor is a matrix in PyTorch. It is the fundamental data-structure of neural
# networks. Here we say PyTorch, please keep track of the gradient/derivative
# as I do all kinds of things to the parameter(s) of this tensor.
x = torch.tensor(2., requires_grad=True)

# At this point we're applying our function f(x) = x^2.
y = x ** 2

# This tells `autograd` to compute the derivative values for all the parameters
# involved. Backward is neural network jargon for this operation, which we'll
# explain momentarily.
y.backward()

# And show us the lovely gradient/derivative, which is 4! Sick.
print(x.grad)
# => 4

tensor(4.)


In [None]:
https://colab.research.google.com/drive/1qPtXZQhuXtqBBtG8ypWgpag0cFGlC6rS#scrollTo=sk1s9aJryQuo