In [1]:
"""
    Tensors
    Layers
    Neural Nets
    Loss Functions
    Optimizers
    Data
    Training
    XOR Example
    FizzBuzz Example (time permitting)
"""

# https://github.com/joelgrus/odscnet
# Since this code was written as a Python library, I need to learn how to organize it in a notebook
# https://stackoverflow.com/questions/36427747/scientific-computing-ipython-notebook-how-to-organize-code
# Also look into tips to organize a Data Science project and more importantly with Jupyter Notebooks
### https://medium.com/outlier-bio-blog/a-quick-guide-to-organizing-data-science-projects-updated-for-2016-4cbb1e6dac71
### https://drivendata.github.io/cookiecutter-data-science/
### https://swcarpentry.github.io/2014-01-31-ucsb/lessons/jk-python/reproducible_workflow.html
### http://www.carlboettiger.info/2012/05/06/research-workflow.html

# Note: When running in IPython Notebook, we can remove the library import

'\n    Tensors\n    Layers\n    Neural Nets\n    Loss Functions\n    Optimizers\n    Data\n    Training\n    XOR Example\n    FizzBuzz Example (time permitting)\n'

In [2]:
"""
A tensor is just a multidimensional array
"""
from numpy import ndarray as Tensor

In [3]:
"""
our neural nets will be made up of layers
one might look like
inputs -> linear -> tanh -> linear -> outputs
"""
from typing import Dict, Callable
import numpy as np

# from odscnet.tensor import Tensor

class Layer:
    """
    Base class for the building blocks of a network.

    Every layer owns two dicts: `params` holds its tensors by name and
    `grads` holds the gradients stored under those same names.
    """
    def __init__(self) -> None:
        # Type comments (not variable annotations) so the attributes stay
        # ordinary assignments.
        self.params = {}  # type: Dict[str, Tensor]
        self.grads = {}  # type: Dict[str, Tensor]

    def forward(self, inputs: Tensor) -> Tensor:
        """Map inputs to outputs; concrete layers must override this."""
        raise NotImplementedError

    def backward(self, grad: Tensor) -> Tensor:
        """Push a gradient back through the layer; concrete layers must override this."""
        raise NotImplementedError

class Linear(Layer):
    """
    Affine layer: outputs = inputs @ w + b

    `w` has shape (input_size, output_size) and `b` has shape
    (output_size,); both start out as standard-normal random draws.
    """
    def __init__(self, input_size: int, output_size: int) -> None:
        super().__init__()
        # w is drawn before b; keep this order so seeded runs stay the same
        self.params["w"] = np.random.randn(input_size, output_size)
        self.params["b"] = np.random.randn(output_size)

    def forward(self, inputs: Tensor) -> Tensor:
        """Remember the inputs for the backward pass and return inputs @ w + b."""
        self.inputs = inputs
        weights = self.params["w"]
        bias = self.params["b"]
        return inputs @ weights + bias

    def backward(self, grad: Tensor) -> Tensor:
        """
        Store the gradients for b and w, then return the gradient with
        respect to this layer's inputs.

        Scalar case: for y = f(x) with x = a * b,
            dy/da = f'(x) * b    and    dy/db = f'(x) * a
        Matrix case: for x = a @ b,
            dy/da = f'(x) @ b.T  and    dy/db = a.T @ f'(x)
        """
        # b is added to every row, so its gradient sums over axis 0
        self.grads["b"] = np.sum(grad, axis=0)
        self.grads["w"] = self.inputs.T @ grad
        weights = self.params["w"]
        return grad @ weights.T

# Type of an activation function and of its derivative: Tensor in, Tensor out.
F = Callable[[Tensor], Tensor]

class Activation(Layer):
    """
    Layer that applies a function `f` to its inputs.

    `f_prime` is the derivative of `f`; the backward pass multiplies it
    by the incoming gradient (chain rule).
    """
    def __init__(self, f: F, f_prime: F) -> None:
        super().__init__()
        self.f, self.f_prime = f, f_prime

    def forward(self, inputs: Tensor) -> Tensor:
        """Remember the inputs for the backward pass and return f(inputs)."""
        self.inputs = inputs
        activate = self.f
        return activate(inputs)

    def backward(self, grad: Tensor) -> Tensor:
        """Return f_prime(saved inputs) * grad."""
        local_slope = self.f_prime(self.inputs)
        return local_slope * grad

def tanh(x: Tensor) -> Tensor:
    """Hyperbolic tangent of x, computed by numpy."""
    result = np.tanh(x)
    return result

def tanh_prime(x: Tensor) -> Tensor:
    """Derivative of tanh at x, i.e. 1 - tanh(x)^2."""
    return 1 - tanh(x) ** 2

class Tanh(Activation):
    """Activation layer wired up with tanh and its derivative."""
    def __init__(self) -> None:
        super().__init__(f=tanh, f_prime=tanh_prime)

In [4]:
"""
A neural net is just a collection of layers
"""
from typing import Sequence, Iterator, Tuple

#from odscnet.tensor import Tensor
#from odscnet.layers import Layer

class NeuralNet:
    """
    An ordered stack of layers.

    The forward pass runs the layers first to last; the backward pass
    runs them last to first.
    """
    def __init__(self, layers: Sequence[Layer]) -> None:
        self.layers = layers

    def forward(self, inputs: Tensor) -> Tensor:
        """Feed inputs through every layer in order and return the final output."""
        activations = inputs
        for layer in self.layers:
            activations = layer.forward(activations)
        return activations

    def backward(self, grad: Tensor) -> Tensor:
        """Feed grad through every layer in reverse order and return the result."""
        upstream = grad
        for layer in reversed(self.layers):
            upstream = layer.backward(upstream)
        return upstream

    def params_and_grads(self) -> Iterator[Tuple[Tensor, Tensor]]:
        """Yield (param, grad) for each named parameter of each layer."""
        for layer in self.layers:
            for name in layer.params:
                # look the gradient up first, then hand both out together
                gradient = layer.grads[name]
                yield layer.params[name], gradient

In [6]:
"""
An optimizer uses the gradients to adjust the weights
of the neural net
"""
#from odscnet.nn import NeuralNet

class Optimizer:
    """Interface for objects that update a net's parameters from its gradients."""
    def step(self, net: NeuralNet) -> None:
        """Apply one update to `net`; concrete optimizers must override this."""
        raise NotImplementedError

class SGD(Optimizer):
    """Plain gradient descent: param <- param - lr * grad."""
    def __init__(self, lr: float = 0.01) -> None:
        self.lr = lr

    def step(self, net: NeuralNet) -> None:
        """Move every parameter of `net` against its gradient, scaled by lr."""
        for weights, gradient in net.params_and_grads():
            # must stay an in-place update: it changes the array the layer
            # holds, rebinding the local name would change nothing
            weights -= self.lr * gradient

In [7]:
"""
A loss function measures how good or bad
our predictions are, and gives us a gradient
"""
import numpy as np

#from odscnet.tensor import Tensor

class Loss:
    """Interface for loss functions: a scalar loss plus its gradient."""
    def loss(self, predicted: Tensor, actual: Tensor) -> float:
        """Return how far `predicted` is from `actual`; subclasses must override."""
        raise NotImplementedError

    def grad(self, predicted: Tensor, actual: Tensor) -> Tensor:
        """Return the gradient of the loss w.r.t. `predicted`; subclasses must override."""
        raise NotImplementedError


class MSE(Loss):
    """
    Despite the name this is the total (summed) squared error,
    not the mean.
    """
    def loss(self, predicted: Tensor, actual: Tensor) -> float:
        """Sum of squared differences between predicted and actual."""
        residual = predicted - actual
        return np.sum(residual ** 2)

    def grad(self, predicted: Tensor, actual: Tensor) -> Tensor:
        """Gradient of the summed squared error w.r.t. predicted."""
        residual = predicted - actual
        return 2 * residual

In [8]:
"""
We want to process our data in batches
"""
from typing import NamedTuple, Iterator

import numpy as np

#from odscnet.tensor import Tensor

# One mini-batch: a slice of the inputs paired with the same slice of the targets.
Batch = NamedTuple("Batch", [("inputs", Tensor), ("targets", Tensor)])

class DataIterator:
    """Interface for callables that split (inputs, targets) into Batch objects."""
    def __call__(self, inputs: Tensor, targets: Tensor) -> Iterator[Batch]:
        """Return an iterator of batches; concrete iterators must override this."""
        raise NotImplementedError

class BatchIterator(DataIterator):
    """
    Yields consecutive slices of at most `batch_size` rows.

    With `shuffle` on, the order in which the slices are visited is
    randomised; the rows inside each slice keep their original order.
    """
    def __init__(self, batch_size: int = 32, shuffle: bool = True) -> None:
        self.batch_size, self.shuffle = batch_size, shuffle

    def __call__(self, inputs: Tensor, targets: Tensor) -> Iterator[Batch]:
        """Generate one Batch per slice of inputs/targets."""
        offsets = np.arange(0, len(inputs), self.batch_size)
        if self.shuffle:
            # only the slice start offsets are permuted
            np.random.shuffle(offsets)
        for lo in offsets:
            hi = lo + self.batch_size
            yield Batch(inputs[lo:hi], targets[lo:hi])

In [16]:
"""
Here's a function to train a neural net
"""

#from odscnet.tensor import Tensor
#from odscnet.nn import NeuralNet
#from odscnet.loss import Loss, MSE
#from odscnet.optim import Optimizer, SGD
#from odscnet.data import DataIterator, BatchIterator

def train(net: NeuralNet,
          inputs: Tensor,
          targets: Tensor,
          num_epochs: int = 5000,
          iterator: DataIterator = None,
          loss: Loss = None,
          optimizer: Optimizer = None) -> None:
    """
    Train `net` in place on (inputs, targets).

    For each epoch, every batch produced by `iterator` goes through a
    forward pass, a loss/gradient computation, a backward pass and one
    optimizer step. The summed batch loss is printed every 100 epochs.

    iterator, loss and optimizer default to BatchIterator(), MSE() and
    SGD(). The defaults are created on each call rather than once in the
    signature, so calls never share a default instance and re-running the
    cells that define those classes takes effect without re-defining
    `train`.
    """
    if iterator is None:
        iterator = BatchIterator()
    if loss is None:
        loss = MSE()
    if optimizer is None:
        optimizer = SGD()

    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for batch in iterator(inputs, targets):
            predictions = net.forward(batch.inputs)
            epoch_loss += loss.loss(predictions, batch.targets)
            grad = loss.grad(predictions, batch.targets)
            net.backward(grad)
            optimizer.step(net)
        if (epoch % 100) == 0:
            print(epoch, epoch_loss)

In [10]:
# Binary encode an Integer into array of binary values
from typing import List
def binary_encode(x: int, num_digits: int = 10) -> List[int]:
    """
    Return the lowest `num_digits` bits of x as a list of 0/1 values.

    The list is least-significant-bit first, i.e. element i is bit i of
    x, so 6 (binary 110) becomes [0, 1, 1, 0, ...]. Bits above
    `num_digits` are dropped: with the default width of 10, only values
    0..1023 get distinct encodings.

    Raises ValueError if num_digits is negative.
    """
    if num_digits < 0:
        raise ValueError("num_digits must be non-negative")
    return [(x >> i) & 1 for i in range(num_digits)]

#print(binary_encode(100))
#print(binary_encode(101))

In [12]:
from typing import List
def fizz_buzz_encode(x: int) -> List[int]:
    """
    One-hot encode the fizz buzz class of x.

    Position 0: none of the below, 1: divisible by 3 ("fizz"),
    2: divisible by 5 ("buzz"), 3: divisible by 15 ("fizzbuzz").
    """
    # checked from most to least specific, so 15 wins over 5 and 3
    for divisor, slot in ((15, 3), (5, 2), (3, 1)):
        if x % divisor == 0:
            break
    else:
        slot = 0
    one_hot = [0, 0, 0, 0]
    one_hot[slot] = 1
    return one_hot

#print(fizz_buzz_encode(1)) 
#print(fizz_buzz_encode(3)) 
#print(fizz_buzz_encode(4)) 
#print(fizz_buzz_encode(5)) 
#print(fizz_buzz_encode(10))
#print(fizz_buzz_encode(15))
#print(fizz_buzz_encode(20)) 

In [17]:
"""
FIZZBUZZ (5000)

fizz buzz is the following problem

given an input x,
if x % 3 == 0, print "fizz"
if x % 5 == 0, print "buzz"
if x % 15 == 0, print "fizzbuzz"
otherwise just print x

To train the model, we use binary encoding (see cells above) - why???
  ...Train data are values from 101 to 1023
  ...Test data are values from 1 to 100

"""
from typing import List

import numpy as np

#from odscnet.train import train
#from odscnet.nn import NeuralNet
#from odscnet.layers import Linear, Tanh
#from odscnet.optim import SGD

# Training set: every integer in range(101, 1024), binary-encoded, with its
# one-hot fizz buzz class as the target.
inputs = np.array([
    binary_encode(x) for x in range(101, 1024)
])

targets = np.array([
    fizz_buzz_encode(x) for x in range(101, 1024)
])

net = NeuralNet([
    Linear(input_size=10, output_size=50),
    Tanh(),
    Linear(input_size=50, output_size=4)
])

# NOTE(review): no random seed is set in this cell, so the initial weights
# and the batch order (both drawn from np.random) differ on every run and
# the printed losses will not repeat exactly.
train(net, inputs, targets, num_epochs=5000,
      optimizer=SGD(lr=0.001))

# This is where we check the test data prediction (1..100, never seen
# during training).
# We print strings "fizz, buzz and fizzbuzz" to easily recognize the match
# The original author notes the prediction was perfect with 5000 iterations.
match = 0
for x in range(1, 101):
    # A separate name keeps the training `inputs` array intact after the
    # loop, and an ndarray keeps the sample the same type the net was
    # trained on.
    test_input = np.array(binary_encode(x))
    prediction = net.forward(test_input)

    actual = fizz_buzz_encode(x)
    labels = [str(x), "fizz", "buzz", "fizzbuzz"]
    prediction_idx = np.argmax(prediction)
    actual_idx = np.argmax(actual)
    if prediction_idx == actual_idx:
        match += 1
        outcome = "  --match"
    else:
        outcome = "  --miss"
    print(x, labels[prediction_idx], labels[actual_idx], outcome)
print("Score= " + str(match) + " over 100")

0 128974.471922
100 573.567067796
200 560.738491206
300 537.305282075
400 507.174444508
500 453.53425435
600 378.124695649
700 348.112591679
800 308.164533847
900 324.820731585
1000 238.531213196
1100 223.601815312
1200 190.115919604
1300 191.493855096
1400 153.350230791
1500 199.230537494
1600 129.354616411
1700 132.459758087
1800 112.040805283
1900 110.277342558
2000 97.09589892
2100 93.5802551167
2200 89.2317561699
2300 82.8715292139
2400 82.577699859
2500 79.2305005409
2600 80.2699885935
2700 79.3173038208
2800 71.8011974765
2900 72.3444665917
3000 67.6419629833
3100 63.6265299629
3200 62.7282108452
3300 61.6941135616
3400 61.4470951189
3500 62.4868725369
3600 58.1548915322
3700 55.391529993
3800 55.8295976218
3900 54.3915726571
4000 54.8773937739
4100 54.775626096
4200 53.4802488673
4300 47.8599051617
4400 45.5255881229
4500 39.3692707952
4600 43.5312487375
4700 39.9271274625
4800 36.8794582821
4900 39.8319641214
1 1 1   --match
2 2 2   --match
3 fizz fizz   --match
4 4 4   --matc