In [1]:
%load_ext autoreload
%autoreload 2

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import nnfs
import math

from nnfs.datasets import spiral_data
from nnfs.datasets import vertical_data

from layers import Dense
from activations import ReLU
from activations import SoftMax
from losses import CategoricalCrossEntropy, Softmax_CategoricalCrossentropy
from optimizers import SGD

In [5]:
# Initialise the nnfs helper package before any data or layers are created.
# NOTE(review): presumably this fixes the random seed and the default float dtype so
# that runs are repeatable — confirm against the nnfs documentation.
nnfs.init()

## CH 10: Optimizers

In [11]:
# Dataset: spiral data generated with samples=100 and classes=3.
# X is fed to the first Dense layer; y holds the labels used for loss and accuracy.
# NOTE(review): presumably `samples` is per class (300 rows in total) — confirm
# against nnfs.datasets.spiral_data.
X, y = spiral_data(samples=100, classes=3)

In [12]:
# First layer: a Dense layer followed by a ReLU activation.
# NOTE(review): presumably the Dense arguments are (n_inputs, n_neurons), i.e. 2 input
# features mapped to 64 hidden units — confirm against layers.Dense.
dense1 = Dense(2, 64)
activation1 = ReLU()

In [13]:
# Second (output) layer: consumes the ReLU output of the first layer.
# NOTE(review): presumably 64 inputs -> 3 outputs, one per class — confirm against
# layers.Dense.
dense2 = Dense(64, 3)

In [14]:
# Categorical cross-entropy loss combined with its activation in one object.
# Its forward() returns the loss value and exposes the activation result as `.output`;
# its backward() fills `.dinputs` for the preceding Dense layer.
loss_activation = Softmax_CategoricalCrossentropy()

In [15]:
# Optimizer: SGD with an explicit learning rate of 0.85 (no decay or momentum
# arguments are passed here).
optimizer = SGD(learning_rate=0.85)

### A single Forward-Backward-Optimization step

In [42]:
# Forward pass: each stage consumes the `.output` of the previous one.
# X -> dense1 -> ReLU -> dense2 (the raw dense2 output goes to the loss in the next cell).
dense1.forward(X)
activation1.forward(dense1.output)

dense2.forward(activation1.output)

In [43]:
# Loss computation: run the combined activation + cross-entropy forward pass on the
# raw output of the second Dense layer and the labels.
loss = loss_activation.forward(dense2.output, y)
# Bare expression so the notebook displays the loss value.
loss

0.67091066

In [44]:
# Inspect the shape of the activation output stored by the loss object.
# NOTE(review): presumably (n_samples, n_classes) — confirm against the losses module.
loss_activation.output.shape

(300, 3)

In [45]:
# Accuracy: share of samples whose highest-scoring class matches the label.
if y.ndim == 2:
    # Two-dimensional labels are reduced to class indices along axis 1.
    y = y.argmax(axis=1)

predictions = loss_activation.output.argmax(axis=1)
acc = (predictions == y).mean()
# Bare expression so the notebook displays the accuracy.
acc

0.69

In [46]:
# Backward pass: walk the network in the reverse order of the forward pass.
# Each stage receives the `.dinputs` produced by the stage that follows it.
loss_activation.backward(loss_activation.output, y)

dense2.backward(loss_activation.dinputs)

activation1.backward(dense2.dinputs)
dense1.backward(activation1.dinputs)

In [47]:
# Optimize: let the optimizer update both Dense layers after the backward pass.
# The ReLU activation is not passed to the optimizer.
optimizer.update_params(dense1)
optimizer.update_params(dense2)

### Running the model in a training loop

In [16]:
# Train the network with the optimizer created above.
# Each epoch runs: forward pass -> loss / accuracy -> backward pass -> parameter update.
N_EPOCHS = 10001  # range(N_EPOCHS) runs epochs 0..10000, so epoch 10000 is logged too
LOG_EVERY = 100   # print metrics once every LOG_EVERY epochs

# Label normalisation does not depend on the epoch, so it is done once up front
# instead of being re-checked (and `y` re-bound) on every iteration.
# Note: if the labels are two-dimensional, the loss now receives class indices from
# epoch 0 onwards rather than from epoch 1 onwards.
if y.ndim == 2:
    y = np.argmax(y, axis=1)

for epoch in range(N_EPOCHS):
    # Forward pass: each stage consumes the previous stage's `.output`.
    dense1.forward(X)
    activation1.forward(dense1.output)
    dense2.forward(activation1.output)

    # Loss computation (activation + categorical cross-entropy in one object).
    loss = loss_activation.forward(dense2.output, y)

    # Accuracy: fraction of samples whose arg-max class equals the label.
    predictions = np.argmax(loss_activation.output, axis=1)
    acc = np.mean(predictions == y)

    if epoch % LOG_EVERY == 0:
        print(f'epoch: {epoch}, '
              f'acc: {acc:.3f}, '
              f'loss: {loss:.3f}')

    # Backward pass: reverse order of the forward pass, chaining `.dinputs`.
    loss_activation.backward(loss_activation.output, y)
    dense2.backward(loss_activation.dinputs)
    activation1.backward(dense2.dinputs)
    dense1.backward(activation1.dinputs)

    # Optimize: update the parameters of both Dense layers.
    optimizer.update_params(dense1)
    optimizer.update_params(dense2)

epoch: 0, acc: 0.343, loss: 1.099
epoch: 100, acc: 0.390, loss: 1.089
epoch: 200, acc: 0.403, loss: 1.072
epoch: 300, acc: 0.400, loss: 1.070
epoch: 400, acc: 0.413, loss: 1.069
epoch: 500, acc: 0.410, loss: 1.068
epoch: 600, acc: 0.413, loss: 1.067
epoch: 700, acc: 0.407, loss: 1.065
epoch: 800, acc: 0.420, loss: 1.063
epoch: 900, acc: 0.433, loss: 1.059
epoch: 1000, acc: 0.447, loss: 1.053
epoch: 1100, acc: 0.463, loss: 1.042
epoch: 1200, acc: 0.510, loss: 1.027
epoch: 1300, acc: 0.407, loss: 1.029
epoch: 1400, acc: 0.417, loss: 1.021
epoch: 1500, acc: 0.423, loss: 1.013
epoch: 1600, acc: 0.430, loss: 1.005
epoch: 1700, acc: 0.437, loss: 0.996
epoch: 1800, acc: 0.437, loss: 1.006
epoch: 1900, acc: 0.473, loss: 1.000
epoch: 2000, acc: 0.437, loss: 0.977
epoch: 2100, acc: 0.473, loss: 0.959
epoch: 2200, acc: 0.497, loss: 0.977
epoch: 2300, acc: 0.473, loss: 0.954
epoch: 2400, acc: 0.473, loss: 0.974
epoch: 2500, acc: 0.463, loss: 1.004
epoch: 2600, acc: 0.410, loss: 1.035
epoch: 2700, 

### Learning Rate Decay

In [17]:
# Learning-rate decay after a single step: lr = lr0 * 1 / (1 + decay * step).
starting_learning_rate, learning_rate_decay = 1., 0.1
step = 1

# Factor by which the starting rate shrinks at this step.
decay_factor = 1. / (1 + learning_rate_decay * step)
learning_rate = starting_learning_rate * decay_factor
# Bare expression so the notebook displays the decayed rate.
learning_rate

0.9090909090909091

In [18]:
# Same decay formula evaluated at step 20: lr = lr0 * 1 / (1 + decay * step).
starting_learning_rate, learning_rate_decay = 1., 0.1
step = 20

# Factor by which the starting rate shrinks at this step.
decay_factor = 1. / (1 + learning_rate_decay * step)
learning_rate = starting_learning_rate * decay_factor
# Bare expression so the notebook displays the decayed rate.
learning_rate

0.3333333333333333

In [21]:
# Print the decayed learning rate for steps 0..19 (step 0 leaves the rate unchanged).
starting_learning_rate, learning_rate_decay = 1., 0.1

for step in range(20):
    # lr = lr0 * 1 / (1 + decay * step)
    decay_factor = 1. / (1 + learning_rate_decay * step)
    learning_rate = starting_learning_rate * decay_factor
    print("LR: ", learning_rate)


LR:  1.0
LR:  0.9090909090909091
LR:  0.8333333333333334
LR:  0.7692307692307692
LR:  0.7142857142857143
LR:  0.6666666666666666
LR:  0.625
LR:  0.588235294117647
LR:  0.5555555555555556
LR:  0.5263157894736842
LR:  0.5
LR:  0.47619047619047616
LR:  0.45454545454545453
LR:  0.4347826086956522
LR:  0.41666666666666663
LR:  0.4
LR:  0.3846153846153846
LR:  0.37037037037037035
LR:  0.35714285714285715
LR:  0.3448275862068965


In [7]:
# Full training run with learning-rate decay.
# The dataset, layers, loss and optimizer are all rebuilt here so this cell does not
# depend on the state left behind by earlier cells.
N_EPOCHS = 10001  # range(N_EPOCHS) runs epochs 0..10000, so epoch 10000 is logged too
LOG_EVERY = 100   # print metrics once every LOG_EVERY epochs

# Dataset
X, y = spiral_data(samples=100, classes=3)

# Label normalisation does not depend on the epoch, so it is done once up front
# instead of being re-checked (and `y` re-bound) on every iteration.
# Note: if the labels are two-dimensional, the loss now receives class indices from
# epoch 0 onwards rather than from epoch 1 onwards.
if y.ndim == 2:
    y = np.argmax(y, axis=1)

# First layer
dense1 = Dense(2, 64)
activation1 = ReLU()

# Second layer
dense2 = Dense(64, 3)

# Categorical cross-entropy combined with its activation
loss_activation = Softmax_CategoricalCrossentropy()

# Optimizer: the starting rate is spelled out (the recorded run logs lr: 1.0 at
# epoch 0) so the result does not silently depend on the SGD default.
optimizer = SGD(learning_rate=1.0, decay=1e-3)

for epoch in range(N_EPOCHS):
    # Forward pass: each stage consumes the previous stage's `.output`.
    dense1.forward(X)
    activation1.forward(dense1.output)
    dense2.forward(activation1.output)

    # Loss computation
    loss = loss_activation.forward(dense2.output, y)

    # Accuracy: fraction of samples whose arg-max class equals the label.
    predictions = np.argmax(loss_activation.output, axis=1)
    acc = np.mean(predictions == y)

    if epoch % LOG_EVERY == 0:
        print(f'epoch: {epoch}, '
              f'acc: {acc:.3f}, '
              f'loss: {loss:.3f}, '
              f'lr: {optimizer.current_learning_rate}')

    # Backward pass: reverse order of the forward pass, chaining `.dinputs`.
    loss_activation.backward(loss_activation.output, y)
    dense2.backward(loss_activation.dinputs)
    activation1.backward(dense2.dinputs)
    dense1.backward(activation1.dinputs)

    # Optimize: the pre/post hooks bracket the per-layer updates.
    # NOTE(review): presumably pre_update_params() applies the decay to
    # current_learning_rate and post_update_params() advances the step counter —
    # confirm against optimizers.SGD.
    optimizer.pre_update_params()
    optimizer.update_params(dense1)
    optimizer.update_params(dense2)
    optimizer.post_update_params()

epoch: 0, acc: 0.343, loss: 1.099, lr: 1.0
epoch: 100, acc: 0.403, loss: 1.085, lr: 0.9099181073703367
epoch: 200, acc: 0.400, loss: 1.072, lr: 0.8340283569641367
epoch: 300, acc: 0.403, loss: 1.070, lr: 0.7698229407236336
epoch: 400, acc: 0.410, loss: 1.069, lr: 0.7147962830593281
epoch: 500, acc: 0.410, loss: 1.068, lr: 0.66711140760507
epoch: 600, acc: 0.407, loss: 1.068, lr: 0.6253908692933083
epoch: 700, acc: 0.410, loss: 1.066, lr: 0.5885815185403178
epoch: 800, acc: 0.407, loss: 1.064, lr: 0.5558643690939411
epoch: 900, acc: 0.413, loss: 1.062, lr: 0.526592943654555
epoch: 1000, acc: 0.427, loss: 1.058, lr: 0.5002501250625312
epoch: 1100, acc: 0.433, loss: 1.054, lr: 0.4764173415912339
epoch: 1200, acc: 0.453, loss: 1.048, lr: 0.45475216007276037
epoch: 1300, acc: 0.457, loss: 1.042, lr: 0.43497172683775553
epoch: 1400, acc: 0.490, loss: 1.035, lr: 0.4168403501458941
epoch: 1500, acc: 0.507, loss: 1.028, lr: 0.4001600640256102
epoch: 1600, acc: 0.527, loss: 1.021, lr: 0.38476337

### Momentum

In [13]:
# Full training run with learning-rate decay and momentum.
# The dataset, layers, loss and optimizer are all rebuilt here so this cell does not
# depend on the state left behind by earlier cells.
N_EPOCHS = 10001  # range(N_EPOCHS) runs epochs 0..10000, so epoch 10000 is logged too
LOG_EVERY = 100   # print metrics once every LOG_EVERY epochs

# Dataset
X, y = spiral_data(samples=100, classes=3)

# Label normalisation does not depend on the epoch, so it is done once up front
# instead of being re-checked (and `y` re-bound) on every iteration.
# Note: if the labels are two-dimensional, the loss now receives class indices from
# epoch 0 onwards rather than from epoch 1 onwards.
if y.ndim == 2:
    y = np.argmax(y, axis=1)

# First layer
dense1 = Dense(2, 64)
activation1 = ReLU()

# Second layer
dense2 = Dense(64, 3)

# Categorical cross-entropy combined with its activation
loss_activation = Softmax_CategoricalCrossentropy()

# Optimizer: the starting rate is spelled out (the recorded run logs lr: 1.0 at
# epoch 0) so the result does not silently depend on the SGD default.
optimizer = SGD(learning_rate=1.0, decay=1e-3, momentum=0.9)

for epoch in range(N_EPOCHS):
    # Forward pass: each stage consumes the previous stage's `.output`.
    dense1.forward(X)
    activation1.forward(dense1.output)
    dense2.forward(activation1.output)

    # Loss computation
    loss = loss_activation.forward(dense2.output, y)

    # Accuracy: fraction of samples whose arg-max class equals the label.
    predictions = np.argmax(loss_activation.output, axis=1)
    acc = np.mean(predictions == y)

    if epoch % LOG_EVERY == 0:
        print(f'epoch: {epoch}, '
              f'acc: {acc:.3f}, '
              f'loss: {loss:.3f}, '
              f'lr: {optimizer.current_learning_rate}')

    # Backward pass: reverse order of the forward pass, chaining `.dinputs`.
    loss_activation.backward(loss_activation.output, y)
    dense2.backward(loss_activation.dinputs)
    activation1.backward(dense2.dinputs)
    dense1.backward(activation1.dinputs)

    # Optimize: the pre/post hooks bracket the per-layer updates.
    # NOTE(review): presumably pre_update_params() applies the decay to
    # current_learning_rate and post_update_params() advances the step counter —
    # confirm against optimizers.SGD.
    optimizer.pre_update_params()
    optimizer.update_params(dense1)
    optimizer.update_params(dense2)
    optimizer.post_update_params()

epoch: 0, acc: 0.297, loss: 1.099, lr: 1.0
epoch: 100, acc: 0.403, loss: 1.041, lr: 0.9099181073703367
epoch: 200, acc: 0.520, loss: 0.955, lr: 0.8340283569641367
epoch: 300, acc: 0.687, loss: 0.694, lr: 0.7698229407236336
epoch: 400, acc: 0.713, loss: 0.568, lr: 0.7147962830593281
epoch: 500, acc: 0.793, loss: 0.478, lr: 0.66711140760507
epoch: 600, acc: 0.837, loss: 0.394, lr: 0.6253908692933083
epoch: 700, acc: 0.840, loss: 0.377, lr: 0.5885815185403178
epoch: 800, acc: 0.873, loss: 0.319, lr: 0.5558643690939411
epoch: 900, acc: 0.883, loss: 0.289, lr: 0.526592943654555
epoch: 1000, acc: 0.883, loss: 0.281, lr: 0.5002501250625312
epoch: 1100, acc: 0.863, loss: 0.270, lr: 0.4764173415912339
epoch: 1200, acc: 0.907, loss: 0.237, lr: 0.45475216007276037
epoch: 1300, acc: 0.907, loss: 0.230, lr: 0.43497172683775553
epoch: 1400, acc: 0.897, loss: 0.227, lr: 0.4168403501458941
epoch: 1500, acc: 0.910, loss: 0.217, lr: 0.4001600640256102
epoch: 1600, acc: 0.913, loss: 0.209, lr: 0.38476337