## Visualisation of Backpropagation on 3 Neural Networks
- Raw
- L1 normalized
- L2 normalized

The networks try to classify points (x, y) ∈ (-1, 1)² according to the sign of their product x·y (positive vs. negative).

In [None]:
import numpy as np
import math

In [None]:
from Layers import DenseLayer
from Activation import *
from trainutils import *
from Loss import Crossentropy
from helpers import *

Let's prepare parameters:

In [None]:
# Hyper-parameters for the training cells below. The whole dict is passed to
# every layer's backward() call, and 'lr' is updated in place by the training
# loop, so re-run this cell before re-training from scratch.
hparams = dict(
    lr=0.1,            # learning rate; multiplied by 'lr_decay' after each epoch
    batch_size=128,    # number of samples per mini-batch
    epochs=500,        # number of iterations of the outer training loop
    lr_decay=0.9999,   # per-epoch multiplicative decay applied to 'lr'
    momentum=0.9,      # momentum factor (presumably read inside layer.backward - confirm)
)
BATCH_SIZE = hparams['batch_size']

In [None]:
# Generate the synthetic dataset with a fixed random_state, then split it.
# NOTE(review): no seed is passed to train_test_split, so whether the split is
# reproducible depends on that helper - TODO confirm.
X, y = generate_random_data(10000, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

# Encode both label sets (test first, then train) with matrix_encode;
# presumably a one-hot encoding, given the `_oh` suffix used in the new-data cell.
y_test, y_train = (matrix_encode(labels) for labels in (y_test, y_train))

Build the network model by stacking layers (run only one of the two cells below; if both are run, the second definition overwrites `model`):

In [None]:
# Variant with a ReLU hidden activation.
# The network is a plain list of layers; the training and test cells call
# forward() on each layer in order and backward() in reverse order.
# The first layer's input size is the first dimension of X_train, the hidden
# width is 4, and the final layer has 2 outputs.
# NOTE(review): Dense_Softmax_Cross_Entropy is presumably a dense layer fused
# with softmax + cross-entropy (judging by its name) - confirm in its module.
model = [
    DenseLayer(input_size=X_train.shape[0], output_size=4),
    Relu(),
    Dense_Softmax_Cross_Entropy(input_size=4, output_size=2)
]

In [None]:
# Variant with a Sigmoid hidden activation.
# Running this cell rebinds `model`, replacing any model built by an earlier
# cell. Same layout as the ReLU variant: input size from X_train.shape[0],
# hidden width 4, 2 outputs.
model = [
    DenseLayer(input_size=X_train.shape[0], output_size=4),
    Sigmoid(),
    Dense_Softmax_Cross_Entropy(input_size=4, output_size=2)
]

Propagation:

In [None]:
# Mini-batch training loop.
#
# Batches are taken as column slices of X_train / y_train, so the second axis
# is treated as the sample axis. Every epoch walks the batches in the same
# order (no shuffling), runs a forward pass through all layers, accumulates
# accuracy and cost, then runs the backward pass in reverse layer order.
#
# Changes compared with the previous version of this cell:
#   * the running activation is no longer called `input`, which shadowed the
#     builtin for the rest of the kernel session;
#   * cost is now reported as the mean over batches, like accuracy (it used to
#     be the sum over batches, which scales with the number of batches);
#   * the batch count is computed once, and an empty training set no longer
#     raises ZeroDivisionError.
num_batches = int(math.ceil(X_train.shape[1] / BATCH_SIZE))

for epoch in range(hparams['epochs']):
    epoch_cost = 0
    epoch_acc = 0
    for batch_num in range(num_batches):
        # The last batch may be shorter; slicing past the end is clamped.
        batch_cols = slice(batch_num * BATCH_SIZE, (batch_num + 1) * BATCH_SIZE)
        activations = X_train[:, batch_cols]
        y_batch = y_train[:, batch_cols]

        # Forward pass
        for layer in model:
            activations = layer.forward(activations)

        epoch_acc += get_accuracy(y_batch, activations)
        epoch_cost += Crossentropy(y_batch, activations)

        # Backward pass. The target labels are fed in as the initial
        # "gradient"; presumably the final layer turns them into the actual
        # loss gradient - confirm in Dense_Softmax_Cross_Entropy.backward.
        gradient = y_batch
        for layer in reversed(model):
            gradient = layer.backward(gradient, hparams)

    # Unweighted mean over batches: a shorter final batch counts as much as a
    # full one.
    if num_batches:
        epoch_cost /= num_batches
        epoch_acc /= num_batches

    # Decay the learning rate in place once per epoch.
    hparams['lr'] *= hparams['lr_decay']
    print(f"Epoch: {epoch}, cost: {epoch_cost}, acc: {epoch_acc}")

Weights peek:

In [None]:
# Show the learned parameters of every layer that is a DenseLayer instance;
# all other layers are skipped.
for layer in model:
    if not isinstance(layer, DenseLayer):
        continue
    print(f"Weights:\n{layer.weights}\n, Bias:\n{layer.bias}")

Test:

In [None]:
# Evaluate the trained model on the held-out split: push X_test through every
# layer in order and compare the final output with the encoded test labels.
# The running activation is deliberately not named `input`, so the builtin
# input() stays usable in this kernel.
test_output = X_test
for layer in model:
    test_output = layer.forward(test_output)
acc = get_accuracy(y_test, test_output)
print(f"Test accuracy: {acc}")

Test for random new data:

In [None]:
# Evaluate on freshly generated data, using a random_state different from the
# one used for the training data (123 vs 42).
input_X, input_labels = generate_random_data(10000, random_state=123) # Different random state

# Forward pass through all layers. The running activation is deliberately not
# named `input`, so the builtin input() stays usable in this kernel.
new_output = input_X
for layer in model:
    new_output = layer.forward(new_output)

# Labels are flattened before encoding, unlike the train/test labels above.
# NOTE(review): presumably generate_random_data returns 2-D labels - confirm.
input_labels_oh = matrix_encode(input_labels.flatten())

acc = get_accuracy(input_labels_oh, new_output)
print(f"Test accuracy on new data: {acc}")


### Now for L1:

Let's prepare parameters:

In [None]:
# Fresh hyper-parameters for the L1 experiment. The previous training run
# decayed hparams['lr'] in place, so the dict is rebuilt here.
hparams = dict(
    lr=0.1,            # learning rate; multiplied by 'lr_decay' after each epoch
    batch_size=128,    # number of samples per mini-batch
    epochs=500,        # number of iterations of the outer training loop
    lr_decay=0.9999,   # per-epoch multiplicative decay applied to 'lr'
    momentum=0.9,      # momentum factor (presumably read inside layer.backward - confirm)
)
BATCH_SIZE = hparams['batch_size']

In [None]:
# Regenerate the same synthetic dataset (random_state=42) and split it again.
# NOTE(review): no seed is passed to train_test_split, so this split may differ
# from the one used in the raw experiment - TODO confirm.
X, y = generate_random_data(10000, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

# Encode both label sets (test first, then train) with matrix_encode;
# presumably a one-hot encoding, given the `_oh` suffix used in the new-data cell.
y_test, y_train = (matrix_encode(labels) for labels in (y_test, y_train))

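Build the network model by stacking layers (run only one of the two cells below; if both are run, the second definition overwrites `model`):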

In [None]:
# L1 variant with a ReLU hidden activation.
# Same stack as the raw network, with an L1() layer placed in front of the
# first dense layer.
# NOTE(review): L1 presumably normalizes its input by the L1 norm, per the
# notebook introduction - confirm in its definition.
model = [
    L1(),
    DenseLayer(input_size=X_train.shape[0], output_size=4),
    Relu(),
    Dense_Softmax_Cross_Entropy(input_size=4, output_size=2)
]

In [None]:
# L1 variant with a Sigmoid hidden activation.
# Running this cell rebinds `model`, replacing any model built by an earlier
# cell. An L1() layer sits in front of the first dense layer.
model = [
    L1(),
    DenseLayer(input_size=X_train.shape[0], output_size=4),
    Sigmoid(),
    Dense_Softmax_Cross_Entropy(input_size=4, output_size=2)
]

Propagation:

In [None]:
# Mini-batch training loop for the L1 model.
#
# Batches are taken as column slices of X_train / y_train, so the second axis
# is treated as the sample axis. Every epoch walks the batches in the same
# order (no shuffling), runs a forward pass through all layers, accumulates
# accuracy and cost, then runs the backward pass in reverse layer order.
#
# Changes compared with the previous version of this cell:
#   * the running activation is no longer called `input`, which shadowed the
#     builtin for the rest of the kernel session;
#   * cost is now reported as the mean over batches, like accuracy (it used to
#     be the sum over batches, which scales with the number of batches);
#   * the batch count is computed once, and an empty training set no longer
#     raises ZeroDivisionError.
num_batches = int(math.ceil(X_train.shape[1] / BATCH_SIZE))

for epoch in range(hparams['epochs']):
    epoch_cost = 0
    epoch_acc = 0
    for batch_num in range(num_batches):
        # The last batch may be shorter; slicing past the end is clamped.
        batch_cols = slice(batch_num * BATCH_SIZE, (batch_num + 1) * BATCH_SIZE)
        activations = X_train[:, batch_cols]
        y_batch = y_train[:, batch_cols]

        # Forward pass
        for layer in model:
            activations = layer.forward(activations)

        epoch_acc += get_accuracy(y_batch, activations)
        epoch_cost += Crossentropy(y_batch, activations)

        # Backward pass. The target labels are fed in as the initial
        # "gradient"; presumably the final layer turns them into the actual
        # loss gradient - confirm in Dense_Softmax_Cross_Entropy.backward.
        gradient = y_batch
        for layer in reversed(model):
            gradient = layer.backward(gradient, hparams)

    # Unweighted mean over batches: a shorter final batch counts as much as a
    # full one.
    if num_batches:
        epoch_cost /= num_batches
        epoch_acc /= num_batches

    # Decay the learning rate in place once per epoch.
    hparams['lr'] *= hparams['lr_decay']
    print(f"Epoch: {epoch}, cost: {epoch_cost}, acc: {epoch_acc}")

Weights peek:

In [None]:
# Show the learned parameters of every layer that is a DenseLayer instance;
# all other layers are skipped.
for layer in model:
    if not isinstance(layer, DenseLayer):
        continue
    print(f"Weights:\n{layer.weights}\n, Bias:\n{layer.bias}")

Test:

In [None]:
# Evaluate the trained L1 model on the held-out split: push X_test through
# every layer in order and compare the final output with the encoded labels.
# The running activation is deliberately not named `input`, so the builtin
# input() stays usable in this kernel.
test_output = X_test
for layer in model:
    test_output = layer.forward(test_output)
acc = get_accuracy(y_test, test_output)
print(f"Test accuracy: {acc}")

Test for random new data:

In [None]:
# Evaluate the L1 model on freshly generated data, using a random_state
# different from the one used for the training data (123 vs 42).
input_X, input_labels = generate_random_data(10000, random_state=123) # Different random state

# Forward pass through all layers. The running activation is deliberately not
# named `input`, so the builtin input() stays usable in this kernel.
new_output = input_X
for layer in model:
    new_output = layer.forward(new_output)

# Labels are flattened before encoding, unlike the train/test labels above.
# NOTE(review): presumably generate_random_data returns 2-D labels - confirm.
input_labels_oh = matrix_encode(input_labels.flatten())

acc = get_accuracy(input_labels_oh, new_output)
print(f"Test accuracy on new data: {acc}")


### Now for L2

Let's prepare parameters:

In [None]:
# Fresh hyper-parameters for the L2 experiment. The previous training run
# decayed hparams['lr'] in place, so the dict is rebuilt here.
hparams = dict(
    lr=0.1,            # learning rate; multiplied by 'lr_decay' after each epoch
    batch_size=128,    # number of samples per mini-batch
    epochs=500,        # number of iterations of the outer training loop
    lr_decay=0.9999,   # per-epoch multiplicative decay applied to 'lr'
    momentum=0.9,      # momentum factor (presumably read inside layer.backward - confirm)
)
BATCH_SIZE = hparams['batch_size']

In [None]:
# Regenerate the same synthetic dataset (random_state=42) and split it again.
# NOTE(review): no seed is passed to train_test_split, so this split may differ
# from the ones used in the earlier experiments - TODO confirm.
X, y = generate_random_data(10000, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

# Encode both label sets (test first, then train) with matrix_encode;
# presumably a one-hot encoding, given the `_oh` suffix used in the new-data cell.
y_test, y_train = (matrix_encode(labels) for labels in (y_test, y_train))

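Build the network model by stacking layers (run only one of the two cells below; if both are run, the second definition overwrites `model`):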

In [None]:
# L2 variant with a ReLU hidden activation.
# Same stack as the raw network, with an L2() layer placed in front of the
# first dense layer.
# NOTE(review): L2 presumably normalizes its input by the L2 norm, per the
# notebook introduction - confirm in its definition.
model = [
    L2(),
    DenseLayer(input_size=X_train.shape[0], output_size=4),
    Relu(),
    Dense_Softmax_Cross_Entropy(input_size=4, output_size=2)
]

In [None]:
# L2 variant with a Sigmoid hidden activation.
# Running this cell rebinds `model`, replacing any model built by an earlier
# cell. An L2() layer sits in front of the first dense layer.
model = [
    L2(),
    DenseLayer(input_size=X_train.shape[0], output_size=4),
    Sigmoid(),
    Dense_Softmax_Cross_Entropy(input_size=4, output_size=2)
]

Propagation:

In [None]:
# Mini-batch training loop for the L2 model.
#
# Batches are taken as column slices of X_train / y_train, so the second axis
# is treated as the sample axis. Every epoch walks the batches in the same
# order (no shuffling), runs a forward pass through all layers, accumulates
# accuracy and cost, then runs the backward pass in reverse layer order.
#
# Changes compared with the previous version of this cell:
#   * the running activation is no longer called `input`, which shadowed the
#     builtin for the rest of the kernel session;
#   * cost is now reported as the mean over batches, like accuracy (it used to
#     be the sum over batches, which scales with the number of batches);
#   * the batch count is computed once, and an empty training set no longer
#     raises ZeroDivisionError.
num_batches = int(math.ceil(X_train.shape[1] / BATCH_SIZE))

for epoch in range(hparams['epochs']):
    epoch_cost = 0
    epoch_acc = 0
    for batch_num in range(num_batches):
        # The last batch may be shorter; slicing past the end is clamped.
        batch_cols = slice(batch_num * BATCH_SIZE, (batch_num + 1) * BATCH_SIZE)
        activations = X_train[:, batch_cols]
        y_batch = y_train[:, batch_cols]

        # Forward pass
        for layer in model:
            activations = layer.forward(activations)

        epoch_acc += get_accuracy(y_batch, activations)
        epoch_cost += Crossentropy(y_batch, activations)

        # Backward pass. The target labels are fed in as the initial
        # "gradient"; presumably the final layer turns them into the actual
        # loss gradient - confirm in Dense_Softmax_Cross_Entropy.backward.
        gradient = y_batch
        for layer in reversed(model):
            gradient = layer.backward(gradient, hparams)

    # Unweighted mean over batches: a shorter final batch counts as much as a
    # full one.
    if num_batches:
        epoch_cost /= num_batches
        epoch_acc /= num_batches

    # Decay the learning rate in place once per epoch.
    hparams['lr'] *= hparams['lr_decay']
    print(f"Epoch: {epoch}, cost: {epoch_cost}, acc: {epoch_acc}")

Weights peek:

In [None]:
# Show the learned parameters of every layer that is a DenseLayer instance;
# all other layers are skipped.
for layer in model:
    if not isinstance(layer, DenseLayer):
        continue
    print(f"Weights:\n{layer.weights}\n, Bias:\n{layer.bias}")

Test:

In [None]:
# Evaluate the trained L2 model on the held-out split: push X_test through
# every layer in order and compare the final output with the encoded labels.
# The running activation is deliberately not named `input`, so the builtin
# input() stays usable in this kernel.
test_output = X_test
for layer in model:
    test_output = layer.forward(test_output)
acc = get_accuracy(y_test, test_output)
print(f"Test accuracy: {acc}")

Test for random new data:

In [None]:
# Evaluate the L2 model on freshly generated data, using a random_state
# different from the one used for the training data (123 vs 42).
input_X, input_labels = generate_random_data(10000, random_state=123) # Different random state

# Forward pass through all layers. The running activation is deliberately not
# named `input`, so the builtin input() stays usable in this kernel.
new_output = input_X
for layer in model:
    new_output = layer.forward(new_output)

# Labels are flattened before encoding, unlike the train/test labels above.
# NOTE(review): presumably generate_random_data returns 2-D labels - confirm.
input_labels_oh = matrix_encode(input_labels.flatten())

acc = get_accuracy(input_labels_oh, new_output)
print(f"Test accuracy on new data: {acc}")
