In [None]:
%load_ext autoreload
%autoreload 2
from mlp2 import *
import numpy as np
from os import listdir
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import pandas as pd
import gzip


In [None]:
def read_images(path, n, image_size=28):
    """Read the first ``n`` images from a gzip-compressed MNIST image file.

    Parameters
    ----------
    path : str
        Path to the ``*-images-idx3-ubyte.gz`` file.
    n : int
        Number of images to read from the start of the file.
    image_size : int, optional
        Side length of the (square) images in pixels. Defaults to 28.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(n, image_size * image_size)``, one
        flattened image per row.

    Raises
    ------
    ValueError
        If the file holds fewer than ``n`` images.
    """
    n_bytes = image_size * image_size * n

    # The context manager closes the file even if reading fails.
    with gzip.open(path, 'rb') as f:
        f.read(16)  # skip the 16-byte header that precedes the pixel data
        buf = f.read(n_bytes)

    if len(buf) != n_bytes:
        raise ValueError(
            f'{path}: expected {n_bytes} bytes of pixel data for {n} images, '
            f'got {len(buf)}'
        )

    data = np.frombuffer(buf, dtype=np.uint8)
    return data.reshape(n, image_size * image_size)

def read_labels(path, n):
    """Read the first ``n`` labels from a gzip-compressed MNIST label file.

    Parameters
    ----------
    path : str
        Path to the ``*-labels-idx1-ubyte.gz`` file.
    n : int
        Number of labels to read from the start of the file.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(n,)``, one label byte per example.

    Raises
    ------
    ValueError
        If the file holds fewer than ``n`` labels.
    """
    # The context manager closes the file even if reading fails.
    with gzip.open(path, 'rb') as f:
        f.read(8)  # skip the 8-byte header that precedes the label bytes
        buf = f.read(n)

    if len(buf) != n:
        raise ValueError(f'{path}: expected {n} labels, got {len(buf)}')

    data = np.frombuffer(buf, dtype=np.uint8)
    return data.reshape(n)

# --- training split: 60000 examples ---
# Pixels are uint8, so dividing by 256 scales them into [0, 1).
T_tr = read_labels('MNIST/raw/train-labels-idx1-ubyte.gz', 60000)
X_train = read_images('MNIST/raw/train-images-idx3-ubyte.gz', 60000) / 256

# --- test split: 10000 examples ---
T_tst = read_labels('MNIST/raw/t10k-labels-idx1-ubyte.gz', 10000)
X_test = read_images('MNIST/raw/t10k-images-idx3-ubyte.gz', 10000) / 256

In [None]:
# Sanity check: shapes of the image matrices and the integer label vectors.
tuple(arr.shape for arr in (X_train, X_test, T_tr, T_tst))

In [None]:
# Integer label of one training example (index 59600), for comparison with
# the image of the same example.
T_tr[59600]

In [None]:
# Un-flatten the 784-pixel row of training example 59600 back into 28x28.
sample_image = X_train[59600].reshape(28, 28)
px.imshow(sample_image)

In [None]:
# Raw integer training labels, before one-hot encoding.
T_tr

In [None]:
# One-hot encode the training labels: row k of the 10x10 identity matrix is
# the indicator vector of class k, so indexing it by label yields an (N, 10)
# float array.
T_train = np.eye(10)[T_tr]

In [None]:
# One-hot encode the test labels by picking rows of the 10x10 identity matrix
# (row k is the indicator vector of class k), giving an (N, 10) float array.
T_test = np.eye(10)[T_tst]

In [None]:
# Number of input features per example (length of one flattened image row).
X_train.shape[1]

In [None]:
# Number of output classes (width of the one-hot target matrix).
T_test.shape[1]

In [None]:
# Baseline architecture: 784 inputs -> 256 hidden units (ReLU) -> 10 outputs.
# The generator is created here, before the layers that consume it; without
# this line the cell relies on `rng` already existing in the kernel and fails
# on a fresh Restart & Run All.
rng = np.random.RandomState(1234)
layers = [
    LinearLayer(n_inputs=784, n_units=256, rng=rng, bias=True, name='Linear_1'),
    ReLULayer(name='ReLU_1'),
    LinearLayer(n_inputs=256, n_units=10, rng=rng, bias=True, name='Linear_OUT'),
]

In [None]:
# Floating-point errors raise, except underflow/overflow which only warn.
np.seterr(all='raise', under='warn', over='warn')

# Fresh generator for the network itself (same seed as the layer init).
rng = np.random.RandomState(1234)

hyperparams = dict(
    batch_size=32,
    n_epochs=300,
    eta=0.01,
    momentum=0.9,
    bias=True,
    classification=True,
)
net = MLP(rng=rng, n_inputs=X_train.shape[1], layers=layers, **hyperparams)

In [None]:
# Train the baseline network; the test split is passed in alongside the
# training data and the returned history is kept in `output`.
output = net.train(
    X_train,
    T_train,
    X_test=X_test,
    T_test=T_test,
    verbose=False,
)

In [None]:
# Plot the mean-weight history of every layer that reported one.
# Traces are discovered from the keys of `output` rather than a hard-coded
# range(1, 5): the baseline network only names 'Linear_1' and 'Linear_OUT',
# so keys such as 'Linear_2_mean_weight' presumably do not exist
# (NOTE(review): confirm the key format against mlp2's train()).
WEIGHT_SUFFIX = '_mean_weight'

fig = go.Figure()
for key in output:
    key_str = str(key)
    if not key_str.endswith(WEIGHT_SUFFIX):
        continue
    layer_name = key_str[:-len(WEIGHT_SUFFIX)]
    fig.add_trace(
        go.Scatter(y=output[key], name=f'layer {layer_name}')
    )
fig.update_layout(xaxis_title='Epoch', yaxis_title='mean weight / initial mean weight')
fig

In [None]:
# Plot the mean-update history of every layer that reported one.
# Traces are discovered from the keys of `output` rather than a hard-coded
# range(1, 5): the baseline network only names 'Linear_1' and 'Linear_OUT',
# so keys such as 'Linear_2_mean_update' presumably do not exist
# (NOTE(review): confirm the key format against mlp2's train()).
UPDATE_SUFFIX = '_mean_update'

fig = go.Figure()
for key in output:
    key_str = str(key)
    if not key_str.endswith(UPDATE_SUFFIX):
        continue
    layer_name = key_str[:-len(UPDATE_SUFFIX)]
    fig.add_trace(
        go.Scatter(y=output[key], name=f'layer {layer_name}')
    )
fig.update_layout(xaxis_title='Epoch', yaxis_title='mean update')
fig

In [None]:
# Train vs. test accuracy history of the baseline run.
go.Figure(
    data=[
        go.Scatter(name='train accuracy', y=output['acc_train']),
        go.Scatter(name='test accuracy', y=output['acc_test']),
    ],
)

# Experiments

## How does the activation function affect the model's accuracy?
Experiment with sigmoid and two other activation functions. The activation function in the output layer should be chosen according to the problem.

In [None]:
# --- ReLU experiment: 784 -> 256 (ReLU) -> 10 ---
# The layers draw from one seeded generator; the network then gets a second
# generator created from the same seed.
rng = np.random.RandomState(1234)
hidden = LinearLayer(n_inputs=784, n_units=256, rng=rng, bias=True, name='Linear_1')
activation = ReLULayer(name='ReLU_1')
readout = LinearLayer(n_inputs=256, n_units=10, rng=rng, bias=True, name='Linear_OUT')
layers = [hidden, activation, readout]

rng = np.random.RandomState(1234)
hyperparams = dict(
    batch_size=32,
    n_epochs=200,
    eta=0.01,
    momentum=0.9,
    bias=True,
    classification=True,
)
net = MLP(
    rng=rng,
    n_inputs=X_train.shape[1],
    layers=layers,
    layers_default=None,
    **hyperparams,
)

In [None]:
# Train the ReLU network and keep its history for the comparison plot.
output_relu = net.train(
    X_train,
    T_train,
    X_test=X_test,
    T_test=T_test,
    verbose=False,
)

In [None]:
# --- Tanh experiment: 784 -> 256 (tanh) -> 10 ---
# The layers draw from one seeded generator; the network then gets a second
# generator created from the same seed.
rng = np.random.RandomState(1234)
hidden = LinearLayer(n_inputs=784, n_units=256, rng=rng, bias=True, name='Linear_1')
activation = TanhLayer(name='Tanh_1')
readout = LinearLayer(n_inputs=256, n_units=10, rng=rng, bias=True, name='Linear_OUT')
layers = [hidden, activation, readout]

rng = np.random.RandomState(1234)
hyperparams = dict(
    batch_size=32,
    n_epochs=200,
    eta=0.01,
    momentum=0.9,
    bias=True,
    classification=True,
)
net = MLP(
    rng=rng,
    n_inputs=X_train.shape[1],
    layers=layers,
    layers_default=None,
    **hyperparams,
)

In [None]:
# Train the tanh network and keep its history for the comparison plot.
output_tanh = net.train(
    X_train,
    T_train,
    X_test=X_test,
    T_test=T_test,
    verbose=False,
)

In [None]:
# --- Sigmoid experiment: 784 -> 256 (sigmoid) -> 10 ---
# The layers draw from one seeded generator; the network then gets a second
# generator created from the same seed.
rng = np.random.RandomState(1234)
hidden = LinearLayer(n_inputs=784, n_units=256, rng=rng, bias=True, name='Linear_1')
activation = SigmaLayer(name='Sigma_1')
readout = LinearLayer(n_inputs=256, n_units=10, rng=rng, bias=True, name='Linear_OUT')
layers = [hidden, activation, readout]

rng = np.random.RandomState(1234)
hyperparams = dict(
    batch_size=32,
    n_epochs=200,
    eta=0.01,
    momentum=0.9,
    bias=True,
    classification=True,
)
net = MLP(
    rng=rng,
    n_inputs=X_train.shape[1],
    layers=layers,
    layers_default=None,
    **hyperparams,
)


In [None]:
# Train the sigmoid network and keep its history for the comparison plot.
output_sigmoid = net.train(
    X_train,
    T_train,
    X_test=X_test,
    T_test=T_test,
    verbose=False,
)

In [None]:
# Compare train (dashed) and test (solid) accuracy of the three activation
# experiments, one colour per activation.
# The loop variable is named `history`, not `output`, so this cell no longer
# overwrites the baseline run's `output` that earlier plotting cells read.
activation_runs = [
    ('sigmoid', output_sigmoid, 'red'),
    ('relu', output_relu, 'green'),
    ('tanh', output_tanh, 'blue'),
]

fig = go.Figure()
for act, history, color in activation_runs:
    fig.add_trace(go.Scatter(
        y=history['acc_train'],
        mode='lines',
        line=dict(color=color, dash='dash'),
        opacity=0.5,
        name=f'{act} train'
    ))
    fig.add_trace(go.Scatter(
        y=history['acc_test'],
        mode='lines',
        line=dict(color=color),
        opacity=0.5,
        name=f'{act} test'
    ))
# Axis titles so the figure can be read on its own.
fig.update_layout(xaxis_title='Epoch', yaxis_title='accuracy')
fig

## How do the number of hidden layers and the number of neurons per hidden layer impact the model's accuracy?

In [None]:
# Grid search over network depth and width with ReLU activations.
# results[i, j] holds the best test accuracy reached by the network built from
# n_layers[i] and n_units[j].
rng = np.random.RandomState(1234)
n_layers = np.array([2, 3, 5])
n_units = np.array([20, 50, 100])

# Sized from the grids so that editing either list keeps the shapes in sync.
results = np.zeros((len(n_layers), len(n_units)))

for i, n_layer in enumerate(n_layers):
    for j, n_unit in enumerate(n_units):
        net = MLP(
            rng=rng,
            n_inputs=X_train.shape[1],
            layers=None,
            # NOTE(review): presumably [n hidden layers, units per layer,
            # n outputs, activation class] -- confirm against mlp2.MLP.
            layers_default=[n_layer, n_unit, 10, ReLULayer],
            bias=True,
            batch_size=32,
            n_epochs=200,
            eta=0.01,
            momentum=0.9,
            classification=True,
        )
        # Stored under its own name: the original reused `output_sigmoid`,
        # which silently replaced the sigmoid experiment's history.
        output_grid = net.train(
            X_train,
            T_train,
            X_test=X_test,
            T_test=T_test,
            verbose=False)
        results[i, j] = max(output_grid['acc_test'])

In [None]:
# Show the grid-search accuracies as a labeled table (rows: n_layers,
# columns: n_units) so the axes of the bare matrix do not have to be guessed.
pd.DataFrame(
    results,
    index=pd.Index(n_layers, name='n_layers'),
    columns=pd.Index(n_units, name='n_units'),
)

## How does the loss function affect the model's accuracy? 
Consider two different loss functions for both classification and regression.

In [None]:
# Synthetic regression problem: z = cos(x) * sin(y) on standard-normal inputs.
# A dedicated seeded generator replaces the unseeded global np.random so that
# every run of the notebook sees the same data set.
# Alternatives left by the author: inputs drawn from uniform(-3, 3); target
# sin(x) * cos(y); additive noise randn(10000) / 100.
# NOTE: this rebinds X_train / T_train / X_test / T_test, which held the MNIST
# data until here.
data_rng = np.random.RandomState(1234)
x = data_rng.randn(10000)
y = data_rng.randn(10000)
X = np.vstack([x, y]).T  # shape (10000, 2): one (x, y) pair per row

z = np.cos(x) * np.sin(y)
z = z.reshape([len(z), 1])  # column vector of targets

# First 9000 samples for training, remaining 1000 for testing.
X_train = X[:9000]
T_train = z[:9000]
X_test = X[9000:]
T_test = z[9000:]

In [None]:
# Regression network 2 -> 20 -> 20 -> 1 (ReLU) trained with mean absolute error.
rng = np.random.RandomState(123)
layers = [
    LinearLayer(n_inputs=2, n_units=20, rng=rng, bias=True, name='Linear_1'),
    ReLULayer(name='ReLU_1'),
    LinearLayer(n_inputs=20, n_units=20, rng=rng, bias=True, name='Linear_2'),
    ReLULayer(name='ReLU_2'),
    LinearLayer(n_inputs=20, n_units=1, rng=rng, bias=True, name='Linear_OUT'),
]

net = MLP(rng=rng,
          n_inputs=X_train.shape[1],
          layers=layers,
          layers_default=None,
          bias=True,
          batch_size=32,
          n_epochs=200,
          eta=0.01,
          momentum=0,
          classification=False,
          loss=LossMeanAbsoluteError(name='MAE')
         )

# The regression targets are T_train / T_test; the original passed
# y_train / y_test, which are never defined in this notebook.
output_MAE = net.train(X_train,
                       T_train,
                       X_test=X_test,
                       T_test=T_test)

In [None]:
# Regression network 2 -> 20 -> 20 -> 1 (ReLU) trained with mean squared error.
rng = np.random.RandomState(123)
layers = [
    LinearLayer(n_inputs=2, n_units=20, rng=rng, bias=True, name='Linear_1'),
    ReLULayer(name='ReLU_1'),
    LinearLayer(n_inputs=20, n_units=20, rng=rng, bias=True, name='Linear_2'),
    ReLULayer(name='ReLU_2'),
    LinearLayer(n_inputs=20, n_units=1, rng=rng, bias=True, name='Linear_OUT'),
]

net = MLP(rng=rng,
          n_inputs=X_train.shape[1],
          layers=layers,
          layers_default=None,
          bias=True,
          batch_size=32,
          n_epochs=200,
          eta=0.01,
          momentum=0,
          classification=False,
          loss=LossMeanSquareError(name='MSE')
         )

# The regression targets are T_train / T_test; the original passed
# y_train / y_test, which are never defined in this notebook.
output_MSE = net.train(X_train,
                       T_train,
                       X_test=X_test,
                       T_test=T_test)

In [None]:
# Compare train (dashed) and test (solid) loss of the two regression runs.
# Each label is paired with its own history: the original zipped
# ['MSE', 'MAE'] with [output_MAE, output_MSE], so every trace carried the
# other run's name.
# The loop variable is named `history`, not `output`, so the baseline run's
# `output` is left untouched.
loss_runs = [
    ('MAE', output_MAE, 'red'),
    ('MSE', output_MSE, 'green'),
]

fig = go.Figure()
for loss, history, color in loss_runs:
    fig.add_trace(go.Scatter(
        y=history['loss_train'],
        mode='lines',
        line=dict(color=color, dash='dash'),
        opacity=0.5,
        name=f'{loss} train'
    ))
    fig.add_trace(go.Scatter(
        y=history['loss_test'],
        mode='lines',
        line=dict(color=color),
        opacity=0.5,
        name=f'{loss} test'
    ))
# Axis titles so the figure can be read on its own.
fig.update_layout(xaxis_title='Epoch', yaxis_title='loss')
fig