In [8]:
%load_ext autoreload
%autoreload 2
from mlp2 import *
import numpy as np
from os import listdir
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import pandas as pd
import gzip


In [172]:
def read_images(path, n, image_size=28):
    """Read the first ``n`` images from a gzip-compressed IDX image file.

    The 16-byte header is skipped and the next
    ``n * image_size * image_size`` bytes are interpreted as unsigned
    8-bit pixel values.

    Parameters
    ----------
    path : str
        Location of the ``.gz`` archive.
    n : int
        Number of images to read.
    image_size : int, optional
        Side length in pixels of each square image (default 28).

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(n, image_size * image_size)`` with one
        flattened image per row.

    Raises
    ------
    ValueError
        If the archive contains fewer than ``n`` images.
    """
    header_bytes = 16
    pixels_per_image = image_size * image_size
    expected_bytes = pixels_per_image * n

    # The context manager closes the archive even if reading fails; the
    # handle used to be left open.
    with gzip.open(path, 'rb') as f:
        f.read(header_bytes)
        buf = f.read(expected_bytes)

    if len(buf) != expected_bytes:
        raise ValueError(
            f'expected {expected_bytes} pixel bytes for {n} images, '
            f'got {len(buf)}'
        )

    data = np.frombuffer(buf, dtype=np.uint8)
    return data.reshape(n, pixels_per_image)

def read_labels(path, n):
    """Read the first ``n`` labels from a gzip-compressed IDX label file.

    The 8-byte header is skipped and the next ``n`` bytes are interpreted
    as unsigned 8-bit labels.

    Parameters
    ----------
    path : str
        Location of the ``.gz`` archive.
    n : int
        Number of labels to read.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(n,)``.

    Raises
    ------
    ValueError
        If the archive contains fewer than ``n`` labels.
    """
    header_bytes = 8

    # The context manager closes the archive even if reading fails; the
    # handle used to be left open.
    with gzip.open(path, 'rb') as f:
        f.read(header_bytes)
        buf = f.read(n)

    if len(buf) != n:
        raise ValueError(f'expected {n} label bytes, got {len(buf)}')

    data = np.frombuffer(buf, dtype=np.uint8)
    return data.reshape(n)

# Training split: 60000 flattened images with their integer labels.
X_train, T_tr = (
    read_images('MNIST/raw/train-images-idx3-ubyte.gz', n=60000),
    read_labels('MNIST/raw/train-labels-idx1-ubyte.gz', n=60000),
)

# Test split: 10000 flattened images with their integer labels.
X_test, T_tst = (
    read_images('MNIST/raw/t10k-images-idx3-ubyte.gz', n=10000),
    read_labels('MNIST/raw/t10k-labels-idx1-ubyte.gz', n=10000),
)

In [173]:
# Sanity-check the shapes of the loaded image matrices and label vectors.
X_train.shape, X_test.shape, T_tr.shape, T_tst.shape

((60000, 784), (10000, 784), (60000,), (10000,))

In [174]:
# Integer label of training sample 59600.
T_tr[59600]

5

In [175]:
# Un-flatten training sample 59600 back to 28x28 and render it as an image.
px.imshow(X_train[59600].reshape(28, 28))

In [176]:
# Peek at the raw integer label vector before it is one-hot encoded.
T_tr

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [177]:
# One-hot encode the training labels: row k of the 10x10 identity matrix is
# the encoding of class k, so indexing by the label vector builds the matrix.
T_train = np.eye(10)[T_tr]

In [178]:
# One-hot encode the test labels: row k of the 10x10 identity matrix is the
# encoding of class k, so indexing by the label vector builds the matrix.
T_test = np.eye(10)[T_tst]

In [179]:
# Number of input features per sample (columns of the flattened image matrix).
X_train.shape[1]

784

In [180]:
# Number of one-hot target columns.
T_test.shape[1]

10

In [184]:
# Raise on floating-point errors, but only warn on underflow/overflow.
np.seterr(all='raise', under='warn', over='warn')

rng = np.random.RandomState(1234)
net = MLP(
    # Architecture. `layers=None` selects the `layers_default` spec, which is
    # presumably [n_layers, units per layer, n_outputs, activation class]
    # (cf. the grid-search cell) -- confirm against mlp2.
    n_inputs=X_train.shape[1],
    layers=None,
    layers_default=[4, 30, 10, SigmaLayer],
    bias=True,
    classification=True,
    # Optimisation settings.
    rng=rng,
    batch_size=32,
    n_epochs=300,
    eta=0.01,
    momentum=0.9,
)

In [185]:
# Re-check the training matrix shape before fitting.
X_train.shape

(60000, 784)

In [None]:
# Fit the MNIST classifier; the returned training history is kept in `output`
# for the plots below.
output = net.train(
    X_train, T_train,
    X_test=X_test, T_test=T_test,
    verbose=False,
)

 78%|███████▊  | 234/300 [14:17<10:56,  9.95s/it]

In [14]:
# Per-layer mean-weight curves over the epochs.
# The series are discovered from the history keys ('<layer>_mean_weight')
# instead of assuming exactly four layers named Linear_1..Linear_4, so the
# cell works for any layer count/naming (including an output layer whose
# name does not end in a number).
weight_suffix = '_mean_weight'
fig = go.Figure()
for key, series in output.items():
    if key.endswith(weight_suffix):
        fig.add_trace(
            go.Scatter(y=series, name=key[:-len(weight_suffix)])
        )
fig.update_layout(xaxis_title='Epoch', yaxis_title='mean weight / initial mean weight')
fig

In [15]:
# Per-layer mean-update curves over the epochs.
# The series are discovered from the history keys ('<layer>_mean_update')
# instead of assuming exactly four layers named Linear_1..Linear_4, so the
# cell works for any layer count/naming (including an output layer whose
# name does not end in a number).
update_suffix = '_mean_update'
fig = go.Figure()
for key, series in output.items():
    if key.endswith(update_suffix):
        fig.add_trace(
            go.Scatter(y=series, name=key[:-len(update_suffix)])
        )
fig.update_layout(xaxis_title='Epoch', yaxis_title='mean update')
fig

In [16]:
# Train vs. test accuracy per epoch.
go.Figure(
    data=[
        go.Scatter(y=output[key], name=label)
        for key, label in (
            ('acc_train', 'train accuracy'),
            ('acc_test', 'test accuracy'),
        )
    ]
)

In [17]:
# 300x300 evaluation grid over [-2, 2] x [-2, 2]; X_sample lists every grid
# point as an (x, y) row.
xx, yy = np.meshgrid(np.linspace(-2, 2, 300), np.linspace(-2, 2, 300))
X_sample = np.vstack((xx.ravel(), yy.ravel())).T

In [18]:
# Evaluate the network on every grid point.
# NOTE(review): X_sample has two columns, whereas the `net` constructed above
# in this notebook uses n_inputs=X_train.shape[1] (784 for the MNIST arrays).
# This call presumably relied on a 2-input network from an earlier kernel
# session -- confirm before Restart & Run All.
y = net.propagate(X_sample)

In [None]:
# Network output column 0 over the grid, with the first two training features
# overlaid and coloured by integer label.
go.Figure(data=[
    go.Contour(
        z=y[:, 0],
        x=xx.ravel(),
        y=yy.ravel(),
        contours=dict(showlabels=True, coloring='heatmap'),
    ),
    go.Scattergl(
        x=X_train[:, 0],
        y=X_train[:, 1],
        mode='markers',
        marker=dict(size=2, color=T_tr),
        opacity=0.3,
    ),
])

In [None]:
# Network output column 1 over the grid, with the first two training features
# overlaid and coloured by integer label.
go.Figure(data=[
    go.Contour(
        z=y[:, 1],
        x=xx.ravel(),
        y=yy.ravel(),
        contours=dict(showlabels=True, coloring='heatmap'),
    ),
    go.Scattergl(
        x=X_train[:, 0],
        y=X_train[:, 1],
        mode='markers',
        marker=dict(size=2, color=T_tr),
        opacity=0.3,
    ),
])

In [None]:
# Network output column 2 over the grid, with the first two training features
# overlaid and coloured by integer label.
go.Figure(data=[
    go.Contour(
        z=y[:, 2],
        x=xx.ravel(),
        y=yy.ravel(),
        contours=dict(showlabels=True, coloring='heatmap'),
    ),
    go.Scattergl(
        x=X_train[:, 0],
        y=X_train[:, 1],
        mode='markers',
        marker=dict(size=2, color=T_tr),
        opacity=0.3,
    ),
])

# Regression

In [None]:
# Synthetic regression task: z = cos(x) * sin(y) with standard-normal inputs
# and no added noise.
# A dedicated seeded generator makes the data (and hence every downstream
# result) reproducible under Restart & Run All; the draws used to come from
# the unseeded global NumPy state.
data_rng = np.random.RandomState(42)
x = data_rng.randn(10000)
y = data_rng.randn(10000)
X = np.vstack([x, y]).T
z = (np.cos(x) * np.sin(y)).reshape(-1, 1)  # column vector of targets

# First 9000 samples for training, remaining 1000 for testing.
X_train = X[:9000]
y_train = z[:9000]
X_test = X[9000:]
y_test = z[9000:]
px.scatter(x=x, y=y, color=z[:, 0])

In [23]:
# Regression network: 2 inputs -> 20 -> 20 -> 1, with a ReLU after each hidden
# linear layer and a linear read-out.
rng = np.random.RandomState(123)
layers = [
    LinearLayer(name='Linear_1', n_inputs=2, n_units=20, bias=True, rng=rng),
    ReLULayer(name='ReLU_1'),
    LinearLayer(name='Linear_2', n_inputs=20, n_units=20, bias=True, rng=rng),
    ReLULayer(name='ReLU_2'),
    LinearLayer(name='Linear_OUT', n_inputs=20, n_units=1, bias=True, rng=rng),
]

# Raise on floating-point errors, but only warn on underflow/overflow.
np.seterr(all='raise', under='warn', over='warn')
net = MLP(
    # Architecture: explicit layer list, so no default spec is needed.
    n_inputs=X_train.shape[1],
    layers=layers,
    layers_default=None,
    bias=True,
    classification=False,
    # Optimisation settings (plain SGD: momentum disabled).
    rng=rng,
    batch_size=32,
    n_epochs=200,
    eta=0.01,
    momentum=0,
)

output = net.train(
    X_train, y_train,
    X_test=X_test, T_test=y_test,
)

100%|██████████| 200/200 [00:19<00:00, 10.09it/s]


In [None]:
# Per-layer mean-weight curves for the regression network.
# The linear layers of this net are named Linear_1, Linear_2 and Linear_OUT,
# so looking up 'Linear_3_mean_weight' / 'Linear_4_mean_weight' via a fixed
# range(1, 5) fails with KeyError. Discover the series from the history keys
# instead.
weight_suffix = '_mean_weight'
fig = go.Figure()
for key, series in output.items():
    if key.endswith(weight_suffix):
        fig.add_trace(
            go.Scatter(y=series, name=key[:-len(weight_suffix)])
        )
fig.update_layout(xaxis_title='Epoch', yaxis_title='mean weight / initial mean weight')
fig

In [25]:
# 100x100 evaluation grid over [-3, 3] x [-3, 3]: xx/yy are the 1-D axes,
# xxx/yyy the 2-D meshes, and X_sample lists every grid point as an (x, y) row.
xx, yy = np.linspace(-3, 3, 100), np.linspace(-3, 3, 100)
xxx, yyy = np.meshgrid(xx, yy)
X_sample = np.vstack((xxx.ravel(), yyy.ravel())).T

In [26]:
# Evaluate the regression network on every grid point (note: this rebinds `y`,
# which previously held the second input coordinate of the synthetic data).
y = net.propagate(X_sample)

In [None]:
# Network prediction (green) against the true surface cos(x) * sin(y) (red).
go.Figure([
    go.Surface(
        z=y.reshape(xxx.shape),  # grid is 100x100, so this is reshape(100, 100)
        x=xxx,
        y=yyy,
        opacity=0.5,
        colorscale='Greens',
    ),
    go.Surface(
        z=np.cos(xxx) * np.sin(yyy),
        x=xxx,
        y=yyy,
        opacity=0.5,
        colorscale='Reds',
    ),
])

In [None]:
# Train vs. test loss per epoch on a logarithmic y-axis.
go.Figure(
    data=[
        go.Scatter(y=output[key], name=label)
        for key, label in (
            ('loss_train', 'train loss'),
            ('loss_test', 'test loss'),
        )
    ],
    layout=dict(yaxis_type='log'),
)

# Experiments

In [29]:
# Three-Gaussians classification data: the last column holds the class label,
# all preceding columns are features.
train_set = datasets['data.three_gauss.train.10000']
test_set = datasets['data.three_gauss.test.10000']

X_train = train_set[:, :-1]
T_tr = train_set[:, -1].astype(int)
X_test = test_set[:, :-1]
T_tst = test_set[:, -1].astype(int)

# One-hot encode the labels, shifted by one (label k -> column k - 1).
# Both splits share a single class count so the train and test target
# matrices always have the same width; previously each split used its own
# maximum label, which could disagree if a class were missing from one split.
n_classes = max(T_tr.max(), T_tst.max())

T_train = np.zeros((T_tr.size, n_classes))
T_train[np.arange(T_tr.size), T_tr - 1] = 1

T_test = np.zeros((T_tst.size, n_classes))
T_test[np.arange(T_tst.size), T_tst - 1] = 1

## How does the activation function affect the model's accuracy?
Experiment with sigmoid and two other activation functions. The activation function in the output layer should be chosen according to the problem.

In [31]:
# ReLU variant: 2 inputs -> 10 -> 10 -> 3, ReLU after each hidden linear layer.
rng = np.random.RandomState(1234)
layers = [
    LinearLayer(name='Linear_1', n_inputs=2, n_units=10, bias=True, rng=rng),
    ReLULayer(name='ReLU_1'),
    LinearLayer(name='Linear_2', n_inputs=10, n_units=10, bias=True, rng=rng),
    ReLULayer(name='ReLU_2'),
    LinearLayer(name='Linear_OUT', n_inputs=10, n_units=3, bias=True, rng=rng),
]

# A fresh generator with the same seed is handed to the MLP itself.
rng = np.random.RandomState(1234)
net = MLP(
    # Architecture: explicit layer list, so no default spec is needed.
    n_inputs=X_train.shape[1],
    layers=layers,
    layers_default=None,
    bias=True,
    classification=True,
    # Optimisation settings.
    rng=rng,
    batch_size=32,
    n_epochs=500,
    eta=0.01,
    momentum=0.9,
)

In [32]:
# Train the ReLU network and keep its history for the activation comparison.
output_relu = net.train(
    X_train, T_train,
    X_test=X_test, T_test=T_test,
    verbose=False,
)


underflow encountered in multiply


underflow encountered in multiply


underflow encountered in true_divide


underflow encountered in multiply

100%|██████████| 500/500 [03:21<00:00,  2.48it/s]


In [33]:
# Tanh variant: 2 inputs -> 10 -> 10 -> 3, tanh after each hidden linear layer.
rng = np.random.RandomState(1234)
layers = [
    LinearLayer(name='Linear_1', n_inputs=2, n_units=10, bias=True, rng=rng),
    TanhLayer(name='Tanh_1'),
    LinearLayer(name='Linear_2', n_inputs=10, n_units=10, bias=True, rng=rng),
    TanhLayer(name='Tanh_2'),
    LinearLayer(name='Linear_OUT', n_inputs=10, n_units=3, bias=True, rng=rng),
]

# A fresh generator with the same seed is handed to the MLP itself.
rng = np.random.RandomState(1234)
net = MLP(
    # Architecture: explicit layer list, so no default spec is needed.
    n_inputs=X_train.shape[1],
    layers=layers,
    layers_default=None,
    bias=True,
    classification=True,
    # Optimisation settings.
    rng=rng,
    batch_size=32,
    n_epochs=500,
    eta=0.01,
    momentum=0.9,
)

In [34]:
# Train the tanh network and keep its history for the activation comparison.
output_tanh = net.train(
    X_train, T_train,
    X_test=X_test, T_test=T_test,
    verbose=False,
)

100%|██████████| 500/500 [03:33<00:00,  2.34it/s]


In [35]:
# Sigmoid variant: 2 inputs -> 10 -> 10 -> 3, sigmoid after each hidden
# linear layer.
rng = np.random.RandomState(1234)
layers = [
    LinearLayer(n_inputs=2, n_units=10, rng=rng, bias=True, name='Linear_1'),
    # These activations used to carry the copy-pasted names 'ReLU_1'/'ReLU_2';
    # they are named after the layer type, like the ReLU and tanh variants.
    SigmaLayer(name='Sigma_1'),
    LinearLayer(n_inputs=10, n_units=10, rng=rng, bias=True, name='Linear_2'),
    SigmaLayer(name='Sigma_2'),
    LinearLayer(n_inputs=10, n_units=3, rng=rng, bias=True, name='Linear_OUT'),
]

# A fresh generator with the same seed is handed to the MLP itself.
rng = np.random.RandomState(1234)
net = MLP(rng=rng,
          n_inputs=X_train.shape[1],
          layers=layers,
          layers_default=None,
          bias=True,
          batch_size=32,
          n_epochs=500,
          eta=0.01,
          momentum=0.9,
          classification=True,
         )

In [36]:
# Train the sigmoid network and keep its history for the activation comparison.
output_sigmoid = net.train(
    X_train, T_train,
    X_test=X_test, T_test=T_test,
    verbose=False,
)

100%|██████████| 500/500 [03:25<00:00,  2.44it/s]


In [37]:
# List the metric series recorded in the sigmoid run's training history.
output_sigmoid.keys()

dict_keys(['acc_train', 'loss_train', 'loss_test', 'acc_test', 'Linear_1_mean_weight', 'Linear_1_mean_update', 'Linear_2_mean_weight', 'Linear_2_mean_update', 'Linear_OUT_mean_weight', 'Linear_OUT_mean_update'])

In [None]:
# Train (dashed) and test (solid) accuracy per epoch for the three hidden-layer
# activations, one colour per activation.
fig = go.Figure()
# The loop variable is deliberately not called `output`: at notebook level that
# would overwrite the `output` history returned by an earlier net.train call.
for (act, history, color) in zip(['sigmoid', 'relu', 'tanh'],
                                 [output_sigmoid, output_relu, output_tanh],
                                 ['red', 'green', 'blue']
                                 ):
    fig.add_trace(go.Scatter(
        y=history['acc_train'],
        mode='lines',
        line=dict(color=color, dash='dash'),
        opacity=0.5,
        name=f'{act} train'
    ))
    fig.add_trace(go.Scatter(
        y=history['acc_test'],
        mode='lines',
        line=dict(color=color),
        opacity=0.5,
        name=f'{act} test'
    ))
fig.update_layout(xaxis_title='Epoch', yaxis_title='accuracy')
fig

## How do the number of hidden layers and the number of neurons per hidden layer impact the model's accuracy?

In [40]:
# Grid search over network depth and width with sigmoid activations.
# results[i, j] holds the best test accuracy reached by the configuration with
# n_layers[i] layers and n_units[j] units per layer.
rng = np.random.RandomState(1234)  # one generator shared by all configurations
n_layers = np.array([2, 3, 5, 10])
n_units = np.array([10, 20, 30, 50])
# Size the result table from the grids so editing either list cannot cause an
# out-of-bounds write or leave unused zero cells.
results = np.zeros((len(n_layers), len(n_units)))
for i, n_layer in enumerate(n_layers):
    for j, n_unit in enumerate(n_units):
        net = MLP(
            rng=rng,
            n_inputs=X_train.shape[1],
            layers=None,
            layers_default=[n_layer, n_unit, 3, SigmaLayer],
            bias=True,
            batch_size=32,
            n_epochs=200,
            eta=0.01,
            momentum=0.9,
            classification=True,
        )
        # Kept in its own name: reusing `output_sigmoid` here would overwrite
        # the history of the sigmoid run from the activation experiment.
        grid_history = net.train(
            X_train,
            T_train,
            X_test=X_test,
            T_test=T_test,
            verbose=False)
        results[i, j] = max(grid_history['acc_test'])

100%|██████████| 200/200 [01:07<00:00,  2.95it/s]
100%|██████████| 200/200 [01:05<00:00,  3.07it/s]
100%|██████████| 200/200 [01:12<00:00,  2.76it/s]
100%|██████████| 200/200 [01:24<00:00,  2.38it/s]
100%|██████████| 200/200 [01:20<00:00,  2.47it/s]
100%|██████████| 200/200 [01:33<00:00,  2.14it/s]
100%|██████████| 200/200 [01:48<00:00,  1.85it/s]
100%|██████████| 200/200 [02:28<00:00,  1.34it/s]
100%|██████████| 200/200 [01:57<00:00,  1.71it/s]
100%|██████████| 200/200 [02:27<00:00,  1.35it/s]
100%|██████████| 200/200 [02:53<00:00,  1.15it/s]
100%|██████████| 200/200 [04:22<00:00,  1.31s/it]
100%|██████████| 200/200 [03:29<00:00,  1.05s/it]
100%|██████████| 200/200 [04:28<00:00,  1.34s/it]
100%|██████████| 200/200 [05:41<00:00,  1.71s/it]
100%|██████████| 200/200 [09:10<00:00,  2.75s/it]


In [41]:
# Best test accuracy per configuration: rows follow n_layers, columns follow n_units.
results

array([[0.92473333, 0.913     , 0.8945    , 0.8338    ],
       [0.9206    , 0.90426667, 0.8789    , 0.8285    ],
       [0.33333333, 0.33333333, 0.33333333, 0.33333333],
       [0.33333333, 0.33333333, 0.33333333, 0.33333333]])

## How does the loss function affect the model's accuracy? 
Consider two different loss functions for both classification and regression.

In [42]:
# Synthetic regression task for the loss comparison: z = cos(x) * sin(y) with
# standard-normal inputs and no added noise.
# A dedicated seeded generator makes the data (and hence the loss curves)
# reproducible under Restart & Run All; the draws used to come from the
# unseeded global NumPy state.
data_rng = np.random.RandomState(42)
x = data_rng.randn(10000)
y = data_rng.randn(10000)
X = np.vstack([x, y]).T
z = (np.cos(x) * np.sin(y)).reshape(-1, 1)  # column vector of targets

# First 9000 samples for training, remaining 1000 for testing.
X_train = X[:9000]
T_train = z[:9000]
X_test = X[9000:]
T_test = z[9000:]

In [43]:
# Regression network (2 -> 20 -> 20 -> 1, ReLU hidden activations) trained
# with the mean-absolute-error loss.
rng = np.random.RandomState(123)
layers = [
    LinearLayer(n_inputs=2, n_units=20, rng=rng, bias=True, name='Linear_1'),
    ReLULayer(name='ReLU_1'),
    LinearLayer(n_inputs=20, n_units=20, rng=rng, bias=True, name='Linear_2'),
    ReLULayer(name='ReLU_2'),
    LinearLayer(n_inputs=20, n_units=1, rng=rng, bias=True, name='Linear_OUT'),
]

net = MLP(rng=rng,
          n_inputs=X_train.shape[1],
          layers=layers,
          layers_default=None,
          bias=True,
          batch_size=32,
          n_epochs=200,
          eta=0.01,
          momentum=0,
          classification=False,
          loss=LossMeanAbsoluteError(name='MAE')
         )

# Train against T_train / T_test, the targets generated together with the
# current X_train / X_test. The earlier `y_train` / `y_test` belong to the
# inputs drawn in the first regression cell and do not match these samples.
output_MAE = net.train(X_train,
                       T_train,
                       X_test=X_test,
                       T_test=T_test)

100%|██████████| 200/200 [00:19<00:00, 10.51it/s]


In [44]:
# Regression network (2 -> 20 -> 20 -> 1, ReLU hidden activations) trained
# with the mean-squared-error loss.
rng = np.random.RandomState(123)
layers = [
    LinearLayer(n_inputs=2, n_units=20, rng=rng, bias=True, name='Linear_1'),
    ReLULayer(name='ReLU_1'),
    LinearLayer(n_inputs=20, n_units=20, rng=rng, bias=True, name='Linear_2'),
    ReLULayer(name='ReLU_2'),
    LinearLayer(n_inputs=20, n_units=1, rng=rng, bias=True, name='Linear_OUT'),
]

net = MLP(rng=rng,
          n_inputs=X_train.shape[1],
          layers=layers,
          layers_default=None,
          bias=True,
          batch_size=32,
          n_epochs=200,
          eta=0.01,
          momentum=0,
          classification=False,
          loss=LossMeanSquareError(name='MSE')
         )

# Train against T_train / T_test, the targets generated together with the
# current X_train / X_test. The earlier `y_train` / `y_test` belong to the
# inputs drawn in the first regression cell and do not match these samples.
output_MSE = net.train(X_train,
                       T_train,
                       X_test=X_test,
                       T_test=T_test)

100%|██████████| 200/200 [00:19<00:00, 10.06it/s]


In [None]:
# Train (dashed) and test (solid) loss per epoch for the two regression losses.
# Each curve is reported in its own loss, so the two colours are not on a
# common scale.
fig = go.Figure()
# Labels and histories are listed in the same order; previously 'MSE' was
# paired with output_MAE (and vice versa), so the legend was swapped.
# The loop variable is deliberately not called `output`, to avoid overwriting
# the notebook-level `output` history.
for (loss, history, color) in zip(['MSE', 'MAE'],
                                  [output_MSE, output_MAE],
                                  ['red', 'green']
                                  ):
    fig.add_trace(go.Scatter(
        y=history['loss_train'],
        mode='lines',
        line=dict(color=color, dash='dash'),
        opacity=0.5,
        name=f'{loss} train'
    ))
    fig.add_trace(go.Scatter(
        y=history['loss_test'],
        mode='lines',
        line=dict(color=color),
        opacity=0.5,
        name=f'{loss} test'
    ))
fig.update_layout(xaxis_title='Epoch', yaxis_title='loss')
fig