In [None]:
%load_ext autoreload
%autoreload 2
from mlp2 import *
import numpy as np
from os import listdir
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Load every data file in the project folder into a dict keyed by the file
# name without its extension (e.g. 'data.simple.train.10000').
datasets = {}
path = 'project-1-part-1-data'
# sorted() makes the load order deterministic; listdir order is arbitrary.
for file in sorted(listdir(path)):
    # Skip hidden entries such as '.ipynb_checkpoints' or '.DS_Store', which
    # np.loadtxt cannot parse.
    if file.startswith('.'):
        continue
    # Drop only the final extension, whatever its length; the previous slice
    # file[:-4] silently assumed a three-letter extension.
    key = file.rsplit('.', 1)[0]
    # Files are read as comma-separated values with one header row skipped.
    datasets[key] = np.loadtxt(path + '/' + file, delimiter=',', skiprows=1)

In [None]:
# Show the names of the loaded datasets (file names with the last four
# characters stripped).
datasets.keys()

In [None]:
# "simple" dataset: every column but the last is an input feature, the last
# column is cast to int and used as the label.
simple_train = datasets['data.simple.train.10000']
simple_test = datasets['data.simple.test.10000']
X_train, T_tr = simple_train[:, :-1], simple_train[:, -1].astype(int)
X_test, T_tst = simple_test[:, :-1], simple_test[:, -1].astype(int)

In [None]:
# "three_gauss" dataset: every column but the last is an input feature, the
# last column is cast to int and used as the label.
gauss_train = datasets['data.three_gauss.train.10000']
gauss_test = datasets['data.three_gauss.test.10000']
X_train, T_tr = gauss_train[:, :-1], gauss_train[:, -1].astype(int)
X_test, T_tst = gauss_test[:, :-1], gauss_test[:, -1].astype(int)

In [None]:
# Training points (first two feature columns) coloured by label; the low
# opacity keeps dense regions readable.
px.scatter(
    x=X_train[:, 0],
    y=X_train[:, 1],
    color=T_tr,
    opacity=0.08,
)

In [None]:
# One-hot encode the training labels: label k selects row k-1 of the identity
# matrix, i.e. a 1 in column k-1. The width is the largest label present.
T_train = np.eye(T_tr.max())[T_tr - 1]

In [None]:
# One-hot encode the test labels: label k selects row k-1 of the identity
# matrix, i.e. a 1 in column k-1. The width is the largest label present.
T_test = np.eye(T_tst.max())[T_tst - 1]

In [None]:
# Number of columns in X_train (the value passed to the network as n_inputs).
X_train.shape[1]

In [None]:
# Number of one-hot target columns in T_test.
T_test.shape[1]

In [None]:
# Floating-point errors raise, except underflow and overflow, which only warn.
np.seterr(all='raise', under='warn', over='warn')

# Fixed seed handed to the network.
rng = np.random.RandomState(1234)

# No explicit layer list is given; `layers_default` is supplied instead.
# NOTE(review): the list presumably means
# [hidden layers, units per layer, outputs, activation class] - confirm in mlp2.
net = MLP(
    rng=rng,
    n_inputs=X_train.shape[1],
    layers=None,
    layers_default=[3, 10, 3, TanhLayer],
    bias=True,
    batch_size=32,
    n_epochs=500,
    eta=0.01,
    momentum=0.9,
    classification=True,
    learning_history=True,
)

In [None]:
# Train on the training split, passing the test split along as well.
# The returned object is indexed by string keys further down.
output = net.train(
    X_train,
    T_train,
    X_test=X_test,
    T_test=T_test,
    verbose=False,
)

In [None]:
# List the series returned by net.train().
output.keys()

In [None]:
# One line per linear layer (Linear_1 .. Linear_4): its recorded mean-weight
# series.
weight_traces = [
    go.Scatter(y=output[f'Linear_{i}_mean_weight'], name=f'layer {i}')
    for i in range(1, 5)
]
fig = go.Figure(data=weight_traces)
fig.update_layout(
    xaxis_title='Epoch',
    yaxis_title='mean weight / initial mean weight',
)
fig

In [None]:
# One line per linear layer (Linear_1 .. Linear_4): its recorded mean-update
# series.
update_traces = [
    go.Scatter(y=output[f'Linear_{i}_mean_update'], name=f'layer {i}')
    for i in range(1, 5)
]
fig = go.Figure(data=update_traces)
fig.update_layout(
    xaxis_title='Epoch',
    yaxis_title='mean update',
)
fig

In [None]:
# Recorded train and test accuracy series on one figure.
train_acc_trace = go.Scatter(y=output['acc_train'], name='train accuracy')
test_acc_trace = go.Scatter(y=output['acc_test'], name='test accuracy')
go.Figure([train_acc_trace, test_acc_trace])

In [None]:
# Regular 300 x 300 grid over [-2, 2] x [-2, 2]. `xx` / `yy` end up as the 2-D
# coordinate grids and `X_sample` lists every grid point as an (x, y) row.
grid_axis = np.linspace(-2, 2, 300)
xx, yy = np.meshgrid(grid_axis, grid_axis)
X_sample = np.column_stack((xx.ravel(), yy.ravel()))

In [None]:
# Run the network on every grid point; the contour plots below read the result
# one column at a time (y[:, 0], y[:, 1], y[:, 2]).
y = net.propagate(X_sample)

In [None]:
# Network output column 0 over the grid, drawn as labelled heatmap contours,
# with the training points overlaid and coloured by label.
output_contour = go.Contour(
    x=xx.flatten(),
    y=yy.flatten(),
    z=y[:, 0],
    contours_showlabels=True,
    contours_coloring='heatmap',
)
train_points = go.Scattergl(
    x=X_train[:, 0],
    y=X_train[:, 1],
    mode='markers',
    marker_size=2,
    marker_color=T_tr,
    opacity=0.3,
)
go.Figure([output_contour, train_points])

In [None]:
# Network output column 1 over the grid, drawn as labelled heatmap contours,
# with the training points overlaid and coloured by label.
output_contour = go.Contour(
    x=xx.flatten(),
    y=yy.flatten(),
    z=y[:, 1],
    contours_showlabels=True,
    contours_coloring='heatmap',
)
train_points = go.Scattergl(
    x=X_train[:, 0],
    y=X_train[:, 1],
    mode='markers',
    marker_size=2,
    marker_color=T_tr,
    opacity=0.3,
)
go.Figure([output_contour, train_points])

In [None]:
# Network output column 2 over the grid, drawn as labelled heatmap contours,
# with the training points overlaid and coloured by label.
output_contour = go.Contour(
    x=xx.flatten(),
    y=yy.flatten(),
    z=y[:, 2],
    contours_showlabels=True,
    contours_coloring='heatmap',
)
train_points = go.Scattergl(
    x=X_train[:, 0],
    y=X_train[:, 1],
    mode='markers',
    marker_size=2,
    marker_color=T_tr,
    opacity=0.3,
)
go.Figure([output_contour, train_points])

# Regression

In [None]:
# Synthetic regression problem: z = cos(x) * sin(y) on standard-normal inputs,
# split 9000 / 1000 into train and test.
# A dedicated seeded generator makes the data, and therefore every result that
# depends on it, reproducible; previously the points came from the unseeded
# global np.random state and changed on every run.
data_rng = np.random.RandomState(0)
n_samples = 10000
n_train = 9000

# Alternative input distribution: data_rng.uniform(-3, 3, size=n_samples)
x = data_rng.randn(n_samples)
y = data_rng.randn(n_samples)
X = np.vstack([x, y]).T

# Noise-free target; add data_rng.randn(n_samples) / 100 for a noisy variant.
z = np.cos(x) * np.sin(y)
z = z.reshape([len(z), 1])  # column vector, one target per row

X_train = X[:n_train]
y_train = z[:n_train]
X_test = X[n_train:]
y_test = z[n_train:]

px.scatter(x=x, y=y, color=z[:, 0])

In [None]:
# Regression network: 2 inputs -> 20 -> 20 -> 1 output, with a ReLU after each
# of the first two linear layers and nothing after the last one.
rng = np.random.RandomState(123)
linear_opts = dict(rng=rng, bias=True)
layers = [
    LinearLayer(n_inputs=2, n_units=20, name='Linear_1', **linear_opts),
    ReLULayer(name='ReLU_1'),
    LinearLayer(n_inputs=20, n_units=20, name='Linear_2', **linear_opts),
    ReLULayer(name='ReLU_2'),
#     LinearLayer(n_inputs=20, n_units=20, rng=rng, bias=True, name='Linear_3'),
#     ReLULayer(name='ReLU_3'),
    LinearLayer(n_inputs=20, n_units=1, name='Linear_OUT', **linear_opts),
]

# Floating-point errors raise, except underflow and overflow, which only warn.
np.seterr(all='raise', under='warn', over='warn')

# Explicit layer list, no momentum, regression mode.
net = MLP(
    rng=rng,
    n_inputs=X_train.shape[1],
    layers=layers,
    layers_default=None,
    bias=True,
    batch_size=32,
    n_epochs=200,
    eta=0.01,
    momentum=0,
    classification=False,
)

output = net.train(
    X_train,
    y_train,
    X_test=X_test,
    T_test=y_test,
)

In [None]:
# Plot the recorded mean weight of each linear layer of the regression net.
# The series are discovered from the keys of `output` instead of assuming
# layers named Linear_1..Linear_4: this network's layers are called Linear_1,
# Linear_2 and Linear_OUT, so the previous range(1, 5) asked for
# 'Linear_3_mean_weight' and 'Linear_4_mean_weight', which no layer here is
# named after.
# NOTE(review): the regression net is built without learning_history=True; if
# mlp2 only records these series when that flag is set, no traces are drawn -
# confirm the default.
suffix = '_mean_weight'
fig = go.Figure()
for key in output.keys():
    if key.endswith(suffix):
        # Trace name is the layer name, i.e. the key without the suffix.
        fig.add_trace(go.Scatter(y=output[key], name=key[:-len(suffix)]))
fig.update_layout(xaxis_title='Epoch', yaxis_title='mean weight / initial mean weight')
fig

In [None]:
# Regular 100 x 100 grid over [-3, 3] x [-3, 3]. `xx` / `yy` stay 1-D axes,
# `xxx` / `yyy` are the 2-D coordinate grids and `X_sample` lists every grid
# point as an (x, y) row.
xx = np.linspace(-3, 3, 100)
yy = xx.copy()
xxx, yyy = np.meshgrid(xx, yy)
X_sample = np.column_stack((xxx.ravel(), yyy.ravel()))

In [None]:
# Run the regression network on every grid point; the surface plot below
# reshapes the result to the 100 x 100 grid.
y = net.propagate(X_sample)

In [None]:
# Network prediction (green) against the true function cos(x) * sin(y) (red),
# both semi-transparent so they can be compared where they overlap.
predicted_surface = go.Surface(
    x=xxx,
    y=yyy,
    z=y.reshape(100, 100),
    colorscale='Greens',
    opacity=0.5,
)
target_surface = go.Surface(
    x=xxx,
    y=yyy,
    z=np.cos(xxx) * np.sin(yyy),
    colorscale='Reds',
    opacity=0.5,
)
go.Figure(data=[predicted_surface, target_surface])

In [None]:
# Recorded train and test loss series on a logarithmic y axis.
train_loss_trace = go.Scatter(y=output['loss_train'], name='train loss')
test_loss_trace = go.Scatter(y=output['loss_test'], name='test loss')
go.Figure(
    [train_loss_trace, test_loss_trace],
    layout=dict(yaxis_type='log'),
)

# Experiments

In [None]:
# Reload the "three_gauss" classification data for the experiments: every
# column but the last is an input feature, the last is the integer label.
gauss_train = datasets['data.three_gauss.train.10000']
gauss_test = datasets['data.three_gauss.test.10000']
X_train, T_tr = gauss_train[:, :-1], gauss_train[:, -1].astype(int)
X_test, T_tst = gauss_test[:, :-1], gauss_test[:, -1].astype(int)

# One-hot targets: label k selects row k-1 of the identity matrix, i.e. a 1 in
# column k-1; the width is the largest label present in each split.
T_train = np.eye(T_tr.max())[T_tr - 1]
T_test = np.eye(T_tst.max())[T_tst - 1]

## How does activation function affect the model's accuracy? 
Experiment with sigmoid and two other activation functions. The activation function in the output layer should be chosen according to the problem.

In [None]:
# ReLU variant of the classifier: 2 inputs -> 10 -> 10 -> 3 outputs.
rng = np.random.RandomState(1234)
linear_opts = dict(rng=rng, bias=True)
layers = [
    LinearLayer(n_inputs=2, n_units=10, name='Linear_1', **linear_opts),
    ReLULayer(name='ReLU_1'),
    LinearLayer(n_inputs=10, n_units=10, name='Linear_2', **linear_opts),
    ReLULayer(name='ReLU_2'),
    LinearLayer(n_inputs=10, n_units=3, name='Linear_OUT', **linear_opts),
]

# The generator is re-created, so the MLP receives a fresh RandomState(1234)
# rather than the object that was handed to the layers.
rng = np.random.RandomState(1234)
net = MLP(
    rng=rng,
    n_inputs=X_train.shape[1],
    layers=layers,
    layers_default=None,
    bias=True,
    batch_size=32,
    n_epochs=500,
    eta=0.01,
    momentum=0.9,
    classification=True,
    learning_history=True,
)

In [None]:
# Train the ReLU network and keep its history for the activation comparison.
output_relu = net.train(
    X_train,
    T_train,
    X_test=X_test,
    T_test=T_test,
    verbose=False,
)

In [None]:
# Tanh variant of the classifier: 2 inputs -> 10 -> 10 -> 3 outputs.
rng = np.random.RandomState(1234)
linear_opts = dict(rng=rng, bias=True)
layers = [
    LinearLayer(n_inputs=2, n_units=10, name='Linear_1', **linear_opts),
    TanhLayer(name='Tanh_1'),
    LinearLayer(n_inputs=10, n_units=10, name='Linear_2', **linear_opts),
    TanhLayer(name='Tanh_2'),
    LinearLayer(n_inputs=10, n_units=3, name='Linear_OUT', **linear_opts),
]

# The generator is re-created, so the MLP receives a fresh RandomState(1234)
# rather than the object that was handed to the layers.
rng = np.random.RandomState(1234)
net = MLP(
    rng=rng,
    n_inputs=X_train.shape[1],
    layers=layers,
    layers_default=None,
    bias=True,
    batch_size=32,
    n_epochs=500,
    eta=0.01,
    momentum=0.9,
    classification=True,
    learning_history=True,
)

In [None]:
# Train the tanh network and keep its history for the activation comparison.
output_tanh = net.train(
    X_train,
    T_train,
    X_test=X_test,
    T_test=T_test,
    verbose=False,
)

In [None]:
# Sigmoid variant of the classifier: 2 inputs -> 10 -> 10 -> 3 outputs.
# The activation layers are named 'Sigma_1' / 'Sigma_2'; they previously
# carried the names 'ReLU_1' / 'ReLU_2' copied over from the ReLU experiment,
# which mislabels anything reported per layer name.
rng = np.random.RandomState(1234)
layers = [
    LinearLayer(n_inputs=2, n_units=10, rng=rng, bias=True, name='Linear_1'),
    SigmaLayer(name='Sigma_1'),
    LinearLayer(n_inputs=10, n_units=10, rng=rng, bias=True, name='Linear_2'),
    SigmaLayer(name='Sigma_2'),
    LinearLayer(n_inputs=10, n_units=3, rng=rng, bias=True, name='Linear_OUT'),
]

# The generator is re-created, so the MLP receives a fresh RandomState(1234)
# rather than the object that was handed to the layers.
rng = np.random.RandomState(1234)
net = MLP(rng=rng,
          n_inputs=X_train.shape[1],
          layers=layers,
          layers_default=None,
          bias=True,
          batch_size=32,
          n_epochs=500,
          eta=0.01,
          momentum=0.9,
          classification=True,
          learning_history=True
         )

In [None]:
# Train the sigmoid network and keep its history for the activation comparison.
output_sigmoid = net.train(
    X_train,
    T_train,
    X_test=X_test,
    T_test=T_test,
    verbose=False,
)

In [None]:
# List the series recorded for the sigmoid run.
output_sigmoid.keys()

In [None]:
# Accuracy curves for the three activation functions: one colour per
# activation, dashed line = train, solid line = test.
fig = go.Figure()
runs = [
    ('sigmoid', output_sigmoid, 'red'),
    ('relu', output_relu, 'green'),
    ('tanh', output_tanh, 'blue'),
]
for act, output, color in runs:
    train_trace = go.Scatter(
        y=output['acc_train'],
        mode='lines',
        line=dict(color=color, dash='dash'),
        opacity=0.5,
        name=f'{act} train',
    )
    test_trace = go.Scatter(
        y=output['acc_test'],
        mode='lines',
        line=dict(color=color),
        opacity=0.5,
        name=f'{act} test',
    )
    fig.add_trace(train_trace)
    fig.add_trace(test_trace)
fig

## How do the number of hidden layers and the number of neurons per hidden layer impact the model's accuracy?

In [None]:
# Grid search over the first two entries of `layers_default` (n_layer, n_unit)
# with sigmoid activations. results[i, j] is the best test accuracy recorded
# for the network built from n_layers[i] and n_units[j].
# One generator is shared by all runs, so each network draws from a different
# point of the same seeded stream.
rng = np.random.RandomState(1234)
n_layers = np.array([2, 3, 5, 10])
n_units = np.array([10, 20, 30, 50])
# Shape follows the grids, so changing either list needs no further edits.
results = np.zeros((len(n_layers), len(n_units)))
for i, n_layer in enumerate(n_layers):
    for j, n_unit in enumerate(n_units):
        net = MLP(
            rng=rng,
            n_inputs=X_train.shape[1],
            layers=None,
            layers_default=[n_layer, n_unit, 3, SigmaLayer],
            bias=True,
            batch_size=32,
            n_epochs=200,
            eta=0.01,
            momentum=0.9,
            classification=True,
            learning_history=True
        )
        # Stored under its own name: reusing `output_sigmoid` here overwrote
        # the history that the activation-comparison plot reads.
        grid_history = net.train(
            X_train,
            T_train,
            X_test=X_test,
            T_test=T_test,
            verbose=False)
        results[i, j] = max(grid_history['acc_test'])

In [None]:
# Show the grid-search accuracies as a labelled table: rows are the n_layers
# values, columns the n_units values. The bare array gave no indication of
# which axis was which.
pd.DataFrame(
    results,
    index=pd.Index(n_layers, name='n_layers'),
    columns=pd.Index(n_units, name='n_units'),
)

## How does the loss function affect the model's accuracy? 
Consider two different loss functions for both classification and regression.

In [None]:
# Synthetic regression data for the loss-function experiment:
# z = cos(x) * sin(y) on standard-normal inputs, split 9000 / 1000.
# A dedicated seeded generator makes the MAE / MSE comparison reproducible;
# previously the points came from the unseeded global np.random state.
data_rng = np.random.RandomState(1)
n_samples = 10000
n_train = 9000

# Alternative input distribution: data_rng.uniform(-3, 3, size=n_samples)
x = data_rng.randn(n_samples)
y = data_rng.randn(n_samples)
X = np.vstack([x, y]).T

# Noise-free target; add data_rng.randn(n_samples) / 100 for a noisy variant.
z = np.cos(x) * np.sin(y)
z = z.reshape([len(z), 1])  # column vector, one target per row

X_train = X[:n_train]
T_train = z[:n_train]
X_test = X[n_train:]
T_test = z[n_train:]

In [None]:
# Regression network (2 -> 20 -> 20 -> 1, ReLU) trained with mean absolute
# error.
rng = np.random.RandomState(123)
layers = [
    LinearLayer(n_inputs=2, n_units=20, rng=rng, bias=True, name='Linear_1'),
    ReLULayer(name='ReLU_1'),
    LinearLayer(n_inputs=20, n_units=20, rng=rng, bias=True, name='Linear_2'),
    ReLULayer(name='ReLU_2'),
    LinearLayer(n_inputs=20, n_units=1, rng=rng, bias=True, name='Linear_OUT'),
]

net = MLP(rng=rng,
          n_inputs=X_train.shape[1],
          layers=layers,
          layers_default=None,
          bias=True,
          batch_size=32,
          n_epochs=200,
          eta=0.01,
          momentum=0,
          classification=False,
          loss=LossMeanAbsoluteError(name='MAE')
         )

# Train against T_train / T_test, the targets generated together with the
# current X_train / X_test. The previous y_train / y_test came from the earlier
# regression section, whose inputs were a different random draw, so they did
# not correspond to these inputs.
output_MAE = net.train(X_train,
          T_train,
          X_test=X_test,
          T_test=T_test)

In [None]:
# Regression network (2 -> 20 -> 20 -> 1, ReLU) trained with mean squared
# error.
rng = np.random.RandomState(123)
layers = [
    LinearLayer(n_inputs=2, n_units=20, rng=rng, bias=True, name='Linear_1'),
    ReLULayer(name='ReLU_1'),
    LinearLayer(n_inputs=20, n_units=20, rng=rng, bias=True, name='Linear_2'),
    ReLULayer(name='ReLU_2'),
    LinearLayer(n_inputs=20, n_units=1, rng=rng, bias=True, name='Linear_OUT'),
]

net = MLP(rng=rng,
          n_inputs=X_train.shape[1],
          layers=layers,
          layers_default=None,
          bias=True,
          batch_size=32,
          n_epochs=200,
          eta=0.01,
          momentum=0,
          classification=False,
          loss=LossMeanSquareError(name='MSE')
         )

# Train against T_train / T_test, the targets generated together with the
# current X_train / X_test. The previous y_train / y_test came from the earlier
# regression section, whose inputs were a different random draw, so they did
# not correspond to these inputs.
output_MSE = net.train(X_train,
          T_train,
          X_test=X_test,
          T_test=T_test)

In [None]:
# Loss curves of the MAE- and MSE-trained networks: one colour per loss,
# dashed line = train, solid line = test.
# The labels are listed in the same order as the histories; previously
# ['MSE', 'MAE'] was zipped with [output_MAE, output_MSE], so each curve was
# shown under the other loss's name.
fig = go.Figure()
for (loss, output, color) in zip(['MAE', 'MSE'],
                              [output_MAE, output_MSE],
                              ['red', 'green']
                             ):
    fig.add_trace(go.Scatter(
        y=output['loss_train'],
        mode='lines',
        line=dict(color=color, dash='dash'),
        opacity=0.5,
        name=f'{loss} train'
    ))
    fig.add_trace(go.Scatter(
        y=output['loss_test'],
        mode='lines',
        line=dict(color=color),
        opacity=0.5,
        name=f'{loss} test'
    ))
fig