# Comparison of different probabilistic Machine Learning algorithms

## Description
This notebook compares different probabilistic machine learning algorithms on two tasks: fitting a noisy 1D function and regression on UCI data sets. Besides the median, each algorithm also predicts the 90 % prediction interval. For the 1D function, the comparison is made on the graph of the function itself. For the UCI data sets, the comparison is made using unity plots (predicted versus observed values).

In [12]:
import sys
import matplotlib.pyplot as plt
sys.path.append('models/')

# Import models
from bootstrap_net_pytorch_cuda import bootstrap_ensemble
from mc_net_pytorch_cuda import MCDnet

In [1]:
import numpy as np
import plotly.graph_objects as go

# 1D function to predict
def f(x):
    """Return the noise-free target ``x * sin(x)``, evaluated element-wise."""
    return x * np.sin(x)

# Seed the global NumPy RNG *before* any sampling, so that the inputs and the
# noise drawn below are identical on every Restart & Run All.
np.random.seed(1)

# Inputs: 100 points drawn uniformly from [0, 10), stored as a float32 column
X = np.atleast_2d(np.random.uniform(0, 10.0, size=100)).T
X = X.astype(np.float32)

# Noise-free observations
y = f(X).ravel()

# Heteroscedastic Gaussian noise: every point gets its own standard deviation,
# drawn uniformly from [1.5, 2.5)
dy = 1.5 + 1.0 * np.random.random(y.shape)
noise = np.random.normal(0, dy)

# Noisy targets as a float32 column vector, matching the layout of X
y = (y + noise).reshape(-1, 1).astype(np.float32)


# Scatter plot of the noisy training observations
observations = go.Scatter(
    x=X.ravel(),
    y=y.ravel(),
    mode='markers',
    name='Function to Predict',
)

fig = go.Figure(data=[observations])
fig.update_layout(title='1D Function to Predict')
fig.show()

In [4]:
num_features = X.shape[1]
num_labels = y.shape[1]

# Hyper-parameters shared by both models, so the comparison differs only in
# how the predictive uncertainty is obtained.
common_kwargs = dict(hidden_layers=(500, 500,), lr=0.02, weight_decay=1e-6,
                     cuda=False, activation='CELU', max_epoch=500)

# Make dict of models to compare
models_dict = {
    'bootstrap_net': bootstrap_ensemble(num_features, num_labels, droprate=0.0,
                                        num_nets=10, **common_kwargs),
    # This entry previously built a second bootstrap_ensemble, so the
    # "monte-carlo-dropout" results were not produced by the MC-dropout model.
    # NOTE(review): assumes MCDnet accepts the same constructor arguments that
    # were passed here before -- confirm against models/mc_net_pytorch_cuda.py.
    'monte-carlo-dropout-net': MCDnet(num_features, num_labels, droprate=0.5,
                                      **common_kwargs),
}

# Evaluation grid over the training range. It does not depend on the model, so
# it is built once; float32 matches the dtype of the training inputs X.
X_pred = np.linspace(0, 10, 11).reshape(-1, 1).astype(np.float32)

# Train and save results in results dict
results_dict = {}
for model, reg in models_dict.items():
    reg.fit(X, y, verbose=True)
    y_median, y_lower_upper_quantil = reg.predict(X_pred, return_std=True)
    results_dict[model] = {
        'X_pred': X_pred,
        'y_median': y_median,
        'y_lower_upper_quantil': y_lower_upper_quantil,
    }

Epoch 1 loss: 22.555824279785156
Epoch 51 loss: 15.820417404174805
Epoch 101 loss: 8.659209251403809
Epoch 151 loss: 6.026151657104492
Epoch 201 loss: 3.0034914016723633
Epoch 251 loss: 3.0599875450134277
Epoch 301 loss: 2.953775644302368
Epoch 351 loss: 3.89857816696167
Epoch 401 loss: 3.111260175704956
Epoch 451 loss: 2.889955520629883
Epoch 1 loss: 23.92467498779297
Epoch 51 loss: 12.42674446105957
Epoch 101 loss: 7.9803466796875
Epoch 151 loss: 4.58760404586792
Epoch 201 loss: 3.4949090480804443
Epoch 251 loss: 4.193452835083008
Epoch 301 loss: 3.2647533416748047
Epoch 351 loss: 3.231954336166382
Epoch 401 loss: 3.3530972003936768
Epoch 451 loss: 3.2039122581481934
Epoch 1 loss: 23.91594696044922
Epoch 51 loss: 8.703564643859863
Epoch 101 loss: 4.920696258544922
Epoch 151 loss: 4.009305953979492
Epoch 201 loss: 3.663220167160034
Epoch 251 loss: 3.803056001663208
Epoch 301 loss: 2.726311445236206
Epoch 351 loss: 3.115910291671753
Epoch 401 loss: 2.293016195297241
Epoch 451 loss: 2.1

In [47]:
# Graph results

fig = go.Figure()

# Add the noisy training observations
fig.add_trace(go.Scatter(x=X.flatten(), y=y.flatten(),
                    mode='markers',
                    name='Function to Predict', marker_color='black'))

# Add predictions of fitted models

# One Set1 colour per model. Matplotlib returns RGBA rows with every channel
# in [0, 1], whereas a CSS "rgba(r,g,b,a)" string expects r, g, b in 0-255.
colors = plt.cm.Set1(np.linspace(0, 1, len(results_dict)))
for i, model in enumerate(results_dict):
    result = results_dict[model]

    # Read the grid stored with the predictions instead of relying on a
    # global left behind by the training cell.
    X_pred = np.asarray(result['X_pred']).flatten()
    y_median = result['y_median'].flatten()
    y_lower_upper_quantil = np.asarray(result['y_lower_upper_quantil'])

    red, green, blue = (int(round(255 * channel)) for channel in colors[i][:3])
    color = f"rgba({red},{green},{blue},{colors[i][3]})"
    color_fill = f"rgba({red},{green},{blue},0.5)"

    # NOTE(review): as in the original arithmetic, the values are treated as
    # offsets from the median -- confirm against the models' predict().
    if y_lower_upper_quantil.ndim == 2 and y_lower_upper_quantil.shape[1] == 2:
        # Asymmetric interval: column 0 is the lower, column 1 the upper offset
        y_lower = y_median - y_lower_upper_quantil[:, 0]
        y_upper = y_median + y_lower_upper_quantil[:, 1]
    else:
        # Symmetric interval: one offset per point. Flattened so that it lines
        # up with the flattened median instead of broadcasting to a matrix.
        offset = y_lower_upper_quantil.ravel()
        y_lower = y_median - offset
        y_upper = y_median + offset

    # median
    fig.add_trace(go.Scatter(x=X_pred, y=y_median,
                    mode='lines',
                    name=f'{model} median', marker_color=color))
    # Interval band: the lower trace fills up to the trace added just before
    # it (the upper bound), so the order of these two calls matters.
    fig.add_trace(go.Scatter(x=X_pred, y=y_upper, mode='lines',
                    name=f'{model} upper intervall',
                    fill=None, fillcolor=color_fill, line_color=color_fill))
    fig.add_trace(go.Scatter(x=X_pred, y=y_lower, mode='lines',
                    name=f'{model} lower intervall',
                    fill='tonexty', fillcolor=color_fill, line_color=color_fill))


fig.update_layout(title='Comparison of model predictions',
                  height=600,
                  width=800,
                  template="plotly_white")
fig.show()

In [16]:
# Compare for UCI datasets
from sklearn.model_selection import train_test_split

# TODO: implement multiple datasets
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so
# this cell needs scikit-learn < 1.2 until the data set is replaced.
from sklearn.datasets import load_boston
X, y = load_boston(return_X_y=True)

# Fixed random_state so the train/test split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
# NOTE(review): the 1D experiment feeds float32 inputs and (n, 1) targets to
# the models; here the arrays are passed as loaded -- confirm the models
# handle that.

num_features = X_train.shape[1]
num_labels = 1

# Hyper-parameters shared by both models, so the comparison differs only in
# how the predictive uncertainty is obtained.
common_kwargs = dict(hidden_layers=(500, 500,), lr=0.02, weight_decay=1e-6,
                     cuda=False, activation='CELU', max_epoch=500)

# Make dict of models to compare
models_dict = {
    'bootstrap_net': bootstrap_ensemble(num_features, num_labels, droprate=0.0,
                                        num_nets=10, **common_kwargs),
    # This entry previously built a second bootstrap_ensemble, so the
    # "monte-carlo-dropout" results were not produced by the MC-dropout model.
    # NOTE(review): assumes MCDnet accepts the same constructor arguments that
    # were passed here before -- confirm against models/mc_net_pytorch_cuda.py.
    'monte-carlo-dropout-net': MCDnet(num_features, num_labels, droprate=0.5,
                                      **common_kwargs),
}

# Train on the training split and predict the held-out test split
results_dict = {}
for model, reg in models_dict.items():
    reg.fit(X_train, y_train, verbose=False)
    y_median, y_lower_upper_quantil = reg.predict(X_test, return_std=True)
    results_dict[model] = {
        # Inputs the predictions were made for (previously X_train was stored
        # here although predict() is called on X_test).
        'X_pred': X_test,
        'y_test': y_test,
        'y_median': y_median,
        'y_lower_upper_quantil': y_lower_upper_quantil,
    }

In [25]:
# Graph results as unity plots
fig = go.Figure()

# Unity line (prediction == observation), drawn between the smallest and the
# largest observed test value
y_test_flat = np.asarray(y_test).flatten()
unity_range = [y_test_flat.min(), y_test_flat.max()]
fig.add_trace(go.Scatter(x=unity_range, y=unity_range,
                    mode='lines',
                    name='Unity line', line_color='black'))

# Add predictions of fitted models

# One Set1 colour per model. Matplotlib returns RGBA rows with every channel
# in [0, 1], whereas a CSS "rgba(r,g,b,a)" string expects r, g, b in 0-255.
colors = plt.cm.Set1(np.linspace(0, 1, len(results_dict)))
for i, model in enumerate(results_dict):
    result = results_dict[model]

    y_observed = np.asarray(result['y_test']).flatten()
    y_median = result['y_median'].flatten()
    y_lower_upper_quantil = np.asarray(result['y_lower_upper_quantil'])

    red, green, blue = (int(round(255 * channel)) for channel in colors[i][:3])
    color = f"rgba({red},{green},{blue},0.5)"

    # plotly's error_y takes the *length* of each bar measured from the
    # marker, not the absolute upper/lower value, so the offsets are passed
    # on directly instead of median +/- offset.
    # NOTE(review): as in the original arithmetic, the values are treated as
    # offsets from the median -- confirm against the models' predict().
    if y_lower_upper_quantil.ndim == 2 and y_lower_upper_quantil.shape[1] == 2:
        # Asymmetric interval: column 0 is the lower, column 1 the upper offset
        error_minus = y_lower_upper_quantil[:, 0]
        error_plus = y_lower_upper_quantil[:, 1]
    else:
        # Symmetric interval: the same offset in both directions
        error_minus = error_plus = y_lower_upper_quantil.ravel()

    fig.add_trace(go.Scatter(
        x=y_observed,
        y=y_median,
        mode='markers',
        name=f'{model}',
        marker_color=color,
        error_y=dict(
            type='data',
            symmetric=False,
            array=error_plus,
            arrayminus=error_minus)
        ))

fig.update_layout(title='Comparison of model predictions',
                  xaxis_title='Observed value',
                  yaxis_title='Predicted median',
                  height=600,
                  width=800,
                  template="plotly_white")
fig.show()