In [1]:
import numpy as np
import pandas as pd
import catboost
import ngboost
import KTBoost.KTBoost as KTBoost
import xgboost
import matplotlib
from matplotlib import pyplot as plt
import sklearn
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display
import IPython

# Compare predictions from different methods

In [2]:
def linear_data(nobs, noise_std):
    """Generate a synthetic linear-regression sample ordered by target value.

    Three features per observation are drawn with ``np.random.uniform``
    (default bounds).  The target is the dot product of the features with the
    fixed weights ``[13, -10, 7]`` plus standard-normal noise multiplied by
    ``noise_std``.  Rows are reordered so the returned targets are ascending.

    Parameters
    ----------
    nobs : int
        Number of observations to draw.
    noise_std : float
        Factor applied to the standard-normal noise term.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x, y)``: features of shape ``(nobs, 3)`` and targets of shape
        ``(nobs,)``, both ordered by ascending ``y``.
    """
    # Draw order matters for reproducibility under a fixed global seed:
    # features first, then the noise vector.
    features = np.random.uniform(size=[nobs, 3])
    noise = np.random.normal(size=nobs) * noise_std

    weights = np.array([13, -10, 7])
    targets = features @ weights + noise

    # Apply one permutation to both arrays so rows stay aligned.
    ascending = np.argsort(targets)
    return features[ascending], targets[ascending]

def nonlinear_data(nobs, noise_std):
    """Generate a synthetic nonlinear-regression sample ordered by target value.

    Three features per observation are drawn with ``np.random.uniform``
    (default bounds).  The target is

        ``x0**2 + 2*sin(x1) + x1*x2 + noise_std * eps``

    where ``eps`` is standard-normal noise.  Rows are reordered so the
    returned targets are ascending.

    Parameters
    ----------
    nobs : int
        Number of observations to draw.
    noise_std : float
        Factor applied to the standard-normal noise term.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x, y)``: features of shape ``(nobs, 3)`` and targets of shape
        ``(nobs,)``, both ordered by ascending ``y``.
    """
    # No coefficient vector is needed here: the signal is a fixed nonlinear
    # function of the three feature columns.
    x = np.random.uniform(size=[nobs, 3])
    y = x[:, 0]**2 + 2*np.sin(x[:, 1]) + x[:, 1]*x[:, 2] + np.random.normal(size=nobs)*noise_std

    # Apply one permutation to both arrays so rows stay aligned.
    sorted_inds = np.argsort(y)
    return x[sorted_inds, ...], y[sorted_inds]

In [3]:
def func(nobs, noise_std, linear):
    """Fit six regressors on synthetic data and plot their point predictions.

    A training sample and a second, separately drawn sample are generated
    with ``linear_data`` (when ``linear`` is truthy) or ``nonlinear_data``.
    Every model is fitted on the training sample only.  A new two-panel
    figure is then drawn: the left panel shows training observations and
    predictions, the right panel shows the same for the second sample.

    Parameters
    ----------
    nobs : int
        Number of observations per sample (forwarded to the data generator).
    noise_std : float
        Noise factor (forwarded to the data generator).
    linear : bool
        Selects ``linear_data`` when truthy, ``nonlinear_data`` otherwise.

    Returns
    -------
    None
        The function only draws on a new matplotlib figure.
    """
    # (legend label, unfitted estimator, extra keyword arguments for ``fit``)
    models = [
        ('Linear regression', LinearRegression(), {}),
        ('SKLearn GB', GradientBoostingRegressor(), {}),
        ('NGB', ngboost.NGBRegressor(), {}),
        ('Catboost', catboost.CatBoostRegressor(), {'verbose': False}),
        ('KTBoost', KTBoost.BoostingRegressor(), {}),
        ('XGBoost', xgboost.XGBRegressor(), {}),
    ]

    make_data = linear_data if linear else nonlinear_data
    x, y = make_data(nobs, noise_std)
    xtest, ytest = make_data(nobs, noise_std)

    for _, model, fit_kwargs in models:
        model.fit(x, y, **fit_kwargs)

    plt.figure(figsize=[10, 5])
    panels = [('Training', x, y), ('Testing', xtest, ytest)]
    for position, (title, features, targets) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(targets, 'ok', label='Observations')
        for name, model, _ in models:
            plt.plot(model.predict(features), label=name)
        plt.title(title)
        plt.legend()

In [4]:
# Select the interactive "widget" (ipympl) matplotlib backend for this kernel.
%matplotlib widget
# Build controls for `func`: the (min, max) / (min, max, step) tuples show up as
# IntSlider / FloatSlider in the cell output below; `linear` is given as a list of
# (label, value) pairs. {'manual': True} is passed as the options argument --
# presumably this makes `func` run only when the user presses a button
# (TODO confirm against the ipywidgets docs).
y = interactive(func, {'manual': True}, nobs=(10, 1000), noise_std=(0, 1, 0.1), linear=[('True', True), ('False', False)])
# Render the control panel (and the function's output area) in the notebook.
display(y)

interactive(children=(IntSlider(value=505, description='nobs', max=1000, min=10), FloatSlider(value=0.0, descr…

# Compare probabilistic predictions

In [5]:
def func(nobs, noise_std, linear):
    """Fit three regressors on synthetic data and plot their point predictions.

    The models are linear regression, NGBoost and CatBoost.  A training
    sample and a second, separately drawn sample come from ``linear_data``
    (when ``linear`` is truthy) or ``nonlinear_data``; the models are fitted
    on the training sample and their ``predict`` output is drawn against the
    observations of each sample in a new two-panel figure.

    NOTE: a later cell in this notebook defines another ``func`` taking an
    extra ``posterior_sampling`` argument; once that cell runs, the name
    ``func`` refers to that definition instead of this one.

    Parameters
    ----------
    nobs : int
        Number of observations per sample (forwarded to the data generator).
    noise_std : float
        Noise factor (forwarded to the data generator).
    linear : bool
        Selects ``linear_data`` when truthy, ``nonlinear_data`` otherwise.

    Returns
    -------
    None
        The function only draws on a new matplotlib figure.
    """
    estimators = {
        'Linear regression': LinearRegression(),
        'NGB': ngboost.NGBRegressor(),
        'Catboost': catboost.CatBoostRegressor(),
    }
    # Only CatBoost needs extra ``fit`` arguments (to silence training logs).
    fit_options = {'Catboost': {'verbose': False}}

    if linear:
        generator = linear_data
    else:
        generator = nonlinear_data
    x, y = generator(nobs, noise_std)
    xtest, ytest = generator(nobs, noise_std)

    for label, estimator in estimators.items():
        estimator.fit(x, y, **fit_options.get(label, {}))

    def draw_panel(position, features, observed, title):
        """Draw observations and every model's predictions in one subplot."""
        plt.subplot(1, 2, position)
        plt.plot(observed, 'ok', label='Observations')
        for label, estimator in estimators.items():
            plt.plot(estimator.predict(features), label=label)
        plt.title(title)
        plt.legend()

    plt.figure(figsize=[10, 5])
    draw_panel(1, x, y, 'Training')
    draw_panel(2, xtest, ytest, 'Testing')

In [66]:
def _plot_mean_with_band(mean, std, mean_label, band_label):
    """Plot ``mean`` as a line and shade ``mean ± 2*std`` in the line's colour."""
    line, = plt.plot(mean, lw=2, label=mean_label)
    plt.fill_between(
        line.get_xdata(), mean - 2*std, mean + 2*std,
        color=line.get_color(), alpha=0.4, label=band_label)


def func(nobs, noise_std, linear, posterior_sampling):
    """Fit NGBoost and CatBoost uncertainty models and plot mean ± 2σ bands.

    A training sample and a second, separately drawn sample come from
    ``linear_data`` (when ``linear`` is truthy) or ``nonlinear_data``.  Both
    models are fitted on the training sample.  A new two-panel figure shows,
    for each sample, the observations plus each model's predicted mean and a
    shaded ``mean ± 2*std`` band.

    For NGBoost the mean and standard deviation are read from the ``'loc'``
    and ``'scale'`` entries of ``pred_dist(...).params``.  For CatBoost
    (trained with ``loss_function='RMSEWithUncertainty'``) column 0 of
    ``predict`` is used as the mean and column 1 is treated as a variance,
    i.e. its square root is used as the standard deviation.

    Parameters
    ----------
    nobs : int
        Number of observations per sample (forwarded to the data generator).
    noise_std : float
        Noise factor (forwarded to the data generator).
    linear : bool
        Selects ``linear_data`` when truthy, ``nonlinear_data`` otherwise.
    posterior_sampling : bool
        Forwarded to ``catboost.CatBoostRegressor(posterior_sampling=...)``.

    Returns
    -------
    None
        The function only draws on a new matplotlib figure.
    """
    # Honour the ``linear`` switch (it was previously accepted but ignored).
    make_data = linear_data if linear else nonlinear_data
    x, y = make_data(nobs, noise_std)
    xtest, ytest = make_data(nobs, noise_std)

    ngb = ngboost.NGBRegressor()
    catgb = catboost.CatBoostRegressor(
        posterior_sampling=posterior_sampling, loss_function='RMSEWithUncertainty')

    ngb.fit(x, y)
    catgb.fit(x, y, verbose=False)

    plt.figure(figsize=[10, 5])
    panels = [('Training', x, y), ('Test', xtest, ytest)]
    for position, (title, features, targets) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(targets, 'ok', label='Observations')

        # Raw strings keep the LaTeX backslashes without invalid-escape warnings.
        pred_dist = ngb.pred_dist(features)
        _plot_mean_with_band(
            pred_dist.params['loc'], pred_dist.params['scale'],
            r'NGB Predicted $\mu$', r'NGB Predicted $\mu \pm 2*\sigma$')

        predictions = catgb.predict(features)
        _plot_mean_with_band(
            predictions[:, 0], np.sqrt(predictions[:, 1]),
            'Conditional mean, Catboost', r'Catboost $\mu \pm 2*\sigma$')

        plt.legend()
        plt.title(title)

In [67]:
# Select the interactive "widget" (ipympl) matplotlib backend for this kernel.
%matplotlib widget
# At this point `func` is the four-argument definition from the cell above, so a
# `posterior_sampling` control is supplied in addition to nobs / noise_std / linear.
# {'manual': True} is passed as the options argument -- presumably this makes
# `func` run only when the user presses a button (TODO confirm against the
# ipywidgets docs).
y = interactive(func, {'manual': True}, nobs=(10, 1000), noise_std=(0, 1, 0.1), linear=[('True', True), ('False', False)], posterior_sampling=[('True', True), ('False', False)])
# Render the control panel (and the function's output area) in the notebook.
display(y)

interactive(children=(IntSlider(value=505, description='nobs', max=1000, min=10), FloatSlider(value=0.0, descr…