#### Sergio Pereira

# Using ipywidgets to explore machine learning: regression/curve fitting

# Interactive graphical user interface -  Fitting an arbitrary curve with Polynomial regression

with scikit-learn

Notebooks come alive when interactive widgets are used: one can immediately see how changing the inputs to a model affects the results.

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import scipy as sp
%matplotlib inline

from scipy.optimize import curve_fit



In [5]:
import warnings
warnings.filterwarnings('ignore')

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression , QuantileRegressor, Lasso

from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.linear_model import Ridge

from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, explained_variance_score
from sklearn.metrics import mean_squared_error



import ipywidgets as widgets
from ipywidgets import GridspecLayout,interact


In [6]:

# Target function to be fitted; random normal noise, whose spread is set by the parameter noise_factor, is added to it.

def f(x, noise_factor):
    """Evaluate the target curve x**2 * sin(pi * x) with additive normal noise.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which the curve is evaluated.
    noise_factor : float
        Standard deviation of the zero-mean normal noise added to every
        value; 0 yields the noise-free curve.

    Returns
    -------
    numpy.ndarray or NumPy scalar
        Noisy curve values with the same shape as ``x``.
    """
    x = np.asarray(x)
    # One noise draw per element. Sizing by the full shape (instead of
    # len(x)) keeps scalar and multi-dimensional inputs working while
    # giving exactly the same result for 1-D inputs.
    noise = np.random.normal(0, noise_factor, size=x.shape)

    return x**2 * np.sin(np.pi * x) + noise

################################################################################################################ 



def fig_1(d, nf, N, alpha_ridge, alpha_lasso):
    """Plot polynomial fits of noisy samples, with and without regularization.

    Three pipelines (PolynomialFeatures followed by LinearRegression, Ridge
    or Lasso) are fitted to N noisy samples of the target curve on [0, 3].
    The left panel shows the noise-free curve, the samples and the three
    fitted curves; the right panel shows the residuals (prediction minus
    sample) of each model at the sample points.

    Parameters
    ----------
    d : int
        Degree passed to PolynomialFeatures.
    nf : float
        Noise level forwarded to ``f`` when generating the samples.
    N : int
        Number of equally spaced sample points in [0, 3].
    alpha_ridge : float
        ``alpha`` of the Ridge (L2) model.
    alpha_lasso : float
        ``alpha`` of the Lasso (L1) model.

    Returns
    -------
    None
        The function only draws a matplotlib figure.
    """
    x = np.linspace(0, 3, N)
    xx = np.linspace(0, 3, 50)  # dense grid used to draw the curves
    y = f(x, nf)

    # scikit-learn estimators expect 2-D feature matrices; build them once.
    x_col = x[:, np.newaxis]
    xx_col = xx[:, np.newaxis]

    _, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
    ax1.set_facecolor('whitesmoke')
    ax1.set_title('Fitting')
    ax2.set_title('Residues')

    # Noise-free reference curve and the noisy training samples.
    ax1.plot(xx, xx**2*np.sin(np.pi*xx), c='black', alpha=0.3, linewidth=5)
    ax1.scatter(x, y, c='black', s=80, alpha=0.5)

    # (final estimator, plot colour, legend label) for each model variant.
    variants = [
        (LinearRegression(), 'r', 'no regularization'),
        (Ridge(alpha=alpha_ridge, max_iter=200000), 'b', 'L2 regularization'),
        (Lasso(alpha=alpha_lasso, max_iter=200000), 'g', 'L1 regularization'),
    ]

    for estimator, colour, label in variants:
        model = Pipeline([('poly', PolynomialFeatures(degree=d)),
                          ('linear', estimator)])
        model.fit(x_col, y)

        ax1.plot(xx, model.predict(xx_col), c=colour, linewidth=3, label=label)
        # Residual = prediction at the sample points minus the noisy sample.
        ax2.scatter(x, model.predict(x_col) - y, c=colour, s=50, alpha=0.5)

    ax1.set_ylim(-4, 10)
    ax1.legend(loc='upper left')
    ax2.axhline(0, c='black')


################################################################################################################

def fig_2(nf, N, max_dep_DT, max_dep_RF):
    """Plot decision-tree and random-forest fits of noisy samples and print MSEs.

    A DecisionTreeRegressor and a RandomForestRegressor are fitted to N
    noisy samples of the target curve on [0, 3]. The left panel shows the
    noise-free curve, the samples and both fitted curves; the right panel
    shows the residuals (prediction minus sample) at the sample points.
    For each model the training MSE and the MSE against a noisy evaluation
    of the curve on a 50-point grid are printed.

    Parameters
    ----------
    nf : float
        Noise level forwarded to ``f``.
    N : int
        Number of equally spaced sample points in [0, 3].
    max_dep_DT : int
        ``max_depth`` of the decision tree.
    max_dep_RF : int
        ``max_depth`` of the random forest.

    Returns
    -------
    None
        The function draws a matplotlib figure and prints two lines.
    """
    x = np.linspace(0, 3, N)
    xx = np.linspace(0, 3, 50)  # dense grid used for curves and test error
    y = f(x, nf)

    # scikit-learn estimators expect 2-D feature matrices; build them once.
    x_col = x[:, np.newaxis]
    xx_col = xx[:, np.newaxis]

    _, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
    ax1.set_facecolor('whitesmoke')
    ax1.set_title('Fitting')
    ax2.set_title('Residues')

    DT_reg = DecisionTreeRegressor(max_depth=max_dep_DT).fit(x_col, y)
    RF_reg = RandomForestRegressor(max_depth=max_dep_RF).fit(x_col, y)

    # Predict once per model and data set; the results feed both the plots
    # and the metrics below.
    pred_DT_reg = DT_reg.predict(x_col)
    pred_RF_reg = RF_reg.predict(x_col)
    grid_DT_reg = DT_reg.predict(xx_col)
    grid_RF_reg = RF_reg.predict(xx_col)

    ax1.plot(xx, xx**2*np.sin(np.pi*xx), c='black', alpha=0.3, linewidth=5)
    ax1.scatter(x, y, c='black', s=80, alpha=0.5)
    ax1.plot(xx, grid_DT_reg, c='r', linewidth=3, label='dt')
    ax1.plot(xx, grid_RF_reg, c='b', linewidth=3, label='rf')
    ax1.set_ylim(-4, 10)
    ax1.legend(loc='upper left')

    ax2.scatter(x, pred_DT_reg - y, c='r', s=50, alpha=0.5)
    ax2.scatter(x, pred_RF_reg - y, c='b', s=50, alpha=0.5)
    ax2.axhline(0, c='black')

    mse_train_DT = mean_squared_error(y, pred_DT_reg)
    mse_train_RF = mean_squared_error(y, pred_RF_reg)

    # Draw the noisy test targets a single time so that both models are
    # scored against the same data; separate draws would make the two test
    # errors differ by noise alone.
    y_test = f(xx, nf)
    mse_test_DT = mean_squared_error(y_test, grid_DT_reg)
    mse_test_RF = mean_squared_error(y_test, grid_RF_reg)

    # First line: decision tree (train, test); second line: random forest.
    print('mean_square_error', mse_train_DT, mse_test_DT)
    print('mean_square_error', mse_train_RF, mse_test_RF)


################################################################################################################     



    
# ################################################################################################################ 




    
# ---------------------------------------------------------------------------
# Slider controls
# ---------------------------------------------------------------------------

# Options shared by all linear (non-logarithmic) sliders.
_linear_slider_opts = dict(disabled=False, continuous_update=True,
                           orientation='horizontal', readout=True)

N = widgets.IntSlider(description='N points : ', value=10, min=5, max=50, step=5,
                      readout_format='d', **_linear_slider_opts)

d = widgets.IntSlider(description='poly degree : ', value=1, min=0, max=30, step=1,
                      readout_format='d', **_linear_slider_opts)

nf = widgets.FloatSlider(description='noise : ', value=0.5, min=0.0, max=5, step=.1,
                         readout_format='.3f', **_linear_slider_opts)

# Regularization strengths use base-10 logarithmic sliders.
alpha_ridge = widgets.FloatLogSlider(description='ridge(alpha)', value=1e-3,
                                     base=10, min=-5, max=5, step=0.5)

alpha_lasso = widgets.FloatLogSlider(description='lasso(alpha)', value=1e-3,
                                     base=10, min=-6, max=4, step=0.5)

# NOTE: NN is created here but not connected to any plot in this cell.
NN = widgets.IntSlider(description='n_neighbors: ', value=5, min=1, max=50, step=1,
                       readout_format='d', **_linear_slider_opts)

max_dep_DT = widgets.IntSlider(description='Max depth DT: ', value=3, min=1, max=100, step=1,
                               readout_format='d', **_linear_slider_opts)

max_dep_RF = widgets.IntSlider(description='Max depth RF: ', value=3, min=1, max=100, step=1,
                               readout_format='d', **_linear_slider_opts)

# ---------------------------------------------------------------------------
# Tab 1: polynomial regression, plain / ridge / lasso (drawn by fig_1)
# ---------------------------------------------------------------------------
out1 = widgets.interactive_output(
    fig_1,
    dict(d=d, nf=nf, N=N, alpha_ridge=alpha_ridge, alpha_lasso=alpha_lasso),
)
caption11 = widgets.Label(value='Degree of the polinomial to use, Number of data, and ammount of noise')
box1a = widgets.HBox([d, N, nf])
caption12 = widgets.Label(value='L2 and L1 regularization parameters')
box1b = widgets.VBox([alpha_ridge, alpha_lasso])
box11 = widgets.VBox([caption11, box1a, caption12, box1b, out1])

# ---------------------------------------------------------------------------
# Tab 2: decision tree and random forest regression (drawn by fig_2)
# ---------------------------------------------------------------------------
out2 = widgets.interactive_output(
    fig_2,
    dict(nf=nf, N=N, max_dep_DT=max_dep_DT, max_dep_RF=max_dep_RF),
)
box2a = widgets.HBox([N, nf])
box2b = widgets.HBox([max_dep_DT, max_dep_RF])
box22 = widgets.VBox([out2, box2a, box2b])

# ---------------------------------------------------------------------------
# Assemble both panels into a tabbed view and show it
# ---------------------------------------------------------------------------
tab = widgets.Tab(children=[box11, box22])
for _tab_index, _tab_title in enumerate(('Least Sq (l1 and l2)', 'DT, RF regr')):
    tab.set_title(_tab_index, _tab_title)

display(tab)







Tab(children=(VBox(children=(Label(value='Degree of the polinomial to use, Number of data, and ammount of nois…