<a href="https://colab.research.google.com/github/JoDeMiro/InteractiveDemo/blob/main/Jupyter_Notebook_Interactive_Demo_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Interactive Machine Learning Demo - Linear Regression

In [9]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets

from ipywidgets import interact, interactive, fixed, interact_manual, IntSlider, Layout
from IPython.display import display


## Default parameters (sample size, x-range and noise settings)

In [10]:

# Sample count and the closed x-interval the grid below spans.
N_samples = 25
x_min, x_max = -5, 5

# Noise-related constants.
noise_std, noise_mean, noise_magnitude = 1, 0, 2

# Evenly spaced grid with five grid points per requested sample ...
x1 = np.linspace(x_min, x_max, num=N_samples * 5)
# ... from which N_samples x-locations are drawn at random.
x = np.random.choice(x1, size=N_samples)

## Function definitions (ideal fitting function and actual data generating function with noise)

In [17]:
def func_gen(N_samples, x_min, x_max, noise_magnitude, noise_sd, noise_mean):
    """Sample a fixed curve with additive Gaussian noise, plot it and return the data.

    The noise-free curve is ``2*x - 0.6*x**2 + 0.2*x**3 + 18*sin(x)``.

    Parameters
    ----------
    N_samples : number of noisy points to draw.
    x_min, x_max : end points of the evenly spaced grid (``5 * N_samples`` points).
    noise_magnitude : factor the Gaussian noise is multiplied by before it is added.
    noise_sd, noise_mean : scale and location passed to ``np.random.normal``.

    Returns
    -------
    tuple ``(x, y, x1, y1)``: the sampled x-locations, their noisy y-values,
    the full grid and the noise-free curve evaluated on that grid.
    """

    def ideal_curve(t):
        # Noise-free target: cubic polynomial plus a sine term.
        return 2*t - 0.6 * t**2 + 0.2 * t**3 + 18 * np.sin(t)

    # Dense grid first, then the random sample locations picked from it.
    grid_x = np.linspace(x_min, x_max, N_samples * 5)
    sample_x = np.random.choice(grid_x, size=N_samples)

    grid_y = ideal_curve(grid_x)
    gaussian = np.random.normal(loc=noise_mean, scale=noise_sd, size=N_samples)
    sample_y = ideal_curve(sample_x) + noise_magnitude * gaussian

    # Black line: noise-free curve; yellow markers: noisy samples.
    plt.figure(figsize=(8, 5))
    plt.plot(grid_x, grid_y, c='k', lw=2)
    plt.scatter(sample_x, sample_y, edgecolors='k', c='yellow', s=60)
    plt.grid(True)
    plt.show()

    return (sample_x, sample_y, grid_x, grid_y)

## Call the 'interactive' widget with the data generating function, which also plots the data in real time

In [25]:
# Wrap func_gen in an interactive control panel; each keyword below supplies
# the widget (or widget abbreviation) for the func_gen argument of that name.
p = interactive(
    func_gen,
    N_samples={'Low (50 samples)': 50, 'High (200 samples)': 200},
    x_min=(-5, 0, 1),
    x_max=IntSlider(value=4, min=0, max=5, step=1),
    noise_magnitude=(0, 5, 1),
    noise_sd=(0.1, 1, 0.1),
    noise_mean=(-2, 2, 0.5),
)
display(p)

interactive(children=(Dropdown(description='N_samples', options={'Low (50 samples)': 50, 'High (200 samples)':…

## Extract the data

In [26]:
# Unpack func_gen's return value held by the widget: the sampled points
# (x, y) and the grid with its noise-free curve (x1, y1).
(x, y, x1, y1) = p.result

## Load scikit-learn libraries

In [27]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LassoCV
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

## Machine learning (regression) model encapsulated within a function

In [28]:
# Settings handed to LassoCV (eps, n_alphas, max_iter) by func_fit.
lasso_eps = 1e-2
lasso_nalpha = 20
lasso_iter = 3000

# Candidate penalties handed to RidgeCV (alphas) by func_fit.
ridge_alphas = (1e-3, 1e-2, 1e-1, 1)

def func_fit(model_type,test_size,degree):
    """Fit a polynomial regression model to the generated data and plot the fit.

    Reads the module-level arrays ``x`` and ``y`` as the data set, and the
    module-level ``lasso_eps``, ``lasso_nalpha``, ``lasso_iter`` and
    ``ridge_alphas`` settings for the regularised models.

    Parameters
    ----------
    model_type : str
        One of ``'Linear regression'``, ``'LASSO with CV'`` or ``'Ridge with CV'``.
    test_size : float
        Passed to ``train_test_split`` as ``test_size``.
    degree : int
        Degree handed to ``PolynomialFeatures``.

    Returns
    -------
    tuple ``(train_score, test_score)`` as returned by ``model.score`` on the
    training and test splits.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the three supported names.
    """
    # Local import so this function does not depend on another cell being edited.
    from sklearn.preprocessing import StandardScaler

    # The `normalize=True` estimator argument was removed from scikit-learn
    # (deprecated in 1.0, removed in 1.2); feature scaling is now an explicit
    # pipeline step instead.
    if model_type == 'Linear regression':
        estimator = LinearRegression()
    elif model_type == 'LASSO with CV':
        estimator = LassoCV(eps=lasso_eps, n_alphas=lasso_nalpha,
                            max_iter=lasso_iter, cv=5)
    elif model_type == 'Ridge with CV':
        estimator = RidgeCV(alphas=ridge_alphas, cv=5)
    else:
        # Previously an unknown name fell through every `if` and failed later
        # with an unbound `model`; fail early with a clear message instead.
        raise ValueError("Unknown model_type: %r" % (model_type,))

    # NOTE: StandardScaler divides by the standard deviation, whereas the
    # removed `normalize=True` divided by the L2 norm, so the effective
    # strength of a given alpha differs from runs on scikit-learn < 1.2.
    model = make_pipeline(PolynomialFeatures(degree, interaction_only=False),
                          StandardScaler(),
                          estimator)

    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = test_size, random_state = 55)

    # scikit-learn expects 2-D feature matrices: one column, one row per sample.
    X_train = X_train.reshape(-1, 1)
    X_test  = X_test.reshape(-1, 1)

    model.fit(X_train, y_train)

    train_pred = model.predict(X_train)
    train_score = model.score(X_train,y_train)

    test_pred = model.predict(X_test)
    test_score = model.score(X_test,y_test)

    plt.figure(figsize=(14,6))

    # Left panel: held-out data against the model's predictions.
    plt.subplot(1,2,1)
    plt.title("Test set performance\nTest score: %.3f"%(test_score),fontsize=16)
    plt.xlabel("X-test",fontsize=13)
    plt.ylabel("y-test",fontsize=13)
    plt.scatter(X_test,y_test,edgecolors='k',c='blue',s=60)
    plt.scatter(X_test,test_pred,edgecolors='k',c='yellow',s=60)
    plt.grid(True)
    plt.legend(['Actual test values','Predicted values'])

    # Right panel: training data against the fitted values.
    plt.subplot(1,2,2)
    plt.title("Training set performance\nTraining score: %.3f"%(train_score),fontsize=16)
    plt.xlabel("X-train",fontsize=13)
    plt.ylabel("y-train",fontsize=13)
    plt.scatter(X_train,y_train,c='blue')
    plt.scatter(X_train,train_pred,c='yellow')
    plt.grid(True)
    plt.legend(['Actual training values','Fitted values'])

    plt.show()

    return (train_score,test_score)

## Run the encapsulated ML function with ipywidget interactive

In [29]:
# 'initial' lets each widget description take its natural width instead of being truncated.
style = {'description_width': 'initial'}

# continuous_update=False on the IntSlider stops the model from being re-evaluated
# continuously while the slider is being dragged.
# The degree label is a raw string so the LaTeX backslashes are not treated as
# (invalid) Python escape sequences; the summand is indexed by i to match the sum.
m = interactive(
    func_fit,
    model_type=widgets.RadioButtons(
        options=['Linear regression', 'LASSO with CV', 'Ridge with CV'],
        description="Choose Model",
        style=style,
        layout=Layout(width='250px'),
    ),
    test_size=widgets.Dropdown(
        options={"10% of data": 0.1, "20% of data": 0.2, "30% of data": 0.3,
                 "40% of data": 0.4, "50% of data": 0.5},
        description="Test set size ($X_{test}$)",
        style=style,
    ),
    degree=widgets.IntSlider(
        min=1, max=10, step=1,
        description=r'Polynomial($\sum\limits_{i=1}^n a_ix^i$)',
        style=style,
        continuous_update=False,
    ),
)

# Set the height of the control.children[-1] so that the output does not jump and flicker
output = m.children[-1]
output.layout.height = '450px'

# Display the control
display(m)

interactive(children=(RadioButtons(description='Choose Model', layout=Layout(width='250px'), options=('Linear …