# Linear Regression 2022
### Christian Igel


I copied the basic framework from https://github.com/tirthajyoti/Interactive_Machine_Learning developed by Tirthajyoti Sarkar.

In [None]:
from ipywidgets import interact, interactive, IntSlider, Layout, interact_manual, fixed
import ipywidgets as widgets
from IPython.display import display

import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LassoCV, Lasso
from sklearn.linear_model import RidgeCV, Ridge
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

## Linear Regression and Regularization

### Function to fit
An artificial target function, $f(x) = 2x - 0.6x^2 + 0.2x^3 + 18\sin(x)$, observed with additive Gaussian noise.

In [None]:
def func_gen(N_samples,x_min,x_max,noise_sd, resolution=500):
    """Draw noisy samples of the target function and plot them.

    The x-values are picked at random (``np.random.choice``) from an evenly
    spaced grid of ``resolution`` points on [x_min, x_max]; Gaussian noise
    with standard deviation ``noise_sd`` is added to the function values.

    Returns:
        Tuple ``(x, y, x1, y1, x_min, x_max)``: the sampled inputs, their
        noisy targets, the full grid, the noise-free function on that grid,
        and the interval bounds that were passed in.
    """
    def target(t):
        # Noise-free function that the models are later asked to recover.
        return 2*t-0.6*t**2+0.2*t**3+18*np.sin(t)

    grid = np.linspace(x_min, x_max, resolution)
    sampled_x = np.random.choice(grid, size=N_samples)
    noise = np.random.normal(scale=noise_sd, size=N_samples)
    noisy_y = target(sampled_x) + noise
    clean_y = target(grid)

    # Black line: true function; orange dots: noisy observations.
    plt.figure(figsize=(8, 5))
    plt.plot(grid, clean_y, c='k', lw=2)
    plt.scatter(sampled_x, noisy_y, edgecolors='k', c='orange', s=60)
    plt.grid(True)
    plt.show()

    return (sampled_x, noisy_y, grid, clean_y, x_min, x_max)

### Call the interactive widget with the data generating function

In [None]:
# Build the controls for func_gen. The (min, max, step) tuples are the
# ipywidgets shorthand for a slider; `resolution` is held fixed at 500.
p = interactive(
    func_gen,
    # Integer slider bounds are given as ints (the original passed max=200.).
    N_samples=widgets.IntSlider(min=10, max=200, step=10, value=30,
                                continuous_update=False),
    x_min=(-5, 0, 1),
    x_max=(0, 5, 1),
    noise_sd=widgets.FloatSlider(min=0., max=10., step=0.5,
                                 continuous_update=False),
    resolution=fixed(500),
)
display(p)

In [None]:
# Pull the latest return value of func_gen out of the widget: the sampled
# points (x, y), the noise-free curve (x1, y1) and the interval bounds.
# These names are read as globals by func_fit.
(x, y, x1, y1, x_min, x_max) = p.result

### Linear models encapsulated in a function 

In [None]:
# Settings for the cross-validated models built in func_fit.
lasso_eps = 1e-2                     # passed to LassoCV as `eps`
lasso_nalpha = 20                    # passed to LassoCV as `n_alphas`
lasso_iter = 10_000                  # passed to LassoCV as `max_iter`
ridge_alphas = (1e-3, 1e-2, 1e-1, 1) # candidate penalties passed to RidgeCV

def _build_model(model_type, degree, alpha):
    """Return an unfitted pipeline for the requested model.

    Every pipeline expands x into polynomial features of the given degree and
    feeds them to a linear model created with ``fit_intercept=False``.

    Raises:
        ValueError: if ``model_type`` is not one of the supported names.
    """
    if model_type == 'Linear regression':
        regressor = LinearRegression(fit_intercept=False)
    elif model_type == 'LASSO with CV':
        # `normalize=True` is no longer passed: scikit-learn removed the
        # parameter in 1.2, and it was documented as ignored whenever
        # fit_intercept=False, so the fit is unchanged.
        regressor = LassoCV(eps=lasso_eps, n_alphas=lasso_nalpha,
                            max_iter=lasso_iter, cv=5, fit_intercept=False)
    elif model_type == 'Ridge with CV':
        regressor = RidgeCV(alphas=ridge_alphas, cv=5, fit_intercept=False)
    elif model_type == 'Ridge':
        # `alpha` is on a log scale; exponentiate to get the penalty weight.
        regressor = Ridge(alpha=np.exp(alpha), fit_intercept=False)
    elif model_type == 'LASSO':
        regressor = Lasso(alpha=np.exp(alpha), fit_intercept=False)
    else:
        raise ValueError("Unknown model_type: %r" % (model_type,))
    return make_pipeline(PolynomialFeatures(degree, interaction_only=False),
                         regressor)


def _plot_panel(index, title, suffix, X, y_true, y_pred,
                data_label, pred_label, X_grid, y_grid):
    """Draw one subplot: data, predictions, fitted curve and true function."""
    plt.subplot(1, 2, index)
    plt.title(title)
    plt.xlabel("X-" + suffix)
    plt.ylabel("y-" + suffix)
    plt.scatter(X, y_true, c='blue', s=60, label=data_label)
    plt.scatter(X, y_pred, c='orange', s=40, label=pred_label)
    plt.plot(X_grid, y_grid, label="Model", c='k', lw=2)
    # Fix the y-range from the data and the predictions at the data points,
    # so the fitted curve and the true function cannot stretch the axis.
    y_min = np.min([y_true.min(), y_pred.min()])
    y_max = np.max([y_true.max(), y_pred.max()])
    # Taking amin/amax over both scalings gives a 10% margin for either sign.
    plt.ylim([np.amin([1.1*y_min, 0.9*y_min]),
              np.amax([1.1*y_max, 0.9*y_max])])
    # x1, y1: noise-free function on the full grid (notebook globals).
    plt.plot(x1, y1, label="Function w/o noise", c='red', lw=2)
    plt.grid(True)
    plt.legend()


def func_fit(model_type,test_size,degree,alpha,resolution=500):
    """Fit a polynomial linear model to the generated data and plot the fit.

    Reads the notebook globals ``x``, ``y``, ``x1``, ``y1``, ``x_min`` and
    ``x_max``.

    Args:
        model_type: one of 'Linear regression', 'LASSO', 'Ridge',
            'LASSO with CV', 'Ridge with CV'.
        test_size: forwarded to ``train_test_split`` (split is seeded with
            ``random_state=55``).
        degree: degree of the polynomial feature expansion.
        alpha: log of the regularisation strength; ``np.exp(alpha)`` is used
            by 'Ridge' and 'LASSO' and ignored by the other models.
        resolution: number of grid points used to draw the fitted curve.

    Returns:
        Tuple ``(train_score, test_score, model)`` where the scores come from
        ``model.score`` and ``model`` is the fitted pipeline.

    Raises:
        ValueError: if ``model_type`` is not a supported name.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=55)
    # scikit-learn expects 2-D inputs of shape (n_samples, n_features).
    X_train = X_train.reshape(-1, 1)
    X_test = X_test.reshape(-1, 1)

    # NOTE: x is fed to the pipeline unscaled. The original code fitted a
    # StandardScaler here but discarded its output, so that step is dropped.
    model = _build_model(model_type, degree, alpha)
    model.fit(X_train, y_train)

    train_pred = model.predict(X_train)
    train_score = model.score(X_train, y_train)
    test_pred = model.predict(X_test)
    test_score = model.score(X_test, y_test)

    # Dense grid over the sampling interval to draw the fitted curve.
    X_grid = np.linspace(x_min, x_max, resolution).reshape(-1, 1)
    y_grid = model.predict(X_grid)

    plt.figure(figsize=(14, 6))
    _plot_panel(2, "Test set performance\nTest score: %.3f" % (test_score),
                "test", X_test, y_test, test_pred,
                'Actual test values', 'Predicted values', X_grid, y_grid)
    _plot_panel(1, "Training set performance\nTraining score: %.3f" % (train_score),
                "train", X_train, y_train, train_pred,
                "Training data", "Fitted values", X_grid, y_grid)
    plt.show()

    return (train_score, test_score, model)

### Run the encapsulated ML function 

In [None]:
style = {'description_width': 'initial'}
# continuous_update=False on the sliders stops the model from being refitted
# on every intermediate value while a slider is being dragged.
# The LaTeX descriptions are raw strings so that sequences such as "\s" and
# "\l" are not parsed as (invalid) string escapes.
m = interactive(
    func_fit,
    model_type=widgets.RadioButtons(
        options=['Linear regression', 'LASSO', 'Ridge',
                 'LASSO with CV', 'Ridge with CV'],
        description="Choose Model", style=style,
        layout=Layout(width='250px')),
    test_size=widgets.Dropdown(
        options={"10% of data": 0.1, "20% of data": 0.2, "30% of data": 0.3,
                 "40% of data": 0.4, "50% of data": 0.5},
        description="Test set size ($X_{test}$)", style=style, value=0.3),
    degree=widgets.IntSlider(
        min=1, max=40, step=1,
        description=r'Polynomial ($\sum_{i=0}^n w_i x^i$)',
        style=style, continuous_update=False),
    # Log-scale slider: func_fit applies np.exp to this value.
    alpha=widgets.FloatSlider(
        min=-15, max=10, step=1,
        description=r'$\ln\gamma$',
        style=style, continuous_update=False),
    resolution=fixed(500))

# Display the control
display(m)

### Visualize coefficients

In [None]:
# Take the fitted pipeline from the latest func_fit run; the two scores are
# unpacked but not used in this cell.
e_traim, e_text, model = m.result

# model[1] is the second pipeline step, i.e. the linear model that follows
# the polynomial feature expansion.
coefficients = model[1].coef_
print(coefficients)

# One bar per coefficient, with a tick under every bar.
positions = np.arange(0, len(coefficients))
plt.xlabel('Parameter number (0 is intercept)')
plt.ylabel('Parameter value')
plt.grid(axis='y')
plt.xticks(positions)
plt.bar(positions, coefficients);