# Linear, Ridge, Lasso, and Elastic Net Regression

The next step is to fit the data to a few different machine learning models and then tweak the hyperparameters to see what the best model is.

In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

In [3]:
# Bring in the data
%store -r X_train
%store -r X_test
%store -r y_train
%store -r y_test

### Linear Regression

First, I will try a vanilla linear regression.

In [4]:
# Fit the training data to the model.
model1 = LinearRegression()
model1.fit(X_train, y_train)

LinearRegression()

In [41]:
# Define a linear regression function that can output R squared and RMSE.
def lr_R2_RMSE():
    data = []
    model = LinearRegression()
    model.fit(X_train, y_train)
    
    model_r2_train = model.score(X_train, y_train)
    model_r2_test = model.score(X_test, y_test)
    
    model_y_pred_train = model.predict(X_train)
    model_y_pred_test = model.predict(X_test)
    model_RMSE_train = np.sqrt(mean_squared_error(y_train, model_y_pred_train))
    model_RMSE_test = np.sqrt(mean_squared_error(y_test, model_y_pred_test))
    
    data.append(['Linear', 'NA', 'NA', model_r2_train, model_r2_test, model_RMSE_train, model_RMSE_test])
            
    table = pd.DataFrame(data, columns = ['Model', 'Alpha', 'L1 Ratio', 'Training R2', 'Test R2', 'Training RMSE', 'Test RMSE'])
    
    return table

In [42]:
linear_results = lr_R2_RMSE()
linear_results

Unnamed: 0,Model,Alpha,L1 Ratio,Training R2,Test R2,Training RMSE,Test RMSE
0,Linear,,,0.756279,0.734275,6055.905613,5978.637219


In [9]:
# View the coefficients of each term.
print(X_train.columns)
print(model1.coef_)

Index(['age', 'bmi', 'sex_male', 'smoker_yes', 'children_1', 'children_2',
       'children_3', 'children_4', 'children_5', 'region_northwest',
       'region_southeast', 'region_southwest'],
      dtype='object')
[11592.50671814 12004.15232447  -268.42233783 24067.87704993
  -103.80379432  1311.19532811   682.03649306  2127.71041988
  1057.26082258  -251.34733476  -855.07435369  -654.86702882]


Smoker is the largest predictor of charges, followed by age and bmi.

### Ridge Regression

Now, I will see if regularization helps improve the model and reduces overfitting.

In [43]:
# Define a ridge regression function that can take different values of alpha and output R squared and RMSE.
def ridge_R2_RMSE(alpha):
    data = []
    for a in alpha:
        model = Ridge(alpha=a)
        model.fit(X_train, y_train)
    
        model_r2_train = model.score(X_train, y_train)
        model_r2_test = model.score(X_test, y_test)
    
        model_y_pred_train = model.predict(X_train)
        model_y_pred_test = model.predict(X_test)
        model_RMSE_train = np.sqrt(mean_squared_error(y_train, model_y_pred_train))
        model_RMSE_test = np.sqrt(mean_squared_error(y_test, model_y_pred_test))
    
        data.append(['Ridge', a, 'NA', model_r2_train, model_r2_test, model_RMSE_train, model_RMSE_test])
            
    table = pd.DataFrame(data, columns = ['Model', 'Alpha','L1 Ratio','Training R2', 'Test R2', 'Training RMSE', 'Test RMSE'])
    
    return table

In [44]:
alpha = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
ridge_results = ridge_R2_RMSE(alpha)
ridge_results

Unnamed: 0,Model,Alpha,L1 Ratio,Training R2,Test R2,Training RMSE,Test RMSE
0,Ridge,0.001,,0.756279,0.734275,6055.905614,5978.642138
1,Ridge,0.01,,0.756279,0.734271,6055.905704,5978.686458
2,Ridge,0.1,,0.756278,0.734231,6055.914666,5979.13488
3,Ridge,1.0,,0.75621,0.733789,6056.768114,5984.108551
4,Ridge,10.0,,0.751457,0.726636,6115.524076,6063.969647
5,Ridge,100.0,,0.627399,0.605047,7487.802904,7288.848304
6,Ridge,1000.0,,0.18857,0.18175,11049.891353,10491.291756


Ridge regression with alpha set to 0.001 performed the best. Performance is similar to the first linear regression.

### Lasso Regression

Now, I will try Lasso regression.

In [45]:
# Define a lasso regression function that can take different values of alpha and output R squared and RMSE.
def lasso_R2_RMSE(alpha):
    data = []
    for a in alpha:
        model = Lasso(alpha=a)
        model.fit(X_train, y_train)
    
        model_r2_train = model.score(X_train, y_train)
        model_r2_test = model.score(X_test, y_test)
    
        model_y_pred_train = model.predict(X_train)
        model_y_pred_test = model.predict(X_test)
        model_RMSE_train = np.sqrt(mean_squared_error(y_train, model_y_pred_train))
        model_RMSE_test = np.sqrt(mean_squared_error(y_test, model_y_pred_test))
    
        data.append(['Lasso', a, 'NA', model_r2_train, model_r2_test, model_RMSE_train, model_RMSE_test])
            
    table = pd.DataFrame(data, columns = ['Model', 'Alpha', 'L1 Ratio', 'Training R2', 'Test R2', 'Training RMSE', 'Test RMSE'])
    
    return table

In [46]:
alpha = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
lasso_results = lasso_R2_RMSE(alpha)
lasso_results

Unnamed: 0,Model,Alpha,L1 Ratio,Training R2,Test R2,Training RMSE,Test RMSE
0,Lasso,0.001,,0.756279,0.734275,6055.905613,5978.638781
1,Lasso,0.01,,0.756279,0.734274,6055.905616,5978.652878
2,Lasso,0.1,,0.756279,0.734261,6055.905868,5978.793975
3,Lasso,1.0,,0.756277,0.734133,6055.931079,5980.231914
4,Lasso,10.0,,0.756074,0.732604,6058.449823,5997.408123
5,Lasso,100.0,,0.750835,0.721436,6123.164553,6121.367999
6,Lasso,1000.0,,0.616996,0.56983,7591.616079,7606.87153


Lasso regression with alpha set to 0.001 performed about the same as ridge regression.

### Elastic Net Regression

Finally, I will try using elastic regression to see if results can be improved.

In [28]:
# Define an elastic net regression function that can 
# take different values of alpha and L1 ratios and output R squared and RMSE.
def en_R2_RMSE(alpha, l1_ratio):
    data = []
    for a in alpha:
        for r in l1_ratio:
            model = ElasticNet(alpha=a)
            model.fit(X_train, y_train)
    
            model_r2_train = model.score(X_train, y_train)
            model_r2_test = model.score(X_test, y_test)
    
            model_y_pred_train = model.predict(X_train)
            model_y_pred_test = model.predict(X_test)
            model_RMSE_train = np.sqrt(mean_squared_error(y_train, model_y_pred_train))
            model_RMSE_test = np.sqrt(mean_squared_error(y_test, model_y_pred_test))
    
            data.append(['Elastic Net', a, r, model_r2_train, model_r2_test, model_RMSE_train, model_RMSE_test])
            
    table = pd.DataFrame(data, columns = ['Model', 'Alpha', 'L1 Ratio', 'Training R2', 'Test R2', 'Training RMSE', 'Test RMSE'])
    
    return table

In [34]:
alpha = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
l1_ratio = [0.2, 0.4, 0.6, 0.8]
en_results = en_R2_RMSE(alpha, l1_ratio)
en_results

Unnamed: 0,Model,Alpha,L1 Ratio,Training R2,Test R2,Training RMSE,Test RMSE
0,Elastic Net,0.001,0.2,0.756261,0.734043,6056.128399,5981.248036
1,Elastic Net,0.001,0.4,0.756261,0.734043,6056.128399,5981.248036
2,Elastic Net,0.001,0.6,0.756261,0.734043,6056.128399,5981.248036
3,Elastic Net,0.001,0.8,0.756261,0.734043,6056.128399,5981.248036
4,Elastic Net,0.01,0.2,0.754827,0.731078,6073.919579,6014.501211
5,Elastic Net,0.01,0.4,0.754827,0.731078,6073.919579,6014.501211
6,Elastic Net,0.01,0.6,0.754827,0.731078,6073.919579,6014.501211
7,Elastic Net,0.01,0.8,0.754827,0.731078,6073.919579,6014.501211
8,Elastic Net,0.1,0.2,0.700729,0.675363,6710.652611,6608.229914
9,Elastic Net,0.1,0.4,0.700729,0.675363,6710.652611,6608.229914


Even the best elastic net regression isn't better than ridge and lasso regression.

### Hyperparameter table

Below are the best models of each type.

In [51]:
best_results = pd.concat([linear_results.iloc[:1], ridge_results.iloc[:1], lasso_results.iloc[:1], en_results.iloc[:1]])
best_results

Unnamed: 0,Model,Alpha,L1 Ratio,Training R2,Test R2,Training RMSE,Test RMSE
0,Linear,,,0.756279,0.734275,6055.905613,5978.637219
0,Ridge,0.001,,0.756279,0.734275,6055.905614,5978.642138
0,Lasso,0.001,,0.756279,0.734275,6055.905613,5978.638781
0,Elastic Net,0.001,0.2,0.756261,0.734043,6056.128399,5981.248036


Plain old vanilla linear regression appears to perform the best.

In [52]:
# Save the table for final metrics file
%store best_results

Stored 'best_results' (DataFrame)
