# Regularization

In [57]:
# Requirements
import course.utils.paths as path

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import sklearn

from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

## Load dataset

In [58]:
# Resolve the raw CSV through the project path helper, then read it with pandas.
happiness_file = path.data_raw_dir("happiness.csv")
happiness_data = pd.read_csv(filepath_or_buffer=happiness_file)

# Summary statistics for every numeric column, rendered as a rich table.
display(
    happiness_data.describe()
)

Unnamed: 0,rank,score,high,low,gdp,family,lifexp,freedom,generosity,corruption,dystopia
count,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0
mean,78.0,5.354019,5.452326,5.255713,0.984718,1.188898,0.551341,0.408786,0.246883,0.12312,1.850238
std,44.888751,1.13123,1.118542,1.14503,0.420793,0.287263,0.237073,0.149997,0.13478,0.101661,0.500028
min,1.0,2.693,2.864884,2.521116,0.0,0.0,0.0,0.0,0.0,0.0,0.377914
25%,39.5,4.5055,4.608172,4.374955,0.663371,1.042635,0.369866,0.303677,0.154106,0.057271,1.591291
50%,78.0,5.279,5.370032,5.193152,1.064578,1.253918,0.606042,0.437454,0.231538,0.089848,1.83291
75%,116.5,6.1015,6.1946,6.006527,1.318027,1.414316,0.723008,0.516561,0.323762,0.153296,2.144654
max,155.0,7.537,7.62203,7.479556,1.870766,1.610574,0.949492,0.658249,0.838075,0.464308,3.117485


In [59]:
# Predictors and target for the regression comparison below.
# NOTE(review): 'dystopia' looks like a residual component of 'score' -- the
# unpenalised fit recorded in this notebook recovers coefficients of ~1 for
# every column with R^2 ~ 1, which suggests 'score' is (nearly) the sum of
# these features. Confirm that this leakage is intended for the demo.
X = happiness_data.loc[
    :,
    ['gdp', 'family', 'lifexp', 'freedom', 'corruption', 'generosity', 'dystopia'],
]
y = happiness_data.loc[:, 'score']

# Hold out a quarter of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=123456,
)

In [60]:
# Baseline: ordinary least squares with no penalty on the coefficients.
estimator_linReg = LinearRegression().fit(X_train, y_train)  # fit() returns the estimator itself
linReg_pred = estimator_linReg.predict(X_test)
linReg_loss = mean_squared_error(y_true=y_test, y_pred=linReg_pred)

# Held-out MSE, fitted coefficients, and R^2 on the test split.
print("Linear loss:", linReg_loss)
print('Coef. linear:', estimator_linReg.coef_)
print("Score for linear:", estimator_linReg.score(X_test, y_test))

Linear loss: 9.417229850107195e-08
Coef. linear: [1.00017608 0.9999101  0.9997673  1.00013297 0.99960098 1.00018574
 0.99989258]
Score for linear: 0.9999999187665876


In [61]:
# Lasso: least squares plus an L1 penalty of strength alpha.
estimator_lasso = Lasso(alpha=0.05).fit(X_train, y_train)  # fit() returns the estimator itself
lasso_pred = estimator_lasso.predict(X_test)
lasso_loss = mean_squared_error(y_true=y_test, y_pred=lasso_pred)

# Held-out MSE, fitted coefficients, and R^2 on the test split.
print("Lasso loss:", lasso_loss)
print('Coef. Lasso:', estimator_lasso.coef_)
print("Score for Lasso:", estimator_lasso.score(X_test, y_test))

Lasso loss: 0.1517833118661325
Coef. Lasso: [1.59272818 0.59430463 0.         0.         0.         0.
 0.81661673]
Score for Lasso: 0.8690710902573036


In [62]:
# Ridge: least squares plus an L2 penalty of strength alpha.
estimator_ridge = Ridge(alpha=0.05)
estimator_ridge.fit(X_train, y_train)
ridge_pred = estimator_ridge.predict(X_test)

# Held-out mean squared error.
ridge_loss = mean_squared_error(y_test, ridge_pred)
# The variable used to be misspelled `rige_loss`; keep that name as an alias
# so any other cell that still reads it continues to work.
rige_loss = ridge_loss

# Held-out MSE, fitted coefficients, and R^2 on the test split.
print("Ridge loss:", ridge_loss)
print('Coef. Ridge:', estimator_ridge.coef_)
print("Score for Ridge:", estimator_ridge.score(X_test, y_test))

Rige loss: 2.9993815056998323e-05
Coef. Ridge: [1.00794149 0.9949322  0.98950428 0.99980913 0.96169526 0.98673606
 0.99737264]
Score for Ridge: 0.9999741272116404


In [63]:
# Elastic net: l1_ratio=0.5 blends the L1 (lasso) and L2 (ridge) penalties.
estimator_elastic = ElasticNet(
    alpha=0.05,
    l1_ratio=0.5,
).fit(X_train, y_train)  # fit() returns the estimator itself
elastic_pred = estimator_elastic.predict(X_test)
elastic_loss = mean_squared_error(y_true=y_test, y_pred=elastic_pred)

# Held-out MSE, fitted coefficients, and R^2 on the test split.
print("Elastic loss:", elastic_loss)
print('Coef. elastic:', estimator_elastic.coef_)
print("Score for elastic:", estimator_elastic.score(X_test, y_test))

Elastic loss: 0.10537192583606875
Coef. elastic: [1.18943271 0.7374407  0.55080841 0.36477495 0.         0.01024534
 0.81019057]
Score for elastic: 0.9091057429332381
