<a href="https://colab.research.google.com/github/SaikatGhosh94/MachineLearningCourse/blob/main/Hands_on_practice/simple_linear_regression/04_python_course_regularization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [62]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [63]:
df = pd.read_csv('Advertising.csv')

In [64]:
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [65]:
X = df.drop('sales',axis=1).values
y = df['sales'].values

In [66]:
from sklearn.preprocessing import PolynomialFeatures
polynomial_converter = PolynomialFeatures(degree = 3, include_bias=False)

In [67]:
poly_features = polynomial_converter.fit_transform(X)

In [68]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(poly_features, y, test_size=0.33, random_state=42)

In [69]:
#use scaler to scale all numerical features
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [70]:
from sklearn.linear_model import Ridge
ridge_model = Ridge(alpha=10)

In [71]:
ridge_model.fit(X_train_scaled,y_train)

In [72]:
test_predictions = ridge_model.predict(X_test_scaled)

In [73]:
from sklearn.metrics import mean_absolute_error,mean_squared_error
MAE = mean_absolute_error(y_test,test_predictions)
RMSE = np.sqrt(mean_squared_error(y_test,test_predictions))

In [74]:
MAE

0.6329556348463365

In [75]:
RMSE

np.float64(0.8927190051123457)

In [76]:
from sklearn.metrics._scorer import _SCORERS
_SCORERS.keys()

dict_keys(['explained_variance', 'r2', 'neg_max_error', 'matthews_corrcoef', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_root_mean_squared_error', 'neg_root_mean_squared_log_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'd2_absolute_error_score', 'accuracy', 'top_k_accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'neg_brier_score', 'positive_likelihood_ratio', 'neg_negative_likelihood_ratio', 'adjusted_rand_score', 'rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'reca

In [77]:
#use ridge regression with cross validation to select better alpha values
from sklearn.linear_model import RidgeCV

ridge_cv_model = RidgeCV(alphas = (0.1,1,10),scoring = 'neg_mean_absolute_error')

ridge_cv_model.fit(X_train_scaled, y_train)

In [78]:
test_predictions = ridge_cv_model.predict(X_test_scaled)

In [79]:
ridge_cv_model.coef_

array([ 5.84681185,  0.52142086,  0.71689997, -6.17948738,  3.75034058,
       -1.36283352, -0.08571128,  0.08322815, -0.34893776,  2.16952446,
       -0.47840838,  0.68527348,  0.63080799, -0.5950065 ,  0.61661989,
       -0.31335495,  0.36499629,  0.03328145, -0.13652471])

In [80]:
ridge_cv_model.alpha_

np.float64(0.1)

In [81]:
from sklearn.metrics import mean_absolute_error,mean_squared_error
MAE = mean_absolute_error(y_test,test_predictions)
RMSE = np.sqrt(mean_squared_error(y_test,test_predictions))

In [82]:
MAE

0.4343075766486241

In [83]:
RMSE

np.float64(0.5635899169556714)

In [84]:
#Lasso regression
from sklearn.linear_model import LassoCV

lasso_cv_model = LassoCV(eps=0.001,n_alphas=100,cv=5,max_iter=100000)

lasso_cv_model.fit(X_train_scaled,y_train)

In [85]:
test_predictions = lasso_cv_model.predict(X_test_scaled)

In [86]:
lasso_cv_model.coef_

array([ 5.19612354,  0.43037087,  0.29876351, -4.80417579,  3.46665205,
       -0.40507212,  0.        ,  0.        ,  0.        ,  1.35260206,
       -0.        ,  0.        ,  0.14879719, -0.        ,  0.        ,
        0.        ,  0.09649665,  0.        ,  0.04353956])

In [87]:
lasso_cv_model.alpha_

np.float64(0.004968802520343365)

In [88]:
MAE = mean_absolute_error(y_test,test_predictions)
RMSE = np.sqrt(mean_squared_error(y_test,test_predictions))

In [89]:
MAE

0.4629188302693299

In [90]:
RMSE

np.float64(0.5785146895301981)

In [91]:
#ElasticNet
from sklearn.linear_model import ElasticNetCV

elastic_cv_model = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1],eps=0.001,n_alphas=100,cv=5,max_iter=100000)

elastic_cv_model.fit(X_train_scaled,y_train)

In [92]:
test_predictions = elastic_cv_model.predict(X_test_scaled)
MAE = mean_absolute_error(y_test,test_predictions)
RMSE = np.sqrt(mean_squared_error(y_test,test_predictions))

In [93]:
elastic_cv_model.coef_

array([ 5.19612354,  0.43037087,  0.29876351, -4.80417579,  3.46665205,
       -0.40507212,  0.        ,  0.        ,  0.        ,  1.35260206,
       -0.        ,  0.        ,  0.14879719, -0.        ,  0.        ,
        0.        ,  0.09649665,  0.        ,  0.04353956])

In [94]:
elastic_cv_model.alpha_

np.float64(0.004968802520343365)

In [95]:
MAE

0.4629188302693299

In [96]:
RMSE

np.float64(0.5785146895301981)