In [4]:
# importing basic libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# importing polynomial features to perfrom perfrom polynomial transformation of features
from sklearn.preprocessing import PolynomialFeatures

# for train test split
from sklearn.model_selection import train_test_split

# model evaluation parameters
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# for scaling data into standard noramal form (Z-scores)
from sklearn.preprocessing import StandardScaler

# to perfrom normal ridge regression
from sklearn.linear_model import Ridge

# to perfrom ridge regression using cross-validation
from sklearn.linear_model import RidgeCV

## RIDGE REGRESSION

In [5]:
# importing dataset
df = pd.read_csv("C:\\Users\\HP\\Desktop\\python\\udemy_py\\08-Linear-Regression-Models\\Advertising.csv")

In [6]:
# splittng data in features and target
x = df.drop('sales',axis = 1)
y = df['sales']

In [7]:
# converting features into polynomial feature of degree 3
poly_x = PolynomialFeatures(degree = 3, include_bias=False).fit_transform(x)

In [8]:
# train test split of features & target
x_train, x_test,y_train,y_test = train_test_split(poly_x,y,test_size=0.3, random_state= 101)

In [9]:
# creating an instace for StandardScaler
scaler = StandardScaler()

In [10]:
# performing fit & transfrom on x_train
x_train = scaler.fit_transform(x_train)

# performing transfrom on x_test data, we dont apply fit so that X_test reamins unseen to model
x_test = scaler.transform(x_test)

### Ridge regression

- y hat = B0 + B1X1 + B2X2 + B3X3 + BX4......BiXi
- ridge regression formula = (y - y hat) squared + lambda(Bi) squared

In [8]:
help(Ridge)

Help on class Ridge in module sklearn.linear_model._ridge:

class Ridge(sklearn.base.MultiOutputMixin, sklearn.base.RegressorMixin, _BaseRidge)
 |  Ridge(alpha=1.0, *, fit_intercept=True, copy_X=True, max_iter=None, tol=0.0001, solver='auto', positive=False, random_state=None)
 |  
 |  Linear least squares with l2 regularization.
 |  
 |  Minimizes the objective function::
 |  
 |  ||y - Xw||^2_2 + alpha * ||w||^2_2
 |  
 |  This model solves a regression model where the loss function is
 |  the linear least squares function and regularization is given by
 |  the l2-norm. Also known as Ridge Regression or Tikhonov regularization.
 |  This estimator has built-in support for multi-variate regression
 |  (i.e., when y is a 2d-array of shape (n_samples, n_targets)).
 |  
 |  Read more in the :ref:`User Guide <ridge_regression>`.
 |  
 |  Parameters
 |  ----------
 |  alpha : {float, ndarray of shape (n_targets,)}, default=1.0
 |      Constant that multiplies the L2 term, controlling regula

In [9]:
# here we use aplha refers to lambda value
ridge_model = Ridge(alpha = 10)

In [10]:
# fitting ridge regression model
ridge_model.fit(x_train,y_train)

In [11]:
# predictions using model on x_test
pred_y = ridge_model.predict(x_test)

In [12]:
mae = mean_absolute_error(y_test, pred_y)
mse = mean_squared_error(y_test, pred_y)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, pred_y)

print ('Mean Absolute Error: ',mae)
print ('Mean Squared Error: ',mse)
print ('root Mean squared Error: ',mae)
print('R squared: ',r2)

Mean Absolute Error:  0.5774404204714177
Mean Squared Error:  0.8003783071528405
root Mean squared Error:  0.5774404204714177
R squared:  0.9716501253569217


## RidgeCV regression

In [13]:
# See all scoring options: 
SCORERS.keys()

dict_keys(['explained_variance', 'r2', 'max_error', 'matthews_corrcoef', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_root_mean_squared_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'accuracy', 'top_k_accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'neg_brier_score', 'positive_likelihood_ratio', 'neg_negative_likelihood_ratio', 'adjusted_rand_score', 'rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weig

In [14]:
# in ridgeCV we set various values of alplha and the model runs on all the passed values of aplha and fits on the best 
# performing aplha value based on passed scoring method

# Generally ridgecv model assumes that higher the score better the model, but for mean absolute error - lower value is better
# so we use neg_mean_absolute_error as scoring method to get 1-mean_absolute_error which follows property of mean aboslute error

rig_cv = RidgeCV(alphas=(0.1,1,10),scoring='neg_mean_absolute_error')

In [15]:
# fitting the model
rig_cv.fit(x_train,y_train)

In [16]:
# checking the best performing aplha vlaue 
rig_cv.alpha_

0.1

In [17]:
# predictions
y_cv_pred = rig_cv.predict(x_test)

In [18]:
# error metircs
mean_absolute_error(y_test, y_cv_pred)

0.42737748843313855

In [19]:
mean_squared_error(y_test, y_cv_pred)

0.3820129881485325

## LASSO REGRESSION
- L - least, A - absolute, S - shrinkage, S - selection, O - operator

In [11]:
# in LassoCV we set various values of alplha and the model runs on all the passed values of aplha and fits on the best 
# performing aplha value based on passed scoring method

from sklearn.linear_model import LassoCV

In [55]:
# eps - stands for epsilon and is used to set a stopping criterion for the algorithm. 
# In practical terms, "eps" controls how close the algorithm should get to the optimal solution before terminating. 
# Smaller values of "eps" lead to more accurate solutions 
# alpha = List of alphas where to compute the models
# cv = Determines the cross-validation splitting strategy (For int/None inputs, KFold is used.)

lasso_cv = LassoCV(eps = 0.01,n_alphas=100,cv = 10, max_iter=1000) 

In [56]:
# fitting lasso regression
lasso_cv.fit(x_train,y_train)

In [57]:
# as we set value of aplhas - 100 model checked performance on 100 values of aplha
len(lasso_cv.alphas_)

100

In [58]:
# best performing alpha value
lasso_cv.alpha_

0.049430709092258274

In [59]:
# lassoCV forces some of coeff to be exactly = 0 when tuning parameters is sifficiently large
# due to which variable selection becomes easy
lasso_cv.coef_

array([ 2.35600233,  0.21183181,  0.        , -0.        ,  3.78675114,
       -0.        ,  0.        ,  0.0624318 ,  0.        , -1.01152151,
       -0.        , -0.        ,  0.        ,  0.        , -0.        ,
        0.        ,  0.        ,  0.        ,  0.        ])

In [60]:
pred = lasso_cv.predict(x_test)

In [61]:
mae = mean_absolute_error(y_test, pred)
mse = mean_squared_error(y_test, pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, pred)

print ('Mean Absolute Error: ',mae)
print ('Mean Squared Error: ',mse)
print ('root Mean squared Error: ',mae)
print('R squared: ',r2)

Mean Absolute Error:  0.5159788188265405
Mean Squared Error:  0.5799493101252956
root Mean squared Error:  0.5159788188265405
R squared:  0.9794578512505185


# Elastic Net

In [28]:
from sklearn.linear_model import ElasticNetCV

In [31]:
elastic_model = ElasticNetCV(l1_ratio=[.1, .5, .7,.9, .95, .99, 1],eps = 0.001,n_alphas=100,
                            max_iter=100000)

In [32]:
elastic_model.fit(x_train,y_train)

In [33]:
elastic_model.l1_ratio_

1.0

In [35]:
test_predictions = elastic_model.predict(x_test)

In [36]:
MAE = mean_absolute_error(y_test,test_predictions)
MSE = mean_squared_error(y_test,test_predictions)
RMSE = np.sqrt(MSE)

print ('Mean Absolute Error: ',mae)
print ('Mean Squared Error: ',mse)
print ('root Mean squared Error: ',mae)
print('R squared: ',r2)

Mean Absolute Error:  0.6541723161252854
Mean Squared Error:  1.2787088713079842
root Mean squared Error:  0.6541723161252854
R squared:  0.9547073728977878


In [37]:
elastic_model.coef_

array([ 4.86023329,  0.12544598,  0.20746872, -4.99250395,  4.38026519,
       -0.22977201, -0.        ,  0.07267717, -0.        ,  1.77780246,
       -0.69614918, -0.        ,  0.12044132, -0.        , -0.        ,
       -0.        ,  0.        ,  0.        , -0.        ])