# Regularization

In [34]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_squared_error

In [2]:
# Load the advertising dataset; the relative path is resolved against the notebook's working directory.
df = pd.read_csv("Advertising.csv")

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,TV,radio,newspaper,sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9


In [4]:
# Drop the leftover index column that was saved into the CSV.
# `errors="ignore"` keeps this cell idempotent: re-running it after the column
# has already been removed is a no-op instead of raising KeyError.
df = df.drop(columns="Unnamed: 0", errors="ignore")

In [5]:
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [6]:
df.shape

(200, 4)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   radio      200 non-null    float64
 2   newspaper  200 non-null    float64
 3   sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB


In [8]:
df.describe()

Unnamed: 0,TV,radio,newspaper,sales
count,200.0,200.0,200.0,200.0
mean,147.0425,23.264,30.554,14.0225
std,85.854236,14.846809,21.778621,5.217457
min,0.7,0.0,0.3,1.6
25%,74.375,9.975,12.75,10.375
50%,149.75,22.9,25.75,12.9
75%,218.825,36.525,45.1,17.4
max,296.4,49.6,114.0,27.0


In [11]:
# Features: every column of the frame except the target.
x = df.drop(columns=["sales"])

In [12]:
x.head()

Unnamed: 0,TV,radio,newspaper
0,230.1,37.8,69.2
1,44.5,39.3,45.1
2,17.2,45.9,69.3
3,151.5,41.3,58.5
4,180.8,10.8,58.4


In [13]:
# Target: the `sales` column, used as the regression target by every model below.
y = df["sales"]

In [14]:
y.head()

0    22.1
1    10.4
2     9.3
3    18.5
4    12.9
Name: sales, dtype: float64

### We will expand the data into polynomial features: increasing the number of features makes the
### effects of regularization more visible.

In [10]:
# Degree-3 polynomial expansion without the constant (bias) column;
# it turns the 3 input columns into 19 features (see the shape check below).
polynomial_converter = PolynomialFeatures(degree= 3, include_bias= False)

In [15]:
# Expand the feature frame into its polynomial terms (returned as a NumPy array, not a DataFrame).
poly_features = polynomial_converter.fit_transform(x)

In [16]:
poly_features.shape

(200, 19)

In [17]:
x.shape

(200, 3)

In [19]:
# Train/test split: hold out 30% of the rows, with a fixed seed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    poly_features,
    y,
    test_size=0.3,
    random_state=42,
)

In [21]:
# Sanity-check the split sizes: one shape per line, in the order
# x_train, x_test, y_train, y_test.
print(*(part.shape for part in (x_train, x_test, y_train, y_test)), sep="\n")

(140, 19)
(60, 19)
(140,)
(60,)


### Scaling the Data

In [23]:
x_train[0]

array([2.84300000e+02, 1.06000000e+01, 6.40000000e+00, 8.08264900e+04,
       3.01358000e+03, 1.81952000e+03, 1.12360000e+02, 6.78400000e+01,
       4.09600000e+01, 2.29789711e+07, 8.56760794e+05, 5.17289536e+05,
       3.19439480e+04, 1.92869120e+04, 1.16449280e+04, 1.19101600e+03,
       7.19104000e+02, 4.34176000e+02, 2.62144000e+02])

In [29]:
# NOTE(review): this cell was executed as In [29], i.e. after the scaler cells
# (In [25] / In [26]), so the output shown here is the already *scaled* row —
# it is identical to the In [28] output further down. On a fresh
# Restart & Run All this cell would display the raw, unscaled values instead.
x_test[0]

array([ 0.08266439,  0.55371878,  1.08861066, -0.18749269,  0.42213484,
        0.79788593,  0.32285459,  0.96554739,  0.89655537, -0.36600988,
        0.09130711,  0.33874113,  0.27095882,  0.75045097,  0.6485836 ,
        0.10170039,  0.63526601,  0.77891369,  0.58997769])

In [None]:
# The polynomial features span very different magnitudes (from single digits up to ~1e7 in the row above), so we need to scale the data.

In [24]:
# Scaler for the polynomial features; it is fit on the training split in the next cell.
sc = StandardScaler()

In [25]:
# Fit the scaler on the training split only and replace x_train with its scaled version.
# NOTE: this overwrites x_train in place, so re-running this cell without re-running
# the train/test split would re-fit on (and re-scale) already-scaled data.
x_train = sc.fit_transform(x_train)

In [26]:
# Scale the test split with the statistics learned from the training split
# (transform only, no refit, so the test rows do not influence the scaler).
# NOTE: this overwrites x_test, so the cell is not safe to re-run on its own.
x_test = sc.transform(x_test)

In [27]:
x_train[0]

array([ 1.53003874, -0.8862929 , -1.16598019,  1.93705409, -0.22476356,
       -0.62651363, -0.90793157, -0.850692  , -0.81317305,  2.23096225,
        0.10749852, -0.35876444, -0.61820397, -0.60530328, -0.5751799 ,
       -0.81381745, -0.72447935, -0.64260728, -0.56741817])

In [28]:
x_test[0]

array([ 0.08266439,  0.55371878,  1.08861066, -0.18749269,  0.42213484,
        0.79788593,  0.32285459,  0.96554739,  0.89655537, -0.36600988,
        0.09130711,  0.33874113,  0.27095882,  0.75045097,  0.6485836 ,
        0.10170039,  0.63526601,  0.77891369,  0.58997769])

## Ridge (L2) Regression:

In [30]:
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV

In [31]:
# Building Ridge Regression Model:
# alpha=5 is a fixed penalty strength used as a first attempt; RidgeCV further down
# searches over several alpha values instead.
ridge_model = Ridge(alpha= 5)

In [32]:
# Fit the ridge model on the scaled training split.
ridge_model.fit(x_train, y_train)

Ridge(alpha=5)

In [33]:
# Predict on the held-out test split. `pred` is reused by every model below,
# so it always holds the predictions of the most recently evaluated model.
pred = ridge_model.predict(x_test)

In [35]:
# Root mean squared error of the alpha=5 ridge model on the test split.
RMSE = np.sqrt(mean_squared_error(y_test, pred))

In [36]:
RMSE

0.8029898376541164

In [None]:
# Alpha can take any value from 0 to +infinity.

# So how do we find the best alpha value?

# We can use RidgeCV (cross-validation) and pass in multiple alpha values, so that a model is evaluated for each one.

In [37]:
# Building RidgeCV:
# Each alpha in the grid is scored with 10-fold cross-validation on the data passed to fit(),
# using negative mean squared error (higher is better) as the selection criterion.
cv_ridge_model = RidgeCV(alphas=(0.01, 0.1, 1, 10, 100), cv= 10, scoring= "neg_mean_squared_error")

In [38]:
# Run the cross-validated alpha search on the scaled training split.
cv_ridge_model.fit(x_train, y_train)

RidgeCV(alphas=array([1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02]), cv=10,
        scoring='neg_mean_squared_error')

In [39]:
# Alpha value that achieved the best cross-validated score
# (i.e. the lowest mean squared error) among the candidates.
cv_ridge_model.alpha_

0.1

In [40]:
# Predict on the test split with the model refit at the selected alpha.
pred = cv_ridge_model.predict(x_test)

In [41]:
# Test-split RMSE of the cross-validated ridge model (compare with `RMSE` from the alpha=5 model).
RMSE_ridge_cv = np.sqrt(mean_squared_error(y_test, pred))

In [42]:
RMSE_ridge_cv

0.5945136671792901

In [43]:
cv_ridge_model.coef_

array([ 5.90523815,  0.46316396,  0.68028713, -6.17743395,  3.73671928,
       -1.40708382,  0.00624704,  0.11128917, -0.2617823 ,  2.17135744,
       -0.51480159,  0.70587211,  0.60311504, -0.53271216,  0.5716495 ,
       -0.34685826,  0.36744388, -0.03938079, -0.12192939])

## Lasso (L1) Regression:

In [44]:
from sklearn.linear_model import Lasso, LassoCV

In [45]:
# Building Lasso Regression Model:
# alpha=1 is a fixed penalty strength; LassoCV further down searches for a better value.
lasso_model= Lasso(alpha=1)

In [46]:
# Fit the lasso model on the scaled training split.
lasso_model.fit(x_train, y_train)

Lasso(alpha=1)

In [47]:
# Predict on the test split (overwrites the previous model's `pred`).
pred= lasso_model.predict(x_test)

In [48]:
# Test-split RMSE of the alpha=1 lasso model.
RMSE_lasso = np.sqrt(mean_squared_error(y_test, pred))

In [49]:
RMSE_lasso

1.4826023115977718

In [51]:
# Building LassoCV Model:
# Searches an automatically generated path of 100 alphas (eps=0.001 sets alpha_min / alpha_max)
# with 10-fold cross-validation. max_iter is set very high — presumably to avoid
# coordinate-descent convergence warnings on the correlated polynomial features.
cv_lasso_model = LassoCV(eps=0.001, n_alphas= 100, max_iter= 1000000, cv= 10)

In [52]:
# Run the cross-validated alpha search on the scaled training split.
cv_lasso_model.fit(x_train, y_train)

LassoCV(cv=10, max_iter=1000000)

In [53]:
cv_lasso_model.alpha_

0.0049245318064748715

In [54]:
# Predict on the test split with the model fit at the selected alpha.
pred= cv_lasso_model.predict(x_test)

In [55]:
# Test-split RMSE of the cross-validated lasso model (compare with `RMSE_lasso`).
RMSE_lasso_cv = np.sqrt(mean_squared_error(y_test, pred))

In [56]:
RMSE_lasso_cv

0.6308043049172876

In [57]:
cv_lasso_model.coef_

array([ 5.15048089,  0.4274257 ,  0.29684446, -4.53337994,  3.38937185,
       -0.4288993 ,  0.        ,  0.        ,  0.        ,  1.17891049,
       -0.        ,  0.        ,  0.16706037, -0.        ,  0.        ,
        0.        ,  0.11083672,  0.        ,  0.06155549])

## Elastic Net (L1 and L2) Regression:

In [58]:
from sklearn.linear_model import ElasticNet, ElasticNetCV

In [59]:
# Building Elastic Net Regression Model:
# l1_ratio mixes the two penalties (0 = pure L2, 1 = pure L1); 0.5 uses both.
elasticnet_model = ElasticNet(alpha= 1, l1_ratio= 0.5)

In [60]:
# Fit the elastic net model on the scaled training split.
elasticnet_model.fit(x_train, y_train)

ElasticNet(alpha=1)

In [62]:
# Predict on the test split (overwrites the previous model's `pred`).
pred = elasticnet_model.predict(x_test)

In [63]:
# Test-split RMSE of the alpha=1, l1_ratio=0.5 elastic net model.
RMSE_elasticnet = np.sqrt(mean_squared_error(y_test, pred))

In [64]:
RMSE_elasticnet

1.6865065155704049

In [67]:
elasticnet_model.coef_

array([0.88889213, 0.33791231, 0.        , 0.34854326, 1.106399  ,
       0.05523197, 0.16728156, 0.        , 0.        , 0.00113275,
       0.70637146, 0.        , 0.78302792, 0.18044444, 0.        ,
       0.03949557, 0.        , 0.        , 0.        ])

In [65]:
# Building ElasticNetCV Model:
# Cross-validates (10 folds) over both an automatically generated path of 100 alphas
# and the listed l1_ratio candidates; l1_ratio=1 corresponds to a pure L1 (lasso) penalty.
cv_elasticnet_model = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1], eps= 0.001, n_alphas= 100, max_iter= 1000000, cv= 10)

In [66]:
# Run the cross-validated search over alpha and l1_ratio on the scaled training split.
cv_elasticnet_model.fit(x_train, y_train)

ElasticNetCV(cv=10, l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1],
             max_iter=1000000)

In [68]:
# Predict on the test split with the model fit at the selected alpha / l1_ratio.
pred= cv_elasticnet_model.predict(x_test)

In [69]:
# Test-split RMSE of the cross-validated elastic net model (compare with `RMSE_elasticnet`).
RMSE_elasticnet_cv = np.sqrt(mean_squared_error(y_test, pred))

In [70]:
RMSE_elasticnet_cv

0.6308043049172876

In [71]:
cv_elasticnet_model.coef_

array([ 5.15048089,  0.4274257 ,  0.29684446, -4.53337994,  3.38937185,
       -0.4288993 ,  0.        ,  0.        ,  0.        ,  1.17891049,
       -0.        ,  0.        ,  0.16706037, -0.        ,  0.        ,
        0.        ,  0.11083672,  0.        ,  0.06155549])

In [72]:
cv_elasticnet_model.alpha_

0.0049245318064748715

In [73]:
# Selected L1/L2 mix. The recorded value of 1.0 means the search chose a pure lasso penalty,
# which is why the alpha, coefficients and test RMSE above are identical to the LassoCV results.
cv_elasticnet_model.l1_ratio_

1.0