In [68]:
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Lasso, LassoCV
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.linear_model import ElasticNet, ElasticNetCV
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt

In [70]:
sales_df = pd.read_csv('Advertising.csv')
sales_df.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [71]:
X = sales_df[['TV', 'Radio', 'Newspaper']]
y = sales_df['Sales']

In [72]:
# X = X.apply(lambda rec: (rec - rec.mean())/rec.std(), axis= 0)
# y = np.array((y - y.mean())/y.std())

In [73]:
X

Unnamed: 0,TV,Radio,Newspaper
0,230.1,37.8,69.2
1,44.5,39.3,45.1
2,17.2,45.9,69.3
3,151.5,41.3,58.5
4,180.8,10.8,58.4
...,...,...,...
195,38.2,3.7,13.8
196,94.2,4.9,8.1
197,177.0,9.3,6.4
198,283.6,42.0,66.2


### Scikit-Learn Library for Machine Learning

In [74]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, 
random_state = 42)

In [75]:
len(X_train)

140

In [76]:
len(X_test)

60

In [77]:
# Initializing the model
model = LinearRegression()
# Fitting training data to the model
model.fit(X_train, y_train)

LinearRegression()

In [78]:
model.intercept_

2.7089490925159048

In [79]:
model.coef_

array([0.04405928, 0.1992875 , 0.00688245])

In [80]:
list((zip(X_train.columns, model.coef_)))

[('TV', 0.04405928095746521),
 ('Radio', 0.1992874968989396),
 ('Newspaper', 0.006882452222275501)]

Sales = 2.708 + 0.044*TV + 0.199*Radio + 0.006*Newspaper

In [81]:
# Predicting the y value from the test set

In [82]:
y_pred = model.predict(X_test)

In [83]:
# Creating DataFrame with 3 columns named: actual, predicted and residuals
# to store the respective values

test_pred_df = pd.DataFrame({'actual': y_test, 
                             'predicted': np.round(y_pred,2)})

In [84]:
test_pred_df[:10]

Unnamed: 0,actual,predicted
95,16.9,16.57
15,22.4,21.19
30,21.4,21.55
158,7.3,10.89
128,24.7,22.2
115,12.6,13.36
69,22.3,21.2
170,8.4,7.35
174,11.5,13.28
45,14.9,15.12


In [85]:
# y_train contains the actual value and the predicted value is
# returned from predict() method after passing the X values of the
# training data.

r2 = metrics.r2_score(y_train, model.predict(X_train))
print('R Squared: ', r2)

R Squared:  0.9055159502227753


In [86]:
r2 = metrics.r2_score(y_test, model.predict(X_test))
print('R Squared: ', r2)

R Squared:  0.8609466508230367


In [87]:
# y_pred contains predicted value of test data
mse = metrics.mean_squared_error(y_test, y_pred)

In [88]:
# Taking square root of MSE and then round off to two decimal values
rmse = round(np.sqrt(mse), 2)
print('RMSE: ', rmse)

RMSE:  1.95


### Validation Dataset

In [89]:
sales_df = pd.read_csv('Advertising.csv')
sales_df.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [6]:
X_train, X_other, y_train, y_other = train_test_split(sales_df[['TV', 'Radio',
'Newspaper']], sales_df['Sales'], train_size=0.7, random_state = 42)

In [7]:
X_eval, X_test, y_eval, y_test = train_test_split(X_other, y_other, 
train_size=0.5, random_state = 42)

In [8]:
from sklearn.preprocessing import StandardScaler

# (actual value - mean)/standard deviation (aka Z score)
# [mean = 0
# std = 1]

In [9]:
scaler = StandardScaler()

In [10]:
X_train = scaler.fit_transform(X_train)

In [11]:
X_test = scaler.transform(X_test)

In [12]:
X_eval = scaler.transform(X_eval)

In [13]:
lm = LinearRegression()

In [14]:
lm.fit(X_train, y_train)

LinearRegression()

In [68]:
pred = lm.predict(X_eval)

In [69]:
metrics.mean_squared_error(y_eval, pred)

2.549038568717419

In [70]:
# evaluate and perform usual operations and finally use the test data only once

### KFold Validation

In [90]:
sales_df = pd.read_csv('Advertising.csv')
# Printing first few records
sales_df.head()
X = sales_df[['TV', 'Radio', 'Newspaper']]
y = sales_df['Sales']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

In [91]:
lm = LinearRegression()

In [92]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [93]:
from sklearn.model_selection import cross_val_score

In [97]:
scores = cross_val_score(lm, X_train, y_train, scoring = 'r2', cv=5)

In [98]:
# r2 = .75, .85
# MSE = -2.5, -3.5

In [99]:
from sklearn.metrics import SCORERS

In [100]:
SCORERS.keys()

dict_keys(['explained_variance', 'r2', 'max_error', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_root_mean_squared_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'accuracy', 'top_k_accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'neg_brier_score', 'adjusted_rand_score', 'rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'jaccard', 'jaccard_macro', 'jaccard_micro', 'jaccard_samples', 'jaccard_wei

In [101]:
# https://scikit-learn.org/stable/modules/model_evaluation.html

In [102]:
abs(scores.mean())

0.8627872839147471

In [104]:
scores

array([0.89667066, 0.92803914, 0.80641155, 0.91762366, 0.7651914 ])

In [105]:
lm.fit(X_train, y_train)

LinearRegression()

In [106]:
lm.coef_

array([ 3.76599021,  2.76548662, -0.00690986])

In [107]:
pred = lm.predict(X_test)

In [109]:
metrics.r2_score(y_test, pred)

0.9185780903322445

The choice of k is usually 5 or 10.

### Lasso Regression

# L1 Norm

In [110]:
from sklearn.linear_model import Lasso

In [146]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [147]:
lasso_model = Lasso(alpha = 0.01)

In [148]:
lasso_model.fit(X_train, y_train)

Lasso(alpha=0.01, random_state=101)

In [149]:
lasso_model.coef_

array([ 3.75598404,  2.7540475 , -0.        ])

In [150]:
predict = lasso_model.predict(X_test)

In [151]:
metrics.r2_score(y_test, predict)

0.9182975460753906

### Lasso with CV

In [139]:
from sklearn.linear_model import LassoCV

In [205]:
lassoCV_model = LassoCV(alphas=[.1, 0.01, 10])

In [153]:
lassoCV_model.fit(X_train, y_train)

LassoCV(alphas=[0.1, 0.01, 10])

In [154]:
lassoCV_model.alpha_

0.1

In [155]:
predict = lassoCV_model.predict(X_test)

In [156]:
metrics.r2_score(y_test, predict)

0.915438906791447

In [157]:
# or set alpha to automatic

In [206]:
lassoCV_model = LassoCV(n_alphas=1000)

In [207]:
lassoCV_model.fit(X_train, y_train)

LassoCV(n_alphas=1000)

In [208]:
lassoCV_model.alpha_

0.09472651317300003

In [209]:
predict = lassoCV_model.predict(X_test)

In [210]:
metrics.r2_score(y_test, predict)

0.9156366097134607

# L2 Norm

In [163]:
from sklearn.linear_model import Ridge

In [211]:
ridge_model  = Ridge(alpha = .1)

In [212]:
ridge_model.fit(X_train, y_train)

Ridge(alpha=0.1)

In [213]:
ridge_model.coef_

array([ 3.76333559,  2.76339947, -0.00593674])

In [214]:
predict = ridge_model.predict(X_test)

In [215]:
metrics.r2_score(y_test, predict)

0.918509515975792

In [170]:
# change alpha parameter

In [221]:
ridge_model  = Ridge(alpha = 5)

In [222]:
ridge_model.fit(X_train, y_train)

Ridge(alpha=5)

In [223]:
ridge_model.coef_

array([3.63784885, 2.66537911, 0.03829115])

In [224]:
predict = ridge_model.predict(X_test)

In [225]:
metrics.r2_score(y_test, predict)

0.9142964955815641

### Ridge with CV

In [116]:
from sklearn.linear_model import RidgeCV

In [226]:
ridge_modelCV  = RidgeCV(alphas=(0.1, 1.0, 10))

In [227]:
ridge_modelCV.fit(X_train, y_train)

RidgeCV(alphas=array([ 0.1,  1. , 10. ]))

In [228]:
# ridge_modelCV.cv_values_

In [229]:
ridge_modelCV.coef_

array([ 3.76333559,  2.76339947, -0.00593674])

In [230]:
predict = ridge_modelCV.predict(X_test)

In [231]:
metrics.r2_score(y_test, predict)

0.9185095159757795

### Elastic Net

In [183]:
from sklearn.linear_model import ElasticNet

In [258]:
enet = ElasticNet(alpha = .01, l1_ratio = 0.5)

In [259]:
enet.fit(X_train, y_train)

ElasticNet(alpha=0.01, max_iter=10000)

In [260]:
pred = enet.predict(X_test)

In [261]:
metrics.r2_score(y_test, pred)

0.9179478260809932

### Elastic Net with CV

In [188]:
from sklearn.linear_model import ElasticNetCV

In [236]:
enetCV = ElasticNetCV(l1_ratio = [.1, .5, .7, .8, .9], 
                     n_alphas=100)

In [237]:
enetCV.fit(X_train, y_train)

ElasticNetCV(l1_ratio=[0.1, 0.5, 0.7, 0.8, 0.9])

In [238]:
enetCV.l1_ratio_

0.9

In [239]:
enetCV.alpha_

0.08848730467563237

In [240]:
pred = enetCV.predict(X_test)

In [241]:
metrics.r2_score(y_test, pred)

0.9150494125440356

### Grid Search CV

In [242]:
sales_df = pd.read_csv('Advertising.csv')
# Printing first few records
sales_df.head()
X = sales_df[['TV', 'Radio', 'Newspaper']]
y = sales_df['Sales'] 

In [243]:
X_train, X_test, y_train, y_test = train_test_split(X, y,train_size=0.7, random_state = 101)

In [244]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [198]:
base_model = ElasticNet()

In [199]:
param = {'alpha': [ 0.001, 0.01, 1, 5, 10, 50], 'l1_ratio': [.1, .5, .7, .95, .99, 1]}

In [140]:
from sklearn.model_selection import GridSearchCV

In [200]:
grid_model = GridSearchCV(estimator=base_model, param_grid=param, scoring='r2', 
                          cv=5,verbose=0)

In [201]:
grid_model.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=ElasticNet(),
             param_grid={'alpha': [0.001, 0.01, 1, 5, 10, 50],
                         'l1_ratio': [0.1, 0.5, 0.7, 0.95, 0.99, 1]},
             scoring='r2')

In [202]:
grid_model.best_estimator_

ElasticNet(alpha=0.01, l1_ratio=1)

In [195]:
# grid_model.cv_results_
# pd.DataFrame(grid_model.cv_results_)

In [203]:
y_pred = grid_model.predict(X_test)

In [204]:
metrics.r2_score(y_test, y_pred)

0.9182975460753906