In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv('Advertising.csv')
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [3]:
X = df.drop('sales' , axis=1)
y = df['sales']

In [4]:
from sklearn.preprocessing import PolynomialFeatures

In [5]:
poly_conv = PolynomialFeatures(degree = 3 , include_bias = False)

In [6]:
X_poly = poly_conv.fit_transform(X)

In [7]:
X.shape

(200, 3)

In [8]:
X_poly.shape

(200, 19)

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
X_train , X_test , y_train , y_test = train_test_split(X_poly , y , test_size = 0.2 , random_state = 53)

In [11]:
X_train.shape

(160, 19)

In [12]:
from sklearn.preprocessing import StandardScaler,MinMaxScaler

In [13]:
scaler = StandardScaler()
#scaler = MinMaxScaler()

In [14]:
scaler.fit(X_train)

In [15]:
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Ridge Model

In [16]:
from sklearn.linear_model import Ridge

In [17]:
ridge = Ridge(alpha = 10)

In [18]:
ridge.fit(X_train , y_train)

In [19]:
y_pred = ridge.predict(X_test)

In [20]:
from sklearn.metrics import r2_score , mean_absolute_error , mean_squared_error

In [21]:
r2 = r2_score(y_test , y_pred)
mae = mean_absolute_error(y_test , y_pred)
rmse = mean_squared_error(y_test , y_pred) ** 0.5

In [22]:
rmse

0.7240855313266406

In [23]:
ridge.coef_

array([ 8.39657261e-02,  1.12283544e-02,  7.28415721e-03, -4.11393825e-04,
        1.59316351e-03, -2.50446025e-04, -1.60221106e-03,  1.48308257e-03,
       -3.69994460e-05,  7.13431879e-07, -1.57757828e-06,  8.31817500e-07,
        3.11214639e-06, -5.02394554e-06,  8.54089914e-07,  1.75565147e-05,
       -5.97076686e-06, -3.46356697e-06, -6.87246765e-07])

In [24]:
from sklearn.linear_model import RidgeCV

In [25]:
ridge_cv = RidgeCV(alphas=(0.1,1,10) , scoring='neg_root_mean_squared_error')

In [26]:
ridge_cv.fit(X_train , y_train)

In [27]:
ridge_cv.best_score_

-0.5937685243497502

In [28]:
ridge_cv.alpha_

10.0

In [29]:
from sklearn.metrics import SCORERS

In [30]:
SCORERS.keys() # all scoring metrics.

dict_keys(['explained_variance', 'r2', 'max_error', 'matthews_corrcoef', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_root_mean_squared_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'accuracy', 'top_k_accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'neg_brier_score', 'adjusted_rand_score', 'rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'jaccard', 'jaccard_macro', 'jaccard_micro', 'jaccard_s

In [31]:
y_pred = ridge_cv.predict(X_test)

In [32]:
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred)

In [33]:
rmse

0.5254090819805524

In [34]:
ridge_cv.coef_

array([ 8.39657261e-02,  1.12283544e-02,  7.28415721e-03, -4.11393609e-04,
        1.59316368e-03, -2.50446132e-04, -1.60221103e-03,  1.48308257e-03,
       -3.69995270e-05,  7.13975169e-07, -1.54303052e-06,  8.01606802e-07,
        3.11997792e-06, -5.02271996e-06,  8.40336725e-07,  1.75578457e-05,
       -5.97058374e-06, -3.46513889e-06, -6.94645450e-07])

# Lasso Model

In [35]:
from sklearn.linear_model import Lasso , LassoCV

In [45]:
lasso_cv = LassoCV(eps = 0.1 , n_alphas = 100 , cv = 5 , max_iter = 100000) # lasso_cv = LassoCV(alphas = np.linspace(0,01,10,100) , cv = 5)

In [56]:
lasso_cv = LassoCV(alphas = np.linspace(0.01,10,100) , cv = 5 , max_iter = 100000)

In [57]:
lasso_cv.fit(X_train , y_train)

In [58]:
lasso_cv.alpha_

0.1109090909090909

In [59]:
lasso_cv.coef_

array([ 8.03569534e-02,  0.00000000e+00,  0.00000000e+00, -3.94979033e-04,
        1.65089468e-03, -2.13438632e-04, -1.38846281e-03,  1.67244628e-03,
        0.00000000e+00,  6.88480574e-07, -1.68104245e-06,  7.83096395e-07,
        2.84934234e-06, -5.47090057e-06,  8.04683065e-07,  1.57056451e-05,
       -7.49649791e-06, -3.65585952e-06, -8.24531009e-07])

In [60]:
y_pred = lasso_cv.predict(X_test)

In [61]:
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred)

In [62]:
r2

0.9803979187434935

In [63]:
mae

0.47695323376833354

In [64]:
rmse

0.5340150892027247

In [None]:
# Done for now.