In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
import os

# Location of the Advertising CSV. The original absolute path is kept as the
# default so existing runs behave the same; set the ADVERTISING_CSV environment
# variable to point at the file on another machine instead of editing the cell.
ADVERTISING_CSV = os.environ.get(
    'ADVERTISING_CSV',
    '/Users/dyadyundanil/Desktop/Machine learning/DATA/Advertising.csv',
)
df = pd.read_csv(ADVERTISING_CSV)

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

# Feature selection: every column except 'sales' is a predictor, 'sales' is the target
y = df['sales']
X = df.drop(columns='sales')

# Making new features: polynomial terms up to degree 3, without the constant column
poly_expander = PolynomialFeatures(degree=3, include_bias=False)
X_poly = poly_expander.fit_transform(X)

# Hold out 30% of the rows for testing (fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X_poly,
    y,
    test_size=0.3,
    random_state=42,
)

# Data standardization: the scaler is fitted on the training split only,
# then the same transform is applied to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test)

In [7]:
# Ridge regression with a fixed penalty (no cross-validation)
from sklearn.linear_model import Ridge
from sklearn.metrics import (
    mean_absolute_error,
    root_mean_squared_error,
    mean_squared_error,
)

# Fit on the scaled training split with lambda (sklearn's `alpha`) = 10,
# then predict the scaled test split
ridge_model = Ridge(alpha=10).fit(X_train_scaled, y_train)
y_pred = ridge_model.predict(X_test_scaled)

# Hold-out error metrics
MAE, RMSE = (
    mean_absolute_error(y_test, y_pred),
    root_mean_squared_error(y_test, y_pred),
)

print(MAE, RMSE)

0.6296591346758604 0.8916327541710886


In [37]:
from sklearn.linear_model import RidgeCV

# Ridge with the penalty chosen by cross-validation from a small candidate grid,
# scored by negative mean absolute error.
# NOTE(review): the recorded run picked 0.1, the smallest candidate — consider
# extending the grid below 0.1 to check the optimum is not outside it.
candidate_alphas = (0.1, 1.0, 5.0, 10.0)
ridge_cv_model = RidgeCV(
    alphas=candidate_alphas,
    scoring='neg_mean_absolute_error',
)
ridge_cv_model.fit(X_train_scaled, y_train)
print('the best lambda: ', ridge_cv_model.alpha_)
print('coefs of best model: ',ridge_cv_model.coef_)

# MAE and RMSE on the hold-out set
y_pred = ridge_cv_model.predict(X_test_scaled)
MAE, RMSE = (
    mean_absolute_error(y_test, y_pred),
    root_mean_squared_error(y_test, y_pred),
)
print('MAE and RMSE: ', MAE, RMSE)

the best lambda:  0.1
coefs of best model:  [ 5.90523815  0.46316396  0.68028713 -6.17743395  3.73671928 -1.40708382
  0.00624704  0.11128917 -0.2617823   2.17135744 -0.51480159  0.70587211
  0.60311504 -0.53271216  0.5716495  -0.34685826  0.36744388 -0.03938079
 -0.12192939]
MAE and RMSE:  0.46671241131181157 0.5945136671805323


In [81]:
# LASSO regression (least absolute shrinkage and selection operator)
from sklearn.linear_model import LassoCV

# LASSO with the penalty chosen by cross-validation; the search settings are
# collected in one place so they are easy to tune
lasso_search_settings = dict(eps=0.001, n_alphas=1000, max_iter=10000)
lasso_cv_model = LassoCV(**lasso_search_settings)
lasso_cv_model.fit(X_train_scaled, y_train)
print('the best lambda', lasso_cv_model.alpha_)
print('coefs of best model: ',lasso_cv_model.coef_)

# MAE and RMSE on the hold-out set
y_pred = lasso_cv_model.predict(X_test_scaled)
MAE, RMSE = (
    mean_absolute_error(y_test, y_pred),
    root_mean_squared_error(y_test, y_pred),
)
print('MAE and RMSE: ', MAE, RMSE)




the best lambda 0.004924531806474871
coefs of best model:  [ 5.15048089  0.4274257   0.29684446 -4.53337994  3.38937185 -0.4288993
  0.          0.          0.          1.17891049 -0.          0.
  0.16706037 -0.          0.          0.          0.11083672  0.
  0.06155549]
MAE and RMSE:  0.5123045552899799 0.6308043049172861


In [79]:
# Elastic net = Ridge (L2 penalty) + Lasso (L1 penalty)
from sklearn.linear_model import ElasticNetCV

# Cross-validated search over both hyperparameters: the L1/L2 mixing weight
# (`l1_ratio`) and the overall penalty strength (sklearn's `alpha`, i.e. lambda).
elasticnet_cv_model = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1], eps=0.001, n_alphas=1000, max_iter=10000)
elasticnet_cv_model.fit(X_train_scaled, y_train)
# Two values are printed (l1_ratio_ first, then alpha_), so the label names both;
# the earlier label mentioned only "lambda", which mislabelled the first number.
print('the best l1_ratio and lambda: ', elasticnet_cv_model.l1_ratio_, elasticnet_cv_model.alpha_)
print('coefs of best model: ',elasticnet_cv_model.coef_)

# MAE and RMSE on the hold-out set
# NOTE(review): in the recorded run the selected l1_ratio was 1.0 and the
# coefficients/metrics match the LassoCV cell exactly — confirm after re-running.
y_pred = elasticnet_cv_model.predict(X_test_scaled)
MAE = mean_absolute_error(y_test, y_pred)
RMSE = root_mean_squared_error(y_test, y_pred)
print('MAE and RMSE: ', MAE, RMSE)

the best lambda 1.0 0.004924531806474871
coefs of best model:  [ 5.15048089  0.4274257   0.29684446 -4.53337994  3.38937185 -0.4288993
  0.          0.          0.          1.17891049 -0.          0.
  0.16706037 -0.          0.          0.          0.11083672  0.
  0.06155549]
MAE and RMSE:  0.5123045552899799 0.6308043049172861
