In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import scipy

In [3]:
df = pd.read_csv('/Users/ben.meir/Downloads/Advertising.csv')

df

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,9.7
197,177.0,9.3,6.4,12.8
198,283.6,42.0,66.2,25.5


In [4]:
X = df.drop('sales', axis=1)
y = df['sales']

In [5]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler

poly_converter = PolynomialFeatures(degree=3, include_bias=False)
poly_features = poly_converter.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(poly_features, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
scaler.fit(X_train)
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)

In [6]:
from sklearn.linear_model import LassoCV

lasso_cv_model = LassoCV(eps=0.1, n_alphas=100)
lasso_cv_model.fit(scaled_X_train, y_train)
test_predictions = lasso_cv_model.predict(scaled_X_test)

MAE = mean_absolute_error(y_test, test_predictions)
RMSE = np.sqrt(mean_squared_error(y_test, test_predictions))

print(f'MAE: {MAE}')
print(f'RMSE: {RMSE}')

MAE: 0.6811456342837985
RMSE: 1.0349127365478732




In [8]:
lasso_cv_model.alphas_

array([4.92453181, 4.81131661, 4.70070424, 4.59263484, 4.48704997,
       4.3838925 , 4.28310663, 4.18463782, 4.08843282, 3.99443958,
       3.90260724, 3.81288613, 3.72522772, 3.63958457, 3.55591037,
       3.47415984, 3.39428875, 3.31625391, 3.24001309, 3.16552505,
       3.0927495 , 3.02164705, 2.95217926, 2.88430853, 2.81799815,
       2.75321226, 2.68991579, 2.62807451, 2.56765497, 2.50862447,
       2.45095108, 2.39460361, 2.33955157, 2.28576518, 2.23321535,
       2.18187363, 2.13171226, 2.08270411, 2.03482265, 1.98804199,
       1.94233682, 1.89768241, 1.85405461, 1.81142982, 1.76978497,
       1.72909754, 1.68934551, 1.65050739, 1.61256215, 1.57548928,
       1.53926871, 1.50388086, 1.46930657, 1.43552715, 1.40252432,
       1.37028023, 1.33877743, 1.30799888, 1.27792793, 1.24854831,
       1.21984413, 1.19179986, 1.16440033, 1.13763072, 1.11147654,
       1.08592365, 1.06095822, 1.03656674, 1.01273603, 0.98945318,
       0.96670561, 0.94448101, 0.92276735, 0.90155289, 0.88082

In [9]:
lasso_cv_model.alpha_

np.float64(0.4924531806474871)

In [10]:
lasso_cv_model.coef_

array([0.97675148, 0.        , 0.        , 0.        , 3.8148913 ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        ])