## **Lasso Regression**

In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 

import warnings
warnings.simplefilter("ignore")

In [2]:
df=pd.read_excel("cleaned_advertising_data.xlsx")
df.head()

Unnamed: 0,sales,total_sales
0,22100,337100
1,10400,128900
2,9300,132400
3,18500,251300
4,12900,250000


In [3]:
x=df[["total_sales"]]
y=df["sales"]

In [4]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=2)

**Modelling**

In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso
estimator=Lasso()

param_grid={'alpha':list(range(1000,2000))}
model_hp=GridSearchCV(estimator,param_grid,cv=5,scoring='r2')
model_hp.fit(x_train,y_train)
model_hp.best_params_

{'alpha': 1999}

* If everytime we get last number as best number, that means lasso is not woking
* in that case we have to consider **alpha=0**

In [6]:
lasso_best=Lasso(alpha=0)
lasso_best.fit(x_train,y_train)

print("Intercept:",lasso_best.intercept_)
print("Coefficient:",lasso_best.coef_)

Intercept: 4021.4705059140542
Coefficient: [0.05100478]


**Evaluation**

In [7]:
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
ypred_train=lasso_best.predict(x_train)

#from sklearn.metrics import r2_score
print("Train r2 :",r2_score(y_train,ypred_train))

#from sklearn.model_selection import cross_val_score
print("cv :",cross_val_score(lasso_best,x_train,y_train,cv=5,scoring="r2").mean())

ypred_test=lasso_best.predict(x_test)

#from sklearn.metrics import r2_score
print("Test r2 ",r2_score(y_test,ypred_test))

Train r2 : 0.8147600798823176
cv : 0.8048349207786423
Test r2  0.33133478476230016


## **Ridge Regression**

In [8]:
from sklearn.linear_model import Ridge
estimator=Ridge()
param_grid={'alpha':list(range(1,100))}
model_hp=GridSearchCV(estimator,param_grid,cv=5,scoring='r2')
model_hp.fit(x_train,y_train)
model_hp.best_params_

{'alpha': 99}

In [9]:
ridge_best=Ridge(alpha=0)
ridge_best.fit(x_train,y_train)

print("intercept:",ridge_best.intercept_)
print("coefficient :",ridge_best.coef_)

intercept: 4021.4705059140524
coefficient : [0.05100478]


In [10]:
ypred_train=ridge_best.predict(x_train)
#from sklearn.metrics import r2_score

print("Train r2 :",r2_score(y_train,ypred_train))
#from sklearn.model_selection import cross_val_score

print("cv :",cross_val_score(ridge_best,x_train,y_train,cv=5,scoring="r2").mean())

ypred_test=ridge_best.predict(x_test)

#from sklearn.metrics import r2_score
print("Test r2",r2_score(y_test,ypred_test))

Train r2 : 0.8147600798823176
cv : 0.8048349207786423
Test r2 0.33133478476230016


## **Elastic Net**

In [11]:
# Without Hyperparameter Tuning
from sklearn.linear_model import ElasticNet
enr_base=ElasticNet()
enr_base.fit(x_train,y_train)


train_pred=enr_base.predict(x_train)
test_pred=enr_base.predict(x_test)

print("Train r2 :",enr_base.score(x_train,y_train))
print("Test r2 :",enr_base.score(x_test,y_test))
print("cv :",cross_val_score(enr_base,x,y,cv=5).mean())

Train r2 : 0.8147600798823176
Test r2 : 0.33133478539547256
cv : 0.7433783178732446


In [12]:
# With Hyperparameter Tuning
from sklearn.model_selection import GridSearchCV
estimator=ElasticNet()
param_grid={"alpha":list(range(1,100)),"l1_ratio":[0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]}

model_hp=GridSearchCV(estimator,param_grid,cv=5,scoring='r2')  # scoring='neg_mean_squared_error'
model_hp.fit(x_train,y_train)
model_hp.best_params_

{'alpha': 99, 'l1_ratio': 1}

In [13]:
enr_base=ElasticNet(alpha=3,l1_ratio=0.5)
enr_base.fit(x_train,y_train)
print("intercept:",enr_base.intercept_)
print("coeeficient:",enr_base.coef_)

intercept: 4021.470540314307
coeeficient: [0.05100478]


In [14]:
ypred_train=enr_base.predict(x_train)
ypred_test=enr_base.predict(x_test)

In [17]:
print("Train r2:", enr_base.score(x_train, y_train))  
print("cv:", cross_val_score(enr_base, x_train, y_train, cv=5).mean())
print("Test r2:", enr_base.score(x_test, y_test))  

Train r2: 0.8147600798823176
cv: 0.8048349207874625
Test r2: 0.3313347866618169
