<a href="https://www.kaggle.com/code/raneemabdo/lasso-regression?scriptVersionId=143621969" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the Boston housing data from the CMU StatLib archive (requires network access).
data_url = "http://lib.stat.cmu.edu/datasets/boston"
# Raw string for the regex separator: "\s" inside a plain string literal is an
# invalid escape sequence and triggers a warning on current Python versions.
# The first 22 lines of the file are skipped and no header row is read.
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
# Features: every column of the even-indexed rows, followed by the first two
# columns of the odd-indexed rows (13 columns in total, named below).
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
# Target: the third column of the odd-indexed rows.
target = raw_df.values[1::2, 2]
df = pd.DataFrame(
    data,
    columns=['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
             'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'],
)
df['target'] = target

In [None]:
from sklearn.linear_model import Lasso

In [None]:
# Lasso estimator with penalty strength 0.1 and the positive-coefficient option enabled.
lasso_reg = Lasso(
    positive=True,
    alpha=0.1,
)

In [None]:
# Bare expression: the notebook displays the repr of Lasso's constructor.
# NOTE(review): looks like leftover signature inspection — confirm it is still wanted.
Lasso.__init__

In [None]:
# Fit on the complete feature matrix and target (no hold-out at this stage).
lasso_reg.fit(X=data, y=target)

In [None]:
# Column labels of df without 'target', paired with the fitted coefficients.
feature = df.columns.drop('target')
lasso_coef = lasso_reg.coef_
# Two-column table: one row per feature name and its estimate.
pd.DataFrame({'feature_names': feature, 'estimates': lasso_coef})

In [None]:
# Line plot of the coefficients, one x position per feature, labelled by name.
fig, ax = plt.subplots()
ax.plot(lasso_coef)
ax.set_xticks(range(len(feature)))
ax.set_xticklabels(feature.values, rotation=45)
ax.set_ylabel('coefficients')
plt.show()

**training lasso regression with train test split**

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    target,
    random_state=10123,
    test_size=0.25,
)

In [None]:
# Same estimator settings as before, this time fitted on the training split only.
lasso = Lasso(
    positive=True,
    alpha=0.1,
)
lasso.fit(X=x_train, y=y_train)

In [None]:
# Score the fitted model on each split, scaled by 100 for display.
train_r2_scaled = lasso.score(x_train, y_train) * 100
test_r2_scaled = lasso.score(x_test, y_test) * 100
print("R2 score on training set", train_r2_scaled)
print("R2 score on test set", test_r2_scaled)

**fitting lasso regression with a range of alpha values**

In [None]:
# Penalty strengths to try, plus three separate empty lists for the
# per-alpha coefficients and train/test R2 scores.
alphas = (0.001, 0.01, 0.1, 0.5, 1, 2, 5, 10)
coeffs, train_r_squared, test_r_squared = [], [], []

In [None]:
# Start from empty accumulators so that re-running this cell replaces the
# previous sweep instead of appending a second copy of every result.
coeffs = []
train_r_squared = []
test_r_squared = []

# Fit one Lasso per alpha on the training split and record its coefficients
# and its R2 on both splits.
# NOTE(review): positive=True is not set here, unlike the other Lasso
# instances in this notebook — confirm that is intended.
for alpha in alphas:
    lasso = Lasso(alpha=alpha)
    lasso.fit(x_train, y_train)
    coeffs.append(lasso.coef_)
    train_r_squared.append(lasso.score(x_train, y_train))
    test_r_squared.append(lasso.score(x_test, y_test))

In [None]:
# R2 for each alpha tried, in sweep order. Points are evenly spaced by list
# position; the ticks are labelled with the actual alpha values so the axis
# can be read without cross-referencing the alphas tuple.
plt.plot(train_r_squared, label='training set')
plt.plot(test_r_squared, label='test set')
plt.xticks(range(len(alphas)), [str(a) for a in alphas])
plt.xlabel('alpha')
plt.ylabel('R2 score')
plt.legend()
plt.grid()
plt.show()

**lasso hyperparameter tuning**

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Candidate penalty strengths for the search.
alphas = (0.001, 0.01, 0.1, 0.5, 1, 2, 5, 10)
param_grid = dict(alpha=alphas)
# Base estimator with the positive-coefficient option; alpha is supplied by the grid.
lasso_reg = Lasso(positive=True)
# 5-fold cross-validated search over the grid, fitted on the full data and target.
lasso_cv = GridSearchCV(lasso_reg, param_grid, cv=5)
lasso_cv.fit(X=data, y=target)

In [None]:
# Report the winning grid entry and its cross-validated score.
best_params = lasso_cv.best_params_
best_score = lasso_cv.best_score_
print("best parameter ", best_params)
print("best score ", best_score)