In [13]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
from sklearn.model_selection import train_test_split

In [6]:
# Load the raw table. "charges" is the regression target; every other column
# is used as a feature.
insurance_data = pd.read_csv("insurance.csv")
y = insurance_data["charges"]

# Build the feature matrix in one chain:
#   1. drop the target column,
#   2. one-hot encode "region" as integer columns (drop_first=True omits one
#      level so the dummies are not redundant with each other),
#   3. recode the two binary text columns "sex" and "smoker" to 1/0,
#   4. add age x smoker and bmi x smoker interaction terms.
# Inside .assign the keyword arguments are evaluated in order, so the
# interaction lambdas see the already-recoded numeric "smoker" column.
X = pd.get_dummies(
    insurance_data.drop(columns=["charges"]),
    columns=["region"],
    drop_first=True,
    dtype=int,
).assign(
    sex=lambda frame: frame["sex"].map({"male": 1, "female": 0}),
    smoker=lambda frame: frame["smoker"].map({"yes": 1, "no": 0}),
    age_smoker=lambda frame: frame["age"] * frame["smoker"],
    bmi_smoker=lambda frame: frame["bmi"] * frame["smoker"],
)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)


In [11]:
# Baseline: Lasso regression with a single, hand-picked penalty strength.
# scikit-learn's `alpha` is the regularisation weight written as λ (lambda)
# in the notes.
lasso_model = Lasso(alpha=0.5).fit(X_train, y_train)

# Score the fitted model on the held-out test rows.
y_pred = lasso_model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("mse:", mse)

mse: 20918648.8898256


In [17]:
# Lasso with the penalty strength chosen by cross-validation (LassoCV).

from sklearn.linear_model import LassoCV

# Candidate alpha values; cross-validation picks the best one from this list.
a = [0.001, 0.1, 2, 4, 6, 8, 10, 30, 50, 70, 100]

# 5-fold cross-validation over the candidates above, with at most 1000
# iterations per fit and a fixed seed.
lasso_cv_model = LassoCV(alphas=a, cv=5, max_iter=1000, random_state=42)
lasso_cv_model.fit(X_train, y_train)

# alpha_ holds the candidate selected by cross-validation.
print("best alpha: ", lasso_cv_model.alpha_)

# Evaluate on the held-out test rows.
# NOTE: `y_pred` and `mse` are rebound here, replacing the values computed for
# the fixed-alpha Lasso model.
y_pred = lasso_cv_model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
print("mse:", mse)
print("r2:", r2)

best alpha:  0.001
mse: 20922599.871035967
r2: 0.8652317499151699
