In [None]:
import pandas as pd
import seaborn as sns
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [None]:
insurance_data = pd.read_csv("insurance.csv")

# Target is the "charges" column; every other column is a predictor.
y = insurance_data["charges"]

# Build the feature matrix in one chained expression:
#   - region     -> one-hot columns (first level dropped), stored as int
#   - sex        -> 1 for "female", 0 for "male"
#   - smoker     -> 1 for "yes", 0 for "no"
#   - age_smoker -> age * smoker  (interaction feature)
#   - bmi_smoker -> bmi * smoker  (interaction feature)
# assign() evaluates its keyword arguments in order, so the interaction terms
# see the already-encoded 0/1 `smoker` column.
X = (
    pd.get_dummies(
        insurance_data.drop(columns=["charges"]),
        columns=["region"],
        drop_first=True,
        dtype=int,
    )
    .assign(
        sex=lambda df: df["sex"].map({"female": 1, "male": 0}),
        smoker=lambda df: df["smoker"].map({"yes": 1, "no": 0}),
        age_smoker=lambda df: df["age"] * df["smoker"],
        bmi_smoker=lambda df: df["bmi"] * df["smoker"],
    )
)

# Series.map() turns any label that is not a key of the dict into NaN.
# Fail here with a clear message rather than later inside the model fit.
has_unmapped = X[["sex", "smoker"]].isna().any()
if has_unmapped.any():
    raise ValueError(
        "Missing or unrecognised values in column(s): "
        f"{list(has_unmapped[has_unmapped].index)}"
    )

# Train/test split: 20% held out, fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

In [None]:
# Sweep Lasso's regularisation strength and record the MSE on the test split
# for each value.
# NOTE: because this curve is scored on X_test / y_test it is for illustration
# only; choosing alpha from it would leak test information into model selection.
alphas = [0.001, 0.1, 1, 2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
mses = []

# The loop variable is `alpha` rather than `a` so it does not share a name with
# the `a` list of candidate alphas defined in the LassoCV cell.
for alpha in alphas:
    lasso_model = Lasso(alpha=alpha)
    lasso_model.fit(X_train, y_train)
    y_pred = lasso_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    print(f"MSE for alpha = {alpha}:", mse)
    mses.append(mse)

ax = sns.lineplot(x=alphas, y=mses, marker="o")
# The grid runs from 0.001 to 100, so a log x-axis keeps the small alphas
# from being squashed against the left edge.
ax.set(
    xscale="log",
    xlabel="alpha",
    ylabel="Test MSE",
    title="Lasso: test MSE by alpha",
);

In [None]:
from sklearn.linear_model import LassoCV

# Candidate regularisation strengths handed to the cross-validated search.
a = [0.001, 0.1, 1, 2, 3, 4, 5, 10, 20, 30, 40, 50, 100]

# 5-fold cross-validation over the candidates, fitted on the training split only.
lasso_cv_model = LassoCV(alphas=a, cv=5, max_iter=1000, random_state=42)
lasso_cv_model.fit(X_train, y_train)
print("Best alpha:", lasso_cv_model.alpha_)

# Score the fitted model on the test split.
y_pred = lasso_cv_model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print("mse = ", mse)
print("r2:", r2)