<a href="https://colab.research.google.com/github/Mohammed-Saif-07/ML-winter-quarter/blob/main/EX6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures, StandardScaler
from sklearn.compose import ColumnTransformer, TransformedTargetRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet

In [7]:
# Load the insurance data: "charges" is the regression target and every
# remaining column is used as a predictor.
df = pd.read_csv("insurance.csv")

y = df["charges"]
X = df.drop(columns="charges")

# Column groups consumed by the preprocessing step (scaled vs. one-hot encoded).
num_features = ["age", "bmi", "children"]
cat_features = ["sex", "smoker", "region"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [8]:
# Shared feature preprocessing: standardize the numeric columns and one-hot
# encode the categorical ones, dropping the first level of each category.
preprocessor = ColumnTransformer(
    [
        ("num", StandardScaler(), num_features),
        ("cat", OneHotEncoder(drop="first"), cat_features),
    ]
)

def evaluate_model(model, name):
    """Fit ``model`` on the training split and print its test-split metrics.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test`` variables rather than taking the data as arguments.

    Args:
        model: Object exposing ``fit(X, y)`` and ``predict(X)``.
        name: Label printed above the metrics.

    Returns:
        None. ``model.fit`` is called on the object passed in, and the R2
        score and RMSE of its test predictions are printed.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    print(f" {name} ")

    r2 = r2_score(y_test, predictions)
    print("R2 Score:", r2)

    # RMSE is the square root of the mean squared error.
    mse = mean_squared_error(y_test, predictions)
    print("RMSE:", np.sqrt(mse))

    print("\n")

In [9]:
# Linear Regression with Log Transform
# The target is fitted as log1p(charges); predictions are mapped back with expm1.
linear_pipeline = Pipeline(
    steps=[
        ("prepro", preprocessor),
        ("model", LinearRegression()),
    ]
)
lin_reg = TransformedTargetRegressor(
    regressor=linear_pipeline,
    func=np.log1p,
    inverse_func=np.expm1,
)
evaluate_model(lin_reg, "Linear Regression (Log Transformed)")

# Polynomial Regression (Degree 2) with Log Transform
# Same setup, with degree-2 polynomial terms generated from the preprocessed
# features before the linear model.
poly_pipeline = Pipeline(
    steps=[
        ("prepro", preprocessor),
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("model", LinearRegression()),
    ]
)
poly_reg = TransformedTargetRegressor(
    regressor=poly_pipeline,
    func=np.log1p,
    inverse_func=np.expm1,
)
evaluate_model(poly_reg, "Polynomial Regression (Degree 2 + Log)")

 Linear Regression (Log Transformed) 
R2 Score: 0.6066982575915572
RMSE: 7814.0640259992115


 Polynomial Regression (Degree 2 + Log) 
R2 Score: 0.8547276600804582
RMSE: 4749.036375239781




In [10]:
def _poly_log_model(estimator):
    """Wrap ``estimator`` in the degree-2 polynomial pipeline with a log1p target.

    Args:
        estimator: Final regression step placed after preprocessing and
            polynomial feature generation.

    Returns:
        A ``TransformedTargetRegressor`` that fits on ``log1p(y)`` and maps
        predictions back with ``expm1``.
    """
    return TransformedTargetRegressor(
        regressor=Pipeline(
            steps=[
                ("prepro", preprocessor),
                ("poly", PolynomialFeatures(degree=2, include_bias=False)),
                ("model", estimator),
            ]
        ),
        func=np.log1p,
        inverse_func=np.expm1,
    )


# Ridge
ridge = _poly_log_model(Ridge(alpha=1.0))
evaluate_model(ridge, "Ridge Regression")

# Lasso
lasso = _poly_log_model(Lasso(alpha=0.01))  # Increased alpha slightly for better regularization
evaluate_model(lasso, "Lasso Regression")

# Elastic Net
elastic = _poly_log_model(ElasticNet(alpha=0.01, l1_ratio=0.5))
evaluate_model(elastic, "Elastic Net")

 Ridge Regression 
R2 Score: 0.8556778692168495
RMSE: 4733.479452820509


 Lasso Regression 
R2 Score: 0.8647690797404678
RMSE: 4581.967790017467


 Elastic Net 
R2 Score: 0.866930774371679
RMSE: 4545.198398242857




**Summary of My Findings**

Model Performance Progression
1. Baseline Model

Linear Regression (Log Transformed): R² = 0.607, RMSE = \$7,814.
This baseline model explains only about 61% of the variance in insurance charges on the held-out test set.

2. Adding Complexity

Polynomial Regression (Degree 2 + Log): R² = 0.855, RMSE = \$4,749.
Adding degree-2 polynomial features dramatically improved performance, reducing test RMSE by ~39% (from \$7,814 to \$4,749).

3. Regularization Models
All regularized models used polynomial features (degree 2) with log transformation:


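- Ridge Regression: R² = 0.856, RMSE = \$4,733
- Lasso Regression: R² = 0.865, RMSE = \$4,582
- Elastic Net: R² = 0.867, RMSE = \$4,545

Elastic Net gave the best test-set scores, but all three regularized models improve only slightly on the unregularized polynomial model (R² = 0.855, RMSE = \$4,749).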