<h1 style="color: teal; font-size: 40px;">🏥 Medical Cost Regression Project</h1>


In [18]:
# insurance_cost_regression.py

import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [19]:
# Load dataset
df = pd.read_csv("insurance.csv")

In [20]:
# Encode categorical features
le = LabelEncoder()
for col in ['sex', 'smoker', 'region']:
    df[col] = le.fit_transform(df[col])

In [21]:
# Features and Target
X = df.drop("charges", axis=1)
y = df["charges"]

In [22]:
# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [23]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define regression models
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest": RandomForestRegressor(),
    "Gradient Boosting": GradientBoostingRegressor(),
    "KNN Regressor": KNeighborsRegressor()
}

In [24]:
# Train and evaluate each model
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    results[name] = {
        "MAE": mean_absolute_error(y_test, preds),
        "MSE": mean_squared_error(y_test, preds),
        "R2 Score": r2_score(y_test, preds)
    }


In [25]:
# Sort and show results
results_df = pd.DataFrame(results).T.sort_values(by="R2 Score", ascending=False)
print("📊 Model Comparison:\n", results_df)

# Save the best performing model
best_model_name = results_df.index[0]
best_model = models[best_model_name]
with open("insurance_cost_model.pkl", "wb") as f:
    pickle.dump(best_model, f)

print(f"\n✅ Best model saved: {best_model_name}")

📊 Model Comparison:
                            MAE           MSE  R2 Score
Gradient Boosting  2448.343338  1.899770e+07  0.877631
Random Forest      2450.677667  2.096239e+07  0.864975
KNN Regressor      3044.069796  2.577585e+07  0.833971
Linear Regression  4186.508898  3.363521e+07  0.783346
Decision Tree      2846.575183  3.922493e+07  0.747341

✅ Best model saved: Gradient Boosting


<h1 style="color: teal; font-size: 40px;">GUI</h1>

In [26]:
import pickle
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt

# Load saved model
with open("insurance_cost_model.pkl", "rb") as f:
    model = pickle.load(f)

# Input widgets
age = widgets.IntSlider(description='Age', min=18, max=100, value=30, style={'description_width': 'initial'})
sex = widgets.Dropdown(options=[('Male', 1), ('Female', 0)], description='Sex', style={'description_width': 'initial'})
bmi = widgets.FloatSlider(description='BMI', min=15.0, max=45.0, step=0.1, value=25.0, style={'description_width': 'initial'})
children = widgets.IntSlider(description='Children', min=0, max=5, value=0, style={'description_width': 'initial'})
smoker = widgets.Dropdown(options=[('Yes', 1), ('No', 0)], description='Smoker', style={'description_width': 'initial'})
region = widgets.Dropdown(
    options=[('Southwest', 3), ('Southeast', 2), ('Northwest', 1), ('Northeast', 0)],
    description='Region', style={'description_width': 'initial'}
)

output_box = widgets.Output()

# Predict button
predict_btn = widgets.Button(description="💸 Predict Insurance Cost", button_style='success')

# Prediction logic
def predict_cost(b):
    with output_box:
        clear_output()
        input_data = np.array([[age.value, sex.value, bmi.value, children.value, smoker.value, region.value]])
        prediction = model.predict(input_data)[0]
        
        print("📊 Estimated Insurance Cost:")
        print(f"💰 ${prediction:,.2f}")
        
        # Visual display
        fig, ax = plt.subplots(figsize=(6, 1.5))
        ax.barh(['Predicted Cost'], [prediction], color='orange')
        ax.set_xlim([0, max(60000, prediction + 1000)])
        ax.set_xlabel("Cost ($)")
        ax.set_title("Prediction Output")
        plt.show()

predict_btn.on_click(predict_cost)

# Layout
form_items = widgets.VBox([
    age, sex, bmi, children, smoker, region, predict_btn, output_box
])

display(widgets.HTML("<h2 style='color: teal;'>🏥 Insurance Cost Estimator</h2>"))
display(form_items)


HTML(value="<h2 style='color: teal;'>🏥 Insurance Cost Estimator</h2>")

VBox(children=(IntSlider(value=30, description='Age', min=18, style=SliderStyle(description_width='initial')),…