In [11]:
import mlflow
import mlflow.sklearn

from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

from sklearn import metrics
from sklearn.metrics import mean_squared_error, r2_score,mean_absolute_error

import pandas as pd
import numpy as np

In [17]:

# Load dataset
# -------------------------
medical_insurance_data = pd.read_csv("c:/Users/91997/Downloads/medical_insurance.csv")

# Separate features and target
X = medical_insurance_data.drop("charges", axis=1)
y = medical_insurance_data["charges"]

# Encode categorical columns
X = pd.get_dummies(X, drop_first=True)

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# -------------------------
# Define models and hyperparameters
# -------------------------
models = {
    "LinearRegression": {
        "model": LinearRegression(),
        "params": {}
    },
    "DecisionTreeRegressor": {
        "model": DecisionTreeRegressor(max_depth=6, min_samples_split=4, random_state=42),
        "params": {"max_depth": 6, "min_samples_split": 4}
    },
    "RandomForestRegressor": {
        "model": RandomForestRegressor(
            n_estimators=150, max_depth=8, min_samples_split=5, random_state=42
        ),
        "params": {"n_estimators": 150, "max_depth": 8, "min_samples_split": 5}
    },
    "GradientBoostingRegressor": {
        "model": GradientBoostingRegressor(
            n_estimators=200, learning_rate=0.05, max_depth=3, random_state=42
        ),
        "params": {"n_estimators": 200, "learning_rate": 0.05, "max_depth": 3}
    },
    "XGBRegressor": {
    "model": XGBRegressor(n_estimators=200,max_depth=5,learning_rate=0.1,subsample=0.8,colsample_bytree=0.8, random_state=42
        ),
    "params": {"n_estimators": 200,"max_depth": 5,"learning_rate": 0.1,"subsample": 0.8,"colsample_bytree": 0.8}
    
}
}

# -------------------------
# MLflow setup
# -------------------------
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("Medical_Insurance_Cost_Prediction")

# -------------------------
# Train and log each model
# -------------------------
for model_name, model_info in models.items():
    model = model_info["model"]
    params = model_info["params"]

    with mlflow.start_run(run_name=model_name):
        # Train
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Evaluate
        mae = mean_absolute_error(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_test, y_pred)

        # Log parameters and metrics
        mlflow.log_params(params)
        mlflow.log_metric("MAE", mae)
        mlflow.log_metric("MSE", mse)
        mlflow.log_metric("RMSE", rmse)
        mlflow.log_metric("R2_Score", r2)

        # Log model
        mlflow.sklearn.log_model(model, name  
                                 ="models")

        print(f"‚úÖ {model_name} logged to MLflow (R2: {r2:.4f})")

print("üéØ All models with hyperparameters logged successfully to MLflow!")






‚úÖ LinearRegression logged to MLflow (R2: 0.7398)
üèÉ View run LinearRegression at: http://127.0.0.1:5000/#/experiments/400402020757060974/runs/84cc7e9f5bda40eda4159610456f6675
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/400402020757060974




‚úÖ DecisionTreeRegressor logged to MLflow (R2: 0.8720)
üèÉ View run DecisionTreeRegressor at: http://127.0.0.1:5000/#/experiments/400402020757060974/runs/fce02b2f9817475a9ba47efe96bdb61b
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/400402020757060974




‚úÖ RandomForestRegressor logged to MLflow (R2: 0.9060)
üèÉ View run RandomForestRegressor at: http://127.0.0.1:5000/#/experiments/400402020757060974/runs/d0ffa481841c4f8a86663ead1971b814
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/400402020757060974




‚úÖ GradientBoostingRegressor logged to MLflow (R2: 0.8739)
üèÉ View run GradientBoostingRegressor at: http://127.0.0.1:5000/#/experiments/400402020757060974/runs/6f8cf0e24c1d454bb07dbd7851df701d
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/400402020757060974




‚úÖ XGBRegressor logged to MLflow (R2: 0.9245)
üèÉ View run XGBRegressor at: http://127.0.0.1:5000/#/experiments/400402020757060974/runs/658ea5d983c74fda84ab9b080165aa04
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/400402020757060974
üéØ All models with hyperparameters logged successfully to MLflow!


In [18]:
#model regestration
# Model details

model_name = 'GradientBoostingRegressor'
run_id = '6f8cf0e24c1d454bb07dbd7851df701d'  
model_uri = f"runs:/{run_id}/models"

# Register the model
mv = mlflow.register_model(model_uri=model_uri, name=model_name)

Registered model 'GradientBoostingRegressor' already exists. Creating a new version of this model...
2025/10/29 11:55:54 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: GradientBoostingRegressor, version 9
Created version '9' of model 'GradientBoostingRegressor'.


In [19]:
# Load and test the model

import mlflow.pyfunc

model_name = 'GradientBoostingRegressor'
model_version = 9
model = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{model_version}")


age = int(input("Enter age:"))
bmi = float(input("Enter BMI: "))
children = int(input("Enter number of children: "))
sex_male = int(input("Enter 1 for Male, 0 for Female: "))
smoker_yes = int(input("Enter 1 if Smoker, 0 if Non-Smoker: "))
region_northwest = int(input("Enter 1 if region is Northwest, else 0: "))
region_southeast = int(input("Enter 2 if region is Southeast, else 0: "))
region_southwest = int(input("Enter 3 if region is Southwest, else 0: "))

# Create input DataFrame
test_input = pd.DataFrame([{
    "age": age,
    "bmi": bmi,
    "children": children,
    "sex_male": sex_male,
    "smoker_yes": smoker_yes,
    "region_northwest": region_northwest,
    "region_southeast": region_southeast,
    "region_southwest": region_southwest
}])

In [20]:
y_pred = model.predict(test_input)
y_pred

array([22041.24402467])

In [None]:
# Transistion the model to the production

import mlflow
from mlflow.tracking import MlflowClient


current_model_uri = f"models:/{model_name}@challenger"
productction_model_name = "GradientBoostingRegressor"

client = mlflow.MlflowClient()
client.copy_model_version(src_model_uri=current_model_uri, dst_name=productction_model_name)

Registered model 'GradientBoostingRegressor' already exists. Creating a new version of this model...
Copied version '3' of model 'GradientBoostingRegressor' to version '10' of model 'GradientBoostingRegressor'.


<ModelVersion: aliases=[], creation_timestamp=1761719220318, current_stage='None', deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='', last_updated_timestamp=1761719220318, metrics=None, model_id=None, name='GradientBoostingRegressor', params=None, run_id='162cf97dc2964bfa93e1cd4b002eb8d9', run_link='', source='models:/GradientBoostingRegressor/3', status='READY', status_message=None, tags={}, user_id='', version='10'>

In [1]:
import dagshub
dagshub.init(repo_owner='seema_kukkar.', repo_name='medical-insurance-cost-prediction', mlflow=True)