## Installing Dependencies

In [1]:
pip install pandas scikit-learn skl2onnx onnxruntime joblib


Collecting skl2onnx
  Downloading skl2onnx-1.18.0-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting onnxruntime
  Downloading onnxruntime-1.21.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting onnx>=1.2.1 (from skl2onnx)
  Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting onnxconverter-common>=1.7.0 (from skl2onnx)
  Downloading onnxconverter_common-1.14.0-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting protobuf (from onnxruntime)
  Downloading protobuf-3.20.2-py2.py3-none-any.whl.metadata (720 bytes)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading skl2onnx-1.18.0-py2.py3-none-any.whl (300 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m300.3/300.3 kB[0m [3

## Importing Dependencies

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

## Load data and Pre-process

In [2]:
# Read the raw table from the CSV that sits next to the notebook.
df = pd.read_csv("indian_insurance_dataset.csv")

# Target: the "charges" column.
y = df["charges"]

# Features: every other column, with the three categorical columns one-hot encoded.
X = pd.get_dummies(
    df.drop(columns="charges"),
    columns=["sex", "smoker", "region"],
)

## Split into train and test sets

In [3]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Train plain RandomForest

In [4]:
# A 100-tree random forest with a fixed seed so repeated runs build the same model.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
# Fit on the training partition only; the test partition stays unseen until evaluation.
model.fit(X_train, y_train)

## Save sklearn model for backup

In [5]:
# Persist the fitted estimator to disk with joblib.
# NOTE(review): joblib files are pickle-based; only load them from sources you trust.
joblib.dump(value=model, filename="insurance_model.pkl")
print("✅ Saved insurance_model.pkl")

✅ Saved insurance_model.pkl


## Convert to ONNX

In [6]:
# The ONNX graph takes a single float tensor named "float_input" whose width is the
# number of training columns; the first dimension is left as None (unspecified).
n_features = len(X_train.columns)
initial_type = [("float_input", FloatTensorType([None, n_features]))]

# Convert the fitted scikit-learn model into an ONNX graph at opset 12.
onnx_model = convert_sklearn(model, initial_types=initial_type, target_opset=12)

# Write the serialized graph to disk in binary mode.
with open("insurance_model.onnx", "wb") as onnx_file:
    onnx_file.write(onnx_model.SerializeToString())

print(f"✅ Exported insurance_model.onnx ({n_features} float features)")

✅ Exported insurance_model.onnx (12 float features)


## Make predictions on the test set

In [7]:
# Predict on the held-out test features with the fitted scikit-learn model.
y_pred = model.predict(X=X_test)

## Calculate performance metrics

In [8]:
# Compare the test-set predictions against the true target values.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
# RMSE is the square root of MSE, which puts the error back in the target's units.
rmse = np.sqrt(mse)

## Print performance metrics

In [9]:
# Emit the report as one print call; sep="\n" puts each entry on its own line.
print(
    "🔹 Performance on Test Set 🔹",
    f"MAE:  {mae:.2f}",
    f"MSE:  {mse:.2f}",
    f"RMSE: {rmse:.2f}",
    f"R²:   {r2:.2f}",
    sep="\n",
)

🔹 Performance on Test Set 🔹
MAE:  97.58
MSE:  20894.48
RMSE: 144.55
R²:   1.00
