In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import pickle
import os

In [2]:
# Debugging: report the directory that relative paths are resolved against
print("Current Working Directory:", os.getcwd())

# Step 1: Load the dataset
# Location of the CSV, relative to the working directory printed above
file_path = '../data/insurance.csv'

# Read the CSV when it is present; otherwise stop with an explicit message
if os.path.exists(file_path):
    data = pd.read_csv(file_path)
else:
    raise FileNotFoundError(f"The file {file_path} does not exist. Please check the path.")

print("Dataset loaded successfully.")


Current Working Directory: c:\eSupport\Jupyter\healthcare-cost-prediction\notebooks
Dataset loaded successfully.


In [3]:
# Step 2: Preprocess the data
# Turn categorical columns into indicator columns; drop_first=True omits the
# first level of each category
data = pd.get_dummies(data, drop_first=True)
print("Categorical variables encoded successfully.")

# Standardize the numeric predictors 'age' and 'bmi'
# NOTE(review): the scaler is fit on every row of `data`, i.e. before any
# train/test split done afterwards -- confirm this is intended.
numeric_cols = ['age', 'bmi']
scaler = StandardScaler()
scaler.fit(data[numeric_cols])
data[numeric_cols] = scaler.transform(data[numeric_cols])
print("Numerical features scaled successfully.")

Categorical variables encoded successfully.
Numerical features scaled successfully.


In [4]:
# Step 3: Split the data
# Target is the 'charges' column; every remaining column is a predictor
y = data['charges']
X = data.drop(columns='charges')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print("Data split into training and testing sets successfully.")

Data split into training and testing sets successfully.


In [5]:
# Step 4: Train the model
# Build a seeded random forest regressor and fit it on the training split;
# fit() returns the estimator itself, so the result is bound to `rf` directly
rf = RandomForestRegressor(random_state=42).fit(X_train, y_train)
print("Model trained successfully.")

Model trained successfully.


In [6]:
# Step 5: Save the trained model and the fitted scaler
# Serialize the fitted random forest so it can be reloaded without retraining
model_path = '../model.pkl'
with open(model_path, 'wb') as f:
    pickle.dump(rf, f)
print(f"Model saved to {model_path}.")

# The model was trained on standardized 'age' and 'bmi' values, so the fitted
# scaler is saved next to it; without it, new inputs cannot be transformed the
# same way before calling predict().
scaler_path = '../scaler.pkl'
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"Scaler saved to {scaler_path}.")

Model saved to ../model.pkl.


In [7]:
# Step 6: Save the feature names used during training
# The training column names are written as one comma-separated line, in the
# same order the model saw them
feature_names_path = '../feature_names.txt'
with open(feature_names_path, mode='w') as names_file:
    names_file.write(','.join(list(X_train.columns)))
print(f"Feature names saved to {feature_names_path}.")

Feature names saved to ../feature_names.txt.
