In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import pickle

# Train a linear regression that predicts billing_amount from age and
# medical_condition, then persist the fitted model and preprocessor to disk.

# Load dataset
df = pd.read_csv("processed_healthcare_data.csv")

# Preprocessing: one-hot encode the categorical column; every other feature
# column (here: age) is passed through unchanged via remainder='passthrough'.
# OneHotEncoder is left at its defaults, so transforming a category that was
# not seen during fit raises an error (default handle_unknown='error').
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), ['medical_condition'])
    ],
    remainder='passthrough'
)

# Define features and target
X = df[['age', 'medical_condition']]  # Features (age + categorical)
y = df['billing_amount']  # Target

# Split dataset (fixed random_state so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the preprocessor on the training split only, then transform it.
# NOTE(review): the held-out split (X_test, y_test) is not used in this cell,
# so the model is never evaluated here.
X_train_transformed = preprocessor.fit_transform(X_train)

# Train the model
model = LinearRegression()
model.fit(X_train_transformed, y_train)

# Save the model and preprocessor. Context managers guarantee the files are
# flushed and closed even if pickling fails part-way through.
with open("model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)
with open("preprocessor.pkl", "wb") as preprocessor_file:
    pickle.dump(preprocessor, preprocessor_file)

print("✅ Model training completed! Saved: model.pkl, preprocessor.pkl")

✅ Model training completed! Saved: model.pkl, preprocessor.pkl


In [3]:
import pickle
import pandas as pd

# Load the persisted model and preprocessor and predict the billing amount
# for a single example record.

# SECURITY: pickle.load can execute arbitrary code during deserialization;
# only load model.pkl / preprocessor.pkl if they come from a trusted source.
# Context managers ensure the file handles are closed after loading.
with open("model.pkl", "rb") as model_file:
    model = pickle.load(model_file)
with open("preprocessor.pkl", "rb") as preprocessor_file:
    preprocessor = pickle.load(preprocessor_file)

# Example: New data for prediction (same columns the preprocessor was fit on)
new_data = pd.DataFrame({'age': [45], 'medical_condition': ['diabetes']})

# Ensure correct capitalization to match training data
# NOTE(review): this assumes training categories look like 'Diabetes'
# (first letter upper-case, rest lower-case) — confirm against the dataset.
new_data['medical_condition'] = new_data['medical_condition'].str.capitalize()

# Transform new data using the already-fitted preprocessor (transform only,
# never fit, so the encoding matches what the model was trained on).
new_data_transformed = preprocessor.transform(new_data)

# Make prediction; predict returns one value per input row, so take the first.
predicted_billing = model.predict(new_data_transformed)
print("Predicted Billing Amount:", predicted_billing[0])


Predicted Billing Amount: 25354.806866120183
