In [3]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle
import os

# 🔹 Step 1: Build a synthetic healthcare dataset
# Placeholder data only -- swap in a real dataset when one is available.
# NOTE: 'risk' is sampled independently of every feature column, so a model
# trained on this frame has no real signal to learn from.
np.random.seed(42)  # fixed seed so every run draws the same rows
n_rows = 1000

# The draws below happen in a fixed order (numeric vitals, then the binary
# flags, then the label) so the seeded random stream is consumed consistently.
columns = {
    'age': np.random.randint(20, 70, n_rows),
    'bmi': np.random.uniform(18, 35, n_rows),
    'bp': np.random.randint(90, 160, n_rows),
    'heart_rate': np.random.randint(60, 120, n_rows),
}
columns.update({
    flag: np.random.randint(0, 2, n_rows)
    for flag in ('smoking', 'alcohol', 'diabetes')
})
columns['risk'] = np.random.choice(['Low', 'Medium', 'High'], n_rows)
data = pd.DataFrame(columns)

# 🔹 Step 2: Turn the textual risk label into integer class codes
risk_codes = {'Low': 0, 'Medium': 1, 'High': 2}
data = data.assign(risk=data['risk'].map(risk_codes))

# 🔹 Step 3: Separate the feature matrix from the target column
feature_cols = ['age', 'bmi', 'bp', 'heart_rate', 'smoking', 'alcohol', 'diabetes']
X = data[feature_cols]
y = data['risk']

# 🔹 Step 4: Hold out 20% of the rows for evaluation
# random_state pins the shuffle so the same rows land in each partition
# on every run.
splits = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = splits

# 🔹 Step 5: Fit a 100-tree random forest on the training partition
# fit() returns the estimator itself, so construction and training chain.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# 🔹 Step 6: Score the fitted model on the held-out partition
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"✅ Model trained with accuracy: {accuracy:.2f}")

# 🔹 Step 7: Save model
# The pickle is written inside the 'model' directory so that the file ends up
# where the directory created here and the message printed below say it is
# (previously it was written to ./model.pkl, leaving 'model/' empty).
model_dir = 'model'
model_path = os.path.join(model_dir, 'model.pkl')
os.makedirs(model_dir, exist_ok=True)  # no error if the directory already exists
with open(model_path, 'wb') as f:
    # NOTE: pickle files can execute arbitrary code when loaded; only
    # unpickle this artifact from a trusted location.
    pickle.dump(model, f)

print("✅ Model saved as model/model.pkl")

✅ Model trained with accuracy: 0.39
✅ Model saved as model/model.pkl
