In [1]:
# ======================================
# 🏥 Patient Readmission Risk Predictor
# Run Locally in Jupyter Notebook
# ======================================

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

print("✅ Libraries loaded successfully")

# ------------------------------------------------
# STEP 1: Simulate or Load Data
# ------------------------------------------------

# Build a small synthetic patient cohort for the demo. The global NumPy
# seed is fixed so every run produces the same table.
np.random.seed(42)
n = 500

# The columns are drawn one after another in this exact order: each draw
# consumes the seeded random stream, so reordering them would change the data.
synthetic_columns = {}
synthetic_columns["age"] = np.random.randint(20, 90, size=n)
synthetic_columns["heart_rate"] = np.random.randint(50, 120, size=n)
synthetic_columns["bp_systolic"] = np.random.randint(90, 180, size=n)
synthetic_columns["bp_diastolic"] = np.random.randint(60, 100, size=n)
synthetic_columns["hemoglobin"] = np.random.uniform(9.0, 17.0, size=n)
synthetic_columns["length_of_stay"] = np.random.randint(0, 15, size=n)
synthetic_columns["gender"] = np.random.choice(["Male", "Female"], size=n)
synthetic_columns["race"] = np.random.choice(
    ["Caucasian", "AfricanAmerican", "Asian", "Hispanic", "Other"],
    size=n,
)
synthetic_columns["diagnosis"] = np.random.choice(
    ["Diabetes", "Heart Disease", "Kidney Disease", "Other"],
    size=n,
)
df = pd.DataFrame(synthetic_columns)

# Target column: 1 when all three criteria hold (stay of at most 7,
# age below 75, hemoglobin above 11), otherwise 0. It is a deterministic
# function of the features above.
stay_at_most_7 = df["length_of_stay"] <= 7
age_below_75 = df["age"] < 75
hemoglobin_above_11 = df["hemoglobin"] > 11
df["readmitted_NO"] = (stay_at_most_7 & age_below_75 & hemoglobin_above_11).astype(int)

# Preview of the first rows; Jupyter only renders this when it is the last
# expression of a cell.
df.head()
# ------------------------------------------------
# STEP 2: Preprocessing
# ------------------------------------------------

from sklearn.preprocessing import StandardScaler

# Convert categorical variables to numeric (one-hot)
df_encoded = pd.get_dummies(df, drop_first=False)

# Split features and target
X = df_encoded.drop("readmitted_NO", axis=1)
y = df_encoded["readmitted_NO"]

# Numeric columns that get standardized; the one-hot columns are left as-is.
num_cols = ["age", "heart_rate", "bp_systolic", "bp_diastolic", "hemoglobin", "length_of_stay"]

# Hold out the test set BEFORE fitting the scaler, so the scaler's mean and
# standard deviation are learned from training rows only and no test-set
# statistics leak into preprocessing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on explicit copies so the assignments below modify neither X nor a
# view of it. X itself stays unscaled.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit on the training split only, then reuse the fitted statistics for the
# test split.
scaler = StandardScaler()
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_test[num_cols] = scaler.transform(X_test[num_cols])

print("✅ Preprocessing complete.")
# ------------------------------------------------
# STEP 3: Train Model
# ------------------------------------------------

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out split, scaled with the training-set statistics.
y_pred = model.predict(X_test)
print("✅ Model trained successfully!")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
# ------------------------------------------------
# STEP 4: Save Model for Streamlit App
# ------------------------------------------------

# The trained model, plus the exact feature column order it was fitted with.
joblib.dump(model, "patient_risk_model.pkl")
joblib.dump(list(X.columns), "model_columns.pkl")
# The model was trained on standardized numeric columns, so whatever calls
# model.predict must apply this same fitted scaler to `num_cols` first.
# NOTE(review): app.py is not visible here — confirm it loads scaler.pkl and
# transforms the numeric inputs before predicting.
joblib.dump(scaler, "scaler.pkl")
print("💾 Model and columns saved successfully!")


# ------------------------------------------------
# STEP 5: (Optional) Test Streamlit App locally
# ------------------------------------------------

# Print the steps for starting the Streamlit dashboard. The empty first and
# last entries give the message a leading and a trailing newline once joined.
launch_steps = [
    "",
    "To launch your dashboard:",
    "1️⃣ Open terminal in this folder",
    "2️⃣ Run:",
    "    streamlit run app.py",
    "3️⃣ Go to the browser link shown (usually http://localhost:8501)",
    "",
]
print("\n".join(launch_steps))


✅ Libraries loaded successfully
✅ Preprocessing complete.
✅ Model trained successfully!
Accuracy: 1.0

Confusion Matrix:
 [[72  0]
 [ 0 28]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        72
           1       1.00      1.00      1.00        28

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100

💾 Model and columns saved successfully!

To launch your dashboard:
1️⃣ Open terminal in this folder
2️⃣ Run:
    streamlit run app.py
3️⃣ Go to the browser link shown (usually http://localhost:8501)

