<a href="https://colab.research.google.com/github/Manojini51-sys/Calculate_distance.py/blob/main/healthcare_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# ==========================================
# Healthcare Readmission Prediction
# (Final Corrected | No CSV | Warning-Free)
# ==========================================

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier

# ------------------------
# 1. CREATE SYNTHETIC DATA
# ------------------------
# Seed NumPy's global generator so the random table below is reproducible.
np.random.seed(42)

n_samples = 500

# The columns are drawn one after another from the seeded global generator,
# so the order of these entries determines the generated values.
synthetic_columns = {
    "Age": np.random.randint(20, 90, size=n_samples),
    "Gender": np.random.choice(["Male", "Female"], size=n_samples),
    "AdmissionType": np.random.choice(
        ["Emergency", "Elective", "Urgent"], size=n_samples
    ),
    "NumLabProcedures": np.random.randint(1, 100, size=n_samples),
    "NumMedications": np.random.randint(1, 50, size=n_samples),
    "NumInpatient": np.random.randint(0, 10, size=n_samples),
    "Diabetes": np.random.choice(["Yes", "No"], size=n_samples),
    # The target is sampled on its own (65% zeros / 35% ones); no feature
    # column is used to generate it.
    "Readmitted": np.random.choice([0, 1], size=n_samples, p=[0.65, 0.35]),
}
df = pd.DataFrame(synthetic_columns)

# Quick sanity check: overall shape and the first five rows.
print(f"Dataset Shape: {df.shape}")
print(df.head(n=5))

# ------------------------
# 2. BASIC CLEANING (Pandas 3.0 Safe)
# ------------------------
# Remove exact duplicate rows before computing any imputation statistics.
df = df.drop_duplicates()

# Split columns by *kind* instead of by exact dtype name. Matching only
# "int64"/"float64" skips other numeric widths (e.g. int32), and matching only
# "object" can skip text columns stored with a dedicated string dtype; either
# miss would leave those columns un-imputed and un-encoded further down.
num_cols = df.select_dtypes(include="number").columns
cat_cols = df.select_dtypes(exclude="number").columns

# NOTE(review): num_cols also contains the target "Readmitted". The synthetic
# table has no gaps, but median-filling a label column would fabricate
# outcomes if this code were reused on data with missing targets.
for col in num_cols:
    # Numeric gaps are replaced with the column median.
    df[col] = df[col].fillna(df[col].median())

for col in cat_cols:
    # Non-numeric gaps are replaced with the most frequent value. A column
    # with no observed values has an empty mode, so it is left untouched
    # instead of failing on the lookup of the first mode.
    modes = df[col].mode(dropna=True)
    if not modes.empty:
        df[col] = df[col].fillna(modes.iloc[0])

# ------------------------
# 3. ENCODE CATEGORICAL DATA
# ------------------------
# One fresh LabelEncoder per categorical column, stored under the column name
# so each fitted encoder remains available after the column is overwritten.
encoders = {col: LabelEncoder() for col in cat_cols}

for col, le in encoders.items():
    # Replace the column's original values with their integer codes.
    df[col] = le.fit_transform(df[col])

# ------------------------
# 4. FEATURE / TARGET SPLIT
# ------------------------
# "Readmitted" is the label; every other column is used as a feature.
y = df["Readmitted"]
X = df.drop(columns="Readmitted")

# ------------------------
# 5. TRAIN-TEST SPLIT
# ------------------------
# Hold out 20% of the rows for testing. The split is stratified on y and uses
# a fixed random_state so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# ------------------------
# 6. MODEL TRAINING
# ------------------------
# Build the random forest and fit it on the training split in one expression
# (fit returns the estimator itself, so `model` is the fitted classifier).
model = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    class_weight="balanced",
    random_state=42,
).fit(X_train, y_train)

# ------------------------
# 7. PREDICTION
# ------------------------
# Predicted labels for the held-out rows.
y_pred = model.predict(X_test)

# ------------------------
# 8. EVALUATION
# ------------------------
# Each metric compares the held-out labels with the model's predictions and
# is printed right after it is computed.
accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy:", accuracy)

conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", conf_matrix)

report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)

# ------------------------
# 9. SINGLE PATIENT PREDICTION (FIXED)
# ------------------------
# The double brackets keep the first test row as a one-row DataFrame, so the
# feature names travel with it into predict().
sample_patient = X_test.iloc[[0]]
prediction = model.predict(sample_patient)

# Translate the predicted label into a Yes/No answer (label 1 means "Yes").
readmission_answer = "No" if prediction[0] != 1 else "Yes"
print("\nSample Patient Readmission Prediction:", readmission_answer)

Dataset Shape: (500, 8)
   Age  Gender AdmissionType  NumLabProcedures  NumMedications  NumInpatient  \
0   71  Female      Elective                99              33             1   
1   34  Female     Emergency                18               7             8   
2   80  Female        Urgent                59              12             4   
3   40  Female     Emergency                17              49             1   
4   43  Female     Emergency                14              30             8   

  Diabetes  Readmitted  
0      Yes           0  
1       No           0  
2      Yes           0  
3      Yes           0  
4       No           0  

Accuracy: 0.62

Confusion Matrix:
 [[54 13]
 [25  8]]

Classification Report:
               precision    recall  f1-score   support

           0       0.68      0.81      0.74        67
           1       0.38      0.24      0.30        33

    accuracy                           0.62       100
   macro avg       0.53      0.52      0.52    