In [8]:
# ==============================================
# Patient Appointment No-Show Prediction Model
# ==============================================

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# -----------------------
# 1️⃣ Load the dataset
# -----------------------
df = pd.read_csv("appointments.csv")

print("Initial Data Preview:")
print(df.head())
print("\nColumns:", df.columns.tolist())

# -----------------------
# 2️⃣ Handle Missing Values
# -----------------------
df = df.fillna("Unknown")

# -----------------------
# 3️⃣ Drop Irrelevant Columns (unique IDs)
# -----------------------
drop_cols = ['appointment_id', 'patient_id', 'doctor_id']
df = df.drop(columns=[col for col in drop_cols if col in df.columns])

# -----------------------
# 4️⃣ Handle Datetime Columns
# -----------------------
for col in df.columns:
    if 'date' in col.lower():
        df[col] = pd.to_datetime(df[col], errors='coerce')
        df[f'{col}_day'] = df[col].dt.day
        df[f'{col}_month'] = df[col].dt.month
        df[f'{col}_weekday'] = df[col].dt.weekday
        df = df.drop(columns=[col])

# -----------------------
# 5️⃣ Encode Categorical Columns
# -----------------------
le = LabelEncoder()

for col in df.select_dtypes(include='object').columns:
    df[col] = le.fit_transform(df[col].astype(str))

# -----------------------
# 6️⃣ Define Features (X) and Target (y)
# -----------------------
# Assuming 'status' indicates attendance (No-Show vs Attended)
target_col = 'status'
X = df.drop(columns=[target_col])
y = df[target_col]

# -----------------------
# 7️⃣ Train-Test Split
# -----------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# -----------------------
# 8️⃣ Train Decision Tree Model
# -----------------------
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# -----------------------
# 9️⃣ Evaluate Model
# -----------------------
y_pred = model.predict(X_test)

print("\n✅ Model Accuracy:", round(accuracy_score(y_test, y_pred) * 100, 2), "%")
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# -----------------------
# 🔟 Save Predictions
# -----------------------
df['Predicted_NoShow'] = model.predict(X)

df.to_csv("predicted_appointments.csv", index=False)
print("\n📁 File saved as 'predicted_appointments.csv'")


Initial Data Preview:
  appointment_id patient_id doctor_id appointment_date appointment_time  \
0           A001       P034      D009       2023-08-09         15:15:00   
1           A002       P032      D004       2023-06-09         14:30:00   
2           A003       P048      D004       2023-06-28          8:00:00   
3           A004       P025      D006       2023-09-01          9:15:00   
4           A005       P040      D003       2023-07-06         12:45:00   

  reason_for_visit     status  
0          Therapy  Scheduled  
1          Therapy    No-show  
2     Consultation  Cancelled  
3     Consultation  Cancelled  
4        Emergency    No-show  

Columns: ['appointment_id', 'patient_id', 'doctor_id', 'appointment_date', 'appointment_time', 'reason_for_visit', 'status']

✅ Model Accuracy: 32.5 %

Classification Report:
               precision    recall  f1-score   support

           0       0.33      0.25      0.29        12
           1       0.31      0.33      0.32      