In [1]:
# no_show_prediction.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix

# 1. Load dataset
# Read the raw appointments CSV into `df` and echo its first five rows as a
# quick sanity check on the column names and value formats.
data = "Medical.csv"
df = pd.read_csv(filepath_or_buffer=data)
print(df.head(n=5))

# 2. Data Cleaning
# Remove exact duplicate rows and give the target a Python-friendly name.
df = df.drop_duplicates()
df = df.rename(columns={"No-show": "No_show"})

# Convert target: No_show = "Yes" → 1, "No" → 0
# Series.map turns every label that is not a dict key into NaN; fail loudly
# here instead of letting NaN targets reach the classifier.
target_labels = df["No_show"]
target_encoded = target_labels.map({"Yes": 1, "No": 0})
unmapped = target_encoded.isna()
if unmapped.any():
    unexpected = sorted(target_labels[unmapped].astype(str).unique())
    raise ValueError(f"Unexpected No-show labels (expected 'Yes'/'No'): {unexpected}")
df["No_show"] = target_encoded

# Drop irrelevant cols (row identifiers, not used as model features)
df = df.drop(columns=["PatientId", "AppointmentID"])

# Convert date columns
df["ScheduledDay"] = pd.to_datetime(df["ScheduledDay"])
df["AppointmentDay"] = pd.to_datetime(df["AppointmentDay"])

# Feature engineering
# ScheduledDay carries a time of day while AppointmentDay is recorded at
# midnight, so subtracting the raw timestamps makes a same-day booking a
# negative Timedelta whose .days is -1 (and every other wait one day short).
# Compare calendar dates instead by truncating both sides to midnight.
scheduled_date = df["ScheduledDay"].dt.normalize()
appointment_date = df["AppointmentDay"].dt.normalize()
df["WaitingDays"] = (appointment_date - scheduled_date).dt.days
df["AppointmentWeekDay"] = df["AppointmentDay"].dt.dayofweek  # Monday=0 ... Sunday=6

# Drop original datetime cols now that the numeric features are derived
df = df.drop(columns=["ScheduledDay", "AppointmentDay"])

# Handle categorical vars
# One-hot encode the remaining string columns; drop_first=True omits the
# first level of each so the dummies are not perfectly collinear.
df = pd.get_dummies(df, drop_first=True)

# 3. Train-test split
# Features are every column except the binary target.
X = df.drop(columns="No_show")
y = df["No_show"]

# Hold out 20% for evaluation with a fixed seed. stratify=y keeps the
# show/no-show ratio the same in both splits, so the test metrics are not
# skewed by an unlucky draw of the positive class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 4. Train Decision Tree
# Fit a tree capped at depth 6 on the training split; random_state is pinned
# so repeated runs use the same seed. fit() returns the estimator itself, so
# `model` is the fitted classifier.
model = DecisionTreeClassifier(random_state=42, max_depth=6).fit(X_train, y_train)

# 5. Evaluation
# Predict on the held-out split, then print the per-class
# precision/recall/F1 report followed by the confusion matrix.
y_pred = model.predict(X_test)
report_text = classification_report(y_test, y_pred)
confusion_counts = confusion_matrix(y_test, y_pred)
print(report_text)
print(confusion_counts)

# 6. Save model & columns
# Persist the fitted tree together with the ordered list of training column
# names (presumably so a consumer can rebuild the same feature layout at
# prediction time — confirm against the loading code).
feature_names = list(X.columns)
for artifact, target_path in (
    (model, "/content/decision_tree_model.pkl"),
    (feature_names, "/content/model_features.pkl"),
):
    joblib.dump(artifact, target_path)

print("✅ Model and features saved!")


      PatientId  AppointmentID Gender          ScheduledDay  \
0  2.987250e+13        5642903      F  2016-04-29T18:38:08Z   
1  5.589978e+14        5642503      M  2016-04-29T16:08:27Z   
2  4.262962e+12        5642549      F  2016-04-29T16:19:04Z   
3  8.679512e+11        5642828      F  2016-04-29T17:29:31Z   
4  8.841186e+12        5642494      F  2016-04-29T16:07:23Z   

         AppointmentDay  Age      Neighbourhood  Scholarship  Hipertension  \
0  2016-04-29T00:00:00Z   62    JARDIM DA PENHA            0             1   
1  2016-04-29T00:00:00Z   56    JARDIM DA PENHA            0             0   
2  2016-04-29T00:00:00Z   62      MATA DA PRAIA            0             0   
3  2016-04-29T00:00:00Z    8  PONTAL DE CAMBURI            0             0   
4  2016-04-29T00:00:00Z   56    JARDIM DA PENHA            0             1   

   Diabetes  Alcoholism  Handcap  SMS_received No-show  
0         0           0        0             0      No  
1         0           0        0      