# 🏥 Healthcare No-show Prediction

This notebook uses logistic regression to predict whether a patient will miss a scheduled medical appointment. It demonstrates feature engineering, model training, and interpretation of the results.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
%matplotlib inline

In [None]:
# Read the sample appointment records into a DataFrame and preview the first rows.
csv_path = "healthcare_noshow_sample.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Feature engineering: parse the two date columns, derive the scheduling lead
# time in days, and encode the categorical columns as 0/1 for the model.
df['ScheduledDay'] = pd.to_datetime(df['ScheduledDay'])
df['AppointmentDay'] = pd.to_datetime(df['AppointmentDay'])

# Compare calendar dates rather than raw timestamps. `.dt.days` floors the
# timedelta, so a time-of-day component on either column would shift the
# result by a day (e.g. booking at 10:00 for an appointment stamped 00:00 on
# the same date would give -1 instead of 0).
df['LeadTime'] = (
    df['AppointmentDay'].dt.normalize() - df['ScheduledDay'].dt.normalize()
).dt.days

# The mappings also accept the already-encoded 0/1 values, so re-running this
# cell leaves the columns unchanged instead of turning every value into NaN.
# Any value that is not a key of the mapping still becomes NaN.
df['Gender'] = df['Gender'].map({'F': 0, 'M': 1, 0: 0, 1: 1})
df['No-show'] = df['No-show'].map({'No': 0, 'Yes': 1, 0: 0, 1: 1})

In [None]:
# Choose the predictor columns and split the frame into inputs (X) and target (y).
features = [
    'Gender',
    'Age',
    'Scholarship',
    'SMS_received',
    'LeadTime',
]
X = df.loc[:, features]
y = df.loc[:, 'No-show']

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
split_kwargs = {"test_size": 0.3, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

In [None]:
# Fit a logistic-regression classifier on the training split.
# The features are passed in unscaled, so the default limit of 100 solver
# iterations may stop before convergence and leave the coefficients short of
# the optimum. A larger budget lets the solver finish; when the default
# already converges, the fitted model is the same.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out rows and print the per-class classification report.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# Draw the confusion matrix (actual vs. predicted labels) as an annotated heatmap.
class_names = ["Show", "No-show"]
conf_matrix = confusion_matrix(y_test, y_pred)
ax = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()

In [None]:
# Pair each feature with its fitted coefficient, order the rows by absolute
# coefficient size (largest first), and show the result as a bar chart.
coef_table = pd.DataFrame({"Feature": features, "Coefficient": model.coef_[0]})
coefficients = coef_table.sort_values(
    by="Coefficient",
    key=lambda col: col.abs(),
    ascending=False,
)

ax = sns.barplot(data=coefficients, x="Coefficient", y="Feature", palette="viridis")
ax.set_title("Feature Importance - Logistic Regression")
plt.show()

### 💡 Key Takeaways
- SMS reminders and longer lead times reduce no-show likelihood.
- Logistic regression coefficients give an interpretable view of which features drive no-show risk.
