# No-show Appointment Prediction
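This notebook trains a random-forest classifier to predict appointment no-shows from the `KaggleV2-May-2016.csv` dataset, visualizes no-show rates and feature importances, and exports the model's predictions to a CSV file for Power BI.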

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load dataset
df = pd.read_csv("KaggleV2-May-2016.csv")

# Preprocess data
# Encode the target: 1 = patient did not show up, 0 = patient attended.
df['No-show'] = df['No-show'].map({'Yes': 1, 'No': 0})
df['ScheduledDay'] = pd.to_datetime(df['ScheduledDay'])
df['AppointmentDay'] = pd.to_datetime(df['AppointmentDay'])

# Measure the wait in whole *calendar* days. Subtracting the raw timestamps
# makes a same-day booking negative whenever the scheduling timestamp carries
# a time of day later than the appointment timestamp (e.g. an appointment
# stamped at midnight), so `.dt.days` would report -1 and the filter below
# would silently discard every same-day appointment.
df['waiting_days'] = (
    df['AppointmentDay'].dt.normalize() - df['ScheduledDay'].dt.normalize()
).dt.days

# Drop rows whose appointment date precedes the booking date (invalid data).
# `.copy()` gives an independent frame so the column assignments below do not
# raise SettingWithCopyWarning.
df = df[df['waiting_days'] >= 0].copy()

# Create age group
# `include_lowest=True` keeps Age == 0 in 'Child' (the default right-closed
# bins exclude the left edge), and the open-ended last bin keeps ages above
# 100 in 'Senior' instead of turning them into NaN.
df['age_group'] = pd.cut(
    df['Age'],
    bins=[0, 18, 35, 60, float('inf')],
    labels=['Child', 'Youth', 'Adult', 'Senior'],
    include_lowest=True,
)

# Define features and target
features = ['Age', 'Hipertension', 'Diabetes', 'Alcoholism', 'SMS_received', 'waiting_days']
X = df[features]
y = df['No-show']

# Train-test split
# Stratify on the target so both splits keep the same show / no-show ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train model
clf = RandomForestClassifier(n_estimators=100, class_weight='balanced', max_depth=10, random_state=42)
clf.fit(X_train, y_train)

# Predictions and evaluation
# NOTE: `prediction` covers every row, including the rows the model was
# trained on, so it is suitable for reporting but not for measuring accuracy.
# The classification report below uses only the held-out test split.
df['prediction'] = clf.predict(X)
y_test_pred = clf.predict(X_test)
report = classification_report(y_test, y_test_pred, zero_division=0)
print(report)


## 📊 Visualizations

In [None]:
# Bar charts of the mean no-show rate, one per grouping column.
# Each entry is (grouping column, chart title, x-axis label).
rate_charts = [
    ('age_group', "No-show Rate by Age Group", "Age Group"),
    ('SMS_received', "Impact of SMS Reminders on No-show", "SMS Received"),
]
for group_col, chart_title, x_label in rate_charts:
    fig, ax = plt.subplots()
    sns.barplot(data=df, x=group_col, y='No-show', ax=ax)
    ax.set_title(chart_title)
    ax.set_xlabel(x_label)
    ax.set_ylabel("No-show Rate")
    plt.show()

# Feature importance of the fitted classifier, largest first
importances = pd.Series(clf.feature_importances_, index=features).sort_values(ascending=False)
fig, ax = plt.subplots()
importances.plot.bar(ax=ax, title='Feature Importance')
ax.set_ylabel('Importance')
plt.show()


## 💾 Export predictions for Power BI

In [None]:
# Columns written for Power BI: the model's input features plus its predicted label.
export_columns = [
    'Age',
    'Hipertension',
    'Diabetes',
    'Alcoholism',
    'SMS_received',
    'waiting_days',
    'prediction',
]
output_path = "predictions_for_powerbi.csv"

# index=False keeps the DataFrame index out of the exported file.
powerbi_frame = df[export_columns]
powerbi_frame.to_csv(output_path, index=False)
print("Exported to predictions_for_powerbi.csv")