## Dependencies
The next cell imports everything this notebook needs. If a package is missing, install it first from a notebook cell, for example `%pip install pandas numpy scikit-learn joblib`.

In [None]:
import pandas as pd
import numpy as np
import logging
import os
import joblib

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# --- Logging ---
# Configure the root logger once (INFO level, timestamped lines) and keep a handle to it.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()


# --- Configuration / paths ---
# Location of the input CSV. Set the ER_WAIT_CSV_PATH environment variable to point the
# notebook at a copy of the dataset on another machine; when it is unset, the original
# hard-coded location is used, so existing runs behave exactly as before.
CSV_PATH = os.environ.get(
    "ER_WAIT_CSV_PATH",
    r"C:\Users\Abrish\hospital-resource-analytics\ER Wait Time Dataset.csv",
)

Raw df shape: (216, 18)
['CALENDAR_MONTH_END_DATE', 'APC_Finished_Consultant', 'APC_FCEs_with_a_procedure', 'APC_Percent_FCEs_with_procedure', 'APC_Ordinary_Episodes', 'APC_Day_Case_Episodes', 'APC_Day_Case_Episodes_with_proc', 'APC_Percent_Day_Cases_with_proc', 'APC_Finished_Admission_Episodes', 'APC_Emergency', 'Outpatient_Total_Appointments', 'Outpatient_Attended_Appointments', 'Outpatient_Percent_Attended', 'Outpatient_DNA_Appointment', 'Outpatient_Percent_DNA', 'Outpatient_Follow_Up_Attendance', 'Outpatient_Attendance_Type_1', 'Outpatient_Attendance_Type_2']


KeyError: "None of [Index(['Hospital ID', 'Hospital Name', 'Region', 'Season',\n       'Total Wait Time (min)'],\n      dtype='object')] are in the [columns]"

In [None]:
# Leakage check: if any feature column name contains 'Patient Satisfaction', refit the
# random forest without those columns and make the refit model the active one.
# Relies on `X_encoded` and `y` already existing in the notebook namespace.
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

print('Columns present in training features (sample):', list(X_encoded.columns[:20]))

if not any('Patient Satisfaction' in c for c in X_encoded.columns):
    print('Patient Satisfaction not present in feature columns — no retrain performed')
else:
    print('Patient Satisfaction detected in features — retraining without it')

    # Keep every column whose name does not mention the suspect feature
    cols_no_ps = [c for c in X_encoded.columns if 'Patient Satisfaction' not in c]
    X_no_ps = X_encoded[cols_no_ps].copy()

    # 80/20 hold-out split with a fixed seed
    X_train2, X_test2, y_train2, y_test2 = train_test_split(
        X_no_ps, y, test_size=0.2, random_state=42
    )
    model_no_ps = RandomForestRegressor(n_estimators=200, random_state=42)
    model_no_ps.fit(X_train2, y_train2)

    # Score on the held-out 20%; RMSE is the square root of the MSE
    y_pred2 = model_no_ps.predict(X_test2)
    rmse2 = mean_squared_error(y_test2, y_pred2) ** 0.5
    r22 = r2_score(y_test2, y_pred2)
    print(f'Retrained without Patient Satisfaction — RMSE: {rmse2:.2f}, R2: {r22:.3f}')

    # Persist the model together with the exact column list it was trained on
    joblib.dump(
        {'model': model_no_ps, 'feature_columns': cols_no_ps},
        'hospital_wait_model_no_ps.pkl',
    )
    print('Saved hospital_wait_model_no_ps.pkl')

    # Rebind the notebook-level names so later cells that read `model` / `X_encoded`
    # pick up the refit model and the reduced feature frame
    model, X_encoded = model_no_ps, X_no_ps

In [None]:
# Show top recommendations after possible retrain.
# `recommend_hospitals` is expected to come from an earlier cell; any failure (including
# the helper not being defined) is reported here rather than stopping the notebook.
# The heading starts with an escaped newline: a raw line break inside a single-quoted
# string is a SyntaxError.
print('\nTop 5 recommendations after leakage check/retrain:')
try:
    print(recommend_hospitals('Winter', top_k=5))
except Exception as e:
    print('recommend_hospitals failed:', e)

In [None]:
# Drive the pipelines in `ml_model.py` and print what each stage returns.
# Every stage is wrapped in try/except so one failing stage is reported without
# preventing the following stages from running.

# Print the interpreter path first so it is visible which environment the kernel uses
# (it must be the one that has the required packages installed)
import sys
print('Python executable:', sys.executable)

import ml_model

# --- Load data ---
print('\nLoading data via ml_model.load_data()')
df = ml_model.load_data()
print('Data loaded:', df.shape)
display(df.head())

# --- Cross-sectional model (80/20) ---
print('\nRunning cross-sectional RandomForest...')
cs_res = None  # stays None if training raises
try:
    cs_res = ml_model.train_cross_sectional(df)
    print('Cross-sectional results:', cs_res)
except Exception as e:
    print('Cross-sectional failed:', e)

# --- Monthly series ---
print('\nPreparing monthly median series...')
try:
    ts = ml_model.prepare_monthly_series(df)
    print('Monthly series length:', len(ts))
    display(ts.tail())
except Exception as e:
    print('Series preparation failed:', e)
    ts = None  # signals the next step to skip the experiments

# --- Time-series experiments (use 80/20 internally) ---
# Only attempted when a series exists and has at least 12 points
if ts is None or len(ts) < 12:
    print('\nTime-series too short or missing; skipping experiments')
else:
    print('\nRunning time-series experiments (Prophet / SARIMA / LSTM)')
    experiments = (ml_model.try_prophet, ml_model.try_sarima, ml_model.try_lstm)
    for fn in experiments:
        # Each experiment is isolated so one failure does not stop the others
        try:
            res = fn(ts)
            print(f"{fn.__name__} -> {res}")
        except Exception as e:
            print(f"{fn.__name__} raised: {e}")

# --- Example recommendations ---
# Uses `recommend_hospitals` from earlier in the notebook if it exists; a missing helper
# surfaces as a caught exception and is reported below
try:
    print('\nExample hospital recommendations (from notebook aggregated model)')
    print(recommend_hospitals('Winter', top_k=5))
except Exception as e:
    print('recommend_hospitals not available in notebook scope or failed:', e)
