In [4]:
# Check which Python your notebook kernel is using
import sys
print(sys.executable)
# Install into that same environment
!{sys.executable} -m pip install pandas numpy scikit-learn joblib matplotlib

c:\Users\Abrish\hospital-resource-analytics\.venv\Scripts\python.exe


In [28]:
import pandas as pd
import numpy as np

# Example monthly time series.
# The values come from a seeded generator so that re-running the notebook
# (Restart Kernel -> Run All) reproduces exactly the same series.
dates = pd.date_range(start='2023-01-01', periods=24, freq='MS')  # 24 month-start stamps
values = np.random.default_rng(42).integers(50, 200, size=24)  # integers in [50, 200)

ts = pd.Series(data=values, index=dates)


In [7]:
import logging
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split


logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Location of the ER wait-time CSV. The default is the original author's local
# path; set the ER_WAIT_CSV_PATH environment variable to run the notebook on
# another machine without editing this cell.
CSV_PATH = os.environ.get(
    "ER_WAIT_CSV_PATH",
    r"C:\Users\Abrish\hospital-resource-analytics\ER Wait Time Dataset.csv",
)


In [19]:
def safe_import(name):
    """Import a module by name, returning ``None`` instead of raising.

    Parameters
    ----------
    name : str
        Module name, optionally dotted (e.g. ``'json.decoder'``).

    Returns
    -------
    module or None
        The requested module, or ``None`` if importing it failed for any
        reason (missing package, broken install, invalid name).
    """
    import importlib

    try:
        # import_module returns the module that was actually asked for;
        # __import__('a.b') would hand back the top-level package 'a'.
        return importlib.import_module(name)
    except Exception:
        # Deliberately best-effort: callers treat None as "not available".
        return None


def load_data(path=None):
    """Read the dataset CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. When omitted, the module-level ``CSV_PATH`` is
        looked up at call time, so re-running the configuration cell takes
        effect without having to re-define this function.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV contents.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not point at an existing file.
    """
    if path is None:
        path = CSV_PATH
    logger.info(f"Loading dataset from {path}")
    # isfile (rather than exists) so that a directory path is reported with
    # the same clear error instead of failing later inside read_csv.
    if not os.path.isfile(path):
        raise FileNotFoundError(f"Data file not found: {path}")
    df = pd.read_csv(path)
    logger.info(f"Loaded dataframe shape: {df.shape}")
    return df


def train_cross_sectional(df, test_size=0.2, random_state=42, n_estimators=100,
                          model_path='rf_cross_sectional.pkl'):
    """Fit a random-forest regressor on the numeric columns of ``df``.

    The target is ``'Total Wait Time (min)'`` when that column exists,
    otherwise the first numeric column. All remaining numeric columns are
    used as features, with missing feature values filled with 0.

    Rows whose target is missing are dropped before training. Filling the
    target with 0 would invent labels and distort both the fit and the
    reported metrics.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data.
    test_size : float, default 0.2
        Fraction of rows held out for evaluation.
    random_state : int, default 42
        Seed used for both the train/test split and the forest.
    n_estimators : int, default 100
        Number of trees in the forest.
    model_path : str, default 'rf_cross_sectional.pkl'
        Where the fitted model and its feature list are written with joblib.

    Returns
    -------
    dict
        ``{'model': fitted forest, 'rmse': float, 'r2': float}`` evaluated on
        the held-out split.

    Raises
    ------
    ValueError
        If there is no numeric column, no numeric feature besides the target,
        or no row with a non-missing target.
    """
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()

    # Use 'Total Wait Time (min)' as target if present
    target = 'Total Wait Time (min)'
    if target not in df.columns:
        # fallback: pick first numeric column
        if not numeric_cols:
            raise ValueError('No numeric columns found for cross-sectional model')
        target = numeric_cols[0]

    feature_cols = [c for c in numeric_cols if c != target]
    if not feature_cols:
        raise ValueError('No numeric features available')

    # Keep only rows with a real label.
    has_target = df[target].notna()
    n_dropped = int((~has_target).sum())
    if n_dropped:
        logger.warning(f"Dropping {n_dropped} rows with missing target '{target}'")
    labelled = df[has_target]
    if labelled.empty:
        raise ValueError(f"No rows with a non-missing target '{target}'")

    X = labelled[feature_cols].fillna(0)
    y = labelled[target]

    logger.info(f"Cross-sectional: using target='{target}' and {len(feature_cols)} numeric features")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    rf = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
    rf.fit(X_train, y_train)

    y_pred = rf.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    logger.info(f"RandomForest — RMSE: {rmse:.2f}, R2: {r2:.3f}")

    feat_imp = pd.DataFrame(
        {'feature': feature_cols, 'importance': rf.feature_importances_}
    ).sort_values('importance', ascending=False)
    logger.info("Top features:\n" + str(feat_imp.head(10)))

    # Persist the feature list with the model so predictions can later be
    # made with the same column order.
    joblib.dump({'model': rf, 'features': feature_cols}, model_path)
    logger.info(f'Saved {model_path}')

    return {'model': rf, 'rmse': rmse, 'r2': r2}

In [9]:
def prepare_monthly_series(df, hospital_id=None):
    """Build a month-start series of the median total wait time.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'Visit Date'`` and ``'Total Wait Time (min)'``; also
        ``'Hospital ID'`` when ``hospital_id`` is given. The frame is not
        modified.
    hospital_id : optional
        When not ``None``, only rows whose ``'Hospital ID'`` equals this
        value are used. Falsy ids such as ``0`` are honoured.

    Returns
    -------
    pandas.Series
        Median of ``'Total Wait Time (min)'`` per month (``'MS'`` frequency),
        with months that have no value removed.

    Raises
    ------
    ValueError
        If ``df`` has no ``'Visit Date'`` column.
    """
    if 'Visit Date' not in df.columns:
        raise ValueError('Visit Date column required for time-series models')

    # Parse dates on a derived frame instead of writing back into the caller's
    # DataFrame; unparseable dates become NaT and are dropped so they cannot
    # end up in the resampling index.
    visits = df.assign(**{'Visit Date': pd.to_datetime(df['Visit Date'], errors='coerce')})
    visits = visits.dropna(subset=['Visit Date'])

    # `is not None` rather than truthiness, so that an id of 0 still filters.
    if hospital_id is not None:
        visits = visits[visits['Hospital ID'] == hospital_id]

    # resample by month
    ts = (
        visits.set_index('Visit Date')['Total Wait Time (min)']
        .resample('MS')
        .median()
        .dropna()
    )
    logger.info(f"Prepared monthly series (hospital_id={hospital_id}) length={len(ts)}")
    return ts

In [10]:
def eval_series_forecast(true, pred):
    """RMSE between two series over the index labels they have in common.

    Parameters
    ----------
    true, pred : pandas.Series
        Observed and predicted values; only labels present in both indexes
        are compared.

    Returns
    -------
    float or None
        Root-mean-squared error on the shared labels, or ``None`` when the
        two indexes do not overlap at all.
    """
    shared_labels = true.index.intersection(pred.index)
    if shared_labels.empty:
        return None

    observed = true.loc[shared_labels].values
    predicted = pred.loc[shared_labels].values
    return np.sqrt(mean_squared_error(observed, predicted))

In [24]:
def try_prophet(ts):
    """Fit Prophet on the first 80% of ``ts`` and score it on the rest.

    Parameters
    ----------
    ts : pandas.Series
        Series whose index holds the dates and whose values are the quantity
        to forecast. The forecast horizon is generated with month-start
        (``'MS'``) frequency.

    Returns
    -------
    dict or None
        ``None`` when Prophet is not installed or the series has fewer than
        12 points. Otherwise ``{'rmse': ..., 'accuracy_rmse_pct': ...}``,
        where the accuracy is ``100 * (1 - rmse / max(actual))`` clipped to
        ``[0, 100]``. Both values are ``None`` when no held-out date appears
        in the forecast, and the accuracy is ``None`` when the largest
        actual value is not positive.

    Side effects
    ------------
    Writes the fitted model to ``prophet_model.pkl`` with joblib.
    """
    prophet = safe_import('prophet') or safe_import('fbprophet')
    if prophet is None:
        logger.warning('Prophet not installed — skipping')
        return None

    try:
        from prophet import Prophet
    except Exception:
        from fbprophet import Prophet

    import joblib

    # Prepare dataframe in the ds/y layout Prophet expects
    dfprop = ts.reset_index()
    dfprop.columns = ['ds', 'y']

    # Check length
    n = len(dfprop)
    if n < 12:
        logger.warning('Time series too short for Prophet — skipping')
        return None

    # Train / test split (80/20), chronological
    split = int(n * 0.8)
    train = dfprop.iloc[:split]
    test = dfprop.iloc[split:]

    # Train model
    m = Prophet()
    m.fit(train)

    # Forecast
    future = m.make_future_dataframe(periods=len(test), freq='MS')
    forecast = m.predict(future)
    fc = forecast.set_index('ds')['yhat']

    # Align truth and forecast on the dates they share. Indexing the forecast
    # directly with the test dates (fc.loc[test['ds']]) raises KeyError as
    # soon as one held-out date is absent from the generated horizon, e.g.
    # when the input series has gaps.
    true_series = test.set_index('ds')['y']
    common_idx = true_series.index.intersection(fc.index)

    if len(common_idx) == 0:
        logger.warning('No overlapping dates between truth and prediction')
        joblib.dump({'model': m}, 'prophet_model.pkl')
        return {'rmse': None, 'accuracy_rmse_pct': None}

    true_aligned = true_series.loc[common_idx]
    pred = fc.loc[common_idx]

    # RMSE on the aligned points
    rmse = eval_series_forecast(true_aligned, pred)

    # RMSE-based accuracy using max actual value
    y_true_max = true_aligned.values.max()
    if y_true_max > 0:
        accuracy_rmse = 100.0 * (1 - rmse / y_true_max)
        accuracy_rmse = max(0.0, min(100.0, accuracy_rmse))
    else:
        accuracy_rmse = None

    # Log results
    if accuracy_rmse is not None:
        logger.info(
            f'Prophet RMSE: {rmse:.2f}, '
            f'Accuracy (RMSE-based): {accuracy_rmse:.2f}%'
        )
    else:
        logger.info(f'Prophet RMSE: {rmse:.2f}, Accuracy: N/A')

    # Save model
    joblib.dump({'model': m}, 'prophet_model.pkl')

    return {
        'rmse': rmse,
        'accuracy_rmse_pct': accuracy_rmse
    }


In [32]:
import logging

# Define logger
logger = logging.getLogger('ProphetLogger')
logger.setLevel(logging.INFO)
# This logger has its own console handler; letting records also bubble up to
# the root logger would print every message a second time whenever the root
# logger has a handler of its own (e.g. after logging.basicConfig).
logger.propagate = False

# Console output
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
# getLogger returns the same object on every call, so re-running this cell
# must reuse the existing handler instead of stacking another one (which
# would duplicate every log line).
_existing_console = [h for h in logger.handlers if isinstance(h, logging.StreamHandler)]
if _existing_console:
    ch = _existing_console[0]
else:
    ch = logging.StreamHandler()
    logger.addHandler(ch)
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)


In [None]:
# --- Required imports ---
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
import joblib

# --- Helper function to calculate RMSE ---
def eval_series_forecast(y_true, y_pred):
    """Return the root-mean-squared error between ``y_true`` and ``y_pred``.

    Both arguments are passed unchanged to ``mean_squared_error``; this
    function does no index alignment of its own.
    """
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

# --- Create example time series ---
# Seeded generator: the same 24 values are produced on every run, so the
# reported RMSE is reproducible after Restart Kernel -> Run All.
dates = pd.date_range(start='2023-01-01', periods=24, freq='MS')  # 24 month-start stamps
values = np.random.default_rng(42).integers(50, 200, size=24)  # integers in [50, 200)
ts = pd.Series(data=values, index=dates)

# --- Prophet wrapper function ---
def try_prophet(ts):
    """Fit Prophet on the first 80% of ``ts``, score the rest, print the result.

    Parameters
    ----------
    ts : pandas.Series
        Series whose index holds the dates and whose values are the quantity
        to forecast. The forecast horizon is generated with month-start
        (``'MS'``) frequency.

    Returns
    -------
    dict or None
        ``None`` when neither ``prophet`` nor ``fbprophet`` can be imported
        or the series has fewer than 12 points. Otherwise
        ``{'rmse': ..., 'accuracy_rmse_pct': ...}``, where the accuracy is
        ``100 * (1 - rmse / max(actual))`` clipped to ``[0, 100]``. Both
        values are ``None`` when no held-out date appears in the forecast,
        and the accuracy is ``None`` when the largest actual value is not
        positive.

    Side effects
    ------------
    Prints progress and results, and writes the fitted model to
    ``prophet_model.pkl`` with joblib.
    """
    try:
        from prophet import Prophet
    except Exception:
        try:
            from fbprophet import Prophet
        except Exception:
            print("Prophet not installed — skipping")
            return None

    # Prophet expects a two-column frame named ds (dates) / y (values).
    dfprop = ts.reset_index()
    dfprop.columns = ['ds', 'y']

    if len(dfprop) < 12:
        print("Time series too short for Prophet — skipping")
        return None

    # Chronological 80/20 split.
    split = int(len(dfprop) * 0.8)
    train = dfprop.iloc[:split]
    test = dfprop.iloc[split:]

    m = Prophet()
    m.fit(train)

    future = m.make_future_dataframe(periods=len(test), freq='MS')
    forecast = m.predict(future)
    fc = forecast.set_index('ds')['yhat']

    # Compare only on dates present in both the held-out data and the
    # forecast. fc.loc[test['ds']] raises KeyError as soon as one held-out
    # date is missing from the generated horizon (e.g. a series with gaps).
    true_series = test.set_index('ds')['y']
    common_idx = true_series.index.intersection(fc.index)

    if len(common_idx) == 0:
        print('No overlapping dates between truth and prediction')
        joblib.dump({'model': m}, 'prophet_model.pkl')
        return {'rmse': None, 'accuracy_rmse_pct': None}

    true_aligned = true_series.loc[common_idx]
    pred = fc.loc[common_idx]
    rmse = eval_series_forecast(true_aligned, pred)

    y_true_max = true_aligned.values.max()
    if y_true_max > 0:
        accuracy_rmse = 100.0 * (1 - rmse / y_true_max)
        accuracy_rmse = max(0.0, min(100.0, accuracy_rmse))
    else:
        accuracy_rmse = None

    if accuracy_rmse is not None:
        print(f'Prophet RMSE: {rmse:.2f}, Accuracy (RMSE-based): {accuracy_rmse:.2f}%')
    else:
        print(f'Prophet RMSE: {rmse:.2f}, Accuracy: N/A')

    joblib.dump({'model': m}, 'prophet_model.pkl')

    return {
        'rmse': rmse,
        'accuracy_rmse_pct': accuracy_rmse
    }

# --- Run the function and print results ---
result = try_prophet(ts)

if result is None:
    # try_prophet returns None when it decided not to run at all.
    print("Prophet was skipped.")
else:
    rmse = result.get('rmse', None)
    accuracy = result.get('accuracy_rmse_pct', None)

    # Either metric may be None; show a placeholder instead of formatting it.
    print("RMSE: N/A" if rmse is None else f"RMSE: {rmse:.2f}")
    print(
        "Accuracy (RMSE-based): N/A"
        if accuracy is None
        else f"Accuracy (RMSE-based): {accuracy:.2f}%"
    )




Prophet was skipped.
