# Explainability & Delay Propagation Demo Notebook
This notebook demonstrates a compact per-train explainability workflow (SHAP) and a scaffold for a delay propagation simulation using schedule data in the repository.


In [None]:
# Setup: imports and environment checks
import sys
import os
import random
import importlib

# Try to install shap/networkx if missing (not automatic in some environments)
def ensure_package(pkg):
    """Import ``pkg``; if that fails, try to install it with pip.

    The install runs ``<sys.executable> -m pip install <pkg>`` so the package
    is installed for the interpreter that is executing this code. A failed
    install is reported with a message rather than raised, so the caller can
    carry on without the optional package.

    Args:
        pkg: Name handed both to ``importlib.import_module`` and to pip
            (the same string is used for the import name and the pip name).

    Returns:
        None.
    """
    # Local import keeps this helper self-contained within the cell.
    import subprocess

    try:
        importlib.import_module(pkg)
        return
    except Exception:
        # Broad on purpose: a broken install can raise more than ImportError.
        print(f"Attempting to install {pkg}...")

    # subprocess instead of the IPython-only `!` shell escape, so this also
    # works when the code is run as a plain Python script.
    completed = subprocess.run(
        [sys.executable, '-m', 'pip', 'install', pkg],
        check=False,
    )
    if completed.returncode != 0:
        print(f"Could not install {pkg} (pip exited with code {completed.returncode})")
        return
    # Let the import system notice the files pip has just written.
    importlib.invalidate_caches()

# Common packages: probe each import and only warn (nothing is installed here)
for p in ('pandas', 'numpy', 'joblib', 'matplotlib', 'seaborn', 'shap', 'networkx'):
    try:
        importlib.import_module(p)
    except Exception:
        # Any import-time failure is treated as "package not usable"
        print(f"Package {p} is not present; you may need to install it manually in your environment")

import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
# reproducibility: seed both the stdlib and the NumPy generators with one value
RNG_SEED = 42
for _seed_rng in (random.seed, np.random.seed):
    _seed_rng(RNG_SEED)

# plot styling used by the figures in this notebook
sns.set(style='whitegrid')
print('Environment ready')

In [None]:
# Load model artifacts from backend
import json
from pathlib import Path

# All paths are relative to the directory the notebook is run from.
repo_root = Path('..')
backend_dir = repo_root / 'backend'

model_file = backend_dir / 'model.pkl'
route_encoder_file = backend_dir / 'route_encoder.pkl'
weather_encoder_file = backend_dir / 'weather_encoder.pkl'
season_encoder_file = backend_dir / 'season_encoder.pkl'
feature_file = backend_dir / 'feature_columns.json'

print('Looking for model at', model_file)


def _load_pickle(path):
    """Load one joblib-serialised artifact from ``path``."""
    # SECURITY: joblib.load unpickles arbitrary objects; only point this at
    # artifact files you trust.
    return joblib.load(path)


def _load_json(path):
    """Parse the JSON document stored at ``path``."""
    with path.open('r', encoding='utf-8') as f:
        return json.load(f)


def _load_optional(path, loader, label):
    """Return ``loader(path)``, or None when the file is missing or unreadable.

    Each optional artifact is loaded on its own so that one unreadable file
    does not prevent the remaining artifacts from being loaded.
    """
    if not path.exists():
        return None
    try:
        return loader(path)
    except Exception as e:
        print(f'Could not load {label}:', e)
        return None


# The model is required by the later cells, so a load error is left to surface.
model = None
if model_file.exists():
    model = _load_pickle(model_file)
    print('Loaded model')
else:
    print('Model file not found; try running backend/load_model in the repo')

route_encoder = _load_optional(route_encoder_file, _load_pickle, 'route encoder')
weather_encoder = _load_optional(weather_encoder_file, _load_pickle, 'weather encoder')
season_encoder = _load_optional(season_encoder_file, _load_pickle, 'season encoder')
feature_columns = _load_optional(feature_file, _load_json, 'feature columns')

# `is not None` rather than bool(): truthiness of an estimator that defines
# __len__ reflects its contents, not whether it was loaded.
print('Artifacts loaded:', {
    'model': model is not None,
    'route_encoder': route_encoder is not None,
    'weather_encoder': weather_encoder is not None,
    'season_encoder': season_encoder is not None,
    'feature_columns': feature_columns is not None,
})

In [None]:
# Quick SHAP explainability example for one sample
# Probe for shap once; the explanation step below checks `shap_available`.
try:
    import shap
except Exception:
    shap_available = False
else:
    shap_available = True

# Small helper to build a feature row similar to backend's helper

def _encode_or_default(encoder, value, default):
    """Return ``encoder.transform([value])[0]``, or ``default`` if that fails."""
    if encoder is None:
        return default
    try:
        return encoder.transform([value])[0]
    except Exception:
        # Best-effort by design: an encoder that cannot handle the value
        # must not abort building the row.
        return default


def build_value_map_for_route(route, day_of_week, month, distance_km, weather_condition, season, model_obj=None, route_encoder_local=None, weather_encoder_local=None, season_encoder_local=None):
    """Build the feature-name -> value mapping for one route/day sample.

    Args:
        route: Route label; passed through the route encoder when one is given,
            otherwise (or if encoding fails) used as-is.
        day_of_week: Day number; values 5, 6 and 7 set ``is_peak_day`` to 1.
        month: Month number, stored as ``int``.
        distance_km: Distance, stored as ``float``.
        weather_condition: Weather label; encoded value defaults to 0 when it
            cannot be encoded.
        season: Season label; encoded value defaults to 0 when it cannot be
            encoded.
        model_obj: Model the row is built for. A mapping is only produced when
            it exposes ``feature_names_in_``.
        route_encoder_local: Optional encoder with a ``transform`` method.
        weather_encoder_local: Optional weather encoder. When omitted, the
            module-level ``weather_encoder`` is used if it is defined.
        season_encoder_local: Optional season encoder. When omitted, the
            module-level ``season_encoder`` is used if it is defined.

    Returns:
        A dict with keys ``route_encoded``, ``distance_km``, ``day_of_week``,
        ``month``, ``is_peak_day``, ``weather_encoded`` and ``season_encoded``,
        or None when the model is missing/unsuitable or a value cannot be
        converted to its numeric type.
    """
    if model_obj is None or not hasattr(model_obj, 'feature_names_in_'):
        return None

    # Fall back to the notebook-level encoders so existing callers that pass
    # only the route encoder keep working.
    if weather_encoder_local is None:
        weather_encoder_local = globals().get('weather_encoder')
    if season_encoder_local is None:
        season_encoder_local = globals().get('season_encoder')

    # This is a simplified mapping - align to expected keys
    route_encoded = _encode_or_default(route_encoder_local, route, route)
    weather_encoded = _encode_or_default(weather_encoder_local, weather_condition, 0)
    season_encoded = _encode_or_default(season_encoder_local, season, 0)
    is_peak = 1 if day_of_week in (5, 6, 7) else 0

    try:
        return {
            'route_encoded': route_encoded,
            'distance_km': float(distance_km),
            'day_of_week': int(day_of_week),
            'month': int(month),
            'is_peak_day': int(is_peak),
            'weather_encoded': int(weather_encoded),
            'season_encoded': int(season_encoded),
        }
    except (TypeError, ValueError, OverflowError):
        # A value that cannot be converted means no usable row.
        return None

# Example route
from datetime import datetime

route = 'HYB-VSKP'
date = '2025-12-15'
obj = datetime.strptime(date, '%Y-%m-%d')
dow = obj.isoweekday()  # Monday=1 ... Sunday=7
month = obj.month

# TODO: infer the distance from the trains table when available; fixed placeholder for now
distance = 800
weather = 'Rainy'
season = 'Monsoon'

vmap = build_value_map_for_route(route, dow, month, distance, weather, season, model, route_encoder)
features = None
if vmap is not None and model is not None:
    # Reuse backend function if available by importing it from backend.app, else build DataFrame manually
    try:
        import importlib.util
        spec = importlib.util.spec_from_file_location('backend_app', '../backend/app.py')
        backend_app = importlib.util.module_from_spec(spec)
        # NOTE: this executes backend/app.py, including any module-level code it contains
        spec.loader.exec_module(backend_app)
        features = backend_app._build_feature_frame_for_model(model, vmap)
    except Exception as e:
        print('Could not reuse backend feature builder:', e)
        # fallback: one-row frame in feature_columns order, 0 for anything vmap lacks
        if feature_columns:
            row = [vmap.get(col, 0) for col in feature_columns]
            features = pd.DataFrame([row], columns=feature_columns)

if features is None:
    print('No feature row could be built; prediction and explanation will be skipped')

# Prediction
pred = None
if features is not None and model is not None:
    try:
        pred = float(model.predict(features)[0])
    except Exception as e:
        # Keep the demo running, but say why there is no prediction
        print('Prediction failed:', e)
        pred = None

print('Prediction for sample:', pred)

# SHAP explanation
if not shap_available:
    print('SHAP not available in this environment; run `pip install shap` to enable explanations')
elif model is None or features is None:
    # Nothing to explain without both a model and a feature row; passing
    # features=None on to the explainer would only fail with an unrelated error.
    print('Skipping SHAP explanation: model or feature row is not available')
else:
    explainer = shap.TreeExplainer(model)
    shap_vals = explainer.shap_values(features)
    if isinstance(shap_vals, list):
        # When a list of arrays comes back, only the first entry is explained
        shap_vals = shap_vals[0]
    s_arr = np.array(shap_vals).reshape(-1)
    cols = list(features.columns)
    feat_vals = features.iloc[0].to_dict()
    # top 5 features by absolute contribution: (name, value, signed impact)
    pairs = sorted(
        ((c, feat_vals.get(c), float(s)) for c, s in zip(cols, s_arr)),
        key=lambda item: abs(item[2]),
        reverse=True,
    )[:5]
    print('\nTop contributors:')
    for c, v, s in pairs:
        # `+` format flag always prints the sign, so no manual prefix is needed
        print(f" - {c}: value={v} impact={s:+.3f} (min)")
    try:
        shap.summary_plot(np.array([s_arr]), features, show=True)
    except Exception as e:
        print('Could not render SHAP plot in this environment:', e)

In [None]:
# Propagation: build a small example graph, run simulation, visualize and run backtest
import sys
from pathlib import Path

try:
    from backend import propagation
except ImportError:
    # The other cells reach the backend through '../backend', so the repo root
    # is the parent directory; put it on sys.path and retry the import.
    sys.path.insert(0, str(Path('..').resolve()))
    from backend import propagation

# Define example edges and injection
edges = [('A','B',{'transfer_time':10}), ('B','C',{'transfer_time':5})]
G = propagation.build_dependency_graph(edges)

# Initial delay injected at node A
init = {'A': 15.0}
final, traces = propagation.simulate_propagation(G, init, recovery_margin=5.0)
print('Simulated final delays:', final)
print('Traces:', traces)

# Visualize
fig, ax = propagation.visualize_propagation(G, final, traces)
# Show in notebook (matplotlib inline assumed)
# NOTE(review): fig.show() may warn under a non-GUI backend - confirm in the target environment
fig.show()

# Backtest: compare with observed final (perfect match example)
observed_final = {'A':15.0, 'B':20.0, 'C':20.0}
simulated_final, traces, metrics = propagation.backtest_propagation(G, init, observed_final, recovery_margin=5.0)
print('Backtest metrics:', metrics)

# Save the visualization to file to include in report
img_b64 = propagation._fig_to_base64(fig)
report_file = Path('../reports/propagation_example_base64.txt')
# Create the reports directory on first run instead of failing with FileNotFoundError
report_file.parent.mkdir(parents=True, exist_ok=True)
with report_file.open('w', encoding='utf-8') as f:
    f.write(img_b64)
print('Saved demonstration visualization base64 to reports/propagation_example_base64.txt')