In [1]:
#imports and configs
import os, json, uuid
from datetime import datetime, timedelta
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.stats.multitest import multipletests

# DoWhy is optional: record whether it can be imported so that later cells
# can skip the DoWhy robustness checks instead of failing.
try:
    import dowhy
    from dowhy import CausalModel
    DOWHY_AVAILABLE = True
except Exception:
    # Any failure while importing dowhy is treated as "not available".
    DOWHY_AVAILABLE = False

# Table names used by the spark.sql queries / writes in the cells below
METRICS_TABLE = "curated_metrics_demo"      # hourly metrics, read by fetch_metric_series
INCIDENTS_TABLE = "monitoring_incidents"    # the latest incident is read from here
CAUSAL_REPORTS_TABLE = "causal_reports"     # the JSON report is appended here

# Analysis parameters
TIME_FREQ = "1H"            # default frequency of the date_range each series is reindexed onto
CONTEXT_BEFORE_HOURS = 48   # hours of context subtracted from the incident window start
CONTEXT_AFTER_HOURS = 6     # hours of context added after the incident window end
MAXLAG = 6  # largest lag passed to the Granger causality test
TOP_K_DOWHY = 2             # number of significant candidates passed on to DoWhy


StatementMeta(, e9b7a638-8f6f-4762-b16a-660e2b360b03, 3, Finished, Available, Finished)

In [2]:
# Load the most recently detected incident and derive the context window around it.
incident_df = spark.sql(
    f"SELECT * FROM {INCIDENTS_TABLE} ORDER BY detected_ts DESC LIMIT 1"
).toPandas()
if incident_df.empty:
    raise RuntimeError(f"No incidents in table: {INCIDENTS_TABLE}")

# First (and only) row as a plain dict keyed by column name.
incident = incident_df.iloc[0].to_dict()
print("Using incident:", incident['incident_id'], incident['window_start'], "→", incident['window_end'])

# Incident window as pandas timestamps.
window_start, window_end = (
    pd.to_datetime(incident['window_start']),
    pd.to_datetime(incident['window_end']),
)

# Pad the window with the configured amount of context on both sides.
ts_start = window_start - pd.Timedelta(CONTEXT_BEFORE_HOURS, unit="h")
ts_end = window_end + pd.Timedelta(CONTEXT_AFTER_HOURS, unit="h")

print("Context window:", ts_start, "→", ts_end)


StatementMeta(, e9b7a638-8f6f-4762-b16a-660e2b360b03, 4, Finished, Available, Finished)

Using incident: github-2025-09-16 03:00:00-0 2025-09-16 03:00:00 → 2025-09-16 04:00:00
Context window: 2025-09-14 03:00:00 → 2025-09-16 10:00:00


In [3]:
# helpers to fetch the series
def fetch_metric_series(metric_col, source=None, freq=TIME_FREQ):
    """
    Returns a pandas Series of the per-hour average of `metric_col`, indexed by
    a regular timestamp grid between ts_start..ts_end (module-level globals).

    Parameters
    ----------
    metric_col : str
        Column of METRICS_TABLE to average (cast to DOUBLE in SQL).
    source : str or None
        If given, only rows with this `source` are used.
        If source is None, aggregates across sources.
    freq : str
        Frequency of the output grid (passed to pd.date_range).

    Returns
    -------
    pd.Series
        One value per grid point. Gaps are forward-filled; anything still
        missing (e.g. before the first observation) becomes 0.0. If the query
        fails or returns no rows, the series is all 0.0.

    Notes
    -----
    A SQL error is printed and treated as "no data" (best effort), so an
    all-zero result may indicate a failed query rather than a quiet metric.
    `metric_col` and `source` are interpolated into the SQL text as-is; only
    pass values that come from trusted configuration or the tables themselves.
    """
    # The output grid is the same for every outcome, so build it once.
    idx = pd.date_range(ts_start, ts_end, freq=freq)

    where_src = f"AND source = '{source}'" if source else ""
    sql = f"""
    SELECT hour, AVG(CAST({metric_col} AS DOUBLE)) as val
    FROM {METRICS_TABLE}
    WHERE hour BETWEEN timestamp '{ts_start}' AND timestamp '{ts_end}'
    {where_src}
    GROUP BY hour
    ORDER BY hour
    """
    try:
        df = spark.sql(sql).toPandas()
    except Exception as e:
        print("SQL error:", e)
        df = pd.DataFrame(columns=['hour','val'])
    if df.empty:
        return pd.Series(0.0, index=idx)
    df['hour'] = pd.to_datetime(df['hour'])
    s = pd.Series(df['val'].values, index=df['hour'])
    # .ffill() replaces the deprecated fillna(method='ffill'), which emits a
    # FutureWarning on current pandas and is scheduled for removal.
    s = s.reindex(idx).ffill().fillna(0.0)
    return s


StatementMeta(, e9b7a638-8f6f-4762-b16a-660e2b360b03, 5, Finished, Available, Finished)

In [4]:
#building target and candidates
# The target is the event count of the source that raised the incident; every
# other series fetched below is a candidate explanatory signal for it.
incident_source = incident['source']  # 'github' or 'noaa'
target = fetch_metric_series('cnt', source=incident_source)
target.name = 'target_cnt'

candidates = {}

# same-source features
candidates[f"{incident_source}_error_rate"] = fetch_metric_series('error_rate', source=incident_source)
# scale avg_latency to avoid huge numbers, divide by 1e6 (adjust to units you need)
candidates[f"{incident_source}_avg_latency_millions"] = fetch_metric_series('avg_latency', source=incident_source) / 1e6

# cross-source controls: include other sources present in metrics table
# fetch distinct sources from metrics table
sources = [r.source for r in spark.sql(f"SELECT DISTINCT source FROM {METRICS_TABLE}").collect()]
for src in sources:
    if src == incident_source:
        continue
    candidates[f"{src}_cnt"] = fetch_metric_series('cnt', source=src)
    candidates[f"{src}_avg_latency_millions"] = fetch_metric_series('avg_latency', source=src) / 1e6



#assemble dataframe
# One column per series; dicts preserve insertion order, so values() and keys()
# line up with each other.
df_all = pd.concat([target, *candidates.values()], axis=1)
df_all.columns = ['target'] + list(candidates.keys())
# .ffill() replaces the deprecated fillna(method='ffill') (FutureWarning on
# current pandas); remaining leading gaps are set to 0.0 as before.
df_all = df_all.ffill().fillna(0.0)
print("Matrix shape:", df_all.shape)
df_all.head()


StatementMeta(, e9b7a638-8f6f-4762-b16a-660e2b360b03, 6, Finished, Available, Finished)

  s = s.reindex(idx).fillna(method='ffill').fillna(0.0)
  s = s.reindex(idx).fillna(method='ffill').fillna(0.0)
  s = s.reindex(idx).fillna(method='ffill').fillna(0.0)
  df_all = df_all.fillna(method='ffill').fillna(0.0)


Unnamed: 0,target,github_error_rate,github_avg_latency_millions,noaa_cnt,noaa_avg_latency_millions
2025-09-14 03:00:00,0.0,0.0,0.0,0.0,0.0
2025-09-14 04:00:00,0.0,0.0,0.0,0.0,0.0
2025-09-14 05:00:00,0.0,0.0,0.0,0.0,0.0
2025-09-14 06:00:00,0.0,0.0,0.0,0.0,0.0
2025-09-14 07:00:00,0.0,0.0,0.0,0.0,0.0


In [5]:
# Z-score every non-binary column so the tests see comparable scales;
# columns that only contain 0/1 are left untouched.
from scipy import stats

df_proc = df_all.copy()
for c in df_proc.columns:
    column = df_proc[c]
    # Indicator-like columns (values drawn only from {0, 1}) keep their scale.
    if set(column.unique()).issubset({0, 1}):
        continue
    centered = column - column.mean()
    spread = column.std()
    # With zero or undefined spread only centre the column; dividing is not possible.
    if spread == 0 or np.isnan(spread):
        df_proc[c] = centered
    else:
        df_proc[c] = centered / spread

df_proc.head()


StatementMeta(, e9b7a638-8f6f-4762-b16a-660e2b360b03, 7, Finished, Available, Finished)

Unnamed: 0,target,github_error_rate,github_avg_latency_millions,noaa_cnt,noaa_avg_latency_millions
2025-09-14 03:00:00,-0.302621,0.0,-0.404257,0.0,0.0
2025-09-14 04:00:00,-0.302621,0.0,-0.404257,0.0,0.0
2025-09-14 05:00:00,-0.302621,0.0,-0.404257,0.0,0.0
2025-09-14 06:00:00,-0.302621,0.0,-0.404257,0.0,0.0
2025-09-14 07:00:00,-0.302621,0.0,-0.404257,0.0,0.0


In [6]:
#Granger tests across candidates
def run_granger_pair(y_series, x_series, maxlag=MAXLAG):
    """
    Test whether `x_series` Granger-causes `y_series` and report the best lag.

    The series are placed side by side (y first, x second, which is the column
    order grangercausalitytests uses for "does column 2 help predict column 1"),
    rows with missing values are dropped, and the `ssr_ftest` result is read
    for every lag in 1..maxlag.

    Parameters
    ----------
    y_series, x_series : pd.Series
        Outcome and candidate-cause series sharing an index.
    maxlag : int
        Largest lag to test.

    Returns
    -------
    dict with keys
        best_p   : smallest F-test p-value over the tested lags (1.0 if no test ran)
        best_lag : 1-based lag at which best_p occurred, or None if no test ran
        fstat    : F statistic at best_lag (0.0 if no test ran)
        error    : None on success, otherwise a short reason why no test ran

    Notes
    -----
    best_p is a minimum over `maxlag` p-values and is therefore optimistic;
    any correction applied afterwards only accounts for the number of
    candidates, not for the lag search.
    """
    def _no_result(reason):
        # Same neutral values as before, plus the reason so failures are visible
        # in the ranked output instead of looking like a genuine p = 1.0.
        return {'best_p': 1.0, 'best_lag': None, 'fstat': 0.0, 'error': reason}

    try:
        arr = pd.concat([y_series, x_series], axis=1).dropna().values
        if arr.shape[0] == 0:
            return _no_result("no overlapping non-null observations")
        # A constant column makes the lagged regression degenerate; report it
        # explicitly rather than relying on the library to raise.
        if np.ptp(arr[:, 0]) == 0 or np.ptp(arr[:, 1]) == 0:
            return _no_result("constant series")

        res = grangercausalitytests(arr, maxlag=maxlag, verbose=False)
        pvals = []
        fstats = []
        for lag in range(1, maxlag + 1):
            fstat, pval, _, _ = res[lag][0]['ssr_ftest']
            pvals.append(float(pval))
            fstats.append(float(fstat))
        best_idx = int(np.argmin(pvals))
        return {
            'best_p': pvals[best_idx],
            'best_lag': best_idx + 1,
            'fstat': fstats[best_idx],
            'error': None,
        }
    except Exception as e:
        # Best effort: a failing pair must not abort the scan over candidates,
        # but keep the cause (too few rows for maxlag, API change, ...).
        return _no_result(f"{type(e).__name__}: {e}")

# Pairwise test of every non-target column against the target.
results = {
    col: run_granger_pair(df_proc['target'], df_proc[col], maxlag=MAXLAG)
    for col in df_proc.columns
    if col != 'target'
}

# Benjamini-Hochberg FDR correction across candidates.
pvals = [entry['best_p'] for entry in results.values()]
if pvals:
    reject, pvals_corrected, _, _ = multipletests(pvals, alpha=0.05, method='fdr_bh')
else:
    reject, pvals_corrected = [], []

# Attach the corrected p-value and the rejection flag to each candidate,
# falling back to the raw p-value / False when no corrected entry exists.
for pos, entry in enumerate(results.values()):
    if pos < len(pvals_corrected):
        entry['p_corrected'] = float(pvals_corrected[pos])
    else:
        entry['p_corrected'] = entry['best_p']
    entry['is_significant'] = bool(reject[pos]) if pos < len(reject) else False

# Smallest corrected p-value first.
ranked = sorted(results.items(), key=lambda item: item[1]['p_corrected'])
print("Ranked candidates (best first):")
for cand_name, cand_stats in ranked:
    print(cand_name, cand_stats)


StatementMeta(, e9b7a638-8f6f-4762-b16a-660e2b360b03, 8, Finished, Available, Finished)





In [9]:
# Re-probe for DoWhy; the flag decides whether the checks below run at all.
try:
    import dowhy
    from dowhy import CausalModel
except Exception:
    DOWHY_AVAILABLE = False
else:
    DOWHY_AVAILABLE = True

# Estimate + refute an effect for the top significant candidates.
dowhy_results = {}
if not DOWHY_AVAILABLE:
    print("DoWhy not installed; skipping DoWhy checks.")
else:
    top_candidates = [k for k, v in ranked if v['is_significant']][:TOP_K_DOWHY]
    for cand in top_candidates:
        # Target and candidate side by side, with the timestamp index exposed as column 'ts'.
        data = (
            df_all[['target', cand]]
            .copy()
            .reset_index()
            .rename(columns={'index': 'ts'})
            .dropna()
        )
        # Binary treatment: 1 where the candidate is above its median, else 0.
        above_median = data[cand] > data[cand].median()
        data['treat'] = above_median.astype(int)
        try:
            model = CausalModel(data=data, treatment='treat', outcome='target', common_causes=None)
            ident = model.identify_effect()
            est = model.estimate_effect(ident, method_name='backdoor.linear_regression')
            ref1 = model.refute_estimate(ident, est, method_name='placebo_treatment_refuter', placebo_type='permute')
            ref2 = model.refute_estimate(ident, est, method_name='random_common_cause')
        except Exception as e:
            # Keep the failure message so the report shows why this candidate has no estimate.
            dowhy_results[cand] = {'error': str(e)}
        else:
            dowhy_results[cand] = {
                'estimate': float(est.value),
                'refute_placebo': str(ref1),
                'refute_random': str(ref2),
            }
dowhy_results


StatementMeta(, e9b7a638-8f6f-4762-b16a-660e2b360b03, 11, Finished, Available, Finished)

DoWhy not installed; skipping DoWhy checks.


{}

In [11]:
#plot and save evidence
# Evidence plots for this incident are written to a per-incident directory under /tmp.
# NOTE(review): incident_id is interpolated into the path verbatim; the id printed
# earlier in this notebook contains spaces and colons, so the directory name does too.
incident_id = incident['incident_id']
evidence_dir = f"/tmp/causal_evidence_{incident_id}"
# exist_ok=True makes re-running this cell safe when the directory already exists.
os.makedirs(evidence_dir, exist_ok=True)

def save_candidate_plots(candidate, max_lag=12):
    """
    Save two evidence plots for `candidate` and return their file paths.

    1. Time series of the target and the candidate from `df_all`, with the
       incident window (window_start..window_end) shaded.
    2. Cross-correlation between the target and the candidate shifted by each
       lag in -max_lag..max_lag.

    Parameters
    ----------
    candidate : str
        Column of `df_all` to plot against 'target'.
    max_lag : int
        Largest absolute shift (in rows of `df_all`) for the cross-correlation.

    Returns
    -------
    list[str]
        [time-series PNG path, cross-correlation PNG path], both inside
        `evidence_dir`.

    Reads the module-level `df_all`, `evidence_dir`, `incident_id`,
    `window_start` and `window_end`.
    """
    def _safe(part):
        # Path separators inside a name would silently point into a
        # sub-directory that does not exist; neutralise them.
        return str(part).replace(os.sep, "_").replace("/", "_")

    file_stem = f"evidence_{_safe(incident_id)}_{_safe(candidate)}"
    idx = df_all.index
    target_series = df_all['target']
    cand_series = df_all[candidate]

    # Time-series view. Explicit figure objects plus try/finally guarantee the
    # figure is closed even if plotting or saving raises.
    f1 = os.path.join(evidence_dir, f"{file_stem}_ts.png")
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.plot(idx, target_series, label='target', marker='o')
        ax.plot(idx, cand_series, label=candidate, marker='x')
        ax.axvspan(window_start, window_end, color='red', alpha=0.2, label='incident_window')
        ax.legend()
        ax.set_title(f"Target vs {candidate}")
        fig.tight_layout()
        fig.savefig(f1)
    finally:
        plt.close(fig)

    # cross-correlation
    lags = list(range(-max_lag, max_lag + 1))
    corr_vals = [target_series.corr(cand_series.shift(l)) for l in lags]
    f2 = os.path.join(evidence_dir, f"{file_stem}_xcorr.png")
    fig, ax = plt.subplots(figsize=(8, 3))
    try:
        ax.stem(lags, corr_vals)
        ax.set_title(f"Cross-corr target & {candidate}")
        ax.set_xlabel("lag")
        # Same layout pass as the first figure so the x label is not clipped.
        fig.tight_layout()
        fig.savefig(f2)
    finally:
        plt.close(fig)
    return [f1, f2]

# Plot only candidates flagged significant, best-ranked first, at most three.
significant_names = [cand_name for cand_name, cand_stats in ranked if cand_stats['is_significant']]
top_for_plots = significant_names[:3]

# candidate name -> [time-series plot path, cross-correlation plot path]
evidence_files = {cand: save_candidate_plots(cand) for cand in top_for_plots}

print("Saved evidence files locally at:", evidence_dir)


StatementMeta(, e9b7a638-8f6f-4762-b16a-660e2b360b03, 13, Finished, Available, Finished)

Saved evidence files locally at: /tmp/causal_evidence_github-2025-09-16 03:00:00-0


In [12]:
#build JSON report and write to causal_reports table
# Take the timestamp once so the JSON payload and the table column agree
# (two separate utcnow() calls would differ by the time spent in between).
# NOTE: datetime.utcnow() returns a naive UTC datetime and is deprecated from
# Python 3.12; it is kept here so the stored values stay as they were.
generated_at = datetime.utcnow()

report = {
    "incident_id": incident['incident_id'],
    "generated_at": generated_at.isoformat() + "Z",
    "window": {"start": str(window_start), "end": str(window_end)},
    # One entry per candidate, in ranking order (best first).
    "ranked_candidates": [
        {
            "candidate": name,
            "best_lag": meta.get("best_lag"),
            "p_value": float(meta.get("best_p")),
            "p_value_corrected": float(meta.get("p_corrected")),
            "fstat": float(meta.get("fstat")),
            "is_significant": bool(meta.get("is_significant")),
            "dowhy": dowhy_results.get(name, None),
            "evidence_files": evidence_files.get(name, [])  # replace with OneLake URLs if uploaded
        }
        for name, meta in ranked
    ],
}


# append to causal_reports table
try:
    spark.sql(f"CREATE TABLE IF NOT EXISTS {CAUSAL_REPORTS_TABLE} (incident_id STRING, report STRING, generated_at TIMESTAMP) USING DELTA")
    row = pd.DataFrame([{
        'incident_id': incident['incident_id'],
        # default=str keeps serialisation from failing on values json cannot
        # encode natively (e.g. numpy scalars or timestamps taken from the incident row).
        'report': json.dumps(report, default=str),
        'generated_at': generated_at,
    }])
    spark.createDataFrame(row).write.format("delta").mode("append").saveAsTable(CAUSAL_REPORTS_TABLE)
    print("Inserted causal report into table:", CAUSAL_REPORTS_TABLE)
except Exception as e:
    # Best effort: the report stays available in `report` even if the write fails.
    print("Failed to write to causal_reports table:", e)


StatementMeta(, e9b7a638-8f6f-4762-b16a-660e2b360b03, 14, Finished, Available, Finished)

Inserted causal report into table: causal_reports
