# Flink Data Trace + Integration Test Notebook

This notebook validates pipeline correctness by querying data in strict order:

1. Raw ingress (`streaming_events`, DLQ, quarantine)
2. Typed tables (`ai_stream_status`, `stream_trace_events`, `ai_stream_events`, `stream_ingest_metrics`)
3. Silver projections (`fact_stream_*`)
4. Stateful lifecycle facts (`fact_workflow_*`)
5. Rollups (`agg_*`)
6. API views (`v_api_*`)

It also runs SQL assertion suites and scenario-candidate checks used by the CLI harness.

In [1]:
# If needed once:
# %pip install clickhouse-connect pandas


In [2]:
from __future__ import annotations

import json
import os
from datetime import datetime, timedelta, timezone
from pathlib import Path

import clickhouse_connect
import ipywidgets as widgets
import pandas as pd
from IPython.display import display
def find_repo_root(start: Path) -> Path:
    """Return the nearest directory at or above ``start`` that contains ``tests/integration/sql``.

    Args:
        start: Directory to begin the upward search from.

    Returns:
        ``start`` itself or the closest ancestor holding the SQL directory.

    Raises:
        FileNotFoundError: If neither ``start`` nor any ancestor contains it.
    """
    sql_subpath = Path('tests', 'integration', 'sql')
    # (start, *parents) is ordered nearest-first, so the first hit is the closest root.
    root = next(
        (folder for folder in (start, *start.parents) if (folder / sql_subpath).is_dir()),
        None,
    )
    if root is None:
        raise FileNotFoundError('Could not locate repo root containing tests/integration/sql')
    return root
def parse_utc(ts: str) -> datetime:
    """Parse an ISO-8601 timestamp string into a timezone-aware UTC ``datetime``.

    Every ``Z`` in the input is rewritten to ``+00:00`` before parsing. A value
    without an offset is labelled as UTC; a value with an offset is converted to UTC.

    Args:
        ts: ISO-8601 timestamp text.

    Returns:
        The parsed instant with ``tzinfo`` set to UTC.
    """
    parsed = datetime.fromisoformat(ts.replace('Z', '+00:00'))
    if parsed.tzinfo is not None:
        return parsed.astimezone(timezone.utc)
    # Naive input: attach UTC without shifting the wall-clock fields.
    return parsed.replace(tzinfo=timezone.utc)
def resolve_window(repo_root: Path, lookback_hours: int) -> tuple[datetime, datetime, str]:
    """Choose the UTC validation window and report where it came from.

    Sources are tried in this order:

    1. ``FIXTURE_FROM_TS`` and ``FIXTURE_TO_TS`` environment variables (both required).
    2. ``window.from_ts`` / ``window.to_ts`` in
       ``<repo_root>/tests/integration/fixtures/manifest.json``.
    3. A rolling window ending now and spanning ``lookback_hours``.

    Args:
        repo_root: Repository root used to locate the fixture manifest.
        lookback_hours: Size of the fallback window in hours.

    Returns:
        ``(from_ts, to_ts, source_description)`` with both timestamps timezone-aware UTC.

    Raises:
        ValueError: If both environment overrides are set but one is not a valid
            ISO-8601 timestamp (manifest problems are only warned about).
    """
    env_from = os.getenv('FIXTURE_FROM_TS', '').strip()
    env_to = os.getenv('FIXTURE_TO_TS', '').strip()
    if env_from and env_to:
        return parse_utc(env_from), parse_utc(env_to), 'env FIXTURE_FROM_TS/FIXTURE_TO_TS'
    if env_from or env_to:
        # A half-specified override would otherwise be dropped without any trace.
        print('WARNING: Ignoring partial window override; set both FIXTURE_FROM_TS and FIXTURE_TO_TS.')

    manifest_path = repo_root / 'tests' / 'integration' / 'fixtures' / 'manifest.json'
    if manifest_path.exists():
        # Best effort: an unreadable or malformed manifest falls through to the lookback window.
        try:
            payload = json.loads(manifest_path.read_text(encoding='utf-8'))
            window = payload.get('window', {})
            from_ts = window.get('from_ts')
            to_ts = window.get('to_ts')
            if from_ts and to_ts:
                return parse_utc(from_ts), parse_utc(to_ts), f'manifest {manifest_path}'
        except Exception as exc:
            print(f'WARNING: Failed reading fixture manifest window: {exc}')

    to_ts = datetime.now(timezone.utc)
    from_ts = to_ts - timedelta(hours=lookback_hours)
    return from_ts, to_ts, f'lookback {lookback_hours}h'
# --- ClickHouse connection settings; each value can be overridden via environment ---
CH_HOST = os.getenv('CH_HOST', 'localhost')
CH_PORT = int(os.getenv('CH_PORT', '8123'))
CH_DATABASE = os.getenv('CH_DATABASE', 'livepeer_analytics')
CH_USER = os.getenv('CH_USER', 'analytics_user')
# NOTE(review): the fallback below is a hard-coded credential; presumably a local-dev
# default only — confirm it is never valid against a shared or production cluster.
CH_PASSWORD = os.getenv('CH_PASSWORD', 'analytics_password')
CH_SECURE = os.getenv('CH_SECURE', '').lower() in {'1', 'true', 'yes'}

# --- Validation knobs ---
VALIDATION_LOOKBACK_HOURS = int(os.getenv('VALIDATION_LOOKBACK_HOURS', '24'))
LIMIT_PER_SCENARIO = int(os.getenv('LIMIT_PER_SCENARIO', '5'))

# --- Repository layout and validation window ---
REPO_ROOT = find_repo_root(Path.cwd().resolve())
SQL_DIR = REPO_ROOT / 'tests' / 'integration' / 'sql'
FROM_TS, TO_TS, WINDOW_SOURCE = resolve_window(REPO_ROOT, VALIDATION_LOOKBACK_HOURS)

print(f'Repo root: {REPO_ROOT}')
print(f'SQL dir: {SQL_DIR}')
print(f'Window source: {WINDOW_SOURCE}')
print(f'Window UTC: {FROM_TS.isoformat()} -> {TO_TS.isoformat()}')

if FROM_TS >= TO_TS:
    # Window-filtered queries use `>= from_ts AND < to_ts`, so such a window matches nothing.
    print(f'WARNING: Validation window is empty or inverted (source: {WINDOW_SOURCE}).')
if CH_HOST not in {'localhost', '127.0.0.1'}:
    print(f"WARNING: Using non-local ClickHouse host: {CH_HOST}:{CH_PORT}")



Repo root: /home/julian/Documents/development/spe-work/livepeer-naap-analytics
SQL dir: /home/julian/Documents/development/spe-work/livepeer-naap-analytics/tests/integration/sql
Window source: manifest /home/julian/Documents/development/spe-work/livepeer-naap-analytics/tests/integration/fixtures/manifest.json
Window UTC: 2026-02-24T18:07:50.618000+00:00 -> 2026-02-25T01:46:38.054000+00:00


In [3]:
# Single shared ClickHouse client for every query issued by this notebook.
client = clickhouse_connect.get_client(
    host=CH_HOST,
    port=CH_PORT,
    username=CH_USER,
    password=CH_PASSWORD,
    database=CH_DATABASE,
    secure=CH_SECURE,
)

# Per-query guard rails, all overridable via environment.
# NOTE(review): with read_overflow_mode='break' ClickHouse stops reading once
# max_rows_to_read is hit and returns a partial result instead of raising, so an
# assertion could be evaluated on truncated data. Set CH_READ_OVERFLOW_MODE=throw
# to fail loudly instead; the default stays 'break'.
QUERY_SETTINGS = {
    'max_execution_time': int(os.getenv('CH_MAX_EXECUTION_TIME_SEC', '30')),
    'max_threads': int(os.getenv('CH_MAX_THREADS', '4')),
    'max_rows_to_read': int(os.getenv('CH_MAX_ROWS_TO_READ', '5000000')),
    'read_overflow_mode': os.getenv('CH_READ_OVERFLOW_MODE', '').strip().lower() or 'break',
}

# Bind parameters shared by the SQL packs. `%f` yields microseconds; dropping the
# last three digits leaves millisecond precision for the DateTime64(3) placeholders.
params = {
    'from_ts': FROM_TS.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3],
    'to_ts': TO_TS.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3],
    'limit_per_scenario': LIMIT_PER_SCENARIO,
}


def query_df(sql: str, parameters: dict | None = None) -> pd.DataFrame:
    """Execute ``sql`` on the shared client and return the result as a DataFrame.

    Args:
        sql: Query text, optionally containing bind placeholders.
        parameters: Values for the placeholders; ``None`` or an empty mapping sends none.

    Returns:
        A DataFrame whose columns are the result's column names, in result order.
    """
    bound = parameters if parameters else {}
    response = client.query(sql, parameters=bound, settings=QUERY_SETTINGS)
    return pd.DataFrame(response.result_rows, columns=response.column_names)


def parse_blocks(path: str | Path, marker: str) -> list[tuple[str, str]]:
    """Split a SQL file into named blocks delimited by marker lines.

    A line starting with ``marker`` opens a new block; the block name is the rest
    of that line after the marker, stripped. All following lines up to the next
    marker (or end of file) form the block body. Lines before the first marker are
    ignored. Bodies are stripped of surrounding whitespace and trailing semicolons;
    blocks with an empty name or an empty body are dropped.

    Args:
        path: File to read (UTF-8).
        marker: Line prefix that introduces a block, e.g. ``'-- QUERY:'``.

    Returns:
        ``(name, sql)`` pairs in file order.
    """
    blocks: list[tuple[str, str]] = []
    current_name: str | None = None
    current_lines: list[str] = []

    def _flush() -> None:
        # Emit the block collected so far, unless it is unnamed or has no SQL text.
        if not current_name:
            return
        sql = '\n'.join(current_lines).strip().rstrip(';')
        if sql:
            blocks.append((current_name, sql))

    for raw_line in Path(path).read_text(encoding='utf-8').splitlines():
        if raw_line.startswith(marker):
            _flush()
            # Slice by marker length so markers without a ':' also work.
            current_name = raw_line[len(marker):].strip()
            current_lines = []
            continue
        if current_name is not None:
            current_lines.append(raw_line)
    _flush()
    return blocks


## Harness Artifacts First (Automated Gate Summary)

This section reads `artifacts/test-runs/<run_id>` outputs when present and surfaces failures before interactive exploration.


In [4]:
# `json` comes from the notebook's import cell, so it is not re-imported here.

# Directory holding one sub-directory per harness run.
HARNESS_ROOT = REPO_ROOT / 'artifacts' / 'test-runs'
# Optional explicit run to inspect; when empty, the latest run directory is used.
HARNESS_RUN_ID = os.getenv('HARNESS_RUN_ID', '').strip()

def _latest_harness_run(root: Path) -> Path | None:
    if not root.exists():
        return None
    runs = sorted([p for p in root.iterdir() if p.is_dir()])
    return runs[-1] if runs else None

def _load_json(path: Path) -> dict | None:
    if not path.exists():
        return None
    return json.loads(path.read_text(encoding='utf-8'))

# An explicit HARNESS_RUN_ID wins; otherwise use the run directory that sorts last.
selected_run = (HARNESS_ROOT / HARNESS_RUN_ID) if HARNESS_RUN_ID else _latest_harness_run(HARNESS_ROOT)

if not selected_run or not selected_run.exists():
    print('No harness artifacts found. Run scripts/run_scenario_test_harness.py first or set HARNESS_RUN_ID.')
else:
    print(f'Using harness run: {selected_run.name}')

    # Stage overview from summary.json, shown only when the core columns are present.
    summary = _load_json(selected_run / 'summary.json')
    if summary:
        stage_df = pd.DataFrame(summary.get('results', []))
        if not stage_df.empty and {'stage','status','duration_sec'}.issubset(stage_df.columns):
            # 'error' is not covered by the guard above; reindex fills it with NaN
            # when no stage recorded one, instead of raising KeyError.
            display(stage_df.reindex(columns=['stage', 'status', 'duration_sec', 'error']))

    # Collect every non-passing result from the per-suite assertion artifacts.
    assertion_files = ['assert_raw_typed.json', 'assert_pipeline.json', 'assert_api.json', 'assert_scenarios.json']
    failure_rows = []
    for file_name in assertion_files:
        payload = _load_json(selected_run / 'stages' / file_name)
        if not payload:
            # Missing or empty artifact: nothing to report for this suite.
            continue
        for r in payload.get('results', []):
            if not r.get('passed', False):
                failure_rows.append({
                    'suite': file_name.replace('.json', ''),
                    'test_name': r.get('name', ''),
                    'failed_rows': r.get('failed_rows', 1),
                    'error': r.get('error', ''),
                    'diagnostics': r.get('diagnostics', {}),
                })

    if failure_rows:
        failures_df = pd.DataFrame(failure_rows)
        display(failures_df[['suite', 'test_name', 'failed_rows', 'error', 'diagnostics']])
        print(f'Harness assertion failures: {len(failures_df)}')
    else:
        print('Harness assertions: PASS (or assertion JSON files not present for this run).')


Using harness run: 20260224T234752Z


Unnamed: 0,stage,status,duration_sec,error
0,stack_up,PASS,53.115548,
1,schema_apply,PASS,0.336856,
2,pipeline_ready,PASS,40.182952,
3,replay_events,PASS,1.396575,
4,pipeline_wait,PASS,20.000249,
5,query_pack,PASS,0.607896,
6,assert_raw_typed,PASS,0.434408,
7,assert_pipeline,PASS,0.585124,
8,assert_api,PASS,0.92633,
9,assert_scenarios,PASS,0.411483,


Harness assertions: PASS (or assertion JSON files not present for this run).


## Ordered Pipeline Trace (Raw -> API)

### Trace Pack: End-to-End Flow
- What this checks: Raw ingest, typed tables, silver/stateful facts, rollups, and API views in one ordered pass.
- Why it matters: Confirms data traverses each architecture layer for the selected window.
- How to read failures: Empty stages or timestamp gaps usually point to readiness, filtering window, or projection issues.


In [5]:
# Load the ordered trace queries; each block in the file is introduced by a `-- QUERY:` line.
flow_file = SQL_DIR / 'trace_pipeline_flow.sql'
flow_blocks = parse_blocks(flow_file, '-- QUERY:')
print(f'Loaded {len(flow_blocks)} ordered queries from {flow_file}')

# Run the queries in file order, keeping every full result by name and showing a preview.
flow_results: dict[str, pd.DataFrame] = {}
for name, sql in flow_blocks:
    df = query_df(sql, params)
    flow_results[name] = df
    print(f'\n=== {name} ===')
    # Preview only the first 20 rows; the complete frame stays in flow_results.
    display(df.head(20))


Loaded 9 ordered queries from /home/julian/Documents/development/spe-work/livepeer-naap-analytics/tests/integration/sql/trace_pipeline_flow.sql

=== 01_raw_ingest ===


Unnamed: 0,object_name,rows_window,min_ts,max_ts
0,streaming_events_dlq,0,1970-01-01 00:00:00.000,1970-01-01 00:00:00.000
1,streaming_events_quarantine,23,2026-02-24 23:49:27.273,2026-02-24 23:49:27.276
2,streaming_events,107,2026-02-24 20:07:50.619,2026-02-25 01:44:46.233



=== 02_typed_tables ===


Unnamed: 0,object_name,rows_window,min_ts,max_ts
0,ai_stream_events,14,2026-02-24 20:08:09.605,2026-02-24 23:45:59.945
1,ai_stream_status,25,2026-02-24 20:08:01.068,2026-02-24 23:46:38.054
2,network_capabilities,20,2026-02-24 23:03:39.871,2026-02-25 01:44:46.233
3,stream_ingest_metrics,0,1970-01-01 00:00:00.000,1970-01-01 00:00:00.000
4,stream_trace_events,54,2026-02-24 20:07:50.619,2026-02-24 23:46:22.050



=== 03_silver_projection_counts ===


Unnamed: 0,object_name,rows_window,min_ts,max_ts
0,fact_stream_ingest_samples,0,1970-01-01 00:00:00.000,1970-01-01 00:00:00.000
1,fact_stream_status_samples,25,2026-02-24 20:08:01.068,2026-02-24 23:46:38.054
2,fact_stream_trace_edges,54,2026-02-24 20:07:50.618,2026-02-24 23:46:22.050



=== 04_stateful_fact_counts ===


Unnamed: 0,object_name,rows_window,min_ts,max_ts
0,fact_workflow_param_updates,0,1970-01-01 00:00:00.000,1970-01-01 00:00:00.000
1,fact_workflow_session_segments,9,2026-02-24 20:07:50.618,2026-02-24 23:46:17.982
2,fact_workflow_sessions,10,2026-02-24 20:07:50.618,2026-02-24 23:46:07.969



=== 05_reliability_and_swap_summary ===


Unnamed: 0,sessions,known_stream_sessions,startup_success_sessions,startup_excused_sessions,startup_unexcused_sessions,confirmed_swapped_sessions,inferred_orchestrator_change_sessions,swapped_sessions,unexcused_rate
0,10,9,6,3,0,1,0,1,0.0



=== 06_rollup_population ===


Unnamed: 0,object_name,rows_window
0,agg_reliability_1h,9
1,agg_stream_performance_1m,6



=== 07_view_population ===


Unnamed: 0,object_name,rows_window
0,v_api_gpu_metrics,2
1,v_api_network_demand,5
2,v_api_sla_compliance,5



=== 08_gpu_view_parity ===


Unnamed: 0,joined_rows,mean_abs_diff_fps,max_abs_diff_fps
0,0,,0.0



=== 09_sla_view_parity ===


Unnamed: 0,joined_rows,total_known_diff,total_unexcused_diff,total_swapped_diff
0,5,0,0,0


## Integration Assertions (CI-aligned)

In [6]:
def run_assertion_file(path: str | Path) -> pd.DataFrame:
    """Run every ``-- TEST:`` block in a SQL file and tabulate pass/fail per test.

    Each test query is evaluated on its first result row only. A test passes when
    that row's ``failed_rows`` value is 0. A test is reported as ``FAIL`` when the
    query raises, returns no rows, lacks a ``failed_rows`` column, or returns a
    ``failed_rows`` value that cannot be read as an integer. All other columns of
    the first row are carried along as diagnostics.

    Args:
        path: SQL file containing the ``-- TEST:`` blocks.

    Returns:
        One row per test with ``test_name``, ``status`` and ``failed_rows`` first,
        followed by any diagnostic columns. A file without tests yields an empty
        frame that still has the three leading columns.
    """
    lead_cols = ['test_name', 'status', 'failed_rows']
    rows = []
    for name, sql in parse_blocks(path, '-- TEST:'):
        try:
            df = query_df(sql, params)
        except Exception as exc:
            # A failing query is reported as a failed test rather than aborting the suite.
            rows.append({'test_name': name, 'failed_rows': 1, 'status': 'FAIL', 'error': str(exc)})
            continue

        if df.empty:
            rows.append({'test_name': name, 'failed_rows': 1, 'status': 'FAIL', 'error': 'No rows'})
            continue

        row = df.iloc[0].to_dict()
        raw_failed = row.get('failed_rows', 1)
        try:
            failed = int(raw_failed)
        except (TypeError, ValueError, OverflowError):
            # NULL/NaN or other non-integer values count as a failure instead of crashing.
            failed = 1
            row['failed_rows'] = 1
            row['error'] = f'Non-numeric failed_rows: {raw_failed!r}'
        row['test_name'] = name
        row['status'] = 'PASS' if failed == 0 else 'FAIL'
        rows.append(row)

    out = pd.DataFrame(rows)
    extra_cols = [c for c in out.columns if c not in lead_cols]
    # reindex (not out[...]) so absent leading columns are created instead of raising KeyError.
    return out.reindex(columns=lead_cols + extra_cols)


### Raw -> Typed Assertions
- What this checks: Accounting parity from raw events into typed tables, plus network capabilities fanout guard.
- Why it matters: Confirms parser output volume is consistent with accepted raw input (after DLQ/quarantine).
- How to read failures: Look at `raw_rows`, `accepted_rows_est`, `typed_rows`, and `typed_distinct_source_events` diagnostics first.


### Pipeline Contract Assertions
- What this checks: Presence, projection integrity, session semantics, and rollup/view parity contracts.
- Why it matters: Validates correctness-critical transformations owned by Flink and serving parity in ClickHouse.
- How to read failures: Treat as regressions unless explicitly informational; inspect failing test diagnostics first.


In [7]:
# Raw -> typed accounting suite.
raw_typed_assertions = run_assertion_file(SQL_DIR / 'assertions_raw_typed.sql')
display(raw_typed_assertions)
raw_typed_failures = raw_typed_assertions.loc[raw_typed_assertions['status'].eq('FAIL')]
print(f'Raw->typed assertion failures: {len(raw_typed_failures)}')

# Pipeline contract suite.
pipeline_assertions = run_assertion_file(SQL_DIR / 'assertions_pipeline.sql')
display(pipeline_assertions)
pipeline_failures = pipeline_assertions.loc[pipeline_assertions['status'].eq('FAIL')]
print(f'Pipeline assertion failures: {len(pipeline_failures)}')


Unnamed: 0,test_name,status,failed_rows,overflow_type_count,total_raw_rows,total_dlq_rows,total_quarantine_rows,total_accepted_rows_est_capped,dlq_rows,quarantine_rows,total_accepted_rows_est,total_typed_rows,raw_rows,accepted_rows_est,typed_rows,typed_distinct_source_events,avg_fanout_per_source_event
0,raw_typed_accepted_estimate_nonnegative,PASS,0.0,1.0,107.0,0.0,23.0,93.0,,,,,,,,,
1,raw_typed_no_dlq_or_quarantine_for_core_types,PASS,0.0,,,,,,0.0,0.0,,,,,,,
2,raw_typed_core_1to1_parity,PASS,0.0,,93.0,0.0,0.0,,,,93.0,93.0,,,,,
3,raw_typed_network_capabilities_expected_in_window,PASS,0.0,,,,,,,,,,14.0,,,,
4,raw_typed_network_capabilities_fanout_guard,PASS,0.0,,,,,,0.0,23.0,,,14.0,0.0,20.0,14.0,1.428571


Raw->typed assertion failures: 0


Unnamed: 0,test_name,status,failed_rows,missing_objects,missing_mvs,caps_rows,dim_caps_rows,adv_rows,dim_adv_rows,mc_rows,...,failing_workflow_session_ids,failing_stream_request_pairs,orphan_param_updates,mismatched_rows,joined_rows,mean_abs_diff_fps,max_abs_diff_fps,total_known_diff,total_unexcused_diff,total_swapped_diff
0,raw_events_present,PASS,0.0,[],,,,,,,...,,,,,,,,,,
1,capability_dimension_mvs_present,PASS,0.0,,[],,,,,,...,,,,,,,,,,
2,capability_dimensions_projecting,PASS,0.0,,,20.0,20.0,320.0,320.0,20.0,...,,,,,,,,,,
3,session_fact_present,PASS,0.0,,,,,,,,...,,,,,,,,,,
4,network_capabilities_raw_and_typed_present,PASS,0.0,,,,,,,,...,,,,,,,,,,
5,status_raw_to_silver_projection,PASS,0.0,,,,,,,,...,,,,,,,,,,
6,trace_raw_to_silver_projection,PASS,0.0,,,,,,,,...,,,,,,,,,,
7,ingest_raw_to_silver_projection,PASS,0.0,,,,,,,,...,,,,,,,,,,
8,session_final_uniqueness,PASS,0.0,,,,,,,,...,,,,,,,,,,
9,workflow_session_has_identifier,PASS,0.0,,,,,,,,...,,,,,,,,,,


Pipeline assertion failures: 0


### API Readiness Assertions
- What this checks: Serving view presence, key fields, ratio math, hourly grain, and recompute parity.
- Why it matters: Ensures downstream API consumers see coherent and contract-safe metrics.
- How to read failures: Focus on view-level formula drift and dimension nullability first.


In [8]:
# API readiness suite; the drill-down cell below reads `api_readiness_assertions`.
api_readiness_assertions = run_assertion_file(SQL_DIR / 'assertions_api_readiness.sql')
display(api_readiness_assertions)
api_readiness_failures = api_readiness_assertions.loc[api_readiness_assertions['status'].eq('FAIL')]
print(f'API readiness assertion failures: {len(api_readiness_failures)}')


Unnamed: 0,test_name,status,failed_rows,missing_views,rows_checked,missing_columns,low_sample_windows,joined_rows
0,api_views_present,PASS,0,[],,,,
1,gpu_metrics_keys_not_null,PASS,0,,2.0,,,
2,gpu_metrics_rollup_fields_consistent,PASS,0,,2.0,,,
3,gpu_metrics_latency_fields_nonnegative,PASS,0,,2.0,,,
4,gpu_metrics_startup_seconds_matches_ms,PASS,0,,2.0,,,
5,network_demand_hourly_grain,PASS,0,,5.0,,,
6,network_demand_by_gpu_hourly_grain,PASS,0,,5.0,,,
7,network_demand_by_gpu_required_columns_present,PASS,0,,,[],,
8,network_demand_by_gpu_capacity_fields_nonnegative,PASS,0,,5.0,,,
9,network_demand_additive_fields_nonnegative,PASS,0,,5.0,,,


API readiness assertion failures: 0


## API Readiness Failure Drill-Down
This section shows offending rows for failed API readiness checks so failures are explainable (contract bug vs sparse telemetry).


In [9]:
# Drill-down queries for common readiness failures.
# Each key is looked up by the `test_name` of a failed readiness assertion; the
# query returns up to 50 offending rows inside the validation window.
# NOTE(review): these queries name the `livepeer_analytics` database explicitly,
# while the client connects with CH_DATABASE — confirm this is intended when
# CH_DATABASE is overridden.
readiness_checks = {
    # Rows with an empty orchestrator, pipeline or GPU id, or a NULL window_start.
    'gpu_metrics_keys_not_null': '''
        SELECT *
        FROM livepeer_analytics.v_api_gpu_metrics
        WHERE window_start >= {from_ts:DateTime64(3)}
          AND window_start < {to_ts:DateTime64(3)}
          AND (
            orchestrator_address = ''
            OR pipeline = ''
            OR ifNull(gpu_id, '') = ''
            OR window_start IS NULL
          )
        ORDER BY window_start DESC
        LIMIT 50
    ''',
    # Rows whose stored failure_rate / swap_rate differ from the value recomputed
    # from the session counters by more than 0.000001.
    'gpu_metrics_rollup_fields_consistent': '''
        SELECT
          window_start,
          orchestrator_address,
          pipeline,
          model_id,
          gpu_id,
          known_sessions,
          unexcused_sessions,
          swapped_sessions,
          failure_rate,
          ifNull(unexcused_sessions / nullIf(known_sessions, 0), 0) AS recomputed_failure_rate,
          swap_rate,
          ifNull(swapped_sessions / nullIf(known_sessions, 0), 0) AS recomputed_swap_rate
        FROM livepeer_analytics.v_api_gpu_metrics
        WHERE window_start >= {from_ts:DateTime64(3)}
          AND window_start < {to_ts:DateTime64(3)}
          AND (
            abs(failure_rate - ifNull(unexcused_sessions / nullIf(known_sessions, 0), 0)) > 0.000001
            OR abs(swap_rate - ifNull(swapped_sessions / nullIf(known_sessions, 0), 0)) > 0.000001
          )
        ORDER BY window_start DESC
        LIMIT 50
    ''',
    # Rows whose window_start has a non-zero minute component.
    'network_demand_by_gpu_hourly_grain': '''
        SELECT *
        FROM livepeer_analytics.v_api_network_demand_by_gpu
        WHERE window_start >= {from_ts:DateTime64(3)}
          AND window_start < {to_ts:DateTime64(3)}
          AND toMinute(window_start) != 0
        ORDER BY window_start DESC
        LIMIT 50
    ''',
    # Rows with a negative minutes/capacity field, or capacity_rate outside [0, 1.5].
    'network_demand_by_gpu_capacity_fields_nonnegative': '''
        SELECT *
        FROM livepeer_analytics.v_api_network_demand_by_gpu
        WHERE window_start >= {from_ts:DateTime64(3)}
          AND window_start < {to_ts:DateTime64(3)}
          AND (
            inference_minutes_by_gpu_type < 0
            OR used_inference_minutes < 0
            OR available_capacity_minutes < 0
            OR capacity_rate < 0
            OR capacity_rate > 1.5
          )
        ORDER BY window_start DESC
        LIMIT 50
    ''',
    # Rows whose stored success_ratio / no_swap_ratio differ from the value
    # recomputed from the session counters by more than 0.000001.
    'sla_compliance_rollup_safe': '''
        SELECT
          window_start,
          orchestrator_address,
          pipeline,
          model_id,
          gpu_id,
          known_sessions,
          unexcused_sessions,
          swapped_sessions,
          success_ratio,
          ifNull(1 - (unexcused_sessions / nullIf(known_sessions, 0)), 0) AS recomputed_success_ratio,
          no_swap_ratio,
          ifNull(1 - (swapped_sessions / nullIf(known_sessions, 0)), 0) AS recomputed_no_swap_ratio
        FROM livepeer_analytics.v_api_sla_compliance
        WHERE window_start >= {from_ts:DateTime64(3)}
          AND window_start < {to_ts:DateTime64(3)}
          AND (
            abs(success_ratio - ifNull(1 - (unexcused_sessions / nullIf(known_sessions, 0)), 0)) > 0.000001
            OR abs(no_swap_ratio - ifNull(1 - (swapped_sessions / nullIf(known_sessions, 0)), 0)) > 0.000001
          )
        ORDER BY window_start DESC
        LIMIT 50
    '''
}

# Guard against running this cell before the API readiness assertions cell.
if 'api_readiness_assertions' not in globals():
    print('Run API readiness assertions first.')
else:
    is_failed = api_readiness_assertions['status'].eq('FAIL')
    failed_tests = api_readiness_assertions.loc[is_failed, 'test_name'].tolist()
    print('Failed readiness tests:', failed_tests)
    for t in failed_tests:
        sql = readiness_checks.get(t)
        if sql:
            print(f'\n=== {t} offending rows ===')
            df = query_df(sql, params)
            display(df)
        else:
            # Not every assertion has a registered drill-down query.
            print(f'\n{t}: no drill-down query registered')


Failed readiness tests: []


## Serving Schema Sanity
Quick schema contract check for serving views to catch column naming drift before API integration.


In [10]:
# Columns each serving view must expose for API integration.
required_cols = {
    'v_api_gpu_metrics': ['window_start', 'orchestrator_address', 'pipeline', 'model_id', 'gpu_id', 'status_samples', 'known_sessions'],
    'v_api_network_demand': ['window_start', 'gateway', 'pipeline', 'total_streams', 'total_sessions', 'total_inference_minutes'],
    'v_api_network_demand_by_gpu': ['window_start', 'gateway', 'orchestrator_address', 'pipeline', 'model_id', 'gpu_id', 'capacity_rate'],
    'v_api_sla_compliance': ['window_start', 'orchestrator_address', 'pipeline', 'model_id', 'gpu_id', 'sla_score'],
}

# DESCRIBE each view once; the result feeds both the contract check and the listing below.
view_schemas = {
    view_name: query_df(f"DESCRIBE TABLE livepeer_analytics.{view_name}")
    for view_name in required_cols
}

rows = []
for view_name, must_have in required_cols.items():
    cols = set(view_schemas[view_name]['name'].tolist())
    missing = [c for c in must_have if c not in cols]
    rows.append({
        'view_name': view_name,
        'column_count': len(cols),
        'missing_required_columns': missing,
        'status': 'PASS' if not missing else 'FAIL'
    })

schema_check_df = pd.DataFrame(rows)
display(schema_check_df)

for view_name, schema_df in view_schemas.items():
    print(f'\n=== {view_name} columns ===')
    display(schema_df)


Unnamed: 0,view_name,column_count,missing_required_columns,status
0,v_api_gpu_metrics,33,[],PASS
1,v_api_network_demand,19,[],PASS
2,v_api_network_demand_by_gpu,21,[],PASS
3,v_api_sla_compliance,16,[],PASS



=== v_api_gpu_metrics columns ===


Unnamed: 0,name,type,default_type,default_expression,comment,codec_expression,ttl_expression
0,window_start,DateTime('UTC'),,,,,
1,orchestrator_address,String,,,,,
2,pipeline,String,,,,,
3,model_id,Nullable(String),,,,,
4,gpu_id,Nullable(String),,,,,
5,region,Nullable(String),,,,,
6,gpu_name,Nullable(String),,,,,
7,gpu_memory_total,Nullable(UInt64),,,,,
8,runner_version,Nullable(String),,,,,
9,cuda_version,Nullable(String),,,,,



=== v_api_network_demand columns ===


Unnamed: 0,name,type,default_type,default_expression,comment,codec_expression,ttl_expression
0,window_start,DateTime('UTC'),,,,,
1,gateway,String,,,,,
2,region,Nullable(String),,,,,
3,pipeline,String,,,,,
4,total_streams,UInt64,,,,,
5,total_sessions,UInt64,,,,,
6,total_inference_minutes,Float64,,,,,
7,avg_output_fps,Float64,,,,,
8,known_sessions,UInt64,,,,,
9,served_sessions,UInt64,,,,,



=== v_api_network_demand_by_gpu columns ===


Unnamed: 0,name,type,default_type,default_expression,comment,codec_expression,ttl_expression
0,window_start,DateTime('UTC'),,,,,
1,gateway,String,,,,,
2,orchestrator_address,String,,,,,
3,region,Nullable(String),,,,,
4,pipeline,String,,,,,
5,model_id,Nullable(String),,,,,
6,gpu_id,Nullable(String),,,,,
7,gpu_type,String,,,,,
8,total_streams,UInt64,,,,,
9,total_sessions,UInt64,,,,,



=== v_api_sla_compliance columns ===


Unnamed: 0,name,type,default_type,default_expression,comment,codec_expression,ttl_expression
0,window_start,DateTime('UTC'),,,,,
1,orchestrator_address,String,,,,,
2,pipeline,String,,,,,
3,model_id,Nullable(String),,,,,
4,gpu_id,Nullable(String),,,,,
5,region,Nullable(String),,,,,
6,known_sessions,UInt64,,,,,
7,success_sessions,UInt64,,,,,
8,excused_sessions,UInt64,,,,,
9,unexcused_sessions,UInt64,,,,,


## Raw -> Silver Correlation Checks

These rows show that raw typed records are carried into silver facts using `source_event_uid` correlation.

In [11]:
# Keep only the raw -> silver projection tests from the pipeline assertion results.
projection_checks = pipeline_assertions[
    pipeline_assertions['test_name'].str.contains('_raw_to_silver_projection', na=False, regex=False)
]
# reindex (not [...]) so the table still renders, with NaN, when a diagnostic column
# is absent — e.g. when every projection test failed with a query error.
display(projection_checks.reindex(
    columns=['test_name', 'status', 'failed_rows', 'typed_rows', 'projected_rows', 'missing_in_silver']
))


Unnamed: 0,test_name,status,failed_rows,typed_rows,projected_rows,missing_in_silver
5,status_raw_to_silver_projection,PASS,0.0,25.0,25.0,0.0
6,trace_raw_to_silver_projection,PASS,0.0,54.0,54.0,0.0
7,ingest_raw_to_silver_projection,PASS,0.0,0.0,0.0,0.0


## Contract Coverage Profile (Telemetry Completeness)
Profiles key sparsity signals to separate telemetry gaps from serving-model bugs.


In [12]:
# Per-view sparsity counters for the validation window: row count plus counts of
# empty pipeline / GPU id / gateway values and zero-status-sample rows. Counters
# that do not apply to a view are emitted as NULL so both branches share one shape.
coverage_sql = '''
SELECT *
FROM
(
  SELECT
    'v_api_gpu_metrics' AS object_name,
    count() AS rows_window,
    countIf(pipeline = '') AS empty_pipeline_rows,
    countIf(ifNull(gpu_id, '') = '') AS empty_gpu_rows,
    countIf(status_samples = 0) AS zero_status_sample_rows,
    CAST(NULL AS Nullable(UInt64)) AS empty_gateway_rows
  FROM livepeer_analytics.v_api_gpu_metrics
  WHERE window_start >= {from_ts:DateTime64(3)}
    AND window_start < {to_ts:DateTime64(3)}

  UNION ALL

  SELECT
    'v_api_network_demand' AS object_name,
    count() AS rows_window,
    countIf(pipeline = '') AS empty_pipeline_rows,
    CAST(NULL AS Nullable(UInt64)) AS empty_gpu_rows,
    CAST(NULL AS Nullable(UInt64)) AS zero_status_sample_rows,
    countIf(gateway = '') AS empty_gateway_rows
  FROM livepeer_analytics.v_api_network_demand
  WHERE window_start >= {from_ts:DateTime64(3)}
    AND window_start < {to_ts:DateTime64(3)}
)
ORDER BY object_name
'''

display(query_df(coverage_sql, params))

# Hourly breakdown of status samples: total rows, rows with positive vs zero
# output_fps, and rows with an empty gateway.
hourly_sparsity_sql = '''
SELECT
  toStartOfInterval(sample_ts, INTERVAL 1 HOUR) AS window_start,
  count() AS status_rows,
  countIf(output_fps > 0) AS fps_positive_rows,
  countIf(output_fps = 0) AS fps_zero_rows,
  countIf(gateway = '') AS empty_gateway_rows
FROM livepeer_analytics.fact_stream_status_samples
WHERE sample_ts >= {from_ts:DateTime64(3)}
  AND sample_ts < {to_ts:DateTime64(3)}
GROUP BY window_start
ORDER BY window_start DESC
'''

print('\nStatus sample sparsity by hour:')
display(query_df(hourly_sparsity_sql, params))


Unnamed: 0,object_name,rows_window,empty_pipeline_rows,empty_gpu_rows,zero_status_sample_rows,empty_gateway_rows
0,v_api_gpu_metrics,2,0,0.0,0.0,
1,v_api_network_demand,5,0,,,0.0



Status sample sparsity by hour:


Unnamed: 0,window_start,status_rows,fps_positive_rows,fps_zero_rows,empty_gateway_rows
0,2026-02-24 23:00:00,11,8,3,0
1,2026-02-24 22:00:00,9,6,3,0
2,2026-02-24 20:00:00,5,3,2,0


## Scenario Candidate Discovery

In [13]:
from IPython.display import Markdown

# Run every named query block from scenario_candidates.sql against the window.
# `scenario_candidates` is keyed by scenario name; `scenario_rows` keeps the same
# frames in file order for the display loop further down.
scenario_blocks = parse_blocks(SQL_DIR / 'scenario_candidates.sql', '-- QUERY:')
scenario_candidates: dict[str, pd.DataFrame] = {}
scenario_rows = []
for name, sql in scenario_blocks:
    df = scenario_candidates[name] = query_df(sql, params)
    scenario_rows.append((name, df))

# Reviewer notes rendered above each scenario table.
scenario_review_guidance = {
    'scenario_1_clean_success_no_swap_fps_gt_12': (
        '- Goal: confirm clean successful sessions with sustained output FPS and no swaps.\n'
        '- Validate: `avg_output_fps > 12`, `segment_orchestrators` is stable (typically 1), and IDs map to expected fixture rows.'
    ),
    'scenario_2_no_orchestrator_then_closed': (
        '- Goal: confirm startup failure path where gateway cannot find orchestrators and stream closes.\n'
        '- Validate: `startup_success=0`, `has_no_orch=1`, `has_close=1`, and timestamps/IDs are unique per row.'
    ),
    'scenario_3_success_with_swap': (
        '- Goal: confirm successful sessions with explicit/derived swap evidence under current lifecycle semantics.\n'
        '- Validate: nonzero `confirmed_swap_count` or `inferred_orchestrator_change_count`, plus matching stream/request/session IDs.'
    ),
    'scenario_4_success_with_param_updates': (
        '- Goal: confirm successful sessions with parameter updates.\n'
        '- Validate: non-empty rows when data exists; if empty, treat as data-availability signal rather than parser failure.'
    ),
}

# Every session id that landed in at least one scenario bucket.
classified_ids = set()
for df in scenario_candidates.values():
    if df is None or df.empty or 'workflow_session_id' not in df.columns:
        continue
    classified_ids.update(df['workflow_session_id'].astype(str).tolist())

# Fallout source: latest row per session in the window (highest version wins),
# enriched with the session's average output FPS and two trace-edge flags.
fallout_sql = """
WITH fs_latest AS
(
  SELECT
    workflow_session_id,
    stream_id,
    request_id,
    session_start_ts,
    session_end_ts,
    known_stream,
    startup_success,
    startup_excused,
    startup_unexcused,
    confirmed_swap_count,
    inferred_orchestrator_change_count,
    swap_count,
    orchestrator_address,
    gpu_id,
    model_id,
    pipeline
  FROM
  (
    SELECT
      *,
      row_number() OVER (
        PARTITION BY workflow_session_id
        ORDER BY version DESC, session_start_ts DESC, session_end_ts DESC
      ) AS rn
    FROM livepeer_analytics.fact_workflow_sessions FINAL
    WHERE session_start_ts >= {from_ts:DateTime64(3)}
      AND session_start_ts < {to_ts:DateTime64(3)}
  )
  WHERE rn = 1
)
SELECT
  f.workflow_session_id AS workflow_session_id,
  f.stream_id AS stream_id,
  f.request_id AS request_id,
  f.session_start_ts AS session_start_ts,
  f.session_end_ts AS session_end_ts,
  ifNull(s.avg_output_fps, 0) AS avg_output_fps,
  f.known_stream AS known_stream,
  f.startup_success AS startup_success,
  f.startup_excused AS startup_excused,
  f.startup_unexcused AS startup_unexcused,
  f.confirmed_swap_count AS confirmed_swap_count,
  f.inferred_orchestrator_change_count AS inferred_orchestrator_change_count,
  f.swap_count AS swap_count,
  f.orchestrator_address AS orchestrator_address,
  f.gpu_id AS gpu_id,
  f.model_id AS model_id,
  f.pipeline AS pipeline,
  ifNull(tf.has_no_orch, 0) AS has_no_orch,
  ifNull(tf.has_close, 0) AS has_close
FROM fs_latest f
LEFT JOIN
(
  SELECT workflow_session_id, avg(output_fps) AS avg_output_fps
  FROM livepeer_analytics.fact_stream_status_samples
  GROUP BY workflow_session_id
) s USING (workflow_session_id)
LEFT JOIN
(
  SELECT
    workflow_session_id,
    max(toUInt8(trace_type = 'gateway_no_orchestrators_available')) AS has_no_orch,
    max(toUInt8(trace_type = 'gateway_ingest_stream_closed')) AS has_close
  FROM livepeer_analytics.fact_stream_trace_edges
  GROUP BY workflow_session_id
) tf USING (workflow_session_id)
ORDER BY f.session_start_ts DESC
"""

# Single column list shared by the empty-result fallback frame and the final display.
fallout_display_cols = [
    'workflow_session_id', 'stream_id', 'request_id', 'session_start_ts', 'session_end_ts',
    'avg_output_fps', 'known_stream', 'startup_success', 'startup_excused', 'startup_unexcused',
    'confirmed_swap_count', 'inferred_orchestrator_change_count', 'swap_count',
    'orchestrator_address', 'gpu_id', 'model_id', 'pipeline', 'has_no_orch', 'has_close'
]

all_sessions_df = query_df(fallout_sql, params)
if 'workflow_session_id' not in all_sessions_df.columns:
    # The query came back without the expected schema; substitute an empty, correctly shaped frame.
    all_sessions_df = pd.DataFrame(columns=list(fallout_display_cols))

# Fallout = sessions in the window that no scenario bucket claimed.
if classified_ids and 'workflow_session_id' in all_sessions_df.columns:
    fallout_df = all_sessions_df.loc[
        ~all_sessions_df['workflow_session_id'].astype(str).isin(classified_ids)
    ].copy()
else:
    fallout_df = all_sessions_df.copy()

# Sanity check: scenario tables have rows but the fallout source has none. Print a
# direct distinct-session count for the same window so stale state is easy to spot.
if classified_ids and len(all_sessions_df) == 0:
    direct_count_df = query_df("""
    SELECT countDistinct(workflow_session_id) AS sessions_in_window
    FROM livepeer_analytics.fact_workflow_sessions
    WHERE session_start_ts >= {from_ts:DateTime64(3)}
      AND session_start_ts < {to_ts:DateTime64(3)}
    """, params)
    if direct_count_df.empty:
        direct_count = 0
    else:
        direct_count = int(direct_count_df.iloc[0]['sessions_in_window'])
    print('Warning: fallout source query returned 0 rows while scenario tables are non-empty.')
    print(f'Direct distinct session count in window: {direct_count}')

print(f"Notebook DB target: {CH_HOST}:{CH_PORT}/{CH_DATABASE} (secure={CH_SECURE})")
print(f"Sessions in window (latest-per-id): {len(all_sessions_df)}")
print(f"Classified scenario ids: {len(classified_ids)}")
print(f"Unclassified sessions: {len(fallout_df)}")

display(Markdown("### Classified Sessions"))

# One table per scenario, capped at ten rows, each preceded by its review guidance.
for name, df in scenario_rows:
    print(f'\n{name}: {len(df)} candidate rows')
    display(Markdown("<br>"))
    display(Markdown(
        "**Review guidance**\n"
        + scenario_review_guidance.get(name, '- Goal: validate scenario row integrity.')
    ))
    display(df.head(10))

display(Markdown("### Scenario Fallout (Unclassified Sessions)"))
display(Markdown("<br>"))
display(Markdown(
    "**Review guidance**\n"
    "- Goal: expose sessions in-window that did not match any scenario bucket.\n"
    "- Validate: whether each row should be moved into an existing scenario, a new scenario, or intentionally remain unclassified due to weak evidence/edge conditions."
))
display(fallout_df.reindex(columns=fallout_display_cols).head(50))


Notebook DB target: localhost:8123/livepeer_analytics (secure=False)
Sessions in window (latest-per-id): 9
Classified scenario ids: 8
Unclassified sessions: 1


### Classified Sessions


scenario_1_clean_success_no_swap_fps_gt_12: 4 candidate rows


<br>

**Review guidance**
- Goal: confirm clean successful sessions with sustained output FPS and no swaps.
- Validate: `avg_output_fps > 12`, `segment_orchestrators` is stable (typically 1), and IDs map to expected fixture rows.

Unnamed: 0,scenario_name,workflow_session_id,stream_id,request_id,session_start_ts,session_end_ts,avg_output_fps,segment_orchestrators
0,scenario_1_clean_success_no_swap_fps_gt_12,aiJobTesterStream-1771976762954588419|e565fe91,aiJobTesterStream-1771976762954588419,e565fe91,2026-02-24 23:46:07.905,2026-02-24 23:46:38.054,12.418062,1
1,scenario_1_clean_success_no_swap_fps_gt_12,aiJobTesterStream-1771976699132148122|38f3dfdf,aiJobTesterStream-1771976699132148122,38f3dfdf,2026-02-24 23:45:04.421,2026-02-24 23:45:49.686,13.987728,0
2,scenario_1_clean_success_no_swap_fps_gt_12,aiJobTesterStream-1771975428405499248|f6652a52,aiJobTesterStream-1771975428405499248,f6652a52,2026-02-24 23:23:52.905,2026-02-24 23:24:37.143,15.732481,0
3,scenario_1_clean_success_no_swap_fps_gt_12,aiJobTesterStream-1771973325837950594|a378a0a3,aiJobTesterStream-1771973325837950594,a378a0a3,2026-02-24 22:48:50.929,2026-02-24 22:50:02.512,14.438464,0



scenario_2_no_orchestrator_then_closed: 3 candidate rows


<br>

**Review guidance**
- Goal: confirm startup failure path where gateway cannot find orchestrators and stream closes.
- Validate: `startup_success=0`, `has_no_orch=1`, `has_close=1`, and timestamps/IDs are unique per row.

Unnamed: 0,scenario_name,workflow_session_id,stream_id,request_id,session_start_ts,session_end_ts,startup_success,startup_excused,startup_unexcused,has_no_orch,has_close
0,scenario_2_no_orchestrator_then_closed,aiJobTesterStream-1771976754969242792|8dafcf82,aiJobTesterStream-1771976754969242792,8dafcf82,2026-02-24 23:45:59.842,2026-02-24 23:45:59.945,0,1,0,1,1
1,scenario_2_no_orchestrator_then_closed,aiJobTesterStream-1771976747082240198|723c0243,aiJobTesterStream-1771976747082240198,723c0243,2026-02-24 23:45:52.221,2026-02-24 23:45:52.325,0,1,0,1,1
2,scenario_2_no_orchestrator_then_closed,aiJobTesterStream-1771976691186291404|b4d3bff1,aiJobTesterStream-1771976691186291404,b4d3bff1,2026-02-24 23:44:56.179,2026-02-24 23:44:58.536,0,1,0,1,1



scenario_3_success_with_swap: 1 candidate rows


<br>

**Review guidance**
- Goal: confirm successful sessions with explicit/derived swap evidence under current lifecycle semantics.
- Validate: nonzero `confirmed_swap_count` or `inferred_orchestrator_change_count`, plus matching stream/request/session IDs.

Unnamed: 0,scenario_name,workflow_session_id,stream_id,request_id,session_start_ts,session_end_ts,confirmed_swap_count,inferred_orchestrator_change_count,swap_count,segment_orchestrators
0,scenario_3_success_with_swap,aiJobTesterStream-1771963662960919069|f2690a64,aiJobTesterStream-1771963662960919069,f2690a64,2026-02-24 20:07:50.618,2026-02-24 20:08:52.110,1,0,1,0



scenario_4_success_with_param_updates: 0 candidate rows


<br>

**Review guidance**
- Goal: confirm successful sessions with parameter updates.
- Validate: non-empty rows when data exists; if empty, treat as data-availability signal rather than parser failure.

### Scenario Fallout (Unclassified Sessions)

<br>

**Review guidance**
- Goal: expose sessions in-window that did not match any scenario bucket.
- Validate: whether each row should be moved into an existing scenario, a new scenario, or intentionally remain unclassified due to weak evidence/edge conditions.

Unnamed: 0,workflow_session_id,stream_id,request_id,session_start_ts,session_end_ts,avg_output_fps,known_stream,startup_success,startup_excused,startup_unexcused,confirmed_swap_count,inferred_orchestrator_change_count,swap_count,orchestrator_address,gpu_id,model_id,pipeline,has_no_orch,has_close
7,aiJobTesterStream-1771970506672291079|475af460,aiJobTesterStream-1771970506672291079,475af460,2026-02-24 22:01:54.154,2026-02-24 22:02:45.963,7.15338,1,1,0,0,0,0,0,,,,streamdiffusion-sdxl-v2v,0,1


### Scenario Coverage Assertions
- What this checks: Whether selected windows include expected scenario categories used for fixture generation.
- Why it matters: Protects against blind spots in scenario-based testing.
- How to read failures: Usually means sparse production windows or missing event classes, not necessarily pipeline breakage.


In [14]:
# Show the reviewer guidance, then execute the scenario-coverage assertion suite.
display(Markdown(
    "**Review guidance**\n"
    "- Goal: verify each scenario class has at least one candidate in the current window (except explicitly informational checks).\n"
    "- Validate: `status=PASS`, `failed_rows=0`, and `candidates` counts align with the scenario discovery tables above."
))

scenario_assertions = run_assertion_file(SQL_DIR / 'assertions_scenario_candidates.sql')
display(scenario_assertions)

# Keep only the failing assertions so the summary line reports their count.
scenario_failures = scenario_assertions.loc[scenario_assertions['status'] == 'FAIL']
print(f'Scenario assertion failures: {len(scenario_failures)}')


**Review guidance**
- Goal: verify each scenario class has at least one candidate in the current window (except explicitly informational checks).
- Validate: `status=PASS`, `failed_rows=0`, and `candidates` counts align with the scenario discovery tables above.

Unnamed: 0,test_name,status,failed_rows,candidates
0,scenario_1_clean_success_no_swap_fps_gt_12_exists,PASS,0,1
1,scenario_2_no_orchestrator_then_closed_exists,PASS,0,1
2,scenario_3_success_with_swap_exists,PASS,0,1
3,scenario_4_success_with_param_updates_exists,PASS,0,0


Scenario assertion failures: 0


## Interactive Session Edge Explorer

Select a scenario-candidate session from the dropdown; its diagnostics, GPU correlation, raw event/edge timeline, and gold-row drill-down re-render automatically.

In [15]:
# Reactive session explorer: changing the dropdown refreshes all correlated outputs below.

from IPython.display import Markdown

# Build one dropdown entry per scenario-candidate row. The option value is a
# (scenario, row index, session id, stream id, request id) tuple, so each entry
# maps 1:1 to a row of the scenario tables shown above.
session_options = []
for scenario_name, df in scenario_candidates.items():
    has_sessions = df is not None and not df.empty and 'workflow_session_id' in df.columns
    if not has_sessions:
        continue
    for row_idx, row in df.reset_index(drop=True).iterrows():
        sid = str(row['workflow_session_id'])
        stream_id = str(row.get('stream_id', '') or '')
        request_id = str(row.get('request_id', '') or '')
        # Missing ids get a visible placeholder in the label only; the value keeps ''.
        label = ' | '.join((
            f"{scenario_name}",
            f"row={row_idx}",
            f"stream={stream_id or '(no_stream_id)'}",
            f"request={request_id or '(no_request_id)'}",
        ))
        session_options.append((label, (scenario_name, int(row_idx), sid, stream_id, request_id)))

session_picker = widgets.Dropdown(
    description='Session:',
    options=session_options,
    layout=widgets.Layout(width='95%'),
)

# One output pane per section of the explorer, in display order.
reactive_out = {
    pane: widgets.Output()
    for pane in ('diagnostics', 'gpu_observed', 'capability_corr', 'timeline', 'audit', 'hourly')
}


def _selected_context() -> dict | None:
    selected = getattr(session_picker, 'value', None)
    if not selected:
        return None
    if isinstance(selected, tuple) and len(selected) == 5:
        scenario_name, row_idx, sid, stream_id, request_id = selected
        return {
            'scenario_name': str(scenario_name),
            'row_idx': int(row_idx),
            'sid': str(sid),
            'stream_id': str(stream_id),
            'request_id': str(request_id),
        }
    return {
        'scenario_name': 'unknown',
        'row_idx': -1,
        'sid': str(selected),
        'stream_id': '',
        'request_id': '',
    }


def _selected_sid() -> str | None:
    ctx = _selected_context()
    if not ctx:
        return None
    return ctx['sid']


def _render_selected_session() -> None:
    """Re-render every explorer pane for the session currently chosen in the picker.

    All six `reactive_out` panes are cleared first. With no selection, a notice is
    printed into the diagnostics pane and nothing else happens. Otherwise six
    queries are run with the notebook `params` plus the selected `sid`, and each
    result is displayed in its own pane, in this order: diagnostics, gpu_observed,
    capability_corr, timeline, audit, hourly.

    Any exception that reaches the outer `try` is reported as a one-line message
    in the diagnostics pane rather than raised.
    NOTE(review): an ipywidgets `Output` used as a context manager may itself
    capture exceptions raised in its body when running under IPython; confirm
    whether the outer `except` is ever reached in a live kernel.
    """
    selected_ctx = _selected_context()
    sid = selected_ctx['sid'] if selected_ctx else None
    # Wipe every pane up front so output from the previous selection never lingers.
    for out in reactive_out.values():
        out.clear_output()

    # Nothing selected (e.g. the dropdown has no options): leave a notice and stop.
    if not sid:
        with reactive_out['diagnostics']:
            print('No scenario candidates in current window.')
        return

    # One-row lifecycle summary. `latest_session` takes each field from the
    # highest-version fact row; the other CTEs count evidence for the same session
    # in trace edges, typed trace events, segments, AI stream events and
    # param-update facts. Each CTE is a single aggregate row, so CROSS JOIN simply
    # places them side by side.
    # The typed/raw event CTEs have no workflow_session_id filter; they match on
    # request_id AND stream_id when the session has both, otherwise on whichever
    # one is non-empty, and only inside the notebook window (from_ts/to_ts).
    diagnostics_sql = """
    WITH
      {sid:String} AS sid,
      latest_session AS
      (
        SELECT
          argMax(version, version) AS latest_version,
          argMax(session_start_ts, version) AS session_start_ts,
          argMax(session_end_ts, version) AS session_end_ts,
          argMax(stream_id, version) AS stream_id,
          argMax(request_id, version) AS request_id,
          argMax(orchestrator_address, version) AS latest_orchestrator_address,
          argMax(known_stream, version) AS known_stream,
          argMax(startup_success, version) AS startup_success,
          argMax(startup_excused, version) AS startup_excused,
          argMax(startup_unexcused, version) AS startup_unexcused,
          argMax(swap_count, version) AS fact_swap_count,
          argMax(error_count, version) AS fact_error_count,
          argMax(excusable_error_count, version) AS fact_excusable_error_count
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      ),
      trace_counts AS
      (
        SELECT
          countIf(trace_type = 'orchestrator_swap') AS explicit_swap_edges,
          uniqExactIf(orchestrator_address, orchestrator_address != '') AS trace_orchestrators_seen
        FROM livepeer_analytics.fact_stream_trace_edges
        WHERE workflow_session_id = sid
      ),
      typed_trace_counts AS
      (
        SELECT
          countIf(trace_type = 'orchestrator_swap') AS typed_explicit_swap_edges,
          uniqExactIf(orchestrator_address, orchestrator_address != '') AS typed_trace_orchestrators_seen
        FROM livepeer_analytics.stream_trace_events
        WHERE
        (
          (
            (SELECT request_id FROM latest_session) != ''
            AND (SELECT stream_id FROM latest_session) != ''
            AND request_id = (SELECT request_id FROM latest_session)
            AND stream_id = (SELECT stream_id FROM latest_session)
          )
          OR
          (
            (SELECT request_id FROM latest_session) != ''
            AND (SELECT stream_id FROM latest_session) = ''
            AND request_id = (SELECT request_id FROM latest_session)
          )
          OR
          (
            (SELECT request_id FROM latest_session) = ''
            AND (SELECT stream_id FROM latest_session) != ''
            AND stream_id = (SELECT stream_id FROM latest_session)
          )
        )
          AND event_timestamp >= {from_ts:DateTime64(3)}
          AND event_timestamp < {to_ts:DateTime64(3)}
      ),
      segment_counts AS
      (
        SELECT
          count() AS segment_rows,
          uniqExactIf(orchestrator_address, orchestrator_address != '') AS segment_orchestrators_seen
        FROM livepeer_analytics.fact_workflow_session_segments
        WHERE workflow_session_id = sid
      ),
      ai_counts AS
      (
        SELECT
          countIf(event_type = 'error') AS raw_error_events,
          countIf(event_type = 'params_update') AS raw_params_update_events
        FROM livepeer_analytics.ai_stream_events
        WHERE
        (
          (
            (SELECT request_id FROM latest_session) != ''
            AND (SELECT stream_id FROM latest_session) != ''
            AND request_id = (SELECT request_id FROM latest_session)
            AND stream_id = (SELECT stream_id FROM latest_session)
          )
          OR
          (
            (SELECT request_id FROM latest_session) != ''
            AND (SELECT stream_id FROM latest_session) = ''
            AND request_id = (SELECT request_id FROM latest_session)
          )
          OR
          (
            (SELECT request_id FROM latest_session) = ''
            AND (SELECT stream_id FROM latest_session) != ''
            AND stream_id = (SELECT stream_id FROM latest_session)
          )
        )
          AND event_timestamp >= {from_ts:DateTime64(3)}
          AND event_timestamp < {to_ts:DateTime64(3)}
      ),
      param_fact AS
      (
        SELECT count() AS fact_param_update_rows
        FROM livepeer_analytics.fact_workflow_param_updates
        WHERE workflow_session_id = sid
      )
    SELECT
      latest_session.latest_version,
      latest_session.session_start_ts,
      latest_session.session_end_ts,
      latest_session.stream_id,
      latest_session.request_id,
      latest_session.latest_orchestrator_address,
      latest_session.known_stream,
      latest_session.startup_success,
      latest_session.startup_excused,
      latest_session.startup_unexcused,
      latest_session.fact_swap_count,
      trace_counts.explicit_swap_edges,
      trace_counts.trace_orchestrators_seen,
      typed_trace_counts.typed_explicit_swap_edges,
      typed_trace_counts.typed_trace_orchestrators_seen,
      segment_counts.segment_rows,
      segment_counts.segment_orchestrators_seen,
      ai_counts.raw_error_events,
      latest_session.fact_error_count,
      latest_session.fact_excusable_error_count,
      ai_counts.raw_params_update_events,
      param_fact.fact_param_update_rows
    FROM latest_session
    CROSS JOIN trace_counts
    CROSS JOIN typed_trace_counts
    CROSS JOIN segment_counts
    CROSS JOIN ai_counts
    CROSS JOIN param_fact
    """

    # GPU/model attribution as recorded on the session row itself, next to the
    # distinct non-empty gpu_id/model_id values seen in its segments and
    # param-update facts.
    gpu_observed_sql = """
    WITH
      {sid:String} AS sid,
      latest_session AS
      (
        SELECT
          argMax(stream_id, version) AS stream_id,
          argMax(request_id, version) AS request_id,
          argMax(orchestrator_address, version) AS orchestrator_address,
          argMax(pipeline, version) AS pipeline,
          argMax(model_id, version) AS model_id,
          argMax(gpu_id, version) AS gpu_id,
          argMax(session_start_ts, version) AS session_start_ts,
          argMax(session_end_ts, version) AS session_end_ts
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      ),
      segment_gpus AS
      (
        SELECT
          groupUniqArrayIf(ifNull(gpu_id, ''), ifNull(gpu_id, '') != '') AS segment_gpu_ids,
          groupUniqArrayIf(ifNull(model_id, ''), ifNull(model_id, '') != '') AS segment_model_ids
        FROM livepeer_analytics.fact_workflow_session_segments
        WHERE workflow_session_id = sid
      ),
      param_gpus AS
      (
        SELECT
          groupUniqArrayIf(ifNull(gpu_id, ''), ifNull(gpu_id, '') != '') AS param_update_gpu_ids,
          groupUniqArrayIf(ifNull(model_id, ''), ifNull(model_id, '') != '') AS param_update_model_ids
        FROM livepeer_analytics.fact_workflow_param_updates
        WHERE workflow_session_id = sid
      )
    SELECT
      sid AS workflow_session_id,
      latest_session.stream_id,
      latest_session.request_id,
      latest_session.orchestrator_address,
      latest_session.pipeline,
      latest_session.model_id,
      latest_session.gpu_id AS session_gpu_id,
      (SELECT segment_gpu_ids FROM segment_gpus) AS segment_gpu_ids,
      (SELECT param_update_gpu_ids FROM param_gpus) AS param_update_gpu_ids,
      (SELECT segment_model_ids FROM segment_gpus) AS segment_model_ids,
      (SELECT param_update_model_ids FROM param_gpus) AS param_update_model_ids,
      latest_session.session_start_ts,
      latest_session.session_end_ts
    FROM latest_session
    """

    # Capability snapshots for the session's latest orchestrator, taken from 24h
    # before session start to 24h after session end (now, if the end is NULL).
    # A snapshot is kept when it matches on pipeline, model or an observed GPU;
    # the *_match flags say which. Output is capped at the 200 newest rows.
    capability_corr_sql = """
    WITH
      {sid:String} AS sid,
      latest_session AS
      (
        SELECT
          argMax(orchestrator_address, version) AS orchestrator_address,
          argMax(pipeline, version) AS pipeline,
          argMax(model_id, version) AS model_id,
          argMax(session_start_ts, version) AS session_start_ts,
          argMax(session_end_ts, version) AS session_end_ts
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      ),
      observed AS
      (
        SELECT
          groupUniqArrayIf(ifNull(gpu_id, ''), ifNull(gpu_id, '') != '') AS gpu_ids,
          groupUniqArrayIf(ifNull(model_id, ''), ifNull(model_id, '') != '') AS model_ids
        FROM livepeer_analytics.fact_workflow_session_segments
        WHERE workflow_session_id = sid
      )
    SELECT
      d.snapshot_ts,
      d.orchestrator_address,
      d.orchestrator_proxy_address,
      d.pipeline,
      d.model_id,
      d.gpu_id,
      d.gpu_name,
      d.runner_version,
      d.region,
      toUInt8(d.pipeline = (SELECT pipeline FROM latest_session)) AS pipeline_match,
      toUInt8(ifNull(d.model_id, '') = ifNull((SELECT model_id FROM latest_session), '') OR has((SELECT model_ids FROM observed), ifNull(d.model_id, ''))) AS model_match,
      toUInt8(has((SELECT gpu_ids FROM observed), ifNull(d.gpu_id, ''))) AS gpu_match
    FROM livepeer_analytics.dim_orchestrator_capability_snapshots d
    WHERE d.orchestrator_address = (SELECT orchestrator_address FROM latest_session)
      AND d.snapshot_ts >= (SELECT session_start_ts FROM latest_session) - INTERVAL 24 HOUR
      AND d.snapshot_ts <= coalesce((SELECT session_end_ts FROM latest_session), now64(3, 'UTC')) + INTERVAL 24 HOUR
      AND
      (
        d.pipeline = (SELECT pipeline FROM latest_session)
        OR ifNull(d.model_id, '') = ifNull((SELECT model_id FROM latest_session), '')
        OR has((SELECT model_ids FROM observed), ifNull(d.model_id, ''))
        OR has((SELECT gpu_ids FROM observed), ifNull(d.gpu_id, ''))
      )
    ORDER BY d.snapshot_ts DESC
    LIMIT 200
    """

    # Merged timeline, ordered by timestamp, from three sources: silver trace edges
    # (matched by workflow_session_id), raw typed trace events, and raw AI stream
    # events. The two raw sources use the same request_id/stream_id matching as the
    # diagnostics query, bounded by `session_window`: session start minus 1h to
    # session end plus 1h, with the notebook window as the coalesce fallback.
    timeline_sql = """
    WITH
      {sid:String} AS sid,
      latest_session AS
      (
        SELECT
          argMax(stream_id, version) AS stream_id,
          argMax(request_id, version) AS request_id
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      ),
      session_window AS
      (
        SELECT
          coalesce(argMax(session_start_ts, version), {from_ts:DateTime64(3)}) - INTERVAL 1 HOUR AS from_ts,
          coalesce(argMax(session_end_ts, version), argMax(session_start_ts, version), {to_ts:DateTime64(3)}) + INTERVAL 1 HOUR AS to_ts
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      )
    SELECT
      ts,
      source,
      detail,
      stream_id,
      request_id,
      orchestrator_address,
      data_timestamp,
      raw_event_timestamp
    FROM
    (
      SELECT edge_ts AS ts, 'fact_stream_trace_edges' AS source, trace_type AS detail, stream_id, request_id, orchestrator_address,
             edge_ts AS data_timestamp, CAST(NULL AS Nullable(DateTime64(3, 'UTC'))) AS raw_event_timestamp
      FROM livepeer_analytics.fact_stream_trace_edges
      WHERE workflow_session_id = sid

      UNION ALL

      SELECT coalesce(data_timestamp, event_timestamp) AS ts, 'raw_stream_trace_events' AS source, trace_type AS detail,
             stream_id, request_id, orchestrator_address, data_timestamp, event_timestamp AS raw_event_timestamp
      FROM livepeer_analytics.stream_trace_events
      WHERE
      (
        (
          (SELECT request_id FROM latest_session) != ''
          AND (SELECT stream_id FROM latest_session) != ''
          AND request_id = (SELECT request_id FROM latest_session)
          AND stream_id = (SELECT stream_id FROM latest_session)
        )
        OR
        (
          (SELECT request_id FROM latest_session) != ''
          AND (SELECT stream_id FROM latest_session) = ''
          AND request_id = (SELECT request_id FROM latest_session)
        )
        OR
        (
          (SELECT request_id FROM latest_session) = ''
          AND (SELECT stream_id FROM latest_session) != ''
          AND stream_id = (SELECT stream_id FROM latest_session)
        )
      )
        AND event_timestamp >= (SELECT from_ts FROM session_window)
        AND event_timestamp < (SELECT to_ts FROM session_window)

      UNION ALL

      SELECT event_timestamp AS ts, 'raw_ai_stream_events' AS source, event_type AS detail,
             stream_id, request_id, '' AS orchestrator_address,
             CAST(NULL AS Nullable(DateTime64(3, 'UTC'))) AS data_timestamp,
             event_timestamp AS raw_event_timestamp
      FROM livepeer_analytics.ai_stream_events
      WHERE
      (
        (
          (SELECT request_id FROM latest_session) != ''
          AND (SELECT stream_id FROM latest_session) != ''
          AND request_id = (SELECT request_id FROM latest_session)
          AND stream_id = (SELECT stream_id FROM latest_session)
        )
        OR
        (
          (SELECT request_id FROM latest_session) != ''
          AND (SELECT stream_id FROM latest_session) = ''
          AND request_id = (SELECT request_id FROM latest_session)
        )
        OR
        (
          (SELECT request_id FROM latest_session) = ''
          AND (SELECT stream_id FROM latest_session) != ''
          AND stream_id = (SELECT stream_id FROM latest_session)
        )
      )
        AND event_timestamp >= (SELECT from_ts FROM session_window)
        AND event_timestamp < (SELECT to_ts FROM session_window)
    ) t
    ORDER BY ts
    """

    # Key columns of the session as of its highest-version fact row.
    audit_sql = """
    WITH {sid:String} AS sid
    SELECT
      argMax(stream_id, version) AS stream_id,
      argMax(request_id, version) AS request_id,
      argMax(orchestrator_address, version) AS orchestrator_address,
      argMax(pipeline, version) AS pipeline,
      argMax(model_id, version) AS model_id,
      argMax(gpu_id, version) AS gpu_id,
      argMax(session_start_ts, version) AS session_start_ts,
      argMax(session_end_ts, version) AS session_end_ts
    FROM livepeer_analytics.fact_workflow_sessions
    WHERE workflow_session_id = sid
    """

    # Distinct (hour, orchestrator, pipeline, model, gpu) keys taken from the
    # session's status and latency samples, LEFT JOINed to v_api_gpu_metrics so
    # keys without a matching view row still appear (with NULL metrics). NULL key
    # parts are normalised to '' for the join and turned back into NULL on output.
    hourly_sql = """
    WITH
      {sid:String} AS sid,
      session_hour_keys AS
      (
        SELECT DISTINCT
          toStartOfInterval(sample_ts, INTERVAL 1 HOUR) AS window_start,
          ifNull(orchestrator_address, '') AS orchestrator_address,
          ifNull(pipeline, '') AS pipeline,
          ifNull(model_id, '') AS model_id,
          ifNull(gpu_id, '') AS gpu_id
        FROM
        (
          SELECT sample_ts, orchestrator_address, pipeline, model_id, gpu_id
          FROM livepeer_analytics.fact_stream_status_samples
          WHERE workflow_session_id = sid
          UNION ALL
          SELECT sample_ts, orchestrator_address, pipeline, model_id, gpu_id
          FROM livepeer_analytics.fact_workflow_latency_samples
          WHERE workflow_session_id = sid
        ) x
      )
    SELECT
      k.window_start,
      nullIf(k.orchestrator_address, '') AS orchestrator_address,
      nullIf(k.pipeline, '') AS pipeline,
      nullIf(k.model_id, '') AS model_id,
      nullIf(k.gpu_id, '') AS gpu_id,
      g.status_samples,
      g.avg_output_fps,
      g.prompt_to_first_frame_ms,
      g.startup_time_ms,
      g.e2e_latency_ms,
      g.known_sessions
    FROM session_hour_keys k
    LEFT JOIN livepeer_analytics.v_api_gpu_metrics g
      ON g.window_start = k.window_start
     AND ifNull(g.orchestrator_address, '') = k.orchestrator_address
     AND ifNull(g.pipeline, '') = k.pipeline
     AND ifNull(g.model_id, '') = k.model_id
     AND ifNull(g.gpu_id, '') = k.gpu_id
    ORDER BY k.window_start, k.orchestrator_address, k.pipeline, k.model_id, k.gpu_id
    """

    # Render each pane in turn. Every query receives the notebook-wide params
    # (from_ts/to_ts) plus the selected session id.
    try:
        with reactive_out['diagnostics']:
            display(Markdown('### Session Diagnostics (auto-refreshed)'))
            # Echo the picker selection so it can be compared with the query result below.
            if selected_ctx:
                print(
                    'Selected from scenario table:',
                    f"scenario={selected_ctx['scenario_name']}",
                    f"row={selected_ctx['row_idx']}",
                    f"stream_id={selected_ctx['stream_id']}",
                    f"request_id={selected_ctx['request_id']}",
                    f"workflow_session_id={selected_ctx['sid']}"
                )
            display(Markdown('- Goal: validate the selected session lifecycle summary and identity consistency.\n- Validate: stream/request/session IDs match picker, swap/error counters align with timeline evidence, and orchestrator presence is expected for this scenario.'))
            display(query_df(diagnostics_sql, {**params, 'sid': sid}))

        with reactive_out['gpu_observed']:
            display(Markdown('### Session GPU Correlation (Observed)'))
            display(Markdown('- Goal: validate GPU/model attribution carried in session, segment, and param-update facts.\n- Validate: `session_gpu_id`/model align with segment arrays; blanks indicate unattributed sessions or mapping gaps.'))
            display(query_df(gpu_observed_sql, {**params, 'sid': sid}))

        with reactive_out['capability_corr']:
            display(Markdown('### Session GPU Correlation (Capabilities)'))
            display(Markdown('- Goal: validate capability snapshots overlap the selected session context.\n- Validate: `pipeline_match`, `model_match`, and `gpu_match` flags; check recent `snapshot_ts` near session time for expected orchestrator.'))
            capability_corr_df = query_df(capability_corr_sql, {**params, 'sid': sid})
            print(f'Capability correlation rows: {len(capability_corr_df)}')
            display(capability_corr_df)

        with reactive_out['timeline']:
            display(Markdown('### Interactive Session Edge Explorer (Timeline)'))
            display(Markdown('- Goal: validate event ordering from raw traces to silver edges for the selected session.\n- Validate: expected lifecycle sequence, consistent stream/request IDs, and swap/error evidence where applicable.'))
            # The query has no LIMIT; only the first 400 rows are shown.
            display(query_df(timeline_sql, {**params, 'sid': sid}).head(400))

        with reactive_out['audit']:
            display(Markdown('### Bronze -> Silver -> Gold Audit'))
            display(Markdown('- Goal: validate the canonical gold row keys produced for this session.\n- Validate: orchestrator/pipeline/model/gpu keys are populated as expected and match scenario intent.'))
            display(query_df(audit_sql, {**params, 'sid': sid}))

        with reactive_out['hourly']:
            display(Markdown('### Gold Row Drill-Down (Hourly GPU View)'))
            display(Markdown('- Goal: validate hourly GPU serving rows backing this session.\n- Validate: key join columns (`window_start`, orchestrator/pipeline/model/gpu) map to non-null metrics where attribution exists.'))
            display(query_df(hourly_sql, {**params, 'sid': sid}))
    except Exception as exc:
        # Best-effort rendering: report the failure in the diagnostics pane instead of raising.
        with reactive_out['diagnostics']:
            print(f'Error rendering session outputs: {exc}')


def _on_session_change(change):
    """Observer callback for the session dropdown.

    Re-renders the reactive panels when the notification is for the
    ``value`` trait; a notification for any other trait is ignored.

    Args:
        change: the change notification mapping; only its ``'name'`` entry
            is inspected.
    """
    trait_name = change.get('name')
    if trait_name != 'value':
        return
    _render_selected_session()


# Re-render the reactive panels whenever the dropdown's `value` trait changes.
session_picker.observe(_on_session_change, names='value')

# Stack the picker on top, followed by the output panels in reading order.
_display_items = [
    session_picker,
    *(
        reactive_out[panel_key]
        for panel_key in (
            'diagnostics',
            'gpu_observed',
            'capability_corr',
            'timeline',
            'audit',
            'hourly',
        )
    ),
]
display(widgets.VBox(_display_items))

# Render once up front so the panels are populated before the first
# dropdown change.
_render_selected_session()



VBox(children=(Dropdown(description='Session:', layout=Layout(width='95%'), options=(('scenario_1_clean_succes…

## Session Diagnostics (Swaps, Errors, Param Updates)

This section compares confirmed and inferred swap signals for the selected session.

`swap_count` now tracks confirmed swaps only. Inferred orchestrator changes are tracked separately via `inferred_orchestrator_change_count`.
- confirmed swaps: explicit trace edge `orchestrator_swap`
- inferred changes: canonical orchestrator identity changes observed within the session

In [16]:
def resolve_selected_session() -> tuple[str | None, str | None]:
    """Pick the workflow session that the static diagnostic cells inspect.

    Resolution order:

    1. The current value of the ``session_picker`` global, when it exists and
       is truthy.
    2. The first row of the first ``scenario_candidates`` frame that carries a
       usable ``workflow_session_id``.
    3. The most recently started session inside the ``params`` time window.

    Returns:
        ``(workflow_session_id, source_label)``. ``source_label`` records
        which of the three paths produced the id. ``(None, None)`` is
        returned when no session can be found.
    """
    if 'session_picker' in globals():
        selected = getattr(session_picker, 'value', None)
        if selected:
            # Tuple values are unpacked as (scenario_name, row_idx, session_id);
            # anything else is treated as the session id itself.
            if isinstance(selected, tuple) and len(selected) >= 3:
                scenario_name, row_idx, sid = selected[:3]
                return str(sid), f"interactive:{scenario_name}:row={row_idx}"
            return str(selected), 'interactive'

    # Guarded the same way as `session_picker`, so a missing
    # `scenario_candidates` global falls through to the SQL fallback instead
    # of raising NameError.
    candidates = globals().get('scenario_candidates') or {}
    for name, df in candidates.items():
        if df.empty or 'workflow_session_id' not in df.columns:
            continue
        candidate_sid = df.iloc[0]['workflow_session_id']
        # A NULL/empty id would otherwise be stringified ('None' / 'nan') and
        # handed to the downstream queries as if it were a real session id.
        if pd.notna(candidate_sid) and str(candidate_sid):
            return str(candidate_sid), name

    # Order by session start so the result really is the latest session, as
    # the 'fallback_latest_session' label promises. Ordering by the row
    # `version` returned the most-revised session instead.
    fallback = query_df("""
    SELECT argMax(workflow_session_id, session_start_ts) AS workflow_session_id
    FROM livepeer_analytics.fact_workflow_sessions
    WHERE session_start_ts >= {from_ts:DateTime64(3)}
      AND session_start_ts < {to_ts:DateTime64(3)}
    """, params)
    if fallback.empty or not fallback.iloc[0]['workflow_session_id']:
        return None, None
    return str(fallback.iloc[0]['workflow_session_id']), 'fallback_latest_session'

# Snapshot the session choice once; the cells below read these two globals.
selected_session_id, selected_source = resolve_selected_session()
print(f'Selected session: {selected_session_id} from {selected_source}')


Selected session: aiJobTesterStream-1771976762954588419|e565fe91 from interactive:scenario_1_clean_success_no_swap_fps_gt_12:row=0


In [17]:
if selected_session_id:
    # One-row diagnostic for the selected session. Every CTE is an aggregate
    # without GROUP BY, so the CROSS JOINs below yield exactly one row:
    #   latest_session     - latest-version column values from fact_workflow_sessions
    #   trace_counts       - `orchestrator_swap` edges / distinct orchestrators in fact_stream_trace_edges
    #   typed_trace_counts - the same counts from stream_trace_events, matched on
    #                        request_id and/or stream_id (whichever the session has)
    #                        inside the notebook time window
    #   segment_counts     - rows / distinct orchestrators in fact_workflow_session_segments
    #   ai_counts          - `error` and `params_update` events in ai_stream_events
    #   param_fact         - rows in fact_workflow_param_updates
    #   session_versions   - rows / distinct orchestrators across all session versions
    diagnostics_sql = """
    WITH
      {sid:String} AS sid,
      latest_session AS
      (
        SELECT
          argMax(version, version) AS latest_version,
          argMax(session_start_ts, version) AS session_start_ts,
          argMax(session_end_ts, version) AS session_end_ts,
          argMax(stream_id, version) AS stream_id,
          argMax(request_id, version) AS request_id,
          argMax(orchestrator_address, version) AS latest_orchestrator_address,
          argMax(known_stream, version) AS known_stream,
          argMax(startup_success, version) AS startup_success,
          argMax(startup_excused, version) AS startup_excused,
          argMax(startup_unexcused, version) AS startup_unexcused,
          argMax(swap_count, version) AS fact_swap_count,
          argMax(error_count, version) AS fact_error_count,
          argMax(excusable_error_count, version) AS fact_excusable_error_count
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      ),
      trace_counts AS
      (
        SELECT
          countIf(trace_type = 'orchestrator_swap') AS explicit_swap_edges,
          uniqExactIf(orchestrator_address, orchestrator_address != '') AS trace_orchestrators_seen
        FROM livepeer_analytics.fact_stream_trace_edges
        WHERE workflow_session_id = sid
      ),
      typed_trace_counts AS
      (
        SELECT
          countIf(trace_type = 'orchestrator_swap') AS typed_explicit_swap_edges,
          uniqExactIf(orchestrator_address, orchestrator_address != '') AS typed_trace_orchestrators_seen
        FROM livepeer_analytics.stream_trace_events
        WHERE
        (
          (
            (SELECT request_id FROM latest_session) != ''
            AND (SELECT stream_id FROM latest_session) != ''
            AND request_id = (SELECT request_id FROM latest_session)
            AND stream_id = (SELECT stream_id FROM latest_session)
          )
          OR
          (
            (SELECT request_id FROM latest_session) != ''
            AND (SELECT stream_id FROM latest_session) = ''
            AND request_id = (SELECT request_id FROM latest_session)
          )
          OR
          (
            (SELECT request_id FROM latest_session) = ''
            AND (SELECT stream_id FROM latest_session) != ''
            AND stream_id = (SELECT stream_id FROM latest_session)
          )
        )
          AND event_timestamp >= {from_ts:DateTime64(3)}
          AND event_timestamp < {to_ts:DateTime64(3)}
      ),
      segment_counts AS
      (
        SELECT
          count() AS segment_rows,
          uniqExactIf(orchestrator_address, orchestrator_address != '') AS segment_orchestrators_seen
        FROM livepeer_analytics.fact_workflow_session_segments
        WHERE workflow_session_id = sid
      ),
      ai_counts AS
      (
        SELECT
          countIf(event_type = 'error') AS raw_error_events,
          countIf(event_type = 'params_update') AS raw_params_update_events
        FROM livepeer_analytics.ai_stream_events
        WHERE
        (
          (
            (SELECT request_id FROM latest_session) != ''
            AND (SELECT stream_id FROM latest_session) != ''
            AND request_id = (SELECT request_id FROM latest_session)
            AND stream_id = (SELECT stream_id FROM latest_session)
          )
          OR
          (
            (SELECT request_id FROM latest_session) != ''
            AND (SELECT stream_id FROM latest_session) = ''
            AND request_id = (SELECT request_id FROM latest_session)
          )
          OR
          (
            (SELECT request_id FROM latest_session) = ''
            AND (SELECT stream_id FROM latest_session) != ''
            AND stream_id = (SELECT stream_id FROM latest_session)
          )
        )
          AND event_timestamp >= {from_ts:DateTime64(3)}
          AND event_timestamp < {to_ts:DateTime64(3)}
      ),
      param_fact AS
      (
        SELECT count() AS fact_param_update_rows
        FROM livepeer_analytics.fact_workflow_param_updates
        WHERE workflow_session_id = sid
      ),
      session_versions AS
      (
        SELECT
          count() AS session_rows_all_versions,
          uniqExactIf(orchestrator_address, orchestrator_address != '') AS session_orchestrators_seen_across_versions
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      )
    SELECT
      latest_session.latest_version,
      latest_session.session_start_ts,
      latest_session.session_end_ts,
      latest_session.stream_id,
      latest_session.request_id,
      latest_session.latest_orchestrator_address,
      latest_session.known_stream,
      latest_session.startup_success,
      latest_session.startup_excused,
      latest_session.startup_unexcused,
      latest_session.fact_swap_count,
      trace_counts.explicit_swap_edges,
      trace_counts.trace_orchestrators_seen,
      typed_trace_counts.typed_explicit_swap_edges,
      typed_trace_counts.typed_trace_orchestrators_seen,
      segment_counts.segment_rows,
      segment_counts.segment_orchestrators_seen,
      ai_counts.raw_error_events,
      latest_session.fact_error_count,
      latest_session.fact_excusable_error_count,
      ai_counts.raw_params_update_events,
      param_fact.fact_param_update_rows,
      session_versions.session_rows_all_versions,
      session_versions.session_orchestrators_seen_across_versions
    FROM latest_session
    CROSS JOIN trace_counts
    CROSS JOIN typed_trace_counts
    CROSS JOIN segment_counts
    CROSS JOIN ai_counts
    CROSS JOIN param_fact
    CROSS JOIN session_versions
    """

    diagnostics_df = query_df(diagnostics_sql, {**params, 'sid': selected_session_id})
    # The result is a single, very wide row. With the default column limit the
    # middle columns collapse into "...", which hides the swap counters this
    # section exists to compare, so lift the limit for this display only.
    with pd.option_context('display.max_columns', None):
        display(diagnostics_df)
else:
    print('No session available in this window.')


Unnamed: 0,latest_version,session_start_ts,session_end_ts,stream_id,request_id,latest_orchestrator_address,known_stream,startup_success,startup_excused,startup_unexcused,...,typed_trace_orchestrators_seen,segment_rows,segment_orchestrators_seen,raw_error_events,fact_error_count,fact_excusable_error_count,raw_params_update_events,fact_param_update_rows,session_rows_all_versions,session_orchestrators_seen_across_versions
0,9,2026-02-24 23:46:07.905,2026-02-24 23:46:38.054,aiJobTesterStream-1771976762954588419,e565fe91,0xdc28f2842810d1a013ad51de174d02eaba192dc7,1,1,0,0,...,1,1,1,0,0,0,0,0,2,1


## Session GPU Correlation (Stream -> Session -> Capability)
For the selected session, this section shows:
- GPU IDs observed in session facts/segments/param updates
- capability correlation candidates using orchestrator + pipeline/model


In [18]:
# Re-show the GPU-observation and capability-correlation panels that the
# "Interactive Session Edge Explorer" cell fills from its dropdown.
if 'reactive_out' not in globals() or not isinstance(reactive_out, dict):
    print('Run the "Interactive Session Edge Explorer" cell first to enable reactive outputs.')
else:
    display(reactive_out.get('gpu_observed'), reactive_out.get('capability_corr'))



Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '<IPython.core.display.Markdown object>…

Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '<IPython.core.display.Markdown object>…

In [19]:
if selected_session_id:
    # Merged timeline for the selected session, built from three sources:
    #   fact_stream_trace_edges - edges keyed directly by workflow_session_id
    #   stream_trace_events     - trace events matched on request_id and/or
    #                             stream_id (whichever the session carries)
    #   ai_stream_events        - AI stream events matched the same way
    # The two event tables are limited to the session's start/end padded by
    # one hour on each side (`session_window`).
    #
    # NOTE(review): `timeline_sql` is also the name `_render_selected_session`
    # reads for its timeline panel, so this assignment replaces the statement
    # the dropdown callback runs afterwards — confirm both are meant to share
    # one query.
    timeline_sql = """
    WITH
      {sid:String} AS sid,
      latest_session AS
      (
        SELECT
          argMax(stream_id, version) AS stream_id,
          argMax(request_id, version) AS request_id
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      ),
      session_window AS
      (
        SELECT
          coalesce(argMax(session_start_ts, version), {from_ts:DateTime64(3)}) - INTERVAL 1 HOUR AS from_ts,
          coalesce(argMax(session_end_ts, version), argMax(session_start_ts, version), {to_ts:DateTime64(3)}) + INTERVAL 1 HOUR AS to_ts
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      )
    SELECT
      ts,
      source,
      detail,
      stream_id,
      request_id,
      orchestrator_address,
      data_timestamp,
      raw_event_timestamp
    FROM
    (
      SELECT
        edge_ts AS ts,
        'fact_stream_trace_edges' AS source,
        trace_type AS detail,
        stream_id,
        request_id,
        orchestrator_address,
        edge_ts AS data_timestamp,
        CAST(NULL AS Nullable(DateTime64(3, 'UTC'))) AS raw_event_timestamp
      FROM livepeer_analytics.fact_stream_trace_edges
      WHERE workflow_session_id = sid

      UNION ALL

      SELECT
        coalesce(data_timestamp, event_timestamp) AS ts,
        'raw_stream_trace_events' AS source,
        trace_type AS detail,
        stream_id,
        request_id,
        orchestrator_address,
        data_timestamp,
        event_timestamp AS raw_event_timestamp
      FROM livepeer_analytics.stream_trace_events
      WHERE
      (
        (
          (SELECT request_id FROM latest_session) != ''
          AND (SELECT stream_id FROM latest_session) != ''
          AND request_id = (SELECT request_id FROM latest_session)
          AND stream_id = (SELECT stream_id FROM latest_session)
        )
        OR
        (
          (SELECT request_id FROM latest_session) != ''
          AND (SELECT stream_id FROM latest_session) = ''
          AND request_id = (SELECT request_id FROM latest_session)
        )
        OR
        (
          (SELECT request_id FROM latest_session) = ''
          AND (SELECT stream_id FROM latest_session) != ''
          AND stream_id = (SELECT stream_id FROM latest_session)
        )
      )
        AND event_timestamp >= (SELECT from_ts FROM session_window)
        AND event_timestamp < (SELECT to_ts FROM session_window)

      UNION ALL

      SELECT
        event_timestamp AS ts,
        'raw_ai_stream_events' AS source,
        event_type AS detail,
        stream_id,
        request_id,
        '' AS orchestrator_address,
        CAST(NULL AS Nullable(DateTime64(3, 'UTC'))) AS data_timestamp,
        event_timestamp AS raw_event_timestamp
      FROM livepeer_analytics.ai_stream_events
      WHERE
      (
        (
          (SELECT request_id FROM latest_session) != ''
          AND (SELECT stream_id FROM latest_session) != ''
          AND request_id = (SELECT request_id FROM latest_session)
          AND stream_id = (SELECT stream_id FROM latest_session)
        )
        OR
        (
          (SELECT request_id FROM latest_session) != ''
          AND (SELECT stream_id FROM latest_session) = ''
          AND request_id = (SELECT request_id FROM latest_session)
        )
        OR
        (
          (SELECT request_id FROM latest_session) = ''
          AND (SELECT stream_id FROM latest_session) != ''
          AND stream_id = (SELECT stream_id FROM latest_session)
        )
      )
        AND event_timestamp >= (SELECT from_ts FROM session_window)
        AND event_timestamp < (SELECT to_ts FROM session_window)
    ) t
    ORDER BY ts, source, detail
    """

    timeline_df = query_df(timeline_sql, {**params, 'sid': selected_session_id})
    # Fact and raw rows regularly share a timestamp. The `source, detail`
    # tie-breakers in ORDER BY keep the row order, and therefore which rows
    # survive the 400-row cut below, stable between runs.
    display(timeline_df.head(400))
else:
    print('No session available in this window.')


Unnamed: 0,ts,source,detail,stream_id,request_id,orchestrator_address,data_timestamp,raw_event_timestamp
0,2026-02-24 23:46:07.905,fact_stream_trace_edges,gateway_receive_stream_request,aiJobTesterStream-1771976762954588419,e565fe91,,2026-02-24 23:46:07.905,NaT
1,2026-02-24 23:46:07.905,raw_stream_trace_events,gateway_receive_stream_request,aiJobTesterStream-1771976762954588419,e565fe91,,2026-02-24 23:46:07.905,2026-02-24 23:46:07.906
2,2026-02-24 23:46:18.415,fact_stream_trace_edges,gateway_send_first_ingest_segment,aiJobTesterStream-1771976762954588419,e565fe91,,2026-02-24 23:46:18.415,NaT
3,2026-02-24 23:46:18.415,raw_stream_trace_events,gateway_send_first_ingest_segment,aiJobTesterStream-1771976762954588419,e565fe91,0xd66e5c00725e0d87d57172191ebd5e865fd71cff,2026-02-24 23:46:18.415,2026-02-24 23:46:18.415
4,2026-02-24 23:46:18.599,fact_stream_trace_edges,runner_send_first_processed_segment,aiJobTesterStream-1771976762954588419,e565fe91,,2026-02-24 23:46:18.599,NaT
5,2026-02-24 23:46:18.599,raw_stream_trace_events,runner_send_first_processed_segment,aiJobTesterStream-1771976762954588419,e565fe91,0xd66e5c00725e0d87d57172191ebd5e865fd71cff,2026-02-24 23:46:18.599,2026-02-24 23:46:18.607
6,2026-02-24 23:46:19.659,fact_stream_trace_edges,gateway_receive_first_processed_segment,aiJobTesterStream-1771976762954588419,e565fe91,,2026-02-24 23:46:19.659,NaT
7,2026-02-24 23:46:19.659,raw_stream_trace_events,gateway_receive_first_processed_segment,aiJobTesterStream-1771976762954588419,e565fe91,0xd66e5c00725e0d87d57172191ebd5e865fd71cff,2026-02-24 23:46:19.659,2026-02-24 23:46:19.659
8,2026-02-24 23:46:20.598,fact_stream_trace_edges,runner_receive_first_ingest_segment,aiJobTesterStream-1771976762954588419,e565fe91,,2026-02-24 23:46:20.598,NaT
9,2026-02-24 23:46:20.598,raw_stream_trace_events,runner_receive_first_ingest_segment,aiJobTesterStream-1771976762954588419,e565fe91,0xd66e5c00725e0d87d57172191ebd5e865fd71cff,2026-02-24 23:46:20.598,2026-02-24 23:46:20.605


## Bronze -> Silver -> Gold Audit (Serving Validation)
For the selected session, this section traces counts and time bounds from bronze/raw events to silver facts and gold API views.
Use it to quickly verify that serving rows are grounded in source data.


In [20]:
# Re-show the audit panel that the "Interactive Session Edge Explorer" cell
# fills from its dropdown.
if 'reactive_out' not in globals() or not isinstance(reactive_out, dict):
    print('Run the "Interactive Session Edge Explorer" cell first to enable reactive outputs.')
else:
    display(reactive_out.get('audit'))



Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '<IPython.core.display.Markdown object>…

## Gold Row Drill-Down (Hourly GPU View)
Pick a `v_api_gpu_metrics` hourly row for the selected session key and inspect the underlying silver inputs side-by-side.


In [21]:
# Re-show the hourly GPU panel that the "Interactive Session Edge Explorer"
# cell fills from its dropdown.
if 'reactive_out' not in globals() or not isinstance(reactive_out, dict):
    print('Run the "Interactive Session Edge Explorer" cell first to enable reactive outputs.')
else:
    display(reactive_out.get('hourly'))



Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '<IPython.core.display.Markdown object>…

## CLI Harness Commands

Run the same checks outside notebook:

```bash
python scripts/run_clickhouse_query_pack.py --lookback-hours 24
python scripts/run_clickhouse_data_tests.py --sql-file tests/integration/sql/assertions_pipeline.sql --lookback-hours 24
python scripts/run_clickhouse_data_tests.py --sql-file tests/integration/sql/assertions_scenario_candidates.sql --lookback-hours 720
```

Export production fixtures for your four scenarios:

```bash
python scripts/export_scenario_fixtures.py \
  --from-ts 2026-01-01T00:00:00Z \
  --to-ts 2026-02-16T00:00:00Z \
  --limit-per-scenario 3
```