# Flink Data Trace + Integration Test Notebook

This notebook validates pipeline correctness by querying data in strict order:

1. Raw ingress (`streaming_events`, DLQ, quarantine)
2. Typed tables (`ai_stream_status`, `stream_trace_events`, `ai_stream_events`, `stream_ingest_metrics`)
3. Silver projections (`fact_stream_*`)
4. Stateful lifecycle facts (`fact_workflow_*`)
5. Rollups (`agg_*`)
6. API views (`v_api_*`)

It also runs SQL assertion suites and scenario-candidate checks used by the CLI harness.

In [29]:
# If needed once:
# %pip install clickhouse-connect pandas

In [30]:
from __future__ import annotations

import os
from datetime import datetime, timedelta, timezone
from pathlib import Path

import clickhouse_connect
import ipywidgets as widgets
import pandas as pd
from IPython.display import display

# Connection settings: every value can be overridden through the environment.
# NOTE(review): the fallback user/password are committed with the notebook —
# confirm they are non-sensitive defaults, otherwise require the env vars.
CH_HOST = os.getenv('CH_HOST', 'clickhouse.livepeer.cloud')
CH_PORT = int(os.getenv('CH_PORT', '8123'))
CH_DATABASE = os.getenv('CH_DATABASE', 'livepeer_analytics')
CH_USER = os.getenv('CH_USER', 'analytics_user')
CH_PASSWORD = os.getenv('CH_PASSWORD', 'analytics_password')
CH_SECURE = os.getenv('CH_SECURE', '').lower() in {'1', 'true', 'yes'}

# Query window: [now - LOOKBACK_HOURS, now) in UTC. Defaults to 24 hours;
# set LOOKBACK_HOURS in the environment to widen or narrow it.
LOOKBACK_HOURS = int(os.getenv('LOOKBACK_HOURS', '24'))
TO_TS = datetime.now(timezone.utc)
FROM_TS = TO_TS - timedelta(hours=LOOKBACK_HOURS)

print(f'Window UTC: {FROM_TS.isoformat()} -> {TO_TS.isoformat()}')


Window UTC: 2026-02-16T22:18:37.157299+00:00 -> 2026-02-17T22:18:37.157299+00:00


In [31]:
# Open the ClickHouse client using the settings from the configuration cell.
client = clickhouse_connect.get_client(
    host=CH_HOST, port=CH_PORT,
    database=CH_DATABASE,
    username=CH_USER, password=CH_PASSWORD,
    secure=CH_SECURE,
)

# Parameters bound into every SQL block. The timestamps are rendered with
# microseconds and then trimmed by three characters to millisecond precision.
params = dict(
    from_ts=f'{FROM_TS:%Y-%m-%d %H:%M:%S.%f}'[:-3],
    to_ts=f'{TO_TS:%Y-%m-%d %H:%M:%S.%f}'[:-3],
    limit_per_scenario=5,
)

def query_df(sql: str, parameters: dict | None = None) -> pd.DataFrame:
    """Run ``sql`` through the notebook's ``client`` and return the rows as a DataFrame.

    ``parameters`` is passed to ``client.query``; a missing or empty mapping
    is sent as an empty dict. Column names come from the query result.
    """
    bound = parameters if parameters else {}
    response = client.query(sql, parameters=bound)
    frame = pd.DataFrame(response.result_rows, columns=response.column_names)
    return frame

def parse_blocks(path: str, marker: str) -> list[tuple[str, str]]:
    """Split a SQL file into named statements delimited by marker lines.

    A line that starts with ``marker`` opens a new block; the rest of that
    line (whitespace-trimmed) is the block name. Every following line up to
    the next marker, or the end of the file, belongs to the block. Lines
    before the first marker are ignored.

    Args:
        path: Path of the UTF-8 encoded SQL file.
        marker: Line prefix that introduces a block, e.g. ``'-- QUERY:'``.
            The marker does not need to contain a colon.

    Returns:
        ``(name, sql)`` tuples in file order. Surrounding whitespace and
        trailing semicolons are removed from ``sql``. Blocks with an empty
        name or no SQL text are dropped.
    """
    blocks: list[tuple[str, str]] = []
    current_name: str | None = None
    current_lines: list[str] = []

    def flush() -> None:
        # Emit the block collected so far, if it has both a name and SQL text.
        if current_name and current_lines:
            sql = '\n'.join(current_lines).strip().rstrip(';').rstrip()
            if sql:
                blocks.append((current_name, sql))

    for raw_line in Path(path).read_text(encoding='utf-8').splitlines():
        if raw_line.startswith(marker):
            flush()
            # Slice by marker length instead of splitting on ':' so markers
            # without a colon work too.
            current_name = raw_line[len(marker):].strip()
            current_lines = []
            continue
        if current_name is not None:
            current_lines.append(raw_line)
    flush()
    return blocks

## Ordered Pipeline Trace (Raw -> API)

In [32]:
# Run every stage query of the trace file in file order and keep each result
# by stage name; only the first 20 rows of each stage are displayed.
flow_file = '../../tests/integration/sql/trace_pipeline_flow.sql'
flow_blocks = parse_blocks(flow_file, '-- QUERY:')
print(f'Loaded {len(flow_blocks)} ordered queries from {flow_file}')

flow_results: dict[str, pd.DataFrame] = {}
for stage_name, stage_sql in flow_blocks:
    flow_results[stage_name] = query_df(stage_sql, params)
    print(f'\n=== {stage_name} ===')
    display(flow_results[stage_name].head(20))

Loaded 9 ordered queries from ../../tests/integration/sql/trace_pipeline_flow.sql

=== 01_raw_ingest ===


Unnamed: 0,object_name,rows_window,min_ts,max_ts
0,streaming_events_quarantine,0,1970-01-01 00:00:00.000,1970-01-01 00:00:00.000
1,streaming_events_dlq,389,2026-02-16 22:28:39.516,2026-02-17 22:15:58.511
2,streaming_events,22655,2026-02-16 22:18:57.212,2026-02-17 22:18:33.266



=== 02_typed_tables ===


Unnamed: 0,object_name,rows_window,min_ts,max_ts
0,ai_stream_events,5136,2026-02-16 22:20:33.514,2026-02-17 22:14:30.753
1,ai_stream_status,1794,2026-02-16 22:22:46.308,2026-02-17 22:12:44.245
2,network_capabilities,17453,2026-02-16 22:19:03.942,2026-02-17 22:17:04.774
3,stream_ingest_metrics,0,1970-01-01 00:00:00.000,1970-01-01 00:00:00.000
4,stream_trace_events,9708,2026-02-16 22:20:33.154,2026-02-17 22:14:30.753



=== 03_silver_projection_counts ===


Unnamed: 0,object_name,rows_window,min_ts,max_ts
0,fact_stream_ingest_samples,0,1970-01-01 00:00:00.000,1970-01-01 00:00:00.000
1,fact_stream_status_samples,1794,2026-02-16 22:22:46.308,2026-02-17 22:12:44.245
2,fact_stream_trace_edges,9708,2026-02-16 22:20:33.153,2026-02-17 22:14:30.753



=== 04_stateful_fact_counts ===


Unnamed: 0,object_name,rows_window,min_ts,max_ts
0,fact_workflow_param_updates,0,1970-01-01 00:00:00.000,1970-01-01 00:00:00.000
1,fact_workflow_session_segments,2714,2026-02-16 22:20:33.153,2026-02-17 22:14:30.656
2,fact_workflow_sessions,3728,2026-02-16 22:20:33.153,2026-02-17 22:14:30.652



=== 05_reliability_and_swap_summary ===


Unnamed: 0,sessions,known_stream_sessions,startup_success_sessions,startup_excused_sessions,startup_unexcused_sessions,swapped_sessions,unexcused_rate
0,3726,2705,406,2299,0,25,0.0



=== 06_rollup_population ===


Unnamed: 0,object_name,rows_window
0,agg_latency_edges_1m,0
1,agg_reliability_1h,556
2,agg_stream_performance_1m,647



=== 07_view_population ===


Unnamed: 0,object_name,rows_window
0,v_api_gpu_metrics,643
1,v_api_network_demand,598
2,v_api_sla_compliance,386



=== 08_gpu_view_parity ===


Unnamed: 0,joined_rows,mean_abs_diff_fps,max_abs_diff_fps
0,643,0.0,0.0



=== 09_sla_view_parity ===


Unnamed: 0,joined_rows,total_known_diff,total_unexcused_diff,total_swapped_diff
0,386,0,0,0


## Integration Assertions (CI-aligned)

In [33]:
def run_assertion_file(path: str) -> pd.DataFrame:
    """Run every ``-- TEST:`` block of a SQL assertion file and tabulate the results.

    Each block is executed with the notebook-wide ``params``. The first row
    of the result is taken as the test record; its ``failed_rows`` value
    decides the status (``0`` -> ``PASS``, anything else -> ``FAIL``).

    A test is reported as ``FAIL`` with ``failed_rows = 1`` and an ``error``
    message when the query raises, returns no rows, or returns a
    ``failed_rows`` value that is missing or cannot be read as an integer.

    Args:
        path: Path of the SQL assertion file.

    Returns:
        One row per test with ``test_name``, ``status`` and ``failed_rows``
        first, followed by whatever extra columns the queries returned.
        A file without tests yields an empty frame with those three columns.
    """
    lead_cols = ['test_name', 'status', 'failed_rows']
    rows = []
    for name, sql in parse_blocks(path, '-- TEST:'):
        try:
            df = query_df(sql, params)
        except Exception as exc:
            rows.append({'test_name': name, 'failed_rows': 1, 'status': 'FAIL', 'error': str(exc)})
            continue

        if df.empty:
            rows.append({'test_name': name, 'failed_rows': 1, 'status': 'FAIL', 'error': 'No rows'})
            continue

        row = df.iloc[0].to_dict()
        try:
            failed = int(row.get('failed_rows', 1))
        except (TypeError, ValueError):
            # NULL / NaN / non-numeric: fail this test instead of aborting the suite.
            row['error'] = f"Unparseable failed_rows: {row.get('failed_rows')!r}"
            failed = 1
        row['failed_rows'] = failed
        row['test_name'] = name
        row['status'] = 'PASS' if failed == 0 else 'FAIL'
        rows.append(row)

    if not rows:
        # No tests parsed: return the expected schema rather than raising
        # KeyError when selecting columns from an empty frame.
        return pd.DataFrame(columns=lead_cols)

    out = pd.DataFrame(rows)
    cols = lead_cols + [c for c in out.columns if c not in lead_cols]
    return out[cols]

# Execute the pipeline assertion suite and show the full result table.
pipeline_assertions = run_assertion_file('../../tests/integration/sql/assertions_pipeline.sql')
display(pipeline_assertions)

# Keep only the failing tests for the summary line.
pipeline_failures = pipeline_assertions.loc[pipeline_assertions['status'].eq('FAIL')]
print(f'Pipeline assertion failures: {len(pipeline_failures)}')

Unnamed: 0,test_name,status,failed_rows,missing_objects,sessions_window,typed_rows,projected_rows,missing_in_silver,unique_sessions_checked,total_rows,swapped_without_evidence,orphan_param_updates,joined_rows,mean_abs_diff_fps,max_abs_diff_fps,total_known_diff,total_unexcused_diff,total_swapped_diff,rows_checked
0,raw_events_present,PASS,0.0,[],,,,,,,,,,,,,,,
1,session_fact_present,PASS,0.0,,3728.0,,,,,,,,,,,,,,
2,status_raw_to_silver_projection,PASS,0.0,,,1794.0,1794.0,0.0,,,,,,,,,,,
3,trace_raw_to_silver_projection,PASS,0.0,,,9708.0,9708.0,0.0,,,,,,,,,,,
4,ingest_raw_to_silver_projection,PASS,0.0,,,0.0,0.0,0.0,,,,,,,,,,,
5,session_final_uniqueness,PASS,0.0,,,,,,2705.0,,,,,,,,,,
6,workflow_session_has_identifier,PASS,0.0,,,,,,,3726.0,,,,,,,,,
7,swapped_sessions_have_evidence,PASS,0.0,,,,,,,,0.0,,,,,,,,
8,param_updates_reference_existing_session,PASS,0.0,,,,,,,,,0.0,,,,,,,
9,gpu_view_matches_rollup,PASS,0.0,,,,,,,,,,643.0,0.0,0.0,,,,


Pipeline assertion failures: 0


## Raw -> Silver Correlation Checks

These rows show that raw typed records are carried into silver facts using `source_event_uid` correlation.

In [34]:
# Narrow the pipeline assertions to the raw -> silver projection tests and
# show only the columns that describe the row-count comparison.
projection_checks = pipeline_assertions.loc[
    pipeline_assertions['test_name'].str.contains('_raw_to_silver_projection', na=False)
]
display(
    projection_checks.loc[
        :, ['test_name', 'status', 'failed_rows', 'typed_rows', 'projected_rows', 'missing_in_silver']
    ]
)

Unnamed: 0,test_name,status,failed_rows,typed_rows,projected_rows,missing_in_silver
2,status_raw_to_silver_projection,PASS,0.0,1794.0,1794.0,0.0
3,trace_raw_to_silver_projection,PASS,0.0,9708.0,9708.0,0.0
4,ingest_raw_to_silver_projection,PASS,0.0,0.0,0.0,0.0


## Scenario Candidate Discovery

In [35]:
# Run each scenario-candidate query and keep the result per scenario name;
# later cells build the session picker from this mapping.
scenario_blocks = parse_blocks('../../tests/integration/sql/scenario_candidates.sql', '-- QUERY:')
scenario_candidates: dict[str, pd.DataFrame] = {}
for scenario_key, scenario_sql in scenario_blocks:
    candidates = query_df(scenario_sql, params)
    scenario_candidates[scenario_key] = candidates
    print(f'\n{scenario_key}: {len(candidates)} candidate rows')
    display(candidates.head(10))


scenario_1_clean_success_no_swap_fps_gt_12: 5 candidate rows


Unnamed: 0,scenario_name,workflow_session_id,stream_id,request_id,session_start_ts,session_end_ts,avg_output_fps,segment_orchestrators
0,scenario_1_clean_success_no_swap_fps_gt_12,aiJobTesterStream-1771364900717092746|9bbfabec,aiJobTesterStream-1771364900717092746,9bbfabec,2026-02-17 21:48:25.183,2026-02-17 21:49:08.913,14.913415,1
1,scenario_1_clean_success_no_swap_fps_gt_12,aiJobTesterStream-1771364825858373880|e0820064,aiJobTesterStream-1771364825858373880,e0820064,2026-02-17 21:47:10.377,2026-02-17 21:47:54.145,15.785872,1
2,scenario_1_clean_success_no_swap_fps_gt_12,aiJobTesterStream-1771364727157826729|fb2a48b4,aiJobTesterStream-1771364727157826729,fb2a48b4,2026-02-17 21:45:31.736,2026-02-17 21:46:45.630,16.899454,1
3,scenario_1_clean_success_no_swap_fps_gt_12,aiJobTesterStream-1771364623588675124|4979696f,aiJobTesterStream-1771364623588675124,4979696f,2026-02-17 21:43:48.108,2026-02-17 21:44:30.589,15.563234,1
4,scenario_1_clean_success_no_swap_fps_gt_12,aiJobTesterStream-1771364511335879001|3eb7a3f7,aiJobTesterStream-1771364511335879001,3eb7a3f7,2026-02-17 21:41:55.867,2026-02-17 21:42:40.934,14.312684,1



scenario_2_no_orchestrator_then_closed: 5 candidate rows


Unnamed: 0,scenario_name,workflow_session_id,stream_id,request_id,session_start_ts,session_end_ts,startup_success,startup_excused,startup_unexcused,has_no_orch,has_close
0,scenario_2_no_orchestrator_then_closed,aiJobTesterStream-1771366463003192557|e317bc30,aiJobTesterStream-1771366463003192557,e317bc30,2026-02-17 22:14:30.652,2026-02-17 22:14:30.753,0,1,0,1,1
1,scenario_2_no_orchestrator_then_closed,aiJobTesterStream-1771366450922100738|1a0b5362,aiJobTesterStream-1771366450922100738,1a0b5362,2026-02-17 22:14:18.601,2026-02-17 22:14:19.003,0,1,0,1,1
2,scenario_2_no_orchestrator_then_closed,aiJobTesterStream-1771366438467382564|6267e6b6,aiJobTesterStream-1771366438467382564,6267e6b6,2026-02-17 22:14:06.160,2026-02-17 22:14:06.664,0,1,0,1,1
3,scenario_2_no_orchestrator_then_closed,aiJobTesterStream-1771366426482581172|8812befc,aiJobTesterStream-1771366426482581172,8812befc,2026-02-17 22:13:54.098,2026-02-17 22:13:56.392,0,1,0,1,1
4,scenario_2_no_orchestrator_then_closed,aiJobTesterStream-1771366414105254420|f059a9b5,aiJobTesterStream-1771366414105254420,f059a9b5,2026-02-17 22:13:41.781,2026-02-17 22:13:44.134,0,1,0,1,1



scenario_3_success_with_swap: 5 candidate rows


Unnamed: 0,scenario_name,workflow_session_id,stream_id,request_id,session_start_ts,session_end_ts,swap_count,segment_orchestrators
0,scenario_3_success_with_swap,aiJobTesterStream-1771363264736023709|040ce13c,aiJobTesterStream-1771363264736023709,040ce13c,2026-02-17 21:21:09.221,2026-02-17 21:21:52.968,1,2
1,scenario_3_success_with_swap,aiJobTesterStream-1771362627842762017|730215df,aiJobTesterStream-1771362627842762017,730215df,2026-02-17 21:10:35.380,2026-02-17 21:11:34.191,1,1
2,scenario_3_success_with_swap,aiJobTesterStream-1771345665022942910|fa413b23,aiJobTesterStream-1771345665022942910,fa413b23,2026-02-17 16:27:49.582,2026-02-17 16:29:34.178,1,1
3,scenario_3_success_with_swap,aiJobTesterStream-1771338506517687065|be7a2370,aiJobTesterStream-1771338506517687065,be7a2370,2026-02-17 14:28:30.947,2026-02-17 14:30:16.421,1,1
4,scenario_3_success_with_swap,aiJobTesterStream-1771324852689241436|9068473c,aiJobTesterStream-1771324852689241436,9068473c,2026-02-17 10:40:57.361,2026-02-17 10:41:41.286,1,2



scenario_4_success_with_param_updates: 0 candidate rows


In [36]:
# Execute the scenario-candidate assertion suite and show the full result table.
scenario_assertions = run_assertion_file('../../tests/integration/sql/assertions_scenario_candidates.sql')
display(scenario_assertions)

# Keep only the failing tests for the summary line.
scenario_failures = scenario_assertions.loc[scenario_assertions['status'].eq('FAIL')]
print(f'Scenario assertion failures: {len(scenario_failures)}')

Unnamed: 0,test_name,status,failed_rows,candidates
0,scenario_1_clean_success_no_swap_fps_gt_12_exists,PASS,0,1
1,scenario_2_no_orchestrator_then_closed_exists,PASS,0,1
2,scenario_3_success_with_swap_exists,PASS,0,1
3,scenario_4_success_with_param_updates_exists,FAIL,1,0


Scenario assertion failures: 1


## Interactive Session Edge Explorer

Select a scenario candidate session and render raw event/edge timeline dynamically.

In [37]:
# Build the dropdown options from the scenario candidate results.
# Each option value is (scenario_name, row_index, session_id), so a session that
# appears in several scenarios or rows still gets its own entry.
# Frames that are missing, empty, or lack a workflow_session_id column are skipped.
session_options = [
    (
        f"{scenario_name} | row={row_idx} | {str(row['workflow_session_id'])[:28]}",
        (scenario_name, int(row_idx), str(row['workflow_session_id'])),
    )
    for scenario_name, df in scenario_candidates.items()
    if df is not None and not df.empty and 'workflow_session_id' in df.columns
    for row_idx, row in df.reset_index(drop=True).iterrows()
]

# Widgets: session picker, trigger button, and the area the timeline renders into.
session_picker = widgets.Dropdown(
    options=session_options,
    description='Session:',
    layout=widgets.Layout(width='95%'),
)
run_edges_btn = widgets.Button(button_style='primary', description='Show Raw Edges')
edge_out = widgets.Output()

def show_selected_session_edges(_):
    """Button callback: render the event/edge timeline of the picked session.

    Reads the current ``session_picker`` value, clears ``edge_out`` and writes
    into it. The query unions the session's rows from
    ``fact_stream_trace_edges`` with rows from ``stream_trace_events``,
    ``ai_stream_events`` and ``ai_stream_status`` that match the request_id or
    stream_id of the session's latest version. Any exception raised while
    querying or rendering is printed into ``edge_out`` instead of propagating.

    The positional argument supplied by the button click is ignored.
    """
    edge_out.clear_output()
    choice = session_picker.value
    if not choice:
        with edge_out:
            print('No scenario candidates in current window.')
        return

    scenario_name, row_idx, sid = choice

    edges_sql = """
    WITH
      {sid:String} AS sid,
      latest_session AS
      (
        SELECT
          argMax(stream_id, version) AS stream_id,
          argMax(request_id, version) AS request_id
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      )
    SELECT
      ts,
      raw_source,
      edge_name,
      stream_id,
      request_id,
      orchestrator_address,
      data_timestamp,
      raw_event_timestamp,
      toUInt8(edge_name = 'orchestrator_swap') AS is_swap_event,
      toUInt8(edge_name = 'params_update') AS is_params_update
    FROM
    (
      SELECT
        edge_ts AS ts,
        'fact_stream_trace_edges' AS raw_source,
        trace_type AS edge_name,
        stream_id,
        request_id,
        orchestrator_address,
        edge_ts AS data_timestamp,
        CAST(NULL AS Nullable(DateTime64(3, 'UTC'))) AS raw_event_timestamp
      FROM livepeer_analytics.fact_stream_trace_edges
      WHERE workflow_session_id = sid

      UNION ALL

      SELECT
        coalesce(data_timestamp, event_timestamp) AS ts,
        'raw_stream_trace_events' AS raw_source,
        trace_type AS edge_name,
        stream_id,
        request_id,
        orchestrator_address,
        data_timestamp,
        event_timestamp AS raw_event_timestamp
      FROM livepeer_analytics.stream_trace_events
      WHERE request_id = (SELECT request_id FROM latest_session)
         OR stream_id = (SELECT stream_id FROM latest_session)

      UNION ALL

      SELECT
        event_timestamp AS ts,
        'ai_stream_events' AS raw_source,
        event_type AS edge_name,
        stream_id,
        request_id,
        '' AS orchestrator_address,
        CAST(NULL AS Nullable(DateTime64(3, 'UTC'))) AS data_timestamp,
        event_timestamp AS raw_event_timestamp
      FROM livepeer_analytics.ai_stream_events
      WHERE request_id = (SELECT request_id FROM latest_session)
         OR stream_id = (SELECT stream_id FROM latest_session)

      UNION ALL

      SELECT
        event_timestamp AS ts,
        'ai_stream_status' AS raw_source,
        concat('state:', state) AS edge_name,
        stream_id,
        request_id,
        orchestrator_address,
        CAST(NULL AS Nullable(DateTime64(3, 'UTC'))) AS data_timestamp,
        event_timestamp AS raw_event_timestamp
      FROM livepeer_analytics.ai_stream_status
      WHERE request_id = (SELECT request_id FROM latest_session)
         OR stream_id = (SELECT stream_id FROM latest_session)
    ) t
    ORDER BY ts, raw_source, edge_name
    """

    try:
        # Shared window parameters plus the session id bound as {sid:String}.
        query_args = dict(params)
        query_args['sid'] = sid
        timeline = query_df(edges_sql, query_args)
        with edge_out:
            print(f'Scenario: {scenario_name} | row={row_idx} | session={sid}')
            if not timeline.empty:
                swap_total = int(timeline['is_swap_event'].sum())
                params_update_total = int(timeline['is_params_update'].sum())
                print(
                    f"swap_events_count={swap_total} | "
                    f"params_update_events_count={params_update_total}"
                )
            else:
                print('No timeline rows found for selected session.')
            display(timeline)
    except Exception as exc:
        with edge_out:
            print(f'Query failed: {exc}')

# Register the callback on the button, then render the picker, the button and
# the output area that the callback writes into.
run_edges_btn.on_click(show_selected_session_edges)

display(session_picker, run_edges_btn, edge_out)

Dropdown(description='Session:', layout=Layout(width='95%'), options=(('scenario_1_clean_success_no_swap_fps_g…

Button(button_style='primary', description='Show Raw Edges', style=ButtonStyle())

Output()

## Session Diagnostics (Swaps, Errors, Param Updates)

This section explains why a session can show `swap_count > 0` even when no explicit `orchestrator_swap` edge appears.

`swap_count` in session facts is derived as:
- explicit swaps from trace edge `orchestrator_swap`, OR
- fallback from observed orchestrator identity changes across session state/attribution updates.

In [38]:
def resolve_selected_session() -> tuple[str | None, str | None]:
    """Pick the workflow session the diagnostic cells should inspect.

    Resolution order:

    1. The current value of ``session_picker``, if that widget exists and has
       a selection. A ``(scenario_name, row_idx, sid)`` tuple is unpacked;
       any other value is used as the session id as-is.
    2. The first row of the first non-empty scenario candidate frame that has
       a ``workflow_session_id`` column.
    3. The session with the greatest ``session_start_ts`` inside the notebook
       window, queried from ``fact_workflow_sessions``.

    Returns:
        ``(session_id, source_label)``, or ``(None, None)`` when no session
        can be found.
    """
    if 'session_picker' in globals():
        selected = getattr(session_picker, 'value', None)
        if selected:
            if isinstance(selected, tuple) and len(selected) == 3:
                scenario_name, row_idx, sid = selected
                return str(sid), f"interactive:{scenario_name}:row={row_idx}"
            return str(selected), 'interactive'

    for name, df in scenario_candidates.items():
        # Same guard as the dropdown builder: tolerate missing frames too.
        if df is not None and not df.empty and 'workflow_session_id' in df.columns:
            return str(df.iloc[0]['workflow_session_id']), name

    # Rank by session_start_ts so the fallback really is the latest session.
    # `version` is what the other queries use to pick the newest row of a
    # single session, so it does not compare sessions with each other.
    fallback = query_df("""
    SELECT argMax(workflow_session_id, session_start_ts) AS workflow_session_id
    FROM livepeer_analytics.fact_workflow_sessions
    WHERE session_start_ts >= {from_ts:DateTime64(3)}
      AND session_start_ts < {to_ts:DateTime64(3)}
    """, params)
    if fallback.empty or not fallback.iloc[0]['workflow_session_id']:
        return None, None
    return str(fallback.iloc[0]['workflow_session_id']), 'fallback_latest_session'

# Resolve once when this cell runs; the following diagnostic cells read
# selected_session_id, so re-run this cell after changing the picker.
selected_session_id, selected_source = resolve_selected_session()
print('Selected session:', selected_session_id, 'from', selected_source)

Selected session: aiJobTesterStream-1771364900717092746|9bbfabec from interactive:scenario_1_clean_success_no_swap_fps_gt_12:row=0


In [39]:
# Session diagnostics: compare the latest version of the session fact with
# counts taken from trace edges, segments, raw AI events, param-update facts
# and all stored versions of the session row.
if selected_session_id:
    # latest_session  - newest-version attributes of the session (argMax by version)
    # trace_counts    - explicit 'orchestrator_swap' edges and distinct non-empty orchestrators
    # segment_counts  - segment rows and distinct non-empty orchestrators
    # ai_counts       - raw 'error' / 'params_update' events matched by request_id or stream_id
    # param_fact      - rows in fact_workflow_param_updates for the session
    # session_versions - stored session rows and distinct orchestrators across versions
    # Each CTE is a plain aggregate, so the CROSS JOINs combine them into one output row.
    diagnostics_sql = """
    WITH
      {sid:String} AS sid,
      latest_session AS
      (
        SELECT
          argMax(version, version) AS latest_version,
          argMax(session_start_ts, version) AS session_start_ts,
          argMax(session_end_ts, version) AS session_end_ts,
          argMax(stream_id, version) AS stream_id,
          argMax(request_id, version) AS request_id,
          argMax(orchestrator_address, version) AS latest_orchestrator_address,
          argMax(known_stream, version) AS known_stream,
          argMax(startup_success, version) AS startup_success,
          argMax(startup_excused, version) AS startup_excused,
          argMax(startup_unexcused, version) AS startup_unexcused,
          argMax(swap_count, version) AS fact_swap_count,
          argMax(error_count, version) AS fact_error_count,
          argMax(excusable_error_count, version) AS fact_excusable_error_count
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      ),
      trace_counts AS
      (
        SELECT
          countIf(trace_type = 'orchestrator_swap') AS explicit_swap_edges,
          uniqExactIf(orchestrator_address, orchestrator_address != '') AS trace_orchestrators_seen
        FROM livepeer_analytics.fact_stream_trace_edges
        WHERE workflow_session_id = sid
      ),
      segment_counts AS
      (
        SELECT
          count() AS segment_rows,
          uniqExactIf(orchestrator_address, orchestrator_address != '') AS segment_orchestrators_seen
        FROM livepeer_analytics.fact_workflow_session_segments
        WHERE workflow_session_id = sid
      ),
      ai_counts AS
      (
        SELECT
          countIf(event_type = 'error') AS raw_error_events,
          countIf(event_type = 'params_update') AS raw_params_update_events
        FROM livepeer_analytics.ai_stream_events
        WHERE request_id = (SELECT request_id FROM latest_session)
           OR stream_id = (SELECT stream_id FROM latest_session)
      ),
      param_fact AS
      (
        SELECT count() AS fact_param_update_rows
        FROM livepeer_analytics.fact_workflow_param_updates
        WHERE workflow_session_id = sid
      ),
      session_versions AS
      (
        SELECT
          count() AS session_rows_all_versions,
          uniqExactIf(orchestrator_address, orchestrator_address != '') AS session_orchestrators_seen_across_versions
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      )
    SELECT
      latest_session.latest_version,
      latest_session.session_start_ts,
      latest_session.session_end_ts,
      latest_session.stream_id,
      latest_session.request_id,
      latest_session.latest_orchestrator_address,
      latest_session.known_stream,
      latest_session.startup_success,
      latest_session.startup_excused,
      latest_session.startup_unexcused,
      latest_session.fact_swap_count,
      trace_counts.explicit_swap_edges,
      trace_counts.trace_orchestrators_seen,
      segment_counts.segment_rows,
      segment_counts.segment_orchestrators_seen,
      ai_counts.raw_error_events,
      latest_session.fact_error_count,
      latest_session.fact_excusable_error_count,
      ai_counts.raw_params_update_events,
      param_fact.fact_param_update_rows,
      session_versions.session_rows_all_versions,
      session_versions.session_orchestrators_seen_across_versions
    FROM latest_session
    CROSS JOIN trace_counts
    CROSS JOIN segment_counts
    CROSS JOIN ai_counts
    CROSS JOIN param_fact
    CROSS JOIN session_versions
    """

    # Bind the shared window parameters plus the session id as {sid:String}.
    diagnostics_df = query_df(diagnostics_sql, {**params, 'sid': selected_session_id})
    display(diagnostics_df)
else:
    print('No session available in this window.')

Unnamed: 0,latest_version,session_start_ts,session_end_ts,stream_id,request_id,latest_orchestrator_address,known_stream,startup_success,startup_excused,startup_unexcused,...,trace_orchestrators_seen,segment_rows,segment_orchestrators_seen,raw_error_events,fact_error_count,fact_excusable_error_count,raw_params_update_events,fact_param_update_rows,session_rows_all_versions,session_orchestrators_seen_across_versions
0,12,2026-02-17 21:48:25.183,2026-02-17 21:49:08.913,aiJobTesterStream-1771364900717092746,9bbfabec,0x733da28b0145ff561868e408d2ac8565ebe73aab,1,1,0,0,...,1,1,1,1,1,1,0,0,1,1


## Session GPU Correlation (Stream -> Session -> Capability)

For the selected session, this section shows:
- GPU IDs observed in session facts, segments, and param updates
- Capability correlation candidates matched on orchestrator plus pipeline/model


In [None]:
# GPU correlation for the selected session: first the GPU/model ids recorded on
# the session itself, then capability snapshots of its orchestrator that could
# correspond to them.
if selected_session_id:
    # One row: latest-version session attributes plus the distinct non-empty
    # gpu_id / model_id values seen in the session's segments and param updates.
    gpu_observed_sql = """
    WITH
      {sid:String} AS sid,
      latest_session AS
      (
        SELECT
          argMax(stream_id, version) AS stream_id,
          argMax(request_id, version) AS request_id,
          argMax(orchestrator_address, version) AS orchestrator_address,
          argMax(pipeline, version) AS pipeline,
          argMax(pipeline_id, version) AS pipeline_id,
          argMax(model_id, version) AS model_id,
          argMax(gpu_id, version) AS gpu_id,
          argMax(session_start_ts, version) AS session_start_ts,
          argMax(session_end_ts, version) AS session_end_ts
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      ),
      segment_gpus AS
      (
        SELECT
          groupUniqArrayIf(ifNull(gpu_id, ''), ifNull(gpu_id, '') != '') AS segment_gpu_ids,
          groupUniqArrayIf(ifNull(model_id, ''), ifNull(model_id, '') != '') AS segment_model_ids
        FROM livepeer_analytics.fact_workflow_session_segments
        WHERE workflow_session_id = sid
      ),
      param_gpus AS
      (
        SELECT
          groupUniqArrayIf(ifNull(gpu_id, ''), ifNull(gpu_id, '') != '') AS param_update_gpu_ids,
          groupUniqArrayIf(ifNull(model_id, ''), ifNull(model_id, '') != '') AS param_update_model_ids
        FROM livepeer_analytics.fact_workflow_param_updates
        WHERE workflow_session_id = sid
      )
    SELECT
      sid AS workflow_session_id,
      latest_session.stream_id,
      latest_session.request_id,
      latest_session.orchestrator_address,
      latest_session.pipeline,
      latest_session.pipeline_id,
      latest_session.model_id,
      latest_session.gpu_id AS session_gpu_id,
      (SELECT segment_gpu_ids FROM segment_gpus) AS segment_gpu_ids,
      (SELECT param_update_gpu_ids FROM param_gpus) AS param_update_gpu_ids,
      (SELECT segment_model_ids FROM segment_gpus) AS segment_model_ids,
      (SELECT param_update_model_ids FROM param_gpus) AS param_update_model_ids,
      latest_session.session_start_ts,
      latest_session.session_end_ts
    FROM latest_session
    """

    # Capability snapshots for the session's latest orchestrator, taken from
    # 24 hours before session start to 24 hours after session end (or after
    # "now" when the end is NULL). A snapshot qualifies when it matches on
    # pipeline, pipeline_id, model_id, or an observed segment GPU/model id;
    # the *_match columns report which criteria hit. Newest first, max 200 rows.
    capability_corr_sql = """
    WITH
      {sid:String} AS sid,
      latest_session AS
      (
        SELECT
          argMax(orchestrator_address, version) AS orchestrator_address,
          argMax(pipeline, version) AS pipeline,
          argMax(pipeline_id, version) AS pipeline_id,
          argMax(model_id, version) AS model_id,
          argMax(session_start_ts, version) AS session_start_ts,
          argMax(session_end_ts, version) AS session_end_ts
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      ),
      observed AS
      (
        SELECT
          groupUniqArrayIf(ifNull(gpu_id, ''), ifNull(gpu_id, '') != '') AS gpu_ids,
          groupUniqArrayIf(ifNull(model_id, ''), ifNull(model_id, '') != '') AS model_ids
        FROM livepeer_analytics.fact_workflow_session_segments
        WHERE workflow_session_id = sid
      )
    SELECT
      d.snapshot_ts,
      d.orchestrator_address,
      d.orchestrator_proxy_address,
      d.pipeline,
      d.pipeline_id,
      d.model_id,
      d.gpu_id,
      d.gpu_name,
      d.runner_version,
      d.region,
      toUInt8(d.pipeline = (SELECT pipeline FROM latest_session)) AS pipeline_match,
      toUInt8(ifNull(d.pipeline_id, '') = ifNull((SELECT pipeline_id FROM latest_session), '')) AS pipeline_id_match,
      toUInt8(ifNull(d.model_id, '') = ifNull((SELECT model_id FROM latest_session), '') OR has((SELECT model_ids FROM observed), ifNull(d.model_id, ''))) AS model_match,
      toUInt8(has((SELECT gpu_ids FROM observed), ifNull(d.gpu_id, ''))) AS gpu_match
    FROM livepeer_analytics.dim_orchestrator_capability_snapshots d
    WHERE d.orchestrator_address = (SELECT orchestrator_address FROM latest_session)
      AND d.snapshot_ts >= (SELECT session_start_ts FROM latest_session) - INTERVAL 24 HOUR
      AND d.snapshot_ts <= coalesce((SELECT session_end_ts FROM latest_session), now64(3, 'UTC')) + INTERVAL 24 HOUR
      AND
      (
        d.pipeline = (SELECT pipeline FROM latest_session)
        OR ifNull(d.pipeline_id, '') = ifNull((SELECT pipeline_id FROM latest_session), '')
        OR ifNull(d.model_id, '') = ifNull((SELECT model_id FROM latest_session), '')
        OR has((SELECT model_ids FROM observed), ifNull(d.model_id, ''))
        OR has((SELECT gpu_ids FROM observed), ifNull(d.gpu_id, ''))
      )
    ORDER BY d.snapshot_ts DESC
    LIMIT 200
    """

    # Both queries bind the shared window parameters plus the session id as {sid:String}.
    gpu_observed_df = query_df(gpu_observed_sql, {**params, 'sid': selected_session_id})
    display(gpu_observed_df)

    capability_corr_df = query_df(capability_corr_sql, {**params, 'sid': selected_session_id})
    print(f'Capability correlation rows: {len(capability_corr_df)}')
    display(capability_corr_df)
else:
    print('No session available in this window.')





In [None]:
# Merged timeline for the selected session: its rows from
# fact_stream_trace_edges plus raw trace and AI stream events that match the
# request_id or stream_id of the session's latest version.
if selected_session_id:
    # The fact edges carry edge_ts as their timestamp and the raw trace rows
    # carry data_timestamp, so rows from different sources can tie on `ts`.
    # Ordering by (ts, source, detail) keeps the output stable across runs.
    timeline_sql = """
    WITH
      {sid:String} AS sid,
      latest_session AS
      (
        SELECT
          argMax(stream_id, version) AS stream_id,
          argMax(request_id, version) AS request_id
        FROM livepeer_analytics.fact_workflow_sessions
        WHERE workflow_session_id = sid
      )
    SELECT
      ts,
      source,
      detail,
      stream_id,
      request_id,
      orchestrator_address,
      data_timestamp,
      raw_event_timestamp
    FROM
    (
      SELECT
        edge_ts AS ts,
        'fact_stream_trace_edges' AS source,
        trace_type AS detail,
        stream_id,
        request_id,
        orchestrator_address,
        edge_ts AS data_timestamp,
        CAST(NULL AS Nullable(DateTime64(3, 'UTC'))) AS raw_event_timestamp
      FROM livepeer_analytics.fact_stream_trace_edges
      WHERE workflow_session_id = sid

      UNION ALL

      SELECT
        coalesce(data_timestamp, event_timestamp) AS ts,
        'raw_stream_trace_events' AS source,
        trace_type AS detail,
        stream_id,
        request_id,
        orchestrator_address,
        data_timestamp,
        event_timestamp AS raw_event_timestamp
      FROM livepeer_analytics.stream_trace_events
      WHERE request_id = (SELECT request_id FROM latest_session)
         OR stream_id = (SELECT stream_id FROM latest_session)

      UNION ALL

      SELECT
        event_timestamp AS ts,
        'raw_ai_stream_events' AS source,
        event_type AS detail,
        stream_id,
        request_id,
        '' AS orchestrator_address,
        CAST(NULL AS Nullable(DateTime64(3, 'UTC'))) AS data_timestamp,
        event_timestamp AS raw_event_timestamp
      FROM livepeer_analytics.ai_stream_events
      WHERE request_id = (SELECT request_id FROM latest_session)
         OR stream_id = (SELECT stream_id FROM latest_session)
    ) t
    ORDER BY ts, source, detail
    """

    timeline_df = query_df(timeline_sql, {**params, 'sid': selected_session_id})
    # Cap the rendered output at 400 rows; timeline_df itself keeps every row.
    display(timeline_df.head(400))
else:
    print('No session available in this window.')

## CLI Harness Commands

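Run the same checks outside the notebook. Note that the scenario-candidate suite below uses a 720-hour lookback, whereas the cells above query a 24-hour window, so its results can differ from the notebook's: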

```bash
python scripts/run_clickhouse_query_pack.py --lookback-hours 24
python scripts/run_clickhouse_data_tests.py --sql-file tests/integration/sql/assertions_pipeline.sql --lookback-hours 24
python scripts/run_clickhouse_data_tests.py --sql-file tests/integration/sql/assertions_scenario_candidates.sql --lookback-hours 720
```

Export production fixtures for your four scenarios:

```bash
python scripts/export_scenario_fixtures.py \
  --from-ts 2026-01-01T00:00:00Z \
  --to-ts 2026-02-16T00:00:00Z \
  --limit-per-scenario 3
```