# Event Explorer

Browse and analyze captured WebSocket events from Rugs.fun sessions.

In [None]:
from _paths import *
import json
from pathlib import Path
import pandas as pd
from IPython.display import display, HTML, JSON

## 1. Load Discovered Events

Load schemas and fields from the knowledge pipeline:

In [None]:
# Fetch the discovered event schemas and the field index via the helpers
# brought in by `from _paths import *`.
schemas = load_discovered_schemas()
fields = load_discovered_fields()

print(f"Total events discovered: {len(schemas)}")
print(f"Total field paths: {len(fields)}")

# One summary row per event: its name, how many fields its schema lists,
# how many samples were recorded, and its tier (OBSERVED when unset).
event_summary = [
    {
        'event': name,
        'field_count': len(info.get('fields', {})),
        'sample_count': info.get('sample_count', 0),
        'tier': info.get('tier', 'OBSERVED'),
    }
    for name, info in schemas.items()
]

df_events = pd.DataFrame(event_summary)
if not df_events.empty:
    # Show the events with the most samples first.
    df_events = df_events.sort_values('sample_count', ascending=False)
    display(df_events)

## 2. Event Type Filter

Filter events by tier (OBSERVED, VERIFIED, CANONICAL):

In [None]:
# Tier to keep; change to VERIFIED or CANONICAL.
TIER = "OBSERVED"

# A schema with no 'tier' key is treated as OBSERVED.
filtered = {
    name: schema
    for name, schema in schemas.items()
    if schema.get('tier', 'OBSERVED') == TIER
}
print(f"Events with tier '{TIER}': {len(filtered)}")

# List at most the first ten matching event names.
for event_name in list(filtered)[:10]:
    print(f"  - {event_name}")

## 3. Event Detail Inspector

Inspect a specific event's schema and sample data:

In [None]:
# Name of the event to inspect; change it to look at other events.
EVENT_NAME = "gameStateUpdate"

if EVENT_NAME not in schemas:
    # Unknown event: show a few valid names to choose from.
    print(f"Event '{EVENT_NAME}' not found.")
    print(f"Available events: {list(schemas.keys())[:10]}")
else:
    event_schema = schemas[EVENT_NAME]
    event_fields = event_schema.get('fields', {})

    print(f"Event: {EVENT_NAME}")
    print(f"Tier: {event_schema.get('tier', 'OBSERVED')}")
    print(f"Sample count: {event_schema.get('sample_count', 0)}")
    print(f"\nFields ({len(event_fields)}):\n")

    # One line per field with its type, followed by up to three examples
    # when the schema recorded any.
    for field_path, field_info in event_fields.items():
        print(f"  {field_path}: {field_info.get('type', 'unknown')}")
        examples = field_info.get('examples', [])
        if examples:
            print(f"    examples: {examples[:3]}")

## 4. Field Search

Search for fields across all events:

In [None]:
# Substring to look for in field paths (matched case-insensitively);
# change it to search for other fields.
SEARCH_PATTERN = "price"

needle = SEARCH_PATTERN.lower()
matches = [
    {
        'field': field_path,
        'type': field_info.get('type', 'unknown'),
        'events': field_info.get('events', []),
    }
    for field_path, field_info in fields.items()
    if needle in field_path.lower()
]

print(f"Fields matching '{SEARCH_PATTERN}': {len(matches)}\n")

# Print at most 20 matches; each lists up to three events and summarizes
# the remainder as "(+N more)".
for match in matches[:20]:
    event_names = match['events']
    events_str = ', '.join(event_names[:3])
    extra = len(event_names) - 3
    if extra > 0:
        events_str += f" (+{extra} more)"
    print(f"{match['field']} ({match['type']})")
    print(f"  Found in: {events_str}")

## 5. RAG Knowledge Search

Semantic search over indexed knowledge:

In [None]:
try:
    # The search helper lives in the project's retrieval package; when it
    # cannot be imported, the except branch below prints a setup hint.
    from retrieval.retrieve import search

    QUERY = "gameStateUpdate price changes"  # Change query
    results = search(QUERY, top_k=5)

    print(f"Search: '{QUERY}'\n")
    print("=" * 60)

    # Only element [0] of each result list is read here -- presumably the
    # hits for the single query; confirm against search()'s return shape.
    documents = results['documents'][0]
    metadatas = results['metadatas'][0]
    distances = results['distances'][0]

    hits = zip(documents, metadatas, distances)
    for rank, (doc, meta, dist) in enumerate(hits, start=1):
        print(f"\n[{rank}] Distance: {dist:.3f}")
        print(f"Source: {meta.get('source', 'unknown')}")
        # Show only the first 300 characters of each document.
        print(f"Content: {doc[:300]}...")
        print("-" * 40)

except ImportError as e:
    print(f"RAG pipeline not available: {e}")
    print("\nRun: cd rag-pipeline && python -m ingestion.ingest")

## 6. Raw Recording Browser

Browse raw WebSocket recordings:

In [None]:
# Directory to scan for recordings, taken from the shared path settings.
recordings_dir = RUGS_RECORDINGS_DIR

if not recordings_dir.exists():
    print(f"Recordings directory not found: {recordings_dir}")
    print("Configure RUGS_RECORDINGS_DIR in config.env")
else:
    recordings = list(recordings_dir.glob("*.jsonl"))
    print(f"Found {len(recordings)} recordings in {recordings_dir}")

    # Order by modification time, newest first, and show the top five
    # with their size in KB.
    recordings_sorted = sorted(
        recordings,
        key=lambda path: path.stat().st_mtime,
        reverse=True,
    )
    print("\nRecent recordings:")
    for rec in recordings_sorted[:5]:
        size_kb = rec.stat().st_size / 1024
        print(f"  {rec.name} ({size_kb:.1f} KB)")

## 7. Sample Recording Analysis

Analyze a single recording file (by default, the first `.jsonl` file found in the recordings directory):

In [None]:
# Load one recording (modify the selection below as needed), parse its
# JSONL lines into `events`, and print how often each event type occurs.
from collections import Counter

if recordings_dir.exists():
    recordings = list(recordings_dir.glob("*.jsonl"))
    if recordings:
        # First path returned by glob(); glob() makes no ordering guarantee,
        # so assign an explicit path here to analyze a particular recording.
        sample_file = recordings[0]

        events = []
        skipped_lines = 0
        # Decode as UTF-8 explicitly instead of relying on the platform's
        # default locale encoding.
        with open(sample_file, encoding="utf-8") as f:
            for line in f:
                if not line.strip():
                    continue  # blank lines carry no event
                try:
                    events.append(json.loads(line))
                except json.JSONDecodeError:
                    # A malformed line (e.g. a truncated final write) should
                    # not abort the whole analysis; count it and report below.
                    skipped_lines += 1

        print(f"Recording: {sample_file.name}")
        print(f"Total events: {len(events)}")
        if skipped_lines:
            print(f"Skipped malformed lines: {skipped_lines}")

        # Event type distribution. Entries that are not JSON objects, or that
        # lack an 'event' key, are counted under 'unknown'.
        event_types = Counter(
            evt.get('event', 'unknown') if isinstance(evt, dict) else 'unknown'
            for evt in events
        )

        print("\nEvent distribution:")
        # most_common() yields highest counts first; ties keep first-seen order.
        for evt_type, count in event_types.most_common():
            print(f"  {evt_type}: {count}")
    else:
        print("No recordings found.")
else:
    # Report the missing directory instead of finishing silently.
    print(f"Recordings directory not found: {recordings_dir}")

## Next Steps

- **02_canonical_review.ipynb** - Validate and promote fields to CANONICAL
- **03_coverage_dashboard.ipynb** - Visualize documentation gaps
- **04_rl_bot_analysis.ipynb** - Analyze RL model performance