# STYX vs Conductor Benchmark

**Purpose:** Compare token efficiency between STYX (Selective Token Yield eXtraction) and Google Conductor's context management approach.

**Hypothesis:** STYX extracts only decision-relevant state, while Conductor stores full markdown context. STYX should use significantly fewer tokens for equivalent understanding.

**Dataset:** facebook/react GitHub issues (287 issues from prior validation)

In [None]:
# Install dependencies
!pip install tiktoken requests PyGithub -q

In [None]:
# Configuration
import os
from google.colab import userdata

# Get API key from Colab secrets (add your key in the Colab sidebar under 'Secrets')
# Name it GEMINI_API_KEY
try:
    GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')
except Exception:
    # Secret lookup failed: fall back to asking for the key interactively.
    # Catch `Exception` rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still stop the cell instead of being
    # swallowed and turned into a prompt.
    GEMINI_API_KEY = input('Enter your Gemini API key: ')

# Export the key through the process environment as well as the module global.
os.environ['GEMINI_API_KEY'] = GEMINI_API_KEY
# Only the length is printed so the key itself never lands in the cell output.
print(f'API key configured (length: {len(GEMINI_API_KEY)})')

In [None]:
import tiktoken
import requests
import json
from datetime import datetime

# Token counter. cl100k_base is the tiktoken encoding used by GPT-4-era OpenAI
# models. Gemini uses its own tokenizer, so the counts below are only an
# approximation for Gemini and are best read as relative, not absolute, sizes.
enc = tiktoken.get_encoding('cl100k_base')


def count_tokens(text: str) -> int:
    """Return how many tokens the module-level encoder `enc` produces for `text`."""
    token_ids = enc.encode(text)
    return len(token_ids)


print('Tokenizer loaded')

In [None]:
# Fetch GitHub issues (facebook/react)
def fetch_github_issues(repo='facebook/react', max_issues=287, *,
                        include_pull_requests=True, timeout=30):
    """Fetch up to ``max_issues`` items from a repository's GitHub issues endpoint.

    Pages through ``GET /repos/{repo}/issues`` (all states, 100 items per page)
    until enough items are collected, a page comes back empty, or a request
    fails. Fetching is best effort: on any failure a message is printed and
    whatever was collected so far is returned.

    Args:
        repo: Repository in ``owner/name`` form.
        max_issues: Maximum number of items to return. Values <= 0 return an
            empty list without making any request.
        include_pull_requests: GitHub's issues endpoint also returns pull
            requests (items carrying a ``pull_request`` key). The default
            ``True`` keeps them, matching the previous behaviour; pass
            ``False`` to keep real issues only.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of decoded issue dicts, at most ``max_issues`` long.
    """
    issues = []
    url = f'https://api.github.com/repos/{repo}/issues'
    page = 1
    while len(issues) < max_issues:
        params = {'state': 'all', 'per_page': 100, 'page': page}
        try:
            # Without a timeout a stalled connection would hang the notebook forever.
            resp = requests.get(url, params=params, timeout=timeout)
        except requests.RequestException as exc:
            # Treat transport failures like a bad status code: report and stop paging.
            print(f'GitHub API request failed: {exc}')
            break
        if resp.status_code != 200:
            print(f'GitHub API error: {resp.status_code}')
            break
        batch = resp.json()
        if not batch:
            # An empty page means the repository has no further items.
            break
        if not include_pull_requests:
            batch = [item for item in batch if 'pull_request' not in item]
        issues.extend(batch)
        page += 1
        print(f'Fetched {len(issues)} issues...')
    # The last page may overshoot the requested count, so trim to the cap.
    return issues[:max_issues]

# Download the benchmark dataset once and keep it in the module-level `issues` list.
issues = fetch_github_issues()
fetched_total = len(issues)
print(f'Total issues fetched: {fetched_total}')

In [None]:
# APPROACH 1: Full Context (baseline)
# Every issue is rendered with its complete, untruncated body; this is the
# "dump everything" upper bound the other approaches are compared against.


def _render_full_issue(issue):
    """Render one issue dict as a plain-text record with its whole body."""
    label_names = [label['name'] for label in issue.get('labels', [])]
    # A missing or None body is shown as a literal placeholder.
    body_text = issue.get('body', '') or '(empty)'
    return (
        f"Issue #{issue['number']}: {issue['title']}\n"
        f"State: {issue['state']}\n"
        f"Labels: {label_names}\n"
        f"Body: {body_text}\n"
    )


# Records are separated by a blank line.
full_context = '\n\n'.join(_render_full_issue(issue) for issue in issues)

full_tokens = count_tokens(full_context)
print(f'Full context: {full_tokens:,} tokens')

In [None]:
# APPROACH 2: Conductor-style Context
# Conductor stores context as structured markdown files
# Simulating: product.md, techstack.md, style.md, and issue summaries
# NOTE(review): only a product document and a tech-stack document are defined
# below; no style.md stand-in appears in this span — confirm whether that
# omission is intentional.

# Hand-written, static stand-in for a product.md context file. Its content is
# a fixed literal and does not depend on the fetched issues.
conductor_product_md = '''# Product Context: React
## Overview
React is a JavaScript library for building user interfaces.

## Goals
- Declarative views
- Component-based architecture
- Learn once, write anywhere

## Current Focus
- React 19 features
- Server Components
- Concurrent rendering
'''

# Hand-written, static stand-in for a techstack.md context file (also a fixed literal).
conductor_techstack_md = '''# Tech Stack
- Language: JavaScript/TypeScript
- Build: Rollup, Webpack
- Testing: Jest
- CI: GitHub Actions
'''

def _render_conductor_issue(issue):
    """Render one issue dict as a markdown section with a body excerpt."""
    label_names = [label['name'] for label in issue.get('labels', [])]
    # Keep at most the first 500 characters of the body (None counts as empty).
    # NOTE(review): the '...' suffix below is appended even when the body is
    # shorter than 500 characters or empty, which adds a few tokens per issue.
    excerpt = (issue.get('body', '') or '')[:500]
    return (
        f"## Issue #{issue['number']}: {issue['title']}\n"
        f"State: {issue['state']}\n"
        f"Labels: {label_names}\n"
        f"Summary: {excerpt}...\n"
    )


conductor_issues_md = '# Active Issues\n\n' + '\n'.join(map(_render_conductor_issue, issues))

# The three documents are joined with a single newline between them.
conductor_context = '\n'.join([conductor_product_md, conductor_techstack_md, conductor_issues_md])
conductor_tokens = count_tokens(conductor_context)
print(f'Conductor-style context: {conductor_tokens:,} tokens')

In [None]:
# APPROACH 3: STYX Extraction
# Extract only decision-relevant binding state

def styx_extract(issues):
    """STYX: Selective Token Yield eXtraction.

    Sorts issues into four categories (decisions, constraints, tensions,
    anti_patterns) by keyword matching on the lower-cased title and body,
    plus a few label checks. An issue may land in several categories or none.

    Keywords are matched as whole words or phrases, with an optional trailing
    "s"/"es" tolerated. So "bug" matches "bug" and "bugs" but not "debugging",
    and "will" does not match "william". Plain substring matching produced
    those false positives.

    Args:
        issues: Iterable of issue dicts with ``number`` and ``title`` keys and
            optional ``body`` (may be None) and ``labels`` (a list of dicts
            that each have a ``name``).

    Returns:
        Dict with keys ``decisions``, ``constraints``, ``tensions`` and
        ``anti_patterns``. Each value is a list of
        ``"#<number>: <first 100 chars of title>"`` strings in input order.
    """
    import re  # local import so this cell does not rely on another cell's imports

    def _keyword_pattern(keywords):
        # One alternation per category; \b anchors stop matches inside longer words.
        alternatives = '|'.join(re.escape(kw) for kw in keywords)
        return re.compile(rf'\b(?:{alternatives})(?:e?s)?\b')

    decision_re = _keyword_pattern(
        ['decided', 'will', 'must', 'should', 'approved', 'merged', 'accepted', 'implemented'])
    constraint_re = _keyword_pattern(
        ['cannot', 'must not', 'blocked', 'requires', 'depends on', 'breaking change'])
    tension_re = _keyword_pattern(
        ['vs', 'tradeoff', 'alternative', 'instead', 'conflict', 'disagree'])
    anti_pattern_re = _keyword_pattern(
        ['don\'t', 'avoid', 'deprecated', 'wrong', 'mistake', 'bug', 'regression'])

    decisions = []
    constraints = []
    tensions = []
    anti_patterns = []

    for issue in issues:
        # Title and body are searched together; a None body counts as empty.
        combined = issue['title'].lower() + ' ' + (issue.get('body', '') or '').lower()
        labels = {label['name'].lower() for label in issue.get('labels', [])}
        # Every category records the same compact reference, so build it once.
        entry = f"#{issue['number']}: {issue['title'][:100]}"

        if decision_re.search(combined) or 'decision' in labels:
            decisions.append(entry)

        if constraint_re.search(combined) or 'breaking change' in labels:
            constraints.append(entry)

        if tension_re.search(combined):
            tensions.append(entry)

        if anti_pattern_re.search(combined) or 'bug' in labels:
            anti_patterns.append(entry)

    return {
        'decisions': decisions,
        'constraints': constraints,
        'tensions': tensions,
        'anti_patterns': anti_patterns
    }

styx_state = styx_extract(issues)


def _styx_section(entries, limit):
    """Join at most `limit` entries with newlines, or return 'None' if there are none."""
    if not entries:
        return 'None'
    return '\n'.join(entries[:limit])


_styx_decisions = styx_state['decisions']
_styx_constraints = styx_state['constraints']
_styx_tensions = styx_state['tensions']
_styx_anti_patterns = styx_state['anti_patterns']

# NOTE(review): each header shows the total number of extracted entries, while
# the listing under it is capped (50 / 20 / 30 / 20), so the token count below
# reflects the capped listings rather than everything that was extracted.
styx_context = f'''# STYX Binding State
## Decisions ({len(_styx_decisions)})
{_styx_section(_styx_decisions, 50)}

## Constraints ({len(_styx_constraints)})
{_styx_section(_styx_constraints, 20)}

## Tensions ({len(_styx_tensions)})
{_styx_section(_styx_tensions, 30)}

## Anti-Patterns ({len(_styx_anti_patterns)})
{_styx_section(_styx_anti_patterns, 20)}
'''

styx_tokens = count_tokens(styx_context)
print(f'STYX context: {styx_tokens:,} tokens')
print(
    f'\nExtracted: {len(_styx_decisions)} decisions, '
    f'{len(_styx_constraints)} constraints, '
    f'{len(_styx_tensions)} tensions, '
    f'{len(_styx_anti_patterns)} anti-patterns'
)

In [None]:
# RESULTS COMPARISON
# Prints a fixed-width table of token counts for the three contexts, followed
# by STYX's percentage reduction against each of the other two.
divider = '=' * 60
print(divider)
print('STYX vs CONDUCTOR BENCHMARK RESULTS')
print(divider)
print(f'Dataset: facebook/react ({len(issues)} issues)')
print(f'Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
print(divider)
print(f'{"Approach":<25} {"Tokens":>12} {"vs Full":>12} {"vs Conductor":>15}')
print('-' * 60)
print(f'{"Full Context (baseline)":<25} {full_tokens:>12,} {"1.0x":>12} {"-":>15}')
# The "vs Full" column is the baseline size divided by the approach's size.
print(f'{"Conductor-style":<25} {conductor_tokens:>12,} {full_tokens/conductor_tokens:>11.1f}x {"-":>15}')
print(f'{"STYX":<25} {styx_tokens:>12,} {full_tokens/styx_tokens:>11.1f}x {conductor_tokens/styx_tokens:>14.1f}x')
print(divider)
if full_tokens:
    print(f'\nSTYX achieves {100 - (styx_tokens/full_tokens*100):.1f}% token reduction vs full context')
else:
    # With no issues fetched the full context is empty (0 tokens); dividing by
    # it would raise ZeroDivisionError, so report the situation instead.
    print('\nFull context is 0 tokens (no issues fetched); reduction vs full context is undefined')
print(f'STYX achieves {100 - (styx_tokens/conductor_tokens*100):.1f}% token reduction vs Conductor')

In [None]:
# Save results for evidence
# Collects the token counts, derived ratios and per-category extraction counts
# into one dict, writes it to styx_vs_conductor_results.json and echoes it.

# With no issues fetched the full context is 0 tokens; the reduction against
# it is then undefined, so store None (JSON null) instead of dividing by zero.
if full_tokens:
    reduction_vs_full_pct = round(100 - (styx_tokens/full_tokens*100), 1)
else:
    reduction_vs_full_pct = None

results = {
    'benchmark': 'STYX vs Conductor',
    'dataset': 'facebook/react',
    'issues_count': len(issues),
    'timestamp': datetime.now().isoformat(),
    'results': {
        'full_context_tokens': full_tokens,
        'conductor_tokens': conductor_tokens,
        'styx_tokens': styx_tokens,
        # Ratios are "other context size / STYX size" (higher means STYX is smaller).
        'styx_vs_full_ratio': round(full_tokens/styx_tokens, 2),
        'styx_vs_conductor_ratio': round(conductor_tokens/styx_tokens, 2),
        'styx_reduction_vs_full_pct': reduction_vs_full_pct,
        'styx_reduction_vs_conductor_pct': round(100 - (styx_tokens/conductor_tokens*100), 1)
    },
    'styx_extraction': {
        'decisions': len(styx_state['decisions']),
        'constraints': len(styx_state['constraints']),
        'tensions': len(styx_state['tensions']),
        'anti_patterns': len(styx_state['anti_patterns'])
    }
}

# Explicit encoding so the file does not depend on the platform default.
with open('styx_vs_conductor_results.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2)

print('Results saved to styx_vs_conductor_results.json')
print(json.dumps(results, indent=2))