# STYX vs GraphRAG + Re-ranking Benchmark (P000)

**Purpose:** Validate STYX token reduction against state-of-the-art retrieval: GraphRAG and re-ranking.

**Test Matrix:**
1. Basic RAG (sentence-transformers + ChromaDB) - baseline
2. GraphRAG-style retrieval (simplified entity-graph simulation of Microsoft's graph-based GraphRAG)
3. RAG + Re-ranking (cross-encoder)
4. STYX on top of each retrieval method above (RAG + STYX, Re-ranking + STYX, GraphRAG + STYX)

**Key Question:** Does STYX still deliver meaningful compression when retrieval is already optimized?

**IMPORTANT:** This benchmark must complete before updating lightspeedup.com or tweet angles.

In [None]:
# Install dependencies (takes ~2 min)
# `-q` keeps pip's output quiet.
# NOTE(review): `graphrag` is installed here, but no cell visible in this notebook
# imports it -- confirm it is still needed before paying for the install.
!pip install tiktoken sentence-transformers chromadb graphrag torch transformers -q
# Attempt the `cross-encoder-reranker` install with its error output discarded; if that
# command fails, the `||` branch runs a plain sentence-transformers install instead.
# The re-ranking cell imports CrossEncoder from sentence_transformers.
!pip install cross-encoder-reranker -q 2>/dev/null || pip install sentence-transformers -q

In [None]:
# Configuration
import os
from google.colab import userdata

# Prefer the key stored via Colab's `userdata`; if that lookup fails for any
# ordinary reason, fall back to asking for the key interactively.
try:
    GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')
except Exception:  # not a bare `except:`, so Ctrl-C / kernel interrupts still propagate
    # Local import keeps this cell self-contained. getpass hides the typed value,
    # so the key is not echoed into the saved notebook output.
    from getpass import getpass
    GEMINI_API_KEY = getpass('Enter your Gemini API key: ')

# Expose the key through the process environment as well.
os.environ['GEMINI_API_KEY'] = GEMINI_API_KEY
print('API key configured')

In [None]:
import tiktoken
import requests
import json
from datetime import datetime
import torch

# Tokenizer shared by every token count reported in this notebook.
enc = tiktoken.get_encoding('cl100k_base')


def count_tokens(text: str) -> int:
    """Return the number of cl100k_base tokens in *text*.

    ``disallowed_special=()`` makes literal special-token strings that happen
    to appear in the input (for example ``<|endoftext|>`` pasted into an issue
    body) get tokenized as ordinary text. With the encoder's default setting
    such input raises ``ValueError`` instead of being counted.
    """
    return len(enc.encode(text, disallowed_special=()))

# Report whether torch can see a CUDA device and, if it can, name device 0.
_cuda_ready = torch.cuda.is_available()
print(f'GPU available: {_cuda_ready}')
if _cuda_ready:
    print(f'GPU: {torch.cuda.get_device_name(0)}')

In [None]:
# Fetch GitHub issues
def fetch_github_issues(repo='facebook/react', max_issues=287, timeout=30):
    """Download up to *max_issues* entries from a repository's GitHub issues endpoint.

    Pages through ``/repos/{repo}/issues`` (state ``all``, 100 entries per page)
    until enough entries are collected, a page comes back empty, or a request
    returns a non-200 status.

    Args:
        repo: ``owner/name`` of the repository to read.
        max_issues: Upper bound on the number of entries returned.
        timeout: Seconds to wait on each HTTP request before ``requests`` gives
            up, so a stalled connection cannot hang the notebook indefinitely.

    Returns:
        A list of at most *max_issues* decoded JSON objects. The list is shorter
        (possibly empty) when the API stops returning results early.
    """
    # NOTE(review): GitHub documents that this endpoint also returns pull
    # requests (entries carrying a 'pull_request' key); they are not filtered
    # out here -- confirm that is intended for an "issues" benchmark.
    url = f'https://api.github.com/repos/{repo}/issues'  # identical for every page
    issues = []
    page = 1
    while len(issues) < max_issues:
        params = {'state': 'all', 'per_page': 100, 'page': page}
        resp = requests.get(url, params=params, timeout=timeout)
        if resp.status_code != 200:
            # Stay best-effort (return what we have), but say why the dataset is
            # short instead of silently truncating it.
            print(
                f'Warning: GitHub API returned HTTP {resp.status_code} on page {page}; '
                f'stopping with {len(issues)} issues'
            )
            break
        batch = resp.json()
        if not batch:
            # An empty page means there is nothing left to fetch.
            break
        issues.extend(batch)
        page += 1
    # The last page may have overshot the requested count.
    return issues[:max_issues]

issues = fetch_github_issues()
print(f'Fetched {len(issues)} issues')

# Prepare documents: one string per issue -- a "number: title" header line,
# then the body (a missing or null body becomes an empty string)
documents = []
for issue in issues:
    body = issue.get('body') or ''
    documents.append(f"Issue #{issue['number']}: {issue['title']}\n{body}")
print(f'Prepared {len(documents)} documents')

In [None]:
# Full context baseline: token cost of sending every document at once,
# with a horizontal-rule separator between consecutive documents
full_context = str.join('\n\n---\n\n', documents)
full_tokens = count_tokens(full_context)
print(f'Full context: {full_tokens:,} tokens')

In [None]:
# APPROACH 1: Basic RAG (sentence-transformers + ChromaDB)
from sentence_transformers import SentenceTransformer
import chromadb

print('Loading embedding model...')
embedder = SentenceTransformer('all-MiniLM-L6-v2')

print('Creating vector store...')
client = chromadb.Client()
# get_or_create_collection + upsert (instead of create_collection + add) keeps
# this cell re-runnable: create_collection raises when a collection with the
# same name already exists, which is the case on a second run in the same kernel.
# NOTE(review): upsert overwrites ids 0..len(documents)-1 but does not remove
# higher ids left over from an earlier, larger run -- restart the runtime if the
# dataset shrinks between runs.
collection = client.get_or_create_collection('issues')

# Add documents; each id is the document's position in `documents`
embeddings = embedder.encode(documents, show_progress_bar=True)
collection.upsert(
    ids=[str(i) for i in range(len(documents))],
    embeddings=embeddings.tolist(),
    documents=documents,
)

# Query: typical dev question
query = "How do I handle concurrent state updates in React?"
query_embedding = embedder.encode([query])[0]

# Retrieve top-k (k=20) nearest documents for the query embedding
results = collection.query(
    query_embeddings=[query_embedding.tolist()],
    n_results=20
)

# results['documents'][0] holds the documents returned for the single query above
rag_context = '\n\n---\n\n'.join(results['documents'][0])
rag_tokens = count_tokens(rag_context)
print(f'Basic RAG (top-20): {rag_tokens:,} tokens')

In [None]:
# APPROACH 2: RAG + Re-ranking (Cross-Encoder)
from sentence_transformers import CrossEncoder

print('Loading cross-encoder for re-ranking...')
reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

# Over-fetch from the vector store so the re-ranker has 50 candidates to choose from
results_50 = collection.query(
    n_results=50,
    query_embeddings=[query_embedding.tolist()],
)
candidates = results_50['documents'][0]

# Score every (query, candidate) pair with the cross-encoder
pairs = [[query, candidate] for candidate in candidates]
scores = reranker.predict(pairs)

# Highest re-ranker score first; keep the ten best documents
ranked = sorted(zip(candidates, scores), key=lambda item: item[1], reverse=True)
reranked_docs = [text for text, _ in ranked[:10]]

reranked_context = '\n\n---\n\n'.join(reranked_docs)
reranked_tokens = count_tokens(reranked_context)
print(f'RAG + Re-ranking (top-10 reranked): {reranked_tokens:,} tokens')

In [None]:
# APPROACH 3: GraphRAG-style (simplified - entity extraction + graph traversal)
# Full GraphRAG requires more setup; this simulates the key insight:
# traverse relationships to find structurally relevant documents

import re
from collections import defaultdict

# Entity vocabulary (React-specific keywords). Each entry is applied as a
# case-insensitive regex; its lower-cased text is the entity's key in the graph.
entity_patterns = [
    r'useState', r'useEffect', r'useRef', r'useMemo', r'useCallback',
    r'Suspense', r'Concurrent', r'Server Component', r'hydration',
    r'reconcil', r'fiber', r'scheduler', r'batch', r'transition'
]
# Compile each pattern once rather than resolving it again for every document.
_entity_matchers = [(p.lower(), re.compile(p, re.IGNORECASE)) for p in entity_patterns]

# Build simple entity graph:
#   entity_graph: entity key     -> indices of the documents that mention it
#   doc_entities: document index -> entity keys found in that document
entity_graph = defaultdict(set)
doc_entities = {}

for idx, doc in enumerate(documents):
    entities = {key for key, matcher in _entity_matchers if matcher.search(doc)}
    doc_entities[idx] = entities
    for entity in entities:
        entity_graph[entity].add(idx)

# Query entities
query_entities = {'concurrent', 'state', 'batch', 'usestate'}

# A vocabulary entry is "hit" when any query term is a substring of its key
# (so the term 'state' also selects the 'usestate' entity).
_query_keys = {
    key for key, _ in _entity_matchers
    if any(term in key for term in query_entities)
}

# Direct matches: documents mentioning at least one hit entity
relevant_docs = set()
for key in _query_keys:
    relevant_docs.update(entity_graph.get(key, set()))


def _graph_rank(doc_idx):
    """Sort key: more overlap with the query's entities first, then lower index."""
    return (-len(doc_entities[doc_idx] & _query_keys), doc_idx)


# 1-hop expansion: starting from the 10 best direct matches, pull in up to 5
# documents per entity those seeds mention. Candidates are sorted before slicing
# so the choice does not depend on set iteration order.
expanded_docs = set(relevant_docs)
for doc_idx in sorted(relevant_docs, key=_graph_rank)[:10]:
    for entity in doc_entities[doc_idx]:
        expanded_docs.update(sorted(entity_graph[entity])[:5])

# Keep the 15 best candidates by query-entity overlap (ties broken by document
# index) instead of simply the 15 lowest document indices.
graphrag_docs = [documents[i] for i in sorted(expanded_docs, key=_graph_rank)[:15]]
graphrag_context = '\n\n---\n\n'.join(graphrag_docs)
graphrag_tokens = count_tokens(graphrag_context)
print(f'GraphRAG-style (entity graph): {graphrag_tokens:,} tokens')

In [None]:
# APPROACH 4: STYX Extraction
def styx_extract(documents):
    """Bucket documents into four keyword-defined categories.

    Every document is lower-cased and tested against each category's keyword
    list with a plain substring check. When any keyword occurs, the document's
    first line (truncated to 100 characters) is recorded under that category.
    A document can land in several categories.

    Args:
        documents: Iterable of document strings.

    Returns:
        A dict with the keys ``'decisions'``, ``'constraints'``, ``'tensions'``
        and ``'anti_patterns'``. Each value is a list of unique first-line
        summaries in first-seen order, capped at 30, 15, 20 and 15 entries
        respectively. De-duplicating in first-seen order (rather than through
        an unordered ``set``) makes the capped selection -- and therefore any
        token count derived from it -- reproducible from run to run.
    """
    decision_kw = ['decided', 'will', 'must', 'should', 'approved', 'merged', 'implemented', 'fixed']
    constraint_kw = ['cannot', 'must not', 'blocked', 'requires', 'depends', 'breaking']
    tension_kw = ['vs', 'tradeoff', 'alternative', 'instead', 'conflict', 'disagree', 'but']
    anti_pattern_kw = ['don\'t', 'avoid', 'deprecated', 'wrong', 'mistake', 'bug', 'regression']

    # (result key, keywords, maximum number of entries kept)
    categories = [
        ('decisions', decision_kw, 30),
        ('constraints', constraint_kw, 15),
        ('tensions', tension_kw, 20),
        ('anti_patterns', anti_pattern_kw, 15),
    ]
    # Dict keys de-duplicate while remembering insertion order.
    buckets = {name: {} for name, _, _ in categories}

    for doc in documents:
        doc_lower = doc.lower()
        first_line = doc.split('\n')[0][:100]
        for name, keywords, _ in categories:
            # NOTE(review): substring matching also fires inside longer words
            # (e.g. 'but' in 'button', 'bug' in 'debug'); kept as-is here.
            if any(kw in doc_lower for kw in keywords):
                buckets[name][first_line] = None

    return {name: list(buckets[name])[:cap] for name, _, cap in categories}

# Apply STYX to each retrieval method's output (basic RAG, re-ranked, graph-based)
styx_on_rag, styx_on_reranked, styx_on_graphrag = (
    styx_extract(doc_set)
    for doc_set in (results['documents'][0], reranked_docs, graphrag_docs)
)

def format_styx(state):
    """Render an extraction result as four markdown sections.

    Each section is a ``## Heading (count)`` line followed by the category's
    entries, one per line, or the word ``None`` when the category is empty.
    Sections are separated by a blank line; there is no trailing newline.

    Args:
        state: Mapping with the keys ``'decisions'``, ``'constraints'``,
            ``'tensions'`` and ``'anti_patterns'``, each a list of strings.

    Returns:
        The formatted text.
    """
    layout = (
        ('Decisions', 'decisions'),
        ('Constraints', 'constraints'),
        ('Tensions', 'tensions'),
        ('Anti-Patterns', 'anti_patterns'),
    )
    sections = []
    for heading, key in layout:
        entries = state[key]
        listing = '\n'.join(entries) if entries else 'None'
        sections.append(f'## {heading} ({len(entries)})\n{listing}')
    return '\n\n'.join(sections)

# Render each extraction result as text ...
styx_rag_context, styx_reranked_context, styx_graphrag_context = map(
    format_styx, (styx_on_rag, styx_on_reranked, styx_on_graphrag)
)

# ... and measure how many tokens each rendering costs
styx_rag_tokens, styx_reranked_tokens, styx_graphrag_tokens = map(
    count_tokens, (styx_rag_context, styx_reranked_context, styx_graphrag_context)
)

print(f'STYX on RAG: {styx_rag_tokens:,} tokens')
print(f'STYX on Reranked: {styx_reranked_tokens:,} tokens')
print(f'STYX on GraphRAG: {styx_graphrag_tokens:,} tokens')

In [None]:
# RESULTS
def _reduction_pct(tokens, baseline):
    """Format how far *tokens* undercuts *baseline* as a whole-number percentage.

    Returns ``'n/a'`` when *baseline* is 0 (for example when no issues were
    fetched or a retrieval step selected nothing), instead of dividing by zero.
    """
    if not baseline:
        return 'n/a'
    return f'{100 - tokens / baseline * 100:.0f}%'


heavy_rule = '=' * 70
light_rule = '-' * 70

print(heavy_rule)
print('STYX vs GraphRAG + Re-ranking BENCHMARK RESULTS')
print(heavy_rule)
print(f'Dataset: facebook/react ({len(issues)} issues)')
print(f'Query: "{query}"')
print(f'Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
print(heavy_rule)
print(f'{"Approach":<35} {"Tokens":>10} {"Reduction":>12}')
print(light_rule)
print(f'{"Full Context (baseline)":<35} {full_tokens:>10,} {"-":>12}')

# One pair of rows per retrieval method: the method alone, then with STYX applied.
# Both reductions in the table are measured against the full-context baseline.
for label, method_tokens, with_styx_tokens in (
    ('Basic RAG (top-20)', rag_tokens, styx_rag_tokens),
    ('RAG + Re-ranking (top-10)', reranked_tokens, styx_reranked_tokens),
    ('GraphRAG-style (entity graph)', graphrag_tokens, styx_graphrag_tokens),
):
    print(light_rule)
    print(f'{label:<35} {method_tokens:>10,} {_reduction_pct(method_tokens, full_tokens):>12}')
    print(f'{"  + STYX":<35} {with_styx_tokens:>10,} {_reduction_pct(with_styx_tokens, full_tokens):>12}')
print(heavy_rule)

# Unlike the table, this figure is relative to the GraphRAG-style context itself.
print(f'\nKey Finding: STYX provides additional {_reduction_pct(styx_graphrag_tokens, graphrag_tokens)} reduction ON TOP of GraphRAG')

In [None]:
# Save results
def _additional_reduction(styx_tokens, retrieved_tokens):
    """Format STYX's reduction relative to a retrieval method's own token count.

    Returns a percentage with one decimal place, or ``'n/a'`` when
    *retrieved_tokens* is 0, so an empty retrieval cannot raise
    ZeroDivisionError.
    """
    if not retrieved_tokens:
        return 'n/a'
    return f"{100 - styx_tokens / retrieved_tokens * 100:.1f}%"


# Deliberately NOT named `results`: that name holds the vector-store query result
# that the STYX cell reads via results['documents'][0]. Rebinding it here made
# that cell fail when re-run after this one.
benchmark_results = {
    'benchmark': 'STYX vs GraphRAG + Re-ranking',
    'dataset': 'facebook/react',
    'issues_count': len(issues),
    'query': query,
    'timestamp': datetime.now().isoformat(),
    'results': {
        'full_context_tokens': full_tokens,
        'basic_rag': {'tokens': rag_tokens, 'with_styx': styx_rag_tokens},
        'rag_reranking': {'tokens': reranked_tokens, 'with_styx': styx_reranked_tokens},
        'graphrag': {'tokens': graphrag_tokens, 'with_styx': styx_graphrag_tokens},
    },
    'styx_additional_reduction': {
        'on_rag': _additional_reduction(styx_rag_tokens, rag_tokens),
        'on_reranking': _additional_reduction(styx_reranked_tokens, reranked_tokens),
        'on_graphrag': _additional_reduction(styx_graphrag_tokens, graphrag_tokens),
    },
}

# Explicit encoding so the file's bytes do not depend on the platform default.
with open('styx_vs_graphrag_results.json', 'w', encoding='utf-8') as f:
    json.dump(benchmark_results, f, indent=2)

print('Results saved to styx_vs_graphrag_results.json')
print(json.dumps(benchmark_results, indent=2))