In [8]:
import re
from pathlib import Path
from typing import Dict, List, Tuple
import matplotlib.pyplot as plt

def parse_markdown_standings(content: str) -> Dict[str, List[Dict]]:
    """Parse markdown content and extract standings for each stage.

    Every line starting with ``### `` opens a new stage. Table rows that
    follow are read as ``| team | W-L | ...``; the W-L cell may carry a
    leading ``+``. Rows whose second cell is not a ``wins-losses`` pair
    (header rows, separator rows such as ``| --- | --- |``, placeholders)
    are skipped instead of raising ``ValueError``.

    Args:
        content: Markdown text containing stage headings and standings tables.

    Returns:
        A dict mapping each stage name to a list of dicts with the keys
        ``team``, ``wins``, ``losses`` and ``score`` (wins minus losses),
        ordered by wins descending, then losses ascending.
    """
    # Optional '+' prefix, then two integers separated by '-'.
    record_pattern = re.compile(r'\+?\s*(\d+)\s*-\s*(\d+)')
    heading_marker = '### '

    stages: Dict[str, List[Dict]] = {}
    current_stage = None

    # Find stage sections and their tables
    for line in content.split('\n'):
        if line.startswith(heading_marker):
            # Drop only the leading marker so the rest of the heading is kept.
            current_stage = line[len(heading_marker):].strip()
            stages[current_stage] = []
            continue

        if not current_stage or '|' not in line or line.startswith('|---'):
            continue
        if 'Team | W-L |' in line:  # Skip header row
            continue

        parts = [p.strip() for p in line.split('|')]
        if len(parts) < 3:
            continue

        record = record_pattern.fullmatch(parts[2])
        if record is None:
            # Not a W-L cell (e.g. a spaced separator row); ignore the row.
            continue

        wins, losses = int(record.group(1)), int(record.group(2))
        stages[current_stage].append({
            'team': parts[1],
            'wins': wins,
            'losses': losses,
            'score': wins - losses
        })

    # Sort teams in each stage: most wins first, fewest losses breaking ties
    for stage in stages:
        stages[stage] = sorted(
            stages[stage],
            key=lambda x: (x['wins'], -x['losses']),
            reverse=True
        )

    return stages

def compare_standings(real_standings: Dict, model_standings: Dict) -> Dict:
    """Measure how far a model's per-stage ranking is from the real one.

    A team's rank is its 1-based position in the stage's list. Only stages
    present in both inputs, and only teams present in both lists of a stage,
    are compared. Stages without any shared team are left out of the result.

    Returns:
        A dict mapping stage name to ``avg_rank_diff``, ``max_rank_diff`` and
        ``details`` (one entry per shared team, in real-ranking order).
    """
    results = {}

    for stage_name, real_entries in real_standings.items():
        if stage_name not in model_standings:
            continue

        real_pos = {
            entry['team']: position
            for position, entry in enumerate(real_entries, start=1)
        }
        model_pos = {
            entry['team']: position
            for position, entry in enumerate(model_standings[stage_name], start=1)
        }

        # One row per team ranked in both tables.
        rows = [
            {
                'team': name,
                'real_rank': real_rank,
                'model_rank': model_pos[name],
                'rank_diff': abs(real_rank - model_pos[name]),
            }
            for name, real_rank in real_pos.items()
            if name in model_pos
        ]
        if not rows:
            continue

        gaps = [row['rank_diff'] for row in rows]
        results[stage_name] = {
            'avg_rank_diff': sum(gaps) / len(gaps),
            'max_rank_diff': max(gaps),
            'details': rows,
        }

    return results

def generate_analysis(real_world_path: Path, model_path: Path, model_name: str) -> Tuple[str, Dict]:
    """Generate analysis report comparing real world results with model predictions.

    NOTE(review): a later definition of ``generate_analysis`` in this file
    rebinds the name, so this variant (top-level ``#`` / ``##`` headings) is
    not the one that runs once the whole file has executed.

    Args:
        real_world_path: Markdown file with the real-world standings.
        model_path: Markdown file with the model's predicted standings.
        model_name: Name shown in the report title.

    Returns:
        A ``(report, comparison)`` tuple: the markdown report text and the
        per-stage comparison dict produced by ``compare_standings``.
    """
    # Read markdown files; decode explicitly so results do not depend on the
    # platform's default locale encoding.
    with open(real_world_path, encoding='utf-8') as f:
        real_world_content = f.read()
    with open(model_path, encoding='utf-8') as f:
        model_content = f.read()

    # Parse standings
    real_standings = parse_markdown_standings(real_world_content)
    model_standings = parse_markdown_standings(model_content)

    # Compare standings
    comparison = compare_standings(real_standings, model_standings)

    # Generate report
    chunks = [f"# Standings Analysis: {model_name}\n\n"]

    for stage, stats in comparison.items():
        chunks.append(f"## {stage}\n\n")
        chunks.append(f"Average rank difference: {stats['avg_rank_diff']:.2f}\n")
        chunks.append(f"Maximum rank difference: {stats['max_rank_diff']}\n\n")

        chunks.append("| Team | Real Rank | Model Rank | Rank Difference |\n")
        chunks.append("|------|-----------|------------|----------------|\n")

        # Largest rank difference first; ties ordered by real rank
        sorted_details = sorted(
            stats['details'],
            key=lambda x: (-x['rank_diff'], x['real_rank'])
        )

        for detail in sorted_details:
            chunks.append(
                f"| {detail['team']} | {detail['real_rank']} | {detail['model_rank']} | {detail['rank_diff']} |\n"
            )

        chunks.append("\n---\n\n")

    return ''.join(chunks), comparison

def _save_rank_diff_plot(comparisons, stages, metric, label, output_path, filename):
    """Draw one line per model for ``metric`` across ``stages`` and save it."""
    missing = float('nan')  # leaves a gap where a model has no data for a stage

    plt.figure(figsize=(10, 6))
    for model, per_stage in comparisons.items():
        values = [
            per_stage[stage][metric] if stage in per_stage else missing
            for stage in stages
        ]
        plt.plot(stages, values, marker='o', label=model)

    plt.title(f'{label} Across Stages')
    plt.xlabel('Stage')
    plt.ylabel(label)
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(output_path / filename)
    plt.close()


def plot_rank_differences(comparisons: Dict[str, Dict], output_path: Path):
    """Create plots for rank difference analysis.

    Writes ``avg_rank_diff.png`` and ``max_rank_diff.png`` into
    ``output_path``, each with one line per model.

    Args:
        comparisons: Maps model name to that model's per-stage comparison
            dict (entries carrying ``avg_rank_diff`` and ``max_rank_diff``).
        output_path: Directory the two PNG files are written to.

    Returns:
        None. Nothing is plotted or written when ``comparisons`` is empty.
    """
    if not comparisons:
        # No model to plot; previously this raised StopIteration.
        return

    # Union of stages over all models, in first-seen order, so a model that
    # lacks a stage no longer triggers a KeyError.
    stages = list(dict.fromkeys(
        stage
        for per_stage in comparisons.values()
        for stage in per_stage
    ))

    _save_rank_diff_plot(
        comparisons, stages, 'avg_rank_diff',
        'Average Rank Difference', output_path, 'avg_rank_diff.png'
    )
    _save_rank_diff_plot(
        comparisons, stages, 'max_rank_diff',
        'Maximum Rank Difference', output_path, 'max_rank_diff.png'
    )

def generate_analysis(real_world_path: Path, model_path: Path, model_name: str) -> Tuple[str, Dict]:
    """Generate analysis report comparing real world results with model predictions.

    The report is a markdown section (``## model_name``) with one
    ``### stage`` subsection per compared stage, so several reports can be
    concatenated under a single document title.

    Args:
        real_world_path: Markdown file with the real-world standings.
        model_path: Markdown file with the model's predicted standings.
        model_name: Name used as the section heading.

    Returns:
        A ``(report, comparison)`` tuple: the markdown report text and the
        per-stage comparison dict produced by ``compare_standings``.
    """
    # Read markdown files; decode explicitly so results do not depend on the
    # platform's default locale encoding.
    with open(real_world_path, encoding='utf-8') as f:
        real_world_content = f.read()
    with open(model_path, encoding='utf-8') as f:
        model_content = f.read()

    # Parse standings
    real_standings = parse_markdown_standings(real_world_content)
    model_standings = parse_markdown_standings(model_content)

    # Compare standings
    comparison = compare_standings(real_standings, model_standings)

    # Generate report
    chunks = [f"## {model_name}\n\n"]

    for stage, stats in comparison.items():
        chunks.append(f"### {stage}\n\n")
        chunks.append(f"Average rank difference: {stats['avg_rank_diff']:.2f}\n")
        chunks.append(f"Maximum rank difference: {stats['max_rank_diff']}\n\n")

        chunks.append("| Team | Real Rank | Model Rank | Rank Difference |\n")
        chunks.append("|------|-----------|------------|----------------|\n")

        # Largest rank difference first; ties ordered by real rank
        sorted_details = sorted(
            stats['details'],
            key=lambda x: (-x['rank_diff'], x['real_rank'])
        )

        for detail in sorted_details:
            chunks.append(
                f"| {detail['team']} | {detail['real_rank']} | {detail['model_rank']} | {detail['rank_diff']} |\n"
            )

        chunks.append("\n---\n\n")

    return ''.join(chunks), comparison

def analyze_standings() -> Dict[str, Dict]:
    """Compare every model's standings with the real-world ones and save the results.

    The results directory is the current working directory, or its parent
    when the current directory is named ``analysis``. Each sub-directory
    holding a ``README.md`` (other than ``0real-world``, the ``analysis``
    output directory and hidden directories) is treated as one model.

    Writes ``analysis/analyze-standings.md`` and, when at least one model was
    analyzed, the rank-difference plots.

    Returns:
        A dict mapping model directory name to its per-stage comparison.
    """
    # Get the current working directory and navigate to results folder
    current_dir = Path.cwd()
    results_dir = current_dir.parent if current_dir.name == 'analysis' else current_dir
    analysis_dir = results_dir / 'analysis'

    # Create analysis directory if it doesn't exist
    analysis_dir.mkdir(exist_ok=True)

    real_world_path = results_dir / '0real-world' / 'README.md'
    model_comparisons = {}
    all_reports = ["# CS2 Match Prediction Standings Analysis\n\n"]

    # The reference data and our own output folder are not models.
    non_model_dirs = {'0real-world', analysis_dir.name}

    # Analyze each model; sorted so report and legend order are reproducible
    # (iterdir() yields entries in arbitrary order).
    for model_dir in sorted(results_dir.iterdir()):
        if not model_dir.is_dir():
            continue
        if model_dir.name in non_model_dirs or model_dir.name.startswith('.'):
            continue

        model_path = model_dir / 'README.md'
        if not model_path.exists():
            continue

        print(f"Analyzing {model_dir.name}...")
        report, comparison = generate_analysis(real_world_path, model_path, model_dir.name)
        all_reports.append(report)
        model_comparisons[model_dir.name] = comparison

    # Save combined report
    with open(analysis_dir / 'analyze-standings.md', 'w', encoding='utf-8') as f:
        f.write('\n'.join(all_reports))

    # Generate plots; there is nothing to plot when no model was found
    if model_comparisons:
        plot_rank_differences(model_comparisons, analysis_dir)

    print(f"Analysis complete. Results saved in {analysis_dir}")
    return model_comparisons

# Run the analysis when this cell/module executes and keep the returned
# per-model comparison data in a module-level name.
model_comparisons = analyze_standings()

Analyzing deepseek-chat...
Analyzing sabia3...
Analyzing gpt41...
Analyzing claude-sonnet-4...
Analyzing claude-opus-4...
Analyzing gpt-o4-mini...
Analysis complete. Results saved in /Users/lui/oss/cs2-match-prediction/results/analysis
