# Kuhn Poker Tournament Analysis

This notebook analyzes the results from LLM agent tournaments.

In [None]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Add parent directory to path
sys.path.append('..')

from experiment.analyzer import ResultsAnalyzer, plot_results

# Notebook-wide plotting defaults: seaborn grid theme and default figure size.
sns.set_style(style='whitegrid')
plt.rcParams.update({'figure.figsize': (14, 10)})

## Load Tournament Results

In [None]:
# Collect the per-tournament JSON files from the results directory.
results_dir = Path('../results')

# Directory listings come back in arbitrary, filesystem-dependent order, so
# sort the paths to make the printed list, the analysis order and the plot
# order reproducible between runs. Files whose name contains 'summary' are
# skipped.
result_files = sorted(
    f for f in results_dir.glob('*.json') if 'summary' not in f.name
)

print(f"Found {len(result_files)} tournament result files:")
for f in result_files:
    print(f"  - {f.name}")

In [None]:
# Load every tournament file, run the analyzer on it and keep the resulting
# statistics in `all_stats`, keyed by the file's 'matchup' value.
# NOTE: if two files carry the same 'matchup' value, the later one replaces
# the earlier entry in `all_stats`.
analyzer = ResultsAnalyzer('../results')
all_stats = {}

for result_file in result_files:
    # JSON text is UTF-8; pass the encoding explicitly so decoding does not
    # depend on the platform's default locale encoding.
    with open(result_file, encoding='utf-8') as f:
        tournament_data = json.load(f)

    matchup = tournament_data['matchup']
    stats = analyzer.analyze_tournament(tournament_data)
    all_stats[matchup] = stats

    # Per-matchup headline: each agent's profit normalised to 100 hands.
    print(f"\n{matchup}:")
    print(f"  {stats['agent1']}: {stats['agent1_profit_per_100']:+.2f} per 100 hands")
    print(f"  {stats['agent2']}: {stats['agent2_profit_per_100']:+.2f} per 100 hands")

## Hypothesis Testing

In [None]:
# Build a long-format table: one row per (matchup, agent) with that agent's
# profit per 100 hands. Rows are emitted agent1 first, then agent2.
performance_data = []

for matchup, stats in all_stats.items():
    performance_data.extend(
        {
            'Matchup': matchup,
            'Agent': stats[f'agent{slot}'],
            'Profit/100': stats[f'agent{slot}_profit_per_100'],
        }
        for slot in (1, 2)
    )

df = pd.DataFrame(performance_data)
df

In [None]:
# Grouped bar chart of profit per 100 hands: one group per matchup, one bar
# per agent in that matchup.
if df.empty:
    # With no rows the frame has no 'Matchup' column to group on.
    print("No tournament results loaded; nothing to plot.")
else:
    fig, ax = plt.subplots(figsize=(12, 6))

    matchups = df['Matchup'].unique()
    x = range(len(matchups))
    width = 0.35

    # Give every agent one fixed colour (in order of first appearance) so a
    # legend entry means the same thing in every matchup group.
    agents = list(dict.fromkeys(df['Agent']))
    agent_colors = dict(zip(agents, sns.color_palette(n_colors=len(agents))))
    labelled = set()  # agents that already have a legend entry

    for i, matchup in enumerate(matchups):
        matchup_data = df[df['Matchup'] == matchup]

        for j, (_, row) in enumerate(matchup_data.iterrows()):
            agent = row['Agent']
            # Label each agent exactly once, wherever it first appears;
            # labels starting with an underscore are left out of the legend.
            ax.bar(i + j*width, row['Profit/100'], width,
                   color=agent_colors[agent],
                   label=agent if agent not in labelled else '_nolegend_',
                   alpha=0.8)
            labelled.add(agent)

    ax.set_xlabel('Matchup')
    ax.set_ylabel('Profit per 100 hands')
    ax.set_title('Agent Performance Comparison')
    # Centre each tick between the two bars of its group.
    ax.set_xticks([i + width/2 for i in x])
    ax.set_xticklabels(matchups, rotation=45, ha='right')
    ax.axhline(y=0, color='black', linestyle='--', alpha=0.3)
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## Detailed Analysis: Action Frequencies

In [None]:
# Print, for every matchup, how often each agent chose each action, both as a
# raw count and as a share of that agent's total actions.
separator = "=" * 60

for matchup, stats in all_stats.items():
    print(f"\n{matchup}")
    print(separator)

    per_agent = stats.get('action_frequencies', {})

    for agent in per_agent:
        frequencies = per_agent[agent]
        total = sum(frequencies.values())
        print(f"\n{agent}:")
        # Actions are listed in sorted order of their names.
        for action in sorted(frequencies):
            count = frequencies[action]
            if total > 0:
                pct = count / total * 100
            else:
                pct = 0
            print(f"  {action:6s}: {count:4d} ({pct:5.1f}%)")

## Cumulative Profit Analysis

In [None]:
# Plot cumulative profit over time, one stacked subplot per matchup.
if not all_stats:
    # plt.subplots cannot build a grid with zero rows, so skip plotting.
    print("No tournament results loaded; nothing to plot.")
else:
    # squeeze=False always returns a 2-D array of axes, so a single matchup
    # needs no special-casing; take the only column as a 1-D sequence.
    fig, axes_grid = plt.subplots(len(all_stats), 1,
                                  figsize=(14, 5*len(all_stats)),
                                  squeeze=False)
    axes = axes_grid[:, 0]

    for ax, (matchup, stats) in zip(axes, all_stats.items()):
        cumulative = stats.get('cumulative_profit', {})

        for agent, profits in cumulative.items():
            ax.plot(profits, label=agent, linewidth=2)

        ax.set_xlabel('Hand Number')
        ax.set_ylabel('Cumulative Profit (chips)')
        ax.set_title(f'Cumulative Profit: {matchup}')
        # Only draw a legend when at least one series was plotted.
        if cumulative:
            ax.legend()
        ax.axhline(y=0, color='black', linestyle='--', alpha=0.3)
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## Win Rate by Card

In [None]:
# Print a per-card table (hands, win rate, average profit) for every matchup
# that reports card statistics; matchups without them are skipped.
for matchup, stats in all_stats.items():
    card_stats = stats.get('card_statistics', {})

    if card_stats:
        print(f"\n{matchup} - Card Statistics:")
        print(f"{'Card':<6} {'Hands':<8} {'Win Rate':<12} {'Avg Profit':<12}")
        print("-" * 45)

        # Rows are printed in the order K, Q, J; cards with no entry are omitted.
        for card in (c for c in ('K', 'Q', 'J') if c in card_stats):
            cs = card_stats[card]
            hands = cs['hands']
            win_pct = cs.get('win_rate', 0) * 100
            avg_profit = cs.get('avg_profit', 0)
            print(f"{card:<6} {hands:<8} {win_pct:>6.1f}%      {avg_profit:>+6.2f}")

## Research Conclusions

### Hypothesis 1: Exploitative > GTO vs Human-Like
- Did the exploitative agent outperform GTO against the human-like agent?

### Hypothesis 2: GTO ≈ Exploitative (vs each other)
- Were the results roughly balanced when GTO played exploitative?

### Hypothesis 3: GTO > Human-Like
- Did GTO exploit the human biases?

In [None]:
# Print a per-matchup summary: winner, margin per 100 hands and, when the
# analyzer supplied confidence intervals, whether any of them excludes zero.
print("RESEARCH FINDINGS SUMMARY")
print("="*60)

for matchup, stats in all_stats.items():
    agent1_profit = stats['agent1_profit']

    # An exactly zero profit is a tie, not a win for agent2.
    if agent1_profit > 0:
        winner = stats['agent1']
    elif agent1_profit < 0:
        winner = stats['agent2']
    else:
        winner = "Tie"

    print(f"\n{matchup}:")
    print(f"  Winner: {winner}")
    print(f"  Margin: {abs(stats['agent1_profit_per_100']):.2f} chips/100 hands")

    if 'confidence_intervals' in stats:
        ci = stats['confidence_intervals']
        print("  Statistical significance: ", end="")

        # Significant as soon as any agent's interval lies entirely above
        # or entirely below zero.
        significant = any(
            interval['ci_lower'] > 0 or interval['ci_upper'] < 0
            for interval in ci.values()
        )
        if significant:
            print("YES (95% CI doesn't include zero)")
        else:
            print("Inconclusive (needs more hands)")