# 07 - Visualization Library

**Purpose**: Create publication-quality charts for the presentation.

**Goal**: Generate 5-8 compelling visualizations that support your story.

**Tips**:
- Every chart should have a clear insight as the title
- Use colorblind-friendly palettes
- Large fonts (readable from back of room)
- Export as high-res PNG for slides

**Chart Ideas**:
1. Card win rate bar chart (Top 15)
2. Trophy distribution histogram (showing "walls")
3. Deck archetype performance comparison
4. Card synergy network diagram
5. Feature importance from model
6. Elixir cost vs win rate scatter
7. Trophy progression by arena

In [None]:
import sys, os, pandas as pd, numpy as np
import matplotlib.pyplot as plt, seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# The project root is taken to be the parent of the current working directory.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Put <PROJECT_ROOT>/src at the front of the module search path so the
# project-local `visualization` module imported below can be resolved.
sys.path.insert(0, os.path.join(PROJECT_ROOT, 'src'))

from visualization import (
    plot_correlation_heatmap,
    plot_distribution,
    plot_scatter_with_regression,
    plot_win_rate_comparison,
    save_figure,
    setup_presentation_style,
)

# Project helper; presumably configures the plotting style used by every
# chart below (its implementation is not visible from this notebook).
setup_presentation_style()

# Create the figure output directory up front; a no-op if it already exists.
os.makedirs(os.path.join(PROJECT_ROOT, 'presentation/figures'), exist_ok=True)

## Chart 1: [Your Key Insight]

Example: "Evolution Cards Dominate the Meta"

In [None]:
# Chart 1: Top Cards by Win Rate
#
# Reads the card win-rate artifact (produced by notebook 03) and draws a
# horizontal bar chart of the cards with the highest `win_rate`. When the
# artifact file is missing, a small chart built from hard-coded sample data is
# drawn instead, titled "(Sample Data)" so it cannot be mistaken for results.
print("Creating Chart 1: Top Cards by Win Rate...")

top_n = 20  # maximum number of cards shown in the chart

try:
    # Only the load is guarded: a FileNotFoundError raised later (e.g. while
    # saving the figure) must not be reported as a missing artifact.
    card_stats = pd.read_parquet(os.path.join(PROJECT_ROOT, 'artifacts/card_win_rates.parquet'))
except FileNotFoundError:
    print("⚠ Card win rates artifact not found. Run notebook 03 first.")
    print("  Creating placeholder chart...")

    # Create sample data for demonstration
    sample_cards = pd.DataFrame({
        'card_name': ['Evolved Pekka', 'Mega Knight', 'Hog Rider', 'Wizard', 'Balloon'],
        'win_rate': [0.54, 0.52, 0.51, 0.50, 0.49]
    })

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.barh(sample_cards['card_name'], sample_cards['win_rate'] * 100, color='steelblue', alpha=0.7)
    ax.set_xlabel('Win Rate (%)', fontsize=14)
    ax.set_title('Top Cards by Win Rate (Sample Data)', fontsize=16, fontweight='bold')
    ax.axvline(50, color='red', linestyle='--', linewidth=2, alpha=0.5)
    ax.invert_yaxis()
    plt.tight_layout()
    save_figure('fig1_top_cards_winrate.png')
    plt.show()

    print("✓ Placeholder chart created")
else:
    print(f"✓ Loaded card statistics: {len(card_stats)} cards")

    # Select the best-performing cards; win rates are stored as fractions and
    # converted to percentages once for both the bars and their labels.
    top_cards = card_stats.nlargest(top_n, 'win_rate')
    win_pct = top_cards['win_rate'] * 100
    y_positions = range(len(top_cards))
    # Fall back to the index when the frame has no explicit name column.
    card_labels = top_cards['card_name'] if 'card_name' in top_cards.columns else top_cards.index

    fig, ax = plt.subplots(figsize=(14, 8))
    ax.barh(y_positions, win_pct, color='steelblue', edgecolor='black', alpha=0.8)
    ax.set_yticks(y_positions)
    ax.set_yticklabels(card_labels)
    ax.set_xlabel('Win Rate (%)', fontsize=16)
    # Use the actual number of bars so the title stays correct when the
    # artifact holds fewer than `top_n` cards.
    ax.set_title(f'Top {len(top_cards)} Cards by Win Rate: Meta Dominance',
                 fontsize=20, fontweight='bold', pad=20)
    ax.invert_yaxis()  # highest win rate at the top
    ax.grid(axis='x', alpha=0.3)

    # Add value labels just to the right of each bar
    for y_pos, pct in enumerate(win_pct):
        ax.text(pct + 0.2, y_pos, f"{pct:.2f}%",
                va='center', fontsize=11, fontweight='bold')

    # Add reference line at 50% (balanced)
    ax.axvline(50, color='red', linestyle='--', linewidth=2, alpha=0.5, label='50% (Balanced)')
    ax.legend(fontsize=12)

    plt.tight_layout()
    save_figure('fig1_top_cards_winrate.png')
    plt.show()

    print("✓ Chart 1 saved: fig1_top_cards_winrate.png")

## Chart 2: [Your Second Insight]

In [None]:
# Chart 2: Trophy Distribution showing "Trophy Walls"
#
# Samples starting-trophy values from the battles data through DuckDB and
# draws a histogram with dashed markers at the 4k/5k/6k/7k milestones. Any
# failure while querying or plotting falls back to a chart of synthetic data,
# titled "(Sample Data)".
print("Creating Chart 2: Trophy Distribution...")

# Query trophy data directly
import sys
src_dir = os.path.join(PROJECT_ROOT, 'src')
if src_dir not in sys.path:
    # Avoid stacking duplicate entries when the cell is re-run.
    sys.path.insert(0, src_dir)
from duckdb_utils import get_connection, create_battles_view, query_to_df

try:
    # NOTE: `con` is deliberately left open - the Chart 3 cell reuses it.
    con = get_connection()
    create_battles_view(con, os.path.join(PROJECT_ROOT, 'battles.parquet'))

    # Sample trophy data for visualization
    trophy_query = """
    SELECT "average.startingTrophies" as trophies
    FROM battles
    WHERE "average.startingTrophies" IS NOT NULL
    USING SAMPLE 50000
    """
    trophy_data = query_to_df(con, trophy_query, show_progress=False)
    trophies = trophy_data['trophies']

    # Create visualization
    fig, ax = plt.subplots(figsize=(14, 7))

    # Histogram of raw battle counts per trophy bin (no density scaling)
    counts, bins, _ = ax.hist(trophies, bins=80, color='steelblue',
                              edgecolor='black', alpha=0.7, density=False)

    # Mark key trophy milestones
    milestones = [4000, 5000, 6000, 7000]
    colors = ['orange', 'red', 'purple', 'darkred']

    for milestone, color in zip(milestones, colors):
        ax.axvline(milestone, color=color, linestyle='--', linewidth=2, alpha=0.7,
                   label=f'{milestone} Trophy Wall')

    ax.set_xlabel('Trophy Count', fontsize=16)
    ax.set_ylabel('Number of Battles', fontsize=16)
    ax.set_title('Trophy Distribution: Where Players Get Stuck', fontsize=20, fontweight='bold', pad=20)
    ax.legend(fontsize=12, loc='upper right')
    ax.grid(axis='y', alpha=0.3)

    # Add annotation for highest concentration
    peak_idx = int(np.argmax(counts))
    peak_count = counts[peak_idx]
    peak_trophy = (bins[peak_idx] + bins[peak_idx + 1]) / 2  # bin centre
    if peak_count > 0:
        # The label sits at 1.2x the tallest bar, which is above the default
        # axis top; add headroom so it stays inside the plot area instead of
        # running into the title.
        ax.set_ylim(top=peak_count * 1.35)
        ax.annotate(f'Peak: ~{peak_trophy:.0f} trophies\n({peak_count:.0f} battles)',
                    xy=(peak_trophy, peak_count),
                    xytext=(peak_trophy + 500, peak_count * 1.2),
                    fontsize=12, fontweight='bold',
                    bbox=dict(boxstyle='round', facecolor='yellow', alpha=0.7),
                    arrowprops=dict(arrowstyle='->', linewidth=2))

    plt.tight_layout()
    save_figure('fig2_trophy_distribution.png')
    plt.show()

    print("✓ Chart 2 saved: fig2_trophy_distribution.png")
    print(f"  Mean trophy level: {trophies.mean():.0f}")
    print(f"  Median trophy level: {trophies.median():.0f}")

except Exception as e:
    # Best-effort fallback: report the problem and still produce a figure.
    print(f"⚠ Error creating trophy chart: {e}")
    print("  Creating placeholder chart...")

    # Placeholder data; seeded so the placeholder figure is reproducible.
    rng = np.random.default_rng(42)
    sample_trophies = rng.normal(5500, 1000, 10000)
    sample_trophies = sample_trophies[sample_trophies > 0]

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.hist(sample_trophies, bins=50, color='steelblue', alpha=0.7, edgecolor='black')
    ax.set_xlabel('Trophy Count', fontsize=14)
    ax.set_ylabel('Frequency', fontsize=14)
    ax.set_title('Trophy Distribution (Sample Data)', fontsize=16, fontweight='bold')
    ax.axvline(5000, color='red', linestyle='--', linewidth=2, label='5000 Trophy Wall')
    ax.legend()
    plt.tight_layout()
    save_figure('fig2_trophy_distribution.png')
    plt.show()

    print("✓ Placeholder chart created")

## Chart 3: [Your Third Insight]

In [None]:
# Chart 3: Deck Evolution by Trophy Level
#
# Aggregates winner deck characteristics (average elixir, legendary count,
# spell count) per trophy bracket and plots them on a dual-axis line chart.
# Relies on `con` and `query_to_df` created by the Chart 2 cell; if they are
# missing, or anything else fails, a chart of sample data is drawn instead.
print("Creating Chart 3: Deck Evolution by Trophy Level...")

try:
    # Query deck characteristics by trophy bracket
    deck_evolution_query = """
    WITH trophy_brackets AS (
        SELECT 
            CASE 
                WHEN "winner.startingTrophies" < 2000 THEN '0-2k'
                WHEN "winner.startingTrophies" < 3000 THEN '2k-3k'
                WHEN "winner.startingTrophies" < 4000 THEN '3k-4k'
                WHEN "winner.startingTrophies" < 5000 THEN '4k-5k'
                WHEN "winner.startingTrophies" < 6000 THEN '5k-6k'
                WHEN "winner.startingTrophies" < 7000 THEN '6k-7k'
                ELSE '7k+'
            END as bracket,
            "winner.elixir.average" as elixir_avg,
            "winner.rarity.legendary.count" as legendary_count,
            "winner.spell.count" as spell_count
        FROM battles
        WHERE "winner.startingTrophies" IS NOT NULL
    )
    SELECT 
        bracket,
        AVG(elixir_avg) as avg_elixir,
        AVG(legendary_count) as avg_legendary,
        AVG(spell_count) as avg_spells
    FROM trophy_brackets
    GROUP BY bracket
    ORDER BY 
        CASE bracket
            WHEN '0-2k' THEN 1
            WHEN '2k-3k' THEN 2
            WHEN '3k-4k' THEN 3
            WHEN '4k-5k' THEN 4
            WHEN '5k-6k' THEN 5
            WHEN '6k-7k' THEN 6
            ELSE 7
        END
    """

    deck_evolution = query_to_df(con, deck_evolution_query, show_progress=False)

    # Fail fast on an empty result: otherwise an empty chart would be saved
    # and the trend summary below would then raise on `.iloc[0]`.
    if deck_evolution.empty:
        raise ValueError("deck evolution query returned no rows")

    # Create multi-line chart
    fig, ax = plt.subplots(figsize=(14, 8))

    x_pos = range(len(deck_evolution))

    # Elixir cost on the primary axis ...
    ax.plot(x_pos, deck_evolution['avg_elixir'], marker='o', linewidth=3,
            markersize=10, label='Avg Elixir Cost', color='steelblue')

    # ... card counts share a secondary axis because their scale differs.
    ax2 = ax.twinx()
    secondary_series = [
        ('avg_legendary', 's', 'Avg Legendary Cards', 'gold'),
        ('avg_spells', '^', 'Avg Spell Cards', 'purple'),
    ]
    for column, marker, series_label, series_color in secondary_series:
        ax2.plot(x_pos, deck_evolution[column], marker=marker, linewidth=3,
                 markersize=10, label=series_label, color=series_color)

    # Formatting
    ax.set_xticks(x_pos)
    ax.set_xticklabels(deck_evolution['bracket'])
    ax.set_xlabel('Trophy Bracket', fontsize=16)
    ax.set_ylabel('Average Elixir Cost', fontsize=16, color='steelblue')
    ax2.set_ylabel('Average Card Count', fontsize=16)
    ax.set_title('How Winning Decks Evolve with Skill Level', fontsize=20, fontweight='bold', pad=20)
    ax.tick_params(axis='y', labelcolor='steelblue')
    ax.grid(alpha=0.3)

    # Combined legend covering the lines of both axes
    lines1, labels1 = ax.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax.legend(lines1 + lines2, labels1 + labels2, loc='upper left', fontsize=12)

    plt.tight_layout()
    save_figure('fig3_deck_evolution.png')
    plt.show()

    # Summarise the change from the first to the last bracket returned.
    elixir_trend = deck_evolution['avg_elixir']
    legendary_trend = deck_evolution['avg_legendary']
    print("✓ Chart 3 saved: fig3_deck_evolution.png")
    print(f"  Elixir trend: {elixir_trend.iloc[0]:.2f} → {elixir_trend.iloc[-1]:.2f}")
    print(f"  Legendary trend: {legendary_trend.iloc[0]:.2f} → {legendary_trend.iloc[-1]:.2f}")

except Exception as e:
    # Best-effort fallback: report the problem and still produce a figure.
    print(f"⚠ Error creating deck evolution chart: {e}")
    print("  Creating placeholder chart...")

    # Placeholder data
    brackets = ['0-2k', '2k-3k', '3k-4k', '4k-5k', '5k-6k', '6k-7k', '7k+']
    elixir = [3.2, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9]
    legendary = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5]

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(brackets, elixir, marker='o', linewidth=2, label='Elixir Cost')
    ax.plot(brackets, legendary, marker='s', linewidth=2, label='Legendary Cards')
    ax.set_xlabel('Trophy Bracket', fontsize=14)
    ax.set_ylabel('Average Value', fontsize=14)
    ax.set_title('Deck Evolution (Sample Data)', fontsize=16, fontweight='bold')
    ax.legend()
    ax.grid(alpha=0.3)
    plt.tight_layout()
    save_figure('fig3_deck_evolution.png')
    plt.show()

    print("✓ Placeholder chart created")

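## Chart 4: Model Performance Comparison

Compare each model's accuracy and ROC-AUC side by side (replace the sample values with the results from notebook 06).

**Optional**: Use Plotly for an interactive card synergy network (could be impressive in the presentation!)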

In [None]:
# Chart 4: Model Performance Comparison
#
# Draws two side-by-side bar charts: accuracy per model against the research
# benchmark, and ROC-AUC per model against the 50% random-guess line. The
# numbers below are samples and are meant to be replaced by hand.
print("Creating Chart 4: Model Performance Comparison...")

# Create sample model results (these would come from notebook 06)
# Users should replace these with actual results after running notebook 06
models = ['Logistic\nRegression', 'Random\nForest', 'XGBoost']
accuracy = [52.5, 56.2, 58.1]  # Sample values - replace with actual
roc_auc = [0.54, 0.58, 0.60]  # Sample values - replace with actual
benchmark = 56.94

roc_auc_pct = [score * 100 for score in roc_auc]
bar_colors = ['lightcoral', 'steelblue', 'mediumseagreen']

# Create visualization
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 7))

# Accuracy comparison
bars1 = ax1.bar(models, accuracy, color=bar_colors,
                edgecolor='black', alpha=0.8, width=0.6)
ax1.axhline(benchmark, color='red', linestyle='--', linewidth=2.5,
            label=f'Research Benchmark ({benchmark}%)', alpha=0.7)
ax1.set_ylabel('Accuracy (%)', fontsize=16)
ax1.set_title('Model Accuracy Comparison', fontsize=18, fontweight='bold')
ax1.legend(fontsize=12)
ax1.grid(axis='y', alpha=0.3)
# Default window is 50-60; widen it when real results (or the benchmark) fall
# outside so no bar or value label is clipped.
ax1.set_ylim(min(50, min(accuracy) - 2),
             max(60, max(max(accuracy), benchmark) + 1.5))

# Add value labels on bars
for bar, acc in zip(bars1, accuracy):
    ax1.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.3,
             f'{acc:.1f}%', ha='center', va='bottom', fontsize=13, fontweight='bold')

# ROC-AUC comparison (shown as a percentage to match the accuracy panel)
bars2 = ax2.bar(models, roc_auc_pct, color=bar_colors,
                edgecolor='black', alpha=0.8, width=0.6)
ax2.axhline(50, color='gray', linestyle=':', linewidth=2, label='Random Guess (50%)', alpha=0.5)
ax2.set_ylabel('ROC-AUC (%)', fontsize=16)
ax2.set_title('Model ROC-AUC Comparison', fontsize=18, fontweight='bold')
ax2.legend(fontsize=12)
ax2.grid(axis='y', alpha=0.3)
# Default window is 50-65; widened the same way as the accuracy panel.
ax2.set_ylim(min(50, min(roc_auc_pct) - 2),
             max(65, max(roc_auc_pct) + 2))

# Add value labels on bars
for bar, auc_pct in zip(bars2, roc_auc_pct):
    ax2.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.5,
             f'{auc_pct:.1f}%', ha='center', va='bottom', fontsize=13, fontweight='bold')

# Overall title
fig.suptitle('Battle Outcome Prediction: Model Performance',
             fontsize=22, fontweight='bold', y=1.00)

plt.tight_layout()
save_figure('fig4_model_comparison.png')
plt.show()

# Derive the best model from the data so the talking point stays correct once
# the sample values are replaced; tick labels contain newlines, so flatten them.
best_idx = max(range(len(accuracy)), key=accuracy.__getitem__)
best_model = models[best_idx].replace('\n', ' ')
best_accuracy = accuracy[best_idx]

print("✓ Chart 4 saved: fig4_model_comparison.png")
print("\n💡 Presentation Talking Points:")
print(f"  - Best model ({best_model}): {best_accuracy:.1f}% accuracy")
print(f"  - {'✅ Beat' if best_accuracy > benchmark else '⚠ Below'} research benchmark of {benchmark}%")
print("  - Model proves deck composition matters for victory")
print("\n⚠ Note: Replace sample values with actual results from Notebook 06!")

## Export All Figures

Make sure all charts are saved to `presentation/figures/` as high-res PNGs

In [None]:
# List all exported figures and provide summary
#
# Prints every PNG found in presentation/figures with its size, followed by a
# suggested slide structure that maps each figure to a section of the talk.
import glob

figures = glob.glob(os.path.join(PROJECT_ROOT, 'presentation/figures/*.png'))

print("=" * 70)
print("📊 VISUALIZATION LIBRARY SUMMARY")
print("=" * 70)

if figures:
    print(f"\n✓ Exported {len(figures)} presentation-ready figures:\n")
    # `figure_path` rather than `fig`, so the matplotlib figure handle left
    # behind by the chart cells is not overwritten with a string.
    for position, figure_path in enumerate(sorted(figures), 1):
        filename = os.path.basename(figure_path)
        size_kb = os.path.getsize(figure_path) / 1024
        print(f"  {position}. {filename:<35} ({size_kb:.1f} KB)")
else:
    print("\n⚠ No figures found. Run the chart creation cells above.")

print("\n" + "=" * 70)
print("🎤 PRESENTATION GUIDE")
print("=" * 70)

presentation_guide = """
Recommended Slide Structure (8 minutes):

1. INTRODUCTION (1 min)
   - Dataset overview: 9.2GB, millions of battles
   - Research question: What makes decks win?

2. DATA EXPLORATION (2 min)
   → Use: fig1_top_cards_winrate.png
   - Key finding: Top cards with 52%+ win rates
   - Talk about meta dominance and evolution cards
   
   → Use: fig2_trophy_distribution.png
   - Trophy walls at 4k, 5k, 6k, 7k
   - Player progression bottlenecks

3. PLAYER PROGRESSION (2 min)
   → Use: fig3_deck_evolution.png
   - How decks change from beginner to pro
   - Elixir cost increases with skill
   - More legendaries at higher levels

4. PREDICTIVE MODELING (2 min)
   → Use: fig4_model_comparison.png
   - Built 3 machine learning models
   - Achieved X% accuracy (compare to 56.94% benchmark)
   - Proves deck composition predicts outcomes

5. KEY INSIGHTS & RECOMMENDATIONS (1 min)
   - Top 3 actionable insights
   - What players should focus on
   - Future research directions

💡 Tips:
- Start each slide with the key insight as the title
- Use simple language (avoid jargon)
- Point to specific data on charts
- Practice transitions between sections
- Leave 2-4 min for Q&A
"""

print(presentation_guide)

print("=" * 70)
# Only claim success when at least one figure was actually exported.
if figures:
    print("✅ All visualizations ready for presentation!")
else:
    print("⚠ No visualizations exported yet. Run the chart creation cells above.")
print(f"   Location: {os.path.join(PROJECT_ROOT, 'presentation/figures/')}")
print("=" * 70)