# 04 - EDA: Player Progression

**Purpose**: Understand trophy progression patterns and player skill development.

**Key Questions**:
- Where do players hit trophy "walls"?
- How do deck characteristics change with trophy level?
- Is there trophy inflation/deflation?
- What separates winners from losers at different skill levels?

**Potential Insights**:
- Identify key trophy milestones (4000, 5000, 6000, 7000)
- Show how successful decks evolve with skill
- Analyze trophy risk at different levels

In [None]:
import sys, os, duckdb, pandas as pd, numpy as np
import matplotlib.pyplot as plt, seaborn as sns

# The project root is taken to be the parent of the current working directory.
_parent_of_cwd = os.path.join(os.getcwd(), '..')
PROJECT_ROOT = os.path.abspath(_parent_of_cwd)

# Put the project's `src` directory first on the import path so the local
# helper modules imported below can be resolved.
sys.path.insert(0, os.path.join(PROJECT_ROOT, 'src'))

# Prefer the Parquet export (faster to read); otherwise point at the CSV.
_parquet_path = os.path.join(PROJECT_ROOT, 'battles.parquet')
DATA_PATH = (
    _parquet_path
    if os.path.exists(_parquet_path)
    else os.path.join(PROJECT_ROOT, 'battles.csv')
)

from duckdb_utils import get_connection, create_battles_view, query_to_df
from visualization import setup_presentation_style, plot_distribution
from feature_engineering import create_trophy_bracket_features

# Obtain the shared connection from the project helper (presumably a DuckDB
# connection, given it comes from duckdb_utils — confirm there).
con = get_connection()
# Make the data file queryable. The SQL in the cells below selects from a
# relation named `battles`, which is presumably registered by this call.
create_battles_view(con, DATA_PATH)
# Apply the project's plotting style before any figure is created.
setup_presentation_style()

## 1. Trophy Distribution

Visualize where players cluster

In [None]:
# Trophy distribution histogram - shows where players cluster.
# Battles are counted in 100-trophy bins; `trophy_bin` is the LOWER edge of
# each bin (FLOOR(x / 100) * 100), restricted to the 0-10000 trophy range.
trophy_dist_query = """
SELECT 
    FLOOR("average.startingTrophies" / 100) * 100 as trophy_bin,
    COUNT(*) as battles
FROM battles
WHERE "average.startingTrophies" IS NOT NULL
    AND "average.startingTrophies" BETWEEN 0 AND 10000
GROUP BY trophy_bin
ORDER BY trophy_bin
"""

trophy_dist = query_to_df(con, trophy_dist_query, show_progress=False)

# Must match the bin size used in the SQL above.
bin_width = 100

# Visualization
fig, ax = plt.subplots(figsize=(14, 7))

# align='edge': `trophy_bin` is the bin's lower edge, so each bar has to start
# there. Centre-aligned bars would sit half a bin to the left of the data and
# the wall markers below would cut through the middle of a bar.
ax.bar(trophy_dist['trophy_bin'], trophy_dist['battles'],
       width=90, align='edge', color='steelblue', edgecolor='black', alpha=0.7)

# Mark common trophy walls
walls = [4000, 5000, 6000, 7000]
colors = ['red', 'orange', 'purple', 'darkred']
for wall, color in zip(walls, colors):
    ax.axvline(wall, color=color, linestyle='--', linewidth=2, alpha=0.7,
               label=f'{wall/1000:.0f}k Trophy Wall')

ax.set_xlabel('Trophy Count', fontsize=14)
ax.set_ylabel('Number of Battles', fontsize=14)
ax.set_title('Trophy Distribution: Where Do Players Cluster?', fontsize=16, fontweight='bold', pad=15)
ax.legend(fontsize=12)
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig(os.path.join(PROJECT_ROOT, '../presentation/figures/fig1_trophy_distribution.png'), 
            dpi=300, bbox_inches='tight')
plt.show()

# Find peaks (concentration points)
from scipy.signal import find_peaks

# GROUP BY only returns bins that contain at least one battle. Peak detection
# compares neighbouring samples, so the counts are first laid out on a
# contiguous grid (empty bins = 0); otherwise bins separated by a gap would be
# treated as adjacent.
bin_edges = trophy_dist['trophy_bin'].astype(float)
counts_by_bin = pd.Series(trophy_dist['battles'].values, index=bin_edges.values)
full_grid = np.arange(bin_edges.min(), bin_edges.max() + bin_width, bin_width)
dense_counts = counts_by_bin.reindex(full_grid, fill_value=0)

# A peak must stand out by at least 10% of the tallest bin to be reported.
peaks, _ = find_peaks(dense_counts.values, prominence=dense_counts.max() * 0.1)
peak_trophies = dense_counts.index[peaks].values

print("✓ Trophy Distribution Analysis:")
print(f"  Total trophy range: {trophy_dist['trophy_bin'].min():.0f} - {trophy_dist['trophy_bin'].max():.0f}")
print(f"  Concentration peaks detected at: {peak_trophies[:5]}")
print(f"  Most common trophy level: {trophy_dist.loc[trophy_dist['battles'].idxmax(), 'trophy_bin']:.0f}")

## 2. Trophy Change Analysis

In [None]:
# Trophy change patterns by starting trophy level.
# Winners and losers are each grouped into 500-trophy brackets
# (`trophy_bracket` is the bracket's lower edge); brackets with 100 or fewer
# battles are dropped.
trophy_change_query = """
SELECT 
    FLOOR("winner.startingTrophies" / 500) * 500 as trophy_bracket,
    AVG("winner.trophyChange") as avg_gain,
    STDDEV("winner.trophyChange") as std_gain,
    COUNT(*) as battles
FROM battles
WHERE "winner.startingTrophies" IS NOT NULL
    AND "winner.trophyChange" IS NOT NULL
    AND "winner.trophyChange" > 0
GROUP BY trophy_bracket
HAVING battles > 100
ORDER BY trophy_bracket
"""

trophy_change = query_to_df(con, trophy_change_query, show_progress=False)

# Also get loser data.
# The winner query keeps only battles where trophies actually changed (> 0),
# so the loser query applies the mirror-image filter (< 0). Without it,
# zero-change battles would dilute the average loss but not the average gain,
# making the two curves incomparable.
loser_change_query = """
SELECT 
    FLOOR("loser.startingTrophies" / 500) * 500 as trophy_bracket,
    ABS(AVG("loser.trophyChange")) as avg_loss,
    STDDEV("loser.trophyChange") as std_loss,
    COUNT(*) as battles
FROM battles
WHERE "loser.startingTrophies" IS NOT NULL
    AND "loser.trophyChange" IS NOT NULL
    AND "loser.trophyChange" < 0
GROUP BY trophy_bracket
HAVING battles > 100
ORDER BY trophy_bracket
"""

loser_change = query_to_df(con, loser_change_query, show_progress=False)

# Visualization
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Average trophy gain/loss by bracket
ax = axes[0]
ax.plot(trophy_change['trophy_bracket'], trophy_change['avg_gain'], 
        marker='o', linewidth=2, color='green', label='Average Trophy Gain (Winners)')
ax.plot(loser_change['trophy_bracket'], loser_change['avg_loss'],
        marker='s', linewidth=2, color='red', label='Average Trophy Loss (Losers)')
ax.set_xlabel('Trophy Bracket', fontsize=14)
ax.set_ylabel('Average Trophy Change', fontsize=14)
ax.set_title('Trophy Gain/Loss by Skill Level', fontsize=16, fontweight='bold')
ax.legend()
ax.grid(alpha=0.3)

# Volatility (std dev of winners' gains) by bracket.
# align='edge': `trophy_bracket` is the bracket's lower edge, so each bar must
# start there rather than be centred on it.
ax = axes[1]
ax.bar(trophy_change['trophy_bracket'], trophy_change['std_gain'],
       width=400, align='edge', color='steelblue', edgecolor='black', alpha=0.7)
ax.set_xlabel('Trophy Bracket', fontsize=14)
ax.set_ylabel('Trophy Change Volatility (Std Dev)', fontsize=14)
ax.set_title('Trophy Volatility: How Risky is Each Level?', fontsize=16, fontweight='bold')
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig(os.path.join(PROJECT_ROOT, '../presentation/figures/fig2_trophy_change.png'),
            dpi=300, bbox_inches='tight')
plt.show()

# Summary: report both the bracket and the value, so a bracket number is not
# mistaken for a trophy gain.
top_gain = trophy_change.loc[trophy_change['avg_gain'].idxmax()]
most_volatile = trophy_change.loc[trophy_change['std_gain'].idxmax()]

print("\n✓ Trophy Change Analysis:")
print(f"  Highest average gain: +{top_gain['avg_gain']:.1f} trophies "
      f"(bracket {top_gain['trophy_bracket']:.0f})")
print(f"  Most volatile bracket: {most_volatile['trophy_bracket']:.0f} "
      f"(std dev {most_volatile['std_gain']:.1f} trophies)")

## 3. Deck Evolution by Trophy Level

In [None]:
# How do deck characteristics change with skill (trophy level)?
# Winning decks are averaged per 1000-trophy bracket (`trophy_bracket` is the
# bracket's lower edge); brackets with 500 or fewer battles are dropped.
deck_evolution_query = """
SELECT 
    FLOOR("winner.startingTrophies" / 1000) * 1000 as trophy_bracket,
    AVG("winner.elixir.average") as avg_elixir,
    AVG("winner.rarity.legendary") as avg_legendaries,
    AVG("winner.spell.count") as avg_spells,
    AVG("winner.troop.count") as avg_troops,
    AVG("winner.structure.count") as avg_structures,
    COUNT(*) as battles
FROM battles
WHERE "winner.startingTrophies" IS NOT NULL
    AND "winner.elixir.average" IS NOT NULL
GROUP BY trophy_bracket
HAVING battles > 500
ORDER BY trophy_bracket
"""

deck_evolution = query_to_df(con, deck_evolution_query, show_progress=False)
brackets = deck_evolution['trophy_bracket']

# Visualization: 4-panel deck evolution
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# The first three panels each plot one metric against the trophy bracket and
# differ only in column, marker, colour and labels.
single_series_panels = [
    (axes[0, 0], 'avg_elixir', 'o', 'steelblue',
     'Average Elixir Cost', 'Elixir Cost vs Skill Level'),
    (axes[0, 1], 'avg_legendaries', 's', 'gold',
     'Average # of Legendaries', 'Legendary Card Usage by Skill'),
    (axes[1, 0], 'avg_spells', '^', 'purple',
     'Average # of Spells', 'Spell Usage by Skill Level'),
]
for ax, column, marker, color, ylabel, title in single_series_panels:
    ax.plot(brackets, deck_evolution[column],
            marker=marker, linewidth=2.5, markersize=8, color=color)
    ax.set_xlabel('Trophy Bracket', fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.grid(alpha=0.3)

# Troop vs Structure ratio
ax = axes[1, 1]
ax.plot(brackets, deck_evolution['avg_troops'],
        marker='o', linewidth=2.5, markersize=8, color='green', label='Troops')
ax.plot(brackets, deck_evolution['avg_structures'],
        marker='s', linewidth=2.5, markersize=8, color='brown', label='Structures')
ax.set_xlabel('Trophy Bracket', fontsize=12)
ax.set_ylabel('Average Card Count', fontsize=12)
ax.set_title('Troop vs Structure Usage', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(alpha=0.3)

plt.tight_layout()
plt.savefig(os.path.join(PROJECT_ROOT, '../presentation/figures/fig3_deck_evolution.png'),
            dpi=300, bbox_inches='tight')
plt.show()

print("\n✓ Deck Evolution Analysis:")
if deck_evolution.empty:
    print("  No trophy bracket has more than 500 battles; nothing to summarise.")
else:
    # Rows are ordered by bracket, so the first and last rows are the lowest
    # and highest brackets. Comparing those (rather than min vs max) reports
    # the direction the data actually shows instead of assuming an increase.
    lowest, highest = deck_evolution.iloc[0], deck_evolution.iloc[-1]
    if highest['avg_legendaries'] > lowest['avg_legendaries']:
        trend = 'increases'
    elif highest['avg_legendaries'] < lowest['avg_legendaries']:
        trend = 'decreases'
    else:
        trend = 'is flat'

    print(f"  Elixir range: {deck_evolution['avg_elixir'].min():.2f} - {deck_evolution['avg_elixir'].max():.2f}")
    print(f"  Legendary usage {trend} from bracket {lowest['trophy_bracket']:.0f} to "
          f"{highest['trophy_bracket']:.0f}: "
          f"{lowest['avg_legendaries']:.2f} → {highest['avg_legendaries']:.2f}")

## 4. Matchup Fairness

In [None]:
# Analyze trophy differential between winner and loser.
# One row per distinct differential (winner - loser) with the number of
# battles at that differential; only battles with |differential| < 1000.
matchup_fairness_query = """
SELECT 
    "winner.startingTrophies" - "loser.startingTrophies" as trophy_diff,
    COUNT(*) as battles
FROM battles
WHERE "winner.startingTrophies" IS NOT NULL
    AND "loser.startingTrophies" IS NOT NULL
    AND ABS("winner.startingTrophies" - "loser.startingTrophies") < 1000
GROUP BY trophy_diff
ORDER BY trophy_diff
"""

matchup_fairness = query_to_df(con, matchup_fairness_query, show_progress=False)

# Calculate underdog win rate.
# Note: battles between players with equal trophies fall into 'Favorite Won',
# and, unlike the query above, no |differential| cut-off is applied here.
underdog_query = """
SELECT 
    CASE 
        WHEN "winner.startingTrophies" < "loser.startingTrophies" THEN 'Underdog Won'
        ELSE 'Favorite Won'
    END as outcome,
    COUNT(*) as battles,
    AVG("winner.startingTrophies" - "loser.startingTrophies") as avg_trophy_diff
FROM battles
WHERE "winner.startingTrophies" IS NOT NULL
    AND "loser.startingTrophies" IS NOT NULL
GROUP BY outcome
"""

underdog_stats = query_to_df(con, underdog_query, show_progress=False)

# Visualization
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Trophy differential distribution (each distinct differential weighted by
# its battle count).
ax = axes[0]
ax.hist(matchup_fairness['trophy_diff'], bins=50, weights=matchup_fairness['battles'],
        color='steelblue', edgecolor='black', alpha=0.7)
ax.axvline(0, color='red', linestyle='--', linewidth=2, label='Even Match')
ax.set_xlabel('Trophy Differential (Winner - Loser)', fontsize=14)
ax.set_ylabel('Number of Battles', fontsize=14)
ax.set_title('Matchmaking Fairness: Trophy Differential Distribution', fontsize=16, fontweight='bold')
ax.legend()
ax.grid(axis='y', alpha=0.3)

# Underdog win statistics.
# The query has no ORDER BY, so row order is not guaranteed. Colours are bound
# to outcomes explicitly and the outcomes are laid out in a fixed order; a
# missing outcome is shown as 0 instead of raising an IndexError.
ax = axes[1]
outcome_colors = {'Underdog Won': 'coral', 'Favorite Won': 'lightgreen'}
battles_by_outcome = (
    underdog_stats.set_index('outcome')['battles']
    .reindex(list(outcome_colors), fill_value=0)
)
total_battles = battles_by_outcome.sum()
underdog_pct = (
    battles_by_outcome['Underdog Won'] / total_battles * 100
    if total_battles else float('nan')
)
ax.bar(list(battles_by_outcome.index), battles_by_outcome.values,
       color=[outcome_colors[outcome] for outcome in battles_by_outcome.index],
       edgecolor='black', alpha=0.8)
ax.set_ylabel('Number of Battles', fontsize=14)
ax.set_title(f'Underdogs Win {underdog_pct:.1f}% of Matches', fontsize=16, fontweight='bold')
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig(os.path.join(PROJECT_ROOT, '../presentation/figures/fig4_matchup_fairness.png'),
            dpi=300, bbox_inches='tight')
plt.show()

print("\n✓ Matchup Fairness Analysis:")
print(f"  Underdog win rate: {underdog_pct:.2f}%")
if matchup_fairness.empty:
    print("  No battles with a trophy differential under 1000 were found.")
else:
    # `matchup_fairness` holds one row per DISTINCT differential, so the mean
    # must be weighted by the battle count; a plain .mean() would average the
    # distinct values and ignore how often each one occurs.
    avg_trophy_diff = np.average(
        matchup_fairness['trophy_diff'].astype(float),
        weights=matchup_fairness['battles'].astype(float),
    )
    most_common_diff = matchup_fairness.loc[matchup_fairness['battles'].idxmax(), 'trophy_diff']
    print(f"  Average trophy differential: {avg_trophy_diff:.1f}")
    print(f"  Most common differential: {most_common_diff:.0f} trophies")

## Key Insights

**Document progression patterns for presentation**