# 04.5 - Advanced Meta Analysis & Actionable Insights

**Purpose**: Extract comprehensive, actionable insights for competitive play

**Key Questions**:
- What are the optimal deck characteristics (elixir, composition)?
- Which specific cards and card combos dominate the meta?
- How does the meta evolve across trophy progression walls?
- What do our ML models tell us about winning strategies?

**Unique Features**:
- Data-driven trophy wall detection (not hardcoded)
- Card synergy analysis (2-card combos)
- Evolution card impact assessment
- Statistical confidence intervals on all metrics
- Archetype performance by trophy level
- Card level impact analysis

**Output**: Presentation-ready insights with human-readable card names

In [None]:
import sys, os, json
import duckdb, pandas as pd, numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from itertools import combinations

# The notebook runs from a sub-folder; the project root is its parent directory.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# Make the project's `src` package directory importable ahead of anything else.
sys.path.insert(0, os.path.join(PROJECT_ROOT, 'src'))

# Prefer the Parquet export when it exists, otherwise point at the CSV
# (the CSV path is used as-is, whether or not the file is present).
_parquet_path = os.path.join(PROJECT_ROOT, 'battles.parquet')
DATA_PATH = (
    _parquet_path
    if os.path.exists(_parquet_path)
    else os.path.join(PROJECT_ROOT, 'battles.csv')
)

from duckdb_utils import get_connection, create_battles_view, query_to_df, save_to_parquet
from visualization import setup_presentation_style

# Open the DuckDB connection and expose the battle data to SQL.
# NOTE(review): presumably `create_battles_view` registers the `battles`
# relation queried by later cells - confirm in duckdb_utils.
con = get_connection()
create_battles_view(con, DATA_PATH)
setup_presentation_style()

# Output folders used by later cells (JSON artifacts and saved figures).
for _output_dir in ('artifacts', 'presentation/figures'):
    os.makedirs(os.path.join(PROJECT_ROOT, _output_dir), exist_ok=True)

print("âœ“ Environment setup complete")

## Section 0: Load Card Name Mapping

Convert card IDs to human-readable names for all analyses

In [None]:
# Load the card-ID -> card-name mapping from the first cards.json that exists.
def _parse_card_mapping(card_data):
    """Build a ``{card_id: card_name}`` dict from decoded cards.json content.

    Supported shapes:
      * a flat ``{id: name}`` object (digit-only string keys become ints);
      * a list of card objects that carry ``id`` and ``name``;
      * an object wrapping such a list under the ``"items"`` key.

    Any other shape yields an empty dict.
    """
    def _normalize_id(raw_id):
        # JSON object keys are always strings; convert digit-only ones so
        # that integer IDs looked up later match the stored keys.
        if isinstance(raw_id, str) and raw_id.isdigit():
            return int(raw_id)
        return raw_id

    # Unwrap {"items": [...]} so it is not mistaken for a one-entry mapping.
    if isinstance(card_data, dict) and isinstance(card_data.get('items'), list):
        card_data = card_data['items']

    if isinstance(card_data, dict):
        return {_normalize_id(key): value for key, value in card_data.items()}
    if isinstance(card_data, list):
        return {
            _normalize_id(card['id']): card['name']
            for card in card_data
            # Skip entries that are not card objects or lack either field.
            if isinstance(card, dict) and 'id' in card and 'name' in card
        }
    return {}


card_mapping = {}
card_file_locations = [
    os.path.join(PROJECT_ROOT, 'artifacts', 'cards.json'),
    os.path.join(PROJECT_ROOT, 'Datasets', 'cards.json'),
    os.path.join(PROJECT_ROOT, 'cards.json')
]

cards_loaded = False
for card_file in card_file_locations:
    if not os.path.exists(card_file):
        continue

    print(f"Found cards.json at: {card_file}")
    # JSON is UTF-8 encoded; do not rely on the platform's locale default,
    # which would garble non-ASCII card names on some systems.
    with open(card_file, 'r', encoding='utf-8') as f:
        card_data = json.load(f)

    card_mapping = _parse_card_mapping(card_data)
    cards_loaded = True
    print(f"âœ“ Loaded {len(card_mapping)} card mappings")
    break  # first existing file wins

if not cards_loaded:
    print("âš  Warning: cards.json not found. Will use card IDs instead of names.")
    print("  Expected locations:")
    for loc in card_file_locations:
        print(f"    - {loc}")

def get_card_name(card_id):
    """Resolve a card ID to its display name.

    Returns ``None`` for missing values (as judged by ``pd.isna``). Otherwise
    the ID is looked up in the module-level ``card_mapping``, first as an
    integer and then unchanged; if neither lookup hits, the placeholder
    ``"Card_<id>"`` is returned.
    """
    if pd.isna(card_id):
        return None

    # Integer lookup first: IDs may arrive as floats or numeric strings.
    try:
        numeric_id = int(card_id)
    except (ValueError, TypeError):
        pass
    else:
        if numeric_id in card_mapping:
            return card_mapping[numeric_id]

    # Raw lookup covers mappings keyed by non-numeric strings.
    if card_id not in card_mapping:
        return f"Card_{card_id}"
    return card_mapping[card_id]

def is_evolution_card(card_name):
    """Return True when the card name contains an evolution marker.

    The check is a case-insensitive substring match against a fixed list of
    markers; falsy names (``None``, empty string) are never evolutions.
    """
    if not card_name:
        return False

    lowered_name = str(card_name).lower()
    for marker in ('evolved', 'evo', 'evolution', 'evolved '):
        if marker in lowered_name:
            return True
    return False

# Smoke-test the mapping: show the first five entries and flag evolutions.
if cards_loaded:
    sample_ids = list(card_mapping)[:5]
    print("\nSample card mappings:")
    for card_id in sample_ids:
        resolved_name = get_card_name(card_id)
        if is_evolution_card(resolved_name):
            evo_tag = "[EVO]"
        else:
            evo_tag = ""
        print(f"  {card_id} â†’ {resolved_name} {evo_tag}")

## Section 1: Data-Driven Trophy Wall Detection

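Instead of assuming walls at 4k/5k/6k/7k, detect them as peaks in battle density (100-trophy bins). The standard walls are used as a fallback, and are added whenever no detected wall falls within 500 trophies of them.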

In [None]:
# Histogram of battles per 100-trophy bin (0-10000), used to locate clustering points
trophy_dist_query = """
SELECT 
    FLOOR("average.startingTrophies" / 100) * 100 as trophy_bin,
    COUNT(*) as battle_count
FROM battles
WHERE "average.startingTrophies" IS NOT NULL
    AND "average.startingTrophies" BETWEEN 0 AND 10000
GROUP BY trophy_bin
ORDER BY trophy_bin
"""

trophy_distribution = query_to_df(con, trophy_dist_query, show_progress=False)

# Quick sanity summary of what came back.
_bin_starts = trophy_distribution['trophy_bin']
_total_battles = trophy_distribution['battle_count'].sum()
print(f"Trophy distribution loaded: {len(trophy_distribution)} bins")
print(f"Trophy range: {_bin_starts.min():.0f} to {_bin_starts.max():.0f}")
print(f"Total battles: {_total_battles:,}")

In [None]:
# Detect trophy walls using peak detection
from scipy.signal import find_peaks

# Scale counts to [0, 1] so the prominence threshold below is relative to
# the busiest bin rather than an absolute battle count.
normalized_counts = trophy_distribution['battle_count'] / trophy_distribution['battle_count'].max()

# Local maxima of the histogram are treated as trophy walls.
#   prominence=0.1 -> a peak must stand out by at least 10% of the tallest bin
#   distance=10    -> peaks must be at least 10 rows apart
#                     (1000 trophies when the 100-trophy bins are contiguous)
peaks, properties = find_peaks(normalized_counts, prominence=0.1, distance=10)

detected_walls = trophy_distribution.iloc[peaks]['trophy_bin'].values

# Use standard walls if detection fails or finds too few
standard_walls = [4000, 5000, 6000, 7000]
if len(detected_walls) < 3:
    print("âš  Using standard trophy walls: 4k, 5k, 6k, 7k")
    trophy_walls = standard_walls
else:
    # Keep peaks in the 3000-8000 range, rounded to the nearest 1000
    trophy_walls = [int(round(w, -3)) for w in detected_walls if 3000 <= w <= 8000]
    # Ensure the key milestones are present even if no peak landed near them
    for wall in standard_walls:
        if not any(abs(w - wall) < 500 for w in trophy_walls):
            trophy_walls.append(wall)
    # Rounding can produce duplicates; de-duplicate and order ascending
    trophy_walls = sorted(set(trophy_walls))

print(f"\nâœ“ Trophy Walls Detected: {trophy_walls}")

# Visualize distribution with detected walls
fig, ax = plt.subplots(figsize=(14, 7))

ax.bar(trophy_distribution['trophy_bin'], trophy_distribution['battle_count'], 
       width=90, color='steelblue', edgecolor='black', alpha=0.7)

# Mark every detected wall. The first five get distinct colors; any further
# walls are drawn in black instead of being silently left off the figure.
colors = ['red', 'orange', 'purple', 'darkred', 'maroon']
for i, wall in enumerate(trophy_walls):
    color = colors[i] if i < len(colors) else 'black'
    ax.axvline(wall, color=color, linestyle='--', linewidth=2.5, alpha=0.8,
               label=f'{wall/1000:.0f}k Trophy Wall')

ax.set_xlabel('Trophy Count', fontsize=16)
ax.set_ylabel('Number of Battles', fontsize=16)
ax.set_title('Trophy Distribution: Data-Driven Wall Detection', fontsize=20, fontweight='bold', pad=20)
ax.legend(fontsize=12, loc='upper right')
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig(os.path.join(PROJECT_ROOT, 'presentation/figures/fig5_detected_walls.png'), 
            dpi=300, bbox_inches='tight')
plt.show()

# Define trophy brackets based on detected walls: consecutive walls become
# (lower, upper) pairs, bounded by 0 below and 10000 above.
trophy_brackets = {}
walls_with_bounds = [0] + trophy_walls + [10000]
for i in range(len(walls_with_bounds) - 1):
    lower = walls_with_bounds[i]
    upper = walls_with_bounds[i + 1]
    if lower == 0:
        label = f'0-{upper/1000:.0f}k'
    elif upper == 10000:
        label = f'{lower/1000:.0f}k+'
    else:
        label = f'{lower/1000:.0f}k-{upper/1000:.0f}k'
    trophy_brackets[label] = (lower, upper)

print(f"\nðŸ“Š Trophy Brackets Defined:")
top_bound = walls_with_bounds[-1]
bin_starts = trophy_distribution['trophy_bin']
for label, (low, high) in trophy_brackets.items():
    # Brackets are half-open [low, high), except the top "Nk+" bracket, which
    # must also include the final bin (the distribution query keeps trophies
    # up to and including 10000, so a bin starting at 10000 can exist).
    below_upper = (bin_starts <= high) if high == top_bound else (bin_starts < high)
    count = trophy_distribution[(bin_starts >= low) & below_upper]['battle_count'].sum()
    print(f"  {label:<15} ({low:>5} - {high:>5}): {count:>12,} battles")

# Save detected walls
detected_walls_data = {
    'walls': trophy_walls,
    # Tuples are stored as lists so the JSON round-trips predictably
    'brackets': {k: list(v) for k, v in trophy_brackets.items()}
}
with open(os.path.join(PROJECT_ROOT, 'artifacts/detected_trophy_walls.json'), 'w') as f:
    json.dump(detected_walls_data, f, indent=2)

print(f"\nâœ“ Trophy walls saved to artifacts/detected_trophy_walls.json")

In [None]:
# 2.1 Optimal Elixir Cost Analysis
# Buckets the winner's average elixir to the nearest 0.25 and keeps only
# buckets with more than 1000 battles.
elixir_query = """
SELECT 
    ROUND("winner.elixir.average" * 4) / 4 as elixir_bucket,
    COUNT(*) as battles,
    SUM(CASE WHEN "winner.trophyChange" > 0 THEN 1 ELSE 0 END) as wins,
    AVG("winner.startingTrophies") as avg_trophies
FROM battles
WHERE "winner.elixir.average" IS NOT NULL
    AND "winner.elixir.average" BETWEEN 2.0 AND 6.0
GROUP BY elixir_bucket
HAVING battles > 1000
ORDER BY elixir_bucket
"""

elixir_data = query_to_df(con, elixir_query, show_progress=False)
# Share of battles in each bucket counted as wins by the query above.
elixir_data['win_rate'] = elixir_data['wins'].div(elixir_data['battles'])

# Calculate 95% confidence intervals
def wilson_confidence_interval(wins, total, z=1.96):
    """Return the ``(lower, upper)`` Wilson score interval for ``wins / total``.

    ``z`` is the normal quantile for the desired confidence level (the
    default 1.96 corresponds to 95%). When ``total`` is zero there is no
    data, and ``(0, 0)`` is returned.
    """
    if total == 0:
        return 0, 0

    z_squared = z**2
    p_hat = wins / total
    denominator = 1 + z_squared / total
    center = (p_hat + z_squared / (2 * total)) / denominator
    spread = (p_hat * (1 - p_hat) + z_squared / (4 * total)) / total
    margin = z * np.sqrt(spread) / denominator
    return center - margin, center + margin

# Fail with a clear message instead of an opaque unpacking error when no
# elixir bucket survived the query's sample-size filter.
if elixir_data.empty:
    raise ValueError("No elixir buckets available; cannot compute the optimal elixir cost")

# Wilson interval per bucket; apply() yields (lower, upper) tuples that
# zip(*) splits into the two new columns.
elixir_data['ci_lower'], elixir_data['ci_upper'] = zip(*elixir_data.apply(
    lambda row: wilson_confidence_interval(row['wins'], row['battles']), axis=1))

# Find optimal elixir range: locate the best bucket once and reuse its index
# for every lookup below.
best_idx = elixir_data['win_rate'].idxmax()
optimal_elixir = elixir_data.loc[best_idx, 'elixir_bucket']
optimal_wr = elixir_data.loc[best_idx, 'win_rate']

# Visualization
fig, ax = plt.subplots(figsize=(14, 7))

ax.plot(elixir_data['elixir_bucket'], elixir_data['win_rate'] * 100, 
        marker='o', linewidth=3, markersize=10, color='steelblue', label='Win Rate')
ax.fill_between(elixir_data['elixir_bucket'], 
                elixir_data['ci_lower'] * 100, 
                elixir_data['ci_upper'] * 100,
                alpha=0.3, color='steelblue', label='95% Confidence Interval')

# Mark optimal point
ax.axvline(optimal_elixir, color='red', linestyle='--', linewidth=2, alpha=0.7)
ax.axhline(optimal_wr * 100, color='red', linestyle=':', linewidth=1.5, alpha=0.5)
ax.scatter([optimal_elixir], [optimal_wr * 100], color='red', s=200, zorder=5, 
           label=f'Optimal: {optimal_elixir:.2f} elixir')

ax.set_xlabel('Average Elixir Cost', fontsize=16)
ax.set_ylabel('Win Rate (%)', fontsize=16)
ax.set_title('Optimal Elixir Cost for Winning Decks', fontsize=20, fontweight='bold', pad=20)
ax.legend(fontsize=12)
ax.grid(alpha=0.3)
# NOTE(review): the y-axis is fixed to 48-54%; win rates outside that window
# will be clipped from the figure - confirm this range suits the data.
ax.set_ylim(48, 54)

plt.tight_layout()
plt.savefig(os.path.join(PROJECT_ROOT, 'presentation/figures/fig7_optimal_elixir.png'),
            dpi=300, bbox_inches='tight')
plt.show()

# "\n" (not "\\n") so a blank line is printed rather than a literal backslash-n.
print(f"\nâœ“ Optimal Elixir Analysis:")
print(f"  Best elixir cost: {optimal_elixir:.2f} ({optimal_wr*100:.2f}% win rate)")
print(f"  95% CI: [{elixir_data.loc[best_idx, 'ci_lower']*100:.2f}%, {elixir_data.loc[best_idx, 'ci_upper']*100:.2f}%]")
print(f"  Sample size: {elixir_data.loc[best_idx, 'battles']:,} battles")

# Identify optimal range (within 1 percentage point of the best win rate)
optimal_range = elixir_data[elixir_data['win_rate'] >= optimal_wr - 0.01]
print(f"  Optimal range: {optimal_range['elixir_bucket'].min():.2f} - {optimal_range['elixir_bucket'].max():.2f} elixir")

## Section 2: Optimal Deck Characteristics

Find the "Goldilocks zone" for deck building with statistical confidence