# Data analysis for method section

In [1]:
import pandas as pd
import numpy as np
import os

In [5]:
# Directory (relative to the notebook) that holds one matchup CSV per ranked tier
file_path = "data/riot_API/"

# Tier name -> CSV path, listed from lowest to highest tier.
# NOTE(review): the DIAMOND entry points at "matchup_data_DIMOND.csv". That
# spelling presumably mirrors the file name on disk -- confirm before renaming.
rank_paths = dict(
    IRON=os.path.join(file_path, "matchup_data_IRON.csv"),
    BRONZE=os.path.join(file_path, "matchup_data_BRONZE.csv"),
    SILVER=os.path.join(file_path, "matchup_data_SILVER.csv"),
    GOLD=os.path.join(file_path, "matchup_data_GOLD.csv"),
    PLATINUM=os.path.join(file_path, "matchup_data_PLATINUM.csv"),
    EMERALD=os.path.join(file_path, "matchup_data_EMERALD.csv"),
    DIAMOND=os.path.join(file_path, "matchup_data_DIMOND.csv"),
)

# Read every CSV once, keeping one DataFrame per tier keyed by the tier name
riot_df = {
    rank_name: pd.read_csv(csv_path)
    for rank_name, csv_path in rank_paths.items()
}

# Stack the per-tier frames into a single frame with a fresh 0..n-1 index
combined_df = pd.concat(list(riot_df.values()), ignore_index=True)

In [11]:
# Tunable thresholds for the balance metrics computed in this cell.
MIN_MATCHES_PER_MATCHUP = 10            # matchups with fewer rows are skipped
BALANCED_WIN_RATE_RANGE = (0.45, 0.55)  # inclusive lower/upper win-rate bounds
GOLD_PER_MINION = 20                    # flat per-minion gold value used for the estimate
GOLD_EQUILIBRIUM_THRESHOLD = 400        # estimated gold gap strictly below this counts as "even"

# --- Balanced Win Rate ---
# Percentage of (player, opponent) champion pairs, among those with at least
# MIN_MATCHES_PER_MATCHUP rows, whose mean `win` lies inside
# BALANCED_WIN_RATE_RANGE.
matchup_groups = combined_df.groupby(['player_champion', 'opponent_champion'])

# One row per champion pair: `size` = number of rows, `mean` = mean of `win`.
# A single aggregation replaces the former Python-level loop over the groups.
matchup_stats = matchup_groups['win'].agg(['size', 'mean'])
eligible_matchups = matchup_stats[matchup_stats['size'] >= MIN_MATCHES_PER_MATCHUP]

total_matchups = int(len(eligible_matchups))
# `between` is inclusive on both ends; a NaN mean compares as not balanced
# but the pair still counts toward `total_matchups`.
balanced_count = int(eligible_matchups['mean'].between(*BALANCED_WIN_RATE_RANGE).sum())

# Guard against division by zero when no pair reaches the minimum sample size
balanced_win_rate_pct = (balanced_count / total_matchups) * 100 if total_matchups else 0

# --- CS-Based Metrics ---
# Absolute per-row CS gap between player and opponent over the first 10 minutes
cs_diff = (combined_df['first_10_min_cs'] - combined_df['opponent_first_10_min_cs']).abs()
avg_cs_diff = cs_diff.mean()

# --- Estimated Gold Difference at 10min ---
# Gold gap is approximated from the CS gap alone (GOLD_PER_MINION per minion).
estimated_gold_diff = cs_diff * GOLD_PER_MINION
# Rows with a missing CS value compare as False here, so they count as
# "not in equilibrium" while still contributing to the denominator.
gold_equilibrium_pct = (estimated_gold_diff < GOLD_EQUILIBRIUM_THRESHOLD).mean() * 100

# --- Full Game Gold Difference ---
full_game_gold_diff = combined_df['gold_diff'].abs()
avg_full_gold_diff = full_game_gold_diff.mean()

# --- Final Summary ---
summary = {
    "Balanced Win Rates (%)": round(balanced_win_rate_pct, 1),
    "Avg CS Difference at 10min": round(avg_cs_diff, 2),
    "Gold Equilibrium at 10min (<400g, %)": round(gold_equilibrium_pct, 1),
    "Avg Full-Game Gold Difference": round(avg_full_gold_diff, 2)
}

for key, value in summary.items():
    print(f"{key}: {value}")

Balanced Win Rates (%): 35.4
Avg CS Difference at 10min: 13.85
Gold Equilibrium at 10min (<400g, %): 73.7
Avg Full-Game Gold Difference: 2678.02


In [13]:
# Champions whose head-to-head matchups are tabulated below
champion_pool = ["Garen", "Teemo", "Kennen", "Tryndamere"]

# Combine all tier data (rebuilt here so this cell only depends on `riot_df`)
combined_df = pd.concat(riot_df.values(), ignore_index=True)

# Keep only rows where BOTH the player and the opponent are in the pool
filtered = combined_df[
    (combined_df['player_champion'].isin(champion_pool)) &
    (combined_df['opponent_champion'].isin(champion_pool))
]

# Group by ordered matchup pair (player champion, opponent champion)
matchup_groups = filtered.groupby(['player_champion', 'opponent_champion'])

# The column set is the same for every group, so check for the optional
# `cs_per_min` column once instead of on every iteration.
has_cs_per_min = 'cs_per_min' in filtered.columns

# Summarize each matchup into one record
rows = []
for (champ_a, champ_b), group in matchup_groups:
    matches = len(group)
    win_rate = group['win'].mean() * 100
    avg_cs10 = group['first_10_min_cs'].mean()
    avg_gold_diff = group['gold_diff'].mean()
    # Fall back to CS@10 / 10 when no per-minute CS column is available
    avg_cs_min = group['cs_per_min'].mean() if has_cs_per_min else avg_cs10 / 10

    rows.append({
        "Champion A": champ_a,
        "Champion B": champ_b,
        "Matches": matches,
        "Win Rate": f"{win_rate:.1f}%",
        "CS@10": round(avg_cs10, 1),
        "Gold Diff": round(avg_gold_diff),
        "CS/min": round(avg_cs_min, 2)
    })

# Create and print DataFrame
matchup_summary_df = pd.DataFrame(rows)
print(matchup_summary_df)

# Optional: export LaTeX
# escape=True is passed explicitly: the "Win Rate" cells contain a literal '%',
# which starts a comment in LaTeX. pandas >= 2.0 no longer escapes special
# characters by default, so without this every table row would be truncated
# at the '%' (dropping the remaining columns and the row terminator).
print("\nLaTeX Format:\n")
print(matchup_summary_df.to_latex(
    index=False,
    escape=True,
    caption="Matchup statistics for Garen, Teemo, Kennen, and Tryndamere",
    label="tab:matchup_results",
))


    Champion A  Champion B  Matches Win Rate  CS@10  Gold Diff  CS/min
0        Garen      Kennen       11    45.5%   57.4        950    6.32
1        Garen       Teemo       44    50.0%   55.2        405    6.19
2        Garen  Tryndamere       30    56.7%   61.1       -857    6.70
3       Kennen       Garen       11    54.5%   63.3       -950    6.13
4       Kennen       Teemo        7    57.1%   66.6       -115    6.62
5       Kennen  Tryndamere        2    50.0%   65.0      -2741    5.67
6        Teemo       Garen       44    50.0%   64.9       -405    6.21
7        Teemo      Kennen        7    42.9%   68.3        115    6.39
8        Teemo  Tryndamere       14    50.0%   53.2      -1351    5.68
9   Tryndamere       Garen       30    43.3%   69.9        857    7.29
10  Tryndamere      Kennen        2    50.0%   74.0       2741    8.35
11  Tryndamere       Teemo       14    50.0%   57.9       1351    6.35

LaTeX Format:

\begin{table}
\caption{Matchup statistics for Garen, Teemo, K