In [43]:
import pandas as pd

# Tiers whose data files are split per division ("<TIER>_<DIVISION>").
DIVISION_TIERS = [
    "IRON",
    "BRONZE",
    "SILVER",
    "GOLD",
    "PLATINUM",
    "EMERALD",
    "DIAMOND",
]

# Tiers loaded under the bare tier name, with no division suffix.
NO_DIVISION_TIERS = [
    "MASTER",
    "GRANDMASTER",
    "CHALLENGER",
]

# Division labels appended to every entry of DIVISION_TIERS.
DIVISIONS = [
    "I",
    "II",
    "III",
    "IV",
]

# Metrics copied unchanged from the "<metric>_<minute>" timeline columns.
_SNAPSHOT_METRICS = (
    # gold / kill
    "goldDiff",
    "totalKillDiff",
    # objectives
    "dragonDiff",
    "elderDiff",
    "heraldDiff",
    "baronDiff",
    "atakhanDiff",
    "grubDiff",
)

# Structure metrics whose sign is flipped on load (the original note calls this
# a "direction correction"; presumably the raw columns use the opposite
# orientation from the other diffs -- TODO confirm against the extractor).
_FLIPPED_METRICS = (
    "outerTowerDiff",
    "innerTowerDiff",
    "baseTowerDiff",
)


def load_tier_data(tier_name, timeline_path, matches_path):
    """Build long-format snapshot records for one tier.

    Reads the timeline CSV (one row per match, with ``matchId``, ``maxMinute``
    and ``<metric>_<minute>`` columns) and the matches CSV (``matchId``,
    ``teamId``, ``win``), attaches the winning team to every timeline row, and
    emits one record per match for every 5-minute snapshot from minute 15 up
    to ``maxMinute``.

    Matches whose winner cannot be determined unambiguously -- no row with
    ``win == 1``, or more than one distinct winning team -- are dropped.
    Previously such matches were silently labelled as losses (or duplicated),
    which biased the win-rate statistics.

    Parameters
    ----------
    tier_name : str
        Label stored in the ``tier`` field of every record.
    timeline_path, matches_path : str or path-like
        Locations of the two CSV files.

    Returns
    -------
    list[dict]
        Records with keys ``tier``, ``matchId``, ``minute``, ``maxMinute``,
        the metric diffs, and ``win`` (1 when the winning ``teamId`` is 100,
        otherwise 0).
    """
    timeline = pd.read_csv(timeline_path)
    matches = pd.read_csv(matches_path)

    # Collapse repeated rows of the same (match, team) pair to one result row.
    team_results = matches[["matchId", "teamId", "win"]].drop_duplicates(
        subset=["matchId", "teamId"]
    )

    # keep=False discards matches that report more than one winning team, so
    # the merge below can never multiply timeline rows.
    winner_team = (
        team_results.loc[team_results["win"] == 1, ["matchId", "teamId"]]
        .drop_duplicates(subset="matchId", keep=False)
        .rename(columns={"teamId": "winnerTeamId"})
    )

    # Inner join: a timeline row without a known winner has no valid label.
    df = timeline.merge(winner_team, on="matchId", how="inner")
    df["win"] = (df["winnerTeamId"] == 100).astype(int)

    rows = []

    for _, row in df.iterrows():
        max_min = int(row["maxMinute"])

        for minute in range(15, max_min + 1, 5):
            suffix = f"_{minute}"
            # Skip snapshots the timeline file has no columns for.
            if f"goldDiff{suffix}" not in row:
                continue

            record = {
                "tier": tier_name,
                "matchId": row["matchId"],
                "minute": minute,
                "maxMinute": max_min,
            }
            for metric in _SNAPSHOT_METRICS:
                record[metric] = row[f"{metric}{suffix}"]
            for metric in _FLIPPED_METRICS:
                # "0 - x" instead of "-x": negating a float zero yields -0.0,
                # which then shows up as a "-0.0" bin label downstream.
                record[metric] = 0 - row[f"{metric}{suffix}"]
            record["win"] = row["win"]

            rows.append(record)

    return rows

# Dataset names in load order: every "<TIER>_<DIVISION>" combination first,
# followed by the tiers that have no divisions.
tier_names = [f"{tier}_{div}" for tier in DIVISION_TIERS for div in DIVISIONS]
tier_names += NO_DIVISION_TIERS

all_rows = []

for name in tier_names:
    print(f"[LOAD] {name}")

    all_rows += load_tier_data(
        name,
        f"../../data/processed/{name}_timeline.csv",
        f"../../data/processed/{name}_matches.csv",
    )

# One long-format frame holding the snapshot records of every tier.
long_df = pd.DataFrame(all_rows)
print("TOTAL ROWS:", len(long_df))

[LOAD] IRON_I
[LOAD] IRON_II
[LOAD] IRON_III
[LOAD] IRON_IV
[LOAD] BRONZE_I
[LOAD] BRONZE_II
[LOAD] BRONZE_III
[LOAD] BRONZE_IV
[LOAD] SILVER_I
[LOAD] SILVER_II
[LOAD] SILVER_III
[LOAD] SILVER_IV
[LOAD] GOLD_I
[LOAD] GOLD_II
[LOAD] GOLD_III
[LOAD] GOLD_IV
[LOAD] PLATINUM_I
[LOAD] PLATINUM_II
[LOAD] PLATINUM_III
[LOAD] PLATINUM_IV
[LOAD] EMERALD_I
[LOAD] EMERALD_II
[LOAD] EMERALD_III
[LOAD] EMERALD_IV
[LOAD] DIAMOND_I
[LOAD] DIAMOND_II
[LOAD] DIAMOND_III
[LOAD] DIAMOND_IV
[LOAD] MASTER
[LOAD] GRANDMASTER
[LOAD] CHALLENGER
TOTAL ROWS: 134897


# 시간 구간별 평균 승률

In [None]:
# Mean of the win label and number of records at each snapshot minute.
win_by_minute = long_df.groupby("minute")["win"]
analysis_time = (
    win_by_minute
    .agg(win_rate="mean", num_samples="count")
    .reset_index()
)

print("=== [분석 1] 시간 구간별 평균 승률 (15분 이상) ===")
display(analysis_time)

=== [분석 1] 시간 구간별 평균 승률 (15분 이상) ===


Unnamed: 0,minute,win_rate,num_samples
0,15,0.516936,44019
1,20,0.510227,37256
2,25,0.503155,29790
3,30,0.498263,15540
4,35,0.49608,5868
5,40,0.48759,1813
6,45,0.510549,474
7,50,0.586538,104
8,55,0.48,25
9,60,0.2,5


# 지표 차이 구간별 승률

In [40]:
def analyze_metric_winrate(
    df,
    metric,
    bin_size,
    min_samples=50,
    min_val=None,
    max_val=None
):
    """Win rate per fixed-width bin of a metric.

    Each value of ``df[metric]`` is floored to a multiple of ``bin_size``
    (``(x // bin_size) * bin_size``) and the ``win`` column is aggregated per
    bin. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``metric`` and ``win`` columns.
    metric : str
        Column to bin.
    bin_size : number
        Width of each bin.
    min_samples : int, default 50
        Bins with fewer rows than this are dropped.
    min_val, max_val : number, optional
        Inclusive bounds on the bin label. Each bound is applied on its own,
        so passing only one of them filters on that side only.

    Returns
    -------
    pandas.DataFrame
        Columns ``metric``, ``win_rate``, ``num_samples``, ``std_win``,
        sorted by ``metric``.
    """
    # Group on a derived Series rather than copying the whole frame just to
    # attach a temporary column.
    bins = ((df[metric] // bin_size) * bin_size).rename(metric)

    result = (
        df["win"]
        .groupby(bins)
        .agg(["mean", "count", "std"])
        .reset_index()
        .rename(columns={
            "mean": "win_rate",
            "count": "num_samples",
            "std": "std_win"
        })
    )

    # Sample-size filter
    result = result[result["num_samples"] >= min_samples]

    # Optional value-range filter; bounds are independent and inclusive.
    if min_val is not None:
        result = result[result[metric] >= min_val]
    if max_val is not None:
        result = result[result[metric] <= max_val]

    return result.sort_values(metric)

# Per-metric binning setup: bin width plus the inclusive range of bin labels
# that is kept in the report.
metric_configs = {
    name: {"bin": step, "min": low, "max": high}
    for name, step, low, high in [
        # gold / kill
        ("goldDiff", 500, -8000, 8000),
        ("totalKillDiff", 1, -20, 20),
        # objectives
        ("dragonDiff", 1, -4, 4),
        ("elderDiff", 1, -2, 2),
        ("heraldDiff", 1, -2, 2),
        ("baronDiff", 1, -3, 3),
        ("atakhanDiff", 1, -1, 1),
        ("grubDiff", 1, -3, 3),
        # structures
        ("outerTowerDiff", 1, -3, 3),
        ("innerTowerDiff", 1, -3, 3),
        ("baseTowerDiff", 1, -3, 3),
    ]
}

print("=== [분석 2] 지표 차이 구간별 승률 ===")
for metric, cfg in metric_configs.items():
    print(f"\n=== [{metric}] 차이 구간별 승률 ===")
    binned_winrate = analyze_metric_winrate(
        long_df,
        metric=metric,
        bin_size=cfg["bin"],
        min_samples=50,
        min_val=cfg["min"],
        max_val=cfg["max"],
    )
    display(binned_winrate)

=== [분석 2] 지표 차이 구간별 승률 ===

=== [goldDiff] 차이 구간별 승률 ===


Unnamed: 0,goldDiff,win_rate,num_samples,std_win
24,-8000.0,0.042138,1590,0.200968
25,-7500.0,0.058013,1741,0.233835
26,-7000.0,0.068076,1939,0.251942
27,-6500.0,0.076722,2294,0.266208
28,-6000.0,0.099879,2483,0.299899
29,-5500.0,0.096549,2869,0.295395
30,-5000.0,0.13217,3208,0.338728
31,-4500.0,0.149945,3628,0.357067
32,-4000.0,0.175553,3845,0.380489
33,-3500.0,0.196069,4019,0.39707



=== [totalKillDiff] 차이 구간별 승률 ===


Unnamed: 0,totalKillDiff,win_rate,num_samples,std_win
19,-20.0,0.044118,476,0.205573
20,-19.0,0.055838,591,0.229802
21,-18.0,0.053777,781,0.225722
22,-17.0,0.067465,919,0.250962
23,-16.0,0.063291,1106,0.243596
24,-15.0,0.068503,1343,0.252702
25,-14.0,0.079589,1558,0.270743
26,-13.0,0.085966,1931,0.280386
27,-12.0,0.094797,2268,0.292999
28,-11.0,0.108443,2582,0.310999



=== [dragonDiff] 차이 구간별 승률 ===


Unnamed: 0,dragonDiff,win_rate,num_samples,std_win
0,-4.0,0.107319,2637,0.309577
1,-3.0,0.176484,8709,0.381253
2,-2.0,0.294553,21351,0.455852
3,-1.0,0.408149,28051,0.4915
4,0.0,0.537624,26273,0.498592
5,1.0,0.661549,24231,0.473192
6,2.0,0.766726,15784,0.422929
7,3.0,0.856134,6089,0.350983
8,4.0,0.891648,1772,0.310912



=== [elderDiff] 차이 구간별 승률 ===


Unnamed: 0,elderDiff,win_rate,num_samples,std_win
2,-1.0,0.350923,379,0.47789
3,0.0,0.508734,134070,0.499926
4,1.0,0.615584,385,0.48709



=== [heraldDiff] 차이 구간별 승률 ===


Unnamed: 0,heraldDiff,win_rate,num_samples,std_win
0,-1.0,0.365662,39036,0.481622
1,0.0,0.515898,46924,0.499753
2,1.0,0.615608,48937,0.486456



=== [baronDiff] 차이 구간별 승률 ===


Unnamed: 0,baronDiff,win_rate,num_samples,std_win
2,-3.0,0.379747,79,0.488425
3,-2.0,0.321637,1026,0.467332
4,-1.0,0.286101,8130,0.451965
5,0.0,0.510637,116901,0.499889
6,1.0,0.711644,7841,0.453027
7,2.0,0.707036,867,0.455385



=== [atakhanDiff] 차이 구간별 승률 ===


Unnamed: 0,atakhanDiff,win_rate,num_samples,std_win
0,-1.0,0.285405,25406,0.451616
1,0.0,0.51292,85064,0.499836
2,1.0,0.725672,24427,0.446184



=== [grubDiff] 차이 구간별 승률 ===


Unnamed: 0,grubDiff,win_rate,num_samples,std_win
3,-3.0,0.419582,40445,0.493497
4,-2.0,0.467907,1075,0.499201
5,-1.0,0.449133,15285,0.497422
6,0.0,0.495461,2644,0.500074
7,1.0,0.53407,16290,0.498853
8,2.0,0.552177,1217,0.497474
9,3.0,0.5869,51571,0.492395



=== [outerTowerDiff] 차이 구간별 승률 ===


Unnamed: 0,outerTowerDiff,win_rate,num_samples,std_win
0,-3.0,0.077837,4882,0.267942
1,-2.0,0.146751,11850,0.353873
2,-1.0,0.277285,23914,0.447668
3,-0.0,0.498644,49797,0.500003
4,1.0,0.721311,25437,0.448363
5,2.0,0.855218,13358,0.351894
6,3.0,0.928609,5659,0.257499



=== [innerTowerDiff] 차이 구간별 승률 ===


Unnamed: 0,innerTowerDiff,win_rate,num_samples,std_win
0,-3.0,0.0932,5118,0.290742
1,-2.0,0.169649,9443,0.375344
2,-1.0,0.276346,16530,0.447204
3,-0.0,0.508101,69618,0.499938
4,1.0,0.715596,17883,0.451143
5,2.0,0.81272,10535,0.390155
6,3.0,0.906239,5770,0.291521



=== [baseTowerDiff] 차이 구간별 승률 ===


Unnamed: 0,baseTowerDiff,win_rate,num_samples,std_win
0,-3.0,0.131532,1110,0.338133
1,-2.0,0.137762,3673,0.344697
2,-1.0,0.219828,9976,0.41415
3,-0.0,0.507483,103771,0.499946
4,1.0,0.772516,11192,0.419226
5,2.0,0.848799,3955,0.35829
6,3.0,0.9,1220,0.300123


# 승/패 팀 주요 지표 평균 비교

In [37]:
# Metrics averaged separately for each value of the win label.
compared_metrics = [
    "goldDiff",
    "totalKillDiff",
    "dragonDiff",
    "elderDiff",
    "heraldDiff",
    "baronDiff",
    "atakhanDiff",
    "grubDiff",
    "outerTowerDiff",
    "innerTowerDiff",
    "baseTowerDiff",
]

# Latest snapshot of each match: order by minute, then keep the last row of
# every matchId group.
snapshots_by_minute = long_df.sort_values("minute")
final_state = snapshots_by_minute.groupby("matchId").tail(1)

analysis_win_lose = final_state.groupby("win")[compared_metrics].mean()

print("=== [분석 3] 승/패 팀 주요 지표 평균 비교 (종료 시점 기준) ===")
display(analysis_win_lose)


=== [분석 3] 승/패 팀 주요 지표 평균 비교 (종료 시점 기준) ===


Unnamed: 0_level_0,goldDiff,totalKillDiff,dragonDiff,elderDiff,heraldDiff,baronDiff,atakhanDiff,grubDiff,outerTowerDiff,innerTowerDiff,baseTowerDiff
win,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,-5430.597091,-8.483085,-1.375701,-0.005461,-0.194906,-0.189494,-0.382902,-0.304031,-0.89392,-1.052581,-0.535569
1,5567.459643,8.868415,0.90875,0.0052,0.362966,0.156048,0.339724,0.895049,0.984761,1.107303,0.559575


# 결과 파일 생성

In [42]:
import os

# Output locations for the exported tables.
analysis_dir = "../../data/analysis"
metrics_dir = f"{analysis_dir}/metrics"

for out_dir in (analysis_dir, metrics_dir):
    os.makedirs(out_dir, exist_ok=True)

# Win rate per snapshot minute.
analysis_time.to_csv(
    f"{analysis_dir}/time_winrate.csv", index=False, encoding="utf-8-sig"
)

# One binned win-rate table per configured metric.
for metric, cfg in metric_configs.items():
    metric_table = analyze_metric_winrate(
        long_df,
        metric=metric,
        bin_size=cfg["bin"],
        min_samples=50,
        min_val=cfg["min"],
        max_val=cfg["max"],
    )
    metric_table.to_csv(
        f"{metrics_dir}/{metric}_winrate.csv", index=False, encoding="utf-8-sig"
    )

# The index (the win label) is written as the first column of this file.
analysis_win_lose.to_csv(
    f"{analysis_dir}/final_state_win_lose_mean.csv", encoding="utf-8-sig"
)