In [None]:
# Imports & load the parquet files
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the three filtered parquet tables. The reads happen in the listed
# order and are unpacked into df_info, df_general and df_ts respectively.
df_info, df_general, df_ts = map(
    pd.read_parquet,
    (
        "../filtered_data/match_info.parquet",
        "../filtered_data/match_player_general.parquet",
        "../filtered_data/match_player_timestamp.parquet",
    ),
)


In [None]:
# Level advantage at match end: did the team with the higher average final
# level also win the match?

# Final level per player: the row with the latest timestamp_s for each
# (match_id, account_id) pair.
last_ts = df_ts.sort_values("timestamp_s").groupby(["match_id", "account_id"]).tail(1)

# Attach each player's team (de-duplicated player/team mapping).
player_team = df_general[["match_id", "account_id", "team"]].drop_duplicates()
last_ts_team = last_ts.merge(player_team, on=["match_id", "account_id"], how="left")

# Average final level per match and team.
team_lvl = (
    last_ts_team.groupby(["match_id", "team"])["level"]
    .mean()
    .reset_index()
)

# One row per match, one column per team.
team_lvl_wide = team_lvl.pivot(
    index="match_id",
    columns="team",
    values="level"
).reset_index()

# Level difference (Team0 - Team1): positive means Team0 is ahead.
team_lvl_wide["lvl_diff"] = team_lvl_wide["Team0"] - team_lvl_wide["Team1"]

merged_lvl = team_lvl_wide.merge(df_info[["match_id", "winning_team"]], on="match_id")

# Did the winner hold the level advantage?
# A tie (lvl_diff == 0) or a missing difference (NaN, e.g. when one team has
# no level data) means that nobody had an advantage. Both comparisons below are
# False in that case, so such matches are never credited to either team.
# Assumes winning_team uses the same "Team0"/"Team1" labels as the team column.
team0_ahead = merged_lvl["lvl_diff"] > 0
team1_ahead = merged_lvl["lvl_diff"] < 0
merged_lvl["lvl_advantage_winner"] = (
    (team0_ahead & (merged_lvl["winning_team"] == "Team0"))
    | (team1_ahead & (merged_lvl["winning_team"] == "Team1"))
)

# Matches without any level leader; they count as "no advantage" above.
no_leader = ~(team0_ahead | team1_ahead)

print(merged_lvl.head())
print("Level-Advantage Accuracy:", merged_lvl["lvl_advantage_winner"].mean())
print("Matches ohne Level-Vorsprung (Gleichstand/fehlend):", int(no_leader.sum()))


In [None]:
# Bar chart: number of matches in which the winner did / did not hold the
# level advantage.
total_wins = len(merged_lvl)
lvl_advantage_wins = merged_lvl["lvl_advantage_winner"].sum()
lvl_comeback_wins = total_wins - lvl_advantage_wins

labels = ["Sieger hatte Level-Advantage", "Sieger hatte KEINEN Level-Advantage"]
values = [lvl_advantage_wins, lvl_comeback_wins]

fig, ax = plt.subplots(figsize=(8, 5))
bars = ax.bar(labels, values, color=["tab:blue", "tab:red"], alpha=0.8)

# Write each count slightly above its bar (offset = 1 % of all matches).
label_offset = total_wins * 0.01
for rect in bars:
    count = rect.get_height()
    x_center = rect.get_x() + rect.get_width() / 2
    ax.text(x_center, count + label_offset, f"{count}", ha='center', fontsize=10)

ax.set_title("Level-Advantage bei gewonnenen Matches")
ax.set_ylabel("Anzahl Spiele")
ax.grid(axis="y", alpha=0.3)
plt.show()


In [None]:
# Level lead over time: for every timestamp, how large is the average level
# gap between the teams and how often does the leading team win in the end?

# Attach each player's team to every timestamp row.
player_team = df_general[["match_id", "account_id", "team"]].drop_duplicates()
df_ts_team = df_ts.merge(player_team, on=["match_id", "account_id"], how="left")

# Average level per match, timestamp and team.
team_lvl_ts = (
    df_ts_team.groupby(["match_id", "timestamp_s", "team"])["level"]
    .mean()
    .reset_index()
)

# One row per (match, timestamp), one column per team.
team_lvl_ts_wide = team_lvl_ts.pivot(
    index=["match_id", "timestamp_s"],
    columns="team",
    values="level"
).reset_index()

# Level difference (Team0 - Team1): positive means Team0 is ahead.
team_lvl_ts_wide["lvl_diff"] = team_lvl_ts_wide["Team0"] - team_lvl_ts_wide["Team1"]

# Add the eventual winner of each match.
lvl_ts = team_lvl_ts_wide.merge(df_info[["match_id", "winning_team"]], on="match_id")

# A row only has a leader when the difference is known and non-zero.
# Ties and missing differences must not be attributed to Team1, otherwise
# lead_accuracy at tied timestamps just mirrors Team1's win rate.
has_leader = lvl_ts["lvl_diff"].notna() & (lvl_ts["lvl_diff"] != 0)

lvl_ts["leading_team"] = (
    lvl_ts["lvl_diff"].gt(0).map({True: "Team0", False: "Team1"}).where(has_leader)
)

# 1.0 / 0.0 where a leader exists, NaN otherwise, so that the per-timestamp
# mean below is taken over rows with an actual leader only.
lvl_ts["lead_correct"] = (
    (lvl_ts["leading_team"] == lvl_ts["winning_team"]).astype(float).where(has_leader)
)

total_matches = lvl_ts["match_id"].nunique()

lvl_summary_by_time = (
    lvl_ts.groupby("timestamp_s")
    .agg(
        avg_lvl_diff=("lvl_diff", "mean"),
        avg_abs_lvl_diff=("lvl_diff", lambda s: s.abs().mean()),
        lead_accuracy=("lead_correct", "mean"),
        matchcount=("match_id", "nunique")
    )
    .reset_index()
)

# Share of all matches that still have data at this timestamp, in percent.
lvl_summary_by_time["match_pct"] = lvl_summary_by_time["matchcount"] / total_matches * 100

print(lvl_summary_by_time.head())
print(lvl_summary_by_time.to_string(index=False))


In [None]:
# Dual-axis plot over match time: average absolute level difference (left
# axis) and how often the level leader ends up winning (right axis).
lvl_summary_by_time["timestamp_min"] = lvl_summary_by_time["timestamp_s"] / 60

plt.figure(figsize=(12, 6))
ax1 = plt.gca()

# Average absolute level difference.
# Explicit colours: twin axes each start their own colour cycle, so without
# them both lines would be drawn in the same default colour.
ax1.plot(
    lvl_summary_by_time["timestamp_min"],
    lvl_summary_by_time["avg_abs_lvl_diff"],
    marker="o",
    color="tab:blue",
    label="Ø |Level-Diff|"
)
ax1.set_xlabel("Zeitpunkt im Match (Minuten)")
ax1.set_ylabel("Ø absolute Level-Differenz", color="tab:blue")
ax1.grid(True, alpha=0.3)

# Lead accuracy on a secondary y-axis.
ax2 = ax1.twinx()
ax2.plot(
    lvl_summary_by_time["timestamp_min"],
    lvl_summary_by_time["lead_accuracy"],
    marker="x",
    linestyle="--",
    color="tab:red",
    label="P(Level-Leader gewinnt am Ende)"
)
ax2.set_ylabel("Lead Accuracy", color="tab:red")
# Note: values below 0.5 fall outside the visible range.
ax2.set_ylim(0.5, 1.0)

plt.title("Level-Differenz über Zeit & wie stark sie den Sieger vorhersagt")

# Combine the legend entries of both axes into a single legend.
lines, labels = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines + lines2, labels + labels2, loc="lower right")

plt.show()


In [None]:
# Same dual-axis plot as for the full summary, restricted to timestamps that
# are backed by enough matches to be meaningful.
MIN_MATCHES = 1000  # minimum number of matches a timestamp must have

filtered_lvl = lvl_summary_by_time[lvl_summary_by_time["matchcount"] >= MIN_MATCHES].copy()
filtered_lvl["timestamp_min"] = filtered_lvl["timestamp_s"] / 60

plt.figure(figsize=(12, 6))
ax1 = plt.gca()
# Explicit colours: twin axes each start their own colour cycle, so without
# them both lines would be drawn in the same default colour.
ax1.plot(
    filtered_lvl["timestamp_min"],
    filtered_lvl["avg_abs_lvl_diff"],
    marker="o",
    color="tab:blue",
    label="Ø |Level-Diff| (gefiltert)"
)
ax1.set_xlabel("Zeitpunkt (Minuten)")
# The plotted series is the absolute difference; label it accordingly.
ax1.set_ylabel("Ø absolute Level-Differenz", color="tab:blue")
# Grid on the primary axis, so the horizontal lines match the left-hand ticks.
ax1.grid(True, alpha=0.3)

ax2 = ax1.twinx()
ax2.plot(
    filtered_lvl["timestamp_min"],
    filtered_lvl["lead_accuracy"],
    marker="x",
    linestyle="--",
    color="tab:red",
    label="P(Level-Leader gewinnt)"
)
ax2.set_ylabel("Lead Accuracy", color="tab:red")
# Note: values below 0.5 fall outside the visible range.
ax2.set_ylim(0.5, 1.0)

plt.title(f"Level-Differenz & Level-Accuracy (≥{MIN_MATCHES} Matches)")

# Combine the legend entries of both axes into a single legend.
lines, labels = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines + lines2, labels + labels2, loc="lower right")

plt.show()
