Analyse, wie oft Helden vorkommen, wie oft sie die Matches, an denen sie beteiligt sind, gewinnen und welche Auswirkung ein Goldvorteil/-nachteil auf die jeweiligen Helden hat.
(Manche performen ggf. trotz Nachteil besser als andere?)

In [None]:
# imports & parquet dateien einlesen
import pandas as pd
import matplotlib.pyplot as plt


# Load the three parquet tables from the filtered_data directory; the paths
# are relative, so they resolve against the current working directory.
df_info, df_general, df_ts = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        "../filtered_data/match_info.parquet",
        "../filtered_data/match_player_general.parquet",
        "../filtered_data/match_player_timestamp.parquet",
    )
)


In [None]:
# Count how many rows of df_general each hero appears in, most frequent first
hero_counts = (
    df_general["hero_name"]
    .value_counts()
    .sort_values(ascending=False)
)
print(hero_counts)

# Bar chart of the per-hero row counts
plt.figure(figsize=(12, 6))
hero_counts.plot.bar()
plt.gca().set(
    title="Häufigkeit der gespielten Helden",
    xlabel="Held",
    ylabel="Vorkommen",
)
# Slanted, right-aligned tick labels so the hero names stay readable
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()


In [None]:
# Number of distinct matches in the whole table
total_matches = df_general["match_id"].nunique()

# Number of distinct matches each hero took part in, largest first
hero_match_counts = (
    df_general
    .groupby("hero_name")["match_id"]
    .nunique()
    .sort_values(ascending=False)
)

# Share of all matches (in percent) in which each hero appears
hero_match_percent = hero_match_counts.div(total_matches).mul(100)


# --- 2. Win rate per hero ---
# Attach the winning team of each match (df_info columns "match_id" and
# "winning_team") to every player row via a left join on match_id.
df_merged = pd.merge(
    df_general,
    df_info[["match_id", "winning_team"]],
    how="left",
    on="match_id",
)

# A player row counts as won when its team equals the match's winning team
df_merged["won"] = df_merged["team"].eq(df_merged["winning_team"])

# Mean of the won flag per hero, sorted best first and scaled to percent
hero_winrate = (
    df_merged
    .groupby("hero_name")["won"]
    .mean()
    .sort_values(ascending=False)
    .mul(100)
)


# --- 3. Plot: share of matches per hero (percent) ---
plt.figure(figsize=(14, 6))
hero_match_percent.plot.bar()
plt.gca().set(
    title="Prozentualer Anteil der Matches, an denen der Held beteiligt war",
    ylabel="Prozent (%)",
    xlabel="Held",
)
# Slanted, right-aligned tick labels so the hero names stay readable
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()


# --- 4. Plot: win rate per hero ---
# df_merged and hero_winrate are already built in section 2 of this cell, so
# the identical merge/groupby is not repeated here.
print(hero_winrate.round(2))


# --- Plot ---
plt.figure(figsize=(14, 6))
ax = hero_winrate.plot(kind='bar')

plt.title("Winrate pro Held")
plt.ylabel("Winrate (%)")
plt.xlabel("Held")

# Zoom the y-axis so small differences between heroes are visible. The window
# is 44-55 % by default and is widened by one point beyond the data whenever a
# hero falls outside it, so no bar or label is cut off. Python's min/max keep
# the default bound if the series is empty (NaN extremes).
plt.ylim(
    min(44, hero_winrate.min() - 1),
    max(55, hero_winrate.max() + 1),
)

# Label every bar with its value, placed slightly above the bar top
for i, value in enumerate(hero_winrate):
    ax.text(i, value + 0.2, f"{value:.1f}%", ha='center', va='bottom', fontsize=9)

plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# Mean net worth over all rows of df_general; reference point for the
# +/- 15k cases below.
# NOTE(review): this is the mean across the whole table, not per match --
# confirm that this is the intended baseline for "gold advantage/disadvantage".
avg_networth = df_general["net_worth"].mean()


# Attach each match's winning team to the player rows. merge() returns a new
# DataFrame, so df_general is left untouched and no prior .copy() is needed.
df_end = df_general.merge(df_info[["match_id", "winning_team"]], on="match_id", how="left")

# 1 when the player's team equals the winning team, otherwise 0. Rows without
# a match in df_info get a missing winning_team from the left join and
# therefore count as 0.
df_end["won"] = (df_end["team"] == df_end["winning_team"]).astype(int)


# Case flags: every row belongs to the "normal" case; the other two mark rows
# whose net worth is at least 15000 above / below the overall mean.
df_end["case_normal"] = True
df_end["case_plus15k"] = df_end["net_worth"] >= (avg_networth + 15000)
df_end["case_minus15k"] = df_end["net_worth"] <= (avg_networth - 15000)


# Winrates pro Held

def calc_wr(group, mask):
    """Return the win rate in percent for the rows of ``group`` picked by ``mask``.

    ``mask`` is a callable that receives ``group`` and returns a boolean
    selector for its rows. The result is the mean of the selected rows'
    "won" column multiplied by 100, or NaN when no row is selected.
    """
    selected = group[mask(group)]
    if len(selected):
        return selected["won"].mean() * 100
    # Nothing matched the filter: there is no win rate to report
    return float("nan")

# One record per hero: win rate over all rows and within each net-worth case,
# together with the number of rows each rate is based on.
results = [
    {
        "hero": hero,
        "winrate_all": calc_wr(g, lambda x: x["case_normal"]),
        "winrate_plus15k": calc_wr(g, lambda x: x["case_plus15k"]),
        "winrate_minus15k": calc_wr(g, lambda x: x["case_minus15k"]),
        "samples_all": len(g),
        "samples_plus15k": g["case_plus15k"].sum(),
        "samples_minus15k": g["case_minus15k"].sum(),
    }
    for hero, g in df_end.groupby("hero_name")
]

# Table of all heroes, highest overall win rate first
df_wr = pd.DataFrame(results).sort_values(by="winrate_all", ascending=False)


# Output: lift pandas' row display limit, then print the whole table without
# the index column.
pd.options.display.max_rows = None
print(df_wr.to_string(index=False))