In [19]:
import numpy as np
import pandas as pd
import re

# Find each individual hero winrate against each of the other heroes

In [35]:
heroes = pd.read_csv("../data/heroes.csv")
heroes.head()

Unnamed: 0,id,name,role,type
0,1,Anti-Mage,Carry,Melee
1,2,Axe,Initiator,Melee
2,3,Bane,Support,Ranged
3,4,Bloodseeker,Carry,Melee
4,5,Crystal Maiden,Support,Ranged


**1. Import our matches data (21175 high elo games)**

In [36]:
data = pd.read_csv("../data/matches.csv")
data.tail()

Unnamed: 0,match_id,victor_team,radiant_team,dire_team
21171,5335146932,Dire,96-10-126-86-85,129-91-49-72-79
21172,5346192557,Dire,10-20-23-13-108,106-90-7-11-40
21173,5389270362,Dire,101-53-2-23-100,106-11-98-7-31
21174,5368190580,Dire,128-70-23-96-45,90-43-50-7-99
21175,5339404531,Radiant,77-79-46-99-90,60-41-108-13-68


**2. Format the data so that it shows a list of heroes in each team**

In [37]:
data["radiant_team"] = data["radiant_team"].str.split("-")
data["dire_team"] = data["dire_team"].str.split("-")
data.head()

Unnamed: 0,match_id,victor_team,radiant_team,dire_team
0,5483336094,Dire,"[121, 7, 23, 77, 10]","[53, 95, 126, 38, 110]"
1,5490543094,Dire,"[114, 3, 42, 110, 25]","[56, 126, 102, 9, 79]"
2,5460832296,Radiant,"[66, 106, 6, 50, 51]","[23, 33, 7, 10, 58]"
3,5483523082,Dire,"[93, 10, 31, 7, 60]","[89, 121, 126, 23, 86]"
4,5501271276,Dire,"[121, 28, 19, 107, 51]","[96, 3, 106, 110, 1]"


**3. Explode the data based on each team column**

In [38]:
data = data.explode("radiant_team")
data = data.explode("dire_team")

In [39]:
data = data.fillna(0)
data[["radiant_team", "dire_team"]] = (data[["radiant_team",
                                            "dire_team"]]
                                            .astype(int))

In [40]:
data

Unnamed: 0,match_id,victor_team,radiant_team,dire_team
0,5483336094,Dire,121,53
0,5483336094,Dire,121,95
0,5483336094,Dire,121,126
0,5483336094,Dire,121,38
0,5483336094,Dire,121,110
...,...,...,...,...
21175,5339404531,Radiant,90,60
21175,5339404531,Radiant,90,41
21175,5339404531,Radiant,90,108
21175,5339404531,Radiant,90,13


**4. Here we needed to get a sum of the times a hero won against another hero being on each different team**

In [44]:
# data = data.assign(radiant_win = lambda x: data["victor_team"] == "Radiant")
# data = data.assign(dire_win = lambda x: data["victor_team"] == "Dire")
# data = data.replace({True: 1, False: 0})
data = data.assign(radiant_win = np.where(data["victor_team"] == "Radiant", 1, 0))
data = data.assign(dire_win = np.where(data["victor_team"] == "Dire", 1, 0))

In [47]:
data

Unnamed: 0,match_id,victor_team,radiant_team,dire_team,radiant_win,dire_win
0,5483336094,Dire,121,53,0,1
0,5483336094,Dire,121,95,0,1
0,5483336094,Dire,121,126,0,1
0,5483336094,Dire,121,38,0,1
0,5483336094,Dire,121,110,0,1
...,...,...,...,...,...,...
21175,5339404531,Radiant,90,60,1,0
21175,5339404531,Radiant,90,41,1,0
21175,5339404531,Radiant,90,108,1,0
21175,5339404531,Radiant,90,13,1,0


**5. Group the data by each possible hero combination**

    - We need both sides of the coin so we have to mirror the first groupby in another table and then concat

**5.1 Performing the first groupby**

In [48]:
data = (data.groupby(["radiant_team", "dire_team"])
                .agg(win_1=("radiant_win", "sum"), win_2=("dire_win", "sum"))
                .reset_index()
                .rename(columns={"radiant_team": "hero_1", "dire_team": "hero_2"}))
data

Unnamed: 0,hero_1,hero_2,win_1,win_2
0,1,2,7,6
1,1,3,9,10
2,1,4,12,5
3,1,5,11,14
4,1,6,6,4
...,...,...,...,...
14005,129,119,17,21
14006,129,120,62,63
14007,129,121,82,80
14008,129,126,58,63


**5.2 Mirroring the first table**

In [53]:
data_mirror = (data[["hero_2", "hero_1", "win_2", "win_1"]].copy()
     .rename(columns={
        "hero_2": "hero_1",
        "hero_1": "hero_2",
         "win_2": "win_1",
         "win_1": "win_2"
}))
data_mirror

Unnamed: 0,hero_1,hero_2,win_1,win_2
0,2,1,6,7
1,3,1,10,9
2,4,1,5,12
3,5,1,14,11
4,6,1,4,6
...,...,...,...,...
14005,119,129,21,17
14006,120,129,63,62
14007,121,129,80,82
14008,126,129,63,58


**5.3 Concatenating both tables to get the results of each hero against every possible other hero**

In [54]:
# data_mirror = pd.DataFrame({
#     "hero_1": data["hero_2"], 
#     "hero_2": data["hero_1"], 
#     "win_1": data["win_2"], 
#     "win_2": data["win_1"]})
# data_mirror

In [58]:
complete_data = pd.concat([data, data_mirror], sort=False)
complete_data

Unnamed: 0,hero_1,hero_2,win_1,win_2
0,1,2,7,6
1,1,3,9,10
2,1,4,12,5
3,1,5,11,14
4,1,6,6,4
...,...,...,...,...
14005,119,129,21,17
14006,120,129,63,62
14007,121,129,80,82
14008,126,129,63,58


**6. Get the complete data with the right column names and save it to a .csv file**

In [59]:
complete_data["games_played"] = complete_data["win_1"] + complete_data["win_2"]
complete_data["win_rate"] = (complete_data["win_1"] / complete_data["games_played"]).round(3)
complete_data = (complete_data.drop(columns=["win_2"])
                .rename(columns={"hero_1": "hero", "hero_2": "opponent", "win_1": "hero_wins"}))

In [60]:
complete_data

Unnamed: 0,hero,opponent,hero_wins,games_played,win_rate
0,1,2,7,13,0.538
1,1,3,9,19,0.474
2,1,4,12,17,0.706
3,1,5,11,25,0.440
4,1,6,6,10,0.600
...,...,...,...,...,...
14005,119,129,21,38,0.553
14006,120,129,63,125,0.504
14007,121,129,80,162,0.494
14008,126,129,63,121,0.521


In [33]:
complete_data.to_csv("../data/heroes_matchups.csv", index=False)