In [1]:
import numpy as np
import pandas as pd
import re

In [2]:
heroes = pd.read_csv("../data/heroes.csv")
heroes.head()

Unnamed: 0,id,name,role,type
0,1,Anti-Mage,Carry,Melee
1,2,Axe,Initiator,Melee
2,3,Bane,Support,Ranged
3,4,Bloodseeker,Carry,Melee
4,5,Crystal Maiden,Support,Ranged


In [3]:
data = pd.read_csv("../data/matches.csv")
data.tail()

Unnamed: 0,match_id,victor_team,radiant_team,dire_team
11290,5155632958,Dire,100-48-20-58-38,86-52-21-47-79
11291,5144422972,Radiant,112-25-104-102-52,5-13-54-19-53
11292,5147592969,Radiant,50-51-20-75-52,101-106-63-2-85
11293,5144697968,Radiant,57-88-13-74-44,84-16-52-46-6
11294,5178333151,Dire,114-17-28-86-5,23-110-97-96-13


In [4]:
data["radiant_team"] = data["radiant_team"].str.split("-")
data["dire_team"] = data["dire_team"].str.split("-")
data.head()

Unnamed: 0,match_id,victor_team,radiant_team,dire_team
0,5206719625,Dire,"[94, 20, 6, 101, 38]","[4, 87, 112, 23, 96]"
1,5188363980,Radiant,"[91, 49, 16, 106, 25]","[53, 15, 100, 12, 13]"
2,5195372233,Dire,"[86, 106, 102, 120, 53]","[40, 98, 64, 15, 47]"
3,5201375658,Dire,"[6, 107, 22, 31, 20]","[54, 46, 83, 25, 51]"
4,5203084179,Dire,"[20, 5, 51, 94, 65]","[12, 55, 31, 103, 25]"


In [5]:
data = data.explode("radiant_team")
data = data.explode("dire_team")

In [6]:
data = data.fillna(0)
data[["radiant_team", "dire_team"]] = (data[["radiant_team",
                                            "dire_team"]]
                                            .astype(int))

In [7]:
data

Unnamed: 0,match_id,victor_team,radiant_team,dire_team
0,5206719625,Dire,94,4
0,5206719625,Dire,94,87
0,5206719625,Dire,94,112
0,5206719625,Dire,94,23
0,5206719625,Dire,94,96
...,...,...,...,...
11294,5178333151,Dire,5,23
11294,5178333151,Dire,5,110
11294,5178333151,Dire,5,97
11294,5178333151,Dire,5,96


In [8]:
data = data.assign(radiant_win = lambda x: data["victor_team"] == "Radiant")
data = data.assign(dire_win = lambda x: data["victor_team"] == "Dire")
data = data.replace({True: 1, False: 0})

In [9]:
data

Unnamed: 0,match_id,victor_team,radiant_team,dire_team,radiant_win,dire_win
0,5206719625,Dire,94,4,0,1
0,5206719625,Dire,94,87,0,1
0,5206719625,Dire,94,112,0,1
0,5206719625,Dire,94,23,0,1
0,5206719625,Dire,94,96,0,1
...,...,...,...,...,...,...
11294,5178333151,Dire,5,23,0,1
11294,5178333151,Dire,5,110,0,1
11294,5178333151,Dire,5,97,0,1
11294,5178333151,Dire,5,96,0,1


# NEW TRY

In [10]:
data = (data.groupby(["radiant_team", "dire_team"])
                .agg(win_1=("radiant_win", "sum"), win_2=("dire_win", "sum"))
                .reset_index()
                .rename(columns={"radiant_team": "hero_1", "dire_team": "hero_2"}))
data

Unnamed: 0,hero_1,hero_2,win_1,win_2
0,1,2,3,5
1,1,3,6,2
2,1,4,9,5
3,1,5,10,14
4,1,6,3,4
...,...,...,...,...
13801,129,119,7,4
13802,129,120,4,8
13803,129,121,11,6
13804,129,126,5,8


In [11]:
data_mirror = pd.DataFrame({
    "hero_1": data["hero_2"], 
    "hero_2": data["hero_1"], 
    "win_1": data["win_2"], 
    "win_2": data["win_1"]})
data_mirror

Unnamed: 0,hero_1,hero_2,win_1,win_2
0,2,1,5,3
1,3,1,2,6
2,4,1,5,9
3,5,1,14,10
4,6,1,4,3
...,...,...,...,...
13801,119,129,4,7
13802,120,129,8,4
13803,121,129,6,11
13804,126,129,8,5


In [12]:
complete_data = pd.concat([data, data_mirror], sort=False)
complete_data

Unnamed: 0,hero_1,hero_2,win_1,win_2
0,1,2,3,5
1,1,3,6,2
2,1,4,9,5
3,1,5,10,14
4,1,6,3,4
...,...,...,...,...
13801,119,129,4,7
13802,120,129,8,4
13803,121,129,6,11
13804,126,129,8,5


In [15]:
complete_data["games_played"] = complete_data["win_1"] + complete_data["win_2"]
complete_data["win_rate"] = (complete_data["win_1"] / complete_data["games_played"]).round(3)
complete_data = (complete_data.drop(columns=["win_2"])
                .rename(columns={"hero_1": "hero", "hero_2": "opponent", "win_1": "hero_wins"}))

In [16]:
complete_data

Unnamed: 0,hero,opponent,hero_wins,games_played,win_rate
0,1,2,3,8,0.375
1,1,3,6,8,0.750
2,1,4,9,14,0.643
3,1,5,10,24,0.417
4,1,6,3,7,0.429
...,...,...,...,...,...
13801,119,129,4,11,0.364
13802,120,129,8,12,0.667
13803,121,129,6,17,0.353
13804,126,129,8,13,0.615


In [18]:
complete_data.to_csv("../data/heroes_matchups.csv", index=False)