In [1]:
import pandas as pd
import numpy as np

# Find winrate prediction against real results

**1. Import sample Data**

In [2]:
### Import dataset for winrate 
sample_data = pd.read_csv("../data/heroes_matchups.csv")
sample_data.tail()

Unnamed: 0,hero,opponent,hero_wins,games_played,win_rate
28015,119,129,21,38,0.553
28016,120,129,63,125,0.504
28017,121,129,80,162,0.494
28018,126,129,63,121,0.521
28019,128,129,59,107,0.551


**2. Import test Data**

In [3]:
### Import dataset for test
test_data = pd.read_csv("../data/random_matches.csv")
test_data.tail()

Unnamed: 0,match_id,victor_team,radiant_team,dire_team,match_duration
348,5496943702,Dire,107-23-30-64-1,9-10-110-120-8,1793
349,5496943914,Dire,34-126-65-95-64,86-90-104-56-48,2418
350,5496944214,Radiant,9-48-106-14-83,49-38-12-100-86,1642
351,5496945916,Dire,112-98-81-9-80,36-42-126-100-3,1356
352,5496946503,Dire,54-5-63-80-108,41-68-36-120-22,1491


**3. Format the test data in order to create lists of heroes in each team**

In [4]:
test_data["radiant_team"] = (test_data["radiant_team"].str.split("-"))
test_data["dire_team"] = test_data["dire_team"].str.split("-")

In [5]:
test_data["radiant_team"] = test_data["radiant_team"].apply(lambda x: list(map(int, x)))
test_data["dire_team"] = test_data["dire_team"].apply(lambda x: list(map(int, x)))
test_data.tail()

Unnamed: 0,match_id,victor_team,radiant_team,dire_team,match_duration
348,5496943702,Dire,"[107, 23, 30, 64, 1]","[9, 10, 110, 120, 8]",1793
349,5496943914,Dire,"[34, 126, 65, 95, 64]","[86, 90, 104, 56, 48]",2418
350,5496944214,Radiant,"[9, 48, 106, 14, 83]","[49, 38, 12, 100, 86]",1642
351,5496945916,Dire,"[112, 98, 81, 9, 80]","[36, 42, 126, 100, 3]",1356
352,5496946503,Dire,"[54, 5, 63, 80, 108]","[41, 68, 36, 120, 22]",1491


**4. Create a function that calculates the winrates**

In [6]:
def get_winrates(sample_data: pd.DataFrame, test_data: dict) -> dict:
    
    # hero winrate container per each team
    radiant_winrates = []
    dire_winrates = []
    
    # contains the result of each match we are calculating
    results = []

    # loops for each map in the test_data
    for x in range(len(test_data)):
        matchup = test_data.iloc[x].to_dict()
    
        # loops for each hero in the radiant team
        for hero in matchup["radiant_team"]:
            hero_wr = []
            for opponent in matchup["dire_team"]:
                # find the matchup in our sample_data and return the mean winrate
                (hero_wr.append(
                    sample_data
                    .loc[(sample_data["hero"] == hero) & (sample_data["opponent"] == opponent), "win_rate"]
                    .mean()))
            
            # return a tuple with hero and winrate
            radiant_winrates.append((hero, round(np.mean(hero_wr), 3)))
        
        # loops for each hero in the dire team
        for hero in matchup["dire_team"]:
            hero_wr = []
            for opponent in matchup["radiant_team"]:
                # find the matchup in our sample_data and return the mean winrate
                (hero_wr.append(
                    sample_data
                    .loc[(sample_data["hero"] == hero) & (sample_data["opponent"] == opponent), "win_rate"]
                    .mean()))
                
            # return a tuple with hero and winrate
            dire_winrates.append((hero, round(np.mean(hero_wr), 3)))
        
        # gets the mean winrate for each team
        radiant_wr = round(np.mean(radiant_winrates, axis=0)[1], 3)
        dire_wr = round(np.mean(dire_winrates, axis=0)[1], 3)
        
        # returns the match data
        predicted = "Radiant" if radiant_wr > 0.5 else "Dire" if dire_wr > 0.5 else "Either"
        wr = max([radiant_wr, dire_wr])
        mvp_r = max(radiant_winrates, key=lambda item:item[1])
        mvp_d = max(dire_winrates, key=lambda item:item[1])
        
        # appends the match data to the results list
        results.append({
            "match_id": matchup["match_id"],
            "predicted": predicted,
            "victor": matchup["victor_team"],
            "win_rate": wr, 
            "mvp_radiant": mvp_r[0],
            "mvp_dire": mvp_d[0],
            "match_duration(min)": int(matchup["match_duration"]/60)
        })
        
    # returns a dataframe containing all match results
    return pd.DataFrame(results)

**5. Run the function passing the sample data and test_data**

In [7]:
results = get_winrates(sample_data, test_data)

**6. Check the total number of successful predictions**

In [8]:
results.loc[results["predicted"] == results["victor"]]

Unnamed: 0,match_id,predicted,victor,win_rate,mvp_radiant,mvp_dire,match_duration(min)
2,5497086811,Radiant,Radiant,0.508,38,66,25
9,5497093314,Dire,Dire,0.513,38,66,40
10,5497093618,Dire,Dire,0.508,100,66,31
12,5497096317,Dire,Dire,0.510,100,66,39
14,5497098118,Dire,Dire,0.510,56,66,30
...,...,...,...,...,...,...,...
275,5496181116,Dire,Dire,0.501,92,61,33
277,5496191514,Dire,Dire,0.501,92,61,23
279,5496197616,Dire,Dire,0.501,92,61,34
280,5496199805,Dire,Dire,0.501,92,61,36


**7. Save the results to a csv file**

In [9]:
results.to_csv("../data/results.csv")