In [64]:
# Aim is to create dataset where hero is the unit of analysis, using all
# the relevant data from what has been downloaded
import pandas as pd
import numpy as np
import itertools as it

In [19]:
# Load data we're interested in
heroes = pd.read_csv('data/basic_heroes.csv')
heroes_dict = dict(zip(heroes['id'],heroes['localized_name'])) # for adding column with heroes' name later
players = pd.read_csv('data/basic_player.csv')
drafts = pd.read_csv('data/drafts.csv')

# Calculate means for all matches, regardless win/lose

In [102]:
# This is basic data based on players data
df = players.groupby(by='hero_id').agg(
    {'match_id': 'count',
     'win': 'sum',
     'ancient_kills': 'mean',
     'buyback_count': 'mean',
     'camps_stacked': 'mean',
     'courier_kills': 'mean',
     'creeps_stacked': 'mean',
     'deaths': 'mean',
     'denies': 'mean',
     'hero_damage': 'mean',
     'hero_healing': 'mean',
     'hero_kills': 'mean',
     'kills': 'mean',
     'lane_kills': 'mean',
     'necronomicon_kills': 'mean',
     'neutral_kills': 'mean',
     'observer_kills': 'mean',
     'observer_uses': 'mean',
     'roshan_kills': 'mean',
     'rune_pickups': 'mean',
     'sentry_kills': 'mean',
     'sentry_uses': 'mean',
     'total_gold': 'mean',
     'total_xp': 'mean',
     'tower_damage': 'mean',
     'tower_kills': 'mean',
    }).rename(
    columns={'match_id':'num_matches_picked',
             'win':'num_matches_won'
            }).reset_index()
df.insert(3,'matches_won_perc',df['num_matches_won']/df['num_matches_picked'])

# Calculate number of times banned based on draft info
bans = drafts[drafts['pick']==False].groupby(by='hero_id').agg(
    {'hero_id':'count'}).rename(
    columns={'hero_id':'num_matches_banned'}).reset_index()

# Merge the three datasets
merged = df.merge(
    heroes, how='left', left_on='hero_id',right_on='id').merge(
    bans, how='left',on='hero_id')
merged['sum_pickbans']=merged['num_matches_picked']+merged['num_matches_banned']

# Save as output for Vis
merged.to_csv('output/agg_heroes_data_all_matches.csv',index=False)

# Derive data for 2-hero combinations

In [83]:
match_ids = players['match_id'].unique().tolist()
player_sides = ['radiant','dire']

combi_data = []
for match_id in match_ids: #iterate over each match and each team to get combi list of heros used
    for side in player_sides:
        filtered = players[(players['match_id'] == match_id) & (players['player_side'] == side)]
        hero_list = filtered['hero_id'].values.tolist()
        combi_list = list(it.combinations(hero_list,2))
        
        for combi in combi_list:
            filtered_combi = filtered[(filtered['hero_id']==combi[0]) | (filtered['hero_id']==combi[1])]
            data= {
                'match_id':match_id,
                'side':side,
                'combi':combi,
                'hero1_id':combi[0],
                'hero2_id':combi[1],
                'win':filtered['win'].mean(),
                
                'ancient_kills':filtered_combi['ancient_kills'].sum(),
                'buyback_count':filtered_combi['buyback_count'].sum(),
                'camps_stacked':filtered_combi['camps_stacked'].sum(),
                'courier_kills':filtered_combi['courier_kills'].sum(),
                'creeps_stacked':filtered_combi['creeps_stacked'].sum(),
                'deaths':filtered_combi['deaths'].sum(),
                'denies':filtered_combi['denies'].sum(),
                'hero_damage':filtered_combi['hero_damage'].sum(),
                'hero_healing':filtered_combi['hero_healing'].sum(),
                'hero_kills':filtered_combi['hero_kills'].sum(),
                'kills':filtered_combi['kills'].sum(),
                'lane_kills':filtered_combi['lane_kills'].sum(),
                'necronomicon_kills':filtered_combi['necronomicon_kills'].sum(),
                'neutral_kills':filtered_combi['neutral_kills'].sum(),
                'observer_kills':filtered_combi['observer_kills'].sum(),
                'observer_uses':filtered_combi['observer_uses'].sum(),
                'roshan_kills':filtered_combi['roshan_kills'].sum(),
                'rune_pickups':filtered_combi['rune_pickups'].sum(),
                'sentry_kills':filtered_combi['sentry_kills'].sum(),
                'sentry_uses':filtered_combi['sentry_uses'].sum(),
                'total_gold':filtered_combi['total_gold'].sum(),
                'total_xp':filtered_combi['total_xp'].sum(),
                'tower_damage':filtered_combi['tower_damage'].sum(),
                'tower_kills':filtered_combi['tower_kills'].sum(),
            }
            combi_data.append(data)

combi_df = pd.DataFrame(combi_data)
combi_df.to_csv('output/combinations.csv',index=False)

Unnamed: 0,ancient_kills,buyback_count,camps_stacked,combi,courier_kills,creeps_stacked,deaths,denies,hero1_id,hero2_id,...,roshan_kills,rune_pickups,sentry_kills,sentry_uses,side,total_gold,total_xp,tower_damage,tower_kills,win
0,47,4,7,"(110, 80)",0,15,17,11,110,80,...,1,22,10,5,radiant,80542,84526,16717,8,0.0
1,1,6,3,"(110, 106)",0,5,20,9,110,106,...,0,26,11,5,radiant,85959,93528,1278,2,0.0
2,26,6,4,"(110, 104)",0,6,28,28,110,104,...,0,10,11,9,radiant,69070,79029,185,1,0.0
3,1,5,2,"(110, 107)",0,4,26,10,110,107,...,0,15,13,10,radiant,61422,76559,185,1,0.0
4,46,4,8,"(80, 106)",0,16,15,10,80,106,...,1,40,1,0,radiant,95121,93608,17625,8,0.0
5,71,4,9,"(80, 104)",0,17,23,29,80,104,...,1,24,1,4,radiant,78232,79109,16532,7,0.0
6,46,3,7,"(80, 107)",0,15,21,11,80,107,...,1,29,3,5,radiant,70584,76639,16532,7,0.0
7,25,6,5,"(106, 104)",0,7,26,27,106,104,...,0,28,2,4,radiant,83649,88111,1093,1,0.0
8,0,5,3,"(106, 107)",0,5,24,9,106,107,...,0,33,4,5,radiant,76001,85641,1093,1,0.0
9,25,5,4,"(104, 107)",0,6,32,28,104,107,...,0,17,4,9,radiant,59112,71142,0,0,0.0


In [89]:
# Get aggregate stats for the 2-hero combinations
combinations = pd.read_csv('output/combinations.csv')

In [104]:
df = combinations.groupby(by='combi').agg(
    {'hero1_id':'first',
     'hero2_id':'first',
        
     'match_id': 'count',
     'win': 'sum',
     'ancient_kills': 'mean',
     'buyback_count': 'mean',
     'camps_stacked': 'mean',
     'courier_kills': 'mean',
     'creeps_stacked': 'mean',
     'deaths': 'mean',
     'denies': 'mean',
     'hero_damage': 'mean',
     'hero_healing': 'mean',
     'hero_kills': 'mean',
     'kills': 'mean',
     'lane_kills': 'mean',
     'necronomicon_kills': 'mean',
     'neutral_kills': 'mean',
     'observer_kills': 'mean',
     'observer_uses': 'mean',
     'roshan_kills': 'mean',
     'rune_pickups': 'mean',
     'sentry_kills': 'mean',
     'sentry_uses': 'mean',
     'total_gold': 'mean',
     'total_xp': 'mean',
     'tower_damage': 'mean',
     'tower_kills': 'mean',
    }).rename(
    columns={'match_id':'num_matches_picked',
             'win':'num_matches_won'
            }).reset_index()
df.insert(3,'matches_won_perc',df['num_matches_won']/df['num_matches_picked'])
df.insert(3,'hero1_name',df['hero1_id'].replace(heroes_dict))
df.insert(4,'hero2_name',df['hero2_id'].replace(heroes_dict))

df.sort_values(['hero1_name','hero2_name'],inplace=True)

df.to_csv('output/agg_combinations_all_data.csv',index=False)