In [1]:
import requests
import pandas as pd
import numpy as np
import json

# Some pages that help explain what the responses mean
# https://github.com/odota/web/blob/master/src/lang/en-US.json
# https://github.com/karigunnarsson/midMatchup/blob/master/getGames.R

# Based on experiments, it seems that pos_log records the positions of players:
# the first number is one map axis (X or Y), the second is the other axis, and
# the third is probably the total number of periods the player spent on that square.
# There doesn't seem to be any timing information associated with it.
# Each period seems to be around 3 seconds. Could be used to build a heatmap.

# Get data from opendota

In [2]:
# Read the TI8 match list and keep each distinct match id once
ti8_matches = pd.read_csv("ti8_match_ids.csv")
match_id_column = ti8_matches['match_id']
match_ids = match_id_column.unique().tolist()

In [3]:
# Get all matches that we're interested in from the opendota endpoint
matches_endpoint = "https://api.opendota.com/api/matches/"

match_dicts = []
# A single session reuses the HTTPS connection across the whole download loop
with requests.Session() as session:
    for match_id in match_ids:
        # The timeout keeps one stalled request from hanging the notebook forever
        r = session.get(matches_endpoint+str(match_id), timeout=30)
        # Fail fast on HTTP errors so an error payload is never stored as if it
        # were a match (later cells index match['match_id'], match['players'], ...)
        r.raise_for_status()
        match_dicts.append(r.json())

# Save the match_dicts file
#with open('data.json', 'w') as file:
#     file.write(json.dumps(match_dicts))

In [4]:
# Get basic info on DOTA 2 heroes e.g. id
heroes_endpoint = "https://api.opendota.com/api/heroes/"
r = requests.get(heroes_endpoint, timeout=30)
# Surface HTTP failures here instead of iterating over an error payload below
r.raise_for_status()
heroes= r.json()

# Prep for future use: keep the scalar fields as-is and flatten the list of
# roles into one lower-case, ';'-separated string per hero
hero_fields = ('id', 'name', 'localized_name', 'primary_attr', 'attack_type')
basic_heroes_data = []
for hero in heroes:
    data = {field: hero[field] for field in hero_fields}
    data['roles'] = ';'.join(hero['roles']).lower()
    basic_heroes_data.append(data)

basic_heroes_df = pd.DataFrame(basic_heroes_data)
# One 0/1 indicator column per distinct role found in the ';'-separated strings
dummified_roles = basic_heroes_df['roles'].str.get_dummies(sep=';')
basic_heroes_df = pd.concat([basic_heroes_df, dummified_roles],axis=1)

basic_heroes_df.to_csv('output/basic_heroes.csv',index=False)

# Prep data for Qlik Sense

In [3]:
# Read the cached match payloads from disk rather than calling the API again
with open('data.json') as cached_file:
    match_dicts = json.loads(cached_file.read())

In [7]:
# Flatten every entry of match['draft_timings'] into one row tagged with its match id
draft_fields = (
    'order',
    'pick',
    'active_team',
    'hero_id',
    'player_slot',
    'extra_time',
    'total_time_taken',
)

draft_data = []
for match in match_dicts:
    for draft in match['draft_timings']:
        row = {'match_id': match['match_id']}
        row.update((field, draft[field]) for field in draft_fields)
        draft_data.append(row)

drafts_df = pd.DataFrame(draft_data)
drafts_df.to_csv('output/drafts.csv',index=False)

In [13]:
# Build one summary row per match: duration, scores, winner and both teams
basic_match_data = []
for match in match_dicts:
    radiant = match['radiant_team']
    dire = match['dire_team']
    basic_match_data.append({
        'match_id': match['match_id'],
        'duration': match['duration'],
        'first_blood_time': match['first_blood_time'],
        'radiant_score': match['radiant_score'],
        'radiant_win': match['radiant_win'],
        'radiant_team_id': radiant['team_id'],
        'radiant_team_name': radiant['name'],
        'dire_score': match['dire_score'],
        'dire_team_id': dire['team_id'],
        'dire_team_name': dire['name'],
    })

basic_match_df = pd.DataFrame(basic_match_data)
basic_match_df.to_csv('output/basic_match.csv',index=False)

In [4]:
# Prep match data for radiant gold and xp advantage that we're interested in.
# Each match contributes one row per entry of its 'radiant_gold_adv' list,
# numbered from 1 in the 'minute' column.
adv_frames = []
for match in match_dicts:
    gold_adv = match['radiant_gold_adv']
    d = pd.DataFrame({
        'minute': np.arange(1, len(gold_adv)+1),
        'radiant_gold_adv': gold_adv,
        'radiant_xp_adv': match['radiant_xp_adv'],
    })
    d['match_id'] = match['match_id']
    adv_frames.append(d)

# Concatenate once at the end: DataFrame.append re-copied the accumulated frame
# on every iteration and no longer exists in pandas >= 2.0.
# pd.concat rejects an empty list, so fall back to an empty frame as before.
if adv_frames:
    radiant_gold_and_xp_adv_df = pd.concat(adv_frames, ignore_index=True)
else:
    radiant_gold_and_xp_adv_df = pd.DataFrame()

radiant_gold_and_xp_adv_df.to_csv('output/radiant_gold_and_xp_adv.csv',index=False)

In [6]:
# Collect every objective entry of every match, tagging each with its match id
objectives_data = []
for match in match_dicts:
    current_match_id = match['match_id']
    objectives_data.extend(
        [dict(objective, match_id=current_match_id) for objective in match['objectives']]
    )

objectives_df = pd.DataFrame(objectives_data)
objectives_df.to_csv('output/objectives.csv',index=False)

In [7]:
# One row per team fight: start/end/last_death values plus the owning match id
teamfights_data = [
    {
        'match_id': match['match_id'],
        'start': fight['start'],
        'end': fight['end'],
        'last_death': fight['last_death'],
    }
    for match in match_dicts
    for fight in match['teamfights']
]

teamfights_df = pd.DataFrame(teamfights_data)
teamfights_df.to_csv('output/teamfights.csv',index=False)

Unnamed: 0,end,last_death,match_id,start
0,1203,1188,3960583608,1167
1,1589,1574,3960583608,1548
2,1933,1918,3960583608,1872
3,2087,2072,3960583608,2051
4,2123,2108,3960583608,2086
5,2232,2217,3960583608,2176
6,2501,2486,3960583608,2459
7,2861,2846,3960583608,2824
8,2910,2895,3960583608,2851
9,528,513,3960600123,484


In [14]:
# Per-player fields copied verbatim from each entry of match['players'].
# The order here is the column order of the resulting frame.
player_fields = [
    'account_id',
    'match_id',
    'camps_stacked',
    'creeps_stacked',
    'deaths',
    'denies',
    'gold_per_min',
    'hero_damage',
    'hero_healing',
    'hero_id',
    'kills',
    'rune_pickups',
    'teamfight_participation',
    'tower_damage',
    'xp_per_min',
    'win',
    'total_gold',
    'total_xp',
    # 'kills_per_min' is intentionally left out
    'kda',
    'neutral_kills',
    'tower_kills',
    'courier_kills',
    'lane_kills',
    'hero_kills',
    'observer_kills',
    'sentry_kills',
    'roshan_kills',
    'necronomicon_kills',
    'ancient_kills',
    'buyback_count',
    'observer_uses',
    'sentry_uses',
    'lane_efficiency_pct',
    'lane',
    'lane_role',
    'is_roaming',
    'player_slot',
]

basic_player_data = []
for match in match_dicts:
    for player in match['players']:
        basic_player_data.append({field: player[field] for field in player_fields})

basic_player_df = pd.DataFrame(basic_player_data)
# Slots up to and including 127 are labelled 'radiant', everything above 'dire'
basic_player_df['player_side'] = [
    'radiant' if slot <= 127 else 'dire'
    for slot in basic_player_df['player_slot']
]
basic_player_df.to_csv('output/basic_player.csv',index=False)

In [17]:
# Preview only the first rows instead of rendering the whole frame
basic_player_df.head(10)

Unnamed: 0,account_id,ancient_kills,buyback_count,camps_stacked,courier_kills,creeps_stacked,deaths,denies,gold_per_min,hero_damage,...,rune_pickups,sentry_kills,sentry_uses,teamfight_participation,total_gold,total_xp,tower_damage,tower_kills,win,xp_per_min
0,94004717,19,1,1,0,2,5,27,716,44256,...,3,0,0,0.758621,36933,33838,5447,6,0,656
1,84853828,0,1,1,0,3,14,2,343,13794,...,5,1,5,0.413793,17693,21252,0,0,0,412
2,116525052,0,2,0,0,0,7,2,284,19881,...,2,0,3,0.655172,14649,26152,452,0,0,507
3,86700461,3,2,1,0,3,10,26,531,25685,...,12,1,0,0.551724,27390,29763,3055,3,0,577
4,117956848,0,0,1,0,2,6,11,431,18726,...,5,0,0,0.620690,22232,20323,451,0,0,394
5,86974263,0,2,0,0,0,11,6,321,29497,...,10,1,8,0.738095,16558,23831,199,0,1,462
6,86822085,39,1,0,0,0,4,14,751,37923,...,8,1,0,0.619048,38739,31362,7257,5,1,608
7,85937380,36,2,0,0,0,4,9,474,11622,...,4,0,0,0.619048,24450,24966,11826,2,1,484
8,176502766,5,1,0,0,0,5,3,474,15530,...,9,2,1,0.738095,24450,27287,694,0,1,529
9,87063175,12,1,1,0,3,5,3,455,29197,...,10,0,0,0.690476,23470,28577,464,0,1,554


In [10]:
# Gather observer/sentry ward log entries (placed and "left" logs) for every
# player, tagging each entry with the match and account it belongs to
ward_log_keys = ('obs_log', 'obs_left_log', 'sen_log', 'sen_left_log')

wards_data = []
for match in match_dicts:
    match_id = match['match_id']

    for player in match['players']:
        account_id = player['account_id']

        for log_key in ward_log_keys:
            wards_data.extend(
                [dict(entry, match_id=match_id, account_id=account_id)
                 for entry in player[log_key]]
            )

# The 'key' column is dropped before export
wards_df = pd.DataFrame(wards_data).drop(['key'], axis=1)
wards_df.to_csv('output/wards.csv',index=False)

In [11]:
# Prep lane_pos data
# player['lane_pos'] is a nested mapping {x: {y: frequency}}; flatten it into
# one row per (match, player, x, y) combination.
lane_pos_columns = ['x', 'y', 'frequency', 'match_id', 'account_id']
lane_pos_rows = []

for match in match_dicts:
    for player in match['players']:
        for x,y_key in player['lane_pos'].items():
            for y,v in y_key.items():
                lane_pos_rows.append([x, y, v, match['match_id'], player['account_id']])

# Build the frame once from the collected rows: DataFrame.append re-copied the
# accumulated frame for every (player, x) pair and was removed in pandas 2.0.
lane_pos_df = pd.DataFrame(lane_pos_rows, columns=lane_pos_columns)

lane_pos_df.to_csv('output/lane_pos.csv',index=False)
# Preview only the first rows instead of rendering the whole frame
lane_pos_df.head(10)

Unnamed: 0,x,y,frequency,match_id,account_id
0,72,78,5,3960583608,94004717
1,74,74,1,3960583608,94004717
2,74,80,1,3960583608,94004717
3,76,76,1,3960583608,94004717
4,76,80,1,3960583608,94004717
5,76,154,1,3960583608,94004717
6,76,158,1,3960583608,94004717
7,76,160,1,3960583608,94004717
8,78,78,1,3960583608,94004717
9,78,152,1,3960583608,94004717
