In [1]:
import requests
import pandas as pd
import numpy as np
import json
import time

# Some pages that help explain what the responses mean
# https://github.com/odota/web/blob/master/src/lang/en-US.json
# https://github.com/karigunnarsson/midMatchup/blob/master/getGames.R

# Based on experiments, it seems that pos_log records the positions of players:
# the first number is the coordinate on one axis (X or Y), the second is the
# coordinate on the other axis, and the third is probably the total number of
# periods that the player spent on that square.
# There doesn't seem to be any timing information associated with this.
# Each period seems to be around 3 seconds. This could be used for a heatmap.

# Get data from opendota

In [3]:
# Read the TI8 match list and keep each match id once, as a plain Python list
ti8_matches = pd.read_csv("data/match_ids.csv")
match_id_column = ti8_matches['match_id']
match_ids = match_id_column.drop_duplicates().tolist()

In [24]:
# Download the full record of every match we're interested in from the
# opendota matches endpoint, then cache the combined result on disk so that
# later cells can work from the file instead of calling the API again.
matches_endpoint = "https://api.opendota.com/api/matches/"

match_dicts = []
for match_id in match_ids:
    # A timeout stops a stalled connection from hanging the notebook forever.
    r = requests.get(matches_endpoint + str(match_id), timeout=30)
    # Fail loudly on an HTTP error (e.g. when rate limited) instead of
    # silently storing an error payload in place of a match record.
    r.raise_for_status()
    match_dicts.append(r.json())
    time.sleep(1)  # 1s sleep because of the 60-requests-per-minute rate limit

# Save the downloaded match records to file
with open('data/data.json', 'w') as file:
    json.dump(match_dicts, file)

In [25]:
# Get basic info on DOTA 2 heroes (e.g. id) from the opendota heroes endpoint,
# add one 0/1 indicator column per role, and save the table to CSV.
heroes_endpoint = "https://api.opendota.com/api/heroes/"
# A timeout stops a stalled connection from hanging the notebook forever.
r = requests.get(heroes_endpoint, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error payload
# as if it were the hero list.
r.raise_for_status()
heroes = r.json()

# Prep for future use: keep only the fields we need, with the list of roles
# flattened into a single lower-cased, ';'-separated string.
basic_heroes_data = [
    {
        'id': hero['id'],
        'name': hero['name'],
        'localized_name': hero['localized_name'],
        'primary_attr': hero['primary_attr'],
        'attack_type': hero['attack_type'],
        'roles': ';'.join(hero['roles']).lower(),
    }
    for hero in heroes
]

basic_heroes_df = pd.DataFrame(basic_heroes_data)
# Split the ';'-separated roles string into one indicator column per role.
dummified_roles = basic_heroes_df['roles'].str.get_dummies(sep=';')
basic_heroes_df = pd.concat([basic_heroes_df, dummified_roles], axis=1)

basic_heroes_df.to_csv('data/basic_heroes.csv', index=False)

# Prep data for Qlik Sense

In [4]:
# Read the cached match records back from disk rather than calling the API
with open('data/data.json') as cache_file:
    cached_text = cache_file.read()
match_dicts = json.loads(cached_text)

In [5]:
# This game has lots of missing data for some reason, so leave it out.
weird_game_id = 3889571850
match_dicts = list(
    filter(lambda game: game['match_id'] != weird_game_id, match_dicts)
)

In [5]:
# Build one row of headline facts per match (duration, scores, winner and
# the two teams) and write the table to CSV.
basic_match_data = [
    {
        'match_id': game['match_id'],
        'duration': game['duration'],
        'first_blood_time': game['first_blood_time'],
        'radiant_score': game['radiant_score'],
        'radiant_win': game['radiant_win'],
        'radiant_team_id': game['radiant_team']['team_id'],
        'radiant_team_name': game['radiant_team']['name'],
        'dire_score': game['dire_score'],
        'dire_team_id': game['dire_team']['team_id'],
        'dire_team_name': game['dire_team']['name'],
    }
    for game in match_dicts
]

basic_match_df = pd.DataFrame(basic_match_data)
basic_match_df.to_csv('data/basic_match.csv', index=False, encoding='utf-8')

In [6]:
# Flatten the draft of every match into one row per draft entry, tagged with
# the match it belongs to, and write the table to CSV.
draft_fields = (
    'order',
    'pick',
    'active_team',
    'hero_id',
    'player_slot',
    'extra_time',
    'total_time_taken',
)

draft_data = [
    {'match_id': game['match_id'], **{field: draft[field] for field in draft_fields}}
    for game in match_dicts
    for draft in game['draft_timings']
]

drafts_df = pd.DataFrame(draft_data)
drafts_df.to_csv('data/drafts.csv', index=False)

In [7]:
# Prep match data for radiant gold and xp advantage that we're interested in:
# one row per (match, sample) with a 1-based 'minute' counter.
# Build one small frame per match and concatenate them once at the end;
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies all previously accumulated rows on every iteration.
# NOTE(review): 'minute' numbering starts at 1 — confirm whether the first
# sample in radiant_gold_adv corresponds to minute 0 or minute 1.
radiant_adv_frames = []
for match in match_dicts:
    gold_adv = match['radiant_gold_adv']
    d = pd.DataFrame({
        'minute': np.arange(1, len(gold_adv) + 1),
        'radiant_gold_adv': gold_adv,
        'radiant_xp_adv': match['radiant_xp_adv'],
    })
    d['match_id'] = match['match_id']
    radiant_adv_frames.append(d)

# pd.concat raises on an empty list, so fall back to an empty frame when
# there are no matches.
if radiant_adv_frames:
    radiant_gold_and_xp_adv_df = pd.concat(radiant_adv_frames, ignore_index=True)
else:
    radiant_gold_and_xp_adv_df = pd.DataFrame()

radiant_gold_and_xp_adv_df.to_csv('data/radiant_gold_and_xp_adv.csv', index=False)

In [8]:
# Collect every objective entry of every match, tag each with its match id,
# and write the table to CSV.
objectives_data = []
for game in match_dicts:
    game_id = game['match_id']
    objectives_data.extend(
        {**objective, 'match_id': game_id} for objective in game['objectives']
    )

objectives_df = pd.DataFrame(objectives_data)
objectives_df.to_csv('data/objectives.csv', index=False)

In [9]:
# One row per team fight: which match it happened in, plus its start, end
# and last_death values. The table is written to CSV.
teamfights_data = [
    {
        'match_id': game['match_id'],
        'start': fight['start'],
        'end': fight['end'],
        'last_death': fight['last_death'],
    }
    for game in match_dicts
    for fight in game['teamfights']
]

teamfights_df = pd.DataFrame(teamfights_data)
teamfights_df.to_csv('data/teamfights.csv', index=False)

In [10]:
# Per-player summary stats for every match: copy a fixed set of fields from
# each player record, derive the side from the player slot, and save to CSV.
# The order of this tuple is the column order of the output table.
player_fields = (
    'account_id',
    'match_id',
    'camps_stacked',
    'creeps_stacked',
    'deaths',
    'denies',
    'gold_per_min',
    'hero_damage',
    'hero_healing',
    'hero_id',
    'kills',
    'rune_pickups',
    'teamfight_participation',
    'tower_damage',
    'xp_per_min',
    'win',
    'total_gold',
    'total_xp',
    # 'kills_per_min',
    'kda',
    'neutral_kills',
    'tower_kills',
    'courier_kills',
    'lane_kills',
    'hero_kills',
    'observer_kills',
    'sentry_kills',
    'roshan_kills',
    'necronomicon_kills',
    'ancient_kills',
    'buyback_count',
    'observer_uses',
    'sentry_uses',
    'lane_efficiency_pct',
    'lane',
    'lane_role',
    'is_roaming',
    'player_slot',
)

basic_player_data = [
    {field: player[field] for field in player_fields}
    for game in match_dicts
    for player in game['players']
]

basic_player_df = pd.DataFrame(basic_player_data)
# Slots up to and including 127 are labelled 'radiant', everything else 'dire'.
basic_player_df['player_side'] = basic_player_df['player_slot'].map(
    lambda slot: 'radiant' if slot <= 127 else 'dire'
)
basic_player_df.to_csv('data/basic_player.csv', index=False)

In [11]:
# Gather the four ward logs of every player in every match into one table,
# tagging each entry with its match and account id, then save to CSV
# without the 'key' column.
ward_log_names = ('obs_log', 'obs_left_log', 'sen_log', 'sen_left_log')

wards_data = []
for game in match_dicts:
    game_id = game['match_id']

    for player in game['players']:
        tags = {'match_id': game_id, 'account_id': player['account_id']}

        for log_name in ward_log_names:
            wards_data.extend({**entry, **tags} for entry in player[log_name])

wards_df = pd.DataFrame(wards_data)
wards_df = wards_df.drop(columns=['key'])
wards_df.to_csv('data/wards.csv', index=False)

In [None]:
# Prep lane_pos data
# player['lane_pos'] is a nested mapping {x: {y: value}}; flatten it into one
# row per (match, player, x, y), with the value stored as 'frequency'.
# All rows are collected first and the frame is built once: DataFrame.append
# was removed in pandas 2.0, and appending inside these nested loops copies
# the whole accumulated frame on every iteration.
lane_pos_records = [
    (x, y, frequency, match['match_id'], player['account_id'])
    for match in match_dicts
    for player in match['players']
    for x, y_key in player['lane_pos'].items()
    for y, frequency in y_key.items()
]

lane_pos_df = pd.DataFrame(
    lane_pos_records,
    columns=['x', 'y', 'frequency', 'match_id', 'account_id'])

lane_pos_df.to_csv('data/lane_pos.csv', index=False)