In [1]:
import dota2api
from dota2api.src.exceptions import APIError, APITimeoutError
import csv
from multiprocessing import Pool
import time
import sys
import numpy as np

In [2]:
def getMatchInfo( api, matchId ):
    for retries in range(3):
        try:
            match = api.get_match_details(match_id=matchId)
            break
        except APIError as e:
            #print(e.msg)
            raise APIError('Getting match ' + str(matchId) + ' Failed')
        except Exception as e:
           # print(sys.exc_info())
            if retries == 2:
                raise APIError('Getting match ' + str(matchId) + ' Failed')
            else:
                time.sleep(120)
    
    if match['human_players'] != 10 or len(match['players']) != 10:
        raise APIError('Bad number of players')
    if 'radiant_win' not in match.keys():
        raise APIError('Match not completed')
    matchRow = [0]*15
    matchRow[0] = matchId
    if match['radiant_win']: # eg True
        matchRow[1] = 1
    else:
        matchRow[1] = -1
    if('game_mode' not in match or 'lobby_type' not in match):
        raise APIError('match ' + str(matchId) + ' does not contain type')
        
    matchRow[2] = match['cluster'] # eg 227 -> translates to Europe West?
    matchRow[3] = match['game_mode'] # eg Captains Mode
    matchRow[4] = match['lobby_type'] # eg Ranked
    for i in range(10):
        matchRow[5 + i] = match['players'][i]['hero_id'] # eg 5

    return matchRow

In [3]:
def serialLoop( api, matchId, stopNum, writer ):
    while stopNum > 0:
        try:
            matchInfo = getMatchInfo( api, matchId )
            writer.writerow(matchInfo)
            stopNum -= 1
          #  print("Got " + str(matchId) + ", Need " + str(stopNum) + " more")
        except APIError as e:
           # print(e.msg)
            x=0
        finally:
            matchId -= 1

In [4]:
def getMatchStar( args ):
    try:
        match = getMatchInfo( args[0], args[1] )
        print("Match " + str(args[1]) + " successful")
        return match
    except APIError as e:
        print (e.msg)
    return []
   

In [5]:
     
def parallelLoop( api, matchId, stopNum, writer ):
    p = Pool(4)

    matchList = ( [ (api, matchId - x ) for x in range(2*stopNum) ] )
    for x in p.map(getMatchStar, matchList):
        if len(x) != 0:
            writer.writerow( x )
    p.terminate()

In [6]:
if __name__=="__main__":
    api = dota2api.Initialise('4515D85FAC69BE2FC86BE529A03D234E')
    matchId = 5283290278
    ### matchSeqNum = 2242825642
    stopNum = 10000
    batch_num = 1000
    batch_size = stopNum/batch_num
    for i in range(700,batch_num):
        print('batch ', i)
        outFile = open('temp_data/dotaMatch_'+str(i)+'.csv', 'a')
        writer = csv.writer(outFile)
        serialLoop( api, matchId - i*batch_size, batch_size, writer )
        outFile.close()

batch  700
batch  701
batch  702
batch  703
batch  704
batch  705
batch  706
batch  707
batch  708
batch  709
batch  710
batch  711
batch  712
batch  713
batch  714
batch  715
batch  716
batch  717
batch  718
batch  719
batch  720
batch  721
batch  722
batch  723
batch  724
batch  725
batch  726
batch  727
batch  728
batch  729
batch  730
batch  731
batch  732
batch  733
batch  734
batch  735
batch  736
batch  737
batch  738
batch  739
batch  740
batch  741
batch  742
batch  743
batch  744
batch  745
batch  746
batch  747
batch  748
batch  749
batch  750
batch  751
batch  752
batch  753
batch  754
batch  755
batch  756
batch  757
batch  758
batch  759
batch  760
batch  761
batch  762
batch  763
batch  764
batch  765
batch  766
batch  767
batch  768
batch  769
batch  770
batch  771
batch  772
batch  773
batch  774
batch  775
batch  776
batch  777
batch  778
batch  779
batch  780
batch  781
batch  782
batch  783
batch  784
batch  785
batch  786
batch  787
batch  788
batch  789
batch  790

In [6]:
import os
import glob
import pandas as pd
os.chdir("./temp_data")

In [7]:
extension = 'csv'
all_filenames = [i for i in glob.glob('dotaMatch_*.{}'.format(extension))]

In [16]:
names = ['match_id', 'team_won', 'cluster', 'game_mode', 'game_type'] + ['hero_' + str(x) for x in np.arange(1, 113)]

In [9]:
#combine all files in the list
combined_csv = pd.concat([pd.read_csv(f, names=names) for f in all_filenames ])
#export to csv
combined_csv.to_csv( "../combined_csv.csv", index=False, encoding='utf-8-sig')

In [15]:
combined_csv

Unnamed: 0,match_id,team_won,cluster,game_mode,game_type,hero_1,hero_2,hero_3,hero_4,hero_5,hero_6,hero_7,hero_8,hero_9,hero_10
0,5.283290e+09,1,156,4,0,19,96,41,54,36,95,59,114,32,49
1,5.283290e+09,1,227,22,7,26,40,93,49,106,32,9,121,63,17
2,5.283290e+09,1,236,22,7,95,40,2,25,76,74,49,93,26,99
3,5.283290e+09,1,225,22,7,120,37,6,2,14,86,35,93,29,58
4,5.283290e+09,1,153,22,7,44,6,96,84,14,19,25,20,8,15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,5.283280e+09,-1,131,22,7,70,64,95,32,84,8,28,30,74,51
6,5.283280e+09,1,187,22,7,88,74,70,53,54,7,14,6,86,11
7,5.283280e+09,1,155,22,7,74,101,80,83,42,26,97,44,31,60
8,5.283280e+09,-1,227,22,7,128,30,11,8,129,93,108,22,31,14


In [12]:
import pandas as pd


In [18]:
train_dota = pd.read_csv("../combined_csv.csv")

In [19]:
train_dota

Unnamed: 0,match_id,team_won,cluster,game_mode,game_type,hero_1,hero_2,hero_3,hero_4,hero_5,hero_6,hero_7,hero_8,hero_9,hero_10
0,5.283290e+09,1,156,4,0,19,96,41,54,36,95,59,114,32,49
1,5.283290e+09,1,227,22,7,26,40,93,49,106,32,9,121,63,17
2,5.283290e+09,1,236,22,7,95,40,2,25,76,74,49,93,26,99
3,5.283290e+09,1,225,22,7,120,37,6,2,14,86,35,93,29,58
4,5.283290e+09,1,153,22,7,44,6,96,84,14,19,25,20,8,15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22313,5.283280e+09,-1,131,22,7,70,64,95,32,84,8,28,30,74,51
22314,5.283280e+09,1,187,22,7,88,74,70,53,54,7,14,6,86,11
22315,5.283280e+09,1,155,22,7,74,101,80,83,42,26,97,44,31,60
22316,5.283280e+09,-1,227,22,7,128,30,11,8,129,93,108,22,31,14


In [None]:
names

In [None]:
5283290278 - 10000


In [None]:
match['radiant_score']

In [20]:
for retries in range(3):
    try:
        match = api.get_match_details(match_id=matchId)
        break
    except APIError as e:
        #print(e.msg)
        raise APIError('Getting match ' + str(matchId) + ' Failed')
    except Exception as e:
       # print(sys.exc_info())
        if retries == 2:
            raise APIError('Getting match ' + str(matchId) + ' Failed')
        else:
            time.sleep(120)

if match['human_players'] != 10 or len(match['players']) != 10:
    raise APIError('Bad number of players')
if 'radiant_win' not in match.keys():
    raise APIError('Match not completed')
matchRow = [0]*15
matchRow[0] = matchId
if match['radiant_win']: # eg True
    matchRow[1] = 1
else:
    matchRow[1] = -1
if('game_mode' not in match or 'lobby_type' not in match):
    raise APIError('match ' + str(matchId) + ' does not contain type')

In [21]:
match

{'players': [{'account_id': 1038188915,
   'player_slot': 0,
   'hero_id': 19,
   'item_0': 210,
   'item_1': 63,
   'item_2': 152,
   'item_3': 11,
   'item_4': 252,
   'item_5': 73,
   'backpack_0': 0,
   'backpack_1': 0,
   'backpack_2': 0,
   'item_neutral': 331,
   'kills': 8,
   'deaths': 7,
   'assists': 15,
   'leaver_status': 0,
   'last_hits': 136,
   'denies': 6,
   'gold_per_min': 443,
   'xp_per_min': 708,
   'level': 23,
   'hero_damage': 31640,
   'tower_damage': 4950,
   'hero_healing': 0,
   'gold': 2626,
   'gold_spent': 13925,
   'scaled_hero_damage': 18394,
   'scaled_tower_damage': 2951,
   'scaled_hero_healing': 0,
   'ability_upgrades': [{'ability': 5108, 'time': 118, 'level': 1},
    {'ability': 5106, 'time': 292, 'level': 2},
    {'ability': 5107, 'time': 353, 'level': 3},
    {'ability': 5106, 'time': 457, 'level': 4},
    {'ability': 5106, 'time': 521, 'level': 5},
    {'ability': 5107, 'time': 615, 'level': 6},
    {'ability': 5109, 'time': 745, 'level': 7},