# Dota 2 API Loader - Building our Dataset

Our dataset is extracted from the website "www.opendota.com". To connect to it we use the Dota 2 API Loader (`dota2api`, a Python package). For documentation of the library and of its inputs and outputs, see: https://dota2api.readthedocs.io/en/latest/

We load the initial list of 437 professional players, which we built from the rosters of the 100 best professional teams in the world.

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Read the comma-separated player roster and show its (rows, columns) shape.
ds_players = pd.read_csv(
    "dataset_players.csv",
    sep=",",
)
print(ds_players.shape)

The file contains 497 player entries, but we keep only the 438 unique players.

In [None]:
# Keep each player_number once; the result drives the download loop below.
# (ds_players.isnull().sum() can be used to check for missing values.)
ds_unique_players = ds_players["player_number"].unique()
print(ds_unique_players.shape)

We started from a function found in a GitHub project, but we changed many parts of it because we wanted to download the 100 games of each professional player in our list rather than random games. The dispersion could be high if we just took random games, because there are 800 unique players per day and 11 million unique players per month.

In [None]:
#By Account_ID
import dota2api
from dota2api.src.exceptions import APIError, APITimeoutError
import csv
from multiprocessing import Pool
import time
import sys

def getMatchInfo( api, matchId ):
    """Download one match and flatten it into a 96-element row.

    Row layout (index -> content):
        0      matchId as passed in
        1      1 if match['radiant_win'] is truthy, otherwise -1
        2      match['cluster'] (eg 227 -> translates to Europe West?)
        3      match['game_mode_name'] (eg Captains Mode)
        4      match['duration']
        5-94   nine blocks of ten strings, one block per player field in
               the order hero_id, account_id, player_slot, kills, deaths,
               assists, gold_per_min, last_hits, xp_per_min; inside a
               block the order is that of match['players']
        95     match['leagueid']

    Args:
        api: object exposing get_match_details(match_id=...).
        matchId: identifier of the match to download.

    Returns:
        A list with 96 entries laid out as described above.

    Raises:
        APIError: if the download fails, the match does not have exactly
            ten players, it has no 'radiant_win' entry, or an expected
            field is missing from the response.
    """
    playerFields = (
        'hero_id',
        'account_id',
        'player_slot',
        'kills',
        'deaths',
        'assists',
        'gold_per_min',
        'last_hits',
        'xp_per_min',
    )

    # Only the download is retried: an APIError is treated as final, any
    # other failure gets up to three attempts with a 10 second pause.
    for retries in range(3):
        try:
            match = api.get_match_details(match_id=matchId)
            break
        except APIError as e:
            print(e.msg)
            raise APIError('Getting match ' + str(matchId) + ' Failed')
        except Exception:
            print(sys.exc_info())
            if retries == 2:
                raise APIError('Getting match ' + str(matchId) + ' Failed')
            time.sleep(10)

    # The extraction below works on data already in memory, so a missing
    # field fails the same way every time; it is attempted once and any
    # lookup problem is reported as an APIError that callers already handle.
    try:
        if match['human_players'] != 10 or len(match['players']) != 10:
            raise APIError('Bad number of players')
        if 'radiant_win' not in match:
            raise APIError('Match not completed')

        matchRow = [0]*96
        matchRow[0] = matchId
        matchRow[1] = 1 if match['radiant_win'] else -1
        matchRow[2] = match['cluster']
        matchRow[3] = match['game_mode_name']
        matchRow[4] = match['duration']

        players = match['players']
        for block, field in enumerate(playerFields):
            start = 5 + 10 * block
            for slot in range(10):
                matchRow[start + slot] = str(players[slot][field])

        matchRow[95] = match['leagueid'] # League Professional
    except (KeyError, IndexError, TypeError) as err:
        print(sys.exc_info())
        raise APIError('Getting match ' + str(matchId) + ' Failed, problem of Data.') from err
    return matchRow
    
def serialLoop( api, match, stopNum, writer ):
    """Download matches one after another and write each as a CSV row.

    Walks `match` in order and stops as soon as `stopNum` rows have been
    written or the list is exhausted, whichever comes first. Matches for
    which getMatchInfo raises APIError are reported and skipped, so fewer
    than `stopNum` rows may be written.

    Args:
        api: object passed through to getMatchInfo.
        match: sequence of dict-like entries, each with a 'match_id' key.
        stopNum: maximum number of rows to write.
        writer: object exposing writerow(row), e.g. a csv.writer.
    """
    # Iterating over the list itself bounds the loop: a skipped match no
    # longer pushes the index past the end of `match`.
    for entry in match:
        if stopNum <= 0:
            break
        matchId = entry['match_id']
        try:
            matchInfo = getMatchInfo(api, matchId)
        except APIError as e:
            print(e.msg)
            continue
        writer.writerow(matchInfo)
        stopNum -= 1
        print("Got " + str(matchId) + ", Need " + str(stopNum) + " more")

def getMatchStar( args ):
    """Single-argument adapter around getMatchInfo.

    Args:
        args: indexable whose element 0 is the api object and whose
            element 1 is the match id.

    Returns:
        The row produced by getMatchInfo, or an empty list when it raises
        APIError (the error message is printed).
    """
    try:
        row = getMatchInfo(args[0], args[1])
    except APIError as err:
        print(err.msg)
        return []
    print("Match " + str(args[1]) + " successful")
    return row
        
def parallelLoop( api, matchId, stopNum, writer ):
    """Download a run of consecutive match ids with four worker processes.

    The candidates are matchId, matchId - 1, ... down to
    matchId - (2*stopNum - 1). Every candidate for which getMatchStar
    returns a non-empty row is written, in candidate order, so the number
    of rows written can be anywhere between 0 and 2*stopNum.

    Args:
        api: object passed through to getMatchStar for every candidate.
        matchId: highest match id to try.
        stopNum: half the number of candidate ids to try.
        writer: object exposing writerow(row), e.g. a csv.writer.
    """
    matchList = [ (api, matchId - x) for x in range(2*stopNum) ]
    # The context manager terminates the pool on exit, including when
    # map() or writerow() raises, so worker processes are never left behind.
    with Pool(4) as p:
        for row in p.map(getMatchStar, matchList):
            if row:
                writer.writerow( row )

Using the functions defined above, we run the code to download the 100 games of each player in our list. We also print a log to monitor the import of the games into our dataset.

In [None]:
if __name__=="__main__":
    # Download the match history of every unique player and append one CSV
    # row per usable match to dotaMatch_Final.out.
    api = dota2api.Initialise("API Key - deleted from the deliverable")
    for z, account_id in enumerate(ds_unique_players):
        print("Round-->",z)
        print("Information: Player -->", account_id)
        try:
            match = api.get_match_history(account_id)
            # Try every match returned for this player.
            stopNum = len(match['matches'])
            # `with` closes the file even when the download fails midway.
            # newline='' is what the csv module documentation asks for;
            # without it blank rows can appear on platforms that translate
            # '\n' into '\r\n'.
            with open('dotaMatch_Final.out', 'a', newline='') as outFile:
                writer = csv.writer(outFile)
                serialLoop( api, match['matches'], stopNum, writer )
        except APIError as e:
            # A failure for one player must not stop the remaining players.
            print(e.msg)
    # Alternative, unused here: parallelLoop( api, matchId, stopNum, writer )

We validate the number of rows in the dataset and check that no line is empty or contains bad characters.

In [None]:
# Keep the raw text of the download file available as `data`.
with open('dotaMatch_Final.out', 'r') as fp:
    data = fp.read()

# count       -> lines that contain something besides newline characters
# count_empty -> every line of the file (despite its name, this is the
#                total; the blank lines are count_empty - count)
count = 0
count_empty = 0
with open('dotaMatch_Final.out') as myfile:
    for line in myfile:
        count_empty += 1
        if line.rstrip('\n'):
            count += 1
print(count)
print(count_empty)

We also validate the information that we are downloading in each row (sample rows).

In [None]:
# Load the complete download file as one string for manual inspection.
with open('dotaMatch_Final.out', mode='r') as fp:
    data_base = fp.read()

In [None]:
# Scan the file and remember the final line that is not just a newline;
# `last` stays None when the file has no such line.
last = None
with open('dotaMatch_Final.out') as f:
    for line in f:
        if line.rstrip('\n'):
            last = line
print(last)