This is the data gathering portion of my final capstone. The data was collected by making API calls to <a href="https://www.opendota.com">OpenDota.com</a> and saving each response as a JSON file. Because each individual response was small, pandas was used instead of Dask. pandas also has some extra functionality that I'm familiar with, but Dask is used later in the capstone.

In [1]:
#importing the necessities
import pandas as pd
import json
import requests
import time

In [2]:
#call parameters, defined once so the request lines below stay short
api_start = "https://api.opendota.com/api/"
api_target = "parsedMatches"
#placeholder id plus its string form, ready to be concatenated into a URL
value = 0
api_value = "{}".format(value)

In [3]:
#make the call; the timeout stops a stalled connection from hanging the
#notebook, and raise_for_status() surfaces HTTP errors (e.g. rate limiting)
#here instead of as a confusing failure when the body is parsed later
response = requests.get(api_start + api_target, timeout=30)
response.raise_for_status()

In [4]:
#print the parsed JSON body to confirm the call worked and to see
#what shape the response has
print(response.json())

[{'match_id': 5974291868}, {'match_id': 5974232291}, {'match_id': 5974224431}, {'match_id': 5974214140}, {'match_id': 5974173761}, {'match_id': 5974171143}, {'match_id': 5974158356}, {'match_id': 5974110178}, {'match_id': 5974105900}, {'match_id': 5974058895}, {'match_id': 5974056246}, {'match_id': 5973969195}, {'match_id': 5973831922}, {'match_id': 5973788395}, {'match_id': 5973711358}, {'match_id': 5973709278}, {'match_id': 5973702003}, {'match_id': 5973553535}, {'match_id': 5973498596}, {'match_id': 5973427776}, {'match_id': 5973397071}, {'match_id': 5973371562}, {'match_id': 5973322350}, {'match_id': 5973299656}, {'match_id': 5973281999}, {'match_id': 5973234781}, {'match_id': 5973208315}, {'match_id': 5973196588}, {'match_id': 5973152941}, {'match_id': 5973113050}, {'match_id': 5973112878}, {'match_id': 5973101722}, {'match_id': 5973066314}, {'match_id': 5973055992}, {'match_id': 5973023984}, {'match_id': 5972981110}, {'match_id': 5972970190}, {'match_id': 5972931822}, {'match_id'

In [5]:
#there's probably a more elegant way to do this but I'm very comfortable
#with lists and given that it's a small amount (40,000 at most),
#a list worked fine for this
matches = []

OpenDota limits its free-tier users to a maximum of 60 calls per minute and 50,000 calls per month. To stay under 60 calls per minute, time.sleep() was added to slow the calls down. Once a call was made, the relevant part of the response -- in this case, the match id -- was put into the list. The website and the response above showed that only 100 match ids are returned per call, and that an available parameter restricts the results to matches with an id less than a given value. The second portion of this code therefore sets that parameter to the lowest match id from the previous response, fetches the next 100, and repeats the process.

In [6]:
#robocalling, but for good
#pages backwards through parsedMatches: the first call has no parameter,
#every later call asks for ids below the last id collected so far.
#400 calls in total, the same budget as one initial call plus 399 follow-ups
api_param = '?less_than_match_id='
page_url = api_start + api_target
for _ in range(400):
    call_matches = requests.get(page_url, timeout=30)
    #1.2s between calls keeps the rate under 60 calls per minute
    time.sleep(1.2)
    #stop on an HTTP error rather than trying to read ids out of an error body
    call_matches.raise_for_status()
    #parse the body once per response instead of once per row
    page = call_matches.json()
    if not page:
        #an empty page means nothing older is left; asking again would only
        #repeat the same request and burn monthly quota
        break
    matches.extend(entry['match_id'] for entry in page)
    #the last id appended is the cursor for the next page
    value = matches[-1]
    api_value = str(value)
    page_url = api_start + api_target + api_param + api_value

In [7]:
#checking the number of match ids; measured from the list itself so the
#figure stays correct whenever the collection cell is re-run
print(len(matches))

31717


In [8]:
#put the collected ids into a one-column dataframe. the data cleaning and
#feature engineering notebook reads these ids, and a dataframe makes it
#easy to inspect them for null and duplicate values first.
df = pd.DataFrame({'matches': matches})

In [9]:
#checking info() confirms both the non-null count and the dtype
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31717 entries, 0 to 31716
Data columns (total 1 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   matches  31717 non-null  int64
dtypes: int64(1)
memory usage: 247.9 KB


In [10]:
#checking for duplicates: any id with a count above 1 was collected
#more than once
df['matches'].value_counts()

5569054719    1
5518726664    1
5914965516    1
5704723983    1
5650765328    1
             ..
5277686646    1
3572306807    1
2447074169    1
5565873018    1
5615521794    1
Name: matches, Length: 31717, dtype: int64

In [11]:
#save the ids as a tab-separated text file, without the index column,
#so the next notebook can load them
df.to_csv('D:/DSF Files/matchids.txt', index=False, sep='\t')

In [12]:
#finally, the bread and butter of the notebook. this calls individual
#matches using the matchids gathered above. it creates a JSON file with
#the match id as the file name which is called in the next notebook
start = 'D:/DSF Files/'
folder = 'DotA 2/'
match = ''
end = '.json'
api_start = 'https://api.opendota.com/api/'
api_target = 'matches/'
#NOTE(review): this key is committed in plain text. it should be rotated and
#loaded from an environment variable or an untracked config file instead
api_key = '?api_key=72d85202-722b-40f3-9bee-1ded0440a148'
#ids whose JSON was actually written to disk
matchid = []
for i, value in enumerate(df['matches']):
    api_value = str(value)
    try:
        call_match = requests.get(api_start + api_target + api_value + api_key,
                                  timeout=30)
    except requests.RequestException as err:
        #one bad connection should not end a run of tens of thousands of
        #calls. only the exception type is printed because the message can
        #include the request URL, api key and all
        print('{}: {} failed ({}).'.format(i, value, type(err).__name__))
        continue
    finally:
        #pause after every attempt, successful or not
        time.sleep(0.075)
    if not call_match.ok:
        #don't save an error body under the match id and report it as done
        print('{}: {} failed (HTTP {}).'.format(i, value, call_match.status_code))
        continue
    match = call_match.json()
    #the with block closes the file even if json.dump raises
    with open(start + folder + api_value + end, "w") as a_file:
        json.dump(match, a_file)
    matchid.append(value)
    print('{}: {} done.'.format(i, value))

0: 5968250483 done.
1: 5968198971 done.
2: 5968163525 done.
3: 5968112278 done.
4: 5968064626 done.
5: 5968044234 done.
6: 5967997371 done.
7: 5967940836 done.
8: 5967926053 done.
9: 5967846026 done.
10: 5967738856 done.
11: 5967649063 done.
12: 5967451417 done.
13: 5967326687 done.
14: 5967166598 done.
15: 5967102246 done.
16: 5966984671 done.
17: 5966946335 done.
18: 5966933589 done.
19: 5966878884 done.
20: 5966841342 done.
21: 5966817865 done.
22: 5966785272 done.
23: 5966744146 done.
24: 5966634438 done.
25: 5966594090 done.
26: 5966551291 done.
27: 5966455987 done.
28: 5966366018 done.
29: 5966348078 done.
30: 5966250146 done.
31: 5966222152 done.
32: 5966123503 done.
33: 5966099510 done.
34: 5965957787 done.
35: 5965816843 done.
36: 5965644218 done.
37: 5965584704 done.
38: 5965581291 done.
39: 5965532499 done.
40: 5965532297 done.
41: 5965496389 done.
42: 5965492247 done.
43: 5965476834 done.
44: 5965443797 done.
45: 5965428147 done.
46: 5965420968 done.
47: 5965413775 done.
48

3418: 5818716476 done.
3419: 5818685136 done.
3420: 5818671804 done.
3421: 5818636018 done.
3422: 5818628372 done.
3423: 5818613653 done.
3424: 5818573158 done.
3425: 5818562596 done.
3426: 5818554224 done.
3427: 5818553088 done.
3428: 5818537337 done.
3429: 5818520130 done.
3430: 5818516382 done.
3431: 5818469831 done.
3432: 5818463080 done.
3433: 5818423673 done.
3434: 5818410541 done.
3435: 5818356423 done.
3436: 5818233593 done.
3437: 5818200829 done.
3438: 5818129179 done.
3439: 5818113468 done.
3440: 5818052637 done.
3441: 5818052226 done.
3442: 5817975494 done.
3443: 5817932141 done.
3444: 5817904346 done.
3445: 5817835869 done.
3446: 5817826442 done.
3447: 5817730399 done.
3448: 5817636213 done.
3449: 5817635225 done.
3450: 5817524771 done.
3451: 5817524430 done.
3452: 5817420392 done.
3453: 5817395656 done.
3454: 5817343331 done.
3455: 5817334548 done.
3456: 5817326716 done.
3457: 5817282433 done.
3458: 5817273519 done.
3459: 5817227289 done.
3460: 5817225062 done.
3461: 58171

6918: 5712940981 done.
6919: 5712936950 done.
6920: 5712931107 done.
6921: 5712927039 done.
6922: 5712916865 done.
6923: 5712892072 done.
6924: 5712891058 done.
6925: 5712886408 done.
6926: 5712880904 done.
6927: 5712866720 done.
6928: 5712856571 done.
6929: 5712840832 done.
6930: 5712822995 done.
6931: 5712816089 done.
6932: 5712789351 done.
6933: 5712783756 done.
6934: 5712759415 done.
6935: 5712709190 done.
6936: 5712679952 done.
6937: 5712654700 done.
6938: 5712613148 done.
6939: 5712561377 done.
6940: 5712530494 done.
6941: 5712495020 done.
6942: 5712489433 done.
6943: 5712475621 done.
6944: 5712434234 done.
6945: 5712426048 done.
6946: 5712410711 done.
6947: 5712323816 done.
6948: 5712312113 done.
6949: 5712300201 done.
6950: 5712239374 done.
6951: 5712235307 done.
6952: 5712204428 done.
6953: 5712151201 done.
6954: 5712124543 done.
6955: 5712123310 done.
6956: 5712120811 done.
6957: 5712113757 done.
6958: 5712040978 done.
6959: 5712031210 done.
6960: 5712011626 done.
6961: 57120

10417: 5628180909 done.
10418: 5628172860 done.
10419: 5628169547 done.
10420: 5628140206 done.
10421: 5628132113 done.
10422: 5628107154 done.
10423: 5628077759 done.
10424: 5627923036 done.
10425: 5627878427 done.
10426: 5627870273 done.
10427: 5627818555 done.
10428: 5627807329 done.
10429: 5627801988 done.
10430: 5627766264 done.
10431: 5627747946 done.
10432: 5627738434 done.
10433: 5627731553 done.
10434: 5627701366 done.
10435: 5627664547 done.
10436: 5627655668 done.
10437: 5627654189 done.
10438: 5627640487 done.
10439: 5627613006 done.
10440: 5627591997 done.
10441: 5627578910 done.
10442: 5627575156 done.
10443: 5627574383 done.
10444: 5627573013 done.
10445: 5627571045 done.
10446: 5627569671 done.
10447: 5627568346 done.
10448: 5627566504 done.
10449: 5627506216 done.
10450: 5627419639 done.
10451: 5627306171 done.
10452: 5627229068 done.
10453: 5627201508 done.
10454: 5627189232 done.
10455: 5627188065 done.
10456: 5627144904 done.
10457: 5627141847 done.
10458: 562712898

13917: 5530970227 done.
13918: 5530938713 done.
13919: 5530928341 done.
13920: 5530923307 done.
13921: 5530922586 done.
13922: 5530916100 done.
13923: 5530879900 done.
13924: 5530870106 done.
13925: 5530864832 done.
13926: 5530838172 done.
13927: 5530829540 done.
13928: 5530822362 done.
13929: 5530751294 done.
13930: 5530698263 done.
13931: 5530655135 done.
13932: 5530602600 done.
13933: 5530555623 done.
13934: 5530537148 done.
13935: 5530474571 done.
13936: 5530465964 done.
13937: 5530393918 done.
13938: 5530368265 done.
13939: 5530311506 done.
13940: 5530284833 done.
13941: 5530241378 done.
13942: 5530140896 done.
13943: 5530010351 done.
13944: 5529880314 done.
13945: 5529760127 done.
13946: 5529742998 done.
13947: 5529696563 done.
13948: 5529637797 done.
13949: 5529620344 done.
13950: 5529595821 done.
13951: 5529548565 done.
13952: 5529533972 done.
13953: 5529501978 done.
13954: 5529494375 done.
13955: 5529482049 done.
13956: 5529454955 done.
13957: 5529442754 done.
13958: 552944086

17916: 5432546123 done.
17917: 5432541889 done.
17918: 5432530568 done.
17919: 5432522365 done.
17920: 5432507070 done.
17921: 5432476467 done.
17922: 5432469746 done.
17923: 5432453912 done.
17924: 5432442394 done.
17925: 5432400187 done.
17926: 5432388286 done.
17927: 5432379242 done.
17928: 5432374066 done.
17929: 5432313466 done.
17930: 5432289688 done.
17931: 5432285081 done.
17932: 5432265943 done.
17933: 5432242170 done.
17934: 5432231648 done.
17935: 5432217865 done.
17936: 5432199121 done.
17937: 5432156057 done.
17938: 5432132646 done.
17939: 5432125468 done.
17940: 5432118027 done.
17941: 5432079975 done.
17942: 5432062377 done.
17943: 5431623306 done.
17944: 5431601321 done.
17945: 5431538040 done.
17946: 5431535490 done.
17947: 5431454062 done.
17948: 5431446895 done.
17949: 5431442424 done.
17950: 5431369227 done.
17951: 5431354124 done.
17952: 5431335227 done.
17953: 5431288240 done.
17954: 5431101973 done.
17955: 5431028714 done.
17956: 5430994532 done.
17957: 543099078

21916: 5347045063 done.
21917: 5347040373 done.
21918: 5346979478 done.
21919: 5346954534 done.
21920: 5346937472 done.
21921: 5346936263 done.
21922: 5346931391 done.
21923: 5346920886 done.
21924: 5346919142 done.
21925: 5346897787 done.
21926: 5346856993 done.
21927: 5346830064 done.
21928: 5346812605 done.
21929: 5346804264 done.
21930: 5346802572 done.
21931: 5346792343 done.
21932: 5346791136 done.
21933: 5346777955 done.
21934: 5346773710 done.
21935: 5346767564 done.
21936: 5346758562 done.
21937: 5346734380 done.
21938: 5346731144 done.
21939: 5346687416 done.
21940: 5346668556 done.
21941: 5346658216 done.
21942: 5346656359 done.
21943: 5346653599 done.
21944: 5346650587 done.
21945: 5346649403 done.
21946: 5346647307 done.
21947: 5346646122 done.
21948: 5346645632 done.
21949: 5346645139 done.
21950: 5346643863 done.
21951: 5346642216 done.
21952: 5346642034 done.
21953: 5346640615 done.
21954: 5346640464 done.
21955: 5346639435 done.
21956: 5346639293 done.
21957: 534663828

25915: 5231780282 done.
25916: 5231779254 done.
25917: 5231778898 done.
25918: 5231778811 done.
25919: 5231773838 done.
25920: 5231772550 done.
25921: 5231770430 done.
25922: 5231767627 done.
25923: 5231761021 done.
25924: 5231758221 done.
25925: 5231752710 done.
25926: 5231748872 done.
25927: 5231739729 done.
25928: 5231733688 done.
25929: 5231731345 done.
25930: 5231730456 done.
25931: 5231729572 done.
25932: 5231728722 done.
25933: 5231724825 done.
25934: 5231722445 done.
25935: 5231721433 done.
25936: 5231719717 done.
25937: 5231719503 done.
25938: 5231716783 done.
25939: 5231714439 done.
25940: 5231714148 done.
25941: 5231710175 done.
25942: 5231699539 done.
25943: 5231692776 done.
25944: 5231690681 done.
25945: 5231689512 done.
25946: 5231689495 done.
25947: 5231689181 done.
25948: 5231688579 done.
25949: 5231687889 done.
25950: 5231686144 done.
25951: 5231686143 done.
25952: 5231685852 done.
25953: 5231685742 done.
25954: 5231685451 done.
25955: 5231684858 done.
25956: 523168448

29812: 3463891097 done.
29813: 3463683071 done.
29814: 3463650789 done.
29815: 3463518763 done.
29816: 3463381305 done.
29817: 3462066680 done.
29818: 3461982772 done.
29819: 3461724849 done.
29820: 3461600560 done.
29821: 3461471433 done.
29822: 3461252867 done.
29823: 3461137056 done.
29824: 3459826635 done.
29825: 3459744836 done.
29826: 3459640258 done.
29827: 3459506954 done.
29828: 3459409167 done.
29829: 3459362678 done.
29830: 3459261215 done.
29831: 3459241952 done.
29832: 3459109372 done.
29833: 3458981776 done.
29834: 3458854581 done.
29835: 3457764469 done.
29836: 3457676571 done.
29837: 3456885232 done.
29838: 3455860594 done.
29839: 3455804292 done.
29840: 3455692795 done.
29841: 3455159211 done.
29842: 3455042461 done.
29843: 3453966095 done.
29844: 3453899726 done.
29845: 3453678774 done.
29846: 3453529796 done.
29847: 3453445439 done.
29848: 3453428132 done.
29849: 3453336148 done.
29850: 3452367068 done.
29851: 3452296976 done.
29852: 3452192182 done.
29853: 345049089