In [1]:
import pandas as pd
import requests
from IPython.display import display, HTML
from tqdm import tqdm, trange
from time import sleep
import os
from glob import glob
import json

# Pro players

## Download PRO-players list

In [3]:
result = requests.get('https://api.opendota.com/api/proPlayers')

In [6]:
with open("data/opendota/proplayers/list.json", "w") as f:
    f.write(result.text)

## Download per-user datasets

In [4]:
proPlayers = pd.read_json("data/opendota/proplayers/list.json")
display(proPlayers.head())

Unnamed: 0,account_id,steamid,avatar,avatarmedium,avatarfull,profileurl,personaname,last_login,full_history_time,cheese,...,plus,name,country_code,fantasy_role,team_id,team_name,team_tag,is_locked,is_pro,locked_until
0,88470,76561197960354192,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcommunity.com/id/misterdurst69/,Tzy丶,,2020-04-23 03:47:06.179000+00:00,0,...,1.0,TZY,cn,1,6020739,,,False,True,
1,1296625,76561197961562352,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcommunity.com/id/jnewsham/,Newbsham,2018-12-21T03:12:56.027Z,2020-03-28 12:09:13.041000+00:00,0,...,1.0,Newsham,,2,6904594,TEAM TEAM,tt,True,True,
2,3916428,76561197964182160,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcommunity.com/profiles/7656119796...,gigablaster,,2020-03-12 11:37:28.695000+00:00,0,...,,EGM,se,2,7428719,GODSENT,GODSENT,True,True,
3,3940262,76561197964205984,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcommunity.com/id/March8/,March,,2020-04-23 07:23:00.312000+00:00,0,...,1.0,March,,2,2108395,TNC Predator,TNC,True,True,
4,4281729,76561197964547456,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcdn-a.akamaihd.net/steamcommunity...,https://steamcommunity.com/profiles/7656119796...,syndereN,,2020-04-19 02:28:57.402000+00:00,0,...,1.0,syndereN,,2,0,Anti-MagE-,A-M-,True,True,


## Download pro-player matches history

In [5]:
with tqdm(total=len(proPlayers)) as pbar: 
    for _, player in proPlayers.iterrows():
        account_id = player['account_id']
        url = f"https://api.opendota.com/api/players/{account_id}/matches"
        filename = f"data/opendota/proplayers/pro_players/{account_id}.json"
        
        if os.path.exists(filename):
            pbar.update(1)
            continue
        
        while (True):
            try:
                result = requests.get(url)
                if result.status_code == 200:
                    with open(filename, "w") as f:
                        f.write(result.text)
                    break
            except:
                sleep(1.0)

        pbar.update(1)

100%|██████████| 1078/1078 [14:58<00:00,  1.20it/s]


# Pro matches

## Download lists

In [33]:
NUM_LISTS = 100 # 100 * 100 = 10k matches

match_id = None
for i in trange(NUM_LISTS):
    url = "https://api.opendota.com/api/proMatches"
    if match_id != None:
        url = f"{url}?less_than_match_id={match_id}"
    
    pro_matches = requests.get(url)
    
    data = pd.read_json(pro_matches.text)
    match_id = data['match_id'].min()
    
    with open(f"data/opendota/proplayers/matches_lists/{i}.json", "w") as f:
        f.write(pro_matches.text)
        
    # 60 calls / minute
    sleep(1)

100%|██████████| 100/100 [02:18<00:00,  1.39s/it]


## Merge lists

In [37]:
files = glob("data/opendota/proplayers/matches_lists/*")

joined_df = pd.DataFrame()
for file in files:
    data = pd.read_json(file)
    
    joined_df = joined_df.append(data, ignore_index=True)
    
joined_df.to_csv("data/opendota/proplayers/promatches.csv", index=False)

## Download matches

In [2]:
matches_list = pd.read_csv("data/opendota/proplayers/promatches.csv")
display(matches_list.head())

Unnamed: 0,match_id,duration,start_time,radiant_team_id,radiant_name,dire_team_id,dire_name,leagueid,league_name,series_id,series_type,radiant_score,dire_score,radiant_win
0,5307673835,1321,2020-03-22 14:43:25,7748848.0,AsghaR,7787796.0,LEGENDARY,11439,PESC Monthly 2020 Season 1,419196,0,26,6,True
1,5307672307,1568,2020-03-22 14:42:46,5211276.0,Thunder,6593779.0,Gentlemen,11439,PESC Monthly 2020 Season 1,419194,0,42,8,True
2,5307670528,3433,2020-03-22 14:42:00,7764709.0,BeWare,,,11439,PESC Monthly 2020 Season 1,0,0,36,37,False
3,5307616134,648,2020-03-22 14:18:44,7528248.0,Manaburn,7715060.0,WOLVES,11478,YouTube SIVVIT - Big Russian Show,419183,0,15,7,True
4,5307609261,1493,2020-03-22 14:15:31,7359442.0,Team Aspirations,7314697.0,FIVE BROTHERS,11249,RED STAR CUP,419159,1,40,30,True


In [3]:
with tqdm(total=len(matches_list)) as pbar: 
    for _, match in matches_list.iterrows():
        match_id = match['match_id']
        url = f"https://api.opendota.com/api/matches/{match_id}"
        filename = f"data/opendota/proplayers/pro_matches/{match_id}.json"
        
        if os.path.exists(filename):
            pbar.update(1)
            continue
        
        while (True):
            try:
                result = requests.get(url)
                if result.status_code == 200:
                    with open(filename, "w") as f:
                        f.write(result.text)
                    break
            except:
                sleep(1.0)

        pbar.update(1)

100%|██████████| 10000/10000 [2:52:05<00:00,  1.03s/it]  


## Merge matches