In [82]:
import json
import os
import time
from glob import glob

import pandas as pd
import requests
import tqdm
from IPython.display import display

from common import entry_advanced

In [83]:
# Load the bookmaker odds table and preview its first rows.
odds_csv_path = "bookies/odds.csv"
bookies = pd.read_csv(odds_csv_path)
bookies.head()

Unnamed: 0,ID,Team 1,Team 2,Odds 1,Odds 2,Winner
0,0,OG,Team Liquid,1.71,2.06,1
1,1,PSG.LGD,Team Liquid,1.71,2.06,2
2,2,Team Liquid,Team Secret,1.75,2.33,1
3,3,PSG.LGD,OG,1.42,2.39,2
4,4,Vici Gaming,Team Secret,1.84,1.88,2


In [84]:
# Fetch the full OpenDota team list once. Fail fast on HTTP errors so that an
# error payload is never parsed as team data by the following cells, and use a
# timeout so a stalled connection cannot hang the notebook.
teams_response = requests.get("https://api.opendota.com/api/teams", timeout=30)
teams_response.raise_for_status()
teams_json = teams_response.text

In [92]:
# Resolve every team name used in the odds table to its OpenDota team_id.
required_teams = set(bookies['Team 1']) | set(bookies['Team 2'])

# Parse the payload explicitly: passing a literal JSON string to pd.read_json
# is deprecated in recent pandas releases.
all_teams = pd.DataFrame(json.loads(teams_json))

teams_ids = {}
missing_teams = []
for team in required_teams:
    candidates = all_teams[all_teams["name"] == team]

    if candidates.empty:
        # Collect all unknown names so they can be reported together below.
        missing_teams.append(team)
        continue

    if len(candidates) > 1:
        # Several teams share this name: keep the one with the latest match.
        team_row = candidates.loc[candidates["last_match_time"].idxmax()]
    else:
        team_row = candidates.iloc[0]

    teams_ids[team_row['name']] = team_row['team_id']

if missing_teams:
    raise LookupError(
        f"Teams from the odds table not found in the OpenDota team list: {missing_teams}"
    )

## Download players list per team

In [95]:
# Save the player list of every resolved team as <team_id>.json.
team_players_dir = "data/opendota/proplayers/test_teams"
os.makedirs(team_players_dir, exist_ok=True)

for team_id in tqdm.tqdm(teams_ids.values()):
    url = f"https://api.opendota.com/api/teams/{team_id}/players?"
    response = requests.get(url, timeout=30)
    # Never store an error body (e.g. a rate-limit reply) as if it were a player list.
    response.raise_for_status()
    with open(f"{team_players_dir}/{team_id}.json", "w", encoding="utf-8") as f:
        f.write(response.text)

100%|██████████| 17/17 [00:13<00:00,  1.25it/s]


## Check that all players are in the database

In [96]:
# Download the query result for every current member of each team.
# `teams` maps team_id -> list of account ids; teams that do not have exactly
# five current members keep an empty list.
MAX_DOWNLOAD_ATTEMPTS = 30
RETRY_DELAY_SECONDS = 1.0


def download_with_retry(url, filename, max_attempts=MAX_DOWNLOAD_ATTEMPTS, delay=RETRY_DELAY_SECONDS):
    """Fetch ``url`` and write the response body to ``filename``.

    A request that raises a ``requests`` error or returns a status other than
    200 is retried after ``delay`` seconds.

    Raises:
        RuntimeError: if no attempt out of ``max_attempts`` returned status 200.
    """
    for _ in range(max_attempts):
        try:
            result = requests.get(url, timeout=30)
        except requests.RequestException:
            result = None

        if result is not None and result.status_code == 200:
            with open(filename, "w", encoding="utf-8") as out:
                out.write(result.text)
            return

        time.sleep(delay)

    raise RuntimeError(f"Giving up on {url} after {max_attempts} attempts")


with open("utils/player_query.txt", "r") as f:
    query_template = f.read()

files = glob("data/opendota/proplayers/test_teams/*")
teams = {}

for file in tqdm.tqdm(files):
    print(file)
    team_players = pd.read_json(file)
    # File names are "<team_id>.json"; os.path keeps this independent of the path separator.
    team_id = int(os.path.splitext(os.path.basename(file))[0])
    print(team_id)
    teams[team_id] = []

    # An empty player list yields a frame without columns; nothing to download.
    if 'is_current_team_member' not in team_players.columns:
        continue

    players_current = team_players[team_players['is_current_team_member'] == True]
    # Only teams with exactly five current members are used.
    if len(players_current) != 5:
        continue

    for _, player in players_current.iterrows():
        account_id = player['account_id']
        url = query_template.replace("[[[account_id]]]", str(account_id))
        filename = f"data/opendota/proplayers/test_players/{account_id}.json"

        teams[team_id].append(account_id)

        # Already downloaded on a previous run.
        if os.path.exists(filename):
            continue

        download_with_retry(url, filename)

100%|██████████| 17/17 [00:00<00:00, 214.57it/s]

data/opendota/proplayers/test_teams/111474.json
111474
data/opendota/proplayers/test_teams/2586976.json
2586976
data/opendota/proplayers/test_teams/39.json
39
data/opendota/proplayers/test_teams/36.json
36
data/opendota/proplayers/test_teams/1375614.json
1375614
data/opendota/proplayers/test_teams/6214973.json
6214973
data/opendota/proplayers/test_teams/1883502.json
1883502
data/opendota/proplayers/test_teams/2163.json
2163
data/opendota/proplayers/test_teams/5029074.json
5029074
data/opendota/proplayers/test_teams/350190.json
350190
data/opendota/proplayers/test_teams/15.json
15
data/opendota/proplayers/test_teams/5065748.json
5065748
data/opendota/proplayers/test_teams/726228.json
726228
data/opendota/proplayers/test_teams/2108395.json
2108395
data/opendota/proplayers/test_teams/6666989.json
6666989
data/opendota/proplayers/test_teams/6209804.json
6209804
data/opendota/proplayers/test_teams/1838315.json
1838315





## Calculate per-player stats

In [97]:
# Aggregate each downloaded match file into one row of mean stats per player.
files = glob("data/opendota/proplayers/test_players/*")

# Identifier/metadata columns that are dropped before averaging.
NON_STAT_COLUMNS = [
    "match_id",
    "player_slot",
    "radiant_win",
    "start_time",
    "lobby_type",
    "game_mode",
    "leaver_status",
    "hero_id",
    "version",
    "skill",
    "party_size"
]

player_records = []
for file in tqdm.tqdm(files):
    # read_json's second positional parameter is `orient`, not a file mode,
    # so only the path is passed.
    matches = pd.read_json(file)
    # File names are "<account_id>.json"; the id is kept as a string here.
    account_id = os.path.splitext(os.path.basename(file))[0]
    num_matches = len(matches)

    # player_slot < 128 is treated as the Radiant side; the player won when
    # that side flag agrees with radiant_win.
    matches['win'] = (matches['player_slot'] < 128) == matches['radiant_win']

    matches_clean = matches.drop(columns=NON_STAT_COLUMNS)

    player_records.append({
        'account_id': account_id,
        'num_matches': num_matches,
        **(matches_clean.mean().to_dict())
    })

# Build the frame once: DataFrame.append was removed in pandas 2.0 and growing
# a frame row by row is quadratic.
players = pd.DataFrame(player_records)

100%|██████████| 85/85 [00:09<00:00,  9.21it/s]


In [100]:
# Build one feature row per bookmaker match from the stats of both line-ups.
# Account ids are compared as 64-bit integers: int32 cannot represent ids
# above 2**31 - 1. The conversion is loop-invariant, so it is done once.
player_account_ids = players['account_id'].astype('int64')

match_records = []
for _, match in tqdm.tqdm(bookies.iterrows()):
    team_rad = teams_ids[match['Team 1']]
    team_norad = teams_ids[match['Team 2']]

    players_rad_ids = teams[team_rad]
    players_norad_ids = teams[team_norad]

    # Skip matches where either team lacks a complete five-player list.
    if (len(players_rad_ids) != 5 or len(players_norad_ids) != 5):
        continue

    players_rad = players[player_account_ids.isin(players_rad_ids)]
    players_norad = players[player_account_ids.isin(players_norad_ids)]

    # NOTE(review): the ' Winner' key carries a leading space; presumably it
    # mirrors the raw CSV header — confirm against bookies/odds.csv.
    # Assumes entry_advanced returns a dict-like row — TODO confirm.
    entry = entry_advanced(players_rad, players_norad, match[' Winner'] == 2)

    entry['odds_1'] = match['Odds 1']
    entry['odds_2'] = match['Odds 2']
    entry['winner'] = match[' Winner']

    match_records.append(entry)

# Build the frame once: DataFrame.append was removed in pandas 2.0.
result_df = pd.DataFrame(match_records)

45it [00:00, 84.48it/s]


In [101]:
# Persist the assembled dataset; the positional row index is not written.
result_df.to_csv("datasets/bookies.csv", index=False)