In [39]:
import json
import random, time
from urllib.parse import quote

import numpy as np
import pandas as pd
import requests

In [119]:
### get_matches returns a dataframe from the opendota API with given parameters
### don't run this too often, as there is a request limit of one per second and 50000 per month
def get_matches(matches=10000, start_time_less_than=1594771200, mmr_lower=2000, mmr_upper=2500, timeout=300):
    """Run a SQL query against the OpenDota explorer endpoint and return its rows.

    The query joins ``public_matches`` with ``public_player_matches`` on
    ``match_id`` and keeps rows with ``lobby_type=7``, ``game_mode=22``, a
    non-zero ``avg_mmr``, a non-zero ``hero_id``, ``start_time`` below
    ``start_time_less_than`` and ``avg_mmr`` strictly between ``mmr_lower`` and
    ``mmr_upper``. Rows are ordered by ``start_time``, newest first.

    Parameters
    ----------
    matches : int
        Number of matches wanted. The SQL ``LIMIT`` is ``matches * 10`` rows
        (presumably one joined row per player of a match -- confirm against
        the API schema).
    start_time_less_than : int
        Exclusive upper bound on ``start_time``. There is no lower bound.
    mmr_lower, mmr_upper : int
        Exclusive lower and upper bounds on ``avg_mmr``.
    timeout : float or None
        Seconds ``requests`` waits on the server before raising; ``None``
        disables the timeout.

    Returns
    -------
    pandas.DataFrame
        Built from the ``"rows"`` list of the JSON response.

    Raises
    ------
    ValueError, TypeError
        If a numeric argument cannot be converted with ``int()`` or
        ``matches`` is negative.
    requests.HTTPError
        If the server answers with an error status.
    KeyError
        If the JSON response has no ``"rows"`` field.
    """
    # we build the base url for the opendota api
    base = 'https://api.opendota.com/api/explorer'

    # Coerce to int so only whole numbers are ever interpolated into the SQL
    # text (floats such as 1e4 become valid, arbitrary strings are rejected).
    matches = int(matches)
    start_time_less_than = int(start_time_less_than)
    mmr_lower = int(mmr_lower)
    mmr_upper = int(mmr_upper)
    if matches < 0:
        raise ValueError("matches must be non-negative")

    # The query is kept as readable SQL and percent-encoded in one step, so the
    # text shown here is exactly what the server receives after decoding.
    sql = (
        "SELECT *\n"
        "FROM public_matches\n"
        "LEFT JOIN public_player_matches\n"
        "ON public_matches.match_id = public_player_matches.match_id\n"
        "WHERE lobby_type=7 AND game_mode=22 AND avg_mmr!=0"
        f" AND start_time<{start_time_less_than}"
        f" AND avg_mmr>{mmr_lower} AND avg_mmr<{mmr_upper}"
        " AND hero_id!=0\n"
        "ORDER BY start_time DESC\n"
        f"LIMIT {matches * 10}"
    )
    url = f"{base}?sql={quote(sql, safe='')}"

    result = requests.get(url, timeout=timeout)
    print(result)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    result.raise_for_status()
    j_matches = result.json()

    if not isinstance(j_matches, dict) or "rows" not in j_matches:
        # Surface whatever the server sent back (truncated) to make the failure diagnosable.
        raise KeyError(f"OpenDota explorer response has no 'rows' field: {repr(j_matches)[:500]}")

    return pd.DataFrame(j_matches["rows"])

In [120]:
def practice_data(matches, sleep, start_time_less_than=1594771200):
    """Download one DataFrame per 1000-wide MMR bracket between 1000 and 8000.

    Calls ``get_matches`` once for each of the brackets (1000, 2000),
    (2000, 3000), ..., (7000, 8000), pausing before every call.

    Parameters
    ----------
    matches : int
        Forwarded to ``get_matches`` as the number of matches per bracket.
    sleep : float
        Seconds to wait before each request (the first one included).
    start_time_less_than : int
        Forwarded to ``get_matches`` as the ``start_time`` cut-off.

    Returns
    -------
    list
        The ``get_matches`` results, in ascending bracket order.
    """
    # Bracket edges 1000, 2000, ..., 8000; each pair of neighbours is one bracket.
    mmr_steps = range(1000, 8001, 1000)
    mmr_brackets = list(zip(mmr_steps[:-1], mmr_steps[1:]))

    dataframes = []
    for lower, upper in mmr_brackets:
        # Explicit pause before each request rather than a side effect hidden
        # in a comprehension filter.
        time.sleep(sleep)
        dataframes.append(
            get_matches(matches, start_time_less_than=start_time_less_than, mmr_lower=lower, mmr_upper=upper)
        )

    return dataframes
        
        

In [121]:
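# Left commented out, presumably to avoid spending API requests on every run.
# Uncomment to fetch 100 matches per MMR bracket with a 2-second pause before each request:
#practice_data(100, 2)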

In [129]:
# Practice set for avg_mmr strictly between 2000 and 3000:
# 100000 matches requested, start_time below 1594771200.
practice_data_2000_3000 = get_matches(
    mmr_lower=2000,
    mmr_upper=3000,
    start_time_less_than=1594771200,
    matches=100000,
)

<Response [200]>


In [89]:
# Practice set for avg_mmr strictly between 3000 and 4000:
# 100000 matches requested, start_time below 1594771200.
practice_data_3000_4000 = get_matches(
    mmr_lower=3000,
    mmr_upper=4000,
    start_time_less_than=1594771200,
    matches=100000,
)

<Response [200]>


In [104]:
# Practice set with the widest MMR window used in this notebook (avg_mmr strictly
# between 0 and 10000): 100000 matches requested, start_time below 1594771200.
practice_data_no_limit = get_matches(
    mmr_lower=0,
    mmr_upper=10000,
    start_time_less_than=1594771200,
    matches=100000,
)

<Response [200]>


In [130]:
# Write the three practice DataFrames to pickle files under ../data/.
for _frame, _target in (
    (practice_data_2000_3000, "../data/practice_data_2000_3000.pkl"),
    (practice_data_3000_4000, "../data/practice_data_3000_4000.pkl"),
    (practice_data_no_limit, "../data/practice_data_no_limit.pkl"),
):
    _frame.to_pickle(_target)

**We now fetch test data for the same MMR brackets, using a cut-off one day later than for the practice data, with the aim of avoiding duplicates.**

In [106]:
# Test set for the 2000-3000 bracket; the cut-off is one day (86400 s) after the
# practice cut-off of 1594771200.
_test_raw_2000_3000 = get_matches(matches=10000, start_time_less_than=1594857600, mmr_lower=2000, mmr_upper=3000)
# get_matches only bounds start_time from above, so when the extra day holds fewer
# rows than the LIMIT the result reaches back below 1594771200 -- the range the
# practice queries draw from. Drop those rows so no practice match can reappear here.
# pd.to_numeric guards against the API delivering start_time as text.
# An empty result has no columns to filter on, so it is passed through unchanged.
test_data_2000_3000 = (
    _test_raw_2000_3000
    if _test_raw_2000_3000.empty
    else _test_raw_2000_3000.loc[pd.to_numeric(_test_raw_2000_3000["start_time"]) >= 1594771200]
)

<Response [200]>


In [107]:
# Test set for the 3000-4000 bracket; the cut-off is one day (86400 s) after the
# practice cut-off of 1594771200.
_test_raw_3000_4000 = get_matches(matches=10000, start_time_less_than=1594857600, mmr_lower=3000, mmr_upper=4000)
# get_matches only bounds start_time from above, so when the extra day holds fewer
# rows than the LIMIT the result reaches back below 1594771200 -- the range the
# practice queries draw from. Drop those rows so no practice match can reappear here.
# pd.to_numeric guards against the API delivering start_time as text.
# An empty result has no columns to filter on, so it is passed through unchanged.
test_data_3000_4000 = (
    _test_raw_3000_4000
    if _test_raw_3000_4000.empty
    else _test_raw_3000_4000.loc[pd.to_numeric(_test_raw_3000_4000["start_time"]) >= 1594771200]
)

<Response [200]>


In [108]:
# Test set for the widest MMR window (0-10000); the cut-off is one day (86400 s)
# after the practice cut-off of 1594771200.
_test_raw_no_limit = get_matches(matches=10000, start_time_less_than=1594857600, mmr_lower=0, mmr_upper=10000)
# get_matches only bounds start_time from above, so when the extra day holds fewer
# rows than the LIMIT the result reaches back below 1594771200 -- the range the
# practice queries draw from. Drop those rows so no practice match can reappear here.
# pd.to_numeric guards against the API delivering start_time as text.
# An empty result has no columns to filter on, so it is passed through unchanged.
test_data_no_limit = (
    _test_raw_no_limit
    if _test_raw_no_limit.empty
    else _test_raw_no_limit.loc[pd.to_numeric(_test_raw_no_limit["start_time"]) >= 1594771200]
)

<Response [200]>


In [109]:
# Write the three test DataFrames to pickle files under ../data/.
for _frame, _target in (
    (test_data_2000_3000, "../data/test_data_2000_3000.pkl"),
    (test_data_3000_4000, "../data/test_data_3000_4000.pkl"),
    (test_data_no_limit, "../data/test_data_no_limit.pkl"),
):
    _frame.to_pickle(_target)