In [5]:
# Import necessary dependencies
import time
from typing import Dict, List, Optional

import pandas as pd
from requests import get
from tqdm.notebook import tqdm

### Match Parsing

#### Sample of random matches

In [6]:
def get_matches_sample(
        min_rank: Optional[int] = None,
        max_rank: Optional[int] = None,
        mmr_ascending: Optional[int] = None,
        mmr_descending: Optional[int] = None,
        less_than_match_id: Optional[int] = None,
        api_link: str = "https://api.opendota.com/api/publicMatches",
        timeout: float = 30.0,
) -> List[Dict]:
    """
    Get a sample of 100 random matches from 'https://api.opendota.com/api/publicMatches' or
    from a different API specified in `api_link`.

    Parameters
    ----------
    min_rank: int, optional
        Minimum rank for the matches.
        Ranks are represented by integers (10-15: Herald, 20-25: Guardian, 30-35: Crusader, 40-45: Archon,
        50-55: Legend, 60-65: Ancient, 70-75: Divine, 80-85: Immortal).
        Each increment represents an additional star.
    max_rank: int, optional
        Maximum rank for the matches.
        Ranks are represented by integers (10-15: Herald, 20-25: Guardian, 30-35: Crusader, 40-45: Archon,
        50-55: Legend, 60-65: Ancient, 70-75: Divine, 80-85: Immortal).
        Each increment represents an additional star.
    mmr_ascending: int, optional
        Order by average rank ascending.
    mmr_descending: int, optional
        Order by average rank descending.
    less_than_match_id: int, optional
        Only return matches whose match ID is lower than this value.
        Used to page backwards through older matches.
    api_link: str
        Link to the API where requests should be sent.
    timeout: float
        Number of seconds to wait for the server before giving up.
        Without a timeout a stalled connection would block the notebook indefinitely.

    Returns
    -------
    response: list[dict]
        Information about the sampled matches, or an empty list when the server
        answers with a status code other than 200.
        [Output format description](https://docs.opendota.com/#tag/public-matches/operation/get_public_matches).

    Raises
    ------
    Any exception raised by `get` itself (e.g. connection errors or the timeout
    expiring) is not caught here and propagates to the caller.
    """
    # Parameters left as None are passed through unchanged, exactly as before.
    query = {
        "less_than_match_id": less_than_match_id,
        "min_rank": min_rank,
        "max_rank": max_rank,
        "mmr_ascending": mmr_ascending,
        "mmr_descending": mmr_descending,
    }

    response = get(api_link, params=query, timeout=timeout)

    if response.status_code == 200:
        return response.json()

    # Non-200 answers are reported but deliberately not raised, so callers can
    # treat "no data" and "request rejected" the same way.
    print(f"Failed to retrieve data, status code: {response.status_code}")
    return []

In [7]:
# Fetch one sample of matches using the default filters (this calls the live API)
# and check how many records came back.
test_response = get_matches_sample()
len(test_response)

100

In [8]:
# Create a DataFrame from the received response and preview the first rows
match_stats_df = pd.DataFrame(test_response)
match_stats_df.head()

Unnamed: 0,match_id,match_seq_num,radiant_win,start_time,duration,lobby_type,game_mode,avg_rank_tier,num_rank_tier,cluster,radiant_team,dire_team
0,8211165303,6900604334,True,1741793361,482,7,22,81,5,153,"[97, 14, 38, 35, 74]","[41, 62, 40, 17, 44]"
1,8211146708,6900603441,True,1741792588,1290,7,22,21,3,227,"[36, 102, 123, 7, 15]","[84, 94, 5, 2, 39]"
2,8211146116,6900603067,True,1741792564,1290,7,22,54,7,117,"[49, 37, 109, 107, 1]","[30, 14, 70, 40, 94]"
3,8211146001,6900603964,False,1741792564,1360,7,22,73,5,181,"[13, 29, 9, 70, 30]","[109, 17, 63, 64, 81]"
4,8211145418,6900604512,True,1741792538,1402,7,22,72,2,185,"[53, 60, 37, 7, 74]","[54, 84, 30, 39, 114]"


In [11]:
def retrieve_x_matches(
        x: int,
        daily_limit: int = 2000,
        rate_limit: int = 59,
) -> List[Dict]:
    """
    Retrieve X matches while adhering to API rate limits.

    Matches are fetched page by page through `get_matches_sample`, using the
    lowest match ID seen on the previous page as the exclusive upper bound
    (`less_than_match_id`) for the next one.

    Parameters
    ----------
    x: int
        The number of matches to retrieve. Values <= 0 return an empty list
        without sending any request.
    daily_limit: int
        Maximum number of requests this call is allowed to send.
    rate_limit: int
        Maximum number of requests per minute. After every `rate_limit`
        requests the function sleeps until a full minute has passed since the
        current window started.

    Returns
    -------
    list[dict]
        Information about the retrieved matches, without duplicate match IDs.
        May contain fewer than `x` items when the request budget is exhausted,
        a request fails, or the API stops returning new matches.

    Raises
    ------
    ValueError
        If `rate_limit` is smaller than 1.
    """
    if rate_limit < 1:
        raise ValueError("rate_limit must be at least 1")
    if x <= 0:
        return []

    matches: List[Dict] = []
    seen_ids = set()
    less_than_match_id = None
    request_count = 0
    window_start = time.time()

    # Ceiling division: expected number of pages, assuming 100 matches per page.
    total_iterations = -(-x // 100)

    with tqdm(total=total_iterations, desc="Retrieving matches") as pbar:
        while len(matches) < x:
            if request_count >= daily_limit:
                print("Daily request limit reached.")
                break

            # Once a full batch of `rate_limit` requests has been sent, wait
            # out the remainder of the minute before starting the next batch.
            if request_count and request_count % rate_limit == 0:
                elapsed_time = time.time() - window_start
                if elapsed_time < 60:
                    time.sleep(60 - elapsed_time)
                window_start = time.time()

            data = get_matches_sample(less_than_match_id=less_than_match_id)
            request_count += 1
            if not data:
                # Empty result: either the request failed or nothing is left.
                break

            # Keep only matches not collected yet, so overlapping pages can
            # never introduce duplicates.
            added = 0
            for match in data:
                match_id = match["match_id"]
                if match_id not in seen_ids:
                    seen_ids.add(match_id)
                    matches.append(match)
                    added += 1
            pbar.update(1)

            if added == 0:
                # The page contained nothing new; stop instead of looping forever.
                break

            # `less_than_match_id` is an exclusive bound, so the lowest ID of
            # this page is passed as-is. Subtracting 1 would skip the match
            # whose ID is exactly one below it. `min` avoids relying on the
            # page being sorted.
            less_than_match_id = min(match["match_id"] for match in data)

    return matches[:x]

In [None]:
# Retrieve 12,000 matches by paging through the API. The progress bar expects
# 120 requests, and retrieve_x_matches pauses to respect its per-minute request
# cap, so expect this cell to run for at least a couple of minutes.
matches = retrieve_x_matches(12000)
print(f"Retrieved {len(matches)} matches.")

Retrieving matches:   0%|          | 0/120 [00:00<?, ?it/s]

Retrieved 12000 matches.


In [15]:
# Build a DataFrame from the retrieved matches, confirm its dimensions,
# then preview the first rows.
matches_df = pd.DataFrame(matches)
print(matches_df.shape)
matches_df.head()

(12000, 12)


Unnamed: 0,match_id,match_seq_num,radiant_win,start_time,duration,lobby_type,game_mode,avg_rank_tier,num_rank_tier,cluster,radiant_team,dire_team
0,8211165303,6900604334,True,1741793361,482,7,22,81,5,153,"[97, 14, 38, 35, 74]","[41, 62, 40, 17, 44]"
1,8211164203,6900610310,True,1741793318,830,7,22,81,9,232,"[17, 114, 29, 75, 68]","[15, 64, 7, 14, 70]"
2,8211161503,6900609448,True,1741793213,946,7,22,72,3,187,"[42, 7, 71, 10, 50]","[49, 91, 17, 8, 136]"
3,8211155611,6900605057,True,1741792978,929,7,22,62,4,410,"[135, 93, 30, 50, 80]","[14, 18, 102, 76, 62]"
4,8211153206,6900611136,False,1741792848,1396,7,22,44,6,227,"[63, 1, 49, 26, 88]","[11, 46, 138, 29, 51]"


In [17]:
# Save to a temporary ".csv" file (index=False leaves the row index out of the file).
# NOTE(review): "data/ignore_folder/" presumably has to exist before this cell runs — confirm.
matches_df.to_csv("data/ignore_folder/match_stats_12k.csv", index=False)