In [1]:
import requests
from datetime import datetime, timedelta
import time
import pandas as pd

Retrieve data from OpenDota using its API. <br>
The target is 10,000 professional matches played from March 6th, 2023 until March 29th, 2023.
However, the oldest match found in the dataset was from 3 months ago. This is because the results of those matches might not be updated immediately after the game ended.

In [58]:
# Collect up to 10,000 professional match IDs from OpenDota.
# The loop pages through the endpoint by passing the last match ID of each
# page back as `less_than_match_id` on the next request.
api_url = 'https://api.opendota.com/api/proMatches'
# NOTE(review): the OpenDota docs for /proMatches appear to list only
# `less_than_match_id`; `date_min`/`date_max` may be ignored by the server,
# which would explain matches outside the requested date range -- confirm.
params = {
    'less_than_match_id': None,  # requests drops None-valued params, so the first call is unfiltered
    'date_min': '2023-03-06',
    'date_max': datetime.now().strftime('%Y-%m-%d')
}

match_ids = []
request_count = 0
start_time = time.time()

# Stop after this many failed requests in a row instead of looping forever
# when the API is down or the client is being rate limited.
max_consecutive_failures = 10
consecutive_failures = 0

# retrieve dota 2 matches
while len(match_ids) < 10000:
    # send GET request to API with current less_than_match_id value;
    # a timeout keeps a stalled connection from hanging the cell
    try:
        response = requests.get(api_url, params=params, timeout=30)
    except requests.RequestException as exc:
        response = None
        print(f"Request to OpenDota API failed: {exc}")

    # count every request that was attempted, including the final one
    request_count += 1

    if response is not None and response.status_code == 200:
        consecutive_failures = 0

        # extract match IDs from response data
        data = response.json()
        match_ids += [match['match_id'] for match in data]

        # a short page means there are no more matches to retrieve
        if len(data) < 100:
            break

        # set less_than_match_id parameter to last match ID in current response
        params['less_than_match_id'] = data[-1]['match_id']
    else:
        print("Error retrieving data from OpenDota API.")
        consecutive_failures += 1
        if consecutive_failures >= max_consecutive_failures:
            print(f"Giving up after {consecutive_failures} consecutive failed requests.")
            break

    # wait for 1 second before making another request
    time.sleep(1)

print(f"Retrieved data for {len(match_ids)} professional matches in {request_count} requests.")

Error retrieving data from OpenDota API.
Error retrieving data from OpenDota API.
Error retrieving data from OpenDota API.
Error retrieving data from OpenDota API.
Retrieved data for 10000 professional matches in 104 requests.


Save the match IDs to a CSV file.

In [60]:
# Wrap the collected IDs in a one-column frame and persist it without the index.
matches_df = pd.DataFrame({'Match ID': match_ids})
matches_df.to_csv(path_or_buf='pro_match_ids_march_2023.csv', index=False)

Run this cell to load the saved match IDs from the CSV file into a DataFrame.

In [2]:
# Reload the match IDs written by the save cell and expose them as a plain list.
match_df = pd.read_csv(filepath_or_buffer='pro_match_ids_march_2023.csv')
match_ids = match_df.loc[:, 'Match ID'].tolist()

================================ Resource priority ================================

In [9]:
# Fetch per-player resource stats (gold, gold spent, XPM, lane) for the first
# 500 match IDs and collect one record per player in `results`.

# set up API URL and parameters
api_url = 'https://api.opendota.com/api/matches/{}/'

# initialize empty list to store results
results = []
max_attempts = 3

# loop through match IDs and retrieve relevant data
for match_id in match_ids[:500]:
    # initialize number of attempts to 0
    num_attempts = 0
    while num_attempts < max_attempts:
        # send the GET request inside the retry loop so that every attempt
        # actually re-queries the API instead of re-checking a stale response
        response = requests.get(api_url.format(match_id))

        # check if request was successful
        if response.status_code == 200:
            # extract relevant data from response JSON
            data = response.json()
            players = data['players']

            # loop through each player in the match and extract relevant data
            for player in players:
                hero_id = player['hero_id']
                player_slot = player['player_slot']
                # slots below 128 are treated as Radiant, the rest as Dire
                team = 'Radiant' if player_slot < 128 else 'Dire'
                gold = player['gold']
                gold_spent = player['gold_spent']
                xpm = player['xp_per_min']
                # 'lane' can be absent from a player record; store None
                # rather than aborting the whole run with a KeyError
                lane = player.get('lane', None)

                # add data to results list
                results.append({
                    'match_id': match_id,
                    'hero_id': hero_id,
                    'player_slot': player_slot,
                    'team': team,
                    'gold': gold,
                    'gold_spent': gold_spent,
                    'gold_total': gold + gold_spent,
                    'xpm': xpm,
                    'lane': lane
                })
            break
        else:
            print(f"Error retrieving data for match ID {match_id}.")
            num_attempts += 1
            # back off before the next attempt (or the next match)
            time.sleep(60)

    if num_attempts == max_attempts:
        print(f"Unable to retrieve data from OpenDota API for match ID {match_id}. Moving on to next match.")

    # print progress whenever the player count is a multiple of 1000
    # (the match figure assumes 10 players per match)
    if len(results) % 1000 == 0:
        print(f"Processed data for {len(results)} players in {len(results)/10} matches.")

# match IDs that still have to be fetched by the follow-up batch cell
non_retrieved_data = match_ids[500:]

# convert results to pandas DataFrame
players_df = pd.DataFrame(results)
players_df.head()

Error retrieving data for match ID 7082637284.
Error retrieving data for match ID 7082637284.
Error retrieving data for match ID 7082637284.
Unable to retrieve data from OpenDota API for match ID 7082637284. Moving on to next match.
Processed data for 1000 players in 100.0 matches.
Error retrieving data for match ID 7081441399.
Error retrieving data for match ID 7081441399.
Error retrieving data for match ID 7081441399.
Unable to retrieve data from OpenDota API for match ID 7081441399. Moving on to next match.
Processed data for 2000 players in 200.0 matches.
Error retrieving data for match ID 7079900212.
Error retrieving data for match ID 7079900212.
Error retrieving data for match ID 7079900212.
Unable to retrieve data from OpenDota API for match ID 7079900212. Moving on to next match.
Processed data for 3000 players in 300.0 matches.
Error retrieving data for match ID 7078634227.
Error retrieving data for match ID 7078634227.
Error retrieving data for match ID 7078634227.
Unable to 

Unnamed: 0,match_id,hero_id,player_slot,team,gold,gold_spent,gold_total,xpm,lane
0,7083727502,70,0,Radiant,1965,27465,29430,752,1
1,7083727502,120,1,Radiant,4887,22930,27817,746,2
2,7083727502,33,2,Radiant,1620,21315,22935,617,3
3,7083727502,86,3,Radiant,188,17420,17608,566,2
4,7083727502,75,4,Radiant,2066,13930,15996,403,1


In [66]:
# Fetch the next batch of up to 500 matches from `non_retrieved_data` and
# append their per-player records to `results`. Re-run this cell until it
# reports that no more data is left.
# Relies on `api_url`, `max_attempts`, `results` and `non_retrieved_data`
# already existing in the kernel.

# slicing already copes with fewer than 500 remaining IDs
new_match_ids = non_retrieved_data[:500]

# loop through match IDs and retrieve relevant data
for match_id in new_match_ids:
    # initialize number of attempts to 0
    num_attempts = 0
    while num_attempts < max_attempts:
        # send the GET request inside the retry loop so that every attempt
        # actually re-queries the API instead of re-checking a stale response
        response = requests.get(api_url.format(match_id))

        # check if request was successful
        if response.status_code == 200:
            # extract relevant data from response JSON
            data = response.json()
            players = data['players']

            # loop through each player in the match and extract relevant data
            for player in players:
                hero_id = player['hero_id']
                player_slot = player['player_slot']
                # slots below 128 are treated as Radiant, the rest as Dire
                team = 'Radiant' if player_slot < 128 else 'Dire'
                gold = player['gold']
                gold_spent = player['gold_spent']
                xpm = player['xp_per_min']
                lane = player.get('lane', None)

                # add data to results list
                results.append({
                    'match_id': match_id,
                    'hero_id': hero_id,
                    'player_slot': player_slot,
                    'team': team,
                    'gold': gold,
                    'gold_spent': gold_spent,
                    'gold_total': gold + gold_spent,
                    'xpm': xpm,
                    'lane': lane
                })
            break
        else:
            print(f"Error retrieving data for match ID {match_id}.")
            num_attempts += 1
            # back off before the next attempt (or the next match)
            time.sleep(60)

    if num_attempts == max_attempts:
        print(f"Unable to retrieve data from OpenDota API for match ID {match_id}. Moving on to next match.")

    # print progress whenever the player count is a multiple of 1000
    # (the match figure assumes 10 players per match)
    if len(results) % 1000 == 0:
        print(f"Processed data for {len(results)} players in {len(results)/10} matches.")

# drop exactly the IDs handled in this batch, so a final batch shorter than
# 500 is not fetched (and duplicated in `results`) again on the next run
non_retrieved_data = non_retrieved_data[len(new_match_ids):]
if not non_retrieved_data:
    print("No more data to be retrieved!!!!")

# convert results to pandas DataFrame
players_df = pd.DataFrame(results)
players_df.head()

Processed data for 96000 players in 9600.0 matches.
Error retrieving data for match ID 6911466820.
Error retrieving data for match ID 6911466820.
Error retrieving data for match ID 6911466820.
Unable to retrieve data from OpenDota API for match ID 6911466820. Moving on to next match.
Processed data for 97000 players in 9700.0 matches.
Error retrieving data for match ID 6910179516.
Error retrieving data for match ID 6910179516.
Error retrieving data for match ID 6910179516.
Unable to retrieve data from OpenDota API for match ID 6910179516. Moving on to next match.
Error retrieving data for match ID 6908953492.
Error retrieving data for match ID 6908953492.
Error retrieving data for match ID 6908953492.
Unable to retrieve data from OpenDota API for match ID 6908953492. Moving on to next match.
Processed data for 98000 players in 9800.0 matches.
Error retrieving data for match ID 6907926259.
Error retrieving data for match ID 6907926259.
Error retrieving data for match ID 6907926259.
Unab

Unnamed: 0,match_id,hero_id,player_slot,team,gold,gold_spent,gold_total,xpm,lane
0,7083727502,70,0,Radiant,1965,27465,29430,752,1.0
1,7083727502,120,1,Radiant,4887,22930,27817,746,2.0
2,7083727502,33,2,Radiant,1620,21315,22935,617,3.0
3,7083727502,86,3,Radiant,188,17420,17608,566,2.0
4,7083727502,75,4,Radiant,2066,13930,15996,403,1.0


Save the matches, with their heroes and resource details, to a CSV file.

In [68]:
# Write the per-player resource records to players.csv, omitting the index column.
players_df.to_csv('players.csv', index=False)

================================ Abilities priority ================================

In [109]:
# Fetch the ability upgrade order for every player in the first 5 matches and
# collect one record per ability level-up in `result`.
api_url = 'https://api.opendota.com/api/matches/{}'

# initialize empty list to store feature data
result = []
max_attempts = 3
processed_count = 0

# retrieve data for each match by sending GET request to API
for match_id in match_ids[:5]:
    # initialize number of attempts to 0
    num_attempts = 0

    # loop to retry request until it succeeds or reaches maximum number of attempts
    while num_attempts < max_attempts:

        # send GET request to API with current match ID
        response = requests.get(api_url.format(match_id))

        # check if request was successful
        if response.status_code == 200:
            # extract data for desired features
            data = response.json()
            players = data['players']
            for player in players:
                # a missing key is handled like an explicit null: the player
                # simply contributes no rows
                ability_upgrades_arr = player.get('ability_upgrades_arr')
                if ability_upgrades_arr is None:
                    continue
                # the position in the upgrade list, counted from 1, is the
                # level recorded for that ability
                for level_count, ability_id in enumerate(ability_upgrades_arr, start=1):
                    result.append({
                        'match_id': match_id,
                        # use the player's slot here, not the hero ID
                        'player_slot': player['player_slot'],
                        'ability_id': ability_id,
                        'level': level_count,
                    })
            # exit retry loop if request was successful
            break
        else:
            print(f"Error retrieving data from OpenDota API for match ID {match_id}.")
            num_attempts += 1
            # back off before the next attempt (or the next match)
            time.sleep(60)

    # check if request still failed after maximum number of attempts
    if num_attempts == max_attempts:
        print(f"Unable to retrieve data from OpenDota API for match ID {match_id}. Moving on to next match.")

    # increment counter
    processed_count += 1

    # print update message every 100 matches
    if processed_count % 100 == 0:
        print(f"Processed {processed_count} matches.")

# convert data to DataFrame
abilities_upgrade_df = pd.DataFrame(result)

In [55]:
# Write the ability upgrade records to abilities_upgrades.csv, omitting the index column.
abilities_upgrade_df.to_csv('abilities_upgrades.csv', index=False)

In [37]:
import json

# Read the ability-ID JSON file into memory.
with open('ability_ids.json', mode='r') as ability_file:
    data = json.load(ability_file)

# Flatten the parsed JSON into a DataFrame.
df = pd.json_normalize(data)

# Export the flattened table as CSV without the index column.
df.to_csv(path_or_buf='ability_ids.csv', index=False)

In [50]:
# Display the ability upgrade table as the cell's output.
abilities_upgrade_df

Unnamed: 0,ability_id,level,player_slot,match_id
0,5357,1,0,7083727502
1,5359,2,0,7083727502
2,5359,3,0,7083727502
3,5358,4,0,7083727502
4,5359,5,0,7083727502
...,...,...,...,...
1612,5460,8,132,7083620999
1613,5460,9,132,7083620999
1614,5460,10,132,7083620999
1615,6368,11,132,7083620999
