In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import logging
from time import sleep
import os
from dotenv import load_dotenv

In [3]:
# Load variables from a local .env file into the process environment
# (presumably where STEAM_API_KEY, read further down, is defined - confirm).
load_dotenv()

True

In [3]:
# Notebook-wide logger. Without an explicit level the logger inherits the root
# threshold (WARNING by default), which silently drops every logger.info(...)
# call made by the helpers in this notebook.
logger = logging.getLogger('pro_data_extraction')
logger.setLevel(logging.INFO)
# Attach a handler only when neither this logger nor an ancestor has one, so
# re-running the cell (or configuring the root logger) never duplicates output.
if not logger.hasHandlers():
    _log_handler = logging.StreamHandler()
    _log_handler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s')
    )
    logger.addHandler(_log_handler)

In [4]:
import warnings
# Silences every warning category for the rest of the kernel session;
# remove this line when debugging unexpected results.
warnings.filterwarnings('ignore')

In [4]:
# Steam Web API key taken from the environment. os.getenv returns None when
# STEAM_API_KEY is not set, so the Steam requests below would be sent without
# a usable key in that case.
API_KEY = os.getenv('STEAM_API_KEY')

In [7]:
#Extract game ids from all professional leagues
from time import sleep

# Base URL of the OpenDota REST API.
# NOTE: the name API_URL is rebound to Steam endpoints by later cells.
API_URL = 'https://api.opendota.com/api'

def get_pro_matches(match_id: int, total_matches: int,
                    api_url: str = 'https://api.opendota.com/api') -> pd.DataFrame:
    """Collect professional match ids from the OpenDota ``/proMatches`` endpoint.

    The endpoint is paged backwards: each request asks for matches whose id is
    lower than a cursor, and the cursor is then moved to the smallest id of the
    page just received so the next request continues where this one stopped.

    Args:
        match_id: Upper bound (exclusive) for the first page.
        total_matches: Maximum number of match ids to return.
        api_url: Base URL of the API. It is a parameter with a fixed default
            rather than the module-level ``API_URL`` because other cells rebind
            that name to different services.

    Returns:
        A DataFrame with a single nullable-integer (``Int64``) column
        ``match_id``, a fresh 0..n-1 index and at most ``total_matches`` rows.
        The frame is empty (but still has the column) when nothing could be
        fetched.
    """
    params = {
        "lobby_type": 1,
        "limit": 100,
        "less_than_match_id": match_id,
    }

    matches_per_request = params["limit"]
    wanted = max(total_matches, 0)
    # Ceiling division: a partial last page still needs its own request.
    num_requests = -(-wanted // matches_per_request)

    # Ids are gathered in a plain list and turned into a frame once at the end
    # instead of re-concatenating a DataFrame on every page.
    collected_ids = []

    for _ in range(num_requests):
        # Pause between requests (presumably to respect the API rate limit).
        sleep(1)
        try:
            response = requests.get(f'{api_url}/proMatches', params=params, timeout=30)

            if response.status_code == 200:
                page_ids = [match['match_id'] for match in response.json()]
                if not page_ids:
                    # Nothing older than the cursor is available.
                    break
                collected_ids.extend(page_ids)
                # Advance the cursor; without this every request returns the same page.
                params["less_than_match_id"] = min(page_ids)
            else:
                logger.warning(f'Something went wrong while requesting opendota API {response.status_code}, {response.text}')
        except Exception as e:
            # Best effort: the cursor is unchanged, so the next iteration retries this page.
            logger.warning(f'Something went wrong {str(e)}')

        if len(collected_ids) >= wanted:
            break

    return pd.DataFrame({"match_id": pd.array(collected_ids[:wanted], dtype="Int64")})
    

In [8]:
# Request up to 15000 pro match ids below match id 7223482365. Pages hold 100
# ids and each request is preceded by a 1 s pause, so a full run takes minutes.
df_test = get_pro_matches(7223482365, 15000)


KeyboardInterrupt



In [None]:
df_test.head(10)

In [None]:
len(df_test)

In [None]:
# to_csv does not create missing directories, so make sure data/ exists first.
os.makedirs('data', exist_ok=True)
# The frame index is written as the first (unnamed) column, pandas' default.
df_test.to_csv('data/opendota_pro_games.csv')

In [13]:
# Get detailed information about each game by its game id.
# Steam GetMatchDetails endpoint with the API key already embedded in the
# query string - avoid printing or logging this value.
API_URL = f"https://api.steampowered.com/IDOTA2Match_570/GetMatchDetails/v1/?key={API_KEY}"

def get_match_details(match_id: int,
                      api_url: str = "https://api.steampowered.com/IDOTA2Match_570/GetMatchDetails/v1/"):
    """Fetch the Steam ``GetMatchDetails`` payload for one match.

    Args:
        match_id: Id of the match to look up.
        api_url: Endpoint URL without a query string. It is a parameter with a
            fixed default rather than the module-level ``API_URL`` because
            other cells rebind that name to different endpoints.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` (non-200 status,
        network failure or undecodable body). Failures are logged, not raised.
    """
    # The key is read from the module-level API_KEY at call time and sent as a
    # query parameter together with the match id.
    query = {"key": API_KEY, "match_id": match_id}
    try:
        response = requests.get(api_url, params=query, timeout=30)
        if response.status_code == 200:
            return response.json()
        logger.warning(f"Error occurred while fetching match details for match ID {match_id}, {response.status_code}, {response.text}.")
        return None
    except Exception as e:
        # `response` may not exist here (the request itself can fail), so it is
        # not referenced. Only the exception type is logged because request
        # errors can embed the full URL, which contains the API key.
        logger.warning(f'Request for match details of match ID {match_id} failed: {type(e).__name__}')
        return None

def process_matches(df: pd.DataFrame) -> pd.DataFrame:
    """Attach the match-details payload to every row of ``df``.

    ``get_match_details`` is called once per value of the ``match_id`` column,
    with a 0.1 s pause before each call.

    Args:
        df: Frame with a ``match_id`` column.

    Returns:
        A new frame equal to ``df`` plus a ``data`` column holding the payload
        returned for each match, or ``None`` when the lookup failed or returned
        an empty value. The input frame is left untouched, so re-running the
        calling cell always starts from the same data.
    """
    data_list = []

    for match_id in df["match_id"]:
        sleep(0.1)
        try:
            match_data = get_match_details(match_id)
        except Exception as e:
            logger.warning(f'Error while executing get_match_details {str(e)}')
            # Reset explicitly: otherwise the previous row's payload (or an
            # unbound name on the first row) would be used for this match.
            match_data = None

        # Empty payloads are normalised to None, like failed lookups.
        data_list.append(match_data if match_data else None)

    return df.assign(data=data_list)

In [15]:
# Look up the details of every collected match id (one HTTP request per row)
# and keep the raw response next to the id in a `data` column.
df_with_details = process_matches(df_test)

In [16]:
df_with_details.head(10)

Unnamed: 0,match_id,data
0,7206650897,{'result': {'players': [{'account_id': 1300455...
1,7206631794,{'result': {'players': [{'account_id': 2414674...
2,7206622803,{'result': {'players': [{'account_id': 1336821...
3,7206609235,{'result': {'players': [{'account_id': 1524712...
4,7206592298,{'result': {'players': [{'account_id': 1133046...
5,7206579005,{'result': {'players': [{'account_id': 1300455...
6,7206576475,{'result': {'players': [{'account_id': 2362143...
7,7206549622,{'result': {'players': [{'account_id': 1200963...
8,7206543514,{'result': {'players': [{'account_id': 1525150...
9,7206530094,{'result': {'players': [{'account_id': 3204234...


In [17]:
len(df_with_details)

200

In [21]:
df_with_details.isna().sum()

match_id    0
data        0
dtype: int64

In [22]:
# Drop rows with any missing value, e.g. matches whose details could not be
# fetched and were therefore stored as None.
df_with_details = df_with_details.dropna()

In [23]:
len(df_with_details)

200

In [5]:
#Retrieve hero_name - id dictionary
# HTTPS (as for the match-details endpoint) so the API key in the query string
# is not sent in clear text.
API_URL = f"https://api.steampowered.com/IEconDOTA2_570/GetHeroes/v1?key={API_KEY}"

# Defined up front so later cells still find `heroes` when the request fails.
heroes = []

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(API_URL, timeout=30)

if response.status_code == 200:
    data = response.json()
    # List of {'name': ..., 'id': ...} records, one per hero.
    heroes = data["result"]["heroes"]

else:
    print("Error occurred while fetching hero data.")


In [6]:
heroes

[{'name': 'npc_dota_hero_antimage', 'id': 1},
 {'name': 'npc_dota_hero_axe', 'id': 2},
 {'name': 'npc_dota_hero_bane', 'id': 3},
 {'name': 'npc_dota_hero_bloodseeker', 'id': 4},
 {'name': 'npc_dota_hero_crystal_maiden', 'id': 5},
 {'name': 'npc_dota_hero_drow_ranger', 'id': 6},
 {'name': 'npc_dota_hero_earthshaker', 'id': 7},
 {'name': 'npc_dota_hero_juggernaut', 'id': 8},
 {'name': 'npc_dota_hero_mirana', 'id': 9},
 {'name': 'npc_dota_hero_nevermore', 'id': 11},
 {'name': 'npc_dota_hero_morphling', 'id': 10},
 {'name': 'npc_dota_hero_phantom_lancer', 'id': 12},
 {'name': 'npc_dota_hero_puck', 'id': 13},
 {'name': 'npc_dota_hero_pudge', 'id': 14},
 {'name': 'npc_dota_hero_razor', 'id': 15},
 {'name': 'npc_dota_hero_sand_king', 'id': 16},
 {'name': 'npc_dota_hero_storm_spirit', 'id': 17},
 {'name': 'npc_dota_hero_sven', 'id': 18},
 {'name': 'npc_dota_hero_tiny', 'id': 19},
 {'name': 'npc_dota_hero_vengefulspirit', 'id': 20},
 {'name': 'npc_dota_hero_windrunner', 'id': 21},
 {'name': 'np