In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import logging
from time import sleep
import os
from dotenv import load_dotenv

In [2]:
load_dotenv()

True

In [3]:
logger = logging.getLogger('pro_data_extraction')

In [4]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
API_KEY = os.getenv('STEAM_API_KEY')

In [62]:
#Extract game ids from all professional leagues
from time import sleep

API_URL = 'https://api.opendota.com/api'

def get_pro_matches(initial_match_id, total_matches):
    params = {
        "lobby_type": 1,  
        "limit": 100,
        "less_than_match_id": initial_match_id
    }

    matches_per_request = params["limit"]
    num_requests = total_matches // matches_per_request

    df = pd.DataFrame()

    for i in range(num_requests):
        sleep(1)
        try:
            response = requests.get(f'{API_URL}/proMatches', params=params)

            if response.status_code == 200:
                data = response.json()

                match_ids =  [match['match_id'] for match in data]
                match_df = pd.DataFrame({
                    "match_id": match_ids
                })
                df = pd.concat((df, match_df))
                
                if data:
                    params["less_than_match_id"] = data[-1]['match_id']
                else:
                    logger.info(f'No matches returned by API request')
                    break
            else:
                logger.info(f'Something went wrong while requesting opendota API {response.status_code}, {response.text}')
                break  
        except Exception as e:
            logger.info(f'Something went wrong {str(e)}')
            break 

        if len(df) >= total_matches:
            break

    df['match_id'] = df['match_id'].astype('Int64')
    return df
    

In [None]:
df_matches = get_pro_matches(7258005870, 15000)

In [64]:
df_matches.head(10)

Unnamed: 0,match_id
0,6652712454
1,6652699293
2,6652690901
3,6652662461
4,6652639756
5,6652620894
6,6652608810
7,6652579617
8,6652561542
9,6652559165


In [65]:
len(df_matches)

15000

In [66]:
API_URL = f"https://api.steampowered.com/IDOTA2Match_570/GetMatchDetails/v1/?key={API_KEY}"

def get_match_details(match_id: int):
    url = f"{API_URL}&match_id={match_id}"
    try:
        response = requests.get(url)
        if response.status_code == 200:
            match_data = response.json()
            return match_data
        else:
            logger.error(f"Error occurred while fetching match details for match ID {match_id}, {response.status_code}, {response.text}.")
            return None
    except:
        logger.error(f'{response.status_code}')
        return None

def process_matches(df: pd.DataFrame) -> pd.DataFrame:
    data_list = []

    for index, row in df.iterrows():
        sleep(0.1)
        match_id = row["match_id"]
        try:
            match_data = get_match_details(match_id)
        except Exception as e: 
            logger.error(f'Error while executing get_match_details {str(e)}')

        if match_data:
            data_list.append(match_data) 
        else:
            data_list.append(None)
    df["data"] = data_list

    return df

In [67]:
df_with_details = process_matches(df_test)

In [68]:
df_with_details.head(10)

Unnamed: 0,match_id,data
0,6652712454,{'result': {'players': [{'account_id': 1251984...
1,6652699293,{'result': {'players': [{'account_id': 2941354...
2,6652690901,{'result': {'players': [{'account_id': 1345566...
3,6652662461,{'result': {'players': [{'account_id': 1645308...
4,6652639756,{'result': {'players': [{'account_id': 1981611...
5,6652620894,{'result': {'players': [{'account_id': 1210331...
6,6652608810,{'result': {'players': [{'account_id': 2941354...
7,6652579617,{'result': {'players': [{'account_id': 1083977...
8,6652561542,{'result': {'players': [{'account_id': 1404110...
9,6652559165,{'result': {'players': [{'account_id': 1399765...


In [70]:
df_with_details['match_id'] = df_with_details['data'].apply(lambda x: x['result'].get('match_id'))

In [71]:
df_with_details['series_id'] = None
df_with_details['is_live'] = False

In [72]:
df_with_details.isna().sum()

match_id         0
data             0
series_id    15000
is_live          0
dtype: int64

In [73]:
len(df_with_details)

15000

In [74]:
df_with_details['match_data'] = df_with_details['data']

In [75]:
db_name = os.getenv('POSTGRES_DB')
user = os.getenv('POSTGRES_USER')
password = os.getenv('POSTGRES_PASSWORD')
host = 'localhost'
port = '5432'

In [85]:
import psycopg2
conn = psycopg2.connect(dbname=db_name, user=user, password=password, host=host, port=port)

In [77]:
df_with_details = df_with_details[['match_id', 'series_id', 'match_data', 'is_live']]

In [78]:
df_with_details.drop_duplicates(subset='match_id')

Unnamed: 0,match_id,series_id,match_data,is_live
0,6652712454,,{'result': {'players': [{'account_id': 1251984...,False
1,6652699293,,{'result': {'players': [{'account_id': 2941354...,False
2,6652690901,,{'result': {'players': [{'account_id': 1345566...,False
3,6652662461,,{'result': {'players': [{'account_id': 1645308...,False
4,6652639756,,{'result': {'players': [{'account_id': 1981611...,False
...,...,...,...,...
95,6288277706,,{'result': {'players': [{'account_id': 9294909...,False
96,6288268302,,{'result': {'players': [{'account_id': 9415515...,False
97,6288251510,,{'result': {'players': [{'account_id': 9165458...,False
98,6288250862,,{'result': {'players': [{'account_id': 2562697...,False


In [81]:
df_with_details['match_data'] = df_with_details['match_data'].apply(lambda x: x.get('result'))

In [86]:
df_with_details

Unnamed: 0,match_id,series_id,match_data,is_live
0,6652712454,,"{'players': [{'account_id': 1251984026, 'playe...",False
1,6652699293,,"{'players': [{'account_id': 294135421, 'player...",False
2,6652690901,,"{'players': [{'account_id': 134556694, 'player...",False
3,6652662461,,"{'players': [{'account_id': 164530809, 'player...",False
4,6652639756,,"{'players': [{'account_id': 198161112, 'player...",False
...,...,...,...,...
95,6288277706,,"{'players': [{'account_id': 92949094, 'player_...",False
96,6288268302,,"{'players': [{'account_id': 94155156, 'player_...",False
97,6288251510,,"{'players': [{'account_id': 91654584, 'player_...",False
98,6288250862,,"{'players': [{'account_id': 256269737, 'player...",False


In [87]:
import json

In [88]:
cur = conn.cursor()

data = df_with_details.to_dict('records')

table = 'dota_dds.pro_matches'
cols = ','.join(list(data[0].keys()))
vals = ', '.join(['%s'] * len(data[0]))
query = f"INSERT INTO {table}({cols}) VALUES ({vals})"

cur.executemany(query, [tuple(json.dumps(d[col]) if isinstance(d[col], dict) else d[col] for col in df_with_details.columns) for d in data])

conn.commit()
cur.close()
conn.close()

In [90]:
API_URL = f"http://api.steampowered.com/IEconDOTA2_570/GetHeroes/v1?key={API_KEY}"

response = requests.get(API_URL)

if response.status_code == 200:
    data = response.json()
    heroes = data["result"]["heroes"]

else:
    print("Error occurred while fetching hero data.")


In [93]:
heroes[:5]

[{'name': 'npc_dota_hero_antimage', 'id': 1},
 {'name': 'npc_dota_hero_axe', 'id': 2},
 {'name': 'npc_dota_hero_bane', 'id': 3},
 {'name': 'npc_dota_hero_bloodseeker', 'id': 4},
 {'name': 'npc_dota_hero_crystal_maiden', 'id': 5}]