In [2]:
from dotenv import load_dotenv
import os
import requests
import pandas as pd
from tqdm import tqdm
import numpy as np

from pangres import upsert
from sqlalchemy import text, create_engine

import json

load_dotenv()

True

In [3]:
# Database connection settings, read from the process environment.
# A variable that is not set resolves to None.
db_username = os.getenv("db_username")
db_password = os.getenv("db_password")
db_host = os.getenv("db_host")
db_port = os.getenv("db_port")
db_name = os.getenv("db_name")

def create_db_connection_string(db_username: str, db_password: str, db_host: str, db_port: int, db_name: str):
    """Build a SQLAlchemy URL for PostgreSQL using the psycopg2 driver.

    The user name and password are percent-encoded before being placed in the
    URL, so credentials containing reserved characters such as ``@``, ``:``,
    ``/`` or ``%`` cannot change how the URL is split into its parts.

    Args:
        db_username: Database user name.
        db_password: Database password.
        db_host: Host name or address of the database server.
        db_port: Port of the database server.
        db_name: Name of the database to connect to.

    Returns:
        str: URL of the form
        ``postgresql+psycopg2://<user>:<password>@<host>:<port>/<database>``.
    """
    # Local import keeps this function usable without touching the import cell.
    from urllib.parse import quote

    # safe="" makes quote() encode "/" as well, which it leaves alone by default.
    # str() keeps the previous tolerance for non-string values.
    encoded_username = quote(str(db_username), safe="")
    encoded_password = quote(str(db_password), safe="")

    connection_url = f"postgresql+psycopg2://{encoded_username}:{encoded_password}@{db_host}:{db_port}/{db_name}"
    return connection_url

In [5]:
def process_timeline_data(match_timeline):
    """Flatten one match-timeline payload into a per-frame, per-participant table.

    Args:
        match_timeline: Mapping with a ``metadata`` section (``dataVersion``,
            ``matchId``) and an ``info`` section (``participants``, ``frames``,
            ``frameInterval``, ``endOfGameResult``).

    Returns:
        pandas.DataFrame: One row for every entry of every frame's
        ``participantFrames``. A payload without frames yields an empty frame.
    """
    info = match_timeline['info']

    # participantId -> puuid lookup, used to tag every row with the player.
    puuid_by_participant_id = {
        entry['participantId']: entry['puuid'] for entry in info['participants']
    }
    metadata = match_timeline['metadata']

    rows = []
    for frame in info['frames']:
        for raw_id, stats in frame['participantFrames'].items():
            rows.append({
                'data_version': metadata['dataVersion'],
                'match_id': metadata['matchId'],
                # frameInterval is scaled down by 1000 (presumably ms -> s; confirm against the API docs)
                'frame_interval': info['frameInterval'] / 1000,
                'end_of_game_result': info['endOfGameResult'],
                # convert into minutes
                'timestamp': round(frame['timestamp'] / 1000 / 60, 4),
                # participantFrames keys are passed through int() so they match the participantId keys
                'participant_id': int(raw_id),
                'puuid': puuid_by_participant_id[int(raw_id)],
                'current_gold': stats['currentGold'],
                'level': stats['level'],
                'jungle_minions_killed': stats['jungleMinionsKilled'],
                'minions_killed': stats['minionsKilled'],
                'total_gold': stats['totalGold'],
                'xp': stats['xp'],
                'health': stats['championStats']['health'],
                'health_max': stats['championStats']['healthMax'],
                # the payload calls the resource "power"; it is stored as mana
                'mana': stats['championStats']['power'],
                'mana_max': stats['championStats']['powerMax'],
                'x': stats['position']['x'],
                'y': stats['position']['y'],
            })

    return pd.DataFrame(rows)

In [None]:
from concurrent.futures import as_completed, ProcessPoolExecutor
from requests_futures.sessions import FuturesSession
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import time

def retrieve_match_timelines(match_ids, api_key:str):
    """Download the timeline of every match in ``match_ids`` and flatten each one.

    Requests are issued concurrently through a ``FuturesSession`` backed by a
    10-worker ``ProcessPoolExecutor``; HTTP 429 responses are retried up to five
    times, honouring the ``Retry-After`` header. Responses are handled in
    completion order, so the result is not ordered like ``match_ids``.

    A match whose request raises a ``requests`` exception or returns a
    non-success status is skipped and listed in a summary printed at the end,
    instead of aborting the whole batch.

    Args:
        match_ids: Iterable of match identifiers to fetch.
        api_key: Riot API key, sent in the ``X-Riot-Token`` request header.

    Returns:
        list: One DataFrame (as built by ``process_timeline_data``) per match
        that was retrieved successfully.
    """
    retries = 5
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        respect_retry_after_header=True,
        status_forcelist=[429],
    )
    adapter = HTTPAdapter(max_retries=retry)

    # The key travels in a header rather than in `?api_key=...` so it never
    # shows up in URLs echoed by tracebacks or error messages.
    headers = {'X-Riot-Token': api_key}

    dataframe_list = []
    failed_matches = []
    t1 = time.time()

    # Both context managers matter: the session does not shut down an executor
    # it was handed, so the worker processes would otherwise outlive the call.
    with ProcessPoolExecutor(max_workers=10) as executor:
        with FuturesSession(executor=executor) as match_timeline_session:
            match_timeline_session.mount('http://', adapter)
            match_timeline_session.mount('https://', adapter)

            # Retrieve match data from match_ids
            # would be a good idea to track which match_ids belong to which user
            future_to_match_id = {
                match_timeline_session.get(
                    f'https://americas.api.riotgames.com/lol/match/v5/matches/{match_id}/timeline',
                    headers=headers,
                ): match_id
                for match_id in match_ids
            }

            # `total` lets tqdm render real progress instead of a bare counter.
            for future in tqdm(as_completed(future_to_match_id), total=len(future_to_match_id)):
                match_id = future_to_match_id[future]
                try:
                    resp = future.result()
                except requests.RequestException as exc:
                    failed_matches.append((match_id, repr(exc)))
                    continue

                # An error payload has no 'info'/'metadata' sections and would
                # make process_timeline_data raise KeyError.
                if not resp.ok:
                    failed_matches.append((match_id, f'HTTP {resp.status_code}'))
                    continue

                match_timeline = resp.json()
                participants_timeline_df = process_timeline_data(match_timeline)
                dataframe_list.append(participants_timeline_df)

    t2 = time.time()
    print(f'EXTRACTING MATCH IDS -- {round(t2 - t1, 2)}s')
    if failed_matches:
        print(f'SKIPPED {len(failed_matches)} MATCHES:')
        for match_id, reason in failed_matches:
            print(f'  {match_id}: {reason}')
    return dataframe_list


In [7]:
# Build the engine used by the query cells that follow. No connection is opened
# here: each query cell checks one out with `with db_engine.connect()`, which
# also closes it, so a connection opened in this cell would never be used or
# released.
connection_url = create_db_connection_string(db_username, db_password, db_host, db_port, db_name)
db_engine = create_engine(connection_url, pool_recycle=3600)

In [26]:
# Read the whole soloq.player_matches table into `df`; the connection is
# closed as soon as the with-block exits.
with db_engine.connect() as connection:
    df = pd.read_sql(
        sql=text("SELECT * FROM soloq.player_matches"),
        con=connection,
    )

In [14]:
# Load the timelines that are already stored so their matches can be skipped.
# If the read fails, fall back to an empty frame so every match is treated as
# new (presumably this covers the first run, before the table exists -- confirm).
try:
    with db_engine.connect() as connection:
        player_matches_timeline_df = pd.read_sql(text("SELECT * FROM soloq.player_matches_timeline"), connection)
except Exception as exc:
    # `Exception` instead of a bare except lets KeyboardInterrupt/SystemExit
    # through; printing the reason keeps a real failure from going unnoticed.
    print(f"Could not read soloq.player_matches_timeline ({type(exc).__name__}: {exc}); treating every match as new")
    player_matches_timeline_df = pd.DataFrame()

In [20]:
# Keep only the matches that have no stored timeline yet.
if player_matches_timeline_df.empty:
    # Nothing stored: every known match needs a timeline.
    match_ids = list(df['match_id'].unique())
else:
    match_ids = list(
        set(df['match_id'].unique())
        - set(player_matches_timeline_df['match_id'].unique())
    )

In [25]:
# Number of matches whose timeline still has to be fetched.
len(match_ids)

402

In [22]:
# The Riot API key is read from the environment (None when it is not set).
api_key = os.environ.get("riot_api_key")

# Fetch and flatten the timeline of every match selected above.
dataframe_list = retrieve_match_timelines(match_ids=match_ids, api_key=api_key)

402it [08:06,  1.21s/it]

EXTRACTING MATCH IDS -- 486.53s





In [23]:
# Stack the per-match frames and derive a row key of the form
# "<match_id>_<participant_id>_<timestamp>".
all_matches_timeline_dataframe = pd.concat(dataframe_list).assign(
    uuid=lambda frame: (
        frame["match_id"]
        + '_'
        + frame["participant_id"].astype(str)
        + '_'
        + frame["timestamp"].astype(str)
    )
)
print(len(all_matches_timeline_dataframe))

# The key becomes the index, which is what the upsert matches rows on.
all_matches_timeline_dataframe = all_matches_timeline_dataframe.set_index('uuid')

123690


In [29]:
# Build a fresh engine and open a connection for the write step.
connection_url = create_db_connection_string(
    db_username=db_username,
    db_password=db_password,
    db_host=db_host,
    db_port=db_port,
    db_name=db_name,
)
db_engine = create_engine(connection_url, pool_recycle=3600)
connection = db_engine.connect()

# Write the timeline rows to soloq.player_matches_timeline, updating rows that
# already exist. The commit is issued by a separate statement afterwards.
upsert(
    con=connection,
    df=all_matches_timeline_dataframe,
    table_name='player_matches_timeline',
    schema='soloq',
    if_row_exists='update',
    create_schema=True,
)

In [30]:
# Commit the upsert, then release the connection: it was opened without a
# `with` block, so nothing else closes it.
try:
    connection.commit()
finally:
    connection.close()