In [14]:
# IMPORTS
import requests
import numpy as np
import time
import pandas as pd
import os
import datetime
from io import TextIOWrapper
from fastparquet import write
from pandas import DataFrame
from riotwatcher import LolWatcher, ApiError
from pynput import keyboard
from pathlib import Path

In [15]:
path = Path(os.getcwd())
PARENT_DIR = path.parent.absolute()

In [16]:
API_KEY = 'RGAPI-fe16ad7b-d99f-4f96-a503-0958b0c15f93'
REGIONS = ['na1', 'kr', 'euw1']
RANKS = ['PLATINUM', 'DIAMOND']
TIERS = ['I', 'II', 'III', 'IV']
TASKS = [(lambda x: x, 'puuids')]
DATA_DIR = os.path.join(PARENT_DIR, 'data')
lol_watcher = LolWatcher(API_KEY)

In [17]:
def write_to_parquet(task: str, df: DataFrame, region: str, rank: str, tier: str) -> str:
    DIRECTORY_PATH = f'F://data//{task}//{region}//'
    file_name = f'{task}_{rank}_{tier}.parquet'
    file_path = DIRECTORY_PATH + file_name

    # create the parquet if it doesn't exist
    if not os.path.isfile(file_path):
        print(f'Creating {file_name}')
        write(file_path, df)
    else:
        write(file_path, df, append=True)
    return file_path

In [18]:
def get_summoner_puuids(region: str, summonerID: str) -> DataFrame:
    """
    Gets random summoner ids from North America from the various ranks (Diamond:Platinum:Gold:Silver:Bronze)
    Returns a pandas dictionary only with summoner ids that have wins + losses >= 20
    Args:
        region (str): The region we want to search in
        rank (str): The rank we want to get from
        division (str): The divions we want to get from
        num_pages (int, optional): An amount of summoner ids to get, a group of 204 are a 'page'. Defaults to 4.

    Returns:
        bool: Whether getting the summoner ids was successful
    """
    max_retries = 3
    retry_delay = 2
    for attempt in range(max_retries):
        try:
            # attempt to get a response
            response = lol_watcher.summoner.by_id(region, summonerID)

            return pd.DataFrame.from_records(response, index=[0])

        except ApiError as err:
            print("Attempt", attempt + 1, "failed:", err)
            time.sleep(retry_delay)
    
    print(f'Failed at summonerID: {summonerID}')
    return None

## Step 1: Get the PUUIDs

In [19]:
def get_puuids(region: str, rank: str, tier: str, summoners: DataFrame):
    print(f'Number of {rank} {tier} summonerIds: {summoners.shape[0]}')
    seen_summoners = set()
    num_requested = 0
    out_df = pd.DataFrame()
    for summoner in summoners['summonerId']:
        num_requested += 1
        
        if summoner in seen_summoners:
            continue

        seen_summoners.add(summoner)
        df = get_summoner_puuids(region, summoner)

        if df is None:
            continue

        if out_df.shape[0] == 0:
            out_df = df.copy(deep=True)
        else:
            out_df = pd.concat([out_df, df], axis=0)

        if num_requested % 100 == 0:
            print(f'Finished: {num_requested}')

    write_to_parquet(out_df, region, rank, tier)

In [20]:
def get_task_df(task, region, rank, tier, cols) -> DataFrame:
    prev_tasks_map = {'puuids': 'summoner', 'matchIds': 'puuids'}
    prev_task = prev_tasks_map[task]

    # read the summoner dataframe for the given region
    file_name = f'//{prev_task}//{region}//{prev_task}_{rank}_{tier}.parquet'
    complete_fp = DATA_DIR + file_name

    return pd.read_parquet(complete_fp, columns=[cols])

In [21]:
summoner_df = get_task_df('puuids', 'na1', 'DIAMOND', 'II', 'summonerId')

In [22]:
summoner_df

Unnamed: 0,summonerId
0,KdgmRdGwQn1m19U8dR0rg5Y7tdtugb9B6IvzJD0V9ELGYs8
1,HoTw1lnCMetth0h3BbXuQ5mHXj7W7PwMCIkfuKlkdQ_DVd0
2,4ozTyhNWaHdo8UI3xGDz1boRFiDPN55Vm0f_wqpkFOsgC9g
3,Lmo8HGch3qjlMhXzgJyHWkWUn0hibIhco41IN3FCaDF8oeU
4,jZWVMcycWgyZeBUzVyEzI96a1V-gp4BOFJlRhVSD6Qdzx_c
...,...
3734,hk0TDr27f122MB0Ay7EnfBrVfEAjC6KJ-XDMDSs7bEcARWE
3735,YI-Pxt7X16H-QVXVUMzob2UxBguq92oUi0jUExDo8JvWlTk
3736,nKMw34mdJdq5qdOEdbiyZHcni2-V0BFI7fUzkge-K6YlRwM
3737,hL-Vvg3hpQ5SlEfrPhe-xY5C74euMSSmL637nh5Xi1u4mF8


In [23]:
#get_puuids('na1', 'DIAMOND', 'II', summoner_df)

## Step 2: Get the Match IDs

In [24]:
def get_match_id(route: str, puuid: str) -> DataFrame:
    max_retries = 3
    retry_delay = 2
    for attempt in range(max_retries):
        try:
            # attempt to get a response
            response = lol_watcher.match.matchlist_by_puuid(route, puuid, queue=700, count=100)
            if len(response) == 0:
                return None

            out_df = pd.DataFrame.from_dict({'match_ids': response})
            out_df['puuid'] = puuid
            return out_df

        except ApiError as err:
            print("Attempt", attempt + 1, "failed:", err)
            time.sleep(retry_delay)
    
    print(f'Failed at puuid: {puuid}')
    return None

In [25]:
puuids_df = get_task_df('matchIds', 'na1', 'DIAMOND', 'I', 'puuid')

In [26]:
puuids_df

Unnamed: 0_level_0,puuid
index,Unnamed: 1_level_1
0,bmQ_Q7xWpOM9sD0HkFODWy5qAFShLoGS5V2utYqiBZ5jv8...
0,ta7pflkBOweD0usqFX4xFjGgK1xcArgwWRO8B-7C8O4x8O...
0,9NP1wvNEn2QYtfkrjl7Td40PAMv1v3eoAMStYP_561lDD7...
0,fwKYSlyH3J5FHHCwzjyV7AhC3wDtpgP6uPCs8hszvZx_8p...
0,2Ww9m4cRWQ828Sg331LOxHPMcLm2mPeET8sOhx7k27Pm13...
...,...
0,Dtc2H2KIM4xpoemBM3TIq5SJgV3cCHKhTFpn0_hL78vGMI...
0,1OZTvjmafRoTqfu6WVi_GOz-cLDoqUC8QFvJ95d2lBZ94j...
0,7BpNCMmB1u4e3Wazef1dwbbLaGVRQhR2dppJ58Ochg7md2...
0,b0lIIuGkqXFwvJqFMxQQ-ooyJ_QViKjJUs92D5waSq0HLM...


In [27]:
def batch_match_ids(route: str, region: str, rank:str, tier:str, puuids: DataFrame)-> DataFrame:
    print(f'Number of {rank} {tier} puuids: {puuids.shape[0]}')
    seen_puuids = set()
    num_requested = 0
    out_df = pd.DataFrame()
    for puuid in puuids['puuid']:
        num_requested += 1
        
        if puuid in seen_puuids:
            continue

        seen_puuids.add(puuid)
        df = get_match_id(route, puuid)

        if df is None:
            continue

        if out_df.shape[0] == 0:
            out_df = df.copy(deep=True)
        else:
            out_df = pd.concat([out_df, df], axis=0)

        if num_requested % 100 == 0:
            print(f'Finished: {num_requested}')

    write_to_parquet(out_df, region, rank, tier)

## Step 2: Get Match Data

In [28]:
def get_match_data(route: str, match_id: str) -> DataFrame:
    max_retries = 3
    retry_delay = 2

    for attempt in range(max_retries):
        try:
            # attempt to get a response
            response = lol_watcher.match.by_id(route, match_id)
            if len(response) == 0:
                return None

            # store complete data in dictionary
            complete = {}

            team_1 = []
            team_2 = []

            for i in range(10):
                if i < 5:
                    team_1.append((response['info']['participants'][i]['summonerId'], response['info']['participants'][i]['lane'], response['info']['participants'][i]['championId']))
                else:
                    team_2.append((response['info']['participants'][i]['summonerId'], response['info']['participants'][i]['lane'], response['info']['participants'][i]['championId']))

            for i in range(5):
                team_1[i] = team_1[i] + (response['info']['teams'][0]['bans'][i]['championId'],)
                team_2[i] = team_2[i] + (response['info']['teams'][1]['bans'][i]['championId'],)

            complete['team_1'] = team_1
            complete['team_2'] = team_2

            complete['game_version'] = response['info']['gameVersion']
            complete['match_id'] = response['metadata']['matchId']

            #organizes data into dataframe that we want
            dict_df = {}
            dict_df['match_id'] = [complete['match_id'], complete['match_id']]
            dict_df['game_version'] = [complete['game_version'], complete['game_version']]
            dict_df['team'] = [100, 200]
            dict_df['summoner_id1'] = [complete['team_1'][0][0], complete['team_2'][0][0]]
            dict_df['summoner_id2'] = [complete['team_1'][1][0], complete['team_2'][1][0]]
            dict_df['summoner_id3'] = [complete['team_1'][2][0], complete['team_2'][2][0]]
            dict_df['summoner_id4'] = [complete['team_1'][3][0], complete['team_2'][3][0]]
            dict_df['summoner_id5'] = [complete['team_1'][4][0], complete['team_2'][4][0]]
            dict_df['ban1'] = [complete['team_1'][0][3], complete['team_2'][0][3]]
            dict_df['ban2'] = [complete['team_1'][1][3], complete['team_2'][1][3]]
            dict_df['ban3'] = [complete['team_1'][2][3], complete['team_2'][2][3]]
            dict_df['ban4'] = [complete['team_1'][3][3], complete['team_2'][3][3]]
            dict_df['ban5'] = [complete['team_1'][4][3], complete['team_2'][4][3]]
            dict_df['lane1'] = [complete['team_1'][0][1], complete['team_2'][0][1]]
            dict_df['lane2'] = [complete['team_1'][1][1], complete['team_2'][1][1]]
            dict_df['lane3'] = [complete['team_1'][2][1], complete['team_2'][2][1]]
            dict_df['lane4'] = [complete['team_1'][3][1], complete['team_2'][3][1]]
            dict_df['lane5'] = [complete['team_1'][4][1], complete['team_2'][4][1]]
            dict_df['pick1'] = [complete['team_1'][0][2], complete['team_2'][0][2]]
            dict_df['pick2'] = [complete['team_1'][1][2], complete['team_2'][1][2]]
            dict_df['pick3'] = [complete['team_1'][2][2], complete['team_2'][2][2]]
            dict_df['pick4'] = [complete['team_1'][3][2], complete['team_2'][3][2]]
            dict_df['pick5'] = [complete['team_1'][4][2], complete['team_2'][4][2]]

            return pd.DataFrame.from_dict(dict_df).set_index('match_id')

        except ApiError as err:
            print("Attempt", attempt + 1, "failed:", err)
            time.sleep(retry_delay)
    
    print(f'Failed at puuid: {match_id}')
    return None

In [36]:
#response = lol_watcher.match.by_id('AMERICAS', 'NA1_4485645219')
#response

{'metadata': {'dataVersion': '2',
  'matchId': 'NA1_4485645219',
  'participants': ['IRPWksl-d9yDN2RO9zPH1FhYau3CMjEOCTdgyeDyjL7D9WQ_PNk39Hwt4VT6cg0Mca9uBRjn78ex1w',
   'ZwFdSLJq2RbUMM3Mh1St-y1vB8uY-Ch09S7Ch7Eu7XrnNAXo1pyPV17yCSz3JBXYD3AxluVDcMhrXA',
   'ZT9VaGce6BHwn_hycdn-dSqCo7xYLw4iIERrTA-2IMuUwZtnXsQYSLeExtSiIdys_MWZsoiblM-WoQ',
   'u3koQkFGWonSkTjxkbZ_zIPFWkTXYB_9g9WE1GqxCNZGZscDF52K62OsBXNEASzdx_b7RG0xZzo7hQ',
   'iD2RxOsYsSb0Hvq50MyfmCT5sgDmsWlnO3GXIWVjblIfSWyVTceiNPrAmBSEop7nZH-JUGGCP4r4fA',
   'fxw2bvz0D6gtuTiKwlTXAMEp78kOAvoBEi_gyBSW2enbjHLWqZIo1lb3G7wUdRUGblHY1JTjUSXm_Q',
   'o5lIjODAKqBytFlwy4SVT5LEddYBjz9ZP20p842Ax-SAvpYjqMETfHcljSNo1u7WwLfXz85HdHYPAQ',
   'ImlRw9iZjRUEG3R_yc5bvESPjA-A9gOxHYPi-7-wz_Pali6TAc6-KJ9blJrndpQjKFxSy75gpu39pw',
   'fvxdlPgkYBGDNMtZYGR7HAqTnHF7ppswo1si-QW_jufV9mmTsOK3EVZhUu93wYVFH-8B7dWco6On-g',
   '02auVdER8608C5-1BPf-LaWHH8Nfxrk53fpj7CHO5kByIXZuWuWA9W-ILfS7Fc_XzZx61VaBz5v8vg']},
 'info': {'gameCreation': 1667797091675,
  'gameDuration': 1895,
  

In [97]:
"""
complete = {}

team_1 = []
team_2 = []

for i in range(10):
    if i < 5:
        team_1.append((response['info']['participants'][i]['summonerId'], response['info']['participants'][i]['lane'], response['info']['participants'][i]['championId']))
    else:
        team_2.append((response['info']['participants'][i]['summonerId'], response['info']['participants'][i]['lane'], response['info']['participants'][i]['championId']))

for i in range(5):
    team_1[i] = team_1[i] + (response['info']['teams'][0]['bans'][i]['championId'],)
    team_2[i] = team_2[i] + (response['info']['teams'][1]['bans'][i]['championId'],)

team_1

complete['team_1'] = team_1
complete['team_2'] = team_2

complete['game_version'] = response['info']['gameVersion']
complete['match_id'] = response['metadata']['matchId']

dict_df = {}
dict_df['match_id'] = [complete['match_id'], complete['match_id']]
dict_df['game_version'] = [complete['game_version'], complete['game_version']]
dict_df['team'] = [100, 200]
dict_df['summoner_id1'] = [complete['team_1'][0][0], complete['team_2'][0][0]]
dict_df['summoner_id2'] = [complete['team_1'][1][0], complete['team_2'][1][0]]
dict_df['summoner_id3'] = [complete['team_1'][2][0], complete['team_2'][2][0]]
dict_df['summoner_id4'] = [complete['team_1'][3][0], complete['team_2'][3][0]]
dict_df['summoner_id5'] = [complete['team_1'][4][0], complete['team_2'][4][0]]
dict_df['ban1'] = [complete['team_1'][0][3], complete['team_2'][0][3]]
dict_df['ban2'] = [complete['team_1'][1][3], complete['team_2'][1][3]]
dict_df['ban3'] = [complete['team_1'][2][3], complete['team_2'][2][3]]
dict_df['ban4'] = [complete['team_1'][3][3], complete['team_2'][3][3]]
dict_df['ban5'] = [complete['team_1'][4][3], complete['team_2'][4][3]]
dict_df['lane1'] = [complete['team_1'][0][1], complete['team_2'][0][1]]
dict_df['lane2'] = [complete['team_1'][1][1], complete['team_2'][1][1]]
dict_df['lane3'] = [complete['team_1'][2][1], complete['team_2'][2][1]]
dict_df['lane4'] = [complete['team_1'][3][1], complete['team_2'][3][1]]
dict_df['lane5'] = [complete['team_1'][4][1], complete['team_2'][4][1]]
dict_df['pick1'] = [complete['team_1'][0][2], complete['team_2'][0][2]]
dict_df['pick2'] = [complete['team_1'][1][2], complete['team_2'][1][2]]
dict_df['pick3'] = [complete['team_1'][2][2], complete['team_2'][2][2]]
dict_df['pick4'] = [complete['team_1'][3][2], complete['team_2'][3][2]]
dict_df['pick5'] = [complete['team_1'][4][2], complete['team_2'][4][2]]

pd.DataFrame.from_dict(dict_df).set_index('match_id')
"""

"\n\ncomplete = {}\n\nteam_1 = []\nteam_2 = []\n\nfor i in range(10):\n    if i < 5:\n        team_1.append((response['info']['participants'][i]['summonerId'], response['info']['participants'][i]['lane'], response['info']['participants'][i]['championId']))\n    else:\n        team_2.append((response['info']['participants'][i]['summonerId'], response['info']['participants'][i]['lane'], response['info']['participants'][i]['championId']))\n\nfor i in range(5):\n    team_1[i] = team_1[i] + (response['info']['teams'][0]['bans'][i]['championId'],)\n    team_2[i] = team_2[i] + (response['info']['teams'][1]['bans'][i]['championId'],)\n\nteam_1\n\ncomplete['team_1'] = team_1\ncomplete['team_2'] = team_2\n\ncomplete['game_version'] = response['info']['gameVersion']\ncomplete['match_id'] = response['metadata']['matchId']\n\ndict_df = {}\ndict_df['match_id'] = [complete['match_id'], complete['match_id']]\ndict_df['game_version'] = [complete['game_version'], complete['game_version']]\ndict_df['team

In [30]:
# batch_match_ids('AMERICAS', 'na1', 'DIAMOND', 'I', puuids_df)

In [31]:
for region in REGIONS:
    for task, taskname in TASKS:
        for rank in RANKS:
            for tier in TIERS:
                if task:
                    df = get_task_df()

TypeError: get_task_df() missing 5 required positional arguments: 'task', 'region', 'rank', 'tier', and 'cols'

In [None]:
#summoner ids, ban orders, picks, player lanes, patch/game version, index=match id