In [1]:
# IMPORTS
import requests
import numpy as np
import time
import pandas as pd
import os
import datetime
from io import TextIOWrapper
from fastparquet import write
from pandas import DataFrame
from riotwatcher import LolWatcher, ApiError
from pynput import keyboard
from pathlib import Path

In [2]:
path = Path(os.getcwd())
PARENT_DIR = path.parent.absolute()

In [3]:
API_KEY = 'RGAPI-fd809b14-ba86-4c2b-b6cf-853aae895c14'
REGIONS = ['na1'] # , 'kr', 'euw1'
RANKS = ['PLATINUM', 'DIAMOND']
TIERS = ['I', 'II', 'III', 'IV']
DATA_DIR = os.path.join(PARENT_DIR, 'data')
lol_watcher = LolWatcher(API_KEY)

In [4]:
def write_to_parquet(task: str, df: DataFrame, region: str, rank: str, tier: str) -> str:    
    DIRECTORY_PATH = os.path.join(PARENT_DIR, f'data/{task}/{region}')
    if not os.path.exists(DIRECTORY_PATH):
        # create the path
        os.mkdir(DIRECTORY_PATH)

    file_name = f'{rank}_{tier}.parquet'
    file_path = os.path.join(DIRECTORY_PATH, file_name)

    # create the parquet if it doesn't exist
    if not os.path.isfile(file_path):
        print(f'Creating {file_name}')
        write(file_path, df)
    else:
        write(file_path, df, append=True)
    return file_path

## Step 1: Get the Summoner IDS

In [5]:
EXPECTED_SUMMONERS_OUT = 205

In [6]:
def get_summoner_ids(region: str, rank: str, division: str, start_page: int, num_pages=4) -> tuple[DataFrame, bool]:
    """
    Gets random summoner ids from North America from the various ranks (Diamond:Platinum:Gold:Silver:Bronze)
    Returns a pandas dictionary only with summoner ids that have wins + losses >= 20
    Args:
        region (str): The region we want to search in
        rank (str): The rank we want to get from
        division (str): The divions we want to get from
        num_pages (int, optional): An amount of summoner ids to get, a group of 204 are a 'page'. Defaults to 4.

    Returns:
        bool: Whether getting the summoner ids was successful
    """
    print(f"Current rank {rank} {division}")
    df = pd.DataFrame(columns=['leagueId',
                               'queueType',
                               'tier',
                               'rank',
                               'summonerId',
                               'summonerName',
                               'leaguePoints',
                               'wins',
                               'losses',
                               'veteran',
                               'inactive',
                               'freshBlood',
                               'hotStreak'])
    more_summoners = True
    for page_num in range(start_page, start_page+num_pages+1):
        try:
            # attempt to get a response
            response = lol_watcher.league.entries(region, 'RANKED_SOLO_5x5', rank, division, page_num)

            # if we don't get the expected number of summoners, we have probably reached the end of the catalog
            # in another way, there are no more players in the rank and divison to get 
            if len(response) < EXPECTED_SUMMONERS_OUT:
                more_summoners = False
                
            for summoner in response:
                # We only take people with greater than or equal to 20 combined wins and losses
                if summoner['wins'] + summoner['losses'] >= 20:
                    df = pd.concat([df, pd.DataFrame.from_records(summoner, index=[0])], ignore_index=True)
        except ApiError as err:
            print(err)
            print(f'Failed at page number: {page_num}\nFailed at rank and division: {rank} {division}')
            return df, False

    return df, more_summoners

In [7]:
def batch_summoner_ids(region: str, rank: str, division: str):
    # get the summoner ids
    page_increments = 20
    max_pages = 1_000
    for start_page in range(1, max_pages + 1, page_increments):
        summoners, more = get_summoner_ids(region, rank, division, start_page, page_increments)
        write_to_parquet('summoner', summoners, region, rank, division)
        if not more:
            return
            

In [8]:
# get_save_summoner_ids('na1', 'DIAMOND', 'II')

In [9]:
# pd.read_parquet('/Repository/MOBA_Project/data/summoner/na1/summoner_DIAMOND_II.parquet')

## Step 2: Get the PUUIDs

In [10]:
def get_summoner_puuids(region: str, summonerID: str) -> DataFrame:
    """
    Gets random summoner ids from North America from the various ranks (Diamond:Platinum:Gold:Silver:Bronze)
    Returns a pandas dictionary only with summoner ids that have wins + losses >= 20
    Args:
        region (str): The region we want to search in
        rank (str): The rank we want to get from
        division (str): The divions we want to get from
        num_pages (int, optional): An amount of summoner ids to get, a group of 204 are a 'page'. Defaults to 4.

    Returns:
        bool: Whether getting the summoner ids was successful
    """
    max_retries = 3
    retry_delay = 2
    for attempt in range(max_retries):
        try:
            # attempt to get a response
            response = lol_watcher.summoner.by_id(region, summonerID)

            return pd.DataFrame.from_records(response, index=[0])

        except ApiError as err:
            print("Attempt", attempt + 1, "failed:", err)
            time.sleep(retry_delay)
    
    print(f'Failed at summonerID: {summonerID}')
    return None

In [11]:
def batch_puuids(region: str, rank: str, tier: str, summoners: DataFrame):
    print(f'Number of {rank} {tier} summonerIds: {summoners.shape[0]}')
    seen_summoners = set()
    num_requested = 0
    out_df = pd.DataFrame()
    for summoner in summoners['summonerId']:
        num_requested += 1
        
        if summoner in seen_summoners:
            continue

        seen_summoners.add(summoner)
        df = get_summoner_puuids(region, summoner)

        if df is None:
            continue

        if out_df.shape[0] == 0:
            out_df = df.copy(deep=True)
        else:
            out_df = pd.concat([out_df, df], axis=0)

        if num_requested % 100 == 0:
            write_to_parquet(out_df, region, rank, tier)
            out_df = pd.DataFrame()
            print(f'Finished: {num_requested}')
            
    write_to_parquet(out_df, region, rank, tier)

In [12]:
def get_task_df(task, region, rank, tier, cols) -> DataFrame:
    prev_tasks_map = {'puuids': 'summoner', 
                      'matchIds': 'puuids',
                      'matchData': 'matchIds',
                      'ChampExp': 'MatchData'}
    prev_task = prev_tasks_map[task]

    # read the summoner dataframe for the given region
    file_name = f'//{prev_task}//{region}//{rank}_{tier}.parquet'
    complete_fp = DATA_DIR + file_name

    return pd.read_parquet(complete_fp, columns=[cols])

In [13]:
summoner_df = get_task_df('puuids', 'na1', 'DIAMOND', 'II', 'summonerId')

In [14]:
summoner_df

Unnamed: 0,summonerId
0,m5xohNS6LfBgVUXBaV6EuJpSIId8uibtiTX8piiGjmmiiSWg
1,wru_KFPtk_hahGE1Ej0cLcOnDJi4sq8k7SZP8_oX8T11q7en
2,ssFcmFGxNSGb9z5dzLX-Tp_XYknzDjPZamGnXIyl_LyA3fGr
3,NAzkR2zvvdQ8MrMcMb9oCwXXdM69Q0GjhjCWVx5P63ZKJzqA
4,EQbDpjaKm2lY1RTPgcEKWG4Hqn1TD6myGwEUmLYqAXOIDAeK
...,...
4262,G59MKV3zZMIcY2fgH49hrJ6bV4jUZ6z86fP6CrJhMCpUmy...
4263,q37Tm5iS1f14JJT4xobDHqtJFUnTNamK3prVnC54ZVQZug...
4264,aJG5PK0MPRIZFrmbrDWvBUwbXIG4Hf8Csz13HFa8N4Gs_X...
4265,pBVfZ3k0gLHmAU3QRO9u26cf8UEb-fi9d7RKDwSJA0x1Zb...


In [16]:
# get_puuids('na1', 'DIAMOND', 'II', summoner_df)

## Step 3: Get the Match IDs

In [54]:
def get_match_id(route: str, puuid: str) -> DataFrame:
    max_retries = 3
    retry_delay = 2
    for attempt in range(max_retries):
        try:
            # attempt to get a response
            response = lol_watcher.match.matchlist_by_puuid(route, puuid, queue=700, count=100)
            if len(response) == 0:
                return None

            out_df = pd.DataFrame.from_dict({'match_id': response})
            out_df['puuid'] = puuid
            return out_df

        except ApiError as err:
            print("Attempt", attempt + 1, "failed:", err)
            time.sleep(retry_delay)
    
    print(f'Failed at puuid: {puuid}')
    return None

In [55]:
puuids_df = get_task_df('matchIds', 'na1', 'DIAMOND', 'I', 'puuid')

In [56]:
puuids_df

Unnamed: 0_level_0,puuid
index,Unnamed: 1_level_1
0,bmQ_Q7xWpOM9sD0HkFODWy5qAFShLoGS5V2utYqiBZ5jv8...
0,ta7pflkBOweD0usqFX4xFjGgK1xcArgwWRO8B-7C8O4x8O...
0,9NP1wvNEn2QYtfkrjl7Td40PAMv1v3eoAMStYP_561lDD7...
0,fwKYSlyH3J5FHHCwzjyV7AhC3wDtpgP6uPCs8hszvZx_8p...
0,2Ww9m4cRWQ828Sg331LOxHPMcLm2mPeET8sOhx7k27Pm13...
...,...
0,Dtc2H2KIM4xpoemBM3TIq5SJgV3cCHKhTFpn0_hL78vGMI...
0,1OZTvjmafRoTqfu6WVi_GOz-cLDoqUC8QFvJ95d2lBZ94j...
0,7BpNCMmB1u4e3Wazef1dwbbLaGVRQhR2dppJ58Ochg7md2...
0,b0lIIuGkqXFwvJqFMxQQ-ooyJ_QViKjJUs92D5waSq0HLM...


In [57]:
def batch_match_ids(route: str, region: str, rank:str, tier:str, puuids: DataFrame)-> DataFrame:
    print(f'Number of {rank} {tier} puuids: {puuids.shape[0]}')
    seen_puuids = set()
    num_requested = 0
    out_df = pd.DataFrame()
    for puuid in puuids['puuid']:
        num_requested += 1
        
        if puuid in seen_puuids:
            continue

        seen_puuids.add(puuid)
        df = get_match_id(route, puuid)

        if df is None:
            continue

        if out_df.shape[0] == 0:
            out_df = df.copy(deep=True)
        else:
            out_df = pd.concat([out_df, df], axis=0)

        if num_requested % 100 == 0:
            write_to_parquet(out_df, region, rank, tier)
            out_df = pd.DataFrame()
            print(f'Finished: {num_requested}')

    write_to_parquet(out_df, region, rank, tier)

In [58]:
get_match_id('AMERICAS', 'IRPWksl-d9yDN2RO9zPH1FhYau3CMjEOCTdgyeDyjL7D9WQ_PNk39Hwt4VT6cg0Mca9uBRjn78ex1w')

Attempt 1 failed: 400 Client Error: Bad Request for url: https://americas.api.riotgames.com/lol/match/v5/matches/by-puuid/IRPWksl-d9yDN2RO9zPH1FhYau3CMjEOCTdgyeDyjL7D9WQ_PNk39Hwt4VT6cg0Mca9uBRjn78ex1w/ids?count=100&queue=700
Attempt 2 failed: 400 Client Error: Bad Request for url: https://americas.api.riotgames.com/lol/match/v5/matches/by-puuid/IRPWksl-d9yDN2RO9zPH1FhYau3CMjEOCTdgyeDyjL7D9WQ_PNk39Hwt4VT6cg0Mca9uBRjn78ex1w/ids?count=100&queue=700
Attempt 3 failed: 400 Client Error: Bad Request for url: https://americas.api.riotgames.com/lol/match/v5/matches/by-puuid/IRPWksl-d9yDN2RO9zPH1FhYau3CMjEOCTdgyeDyjL7D9WQ_PNk39Hwt4VT6cg0Mca9uBRjn78ex1w/ids?count=100&queue=700
Failed at puuid: IRPWksl-d9yDN2RO9zPH1FhYau3CMjEOCTdgyeDyjL7D9WQ_PNk39Hwt4VT6cg0Mca9uBRjn78ex1w


## Step 4: Get Match Data

In [17]:
def get_match_data(route: str, match_id: str) -> DataFrame:
    max_retries = 3
    retry_delay = 2

    for attempt in range(max_retries):
        try:
            # attempt to get a response
            response = lol_watcher.match.by_id(route, match_id)
            if len(response) == 0:
                return None

            # store complete data in dictionary
            complete = {}

            team_1 = []
            team_2 = []

            for i in range(10):
                if i < 5:
                    team_1.append((response['info']['participants'][i]['summonerId'], response['info']['participants'][i]['teamPosition'], response['info']['participants'][i]['championId']))
                else:
                    team_2.append((response['info']['participants'][i]['summonerId'], response['info']['participants'][i]['teamPosition'], response['info']['participants'][i]['championId']))

            for i in range(5):
                team_1[i] = team_1[i] + (response['info']['teams'][0]['bans'][i]['championId'],)
                team_2[i] = team_2[i] + (response['info']['teams'][1]['bans'][i]['championId'],)

            complete['team_1'] = team_1
            complete['team_2'] = team_2

            complete['game_version'] = response['info']['gameVersion']
            complete['match_id'] = response['metadata']['matchId']

            #organizes data into dataframe that we want
            dict_df = {}
            dict_df['match_id'] = [complete['match_id'], complete['match_id']]
            dict_df['game_version'] = [complete['game_version'], complete['game_version']]
            dict_df['team'] = [100, 200]
            dict_df['summoner_id1'] = [complete['team_1'][0][0], complete['team_2'][0][0]]
            dict_df['summoner_id2'] = [complete['team_1'][1][0], complete['team_2'][1][0]]
            dict_df['summoner_id3'] = [complete['team_1'][2][0], complete['team_2'][2][0]]
            dict_df['summoner_id4'] = [complete['team_1'][3][0], complete['team_2'][3][0]]
            dict_df['summoner_id5'] = [complete['team_1'][4][0], complete['team_2'][4][0]]
            dict_df['ban1'] = [complete['team_1'][0][3], complete['team_2'][0][3]]
            dict_df['ban2'] = [complete['team_1'][1][3], complete['team_2'][1][3]]
            dict_df['ban3'] = [complete['team_1'][2][3], complete['team_2'][2][3]]
            dict_df['ban4'] = [complete['team_1'][3][3], complete['team_2'][3][3]]
            dict_df['ban5'] = [complete['team_1'][4][3], complete['team_2'][4][3]]
            dict_df['lane1'] = [complete['team_1'][0][1], complete['team_2'][0][1]]
            dict_df['lane2'] = [complete['team_1'][1][1], complete['team_2'][1][1]]
            dict_df['lane3'] = [complete['team_1'][2][1], complete['team_2'][2][1]]
            dict_df['lane4'] = [complete['team_1'][3][1], complete['team_2'][3][1]]
            dict_df['lane5'] = [complete['team_1'][4][1], complete['team_2'][4][1]]
            dict_df['pick1'] = [complete['team_1'][0][2], complete['team_2'][0][2]]
            dict_df['pick2'] = [complete['team_1'][1][2], complete['team_2'][1][2]]
            dict_df['pick3'] = [complete['team_1'][2][2], complete['team_2'][2][2]]
            dict_df['pick4'] = [complete['team_1'][3][2], complete['team_2'][3][2]]
            dict_df['pick5'] = [complete['team_1'][4][2], complete['team_2'][4][2]]
            dict_df['won'] = [response['info']['teams'][0]['win'], response['info']['teams'][1]['win']]

            return pd.DataFrame.from_dict(dict_df).set_index('match_id')

        except ApiError as err:
            print("Attempt", attempt + 1, "failed:", err)
            time.sleep(retry_delay)
    
    print(f'Failed at puuid: {match_id}')
    return None

In [62]:
def batch_match_data(route: str, region: str, rank:str, tier:str, match_ids: DataFrame)-> DataFrame:
    print(f'Number of {rank} {tier} match_ids: {match_ids.shape[0]}')
    seen_match_ids = set()
    num_requested = 0
    out_df = pd.DataFrame()
    for match_id in match_ids['match_id']:
        num_requested += 1
        
        if match_id in seen_match_ids:
            continue

        seen_match_ids.add(match_id)
        df = get_match_data(route, match_id)

        if df is None:
            continue

        if out_df.shape[0] == 0:
            out_df = df.copy(deep=True)
        else:
            out_df = pd.concat([out_df, df], axis=0)

        if num_requested % 100 == 0:
            write_to_parquet('match_data', out_df, region, rank, tier)
            out_df = pd.DataFrame()
            print(f'Finished: {num_requested}')

    write_to_parquet('match_data', out_df, region, rank, tier)

In [63]:
batch_match_data('AMERICAS', 'na1', 'DIAMOND', 'I', get_match_id('AMERICAS', 'IRPWksl-d9yDN2RO9zPH1FhYau3CMjEOCTdgyeDyjL7D9WQ_PNk39Hwt4VT6cg0Mca9uBRjn78ex1w'))

Attempt 1 failed: 400 Client Error: Bad Request for url: https://americas.api.riotgames.com/lol/match/v5/matches/by-puuid/IRPWksl-d9yDN2RO9zPH1FhYau3CMjEOCTdgyeDyjL7D9WQ_PNk39Hwt4VT6cg0Mca9uBRjn78ex1w/ids?count=100&queue=700
Attempt 2 failed: 400 Client Error: Bad Request for url: https://americas.api.riotgames.com/lol/match/v5/matches/by-puuid/IRPWksl-d9yDN2RO9zPH1FhYau3CMjEOCTdgyeDyjL7D9WQ_PNk39Hwt4VT6cg0Mca9uBRjn78ex1w/ids?count=100&queue=700
Attempt 3 failed: 400 Client Error: Bad Request for url: https://americas.api.riotgames.com/lol/match/v5/matches/by-puuid/IRPWksl-d9yDN2RO9zPH1FhYau3CMjEOCTdgyeDyjL7D9WQ_PNk39Hwt4VT6cg0Mca9uBRjn78ex1w/ids?count=100&queue=700
Failed at puuid: IRPWksl-d9yDN2RO9zPH1FhYau3CMjEOCTdgyeDyjL7D9WQ_PNk39Hwt4VT6cg0Mca9uBRjn78ex1w


AttributeError: 'NoneType' object has no attribute 'shape'

## 3. Champion Experience

In [None]:
def get_champion_experience(region: str, summoner_id: str) -> DataFrame:
    max_retries = 3
    retry_delay = 2
    for attempt in range(max_retries):
        try:
            # attempt to get a response
            response = lol_watcher.champion_mastery.by_summoner(region, summoner_id)
            if len(response) == 0:
                return None

            output_dict = {}

            output_dict['summonerId'] = []
            output_dict['championId'] = []
            output_dict['championPoints'] = []
            output_dict['lastPlayTime'] = []

            for champion in response:
                if champion['championPoints'] < 10000:
                    break

                output_dict['summonerId'] += [champion['summonerId']]
                output_dict['championId'] += [champion['championId']]
                output_dict['championPoints'] += [champion['championPoints']]
                output_dict['lastPlayTime'] += [champion['lastPlayTime']]

            return pd.DataFrame.from_dict(output_dict)

        except ApiError as err:
            print("Attempt", attempt + 1, "failed:", err)
            time.sleep(retry_delay)
    
    print(f'Failed at puuid: {summoner_id}')
    return None

In [None]:
def batch_champion_experience(route: str, region: str, rank:str, tier:str, summoner_df: np.array) -> DataFrame:
    print(f'Number of {rank} {tier} match_ids: {summoner_df.shape[0]}')
    seen_summoner_ids = set()
    num_requested = 0
    out_df = pd.DataFrame()
    for summoner_id in summoner_df:
        num_requested += 1
        
        if summoner_id in seen_summoner_ids:
            continue

        seen_summoner_ids.add(summoner_id)
        df = get_champion_experience(region, summoner_id)

        if df is None:
            continue

        if out_df.shape[0] == 0:
            out_df = df.copy(deep=True)
        else:
            out_df = pd.concat([out_df, df], axis=0)

        if num_requested % 100 == 0:
            write_to_parquet('champion_experience', out_df, region, rank, tier)
            out_df = pd.DataFrame()
            print(f'Finished: {num_requested}')

    write_to_parquet('champion_experience', out_df, region, rank, tier)

In [None]:
get_champion_experience('na1', 'rf3hsRA_mwhGQzBSpGvQyNjCUk6rEal9VPnl7a_lQFXtfjs')

Unnamed: 0,summonerId,championId,championPoints,lastPlayTime
0,rf3hsRA_mwhGQzBSpGvQyNjCUk6rEal9VPnl7a_lQFXtfjs,11,182215,1655316799000
1,rf3hsRA_mwhGQzBSpGvQyNjCUk6rEal9VPnl7a_lQFXtfjs,107,137376,1679979550000
2,rf3hsRA_mwhGQzBSpGvQyNjCUk6rEal9VPnl7a_lQFXtfjs,38,133619,1684731740000
3,rf3hsRA_mwhGQzBSpGvQyNjCUk6rEal9VPnl7a_lQFXtfjs,29,133451,1684793032000
4,rf3hsRA_mwhGQzBSpGvQyNjCUk6rEal9VPnl7a_lQFXtfjs,64,115955,1680930600000
...,...,...,...,...
115,rf3hsRA_mwhGQzBSpGvQyNjCUk6rEal9VPnl7a_lQFXtfjs,221,12255,1680757685000
116,rf3hsRA_mwhGQzBSpGvQyNjCUk6rEal9VPnl7a_lQFXtfjs,429,12048,1685415559000
117,rf3hsRA_mwhGQzBSpGvQyNjCUk6rEal9VPnl7a_lQFXtfjs,69,10989,1680488391000
118,rf3hsRA_mwhGQzBSpGvQyNjCUk6rEal9VPnl7a_lQFXtfjs,50,10113,1680321046000


In [None]:
TASKS = [(batch_summoner_ids, 'summoner', None),
         (batch_puuids, 'puuids', 'summonerId'), 
         (batch_match_ids, 'matchIds', 'puuid'), 
         (batch_match_data, 'matchData', 'match_id'), 
         (batch_champion_experience, 'champExp', '')]

NameError: name 'batch_match_data' is not defined

In [18]:
for region in REGIONS:
    for task, taskname, colToGet in TASKS:
        for rank in RANKS:
            for tier in TIERS:
                if task:
                    if taskname != 'summoner':
                        if taskname == 'champExp':
                            for s in [f'summoner_id{i}' for i in range(1, 6)]:
                                df = get_task_df(taskname, region, rank, tier, s)
                                task()
                        else:
                            df = get_task_df(taskname, region, rank, tier, colToGet)
                            task(region)
                    else:
                        task(region, rank, tier)
                    

NameError: name 'TASKS' is not defined

In [None]:
#summoner ids, ban orders, picks, player lanes, patch/game version, index=match id