# Step 4: Query the PUUIDs of challenger and grandmaster players using their summonerId
Retrieving a player's match history later requires the player's PUUID, so this extra step collects the PUUIDs of our challenger and grandmaster players from their summonerIds. For gold players, the previous step already returned their PUUIDs.


In [1]:
import pyspark.sql.types as T
import pyspark.sql.functions as F

from pyspark import SparkContext, SparkConf, SQLContext

from dateutil.relativedelta import relativedelta
from pyspark.sql.functions import pandas_udf
from pyspark.sql.window import Window

import pandas as pd
import numpy as np

from datetime import date, datetime, timedelta, timezone
import os
import json
import requests
import time
import yaml

In [2]:
# Identify the Spark application and the master it runs against.
appName = "PySpark TFT puuids"
master = "local[*]"

# Assemble the configuration in one parenthesised chain: memory sizing,
# scratch directory, session time zone, dynamic allocation and speculation.
conf = (
    SparkConf()
    .setAppName(appName)
    .setMaster(master)
    .set("spark.executor.memory", "40g")
    .set("spark.driver.memory", "40g")
    .set("spark.executor.memoryOverhead", "8g")
    .set("spark.local.dir", "/home/mai/spark-temp")
    .set("spark.sql.session.timeZone", "UTC")
    .set("spark.dynamicAllocation.enabled", "true")
    .set("spark.dynamicAllocation.minExecutors", "2")
    .set("spark.dynamicAllocation.maxExecutors", "50")
    .set("spark.speculation", "true")
)

# Reuse an existing context if one is already running, then derive the
# SQL context and the session handle used by the rest of the notebook.
sc = SparkContext.getOrCreate(conf=conf)
sqlContext = SQLContext(sc)
spark = sqlContext.sparkSession

25/02/06 21:08:12 WARN Utils: Your hostname, LAPTOP-4O0SI9BK resolves to a loopback address: 127.0.1.1; using 172.26.83.105 instead (on interface eth0)
25/02/06 21:08:12 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/02/06 21:08:13 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
25/02/06 21:08:13 WARN SparkConf: Note that spark.local.dir will be overridden by the value set by the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone/kubernetes and LOCAL_DIRS in YARN).


## Set up

In [3]:
# load the API key from the local YAML file (kept out of the notebook itself)
with open('./api_key.yaml', 'r') as key_file:
    key_config = yaml.safe_load(key_file)
    API_KEY = key_config['api_key']

In [50]:
# which regional server and which ranked bracket this run targets
REGION = 'kr'
TIER = 'GRANDMASTER'
DIVISION = 'I'

# API endpoint for looking up a player's PUUID (the summonerId is appended per request)
BASE_URL = f'https://{REGION}.api.riotgames.com/tft/summoner/v1/summoners'

## Get the players' PUUIDs

In [51]:
# load the player table (including summonerId) saved by the previous step
players_path = f'./data/tft_players/players_{REGION}_{TIER}_{DIVISION}.parquet/'
df = pd.read_parquet(players_path)

In [52]:
# preview the first rows of the loaded player table
df.head()

Unnamed: 0,summonerId,leaguePoints,rank,wins,losses,veteran,inactive,freshBlood,hotStreak
0,dqmUNFs259mIFEdBVOxtiyHWzPZ6WXs6IQbmSw8S7Hh77w,1250,I,374,284,False,False,True,True
1,JCGB8OxzkrKchP6rK2MjNOimzWEW5n09oRWOjUjFAInZ0o...,1217,I,386,346,False,False,True,False
2,qcWxxzK64N_Hd_5jb-wIflEazUQiUuXANrvHbQ9_4o5vSdk,1213,I,387,330,False,False,False,False
3,zL5O66dVe1Mj2NOUBeGYY9HgXxxhn02LBHyuTBtIrPEwnZk,1193,I,163,100,False,False,True,True
4,9uUOsQ89Zuk1genCsk3cFy-y0FqLVwoUS_VLM7fXTG6EWY4,1192,I,122,66,False,False,False,True


In [53]:
# collect the summonerId into a list
# (a plain Python list is indexed by position, so a later ids_[i] lookup
# does not depend on the DataFrame's index labels)
summoner_ids = df['summonerId'].tolist()

In [54]:
# number of summonerIds that will be queried
len(summoner_ids)

600

In [55]:
# function to query player puuid from summonerId
def get_puuid(ids_, timeout=10):
    """Query the summoner endpoint once per summonerId and collect the responses.

    Parameters
    ----------
    ids_ : iterable of str
        The summonerIds to look up (a list, a pandas Series, ...). The ids are
        read in iteration order, so a Series works whatever its index is.
    timeout : float, optional
        Seconds to wait on each HTTP request before giving up. Defaults to 10.

    Returns
    -------
    list of dict
        One parsed JSON response per successfully queried id, in input order.
        Each dict gets an extra 'summonerId' key holding the id that was
        queried. The list is shorter than the input when the loop stops early.

    Notes
    -----
    - HTTP 429 responses are retried indefinitely after sleeping for the
      number of seconds given by the "Retry-After" header (10 if the header
      is missing or not an integer).
    - Any other non-200 status, or a failed request (connection error,
      timeout), prints an error and stops the loop; the records collected so
      far are still returned.
    """
    # Copy into a list so ids are read by position: ids_[i] on a pandas Series
    # is a label lookup and breaks on a non-default index.
    ids = list(ids_)
    # The headers are the same for every request, so build them once.
    headers = {
        'X-Riot-Token': API_KEY,
    }
    puuid_data = []
    i = 0
    while i < len(ids):
        summoner_id = ids[i]
        # construct url
        url = f'{BASE_URL}/{summoner_id}'
        # send request; the timeout keeps a stalled connection from hanging the run
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # Stop, but keep what has already been collected.
            print(f'Error: request failed - {exc}')
            break
        if response.status_code == 200:
            # parse json response
            data = response.json()
            # add summonerid to the data for mapping later
            data['summonerId'] = summoner_id
            puuid_data.append(data)
            i += 1
        elif response.status_code == 429:
            print("Rate limit exceeded. Waiting before retrying...")
            # Extract "Retry-After" header if available; fall back to 10s when
            # it is absent or not a plain integer number of seconds.
            try:
                retry_after = int(response.headers.get("Retry-After", 10))
            except (TypeError, ValueError):
                retry_after = 10
            time.sleep(retry_after)  # Wait, then retry the same id (i is not advanced)
        else:
            print(f'Error: {response.status_code} - {response.text}')
            break
    return puuid_data

In [56]:
# query the API for every summonerId; get_puuid stops at the first unexpected
# error, so the result can be shorter than summoner_ids
puuid_data = get_puuid(summoner_ids)

Rate limit exceeded. Waiting before retrying...
Rate limit exceeded. Waiting before retrying...
Rate limit exceeded. Waiting before retrying...
Rate limit exceeded. Waiting before retrying...
Rate limit exceeded. Waiting before retrying...


In [57]:
# convert response into a pd dataframe (one row per API response record)
puuid_df = pd.json_normalize(puuid_data)
# preview the first rows
puuid_df.head()

Unnamed: 0,id,accountId,puuid,profileIconId,revisionDate,summonerLevel,summonerId
0,dqmUNFs259mIFEdBVOxtiyHWzPZ6WXs6IQbmSw8S7Hh77w,tozex8nfNMJnA_VnXDQI63xofJ-D7m9WlEx17qWJj9DA,RBE6iHUktHgz_gXnOPb4E5GKDknIwz2VShQ-3HIWXpF-PQ...,1636,1738865177000,141,dqmUNFs259mIFEdBVOxtiyHWzPZ6WXs6IQbmSw8S7Hh77w
1,JCGB8OxzkrKchP6rK2MjNOimzWEW5n09oRWOjUjFAInZ0o...,MjeVryD8Akeyqy-fLbdAdD25o1qvtZygiTgFe8zxs0FT-h...,e5VVeturZHwmymLEkGMAEMabWEIAFyhz5HqEqZRjwJQ3fU...,907,1738721203000,2,JCGB8OxzkrKchP6rK2MjNOimzWEW5n09oRWOjUjFAInZ0o...
2,qcWxxzK64N_Hd_5jb-wIflEazUQiUuXANrvHbQ9_4o5vSdk,49MCrIpWyFHsA41Bc6YdBQN5YpYD94xzgXqsm3McqXlcVc...,JZ6wXkKsBxZDs5UFQAVkYP-ZsZGCBv_MZhv44vlif_rB3h...,3376,1738694061000,201,qcWxxzK64N_Hd_5jb-wIflEazUQiUuXANrvHbQ9_4o5vSdk
3,zL5O66dVe1Mj2NOUBeGYY9HgXxxhn02LBHyuTBtIrPEwnZk,GIUZvQoRYw9WlsNlk87Lum6a7N4IzT2yBo8Yk6eGnKdnsU4,HF9QMYd7LqmJBqHwyqIVBXBtIFDpxctZKMmvzyPyEytdvP...,3478,1738854159778,374,zL5O66dVe1Mj2NOUBeGYY9HgXxxhn02LBHyuTBtIrPEwnZk
4,9uUOsQ89Zuk1genCsk3cFy-y0FqLVwoUS_VLM7fXTG6EWY4,VRm6Cb_wiKZE2jjJ1yR8rwJA7WcTYFN_byXffF3xcFU_,ZtRX8x6-0dBAL8VSSNvwxWT9MT7TxfxPT1TKRquH6g5Knv...,6000,1738836707704,641,9uUOsQ89Zuk1genCsk3cFy-y0FqLVwoUS_VLM7fXTG6EWY4


In [58]:
# convert into a PySpark dataframe
# Built from puuid_data rather than from puuid_df: this cell rebinds puuid_df to a
# Spark DataFrame, so feeding puuid_df back in would fail when the cell is re-run.
puuid_df = spark.createDataFrame(pd.json_normalize(puuid_data))
puuid_df.show(10)

+--------------------+--------------------+--------------------+-------------+-------------+-------------+--------------------+
|                  id|           accountId|               puuid|profileIconId| revisionDate|summonerLevel|          summonerId|
+--------------------+--------------------+--------------------+-------------+-------------+-------------+--------------------+
|dqmUNFs259mIFEdBV...|tozex8nfNMJnA_VnX...|RBE6iHUktHgz_gXnO...|         1636|1738865177000|          141|dqmUNFs259mIFEdBV...|
|JCGB8OxzkrKchP6rK...|MjeVryD8Akeyqy-fL...|e5VVeturZHwmymLEk...|          907|1738721203000|            2|JCGB8OxzkrKchP6rK...|
|qcWxxzK64N_Hd_5jb...|49MCrIpWyFHsA41Bc...|JZ6wXkKsBxZDs5UFQ...|         3376|1738694061000|          201|qcWxxzK64N_Hd_5jb...|
|zL5O66dVe1Mj2NOUB...|GIUZvQoRYw9WlsNlk...|HF9QMYd7LqmJBqHwy...|         3478|1738854159778|          374|zL5O66dVe1Mj2NOUB...|
|9uUOsQ89Zuk1genCs...|VRm6Cb_wiKZE2jjJ1...|ZtRX8x6-0dBAL8VSS...|         6000|1738836707704|          64

In [59]:
# row count of the PUUID table, to compare against the number of summonerIds queried
puuid_df.count()

600

In [60]:
# persist the PUUID table as parquet, replacing any earlier export for this bracket
puuids_path = f'./data/tft_puuids/puuids_{REGION}_{TIER}_{DIVISION}.parquet'
puuid_df.write.mode('overwrite').parquet(puuids_path)