# Step 3: Collect the PUUID of Gold TFT players

In [1]:
import pyspark.sql.types as T
import pyspark.sql.functions as F

from pyspark import SparkContext, SparkConf, SQLContext

from dateutil.relativedelta import relativedelta
from pyspark.sql.functions import pandas_udf
from pyspark.sql.window import Window

import pandas as pd
import numpy as np

from datetime import date, datetime, timedelta, timezone
import os
import json
import requests
import time
import yaml

In [2]:
# Build the Spark configuration for a local session that uses every available core.
appName = "PySpark Lol players by rank"
master = "local[*]"

# Spark properties applied on top of the application name and master URL,
# listed in the order they are set.
spark_properties = [
    ("spark.executor.memory", "40g"),
    ("spark.driver.memory", "40g"),
    ("spark.executor.memoryOverhead", "8g"),
    ("spark.local.dir", "/home/mai/spark-temp"),
    ("spark.sql.session.timeZone", "UTC"),
    ("spark.dynamicAllocation.enabled", "true"),
    ("spark.dynamicAllocation.minExecutors", "2"),
    ("spark.dynamicAllocation.maxExecutors", "50"),
    ("spark.speculation", "true"),
]

conf = SparkConf().setAppName(appName).setMaster(master).setAll(spark_properties)

# Reuse a running context if one exists, then expose the SQL entry points.
sc = SparkContext.getOrCreate(conf=conf)
sqlContext = SQLContext(sc)
spark = sqlContext.sparkSession

25/02/06 23:52:56 WARN Utils: Your hostname, LAPTOP-4O0SI9BK resolves to a loopback address: 127.0.1.1; using 172.26.83.105 instead (on interface eth0)
25/02/06 23:52:56 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/02/06 23:52:58 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
25/02/06 23:52:58 WARN SparkConf: Note that spark.local.dir will be overridden by the value set by the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone/kubernetes and LOCAL_DIRS in YARN).
25/02/06 23:52:59 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


## Set up

In [3]:
# read the API key from the local YAML file (stored under the 'api_key' entry)
with open('./api_key.yaml', 'r') as key_file:
    key_config = yaml.safe_load(key_file)

API_KEY = key_config['api_key']

In [15]:
# Parameters for querying the league entries API.

# region of the players (KR or EUW1)
REGION = 'kr'

# API endpoint; for players below Master rank we use this endpoint,
# which also returns the PUUID of each player
BASE_URL = f'https://{REGION}.api.riotgames.com/tft/league/v1/entries'

# queue of the players: we are interested in the ranked TFT queue
QUEUE = 'RANKED_TFT'

# tier / rank of the players
TIER = 'GOLD'

# subdivision of the tier (ranks below Master have 4 subdivisions: I, II, III, IV)
DIVISION = 'I'

In [16]:
# number of entries expected on a full result page; get_players treats a page
# with fewer entries than this as the last page and stops paginating
ENTRY_LIMIT = 205

In [18]:
# function to return the league entries (including the PUUID) of TFT players in one tier/division
def get_players(queue, tier, division, entry_limit, max_pages=5, timeout=10):
    """Collect league entries for one queue / tier / division.

    Pages are requested in ascending order starting at page 1 from
    ``{BASE_URL}/{tier}/{division}?queue={queue}&page={page}``, using the
    module-level ``BASE_URL`` and authenticating with the module-level
    ``API_KEY`` in the ``X-Riot-Token`` header.

    Args:
        queue: Queue identifier sent as the ``queue`` query parameter.
        tier: Tier used as a path segment of the URL.
        division: Division used as a path segment of the URL.
        entry_limit: Size of a full page. A page with fewer entries than this
            is treated as the last page.
        max_pages: Maximum number of pages to fetch. Defaults to 5, the cap
            that was previously hard-coded; pass ``None`` to remove the cap.
        timeout: Timeout in seconds passed to ``requests.get`` so a stalled
            connection cannot block the notebook indefinitely.

    Returns:
        A list with the parsed JSON entries of every fetched page, in page
        order. If a request returns a status other than 200 or 429, the error
        is printed and the entries collected so far are returned.
    """
    players = []
    # the headers do not change between pages, so build them once
    headers = {
        'X-Riot-Token': API_KEY
    }
    page = 1
    while True:
        # construct the url from the arguments (not from the notebook globals)
        url = f'{BASE_URL}/{tier}/{division}?queue={queue}&page={page}'
        response = requests.get(url, headers=headers, timeout=timeout)

        if response.status_code == 200:
            # parse json response
            data = response.json()
            players.extend(data)
            reached_page_cap = max_pages is not None and page >= max_pages
            # a short page means there is nothing left to fetch
            if len(data) < entry_limit or reached_page_cap:
                break
            page += 1
        elif response.status_code == 429:  # Rate-limited response
            print("Rate limit exceeded. Waiting before retrying...")
            # Extract "Retry-After" header if available, then retry the same page
            retry_after = int(response.headers.get("Retry-After", 10))
            time.sleep(retry_after)  # Wait for the specified time
        else:
            print(f'Error: {response.status_code} - {response.text}')
            break
    return players

## Get the players by rank

In [19]:
# fetch the league entries for the configured queue, tier and division
tft_players = get_players(
    queue=QUEUE,
    tier=TIER,
    division=DIVISION,
    entry_limit=ENTRY_LIMIT,
)

In [20]:
# flatten the list of parsed JSON entries returned by get_players
# into a pandas dataframe
tft_players_df = pd.json_normalize(tft_players)

In [21]:
# sanity check: (number of rows, number of columns) of the pandas dataframe
tft_players_df.shape

(1025, 13)

In [22]:
# convert into a PySpark dataframe
# Built from the raw entries rather than from tft_players_df itself, so this
# cell can be re-run safely: the previous version rebound tft_players_df from
# a pandas dataframe to a Spark dataframe and failed when executed twice.
tft_players_df = spark.createDataFrame(pd.json_normalize(tft_players))

In [23]:
# preview the first 10 rows of the dataframe
tft_players_df.show(10)

+--------------------+--------------------+----------+----+----+--------------------+------------+----+------+-------+--------+----------+---------+
|               puuid|            leagueId| queueType|tier|rank|          summonerId|leaguePoints|wins|losses|veteran|inactive|freshBlood|hotStreak|
+--------------------+--------------------+----------+----+----+--------------------+------------+----+------+-------+--------+----------+---------+
|uJr5xHJWi1YbxEjmA...|5f38105e-3c70-4f7...|RANKED_TFT|GOLD|   I|OGh2Z6KNBeg1vwDT0...|          24|  94|   106|  false|   false|     false|    false|
|UKNPf16zVG0-hn5m4...|c003cdd3-1b68-419...|RANKED_TFT|GOLD|   I|ww9_0qou-pPMD5asd...|          32|  27|    24|  false|   false|     false|    false|
|wogzb-1g1KxdC9r1p...|bf5f46cc-6735-436...|RANKED_TFT|GOLD|   I|zxUZvHcJ_lI4sZ0Tj...|          12|  33|    29|  false|   false|     false|    false|
|aumJ0m9ngyLklNA2Z...|eaf9004c-1551-487...|RANKED_TFT|GOLD|   I|1ICDes8AlSNGsxysX...|           0|  37|   

In [24]:
# write to a parquet dataset named after the region, tier and division,
# overwriting the output of any previous run
output_path = f'./data/tft_players/players_{REGION}_{TIER}_{DIVISION}.parquet/'
tft_players_df.write.parquet(output_path, mode='overwrite')