# Step 2: Collect summonerId of Grandmaster TFT players

In [1]:
import pyspark.sql.types as T
import pyspark.sql.functions as F

from pyspark import SparkContext, SparkConf, SQLContext

from dateutil.relativedelta import relativedelta
from pyspark.sql.functions import pandas_udf
from pyspark.sql.window import Window

import pandas as pd
import numpy as np

from datetime import date, datetime, timedelta, timezone
import os
import json
import requests
import time
import yaml

In [2]:
# configure and start (or reuse) a local Spark session
appName = "PySpark TFT players by rank"
master = "local[*]"

# Spark settings as (key, value) pairs, applied in the order listed
spark_settings = [
    ("spark.executor.memory", "40g"),
    ("spark.driver.memory", "40g"),
    ("spark.executor.memoryOverhead", "8g"),
    ("spark.local.dir", "/home/mai/spark-temp"),
    ("spark.sql.session.timeZone", "UTC"),
    ("spark.dynamicAllocation.enabled", "true"),
    ("spark.dynamicAllocation.minExecutors", "2"),
    ("spark.dynamicAllocation.maxExecutors", "50"),
    ("spark.speculation", "true"),
]

conf = SparkConf().setAppName(appName).setMaster(master).setAll(spark_settings)

# getOrCreate returns the already-running context if there is one
sc = SparkContext.getOrCreate(conf=conf)
sqlContext = SQLContext(sc)
spark = sqlContext.sparkSession

25/02/05 15:32:41 WARN Utils: Your hostname, LAPTOP-4O0SI9BK resolves to a loopback address: 127.0.1.1; using 172.26.83.22 instead (on interface eth0)
25/02/05 15:32:41 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/02/05 15:32:43 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
25/02/05 15:32:43 WARN SparkConf: Note that spark.local.dir will be overridden by the value set by the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone/kubernetes and LOCAL_DIRS in YARN).
25/02/05 15:32:44 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
25/02/05 15:32:44 WARN Utils: Service 'SparkUI' could not bind on port 4041. Attempting port 4042.


## Set up

In [3]:
# read the API key from the local YAML config file (entry 'api_key')
with open('./api_key.yaml', 'r') as key_file:
    api_config = yaml.safe_load(key_file)

API_KEY = api_config['api_key']

In [15]:
# set up parameters for querying the API
REGION = 'kr'          # region of the player (KR or EUW1)
QUEUE = 'RANKED_TFT'   # queue of the player. We are interested in players who reached Grandmaster in the ranked queue
TIER = 'GRANDMASTER'   # tier / rank of the player 
DIVISION = 'I'         # subdivision of the tier (For Master, Grandmaster, Challenger, there is only 1 subdivision so this defaults to I)

# API endpoint. The tier segment is derived from TIER so that the URL and the
# output file name (which also uses TIER) cannot drift apart.
# NOTE(review): this URL shape is presumably only valid for the apex tiers
# (challenger / grandmaster / master) - confirm before using another TIER.
BASE_URL = f'https://{REGION}.api.riotgames.com/tft/league/v1/{TIER.lower()}'

In [16]:
# construct the url and the headers for the API call
# the queue has to be sent as a named query parameter (`queue=<value>`);
# a bare `?RANKED_TFT` is a key without a value and does not select the queue
url = f'{BASE_URL}?queue={QUEUE}'
# the API key is passed in the X-Riot-Token request header
headers = {
        'X-Riot-Token': API_KEY
    }

## Query the API

In [17]:
# get the response
# an explicit timeout (seconds) keeps the cell from hanging forever if the
# server never answers; requests has no timeout by default
response = requests.get(url, headers=headers, timeout=30)

In [18]:
# fail loudly on any non-200 answer instead of silently leaving `data`
# undefined (or stale from an earlier run) for the cells below
if response.status_code != 200:
    raise RuntimeError(
        f'API request failed with status {response.status_code}: {response.text}'
    )

# parse json response and keep the list stored under 'entries'
data = response.json()['entries']

In [19]:
# convert the json response (the 'entries' list) into a Pandas dataframe
tft_players = pd.json_normalize(data)

In [20]:
# preview the first rows of the Pandas dataframe
tft_players.head()

Unnamed: 0,summonerId,leaguePoints,rank,wins,losses,veteran,inactive,freshBlood,hotStreak
0,dqmUNFs259mIFEdBVOxtiyHWzPZ6WXs6IQbmSw8S7Hh77w,1250,I,374,284,False,False,True,True
1,JCGB8OxzkrKchP6rK2MjNOimzWEW5n09oRWOjUjFAInZ0o...,1217,I,386,346,False,False,True,False
2,qcWxxzK64N_Hd_5jb-wIflEazUQiUuXANrvHbQ9_4o5vSdk,1213,I,387,330,False,False,False,False
3,zL5O66dVe1Mj2NOUBeGYY9HgXxxhn02LBHyuTBtIrPEwnZk,1193,I,163,100,False,False,True,True
4,9uUOsQ89Zuk1genCsk3cFy-y0FqLVwoUS_VLM7fXTG6EWY4,1192,I,122,66,False,False,False,True


In [21]:
# (number of rows, number of columns) of the Pandas dataframe
tft_players.shape

(600, 9)

In [22]:
# convert the Pandas dataframe into a PySpark dataframe for later usage
# the name `tft_players` is rebound from Pandas to PySpark here, so the guard
# makes this cell safe to re-run: it only converts while the variable still
# holds the Pandas dataframe
if isinstance(tft_players, pd.DataFrame):
    tft_players = spark.createDataFrame(tft_players)

In [23]:
# preview the PySpark dataframe
tft_players.show()

+--------------------+------------+----+----+------+-------+--------+----------+---------+
|          summonerId|leaguePoints|rank|wins|losses|veteran|inactive|freshBlood|hotStreak|
+--------------------+------------+----+----+------+-------+--------+----------+---------+
|dqmUNFs259mIFEdBV...|        1250|   I| 374|   284|  false|   false|      true|     true|
|JCGB8OxzkrKchP6rK...|        1217|   I| 386|   346|  false|   false|      true|    false|
|qcWxxzK64N_Hd_5jb...|        1213|   I| 387|   330|  false|   false|     false|    false|
|zL5O66dVe1Mj2NOUB...|        1193|   I| 163|   100|  false|   false|      true|     true|
|9uUOsQ89Zuk1genCs...|        1192|   I| 122|    66|  false|   false|     false|     true|
|DLFOjULdDwm3vOgoe...|        1187|   I| 431|   389|   true|   false|     false|     true|
|VBXyqNs_5AeYw1TL-...|        1171|   I| 379|   326|  false|   false|     false|    false|
|RmfKs0P8FYoeMAunH...|        1168|   I| 178|   107|  false|   false|      true|    false|

In [24]:
# persist the PySpark dataframe as parquet, replacing any previous export
# the file name encodes region, tier and division of the collected players
output_path = f'./data/tft_players/players_{REGION}_{TIER}_{DIVISION}.parquet'
tft_players.write.mode('overwrite').parquet(output_path)