In [1]:
import pyspark.sql.types as T
import pyspark.sql.functions as F

from pyspark import SparkContext, SparkConf, SQLContext

from dateutil.relativedelta import relativedelta
from pyspark.sql.functions import pandas_udf
from pyspark.sql.window import Window

import pandas as pd
import numpy as np

from datetime import date, datetime, timedelta, timezone
import os
import json
import requests
import time
import yaml

In [2]:
appName = "PySpark Lol players by rank"
master = "local[*]"
conf = SparkConf() \
    .setAppName(appName) \
    .setMaster(master) \
    .set("spark.executor.memory", "40g") \
    .set("spark.driver.memory", "40g") \
    .set("spark.executor.memoryOverhead", "8g") \
    .set("spark.local.dir", "/home/mai/spark-temp") \
    .set("spark.sql.session.timeZone", "UTC") \
    .set("spark.dynamicAllocation.enabled", "true") \
    .set("spark.dynamicAllocation.minExecutors", "2") \
    .set("spark.dynamicAllocation.maxExecutors", "50") \
    .set("spark.speculation", "true") 
   
sc = SparkContext.getOrCreate(conf=conf)
sqlContext = SQLContext(sc)
spark = sqlContext.sparkSession

25/02/05 14:59:50 WARN Utils: Your hostname, LAPTOP-4O0SI9BK resolves to a loopback address: 127.0.1.1; using 172.26.83.22 instead (on interface eth0)
25/02/05 14:59:50 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/02/05 14:59:52 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
25/02/05 14:59:52 WARN SparkConf: Note that spark.local.dir will be overridden by the value set by the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone/kubernetes and LOCAL_DIRS in YARN).


## Set up

In [3]:
with open('./api_key.yaml', 'r') as file:
    API_KEY = yaml.safe_load(file)['api_key']

In [4]:
API_KEY

'RGAPI-1a8d985b-72d0-45c3-a1b5-bd1be81564a0'

## Get the players by rank

In [14]:
# function to get the players
def get_players(queue, tier, division, entry_limit):
    players = []
    page = 1
    is_paginating = True
    while is_paginating:
        # construct url
        url = f'{BASE_URL}/{QUEUE}/{TIER}/{DIVISION}?page={page}'
        # headers
        headers = {
            'X-Riot-Token': API_KEY
        }
        # send request
        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            # parse json response
            data = response.json()
            players.extend(data)
            if len(data) < entry_limit:
                is_paginating = False
            else:
                page += 1
            time.sleep(1.2)
        elif response.status_code == 429:  # Rate-limited response
            print("Rate limit exceeded. Waiting before retrying...")
            # Extract "Retry-After" header if available
            retry_after = int(response.headers.get("Retry-After", 10))
            time.sleep(retry_after)  # Wait for the specified time
        else:
            print(f'Error: {response.status_code} - {response.text}')
            is_paginating = False
    return players

In [31]:
# parameters for the query
REGION = "kr"       # change these parameters if necessary
BASE_URL = f"https://{REGION}.api.riotgames.com/lol/league-exp/v4/entries"
# api parameters
QUEUE = 'RANKED_SOLO_5x5'   # keep this
TIER = 'EMERALD'             # change these parameters if necessary
DIVISION = "IV"             # change these parameters if necessary

In [32]:
ENTRY_LIMIT = 205

In [33]:
lol_players = get_players(QUEUE, TIER, DIVISION, ENTRY_LIMIT)

In [34]:
lol_players_df = pd.json_normalize(lol_players)
lol_players_df = spark.createDataFrame(lol_players_df)

In [35]:
lol_players_df.show()

[Stage 8:>                                                          (0 + 1) / 1]

+--------------------+---------------+-------+----+--------------------+------------+----+------+-------+--------+----------+---------+
|            leagueId|      queueType|   tier|rank|          summonerId|leaguePoints|wins|losses|veteran|inactive|freshBlood|hotStreak|
+--------------------+---------------+-------+----+--------------------+------------+----+------+-------+--------+----------+---------+
|bb00d398-759c-40c...|RANKED_SOLO_5x5|EMERALD|  IV|3bfRAmfJlv7IKTL2J...|          45|   6|     7|  false|   false|      true|    false|
|1a88576b-f067-472...|RANKED_SOLO_5x5|EMERALD|  IV|3UYt7jC3dd0guVS4-...|          45|   4|     7|  false|   false|     false|    false|
|22347bcf-9140-49b...|RANKED_SOLO_5x5|EMERALD|  IV|nD98gXnWlrfy8hgE7...|           0|  33|    29|  false|   false|     false|    false|
|f9255570-72f4-4d3...|RANKED_SOLO_5x5|EMERALD|  IV|Ex8GzYO0LI40K4IS5...|          27|  75|    70|  false|   false|     false|    false|
|10316489-e3d3-4e5...|RANKED_SOLO_5x5|EMERALD|  

                                                                                

In [None]:
len(lol_players)

In [36]:
lol_players_df.write.mode('overwrite').parquet(f'./data/players/players_{REGION}_{TIER}_{DIVISION}.parquet')

                                                                                