# Ingest Summoner Games API to Bronze
This notebook loads summoner games data from the League of Legends API. For details on the API, see the Riot Games developer portal: https://developer.riotgames.com/

We load only the games that have not already been ingested. Historical data was loaded up to April 16th, 2024. We store the date on which each API call is made and, on later runs, only request games from that date onward.

This notebook requires the list of summoners for which we want to get data as input.

The list of summoners for which you want to get data is kept in a dedicated csv file that was manually uploaded to the Files section of the Bronze Lakehouse. The csv file contains the summoner name (which can change), the Player Universally Unique ID (puuid, which cannot change) and the Summoner ID (which is needed for some API calls). The puuid was fetched using the summoner API: https://euw1.api.riotgames.com/lol/summoner/v4/summoners/by-name/"summoner-name"


In [None]:
%run NB_Functions

### 1. General configuration

In [None]:
# Import libraries
from trident_token_library_wrapper import PyTridentTokenLibrary as tl
import requests as r
import json
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, BooleanType
from datetime import datetime
from notebookutils import mssparkutils
import time

In [None]:
# Retrieve the Riot API key from Azure Key Vault
key_vault_name = "testasa-akv-dev-001"
key_name = "lol-api-key"
key_vault_url = f"https://{key_vault_name}.vault.azure.net/"
access_token = mssparkutils.credentials.getToken("keyvault")  # token requested for the "keyvault" audience
api_key = tl.get_secret_with_token(key_vault_url, key_name, access_token)

# Riot API settings: base URL of the endpoint and the page size
# (sent as `count` when listing match identifiers)
base_url = "https://europe.api.riotgames.com/lol/"
batch_size = 100

### 2. Load Summoner Games data

In [None]:
# Work out the epoch timestamp from which summoner games must be requested.
# The "recurring" folder of the Bronze Lakehouse holds the output of previous runs;
# its content decides whether we resume from the last run or from the historical cut-off.

# Resolve the lakehouse once and reuse the result for both identifiers
bronze_lakehouse = mssparkutils.lakehouse.get("LKH_Bronze")
LH_ID = bronze_lakehouse.id
WS_ID = bronze_lakehouse.workspaceId
files = get_dir_content(f'abfss://{WS_ID}@onelake.dfs.fabric.microsoft.com/{LH_ID}/Files/raw_data/summoner_games/recurring')

if files:
    # Some games were already loaded: only request games after the last loaded game
    startTime = get_most_recent_date_in_epoch(files)  # API expects startTime in epoch format
else:
    # The recurring folder is empty: start from the day historical data was loaded.
    # NOTE(review): this is a naive datetime, so .timestamp() interprets it in the
    # session's local timezone - confirm the cluster timezone matches the historical load.
    t = datetime(2024, 4, 16, 0, 0, 0)
    startTime = int(t.timestamp())  # API expects startTime in epoch format

print(startTime)

In [None]:
# Load the summoner reference list (semicolon-delimited csv with a header row)
print("Reading summoners csv file into a DataFrame")
df_summoners = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .option("delimiter", ";")
    .csv("Files/raw_data/Summoners.csv")
)
summoner_count = df_summoners.count()
print("Finished reading summoners csv file into a DataFrame, found", summoner_count, "summoners")

# Collect the PUUID column to the driver as a list of Rows; the PUUID is element 0 of each Row
puuid_list = df_summoners.select("PUUID").collect()

In [None]:
# Make API calls to get games of each summoner and collect responses.
#
# For every PUUID we page through the match-v5 "ids" endpoint, `batch_size` identifiers
# at a time, until an empty page is returned. Each identifier is stored in `new_games`
# together with the PUUID and the date of this run.
#
# Robustness measures:
#   * every request has a timeout so a stalled connection cannot hang the notebook;
#   * network errors, HTTP 429 (rate limit) and transient 5xx answers are retried,
#     honouring the Retry-After header when the API provides one;
#   * the API key is sent in the X-Riot-Token header rather than in the query string,
#     so it never appears in URLs or in exception messages;
#   * summoners whose games could not be fetched are listed at the end of the cell.

print("Starting API calls to get summoner games")

REQUEST_TIMEOUT_S = 30                      # max seconds to wait for a single response
MAX_ATTEMPTS = 5                            # tries per page before giving up on the summoner
DEFAULT_RETRY_WAIT_S = 10                   # wait used when no Retry-After header is present
RETRYABLE_STATUSES = {429, 500, 502, 503, 504}

new_games = []
failed_puuids = []                          # summoners for which at least one page failed
current_date = datetime.now().strftime("%Y-%m-%d")
auth_headers = {"X-Riot-Token": api_key}

for puuid_row in puuid_list:
    puuid = puuid_row[0]
    print(f"Getting data for puuid: {puuid}")
    url = f"{base_url}match/v5/matches/by-puuid/{puuid}/ids"
    offset = 0
    while True:
        # Make API call to fetch game identifiers for the current batch
        print(f"Getting data from games {offset} to {offset + batch_size}")
        params = {'count': batch_size, 'start': offset, 'startTime': startTime}

        response = None
        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                response = r.get(url, params=params, headers=auth_headers, timeout=REQUEST_TIMEOUT_S)
            except r.exceptions.RequestException as exc:
                # Only the exception type is printed, to keep request details out of the logs
                response = None
                wait_s = DEFAULT_RETRY_WAIT_S
                print(f"Request error ({type(exc).__name__}), attempt {attempt}/{MAX_ATTEMPTS}")
            else:
                if response.status_code not in RETRYABLE_STATUSES:
                    break
                retry_after = response.headers.get("Retry-After", "")
                wait_s = int(retry_after) if retry_after.isdigit() else DEFAULT_RETRY_WAIT_S
                print(f"Received HTTP {response.status_code}, attempt {attempt}/{MAX_ATTEMPTS}")
            if attempt < MAX_ATTEMPTS:
                time.sleep(wait_s)

        if response is not None and response.status_code == 200:
            # Append retrieved game identifiers to the list
            games = response.json()
            for game in games:
                new_games.append({"puuid": puuid, "game": game, "date": current_date})
        else:
            # Best effort: give up on this summoner, keep what was already collected
            print(f"Failed to fetch game identifiers for summoner {puuid}")
            failed_puuids.append(puuid)
            break

        # An empty page means there is nothing left to fetch for this summoner
        if not games:
            break
        offset += batch_size

    time.sleep(5) # see API limits: https://developer.riotgames.com/docs/portal

if failed_puuids:
    # NOTE(review): games of these summoners are missing from this run; check whether a
    # later run will still pick them up before relying on the output.
    print(f"WARNING: could not fetch all games for {len(failed_puuids)} summoner(s): {failed_puuids}")

In [None]:
# Schema of one summoner-game record: three nullable string fields
schema = StructType([
    StructField(field_name, StringType(), True)
    for field_name in ("puuid", "game", "date")
])

# Turn the collected records into a Spark DataFrame using the explicit schema
df_summoner_games = spark.createDataFrame(new_games, schema=schema)

# The output folder is named after today's date (year/month/day sub-folders)
current_date = datetime.now()
date_str = current_date.strftime("%Y/%m/%d")
output_path = f'Files/raw_data/summoner_games/recurring/{date_str}'

# Write a single JSON part file into the bronze folder, replacing any previous output for that date
print(f"Writing summoner games data to Bronze Lakehouse in files {date_str}")
(
    df_summoner_games
    .coalesce(1)
    .write
    .mode("overwrite")
    .json(output_path)
)