## **Note: This code was run on Google Colab with Drive mounted, hence all otputs were saved in the Googel Drive**

In [1]:
import requests
import json
import gzip
import shutil
import time
import os
from io import BytesIO

In [2]:
import logging

In [3]:
logging.basicConfig(level=logging.INFO, force = True)

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
S3_BUCKET_URL = "https://power-rankings-dataset-gprhack.s3.us-west-2.amazonaws.com"
data_writing_dir = "/content/drive/MyDrive/DropBox Hackathon/LoL_Data_games_small/"

#### Function for games after 2020

In [6]:
# using generators
# ~3 sec / game
# least memory consuming

def download_gzip_and_write_to_json(file_name):

    # local_file_name = file_name.replace(":", "_")
    platform_game_id = file_name.split("/")[1]
    local_file_name = data_writing_dir + platform_game_id


    # If file already exists locally do not re-download game
    if os.path.isfile(f"{local_file_name}.json"):
        return

    response = requests.get(f"{S3_BUCKET_URL}/{file_name}.json.gz")
    # print(f'response is {response}')

    if response.status_code == 200:
        try:
            gzip_bytes = BytesIO(response.content)
            with gzip.GzipFile(fileobj=gzip_bytes, mode="rb") as gzipped_file:


                full_data = json.loads(gzipped_file.read()) # .decode('utf-8'))
                my_game_data = {}

                for data in full_data:

                    if (data["eventType"] == "stats_update") and (data["gameOver"] == True): # pick the last stats_update when game ends

                        my_game_data["eventTime"] = data["eventTime"]

                        # Define a function to get data based on team ID
                        def get_data(attribute, team_id_1 = 100, team_id_2 = 200):
                            return ([participant[attribute] for participant in data["participants"] if participant["teamID"] == team_id_1],
                                    [participant[attribute] for participant in data["participants"] if participant["teamID"] == team_id_2] )


                        # Extract data using generator expressions
                        my_game_data["level_100"], my_game_data["level_200"] = get_data("level")
                        my_game_data["xp_100"], my_game_data["xp_200"] = get_data("XP")
                        my_game_data["totalGold_100"], my_game_data["totalGold_200"] = get_data("totalGold")
                        my_game_data["attackSpeed_100"], my_game_data["attackSpeed_200"] = get_data("attackSpeed")
                        my_game_data["respawnTimer_100"], my_game_data["respawnTimer_200"] = get_data("respawnTimer")
                        my_game_data["armor_100"], my_game_data["armor_200"] = list(get_data("armor"))
                        my_game_data["magicResist_100"], my_game_data["magicResist_200"] = list(get_data("magicResist"))
                        my_game_data["armorPenetration_100"], my_game_data["armorPenetration_200"] = list(get_data("armorPenetration"))


                        for team in data["teams"]:  # 2 teams only
                            suffix = "_" + str(team["teamID"])

                            my_game_data[f'{"inhibKills" + suffix}'] = team["inhibKills"]
                            my_game_data[f'{"towerKills" + suffix}'] = team["towerKills"]
                            my_game_data[f'{"baronKills" + suffix}'] = team["baronKills"]
                            my_game_data[f'{"dragonKills" + suffix}'] = team["dragonKills"]
                            my_game_data[f'{"assists" + suffix}'] = team["assists"]
                            my_game_data[f'{"championsKills" + suffix}'] = team["championsKills"]
                            my_game_data[f'{"totalGold" + suffix}'] = team["totalGold"]
                            my_game_data[f'{"deaths" + suffix}'] = team["deaths"]



                    # elif data["eventType"] == "game_info":      # there is only one game_info
                    #     pass

                    elif data["eventType"] == "game_end":  # there is only one game_end
                        my_game_data["winning_team"] = data["winningTeam"]
                        my_game_data["platformGameId"] = data["platformGameId"]

                    # print(f'eventType is {data["eventType"]}')

                # print(f"full datas my data = {my_game_data}")

                with open(f"{local_file_name}.json", 'w') as output_file: # 'wb' changed to 'w'
                    json.dump(my_game_data, output_file)
                    # shutil.copyfileobj(gzipped_file, output_file)
                # print(f"{file_name}.json written")
                logging.info(f"{file_name}.json written")

        except Exception as e:
            # print("Error:", e)
            logging.info("Error:", e)
    else:
        # print(f"Failed to download {file_name}")
        logging.info(f"Failed to download {file_name}")


def download_esports_files():
   directory = "esports-data"
   if not os.path.exists(directory):
       os.makedirs(directory)

   esports_data_files = ["leagues", "tournaments", "players", "teams", "mapping_data"]
   for file_name in esports_data_files:
       download_gzip_and_write_to_json(f"{directory}/{file_name}")


#### For games before 2020

In [7]:
# using generators
# ~3 sec / game
# least memory consuming

def download_gzip_and_write_to_json_2020_prev(file_name):

    # local_file_name = file_name.replace(":", "_")
    platform_game_id = file_name.split("/")[1]
    local_file_name = data_writing_dir + platform_game_id


    # If file already exists locally do not re-download game
    if os.path.isfile(f"{local_file_name}.json"):
        return

    response = requests.get(f"{S3_BUCKET_URL}/{file_name}.json.gz")
    # print(f'response is {response}')

    if response.status_code == 200:
        try:
            gzip_bytes = BytesIO(response.content)
            with gzip.GzipFile(fileobj=gzip_bytes, mode="rb") as gzipped_file:


                full_data = json.loads(gzipped_file.read()) # .decode('utf-8'))
                my_game_data = {}

                for data in full_data:

                    if (data["eventType"] == "stats_update") and (data["gameOver"] == True): # pick the last stats_update when game ends

                        my_game_data["eventTime"] = data["eventTime"]

                        # Define a function to get data based on team ID
                        def get_data(attribute, team_id_1 = 100, team_id_2 = 200):
                            return ([participant[attribute] for participant in data["participants"] if participant["teamID"] == team_id_1],
                                    [participant[attribute] for participant in data["participants"] if participant["teamID"] == team_id_2] )


                        # Extract data using generator expressions
                        my_game_data["level_100"], my_game_data["level_200"] = get_data("level")
                        my_game_data["xp_100"], my_game_data["xp_200"] = get_data("XP")
                        my_game_data["totalGold_100"], my_game_data["totalGold_200"] = get_data("totalGold")
                        my_game_data["attackSpeed_100"], my_game_data["attackSpeed_200"] = get_data("attackSpeed")
                        my_game_data["respawnTimer_100"], my_game_data["respawnTimer_200"] = 0, 0 # get_data("respawnTimer")
                        my_game_data["armor_100"], my_game_data["armor_200"] = list(get_data("armor"))
                        my_game_data["magicResist_100"], my_game_data["magicResist_200"] = list(get_data("magicResist"))
                        my_game_data["armorPenetration_100"], my_game_data["armorPenetration_200"] = list(get_data("armorPenetration"))


                        for team in data["teams"]:  # 2 teams only
                            suffix = "_" + str(team["teamID"])

                            my_game_data[f'{"inhibKills" + suffix}'] = team["inhibKills"]
                            my_game_data[f'{"towerKills" + suffix}'] = team["towerKills"]
                            my_game_data[f'{"baronKills" + suffix}'] = team["baronKills"]
                            my_game_data[f'{"dragonKills" + suffix}'] = team["dragonKills"]
                            my_game_data[f'{"assists" + suffix}'] = team["assists"]
                            my_game_data[f'{"championsKills" + suffix}'] = team["championsKills"]
                            my_game_data[f'{"totalGold" + suffix}'] = team["totalGold"]
                            my_game_data[f'{"deaths" + suffix}'] = team["deaths"]



                    # elif data["eventType"] == "game_info":      # there is only one game_info
                    #     pass

                    elif data["eventType"] == "game_end":  # there is only one game_end
                        my_game_data["winning_team"] = data["winningTeam"]
                        my_game_data["platformGameId"] = data["platformGameId"]

                    # print(f'eventType is {data["eventType"]}')

                # print(f"full datas my data = {my_game_data}")

                with open(f"{local_file_name}.json", 'w') as output_file: # 'wb' changed to 'w'
                    json.dump(my_game_data, output_file)
                    # shutil.copyfileobj(gzipped_file, output_file)
                # print(f"{file_name}.json written")
                logging.info(f"{file_name}.json written")

        except Exception as e:
            # print("Error:", e)
            logging.info("Error:", e)
    else:
        # print(f"Failed to download {file_name}")
        logging.info(f"Failed to download {file_name}")


def download_esports_files():
   directory = "esports-data"
   if not os.path.exists(directory):
       os.makedirs(directory)

   esports_data_files = ["leagues", "tournaments", "players", "teams", "mapping_data"]
   for file_name in esports_data_files:
       download_gzip_and_write_to_json(f"{directory}/{file_name}")


In [8]:
!cp /content/esports-data/* "/content/drive/MyDrive/DropBox Hackathon/LoL_Data"

In [9]:
# !rm -rf /content/games

In [10]:
tournaments_file_path = "/content/drive/MyDrive/DropBox Hackathon/LoL_Data/tournaments.json"
mapping_file_path = "/content/drive/MyDrive/DropBox Hackathon/LoL_Data/mapping_data.json"

In [11]:
def download_games(year):
    start_time = time.time()
    # with open("esports-data/tournaments.json", "r") as json_file:
    with open(tournaments_file_path, "r") as json_file:
        tournaments_data = json.load(json_file)

    # with open("esports-data/mapping_data.json", "r") as json_file:
    with open(mapping_file_path, "r") as json_file:
        mappings_data = json.load(json_file)

    directory = "games"
    if not os.path.exists(directory):
        os.makedirs(directory)

    mappings = {
        esports_game["esportsGameId"]: esports_game for esports_game in mappings_data
    }

    game_counter = 0

    for tournament in tournaments_data:
        start_date = tournament.get("startDate", "")
        if start_date.startswith(str(year)):
            logging.info(f"Processing {tournament['slug']}")
            for stage in tournament["stages"]:
                for section in stage["sections"]:
                    for match in section["matches"]:
                        for game in match["games"]:
                            if game["state"] == "completed":
                                try:
                                    platform_game_id = mappings[game["id"]]["platformGameId"]
                                except KeyError:
                                    logging.info(f"{platform_game_id} {game['id']} not found in the mapping table")
                                    continue


                                if year <= 2020: download_gzip_and_write_to_json_2020_prev(f"{directory}/{platform_game_id}")
                                else: download_gzip_and_write_to_json(f"{directory}/{platform_game_id}")

                                game_counter += 1

                                # break
                                # exit()

                            if game_counter % 50 == 0:
                                logging.info(
                                    f"----- Processed {game_counter} games, current run time: \
                                    {round((time.time() - start_time)/60, 2)} minutes"
                                )

                            # if game_counter == 200:
                            #     break



In [12]:
download_games(2023) # done # 7750 games

INFO:root:Processing nacl_qualifiers_2_summer_2023
INFO:root:ESPORTSTMNT03:3196057 110733838936446954 not found in the mapping table
INFO:root:ESPORTSTMNT03:3198296 110733838936447001 not found in the mapping table
INFO:root:ESPORTSTMNT03:3198296 110733838936447002 not found in the mapping table
INFO:root:----- Processed 50 games, current run time:                                     0.07 minutes
INFO:root:Processing gll_summer_2023
INFO:root:ESPORTSTMNT03:3174697 110428723805336981 not found in the mapping table
INFO:root:----- Processed 100 games, current run time:                                     0.07 minutes
INFO:root:Processing elite_series_summer_2023
INFO:root:----- Processed 150 games, current run time:                                     0.07 minutes
INFO:root:----- Processed 200 games, current run time:                                     0.07 minutes
INFO:root:----- Processed 200 games, current run time:                                     0.07 minutes
INFO:root:ESPORTSTM

In [13]:
download_games(2022) # done # 7650 games

INFO:root:Processing elements_league_opening_2022
INFO:root:----- Processed 50 games, current run time:                                     0.04 minutes
INFO:root:Processing lck_challengers_spring_2022
INFO:root:ESPORTSTMNT02:2570350 107439257587481816 not found in the mapping table
INFO:root:----- Processed 100 games, current run time:                                     0.04 minutes
INFO:root:----- Processed 150 games, current run time:                                     0.04 minutes
INFO:root:ESPORTSTMNT02:2556636 107439257588333950 not found in the mapping table
INFO:root:ESPORTSTMNT03:2549870 107559433600069811 not found in the mapping table
INFO:root:ESPORTSTMNT03:2565045 107559433600069817 not found in the mapping table
INFO:root:ESPORTSTMNT03:2546121 107559433600069827 not found in the mapping table
INFO:root:ESPORTSTMNT03:2568234 107559433600135367 not found in the mapping table
INFO:root:ESPORTSTMNT03:2549623 107559433600135389 not found in the mapping table
INFO:root:ESPORT

In [14]:
download_games(2021)

INFO:root:Processing hitpoint_masters_summer_2021
INFO:root:----- Processed 50 games, current run time:                                     0.05 minutes
INFO:root:Processing tal_summer_2021
INFO:root:ESPORTSTMNT05:2060302 106342376606018114 not found in the mapping table
INFO:root:----- Processed 100 games, current run time:                                     0.05 minutes
INFO:root:ESPORTSTMNT04:1810824 106342376606018144 not found in the mapping table
INFO:root:----- Processed 150 games, current run time:                                     0.05 minutes
INFO:root:Processing nlc_summer_2021
INFO:root:----- Processed 200 games, current run time:                                     0.05 minutes
INFO:root:----- Processed 250 games, current run time:                                     0.05 minutes
INFO:root:ESPORTSTMNT05:2060164 106340535337738127 not found in the mapping table
INFO:root:----- Processed 300 games, current run time:                                     0.05 minutes
INFO:ro

In [15]:
download_games(2020) # respawn timer field did not exist in a lot of games --> taking that as 0

INFO:root:Processing midseason_cup_2020
INFO:root:Processing lcs_academy_summer_2020
INFO:root:----- Processed 50 games, current run time:                                     0.04 minutes
INFO:root:ESPORTSTMNT01:1424413 104174601835007700 not found in the mapping table
INFO:root:----- Processed 100 games, current run time:                                     0.04 minutes
INFO:root:Processing ljl_spring_2020
INFO:root:----- Processed 150 games, current run time:                                     0.04 minutes
INFO:root:ESPORTSTMNT01:1313781 103540398665570569 not found in the mapping table
INFO:root:ESPORTSTMNT01:1313781 103540398665570570 not found in the mapping table
INFO:root:----- Processed 200 games, current run time:                                     0.04 minutes
INFO:root:Processing opl_2020_split1
INFO:root:ESPORTSTMNT01:1303999 103535402770536748 not found in the mapping table
INFO:root:----- Processed 250 games, current run time:                                     0.04 mi

In [16]:
# 2019 and earlier did not exist

In [17]:
download_games(2019) # respawn timer field did not exist in a lot of games --> taking that as 0