In [1]:
import requests
import json
import gzip
import shutil
import time
import os
from io import BytesIO


S3_BUCKET_URL = "https://power-rankings-dataset-gprhack.s3.us-west-2.amazonaws.com"


def download_gzip_and_write_to_json(file_name):
    """Download ``<file_name>.json.gz`` from the S3 bucket and store it unpacked.

    The decompressed payload is written to ``<file_name>.json`` with every ``:``
    in the path replaced by ``_``. If that file already exists nothing is
    downloaded.

    The payload is first decompressed into a ``.part`` file and only renamed to
    its final name once decompression has finished, so a corrupt or truncated
    archive can never leave behind a partial ``.json`` that a later run would
    mistake for a completed download.

    Args:
        file_name: Bucket key without the ``.json.gz`` suffix; also used as the
            local path (relative to the working directory).

    Returns:
        None. The outcome is reported on stdout only.

    Raises:
        requests.RequestException: propagated from ``requests.get`` (connection
            problems, or no data received within the timeout).
    """
    local_file_name = file_name.replace(":", "_")
    target_path = f"{local_file_name}.json"
    # If file already exists locally do not re-download game
    if os.path.isfile(target_path):
        return

    # Without a timeout a stalled connection would block the whole run forever.
    response = requests.get(f"{S3_BUCKET_URL}/{file_name}.json.gz", timeout=60)
    if response.status_code != 200:
        print(f"Failed to download {file_name}")
        return

    partial_path = f"{target_path}.part"
    try:
        with gzip.GzipFile(fileobj=BytesIO(response.content), mode="rb") as gzipped_file:
            with open(partial_path, "wb") as output_file:
                shutil.copyfileobj(gzipped_file, output_file)
        # Atomic rename: the final name only ever refers to a complete file.
        os.replace(partial_path, target_path)
        print(f"{file_name}.json written")
    except Exception as e:
        # Best effort, as before: report and carry on, but drop the partial output.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        print("Error:", e)


def download_esports_files():
    """Fetch the esports metadata files into the local ``esports-data`` folder.

    The folder is created when it does not exist yet. Each of the five datasets
    (leagues, tournaments, players, teams, mapping_data) is then requested in
    that order through ``download_gzip_and_write_to_json``.
    """
    target_dir = "esports-data"
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    for dataset in ("leagues", "tournaments", "players", "teams", "mapping_data"):
        download_gzip_and_write_to_json(f"{target_dir}/{dataset}")


def download_games(year, max_games=50):
    """Download the game files of completed games from tournaments starting in ``year``.

    Reads ``esports-data/tournaments.json`` and ``esports-data/mapping_data.json``
    (the files fetched by ``download_esports_files``), looks up the platform game
    id of every completed game and passes ``games/<platformGameId>`` to
    ``download_gzip_and_write_to_json``. Progress is printed every 10 games.

    Args:
        year: Year to select; a tournament matches when its ``startDate`` string
            starts with ``str(year)``.
        max_games: Stop after this many games have been handed to the
            downloader. Defaults to 50, the cap that used to be hard-coded;
            pass ``None`` to remove the cap.

    Note:
        A game is counted once it has been passed to the downloader, whether or
        not a file was actually fetched (the downloader reports skips and
        failures only by printing).
    """
    start_time = time.time()
    with open("esports-data/tournaments.json", "r", encoding="utf-8") as json_file:
        tournaments_data = json.load(json_file)
    with open("esports-data/mapping_data.json", "r", encoding="utf-8") as json_file:
        mappings_data = json.load(json_file)

    directory = "games"
    os.makedirs(directory, exist_ok=True)

    mappings = {
        esports_game["esportsGameId"]: esports_game for esports_game in mappings_data
    }

    if max_games is not None and max_games <= 0:
        return

    year_prefix = str(year)
    game_counter = 0
    for tournament in tournaments_data:
        # `or ""` also covers a startDate that is present but null.
        start_date = tournament.get("startDate") or ""
        if not start_date.startswith(year_prefix):
            continue

        print(f"Processing {tournament['slug']}")
        for stage in tournament["stages"]:
            for section in stage["sections"]:
                for match in section["matches"]:
                    for game in match["games"]:
                        if game["state"] != "completed":
                            continue

                        try:
                            platform_game_id = mappings[game["id"]]["platformGameId"]
                        except KeyError:
                            # Only the esports id is known here; the platform id
                            # is exactly what could not be resolved.
                            print(f"{game['id']} not found in the mapping table")
                            continue

                        download_gzip_and_write_to_json(f"{directory}/{platform_game_id}")
                        game_counter += 1

                        # Report only right after the counter changed, so skipped
                        # games cannot repeat the same progress line.
                        if game_counter % 10 == 0:
                            elapsed_minutes = round((time.time() - start_time) / 60, 2)
                            print(
                                f"----- Processed {game_counter} games, "
                                f"current run time: {elapsed_minutes} minutes"
                            )

                        # Leave all loops at once instead of walking (and
                        # announcing) the remaining tournaments.
                        if max_games is not None and game_counter >= max_games:
                            return


if __name__ == "__main__":
    # Metadata comes first: download_games reads the tournament and mapping
    # files that download_esports_files stores under esports-data/.
    download_esports_files()
    download_games(year=2023)


esports-data/leagues.json written
esports-data/tournaments.json written
esports-data/players.json written
esports-data/teams.json written
esports-data/mapping_data.json written
Processing nacl_qualifiers_2_summer_2023
games/ESPORTSTMNT03:3196037.json written
games/ESPORTSTMNT03:3196049.json written
games/ESPORTSTMNT03:3196058.json written
games/ESPORTSTMNT03:3197014.json written
games/ESPORTSTMNT03:3198185.json written
games/ESPORTSTMNT03:3200156.json written
games/ESPORTSTMNT03:3199178.json written
games/ESPORTSTMNT03:3198199.json written
games/ESPORTSTMNT03:3200168.json written
games/ESPORTSTMNT03:3198544.json written
----- Processed 10 games, current run time:                                    0.23 minutes
games/ESPORTSTMNT03:3198546.json written
games/ESPORTSTMNT03:3195064.json written
games/ESPORTSTMNT03:3196051.json written
games/ESPORTSTMNT03:3196057.json written
ESPORTSTMNT03:3196057 110733838936446954 not found in the mapping table
games/ESPORTSTMNT03:3197025.json written
gam

In [2]:
import requests
import json
import gzip
import shutil
import time
import os
from io import BytesIO


S3_BUCKET_URL = "https://power-rankings-dataset-gprhack.s3.us-west-2.amazonaws.com"


def download_gzip_and_write_to_json(file_name):
    """Download ``<file_name>.json.gz`` from the S3 bucket and store it unpacked.

    The decompressed payload is written to ``<file_name>.json`` with every ``:``
    in the path replaced by ``_``. If that file already exists nothing is
    downloaded.

    The payload is first decompressed into a ``.part`` file and only renamed to
    its final name once decompression has finished, so a corrupt or truncated
    archive can never leave behind a partial ``.json`` that a later run would
    mistake for a completed download.

    Args:
        file_name: Bucket key without the ``.json.gz`` suffix; also used as the
            local path (relative to the working directory).

    Returns:
        None. The outcome is reported on stdout only.

    Raises:
        requests.RequestException: propagated from ``requests.get`` (connection
            problems, or no data received within the timeout).
    """
    local_file_name = file_name.replace(":", "_")
    target_path = f"{local_file_name}.json"
    # If file already exists locally do not re-download game
    if os.path.isfile(target_path):
        return

    # Without a timeout a stalled connection would block the whole run forever.
    response = requests.get(f"{S3_BUCKET_URL}/{file_name}.json.gz", timeout=60)
    if response.status_code != 200:
        print(f"Failed to download {file_name}")
        return

    partial_path = f"{target_path}.part"
    try:
        with gzip.GzipFile(fileobj=BytesIO(response.content), mode="rb") as gzipped_file:
            with open(partial_path, "wb") as output_file:
                shutil.copyfileobj(gzipped_file, output_file)
        # Atomic rename: the final name only ever refers to a complete file.
        os.replace(partial_path, target_path)
        print(f"{file_name}.json written")
    except Exception as e:
        # Best effort, as before: report and carry on, but drop the partial output.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        print("Error:", e)


def download_esports_files():
    """Fetch the esports metadata files into the local ``esports-data`` folder.

    The folder is created when it does not exist yet. Each of the five datasets
    (leagues, tournaments, players, teams, mapping_data) is then requested in
    that order through ``download_gzip_and_write_to_json``.
    """
    target_dir = "esports-data"
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    for dataset in ("leagues", "tournaments", "players", "teams", "mapping_data"):
        download_gzip_and_write_to_json(f"{target_dir}/{dataset}")


def download_games(year, max_games=50):
    """Download the game files of completed games from tournaments starting in ``year``.

    Reads ``esports-data/tournaments.json`` and ``esports-data/mapping_data.json``
    (the files fetched by ``download_esports_files``), looks up the platform game
    id of every completed game and passes ``games/<platformGameId>`` to
    ``download_gzip_and_write_to_json``. Progress is printed every 10 games.

    Args:
        year: Year to select; a tournament matches when its ``startDate`` string
            starts with ``str(year)``.
        max_games: Stop after this many games have been handed to the
            downloader. Defaults to 50, the cap that used to be hard-coded;
            pass ``None`` to remove the cap.

    Note:
        A game is counted once it has been passed to the downloader, whether or
        not a file was actually fetched (the downloader reports skips and
        failures only by printing).
    """
    start_time = time.time()
    with open("esports-data/tournaments.json", "r", encoding="utf-8") as json_file:
        tournaments_data = json.load(json_file)
    with open("esports-data/mapping_data.json", "r", encoding="utf-8") as json_file:
        mappings_data = json.load(json_file)

    directory = "games"
    os.makedirs(directory, exist_ok=True)

    mappings = {
        esports_game["esportsGameId"]: esports_game for esports_game in mappings_data
    }

    if max_games is not None and max_games <= 0:
        return

    year_prefix = str(year)
    game_counter = 0
    for tournament in tournaments_data:
        # `or ""` also covers a startDate that is present but null.
        start_date = tournament.get("startDate") or ""
        if not start_date.startswith(year_prefix):
            continue

        print(f"Processing {tournament['slug']}")
        for stage in tournament["stages"]:
            for section in stage["sections"]:
                for match in section["matches"]:
                    for game in match["games"]:
                        if game["state"] != "completed":
                            continue

                        try:
                            platform_game_id = mappings[game["id"]]["platformGameId"]
                        except KeyError:
                            # Only the esports id is known here; the platform id
                            # is exactly what could not be resolved.
                            print(f"{game['id']} not found in the mapping table")
                            continue

                        download_gzip_and_write_to_json(f"{directory}/{platform_game_id}")
                        game_counter += 1

                        # Report only right after the counter changed, so skipped
                        # games cannot repeat the same progress line.
                        if game_counter % 10 == 0:
                            elapsed_minutes = round((time.time() - start_time) / 60, 2)
                            print(
                                f"----- Processed {game_counter} games, "
                                f"current run time: {elapsed_minutes} minutes"
                            )

                        # Leave all loops at once instead of walking (and
                        # announcing) the remaining tournaments.
                        if max_games is not None and game_counter >= max_games:
                            return


if __name__ == "__main__":
    # Metadata comes first: download_games reads the tournament and mapping
    # files that download_esports_files stores under esports-data/.
    download_esports_files()
    download_games(year=2022)


Processing elements_league_opening_2022
games/ESPORTSTMNT01:2706724.json written
games/ESPORTSTMNT03:2543653.json written
games/ESPORTSTMNT02:2577618.json written
games/ESPORTSTMNT02:2553984.json written
games/ESPORTSTMNT02:2557132.json written
games/ESPORTSTMNT01:2697304.json written
games/ESPORTSTMNT02:2573380.json written
games/ESPORTSTMNT02:2553970.json written
games/ESPORTSTMNT03:2543686.json written
games/ESPORTSTMNT02:2557639.json written
----- Processed 10 games, current run time:                                    0.24 minutes
games/ESPORTSTMNT02:2573377.json written
games/ESPORTSTMNT02:2557126.json written
games/ESPORTSTMNT01:2697336.json written
games/ESPORTSTMNT02:2573359.json written
games/ESPORTSTMNT01:2697284.json written
games/ESPORTSTMNT02:2557653.json written
games/ESPORTSTMNT01:2706774.json written
games/ESPORTSTMNT02:2557118.json written
games/ESPORTSTMNT02:2557107.json written
games/ESPORTSTMNT01:2697328.json written
----- Processed 20 games, current run time:     