In [11]:
import requests
import json
import gzip
import shutil
from io import BytesIO, StringIO
import os

S3_BUCKET_URL = "https://power-rankings-dataset-gprhack.s3.us-west-2.amazonaws.com"
def download_gzip_and_write_to_json(file_name, timeout=60):
    """Download ``{file_name}.json.gz`` from the bucket and save it decompressed.

    The result is stored as ``{file_name}.json`` with every ``:`` in the name
    replaced by ``_``. Nothing is downloaded when that file already exists.

    Parameters:
        file_name (str): Object key without the ``.json.gz`` suffix.
        timeout (float): Passed to ``requests.get`` so a stalled connection
            raises instead of blocking forever.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    local_file_name = file_name.replace(":", "_")
    target_path = f"{local_file_name}.json"
    # If file already exists locally do not re-download game
    if os.path.isfile(target_path):
        return

    response = requests.get(f"{S3_BUCKET_URL}/{file_name}.json.gz", timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to download {file_name}")
        return

    # Decompress into a scratch file and rename it only once it is complete.
    # Writing straight to the final name would leave a partial file behind on
    # failure, and the existence check above would then skip it forever.
    partial_path = f"{target_path}.part"
    try:
        gzip_bytes = BytesIO(response.content)
        with gzip.GzipFile(fileobj=gzip_bytes, mode="rb") as gzipped_file:
            with open(partial_path, 'wb') as output_file:
                shutil.copyfileobj(gzipped_file, output_file)
        os.replace(partial_path, target_path)
        print(f"{file_name}.json written")
    except Exception as e:
        # Best-effort cleanup of the scratch file; the original error is what matters.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        print("Error:", e)

def download_gzip_and_shorten_json(file_name, timeout=60):
    """Download a gzipped game file and store a trimmed-down JSON summary.

    The summary is written to ``{file_name}_shortened.json`` with every ``:``
    in the name replaced by ``_``. If that file already exists the function
    prints a notice and returns without downloading.

    Parameters:
        file_name (str): Object key without the ``.json.gz`` suffix.
        timeout (float): Passed to ``requests.get`` so a stalled connection
            raises instead of blocking forever.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    local_file_name = file_name.replace(":", "_")
    shortened_file_name = f"{local_file_name}_shortened.json"

    if os.path.isfile(shortened_file_name):
        print(f"{shortened_file_name} already exists.")
        return

    response = requests.get(f"{S3_BUCKET_URL}/{file_name}.json.gz", timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to download {file_name}")
        return

    # Write to a scratch file and rename it when complete, so an interrupted
    # write can never be mistaken for a finished summary by the check above.
    partial_file_name = f"{shortened_file_name}.part"
    try:
        gzip_bytes = BytesIO(response.content)
        with gzip.GzipFile(fileobj=gzip_bytes, mode="rb") as gzipped_file:
            data = json.load(gzipped_file)

        extracted_data = _shorten_game_events(data)

        # Write only the shortened data to disk
        with open(partial_file_name, 'w') as output_file:
            json.dump(extracted_data, output_file, indent=2)
        os.replace(partial_file_name, shortened_file_name)
        print(f"{shortened_file_name} written")
    except Exception as e:
        # Best-effort cleanup of the scratch file; the original error is what matters.
        try:
            os.remove(partial_file_name)
        except OSError:
            pass
        print("Error:", e)


def _shorten_game_events(events):
    """Summarise the last two entries of ``events`` down to time and gold fields.

    Each kept entry carries ``eventTime``, ``platformGameId``, ``gameTime``, a
    ``participants`` list of ``participantID``/``totalGold`` pairs and a
    ``teams`` list of ``teamID``/``totalGold`` pairs. Entries that end up with
    no participants or no teams are dropped.
    """
    shortened = []
    # NOTE(review): only the final two entries are inspected, presumably
    # because they hold the end-of-game totals; confirm against the data format.
    for element in events[-2:]:
        participants = [
            {
                'participantID': participant.get('participantID'),
                'totalGold': participant.get('totalGold')
            }
            for participant in element.get('participants', [])
        ]
        teams = [
            {
                'teamID': team.get('teamID'),
                'totalGold': team.get('totalGold')
            }
            for team in element.get('teams', [])
        ]
        if participants and teams:
            shortened.append({
                'eventTime': element.get('eventTime'),
                'platformGameId': element.get('platformGameId'),
                'gameTime': element.get('gameTime'),
                'participants': participants,
                'teams': teams
            })
    return shortened


def download_esports_files():
    """Fetch the esports metadata files into the ``esports-data`` folder.

    Creates the folder when it does not exist yet, then requests the leagues,
    tournaments, players, teams and mapping_data files one after another via
    ``download_gzip_and_write_to_json``.
    """
    target_dir = "esports-data"
    directory_missing = not os.path.exists(target_dir)
    if directory_missing:
        os.makedirs(target_dir)

    for base_name in ("leagues", "tournaments", "players", "teams", "mapping_data"):
        download_gzip_and_write_to_json(f"{target_dir}/{base_name}")


def download_all_games_from_year(year):
    """Download a shortened summary for every completed game of one year.

    Reads ``esports-data/tournaments.json`` and ``esports-data/mapping_data.json``
    from disk, selects the tournaments whose ``startDate`` begins with ``year``,
    and calls ``download_gzip_and_shorten_json`` for each game in state
    ``"completed"`` that has an entry in the mapping table. Summaries go to
    the ``games`` folder, which is created if needed.

    Parameters:
        year (int | str): Year prefix matched against ``startDate``.

    Returns:
        None. Progress is reported with ``print``.
    """
    with open("esports-data/tournaments.json", "r") as json_file:
        tournaments_data = json.load(json_file)
    with open("esports-data/mapping_data.json", "r") as json_file:
        mappings_data = json.load(json_file)

    directory = "games"
    os.makedirs(directory, exist_ok=True)

    mappings = {
        esports_game["esportsGameId"]: esports_game for esports_game in mappings_data
    }

    year_prefix = str(year)
    game_counter = 0

    for tournament in tournaments_data:
        # "or ''" also covers a startDate that is present but null.
        start_date = tournament.get("startDate") or ""
        if not start_date.startswith(year_prefix):
            continue

        print(f"Processing {tournament['slug']}")
        for stage in tournament["stages"]:
            for section in stage["sections"]:
                for match in section["matches"]:
                    for game in match["games"]:
                        if game["state"] != "completed":
                            continue
                        try:
                            platform_game_id = mappings[game["id"]]["platformGameId"]
                        except KeyError:
                            print(f"{game['id']} not found in the mapping table")
                            continue
                        download_gzip_and_shorten_json(f"{directory}/{platform_game_id}")
                        game_counter += 1

                        # Report only right after the counter advances, so the
                        # same milestone is not repeated for skipped games.
                        if game_counter % 10 == 0:
                            print(f"----- Processed {game_counter} games")


# Cell entry point. The metadata download runs first because
# download_all_games_from_year reads esports-data/tournaments.json and
# esports-data/mapping_data.json from disk.
if __name__ == "__main__":
    download_esports_files()
    # Only 2020 is fetched; the other years are left switched off below.
    #download_all_games_from_year(2023)
    #download_all_games_from_year(2022)
    #download_all_games_from_year(2021)
    download_all_games_from_year(2020)


Processing midseason_cup_2020
games/ESPORTSTMNT03_1392262_shortened.json already exists.
games/ESPORTSTMNT03_1392233_shortened.json already exists.
games/ESPORTSTMNT03_1392215_shortened.json already exists.
games/ESPORTSTMNT03_1392104_shortened.json already exists.
games/ESPORTSTMNT03_1392251_shortened.json already exists.
games/ESPORTSTMNT03_1392275_shortened.json already exists.
games/ESPORTSTMNT03_1382479_shortened.json already exists.
games/ESPORTSTMNT03_1382462_shortened.json already exists.
games/ESPORTSTMNT03_1382494_shortened.json already exists.
games/ESPORTSTMNT03_1382488_shortened.json already exists.
----- Processed 10 games
games/ESPORTSTMNT03_1382425_shortened.json already exists.
games/ESPORTSTMNT03_1382485_shortened.json already exists.
games/ESPORTSTMNT03_1382505_shortened.json already exists.
games/ESPORTSTMNT03_1382513_shortened.json already exists.
games/ESPORTSTMNT03_1392533_shortened.json already exists.
games/ESPORTSTMNT03_1392549_shortened.json already exists.
g

In [None]:
!zip -r games.zip games


updating: games/ (stored 0%)
updating: games/ESPORTSTMNT01_2697288_shortened.json (deflated 75%)
updating: games/ESPORTSTMNT04_2450129_shortened.json (deflated 75%)
updating: games/ESPORTSTMNT04_2471889_shortened.json (deflated 75%)
updating: games/ESPORTSTMNT02_3218641_shortened.json (deflated 75%)
updating: games/ESPORTSTMNT04_2450840_shortened.json (deflated 75%)
updating: games/ESPORTSTMNT01_3085550_shortened.json (deflated 75%)
updating: games/ESPORTSTMNT02_3215095_shortened.json (deflated 75%)
updating: games/ESPORTSTMNT02_2551444_shortened.json (deflated 75%)
updating: games/ESPORTSTMNT06_2821251_shortened.json (deflated 75%)
updating: games/ESPORTSTMNT04_2673966_shortened.json (deflated 75%)
updating: games/ESPORTSTMNT02_2574986_shortened.json (deflated 75%)
updating: games/ESPORTSTMNT02_2914630_shortened.json (deflated 75%)
updating: games/ESPORTSTMNT01_2707442_shortened.json (deflated 75%)
updating: games/ESPORTSTMNT02_3170851_shortened.json (deflated 75%)
updating: games/ESP

In [3]:
conda install -y -c conda-forge zip

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 23.3.1
  latest version: 23.9.0

Please update conda by running

    $ conda update -n base -c defaults conda

Or to minimize the number of packages updated during conda update use

     conda install conda=23.9.0



## Package Plan ##

  environment location: /opt/conda

  added / updated specs:
    - zip


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    openssl-1.1.1w             |       h7f8727e_0         3.7 MB
    zip-3.0                    |       h7f98852_1         110 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.8 MB

The following NEW packages will be INSTALLED:

  zip                conda-forge/linux-64::zip-3.0-h7f98852_1 

The following packages will be UPDATED:

  openssl      

In [7]:
import os

def count_files_in_folder(folder_path):
    """
    Tally the regular files that sit directly inside a folder.

    Subfolders are not counted and are not descended into.

    Parameters:
        folder_path (str): Folder to inspect.

    Returns:
        int: How many entries of the folder are files, or None (after
        printing a message) when the folder is missing or not accessible.
    """
    try:
        entries = os.listdir(folder_path)
    except FileNotFoundError:
        print(f"The folder {folder_path} was not found.")
        return None
    except PermissionError:
        print(f"You do not have permission to access {folder_path}.")
        return None

    file_total = 0
    for entry in entries:
        # Anything that is not a file (e.g. a subfolder) is left out of the tally.
        if os.path.isfile(os.path.join(folder_path, entry)):
            file_total += 1
    return file_total

# Example usage: report how many files sit directly in the local 'games' folder.
# count_files_in_folder returns None when the folder is missing or unreadable,
# in which case the message below shows "None" as the count.
folder_path = 'games'
num_files = count_files_in_folder(folder_path)
print(f"There are {num_files} files in the folder {folder_path}.")


In [2]:
import os
import json

# Specify the directory containing the JSON files
directory = 'games'

# Initialize a list to store the combined data
combined_data = []

# Loop through all files in the specified directory.
# os.listdir returns names in arbitrary order, so the names are sorted to keep
# combined_data.json identical from one run to the next.
for filename in sorted(os.listdir(directory)):
    # Only JSON files contribute to the combined list
    if not filename.endswith('.json'):
        continue

    # Construct the full file path
    filepath = os.path.join(directory, filename)

    # Open and load the JSON file
    with open(filepath, 'r') as file:
        data = json.load(file)

    # Each file holds a list of entries; append them all to combined_data
    combined_data.extend(data)

# Specify the name of the output file
output_file = 'combined_data.json'

# Write the combined data to the output file
with open(output_file, 'w') as file:
    json.dump(combined_data, file, indent=2)

print(f"Combined data written to {output_file}")

Combined data written to combined_data.json


In [7]:
import json

# Load JSON files
def load_json_file(file_path):
    """Read the file at ``file_path`` and return its parsed JSON content."""
    with open(file_path, 'r') as handle:
        parsed = json.load(handle)
    return parsed

# Load the metadata files from esports-data/ plus combined_data.json, which is
# read from the working directory.
teams_data = load_json_file('esports-data/teams.json')
players_data = load_json_file('esports-data/players.json')
mapping_data = load_json_file('esports-data/mapping_data.json')
combined_data = load_json_file('combined_data.json')
leagues_data = load_json_file('esports-data/leagues.json')
tournaments_data = load_json_file('esports-data/tournaments.json')

# Create mappings for easy lookup
team_id_to_name = {team['team_id']: team['name'] for team in teams_data}
player_id_to_handle = {player['player_id']: player['handle'] for player in players_data}
# When several combined_data entries share a platformGameId, the entry that
# comes last in the list is the one kept here.
platform_game_id_to_combined_data = {game['platformGameId']: game for game in combined_data}
league_id_to_region = {league['id']: league['region'] for league in leagues_data}

# Define update functions
def update_team_info(node):
    """Add ``team_name`` to every dict in ``node`` that has a known ``team_id``.

    Walks dicts and lists recursively and edits them in place; other values
    are ignored. Names come from the module-level ``team_id_to_name`` table.
    Returns None.
    """
    if isinstance(node, list):
        for child in node:
            update_team_info(child)
        return
    if not isinstance(node, dict):
        return

    if 'team_id' in node and node['team_id'] in team_id_to_name:
        node['team_name'] = team_id_to_name[node['team_id']]
    for child in node.values():
        update_team_info(child)

def update_player_info(node):
    """Add ``player_handle`` to every dict in ``node`` with a known ``player_id``.

    Walks dicts and lists recursively and edits them in place; other values
    are ignored. Handles come from the module-level ``player_id_to_handle``
    table. Returns None.
    """
    if isinstance(node, list):
        for child in node:
            update_player_info(child)
        return
    if not isinstance(node, dict):
        return

    if 'player_id' in node and node['player_id'] in player_id_to_handle:
        node['player_handle'] = player_id_to_handle[node['player_id']]
    for child in node.values():
        update_player_info(child)

def update_game_info(node, _mapping_index=None):
    """Attach ``platformGameId`` and ``teamMapping`` to game dicts in ``node``.

    Walks dicts and lists recursively and edits them in place. A dict is
    annotated when its ``esportsGameId`` has an entry in ``mapping_data`` and
    that entry's ``platformGameId`` is present in
    ``platform_game_id_to_combined_data``.

    Parameters:
        node: Nested structure of dicts and lists to update.
        _mapping_index (dict | None): Internal. Mapping entries keyed by
            ``esportsGameId``; built once on the outermost call and handed
            down the recursion.

    Returns:
        None.
    """
    # NOTE(review): only dicts carrying an 'esportsGameId' key are considered.
    # The download cell looks games up by their "id" field instead, so confirm
    # that the structure passed in really uses 'esportsGameId'.
    if _mapping_index is None:
        if isinstance(mapping_data, dict):
            _mapping_index = mapping_data
        else:
            # mapping_data is a list of entries; index it by esportsGameId so
            # each lookup is a dict access rather than an invalid list index.
            _mapping_index = {entry['esportsGameId']: entry for entry in mapping_data}

    if isinstance(node, dict):
        entry = None
        if 'esportsGameId' in node:
            entry = _mapping_index.get(node['esportsGameId'])
        # The esports id is translated to a platform id first; only the
        # platform id can be checked against the combined game data.
        if entry is not None and entry.get('platformGameId') in platform_game_id_to_combined_data:
            node['platformGameId'] = entry['platformGameId']
            node['teamMapping'] = entry.get('teamMapping', {})
        for child in list(node.values()):
            update_game_info(child, _mapping_index)
    elif isinstance(node, list):
        for item in node:
            update_game_info(item, _mapping_index)

def update_game_additional_info(node):
    """Copy ``eventTime`` and per-team ``totalGold`` onto known game dicts.

    Walks dicts and lists recursively and edits them in place. A dict whose
    ``platformGameId`` is a key of ``platform_game_id_to_combined_data``
    receives that record's ``eventTime``. Each of its ``teams`` entries is
    then translated through the dict's ``teamMapping`` (keyed by the string
    form of ``teamID``). When the translation is non-empty the team gets the
    ``totalGold`` of the first recorded team whose ``teamID`` equals the
    translated value as an int, or None if no recorded team matches.
    Returns None.
    """
    if isinstance(node, list):
        for child in node:
            update_game_additional_info(child)
        return
    if not isinstance(node, dict):
        return

    if 'platformGameId' in node and node['platformGameId'] in platform_game_id_to_combined_data:
        game_record = platform_game_id_to_combined_data[node['platformGameId']]
        node['eventTime'] = game_record['eventTime']
        for team in node.get('teams', []):
            mapped_id = node['teamMapping'].get(str(team['teamID']))
            if not mapped_id:
                continue
            gold = None
            for recorded_team in game_record['teams']:
                if recorded_team['teamID'] == int(mapped_id):
                    gold = recorded_team['totalGold']
                    break
            team['totalGold'] = gold

    for child in node.values():
        update_game_additional_info(child)

def update_region_info(node, current_region=None):
    """Stamp ``region`` onto every dict in ``node`` that has a ``team_name``.

    Walks dicts and lists recursively and edits them in place. The region in
    effect is inherited from the enclosing structure and replaced whenever a
    dict carries a ``leagueId`` found in ``league_id_to_region``. A dict with
    ``team_name`` is stamped only while a region is in effect.

    Parameters:
        node: Nested structure of dicts and lists to update.
        current_region: Region inherited from the caller, or None.

    Returns:
        None.
    """
    if isinstance(node, dict):
        if 'leagueId' in node and node['leagueId'] in league_id_to_region:
            current_region = league_id_to_region[node['leagueId']]
        has_region = current_region is not None
        if has_region and 'team_name' in node:
            node['region'] = current_region
        children = node.values()
    elif isinstance(node, list):
        children = node
    else:
        return

    for child in children:
        update_region_info(child, current_region)

# Apply the transformations to each tournament in the tournaments_data list.
# The call order matters: update_region_info only stamps dicts that already
# have the 'team_name' key added by update_team_info, and
# update_game_additional_info reads the 'platformGameId' and 'teamMapping'
# keys that update_game_info sets.
for tournament in tournaments_data:
    update_team_info(tournament)
    update_player_info(tournament)
    update_game_info(tournament)
    update_game_additional_info(tournament)
    update_region_info(tournament)

# Get the current working directory
# NOTE(review): this cell imports only json, so `os` must already be in the
# kernel namespace from an earlier cell.
current_working_dir = os.getcwd()

# Define the path for the new JSON file in the current working directory
output_filepath = os.path.join(current_working_dir, 'updated_tournaments.json')

# Save the updated tournament data to the new JSON file
with open(output_filepath, 'w') as file:
    json.dump(tournaments_data, file, indent=4)

In [10]:
import json

# Load the datasets
# NOTE(review): no cell shown in this part of the notebook writes
# merged_mapping_data.json; presumably it is produced elsewhere — confirm.
with open('merged_mapping_data.json', 'r') as file:
    esport_data = json.load(file)

with open('esports-data/tournaments.json', 'r') as file:
    tournaments_data = json.load(file)

# Function to perform a deep search in a nested dictionary
def deep_search(key, value, data, path=None):
    """Yield the path to every ``key: value`` pair nested anywhere in ``data``.

    Dicts and lists are walked depth-first. A path is the list of dict keys
    and list indexes leading from ``data`` to the matching entry, ending with
    ``key`` itself.

    Parameters:
        key: Dict key to look for.
        value: Value the key must be equal to.
        data: Nested structure of dicts and lists to search.
        path (list | None): Path prefix of ``data`` within the outer
            structure; omit it on the outermost call.

    Yields:
        list: One path per matching entry, in traversal order.
    """
    if path is None:
        path = []
    if isinstance(data, dict):
        for k, v in data.items():
            new_path = path + [k]
            # Without this comparison the generator would only recurse and
            # never report a hit.
            if k == key and v == value:
                yield new_path
            yield from deep_search(key, value, v, new_path)
    elif isinstance(data, list):
        for i, v in enumerate(data):
            yield from deep_search(key, value, v, path + [i])

# Function to find tournament details based on esportsGameId
def find_tournament_details(game_id, tournaments_data):
    """Return identifying fields of the first tournament containing ``game_id``.

    Uses ``deep_search`` to look for an ``'id'`` entry equal to ``game_id``
    and takes the first element of the first path found as the index of the
    tournament within ``tournaments_data``.

    Parameters:
        game_id: Identifier to look for.
        tournaments_data (list): Tournament dicts to search.

    Returns:
        dict | None: ``tournamentId``, ``leagueId`` and ``leagueSlug`` (the
        last one is filled from the tournament's ``'slug'`` field), or None
        when nothing matches.
    """
    # Only the first match is needed, so stop the search there rather than
    # collecting every match in the whole tree.
    first_path = next(deep_search('id', game_id, tournaments_data), None)
    if first_path is None:
        return None

    tournament = tournaments_data[first_path[0]]
    return {
        'tournamentId': tournament.get('id'),
        'leagueId': tournament.get('leagueId'),
        'leagueSlug': tournament.get('slug'),
    }

# Applying the function to each game in the esports data
# Each find_tournament_details call searches tournaments_data from the top,
# so the total work grows with (number of games) x (size of the tournament tree).
for game in esport_data:
    esports_game_id = game.get('esportsGameId')
    tournament_details = find_tournament_details(esports_game_id, tournaments_data)
    # Games with no matching tournament are left unchanged
    if tournament_details:
        game.update(tournament_details)

# Saving the updated data to a new JSON file
with open('updated_esport_data.json', 'w') as file:
    json.dump(esport_data, file, indent=4)



KeyboardInterrupt: 

In [3]:
import json

# Load the tournaments data
with open('esports-data/tournaments.json') as file:
    tournaments_data = json.load(file)

# Load the merged_mapping_data
# NOTE(review): no cell shown in this part of the notebook writes
# merged_mapping_data.json; presumably it is produced elsewhere — confirm.
with open('merged_mapping_data.json') as file:
    esport_data = json.load(file)

# Function to perform a flexible search for the game ID within the tournaments data
def flexible_search(game_id_to_find, data, path=[]):
    """Yield the path to every str/int leaf whose text equals the wanted id.

    Dicts and lists are walked depth-first. A leaf matches when it is a str
    or int and ``str(leaf) == str(game_id_to_find)``, whatever key it is
    stored under. A path is the list of dict keys and list indexes leading
    from ``data`` to the leaf, prefixed by ``path``.
    """
    if isinstance(data, dict):
        children = data.items()
    elif isinstance(data, list):
        children = enumerate(data)
    else:
        # Leaf value: report it when its string form equals the wanted id.
        if isinstance(data, (str, int)) and str(data) == str(game_id_to_find):
            yield path
        return

    for step, child in children:
        yield from flexible_search(game_id_to_find, child, path + [step])

# Function to trace back and retrieve tournament details based on the found path
def retrieve_tournament_details(path, tournaments_data):
    """Build the tournament summary for the tournament a search path points into.

    The first element of ``path`` selects the tournament inside
    ``tournaments_data``. The result holds ``tournamentId``, ``leagueId`` and
    ``leagueSlug``, read from the tournament's ``id``, ``leagueId`` and
    ``slug`` fields; a missing field yields None.
    """
    tournament = tournaments_data[path[0]]
    field_sources = (
        ('tournamentId', 'id'),
        ('leagueId', 'leagueId'),
        ('leagueSlug', 'slug'),
    )
    return {output_key: tournament.get(source_key) for output_key, source_key in field_sources}

# Annotate every game in merged_mapping_data.json with its tournament details.
# Searching the whole tournament tree once per game repeats the same traversal
# for every game. Instead the tree is walked a single time to record, for each
# str/int leaf value, the index of the first tournament that contains it.
# This is the same tournament a per-game flexible_search would report first.
def _leaf_values(data):
    """Yield the string form of every str/int leaf nested inside ``data``."""
    pending = [data]
    while pending:
        current = pending.pop()
        if isinstance(current, dict):
            pending.extend(current.values())
        elif isinstance(current, list):
            pending.extend(current)
        elif isinstance(current, (str, int)):
            yield str(current)


first_tournament_by_value = {}
for tournament_index, tournament in enumerate(tournaments_data):
    for leaf in _leaf_values(tournament):
        # setdefault keeps the lowest tournament index seen for a value
        first_tournament_by_value.setdefault(leaf, tournament_index)

for game in esport_data:
    esports_game_id = game.get('esportsGameId')
    tournament_index = first_tournament_by_value.get(str(esports_game_id))
    if tournament_index is not None:  # If a tournament contains this id
        tournament_details = retrieve_tournament_details([tournament_index], tournaments_data)
        game.update(tournament_details)

# Saving the updated esports data to a new JSON file
with open('esport_data.json', 'w', encoding='utf-8') as file:
    json.dump(esport_data, file, ensure_ascii=False, indent=4)


KeyboardInterrupt: 

In [None]:

import json

def is_team_mapping_complete(team_mapping):
    """Tell whether a team mapping has a truthy value for every required field.

    The required fields are ``teamId``, ``mappingId``, ``totalGold``, ``name``
    and ``region``. A field that is absent, or present with a falsy value
    (including 0 or an empty string), makes the mapping incomplete.
    """
    for field in ('teamId', 'mappingId', 'totalGold', 'name', 'region'):
        if not (field in team_mapping and team_mapping[field]):
            return False
    return True

def is_game_data_complete(game_data):
    """Tell whether a game record has every required field and team mapping.

    The record needs truthy values for ``esportsGameId``, ``platformGameId``,
    ``eventTime``, ``tournamentId``, ``leagueId`` and ``leagueSlug``. It also
    needs a non-empty ``teamMappings`` list in which every entry passes
    ``is_team_mapping_complete``.

    Parameters:
        game_data (dict): One game record.

    Returns:
        bool: True only when all of the above holds.
    """
    essential_fields = ['esportsGameId', 'platformGameId', 'eventTime', 'tournamentId', 'leagueId', 'leagueSlug']
    if not all(field in game_data and game_data[field] for field in essential_fields):
        return False

    # A record with no team mappings must not count as complete: all() over
    # an empty list is True, and later steps index 'teamMappings' directly.
    team_mappings = game_data.get('teamMappings')
    if not team_mappings:
        return False
    return all(is_team_mapping_complete(team_mapping) for team_mapping in team_mappings)

def preprocess_game_data(game_data):
    """Add ``goldDifference`` and ``winningTeam`` to a two-team game record.

    The team with the larger ``totalGold`` is recorded as the winner by name;
    equal totals are recorded as ``'Tie'``. The record is modified in place.

    Parameters:
        game_data (dict): Game record with a ``teamMappings`` list whose
            entries have ``totalGold`` and ``name``.

    Returns:
        dict | None: The same ``game_data`` object with the two fields added,
        or None when the record does not have exactly two team mappings.
    """
    team_mappings = game_data.get('teamMappings') or []
    # The comparison below is only defined for a pair of teams; a missing key
    # or any other count is reported as "cannot preprocess" instead of raising.
    if len(team_mappings) != 2:
        return None

    team1, team2 = team_mappings
    gold1 = team1['totalGold']
    gold2 = team2['totalGold']

    if gold1 > gold2:
        winning_team = team1['name']
    elif gold1 < gold2:
        winning_team = team2['name']
    else:
        winning_team = 'Tie'

    game_data['goldDifference'] = abs(gold1 - gold2)
    game_data['winningTeam'] = winning_team

    return game_data

# Load the data from the JSON file
# NOTE: 'path_to_your_input_file.json' is a placeholder; point it at the real
# input file before running this cell.
input_file_path = 'path_to_your_input_file.json'
with open(input_file_path, 'r') as file:
    esports_data = json.load(file)

# Clean and preprocess the data
# Step 1: keep only the records that pass is_game_data_complete.
cleaned_esports_data = [game_data for game_data in esports_data if is_game_data_complete(game_data)]
# Step 2: preprocess a shallow copy of each record, so the keys added by
# preprocess_game_data do not appear on the dicts held in esports_data.
preprocessed_esports_data = [preprocess_game_data(game_data.copy()) for game_data in cleaned_esports_data]
# Step 3: drop the records for which preprocess_game_data returned None.
preprocessed_esports_data = [game_data for game_data in preprocessed_esports_data if game_data]

# Save the preprocessed data to a new JSON file
# NOTE: 'path_to_your_output_file.json' is a placeholder as well.
output_file_path = 'path_to_your_output_file.json'
with open(output_file_path, 'w', encoding='utf-8') as file:
    json.dump(preprocessed_esports_data, file, ensure_ascii=False, indent=4)