In [3]:
import os
import json
import pandas as pd

# Column order of the per-team rows produced by process_json_files.
_STAT_COLUMNS = (
    'platformGameId',
    'teamID',
    'building_destroyed',
    'destroyed_opponent_buildings',
    'champion_kill',
    'baron_kill',
    'riftHerald_kill',
    'dragon_kill',
    'first_baron_team',
    'first_riftHerald_team',
    'game_end',
    'ward_killed',
    'ward_placed',
)


def _summarize_game(events):
    """Return one stats row (a dict keyed by _STAT_COLUMNS) per team of one game.

    ``events`` is the list of event dicts loaded from a single game file.
    """
    # First pass: read the game id and build a participantID -> teamID lookup
    # from the "game_info" events, so ward events can be attributed in O(1).
    participant_team = {}
    platform_game_id = None
    seen_game_info = False
    for event in events:
        if event.get("eventType") != "game_info":
            continue
        if not seen_game_info:
            # Only the first game_info event supplies the game id.
            platform_game_id = event.get("platformGameId")
            seen_game_info = True
        for participant in event.get("participants") or []:
            participant_id = participant.get("participantID")
            team_id = participant.get("teamID")
            if participant_id is not None and team_id is not None:
                # setdefault: the first mapping wins, and repeated game_info
                # events can no longer make a ward count more than once.
                participant_team.setdefault(participant_id, team_id)

    counts = {
        'building_destroyed': {},
        'champion_kill': {},
        'baron_kill': {},
        'riftHerald_kill': {},
        'dragon_kill': {},
        'game_end': {},
        'ward_killed': {},
        'ward_placed': {},
    }
    first_time = {'baron': float('inf'), 'riftHerald': float('inf')}
    first_team = {'baron': None, 'riftHerald': None}

    def bump(stat, team_id):
        counts[stat][team_id] = counts[stat].get(team_id, 0) + 1

    # Second pass: tally every event type of interest per team.
    for event in events:
        event_type = event.get("eventType")
        if event_type == "building_destroyed":
            bump('building_destroyed', event["teamID"])
        elif event_type == "champion_kill":
            bump('champion_kill', event["killerTeamID"])
        elif event_type == "epic_monster_kill":
            monster_type = event.get("monsterType")
            killer_team_id = event["killerTeamID"]
            if monster_type in ("baron", "riftHerald", "dragon"):
                bump(f"{monster_type}_kill", killer_team_id)
            # Track which team took the earliest baron / rift herald.
            if monster_type in first_time and event["gameTime"] < first_time[monster_type]:
                first_time[monster_type] = event["gameTime"]
                first_team[monster_type] = killer_team_id
        elif event_type == "game_end":
            bump('game_end', event.get("winningTeam"))
        elif event_type == "ward_killed":
            team_id = participant_team.get(event.get("killer"))
            if team_id is not None:  # ignore wards by unknown participants
                bump('ward_killed', team_id)
        elif event_type == "ward_placed":
            team_id = participant_team.get(event.get("placer"))
            if team_id is not None:
                bump('ward_placed', team_id)

    # A team gets a row when it appears in any non-ward tally. A missing team
    # id (None, e.g. a game_end without "winningTeam") is not a team.
    teams = set()
    for stat in ('building_destroyed', 'champion_kill', 'baron_kill',
                 'riftHerald_kill', 'game_end', 'dragon_kill'):
        teams.update(team_id for team_id in counts[stat] if team_id is not None)
    ordered_teams = sorted(teams, key=str)  # deterministic row order

    total_buildings = sum(counts['building_destroyed'].get(t, 0) for t in ordered_teams)

    rows = []
    for team_id in ordered_teams:
        own_buildings = counts['building_destroyed'].get(team_id, 0)
        rows.append({
            'platformGameId': platform_game_id,
            'teamID': team_id,
            'building_destroyed': own_buildings,
            # building_destroyed events carrying any *other* team's id. With
            # two teams this is exactly the opponent's count; with a single
            # team it is 0 instead of an IndexError.
            'destroyed_opponent_buildings': total_buildings - own_buildings,
            'champion_kill': counts['champion_kill'].get(team_id, 0),
            'baron_kill': counts['baron_kill'].get(team_id, 0),
            'riftHerald_kill': counts['riftHerald_kill'].get(team_id, 0),
            'dragon_kill': counts['dragon_kill'].get(team_id, 0),
            'first_baron_team': 1 if team_id == first_team['baron'] else 0,
            'first_riftHerald_team': 1 if team_id == first_team['riftHerald'] else 0,
            'game_end': counts['game_end'].get(team_id, 0),
            'ward_killed': counts['ward_killed'].get(team_id, 0),
            'ward_placed': counts['ward_placed'].get(team_id, 0),
        })
    return rows


def process_json_files(directory_path):
    """Aggregate per-team statistics from every ``*.json`` game file in a directory.

    Each file is expected to hold a JSON list of event dicts carrying an
    ``eventType`` key. For every game, one row per team is produced with the
    counts of building_destroyed, champion_kill, baron/riftHerald/dragon
    epic_monster_kill, game_end (keyed by ``winningTeam``), ward_killed and
    ward_placed events, plus 0/1 flags for the team that took the earliest
    baron and rift herald.

    Args:
        directory_path: Directory containing the game JSON files. Files whose
            name does not end in ``.json`` are ignored.

    Returns:
        A pandas DataFrame with the columns listed in ``_STAT_COLUMNS``, rows
        ordered by file name and then team id. An empty DataFrame (no columns)
        is returned when no rows were produced.

    Raises:
        OSError: If the directory or a file cannot be read.
        json.JSONDecodeError: If a file is not valid JSON.
        KeyError: If a counted event lacks its team / time key.
    """
    rows = []

    # Sorted so the output does not depend on the OS directory order.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith(".json"):
            continue
        # JSON is UTF-8; do not rely on the platform's locale encoding.
        with open(os.path.join(directory_path, filename), 'r', encoding="utf-8") as file:
            loaded_data = json.load(file)

        # Only dict entries of a top-level list are events.
        if isinstance(loaded_data, list):
            events = [entry for entry in loaded_data if isinstance(entry, dict)]
        else:
            events = []
        rows.extend(_summarize_game(events))

    if not rows:
        return pd.DataFrame()
    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(rows, columns=list(_STAT_COLUMNS))



In [5]:
import requests
import json
import gzip
import shutil
import time
import os
from io import BytesIO

# Base URL of the S3 bucket; download_gzip_and_write_to_json requests
# "<S3_BUCKET_URL>/<file_name>.json.gz" from it.
S3_BUCKET_URL = "https://power-rankings-dataset-gprhack.s3.us-west-2.amazonaws.com"

def download_gzip_and_write_to_json(file_name):
    """Download ``<file_name>.json.gz`` from the bucket and store it unzipped.

    The decompressed content is written to ``<file_name>.json`` with every
    ``:`` in the name replaced by ``_``. If that file already exists nothing is
    downloaded. Failures are reported on stdout and never raised, so one bad
    file does not abort a long batch.

    The data is first written to a ``.part`` file and renamed only after the
    decompression finished. An interrupted or failed download therefore never
    leaves a truncated ``.json`` behind that a later run would mistake for a
    complete game and skip.

    Args:
        file_name: Bucket key without the ``.json.gz`` suffix, also used as the
            relative local path (e.g. ``"games/ESPORTSTMNT03:3196037"``).
    """
    local_path = f"{file_name.replace(':', '_')}.json"
    # If file already exists locally do not re-download game
    if os.path.isfile(local_path):
        return

    try:
        # A timeout keeps a stalled connection from hanging the whole run.
        response = requests.get(f"{S3_BUCKET_URL}/{file_name}.json.gz", timeout=60)
    except requests.RequestException as e:
        print(f"Failed to download {file_name}: {e}")
        return

    if response.status_code != 200:
        print(f"Failed to download {file_name}")
        return

    partial_path = f"{local_path}.part"
    try:
        gzip_bytes = BytesIO(response.content)
        with gzip.GzipFile(fileobj=gzip_bytes, mode="rb") as gzipped_file:
            with open(partial_path, 'wb') as output_file:
                shutil.copyfileobj(gzipped_file, output_file)
        # Publish the file only once it is complete.
        os.replace(partial_path, local_path)
    except Exception as e:
        print("Error:", e)
    else:
        print(f"{file_name}.json written")
    finally:
        # Runs on KeyboardInterrupt too; after a successful rename the partial
        # file no longer exists and this is a no-op.
        if os.path.exists(partial_path):
            os.remove(partial_path)

def download_esports_files():
    """Fetch the five esports metadata files into the ``esports-data`` folder.

    The folder is created when it does not exist yet; each file is then
    retrieved through ``download_gzip_and_write_to_json``.
    """
    target_dir = "esports-data"
    folder_present = os.path.exists(target_dir)
    if not folder_present:
        os.makedirs(target_dir)

    for stem in ("leagues", "tournaments", "players", "teams", "mapping_data"):
        remote_key = f"{target_dir}/{stem}"
        download_gzip_and_write_to_json(remote_key)

def _iter_completed_games(tournaments_data, year):
    """Yield each game in state "completed" of the tournaments starting in ``year``."""
    year_prefix = str(year)
    for tournament in tournaments_data:
        start_date = tournament.get("startDate") or ""
        if not start_date.startswith(year_prefix):
            continue
        print(f"Processing {tournament['slug']}")
        for stage in tournament["stages"]:
            for section in stage["sections"]:
                for match in section["matches"]:
                    for game in match["games"]:
                        if game["state"] == "completed":
                            yield game


def _consolidate_and_clear(directory, master_df):
    """Append the stats of all game files in ``directory`` to ``master_df``, then delete the files."""
    master_df = pd.concat([master_df, process_json_files(directory)], ignore_index=True)
    print("data process complete\n")
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        # Only plain files are removed; sub-directories are left alone.
        if os.path.isfile(file_path):
            os.remove(file_path)
    print("data delete complete\n")
    return master_df


def download_games(year, max_game=250, batch_size=100,
                   output_csv="C:\\data for lol\\result chart\\master_consolidated_data.csv"):
    """Download completed games of one year and consolidate their stats into a CSV.

    Reads ``esports-data/tournaments.json`` and ``esports-data/mapping_data.json``
    (as written by ``download_esports_files``), downloads every completed game
    of the tournaments whose ``startDate`` begins with ``year`` into the
    ``games`` folder, and after every ``batch_size`` games runs
    ``process_json_files`` over that folder and deletes the processed files to
    bound disk usage. NOTE: every regular file in ``games`` is deleted,
    including files that were there before the run.

    The consolidated table is always written to ``output_csv``, including the
    last partial batch and when ``max_game`` stops the run early.

    Args:
        year: Year to select; compared as a string prefix of ``startDate``.
        max_game: Maximum number of games to process.
        batch_size: Number of games to accumulate before consolidating and
            deleting the downloaded files.
        output_csv: Destination path of the consolidated CSV.

    Raises:
        OSError: If the metadata files cannot be read or the CSV not written.
        KeyError: If a tournament entry lacks an expected key.
    """
    start_time = time.time()
    with open("esports-data/tournaments.json", "r") as json_file:
        tournaments_data = json.load(json_file)
    with open("esports-data/mapping_data.json", "r") as json_file:
        mappings_data = json.load(json_file)

    # One directory for both downloading and processing; it must stay the
    # relative key prefix because it is also part of the download URL.
    directory = "games"
    if not os.path.exists(directory):
        os.makedirs(directory)

    mappings = {
        esports_game["esportsGameId"]: esports_game for esports_game in mappings_data
    }

    master_df = pd.DataFrame()
    game_counter = 0
    pending_in_batch = 0

    for game in _iter_completed_games(tournaments_data, year):
        try:
            platform_game_id = mappings[game["id"]]["platformGameId"]
        except KeyError:
            print(f"{game['id']} not found in the mapping table")
            continue

        download_gzip_and_write_to_json(f"{directory}/{platform_game_id}")
        game_counter += 1
        pending_in_batch += 1

        if pending_in_batch >= batch_size:
            master_df = _consolidate_and_clear(directory, master_df)
            pending_in_batch = 0

        # Reported only right after a game was handled, so the same count is
        # never printed twice.
        if game_counter % 10 == 0:
            minutes = round((time.time() - start_time) / 60, 2)
            print(f"----- Processed {game_counter} games, current run time: {minutes} minutes")

        if game_counter >= max_game:
            print(f"{max_game} complete\n")
            break

    # Consolidate whatever is left from the last, partial batch.
    if pending_in_batch:
        master_df = _consolidate_and_clear(directory, master_df)

    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    master_df.to_csv(output_csv, index=False)

# Script entry point: fetch the esports metadata first, because download_games
# reads esports-data/tournaments.json and esports-data/mapping_data.json.
if __name__ == "__main__":
    download_esports_files()
    download_games(2023)



esports-data/leagues.json written
esports-data/tournaments.json written
esports-data/players.json written
esports-data/teams.json written
esports-data/mapping_data.json written
Processing nacl_qualifiers_2_summer_2023
games/ESPORTSTMNT03:3196037.json written
games/ESPORTSTMNT03:3196049.json written
games/ESPORTSTMNT03:3196058.json written
games/ESPORTSTMNT03:3197014.json written
games/ESPORTSTMNT03:3198185.json written
games/ESPORTSTMNT03:3200156.json written
games/ESPORTSTMNT03:3199178.json written
games/ESPORTSTMNT03:3198199.json written
games/ESPORTSTMNT03:3200168.json written
games/ESPORTSTMNT03:3198544.json written
----- Processed 10 games, current run time:                                     0.37 minutes
games/ESPORTSTMNT03:3198546.json written
games/ESPORTSTMNT03:3195064.json written
games/ESPORTSTMNT03:3196051.json written
games/ESPORTSTMNT03:3196057.json written
110733838936446954 not found in the mapping table
games/ESPORTSTMNT03:3197025.json written
games/ESPORTSTMNT03:3198

KeyboardInterrupt: 