In [1]:

import json
import os
import pandas as pd

def _increment(counts, player_id):
    """Add one to ``counts[player_id]``; falsy ids (``None``, ``0``, ``""``) are ignored."""
    if player_id:
        counts[player_id] = counts.get(player_id, 0) + 1


def process_json_files(directory_path):
    """Aggregate per-player kill and ward counts for every game file in a directory.

    Each ``*.json`` file in ``directory_path`` is expected to hold a list of
    event dicts. Only events whose ``eventType`` is ``champion_kill``,
    ``ward_killed`` or ``ward_placed`` are counted.

    Args:
        directory_path: Directory to scan (not recursive). Entries that do not
            end in ``.json`` are skipped.

    Returns:
        pandas.DataFrame with one row per (file, player) and the columns
        ``platformGameId``, ``playerID``, ``killCount``, ``victimCount``,
        ``wardKilledCount`` and ``wardPlacedCount``. The frame is empty (but
        still has these columns) when nothing was counted.

    Raises:
        OSError: If the directory or one of its files cannot be read.
        json.JSONDecodeError: If a ``.json`` file does not contain valid JSON.
    """
    columns = ['platformGameId', 'playerID', 'killCount', 'victimCount', 'wardKilledCount', 'wardPlacedCount']
    rows = []

    # Sorted so the row order does not depend on the file system's listing order.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith(".json"):
            continue

        # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
        # can fail on non-ASCII text inside the JSON.
        with open(os.path.join(directory_path, filename), 'r', encoding='utf-8') as file:
            loaded_data = json.load(file)

        kill_counts = {}
        victim_counts = {}
        ward_killed_counts = {}
        ward_placed_counts = {}
        platform_game_id = None

        # Single pass over the events instead of one filtering pass per event type.
        for event in loaded_data:
            # Take the game id from the first event that carries one, so a file
            # without any champion_kill event no longer raises IndexError.
            # NOTE(review): assumes every event in a file shares the same
            # platformGameId - confirm against the data set.
            if platform_game_id is None:
                platform_game_id = event.get('platformGameId')

            event_type = event.get('eventType', '')
            if event_type == 'champion_kill':
                _increment(kill_counts, event.get('killer'))
                _increment(victim_counts, event.get('victim'))
            elif event_type == 'ward_killed':
                _increment(ward_killed_counts, event.get('killer'))
            elif event_type == 'ward_placed':
                _increment(ward_placed_counts, event.get('placer'))

        # dict.fromkeys de-duplicates while keeping first-seen order, which
        # avoids the arbitrary iteration order of a set.
        unique_player_ids = dict.fromkeys(
            [*kill_counts, *victim_counts, *ward_killed_counts, *ward_placed_counts]
        )
        for player_id in unique_player_ids:
            rows.append([
                platform_game_id,
                player_id,
                kill_counts.get(player_id, 0),
                victim_counts.get(player_id, 0),
                ward_killed_counts.get(player_id, 0),
                ward_placed_counts.get(player_id, 0),
            ])

    # Build the frame once; concatenating inside the loop re-copies all
    # previously collected rows on every file.
    return pd.DataFrame(rows, columns=columns)


In [2]:
import requests
import json
import gzip
import shutil
import time
import os
from io import BytesIO

S3_BUCKET_URL = "https://power-rankings-dataset-gprhack.s3.us-west-2.amazonaws.com"

def download_gzip_and_write_to_json(file_name):
    """Download ``{file_name}.json.gz`` from the S3 bucket and store it decompressed.

    The result is written to ``{file_name}.json`` with every ``:`` in the path
    replaced by ``_``. Nothing is downloaded when that file already exists.

    Failures after the request (a non-200 status, or an error while
    decompressing or writing) are reported with ``print`` and do not raise.
    Errors raised by ``requests.get`` itself propagate to the caller.

    Args:
        file_name: Bucket-relative path without the ``.json.gz`` suffix.

    Returns:
        None.
    """
    local_file_name = file_name.replace(":", "_")
    target_path = f"{local_file_name}.json"

    # If file already exists locally do not re-download game
    if os.path.isfile(target_path):
        return

    # Without a timeout a stalled connection would block the whole run forever.
    response = requests.get(f"{S3_BUCKET_URL}/{file_name}.json.gz", timeout=60)
    if response.status_code != 200:
        print(f"Failed to download {file_name}")
        return

    # Decompress into a side file and rename only on success. Writing straight
    # to the final name left an empty/truncated .json behind when decompression
    # failed, and the "already exists" guard above then skipped it forever.
    partial_path = f"{target_path}.part"
    try:
        with gzip.GzipFile(fileobj=BytesIO(response.content), mode="rb") as gzipped_file:
            with open(partial_path, 'wb') as output_file:
                shutil.copyfileobj(gzipped_file, output_file)
        os.replace(partial_path, target_path)
        print(f"{file_name}.json written")
    except Exception as e:
        if os.path.isfile(partial_path):
            os.remove(partial_path)
        print("Error:", e)

def download_esports_files():
    """Fetch the esports metadata files into the local ``esports-data`` folder.

    Creates the folder when it is missing, then requests ``leagues``,
    ``tournaments``, ``players``, ``teams`` and ``mapping_data`` (in that
    order) through ``download_gzip_and_write_to_json``.
    """
    target_dir = "esports-data"
    folder_present = os.path.exists(target_dir)
    if not folder_present:
        os.makedirs(target_dir)

    for stem in ("leagues", "tournaments", "players", "teams", "mapping_data"):
        remote_name = f"{target_dir}/{stem}"
        download_gzip_and_write_to_json(remote_name)

def _process_and_clear_games(directory):
    """Aggregate all game files in ``directory`` and then delete every file in it.

    Returns the DataFrame produced by ``process_json_files(directory)``.
    """
    batch_df = process_json_files(directory)
    print("data process complete\n")
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        # Only regular files are removed; sub-directories are left alone.
        if os.path.isfile(file_path):
            os.remove(file_path)
    print("data delete complete\n")
    return batch_df


def download_games(year, batch_size=100,
                   output_csv="C:\\data for lol\\result chart\\master_consolidated_data.csv"):
    """Download the completed games of one year and write their aggregated stats.

    Reads ``esports-data/tournaments.json`` and ``esports-data/mapping_data.json``,
    downloads every game whose state is ``"completed"`` from tournaments whose
    ``startDate`` starts with ``year`` into the ``games`` folder, and aggregates
    them with ``process_json_files``. To bound disk usage the downloaded files
    are aggregated and deleted after every ``batch_size`` games, and once more
    at the end for the remaining games.

    Args:
        year: Year to select; compared as ``str(year)`` against the start of
            each tournament's ``startDate``.
        batch_size: Number of downloaded games after which the ``games`` folder
            is aggregated and emptied. Defaults to 100.
        output_csv: Path of the CSV that receives the combined result. Its
            parent directory is created when missing.

    Returns:
        None. The result is written to ``output_csv``.
    """
    start_time = time.time()
    with open("esports-data/tournaments.json", "r") as json_file:
        tournaments_data = json.load(json_file)
    with open("esports-data/mapping_data.json", "r") as json_file:
        mappings_data = json.load(json_file)

    directory = "games"
    os.makedirs(directory, exist_ok=True)

    mappings = {
        esports_game["esportsGameId"]: esports_game for esports_game in mappings_data
    }

    batch_frames = []   # one aggregated DataFrame per processed batch
    game_counter = 0    # games handled in total
    pending_games = 0   # games downloaded since the last batch was processed

    for tournament in tournaments_data:
        # `or ""` also covers an explicit null startDate.
        start_date = tournament.get("startDate") or ""
        if not start_date.startswith(str(year)):
            continue

        print(f"Processing {tournament['slug']}")
        for stage in tournament["stages"]:
            for section in stage["sections"]:
                for match in section["matches"]:
                    for game in match["games"]:
                        if game["state"] != "completed":
                            continue

                        try:
                            platform_game_id = mappings[game["id"]]["platformGameId"]
                        except KeyError:
                            print(f"{game['id']} not found in the mapping table")
                            continue

                        download_gzip_and_write_to_json(f"{directory}/{platform_game_id}")
                        game_counter += 1
                        pending_games += 1

                        # Process and delete from the same folder the files
                        # were downloaded to, rather than a separate
                        # hard-coded absolute path.
                        if pending_games >= batch_size:
                            batch_frames.append(_process_and_clear_games(directory))
                            pending_games = 0

                        # Reported only when the counter actually advanced, so
                        # skipped games no longer repeat the same line. The
                        # message is a single literal: a line continuation
                        # inside the string embedded the indentation in it.
                        if game_counter % 10 == 0:
                            elapsed_minutes = round((time.time() - start_time) / 60, 2)
                            print(
                                f"----- Processed {game_counter} games, "
                                f"current run time: {elapsed_minutes} minutes"
                            )

    # Games downloaded after the last full batch were previously never
    # processed and therefore missing from the CSV.
    if pending_games:
        batch_frames.append(_process_and_clear_games(directory))

    # pd.concat rejects an empty list, so fall back to an empty frame.
    if batch_frames:
        master_df = pd.concat(batch_frames, ignore_index=True)
    else:
        master_df = pd.DataFrame()

    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    master_df.to_csv(output_csv, index=False)

# Entry point. The metadata download runs first because download_games reads
# esports-data/tournaments.json and esports-data/mapping_data.json, both of
# which download_esports_files fetches.
if __name__ == "__main__":
    download_esports_files()
    download_games(2023)



esports-data/leagues.json written
esports-data/tournaments.json written
esports-data/players.json written
esports-data/teams.json written
esports-data/mapping_data.json written
Processing nacl_qualifiers_2_summer_2023
games/ESPORTSTMNT03:3196037.json written
games/ESPORTSTMNT03:3196049.json written
games/ESPORTSTMNT03:3196058.json written
games/ESPORTSTMNT03:3197014.json written
games/ESPORTSTMNT03:3198185.json written
games/ESPORTSTMNT03:3200156.json written
games/ESPORTSTMNT03:3199178.json written
games/ESPORTSTMNT03:3198199.json written
games/ESPORTSTMNT03:3200168.json written
games/ESPORTSTMNT03:3198544.json written
----- Processed 10 games, current run time:                                     0.32 minutes
games/ESPORTSTMNT03:3198546.json written
games/ESPORTSTMNT03:3195064.json written
games/ESPORTSTMNT03:3196051.json written
games/ESPORTSTMNT03:3196057.json written
110733838936446954 not found in the mapping table
games/ESPORTSTMNT03:3197025.json written
games/ESPORTSTMNT03:3198

In [3]:
import csv
import json
import pandas as pd

# Load the per-game, per-participant counts. csv.DictReader yields every value
# as a string; the count columns are converted to numbers further down.
with open("C:\\data for lol\\result chart\\master_consolidated_data.csv", 'r', encoding='utf-8', newline='') as csv_file:
    csv_data = list(csv.DictReader(csv_file))

# Read the mapping data from the JSON file
with open("C:\\data for lol\\esports-data\\mapping_data.json", 'r', encoding='utf-8') as json_file:
    mapping_data = json.load(json_file)

# Create a dictionary for faster lookup of participantMapping based on platformGameId
platform_game_id_to_mapping = {entry['platformGameId']: entry['participantMapping'] for entry in mapping_data}

# Resolve each row's in-game playerID to the value stored for it in that game's
# participantMapping; None when the game or the participant is unknown.
# NOTE(review): presumably participantMapping is keyed by the participant id as
# a string and holds the esports player id - confirm against mapping_data.json.
for row in csv_data:
    participant_mapping = platform_game_id_to_mapping.get(row['platformGameId'], {})
    row['participantMapping'] = participant_mapping.get(row['playerID'])

# Convert the updated CSV data to a pandas DataFrame. Naming the columns keeps
# the frame well-formed even when the CSV contains no rows.
numeric_columns = ['killCount', 'victimCount', 'wardKilledCount', 'wardPlacedCount']
df_updated_csv = pd.DataFrame(
    csv_data,
    columns=['platformGameId', 'playerID', *numeric_columns, 'participantMapping'],
)

# Read the players data from the JSON file
with open("C:\\data for lol\\esports-data\\players.json", 'r', encoding='utf-8') as json_file:
    players_data = json.load(json_file)

# Create a dictionary for faster lookup of handle based on player_id
# (for a repeated player_id the last entry wins).
player_id_to_handle = {entry['player_id']: entry['handle'] for entry in players_data}

# Add the "handle" column to the CSV data
df_updated_csv['handle'] = df_updated_csv['participantMapping'].map(player_id_to_handle)

# Convert the columns to numeric type for aggregation
df_updated_csv[numeric_columns] = df_updated_csv[numeric_columns].apply(pd.to_numeric)

# Group by handle AND player id. Grouping by handle alone merged the stats of
# different players that share a handle, and the later merge back on handle
# multiplied rows whenever players.json repeated a handle. Rows without a
# resolved handle are still dropped (groupby skips NaN keys), as before.
player_keys = ['handle', 'participantMapping']

# Concatenate platformGameId values with commas for each player
grouped_platform_game_ids = (
    df_updated_csv.groupby(player_keys)['platformGameId']
    .apply(lambda x: ','.join(x))
    .reset_index()
)

# Aggregate the count columns by summing them for each player
grouped_df = df_updated_csv.groupby(player_keys)[numeric_columns].sum().reset_index()

# Merge the aggregated platformGameId data with the grouped dataframe
final_df = pd.merge(grouped_df, grouped_platform_game_ids, on=player_keys, how='left')
final_df = final_df.rename(columns={'participantMapping': 'player_id'})

# Convert players_data to a DataFrame for easy column selection and merging
df_players = pd.DataFrame(players_data)
# Attach home_team_id by player_id; one row per player_id (last entry wins,
# matching the handle lookup above) so the merge cannot add rows.
player_teams = df_players[['player_id', 'home_team_id']].drop_duplicates(subset='player_id', keep='last')
final_df = pd.merge(final_df, player_teams, on='player_id', how='left')

# Read the teams data from the JSON file and merge the team name using home_team_id as the key
with open("C:\\data for lol\\esports-data\\teams.json", 'r', encoding='utf-8') as json_file:
    teams_data = json.load(json_file)
df_teams = pd.DataFrame(teams_data)
# De-duplicate team_id for the same reason, and name the column team_name up front.
team_names = (
    df_teams[['team_id', 'name']]
    .drop_duplicates(subset='team_id', keep='last')
    .rename(columns={'name': 'team_name'})
)
final_df = (
    pd.merge(final_df, team_names, left_on='home_team_id', right_on='team_id', how='left')
    .drop(columns=['team_id'])
)

# Keep the column order the output file has always had.
final_df = final_df[['handle', *numeric_columns, 'platformGameId', 'player_id', 'home_team_id', 'team_name']]

# Save the final dataframe to the specified path
final_df.to_csv('C:\\data for lol\\result chart\\final_data.csv', index=False)
