In [None]:
import os
import json
import sys

%run fetchGameData.ipynb

In [None]:
def cleanAndSaveFiles(rawFolderPath, cleanedFolderPath, columnsToExclude, displayNoZero):
    """Clean every JSON file in a folder and write the results to another folder.

    Each regular file directly inside ``rawFolderPath`` is parsed as UTF-8
    JSON, passed through ``cleanData`` and written under the same file name
    into ``cleanedFolderPath`` (indented, non-ASCII characters kept as-is).
    Progress is printed every 250 files and once more at the end.

    Args:
        rawFolderPath: Folder holding the raw JSON files.
        cleanedFolderPath: Destination folder; created (with parents) if missing.
        columnsToExclude: Forwarded unchanged to ``cleanData``.
        displayNoZero: Forwarded unchanged to ``cleanData``.

    Raises:
        OSError: If a folder or file cannot be read or written.
        json.JSONDecodeError: If a raw file does not contain valid JSON.

    Note:
        ``cleanData`` is not defined in this notebook; it is presumably
        provided by the ``%run fetchGameData.ipynb`` cell -- verify.
    """
    progressInterval = 250

    # exist_ok makes this a single race-free call instead of check-then-create.
    os.makedirs(cleanedFolderPath, exist_ok=True)

    # os.listdir also returns sub-directories, which open() cannot read;
    # restrict the batch to regular files so one stray folder cannot abort it.
    files = [
        name for name in os.listdir(rawFolderPath)
        if os.path.isfile(os.path.join(rawFolderPath, name))
    ]
    totalAmountOfFiles = len(files)
    processedFilesCount = 0

    for filename in files:
        rawFilePath = os.path.join(rawFolderPath, filename)
        cleanedFilePath = os.path.join(cleanedFolderPath, filename)

        # Read the raw game data
        with open(rawFilePath, "r", encoding="utf-8") as file:
            data = json.load(file)

        # Clean the data
        cleanedData = cleanData(data, columnsToExclude, displayNoZero)

        # Save the cleaned data
        with open(cleanedFilePath, "w", encoding="utf-8") as file:
            json.dump(cleanedData, file, indent=4, ensure_ascii=False)

        processedFilesCount += 1
        if processedFilesCount % progressInterval == 0:
            print(f"Processed {processedFilesCount} out of {totalAmountOfFiles} files.")

    # Final summary, unless the last in-loop message already reported this
    # count. An empty folder still gets a line so the caller sees that
    # nothing was processed.
    if processedFilesCount == 0 or processedFilesCount % progressInterval != 0:
        print(f"Processed {processedFilesCount} out of {totalAmountOfFiles} files.")

# Example arguments for cleanAndSaveFiles; both are forwarded unchanged to cleanData.
displayNoZero = True
columnsToExclude = [
    # magic damage
    'magicDamageDone',
    'magicDamageDoneToChampions',
    'magicDamageTaken',
    # physical damage
    'physicalDamageDone',
    'physicalDamageDoneToChampions',
    'physicalDamageTaken',
    # true damage
    'trueDamageDone',
    'trueDamageDoneToChampions',
    'trueDamageTaken',
    # other
    'timeEnemySpentControlled',
]

In [None]:
#rawFolderPath = "E:\\RawIronMatchJsonsKorea"
#cleanedFolderPath = "E:\\CleanedIronMatchJsonsKorea"
#cleanAndSaveFiles(rawFolderPath, cleanedFolderPath, columnsToExclude, displayNoZero)

In [None]:
def filterPostData(data):
    """Return a trimmed copy of a post-game match dictionary.

    The result has two keys, in this order:

    * ``'metadata'`` -- the same object as ``data['metadata']`` (not copied).
    * ``'info'`` -- the whitelisted top-level fields of ``data['info']``
      followed by ``'participants'``, a list with one trimmed dictionary per
      entry of ``data['info']['participants']``.

    Inside each participant only whitelisted fields are kept, and the nested
    ``'challenges'`` dictionary is itself reduced to a whitelist. Keys keep
    the order they had in the input; the input is not modified.

    Raises:
        KeyError: If ``'metadata'``, ``'info'`` or ``info['participants']``
            is missing.
    """
    # Whitelist for the top level of data['info'].
    keptInfoFields = {
        'endOfGameResult', 'gameCreation', 'gameDuration', 'gameEndTimestamp',
        'gameId', 'gameMode', 'gameName', 'gameStartTimestamp', 'gameType',
        'gameVersion', 'mapId', 'platformId', 'queueId', 'teams',
        'tournamentCode',
    }

    # Whitelist for each participant dictionary.
    keptParticipantFields = {
        'championName', 'damageDealtToBuildings', 'damageDealtToObjectives',
        'damageDealtToTurrets', 'goldEarned', 'goldSpent',
        'individualPosition', 'inhibitorTakedowns', 'lane', 'participantId',
        'perks', 'puuid', 'riotIdGameName', 'riotIdTagline', 'role',
        'summonerId', 'summonerName', 'teamEarlySurrendered', 'teamId',
        'teamPosition', 'totalAllyJungleMinionsKilled', 'totalDamageDealt',
        'totalDamageDealtToChampions', 'totalDamageTaken',
        'totalEnemyJungleMinionsKilled', 'turretTakedowns', 'visionScore',
        'wardsKilled', 'wardsPlaced', 'win', 'challenges',
    }

    # Whitelist for the nested participant['challenges'] dictionary.
    keptChallengeFields = {
        'controlWardsPlaced', 'dragonTakedowns', 'goldPerMinute', 'kda',
        'killParticipation', 'killsNearEnemyTurret', 'killsUnderOwnTurret',
        'maxCsAdvantageOnLaneOpponent', 'maxLevelLeadLaneOpponent',
        'soloKills', 'stealthWardsPlaced', 'takedownOnFirstTurret',
        'takedowns', 'teamBaronKills', 'teamDamagePercentage',
        'teamElderDragonKills', 'teamRiftHeraldKills',
        'visionScorePerMinute', 'wardTakedowns',
    }

    def trimParticipant(player):
        # Keep whitelisted fields only; 'challenges' gets its own whitelist.
        trimmed = {}
        for field in player:
            if field not in keptParticipantFields:
                continue
            if field != 'challenges':
                trimmed[field] = player[field]
                continue
            stats = player[field]
            trimmed[field] = {
                name: stats[name] for name in stats if name in keptChallengeFields
            }
        return trimmed

    # Metadata is passed through untouched.
    result = {'metadata': data['metadata']}

    rawInfo = data['info']
    slimInfo = {field: rawInfo[field] for field in rawInfo if field in keptInfoFields}
    # Participants always go last, after the whitelisted info fields.
    slimInfo['participants'] = [trimParticipant(player) for player in rawInfo['participants']]

    result['info'] = slimInfo
    return result

In [None]:
def cleanAndSavePostGameData(rawFolderPath, cleanedFolderPath):
    """Filter every post-game JSON file in a folder and write the results elsewhere.

    Each regular file directly inside ``rawFolderPath`` is parsed as UTF-8
    JSON, reduced with ``filterPostData`` and written under the same file
    name into ``cleanedFolderPath`` (indented, non-ASCII characters kept
    as-is). Progress is printed every 250 files and once more at the end.

    Args:
        rawFolderPath: Folder holding the raw post-game JSON files.
        cleanedFolderPath: Destination folder; created (with parents) if missing.

    Raises:
        OSError: If a folder or file cannot be read or written.
        json.JSONDecodeError: If a raw file does not contain valid JSON.
        KeyError: Propagated from ``filterPostData`` when a file lacks the
            keys it requires.
    """
    progressInterval = 250

    # exist_ok makes this a single race-free call instead of check-then-create.
    os.makedirs(cleanedFolderPath, exist_ok=True)

    # os.listdir also returns sub-directories, which open() cannot read;
    # restrict the batch to regular files so one stray folder cannot abort it.
    files = [
        name for name in os.listdir(rawFolderPath)
        if os.path.isfile(os.path.join(rawFolderPath, name))
    ]
    totalAmountOfFiles = len(files)
    processedFilesCount = 0

    for filename in files:
        rawFilePath = os.path.join(rawFolderPath, filename)
        cleanedFilePath = os.path.join(cleanedFolderPath, filename)

        # Read the raw game data
        with open(rawFilePath, "r", encoding="utf-8") as file:
            data = json.load(file)

        # Clean the data using filterPostData
        cleanedData = filterPostData(data)

        # Save the cleaned data
        with open(cleanedFilePath, "w", encoding="utf-8") as file:
            json.dump(cleanedData, file, indent=4, ensure_ascii=False)

        processedFilesCount += 1
        if processedFilesCount % progressInterval == 0:
            print(f"Processed {processedFilesCount} out of {totalAmountOfFiles} files.")

    # Final summary, unless the last in-loop message already reported this
    # count. An empty folder still gets a line so the caller sees that
    # nothing was processed.
    if processedFilesCount == 0 or processedFilesCount % progressInterval != 0:
        print(f"Processed {processedFilesCount} out of {totalAmountOfFiles} files.")

In [None]:
# Report which files exist in only one of two folders.
# Use this to compare the timeline data folder against the post-match data folder.
def find_exclusive_files(folder1, folder2):
    """Print the entries that exist in only one of two folders.

    Entry names (as returned by ``os.listdir``) are compared by name only;
    contents are not inspected. For each folder, either the exclusive names
    and their count are printed, or a line stating that there are none.

    Args:
        folder1: Path of the first folder.
        folder2: Path of the second folder.

    Raises:
        OSError: If either folder cannot be listed.
    """
    names_folder1 = set(os.listdir(folder1))
    names_folder2 = set(os.listdir(folder2))

    # Same report for both sides: names present here but not in the other folder.
    report = (
        (folder1, names_folder1 - names_folder2),
        (folder2, names_folder2 - names_folder1),
    )
    for folder, exclusive in report:
        if not exclusive:
            print(f"No files exclusive to {folder}")
            continue

        print(f"Files exclusive to {folder}:")
        # Set iteration order changes between runs; sort so the listing is
        # reproducible and easy to scan.
        for name in sorted(exclusive):
            print(name)
        print(f"Total exclusive files in {folder}: {len(exclusive)}")

In [None]:
# Delete the files that exist in only one of two folders (timeline vs. post-match data).
def _delete_exclusive_entries(folder, names):
    # Delete the given entry names from `folder`, reporting each action and the final count.
    if not names:
        print(f"No files exclusive to {folder}")
        return

    print(f"Deleting files exclusive to {folder}:")
    deleted_count = 0
    for name in names:
        entry_path = os.path.join(folder, name)
        # os.remove cannot delete a directory; raising here would abort the
        # run halfway with some files already gone, so skip it instead.
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            print(f"Skipped {name} (directory)")
            continue
        os.remove(entry_path)
        print(f"Deleted {name}")
        deleted_count += 1
    print(f"Total exclusive files deleted in {folder}: {deleted_count}")


def find_and_delete_exclusive_files(folder1, folder2):
    """Delete the files that exist in only one of two folders.

    Entry names (as returned by ``os.listdir``) are compared by name only.
    Every file whose name appears in just one folder is removed from that
    folder, so that afterwards both folders hold the same file names.
    Sub-directories are never deleted; they are reported and skipped.
    This is destructive and cannot be undone.

    Args:
        folder1: Path of the first folder.
        folder2: Path of the second folder.

    Raises:
        OSError: If a folder cannot be listed or a file cannot be removed.
    """
    # Both listings are taken before anything is deleted.
    names_folder1 = set(os.listdir(folder1))
    names_folder2 = set(os.listdir(folder2))

    # Sorted so the deletion order and the log are reproducible between runs.
    _delete_exclusive_entries(folder1, sorted(names_folder1 - names_folder2))
    _delete_exclusive_entries(folder2, sorted(names_folder2 - names_folder1))

In [None]:
#folder1 = ""
#folder2 = ""
# find_exclusive_files(folder1, folder2)