Here we will merge all runs into a single list to simulate how we could store them in a DDBB

In [7]:
from glob import glob
import json

In [13]:
import logging
from bisect import bisect
from logging import getLogger, Formatter, LogRecord, StreamHandler
from typing import Dict


class LevelFormatter(Formatter):
    def __init__(self, formats: Dict[int, str], **kwargs):
        super().__init__()

        if 'fmt' in kwargs:
            raise ValueError(
                'Format string must be passed to level-surrogate formatters, '
                'not this one'
            )

        self.formats = sorted(
            (level, Formatter(fmt, **kwargs)) for level, fmt in formats.items()
        )

    def format(self, record: LogRecord) -> str:
        idx = bisect(self.formats, (record.levelno,), hi=len(self.formats)-1)
        level, formatter = self.formats[idx]
        return formatter.format(record)
    

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

if not logger.hasHandlers():
    handler = StreamHandler()
    formatter = LevelFormatter(
        {
            logging.DEBUG: '\033[94m[%(asctime)s - %(lineno)d] DEBUG\033[0m: %(message)s',
            logging.INFO: '\033[94mINFO\033[0m: %(message)s',
            logging.WARNING: '\033[93mWARNING\033[0m: %(message)s',
            logging.ERROR: '\033[91mERROR\033[0m: %(message)s',
            logging.CRITICAL: '\033[91mCRITICAL\033[0m: %(message)s'
        }
    )
    handler.setFormatter(formatter)
    logger.addHandler(handler)

logger.info("Logger set up successfully")

[91mINFO[0m: Logger set up successfully


In [12]:
file_names = glob("./prototype data/*")
logger.debug(file_names)

[91m[2024-09-29 17:52:18,861 - 2] DEBUG[0m: ['./prototype data/celeste_asc_4400.json', './prototype data/celeste_asc_4800.json', './prototype data/celeste_asc_5600.json', './prototype data/celeste_asc_6000.json', './prototype data/celeste_asc_7200.json', './prototype data/celeste_desc_4200.json', './prototype data/celeste_desc_5000.json', './prototype data/celeste_desc_200.json', './prototype data/celeste_desc_6600.json', './prototype data/celeste_asc_0.json', './prototype data/celeste_desc_7800.json', './prototype data/celeste_desc_7400.json', './prototype data/celeste_desc_5200.json', './prototype data/celeste_desc_4000.json', './prototype data/celeste_desc_6800.json', './prototype data/celeste_desc_7600.json', './prototype data/celeste_desc_6400.json', './prototype data/celeste_asc_5400.json', './prototype data/celeste_asc_4600.json', './prototype data/celeste_asc_5800.json', './prototype data/celeste_asc_7000.json', './prototype data/celeste_asc_6200.json', './prototype data/cele

In [16]:
from typing import List


file_data: List = []


for file_name in file_names:
    logger.debug(file_name)

    with open(file_name, "r") as fp:
        data_chunk: List = json.load(fp)["data"]

        logger.debug(f"chunk length: {len(data_chunk)}")
        file_data.extend(data_chunk)


logger.info(len(file_data))
logger.debug(file_data[0])

[91m[2024-09-29 18:05:18,795 - 8] DEBUG[0m: ./prototype data/celeste_asc_4400.json
[91m[2024-09-29 18:05:18,827 - 13] DEBUG[0m: chunk length: 200
[91m[2024-09-29 18:05:18,828 - 8] DEBUG[0m: ./prototype data/celeste_asc_4800.json
[91m[2024-09-29 18:05:19,081 - 13] DEBUG[0m: chunk length: 200
[91m[2024-09-29 18:05:19,082 - 8] DEBUG[0m: ./prototype data/celeste_asc_5600.json
[91m[2024-09-29 18:05:19,119 - 13] DEBUG[0m: chunk length: 200
[91m[2024-09-29 18:05:19,122 - 8] DEBUG[0m: ./prototype data/celeste_asc_6000.json
[91m[2024-09-29 18:05:19,150 - 13] DEBUG[0m: chunk length: 200
[91m[2024-09-29 18:05:19,153 - 8] DEBUG[0m: ./prototype data/celeste_asc_7200.json
[91m[2024-09-29 18:05:19,184 - 13] DEBUG[0m: chunk length: 200
[91m[2024-09-29 18:05:19,189 - 8] DEBUG[0m: ./prototype data/celeste_desc_4200.json
[91m[2024-09-29 18:05:19,268 - 13] DEBUG[0m: chunk length: 200
[91m[2024-09-29 18:05:19,270 - 8] DEBUG[0m: ./prototype data/celeste_desc_5000.json
[91m[2024-09

In [17]:
run_set = set()
unique_runs = [run for run in file_data if run["id"] not in run_set and not run_set.add(run["id"])]

logger.info(len(unique_runs))

[91mINFO[0m: 18746


In [18]:
outfile_name = "celeste_runs.json"

with open(outfile_name, "w+") as fp:
    logger.debug(f"saving runs to: {outfile_name}")
    json.dump(unique_runs, fp, indent=2)
logger.debug("runs saved")

[91m[2024-09-29 18:07:41,335 - 4] DEBUG[0m: saving runs to: celeste_runs.json
[91m[2024-09-29 18:07:50,670 - 6] DEBUG[0m: runs saved
