In [1]:
import json
import os
import pandas as pd
from tqdm import tqdm

from amelia_datatools.utils import common as C
from amelia_datatools.utils import utils as U

In [2]:
import math

class RunningStats:
    """Streaming accumulator for mean, sample variance and standard deviation.

    Samples are fed one at a time through ``push``; the statistics are updated
    incrementally with Welford's recurrence, so the samples themselves are
    never stored.

    Attributes:
        n: Number of samples pushed since construction or the last ``clear``.
        old_m, new_m: Previous and current running mean.
        old_s, new_s: Previous and current running sum of squared deviations.
    """

    def __init__(self):
        self.n = 0
        self.old_m = self.new_m = 0
        self.old_s = self.new_s = 0

    def clear(self):
        """Restart the accumulator by resetting the sample counter to zero."""
        # Only the counter is reset: the first push() after a clear re-seeds
        # the mean and the old sum, and the accessors guard on `n`.
        self.n = 0

    def push(self, x):
        """Fold one sample ``x`` into the running statistics."""
        self.n += 1

        if self.n != 1:
            delta = x - self.old_m
            self.new_m = self.old_m + delta / self.n
            self.new_s = self.old_s + delta * (x - self.new_m)

            # The current values become the "previous" ones for the next push.
            self.old_m, self.old_s = self.new_m, self.new_s
            return

        # First sample: it is the mean, and there is no spread yet.
        self.old_m = self.new_m = x
        self.old_s = 0

    def mean(self):
        """Return the running mean, or 0.0 when no sample has been pushed."""
        if not self.n:
            return 0.0
        return self.new_m

    def variance(self):
        """Return the sample variance (n - 1 denominator), or 0.0 for n <= 1."""
        if self.n > 1:
            return self.new_s / (self.n - 1)
        return 0.0

    def std(self):
        """Return the sample standard deviation (square root of the variance)."""
        return math.sqrt(self.variance())

In [None]:
# Columns that are excluded from the per-column statistics.
black_list = ['Frame', 'ID', 'Type', 'Interp']
base_dir = C.DATA_DIR
traj_version = C.VERSION

# For every airport: scan all of its raw trajectory CSVs, accumulate
# min / max / mean / std for each non-blacklisted column, and store the result
# under the 'limits' key of that airport's limits.json.
for airport in ["katl", "kbos", "kdca", "kewr", "kjfk", "klax", "kmdw", "kmsy", "ksea", "ksfo", "panc"]:
    print(f"Running: {airport.upper()}")
    traj_dir = os.path.join(base_dir, f'traj_data_{traj_version}/raw_trajectories', f'{airport}')
    assets_dir = os.path.join(base_dir, 'assets', f'{airport}')

    # The existing reference file is loaded first so that everything it already
    # contains is preserved when it is rewritten below.
    limits_file = os.path.join(assets_dir, 'limits.json')
    with open(limits_file, 'r') as f:
        ref_data = json.load(f)

    # Sorted for a deterministic processing order; hidden entries (e.g.
    # .DS_Store) and sub-directories are skipped because they are not CSVs.
    traj_files = sorted(
        os.path.join(traj_dir, name)
        for name in os.listdir(traj_dir)
        if not name.startswith('.') and os.path.isfile(os.path.join(traj_dir, name))
    )
    print(f"\tFound {len(traj_files)} trajectory files in {traj_dir}")
    if not traj_files:
        # Nothing to aggregate; leave the reference file untouched.
        print(f"\tNo trajectory files found for {airport.upper()}, skipping")
        continue

    # Create limits dictionary. Only the header of the first file is needed to
    # discover the column names, so no data rows are parsed here.
    header = pd.read_csv(traj_files[0], nrows=0)
    limits = {}
    incstats = {}
    for k in header.columns:
        if k in black_list:
            continue
        limits[k] = {
            "min": float('inf'), "max": -float('inf'), "mean": 0.0, "std": 0.0
        }
        incstats[k] = RunningStats()

    for traj_file in tqdm(traj_files):
        # Read the file of the current iteration (not always the first one).
        data = pd.read_csv(traj_file)
        for k in limits:
            arr = data[k].to_numpy()
            if arr.size == 0:
                # A header-only file contributes nothing; min()/max() of an
                # empty array would raise.
                continue

            # NOTE(review): NaN values are not filtered here -- confirm the
            # trajectory CSVs contain none in these columns.
            # Cast to built-in float so json.dump can serialize the result
            # even when the column dtype is an integer type.
            limits[k]["min"] = min(limits[k]["min"], float(arr.min()))
            limits[k]["max"] = max(limits[k]["max"], float(arr.max()))

            for a in arr:
                incstats[k].push(a)

    # Mean / std only need to be read once, after every file has been pushed.
    for k, stats in incstats.items():
        limits[k]["mean"] = float(stats.mean())
        limits[k]["std"] = float(stats.std())

    ref_data['limits'] = limits
    with open(limits_file, 'w') as f:
        json.dump(ref_data, f, indent=2)
    print(f"\tAdding limits to reference file in: {limits_file}")

Running: KMKE


FileNotFoundError: [Errno 2] No such file or directory: '/Users/alonso.cano/Developer/CMU/Amelia/AmeliaDataTools/datasets/amelia/assets/kmke/limits.json'