In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle

from tqdm import tqdm

# Every artifact this notebook writes (JSON stats, pickle, PNG histograms)
# lands in this directory; create it up front so later cells can just write.
out_dir = "../../output/sequence_lengths"
os.makedirs(out_dir, exist_ok=True)

# Plot-title label for each bucket key of `agent_seqlens` that gets a
# histogram ('total' plus the single-type keys '0', '1', '2').
name = dict(zip(
    ('total', '0', '1', '2'),
    ('Total', 'Aircraft', 'Vehicle', 'Unknown'),
))

# Histogram colour per airport identifier (looked up via `colors[airport]`).
colors = dict(ksea='blue', kewr='red', kbos='green', kmdw='orange')


In [None]:
# Airport identifier: selects the input sub-directory and is reused by later
# cells as the key into `colors` and as the prefix of every output file name.
airport = "kbos"
base_dir = '../../datasets/amelia'
traj_version = 'a10v08'


traj_dir = os.path.join(base_dir, f'traj_data_{traj_version}', 'raw_trajectories', f'{airport}')

# os.listdir() returns entries in arbitrary order; sorting makes the preview
# file below and the processing order of later cells identical on every run.
traj_files = [os.path.join(traj_dir, f) for f in sorted(os.listdir(traj_dir))]
if not traj_files:
    # Fail here with a clear message instead of an IndexError on traj_files[0].
    raise FileNotFoundError(f"no trajectory files found in {traj_dir}")
print(traj_files[0])

# Preview the first file to eyeball the columns before the full pass.
data = pd.read_csv(traj_files[0])
data.head()

In [None]:
# Sequence lengths (number of rows per agent ID within one file), bucketed by
# the agent's type signature. 'total' pools every agent regardless of type.
agent_seqlens = {
    'total': [],
}
# NOTE(review): never updated or read in the cells visible here; kept only so
# the notebook's global namespace is unchanged.
total_count = 0
for f in tqdm(traj_files):
    data = pd.read_csv(f)

    # groupby splits the frame once per file instead of re-scanning it with a
    # boolean mask for every ID. Groups come out in sorted ID order (same as
    # np.unique), and rows whose ID is missing are skipped rather than being
    # recorded as a spurious zero-length sequence.
    for _, seq in data.groupby('ID'):
        atype = seq.Type.astype(int)
        # Keep the first sample plus every sample whose type differs from its
        # predecessor: the prepended NaN makes the first difference NaN, which
        # casts to True, and any non-zero difference is True as well.
        atype = atype[np.diff(atype, prepend=np.nan).astype(bool)].astype(str).tolist()

        # e.g. '0' for an agent that is always type 0, '01' if it switches
        # from type 0 to type 1 part-way through the sequence.
        key = ''.join(atype)

        T = len(seq)
        agent_seqlens.setdefault(key, []).append(T)
        agent_seqlens['total'].append(T)


In [None]:
# Per-bucket summary statistics of the sequence lengths, rounded to 5 decimals.
# As a side effect each bucket of `agent_seqlens` is replaced by a NumPy array,
# which the plotting cells rely on (they call `.max()` and mask the values).
agent_seqlens_stats = {}
for bucket in list(agent_seqlens):
    lengths = np.asarray(agent_seqlens[bucket])
    agent_seqlens_stats[bucket] = {
        stat: round(getattr(lengths, stat)().astype(float), 5)
        for stat in ("min", "max", "mean", "std")
    }
    agent_seqlens[bucket] = lengths

agent_seqlens_stats

In [None]:
# Summary statistics go to a human-readable JSON file ...
stats_path = f'{out_dir}/{airport}_stats.json'
with open(stats_path, 'w') as stats_file:
    json.dump(agent_seqlens_stats, stats_file, indent=2)

# ... and the raw per-bucket sequence lengths to a pickle next to it.
seqlens_path = f'{out_dir}/{airport}.pkl'
with open(seqlens_path, 'wb') as seqlens_file:
    pickle.dump(agent_seqlens, seqlens_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# One full-range histogram of sequence lengths per labelled bucket in `name`,
# saved as <out_dir>/<airport>_<key>.png.
for key, value in name.items():
    # A bucket listed in `name` may be absent from `agent_seqlens` (no agent
    # with that type signature was seen); np.asarray also makes this cell
    # work whether or not the buckets were already converted to arrays.
    arr = np.asarray(agent_seqlens.get(key, []))
    if arr.size == 0:
        print(f"{airport}: no sequences for '{key}', skipping histogram")
        continue

    # One bin per 10 units of sequence length, but never fewer than one bin:
    # a maximum below 10 would otherwise give bins=0, which raises ValueError.
    n_bins = max(1, int(arr.max()) // 10)

    plt.hist(arr, bins=n_bins, color=colors[airport])
    plt.title(f"{airport}: {value}")
    plt.xlabel("Sequence Length")
    plt.ylabel("Count")
    plt.savefig(f"{out_dir}/{airport}_{key}.png", dpi=600)
    plt.close()

In [None]:
# Same histograms as the full-range ones, but restricted to lengths between
# the 5% and 95% quantiles of each bucket so outliers do not stretch the axis.
# NOTE(review): despite the '_iqr' suffix in the output file name, the filter
# keeps the 5%-95% quantile range, not the interquartile range.
for key, value in name.items():
    # A bucket listed in `name` may be absent from `agent_seqlens` (no agent
    # with that type signature was seen); np.asarray also makes this cell
    # work whether or not the buckets were already converted to arrays.
    arr = np.asarray(agent_seqlens.get(key, []))
    if arr.size == 0:
        print(f"{airport}: no sequences for '{key}', skipping histogram")
        continue

    q_lower = np.quantile(arr, 0.05)
    q_upper = np.quantile(arr, 0.95)

    arr = arr[(arr >= q_lower) & (arr <= q_upper)]
    # The quantiles are interpolated, so with very few, widely spread values
    # nothing may fall inside [q_lower, q_upper].
    if arr.size == 0:
        print(f"{airport}: no '{key}' sequences within the 5%-95% range, skipping histogram")
        continue

    # One bin per 10 units of sequence length, but never fewer than one bin:
    # a maximum below 10 would otherwise give bins=0, which raises ValueError.
    n_bins = max(1, int(arr.max()) // 10)

    plt.hist(arr, bins=n_bins, color=colors[airport])
    plt.title(f"{airport}: {value}")
    plt.xlabel("Sequence Length")
    plt.ylabel("Count")
    plt.savefig(f"{out_dir}/{airport}_{key}_iqr.png", dpi=600)
    plt.close()