In [1]:
import requests
import pandas as pd

In [2]:
# JSON resource endpoint that fetch_ngsim_api queries with `$select`/`$where`/`$order`/
# `$limit`/`$offset` parameters.
# NOTE(review): presumably the NGSIM vehicle-trajectory dataset on the US DOT open-data
# portal -- confirm the resource id before relying on it.
BASE_URL = "https://data.transportation.gov/resource/8ect-6jqj.json"

In [3]:

def _soql_quoted(value):
    """Render `value` as a single-quoted SoQL literal, doubling embedded quotes."""
    return "'" + str(value).replace("'", "''") + "'"


def fetch_ngsim_api(location=None, direction=None, lane=None,
                    t_start_ms=None, t_end_ms=None,
                    limit=50000, max_pages=20, app_token=None):
    """Download trajectory rows from BASE_URL and return them as a tidy DataFrame.

    Parameters
    ----------
    location : str, optional
        Value matched against the `location` column. Falsy values disable the filter.
    direction, lane : optional
        Values matched against `direction` and `lane_id`; `None` disables each filter.
    t_start_ms, t_end_ms : optional
        Inclusive bounds on `global_time`; `None` disables each bound.
    limit : int
        Page size sent as `$limit`.
    max_pages : int
        Maximum number of pages requested; at most `limit * max_pages` rows come back.
    app_token : str, optional
        Sent as the `X-App-Token` header when given.

    Returns
    -------
    pandas.DataFrame
        Columns `vehicle_id, t, x, speed, lane, dir, loc`, or an empty frame with no
        columns when the query matches nothing.

    Raises
    ------
    requests.HTTPError
        If any page request returns an error status.
    """
    import warnings

    headers = {}
    if app_token:
        headers["X-App-Token"] = app_token

    # Values are quoted through _soql_quoted so an embedded apostrophe cannot
    # terminate the literal and alter the query.
    where_clauses = []
    if location:
        where_clauses.append(f"location={_soql_quoted(location)}")
    if direction is not None:
        where_clauses.append(f"direction={_soql_quoted(direction)}")
    if lane is not None:
        where_clauses.append(f"lane_id={_soql_quoted(lane)}")
    if t_start_ms is not None:
        where_clauses.append(f"global_time >= {_soql_quoted(t_start_ms)}")
    if t_end_ms is not None:
        where_clauses.append(f"global_time <= {_soql_quoted(t_end_ms)}")

    columns = ["vehicle_id", "frame_id", "global_time", "global_x", "global_y",
               "local_x", "local_y", "v_vel", "lane_id", "direction", "location"]
    params = {
        "$select": ",".join(columns),
        # Many rows share one global_time, so ordering by it alone leaves page
        # boundaries undefined; `:id` makes the order total and offsets stable.
        "$order": "global_time ASC, :id ASC",
        "$limit": limit,
    }
    if where_clauses:
        params["$where"] = " AND ".join(where_clauses)

    pages = []
    for page in range(max_pages):
        params["$offset"] = page * limit
        r = requests.get(BASE_URL, params=params, headers=headers, timeout=60)
        r.raise_for_status()
        chunk = r.json()
        if not chunk:
            break
        pages.append(pd.DataFrame(chunk))
        if len(chunk) < limit:
            # A short page is the last one; skip the extra empty request.
            break
    else:
        if pages:
            warnings.warn(
                f"fetch_ngsim_api stopped after max_pages={max_pages} full pages; "
                "the result may be truncated",
                stacklevel=2,
            )
    if not pages:
        return pd.DataFrame()

    # reindex guarantees every selected column exists even when the API omitted
    # a field from all returned records.
    df = pd.concat(pages, ignore_index=True).reindex(columns=columns)

    # Types & units
    for c in ["global_time", "global_x", "global_y", "local_x", "local_y", "v_vel"]:
        df[c] = pd.to_numeric(df[c], errors="coerce")
    for c in ["frame_id", "lane_id", "direction"]:
        df[c] = pd.to_numeric(df[c], errors="coerce").astype("Int64")

    # Time to seconds
    if df["global_time"].max() > 1e6:
        df["t"] = df["global_time"] / 1000.0
    else:
        # Fallback from frame_id (10Hz)
        df["t"] = df["frame_id"] / 10.0

    # Rename for consistency; use Global_X as longitudinal x
    df = df.rename(columns={
        "global_x": "x", "v_vel": "speed", "lane_id": "lane",
        "direction": "dir", "location": "loc"
    })

    # Heuristic kept from the original: a median above 70 is treated as mph and
    # converted to m/s.
    # NOTE(review): the published NGSIM data dictionary appears to give v_Vel in
    # feet/second and Global_X in feet -- confirm units before using speed/x as SI.
    if df["speed"].dropna().median() > 70:
        df["speed"] = df["speed"] * 0.44704

    return df[["vehicle_id", "t", "x", "speed", "lane", "dir", "loc"]]


In [4]:
# Ten-minute query window in global_time milliseconds (example values).
t0_ms = 1163050000  # replace with your desired start
t1_ms = t0_ms + 10 * 60 * 1000

df = fetch_ngsim_api(
    location="peachtree",
    direction=2,
    lane=1,
    t_start_ms=t0_ms,
    t_end_ms=t1_ms,
    limit=50000,
)
print(df.head(), df.shape)


  vehicle_id          t            x  speed  lane  dir        loc
0         69  1163050.0  2230523.390   1.50     1    2  peachtree
1         70  1163050.0  2230522.584   1.60     1    2  peachtree
2         17  1163050.0  2230521.592   0.00     1    2  peachtree
3         13  1163050.0  2230568.402   3.63     1    2  peachtree
4         67  1163050.0  2230526.199   0.00     1    2  peachtree (117014, 7)


In [5]:
import numpy as np
import torch

In [7]:
def _rescale_to_unit(values):
    """Map `values` linearly onto [0, 1]; a zero-width range yields all zeros."""
    lowest = values.min()
    span = values.max() - lowest
    if not span > 0:
        return np.zeros(values.shape, dtype=float)
    return (values - lowest) / span


def _clipped_fraction(values, scale):
    """Return `values / scale` clipped to [0, 1], or zeros when scale is not positive."""
    if not scale > 0:
        return np.zeros(values.shape, dtype=float)
    return np.clip(values / scale, 0, 1)


def aggregate_to_grid_df(df, minutes=10, dx=10.0, dt=1.0):
    """Bin trajectory samples onto a regular (x, t) grid of density and mean speed.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs numeric columns `t`, `x` and `speed`. Rows missing any of them are
        ignored.
    minutes : float
        Length of the time window, measured from the smallest `t`.
    dx, dt : float
        Cell size along `x` and `t`, in the units of those columns. Must be positive.

    Returns
    -------
    dict
        `x`, `t`, `rho`, `u`, `mask`: float32 tensors of shape (Ix*Jt, 1), flattened
        with x as the slow axis. `x`/`t` are cell centres rescaled to [0, 1];
        `rho`/`u` are divided by `rho_max`/`v_free` and clipped to [0, 1]; `mask` is 1
        where a cell holds at least one sample (empty cells carry 0 in `rho`/`u`).
        `rho_max` (99th percentile of density) and `v_free` (95th percentile of mean
        speed) are floats; `x_centers`/`t_centers` are the unscaled cell centres.

    Raises
    ------
    ValueError
        If `df` has no usable rows, `dx`/`dt` are not positive, the window is shorter
        than one `dt`, or no sample lands inside the grid.

    Notes
    -----
    Density is the time-averaged number of vehicles per unit length: samples in a
    cell divided by `dx` and by the number of distinct timestamps in that time bin.
    This assumes all vehicles are sampled on a shared timestamp grid (e.g. 10 Hz
    frames); with one sample per vehicle per bin it reduces to `count / dx`.
    """
    if df.empty:
        raise ValueError("No data after API filtering")
    if not dx > 0 or not dt > 0:
        raise ValueError("dx and dt must be positive")

    # A NaN speed would otherwise poison the mean of a cell that is marked valid.
    df = df.dropna(subset=["t", "x", "speed"])
    if df.empty:
        raise ValueError("No rows with valid t, x and speed")

    # Time slice: `minutes` of data starting at the earliest sample.
    t0 = df["t"].min()
    t1 = t0 + 60 * minutes
    df = df[(df["t"] >= t0) & (df["t"] <= t1)]

    # Spatial slice (trim outliers)
    x_min = df["x"].quantile(0.01)
    x_max = df["x"].quantile(0.99)
    df = df[df["x"].between(x_min, x_max)]
    if df.empty:
        raise ValueError("No data left after trimming x to its 1st-99th percentile range")

    # Bin edges. Cells are half-open [lo, hi), so make sure the last x edge lies
    # strictly above x_max; otherwise samples sitting exactly at x_max are lost.
    x_bins = np.arange(x_min, x_max + dx, dx)
    if x_bins[-1] <= x_max:
        x_bins = np.append(x_bins, x_bins[-1] + dx)
    t_bins = np.arange(t0, t1 + dt, dt)
    if len(t_bins) < 2:
        raise ValueError("Time window is shorter than one dt bin")

    x_c = (x_bins[:-1] + x_bins[1:]) / 2
    t_c = (t_bins[:-1] + t_bins[1:]) / 2
    Ix, Jt = len(x_c), len(t_c)

    x_vals = df["x"].to_numpy(dtype=float)
    t_vals = df["t"].to_numpy(dtype=float)
    speeds = df["speed"].to_numpy(dtype=float)

    # Assign every sample to its cell in one pass instead of rescanning the
    # frame once per time bin.
    xi = np.digitize(x_vals, x_bins) - 1
    tj = np.digitize(t_vals, t_bins) - 1
    in_time = (tj >= 0) & (tj < Jt)
    inside = in_time & (xi >= 0) & (xi < Ix)

    flat = xi[inside] * Jt + tj[inside]
    counts = np.bincount(flat, minlength=Ix * Jt).reshape(Ix, Jt)
    sums = np.bincount(flat, weights=speeds[inside], minlength=Ix * Jt).reshape(Ix, Jt)

    mask = counts > 0
    if not mask.any():
        raise ValueError("No samples fall inside the space-time grid")

    # Distinct timestamps per time bin = number of snapshots averaged over.
    stamps = np.unique(np.column_stack((tj[in_time], t_vals[in_time])), axis=0)
    snapshots = np.bincount(stamps[:, 0].astype(np.intp), minlength=Jt)
    snapshots = np.maximum(snapshots, 1)

    rho = np.full((Ix, Jt), np.nan)
    u = np.full((Ix, Jt), np.nan)
    u[mask] = sums[mask] / counts[mask]
    rho[mask] = (counts / (snapshots[np.newaxis, :] * dx))[mask]  # vehicles per unit x

    rho_f = np.where(mask, rho, 0.0)
    u_f = np.where(mask, u, 0.0)

    rho_max = np.nanpercentile(rho, 99)
    v_free = np.nanpercentile(u, 95)
    # Guarded division: an all-stopped grid (v_free == 0) must not produce NaN.
    rho_nd = _clipped_fraction(rho_f, rho_max)
    u_nd = _clipped_fraction(u_f, v_free)

    # A single-cell axis has zero span; map it to 0 rather than 0/0.
    x_nd = _rescale_to_unit(x_c)
    t_nd = _rescale_to_unit(t_c)
    Xg, Tg = np.meshgrid(x_nd, t_nd, indexing="ij")

    tensors = {
        "x": torch.tensor(Xg.reshape(-1, 1), dtype=torch.float32),
        "t": torch.tensor(Tg.reshape(-1, 1), dtype=torch.float32),
        "rho": torch.tensor(rho_nd.reshape(-1, 1), dtype=torch.float32),
        "u": torch.tensor(u_nd.reshape(-1, 1), dtype=torch.float32),
        "mask": torch.tensor(mask.reshape(-1, 1), dtype=torch.float32),
        "rho_max": float(rho_max),
        "v_free": float(v_free),
        "x_centers": x_c, "t_centers": t_c
    }
    return tensors


In [8]:
# Build the normalised grid, then pull out the per-cell column tensors by key.
tensors = aggregate_to_grid_df(df, minutes=10, dx=10.0, dt=1.0)
x, t, rho, u, mask = (tensors[key] for key in ("x", "t", "rho", "u", "mask"))

In [None]:
import pandas as pd
import numpy as np

def _as_flat_array(values):
    """Flatten a tensor or array-like into a 1-D NumPy array."""
    if hasattr(values, "detach"):
        # Tensors that require grad or live on an accelerator cannot be
        # converted with a bare .numpy(); detach and move to CPU first.
        values = values.detach().cpu().numpy()
    return np.asarray(values).ravel()


def save_grid_as_csv(tensors, base_path="ngsim_grid"):
    """Write the gridded fields and the axis centres to three CSV files.

    Parameters
    ----------
    tensors : dict
        Must provide `x`, `t`, `rho`, `u`, `mask` (tensors or array-likes of equal
        length once flattened) and `x_centers`, `t_centers` (array-likes).
    base_path : str
        Path prefix for the output files.

    Files written
    -------------
    `{base_path}.csv`
        Columns `x_nd, t_nd, rho_nd, u_nd, mask`, one row per grid cell; `mask` is
        stored as int32.
    `{base_path}_x_centers.csv`, `{base_path}_t_centers.csv`
        Single-column files `x_centers_m` and `t_centers_s`.
    """
    grid = pd.DataFrame({
        "x_nd": _as_flat_array(tensors["x"]),
        "t_nd": _as_flat_array(tensors["t"]),
        "rho_nd": _as_flat_array(tensors["rho"]),
        "u_nd": _as_flat_array(tensors["u"]),
        "mask": _as_flat_array(tensors["mask"]).astype(np.int32),
    })
    grid.to_csv(f"{base_path}.csv", index=False)

    pd.DataFrame({"x_centers_m": _as_flat_array(tensors["x_centers"])}).to_csv(
        f"{base_path}_x_centers.csv", index=False)
    pd.DataFrame({"t_centers_s": _as_flat_array(tensors["t_centers"])}).to_csv(
        f"{base_path}_t_centers.csv", index=False)


In [30]:
# Export the grid; base_path is a file-name prefix relative to the working directory.
save_grid_as_csv(
    tensors,
    base_path="peachtree_dir2_lane1_10m_1s_10min",
)