# Some code to check production run metadata

In [None]:
import geopandas as gpd
import pathlib
import os
import pandas as pd
import shapely.geometry as sg
import json

from IPython.display import display, HTML

RD_CRS = "EPSG:28992"  # CRS code for the Dutch Rijksdriehoek coordinate system; the frames are reprojected to it before distances are computed
LAT_LON_CRS = "EPSG:4326"  # CRS code for WGS84 latitude/longitude coordinate system; used to interpret the raw GPS longitude/latitude columns

## New style JSON metadata

In [None]:
date = "2025-05-12"  # Date of the run; selects the metadata sub-folder below
metadata_folder = f"../datasets/oor/landingzone/metadata/{date}/"  # Download "full_frame_metadata" from landingzone here

# Largest difference, in seconds, between the image timestamp and the GPS
# timestamp of a frame for which the GPS fix is still flagged as acceptable.
MAX_GPS_DELAY = 5


from collections.abc import MutableMapping

def flatten_dict(dictionary: dict, parent_key: str="", separator: str=".") -> dict:
    """Flatten a nested mapping into a single-level dict.

    Nested keys are joined with ``separator``, so ``{"a": {"b": 1}}`` becomes
    ``{"a.b": 1}``. Only ``MutableMapping`` values are descended into; any
    other value (lists included) is copied over unchanged.

    Args:
        dictionary: The (possibly nested) mapping to flatten.
        parent_key: Prefix put in front of every key; empty for no prefix.
        separator: String placed between the parts of a joined key.

    Returns:
        A new flat dict, with keys in depth-first insertion order.
    """
    flattened = {}
    for name, item in dictionary.items():
        # Only prefix when there is a non-empty parent key to prefix with
        path = separator.join((parent_key, name)) if parent_key else name
        if isinstance(item, MutableMapping):
            flattened.update(flatten_dict(item, path, separator=separator))
        else:
            flattened[path] = item
    return flattened

def load_metadata(metadata_file: str) -> pd.DataFrame:
    """Load one JSON metadata file into a DataFrame with one row per frame.

    The file must contain a top-level ``"frames"`` list (one nested dict per
    frame) and a ``"data_path"`` string. Every frame dict is flattened with
    ``flatten_dict``, so nested fields become dotted column names such as
    ``gps_data.coordinate_time_stamp``.

    Columns added on top of the flattened frame fields:
        gps_delay: ``image_file_timestamp`` minus the GPS timestamp, in seconds.
        accept_delay: True where ``gps_delay`` is at most ``MAX_GPS_DELAY``.
        recording_name: Second-to-last ``/``-separated part of ``data_path``.
        metadata_file: Base name of the file the row was read from.

    Args:
        metadata_file: Path of the JSON file (anything ``open`` accepts).

    Returns:
        The per-frame DataFrame, without the ``image_path`` column.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default encoding
    with open(metadata_file, "r", encoding="utf-8") as f:
        json_content = json.load(f)

    df = pd.DataFrame([flatten_dict(frame) for frame in json_content["frames"]])

    # All timestamps are stored as ISO 8601 strings
    for column in ("record_timestamp", "gps_data.coordinate_time_stamp", "image_file_timestamp"):
        df[column] = pd.to_datetime(df[column], format="ISO8601")

    df["gps_delay"] = (df["image_file_timestamp"] - df["gps_data.coordinate_time_stamp"]).dt.total_seconds()
    df["accept_delay"] = df["gps_delay"] <= MAX_GPS_DELAY

    df["recording_name"] = json_content["data_path"].split(sep="/")[-2]
    df["metadata_file"] = os.path.basename(metadata_file)

    # Tolerate files whose frames carry no image_path instead of failing on them
    return df.drop(columns="image_path", errors="ignore")

def distance_and_duration(gdf: gpd.GeoDataFrame) -> pd.Series:
    """Summarise distance, duration, speed and frame rate of one group of frames.

    The distance is the length of the line drawn through the geometries in row
    order, divided by 1000 and reported as kilometres (so the geometry is
    assumed to be in a CRS with metres as unit). The duration is the span
    between the earliest and latest ``image_file_timestamp``.

    Args:
        gdf: Frames of one group, with a point geometry and an
            ``image_file_timestamp`` column.

    Returns:
        A Series with the entries ``distance (km)``, ``duration`` (rounded down
        to whole seconds), ``speed (m/s)`` and ``FPS``. Speed and FPS are NaN
        when the group spans no time at all.
    """
    # A line needs at least two points; a single frame simply covers no distance
    distance = sg.LineString(gdf.geometry).length / 1000 if len(gdf) > 1 else 0.0
    duration = gdf["image_file_timestamp"].max() - gdf["image_file_timestamp"].min()

    # total_seconds() covers the whole span; .seconds would drop the days part
    # and the sub-second fraction.
    seconds = duration.total_seconds()
    has_duration = seconds > 0
    speed = distance * 1000 / seconds if has_duration else float("nan")
    fps = len(gdf) / seconds if has_duration else float("nan")

    result = {
        "distance (km)": distance,
        "duration": duration.floor("s"),
        "speed (m/s)": speed,
        "FPS": fps,
    }
    return pd.Series(result)


# Load every JSON metadata file of the run into one table indexed by image file name
metadata_files = list(pathlib.Path(metadata_folder).glob("*.json"))

metadata_df = (
    pd.concat([load_metadata(file) for file in metadata_files])
    .set_index("image_file_name")
    .sort_index()
)
# Keep only the first row for every image file name
metadata_df = metadata_df[~metadata_df.index.duplicated(keep='first')]

# Turn the GPS longitude/latitude into point geometries and reproject them to RD
metadata_df = gpd.GeoDataFrame(
    metadata_df,
    geometry=gpd.points_from_xy(
        x=metadata_df["gps_data.longitude"],
        y=metadata_df["gps_data.latitude"],
        crs=LAT_LON_CRS,
    ),
)
metadata_df = metadata_df.to_crs(RD_CRS)

# A point counts as valid when it lies within 50000 CRS units of (121000, 488000)
metadata_df["valid_point"] = metadata_df.distance(sg.Point(121000, 488000)) < 50000

# Optional: restrict the analysis to specific recordings
# metadata_df = metadata_df[metadata_df["recording_name"].isin(["recording_2025-05-12_14-35-19"])]

groupby_column = "recording_name"
# groupby_column = "metadata_file"
distances_driven = (
    metadata_df.loc[metadata_df["valid_point"]]
    .groupby(by=groupby_column)
    .apply(distance_and_duration, include_groups=False)
)

display(HTML(distances_driven.to_html()))

# Overall counts over all frames, valid or not
n_frames = len(metadata_df)
valid_gps = metadata_df['valid_point'].sum()
accept_delay = metadata_df['accept_delay'].sum()

print(f"Total distance:   {distances_driven['distance (km)'].sum():.1f} km")
print(f"Number of frames: {n_frames}")
print(f"Valid GPS:        {valid_gps} ({valid_gps / n_frames * 100:.1f}%)")
print(f"Acceptable delay: {accept_delay} ({accept_delay / n_frames * 100:.1f}%) (max_delay={MAX_GPS_DELAY}s)")

In [None]:
# Plot the data on a map: only frames with a valid GPS point are shown, coloured
# by whether their GPS delay is acceptable, with the delay itself as tooltip
metadata_df[metadata_df["valid_point"]][["geometry", "gps_delay", "accept_delay"]].explore(column="accept_delay", tooltip="gps_delay", cmap="RdYlGn")

## Old style CSV metadata

In [None]:
date = "2024-11-11"  # Date of the run; selects the metadata sub-folder below
metadata_folder = f"../datasets/oor/landingzone/metadata/{date}/"  # Download "full_frame_metadata" from landingzone here

# Every CSV file in the folder is loaded as one run
metadata_files = list(pathlib.Path(metadata_folder).glob("*.csv"))

# Largest difference, in seconds, between the frame timestamp and the GPS
# timestamp of a frame for which the GPS fix is still flagged as acceptable.
MAX_GPS_DELAY = 5

def load_csv(file):
    """Load one CSV metadata file into a DataFrame indexed by image file name.

    The frame and GPS timestamps are parsed as seconds since the epoch. The
    columns ``gps_delay`` (frame time minus GPS time, in seconds),
    ``accept_delay`` (delay at most ``MAX_GPS_DELAY``) and ``run_time`` (last
    ``-``-separated part of the file stem) are added.

    Args:
        file: ``pathlib.Path`` of the CSV file; its stem is used to build the
            image file names.

    Returns:
        The DataFrame with ``filename`` as index.
    """
    frame_time_col = "pylon://0_frame_timestamp"
    gps_time_col = "gps_internal_timestamp"

    data = pd.read_csv(file)
    for column in (frame_time_col, gps_time_col):
        data[column] = pd.to_datetime(data[column], unit="s")

    delay = data[frame_time_col] - data[gps_time_col]
    data["gps_delay"] = delay.dt.total_seconds().to_numpy()
    data["accept_delay"] = data['gps_delay'] <= MAX_GPS_DELAY

    # Old naming convention
    # data["filename"] = "0-" + file.stem.split(sep="-", maxsplit=1)[1] + "-" + data["pylon://0_frame_counter"].astype(str)
    # New naming convention: <file stem>-<frame counter zero-padded to 5 digits>
    frame_counter = data["pylon://0_frame_counter"].astype(str).str.zfill(5)
    data["filename"] = file.stem + "-" + frame_counter

    data["run_time"] = file.stem.split(sep="-")[-1]
    return data.set_index("filename")

def distance_and_duration(df):
    """Summarise distance, duration and average speed of one run.

    The distance is the length of the line drawn through the geometries in row
    order, divided by 1000 and reported as kilometres (so the geometry is
    assumed to be in a CRS with metres as unit). The duration is the difference
    between the last and the first ``pylon://0_frame_timestamp``, so the rows
    are expected to be in chronological order.

    Args:
        df: Frames of one run, with a point geometry and a
            ``pylon://0_frame_timestamp`` column.

    Returns:
        A Series with the entries ``distance (km)``, ``duration`` (rounded down
        to whole seconds) and ``speed (m/s)``. The speed is NaN when the run
        spans no time at all.
    """
    # A line needs at least two points; a single frame simply covers no distance
    distance = sg.LineString(df.geometry).length / 1000 if len(df) > 1 else 0.0

    timestamps = df["pylon://0_frame_timestamp"]
    duration = timestamps.iloc[-1] - timestamps.iloc[0]

    # total_seconds() covers the whole span; .seconds would drop the days part
    # and the sub-second fraction.
    seconds = duration.total_seconds()
    speed = distance * 1000 / seconds if seconds > 0 else float("nan")

    result = {
        "distance (km)": distance,
        "duration": duration.floor("s"),
        "speed (m/s)": speed,
    }
    return pd.Series(result)

# Merge all CSV files into one table in chronological order
metadata_df = pd.concat([load_csv(file) for file in metadata_files]).sort_values(
    by=["pylon://0_frame_timestamp"]
)
# Keep only the first row for every image file name
metadata_df = metadata_df[~metadata_df.index.duplicated(keep='first')]

# Turn the GPS longitude/latitude into point geometries and reproject them to RD
metadata_df = gpd.GeoDataFrame(
    metadata_df,
    geometry=gpd.points_from_xy(
        x=metadata_df["gps_lon"],
        y=metadata_df["gps_lat"],
        crs=LAT_LON_CRS,
    ),
)
metadata_df = metadata_df.to_crs(RD_CRS)

# A point counts as valid when it lies within 50000 CRS units of (121000, 488000)
metadata_df["valid_point"] = metadata_df.distance(sg.Point(121000, 488000)) < 50000

# Optional: restrict the analysis to specific runs
# metadata_df = metadata_df[metadata_df["run_time"].isin(["H12M27S12", "H12M35S56"])]

distances_driven = (
    metadata_df.loc[metadata_df["valid_point"]]
    .groupby(by="run_time")
    .apply(distance_and_duration, include_groups=False)
)

print(distances_driven)
print()

# Overall counts over all frames, valid or not
n_frames = len(metadata_df)
valid_gps = metadata_df['valid_point'].sum()
accept_delay = metadata_df['accept_delay'].sum()

print(f"Total distance:   {distances_driven['distance (km)'].sum():.1f} km")
print(f"Number of frames: {n_frames}")
print(f"Valid GPS:        {valid_gps} ({valid_gps / n_frames * 100:.1f}%)")
print(f"Acceptable delay: {accept_delay} ({accept_delay / n_frames * 100:.1f}%) (max_delay={MAX_GPS_DELAY}s)")

In [None]:
# Plot the data on a map: only frames with a valid GPS point are shown, coloured
# by whether their GPS delay is acceptable, with the delay itself as tooltip
metadata_df[metadata_df["valid_point"]][["geometry", "gps_delay", "accept_delay"]].explore(column="accept_delay", tooltip="gps_delay", cmap="RdYlGn")

## Optional: get stats for images as well

In [None]:
# NOTE: this cell sorts on "run_time" and "pylon://0_frame_counter", so it
# expects metadata_df (and date) as left behind by the CSV metadata cell.
image_folder = f"../datasets/oor/landingzone/images/{date}/"  # Download images from landing zone here

# os.listdir already returns bare file names; strip the extension to get the metadata index value
img_names = [os.path.splitext(file)[0] for file in os.listdir(image_folder) if file.endswith(".jpg")]

# Selecting a name that is absent from the index would raise a KeyError, so
# images without a metadata row are reported and left out of the statistics.
img_with_metadata = [name for name in img_names if name in metadata_df.index]
n_without_metadata = len(img_names) - len(img_with_metadata)
if n_without_metadata:
    print(f"Images without metadata: {n_without_metadata}")

img_df = metadata_df.loc[img_with_metadata, :].sort_values(by=["run_time", "pylon://0_frame_counter"])

total_images = len(img_df)
acceptable = img_df["accept_delay"].sum()
print(f"Total images: {total_images}")
if total_images:
    print(f"Acceptable:   {acceptable} ({acceptable / total_images * 100:.1f}%)")
else:
    # No percentage can be given for an empty image set
    print(f"Acceptable:   {acceptable} (n/a)")

## GPX Plot - to view routes delivered in GPX format

In [None]:
from gpxplotter import read_gpx_file, create_folium_map, add_segment_to_map

# Draw every segment of every track in the GPX file on a single folium map
the_map = create_folium_map()
for track in read_gpx_file("../OOR/notebooks/26_08_2024_12_26.gpx"):
    for segment in track['segments']:
        add_segment_to_map(the_map, segment)

# To display the map in a Jupyter notebook:
the_map