# Some code to check production run metadata

In [None]:
import geopandas as gpd
import pathlib
import os
import pandas as pd
import shapely.geometry as sg

In [None]:
# Largest difference (in seconds) between frame timestamp and GPS timestamp
# that is still reported as acceptable
MAX_GPS_DELAY = 5

# Coordinate reference systems
LAT_LON_CRS = "EPSG:4326"  # WGS84 latitude/longitude
RD_CRS = "EPSG:28992"  # Dutch Rijksdriehoek

# Date of the run
date = "2024-11-11"

# Download "full_frame_metadata" from landingzone into this folder
csv_folder = f"../datasets/oor/landingzone/metadata/{date}/"

# Every CSV file found directly inside the metadata folder
csv_files = [*pathlib.Path(csv_folder).glob("*.csv")]

def load_csv(file, max_gps_delay=None):
    """Load one frame-metadata CSV and add GPS-delay and naming columns.

    Args:
        file: Path of the CSV file (``str`` or ``pathlib.Path``). The file
            name without extension prefixes every frame name, and its last
            ``-``-separated part is stored as ``run_time``.
        max_gps_delay: Largest GPS delay in seconds that is still flagged as
            acceptable. Defaults to the module-level ``MAX_GPS_DELAY``.

    Returns:
        The CSV contents as a DataFrame indexed by ``filename``. Both
        timestamp columns are converted from epoch seconds to datetimes, and
        the columns ``gps_delay`` (seconds), ``accept_delay`` (bool) and
        ``run_time`` are added.
    """
    file = pathlib.Path(file)  # also accept plain string paths
    if max_gps_delay is None:
        max_gps_delay = MAX_GPS_DELAY

    data = pd.read_csv(file)
    for column in ("pylon://0_frame_timestamp", "gps_internal_timestamp"):
        data[column] = pd.to_datetime(data[column], unit="s")

    # Delay = frame timestamp minus GPS timestamp, in seconds
    delay = data["pylon://0_frame_timestamp"] - data["gps_internal_timestamp"]
    data["gps_delay"] = delay.dt.total_seconds()
    data["accept_delay"] = data["gps_delay"] <= max_gps_delay

    # Old naming convention
    # data["filename"] = "0-" + file.stem.split(sep="-", maxsplit=1)[1] + "-" + data["pylon://0_frame_counter"].astype(str)
    # New naming convention: <csv stem>-<frame counter zero-padded to 5 digits>
    frame_counter = data["pylon://0_frame_counter"].astype(str).str.zfill(5)
    data["filename"] = file.stem + "-" + frame_counter

    data["run_time"] = file.stem.split(sep="-")[-1]
    return data.set_index("filename")

def distance_and_duration(df):
    """Summarise the driven distance, duration and average speed of one run.

    Args:
        df: Frames of a single run, ordered by time, with a ``geometry``
            column of points and a ``pylon://0_frame_timestamp`` datetime
            column. Geometry coordinates are taken to be in metres (the
            caller reprojects to ``RD_CRS`` before calling this).

    Returns:
        A Series with ``distance (km)``, ``duration`` (first to last frame,
        rounded down to whole seconds) and ``speed (m/s)``. The speed is NaN
        when the duration is not positive; the distance is 0 for fewer than
        two points.
    """
    n_points = len(df)
    timestamps = df["pylon://0_frame_timestamp"]

    # A LineString needs at least two points; a shorter run has no length
    distance_m = sg.LineString(df.geometry).length if n_points >= 2 else 0.0

    if n_points:
        duration = timestamps.iloc[-1] - timestamps.iloc[0]
    else:
        duration = pd.Timedelta(0)

    # total_seconds() covers the full span; .seconds would drop whole days
    elapsed_s = duration.total_seconds()
    speed = distance_m / elapsed_s if elapsed_s > 0 else float("nan")

    return pd.Series(
        {
            "distance (km)": distance_m / 1000,
            "duration": duration.floor("s"),
            "speed (m/s)": speed,
        }
    )

# Stack the per-file tables, put the frames in chronological order and keep
# only the first row for every frame name
metadata = pd.concat([load_csv(csv_file) for csv_file in csv_files])
metadata = metadata.sort_values(by=["pylon://0_frame_timestamp"])
metadata = metadata.loc[~metadata.index.duplicated(keep="first")]

# Turn the GPS lon/lat columns into point geometries and reproject them to RD
metadata = gpd.GeoDataFrame(
    metadata,
    geometry=gpd.points_from_xy(metadata["gps_lon"], metadata["gps_lat"], crs=LAT_LON_CRS),
).to_crs(RD_CRS)

# A point is valid when it lies within 50 km of RD coordinate (121000, 488000)
metadata["valid_point"] = metadata.geometry.distance(sg.Point(121000, 488000)).lt(50000)

# metadata = metadata[metadata["run_time"].isin(["H12M27S12", "H12M35S56"])]

# Distance, duration and speed per run, using valid GPS points only
distances_driven = (
    metadata.loc[metadata["valid_point"]]
    .groupby(by="run_time")
    .apply(distance_and_duration, include_groups=False)
)

print(distances_driven, end="\n\n")

# Overall counts for the summary lines below
n_frames = metadata.shape[0]
valid_gps = metadata["valid_point"].sum()
accept_delay = metadata["accept_delay"].sum()

print(f"Total distance:   {distances_driven['distance (km)'].sum():.1f} km")
print(f"Number of frames: {n_frames}")
print(f"Valid GPS:        {valid_gps} ({valid_gps / n_frames * 100:.1f}%)")
print(f"Acceptable delay: {accept_delay} ({accept_delay / n_frames * 100:.1f}%) (max_delay={MAX_GPS_DELAY}s)")

In [None]:
# Show the valid points on a map, coloured by whether their GPS delay is acceptable
metadata.loc[
    metadata["valid_point"], ["geometry", "gps_delay", "accept_delay"]
].explore(column="accept_delay", tooltip="gps_delay", cmap="RdYlGn")

## Optional: get stats for images as well

In [None]:
image_folder = f"../datasets/oor/landingzone/images/{date}/"  # Download images from landing zone here

# Image names without extension are looked up in the metadata index
img_names = [os.path.splitext(file)[0] for file in os.listdir(image_folder) if file.endswith(".jpg")]

# Looking up a name that is absent from the metadata would raise a KeyError
# and hide the statistics for all other images, so report those and skip them
img_index = pd.Index(img_names)
has_metadata = img_index.isin(metadata.index)
missing_metadata = img_index[~has_metadata]
if len(missing_metadata) > 0:
    print(f"Images without metadata: {len(missing_metadata)} (skipped)")

img_df = metadata.loc[img_index[has_metadata], :]
img_df = img_df.sort_values(by=["run_time", "pylon://0_frame_counter"])

total_images = len(img_df)
acceptable = int(img_df["accept_delay"].sum())
print(f"Total images: {total_images}")
if total_images:
    print(f"Acceptable:   {acceptable} ({acceptable / total_images * 100:.1f}%)")
else:
    # No matching images: a percentage cannot be computed
    print(f"Acceptable:   {acceptable} (n/a)")

## GPX Plot - to view routes delivered in GPX format

In [None]:
from gpxplotter import read_gpx_file, create_folium_map, add_segment_to_map

# GPX file to display
gpx_file = "../OOR/notebooks/26_08_2024_12_26.gpx"

the_map = create_folium_map()
for track in read_gpx_file(gpx_file):
    # The segment index is not used, so iterate over the segments directly
    for segment in track["segments"]:
        add_segment_to_map(the_map, segment)

# To display the map in a Jupyter notebook:
the_map