In [None]:
import os
import pandas as pd
import shutil

from datetime import datetime
from typing import List

In [None]:
# Folder holding the recorded images together with their metadata CSV file
# (both are read from this single location).
data_folder = "../datasets/oor/data_collection/"

# File name of the metadata CSV inside data_folder.
metadata_file = "edw inspection.csv"

# Root folder in which the landing zone files are prepared and stored; its
# contents can be uploaded afterwards.
output_folder = "../datasets/oor/data_collection/landingzone"

# File extension used to select the image files in data_folder.
FILETYPE = ".jpg"
# Optional suffix appended to the metadata CSV file names, e.g. to redo the ingestion.
suffix = ""

In [None]:
def fix_metadata(raw_metadata: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of the raw metadata with fields filled, renamed and deleted
    to match what we expect.

    The columns "pylon://0_frame_timestamp", "gps_timestamp" and
    "gps_internal_timestamp" are filled from the "timestamp" column, "gps_date"
    is derived from it (formatted as dd/mm/YYYY), the "GX"/"GY"/"GZ" columns
    are renamed to "imu_gx"/"imu_gy"/"imu_gz", and the columns listed in
    `column_delete` are dropped.

    The input frame is not modified, so the function can safely be called
    again on the same `raw_metadata`.

    Parameters
    ----------
    raw_metadata : pd.DataFrame
        Metadata as read from the CSV file. Must contain a "timestamp" column
        (interpreted as seconds since the Unix epoch) and every column listed
        in `column_delete`.

    Returns
    -------
    pd.DataFrame
        A new frame with the adjusted columns.

    Raises
    ------
    KeyError
        If "timestamp" or one of the columns to delete is missing.
    """
    column_map = {
        "GX": "imu_gx",
        "GY": "imu_gy",
        "GZ": "imu_gz",
    }
    column_delete = [
        "pylon://1_frame_counter",
        "pylon://1_frame_timestamp",
        "imu_timestamp",
    ]

    # Work on a copy: mutating the caller's frame would make a second call
    # fail on the drop below because the columns are already gone.
    fixed = raw_metadata.copy()
    timestamps = fixed["timestamp"]

    fixed["pylon://0_frame_timestamp"] = timestamps
    fixed["gps_timestamp"] = timestamps
    fixed["gps_internal_timestamp"] = timestamps
    # unit="s" parses the values as epoch seconds; no local-time conversion is
    # applied, so the resulting date is the UTC date.
    fixed["gps_date"] = pd.to_datetime(timestamps, unit="s").dt.strftime("%d/%m/%Y")

    return fixed.drop(columns=column_delete).rename(columns=column_map)

def generate_frame_metadata(run_metadata: pd.DataFrame, img_names: List[str]) -> pd.DataFrame:
    """
    Generate frame_metadata from prepared run_metadata. This links rows to image
    names and adds model name, model version and code version.

    Each row is matched to an image through its "pylon://0_frame_counter"
    value, which is compared with the frame counter parsed from every image
    name by `get_frame_counter_for_image`.

    Parameters
    ----------
    run_metadata : pd.DataFrame
        Prepared metadata; must contain every column of `index_order` except
        "image_name", "model_name", "model_version" and "code_version",
        which are added here. The frame is not modified.
    img_names : List[str]
        Image names to link to the metadata rows.

    Returns
    -------
    pd.DataFrame
        A new frame holding exactly the columns of `index_order`, in that order.

    Raises
    ------
    KeyError
        If a metadata row has no image with a matching frame counter, or if an
        expected column is missing.
    """
    index_order = ['image_name', 'timestamp', 'pylon://0_frame_counter',
       'pylon://0_frame_timestamp', 'imu_state', 'imu_pitch', 'imu_roll',
       'imu_heading', 'imu_gx', 'imu_gy', 'imu_gz', 'gps_timestamp',
       'gps_state', 'gps_lat', 'gps_lon', 'gps_time', 'gps_date',
       'gps_internal_timestamp', 'model_name', 'model_version',
       'code_version']
    frame_metadata = run_metadata.copy()
    frame_to_name = {get_frame_counter_for_image(img): img for img in img_names}

    # Check up front so a missing image is reported with context instead of a
    # bare "KeyError: <number>" raised from inside the lookup below.
    frame_counters = frame_metadata["pylon://0_frame_counter"]
    missing_frames = [frame for frame in frame_counters if frame not in frame_to_name]
    if missing_frames:
        preview = ", ".join(str(frame) for frame in missing_frames[:10])
        raise KeyError(
            f"No image found for {len(missing_frames)} frame counter(s) in the metadata, e.g.: {preview}"
        )

    frame_metadata["image_name"] = [frame_to_name[frame] for frame in frame_counters]
    frame_metadata["model_name"] = "manual_collection"
    frame_metadata["model_version"] = 0
    frame_metadata["code_version"] = 0
    return frame_metadata[index_order]

def generate_detection_metadata(frame_metadata: pd.DataFrame) -> pd.DataFrame:
    """
    Build detection_metadata from prepared frame_metadata.

    Only the "image_name" column is taken over; every image receives the same
    arbitrary detection (class, bounding box, confidence and tracking id).
    The input frame is left unchanged.
    """
    # Identical placeholder values for every row; the key order below is the
    # column order of the result, after "image_name".
    placeholder_detection = {
        "object_class": 2,
        "x_center": 0.5,
        "y_center": 0.5,
        "width": 0.5,
        "height": 0.5,
        "confidence": 1.0,
        "tracking_id": -1,
    }
    return frame_metadata[["image_name"]].assign(**placeholder_detection)

def get_image_names_in_folder(folder: str, filetype: str = FILETYPE) -> List[str]:
    """
    Return the sorted names of all entries in `folder` ending with `filetype`.

    Only the bare names are returned (no directory part) and sub-folders are
    not searched. The extension match is case-sensitive.

    Parameters
    ----------
    folder : str
        Directory to list.
    filetype : str
        Suffix a name must end with to be included; defaults to FILETYPE.

    Returns
    -------
    List[str]
        Matching names in ascending order.
    """
    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    return sorted(f for f in os.listdir(folder) if f.endswith(filetype))

def get_frame_counter_for_image(image_name: str) -> int:
    """
    Return the frame counter encoded in an image name, i.e. the part before
    the first "." parsed as an integer (e.g. "123.jpg" -> 123).

    Raises ValueError if that part is not a valid integer.
    """
    counter_text, _, _ = image_name.partition(".")
    return int(counter_text)

def get_date_from_metadata(metadata: pd.DataFrame) -> str:
    """
    Return the date of the first metadata row as a "YYYY-MM-DD" string.

    The "timestamp" value of the first row (by position, whatever the index
    labels are) is interpreted as a Unix timestamp and converted with
    `datetime.fromtimestamp`, i.e. in the local timezone of the machine.

    NOTE(review): fix_metadata derives "gps_date" with
    pd.to_datetime(..., unit="s"), which applies no local-time conversion, so
    the two dates can differ for recordings close to midnight. Confirm which
    one the landing zone expects.

    Raises
    ------
    KeyError
        If the "timestamp" column is missing or the metadata has no rows.
    """
    def unix_to_yyyy_mm_dd(unix_timestamp) -> str:
        date_time = datetime.fromtimestamp(unix_timestamp)
        return date_time.strftime("%Y-%m-%d")

    timestamps = metadata["timestamp"]
    if timestamps.empty:
        # KeyError matches what the former label-based lookup raised here.
        raise KeyError("metadata has no rows; cannot determine the collection date")

    # Positional access: works for any index, not only one containing label 0.
    return unix_to_yyyy_mm_dd(timestamps.iloc[0])

In [None]:
# Load the raw metadata CSV and collect the names of the recorded images
metadata_path = os.path.join(data_folder, metadata_file)
raw_metadata = pd.read_csv(metadata_path)
image_names = get_image_names_in_folder(data_folder)

# Derive the landing zone tables step by step: raw -> run -> frame -> detection
run_metadata = fix_metadata(raw_metadata)
frame_metadata = generate_frame_metadata(run_metadata, image_names)
detection_metadata = generate_detection_metadata(frame_metadata)

In [None]:
# The landing zone is organised per collection date (e.g. 2025-01-28)
date = get_date_from_metadata(metadata=run_metadata)

# One dated sub-folder per kind of output
frame_metadata_folder, detection_metadata_folder, images_folder = (
    os.path.join(output_folder, artifact, date)
    for artifact in ("frame_metadata", "detection_metadata", "images")
)
for landing_folder in (frame_metadata_folder, detection_metadata_folder, images_folder):
    os.makedirs(landing_folder, exist_ok=True)

# Write csv files
frame_csv = os.path.join(frame_metadata_folder, f"frame_metadata{suffix}.csv")
detection_csv = os.path.join(detection_metadata_folder, f"detection_metadata{suffix}.csv")
frame_metadata.to_csv(frame_csv, index=False)
detection_metadata.to_csv(detection_csv, index=False)

# Copy images (for convenience)
for img in image_names:
    source_path = os.path.join(data_folder, img)
    shutil.copy2(source_path, images_folder)