In [1]:
import os

def list_files_by_type(folder_path):
    """
    List the pickle files in a folder, grouped by their file-name prefix.

    Only the direct entries of ``folder_path`` are inspected (no recursion into
    sub-folders). An entry is collected when its name starts with one of the
    known prefixes and ends with ".p".

    Args:
        folder_path: Path to the folder containing pickle files.
    Returns:
        A dictionary with the keys 'frr40_objects', 'frr40_detections' and
        'egoMotionDynamicData'. Each value is the sorted list of matching
        paths (``folder_path`` joined with the file name); a prefix without
        any match maps to an empty list.
    """
    prefixes = ("frr40_objects", "frr40_detections", "egoMotionDynamicData")
    file_groups = {prefix: [] for prefix in prefixes}

    # os.listdir returns entries in arbitrary order; sort so the result is reproducible.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith(".p"):
            continue
        for prefix in prefixes:
            if file_name.startswith(prefix):
                file_groups[prefix].append(os.path.join(folder_path, file_name))
                break  # a file belongs to at most one group

    return file_groups

# Folder to scan (Windows path; each "\\" in the literal is one backslash)
folder_path = "C:\\FRR40\\rat25-main\\temp"

# Group the pickle files found directly in that folder by file-name prefix
file_groups = list_files_by_type(folder_path)

# Print the paths collected for each prefix.
# NOTE(review): the recorded output shows three empty lists. The scan is not recursive,
# so presumably the files had already been moved into sub-folders when this ran — confirm.
print("Object files:", file_groups["frr40_objects"])
print("Detections files:", file_groups["frr40_detections"])
print("Egomotion files:", file_groups["egoMotionDynamicData"])


Object files: []
Detections files: []
Egomotion files: []


In [2]:
import os

def create_folders(base_path):
    """
    Make sure the 'detections', 'objects' and 'ego_motion' sub-folders exist.

    One line is printed per sub-folder, stating whether it was created or was
    already present.

    Args:
        base_path: The directory containing all files.
    """
    for sub_name in ("detections", "objects", "ego_motion"):
        target_dir = os.path.join(base_path, sub_name)

        # Nothing to do for a path that is already there.
        if os.path.exists(target_dir):
            print(f"Folder already exists: {target_dir}")
            continue

        os.makedirs(target_dir)
        print(f"Created folder: {target_dir}")

# Directory under which the 'detections', 'objects' and 'ego_motion' sub-folders are created
base_path = "C:/FRR40/rat25-main/temp"

# Create any sub-folder that is missing (prints one status line per sub-folder)
create_folders(base_path)


Folder already exists: C:/FRR40/rat25-main/temp\detections
Folder already exists: C:/FRR40/rat25-main/temp\objects
Folder already exists: C:/FRR40/rat25-main/temp\ego_motion


In [3]:
import os
import shutil

def organize_files(base_path):
    """
    Sort the ".p" files in base_path into per-type sub-folders.

    A file is moved when its name ends with ".p" and contains the marker of a
    destination: 'frr40_detections' -> detections, 'frr40_objects' -> objects,
    'egoMotionDynamicData' -> ego_motion. Missing destination folders are
    created first. Every created folder and every moved file is reported on stdout.

    Args:
        base_path: Directory containing all files.
    """
    # (destination sub-folder, substring identifying its files), in processing order
    routing = (
        ("detections", "frr40_detections"),
        ("objects", "frr40_objects"),
        ("ego_motion", "egoMotionDynamicData"),
    )

    for folder_name, marker in routing:
        target_dir = os.path.join(base_path, folder_name)

        # The destination has to exist before anything can be moved into it.
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
            print(f"Created folder: {target_dir}")

        # The directory listing is taken once per destination, before any move happens.
        for file_name in os.listdir(base_path):
            if not (marker in file_name and file_name.endswith(".p")):
                continue
            shutil.move(
                os.path.join(base_path, file_name),
                os.path.join(target_dir, file_name),
            )
            print(f"Moved {file_name} to {folder_name}")

# Directory whose ".p" files are sorted into sub-folders (same location as used for create_folders)
base_path = "C:/FRR40/rat25-main/temp"

# Move the detection / object / ego-motion pickles into their sub-folders.
# This changes the file system: the moved files no longer sit directly in base_path afterwards.
organize_files(base_path)


In [4]:
import os
import pandas as pd

def find_egosignal(timestamp, ego_motion):
    """
    Return yaw rate and ego speed of the ego-motion sample closest in time to ``timestamp``.

    "Closest" means the smallest absolute difference between ``timestamp`` and the
    'timestamp' column. On ties the first such row (in row order) is used.

    Args:
        timestamp: Timestamp from detection DataFrame.
        ego_motion: Ego motion DataFrame; must provide the columns 'timestamp',
            'RotationRates.yawRateVehicleBody.value' and 'Velocity.SpeedCog.SpeedCog'.
    Returns:
        Tuple of yawrate and egospeed from the closest ego_motion timestamp.
    Raises:
        ValueError: If ``ego_motion`` has no rows.
    """
    if len(ego_motion) == 0:
        raise ValueError("ego_motion is empty; cannot look up the closest timestamp")

    # Row position (not index label) of the smallest time gap. A single argmin pass
    # replaces sorting every gap just to read the first one.
    # NOTE(review): assumes the 'timestamp' column holds no NaN values — confirm.
    nearest_pos = (ego_motion['timestamp'] - timestamp).abs().argmin()

    # Read each value from its own column so the column dtype is preserved.
    yawrate = ego_motion['RotationRates.yawRateVehicleBody.value'].iloc[nearest_pos]
    egospeed = ego_motion['Velocity.SpeedCog.SpeedCog'].iloc[nearest_pos]
    return yawrate, egospeed

def process_detection_ego_motion(detection_folder, ego_motion_folder):
    """
    Add ego yaw rate and ego speed to every detection row and combine all files.

    The ".p" files of both folders are sorted by name and paired by position. For
    every detection row the ego-motion sample with the closest timestamp (see
    ``find_egosignal``) supplies the new 'yawrate' and 'egospeed' columns.

    Args:
        detection_folder: Path to the folder containing detection files.
        ego_motion_folder: Path to the folder containing ego motion files.
    Returns:
        Combined DataFrame with matched features from detection and ego motion files.
        When no file pair exists, an empty DataFrame with the output columns is returned.
    """
    ego_columns = ["timestamp", "RotationRates.yawRateVehicleBody.value", "Velocity.SpeedCog.SpeedCog"]
    detection_columns = ["timestamp", "rcs", "distance", "angleAzimuth", "angleElevation",
                         "radialVelocity", "radialVelocityDomainMax"]

    # Sorting is lexicographic ("_10" comes before "_2"). Pairing by position therefore
    # relies on both folders using the same numbering suffixes.
    detection_files = sorted(f for f in os.listdir(detection_folder) if f.endswith(".p"))
    ego_motion_files = sorted(f for f in os.listdir(ego_motion_folder) if f.endswith(".p"))

    if len(detection_files) != len(ego_motion_files):
        # zip() below stops at the shorter list; make the truncation visible.
        print(f"Warning: {len(detection_files)} detection files but {len(ego_motion_files)} ego motion files; "
              "surplus files are ignored and the pairing may be shifted")

    combined_data = []
    for detection_file, ego_motion_file in zip(detection_files, ego_motion_files):
        print(f"Processing Detection File: {detection_file} with Ego Motion File: {ego_motion_file}")

        # NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
        detectionlist = pd.read_pickle(os.path.join(detection_folder, detection_file))
        ego_motion = pd.read_pickle(os.path.join(ego_motion_folder, ego_motion_file))

        # Keep only the needed columns. The detection subset is copied because new columns
        # are added to it below; assigning onto a slice triggers SettingWithCopyWarning.
        ego_motion = ego_motion[ego_columns]
        detectionlist = detectionlist[detection_columns].copy()

        # One (yawrate, egospeed) pair per detection row; also works for zero rows.
        matches = [find_egosignal(ts, ego_motion) for ts in detectionlist["timestamp"]]
        detectionlist["yawrate"] = [yawrate for yawrate, _ in matches]
        detectionlist["egospeed"] = [egospeed for _, egospeed in matches]

        combined_data.append(detectionlist)

    if not combined_data:
        # pd.concat raises on an empty list; return an empty frame with the expected columns.
        return pd.DataFrame(columns=detection_columns + ["yawrate", "egospeed"])

    combined_df = pd.concat(combined_data, ignore_index=True)
    return combined_df


# Folders holding the detection and ego-motion pickles
detection_folder = "C:/FRR40/rat25-main/temp/detections"
ego_motion_folder = "C:/FRR40/rat25-main/temp/ego_motion"

# Match every detection row with its closest ego-motion sample and stack all files into one DataFrame
combined_features = process_detection_ego_motion(detection_folder, ego_motion_folder)

# Show the overall size and the first rows of the combined result
print("Combined Data Shape:", combined_features.shape)
print("Sample Data:")
print(combined_features.head())


Processing Detection File: frr40_detections_0.p with Ego Motion File: egoMotionDynamicData_0.p
Processing Detection File: frr40_detections_1.p with Ego Motion File: egoMotionDynamicData_1.p
Processing Detection File: frr40_detections_10.p with Ego Motion File: egoMotionDynamicData_10.p
Processing Detection File: frr40_detections_11.p with Ego Motion File: egoMotionDynamicData_11.p
Processing Detection File: frr40_detections_12.p with Ego Motion File: egoMotionDynamicData_12.p
Processing Detection File: frr40_detections_13.p with Ego Motion File: egoMotionDynamicData_13.p
Processing Detection File: frr40_detections_14.p with Ego Motion File: egoMotionDynamicData_14.p
Processing Detection File: frr40_detections_15.p with Ego Motion File: egoMotionDynamicData_15.p
Processing Detection File: frr40_detections_2.p with Ego Motion File: egoMotionDynamicData_2.p
Processing Detection File: frr40_detections_3.p with Ego Motion File: egoMotionDynamicData_3.p
Processing Detection File: frr40_detec

In [6]:
# Inspect a single ego motion file to see which columns it provides
ego_motion_path = "C:/FRR40/rat25-main/temp/ego_motion/egoMotionDynamicData_0.p"

# Load the pickled DataFrame (relies on `pd` imported by an earlier cell)
ego_motion_sample = pd.read_pickle(ego_motion_path)

# Print every column name prefixed with its position
print("Columns in the ego motion file:")
for idx, column in enumerate(ego_motion_sample.columns):
    print(f"{idx}: {column}")


Columns in the ego motion file:
0: counter
1: egoMotionTimeStamp.NanoSecWheelSpeedSensors.timestampEgomotionNanoSecWheelSpeedSensors
2: egoMotionTimeStamp.NanoSecWheelSpeedSensors.valueQTimestampEgomotionNanoSecWheelSpeedSensors
3: egoMotionTimeStamp.NanoSecInertialSensors.timestampEgomotionNanoSecInertialSensors
4: egoMotionTimeStamp.NanoSecInertialSensors.valueQTimestampEgomotionNanoSecInertialSensors
5: egoMotionTimeStamp.Sec.timestampEgomotionSec
6: egoMotionTimeStamp.Sec.valueQTimestampEgomotionSec
7: egoMotionTimeStamp.Sec.timestampEgomotionNanoSecOverflow
8: egoMotionTimeStamp.SyncState
9: Velocity.SpeedLongitudinal.SpeedLongitudinal
10: Velocity.SpeedLongitudinal.valueQEgoSpeedLongitudinal
11: Velocity.SpeedLongitudinal.quEgoSpeedLongitudinal
12: Velocity.SpeedLongitudinal.SpeedLongitudinalErrAmp
13: Velocity.SpeedLongitudinal.valueQEgoSpeedLongitudinalErrAmp
14: Velocity.SpeedCog.SpeedCog
15: Velocity.SpeedCog.valueQEgoSpeedCog
16: Velocity.SpeedCog.quEgoSpeedCog
17: Velocity.

In [10]:
import pandas as pd
import os

def filter_object_columns(object_folder):
    """
    Load every object pickle of a folder and keep only the columns needed later on.

    Args:
        object_folder: Path to the folder containing object files.
    Returns:
        A dictionary of filtered DataFrames with file names as keys
        (files are visited in sorted name order).
    """
    relevant_columns = [
        # Used to match against other data
        "timestamp",
        # Label columns
        "orientation", "x", "y", "width_edge_mean", "length_edge_mean",
        # Filtering columns
        "status_measurement", "status_movement", "overdrivable", "underdrivable",
        # For coordinate system transformation
        "header.origin.x", "header.origin.y", "header.origin.z",
        "header.origin.roll", "header.origin.pitch", "header.origin.yaw",
    ]

    filtered_objects = {}
    for object_file in sorted(name for name in os.listdir(object_folder) if name.endswith(".p")):
        print(f"Processing Object File: {object_file}")

        # Read the pickled DataFrame and store its column subset under the file name.
        object_data = pd.read_pickle(os.path.join(object_folder, object_file))
        filtered_objects[object_file] = object_data[relevant_columns]

    return filtered_objects


# Folder holding the object pickles
object_folder = "C:/FRR40/rat25-main/temp/objects"

# Load every object file and keep only the relevant columns.
# The result is a dict: file name -> filtered DataFrame (not a single DataFrame).
filtered_object_files = filter_object_columns(object_folder)

# Print the head of the first filtered DataFrame as a sanity check
for file_name, df in filtered_object_files.items():
    print(f"Filtered Object DataFrame for {file_name}:")
    print(df.head())
    break  # Display only the first file for verification


Processing Object File: frr40_objects_0.p
Processing Object File: frr40_objects_1.p
Processing Object File: frr40_objects_10.p
Processing Object File: frr40_objects_11.p
Processing Object File: frr40_objects_12.p
Processing Object File: frr40_objects_13.p
Processing Object File: frr40_objects_14.p
Processing Object File: frr40_objects_15.p
Processing Object File: frr40_objects_2.p
Processing Object File: frr40_objects_3.p
Processing Object File: frr40_objects_4.p
Processing Object File: frr40_objects_5.p
Processing Object File: frr40_objects_6.p
Processing Object File: frr40_objects_7.p
Processing Object File: frr40_objects_8.p
Processing Object File: frr40_objects_9.p
Filtered Object DataFrame for frr40_objects_0.p:
      timestamp                                        orientation  \
0  2.739721e+07  [3.0897224, -1.5708438, -1.5708438, 0.0, 0.0, ...   
1  2.739721e+07  [3.141304, -0.8097746, -1.5708438, 0.0, 0.0, 0...   
2  2.739721e+07  [-0.042569254, -0.8095828, -0.99836403, 0.0, 0

In [6]:
# Look at the four columns the object filter is based on, for the first file only.
# In the recorded output each cell holds a per-cycle list: status entries appear as
# (code, name) pairs, overdrivable as a list of integers.
for file_name, df in filtered_object_files.items():
    print(f"Inspecting filtering columns for {file_name}:")
    print(df[["status_measurement", "status_movement", "overdrivable", "underdrivable"]].head())
    break  # Display only the first file for verification


Inspecting filtering columns for frr40_objects_0.p:
                                  status_measurement  \
0  [(0, measured), (0, measured), (0, measured), ...   
1  [(0, measured), (2, predicted), (0, measured),...   
2  [(0, measured), (2, predicted), (2, predicted)...   
3  [(0, measured), (2, predicted), (0, measured),...   
4  [(0, measured), (0, measured), (0, measured), ...   

                                     status_movement  \
0  [(2, stopped), (0, moved), (0, moved), (1, sta...   
1  [(2, stopped), (0, moved), (0, moved), (1, sta...   
2  [(2, stopped), (0, moved), (0, moved), (1, sta...   
3  [(2, stopped), (0, moved), (0, moved), (1, sta...   
4  [(2, stopped), (0, moved), (1, stationary), (1...   

                                        overdrivable  \
0  [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 6, 6, 0, 0, ...   
1  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, 0, 0, ...   
2  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, 0, 0, ...   
3  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 7, 0, 0, .

In [16]:
# Inspect a single *object* file to see which columns it provides.
# NOTE(review): this cell appears to be copied from the ego-motion inspection. The variable
# names and the printed heading still say "ego motion", but the path below points at an
# objects pickle. Running it rebinds `ego_motion_path` and `ego_motion_sample`.
ego_motion_path = "C:/FRR40/rat25-main/temp/objects/frr40_objects_0.p"

# Load the pickled object DataFrame
ego_motion_sample = pd.read_pickle(ego_motion_path)

# Print every column name of the object file prefixed with its position
print("Columns in the ego motion file:")
for idx, column in enumerate(ego_motion_sample.columns):
    print(f"{idx}: {column}")

Columns in the ego motion file:
0: cRC
1: length
2: counter
3: dataID
4: header.privateExtendedQualifier
5: header.origin.x
6: header.origin.y
7: header.origin.z
8: header.origin.roll
9: header.origin.roll_std_dev
10: header.origin.pitch
11: header.origin.pitch_std_dev
12: header.origin.yaw
13: header.origin.yaw_std_dev
14: header.timestampDAQ.fractional_seconds
15: header.timestampDAQ.seconds
16: header.timestampDAQ.sync_status
17: header.timestampSP_start.fractional_seconds
18: header.timestampSP_start.seconds
19: header.timestampSP_start.sync_status
20: header.timestampSP_end.fractional_seconds
21: header.timestampSP_end.seconds
22: header.timestampSP_end.sync_status
23: header.integrityFrontRadar
24: number_of_objects
25: qualifier
26: BMWcodecAssesment
27: BMWcodecClass
28: BMWdrivingDirection
29: BMWFuncRelev
30: height_edge_mean
31: orientation_deg
32: z_original
33: vabs_kmh
34: existence_probability
35: existence_qualifier
36: mirror_probability
37: x
38: x_std_dev
39: y
40: y

In [8]:
def filter_objects_and_inputs(labels_df, inputs_df):
    """
    Filter the per-cycle object lists of the labels and apply the same selection to the inputs.

    Rows of the two DataFrames are paired by position (first with first, ...); pairing
    stops at the shorter DataFrame. Within a row, object index ``i`` is kept only if
    ``status_measurement[i][1] == "measured"``, ``status_movement[i][1] == "moved"`` and
    ``overdrivable[i] + underdrivable[i] <= 50``. Every cell that holds a Python list is
    reduced to the kept indices (in ascending order); all other cells are passed through.

    Args:
        labels_df: DataFrame containing labels and filtering columns (e.g., object_x.p).
            Must provide 'status_measurement', 'status_movement', 'overdrivable' and
            'underdrivable'.
        inputs_df: DataFrame containing input features (e.g., detection_x.p).
    Returns:
        Tuple of cleaned input and label DataFrames, in that order. Both keep the column
        order of their source DataFrame and get a fresh 0..n-1 index.
    """
    def select(value, kept_indices):
        # Only plain lists are treated as per-object data.
        # NOTE(review): cells stored as numpy arrays or tuples pass through unfiltered — confirm
        # which container type the pickles actually hold.
        if isinstance(value, list):
            return [value[i] for i in kept_indices]
        return value

    def filter_cycle(row_labels, row_inputs):
        """
        Filter a single radar cycle (row) for both labels and inputs.
        Args:
            row_labels: Row from the labels DataFrame.
            row_inputs: Row from the input features DataFrame.
        Returns:
            Tuple of filtered labels and inputs for the row.
        """
        status_measurement = row_labels["status_measurement"]
        status_movement = row_labels["status_movement"]
        overdrivable = row_labels["overdrivable"]
        underdrivable = row_labels["underdrivable"]

        # Step 1: Keep only 'measured' objects
        measured_indices = {i for i, obj in enumerate(status_measurement) if obj[1] == "measured"}

        # Step 2: Keep only 'moved' objects
        moved_indices = {i for i, obj in enumerate(status_movement) if obj[1] == "moved"}

        # Step 3: Remove objects where (overdrivable + underdrivable) > 50
        valid_indices = {
            i for i in range(len(overdrivable))
            if (overdrivable[i] + underdrivable[i]) <= 50
        }

        # Sets are unordered; sort so the surviving objects keep their original order.
        final_indices = sorted(measured_indices & moved_indices & valid_indices)

        filtered_labels = {col: select(row_labels[col], final_indices) for col in labels_df.columns}

        # NOTE(review): the object indices are applied to the input lists as well, which
        # assumes input lists are index-aligned with the object lists — confirm.
        filtered_inputs = {col: select(row_inputs[col], final_indices) for col in inputs_df.columns}

        return filtered_labels, filtered_inputs

    # Collect per-row results in two lists; this also works when there are no rows at all.
    filtered_labels_data = []
    filtered_inputs_data = []
    for (_, row_labels), (_, row_inputs) in zip(labels_df.iterrows(), inputs_df.iterrows()):
        row_filtered_labels, row_filtered_inputs = filter_cycle(row_labels, row_inputs)
        filtered_labels_data.append(row_filtered_labels)
        filtered_inputs_data.append(row_filtered_inputs)

    # Build the results from the filtered rows, not from any notebook-level variable.
    filtered_labels_df = pd.DataFrame(filtered_labels_data, columns=list(labels_df.columns))
    filtered_inputs_df = pd.DataFrame(filtered_inputs_data, columns=list(inputs_df.columns))

    return filtered_inputs_df, filtered_labels_df


# Example usage of the two-argument filter, which expects two DataFrames.
# NOTE(review): `filtered_object_files` is the dict returned by filter_object_columns
# (file name -> DataFrame), not a DataFrame. The call below therefore fails with
# "AttributeError: 'dict' object has no attribute 'iterrows'", as the recorded output shows.
# The following cell iterates over the dict and passes one DataFrame at a time instead.
filtered_inputs, filtered_labels = filter_objects_and_inputs(filtered_object_files, combined_features)

# Print the first rows of both filtered DataFrames
print("Filtered Input Features:")
print(filtered_inputs.head())
print("\nFiltered Labels:")
print(filtered_labels.head())


AttributeError: 'dict' object has no attribute 'iterrows'

In [None]:
# Run the two-argument filter once per object file and collect the results
all_filtered_inputs = []
all_filtered_labels = []

for file_name, labels_df in filtered_object_files.items():
    print(f"Processing file: {file_name}")
    # NOTE(review): every object file is paired with the complete `combined_features`
    # frame (all detection files stacked together), not with that file's own detections.
    # Presumably a per-file inputs frame is needed here — confirm before using the result.
    inputs_df = combined_features  # Adjust this if you have separate inputs for each file
    
    # Filter inputs and labels (returned in the order: inputs, labels)
    filtered_inputs, filtered_labels = filter_objects_and_inputs(labels_df, inputs_df)
    
    # Keep the per-file results for concatenation below
    all_filtered_inputs.append(filtered_inputs)
    all_filtered_labels.append(filtered_labels)

# Stack the per-file results into one DataFrame each, with a fresh running index
final_filtered_inputs = pd.concat(all_filtered_inputs, ignore_index=True)
final_filtered_labels = pd.concat(all_filtered_labels, ignore_index=True)

# Print the first rows of the combined DataFrames
print("Final Filtered Input Features:")
print(final_filtered_inputs.head())
print("\nFinal Filtered Labels:")
print(final_filtered_labels.head())


In [11]:
def process_labels(object_folder, save_folder):
    """
    Reduce every object pickle to the relevant columns, save it, and collect the results.

    Each reduced DataFrame is written to ``save_folder`` under the same file name as
    its source file. ``save_folder`` is created when it does not exist yet.

    Args:
        object_folder: Path to the folder containing object files.
        save_folder: Path to save the processed label DataFrames.
    Returns:
        List of processed DataFrames for all object files (sorted by file name).
    """
    relevant_columns = [
        # Used to match against other data
        "timestamp",
        # Label columns
        "orientation", "x", "y", "width_edge_mean", "length_edge_mean",
        # Filtering columns
        "status_measurement", "status_movement", "overdrivable", "underdrivable",
        # For coordinate system transformation
        "header.origin.x", "header.origin.y", "header.origin.z",
        "header.origin.roll", "header.origin.pitch", "header.origin.yaw",
    ]

    # Make sure the output location is there before the first file is written.
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    pickle_names = [name for name in os.listdir(object_folder) if name.endswith(".p")]
    pickle_names.sort()

    processed_labels = []
    for object_file in pickle_names:
        print(f"Processing Object File: {object_file}")

        # Load the source file and drop everything but the relevant columns.
        source_frame = pd.read_pickle(os.path.join(object_folder, object_file))
        filtered_data = source_frame[relevant_columns]

        # Persist under the original file name, then remember the frame.
        save_path = os.path.join(save_folder, object_file)
        filtered_data.to_pickle(save_path)
        print(f"Filtered object file saved to: {save_path}")

        processed_labels.append(filtered_data)

    return processed_labels


# Source folder with the object pickles and destination folder for the reduced copies
object_folder = "C:/FRR40/rat25-main/temp/objects"
save_folder = "C:/FRR40/rat25-main/temp/processed_objects"

# Reduce every object file to the relevant columns, save it, and keep the frames in a list
processed_labels = process_labels(object_folder, save_folder)


Processing Object File: frr40_objects_0.p
Filtered object file saved to: C:/FRR40/rat25-main/temp/processed_objects\frr40_objects_0.p
Processing Object File: frr40_objects_1.p
Filtered object file saved to: C:/FRR40/rat25-main/temp/processed_objects\frr40_objects_1.p
Processing Object File: frr40_objects_10.p
Filtered object file saved to: C:/FRR40/rat25-main/temp/processed_objects\frr40_objects_10.p
Processing Object File: frr40_objects_11.p
Filtered object file saved to: C:/FRR40/rat25-main/temp/processed_objects\frr40_objects_11.p
Processing Object File: frr40_objects_12.p
Filtered object file saved to: C:/FRR40/rat25-main/temp/processed_objects\frr40_objects_12.p
Processing Object File: frr40_objects_13.p
Filtered object file saved to: C:/FRR40/rat25-main/temp/processed_objects\frr40_objects_13.p
Processing Object File: frr40_objects_14.p
Filtered object file saved to: C:/FRR40/rat25-main/temp/processed_objects\frr40_objects_14.p
Processing Object File: frr40_objects_15.p
Filtered 

In [12]:
def process_detection_ego_motion(detection_folder, ego_motion_folder, save_folder):
    """
    Add ego yaw rate and ego speed to every detection row and save one file per detection file.

    The ".p" files of both folders are sorted by name and paired by position. For
    every detection row the ego-motion sample with the closest timestamp (see
    ``find_egosignal``) supplies the new 'yawrate' and 'egospeed' columns. Each
    processed DataFrame is pickled to ``save_folder`` under the detection file's name.

    Args:
        detection_folder: Path to the folder containing detection files.
        ego_motion_folder: Path to the folder containing ego motion files.
        save_folder: Path to save the processed input feature DataFrames.
    Returns:
        List of processed DataFrames for all detection files.
    """
    ego_columns = ["timestamp", "RotationRates.yawRateVehicleBody.value", "Velocity.SpeedCog.SpeedCog"]
    detection_columns = ["timestamp", "rcs", "distance", "angleAzimuth", "angleElevation",
                         "radialVelocity", "radialVelocityDomainMax"]

    if not os.path.exists(save_folder):
        os.makedirs(save_folder)  # Create save folder if it doesn't exist

    # Sorting is lexicographic ("_10" comes before "_2"). Pairing by position therefore
    # relies on both folders using the same numbering suffixes.
    detection_files = sorted(f for f in os.listdir(detection_folder) if f.endswith(".p"))
    ego_motion_files = sorted(f for f in os.listdir(ego_motion_folder) if f.endswith(".p"))

    if len(detection_files) != len(ego_motion_files):
        # zip() below stops at the shorter list; make the truncation visible.
        print(f"Warning: {len(detection_files)} detection files but {len(ego_motion_files)} ego motion files; "
              "surplus files are ignored and the pairing may be shifted")

    combined_data = []
    for detection_file, ego_motion_file in zip(detection_files, ego_motion_files):
        print(f"Processing Detection File: {detection_file} with Ego Motion File: {ego_motion_file}")

        # NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
        detectionlist = pd.read_pickle(os.path.join(detection_folder, detection_file))
        ego_motion = pd.read_pickle(os.path.join(ego_motion_folder, ego_motion_file))

        # Keep only the needed columns. The detection subset is copied because new columns
        # are added to it below; assigning onto a slice triggers SettingWithCopyWarning.
        ego_motion = ego_motion[ego_columns]
        detectionlist = detectionlist[detection_columns].copy()

        # One (yawrate, egospeed) pair per detection row; also works for zero rows.
        matches = [find_egosignal(ts, ego_motion) for ts in detectionlist["timestamp"]]
        detectionlist["yawrate"] = [yawrate for yawrate, _ in matches]
        detectionlist["egospeed"] = [egospeed for _, egospeed in matches]

        # Save the processed detection DataFrame under the detection file's name
        save_path = os.path.join(save_folder, detection_file)
        detectionlist.to_pickle(save_path)
        print(f"Processed detection file saved to: {save_path}")

        combined_data.append(detectionlist)

    return combined_data


# Source folders for detections and ego motion, and the destination for the processed detections.
# Note that `save_folder` is rebound here; the labels cell used it for the processed objects folder.
detection_folder = "C:/FRR40/rat25-main/temp/detections"
ego_motion_folder = "C:/FRR40/rat25-main/temp/ego_motion"
save_folder = "C:/FRR40/rat25-main/temp/processed_detections"

# Add yawrate/egospeed to every detection file, save each result, and keep the frames in a list
processed_input_features = process_detection_ego_motion(detection_folder, ego_motion_folder, save_folder)


Processing Detection File: frr40_detections_0.p with Ego Motion File: egoMotionDynamicData_0.p
Processed detection file saved to: C:/FRR40/rat25-main/temp/processed_detections\frr40_detections_0.p
Processing Detection File: frr40_detections_1.p with Ego Motion File: egoMotionDynamicData_1.p
Processed detection file saved to: C:/FRR40/rat25-main/temp/processed_detections\frr40_detections_1.p
Processing Detection File: frr40_detections_10.p with Ego Motion File: egoMotionDynamicData_10.p
Processed detection file saved to: C:/FRR40/rat25-main/temp/processed_detections\frr40_detections_10.p
Processing Detection File: frr40_detections_11.p with Ego Motion File: egoMotionDynamicData_11.p
Processed detection file saved to: C:/FRR40/rat25-main/temp/processed_detections\frr40_detections_11.p
Processing Detection File: frr40_detections_12.p with Ego Motion File: egoMotionDynamicData_12.p
Processed detection file saved to: C:/FRR40/rat25-main/temp/processed_detections\frr40_detections_12.p
Proces

In [13]:
def filter_objects_and_inputs(object_folder, detection_folder, save_folder_objects, save_folder_detections):
    """
    Filter the per-cycle object lists of every object file, apply the same selection to
    the paired detection file, and save both results.

    The ".p" files of both folders are sorted by name and paired by position; inside a
    file pair, rows are paired by position as well (pairing stops at the shorter side).
    Within a row, object index ``i`` is kept only if ``status_measurement[i][1] == "measured"``,
    ``status_movement[i][1] == "moved"`` and ``overdrivable[i] + underdrivable[i] <= 50``.
    Every cell that holds a Python list is reduced to the kept indices (ascending order);
    all other cells are passed through.

    Args:
        object_folder: Path to the folder containing processed object files.
        detection_folder: Path to the folder containing processed detection files.
        save_folder_objects: Path to save filtered object files.
        save_folder_detections: Path to save synchronized detection files.
    Returns:
        Tuple of lists of filtered object and synchronized detection DataFrames.
    """
    def select(value, kept_indices):
        # Only plain lists are treated as per-object data.
        # NOTE(review): cells stored as numpy arrays or tuples pass through unfiltered — confirm
        # which container type the pickles actually hold.
        if isinstance(value, list):
            return [value[i] for i in kept_indices]
        return value

    def filter_cycle(row_object, row_detection, object_columns, detection_columns):
        # Unpack relevant columns
        status_measurement = row_object["status_measurement"]
        status_movement = row_object["status_movement"]
        overdrivable = row_object["overdrivable"]
        underdrivable = row_object["underdrivable"]

        # Step 1: Keep only 'measured' objects
        measured_indices = {i for i, obj in enumerate(status_measurement) if obj[1] == "measured"}

        # Step 2: Keep only 'moved' objects
        moved_indices = {i for i, obj in enumerate(status_movement) if obj[1] == "moved"}

        # Step 3: Remove objects where (overdrivable + underdrivable) > 50
        valid_indices = {
            i for i in range(len(overdrivable))
            if (overdrivable[i] + underdrivable[i]) <= 50
        }

        # Sets are unordered; sort so the surviving objects keep their original order.
        final_indices = sorted(measured_indices & moved_indices & valid_indices)

        filtered_labels = {col: select(row_object[col], final_indices) for col in object_columns}

        # NOTE(review): the object indices are applied to the detection lists as well, which
        # assumes detection lists are index-aligned with the object lists — confirm.
        filtered_inputs = {col: select(row_detection[col], final_indices) for col in detection_columns}

        return filtered_labels, filtered_inputs

    if not os.path.exists(save_folder_objects):
        os.makedirs(save_folder_objects)
    if not os.path.exists(save_folder_detections):
        os.makedirs(save_folder_detections)

    filtered_objects = []
    synchronized_inputs = []

    # List all object and detection files (lexicographic order; paired by position)
    object_files = sorted(f for f in os.listdir(object_folder) if f.endswith(".p"))
    detection_files = sorted(f for f in os.listdir(detection_folder) if f.endswith(".p"))

    for object_file, detection_file in zip(object_files, detection_files):
        print(f"Processing Object File: {object_file} and Detection File: {detection_file}")

        # NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
        object_data = pd.read_pickle(os.path.join(object_folder, object_file))
        detection_data = pd.read_pickle(os.path.join(detection_folder, detection_file))

        object_columns = list(object_data.columns)
        detection_columns = list(detection_data.columns)

        # Collect per-row results in two lists; this also works when a frame has no rows.
        filtered_object_rows = []
        filtered_input_rows = []
        for (_, row_object), (_, row_detection) in zip(object_data.iterrows(), detection_data.iterrows()):
            row_labels, row_inputs = filter_cycle(row_object, row_detection, object_columns, detection_columns)
            filtered_object_rows.append(row_labels)
            filtered_input_rows.append(row_inputs)

        # Convert filtered data back to DataFrames, keeping the source column order
        filtered_object_df = pd.DataFrame(filtered_object_rows, columns=object_columns)
        filtered_input_df = pd.DataFrame(filtered_input_rows, columns=detection_columns)

        # Save the filtered DataFrames under the source file names
        filtered_object_path = os.path.join(save_folder_objects, object_file)
        filtered_input_path = os.path.join(save_folder_detections, detection_file)

        filtered_object_df.to_pickle(filtered_object_path)
        filtered_input_df.to_pickle(filtered_input_path)

        print(f"Saved filtered objects to: {filtered_object_path}")
        print(f"Saved synchronized inputs to: {filtered_input_path}")

        filtered_objects.append(filtered_object_df)
        synchronized_inputs.append(filtered_input_df)

    return filtered_objects, synchronized_inputs


# Inputs are the *processed* folders written by the two preceding steps; outputs go to new folders.
# Note that `object_folder` and `detection_folder` are rebound here; earlier cells pointed
# them at the raw objects/detections folders.
object_folder = "C:/FRR40/rat25-main/temp/processed_objects"
detection_folder = "C:/FRR40/rat25-main/temp/processed_detections"
save_folder_objects = "C:/FRR40/rat25-main/temp/filtered_objects"
save_folder_detections = "C:/FRR40/rat25-main/temp/filtered_detections"

# Filter labels and synchronize inputs (four-argument, folder-based version of the filter)
filtered_objects, synchronized_inputs = filter_objects_and_inputs(object_folder, detection_folder, save_folder_objects, save_folder_detections)


Processing Object File: frr40_objects_0.p and Detection File: frr40_detections_0.p
Saved filtered objects to: C:/FRR40/rat25-main/temp/filtered_objects\frr40_objects_0.p
Saved synchronized inputs to: C:/FRR40/rat25-main/temp/filtered_detections\frr40_detections_0.p
Processing Object File: frr40_objects_1.p and Detection File: frr40_detections_1.p
Saved filtered objects to: C:/FRR40/rat25-main/temp/filtered_objects\frr40_objects_1.p
Saved synchronized inputs to: C:/FRR40/rat25-main/temp/filtered_detections\frr40_detections_1.p
Processing Object File: frr40_objects_10.p and Detection File: frr40_detections_10.p
Saved filtered objects to: C:/FRR40/rat25-main/temp/filtered_objects\frr40_objects_10.p
Saved synchronized inputs to: C:/FRR40/rat25-main/temp/filtered_detections\frr40_detections_10.p
Processing Object File: frr40_objects_11.p and Detection File: frr40_detections_11.p
Saved filtered objects to: C:/FRR40/rat25-main/temp/filtered_objects\frr40_objects_11.p
Saved synchronized inputs

In [14]:
import os
import pandas as pd

def align_timestamps(filtered_detections_folder, filtered_objects_folder, save_folder_inputs, save_folder_labels):
    """
    Align timestamps between filtered detection and object files, and save aligned DataFrames.
    Args:
        filtered_detections_folder: Path to the folder containing filtered detection files.
        filtered_objects_folder: Path to the folder containing filtered object files.
        save_folder_inputs: Path to save aligned input feature DataFrames.
        save_folder_labels: Path to save aligned label DataFrames.
    Returns:
        None
    """
    if not os.path.exists(save_folder_inputs):
        os.makedirs(save_folder_inputs)
    if not os.path.exists(save_folder_labels):
        os.makedirs(save_folder_labels)

    # Get all filtered detection and object files
    detection_files = sorted([f for f in os.listdir(filtered_detections_folder) if f.endswith(".p")])
    object_files = sorted([f for f in os.listdir(filtered_objects_folder) if f.endswith(".p")])

    for detection_file, object_file in zip(detection_files, object_files):
        print(f"Aligning Detection File: {detection_file} with Object File: {object_file}")
        
        # Load the filtered detection and object files
        detection_path = os.path.join(filtered_detections_folder, detection_file)
        object_path = os.path.join(filtered_objects_folder, object_file)

        detection_data = pd.read_pickle(detection_path)
        object_data = pd.read_pickle(object_path)

        # Align rows based on timestamps
        aligned_data = pd.merge(detection_data, object_data, on="timestamp", how="inner", suffixes=("_input", "_label"))

        # Split into aligned inputs and labels
        aligned_inputs = aligned_data[[col for col in detection_data.columns]]
        aligned_labels = aligned_data[[col for col in object_data.columns]]

        # Save aligned DataFrames
        aligned_input_path = os.path.join(save_folder_inputs, detection_file)
        aligned_label_path = os.path.join(save_folder_labels, object_file)

        aligned_inputs.to_pickle(aligned_input_path)
        aligned_labels.to_pickle(aligned_label_path)

        print(f"Saved aligned input features to: {aligned_input_path}")
        print(f"Saved aligned labels to: {aligned_label_path}")

# Inputs of the alignment step: the filtered detection and object pickles
filtered_detections_folder, filtered_objects_folder = (
    "C:/FRR40/rat25-main/temp/filtered_detections",
    "C:/FRR40/rat25-main/temp/filtered_objects",
)

# Outputs of the alignment step: aligned input features and aligned labels
save_folder_inputs, save_folder_labels = (
    "C:/FRR40/rat25-main/temp/aligned_inputs",
    "C:/FRR40/rat25-main/temp/aligned_labels",
)

# Join each detection/object pair on timestamp and write the aligned frames to disk
align_timestamps(
    filtered_detections_folder,
    filtered_objects_folder,
    save_folder_inputs,
    save_folder_labels,
)


Aligning Detection File: frr40_detections_0.p with Object File: frr40_objects_0.p
Saved aligned input features to: C:/FRR40/rat25-main/temp/aligned_inputs\frr40_detections_0.p
Saved aligned labels to: C:/FRR40/rat25-main/temp/aligned_labels\frr40_objects_0.p
Aligning Detection File: frr40_detections_1.p with Object File: frr40_objects_1.p
Saved aligned input features to: C:/FRR40/rat25-main/temp/aligned_inputs\frr40_detections_1.p
Saved aligned labels to: C:/FRR40/rat25-main/temp/aligned_labels\frr40_objects_1.p
Aligning Detection File: frr40_detections_10.p with Object File: frr40_objects_10.p
Saved aligned input features to: C:/FRR40/rat25-main/temp/aligned_inputs\frr40_detections_10.p
Saved aligned labels to: C:/FRR40/rat25-main/temp/aligned_labels\frr40_objects_10.p
Aligning Detection File: frr40_detections_11.p with Object File: frr40_objects_11.p
Saved aligned input features to: C:/FRR40/rat25-main/temp/aligned_inputs\frr40_detections_11.p
Saved aligned labels to: C:/FRR40/rat25-

In [18]:
def handle_missing_values(input_df, label_df):
    """
    Remove rows with missing values in either the input features or labels.
    Validate that input and label DataFrames are clean and aligned.

    The two frames are inner-joined on "timestamp", every joined row containing a
    null in any column is dropped, and the remainder is split back into an input
    frame and a label frame that keep their original column names (including
    columns whose name appears in both frames). Timestamps of the dropped rows
    and a short row-count summary are printed.

    Args:
        input_df: Input features DataFrame; must contain a "timestamp" column.
        label_df: Labels DataFrame; must contain a "timestamp" column.
    Returns:
        Tuple of cleaned input and label DataFrames with the same number of rows.
    """
    # Step 1: Combine input and label DataFrames on "timestamp"
    combined_df = pd.merge(input_df, label_df, on="timestamp", how="inner", suffixes=("_input", "_label"))

    # Step 2: Identify rows with missing values
    missing_rows = combined_df[combined_df.isnull().any(axis=1)]

    # Print removed rows (e.g., timestamps)
    if not missing_rows.empty:
        print("Rows with missing values (timestamps):")
        print(missing_rows["timestamp"].values)

    # Step 3: Drop rows with missing values
    cleaned_combined_df = combined_df.dropna()

    # Step 4: Separate cleaned DataFrame back into input and label DataFrames.
    # Columns (other than the join key) that exist in both frames were renamed by
    # the merge with a suffix, so select them under that name and rename them back.
    shared_columns = set(input_df.columns).intersection(label_df.columns) - {"timestamp"}

    def _take_side(own_columns, suffix):
        merged_names = [f"{col}{suffix}" if col in shared_columns else col for col in own_columns]
        return cleaned_combined_df[merged_names].rename(columns=dict(zip(merged_names, own_columns)))

    cleaned_input_df = _take_side(input_df.columns, "_input")
    cleaned_label_df = _take_side(label_df.columns, "_label")

    # Step 5: Validate that the cleaned DataFrames have the same length
    assert len(cleaned_input_df) == len(cleaned_label_df), "Input and label DataFrames are not aligned after cleaning."

    # Step 6: Print summary of the cleaning process
    # "Rows Removed" is relative to the input frame, so it also counts input rows
    # whose timestamp had no match in the labels.
    print(f"Initial Input Rows: {len(input_df)}")
    print(f"Initial Label Rows: {len(label_df)}")
    print(f"Rows Removed: {len(input_df) - len(cleaned_input_df)}")
    print(f"Cleaned Input Rows: {len(cleaned_input_df)}")
    print(f"Cleaned Label Rows: {len(cleaned_label_df)}")

    return cleaned_input_df, cleaned_label_df


In [20]:
import os
import pandas as pd

# Load a sample aligned input file
aligned_inputs_folder = "C:/FRR40/rat25-main/temp/aligned_inputs"
aligned_labels_folder = "C:/FRR40/rat25-main/temp/aligned_labels"

# Example: Load the first file in the aligned inputs folder.
# Only ".p" files are considered (as in the other cells), so any other entry in
# the folder cannot end up as the "first file" handed to read_pickle.
input_files = sorted(f for f in os.listdir(aligned_inputs_folder) if f.endswith(".p"))
label_files = sorted(f for f in os.listdir(aligned_labels_folder) if f.endswith(".p"))

# Load the first aligned input and label files
aligned_inputs_path = os.path.join(aligned_inputs_folder, input_files[0])
aligned_labels_path = os.path.join(aligned_labels_folder, label_files[0])

aligned_inputs = pd.read_pickle(aligned_inputs_path)
aligned_labels = pd.read_pickle(aligned_labels_path)

# Inspect the loaded data
print("\n--- Loaded Aligned Inputs ---")
print(aligned_inputs.head())

print("\n--- Loaded Aligned Labels ---")
print(aligned_labels.head())



--- Loaded Aligned Inputs ---
      timestamp                                                rcs  \
0  2.739721e+07  [8, 12, -13, -22, -7, -2, -18, -24, -14, 11, -...   
1  2.739721e+07  [7, -15, 11, -7, -3, -17, -26, -23, -32, -15, ...   
2  2.739721e+07  [8, -12, -18, -7, -3, -18, -23, -5, -16, 4, -2...   
3  2.739721e+07  [8, 7, -17, 18, -28, -18, -2, -24, -14, -3, 11...   
4  2.739721e+07  [6, -4, -17, 18, 2, -2, -3, -16, -23, -27, -14...   

                                            distance  \
0  [3.6299999, 4.04, 4.12, 4.36, 4.63, 4.98, 5.56...   
1  [3.62, 4.13, 4.5299997, 4.62, 4.92, 5.5299997,...   
2  [3.62, 4.13, 4.29, 4.62, 4.96, 5.56, 6.1299996...   
3  [3.6399999, 3.6899998, 4.11, 4.62, 5.25, 5.529...   
4  [3.6599998, 3.87, 4.27, 4.64, 4.92, 5.24, 5.31...   

                                        angleAzimuth  \
0  [-0.13927734, -0.00518363, -0.03576181, -0.070...   
1  [-0.13943441, -0.033300895, -0.08131493, -0.27...   
2  [-0.13922498, -0.35170147, -0.3300768, -

In [23]:
import matplotlib.pyplot as plt

def validate_data(inputs_df, labels_df):
    """
    Print a validation report for preprocessed data and plot a few distributions.

    The report covers shapes, the first rows, dtypes and the total number of
    null cells of both frames. Plots are only drawn for columns that exist:
    a histogram of the exploded "distance" input column, a histogram of the
    exploded "x" label column, and a distance vs. radial-velocity scatter plot
    with one scatter call per input row.

    Args:
        inputs_df: Preprocessed input features DataFrame.
        labels_df: Preprocessed labels DataFrame.
    """
    report_frames = (("Input Features", inputs_df), ("Labels", labels_df))

    # 1. Shapes
    print("\n--- Data Shapes ---")
    for heading, frame in report_frames:
        print(f"{heading} Shape: {frame.shape}")

    # 2. First rows
    for heading, frame in report_frames:
        print(f"\n--- {heading} Sample ---")
        print(frame.head())

    # 3. Column dtypes
    for heading, frame in report_frames:
        print(f"\n--- {heading} Data Types ---")
        print(frame.dtypes)

    # 4. Total number of null cells per frame
    print("\n--- Missing Values Check ---")
    for noun, frame in (("inputs", inputs_df), ("labels", labels_df)):
        print(f"Missing values in {noun}: {frame.isnull().sum().sum()}")

    # 5. Distributions
    def _plot_histogram(column, legend_label, title, x_label):
        # explode() flattens list-valued cells into one value per row
        plt.figure(figsize=(10, 5))
        plt.hist(column.explode(), bins=50, alpha=0.7, label=legend_label)
        plt.title(title)
        plt.xlabel(x_label)
        plt.ylabel("Frequency")
        plt.legend()
        plt.show()

    input_columns = inputs_df.columns

    if "distance" in input_columns:
        _plot_histogram(
            inputs_df["distance"],
            "Distance",
            "Distribution of Distance (Input Feature)",
            "Distance",
        )

    if "x" in labels_df.columns:
        _plot_histogram(
            labels_df["x"],
            "X (Label)",
            "Distribution of X Coordinate (Label)",
            "X Coordinate",
        )

    # Relationship between two input features, drawn row by row
    if "distance" in input_columns and "radialVelocity" in input_columns:
        plt.figure(figsize=(10, 5))
        row_pairs = zip(inputs_df["distance"], inputs_df["radialVelocity"])
        for row_distance, row_velocity in row_pairs:
            plt.scatter(row_distance, row_velocity, alpha=0.5)
        plt.title("Distance vs Radial Velocity")
        plt.xlabel("Distance")
        plt.ylabel("Radial Velocity")
        plt.show()

    print("\n--- Validation Complete ---")


In [None]:
import os
import pandas as pd

def combine_objects_and_detections(objects_folder, detections_folder):
    """
    Combine objects and detections files based on matching timestamps.

    The ".p" files of both folders are sorted by name and paired by position.
    Each pair is inner-joined on "timestamp" (detections on the left), and the
    per-pair results are concatenated into one DataFrame with a fresh index.

    Args:
        objects_folder: Folder containing pickled object DataFrames.
        detections_folder: Folder containing pickled detection DataFrames.
    Returns:
        A single DataFrame holding the merged rows of all file pairs.
    Raises:
        ValueError: If no object/detection file pair is found.
    """
    # List object and detection files
    object_files = sorted(f for f in os.listdir(objects_folder) if f.endswith(".p"))
    detection_files = sorted(f for f in os.listdir(detections_folder) if f.endswith(".p"))

    # zip() below stops at the shorter list, so make a count mismatch visible
    if len(object_files) != len(detection_files):
        print(
            f"Warning: {len(object_files)} object files but {len(detection_files)} detection files; "
            "unpaired files are ignored."
        )

    combined_data = []
    for obj_file, det_file in zip(object_files, detection_files):
        print(f"Combining Object File: {obj_file} with Detection File: {det_file}")

        # Load files
        objects = pd.read_pickle(os.path.join(objects_folder, obj_file))
        detections = pd.read_pickle(os.path.join(detections_folder, det_file))

        # Merge on timestamp
        combined_data.append(pd.merge(detections, objects, on="timestamp", how="inner"))

    # pd.concat raises an uninformative ValueError on an empty list; fail with
    # the same exception type but say which folders were searched.
    if not combined_data:
        raise ValueError(
            f"No object/detection file pairs (*.p) found in '{objects_folder}' and '{detections_folder}'."
        )

    # Combine all merged data
    return pd.concat(combined_data, ignore_index=True)

def match_ego_motion(merged_data, ego_motion_folder):
    """
    Match ego motion data to the merged data based on the closest timestamp.

    All ".p" files in `ego_motion_folder` are loaded and pooled into a single
    ego-motion table. Every row of `merged_data` then receives the yaw rate and
    speed of the pooled ego-motion row whose timestamp is nearest to its own.
    The result has exactly one row per row of `merged_data` (it is not repeated
    per ego-motion file), and `merged_data` itself is left unmodified.

    Args:
        merged_data: DataFrame with a "timestamp" column.
        ego_motion_folder: Folder containing pickled ego-motion DataFrames, each
            with the columns "timestamp",
            "RotationRates.yawRateVehicleBody.value" and
            "Velocity.SpeedCog.SpeedCog".
    Returns:
        A copy of `merged_data` with a fresh integer index and two added columns,
        "yaw_rate" and "ego_speed". Rows whose timestamp is missing get NaN in both.
    Raises:
        ValueError: If the folder contains no ego-motion file.
    """
    # Source ego-motion column -> name of the column added to the result
    ego_columns = {
        'RotationRates.yawRateVehicleBody.value': "yaw_rate",
        'Velocity.SpeedCog.SpeedCog': "ego_speed",
    }

    ego_motion_files = sorted(f for f in os.listdir(ego_motion_folder) if f.endswith(".p"))
    ego_frames = []
    for ego_file in ego_motion_files:
        print(f"Matching Ego Motion File: {ego_file}")

        # Load ego motion file and keep only the columns needed for matching
        ego_motion = pd.read_pickle(os.path.join(ego_motion_folder, ego_file))
        ego_frames.append(ego_motion[["timestamp", *ego_columns]])

    if not ego_frames:
        raise ValueError(f"No ego motion files (*.p) found in '{ego_motion_folder}'.")

    # One pooled, time-sorted ego-motion table (merge_asof needs sorted, non-null keys)
    ego_all = (
        pd.concat(ego_frames, ignore_index=True)
        .rename(columns=ego_columns)
        .dropna(subset=["timestamp"])
        .sort_values("timestamp", kind="mergesort")
    )

    # Time-sorted lookup keys, remembering each key's row position in merged_data
    timestamps = pd.Series(merged_data["timestamp"].to_numpy())
    ordered = timestamps.dropna().sort_values(kind="mergesort")
    lookup = pd.DataFrame({"timestamp": ordered.to_numpy(), "_row": ordered.index.to_numpy()})

    # merge_asof requires identical key dtypes on both sides
    if lookup["timestamp"].dtype != ego_all["timestamp"].dtype:
        lookup = lookup.assign(timestamp=lookup["timestamp"].astype("float64"))
        ego_all = ego_all.assign(timestamp=ego_all["timestamp"].astype("float64"))

    nearest = pd.merge_asof(lookup, ego_all, on="timestamp", direction="nearest").set_index("_row")

    # Write the matches back in the original row order; rows without a timestamp
    # have no entry in `nearest` and become NaN through the reindex.
    result = merged_data.reset_index(drop=True)
    all_rows = pd.RangeIndex(len(result))
    for name in ego_columns.values():
        result[name] = nearest[name].reindex(all_rows).to_numpy()

    return result

# Folders holding the processed object, detection and ego-motion pickles
objects_folder, detections_folder, ego_motion_folder = (
    "C:/FRR40/rat25-main/processed/objects",
    "C:/FRR40/rat25-main/processed/detections",
    "C:/FRR40/rat25-main/processed/ego_motion",
)

# Step 1: join objects and detections on their timestamps
merged_data = combine_objects_and_detections(
    objects_folder,
    detections_folder,
)

# Step 2: attach ego-motion values to the joined rows
final_combined_data = match_ego_motion(
    merged_data,
    ego_motion_folder,
)

# Persist the result as a pickle
combined_data_path = "C:/FRR40/rat25-main/processed/final_combined_data.pkl"
final_combined_data.to_pickle(combined_data_path)

# Quick look at what was produced
print(f"\nFinal Combined Data Shape: {final_combined_data.shape}")
print("Sample Data:")
print(final_combined_data.head())
