In [2]:
import os
import pandas as pd

# Folder paths
base_folder = "C:/FRR40/rat25-main/temp"
detections_folder = os.path.join(base_folder, "detections")
ego_motion_folder = os.path.join(base_folder, "ego_motion")
objects_folder = os.path.join(base_folder, "objects")

# Function to load and analyze a single file
def analyze_file(folder):
    # Get the first file in the folder
    files = sorted([f for f in os.listdir(folder) if f.endswith(".p")])
    if not files:
        print(f"No files found in folder: {folder}")
        return None
    file_path = os.path.join(folder, files[0])  # Select the first file
    print(f"Analyzing file: {file_path}\n")
    
    # Load the file
    data = pd.read_pickle(file_path)
    
    # Display the first few rows and column names
    print("Columns in the file:")
    print(data.columns)
    print("\nSample Data (First 5 Rows):")
    print(data.head())
    return data

# Analyze one file from each folder
print("\n--- Detection File Analysis ---")
detections_sample = analyze_file(detections_folder)

print("\n--- Ego Motion File Analysis ---")
ego_motion_sample = analyze_file(ego_motion_folder)

print("\n--- Object File Analysis ---")
objects_sample = analyze_file(objects_folder)



--- Detection File Analysis ---
Analyzing file: C:/FRR40/rat25-main/temp\detections\frr40_detections_0.p

Columns in the file:
Index(['cRC', 'length', 'counter', 'dataID', 'radialVelocityDomainMin',
       'numberOfDetections', 'azimuthCorrection', 'radialVelocityDomainMax',
       'misalignmentProbability', 'elevationCorrection',
       'header.privateExtendedQualifier', 'header.origin.x', 'header.origin.y',
       'header.origin.z', 'header.origin.roll', 'header.origin.roll_std_dev',
       'header.origin.pitch', 'header.origin.pitch_std_dev',
       'header.origin.yaw', 'header.origin.yaw_std_dev',
       'header.timestampDAQ.fractional_seconds', 'header.timestampDAQ.seconds',
       'header.timestampDAQ.sync_status',
       'header.timestampSP_start.fractional_seconds',
       'header.timestampSP_start.seconds',
       'header.timestampSP_start.sync_status',
       'header.timestampSP_end.fractional_seconds',
       'header.timestampSP_end.seconds', 'header.timestampSP_end.sync_st

In [3]:
import os
import pandas as pd

# Folder paths
base_folder = "C:/FRR40/rat25-main/temp"
detections_folder = os.path.join(base_folder, "detections")
ego_motion_folder = os.path.join(base_folder, "ego_motion")
objects_folder = os.path.join(base_folder, "objects")

# Function to load all pickle files in a folder
def load_files(folder):
    files = sorted([f for f in os.listdir(folder) if f.endswith(".p")])
    data = {}
    for file in files:
        file_path = os.path.join(folder, file)
        data[file] = pd.read_pickle(file_path)
    return data

# Load files from the respective folders
detections_data = load_files(detections_folder)
ego_motion_data = load_files(ego_motion_folder)
objects_data = load_files(objects_folder)

# Print the number of files loaded
print(f"Number of Detection Files: {len(detections_data)}")
print(f"Number of Ego Motion Files: {len(ego_motion_data)}")
print(f"Number of Object Files: {len(objects_data)}")

# Inspect the structure of one file from each folder
print("\nSample Detection File Columns:")
print(detections_data[list(detections_data.keys())[0]].columns)

print("\nSample Ego Motion File Columns:")
print(ego_motion_data[list(ego_motion_data.keys())[0]].columns)

print("\nSample Object File Columns:")
print(objects_data[list(objects_data.keys())[0]].columns)

# Inspect the first few rows of one file from each folder
print("\nSample Detection Data:")
print(detections_data[list(detections_data.keys())[0]].head())

print("\nSample Ego Motion Data:")
print(ego_motion_data[list(ego_motion_data.keys())[0]].head())

print("\nSample Object Data:")
print(objects_data[list(objects_data.keys())[0]].head())


Number of Detection Files: 16
Number of Ego Motion Files: 16
Number of Object Files: 16

Sample Detection File Columns:
Index(['cRC', 'length', 'counter', 'dataID', 'radialVelocityDomainMin',
       'numberOfDetections', 'azimuthCorrection', 'radialVelocityDomainMax',
       'misalignmentProbability', 'elevationCorrection',
       'header.privateExtendedQualifier', 'header.origin.x', 'header.origin.y',
       'header.origin.z', 'header.origin.roll', 'header.origin.roll_std_dev',
       'header.origin.pitch', 'header.origin.pitch_std_dev',
       'header.origin.yaw', 'header.origin.yaw_std_dev',
       'header.timestampDAQ.fractional_seconds', 'header.timestampDAQ.seconds',
       'header.timestampDAQ.sync_status',
       'header.timestampSP_start.fractional_seconds',
       'header.timestampSP_start.seconds',
       'header.timestampSP_start.sync_status',
       'header.timestampSP_end.fractional_seconds',
       'header.timestampSP_end.seconds', 'header.timestampSP_end.sync_status',
 

In [4]:
import os

# Define a folder to save the processed files
processed_folder = "C:/FRR40/rat25-main/processed"
os.makedirs(processed_folder, exist_ok=True)  # Create the folder if it doesn't exist

# Function to filter relevant columns and save the processed files
def filter_and_save_columns(data, columns_to_keep, save_subfolder):
    processed_data = {}
    save_path = os.path.join(processed_folder, save_subfolder)
    os.makedirs(save_path, exist_ok=True)  # Create subfolder if it doesn't exist
    
    for file, df in data.items():
        print(f"Processing and saving file: {file}")
        
        # Filter relevant columns
        filtered_df = df[columns_to_keep]
        processed_data[file] = filtered_df
        
        # Save the filtered file
        save_file_path = os.path.join(save_path, file)
        filtered_df.to_pickle(save_file_path)
    
    return processed_data

# Apply filtering and save processed files for each data type
filtered_detections = filter_and_save_columns(
    detections_data, 
    ["timestamp", "distance", "angleAzimuth", "angleElevation", "radialVelocity", "radialVelocityDomainMax"],
    "detections"
)

filtered_ego_motion = filter_and_save_columns(
    ego_motion_data, 
    ["timestamp", "RotationRates.yawRateVehicleBody.value", "Velocity.SpeedCog.SpeedCog"],
    "ego_motion"
)

filtered_objects = filter_and_save_columns(
    objects_data, 
    [
        "timestamp", "orientation", "x", "y", "width_edge_mean", "length_edge_mean",
        "status_measurement", "status_movement", "overdrivable", "underdrivable",
        "header.origin.x", "header.origin.y", "header.origin.yaw"
    ],
    "objects"
)

# Verify saved files and structure
print(f"\nProcessed files saved in: {processed_folder}")


Processing and saving file: frr40_detections_0.p
Processing and saving file: frr40_detections_1.p
Processing and saving file: frr40_detections_10.p
Processing and saving file: frr40_detections_11.p
Processing and saving file: frr40_detections_12.p
Processing and saving file: frr40_detections_13.p
Processing and saving file: frr40_detections_14.p
Processing and saving file: frr40_detections_15.p
Processing and saving file: frr40_detections_2.p
Processing and saving file: frr40_detections_3.p
Processing and saving file: frr40_detections_4.p
Processing and saving file: frr40_detections_5.p
Processing and saving file: frr40_detections_6.p
Processing and saving file: frr40_detections_7.p
Processing and saving file: frr40_detections_8.p
Processing and saving file: frr40_detections_9.p
Processing and saving file: egoMotionDynamicData_0.p
Processing and saving file: egoMotionDynamicData_1.p
Processing and saving file: egoMotionDynamicData_10.p
Processing and saving file: egoMotionDynamicData_1

In [6]:
import numpy as np

def transform_to_scs(row):
    """
    Transform coordinates from VCS to SCS.
    Args:
        row: A row of the DataFrame containing VCS data (x, y, yaw, etc.).
    Returns:
        A tuple of (x_scs, y_scs).
    """
    # Extract VCS coordinates and offsets
    x_vcs = row.get("x", 0)
    y_vcs = row.get("y", 0)
    yaw = row.get("header.origin.yaw", 0)
    x_offset = row.get("header.origin.x", 0)
    y_offset = row.get("header.origin.y", 0)

    # Apply translation
    x_translated = x_vcs - x_offset
    y_translated = y_vcs - y_offset

    # Apply rotation
    R = np.array([
        [np.cos(yaw), -np.sin(yaw)],
        [np.sin(yaw), np.cos(yaw)]
    ])
    vcs_coords = np.array([x_translated, y_translated])
    scs_coords = R @ vcs_coords

    return scs_coords[0], scs_coords[1]


In [7]:
def apply_vcs_to_scs(data):
    """
    Apply VCS to SCS transformation to the entire DataFrame.
    """
    print("Starting VCS to SCS Transformation...")
    transformed_data = data.copy()
    transformed_data[["x_scs", "y_scs"]] = transformed_data.apply(
        lambda row: pd.Series(transform_to_scs(row)), axis=1
    )
    print("Transformation Completed.")
    return transformed_data

# Apply transformation to the filtered object files
transformed_objects = {
    file: apply_vcs_to_scs(df)
    for file, df in filtered_objects.items()
}

# Save the transformed objects
transformed_objects_folder = os.path.join(processed_folder, "transformed_objects")
os.makedirs(transformed_objects_folder, exist_ok=True)

for file, df in transformed_objects.items():
    save_file_path = os.path.join(transformed_objects_folder, file)
    df.to_pickle(save_file_path)

print("\nTransformed object files saved in:", transformed_objects_folder)


Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Transformation Completed.
Starting VCS to SCS Transformation...
Tr

In [8]:
# Add the transformed columns back to the main data
merged_objects = {}

for file, transformed_df in transformed_objects.items():
    # Get the original object DataFrame
    original_df = filtered_objects[file]
    
    # Ensure indexes match, then merge
    transformed_columns = transformed_df[["x_scs", "y_scs"]]
    merged_df = pd.concat([original_df.reset_index(drop=True), transformed_columns.reset_index(drop=True)], axis=1)
    
    # Save the merged DataFrame
    merged_objects[file] = merged_df

# Save the merged files
merged_objects_folder = os.path.join(processed_folder, "merged_objects")
os.makedirs(merged_objects_folder, exist_ok=True)

for file, df in merged_objects.items():
    save_file_path = os.path.join(merged_objects_folder, file)
    df.to_pickle(save_file_path)

print("\nMerged object files saved in:", merged_objects_folder)



Merged object files saved in: C:/FRR40/rat25-main/processed\merged_objects


In [9]:
# Inspect merged data for the first file
for file, df in merged_objects.items():
    print(f"Merged Data Preview for {file}:")
    print(df.head())  # Display the first few rows
    print("\nColumns in the merged data:")
    print(df.columns)  # Display the columns
    break  # Display only the first file for verification


Merged Data Preview for frr40_objects_0.p:
      timestamp                                        orientation  \
0  2.739721e+07  [3.0897224, -1.5708438, -1.5708438, 0.0, 0.0, ...   
1  2.739721e+07  [3.141304, -0.8097746, -1.5708438, 0.0, 0.0, 0...   
2  2.739721e+07  [-0.042569254, -0.8095828, -0.99836403, 0.0, 0...   
3  2.739721e+07  [-0.048321854, -0.9982682, 2.328078, 0.0, 0.0,...   
4  2.739721e+07  [3.0900102, 2.2941377, 0.0, 0.0, 0.0, 0.0, 0.0...   

                                                   x  \
0  [10.74, 27.42, 38.95, 8.4, 14.429999, 22.72, 2...   
1  [10.71, 27.92, 39.18, 8.46, 14.41, 22.699999, ...   
2  [10.559999, 28.359999, 39.53, 8.51, 14.679999,...   
3  [10.469999, 39.79, 40.309998, 8.58, 14.63, 22....   
4  [10.58, 39.76, 8.63, 14.69, 22.71, 26.359999, ...   

                                                   y  \
0  [-1.61, 0.31, 0.45999998, 4.46, -7.87, -9.0, 1...   
1  [-1.62, -0.53999996, 0.21, 4.46, -7.79, -9.03,...   
2  [-1.5799999, -1.02, -0.47, 4

In [6]:
import os
import pandas as pd

def find_closest_object(timestamp, objects):
    """
    Find the closest timestamp in the objects DataFrame and return the closest row.
    """
    closest_row = objects.iloc[(objects['timestamp'] - timestamp).abs().argsort()[:1]]
    return closest_row.iloc[0]  # Return the first matching row as a Series

def match_objects(detections, objects):
    """
    Match timestamps from detections with the closest timestamps in objects data.
    Add object labels and filtering columns to detections.
    """
    # Apply the timestamp matching to the detections DataFrame
    matched_objects = detections["timestamp"].map(lambda ts: find_closest_object(ts, objects))

    # Flatten the matched object rows into a DataFrame
    matched_objects_df = pd.DataFrame([row.to_dict() for row in matched_objects], index=detections.index)

    # Combine detections with matched objects
    combined_df = pd.concat([detections, matched_objects_df], axis=1)
    
    return combined_df

def find_closest_ego(timestamp, ego_motion):
    """
    Find the closest timestamp in the ego_motion DataFrame and return yawrate and egospeed.
    """
    closest_row = ego_motion.iloc[(ego_motion['timestamp'] - timestamp).abs().argsort()[:1]]
    return closest_row["RotationRates.yawRateVehicleBody.value"].values[0], closest_row["Velocity.SpeedCog.SpeedCog"].values[0]

def match_ego_motion(detections, ego_motion):
    """
    Match timestamps from detections with the closest timestamps in ego_motion data.
    Add yaw rate and ego speed to detections.
    """
    detections["yaw_rate"], detections["ego_speed"] = zip(
        *detections["timestamp"].map(lambda ts: find_closest_ego(ts, ego_motion))
    )
    return detections

def combine_all_data(detection_folder, ego_motion_folder, object_folder):
    """
    Process all detection, ego motion, and object files, match timestamps, and combine features.
    """
    combined_data = []
    
    detection_files = sorted([f for f in os.listdir(detection_folder) if f.endswith(".p")])
    ego_motion_files = sorted([f for f in os.listdir(ego_motion_folder) if f.endswith(".p")])
    object_files = sorted([f for f in os.listdir(object_folder) if f.endswith(".p")])

    for detection_file in detection_files:
        print(f"Processing file: {detection_file}")
        
        # Derive corresponding filenames for ego motion and object files
        ego_motion_file = detection_file.replace("frr40_detections", "egoMotionDynamicData")
        object_file = detection_file.replace("frr40_detections", "frr40_objects")

        # Check if corresponding files exist
        if ego_motion_file not in ego_motion_files:
            print(f"No matching ego motion file for {detection_file}. Skipping...")
            continue
        if object_file not in object_files:
            print(f"No matching object file for {detection_file}. Skipping...")
            continue

        # Load the data
        detection_path = os.path.join(detection_folder, detection_file)
        ego_motion_path = os.path.join(ego_motion_folder, ego_motion_file)
        object_path = os.path.join(object_folder, object_file)

        detections_df = pd.read_pickle(detection_path)
        ego_motion_df = pd.read_pickle(ego_motion_path)
        objects_df = pd.read_pickle(object_path)

        # Filter relevant columns
        detections_df = detections_df[["timestamp", "rcs", "distance", "angleAzimuth", "angleElevation", "radialVelocity", "radialVelocityDomainMax"]]
        ego_motion_df = ego_motion_df[["timestamp", "RotationRates.yawRateVehicleBody.value", "Velocity.SpeedCog.SpeedCog"]]
        objects_df = objects_df[["timestamp", "orientation", "x", "y", "width_edge_mean", "length_edge_mean", "status_measurement", "status_movement", "overdrivable", "underdrivable"]]

        # Match ego motion with detections
        detections_with_ego = match_ego_motion(detections_df, ego_motion_df)

        # Match objects with detections
        final_combined = match_objects(detections_with_ego, objects_df)

        # Add to the combined list
        combined_data.append(final_combined)

    # Combine all DataFrames into one
    if combined_data:
        return pd.concat(combined_data, ignore_index=True)
    else:
        print("No matching data found to combine.")
        return pd.DataFrame()  # Return an empty DataFrame if no data

# Define folder paths
detection_folder = "C:/FRR40/rat25-main/temp/detections"
ego_motion_folder = "C:/FRR40/rat25-main/temp/ego_motion"
object_folder = "C:/FRR40/rat25-main/temp/objects"

# Combine all data
unified_data = combine_all_data(detection_folder, ego_motion_folder, object_folder)

# Save the unified data for training
if not unified_data.empty:
    save_path = "C:/FRR40/rat25-main/processed/unified_data.pkl"
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    unified_data.to_pickle(save_path)
    print("\nUnified Data Shape:", unified_data.shape)
    print("Sample Unified Data:")
    print(unified_data.head())
else:
    print("Unified data is empty. Please check file alignment or processing steps.")


Processing file: frr40_detections_0.p
Processing file: frr40_detections_1.p
Processing file: frr40_detections_10.p
Processing file: frr40_detections_11.p
Processing file: frr40_detections_12.p
Processing file: frr40_detections_13.p
Processing file: frr40_detections_14.p
Processing file: frr40_detections_15.p
Processing file: frr40_detections_2.p
Processing file: frr40_detections_3.p
Processing file: frr40_detections_4.p
Processing file: frr40_detections_5.p
Processing file: frr40_detections_6.p
Processing file: frr40_detections_7.p
Processing file: frr40_detections_8.p
Processing file: frr40_detections_9.p

Unified Data Shape: (99552, 19)
Sample Unified Data:
      timestamp                                                rcs  \
0  2.739721e+07  [8, 12, -13, -22, -7, -2, -18, -24, -14, 11, -...   
1  2.739721e+07  [7, -15, 11, -7, -3, -17, -26, -23, -32, -15, ...   
2  2.739721e+07  [8, -12, -18, -7, -3, -18, -23, -5, -16, 4, -2...   
3  2.739721e+07  [8, 7, -17, 18, -28, -18, -2, -24, 

In [7]:
def handle_missing_values(data):
    """
    Handle missing values by removing rows with NaN values in critical columns.
    Args:
        data: Unified DataFrame.
    Returns:
        Cleaned DataFrame without missing values.
    """
    print("\n=== Handling Missing Values ===")
    print("\nBefore Handling Missing Values:")
    print("Total Rows:", data.shape[0])
    print("Missing Values Per Column:")
    print(data.isnull().sum())

    # Define critical columns
    critical_columns = ["rcs", "distance", "angleAzimuth", "angleElevation", "radialVelocity",
                        "orientation", "x", "y", "width_edge_mean", "length_edge_mean"]

    # Drop rows with missing values in critical columns
    cleaned_data = data.dropna(subset=critical_columns)

    print("\nAfter Handling Missing Values:")
    print("Total Rows:", cleaned_data.shape[0])
    print("Removed Rows:", data.shape[0] - cleaned_data.shape[0])

    return cleaned_data


In [8]:
# Apply missing value handling to unified data
cleaned_data = handle_missing_values(unified_data)

# Save the cleaned data for inspection
cleaned_save_path = "C:/FRR40/rat25-main/processed/cleaned_data.pkl"
cleaned_data.to_pickle(cleaned_save_path)

print("\nCleaned Data saved at:", cleaned_save_path)



=== Handling Missing Values ===

Before Handling Missing Values:
Total Rows: 99552
Missing Values Per Column:
timestamp                  0
rcs                        0
distance                   0
angleAzimuth               0
angleElevation             0
radialVelocity             0
radialVelocityDomainMax    0
yaw_rate                   0
ego_speed                  0
timestamp                  0
orientation                0
x                          0
y                          0
width_edge_mean            0
length_edge_mean           0
status_measurement         0
status_movement            0
overdrivable               0
underdrivable              0
dtype: int64

After Handling Missing Values:
Total Rows: 99552
Removed Rows: 0

Cleaned Data saved at: C:/FRR40/rat25-main/processed/cleaned_data.pkl


In [10]:
import pandas as pd

# Function to filter objects based on overdrivable + underdrivable condition
def filter_invalid_objects(row):
    valid_objects = []
    for i, (od, ud) in enumerate(zip(row["overdrivable"], row["underdrivable"])):
        # Keep only valid objects
        if od + ud <= 50:
            valid_objects.append(i)
    return valid_objects

def apply_filter_to_all_columns(df):
    """
    Apply the filtering operation to all columns based on valid objects.
    This will remove invalid objects from each feature column.
    """
    filtered_df = df.copy()

    # Apply filter to each row
    for idx, row in filtered_df.iterrows():
        # Find the valid objects
        valid_objects = filter_invalid_objects(row)

        # For each relevant column, filter based on valid objects
        for col in ["angleAzimuth", "angleElevation", "radialVelocity", "distance"]:
            # Keep only valid objects
            filtered_df.at[idx, col] = [obj for i, obj in enumerate(row[col]) if i in valid_objects]

        # Apply the same for other columns if necessary
        for col in ["overdrivable", "underdrivable", "status_measurement", "status_movement"]:
            filtered_df.at[idx, col] = [obj for i, obj in enumerate(row[col]) if i in valid_objects]
    
    return filtered_df

# Assuming `filtered_detections` is the DataFrame with all the feature columns
# and `filtered_objects` is the DataFrame with the object information
filtered_detections_filtered = {
    file: apply_filter_to_all_columns(df)
    for file, df in filtered_detections.items()
}

# Save the filtered detections after removing invalid objects
filtered_detections_folder = "C:/FRR40/rat25-main/processed/filtered_detections_filtered"
os.makedirs(filtered_detections_folder, exist_ok=True)

for file, df in filtered_detections_filtered.items():
    save_file_path = os.path.join(filtered_detections_folder, file)
    df.to_pickle(save_file_path)

print("\nFiltered detections with removed invalid objects saved in:", filtered_detections_folder)


KeyError: 'overdrivable'