In [None]:
import pandas as pd
import os

In [None]:
# Topics whose csv files get merged into a single csv file per ride.
# Topics currently left out of the merge:
#   /detection/detected_objects, /detection/final_objects,
#   /planning/global_path, /planning/local_path, /pacmod/enabled
desired_topics = [
    "/control/vehicle_cmd",
    "/localization/current_pose",
    "/localization/current_velocity",
    "/novatel/oem7/bestpos",
    "/novatel/oem7/inspva",
    "/vehicle/vehicle_status",
]

In [None]:
# The folders of rides we want to create unified csv files from.
# RIDES_ROOT is the single place to change when the recordings live elsewhere;
# it is prepended verbatim (plain string concatenation) to every ride name.
RIDES_ROOT = 'F:\\'

desired_folders_for_training = [
    RIDES_ROOT + ride_name
    for ride_name in (
        '2023-10-16-10-14-49_tiksoja_ride_02_sfa',
        '2023-10-16-10-30-17_tiksoja_ride_03_cluster',
        '2023-10-16-10-50-24_tiksoja_ride_04_sfa',
        '2023-10-16-13-47-19_tiksoja_ride_05_cluster',
        '2023-10-16-15-23-20_tiksoja_ride_08_cluster',
        '2023-10-30-09-55-20_tiksoja_ride_09_sfa_split_2',
        '2023-10-30-10-21-51_tiksoja_ride_10_cluster_split_1',
        '2023-10-30-10-34-58_tiksoja_ride_10_cluster_split_2',
        '2023-10-30-10-53-17_tiksoja_ride_11_cluster_split_1',
        '2023-10-30-11-07-11_tiksoja_ride_11_cluster_split_2',
        '2023-10-30-14-28-38_tiksoja_ride_12_sfa_split_1',
        '2023-10-30-15-04-36_tiksoja_ride_13_cluster_split_1',
        '2023-10-30-15-19-32_tiksoja_ride_13_cluster_split_2',
        '2023-10-31-09-57-58_tiksoja_ride_14_sfa_split_1',
        '2023-10-31-10-17-12_tiksoja_ride_14_sfa_split_2',
        '2023-10-31-10-41-16_tiksoja_ride_15_cluster_split_1',
        '2023-10-31-10-59-27_tiksoja_ride_15_cluster_split_2',
        '2023-11-02-11-47-11_tiksoja_ride_16_sfa_split_1',
        '2023-11-02-12-00-24_tiksoja_ride_16_sfa_split_2',
        '2023-11-02-12-44-53_tiksoja_ride_17_cluster_split_1',
        '2023-11-02-12-58-32_tiksoja_ride_17_cluster_split_2',
        '2023-11-03-09-57-03_tiksoja_ride_18_sfa_split_1',
        '2023-11-03-10-21-22_tiksoja_ride_18_sfa_split_2',
        '2023-11-03-10-59-56_tiksoja_ride_19_cluster_split_1',
        '2023-11-03-11-14-53_tiksoja_ride_19_cluster_split_2',
        '2023-11-03-13-42-01_tiksoja_ride_20_sfa_split_1',
        '2023-11-03-13-56-06_tiksoja_ride_20_sfa_split_2',
    )
]

In [None]:
# Column-name fragments: any column whose name contains one of these is left out of the csv files
excluded_parameters = ["header", "component_type", "child_frame"]

In [None]:
# Function to combine the csv files into a single csv file
def combine_csv_files(folder_path, desired_topics, excluded_parameters, output_dir=None):
    """Merge the per-topic CSV files of one ride folder into one time-aligned CSV.

    Each entry of ``desired_topics`` is read as a CSV file located inside
    ``folder_path``. Columns matching ``excluded_parameters`` are dropped, the
    'Time' column (seconds) becomes a datetime index, and all files are joined
    on that index with an outer join. Gaps are filled forward, then backward.

    Parameters
    ----------
    folder_path : str
        Folder that holds the per-topic CSV files of a single ride.
    desired_topics : iterable of str
        File names relative to ``folder_path``. Leading '/' or '\\' characters
        are ignored so the files are always looked up inside ``folder_path``.
    excluded_parameters : iterable of str or None
        Patterns (joined with '|' and used as a regular expression); every
        column whose name contains a match is removed. Empty or None keeps
        all columns.
    output_dir : str, optional
        Directory for the unified file. When omitted the file is written to
        ``F:\\`` as before.

    Returns
    -------
    str or None
        Path of the written file, or None when no usable input was found
        (nothing is written in that case).

    Notes
    -----
    Columns that carry the same name in several topics are not renamed, so
    the output may contain repeated column names.
    """
    # An empty pattern matches every column name and would strip all columns
    # (including 'Time'), so empty entries are dropped before building the regex.
    exclusion_pattern = '|'.join(term for term in (excluded_parameters or []) if term)

    dataframes = []

    # Processing each desired CSV file
    for file_name in desired_topics:
        # os.path.join discards folder_path when the next component starts with a
        # separator, which made every ride read the same files; strip it first.
        # NOTE(review): no extension is appended - confirm the files on disk are
        # really named exactly like the topic (e.g. without '.csv').
        file_path = os.path.join(folder_path, file_name.lstrip('/\\'))

        print(f"Processing file: {file_path}")

        try:
            # Loading the CSV file
            df = pd.read_csv(file_path)
        except FileNotFoundError:
            print(f"File {file_name} not found at path: {file_path}. Skipping this file.")
            continue
        except pd.errors.EmptyDataError:
            print(f"File {file_name} at path: {file_path} is empty. Skipping this file.")
            continue

        # Removing columns containing any of the excluded parameters
        if exclusion_pattern:
            df = df.loc[:, ~df.columns.str.contains(exclusion_pattern)]

        # The 'Time' column is the timestamp every file is aligned on
        if 'Time' not in df.columns:
            print(f"Warning: {file_name} does not contain a 'Time' column and will be skipped.")
            continue

        # assign() builds a new frame, avoiding a write into the .loc slice above
        timestamps = pd.to_datetime(df['Time'], unit='s', errors='coerce')
        df = df.assign(Time=timestamps).set_index('Time')

        # Rows without a parsable time cannot be aligned, and a non-unique index
        # makes the outer-join concat below fail; keep the latest row per timestamp.
        df = df[df.index.notna()]
        df = df[~df.index.duplicated(keep='last')]

        dataframes.append(df)

    # pd.concat raises on an empty list, so stop early when nothing was loaded
    if not dataframes:
        print(f"No usable CSV files found in {folder_path}. No unified file created.")
        return None

    # Outer join keeps every unique timestamp; sorting guarantees the forward
    # fill really propagates values forward in time before the backward fill
    # closes the gaps left at the start.
    combined_df = (
        pd.concat(dataframes, axis=1, join='outer')
        .sort_index()
        .ffill()
        .bfill()
        .rename_axis('Time')
        .reset_index()
    )

    # Creating output path (normpath so a trailing separator does not yield an empty name)
    folder_name = os.path.basename(os.path.normpath(folder_path))
    unified_name = f'{folder_name}_unified_timestamps.csv'
    if output_dir is None:
        output_file = f'F:\\{unified_name}'
    else:
        output_file = os.path.join(output_dir, unified_name)

    # Saving
    combined_df.to_csv(output_file, index=False)

    print(f"Unified CSV file created: {output_file}")
    return output_file

In [None]:
# Build one unified csv file for every ride folder in the training list
for folder_path in desired_folders_for_training:
    print(f"Processing folder: {folder_path}")
    combine_csv_files(folder_path, desired_topics, excluded_parameters)