In [1]:
import pandas as pd
from datetime import datetime, timedelta
import os

# Function to process each file set
def process_files(name, fps=30.0):
    """Align one camera track with its sensor log and write the merged CSV.

    Reads ``own/combined/{name}.csv``, ``own/start/{name}.txt`` and
    ``own/cam/{name}.csv``, converts the camera frame numbers into absolute
    timestamps (start time + ``frame_num / fps`` seconds), attaches to every
    row of the combined file the camera row with the nearest timestamp, and
    writes the result to ``own/data/{name}.csv``.

    Args:
        name: Base file name (without extension) shared by the input files.
        fps: Frame rate used to turn ``frame_num`` into seconds. Defaults to
            30.0, the rate the original script assumed.

    Returns:
        None. If an input file or the 'date/time' column is missing, a
        message is printed and the file set is skipped without writing output.

    Raises:
        KeyError: If the camera CSV has no 'frame_num' column.
        ValueError: If ``fps`` is not positive, or the start file does not
            match the "%Y-%m-%d %H:%M:%S" format.
    """
    if not fps > 0:
        raise ValueError(f"fps must be positive, got {fps!r}")

    combined_path = f'own/combined/{name}.csv'
    start_path = f'own/start/{name}.txt'
    camera_path = f'own/cam/{name}.csv'
    output_path = f'own/data/{name}.csv'

    # Skip (rather than crash) when any input of the set is missing, so a
    # single incomplete set does not abort a batch run.
    required_inputs = (
        ("Start", start_path),
        ("Combined", combined_path),
        ("Camera", camera_path),
    )
    for label, path in required_inputs:
        if not os.path.exists(path):
            print(f"{label} file {path} not found. Skipping {name}.")
            return

    # Read the start time from the start.txt file
    with open(start_path, 'r') as file:
        start_time_str = file.read().strip()
    start_time = datetime.strptime(start_time_str, "%Y-%m-%d %H:%M:%S")

    # Read the combined and camera CSV files
    combined_df = pd.read_csv(combined_path)
    camera_df = pd.read_csv(camera_path)

    # Print the columns for debugging
    print(f"Columns in {camera_path}: {camera_df.columns}")
    print(f"Columns in {combined_path}: {combined_df.columns}")

    # Check if combined_df has a date/time column
    if 'date/time' not in combined_df.columns:
        print(f"'date/time' column not found in the combined CSV file {combined_path}. Skipping {name}.")
        return

    if 'frame_num' not in camera_df.columns:
        raise KeyError("'frame_num' column not found in the camera CSV file.")

    # Turn frame numbers into absolute timestamps in one vectorized step.
    # Both merge keys are cast to datetime64[ns] because merge_asof requires
    # the key columns to have the same dtype.
    frame_offsets = pd.to_timedelta(camera_df['frame_num'] / fps, unit='s')
    camera_df['timestamp'] = (
        frame_offsets + pd.Timestamp(start_time)
    ).astype('datetime64[ns]')

    # Rename the x, y, z columns so they do not clash with the combined file
    camera_df = camera_df.rename(columns={'x': 'x_cam', 'y': 'y_cam', 'z': 'z_cam'})

    # Convert the date/time column to datetime
    combined_df['timestamp'] = pd.to_datetime(
        combined_df['date/time']
    ).astype('datetime64[ns]')

    # Merge the dataframes based on the closest timestamps
    merged_df = pd.merge_asof(combined_df.sort_values('timestamp'),
                              camera_df.sort_values('timestamp'),
                              on='timestamp',
                              direction='nearest')

    # Drop the frame_num column as it is no longer needed
    merged_df = merged_df.drop(columns=['frame_num'], errors='ignore')

    # Interpolate only the numeric columns: the frame also holds the raw
    # 'date/time' strings and the datetime key, which cannot be linearly
    # interpolated (object-dtype interpolation is deprecated in pandas).
    numeric_cols = merged_df.select_dtypes(include='number').columns
    if not numeric_cols.empty:
        merged_df[numeric_cols] = merged_df[numeric_cols].interpolate(method='linear')

    # Save the merged and interpolated dataframe to the output path
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    merged_df.to_csv(output_path, index=False)

# Get the list of file names to process (without extensions).
# Sorted because os.listdir returns entries in arbitrary order; sorting keeps
# the processing order and the printed log reproducible between runs.
names = sorted(
    os.path.splitext(file)[0]
    for file in os.listdir('own/combined')
    if file.endswith('.csv')
)

# Process each file set
for name in names:
    try:
        process_files(name)
    except (KeyError, ValueError, OSError) as e:
        # A problem with one file set (missing column, malformed start time
        # or CSV, unreadable file) is reported and must not abort the
        # remaining sets.
        print(f"Error processing {name}: {e}")


Columns in own/cam/1.csv: Index(['frame_num', 'x', 'y', 'z'], dtype='object')
Columns in own/combined/1.csv: Index(['date/time', 'gyro_x', 'gyro_y', 'gyro_z', 'accel_x', 'accel_y',
       'accel_z', 'x', 'y'],
      dtype='object')
Columns in own/cam/2.csv: Index(['frame_num', 'x', 'y', 'z'], dtype='object')
Columns in own/combined/2.csv: Index(['date/time', 'gyro_x', 'gyro_y', 'gyro_z', 'accel_x', 'accel_y',
       'accel_z', 'x', 'y'],
      dtype='object')
Start file own/start/3.txt not found. Skipping 3.
Columns in own/cam/4.csv: Index(['frame_num', 'x', 'y', 'z'], dtype='object')
Columns in own/combined/4.csv: Index(['date/time', 'gyro_x', 'gyro_y', 'gyro_z', 'accel_x', 'accel_y',
       'accel_z', 'x', 'y'],
      dtype='object')
Columns in own/cam/5.csv: Index(['frame_num', 'x', 'y', 'z'], dtype='object')
Columns in own/combined/5.csv: Index(['date/time', 'gyro_x', 'gyro_y', 'gyro_z', 'accel_x', 'accel_y',
       'accel_z', 'x', 'y'],
      dtype='object')
