In [1]:
import pandas as pd
import dask.dataframe as dd
import os

# --- Shared Configuration ---
# Name of the steering column; it is also one of the selectable features below.
steering_var = 'SteeringInput'
# Algorithm tag that appears as a suffix in the input and output file names.
alg = "nearest_neighbor"

# --- Event Names to Process ---
event_names = [
    'StagEventNew',
    'FallingRocksEventNew',
    'FogEventNew',
]

# --- User-defined Feature Combinations ---
# The processing loop in this cell uses only the *length* of each combination
# (to pick the `outliers_removed_<N>features` folder), so every combination
# should have a distinct number of features.
feature_combinations = [
    # horizontal gaze/head direction only
    ['EyeDirWorldCombined.x', 'NoseVector.x'],
    ['EyeDirWorldCombined.x', 'NoseVector.x', steering_var],
    # horizontal + vertical gaze/head direction
    ['EyeDirWorldCombined.x', 'NoseVector.x', 'EyeDirWorldCombined.y', 'NoseVector.y'],
    ['EyeDirWorldCombined.x', 'NoseVector.x', 'EyeDirWorldCombined.y', 'NoseVector.y', steering_var],
]

# --- Data Location ---
# Paths are resolved relative to the directory the notebook is started from.
base_dir = os.getcwd()
data_root = os.path.join(base_dir, 'data', 'cleaned_data', 'data_segment')

# --- Loop through all feature combinations and events ---
# For every (feature-count folder, event) pair: load the segment CSV, number
# the rows within each `uid` group, set the steering column to 0 for the early
# frames, and write the result to a `steeringRemoved` sub-folder next to the
# input. Input files are never modified.

# Rows with frameNumber 1..zero_steering_until_frame (per uid) get steering = 0.
zero_steering_until_frame = 250

# Explicit dtypes for the identifier / text / timing columns. The mapping does
# not depend on the file being read, so it is built once rather than per file.
dtypes = {
    'uid': 'str', 'dataset': 'str', 'city_section': 'str', 'ExperimentalCondition': 'str',
    'EventName': 'object', 'HitObjectName': 'object', 'ObjectName_4': 'object', 'ObjectName_5': 'object',
    'EventDuration': 'float64', 'TimeStamp': 'float64'
}

for features in feature_combinations:
    # Only the number of features matters here: it selects the input folder.
    feature_count = len(features)
    folder_name = f"outliers_removed_{feature_count}features"
    data_path = os.path.join(data_root, folder_name)

    for event_name in event_names:
        filename = f"segment_around_{event_name}_{alg}.csv"
        filepath = os.path.join(data_path, filename)

        if not os.path.exists(filepath):
            print(f"File not found, skipping: {filepath}")
            continue

        print(f"Processing: {filepath}")

        # Load file with Dask and materialise it as a single pandas DataFrame.
        df = dd.read_csv(
            filepath, assume_missing=True, dtype=dtypes, blocksize="100MB", low_memory=False
        ).compute()

        # Each Dask partition carries its own 0-based index, so the combined
        # frame can contain repeated index labels. Give it a unique index
        # before the label-aligned assignments below. The index is not written
        # to the CSV (index=False), so the saved output is unaffected.
        df = df.reset_index(drop=True)

        # --- Create frameNumber column ---
        # 1-based running row count within each uid, in file order.
        df['frameNumber'] = df.groupby('uid').cumcount() + 1

        # --- Set SteeringInput to exactly 0 for the early frames ---
        if steering_var in df.columns:
            df.loc[df['frameNumber'] <= zero_steering_until_frame, steering_var] = 0
        else:
            # Do not fail, but make it visible that the "steeringRemoved"
            # output for this file only gained the frameNumber column.
            print(f"Warning: column '{steering_var}' not found in {filepath}; steering left unchanged.")

        # --- Save updated CSV ---
        output_dir = os.path.join(data_path, 'steeringRemoved')
        os.makedirs(output_dir, exist_ok=True)

        output_filename = f"segment_around_{event_name}_{alg}_steeringRemoved.csv"
        output_path = os.path.join(output_dir, output_filename)

        df.to_csv(output_path, index=False)
        print(f"Saved updated file to: {output_path}\n")


Processing: c:\Users\erene\OneDrive\Desktop\fpca-takeOverRequests\data\cleaned_data\data_segment\outliers_removed_2features\segment_around_StagEventNew_nearest_neighbor.csv
Saved updated file to: c:\Users\erene\OneDrive\Desktop\fpca-takeOverRequests\data\cleaned_data\data_segment\outliers_removed_2features\steeringRemoved\segment_around_StagEventNew_nearest_neighbor_steeringRemoved.csv

Processing: c:\Users\erene\OneDrive\Desktop\fpca-takeOverRequests\data\cleaned_data\data_segment\outliers_removed_2features\segment_around_FallingRocksEventNew_nearest_neighbor.csv
Saved updated file to: c:\Users\erene\OneDrive\Desktop\fpca-takeOverRequests\data\cleaned_data\data_segment\outliers_removed_2features\steeringRemoved\segment_around_FallingRocksEventNew_nearest_neighbor_steeringRemoved.csv

Processing: c:\Users\erene\OneDrive\Desktop\fpca-takeOverRequests\data\cleaned_data\data_segment\outliers_removed_2features\segment_around_FogEventNew_nearest_neighbor.csv
Saved updated file to: c:\Users\