In [1]:
import pandas as pd
import numpy as np
import os
from scipy.fft import fft

# Define the input and output directories
input_dir = 'testing_data_trial_2'
output_dir = 'testing_data_trial_2_with_features'


def dominant_frequency_index(signal):
    """Return the FFT bin index of the strongest non-DC component of *signal*.

    Only bins ``1 .. N // 2`` are searched. Bin 0 is the sum of the samples,
    so for any signal with a non-zero mean it would otherwise win the peak
    search; bins above ``N // 2`` mirror the lower ones for real-valued input
    and carry no extra information.

    The result is a bin index, not a frequency in Hz: no sampling rate is
    known here.

    Args:
        signal: One-dimensional sequence of real numbers.

    Returns:
        The index (>= 1) of the largest-magnitude bin, or 0 when there is no
        non-DC bin to inspect (fewer than two samples) or no non-DC bin has a
        magnitude greater than zero.
    """
    samples = np.asarray(signal, dtype=float)
    half = samples.size // 2
    if half == 0:
        # Zero or one sample: the spectrum has no bin other than DC.
        return 0

    magnitudes = np.abs(fft(samples))[1:half + 1]
    if not magnitudes.max() > 0:
        # Written as "not > 0" so an all-NaN spectrum also lands here.
        return 0

    # +1 undoes the offset introduced by slicing away bin 0.
    return int(np.argmax(magnitudes)) + 1


def build_feature_table(signal):
    """Build the per-sample feature table for one signal.

    Every summary statistic is a single value for the whole signal and is
    repeated on each row so the table keeps one row per sample.

    Args:
        signal: One-dimensional sequence of numeric samples.

    Returns:
        A DataFrame with the columns ``Time Series`` (0-based sample index),
        ``VRM`` (the samples as given), ``Mean``, ``Standard Deviation``,
        ``Variance`` and ``Dominant Frequency Index``. An empty signal gives
        an empty table with the same columns.
    """
    values = np.asarray(signal)
    count = len(values)

    if count:
        mean_value = np.mean(values)
        std_value = np.std(values)
        variance_value = np.var(values)
    else:
        # Statistics of an empty signal are undefined; skip the NumPy calls
        # that would only emit "empty slice" warnings.
        mean_value = std_value = variance_value = np.nan

    return pd.DataFrame({
        'Time Series': np.arange(count),
        'VRM': values,
        'Mean': np.full(count, mean_value),
        'Standard Deviation': np.full(count, std_value),
        'Variance': np.full(count, variance_value),
        'Dominant Frequency Index': np.full(
            count, dominant_frequency_index(values)
        ),
    })


def process_directory(source_dir, target_dir):
    """Write a feature table for every ``.csv`` file found in *source_dir*.

    Each input file must have a ``VRM`` column. The output file keeps the
    input file's name and is written to *target_dir*, which is created if it
    does not exist yet.

    Args:
        source_dir: Directory containing the input CSV files.
        target_dir: Directory that receives the feature CSV files.

    Raises:
        FileNotFoundError: If *source_dir* does not exist.
        KeyError: If an input file has no ``VRM`` column.
    """
    # Ensure output directory exists
    os.makedirs(target_dir, exist_ok=True)

    # Sorted so files are processed in the same order on every run.
    for file_name in sorted(os.listdir(source_dir)):
        if not file_name.endswith('.csv'):
            continue

        df = pd.read_csv(os.path.join(source_dir, file_name))
        if 'VRM' not in df.columns:
            raise KeyError(f"{file_name}: no 'VRM' column")

        features_df = build_feature_table(df['VRM'].values)
        features_df.to_csv(os.path.join(target_dir, file_name), index=False)


# True both when run as a script and inside a notebook cell, so the batch
# still runs there; importing this file no longer touches the filesystem.
if __name__ == '__main__':
    process_directory(input_dir, output_dir)