# Two Second Intervals

In [None]:
import os
import pandas as pd

In [None]:
# Machine-specific absolute path to the project root; change this when
# running the notebook on another computer.
project_directory = r'C:\Users\marco\OneDrive\Desktop\Final Year Project'
# Make the project root the working directory so that relative paths built
# later in the notebook resolve against it.
os.chdir(project_directory)
# Record the resulting working directory (after the chdir above).
base_dir = os.getcwd() 

In [17]:
def numeric(s):
    """Return the integer spelled by the digit characters contained in *s*.

    Every character for which ``str.isdigit`` is true is kept, in its
    original order, and the concatenation is converted with ``int``; for
    example ``"P16"`` gives ``16`` and ``"a1b2"`` gives ``12``.  A string
    without any digit makes ``int`` raise ``ValueError``.
    """
    digit_chars = [ch for ch in s if ch.isdigit()]
    return int(''.join(digit_chars))

def create_windowed_data(df, arousal_columns, valence_columns, participant_id):
    """Aggregate one participant's samples into 2-second windows.

    For every row the median across ``arousal_columns`` and across
    ``valence_columns`` is computed, rows are bucketed by
    ``floor(time / 2) * 2`` of the ``'time in seconds'`` column, and the
    mean of every remaining column is taken per bucket.

    Args:
        df: DataFrame holding a ``'time in seconds'`` column plus the
            arousal and valence columns named below.  It is not modified.
        arousal_columns: Names of the columns combined into
            ``'median_arousal'``.
        valence_columns: Names of the columns combined into
            ``'median_valence'``.
        participant_id: Value stored in the ``'participant_id'`` column of
            every output row.

    Returns:
        A new DataFrame with one row per 2-second window.  Its columns are
        ``'participant_id'`` and ``'time_window'`` followed by the per-window
        means of all other columns (including the two median columns).

    Raises:
        KeyError: If ``'time in seconds'`` or any listed arousal/valence
            column is missing from ``df``.
    """
    # Work on a copy so the helper columns added below never leak into the
    # DataFrame owned by the caller.
    frame = df.copy()

    # Row-wise median across the listed arousal / valence columns.
    frame['median_arousal'] = frame[arousal_columns].median(axis=1)
    frame['median_valence'] = frame[valence_columns].median(axis=1)

    # Start of the 2-second window each sample falls into (0, 2, 4, ...).
    frame['time_window'] = ((frame['time in seconds'] // 2) * 2).astype(int)

    # 'frame_path' is removed before averaging; errors='ignore' makes this
    # a no-op when the column is absent.
    frame = frame.drop(columns=['frame_path'], errors='ignore')

    # Mean of every remaining column within each window.
    grouped = frame.groupby('time_window', as_index=False).mean()
    grouped['participant_id'] = participant_id

    # Put the identifying columns first, keep the rest in their order.
    leading = ['participant_id', 'time_window']
    trailing = [col for col in grouped.columns if col not in leading]
    return grouped[leading + trailing]

def process_all_files(input_folder, output_folder):
    """Window every participant CSV in a folder and write one combined CSV.

    Each ``*.csv`` file in ``input_folder`` is read, reduced to 2-second
    windows with ``create_windowed_data`` and tagged with a participant id
    taken from the file name (the text before the first ``'.'``).  The
    per-file results are concatenated, sorted by the number embedded in the
    participant id and then by ``time_window``, and written to
    ``ArousalValenceTimeSeries.csv`` inside ``output_folder``.

    Args:
        input_folder: Directory containing the per-participant CSV files.
        output_folder: Directory that receives the combined CSV; it is
            created if it does not exist.

    Raises:
        ValueError: If ``input_folder`` contains no ``.csv`` file, or if a
            participant id contains no digit to sort by.
    """
    # Annotation columns combined into the per-row medians.  The irregular
    # spacing in the names is kept exactly as written.
    # NOTE(review): presumably these mirror the headers of the input CSVs;
    # confirm against the data.
    arousal_columns = ['FM1 _x', 'FM2 _x', 'FM3 _x', 'FF1 _x', 'FF2 _x', 'FF3_x']
    valence_columns = ['FM1 _y', 'FM2 _y', 'FM3 _y', 'FF1 _y', 'FF2 _y', 'FF3_y']

    all_data_frames = []

    # os.listdir returns entries in arbitrary order; sorting makes the run
    # (and the order of any ties in the final sort) reproducible.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith('.csv'):
            continue

        # Participant id is the part of the file name before the first '.'.
        participant_id = filename.split('.')[0]

        df = pd.read_csv(os.path.join(input_folder, filename))

        # Remove the 'Unnamed: 0' column when the file has one.
        if 'Unnamed: 0' in df.columns:
            df = df.drop(columns=['Unnamed: 0'])

        # Fall back to the row index as the time axis when the file has no
        # explicit time column.
        # NOTE(review): this treats each row as one second; confirm that
        # matches the sampling rate of such files.
        if 'time in seconds' not in df.columns:
            df['time in seconds'] = df.index

        all_data_frames.append(
            create_windowed_data(df, arousal_columns, valence_columns, participant_id)
        )

    # pd.concat([]) would fail with an unhelpful "No objects to concatenate";
    # raise the same exception type with a message that names the folder.
    if not all_data_frames:
        raise ValueError(f"No .csv files found in {input_folder!r}")

    combined_df = pd.concat(all_data_frames, ignore_index=True)

    # Sort by the numeric part of participant_id (so e.g. 2 sorts before 10)
    # and then by time_window, using a temporary helper column.
    combined_df['numeric_participant_id'] = combined_df['participant_id'].map(numeric)
    combined_df = combined_df.sort_values(by=['numeric_participant_id', 'time_window'])
    combined_df = combined_df.drop(columns=['numeric_participant_id'])

    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists before calling makedirs.
    os.makedirs(output_folder, exist_ok=True)

    combined_output_path = os.path.join(output_folder, 'ArousalValenceTimeSeries.csv')
    combined_df.to_csv(combined_output_path, index=False)

    print("Windowed Data Created Successfully For All Participants.")

# Input and output locations, given relative to the current working directory.
input_folder = os.path.join('RECOLA Ranking Algorithms', 'RECOLA_Processed_Data')
output_folder = os.path.join('RECOLA Ranking Algorithms', 'RECOLA_Intervals_Data')

# Run the pipeline over every CSV in input_folder; the combined result is
# written as ArousalValenceTimeSeries.csv inside output_folder.
process_all_files(input_folder, output_folder)


Windowed Data Created Successfully For All Participants.
