In [1]:
import os
import csv
import pandas as pd
import numpy as np

In [2]:
data_dir = '/Users/rooskraaijveld/Desktop/innovaid_hackathon_anima/input'

In [3]:
all_files = sorted(os.listdir(data_dir))
for i in range(0, 10):
    folder_name = os.path.join(data_dir, all_files[i])
    df = pd.read_csv(folder_name, header=0, index_col=0)
    

In [4]:
def median_time_pos_neg_per_scene(csv_path):
    df = pd.read_csv(csv_path, header=0, index_col=0)
    
    block_durations = {'positive': [], 'negative': [], 'neutral': [], 'nan': []}
    current_scene = None
    start_time = None
    current_block = None

    for index, row in df.iterrows():
        posneg_value = str(row['IMAGE_TYPE'])
        scene_value = row['SCENE_INDEX']

        if current_block is None or current_block != posneg_value or current_scene != scene_value:
            # Start a new block
            if current_block is not None:
                block_durations[str(current_block)].append(row['TIMESTAMP'] - start_time)

            current_block = posneg_value
            start_time = row['TIMESTAMP']
            current_scene = scene_value
        elif current_block == posneg_value and current_scene == scene_value:
            # Continue the current block

            if index == len(df) - 1:
                block_durations[str(current_block)].append(row['TIMESTAMP'] - start_time + 1)

    # Median duration for each block type
    median_durations = {block_type: pd.to_timedelta(durations).median().total_seconds() if durations else 0
                        for block_type, durations in block_durations.items()}

    results = {
        'duration_positive': median_durations['positive'],
        'duration_negative': median_durations['negative'],
        'duration_neutral': median_durations['neutral'],
        'duration_nan': median_durations['nan']
    }

    return results

In [5]:
duration_scene = median_time_pos_neg_per_scene(os.path.join(data_dir, all_files[0]))
duration_scene

{'duration_positive': 2.16e-07,
 'duration_negative': 2.5e-07,
 'duration_neutral': 2.66e-07,
 'duration_nan': 2.66e-07}

In [6]:
def median_time_pos_neg(csv_path):
    df = pd.read_csv(csv_path, header=0, index_col=0)
    
    block_velocities = {'positive': [], 'negative': [], 'neutral' : [], 'nan' : []}
    current_block = None
    start_time = None
    current_scene = None

    for index, row in df.iterrows():
        posneg_value = str(row['IMAGE_TYPE'])
        scene_value = row['SCENE_INDEX']

        if current_block is None or current_block != posneg_value or current_scene != scene_value:
            # Start a new block
            if current_block is not None:
                block_durations[str(current_block)].append(row['TIMESTAMP'] - start_time)
            current_block = posneg_value
            start_time = row['TIMESTAMP']
            current_scene = scene_value
        elif current_block == posneg_value and current_scene == scene_value:
            # Continue the current block

            if index == len(df) - 1:
                block_durations[str(current_block)].append(row['TIMESTAMP'] - start_time + 1)

    # Calculate the average duration for each block type
    average_durations = {block_type: sum(durations) / len(durations) if durations else 0
                         for block_type, durations in block_durations.items()}

    results = {'duration_positive' : average_durations['positive'],
              'duration_negative' : average_durations['negative'],
              'duration_neutral' : average_durations['neutral'],
              'duration_nan' : average_durations['nan']}
              
    return results

In [8]:
def number_of_switches_per_scene(csv_path):
    df = pd.read_csv(csv_path, header=0, index_col=0)
    
    start_time = None
    current_scene = None
    state_switch_count = {'positive': 0, 'negative': 0, 'neutral': 0, 'nan': 0}
    previous_state = None

    min_block_duration = 1

    # Iterate through the DataFrame
    for index, row in df.iterrows():
        posneg_value = str(row['IMAGE_TYPE'])
        scene_index = row['SCENE_INDEX']
        
        if current_scene is None or current_scene != scene_index:
            # Start a new block
            if current_scene is not None and (row['TIMESTAMP'] - start_time) >= min_block_duration:
                # Save the duration of the previous block
                state_switch_count[str(previous_state)] += 1

            current_scene = scene_index
            start_time = row['TIMESTAMP']
        elif current_scene == scene_index:
            # Continue the current block

            # Check if the state has switched
            if previous_state is not None and posneg_value != previous_state:
                state_switch_count[str(posneg_value)] += 1

        previous_state = posneg_value

    # Check the last block
    if current_scene is not None and (row['TIMESTAMP'] - start_time) >= min_block_duration:
        state_switch_count[str(previous_state)] += 1

    state_switch_count["total_switches"] = sum(state_switch_count.values())

    results = {
        'switches_positive': state_switch_count['positive'],
        'switches_negative': state_switch_count['negative'],
        'switches_neutral': state_switch_count['neutral'],
        'switches_nan': state_switch_count['nan'],
        'switches_total': state_switch_count['total_switches']
    }
    return results


In [9]:
switch_count = number_of_switches_per_scene(os.path.join(data_dir, all_files[0]))
switch_count

{'switches_positive': 92,
 'switches_negative': 87,
 'switches_neutral': 93,
 'switches_nan': 142,
 'switches_total': 414}

In [10]:
switch_count = number_of_switches(os.path.join(data_dir, all_files[0]))
switch_count

{'switches_positive': 100,
 'switches_negative': 85,
 'switches_neutral': 90,
 'switches_nan': 139,
 'switches_total': 414}

In [57]:
def euclidean_distance(x1, y1, x2, y2):
    return np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
# Function to calculate total length of a scanpath
def calculate_total_length(df):
    total_length = 0
    for i in range(1, len(df)):
        x1, y1 = df.iloc[i - 1] ['RX'], df.iloc[i - 1]['RY']
        x2, y2 = df.iloc[i]     ['RX'], df.iloc[i]    ['RY']
        total_length += euclidean_distance(x1, y1, x2, y2)
    return total_length

def total_distance_scene(csv_path):
    df = pd.read_csv(csv_path, header=0, index_col=0)
    total_length_per_image = df.groupby('IMAGE_TYPE').apply(calculate_total_length)
    
    if total_length_per_image.empty:
        results = {
        'distance_positive': 0,
        'distance_negative': 0,
        'distance_neutral': 0}
    else:   
        results = {
            'distance_positive': total_length_per_image['positive'],
            'distance_negative': total_length_per_image['negative'],
            'distance_neutral': total_length_per_image['neutral'],

        }
        
   
    return results
    

In [58]:
total_distance_scene(os.path.join(data_dir, all_files[0]))

{'distance_positive': 81.43569046996839,
 'distance_negative': 61.84394945589903,
 'distance_neutral': 66.78798276347858}

In [59]:
def get_velocities(csv_path):
    df = pd.read_csv(csv_path)
    df.sort_values(by='TIMESTAMP')
    time0 = df.drop(index = len(df) - 1)
    time1 = df.drop(index = 0).reset_index()
    rx_delta = time0['RX']-time1['RX']
    ry_delta = time0['RY']-time1['RY']
    velocity = (rx_delta**2 + ry_delta**2)**0.5
    velocity = pd.concat([pd.Series([0]), velocity])
    df['velocity'] = velocity.values
    img_velocities = df.groupby('IMAGE_TYPE')['velocity'].mean()
    
    if img_velocities.empty:
        results = {
        'velocity_positive': 0,
        'velocity_negative': 0,
        'velocity_neutral': 0}
    else:   
        results = {
            'velocity_positive': img_velocities['positive'],
            'velocity_negative': img_velocities['negative'],
            'velocity_neutral': img_velocities['neutral'],

        }
    
    return results
get_velocities(os.path.join(data_dir, 'sHsuhtf1RpCPKAcc1vTW.csv'))

{'velocity_positive': 0, 'velocity_negative': 0, 'velocity_neutral': 0}

In [60]:
output_dir = '/Users/rooskraaijveld/Desktop/innovaid_hackathon_anima/output.csv'

In [65]:
def analyse_all_csv(csv_path, output_path, save_path):
    all_csv_files = os.listdir(csv_path)
    
    for file in range(0, len(all_csv_files)):
        try:
            print("Percentage done: ", file/len(all_csv_files)*100, "%", end='\r' )
            subject_csv_path = os.path.join(csv_path, all_csv_files[file])
            subject = (subject_csv_path.split("/")[-1]).split(".csv")[0]

            df = pd.read_csv(output_path, header=0, index_col=0)

            target_value = df.loc[df['sid'] == subject, 'BDI'].iloc[0]

            if target_value >= 0 and target_value <= 9:
                BDI = 0
            if target_value >= 10 and target_value <= 19:
                BDI = 1
            if target_value >= 19 and target_value <= 29:
                BDI = 2
            if target_value >= 30 and target_value <= 64:
                BDI = 3

            switch_count = number_of_switches_per_scene(subject_csv_path)
            average_durations = median_time_pos_neg_per_scene(subject_csv_path)
            velocities = get_velocities(subject_csv_path)
            distance = total_distance_scene(subject_csv_path)

            results = {'subject':subject,'BDI_class': BDI,'BDI': target_value, **switch_count, **average_durations, **distance, **velocities}

            df = pd.DataFrame([results])
            csv_file_path = os.path.join(save_path, str(subject)+"_variables.csv")

            df.to_csv(csv_file_path, index=False)
        except Exception as e:
            print(f"An error occurred: {e}")

    print("Completed")

In [66]:
analyse_all_csv(data_dir, output_dir, '/Users/rooskraaijveld/Desktop/innovaid_hackathon_anima/csv_variables_2')

An error occurred: 'positive'3165699 %%%
An error occurred: 'positive'840103 %%
An error occurred: 'negative'789812 %%
An error occurred: 'neutral'81947132 %
An error occurred: 'negative'647969 %%
An error occurred: 'neutral'98710505 %
An error occurred: 'negative'627982 %%
An error occurred: 'positive'8923276 %
An error occurred: 'positive'996132 %%
An error occurred: 'negative'565441 %
An error occurred: 'neutral'0199871 %
An error occurred: 'negative'36557 %%
Completede done:  99.96776273372018 %
