In [None]:
# READ THE FILES, SET UP THE DATA, IMPORT THE FSTR FUNCTION AND CALCULATE THE VELOCITY THRESHOLDS

import tqdm
import time
start_time = time.time()
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
from separation_algorithm_gpt import form_groups

# Load every participant's CSV recording and derive the sample-to-sample velocity.
# Rows with missing values are dropped and the index is reset before any column is read.
dfs, x_values, y_values, t_values, velocity_values = {}, {}, {}, {}, {}

participant_numbers = []  # ids (as strings) of participants with at least one velocity sample
empty_adhd_participant_counter = 0  # ADHD participants whose velocity array is empty
empty_non_adhd_participant_counter = 0  # non-ADHD participants whose velocity array is empty

# File-number ranges (stop is exclusive): ADHD files first, then non-ADHD files
i_start_adhd, i_stop_adhd = 1, 29
i_start_no_adhd, i_stop_no_adhd = 29, 51


def _load_participant(pid):
    """Read data<pid>.csv into the shared dictionaries.

    Stores the cleaned frame, the 'Position_1' / 'Position_2' / 'Time' columns and the
    velocity (Euclidean step length divided by the time step) under the keys
    'df<pid>', 'x<pid>', 'y<pid>', 't<pid>' and 'v<pid>'.

    Returns True when the velocity array holds at least one sample.
    """
    frame = pd.read_csv('data{}.csv'.format(pid)).dropna().reset_index(drop=True)
    dfs['df{}'.format(pid)] = frame
    xs = x_values['x{}'.format(pid)] = frame['Position_1'].values
    ys = y_values['y{}'.format(pid)] = frame['Position_2'].values
    ts = t_values['t{}'.format(pid)] = frame['Time'].values
    step_x, step_y, step_t = np.diff(xs), np.diff(ys), np.diff(ts)
    speed = np.sqrt(step_x**2 + step_y**2) / step_t
    velocity_values['v{}'.format(pid)] = speed
    return len(speed) > 0


# Participants with ADHD
for pid in tqdm.tqdm(range(i_start_adhd, i_stop_adhd)):
    if _load_participant(pid):
        participant_numbers.append(str(pid))
    else:
        empty_adhd_participant_counter += 1

# Participants without ADHD
for pid in tqdm.tqdm(range(i_start_no_adhd, i_stop_no_adhd)):
    if _load_participant(pid):
        participant_numbers.append(str(pid))
    else:
        empty_non_adhd_participant_counter += 1
               
        
# One (velocity thresholds, k_values) pair per participant that has velocity data
v_values = []

# Maps 'Participant <n>' to the velocity threshold at which form_groups returned its minimum
threshold_dict = {}

# ADHD ids are processed first, then non-ADHD ids; each range gets its own progress bar
for id_range in (range(i_start_adhd, i_stop_adhd), range(i_start_no_adhd, i_stop_no_adhd)):
    for pid in tqdm.tqdm(id_range):
        speeds = velocity_values['v{}'.format(pid)]
        if len(speeds) == 0:
            # Nothing to analyse for this participant
            continue
        max_v = np.max(speeds)
        # Candidate thresholds are passed to form_groups divided by the peak velocity
        # and multiplied back by max_v before being stored
        threshold_array = np.arange(0.1/max_v, 15.0/max_v, 0.1/max_v)
        detection = form_groups(speeds, threshold_array, False, x_label="Velocity Threshold", title="Fixations and Saccades", x_axis_format="%.3f")
        k_values = detection
        v_values.append((threshold_array*max_v, k_values))
        threshold_dict['Participant {}'.format(pid)] = threshold_array[np.argmin(detection)] * max_v

# Summary table of participant and corresponding velocity threshold
threshold_table = pd.DataFrame(list(threshold_dict.items()), columns=['Participant', 'Velocity Threshold'])

print(threshold_table)
  
# K-ratio curve figures: all participants, each group separately, cumulative per group,
# and both groups' cumulative curves together.
import matplotlib.lines as mlines

# v_values and participant_numbers hold the ADHD participants with data first, followed by
# the non-ADHD ones, so the number of non-empty ADHD participants is exactly the index at
# which the non-ADHD entries begin. Do not derive that index from len(participant_numbers)
# and the non-ADHD range size: that is only right when both groups have the same number of
# empty participants, and otherwise drops or misassigns a curve.
n_adhd_curves = (i_stop_adhd - i_start_adhd) - empty_adhd_participant_counter
adhd_curves = list(zip(participant_numbers[:n_adhd_curves], v_values[:n_adhd_curves]))
non_adhd_curves = list(zip(participant_numbers[n_adhd_curves:], v_values[n_adhd_curves:]))


def _finish_k_ratio_figure(fig, ax, title, ylabel, filename):
    """Apply the shared titles/labels/margins, save the figure as JPEG and display it."""
    ax.set_title(title, fontsize=20)
    ax.set_xlabel('Velocity Threshold', fontsize=16)
    ax.set_ylabel(ylabel, fontsize=16)
    ax.tick_params(axis='both', which='major', labelsize=14)
    # Adjust the subplot parameters to reduce the blank space
    fig.subplots_adjust(left=0.05, right=0.95, top=0.9, bottom=0.2)
    # Save the figure with a tight bounding box
    fig.savefig(filename, format='jpeg', bbox_inches='tight')
    plt.show()


def _plot_k_ratio_per_participant(curves, legend_cols, title, ylabel, filename, cumulative=False):
    """Draw one labelled line per (participant, (thresholds, k_values)) entry.

    With cumulative=True the running sum of k_values is drawn instead of k_values.
    """
    fig, ax = plt.subplots(figsize=(14, 8))
    for participant, (thresholds, k_values) in curves:
        y = np.cumsum(k_values) if cumulative else k_values
        ax.plot(thresholds, y, label='Participant {}'.format(participant))
    ax.legend(ncol=legend_cols, fontsize=12)
    _finish_k_ratio_figure(fig, ax, title, ylabel, filename)


# 1. All the curves in the same graph
_plot_k_ratio_per_participant(adhd_curves + non_adhd_curves, 5,
                              'K-Ratio for IVT / All Participants',
                              'K-Ratio Value', 'K_ratio_all.jpg')

# 2A. ADHD curves in a separate graph
_plot_k_ratio_per_participant(adhd_curves, 3,
                              'K-Ratio for IVT / ADHD Participants',
                              'K-Ratio Value', 'K_ratio_ADHD.jpg')

# 2B. Non-ADHD curves in a separate graph
_plot_k_ratio_per_participant(non_adhd_curves, 3,
                              'K-Ratio for IVT / non-ADHD Participants',
                              'K-Ratio Value', 'K_ratio_nonADHD.jpg')

# 3A. ADHD cumulative curves in a separate graph
_plot_k_ratio_per_participant(adhd_curves, 3,
                              'Cumulative sum K-Ratio for IVT / ADHD Participants',
                              'Cumulative sum K-Ratio Value', 'K_ratio_ADHD_cum.jpg',
                              cumulative=True)

# 3B. Non-ADHD cumulative curves in a separate graph
_plot_k_ratio_per_participant(non_adhd_curves, 3,
                              'Cumulative sum K-Ratio for IVT / non-ADHD Participants',
                              'Cumulative sum K-Ratio Value', 'K_ratio_nonADHD_cum.jpg',
                              cumulative=True)

# 4. ADHD (red) and non-ADHD (blue) cumulative curves in the same graph
fig, ax = plt.subplots(figsize=(14, 8))
for _, (threshold_array, k_values) in adhd_curves:
    ax.plot(threshold_array, np.cumsum(k_values), color='red')
for _, (threshold_array, k_values) in non_adhd_curves:
    ax.plot(threshold_array, np.cumsum(k_values), color='blue')

# Custom legend: one entry per group instead of one entry per curve
red_line = mlines.Line2D([], [], color='red', label='ADHD')
blue_line = mlines.Line2D([], [], color='blue', label='Non-ADHD')
ax.legend(handles=[red_line, blue_line], ncol=3, fontsize=12)
_finish_k_ratio_figure(fig, ax, 'Cumulative sum K-Ratio for IVT / All Participants',
                       'Cumulative sum K-Ratio Value', 'K_ratio_cum.jpg')


# Table of cumulative values: the largest value reached by each participant's
# cumulative sum of k_values
max_values = [np.max(np.cumsum(curve)) for _, curve in v_values]

# Pair every participant number with that maximum
participant_data = pd.DataFrame({'Participant': participant_numbers, 'Max Value': max_values})
print(participant_data)


# Report the wall-clock time elapsed since start_time was taken
end_time = time.time()
execution_time = end_time - start_time
print("Execution time:", execution_time, "seconds")

In [None]:
# Build and save a CSV with, per participant: the velocity threshold, the FSTR value
# (minimum of the participant's k_values) and the cumulative FSTR value
min_k_values_list = [np.min(curve) for _, curve in v_values]
Vel_K_CumK_list = threshold_table.assign(**{
    'FSTR Value': min_k_values_list,
    'Cumulative FSTR Value': participant_data['Max Value'].tolist(),
})
Vel_K_CumK_list.to_csv('Vel_K_CumK_list.csv', index=False)
Vel_K_CumK_list

In [None]:
# ALL THE PLOTTING
# FSTR curve figures: both groups together, each group separately, and both groups'
# cumulative curves together. ADHD is drawn in orange, non-ADHD in blue.
import matplotlib.lines as mlines

# v_values and participant_numbers hold the ADHD participants with data first, followed by
# the non-ADHD ones, so the number of non-empty ADHD participants is exactly the index at
# which the non-ADHD entries begin. Do not derive that index from len(participant_numbers)
# and the non-ADHD range size: that is only right when both groups have the same number of
# empty participants, and otherwise drops or misassigns a curve.
n_adhd_fstr = (i_stop_adhd - i_start_adhd) - empty_adhd_participant_counter
adhd_fstr = list(zip(participant_numbers[:n_adhd_fstr], v_values[:n_adhd_fstr]))
non_adhd_fstr = list(zip(participant_numbers[n_adhd_fstr:], v_values[n_adhd_fstr:]))


def _finish_fstr_figure(fig, ax, title, ylabel, filename):
    """Apply the shared titles/labels/margins, save the figure as JPEG and display it."""
    ax.set_title(title, fontsize=20)
    ax.set_xlabel('Velocity Threshold (pixels/ms)', fontsize=16)
    ax.set_ylabel(ylabel, fontsize=16)
    ax.tick_params(axis='both', which='major', labelsize=14)
    # Adjust the subplot parameters to reduce the blank space
    fig.subplots_adjust(left=0.05, right=0.95, top=0.9, bottom=0.2)
    # Save the figure with a tight bounding box
    fig.savefig(filename, format='jpeg', bbox_inches='tight')
    plt.show()


def _plot_fstr_by_group(title, ylabel, filename, cumulative=False):
    """Draw every curve coloured by group, with a two-entry legend (one entry per group).

    With cumulative=True the running sum of k_values is drawn instead of k_values.
    """
    fig, ax = plt.subplots(figsize=(14, 8))
    for curves, colour in ((adhd_fstr, 'orange'), (non_adhd_fstr, 'blue')):
        for _, (thresholds, k_values) in curves:
            ax.plot(thresholds, np.cumsum(k_values) if cumulative else k_values, color=colour)
    orange_line = mlines.Line2D([], [], color='orange', label='ADHD')
    blue_line = mlines.Line2D([], [], color='blue', label='non-ADHD')
    ax.legend(handles=[orange_line, blue_line], ncol=3, fontsize=12)
    _finish_fstr_figure(fig, ax, title, ylabel, filename)


def _plot_fstr_per_participant(curves, title, filename):
    """Draw one labelled FSTR line per (participant, (thresholds, k_values)) entry."""
    fig, ax = plt.subplots(figsize=(14, 8))
    for participant, (thresholds, k_values) in curves:
        ax.plot(thresholds, k_values, label='Participant {}'.format(participant))
    ax.legend(ncol=3, fontsize=12)
    _finish_fstr_figure(fig, ax, title, 'FSTR Value', filename)


# 1. All the curves in the same graph, coloured by group
_plot_fstr_by_group('FSTR curves', 'FSTR Value', 'KRatio_all.jpg')

# 2A. ADHD curves in a separate graph
_plot_fstr_per_participant(adhd_fstr, 'FSTR curves for ADHD Participants', 'KRatio_ADHD.jpg')

# 2B. Non-ADHD curves in a separate graph
_plot_fstr_per_participant(non_adhd_fstr, 'FSTR curves for non-ADHD Participants', 'KRatio_nonADHD.jpg')

# 3. ADHD and non-ADHD cumulative curves in the same graph, coloured by group
_plot_fstr_by_group('Cumulative Sum FSTR curves', 'Cumulative Sum FSTR Value', 'KRatio_cum.jpg',
                    cumulative=True)


In [None]:
# Bar chart of every participant's minimum FSTR value, coloured by group
from matplotlib.patches import Patch

fig, ax = plt.subplots(figsize=(20,8))

# One bar per row of the summary table
x = np.arange(len(Vel_K_CumK_list))

# Define the bar width
bar_width = 0.4

# Plot the minimum FSTR value
k_ratio_bars = ax.bar(x, Vel_K_CumK_list['FSTR Value'], width=bar_width)

# Colour each bar from the participant id stored in its own row ('Participant <n>')
# rather than from a fixed bar position, so the colours stay right if the number of
# usable ADHD recordings changes.
bar_ids = [str(label).split()[-1] for label in Vel_K_CumK_list['Participant']]
for bar, pid in zip(k_ratio_bars, bar_ids):
    bar.set_color('orange' if i_start_adhd <= int(pid) < i_stop_adhd else 'blue')

# Set the x-axis ticks and labels (one label per plotted row)
ax.set_xticks(x)
ax.set_xticklabels([f'P{pid}' for pid in bar_ids], fontsize=10)

# Set the y-axis label
ax.set_ylabel('Minimum FSTR Value', fontsize=16)

# Set the title
ax.set_title('Minimum FSTR Value', fontsize=20)

# Create custom legend entries, one per group
adhd_legend = Patch(facecolor='orange', edgecolor='black', label='ADHD')
non_adhd_legend = Patch(facecolor='blue', edgecolor='black', label='non-ADHD')

# Add legends
ax.legend(handles=[adhd_legend, non_adhd_legend], loc='upper right', fontsize=12)

fig.savefig('Min K-Ratio Value.jpg', format='jpeg', bbox_inches='tight')

# Display the plot
plt.show()



In [None]:
# Bar chart of every participant's cumulative FSTR value, coloured by group
from matplotlib.patches import Patch

fig, ax = plt.subplots(figsize=(20,8))

# One bar per row of the summary table
x = np.arange(len(Vel_K_CumK_list))

# Define the bar width
bar_width = 0.4

# Plot the cumulative FSTR value
k_ratio_bars = ax.bar(x, Vel_K_CumK_list['Cumulative FSTR Value'], width=bar_width)

# Colour each bar from the participant id stored in its own row ('Participant <n>')
# rather than from a fixed bar position, so the colours stay right if the number of
# usable ADHD recordings changes.
bar_ids = [str(label).split()[-1] for label in Vel_K_CumK_list['Participant']]
for bar, pid in zip(k_ratio_bars, bar_ids):
    bar.set_color('orange' if i_start_adhd <= int(pid) < i_stop_adhd else 'blue')

# Set the x-axis ticks and labels (one label per plotted row)
ax.set_xticks(x)
ax.set_xticklabels([f'P{pid}' for pid in bar_ids], fontsize=10)

# Set the y-axis label
ax.set_ylabel('Cumulative Sum FSTR Value', fontsize=16)

# Set the title
ax.set_title('Cumulative Sum FSTR Value', fontsize=20)

# Create custom legend entries, one per group
adhd_legend = Patch(facecolor='orange', edgecolor='black', label='ADHD')
non_adhd_legend = Patch(facecolor='blue', edgecolor='black', label='non-ADHD')

# Add legends
ax.legend(handles=[adhd_legend, non_adhd_legend], loc='upper right', fontsize=12)

fig.savefig('Cumulative K-Ratio Value.jpg', format='jpeg', bbox_inches='tight')

# Display the plot
plt.show()

In [None]:
# T-test analysis: independent two-sample t-tests comparing the ADHD and non-ADHD
# participants on the minimum FSTR value and on the cumulative FSTR value

import pandas as pd
import numpy as np
from scipy.stats import ttest_ind

# Split the rows by the participant id held in the 'Participant' column ('Participant <n>')
# instead of by a fixed row position, so the groups stay correct if the number of usable
# ADHD recordings changes.
participant_ids = Vel_K_CumK_list['Participant'].str.split().str[-1].astype(int)
is_adhd = (participant_ids >= i_start_adhd) & (participant_ids < i_stop_adhd)
first_group = Vel_K_CumK_list[is_adhd]    # ADHD
second_group = Vel_K_CumK_list[~is_adhd]  # non-ADHD

# Perform the t-tests (scipy default: equal variances assumed)
t_stat_minFSTR, p_value_minFSTR = ttest_ind(first_group['FSTR Value'], second_group['FSTR Value'])
t_stat_cumFSTR, p_value_cumFSTR = ttest_ind(first_group['Cumulative FSTR Value'], second_group['Cumulative FSTR Value'])



print(f"Min FSTR Value: t-stat = {t_stat_minFSTR:.2f}, p-value = {p_value_minFSTR:.4f}")
print(f"Cumulative FSTR Value: t-stat = {t_stat_cumFSTR:.2f}, p-value = {p_value_cumFSTR:.4f}")


In [None]:
# Calculate the saccade frequency and the mean fixation duration for every participant,
# using the participant's own velocity threshold from threshold_table

import numpy as np
import pandas as pd
from itertools import groupby


def _fixation_durations_and_saccade_count(binary_vector, time_diff):
    """Split a 0/1 sample classification into consecutive runs.

    Parameters
    ----------
    binary_vector : array of 0 (velocity at or below threshold) and 1 (above threshold).
    time_diff : array of sample durations aligned element-for-element with binary_vector.

    Returns
    -------
    (fixation_durations, saccade_count) : the summed duration of every run of 0s, in
    order of occurrence, and the number of runs of 1s.
    """
    fixation_durations = []
    saccade_count = 0
    run_start = 0
    for key, group in groupby(binary_vector):
        # groupby yields the run's VALUES, not positions, so track the positions here
        # and sum the time steps that actually belong to this run.
        run_stop = run_start + sum(1 for _ in group)
        if key == 0:
            fixation_durations.append(time_diff[run_start:run_stop].sum())
        else:
            saccade_count += 1
        run_start = run_stop
    return fixation_durations, saccade_count


# Per-participant results, keyed by 'Participant <n>'
saccade_counts = {}
fixation_counts = {}
saccade_frequencies = {}
mean_fixation_durations = {}
ratio_above_below_counts = {}

for participant in participant_numbers:
    # Skip participants whose velocity or timestamp array is empty or contains only zeros
    if not velocity_values[f'v{participant}'].any() or not t_values[f't{participant}'].any():
        continue

    # Get the velocity threshold selected for the participant
    threshold = threshold_table.loc[threshold_table['Participant'] == f'Participant {participant}', 'Velocity Threshold'].values[0]
    # Get the participant's velocity values
    participant_velocity_values = velocity_values[f'v{participant}']
    # Get the participant's timestamps
    participant_t_values = t_values[f't{participant}']
    # Binary vector: 0 = fixation sample (at or below threshold), 1 = saccade sample (above)
    binary_vector = (participant_velocity_values > threshold).astype(int)

    total_time = participant_t_values[-1] - participant_t_values[0]

    # Time step of every velocity sample; same length as binary_vector because both
    # come from first differences of the same recording
    time_diff = np.diff(participant_t_values)

    # Duration of every fixation and the number of saccades
    fixation_duration, saccade_count = _fixation_durations_and_saccade_count(binary_vector, time_diff)
    fixation_count = len(fixation_duration)

    # NOTE(review): the two conversions below assume timestamps are in milliseconds -- confirm
    saccade_freq = saccade_count / total_time * 1000 # convert to per second
    mean_fixation_duration = np.mean(fixation_duration) / 1000  # convert to seconds

    # Ratio of samples above the threshold to samples at or below it
    above_threshold_count = np.sum(binary_vector == 1)
    below_threshold_count = np.sum(binary_vector == 0)
    ratio_above_below = above_threshold_count / below_threshold_count

    # Add the counts, frequencies, and mean fixation duration to their respective dictionaries
    saccade_counts[f'Participant {participant}'] = saccade_count
    fixation_counts[f'Participant {participant}'] = fixation_count
    saccade_frequencies[f'Participant {participant}'] = saccade_freq
    mean_fixation_durations[f'Participant {participant}'] = mean_fixation_duration
    ratio_above_below_counts[f'Participant {participant}'] = ratio_above_below

# Create dataframes
saccade_counts_df = pd.DataFrame(saccade_counts.items(), columns=['Participant', 'Saccade Count'])
fixation_counts_df = pd.DataFrame(fixation_counts.items(), columns=['Participant', 'Fixation Count'])
saccade_frequencies_df = pd.DataFrame(saccade_frequencies.items(), columns=['Participant', 'Saccade Frequency'])
mean_fixation_durations_df = pd.DataFrame(mean_fixation_durations.items(), columns=['Participant', 'Mean Fixation Duration'])
ratio_above_below_df = pd.DataFrame(ratio_above_below_counts.items(), columns=['Participant', 'Ratio Above/Below Threshold'])

# Merge the dataframes with the threshold_table (inner joins on 'Participant', so skipped
# participants do not appear in the result)
results_table = threshold_table.merge(saccade_counts_df, on='Participant')\
.merge(fixation_counts_df, on='Participant').merge(saccade_frequencies_df, on='Participant')\
.merge(mean_fixation_durations_df, on='Participant').merge(ratio_above_below_df, on='Participant')

# Display the results table
print(results_table)

In [None]:
# Bar chart of every participant's velocity threshold, coloured by group
from matplotlib.patches import Patch

fig, ax = plt.subplots(figsize=(20,8))

# One bar per row of the results table
x = np.arange(len(results_table))

# Define the bar width
bar_width = 0.4

# Plot the velocity threshold
Vel_Thres_bars = ax.bar(x, results_table['Velocity Threshold'], width=bar_width)

# Take ids from the plotted rows themselves ('Participant <n>'): results_table can hold
# fewer participants than participant_numbers, and a fixed bar position for the group
# boundary breaks as soon as the number of usable ADHD recordings changes.
bar_ids = [str(label).split()[-1] for label in results_table['Participant']]
for bar, pid in zip(Vel_Thres_bars, bar_ids):
    bar.set_color('orange' if i_start_adhd <= int(pid) < i_stop_adhd else 'blue')

# Set the x-axis ticks and labels (one label per plotted row)
ax.set_xticks(x)
ax.set_xticklabels([f'P{pid}' for pid in bar_ids], fontsize=10)

# Set the y-axis label
ax.set_ylabel('Velocity Threshold (pixel/ms)', fontsize=16)

# Set the title
ax.set_title('Velocity Threshold', fontsize=20)

# Create custom legend entries, one per group
adhd_legend = Patch(facecolor='orange', edgecolor='black', label='ADHD')
non_adhd_legend = Patch(facecolor='blue', edgecolor='black', label='non-ADHD')

# Add legends
ax.legend(handles=[adhd_legend, non_adhd_legend], loc='upper right', fontsize=12)

fig.savefig('Velocity Threshold.jpg', format='jpeg', bbox_inches='tight')

# Display the plot
plt.show()

In [None]:
# Bar chart of every participant's saccade frequency, coloured by group
from matplotlib.patches import Patch

fig, ax = plt.subplots(figsize=(20,8))

# One bar per row of the results table
x = np.arange(len(results_table))

# Define the bar width
bar_width = 0.4

# Plot the saccade frequency
Sac_Freq_bars = ax.bar(x, results_table['Saccade Frequency'], width=bar_width)

# Take ids from the plotted rows themselves ('Participant <n>'): results_table can hold
# fewer participants than participant_numbers, and a fixed bar position for the group
# boundary breaks as soon as the number of usable ADHD recordings changes.
bar_ids = [str(label).split()[-1] for label in results_table['Participant']]
for bar, pid in zip(Sac_Freq_bars, bar_ids):
    bar.set_color('orange' if i_start_adhd <= int(pid) < i_stop_adhd else 'blue')

# Set the x-axis ticks and labels (one label per plotted row)
ax.set_xticks(x)
ax.set_xticklabels([f'P{pid}' for pid in bar_ids], fontsize=10)

# Set the y-axis label
ax.set_ylabel('Saccade Frequency (#/sec)', fontsize=16)

# Set the title
ax.set_title('Saccade Frequency', fontsize=20)

# Create custom legend entries, one per group
adhd_legend = Patch(facecolor='orange', edgecolor='black', label='ADHD')
non_adhd_legend = Patch(facecolor='blue', edgecolor='black', label='non-ADHD')

# Add legends
ax.legend(handles=[adhd_legend, non_adhd_legend], loc='upper right', fontsize=12)

fig.savefig('Saccade Frequency.jpg', format='jpeg', bbox_inches='tight')

# Display the plot
plt.show()

In [None]:
# Bar chart of the mean fixation duration, coloured by group, with the row at index 9
# of results_table left out
from matplotlib.patches import Patch

# NOTE(review): the exclusion is by row index 9; confirm this is the intended participant
filtered_results = results_table.drop(index=9)
filtered_mean_fix_dur = filtered_results['Mean Fixation Duration']

# Set up the plot with one subplot
fig, ax = plt.subplots(figsize=(20,8))

# One bar per remaining row
x = np.arange(len(filtered_mean_fix_dur))

# Define the bar width
bar_width = 0.4

# Plot the Mean Fixation Duration Value
Fix_Dur_bars = ax.bar(x, filtered_mean_fix_dur, width=bar_width)

# Colours and labels come from the rows that are actually plotted ('Participant <n>').
# participant_numbers holds strings, so filtering it with `!= 10` removes nothing and
# yields one label too many; and once a row is dropped a fixed group boundary is off by one.
bar_ids = [str(label).split()[-1] for label in filtered_results['Participant']]
for bar, pid in zip(Fix_Dur_bars, bar_ids):
    bar.set_color('orange' if i_start_adhd <= int(pid) < i_stop_adhd else 'blue')

# Set the x-axis ticks and labels (exactly one label per bar)
ax.set_xticks(x)
participant_labels = [f'P{pid}' for pid in bar_ids]
ax.set_xticklabels(participant_labels, fontsize=10)

# Set the y-axis label
ax.set_ylabel('Mean Fixation Duration (sec)', fontsize=16)

# Set the title
ax.set_title('Mean Fixation Duration', fontsize=20)

# Create custom legend entries, one per group
adhd_legend = Patch(facecolor='orange', edgecolor='black', label='ADHD')
non_adhd_legend = Patch(facecolor='blue', edgecolor='black', label='non-ADHD')

# Add legends
ax.legend(handles=[adhd_legend, non_adhd_legend], loc='upper right', fontsize=12)

fig.savefig('Mean Fixation Duration (excluding index 9).jpg', format='jpeg', bbox_inches='tight')

# Display the plot
plt.show()


In [None]:
# T-test for the saccade frequency and fixation duration: independent two-sample t-tests
# comparing the ADHD and non-ADHD participants

import pandas as pd
import numpy as np
from scipy.stats import ttest_ind

# Split the rows by the participant id held in the 'Participant' column ('Participant <n>')
# instead of by a fixed row position: results_table can hold fewer participants than were
# loaded, and the number of usable ADHD recordings may change.
participant_ids = results_table['Participant'].str.split().str[-1].astype(int)
is_adhd = (participant_ids >= i_start_adhd) & (participant_ids < i_stop_adhd)
first_group = results_table[is_adhd]    # ADHD
second_group = results_table[~is_adhd]  # non-ADHD

# The row at index 9 is left out of the fixation-duration comparison only
# NOTE(review): the exclusion is by row index 9; confirm this is the intended participant
first_group_excl_9 = first_group.drop(index=9)

# Perform the t-tests (scipy default: equal variances assumed)
t_stat_sacfreq, p_value_sacfreq = ttest_ind(first_group['Saccade Frequency'], second_group['Saccade Frequency'])
t_stat_fixdur, p_value_fixdur = ttest_ind(first_group_excl_9['Mean Fixation Duration'], second_group['Mean Fixation Duration'])



print(f"Saccade Frequency: t-stat = {t_stat_sacfreq:.2f}, p-value = {p_value_sacfreq:.4f}")
print(f"Mean Fixation Duration: t-stat = {t_stat_fixdur:.2f}, p-value = {p_value_fixdur:.4f}")
