# Analysis of Task Performance during the NF training
 
##### In this script the Task Performance (Percentage of targets hit - percentage of distractors hit) is analysed. 

In [3]:
import pandas as pd, os, numpy as np, matplotlib.pyplot as plt
import seaborn as sns
from pylab import plot, show, savefig, xlim, figure, ylim, legend, boxplot, setp, axes
from scipy.stats import ttest_ind 
from scipy.stats import ttest_rel
from itertools import combinations
import scipy.stats as stats
import statsmodels.api as sm
import matplotlib.patches as mpatches
import sys

from pymer4.models import Lmer,Lm
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
emmeans = importr('emmeans')
import ipynb

# Extend the module search path (entries are relative to the current working
# directory). Both parent folders are inserted at the front, so '..' ends up
# being searched first, followed by '../..'.
sys.path.insert(0, '../..')
sys.path.insert(0, '..')
# Appended last, i.e. searched after all existing entries.
# NOTE(review): presumably the folder containing the notebook
# 'ROI_Analysis_SoA_network_NF_Task.ipynb' that is imported further below - confirm.
sys.path.append('../fMRI/ROI_analyses/')

-----

# Set main paths & subject P codes

In [None]:
# Output folder for figures and results
save_to = './behav_results/NF/'

# Root folder of the input data (entries of this folder are used as subject folders)
main_path = './data/'

# Subject P codes: every entry of the data folder whose name starts with 'P',
# in alphabetical order
subj_list = sorted(entry for entry in os.listdir(main_path) if entry.startswith('P'))

# Number of subjects, followed by a quick visual check of what was found
n_subj = len(subj_list)
print(n_subj)
print(subj_list)



# Extract main data

In [None]:
# Collect the per-subject Excel workbooks (HitXO and TimeInTurb) from each
# subject's 'V01/5_FND_NF_TPJ_Run1/behav' folder.
# The Excel files have been created with the script 'Performance_output_gen.m'.
filenames_hits = []  # HitXO workbook names, one per subject, in subj_list order
filenames_turb = []  # TimeInTurb workbook names, in subj_list order

for subject in subj_list:
    path = os.path.join(main_path, subject, 'V01', '5_FND_NF_TPJ_Run1', 'behav')

    # List the folder once and pick both kinds of workbook from the same listing
    workbooks = [f for f in sorted(os.listdir(path))
                 if f.endswith('xlsx') and f.startswith('P')]

    hit_files = [f for f in workbooks if 'HitXO' in f]
    # filenames_hits is later indexed by subject position, so a subject with
    # zero or several matches would silently shift every following subject.
    if len(hit_files) != 1:
        raise ValueError(
            f"Expected exactly one HitXO workbook for {subject} in {path}, "
            f"found {len(hit_files)}: {hit_files}"
        )
    filenames_hits.extend(hit_files)

    turb_files = [f for f in workbooks if 'TimeInTurb' in f]
    filenames_turb.extend(turb_files)

# Group-level HitXO file
filename_hits_all_subj = os.path.join(main_path, 'group_analysis', 'behav_results', 'NF', 'HitX_HitO_all_subjects.xlsx')

# Group-level TimeInTurb file
filename_turb_all_subj = os.path.join(main_path, 'group_analysis', 'behav_results', 'NF', 'TimeInTurb_all_subjects.xlsx')

print(filenames_hits)


# Task performance calculation

In [None]:
# Subject-level calculation and visualisation of task performance

# Main list with one entry per subject: [P code, per-run data, metric names]
runs_all_subj = []
plt.style.use('default')
metrics = ['Performance']

for subject in range(n_subj):
    # Join the path component-wise (as is done when the file names are collected)
    # so that it does not rely on main_path ending with a path separator
    input_path_hit = os.path.join(main_path, subj_list[subject], 'V01',
                                  '5_FND_NF_TPJ_Run1', 'behav', filenames_hits[subject])
    # sheet_name=2 selects the third sheet of the workbook (zero-based position)
    df_hitXO = pd.read_excel(input_path_hit, sheet_name=2)

    # One entry per run (columns 0-8): a single-element list holding the complete
    # column of that run. The list wrapper gives one slot per metric.
    runs_for_subject = [[df_hitXO.iloc[:, run]] for run in range(9)]

    # Append the data for all runs of the current subject to the main list
    runs_all_subj.append([subj_list[subject], runs_for_subject, metrics])


# Fit a straight line (mean value per run ~ run number) for every subject and metric.
# regression_lines[P code][metric] holds the fitted values for runs 1-9.
# The dictionary has to exist before the loop fills it.
regression_lines = {}

# The design matrix is identical for every fit: a constant column plus the run number
run_numbers = np.arange(1, 10)
x_values = sm.add_constant(run_numbers.reshape(-1, 1))

for subj_id, runs_for_subject, metrics in runs_all_subj:
    # Regression values of this subject, keyed by metric name
    regression_lines[subj_id] = {}

    for idx, metric in enumerate(metrics):
        # Mean of each run's values for the current metric, as a column vector
        y_values = np.array([run[idx].mean() for run in runs_for_subject]).reshape(-1, 1)

        # Ordinary least squares fit with statsmodels
        model = sm.OLS(y_values, x_values).fit()

        # Position 0 belongs to the constant column, position 1 to the run number
        intercept = model.params[0]
        slope = model.params[1]
        pvalue_int = model.pvalues[0]
        pvalue_slope = model.pvalues[1]
        r2 = model.rsquared
        print(subj_id, 'intercept:', intercept,
              'slope:', slope,
              'pvalue_intercept:', pvalue_int,
              'pvalue_slope:', pvalue_slope,
              'r2:', r2)

        # Fitted values at runs 1-9, stored for plotting
        regression_lines[subj_id][metric] = intercept + slope * run_numbers

# Create one figure per subject: one box per run plus the fitted trend line
box_positions = np.arange(2, 35, 4)  # x position of the box of runs 1-9 (2, 6, ..., 34)
colors = ['darkorange']  # one colour per metric

# savefig does not create missing folders, so make sure the target exists
os.makedirs(save_to, exist_ok=True)

for subj_id, subject_runs, subject_metrics in runs_all_subj:
    fig, ax = plt.subplots(figsize=(10, 6))  # Adjust figure size as needed

    for pos, run_data in zip(box_positions, subject_runs):
        bp = ax.boxplot(run_data, positions=[pos],
                        widths=0.6, patch_artist=True, showmeans=True,
                        meanprops=dict(marker='o', markerfacecolor='black',
                                       markersize=4, markeredgecolor='black'))
        for patch, color in zip(bp['boxes'], colors):
            patch.set_facecolor(color)
        # One median line per box: black and slightly transparent
        for median, _ in zip(bp['medians'], colors):
            median.set(color='k', linewidth=2)
            median.set_alpha(0.6)

    # Regression line of the current subject. The fitted values belong to runs
    # 1-9, so they are drawn at the x positions of the corresponding boxes.
    for metric, color in zip(subject_metrics, colors):
        ax.plot(box_positions, regression_lines[subj_id][metric],
                label=f'{metric} Trend', color=color, linestyle='--', alpha=0.7)

    # One tick per box, labelled with the run number
    ax.set_xticks(box_positions)
    ax.set_xticklabels([f'Run {run}' for run in range(1, 10)], fontsize=14)
    ax.set_ylabel('Percentage (%)', fontsize=16)
    ax.set_title(subj_id, fontsize=20)
    ax.set_yticks(range(0, 109, 10))
    ax.tick_params(axis='y', labelsize=14)

    # Custom legend handles, placed outside the axes on the right
    legend_handles = [
        mpatches.Patch(color='darkorange', label='Performance'),
        plt.Line2D([0], [0], color='darkorange', linewidth=2, linestyle='--',
                   label='Performance Trend'),
    ]
    ax.legend(handles=legend_handles, loc='upper left', bbox_to_anchor=(1, 1))

    # Save the figure (comment the following line out to skip saving)
    fig.savefig(os.path.join(save_to, subj_id + '_Real_Performance.png'),
                dpi=200, bbox_inches='tight')


In [None]:
# Linear mixed model + ANOVA on the fitted performance values.
# Uses the functions from the notebook 'ROI_Analysis_SoA_network_NF_Task.ipynb'
# for the LMM + subsequent ANOVA on fitted values.

# Load the group-level table. input_path_hit must not be used here: after the
# per-subject loop it still points at the workbook of the last subject only,
# whereas the 'Subject' column used below belongs to a table covering all subjects.
# NOTE(review): assumes the group workbook keeps the performance values on the
# third sheet (sheet_name=2), like the per-subject workbooks - confirm.
df_hitXO = pd.read_excel(filename_hits_all_subj, sheet_name=2)

# Index the table by subject and label that index 'P-Code'
df_hitXO.set_index('Subject', inplace=True)
df_hitXO.index.name = 'P-Code'

# Column names 'Run 1', 'Run 2', ... become 'Run1', 'Run2', ...
df_hitXO.columns = df_hitXO.columns.str.replace('Run ', 'Run')


In [None]:
# NOTE(review): resetting __package__ is presumably a workaround required for the
# notebook import on the next line in this environment - confirm before removing.
__package__ = None
# Import run_lmm from the notebook 'ROI_Analysis_SoA_network_NF_Task.ipynb'
from ipynb.fs.defs.ROI_Analysis_SoA_network_NF_Task import run_lmm

# Run the linear mixed model on the performance table and print its summary.
# NOTE(review): the meaning of (30,100) is defined inside run_lmm
# (presumably axis limits for a plot) - confirm.
result_perf = run_lmm(df_hitXO, 'Performance',(30,100))
print(result_perf.summary())

In [None]:
# Get the run_lmm_pymer4 function (LMM for post-hoc analysis) and use it on the NF training performance data
from ipynb.fs.defs.ROI_Analysis_SoA_network_NF_Task import run_lmm_pymer4

# Apply it to the performance table; three results are returned
# (model, marginal estimates, post hoc results).
# NOTE(review): run_lmm is called with the label 'Performance' for the same
# table, here it is 'hit_XO' - confirm that the differing label is intended.
model,marginal_estimates, post_hoc_results = run_lmm_pymer4(df_hitXO, 'hit_XO')
# Show the post hoc results (rendered as the cell output in a notebook)
post_hoc_results
