# Code to reproduce statistical tests and analyses

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import os
from scipy import stats
import pingouin as pg
import scikit_posthocs
import scipy
import math

  **kwargs


In [2]:
set_WD = os.getcwd()  # current working directory; every data path below is resolved relative to it
# Folder where the data files prepared for the statistical analyses are stored
# (they are produced by figures_gaze_aversions.ipynb).
# Paths are built with os.path.join so they do not depend on the Windows separator.
# The final '' component keeps a trailing separator, so a file name can still be
# appended with `+` as done further down.
files_for_stats = os.path.join(set_WD, 'data', 'processed_data', 'files for statitical analyses', '')
path_for_figures = os.path.join(set_WD, 'data', 'figures', '')

### Statistical analyses for gaze aversions

In [3]:
# Load the data sets that will be compared.
# Each dataframe is described by three columns:
#   sujet            = code of the participant (e.g. P01)
#   effort OR clarte = name of the variable, which has 3 levels (1, 2 or 3)
#   prop_VD          = proportion of questions where a gaze aversion was observed
#                      (VD stands for visual disengagement = gaze aversion)
# NOTE(review): the fill helpers defined below append four values per row, so the
# files presumably also carry an index column in first position - confirm against the CSVs.

VD_access_clarte = pd.read_csv(os.path.join(files_for_stats, "Access_prop_VD_clarte.csv"))
VD_visu_clarte = pd.read_csv(os.path.join(files_for_stats, "Elaboration_prop_VD_clarte.csv"))

# The effort files have a trailing space both in their file name and in their
# 'effort ' column; the column name is fixed right after loading.
VD_access_effort = (
    pd.read_csv(os.path.join(files_for_stats, "Access_prop_VD_effort .csv"))
    .rename(columns={'effort ': 'effort'})
)
VD_visu_effort = (
    pd.read_csv(os.path.join(files_for_stats, "Elaboration_prop_VD_effort .csv"))
    .rename(columns={'effort ': 'effort'})
)

# Trial-level data of the access period (built without a hard-coded Windows separator)
df = pd.read_csv(os.path.join(set_WD, 'data', 'processed_data', 'data_T1_access.csv'))

In [4]:
# Anonymised participant codes P01 ... P33; the code P08 is not part of the list.
liste_sujets_anon = ['P{:02d}'.format(number) for number in range(1, 34) if number != 8]

In [5]:

def run_Friedman_test(file, var, period, liste_sujets):
    '''
    Run a Friedman test on the proportion of questions with gaze aversion
    across the 3 levels of `var`, and print the result.

    Before the test, every participant with fewer than three rows receives one
    extra row per missing level (1, 2, 3) with a proportion of 0.
    ATTENTION: missing values (NaN) are therefore forced to 0.
    The rows are added to `file` itself (the dataframe is modified in place).
    Each new row is written positionally as [row number, participant, level, 0],
    so `file` is expected to have four columns in that order.

    Args :
        file : dataframe containing the data of the var and period concerned
               (columns 'sujet', `var` and 'prop_VD' are used)
        var : name of the 3-level variable to compare (effort or vividness)
        period : label of the period (access or elaboration); only printed
        liste_sujets : list of participant codes to check (e.g. liste_sujets_anon)

    Returns :
        None; the test table returned by pingouin is printed.
    '''
    for participant in liste_sujets:
        # A participant with three rows or more is considered complete.
        if file['sujet'].tolist().count(participant) >= 3:
            continue
        levels_present = list(file[file['sujet'] == participant][var])
        for level in (1, 2, 3):
            if level in levels_present:
                continue
            new_label = len(file['sujet'])  # next row label, also stored in the first column
            file.loc[new_label] = [new_label, participant, level, 0]

    # perform the Friedman test
    print('\033[1m' + period, var, '\033[0m' + " ")
    friedman_table = pg.friedman(data=file, dv='prop_VD', within=var, subject='sujet', method='f')
    print(friedman_table)
    print(" ")

In [6]:
# Friedman tests on the gaze-aversion proportions: access period first, then elaboration
for frame, var, period in (
    (VD_access_clarte, 'clarte', 'access'),
    (VD_access_effort, 'effort', 'access'),
    (VD_visu_clarte, 'clarte', 'elaboration'),
    (VD_visu_effort, 'effort', 'elaboration'),
):
    run_Friedman_test(frame, var, period, liste_sujets_anon)

[1maccess clarte [0m 
          Source         W   ddof1    ddof2         F     p-unc
Friedman  clarte  0.214912  1.9375  60.0625  8.486034  0.000644
 
[1maccess effort [0m 
          Source         W   ddof1    ddof2          F         p-unc
Friedman  effort  0.457705  1.9375  60.0625  26.164431  9.425314e-09
 
[1melaboration clarte [0m 
          Source         W   ddof1    ddof2         F     p-unc
Friedman  clarte  0.158405  1.9375  60.0625  5.834827  0.005238
 
[1melaboration effort [0m 
          Source         W   ddof1    ddof2        F    p-unc
Friedman  effort  0.042763  1.9375  60.0625  1.38488  0.25807
 


In [7]:
def run_Nemenyi_posthocs(file, var, period, liste_sujets):
    '''
    Run Nemenyi post-hoc tests on the proportion of questions with gaze aversion
    between the 3 levels of `var`, and print the matrix returned by scikit_posthocs.

    Before the test, every participant with fewer than three rows receives one
    extra row per missing level (1, 2, 3) with a proportion of 0.
    ATTENTION: missing values (NaN) are therefore forced to 0.
    The rows are added to `file` itself (the dataframe is modified in place).
    Each new row is written positionally as [row number, participant, level, 0],
    so `file` is expected to have four columns in that order.

    Args :
        file : dataframe containing the data of the var and period concerned
               (columns 'sujet', `var` and 'prop_VD' are used)
        var : name of the 3-level variable to compare (effort or vividness)
        period : label of the period (access or elaboration); only printed
        liste_sujets : list of participant codes to check (e.g. liste_sujets_anon)

    Returns :
        None; the post-hoc result is printed.
    '''
    for subject_code in liste_sujets:
        subject_mask = file['sujet'] == subject_code
        if subject_mask.sum() < 3:
            levels_seen = list(file[subject_mask][var])
            missing_levels = [lvl for lvl in (1, 2, 3) if lvl not in levels_seen]
            for missing_level in missing_levels:
                row_label = len(file['sujet'])  # next row label, also stored in the first column
                file.loc[row_label] = [row_label, subject_code, missing_level, 0]

    # perform the Nemenyi posthocs test
    print('\033[1m' + period, var, '\033[0m' + " ")
    posthoc_table = scikit_posthocs.posthoc_nemenyi_friedman(
        file, y_col='prop_VD', block_col='sujet', group_col=var, melted=True
    )
    print(posthoc_table)
    print(" ")

In [8]:
# Nemenyi post-hoc tests on the gaze-aversion proportions: access period first, then elaboration
for frame, var, period in (
    (VD_access_clarte, 'clarte', 'access'),
    (VD_access_effort, 'effort', 'access'),
    (VD_visu_clarte, 'clarte', 'elaboration'),
    (VD_visu_effort, 'effort', 'elaboration'),
):
    run_Nemenyi_posthocs(frame, var, period, liste_sujets_anon)

[1maccess clarte [0m 
          1         2         3
1  1.000000  0.001353  0.186906
2  0.001353  1.000000  0.186906
3  0.186906  0.186906  1.000000
 
[1maccess effort [0m 
       1         2         3
1  1.000  0.001000  0.001000
2  0.001  1.000000  0.678721
3  0.001  0.678721  1.000000
 
[1melaboration clarte [0m 
          1         2         3
1  1.000000  0.023554  0.023554
2  0.023554  1.000000  0.900000
3  0.023554  0.900000  1.000000
 
[1melaboration effort [0m 
          1         2         3
1  1.000000  0.499954  0.291414
2  0.499954  1.000000  0.900000
3  0.291414  0.900000  1.000000
 


In [9]:
# Build a long-format table with one row per
#   participant x gaze-aversion flag x effort level x vividness (clarte) level,
# holding the median and the mean of Etape_2 (memory quality) over the matching
# trials, or NaN when the participant has no such trial.
list_suj = []
list_eff = []
list_clarte = []
list_median_quality = []
list_mean_quality = []
list_gaze_aversion = []

for suj in liste_sujets_anon:
    df_suj = df[df['Sujet'] == suj]
    # Rows are produced for the trials with a gaze aversion first (flag 1), then
    # for the trials without one (flag 0). Every VD_here value different from 0
    # counts as "with aversion".
    trials_by_aversion = (
        (1, df_suj[df_suj['VD_here'] != 0]),
        (0, df_suj[df_suj['VD_here'] == 0]),
    )
    for aversion_flag, df_aversion in trials_by_aversion:
        for effort_level in (1, 2, 3):
            # the effort column of this file has a trailing space in its name
            df_effort = df_aversion[df_aversion['effort '] == effort_level]
            for clarte_level in (1, 2, 3):
                quality = df_effort[df_effort['clarte'] == clarte_level]['Etape_2']

                list_suj.append(suj)
                list_eff.append(effort_level)
                list_clarte.append(clarte_level)
                list_gaze_aversion.append(aversion_flag)
                if len(quality) > 0:
                    list_median_quality.append(quality.median())
                    list_mean_quality.append(quality.mean())
                else:
                    # no trial for this combination: keep the row, with missing values
                    list_median_quality.append(np.nan)
                    list_mean_quality.append(np.nan)

df_interaction_quality = pd.DataFrame({'Sujet': list_suj, 'aversion': list_gaze_aversion,
                                       'effort': list_eff, 'clarte': list_clarte,
                                       'median_quality': list_median_quality,
                                       'mean_quality': list_mean_quality})


In [10]:
# Average memory quality (Etape_2) of each participant over all of their trials,
# then mean and standard deviation of these averages across participants
# (np.std is called with its default settings).
list_av_quality = [
    df[df['Sujet'] == suj]['Etape_2'].mean()
    for suj in liste_sujets_anon
]
print("Mean : ", np.mean(list_av_quality))
print("STD : ", np.std(list_av_quality))

6.715404552742884
0.9372968608612495


In [11]:
# Difference in memory quality depending on the presence/absence of gaze aversion
# (Wilcoxon signed-rank test on paired per-participant values).
# For each participant, the cell means of df_interaction_quality are averaged
# separately for the rows with aversion (1) and without aversion (0).
mean_quality_av = []
mean_quality_no_av = []
for suj in liste_sujets_anon:
    df_suj = df_interaction_quality[df_interaction_quality['Sujet'] == suj]
    df_av = df_suj[df_suj['aversion'] == 1]
    df_no_av = df_suj[df_suj['aversion'] == 0]
    # pandas skips NaN cells here; the result is NaN when every cell is missing
    mean_quality_av.append(df_av['mean_quality'].mean())
    mean_quality_no_av.append(df_no_av['mean_quality'].mean())

# A second call to scipy.stats.wilcoxon on the same lists used to sit here; its
# result was never displayed or stored, and the lists can contain NaN, so it was removed.
# NOTE(review): pingouin is expected to drop the pairs with a missing value before
# testing - confirm for the installed version.
pg.wilcoxon(mean_quality_av, mean_quality_no_av)

Unnamed: 0,W-val,alternative,p-val,RBC,CLES
Wilcoxon,211.0,two-sided,0.665789,0.092473,0.523889


In [12]:
# Count the number and percentage of participants whose mean quality is higher
# with a gaze aversion than without one.
col_av = [1] * len(mean_quality_av) + [0] * len(mean_quality_no_av)  # aversion flags (not used below)

# Paired differences, restricted to participants doing at least one gaze aversion
# (i.e. whose "with aversion" mean is not NaN)
diff_qual_av = [
    quality_av - quality_no_av
    for quality_av, quality_no_av in zip(mean_quality_av, mean_quality_no_av)
    if not math.isnan(quality_av)
]
count_with_av = len(diff_qual_av)
print('number of participants', count_with_av)

count = sum(1 for difference in diff_qual_av if difference > 0)
print(count, 'with better quality during aversion than without aversion')
print(count/count_with_av*100, '%')

number of participants 30
18 with better quality during aversion than without aversion
60.0 %


### Statistical analyses for eye vergence

In [13]:
# Paths are built with os.path.join so they do not depend on the Windows separator.
processed_dir = os.path.join(set_WD, 'data', 'processed_data')
vergence_dir = os.path.join(processed_dir, 'eye vergence')

# files for access period
vergence_file_access = pd.read_csv(os.path.join(vergence_dir, 'vergence_access.csv'))
data_T1_access = pd.read_csv(os.path.join(processed_dir, 'data_T1_access.csv'))
# files for elaboration period
vergence_file_visu = pd.read_csv(os.path.join(vergence_dir, 'vergence_visu.csv'))
data_T1_visu = pd.read_csv(os.path.join(processed_dir, 'data_T1_visu.csv'))

In [14]:
# Folder of the eye-vergence tables, built once and without a hard-coded Windows separator
vergence_dir = os.path.join(set_WD, 'data', 'processed_data', 'eye vergence')

# Convergence tables (access and elaboration periods, by clarte and by effort)
conv_access_clarte = pd.read_csv(os.path.join(vergence_dir, "conv_Access_clarte.csv"))
conv_access_effort = pd.read_csv(os.path.join(vergence_dir, "conv_Access_effort.csv"))
conv_visu_clarte = pd.read_csv(os.path.join(vergence_dir, "conv_Visu_clarte.csv"))
conv_visu_effort = pd.read_csv(os.path.join(vergence_dir, "conv_Visu_effort.csv"))

# Divergence tables (access and elaboration periods, by clarte and by effort)
div_access_clarte = pd.read_csv(os.path.join(vergence_dir, "div_Access_clarte.csv"))
div_access_effort = pd.read_csv(os.path.join(vergence_dir, "div_Access_effort.csv"))
div_visu_clarte = pd.read_csv(os.path.join(vergence_dir, "div_Visu_clarte.csv"))
div_visu_effort = pd.read_csv(os.path.join(vergence_dir, "div_Visu_effort.csv"))

# Correct the trailing space in the 'effort ' column name of the effort tables
conv_access_effort = conv_access_effort.rename(columns={'effort ': 'effort'})
conv_visu_effort = conv_visu_effort.rename(columns={'effort ': 'effort'})
div_access_effort = div_access_effort.rename(columns={'effort ': 'effort'})
div_visu_effort = div_visu_effort.rename(columns={'effort ': 'effort'})

In [15]:
def check_values(file, var, period, liste_sujets=None):
    '''
    Fill `file` with zero-valued rows for the levels a participant never used
    (for example a participant who never chose "spontaneous" as a level of effort).

    Every participant with fewer than three rows receives one extra row per
    missing level (1, 2, 3) of `var`, with a value of 0.
    ATTENTION: missing values (NaN) are therefore forced to 0.
    The rows are added to `file` itself (modified in place), and the same
    dataframe is returned. Each new row is written positionally as
    [row number, participant, level, 0], so `file` is expected to have four
    columns in that order.

    Args :
        file : dataframe to complete (columns 'sujet' and `var` are read)
        var : name of the 3-level variable (e.g. 'effort' or 'clarte')
        period : label of the period; currently not used by the function
        liste_sujets : optional list of participant codes to check; when omitted,
                       the built-in list P01..P33 (without P08) is used

    Returns :
        The completed dataframe (the same object as `file`).
    '''
    if liste_sujets is None:
        # default participant list, kept for backward compatibility with existing calls
        liste_sujets = ['P01', 'P02', 'P03', 'P04', 'P05',
                        'P06', 'P07', 'P09', 'P10', 'P11',
                        'P12', 'P13', 'P14', 'P15', 'P16',
                        'P17', 'P18', 'P19', 'P20', 'P21', 'P22', 'P23', 'P24', 'P25', 'P26', 'P27',
                        'P28', 'P29', 'P30', 'P31', 'P32', 'P33']

    for s in liste_sujets:
        if list(file['sujet']).count(s) < 3:
            levels_present = list(file[file['sujet'] == s][var])
            for level in (1, 2, 3):
                if level not in levels_present:
                    new_label = len(file['sujet'])  # next row label, also stored in the first column
                    file.loc[new_label] = [new_label, s, level, 0]

    return file

# Complete the convergence tables with zero rows for the missing levels
(conv_access_clarte, conv_access_effort, conv_visu_clarte, conv_visu_effort) = [
    check_values(frame, var, period)
    for frame, var, period in (
        (conv_access_clarte, 'clarte', 'access'),
        (conv_access_effort, 'effort', 'access'),
        (conv_visu_clarte, 'clarte', 'elaboration'),
        (conv_visu_effort, 'effort', 'elaboration'),
    )
]

# Same for the divergence tables
(div_access_clarte, div_access_effort, div_visu_clarte, div_visu_effort) = [
    check_values(frame, var, period)
    for frame, var, period in (
        (div_access_clarte, 'clarte', 'access'),
        (div_access_effort, 'effort', 'access'),
        (div_visu_clarte, 'clarte', 'elaboration'),
        (div_visu_effort, 'effort', 'elaboration'),
    )
]

In [16]:
# Friedman tests on the convergence tables, one per (period, variable) combination
for label, frame, var in (
    ('Convergence, Access, clarte \n', conv_access_clarte, 'clarte'),
    ('Convergence, Elaboration, clarte \n', conv_visu_clarte, 'clarte'),
    ('Convergence, Access, effort\n', conv_access_effort, 'effort'),
    ('Convergence, Elaboration, effort\n', conv_visu_effort, 'effort'),
):
    friedman_table = pg.friedman(data=frame, dv='prop_VD', within=var, subject='sujet', method='f')
    print(label, friedman_table, '\n')

Convergence, Access, clarte 
           Source        W   ddof1    ddof2         F     p-unc
Friedman  clarte  0.23207  1.9375  60.0625  9.368245  0.000331 

Convergence, Elaboration, clarte 
           Source         W   ddof1    ddof2          F    p-unc
Friedman  clarte  0.267551  1.9375  60.0625  11.323787  0.00008 

Convergence, Access, effort
           Source         W   ddof1    ddof2         F     p-unc
Friedman  effort  0.106557  1.9375  60.0625  3.697248  0.031863 

Convergence, Elaboration, effort
           Source         W   ddof1    ddof2         F     p-unc
Friedman  effort  0.173986  1.9375  60.0625  6.529652  0.002981 



In [17]:
# Friedman tests on the divergence tables, one per (period, variable) combination
for label, frame, var in (
    ('Divergence, Access, clarte \n', div_access_clarte, 'clarte'),
    ('Divergence, Elaboration, clarte \n', div_visu_clarte, 'clarte'),
    ('Divergence, Access, effort\n', div_access_effort, 'effort'),
    ('Divergence, Elaboration, effort\n', div_visu_effort, 'effort'),
):
    friedman_table = pg.friedman(data=frame, dv='prop_VD', within=var, subject='sujet', method='f')
    print(label, friedman_table, '\n')

Divergence, Access, clarte 
           Source         W   ddof1    ddof2         F     p-unc
Friedman  clarte  0.117578  1.9375  60.0625  4.130589  0.021895 

Divergence, Elaboration, clarte 
           Source         W   ddof1    ddof2         F     p-unc
Friedman  clarte  0.115728  1.9375  60.0625  4.057087  0.023325 

Divergence, Access, effort
           Source         W   ddof1    ddof2         F     p-unc
Friedman  effort  0.102033  1.9375  60.0625  3.522432  0.037122 

Divergence, Elaboration, effort
           Source         W   ddof1    ddof2         F     p-unc
Friedman  effort  0.230847  1.9375  60.0625  9.304063  0.000347 



In [18]:
# Difference in memory quality (Etape_2) depending on the presence/absence of divergence,
# during the access period. Only the trials without gaze aversion (VD_here == 0) are used.
# Wilcoxon signed-rank test on the per-participant differences.
mean_quality_div = []
mean_quality_no_div = []
for suj in liste_sujets_anon:
    df_suj = vergence_file_access[vergence_file_access['Sujet'] == suj]
    df_suj = df_suj[df_suj['VD_here'] == 0]
    df_div = df_suj[df_suj['sig_vergence'] == 'divergence']
    df_no_div = df_suj[df_suj['sig_vergence'] != 'divergence']
    # the mean is NaN when the participant has no trial of that kind
    mean_quality_div.append(df_div['Etape_2'].mean())
    mean_quality_no_div.append(df_no_div['Etape_2'].mean())

# Keep only the participants with at least one divergence trial
diff_qual_div = []
count_with_div = 0
for quality_div, quality_no_div in zip(mean_quality_div, mean_quality_no_div):
    if not math.isnan(quality_div):
        diff_qual_div.append(quality_div - quality_no_div)
        count_with_div = count_with_div + 1
print('number of participants', count_with_div)

count = sum(1 for difference in diff_qual_div if difference > 0)
# The message used to read "during aversion" (copy-paste from the gaze-aversion
# analysis); this comparison is about divergence.
print(count, 'with better quality during divergence than without divergence')
print(count/count_with_div*100, '%')
scipy.stats.wilcoxon(diff_qual_div)

number of participants 23
9 with better quality during aversion than without divergence
39.130434782608695 %


WilcoxonResult(statistic=98.0, pvalue=0.23447084426879883)

In [19]:
# Difference in memory quality (Etape_2) depending on the presence/absence of convergence,
# during the access period. Only the trials without gaze aversion (VD_here == 0) are used.
# Wilcoxon signed-rank test on the per-participant differences.
mean_quality_conv = []
mean_quality_no_conv = []

for suj in liste_sujets_anon:
    df_suj = vergence_file_access[vergence_file_access['Sujet'] == suj]
    df_suj = df_suj[df_suj['VD_here'] == 0]
    df_conv = df_suj[df_suj['sig_vergence'] == 'convergence']
    df_no_conv = df_suj[df_suj['sig_vergence'] != 'convergence']
    # the mean is NaN when the participant has no trial of that kind
    mean_quality_conv.append(df_conv['Etape_2'].mean())
    mean_quality_no_conv.append(df_no_conv['Etape_2'].mean())

# Keep only the participants with at least one convergence trial
diff_qual_conv = []
count_with_conv = 0
for quality_conv, quality_no_conv in zip(mean_quality_conv, mean_quality_no_conv):
    if not math.isnan(quality_conv):
        diff_qual_conv.append(quality_conv - quality_no_conv)
        count_with_conv = count_with_conv + 1
print('number of participants', count_with_conv)

count = sum(1 for difference in diff_qual_conv if difference > 0)
# The message used to read "during aversion" (copy-paste from the gaze-aversion
# analysis); this comparison is about convergence.
print(count, 'with better quality during convergence than without convergence')
print(count/count_with_conv*100, '%')

scipy.stats.wilcoxon(diff_qual_conv)

number of participants 19
8 with better quality during aversion than without convergence
42.10526315789473 %


WilcoxonResult(statistic=76.0, pvalue=0.4653167724609375)