In [7]:
import numpy as np
import pandas as pd
from statsmodels.stats.anova import AnovaRM
from scipy.stats import ttest_rel
import pingouin as pg
from constants import (ACCURACIES_PATH, ACC_UNSHIFTED_PATH)
from utils.utils_statistical_analysis import stats_preprocessing

# Importing data and formatting it
Imports all the accuracies and brings them into a table that can be used for statistical analysis. Also segments the data to analyse only certain blocks.

In [8]:
accuracies_data = pd.read_csv(ACCURACIES_PATH, header=None)
stats_data = stats_preprocessing(accuracies_data)

stats_data_pre = stats_data.loc[(stats_data['Run'] == 1) | (stats_data['Run'] == 2)]
stats_data_treatment = stats_data.loc[(stats_data['Run'] == 3) | (stats_data['Run'] == 4)]
stats_data_post = stats_data.loc[(stats_data['Run'] == 5) | (stats_data['Run'] == 6)]

#stats_data.info()

# Output
Exporting data to use in other programs

In [9]:
# np.savetxt("data/stats_data.csv", stats_data, delimiter=",")
# np.savetxt("data/stats_data_treatment.csv", stats_data_treatment, delimiter=",")
# np.savetxt("data/stats_data_pre.csv", stats_data_pre, delimiter=",")
# np.savetxt("data/stats_data_post.csv", stats_data_post, delimiter=",")

# ANOVA
calculates within-subjects ANOVA

In [10]:
model_1 = AnovaRM(data=stats_data, depvar='Accuracy', subject='Subject', within=['Run', 'Treatment']).fit()
print("\033[4m" + "Model 1" + "\033[0m")
print(model_1)
print("")

model_2 = pg.rm_anova(dv='Accuracy', within=['Run', 'Treatment'], subject='Subject', data=stats_data)
print("\033[4m" + "Model 2" + "\033[0m")
print(model_2)
print("")

[4mModel 1[0m
                   Anova
              F Value Num DF  Den DF  Pr > F
--------------------------------------------
Run            0.7133 5.0000 155.0000 0.6144
Treatment      0.0095 1.0000  31.0000 0.9232
Run:Treatment  0.7501 5.0000 155.0000 0.5872


[4mModel 2[0m
            Source        SS  ddof1  ddof2        MS         F     p-unc  \
0              Run  0.025461      5    155  0.005092  0.713259  0.614357   
1        Treatment  0.000086      1     31  0.000086  0.009457  0.923155   
2  Run * Treatment  0.037047      5    155  0.007409  0.750078  0.587216   

   p-GG-corr       ng2       eps  
0   0.599474  0.005739  0.895642  
1   0.923155  0.000020  1.000000  
2   0.560758  0.008330  0.806473  



# T-Test
Calculates a t-test comparing individual runs to each other

In [11]:
sham_4 = stats_data.query('Treatment == 1 and Run == 6')['Accuracy']
stim_4 = stats_data.query('Treatment == 2 and Run == 6')['Accuracy']
print(np.mean(sham_4))
print(np.mean(stim_4))
print("")

t_test_1 = pg.ttest(sham_4, stim_4, paired=True)
print("\033[4m" + "T-Test 1" + "\033[0m")
print(t_test_1)

t_test_2 = ttest_rel(sham_4, stim_4, alternative='less')
print("\033[4m" + "T-Test 2" + "\033[0m")
print(t_test_2)

0.6345314558629775
0.6165338850461133

[4mT-Test 1[0m
               T  dof alternative    p-val          CI95%   cohen-d   BF10  \
T-test  0.693936   31   two-sided  0.49289  [-0.03, 0.07]  0.169506  0.236   

           power  
T-test  0.153303  
[4mT-Test 2[0m
Ttest_relResult(statistic=0.6939363145494997, pvalue=0.7535550819019683)


# Performance Split
Splitting dataset into high and low performers and calculating ANOVA

In [13]:
subject_performance = stats_data.Accuracy.to_numpy()
subject_performance = np.mean(subject_performance.reshape(-1, 12), axis=1)
performance_index = np.argpartition(subject_performance, int(len(subject_performance)/2))
performance_index = performance_index + 1
performance_index = np.array_split(performance_index,2)

low_performers = stats_data[~(stats_data.Subject.isin(performance_index[1]))]
high_performers = stats_data[~(stats_data.Subject.isin(performance_index[0]))]

anova_low_performers = pg.rm_anova(dv='Accuracy', within=['Run', 'Treatment'], subject='Subject', data=low_performers)
anova_high_performers = pg.rm_anova(dv='Accuracy', within=['Run', 'Treatment'], subject='Subject', data=high_performers)

print("\033[4m" + "Low Performers" + "\033[0m")
print(np.mean(low_performers.Accuracy))
print(anova_low_performers)
print("")
print("\033[4m" + "High Performers" + "\033[0m")
print(np.mean(high_performers.Accuracy))
print(anova_high_performers)

[4mLow Performers[0m
0.5751168272397892
            Source        SS  ddof1  ddof2        MS         F     p-unc  \
0              Run  0.052037      5     75  0.010407  1.544898  0.186236   
1        Treatment  0.000356      1     15  0.000356  0.034914  0.854280   
2  Run * Treatment  0.054385      5     75  0.010877  0.935045  0.463355   

   p-GG-corr       ng2       eps  
0   0.208021  0.028702  0.704322  
1   0.854280  0.000202  1.000000  
2   0.444382  0.029958  0.732152  

[4mHigh Performers[0m
0.6768251468489238
            Source        SS  ddof1  ddof2        MS         F     p-unc  \
0              Run  0.011203      5     75  0.002241  0.298189  0.912426   
1        Treatment  0.000033      1     15  0.000033  0.003770  0.951854   
2  Run * Treatment  0.055670      5     75  0.011134  1.425775  0.224820   

   p-GG-corr       ng2       eps  
0   0.878480  0.007197  0.802277  
1   0.951854  0.000021  1.000000  
2   0.244521  0.034770  0.658665  


# Comparing day 1 to day 2
Using the unshifted data to test if there is a difference in the performance between day 1 and day 2 and to see if there is a learning effect

In [21]:
accuracies_unshifted_data = pd.read_csv(ACC_UNSHIFTED_PATH, header=None)
stats_data_unshifted = stats_preprocessing(accuracies_unshifted_data)

stats_data_day1 = stats_data.loc[(stats_data['Treatment'] == 1)]
stats_data_day2 = stats_data.loc[(stats_data['Treatment'] == 2)]

unshifted_anova = pg.rm_anova(dv='Accuracy', within=['Run', 'Treatment'], subject='Subject', data=stats_data_unshifted)
anova_day1 = pg.rm_anova(dv='Accuracy', within=['Run'], subject='Subject', data=stats_data_day1)
anova_day2 = pg.rm_anova(dv='Accuracy', within=['Run'], subject='Subject', data=stats_data_day2)

print("\033[4m" + "ANOVA comparing day 1 and 2" + "\033[0m")
print(unshifted_anova)
print("")
print("\033[4m" + "ANOVA of day 1" + "\033[0m")
print(anova_day1)
print("")
print("\033[4m" + "ANOVA of day 2" + "\033[0m")
print(anova_day2)

[4mANOVA comparing day 1 and 2[0m
            Source        SS  ddof1  ddof2        MS         F     p-unc  \
0              Run  0.025461      5    155  0.005092  0.713259  0.614357   
1        Treatment  0.000086      1     31  0.000086  0.009457  0.923155   
2  Run * Treatment  0.037047      5    155  0.007409  0.750078  0.587216   

   p-GG-corr       ng2       eps  
0   0.599474  0.005739  0.895642  
1   0.923155  0.000020  1.000000  
2   0.560758  0.008330  0.806473  

[4mANOVA of day 1[0m
  Source  ddof1  ddof2         F     p-unc       ng2       eps
0    Run      5    155  0.689446  0.632141  0.013437  0.870944

[4mANOVA of day 2[0m
  Source  ddof1  ddof2        F     p-unc       ng2       eps
0    Run      5    155  0.78205  0.564062  0.014511  0.892408
