In [1]:
import numpy as np
import pandas as pd
from statsmodels.stats.anova import AnovaRM
from scipy.stats import ttest_rel
import pingouin as pg
from constants import (ACCURACIES_PATH)

# Importing data and formatting it
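Loads all accuracies and reshapes them into a long-format table (one row per subject, treatment and run) that can be used for statistical analysis. It also splits the data into blocks of runs (runs 1–2, 3–4 and 5–6) so that individual blocks can be analysed separately.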

In [9]:
# Read the raw accuracy table (no header row in the CSV).
accuracies_data = pd.read_csv(ACCURACIES_PATH, header=None)

# Flatten row by row and build matching index vectors. Each CSV row is treated
# as one subject contributing 12 values: treatment 1 with runs 1-6, followed by
# treatment 2 with runs 1-6.
accuracy = accuracies_data.to_numpy().flatten()
subject = np.repeat(range(1, len(accuracies_data) + 1), 12)
treatment = np.tile(np.repeat([1, 2], 6), len(accuracies_data))
run = np.tile(np.arange(1, 7), 2 * len(accuracies_data))

# Long-format table; the three design factors are stored as categoricals.
stats_data = pd.DataFrame({
    'Subject': pd.Categorical(subject),
    'Treatment': pd.Categorical(treatment),
    'Run': pd.Categorical(run),
    'Accuracy': accuracy,
})

# Split into consecutive pairs of runs.
stats_data_pre = stats_data[stats_data['Run'].isin([1, 2])]
stats_data_treatment = stats_data[stats_data['Run'].isin([3, 4])]
stats_data_post = stats_data[stats_data['Run'].isin([5, 6])]

stats_data_treatment.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 124 entries, 2 to 369
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   Subject    124 non-null    category
 1   Treatment  124 non-null    category
 2   Run        124 non-null    category
 3   Accuracy   124 non-null    float64 
dtypes: category(3), float64(1)
memory usage: 3.9 KB


# ANOVA
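Calculates within-subjects (repeated-measures) ANOVAs with the factors Run and Treatment: Model 1 uses all runs (statsmodels), Model 2 uses only runs 3 and 4 (pingouin).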

In [10]:
# Model 1: repeated-measures ANOVA (Run x Treatment) over the full table, via statsmodels.
model_1_pre = AnovaRM(
    data=stats_data,
    depvar='Accuracy',
    subject='Subject',
    within=['Run', 'Treatment'],
)
model_1 = model_1_pre.fit()
# "\033[4m" ... "\033[0m" are ANSI escapes that underline the heading.
print("\033[4mModel 1\033[0m", model_1, "", sep="\n")

# Model 2: the same two within-subject factors, fitted with pingouin on the
# runs 3-4 subset only.
model_2 = pg.rm_anova(
    data=stats_data_treatment,
    dv='Accuracy',
    within=['Run', 'Treatment'],
    subject='Subject',
)
print("\033[4mModel 2\033[0m", model_2, "", sep="\n")

[4mModel 1[0m
                   Anova
              F Value Num DF  Den DF  Pr > F
--------------------------------------------
Run            1.4781 5.0000 150.0000 0.2002
Treatment      0.1679 1.0000  30.0000 0.6849
Run:Treatment  2.2930 5.0000 150.0000 0.0484


[4mModel 2[0m
            Source        SS  ddof1  ddof2        MS          F     p-unc  \
0              Run  0.040839      1     30  0.040839  10.394714  0.003043   
1        Treatment  0.033567      1     30  0.033567   2.979015  0.094638   
2  Run * Treatment  0.053054      1     30  0.053054   7.335583  0.011058   

   p-GG-corr       ng2  eps  
0   0.003043  0.027004  1.0  
1   0.094638  0.022303  1.0  
2   0.011058  0.034800  1.0  



# T-Test
Calculates paired t-tests comparing the two treatments within a single run (run 4).

In [None]:
# Run-4 accuracies per treatment level.
# NOTE(review): the variable names suggest Treatment 1 = sham and 2 = stimulation — confirm.
sham_4 = stats_data.query('Treatment == 1 and Run == 4')['Accuracy']
stim_4 = stats_data.query('Treatment == 2 and Run == 4')['Accuracy']

# Mean accuracy of each condition, followed by a blank line.
for run_4_scores in (sham_4, stim_4):
    print(np.mean(run_4_scores))
print()

# Paired t-test via scipy; alternative='less' tests whether the first sample's
# mean is lower than the second's.
t_test_1 = ttest_rel(sham_4, stim_4, alternative='less')
print("\033[4mT-Test 1\033[0m", t_test_1, "", sep="\n")

# Paired t-test via pingouin, with its default alternative.
t_test_2 = pg.ttest(sham_4, stim_4, paired=True)
print("\033[4mT-Test 2\033[0m", t_test_2, sep="\n")

# Output
Exporting data to use in other programs

In [12]:
# Write the long-format table as comma-separated text. No header is passed, so
# the file holds data rows only, in the DataFrame's column order
# (Subject, Treatment, Run, Accuracy).
np.savetxt(
    fname="data/stats_data.csv",
    X=stats_data,
    delimiter=",",
)