# Evaluation
Here we will perform statistical tests on the results collected so far.\
The following packages should be installed:
* pandas
* numpy
* scipy
* pingouin

In [35]:
import pandas as pd
import scipy.stats as stats
import numpy.random as rnd
import numpy as np

import warnings

# Silence every warning category so the test reports below stay readable.
# NOTE(review): this also hides any warnings raised by the statistics
# libraries themselves — confirm that this is intended.
warnings.simplefilter('ignore')

# Let pandas render up to 500 rows before it truncates a displayed table.
pd.options.display.max_rows = 500

Import the utility class from [experiment-evaluation](https://github.com/MarcRuble/experiment-evaluation).\
*Note: The file `evaluation.py` needs to be in the same folder as this notebook.*

In [36]:
from evaluation import DatasetEvaluation

Try out the utility functions.

In [37]:
# Walk through each DatasetEvaluation helper once, in the order the reports
# are printed below. The column names used here must exist in the CSV.

# --- load the results table and wrap it in the evaluation helper ---
results_csv = "original-tables/AR_Presence_Results.csv"  # alternative input: "tables/results.csv"
df = pd.read_csv(results_csv)
evl = DatasetEvaluation(df)

# --- derive a 'Score' column from the four question columns via add_mean ---
question_columns = ['Q1', 'Q2', 'Q3', 'Q4']
evl.add_mean(question_columns, 'Score')

# --- assumption checks ---
# Normality: once for a raw question column, once for the score restricted
# to the rows where Condition == 'XXS'.
evl.check_normal_distribution('Q1')
smallest_condition = ('Condition', 'XXS')
evl.check_normal_distribution('Score', smallest_condition)

# Homogeneity of variances of the score across conditions.
evl.check_homogene_variances('Score', 'Condition')

# Sphericity of the score across conditions, per participant.
evl.check_sphericity('Score', 'Condition', 'Participant')

# --- omnibus tests ---
# Friedman test, restricted to Task 1.
task_one = ('Task', 1)
evl.friedman_test('Score', 'Condition', task_one)

# ANOVA with 'Participant' as third argument, restricted to Task 2.
task_two = ('Task', 2)
evl.anova_test('Score', 'Condition', 'Participant', condition=task_two)

# --- post-hoc comparisons ---
# Register the ordering of the condition labels before running the
# pairwise tests.
condition_order = ['XXS', 'XS', 'S', 'M', 'L', 'XL', 'XXL']
evl.save_order('Condition', condition_order)

# Wilcoxon: all pairs first, then Task 1 only with 'M' as the baseline.
evl.wilcoxon_test('Score', 'Condition')
evl.wilcoxon_test('Score', 'Condition', condition=task_one, baseline='M')

# Paired t-tests: same two variants as for Wilcoxon.
evl.paired_t_test('Score', 'Condition', 'Participant')
evl.paired_t_test('Score', 'Condition', 'Participant', condition=task_one, baseline='M')

### Normal Distribution ###
Q1: stat=0.93916, p=2.4613e-07
--> Non-Gaussian

### Normal Distribution ###
Score with (Condition = XXS): stat=0.96306, p=0.41098
--> Gaussian-like

### Homogeneity of Variances ###
Score: stat=0.67958, p=0.99492
--> Homogene Variances

### Sphericity ###
Score between Condition for Participant: W=0.016525, chi2=44.22, dof=20, p=0.001922
--> No sphericity given

################
### Friedman ###
################
Task = 1
Score between Condition: stat=30.296, p=3.4522e-05
--> Significant effects

#############
### ANOVA ###
#############
Task = 2


Unnamed: 0,Source,ddof1,ddof2,F,p-unc,p-GG-corr,n2,eps,sphericity,W-spher,p-spher
0,Condition,6,78,8.083142,8.438643e-07,9.8e-05,0.383394,0.584794,True,0.094743,0.204229



################
### Wilcoxon ###
################


Unnamed: 0,A,B,W,p,bonf,RBC,CLES
0,XXS,XS,33.5,0.00031 ***,0.007 **,-0.80912,0.31059
1,XXS,S,16.5,3e-05 ***,0.00073 ***,-0.9127,0.20344
2,XXS,M,15.5,2e-05 ***,0.00042 ***,-0.92365,0.19069
3,XXS,L,16.0,2e-05 ***,0.00044 ***,-0.92118,0.20791
4,XXS,XL,17.0,2e-05 ***,0.00049 ***,-0.91626,0.16454
5,XXS,XXL,47.0,0.00039 ***,0.008 **,-0.76847,0.23597
6,XS,S,43.5,0.007 **,0.15055,-0.65613,0.34375
7,XS,M,54.0,0.002 **,0.043 *,-0.69231,0.31696
8,XS,L,41.5,0.003 **,0.07126,-0.69928,0.35013
9,XS,XL,39.0,0.00053 ***,0.011 *,-0.77778,0.27997



################
### Wilcoxon ###
################
Task = 1


group,XXS,XS,S,L,XL,XXL
value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
p,0.00061 ***,0.09907,0.21693,0.85782,0.50159,0.32581
bonf,0.004 **,0.59442,1.0,1.0,1.0,1.0
W,3,21.5,27.5,30.5,40.5,36.0
r,-0.94286,-0.52747,0.3956,-0.07576,0.22857,-0.31429



######################
### Paired t-Tests ###
######################


Unnamed: 0,Contrast,A,B,Paired,Parametric,T,dof,Tail,p-unc,p-corr,p-adjust,BF10,cohen
0,Condition,XS,S,True,True,-2.762509,27.0,two-sided,0.01019629,0.214122,bonf,4.533,-0.552131
1,Condition,XS,XXS,True,True,4.60257,27.0,two-sided,8.861515e-05,0.001861,bonf,293.442,0.66629
2,Condition,XS,M,True,True,-3.532475,27.0,two-sided,0.001502479,0.031552,bonf,23.596,-0.682253
3,Condition,XS,XXL,True,True,-2.019343,27.0,two-sided,0.05348023,1.0,bonf,1.164,-0.393071
4,Condition,XS,L,True,True,-3.290227,27.0,two-sided,0.002788756,0.058564,bonf,13.76,-0.559464
5,Condition,XS,XL,True,True,-4.192168,27.0,two-sided,0.0002657217,0.00558,bonf,109.425,-0.804111
6,Condition,S,XXS,True,True,5.523578,27.0,two-sided,7.487704e-06,0.000157,bonf,2779.219,1.163136
7,Condition,S,M,True,True,-0.883309,27.0,two-sided,0.3848667,1.0,bonf,0.286,-0.13442
8,Condition,S,XXL,True,True,0.930434,27.0,two-sided,0.3603915,1.0,bonf,0.297,0.140012
9,Condition,S,L,True,True,-0.103244,27.0,two-sided,0.9185325,1.0,bonf,0.201,-0.014932



######################
### Paired t-Tests ###
######################
Task = 1


condition,XXS,XS,S,L,XL,XXL
value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
p,0.002 **,0.0662,0.89414,0.59316,0.74553,0.42088
bonf,0.012 *,0.39719,1.0,1.0,1.0,1.0
T,-3.87346,-2.00539,0.1357,0.54772,-0.33152,0.83118
d,-1.26258,-0.63556,0.02932,-0.08985,0.0797,-0.26464



