In [19]:
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import researchpy as rp
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
import statsmodels.stats.multicomp
from itertools import combinations

# Processed data set; per the file name and message, outliers are already removed.
datafilename = "data_processed-without-outliers.csv"
d = pd.read_csv(datafilename)
print("Loading", d.shape[0], "lines of data (outliers removed).")

# Statistics table; this is the frame the ANOVA models below are fitted on.
resultsfilename = "data_stats-without-outliers.csv"
st = pd.read_csv(resultsfilename)
print("Loading", st.shape[0], "lines of stats.")

# ------------------------------------------------------
# functions to add eta squared and omega squared
# to the ANOVA summary table

def eta_squared(aov):
    """Append an ``eta_sq`` column to the ANOVA table ``aov`` (modified in place).

    For every row except the last one, the value is that row's ``sum_sq``
    divided by the sum of the whole ``sum_sq`` column. The last row is left
    without a value (NaN after index alignment). The same table object is
    returned.
    """
    total_ss = sum(aov['sum_sq'])
    effect_ss = aov['sum_sq'].iloc[:-1]
    # Assigning the shorter Series aligns on the index, so the last row gets NaN.
    aov['eta_sq'] = effect_ss / total_ss
    return aov

def omega_squared(aov):
    """Add an ``omega_sq`` (omega squared) column to an ANOVA table, in place.

    Parameters
    ----------
    aov : pandas.DataFrame
        Table with ``sum_sq`` and ``df`` columns. The last row is used as the
        error term (the ``Residual`` row in the tables this notebook prints).

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in. ``omega_sq`` is filled for every
        row except the last, which is NaN.

    Notes
    -----
    Computed as ``(SS_effect - df_effect * MSE) / (SS_total + MSE)``, where
    ``MSE`` is the last row's ``sum_sq / df`` and ``SS_total`` is the sum of
    the ``sum_sq`` column.
    """
    # The table is indexed by term labels, so the last row must be fetched by
    # position with .iloc; an integer key such as [-1] on a label-indexed
    # Series is deprecated in pandas 2.x and no longer works in pandas 3.
    mse = aov['sum_sq'].iloc[-1] / aov['df'].iloc[-1]
    effects = aov.iloc[:-1]
    # Index alignment on assignment leaves the last (error) row as NaN.
    aov['omega_sq'] = (
        (effects['sum_sq'] - effects['df'] * mse) / (sum(aov['sum_sq']) + mse)
    )
    return aov

Loading 14520 lines of data (outliers removed).
Loading 808 lines of stats.


In [28]:
# Predictors; each one is wrapped in C(...) when the model formula is built.
factors = [
    "StudyID",
    "Training",
    "Font",
    "Firstfont",
]
# Response variables; one model is fitted per metric.
metrics = [
    "RT",
    "AUC",
    "AUC_word",
]

def anova(factor, metrics, no):
    """Fit and display a full-factorial ANOVA for every factor combination.

    For each combination of ``no`` distinct entries of ``factor`` and each
    entry of ``metrics``, an OLS model ``metric ~ C(a) * C(b) * ...`` is
    fitted on the module-level frame ``st``. The type-2 ANOVA table is
    extended with eta squared and omega squared and displayed, followed by
    the overall model F test.

    Parameters
    ----------
    factor : sequence of str
        Column names to combine; each is treated as categorical.
    metrics : sequence of str
        Column names used as the response, one model per entry.
    no : int
        Number of factors crossed in each model (2 = two-way, 3 = three-way).

    Returns
    -------
    None
        Results are printed / displayed only.
    """
    # Iterate over the argument, not the notebook-level ``factors`` list, so
    # the function analyses what the caller actually passed in.
    for f in combinations(factor, no):
        # Skip combinations that repeat a factor (only possible when the input
        # contains duplicates). Comparing all entries also keeps ``no == 1``
        # from failing on a missing second element.
        if len(set(f)) != len(f):
            continue
        f_ = ["C(%s)" % x for x in f]
        for metric in metrics:
            formula = metric + " ~ " + (" * ".join(f_))
            model = ols(formula, st).fit()
            aov_table = anova_lm(model, typ=2)
            # Both helpers add their column to aov_table in place.
            eta_squared(aov_table)
            omega_squared(aov_table)
            print()
            print("# %s (metric: %s)" % (f, metric))
            print()
            display(aov_table)
            # overall model significance
            print("Overall model F(%d, %d) = %.3f, p = %.4f" % (model.df_model, model.df_resid, model.fvalue, model.f_pvalue))
            print()

# Two-way models first, then a horizontal rule, then three-way models.
anova(factors, metrics, no=2)
print("_" * 30)
anova(factors, metrics, no=3)

# ('StudyID', 'Training') (metric: RT)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),19832.69,1.0,0.021936,0.882295,2.6e-05,-0.001171
C(Training),23752880.0,4.0,6.567933,3.3e-05,0.031498,0.02667
C(StudyID):C(Training),8845468.0,4.0,2.445869,0.045115,0.01173,0.006926
Residual,721490400.0,798.0,,,,


Overall model F(9, 798) = 4.030, p = 0.0000

# ('StudyID', 'Training') (metric: AUC)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),0.18262,1.0,16.082524,6.6e-05,0.019118,0.017908
C(Training),0.190207,4.0,4.187671,0.002313,0.019912,0.015139
C(StudyID):C(Training),0.118228,4.0,2.602952,0.034819,0.012377,0.007613
Residual,9.061438,798.0,,,,


Overall model F(9, 798) = 5.366, p = 0.0000

# ('StudyID', 'Training') (metric: AUC_word)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),0.048573,1.0,2.293613,0.130709,0.005709,0.003212
C(Training),0.108734,4.0,1.283586,0.27579,0.012779,0.002816
C(StudyID):C(Training),0.007414,4.0,0.087519,0.986303,0.000871,-0.009062
Residual,8.344011,394.0,,,,


Overall model F(9, 394) = 0.956, p = 0.4765

# ('StudyID', 'Font') (metric: RT)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),195481.3,1.0,0.212846,0.644671,0.000259,-0.000957
C(Font),15326820.0,1.0,16.688305,4.8e-05,0.02032,0.019079
C(StudyID):C(Font),354995.9,1.0,0.38653,0.534305,0.000471,-0.000746
Residual,738406900.0,804.0,,,,


Overall model F(3, 804) = 5.763, p = 0.0007

# ('StudyID', 'Font') (metric: AUC)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),0.239968,1.0,20.60296,7e-06,0.024971,0.02373
C(Font),0.000395,1.0,0.033892,0.853983,4.1e-05,-0.00117
C(StudyID):C(Font),0.005094,1.0,0.437386,0.508577,0.00053,-0.000681
Residual,9.364384,804.0,,,,


Overall model F(3, 804) = 7.025, p = 0.0001

# ('StudyID', 'Font') (metric: AUC_word)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),0.066015,1.0,3.122063,0.078001,0.007743,0.00525
C(Font),8.7e-05,1.0,0.004115,0.948881,1e-05,-0.002464
C(StudyID):C(Font),0.002145,1.0,0.101422,0.750296,0.000252,-0.002223
Residual,8.457927,400.0,,,,


Overall model F(3, 400) = 1.076, p = 0.3591

# ('StudyID', 'Firstfont') (metric: RT)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),131756.5,1.0,0.143041,0.705376,0.000175,-0.001045
C(Firstfont),7054584.0,1.0,7.658776,0.00578,0.009353,0.008122
C(StudyID):C(Firstfont),6460738.0,1.0,7.01407,0.008246,0.008566,0.007336
Residual,740573400.0,804.0,,,,


Overall model F(3, 804) = 4.962, p = 0.0020

# ('StudyID', 'Firstfont') (metric: AUC)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),0.243532,1.0,20.948756,5e-06,0.025333,0.024094
C(Firstfont),0.016694,1.0,1.436004,0.23114,0.001737,0.000527
C(StudyID):C(Firstfont),0.006589,1.0,0.566779,0.451762,0.000685,-0.000523
Residual,9.346591,804.0,,,,


Overall model F(3, 804) = 7.548, p = 0.0001

# ('StudyID', 'Firstfont') (metric: AUC_word)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),0.064985,1.0,3.083541,0.079853,0.007623,0.005138
C(Firstfont),0.004084,1.0,0.193802,0.660009,0.000479,-0.001988
C(StudyID):C(Firstfont),0.026164,1.0,1.2415,0.265851,0.003069,0.000596
Residual,8.42991,400.0,,,,


Overall model F(3, 400) = 1.523, p = 0.2081

# ('Training', 'Font') (metric: RT)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(Training),23928530.0,4.0,6.684818,2.7e-05,0.031723,0.026946
C(Font),15326820.0,1.0,17.127163,3.9e-05,0.02032,0.019111
C(Training):C(Font),911824.1,4.0,0.254733,0.906816,0.001209,-0.003533
Residual,714117000.0,798.0,,,,


Overall model F(9, 798) = 4.987, p = 0.0000

# ('Training', 'Font') (metric: AUC)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(Training),0.247555,4.0,5.285119,0.000333,0.025761,0.020861
C(Font),0.000395,1.0,0.03371,0.854371,4.1e-05,-0.001176
C(Training):C(Font),0.01732,4.0,0.369759,0.830263,0.001802,-0.003068
Residual,9.344572,798.0,,,,


Overall model F(9, 798) = 2.517, p = 0.0076

# ('Training', 'Font') (metric: AUC_word)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(Training),0.126176,4.0,1.493508,0.203354,0.014799,0.004878
C(Font),8.7e-05,1.0,0.00412,0.948853,1e-05,-0.002461
C(Training):C(Font),0.078368,4.0,0.927626,0.447763,0.009192,-0.000715
Residual,8.321543,394.0,,,,


Overall model F(9, 394) = 1.077, p = 0.3790

# ('Training', 'Firstfont') (metric: RT)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(Training),22778210.0,4.0,6.347953,5e-05,0.030245,0.02545
C(Firstfont),5967989.0,1.0,6.652764,0.010078,0.007924,0.006725
C(Training):C(Firstfont),8526485.0,4.0,2.376206,0.050566,0.011321,0.006549
Residual,715861200.0,798.0,,,,


Overall model F(9, 798) = 4.759, p = 0.0000

# ('Training', 'Firstfont') (metric: AUC)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(Training),0.249878,4.0,5.531245,0.000215,0.025996,0.021271
C(Firstfont),0.015453,1.0,1.368256,0.242461,0.001608,0.000432
C(Training):C(Firstfont),0.334276,4.0,7.399456,7e-06,0.034776,0.030041
Residual,9.012558,798.0,,,,


Overall model F(9, 798) = 5.876, p = 0.0000

# ('Training', 'Firstfont') (metric: AUC_word)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(Training),0.128196,4.0,1.54174,0.189313,0.015032,0.005269
C(Firstfont),0.007135,1.0,0.343242,0.5583,0.000837,-0.001597
C(Training):C(Firstfont),0.202577,4.0,2.436277,0.046737,0.023754,0.01397
Residual,8.190286,394.0,,,,


Overall model F(9, 394) = 1.795, p = 0.0674

# ('Font', 'Firstfont') (metric: RT)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(Font),15326820.0,1.0,16.941449,4.3e-05,0.02032,0.019097
C(Firstfont),7118309.0,1.0,7.868201,0.005153,0.009437,0.008228
C(Font):C(Firstfont),4465667.0,1.0,4.936112,0.026579,0.00592,0.004715
Residual,727373400.0,804.0,,,,


Overall model F(3, 804) = 9.915, p = 0.0000

# ('Font', 'Firstfont') (metric: AUC)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(Font),0.000395,1.0,0.033133,0.85561,4.1e-05,-0.001197
C(Firstfont),0.01313,1.0,1.102033,0.294136,0.001366,0.000126
C(Font):C(Firstfont),0.017353,1.0,1.456545,0.227836,0.001806,0.000565
Residual,9.578963,804.0,,,,


Overall model F(3, 804) = 0.864, p = 0.4594

# ('Font', 'Firstfont') (metric: AUC_word)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(Font),8.7e-05,1.0,0.004111,0.948908,1e-05,-0.002466
C(Firstfont),0.005115,1.0,0.24165,0.623287,0.0006,-0.001878
C(Font):C(Firstfont),0.054388,1.0,2.569526,0.10973,0.006379,0.003887
Residual,8.466584,400.0,,,,


Overall model F(3, 400) = 0.938, p = 0.4220

______________________________
# ('StudyID', 'Training', 'Font') (metric: RT)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),19832.69,1.0,0.022185,0.881633,2.6e-05,-0.001158
C(Training),23752880.0,4.0,6.642679,2.9e-05,0.031491,0.026719
C(Font),15326820.0,1.0,17.145052,3.8e-05,0.02032,0.019112
C(StudyID):C(Training),8845468.0,4.0,2.473704,0.043109,0.011727,0.006978
C(StudyID):C(Font),519593.8,1.0,0.581234,0.446057,0.000689,-0.000496
C(Training):C(Font),1076422.0,4.0,0.30103,0.87732,0.001427,-0.00331
C(StudyID):C(Training):C(Font),299733.0,4.0,0.083823,0.987399,0.000397,-0.004338
Residual,704432400.0,788.0,,,,


Overall model F(19, 788) = 2.935, p = 0.0000

# ('StudyID', 'Training', 'Font') (metric: AUC)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),0.18262,1.0,15.94001,7.1e-05,0.019122,0.017901
C(Training),0.190207,4.0,4.150562,0.00247,0.019916,0.0151
C(Font),0.000395,1.0,0.034456,0.852789,4.1e-05,-0.001157
C(StudyID):C(Training),0.118228,4.0,2.579886,0.036184,0.012379,0.007572
C(StudyID):C(Font),0.00294,1.0,0.256618,0.612595,0.000308,-0.000891
C(Training):C(Font),0.015165,4.0,0.330923,0.857246,0.001588,-0.003207
C(StudyID):C(Training):C(Font),0.012898,4.0,0.281447,0.89006,0.001351,-0.003444
Residual,9.027886,788.0,,,,


Overall model F(19, 788) = 2.673, p = 0.0001

# ('StudyID', 'Training', 'Font') (metric: AUC_word)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),0.048573,1.0,2.301375,0.130082,0.005705,0.003218
C(Training),0.108734,4.0,1.28793,0.274142,0.012771,0.002848
C(Font),8.7e-05,1.0,0.004123,0.948836,1e-05,-0.002463
C(StudyID):C(Training),0.007414,4.0,0.087816,0.986214,0.000871,-0.009023
C(StudyID):C(Font),0.007389,1.0,0.35007,0.554422,0.000868,-0.001607
C(Training):C(Font),0.083612,4.0,0.990375,0.412617,0.009821,-9.5e-05
C(StudyID):C(Training):C(Font),0.153361,4.0,1.816529,0.12486,0.018013,0.008077
Residual,8.104806,384.0,,,,


Overall model F(19, 384) = 1.051, p = 0.4013

# ('StudyID', 'Training', 'Firstfont') (metric: RT)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),42043.86,1.0,0.047678,0.827211,5.6e-05,-0.001111
C(Training),21328420.0,4.0,6.04659,8.5e-05,0.028248,0.023548
C(Firstfont),7818373.0,1.0,8.86601,0.002994,0.010355,0.009176
C(StudyID):C(Training),11798270.0,4.0,3.3448,0.00997,0.015626,0.010941
C(StudyID):C(Firstfont),5552841.0,1.0,6.296904,0.012294,0.007354,0.006179
C(Training):C(Firstfont),9982260.0,4.0,2.829963,0.023857,0.013221,0.008539
C(StudyID):C(Training):C(Firstfont),3645480.0,4.0,1.033491,0.38891,0.004828,0.000156
Residual,694887400.0,788.0,,,,


Overall model F(19, 788) = 3.545, p = 0.0000

# ('StudyID', 'Training', 'Firstfont') (metric: AUC)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),0.146149,1.0,13.377435,0.000272,0.015437,0.014266
C(Training),0.191429,4.0,4.380523,0.001651,0.020219,0.015586
C(Firstfont),0.010885,1.0,0.996363,0.318499,0.00115,-4e-06
C(StudyID):C(Training),0.063698,4.0,1.457612,0.21329,0.006728,0.00211
C(StudyID):C(Firstfont),0.008502,1.0,0.778174,0.377969,0.000898,-0.000256
C(Training):C(Firstfont),0.254707,4.0,5.828528,0.000126,0.026903,0.022262
C(StudyID):C(Training):C(Firstfont),0.183305,4.0,4.194622,0.002287,0.019361,0.014729
Residual,8.608917,788.0,,,,


Overall model F(19, 788) = 4.822, p = 0.0000

# ('StudyID', 'Training', 'Firstfont') (metric: AUC_word)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),0.041489,1.0,2.000891,0.158017,0.004862,0.002426
C(Training),0.11652,4.0,1.404854,0.231659,0.013656,0.003926
C(Firstfont),0.00734,1.0,0.353979,0.55222,0.00086,-0.001566
C(StudyID):C(Training),0.01531,4.0,0.184593,0.946359,0.001794,-0.007907
C(StudyID):C(Firstfont),0.046909,1.0,2.262306,0.133379,0.005498,0.00306
C(Training):C(Firstfont),0.214547,4.0,2.586743,0.036627,0.025144,0.015386
C(StudyID):C(Training):C(Firstfont),0.128267,4.0,1.546489,0.188038,0.015032,0.005299
Residual,7.962338,384.0,,,,


Overall model F(19, 384) = 1.431, p = 0.1082

# ('StudyID', 'Font', 'Firstfont') (metric: RT)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),131756.5,1.0,0.146945,0.701573,0.000175,-0.001013
C(Font),15326820.0,1.0,17.093698,3.9e-05,0.020323,0.019112
C(Firstfont),7054584.0,1.0,7.86784,0.005154,0.009354,0.008156
C(StudyID):C(Font),284337.4,1.0,0.317116,0.573504,0.000377,-0.000811
C(StudyID):C(Firstfont),6460738.0,1.0,7.205535,0.007418,0.008567,0.007369
C(Font):C(Firstfont),4395009.0,1.0,4.901668,0.027112,0.005828,0.004633
C(StudyID):C(Font):C(Firstfont),3188248.0,1.0,3.555791,0.0597,0.004228,0.003035
Residual,717308300.0,800.0,,,,


Overall model F(7, 800) = 5.891, p = 0.0000

# ('StudyID', 'Font', 'Firstfont') (metric: AUC)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),0.243532,1.0,20.979577,5e-06,0.025334,0.024097
C(Font),0.000395,1.0,0.034006,0.85374,4.1e-05,-0.001165
C(Firstfont),0.016694,1.0,1.438117,0.230799,0.001737,0.000528
C(StudyID):C(Font),0.004555,1.0,0.392375,0.531232,0.000474,-0.000733
C(StudyID):C(Firstfont),0.006589,1.0,0.567613,0.451431,0.000685,-0.000522
C(Font):C(Firstfont),0.016814,1.0,1.448466,0.22913,0.001749,0.000541
C(StudyID):C(Font):C(Firstfont),0.03786,1.0,3.26156,0.071297,0.003939,0.002728
Residual,9.286428,800.0,,,,


Overall model F(7, 800) = 3.980, p = 0.0003

# ('StudyID', 'Font', 'Firstfont') (metric: AUC_word)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(StudyID),0.064985,1.0,3.105421,0.078804,0.007622,0.005155
C(Font),8.7e-05,1.0,0.004158,0.948616,1e-05,-0.002438
C(Firstfont),0.004084,1.0,0.195177,0.658883,0.000479,-0.001971
C(StudyID):C(Font),0.002837,1.0,0.13558,0.712912,0.000333,-0.002116
C(StudyID):C(Firstfont),0.026164,1.0,1.250309,0.264171,0.003069,0.000613
C(Font):C(Firstfont),0.05508,1.0,2.632116,0.105518,0.00646,0.003996
C(StudyID):C(Font):C(Firstfont),0.085788,1.0,4.099538,0.043566,0.010062,0.007589
Residual,8.28681,396.0,,,,


Overall model F(7, 396) = 1.634, p = 0.1242

# ('Training', 'Font', 'Firstfont') (metric: RT)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(Training),22778210.0,4.0,6.464762,4e-05,0.030238,0.02553
C(Font),15326820.0,1.0,17.399821,3.4e-05,0.020346,0.019154
C(Firstfont),5967989.0,1.0,6.775181,0.009417,0.007922,0.006745
C(Training):C(Font),1086260.0,4.0,0.308295,0.872506,0.001442,-0.003232
C(Training):C(Firstfont),8526485.0,4.0,2.419931,0.047086,0.011319,0.006634
C(Font):C(Firstfont),4640104.0,1.0,5.267694,0.021987,0.00616,0.004984
C(Training):C(Font):C(Firstfont),864388.7,4.0,0.245325,0.912523,0.001147,-0.003526
Residual,694118100.0,788.0,,,,


Overall model F(19, 788) = 3.595, p = 0.0000

# ('Training', 'Font', 'Firstfont') (metric: AUC)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(Training),0.249878,4.0,5.496347,0.000229,0.025992,0.021238
C(Font),0.000395,1.0,0.034732,0.852207,4.1e-05,-0.00114
C(Firstfont),0.015453,1.0,1.359623,0.243956,0.001607,0.000425
C(Training):C(Font),0.018744,4.0,0.412296,0.799861,0.00195,-0.002776
C(Training):C(Firstfont),0.334276,4.0,7.352771,8e-06,0.034771,0.030007
C(Font):C(Firstfont),0.018778,1.0,1.652171,0.199041,0.001953,0.00077
C(Training):C(Font):C(Firstfont),0.019941,4.0,0.438621,0.780749,0.002074,-0.002652
Residual,8.956125,788.0,,,,


Overall model F(19, 788) = 3.027, p = 0.0000

# ('Training', 'Font', 'Firstfont') (metric: AUC_word)



Unnamed: 0,sum_sq,df,F,PR(>F),eta_sq,omega_sq
C(Training),0.128196,4.0,1.552465,0.186372,0.015025,0.005334
C(Font),8.7e-05,1.0,0.004215,0.948267,1e-05,-0.002404
C(Firstfont),0.007135,1.0,0.345629,0.556943,0.000836,-0.001579
C(Training):C(Font),0.082216,4.0,0.995648,0.409757,0.009636,-4.2e-05
C(Training):C(Firstfont),0.202577,4.0,2.453225,0.045517,0.023743,0.014031
C(Font):C(Firstfont),0.058236,1.0,2.820962,0.093854,0.006826,0.004395
C(Training):C(Font):C(Firstfont),0.12633,4.0,1.529869,0.192745,0.014807,0.005116
Residual,7.927265,384.0,,,,


Overall model F(19, 384) = 1.527, p = 0.0729

