In [1]:
import pandas as pd
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.graphics.factorplots import interaction_plot
import matplotlib.pyplot as plt
from scipy import stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from io import StringIO

In [2]:
data = pd.read_csv('data.csv')
data.head()

Unnamed: 0,Accuracy,Model,Category
0,91.647549,Standard,Type I
1,93.550854,Standard,Type I
2,69.61509,Standard,Type I
3,89.480839,Standard,Type I
4,93.601835,Standard,Type I


In [3]:
data['Combined Category'] = [f'{m}-{c}' for m, c in zip(data['Model'], data['Category'])]

In [4]:
data.head(5)

Unnamed: 0,Accuracy,Model,Category,Combined Category
0,91.647549,Standard,Type I,Standard-Type I
1,93.550854,Standard,Type I,Standard-Type I
2,69.61509,Standard,Type I,Standard-Type I
3,89.480839,Standard,Type I,Standard-Type I
4,93.601835,Standard,Type I,Standard-Type I


In [5]:
formula = 'Accuracy ~ C(Model) + C(Category) + C(Model):C(Category)'
model = ols(formula, data).fit()
aov_table = anova_lm(model, typ=2)

In [6]:
aov_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(Model),2579.683595,1.0,34.654132,1.151314e-07
C(Category),17544.760664,3.0,78.562407,1.177642e-22
C(Model):C(Category),1390.173403,3.0,6.224956,0.0008081863
Residual,5359.742296,72.0,,


In [7]:
m_comp = pairwise_tukeyhsd(endog=data['Accuracy'], groups=data['Combined Category'], alpha=0.05)
table = m_comp.summary()
testdata = StringIO(table.as_csv())
table_df = pd.read_csv(testdata)
table_df

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Multiple Comparison of Means - Tukey HSD,FWER=0.05
group1,group2,meandiff,p-adj,lower,upper,reject
LatentNet-Hair Density,LatentNet-Type I,25.1701,0.001,13.1252,37.215,True
LatentNet-Hair Density,LatentNet-Type II,14.7012,0.0067,2.6563,26.746,True
LatentNet-Hair Density,LatentNet-Type III,-0.0441,0.9,-12.089,12.0007,False
LatentNet-Hair Density,Standard-Hair Density,-19.0669,0.001,-31.1117,-7.022,True
LatentNet-Hair Density,Standard-Type I,26.0631,0.001,14.0183,38.108,True
LatentNet-Hair Density,Standard-Type II,6.4693,0.678,-5.5756,18.5141,False
LatentNet-Hair Density,Standard-Type III,-19.0669,0.001,-31.1117,-7.022,True
LatentNet-Type I,LatentNet-Type II,-10.4689,0.1351,-22.5138,1.5759,False
LatentNet-Type I,LatentNet-Type III,-25.2142,0.001,-37.2591,-13.1694,True


In [8]:
table_df.to_csv('HSD_TEST.csv', index=True)