In [22]:
import numpy as np

## ANOVA Usando SciPy

Referência: (https://pythonfordatascience.org/anova-python/)

In [2]:
import scipy.stats as stats

In [12]:
import pandas as pd
df = pd.read_csv('cisal.csv')

In [4]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [45]:
df.head()

Unnamed: 0,CP,Base,Cola,Substrato
0,34501,Aco Galvanizado,Killing,Taraflex
1,39522,Aco Galvanizado,Killing,Taraflex
2,33504,Aco Galvanizado,Killing,Taraflex
3,40285,Aco Galvanizado,Killing,Taraflex
4,32862,Aco Galvanizado,Killing,Taraflex


In [52]:
df['CP'] = df['CP'].apply(lambda x: x.replace(',','.'))
df['CP'] = df['CP'].convert_objects(convert_numeric=True)

For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  


In [55]:
Killing = df['CP'][df['Cola'] == ' Killing ']
Artecola =  df['CP'][df['Cola'] == ' Artecola ']

In [64]:
stats.f_oneway(Killing,Artecola)

F_onewayResult(statistic=101.9677428425113, pvalue=2.7901262306825697e-17)

In [63]:
F, p = stats.f_oneway(Killing,Artecola)
print('F statistic = {:5.3f} and probability p = {:5.3f}'.format(F, p)) 

F statistic = 101.968 and probability p = 0.000


In [57]:
results = ols('CP ~ C(Cola)', data=df).fit()
results.summary()

0,1,2,3
Dep. Variable:,CP,R-squared:,0.49
Model:,OLS,Adj. R-squared:,0.48
Method:,Least Squares,F-statistic:,48.41
Date:,"Tue, 18 Jun 2019",Prob (F-statistic):,5.5800000000000005e-22
Time:,09:07:11,Log-Likelihood:,203.77
No. Observations:,155,AIC:,-399.5
Df Residuals:,151,BIC:,-387.4
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.3097,0.015,21.039,0.000,0.281,0.339
C(Cola)[T. Artecola ],-0.0214,0.017,-1.242,0.216,-0.055,0.013
C(Cola)[T. Killing],0.0772,0.020,3.909,0.000,0.038,0.116
C(Cola)[T. Killing ],0.1216,0.017,7.071,0.000,0.088,0.156

0,1,2,3
Omnibus:,1.004,Durbin-Watson:,1.197
Prob(Omnibus):,0.605,Jarque-Bera (JB):,0.882
Skew:,0.185,Prob(JB):,0.644
Kurtosis:,2.99,Cond. No.,6.64


In [58]:
aov_table = sm.stats.anova_lm(results, typ=2)
aov_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(Cola),0.62957,3.0,48.410692,5.575074000000001e-22
Residual,0.654573,151.0,,


In [59]:
def anova_table(aov):
    aov['mean_sq'] = aov[:]['sum_sq']/aov[:]['df']
    
    aov['eta_sq'] = aov[:-1]['sum_sq']/sum(aov['sum_sq'])
    
    aov['omega_sq'] = (aov[:-1]['sum_sq']-(aov[:-1]['df']*aov['mean_sq'][-1]))/(sum(aov['sum_sq'])+aov['mean_sq'][-1])
    
    cols = ['sum_sq', 'df', 'mean_sq', 'F', 'PR(>F)', 'eta_sq', 'omega_sq']
    aov = aov[cols]
    return aov

anova_table(aov_table)

Unnamed: 0,sum_sq,df,mean_sq,F,PR(>F),eta_sq,omega_sq
C(Cola),0.62957,3.0,0.209857,48.410692,5.575074000000001e-22,0.490265,0.478522
Residual,0.654573,151.0,0.004335,,,,


In [60]:
#Teste Shapiro-Wilk de normalidade
stats.shapiro(results.resid)

(0.9876656532287598, 0.1882605403661728)

In [62]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multicomp import MultiComparison

mc = MultiComparison(df['CP'], df['Cola'])
mc_results = mc.tukeyhsd()
print(mc_results)

 Multiple Comparison of Means - Tukey HSD,FWER=0.05
  group1     group2   meandiff lower  upper  reject
---------------------------------------------------
 Artecola   Artecola  -0.0214  -0.066 0.0233 False 
 Artecola    Killing   0.0772  0.0259 0.1285  True 
 Artecola   Killing    0.1216  0.0769 0.1662  True 
 Artecola    Killing   0.0986  0.0573 0.1398  True 
 Artecola   Killing    0.1429  0.1103 0.1755  True 
  Killing   Killing    0.0443  0.0031 0.0856  True 
---------------------------------------------------
