In [1]:
import numpy as np
import pandas as pd

import pingouin

from scipy.stats import norm, poisson, invgamma, f
from scipy.stats import ttest_1samp, ttest_ind, bartlett, levene, f_oneway

Simulate 6 groups, each one woth its own ammount of observations, and following a hierarchical structure

In [2]:
J = 6

np.random.seed(111)
N = poisson.rvs(3, size=J, loc=10)

MU  = -9
TAU = 1
RHO = 3
NU  = 4

np.random.seed(111)
THETA  = norm.rvs(size=J, loc=MU, scale=TAU)
SIGMA2 = invgamma.rvs(size=J, a=NU/2, scale=NU/2*RHO**2)

SIGMA = np.sqrt(SIGMA2)

ALPHA_LEVEL = 0.05

VARIABLE = 'Y'

In [3]:
Y = norm.rvs(size=N[0], loc=THETA[0], scale=SIGMA[0])
Dat = pd.DataFrame(np.array([Y, [0]*N[0]]).T)

for j in range(1,J):
    Y = norm.rvs(size=N[j], loc=THETA[j], scale=SIGMA[j])
    DatAux = pd.DataFrame(np.array([Y, [j]*N[j]]).T)
    Dat = pd.concat([Dat, DatAux])

Dat = Dat.rename(columns={0:VARIABLE, 1:'Group'})
Dat['Group'] = Dat['Group'].astype(int)

In [4]:
DatGroup = Dat.groupby("Group").agg(["count", "mean", "var"]).reset_index()

In [5]:
DatGroup

Unnamed: 0_level_0,Group,Y,Y,Y
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,var
0,0,12,-10.148234,1.862126
1,1,12,-8.706604,14.571669
2,2,10,-8.423547,3.435911
3,3,12,-7.731961,11.539293
4,4,12,-12.217278,79.296169
5,5,14,-9.041636,3.623141


---

## One sample Student's t-test

### Comparing the mean of Group 0 against $\mu_0=-9$

In [6]:
mu0 = -9

In [7]:
pingouin.ttest(Dat[Dat["Group"]==0]["Y"], mu0)

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-2.914851,11,two-sided,0.014066,"[-11.02, -9.28]",0.841445,4.492,0.756126


In [8]:
ttest_1samp(Dat[Dat["Group"]==0]["Y"], mu0)

TtestResult(statistic=-2.914850712740165, pvalue=0.014066446537359667, df=11)

The $p$-value is so low, that we can reject the hypothesis that $\theta_0=-9$

### Comparing the mean of Group 5 against $\mu_0=-9$

In [9]:
pingouin.ttest(Dat[Dat["Group"]==5]["Y"], mu0)

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-0.081844,13,two-sided,0.936017,"[-10.14, -7.94]",0.021874,0.271,0.050662


In [10]:
ttest_1samp(Dat[Dat["Group"]==5]["Y"], mu0)

TtestResult(statistic=-0.08184406937964947, pvalue=0.9360174088882173, df=13)

The $p$-value is so high, that we cannot reject the hypothesis that $\theta_5=-9$

---

## Testing equal variance in two groups

### Comparing Group 2 and Group 5

In [11]:
FTest = (DatGroup[DatGroup["Group"]==2]["Y"]["var"]).values / (DatGroup[DatGroup["Group"]==5]["Y"]["var"]).values
n1 = DatGroup[DatGroup["Group"]==2]["Y"]["count"]
n2 = DatGroup[DatGroup["Group"]==5]["Y"]["count"]

f.sf(FTest, n1, n2)

array([0.52220779])

### Bartlett and Levene tests

Bartlett's and Levene's tests are popular to test equal variance between groups

In [12]:
bartlett(Dat[Dat["Group"]==5]["Y"], Dat[Dat["Group"]==2]["Y"])

BartlettResult(statistic=0.007122692067720255, pvalue=0.9327415533221795)

In [13]:
levene(Dat[Dat["Group"]==5]["Y"], Dat[Dat["Group"]==2]["Y"])

LeveneResult(statistic=0.1411107486371548, pvalue=0.7107769681238512)

We cannot reject the hypothesis of equal variances.

### Comparing Group 0 and Group 4

We will test the hypothesis that the variance of the groups 0 and 4 are equal

In [14]:
FTest = (DatGroup[DatGroup["Group"]==4]["Y"]["var"]).values / (DatGroup[DatGroup["Group"]==0]["Y"]["var"]).values
n1 = DatGroup[DatGroup["Group"]==4]["Y"]["count"]
n2 = DatGroup[DatGroup["Group"]==0]["Y"]["count"]

f.sf(FTest, n1, n2)

array([6.10386152e-08])

In [15]:
bartlett(Dat[Dat["Group"]==0]["Y"], Dat[Dat["Group"]==4]["Y"])

BartlettResult(statistic=25.374224916868826, pvalue=4.721851987853883e-07)

In [16]:
levene(Dat[Dat["Group"]==0]["Y"], Dat[Dat["Group"]==4]["Y"])

LeveneResult(statistic=6.152892806692913, pvalue=0.021251787316881685)

We reject the hypothesis of equal variances.

In conclusion, we can act as if Groups 2 and 5 have the same variance. But Groups 0 and 4 have different variances.

---

## Testing equal mean in two groups

### Comparing Group 0 and Group 5

In [17]:
pingouin.ttest(Dat[Dat["Group"]==0]["Y"], Dat[Dat["Group"]==5]["Y"])

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-1.719903,23.344629,two-sided,0.098682,"[-2.44, 0.22]",0.659436,1.05,0.363095


In [18]:
ttest_ind(Dat[Dat["Group"]==0]["Y"], Dat[Dat["Group"]==5]["Y"])

TtestResult(statistic=-1.6762576764168238, pvalue=0.10666807353971323, df=24.0)

We have slight evidence against the hypothesis of equal means

### Comparing Group 0 and Group 4

In [19]:
pingouin.ttest(Dat[Dat["Group"]==0]["Y"], Dat[Dat["Group"]==4]["Y"])

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,0.795599,22,two-sided,0.434766,"[-3.32, 7.46]",0.324802,0.47,0.118586


In [20]:
ttest_ind(Dat[Dat["Group"]==0]["Y"], Dat[Dat["Group"]==4]["Y"])

TtestResult(statistic=0.7955985692871433, pvalue=0.4347661300165786, df=22.0)

We cannot reject the hypothesis of equal means.

### Comparing Group 0 and Group 5

But, before we concluded that these groups have different variance. We can take this into account

In [21]:
pingouin.ttest(Dat[Dat["Group"]==0]["Y"], Dat[Dat["Group"]==4]["Y"], correction=True)

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,0.795599,11.516345,two-sided,0.442356,"[-3.62, 7.76]",0.324802,0.47,0.118586


In [22]:
ttest_ind(Dat[Dat["Group"]==0]["Y"], Dat[Dat["Group"]==4]["Y"], equal_var=False)

TtestResult(statistic=0.7955985692871433, pvalue=0.4423556292510572, df=11.51634521273693)

---

## One-way ANOVA

### By hand

In [23]:
ntot = sum(DatGroup["Y"]["count"])
nBar = ntot / J

dfB = J - 1
dfW = ntot - J

S2Bar = DatGroup["Y"]["var"] / DatGroup["Y"]["count"]
Phi2 = 1/sum(1 / S2Bar)

HatMu = Phi2 * sum(DatGroup["Y"]["mean"] / S2Bar)

In [24]:
SSB = sum(DatGroup["Y"]["count"] * (DatGroup["Y"]["mean"] - HatMu)**2)
SSW = sum((DatGroup["Y"]["count"]-1) * DatGroup["Y"]["var"])

MSB = SSB / dfB
MSW = SSW / dfW

FAnova = MSB / MSW

PValue = f.sf(FAnova, dfB, dfW)

In [25]:
AnovaTable = pd.DataFrame(
    {"df":[dfB, dfW], "SS":[SSB, SSW], "MS":[MSB, MSW], "F":[FAnova, np.nan], "PValue":[PValue, np.nan]},
    index=["Group", "Within"]
)

In [26]:
AnovaTable

Unnamed: 0,df,SS,MS,F,PValue
Group,5,152.908835,30.581767,1.604467,0.171163
Within,66,1257.98586,19.060392,,


In this case, we do not reject the hypothesis of equal mean in the groups.

The common mean can be estimated by

In [27]:
HatMu

-9.315701923151291

The intra-group variance can be estimated by

In [28]:
MSW

19.060391825730775

However, if we would reject the hypothesis of same mean for the groups, then the group of each mean would be estimated by

In [29]:
DatGroup["Y"]["mean"]

0   -10.148234
1    -8.706604
2    -8.423547
3    -7.731961
4   -12.217278
5    -9.041636
Name: mean, dtype: float64

And the inter-group variance would be estimated by

In [30]:
HatTau2 = (MSB - MSW) / nBar

We can compare these estimations with their real values

In [31]:
HatMu, MU

(-9.315701923151291, -9)

In [32]:
np.sqrt(HatTau2), TAU

(0.9798543751756985, 1)

In [33]:
DatGroup["Y"]["mean"], THETA

(0   -10.148234
 1    -8.706604
 2    -8.423547
 3    -7.731961
 4   -12.217278
 5    -9.041636
 Name: mean, dtype: float64,
 array([-10.13383833,  -8.61568081,  -7.50344622,  -9.3553823 ,
         -9.78753354,  -9.45943891]))

In [34]:
MSW, SIGMA2

(19.060391825730775,
 array([  3.15964669,   9.23957758,   6.53127852,   7.92691449,
        125.96344253,   6.51834957]))

### Using pingouin

In [35]:
pingouin.anova(dv="Y", between="Group", data=Dat, detailed=True)

Unnamed: 0,Source,SS,DF,MS,F,p-unc,np2
0,Group,152.451747,5,30.490349,1.599671,0.172473,0.108088
1,Within,1257.98586,66,19.060392,,,


### Using scipy

Testing equal mean for all the groups

In [36]:
f_oneway(
    Dat[Dat["Group"]==0]["Y"],
    Dat[Dat["Group"]==1]["Y"],
    Dat[Dat["Group"]==2]["Y"],
    Dat[Dat["Group"]==3]["Y"],
    Dat[Dat["Group"]==4]["Y"],
    Dat[Dat["Group"]==5]["Y"]
)

F_onewayResult(statistic=1.599670646695831, pvalue=0.17247260234129153)