# 分散が等質でなく、独立した２群のt検定（Welchのt検定）

In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import scipy.stats as stats #統計に関するライブラリ

In [2]:
df = pd.read_csv("./data/03.csv")
df.head()

Unnamed: 0,A,B
0,13.8,3.3
1,10.2,2.6
2,4.6,4.0
3,10.0,4.7
4,4.2,1.9


In [3]:
df.describe()

Unnamed: 0,A,B
count,10.0,11.0
mean,9.73,3.518182
std,4.299625,1.152231
min,4.2,1.9
25%,5.6,2.75
50%,10.1,3.3
75%,13.2,4.5
max,16.1,5.3


In [9]:
df.A

0     13.8
1     10.2
2      4.6
3     10.0
4      4.2
5     16.1
6     14.4
7      4.9
8      7.7
9     11.4
10     NaN
Name: A, dtype: float64

In [4]:
A = df.A.iloc[0:10]
A

0    13.8
1    10.2
2     4.6
3    10.0
4     4.2
5    16.1
6    14.4
7     4.9
8     7.7
9    11.4
Name: A, dtype: float64

In [5]:
B = df.B
B

0     3.3
1     2.6
2     4.0
3     4.7
4     1.9
5     2.9
6     4.7
7     5.3
8     4.3
9     3.0
10    2.0
Name: B, dtype: float64

## F検定

In [6]:
def F_test(A, B):
    A_var = np.var(A, ddof=1)  # Aの不偏分散
    B_var = np.var(B, ddof=1)  # Bの不偏分散
    A_df = len(df.A) - 1  # Aの自由度
    B_df = len(df.B) - 1  # Bの自由度
    f = A_var / B_var  # F比の値
    one_sided_pval1 = stats.f.cdf(f, A_df, B_df)  # 片側検定のp値 1
    one_sided_pval2 = stats.f.sf(f, A_df, B_df)   # 片側検定のp値 2
    two_sided_pval = min(one_sided_pval1, one_sided_pval2) * 2  # 両側検定のp値

    print('F-value:       ', round(f, 5))
    print('p-value(2 sides): ', round(two_sided_pval, 5))

In [7]:
F_test(A, B)

F-value:        13.92458
p-value(2 sides):  0.00027


F検定で分散に差があることが判明したので、Welchのt検定を行う

## Welchのt検定（分散が等質でない）

In [8]:
stats.ttest_ind(A, B, equal_var=False)

Ttest_indResult(statistic=4.426442804187721, pvalue=0.0012285738375064346)

## scipy.stats.ttest_ind(a, b, axis=0, equal_var=True)
Calculates the T-test for the means of TWO INDEPENDENT samples of scores.
This is a two-sided test for the null hypothesis that 2 independent samples have identical average (expected) values. This test assumes that the populations have identical variances.

Parameters:	

a, b : array_like　The arrays must have the same shape, except in the dimension corresponding to axis (the first, by default).

axis : int, optional　Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b).

equal_var : bool, optional　If True (default), perform a standard independent 2 sample test that assumes equal population variances. If False, perform Welch’s t-test, which does not assume equal population variance.
New in version 0.11.0.

Returns:

t : float or array The calculated t-statistic.

prob : float or array The two-tailed p-value.

https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.mannwhitneyu.html#scipy.stats.mannwhitneyu

返り値：

t値 ： 平均値の差の大きさの度合い。

p値（両側検定） ： 母集団の平均が同じだと仮定したときに、平均の差が、検定を行ったデータより大きくなる確率。この値でもって1%や5%の判定をする。