# 分散が等質で独立した２群のt検定（Studentのt検定）

In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import scipy.stats as stats #統計に関するライブラリ

In [2]:
# Load the two-group data set from CSV and preview the first rows.
df = pd.read_csv("./data/02.csv")
df.head()

Unnamed: 0,ID_A,A,ID_B,B
0,1,80,1,75
1,2,75,2,65
2,3,80,3,80
3,4,95,4,85
4,5,90,5,75


In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()

Unnamed: 0,ID_A,A,ID_B,B
count,8.0,8.0,8.0,8.0
mean,4.5,84.375,4.5,76.25
std,2.44949,6.781013,2.44949,6.408699
min,1.0,75.0,1.0,65.0
25%,2.75,80.0,2.75,73.75
50%,4.5,82.5,4.5,77.5
75%,6.25,90.0,6.25,80.0
max,8.0,95.0,8.0,85.0


## F検定

In [4]:
def F_test(A, B):
    """Run a two-sided F-test for equality of variances and print the result.

    The F ratio is the unbiased variance of ``A`` divided by the unbiased
    variance of ``B``. The two-sided p-value is twice the smaller of the two
    one-sided tail probabilities of the F distribution with
    ``len(A) - 1`` and ``len(B) - 1`` degrees of freedom.

    Parameters
    ----------
    A, B : array_like
        One-dimensional samples (e.g. lists, NumPy arrays or pandas Series).
        They may have different lengths.

    Returns
    -------
    None
        The F ratio and the two-sided p-value are printed, each rounded to
        five decimal places.
    """
    A_var = np.var(A, ddof=1)  # unbiased variance of A
    B_var = np.var(B, ddof=1)  # unbiased variance of B
    # Degrees of freedom are derived from the arguments themselves rather than
    # from the notebook-global `df`, so the function is correct for any pair
    # of samples and does not depend on hidden kernel state.
    A_df = len(A) - 1  # degrees of freedom of A
    B_df = len(B) - 1  # degrees of freedom of B
    f = A_var / B_var  # F ratio
    one_sided_pval1 = stats.f.cdf(f, A_df, B_df)  # lower-tail one-sided p-value
    one_sided_pval2 = stats.f.sf(f, A_df, B_df)   # upper-tail one-sided p-value
    two_sided_pval = min(one_sided_pval1, one_sided_pval2) * 2  # two-sided p-value

    print('F-value:       ', round(f, 5))
    print('p-value(2 sides): ', round(two_sided_pval, 5))

In [5]:
# Compare the variances of groups A and B before choosing the t-test variant.
F_test(df["A"], df["B"])

F-value:        1.11957
p-value(2 sides):  0.88539


### F検定で分散に有意な差が認められなかった（p = 0.885 > 0.05）ので、等分散を仮定してStudentのt検定を行う

## Studentのt検定（分散が等質）

In [6]:
# Student's t-test: equal_var=True (the default) assumes equal population variances.
stats.ttest_ind(df["A"], df["B"], equal_var=True)

Ttest_indResult(statistic=2.463060426921489, pvalue=0.027347565851526662)

## scipy.stats.ttest_ind(a, b, axis=0, equal_var=True)
Calculates the T-test for the means of TWO INDEPENDENT samples of scores.
This is a two-sided test for the null hypothesis that 2 independent samples have identical average (expected) values. This test assumes that the populations have identical variances.

Parameters:	

a, b : array_like　The arrays must have the same shape, except in the dimension corresponding to axis (the first, by default).

axis : int, optional　Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b).

equal_var : bool, optional　If True (default), perform a standard independent 2 sample test that assumes equal population variances [R263]. If False, perform Welch’s t-test, which does not assume equal population variance [R264].
New in version 0.11.0.

Returns:

t : float or array The calculated t-statistic.

prob : float or array The two-tailed p-value.

https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.ttest_ind.html#scipy.stats.ttest_ind

返り値：

t値 ： 2群の平均値の差を、その標準誤差で割った値。絶対値が大きいほど、ばらつきに対して平均値の差が大きいことを示す。

p値（両側検定） ： 母集団の平均が同じだと仮定したときに、平均の差の絶対値が、実際に観測されたデータでの差以上になる確率。この値を有意水準（1%や5%）と比較して、帰無仮説を棄却するかどうかを判定する。