# Chapter 11 統計的仮説検定

In [1]:
import numpy as np
import pandas as pd
from scipy import stats
# Ask IPython to format floating-point output with 3 decimal places.
# NOTE(review): several recorded outputs in this notebook still show full
# precision -- confirm the setting takes effect for NumPy scalars.
%precision 3
# Seed NumPy's global (legacy) random state so that the random draws made
# later in the notebook are reproducible from a fresh kernel.
np.random.seed(1111)

In [2]:
# Read the CSV and extract the '重さ' (weight) column as a standalone
# NumPy array (a copy, independent of the DataFrame).
df = pd.read_csv('../data/ch11_potato.csv')
sample = df['重さ'].to_numpy(copy=True)
sample

array([122.02, 131.73, 130.6 , 131.82, 132.05, 126.12, 124.43, 132.89,
       122.79, 129.95, 126.14, 134.45, 127.64, 125.68])

In [3]:
# Arithmetic mean of the observed sample.
s_mean = sample.mean()
s_mean

128.4507142857143

## 11.1 |　統計的仮説検定とは

### 11.1.1　統計的仮説検定の基本

In [4]:
# Normal distribution with mean 130 and standard deviation sqrt(9/14).
rv = stats.norm(loc=130, scale=np.sqrt(9 / 14))
# Point whose upper-tail probability is 0.95, i.e. the lower 5% point.
rv.isf(0.95)

128.68118313069039

In [5]:
# Standardize the sample mean: subtract 130 and divide by sqrt(9/14).
z = (s_mean - 130) / np.sqrt(9 / 14)
z

-1.932298779026813

In [6]:
# Standard normal distribution (mean 0, standard deviation 1).
rv = stats.norm(loc=0, scale=1)
# Lower 5% point: the value whose upper-tail probability is 0.95.
rv.isf(0.95)

-1.6448536269514722

In [8]:
# p-value: lower-tail probability of `rv` at the test statistic `z`.
rv.cdf(z)

0.026661319523126635

### 11.1.2　片側検定と両側検定

In [9]:
# Standardize the sample mean: subtract 130 and divide by sqrt(9/14).
z = (s_mean - 130) / np.sqrt(9 / 14)
z

-1.932298779026813

In [10]:
# Standard normal distribution (mean 0, standard deviation 1).
rv = stats.norm(loc=0, scale=1)
# Central interval containing 95% of the probability mass.
rv.interval(0.95)

(-1.959963984540054, 1.959963984540054)

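片側検定の方が両側検定より帰無仮説を棄却しやすい。実際、検定統計量 z = -1.932 は片側検定の棄却域 (z < -1.645) には入るが、両側検定の採択域 (-1.960, 1.960) の内側にあるため、有意水準 5% の両側検定では帰無仮説は棄却されない。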

In [11]:
# Two-sided p-value: twice the tail probability beyond |z|.
# Evaluating the lower tail at -|z| keeps the result valid for either sign
# of z (a plain `rv.cdf(z) * 2` exceeds 1 whenever z is positive).
rv.cdf(-np.abs(z)) * 2

0.05332263904625327

### 11.1.3　仮説検定における 2つの過誤

__第一種の過誤__  帰無仮説が正しいときに, 帰無仮説を棄却してしまう過誤  
__第二種の過誤__  対立仮説が正しいときに, 帰無仮説を採択してしまう過誤

In [12]:
# Normal distribution with mean 130 and standard deviation 3.
rv = stats.norm(loc=130, scale=3)

In [13]:
# Monte Carlo estimate of how often the test statistic falls below the
# critical value `c` when samples of size 14 are drawn from `rv`.
c = stats.norm().isf(0.95)  # lower 5% point of the standard normal
n_sample = 10000
cnt = 0
# Loop-invariant denominator of the test statistic, computed once.
std_err = np.sqrt(9/14)

for _ in range(n_sample):
    sample_ = np.round(rv.rvs(14), 2)
    s_mean_ = np.mean(sample_)
    # Trailing underscore, like sample_ / s_mean_, so the observed test
    # statistic stored in `z` by earlier cells is not overwritten.
    z_ = (s_mean_ - 130)/std_err

    if z_ < c:
        cnt += 1

# Fraction of simulated samples for which the statistic was below `c`.
cnt/n_sample

0.053

In [14]:
# Normal distribution with mean 128 and standard deviation 3.
rv = stats.norm(loc=128, scale=3)

In [15]:
# Monte Carlo estimate of how often the test statistic is NOT below the
# critical value `c` when samples of size 14 are drawn from `rv`.
c = stats.norm().isf(0.95)  # lower 5% point of the standard normal
n_sample = 10000
cnt = 0
# Loop-invariant denominator of the test statistic, computed once.
std_err = np.sqrt(9/14)

for _ in range(n_sample):
    sample_ = np.round(rv.rvs(14), 2)
    s_mean_ = np.mean(sample_)
    # Trailing underscore, like sample_ / s_mean_, so the observed test
    # statistic stored in `z` by earlier cells is not overwritten.
    z_ = (s_mean_ - 130)/std_err

    if z_ >= c:
        cnt += 1

# Fraction of simulated samples for which the statistic was at or above `c`.
cnt/n_sample

0.197