In [1]:
import numpy as np
import pandas as pd
import scipy.stats as sps
import matplotlib.pyplot as plt

%matplotlib inline

### Критерий Бартлетта

$X_{ij} \sim \mathcal{N}(\mu_j, \sigma_j^2),\ \ i=1,...,n_j,\ \ j=1,...,k$

$\mathsf{H}_0\colon \sigma_1 = ... = \sigma_k$

<a href="https://docs.scipy.org/doc/scipy-0.16.1/reference/generated/scipy.stats.bartlett.html#scipy.stats.bartlett">`bartlett`</a>`(sample1, sample2, ...): statistic, pvalue`


### $F$-критерий однофакторного дисперсионного анализа

$X_{ij} \sim \mathcal{N}(\mu_j, \sigma^2),\ \ i=1,...,n_j,\ \ j=1,...,k$

$\mathsf{H}_0\colon \mu_1 = ... = \mu_k$

<a href="https://docs.scipy.org/doc/scipy-0.16.1/reference/generated/scipy.stats.f_oneway.html">`f_oneway`</a>`(sample1, sample2, ...): statistic, pvalue`

In [14]:
# Five standard-normal groups of sizes 20..24: identical means and variances
# across groups, fed to both the Bartlett test and one-way ANOVA.
samples = [sps.norm.rvs(size=20 + group) for group in range(5)]
sps.bartlett(*samples), sps.f_oneway(*samples)

(BartlettResult(statistic=5.302409501752963, pvalue=0.2576514519129599),
 F_onewayResult(statistic=0.7483479585522701, pvalue=0.5612738682824921))

In [15]:
# Group `group` is normal with mean `group` and unit scale (sizes 20..24):
# the means differ between groups while the variances stay equal.
samples = [sps.norm(loc=group).rvs(size=20 + group) for group in range(5)]
sps.bartlett(*samples), sps.f_oneway(*samples)

(BartlettResult(statistic=9.146888050337433, pvalue=0.057531028801199424),
 F_onewayResult(statistic=46.38914385075102, pvalue=2.1428650506386594e-22))

In [16]:
# Group `group` is normal with zero mean and scale 1 + group/2 (sizes 20..24):
# the variances differ between groups while the means stay equal.
samples = [
    sps.norm(scale=1 + group / 2).rvs(size=20 + group)
    for group in range(5)
]
sps.bartlett(*samples), sps.f_oneway(*samples)

(BartlettResult(statistic=35.02702491180489, pvalue=4.586347994808979e-07),
 F_onewayResult(statistic=1.3661820497330388, pvalue=0.2507041179938111))

In [19]:
# Group `group` is normal with mean `group` and scale 1 + group/2 (sizes 20..24):
# both the means and the variances differ between groups.
samples = [
    sps.norm(loc=group, scale=1 + group / 2).rvs(size=20 + group)
    for group in range(5)
]
sps.bartlett(*samples), sps.f_oneway(*samples)

(BartlettResult(statistic=19.614971279326745, pvalue=0.0005948230266618849),
 F_onewayResult(statistic=14.064849135285003, pvalue=3.186325091862291e-09))

### Критерий Краскела-Уоллиса

$X_{ij},\ \ i=1,...,n_j,\ \ j=1,...,k$ --- однофакторная модель, случай независимых выборок

$\mathsf{H}_0\colon \mu_1 = ... = \mu_k$

<a href="https://docs.scipy.org/doc/scipy-0.16.1/reference/generated/scipy.stats.kruskal.html#scipy.stats.kruskal">`kruskal`</a>`(sample1, sample2, ...): statistic, pvalue`

In [20]:
# Five standard-normal groups of sizes 20..24, all from the same distribution,
# passed to the Kruskal-Wallis test.
samples = [sps.norm.rvs(size=20 + group) for group in range(5)]
sps.kruskal(*samples)

KruskalResult(statistic=3.9994042474280036, pvalue=0.4060864820570774)

In [21]:
# Normal groups whose mean equals the group index (0..4), unit scale,
# sizes 20..24: the groups are shifted relative to each other.
samples = [sps.norm(loc=group).rvs(size=20 + group) for group in range(5)]
sps.kruskal(*samples)

KruskalResult(statistic=76.38012704917844, pvalue=1.0172837687548495e-15)

In [22]:
# Zero-mean normal groups with scale 1..5 (sizes 20..24): same centre,
# different spread.
samples = [sps.norm(scale=group + 1).rvs(size=20 + group) for group in range(5)]
sps.kruskal(*samples)

KruskalResult(statistic=0.854885258956358, pvalue=0.9309421349904955)

In [23]:
# Five groups from the standard exponential distribution (sizes 20..24):
# a non-normal case where all groups share one distribution.
samples = [sps.expon.rvs(size=20 + group) for group in range(5)]
sps.kruskal(*samples)

KruskalResult(statistic=0.9416866792360565, pvalue=0.918505484762733)

In [24]:
# Exponential groups with scale 1..5 (sizes 20..24): the distributions now
# differ between groups.
samples = [sps.expon(scale=group + 1).rvs(size=20 + group) for group in range(5)]
sps.kruskal(*samples)

KruskalResult(statistic=19.558231702777107, pvalue=0.0006103333303977086)

### Критерий Фридмана

$X_{ij},\ \ i=1,...,n,\ \ j=1,...,k$ --- однофакторная модель, случай связных выборок

$\mathsf{H}_0\colon \beta_1 = ... = \beta_k$

<a href="https://docs.scipy.org/doc/scipy-0.16.1/reference/generated/scipy.stats.friedmanchisquare.html#scipy.stats.friedmanchisquare">`friedmanchisquare`</a>`(sample1, sample2, ...): statistic, pvalue`

Все выборки одинакового размера, количество выборок не менее 3.

In [34]:
# Friedman test setup: `sample_size` rows (blocks) by `factor_size` columns
# (treatments). `alpha` is a per-row effect stored as a column vector and
# `beta` is a per-column effect stored as a row vector, so `alpha + beta`
# broadcasts to a (sample_size, factor_size) grid of means.
sample_size, factor_size = 30, 5
alpha = np.linspace(0, 10, sample_size).reshape(-1, 1)
beta = np.zeros((1, factor_size))  # all column effects are zero here

# One unit-scale normal draw per cell of the broadcast grid of means.
samples = sps.norm(loc=1 + alpha + beta).rvs()
print(samples.shape)
# The test takes one argument per column, hence the transpose.
sps.friedmanchisquare(*samples.T)

(30, 5)


FriedmanchisquareResult(statistic=3.8666666666666742, pvalue=0.42435118480771783)

In [35]:
# Column effects are now 0, 1, ..., factor_size - 1, so the columns differ.
# `alpha` and `factor_size` are reused from the earlier Friedman setup cell.
beta = np.arange(factor_size).reshape(1, -1)

# One unit-scale normal draw per cell of the broadcast grid of means.
samples = sps.norm(loc=1 + alpha + beta).rvs()
print(samples.shape)
# The test takes one argument per column, hence the transpose.
sps.friedmanchisquare(*samples.T)

(30, 5)


FriedmanchisquareResult(statistic=82.42666666666673, pvalue=5.3299900664472323e-17)

In [37]:
# Same column effects as before (0, 1, ..., factor_size - 1), but the noise
# scale now varies by row: each row uses its own `alpha` value as the scale.
# `alpha` and `factor_size` are reused from the earlier Friedman setup cell.
beta = np.arange(factor_size).reshape(1, -1)

# NOTE(review): `alpha` starts at 0 (np.linspace(0, 10, ...)), so the first
# row is drawn with scale=0 — confirm a degenerate first row is intended.
samples = sps.norm(loc=1 + alpha + beta, scale=alpha).rvs()
print(samples.shape)
# The test takes one argument per column, hence the transpose.
sps.friedmanchisquare(*samples.T)

(30, 5)


FriedmanchisquareResult(statistic=10.186666666666724, pvalue=0.03739799984860937)

---------

Прикладная статистика и анализ данных, 2019

Никита Волков

https://mipt-stats.gitlab.io/