In [1]:
from itertools import product
from math import factorial
from typing import Generator

import numpy as np
from scipy.stats import chi2, chi2_contingency, fisher_exact, hypergeom

In [2]:
# Alternative example table; uncomment to use it instead of the one below.
# table = np.array([
#     [3, 4],
#     [3, 11],
# ])

# Observed 2x2 contingency table of counts; the cells below read it as `table`.
table = np.array([
    [3, 6],
    [3, 6],
])

## Критерий $\chi^2$

In [3]:
def calculate_chi2_statistics(table: np.ndarray) -> float:
    """Compute the chi-square statistic of a contingency table.

    Evaluates ``N * (sum_ij n_ij ** 2 / (r_i * c_j) - 1)``, where ``n_ij`` are
    the cells, ``r_i`` and ``c_j`` the row and column totals, and ``N`` the
    grand total of the table.

    Args:
        table: 2-D array of counts.

    Returns:
        The chi-square statistic (no continuity correction is applied).
    """
    n_rows, n_cols = table.shape[0], table.shape[1]
    row_totals = [table[row].sum() for row in range(n_rows)]
    col_totals = [table[:, col].sum() for col in range(n_cols)]

    # Accumulate cell by cell in row-major order.
    ratio_sum = 0
    for row in range(n_rows):
        for col in range(n_cols):
            ratio_sum += table[row][col] ** 2 / (row_totals[row] * col_totals[col])

    return table.sum() * (ratio_sum - 1)

In [4]:
# Chi-square statistic of `table` from the hand-written implementation.
chi2_statistics = calculate_chi2_statistics(table)
print(f'chi2: {chi2_statistics}')

chi2: 0.0


In [5]:
# Degrees of freedom of an r x c contingency table are (r - 1) * (c - 1);
# this evaluates to 1 for the 2x2 table used here.
degrees_of_freedom = (table.shape[0] - 1) * (table.shape[1] - 1)

# Upper-tail probability of the chi-square distribution at the statistic.
chi2_p_value = chi2.sf(chi2_statistics, degrees_of_freedom)
print(f'p-value: {chi2_p_value}')

p-value: 1.0


In [6]:
# Reference values from scipy. `correction=False` is passed so that no
# continuity correction is applied, matching the hand-written statistic.
# The last two returned values are not needed here and are discarded.
true_chi2_statistics, true_chi2_p_value, _, _ = chi2_contingency(table, correction=False)
print(f'chi2 (scipy): {true_chi2_statistics}')
print(f'p-value (scipy): {true_chi2_p_value}')

chi2 (scipy): 0.0
p-value (scipy): 1.0


In [7]:
# Absolute differences between the scipy values and the hand-written ones.
print(f'chi2 error: {abs(true_chi2_statistics - chi2_statistics)}')
print(f'p-value error: {abs(true_chi2_p_value - chi2_p_value)}')

chi2 error: 0.0
p-value error: 0.0


## Точный тест Фишера

In [8]:
# table: 2x2
def _calculate_table_probability(table: np.ndarray) -> float:
    table_sum = table.sum()

    table = table.copy()
    axis_sums = np.array([table.sum(axis=0), table.sum(axis=1)])

    for i, j in product(range(table.shape[0]), range(table.shape[1])):
        table[i][j] = factorial(table[i][j])
        axis_sums[i][j] = factorial(axis_sums[i][j])

    return axis_sums.prod() / (table.prod() * factorial(table_sum))


# parent_table: 2x2
def _generate_table(parent_table: np.ndarray) -> Generator[np.ndarray, None, None]:
    axis_sums = np.array([parent_table.sum(axis=0), parent_table.sum(axis=1)])

    for x in range(
            max(0, axis_sums[0][0] - axis_sums[1][1]),
            min(parent_table[0][1], parent_table[1][0]) + parent_table[0][0] + 1
    ):
        table = np.zeros(parent_table.shape)
        table[0][0] = x
        table[0][1] = axis_sums[1][0] - x
        table[1][0] = axis_sums[0][0] - x
        table[1][1] = axis_sums[1][1] - axis_sums[0][0] + x
        yield table


def calculate_fisher_test_p_value(table: np.ndarray, verbose: bool = True) -> float:
    """Compute a two-sided exact-test p-value for a 2x2 contingency table.

    Every table sharing the margins of ``table`` is enumerated, and the
    probabilities of those whose chi-square statistic is at least as large
    as the observed one are summed.

    NOTE(review): tables are ranked by chi-square statistic here, whereas
    scipy.stats.fisher_exact ranks them by probability; the two orderings
    may not coincide for every table - confirm this is the intended variant.

    Args:
        table: 2x2 array of integer counts.
        verbose: When True (the default), print the observed statistic and,
            for every enumerated table, its statistic together with the
            probability reported by ``scipy.stats.hypergeom`` as an
            independent cross-check.

    Returns:
        The p-value, capped at 1.0.
    """
    observed_chi2 = calculate_chi2_statistics(table)

    if verbose:
        print(f'chi2(table): {observed_chi2}\n')

    # A zero row or column total makes the statistic 0/0 = NaN. The margins
    # then admit a single table, so the observed table is the only outcome.
    if np.isnan(observed_chi2):
        return 1.0

    # Tables that are equally extreme in exact arithmetic can differ in the
    # last bits of their floating-point statistic (e.g. 3.999999999999998 vs
    # 4.000000000000002), so the comparison needs a small tolerance. For a
    # 2x2 table the distinct statistic values are spaced far wider than this.
    tolerance = 1e-9 * max(1.0, abs(observed_chi2))

    p_value = 0.0
    for possible_table in _generate_table(table):
        table_chi2 = calculate_chi2_statistics(possible_table)

        if verbose:
            reference_probability = hypergeom.pmf(
                possible_table[0][0],
                possible_table.sum(),
                possible_table[0][0] + possible_table[0][1],
                possible_table[0][0] + possible_table[1][0]
            )
            print(f'Current table:\n{possible_table}')
            print(f'chi2 = {table_chi2}, P = {reference_probability}\n')

        if table_chi2 >= observed_chi2 - tolerance:
            p_value += _calculate_table_probability(possible_table)

    # Rounding in the sum can push the total marginally above one.
    return min(1.0, p_value)

In [9]:
# p-value from the hand-written exact test; the call also prints each
# enumerated table with its statistic and probability.
fisher_p_value = calculate_fisher_test_p_value(table)
print(f'p-value: {fisher_p_value}')

chi2(table): 0.0

Current table:
[[0. 9.]
 [6. 3.]]
chi2 = 8.999999999999996, P = 0.004524886877828052

Current table:
[[1. 8.]
 [5. 4.]]
chi2 = 3.999999999999998, P = 0.061085972850678835

Current table:
[[2. 7.]
 [4. 5.]]
chi2 = 1.0000000000000004, P = 0.24434389140271537

Current table:
[[3. 6.]
 [3. 6.]]
chi2 = 0.0, P = 0.38009049773755743

Current table:
[[4. 5.]
 [2. 7.]]
chi2 = 1.0000000000000004, P = 0.24434389140271537

Current table:
[[5. 4.]
 [1. 8.]]
chi2 = 4.000000000000002, P = 0.061085972850678835

Current table:
[[6. 3.]
 [0. 9.]]
chi2 = 9.0, P = 0.00452488687782806

p-value: 1.0


In [10]:
# Reference p-value from scipy; the first returned value is discarded.
_, true_fisher_p_value = fisher_exact(table)
print(f'p-value (scipy): {true_fisher_p_value}')

p-value (scipy): 1.0


In [11]:
# Absolute difference between the scipy p-value and the hand-written one.
print(f'p-value error: {abs(true_fisher_p_value - fisher_p_value)}')

p-value error: 0.0
