In [176]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from math import sqrt
from collections import Counter
from scipy.stats import chi2

In [177]:
sizes = [5, 10, 100, 200, 400, 600, 800, 1000]

## Генерация выборок выбранных случайных величин

In [178]:
def P(x):
  t = 1/13
  return -t**x/(np.log(1-t) * x)

In [179]:
def log_dist_model(u):
  m0 = 1
  s = 0
  while s <= u:
    s += P(m0)
    if s > u:
      return m0
    m0 += 1

In [180]:
df_log = dict() # словарь, где ключ - объем выборки, значение - 5 выборок с логарифмическим распределением
for n in sizes:
  np.random.seed(99)
  df_uni = np.random.uniform(size=(5, n))# 5 выборок размера n
  df_log[n] = [[log_dist_model(u) for u in df_uni[i]] for i in range(5)]

In [181]:
def emp(df: list, x_lst: list): # Э.ф.р.
  return [np.sum(np.array(df) <= x) / len(df) for x in x_lst]

# ДЗ 4

## Проверка гипотезы о виде распределения

### Критерий согласия Колмогорова

In [182]:
def tdf(x_lst):
  res = []
  for x in x_lst:
    if x < 1:
      res.append(0)
    else:
      s = 0
      for k in range(1, int(x) + 1):
        s += 1 / (k * (13**k))
      s *= 1/np.log(13/12)
      res.append(s)
  return res

In [183]:
df_uni = dict() # словарь, где ключ - объем выборки, значение - 5 выборок с равномерным распределением
for n in sizes:
  np.random.seed(42)
  df_uni[n] = np.random.uniform(size=(5, n))# 5 выборок размера n

In [184]:
df_new = dict()
eps = 0.00001
for n in sizes:
    df_new[n] = [np.array(tdf(np.array(df_log[n][i])-eps)) +
              df_uni[n][i] * (np.array(tdf(np.array(df_log[n][i]))) - np.array(tdf(np.array(df_log[n][i])-eps)))
              for i in range(5)]

In [185]:
coefs = [(0.01, 1.62762), (0.05, 1.3581), (0.1, 1.22385)]
for n in sizes:
    for i in range(5):
        res = []
        for alpha, lamb in coefs:
            var_series = np.array(sorted(df_new[n][i]))
            Dn_plus = np.max(np.abs(np.arange(1, n + 1) / n - var_series))
            Dn_minus = np.max(np.abs(var_series - np.arange(n) / n))
            Dn = max(Dn_plus, Dn_minus)

            if n <= 20:
                stat = (6 * n * Dn + 1) / (6 * sqrt(n))
            else:
                stat = sqrt(n) * Dn

            if stat >= lamb:
                res.append('$\\times$')
            else:
                res.append('$\\checkmark$')
        print(f'${n}$ & ${i}$ & {res[0]} & {res[1]} & {res[2]} \\\\ \\hline')

$5$ & $0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $1$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $3$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $1$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $3$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $1$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $3$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $4$ & $\checkmark$ & $\checkmark$ & $\times$ \\ \hline
$200$ & $0$ & 

### Критерий согласия Колмогорова для сложной гипотезы

1. Находим $\hat{\theta}_{м.м.п.}$

In [186]:
def tdf(x_lst, t):
  res = []
  for x in x_lst:
    if x < 1:
      res.append(0)
    else:
      s = 0
      for k in range(1, int(x) + 1):
        s += t**k / k
      s *= -1/np.log(1-t)
      res.append(s)
  return res

In [187]:
from math import log
import scipy.optimize

def mle(sample_av):
    def func(x):
        y = x / (log(1-x) * (1-x)) + sample_av
        return y

    y = scipy.optimize.fsolve(func, 0.0769231)
    return y[0]

In [188]:
mle_estims = {n:[] for n in sizes}
for n in sizes:
  for i in range(5):
    if n <= 20:
        sample_av = np.array(df_log[n][i]).mean()
    else:
        sample_av = np.array(df_log[n][i][n // 2 :]).mean()
    mle_estims[n].append(mle(sample_av))

2. Строим $U_i$

In [189]:
df_new = dict()
eps = 0.00001
for n in sizes:
    df_new[n] = [np.array(tdf(np.array(df_log[n][i])-eps, mle_estims[n][i])) +
              df_uni[n][i] * (np.array(tdf(np.array(df_log[n][i]), mle_estims[n][i])) - np.array(tdf(np.array(df_log[n][i])-eps, mle_estims[n][i])))
              for i in range(5)]

3. Применяем критерий

In [190]:
half_df = {n // 2 : [] for n in sizes[2:]}
for n in sizes[2:]:
    for i in range(5):
        half_df[n // 2].append(df_new[n][i][: n // 2])

half_df[5] = df_new[5]
half_df[10] = df_new[10]

In [191]:
coefs = [(0.01, 1.62762), (0.05, 1.3581), (0.1, 1.22385)]
for j in range(len(sizes)):
    n = sorted(half_df.keys())[j]
    for i in range(5):
        res = []
        for alpha, lamb in coefs:
            var_series = np.array(sorted(half_df[n][i]))
            Dn_plus = np.max(np.abs(np.arange(1, n + 1) / n - var_series))
            Dn_minus = np.max(np.abs(var_series - np.arange(n) / n))
            Dn = max(Dn_plus, Dn_minus)

            if n <= 20:
                stat = (6 * n * Dn + 1) / (6 * sqrt(n))
            else:
                stat = sqrt(n) * Dn

            if stat >= lamb:
                res.append('$\\times$')
            else:
                res.append('$\\checkmark$')
        print(f'${sizes[j]}$ & ${i}$ & {res[0]} & {res[1]} & {res[2]} \\\\ \\hline')

$5$ & $0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $1$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $3$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $1$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $3$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $1$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $3$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$200$ & $0

### Критерий согласия хи-квадрат

In [192]:
N = 2
probs = np.array([P(1), 1 - P(1)])
probs

array([0.96102542, 0.03897458])

In [193]:
chi_sq = chi2(df=N-1)
alphas = [0.01, 0.05, 0.1]

for alpha in alphas:
    print(np.round(chi_sq.ppf(q=1 - alpha), 3))

for n in sizes:
    for i in range(5):
        res = []
        freqs = dict(Counter(df_log[n][i]))
        freq_vect = np.array([freqs[1], n - freqs[1]])
        stat_pirs = np.sum(freq_vect ** 2 / (n * probs)) - n

        for alpha in alphas:
            if stat_pirs > chi_sq.ppf(q=1 - alpha):
                res.append('$\\times$')
            else:
                res.append('$\\checkmark$')
        print(f'${n}$ & ${i}$ & ${np.round(stat_pirs, 3)}$ & {res[0]} & {res[1]} & {res[2]} \\\\ \\hline')

6.635
3.841
2.706
$5$ & $0$ & $0.203$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $1$ & $3.461$ & $\checkmark$ & $\checkmark$ & $\times$ \\ \hline
$5$ & $2$ & $0.203$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $3$ & $3.461$ & $\checkmark$ & $\checkmark$ & $\times$ \\ \hline
$5$ & $4$ & $0.203$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $0$ & $0.994$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $1$ & $0.994$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $2$ & $0.406$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $3$ & $0.406$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $4$ & $0.994$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $0$ & $0.003$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $1$ & $0.215$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $2$ & $1.18$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$

In [194]:
N = 3
probs = np.array([P(1), P(2), 1 - P(1) - P(2)])
probs

array([0.96102542, 0.03696252, 0.00201206])

In [195]:
chi_sq = chi2(df=N-1)
alphas = [0.01, 0.05, 0.1]

for alpha in alphas:
    print(np.round(chi_sq.ppf(q=1 - alpha), 3))

for n in sizes:
    for i in range(5):
        res = []
        freqs = dict(Counter(df_log[n][i]))
        freq_vect = np.array([freqs.get(1, 0), freqs.get(2, 0), n - freqs.get(1, 0) - freqs.get(2, 0)])
        stat_pirs = np.sum(freq_vect ** 2 / (n * probs)) - n

        for alpha in alphas:
            if stat_pirs > chi_sq.ppf(q=1 - alpha):
                res.append('$\\times$')
            else:
                res.append('$\\checkmark$')
        print(f'${n}$ & ${i}$ & ${np.round(stat_pirs, 3)}$ & {res[0]} & {res[1]} & {res[2]} \\\\ \\hline')

9.21
5.991
4.605
$5$ & $0$ & $0.203$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $1$ & $3.741$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $2$ & $0.203$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $3$ & $3.741$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $4$ & $0.203$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $0$ & $1.134$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $1$ & $1.134$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $2$ & $0.406$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $3$ & $0.406$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $4$ & $1.134$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $0$ & $0.226$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $1$ & $0.341$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $2$ & $3.677$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hli

### Критерий согласия хи-квадрат для сложной гипотезы

In [196]:
def P(x, t):
  return -t**x/(np.log(1-t) * x)

In [197]:
N = 3
r = 1

In [198]:
chi_sq = chi2(df=N-1-r)
alphas = [0.01, 0.05, 0.1]

for alpha in alphas:
    print(np.round(chi_sq.ppf(q=1 - alpha), 3))

for n in sizes:
    for i in range(5):
        res = []

        t = mle(np.array(df_log[n][i]).mean())
        probs = np.array([P(1, t), P(2, t), 1 - P(1, t) - P(2, t)])

        freqs = dict(Counter(df_log[n][i]))
        freq_vect = np.array([freqs.get(1, 0), freqs.get(2, 0), n - freqs.get(1, 0) - freqs.get(2, 0)])
        stat_pirs = np.sum(freq_vect ** 2 / (n * probs)) - n

        for alpha in alphas:
            if stat_pirs > chi_sq.ppf(q=1 - alpha):
                res.append('$\\times$')
            else:
                res.append('$\\checkmark$')
        print(f'${n}$ & ${i}$ & ${np.round(stat_pirs, 3)}$ & {res[0]} & {res[1]} & {res[2]} \\\\ \\hline')

6.635
3.841
2.706
$5$ & $0$ & $0.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $1$ & $0.393$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $2$ & $0.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $3$ & $0.393$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $4$ & $0.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $0$ & $0.166$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $1$ & $0.166$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $2$ & $0.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $3$ & $0.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $4$ & $0.166$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $0$ & $0.234$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $1$ & $0.129$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $2$ & $0.519$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ 

## Задание для данных, описываемых распределением

In [199]:
data = pd.read_csv("games.csv")
data.head()

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck
0,13500,Prince of Persia: Warrior Within™,2008-11-21,True,False,False,Very Positive,84,2199,9.99,9.99,0.0,True
1,22364,BRINK: Agents of Change,2011-08-03,True,False,False,Positive,85,21,2.99,2.99,0.0,True
2,113020,Monaco: What's Yours Is Mine,2013-04-24,True,True,True,Very Positive,92,3722,14.99,14.99,0.0,True
3,226560,Escape Dead Island,2014-11-18,True,False,False,Mixed,61,873,14.99,14.99,0.0,True
4,249050,Dungeon of the ENDLESS™,2014-10-27,True,True,False,Very Positive,88,8784,11.99,11.99,0.0,True


In [200]:
df = np.round(np.array(data.price_final))
df = df[df != 0]
n = len(df)

Находим $\hat{\theta}_{м.м.п.}$

In [201]:
from math import log
import scipy.optimize

def func(x):
    y = x / (log(1-x) * (1-x)) + df[n // 2:].mean()
    return y

mle_estim = scipy.optimize.fsolve(func, 0.96)[0]

In [202]:
n, mle_estim

(40933, 0.9709765130005783)

In [203]:
def tdf(x_lst, t):
  res = []
  for x in x_lst:
    if x < 1:
      res.append(0)
    else:
      s = 0
      for k in range(1, int(x) + 1):
        s += t**k / k
      s *= -1/np.log(1-t)
      res.append(s)
  return res

### Критерий согласия Колмогорова для сложной гипотезы

Строим $U_i$

In [204]:
np.random.seed(42)
df_uni = np.random.uniform(size=(1, n))
df_new = np.array(tdf(df - eps, mle_estim)) + df_uni * (np.array(tdf(df, mle_estim)) - np.array(tdf(df - eps, mle_estim)))

Применяем критерий

In [205]:
half_df = df_new[:n // 2]

In [206]:
res = []
for alpha, lamb in coefs:
    var_series = np.array(sorted(half_df))
    Dn_plus = np.max(np.abs(np.arange(1, n + 1) / n - var_series))
    Dn_minus = np.max(np.abs(var_series - np.arange(n) / n))
    Dn = max(Dn_plus, Dn_minus)

    stat = sqrt(n) * Dn

    if stat >= lamb:
        res.append('$\\times$')
    else:
        res.append('$\\checkmark$')
print(f'{res[0]} & {res[1]} & {res[2]} \\\\ \\hline')

$\times$ & $\times$ & $\times$ \\ \hline


### Критерий согласия хи-квадрат для сложной гипотезы

Воспользуемся эвристической формулой Старджесса для определения "оптимального" числа интервалов:

In [207]:
N = int(3.3 * np.log10(n) + 1)
r = 1
N

16

Выберем интервалы так, чтобы число наблюдений, попавших в интервалы, было не слишком малым и сравнимым.

In [208]:
n / N

2558.3125

In [209]:
freqs = dict(Counter(sorted(df)))
freqs

{1.0: 3977,
 2.0: 3387,
 3.0: 3043,
 4.0: 2125,
 5.0: 4845,
 6.0: 1464,
 7.0: 1781,
 8.0: 1403,
 9.0: 685,
 10.0: 5258,
 11.0: 291,
 12.0: 774,
 13.0: 795,
 14.0: 240,
 15.0: 2866,
 16.0: 266,
 17.0: 217,
 18.0: 337,
 19.0: 147,
 20.0: 3214,
 21.0: 16,
 22.0: 40,
 23.0: 16,
 24.0: 23,
 25.0: 1009,
 27.0: 16,
 28.0: 7,
 29.0: 5,
 30.0: 1070,
 31.0: 4,
 32.0: 3,
 33.0: 5,
 34.0: 5,
 35.0: 211,
 36.0: 5,
 37.0: 3,
 38.0: 1,
 39.0: 2,
 40.0: 573,
 42.0: 1,
 45.0: 72,
 48.0: 1,
 49.0: 5,
 50.0: 228,
 52.0: 1,
 54.0: 1,
 55.0: 13,
 58.0: 1,
 59.0: 11,
 60.0: 268,
 64.0: 1,
 65.0: 7,
 70.0: 44,
 72.0: 1,
 75.0: 5,
 80.0: 26,
 90.0: 7,
 93.0: 1,
 95.0: 5,
 100.0: 77,
 110.0: 2,
 115.0: 1,
 120.0: 1,
 125.0: 1,
 129.0: 1,
 130.0: 1,
 134.0: 1,
 149.0: 1,
 150.0: 3,
 200.0: 13,
 270.0: 1,
 300.0: 2}

$ɛ_1 = \{1\}$ \\
$ɛ_2 = \{2\}$ \\
$ɛ_3 = \{3\}$ \\
$ɛ_4 = \{4\}$ \\
$ɛ_5 = \{5\}$ \\
$ɛ_6 = \{6, 7\}$ \\
$ɛ_7 = \{8, 9\}$ \\
$ɛ_8 = \{10\}$ \\
$ɛ_9 = \{11, 12, 13, 14\}$ \\
$ɛ_{10} = \{15\}$ \\
$ɛ_{11} = \{16, 17, 18, 19\}$ \\
$ɛ_{12} = \{20\}$ \\
$ɛ_{13} = \{21, 22, ..., 29\}$ \\
$ɛ_{14} = \{30, 31, ..., 39\}$ \\
$ɛ_{15} = \{40, 41, ..., 59\}$ \\
$ɛ_{16} = \{60, 62, ...\}$

In [216]:
from math import log
import scipy.optimize

def func(x):
    y = x / (log(1-x) * (1-x)) + df.mean()
    return y

t = scipy.optimize.fsolve(func, 0.97)[0]

In [217]:
probs = np.array([P(1, t), P(2, t), P(3, t), P(4, t),  P(5, t),
         P(6, t) + P(7, t),
         P(8, t) + P(9, t),
         P(10, t),
         P(11, t) + P(12, t) + P(13, t) + P(14, t),
         P(15, t),
         P(16, t) + P(17, t) + P(18, t) + P(19, t),
         P(20, t),
         sum(P(i, t) for i in range(21, 30)),
         sum(P(i, t) for i in range(30, 40)),
         sum(P(i, t) for i in range(40, 60)),
         1 - tdf([60], t)[0]
         ])

In [218]:
freqs = dict(Counter(df))
freq_vect = np.array([freqs.get(1, 0), freqs.get(2, 0), freqs.get(3, 0), freqs.get(4, 0), freqs.get(5, 0),
             freqs.get(6, 0) + freqs.get(7, 0),
             freqs.get(8, 0) + freqs.get(9, 0),
             freqs.get(10, 0),
             freqs.get(11, 0) + freqs.get(12, 0) + freqs.get(13, 0) + freqs.get(14, 0),
             freqs.get(15, 0),
             freqs.get(16, 0) + freqs.get(17, 0) + freqs.get(18, 0) + freqs.get(19, 0),
             freqs.get(20, 0),
             sum(freqs.get(i, 0) for i in range(21, 30)),
             sum(freqs.get(i, 0) for i in range(30, 40)),
             sum(freqs.get(i, 0) for i in range(40, 60)),
             np.sum(df >= 60)
             ])

In [213]:
chi_sq = chi2(df=N-1-r)
alphas = [0.01, 0.05, 0.1]

for alpha in alphas:
    print(np.round(chi_sq.ppf(q=1 - alpha), 3))

res = []
stat_pirs = np.sum(freq_vect ** 2 / (n * probs)) - n
for alpha in alphas:
    if stat_pirs > chi_sq.ppf(q=1 - alpha):
        res.append('$\\times$')
    else:
        res.append('$\\checkmark$')
print(f'${np.round(stat_pirs, 3)}$ & {res[0]} & {res[1]} & {res[2]} \\\\ \\hline')

29.141
23.685
21.064
$68917.796$ & $\times$ & $\times$ & $\times$ \\ \hline


In [214]:
N = 3
r = 1

In [215]:
chi_sq = chi2(df=N-1-r)
alphas = [0.01, 0.05, 0.1]

for alpha in alphas:
    print(np.round(chi_sq.ppf(q=1 - alpha), 3))

res = []
t = mle_estim
probs = np.array([P(1, t), P(2, t), 1 - P(1, t) - P(2, t)])
freqs = dict(Counter(df))
freq_vect = np.array([freqs.get(1, 0), freqs.get(2, 0), n - freqs.get(1, 0) - freqs.get(2, 0)])
stat_pirs = np.sum(freq_vect ** 2 / (n * probs)) - n
for alpha in alphas:
    if stat_pirs > chi_sq.ppf(q=1 - alpha):
        res.append('$\\times$')
    else:
        res.append('$\\checkmark$')
print(f'${np.round(stat_pirs, 3)}$ & {res[0]} & {res[1]} & {res[2]} \\\\ \\hline')

6.635
3.841
2.706
$7968.279$ & $\times$ & $\times$ & $\times$ \\ \hline
