In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from math import sqrt
from collections import Counter
from scipy.stats import chi2

In [2]:
# Sample sizes n; the cells below generate and test 5 samples for each size.
sizes = [5, 10, 100, 200, 400, 600, 800, 1000]

## Генерация выборок выбранных случайных величин

In [3]:
# Parameter of the model distribution: the breakpoint of the piecewise
# density / distribution function. Read as a global by the one-argument
# `density`, `con_dist_model` and the one-argument `tdf`.
t = 0.6

In [4]:
def density(x):
  """Piecewise-linear (triangular) density on [0, 1] with breakpoint at the global ``t``.

  Returns ``2x / t`` on ``[0, t]``, ``2(1 - x) / (1 - t)`` on ``(t, 1]``
  and ``0`` everywhere else.
  """
  # Rising edge of the triangle.
  if 0 <= x <= t:
    return (2 * x) / t
  # Falling edge of the triangle.
  if t < x <= 1:
    return (2 * (1 - x)) / (1 - t)
  # Outside the support.
  return 0

In [5]:
def con_dist_model(u):
  """Map a uniform draw ``u`` to a draw from the model distribution (inverse-CDF method).

  Uses the global ``t``: values with ``0 <= u <= t`` are mapped through the
  inverse of ``x**2 / t``; every other ``u`` is mapped through the inverse of
  ``1 - (1 - x)**2 / (1 - t)``.
  """
  on_lower_branch = 0 <= u <= t
  return np.sqrt(t * u) if on_lower_branch else 1 - np.sqrt((1 - t) * (1 - u))

In [6]:
# Dictionary: key = sample size, value = 5 samples from the triangular distribution.
df_con = {}
for n in sizes:
  # The generator is reseeded for every size, so each size starts from the same stream.
  np.random.seed(99)
  df_uni = np.random.uniform(size=(5, n))  # 5 uniform samples of size n
  # Push every uniform draw through the inverse-CDF transform, row by row.
  df_con[n] = [[con_dist_model(u) for u in uniform_row] for uniform_row in df_uni]

In [7]:
def emp(df: list, x_lst: list):
  """Empirical distribution function of the sample ``df`` evaluated at every point of ``x_lst``.

  For each point the result is the share of observations that are ``<=`` it.
  Returns a list with one value per element of ``x_lst``.
  """
  values = []
  for point in x_lst:
    n_not_above = np.sum(np.array(df) <= point)
    values.append(n_not_above / len(df))
  return values

In [8]:
def density(x, t):
  """Piecewise-linear (triangular) density on [0, 1] with breakpoint ``t``.

  Returns ``2x / t`` on ``[0, t]``, ``2(1 - x) / (1 - t)`` on ``(t, 1]``
  and ``0`` everywhere else.
  """
  # Rising edge of the triangle.
  if 0 <= x <= t:
    return (2 * x) / t
  # Falling edge of the triangle.
  if t < x <= 1:
    return (2 * (1 - x)) / (1 - t)
  # Outside the support.
  return 0

In [9]:
def L(df, n, t):
    """Likelihood of the first ``n`` observations of ``df`` for parameter ``t``.

    Computed as the plain product of ``density(df[i], t)`` for ``i = 0..n-1``;
    returns ``1`` when ``n`` is 0.
    """
    likelihood = 1
    observations = (df[k] for k in range(n))
    for obs in observations:
        likelihood = likelihood * density(obs, t)
    return likelihood

# ДЗ 4

## Проверка гипотезы о виде распределения

### Критерий согласия Колмогорова

In [10]:
def tdf(x_lst):
  """Theoretical distribution function (global ``t``) evaluated at every point of ``x_lst``.

  Returns a list: ``0`` below 0, ``x**2 / t`` on ``[0, t]``,
  ``1 - (1 - x)**2 / (1 - t)`` on ``(t, 1]`` and ``1`` otherwise.
  """
  def cdf_at(x):
    if x < 0:
      return 0
    if 0 <= x <= t:
      return x**2 / t
    if t < x <= 1:
      return 1 - ((1-x)**2)/(1-t)
    return 1

  return [cdf_at(x) for x in x_lst]

In [11]:
# Kolmogorov goodness-of-fit test of the simple hypothesis (distribution
# function `tdf` with the global t) for every generated sample.
# Each pair is (significance level, critical value to compare the statistic with).
coefs = [(0.01, 1.62762), (0.05, 1.3581), (0.1, 1.22385)]
for n in sizes:
    for i in range(5):
        # The statistic does not depend on the significance level, so the
        # order statistics and the theoretical CDF are computed once per sample.
        var_series = np.array(sorted(df_con[n][i]))
        cdf_vals = np.array(tdf(var_series))
        Dn_plus = np.max(np.abs(np.arange(1, n + 1) / n - cdf_vals))
        Dn_minus = np.max(np.abs(cdf_vals - np.arange(n) / n))
        Dn = max(Dn_plus, Dn_minus)

        # Small samples use the corrected form of the statistic,
        # larger ones the plain sqrt(n) * Dn.
        if n <= 20:
            stat = (6 * n * Dn + 1) / (6 * sqrt(n))
        else:
            stat = sqrt(n) * Dn

        # One LaTeX mark per significance level: cross = rejected, check = not rejected.
        res = []
        for alpha, lamb in coefs:
            if stat >= lamb:
                res.append('$\\times$')
            else:
                res.append('$\\checkmark$')
        print(f'${n}$ & ${i}$ & {res[0]} & {res[1]} & {res[2]} \\\\ \\hline')

$5$ & $0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $1$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $3$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $1$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $3$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $1$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $3$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$200$ & $0

### Критерий согласия Колмогорова для сложной гипотезы

In [12]:
def tdf(x_lst, t):
  """Theoretical distribution function with breakpoint ``t`` at every point of ``x_lst``.

  Returns a list: ``0`` below 0, ``x**2 / t`` on ``[0, t]``,
  ``1 - (1 - x)**2 / (1 - t)`` on ``(t, 1]`` and ``1`` otherwise.
  """
  def cdf_at(x):
    if x < 0:
      return 0
    if 0 <= x <= t:
      return x**2 / t
    if t < x <= 1:
      return 1 - ((1-x)**2)/(1-t)
    return 1

  return [cdf_at(x) for x in x_lst]

In [13]:
# Maximum-likelihood estimate of t for every sample.
# Sizes up to 20 use the whole sample; larger sizes use only the second half
# of the sample (elements from index n // 2 onward).
mle_estims = {n: [] for n in sizes}
for n in sizes:
  for j in range(5):
    use_whole_sample = n <= 20
    m = n if use_whole_sample else n // 2  # number of observations used for estimation
    raw = df_con[n][j] if use_whole_sample else df_con[n][j][n // 2 :]
    df = np.sort(raw)

    # Candidates: order statistics x_(i) lying strictly inside ((i - 1) / m, i / m).
    mask = [(i - 1) / m < df[i - 1] < i / m for i in range(1, m + 1)]
    params = df[mask]

    # Likelihood of the estimation sample at every candidate; keep the maximiser.
    Ls = [L(df, m, t) for t in params]
    mle_estim = params[np.argmax(Ls)]
    mle_estims[n].append(mle_estim)

In [14]:
# First halves of the samples for all sizes from the third one on, keyed by the half size.
half_df_con = {}
for n in sizes[2:]:
    half = n // 2
    bucket = half_df_con.setdefault(half, [])
    for i in range(5):
        bucket.append(df_con[n][i][:half])

# The samples of sizes 5 and 10 are stored whole, under their own size.
half_df_con[5] = df_con[5]
half_df_con[10] = df_con[10]

In [15]:
# Kolmogorov test for the composite hypothesis: the distribution function uses
# the estimate of t from `mle_estims`, and is compared with the samples stored
# in `half_df_con`.
# Each pair is (significance level, critical value to compare the statistic with).
# NOTE(review): for sizes 5 and 10 the tested sample is the same one the
# estimate was computed from, while the critical values are the ones used for
# the simple hypothesis -- confirm this is intended.
coefs = [(0.01, 1.62762), (0.05, 1.3581), (0.1, 1.22385)]
# Keys of half_df_con in increasing order; position j is paired with sizes[j].
half_sizes = sorted(half_df_con.keys())
for j in range(len(sizes)):
    n = half_sizes[j]
    for i in range(5):
        # The statistic does not depend on the significance level, so the
        # order statistics and the fitted CDF are computed once per sample.
        var_series = np.array(sorted(half_df_con[n][i]))
        cdf_vals = np.array(tdf(var_series, mle_estims[sizes[j]][i]))
        Dn_plus = np.max(np.abs(np.arange(1, n + 1) / n - cdf_vals))
        Dn_minus = np.max(np.abs(cdf_vals - np.arange(n) / n))

        Dn = max(Dn_plus, Dn_minus)

        # Small samples use the corrected form of the statistic,
        # larger ones the plain sqrt(n) * Dn.
        if n <= 20:
            stat = (6 * n * Dn + 1) / (6 * sqrt(n))
        else:
            stat = sqrt(n) * Dn

        # One LaTeX mark per significance level: cross = rejected, check = not rejected.
        res = []
        for alpha, lamb in coefs:
            if stat >= lamb:
                res.append('$\\times$')
            else:
                res.append('$\\checkmark$')
        print(f'${sizes[j]}$ & ${i}$ & {res[0]} & {res[1]} & {res[2]} \\\\ \\hline')

$5$ & $0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $1$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $3$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $1$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $3$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $1$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $3$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$200$ & $0

### Критерий согласия хи-квадрат

In [16]:
def quantile(gamma, t=0.6):
    """Quantile of level ``gamma`` of the model distribution with breakpoint ``t``.

    Parameters
    ----------
    gamma : probability level in ``[0, 1]``.
    t : breakpoint of the distribution; defaults to 0.6, the value that was
        previously hard-coded here.

    Returns
    -------
    ``sqrt(t * gamma)`` for ``0 <= gamma <= t``,
    ``1 - sqrt((1 - t) * (1 - gamma))`` for ``t < gamma <= 1``,
    and ``None`` for any other ``gamma`` (kept for backward compatibility).
    """
    # The endpoints are included so that gamma = 0 and gamma = 1 give 0.0 and
    # 1.0 instead of falling through to None.
    if 0 <= gamma <= t:
        return sqrt(t * gamma)
    if t < gamma <= 1:
        return 1 - sqrt((1 - t) * (1 - gamma))
    return None

In [17]:
# Significance levels; `pred` compares the statistic with the chi-square
# quantile of level 1 - alpha for each of them.
alphas = [0.01, 0.05, 0.1]

In [18]:
def pred(N):
    """Pearson chi-square goodness-of-fit test with ``N`` equiprobable bins.

    The interior bin borders are the ``i / N`` quantiles (``i = 1..N-1``) of the
    hypothesised distribution, so every bin has probability ``1 / N``.

    Prints the chi-square(N - 1) critical value for every level in ``alphas``,
    then, for every sample in ``df_con``, one LaTeX table row with the sample
    size, the sample index, the statistic and a reject / not-reject mark per level.

    Raises
    ------
    ValueError
        If ``N < 2`` (no interior border exists and the chi-square
        distribution would have zero degrees of freedom).
    """
    if N < 2:
        raise ValueError(f'need at least 2 bins, got N={N}')

    borders = np.array([quantile(i / N) for i in range(1, N)])

    chi_sq = chi2(df=N-1)
    probs = np.full(N, 1 / N)

    # The critical values depend only on N and alpha: compute them once.
    crit_values = [chi_sq.ppf(q=1 - alpha) for alpha in alphas]
    for crit in crit_values:
        print(np.round(crit, 3))

    for n in sizes:
        for i in range(5):
            df = np.array(df_con[n][i])

            # Bin k collects borders[k - 1] < x <= borders[k]; the first bin is
            # x <= borders[0] and the last one is x > borders[-1].
            bin_idx = np.searchsorted(borders, df, side='left')
            freq_vect = np.bincount(bin_idx, minlength=N)
            stat_pirs = np.sum(freq_vect ** 2 / (n * probs)) - n

            res = []
            for crit in crit_values:
                if stat_pirs > crit:
                    res.append('$\\times$')
                else:
                    res.append('$\\checkmark$')
            print(f'${n}$ & ${i}$ & ${np.round(stat_pirs, 3)}$ & {res[0]} & {res[1]} & {res[2]} \\\\ \\hline')

In [19]:
# Chi-square test of the simple hypothesis with N = 3 bins.
pred(3)

9.21
5.991
4.605
$5$ & $0$ & $1.6$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $1$ & $1.6$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $2$ & $2.8$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $3$ & $0.4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $4$ & $0.4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $0$ & $0.8$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $1$ & $2.6$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $2$ & $1.4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $3$ & $3.2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $4$ & $0.2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $0$ & $0.56$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $1$ & $1.22$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $2$ & $1.34$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $3$ & $0.14$

In [20]:
# Chi-square test of the simple hypothesis with N = 5 bins.
pred(5)

13.277
9.488
7.779
$5$ & $0$ & $2.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $1$ & $2.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $2$ & $2.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $3$ & $2.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $4$ & $4.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $0$ & $2.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $1$ & $1.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $2$ & $1.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $3$ & $4.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $4$ & $2.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $0$ & $1.9$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $1$ & $5.5$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $2$ & $2.8$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $3$ & $1.7$ &

In [21]:
# Chi-square test of the simple hypothesis with N = 10 bins.
pred(10)

21.666
16.919
14.684
$5$ & $0$ & $9.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $1$ & $9.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $2$ & $9.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $3$ & $5.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $4$ & $13.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $0$ & $8.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $1$ & $6.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $2$ & $10.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $3$ & $6.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $4$ & $6.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $0$ & $7.4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $1$ & $8.4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $2$ & $5.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $3$ & $4.

### Критерий согласия хи-квадрат для сложной гипотезы

In [22]:
# Maximum-likelihood estimate of t for every sample, this time always computed
# from the whole sample.
mle_estims = {n: [] for n in sizes}
for n in sizes:
  for j in range(5):
    df = np.sort(df_con[n][j])

    # Candidates: order statistics x_(k + 1) lying strictly inside (k / n, (k + 1) / n).
    mask = [k / n < df[k] < (k + 1) / n for k in range(n)]
    params = df[mask]

    # Likelihood of the sample at every candidate; keep the maximiser.
    Ls = [L(df, n, t) for t in params]
    best = np.argmax(Ls)
    mle_estim = params[best]
    mle_estims[n].append(mle_estim)

In [23]:
def quantile(gamma, t):
    """Quantile of level ``gamma`` of the model distribution with breakpoint ``t``.

    Parameters
    ----------
    gamma : probability level in ``[0, 1]``.
    t : breakpoint of the distribution.

    Returns
    -------
    ``sqrt(t * gamma)`` for ``0 <= gamma <= t``,
    ``1 - sqrt((1 - t) * (1 - gamma))`` for ``t < gamma <= 1``,
    and ``None`` for any other ``gamma`` (kept for backward compatibility).
    """
    # The endpoints are included so that gamma = 0 and gamma = 1 give 0.0 and
    # 1.0 instead of falling through to None.
    if 0 <= gamma <= t:
        return sqrt(t * gamma)
    if t < gamma <= 1:
        return 1 - sqrt((1 - t) * (1 - gamma))
    return None

In [24]:
# Number subtracted from the chi-square degrees of freedom in `pred`
# (chi2(df=N-1-r)); one parameter, t, is estimated from the data.
r = 1
# Significance levels; `pred` compares the statistic with the chi-square
# quantile of level 1 - alpha for each of them.
alphas = [0.01, 0.05, 0.1]

In [25]:
def pred(N):
    """Pearson chi-square test with ``N`` bins for the composite hypothesis.

    For every sample the interior bin borders are the ``j / N`` quantiles
    (``j = 1..N-1``) of the distribution with ``t`` replaced by that sample's
    estimate from ``mle_estims``, so every bin has probability ``1 / N``.

    Prints the chi-square(N - 1 - r) critical value for every level in
    ``alphas``, then, for every sample in ``df_con``, one LaTeX table row with
    the sample size, the sample index, the statistic and a reject / not-reject
    mark per level.

    NOTE(review): the borders are built from an estimate computed on the raw
    (ungrouped) sample; confirm that chi-square(N - 1 - r) is the intended
    reference distribution for this setup.

    Raises
    ------
    ValueError
        If ``N - 1 - r < 1``: the reference distribution would have no degrees
        of freedom, its quantiles would be NaN and no sample would ever be rejected.
    """
    if N - 1 - r < 1:
        raise ValueError(f'need N >= {r + 2} bins for r={r} estimated parameter(s), got N={N}')

    chi_sq = chi2(df=N-1-r)
    probs = np.full(N, 1 / N)

    # The critical values depend only on N, r and alpha: compute them once.
    crit_values = [chi_sq.ppf(q=1 - alpha) for alpha in alphas]
    for crit in crit_values:
        print(np.round(crit, 3))

    for n in sizes:
        for i in range(5):
            borders = np.array([quantile(j / N, mle_estims[n][i]) for j in range(1, N)])

            df = np.array(df_con[n][i])

            # Bin k collects borders[k - 1] < x <= borders[k]; the first bin is
            # x <= borders[0] and the last one is x > borders[-1].
            bin_idx = np.searchsorted(borders, df, side='left')
            freq_vect = np.bincount(bin_idx, minlength=N)
            stat_pirs = np.sum(freq_vect ** 2 / (n * probs)) - n

            res = []
            for crit in crit_values:
                if stat_pirs > crit:
                    res.append('$\\times$')
                else:
                    res.append('$\\checkmark$')
            print(f'${n}$ & ${i}$ & ${np.round(stat_pirs, 3)}$ & {res[0]} & {res[1]} & {res[2]} \\\\ \\hline')

In [26]:
# Chi-square test of the composite hypothesis with N = 3 bins.
pred(3)

6.635
3.841
2.706
$5$ & $0$ & $0.4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $1$ & $0.4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $2$ & $1.6$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $3$ & $0.4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $4$ & $0.4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $0$ & $0.2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $1$ & $0.2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $2$ & $0.8$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $3$ & $0.8$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $4$ & $0.2$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $0$ & $0.86$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $1$ & $0.14$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $2$ & $0.26$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $3$ & $0.08

In [27]:
# Chi-square test of the composite hypothesis with N = 5 bins.
pred(5)

11.345
7.815
6.251
$5$ & $0$ & $4.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $1$ & $2.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $2$ & $2.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $3$ & $2.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $4$ & $2.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $0$ & $1.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $1$ & $1.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $2$ & $1.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $3$ & $1.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $4$ & $2.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $0$ & $1.7$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $1$ & $5.5$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $2$ & $2.3$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $3$ & $1.9$ &

In [28]:
# Chi-square test of the composite hypothesis with N = 10 bins.
pred(10)

20.09
15.507
13.362
$5$ & $0$ & $9.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $1$ & $5.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $2$ & $9.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $3$ & $5.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$5$ & $4$ & $9.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $0$ & $6.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $1$ & $6.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $2$ & $8.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $3$ & $4.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$10$ & $4$ & $6.0$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $0$ & $3.4$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $1$ & $7.8$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $2$ & $5.8$ & $\checkmark$ & $\checkmark$ & $\checkmark$ \\ \hline
$100$ & $3$ & $5.0$ 