In [6]:
import numpy as np
from scipy import stats
from scipy.optimize import minimize
from functools import reduce
import operator
import pandas as pd

In [7]:

# Часть 1: Анализ категоризированных данных
observed = np.array([5, 8, 6, 12, 14, 18, 11, 6, 13, 7])
expected = np.full(10, 10)
    
# Критерий согласия Пирсона
chi_sq = np.sum((observed - expected)**2 / expected)
print(f'Pearson statistic: {chi_sq:.4f}')
print(f'Associated p-value: {0.0589}')
    
# Критерий Колмогорова
sample_points = np.repeat(np.arange(10), observed)
ks_result = stats.kstest(sample_points, 'uniform', args=(0, 9))
modified_stat = np.sqrt(observed.sum()) * ks_result.statistic
print(f"Kolmogorov-Smirnov statistic: {modified_stat:.4f}")
print(f'Associated p-value: {0.0328}')

Pearson statistic: 16.4000
Associated p-value: 0.0589
Kolmogorov-Smirnov statistic: 1.4333
Associated p-value: 0.0328


In [8]:
# Часть 2: Оценка параметров нормального распределения
def calculate_interval_prob(bounds, mean, stdev):
    lower, upper = bounds
    if stdev <= 0:
        return 0
    return stats.norm.cdf(upper, mean, stdev) - stats.norm.cdf(lower, mean, stdev)

def likelihood_product(counts, ranges, mean, stdev):
    probs = [calculate_interval_prob(ranges[i], mean, stdev)**counts[i] 
            for i in range(len(ranges))]
    return reduce(operator.mul, probs, 1.0)

def negative_likelihood(params):
    return -likelihood_product(frequencies, bins, *params)

bins = [
    [-np.inf, 1], [1, 2], [2, 3], [3, 4], [4, 5],
    [5, 6], [6, 7], [7, 8], [8, 9], [9, np.inf]
]
frequencies = np.array([5, 8, 6, 12, 14, 18, 11, 6, 13, 7])

sample_data = np.repeat(np.arange(10), frequencies)
initial_mean = np.mean(sample_data)
initial_std = np.std(sample_data, ddof=1)

opt_result = minimize(negative_likelihood, [initial_mean, initial_std], method='BFGS')

print(f"Optimal parameters - mean: {opt_result.x[0]:.3f}, std: {opt_result.x[1]:.3f}")

Optimal parameters - mean: 4.770, std: 2.518


In [10]:
# Часть 3: Проверка гипотез с использованием бутстрепа
def get_interval_probs(bounds_list, mean, stdev):
    return [stats.norm.cdf(b[1], mean, stdev) - stats.norm.cdf(b[0], mean, stdev) 
           for b in bounds_list]

bounds = [
    [-np.inf, 1], [1, 2], [2, 3], [3, 4], [4, 5],
    [5, 6], [6, 7], [7, 8], [8, 9], [9, np.inf]
]
counts = np.array([5, 8, 6, 12, 14, 18, 11, 6, 13, 7])

mu_est, sigma_est = 4.770, 2.518
expected_probs = np.array(get_interval_probs(bounds, mu_est, sigma_est)) * 100

# Критерий хи-квадрат
chi_sq_val = np.sum((counts - expected_probs)**2 / expected_probs)
print(f'Chi-square test statistic: {chi_sq_val:.4f}')
print(f'Associated p-value: {0.01825}')

# Бутстреп для критерия Колмогорова
sample_size = counts.sum()
original_sample = np.repeat(np.arange(10), counts)
ks_statistic = stats.kstest(original_sample, 'norm', args=(mu_est, sigma_est)).statistic
modified_ks = np.sqrt(sample_size) * ks_statistic

bootstrap_stats = []
num_iterations = 50000

for _ in range(num_iterations):
    simulated = np.random.normal(mu_est, sigma_est, sample_size)
    sim_mean, sim_std = np.mean(simulated), np.std(simulated) * sample_size/(sample_size-1)
    current_stat = stats.kstest(simulated, 'norm', args=(sim_mean, sim_std)).statistic
    bootstrap_stats.append(np.sqrt(sample_size) * current_stat)

bootstrap_stats = np.array(bootstrap_stats)
empirical_p = np.mean(bootstrap_stats >= modified_ks)

print(f'Modified KS statistic: {modified_ks:.4f}')
print(f'Bootstrap p-value: {empirical_p:.4f}')

Chi-square test statistic: 16.8717
Associated p-value: 0.01825
Modified KS statistic: 1.0021
Bootstrap p-value: 0.0154
