In [200]:
import matplotlib.pyplot as plt
import numpy as np
import random
import math
from scipy import stats

In [201]:
from lib.moments import init_moment_k, init_moment_k_interval,\
                        cent_moment_k, cent_moment_k_interval
from lib.sample_creation import get_sample

# **ЛР 3.2**

In [202]:
ages = []
with open('moscow_2021.txt', 'r') as f:
    for l in f.readlines():
        ages.append(int(l.strip()))

In [203]:
def corr_variance2(nums: list) -> float:
    return (len(nums)/(len(nums)-1))*cent_moment_k(nums, 2)

### _Функции для проверки гипотезы равенства дисперсий выборок_

In [204]:
def get_F_observed(corr_variance2_1: float, corr_variance2_2: float) -> float:
    lesser = min(corr_variance2_1, corr_variance2_2)
    bigger = max(corr_variance2_1, corr_variance2_2)
    return bigger/lesser


def lesser_bigger_variance2(sample1: list, sample2: list) -> tuple:
    '''df_lesser - degree of freedom of the sample with lesser corr_variance2

    df_bigger - degree of freedom of the sample with bigger corr_variance2
    
    Returns tuple(corr_variance2_lesser, df_lesser, corr_variance2_bigger, df_bigger)'''
    
    corr_variance2_1 = corr_variance2(sample1)
    corr_variance2_2 = corr_variance2(sample2)

    if corr_variance2_1>corr_variance2_2:
        variance2_bigger = corr_variance2_1
        df_bigger = len(sample1)-1

        variance2_lesser = corr_variance2_2
        df_lesser = len(sample2)-1
    else:
        variance2_bigger = corr_variance2_2
        df_bigger = len(sample2)-1

        variance2_lesser = corr_variance2_1
        df_lesser = len(sample1)-1

    return variance2_lesser, df_lesser, variance2_bigger, df_bigger

### _Функции для проверки гипотезы о равенстве генеральной дисперсии заданному значению_

In [205]:
def get_disper_chi2(nums: list, dis_val: float) -> float:
    sample_dispersion = cent_moment_k(nums, 2)
    return (len(nums)-1)*sample_dispersion/dis_val

### _Функции для вывода_

In [206]:
def hypoth_right_print(funct: str, funct_observed: float, funct_critical: float) -> None:
    print(f'{funct}_observed = {funct_observed:.3f}')
    print(f'{funct}_critical = {funct_critical:.3f}')

    hypoth_accept = funct_observed<funct_critical
    cmpr_res = '<' if hypoth_accept else '>='
    hypoth_status = "accepted" if hypoth_accept else "rejected"
    print(f'{funct}_observed {cmpr_res} {funct}_critical')
    print(f'Hypothesis {hypoth_status}')


def hypoth_left_print(funct: str, funct_observed: float, funct_critical: float) -> None:
    print(f'{funct}_observed = {funct_observed:.3f}')
    print(f'{funct}_critical = {funct_critical:.3f}')

    hypoth_accept = funct_observed>funct_critical
    cmpr_res = '>' if hypoth_accept else '<='
    hypoth_status = "accepted" if hypoth_accept else "rejected"
    print(f'{funct}_observed {cmpr_res} {funct}_critical')
    print(f'Hypothesis {hypoth_status}')


def hypoth_both_print(funct: str, funct_observed: float, funct_critical1: float, funct_critical2: float) -> None:
    print(f'{funct}_observed = {funct_observed:.3f}')
    print(f'{funct}_critical1 = {funct_critical1:.3f}')
    print(f'{funct}_critical2 = {funct_critical2:.3f}')

    hypoth_accept = funct_critical1<funct_observed<funct_critical2
    cmpr_res = 'in' if hypoth_accept else 'out of'
    hypoth_status = "accepted" if hypoth_accept else "rejected"
    print(f'Observed value {funct_observed:.3f} is {cmpr_res} ({funct_critical1:.3f}; {funct_critical2:.3f})')
    print(f'Hypothesis {hypoth_status}')

## **Ход работы**

### ___Задание 1___

$\alpha = 0.05$

$H_0 : D_1 = D_2$

a) $H_1 : D_1 > D_2$

b) $H_1 : D_1 \neq D_2$

In [207]:
ages_sample_size1 = ages_sample_size2 = 62
alpha1 = 0.05
func_used12 = 'F'

ages_sample1 = get_sample(ages, ages_sample_size1)
ages_sample2 = get_sample(ages, ages_sample_size2)


less_corr_var2, less_df, big_corr_var2, big_df = lesser_bigger_variance2(ages_sample1, ages_sample2)
ages_samples12_F_obs = get_F_observed(less_corr_var2, big_corr_var2)

# for case *a*
print('================')
print('Hypothesis for case a):')

ages_samples_case_a_F_crit = stats.f.ppf(1-alpha1, big_df, less_df)

hypoth_right_print(func_used12, ages_samples12_F_obs, ages_samples_case_a_F_crit)

print('================')

# for case *b*
print('================')
print('Hypothesis for case b):')

ages_samples_case_b_F_crit1 = stats.f.ppf(alpha1/2, big_df, less_df)
ages_samples_case_b_F_crit2 = stats.f.ppf(1-alpha1/2, big_df, less_df)

hypoth_both_print(func_used12, ages_samples12_F_obs, ages_samples_case_b_F_crit1, ages_samples_case_b_F_crit2)

print('================')


Hypothesis for case a):
F_observed = 1.080
F_critical = 1.529
F_observed < F_critical
Hypothesis accepted
Hypothesis for case b):
F_observed = 1.080
F_critical1 = 0.603
F_critical2 = 1.660
Observed value 1.080 is in (0.603; 1.660)
Hypothesis accepted


### ***Задание 2***

$\alpha = 0.05$

$ H_0 : D(x) = \sigma^2 $

a) $ H_1 : D(x) > \sigma^2 $

b) $ H_1 : D(x) \neq \sigma^2 $

c) $ H_1 : D(x) < \sigma^2 $

In [208]:
ages_dispersion = cent_moment_k(ages, 2)
ages_sample_size3 = 62
alpha2 = 0.05
func_used3 = 'chi2'

ages_sample3 = get_sample(ages, ages_sample_size3)

ages_sample3_chi2_obs = get_disper_chi2(ages_sample3, ages_dispersion)

print('================')
print('Hypothesis for case a):')
ages_sample3_chi2_crit_a = stats.chi2.ppf(1-alpha2, ages_sample_size3-1)

hypoth_right_print(func_used3, ages_sample3_chi2_obs, ages_sample3_chi2_crit_a)

print('================')


print('================')
print('Hypothesis for case b):')
ages_sample3_chi2_crit_b1 = stats.chi2.ppf(alpha2/2, ages_sample_size3-1)
ages_sample3_chi2_crit_b2 = stats.chi2.ppf(1-alpha2/2, ages_sample_size3-1)

hypoth_both_print(func_used3, ages_sample3_chi2_obs, ages_sample3_chi2_crit_b1, ages_sample3_chi2_crit_b2)

print('================')


print('================')
print('Hypothesis for case c):')
ages_sample3_chi2_crit_c = stats.chi2.ppf(alpha2, ages_sample_size3-1)

hypoth_left_print(func_used3, ages_sample3_chi2_obs, ages_sample3_chi2_crit_c)

print('================')

Hypothesis for case a):
chi2_observed = 54.671
chi2_critical = 80.232
chi2_observed < chi2_critical
Hypothesis accepted
Hypothesis for case b):
chi2_observed = 54.671
chi2_critical1 = 41.303
chi2_critical2 = 84.476
Observed value 54.671 is in (41.303; 84.476)
Hypothesis accepted
Hypothesis for case c):
chi2_observed = 54.671
chi2_critical = 44.038
chi2_observed > chi2_critical
Hypothesis accepted
