# TASK 1

Даны две независимые выборки $X, Y$ из нормальных распределений $\mathcal{N} (\mu_1, \sigma_1^2), \mathcal{N} (\mu_2, \sigma_2^2)$ объема $n, m$ соответственно.

$\tau = \sigma_1^2 / \sigma_2^2$

известны $\mu_1 = 0, \mu_2 = 0$, 
для эксперимента: $\sigma_1^2 = 2, \sigma_2^2 = 1$

функция: $\frac{m \sum_{i=1}^{n} (X_i - \mu_1)^2}{n \sum_{i=1}^{m} (Y_i - \mu_2)^2}$

уровень надежности $1 - \alpha = 0.95$

In [7]:
import numpy as np
import pandas as pd
from scipy.stats import f

mu1 = 0
mu2 = 0

sigma1 = np.sqrt(2)
sigma2 = np.sqrt(1)

count = 1000

def experiment(n, m, count):
    dataX = np.random.normal(loc=mu1, scale=sigma1, size=(count, n))
    dataY = np.random.normal(loc=mu2, scale=sigma2, size=(count, m))

    varX = (m * np.sum((dataX - mu1)**2, axis=1)) / n
    varY = (n * np.sum((dataY - mu2)**2, axis=1)) / m

    df = pd.DataFrame()
    df['lower_bound'] = (varX / varY) * f.ppf(q=0.025, dfn=n-1, dfd=n-1)
    df['upper_bound'] = (varX / varY) * f.ppf(q=0.975, dfn=n-1, dfd=n-1)
    df['covering'] = ((df['lower_bound'] <= 2) & (df['upper_bound'] >= 2)).astype(bool)

    return df

In [14]:
ex_25 = experiment(25, 25, count)
covering = ex_25['covering'].sum()
print(f"the confidence interval covers the real value of the parameter: {covering / count}")

the confidence interval covers the real value of the parameter: 0.967


In [9]:
ex_25

Unnamed: 0,lower_bound,upper_bound,covering
0,0.934872,4.814234,True
1,0.940668,4.844082,True
2,0.412004,2.121663,True
3,1.428792,7.357738,True
4,0.767567,3.952678,True
...,...,...,...
995,1.564636,8.057278,True
996,1.508155,7.766423,True
997,0.766782,3.948637,True
998,0.506060,2.606016,True


In [11]:
ex_10000 = experiment(10000, 10000, count)
covering = ex_10000['covering'].sum()
print(f"the confidence interval covers the real value of the parameter: {covering / count}")

the confidence interval covers the real value of the parameter: 0.937


In [12]:
ex_10000

Unnamed: 0,lower_bound,upper_bound,covering
0,1.949432,2.108433,True
1,1.885003,2.038749,True
2,1.986851,2.148903,True
3,1.873004,2.025771,True
4,1.876942,2.030031,True
...,...,...,...
995,1.922505,2.079310,True
996,1.904372,2.059698,True
997,1.895583,2.050192,True
998,1.875160,2.028103,True


In [16]:
(ex_25['upper_bound'] - ex_25['lower_bound']).mean()

3.930803107798436

In [17]:
(ex_10000['upper_bound'] - ex_10000['lower_bound']).mean()

0.1567396997180478

# TASK 2

$$
 Geom(p) ; p ; p = 0.7 \\
  P(X = x) = (1 - p)^{x - 1} \cdot p \\
  f_p = \prod P_i = \prod (1 - p)^{\sum(x_i - 1)} \cdot p^{n} \\
 L(p) = \sum ((x_i - 1)ln(1 - p) + ln(p)) \\
 L'(p) = \dfrac{\sum(1-x_i)}{1-p} + \dfrac{n}{p} \\
L  \rightarrow max \  \text{in} \  p = \dfrac{n}{\sum x_i} = \dfrac{1}{\overline{x}} \\
 \overline{p} = \dfrac{1}{\overline{x}} \\
  P\left(\tau \in \left[\overline{p} -q_{1-\frac{\alpha}{2}} \sigma, \overline{p} +q_{1-\frac{\alpha}{2}} \sigma\right] \right)
$$
Доверетильный интервал найден.\
Эксперимент:

По заданию:

In [107]:
import numpy as np
import pandas as pd
from scipy.stats import rv_continuous, geom

p = 0.7
𝜎_squared_1 = 2# из первой таски
𝜎_squared_2 = 1  # также



𝛼 = 1 - 0.95

quantile = geom.ppf(1 - 𝛼 / 2, p = p)

def experiment(n, n_experiments=1000):
    # data = np.random.normal(p, size = (n_experiments, n))
    data = geom.rvs(p, size = (n_experiments, n))
    data_mean = data.mean(axis = 1)
    𝜎 = np.sqrt(𝜎_squared_1 / n  + σ_squared_2 /n)
    
    df = pd.DataFrame()
    df['left'] = data_mean - quantile * 𝜎
    df['right'] = data_mean + quantile * 𝜎
    df['in'] = ((df['left'] <= p) & (df['right'] >= p)).astype(bool)
    return df
         

При большом размере выборки среднее смещается от параметра и он не попадает в доверительный интервал.

In [108]:
experiment_25 = experiment(25)
print(len(experiment_25[experiment_25['in']] == True))
experiment_25

1000


Unnamed: 0,left,right,in
0,-0.065641,2.705641,True
1,-0.265641,2.505641,True
2,0.014359,2.785641,True
3,0.254359,3.025641,True
4,0.014359,2.785641,True
...,...,...,...
995,0.054359,2.825641,True
996,0.054359,2.825641,True
997,0.054359,2.825641,True
998,0.014359,2.785641,True


In [109]:
experiment_10000 = experiment(10000)
print(len(experiment_10000[experiment_10000['in']] == True))
experiment_10000

0


Unnamed: 0,left,right,in
0,1.346318,1.484882,False
1,1.351518,1.490082,False
2,1.360718,1.499282,False
3,1.361418,1.499982,False
4,1.365718,1.504282,False
...,...,...,...
995,1.346118,1.484682,False
996,1.366818,1.505382,False
997,1.349918,1.488482,False
998,1.350818,1.489382,False
