# TASK 1

Даны две независимые выборки $X, Y$ из нормальных распределений $\mathcal{N} (\mu_1, \sigma_1^2), \mathcal{N} (\mu_2, \sigma_2^2)$ объема $n, m$ соответственно.

$\tau = \sigma_1^2 / \sigma_2^2$

известны $\mu_1 = 0, \mu_2 = 0$, 
для эксперимента: $\sigma_1^2 = 2, \sigma_2^2 = 1$

функция: $\frac{m \sum_{i=1}^{n} (X_i - \mu_1)^2}{n \sum_{i=1}^{m} (Y_i - \mu_2)^2}$

уровень надежности $1 - \alpha = 0.95$

In [7]:
import numpy as np
import pandas as pd
from scipy.stats import f

mu1 = 0
mu2 = 0

sigma1 = np.sqrt(2)
sigma2 = np.sqrt(1)

count = 1000

def experiment(n, m, count):
    dataX = np.random.normal(loc=mu1, scale=sigma1, size=(count, n))
    dataY = np.random.normal(loc=mu2, scale=sigma2, size=(count, m))

    varX = (m * np.sum((dataX - mu1)**2, axis=1)) / n
    varY = (n * np.sum((dataY - mu2)**2, axis=1)) / m

    df = pd.DataFrame()
    df['lower_bound'] = (varX / varY) * f.ppf(q=0.025, dfn=n-1, dfd=n-1)
    df['upper_bound'] = (varX / varY) * f.ppf(q=0.975, dfn=n-1, dfd=n-1)
    df['covering'] = ((df['lower_bound'] <= 2) & (df['upper_bound'] >= 2)).astype(bool)

    return df

In [14]:
ex_25 = experiment(25, 25, count)
covering = ex_25['covering'].sum()
print(f"the confidence interval covers the real value of the parameter: {covering / count}")

the confidence interval covers the real value of the parameter: 0.967


In [9]:
ex_25

Unnamed: 0,lower_bound,upper_bound,covering
0,0.934872,4.814234,True
1,0.940668,4.844082,True
2,0.412004,2.121663,True
3,1.428792,7.357738,True
4,0.767567,3.952678,True
...,...,...,...
995,1.564636,8.057278,True
996,1.508155,7.766423,True
997,0.766782,3.948637,True
998,0.506060,2.606016,True


In [11]:
ex_10000 = experiment(10000, 10000, count)
covering = ex_10000['covering'].sum()
print(f"the confidence interval covers the real value of the parameter: {covering / count}")

the confidence interval covers the real value of the parameter: 0.937


In [12]:
ex_10000

Unnamed: 0,lower_bound,upper_bound,covering
0,1.949432,2.108433,True
1,1.885003,2.038749,True
2,1.986851,2.148903,True
3,1.873004,2.025771,True
4,1.876942,2.030031,True
...,...,...,...
995,1.922505,2.079310,True
996,1.904372,2.059698,True
997,1.895583,2.050192,True
998,1.875160,2.028103,True


In [16]:
(ex_25['upper_bound'] - ex_25['lower_bound']).mean()

3.930803107798436

In [17]:
(ex_10000['upper_bound'] - ex_10000['lower_bound']).mean()

0.1567396997180478

# TASK 2

$$
 Geom(p) ; p ; p = 0.7 \\
  P(x = X) = (1 - p)^{x - 1} \cdot p \\
  \text{Функция плотности вероятности} \\
  f_p = \prod P_i = \prod (1 - p)^{\sum(x_i - 1)} \cdot p^{n} \\
 L(p) = \sum ((x_i - 1)ln(1 - p) + ln(p)) \\
 L'(p) = \dfrac{\sum(1-x_i)}{1-p} + \dfrac{n}{p} \\
L  \rightarrow max \  \text{in} \  p = \dfrac{n}{\sum x_i} = \dfrac{1}{\overline{x}} \\
 \overline{p} = \dfrac{1}{\overline{x}} \\
  P\left(\tau \in \left[\overline{p} -q_{1-\frac{\alpha}{2}} \sigma, \overline{p} +q_{1-\frac{\alpha}{2}} \sigma\right] \right)
$$
Доверетильный интервал найден.\
Эксперимент:

По заданию:

In [278]:
import numpy as np
import pandas as pd
from scipy.stats import rv_continuous, geom, norm

p = 0.7

alpha = 1 - 0.95

#критерий(критическое значение)
#распредеоение вообще не симметричное поэтому на 2 не делим
q = norm.ppf(1 - alpha)

def experiment(n, n_experiments=1000):

    data = geom.rvs(p = p, size = (n_experiments, n))
    data_mean = data.mean(axis  = 1)

    mle = 1 / data_mean

    se = np.sqrt(mle * (1 - mle) / n)

    df = pd.DataFrame()
    df['left'] = mle - q * se
    df['right'] = mle + q * se
    df['in'] = ((df['left'] <= p) & (df['right'] >= p)).astype(bool)
    return df
         

При большом размере выборки среднее смещается от параметра и он не попадает в доверительный интервал.

In [279]:
experiment_25 = experiment(25)
print(len(experiment_25[experiment_25['in']] == True))
experiment_25

913


Unnamed: 0,left,right,in
0,0.676482,0.936421,True
1,0.616595,0.898556,True
2,0.645254,0.917246,True
3,0.565672,0.862900,True
4,0.565672,0.862900,True
...,...,...,...
995,0.590160,0.880428,True
996,0.542907,0.845982,True
997,0.616595,0.898556,True
998,0.419104,0.743687,True


In [280]:
experiment_10000 = experiment(10000)
print(len(experiment_10000[experiment_10000['in']] == True))
experiment_10000

956


Unnamed: 0,left,right,in
0,0.691758,0.706843,True
1,0.692596,0.707670,True
2,0.690529,0.705632,True
3,0.692992,0.708059,True
4,0.693783,0.708840,True
...,...,...,...
995,0.689206,0.704328,True
996,0.695273,0.710308,True
997,0.689255,0.704376,True
998,0.692843,0.707913,True
