In [1]:
import numpy
from scipy.integrate import quad
from scipy import stats
from statsmodels.distributions.empirical_distribution import ECDF

In [2]:
def integrand(x, cdf, pdf, ecdf):
    return (
        (cdf(x) - ecdf(x)) ** 2 /
        ((1 - cdf(x)) * cdf(x)) * pdf(x)
    )

# Надо немного пошаманить с пределами интегрирования, чтобы не выскакивали Nan'ы
def Omega2(sample, scipy_distribution):
    ecdf = ECDF(sample)
    return quad(integrand, -37, 8, args=(scipy_distribution.cdf, scipy_distribution.pdf, ecdf))[0]

In [3]:
alphas = {0.01, 0.05, 0.1}
ns = {25, 100, 500, 2000}
bootstrap_size = 100

In [4]:
quantilies = {}
for n in ns:
    samples = [stats.norm.rvs(size=n) for i in range(bootstrap_size)]
    values = numpy.array([n * Omega2(sample, stats.norm) for sample in samples])
    for alpha in alphas:
        quantilies[(n, alpha)] = numpy.percentile(values, 100 * (1 - alpha))

  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.


In [8]:
quantilies

{(25, 0.01): 2.6427273541901486,
 (25, 0.05): 2.0457642046807138,
 (25, 0.1): 1.5127067448622049,
 (100, 0.01): 4.1716083654555947,
 (100, 0.05): 2.442005852057664,
 (100, 0.1): 2.1056332644331968,
 (500, 0.01): 3.2721420612855869,
 (500, 0.05): 1.8098952497219156,
 (500, 0.1): 1.5258246626840002,
 (2000, 0.01): 3.7885549483564649,
 (2000, 0.05): 3.1635547961551453,
 (2000, 0.1): 2.400199912616571}

In [5]:
powers = {}
ks_powers = {}
student_bootstraps = {n: [stats.t.rvs(df=10, size=n) for i in range(bootstrap_size)] for n in ns}

for n, alpha in quantilies:
    satisfing_counter = 0
    ks_satisfing_counter = 0
    for sample in student_bootstraps[n]:
        satisfing_counter += 1 if n * Omega2(sample, stats.norm) > quantilies[(n, alpha)] else 0
        ks_satisfing_counter += (
            1 if (stats.kstest(sample, 'norm').statistic > stats.ksone.ppf(1 - alpha, n)) else 0
        )
    powers[(n, alpha)] = satisfing_counter / bootstrap_size
    ks_powers[(n, alpha)] = ks_satisfing_counter / bootstrap_size

  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.


In [6]:
powers

{(25, 0.01): 0.05,
 (25, 0.05): 0.16,
 (25, 0.1): 0.25,
 (100, 0.01): 0.01,
 (100, 0.05): 0.11,
 (100, 0.1): 0.14,
 (500, 0.01): 0.12,
 (500, 0.05): 0.45,
 (500, 0.1): 0.56,
 (2000, 0.01): 0.62,
 (2000, 0.05): 0.74,
 (2000, 0.1): 0.93}

In [7]:
ks_powers

{(25, 0.01): 0.02,
 (25, 0.05): 0.08,
 (25, 0.1): 0.22,
 (100, 0.01): 0.02,
 (100, 0.05): 0.09,
 (100, 0.1): 0.22,
 (500, 0.01): 0.03,
 (500, 0.05): 0.13,
 (500, 0.1): 0.22,
 (2000, 0.01): 0.09,
 (2000, 0.05): 0.41,
 (2000, 0.1): 0.58}

Критерий Андерсона вышел мощнее, как и обещали на лекции. Квантили посчитаны.