In [13]:
import numpy
from scipy.integrate import quad
from scipy import stats
from statsmodels.distributions.empirical_distribution import ECDF

ImportError: No module named 'statsmodels'

In [2]:
def empirical_cdf(x, sample):
    return numpy.sum([1 for value in sample if value <= x]) / len(sample)

def integrand(x, sample, scipy_distribution):
    return (
        (scipy_distribution.cdf(x) - empirical_cdf(x, sample)) ** 2 /
        ((1 - scipy_distribution.cdf(x)) * scipy_distribution.cdf(x)) * scipy_distribution.pdf(x)
        if -37 <= x <= 8 else 0
    )

def Omega2(sample, scipy_distribution):
    return quad(integrand, -numpy.inf, numpy.inf, args=(sample, scipy_distribution))[0]

In [3]:
alphas = {0.01, 0.05, 0.1}
ns = {25, 100, 500, 2000}
bootstrap_size = 100

In [4]:
quantilies = {}
for n in ns:
    samples = [stats.norm.rvs(size=n) for i in range(bootstrap_size)]
    for alpha in alphas:
        values = [n * Omega2(sample, stats.norm) for sample in samples]
        values.sort()
        quantilies[(n, alpha)] = values[int((1 - alpha) * len(values))]

  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.


In [5]:
powers = {}
for n, alpha in quantilies:
    student_samples = [stats.t.rvs(df=10, size=n) for i in range(bootstrap_size)]
    satisfing_counter = 0
    for sample in student_samples:
        satisfing_counter += 1 if n * Omega2(sample, stats.norm) > quantilies[(n, alpha)] else 0
    powers[(n, alpha)] = satisfing_counter / bootstrap_size

  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.


In [7]:
powers

{(25, 0.01): 0.02,
 (25, 0.05): 0.06,
 (25, 0.1): 0.21,
 (100, 0.01): 0.0,
 (100, 0.05): 0.11,
 (100, 0.1): 0.15,
 (500, 0.01): 0.04,
 (500, 0.05): 0.19,
 (500, 0.1): 0.41,
 (2000, 0.01): 0.37,
 (2000, 0.05): 0.73,
 (2000, 0.1): 0.95}