In [1]:
import numpy as np
import pandas as pd
import scipy.stats as sps
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.sandbox.stats.multicomp import multipletests
from tqdm import tqdm_notebook

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston
from scipy.linalg import eigvals

%matplotlib inline

In [2]:
# Monte Carlo estimate of how often three tests reject their null hypothesis
# (at level `alpha`) on Cauchy-distributed data, as a function of sample size.
alpha = 0.05  # a test "rejects" when its p-value is below this level

min_sample_size = 5
max_sample_size = 100
sample_count = 10000  # number of independent replications per sample size
seed = 42  # fixed so that Restart & Run All reproduces the same estimates

# One row per replication; the first i entries of a row are used as a sample of size i.
sample = sps.cauchy().rvs((sample_count, max_sample_size), random_state=seed)

# Row i holds the 0/1 rejection indicators for samples of size i.
# Rows below min_sample_size are never filled and stay zero.
is_reject_kstest = np.zeros((max_sample_size + 1, sample_count))
is_reject_jarque_bera = np.zeros((max_sample_size + 1, sample_count))
is_reject_shapiro_wilk = np.zeros((max_sample_size + 1, sample_count))

# Built once: the Kolmogorov-Smirnov test compares against the standard normal CDF.
standard_normal_cdf = sps.norm().cdf

for i in tqdm_notebook(range(min_sample_size, max_sample_size + 1)):
    for j in range(sample_count):
        # Prefix of exactly i observations, so that row i matches sample size i.
        # (A slice of :i+1 would shift every row by one and, at i == max_sample_size,
        # silently be clipped to the same data as the previous row.)
        prefix = sample[j, :i]
        is_reject_kstest[i, j] = (sps.kstest(prefix, cdf=standard_normal_cdf)[1] < alpha)
        is_reject_jarque_bera[i, j] = (sps.jarque_bera(prefix)[1] < alpha)
        is_reject_shapiro_wilk[i, j] = (sps.shapiro(prefix)[1] < alpha)


# Fraction of replications rejected for each sample size
# min_sample_size, ..., max_sample_size (element 0 corresponds to min_sample_size).
kstest_check = is_reject_kstest[min_sample_size:].mean(axis=1)
jarque_bera_check = is_reject_jarque_bera[min_sample_size:].mean(axis=1)
shapiro_wilk_check = is_reject_shapiro_wilk[min_sample_size:].mean(axis=1)

HBox(children=(IntProgress(value=0, max=96), HTML(value='')))




In [4]:
# Write each array of estimated rejection rates to its own text file,
# formatting every value with '%f'.
for output_path, rejection_rates in (
    ('kstest_power.txt', kstest_check),
    ('jarque_bera_power.txt', jarque_bera_check),
    ('shapiro_power.txt', shapiro_wilk_check),
):
    np.savetxt(output_path, rejection_rates, fmt='%f')