In [1]:
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np
import pickle
from pathlib import Path
import pandas as pd
import seaborn as sns

%matplotlib inline

In [2]:
from tqdm import tqdm
from collections import defaultdict as ddict
from pprint import pprint

In [3]:
def sortdir(f):
    """Sort key: integer found after the last "p" of the second-to-last
    "_"-separated piece of ``f.name``.

    Example: a name such as ``SimMCMCNorm_p10_n80`` yields ``10``.

    Parameters
    ----------
    f : path-like object exposing a ``.name`` string attribute.

    Returns
    -------
    int

    Raises
    ------
    IndexError if the name has no "_"; ValueError if the extracted
    text is not an integer literal.
    """
    p_token = f.name.split("_")[-2]
    digits = p_token.rsplit("p", 1)[-1]
    return int(digits)

def sortf(f):
    """Sort key: the text of ``f.name`` before its first "." parsed as int.

    Example: ``12.pkl`` yields ``12``.

    Raises ValueError if that leading text is not an integer literal.
    """
    leading, _, _ = f.name.partition(".")
    return int(leading)

In [4]:
def load_pkl(f):
    """Unpickle and return the object stored in file ``f``.

    NOTE(review): ``pickle.load`` can execute arbitrary code, so this
    should only be pointed at files produced by this project.
    """
    with open(f, "rb") as handle:
        return pickle.load(handle)

## Obtaining a simple-format file

In [5]:
def parse_sresult(datum):
    """Pull one entry per method out of a raw result dict.

    ``datum`` is indexed as ``datum[source][field]``; the two lower-case
    sources ("jef", "full") are read through the "theta" field, the
    others through "thetah".  The returned dict is keyed by the
    upper-case method label, in the order listed below.
    """
    # (output label, key in datum, field to extract)
    layout = (
        ("JEF", "jef", "theta"),
        ("FULL", "full", "theta"),
        ("UIPD", "UIPD", "thetah"),
        ("UIPJS", "UIPJS", "thetah"),
        ("NPP", "NPP", "thetah"),
        ("LCP", "LCP", "thetah"),
    )
    return {label: datum[source][field] for label, source, field in layout}

def save_data(cdir):
    """Gather the per-method entries of every ``*.pkl`` file in ``cdir``.

    Files are visited in ascending order of ``sortf`` (the integer at the
    start of the file name).  Each file is loaded with ``load_pkl`` and
    reduced with ``parse_sresult``; the extracted value of every method is
    appended to that method's list.

    Returns
    -------
    dict mapping "JEF", "FULL", "LCP", "NPP", "UIPJS", "UIPD" (in that
    order) to a list with one element per file.
    """
    methods = ("JEF", "FULL", "LCP", "NPP", "UIPJS", "UIPD")
    ordered = sorted(cdir.glob("*.pkl"), key=sortf)
    retres = {name: [] for name in methods}
    for fil in tqdm(ordered):
        simdatum = parse_sresult(load_pkl(fil))
        for name in methods:
            retres[name].append(simdatum[name])
    return retres

In [6]:
# Disabled (fully commented out) setup for the raw-result conversion:
# it would locate the MCMCNorm_p*_n{n} directories, sort them with sortdir,
# pick the one matching theta0 and list its per-run pickles.
#n = 120
#root = Path(f"./")
#dirs = root.glob(f"MCMCNorm_p*_n{n}")
#dirs = list(dirs)
#
## test theta = theta0
#idxs = [0, 0.1, 0.2, 0.3, 0.4]
#theta0 = 0.0
#
## sort the files
#powers = []
#dirs = sorted(dirs, key=sortdir, reverse=False)
#cdir = dirs[idxs.index(theta0)]
#files = list(cdir.glob("*.pkl"))
#files = sorted(files, key=sortf, reverse=False)

In [67]:
# Disabled (fully commented out) conversion step: for every directory in
# `dirs` it would run save_data and pickle the result to
# ./SimMCMCNorm_p<sortdir(cdir)>_n<n>.pkl.  The progress bars recorded below
# show roughly 25 minutes per directory of 1000 files.
#for cdir in dirs:
#    print(f"Processing {cdir.name}")
#    saved_data = save_data(cdir)
#    with open(f"./SimMCMCNorm_p{sortdir(cdir)}_n{n}.pkl", "wb") as savef:
#        pickle.dump(saved_data, savef)
#    

  0%|          | 0/1000 [00:00<?, ?it/s]

Processing MCMCNorm_p00_n120


100%|██████████| 1000/1000 [24:43<00:00,  1.60it/s] 
  0%|          | 0/1000 [00:00<?, ?it/s]

Processing MCMCNorm_p010_n120


100%|██████████| 1000/1000 [25:48<00:00,  1.06s/it] 
  0%|          | 0/1000 [00:00<?, ?it/s]

Processing MCMCNorm_p020_n120


100%|██████████| 1000/1000 [24:48<00:00,  1.72it/s] 
  0%|          | 0/1000 [00:00<?, ?it/s]

Processing MCMCNorm_p030_n120


100%|██████████| 1000/1000 [24:48<00:00,  1.16s/it] 
  0%|          | 0/1000 [00:00<?, ?it/s]

Processing MCMCNorm_p040_n120


100%|██████████| 1000/1000 [25:28<00:00,  1.34it/s] 


In [7]:
def is_true(theta0, bs):
    """Return whether ``theta0`` lies strictly inside the interval ``bs``.

    ``bs`` is a two-element sequence ``(lower, upper)``; both end points
    are excluded.
    """
    lower, upper = bs
    return lower < theta0 < upper


def rejrate(theta0, data, theta):
    """Fraction of samples in ``data`` whose quantile interval misses ``theta0``.

    For every element ``dat`` of ``data`` the interval between its
    ``theta`` and ``1 - theta`` quantiles is formed, and ``is_true``
    checks whether ``theta0`` falls strictly inside it.

    Parameters
    ----------
    theta0 : value being tested.
    data   : iterable of array-likes accepted by ``np.quantile``.
    theta  : lower quantile level (the upper one is ``1 - theta``).

    Returns
    -------
    ``1 - mean(inside)``, i.e. the share of elements for which
    ``theta0`` is NOT inside the interval.
    """
    inside = []
    for dat in data:
        bounds = [np.quantile(dat, q=theta), np.quantile(dat, q=1 - theta)]
        inside.append(is_true(theta0, bs=bounds))
    return 1 - np.mean(inside)


def getRatio(theta0, data):
    """Smaller of the two shares of ``data`` on either side of ``theta0``.

    Computes the fraction of entries ``<= theta0`` and the fraction
    ``> theta0`` and returns the minimum of the two.  ``data`` must
    support element-wise comparison (e.g. a NumPy array).
    """
    share_at_or_below = np.mean(data <= theta0)
    share_above = np.mean(data > theta0)
    return np.min((share_at_or_below, share_above))



def getQuantile(theta0, data, alp=0.05):
    """``alp``-quantile of the ``getRatio`` values over all elements of ``data``.

    Parameters
    ----------
    theta0 : value passed through to ``getRatio``.
    data   : iterable; ``getRatio(theta0, data=dat)`` is evaluated for
             each element ``dat``.
    alp    : quantile level handed to ``np.quantile`` (default 0.05).
    """
    ratios = []
    for dat in data:
        ratios.append(getRatio(theta0, data=dat))
    return np.quantile(ratios, q=alp)

# n = 80

In [33]:
# Scenario handled in this section.
n = 80
root = Path("./")

# Values encoded in the file names; theta0 is the one under test.
idxs = [0, 0.1, 0.2, 0.3, 0.4]
theta0 = 0

# Simplified result files for this n, ordered by the integer that sortdir
# parses from the `p<digits>` part of each name.
files = sorted(root.glob(f"SimMCMCNorm_p*_n{n}*"), key=sortdir)

# H0 file: sits at the same position in `files` as theta0 does in `idxs`.
h0f = files[idxs.index(theta0)]

### Get the CV

In [34]:
# Per-method data stored in the H0 file.
datah0 = load_pkl(h0f)

# Scalar results per method: calibrated quantile level, size at that level,
# and size at the fixed 0.025 level.
CVqs, Alphas, Alphas95 = ddict(float), ddict(float), ddict(float)

# Column-wise accumulators filled by the power loop further down.
Powers, Powers95 = ddict(list), ddict(list)

In [35]:
# Quantile level handed to getQuantile for each method.
# NOTE(review): the small offsets below 0.05 look hand-tuned so that the
# calibrated size prints as 0.050 -- confirm before reusing elsewhere.
alps = dict(
    JEF=0.05 - 1e-3,
    FULL=0.05,
    LCP=0.05 - 1e-3,
    UIPJS=0.05 - 1e-3,
    UIPD=0.05 - 5e-4,
    NPP=0.05 - 5e-4,
)

for key, dat in datah0.items():
    # Calibrated level from the H0 data, then the rejection rate under H0
    # at that level and at the fixed 0.025 level.
    CVqs[key] = getQuantile(theta0, data=dat, alp=alps[key])
    Alphas[key] = rejrate(theta0, dat, CVqs[key])
    Alphas95[key] = rejrate(theta0, dat, 0.025)

for title, sizes in (("Sizes", Alphas), ("Sizes95", Alphas95)):
    print(title)
    for key, v in sizes.items():
        print(f"{key:<10}: {v:.3f}")

Sizes
JEF       : 0.050
FULL      : 1.000
LCP       : 0.050
NPP       : 0.050
UIPJS     : 0.050
UIPD      : 0.050
Sizes95
JEF       : 0.046
FULL      : 1.000
LCP       : 0.119
NPP       : 0.178
UIPJS     : 0.138
UIPD      : 0.121


In [36]:
# Spot check for a single method: recompute its level and H0 rejection rate.
key = "UIPD"
cvs = getQuantile(theta0, data=datah0[key], alp=0.05 - 5e-4)
rejrate(theta0, data=datah0[key], theta=cvs)

0.050000000000000044

## Get the powers

In [37]:
# Rejection rates for every file whose theta differs from theta0.
for f in files:
    theta = sortdir(f) / 100
    if theta == theta0:
        # The H0 file was already used for calibration.
        continue

    Powers["theta"].append(theta)
    Powers95["theta"].append(theta)
    datah1 = load_pkl(f)
    print("==" * 30)
    for key, dat in datah1.items():
        print(f"Current is method {key} in file {f.name}!")
        # Calibrated level first, fixed 0.025 level second.
        at_calibrated = rejrate(theta0, dat, CVqs[key])
        Powers[key].append(at_calibrated)
        at_fixed = rejrate(theta0, dat, 0.025)
        Powers95[key].append(at_fixed)

Current is method JEF in file SimMCMCNorm_p10_n80!
Current is method FULL in file SimMCMCNorm_p10_n80!
Current is method LCP in file SimMCMCNorm_p10_n80!
Current is method NPP in file SimMCMCNorm_p10_n80!
Current is method UIPJS in file SimMCMCNorm_p10_n80!
Current is method UIPD in file SimMCMCNorm_p10_n80!
Current is method JEF in file SimMCMCNorm_p20_n80!
Current is method FULL in file SimMCMCNorm_p20_n80!
Current is method LCP in file SimMCMCNorm_p20_n80!
Current is method NPP in file SimMCMCNorm_p20_n80!
Current is method UIPJS in file SimMCMCNorm_p20_n80!
Current is method UIPD in file SimMCMCNorm_p20_n80!
Current is method JEF in file SimMCMCNorm_p30_n80!
Current is method FULL in file SimMCMCNorm_p30_n80!
Current is method LCP in file SimMCMCNorm_p30_n80!
Current is method NPP in file SimMCMCNorm_p30_n80!
Current is method UIPJS in file SimMCMCNorm_p30_n80!
Current is method UIPD in file SimMCMCNorm_p30_n80!
Current is method JEF in file SimMCMCNorm_p40_n80!
Current is method F

In [38]:
# Turn the accumulated lists into a table (one row per theta) and show it.
Powers = pd.DataFrame(Powers)
print(Powers)

# Per-method average over the theta rows.
print("Powers")
print(Powers.drop(columns="theta").mean(axis=0))

# Sizes obtained under H0 at the calibrated levels.
print("Sizes")
for method, size in Alphas.items():
    print(f"{method:<10}: {size:.3f}")

   theta    JEF  FULL    LCP    NPP  UIPJS   UIPD
0    0.1  0.156   1.0  0.225  0.262  0.245  0.240
1    0.2  0.452   1.0  0.538  0.572  0.559  0.549
2    0.3  0.791   1.0  0.856  0.886  0.878  0.871
3    0.4  0.935   1.0  0.956  0.966  0.962  0.964
Powers
JEF      0.58350
FULL     1.00000
LCP      0.64375
NPP      0.67150
UIPJS    0.66100
UIPD     0.65600
dtype: float64
Sizes
JEF       : 0.050
FULL      : 1.000
LCP       : 0.050
NPP       : 0.050
UIPJS     : 0.050
UIPD      : 0.050


In [39]:
# Tabulate the rejection rates obtained with the fixed 0.025 level
# (one row per theta) and show them.
Powers95 = pd.DataFrame(Powers95)
print(Powers95)

# Per-method average over the theta rows.
print("Powers95")
print(Powers95.drop(columns=["theta"]).mean(axis=0))

# The values printed here come from Alphas95, so the header is "Sizes95",
# the same label the n=40 and n=120 report cells use.
print("Sizes95")
for key, v in Alphas95.items():
    print(f"{key:<10}: {v:.3f}")

   theta    JEF  FULL    LCP    NPP  UIPJS   UIPD
0    0.1  0.148   1.0  0.365  0.489  0.426  0.400
1    0.2  0.441   1.0  0.672  0.786  0.720  0.693
2    0.3  0.774   1.0  0.926  0.956  0.942  0.935
3    0.4  0.931   1.0  0.978  0.992  0.986  0.982
Powers95
JEF      0.57350
FULL     1.00000
LCP      0.73525
NPP      0.80575
UIPJS    0.76850
UIPD     0.75250
dtype: float64
Sizes
JEF       : 0.046
FULL      : 1.000
LCP       : 0.119
NPP       : 0.178
UIPJS     : 0.138
UIPD      : 0.121


# n=40 

In [40]:
# Scenario handled in this section.
n = 40
root = Path("./")

# Values encoded in the file names; theta0 is the one under test.
idxs = [0, 0.1, 0.2, 0.3, 0.4]
theta0 = 0

# Simplified result files for this n, ordered by the integer that sortdir
# parses from the `p<digits>` part of each name.
files = sorted(root.glob(f"SimMCMCNorm_p*_n{n}*"), key=sortdir)

# H0 file: sits at the same position in `files` as theta0 does in `idxs`.
h0f = files[idxs.index(theta0)]
datah0 = load_pkl(h0f)

# Fresh per-method result holders for this n.
CVqs, Alphas, Alphas95 = ddict(float), ddict(float), ddict(float)

# Quantile level handed to getQuantile for each method.
# NOTE(review): the small offsets below 0.05 look hand-tuned so that the
# calibrated size prints as 0.050 -- confirm before reusing elsewhere.
alps = dict(
    JEF=0.05 - 5e-4,
    FULL=0.05,
    LCP=0.05 - 1e-3,
    UIPJS=0.05 - 1e-3,
    UIPD=0.05 - 5e-4,
    NPP=0.05 - 3.1e-3,
)

for key, dat in datah0.items():
    # Calibrated level from the H0 data, then the rejection rate under H0
    # at that level and at the fixed 0.025 level.
    CVqs[key] = getQuantile(theta0, data=dat, alp=alps[key])
    Alphas[key] = rejrate(theta0, dat, CVqs[key])
    Alphas95[key] = rejrate(theta0, dat, 0.025)

for key, v in Alphas.items():
    print(f"{key:<10}: {v:.3f}")

JEF       : 0.050
FULL      : 1.000
LCP       : 0.050
NPP       : 0.050
UIPJS     : 0.050
UIPD      : 0.050


In [41]:
# Spot check for a single method: recompute its level and H0 rejection rate.
key = "NPP"
cvs = getQuantile(theta0, alp=0.05 - 3.1e-3, data=datah0[key])
rejrate(theta0, data=datah0[key], theta=cvs)

0.050000000000000044

In [42]:
# Column-wise accumulators, reset for this n.
Powers, Powers95 = ddict(list), ddict(list)

# Rejection rates for every file whose theta differs from theta0.
for f in files:
    theta = sortdir(f) / 100
    if theta == theta0:
        # The H0 file was already used for calibration.
        continue

    print("==" * 30)
    Powers["theta"].append(theta)
    Powers95["theta"].append(theta)
    datah1 = load_pkl(f)
    for key, dat in datah1.items():
        print(f"Current is method {key} in file {f.name}!")
        # Calibrated level first, fixed 0.025 level second.
        at_calibrated = rejrate(theta0, dat, CVqs[key])
        Powers[key].append(at_calibrated)
        at_fixed = rejrate(theta0, dat, 0.025)
        Powers95[key].append(at_fixed)

Current is method JEF in file SimMCMCNorm_p10_n40!
Current is method FULL in file SimMCMCNorm_p10_n40!
Current is method LCP in file SimMCMCNorm_p10_n40!
Current is method NPP in file SimMCMCNorm_p10_n40!
Current is method UIPJS in file SimMCMCNorm_p10_n40!
Current is method UIPD in file SimMCMCNorm_p10_n40!
Current is method JEF in file SimMCMCNorm_p20_n40!
Current is method FULL in file SimMCMCNorm_p20_n40!
Current is method LCP in file SimMCMCNorm_p20_n40!
Current is method NPP in file SimMCMCNorm_p20_n40!
Current is method UIPJS in file SimMCMCNorm_p20_n40!
Current is method UIPD in file SimMCMCNorm_p20_n40!
Current is method JEF in file SimMCMCNorm_p30_n40!
Current is method FULL in file SimMCMCNorm_p30_n40!
Current is method LCP in file SimMCMCNorm_p30_n40!
Current is method NPP in file SimMCMCNorm_p30_n40!
Current is method UIPJS in file SimMCMCNorm_p30_n40!
Current is method UIPD in file SimMCMCNorm_p30_n40!
Current is method JEF in file SimMCMCNorm_p40_n40!
Current is method F

In [43]:
# Turn the accumulated lists into a table (one row per theta) and show it.
Powers = pd.DataFrame(Powers)
print(Powers)

# Per-method average over the theta rows.
print("Powers")
print(Powers.drop(columns="theta").mean(axis=0))

# Sizes obtained under H0 at the calibrated levels.
print("Sizes")
for method, size in Alphas.items():
    print(f"{method:<10}: {size:.3f}")

   theta    JEF  FULL    LCP    NPP  UIPJS   UIPD
0    0.1  0.087   1.0  0.131  0.126  0.129  0.131
1    0.2  0.255   1.0  0.334  0.330  0.333  0.332
2    0.3  0.467   1.0  0.568  0.561  0.537  0.561
3    0.4  0.684   1.0  0.761  0.768  0.755  0.768
Powers
JEF      0.37325
FULL     1.00000
LCP      0.44850
NPP      0.44625
UIPJS    0.43850
UIPD     0.44800
dtype: float64
Sizes
JEF       : 0.050
FULL      : 1.000
LCP       : 0.050
NPP       : 0.050
UIPJS     : 0.050
UIPD      : 0.050


In [44]:
# Tabulate the rejection rates obtained with the fixed 0.025 level
# (one row per theta) and show them.
Powers95 = pd.DataFrame(Powers95)
print(Powers95)

# Per-method average over the theta rows.
print("Powers95")
print(Powers95.drop(columns="theta").mean(axis=0))

# Sizes obtained under H0 at the fixed 0.025 level.
print("Sizes95")
for method, size in Alphas95.items():
    print(f"{method:<10}: {size:.3f}")

   theta    JEF  FULL    LCP    NPP  UIPJS   UIPD
0    0.1  0.087   1.0  0.341  0.522  0.420  0.393
1    0.2  0.256   1.0  0.601  0.763  0.678  0.653
2    0.3  0.469   1.0  0.811  0.926  0.857  0.845
3    0.4  0.688   1.0  0.926  0.981  0.951  0.944
Powers95
JEF      0.37500
FULL     1.00000
LCP      0.66975
NPP      0.79800
UIPJS    0.72650
UIPD     0.70875
dtype: float64
Sizes95
JEF       : 0.051
FULL      : 1.000
LCP       : 0.180
NPP       : 0.329
UIPJS     : 0.248
UIPD      : 0.211


# n=120

In [49]:
# Scenario handled in this section.
n = 120
root = Path("./")

# Values encoded in the file names; theta0 is the one under test.
idxs = [0, 0.1, 0.2, 0.3, 0.4]
theta0 = 0

# Simplified result files for this n, ordered by the integer that sortdir
# parses from the `p<digits>` part of each name.
files = sorted(root.glob(f"SimMCMCNorm_p*_n{n}*"), key=sortdir)

# H0 file: sits at the same position in `files` as theta0 does in `idxs`.
h0f = files[idxs.index(theta0)]
datah0 = load_pkl(h0f)

# Fresh per-method result holders for this n.
CVqs, Alphas, Alphas95 = ddict(float), ddict(float), ddict(float)

# Quantile level handed to getQuantile for each method.
# NOTE(review): the small offsets below 0.05 look hand-tuned so that the
# calibrated size prints as 0.050 -- confirm before reusing elsewhere.
alps = dict(
    JEF=0.05 - 5e-4,
    FULL=0.05,
    LCP=0.05 - 1e-3,
    UIPJS=0.05 - 5e-4,
    UIPD=0.05 - 5e-4,
    NPP=0.05 - 1e-3,
)

for key, dat in datah0.items():
    # Calibrated level from the H0 data, then the rejection rate under H0
    # at that level and at the fixed 0.025 level.
    CVqs[key] = getQuantile(theta0, data=dat, alp=alps[key])
    Alphas[key] = rejrate(theta0, dat, CVqs[key])
    Alphas95[key] = rejrate(theta0, dat, 0.025)

for key, v in Alphas.items():
    print(f"{key:<10}: {v:.3f}")

JEF       : 0.050
FULL      : 0.999
LCP       : 0.050
NPP       : 0.050
UIPJS     : 0.050
UIPD      : 0.050


In [50]:
# Spot check for a single method: recompute its level and H0 rejection rate.
key = "UIPJS"
cvs = getQuantile(theta0, alp=0.05 - 5e-4, data=datah0[key])
rejrate(theta0, data=datah0[key], theta=cvs)

0.050000000000000044

In [51]:
# Column-wise accumulators, reset for this n.
Powers, Powers95 = ddict(list), ddict(list)

# Rejection rates for every file whose theta differs from theta0.
for f in files:
    theta = sortdir(f) / 100
    if theta == theta0:
        # The H0 file was already used for calibration.
        continue

    print("==" * 30)
    Powers["theta"].append(theta)
    Powers95["theta"].append(theta)
    datah1 = load_pkl(f)
    for key, dat in datah1.items():
        print(f"Current is method {key} in file {f.name}!")
        # Calibrated level first, fixed 0.025 level second.
        at_calibrated = rejrate(theta0, dat, CVqs[key])
        Powers[key].append(at_calibrated)
        at_fixed = rejrate(theta0, dat, 0.025)
        Powers95[key].append(at_fixed)

Current is method JEF in file SimMCMCNorm_p10_n120.pkl!
Current is method FULL in file SimMCMCNorm_p10_n120.pkl!
Current is method LCP in file SimMCMCNorm_p10_n120.pkl!
Current is method NPP in file SimMCMCNorm_p10_n120.pkl!
Current is method UIPJS in file SimMCMCNorm_p10_n120.pkl!
Current is method UIPD in file SimMCMCNorm_p10_n120.pkl!
Current is method JEF in file SimMCMCNorm_p20_n120.pkl!
Current is method FULL in file SimMCMCNorm_p20_n120.pkl!
Current is method LCP in file SimMCMCNorm_p20_n120.pkl!
Current is method NPP in file SimMCMCNorm_p20_n120.pkl!
Current is method UIPJS in file SimMCMCNorm_p20_n120.pkl!
Current is method UIPD in file SimMCMCNorm_p20_n120.pkl!
Current is method JEF in file SimMCMCNorm_p30_n120.pkl!
Current is method FULL in file SimMCMCNorm_p30_n120.pkl!
Current is method LCP in file SimMCMCNorm_p30_n120.pkl!
Current is method NPP in file SimMCMCNorm_p30_n120.pkl!
Current is method UIPJS in file SimMCMCNorm_p30_n120.pkl!
Current is method UIPD in file SimMCM

In [52]:
# Turn the accumulated lists into a table (one row per theta) and show it.
Powers = pd.DataFrame(Powers)
print(Powers)

# Per-method average over the theta rows.
print("Powers")
print(Powers.drop(columns="theta").mean(axis=0))

# Sizes obtained under H0 at the calibrated levels.
print("Sizes")
for method, size in Alphas.items():
    print(f"{method:<10}: {size:.3f}")

   theta    JEF  FULL    LCP    NPP  UIPJS   UIPD
0    0.1  0.204   1.0  0.266  0.286  0.298  0.292
1    0.2  0.548   1.0  0.628  0.664  0.670  0.667
2    0.3  0.905   1.0  0.934  0.940  0.949  0.947
3    0.4  0.987   1.0  0.993  0.993  0.993  0.994
Powers
JEF      0.66100
FULL     1.00000
LCP      0.70525
NPP      0.72075
UIPJS    0.72750
UIPD     0.72500
dtype: float64
Sizes
JEF       : 0.050
FULL      : 0.999
LCP       : 0.050
NPP       : 0.050
UIPJS     : 0.050
UIPD      : 0.050


In [53]:
# Tabulate the rejection rates obtained with the fixed 0.025 level
# (one row per theta) and show them.
Powers95 = pd.DataFrame(Powers95)
print(Powers95)

# Per-method average over the theta rows.
print("Powers95")
print(Powers95.drop(columns="theta").mean(axis=0))

# Sizes obtained under H0 at the fixed 0.025 level.
print("Sizes95")
for method, size in Alphas95.items():
    print(f"{method:<10}: {size:.3f}")

   theta    JEF  FULL    LCP    NPP  UIPJS   UIPD
0    0.1  0.216   1.0  0.396  0.490  0.427  0.411
1    0.2  0.575   1.0  0.766  0.849  0.792  0.778
2    0.3  0.914   1.0  0.968  0.986  0.975  0.971
3    0.4  0.988   1.0  0.999  1.000  0.999  0.998
Powers95
JEF      0.67325
FULL     1.00000
LCP      0.78225
NPP      0.83125
UIPJS    0.79825
UIPD     0.78950
dtype: float64
Sizes95
JEF       : 0.058
FULL      : 1.000
LCP       : 0.104
NPP       : 0.130
UIPJS     : 0.114
UIPD      : 0.108
