In [1]:
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np
import pickle
from pathlib import Path
import pandas as pd
import seaborn as sns
from easydict import EasyDict as edict

%matplotlib inline



In [2]:
from tqdm import tqdm_notebook as tqdm
from collections import defaultdict as ddict
from pprint import pprint

In [3]:
def sortdir(f):
    """Return the integer tag found in the second-to-last ``_`` field of ``f.name``.

    The name is split on ``_``; the second-to-last field is taken and
    everything after its last ``p`` is converted to ``int``.  For example a
    name such as ``BiostatR1_p50_n120`` yields ``50``.

    Raises ``IndexError`` when the name has fewer than two ``_``-separated
    fields and ``ValueError`` when the extracted text is not an integer.
    """
    field = f.name.split("_")[-2]
    # rpartition keeps only the text after the last "p" (or the whole field
    # when there is no "p" at all).
    return int(field.rpartition("p")[2])

def sortf(f):
    """Return the part of ``f.name`` before its first ``.`` as an ``int``.

    Used as a sort key so that files named ``0.pkl``, ``1.pkl``, ... are
    ordered numerically rather than lexicographically.  Raises ``ValueError``
    when that leading part is not an integer.
    """
    stem, _, _ = f.name.partition(".")
    return int(stem)

In [4]:
def load_pkl(f):
    """Open ``f`` in binary mode and return the object unpickled from it.

    Note: ``pickle.load`` can execute arbitrary code, so only use this on
    files produced by a trusted source.
    """
    with open(f, mode="rb") as handle:
        return pickle.load(handle)

## Obtaining a simple-format file

In [5]:
def parse_sresult(datum):
    """Flatten one raw result record into a ``{label: value}`` dict.

    ``datum`` is a nested mapping; for each method one specific field is
    picked out (presumably the posterior draws of theta -- confirm against
    the code that writes the pickles).  A missing key raises ``KeyError``.
    """
    # (output label, key inside ``datum``, field to extract)
    layout = (
        ("JEF", "jef", "theta"),
        ("FULL", "full", "theta"),
        ("UIPD", "UIPD", "thetah"),
        ("UIPJS", "UIPJS", "thetah"),
        ("NPP", "NPP", "thetah"),
        ("LCP", "LCP", "thetah"),
        ("rMAP", "rMAP", "thetaP"),
    )
    return {label: datum[method][field] for label, method, field in layout}

def save_data(cdir):
    """Collect the per-method results of every ``*.pkl`` file in ``cdir``.

    Files are visited in ascending numeric order of their names (see
    ``sortf``), each one is loaded with ``load_pkl`` and flattened with
    ``parse_sresult``.  The return value maps each method label to a list
    holding one entry per file, in visiting order.

    Despite the name, nothing is written to disk here; the caller is
    responsible for persisting the returned dict.
    """
    ordered = sorted(cdir.glob("*.pkl"), key=sortf)
    # Key order matters to callers that iterate the result with ``.items()``.
    methods = ("JEF", "FULL", "LCP", "NPP", "UIPJS", "UIPD", "rMAP")
    retres = {method: [] for method in methods}
    for path in tqdm(ordered):
        parsed = parse_sresult(load_pkl(path))
        for method in methods:
            retres[method].append(parsed[method])
    return retres

### Obtain the folders

In [6]:
# Value of the ``_n{n}`` suffix that selects which result folders to use.
n = 120

# All entries under ./results/ whose name matches ``BiostatR1*_n{n}``.
root = Path("./results/")
dirs = list(root.glob(f"BiostatR1*_n{n}"))

### Get the current data

In [15]:
theta0 = 0.5

# Order the folders by their ``p`` tag; ``idxs`` holds the same tags divided
# by 100 in ascending order, so position i in ``idxs`` matches position i in
# ``dirs``.
dirs = sorted(dirs, key=sortdir)
idxs = list(np.sort(np.array([sortdir(d) for d in dirs]) / 100))

# Pick the folder whose scaled tag equals theta0.  ``list.index`` needs an
# exact match and raises ValueError when no folder carries that tag.
cdir = dirs[idxs.index(theta0)]

# Location of the condensed ("clean") pickle for the selected folder.
simData = root / f"./SimBiostatR1_p{sortdir(cdir)}_n{n}.pkl"

### Obtain the cleaned data

In [16]:
# Build the condensed results once and cache them at ``simData``; later runs
# just read the cache.  Delete the cached file to force a rebuild after the
# raw results change.
if not simData.exists():
    cleanData = save_data(cdir)
    with simData.open("wb") as f:
        pickle.dump(cleanData, f)
else:
    cleanData = load_pkl(simData)

In [17]:
# Wrap the loaded results in an EasyDict (imported above as ``edict``); the
# cells below iterate over it with ``.items()``.
cleanData = edict(cleanData)

In [18]:
def is_true(theta0, bs):
    """Return whether ``theta0`` lies strictly between the two bounds in ``bs``.

    ``bs`` is a two-element sequence ``(low, up)``; both ends are excluded.
    """
    low, up = bs
    return low < theta0 < up


def rejrate(theta0, data, theta):
    """Fraction of elements of ``data`` whose quantile interval excludes ``theta0``.

    For every ``dat`` in ``data`` the interval between its ``theta`` and
    ``1 - theta`` quantiles is formed, and ``is_true`` checks whether
    ``theta0`` falls strictly inside it.  The result is one minus the share
    of intervals that contain ``theta0``.

    Note that ``theta`` here is a quantile level (e.g. 0.025), not a
    parameter value.
    """
    covered = []
    for dat in data:
        bounds = [np.quantile(dat, q=theta), np.quantile(dat, q=1-theta)]
        covered.append(is_true(theta0, bs=bounds))
    return 1 - np.mean(covered)


def getRatio(theta0, data):
    """Return the smaller of the two shares of ``data`` on either side of ``theta0``.

    Parameters
    ----------
    theta0 : scalar
        Reference value the entries of ``data`` are compared against.
    data : array-like
        Values to compare.  Any array-like is accepted (NumPy array, list,
        tuple); it is converted with ``np.asarray`` so the element-wise
        comparison also works for plain Python sequences.

    Returns
    -------
    numpy.float64
        ``min(mean(data <= theta0), mean(data > theta0))``.
    """
    draws = np.asarray(data)
    p1 = np.mean(draws <= theta0)
    p2 = np.mean(draws > theta0)
    return np.min([p1, p2])



def getQuantile(theta0, data, alp=0.05):
    """Return the ``alp``-quantile of ``getRatio(theta0, dat)`` over ``data``.

    ``getRatio`` is evaluated once for every element ``dat`` of ``data`` and
    the ``alp`` quantile of those values is returned.
    """
    ratios = []
    for dat in data:
        ratios.append(getRatio(theta0, data=dat))
    return np.quantile(ratios, q=alp)

### Get the CV

In [19]:
# Per-method scalars: calibrated quantile level and the two rejection rates.
CVqs, Alphas, Alphas95 = (ddict(float) for _ in range(3))
# Per-method lists, filled one entry per alternative value.
Powers, Powers95 = (ddict(list) for _ in range(2))

In [20]:
# Quantile level handed to getQuantile for each method.
# NOTE(review): the small offsets from 0.05 look hand-tuned; the reason is
# not recorded in this notebook.
alps = {
    "JEF": 0.05 - 1e-3,
    "FULL": 0.05,
    "LCP": 0.05 - 1e-3,
    "UIPJS": 0.05 - 1e-3,
    "UIPD": 0.05 - 5e-4,
    "NPP": 0.05 - 5e-4,
    "rMAP": 0.05,
}

for key, dat in cleanData.items():
    # Calibrated level for this method, then the rejection rate of theta0
    # at that level and at the fixed 0.025 level.
    cv = getQuantile(theta0, data=dat, alp=alps[key])
    CVqs[key] = cv
    Alphas[key] = rejrate(theta0, dat, cv)
    Alphas95[key] = rejrate(theta0, dat, 0.025)

for title, table in (("Sizes", Alphas), ("Sizes95", Alphas95)):
    print(title)
    for key, v in table.items():
        print(f"{key:<10}: {v:.3f}")

Sizes
JEF       : 0.049
FULL      : 0.054
LCP       : 0.049
NPP       : 0.050
UIPJS     : 0.050
UIPD      : 0.050
rMAP      : 0.051
Sizes95
JEF       : 0.045
FULL      : 0.346
LCP       : 0.049
NPP       : 0.074
UIPJS     : 0.071
UIPD      : 0.034
rMAP      : 0.042


In [14]:
# Rejection rate of the value ``theta`` on the same data, at the calibrated
# level (Powers) and at the fixed 0.025 level (Powers95).
# NOTE(review): this stores a scalar per method, while the "Get the powers"
# cell further down calls ``.append`` on the same entries; run only one of
# the two on a given kernel.
theta = 0
for key, dat in cleanData.items():
    Powers[key] = rejrate(theta, dat, CVqs[key])
    Powers95[key] = rejrate(theta, dat, 0.025)
print("Powers")
for key, v in Powers.items():
    print(f"{key:<10}: {v:.3f}")
# Header fixed: this second table is Powers95, not Powers.
print("Powers95")
for key, v in Powers95.items():
    print(f"{key:<10}: {v:.3f}")

Powers
JEF       : 0.632
FULL      : 1.000
LCP       : 0.598
NPP       : 0.692
UIPJS     : 0.673
UIPD      : 0.712
rMAP      : 0.702
Powers
JEF       : 0.614
FULL      : 1.000
LCP       : 0.779
NPP       : 0.868
UIPJS     : 0.818
UIPD      : 0.833
rMAP      : 0.703


In [36]:
# Sanity check for one method: recompute the calibrated level for UIPD and
# confirm the resulting rejection rate of theta0.
# ``datah0`` was never defined in this notebook (NameError on a fresh
# kernel); ``cleanData`` is the per-method data the calibration cell uses.
cvs = getQuantile(theta0, data=cleanData["UIPD"], alp=0.05-5e-4)
rejrate(theta0, cleanData["UIPD"], cvs)

0.050000000000000044

## Get the powers

In [37]:
# Accumulate one rejection rate per method for every alternative value found
# in ``files``.
# NOTE(review): ``files`` is not assigned at notebook level in the cells shown
# here (the only ``files`` is a local inside save_data) -- confirm where it is
# supposed to come from before relying on this cell.
# NOTE(review): ``.append`` needs list entries; the earlier ``theta = 0`` cell
# overwrites Powers[key] / Powers95[key] with scalars, so the two cells cannot
# both run on the same kernel.
for f in files:
    # Value encoded in the ``p`` tag of the file name, scaled by 1/100.
    theta = sortdir(f)/100
    # Skip the file whose tag equals theta0 (exact float comparison).
    if theta != theta0:
        Powers["theta"].append(theta)
        Powers95["theta"].append(theta)
        datah1 = load_pkl(f)
        print("=="*30)
        for key, dat in datah1.items():
            print(f"Current is method {key} in file {f.name}!")
            # Rejection rate of theta0 at the calibrated level for this method ...
            Powers[key].append(rejrate(theta0, dat, CVqs[key]))
            # ... and at the fixed 0.025 level.
            Powers95[key].append(rejrate(theta0, dat, 0.025))

Current is method JEF in file SimMCMCNorm_p10_n80!
Current is method FULL in file SimMCMCNorm_p10_n80!
Current is method LCP in file SimMCMCNorm_p10_n80!
Current is method NPP in file SimMCMCNorm_p10_n80!
Current is method UIPJS in file SimMCMCNorm_p10_n80!
Current is method UIPD in file SimMCMCNorm_p10_n80!
Current is method JEF in file SimMCMCNorm_p20_n80!
Current is method FULL in file SimMCMCNorm_p20_n80!
Current is method LCP in file SimMCMCNorm_p20_n80!
Current is method NPP in file SimMCMCNorm_p20_n80!
Current is method UIPJS in file SimMCMCNorm_p20_n80!
Current is method UIPD in file SimMCMCNorm_p20_n80!
Current is method JEF in file SimMCMCNorm_p30_n80!
Current is method FULL in file SimMCMCNorm_p30_n80!
Current is method LCP in file SimMCMCNorm_p30_n80!
Current is method NPP in file SimMCMCNorm_p30_n80!
Current is method UIPJS in file SimMCMCNorm_p30_n80!
Current is method UIPD in file SimMCMCNorm_p30_n80!
Current is method JEF in file SimMCMCNorm_p40_n80!
Current is method F

In [38]:
# Tabulate the accumulated rejection rates (one row per ``theta``), then show
# the per-method average over all rows next to the calibrated sizes.
# Note: ``Powers`` is rebound from a dict of lists to a DataFrame here.
Powers = pd.DataFrame(Powers)
print(Powers)
print("Powers")
print(Powers.drop(columns="theta").mean())
print("Sizes")
for key, v in Alphas.items():
    print(f"{key:<10}: {v:.3f}")

   theta    JEF  FULL    LCP    NPP  UIPJS   UIPD
0    0.1  0.156   1.0  0.225  0.262  0.245  0.240
1    0.2  0.452   1.0  0.538  0.572  0.559  0.549
2    0.3  0.791   1.0  0.856  0.886  0.878  0.871
3    0.4  0.935   1.0  0.956  0.966  0.962  0.964
Powers
JEF      0.58350
FULL     1.00000
LCP      0.64375
NPP      0.67150
UIPJS    0.66100
UIPD     0.65600
dtype: float64
Sizes
JEF       : 0.050
FULL      : 1.000
LCP       : 0.050
NPP       : 0.050
UIPJS     : 0.050
UIPD      : 0.050


In [39]:
# Same summary as for ``Powers``, but for the fixed 0.025-level results:
# table per ``theta``, per-method average, and the matching sizes.
# Note: ``Powers95`` is rebound from a dict of lists to a DataFrame here.
Powers95 = pd.DataFrame(Powers95)
print(Powers95)
print("Powers95")
print(Powers95.drop(columns="theta").mean())
print("Sizes")
for key, v in Alphas95.items():
    print(f"{key:<10}: {v:.3f}")

   theta    JEF  FULL    LCP    NPP  UIPJS   UIPD
0    0.1  0.148   1.0  0.365  0.489  0.426  0.400
1    0.2  0.441   1.0  0.672  0.786  0.720  0.693
2    0.3  0.774   1.0  0.926  0.956  0.942  0.935
3    0.4  0.931   1.0  0.978  0.992  0.986  0.982
Powers95
JEF      0.57350
FULL     1.00000
LCP      0.73525
NPP      0.80575
UIPJS    0.76850
UIPD     0.75250
dtype: float64
Sizes
JEF       : 0.046
FULL      : 1.000
LCP       : 0.119
NPP       : 0.178
UIPJS     : 0.138
UIPD      : 0.121
