In [1]:
%load_ext autoreload
%autoreload 2
%load_ext lab_black

import matplotlib.pyplot as plt
import numpy as np
import scipy
import json
import pandas as pd
import seaborn as sns
import os
import admix
import scipy.stats
from scipy.interpolate import CubicSpline
from scipy import stats
import admix_genet_cor
import glob
import pickle
from tqdm import tqdm
from admix_genet_cor import hdi

In [2]:
# Coarse grid of genetic-correlation values used to name the REML result
# files (0, 0.05, ..., 1).
rho_list = np.linspace(0, 1, 21)
# Fine grid on [0, 1] on which the interpolated log-likelihood is evaluated.
xs = np.linspace(0, 1, 1001)

# One entry per sub-directory of out/gcta-estimate. glob returns entries in
# arbitrary, filesystem-dependent order, so sort them to make the iteration
# order (and therefore the printed summary) reproducible across machines.
# os.path.basename is used instead of splitting on "/" so that the path
# separator of the host OS is handled.
pheno_prefix_list = sorted(
    os.path.basename(f) for f in glob.glob("out/gcta-estimate/*")
)
grm_prefix_list = ["hm3.mafukb.005", "imputed.mafukb.005"]

In [3]:
# For every (phenotype setting, GRM) pair, read the REML log-likelihood at each
# rho on the coarse grid for each of the 100 simulations, interpolate it onto
# `xs` with a cubic spline, and derive per-simulation point estimates, standard
# errors and p-values. The interpolated curves are also summed across
# simulations to form a meta-analysis log-likelihood.
#
# Results are keyed by (pheno_prefix, grm_prefix):
#   dict_loglik   -> summed log-likelihood curve over `xs`
#   dict_estimate -> list of per-simulation argmax estimates
#   dict_stderr   -> list of per-simulation standard errors
#   dict_pval     -> list of per-simulation likelihood-ratio p-values
dict_loglik = dict()
dict_estimate = dict()
dict_stderr = dict()
dict_pval = dict()
for pheno_prefix in pheno_prefix_list:
    for grm_prefix in grm_prefix_list:
        # The result directory does not depend on the simulation index, so it
        # is built once per (pheno_prefix, grm_prefix) pair.
        f_dir = os.path.join(f"out/gcta-estimate/{pheno_prefix}", grm_prefix)

        estimate = []
        stderr = []
        pval = []
        meta_ll = 0
        for sim_i in range(100):
            # Collect (rho, loglik) for every grid point whose .hsq file exists;
            # missing grid points are skipped.
            loglik_list = []
            for rho in rho_list:
                # round() before int(): `rho * 100` is a float product and plain
                # int() truncates, so a value such as 28.999... would select the
                # wrong file name and the grid point would be silently dropped.
                f_prefix = os.path.join(
                    f_dir, f"sim_{sim_i}.rho{int(round(rho * 100))}"
                )
                if not os.path.exists(f_prefix + ".hsq"):
                    continue
                loglik_list.append(
                    (rho, admix.tools.gcta.read_reml(f_prefix)["loglik"])
                )

            # Fail with context instead of an opaque error from CubicSpline.
            if len(loglik_list) < 2:
                raise ValueError(
                    f"{f_dir}: sim_{sim_i} has {len(loglik_list)} REML result(s); "
                    "at least 2 are required to interpolate the log-likelihood"
                )
            cs = CubicSpline([l[0] for l in loglik_list], [l[1] for l in loglik_list])
            ll = cs(xs)

            # Point estimate: the grid value that maximizes the interpolated
            # curve. Indexing `xs` keeps this correct if its resolution changes.
            estimate.append(xs[ll.argmax()])
            # One standard error, taken as half the width of the 68.27% interval
            # returned by hdi (the +/- 1 s.d. coverage of a normal distribution).
            ci = hdi(xs, ll, ci=0.6827)
            stderr.append((ci[1] - ci[0]) / 2)
            # Likelihood-ratio test of the maximum against the last grid point
            # (xs[-1]), referred to a chi-square distribution with 1 d.o.f.
            pval.append(stats.chi2.sf((ll.max() - ll[-1]) * 2, df=1))
            meta_ll += ll

        # Interval from the log-likelihood summed over all simulations, using
        # hdi's default coverage.
        meta_ci = hdi(xs, meta_ll)
        print(
            f"{pheno_prefix}, {grm_prefix}, meta-ci: ({meta_ci[0]:.2g}, {meta_ci[1]:.2g}), "
            f"mean(se): {np.mean(stderr):.2g}, std(se): {np.std(stderr):.2g}, std(estimate): {np.std(estimate):.2g}",
            f"P[reject] = {np.mean(np.array(pval) < 0.05)}",
        )

        dict_loglik[(pheno_prefix, grm_prefix)] = meta_ll
        dict_estimate[(pheno_prefix, grm_prefix)] = estimate
        dict_stderr[(pheno_prefix, grm_prefix)] = stderr
        dict_pval[(pheno_prefix, grm_prefix)] = pval

hsq-0.1-pcausal-0.001-cor-0.9-hermodel-mafukb, hm3.mafukb.005, meta-ci: (0.88, 0.92), mean(se): 0.12, std(se): 0.072, std(estimate): 0.12 P[reject] = 0.36
hsq-0.1-pcausal-0.001-cor-0.9-hermodel-mafukb, imputed.mafukb.005, meta-ci: (0.89, 0.92), mean(se): 0.11, std(se): 0.061, std(estimate): 0.11 P[reject] = 0.31
hsq-0.1-pcausal-0.001-cor-0.95-hermodel-mafukb, hm3.mafukb.005, meta-ci: (0.92, 0.95), mean(se): 0.11, std(se): 0.071, std(estimate): 0.11 P[reject] = 0.15
hsq-0.1-pcausal-0.001-cor-0.95-hermodel-mafukb, imputed.mafukb.005, meta-ci: (0.92, 0.95), mean(se): 0.11, std(se): 0.061, std(estimate): 0.099 P[reject] = 0.13
hsq-0.1-pcausal-0.001-cor-1.0-hermodel-mafukb, hm3.mafukb.005, meta-ci: (0.98, 1), mean(se): 0.085, std(se): 0.06, std(estimate): 0.051 P[reject] = 0.01
hsq-0.1-pcausal-0.001-cor-1.0-hermodel-mafukb, imputed.mafukb.005, meta-ci: (0.99, 1), mean(se): 0.073, std(se): 0.051, std(estimate): 0.058 P[reject] = 0.01
hsq-0.25-pcausal-0.001-cor-0.9-hermodel-mafukb, hm3.mafukb

In [5]:
# Bundle the per-setting results into a single dict and cache it on disk so
# that later analysis does not need to re-read the REML outputs.
dict_res = {
    "loglik": dict_loglik,
    "estimate": dict_estimate,
    "stderr": dict_stderr,
    "pval": dict_pval,
}
# Create the cache directory if it is missing; otherwise open() raises
# FileNotFoundError on a fresh checkout.
os.makedirs("cache", exist_ok=True)
with open("cache/dict_res.pkl", "wb") as f:
    pickle.dump(dict_res, f)