# Compare the estimates when including different numbers of PCs

In [1]:
%load_ext lab_black

import pandas as pd
import numpy as np
import glob
import os
import admix
from scipy.interpolate import CubicSpline
from tqdm import tqdm
import admix_genet_cor

In [2]:
# Collect one identifier per TSV file in out/pheno: the file's base name up to
# its first ".".
# glob.glob returns paths in arbitrary order, so the names are sorted to give
# every downstream loop a reproducible order; os.path.basename is used instead
# of splitting on "/" so the platform's path separator is handled.
trait_list = sorted(
    os.path.basename(path).split(".")[0]
    for path in glob.glob(os.path.join("out/pheno", "*.tsv"))
)

In [3]:
# GRM configurations to evaluate. Every label is three dot-separated parts and
# the list is the full product of the options below, in nested-loop order.
# NOTE(review): the parts look like SNP set / frequency source / MAF cutoff --
# confirm against the pipeline that produced the GRMs.
grm_prefix_list = [
    f"{snp_set}.{freq_source}.{cutoff}"
    for snp_set in ("hm3", "imputed")
    for freq_source in ("mafukb", "gcta")
    for cutoff in ("005", "05")
]

In [4]:
# Build dict_loglik[trait][grm_prefix]: the "loglik" values read from the
# per-rho REML results, interpolated with a cubic spline and evaluated on `xs`.
dict_loglik = {trait: dict() for trait in trait_list}
rho_list = np.linspace(0, 1, 21)  # rho values for which a result file may exist
xs = np.linspace(0, 1, 1001)  # dense grid on which each spline is evaluated

for grm_prefix in grm_prefix_list:
    for trait in tqdm(trait_list):
        est_dir = f"out/gcta-estimate/{trait}-{grm_prefix}"

        loglik = {}
        for rho in rho_list:
            # round() rather than a bare int(): int() truncates, so a product
            # such as 28.999999999999996 would map to the wrong file name and
            # the fit would be silently dropped by the existence check below.
            est_prefix = os.path.join(est_dir, f"rho{int(round(rho * 100))}")
            # Grid points without a ".hsq" result file are skipped on purpose.
            if os.path.exists(est_prefix + ".hsq"):
                loglik[rho] = admix.tools.gcta.read_reml(est_prefix)["loglik"]

        # CubicSpline needs at least two knots; fail with a message that names
        # the offending directory instead of an opaque interpolation error.
        if len(loglik) < 2:
            raise ValueError(
                f"{est_dir}: found {len(loglik)} rho*.hsq result file(s); "
                "at least 2 are required to interpolate the log-likelihood"
            )

        # rho_list is increasing and dicts preserve insertion order, so the
        # knots are already in the increasing order CubicSpline requires.
        cs = CubicSpline(list(loglik.keys()), list(loglik.values()))
        ll = cs(xs)
        dict_loglik[trait][grm_prefix] = ll

100%|██████████| 72/72 [00:09<00:00,  7.81it/s]
100%|██████████| 72/72 [00:09<00:00,  7.80it/s]
100%|██████████| 72/72 [00:09<00:00,  7.72it/s]
100%|██████████| 72/72 [00:09<00:00,  7.82it/s]
100%|██████████| 72/72 [00:09<00:00,  7.67it/s]
100%|██████████| 72/72 [00:09<00:00,  7.68it/s]
100%|██████████| 72/72 [00:09<00:00,  7.87it/s]
100%|██████████| 72/72 [00:09<00:00,  7.76it/s]


In [5]:
# Reduce the "<trait>-<covariate set>" identifiers to the distinct trait names.
# NOTE: this rebinds `trait_list`, so the cells above must be re-run before
# they are used again.
# rsplit drops only the last "-"-separated field, which is the exact inverse
# of the `trait + "-" + covar` keys built in the following cells, so trait
# names that themselves contain "-" stay intact. sorted() replaces the
# hash-dependent order of a bare set with a reproducible one.
trait_list = sorted({t.rsplit("-", 1)[0] for t in trait_list})

In [6]:
# For each covariate set, sum the per-trait log-likelihood curves of the
# "imputed.mafukb.005" GRM and report the maximising grid point together with
# the interval returned by admix_genet_cor.hdi.
for covar in ["page1pc", "page10pc", "sample10pc"]:
    meta_ll = 0
    for trait in trait_list:
        meta_ll += dict_loglik[trait + "-" + covar]["imputed.mafukb.005"]
    interval = admix_genet_cor.hdi(xs, meta_ll)
    interval = [np.round(i, 3) for i in interval]

    # Read the maximiser off the grid itself rather than dividing the index by
    # a hard-coded 1000, so the estimate stays correct if `xs` is redefined.
    print(covar, np.round(xs[meta_ll.argmax()], 3), interval)

page1pc 0.928 [0.883, 0.964]
page10pc 0.922 [0.876, 0.96]
sample10pc 0.911 [0.864, 0.951]


In [7]:
# For each GRM configuration, sum the per-trait log-likelihood curves of the
# "sample10pc" covariate set and report the maximising grid point together
# with the interval returned by admix_genet_cor.hdi.
for grm_prefix in grm_prefix_list:
    meta_ll = 0
    for trait in trait_list:
        meta_ll += dict_loglik[trait + "-" + "sample10pc"][grm_prefix]
    interval = admix_genet_cor.hdi(xs, meta_ll)
    interval = [np.round(i, 3) for i in interval]
    # Read the maximiser off the grid itself rather than dividing the index by
    # a hard-coded 1000, so the estimate stays correct if `xs` is redefined.
    print(grm_prefix, np.round(xs[meta_ll.argmax()], 3), interval)

hm3.mafukb.005 0.944 [0.909, 0.973]
hm3.mafukb.05 0.92 [0.881, 0.952]
hm3.gcta.005 0.952 [0.921, 0.978]
hm3.gcta.05 0.923 [0.887, 0.954]
imputed.mafukb.005 0.911 [0.864, 0.951]
imputed.mafukb.05 0.863 [0.809, 0.909]
imputed.gcta.005 0.915 [0.87, 0.951]
imputed.gcta.05 0.865 [0.813, 0.909]
