# Compare the estimate including different number of PCs

In [1]:
%load_ext lab_black

import pandas as pd
import numpy as np
import glob
import os
import admix
from scipy.interpolate import CubicSpline
from tqdm import tqdm
import admix_genet_cor

In [2]:
trait_list = [
    f.split("/")[-1].split(".")[0]
    for f in glob.glob(os.path.join("out/pheno", "*.tsv"))
]

In [3]:
dict_loglik = {trait: dict() for trait in trait_list}
rho_list = np.linspace(0, 1, 21)
xs = np.linspace(0, 1, 1001)

for grm_prefix in ["imputed.mafukb.005"]:
    for trait in tqdm(trait_list):
        est_dir = f"out/gcta-estimate/{trait}-{grm_prefix}"
        try:
            loglik_list = [
                admix.tools.gcta.read_reml(
                    os.path.join(est_dir, f"rho{int(rho * 100)}")
                )["loglik"]
                for rho in rho_list
            ]
            cs = CubicSpline(rho_list, loglik_list)
            ll = cs(xs)
            dict_loglik[trait][grm_prefix] = ll
        except ValueError as err:
            print(trait, err)

100%|██████████| 72/72 [00:08<00:00,  8.59it/s]


In [4]:
trait_list = [t.split("-")[0] for t in trait_list]

In [5]:
for covar in ["page1pc", "page10pc", "sample10pc"]:
    meta_ll = 0
    for trait in trait_list:
        meta_ll += dict_loglik[trait + "-" + covar]["imputed.mafukb.005"]
    print(covar, meta_ll.argmax() / 1000, admix_genet_cor.hdi(xs, meta_ll))

page1pc 0.944 (0.924, 0.963)
page10pc 0.938 (0.918, 0.9580000000000001)
sample10pc 0.929 (0.907, 0.9490000000000001)
