In [1]:
%load_ext autoreload
%load_ext lab_black
%autoreload 2

import admix
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from os.path import join
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from scipy.stats import pearsonr
import seaborn as sns
import admix_prs
import statsmodels.api as sm

In [2]:
# --- configuration -----------------------------------------------------------
# Index of the simulation replicate analysed below (selects the `SIM_{sim_i}`
# column and the `sim_{sim_i}.*` files).
sim_i = 0

# Name of the simulation setting; used as a sub-directory name under out/.
# NOTE(review): presumably encodes heritability (hsq) and proportion of causal
# variants (pcausal) -- confirm against the simulation scripts.
prefix = "hsq-0.25-pcausal-0.01"

# Directory holding the LDpred2 outputs for this setting.
ldpred2_dir = "out/ldpred2/" + prefix

# NOTE(review): absolute, machine-specific path; it is not referenced by any
# of the cells visible in this part of the notebook.
PLINK_DIR = (
    "/u/project/pasaniuc/pasaniucdata/admixture/projects/"
    "admix-prs-uncertainty/data/PLINK/"
)

In [3]:
# read PRS
# One table per sample set (`eur_test` and `admix`, per the file names), both
# indexed by the first column of the file. The directory comes from
# `ldpred2_dir` so that changing the configuration cell is enough to point
# this cell at a different run.
df_prs_eur_test = pd.read_csv(
    f"{ldpred2_dir}/sim_{sim_i}.prs.eur_test.tsv.gz", sep="\t", index_col=0
)
df_prs_admix = pd.read_csv(
    f"{ldpred2_dir}/sim_{sim_i}.prs.admix.tsv.gz", sep="\t", index_col=0
)
# Stack the two sample sets row-wise into a single PRS table.
df_prs = pd.concat([df_prs_eur_test, df_prs_admix])

# read genetic value and phenotype
# Both tables are restricted to the selected simulation replicate and then
# re-indexed to the PRS rows; note that `reindex` fills rows that are absent
# from the phenotype files with NaN rather than raising.
pheno_dir = f"out/pheno/{prefix}"
df_pheno_g = (
    pd.read_csv(f"{pheno_dir}/sim.pheno_g.tsv", sep="\t", index_col=0)[
        ["GROUP", f"SIM_{sim_i}"]
    ]
    .rename(columns={f"SIM_{sim_i}": "GV"})
    .reindex(df_prs.index)
)
df_pheno = (
    pd.read_csv(f"{pheno_dir}/sim.pheno.tsv", sep="\t", index_col=0)[
        [f"SIM_{sim_i}"]
    ]
    .rename(columns={f"SIM_{sim_i}": "PHENO"})
    .reindex(df_prs.index)
)

In [4]:
# Attach phenotype (PHENO) and group / genetic value (GROUP, GV) columns to the
# PRS table. `df_prs` is both an input and the output of this cell, so any
# phenotype columns already present from a previous run of the cell are dropped
# first; this keeps the cell idempotent instead of duplicating columns on re-run.
pheno_cols = df_pheno.columns.union(df_pheno_g.columns)
df_prs = pd.concat(
    [df_pheno, df_pheno_g, df_prs.drop(columns=pheno_cols, errors="ignore")],
    axis=1,
)

In [5]:
# read local ancestry
# Load the table, replace `lanc` by its complement (1 - lanc), and add `lanc_q`,
# the 1-based quintile (five equal-frequency bins) of the flipped value.
df_lanc = (
    pd.read_csv("out/admix_lanc.tsv", sep="\t", index_col=0)
    .assign(lanc=lambda d: 1.0 - d["lanc"])
    .assign(lanc_q=lambda d: pd.qcut(d["lanc"], q=5).cat.codes + 1)
)

# Overwrite GROUP for the individuals listed in df_lanc with a per-quintile
# label ("admix_1" ... "admix_5"); all other rows keep their existing GROUP.
lanc_group_labels = "admix_" + df_lanc["lanc_q"].astype(str)
df_prs.loc[df_lanc.index, "GROUP"] = lanc_group_labels

In [6]:
from scipy.stats import pearsonr

In [11]:
# Per-group summary of prediction accuracy.
# Group by the column name itself rather than a one-element list: with a list,
# pandas 1.5 emits a FutureWarning and pandas >= 2.0 yields 1-tuple keys, which
# would print as `group=('admix_1',)` instead of `group=admix_1`.
for group, df_group in df_prs.groupby("GROUP"):
    # squared Pearson correlation between the phenotype and the PRS "MEAN" column
    raw_r2 = pearsonr(df_group["PHENO"], df_group["MEAN"])[0] ** 2
    # squared Pearson correlation between the phenotype and the genetic value
    hsq = pearsonr(df_group["PHENO"], df_group["GV"])[0] ** 2
    print(f"group={group}, raw_r2={raw_r2}, hsq={hsq}")

group=admix_1, raw_r2=0.1350995441894608, hsq=0.256808440687892
group=admix_2, raw_r2=0.11907791773787098, hsq=0.23889731704008066
group=admix_3, raw_r2=0.15770210113121041, hsq=0.2840469951111308
group=admix_4, raw_r2=0.1403370182279685, hsq=0.3338751583351793
group=admix_5, raw_r2=0.1439545775073527, hsq=0.28428715357960244
group=eur_test, raw_r2=0.14816597924201233, hsq=0.24721611206679614
