In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. admix_prs) are re-imported before each cell executes and code edits
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import random
from scipy import stats
import admix_prs

In [3]:
# Constants
# `trait` names both the phenotype column used downstream and the per-trait
# PRS file to load.
trait = "bmi"
# Single root for all inputs, so relocating the data only needs one edit.
DATA_DIR = "/u/project/pasaniuc/kangchen/tmp/prs-1219"
PHENO_PATH = f"{DATA_DIR}/REAL-PHENO/all-pheno.csv"
PRS_PATH = f"{DATA_DIR}/REAL-PRS/{trait}.tsv.gz"

In [4]:
# Phenotype table: the first CSV column becomes the index, and the index is
# cast to str so it can be matched against the string IDs of the PRS table.
df_pheno = pd.read_csv(PHENO_PATH, index_col=0)
df_pheno.index = df_pheno.index.astype(str)

# PRS table: keep only the part of `indiv` before the first "_" and use it as
# the index. `indiv` is read explicitly as str so the string accessor stays
# valid even if every ID in the file happens to look numeric.
# NOTE(review): presumably the IDs are stored as "<id>_<id>" -- confirm.
df_prs = (
    pd.read_csv(PRS_PATH, sep="\t", dtype={"indiv": str})
    .assign(indiv=lambda prs: prs["indiv"].str.split("_", n=1).str[0])
    .set_index("indiv")
)

In [5]:
# Inner-join phenotypes and PRS on their shared index, then keep only the
# rows where both the trait value and the "MEAN" column are present.
df_info = df_pheno.merge(df_prs, left_index=True, right_index=True).dropna(
    subset=[trait, "MEAN"]
)

In [6]:
# Preview the merged phenotype + PRS table (the rendering is truncated).
df_info

Unnamed: 0,SEX,AGE,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,...,QUANTILE_49,QUANTILE_54,QUANTILE_60,QUANTILE_65,QUANTILE_70,QUANTILE_75,QUANTILE_80,QUANTILE_85,QUANTILE_90,QUANTILE_95
1000011,0,43,-12.16240,3.43848,-1.904560,4.855460,1.263630,-0.402053,-1.003270,-4.033580,...,-0.29645,-0.24080,-0.18389,-0.10870,-0.04607,0.01820,0.10862,0.18722,0.29898,0.45728
1000026,0,57,-9.94105,3.05703,-0.466343,-2.081500,-7.125890,-1.990820,0.906184,-1.355140,...,0.42664,0.47398,0.52046,0.56938,0.64363,0.69082,0.75451,0.84861,0.96790,1.15174
1000032,1,64,-14.61760,3.54024,-3.521440,6.759100,7.999590,-2.824470,1.418720,-1.603100,...,0.40705,0.46578,0.53774,0.59148,0.62859,0.68255,0.79094,0.89281,0.99831,1.24254
1000044,0,47,6.68544,-2.91200,13.416200,-45.251300,10.064700,-6.174340,-16.192900,-9.641410,...,0.26890,0.35473,0.42062,0.46792,0.53955,0.61611,0.69813,0.78304,0.90968,1.08229
1000058,1,53,-9.97432,2.29831,-1.265110,3.144700,0.931964,1.203180,-2.403660,0.584553,...,0.82149,0.90630,0.96425,1.03650,1.12457,1.17608,1.27409,1.38587,1.51844,1.66077
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6026111,1,63,-12.75140,4.24704,-4.756570,-0.347158,-4.576120,-0.720132,1.471440,-1.652470,...,0.67473,0.73342,0.80127,0.84302,0.92509,0.99117,1.07196,1.18819,1.30617,1.44662
6026126,1,49,-11.22710,4.09981,2.824300,-3.649090,-7.239380,0.755878,2.896370,-3.185470,...,0.22997,0.29419,0.35068,0.40701,0.47952,0.55950,0.64691,0.71615,0.84496,1.04781
6026132,1,59,-14.37550,3.38334,-2.116290,2.924080,4.800640,1.401280,2.143520,0.705102,...,-0.13488,-0.07699,0.00791,0.05630,0.12151,0.19279,0.26741,0.33964,0.43707,0.66948
6026144,1,41,-13.48330,3.44693,-0.522458,5.800640,17.272800,-1.180140,2.639370,-0.129023,...,0.85630,0.93013,0.98368,1.03108,1.14801,1.20380,1.27008,1.36612,1.50234,1.61011


# Calculate R2 overall and stratified by age and sex

In [7]:
# R2 between the trait and the "MEAN" column within each SEX group.
# `trait` is passed instead of a repeated "bmi" literal so that changing the
# trait in the constants cell carries through to this analysis.
# NOTE(review): "MEAN" is presumably the PRS point estimate -- confirm.
res1 = admix_prs.stratify_calculate_r2(
    df_info, x_col=trait, y_col="MEAN", grp_col="SEX"
)

# Discretize AGE into 5 levels. This adds the "AGE_Q" column to df_info in
# place; the calibration cell further down also groups by it.
df_info["AGE_Q"] = admix_prs.make_levels(
    df_info, stratify_col="AGE", n_level=5
)

# Same R2, computed within each age level.
res2 = admix_prs.stratify_calculate_r2(
    df_info, x_col=trait, y_col="MEAN", grp_col="AGE_Q"
)

# No grp_col: a single R2 over all individuals.
res3 = admix_prs.stratify_calculate_r2(
    df_info, x_col=trait, y_col="MEAN"
)


In [8]:
# R2 and its standard deviation per SEX group.
res1

Unnamed: 0,SEX,R2,R2_std
0,0,0.241364,0.001553
1,1,0.222308,0.001519


In [9]:
# R2 and its standard deviation per AGE_Q level.
res2

Unnamed: 0,AGE_Q,R2,R2_std
0,"(36.999, 48.0]",0.234654,0.002908
1,"(48.0, 55.0]",0.239133,0.002351
2,"(55.0, 60.0]",0.235538,0.001535
3,"(60.0, 64.0]",0.23107,0.001704
4,"(64.0, 73.0]",0.220749,0.002348


In [10]:
# R2 and its standard deviation over all individuals (no grouping).
res3

Unnamed: 0,R2,R2_std
0,0.231193,0.000636


# Evaluate calibration

In [11]:
# Arguments shared by both calibration runs: the observed trait is compared
# against the interval spanned by the QUANTILE_10 and QUANTILE_90 columns.
# `trait` is used rather than a "bmi" literal so the constants cell stays the
# single place where the analysed trait is chosen.
# NOTE(review): the recorded output shows Coverage == 0.0 in every group.
# That would be expected if the trait column and the QUANTILE_* columns are on
# different scales (e.g. raw phenotype vs. standardized predictions) --
# confirm before interpreting these numbers.
calibration_kwargs = dict(
    x_col=trait,
    lower_col="QUANTILE_10",
    upper_col="QUANTILE_90",
)

# Coverage within each SEX group (overwrites the R2 result held in res1).
res1 = admix_prs.eval_calibration(df_info, grp_col="SEX", **calibration_kwargs)
# Coverage within each AGE_Q level (overwrites the R2 result held in res2).
res2 = admix_prs.eval_calibration(df_info, grp_col="AGE_Q", **calibration_kwargs)

In [12]:
# Coverage of the QUANTILE_10..QUANTILE_90 interval per SEX group.
res1

Unnamed: 0,SEX,Coverage
0,0,0.0
1,1,0.0


In [13]:
# Coverage of the QUANTILE_10..QUANTILE_90 interval per AGE_Q level.
res2

Unnamed: 0,AGE_Q,Coverage
0,"(36.999, 48.0]",0.0
1,"(48.0, 55.0]",0.0
2,"(55.0, 60.0]",0.0
3,"(60.0, 64.0]",0.0
4,"(64.0, 73.0]",0.0
