In [56]:
%load_ext lab_black

import admix
import numpy as np
import pandas as pd
import admix_prs
import os
import statsmodels.api as sm
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [57]:
# Directory holding the input tables read below (covar.tsv and pred/<trait>.tsv).
DATA_DIR = "./out"

In [9]:
# Covariate table (tab-separated); the file's first column becomes the index.
covar_path = os.path.join(DATA_DIR, "covar.tsv")
df_covar = pd.read_csv(covar_path, sep="\t", index_col=0)

In [114]:
# Phenotype analysed in this notebook; also selects which pred/<trait>.tsv is read.
trait = "insomnia"
# Columns regressed out of the phenotype: AGE, SEX, DEPRIVATION_INDEX, PC1..PC16.
covar_cols = ["AGE", "SEX", "DEPRIVATION_INDEX", *(f"PC{i}" for i in range(1, 17))]

In [115]:
# Load the per-trait table, join the covariates on the shared index, restrict to
# rows whose `group` is "United Kingdom", and keep complete rows only.
# NOTE(review): dropna() removes a row if ANY column is missing, not only the
# columns used below -- confirm that is the intended sample definition.
pred_path = os.path.join(DATA_DIR, f"pred/{trait}.tsv")
df_trait = (
    pd.read_csv(pred_path, sep="\t", index_col=0)
    .merge(df_covar, left_index=True, right_index=True)
    .loc[lambda d: d["group"] == "United Kingdom"]
    .dropna()
)

# Residualise PHENO on the covariates (plus an intercept) with ordinary least
# squares; the residuals are stored as PHENO_RESID for the analyses that follow.
covar_design = sm.add_constant(df_trait[covar_cols])
covar_fit = sm.OLS(df_trait["PHENO"].values, covar_design, missing="drop").fit()
df_trait["PHENO_RESID"] = covar_fit.resid

In [116]:
# Squared Pearson correlation between PRS_MEAN and the residualised phenotype.
prs_pheno_r = pearsonr(df_trait["PRS_MEAN"], df_trait["PHENO_RESID"])[0]
print(f"{trait}: R2={prs_pheno_r ** 2:.2g}")

insomnia: R2=0.02


In [117]:
# Columns passed one at a time as `test_col` to the heteroskedasticity test.
test_cols = [
    "AGE",
    "SEX",
    "DEPRIVATION_INDEX",
    "PC1",
    "PC2",
]

In [118]:
# Run admix_prs.test_het_breuschpagan once per candidate column and report the
# p-value it returns. Presumably this is a Breusch-Pagan-style test of whether
# the PRS_MEAN -> PHENO_RESID fit varies with `col` -- verify against admix_prs.
for col in test_cols:
    het_stats, het_model = admix_prs.test_het_breuschpagan(
        df=df_trait, y_col="PHENO_RESID", pred_col="PRS_MEAN", test_col=col
    )
    p_value = het_stats["p-value"]
    print(col, p_value)

AGE 0.16052081674497903
SEX 1.8559681715283013e-16
DEPRIVATION_INDEX 0.40153800285660013
PC1 0.2784140540968132
PC2 0.18797248681978554
