# Marginal association testing

In [None]:
import matplotlib.pyplot as plt
import admix
import numpy as np
from admix.plot import compare_pval

# Seed NumPy's global random number generator so that reruns of the notebook
# are repeatable (assumes admix.simulate draws from NumPy's global RNG —
# TODO confirm).
np.random.seed(1)

In [None]:
# Simulate genotypes for a three-way admixed cohort (no LD between SNPs):
# 5,000 individuals, 500 SNPs, ancestry proportions 0.5 / 0.4 / 0.1.
dset = admix.simulate.admix_geno_simple(
    n_indiv=5_000,
    n_snp=500,
    n_anc=3,
    mosaic_size=50,
    anc_props=[0.5, 0.4, 0.1],
)

# Simulate binary phenotypes on top of those genotypes.
# NOTE(review): cor=1.0 presumably means causal effects are perfectly
# correlated (i.e. shared) across ancestries — confirm against admix docs.
sim = admix.simulate.binary_pheno(
    dset,
    n_causal=10,
    hsq=0.1,
    cor=1.0,
)

In [None]:
# Index of the simulated phenotype replicate (column of sim["pheno"]) to analyse.
sim_i = 1
pheno = sim["pheno"][:, sim_i]

# Arguments shared by every association run below.
common_kwargs = dict(dset=dset, pheno=pheno, family="logistic")

# Per-method `P` results: default implementation vs. the `fast=False` (slow) one.
dict_pval, dict_slow_pval = {}, {}
for method in ("TRACTOR", "ATT", "ASE"):
    fast_res = admix.assoc.marginal(method=method, **common_kwargs)
    dict_pval[method] = fast_res.P

    slow_res = admix.assoc.marginal(method=method, fast=False, **common_kwargs)
    dict_slow_pval[method] = slow_res.P

In [None]:
# Compare ATT p-values against Tractor (left panel) and ASE (right panel).
fig, axes = plt.subplots(ncols=2, figsize=(4, 2), dpi=150)
panels = [("TRACTOR", "Tractor"), ("ASE", "ASE")]
for ax, (other, label) in zip(axes, panels):
    compare_pval(
        dict_pval["ATT"],
        dict_pval[other],
        xlabel="ATT",
        ylabel=label,
        ax=ax,
    )
fig.tight_layout()

## Consistency between the slow and fast implementations

In [None]:
# One panel per method: p-values from the default run (x-axis) against the
# p-values from the `fast=False` run (y-axis).
fig, axes = plt.subplots(ncols=3, figsize=(6, 2), dpi=150)
for ax, method in zip(axes, ["TRACTOR", "ATT", "ASE"]):
    fast_p = dict_pval[method]
    slow_p = dict_slow_pval[method]
    compare_pval(fast_p, slow_p, xlabel=method, ylabel=f"{method} (slow)", ax=ax)
fig.tight_layout()

## HET test

HET tests the null hypothesis that the coefficients for a set of variables are identical. In the case of three-way admixed populations, it tests the null of
$H_0: \beta_{s,1} = \beta_{s,2} = \beta_{s,3}$ for each SNP $s$.

Since we simulate phenotypes with no heterogeneity by ancestry, the resulting p-values are expected to follow the null distribution.

In [None]:
# Run the HET test on the first simulated phenotype (column 0).
het_pheno = sim["pheno"][:, 0]
df_het = admix.assoc.marginal(
    dset=dset, pheno=het_pheno, family="logistic", method="HET"
)

In [None]:
# QQ plot of the HET results (`P` column of df_het), drawn with
# bootstrap_ci=True.
het_pvals = df_het.P.values
fig, ax = plt.subplots(dpi=150, figsize=(2, 2))
admix.plot.qq(het_pvals, bootstrap_ci=True, ax=ax)
plt.show()