In [None]:
import pandas as pd
import numpy as np
import pathlib as pl

from tqdm.notebook import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from statannotations.Annotator import Annotator

from scipy.stats import mannwhitneyu, fisher_exact, pearsonr, kruskal

In [None]:
import sys
# Make the project's modules under FinalCode importable. The path is relative,
# so it is resolved against the kernel's current working directory.
sys.path.append("../../FinalCode/")
import download.download as dwnl
import utils.plotting as plting
import adVMP.adVMP_discovery as discov
# NOTE: this alias shadows the name of the standard-library `glob` module.
import adVMP.global_vs_local as glob

In [None]:
# Figure settings: output folder (placeholder, edit before running) and the
# seaborn "muted" colour palette.
fig_dir = pl.Path("/add/path/here/")
colors = sns.color_palette("muted")

In [None]:
# Placeholder locations; replace them with the real folders before running.
# base_dir is passed to the EPIC4=False download, base_dir4 to the EPIC4=True one.
base_dir = pl.Path("/add/path/here/")
base_dir4 = pl.Path("/add/path/here/")

data_dir = pl.Path("/add/path/here/")

# Probe list flattened to a 1-D array (presumably probes to filter out; it is
# handed to the download step as `bad_probes`).
bad_probes = (
    pd.read_csv(data_dir.joinpath("auxiliary", "sketchy_probe_list_epic.csv"), index_col=0)
    .values
    .ravel()
)

# Clinical inputs. `data_dir` is already a Path, so joining yields a Path
# directly without any extra wrapping.
sample_origin_path = data_dir.joinpath("clinical", "sample_origin_wbatch.csv")
clinical_path = data_dir.joinpath("clinical", "cleaned_clinical_reduced_diet.csv")
target_path = data_dir.joinpath("clinical", "targets.csv")

In [None]:
# With EPIC4=False the loader returns eight objects: four for EPIC2 followed by
# four for EPIC3 (values, clinical table, samples, phenotypes by their names).
(
    EPIC2_b, EPIC2_clin, EPIC2_samples, EPIC2_phenotypes,
    EPIC3_b, EPIC3_clin, EPIC3_samples, EPIC3_phenotypes,
) = dwnl.download_EPIC(
    sample_origin_path=sample_origin_path,
    base_dir=base_dir,
    clinical_path=clinical_path,
    target_path=target_path,
    bad_probes=bad_probes,
    EPIC4=False,
)

In [None]:
# With EPIC4=True the loader returns a single set of four objects, read from
# base_dir4 instead of base_dir.
(
    EPIC4_b,
    EPIC4_clin,
    EPIC4_samples,
    EPIC4_phenotypes,
) = dwnl.download_EPIC(
    sample_origin_path=sample_origin_path,
    base_dir=base_dir4,
    clinical_path=clinical_path,
    target_path=target_path,
    bad_probes=bad_probes,
    EPIC4=True,
)

In [None]:
# Background set: column labels shared by the EPIC4 and EPIC2 tables.
background_cpgs = (
    EPIC4_b.columns
    .intersection(EPIC2_b.columns)
    .to_numpy()
)
# Persist the background list for reuse outside this notebook.
pd.Series(background_cpgs).to_csv(
    data_dir.joinpath("variable_probes", "full_background_probes.csv")
)

In [None]:
# Previously saved CpG selection, flattened to a 1-D array of identifiers.
union_cpgs = (
    pd.read_csv(data_dir.joinpath("adVMP", "union_cpgs.csv"), index_col=0)
    .values
    .ravel()
)

# Get hit fraction

In [None]:
# Same call for each of the three data sets (EPIC2, EPIC3, EPIC4), always on
# the same CpG selection and with bal=True.
heatmap_df1, hit_fraction1 = discov.get_heatmap_df(
    selcpgs=union_cpgs,
    EPIC_m=EPIC2_b,
    phenotypes=EPIC2_phenotypes,
    bal=True,
)
heatmap_df2, hit_fraction2 = discov.get_heatmap_df(
    selcpgs=union_cpgs,
    EPIC_m=EPIC3_b,
    phenotypes=EPIC3_phenotypes,
    bal=True,
)
heatmap_df3, hit_fraction3 = discov.get_heatmap_df(
    selcpgs=union_cpgs,
    EPIC_m=EPIC4_b,
    phenotypes=EPIC4_phenotypes,
    bal=True,
)

In [None]:
# Binarise each heatmap table: 1 where the absolute value exceeds 4, else 0.
# The last six columns are left out (presumably non-probe annotation columns
# added by get_heatmap_df -- TODO confirm).
binary1, binary2, binary3 = (
    (heatmap.iloc[:, :-6].abs() > 4).astype(int)
    for heatmap in (heatmap_df1, heatmap_df2, heatmap_df3)
)

# Visualize probe performance

In [None]:
# Illumina EPIC manifest: skip the first 7 lines of the file and use the first
# remaining column as the index.
epic_manifest = pd.read_csv(
    data_dir.joinpath(
        "illumina_manifests",
        "GPL21145_MethylationEPIC_15073387_v-1-0.csv.gz",
    ),
    skiprows=7,
    index_col=0,
)

In [None]:
# Roadmap state mapping table, indexed by its first column.
roadmap_annotations = pd.read_csv(
    data_dir.joinpath("NIH_Epigenomics_Roadmap", "EPIC_to_state_mapping.csv"),
    index_col=0,
)

In [None]:
# Attach the Roadmap "State" annotation to the manifest, aligned on the index.
# Any "State" column left over from a previous execution of this cell is
# dropped first, so re-running the cell does not add a duplicate column
# (which would make epic_manifest["State"] return a DataFrame, not a Series).
epic_manifest = pd.concat(
    [
        epic_manifest.drop(columns="State", errors="ignore"),
        roadmap_annotations["State"],
    ],
    axis=1,
)

In [None]:
# Manifest rows for the selected CpGs only (all columns kept).
red_manifest = epic_manifest.loc[union_cpgs, :]

In [None]:
# Gene annotations of the selected CpGs; entries without a gene are dropped.
assoc_genes = red_manifest["UCSC_RefGene_Name"].dropna()
# An entry can list several ";"-separated symbols: flatten them all and keep
# the sorted set of distinct names.
unique_advmp_genes = np.unique(
    [symbol for entry in assoc_genes.str.split(";") for symbol in entry]
)

In [None]:
# Number of distinct gene symbols annotated to the selected CpGs.
len(unique_advmp_genes)

In [None]:
# Run the per-gene dysregulation check for every gene and collect the results
# in a dict keyed by gene symbol; tqdm shows progress over the genes.
is_global = {
    gene_symbol: glob.check_global_dysregulation_pergene(
        epic_manifest=epic_manifest,
        gene=gene_symbol,
        EPIC2_b=EPIC2_b,
        EPIC3_b=EPIC3_b,
        EPIC4_b=EPIC4_b,
        EPIC2_phenotypes=EPIC2_phenotypes,
        EPIC3_phenotypes=EPIC3_phenotypes,
        EPIC4_phenotypes=EPIC4_phenotypes,
        binary1=binary1,
        binary2=binary2,
        binary3=binary3,
        union_cpgs=union_cpgs,
    )
    for gene_symbol in tqdm(unique_advmp_genes)
}

In [None]:
# One row per gene: "Global" holds the per-gene result and "Local" is its
# element-wise negation.
global_df = (
    pd.DataFrame(is_global, index=["Global"])
    .transpose()
    .assign(Local=lambda frame: ~frame["Global"])
)

In [None]:
# Narrow single-panel figure with one bar per column of global_df
# ("Global", "Local").
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(2, 4))
sns.barplot(data=global_df, palette="pastel", ax=ax)
plting.transform_plot_ax(ax, legend_title="")
fig.savefig(
    fig_dir.joinpath("global_vs_local_dysregulation.svg"),
    bbox_inches="tight",
)

In [None]:
def save_gene_cg_figures(gene, promoter_only):
    """Plot the per-CpG overview of one gene and save one SVG per returned axis.

    Calls ``glob.get_full_cg_info_gene`` with the notebook-level inputs
    (``epic_manifest``, ``union_cpgs``, ``binary1``-``binary3``, ``EPIC2_b``,
    ``EPIC3_b``, ``EPIC4_b``) and writes the figures to
    ``fig_dir / "global_local_dys"`` as ``<gene>[_promoter]_SWEPIC<k>.svg``,
    where ``k`` is the 1-based position of the axis in the returned sequence.

    Parameters
    ----------
    gene : str
        Gene symbol forwarded as ``gene``.
    promoter_only : bool
        Forwarded as ``promoter_only``; when true, ``_promoter`` is inserted
        into the file names.

    Returns
    -------
    The axes returned by ``glob.get_full_cg_info_gene``, unchanged.
    """
    axes = glob.get_full_cg_info_gene(
        epic_manifest=epic_manifest,
        union_cpgs=union_cpgs,
        gene=gene,
        binary1=binary1,
        binary2=binary2,
        binary3=binary3,
        EPIC2_b=EPIC2_b,
        EPIC3_b=EPIC3_b,
        EPIC4_b=EPIC4_b,
        promoter_only=promoter_only,
    )

    out_dir = fig_dir / "global_local_dys"
    # savefig does not create missing folders; make sure the target exists.
    out_dir.mkdir(parents=True, exist_ok=True)

    region_tag = "_promoter" if promoter_only else ""
    for position, gene_ax in enumerate(axes, start=1):
        gene_ax.figure.savefig(
            out_dir / f"{gene}{region_tag}_SWEPIC{position}.svg",
            bbox_inches="tight",
        )
    return axes


# Example genes, each drawn first restricted to the promoter and then for the
# full gene. ax1/ax2/ax3 keep the axes of the most recent call.
for example_gene in ("IQGAP2", "HLA-F", "SPIRE1"):
    for restrict_to_promoter in (True, False):
        ax1, ax2, ax3 = save_gene_cg_figures(example_gene, restrict_to_promoter)