In [1]:
# Notebook: Notebook to annotate CpGs
# Description:  ...

In [2]:
%load_ext lab_black

In [3]:
import pandas as pd

In [4]:
def structurize(set_: set) -> pd.DataFrame:
    """
    Convert set to DataFrame.
    """
    return pd.DataFrame(list(set_), columns=["CpG"]).set_index("CpG")


def extract(df, threshold=0.1, alpha=0.05):
    """Function to extract CpGs from report"""
    df = df[(df["Delta mean"].abs() > threshold) & (df["q-value"] <= alpha)]
    return set(df.index)

In [5]:
# Load EPIC

In [6]:
epic = pd.read_csv(
    "../data/Additional/EPIC/MethylationEPIC_v-1-0_B4.csv",
    index_col=0,
    low_memory=False,
)
epic = epic[["UCSC_RefGene_Name", "UCSC_RefGene_Group", "Relation_to_UCSC_CpG_Island"]]

In [7]:
epic

Unnamed: 0_level_0,UCSC_RefGene_Name,UCSC_RefGene_Group,Relation_to_UCSC_CpG_Island
IlmnID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cg07881041,PTPRS;PTPRS;PTPRS;PTPRS,Body;Body;Body;Body,N_Shore
cg18478105,YTHDF1,TSS200,Island
cg23229610,,,N_Shelf
cg03513874,,,S_Shelf
cg09835024,EIF2S3,TSS1500,Island
...,...,...,...
71678368,,,
72748406,,,
73635489,,,
73784382,,,


In [3]:
# DMPs in ALL covid groups vs HB

In [9]:
report = pd.read_csv("../Files/DMPsInAllCoVGroup", index_col=0).set_index("CpG")
report = pd.concat((report, epic.loc[report.index]), axis=1)
report.to_csv("../Annots/DMPsInAllStudiesAnnots.csv")
report

Unnamed: 0_level_0,UCSC_RefGene_Name,UCSC_RefGene_Group,Relation_to_UCSC_CpG_Island
CpG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cg20232119,HSPA1L,Body,S_Shelf
cg10827434,RPF1,1stExon,Island
cg14094333,KLF3;KLF3;FLJ13197,1stExon;5'UTR;Body,Island
cg13407664,HTN1,TSS1500,
cg08609238,,,Island
...,...,...,...
cg11962649,HSPA5,Body,N_Shore
cg08323960,,,
cg27323430,FLJ44606;FLJ44606,Body;Body,
cg00574307,DCT;DCT,Body;Body,


In [10]:
# DMPs non-Covid vs HB

In [11]:
report = structurize(
    extract(pd.read_csv("output/USANoNCoV_vs_HB_ALLProbes.csv", index_col=0))
)
report = pd.concat((report, epic.loc[report.index]), axis=1)
report.to_csv("../Annots/DMPsNonCovVsHBAnnots.csv")
report

Unnamed: 0_level_0,UCSC_RefGene_Name,UCSC_RefGene_Group,Relation_to_UCSC_CpG_Island
CpG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cg22726533,YAE1D1;YAE1D1,Body;Body,S_Shelf
cg10211530,TNRC18,Body,N_Shore
cg20232119,HSPA1L,Body,S_Shelf
cg05521474,,,
cg08609238,,,Island
...,...,...,...
cg04783624,RANBP9,Body,
cg10685380,IL12B,Body,
cg25940202,,,
cg08022717,CYP27C1,Body,


In [47]:
# DMPs non-Cov vs CoV [USA]

In [12]:
report = structurize(
    extract(pd.read_csv("output/USANoNCoV_vs_CoV_ALLProbes.csv", index_col=0))
)
report = pd.concat((report, epic.loc[report.index]), axis=1)
report.to_csv("../Annots/DMPsNonCovVsCoVAnnots.csv")
report

Unnamed: 0_level_0,UCSC_RefGene_Name,UCSC_RefGene_Group,Relation_to_UCSC_CpG_Island
CpG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cg14541119,LOC100507477;LOC100507477,Body;Body,
cg06712932,PLXNC1;PLXNC1,Body;Body,
cg01176329,GABBR1;GABBR1;GABBR1,Body;Body;Body,N_Shelf
cg24103563,TRIM34;TRIM34;TRIM6-TRIM34,TSS1500;5'UTR;Body,
cg20426042,PGCP,5'UTR,
...,...,...,...
cg16168081,,,N_Shore
cg03037271,,,
cg18557047,TET2;TET2;TET2-AS1,5'UTR;5'UTR;Body,
cg17114584,IRF7;IRF7;IRF7,Body;Body;Body,N_Shore
