In [1]:
# Notebook: COVID-19 specific genes methylation
# Description: Notebook contains analysis of the gene methylation changes specific to COVID-19

In [5]:
# Load the lab_black IPython extension for this kernel session
# (presumably auto-formats executed cells with Black — TODO confirm).
%load_ext lab_black

In [6]:
import pandas as pd
from src.utils import EPIC

In [8]:
# Load EPIC manifest

In [7]:
# EPIC manifest: the first CSV column becomes the index, and
# low_memory=False makes pandas parse the file in a single pass
# instead of in chunks.
epic_manifest_path = "../data/Additional/EPIC/MethylationEPIC_v-1-0_B4.csv"
epic = pd.read_csv(epic_manifest_path, index_col=0, low_memory=False)

In [3]:
# Differentially methylated gene promoters -> study: all COVID-19 groups vs healthy controls

In [9]:
# Load the stored gene annotation table (first CSV column is the index)
# and show it as the cell output.
dmg_in_cov_path = "../Files/DMGenesInAllCoVGroupsCorrected.csv"
dmg_in_cov = pd.read_csv(dmg_in_cov_path, index_col=0)
dmg_in_cov

Unnamed: 0_level_0,UCSC_RefGene_Name,UCSC_RefGene_Group
IlmnID,Unnamed: 1_level_1,Unnamed: 2_level_1
cg21465162,UBQLN1,TSS200
cg24315703,LIMK2,TSS200
cg25060243,UGT1A4,TSS1500
cg21581312,LOC723972,TSS200
cg08585897,TERF2IP,TSS1500
cg12981595,KRTAP4-8,TSS200
cg19556901,SNORD115-1,TSS1500
cg22930808,DTX3L,TSS1500
cg13407664,HTN1,TSS1500
cg18642567,RPGRIP1,TSS1500


In [17]:
# Differentially methylated gene promoters -> study: non-COVID-19 vs COVID-19

In [11]:
# Read the non-COVID-19 vs COVID-19 report (first CSV column is the index)
cov_vs_non_cov_report = pd.read_csv("output/USANoNCoV_vs_CoV_TSS.csv", index_col=0)

# Keep the IDs of rows with |Delta mean| > 0.1 and q-value <= 0.05
has_large_delta = cov_vs_non_cov_report["Delta mean"].abs() > 0.1
is_significant = cov_vs_non_cov_report["q-value"] <= 0.05
cov_vs_non_cov_dmps = cov_vs_non_cov_report[has_large_delta & is_significant].index
print("DMPs [TSS200 | TSS1500]: ", len(cov_vs_non_cov_dmps))

# Look up the gene name and gene-region annotation of every retained CpG in
# the EPIC manifest (.loc raises KeyError if an ID is absent from the manifest)
annotation_columns = ["UCSC_RefGene_Name", "UCSC_RefGene_Group"]
cov_vs_non_cov = epic.loc[cov_vs_non_cov_dmps, annotation_columns]
display(cov_vs_non_cov)

# Save the annotated CpGs.
# NOTE(review): a later cell reads "../Files/DMGenesCoVvsNoNCoVCorrected.csv",
# which is not the file written here — presumably it is produced outside this
# notebook; confirm.
cov_vs_non_cov.to_csv("../Files/DMGenesCoVvsNoNCoV.csv")

DMPs [TSS200 | TSS1500]:  26


Unnamed: 0_level_0,UCSC_RefGene_Name,UCSC_RefGene_Group
CpG,Unnamed: 1_level_1,Unnamed: 2_level_1
cg15528722,TTC33,TSS1500
cg24678928,DDX60,TSS1500
cg03607951,IFI44L,TSS1500
cg07815522,PARP9;PARP9;PARP9;PARP9;PARP9;PARP9;DTX3L,5'UTR;5'UTR;5'UTR;5'UTR;5'UTR;5'UTR;TSS1500
cg24103563,TRIM34;TRIM34;TRIM6-TRIM34,TSS1500;5'UTR;Body
cg13304609,IFI44L,TSS1500
cg15452182,LOC101928650;GORAB;GORAB;GORAB,TSS1500;Body;Body;Body
cg17515347,AIM2,TSS1500
cg07863524,OR3A4,TSS200
cg01036958,LOC101928650;GORAB;GORAB;GORAB,TSS1500;Body;Body;Body


In [12]:
from venn import venn

In [13]:
# Reload DM genes for both comparisons: COVID-19 vs non-COVID-19 patients, and COVID-19 vs healthy controls

In [15]:
def _exploded_gene_names(csv_path):
    """Return the UCSC_RefGene_Name column of a CSV with one name per row.

    The first CSV column is used as the index. Each entry is split on ";"
    and exploded, so an index label repeats once for every name its entry
    listed.
    """
    annotated = pd.read_csv(csv_path, index_col=0)
    return annotated["UCSC_RefGene_Name"].str.split(";").explode()


# DM genes between covid and non-covid samples
cov_non_cov = _exploded_gene_names("../Files/DMGenesCoVvsNoNCoVCorrected.csv")

# DM genes between covid and healthy controls
cov_non_hb = _exploded_gene_names("../Files/DMGenesInAllCoVGroupsCorrected.csv")

In [12]:
# Intersection

In [17]:
# Genes present in both lists (covid vs non-covid, and covid vs healthy
# controls). Missing annotations are dropped so NaN can never be reported as a
# shared "gene", and the names are sorted because iteration order of a set of
# strings can change between kernel sessions (hash randomization), which would
# otherwise reorder the rows of this table on re-run.
shared_genes = set(cov_non_cov.dropna()) & set(cov_non_hb.dropna())
common = pd.DataFrame(sorted(shared_genes), columns=["Gene"])
common

Unnamed: 0,Gene
0,AIM2
1,DTX3L
