In [5]:
# Notebook: Chromosome localisation of DMPs.
# Description:  Analysis of chromosomal localisation of DMPS

In [6]:
%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [7]:
import pandas as pd
import scipy.stats as sts

from IPython.display import display

In [8]:
def extract(df: pd.DataFrame) -> pd.DataFrame:
    return df[(df["Delta mean"].abs() > 0.1) & (df["q-value"] <= 0.05)]

In [9]:
# Load EPIC mnifest

In [10]:
epic = pd.read_csv(
    "../data/Additional/EPIC/MethylationEPIC_v-1-0_B4.csv",
    index_col=0,
    low_memory=False,
)["CHR"]

In [11]:
# Load reports

In [14]:
reports = [
    "output/SpainCoV_vs_HB_TSS.csv",
    "output/PLCoV_vs_HB_TSS.csv",
    "output/USACoV_vs_HB_TSS.csv",
]
names = ["Spain", "PL", "USA"]

In [15]:
# Perform analysis for each COVID-19 kohorts

In [17]:
for name, report in zip(names, reports):

    # Load report
    df = pd.read_csv(report, index_col=0)
    df = pd.concat((df, epic.loc[df.index]), axis=1)

    # Frequency in BG
    tss_chr_freq = df["CHR"].value_counts(normalize=True).to_frame() * 100
    extracted = extract(df)["CHR"]

    # Concate expected and observed frequency of probes
    df = pd.concat(
        (tss_chr_freq, extracted.value_counts(normalize=True).to_frame() * 100), axis=1
    )

    # Sort by chromosome
    df.index = df.index.astype(int)
    df = df.sort_index()

    df.columns = ["Expected", "Observed"]
    df = df.dropna()

    # Calculate pvalue
    _, pval = sts.chisquare(f_obs=df.Observed, f_exp=df.Expected)

    print(name, extracted.shape[0])
    display(df)
    print("Pvalue: ", pval)

Spain 286


Unnamed: 0,Expected,Observed
1,10.120705,13.636364
2,6.658229,5.244755
3,6.147007,4.895105
4,3.955793,3.496503
5,4.957861,5.594406
6,5.662952,3.146853
7,4.544809,4.895105
8,3.608187,4.195804
9,3.066095,4.895105
10,3.924305,5.594406


Pvalue:  0.9461863618190455
PL 1400


Unnamed: 0,Expected,Observed
1,10.102771,10.214286
2,6.657511,7.285714
3,6.14571,5.928571
4,3.950017,3.5
5,4.926663,4.928571
6,5.875724,5.357143
7,4.688958,6.5
8,3.701747,3.214286
9,3.096626,4.5
10,4.055077,5.142857


Pvalue:  0.9999995332515582
USA 23


Unnamed: 0,Expected,Observed
1,10.100052,8.695652
2,6.659993,13.043478
3,6.137778,17.391304
4,3.961103,8.695652
7,4.687637,4.347826
9,3.088209,4.347826
12,5.582193,4.347826
14,3.578224,4.347826
15,3.378003,8.695652
16,4.32759,4.347826


Pvalue:  1.1467322023235173e-06
