# Enrichment analysis in search of peripheral metabollic subsystems

In [2]:
import pandas as pd
import numpy as np

In [None]:
# Read datafile
data = pd.read_csv('periphericalRxns_iJN1480.tsv', sep = '\t', encoding = 'latin1')
data.head()

Unnamed: 0,Subsystem,Peripherical reactions,Not peripherical reactions,Total
0,Fatty Acids Biosynthesis,8,67,75
1,Alternate Carbon and Nitrogen Source,61,45,106
2,Lysine Metabolism,0,34,34
3,Cofactor and Prosthetic Group Biosynthesis,27,163,190
4,Fatty Acids Metabolism,132,233,365


In [4]:
from scipy.stats import fisher_exact
from statsmodels.stats.multitest import multipletests

enrich_analysis_results = []

subsystem_list = data["Subsystem"]

b = data.loc[data["Subsystem"] == 'Total', "Peripherical reactions"].iloc[0]
d = data.loc[data["Subsystem"] == 'Total', "Not peripherical reactions"].iloc[0]

for subsystem in subsystem_list:
    a = data.loc[data["Subsystem"] == subsystem, "Peripherical reactions"].iloc[0]
    c = data.loc[data["Subsystem"] == subsystem, "Not peripherical reactions"].iloc[0]

    cont_table = np.array([[a, b],[c,d]])  

    oddsratio, pvalue = fisher_exact(cont_table, alternative = "greater")

    enrich_analysis_results.append({
            "subsystem": subsystem,
            "peripherical": a,
            "no peripherical": c,
            "oddsratio": oddsratio,
            "pvalue": pvalue
        })

In [5]:
# Benjaminiâ€“Hochberg (FDR) correction
enrich_df = pd.DataFrame(enrich_analysis_results)
enrich_df["p_adj"] = multipletests(
    enrich_df["pvalue"],
    method = "fdr_bh"
)[1]

enrich_df

Unnamed: 0,subsystem,peripherical,no peripherical,oddsratio,pvalue,p_adj
0,Fatty Acids Biosynthesis,8,67,0.322919,0.99982,1.0
1,Alternate Carbon and Nitrogen Source,61,45,3.666025,1.710703e-10,5.388329e-09
2,Lysine Metabolism,0,34,0.0,1.0,1.0
3,Cofactor and Prosthetic Group Biosynthesis,27,163,0.447975,0.9999816,1.0
4,Fatty Acids Metabolism,132,233,1.532132,0.0003470336,0.002568049
5,Glycolysis/Gluconeogenesis,0,29,0.0,1.0,1.0
6,Aromatic Compounds Degradation,52,35,4.018032,4.892157e-10,6.03366e-09
7,"Valine, Leucine and Isoleucine Metabolism",1,32,0.084514,0.9999659,1.0
8,Urea Cycleamino Group Metabolism,0,7,0.0,1.0,1.0
9,Alternate Carbon Source,55,38,3.914327,2.91261e-10,5.388329e-09


In [6]:
# Print only significant values
significant = enrich_df[enrich_df["p_adj"] <= 0.05]
significant

Unnamed: 0,subsystem,peripherical,no peripherical,oddsratio,pvalue,p_adj
1,Alternate Carbon and Nitrogen Source,61,45,3.666025,1.710703e-10,5.388329e-09
4,Fatty Acids Metabolism,132,233,1.532132,0.0003470336,0.002568049
6,Aromatic Compounds Degradation,52,35,4.018032,4.892157e-10,6.03366e-09
9,Alternate Carbon Source,55,38,3.914327,2.91261e-10,5.388329e-09
32,Xenobiotic Tolerance,10,1,27.044444,1.852413e-05,0.0001713482
33,Nitrogen Metabolism,4,0,inf,0.005396643,0.0332793


In [7]:
print(significant["subsystem"].tolist())

['Alternate Carbon and Nitrogen Source', 'Fatty Acids Metabolism', 'Aromatic Compounds Degradation', 'Alternate Carbon Source', 'Xenobiotic Tolerance', 'Nitrogen Metabolism']


These subsystems are associated with secondary metabolism