# Assess the enrichment of metabolic subsystems

Produces an XLSX file with the enriched reactions per subsystem for all models. This notebook performs no new computation; it only collects the CSV files produced by `reactions_enrichment.ipynb`.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap
import os 

## Set folders

In [2]:
# Folder holding the per-experiment enrichment CSV files (produced by `reactions_enrichment.ipynb`).
p_vals_folder = "results_enrichment_reactions"

In [3]:
# Load the reaction -> subsystem table.
# The path is assembled with os.path.join so the notebook also runs outside
# Windows (a hard-coded "\\" separator only resolves there).
subsystems_path = os.path.join("model", "Human-GEM_subsystems.txt")
df = (
    pd.read_csv(subsystems_path, sep=";")
    .rename(columns={'rxn': 'reaction'})
    # Reaction IDs become the index so the enrichment columns added later can align on them.
    .set_index('reaction')
)

In [None]:
# Show the distinct subsystem labels found in the table.
df["subsystem"].unique()

## Read and parse

In [16]:
# Collect the `enrichment` column of every result file into `df`, one column per experiment.
# os.listdir returns entries in arbitrary order, so the names are sorted
# (case-insensitively) to get the same column order on every platform.
file_names = sorted(
    (file_name for file_name in os.listdir(p_vals_folder) if file_name.endswith('.csv')),
    key=str.lower,
)
for file in file_names:
    # The file stem is "<MEM>_<dataset>"; anything after a second "_" is ignored.
    experiment = os.path.splitext(file)[0]
    name_parts = experiment.split("_")
    MEM = name_parts[0]
    dataset = name_parts[1]
    column = f'{MEM}_{dataset}'

    # os.path.join picks the right separator for the current OS.
    df_tmp = pd.read_csv(os.path.join(p_vals_folder, file), sep=",")
    df_tmp = df_tmp[['reaction', 'enrichment']].set_index('reaction')
    df_tmp.columns = [column]

    # Start from an all-NaN column; update() then fills in the values for the
    # reactions of `df` that are present in this file (aligned on the index).
    df[column] = np.nan
    df.update(df_tmp)

# Anything still NaN (e.g. a reaction absent from a file) becomes 0.
df = df.fillna(0)

In [17]:
# Preview the first five rows of the table.
df.head(n=5)

Unnamed: 0_level_0,subsystem,GIMME_293T,GIMME_A549,GIMME_Calu-3,GIMME_HBE,GIMME_Lung,iMAT_293T,iMAT_A549,iMAT_Calu-3,iMAT_HBE,...,INIT_293T,INIT_A549,INIT_Calu-3,INIT_HBE,INIT_Lung,tINIT_293T,tINIT_A549,tINIT_Calu-3,tINIT_HBE,tINIT_Lung
reaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HMR_3905,Glycolysis / Gluconeogenesis,1.0,-1.0,-1.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,...,0.0,0.0,1.0,-1.0,0.0,1.0,1.0,0.0,1.0,-1.0
HMR_3907,Glycolysis / Gluconeogenesis,1.0,-1.0,1.0,0.0,-1.0,1.0,-1.0,1.0,0.0,...,0.0,1.0,1.0,-1.0,0.0,-1.0,1.0,0.0,1.0,0.0
HMR_4097,Glycolysis / Gluconeogenesis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,-1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
HMR_4099,Glycolysis / Gluconeogenesis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HMR_4108,Glycolysis / Gluconeogenesis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0


## Write to Excel (one sheet per subsystem)

In [18]:
def _sheet_name_for(subsystem, used_names):
    """Return a valid Excel sheet name for `subsystem` that is not in `used_names`.

    Excel sheet names may hold at most 31 characters, must not contain any of
    ``[ ] : * ? / \\`` and are compared case-insensitively. Forbidden characters
    are removed and the name is truncated; if the result collides with a name
    already handed out, a ``~2``, ``~3``, ... suffix is appended (still within
    31 characters) so that no two subsystems end up in the same sheet.

    Parameters
    ----------
    subsystem : object
        Subsystem label; converted with ``str()``.
    used_names : set of str
        Lower-cased sheet names handed out so far. The returned name is added to it.

    Returns
    -------
    str
        The sheet name to use for this subsystem.
    """
    max_len = 31
    cleaned = str(subsystem).translate(str.maketrans("", "", "[]:*?/\\"))
    candidate = cleaned[:max_len]
    counter = 2
    while candidate.lower() in used_names:
        suffix = f"~{counter}"
        candidate = cleaned[:max_len - len(suffix)] + suffix
        counter += 1
    used_names.add(candidate.lower())
    return candidate


# Build the output path portably and make sure the target folder exists.
output_dir = 'results_enrichment'
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, 'reactions_subsystems.xlsx')

subsystems = df.subsystem.unique()
used_sheet_names = set()

# The context manager saves and closes the workbook, also when an error occurs
# (ExcelWriter.save() no longer exists in pandas >= 2.0).
with pd.ExcelWriter(output_path) as writer:
    for subsystem in subsystems:
        # take only the current subsystem
        df_sub = df[df['subsystem'] == subsystem]
        # sheet name: at most 31 characters, no forbidden characters, unique in the workbook
        sheet_name = _sheet_name_for(subsystem, used_sheet_names)
        # remove subsystem info - already in the sheet name
        df_sub = df_sub.drop(columns='subsystem')
        # write to a sheet
        df_sub.to_excel(writer, sheet_name=sheet_name, index=True)
    