## Unification of the iCHO2291 datasets

A dataset created from the iCHO2291 XML file and the Excel file provided as Supplementary Data are unified here in order to combine the information from both datasets (i.e. the forward and backward kcat values from the Excel file and the reaction names, formulas and bounds from the XML model).

In [1]:
import pandas as pd
import cobra
from cobra.io import read_sbml_model
from tqdm.notebook import tqdm



In [4]:
# Read sheet 'Data S2' of the iCHO2291 Excel workbook provided as supplementary data
df1 = pd.read_excel('../../Data/Supplementary Data.xlsx', sheet_name='Data S2')
df1

Unnamed: 0,Rxn,GPR,Proteins,EC Number,Mol wt,kcat_forward,kcat_backward,Subsystem (iCHO2291),Subsystem (iCHO1766)
0,10FTHF5GLUtl,,,,,,,Transport,"TRANSPORT, LYSOSOMAL"
1,10FTHF5GLUtm,,,,,,,Transport,"TRANSPORT, MITOCHONDRIAL"
2,10FTHF6GLUtl,,,,,,,Transport,"TRANSPORT, LYSOSOMAL"
3,10FTHF6GLUtm,,,,,,,Transport,"TRANSPORT, MITOCHONDRIAL"
4,10FTHF7GLUtl,,,,,,,Transport,"TRANSPORT, LYSOSOMAL"
...,...,...,...,...,...,...,...,...,...
6231,RTOTALFATPc,,,,,,,Exchange/demand/sink reaction,R GROUP SYNTHESIS
6232,RTOTALt,,,,,,,Transport,"TRANSPORT, EXTRACELLULAR"
6233,Rtotaltl,,,,,,,Transport,"TRANSPORT, LYSOSOMAL"
6234,Rtotaltp,,,,,,,Transport,"TRANSPORT, PEROXISOMAL"


In [7]:
# Load the iCHO2291 SBML model (from https://www.ebi.ac.uk/biomodels/) with cobrapy
model = read_sbml_model('../Data/models/iCHO2291.xml')

# Output column -> name of the reaction attribute that fills it
column_to_attribute = {
    'Reaction': 'id',
    'Reaction Name': 'name',
    'Reaction Formula': 'reaction',
    'GPR': 'gpr',
    'Subsystem': 'subsystem',
    'Lower bound': 'lower_bound',
    'Upper bound': 'upper_bound',
}

# One row per model reaction, holding the attributes we are interested in
attributes = [
    [getattr(rxn, attribute) for attribute in column_to_attribute.values()]
    for rxn in tqdm(model.reactions)
]

df2 = pd.DataFrame(data=attributes, columns=list(column_to_attribute))
df2

OSError: The file with '../Data/models/iCHO2291.xml' does not exist, or is not an SBML string. Provide the path to an existing SBML file or a valid SBML string representation:


### Both DataFrames have the same number of reactions but different information in their columns

In [4]:
# Target schema shared by both tables: the columns built from the model first,
# followed by the enzyme columns that only the Excel sheet provides.
final_columns = ['Reaction', 'Reaction Name', 'Reaction Formula', 'GPR', 'Subsystem',
                 'Lower bound', 'Upper bound',
                 'Proteins', 'EC Number', 'Mol wt', 'kcat_forward', 'kcat_backward']

# Unify number and order of columns.
# Rename the Excel columns to the shared names, then reindex onto the target schema:
# reindex(columns=...) adds the missing columns (filled with NaN), drops every column
# that is not listed (e.g. 'Subsystem (iCHO1766)') and fixes the column order in one step.
# Nothing is mutated in place, so re-running this cell yields the same frames
# (the previous version appended duplicate columns on a second run, and its
# df1.drop(...) call discarded its result and therefore did nothing).
df1 = (
    df1
    .rename(columns={'Rxn': 'Reaction', 'Subsystem (iCHO2291)': 'Subsystem'})
    .reindex(columns=final_columns)
)
df2 = df2.reindex(columns=final_columns)

In [5]:
# Merge both datasets: stack the model table on top of the Excel table and collapse
# to one row per reaction. groupby(...).first() keeps, for every column, the first
# non-null value found in each group, so a value from df2 (listed first) takes
# precedence over the one from df1. 'Reaction' becomes the index of the result.
iCHO2291 = (
    pd.concat([df2, df1])
    .reset_index(drop=True)
    .groupby('Reaction')
    .first()
    .rename(columns={'GPR': 'GPR_yeo'})
)

# Save the unified table
iCHO2291.to_excel('../Data/iCHO2291_final.xlsx')

In [6]:
# Show the merged iCHO2291 table
iCHO2291

Unnamed: 0_level_0,Reaction Name,Reaction Formula,GPR_yeo,Subsystem,Lower bound,Upper bound,Proteins,EC Number,Mol wt,kcat_forward,kcat_backward
Reaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10FTHF5GLUtl,"5-glutamyl-10FTHF transport, lysosomal",10fthf5glu[c] --> 10fthf5glu[l],,Transport,0.0,1000.0,,,,,
10FTHF5GLUtm,"5-glutamyl-10FTHF transport, mitochondrial",10fthf5glu[m] --> 10fthf5glu[c],,Transport,0.0,1000.0,,,,,
10FTHF6GLUtl,"6-glutamyl-10FTHF transport, lysosomal",10fthf6glu[c] --> 10fthf6glu[l],,Transport,0.0,1000.0,,,,,
10FTHF6GLUtm,"6-glutamyl-10FTHF transport, mitochondrial",10fthf6glu[m] --> 10fthf6glu[c],,Transport,0.0,1000.0,,,,,
10FTHF7GLUtl,"7-glutamyl-10FTHF transport, lysosomal",10fthf7glu[c] --> 10fthf7glu[l],,Transport,0.0,1000.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
igg_hc,igg_hc,24.0 ala_L[c] + 8.0 arg_L[c] + 19.0 asn_L[c] +...,,Miscellaneous,0.0,1000.0,,,,,
igg_lc,igg_lc,15.0 ala_L[c] + 6.0 arg_L[c] + 7.0 asn_L[c] + ...,,Miscellaneous,0.0,1000.0,,,,,
peplys_synthesis,peplys synthesis,lys_L[n] --> peplys[n],,Miscellaneous,0.0,1000.0,,,,,
q10h2tc,transport of ubiquinol into cytosol,q10h2[m] <=> q10h2[c],,Transport,-1000.0,1000.0,,,,,
