In [1]:
import pandas as pd
import cobra
import numpy as np

from cobra.io import read_sbml_model
from tqdm.notebook import tqdm



In [2]:
# Load the iCHO2441 reconstruction (the model from Cleo's lab) with cobrapy
iCHO2441_model = read_sbml_model('../Data/Reconciliation/iCHO2441.xml')

# Collect, reaction by reaction, the attributes we want to compare between models
attributes = []
for reaction in tqdm(iCHO2441_model.reactions):
    attributes.append([
        reaction.id,
        reaction.name,
        reaction.reaction,
        reaction.gpr,
        reaction.subsystem,
        reaction.lower_bound,
        reaction.upper_bound,
    ])

iCHO2441_df = pd.DataFrame(
    attributes,
    columns=[
        'Reaction', 'Reaction Name', 'Reaction Formula', 'GPR',
        'Subsystem', 'Lower bound', 'Upper bound',
    ],
)

# Leading tag columns: every row of this frame is marked 'X' under 'cleo',
# while 'yeo' is left empty
iCHO2441_df.insert(0, 'yeo', np.nan)
iCHO2441_df.insert(1, 'cleo', 'X')
iCHO2441_df

  0%|          | 0/6337 [00:00<?, ?it/s]

Unnamed: 0,yeo,cleo,Reaction,Reaction Name,Reaction Formula,GPR,Subsystem,Lower bound,Upper bound
0,,X,10FTHF5GLUtl,"5-glutamyl-10FTHF transport, lysosomal",10fthf5glu[c] --> 10fthf5glu[l],,,0.0,1000.0
1,,X,10FTHF5GLUtm,"5-glutamyl-10FTHF transport, mitochondrial",10fthf5glu[m] --> 10fthf5glu[c],,,0.0,1000.0
2,,X,10FTHF6GLUtl,"6-glutamyl-10FTHF transport, lysosomal",10fthf6glu[c] --> 10fthf6glu[l],,,0.0,1000.0
3,,X,10FTHF6GLUtm,"6-glutamyl-10FTHF transport, mitochondrial",10fthf6glu[m] --> 10fthf6glu[c],,,0.0,1000.0
4,,X,10FTHF7GLUtl,"7-glutamyl-10FTHF transport, lysosomal",10fthf7glu[c] --> 10fthf7glu[l],,,0.0,1000.0
...,...,...,...,...,...,...,...,...,...
6332,,X,SPC_degradation,SPC_degradation,SPC[r] -->,100762175 and 100755739 and 100753146 and 1007...,,0.0,1000.0
6333,,X,co_TRANSLOC_7,co_TRANSLOC_7,Translocase[r] --> DNAJC1[r] + SEC61C[r] + SEC...,100767375 and 100768703 and 100772250 and 1007...,,0.0,1000.0
6334,,X,TRAP_degradation,TRAP_degradation,TRAP[r] -->,100767661 and 100758563 and 100759016 and 1007...,,0.0,1000.0
6335,,X,OST_complex,OST_complex,DAD1[r] + DDOST[r] + RPN1[r] + RPN2[r] + STT3A...,100755259 and 100762811 and 103158732 and 1007...,,0.0,1000.0


In [3]:
# Load the iCHO2291 reconstruction (downloaded from https://www.ebi.ac.uk/biomodels/) with cobrapy
iCHO2291_model = read_sbml_model('../Data/Reconciliation/iCHO2291.xml')

# Collect, reaction by reaction, the attributes we want to compare between models
attributes = []
for reaction in tqdm(iCHO2291_model.reactions):
    attributes.append([
        reaction.id,
        reaction.name,
        reaction.reaction,
        reaction.gpr,
        reaction.subsystem,
        reaction.lower_bound,
        reaction.upper_bound,
    ])

iCHO2291_df = pd.DataFrame(
    attributes,
    columns=[
        'Reaction', 'Reaction Name', 'Reaction Formula', 'GPR',
        'Subsystem', 'Lower bound', 'Upper bound',
    ],
)

# Leading tag columns: every row of this frame is marked 'X' under 'yeo',
# while 'cleo' is left empty
iCHO2291_df.insert(0, 'yeo', 'X')
iCHO2291_df.insert(1, 'cleo', np.nan)
iCHO2291_df

  0%|          | 0/6236 [00:00<?, ?it/s]

Unnamed: 0,yeo,cleo,Reaction,Reaction Name,Reaction Formula,GPR,Subsystem,Lower bound,Upper bound
0,X,,10FTHF5GLUtl,"5-glutamyl-10FTHF transport, lysosomal",10fthf5glu[c] --> 10fthf5glu[l],,Transport,0.0,1000.0
1,X,,10FTHF5GLUtm,"5-glutamyl-10FTHF transport, mitochondrial",10fthf5glu[m] --> 10fthf5glu[c],,Transport,0.0,1000.0
2,X,,10FTHF6GLUtl,"6-glutamyl-10FTHF transport, lysosomal",10fthf6glu[c] --> 10fthf6glu[l],,Transport,0.0,1000.0
3,X,,10FTHF6GLUtm,"6-glutamyl-10FTHF transport, mitochondrial",10fthf6glu[m] --> 10fthf6glu[c],,Transport,0.0,1000.0
4,X,,10FTHF7GLUtl,"7-glutamyl-10FTHF transport, lysosomal",10fthf7glu[c] --> 10fthf7glu[l],,Transport,0.0,1000.0
...,...,...,...,...,...,...,...,...,...
6231,X,,RTOTALFATPc,uptake of Rtotal by enterocytes,Rtotal[e] + atp[c] + coa[c] --> Rtotalcoa[c] +...,,Exchange/demand/sink reaction,0.0,1000.0
6232,X,,RTOTALt,RTOTAL transport,Rtotal[e] <=> Rtotal[c],,Transport,-1000.0,1000.0
6233,X,,Rtotaltl,fatty acid intracellular transport,Rtotal[c] <=> Rtotal[l],,Transport,-1000.0,1000.0
6234,X,,Rtotaltp,fatty acid intracellular transport,Rtotal[c] <=> Rtotal[x],,Transport,-1000.0,1000.0


In [4]:
# Stack both reaction tables (iCHO2441 rows first, then iCHO2291) and
# renumber the rows 0..n-1 so the index is unique across the combined frame
all_dfs = pd.concat([iCHO2441_df, iCHO2291_df], ignore_index=True)
all_dfs

Unnamed: 0,yeo,cleo,Reaction,Reaction Name,Reaction Formula,GPR,Subsystem,Lower bound,Upper bound
0,,X,10FTHF5GLUtl,"5-glutamyl-10FTHF transport, lysosomal",10fthf5glu[c] --> 10fthf5glu[l],,,0.0,1000.0
1,,X,10FTHF5GLUtm,"5-glutamyl-10FTHF transport, mitochondrial",10fthf5glu[m] --> 10fthf5glu[c],,,0.0,1000.0
2,,X,10FTHF6GLUtl,"6-glutamyl-10FTHF transport, lysosomal",10fthf6glu[c] --> 10fthf6glu[l],,,0.0,1000.0
3,,X,10FTHF6GLUtm,"6-glutamyl-10FTHF transport, mitochondrial",10fthf6glu[m] --> 10fthf6glu[c],,,0.0,1000.0
4,,X,10FTHF7GLUtl,"7-glutamyl-10FTHF transport, lysosomal",10fthf7glu[c] --> 10fthf7glu[l],,,0.0,1000.0
...,...,...,...,...,...,...,...,...,...
12568,X,,RTOTALFATPc,uptake of Rtotal by enterocytes,Rtotal[e] + atp[c] + coa[c] --> Rtotalcoa[c] +...,,Exchange/demand/sink reaction,0.0,1000.0
12569,X,,RTOTALt,RTOTAL transport,Rtotal[e] <=> Rtotal[c],,Transport,-1000.0,1000.0
12570,X,,Rtotaltl,fatty acid intracellular transport,Rtotal[c] <=> Rtotal[l],,Transport,-1000.0,1000.0
12571,X,,Rtotaltp,fatty acid intracellular transport,Rtotal[c] <=> Rtotal[x],,Transport,-1000.0,1000.0


In [5]:
def group_and_compare(df, group_column):
    """
    Group a DataFrame by one column and report whether each group's rows are identical.

    Rows are compared on the string form of every column other than
    ``group_column``, so cells are matched by their text representation
    rather than by object identity.

    Args:
        df: Pandas DataFrame to group by
        group_column: Name of the column to group by

    Returns:
        Pandas DataFrame with one row per distinct value of ``group_column``
        and two columns: ``group_column`` itself and ``'is_identical'``.
        ``'is_identical'`` holds the string ``'only in one model'`` for a
        group made of a single row; otherwise it is ``True`` when all rows
        of the group match and ``False`` when at least one differs.
    """
    # Convert to text once up front instead of re-converting every group
    # several times inside the loop.
    as_text = df.set_index(group_column).astype(str)

    group_keys = []
    is_identical = []
    for key, group in as_text.groupby(level=group_column):
        # Keys and verdicts are appended together, so the two lists always
        # have the same length no matter how many rows a group contains.
        group_keys.append(key)
        if len(group) == 1:
            is_identical.append('only in one model')
        else:
            # For a pair this equals "the second row duplicates the first";
            # for larger groups it requires every row to match.
            is_identical.append(len(group.drop_duplicates()) == 1)

    result = pd.DataFrame({
        group_column: group_keys,
        'is_identical': is_identical
    })
    return result

In [6]:
# Compare reactions across the two models; the model tag columns and the
# subsystem annotation are left out of the comparison
df_new = group_and_compare(
    all_dfs.drop(columns=['yeo', 'cleo', 'Subsystem']),
    'Reaction',
)

In [12]:
# Split the reaction IDs by comparison verdict. The 'is_identical' column mixes
# booleans with the string 'only in one model', so each category is selected
# with an explicit equality test rather than by truthiness.
not_equal = df_new.loc[df_new['is_identical'] == False, 'Reaction'].tolist()
equal = df_new.loc[df_new['is_identical'] == True, 'Reaction'].tolist()
only_in_one = df_new.loc[df_new['is_identical'] == 'only in one model', 'Reaction'].tolist()
        

In [16]:
# Number of reactions flagged 'only in one model' (no counterpart row to compare against)
len(only_in_one)

101

In [17]:
# Number of reactions whose compared rows were found identical
len(equal)

6236