In [11]:
import cobra
import pandas as pd

In [12]:
model_DP = cobra.io.read_sbml_model("models/consistent_DP_SNM.xml")
model_SA = cobra.io.read_sbml_model("models/consistent_iYS854_SNM.xml")
print("Gorwth: ", model_DP.slim_optimize())
print("Gorwth: ", model_SA.slim_optimize())

Gorwth:  0.28236539253224924
Gorwth:  2.558694612613397


## 1) Common and uncommon metabolits

In [13]:
met_DP_ids = set(map(lambda x:x.id, model_DP.metabolites))
met_SA_ids = set(map(lambda x:x.id, model_SA.metabolites))
met_ids = set(met_DP_ids)
met_ids = met_ids.union(set(met_SA_ids))
common_met_ids = met_DP_ids.intersection(met_SA_ids)
non_common_met_ids = met_ids.difference(common_met_ids)
print("The number of common metabolites is: ", len(common_met_ids))
print("The number of non common metabolites is: ", len(non_common_met_ids))
print("The total numebr of unique metabolites is: ", len(met_ids))

The number of common metabolites is:  377
The number of non common metabolites is:  970
The total numebr of unique metabolites is:  1347


In [14]:
common_metabolits = [met for met in model_DP.metabolites if met.id in met_SA_ids]
# Check for correctness
assert len(common_metabolits) == len(common_met_ids)
# Write csv
df_dict = {"ID":[], "NAME":[], "FORMULA":[], "COMPARTMENT":[]}
for met in common_metabolits:
    df_dict["ID"].append(met.id)
    df_dict["NAME"].append(met.name)
    df_dict["FORMULA"].append(met.formula)
    df_dict["COMPARTMENT"].append(met.compartment)
df_common_met = pd.DataFrame(df_dict)
df_common_met.to_csv("common_metabolistes.csv", sep=";")
df_common_met.head()

Unnamed: 0,ID,NAME,FORMULA,COMPARTMENT
0,10fthf_c,10-Formyltetrahydrofolate,C20H21N7O7,c
1,adp_c,ADP,C10H12N5O10P2,c
2,atp_c,ATP,C10H12N5O13P3,c
3,glu__L_c,L-Glutamate,C5H8NO4,c
4,pi_c,Phosphate,HO4P,c


In [15]:
uncommon_metabolites = []
origin = []
for id in non_common_met_ids:
    try:
        met = model_DP.metabolites.get_by_id(id)
        origin.append("DP")
    except:
        met = model_SA.metabolites.get_by_id(id)
        origin.append("SA")
    uncommon_metabolites.append(met)
# Check
assert len(uncommon_metabolites) == len(non_common_met_ids)

# Write csv
df_dict = {"ID":[], "NAME":[], "FORMULA":[], "COMPARTMENT":[], "MODEL":[]}
for met, org in zip(uncommon_metabolites, origin):
    df_dict["ID"].append(met.id)
    df_dict["NAME"].append(met.name)
    df_dict["FORMULA"].append(met.formula)
    df_dict["COMPARTMENT"].append(met.compartment)
    df_dict["MODEL"].append(org)
df_common_met = pd.DataFrame(df_dict)
df_common_met.to_csv("uncommon_metabolistes.csv", sep=";")
df_common_met.head()

Unnamed: 0,ID,NAME,FORMULA,COMPARTMENT,MODEL
0,cdpdtdecg_c,"CDP-1,2-ditetradecanoylglycerol",C40H71N3O15P2,c,DP
1,7me3oxoacp_c,7-methyl-3-oxo-octanoyl-ACP,C20H35N2O9PRS,c,SA
2,ptdoh_MRSA_c,Phosphatidic acid,C53H102N2O16PR0S0,c,SA
3,8mtn2eACP_c,8-methyl-trans-non-2-enoyl-ACP,C21H37N2O8PRS,c,SA
4,5fthf_c,5-Formyltetrahydrofolate,C20H21N7O7,c,DP


## 2) Common and uncommon reactions

In [16]:
rec_DP_ids = set(map(lambda x:x.id, model_DP.reactions))
rec_SA_ids = set(map(lambda x:x.id, model_SA.reactions))
rec_ids = set(rec_DP_ids)
rec_ids = rec_ids.union(set(rec_SA_ids))
common_rec_ids = rec_DP_ids.intersection(rec_SA_ids)
non_common_rec_ids = rec_ids.difference(common_rec_ids)
print("The number of common reactions is: ", len(common_rec_ids))
print("The number of non common reactions is: ", len(non_common_rec_ids))
print("The total numebr of unique reactions is: ", len(rec_ids))

The number of common reactions is:  273
The number of non common reactions is:  1535
The total numebr of unique reactions is:  1808


In [17]:
common_reactions = [rec for rec in model_DP.reactions if rec.id in rec_SA_ids]
# Check for correctness
assert len(common_reactions) == len(common_rec_ids)
# Write csv
df_dict = {"ID":[], "NAME":[], "REACTION":[], "LOWER_BOUND":[], "UPPER_BOUND":[]}
for rec in common_reactions:
    df_dict["ID"].append(rec.id)
    df_dict["NAME"].append(rec.name)
    df_dict["REACTION"].append(rec.reaction)
    df_dict["LOWER_BOUND"].append(rec.lower_bound)
    df_dict["UPPER_BOUND"].append(rec.upper_bound)

def highlight_col(x):
    #copy df to new - original data are not changed
    df = x.copy()
    #mark exchange reactions by yellow color
    mask = x["ID"].str.contains("EX_")
    df.loc[mask, :] = 'background-color: yellow'
    df.loc[~mask,:] = 'background-color: white'
    return df   

writer = pd.ExcelWriter('common_reactions.xlsx')
df_common_rec = pd.DataFrame(df_dict)
df_common_rec.style.apply(highlight_col, axis=None).to_excel(writer)
writer.save()
writer.close()

In [18]:
uncommon_reactions = []
origin = []
for id in non_common_rec_ids:
    try:
        rec = model_DP.reactions.get_by_id(id)
        origin.append("DP")
    except:
        rec = model_SA.reactions.get_by_id(id)
        origin.append("SA")
    uncommon_reactions.append(rec)
# Check
assert len(uncommon_reactions) == len(non_common_rec_ids)

# Write csv
df_dict = {"ID":[], "NAME":[], "REACTION":[], "LOWER_BOUND":[], "UPPER_BOUND":[]}
for rec in uncommon_reactions:
    df_dict["ID"].append(rec.id)
    df_dict["NAME"].append(rec.name)
    df_dict["REACTION"].append(rec.reaction)
    df_dict["LOWER_BOUND"].append(rec.lower_bound)
    df_dict["UPPER_BOUND"].append(rec.upper_bound)

writer = pd.ExcelWriter('uncommon_reactions.xlsx')
df_uncommon_rec = pd.DataFrame(df_dict)
df_uncommon_rec.style.apply(highlight_col, axis=None).to_excel(writer)
writer.save()
writer.close()

## 3) Jaccard index

We compute the jaccard index for both species as following:
$$ J(A,B) = \frac{|A \cap B|}{|A \cup B|}$$
Were A,B are the set of metabolites/reactions in the model. Thus the Jaccard index can in this case also be written as:
$$ J(M_1,M_2) = \frac{\text{Common metabolites/reactions in } M_1,M_2}{\text{All metabolites/reactions in both models } M_1,M_2}$$
These number we compute before thus we can easily compute it as:

In [19]:
jaccard_index_met = len(common_met_ids)/len(met_ids)
jaccard_index_met

0.2798812175204157

In [20]:
jaccard_index_rec = len(common_rec_ids)/len(rec_ids)
jaccard_index_rec

0.15099557522123894