# Reactions mapper reduction

In [1]:
import pandas as pd
import re

In [2]:
# Load the two raw reaction cross-reference tables; both are tab-separated.
modelseed_reaction_aliases_path = "./ModelSEED_ids/Unique_ModelSEED_Reaction_Aliases.txt"
metanetx_reaction_xref_path = "./MetaNetX_ids/reac_xref.tsv"

df_modelseed_reactions = pd.read_csv(modelseed_reaction_aliases_path, sep="\t")
# NOTE(review): skiprows=351 is hard-coded; presumably it skips the comment
# header of this particular reac_xref.tsv release -- confirm when the file is updated.
df_metanetx_reactions = pd.read_csv(metanetx_reaction_xref_path, skiprows=351, sep="\t")

In [3]:
# Display the ModelSEED reaction alias table (columns: ModelSEED ID, External ID, Source).
df_modelseed_reactions

Unnamed: 0,ModelSEED ID,External ID,Source
0,rxn00001,R_R00004_c,AlgaGEM
1,rxn00001,INORGPYROPHOSPHAT-RXN,AraCyc
2,rxn00001,R_R00004_c,AraGEM
3,rxn00001,IPP1,BiGG
4,rxn00001,PPA,BiGG
...,...,...,...
267225,rxn48573,MNXR109727,metanetx.reaction
267226,rxn48573,24421,rhea
267227,rxn48573,24423,rhea
267228,rxn48574,R03874,KEGG


In [4]:
# Keep only the "#source" -> "ID" mapping and discard rows whose ID is the
# literal placeholder "EMPTY".
# errors="ignore" makes this cell safe to re-run: the frame is overwritten in
# place, so on a second execution the "description" column is already gone.
df_metanetx_reactions = (
    df_metanetx_reactions
    .drop(columns=["description"], errors="ignore")
    .loc[lambda frame: frame["ID"] != "EMPTY"]
)
df_metanetx_reactions

Unnamed: 0,#source,ID
104,MNXR01,MNXR01
105,mnx:MNXR01,MNXR01
106,MNXR02,MNXR02
107,bigg.reaction:EX_h_e,MNXR02
108,bigg.reaction:R_EX_h_e,MNXR02
...,...,...
384797,biggR:R_GALNACT3g,MNXR99998
384798,bigg.reaction:GALNACT4g,MNXR99999
384799,bigg.reaction:R_GALNACT4g,MNXR99999
384800,biggR:GALNACT4g,MNXR99999


In [5]:
# Select the BiGG cross-references from both tables.
# ModelSEED side: literal substring test on "Source" (same rows as the former
# str.count('BiGG') > 0), so any Source value that merely contains "BiGG" is kept.
bigg_modelseed_reactions = df_modelseed_reactions[
    df_modelseed_reactions["Source"].str.contains("BiGG", regex=False, na=False)
]
# MetaNetX side: a literal prefix test replaces the regex, which relied on the
# invalid escape sequences "\." and "\:" in a non-raw string. Anchoring at the
# start agrees with the anchored re.match used later when the prefix is stripped.
bigg_metanetx_reactions = df_metanetx_reactions[
    df_metanetx_reactions["#source"].str.startswith("bigg.reaction:", na=False)
]

In [6]:
# Display the ModelSEED reaction aliases whose Source matched "BiGG".
bigg_modelseed_reactions

Unnamed: 0,ModelSEED ID,External ID,Source
3,rxn00001,IPP1,BiGG
4,rxn00001,PPA,BiGG
5,rxn00001,PPA_1,BiGG
6,rxn00001,PPAm,BiGG
59,rxn00002,ALPHNH,BiGG
...,...,...,...
250059,rxn40273,AMALT2,BiGG
250060,rxn40273,AMALT3,BiGG
250061,rxn40273,MLTG1,BiGG
250062,rxn40273,MPL,BiGG


In [7]:
# Display the MetaNetX reaction cross-references carrying the "bigg.reaction:" prefix.
bigg_metanetx_reactions

Unnamed: 0,#source,ID
107,bigg.reaction:EX_h_e,MNXR02
108,bigg.reaction:R_EX_h_e,MNXR02
113,bigg.reaction:HMR_1095,MNXR03
114,bigg.reaction:Ht,MNXR03
115,bigg.reaction:Htcx,MNXR03
...,...,...
384791,bigg.reaction:R_GALNACT2g,MNXR99997
384794,bigg.reaction:GALNACT3g,MNXR99998
384795,bigg.reaction:R_GALNACT3g,MNXR99998
384798,bigg.reaction:GALNACT4g,MNXR99999


In [8]:
# Select the KEGG cross-references from both tables.
# ModelSEED side: literal substring test on "Source" (same rows as the former
# str.count('KEGG') > 0).
kegg_modelseed_reactions = df_modelseed_reactions[
    df_modelseed_reactions["Source"].str.contains("KEGG", regex=False, na=False)
]
# MetaNetX side: literal prefix test instead of a regex written with the
# invalid escape sequences "\." and "\:" in a non-raw string.
kegg_metanetx_reactions = df_metanetx_reactions[
    df_metanetx_reactions["#source"].str.startswith("kegg.reaction:", na=False)
]

In [9]:
# Display the ModelSEED reaction aliases whose Source matched "KEGG".
kegg_modelseed_reactions

Unnamed: 0,ModelSEED ID,External ID,Source
15,rxn00001,R00004,KEGG
64,rxn00002,R00005,KEGG
78,rxn00003,R00006,KEGG
94,rxn00004,R00008,KEGG
109,rxn00006,R00009,KEGG
...,...,...,...
267184,rxn48569,R02161,KEGG
267191,rxn48570,R02163,KEGG
267201,rxn48571,R02164,KEGG
267217,rxn48573,R05635,KEGG


In [10]:
# Display the MetaNetX reaction cross-references carrying the "kegg.reaction:" prefix.
kegg_metanetx_reactions

Unnamed: 0,#source,ID
316,kegg.reaction:R00253,MNXR100024
378,kegg.reaction:R00256,MNXR100030
540,kegg.reaction:R01333,MNXR100060
598,kegg.reaction:R03824,MNXR100063
644,kegg.reaction:R01698,MNXR100066
...,...,...
384276,kegg.reaction:R00846,MNXR99890
384298,kegg.reaction:R00841,MNXR99894
384334,kegg.reaction:R03313,MNXR99896
384384,kegg.reaction:R03550,MNXR99920


In [11]:
# Select the MetaCyc cross-references from both tables.
# ModelSEED side: literal substring test on "Source" (same rows as the former
# str.count('MetaCyc') > 0).
metacyc_modelseed_reactions = df_modelseed_reactions[
    df_modelseed_reactions["Source"].str.contains("MetaCyc", regex=False, na=False)
]
# MetaNetX side: literal prefix test instead of a regex written with the
# invalid escape sequences "\." and "\:" in a non-raw string.
metacyc_metanetx_reactions = df_metanetx_reactions[
    df_metanetx_reactions["#source"].str.startswith("metacyc.reaction:", na=False)
]

In [12]:
# Display the ModelSEED reaction aliases whose Source matched "MetaCyc".
metacyc_modelseed_reactions

Unnamed: 0,ModelSEED ID,External ID,Source
18,rxn00001,INORGPYROPHOSPHAT-RXN,MetaCyc
65,rxn00002,ALLOPHANATE-HYDROLASE-RXN,MetaCyc
112,rxn00006,CATAL-RXN,MetaCyc
113,rxn00006,RXN-12121,MetaCyc
181,rxn00008,MANGANESE-PEROXIDASE-RXN,MetaCyc
...,...,...,...
267173,rxn48566,RXN-17840,MetaCyc
267175,rxn48567,RXN-15970,MetaCyc
267177,rxn48568,RXN-8351,MetaCyc
267219,rxn48573,3.5.1.88-RXN,MetaCyc


In [13]:
# Display the MetaNetX reaction cross-references carrying the "metacyc.reaction:" prefix.
metacyc_metanetx_reactions

Unnamed: 0,#source,ID
173,metacyc.reaction:RXN-13519,MNXR03
174,metacyc.reaction:TRANS-RXN-218,MNXR03
175,metacyc.reaction:TRANS-RXN-395,MNXR03
318,metacyc.reaction:GLUTAMINESYN-RXN,MNXR100024
380,metacyc.reaction:GLUTAMIN-RXN,MNXR100030
...,...,...
384278,metacyc.reaction:GLYCEROL-3-PHOSPHATE-OXIDASE-RXN,MNXR99890
384300,metacyc.reaction:RXN-14965,MNXR99894
384336,metacyc.reaction:GLUTSEMIALDEHYDROG-RXN,MNXR99896
384532,metacyc.reaction:GALACTONDEHYDRAT-RXN,MNXR99957


In [14]:
# Build the identity mapping for ModelSEED reactions: every distinct
# ModelSEED ID is listed as its own external ID with Source "ModelSEED".
modelseedIDs = df_modelseed_reactions['ModelSEED ID'].unique()

# Construct the frame in one call. Growing it row by row with DataFrame.append
# is quadratic in the number of IDs, and DataFrame.append no longer exists in
# pandas >= 2.0.
modelseed_modelseed_reactions = pd.DataFrame(
    {
        "ModelSEED ID": list(modelseedIDs),
        "External ID": list(modelseedIDs),
        "Source": ["ModelSEED"] * len(modelseedIDs),
    },
    columns=["ModelSEED ID", "External ID", "Source"],
)

modelseed_modelseed_reactions

Unnamed: 0,ModelSEED ID,External ID,Source
0,rxn00001,rxn00001,ModelSEED
1,rxn00002,rxn00002,ModelSEED
2,rxn00003,rxn00003,ModelSEED
3,rxn00004,rxn00004,ModelSEED
4,rxn00006,rxn00006,ModelSEED
...,...,...,...
43546,rxn48571,rxn48571,ModelSEED
43547,rxn48572,rxn48572,ModelSEED
43548,rxn48573,rxn48573,ModelSEED
43549,rxn48574,rxn48574,ModelSEED


In [15]:
# MetaNetX reaction cross-references that point at ModelSEED ("seed.reaction:" prefix).
# A literal prefix test replaces the regex, which relied on the invalid escape
# sequences "\." and "\:" in a non-raw string.
modelseed_metanetx_reactions = df_metanetx_reactions[
    df_metanetx_reactions["#source"].str.startswith("seed.reaction:", na=False)
]
modelseed_metanetx_reactions

Unnamed: 0,#source,ID
190,seed.reaction:rxn08730,MNXR03
191,seed.reaction:rxn11009,MNXR03
192,seed.reaction:rxn13646,MNXR03
193,seed.reaction:rxn22797,MNXR03
194,seed.reaction:rxn26298,MNXR03
...,...,...
384518,seed.reaction:rxn38742,MNXR99953
384544,seed.reaction:rxn02173,MNXR99957
384560,seed.reaction:rxn08576,MNXR99958
384570,seed.reaction:rxn08577,MNXR99959


In [16]:
# Build the identity mapping for MetaNetX reactions. Only MetaNetX IDs that
# appear in the seed.reaction subset (modelseed_metanetx_reactions) are included.
metanetx_reactionsIDs = modelseed_metanetx_reactions['ID'].unique()

# Construct the frame in one call. Growing it row by row with DataFrame.append
# is quadratic, and DataFrame.append no longer exists in pandas >= 2.0.
metanetx_metanetx_reactions = pd.DataFrame(
    {
        "#source": ["metanetx.reaction:" + mnx_id for mnx_id in metanetx_reactionsIDs],
        "ID": list(metanetx_reactionsIDs),
    },
    columns=["#source", "ID"],
)

metanetx_metanetx_reactions

Unnamed: 0,#source,ID
0,metanetx.reaction:MNXR03,MNXR03
1,metanetx.reaction:MNXR100018,MNXR100018
2,metanetx.reaction:MNXR100019,MNXR100019
3,metanetx.reaction:MNXR100024,MNXR100024
4,metanetx.reaction:MNXR100030,MNXR100030
...,...,...
34942,metanetx.reaction:MNXR99953,MNXR99953
34943,metanetx.reaction:MNXR99957,MNXR99957
34944,metanetx.reaction:MNXR99958,MNXR99958
34945,metanetx.reaction:MNXR99959,MNXR99959


In [17]:
# Stack the per-database subsets into one table per namespace.
# pd.concat replaces the chained DataFrame.append calls (removed in pandas 2.0);
# row order and the original index labels are kept exactly as before.
modelseed_all_reactions = pd.concat([
    bigg_modelseed_reactions,
    kegg_modelseed_reactions,
    metacyc_modelseed_reactions,
    modelseed_modelseed_reactions,
])
metanetx_all_reactions = pd.concat([
    bigg_metanetx_reactions,
    kegg_metanetx_reactions,
    metacyc_metanetx_reactions,
    modelseed_metanetx_reactions,
    metanetx_metanetx_reactions,
])

In [18]:
# Display the combined ModelSEED reaction mapping (BiGG, KEGG, MetaCyc and ModelSEED rows).
modelseed_all_reactions

Unnamed: 0,ModelSEED ID,External ID,Source
3,rxn00001,IPP1,BiGG
4,rxn00001,PPA,BiGG
5,rxn00001,PPA_1,BiGG
6,rxn00001,PPAm,BiGG
59,rxn00002,ALPHNH,BiGG
...,...,...,...
43546,rxn48571,rxn48571,ModelSEED
43547,rxn48572,rxn48572,ModelSEED
43548,rxn48573,rxn48573,ModelSEED
43549,rxn48574,rxn48574,ModelSEED


In [19]:
# Display the combined MetaNetX reaction mapping; "#source" still carries the database prefix.
metanetx_all_reactions

Unnamed: 0,#source,ID
107,bigg.reaction:EX_h_e,MNXR02
108,bigg.reaction:R_EX_h_e,MNXR02
113,bigg.reaction:HMR_1095,MNXR03
114,bigg.reaction:Ht,MNXR03
115,bigg.reaction:Htcx,MNXR03
...,...,...
34942,metanetx.reaction:MNXR99953,MNXR99953
34943,metanetx.reaction:MNXR99957,MNXR99957
34944,metanetx.reaction:MNXR99958,MNXR99958
34945,metanetx.reaction:MNXR99959,MNXR99959


In [20]:
# Final ModelSEED reaction table: same rows as modelseed_all_reactions, with
# "ModelSEED ID" renamed to "Internal ID" and a fresh 0..n-1 index.
# The previous iterrows() + DataFrame.append loop did exactly this one row at a
# time (quadratic, and DataFrame.append was removed in pandas 2.0).
modelseed_final_reactions_df = (
    modelseed_all_reactions[["ModelSEED ID", "External ID", "Source"]]
    .rename(columns={"ModelSEED ID": "Internal ID"})
    .reset_index(drop=True)
)

modelseed_final_reactions_df

Unnamed: 0,Internal ID,External ID,Source
0,rxn00001,IPP1,BiGG
1,rxn00001,PPA,BiGG
2,rxn00001,PPA_1,BiGG
3,rxn00001,PPAm,BiGG
4,rxn00002,ALPHNH,BiGG
...,...,...,...
104191,rxn48571,rxn48571,ModelSEED
104192,rxn48572,rxn48572,ModelSEED
104193,rxn48573,rxn48573,ModelSEED
104194,rxn48574,rxn48574,ModelSEED


In [21]:
# Final MetaNetX reaction table: split each "#source" value of the form
# "<prefix>:<external id>" into a database label ("Source") and the bare
# external ID; the MetaNetX ID becomes "Internal ID".

# Prefix found in "#source" -> label written to the "Source" column.
reaction_prefix_to_source = {
    "kegg.reaction": "KEGG",
    "bigg.reaction": "BiGG",
    "metacyc.reaction": "MetaCyc",
    "seed.reaction": "ModelSEED",
    "metanetx.reaction": "MetaNetX",
}

# Collect plain tuples and build the frame once. The previous
# iterrows() + DataFrame.append loop was quadratic, and DataFrame.append was
# removed in pandas 2.0.
reaction_records = []
for identifier, internal_id in zip(metanetx_all_reactions["#source"],
                                   metanetx_all_reactions["ID"]):
    # Split at the first ":" only, so just the leading prefix is stripped
    # (str.replace used to remove every occurrence of the prefix text).
    prefix, colon, remainder = identifier.partition(":")
    # Unrecognised prefixes (or values without ":") keep the former fallback:
    # empty Source and empty External ID.
    source = reaction_prefix_to_source.get(prefix, "") if colon else ""
    external_id = remainder if source else ""
    reaction_records.append((internal_id, external_id, source))

metanetx_final_reactions_df = pd.DataFrame(
    reaction_records, columns=["Internal ID", "External ID", "Source"]
)

metanetx_final_reactions_df

Unnamed: 0,Internal ID,External ID,Source
0,MNXR02,EX_h_e,BiGG
1,MNXR02,R_EX_h_e,BiGG
2,MNXR03,HMR_1095,BiGG
3,MNXR03,Ht,BiGG
4,MNXR03,Htcx,BiGG
...,...,...,...
164983,MNXR99953,MNXR99953,MetaNetX
164984,MNXR99957,MNXR99957,MetaNetX
164985,MNXR99958,MNXR99958,MetaNetX
164986,MNXR99959,MNXR99959,MetaNetX


In [22]:
# Write both final reaction tables to CSV in the working directory, without the index column.
for reactions_frame, reactions_csv in (
    (modelseed_final_reactions_df, "./ModelSEED-reactions.csv"),
    (metanetx_final_reactions_df, "./MetaNetX-reactions.csv"),
):
    reactions_frame.to_csv(reactions_csv, index=False)

# Metabolites mapper reduction

In [24]:
# Load the two raw compound cross-reference tables; both are tab-separated.
modelseed_compound_aliases_path = "./ModelSEED_ids/Unique_ModelSEED_Compound_Aliases.txt"
metanetx_compound_xref_path = "./MetaNetX_ids/chem_xref.tsv"

df_modelseed_compounds = pd.read_csv(modelseed_compound_aliases_path, sep="\t")
# NOTE(review): skiprows=351 is hard-coded; presumably it skips the comment
# header of this particular chem_xref.tsv release -- confirm when the file is updated.
df_metanetx_compounds = pd.read_csv(metanetx_compound_xref_path, skiprows=351, sep="\t")

In [25]:
# Display the ModelSEED compound alias table (columns: ModelSEED ID, External ID, Source).
df_modelseed_compounds

Unnamed: 0,ModelSEED ID,External ID,Source
0,cpd00001,S_H2O_c,AlgaGEM
1,cpd00001,S_H2O_ext_b,AlgaGEM
2,cpd00001,S_H2O_m,AlgaGEM
3,cpd00001,S_H2O_p,AlgaGEM
4,cpd00001,S_H2O_x,AlgaGEM
...,...,...,...
137360,cpd37302,CPD-9739,BrachyCyc
137361,cpd37302,CPD-9739,MaizeCyc
137362,cpd37302,CPD-9739,MetaCyc
137363,cpd37302,CPD-9739,PlantCyc


In [26]:
# Keep only the "#source" -> "ID" mapping and discard rows whose ID is the
# literal placeholder "EMPTY".
# errors="ignore" makes this cell safe to re-run: the frame is overwritten in
# place, so on a second execution the "description" column is already gone.
df_metanetx_compounds = (
    df_metanetx_compounds
    .drop(columns=["description"], errors="ignore")
    .loc[lambda frame: frame["ID"] != "EMPTY"]
)
df_metanetx_compounds

Unnamed: 0,#source,ID
0,BIOMASS,BIOMASS
1,mnx:BIOMASS,BIOMASS
2,seed.compound:cpd11416,BIOMASS
3,seedM:M_cpd11416,BIOMASS
4,seedM:cpd11416,BIOMASS
...,...,...
2514075,sabiork.compound:40,WATER
2514076,sabiorkM:40,WATER
2514077,seed.compound:cpd00001,WATER
2514078,seedM:M_cpd00001,WATER


In [27]:
# Select the BiGG cross-references from both tables.
# ModelSEED side: literal substring test on "Source" (same rows as the former
# str.count('BiGG') > 0).
# NOTE(review): because this is a substring test, rows with Source "BiGG1" are
# kept as well and flow into the exported table under that label -- confirm
# this is intended.
bigg_modelseed_compounds = df_modelseed_compounds[
    df_modelseed_compounds["Source"].str.contains("BiGG", regex=False, na=False)
]
# MetaNetX side: a literal prefix test replaces the regex, which relied on the
# invalid escape sequences "\." and "\:" in a non-raw string. Anchoring at the
# start agrees with the anchored re.match used later when the prefix is stripped.
bigg_metanetx_compounds = df_metanetx_compounds[
    df_metanetx_compounds["#source"].str.startswith("bigg.metabolite:", na=False)
]

In [28]:
# Display the ModelSEED compound aliases whose Source matched "BiGG".
bigg_modelseed_compounds

Unnamed: 0,ModelSEED ID,External ID,Source
13,cpd00001,h2o,BiGG
14,cpd00001,oh1,BiGG
15,cpd00001,h2o,BiGG1
16,cpd00001,oh1,BiGG1
119,cpd00002,atp,BiGG
...,...,...,...
126560,cpd30761,octdp_5,BiGG
126561,cpd30761,octdp,BiGG1
126562,cpd30761,octdp_5,BiGG1
137354,cpd37301,hemeC,BiGG


In [29]:
# Display the MetaNetX compound cross-references carrying the "bigg.metabolite:" prefix.
bigg_metanetx_compounds

Unnamed: 0,#source,ID
10,bigg.metabolite:oh1,MNXM02
41,bigg.metabolite:h,MNXM1
116,bigg.metabolite:nadh,MNXM10
149,bigg.metabolite:grdp,MNXM100
12226,bigg.metabolite:mercplaccys,MNXM10053
...,...,...
2486800,bigg.metabolite:34hpl,MNXM988
2490935,bigg.metabolite:3htmelys,MNXM990
2495513,bigg.metabolite:4mptnl,MNXM992
2498062,bigg.metabolite:35cdamp,MNXM9931


In [30]:
# Select the KEGG cross-references from both tables.
# ModelSEED side: literal substring test on "Source" (same rows as the former
# str.count('KEGG') > 0).
kegg_modelseed_compounds = df_modelseed_compounds[
    df_modelseed_compounds["Source"].str.contains("KEGG", regex=False, na=False)
]
# MetaNetX side: literal prefix test instead of a regex written with the
# invalid escape sequences "\." and "\:" in a non-raw string.
kegg_metanetx_compounds = df_metanetx_compounds[
    df_metanetx_compounds["#source"].str.startswith("kegg.compound:", na=False)
]

In [31]:
# Display the ModelSEED compound aliases whose Source matched "KEGG".
kegg_modelseed_compounds

Unnamed: 0,ModelSEED ID,External ID,Source
45,cpd00001,C00001,KEGG
46,cpd00001,C01328,KEGG
139,cpd00002,C00002,KEGG
223,cpd00003,C00003,KEGG
306,cpd00004,C00004,KEGG
...,...,...,...
137293,cpd37284,C11480,KEGG
137302,cpd37287,C19928,KEGG
137333,cpd37292,C03303,KEGG
137337,cpd37293,C20718,KEGG


In [32]:
# Display the MetaNetX compound cross-references carrying the "kegg.compound:" prefix.
kegg_metanetx_compounds

Unnamed: 0,#source,ID
21,kegg.compound:C01328,MNXM02
53,kegg.compound:C00080,MNXM1
128,kegg.compound:C00004,MNXM10
166,kegg.compound:C00341,MNXM100
1548,kegg.compound:C16496,MNXM10006
...,...,...
2510753,kegg.compound:C06057,MNXM9986
2510998,kegg.compound:C05145,MNXM9987
2511478,kegg.compound:C16720,MNXM9989
2511706,kegg.compound:C16266,MNXM9990


In [33]:
# Select the MetaCyc cross-references from both tables.
# ModelSEED side: literal substring test on "Source" (same rows as the former
# str.count('MetaCyc') > 0).
metacyc_modelseed_compounds = df_modelseed_compounds[
    df_modelseed_compounds["Source"].str.contains("MetaCyc", regex=False, na=False)
]
# MetaNetX side: literal prefix test instead of a regex written with the
# invalid escape sequences "\." and "\:" in a non-raw string.
metacyc_metanetx_compounds = df_metanetx_compounds[
    df_metanetx_compounds["#source"].str.startswith("metacyc.compound:", na=False)
]

In [34]:
# Display the ModelSEED compound aliases whose Source matched "MetaCyc".
metacyc_modelseed_compounds

Unnamed: 0,ModelSEED ID,External ID,Source
55,cpd00001,OH,MetaCyc
56,cpd00001,OXONIUM,MetaCyc
57,cpd00001,WATER,MetaCyc
145,cpd00002,ATP,MetaCyc
229,cpd00003,NAD,MetaCyc
...,...,...,...
137341,cpd37294,CPD-12469,MetaCyc
137343,cpd37298,CPD-175,MetaCyc
137348,cpd37299,CPD1G-1344,MetaCyc
137357,cpd37301,HEME_C,MetaCyc


In [35]:
# Display the MetaNetX compound cross-references carrying the "metacyc.compound:" prefix.
metacyc_metanetx_compounds

Unnamed: 0,#source,ID
24,metacyc.compound:OH,MNXM02
33,metacyc.compound:OXONIUM,MNXM03
56,metacyc.compound:PROTON,MNXM1
131,metacyc.compound:NADH,MNXM10
171,metacyc.compound:GERANYL-PP,MNXM100
...,...,...
2512636,metacyc.compound:CPD-11220,MNXM9994
2512863,metacyc.compound:CPD-10654,MNXM9995
2513765,metacyc.compound:CPD-9861,MNXM9999
2513816,metacyc.compound:CPD-17804,MNXM99992


In [36]:
# Build the identity mapping for ModelSEED compounds: every distinct
# ModelSEED ID is listed as its own external ID with Source "ModelSEED".
# Note: this rebinds modelseedIDs, which held the reaction IDs earlier in the notebook.
modelseedIDs = df_modelseed_compounds['ModelSEED ID'].unique()

# Construct the frame in one call. Growing it row by row with DataFrame.append
# is quadratic in the number of IDs, and DataFrame.append no longer exists in
# pandas >= 2.0.
modelseed_modelseed_compounds = pd.DataFrame(
    {
        "ModelSEED ID": list(modelseedIDs),
        "External ID": list(modelseedIDs),
        "Source": ["ModelSEED"] * len(modelseedIDs),
    },
    columns=["ModelSEED ID", "External ID", "Source"],
)

modelseed_modelseed_compounds

Unnamed: 0,ModelSEED ID,External ID,Source
0,cpd00001,cpd00001,ModelSEED
1,cpd00002,cpd00002,ModelSEED
2,cpd00003,cpd00003,ModelSEED
3,cpd00004,cpd00004,ModelSEED
4,cpd00005,cpd00005,ModelSEED
...,...,...,...
33900,cpd37298,cpd37298,ModelSEED
33901,cpd37299,cpd37299,ModelSEED
33902,cpd37300,cpd37300,ModelSEED
33903,cpd37301,cpd37301,ModelSEED


In [37]:
# MetaNetX compound cross-references that point at ModelSEED ("seed.compound:" prefix).
# A literal prefix test replaces the regex, which relied on the invalid escape
# sequences "\." and "\:" in a non-raw string.
modelseed_metanetx_compounds = df_metanetx_compounds[
    df_metanetx_compounds["#source"].str.startswith("seed.compound:", na=False)
]
modelseed_metanetx_compounds

Unnamed: 0,#source,ID
2,seed.compound:cpd11416,BIOMASS
27,seed.compound:cpd15275,MNXM02
111,seed.compound:cpd00067,MNXM1
143,seed.compound:cpd00004,MNXM10
179,seed.compound:cpd00283,MNXM100
...,...,...
2512865,seed.compound:cpd22761,MNXM9995
2513290,seed.compound:cpd28907,MNXM99969
2513767,seed.compound:cpd25878,MNXM9999
2513818,seed.compound:cpd33563,MNXM99992


In [38]:
# Build the identity mapping for MetaNetX compounds. Only MetaNetX IDs that
# appear in the seed.compound subset (modelseed_metanetx_compounds) are included.
metanetx_compoundsIDs = modelseed_metanetx_compounds['ID'].unique()

# Construct the frame in one call. Growing it row by row with DataFrame.append
# is quadratic, and DataFrame.append no longer exists in pandas >= 2.0.
metanetx_metanetx_compounds = pd.DataFrame(
    {
        "#source": ["metanetx.compound:" + mnx_id for mnx_id in metanetx_compoundsIDs],
        "ID": list(metanetx_compoundsIDs),
    },
    columns=["#source", "ID"],
)

metanetx_metanetx_compounds

Unnamed: 0,#source,ID
0,metanetx.compound:BIOMASS,BIOMASS
1,metanetx.compound:MNXM02,MNXM02
2,metanetx.compound:MNXM1,MNXM1
3,metanetx.compound:MNXM10,MNXM10
4,metanetx.compound:MNXM100,MNXM100
...,...,...
33072,metanetx.compound:MNXM9995,MNXM9995
33073,metanetx.compound:MNXM99969,MNXM99969
33074,metanetx.compound:MNXM9999,MNXM9999
33075,metanetx.compound:MNXM99992,MNXM99992


In [39]:
# Stack the per-database subsets into one table per namespace.
# pd.concat replaces the chained DataFrame.append calls (removed in pandas 2.0);
# row order and the original index labels are kept exactly as before.
modelseed_all_compounds = pd.concat([
    bigg_modelseed_compounds,
    kegg_modelseed_compounds,
    metacyc_modelseed_compounds,
    modelseed_modelseed_compounds,
])
# NOTE(review): the outputs saved in this notebook for metanetx_all_compounds
# and the final MetaNetX compound table end with seed.compound / ModelSEED rows
# and show no metanetx.compound rows, so they appear to predate the
# metanetx_metanetx_compounds entry below -- restart and run all to refresh.
metanetx_all_compounds = pd.concat([
    bigg_metanetx_compounds,
    kegg_metanetx_compounds,
    metacyc_metanetx_compounds,
    modelseed_metanetx_compounds,
    metanetx_metanetx_compounds,
])

In [54]:
# Display the combined ModelSEED compound mapping (BiGG, KEGG, MetaCyc and ModelSEED rows).
modelseed_all_compounds

Unnamed: 0,ModelSEED ID,External ID,Source
13,cpd00001,h2o,BiGG
14,cpd00001,oh1,BiGG
15,cpd00001,h2o,BiGG1
16,cpd00001,oh1,BiGG1
119,cpd00002,atp,BiGG
...,...,...,...
33900,cpd37298,cpd37298,ModelSEED
33901,cpd37299,cpd37299,ModelSEED
33902,cpd37300,cpd37300,ModelSEED
33903,cpd37301,cpd37301,ModelSEED


In [55]:
# Display the combined MetaNetX compound mapping; "#source" still carries the database prefix.
metanetx_all_compounds

Unnamed: 0,#source,ID
10,bigg.metabolite:oh1,MNXM02
41,bigg.metabolite:h,MNXM1
116,bigg.metabolite:nadh,MNXM10
149,bigg.metabolite:grdp,MNXM100
12226,bigg.metabolite:mercplaccys,MNXM10053
...,...,...
2512865,seed.compound:cpd22761,MNXM9995
2513290,seed.compound:cpd28907,MNXM99969
2513767,seed.compound:cpd25878,MNXM9999
2513818,seed.compound:cpd33563,MNXM99992


In [56]:
# Final ModelSEED compound table: same rows as modelseed_all_compounds, with
# "ModelSEED ID" renamed to "Internal ID" and a fresh 0..n-1 index.
# The previous iterrows() + DataFrame.append loop did exactly this one row at a
# time (quadratic, and DataFrame.append was removed in pandas 2.0).
modelseed_final_compounds_df = (
    modelseed_all_compounds[["ModelSEED ID", "External ID", "Source"]]
    .rename(columns={"ModelSEED ID": "Internal ID"})
    .reset_index(drop=True)
)

modelseed_final_compounds_df

Unnamed: 0,Internal ID,External ID,Source
0,cpd00001,h2o,BiGG
1,cpd00001,oh1,BiGG
2,cpd00001,h2o,BiGG1
3,cpd00001,oh1,BiGG1
4,cpd00002,atp,BiGG
...,...,...,...
77391,cpd37298,cpd37298,ModelSEED
77392,cpd37299,cpd37299,ModelSEED
77393,cpd37300,cpd37300,ModelSEED
77394,cpd37301,cpd37301,ModelSEED


In [57]:
# Final MetaNetX compound table: split each "#source" value of the form
# "<prefix>:<external id>" into a database label ("Source") and the bare
# external ID; the MetaNetX ID becomes "Internal ID".

# Prefix found in "#source" -> label written to the "Source" column.
compound_prefix_to_source = {
    "kegg.compound": "KEGG",
    "bigg.metabolite": "BiGG",
    "metacyc.compound": "MetaCyc",
    "seed.compound": "ModelSEED",
    "metanetx.compound": "MetaNetX",
}

# Collect plain tuples and build the frame once. The previous
# iterrows() + DataFrame.append loop was quadratic, and DataFrame.append was
# removed in pandas 2.0.
compound_records = []
for identifier, internal_id in zip(metanetx_all_compounds["#source"],
                                   metanetx_all_compounds["ID"]):
    # Split at the first ":" only, so just the leading prefix is stripped
    # (str.replace used to remove every occurrence of the prefix text).
    prefix, colon, remainder = identifier.partition(":")
    # Unrecognised prefixes (or values without ":") keep the former fallback:
    # empty Source and empty External ID.
    source = compound_prefix_to_source.get(prefix, "") if colon else ""
    external_id = remainder if source else ""
    compound_records.append((internal_id, external_id, source))

metanetx_final_compounds_df = pd.DataFrame(
    compound_records, columns=["Internal ID", "External ID", "Source"]
)

metanetx_final_compounds_df

Unnamed: 0,Internal ID,External ID,Source
0,MNXM02,oh1,BiGG
1,MNXM1,h,BiGG
2,MNXM10,nadh,BiGG
3,MNXM100,grdp,BiGG
4,MNXM10053,mercplaccys,BiGG
...,...,...,...
86050,MNXM9995,cpd22761,ModelSEED
86051,MNXM99969,cpd28907,ModelSEED
86052,MNXM9999,cpd25878,ModelSEED
86053,MNXM99992,cpd33563,ModelSEED


In [59]:
# Write both final compound tables to CSV in the working directory, without the index column.
for compounds_frame, compounds_csv in (
    (modelseed_final_compounds_df, "./ModelSEED-compounds.csv"),
    (metanetx_final_compounds_df, "./MetaNetX-compounds.csv"),
):
    compounds_frame.to_csv(compounds_csv, index=False)