# Reconstructing GEMs from 


Pseudo-nitzschia: TARA_ARC_108_MAG_00212 (Proteins: TARA_ARC_MAG_00212) or TARA_ARC_108_MAG_00230 (Proteins: TARA_ARC_MAG_00230)

Alteromonas
TARA_ARC_108_MAG_00080
TARA_ARC_108_MAG_00179

Marinobacter
TARA_ARC_108_MAG_00174

Polaribacter
TARA_ARC_108_MAG_00201


## CarveME docs: https://carveme.readthedocs.io/en/latest/advanced.html

In [None]:
from pathlib import Path


genomes_dir = Path("genomes/pseudo-nitzschia")

In [None]:
from Bio import SeqIO
import re


def prepend_locus_tag(record):
    pattern = r'\[locus_tag=([^]]*)\]'
    match = re.search(pattern, record)
    if match:
        locus_tag = match.group(1)
        new_record = locus_tag + " " + record
        return new_record
    else:
        return record

fasta = SeqIO.parse("dokdonia_med134.faa", "fasta")
records = []
for record in fasta:
    record_name = prepend_locus_tag(record.description)
    record_str = f"> {record_name}\n{record.seq}\n"
    records.append(record_str)

with open("dokdonia_med134_locus_tags.faa", "w") as f:
    f.writelines(records)

## Homology-based search using Diamond-Blastx

In [2]:
%%bash


# Alteromonas 1
carve \
    --solver gurobi \
    -o gems/MAG_00080_alteromonas_bacteria_M9_marine.xml \
    -u bacteria \
    --init M9[marine] \
    --gapfill M9[marine] \
    --mediadb marine_media/SnakeCarveMe-master/media_db.tsv \
    genomes/pseudo-nitzschia/TARA_ARC_108_MAG_00080.genepred.fasta

# Alteromonas 2
carve \
    --solver gurobi \
    -o gems/MAG_00179_alteromonas_bacteria_M9_marine.xml \
    -u bacteria \
    --init M9[marine] \
    --gapfill M9[marine] \
    --mediadb marine_media/SnakeCarveMe-master/media_db.tsv \
    genomes/pseudo-nitzschia/TARA_ARC_108_MAG_00179.genepred.fasta

# Marinobacter
carve \
    --solver gurobi \
    -o gems/MAG_00174_marinobacter_bacteria_M9_marine.xml \
    -u bacteria \
    --init M9[marine] \
    --gapfill M9[marine] \
    --mediadb marine_media/SnakeCarveMe-master/media_db.tsv \
    genomes/pseudo-nitzschia/TARA_ARC_108_MAG_00174.genepred.fasta

# Polaribacter
carve \
    --solver gurobi \
    -o gems/MAG_00201_polaribacter_bacteria_M9_marine.xml \
    -u bacteria \
    --init M9[marine] \
    --gapfill M9[marine] \
    --mediadb marine_media/SnakeCarveMe-master/media_db.tsv \
    genomes/pseudo-nitzschia/TARA_ARC_108_MAG_00201.genepred.fasta

# Sulfitobacter
carve \
    --solver gurobi \
    -o gems/MAG_00083_sulfitobacter_bacteria_M9_marine.xml \
    -u bacteria \
    --init M9[marine] \
    --gapfill M9[marine] \
    --mediadb marine_media/SnakeCarveMe-master/media_db.tsv \
    genomes/pseudo-nitzschia/TARA_ARC_108_MAG_00083.genepred.fasta

## REQUIREMENTS

1. pip gurobipy
2. conda install -c bioconda diamond
3. pip memote
3.1. pip install 'importlib_resources <1.0'
4. pip carveme
5. conda nextflow
6. conda install -c conda-forge openjdk


## CarveMe:

Comment line 14 and import set_default_solver of: /home/robaina/miniconda3/envs/nal/lib/python3.11/site-packages/carveme/__init__.py

In [20]:
import cobra


unineg = cobra.io.read_sbml_model("carveme_universes/universal_gramnegative.xml")

exchanges = unineg.exchanges
for rxn in exchanges:
    rxn.lower_bound = -1000

unineg.summary()

Metabolite,Reaction,Flux,C-Number,C-Flux
3mb_e,EX_3mb_e,999.9,0,0.00%
3mop_e,EX_3mop_e,223.1,0,0.00%
4mop_e,EX_4mop_e,257.7,0,0.00%
5mta_e,EX_5mta_e,116.3,0,0.00%
5mthf_e,EX_5mthf_e,0.3827,0,0.00%
LalaDgluMdap_e,EX_LalaDgluMdap_e,15.9,0,0.00%
Rtotal2_e,EX_Rtotal2_e,1000.0,0,0.00%
Rtotal3_e,EX_Rtotal3_e,73.01,0,0.00%
ahcys_e,EX_ahcys_e,0.1276,0,0.00%
akg_e,EX_akg_e,112.5,0,0.00%

Metabolite,Reaction,Flux,C-Number,C-Flux
5mdr1p_c,DM_5mdr1p_c,-116.3,0,0.00%
cysi__L_c,DM_Lcystin_c,-473.8,0,0.00%
ac_c,DM_ac_c,-22.26,0,0.00%
co_c,DM_co_c,-1000.0,0,0.00%
lac__D_c,DM_lac__D_c,-1000.0,0,0.00%
ncam_c,DM_ncam_c,-839.3,0,0.00%
psd5p_c,DM_psd5p_c,-1000.0,0,0.00%
thmpp_c,DM_thmpp_c,-1000.0,0,0.00%
2mba_e,EX_2mba_e,-1000.0,0,0.00%
2mpa_e,EX_2mpa_e,-1000.0,0,0.00%


In [21]:
cobra.io.write_sbml_model(unineg, "carveme_universes/universal_gramnegative.xml")

In [33]:
%%bash 
# Sulfitobacter
carve --universe-file carveme_universes/universal_gramnegative.xml \
    --solver gurobi \
    -o gems/MAG_00083_sulfitobacter_bacteria_M9_marine.xml \
    --init M9[marine] \
    --gapfill M9[marine] \
    --mediadb marine_media/SnakeCarveMe-master/media_db.tsv \
    --fbc2 \
    genomes/pseudo-nitzschia/TARA_ARC_108_MAG_00083.genepred.fasta

Set parameter Username
Academic license - for non-commercial use only - expires 2023-11-05
model: 
R_DM_4crsol_c: M_4crsol_c -->  [0.0, 1000]
R_BIOMASS_Ec_iJO1366_core_53p95M: 0.000223 M_10fthf_c + 2.6e-05 M_2fe2s_c + 0.000223 M_2ohph_c + 0.00026 M_4fe4s_c + 0.513689 M_ala__L_c + 0.000223 M_amet_c + 0.295792 M_arg__L_c + 0.241055 M_asn__L_c + 0.241055 M_asp__L_c + 54.124831 M_atp_c + 0.000122 M_bmocogdp_c + 2e-06 M_btn_c + 0.005205 M_ca2_c + 0.005205 M_cl_c + 0.000576 M_coa_c + 2.5e-05 M_cobalt2_c + 0.133508 M_ctp_c + 0.000709 M_cu2_c + 0.09158 M_cys__L_c + 0.026166 M_datp_c + 0.027017 M_dctp_c + 0.027017 M_dgtp_c + 0.026166 M_dttp_c + 0.000223 M_fad_c + 0.006715 M_fe2_c + 0.007808 M_fe3_c + 0.26316 M_gln__L_c + 0.26316 M_glu__L_c + 0.612638 M_gly_c + 0.215096 M_gtp_c + 48.601527 M_h2o_c + 0.094738 M_his__L_c + 0.290529 M_ile__L_c + 0.195193 M_k_c + 0.019456 M_kdo2lipid4_e + 0.450531 M_leu__L_c + 0.343161 M_lys__L_c + 0.153686 M_met__L_c + 0.008675 M_mg2_c + 0.000223 M_mlthf_c + 0.0006

## Extending CarveME's capabilities to curate universal models

CarveME provides functions to curate universal models. However, some of these are too restrictive, e.g., by removing all reactions in compartments different from c, p and e. I could add my functions to transfer these reactions to c and continue using CarveME's curation pipeline.

See: https://github.com/cdanielmachado/carveme/tree/master/carveme/universe.

In [1]:
import cobra 


model = cobra.io.read_sbml_model("/home/robaina/Documents/NewAtlantis/phycosphere/gems/MAG_00083_sulfitobacter_bacteria_M9_marine.xml")

Set parameter Username
Academic license - for non-commercial use only - expires 2023-11-05


In [2]:
model

0,1
Name,MAG_00083_sulfitobacter_bacteria_M9_marine
Memory address,7f478dc62190
Number of metabolites,2131
Number of reactions,3216
Number of genes,845
Number of groups,0
Objective expression,1.0*Growth - 1.0*Growth_reverse_699ae
Compartments,"cytoplasm, extracellular, periplasm"


In [3]:
model.summary()

Metabolite,Reaction,Flux,C-Number,C-Flux
ca2_e,EX_ca2_e,0.1598,0,0.00%
cl_e,EX_cl_e,0.1598,0,0.00%
cobalt2_e,EX_cobalt2_e,0.003071,0,0.00%
cu2_e,EX_cu2_e,0.02177,0,0.00%
fe3_e,EX_fe3_e,0.4596,0,0.00%
glc__D_e,EX_glc__D_e,10.0,0,0.00%
h2o_e,EX_h2o_e,10.0,0,0.00%
k_e,EX_k_e,5.994,0,0.00%
mg2_e,EX_mg2_e,0.2664,0,0.00%
mn2_e,EX_mn2_e,0.02122,0,0.00%

Metabolite,Reaction,Flux,C-Number,C-Flux
LPS56_VL_p,DM_LPS56_VL_p,-0.09354,0,0.00%
lps_c,DM_lps_c,-203.1,0,0.00%
pnto__R_c,DM_pnto__R_c,-19.92,0,0.00%
h2s_e,EX_h2s_e,-8.719,0,0.00%
h_e,EX_h_e,-1000.0,0,0.00%
hxa_e,EX_hxa_e,-81.46,0,0.00%
iamoh_e,EX_iamoh_e,-24.76,0,0.00%
ind3eth_e,EX_ind3eth_e,-6.546,0,0.00%
tchola_c,SK_tchola_c,-19.92,0,0.00%


In [8]:
cobalamin_rxns = [rxn for rxn in model.reactions if (("cobalamin" in rxn.name.lower()))]

for rxn in cobalamin_rxns:
    print(rxn.reaction, rxn.name)

adocbl_p + atp_c + h2o_c --> adocbl_c + adp_c + h_c + pi_c Adenosylcobalamin transport via ABC system (periplasm)
adocbl_e -->  Adenosylcobalamin exchange
atp_c + cbl2_e + h2o_c --> adp_c + cbl2_c + h_c + pi_c Cobalamin uptake in via ABC transport


## What about in Pseudo-nitzschia?

I added a cobalamin interconversion reaction in the environment (fake but needed)

In [23]:
model2 = cobra.io.read_sbml_model("gems/MAG_00212_pseudonitzschia_photoeuk.xml")
model2

0,1
Name,TARA_ARC_108_MAG_00212_cds
Memory address,7f477dc98d10
Number of metabolites,2229
Number of reactions,4765
Number of genes,1957
Number of groups,359
Objective expression,1.0*bof_c - 1.0*bof_c_reverse_660d5
Compartments,"c, u, e, h"


In [25]:
cobalamin_rxns = [rxn for rxn in model2.reactions if "cobalamin" in rxn.name.lower()]

for rxn in cobalamin_rxns:
    print(rxn.reaction, rxn.name)

5-METHYL-THF-GLU-N_c + cbl1_c --> C06453_c + THF-GLU-N_c cobalamin-dependent methionine synthase
cbl1_e <--  Cobalamin exchange
cbl1_e --> cbl1_c Cobalamin transport
cbl1_e <=> adocbl_e Cobalamin interconversion


In [26]:
[(rxn.reaction, rxn.name) for rxn in model2.reactions if "cbl1_c" in [met.id for met in rxn.metabolites]]

[('C06453_c + hcys__L_c --> cbl1_c + 4.0 h_c + met__L_c',
  'methionine synthase'),
 ('5-METHYL-THF-GLU-N_c + cbl1_c --> C06453_c + THF-GLU-N_c',
  'cobalamin-dependent methionine synthase'),
 ('cbl1_e --> cbl1_c', 'Cobalamin transport')]

In [34]:
# methonine synthesis
[(rxn.reaction, rxn.name) for rxn in model2.reactions if "met__L_c" in [met.id for met in rxn.metabolites]]

[('air_c + amet_c --> 4ampm_c + co_c + dad_5_c + for_c + 2.0 h_c + met__L_c',
  '4-amino-2-methyl-5-phosphomethylpyrimidine synthetase'),
 ('amet_c + cys__L_c + dtbt_c --> ala__L_c + btn_c + dad_5_c + h_c + met__L_c',
  'Biotin synthase'),
 ('2kmb_c + glu__L_c <=> akg_c + met__L_c',
  '2-keto-4-methylthiobutyrate transamination'),
 ('glyb_c + hcys__L_c --> dmgly_c + met__L_c',
  'Betaine-homocysteine S-methyltransferase'),
 ('2.0 amet_c + cpppg3_c --> 2.0 co2_c + 2.0 dad_5_c + 2.0 met__L_c + pppg9_c',
  'CPPPGO2'),
 ('hcys__L_c + mhpglu_c <=> hpglu_c + met__L_c',
  '5-methyltetrahydropteroyltriglutamate-homocysteine S-methyltransferase'),
 ('C06453_c + hcys__L_c --> cbl1_c + 4.0 h_c + met__L_c',
  'methionine synthase'),
 ('h2o_c + met__L_c --> 2obut_c + ch4s_c + nh4_c', 'METGL'),
 ('6-Dimethylallyladenosine37-tRNAs_c + Donor-H2_c + Sulfurated-Sulfur-Acceptors_c + 2.0 amet_c --> Acceptor_c + CPD-11592_c + Unsulfurated-Sulfur-Acceptors_c + ahcys_c + dad_5_c + h_c + met__L_c',
  'isopent

Cobalamin-dependent methionine synthase is split into two reactions in this model.

In [32]:
model2.reactions.get_by_id("EX_cbl1_e").lower_bound = -1000

In [None]:
model2.reactions

In [33]:
model2.summary()

  warn(f"{count} is not an integer (in formula {self.formula})")
  warn(f"{count} is not an integer (in formula {self.formula})")
  warn(f"{count} is not an integer (in formula {self.formula})")
  warn(f"{count} is not an integer (in formula {self.formula})")
  warn(f"{count} is not an integer (in formula {self.formula})")
  warn(f"{count} is not an integer (in formula {self.formula})")
  warn(f"{count} is not an integer (in formula {self.formula})")


Metabolite,Reaction,Flux,C-Number,C-Flux
4hpro_LT_e,EX_4hpro_LT_e,0.001,5,0.00%
CPD-15530_e,EX_CPD-15530_e,1.884,6,0.30%
CPD-17273_e,EX_CPD-17273_e,1.12,37,1.11%
acon_C_e,EX_acon_C_e,23.56,6,3.80%
adn_e,EX_adn_e,14.4,10,3.87%
adpglc_e,EX_adpglc_e,29.63,16,12.74%
akg_e,EX_akg_e,209.6,5,28.15%
amet_e,EX_amet_e,10.85,15,4.37%
asn__L_e,EX_asn__L_e,10.32,4,1.11%
asp__L_e,EX_asp__L_e,10.32,4,1.11%

Metabolite,Reaction,Flux,C-Number,C-Flux
CPD-254_c,DM_CPD-254_c,-0.1116,5.0,0.01%
biomass_c,DM_biomass_phaeo_c,-33.33,45.013529,40.30%
2pg_e,EX45__PG_e,-10.0,3.0,0.81%
13dpg_e,EX_13dpg_e,-10.0,3.0,0.81%
1odecg3p_e,EX_1odecg3p_e,-1.12,21.0,0.63%
26dap_LL_e,EX_26dap_LL_e,-10.0,7.0,1.88%
26dap__M_e,EX_26dap__M_e,-5.391,7.0,1.01%
3pg_e,EX_3pg_e,-10.0,3.0,0.81%
4pasp_e,EX_4pasp_e,-10.0,4.0,1.07%
CPD-15016_e,EX_CPD-15016_e,-10.0,5.0,1.34%


## Cobalamin metabolism

Here is a paper measuring pseudo and cobalamin variants in the ocean: https://www.pnas.org/doi/10.1073/pnas.1608462114

Alright, so cobalamin synthesis is not included in the model even though the genes are present in the genome. 

Possible causes:

1) Cobalamin synthesis not present in the BIGG database? It is in the universal model, so I don't think this is the case.

2) Cobalamin synthesis peptides in the BIGG database are not sufficiently representative of marine microbes, Diamond does not return matches (why not using HMMs here btw).

After checking CarveME source code: carveme relies on a protein database it generates from BIGG reactions (present in the BIGG models databae). There is a single sequences per protein which corresponds to the model species from which the reaction was retrieved. 

Possible solutions:

1) Using eggnog annotations instead of Diamond (retrieved way smaller models for some reason: check this)

2) Adding cobalamin synthesis pathways if  biosynthetic genes are identified in the genome: either with eggnog or by other means, such as HMMs (pynteny)

3) Add more representative cobalamin synthesis sequences to the BIGG database (not very efficient)

## Adenosylcobalamin synthase

<reaction metaid="R_ADOCBLS" sboTerm="SBO:0000176" id="R_ADOCBLS" name="Adenosylcobalamin 5&apos;-phosphate synthase" reversible="true" fast="false" fbc:lowerFluxBound="cobra_default_lb" fbc:upperFluxBound="cobra_default_ub">

Annotations:
    <rdf:li rdf:resource="http://identifiers.org/ec-code/2.7.8.26"/>
    <rdf:li rdf:resource="http://identifiers.org/biocyc/META:COBALAMINSYN-RXN"/>
    <rdf:li rdf:resource="http://identifiers.org/metanetx.reaction/MNXR95469"/>
    <rdf:li rdf:resource="http://identifiers.org/kegg.reaction/R05223"/>
    <rdf:li rdf:resource="http://identifiers.org/seed.reaction/rxn03538"/>

Ok, so, CarveME assigns reaction ```R_ADOCBLS2_2``` to _Sulfitobacter_. This reaction is represented in a single model in the BIGG database, and lacks annotations. This is suspicious. The model is iCN900 (_Clostridioides difficile 630_). The reaction:

5prdmbz_c + agdpcbi_c → adocbl_c + gmp_c + h_c

The protein sequence:

```MKRFILILQFLTRIPIKLNVGFDDEFYKSIVYFPLVGFVIGILSYLIGWISMLLFEPFIASIIITLAGVLITGGLHIDGLGDTFDAIYSYRDKEKMLEIMKDSRLGTNSLLAIMFVLLLKVGFVYDIISNNSLWVIIFMPMIARLGVMLLTYKTVTPREKGMGNLFIGKLTTSMLITAIIYTLLIVALITKFIFLLPNIVLIKVLGSIIVVFVFIILFKKHIYKKIDGVTGDILGCGIELSELVYLIYIYLLIFMFF```


Why does CarveME assign this weird reaction to Sulfitobacter instead of the regular one: R_ADOCBLS?

Eggnog-mapper annotes gene: TARA_ARC_108_MAG_00083_000000000084_2 as involved in the synthesis of adocbl in Sulfitobacter:

```
Joins adenosylcobinamide-GDP and alpha-ribazole to generate adenosylcobalamin (Ado-cobalamin). Also synthesizes adenosylcobalamin 5'-phosphate from adenosylcobinamide-GDP and alpha-ribazole 5'-phosphate	cobS	-	2.7.8.26	ko:K02233	ko00860,ko01100,map00860,map01100	M00122	R05223,R11174	RC00002,RC00078	ko00000,ko00001,ko00002,ko01000	-	-	-	CobS
```

## Cobalamin metabolism in Pseudo-nitzschia

The generated model does not contain metabolite ```adocbl_c``` (Adenosylcobalamin), only ```cbls1_c``` (cobalamin) which imports from the environment (```cbls1_e```) and ```C06453_c``` (methylcobalamin) which produces from ```cbls1_c```


Thus, to link both species, we need to convert adocbls to cbls.

IDEAS:

1) Add convenient spontaneous reactions to convert cobalamin to adenosylcobalamin and vice versa.

2) Assume cbl1_c really is adocbl_c since adocbl_c is the biologically active form, then interconvert them.


___ALSO___:

The current model doesn't care if cbl1 is not present. It has plenty other reactions producing met__L_c. My guess is that upper bounds are not set correctly, because the major flux producing met__L_c should be via the cobalamin-dependent methionine synthase and not these alternative reactions.

## Adding cobalamin interconversion "fake" reactions to the environmental compartment

Since adocbl, cbl1 and methylcobalamin (C06453) can be considered biological equivalents, and the literature is not very clear on which equivalents cells are taking (they measure the pool), e.g.: https://www.pnas.org/doi/10.1073/pnas.1608462114.


These reactions would be:

cbl1_e <-> adocbl_e
cbl1_e <-> C06453_e
adocbl_e <-> C06453_e

## Consequencies of cobalamin scarcity in the metabolism of diatoms

https://www.sciencedirect.com/science/article/abs/pii/S1434461018301184?via%3Dihub

## Using Nextflow to run a workflow

In [3]:
%%bash

nextflow run pipeline.nf \
    --genomes_dir /home/robaina/Documents/NewAtlantis/phycosphere/genomes/pseudo-nitzschia/ \
    --media_file /home/robaina/Documents/NewAtlantis/phycosphere/marine_media/SnakeCarveMe-master/media_db.tsv \
    --outdir /home/robaina/Documents/NewAtlantis/phycosphere/nextflow_output

N E X T F L O W  ~  version 23.04.1
Launching `pipeline.nf` [nauseous_mayer] DSL2 - revision: 4aa53eb58c
[-        ] process > carveme   -
[-        ] process > runMemote -
/home/robaina/Documents/NewAtlantis/phycosphere/genomes/pseudo-nitzschia/TARA_ARC_108_MAG_00139.genepred.fasta
/home/robaina/Documents/NewAtlantis/phycosphere/genomes/pseudo-nitzschia/TARA_ARC_108_MAG_00080.genepred.fasta
/home/robaina/Documents/NewAtlantis/phycosphere/genomes/pseudo-nitzschia/TARA_ARC_108_MAG_00179.genepred.fasta
/home/robaina/Documents/NewAtlantis/phycosphere/genomes/pseudo-nitzschia/TARA_ARC_108_MAG_00117.genepred.fasta
/home/robaina/Documents/NewAtlantis/phycosphere/genomes/pseudo-nitzschia/TARA_ARC_108_MAG_00201.genepred.fasta
/home/robaina/Documents/NewAtlantis/phycosphere/genomes/pseudo-nitzschia/TARA_ARC_108_MAG_00083.genepred.fasta
/home/robaina/Documents/NewAtlantis/phycosphere/genomes/pseudo-nitzschia/TARA_ARC_108_MAG_00174.genepred.fasta

executor >  local (7)
[4a/f0d180] process > carve

## Homology-based search using Diamond-Blastx: Eukarya

Halted the program after ca. 10 hours of running.

In [6]:
# %%bash

# carve \
#     --solver gurobi \
#     -o gems/MAG_00212_bigg.xml \
#     --universe-file carveme_universes/BIGG_universal_model/universal_model_cobrapy.xml \
#     genomes/pseudo-nitzschia/TARA_ARC_108_MAG_00212.fa

#     # --init Difco[Ala] \
#     # --gapfill Difco[Ala] \
#     # --mediadb marine_broth_ala_min.tsv \
#     # --hard /home/robaina/Documents/SymbNET/dokdonia_med134_forced_reactions.tsv \



Failed to run diamond.


# Checking PhotoEukStein models out

In [80]:
import cobra

model = cobra.io.read_sbml_model("gems/photoeukstein/TARA_ARC_108_MAG_00212.cds.xml")
model

0,1
Name,TARA_ARC_108_MAG_00212_cds
Memory address,7efb90c07cd0
Number of metabolites,2228
Number of reactions,4764
Number of genes,1957
Number of groups,359
Objective expression,1.0*bof_c - 1.0*bof_c_reverse_660d5
Compartments,"c, u, e, h"


In [81]:
[(rxn.id, rxn.name) for rxn in model.reactions if 'methionine synthase' in rxn.name.lower()]

[('HOMOCYSMETB12-RXN', 'methionine synthase'),
 ('RXN-21539', 'cobalamin-dependent methionine synthase'),
 ('O-ACETYLHOMOSERINE-THIOL-LYASE-RXN',
  'methionine synthase (B12-dependent) (EC 2.1.1.13)')]

In [29]:
model.reactions.get_by_id("HOMOCYSMETB12-RXN")

0,1
Reaction identifier,HOMOCYSMETB12-RXN
Name,methionine synthase
Memory address,0x7efb8ede5c10
Stoichiometry,C06453_c + hcys__L_c --> cbl1_c + 4.0 h_c + met__L_c  Methylcobalamin + L-Homocysteine --> cbl1 + 4.0 H+ + L-Methionine
GPR,TARA_ARC_108_MAG_00212_000000004337_12_1
Lower bound,0.0
Upper bound,1000.0


In [31]:
model.reactions.get_by_id("O-ACETYLHOMOSERINE-THIOL-LYASE-RXN")

0,1
Reaction identifier,O-ACETYLHOMOSERINE-THIOL-LYASE-RXN
Name,methionine synthase (B12-dependent) (EC 2.1.1.13)
Memory address,0x7efb8df8b010
Stoichiometry,achms_c + ch4s_c --> ac_c + h_c + met__L_c  O-Acetyl-L-homoserine + ch4s --> Acetate + H+ + L-Methionine
GPR,TARA_ARC_108_MAG_00212_000000004337_12_1
Lower bound,0.0
Upper bound,1000.0


Doesn't make any sense, why is the above reaction named B12-dependent met synthase, it doesn't use B12 in any of its forms. However, the associated gene is the same as the one assigned to the methionine synthase (B12-dependent)

## Check effect of knocking out met synthase

In [37]:
model.reactions.get_by_id("O-ACETYLHOMOSERINE-THIOL-LYASE-RXN")

0,1
Reaction identifier,O-ACETYLHOMOSERINE-THIOL-LYASE-RXN
Name,methionine synthase (B12-dependent) (EC 2.1.1.13)
Memory address,0x7efb8df8b010
Stoichiometry,achms_c + ch4s_c --> ac_c + h_c + met__L_c  O-Acetyl-L-homoserine + ch4s --> Acetate + H+ + L-Methionine
GPR,TARA_ARC_108_MAG_00212_000000004337_12_1
Lower bound,0.0
Upper bound,1000.0


In [82]:
model.reactions.get_by_id("HOMOCYSMETB12-RXN").upper_bound = 0
model.reactions.get_by_id("RXN-21539").upper_bound = 0
model.reactions.get_by_id("O-ACETYLHOMOSERINE-THIOL-LYASE-RXN").upper_bound = 0
model.slim_optimize()

33.33

In [44]:
model.reactions.get_by_id("biomass_pro_c").metabolites

{<Metabolite alatrna_c at 0x7efb913e8850>: -1.03497874254677,
 <Metabolite argtrna_c at 0x7efb90881810>: -0.4355910533849,
 <Metabolite asntrna_c at 0x7efb901c5150>: -0.3890920084299,
 <Metabolite asptrna_c at 0x7efb913eb450>: -0.38919200637604,
 <Metabolite atp_c at 0x7efb912ce450>: -9.40150690222409,
 <Metabolite cystrna_c at 0x7efb90881890>: -0.037999219533118,
 <Metabolite glntrna_c at 0x7efb90548750>: -0.452090714497964,
 <Metabolite glutrna_c at 0x7efb9146bd50>: -0.452190712444104,
 <Metabolite glytrna_c at 0x7efb90880d50>: -1.03167881032415,
 <Metabolite gtp_c at 0x7efb9149a190>: -18.802913806502,
 <Metabolite h2o_c at 0x7efb912f81d0>: -28.2044207087261,
 <Metabolite histrna_c at 0x7efb90881150>: -0.120797518936859,
 <Metabolite iletrna_c at 0x7efb90880190>: -0.446990819244835,
 <Metabolite leutrna_c at 0x7efb9146af50>: -0.702385573685844,
 <Metabolite lystrna_c at 0x7efb901c4d90>: -0.448490788436932,
 <Metabolite mettrna_c at 0x7efb90883e50>: -0.149896921263537,
 <Metabolite ph

In [45]:
model.metabolites.get_by_id("mettrna_c")

0,1
Metabolite identifier,mettrna_c
Name,L-Methionyl-tRNA
Memory address,0x7efb90883e50
Formula,C5H11NOS
Compartment,c
In 2 reaction(s),"METTL, biomass_pro_c"


__NOTE__: methionine can be synthesized from a bunch of reactions besides B12-dependent methionine synthase, why is this one so important? flux capacity?? If so, not reflected in the model.

In [47]:
[(rxn.name, rxn.id) for rxn in model.reactions if "met__L_c" in [met.id for met in rxn.products]]

[('4-amino-2-methyl-5-phosphomethylpyrimidine synthetase', 'AMPMS_h'),
 ('Biotin synthase', 'BTS_m'),
 ('2-keto-4-methylthiobutyrate transamination', 'UNK3'),
 ('Betaine-homocysteine S-methyltransferase', 'BHMT'),
 ('CPPPGO2', 'CPPPGO2'),
 ('5-methyltetrahydropteroyltriglutamate-homocysteine S-methyltransferase',
  'HOMOCYSMET-RXN'),
 ('methionine synthase', 'HOMOCYSMETB12-RXN'),
 ('isopentenyl-adenosine tRNA methylthiolase', 'RXN0-5063'),
 ('METt2r', 'METt2r'),
 ('ORNITHINE--OXO-ACID-AMINOTRANSFERASE-RXN-L-ORNITHINE/CPD-479//MET/L-GLUTAMATE_GAMMA-SEMIALDEHYDE.56.',
  'ORNITHINE--OXO-ACID-AMINOTRANSFERASE-RXN-L-ORNITHINE/CPD-479//MET/L-GLUTAMATE_GAMMA-SEMIALDEHYDE.56.'),
 ('TRANS-RXN18ZY-5-MET/NA+//MET/NA+.17.',
  'TRANS-RXN18ZY-5-MET/NA+//MET/NA+.17.'),
 ('methionine synthase (B12-dependent) (EC 2.1.1.13)',
  'O-ACETYLHOMOSERINE-THIOL-LYASE-RXN'),
 ('5_methyltetrahydrofolate_homocysteine methyltransferase', 'METSm')]

No effect at all!!

# Run memote on reconstructed GEMs

In [35]:
# %%bash 

# memote report snapshot \
#     --filename "gems/memote/TARA_ARC_108_MAG_00212.html" \
#     TARA_ARC_108_MAG_00212.xml


# memote report snapshot --filename "phycosphere/gems/memote/TARA_ARC_108_MAG_00212.html" phycosphere/TARA_ARC_108_MAG_00212.xml

# Running SMETANA on reconstructed GEMs

In [1]:
%%bash

smetana \
    gems/*.xml \
    -m M9[marine] \
    --mediadb marine_media/SnakeCarveMe-master/media_db.tsv \
    -o pseudo_nitzschia_interactions \
    --solver gurobi \
    --exclude compounds/inorganic.txt \
    --molweight --detailed

Set parameter Username
Academic license - for non-commercial use only - expires 2023-11-05


  warn(f"Atomic weight not listed for elements: {missing}")


Intertingly, SMETANA predicts only interactions from Pseudo-nitzschia to the bacterial species. Perhaps because cofactor and vitamin metabolism not represented in the GEMs? Check medium contains B12 precursors, and other cofactors since this would impact predictions.

Weird, even removing molibdate I still get no interactions regarding B12. Does this mean that all 4 species biosynthetize B12? Or perhaps Pseudo-nitzschia doesn't need it?

In [2]:
import pandas as pd


df = pd.read_csv("pseudo_nitzschia_interactions_detailed.tsv", sep="\t")
df

Unnamed: 0,community,medium,receiver,donor,compound,scs,mus,mps,smetana
0,all,M9[marine],MAG_00080_alteromonas_bacteria_M9_marine,MAG_00212_pseudonitzschia_photoeuk,M_ac_e,1.0,0.1,1,0.1
1,all,M9[marine],MAG_00080_alteromonas_bacteria_M9_marine,MAG_00212_pseudonitzschia_photoeuk,M_ala__D_e,1.0,0.04,1,0.04
2,all,M9[marine],MAG_00080_alteromonas_bacteria_M9_marine,MAG_00212_pseudonitzschia_photoeuk,M_ala__L_e,1.0,0.04,1,0.04
3,all,M9[marine],MAG_00080_alteromonas_bacteria_M9_marine,MAG_00212_pseudonitzschia_photoeuk,M_gly_e,1.0,0.18,1,0.18
4,all,M9[marine],MAG_00083_sulfitobacter_bacteria_M9_marine,MAG_00212_pseudonitzschia_photoeuk,M_ala__D_e,1.0,0.18,1,0.18
5,all,M9[marine],MAG_00083_sulfitobacter_bacteria_M9_marine,MAG_00212_pseudonitzschia_photoeuk,M_ala__L_e,1.0,0.01,1,0.01
6,all,M9[marine],MAG_00083_sulfitobacter_bacteria_M9_marine,MAG_00212_pseudonitzschia_photoeuk,M_cys__L_e,1.0,0.21,1,0.21
7,all,M9[marine],MAG_00083_sulfitobacter_bacteria_M9_marine,MAG_00212_pseudonitzschia_photoeuk,M_gln__L_e,1.0,0.01,1,0.01
8,all,M9[marine],MAG_00083_sulfitobacter_bacteria_M9_marine,MAG_00212_pseudonitzschia_photoeuk,M_gly_e,1.0,0.01,1,0.01
9,all,M9[marine],MAG_00083_sulfitobacter_bacteria_M9_marine,MAG_00212_pseudonitzschia_photoeuk,M_no3_e,1.0,0.01,1,0.01


## Check vitamin B12 usage and biosynthetic capabilities

*__methionine synthase enzyme__: METH

*__cobalamin-independent methionine synthase__: METE

Pseudo-nitzschia lacks a cobalamin-independent methionine synthase, thus requires cobalamin to survive: https://www.sciencedirect.com/science/article/abs/pii/S1434461016300682. However, its reconstructed GEM (TARA_ARC_MAG_00212) does not contain any cobalamin biosynthesis or uptake reactions.

From ref above: "The biosynthesis of vitamin B12 by marine prokaryotes affects oceanic B12 distributions, as these organisms are the sole source of the vitamin to eukaryotic algae, including diatoms (Croft et al. 2005)"

" Therefore, an important determinant of vitamin B12 distribution is the abundance and composition of marine prokaryotic communities. In addition, diatoms without METE may compete with other B12-auxotrophic algae and bacteria when vitamin concentrations are low (Bertrand et al. 2015)."

" Recent phylogenetic analysis of METE sequences among diverse algal groups indicates that multiple independent gene losses are likely the mechanism behind widespread but randomly distributed B12 auxotrophy (Helliwell et al. 2011)."

"While 62% of Southern Ocean diatoms possessed METE, the gene was identified in only 11% of non-Southern Ocean diatoms. Fisher’s Exact Test was used to compare the distribution of gene presence or absence in diatoms isolated from the Southern Ocean versus those isolated outside of it"

---

Pseudo-nitzschia requires bacterial partners to produce Domoic Acid (DA) in large quantities: https://link.springer.com/article/10.1007/s12562-009-0081-5.

From above reference:

"Recently, Bates et al. [16] reported that several morphotypes of bacteria are attached to the frustule of P. multiseries. They speculate a possible association of these epiphytic bacteria with the enhancement of domoic acid production in the P. multiseries cells, or even autonomous production of domoic acid. Our findings that direct contact with bacteria is necessary for P. multiseries to enhance domoic acid production supports their speculation."

---

From https://aslopubs.onlinelibrary.wiley.com/doi/pdf/10.1002/lno.10552:

"This work serves as compelling support for iron-induced
vitamin B12 limitation in the Northeast Pacific Ocean. Several studies have demonstrated that low concentrations of
vitamins can control phytoplankton biomass, with communities stimulated following vitamin enrichment experiments
(Sanudo-Wilhelmy et al. 2006; Gobler et al. 2007; Bertrand ~
et al. 2012; Koch et al, 2012; Bertrand et al. 2015). In such
experiments, addition of B1 and B12 resulted in community
composition shifts (Gobler et al. 2007, Koch et al. 2012)."


Could we either add iron or a diatom containing METE to limite HABs produced by Pseudo-nitzschia?

---
Pseudo-nitzschia requires cobalamin, and some Pn species also require thiamin and biotin (check model): https://www.pnas.org/doi/full/10.1073/pnas.1009566107


#### NOTES:

Pseudo-nitzchia MAG_00212 lacks cobalamin import from the environment.

In [22]:
import cobra


model = cobra.io.read_sbml_model("gems/photoeukstein/TARA_ARC_108_MAG_00212.cds.xml")
methylcobalamin = model.metabolites.get_by_id("C06453_c")
methylcobalamin

0,1
Metabolite identifier,C06453_c
Name,Methylcobalamin
Memory address,0x7efb916740d0
Formula,C63CoH91N13O14P
Compartment,c
In 2 reaction(s),"RXN-21539, HOMOCYSMETB12-RXN"


In [26]:
cobalamin = model.metabolites.get_by_id("cbl1_c")
cobalamin

0,1
Metabolite identifier,cbl1_c
Name,cbl1
Memory address,0x7fec485379a0
Formula,C62CoH88N13O14P
Compartment,c
In 2 reaction(s),"RXN-21539, HOMOCYSMETB12-RXN"


In [9]:
model.reactions.get_by_id("RXN-21539")

0,1
Reaction identifier,RXN-21539
Name,cobalamin-dependent methionine synthase
Memory address,0x7fec6e6d24f0
Stoichiometry,5-METHYL-THF-GLU-N_c + cbl1_c --> C06453_c + THF-GLU-N_c  a 5-methyltetrahydrofolate + cbl1 --> Methylcobalamin + a tetrahydrofolate
GPR,TARA_ARC_108_MAG_00212_000000004337_12_1
Lower bound,0.0
Upper bound,1000.0


In [10]:
model.reactions.get_by_id("HOMOCYSMETB12-RXN")

0,1
Reaction identifier,HOMOCYSMETB12-RXN
Name,methionine synthase
Memory address,0x7fec6e6d2df0
Stoichiometry,C06453_c + hcys__L_c --> cbl1_c + 4.0 h_c + met__L_c  Methylcobalamin + L-Homocysteine --> cbl1 + 4.0 H+ + L-Methionine
GPR,TARA_ARC_108_MAG_00212_000000004337_12_1
Lower bound,0.0
Upper bound,1000.0


In [17]:
import cobra


models = {}

models["ps0012"] = cobra.io.read_sbml_model("gems/MAG_00212_pseudonitzschia_photoeuk.xml")
models["altero00080"] = cobra.io.read_sbml_model("gems/MAG_00080_alteromonas_bacteria_M9_marine.xml")
models["altero00179"] = cobra.io.read_sbml_model("gems/MAG_00179_alteromonas_bacteria_M9_marine.xml")
models["marino00174"] = cobra.io.read_sbml_model("gems/MAG_00174_marinobacter_bacteria_M9_marine.xml")
models["polarib00201"] = cobra.io.read_sbml_model("gems/MAG_00201_polaribacter_bacteria_M9_marine.xml")
models["sulfito00083"] = cobra.io.read_sbml_model("gems/MAG_00083_sulfitobacter_bacteria_M9_marine.xml")

## Add cobalamin exchange

In [18]:
from cobra import Reaction, Metabolite, Gene

# Define a new exchange reaction
reaction = Reaction('EX_cbl1_e')
reaction.name = 'Cobalamin exchange'
reaction.subsystem = ''


# Define the stoichiometry of the reaction
metabolite1 = Metabolite('cbl1_e', name='Cobalamin', compartment='e')

reaction.add_metabolites({
    metabolite1: -1.0,  # reactant
})

# Add the gene-reaction rule
reaction.gene_reaction_rule = 'spontaneous'

reaction.lower_bound = -1000.0
reaction.upper_bound = 0.0

# Add the reaction to the model
models["ps0012"].add_reactions([reaction])

# ***********************************************************************
# Define transport to cytoplasm
reaction = Reaction('R_cbl1_transport')
reaction.name = 'Cobalamin transport'
reaction.subsystem = ''

# Define the stoichiometry of the reaction
metabolite1 = Metabolite('cbl1_e', name='Cobalamin', compartment='e')
metabolite2 = Metabolite('cbl1_c', name='Cobalamin', compartment='c')

reaction.add_metabolites({
    metabolite1: -1.0,  # reactant
    metabolite2: 1.0,  # product
})

# Add the gene-reaction rule
reaction.gene_reaction_rule = 'spontaneous'

reaction.lower_bound = 0.0
reaction.upper_bound = 1000.0

# Add the reaction to the model
models["ps0012"].add_reactions([reaction])

# **********************************************************************
# Define cobalamin interconversion reaction
reaction = Reaction('R_cbl_interconversion_e')
reaction.name = 'Cobalamin interconversion'
reaction.subsystem = ''


# Define the stoichiometry of the reaction
metabolite1 = Metabolite('cbl1_e', name='Cobalamin', compartment='e')
metabolite2 = Metabolite('adocbl_e', name='Adenosylcobalamin', compartment='e')

reaction.add_metabolites({
    metabolite1: -1.0,  # reactant
    metabolite2: 1.0,  # product
})

# Add the gene-reaction rule
reaction.gene_reaction_rule = 'spontaneous'

reaction.lower_bound = -1000.0
reaction.upper_bound = 1000.0

# Add the reaction to the model
models["ps0012"].add_reactions([reaction])

Ignoring reaction 'EX_cbl1_e' since it already exists.
Ignoring reaction 'R_cbl1_transport' since it already exists.


In [21]:
models["ps0012"].metabolites.get_by_id("cbl1_e")

0,1
Metabolite identifier,cbl1_e
Name,Cobalamin
Memory address,0x7f47834c8cd0
Formula,
Compartment,e
In 3 reaction(s),"EX_cbl1_e, R_cbl1_transport, R_cbl_interconversion_e"


In [22]:
cobra.io.write_sbml_model(models["ps0012"], "gems/MAG_00212_pseudonitzschia_photoeuk.xml")

Ok. It looks like methionine synthase is duplicated in this GEM (by photoeukstein). They share the same gene (TARA_ARC_108_MAG_00212_000000004337_12_1). Or, rather is t he same biochemical reaction divided in two processes, with cobalamin handling the methyl group transfer.

## What about cobalamin exchange in the bacterial GEMs?


Sulfitobacter pseudonitzschiae isolated from Pseudo-nitzschia and producing B12! https://www.microbiologyresearch.org/content/journal/ijsem/10.1099/ijs.0.064972-0


"About half of the marine phytoplankton species are B12 auxotrophs and rely on prototrophic prokaryotes to obtain this essential vitamin [1, 53]. Several co-culture experiments have confirmed that individual marine bacterial isolates, mainly Alphaproteobacteria, enable phytoplankton species to overcome their auxotrophy by providing the essential cofactor [13,14,15,16, 27, 28]."

"The fact that some bacteria do not voluntarily share B12 with ambient microorganisms, significantly increases the importance of processes, such as sloppy feeding by zooplankton or virus infections [44, 49,50,51], for the release of vitamins in the marine and likely also other ecosystems."

"Most B12-provider strains were isolated from or discovered in association with eukaryotic microorganisms, whereas most B12-retainer strains were isolated as free-living in the ocean"  e.g. Sulfitobacter pseudonitzschiae

https://www.nature.com/articles/s41396-023-01391-3

In [3]:
for model_id, model in models.items():
    print(model_id, [rxn.id for rxn in model.reactions if "methionine synthase" in rxn.name])

ps0012 ['HOMOCYSMETB12-RXN', 'RXN-21539', 'O-ACETYLHOMOSERINE-THIOL-LYASE-RXN']
altero00080 []
altero00179 []
marino00174 []
polarib00201 []


# Cobalamin metabolism absent from CarveME universal database

Sulfitobacter synthesizes cobalamin from precursors, but this is not represented in the universal database. We would need to add these reactions manually to t he universal database. Also consider using gapseq instead of CarveME.

In [4]:
for model_id, model in models.items():
    print(model_id, [met.id for met in model.metabolites if "cbl" in met.name])

ps0012 ['cbl1_c']
altero00080 []
altero00179 []
marino00174 []
polarib00201 []


# Trying gapseq to reconstruct models

gapseq for bacterial genomes, only

## How can one actually reconstruct a community GEM?

That is, Smetana is doing it under the hood, but how to obtain the xml file of this community model?

In [6]:
%%bash

merge_community \
    --init M9[marine] \
    --mediadb marine_media/SnakeCarveMe-master/media_db.tsv \
    --fbc2 \
    --output community_gem.xml \
    gems/*.xml

  warn(f'Exchange reaction not in model: {r_id}')


In [1]:
import cobra


com_gem = cobra.io.read_sbml_model('community_gem.xml')
com_gem

Set parameter Username
Academic license - for non-commercial use only - expires 2023-11-05


0,1
Name,community_gem
Memory address,7fe4c2ba1190
Number of metabolites,8460
Number of reactions,14159
Number of genes,5642
Number of groups,358
Objective expression,1.0*community_growth - 1.0*community_growth_reverse_7473b
Compartments,"extracellular environment, cytosol, periplasm, cytosol, periplasm, cytosol, periplasm, cytosol, periplasm, cytosol, periplasm, c_TARA_ARC_108_MAG_00212_cds, u_TARA_ARC_108_MAG_00212_cds, h_TARA_ARC_108_MAG_00212_cds"


# Building a new universal model for prokaryotes

Two models: gram-negative and gram-positive. Gram-negative contains: extracellular, periplasm and cytoplasm. Gram-positive only extracellular and cytoplasm.

1) Gran-negative, take all reactions that are not in those compartments and move them to cytoplasm. Remove shuttle/transport reactions between compartments that are not in gram neg

2) Remove duplicated reactions/metabolites in the model

In [15]:
import cobra


unineg = cobra.io.read_sbml_model('/home/robaina/Documents/NewAtlantis/phycosphere/carveme_universes/BIGG_universal_model/universal_model_cobrapy.xml')
unineg

0,1
Name,bigg_universal
Memory address,7f93c8b33090
Number of metabolites,15638
Number of reactions,28301
Number of genes,0
Number of groups,0
Objective expression,1.0*BIOMASS_reaction - 1.0*BIOMASS_reaction_reverse_5a818
Compartments,"cytoplasm, extracellular, periplasm, mitochondrion, peroxisome, unknown, nucleus, vacuole, golgi, thylakoid, lysosome, chloroplast, eyespot, flagellum, mitochondrial intermembrane space, unknown, unknown, unknown, unknown, mitochondrial membrane, cell wall, unknown"


## Remove unwanted shuttle reactions

In [18]:
from cobra import Model


def remove_shuttle_reactions(model: Model, allowed_compartments: set = {"c", "e", "p"}) -> Model:
    """
    Remove shuttle reactions between unwanted compartments.

    Args:
        model (Model): _description_
        allowed_compartments (set, optional): _description_. Defaults to {"c", "e", "p"}.

    Returns:
        Model: _description_
    """
    shuttle_rxns_in_unwanted_compartments = [
        rxn for rxn in unineg.reactions
        if (
            (len(rxn.compartments) > 1) and 
            (not rxn.compartments.issubset(allowed_compartments))
        )
        ]
    model.remove_reactions(shuttle_rxns_in_unwanted_compartments, remove_orphans=True)
    return model

In [19]:
gramneg_compartments = {"e", "c", "p"}
unineg = remove_shuttle_reactions(unineg, allowed_compartments=gramneg_compartments)

In [20]:
# just checking
[
    rxn for rxn in unineg.reactions
    if (
        (len(rxn.compartments) > 1) and 
        (not rxn.compartments.issubset(gramneg_compartments))
    )
    ]

[]

## Move reactions to cytoplasm

In [21]:
from cobra import Reaction, Model


def move_reactions_to_cytoplasm(model: Model, allowed_compartments: set = {"c", "e", "p"}) -> Model:
    """
    Update the metabolites of a reaction to include a new set of metabolites
    Args:
        reaction (Reaction): _description_
        allowed_compartments (set, optional): _description_. Defaults to {"c", "e", "p"}.

    Returns:
        Reaction: _description_
    """
    reactions_to_add = []
    reactions_to_remove = []
    for reaction in model.reactions:
        if not reaction.compartments.issubset(allowed_compartments):

            new_metabolites = {}
            for metabolite, stoich in reaction.metabolites.items():
                new_met_id = metabolite.id[:-1] + 'c'
                new_metabolite = model.metabolites.get_by_id(new_met_id) if new_met_id in model.metabolites else metabolite.copy()
                new_metabolite.compartment = 'c'
                new_metabolite.id = new_met_id
                if new_met_id not in model.metabolites:
                    model.add_metabolites([new_metabolite])
                    model.remove_metabolites([metabolite])
                new_metabolites[new_metabolite] = stoich

            new_reaction = Reaction(
                id=reaction.id,
                name=reaction.name,
                lower_bound=reaction.lower_bound,
                upper_bound=reaction.upper_bound,
                subsystem=reaction.subsystem,
                )
            new_reaction.gene_reaction_rule = reaction.gene_reaction_rule
            new_reaction.add_metabolites(new_metabolites)
            reactions_to_add.append(new_reaction)
            reactions_to_remove.append(reaction)

    model.remove_reactions(reactions_to_remove, remove_orphans=True)
    model.add_reactions(reactions_to_add)
    return model

In [22]:
unineg = move_reactions_to_cytoplasm(unineg)

In [23]:
unineg

0,1
Name,bigg_universal
Memory address,7f93c8b33090
Number of metabolites,11970
Number of reactions,25787
Number of genes,0
Number of groups,0
Objective expression,0
Compartments,"cytoplasm, extracellular, periplasm"


In [24]:
cobra.io.write_sbml_model(unineg, "carveme_universes/universal_gramnegative.xml")

In [25]:
# Test if any reaction remains in illegal comparments
[rxn for rxn in unineg.reactions if not rxn.compartments.issubset(gramneg_compartments)]

[]

## Add biomass reaction to universal gram negative model

Take reaction from universal CarveME gram negative model

In [4]:
import cobra 


carveme_gramneg = cobra.io.read_sbml_model("carveme_universes/universe_gramneg.xml")
unineg = cobra.io.read_sbml_model("carveme_universes/universal_gramnegative.xml")

In [16]:
growth = [rxn for rxn in carveme_gramneg.reactions if "biomass" in rxn.name.lower()][0]
unineg.add_reactions([growth])
unineg.objective = "Growth"
unineg

0,1
Name,bigg_universal
Memory address,7f493fb31d50
Number of metabolites,11970
Number of reactions,25788
Number of genes,0
Number of groups,0
Objective expression,1.0*Growth - 1.0*Growth_reverse_699ae
Compartments,"cytoplasm, extracellular, periplasm"


In [32]:
unineg.summary()

Metabolite,Reaction,Flux,C-Number,C-Flux

Metabolite,Reaction,Flux,C-Number,C-Flux


In [33]:
cobra.io.write_sbml_model(unineg, "carveme_universes/universal_gramnegative.xml")

# Memote report

In [1]:
import json


# Read memote report
with open('nextflow_output/memote_reports/TARA_ARC_108_MAG_00080.genepred.html', 'r') as f:
    memote_report = json.load(f)

n = 0
tests = memote_report["tests"]
for test_id, test_data in tests.items():
    if test_data["result"] == "failed":
        print(test_id)
        n += 1
print()
print(f"{n}/{len(tests)} ({100 * (n/len(tests))}%) tests failed")

test_blocked_reactions
test_exchange_specific_sbo_presence
test_find_deadends
test_find_duplicate_metabolites_in_compartments
test_find_duplicate_reactions
test_find_medium_metabolites
test_find_metabolites_not_consumed_with_open_bounds
test_find_metabolites_not_produced_with_open_bounds
test_find_reactions_unbounded_flux_default_condition
test_find_reactions_with_identical_genes
test_find_reactions_with_partially_identical_annotations
test_find_stoichiometrically_balanced_cycles
test_find_unique_metabolites
test_gene_protein_reaction_rule_presence
test_gene_sbo_presence
test_gene_specific_sbo_presence
test_metabolic_reaction_specific_sbo_presence
test_metabolite_sbo_presence
test_metabolite_specific_sbo_presence
test_reaction_charge_balance
test_reaction_mass_balance
test_reaction_sbo_presence
test_sink_specific_sbo_presence
test_transport_reaction_gpr_presence
test_transport_reaction_specific_sbo_presence

25/77 (32.467532467532465%) tests failed


In [11]:
tests["test_find_duplicate_reactions"]

{'data': [['APPAT', 'PTPATi'],
  ['BUPN', 'UPPN'],
  ['CYSS', 'CYSS_2'],
  ['DRBK', 'DRBK_1'],
  ['GLYK', 'GLYK1'],
  ['HMGL', 'HMGL_2'],
  ['IDPh_1', 'PPA'],
  ['MCCC', 'MCTC_1'],
  ['METOX1s', 'METSR_S2'],
  ['NADS1', 'NADS1_1'],
  ['NTP1', 'ATPM'],
  ['OCBT', 'OCBT_1'],
  ['PFK_3', 'PFK_4'],
  ['PPRGL', 'PRAGSr'],
  ['RBK', 'RBK2']],
 'duration': 0.10119694799868739,
 'format_type': 'percent',
 'message': None,
 'metric': 0.013736263736263736,
 'result': 'failed',
 'summary': "Identify reactions in a pairwise manner that use the same set\nof metabolites including potentially duplicate metabolites. Moreover, it\nwill take a reaction's directionality and compartment into account.\n\nThe main reason for having this test is to help cleaning up merged models\nor models from automated reconstruction pipelines as these are prone to\nhaving identical reactions with identifiers from different namespaces.\n\nImplementation:\n\nCompare reactions in a pairwise manner.\nFor each reaction, the me

In [4]:
duprxns = tests["test_find_duplicate_reactions"]["data"]
duprxns

[['APPAT', 'PTPATi'],
 ['BUPN', 'UPPN'],
 ['CYSS', 'CYSS_2'],
 ['DRBK', 'DRBK_1'],
 ['GLYK', 'GLYK1'],
 ['HMGL', 'HMGL_2'],
 ['IDPh_1', 'PPA'],
 ['MCCC', 'MCTC_1'],
 ['METOX1s', 'METSR_S2'],
 ['NADS1', 'NADS1_1'],
 ['NTP1', 'ATPM'],
 ['OCBT', 'OCBT_1'],
 ['PFK_3', 'PFK_4'],
 ['PPRGL', 'PRAGSr'],
 ['RBK', 'RBK2']]

In [3]:
import cobra


model = cobra.io.read_sbml_model("nextflow_output/gems/TARA_ARC_108_MAG_00080.genepred.xml")

Set parameter Username
Academic license - for non-commercial use only - expires 2023-11-05


In [5]:
i= 0
model.reactions.get_by_id(duprxns[i][0])

0,1
Reaction identifier,APPAT
Name,ATP:pantetheine-4'-phosphate adenylyltransferase
Memory address,0x7fcc1d1d3f50
Stoichiometry,atp_c + h_c + pan4p_c --> dpcoa_c + ppi_c  ATP C10H12N5O13P3 + H+ + Pantetheine 4'-phosphate --> Dephospho-CoA + Diphosphate
GPR,TARA_ARC_108_MAG_00080_000000000340_3
Lower bound,0.0
Upper bound,1000.0


In [6]:
model.reactions.get_by_id(duprxns[i][1])

0,1
Reaction identifier,PTPATi
Name,Pantetheine-phosphate adenylyltransferase
Memory address,0x7fcc1c4e9790
Stoichiometry,atp_c + h_c + pan4p_c --> dpcoa_c + ppi_c  ATP C10H12N5O13P3 + H+ + Pantetheine 4'-phosphate --> Dephospho-CoA + Diphosphate
GPR,TARA_ARC_108_MAG_00080_000000000340_3
Lower bound,0.0
Upper bound,1000.0


## Testing cobalamin auxotrophy in E.coli

Looking at the model, it seems like adocbl is synthesized from cobamide (cbi), which is imported from the environment (EX_cbi_e).

__NOTES__:

0) E. coli harbors a cobalamin-independent met synthase
1) Doesn't care about blocking cbi import, in fact lower bound was set to 0 by default
2) Cannot grow without cobalt2 import (EX_cobalt2_e). But this is because cobalt2_c is part of the biomass reaction...
3) Doesn't care about cobalamin either (EX_cbl1_e)

In [48]:
import cobra


model = cobra.io.read_sbml_model("iML1515.xml")

In [21]:
model.reactions.get_by_id('EX_cbi_e').lower_bound = 0
model.reactions.get_by_id('EX_cobalt2_e').lower_bound = -1000
model.reactions.get_by_id('ADOCBLS').upper_bound = 1000
model.reactions.get_by_id('CBLAT').bounds = (-1000,1000)
model.reactions.get_by_id('EX_cbl1_e').bounds = (0,1000)
model.slim_optimize()

0.8769972144269638

## Knocking out met synthase here

It does have an effect in iML1515.

The only met__L_c producing reactions that compromise growth are:
METS, BTS5 and TYRL.

In [49]:
[(rxn.name, rxn.id) for rxn in model.reactions if "methionine synthase" in rxn.name.lower()]

[('Methionine synthase', 'METS'),
 ('Carboxy-S-adenosyl-L-methionine synthase', 'CXSAMS')]

In [79]:
model.reactions.get_by_id("METS")

0,1
Reaction identifier,METS
Name,Methionine synthase
Memory address,0x7efb90eadb90
Stoichiometry,"5mthf_c + hcys__L_c --> h_c + met__L_c + thf_c  5-Methyltetrahydrofolate + L-Homocysteine --> H+ + L-Methionine + 5,6,7,8-Tetrahydrofolate"
GPR,b4019 or b3829
Lower bound,0.0
Upper bound,1000


In [78]:
model.reactions.get_by_id('TYRL').upper_bound = 1000
model.slim_optimize()

0.8769972144269715

In [57]:
# Reactions producing met__L
[(rxn.name, rxn.id) for rxn in model.reactions if "met__L_c" in [met.id for met in rxn.products]]

[('Homocysteine S-methyltransferase', 'HCYSMT'),
 ('Methionine synthase', 'METS'),
 ('L-methionine-S-oxide reductase', 'METSOXR1'),
 ('L-methionine-R-sulfoxide reductase', 'METSOXR2'),
 ('L-methionine transport via ABC system (periplasm)', 'METabcpp'),
 ('Homocysteine Methyltransferase', 'HCYSMT2'),
 ('Oxygen Independent coproporphyrinogen-III oxidase', 'CPPPGO2'),
 ('Biotin synthase', 'BTS5'),
 ('Lipoate synthase', 'LIPOS'),
 ('Tyrosine lyase', 'TYRL')]