Merge pull request #100 from SysBioChalmers/devel

GECKO 2.0.0
SysBioChalmers · May 18, 2020 · 4c576c2 · 4c576c2
2 parents 5c397aa + 003150f
commit 4c576c2
Show file tree

Hide file tree

Showing 48 changed files with 151,303 additions and 729 deletions.
diff --git a/.gitignore b/.gitignore
@@ -41,9 +41,13 @@ Thumbs.db
 *.asv
 *.m~
 *.mex*
-*.mlappinstall 
+*.mlappinstall
 *.mltbx
-helpsearch*/ 
+helpsearch*/
+
+# Python stuff #
+################
+.idea/
 
 # documentation builds #
 ########################
@@ -53,4 +57,4 @@ docs/_*
 #########
 geckopy/geckopy/data_files/*.xml
 *~
-geckomat/*.mat
+geckomat/*.mat
diff --git a/Databases/abs_proteomics.txt b/Databases/abs_proteomics.txt
diff --git a/Databases/fermentationData.txt b/Databases/fermentationData.txt
@@ -0,0 +1,2 @@
+Condition	Ptot	D	Glucose [mmol/gDw h]	CO2 production [mmol/gDwh]	Oxygen uptake [mmol/gDwh]	Pyruvate	succinate	Glycerol	Acetate	Ethanol
+Std	0.4228	0.1	1.1	2.7	2.5	8.06E-05	1.00E-05	0.001893379	0.089483471	1.00E-05
diff --git a/README.rst b/README.rst
@@ -11,7 +11,7 @@ The **GECKO** toolbox is a Matlab/Python package for enhancing a **G**\ enome-sc
 - ``geckomat``: Matlab+Python scripts to fetch online data and build/simulate enzyme-constrained models.
 - ``geckopy``: a Python package which can be used with `cobrapy <https://opencobra.github.io/cobrapy/>`_ to obtain an ecYeastGEM model object, optionally adjusted for provided proteomics data.
 
-Last update: 2019-05-03
+Last update: 2020-05-18
 
 This repository is administered by Benjamin J. Sanchez (`@BenjaSanchez <https://github.com/benjasanchez>`_), Division of Systems and Synthetic Biology, Department of Biology and Biological Engineering, Chalmers University of Technology.
 
@@ -33,7 +33,7 @@ Required software - Python module
 Required software - Matlab module
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-- `MATLAB <http://www.mathworks.com/>`_ (7.5 or higher) + Optimization Toolbox.
+- `MATLAB <http://www.mathworks.com/>`_ 9.1 (R2016b) or higher + Optimization Toolbox.
 - The `COBRA toolbox for MATLAB <https://github.com/opencobra/cobratoolbox>`_.
 - The `RAVEN toolbox for MATLAB <https://github.com/SysBioChalmers/RAVEN>`_.
 - The `libSBML MATLAB API <https://sourceforge.net/projects/sbml/files/libsbml/MATLAB%20Interface>`_ (version 5.17.0 is recommended).
@@ -44,26 +44,25 @@ Usage
 - **For creating an enzyme constrained model:**
 
   - Update the following data files in ``/databases`` with your organism information:
-  
-    - ``databases/prot_abundance.txt``: Protein abundance Data from Pax-DB. If data is not available for your organism, then a relative proteomics dataset (in molar fractions) can be used instead. The required format is a tab-separated file, named as ``databases/relative_proteomics.txt`` , with a single header line and 2 columns; the first with gene IDs and the second with the relative abundances for each protein. 
+
+    - ``databases/prot_abundance.txt``: Protein abundance Data from Pax-DB. If data is not available for your organism, then a relative proteomics dataset (in molar fractions) can be used instead. The required format is a tab-separated file, named as ``databases/relative_proteomics.txt`` , with a single header line and 2 columns; the first with gene IDs and the second with the relative abundances for each protein.
     - ``databases/uniprot.tab``: Gene-proteins data from uniprot.
     - ``databases/chemostatData.tsv``: Chemostat data for estimating GAM (optional, called by ``fitGAM.m``).
     - ``databases/manual_data.txt``: Kcat data from eventual manual curations (optional, called by ``manualModifications.m``).
-	
+
   - Adapt the following functions in ``/geckomat`` to your organism:
-  
-    - ``geckomat/enhanceGEM.m``
+
+    - ``geckomat/getModelParameters.m``
     - ``geckomat/change_model/manualModifications.m``
     - ``geckomat/limit_proteins/sumProtein.m``
     - ``geckomat/limit_proteins/scaleBioMass.m``
     - ``geckomat/kcat_sensitivity_analysis/changeMedia_batch.m`` (optional)
     - ``geckomat/change_model/removeIncorrectPathways.m`` (optional, called by ``manualModifications.m``)
     - ``geckomat/limit_proteins/sumBioMass.m`` (optional, called by ``sumProtein.m`` & ``scaleBiomass.m``)
-    - ``geckomat/limit_proteins/fitGAM.m`` (optional, called by ``scaleBiomass.m``)
-
+
   - Run ``geckomat/get_enzyme_data/updateDatabases.m`` to update ``ProtDatabase.mat``.
   - Run ``geckomat/enhanceGEM.m`` with your metabolic model as input.
-  
+
 - **For performing simulations with an enzyme-constrained model:** Enzyme-constrained models can be used as any other metabolic model, with toolboxes such as COBRA or RAVEN. For more information on rxn/met naming convention, see the supporting information of `Sanchez et al. (2017) <https://dx.doi.org/10.15252/msb.20167411>`_
 
 geckopy: Integrating proteomic data to ecYeastGEM

diff --git a/docs/conf.py b/docs/conf.py
@@ -54,9 +54,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = u'1.3.5'
+version = u'2.0.0'
 # The full version, including alpha/beta/rc tags.
-release = u'1.3.5'
+release = u'2.0.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -163,6 +163,3 @@
      author, 'GECKO', 'One line description of project.',
      'Miscellaneous'),
 ]
-
-
-
diff --git a/geckomat/brenda_parser/retrieveBRENDA.py b/geckomat/brenda_parser/retrieveBRENDA.py
@@ -115,7 +115,7 @@ def extract_field(field,last):
 import string
 import hashlib
 from SOAPpy import SOAPProxy ## for usage without WSDL file
-endpointURL = 'http://www.brenda-enzymes.org/soap/brenda_server.php'
+endpointURL = 'https://www.brenda-enzymes.org/soap/brenda_server.php'
 client      = SOAPProxy(endpointURL)
 password    = hashlib.sha256(password).hexdigest()
 credentials = email + ',' + password

diff --git a/geckomat/change_model/addEnzymesToRxn.m b/geckomat/change_model/addEnzymesToRxn.m
@@ -15,7 +15,7 @@
 % model             Modified GEM structure (1x1 struct)
 % 
 % Cheng Zhang & Ivan Domenzain. Last edited: 2018-09-07
-% Eduard Kerkhoven & Benjamin Sanchez. Last edited: 2018-11-05
+% Eduard Kerkhoven & Benjamin Sanchez. Last edited: 2019-09-29
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 function model = addEnzymesToRxn(model,kvalues,rxn,newMets,newRxnName,protGenes)
@@ -24,20 +24,21 @@
     protGenes = '';
 end
 
+%Get compartment name for new enzymes:
+cd ..
+parameters = getModelParameters;
+cd change_model
+
 %Define all necessary parts for new (or changed) rxn:
 rxnIndex = strcmp(model.rxns,rxn); 
 metS     = model.mets(model.S(:,rxnIndex) < 0)';
 metP     = model.mets(model.S(:,rxnIndex) > 0)';
 coeffsS  = model.S(model.S(:,rxnIndex)<0,rxnIndex)';
 coeffsP  = model.S(model.S(:,rxnIndex)>0,rxnIndex)';
 
-%Find default compartment:
-cytIndex = strcmpi(model.compNames,'cytoplasm');
-if sum(cytIndex) == 1
-    comp = model.comps{cytIndex};	%For simplification all proteins are in cytosol
-else
-    comp = model.comps{1};
-end
+%Find compartment id:
+compIndex = strcmpi(model.compNames,parameters.enzyme_comp);
+comp      = model.comps{compIndex};
 
 %Include enzyme in reaction:
 rxnToAdd.mets         = [metS,newMets,metP];

diff --git a/geckomat/enhanceGEM.m b/geckomat/enhanceGEM.m
@@ -1,9 +1,29 @@
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% [ecModel,ecModel_batch] = enhanceGEM(model,toolbox,name,version)
-%
-% Benjamin J. Sanchez & Ivan Domenzain. Last edited: 2018-10-25
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 function [ecModel,ecModel_batch] = enhanceGEM(model,toolbox,name,version)
+% enhanceGEM
+%
+%   Main function for running the GECKO pipeline. It returns an ecModel and
+%   its constrained version with an upper limit in the total protein pool
+%   usage pseudoreaction (ecModel_batch) with calibrated Kcat parameters that
+%   allow it to grow at a specified experimental growth rate.
+%
+%   model       a GEM MATLAB structure compatible with the COBRA or RAVEN
+%               toolbox.
+%   toolbox     string with the name of the preferred toolbox for model SBML
+%               export (COBRA or RAVEN)
+%   name        Desired name for the ecModel (opt, default '')
+%   version     version of the original GEM (opt, default '')
+%
+%
+%   ecModel        an ecModel MATLAB structure suitable for incorporation of   
+%                  proteomics data as individual enzyme usage constraints.
+%   ecModel_batch  an ecModel MATLAB structure with a global constraint on 
+%                  the total protein pool usage pseudoreaction,
+%                  proportional to the measured total protein content (Ptot)
+%
+%   Usage: [ecModel,ecModel_batch] = enhanceGEM(model,toolbox,name,version)
+%
+%   Ivan Domenzain. Last edited: 2019-07-13
+%
 
 if nargin < 3
     name    = '';
@@ -12,37 +32,31 @@
     version = '';
 end
 
-%Provide your organism scientific name
-org_name = 'saccharomyces cerevisiae';
-
 %Convert model to RAVEN for easier visualization later on:
 format short e
 if isfield(model,'rules')
     initCobraToolbox
     model = ravenCobraWrapper(model);
 end
-
+%Get model-specific parameters
+parameters = getModelParameters;
 %Remove blocked rxns + correct model.rev:
 cd change_model
 [model,name,version] = preprocessModel(model,name,version);
 
 %Retrieve kcats & MWs for each rxn in model:
 cd ../get_enzyme_data
 model_data = getEnzymeCodes(model);
-kcats      = matchKcats(model_data,org_name);
+kcats      = matchKcats(model_data,parameters.org_name);
 
 %Integrate enzymes in the model:
 cd ../change_model
 ecModel                 = readKcatData(model_data,kcats);
 [ecModel,modifications] = manualModifications(ecModel);
 
 %Constrain model to batch conditions:
-sigma    = 0.5;      %Optimized for glucose
-Ptot     = 0.5;      %Assumed constant
-gR_exp   = 0.41;     %[g/gDw h] Max batch gRate on minimal glucose media
-c_source = 'D-glucose exchange (reversible)'; %Rxn name for the glucose uptake reaction
 cd ../limit_proteins
-[ecModel_batch,OptSigma] = getConstrainedModel(ecModel,c_source,sigma,Ptot,gR_exp,modifications,name);
+[ecModel_batch,OptSigma] = getConstrainedModel(ecModel,modifications,name);
 disp(['Sigma factor (fitted for growth on glucose): ' num2str(OptSigma)])
 
 %Save output models:
@@ -51,6 +65,4 @@
 ecModel_batch = saveECmodel(ecModel_batch,toolbox,[name '_batch'],version);
 cd ../geckomat
 
-end
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+end
diff --git a/geckomat/getModelParameters.m b/geckomat/getModelParameters.m
@@ -0,0 +1,68 @@
+function parameters = getModelParameters
+% getModelParameters
+%
+%   Set model and organism specific parameters that are used by the
+%   ecModel generation pipeline.
+%
+%   Ivan Domenzain. Last edited: 2020-01-20
+%
+
+%Average enzyme saturation factor
+parameters.sigma = 0.5;
+
+%Total protein content in the cell [g protein/gDw]
+parameters.Ptot = 0.5;      %Assumed constant
+
+%Minimum growth rate the model should grow at [1/h]
+parameters.gR_exp = 0.41;     %[g/gDw h] 
+
+%Provide your organism scientific name
+parameters.org_name = 'saccharomyces cerevisiae';
+
+%Provide your organism KEGG ID
+parameters.keggID = 'sce';
+
+%The name of the exchange reaction that supplies the model with carbon (rxnNames)
+parameters.c_source = 'D-glucose exchange (reversible)'; 
+
+%Rxn Id for biomass pseudoreaction
+parameters.bioRxn = 'r_4041';
+
+%Rxn Id for non-growth associated maintenance pseudoreaction
+parameters.NGAM = 'r_4046';
+
+%Compartment name in which the added enzymes should be located
+parameters.enzyme_comp = 'cytoplasm';
+
+%Rxn names for the most common experimentally measured "exchange" fluxes
+%For glucose and o2 uptakes add the substring: " (reversible)" at the end
+%of the corresponding rxn name. This is due to the irreversible model
+%nature of ecModels. NOTE: This parameter is only used by fitGAM.m, so if
+%you do not use said function you do not need to define it.
+parameters.exch_names{1} = 'growth';
+parameters.exch_names{2} = 'D-glucose exchange (reversible)';
+parameters.exch_names{3} = 'oxygen exchange (reversible)';
+parameters.exch_names{4} = 'carbon dioxide exchange';
+
+%Biomass components pseudoreactions (proteins, carbs and lipids lumped
+%pools). NOTE: This parameter is only used by scaleBioMass.m, so if you do
+%not use said function you do not need to define it. (optional)
+parameters.bio_comp{1} = 'protein';
+parameters.bio_comp{2} = 'carbohydrate';
+parameters.bio_comp{3} = 'lipid backbone';
+parameters.bio_comp{4} = 'lipid chain';
+
+%Polymerization costs from Forster et al 2003 - table S8. NOTE: This
+%parameter is only used by scaleBioMass.m, so if you do not use said
+%function you do not need to define it. (optional)
+parameters.pol_cost(1) = 37.7; %Ptot 
+parameters.pol_cost(2) = 12.8; %Ctot
+parameters.pol_cost(3) = 26.0; %RNA 
+parameters.pol_cost(4) = 26.0; %DNA
+
+%Rxn IDs for reactions in the oxidative phosphorylation pathway (optional)
+parameters.oxPhos{1} = 'r_1021';
+parameters.oxPhos{2} = 'r_0439';
+parameters.oxPhos{3} = 'r_0438';
+parameters.oxPhos{4} = 'r_0226';
+end
diff --git a/geckomat/get_enzyme_data/updateDatabases.m b/geckomat/get_enzyme_data/updateDatabases.m
@@ -1,23 +1,25 @@
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% [swissprot,kegg] = updateDatabases
-% Updates all databases for protein matching (KEGG and Swiss-Prot).
-%
-% keggID    three- or four-letter species abbrevation from KEGG, see
-%           https://www.genome.jp/kegg/catalog/org_list.html
+function [swissprot,kegg] = updateDatabases
+% updateDatabases
+%   Updates all databases for protein matching (KEGG and Swiss-Prot).
 %
-% Note: Before using this script, one should manually download from 
-%       http://www.uniprot.org/uniprot a tab delimited file for the
-%       desired organism with the following format:
-%       Entry - Protein names - Gene names - EC number - Sequence
-%       OBS: filter with the Swiss-Prot option
+%    Note: Before using this script, one should manually download from 
+%          http://www.uniprot.org/uniprot a tab delimited file for the
+%          desired organism with the following format:
+%          Entry - Protein names - Gene names - EC number - Sequence
+%          OBS: filter with the Swiss-Prot option
 % 
-% Benjamín Sánchez & Cheng Zhang. Last edited: 2017-10-24
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-function [swissprot,kegg] = updateDatabases(keggID)
+%   Usage: [swissprot,kegg] = updateDatabases
+%
+% Benjamin Sanchez, Cheng Zhang, Ivan Domenzain. Last edited: 2019-07-12
+%
 
-if nargin<1 || ~regexp(keggID,'[a-z]{3,4}')
-    error('Please specify the KEGG organism ID')
+current = pwd;
+cd ..
+parameters = getModelParameters;
+keggID     = parameters.keggID;
+cd (current)
+if ~regexp(keggID,'[a-z]{3,4}')
+    error('Please specify the KEGG organism ID in the script getModelParameters.m')
 end
 
 %Build Swissprot table: