Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: correct update prot pool #276

Merged
merged 12 commits into from
Mar 20, 2023
2 changes: 1 addition & 1 deletion protocol.m
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@
ecModel = setParam(ecModel,'lb',params.bioRxn,0.99*abs(sol.f));
ecModel = setParam(ecModel,'obj','prot_pool_exchange',-1);
sol = solveLP(ecModel)
disp(['Minimum protein pool usage: ' num2str(abs(sol.f)) ' ug/gDCW'])
disp(['Minimum protein pool usage: ' num2str(abs(sol.f)) ' mg/gDCW'])

% STEP 23 Compare fluxes from ecModel and starting model
% Constrain with the same conditions to model and ecModel. We now fix the
Expand Down
2 changes: 1 addition & 1 deletion src/geckomat/limit_proteins/fillProtConcs.m
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
function model = fillProtConcs(model, protData)
% fillProtConcs
% Uses the protein concentrations from protData to fill model.ec.concs.
% Protein levels should be provided in ug/gDCW. If no data is provided
% Protein levels should be provided in mg/gDCW. If no data is provided
% a particular protein, its level is NaN. Existing entries in
% model.ec.concs are overwritten.
%
Expand Down
79 changes: 65 additions & 14 deletions src/geckomat/limit_proteins/updateProtPool.m
Original file line number Diff line number Diff line change
@@ -1,45 +1,96 @@
function [model, newPtot] = updateProtPool(model, Ptot, modelAdapter)
function [model, PdiffEnz, f] = updateProtPool(model, measuredProt, Ptot, modelAdapter)
% updateProtPool
% Updates the protein pool to compensate for measured proteomics data (in
% model.ec.concs).
%
% Input:
% model an ecModel in GECKO 3 format (with ecModel.ec structure)
% Ptot total protein content, overwrites modelAdapter value
% modelAdapter a loaded model adapter (Optional,
%
% measuredProt average sum of total measured protein in proteomics.tsv
% in mg/gDW, reported as protData.measuredProt by
% loadProtData
% Ptot total protein content, overwrites modelAdapter value.
% If specified, condition-specific fluxData.Ptot from
% loadFluxData can be used.
% modelAdapter a loaded model adapter (Optional, will otherwise use the
% default model adapter).
% Output:
% model an ecModel where model.ec.concs is populated with
% protein concentrations
% newPtot new Ptot
% PdiffEnz non-measured enzyme content, in mg/gDCW
% f f-factor as determined from proteomics data
% Usage:
% [model, newPtot] = updateProtPool(model, Ptot, modelAdapter)
% [model, newPtot] = updateProtPool(model, measuredProt, Ptot, modelAdapter)

if nargin < 3 || isempty(modelAdapter)
if nargin < 4 || isempty(modelAdapter)
modelAdapter = ModelAdapterManager.getDefaultAdapter();
if isempty(modelAdapter)
error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
end
end
params = modelAdapter.params;

if nargin < 2 || isempty(Ptot)
if nargin < 3 || isempty(Ptot)
Ptot = params.Ptot;
disp(['Total protein content used: ' num2str(Ptot) ' [g protein/gDw]'])
end

if nargin < 2 || isempty(measuredProt)
% assumes not measured protein
measuredProt = Ptot * params.f * 1000;
disp(['Measured protein assumed: ' num2str(measuredProt/1000) ' [g protein/gDw]'])
end

% Ptot the "real" total protein content in the cell
% Pmeas the measured protein content (sum of proteomics data). Is lower
% than Ptot, as not all protein are measured in a proteomics
% experiment. Equals protData.measuredProt from loadProtData.
% f the ratio enzyme/protein: how many of proteins are enzymes
% m the ratio measured/total protein: how much is measured
% PtotEnz the "real" total enzyme content in the cell. Equals f * Ptot
% PmeasEnz the measured enzyme content. Equals f * Pmeas, also equals
% sum(ecModel.ec.concs)
% PdiffEnz the non-measured enzyme content. Equals PtotEnz - PmeasEnz.
%
% Without proteomics, protein pool is constraint by PtotEnz (= f * Ptot).
% With proteomics, protein pool should be constraint by PdiffEnz.
% [And the above two constraints are both multiplied by sigma-factor, which
% is unrelated to the adjustments made here]

% Convert ptot to mg/gDW
Ptot = Ptot * 1000;

originalLB = model.lb(strcmp(model.rxns,'prot_pool_exchange'));
% Calculate the protein in the model
PmeasEnz = sum(model.ec.concs,'omitnan');
PtotEnz = Ptot * 1000 * params.f;

if PmeasEnz == 0
error('The model have not been constrained with proteomics')
end

% New f factor
f = PmeasEnz / measuredProt;
% Total enzyme when extrapolating from this dataset
PtotEnz = Ptot * f;

% Draw the protein in model from the measured proteins
PdiffEnz = PtotEnz - PmeasEnz;

if PdiffEnz > 0
Pdiff = (PdiffEnz / params.f)/1000; % Convert back to g protein/gDCW
model = setProtPoolSize(model, Pdiff, params.f, params.sigma, modelAdapter);
Pdiff = PdiffEnz / 1000; % Convert back to g protein/gDCW
model = setProtPoolSize(model, Pdiff, f, params.sigma, modelAdapter);
sol = solveLP(model);
if isempty(sol.x)
error(['Changing protein pool to ' num2str(Pdiff*params.f, params.sigma) ' resuls in a non-functional model'])
% Try relaxing sigma-factor
model = setProtPoolSize(model, Pdiff, f, 1, modelAdapter);
if isempty(sol.x)
error(['Changing protein pool to ' num2str(Pdiff * f * params.sigma) ' results in a non-functional model'])
else
fprintf(['Relaxing of sigma-factor was required to yield a functional model.\n' ...
'The sigma-factor is now set to 1, run ''sigmaFitter'' using the PdiffEnz\n' ...
'and f outputs from updateProtPool to reduce the sigma-factor.'])
end
end
else
error('The total measured protein mass exceeds the total protein content.')
end
newPtot = PdiffEnz / 1000 / params.f;
PdiffEnz = PdiffEnz / 1000;
end
21 changes: 14 additions & 7 deletions src/geckomat/utilities/loadProtData.m
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
function protData = loadProtData(replPerCond, protDataFile, filterData, modelAdapter, minVal, maxRSD, maxMissing, cutLowest, addStdevs)
% loadProtData
% Function that loads absolute proteomics data and returns mean values
% across replicates for each condition in the data file. By default it
% also filters the data by various criteria (see input parameters).
% Function that loads absolute proteomics data (in mg/gDCW) and returns
% mean values across replicates for each condition in the data file. By
% default it also filters the data by various criteria, to remove
% uncertain data (see input parameters).
%
% Input:
% replPerCond vector with number of replicates for each condition in
% the dataset. Example: [3, 2] if first conditions has
% triplicates and second condition has duplicates.
% protDataFile path to file with proteomics data. (Optional, default
% reads data/proteomics.tsv as specified in modelAdapter)
% protDataFile path to file with proteomics data, where protein levels
% are in mg/gDCW (Optional, default reads
% data/proteomics.tsv as specified in modelAdapter)
% Alternatively, protDataFile can be a protData structure
% that was previously made by loadProtdata.
% filterData logical whether abundances should be filtered. If
Expand Down Expand Up @@ -42,6 +44,9 @@
% abundances matrix of proteomics data, where each
% column contains mean abundances per
% condition
% measuredProt array with measured proteins, where each
% column contains mean of unfiltered total
% abundances sum per condition in mg/gDW.
%
% Usage:
% protData = loadProtData(replPerCond, protDataFile, filterData, modelAdapter, minVal, maxRSD, maxMissing, cutLowest, addStdevs)
Expand Down Expand Up @@ -99,12 +104,12 @@
abundances(remData,:) = [];
m = size(abundances,1);
filtAbund = nan(m,numel(replPerCond));
measuredProt = zeros(1,numel(replPerCond));

if filterData
m = size(abundances,1);
filtAbund = nan(m,numel(replPerCond));
for i=1:numel(replPerCond)
condAbund = abundances(:,1:replPerCond(i));
measuredProt(i) = mean(sum(condAbund,1));
if i<numel(replPerCond)
abundances = abundances(:,replPerCond(i)+1:end);
end
Expand Down Expand Up @@ -137,6 +142,7 @@
else
for i=1:numel(replPerCond)
condAbund = abundances(:,1:replPerCond(i));
measuredProt(i) = mean(sum(condAbund,1));
if i<numel(replPerCond)
abundances = abundances(:,replPerCond(i)+1:end);
end
Expand All @@ -146,5 +152,6 @@
notAllNan = logical(sum(~isnan(filtAbund),2));
protData.abundances = filtAbund(notAllNan,:);
protData.uniprotIDs = uniprotIDs(notAllNan);
protData.measuredProt = measuredProt;
end