Skip to content

Commit

Permalink
Merge pull request #61 from ICB-DCM/feature_problem_from_dfs
Browse files Browse the repository at this point in the history
Allow creating Problem from pre-existing data frames

* Allow creating Problem from pre-existing data frames

* Flake...

* Model name and sbml_file should not be mandatory

* Refactor Problem. Remove file names, pickle DataFrames and Model directly. See discussion #61

* Fix whitespace-checking issues, extend check (Closes #59)

* Make petablint more error-tolerant and prettify

* Speedup handle_missing_overrides for larger problems (229s->64s)

* Speedup handle_missing_overrides; match names a bit looser

* Doc. Fail.

* Use ctor in Problem.from_files
  • Loading branch information
dweindl committed Feb 18, 2019
2 parents 0f93d63 + 86fe7aa commit 04f7fa4
Show file tree
Hide file tree
Showing 6 changed files with 310 additions and 200 deletions.
86 changes: 51 additions & 35 deletions bin/petablint.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,28 @@

import argparse
import petab
import os
import sys
import logging
from colorama import (init as init_colorama, Fore)

logger = logging.getLogger(__name__)


class LintFormatter(logging.Formatter):
    """Log formatter that colors each message according to its log level.

    Only the message text is emitted, prefixed with the colorama foreground
    color registered for the record's level in ``formats``. Records whose
    level has no entry in ``formats`` are rendered with the format string
    this formatter was constructed with.
    """

    # Per-level format strings: color escape followed by the bare message.
    formats = {
        logging.DEBUG: Fore.CYAN + '%(message)s',
        logging.INFO: Fore.GREEN + '%(message)s',
        logging.WARNING: Fore.YELLOW + '%(message)s',
        logging.ERROR: Fore.RED + '%(message)s',
    }

    def format(self, record):
        """Return ``record`` formatted with the format string of its level.

        The format string of the underlying style object is swapped for the
        duration of the call and restored afterwards.

        Arguments:
            record: The ``logging.LogRecord`` to format.

        Returns:
            The formatted log message.
        """
        format_orig = self._style._fmt
        self._style._fmt = LintFormatter.formats.get(record.levelno, self._fmt)
        try:
            return logging.Formatter.format(self, record)
        finally:
            # Restore even if formatting raises, so that one bad record
            # does not leave the formatter stuck on a per-level format.
            self._style._fmt = format_orig


def parse_cli_args():
Expand Down Expand Up @@ -62,43 +83,38 @@ def parse_cli_args():

def main():
args = parse_cli_args()
init_colorama(autoreset=True)

ch = logging.StreamHandler()
if args.verbose:
print('Checking...')
print('\tSBML model:', args.sbml_file_name)
print('\tCondition table:', args.condition_file_name)
print('\tMeasurement table:', args.measurement_file_name)
print('\tParameter table:', args.parameter_file_name)

default_files = [os.path.isfile(args.sbml_file_name),
os.path.isfile(args.condition_file_name),
os.path.isfile(args.measurement_file_name),
os.path.isfile(args.parameter_file_name)]

not_found_files = [num for num, file_exists in enumerate(
default_files) if not file_exists]

tmp = list(vars(args).values())
tmp = tmp[1:-1] # remove first index (True)

if len(not_found_files) == 0:
problem = petab.Problem(args.sbml_file_name,
args.condition_file_name,
args.measurement_file_name,
args.parameter_file_name)

petab.lint.lint_problem(problem)

elif len(not_found_files) == len(default_files):
print("All files missing")

ch.setLevel(logging.DEBUG)
else:
if args.verbose:
print('Missing files found:')
for element in not_found_files:
print('* ', tmp[element])

# TO DO: continue petab running
ch.setLevel(logging.WARN)
ch.setFormatter(LintFormatter())
logging.basicConfig(level=logging.DEBUG, handlers=[ch])

logger.debug('Looking for...')
if args.sbml_file_name:
logger.debug('\tSBML model: ' + args.sbml_file_name)
if args.condition_file_name:
logger.debug('\tCondition table: ' + args.condition_file_name)
if args.measurement_file_name:
logger.debug('\tMeasurement table: ' + args.measurement_file_name)
if args.parameter_file_name:
logger.debug('\tParameter table: ' + args.parameter_file_name)

try:
problem = petab.Problem.from_files(
sbml_file=args.sbml_file_name,
condition_file=args.condition_file_name,
measurement_file=args.measurement_file_name,
parameter_file=args.parameter_file_name)
except FileNotFoundError as e:
logger.error(e)
sys.exit(1)

ret = petab.lint.lint_problem(problem)
sys.exit(ret)


if __name__ == '__main__':
Expand Down
201 changes: 123 additions & 78 deletions petab/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,109 +18,133 @@
class Problem:
"""
PEtab parameter estimation problem as defined by
- sbml model
- SBML model
- condition table
- measurement table
- parameter table [optional]
Attributes:
sbml_file: PEtab SBML model
condition_file: PEtab condition table
measurement_file: PEtab measurement table
parameter_file: PEtab parameter table
model_name: name of the model
condition_df: @type pandas.DataFrame
measurement_df: @type pandas.DataFrame
parameter_df: @type pandas.DataFrame
sbml_reader: @type libsbml.SBMLReader
Stored to keep object alive.
sbml_document: @type libsbml.Document
Stored to keep object alive.
sbml_model: @type libsbml.Model
"""

def __init__(self,
sbml_file,
condition_file,
measurement_file,
parameter_file=None,
model_name=None):
sbml_model: libsbml.Model = None,
sbml_reader: libsbml.SBMLReader = None,
sbml_document: libsbml.SBMLDocument = None,
condition_df: pd.DataFrame = None,
measurement_df: pd.DataFrame = None,
parameter_df: pd.DataFrame = None):

if model_name is None:
model_name = os.path.splitext(os.path.split(sbml_file)[-1])[0]
self.model_name = model_name

self.measurement_file = measurement_file
self.condition_file = condition_file
self.parameter_file = parameter_file
self.sbml_file = sbml_file
self.condition_df = condition_df
self.measurement_df = measurement_df
self.parameter_df = parameter_df

self.condition_df = None
self.measurement_df = None
self.parameter_df = None
self._load_dfs()

self.sbml_reader = None
self.sbml_document = None
self.sbml_model = None
self._load_sbml()
self.sbml_reader = sbml_reader
self.sbml_document = sbml_document
self.sbml_model = sbml_model

def __getstate__(self):
state = self.__dict__.copy()
# libsbml stuff cannot be serialized
# dfs can be recreated
for key in ['sbml_reader', 'sbml_document', 'sbml_model',
'condition_df', 'measurement_df', 'parameter_df']:

# libsbml stuff cannot be serialized directly
if self.sbml_model:
sbml_document = self.sbml_model.getSBMLDocument()
sbml_writer = libsbml.SBMLWriter()
state['sbml_string'] = sbml_writer.writeSBMLToString(sbml_document)

exclude = ['sbml_reader', 'sbml_document', 'sbml_model']
for key in exclude:
state.pop(key)

return state

def __setstate__(self, state):
# load SBML model from pickled string
sbml_string = state.pop('sbml_string', None)
if sbml_string:
self.sbml_reader = libsbml.SBMLReader()
self.sbml_document = \
self.sbml_reader.readSBMLFromString(sbml_string)
self.sbml_model = self.sbml_document.getModel()

self.__dict__.update(state)

# load sbml from file name
self._load_sbml()
@staticmethod
def from_files(sbml_file: str = None,
condition_file: str = None,
measurement_file: str = None,
parameter_file: str = None):
"""
Factory method to load model and tables from files.
Arguments:
sbml_file: PEtab SBML model
condition_file: PEtab condition table
measurement_file: PEtab measurement table
parameter_file: PEtab parameter table
"""

# reload dfs
self._load_dfs()
sbml_model = sbml_document = sbml_reader = None
condition_df = measurement_df = parameter_df = None

if condition_file:
condition_df = get_condition_df(condition_file)
if measurement_file:
measurement_df = get_measurement_df(measurement_file)
if parameter_file:
parameter_df = get_parameter_df(parameter_file)
if sbml_file:
sbml_reader = libsbml.SBMLReader()
sbml_document = sbml_reader.readSBML(sbml_file)
sbml_model = sbml_document.getModel()

problem = Problem(condition_df=condition_df,
measurement_df=measurement_df,
parameter_df=parameter_df,
sbml_model=sbml_model,
sbml_document=sbml_document,
sbml_reader=sbml_reader)

return problem

@staticmethod
def from_folder(folder):
def from_folder(folder: str, model_name: str = None):
"""
Factory method to use the standard folder structure
and file names.
and file names, i.e.
${model_name}/
+-- experimentalCondition_${model_name}.tsv
+-- measurementData_${model_name}.tsv
+-- model_${model_name}.xml
+-- parameters_${model_name}.tsv
Arguments:
folder:
Path to the directory in which the files are located.
model_name:
If specified, overrides the model component in the file names.
Defaults to the last component of `folder`.
"""

folder = os.path.abspath(folder)
model_name = os.path.split(folder)[-1]
if model_name is None:
model_name = os.path.split(folder)[-1]

return Problem(
return Problem.from_files(
condition_file=get_default_condition_file_name(model_name, folder),
measurement_file=get_default_measurement_file_name(
model_name, folder),
measurement_file=get_default_measurement_file_name(model_name,
folder),
parameter_file=get_default_parameter_file_name(model_name, folder),
sbml_file=get_default_sbml_file_name(model_name, folder),
model_name=model_name
)

def _load_dfs(self):
"""
Load condition, measurement, and parameter dataframes.
"""
self.condition_df = get_condition_df(self.condition_file)
self.measurement_df = get_measurement_df(self.measurement_file)
if self.parameter_file:
self.parameter_df = get_parameter_df(self.parameter_file)
else:
self.parameter_df = None

def _load_sbml(self):
"""
Load SBML model.
"""
# sbml_reader and sbml_document must be kept alive.
# Otherwise operations on sbml_model will segfault
self.sbml_reader = libsbml.SBMLReader()
self.sbml_document = self.sbml_reader.readSBML(self.sbml_file)
self.sbml_model = self.sbml_document.getModel()

def get_constant_parameters(self):
"""
Provide list of IDs of parameters which are fixed (i.e. not subject
Expand Down Expand Up @@ -240,7 +264,7 @@ def get_condition_df(condition_file_name):
"""

condition_df = pd.read_csv(condition_file_name, sep='\t')
lint.assert_no_trailing_whitespace(
lint.assert_no_leading_trailing_whitespace(
condition_df.columns.values, "condition")

try:
Expand All @@ -258,7 +282,7 @@ def get_parameter_df(parameter_file_name):
"""

parameter_df = pd.read_csv(parameter_file_name, sep='\t')
lint.assert_no_trailing_whitespace(
lint.assert_no_leading_trailing_whitespace(
parameter_df.columns.values, "parameter")

try:
Expand All @@ -276,7 +300,7 @@ def get_measurement_df(measurement_file_name):
"""

measurement_df = pd.read_csv(measurement_file_name, sep='\t')
lint.assert_no_trailing_whitespace(
lint.assert_no_leading_trailing_whitespace(
measurement_df.columns.values, "measurement")

return measurement_df
Expand Down Expand Up @@ -496,18 +520,23 @@ def handle_missing_overrides(mapping_par_opt_to_par_sim, observable_ids):
"""
Find all observable parameters and noise parameters that were not mapped,
and set their mapping to np.nan.
Assumes that parameters matching "(noise|observable)Parameter[0-9]+_" were
all supposed to be overwritten.
"""
_missed_vals = []
for observable_id in observable_ids:
rex = re.compile("(noise|observable)Parameter[0-9]+_" + observable_id)
for i_condition, mapping_for_condition in \
enumerate(mapping_par_opt_to_par_sim):
for i_val, val in enumerate(mapping_for_condition):
if isinstance(val, numbers.Number):
continue
if rex.match(val):
mapping_for_condition[i_val] = np.nan
_missed_vals.append((i_condition, i_val, val))
rex = re.compile("^(noise|observable)Parameter[0-9]+_")
for i_condition, mapping_for_condition in \
enumerate(mapping_par_opt_to_par_sim):
for i_val, val in enumerate(mapping_for_condition):
try:
matches = rex.match(val)
except TypeError:
continue

if matches:
mapping_for_condition[i_val] = np.nan
_missed_vals.append((i_condition, i_val, val))

if len(_missed_vals):
logger.warning(f"Could not map the following overrides "
Expand Down Expand Up @@ -689,7 +718,7 @@ def create_condition_df(parameter_ids, condition_ids=None):
return df


def create_measurement_df():
def create_measurement_df() -> pd.DataFrame:
"""Create empty measurement dataframe"""

df = pd.DataFrame(data={
Expand Down Expand Up @@ -803,3 +832,19 @@ def get_observable_id(parameter_id):
return parameter_id[len('sigma_'):]

raise ValueError('Cannot extract observable id from: ' + parameter_id)


def measurements_have_replicates(measurement_df: pd.DataFrame):
    """Tests whether the measurements come with replicates

    The measurement table is grouped by the columns that
    ``get_notnull_columns`` selects from observableId,
    simulationConditionId, preequilibrationConditionId and time; any group
    whose size differs from one counts as a replicate.

    Arguments:
        measurement_df: Measurement table

    Returns:
        True if there are replicates, False otherwise
    """
    candidate_columns = ['observableId', 'simulationConditionId',
                         'preequilibrationConditionId', 'time']
    grouping_columns = get_notnull_columns(measurement_df, candidate_columns)
    group_sizes = measurement_df.groupby(grouping_columns).size().values
    # Equivalent to checking for a non-zero entry in ``group_sizes - 1``.
    return np.any(group_sizes != 1)
Loading

0 comments on commit 04f7fa4

Please sign in to comment.