Skip to content

Commit

Permalink
remove 'dataframe_file' command-line argument, and combine run result…
Browse files Browse the repository at this point in the history
…s in a HDF5 file automatically; also, remove unneeded MultialgorithmCheckpoint.convert_checkpoints()
  • Loading branch information
artgoldberg committed May 22, 2018
1 parent 3ae0990 commit 5112bbe
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 89 deletions.
23 changes: 3 additions & 20 deletions tests/multialgorithm/test_multialgorithm_main.py
Expand Up @@ -15,6 +15,7 @@
from argparse import Namespace
from capturer import CaptureOutput
from copy import copy
import warnings

from wc_lang.core import SpeciesType
from wc_sim import __main__
Expand All @@ -39,7 +40,6 @@ def setUp(self):
end_time=100,
checkpoint_period=3,
checkpoints_dir=self.checkpoints_dir,
dataframe_file=os.path.join(self.checkpoints_dir, 'dataframe_file.h5'),
fba_time_step=5
)

Expand All @@ -52,27 +52,19 @@ def test_process_and_validate_args1(self):
SimController.process_and_validate_args(self.args)
for arg in ['model_file', 'end_time', 'checkpoint_period', 'fba_time_step']:
self.assertEqual(getattr(original_args, arg), self.args.__dict__[arg])
for arg in ['checkpoints_dir', 'dataframe_file']:
self.assertTrue(self.args.__dict__[arg].startswith(getattr(original_args, arg)))

self.args.dataframe_file = os.path.join(self.checkpoints_dir, 'dataframe_file_no_suffix')
original_args = copy(self.args)
SimController.process_and_validate_args(self.args)
self.assertEqual(self.args.dataframe_file, original_args.dataframe_file + '.h5')
self.assertTrue(self.args.checkpoints_dir.startswith(original_args.checkpoints_dir))

def test_process_and_validate_args2(self):
# test files specified relative to home directory
relative_tmp_dir = os.path.join('~/tmp/', os.path.basename(self.user_tmp_dir))
self.args.checkpoints_dir=relative_tmp_dir
self.args.dataframe_file=os.path.join(relative_tmp_dir, 'dataframe_file.h5')
SimController.process_and_validate_args(self.args)
for arg in ['checkpoints_dir', 'dataframe_file']:
for arg in ['checkpoints_dir']:
self.assertIn(getattr(self.args, arg).replace('~', ''), self.args.__dict__[arg])

def test_process_and_validate_args3(self):
# test no files
self.args.checkpoints_dir=None
self.args.dataframe_file=None
SimController.process_and_validate_args(self.args)
for arg, value in self.args.__dict__.items():
self.assertEqual(getattr(self.args, arg), value)
Expand All @@ -91,13 +83,6 @@ def test_process_and_validate_args4(self):
with self.assertRaises(ValueError):
SimController.process_and_validate_args(bad_args)

def test_process_and_validate_args5(self):
# test dataframe_file requires checkpoints_dir
self.args.checkpoints_dir=None
with self.assertRaisesRegexp(ValueError,
'dataframe_file cannot be specified unless checkpoints_dir is provided'):
SimController.process_and_validate_args(self.args)

# @unittest.skip("Fails when simulation writes to stdout, as when debugging")
def test_app_run(self):
argv = [
Expand All @@ -106,7 +91,6 @@ def test_app_run(self):
'10',
'--checkpoint-period', '3',
'--checkpoints-dir', self.checkpoints_dir,
'--dataframe-file', os.path.join(self.checkpoints_dir, 'dataframe_file.h5'),
'--fba-time-step', '5',
]
with __main__.App(argv=argv) as app:
Expand All @@ -126,7 +110,6 @@ def run_simulate(self, args):

def test_simulate_wo_output_files(self):
self.args.checkpoints_dir = None
self.args.dataframe_file = None
num_events, results_dir = self.run_simulate(self.args)
self.assertTrue(0 < num_events)
self.assertEqual(results_dir, None)
Expand Down
8 changes: 0 additions & 8 deletions tests/multialgorithm/test_multialgorithm_simulation.py
Expand Up @@ -218,14 +218,6 @@ def perform_ssa_test_run(self, model_type, run_time, initial_specie_copy_numbers
# check the checkpoint times
self.assertEqual(MultialgorithmCheckpoint.list_checkpoints(self.checkpoint_dir), self.checkpoint_times(run_time))

# check the dataframe representation
pred_species_pops = MultialgorithmCheckpoint.convert_checkpoints(self.checkpoint_dir)
self.assertEqual(type(pred_species_pops), pandas.DataFrame)
self.assertEqual(list(pred_species_pops.index), self.checkpoint_times(run_time))
for species_id, population in initial_specie_copy_numbers.items():
self.assertEqual(pred_species_pops.loc[0.0, species_id], population)
pred_species_pops.plot()

def test_run_ssa_suite(self):
specie = 'spec_type_0[c]'
self.perform_ssa_test_run('1 species, 1 reaction',
Expand Down
14 changes: 10 additions & 4 deletions tests/multialgorithm/test_run_results.py
Expand Up @@ -10,9 +10,11 @@
import unittest
import shutil
import tempfile
from argparse import Namespace
import warnings

from wc_sim.multialgorithm.multialgorithm_errors import MultialgorithmError
from wc_sim.multialgorithm.__main__ import SimController
from wc_sim.multialgorithm.run_results import RunResults


Expand All @@ -23,14 +25,19 @@ def setUp(self):
self.checkpoints_dir = tempfile.mkdtemp()
self.checkpoints_copy = os.path.join(self.checkpoints_dir, 'checkpoints_copy')
shutil.copytree(self.CHECKPOINTS_DIR, self.checkpoints_copy)
self.metadata = {'test': 3}
self.args = Namespace(
model_file='filename',
end_time=10,
checkpoint_period=3,
checkpoints_dir=self.checkpoints_dir,
fba_time_step=5
)
self.metadata = SimController.create_metadata(self.args)

def tearDown(self):
shutil.rmtree(self.checkpoints_dir)

def test_run_results(self):
# ignore 'PerformanceWarning' warnings
warnings.simplefilter("ignore")

run_results_1 = RunResults(self.checkpoints_copy, self.metadata)
for component in RunResults.COMPONENTS:
Expand All @@ -42,7 +49,6 @@ def test_run_results(self):
self.assertTrue(run_results_1.get(component).equals(run_results_2.get(component)))

def test_run_results_errors(self):
warnings.simplefilter("ignore")

with self.assertRaises(MultialgorithmError):
RunResults(self.checkpoints_copy)
Expand Down
36 changes: 13 additions & 23 deletions wc_sim/multialgorithm/__main__.py
Expand Up @@ -25,6 +25,7 @@
from wc_lang.io import Reader
from wc_lang.prepare import PrepareModel, CheckModel
from wc_sim.multialgorithm.multialgorithm_checkpointing import MultialgorithmCheckpoint
from wc_sim.multialgorithm.run_results import RunResults

# ignore 'setting concentration' warnings
warnings.filterwarnings('ignore', '.*setting concentration.*', )
Expand All @@ -49,14 +50,12 @@ class Meta:
help="End time for the simulation (sec)")),
(['--checkpoints-dir'], dict(
type=str,
help="Store simulation results; if provided, a timestamped sub-directory will hold results")),
help="Store simulation results; if provided, a timestamped sub-directory will hold results, "
"including an HDF5 file that can be accessed through a RunResults object")),
(['--checkpoint-period'], dict(
type=float,
default=config['checkpoint_period'],
help="Checkpointing period (sec)")),
(['--dataframe-file'], dict(
type=str,
help="File for storing Pandas DataFrame of checkpoints; written in HDF5; requires checkpoints-dir")),
(['--fba-time-step'], dict(
type=float,
default=config['fba_time_step'],
Expand All @@ -80,24 +79,13 @@ def process_and_validate_args(args):
:obj:`ValueError`: if any of the command line arguments are invalid
"""

# process dataframe_file
if args.dataframe_file and not args.checkpoints_dir:
raise ValueError("dataframe_file cannot be specified unless checkpoints_dir is provided")

# create results directory
if args.checkpoints_dir:
results_sup_dir = os.path.abspath(os.path.expanduser(args.checkpoints_dir))
args.checkpoints_dir = os.path.join(results_sup_dir, datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S'))
if not os.path.isdir(args.checkpoints_dir):
os.makedirs(args.checkpoints_dir)

# prepare dataframe file path
if args.dataframe_file:
args.dataframe_file = os.path.abspath(os.path.expanduser(args.dataframe_file))
# suffix for HDF5 dataframe_file
if not args.dataframe_file.endswith('.h5'):
args.dataframe_file = args.dataframe_file + '.h5'

# validate args
if args.end_time <= 0:
raise ValueError("End time ({}) must be positive".format(args.end_time))
Expand All @@ -112,11 +100,16 @@ def process_and_validate_args(args):

@staticmethod
def create_metadata(args):
""" Initialize metadata for this simulation run
""" Record metadata for this simulation run
Args:
args (:obj:`object`): parsed command line arguments
args (:obj:`Namespace`): parsed command line arguments for this simulation run
Returns:
:obj:`SimulationMetadata`: a metadata record for this simulation run, but missing
the simulation `run_time`
"""
# print('type(args)', type(args))
model = ModelMetadata.create_from_repository()

# author metadata
Expand Down Expand Up @@ -182,14 +175,11 @@ def simulate(args):
num_events = simulation_engine.simulate(args.end_time)
simulation_metadata.run.record_end()

if args.dataframe_file:
pred_species_pops = MultialgorithmCheckpoint.convert_checkpoints(args.checkpoints_dir)
store = pandas.HDFStore(args.dataframe_file)
store['dataframe'] = pred_species_pops
store.close()

print('Simulated {} events'.format(num_events))
if args.checkpoints_dir:
# use RunResults to summarize results in an HDF5 file in args.checkpoints_dir
# print('type(simulation_metadata)', type(simulation_metadata))
RunResults(args.checkpoints_dir, simulation_metadata)
print("Saved checkpoints in '{}'".format(args.checkpoints_dir))

return (num_events, args.checkpoints_dir)
Expand Down
30 changes: 2 additions & 28 deletions wc_sim/multialgorithm/multialgorithm_checkpointing.py
Expand Up @@ -6,14 +6,12 @@
:License: MIT
"""

# TODO(Arthur): discard when tossing convert_checkpoints
import numpy
import pandas

import os

from wc_utils.util.misc import obj_to_str
from wc_sim.log.checkpoint import Checkpoint # TODO(Arthur): discard when tossing convert_checkpoints
from wc_sim.log.checkpoint import Checkpoint
from wc_sim.core.simulation_checkpoint_object import CheckpointSimulationObject, AccessStateObjectInterface
from wc_sim.core.sim_metadata import SimulationMetadata
from wc_sim.multialgorithm.submodels.ssa import SSASubmodel
Expand All @@ -23,31 +21,7 @@
class MultialgorithmCheckpoint(Checkpoint):
""" Checkpoint class that holds multialgorithmic checkpoints
"""

# TODO(Arthur): discard: superseded by RunResults
@staticmethod
def convert_checkpoints(dirname):
""" Convert the species population in saved checkpoints into a pandas dataframe
Args:
dirname (:obj:`str`): directory containing the checkpoint data
Returns:
:obj:`pandas.DataFrame`: the species population in a simulation checkpoint history
"""
# create an empty DataFrame
checkpoints = Checkpoint.list_checkpoints(dirname)
checkpoint = Checkpoint.get_checkpoint(dirname, time=0)
species_pop, _ = checkpoint.state
species_ids = species_pop.keys()
pred_species_pops = pandas.DataFrame(index=checkpoints, columns=species_ids, dtype=numpy.float64)

# load the DataFrame
for time in Checkpoint.list_checkpoints(dirname):
species_populations, _ = Checkpoint.get_checkpoint(dirname, time=time).state
for species_id,population in species_populations.items():
pred_species_pops.loc[time, species_id] = population
return pred_species_pops
pass


class AccessState(AccessStateObjectInterface):
Expand Down
8 changes: 2 additions & 6 deletions wc_sim/multialgorithm/run_results.py
Expand Up @@ -55,8 +55,7 @@ def __init__(self, results_dir, metadata=None):
if metadata is None:
raise MultialgorithmError("'metadata' must be provided to create an HDF5 file")

# TODO(Arthur): instead, use the metadata provided in __init__()
metadata, population_df, aggregate_states_df, random_states_s = self.convert_checkpoints()
population_df, aggregate_states_df, random_states_s = self.convert_checkpoints()

# create the HDF file containing the run results
# populations
Expand Down Expand Up @@ -114,9 +113,6 @@ def convert_checkpoints(self):
# create pandas objects for species populations, aggregate states and simulation random states
checkpoints = Checkpoint.list_checkpoints(self.results_dir)
first_checkpoint = Checkpoint.get_checkpoint(self.results_dir, time=0)
# temporarily grab metadata from the first checkpoint
# TODO(Arthur): instead, use the metadata provided in __init__()
metadata = first_checkpoint.metadata
species_pop, aggregate_state = first_checkpoint.state

species_ids = species_pop.keys()
Expand Down Expand Up @@ -144,4 +140,4 @@ def convert_checkpoints(self):

random_states_s[time] = pickle.dumps(checkpoint.random_state)

return (metadata, population_df, aggregate_states_df, random_states_s)
return (population_df, aggregate_states_df, random_states_s)

0 comments on commit 5112bbe

Please sign in to comment.