Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New general purpose analyzers with examples and tests #581

Merged
merged 8 commits into from Jan 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/analyzers/example_analysis_AddAnalyzer.py
Expand Up @@ -11,7 +11,7 @@

# Set the platform where you want to run your analysis
# In this case we are running in COMPS, but this can be changed to run 'Local'
platform = Platform('COMPS')
platform = Platform('COMPS2')

# Arg option for analyzer init are uid, working_dir, data in the method map (aka select_simulation_data),
# and filenames
Expand Down
31 changes: 31 additions & 0 deletions examples/analyzers/example_analysis_CSVAnalyzer.py
@@ -0,0 +1,31 @@
# Example CSVAnalyzer for any experiment
# In this example, we will demonstrate how to use a CSVAnalyzer to analyze csv files for experiments

# First, import some necessary system and idmtools packages.
from idmtools.analysis.analyze_manager import AnalyzeManager
from idmtools.analysis.csv_analyzer import CSVAnalyzer
from idmtools.core import ItemType
from idmtools.core.platform_factory import Platform


if __name__ == '__main__':

    # Pick the platform the analysis runs against.
    # The experiment analyzed below lives on COMPS, so a COMPS platform is used.
    comps_platform = Platform('COMPS2')

    # The analyzer is told which csv output file to collect from every simulation.
    csv_outputs = ['output/c.csv']
    csv_analyzer = CSVAnalyzer(filenames=csv_outputs)

    # Experiment to analyze
    target_experiment = '9311af40-1337-ea11-a2be-f0921c167861'  # staging exp id simple sim and csv example

    # Each id is paired with its item type; here the id refers to an Experiment.
    analyze_manager = AnalyzeManager(
        configuration={},
        partial_analyze_ok=True,
        platform=comps_platform,
        ids=[(target_experiment, ItemType.EXPERIMENT)],
        analyzers=[csv_analyzer],
    )
    analyze_manager.analyze()
2 changes: 1 addition & 1 deletion examples/analyzers/example_analysis_DownloadAnalyzer.py
Expand Up @@ -11,7 +11,7 @@

# Set the platform where you want to run your analysis
# In this case we are running in COMPS, but this can be changed to run 'Local'
platform = Platform('COMPS')
platform = Platform('COMPS2')

# Arg option for analyzer init are uid, working_dir, data in the method map (aka select_simulation_data),
# and filenames
Expand Down
2 changes: 1 addition & 1 deletion examples/analyzers/example_analysis_EndpointsAnalyzer.py
Expand Up @@ -94,7 +94,7 @@ def reduce(self, all_data):

# Set the platform where you want to run your analysis
# In this case we are running in COMPS, but this can be changed to run 'Local'
platform = Platform('COMPS')
platform = Platform('COMPS2')

# Set the experiment you want to analyze
exp_id = '719de048-64cb-e911-a2bb-f0921c167866' # comps2 staging exp id
Expand Down
31 changes: 31 additions & 0 deletions examples/analyzers/example_analysis_MultiCSVAnalyzer.py
@@ -0,0 +1,31 @@
# Example CSVAnalyzer for any experiment with multiple csv outputs
# In this example, we will demonstrate how to use a CSVAnalyzer to analyze csv files for experiments

# First, import some necessary system and idmtools packages.
from idmtools.analysis.analyze_manager import AnalyzeManager
from idmtools.analysis.csv_analyzer import CSVAnalyzer
from idmtools.core import ItemType
from idmtools.core.platform_factory import Platform


if __name__ == '__main__':

    # Pick the platform the analysis runs against.
    # The experiment analyzed below lives on COMPS, so a COMPS platform is used.
    comps_platform = Platform('COMPS2')

    # Several csv output files can be handed to a single CSVAnalyzer.
    csv_outputs = ['output/a.csv', 'output/b.csv']
    csv_analyzer = CSVAnalyzer(filenames=csv_outputs)

    # Experiment to analyze
    target_experiment = '1bddce22-0c37-ea11-a2be-f0921c167861'  # staging exp id with multiple csv file outputs

    # Each id is paired with its item type; here the id refers to an Experiment.
    analyze_manager = AnalyzeManager(
        configuration={},
        partial_analyze_ok=True,
        platform=comps_platform,
        ids=[(target_experiment, ItemType.EXPERIMENT)],
        analyzers=[csv_analyzer],
    )
    analyze_manager.analyze()
28 changes: 28 additions & 0 deletions examples/analyzers/example_analysis_TagsAnalyzer.py
@@ -0,0 +1,28 @@
# Example TagsAnalyzer for any experiment
# In this example, we will demonstrate how to use a TagsAnalyzer to put your sim tags in a csv file

# First, import some necessary system and idmtools packages.
from idmtools.analysis.analyze_manager import AnalyzeManager
from idmtools.analysis.tags_analyzer import TagsAnalyzer
from idmtools.core import ItemType
from idmtools.core.platform_factory import Platform

if __name__ == '__main__':

    # Pick the platform the analysis runs against.
    # The experiment analyzed below lives on COMPS, so a COMPS platform is used.
    comps_platform = Platform('COMPS2')

    # TagsAnalyzer needs no constructor arguments here; its defaults are used.
    tags_analyzer = TagsAnalyzer()

    # Experiment to analyze
    target_experiment = '36d8bfdc-83f6-e911-a2be-f0921c167861'  # staging exp id JSuresh's Magude exp

    # Each id is paired with its item type; here the id refers to an Experiment.
    analyze_manager = AnalyzeManager(
        configuration={},
        partial_analyze_ok=True,
        platform=comps_platform,
        ids=[(target_experiment, ItemType.EXPERIMENT)],
        analyzers=[tags_analyzer],
    )
    analyze_manager.analyze()
43 changes: 43 additions & 0 deletions idmtools_core/idmtools/analysis/csv_analyzer.py
@@ -0,0 +1,43 @@
# Example of a csv analyzer to concatenate csv results into one csv from your experiment simulations

# First, import some necessary system and idmtools packages.
import os
mfisher-idmod marked this conversation as resolved.
Show resolved Hide resolved
import pandas as pd
from idmtools.entities import IAnalyzer


# Create a class for the analyzer
class CSVAnalyzer(IAnalyzer):
    """Concatenate the csv outputs of an experiment's simulations into one csv file.

    ``map`` stacks the requested csv files of one simulation into a single
    dataframe; ``reduce`` stacks the per-simulation dataframes, indexes the rows
    by simulation id and writes ``<experiment id>/CSVAnalyzer.csv``.
    """

    def __init__(self, filenames, parse=True):
        """Create the analyzer and validate the requested filenames.

        Args:
            filenames: csv files to collect from every simulation.
            parse: forwarded to ``IAnalyzer`` as its ``parse`` option.

        Raises:
            Exception: if any filename does not have a ``.csv`` extension.
        """
        # The analyzer init options are listed as uid, working_dir, parse, filenames.
        # Assuming that is also the positional order, ``parse`` must be passed by
        # keyword, otherwise it would be taken as the first option (uid).
        super().__init__(parse=parse, filenames=filenames)
        # Raise exception early if files are not csv files.
        # Compare the whole extension: a substring test would accept e.g. ".csvx".
        if not all(os.path.splitext(f)[1].lower() == '.csv' for f in self.filenames):
            raise Exception('Please ensure all filenames provided to CSVAnalyzer have a csv extension.')

    def map(self, data, simulation):
        """Stack every csv dataframe of one simulation into a single dataframe.

        Args:
            data: mapping whose values are the dataframes for this simulation.
            simulation: the simulation the data belongs to (not used here).

        Returns:
            One dataframe with all rows appended, a fresh integer index and
            sorted columns.
        """
        # If there are 1 to many csv files, concatenate csv data columns into one dataframe
        concatenated_df = pd.concat(list(data.values()), axis=0, ignore_index=True, sort=True)
        return concatenated_df

    def reduce(self, all_data):
        """Combine the per-simulation dataframes and write them to a csv file.

        Args:
            all_data: mapping of simulation object to the dataframe returned by ``map``.
        """
        # Let's hope the first simulation is representative
        first_sim = next(iter(all_data.keys()))
        exp_id = str(first_sim.experiment.uid)  # All results go in a folder named after the experiment

        results = pd.concat(list(all_data.values()), axis=0,
                            keys=[str(k.uid) for k in all_data.keys()],  # Outer index level: simulation id
                            names=['SimId'])
        results.index = results.index.droplevel(1)  # Drop the per-simulation row index, keep SimId only

        # Make a directory labeled the exp id to write the csv results to
        os.makedirs(exp_id, exist_ok=True)
        # NOTE: If running twice with different filename, the output files will collide
        results.to_csv(os.path.join(exp_id, self.__class__.__name__ + '.csv'))


30 changes: 30 additions & 0 deletions idmtools_core/idmtools/analysis/tags_analyzer.py
@@ -0,0 +1,30 @@
# Example of a tags analyzer to get all the tags from your experiment simulations into one csv file

# First, import some necessary system and idmtools packages.
import os
import pandas as pd
from idmtools.entities import IAnalyzer


# Create a class for the analyzer
class TagsAnalyzer(IAnalyzer):
    """Collect the tags of every simulation in an experiment into one csv file.

    ``map`` turns the tags of one simulation into a single-row dataframe indexed
    by simulation id; ``reduce`` stacks those rows and writes
    ``<experiment id>/tags.csv``.
    """

    def __init__(self, uid=None, working_dir=None, parse=True):
        """Forward uid, working_dir and parse to ``IAnalyzer`` and reset ``exp_id``."""
        super().__init__(uid, working_dir, parse)
        self.exp_id = None

    def map(self, data, simulation):
        """Return a one-row dataframe holding the tags of ``simulation``.

        Args:
            data: per-simulation data handed over by the framework (not used here).
            simulation: simulation whose ``tags`` and ``uid`` are read.
        """
        # Columns are the tag names; the single row is labeled with the simulation id
        tag_row = pd.DataFrame(columns=simulation.tags.keys())
        sim_label = str(simulation.uid)
        tag_row.loc[sim_label] = list(simulation.tags.values())
        tag_row.index.name = 'SimId'
        return tag_row

    def reduce(self, all_data):
        """Stack the per-simulation tag rows and write them to ``tags.csv``.

        Args:
            all_data: mapping of simulation object to the dataframe returned by ``map``.
        """
        # The experiment id is taken from the first simulation in the mapping
        first_simulation = next(iter(all_data.keys()))
        output_dir = str(first_simulation.experiment.uid)

        combined_tags = pd.concat(list(all_data.values()), axis=0)

        # The csv is written into a folder named after the experiment id
        os.makedirs(output_dir, exist_ok=True)
        combined_tags.to_csv(os.path.join(output_dir, 'tags.csv'))
83 changes: 82 additions & 1 deletion idmtools_platform_comps/tests/test_analyzers_emod_comps.py
Expand Up @@ -10,14 +10,16 @@
from idmtools.analysis.analyze_manager import AnalyzeManager
from idmtools.analysis.download_analyzer import DownloadAnalyzer
from idmtools.builders import ExperimentBuilder
from idmtools.core import ItemType
from idmtools.core import ItemType, EntityStatus
from idmtools.core.platform_factory import Platform
from idmtools.managers import ExperimentManager
from idmtools_model_emod.defaults import EMODSir
from idmtools_model_emod.emod_experiment import EMODExperiment
from idmtools_test import COMMON_INPUT_PATH
from idmtools_test.utils.itest_with_persistence import ITestWithPersistence
from idmtools_test.utils.utils import del_file, del_folder, load_csv_file
from idmtools.analysis.tags_analyzer import TagsAnalyzer
from idmtools.analysis.csv_analyzer import CSVAnalyzer

current_directory = os.path.dirname(os.path.realpath(__file__))

Expand Down Expand Up @@ -245,3 +247,82 @@ def test_download_analyzer_suite(self):
sims = self.p.get_children_by_object(comps_exp)
for simulation in sims:
self.assertTrue(os.path.exists(os.path.join('output', str(simulation.uid), "InsetChart.json")))

def test_tags_analyzer_emod_exp(self):
    """TagsAnalyzer should produce ``<experiment id>/tags.csv`` for the experiment."""
    exp_id = '36d8bfdc-83f6-e911-a2be-f0921c167861'  # staging exp id JSuresh's Magude exp

    # Start from a fresh, empty folder named after the experiment id
    del_folder(exp_id)
    os.mkdir(exp_id)

    tag_analyzers = [TagsAnalyzer()]
    analyze_manager = AnalyzeManager(
        configuration={},
        partial_analyze_ok=True,
        platform=self.p,
        ids=[(exp_id, ItemType.EXPERIMENT)],
        analyzers=tag_analyzers,
    )
    analyze_manager.analyze()

    # verify results
    tags_csv = os.path.join(exp_id, "tags.csv")
    self.assertTrue(os.path.exists(tags_csv))

def test_csv_analyzer_emod_exp(self):
    """CSVAnalyzer should produce ``<experiment id>/CSVAnalyzer.csv`` for one csv output."""
    exp_id = '9311af40-1337-ea11-a2be-f0921c167861'  # staging exp id with csv from config

    # Start from a fresh, empty folder named after the experiment id
    del_folder(exp_id)
    os.mkdir(exp_id)

    csv_analyzers = [CSVAnalyzer(filenames=['output/c.csv'])]
    analyze_manager = AnalyzeManager(
        configuration={},
        partial_analyze_ok=True,
        platform=self.p,
        ids=[(exp_id, ItemType.EXPERIMENT)],
        analyzers=csv_analyzers,
    )
    analyze_manager.analyze()

    # verify results
    result_csv = os.path.join(exp_id, "CSVAnalyzer.csv")
    self.assertTrue(os.path.exists(result_csv))

def test_csv_analyzer_emod_exp_non_csv_error(self):
    """CSVAnalyzer must reject a non-csv filename when it is constructed.

    The extension check runs inside ``CSVAnalyzer.__init__``, so the
    construction itself has to be wrapped in ``assertRaises``. Calling
    ``assertRaises`` without a callable or a ``with`` block checks nothing.
    No experiment folder or AnalyzeManager is needed because the error is
    raised before any analysis could start.
    """
    filenames = ['output/MalariaPatientReport.json']

    with self.assertRaises(Exception) as context:
        CSVAnalyzer(filenames=filenames)

    # verify results
    self.assertIn('Please ensure all filenames provided to CSVAnalyzer have a csv extension.',
                  str(context.exception))

def test_multi_csv_analyzer_emod_exp(self):
    """CSVAnalyzer should produce ``<experiment id>/CSVAnalyzer.csv`` for two csv outputs."""
    exp_id = '1bddce22-0c37-ea11-a2be-f0921c167861'  # staging exp id PythonExperiment with 2 csv outputs

    # Start from a fresh, empty folder named after the experiment id
    del_folder(exp_id)
    os.mkdir(exp_id)

    csv_analyzers = [CSVAnalyzer(filenames=['output/a.csv', 'output/b.csv'])]

    # This test replaces the platform held on the test case before analyzing
    self.p = Platform('COMPS2')
    analyze_manager = AnalyzeManager(
        configuration={},
        partial_analyze_ok=True,
        platform=self.p,
        ids=[(exp_id, ItemType.EXPERIMENT)],
        analyzers=csv_analyzers,
    )
    analyze_manager.analyze()

    # verify results
    result_csv = os.path.join(exp_id, "CSVAnalyzer.csv")
    self.assertTrue(os.path.exists(result_csv))