InstituteforDiseaseModeling · mfisher-idmod · Jan 22, 2020 · Jan 20, 2020 · Jan 20, 2020 · Jan 21, 2020
diff --git a/examples/analyzers/example_analysis_AddAnalyzer.py b/examples/analyzers/example_analysis_AddAnalyzer.py
@@ -11,7 +11,7 @@
 
     # Set the platform where you want to run your analysis
     # In this case we are running in COMPS, but this can be changed to run 'Local'
-    platform = Platform('COMPS')
+    platform = Platform('COMPS2')
 
     # Arg option for analyzer init are uid, working_dir, data in the method map (aka select_simulation_data),
     # and filenames

diff --git a/examples/analyzers/example_analysis_CSVAnalyzer.py b/examples/analyzers/example_analysis_CSVAnalyzer.py
@@ -0,0 +1,31 @@
+# Example CSVAnalyzer for any experiment
+# In this example, we will demonstrate how to use a CSVAnalyzer to analyze csv files for experiments
+
+# First, import some necessary system and idmtools packages.
+from idmtools.analysis.analyze_manager import AnalyzeManager
+from idmtools.analysis.csv_analyzer import CSVAnalyzer
+from idmtools.core import ItemType
+from idmtools.core.platform_factory import Platform
+
+
+if __name__ == '__main__':
+
+    # Select the platform that holds the experiment you want to analyze
+    # Here it is COMPS, since the experiment we are analyzing was run on COMPS
+    platform = Platform('COMPS2')
+
+    # Set the experiment id you want to analyze
+    experiment_id = '9311af40-1337-ea11-a2be-f0921c167861'  # staging exp id simple sim and csv example
+
+    # CSVAnalyzer takes the filename(s) to analyze and, optionally, parse
+    # In this case, a single csv output file is provided
+    csv_files = ['output/c.csv']
+    csv_analyzer = CSVAnalyzer(filenames=csv_files)
+
+    # Each id is paired with its item type, in this case an Experiment on COMPS
+    items_to_analyze = [(experiment_id, ItemType.EXPERIMENT)]
+
+    # Build the manager from the platform, the items and the analyzers, then run the analysis
+    manager = AnalyzeManager(configuration={}, partial_analyze_ok=True, platform=platform,
+                             ids=items_to_analyze, analyzers=[csv_analyzer])
+    manager.analyze()
diff --git a/examples/analyzers/example_analysis_DownloadAnalyzer.py b/examples/analyzers/example_analysis_DownloadAnalyzer.py
@@ -11,7 +11,7 @@
 
     # Set the platform where you want to run your analysis
     # In this case we are running in COMPS, but this can be changed to run 'Local'
-    platform = Platform('COMPS')
+    platform = Platform('COMPS2')
 
     # Arg option for analyzer init are uid, working_dir, data in the method map (aka select_simulation_data),
     # and filenames

diff --git a/examples/analyzers/example_analysis_EndpointsAnalyzer.py b/examples/analyzers/example_analysis_EndpointsAnalyzer.py
@@ -94,7 +94,7 @@ def reduce(self, all_data):
 
     # Set the platform where you want to run your analysis
     # In this case we are running in COMPS, but this can be changed to run 'Local'
-    platform = Platform('COMPS')
+    platform = Platform('COMPS2')
 
     # Set the experiment you want to analyze
     exp_id = '719de048-64cb-e911-a2bb-f0921c167866'  # comps2 staging exp id

diff --git a/examples/analyzers/example_analysis_MultiCSVAnalyzer.py b/examples/analyzers/example_analysis_MultiCSVAnalyzer.py
@@ -0,0 +1,31 @@
+# Example CSVAnalyzer for any experiment with multiple csv outputs
+# In this example, we will demonstrate how to use a CSVAnalyzer to analyze csv files for experiments
+
+# First, import some necessary system and idmtools packages.
+from idmtools.analysis.analyze_manager import AnalyzeManager
+from idmtools.analysis.csv_analyzer import CSVAnalyzer
+from idmtools.core import ItemType
+from idmtools.core.platform_factory import Platform
+
+
+if __name__ == '__main__':
+
+    # Select the platform that holds the experiment you want to analyze
+    # Here it is COMPS, since the experiment we are analyzing was run on COMPS
+    platform = Platform('COMPS2')
+
+    # Set the experiment id you want to analyze
+    experiment_id = '1bddce22-0c37-ea11-a2be-f0921c167861'  # staging exp id with multiple csv file outputs
+
+    # CSVAnalyzer takes the filename(s) to analyze and, optionally, parse
+    # In this case, several csv output files are provided
+    csv_files = ['output/a.csv', 'output/b.csv']
+    csv_analyzer = CSVAnalyzer(filenames=csv_files)
+
+    # Each id is paired with its item type, in this case an Experiment on COMPS
+    items_to_analyze = [(experiment_id, ItemType.EXPERIMENT)]
+
+    # Build the manager from the platform, the items and the analyzers, then run the analysis
+    manager = AnalyzeManager(configuration={}, partial_analyze_ok=True, platform=platform,
+                             ids=items_to_analyze, analyzers=[csv_analyzer])
+    manager.analyze()
diff --git a/examples/analyzers/example_analysis_TagsAnalyzer.py b/examples/analyzers/example_analysis_TagsAnalyzer.py
@@ -0,0 +1,28 @@
+# Example TagsAnalyzer for any experiment
+# In this example, we will demonstrate how to use a TagsAnalyzer to put your sim tags in a csv file
+
+# First, import some necessary system and idmtools packages.
+from idmtools.analysis.analyze_manager import AnalyzeManager
+from idmtools.analysis.tags_analyzer import TagsAnalyzer
+from idmtools.core import ItemType
+from idmtools.core.platform_factory import Platform
+
+if __name__ == '__main__':
+
+    # Select the platform that holds the experiment you want to analyze
+    # Here it is COMPS, since the experiment we are analyzing was run on COMPS
+    platform = Platform('COMPS2')
+
+    # Set the experiment id you want to analyze
+    experiment_id = '36d8bfdc-83f6-e911-a2be-f0921c167861'  # staging exp id JSuresh's Magude exp
+
+    # TagsAnalyzer needs no arguments here: uid, working_dir and parse all have defaults
+    tags_analyzer = TagsAnalyzer()
+
+    # Each id is paired with its item type, in this case an Experiment on COMPS
+    items_to_analyze = [(experiment_id, ItemType.EXPERIMENT)]
+
+    # Build the manager from the platform, the items and the analyzers, then run the analysis
+    manager = AnalyzeManager(configuration={}, partial_analyze_ok=True, platform=platform,
+                             ids=items_to_analyze, analyzers=[tags_analyzer])
+    manager.analyze()
diff --git a/idmtools_core/idmtools/analysis/csv_analyzer.py b/idmtools_core/idmtools/analysis/csv_analyzer.py
@@ -0,0 +1,43 @@
+# Example of a csv analyzer to concatenate csv results into one csv from your experiment simulations
+
+# First, import some necessary system and idmtools packages.
+import os
+import pandas as pd
+from idmtools.entities import IAnalyzer
+
+
+# Create a class for the analyzer
+class CSVAnalyzer(IAnalyzer):
+    # Arg option for analyzer init are uid, working_dir, parse (True to leverage the :class:`OutputParser`;
+    # False to get the raw data in the :meth:`select_simulation_data`), and filenames
+    # In this case, we want parse=True, and the filename(s) to analyze
+    def __init__(self, filenames, parse=True):
+        # Pass parse by keyword: the first init option is uid, so a positional parse would be taken as the uid
+        super().__init__(parse=parse, filenames=filenames)
+        # Raise exception early if files are not csv files
+        if not all('csv' in os.path.splitext(f)[1].lower() for f in self.filenames):
+            raise Exception('Please ensure all filenames provided to CSVAnalyzer have a csv extension.')
+
+    # Map is called for each simulation with its data (one dataframe per requested csv file) and the simulation
+    def map(self, data, simulation):
+        # If there are 1 to many csv files, concatenate csv data columns into one dataframe
+        concatenated_df = pd.concat(list(data.values()), axis=0, ignore_index=True, sort=True)
+        return concatenated_df
+
+    # In reduce, the dataframes returned by map are combined and written to <exp id>/<class name>.csv
+    def reduce(self, all_data):
+        # Let's hope the first simulation is representative
+        first_sim = next(iter(all_data.keys()))  # Take the first simulation key of the map results
+        exp_id = str(first_sim.experiment.uid)  # Set the exp id from the first sim data
+
+        results = pd.concat(list(all_data.values()), axis=0,  # Combine a list of all the sims csv data column values
+                            keys=[str(k.uid) for k in all_data.keys()],  # Add a hierarchical index with the keys option
+                            names=['SimId'])  # Label the index keys you create with the names option
+        results.index = results.index.droplevel(1)  # Remove default index
+
+        # Make a directory labeled the exp id to write the csv results to
+        os.makedirs(exp_id, exist_ok=True)
+        # NOTE: If running twice with different filename, the output files will collide
+        results.to_csv(os.path.join(exp_id, self.__class__.__name__+'.csv'))
+
+
diff --git a/idmtools_core/idmtools/analysis/tags_analyzer.py b/idmtools_core/idmtools/analysis/tags_analyzer.py
@@ -0,0 +1,30 @@
+# Example of a tags analyzer to get all the tags from your experiment simulations into one csv file
+
+# First, import some necessary system and idmtools packages.
+import os
+import pandas as pd
+from idmtools.entities import IAnalyzer
+
+
+# Create a class for the analyzer
+class TagsAnalyzer(IAnalyzer):
+    # Gathers the tags of every simulation of an experiment and writes them to <exp id>/tags.csv
+    # Init options forwarded to IAnalyzer are uid, working_dir and parse (True to leverage the
+    # :class:`OutputParser`; False to get the raw data in the :meth:`select_simulation_data`)
+    def __init__(self, uid=None, working_dir=None, parse=True):
+        super().__init__(uid, working_dir, parse)
+        self.exp_id = None
+
+    # Map is called once per simulation and returns a one-row dataframe holding the simulation tags
+    def map(self, data, simulation):
+        tag_row = pd.DataFrame(columns=simulation.tags.keys())  # One column per simulation tag key
+        tag_row.loc[str(simulation.uid)] = list(simulation.tags.values())  # Tag values, indexed by the sim id
+        tag_row.index.name = 'SimId'  # Name the index after what it holds
+        return tag_row
+
+    # In reduce, the rows returned by map are combined and written to <exp id>/tags.csv
+    def reduce(self, all_data):
+        exp_dir = str(next(iter(all_data)).experiment.uid)  # The exp id of the first sim names the output folder
+        all_tags = pd.concat([*all_data.values()], axis=0)  # Stack the one-row dataframes of all the sims
+        os.makedirs(exp_dir, exist_ok=True)  # Create the output folder if it does not exist yet
+        all_tags.to_csv(os.path.join(exp_dir, 'tags.csv'))  # Write the sim tags to a csv
diff --git a/idmtools_platform_comps/tests/test_analyzers_emod_comps.py b/idmtools_platform_comps/tests/test_analyzers_emod_comps.py
@@ -10,14 +10,16 @@
 from idmtools.analysis.analyze_manager import AnalyzeManager
 from idmtools.analysis.download_analyzer import DownloadAnalyzer
 from idmtools.builders import ExperimentBuilder
-from idmtools.core import ItemType
+from idmtools.core import ItemType, EntityStatus
 from idmtools.core.platform_factory import Platform
 from idmtools.managers import ExperimentManager
 from idmtools_model_emod.defaults import EMODSir
 from idmtools_model_emod.emod_experiment import EMODExperiment
 from idmtools_test import COMMON_INPUT_PATH
 from idmtools_test.utils.itest_with_persistence import ITestWithPersistence
 from idmtools_test.utils.utils import del_file, del_folder, load_csv_file
+from idmtools.analysis.tags_analyzer import TagsAnalyzer
+from idmtools.analysis.csv_analyzer import CSVAnalyzer
 
 current_directory = os.path.dirname(os.path.realpath(__file__))
 
@@ -245,3 +247,82 @@ def test_download_analyzer_suite(self):
         sims = self.p.get_children_by_object(comps_exp)
         for simulation in sims:
             self.assertTrue(os.path.exists(os.path.join('output', str(simulation.uid), "InsetChart.json")))
+
+    def test_tags_analyzer_emod_exp(self):
+        experiment_id = '36d8bfdc-83f6-e911-a2be-f0921c167861'  # staging exp id JSuresh's Magude exp
+        expected_csv = os.path.join(experiment_id, "tags.csv")
+
+        # start from a clean, empty folder named after the experiment id
+        del_folder(experiment_id)
+        os.mkdir(experiment_id)
+
+        # run the TagsAnalyzer on the experiment
+        analyzers = [TagsAnalyzer()]
+        items = [(experiment_id, ItemType.EXPERIMENT)]
+        manager = AnalyzeManager(configuration={}, partial_analyze_ok=True, platform=self.p,
+                                 ids=items, analyzers=analyzers)
+        manager.analyze()
+
+        # verify results: TagsAnalyzer.reduce writes tags.csv
+        # into a folder named after the experiment id
+        self.assertTrue(os.path.exists(expected_csv))
+
+    def test_csv_analyzer_emod_exp(self):
+        experiment_id = '9311af40-1337-ea11-a2be-f0921c167861'  # staging exp id with csv from config
+        expected_csv = os.path.join(experiment_id, "CSVAnalyzer.csv")
+
+        # start from a clean, empty folder named after the experiment id
+        del_folder(experiment_id)
+        os.mkdir(experiment_id)
+
+        # run the CSVAnalyzer on the single csv output of the experiment
+        analyzers = [CSVAnalyzer(filenames=['output/c.csv'])]
+        items = [(experiment_id, ItemType.EXPERIMENT)]
+        manager = AnalyzeManager(configuration={}, partial_analyze_ok=True, platform=self.p,
+                                 ids=items, analyzers=analyzers)
+        manager.analyze()
+
+        # verify results: CSVAnalyzer.reduce writes <class name>.csv
+        # into a folder named after the experiment id
+        self.assertTrue(os.path.exists(expected_csv))
+
+    def test_csv_analyzer_emod_exp_non_csv_error(self):
+        """
+        CSVAnalyzer must reject filenames that do not have a csv extension.
+
+        The extension check runs in the CSVAnalyzer constructor, so the exception
+        is raised at instantiation, before any AnalyzeManager is created. The
+        instantiation is therefore wrapped in the assertRaises context manager;
+        calling assertRaises without a callable and outside a ``with`` statement
+        asserts nothing.
+        """
+
+        filenames = ['output/MalariaPatientReport.json']
+
+        with self.assertRaises(Exception) as context:
+            CSVAnalyzer(filenames=filenames)
+
+        # verify the error message
+        self.assertIn('Please ensure all filenames provided to CSVAnalyzer have a csv extension.',
+                      str(context.exception))
+
+    def test_multi_csv_analyzer_emod_exp(self):
+        experiment_id = '1bddce22-0c37-ea11-a2be-f0921c167861'  # staging exp id PythonExperiment with 2 csv outputs
+        expected_csv = os.path.join(experiment_id, "CSVAnalyzer.csv")
+
+        # start from a clean, empty folder named after the experiment id
+        del_folder(experiment_id)
+        os.mkdir(experiment_id)
+
+        # the CSVAnalyzer is given both csv outputs of the experiment
+        analyzers = [CSVAnalyzer(filenames=['output/a.csv', 'output/b.csv'])]
+        items = [(experiment_id, ItemType.EXPERIMENT)]
+
+        # NOTE(review): unlike the sibling tests, self.p is replaced by a fresh COMPS2 platform - confirm intended
+        self.p = Platform('COMPS2')
+        manager = AnalyzeManager(configuration={}, partial_analyze_ok=True, platform=self.p,
+                                 ids=items, analyzers=analyzers)
+        manager.analyze()
+
+        # verify results: CSVAnalyzer.reduce writes <class name>.csv into a folder named after the experiment id
+        self.assertTrue(os.path.exists(expected_csv))