From e2cada1ccfbb5e830a5e0c9711f2331f61ed5567 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Wed, 11 Nov 2020 19:39:53 +0000
Subject: [PATCH 01/25] grouping assays by assay type #369 (tests WIP)

---
 isatools/create/models.py                | 51 ++++++++++++++----------
 isatools/model.py                        |  2 +-
 tests/test_create_models_study_design.py | 22 +++++-----
 3 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/isatools/create/models.py b/isatools/create/models.py
index 9a2d9e7c..7c4ca44f 100644
--- a/isatools/create/models.py
+++ b/isatools/create/models.py
@@ -2053,13 +2053,12 @@ def _generate_sources(self, ontology_source_references):
             src_map[s_arm.name] = list(srcs)
         return src_map
 
-    def _generate_samples(self, sources_map, sampling_protocol, performer, split_assays_by_sample_type):
+    def _generate_samples_and_assays(self, sources_map, sampling_protocol, performer):
         """
         Private method to be used in 'generate_isa_study'.
         :param sources_map: dict - the output of '_generate_sources'
         :param sampling_protocol: isatools.model.Protocol
-        :param performer
-        :param split_assays_by_sample_type: bool
+        :param performer: str
         :return: 
         """
         factors = set()
@@ -2069,6 +2068,16 @@ def _generate_samples(self, sources_map, sampling_protocol, performer, split_ass
         process_sequence = []
         assays = []
         protocols = set()
+        unique_assay_types = {
+            assay_graph for arm in self.study_arms
+            for sample_assay_plan in arm.arm_map.values() if sample_assay_plan is not None
+            for assay_graph in sample_assay_plan.assay_plan if assay_graph is not None
+        }
+        samples_grouped_by_assay_graph = {
+            assay_graph: [] for assay_graph in unique_assay_types
+        }
+
+        # generate samples
         for arm in self.study_arms:
             for cell, sample_assay_plan in arm.arm_map.items():
                 if not sample_assay_plan:
@@ -2110,22 +2119,24 @@ def _generate_samples(self, sources_map, sampling_protocol, performer, split_ass
                                 process_sequence.append(process)
                 for sample_node in sample_assay_plan.sample_plan:
                     samples.extend(sample_batches[sample_node])
+
                 for assay_graph in sample_assay_plan.assay_plan:
-                    protocols.update({node for node in assay_graph.nodes if isinstance(node, Protocol)})
-                    if split_assays_by_sample_type is True:
-                        for sample_node in sorted(sample_assay_plan.sample_plan, key=lambda st: st.id):
-                            if assay_graph in sample_assay_plan.sample_to_assay_map[sample_node]:
-                                assays.append(
-                                    self._generate_assay(assay_graph, sample_batches[sample_node], cell.name)
-                                )
-                    else:
-                        sample_batch = []
-                        for sample_node in sample_assay_plan.sample_plan:
-                            if assay_graph in sample_assay_plan.sample_to_assay_map[sample_node]:
-                                sample_batch.extend(sample_batches[sample_node])
-                        assays.append(
-                            self._generate_assay(assay_graph, sample_batch, cell.name)
-                        )
+                    for sample_node in sample_assay_plan.sample_plan:
+                        if assay_graph in sample_assay_plan.sample_to_assay_map[sample_node]:
+                            try:
+                                samples_grouped_by_assay_graph[assay_graph] += sample_batches[sample_node]
+                            except AttributeError:
+                                log.error('Assay graph is: {}'.format(assay_graph))
+                                problematic_sample_group = samples_grouped_by_assay_graph[assay_graph]
+                                log.error('Sample bach for assay graph is: {}'.format(
+                                    problematic_sample_group
+                                ))
+
+        # generate assays
+        for assay_graph in unique_assay_types:
+            protocols.update({node for node in assay_graph.nodes if isinstance(node, Protocol)})
+            assays.append(self._generate_assay(assay_graph, samples_grouped_by_assay_graph[assay_graph]))
+
         return factors, protocols, samples, assays, process_sequence, ontology_sources
 
     @staticmethod
@@ -2261,8 +2272,8 @@ def generate_isa_study(self, split_assays_by_sample_type=False):
         study.sources = [source for sources in sources_map.values() for source in sources]
         study.factors, protocols, study.samples, study.assays, study.process_sequence, \
             study.ontology_source_references = \
-            self._generate_samples(
-                sources_map, study.protocols[0], study_config['performers'][0]['name'], split_assays_by_sample_type
+            self._generate_samples_and_assays(
+                sources_map, study.protocols[0], study_config['performers'][0]['name']
             )
         for protocol in protocols:
             study.add_protocol(protocol)
diff --git a/isatools/model.py b/isatools/model.py
index 906cdae8..b5c95694 100644
--- a/isatools/model.py
+++ b/isatools/model.py
@@ -690,7 +690,7 @@ class OntologyAnnotation(Commentable):
     """
 
     def __init__(self, term='', term_source=None, term_accession='',
-                 comments=None, id_=str(uuid.uuid4())) :
+                 comments=None, id_=str(uuid.uuid4())):
         super().__init__(comments)
 
         self.__term = term
diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py
index 2b476fc7..574c5e9e 100644
--- a/tests/test_create_models_study_design.py
+++ b/tests/test_create_models_study_design.py
@@ -1749,17 +1749,18 @@ def test_generate_isa_study_single_arm_single_cell_elements(self):
             self.assertEqual(len(source.characteristics), 1)
             self.assertEqual(source.characteristics[0], DEFAULT_SOURCE_TYPE)
 
-        expected_num_of_samples_per_plan = reduce(lambda acc_value, sample_node: acc_value+sample_node.size,
-                                                  self.nmr_sample_assay_plan.sample_plan, 0) * single_arm.group_size
-        expected_num_of_samples = expected_num_of_samples_per_plan * len([
+        expected_num_of_samples = reduce(
+            lambda acc_value, sample_node: acc_value + sample_node.size,
+            self.nmr_sample_assay_plan.sample_plan, 0
+        ) * single_arm.group_size * len([
             a_plan for a_plan in single_arm.arm_map.values() if a_plan is not None
         ])
-        print('Expected number of samples is: {0}'.format(expected_num_of_samples))
+        log.debug('Expected number of samples is: {0}'.format(expected_num_of_samples))
         self.assertEqual(len(study.samples), expected_num_of_samples)
-        self.assertEqual(len(study.assays), 2)
+        self.assertEqual(len(study.assays), 1)
         treatment_assay = next(iter(study.assays))
         self.assertIsInstance(treatment_assay, Assay)
-        # self.assertEqual(len(treatment_assay.samples), expected_num_of_samples_per_plan)
+        # self.assertEqual(len(treatment_assay.samples), expected_num_of_samples)
         self.assertEqual(treatment_assay.measurement_type, nmr_assay_dict['measurement_type'])
         self.assertEqual(treatment_assay.technology_type, nmr_assay_dict['technology_type'])
         # pdb.set_trace()
@@ -1767,12 +1768,13 @@ def test_generate_isa_study_single_arm_single_cell_elements(self):
                                 if process.executes_protocol.name == 'extraction']
         nmr_processes = [process for process in treatment_assay.process_sequence
                          if process.executes_protocol.name == 'nmr spectroscopy']
-        self.assertEqual(len(extraction_processes), expected_num_of_samples_per_plan)
-        self.assertEqual(len(nmr_processes), 8 * nmr_assay_dict['nmr spectroscopy']['#replicates']
-                         * expected_num_of_samples_per_plan)
+        self.assertEqual(len(extraction_processes), expected_num_of_samples)
+        self.assertEqual(
+            len(nmr_processes),
+            8 * nmr_assay_dict['nmr spectroscopy']['#replicates'] * expected_num_of_samples)
         self.assertEqual(
             len(treatment_assay.process_sequence),
-            (8 * nmr_assay_dict['nmr spectroscopy']['#replicates'] + 1) * expected_num_of_samples_per_plan
+            (8 * nmr_assay_dict['nmr spectroscopy']['#replicates'] + 1) * expected_num_of_samples
         )
         for ix, process in enumerate(extraction_processes):
             self.assertEqual(process.inputs, [study.samples[ix]])

From c9f81d79e45c1eae7fde120aada5eb9396b95f7f Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Thu, 12 Nov 2020 13:20:51 +0000
Subject: [PATCH 02/25] removing unused argument; tests wip #369

---
 isatools/create/models.py                |  2 +-
 tests/test_create_models_study_design.py | 32 +-----------------------
 2 files changed, 2 insertions(+), 32 deletions(-)

diff --git a/isatools/create/models.py b/isatools/create/models.py
index 7c4ca44f..e1b38a58 100644
--- a/isatools/create/models.py
+++ b/isatools/create/models.py
@@ -2251,7 +2251,7 @@ def _generate_assay(assay_graph, assay_samples, cell_name=''):
                                                                                             len(data_files)))
         return assay
 
-    def generate_isa_study(self, split_assays_by_sample_type=False):
+    def generate_isa_study(self):
         """
         this is the core method to return the fully populated ISA Study object from the StudyDesign
         :return: isatools.model.Study
diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py
index 574c5e9e..c3630187 100644
--- a/tests/test_create_models_study_design.py
+++ b/tests/test_create_models_study_design.py
@@ -1792,34 +1792,6 @@ def test_generate_isa_study_single_arm_single_cell_elements(self):
         log.debug('NMR assay graph: {0}'.format([(getattr(el, 'name', None), type(el))
                                                  for el in treatment_assay.graph.nodes()]))
 
-    def test_generate_isa_study_single_arm_single_cell_elements_split_assay_by_sample_type(self):
-        with open(os.path.join(os.path.dirname(__file__), '..', 'isatools', 'resources', 'config', 'yaml',
-                               'study-creator-config.yaml')) as yaml_file:
-            config = yaml.load(yaml_file, Loader=yaml.FullLoader)
-        # study_config = config['study']
-        single_arm = StudyArm(name=TEST_STUDY_ARM_NAME_00, group_size=10, arm_map=OrderedDict([
-            (self.cell_screen, None), (self.cell_run_in, None),
-            (self.cell_single_treatment_00, self.nmr_sample_assay_plan),
-            (self.cell_follow_up, self.nmr_sample_assay_plan)
-        ]))
-        study_design = StudyDesign(study_arms=(single_arm,))
-        study = study_design.generate_isa_study(split_assays_by_sample_type=True)
-        self.assertEqual(len(study.assays), 6)
-        treatment_assay_st0, treatment_assay_st1, treatment_assay_st2 = study.assays[0:3]
-        self.assertIsInstance(treatment_assay_st0, Assay)
-        self.assertEqual(treatment_assay_st0.measurement_type, nmr_assay_dict['measurement_type'])
-        self.assertEqual(treatment_assay_st0.technology_type, nmr_assay_dict['technology_type'])
-        extraction_processes = [process for process in treatment_assay_st0.process_sequence
-                                if process.executes_protocol.name == 'extraction']
-        nmr_processes = [process for process in treatment_assay_st0.process_sequence
-                         if process.executes_protocol.name == 'nmr spectroscopy']
-        expected_num_of_samples_per_plan = reduce(lambda acc_value, sample_node: acc_value+sample_node.size,
-                                                  self.nmr_sample_assay_plan.sample_plan, 0) * single_arm.group_size
-        expected_num_of_samples_first = sample_list[0]['size'] * single_arm.group_size
-        self.assertEqual(len(extraction_processes), expected_num_of_samples_first)
-        self.assertEqual(len(nmr_processes), 8 * 2 * expected_num_of_samples_first)
-        self.assertEqual(len(treatment_assay_st0.process_sequence), (8 * 2 + 1) * expected_num_of_samples_first)
-
     def test_generate_isa_study_two_arms_single_cell_elements(self):
         first_arm = StudyArm(name=TEST_STUDY_ARM_NAME_00, group_size=20, arm_map=OrderedDict([
             (self.cell_screen, None), (self.cell_run_in, None),
@@ -1833,7 +1805,7 @@ def test_generate_isa_study_two_arms_single_cell_elements(self):
         ]))
         study_design = StudyDesign(study_arms=(first_arm, second_arm))
         study = study_design.generate_isa_study()
-        self.assertEqual(len(study.assays), 4)
+        self.assertEqual(len(study.assays), 2)
         expected_num_of_samples_nmr_plan_first_arm = reduce(
             lambda acc_value, sample_node: acc_value + sample_node.size,
             self.nmr_sample_assay_plan.sample_plan, 0) * first_arm.group_size
@@ -1904,8 +1876,6 @@ def test_generate_isa_study_two_arms_single_cell_elements_check_source_character
                 self.assertEqual(source.characteristics, [control_source_type])
             else:
                 self.assertEqual(source.characteristics, [treatment_source_type])
-        # self.assertIn(control_source_type.category, study.characteristic_categories)
-        # self.assertIn(treatment_source_type.category, study.characteristic_categories)
 
 
 class QualityControlServiceTest(BaseStudyDesignTest):

From 6140f9e9811f6f81b66d2ea161a2dca78fd23032 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Thu, 12 Nov 2020 16:56:08 +0000
Subject: [PATCH 03/25] tests work; fix #369

---
 isatools/create/connectors.py   |  2 +-
 isatools/create/models.py       | 44 +++++++++++++++++----------------
 tests/test_create_connectors.py |  6 +++--
 3 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/isatools/create/connectors.py b/isatools/create/connectors.py
index 0a204c06..ede27417 100644
--- a/isatools/create/connectors.py
+++ b/isatools/create/connectors.py
@@ -209,7 +209,7 @@ def _generate_characteristics_from_observational_factor(observational_factor_dic
 
 def generate_assay_ord_dict_from_config(datascriptor_assay_config, arm_name, epoch_no):
     res = OrderedDict()
-    res['name'] = datascriptor_assay_config['name']
+    res['id'], res['name'] = datascriptor_assay_config['id'], datascriptor_assay_config['name']
     res['measurement_type'] = _map_ontology_annotation(
         datascriptor_assay_config['measurement_type'], expand_strings=True
     )
diff --git a/isatools/create/models.py b/isatools/create/models.py
index e1b38a58..f1002cf5 100644
--- a/isatools/create/models.py
+++ b/isatools/create/models.py
@@ -1021,7 +1021,7 @@ def generate_assay_plan_from_dict(cls, assay_plan_dict,
         current_nodes = []
         for node_key, node_params in assay_plan_dict.items():
 
-            if node_key in ('name', 'selected_sample_types', 'measurement_type', 'technology_type'):
+            if node_key in ('id', 'name', 'selected_sample_types', 'measurement_type', 'technology_type'):
                 continue
 
             if isinstance(node_params, list):    # the node is a ProductNode
@@ -1426,7 +1426,10 @@ def from_sample_and_assay_plan_dict(cls, name, sample_type_dicts, *assay_plan_di
         for i, assay_plan_dict in enumerate(assay_plan_dicts):
             assay_graph = AssayGraph.generate_assay_plan_from_dict(
                 assay_plan_dict,
-                id_=str(uuid.uuid4()) if use_guids else '{0}{1}'.format(
+                # FIXME: this id cannot work as it is
+                id_=str(uuid.uuid4()) if use_guids
+                else assay_plan_dict['id'] if 'id' in assay_plan_dict
+                else '{0}{1}'.format(
                     ASSAY_GRAPH_PREFIX, str(i).zfill(n_digits(len(assay_plan_dicts)))
                 ),
                 quality_control=quality_controls[i] if len(quality_controls) > i else None
@@ -2135,7 +2138,7 @@ def _generate_samples_and_assays(self, sources_map, sampling_protocol, performer
         # generate assays
         for assay_graph in unique_assay_types:
             protocols.update({node for node in assay_graph.nodes if isinstance(node, Protocol)})
-            assays.append(self._generate_assay(assay_graph, samples_grouped_by_assay_graph[assay_graph]))
+            assays.append(self.generate_assay(assay_graph, samples_grouped_by_assay_graph[assay_graph]))
 
         return factors, protocols, samples, assays, process_sequence, ontology_sources
 
@@ -2206,7 +2209,7 @@ def _generate_isa_elements_from_node(
         return processes, other_materials, data_files, item, counter
 
     @staticmethod
-    def _generate_assay(assay_graph, assay_samples, cell_name=''):
+    def generate_assay(assay_graph, assay_samples):
         if not isinstance(assay_graph, AssayGraph):
             raise TypeError()
         """
@@ -2215,12 +2218,11 @@ def _generate_assay(assay_graph, assay_samples, cell_name=''):
             else None
         """
         measurement_type, technology_type = assay_graph.measurement_type, assay_graph.technology_type
-        assay_file_prefix = assay_graph.id if not cell_name else '{}_{}'.format(cell_name, assay_graph.id)
         assay = Assay(
             measurement_type=measurement_type,
             technology_type=technology_type,
             filename=urlify('a_{0}_{1}_{2}.txt'.format(
-                assay_file_prefix,
+                assay_graph.id,
                 measurement_type.term if isinstance(measurement_type, OntologyAnnotation) else measurement_type,
                 technology_type.term if isinstance(technology_type, OntologyAnnotation) else technology_type
             ))
@@ -2241,7 +2243,7 @@ def _generate_assay(assay_graph, assay_samples, cell_name=''):
                     ix = i * len(assay_samples) * size + j * size + k
                     log.debug('i = {0}, j = {1}, k={2}, ix={3}'.format(i, j, k, ix))
                     processes, other_materials, data_files, _, __ = StudyDesign._generate_isa_elements_from_node(
-                        node, assay_graph, assay_file_prefix, ix=ix, jx=0, counter=0, processes=[], other_materials=[],
+                        node, assay_graph, assay_graph.id, ix=ix, jx=0, counter=0, processes=[], other_materials=[],
                         data_files=[], previous_items=[sample]
                     )
                     assay.other_material.extend(other_materials)
@@ -2330,13 +2332,13 @@ def augment_study(cls, study, study_design, in_place=False):
                     for assay_graph in study_assay_plan.assay_plan:
                         assert isinstance(assay_graph, AssayGraph)
                         if assay_graph.quality_control:
-                            # CHECK the assumption here is that an assay file can univocally be identified
+                            # CHECK the assumption here is that an assay file can unequivocally be identified
                             # by StudyCell name, corresponding AssayGraph id and measurement type
                             # Such an assumption is correct as far a the Assay filename convention is not modified
                             measurement_type, technology_type = assay_graph.measurement_type, \
-                                                                assay_graph.technology_type
-                            assay_filename = urlify('a_{0}_{1}_{2}_{3}.txt'.format(
-                                cell.name, assay_graph.id,
+                                assay_graph.technology_type
+                            assay_filename = urlify('a_{0}_{1}_{2}.txt'.format(
+                                assay_graph.id,
                                 measurement_type.term if isinstance(measurement_type, OntologyAnnotation)
                                 else measurement_type,
                                 technology_type.term if isinstance(technology_type, OntologyAnnotation)
@@ -2368,8 +2370,7 @@ def augment_study(cls, study, study_design, in_place=False):
                                 post_run_samples=qc_samples_post_run,
                                 interspersed_samples=qc_samples_interspersed
                             )
-                            qc_study.assays[index] = StudyDesign._generate_assay(assay_graph, augmented_samples,
-                                                                                 cell_name=cell.name)
+                            qc_study.assays[index] = StudyDesign.generate_assay(assay_graph, augmented_samples)
         return qc_study
 
     @staticmethod
@@ -2520,13 +2521,13 @@ def isa_objects_factory(
     log.debug('sequence_no: {0}'.format(sequence_no))
     if isinstance(node, ProtocolNode):
         return Process(
-                name='{0}_{1}'.format(urlify(node.name), str(sequence_no).zfill(ZFILL_WIDTH)),
-                executes_protocol=node,
-                performer=performer,
-                parameter_values=node.parameter_values,
-                inputs=[],
-                outputs=[],
-            )
+            name='{0}_{1}'.format(urlify(node.name), str(sequence_no).zfill(ZFILL_WIDTH)),
+            executes_protocol=node,
+            performer=performer,
+            parameter_values=node.parameter_values,
+            inputs=[],
+            outputs=[],
+        )
     if isinstance(node, ProductNode):
         if node.type == SAMPLE:
             return Sample(
@@ -2562,10 +2563,11 @@ def isa_objects_factory(
                     measurement_type, technology_type, curr_assay_opt)
                 )
                 isa_class = globals()[curr_assay_opt['raw data file'].replace(' ', '')]
+                assert isa_class in {RawDataFile, RawSpectralDataFile}
                 return isa_class(
                     filename='{0}_{1}'.format(urlify(node.name), str(sequence_no).zfill(ZFILL_WIDTH))
                 )
-            except StopIteration as e:
+            except StopIteration:
                 return RawDataFile(
                     filename='{0}_{1}'.format(node.name, str(sequence_no).zfill(ZFILL_WIDTH))
                 )
diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py
index 5ec3e797..7c99444f 100644
--- a/tests/test_create_connectors.py
+++ b/tests/test_create_connectors.py
@@ -69,7 +69,7 @@ def test_assay_template_convert_ordered_dict_to_json_met_prof_mass_spec_annotate
         self.assertEqual(actual_annotated_json_mp_ms, {
             key: value for key, value in self.met_prof_jsons[1].items() if key not in ['@context']
         })
-    
+
     @staticmethod
     def _load_config(file_name):
         ds_design_config_file_path = os.path.abspath(
@@ -130,7 +130,9 @@ def test_generate_study_design_from_config_with_observational_factors(self):
                 self.assertIsInstance(source_char, Characteristic)
                 self.assertIsInstance(source_char.category, OntologyAnnotation)
                 self.assertIsInstance(source_char.value, OntologyAnnotation)
-        investigation = Investigation(studies=[design.generate_isa_study(split_assays_by_sample_type=True)])
+        investigation = Investigation(studies=[design.generate_isa_study()])
+        # two assay types are selected, so we expect to find only two assays in the studies
+        self.assertEqual(len(investigation.studies[0].assays), 2)
         inv_json = json.dumps(
             investigation,
             cls=ISAJSONEncoder,

From 07a34479c0fea7a2d7f5dfe84245148ca331c2b9 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Mon, 16 Nov 2020 19:51:47 +0000
Subject: [PATCH 04/25] implemented counter by type to name nodes #370

---
 isatools/create/models.py                | 71 +++++++++++++++++-------
 tests/test_create_models_study_design.py | 21 ++++++-
 2 files changed, 68 insertions(+), 24 deletions(-)

diff --git a/isatools/create/models.py b/isatools/create/models.py
index f1002cf5..5008aa01 100644
--- a/isatools/create/models.py
+++ b/isatools/create/models.py
@@ -660,10 +660,10 @@ def __init__(self, id_=str(uuid.uuid4()), name='', protocol_type=None, uri='',
         """
 
         :param id_:
-        :param name:
-        :param protocol_type:
-        :param uri:
-        :param description:
+        :param name: the name of the protocol
+        :param protocol_type: the type of the protocol
+        :param uri: a uri pointing to a resource describing the protocol
+        :param description: a  textual description of the protocol
         :param version:
         :param parameter_values: the values to be supplied to the Protocol Parameters
         :param replicates: int - the number of replicates (biological or technical) for this Protocol step. Must be a
@@ -2142,6 +2142,16 @@ def _generate_samples_and_assays(self, sources_map, sampling_protocol, performer
 
         return factors, protocols, samples, assays, process_sequence, ontology_sources
 
+    @staticmethod
+    def _increment_counter_by_node_type(counter, node):
+        if isinstance(node, ProductNode):
+            counter[node.type] = counter[node.type] + 1 if node.type in counter else 1
+            # FIXME do we need a check by node.name for DATA_FILE?
+        if isinstance(node, ProtocolNode):
+            # the attribute "name" should contain the same value as "protocol_type.term"
+            counter[node.name] = counter[node.name] + 1 if node.name in counter else 1
+        return counter
+
     @staticmethod
     def _generate_isa_elements_from_node(
             node,
@@ -2153,8 +2163,10 @@ def _generate_isa_elements_from_node(
             previous_items=None,
             ix=0,
             jx=0,
-            counter=0
+            counter=None
     ):
+        if counter is None:
+            counter = {}
         if previous_items is None:
             previous_items = []
         if data_files is None:
@@ -2164,8 +2176,9 @@ def _generate_isa_elements_from_node(
         if processes is None:
             processes = []
         log.debug('# processes: {0} - ix: {1}'.format(len(processes), ix))
+        counter = StudyDesign._increment_counter_by_node_type(counter, node)
         item = isa_objects_factory(
-            node, sequence_no='{0}-{1}-{2}'.format(assay_file_prefix, ix, counter),
+            node, assay_file_prefix, ix, counter,
             measurement_type=assay_graph.measurement_type,
             technology_type=assay_graph.technology_type
         )
@@ -2184,7 +2197,7 @@ def _generate_isa_elements_from_node(
             for jj in range(size):
                 jx = ii * size + jj
                 log.debug('ii = {0} - jj = {1} - jx = {2}'.format(ii, jj, jx))
-                counter += 1
+                # counter += 1
                 processes, other_materials, data_files, next_item, counter = \
                     StudyDesign._generate_isa_elements_from_node(
                         next_node, assay_graph, assay_file_prefix, processes, other_materials, data_files,
@@ -2243,7 +2256,7 @@ def generate_assay(assay_graph, assay_samples):
                     ix = i * len(assay_samples) * size + j * size + k
                     log.debug('i = {0}, j = {1}, k={2}, ix={3}'.format(i, j, k, ix))
                     processes, other_materials, data_files, _, __ = StudyDesign._generate_isa_elements_from_node(
-                        node, assay_graph, assay_graph.id, ix=ix, jx=0, counter=0, processes=[], other_materials=[],
+                        node, assay_graph, assay_graph.id, ix=ix, jx=0, counter=None, processes=[], other_materials=[],
                         data_files=[], previous_items=[sample]
                     )
                     assay.other_material.extend(other_materials)
@@ -2502,26 +2515,32 @@ def _generate_quality_control_samples(quality_control, study_cell, sample_size=0
         log.debug("Completed post-batch samples")
         return qc_sources, qc_samples_pre_run, qc_samples_interspersed, qc_samples_post_run, qc_processes
 
-
+# TODO: should I move this inside the StudyDesign class?
 def isa_objects_factory(
         node,
-        sequence_no,
+        assay_file_prefix,
+        ix,
+        counter,
         measurement_type=None,
         technology_type=None,
         performer=DEFAULT_PERFORMER
-    ):
+):
     """
     This method generates an ISA element from an ISA node
     :param technology_type:
     :param measurement_type:
     :param node: SequenceNode - can be either a ProductNode or a ProtocolNode
-    :param sequence_no: str - a sequential number to discriminate among items built in a batch
+    :param assay_file_prefix: str
+    :param ix: int the index of the starting node in the graph
+    :param counter: dict containing the counts for this specific subgraph
+    :param performer: str/Person
     :return: either a Sample or a Material or a DataFile. So far only RawDataFile is supported among files
     """
-    log.debug('sequence_no: {0}'.format(sequence_no))
     if isinstance(node, ProtocolNode):
         return Process(
-            name='{0}_{1}'.format(urlify(node.name), str(sequence_no).zfill(ZFILL_WIDTH)),
+            name='{}_{}-{}-<acquisition>{}'.format(
+                urlify(node.name), assay_file_prefix, ix, counter[node.name]
+            ),  # FIXME!!
             executes_protocol=node,
             performer=performer,
             parameter_values=node.parameter_values,
@@ -2531,24 +2550,24 @@ def isa_objects_factory(
     if isinstance(node, ProductNode):
         if node.type == SAMPLE:
             return Sample(
-                name='{0}_{1}'.format(SAMPLE_PREFIX, str(sequence_no).zfill(ZFILL_WIDTH)),
+                name='{}-{}-Sample{}'.format(assay_file_prefix, ix, counter[SAMPLE]),
                 characteristics=node.characteristics
             )
         if node.type == EXTRACT:
             return Extract(
-                name='{0}_{1}'.format(EXTRACT_PREFIX, str(sequence_no).zfill(ZFILL_WIDTH)),
+                name='{}-{}-Extract{}'.format(assay_file_prefix, ix, counter[EXTRACT]),
                 characteristics=node.characteristics
             )
         if node.type == LABELED_EXTRACT:
             return LabeledExtract(
-                name='{0}_{1}'.format(LABELED_EXTRACT_PREFIX, str(sequence_no).zfill(ZFILL_WIDTH)),
+                name='{}-{}-LE{}'.format(assay_file_prefix, ix, counter[LABELED_EXTRACT]),
                 characteristics=node.characteristics
             )
         # under the hypothesis that we deal only with raw data files
         # derived data file would require a completely separate approach
         if node.type == DATA_FILE:
             try:
-                log.debug('isa_objects_factory: Assay conf. found: {}; {};'.format(
+                log.debug('Assay conf. found: {}; {};'.format(
                     measurement_type, technology_type)
                 )
                 m_type_term = measurement_type.term if isinstance(measurement_type, OntologyAnnotation) \
@@ -2559,17 +2578,27 @@ def isa_objects_factory(
                     opt for opt in assays_opts if opt['measurement type'] == m_type_term and
                     opt['technology type'] == t_type_term
                 )
-                log.debug('isa_objects_factory: Assay conf. found: {}; {}; {};'.format(
+                log.debug('Assay conf. found: {}; {}; {};'.format(
                     measurement_type, technology_type, curr_assay_opt)
                 )
                 isa_class = globals()[curr_assay_opt['raw data file'].replace(' ', '')]
                 assert isa_class in {RawDataFile, RawSpectralDataFile}
                 return isa_class(
-                    filename='{0}_{1}'.format(urlify(node.name), str(sequence_no).zfill(ZFILL_WIDTH))
+                    filename='{}_{}-{}-{}'.format(
+                        urlify(node.name),
+                        assay_file_prefix,
+                        ix,
+                        counter[node.type]  # FIXME should this be changed to "counter[node.name]"?
+                    )
                 )
             except StopIteration:
                 return RawDataFile(
-                    filename='{0}_{1}'.format(node.name, str(sequence_no).zfill(ZFILL_WIDTH))
+                    filename='{}_{}-{}-{}'.format(
+                        urlify(node.name),
+                        assay_file_prefix,
+                        ix,
+                        counter[node.type]  # FIXME should this be changed to "counter[node.name]"?
+                    )
                 )
 
 
diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py
index c3630187..f9544a51 100644
--- a/tests/test_create_models_study_design.py
+++ b/tests/test_create_models_study_design.py
@@ -1704,6 +1704,21 @@ def test_generate_isa_study_00(self):
         print('Sources: {0}'.format(study.sources))
     """
 
+    def test_increment_counter_by_node_type(self):
+        assay_graph = AssayGraph.generate_assay_plan_from_dict(nmr_assay_dict)
+        extract_node = next(
+            node for node in assay_graph.nodes if isinstance(node, ProductNode) and node.type == EXTRACT
+        )
+        counter = StudyDesign._increment_counter_by_node_type({}, extract_node)
+        self.assertEqual(counter[EXTRACT], 1)
+        counter = StudyDesign._increment_counter_by_node_type(counter, extract_node)
+        self.assertEqual(counter[EXTRACT], 2)
+        protocol_node = next(node for node in assay_graph.nodes if isinstance(node, ProtocolNode))
+        counter = StudyDesign._increment_counter_by_node_type(counter, protocol_node)
+        self.assertEqual(counter[protocol_node.name], 1)
+        counter = StudyDesign._increment_counter_by_node_type(counter, protocol_node)
+        self.assertEqual(counter[protocol_node.name], 2)
+
     def test__generate_isa_elements_from_node(self):
         assay_graph = AssayGraph.generate_assay_plan_from_dict(nmr_assay_dict)
         node = next(iter(assay_graph.start_nodes))
@@ -1716,10 +1731,10 @@ def test__generate_isa_elements_from_node(self):
         extraction_processes = [process for process in processes if process.executes_protocol.name == 'extraction']
         self.assertEqual(len(extraction_processes), 1)
         nmr_processes = [process for process in processes if process.executes_protocol.name == 'nmr spectroscopy']
-        self.assertEqual(len(nmr_processes), 8*2)
-        self.assertEqual(len(processes), 1+8*2)
+        self.assertEqual(len(nmr_processes), 8 * 2)
+        self.assertEqual(len(processes), 1 + 8 * 2)
         self.assertEqual(len(other_materials), 2)
-        self.assertEqual(len(data_files), 8*2)      # 16 raw data files
+        self.assertEqual(len(data_files), 8 * 2)      # 16 raw data files
         for nmr_process in nmr_processes:
             self.assertIsInstance(nmr_process, Process)
             print('expected previous process: {0}'.format(extraction_processes[0]))

From 348f6324166f1b93e0a2ffefc392ec79952c76e3 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Tue, 17 Nov 2020 17:21:15 +0000
Subject: [PATCH 05/25] deleted unused files #368

---
 .../create/MTBLS-purge-spurious-factors.py    |  27 ---
 isatools/create/{models.py => model.py}       |   0
 isatools/create/mtbls-process.py              | 200 ------------------
 3 files changed, 227 deletions(-)
 delete mode 100644 isatools/create/MTBLS-purge-spurious-factors.py
 rename isatools/create/{models.py => model.py} (100%)
 delete mode 100644 isatools/create/mtbls-process.py

diff --git a/isatools/create/MTBLS-purge-spurious-factors.py b/isatools/create/MTBLS-purge-spurious-factors.py
deleted file mode 100644
index 2fd8f392..00000000
--- a/isatools/create/MTBLS-purge-spurious-factors.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from isatools.utils import IsaTabFixer
-
-
-def main():
-    fixer = IsaTabFixer('/Users/Philippe/Downloads/ftp.ebi.ac.uk/pub/'
-                        'databases/metabolights/studies/public/MTBLS81/'
-                        's_Study id.txt')
-
-    this_factor = "Age at sacrifice"
-
-    fixer.fix_factor(this_factor)  # fixes by moving factor to charac
-
-    # spurious_factors = "factor: Age at sacrifice"
-    #
-    # factornames = []
-    # factornames = spurious_factors.split("factor: ")
-    #
-    # for element in factornames:
-    #
-    #     this_factor = element.strip()
-    #     this_factor = re.sub(";","", this_factor)
-    #     this_factor = this_factor.strip()
-    #     print(this_factor)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/isatools/create/models.py b/isatools/create/model.py
similarity index 100%
rename from isatools/create/models.py
rename to isatools/create/model.py
diff --git a/isatools/create/mtbls-process.py b/isatools/create/mtbls-process.py
deleted file mode 100644
index 08d6765f..00000000
--- a/isatools/create/mtbls-process.py
+++ /dev/null
@@ -1,200 +0,0 @@
-
-import io
-import json
-from datetime import date
-
-
-# Make it work for Python 2+3 and with Unicode
-
-
-def main():
-    blocks = []
-    write_dir = "/Users/Philippe/Documents/git/isa-api/isatools/create/"
-    data_header = str.join("\t", ("Accession Number",
-                                  "calculated factor combinations",
-                                  "counted factor combinations",
-                                  "design automatic annotation",
-                                  "number of sources",
-                                  "number of samples",
-                                  "curation warnings",
-                                  "spurious factors"))
-
-    fh = open(write_dir + "/" + str(date.today())
-              + "-MTBLS-ISA-curation-report.txt", "w")
-    fh.writelines(data_header)
-    fh.writelines("\n")
-
-    with open('/Users/Philippe/Documents/PhenoMenal/'
-              'Metabolights-metadata-Testing/out.txt') as fp:
-
-        for line in fp:
-
-            begin = False
-            acc_num = 0
-            # block = []
-            if "load OK" in line:
-                block = []
-                start = line.strip()
-                block.append(start)
-                begin = True
-                # print(begin)
-                # print(line.strip())
-
-            elif "load FAIL" in line:
-                block = []
-                start = line.strip()
-                block.append(start)
-                begin = True
-                # print(begin)
-                # print(line.strip())
-
-            else:
-                begin = False
-                # print(line)
-                # print(begin)
-                block.append(line.strip())
-            # print(block)
-            if begin:
-                blocks.append(block)
-
-        # print(len(blocks))
-
-    # print("BLOCK: ", blocks[3])
-
-        data = []
-    for e in blocks:
-        # print("L: ",e)
-        design = ""
-        factors = {}
-        factor_count = 0
-        count_mat = {}
-        spurious_factor = {}
-        non_factors = []
-        calc_nb_sg = -1
-
-        for x in e:
-            bits = []
-            if "load OK" in x:
-                bits = x.split(" ")
-                acc_num = bits[0]
-                # print("acc_num: ", acc_num)
-
-            elif "load FAIL" in x:
-                bits = x.split(",")
-                acc_num = bits[0]
-
-                print("accnum: ", acc_num)
-                max_nb_study_group = -1
-                calc_nb_sg = -1
-                design = "_"
-                count_mat["source"] = "_"
-                count_mat["sample"] = "_"
-                sampling_event = bits[1]
-                non_factors_as_string = "_"
-
-            elif x.startswith("Calculated"):
-                bits = x.split(" ")
-                calc_nb_sg = int(bits[1])
-                # print("number of calculated study groups: ", calc_nb_sg )
-
-            elif x.startswith("Study sample level:"):
-                bits = x.split(',')
-                # print("group sizes: ", bits)
-                for bit in bits:
-                    bob_a, bob_b = bit.split(" = ")
-                    if "total sources" in bob_a:
-                        count_mat["source"] = int(bob_b)
-                    if "total samples" in bob_a:
-                        count_mat["sample"] = int(bob_b)
-
-                    # print("check Source Definitions")
-                if count_mat["source"] == count_mat["sample"]:
-                    sampling_event = "single sampling"
-                else:
-                    # print("sample size: ", count_mat["source"], "|
-                    # number of samples: ", count_mat["sample"])
-                    sampling_event = "multiple/repeated samping"
-
-            elif x.startswith("factor: "):
-                bits = x.split("|")
-                # print("BITS:", bits[1])
-                factor_count = factor_count + 1
-                bits[1] = bits[1].strip("' levels=")
-                bits[1] = bits[1].strip(" '")
-                # print("factor bits:", bits[1])
-                if int(bits[1]) == 1:
-                    spurious_factor[bits[0]] = int(bits[1])
-                    # print("SPURIOUS FACTOR", bits[0], bits[1])
-                    non_factors.append(bits[0])
-                else:
-                    factors[bits[0]] = int(bits[1])
-
-            elif x.startswith("('"):
-                bits = x.split(",")
-                # print("treatment: ", bits)
-
-        this_array = factors.values()
-        max_nb_study_group = 1
-        for element in this_array:
-            # print("in array: ",element)
-            max_nb_study_group = element * max_nb_study_group
-        # print("max: ", max_nb_stdy_group)
-
-        if count_mat["source"] == 1 and calc_nb_sg > 1:
-            sampling_event = "ERROR LIKELY: check source declaration"
-            # print(count_mat["source"],
-            # ":::", count_mat["sample"], "///",  calc_nb_sg)
-            # print(sampling_event)
-
-        if max_nb_study_group == calc_nb_sg:
-            design = "full factorial  design"
-            # print(design)
-
-        elif max_nb_study_group > calc_nb_sg > 0:
-            design = "fractional factorial design"
-            # print(design)
-
-        elif calc_nb_sg == -1:
-            design = "none"
-
-        # elif calc_nb_sg > 1 &
-
-        else:
-            print("problem with study group declaration, please review study!")
-
-        non_factors_as_string = ';'.join(non_factors)
-        print(acc_num, " \t ", max_nb_study_group, " \t ", calc_nb_sg,
-              " \t ", design, " \t ", count_mat["source"], " \t ",
-              count_mat["sample"], " \t ", sampling_event, " \t ",
-              non_factors_as_string)
-
-        data_element = {"study_key": acc_num,
-                        "total_study_groups": max_nb_study_group,
-                        "sources": count_mat["source"],
-                        "samples": count_mat["sample"],
-                        "inferred_study_design": design,
-                        "sampling": sampling_event,
-                        "spurious_factors": non_factors_as_string}
-        data.append(data_element)
-
-        fh.writelines(str.join('\t', (acc_num, str(max_nb_study_group),
-                                      str(calc_nb_sg), design,
-                                      str(count_mat["source"]),
-                                      str(count_mat["sample"]),
-                                      sampling_event, non_factors_as_string)))
-        fh.writelines("\n")
-
-        try:
-            to_unicode = unicode
-        except NameError:
-            to_unicode = str
-        # Write JSON file
-        with io.open('data.json', 'w', encoding='utf8') as outfile:
-            str_ = json.dumps(data,
-                              indent=4, sort_keys=True,
-                              separators=(',', ': '), ensure_ascii=False)
-            outfile.write(to_unicode(str_))
-
-
-if __name__ == '__main__':
-    main()

From 484c5557b6e71b1e51a5da9bec2fd1cc772db4ab Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Tue, 17 Nov 2020 17:22:02 +0000
Subject: [PATCH 06/25] deleted unused files #368 v2

---
 isatools/create/study_design_wizard.py | 1885 ------------------------
 1 file changed, 1885 deletions(-)
 delete mode 100644 isatools/create/study_design_wizard.py

diff --git a/isatools/create/study_design_wizard.py b/isatools/create/study_design_wizard.py
deleted file mode 100644
index a3bd3391..00000000
--- a/isatools/create/study_design_wizard.py
+++ /dev/null
@@ -1,1885 +0,0 @@
-import datetime
-import json
-import random
-import uuid
-from itertools import permutations, product
-
-from isatools import isatab
-from isatools.isatab import dump, write_study_table_files
-from isatools.model import *
-
-
-# from random import sample
-
-__author__ = 'proccaserra@gmail.com'
-
-
-# def save_study_profile():
-    # TODO: code the method
-    # save study parameters using YAML or JSON file
-    # -study_type: intervention or observation
-    # -number_of_intervention: integer
-    #   -agent
-    #   -intensity
-    #   -duration
-    # -study_regularity: balanced_or_imbalanced
-    #    -study group size: integer
-    # -study_variable_blocking:
-    # -study_hard_to_change_variable: yes_no
-    #
-
-
-# def load_study_profile():
-    # TODO: do it
-
-
-def get_parameters( some_json_file ):
-    source = json.loads( some_json_file )
-    return dict(
-        study_type = source.get('study_type',1),
-        treatment_multiplicity= source['treatment_multiplicity'],
-        hard_to_change_factor = source['hard_to_change_factor'],
-        intervention_type_list = source['intervention_type_list'],
-        sg_size = source["sg_size"]["size"],
-        sg_size_toall = source["sg_size"]["applied_to_all_flag"],
-        sample_list = source["sample_collection_list"]["sample_type"],
-        assay_plan = source["assay_type"][""][""]
-    )
-
-
-
-def use_default_inv():
-    try:
-        investigation = Investigation()
-        investigation.identifier = ""
-        investigation.title = ""
-        investigation.description = ""
-        investigation.submission_date = ""
-        investigation.public_release_date = ""
-        study = Study(filename="s_study.txt", comments=[])
-        study.identifier = str(uuid.uuid4())
-        study.title = "boilerplate title"
-        study.description = "boilerplate study description (testing purpose)"
-        study.submission_date = str(datetime.date.today())
-        study.public_release_date = str(datetime.date.today() + datetime.timedelta(days=30))
-
-        sample_collection_protocol = Protocol(name="sample collection",
-                                              protocol_type=OntologyAnnotation(term="sample collection"))
-
-        study.protocols.append(sample_collection_protocol)
-        investigation.studies.append(study)
-
-        contact = Person(first_name="Boiler", last_name="Plate", affiliation="boiler plate affiliation")
-        # roles=[OntologyAnnotation(term="submitter")]
-        study.contacts.append(contact)
-        publication = Publication(title="boiler plate publication", author_list="A. Robertson, B. Robertson")
-        publication.pubmed_id = "12345678"
-        publication.status = OntologyAnnotation(term="published")
-        study.publications.append(publication)
-
-        return investigation
-
-    except IOError:
-        print("error in get_number_of_factors() method")
-
-
-def remove_duplicate_from_list(some_list):
-    # and some_list.contains(',')
-    try:
-        if len(some_list) > 0:
-            # removes trailing whitespace in a list such as a,b ,c ,c
-            list_values = [x.strip() for x in some_list.split(',')]
-            # removes any duplicate values in a list such as a,a,b,c
-            list_values_nodup = list(set(list_values))
-            # removes any empty string supplied as is a,,c,d
-            # list_values_nodup = filter(bool, list_values_nodup)
-        else:
-            print("the list you have supplied is not valid, please enter a csv list")
-
-        return list_values_nodup
-
-    except ValueError:
-        print("error in value in remove_duplicate_from_list() method")
-
-
-def compute_study_groups(factor_and_levels):
-    # TODO: rename compute_study_groups to compute_treatment
-    try:
-        study_groups = [dict(zip(factor_and_levels, x)) for x in product(*factor_and_levels.values())]
-        # print study_groups
-        return study_groups
-    except IOError:
-        print("error in compute_study_groups() method")
-
-
-def get_number_of_factors():
-    try:
-        number = input("how many study non-interventional factors (i.e variable intrinsic to the model organism such as gender or strain or ethnicity) are there? (provide an integer): ")
-        return number
-    except IOError:
-        print("error in get_number_of_factors() method")
-
-
-def intervention_or_observation():
-    try:
-        is_intervention = True
-        inter_or_obs = input("is the study an intervention or an observation (please select key)?"
-                             " (intervention [1]/observation [2])")
-        # intervention
-        if inter_or_obs == "1":
-            is_intervention = True
-
-        # observation
-        elif inter_or_obs == "2":
-            is_intervention = False
-
-        else:
-            print("answer should be either 'intervention' or 'observation'")
-            print("answer not recognized, choose between 'intervention' or 'observation'")
-
-        return is_intervention
-    except IOError:
-        print("input error in intervention_or_observation() method")
-
-
-def single_or_repeated_treatment():
-    treatment_repeat = False
-    try:
-        treatment_repeat_input = input("are study subjects exposed to a single intervention or to multiple intervention"
-                                       " (applied sequentially)? (choose either 'single [1]' or 'multiple [2]')")
-        if treatment_repeat_input == '1':
-            treatment_repeat = False
-        elif treatment_repeat_input == '2':
-            treatment_repeat = True
-        else:
-            print('invalid input, please try again')
-            single_or_repeated_treatment()
-
-        return treatment_repeat
-    except IOError:
-        print("input error in single_or_repeated_treatment() method")
-
-
-def get_repeat_number():
-    try:
-        nbr_of_repeats_input = input("how many interventions each subject receives in total (enter an integer)? ")
-        nbr_of_repeats = int(nbr_of_repeats_input)
-        return nbr_of_repeats
-    except IOError:
-        print("get_repeat_number() method error")
-
-
-def get_processrun_random_token(number_of_elements):
-    try:
-        # number_of_elements = input("what is the size of the plate/list? ")
-        run_order = []
-        my_list = list(range(int(number_of_elements)))
-        new_list = [x + 1 for x in my_list]
-        # print("my list: ",new_list)
-        run_order = random.sample(new_list, len(new_list))
-        # print("run order: ", run_order)
-        return run_order
-
-    except NotImplemented:
-        print('something went wrong in get_processrun_random_token() method')
-
-
-def create_control_element(some_inv, control_type, quantity, frequency):
-
-    try:
-        if control_type == "1":
-            for entity in 1..quantity:
-                control_source = Source(name='solvent blank', id_=entity)
-                some_inv.sources.append(control_source)
-        if control_type == "2":
-            for entity in 1..quantity:
-                control_source = Source(name='sample preparation blank', id_=entity)
-                some_inv.sources.append(control_source)
-        if control_type == "3":
-            for entity in 1..quantity:
-                control_source = Source(name='study reference material', id_=entity)
-                some_inv.sources.append(control_source)
-        else:
-            print('choice not,recognised,please try again')
-            # create_control_element()
-
-    except NotImplemented:
-        print("something went wrong in create_control_element() method")
-
-
-def get_list_of_interventions(some_investigation):
-
-    try:
-        # IMPORTANT: we will first only support symmetric arms
-        treatment_type_list = input("list the different intervention types (comma-separated-values from the following"
-                                    " options {chemical intervention [1], behavioral intervention [2], "
-                                    "surgical intervention [3], "
-                                    "biological intervention [4], radiological intervention [5]}): ")
-        treatment_type_list = remove_duplicate_from_list(treatment_type_list)
-
-        treatment_types = {}
-        for treatment_type in treatment_type_list:
-            treatment_type.strip()
-            if treatment_type == "1":
-                treatment_types["chemical intervention"] = {"agent": [], "dose": [], "duration of exposure": []}
-                f1 = StudyFactor(name="agent", factor_type=OntologyAnnotation(term="perturbation agent"))
-                some_investigation.studies[0].factors.append(f1)
-                f2 = StudyFactor(name="dose", factor_type=OntologyAnnotation(term="intensity"))
-                some_investigation.studies[0].factors.append(f2)
-                f3 = StudyFactor(name="duration of exposure", factor_type=OntologyAnnotation(term="time"))
-                some_investigation.studies[0].factors.append(f3)
-
-                # set_factor_as_key("chemical agent", factor_dict)
-
-            if treatment_type == "2":
-                # set_factor_as_key("behavioral agent", factor_dict)
-                treatment_types["behavioral intervention"] = {"agent": [],
-                                                              "dose": [],
-                                                              "duration of exposure": []}
-
-            if treatment_type == "3":
-                # set_factor_as_key("surgery", factor_dict)
-                treatment_types["surgical intervention"] = {"surgery procedure": [],
-                                                            "dose": [],
-                                                            "duration post surgery": []}
-
-            if treatment_type == "4":
-                # set_factor_as_key("biological agent", factor_dict)
-                treatment_types["biological intervention"] = {"agent": [],
-                                                              "dose": [],
-                                                              "duration of exposure": []}
-
-            if treatment_type == "5":
-                # set_factor_as_key("radiological agent", factor_dict)
-                treatment_types["radiological intervention"] = {"agent": [],
-                                                                "dose": [],
-                                                                "duration of exposure": []}
-
-        return treatment_types, some_investigation
-
-    except IOError:
-        print("error in get_list_of_interventions() method")
-#
-#     """if treatment_list != "" and treatment_list.isalnum():
-#        return treatment_list
-#     else:
-#        print "the treatments supplied are not valid, please enter a string: "
-#        """
-#
-#     """if treatment"""
-#
-#
-# """def get_factors_from_treatment_type(treatment_type_list):"""
-
-
-
-def compute_treatment_sequences(treatments, num_repeats):
-    try:
-        treatment_sequences = list(permutations(treatments, num_repeats))
-        return treatment_sequences
-    except IOError:
-        print("error in compute_treatment_sequences() method")
-
-
-def get_factor_name():
-    try:
-        factor_name = input("provide factor name: ")
-        if factor_name != "" and factor_name.isalnum():
-            return factor_name
-        else:
-            print("the factor supplied is not valid, please enter a string: ")
-    except IOError:
-        print("error in get_factor_name() method")
-
-
-def set_factor_as_key(factor_name, factor_dict):
-    try:
-        this_factor_dict = factor_dict
-        if factor_name not in factor_dict.keys():
-            this_factor_dict[factor_name] = []
-        else:
-            print("factor already declared! define a new factor")
-            get_factor_name()
-        return this_factor_dict
-    except IOError:
-        print("error in set_factor_as_key() method")
-
-
-def set_factor_values(factor_name, factor_dict):
-    try:
-        factor_values = input("provide the factor levels associated with '" + factor_name +
-                              "' as a list of comma separated values: ")
-        factor_values = remove_duplicate_from_list(factor_values)
-        for element in factor_values:
-            factor_dict[factor_name].append(element)
-        return factor_dict
-    except IOError:
-        print("error in set_factor_values() method")
-
-
-def balanced_design():
-    try:
-        balanced_design_var = input("Are all study groups of the same size, i.e have the same number of subjects? "
-                                    "(in other words, are the groups balanced)? (balanced [1]/unbalanced [2])")
-        if balanced_design_var == "1":
-            is_balanced = True
-            return is_balanced
-        elif balanced_design_var == "2":
-            is_balanced = False
-            return is_balanced
-        else:
-            print("answer should be either 'balanced' or 'unbalanced'")
-            print("answer not recognized, choose between 'balanced' or 'unbalanced'")
-    except IOError:
-        print("Error in balanced_design() method")
-
-
-def full_or_fractional():
-    try:
-        full_or_fract = input("did you use a all possible groups or only a subset? (full [1]/fractional [2])")
-        if full_or_fract == "1":
-            full_or_fract = True
-        elif full_or_fract == "2":
-            full_or_fract = False
-        else:
-            print("answer not recognized, choose between 'full' or 'fractional'")
-            full_or_fractional()
-
-        return full_or_fract
-
-    except IOError:
-        print("error in full_or_fractional() method")
-
-
-def free_or_restricted_randomization():
-    try:
-        design = ""
-        hardtochange = input("Are there 'hard to change' factors,"
-                             " which restrict randomization of experimental unit? (no [1]/yes [2])")
-        if hardtochange == "1":
-            # free_randomization = True
-            design = "factorial design"
-        elif hardtochange == "2":
-            # free_randomization = False
-            splitplot = input("how many 'hard to change factors'? (1/2")
-            if splitplot == "1":
-                design = "split plot design"
-            elif splitplot == "2":
-                design = "split split plot design"
-        else:
-            print("answer not recognized, choose between 'yes' or 'no'")
-            free_or_restricted_randomization()
-
-        return design
-
-    except IOError:
-        print("error in complete_or_restricted_randomization() method")
-
-
-def choose_fluid_or_solid_or_both():
-
-    this_sample_type = input("are the samples 'solid'[1] or 'fluid'[2] or 'both'[3]? ")
-    if this_sample_type == "1":
-        return this_sample_type
-        # collected_samples(sample_type)
-    elif this_sample_type == "2":
-        return this_sample_type
-        # collected_samples(sample_type)
-    elif this_sample_type == "3":
-        return this_sample_type
-        # collected_samples(sample_type)
-    else:
-        print("input not recognised")
-        choose_fluid_or_solid_or_both()
-
-# def sample_collection_plan(sample_types):
-#
-#         samples_and_events = {}
-#         for s_type in sample_types:
-#             specific_sampling_events = input("for sample type " + "'" + str(s_type) + "'," +
-#                                              " how many times each of the samples have been collected (integer): ")
-#             # specific_sampling_events = remove_duplicate_from_list(specific_sampling_events)
-#             samples_and_events[str(s_type)] = specific_sampling_events
-#
-#         return samples_and_events
-
-
-def sample_collection_events(some_sample_type):
-
-    try:
-        sample_types = []
-        samples_and_events_plan = {}
-
-        if some_sample_type == "1":
-            sample_types = input("select from the following list (liver,kidney,muscle,brain,lung,flower):  ")
-            sample_types = remove_duplicate_from_list(sample_types)
-            # return solid_samples
-
-        elif some_sample_type == "2":
-            sample_types = input("select from the following list (urine,blood,csf,sweat,lavage):  ")
-            # for example: blood,urine,sweat,muscle
-            sample_types = remove_duplicate_from_list(sample_types)
-            # return fluid_samples
-
-        elif some_sample_type == "3":
-            sample_types = input("select from the following list (urine,blood,csf,sweat,lavage): ")
-            sample_types = remove_duplicate_from_list(sample_types)
-            # print(sample_types)
-            s_sample_types = input("select from the following list (liver,kidney,muscle,brain,lung,2): ")
-            s_sample_types = remove_duplicate_from_list(s_sample_types)
-            sample_types.append(s_sample_types)
-
-        else:
-            print("input not recognised")
-            # choose_fluid_or_solid_both()
-        print(sample_types)
-        for s_type in sample_types:
-            specific_sampling_events = input("for sample type " + "'" + str(s_type) + "'," +
-                                             " how many times each of the samples have been collected (integer): ")
-            # specific_sampling_events = remove_duplicate_from_list(specific_sampling_events)
-            samples_and_events_plan[str(s_type)] = specific_sampling_events
-        print(samples_and_events_plan)
-
-        return samples_and_events_plan
-    except IOError:
-        print("error in sample_collection_events() method")
-
-
-# def define_sample_collection_plan():
-# def define_assay_data_plan():
-
-def create_study_subjects(group_size, this_study, group_uuid, group_factor_combo, some_sampling_event_plan):
-    try:
-
-        if group_size > 0:
-
-            # sample_type = choose_fluid_or_solid_or_both()
-            # collected_materials = collected_sample_types(sample_type)
-            # sampling_plan = sample_collection_plan(collected_materials)
-
-            for individual in range(group_size):
-                source_name = "studygroup_" + str(group_uuid) + "_subject#" + str(individual)
-                ncbitaxon = OntologySource(name="NCBITaxon", description="NCBI Taxonomy")
-                characteristic_organism = Characteristic(category=OntologyAnnotation(term="organism"),
-                                                         value=OntologyAnnotation(term="Homo sapiens",
-                                                                                  term_source=ncbitaxon,
-                                                                                  term_accession="http://purl.bioontology.org/ontology/NCBITAXON/9606"))
-                # TODO: request taxonomic information from users
-                source = Source(name=source_name)
-                source.characteristics.append(characteristic_organism)
-                # print("source: ", source.name, source.characteristics[0].category.term,
-                #       source.characteristics[0].value.term)
-                this_study.studies[0].sources.append(source)
-
-                for tissue, number_of_collections in some_sampling_event_plan.items():
-
-                    for specimen_number in range(int(number_of_collections)):
-
-                        sample_name = source_name + "_" + "sample#" + str(specimen_number) + "_" + tissue
-                        sample_template = Sample(name=sample_name, derives_from=[source])
-                        characteristic_op = Characteristic(category=OntologyAnnotation(term="organism part"),
-                                                           value=OntologyAnnotation(term=tissue))
-                        sample_template.characteristics.append(characteristic_op)
-                        characteristic_rk = Characteristic(category=OntologyAnnotation(term="collection event rank"),
-                                                           value=OntologyAnnotation(term=str(specimen_number+1)))
-                        sample_template.characteristics.append(characteristic_rk)
-                        # print("sample type: " + key, ", number of collection events: " + value + " times.")
-
-                        # this_study.studies[0].samples = batch_create_materials(prototype_sample, n=2)
-                        # for sam in this_study.studies[0].samples:
-                        # sample_name = source_name + "_" + "sample#" + str(i)
-                        # sample = Sample(name=sample_name, derives_from=[source])
-
-                        combo = group_factor_combo
-                        # print("this study group from create_study_subject: ", combo)
-                        for key, value in combo.items():
-                            # print("this key:", key)
-                            for item in range(0, len(this_study.studies[0].factors)):
-                                if key == this_study.studies[0].factors[item].name:
-                                    # print("factor name: ", this_study.studies[0].factors[item].name)
-                                    fv = FactorValue(factor_name=this_study.studies[0].factors[item],
-                                                     value=OntologyAnnotation(term=combo[key]))
-                                    sample_template.factor_values.append(fv)
-
-                        # print("sample: ", sample_template.name)
-                        this_study.studies[0].samples.append(sample_template)
-                        process_name = "protocol_" + str(group_uuid)
-                        sample_collection_process = Process(name=process_name,
-                                                            executes_protocol=this_study.studies[0].protocols[0],
-                                                            date_=datetime.date.today() + datetime.timedelta(days=-30),
-                                                            performer="bob")
-                        sample_collection_process.outputs.append(sample_template)
-                        sample_collection_process.inputs.append(source)
-
-                        this_study.studies[0].process_sequence.append(sample_collection_process)
-                        # print("p: ", sample_collection_process.name, sample_collection_process.executes_protocol.name,
-                        #       sample_collection_process.inputs[0].name, sample_collection_process.outputs[0].name,
-                        #       sample_collection_process.date, sample_collection_process.performer)
-
-        return this_study
-
-    except NotImplemented:
-        print("error in create_study_subject() method")
-
-
-def set_study_arms(list_of_study_group_dictionaries, this_investigation, this_repeats):
-    try:
-        # print(this_repeats)
-        study_groups = {}
-        # forf = full_or_fractional()
-        bd = balanced_design()
-
-        if bd is True and this_repeats is False:
-
-            size = input("provide the number of subject per study group (must be an integer): ")
-            if size.isdigit():
-                size = int(size)
-                if size > 0:
-
-                    study_group_size = size
-                    stato = OntologySource(name="STATO", description="Ontology for Statistical Methods")
-                    this_investigation.ontology_source_references.append(stato)
-                    design_term = OntologyAnnotation(term_source=stato)
-                    design_term.term = "balanced design"
-                    design_term.term_accession = "http://purl.obolibrary.org/obo/STATO_0000003"
-                    this_investigation.studies[0].design_descriptors.append(design_term)
-
-                    comment1 = Comment(name="number of study groups", value=len(list_of_study_group_dictionaries))
-                    comment2 = Comment(name="study group size", value=int(study_group_size))
-
-                    this_investigation.studies[0].comments.append(comment1)
-                    this_investigation.studies[0].comments.append(comment2)
-
-                    sample_type = choose_fluid_or_solid_or_both()
-                    current_sampling_plan = sample_collection_events(sample_type)
-
-                    sg_index = 0
-                    for sg_index in range(len(list_of_study_group_dictionaries)):
-                        study_groups["guid"] = uuid.uuid4()
-                        study_groups["id"] = sg_index
-                        study_groups["factor_level_combo"] = list_of_study_group_dictionaries[sg_index]
-                        # print("this study group: ", study_groups["factor_level_combo"])
-                        study_groups["size"] = study_group_size
-                        this_investigation = create_study_subjects(study_group_size,
-                                                                   this_investigation,
-                                                                   study_groups["guid"],
-                                                                   study_groups["factor_level_combo"],
-                                                                   current_sampling_plan)
-            else:
-                print("invalid input, please try again")
-
-        elif bd is False and this_repeats is False:
-            for sg_index in range(len(list_of_study_group_dictionaries)):
-                study_groups["guid"] = uuid.uuid4()
-                study_groups["id"] = sg_index
-                study_groups["factor_level_combo"] = list_of_study_group_dictionaries[sg_index]
-                size = input("provide the number of subject per study group (must be an integer): ")
-                size = int(size)
-                if int(size) > 0:
-                    study_group_size = size
-                    stato = OntologySource(name="STATO", description="Ontology for Statistical Methods")
-                    this_investigation.ontology_source_references.append(stato)
-                    design_term = OntologyAnnotation(term_source=stato)
-                    design_term.term = "unbalanced design"
-                    design_term.term_accession = "http://purl.obolibrary.org/obo/STATO_000000X"
-                    this_investigation.studies[0].design_descriptors.append(design_term)
-
-                    study_groups["size"] = study_group_size
-                    sample_type = choose_fluid_or_solid_or_both()
-                    current_sampling_plan = sample_collection_events(sample_type)
-
-                    this_investigation = create_study_subjects(study_group_size,
-                                                               this_investigation,
-                                                               study_groups["guid"],
-                                                               study_groups["factor_level_combo"],
-                                                               current_sampling_plan)
-                else:
-                    print("invalid input, please try again")
-
-                print(study_groups)
-
-        elif bd is False and this_repeats is True:
-
-            nb_repeats = input("state the number of consecutive treatments (integer): ")
-            # print study_factor_combo
-            sequences = compute_treatment_sequences(list_of_study_group_dictionaries, int(nb_repeats))
-            print("sequences")
-            for sg_index in range(len(sequences)):
-                study_groups["guid"] = uuid.uuid4()
-                study_groups["id"] = sg_index
-                study_groups["sequence"] = sequences[sg_index]
-                size = input("provide the number of subject per study arm (must be an integer): ")
-                size = int(size)
-                if int(size) > 0:
-                    study_group_size = size
-                    study_groups["size"] = study_group_size
-                    sample_type = choose_fluid_or_solid_or_both()
-                    current_sampling_plan = sample_collection_events(sample_type)
-                    this_investigation = create_study_subjects(study_group_size,
-                                                               this_investigation,
-                                                               study_groups["guid"],
-                                                               study_groups["factor_level_combo"],
-                                                               current_sampling_plan)
-
-                else:
-                    print("invalid input, please try again")
-
-        else:
-            nb_repeats = input("state the number of consecutive treatments (integer): ")
-            # print study_factor_combo
-            sequences = compute_treatment_sequences(list_of_study_group_dictionaries, int(nb_repeats))
-            print(sequences)
-            for sg_index in range(len(sequences)):
-                study_groups["guid"] = uuid.uuid4()
-                study_groups["id"] = sg_index
-                study_groups["sequence"] = sequences[sg_index]
-                size = input("provide the number of subject per study arm (must be an integer): ")
-                size = int(size)
-                if int(size) > 0:
-                    study_group_size = size
-                    study_groups["size"] = study_group_size
-                    sample_type = choose_fluid_or_solid_or_both()
-                    current_sampling_plan = sample_collection_events(sample_type)
-                    this_investigation = create_study_subjects(study_group_size,
-                                                               this_investigation,
-                                                               study_groups["guid"],
-                                                               study_groups["factor_level_combo"],
-                                                               current_sampling_plan)
-                else:
-                    print("invalid input, please try again")
-
-                # print(study_groups)
-
-        return this_investigation, current_sampling_plan
-
-    except IOError:
-        print("error in set_study_arms() method")
-
-# def sample_collection_plan(sample_types):
-#
-#         samples_and_events = {}
-#         for s_type in sample_types:
-#             specific_sampling_events = input("for sample type " + "'"
-# + str(s_type) + "'," +
-# " how many times each of the samples have been collected (integer): ")
-# specific_sampling_events = remove_duplicate_from_list(
-# specific_sampling_events)
-#             samples_and_events[str(s_type)] = specific_sampling_events
-#
-#         return samples_and_events
-
-# def collection_sample_type():
-#     try:
-#         sample_types = input("list the type of sample collected from each
-# study group member as csv list: ")
-#         # for example: blood,urine,sweat,muscle
-#         sample_types = remove_duplicate_from_list(sample_types)
-#         return sample_types
-#     except IOError:
-#         print("error in collection_sample_type() method")
-
-# def collected_sample_types(some_sample_type):
-# TODO implement pulling the list of allowed values from ISA configuration or
-# another configuration files
-#
-#     if some_sample_type == "1":
-#         sample_types = input("select from the following list (liver,kidney,muscle,brain,lung,flower):  ")
-#         sample_types = remove_duplicate_from_list(sample_types)
-#         # return solid_samples
-#
-#     elif some_sample_type == "2":
-#         sample_types = input("select from the following list (urine,blood,csf,sweat,lavage):  ")
-#         # for example: blood,urine,sweat,muscle
-#         sample_types = remove_duplicate_from_list(sample_types)
-#         # return fluid_samples
-#
-#     elif some_sample_type == "3":
-#         sample_types = input("select from the following list (urine,blood,csf,sweat,lavage): ")
-#         sample_types = remove_duplicate_from_list(sample_types)
-#         # print(sample_types)
-#
-#         s_sample_types = input("select from the following list (liver,kidney,muscle,brain,lung,2): ")
-#         s_sample_types = remove_duplicate_from_list(s_sample_types)
-#
-#         sample_types.append(s_sample_types)
-#
-#     else:
-#         print("input not recognised")
-#         choose_fluid_or_solid_both()
-#
-#     return sample_types
-
-
-def define_assay_plan(some_investigation, some_sample_collection_events):
-
-    try:
-        study_assay_plan = []
-
-        applies_to_all = input("will all samples be tested with the same set of assays? yes[1]/no[2]")
-
-        if applies_to_all == "1":
-            initial_sample_assay_plan = input("provide  assay types being used as a comma separated list:"
-                                              " [1]:transcription profiling using ngs, "
-                                              " [2]:transcription profiling using DNA microarray,"
-                                              " [3]:targeted metabolite profiling using mass spectrometry,"
-                                              " [4]:metabolite profiling using NMR spectroscopy? ")
-
-            initial_sample_assay_plan = remove_duplicate_from_list(initial_sample_assay_plan)
-            print("initial sample assay plan:", initial_sample_assay_plan)
-
-            for bio_material, nb_sampling_event in some_sample_collection_events.items():
-                # print("biomat:", bio_material, "how many? ", nb_sampling_event)
-                for element in range(int(nb_sampling_event)):
-                    for this_item in range(len(initial_sample_assay_plan)):
-                        sample_assay_plan = {"sample_type": bio_material,
-                                             "sample_number": element+1,
-                                             "assay_type": initial_sample_assay_plan[this_item]}
-                        study_assay_plan.append(sample_assay_plan)
-            print("final number of assay plans:", len(study_assay_plan))
-
-        elif applies_to_all == "2":
-
-            # we need to iterate through each sample type and record the relevant assays for that sample type
-
-            for bio_material, nb_sampling_event in some_sample_collection_events.items():
-                # sample_assay_plan = {"sample_type": "", "sample_number": "", "assay_type": []}
-                print("biomat:", bio_material, "how many? ", nb_sampling_event)
-                initial_sample_assay_plan = input("select assay types being used for that sample type '" + bio_material
-                                                  + "' as a comma separated list:"
-                                                  " [1]:transcription profiling using ngs, "
-                                                  " [2]:transcription profiling using DNA microarray,"
-                                                  " [3]:targeted metabolite profiling using mass spectrometry,"
-                                                  " [4]:metabolite profiling using NMR spectroscopy? ")
-
-                initial_sample_assay_plan = remove_duplicate_from_list(initial_sample_assay_plan)
-
-                to_all_of_these = input("will these assays be performed on all specimens"
-                                        " of this sample type? yes[1]/no[2]")
-
-                if to_all_of_these == "2":
-                    for element in range(int(nb_sampling_event)):
-                            this_sample_assay_plan = input("select assay types being used for that sample type"
-                                                           " as a comma separated list:"
-                                                           " [1]:transcription profiling using ngs, "
-                                                           " [2]:transcription profiling using DNA microarray,"
-                                                           " [3]:targeted metabolite profiling using mass spectrometry,"
-                                                           " [4]:metabolite profiling using NMR spectroscopy? ")
-
-                            study_assay_plan = remove_duplicate_from_list(this_sample_assay_plan)
-
-                            for this_item in range(len(initial_sample_assay_plan)):
-                                sample_assay_plan = {"sample_type": bio_material,
-                                                     "sample_number": element+1,
-                                                     "assay_type": initial_sample_assay_plan[this_item]}
-                            study_assay_plan.append(sample_assay_plan)
-                            # [{"sample_type":"liver", "sample_number":"1", "assay_types": ["1","2","3"]}]
-
-                    print(this_sample_assay_plan[0]["sample_type"])
-
-                elif to_all_of_these == "1":
-                    for element in range(int(nb_sampling_event)):
-                        for this_item in range(len(initial_sample_assay_plan)):
-                            sample_assay_plan = {"sample_type": bio_material,
-                                                 "sample_number": element+1,
-                                                 "assay_type": initial_sample_assay_plan[this_item]}
-                        study_assay_plan.append(sample_assay_plan)
-
-                else:
-                    print("input not recognized, please reiterate your selection.")
-
-        else:
-            print("input not recognized, please reiterate your selection.")
-            define_assay_plan(some_investigation, some_sample_collection_events)
-        print("number of assay plans from define_assay_plan(): ", len(study_assay_plan))
-
-        return some_investigation, study_assay_plan
-
-    except IOError:
-        print("error in define_assay_plan() method")
-
-
-def set_assay_type_topology_modifiers(this_sample_type, this_sampling_event, this_assay_type):
-    # TODO: refactor in order to implement modular assay specific topologies, switching between cases depending on assay
-    # TODO: types supplied by users with the define_assay_plan() method
-    try:
-        # this_assay_type = input(
-        #                         "which assay types are being used: [1]:transcription profiling using ngs, "
-        #                         " [2]:transcription profiling using DNA microarray,"
-        #                         " [3]:targeted metabolite profiling using mass spectrometry,"
-        #                         " [4]:metabolite profiling using NMR spectroscopy? ")
-        sample_assay_plans = []
-
-        # for this_assay_type in range(len(this_assay_type_array)):
-        with_topology_params = {"sample type": "",
-                           "collection event": "",
-                           "assay type": 0,
-                           "params": {
-                               "distinct libraries": 0,
-                               "distinct array designs": 0,
-                               "number of injection modes": 0,
-                               "number of acquisition modes": 0,
-                               "number of channels": 0,
-                               "pulse sequences": 0,
-                               "number of technical replicates": 0}
-                           }
-        # print("this assay type:", this_assay_type_array[this_assay_type], "counter:", this_assay_type)
-        if int(this_assay_type) == 1:
-
-            nb_library = input(
-                "how many distinct libraries per sample (provide an positive integer, default is 1)?")
-            nb_multiplexing_channels = input("how many labels were used (provide an positive integer, default is 1)?")
-            nb_technical_rep = input("how many technical replicate for each sample, default is 1?")
-            with_topology_params["sample type"] = this_sample_type
-            with_topology_params["collection event"] = this_sampling_event
-            with_topology_params["assay type"] = 1
-            with_topology_params["params"]["distinct libraries"] = nb_library
-            with_topology_params["params"]["number of channels"] = nb_multiplexing_channels
-            with_topology_params["params"]["number of technical replicates"] = nb_technical_rep
-
-        elif int(this_assay_type) == 2:
-
-            nb_chip_design = input(
-                "how many distinct microarray designs (provide an positive integer, default is 1)?")
-            nb_multiplexing_channels = input("how many labels were used (provide an positive integer, default is 1)?")
-            nb_technical_rep = input("how many technical replicate for each sample, default is 1?")
-
-            with_topology_params["sample type"] = this_sample_type
-            with_topology_params["collection event"] = this_sampling_event
-            with_topology_params["assay type"] = 2
-            with_topology_params["params"]["distinct array designs"] = nb_chip_design
-            print("typology:", with_topology_params["params"]["distinct array designs"])
-            with_topology_params["params"]["number of channels"] = nb_multiplexing_channels
-            with_topology_params["params"]["number of technical replicates"] = nb_technical_rep
-
-        elif this_assay_type == "3":
-
-            injection_modes = input(
-                "how many distinct sample introduction modes (1:FIA,2:LC,3:GC)?")
-            injection_modes = remove_duplicate_from_list(injection_modes)
-            acquisition_modes = input(
-                "how many distinct acquisition modes (1:negative mode, 2:positive mode) ?")
-            acquisition_modes = remove_duplicate_from_list(acquisition_modes)
-            # nb_channels = input("how many labels were used (provide an positive integer, default is 1)?")
-            nb_technical_rep = input("how many technical replicate for each sample, default is 1?")
-
-            with_topology_params["sample type"] = this_sample_type
-            with_topology_params["collection event"] = this_sampling_event
-            with_topology_params["assay type"] = 3
-            with_topology_params["params"]["injection modes"] = injection_modes
-            with_topology_params["params"]["number of channels"] = acquisition_modes
-            with_topology_params["params"]["number of technical replicates"] = nb_technical_rep
-
-        elif this_assay_type == "4":
-
-            injection_modes = input(
-                "how many distinct sample introduction modes (1:autoloader,2:LC,3:GC)?")
-            injection_modes = remove_duplicate_from_list(injection_modes)
-            acquisition_modes = input(
-                "how many distinct acquisition modes (1:COSY, 2:NOESY, 3:TOSCY, 4:CPMG ,5: INEPT, 6:HMQC, 7:WATERGATE) ?")
-            acquisition_modes = remove_duplicate_from_list(acquisition_modes)
-            # nb_multiplexing_channels = input("how many labels were used (provide an positive integer, default is 1)?")
-            nb_technical_rep = input("how many technical replicate for each sample, default is 1?")
-
-            with_topology_params["sample type"] = this_sample_type
-            with_topology_params["collection event"] = this_sampling_event
-            with_topology_params["assay type"] = 4
-            with_topology_params["params"]["injection modes"] = injection_modes
-            with_topology_params["params"]["pulse sequences"] = acquisition_modes
-            # typology_params["params"]["number of channels"] = acquisition_modes
-            with_topology_params["params"]["number of technical replicates"] = nb_technical_rep
-
-        # else:
-        #     print("input not recognised in set_assay_type_topology_modifiers() method")
-            # set_assay_type_topology_modifiers( this_sample_type, this_assay_type)
-
-        # sample_assay_plans.append(typology_params)
-
-        return with_topology_params
-        # nb_chip_design, nb_multiplexing_channels, nb_technical_rep
-
-    except IOError:
-        print("error in set_assay_type_topology_modifiers() method")
-
-
-# def create_assays(some_assay_plan,some_assay_object):
-# #
-#     for item in range(len(assay_plan)):
-#         # print("assay definitions are: ", assay_definitions[item])
-#         print("dealing with the first assay plan, for the specimen of sample type :", assay_plan[item]["sample type"],
-#               "for collection event:", assay_plan[item]["collection event"])
-#         # print("sample type:", assay_definitions[item]["sample type"],
-#         #       "| assay type: ", assay_definitions[item]["assay type"],
-#         #       "| assay params: ", assay_definitions[item]["params"])
-#         # dealing with Mass Spectrometry Applications
-#         if assay_plan[item]["assay type"] == 3:
-#             # TODO: implement get_or_create method and refactor
-#             ms = [a for a in new_inv.studies[0].assays if
-#                    a.measurement_type.term == "metabolite profiling" and a.technology_type.term == "liquid chromatography mass spectrometry" and a.filename == "a_mp_lcms.txt"]
-#             if len(ngs) > 0:
-#                 print("yes, exists in 1", ms)
-#                 # if such an assay table already exists, we retrieve it
-#                 this_assay = ms[0]
-#             else:
-#                 # or print('nothing found, creating a new object)...')
-#                 this_assay = Assay(measurement_type=OntologyAnnotation(term="metabolite profiling"),
-#                                    technology_type=OntologyAnnotation(term="liquid chromatography mass spectrometry"),
-#                                    filename="a_mp_lcms.txt")
-#                 # the object is attached to the relevant study
-#                 new_inv.studies[0].assays.append(this_assay)
-#
-#                 extraction_protocol = Protocol(name='metabolite extraction',
-#                                                protocol_type=OntologyAnnotation(term="material separation"))
-#                 new_inv.studies[0].protocols.append(extraction_protocol)
-#
-#                 # lc_protocol = Protocol(name="liquid chromatography",
-#                 #                              protocol_type=OntologyAnnotation(term="material separation"))
-#                 # new_inv.studies[0].protocols.append(labeling_protocol)
-#
-#                 ms_protocol = Protocol(name='liquid chromatography mass spectrometry',
-#                                                protocol_type=OntologyAnnotation(term="data collection"))
-#                 new_inv.studies[0].protocols.append(sequencing_protocol)
-#
-#             index_i = 0
-#             index_j = 0
-#             index_k = 0
-#             # for index_i, sample in enumerate(new_inv.studies[0].samples):
-#             some_sample_list = [sample for sample in new_inv.studies[0].samples if
-#                           sample.characteristics[0].value.term == assay_plan[item]["sample type"] and
-#                           sample.characteristics[1].value.term == assay_plan[item]["collection event"]]
-#             print("number of samples: ", len(some_sample_list))
-#             extractlist_before = [ext for ext in new_inv.studies[0].assays[0].other_material if
-#                                   ext.type == "Extract Name"]
-#             # print("number of extracts", len(extractlist_before))
-#
-#             for index_i, sample in enumerate([sample for sample in new_inv.studies[0].samples if
-#                                         sample.characteristics[0].value.term == assay_plan[item][
-#                                             "sample type"]]):
-#                 # print("i: ", index_i, "sample: ", sample.characteristics[1].value.term)
-#                 # print("current collection event", assay_plan[item]["collection event"])
-#                 if str(sample.characteristics[1].value.term) == str(assay_plan[item]["collection event"]):
-#                     # create an extraction process that executes the extraction protocol
-#                     extraction_process = Process(executes_protocol=[prtcl for prtcl in new_inv.studies[0].protocols
-#                                                                     if prtcl.name == "metabolite extraction"][0],
-#                                                  performer="rick",
-#                                                  date_=datetime.datetime.now())
-#
-#                     # extraction process takes as input a sample, and produces an extract material as output
-#                     # we make sure only the right kind of samples get assayed so we check against the sample type
-#                     # if sample.characteristics[0].value.term == assay_plan[item]["sample type"]:
-#                     # print("sample characteristics: ", sample.characteristics[0].value.term)
-#
-#                     extraction_process.inputs.append(sample)
-#                     extract = Material(name=sample.name + "extract-{}".format(index_i))
-#                     extract.type = "Extract Name"
-#                     extraction_process.outputs.append(extract)
-#
-#                     # this loop is meant to handle the case where several acquisition modes (e.g. Neg or positive) are used from a sample
-#                     # TODO: include a function to obtain the relevant parameters used for data acquisition
-#                     for index_j in range(int(assay_plan[item]["params"]["platforms"])):
-#                         # this inner is for handling multiple runs of the same platform, i.e. tech replicates
-#                         for index_k in range(
-#                                 int(assay_plan[item]["params"]["number of technical replicates"])):
-#                             prtcl_name = [prtcl for prtcl in new_inv.studie   s[0].protocols
-#                                           if prtcl.name == "data collection"][0]
-#
-#                             data_acq_process = Process(executes_protocol=prtcl_name,
-#                                                        performer="louis",
-#                                                        date_=datetime.datetime.now())
-#
-#                             platform_name = "platform-{}".format(index_j)
-#                             data_acq_process.name = "assay-name-{}".format(index_i) + "_" + platform_name + \
-#                                                     "_run-{}".format(index_k)
-#                             data_acq_process.inputs.append(labeling_process.outputs[0])
-#
-#                             # data acquisition process usually has an output data file
-#                             datafile = DataFile(
-#                                 filename="acquired-data-{}".format(index_i) + "_" + platform_name +
-#                                          "_run-{}".format(index_k) + ".mzml.gz",
-#                                 label="MS Raw Data File")
-#                             data_acq_process.outputs.append(datafile)
-#
-#                             # ensure Processes are linked forward and backward
-#                             extraction_process.next_process = data_acq_process
-#                             # labeling_process.prev_process = extraction_process
-#                             extraction_process.next_process = data_acq_process
-#                             # data_acq_process.prev_process = labeling_process
-#                             data_acq_process.prev_process = extraction_process
-#
-#                             # make sure extract(library), data file, and the processes are attached to the assay
-#                             this_assay.data_files.append(datafile)
-#                             this_assay.other_material.append(extract)
-#                             # this_assay.other_material.append(le)
-#                             this_assay.process_sequence.append(extraction_process)
-#                             # this_assay.process_sequence.append(labeling_process)
-#                             this_assay.process_sequence.append(data_acq_process)
-#
-#             # extractlist_after = [ext for ext in new_inv.studies[0].assays[0].other_material if
-#             #                      ext.type == "Extract Name"]
-#             # print("number of extracts", len(extractlist_after))
-
-
-# MAIN METHOD:
-
-def main():
-
-    intervention_list = []
-
-    intervention_check = intervention_or_observation()
-
-    if intervention_check is True:
-
-        try:
-            new_inv = use_default_inv()
-            repeats = single_or_repeated_treatment()
-            free_or_restricted_design = free_or_restricted_randomization()
-            assay_plan = []
-
-            if repeats is False and "factorial" in free_or_restricted_design:
-
-                obi = OntologySource(name="OBI", description="Ontology for Biomedical Investigations")
-                new_inv.ontology_source_references.append(obi)
-                stato = OntologySource(name="STATO", description="Ontology for Statistical Methods")
-                new_inv.ontology_source_references.append(stato)
-                design1 = OntologyAnnotation(term_source=obi)
-                design1.term = "intervention design"
-                design1.term_accession = "http://purl.obolibrary.org/obo/OBI_0000115"
-                new_inv.studies[0].design_descriptors.append(design1)
-                design2 = OntologyAnnotation(term_source=stato)
-                design2.term = "full factorial design"
-                design2.term_accession = "http://purl.obolibrary.org/obo/STATO_0000270"
-                new_inv.studies[0].design_descriptors.append(design2)
-
-                intervention_list, new_inv = get_list_of_interventions(new_inv)
-
-                assay_plan = []
-                for intervention_type in intervention_list.keys():
-                    # print("type of intervention: ", intervention_type)
-                    for factor in intervention_list[intervention_type].keys():
-                        # print("factor :", factor)
-                        set_factor_values(factor, intervention_list[intervention_type])
-                        # print("associated factor values:", intervention_list[intervention_type][factor])
-
-                # study_factor_combo = compute_study_groups(my_factors)
-                study_group_dictionaries = compute_study_groups(intervention_list[intervention_type])
-                # print("study groups:", study_group_dictionaries)
-                new_inv, sampling_plan = set_study_arms(study_group_dictionaries, new_inv, repeats)
-                # print("is this correct?" , new_inv.studies[0].sources[0].name)
-
-                new_inv, assay_plan = define_assay_plan(new_inv, sampling_plan)
-
-                print("number of assay plans in Main: ", len(assay_plan))
-
-                for l in range(len(assay_plan)):
-                    assay_plan[l] = set_assay_type_topology_modifiers(assay_plan[l]["sample_type"],
-                                                                      assay_plan[l]["sample_number"],
-                                                                      assay_plan[l]["assay_type"])
-
-                    # applies_to_all_plan_of_that_assay_type = input("Apply this parameter selection to all plans using this assay type? [1]Yes/2[No]")
-
-                    # if applies_to_all_plan_of_that_assay_type == 1:
-
-
-                    print(assay_plan[l])
-
-                    # assay_definitions.append(set_assay_type_topology_modifiers(assay_plan[l]["sample_type"],
-                    #                                                             assay_plan[l]["assay_type"]))
-                    # print("assay plan: ", assay_plan[l]["sample_type"], "|", assay_plan[l]["assay_type"])
-
-                # for m in range(len(assay_plan[l]["assay_types"])):
-
-                print("number of assay full definitions", len(assay_plan))
-                # print(assay_definitions[0]["sample type"])
-
-                for item in range(len(assay_plan)):
-                    # print("assay definitions are: ", assay_definitions[item])
-                    print("dealing with the first assay plan, for the specimen of sample type :", assay_plan[item]["sample type"], "for collection event:", assay_plan[item]["collection event"])
-                    # print("sample type:", assay_definitions[item]["sample type"],
-                    #       "| assay type: ", assay_definitions[item]["assay type"],
-                    #       "| assay params: ", assay_definitions[item]["params"])
-
-                    if assay_plan[item]["assay type"] == 1:
-                        # TODO: implement get_or_create method and refactor
-                        ngs = [a for a in new_inv.studies[0].assays if a.measurement_type.term == "transcription profiling" and a.technology_type.term == "nucleic acid sequencing" and a.filename == "a_tp_ngs.txt"]
-                        if len(ngs) > 0:
-                            print("yes, exists in 1", ngs)
-                            # if such an assay table already exists, we retrieve it
-                            this_assay = ngs[0]
-                        else:
-                            # or print('nothing found, creating a new object)...')
-                            this_assay = Assay(measurement_type=OntologyAnnotation(term="transcription profiling"),
-                                               technology_type=OntologyAnnotation(term="nucleic acid sequencing"),
-                                               filename="a_tp_ngs.txt")
-                            # the object is attached to the relevant study
-                            new_inv.studies[0].assays.append(this_assay)
-
-                            extraction_protocol = Protocol(name='RNA extraction',
-                                                           protocol_type=OntologyAnnotation(term="material separation"))
-                            new_inv.studies[0].protocols.append(extraction_protocol)
-
-                            labeling_protocol = Protocol(name="nucleic acid library preparation",
-                                                         protocol_type=OntologyAnnotation(term="material labeling"))
-                            new_inv.studies[0].protocols.append(labeling_protocol)
-
-                            sequencing_protocol = Protocol(name='nucleic acid sequencing',
-                                                           protocol_type=OntologyAnnotation(term="data collection"))
-                            new_inv.studies[0].protocols.append(sequencing_protocol)
-
-                        i = 0
-                        j = 0
-                        k = 0
-                        # for i, sample in enumerate(new_inv.studies[0].samples):
-                        samplelist=[sample for sample in new_inv.studies[0].samples if
-                         sample.characteristics[0].value.term == assay_plan[item]["sample type"] and sample.characteristics[1].value.term == assay_plan[item]["collection event"]]
-                        # print("number of samples: ", len(samplelist))
-                        extractlist_before = [ext for ext in new_inv.studies[0].assays[0].other_material if ext.type == "Extract Name"]
-                        # print("number of extracts", len(extractlist_before))
-
-                        for i, sample in enumerate([sample for sample in new_inv.studies[0].samples if
-                                                    sample.characteristics[0].value.term == assay_plan[item][
-                                                            "sample type"]]):
-                            # print("i: ", i, "sample: ", sample.characteristics[1].value.term)
-                            # print("current collection event", assay_plan[item]["collection event"])
-                            if str(sample.characteristics[1].value.term) == str(assay_plan[item]["collection event"]):
-                                # create an extraction process that executes the extraction protocol
-                                extraction_process = Process(executes_protocol=[prtcl for prtcl in new_inv.studies[0].protocols
-                                                                                if prtcl.name == "RNA extraction"][0],
-                                                             performer="amy",
-                                                             date_=datetime.datetime.now())
-
-                                # extraction process takes as input a sample, and produces an extract material as output
-                                # we make sure only the right kind of samples get assayed so we check against the sample type
-                                # if sample.characteristics[0].value.term == assay_plan[item]["sample type"]:
-                                # print("sample characteristics: ", sample.characteristics[0].value.term)
-
-                                extraction_process.inputs.append(sample)
-                                extract = Material(name=sample.name+"extract-{}".format(i))
-                                extract.type = "Extract Name"
-                                extraction_process.outputs.append(extract)
-
-                                # TODO: support multiplex identifiers in a future release
-                                labeling_process = Process(
-                                    executes_protocol=[prtcl for prtcl in new_inv.studies[0].protocols
-                                                       if prtcl.name == "nucleic acid library preparation"][0],
-                                    performer="xua",
-                                    date_=datetime.datetime.now()
-                                    )
-                                # extraction process takes as input a sample, and produces an extract material as output
-                                labeling_process.inputs.append(extract)
-                                le = Material(name= extract.name +"labeled-extract-{}".format(i))
-                                le.type = "Labeled Extract Name"
-                                dye = Characteristic(category=OntologyAnnotation(term="label"),
-                                                     value=OntologyAnnotation(term="none"))
-                                le.characteristics.append(dye)
-                                labeling_process.outputs.append(le)
-
-                                # this loop is meant to handle the case where several libraries are produced from a sample
-                                # TODO: include a function to obtain the relevant parameters used for library creation
-                                for j in range(int(assay_plan[item]["params"]["distinct libraries"])):
-                                    # this inner is for handling multiple runs of the same library, ie tech replicates
-                                    for k in range(
-                                            int(assay_plan[item]["params"]["number of technical replicates"])):
-                                        prtcl_name = [prtcl for prtcl in new_inv.studies[0].protocols
-                                                      if prtcl.name == "nucleic acid sequencing"][0]
-
-                                        data_acq_process = Process(executes_protocol=prtcl_name,
-                                                                   performer="louis",
-                                                                   date_=datetime.datetime.now())
-
-                                        library_name = "library-{}".format(j)
-                                        data_acq_process.name = "assay-name-{}".format(i) + "_" + library_name + \
-                                                                "_run-{}".format(k)
-                                        data_acq_process.inputs.append(labeling_process.outputs[0])
-
-                                        # data acquisition process usually has an output data file
-                                        datafile = DataFile(
-                                            filename="sequence-data-{}".format(i) + "_" + library_name +
-                                                     "_run-{}".format(k) + ".fastq.gz",
-                                            label="Raw Data File")
-                                        data_acq_process.outputs.append(datafile)
-
-                                        # ensure Processes are linked forward and backward
-                                        extraction_process.next_process = labeling_process
-                                        labeling_process.prev_process = extraction_process
-                                        labeling_process.next_process = data_acq_process
-                                        data_acq_process.prev_process = labeling_process
-
-                                        # make sure extract(library), data file, and the processes are attached to the assay
-                                        this_assay.data_files.append(datafile)
-                                        this_assay.other_material.append(extract)
-                                        this_assay.other_material.append(le)
-                                        this_assay.process_sequence.append(extraction_process)
-                                        this_assay.process_sequence.append(labeling_process)
-                                        this_assay.process_sequence.append(data_acq_process)
-
-                        extractlist_after = [ext for ext in new_inv.studies[0].assays[0].other_material if
-                                              ext.type == "Extract Name"]
-                        print("number of extracts", len(extractlist_after))
-
-                    elif assay_plan[item]["assay type"] == 2:
-                        # TODO: refactor to rely on a specific function handling assay create (create_assays() method)
-                        tx = [a for a in new_inv.studies[0].assays if a.measurement_type.term == "transcription profiling" and a.technology_type.term == "DNA microarray"]
-                        if len(tx) > 0:
-                            print("yes, exists in 2", tx)
-                            this_assay = tx[0]
-                        else:
-                            this_assay = Assay(measurement_type=OntologyAnnotation(term="transcription profiling"),
-                                               technology_type=OntologyAnnotation(term="DNA microarray"),
-                                               filename="a_tp_microarray.txt")
-                            # attach the assay to the study
-                            new_inv.studies[0].assays.append(this_assay)
-
-                            extraction_protocol = Protocol(name='RNA extraction',
-                                                           protocol_type=OntologyAnnotation(term="material separation"))
-                            new_inv.studies[0].protocols.append(extraction_protocol)
-
-                            labeling_protocol = Protocol(name="nucleic acid labeling",
-                                                         protocol_type=OntologyAnnotation(term="material labeling"))
-                            new_inv.studies[0].protocols.append(labeling_protocol)
-
-                            hyb_protocol = Protocol(name='nucleic acid hybridization',
-                                                    protocol_type=OntologyAnnotation(term="nucleic acid hybridization"))
-
-                            new_inv.studies[0].protocols.append(hyb_protocol)
-
-                        i = 0
-                        j = 0
-                        k = 0
-                        # for i, sample in enumerate(new_inv.studies[0].samples):
-                        for i, sample in enumerate([sample for sample in new_inv.studies[0].samples if
-                                                   sample.characteristics[0].value.term == assay_plan[item]["sample type"]]):
-
-                            if str(sample.characteristics[1].value.term) == str(assay_plan[item]["collection event"]):
-                                # print("i: ", i, "sample: ", sample.characteristics[0].value.term)
-
-                                # create an extraction process that executes the extraction protocol
-                                # [prtcl for prtcl in inv.studies[0].protocols if prtcl.name == "RNA extraction"][0]
-
-                                extraction_process = Process(executes_protocol=[prtcl for prtcl in new_inv.studies[0].protocols
-                                                                                if prtcl.name == "RNA extraction"][0],
-                                                             performer="amy",
-                                                             date_=datetime.datetime.now())
-
-                                # extraction process takes as input a sample, and produces an extract material as output
-                                # if sample.characteristics[0].value.term == assay_plan[item]["sample type"]:
-
-                                extraction_process.inputs.append(sample)
-                                extract = Material(name="extract-{}".format(i))
-                                extract.type = "Extract Name"
-                                extraction_process.outputs.append(extract)
-
-                                labeling_process = Process(executes_protocol=[prtcl for prtcl in new_inv.studies[0].protocols
-                                                                              if prtcl.name == "nucleic acid labeling"][0],
-                                                           performer="xua",
-                                                           date_=datetime.datetime.now()
-                                                           )
-
-                                # extraction process takes as input a sample, and produces an extract material as output
-                                labeling_process.inputs.append(extract)
-                                le = Material(name="labeled-extract-{}".format(i))
-                                le.type = "Labeled Extract Name"
-                                dye = Characteristic(category=OntologyAnnotation(term="label"),
-                                                     value=OntologyAnnotation(term="biotin"))
-                                le.characteristics.append(dye)
-                                labeling_process.outputs.append(le)
-
-                                # create a data acquisition process that executes a data acquisition protocol
-                                # print('number of array-design: ',
-                                #       assay_definitions[item][0]["params"]["distinct array designs"])
-                                        #assay_modifier1)
-                                # print('number of technical replicates:',
-                                #       assay_definitions[item][0]["params"]["number of technical replicates"])
-                                      # assay_modifier3)
-
-                                for j in range(int(assay_plan[item]["params"]["distinct array designs"])):
-
-                                    for k in range(int(assay_plan[item]["params"]["number of technical replicates"])):
-
-                                        prtcl_name = [prtcl for prtcl in new_inv.studies[0].protocols
-                                                      if prtcl.name == "nucleic acid hybridization"][0]
-
-                                        data_acq_process = Process(executes_protocol=prtcl_name,
-                                                                   performer="louis",
-                                                                   date_=datetime.datetime.now())
-
-                                        array_design_name = "arraydesign-{}".format(j)
-                                        data_acq_process.array_design_ref = OntologyAnnotation(term=array_design_name)
-                                        # print("with array_design: ", array_design_name)
-                                        array_design_as_pv = ParameterValue(
-                                                                       category=ProtocolParameter(parameter_name=OntologyAnnotation(term="array_design_ref")),
-                                                                       value=OntologyAnnotation(term=array_design_name))
-                                        data_acq_process.parameter_values.append(array_design_as_pv)
-
-                                        # print("data acquisition protocol name:", prtcl_name.name)
-                                        # print("replicate: ", k)
-                                        data_acq_process.name = "assay-name-{}".format(i) + "_" + array_design_name +\
-                                                                "_run-{}".format(k)
-                                        data_acq_process.array_design_ref = array_design_name
-                                        # print(data_acq_process.name)
-                                        data_acq_process.inputs.append(labeling_process.outputs[0])
-
-                                        # process usually has an output data file
-                                        datafile = DataFile(filename="microarray-data-{}".format(i) + "_" + array_design_name +
-                                                                     "_run-{}".format(k),
-                                                            label="Array Data File")
-                                        data_acq_process.outputs.append(datafile)
-
-                                        # ensure Processes are linked forward and backward
-                                        extraction_process.next_process = labeling_process
-                                        labeling_process.prev_process = extraction_process
-                                        labeling_process.next_process = data_acq_process
-                                        data_acq_process.prev_process = labeling_process
-
-                                        # make sure the extract, data file, and the processes are attached to the assay
-                                        this_assay.data_files.append(datafile)
-                                        this_assay.other_material.append(extract)
-                                        this_assay.other_material.append(le)
-                                        this_assay.process_sequence.append(extraction_process)
-                                        this_assay.process_sequence.append(labeling_process)
-                                        this_assay.process_sequence.append(data_acq_process)
-
-                    elif assay_plan[item]["assay type"] == 3:
-                        # TODO: implement get_or_create method and refactor
-
-                        if len(assay_plan[item]["params"]["injection modes"]) > 0:
-                            inj_mode = ""
-                            acq_mode = ""
-                            for inj_mode_code in range(len(assay_plan[item]["params"]["injection modes"])):
-                                if inj_mode_code == 0:
-                                    print("YAY, this is FIA")
-                                    inj_mode = "FIA"
-                                elif inj_mode_code == 1:
-                                    inj_mode = "LC"
-                                elif inj_mode_code == 2:
-                                    inj_mode = "GC"
-                                else:
-                                    print("error, injection method not recognized)")
-
-                                for acq_mode_code in range(len(assay_plan[item]["params"]["number of channels"])):
-                                  if acq_mode_code == 0:
-                                    acq_mode = "positive"
-                                  elif acq_mode_code == 1:
-                                    acq_mode = "negative"
-                                  else:
-                                        print("error, injection method not recognized)")
-
-                                  techname = inj_mode + "-" + acq_mode + " mass spectrometry"
-
-                                  ms_filename = "a_mp_" + inj_mode + "_" + acq_mode + "_ms.txt"
-
-                                  ms = [a for a in new_inv.studies[0].assays if
-                                        a.measurement_type.term == "metabolite profiling"
-                                        and a.technology_type.term == techname and a.filename == ms_filename]
-                                  if len(ms) > 0:
-                                      print("yes, exists in :", ms)
-                                      # if such an assay table already exists, we retrieve it
-                                      this_assay = ms[0]
-                                  else:
-                                      # or print('nothing found, creating a new object)...')
-                                      this_assay = Assay(measurement_type=OntologyAnnotation(term="metabolite profiling"),
-                                                       technology_type=OntologyAnnotation(term=techname),
-                                                       filename=ms_filename)
-
-                                      new_inv.studies[0].assays.append(this_assay)
-
-                                      extraction_protocol = Protocol(name='metabolite extraction',
-                                                                   protocol_type=OntologyAnnotation(term="material separation"))
-                                      new_inv.studies[0].protocols.append(extraction_protocol)
-
-                                    # lc_protocol = Protocol(name="liquid chromatography",
-                                    #                              protocol_type=OntologyAnnotation(term="material separation"))
-                                    # new_inv.studies[0].protocols.append(labeling_protocol)
-
-                                      ms_protocol = Protocol(name=inj_mode + "-" + acq_mode +' mass spectrometry',
-                                                                   protocol_type=OntologyAnnotation(term="mass spectrometry"))
-
-                                      randomized_run_order = ProtocolParameter(parameter_name=OntologyAnnotation(term="randomized run order"))
-                                      inj_param = ProtocolParameter(parameter_name=OntologyAnnotation(term="injection mode"))
-
-                                      if inj_mode == 1 or inj_mode == 2:
-                                        ch_instr = ProtocolParameter(parameter_name=OntologyAnnotation(term="chromatography instrument"))
-                                        ch_column = ProtocolParameter(parameter_name=OntologyAnnotation(term="chromatography column"))
-                                        ch_elu_p = ProtocolParameter(parameter_name=OntologyAnnotation(term="elution program"))
-                                        ms_protocol.parameters.append(ch_instr)
-                                        ms_protocol.parameters.append(ch_column)
-                                        ms_protocol.parameters.append(ch_elu_p)
-                                      ms_instr = ProtocolParameter(parameter_name=OntologyAnnotation(term="mass spectrometry instrument"))
-                                      acq_param = ProtocolParameter(parameter_name=OntologyAnnotation(term="scan polarity"))
-
-                                      ms_protocol.parameters.append(randomized_run_order)
-                                      ms_protocol.parameters.append(inj_param)
-                                      ms_protocol.parameters.append(ms_instr)
-                                      ms_protocol.parameters.append(acq_param)
-
-                                      new_inv.studies[0].protocols.append(ms_protocol)
-
-                                  index_i = 0
-                                  index_j = 0
-                                  index_k = 0
-                                  randomized_order = []
-                                  # for index_i, sample in enumerate(new_inv.studies[0].samples):
-                                  #some_sample_list = [sample for sample in new_inv.studies[0].samples if
-                                  #                sample.characteristics[0].value.term == assay_plan[item]["sample type"] and
-                                  #                sample.characteristics[1].value.term == assay_plan[item]["collection event"]]
-                                  #print("number of samples: ", len(some_sample_list))
-                                  #extractlist_before = [ext for ext in new_inv.studies[0].assays[0].other_material if
-                                  #                        ext.type == "Extract Name"]
-                                  # print("number of extracts", len(extractlist_before))
-                                  expected_total_number_run = len([sample for sample in new_inv.studies[0].samples if
-                                                                sample.characteristics[0].value.term == assay_plan[item][
-                                                                    "sample type"]]) \
-                                                              * int(
-                                      assay_plan[item]["params"]["number of technical replicates"])
-                                                     # * len(assay_plan[item]["params"]["number of channels"]) \
-
-                                  print(len([sample for sample in new_inv.studies[0].samples if
-                                                                sample.characteristics[0].value.term == assay_plan[item][
-                                                                    "sample type"]]))
-                                  print("expected size:", expected_total_number_run)
-
-                                  len(assay_plan) *  int(
-                                      assay_plan[item]["params"]["number of technical replicates"])
-
-                                  randomized_order = get_processrun_random_token(expected_total_number_run)
-                                  counter = -1
-                                  for index_i, sample in enumerate([sample for sample in new_inv.studies[0].samples if
-                                                                sample.characteristics[0].value.term == assay_plan[item][
-                                                                    "sample type"]]):
-                                        # print("i: ", index_i, "sample: ", sample.characteristics[1].value.term)
-                                        # print("current collection event", assay_plan[item]["collection event"])
-                                        if str(sample.characteristics[1].value.term) == str(assay_plan[item]["collection event"]):
-                                            # create an extraction process that executes the extraction protocol
-                                            extraction_process = Process(executes_protocol=[prtcl for prtcl in new_inv.studies[0].protocols
-                                                                                            if prtcl.name == "metabolite extraction"][0],
-                                                                         performer="rick",
-                                                                         date_=datetime.datetime.now())
-
-                                            # extraction process takes as input a sample, and produces an extract material as output
-                                            # we make sure only the right kind of samples get assayed so we check against the sample type
-                                            # if sample.characteristics[0].value.term == assay_plan[item]["sample type"]:
-                                            # print("sample characteristics: ", sample.characteristics[0].value.term)
-
-                                            extraction_process.inputs.append(sample)
-                                            extract = Material(name=sample.name + "extract-{}".format(index_i))
-                                            extract.type = "Extract Name"
-                                            extraction_process.outputs.append(extract)
-
-                                            # this loop is meant to handle the case where several acquisition modes (e.g. Neg or positive) are used from a sample
-                                            # TODO: include a function to obtain the relevant parameters used for data acquisition
-                                            #for index_j in range(int(assay_plan[item]["params"]["injection modes"])):
-                                                # this inner is for handling multiple runs of the same mode, i.e. tech replicates
-                                            for index_k in range(
-                                                    int(assay_plan[item]["params"]["number of technical replicates"])):
-                                                prtcl_name = [prtcl for prtcl in new_inv.studies[0].protocols
-                                                              if prtcl.name == inj_mode + "-" + acq_mode + ' mass spectrometry'][0]
-                                                data_acq_process = Process(executes_protocol=prtcl_name,
-                                                                           performer="louis",
-                                                                           date_=datetime.datetime.now())
-                                                counter = counter+1
-                                                # print(counter, randomized_order[counter])
-                                                run_order = randomized_order[counter]
-                                                pv_run_order = ParameterValue(category=ProtocolParameter(
-                                                    parameter_name=OntologyAnnotation(term="randomized run order")),
-                                                                      value=OntologyAnnotation(term=str(run_order)))
-
-                                                pv_1 = ParameterValue(category=ProtocolParameter(parameter_name=OntologyAnnotation(term="injection mode")),value=OntologyAnnotation(term=inj_mode))
-
-                                                # if we are dealing with liquid or gas "C"hromatography
-                                                if "C" in inj_mode:
-                                                    pv_1a = ParameterValue(category=ProtocolParameter(parameter_name=OntologyAnnotation(term="chromatography instrument")),value=OntologyAnnotation(term="Agilent Q12324A"))
-                                                    pv_1b = ParameterValue(category=ProtocolParameter(parameter_name=OntologyAnnotation(term="chromatography column")),value=OntologyAnnotation(term="AB Hydroxyapatite"))
-                                                    pv_1c = ParameterValue(category=ProtocolParameter(parameter_name=OntologyAnnotation(term="elution program")),value=OntologyAnnotation(term="Acetonitrile 90%, water 10% for 30 min, flow rate: 1ml/min"))
-
-                                                    data_acq_process.parameter_values.append(pv_1a)
-                                                    data_acq_process.parameter_values.append(pv_1b)
-                                                    data_acq_process.parameter_values.append(pv_1c)
-
-                                                pv_2 = ParameterValue(category=ProtocolParameter(parameter_name=OntologyAnnotation(term="mass spectrometry instrument")),value=OntologyAnnotation(term="Agilent QTOF"))
-                                                pv_3 = ParameterValue(category=ProtocolParameter(parameter_name=OntologyAnnotation(term="scan polarity")),value=OntologyAnnotation(term=acq_mode))
-
-                                                data_acq_process.parameter_values.append(pv_1)
-                                                data_acq_process.parameter_values.append(pv_2)
-                                                data_acq_process.parameter_values.append(pv_3)
-                                                data_acq_process.parameter_values.append(pv_run_order)
-
-
-                                                # platform_name = "platform-{}".format(index_j)
-                                                platform_name = "platform-" + inj_mode
-                                                data_acq_process.name = "assay-name-{}".format(index_i) + "_" + platform_name + \
-                                                                        "_run-{}".format(index_k)
-                                                data_acq_process.inputs.append(extraction_process.outputs[0])
-
-                                                # data acquisition process usually has an output data file
-                                                datafile = DataFile(
-                                                    filename="acquired-data-{}".format(index_i) + "_" + platform_name +
-                                                             "_run-{}".format(index_k) + ".mzml.gz",
-                                                    label="Raw Spectral Data File")
-                                                data_acq_process.outputs.append(datafile)
-
-                                                # ensure Processes are linked forward and backward
-                                                extraction_process.next_process = data_acq_process
-                                                # labeling_process.prev_process = extraction_process
-                                                extraction_process.next_process = data_acq_process
-                                                # data_acq_process.prev_process = labeling_process
-                                                data_acq_process.prev_process = extraction_process
-
-                                                # make sure extract(library), data file, and the processes are attached to the assay
-                                                this_assay.data_files.append(datafile)
-                                                this_assay.other_material.append(extract)
-                                                # this_assay.other_material.append(le)
-                                                this_assay.process_sequence.append(extraction_process)
-                                                # this_assay.process_sequence.append(labeling_process)
-                                                this_assay.process_sequence.append(data_acq_process)
-                    # for NMR:
-                    elif assay_plan[item]["assay type"] == 4:
-                        #TODO: implement get_or_create method and refactor
-
-                        if len(assay_plan[item]["params"]["injection modes"]) > 0:
-                            inj_mode = ""
-                            acq_mode = ""
-                            for inj_mode_code in range(
-                                    len(assay_plan[item]["params"]["injection modes"])):
-                                if inj_mode_code == 0:
-                                    print("YAY, this is autoloader")
-                                    inj_mode = "autoloader"
-                                elif inj_mode_code == 1:
-                                    inj_mode = "LC"
-                                elif inj_mode_code == 2:
-                                    inj_mode = "GC"
-                                else:
-                                    print("error, injection method not recognized)")
-
-                                for acq_mode_code in range(
-                                        len(assay_plan[item]["params"]["pulse sequences"])):
-                                    print("CODE:", assay_plan[item]["params"]["pulse sequences"])
-                                    if assay_plan[item]["params"]["pulse sequences"][acq_mode_code] == "1":
-                                        acq_mode = "COSY"
-                                    elif assay_plan[item]["params"]["pulse sequences"][acq_mode_code] == "2":
-                                        acq_mode = "NOESY"
-                                    elif assay_plan[item]["params"]["pulse sequences"][acq_mode_code] == "3":
-                                        acq_mode = "TOSCY"
-                                    elif assay_plan[item]["params"]["pulse sequences"][acq_mode_code] == "3":
-                                        acq_mode = "CPMG"
-                                    elif assay_plan[item]["params"]["pulse sequences"][acq_mode_code] == "4":
-                                        acq_mode = "INEPT"
-                                    elif assay_plan[item]["params"]["pulse sequences"][acq_mode_code] == "5":
-                                        acq_mode = "HMQC"
-                                    elif assay_plan[item]["params"]["pulse sequences"][acq_mode_code] == "6":
-                                        acq_mode = "WATERGATE"
-                                    else:
-                                        print("error, injection method not recognized)")
-
-                                    techname = inj_mode + "-" + acq_mode + " nmr spectroscopy"
-
-                                    nmr_filename = "a_mp_" + inj_mode + "_" + acq_mode + "_nmr.txt"
-
-                                    nmr = [a for a in new_inv.studies[0].assays if
-                                          a.measurement_type.term == "metabolite profiling"
-                                          and a.technology_type.term == techname and a.filename == nmr_filename]
-                                    if len(nmr) > 0:
-                                        print("yes, exists in :", nmr)
-                                        # if such an assay table already exists, we retrieve it
-                                        this_assay = nmr[0]
-                                    else:
-                                        # or print('nothing found, creating a new object)...')
-                                        this_assay = Assay(measurement_type=OntologyAnnotation(
-                                            term="metabolite profiling"),
-                                                           technology_type=OntologyAnnotation(
-                                                               term=techname),
-                                                           filename=nmr_filename)
-
-                                        new_inv.studies[0].assays.append(this_assay)
-
-                                        extraction_protocol = Protocol(name='metabolite extraction',
-                                                                       protocol_type=OntologyAnnotation(
-                                                                           term="material separation"))
-                                        new_inv.studies[0].protocols.append(extraction_protocol)
-
-                                        # lc_protocol = Protocol(name="liquid chromatography",
-                                        #                              protocol_type=OntologyAnnotation(term="material separation"))
-                                        # new_inv.studies[0].protocols.append(labeling_protocol)
-
-                                        nmr_protocol = Protocol(
-                                            name=inj_mode + "-" + acq_mode + ' nmr spectroscopy',
-                                            protocol_type=OntologyAnnotation(
-                                                term="nmr spectroscopy"))
-                                        inj_param = ProtocolParameter(
-                                            parameter_name=OntologyAnnotation(
-                                                term="injection mode"))
-                                        if inj_mode == 1 or inj_mode == 2:
-                                            ch_instr = ProtocolParameter(
-                                                parameter_name=OntologyAnnotation(
-                                                    term="chromatography instrument"))
-                                            ch_column = ProtocolParameter(
-                                                parameter_name=OntologyAnnotation(
-                                                    term="chromatography column"))
-                                            ch_elu_p = ProtocolParameter(
-                                                parameter_name=OntologyAnnotation(
-                                                    term="elution program"))
-                                            nmr_protocol.parameters.append(ch_instr)
-                                            nmr_protocol.parameters.append(ch_column)
-                                            nmr_protocol.parameters.append(ch_elu_p)
-                                        nmr_instr = ProtocolParameter(
-                                            parameter_name=OntologyAnnotation(
-                                                term="nmr spectroscopy instrument"))
-                                        nmr_probe = ProtocolParameter(parameter_name=(OntologyAnnotation(term="NMR probe")))
-                                        acq_param = ProtocolParameter(
-                                            parameter_name=OntologyAnnotation(term="pulse sequence"))
-
-                                        nmr_protocol.parameters.append(inj_param)
-                                        nmr_protocol.parameters.append(nmr_instr)
-                                        nmr_protocol.parameters.append(nmr_probe)
-                                        nmr_protocol.parameters.append(acq_param)
-
-                                        new_inv.studies[0].protocols.append(nmr_protocol)
-
-                                    index_i = 0
-                                    index_j = 0
-                                    index_k = 0
-                                    # for index_i, sample in enumerate(new_inv.studies[0].samples):
-                                    # some_sample_list = [sample for sample in new_inv.studies[0].samples if
-                                    #                sample.characteristics[0].value.term == assay_plan[item]["sample type"] and
-                                    #                sample.characteristics[1].value.term == assay_plan[item]["collection event"]]
-                                    # print("number of samples: ", len(some_sample_list))
-                                    # extractlist_before = [ext for ext in new_inv.studies[0].assays[0].other_material if
-                                    #                        ext.type == "Extract Name"]
-                                    # print("number of extracts", len(extractlist_before))
-
-                                    for index_i, sample in enumerate([sample for sample in
-                                                                      new_inv.studies[0].materials[
-                                                                          'samples'] if
-                                                                      sample.characteristics[
-                                                                          0].value.term ==
-                                                                              assay_plan[item][
-                                                                                  "sample type"]]):
-                                        # print("i: ", index_i, "sample: ", sample.characteristics[1].value.term)
-                                        # print("current collection event", assay_plan[item]["collection event"])
-                                        if str(sample.characteristics[1].value.term) == str(
-                                                assay_plan[item]["collection event"]):
-                                            # create an extraction process that executes the extraction protocol
-                                            extraction_process = Process(executes_protocol=
-                                                                         [prtcl for prtcl in
-                                                                          new_inv.studies[
-                                                                              0].protocols
-                                                                          if
-                                                                          prtcl.name == "metabolite extraction"][
-                                                                             0],
-                                                                         performer="rick",
-                                                                         date_=datetime.datetime.now())
-
-                                            # extraction process takes as input a sample, and produces an extract material as output
-                                            # we make sure only the right kind of samples get assayed so we check against the sample type
-                                            # if sample.characteristics[0].value.term == assay_plan[item]["sample type"]:
-                                            # print("sample characteristics: ", sample.characteristics[0].value.term)
-
-                                            extraction_process.inputs.append(sample)
-                                            extract = Material(
-                                                name=sample.name + "extract-{}".format(index_i))
-                                            extract.type = "Extract Name"
-                                            extraction_process.outputs.append(extract)
-
-                                            # this loop is meant to handle the case where several acquisition modes (e.g. Neg or positive) are used from a sample
-                                            # TODO: include a function to obtain the relevant parameters used for data acquisition
-                                            # for index_j in range(int(assay_plan[item]["params"]["injection modes"])):
-                                            # this inner is for handling multiple runs of the same mode, i.e. tech replicates
-                                            for index_k in range(
-                                                    int(assay_plan[item]["params"][
-                                                            "number of technical replicates"])):
-                                                prtcl_name = \
-                                                [prtcl for prtcl in new_inv.studies[0].protocols
-                                                 if
-                                                 prtcl.name == inj_mode + "-" + acq_mode + ' nmr spectroscopy'][
-                                                    0]
-                                                data_acq_process = Process(
-                                                    executes_protocol=prtcl_name,
-                                                    performer="mitsuko",
-                                                    date_=datetime.datetime.now())
-                                                pv_1 = ParameterValue(category=ProtocolParameter(
-                                                    parameter_name=OntologyAnnotation(
-                                                        term="injection mode")),
-                                                                      value=OntologyAnnotation(
-                                                                          term=inj_mode))
-
-                                                # if we are dealing with liquid or gas "C"hromatography
-                                                if "C" in inj_mode:
-                                                    pv_1a = ParameterValue(
-                                                        category=ProtocolParameter(
-                                                            parameter_name=OntologyAnnotation(
-                                                                term="chromatography instrument")),
-                                                        value=OntologyAnnotation(
-                                                            term="Agilent Q12324A"))
-                                                    pv_1b = ParameterValue(
-                                                        category=ProtocolParameter(
-                                                            parameter_name=OntologyAnnotation(
-                                                                term="chromatography column")),
-                                                        value=OntologyAnnotation(
-                                                            term="AB Hydroxyapatite"))
-                                                    pv_1c = ParameterValue(
-                                                        category=ProtocolParameter(
-                                                            parameter_name=OntologyAnnotation(
-                                                                term="elution program")),
-                                                        value=OntologyAnnotation(
-                                                            term="acetonitrile 90%, water 10% for 30 min, flow rate: 1ml/min"))
-
-                                                    pv_1d = ParameterValue(
-                                                        category=ProtocolParameter(
-                                                            parameter_name=OntologyAnnotation(
-                                                                term="NMR probe")),
-                                                        value=OntologyAnnotation(
-                                                            term="flow probe"))
-
-                                                    data_acq_process.parameter_values.append(pv_1a)
-                                                    data_acq_process.parameter_values.append(pv_1b)
-                                                    data_acq_process.parameter_values.append(pv_1c)
-                                                    data_acq_process.parameter_values.append(pv_1d)
-
-                                                else:
-                                                    pv_1d = ParameterValue(
-                                                        category=ProtocolParameter(
-                                                            parameter_name=OntologyAnnotation(
-                                                                term="NMR probe")),
-                                                        value=OntologyAnnotation(
-                                                            term="non-flow probe"))
-
-
-
-                                                pv_2 = ParameterValue(category=ProtocolParameter(
-                                                    parameter_name=OntologyAnnotation(
-                                                        term="nmr spectroscopy instrument")),
-                                                                      value=OntologyAnnotation(
-                                                                          term="Bruker Avance III"))
-                                                pv_3 = ParameterValue(category=ProtocolParameter(
-                                                    parameter_name=OntologyAnnotation(
-                                                        term="pulse sequence")),
-                                                                      value=OntologyAnnotation(
-                                                                          term=acq_mode))
-
-                                                data_acq_process.parameter_values.append(pv_1)
-                                                data_acq_process.parameter_values.append(pv_2)
-                                                data_acq_process.parameter_values.append(pv_3)
-
-                                                # platform_name = "platform-{}".format(index_j)
-                                                platform_name = "platform-" + inj_mode
-                                                data_acq_process.name = "assay-name-{}".format(
-                                                    index_i) + "_" + platform_name + \
-                                                                        "_run-{}".format(index_k)
-                                                data_acq_process.inputs.append(
-                                                    extraction_process.outputs[0])
-
-                                                # data acquisition process usually has an output data file
-                                                datafile = DataFile(
-                                                    filename="acquired-data-{}".format(
-                                                        index_i) + "_" + platform_name +
-                                                             "_run-{}".format(index_k) + ".nmrml.gz",
-                                                    label="Free Induction Decay Data File")
-                                                data_acq_process.outputs.append(datafile)
-
-                                                # ensure Processes are linked forward and backward
-                                                extraction_process.next_process = data_acq_process
-                                                # labeling_process.prev_process = extraction_process
-                                                extraction_process.next_process = data_acq_process
-                                                # data_acq_process.prev_process = labeling_process
-                                                data_acq_process.prev_process = extraction_process
-
-                                                # make sure extract(library), data file, and the processes are attached to the assay
-                                                this_assay.data_files.append(datafile)
-                                                this_assay.other_material.append(
-                                                    extract)
-                                                # this_assay.other_material.append(le)
-                                                this_assay.process_sequence.append(
-                                                    extraction_process)
-                                                # this_assay.process_sequence.append(labeling_process)
-                                                this_assay.process_sequence.append(data_acq_process)
-                    # else:
-                    #     print("no luck :(")
-
-            elif repeats is True and "factorial" in free_or_restricted_design:
-
-                obi = OntologySource(name="OBI", description="Ontology for Biomedical Investigations")
-                new_inv.ontology_source_references.append(obi)
-                stato = OntologySource(name="STATO", description="Ontology for Statistical Methods")
-                new_inv.ontology_source_references.append(stato)
-                design1 = OntologyAnnotation(term_source=obi)
-                design1.term = "intervention design"
-                design1.term_accession = "http://purl.obolibrary.org/obo/OBI_0000115"
-                new_inv.studies[0].design_descriptors.append(design1)
-                design2 = OntologyAnnotation(term_source=stato)
-                design2.term = "full factorial design"
-                design2.term_accession = "http://purl.obolibrary.org/obo/STATO_0000270"
-                new_inv.studies[0].design_descriptors.append(design2)
-                design3 = OntologyAnnotation(term_source=obi)
-                design3.term = "repeated measures design"
-                design3.term_accession = "http://purl.obolibrary.org/obo/OBI_0500002"
-                new_inv.studies[0].design_descriptors.append(design3)
-
-                intervention_list, new_inv = get_list_of_interventions(new_inv)
-
-                assay_plan = []
-                for intervention_type in intervention_list.keys():
-                    # print("type of intervention: ", intervention_type)
-                    for factor in intervention_list[intervention_type].keys():
-                        # print("factor :", factor)
-                        set_factor_values(factor, intervention_list[intervention_type])
-                        # print("associated factor values:", intervention_list[intervention_type][factor])
-
-            dump(isa_obj=new_inv, output_path='./')
-
-        except NotImplemented:
-                    print("we have recognized a cross over design & repeated treatment case, which is not yet fully implemented")
-                    print("error in create_study_subject() method")
-
-                # my_factors = {}
-                # study_group_dictionaries = []
-                # number_of_repeats = get_repeat_number()
-                # intervention_list = get_list_of_interventions()
-                # """factors_for_treatment = get_factors_from_treatment_type(intervention_list)"""
-                # for intervention_type in intervention_list.keys():
-                #     print("type of intervention: ", intervention_type)
-                #     for factor in intervention_list[intervention_type].keys():
-                #         print("factor :", factor)
-                #         set_factor_values(factor, intervention_list[intervention_type])
-                #         print("associated factor values:", intervention_list[intervention_type][factor])
-                #
-                #         study_group_dictionaries.append(compute_study_groups(intervention_list[intervention_type]))
-                #     print("study groups:", list_of_study_group_dictionaries)
-                #     # set_study_arms()
-                #
-                # # for intervention in intervention_list:
-                # #         int_dict = dict
-                # #         set_factor_values(intervention, int_dict)
-                # print(compute_treatment_sequences(list_of_study_group_dictionaries, number_of_repeats))
-                # # treatment_arms = compute_treatment_sequences(intervention_list, number_of_repeats)
-                # new_inv = set_study_arms(number_of_repeats)
-                #
-                # # for element in range(len(treatment_arms)):
-
-    else:
-        try:
-            new_inv = use_default_inv()
-
-            obi = OntologySource(name="OBI", description="Ontology for Biomedical Investigations")
-            new_inv.ontology_source_references.append(obi)
-            stato = OntologySource(name="STATO", description="Ontology for Statistical Methods")
-            new_inv.ontology_source_references.append(stato)
-            omiabis = OntologySource(name="OMIABIS", description="an ontological version of MIABIS (Minimum Information About BIobank data Sharing)")
-            new_inv.ontology_source_references.append(obi)
-
-            design1 = OntologyAnnotation(term_source=obi)
-            design1.term = "observation design"
-            design1.term_accession = "http://purl.obolibrary.org/obo/OBI_0300311"
-            new_inv.studies[0].design_descriptors.append(design1)
-            design2 = OntologyAnnotation(term_source=omiabis)
-            design2.term = "cohort study design"
-            design2.term_accession = "http://purl.obolibrary.org/obo/OMIABIS_0001020"
-            new_inv.studies[0].design_descriptors.append(design2)
-
-            # get_study_group()
-            # get_study_temporal_span()
-            # get_sample_collection_plan()
-            # get_assay_plan()
-
-        except NotImplemented:
-            print("we have recognized an observation study, which is not yet fully implemented")
-            print("error in create_study_subject() method")
-
-if __name__ == '__main__':
-    main()

From ea600940412876a48f73dba0c73bbae437fe3159 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Tue, 17 Nov 2020 17:39:45 +0000
Subject: [PATCH 07/25] deleted unused files #368 v3

---
 isatools/create/data.json | 2351 -------------------------------------
 1 file changed, 2351 deletions(-)
 delete mode 100644 isatools/create/data.json

diff --git a/isatools/create/data.json b/isatools/create/data.json
deleted file mode 100644
index b0ac1d52..00000000
--- a/isatools/create/data.json
+++ /dev/null
@@ -1,2351 +0,0 @@
-[
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 132,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS1",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 30,
-        "sampling": "single sampling",
-        "sources": 30,
-        "spurious_factors": "",
-        "study_key": "MTBLS10",
-        "total_study_groups": 12
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 15,
-        "sampling": "single sampling",
-        "sources": 15,
-        "spurious_factors": "",
-        "study_key": "MTBLS100",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 11,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS102",
-        "total_study_groups": 3
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS103 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS104 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not find protocol matching '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS105 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 36,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "factor: Phytohormones ",
-        "study_key": "MTBLS107",
-        "total_study_groups": 3
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 36,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "factor: Phytohormones ",
-        "study_key": "MTBLS108",
-        "total_study_groups": 3
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 36,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "factor: Phytohormones ",
-        "study_key": "MTBLS109",
-        "total_study_groups": 3
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 107,
-        "sampling": "multiple/repeated samping",
-        "sources": 3,
-        "spurious_factors": "factor: Total Light_100pc_after_28d ;factor: Medium_after_initiation ;factor: Total Light_0pc_after_28d ;factor: Container_after_initiation ;factor: Total Light_100pc_after_07d ;factor: Plants/Container_Quantity_after_14d ;factor: Total Light_0pc_after_initiation ;factor: Total Light_100pc_after_initiation ;factor: Total Light_0pc_after_07d ;factor: Medium_after_14d ;factor: Container_after_14d ",
-        "study_key": "MTBLS11",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 36,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "factor: Phytohormones ",
-        "study_key": "MTBLS110",
-        "total_study_groups": 3
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 36,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "factor: Phytohormones ",
-        "study_key": "MTBLS111",
-        "total_study_groups": 3
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 296,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS112",
-        "total_study_groups": 60
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 114,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "",
-        "study_key": "MTBLS113",
-        "total_study_groups": 648
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 15,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS114",
-        "total_study_groups": 3
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS116 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 26,
-        "sampling": "single sampling",
-        "sources": 26,
-        "spurious_factors": "factor: Replicate ;factor: Time point ;factor: Infection ;factor: Replicate ;factor: Time point ;factor: Infection ",
-        "study_key": "MTBLS117",
-        "total_study_groups": 12
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS118 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS119 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 69,
-        "sampling": "multiple/repeated samping",
-        "sources": 3,
-        "spurious_factors": "factor: Medium_after_initiation ;factor: Medium_after_14d ;factor: Container_after_initiation ;factor: Total Light_100pc_after_07d ;factor: Plants/Container_Quantity_after_14d ;factor: Total Light_0pc_after_initiation ;factor: Total Light_100pc_after_initiation ;factor: Total Light_0pc_after_07d ;factor: Container_after_14d ",
-        "study_key": "MTBLS12",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS120 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 1596,
-        "sampling": "single sampling",
-        "sources": 1596,
-        "spurious_factors": "",
-        "study_key": "MTBLS123",
-        "total_study_groups": 60
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 1138,
-        "sampling": "single sampling",
-        "sources": 1138,
-        "spurious_factors": "factor: Gender ;factor: Age ",
-        "study_key": "MTBLS124",
-        "total_study_groups": 10430784
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 60,
-        "sampling": "multiple/repeated samping",
-        "sources": 12,
-        "spurious_factors": "",
-        "study_key": "MTBLS125",
-        "total_study_groups": 20
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 72,
-        "sampling": "multiple/repeated samping",
-        "sources": 24,
-        "spurious_factors": "",
-        "study_key": "MTBLS126",
-        "total_study_groups": 12
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 26,
-        "sampling": "multiple/repeated samping",
-        "sources": 3,
-        "spurious_factors": "factor: Cell line ;factor: Cell line ",
-        "study_key": "MTBLS127",
-        "total_study_groups": 6
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 16,
-        "sampling": "single sampling",
-        "sources": 16,
-        "spurious_factors": "",
-        "study_key": "MTBLS128",
-        "total_study_groups": 16
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 24,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "factor: Time ;factor: Time ",
-        "study_key": "MTBLS129",
-        "total_study_groups": 2
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 83,
-        "sampling": "multiple/repeated samping",
-        "sources": 3,
-        "spurious_factors": "factor: Medium_after_initiation ;factor: Medium_after_14d ;factor: Container_after_initiation ;factor: Total Light_100pc_after_07d ;factor: Plants/Container_Quantity_after_14d ;factor: Total Light_0pc_after_initiation ;factor: Total Light_100pc_after_initiation ;factor: Total Light_0pc_after_07d ;factor: Container_after_14d ",
-        "study_key": "MTBLS13",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 36,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS131",
-        "total_study_groups": 12
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 36,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS132",
-        "total_study_groups": 12
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 14,
-        "sampling": "multiple/repeated samping",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS133",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 15,
-        "sampling": "multiple/repeated samping",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS134",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 41,
-        "sampling": "multiple/repeated samping",
-        "sources": 3,
-        "spurious_factors": "",
-        "study_key": "MTBLS137",
-        "total_study_groups": 9
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 96,
-        "sampling": "multiple/repeated samping",
-        "sources": 16,
-        "spurious_factors": "factor: Temperature_after_04d ;factor: Salt_Quantity_after_initiation ;factor: RelativeHumidity_after_04d ;factor: Container_after_initiation ;factor: Watering_after_04d ;factor: Container_after_04d ;factor: Medium_after_initiation ;factor: Total Light_Off_after_04d ;factor: Total Light_On_after_04d ;factor: Medium_after_04d ",
-        "study_key": "MTBLS14",
-        "total_study_groups": 8
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 44,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "",
-        "study_key": "MTBLS140",
-        "total_study_groups": 64
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not find protocol matching '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS143 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 72,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS144",
-        "total_study_groups": 12
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS146 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 978,
-        "sampling": "single sampling",
-        "sources": 978,
-        "spurious_factors": "",
-        "study_key": "MTBLS147",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS148 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 422,
-        "sampling": "multiple/repeated samping",
-        "sources": 6,
-        "spurious_factors": "",
-        "study_key": "MTBLS149",
-        "total_study_groups": 6
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 95,
-        "sampling": "multiple/repeated samping",
-        "sources": 16,
-        "spurious_factors": "factor: Temperature_after_04d ;factor: Salt_Quantity_after_initiation ;factor: RelativeHumidity_after_04d ;factor: Container_after_initiation ;factor: Watering_after_04d ;factor: Container_after_04d ;factor: Medium_after_initiation ;factor: Total Light_Off_after_04d ;factor: Total Light_On_after_04d ;factor: Medium_after_04d ",
-        "study_key": "MTBLS15",
-        "total_study_groups": 8
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 162,
-        "sampling": "multiple/repeated samping",
-        "sources": 1,
-        "spurious_factors": "factor: Peturbation ;factor: Time Point ;factor: Cell Type ",
-        "study_key": "MTBLS150",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 120,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS151",
-        "total_study_groups": 150
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 134,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "",
-        "study_key": "MTBLS152",
-        "total_study_groups": 28
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 24,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "factor: control ;factor: time ;factor: inorganic phosphate ;factor: control ",
-        "study_key": "MTBLS154",
-        "total_study_groups": 16
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 34,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "factor: control ",
-        "study_key": "MTBLS155",
-        "total_study_groups": 12
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS156 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS157 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 83,
-        "sampling": "multiple/repeated samping",
-        "sources": 14,
-        "spurious_factors": "factor: Temperature_after_04d ;factor: Salt_Quantity_after_initiation ;factor: RelativeHumidity_after_04d ;factor: Container_after_initiation ;factor: Watering_after_04d ;factor: Container_after_04d ;factor: Medium_after_initiation ;factor: Total Light_Off_after_04d ;factor: Total Light_On_after_04d ;factor: Medium_after_04d ",
-        "study_key": "MTBLS16",
-        "total_study_groups": 8
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 60,
-        "sampling": "multiple/repeated samping",
-        "sources": 223,
-        "spurious_factors": "",
-        "study_key": "MTBLS160",
-        "total_study_groups": 60
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 117,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "",
-        "study_key": "MTBLS161",
-        "total_study_groups": 2
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 64,
-        "sampling": "multiple/repeated samping",
-        "sources": 3,
-        "spurious_factors": "",
-        "study_key": "MTBLS162",
-        "total_study_groups": 17856
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 24,
-        "sampling": "single sampling",
-        "sources": 24,
-        "spurious_factors": "",
-        "study_key": "MTBLS163",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 390,
-        "sampling": "multiple/repeated samping",
-        "sources": 51,
-        "spurious_factors": "",
-        "study_key": "MTBLS164",
-        "total_study_groups": 45
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 57,
-        "sampling": "multiple/repeated samping",
-        "sources": 37,
-        "spurious_factors": "factor: Panda mother ;factor: Mother age ",
-        "study_key": "MTBLS165",
-        "total_study_groups": 261
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 14,
-        "sampling": "multiple/repeated samping",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS166",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 63,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS168",
-        "total_study_groups": 3520
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 14,
-        "sampling": "single sampling",
-        "sources": 14,
-        "spurious_factors": "",
-        "study_key": "MTBLS169",
-        "total_study_groups": 2
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 1050,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS17",
-        "total_study_groups": 12
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 68,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS170",
-        "total_study_groups": 6
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 24,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS171",
-        "total_study_groups": 6
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 140,
-        "sampling": "single sampling",
-        "sources": 140,
-        "spurious_factors": "",
-        "study_key": "MTBLS172",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 131,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS173",
-        "total_study_groups": 16
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 18,
-        "sampling": "single sampling",
-        "sources": 18,
-        "spurious_factors": "factor: Metabolic syndrome ",
-        "study_key": "MTBLS174",
-        "total_study_groups": 3520
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 96,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS175",
-        "total_study_groups": 8
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS176 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 42,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS177",
-        "total_study_groups": 112
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 180,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS178",
-        "total_study_groups": 18
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 41,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS185",
-        "total_study_groups": 3
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 342,
-        "sampling": "single sampling",
-        "sources": 342,
-        "spurious_factors": "",
-        "study_key": "MTBLS187",
-        "total_study_groups": 24
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 7,
-        "sampling": "single sampling",
-        "sources": 7,
-        "spurious_factors": "",
-        "study_key": "MTBLS188",
-        "total_study_groups": 10
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 192,
-        "sampling": "multiple/repeated samping",
-        "sources": 5,
-        "spurious_factors": "factor: drug intervention ;factor: drug intervention ",
-        "study_key": "MTBLS189",
-        "total_study_groups": 6
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not find protocol matching '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS19 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 56,
-        "sampling": "single sampling",
-        "sources": 56,
-        "spurious_factors": "",
-        "study_key": "MTBLS191",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 311,
-        "sampling": "multiple/repeated samping",
-        "sources": 3,
-        "spurious_factors": "",
-        "study_key": "MTBLS193",
-        "total_study_groups": 192
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 60,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS194",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 168,
-        "sampling": "single sampling",
-        "sources": 168,
-        "spurious_factors": "",
-        "study_key": "MTBLS196",
-        "total_study_groups": 18
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 130,
-        "sampling": "multiple/repeated samping",
-        "sources": 13,
-        "spurious_factors": "",
-        "study_key": "MTBLS197",
-        "total_study_groups": 2
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 504,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS198",
-        "total_study_groups": 48
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 16,
-        "sampling": "single sampling",
-        "sources": 16,
-        "spurious_factors": "",
-        "study_key": "MTBLS2",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 148,
-        "sampling": "multiple/repeated samping",
-        "sources": 3,
-        "spurious_factors": "factor: Material sample ;factor: Material sample ",
-        "study_key": "MTBLS20",
-        "total_study_groups": 2
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS200 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 63,
-        "sampling": "single sampling",
-        "sources": 63,
-        "spurious_factors": "",
-        "study_key": "MTBLS202",
-        "total_study_groups": 6
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 50,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS203",
-        "total_study_groups": 5
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 27,
-        "sampling": "single sampling",
-        "sources": 27,
-        "spurious_factors": "",
-        "study_key": "MTBLS208",
-        "total_study_groups": 8
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not find protocol matching '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS209 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS21 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 62,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "",
-        "study_key": "MTBLS210",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 87,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS212",
-        "total_study_groups": 24
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS213 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 15,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS214",
-        "total_study_groups": 15
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 231,
-        "sampling": "single sampling",
-        "sources": 231,
-        "spurious_factors": "factor: Ionisation ;factor: Ionisation ",
-        "study_key": "MTBLS215",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 36,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS216",
-        "total_study_groups": 6
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 34,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS217",
-        "total_study_groups": 6
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 134,
-        "sampling": "multiple/repeated samping",
-        "sources": 3,
-        "spurious_factors": "",
-        "study_key": "MTBLS218",
-        "total_study_groups": 16
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 62,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS219",
-        "total_study_groups": 9
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 30,
-        "sampling": "multiple/repeated samping",
-        "sources": 5,
-        "spurious_factors": "factor: Medium_after_initiation ;factor: Salt_Quantity_after_initiation ;factor: Container_after_initiation ;factor: Watering_after_initiation ",
-        "study_key": "MTBLS22",
-        "total_study_groups": 120
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS224 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 134,
-        "sampling": "multiple/repeated samping",
-        "sources": 29,
-        "spurious_factors": "",
-        "study_key": "MTBLS225",
-        "total_study_groups": 40
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 64,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "",
-        "study_key": "MTBLS226",
-        "total_study_groups": 12
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS227 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 19,
-        "sampling": "single sampling",
-        "sources": 19,
-        "spurious_factors": "",
-        "study_key": "MTBLS228",
-        "total_study_groups": 10
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 39,
-        "sampling": "single sampling",
-        "sources": 39,
-        "spurious_factors": "",
-        "study_key": "MTBLS229",
-        "total_study_groups": 20
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 12,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS23",
-        "total_study_groups": 2
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 44,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS233",
-        "total_study_groups": 192
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 33,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS234",
-        "total_study_groups": 33
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 16,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS235",
-        "total_study_groups": 78
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 240,
-        "sampling": "multiple/repeated samping",
-        "sources": 114,
-        "spurious_factors": "",
-        "study_key": "MTBLS237",
-        "total_study_groups": 3139184885760
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 106,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "factor: Time Collected Urine sample ",
-        "study_key": "MTBLS24",
-        "total_study_groups": 2
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 16,
-        "sampling": "multiple/repeated samping",
-        "sources": 8,
-        "spurious_factors": "",
-        "study_key": "MTBLS240",
-        "total_study_groups": 2
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 465,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS242",
-        "total_study_groups": 5
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 22,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS243",
-        "total_study_groups": 2
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 30,
-        "sampling": "multiple/repeated samping",
-        "sources": 1,
-        "spurious_factors": "factor: Substrate ;factor: Labeling ;factor: Substrate ;factor: Labeling ",
-        "study_key": "MTBLS247",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 250,
-        "sampling": "multiple/repeated samping",
-        "sources": 124,
-        "spurious_factors": "factor: Baecke modified ;factor: Packs-years ;factor: Inspiratory capacity / Total Lung capacity ",
-        "study_key": "MTBLS249",
-        "total_study_groups": 668224566533597895713700836288057359530593353728000000000000
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 8,
-        "sampling": "multiple/repeated samping",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS25",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 86,
-        "sampling": "single sampling",
-        "sources": 86,
-        "spurious_factors": "",
-        "study_key": "MTBLS253",
-        "total_study_groups": 10
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 35,
-        "sampling": "multiple/repeated samping",
-        "sources": 16,
-        "spurious_factors": "",
-        "study_key": "MTBLS256",
-        "total_study_groups": 10
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 75,
-        "sampling": "multiple/repeated samping",
-        "sources": 4,
-        "spurious_factors": "",
-        "study_key": "MTBLS259",
-        "total_study_groups": 3
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 18,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "factor: Genotype ;factor: Genotype ;factor: Genotype ",
-        "study_key": "MTBLS26",
-        "total_study_groups": 3
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS260 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 6,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS263",
-        "total_study_groups": 12
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 48,
-        "sampling": "multiple/repeated samping",
-        "sources": 4,
-        "spurious_factors": "",
-        "study_key": "MTBLS264",
-        "total_study_groups": 48
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 30,
-        "sampling": "single sampling",
-        "sources": 30,
-        "spurious_factors": "",
-        "study_key": "MTBLS265",
-        "total_study_groups": 60
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 30,
-        "sampling": "single sampling",
-        "sources": 30,
-        "spurious_factors": "",
-        "study_key": "MTBLS266",
-        "total_study_groups": 60
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 30,
-        "sampling": "single sampling",
-        "sources": 30,
-        "spurious_factors": "",
-        "study_key": "MTBLS267",
-        "total_study_groups": 60
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 204,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS270",
-        "total_study_groups": 27
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 219,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "",
-        "study_key": "MTBLS272",
-        "total_study_groups": 16
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 111,
-        "sampling": "multiple/repeated samping",
-        "sources": 23,
-        "spurious_factors": "",
-        "study_key": "MTBLS273",
-        "total_study_groups": 243
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 18,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS274",
-        "total_study_groups": 3
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 20,
-        "sampling": "single sampling",
-        "sources": 20,
-        "spurious_factors": "",
-        "study_key": "MTBLS275",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS276 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 105,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS277",
-        "total_study_groups": 168
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 33,
-        "sampling": "single sampling",
-        "sources": 33,
-        "spurious_factors": "",
-        "study_key": "MTBLS278",
-        "total_study_groups": 9
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 88,
-        "sampling": "single sampling",
-        "sources": 88,
-        "spurious_factors": "",
-        "study_key": "MTBLS279",
-        "total_study_groups": 10
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 2010,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS28",
-        "total_study_groups": 24
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 88,
-        "sampling": "single sampling",
-        "sources": 88,
-        "spurious_factors": "",
-        "study_key": "MTBLS280",
-        "total_study_groups": 10
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 30,
-        "sampling": "single sampling",
-        "sources": 30,
-        "spurious_factors": "",
-        "study_key": "MTBLS281",
-        "total_study_groups": 15
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 13,
-        "sampling": "single sampling",
-        "sources": 13,
-        "spurious_factors": "",
-        "study_key": "MTBLS282",
-        "total_study_groups": 13
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 36,
-        "sampling": "multiple/repeated samping",
-        "sources": 6,
-        "spurious_factors": "",
-        "study_key": "MTBLS283",
-        "total_study_groups": 36
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 81,
-        "sampling": "single sampling",
-        "sources": 81,
-        "spurious_factors": "",
-        "study_key": "MTBLS286",
-        "total_study_groups": 30
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 81,
-        "sampling": "single sampling",
-        "sources": 81,
-        "spurious_factors": "",
-        "study_key": "MTBLS287",
-        "total_study_groups": 30
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 81,
-        "sampling": "single sampling",
-        "sources": 81,
-        "spurious_factors": "",
-        "study_key": "MTBLS288",
-        "total_study_groups": 30
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 108,
-        "sampling": "multiple/repeated samping",
-        "sources": 47,
-        "spurious_factors": "",
-        "study_key": "MTBLS289",
-        "total_study_groups": 19035
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 1704,
-        "sampling": "single sampling",
-        "sources": 1704,
-        "spurious_factors": "factor: Treatment ;factor: Timepoint ;factor: Level measurement type ;factor: Cell number ",
-        "study_key": "MTBLS29",
-        "total_study_groups": 3200
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 95,
-        "sampling": "single sampling",
-        "sources": 95,
-        "spurious_factors": "",
-        "study_key": "MTBLS290",
-        "total_study_groups": 296
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 8,
-        "sampling": "single sampling",
-        "sources": 8,
-        "spurious_factors": "",
-        "study_key": "MTBLS291",
-        "total_study_groups": 5
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 196,
-        "sampling": "multiple/repeated samping",
-        "sources": 97,
-        "spurious_factors": "factor: siRNA ",
-        "study_key": "MTBLS292",
-        "total_study_groups": 36864
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 15,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS293",
-        "total_study_groups": 40
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 147,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS295",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 192,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "factor: metabolomics/lipidomics ;factor: metabolomics/lipidomics ;factor: metabolomics/lipidomics ;factor: metabolomics/lipidomics ",
-        "study_key": "MTBLS296",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS297 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 60,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS298",
-        "total_study_groups": 504
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 63,
-        "sampling": "single sampling",
-        "sources": 63,
-        "spurious_factors": "",
-        "study_key": "MTBLS3",
-        "total_study_groups": 6
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 300,
-        "sampling": "multiple/repeated samping",
-        "sources": 5,
-        "spurious_factors": "",
-        "study_key": "MTBLS30",
-        "total_study_groups": 300
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 75,
-        "sampling": "single sampling",
-        "sources": 75,
-        "spurious_factors": "",
-        "study_key": "MTBLS303",
-        "total_study_groups": 40
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 129,
-        "sampling": "multiple/repeated samping",
-        "sources": 128,
-        "spurious_factors": "",
-        "study_key": "MTBLS306",
-        "total_study_groups": 336
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 89,
-        "sampling": "multiple/repeated samping",
-        "sources": 34,
-        "spurious_factors": "",
-        "study_key": "MTBLS307",
-        "total_study_groups": 189
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS309 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 168,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS31",
-        "total_study_groups": 72
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 288,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS311",
-        "total_study_groups": 288
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS312 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 4,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "",
-        "study_key": "MTBLS313",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Study Factor from Factor Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS315 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 30,
-        "sampling": "multiple/repeated samping",
-        "sources": 10,
-        "spurious_factors": "",
-        "study_key": "MTBLS316",
-        "total_study_groups": 30
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 1,
-        "sampling": "single sampling",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS317",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 121,
-        "sampling": "multiple/repeated samping",
-        "sources": 119,
-        "spurious_factors": "",
-        "study_key": "MTBLS319",
-        "total_study_groups": 40
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 23,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS32",
-        "total_study_groups": 20
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 109,
-        "sampling": "multiple/repeated samping",
-        "sources": 108,
-        "spurious_factors": "",
-        "study_key": "MTBLS320",
-        "total_study_groups": 40
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 25,
-        "sampling": "single sampling",
-        "sources": 25,
-        "spurious_factors": "",
-        "study_key": "MTBLS321",
-        "total_study_groups": 2
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 39,
-        "sampling": "single sampling",
-        "sources": 39,
-        "spurious_factors": "",
-        "study_key": "MTBLS327",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 30,
-        "sampling": "single sampling",
-        "sources": 30,
-        "spurious_factors": "",
-        "study_key": "MTBLS328",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 25,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS33",
-        "total_study_groups": 16
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 26,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS333",
-        "total_study_groups": 4320
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 140,
-        "sampling": "multiple/repeated samping",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS335",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS336 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 27,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS337",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 222,
-        "sampling": "single sampling",
-        "sources": 222,
-        "spurious_factors": "",
-        "study_key": "MTBLS338",
-        "total_study_groups": 57
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 36,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS34",
-        "total_study_groups": 63
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS340 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 55,
-        "sampling": "multiple/repeated samping",
-        "sources": 19,
-        "spurious_factors": "",
-        "study_key": "MTBLS341",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 129,
-        "sampling": "single sampling",
-        "sources": 129,
-        "spurious_factors": "factor: Facility ;factor: Strain ;factor: Sex ",
-        "study_key": "MTBLS345",
-        "total_study_groups": 32
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 18,
-        "sampling": "single sampling",
-        "sources": 18,
-        "spurious_factors": "",
-        "study_key": "MTBLS35",
-        "total_study_groups": 54
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 24,
-        "sampling": "single sampling",
-        "sources": 24,
-        "spurious_factors": "factor: Extract ;factor: Extract ",
-        "study_key": "MTBLS350",
-        "total_study_groups": 12
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 730,
-        "sampling": "multiple/repeated samping",
-        "sources": 3,
-        "spurious_factors": "",
-        "study_key": "MTBLS352",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 239,
-        "sampling": "single sampling",
-        "sources": 239,
-        "spurious_factors": "",
-        "study_key": "MTBLS354",
-        "total_study_groups": 2
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS355 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not find protocol matching '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS358 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 27,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS359",
-        "total_study_groups": 27
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 165,
-        "sampling": "multiple/repeated samping",
-        "sources": 7,
-        "spurious_factors": "",
-        "study_key": "MTBLS36",
-        "total_study_groups": 294
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 4,
-        "sampling": "single sampling",
-        "sources": 4,
-        "spurious_factors": "",
-        "study_key": "MTBLS362",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 140,
-        "sampling": "single sampling",
-        "sources": 140,
-        "spurious_factors": "",
-        "study_key": "MTBLS364",
-        "total_study_groups": 270
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 20,
-        "sampling": "single sampling",
-        "sources": 20,
-        "spurious_factors": "",
-        "study_key": "MTBLS366",
-        "total_study_groups": 18
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not find protocol matching '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS368 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 12,
-        "sampling": "single sampling",
-        "sources": 12,
-        "spurious_factors": "",
-        "study_key": "MTBLS37",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 59,
-        "sampling": "multiple/repeated samping",
-        "sources": 30,
-        "spurious_factors": "",
-        "study_key": "MTBLS370",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 10,
-        "sampling": "multiple/repeated samping",
-        "sources": 30,
-        "spurious_factors": "factor: Geographic location ;factor: Geographic location ",
-        "study_key": "MTBLS372",
-        "total_study_groups": 32
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 14655,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "",
-        "study_key": "MTBLS373",
-        "total_study_groups": 14650
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS374 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not find protocol matching '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS376 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 1,
-        "sampling": "single sampling",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS378",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 57,
-        "sampling": "multiple/repeated samping",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS38",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 1,
-        "sampling": "single sampling",
-        "sources": 1,
-        "spurious_factors": "factor: spike-in concentration ",
-        "study_key": "MTBLS381",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 145,
-        "sampling": "single sampling",
-        "sources": 145,
-        "spurious_factors": "",
-        "study_key": "MTBLS384",
-        "total_study_groups": 36
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS385 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 27,
-        "sampling": "single sampling",
-        "sources": 27,
-        "spurious_factors": "",
-        "study_key": "MTBLS39",
-        "total_study_groups": 27
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 60,
-        "sampling": "multiple/repeated samping",
-        "sources": 10,
-        "spurious_factors": "",
-        "study_key": "MTBLS4",
-        "total_study_groups": 2
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 51,
-        "sampling": "single sampling",
-        "sources": 51,
-        "spurious_factors": "",
-        "study_key": "MTBLS40",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 49,
-        "sampling": "single sampling",
-        "sources": 49,
-        "spurious_factors": "",
-        "study_key": "MTBLS403",
-        "total_study_groups": 40
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": 211,
-        "sampling": "single sampling",
-        "sources": 211,
-        "spurious_factors": "",
-        "study_key": "MTBLS404",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 28,
-        "sampling": "multiple/repeated samping",
-        "sources": 5,
-        "spurious_factors": "factor: Medium_after_initiation ;factor: Salt_Quantity_after_initiation ;factor: Container_after_initiation ;factor: Watering_after_initiation ",
-        "study_key": "MTBLS41",
-        "total_study_groups": 120
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 76,
-        "sampling": "single sampling",
-        "sources": 76,
-        "spurious_factors": "",
-        "study_key": "MTBLS414",
-        "total_study_groups": 40
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS415 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS419 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 30,
-        "sampling": "multiple/repeated samping",
-        "sources": 5,
-        "spurious_factors": "factor: Medium_after_initiation ;factor: Salt_Quantity_after_initiation ;factor: Container_after_initiation ;factor: Watering_after_initiation ",
-        "study_key": "MTBLS42",
-        "total_study_groups": 120
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 68,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "factor: Sex ",
-        "study_key": "MTBLS422",
-        "total_study_groups": 27
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS424 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 96,
-        "sampling": "single sampling",
-        "sources": 96,
-        "spurious_factors": "",
-        "study_key": "MTBLS427",
-        "total_study_groups": 2
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS428 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 47,
-        "sampling": "multiple/repeated samping",
-        "sources": 7,
-        "spurious_factors": "",
-        "study_key": "MTBLS43",
-        "total_study_groups": 480
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS433 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS435 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 46,
-        "sampling": "multiple/repeated samping",
-        "sources": 7,
-        "spurious_factors": "",
-        "study_key": "MTBLS44",
-        "total_study_groups": 480
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 54,
-        "sampling": "single sampling",
-        "sources": 54,
-        "spurious_factors": "",
-        "study_key": "MTBLS45",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 153,
-        "sampling": "multiple/repeated samping",
-        "sources": 10,
-        "spurious_factors": "factor: Experiment ;factor: Experiment ;factor: Treatment ;factor: Replicate ;factor: Experiment ;factor: Treatment ;factor: Experiment ;factor: Treatment ;factor: Experiment ;factor: Treatment ;factor: Experiment ;factor: Experiment ;factor: Timepoint ;factor: Experiment ;factor: Experiment ;factor: Experiment ",
-        "study_key": "MTBLS459",
-        "total_study_groups": 48
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS46 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 15,
-        "sampling": "single sampling",
-        "sources": 15,
-        "spurious_factors": "",
-        "study_key": "MTBLS461",
-        "total_study_groups": 18
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 239,
-        "sampling": "multiple/repeated samping",
-        "sources": 7,
-        "spurious_factors": "factor: Batch ;factor: Batch ;factor: Batch ;factor: Batch ;factor: Batch ",
-        "study_key": "MTBLS464",
-        "total_study_groups": 108
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 659,
-        "sampling": "single sampling",
-        "sources": 659,
-        "spurious_factors": "",
-        "study_key": "MTBLS47",
-        "total_study_groups": 318
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 34,
-        "sampling": "multiple/repeated samping",
-        "sources": 17,
-        "spurious_factors": "",
-        "study_key": "MTBLS472",
-        "total_study_groups": 20
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 33,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "",
-        "study_key": "MTBLS49",
-        "total_study_groups": 20
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 35,
-        "sampling": "multiple/repeated samping",
-        "sources": 9,
-        "spurious_factors": "factor: CarbonDioxide_Quantity_after_initiation ;factor: CarbonDioxide_Quantity_after_01d ",
-        "study_key": "MTBLS5",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS52 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 40,
-        "sampling": "multiple/repeated samping",
-        "sources": 7,
-        "spurious_factors": "",
-        "study_key": "MTBLS54",
-        "total_study_groups": 480
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 183,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "factor: Storage time ;factor: Storage time ",
-        "study_key": "MTBLS55",
-        "total_study_groups": 48
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 54,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS56",
-        "total_study_groups": 10
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 48,
-        "sampling": "single sampling",
-        "sources": 48,
-        "spurious_factors": "",
-        "study_key": "MTBLS57",
-        "total_study_groups": 48
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS59 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 19,
-        "sampling": "single sampling",
-        "sources": 19,
-        "spurious_factors": "factor: Relative Intensity ;factor: Retention Time Modulation ;factor: Skewing ;factor: Min Number Ions ;factor: Gap Penalty ",
-        "study_key": "MTBLS6",
-        "total_study_groups": 48
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 44,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "factor: Ectonucleotidase inhibition ;factor: Gender ",
-        "study_key": "MTBLS61",
-        "total_study_groups": 12
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 12,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS67",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 18,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "",
-        "study_key": "MTBLS69",
-        "total_study_groups": 3
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 113,
-        "sampling": "multiple/repeated samping",
-        "sources": 3,
-        "spurious_factors": "factor: Total Light_100pc_after_28d ;factor: Medium_after_initiation ;factor: Total Light_0pc_after_28d ;factor: Container_after_initiation ;factor: Total Light_100pc_after_07d ;factor: Plants/Container_Quantity_after_14d ;factor: Total Light_0pc_after_initiation ;factor: Total Light_100pc_after_initiation ;factor: Total Light_0pc_after_07d ;factor: Medium_after_14d ;factor: Container_after_14d ",
-        "study_key": "MTBLS7",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not find protocol matching '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS71 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 1252,
-        "sampling": "multiple/repeated samping",
-        "sources": 383,
-        "spurious_factors": "",
-        "study_key": "MTBLS72",
-        "total_study_groups": 2
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 108,
-        "sampling": "multiple/repeated samping",
-        "sources": 27,
-        "spurious_factors": "",
-        "study_key": "MTBLS74",
-        "total_study_groups": 4
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 48,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "",
-        "study_key": "MTBLS75",
-        "total_study_groups": 48
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 73,
-        "sampling": "single sampling",
-        "sources": 73,
-        "spurious_factors": "",
-        "study_key": "MTBLS77",
-        "total_study_groups": 8
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: '\t' expected after '\"'",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS79 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 126,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "",
-        "study_key": "MTBLS8",
-        "total_study_groups": 18
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 12,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "factor: Age at sacrifice ;factor: Age at sacrifice ;factor: Age at sacrifice ",
-        "study_key": "MTBLS81",
-        "total_study_groups": 576
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 34,
-        "sampling": "multiple/repeated samping",
-        "sources": 2,
-        "spurious_factors": "",
-        "study_key": "MTBLS85",
-        "total_study_groups": 40
-    },
-    {
-        "inferred_study_design": "none",
-        "samples": "_",
-        "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '",
-        "sources": "_",
-        "spurious_factors": "",
-        "study_key": "MTBLS86 load FAIL",
-        "total_study_groups": 1
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 14,
-        "sampling": "multiple/repeated samping",
-        "sources": 4,
-        "spurious_factors": "",
-        "study_key": "MTBLS87",
-        "total_study_groups": 5
-    },
-    {
-        "inferred_study_design": "full factorial  design",
-        "samples": 12,
-        "sampling": "ERROR LIKELY: check source declaration",
-        "sources": 1,
-        "spurious_factors": "",
-        "study_key": "MTBLS88",
-        "total_study_groups": 3
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 968,
-        "sampling": "single sampling",
-        "sources": 968,
-        "spurious_factors": "factor: Age ",
-        "study_key": "MTBLS90",
-        "total_study_groups": 738816
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 138,
-        "sampling": "single sampling",
-        "sources": 138,
-        "spurious_factors": "",
-        "study_key": "MTBLS91",
-        "total_study_groups": 27
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 447,
-        "sampling": "single sampling",
-        "sources": 447,
-        "spurious_factors": "",
-        "study_key": "MTBLS92",
-        "total_study_groups": 192000
-    },
-    {
-        "inferred_study_design": "fractional factorial design",
-        "samples": 2139,
-        "sampling": "single sampling",
-        "sources": 2139,
-        "spurious_factors": "",
-        "study_key": "MTBLS93",
-        "total_study_groups": 1956740800
-    }
-]
\ No newline at end of file

From 0ddcd89fc9fa8219d2c3ba3543dd6ee5fca10c8f Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Tue, 17 Nov 2020 17:40:17 +0000
Subject: [PATCH 08/25] renamed isatools.create.models to isatools.create.model

---
 .travis.yml                               | 2 +-
 isatools/create/assay_templates.py        | 2 +-
 isatools/create/connectors.py             | 2 +-
 isatools/examples/createFromSamplePlan.py | 2 +-
 tests/test_create_connectors.py           | 2 +-
 tests/test_create_models_json.py          | 2 +-
 tests/test_create_models_study_design.py  | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 7684dc7b..54052510 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,7 +11,7 @@ install:
 - pip install --upgrade pip && pip install -r requirements.txt
 - pip install tox-travis
 script:
-#- flake8 --show-source --exclude=.svn,CVS,.bzr,.hg,.git,isatab_configurator.py,study_design_wizard.py isatools
+#- flake8 --show-source --exclude=.svn,CVS,.bzr,.hg,.git,isatab_configurator.py isatools
 - behave --no-capture --no-capture-stderr --format=progress features/isa-file-handler.feature
 - python -m unittest discover -s tests/
 - coverage run -m unittest discover -s tests/
diff --git a/isatools/create/assay_templates.py b/isatools/create/assay_templates.py
index fde8a681..05ac0691 100644
--- a/isatools/create/assay_templates.py
+++ b/isatools/create/assay_templates.py
@@ -1,4 +1,4 @@
-from isatools.create.models import *
+from isatools.create.model import *
 
 NAME = 'name'
 
diff --git a/isatools/create/connectors.py b/isatools/create/connectors.py
index ede27417..22a15c92 100644
--- a/isatools/create/connectors.py
+++ b/isatools/create/connectors.py
@@ -1,5 +1,5 @@
 from isatools.model import OntologyAnnotation, OntologySource, FactorValue, Characteristic
-from isatools.create.models import StudyDesign, NonTreatment, Treatment, StudyCell, StudyArm, SampleAndAssayPlan
+from isatools.create.model import StudyDesign, NonTreatment, Treatment, StudyCell, StudyArm, SampleAndAssayPlan
 from isatools.create.constants import SCREEN, INTERVENTIONS, BASE_FACTORS, SAMPLE, ORGANISM_PART
 from collections import OrderedDict
 
diff --git a/isatools/examples/createFromSamplePlan.py b/isatools/examples/createFromSamplePlan.py
index b077dcaa..4e6f0384 100644
--- a/isatools/examples/createFromSamplePlan.py
+++ b/isatools/examples/createFromSamplePlan.py
@@ -1,7 +1,7 @@
 from __future__ import absolute_import
 
 from isatools import isatab
-from isatools.create.models import (
+from isatools.create.model import (
     IsaModelObjectFactory,
     SampleAssayPlan,
     TreatmentFactory,
diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py
index 7c99444f..f225499b 100644
--- a/tests/test_create_connectors.py
+++ b/tests/test_create_connectors.py
@@ -20,7 +20,7 @@
     Study,
     Investigation
 )
-from isatools.create.models import (
+from isatools.create.model import (
     StudyDesign,
     StudyArm,
     StudyCell,
diff --git a/tests/test_create_models_json.py b/tests/test_create_models_json.py
index 3f2186fc..1c7aac93 100644
--- a/tests/test_create_models_json.py
+++ b/tests/test_create_models_json.py
@@ -14,7 +14,7 @@
     ParameterValue,
     OntologySource
 )
-from isatools.create.models import (
+from isatools.create.model import (
     NonTreatment,
     Treatment,
     StudyCell,
diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py
index f9544a51..de026a8c 100644
--- a/tests/test_create_models_study_design.py
+++ b/tests/test_create_models_study_design.py
@@ -21,7 +21,7 @@
     Assay,
     Process
 )
-from isatools.create.models import (
+from isatools.create.model import (
     NonTreatment,
     Treatment,
     TreatmentFactory,

From eaebbc629a0d272ceed7c231a7833c9bcba0b6f4 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Tue, 17 Nov 2020 18:00:25 +0000
Subject: [PATCH 09/25] clean-up for naming conventions #370

---
 isatools/create/model.py                 | 198 ++++++++++++-----------
 tests/test_create_models_json.py         |  46 +++---
 tests/test_create_models_study_design.py |   4 +-
 3 files changed, 125 insertions(+), 123 deletions(-)

diff --git a/isatools/create/model.py b/isatools/create/model.py
index 5008aa01..5ca90835 100644
--- a/isatools/create/model.py
+++ b/isatools/create/model.py
@@ -2145,8 +2145,12 @@ def _generate_samples_and_assays(self, sources_map, sampling_protocol, performer
     @staticmethod
     def _increment_counter_by_node_type(counter, node):
         if isinstance(node, ProductNode):
-            counter[node.type] = counter[node.type] + 1 if node.type in counter else 1
-            # FIXME do we need a check by node.name for DATA_FILE?
+            # use node.name for DATA_FILE, node.type for other Product Nodes
+            if node.type == DATA_FILE:
+                counter[node.name] = counter[node.name] + 1 if node.name in counter else 1
+            else:
+                counter[node.type] = counter[node.type] + 1 if node.type in counter else 1
+
         if isinstance(node, ProtocolNode):
             # the attribute "name" should contain the same value as "protocol_type.term"
             counter[node.name] = counter[node.name] + 1 if node.name in counter else 1
@@ -2161,8 +2165,7 @@ def _generate_isa_elements_from_node(
             other_materials=None,
             data_files=None,
             previous_items=None,
-            ix=0,
-            jx=0,
+            start_node_index=0,
             counter=None
     ):
         if counter is None:
@@ -2175,10 +2178,10 @@ def _generate_isa_elements_from_node(
             other_materials = []
         if processes is None:
             processes = []
-        log.debug('# processes: {0} - ix: {1}'.format(len(processes), ix))
+        log.debug('# processes: {0} - ix: {1}'.format(len(processes), start_node_index))
         counter = StudyDesign._increment_counter_by_node_type(counter, node)
-        item = isa_objects_factory(
-            node, assay_file_prefix, ix, counter,
+        item = StudyDesign._isa_objects_factory(
+            node, assay_file_prefix, start_node_index, counter,
             measurement_type=assay_graph.measurement_type,
             technology_type=assay_graph.technology_type
         )
@@ -2195,13 +2198,12 @@ def _generate_isa_elements_from_node(
                 else next_node.replicates if isinstance(next_node, ProtocolNode) \
                 else 1
             for jj in range(size):
-                jx = ii * size + jj
-                log.debug('ii = {0} - jj = {1} - jx = {2}'.format(ii, jj, jx))
+                log.debug('ii = {} - jj = {}'.format(ii, jj))
                 # counter += 1
                 processes, other_materials, data_files, next_item, counter = \
                     StudyDesign._generate_isa_elements_from_node(
                         next_node, assay_graph, assay_file_prefix, processes, other_materials, data_files,
-                        [item], ix=ix, jx=jx, counter=counter
+                        [item], start_node_index=start_node_index, counter=counter
                 )
                 if isinstance(node, ProtocolNode):
                     item.outputs.append(next_item)
@@ -2218,7 +2220,7 @@ def _generate_isa_elements_from_node(
                         assert isinstance(previous_process, Process)
                         assert isinstance(item, Process)
                         log.debug('linking process {0} to process {1}'.format(previous_process.name, item.name))
-                        plink(previous_process, item)  # TODO this does not work
+                        plink(previous_process, item)  # TODO check if this generates any issue
         return processes, other_materials, data_files, item, counter
 
     @staticmethod
@@ -2256,7 +2258,7 @@ def generate_assay(assay_graph, assay_samples):
                     ix = i * len(assay_samples) * size + j * size + k
                     log.debug('i = {0}, j = {1}, k={2}, ix={3}'.format(i, j, k, ix))
                     processes, other_materials, data_files, _, __ = StudyDesign._generate_isa_elements_from_node(
-                        node, assay_graph, assay_graph.id, ix=ix, jx=0, counter=None, processes=[], other_materials=[],
+                        node, assay_graph, assay_graph.id, start_node_index=ix, counter=None, processes=[], other_materials=[],
                         data_files=[], previous_items=[sample]
                     )
                     assay.other_material.extend(other_materials)
@@ -2266,6 +2268,92 @@ def generate_assay(assay_graph, assay_samples):
                                                                                             len(data_files)))
         return assay
 
+    @staticmethod
+    def _isa_objects_factory(
+            node,
+            assay_file_prefix,
+            start_node_index,
+            counter,
+            measurement_type=None,
+            technology_type=None,
+            performer=DEFAULT_PERFORMER
+    ):
+        """
+        This method generates an ISA element from an ISA node
+        :param technology_type:
+        :param measurement_type:
+        :param node: SequenceNode - can be either a ProductNode or a ProtocolNode
+        :param assay_file_prefix: str
+        :param start_node_index: int the index of the starting node in the graph
+        :param counter: dict containing the counts for this specific subgraph
+        :param performer: str/Person
+        :return: either a Sample or a Material or a DataFile. So far only RawDataFile is supported among files
+        """
+        if isinstance(node, ProtocolNode):
+            return Process(
+                name='{}_{}-{}-<acquisition>{}'.format(
+                    urlify(node.name), assay_file_prefix, start_node_index, counter[node.name]
+                ),
+                executes_protocol=node,
+                performer=performer,
+                parameter_values=node.parameter_values,
+                inputs=[],
+                outputs=[],
+            )
+        if isinstance(node, ProductNode):
+            if node.type == SAMPLE:
+                return Sample(
+                    name='{}-{}-Sample{}'.format(assay_file_prefix, start_node_index, counter[SAMPLE]),
+                    characteristics=node.characteristics
+                )
+            if node.type == EXTRACT:
+                return Extract(
+                    name='{}-{}-Extract{}'.format(assay_file_prefix, start_node_index, counter[EXTRACT]),
+                    characteristics=node.characteristics
+                )
+            if node.type == LABELED_EXTRACT:
+                return LabeledExtract(
+                    name='{}-{}-LE{}'.format(assay_file_prefix, start_node_index, counter[LABELED_EXTRACT]),
+                    characteristics=node.characteristics
+                )
+            # under the hypothesis that we deal only with raw data files
+            # derived data file would require a completely separate approach
+            if node.type == DATA_FILE:
+                try:
+                    log.debug('Assay conf. found: {}; {};'.format(
+                        measurement_type, technology_type)
+                    )
+                    m_type_term = measurement_type.term if isinstance(measurement_type, OntologyAnnotation) \
+                        else measurement_type
+                    t_type_term = technology_type.term if isinstance(technology_type, OntologyAnnotation) \
+                        else technology_type
+                    curr_assay_opt = next(
+                        opt for opt in assays_opts if opt['measurement type'] == m_type_term and
+                        opt['technology type'] == t_type_term
+                    )
+                    log.debug('Assay conf. found: {}; {}; {};'.format(
+                        measurement_type, technology_type, curr_assay_opt)
+                    )
+                    isa_class = globals()[curr_assay_opt['raw data file'].replace(' ', '')]
+                    assert isa_class in {RawDataFile, RawSpectralDataFile}
+                    return isa_class(
+                        filename='{}_{}-{}-{}'.format(
+                            urlify(node.name),
+                            assay_file_prefix,
+                            start_node_index,
+                            counter[node.name]
+                        )
+                    )
+                except StopIteration:
+                    return RawDataFile(
+                        filename='{}_{}-{}-{}'.format(
+                            urlify(node.name),
+                            assay_file_prefix,
+                            start_node_index,
+                            counter[node.name]
+                        )
+                    )
+
     def generate_isa_study(self):
         """
         this is the core method to return the fully populated ISA Study object from the StudyDesign
@@ -2515,92 +2603,6 @@ def _generate_quality_control_samples(quality_control, study_cell, sample_size=0
         log.debug("Completed post-batch samples")
         return qc_sources, qc_samples_pre_run, qc_samples_interspersed, qc_samples_post_run, qc_processes
 
-# TODO: should I move this inside the StudyDesign class?
-def isa_objects_factory(
-        node,
-        assay_file_prefix,
-        ix,
-        counter,
-        measurement_type=None,
-        technology_type=None,
-        performer=DEFAULT_PERFORMER
-):
-    """
-    This method generates an ISA element from an ISA node
-    :param technology_type:
-    :param measurement_type:
-    :param node: SequenceNode - can be either a ProductNode or a ProtocolNode
-    :param assay_file_prefix: str
-    :param ix: int the index of the starting node in the graph
-    :param counter: dict containing the counts for this specific subgraph
-    :param performer: str/Person
-    :return: either a Sample or a Material or a DataFile. So far only RawDataFile is supported among files
-    """
-    if isinstance(node, ProtocolNode):
-        return Process(
-            name='{}_{}-{}-<acquisition>{}'.format(
-                urlify(node.name), assay_file_prefix, ix, counter[node.name]
-            ),  # FIXME!!
-            executes_protocol=node,
-            performer=performer,
-            parameter_values=node.parameter_values,
-            inputs=[],
-            outputs=[],
-        )
-    if isinstance(node, ProductNode):
-        if node.type == SAMPLE:
-            return Sample(
-                name='{}-{}-Sample{}'.format(assay_file_prefix, ix, counter[SAMPLE]),
-                characteristics=node.characteristics
-            )
-        if node.type == EXTRACT:
-            return Extract(
-                name='{}-{}-Extract{}'.format(assay_file_prefix, ix, counter[EXTRACT]),
-                characteristics=node.characteristics
-            )
-        if node.type == LABELED_EXTRACT:
-            return LabeledExtract(
-                name='{}-{}-LE{}'.format(assay_file_prefix, ix, counter[LABELED_EXTRACT]),
-                characteristics=node.characteristics
-            )
-        # under the hypothesis that we deal only with raw data files
-        # derived data file would require a completely separate approach
-        if node.type == DATA_FILE:
-            try:
-                log.debug('Assay conf. found: {}; {};'.format(
-                    measurement_type, technology_type)
-                )
-                m_type_term = measurement_type.term if isinstance(measurement_type, OntologyAnnotation) \
-                    else measurement_type
-                t_type_term = technology_type.term if isinstance(technology_type, OntologyAnnotation) \
-                    else technology_type
-                curr_assay_opt = next(
-                    opt for opt in assays_opts if opt['measurement type'] == m_type_term and
-                    opt['technology type'] == t_type_term
-                )
-                log.debug('Assay conf. found: {}; {}; {};'.format(
-                    measurement_type, technology_type, curr_assay_opt)
-                )
-                isa_class = globals()[curr_assay_opt['raw data file'].replace(' ', '')]
-                assert isa_class in {RawDataFile, RawSpectralDataFile}
-                return isa_class(
-                    filename='{}_{}-{}-{}'.format(
-                        urlify(node.name),
-                        assay_file_prefix,
-                        ix,
-                        counter[node.type]  # FIXME should this be changed to "counter[node.name]"?
-                    )
-                )
-            except StopIteration:
-                return RawDataFile(
-                    filename='{}_{}-{}-{}'.format(
-                        urlify(node.name),
-                        assay_file_prefix,
-                        ix,
-                        counter[node.type]  # FIXME should this be changed to "counter[node.name]"?
-                    )
-                )
-
 
 class StudyDesignEncoder(json.JSONEncoder):
 
diff --git a/tests/test_create_models_json.py b/tests/test_create_models_json.py
index 1c7aac93..d8d74ac3 100644
--- a/tests/test_create_models_json.py
+++ b/tests/test_create_models_json.py
@@ -154,7 +154,7 @@ class OntologyAnnotationTest(unittest.TestCase):
     def test_simple_ontology_annotation(self):
         annotation = OntologyAnnotation(term="aspirin")
         annotation_json = json.dumps(annotation, cls=OntologyAnnotationEncoder, sort_keys=True, indent=4)
-        print(annotation_json)
+        log.debug(annotation_json)
         self.assertEqual(json.loads(annotation_json), {"term": "aspirin"})
 
 
@@ -461,7 +461,7 @@ def test_encode_single_treatment_cell_with_ontology_annotations(self):
         te1.factor_values = [f1v1, f2v1, f3v1]
         cell = StudyCell(name='test_cell', elements=(te1, ))
         json_cell = json.loads(json.dumps(cell, cls=StudyCellEncoder))
-        print(json.dumps(cell, cls=StudyCellEncoder, indent=4, sort_keys=True))
+        log.debug(json.dumps(cell, cls=StudyCellEncoder, indent=4, sort_keys=True))
         for factor_value_dict in json_cell['elements'][0]['factorValues']:
             self.assertIsNotNone(factor_value_dict['value'])
 
@@ -495,9 +495,9 @@ def test_decode_multi_treatment_cell(self):
             actual_cell = decoder.loads(json_text)
         self.assertEqual(len(self.cell_multi_elements_padded.elements), len(actual_cell.elements))
         for i in range(len(actual_cell.elements)):
-            print(i)
-            print(actual_cell.elements[i])
-            print(self.cell_multi_elements_padded.elements[i])
+            log.debug(i)
+            log.debug(actual_cell.elements[i])
+            log.debug(self.cell_multi_elements_padded.elements[i])
             self.assertEqual(self.cell_multi_elements_padded.elements[i], actual_cell.elements[i])
         self.assertEqual(self.cell_multi_elements_padded, actual_cell)
 
@@ -566,10 +566,10 @@ def test_decode_dna_rna_extraction_plan(self):
         self.assertEqual(self.plan.sample_plan, actual_plan.sample_plan)
         unmatched_expected = self.plan.assay_plan - actual_plan.assay_plan
         unmatched_actual = actual_plan.assay_plan - self.plan.assay_plan
-        print(unmatched_actual)
-        print(unmatched_expected)
+        log.debug(unmatched_actual)
+        log.debug(unmatched_expected)
         if unmatched_expected and unmatched_actual:
-            print('here we are')
+            log.debug('here we are')
             unmatched_expected_el = unmatched_expected.pop()
             unmatched_actual_el = unmatched_actual.pop()
             self.assertEqual(unmatched_expected_el.id, unmatched_actual_el.id)
@@ -578,7 +578,7 @@ def test_decode_dna_rna_extraction_plan(self):
             self.assertEqual(repr(unmatched_expected_el.links), repr(unmatched_actual_el.links))
             self.assertEqual(repr(unmatched_expected_el), repr(unmatched_actual_el))
             self.assertEqual(unmatched_expected_el, unmatched_actual_el)
-            print('all these test passed')
+            log.debug('all these test passed')
         self.assertEqual(self.plan.assay_plan, actual_plan.assay_plan)
         self.assertEqual(self.plan.sample_to_assay_map, actual_plan.sample_to_assay_map)
         self.assertEqual(self.plan, actual_plan)
@@ -607,7 +607,7 @@ def test_encode_sample_and_assay_plan_with_ontology_annotations(self):
         sample2assay_plan = {input_material: [nmr_assay_graph]}
         sap1.sample_to_assay_map = sample2assay_plan
         actual_json_plan = json.loads(json.dumps(sap1, cls=SampleAndAssayPlanEncoder))
-        print(json.dumps(sap1, cls=SampleAndAssayPlanEncoder, indent=4, sort_keys=True))
+        log.debug(json.dumps(sap1, cls=SampleAndAssayPlanEncoder, indent=4, sort_keys=True))
         assay_node_json = next(node for node in actual_json_plan["assayPlan"][0]["nodes"]
                                if node["@id"] == "nmr_spectroscopy_000_000")
         for param_val_json in assay_node_json["parameterValues"]:
@@ -625,8 +625,8 @@ def test_encode_arm_with_single_element_cells(self):
         with open(os.path.join(os.path.dirname(__file__), 'data', 'json', 'create',
                                'study-arm-with-single-element-cells.json')) as expected_json_fp:
             expected_json_arm = json.load(expected_json_fp)
-        print('expected source type is {}'.format(expected_json_arm['sourceType']))
-        print('actual source type is {}'.format(actual_json_arm['sourceType']))
+        log.debug('expected source type is {}'.format(expected_json_arm['sourceType']))
+        log.debug('actual source type is {}'.format(actual_json_arm['sourceType']))
         self.assertEqual(ordered(actual_json_arm["sourceType"]), ordered(expected_json_arm["sourceType"]))
         self.assertEqual(ordered(actual_json_arm), ordered(expected_json_arm))
 
@@ -726,13 +726,13 @@ def test_decode_study_design_with_three_arms(self):
         self.assertEqual(self.three_arm_study_design.name, actual_study_design.name)
         """
         for i, arm in enumerate(self.three_arm_study_design.study_arms):
-            print("comparing study arm #{0} - {1}".format(i, arm.name))
-            print("Difference:\n")
+            log.debug("comparing study arm #{0} - {1}".format(i, arm.name))
+            log.debug("Difference:\n")
             difflib.ndiff(arm, actual_study_design.study_arms[i])
-            print("\nExpected:\n")
-            print(arm)
-            print("\nActual:\n")
-            print(actual_study_design.study_arms[i])
+            log.debug("\nExpected:\n")
+            log.debug(arm)
+            log.debug("\nActual:\n")
+            log.debug(actual_study_design.study_arms[i])
             self.assertEqual(arm, actual_study_design.study_arms[i])
         self.assertEqual(self.three_arm_study_design.study_arms[0], actual_study_design.study_arms[0])
         self.assertEqual(self.three_arm_study_design.study_arms[1], actual_study_design.study_arms[1])
@@ -740,13 +740,13 @@ def test_decode_study_design_with_three_arms(self):
         self.assertEqual(expected_third_arm.name, actual_study_design.study_arms[2].name)
         self.assertEqual(expected_third_arm.group_size,
                          actual_study_design.study_arms[2].group_size)
-        # print("Arm map:")
-        # print(list(actual_study_design.study_arms[2].arm_map.keys()))
+        # log.debug("Arm map:")
+        # log.debug(list(actual_study_design.study_arms[2].arm_map.keys()))
         i = 0
         for cell, sample_assay_plan in expected_third_arm.arm_map.items():
-            print("testing cell {0}".format(cell.name))
-            print(cell)
-            print(list(actual_study_design.study_arms[2].arm_map.keys())[i])
+            log.debug("testing cell {0}".format(cell.name))
+            log.debug(cell)
+            log.debug(list(actual_study_design.study_arms[2].arm_map.keys())[i])
             self.assertTrue(cell in actual_study_design.study_arms[2].arm_map)
             self.assertEqual(sample_assay_plan, actual_study_design.study_arms[2].arm_map[cell])
             i = i + 1
diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py
index de026a8c..e2879a07 100644
--- a/tests/test_create_models_study_design.py
+++ b/tests/test_create_models_study_design.py
@@ -93,7 +93,7 @@ def test_init_and_propeties(self):
     def test_repr(self):
         print(self.non_treatment.duration)
         self.assertEqual(repr(self.non_treatment),
-                         "isatools.create.models.NonTreatment(type='screen', duration=isatools.model.FactorValue("
+                         "isatools.create.model.NonTreatment(type='screen', duration=isatools.model.FactorValue("
                          "factor_name=isatools.model.StudyFactor(name='DURATION', "
                          "factor_type=isatools.model.OntologyAnnotation(term='time', term_source=None, "
                          "term_accession='', comments=[]), comments=[]), value=10.0, "
@@ -125,7 +125,7 @@ def setUp(self):
 
     def test_repr(self):
         self.assertEqual(repr(self.treatment),
-                         "isatools.create.models.Treatment(type=chemical intervention, "
+                         "isatools.create.model.Treatment(type=chemical intervention, "
                          "factor_values=[isatools.model.FactorValue(factor_name=isatools.model.StudyFactor(name='AGENT'"
                          ", factor_type=isatools.model.OntologyAnnotation(term='perturbation agent', term_source=None, "
                          "term_accession='', comments=[]), comments=[]), value='nitroglycerin', unit=None), "

From 2d2d343b0c2207e8c3aca7d4af3ba4143821be1f Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Wed, 18 Nov 2020 09:36:31 +0000
Subject: [PATCH 10/25] extending travis waiting time

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 54052510..196afc07 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,7 +13,7 @@ install:
 script:
 #- flake8 --show-source --exclude=.svn,CVS,.bzr,.hg,.git,isatab_configurator.py isatools
 - behave --no-capture --no-capture-stderr --format=progress features/isa-file-handler.feature
-- python -m unittest discover -s tests/
+- travis_wait python -m unittest discover -s tests/
 - coverage run -m unittest discover -s tests/
 - coverage report -m
 branches:

From 1ea6eb999bf92c3dd8d105c0e2e243ac3fc5b4fb Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Wed, 18 Nov 2020 18:55:51 +0000
Subject: [PATCH 11/25] fixing assay name prefix #370

---
 isatools/create/model.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/isatools/create/model.py b/isatools/create/model.py
index 5ca90835..88853a3b 100644
--- a/isatools/create/model.py
+++ b/isatools/create/model.py
@@ -377,7 +377,6 @@ def check_follow_up():
         }
         func = switcher.get(new_element.type, lambda: False)
         # lines = inspect.getsource(func)
-        # print('Element type: {element_type} \nfunc: {func}'.format(element_type=new_element.type, func=lines))
         return func()
 
     @staticmethod
@@ -1428,8 +1427,8 @@ def from_sample_and_assay_plan_dict(cls, name, sample_type_dicts, *assay_plan_di
                 assay_plan_dict,
                 # FIXME: this id cannot work as it is
                 id_=str(uuid.uuid4()) if use_guids
-                else assay_plan_dict['id'] if 'id' in assay_plan_dict
-                else '{0}{1}'.format(
+                else '{}{}'.format(ASSAY_GRAPH_PREFIX, assay_plan_dict['id']) if 'id' in assay_plan_dict
+                else '{}{}'.format(
                     ASSAY_GRAPH_PREFIX, str(i).zfill(n_digits(len(assay_plan_dicts)))
                 ),
                 quality_control=quality_controls[i] if len(quality_controls) > i else None

From 73233bfce761d152a3007bb4031d8095d033b7a5 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Wed, 18 Nov 2020 18:55:59 +0000
Subject: [PATCH 12/25] cleanup

---
 isatools/isatab.py   |  3 ---
 isatools/utils.py    | 29 +++++++++++------------------
 tests/test_mw2isa.py |  1 -
 3 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/isatools/isatab.py b/isatools/isatab.py
index 136e83ce..5f845912 100644
--- a/isatools/isatab.py
+++ b/isatools/isatab.py
@@ -5681,7 +5681,6 @@ def pbar(x): return x
                 # don't drop duplicates
                 for _, object_series in pbar(DF.iterrows()):
                     # if _ == 0:
-                    #     print('processing: ', object_series[object_label])
                     protocol_ref = str(object_series[object_label])
                     process_key = process_keygen(
                         protocol_ref, column_group, _cg, DF.columns,
@@ -5855,8 +5854,6 @@ def pbar(x): return x
                             data_node.generated_from.append(
                                 sample_node_context)
 
-            # print('key sequence = ', process_key_sequence)
-
             # Link the processes in each sequence
             for pair in pairwise(process_key_sequence):
                 left = processes[pair[0]]  # get process on left of pair
diff --git a/isatools/utils.py b/isatools/utils.py
index 7bf1ebf2..113ad01d 100644
--- a/isatools/utils.py
+++ b/isatools/utils.py
@@ -6,20 +6,12 @@
 import logging
 import os
 import re
-import shutil
 import sys
-import tempfile
 import uuid
 from functools import reduce
 from zipfile import ZipFile
-
 import pandas as pd
-# import modin.pandas as pd_modin
-
-from mzml2isa.mzml import MzMLFile
-
 from isatools import isatab
-# from isatools.create import create_from_galaxy_parameters
 from isatools.model import (
     DerivedSpectralDataFile,
     ISAModelAttributeError,
@@ -141,7 +133,7 @@ def insert_distinct_parameter(table_fp, protocol_ref_to_unpool):
             break
 
     if name_header is not None:
-        print('Are you sure you want to add a column of hash values in {}? '
+        log.debug('Are you sure you want to add a column of hash values in {}? '
               'Y/(N)'.format(name_header))
         confirm = input()
         if confirm == 'Y':
@@ -149,7 +141,7 @@ def insert_distinct_parameter(table_fp, protocol_ref_to_unpool):
             table_fp.seek(0)
             df.to_csv(table_fp, index=None, header=headers, sep='\t')
     else:
-        print('Could not find appropriate column to fill with hashes')
+        log.debug('Could not find appropriate column to fill with hashes')
 
 
 def contains(small_list, big_list):
@@ -357,9 +349,9 @@ def check_loadable(tab_dir_root):
                       x.startswith('MTBLS')]:
         try:
             isatab.load(os.path.join(tab_dir_root, mtbls_dir))
-            print('{} load OK'.format(mtbls_dir))
+            log.debug('{} load OK'.format(mtbls_dir))
         except Exception as e:
-            print('{0} load FAIL, reason: {1}'.format(mtbls_dir, e))
+            log.debug('{0} load FAIL, reason: {1}'.format(mtbls_dir, e))
 
 
 def compute_study_factors_on_mtbls(tab_dir_root):
@@ -390,6 +382,7 @@ def compute_study_factors_on_mtbls(tab_dir_root):
             pass
 
 
+# TODO: is this any useful at all? (by Massi 18/11/2020)
 class IsaTabAnalyzer(object):
     """A utility to analyze ISA-Tabs"""
 
@@ -503,7 +496,7 @@ def generate_study_design_report(self, get_num_study_groups=True,
                                                       .drop_duplicates()))
                                              ))
                                 except Exception as e:
-                                    print("error in query, {}".format(e))
+                                    log.debug("error in query, {}".format(e))
                     study_design_report[-1]['assays'].append(assay_report)
         return study_design_report
 
@@ -574,7 +567,7 @@ def batch_fix_isatabs(settings):
     :return: None
     """
     for table_file_path in settings.keys():
-        print('Fixing {table_file_path}...'.format(
+        log.debug('Fixing {table_file_path}...'.format(
             table_file_path=table_file_path))
         fixer = IsaTabFixer(table_file_path=table_file_path)
         fixer.fix_factor(
@@ -897,8 +890,8 @@ def remove_unused_protocols(self):
                             process.executes_protocol.name)
                     except KeyError:
                         pass
-            print('Unused protocols: {}'.format(unused_protocol_names))
-            print('Location of unused protocols: {}'.format(
+            log.info('Unused protocols: {}'.format(unused_protocol_names))
+            log.info('Location of unused protocols: {}'.format(
                 list(map(lambda pr: True if pr.name in unused_protocol_names else False, study.protocols))
             ))
             # remove these protocols from study.protocols
@@ -910,9 +903,9 @@ def remove_unused_protocols(self):
             study.protocols = clean_protocols_list
             """
             clean_protocols = [pr for pr in study.protocols if pr.name not in unused_protocol_names]
-            print('Clean protocol list: {}'.format([pr.name for pr in clean_protocols]))
+            log.info('Clean protocol list: {}'.format([pr.name for pr in clean_protocols]))
             study.protocols = clean_protocols
-            print('Clean study.protocols: {}'.format([pr.name for pr in study.protocols]))
+            log.info('Clean study.protocols: {}'.format([pr.name for pr in study.protocols]))
         isatab.dump(
             investigation, output_path=os.path.dirname(self.path),
             i_file_name='{filename}.fix'.format(
diff --git a/tests/test_mw2isa.py b/tests/test_mw2isa.py
index 5496b311..3182b09c 100644
--- a/tests/test_mw2isa.py
+++ b/tests/test_mw2isa.py
@@ -31,7 +31,6 @@ def test_conversion(self):
             log.info("conversion successful, invoking the validator for " + study_id)
             with open(os.path.join(self._tmp_dir, study_id, 'i_investigation.txt')) as fp:
                 report = isatab.validate(fp)
-                print(report)
                 if len(report['errors']) > 0:
                     self.fail("conversion successful but validation failed")
         else:

From 387b126d827a2973bb7ff38e957b8dacbf43314f Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Wed, 18 Nov 2020 19:12:45 +0000
Subject: [PATCH 13/25] cleanup of print() v2

---
 isatools/model.py     | 4 ++--
 isatools/sampletab.py | 7 +++----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/isatools/model.py b/isatools/model.py
index b5c95694..f3f763eb 100644
--- a/isatools/model.py
+++ b/isatools/model.py
@@ -22,7 +22,6 @@
 from collections.abc import Iterable
 import networkx as nx
 
-
 from isatools.errors import ISAModelAttributeError
 
 
@@ -441,6 +440,7 @@ def add_ontology_source_reference(self, name='', version='',
             version: OntologySource version
             description: OntologySource description
             file: OntologySource file
+            comments: list
         """
         c = OntologySource(name=name, version=version, description=description,
                            file=file, comments=comments)
@@ -1749,7 +1749,7 @@ def add_factor(self, name, factor_type):
     def del_factor(self, name, are_you_sure=False):
         if self.get_factor(name=name) is None:
             log.warning(
-                'A factor with name "{}" hasnot been found in the study'
+                'A factor with name "{}" has not been found in the study'
                 .format(name))
         else:
             if are_you_sure:  # force user to say yes, to be sure to be sure
diff --git a/isatools/sampletab.py b/isatools/sampletab.py
index 08a500b4..c34bf98e 100644
--- a/isatools/sampletab.py
+++ b/isatools/sampletab.py
@@ -11,7 +11,6 @@
 
 import numpy as np
 import pandas as pd
-# import modin.pandas as pd_modin
 
 from progressbar import ETA, Bar, ProgressBar, SimpleProgress
 
@@ -149,7 +148,7 @@ def get_value(object_column, column_group, object_series,
             try:
                 value.term_source = ontology_source_map[term_source_value]
             except KeyError:
-                print('term source: ', term_source_value, ' not found')
+                log.warning('term source: %s not found', term_source_value)
 
         term_accession_value = str(object_series[offset_2r_col])
 
@@ -183,8 +182,8 @@ def get_value(object_column, column_group, object_series,
                     unit_term_value.term_source = \
                         ontology_source_map[unit_term_source_value]
                 except KeyError:
-                    print('term source: ', unit_term_source_value,
-                          ' not found')
+                    log.warning('term source: %s not found',
+                                unit_term_source_value)
 
             term_accession_value = object_series[offset_3r_col]
 

From 51c146c2df3567ceb659c04b2d4ba228aa718f22 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Thu, 19 Nov 2020 18:08:12 +0000
Subject: [PATCH 14/25] removed printout message #374

---
 isatools/net/biocrates2isatab.py            | 39 +++++++--------------
 isatools/net/mtbls-assay-definition2dict.py |  5 +--
 2 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/isatools/net/biocrates2isatab.py b/isatools/net/biocrates2isatab.py
index f2a2af1e..ea214f56 100644
--- a/isatools/net/biocrates2isatab.py
+++ b/isatools/net/biocrates2isatab.py
@@ -2,14 +2,7 @@
 """Functions for importing from BioCrates"""
 from time import time
 import os
-
-# os.environ["MODIN_ENGINE"] = "ray"
-# os.environ["MODIN_CPUS"] = "4"
 import pandas as pd
-
-# import ray
-# ray.init(num_cpus=1)
-# import modin.pandas as pd
 import glob
 import logging
 
@@ -40,8 +33,6 @@
     os.path.dirname(
         os.path.abspath(__file__)), 'resources', 'saxon9', 'saxon9he.jar')
 
-print(DEFAULT_SAXON_EXECUTABLE)
-
 BIOCRATES_DIR = os.path.join(os.path.dirname(__file__), 'resources',
                              'biocrates')
 
@@ -190,12 +181,12 @@ def biocrates_to_isatab_convert(biocrates_filename, saxon_jar_path=DEFAULT_SAXON
     buffer = BytesIO()
 
     destination_dir = os.path.abspath(dir_name)
-    print('Destination dir is: ' + destination_dir)
+    logger.debug('Destination dir is: ' + destination_dir)
     logger.info('Destination dir is: ' + destination_dir)
 
     if os.path.exists(destination_dir):
         logger.debug('Removing dir' + destination_dir)
-        print('Removing dir' + destination_dir)
+        logger.debug('Removing dir' + destination_dir)
         rmtree(destination_dir)
 
     try:
@@ -212,12 +203,12 @@ def biocrates_to_isatab_convert(biocrates_filename, saxon_jar_path=DEFAULT_SAXON
         logger.error("isatools.convert.biocrates2isatab: "
                      "CalledProcessError caught ", err.returncode)
 
-        print(err)
+        logger.debug(err)
 
     with ZipFile(buffer, 'w') as zip_file:
         # use relative dir_name to avoid absolute path on file names
         zipdir(dir_name, zip_file)
-        print("!", zip_file.namelist())
+        logger.debug("! %s", zip_file.namelist())
 
     # clean up the target directory after the ZIP file has been closed
     # rmtree(destination_dir)
@@ -278,7 +269,7 @@ def writeOutToFile(plate, polarity, usedop, platebarcode, output_dir,
     if len(pos_injection) > 0:
         filename = 'm_MTBLSXXX_' + usedop + '_' + platebarcode + '_' + polarity.lower() \
             + '_maf.txt'
-        print("filename: ", filename)
+        logger.debug("filename: %s", filename)
         with open(os.path.join(output_dir, filename), 'w') as file_handler:
             # writing out the header
             file_handler.write('metabolite_identification')
@@ -331,18 +322,18 @@ def complete_MAF(maf_stub):
 def add_sample_metadata(sample_info_file, input_study_file):
 
     S_STUDY_LOC = os.path.join(DESTINATION_DIR, input_study_file)
-    print("study file location:", S_STUDY_LOC)
+    logger.debug("study file location: %s", S_STUDY_LOC)
 
     # data = pd_modin.read_csv(S_STUDY_LOC, sep='\t')
     data = pd.read_csv(S_STUDY_LOC, sep='\t')
-    print("study file:", data)
+    logger.debug("study file: %s", data)
 
     SAMPLE_METADATA_LOC = os.path.join(SAMPLE_METADATA_INPUT_DIR, sample_info_file)
-    print("sample metadata file location:", SAMPLE_METADATA_LOC)
+    logger.debug("sample metadata file location: %s", SAMPLE_METADATA_LOC)
 
     # sample_desc = pd_modin.read_csv(SAMPLE_METADATA_LOC)
     sample_desc = pd.read_csv(SAMPLE_METADATA_LOC)
-    print("sample metadata: ", sample_desc)
+    logger.debug("sample metadata: %s", sample_desc)
 
     # data.join(sample_desc, on='Characteristics[barcode identifier]')
 
@@ -351,7 +342,7 @@ def add_sample_metadata(sample_info_file, input_study_file):
     # result = pd_modin.merge(data, sample_desc, on='Characteristics[barcode identifier]', left_index=True, how='outer')
     result = pd.merge(data, sample_desc, on='Characteristics[barcode identifier]', left_index=True, how='outer')
     cols = result.columns.tolist()
-    print(cols)
+    logger.debug(cols)
 
     result = result[['Source Name', 'Material Type', 'Characteristics[barcode identifier]', 'internal_ID', 'resolute_ID',
                      'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number',
@@ -389,11 +380,7 @@ def add_sample_metadata(sample_info_file, input_study_file):
                                     'Term Accession Number.1': 'Term Accession Number'
                                     })
 
-
-    # print("results:", result)
-    result.to_csv(S_STUDY_LOC , sep='\t', encoding='utf-8', index=False)
-
-
+    result.to_csv(S_STUDY_LOC, sep='\t', encoding='utf-8', index=False)
 
 
 def parseSample(biocrates_filename):
@@ -417,7 +404,7 @@ def parseSample(biocrates_filename):
     plates = soup.find_all('plate')
     for plate in plates:
         usedop = plate.get('usedop')
-        # print(usedop)
+        # logger.debug(usedop)
         platebarcode = plate.get('platebarcode')
         # extracting the the distinct column labels, metabolites,
         # and rawdatafilename collect the data into a dictionary
@@ -437,7 +424,7 @@ def parseSample(biocrates_filename):
     parseSample(biocrates_filename='biocrates-merged-output.xml')
     add_sample_metadata('EX0003_sample_metadata.csv', 's_study_biocrates.txt')
     end = time()
-    print('The conversion took {:.2f} s.'.format(end - start))
+    logger.debug('The conversion took {:.2f} s.'.format(end - start))
 # parseSample(sys.argv[1])
 # uncomment to run test
 # merged = merge_biocrates_files("/Users/Philippe/Documents/git/biocrates-DATA/Biocrates-TUM/input-Biocrates-XML-files/all-biocrates-xml-files/")
diff --git a/isatools/net/mtbls-assay-definition2dict.py b/isatools/net/mtbls-assay-definition2dict.py
index 2e750162..b9358c8a 100644
--- a/isatools/net/mtbls-assay-definition2dict.py
+++ b/isatools/net/mtbls-assay-definition2dict.py
@@ -2,8 +2,6 @@
 import csv
 from collections import OrderedDict
 import pandas as pd
-import modin.pandas as pd_modin
-import json
 import rdflib
 
 from rdflib import *
@@ -20,8 +18,6 @@
 
 MTBLS_ASSAY_DEF_FILE = os.path.join(MTBLS_DIR, MTBLS_FILE)
 
-print(MTBLS_ASSAY_DEF_FILE)
-
 xls = pd.ExcelFile(MTBLS_CV_FILE)
 
 logging.basicConfig()
@@ -48,6 +44,7 @@ def load_terms_from_owl():
 
     return class_labels, subclasses
 
+
 def build_params(record, assay_dictionary, datafr):
 
     # vocab_graph = rdflib.Graph()

From 8c7a44b3abd1479501585855424d4c4e2bd2dc37 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Thu, 19 Nov 2020 18:08:41 +0000
Subject: [PATCH 15/25] add support for other RawDataFile types

---
 isatools/create/constants.py | 2 +-
 isatools/create/model.py     | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/isatools/create/constants.py b/isatools/create/constants.py
index c883c853..1cf55218 100644
--- a/isatools/create/constants.py
+++ b/isatools/create/constants.py
@@ -67,7 +67,7 @@
 SAMPLE_PREFIX = 'SMP'
 EXTRACT_PREFIX = 'EXTR'
 LABELED_EXTRACT_PREFIX = 'LBLEXTR'
-ASSAY_GRAPH_PREFIX = 'ASSAY'
+ASSAY_GRAPH_PREFIX = 'AT'   # AT stands for Assay Type
 
 # constants specific to the sampling plan in the study generation from the study design
 RUN_ORDER = 'run order'
diff --git a/isatools/create/model.py b/isatools/create/model.py
index 88853a3b..eee1080c 100644
--- a/isatools/create/model.py
+++ b/isatools/create/model.py
@@ -23,7 +23,7 @@
     SCREEN, RUN_IN, WASHOUT, FOLLOW_UP, ELEMENT_TYPES, INTERVENTIONS,
     DURATION_FACTOR, BASE_FACTORS, SOURCE, SAMPLE, EXTRACT, LABELED_EXTRACT,
     DATA_FILE, GROUP_PREFIX, SUBJECT_PREFIX, SAMPLE_PREFIX,
-    EXTRACT_PREFIX, LABELED_EXTRACT_PREFIX, ASSAY_GRAPH_PREFIX,
+    ASSAY_GRAPH_PREFIX,
     RUN_ORDER, STUDY_CELL, assays_opts,
     DEFAULT_SOURCE_TYPE, SOURCE_QC_SOURCE_NAME, QC_SAMPLE_NAME,
     QC_SAMPLE_TYPE_PRE_RUN, QC_SAMPLE_TYPE_POST_RUN,
@@ -47,6 +47,8 @@
     DataFile,
     RawDataFile,
     RawSpectralDataFile,  # this is required for the module to work
+    FreeInductionDecayDataFile,
+    ArrayDataFile,
     Extract,
     LabeledExtract,
     plink
@@ -2334,7 +2336,10 @@ def _isa_objects_factory(
                         measurement_type, technology_type, curr_assay_opt)
                     )
                     isa_class = globals()[curr_assay_opt['raw data file'].replace(' ', '')]
-                    assert isa_class in {RawDataFile, RawSpectralDataFile}
+                    assert isa_class in {
+                        # expand this set if needed
+                        RawDataFile, RawSpectralDataFile, ArrayDataFile, FreeInductionDecayDataFile
+                    }
                     return isa_class(
                         filename='{}_{}-{}-{}'.format(
                             urlify(node.name),

From 8329115a6d30672a85d104cfa3c6dac87515322a Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Mon, 23 Nov 2020 16:43:09 +0000
Subject: [PATCH 16/25] fixing TypeError: expected string or bytes-like object

---
 isatools/create/model.py        | 16 +++++++++-------
 tests/test_create_connectors.py | 15 +++++++++++++++
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/isatools/create/model.py b/isatools/create/model.py
index eee1080c..65402335 100644
--- a/isatools/create/model.py
+++ b/isatools/create/model.py
@@ -1025,15 +1025,17 @@ def generate_assay_plan_from_dict(cls, assay_plan_dict,
             if node_key in ('id', 'name', 'selected_sample_types', 'measurement_type', 'technology_type'):
                 continue
 
+            node_name = node_key.term if isinstance(node_key, OntologyAnnotation) else node_key
+
             if isinstance(node_params, list):    # the node is a ProductNode
                 for i, node_params_dict in enumerate(node_params):
                     for j, prev_node in enumerate(previous_nodes):
                         # log.debug('count: {0}, prev_node: {1}'.format(j, prev_node.id))
                         product_node = ProductNode(
                             id_=str(uuid.uuid4()) if use_guids else '{0}_{1}_{2}'.format(
-                                re.sub(r'\s+', '_', node_key), str(i).zfill(3), str(j).zfill(3)
+                                re.sub(r'\s+', '_', node_name), str(i).zfill(3), str(j).zfill(3)
                             ),
-                            name=node_key, node_type=node_params_dict['node_type'], size=node_params_dict['size'],
+                            name=node_name, node_type=node_params_dict['node_type'], size=node_params_dict['size'],
                             characteristics=[
                                 Characteristic(category=node_params_dict['characteristics_category'],
                                                value=node_params_dict['characteristics_value'])
@@ -1051,13 +1053,13 @@ def generate_assay_plan_from_dict(cls, assay_plan_dict,
                 pv_names, pv_all_values = list(node_params.keys()), list(node_params.values())
                 pv_combinations = itertools.product(*[val for val in pv_all_values])
                 for i, pv_combination in enumerate(pv_combinations):
-                    # log.debug('pv_combination: {0}'.format(pv_combination))
+                    log.debug('pv_combination: {0}'.format(pv_combination))
                     if not previous_nodes:
                         protocol_node = ProtocolNode(
                             id_=str(uuid.uuid4()) if use_guids else '{0}_{1}'.format(
-                                re.sub(r'\s+', '_', node_key), str(i).zfill(3)
+                                re.sub(r'\s+', '_', node_name), str(i).zfill(ZFILL_WIDTH)
                             ),
-                            name=node_key, protocol_type=node_key,
+                            name=node_name, protocol_type=node_key,
                             parameter_values=[
                                 ParameterValue(category=ProtocolParameter(parameter_name=pv_names[ix]),
                                                value=pv)
@@ -1072,9 +1074,9 @@ def generate_assay_plan_from_dict(cls, assay_plan_dict,
                             # log.debug('count: {0}, prev_node: {1}'.format(j, prev_node.id))
                             protocol_node = ProtocolNode(
                                 id_=str(uuid.uuid4()) if use_guids else '{0}_{1}_{2}'.format(
-                                    re.sub(r'\s+', '_', node_key), str(i).zfill(3), str(j).zfill(3)
+                                    re.sub(r'\s+', '_', node_name), str(i).zfill(3), str(j).zfill(3)
                                 ),
-                                name=node_key, protocol_type=node_key,
+                                name=node_name, protocol_type=node_key,
                                 parameter_values=[
                                     ParameterValue(category=ProtocolParameter(parameter_name=pv_names[ix]),
                                                    value=pv)
diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py
index f225499b..f51d8d90 100644
--- a/tests/test_create_connectors.py
+++ b/tests/test_create_connectors.py
@@ -143,4 +143,19 @@ def test_generate_study_design_from_config_with_observational_factors(self):
         inv_dict = json.loads(inv_json)
         self.assertIsInstance(inv_dict, dict)
         data_frames = isatab.dump_tables_to_dataframes(investigation)
+        self.assertIsInstance(data_frames, dict)
+
+    def test_generate_study_design_from_config_with_observational_factors_and_ontology_annotations(self):
+        ds_design_config = self._load_config('study-design-crossover-onto-annotated-ms-and-nnmr.json')
+        design = generate_study_design_from_config(ds_design_config)
+        self.assertIsInstance(design, StudyDesign)
+        investigation = Investigation(studies=[design.generate_isa_study()])
+        json.dumps(
+            investigation,
+            cls=ISAJSONEncoder,
+            sort_keys=True,
+            indent=4,
+            separators=(',', ': ')
+        )
+        data_frames = isatab.dump_tables_to_dataframes(investigation)
         self.assertIsInstance(data_frames, dict)
\ No newline at end of file

From 2479e02ea83fafd60e650e2c73d7a2fab7c3dc14 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Mon, 23 Nov 2020 16:43:29 +0000
Subject: [PATCH 17/25] cleanup

---
 isatools/sampletab.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/isatools/sampletab.py b/isatools/sampletab.py
index c34bf98e..3ba23b51 100644
--- a/isatools/sampletab.py
+++ b/isatools/sampletab.py
@@ -143,7 +143,7 @@ def get_value(object_column, column_group, object_series,
 
         term_source_value = object_series[offset_1r_col]
 
-        if term_source_value is not '':
+        if term_source_value != '':
 
             try:
                 value.term_source = ontology_source_map[term_source_value]
@@ -152,7 +152,7 @@ def get_value(object_column, column_group, object_series,
 
         term_accession_value = str(object_series[offset_2r_col])
 
-        if term_accession_value is not '':
+        if term_accession_value != '':
             value.term_accession = term_accession_value
 
         return value, None
@@ -176,7 +176,7 @@ def get_value(object_column, column_group, object_series,
 
             unit_term_source_value = object_series[offset_2r_col]
 
-            if unit_term_source_value is not '':
+            if unit_term_source_value != '':
 
                 try:
                     unit_term_value.term_source = \
@@ -187,7 +187,7 @@ def get_value(object_column, column_group, object_series,
 
             term_accession_value = object_series[offset_3r_col]
 
-            if term_accession_value is not '':
+            if term_accession_value != '':
                 unit_term_value.term_accession = term_accession_value
 
         return cell_value, unit_term_value

From 442f8dea4fce2f928f979a2a2ece8954148d46f7 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Mon, 23 Nov 2020 18:17:49 +0000
Subject: [PATCH 18/25] changed naming convention #370

---
 isatools/create/model.py        | 18 +++++++++---------
 tests/test_create_connectors.py |  6 +++++-
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/isatools/create/model.py b/isatools/create/model.py
index 65402335..61709f7b 100644
--- a/isatools/create/model.py
+++ b/isatools/create/model.py
@@ -2294,8 +2294,8 @@ def _isa_objects_factory(
         """
         if isinstance(node, ProtocolNode):
             return Process(
-                name='{}_{}-{}-<acquisition>{}'.format(
-                    urlify(node.name), assay_file_prefix, start_node_index, counter[node.name]
+                name='{}-S{}-{}-Acquisition-R{}'.format(
+                    assay_file_prefix, start_node_index, urlify(node.name), counter[node.name]
                 ),
                 executes_protocol=node,
                 performer=performer,
@@ -2306,17 +2306,17 @@ def _isa_objects_factory(
         if isinstance(node, ProductNode):
             if node.type == SAMPLE:
                 return Sample(
-                    name='{}-{}-Sample{}'.format(assay_file_prefix, start_node_index, counter[SAMPLE]),
+                    name='{}-S{}-Sample-R{}'.format(assay_file_prefix, start_node_index, counter[SAMPLE]),
                     characteristics=node.characteristics
                 )
             if node.type == EXTRACT:
                 return Extract(
-                    name='{}-{}-Extract{}'.format(assay_file_prefix, start_node_index, counter[EXTRACT]),
+                    name='{}-S{}-Extract-R{}'.format(assay_file_prefix, start_node_index, counter[EXTRACT]),
                     characteristics=node.characteristics
                 )
             if node.type == LABELED_EXTRACT:
                 return LabeledExtract(
-                    name='{}-{}-LE{}'.format(assay_file_prefix, start_node_index, counter[LABELED_EXTRACT]),
+                    name='{}-S{}-LE-R{}'.format(assay_file_prefix, start_node_index, counter[LABELED_EXTRACT]),
                     characteristics=node.characteristics
                 )
             # under the hypothesis that we deal only with raw data files
@@ -2343,19 +2343,19 @@ def _isa_objects_factory(
                         RawDataFile, RawSpectralDataFile, ArrayDataFile, FreeInductionDecayDataFile
                     }
                     return isa_class(
-                        filename='{}_{}-{}-{}'.format(
-                            urlify(node.name),
+                        filename='{}-S{}-{}-R{}'.format(
                             assay_file_prefix,
                             start_node_index,
+                            urlify(node.name),
                             counter[node.name]
                         )
                     )
                 except StopIteration:
                     return RawDataFile(
-                        filename='{}_{}-{}-{}'.format(
-                            urlify(node.name),
+                        filename='{}-S{}-{}-R{}'.format(
                             assay_file_prefix,
                             start_node_index,
+                            urlify(node.name),
                             counter[node.name]
                         )
                     )
diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py
index f51d8d90..51a7f0e0 100644
--- a/tests/test_create_connectors.py
+++ b/tests/test_create_connectors.py
@@ -150,6 +150,9 @@ def test_generate_study_design_from_config_with_observational_factors_and_ontolo
         design = generate_study_design_from_config(ds_design_config)
         self.assertIsInstance(design, StudyDesign)
         investigation = Investigation(studies=[design.generate_isa_study()])
+        self.assertIsInstance(investigation.studies[0], Study)
+        """
+        # removed because it takes too long on CI and not really needed.
         json.dumps(
             investigation,
             cls=ISAJSONEncoder,
@@ -158,4 +161,5 @@ def test_generate_study_design_from_config_with_observational_factors_and_ontolo
             separators=(',', ': ')
         )
         data_frames = isatab.dump_tables_to_dataframes(investigation)
-        self.assertIsInstance(data_frames, dict)
\ No newline at end of file
+        self.assertIsInstance(data_frames, dict)
+        """
\ No newline at end of file

From b65f0fb28042c1d3298f1e91a87e5fa6d74d7e92 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Tue, 24 Nov 2020 11:19:44 +0000
Subject: [PATCH 19/25] removed spurious print statement #374

---
 isatools/magetab.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/isatools/magetab.py b/isatools/magetab.py
index 51813858..1aa8e1e4 100644
--- a/isatools/magetab.py
+++ b/isatools/magetab.py
@@ -35,9 +35,9 @@
 
 try:
     log = logging.getLogger('isatools')
-    print("LOG:", log)
+    log.debug("LOG:", log)
 except IOError as ioe:
-    print("error:", ioe)
+    log.error("error:", ioe)
 
 
 def _get_sdrf_filenames(ISA):

From 9a665db2441c7792ef4fee74f3065128a1a16245 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Tue, 24 Nov 2020 11:29:17 +0000
Subject: [PATCH 20/25] counters starting from 1

---
 isatools/create/model.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/isatools/create/model.py b/isatools/create/model.py
index 61709f7b..eca7016b 100644
--- a/isatools/create/model.py
+++ b/isatools/create/model.py
@@ -2052,7 +2052,7 @@ def _generate_sources(self, ontology_source_references):
             for subj_n in (str(ix).zfill(digits) for ix in range(1, s_arm.group_size + 1)):
                 src = copy.copy(source_prototype)
                 src.name = self._idgen_sources(
-                    s_arm.numeric_id if s_arm.numeric_id > -1 else s_ix,
+                    s_arm.numeric_id if s_arm.numeric_id > -1 else s_ix + 1,  # start counting from 1
                     subj_n
                 )
                 srcs.add(src)
@@ -2103,7 +2103,7 @@ def _generate_samples_and_assays(self, sources_map, sampling_protocol, performer
                                 isinstance(sample_type.value, OntologyAnnotation) else sample_type.value
                             for samp_idx in range(0, sampling_size):
                                 sample = Sample(
-                                    name=self._idgen_samples(source.name, cell.name, str(samp_idx+1), sample_term),
+                                    name=self._idgen_samples(source.name, cell.name, str(samp_idx + 1), sample_term),
                                     factor_values=factor_values, characteristics=[sample_type], derives_from=[source]
                                 )
                                 sample_batches[sample_node].append(sample)
@@ -2261,8 +2261,8 @@ def generate_assay(assay_graph, assay_samples):
                     ix = i * len(assay_samples) * size + j * size + k
                     log.debug('i = {0}, j = {1}, k={2}, ix={3}'.format(i, j, k, ix))
                     processes, other_materials, data_files, _, __ = StudyDesign._generate_isa_elements_from_node(
-                        node, assay_graph, assay_graph.id, start_node_index=ix, counter=None, processes=[], other_materials=[],
-                        data_files=[], previous_items=[sample]
+                        node, assay_graph, assay_graph.id, start_node_index=ix + 1, counter=None, processes=[],
+                        other_materials=[], data_files=[], previous_items=[sample]
                     )
                     assay.other_material.extend(other_materials)
                     assay.process_sequence.extend(processes)

From 4c07cab78d7c8cca5652f66090e3e20bd5cb8a40 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Tue, 24 Nov 2020 11:46:11 +0000
Subject: [PATCH 21/25] expanded assay list

---
 isatools/create/model.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/isatools/create/model.py b/isatools/create/model.py
index eca7016b..37ec19bb 100644
--- a/isatools/create/model.py
+++ b/isatools/create/model.py
@@ -46,9 +46,17 @@
     Material,
     DataFile,
     RawDataFile,
-    RawSpectralDataFile,  # this is required for the module to work
+    RawSpectralDataFile,
     FreeInductionDecayDataFile,
     ArrayDataFile,
+    DerivedDataFile,
+    DerivedSpectralDataFile,
+    DerivedArrayDataFile,
+    ProteinAssignmentFile,
+    PeptideAssignmentFile,
+    DerivedArrayDataMatrixFile,
+    PostTranslationalModificationAssignmentFile,
+    AcquisitionParameterDataFile,
     Extract,
     LabeledExtract,
     plink
@@ -2340,7 +2348,10 @@ def _isa_objects_factory(
                     isa_class = globals()[curr_assay_opt['raw data file'].replace(' ', '')]
                     assert isa_class in {
                         # expand this set if needed
-                        RawDataFile, RawSpectralDataFile, ArrayDataFile, FreeInductionDecayDataFile
+                        RawDataFile, RawSpectralDataFile, ArrayDataFile, FreeInductionDecayDataFile,
+                        DerivedDataFile, DerivedSpectralDataFile, DerivedArrayDataFile,
+                        ProteinAssignmentFile, PeptideAssignmentFile, DerivedArrayDataMatrixFile,
+                        PostTranslationalModificationAssignmentFile, AcquisitionParameterDataFile
                     }
                     return isa_class(
                         filename='{}-S{}-{}-R{}'.format(

From 746fde3f67aafc0f0a9cc0e3014ea4854c035baf Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Tue, 24 Nov 2020 11:49:21 +0000
Subject: [PATCH 22/25] removed spurious print statement v2 #374

---
 isatools/magetab.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/isatools/magetab.py b/isatools/magetab.py
index 1aa8e1e4..0f3d0d6f 100644
--- a/isatools/magetab.py
+++ b/isatools/magetab.py
@@ -15,8 +15,6 @@
 from itertools import zip_longest
 import numpy as np
 import pandas as pd
-# import modin.pandas as pd_modin
-
 from isatools import isatab
 from isatools.model import (
     Assay,
@@ -33,11 +31,7 @@
 )
 import logging
 
-try:
-    log = logging.getLogger('isatools')
-    log.debug("LOG:", log)
-except IOError as ioe:
-    log.error("error:", ioe)
+log = logging.getLogger('isatools')
 
 
 def _get_sdrf_filenames(ISA):

From 138e90cf9a36677079b7dc03c6d3bc490848e126 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Mon, 14 Dec 2020 17:12:32 +0000
Subject: [PATCH 23/25] refactoring connectors to harmonize them to
 Datascriptor; support for chained protocols #373

---
 isatools/create/connectors.py            | 26 ++++++----
 isatools/create/constants.py             |  2 +-
 isatools/create/errors.py                |  7 ++-
 isatools/create/model.py                 | 60 ++++++++++++++++++++++--
 tests/test_create_connectors.py          | 23 +++++----
 tests/test_create_models_json.py         | 21 ++++++---
 tests/test_create_models_study_design.py | 10 ++++
 7 files changed, 113 insertions(+), 36 deletions(-)

diff --git a/isatools/create/connectors.py b/isatools/create/connectors.py
index 22a15c92..badc0de4 100644
--- a/isatools/create/connectors.py
+++ b/isatools/create/connectors.py
@@ -1,6 +1,6 @@
 from isatools.model import OntologyAnnotation, OntologySource, FactorValue, Characteristic
 from isatools.create.model import StudyDesign, NonTreatment, Treatment, StudyCell, StudyArm, SampleAndAssayPlan
-from isatools.create.constants import SCREEN, INTERVENTIONS, BASE_FACTORS, SAMPLE, ORGANISM_PART
+from isatools.create.constants import SCREEN, INTERVENTIONS, BASE_FACTORS, SAMPLE, ORGANISM_PART, DEFAULT_SOURCE_TYPE
 from collections import OrderedDict
 
 AGENT = 'agent'
@@ -265,7 +265,7 @@ def generate_study_design_from_config(study_design_config):
     :return: isatools.create.StudyDesign
     """
     arms = []
-    for arm_ix, arm_dict in enumerate(study_design_config['selectedArms']):
+    for arm_ix, arm_dict in enumerate(study_design_config['arms']['selected']):
         arm_map = OrderedDict()
         for epoch_ix, epoch_dict in enumerate(arm_dict['epochs']):
             element_ids = epoch_dict.get('elements', [])
@@ -273,7 +273,7 @@ def generate_study_design_from_config(study_design_config):
                 _generate_element(element_dict) for element_dict in
                 filter(
                     lambda el: el['id'] in element_ids,
-                    study_design_config['generatedStudyDesign']['elements']
+                    study_design_config['elements']
                 )
             ]
             cell_name = 'A{}E{}'.format(arm_ix, epoch_ix)
@@ -288,9 +288,8 @@ def generate_study_design_from_config(study_design_config):
             assay_ord_dicts = [
                 generate_assay_ord_dict_from_config(
                     ds_assay_config, arm_dict['name'], epoch_ix
-                ) for ds_assay_config in study_design_config['assayConfigs']
-                if study_design_config['selectedAssayTypes'][ds_assay_config['name']] and
-                ds_assay_config['selectedCells'][arm_dict['name']][epoch_ix] is True
+                ) for ds_assay_config in study_design_config['assayPlan']
+                if ds_assay_config['selectedCells'][arm_dict['name']][epoch_ix] is True
             ]
             sa_plan_name = 'SAP_A{}E{}'.format(arm_ix, epoch_ix)
             # TODO this method will probably need some rework to bind a sample type to a specific assay plan
@@ -298,13 +297,17 @@ def generate_study_design_from_config(study_design_config):
                 sa_plan_name, sample_type_dicts, *assay_ord_dicts
             )
             arm_map[cell] = sa_plan
+            source_type = Characteristic(
+                category=DEFAULT_SOURCE_TYPE.category,
+                value=_map_ontology_annotation(
+                    arm_dict.get('subjectType', None) or study_design_config.get('subjectType', None)
+                )
+            )
 
         arm = StudyArm(
             name=arm_dict['name'],
             # should we generate a Characteristic if subjectType is an OntologyAnnotation?
-            source_type=_map_ontology_annotation(
-                arm_dict.get('subjectType', None) or study_design_config.get('subjectType', None)
-            ),
+            source_type=source_type,
             source_characteristics=[
                 _generate_characteristics_from_observational_factor(
                     obs_factor_dict
@@ -315,6 +318,9 @@ def generate_study_design_from_config(study_design_config):
         )
         arms.append(arm)
     return StudyDesign(
-        name=study_design_config['generatedStudyDesign']['type'],
+        # TODO should we actually add the properties 'name' and 'description' to the study design?
+        name=study_design_config['name'],
+        description=study_design_config.get('description', None),
+        design_type=_map_ontology_annotation(study_design_config['designType']),
         study_arms=arms
     )
diff --git a/isatools/create/constants.py b/isatools/create/constants.py
index 1cf55218..42e19c57 100644
--- a/isatools/create/constants.py
+++ b/isatools/create/constants.py
@@ -4,7 +4,7 @@
 from isatools.model import OntologyAnnotation, StudyFactor, OntologySource, Characteristic
 
 SCREEN = 'screen'
-RUN_IN = 'run in'
+RUN_IN = 'run-in'
 WASHOUT = 'washout'
 FOLLOW_UP = 'follow-up'
 ELEMENT_TYPES = dict(SCREEN=SCREEN, RUN_IN=RUN_IN, WASHOUT=WASHOUT, FOLLOW_UP=FOLLOW_UP)
diff --git a/isatools/create/errors.py b/isatools/create/errors.py
index 5d0bf933..ce6b595c 100644
--- a/isatools/create/errors.py
+++ b/isatools/create/errors.py
@@ -23,8 +23,9 @@
 
 # ERROR MESSAGES: ASSAY GRAPH
 INVALID_NODE_ERROR = 'Node must be instance of isatools.create.models.SequenceNode. {0} provided'
-INVALID_LINK_ERROR = "The link to be added is not valid. Link that can be created are " \
-                     "ProductNode->ProtocolNode or ProtocolNode->ProductNode."
+# INVALID_LINK_ERROR = "The link to be added is not valid. Link that can be created are ProductNode->ProtocolNode
+# or ProtocolNode->ProductNode."
+INVALID_LINK_ERROR = 'ProductNode->ProductNode links are not allowed in an assay workflow.'
 INVALID_MEASUREMENT_TYPE_ERROR = '{0} is an invalid value for measurement_type. ' \
                                  'Please provide an OntologyAnnotation or string.'
 INVALID_TECHNOLOGY_TYPE_ERROR = '{0} is an invalid value for technology_type. ' \
@@ -50,6 +51,8 @@
 
 # ERROR MESSAGES: STUDY DESIGN
 NAME_PROPERTY_ASSIGNMENT_ERROR = 'The value assigned to \'name\' must be a string'
+DESIGN_TYPE_PROPERTY_ASSIGNMENT_ERROR = 'The value assigned to \'design_type\' must be a string or OntologyAnnotation'
+DESCRIPTION_PROPERTY_ASSIGNMENT_ERROR = 'The value assigned to \'description\' must be text (i.e. string)'
 STUDY_ARM_PROPERTY_ASSIGNMENT_ERROR = 'The value assigned to \'study_arms\' must be an iterable'
 ADD_STUDY_ARM_PARAMETER_TYPE_ERROR = 'Not a valid study arm'
 ADD_STUDY_ARM_NAME_ALREADY_PRESENT_ERROR = 'A StudyArm with the same name is already present in the StudyDesign'
diff --git a/isatools/create/model.py b/isatools/create/model.py
index 37ec19bb..79d86aec 100644
--- a/isatools/create/model.py
+++ b/isatools/create/model.py
@@ -136,7 +136,7 @@ class NonTreatment(Element):
     def __init__(self, element_type=ELEMENT_TYPES['SCREEN'], duration_value=0.0, duration_unit=None):
         super(NonTreatment, self).__init__()
         if element_type not in ELEMENT_TYPES.values():
-            raise ValueError('element treatment type provided: ')
+            raise ValueError('element treatment type provided: {}'.format(element_type))
         self.__type = element_type
         if not isinstance(duration_value, Number):
             raise ValueError('duration_value must be a Number. Value provided is {0}'.format(duration_value))
@@ -1161,8 +1161,11 @@ def links(self):
                    for target_node in target_nodes)
 
     def add_link(self, start_node, target_node):
+        """
         if not (isinstance(start_node, ProductNode) and isinstance(target_node, ProtocolNode)) and \
                 not (isinstance(start_node, ProtocolNode) and isinstance(target_node, ProductNode)):
+        """
+        if isinstance(start_node, ProductNode) and isinstance(target_node, ProductNode):
             raise TypeError(errors.INVALID_LINK_ERROR)
         if start_node not in self.__graph_dict.keys() or target_node not in self.__graph_dict.keys():
             raise ValueError(errors.MISSING_NODE_ERROR)
@@ -1923,7 +1926,14 @@ class StudyDesign(object):
     StudyArms of different lengths (i.e. different number of cells) are allowed.
     """
 
-    def __init__(self, name='Study Design', source_type=DEFAULT_SOURCE_TYPE, study_arms=None):
+    def __init__(
+            self,
+            name='Study Design',
+            design_type=None,
+            description=None,
+            source_type=DEFAULT_SOURCE_TYPE,
+            study_arms=None
+    ):
         """
         :param name: str
         :param source_type: str or OntologyAnnotation
@@ -1931,11 +1941,17 @@ def __init__(self, name='Study Design', source_type=DEFAULT_SOURCE_TYPE, study_a
         """
         self.__study_arms = set()
         self.__name = name if isinstance(name, str) else 'Study Design'
+        self.__design_type = None
+        self.__description = None
         self.__source_type = None
 
         self.source_type = source_type
         if study_arms:
             self.study_arms = study_arms
+        if description:
+            self.description = description
+        if design_type:
+            self.design_type = design_type
 
     @property
     def name(self):
@@ -1947,6 +1963,26 @@ def name(self, name):
             raise AttributeError(errors.NAME_PROPERTY_ASSIGNMENT_ERROR)
         self.__name = name
 
+    @property
+    def description(self):
+        return self.__description
+
+    @description.setter
+    def description(self, description):
+        if not isinstance(description, str):
+            raise AttributeError(errors.DESCRIPTION_PROPERTY_ASSIGNMENT_ERROR)
+        self.__description = description
+
+    @property
+    def design_type(self):
+        return self.__design_type
+
+    @design_type.setter
+    def design_type(self, design_type):
+        if not isinstance(design_type, (str, OntologyAnnotation)):
+            raise AttributeError(errors.DESIGN_TYPE_PROPERTY_ASSIGNMENT_ERROR)
+        self.__design_type = design_type
+
     @property
     def source_type(self):
         return self.__source_type
@@ -2402,15 +2438,21 @@ def generate_isa_study(self):
     def __repr__(self):
         return '{0}.{1}(' \
                'name={name}, ' \
+               'design_type={design_type}, ' \
+               'description={description} ' \
+               'source_type={source_type}, ' \
                'study_arms={study_arms}' \
                ')'.format(self.__class__.__module__, self.__class__.__name__, study_arms=self.study_arms,
-                          name=self.name)
+                          name=self.name, design_type=self.design_type, description=self.description,
+                          source_type=self.source_type)
 
     def __str__(self):
         return """{0}(
                name={name},
+               description={description},
                study_arms={study_arms}
                )""".format(self.__class__.__name__,
+                           description=self.description,
                            study_arms=[arm.name for arm in sorted(self.study_arms)],
                            name=self.name)
 
@@ -2626,6 +2668,7 @@ class StudyDesignEncoder(json.JSONEncoder):
     def default(self, obj):
         if isinstance(obj, StudyDesign):
             arm_encoder = StudyArmEncoder()
+            onto_encoder = OntologyAnnotationEncoder()
             study_arms_dict = {
                 arm.name: arm_encoder.default(arm) for arm in obj.study_arms
             }
@@ -2634,6 +2677,8 @@ def default(self, obj):
                 arm.pop('name')
             return {
                 'name': obj.name,
+                'designType': onto_encoder.ontology_annotation(obj.design_type),
+                'description': obj.description,
                 'studyArms': study_arms_dict
             }
 
@@ -2650,7 +2695,14 @@ def loads(self, json_text):
             arm_dict['name'] = name
         study_arms = {self.arm_decoder.loads_arm(arm_dict) for arm_dict in json_dict["studyArms"].values()}
 
-        study_design = StudyDesign(name=json_dict['name'], study_arms=study_arms)
+        study_design = StudyDesign(
+            name=json_dict['name'],
+            description=json_dict['description'],
+            design_type=CharacteristicDecoder.loads_ontology_annotation(json_dict['designType']) if isinstance(
+                json_dict['designType'], dict
+            ) else json_dict['designType'],
+            study_arms=study_arms
+        )
         return study_design
 
 
diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py
index 51a7f0e0..6a6887ff 100644
--- a/tests/test_create_connectors.py
+++ b/tests/test_create_connectors.py
@@ -83,9 +83,9 @@ def _load_config(file_name):
         return ds_design_config
 
     def test_generate_assay_ord_dict_from_datascriptor_config(self):
-        ds_design_config = self._load_config('study-design-3-repeated-treatment.json')
-        assay_config = ds_design_config['assayConfigs'][0]
-        test_arm_name = 'Arm_0'
+        ds_design_config = self._load_config('factorial-study-design-12-arms-blood-saliva-genomeseq-ms.json')
+        assay_config = ds_design_config['assayPlan'][0]
+        test_arm_name = ds_design_config['arms']['selected'][0]['name']
         test_epoch_no = -1   # last epoch, follow-up
         assay_odict = generate_assay_ord_dict_from_config(assay_config, test_arm_name, test_epoch_no)
         self.assertIsInstance(assay_odict, OrderedDict)
@@ -93,10 +93,10 @@ def test_generate_assay_ord_dict_from_datascriptor_config(self):
         self.assertIsInstance(assay_graph, AssayGraph)
 
     def test_generate_study_design_from_config(self):
-        ds_design_config = self._load_config('study-design-3-repeated-treatment.json')
+        ds_design_config = self._load_config('factorial-study-design-12-arms-blood-saliva-genomeseq-ms.json')
         design = generate_study_design_from_config(ds_design_config)
         self.assertIsInstance(design, StudyDesign)
-        self.assertEqual(len(design.study_arms), len(ds_design_config['selectedArms']))
+        self.assertEqual(len(design.study_arms), len(ds_design_config['arms']['selected']))
         for arm in design.study_arms:
             self.assertIsInstance(arm, StudyArm)
             for cell, samp_ass_plan in arm.arm_map.items():
@@ -117,8 +117,8 @@ def test_generate_study_design_from_config(self):
         self.assertIsInstance(data_frames, dict)
         self.assertGreater(len(data_frames), 1)
 
-    def test_generate_study_design_from_config_with_observational_factors(self):
-        ds_design_config = self._load_config('study-design-with-observational-factors.json')
+    def test_generate_study_design_from_config_with_observational_factors_and_ontology_annotations(self):
+        ds_design_config = self._load_config('crossover-study-design-4-arms-blood-derma-nmr-ms.json')
         design = generate_study_design_from_config(ds_design_config)
         self.assertIsInstance(design, StudyDesign)
         for ix, arm in enumerate(design.study_arms):
@@ -145,14 +145,13 @@ def test_generate_study_design_from_config_with_observational_factors(self):
         data_frames = isatab.dump_tables_to_dataframes(investigation)
         self.assertIsInstance(data_frames, dict)
 
-    def test_generate_study_design_from_config_with_observational_factors_and_ontology_annotations(self):
-        ds_design_config = self._load_config('study-design-crossover-onto-annotated-ms-and-nnmr.json')
+    def test_generate_study_design_from_config_with_chained_protocols_and_ontology_annotations(self):
+        ds_design_config = self._load_config('crossover-study-design-4-arms-blood-derma-nmr-ms-chipseq.json')
         design = generate_study_design_from_config(ds_design_config)
         self.assertIsInstance(design, StudyDesign)
         investigation = Investigation(studies=[design.generate_isa_study()])
         self.assertIsInstance(investigation.studies[0], Study)
-        """
-        # removed because it takes too long on CI and not really needed.
+        self.assertEqual(len(investigation.studies[0].assays), len(ds_design_config['assayPlan']))
         json.dumps(
             investigation,
             cls=ISAJSONEncoder,
@@ -162,4 +161,4 @@ def test_generate_study_design_from_config_with_observational_factors_and_ontolo
         )
         data_frames = isatab.dump_tables_to_dataframes(investigation)
         self.assertIsInstance(data_frames, dict)
-        """
\ No newline at end of file
+        self.assertEqual(len(data_frames), len(ds_design_config['assayPlan']) + 1)
diff --git a/tests/test_create_models_json.py b/tests/test_create_models_json.py
index d8d74ac3..eed12506 100644
--- a/tests/test_create_models_json.py
+++ b/tests/test_create_models_json.py
@@ -666,8 +666,8 @@ def test_decode_arm_with_multi_element_cells_mouse(self):
             json_text = json.dumps(json.load(expected_json_fp))
             actual_arm = decoder.loads(json_text)
         self.assertIsInstance(actual_arm, StudyArm)
-        log.info('Expected Arm source type: {}'.format(self.multi_treatment_cell_arm_mouse.source_type))
-        log.info('Actual Arm source type: {}'.format(actual_arm.source_type))
+        log.debug('Expected Arm source type: {}'.format(self.multi_treatment_cell_arm_mouse.source_type))
+        log.debug('Actual Arm source type: {}'.format(actual_arm.source_type))
         self.assertEqual(self.multi_treatment_cell_arm_mouse, actual_arm)
 
 
@@ -675,13 +675,20 @@ class StudyDesignEncoderTest(BaseTestCase):
 
     def setUp(self):
         super(StudyDesignEncoderTest, self).setUp()
-        self.three_arm_study_design = StudyDesign(name=TEST_STUDY_DESIGN_NAME_THREE_ARMS, study_arms={
-            self.single_treatment_cell_arm,
-            self.single_treatment_cell_arm_01,
-            self.single_treatment_cell_arm_02
+        self.three_arm_study_design = StudyDesign(
+            name=TEST_STUDY_DESIGN_NAME_THREE_ARMS,
+            description='This is a study design with three single-element arms',
+            design_type='unspecified design',
+            study_arms={
+                self.single_treatment_cell_arm,
+                self.single_treatment_cell_arm_01,
+                self.single_treatment_cell_arm_02
         })
         self.multi_element_cell_two_arm_study_design = StudyDesign(
-            name=TEST_STUDY_DESIGN_NAME_TWO_ARMS_MULTI_ELEMENT_CELLS, study_arms=[
+            name=TEST_STUDY_DESIGN_NAME_TWO_ARMS_MULTI_ELEMENT_CELLS,
+            description='This is a study design with two multi-element arms',
+            design_type='unspecified design',
+            study_arms=[
                 self.multi_treatment_cell_arm,
                 self.multi_treatment_cell_arm_01
             ])
diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py
index e2879a07..e942fe47 100644
--- a/tests/test_create_models_study_design.py
+++ b/tests/test_create_models_study_design.py
@@ -1621,6 +1621,16 @@ def test_name_property(self):
             self.study_design.name = 128
         self.assertEqual(ex_cm.exception.args[0], errors.NAME_PROPERTY_ASSIGNMENT_ERROR)
 
+    def test_description_property(self):
+        test_study_description = 'some description in here'
+        self.study_design.description = test_study_description
+        self.assertEqual(self.study_design.description, test_study_description)
+
+    def test_design_type_property(self):
+        test_study_design_type = 'factorial design'
+        self.study_design.design_type = test_study_design_type
+        self.assertEqual(self.study_design.design_type, test_study_design_type)
+
     def test_study_arms_property(self):
         pass
 

From b6d2446144130ef46c11249c912fb62437e36f62 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Tue, 15 Dec 2020 11:44:10 +0000
Subject: [PATCH 24/25] adding study title, description and design type on
 creation #373

---
 isatools/create/model.py        | 7 ++++++-
 tests/test_create_connectors.py | 5 +++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/isatools/create/model.py b/isatools/create/model.py
index 79d86aec..ac7673ea 100644
--- a/isatools/create/model.py
+++ b/isatools/create/model.py
@@ -2416,7 +2416,12 @@ def generate_isa_study(self):
                                'study-creator-config.yaml')) as yaml_file:
             config = yaml.load(yaml_file, Loader=yaml.FullLoader)
         study_config = config['study']
-        study = Study(filename=urlify(study_config['filename']))
+        study = Study(
+            title=self.name,
+            filename=urlify(study_config['filename']),
+            description=self.description,
+            design_descriptors=[self.design_type] if isinstance(self.design_type, OntologyAnnotation) else None
+        )
         study.ontology_source_references = [
             OntologySource(**study_config['ontology_source_references'][0])
         ]
diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py
index 6a6887ff..1530ce60 100644
--- a/tests/test_create_connectors.py
+++ b/tests/test_create_connectors.py
@@ -104,6 +104,11 @@ def test_generate_study_design_from_config(self):
                 self.assertIsInstance(samp_ass_plan, SampleAndAssayPlan)
         study = design.generate_isa_study()
         self.assertIsInstance(study, Study)
+        self.assertEqual(study.title, ds_design_config['name'])
+        self.assertEqual(study.description, ds_design_config['description'])
+        self.assertIsInstance(study.design_descriptors[0], OntologyAnnotation)
+        self.assertEqual(study.design_descriptors[0].term, ds_design_config['designType']['term'])
+        self.assertEqual(study.design_descriptors[0].term_accession, ds_design_config['designType']['iri'])
         investigation = Investigation(studies=[study])
         inv_json = json.dumps(
             investigation,

From 47c366e57fee284318620310486949e6e5fa0f28 Mon Sep 17 00:00:00 2001
From: zigur <massimorgon@gmail.com>
Date: Tue, 15 Dec 2020 12:27:20 +0000
Subject: [PATCH 25/25] support for study identifier

---
 isatools/create/constants.py             | 4 ++++
 isatools/create/model.py                 | 6 ++++--
 tests/test_create_connectors.py          | 2 ++
 tests/test_create_models_study_design.py | 7 +++++--
 4 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/isatools/create/constants.py b/isatools/create/constants.py
index 42e19c57..f260d7e0 100644
--- a/isatools/create/constants.py
+++ b/isatools/create/constants.py
@@ -104,3 +104,7 @@
 
 # Default performer
 DEFAULT_PERFORMER = 'Unknown'
+
+# Default study and investigation identifiers
+DEFAULT_STUDY_IDENTIFIER = 's_01'
+DEFAULT_INVESTIGATION_IDENTIFIER = 'i_01'
diff --git a/isatools/create/model.py b/isatools/create/model.py
index ac7673ea..bd07bd87 100644
--- a/isatools/create/model.py
+++ b/isatools/create/model.py
@@ -27,7 +27,8 @@
     RUN_ORDER, STUDY_CELL, assays_opts,
     DEFAULT_SOURCE_TYPE, SOURCE_QC_SOURCE_NAME, QC_SAMPLE_NAME,
     QC_SAMPLE_TYPE_PRE_RUN, QC_SAMPLE_TYPE_POST_RUN,
-    QC_SAMPLE_TYPE_INTERSPERSED, ZFILL_WIDTH, DEFAULT_PERFORMER
+    QC_SAMPLE_TYPE_INTERSPERSED, ZFILL_WIDTH, DEFAULT_PERFORMER,
+    DEFAULT_STUDY_IDENTIFIER
 )
 from isatools.model import (
     StudyFactor,
@@ -2407,7 +2408,7 @@ def _isa_objects_factory(
                         )
                     )
 
-    def generate_isa_study(self):
+    def generate_isa_study(self, identifier=None):
         """
         this is the core method to return the fully populated ISA Study object from the StudyDesign
         :return: isatools.model.Study
@@ -2417,6 +2418,7 @@ def generate_isa_study(self):
             config = yaml.load(yaml_file, Loader=yaml.FullLoader)
         study_config = config['study']
         study = Study(
+            identifier=identifier or DEFAULT_STUDY_IDENTIFIER,
             title=self.name,
             filename=urlify(study_config['filename']),
             description=self.description,
diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py
index 1530ce60..3f996e7e 100644
--- a/tests/test_create_connectors.py
+++ b/tests/test_create_connectors.py
@@ -27,6 +27,7 @@
     SampleAndAssayPlan,
     AssayGraph
 )
+from  isatools.create.constants import DEFAULT_STUDY_IDENTIFIER
 from isatools.isajson import ISAJSONEncoder
 from tests.create_sample_assay_plan_odicts import ms_assay_dict, annotated_ms_assay_dict
 
@@ -105,6 +106,7 @@ def test_generate_study_design_from_config(self):
         study = design.generate_isa_study()
         self.assertIsInstance(study, Study)
         self.assertEqual(study.title, ds_design_config['name'])
+        self.assertEqual(study.identifier, DEFAULT_STUDY_IDENTIFIER)
         self.assertEqual(study.description, ds_design_config['description'])
         self.assertIsInstance(study.design_descriptors[0], OntologyAnnotation)
         self.assertEqual(study.design_descriptors[0].term, ds_design_config['designType']['term'])
diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py
index e942fe47..0f76a433 100644
--- a/tests/test_create_models_study_design.py
+++ b/tests/test_create_models_study_design.py
@@ -40,7 +40,7 @@
 from isatools.create.constants import (
     SCREEN, RUN_IN, WASHOUT, FOLLOW_UP, ELEMENT_TYPES, INTERVENTIONS, DURATION_FACTOR,
     BASE_FACTORS_, BASE_FACTORS, SOURCE, SAMPLE, EXTRACT, LABELED_EXTRACT, default_ontology_source_reference,
-    DEFAULT_SOURCE_TYPE, QC_SAMPLE_TYPE_PRE_RUN, QC_SAMPLE_TYPE_INTERSPERSED
+    DEFAULT_SOURCE_TYPE, QC_SAMPLE_TYPE_PRE_RUN, QC_SAMPLE_TYPE_INTERSPERSED, DEFAULT_STUDY_IDENTIFIER
 )
 from tests.create_sample_assay_plan_odicts import sample_list, ms_assay_dict, lcdad_assay_dict, nmr_assay_dict
 
@@ -1768,6 +1768,7 @@ def test_generate_isa_study_single_arm_single_cell_elements(self):
         study_design = StudyDesign(study_arms=(single_arm,))
         study = study_design.generate_isa_study()
         self.assertIsInstance(study, Study)
+        self.assertEqual(study.identifier, DEFAULT_STUDY_IDENTIFIER)
         self.assertEqual(study.filename, study_config['filename'])
         self.assertEqual(len(study.sources), single_arm.group_size)
         for source in study.sources:
@@ -1829,7 +1830,9 @@ def test_generate_isa_study_two_arms_single_cell_elements(self):
             (self.cell_follow_up_01, self.nmr_sample_assay_plan)
         ]))
         study_design = StudyDesign(study_arms=(first_arm, second_arm))
-        study = study_design.generate_isa_study()
+        study_identifier = 'st_001'
+        study = study_design.generate_isa_study(identifier=study_identifier)
+        self.assertEqual(study.identifier, study_identifier)
         self.assertEqual(len(study.assays), 2)
         expected_num_of_samples_nmr_plan_first_arm = reduce(
             lambda acc_value, sample_node: acc_value + sample_node.size,