From e2cada1ccfbb5e830a5e0c9711f2331f61ed5567 Mon Sep 17 00:00:00 2001 From: zigur Date: Wed, 11 Nov 2020 19:39:53 +0000 Subject: [PATCH 01/25] groupings assays by assay type #369 (tests WIP) --- isatools/create/models.py | 51 ++++++++++++++---------- isatools/model.py | 2 +- tests/test_create_models_study_design.py | 22 +++++----- 3 files changed, 44 insertions(+), 31 deletions(-) diff --git a/isatools/create/models.py b/isatools/create/models.py index 9a2d9e7c..7c4ca44f 100644 --- a/isatools/create/models.py +++ b/isatools/create/models.py @@ -2053,13 +2053,12 @@ def _generate_sources(self, ontology_source_references): src_map[s_arm.name] = list(srcs) return src_map - def _generate_samples(self, sources_map, sampling_protocol, performer, split_assays_by_sample_type): + def _generate_samples_and_assays(self, sources_map, sampling_protocol, performer): """ Private method to be used in 'generate_isa_study'. :param sources_map: dict - the output of '_generate_sources' :param sampling_protocol: isatools.model.Protocol - :param performer - :param split_assays_by_sample_type: bool + :param performer: str :return: """ factors = set() @@ -2069,6 +2068,16 @@ def _generate_samples(self, sources_map, sampling_protocol, performer, split_ass process_sequence = [] assays = [] protocols = set() + unique_assay_types = { + assay_graph for arm in self.study_arms + for sample_assay_plan in arm.arm_map.values() if sample_assay_plan is not None + for assay_graph in sample_assay_plan.assay_plan if assay_graph is not None + } + samples_grouped_by_assay_graph = { + assay_graph: [] for assay_graph in unique_assay_types + } + + # generate samples for arm in self.study_arms: for cell, sample_assay_plan in arm.arm_map.items(): if not sample_assay_plan: @@ -2110,22 +2119,24 @@ def _generate_samples(self, sources_map, sampling_protocol, performer, split_ass process_sequence.append(process) for sample_node in sample_assay_plan.sample_plan: samples.extend(sample_batches[sample_node]) + for assay_graph in sample_assay_plan.assay_plan: - protocols.update({node for node in assay_graph.nodes if isinstance(node, Protocol)}) - if split_assays_by_sample_type is True: - for sample_node in sorted(sample_assay_plan.sample_plan, key=lambda st: st.id): - if assay_graph in sample_assay_plan.sample_to_assay_map[sample_node]: - assays.append( - self._generate_assay(assay_graph, sample_batches[sample_node], cell.name) - ) - else: - sample_batch = [] - for sample_node in sample_assay_plan.sample_plan: - if assay_graph in sample_assay_plan.sample_to_assay_map[sample_node]: - sample_batch.extend(sample_batches[sample_node]) - assays.append( - self._generate_assay(assay_graph, sample_batch, cell.name) - ) + for sample_node in sample_assay_plan.sample_plan: + if assay_graph in sample_assay_plan.sample_to_assay_map[sample_node]: + try: + samples_grouped_by_assay_graph[assay_graph] += sample_batches[sample_node] + except AttributeError: + log.error('Assay graph is: {}'.format(assay_graph)) + problematic_sample_group = samples_grouped_by_assay_graph[assay_graph] + log.error('Sample bach for assay graph is: {}'.format( + problematic_sample_group + )) + + # generate assays + for assay_graph in unique_assay_types: + protocols.update({node for node in assay_graph.nodes if isinstance(node, Protocol)}) + assays.append(self._generate_assay(assay_graph, samples_grouped_by_assay_graph[assay_graph])) + return factors, protocols, samples, assays, process_sequence, ontology_sources @staticmethod @@ -2261,8 +2272,8 @@ def generate_isa_study(self, split_assays_by_sample_type=False): study.sources = [source for sources in sources_map.values() for source in sources] study.factors, protocols, study.samples, study.assays, study.process_sequence, \ study.ontology_source_references = \ - self._generate_samples( - sources_map, study.protocols[0], study_config['performers'][0]['name'], split_assays_by_sample_type + self._generate_samples_and_assays( + sources_map, study.protocols[0], study_config['performers'][0]['name'] ) for protocol in protocols: study.add_protocol(protocol) diff --git a/isatools/model.py b/isatools/model.py index 906cdae8..b5c95694 100644 --- a/isatools/model.py +++ b/isatools/model.py @@ -690,7 +690,7 @@ class OntologyAnnotation(Commentable): """ def __init__(self, term='', term_source=None, term_accession='', - comments=None, id_=str(uuid.uuid4())) : + comments=None, id_=str(uuid.uuid4())): super().__init__(comments) self.__term = term diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py index 2b476fc7..574c5e9e 100644 --- a/tests/test_create_models_study_design.py +++ b/tests/test_create_models_study_design.py @@ -1749,17 +1749,18 @@ def test_generate_isa_study_single_arm_single_cell_elements(self): self.assertEqual(len(source.characteristics), 1) self.assertEqual(source.characteristics[0], DEFAULT_SOURCE_TYPE) - expected_num_of_samples_per_plan = reduce(lambda acc_value, sample_node: acc_value+sample_node.size, - self.nmr_sample_assay_plan.sample_plan, 0) * single_arm.group_size - expected_num_of_samples = expected_num_of_samples_per_plan * len([ + expected_num_of_samples = reduce( + lambda acc_value, sample_node: acc_value + sample_node.size, + self.nmr_sample_assay_plan.sample_plan, 0 + ) * single_arm.group_size * len([ a_plan for a_plan in single_arm.arm_map.values() if a_plan is not None ]) - print('Expected number of samples is: {0}'.format(expected_num_of_samples)) + log.debug('Expected number of samples is: {0}'.format(expected_num_of_samples)) self.assertEqual(len(study.samples), expected_num_of_samples) - self.assertEqual(len(study.assays), 2) + self.assertEqual(len(study.assays), 1) treatment_assay = next(iter(study.assays)) self.assertIsInstance(treatment_assay, Assay) - # self.assertEqual(len(treatment_assay.samples), expected_num_of_samples_per_plan) + # self.assertEqual(len(treatment_assay.samples), expected_num_of_samples) self.assertEqual(treatment_assay.measurement_type, nmr_assay_dict['measurement_type']) self.assertEqual(treatment_assay.technology_type, nmr_assay_dict['technology_type']) # pdb.set_trace() @@ -1767,12 +1768,13 @@ def test_generate_isa_study_single_arm_single_cell_elements(self): if process.executes_protocol.name == 'extraction'] nmr_processes = [process for process in treatment_assay.process_sequence if process.executes_protocol.name == 'nmr spectroscopy'] - self.assertEqual(len(extraction_processes), expected_num_of_samples_per_plan) - self.assertEqual(len(nmr_processes), 8 * nmr_assay_dict['nmr spectroscopy']['#replicates'] - * expected_num_of_samples_per_plan) + self.assertEqual(len(extraction_processes), expected_num_of_samples) + self.assertEqual( + len(nmr_processes), + 8 * nmr_assay_dict['nmr spectroscopy']['#replicates'] * expected_num_of_samples) self.assertEqual( len(treatment_assay.process_sequence), - (8 * nmr_assay_dict['nmr spectroscopy']['#replicates'] + 1) * expected_num_of_samples_per_plan + (8 * nmr_assay_dict['nmr spectroscopy']['#replicates'] + 1) * expected_num_of_samples ) for ix, process in enumerate(extraction_processes): self.assertEqual(process.inputs, [study.samples[ix]]) From c9f81d79e45c1eae7fde120aada5eb9396b95f7f Mon Sep 17 00:00:00 2001 From: zigur Date: Thu, 12 Nov 2020 13:20:51 +0000 Subject: [PATCH 02/25] removing unused argument; tests wip #369 --- isatools/create/models.py | 2 +- tests/test_create_models_study_design.py | 32 +----------------------- 2 files changed, 2 insertions(+), 32 deletions(-) diff --git a/isatools/create/models.py b/isatools/create/models.py index 7c4ca44f..e1b38a58 100644 --- a/isatools/create/models.py +++ b/isatools/create/models.py @@ -2251,7 +2251,7 @@ def _generate_assay(assay_graph, assay_samples, cell_name=''): len(data_files))) return assay - def generate_isa_study(self, split_assays_by_sample_type=False): + def generate_isa_study(self): """ this is the core method to return the fully populated ISA Study object from the StudyDesign :return: isatools.model.Study diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py index 574c5e9e..c3630187 100644 --- a/tests/test_create_models_study_design.py +++ b/tests/test_create_models_study_design.py @@ -1792,34 +1792,6 @@ def test_generate_isa_study_single_arm_single_cell_elements(self): log.debug('NMR assay graph: {0}'.format([(getattr(el, 'name', None), type(el)) for el in treatment_assay.graph.nodes()])) - def test_generate_isa_study_single_arm_single_cell_elements_split_assay_by_sample_type(self): - with open(os.path.join(os.path.dirname(__file__), '..', 'isatools', 'resources', 'config', 'yaml', - 'study-creator-config.yaml')) as yaml_file: - config = yaml.load(yaml_file, Loader=yaml.FullLoader) - # study_config = config['study'] - single_arm = StudyArm(name=TEST_STUDY_ARM_NAME_00, group_size=10, arm_map=OrderedDict([ - (self.cell_screen, None), (self.cell_run_in, None), - (self.cell_single_treatment_00, self.nmr_sample_assay_plan), - (self.cell_follow_up, self.nmr_sample_assay_plan) - ])) - study_design = StudyDesign(study_arms=(single_arm,)) - study = study_design.generate_isa_study(split_assays_by_sample_type=True) - self.assertEqual(len(study.assays), 6) - treatment_assay_st0, treatment_assay_st1, treatment_assay_st2 = study.assays[0:3] - self.assertIsInstance(treatment_assay_st0, Assay) - self.assertEqual(treatment_assay_st0.measurement_type, nmr_assay_dict['measurement_type']) - self.assertEqual(treatment_assay_st0.technology_type, nmr_assay_dict['technology_type']) - extraction_processes = [process for process in treatment_assay_st0.process_sequence - if process.executes_protocol.name == 'extraction'] - nmr_processes = [process for process in treatment_assay_st0.process_sequence - if process.executes_protocol.name == 'nmr spectroscopy'] - expected_num_of_samples_per_plan = reduce(lambda acc_value, sample_node: acc_value+sample_node.size, - self.nmr_sample_assay_plan.sample_plan, 0) * single_arm.group_size - expected_num_of_samples_first = sample_list[0]['size'] * single_arm.group_size - self.assertEqual(len(extraction_processes), expected_num_of_samples_first) - self.assertEqual(len(nmr_processes), 8 * 2 * expected_num_of_samples_first) - self.assertEqual(len(treatment_assay_st0.process_sequence), (8 * 2 + 1) * expected_num_of_samples_first) - def test_generate_isa_study_two_arms_single_cell_elements(self): first_arm = StudyArm(name=TEST_STUDY_ARM_NAME_00, group_size=20, arm_map=OrderedDict([ (self.cell_screen, None), (self.cell_run_in, None), @@ -1833,7 +1805,7 @@ def test_generate_isa_study_two_arms_single_cell_elements(self): ])) study_design = StudyDesign(study_arms=(first_arm, second_arm)) study = study_design.generate_isa_study() - self.assertEqual(len(study.assays), 4) + self.assertEqual(len(study.assays), 2) expected_num_of_samples_nmr_plan_first_arm = reduce( lambda acc_value, sample_node: acc_value + sample_node.size, self.nmr_sample_assay_plan.sample_plan, 0) * first_arm.group_size @@ -1904,8 +1876,6 @@ def test_generate_isa_study_two_arms_single_cell_elements_check_source_character self.assertEqual(source.characteristics, [control_source_type]) else: self.assertEqual(source.characteristics, [treatment_source_type]) - # self.assertIn(control_source_type.category, study.characteristic_categories) - # self.assertIn(treatment_source_type.category, study.characteristic_categories) class QualityControlServiceTest(BaseStudyDesignTest): From 6140f9e9811f6f81b66d2ea161a2dca78fd23032 Mon Sep 17 00:00:00 2001 From: zigur Date: Thu, 12 Nov 2020 16:56:08 +0000 Subject: [PATCH 03/25] tests work fix #369 --- isatools/create/connectors.py | 2 +- isatools/create/models.py | 44 +++++++++++++++++---------------- tests/test_create_connectors.py | 6 +++-- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/isatools/create/connectors.py b/isatools/create/connectors.py index 0a204c06..ede27417 100644 --- a/isatools/create/connectors.py +++ b/isatools/create/connectors.py @@ -209,7 +209,7 @@ def _generate_characteristics_from_observational_factor(observational_factor_dic def generate_assay_ord_dict_from_config(datascriptor_assay_config, arm_name, epoch_no): res = OrderedDict() - res['name'] = datascriptor_assay_config['name'] + res['id'], res['name'] = datascriptor_assay_config['id'], datascriptor_assay_config['name'] res['measurement_type'] = _map_ontology_annotation( datascriptor_assay_config['measurement_type'], expand_strings=True ) diff --git a/isatools/create/models.py b/isatools/create/models.py index e1b38a58..f1002cf5 100644 --- a/isatools/create/models.py +++ b/isatools/create/models.py @@ -1021,7 +1021,7 @@ def generate_assay_plan_from_dict(cls, assay_plan_dict, current_nodes = [] for node_key, node_params in assay_plan_dict.items(): - if node_key in ('name', 'selected_sample_types', 'measurement_type', 'technology_type'): + if node_key in ('id', 'name', 'selected_sample_types', 'measurement_type', 'technology_type'): continue if isinstance(node_params, list): # the node is a ProductNode @@ -1426,7 +1426,10 @@ def from_sample_and_assay_plan_dict(cls, name, sample_type_dicts, *assay_plan_di for i, assay_plan_dict in enumerate(assay_plan_dicts): assay_graph = AssayGraph.generate_assay_plan_from_dict( assay_plan_dict, - id_=str(uuid.uuid4()) if use_guids else '{0}{1}'.format( + # FIXME: this id cannot work as it is + id_=str(uuid.uuid4()) if use_guids + else assay_plan_dict['id'] if 'id' in assay_plan_dict + else '{0}{1}'.format( ASSAY_GRAPH_PREFIX, str(i).zfill(n_digits(len(assay_plan_dicts))) ), quality_control=quality_controls[i] if len(quality_controls) > i else None @@ -2135,7 +2138,7 @@ def _generate_samples_and_assays(self, sources_map, sampling_protocol, performer # generate assays for assay_graph in unique_assay_types: protocols.update({node for node in assay_graph.nodes if isinstance(node, Protocol)}) - assays.append(self._generate_assay(assay_graph, samples_grouped_by_assay_graph[assay_graph])) + assays.append(self.generate_assay(assay_graph, samples_grouped_by_assay_graph[assay_graph])) return factors, protocols, samples, assays, process_sequence, ontology_sources @@ -2206,7 +2209,7 @@ def _generate_isa_elements_from_node( return processes, other_materials, data_files, item, counter @staticmethod - def _generate_assay(assay_graph, assay_samples, cell_name=''): + def generate_assay(assay_graph, assay_samples): if not isinstance(assay_graph, AssayGraph): raise TypeError() """ @@ -2215,12 +2218,11 @@ def _generate_assay(assay_graph, assay_samples, cell_name=''): else None """ measurement_type, technology_type = assay_graph.measurement_type, assay_graph.technology_type - assay_file_prefix = assay_graph.id if not cell_name else '{}_{}'.format(cell_name, assay_graph.id) assay = Assay( measurement_type=measurement_type, technology_type=technology_type, filename=urlify('a_{0}_{1}_{2}.txt'.format( - assay_file_prefix, + assay_graph.id, measurement_type.term if isinstance(measurement_type, OntologyAnnotation) else measurement_type, technology_type.term if isinstance(technology_type, OntologyAnnotation) else technology_type )) @@ -2241,7 +2243,7 @@ def _generate_assay(assay_graph, assay_samples, cell_name=''): ix = i * len(assay_samples) * size + j * size + k log.debug('i = {0}, j = {1}, k={2}, ix={3}'.format(i, j, k, ix)) processes, other_materials, data_files, _, __ = StudyDesign._generate_isa_elements_from_node( - node, assay_graph, assay_file_prefix, ix=ix, jx=0, counter=0, processes=[], other_materials=[], + node, assay_graph, assay_graph.id, ix=ix, jx=0, counter=0, processes=[], other_materials=[], data_files=[], previous_items=[sample] ) assay.other_material.extend(other_materials) @@ -2330,13 +2332,13 @@ def augment_study(cls, study, study_design, in_place=False): for assay_graph in study_assay_plan.assay_plan: assert isinstance(assay_graph, AssayGraph) if assay_graph.quality_control: - # CHECK the assumption here is that an assay file can univocally be identified + # CHECK the assumption here is that an assay file can unequivocally be identified # by StudyCell name, corresponding AssayGraph id and measurement type # Such an assumption is correct as far a the Assay filename convention is not modified measurement_type, technology_type = assay_graph.measurement_type, \ - assay_graph.technology_type - assay_filename = urlify('a_{0}_{1}_{2}_{3}.txt'.format( - cell.name, assay_graph.id, + assay_graph.technology_type + assay_filename = urlify('a_{0}_{1}_{2}.txt'.format( + assay_graph.id, measurement_type.term if isinstance(measurement_type, OntologyAnnotation) else measurement_type, technology_type.term if isinstance(technology_type, OntologyAnnotation) @@ -2368,8 +2370,7 @@ def augment_study(cls, study, study_design, in_place=False): post_run_samples=qc_samples_post_run, interspersed_samples=qc_samples_interspersed ) - qc_study.assays[index] = StudyDesign._generate_assay(assay_graph, augmented_samples, - cell_name=cell.name) + qc_study.assays[index] = StudyDesign.generate_assay(assay_graph, augmented_samples) return qc_study @staticmethod @@ -2520,13 +2521,13 @@ def isa_objects_factory( log.debug('sequence_no: {0}'.format(sequence_no)) if isinstance(node, ProtocolNode): return Process( - name='{0}_{1}'.format(urlify(node.name), str(sequence_no).zfill(ZFILL_WIDTH)), - executes_protocol=node, - performer=performer, - parameter_values=node.parameter_values, - inputs=[], - outputs=[], - ) + name='{0}_{1}'.format(urlify(node.name), str(sequence_no).zfill(ZFILL_WIDTH)), + executes_protocol=node, + performer=performer, + parameter_values=node.parameter_values, + inputs=[], + outputs=[], + ) if isinstance(node, ProductNode): if node.type == SAMPLE: return Sample( @@ -2562,10 +2563,11 @@ def isa_objects_factory( measurement_type, technology_type, curr_assay_opt) ) isa_class = globals()[curr_assay_opt['raw data file'].replace(' ', '')] + assert isa_class in {RawDataFile, RawSpectralDataFile} return isa_class( filename='{0}_{1}'.format(urlify(node.name), str(sequence_no).zfill(ZFILL_WIDTH)) ) - except StopIteration as e: + except StopIteration: return RawDataFile( filename='{0}_{1}'.format(node.name, str(sequence_no).zfill(ZFILL_WIDTH)) ) diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py index 5ec3e797..7c99444f 100644 --- a/tests/test_create_connectors.py +++ b/tests/test_create_connectors.py @@ -69,7 +69,7 @@ def test_assay_template_convert_ordered_dict_to_json_met_prof_mass_spec_annotate self.assertEqual(actual_annotated_json_mp_ms, { key: value for key, value in self.met_prof_jsons[1].items() if key not in ['@context'] }) - + @staticmethod def _load_config(file_name): ds_design_config_file_path = os.path.abspath( @@ -130,7 +130,9 @@ def test_generate_study_design_from_config_with_observational_factors(self): self.assertIsInstance(source_char, Characteristic) self.assertIsInstance(source_char.category, OntologyAnnotation) self.assertIsInstance(source_char.value, OntologyAnnotation) - investigation = Investigation(studies=[design.generate_isa_study(split_assays_by_sample_type=True)]) + investigation = Investigation(studies=[design.generate_isa_study()]) + # two assay types are selected, so we expect to find only two assays in the studies + self.assertEqual(len(investigation.studies[0].assays), 2) inv_json = json.dumps( investigation, cls=ISAJSONEncoder, From 07a34479c0fea7a2d7f5dfe84245148ca331c2b9 Mon Sep 17 00:00:00 2001 From: zigur Date: Mon, 16 Nov 2020 19:51:47 +0000 Subject: [PATCH 04/25] implemented counter by type to name nodes #370 --- isatools/create/models.py | 71 +++++++++++++++++------- tests/test_create_models_study_design.py | 21 ++++++- 2 files changed, 68 insertions(+), 24 deletions(-) diff --git a/isatools/create/models.py b/isatools/create/models.py index f1002cf5..5008aa01 100644 --- a/isatools/create/models.py +++ b/isatools/create/models.py @@ -660,10 +660,10 @@ def __init__(self, id_=str(uuid.uuid4()), name='', protocol_type=None, uri='', """ :param id_: - :param name: - :param protocol_type: - :param uri: - :param description: + :param name: the name of the protocol + :param protocol_type: the type of the protocol + :param uri: a uri pointing to a resource describing the protocol + :param description: a textual description of the protocol :param version: :param parameter_values: the values to be supplied to the Protocol Parameters :param replicates: int - the number of replicates (biological or technical) for this Protocol step. Must be a @@ -2142,6 +2142,16 @@ def _generate_samples_and_assays(self, sources_map, sampling_protocol, performer return factors, protocols, samples, assays, process_sequence, ontology_sources + @staticmethod + def _increment_counter_by_node_type(counter, node): + if isinstance(node, ProductNode): + counter[node.type] = counter[node.type] + 1 if node.type in counter else 1 + # FIXME do we need a check by node.name for DATA_FILE? + if isinstance(node, ProtocolNode): + # the attribute "name" should contain the same value as "protocol_type.term" + counter[node.name] = counter[node.name] + 1 if node.name in counter else 1 + return counter + @staticmethod def _generate_isa_elements_from_node( node, @@ -2153,8 +2163,10 @@ def _generate_isa_elements_from_node( previous_items=None, ix=0, jx=0, - counter=0 + counter=None ): + if counter is None: + counter = {} if previous_items is None: previous_items = [] if data_files is None: @@ -2164,8 +2176,9 @@ def _generate_isa_elements_from_node( if processes is None: processes = [] log.debug('# processes: {0} - ix: {1}'.format(len(processes), ix)) + counter = StudyDesign._increment_counter_by_node_type(counter, node) item = isa_objects_factory( - node, sequence_no='{0}-{1}-{2}'.format(assay_file_prefix, ix, counter), + node, assay_file_prefix, ix, counter, measurement_type=assay_graph.measurement_type, technology_type=assay_graph.technology_type ) @@ -2184,7 +2197,7 @@ def _generate_isa_elements_from_node( for jj in range(size): jx = ii * size + jj log.debug('ii = {0} - jj = {1} - jx = {2}'.format(ii, jj, jx)) - counter += 1 + # counter += 1 processes, other_materials, data_files, next_item, counter = \ StudyDesign._generate_isa_elements_from_node( next_node, assay_graph, assay_file_prefix, processes, other_materials, data_files, @@ -2243,7 +2256,7 @@ def generate_assay(assay_graph, assay_samples): ix = i * len(assay_samples) * size + j * size + k log.debug('i = {0}, j = {1}, k={2}, ix={3}'.format(i, j, k, ix)) processes, other_materials, data_files, _, __ = StudyDesign._generate_isa_elements_from_node( - node, assay_graph, assay_graph.id, ix=ix, jx=0, counter=0, processes=[], other_materials=[], + node, assay_graph, assay_graph.id, ix=ix, jx=0, counter=None, processes=[], other_materials=[], data_files=[], previous_items=[sample] ) assay.other_material.extend(other_materials) @@ -2502,26 +2515,32 @@ def _generate_quality_control_samples(quality_control, study_cell, sample_size=0 log.debug("Completed post-batch samples") return qc_sources, qc_samples_pre_run, qc_samples_interspersed, qc_samples_post_run, qc_processes - +# TODO: should I move this inside the StudyDesign class? def isa_objects_factory( node, - sequence_no, + assay_file_prefix, + ix, + counter, measurement_type=None, technology_type=None, performer=DEFAULT_PERFORMER - ): +): """ This method generates an ISA element from an ISA node :param technology_type: :param measurement_type: :param node: SequenceNode - can be either a ProductNode or a ProtocolNode - :param sequence_no: str - a sequential number to discriminate among items built in a batch + :param assay_file_prefix: str + :param ix: int the index of the starting node in the graph + :param counter: dict containing the counts for this specific subgraph + :param performer: str/Person :return: either a Sample or a Material or a DataFile. So far only RawDataFile is supported among files """ - log.debug('sequence_no: {0}'.format(sequence_no)) if isinstance(node, ProtocolNode): return Process( - name='{0}_{1}'.format(urlify(node.name), str(sequence_no).zfill(ZFILL_WIDTH)), + name='{}_{}-{}-{}'.format( + urlify(node.name), assay_file_prefix, ix, counter[node.name] + ), # FIXME!! executes_protocol=node, performer=performer, parameter_values=node.parameter_values, @@ -2531,24 +2550,24 @@ def isa_objects_factory( if isinstance(node, ProductNode): if node.type == SAMPLE: return Sample( - name='{0}_{1}'.format(SAMPLE_PREFIX, str(sequence_no).zfill(ZFILL_WIDTH)), + name='{}-{}-Sample{}'.format(assay_file_prefix, ix, counter[SAMPLE]), characteristics=node.characteristics ) if node.type == EXTRACT: return Extract( - name='{0}_{1}'.format(EXTRACT_PREFIX, str(sequence_no).zfill(ZFILL_WIDTH)), + name='{}-{}-Extract{}'.format(assay_file_prefix, ix, counter[EXTRACT]), characteristics=node.characteristics ) if node.type == LABELED_EXTRACT: return LabeledExtract( - name='{0}_{1}'.format(LABELED_EXTRACT_PREFIX, str(sequence_no).zfill(ZFILL_WIDTH)), + name='{}-{}-LE{}'.format(assay_file_prefix, ix, counter[LABELED_EXTRACT]), characteristics=node.characteristics ) # under the hypothesis that we deal only with raw data files # derived data file would require a completely separate approach if node.type == DATA_FILE: try: - log.debug('isa_objects_factory: Assay conf. found: {}; {};'.format( + log.debug('Assay conf. found: {}; {};'.format( measurement_type, technology_type) ) m_type_term = measurement_type.term if isinstance(measurement_type, OntologyAnnotation) \ @@ -2559,17 +2578,27 @@ def isa_objects_factory( opt for opt in assays_opts if opt['measurement type'] == m_type_term and opt['technology type'] == t_type_term ) - log.debug('isa_objects_factory: Assay conf. found: {}; {}; {};'.format( + log.debug('Assay conf. found: {}; {}; {};'.format( measurement_type, technology_type, curr_assay_opt) ) isa_class = globals()[curr_assay_opt['raw data file'].replace(' ', '')] assert isa_class in {RawDataFile, RawSpectralDataFile} return isa_class( - filename='{0}_{1}'.format(urlify(node.name), str(sequence_no).zfill(ZFILL_WIDTH)) + filename='{}_{}-{}-{}'.format( + urlify(node.name), + assay_file_prefix, + ix, + counter[node.type] # FIXME should this be changed to "counter[node.name]"? + ) ) except StopIteration: return RawDataFile( - filename='{0}_{1}'.format(node.name, str(sequence_no).zfill(ZFILL_WIDTH)) + filename='{}_{}-{}-{}'.format( + urlify(node.name), + assay_file_prefix, + ix, + counter[node.type] # FIXME should this be changed to "counter[node.name]"? + ) ) diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py index c3630187..f9544a51 100644 --- a/tests/test_create_models_study_design.py +++ b/tests/test_create_models_study_design.py @@ -1704,6 +1704,21 @@ def test_generate_isa_study_00(self): print('Sources: {0}'.format(study.sources)) """ + def test_increment_counter_by_node_type(self): + assay_graph = AssayGraph.generate_assay_plan_from_dict(nmr_assay_dict) + extract_node = next( + node for node in assay_graph.nodes if isinstance(node, ProductNode) and node.type == EXTRACT + ) + counter = StudyDesign._increment_counter_by_node_type({}, extract_node) + self.assertEqual(counter[EXTRACT], 1) + counter = StudyDesign._increment_counter_by_node_type(counter, extract_node) + self.assertEqual(counter[EXTRACT], 2) + protocol_node = next(node for node in assay_graph.nodes if isinstance(node, ProtocolNode)) + counter = StudyDesign._increment_counter_by_node_type(counter, protocol_node) + self.assertEqual(counter[protocol_node.name], 1) + counter = StudyDesign._increment_counter_by_node_type(counter, protocol_node) + self.assertEqual(counter[protocol_node.name], 2) + def test__generate_isa_elements_from_node(self): assay_graph = AssayGraph.generate_assay_plan_from_dict(nmr_assay_dict) node = next(iter(assay_graph.start_nodes)) @@ -1716,10 +1731,10 @@ def test__generate_isa_elements_from_node(self): extraction_processes = [process for process in processes if process.executes_protocol.name == 'extraction'] self.assertEqual(len(extraction_processes), 1) nmr_processes = [process for process in processes if process.executes_protocol.name == 'nmr spectroscopy'] - self.assertEqual(len(nmr_processes), 8*2) - self.assertEqual(len(processes), 1+8*2) + self.assertEqual(len(nmr_processes), 8 * 2) + self.assertEqual(len(processes), 1 + 8 * 2) self.assertEqual(len(other_materials), 2) - self.assertEqual(len(data_files), 8*2) # 16 raw data files + self.assertEqual(len(data_files), 8 * 2) # 16 raw data files for nmr_process in nmr_processes: self.assertIsInstance(nmr_process, Process) print('expected previous process: {0}'.format(extraction_processes[0])) From 348f6324166f1b93e0a2ffefc392ec79952c76e3 Mon Sep 17 00:00:00 2001 From: zigur Date: Tue, 17 Nov 2020 17:21:15 +0000 Subject: [PATCH 05/25] deleted unused files #368 --- .../create/MTBLS-purge-spurious-factors.py | 27 --- isatools/create/{models.py => model.py} | 0 isatools/create/mtbls-process.py | 200 ------------------ 3 files changed, 227 deletions(-) delete mode 100644 isatools/create/MTBLS-purge-spurious-factors.py rename isatools/create/{models.py => model.py} (100%) delete mode 100644 isatools/create/mtbls-process.py diff --git a/isatools/create/MTBLS-purge-spurious-factors.py b/isatools/create/MTBLS-purge-spurious-factors.py deleted file mode 100644 index 2fd8f392..00000000 --- a/isatools/create/MTBLS-purge-spurious-factors.py +++ /dev/null @@ -1,27 +0,0 @@ -from isatools.utils import IsaTabFixer - - -def main(): - fixer = IsaTabFixer('/Users/Philippe/Downloads/ftp.ebi.ac.uk/pub/' - 'databases/metabolights/studies/public/MTBLS81/' - 's_Study id.txt') - - this_factor = "Age at sacrifice" - - fixer.fix_factor(this_factor) # fixes by moving factor to charac - - # spurious_factors = "factor: Age at sacrifice" - # - # factornames = [] - # factornames = spurious_factors.split("factor: ") - # - # for element in factornames: - # - # this_factor = element.strip() - # this_factor = re.sub(";","", this_factor) - # this_factor = this_factor.strip() - # print(this_factor) - - -if __name__ == '__main__': - main() diff --git a/isatools/create/models.py b/isatools/create/model.py similarity index 100% rename from isatools/create/models.py rename to isatools/create/model.py diff --git a/isatools/create/mtbls-process.py b/isatools/create/mtbls-process.py deleted file mode 100644 index 08d6765f..00000000 --- a/isatools/create/mtbls-process.py +++ /dev/null @@ -1,200 +0,0 @@ - -import io -import json -from datetime import date - - -# Make it work for Python 2+3 and with Unicode - - -def main(): - blocks = [] - write_dir = "/Users/Philippe/Documents/git/isa-api/isatools/create/" - data_header = str.join("\t", ("Accession Number", - "calculated factor combinations", - "counted factor combinations", - "design automatic annotation", - "number of sources", - "number of samples", - "curation warnings", - "spurious factors")) - - fh = open(write_dir + "/" + str(date.today()) - + "-MTBLS-ISA-curation-report.txt", "w") - fh.writelines(data_header) - fh.writelines("\n") - - with open('/Users/Philippe/Documents/PhenoMenal/' - 'Metabolights-metadata-Testing/out.txt') as fp: - - for line in fp: - - begin = False - acc_num = 0 - # block = [] - if "load OK" in line: - block = [] - start = line.strip() - block.append(start) - begin = True - # print(begin) - # print(line.strip()) - - elif "load FAIL" in line: - block = [] - start = line.strip() - block.append(start) - begin = True - # print(begin) - # print(line.strip()) - - else: - begin = False - # print(line) - # print(begin) - block.append(line.strip()) - # print(block) - if begin: - blocks.append(block) - - # print(len(blocks)) - - # print("BLOCK: ", blocks[3]) - - data = [] - for e in blocks: - # print("L: ",e) - design = "" - factors = {} - factor_count = 0 - count_mat = {} - spurious_factor = {} - non_factors = [] - calc_nb_sg = -1 - - for x in e: - bits = [] - if "load OK" in x: - bits = x.split(" ") - acc_num = bits[0] - # print("acc_num: ", acc_num) - - elif "load FAIL" in x: - bits = x.split(",") - acc_num = bits[0] - - print("accnum: ", acc_num) - max_nb_study_group = -1 - calc_nb_sg = -1 - design = "_" - count_mat["source"] = "_" - count_mat["sample"] = "_" - sampling_event = bits[1] - non_factors_as_string = "_" - - elif x.startswith("Calculated"): - bits = x.split(" ") - calc_nb_sg = int(bits[1]) - # print("number of calculated study groups: ", calc_nb_sg ) - - elif x.startswith("Study sample level:"): - bits = x.split(',') - # print("group sizes: ", bits) - for bit in bits: - bob_a, bob_b = bit.split(" = ") - if "total sources" in bob_a: - count_mat["source"] = int(bob_b) - if "total samples" in bob_a: - count_mat["sample"] = int(bob_b) - - # print("check Source Definitions") - if count_mat["source"] == count_mat["sample"]: - sampling_event = "single sampling" - else: - # print("sample size: ", count_mat["source"], "| - # number of samples: ", count_mat["sample"]) - sampling_event = "multiple/repeated samping" - - elif x.startswith("factor: "): - bits = x.split("|") - # print("BITS:", bits[1]) - factor_count = factor_count + 1 - bits[1] = bits[1].strip("' levels=") - bits[1] = bits[1].strip(" '") - # print("factor bits:", bits[1]) - if int(bits[1]) == 1: - spurious_factor[bits[0]] = int(bits[1]) - # print("SPURIOUS FACTOR", bits[0], bits[1]) - non_factors.append(bits[0]) - else: - factors[bits[0]] = int(bits[1]) - - elif x.startswith("('"): - bits = x.split(",") - # print("treatment: ", bits) - - this_array = factors.values() - max_nb_study_group = 1 - for element in this_array: - # print("in array: ",element) - max_nb_study_group = element * max_nb_study_group - # print("max: ", max_nb_stdy_group) - - if count_mat["source"] == 1 and calc_nb_sg > 1: - sampling_event = "ERROR LIKELY: check source declaration" - # print(count_mat["source"], - # ":::", count_mat["sample"], "///", calc_nb_sg) - # print(sampling_event) - - if max_nb_study_group == calc_nb_sg: - design = "full factorial design" - # print(design) - - elif max_nb_study_group > calc_nb_sg > 0: - design = "fractional factorial design" - # print(design) - - elif calc_nb_sg == -1: - design = "none" - - # elif calc_nb_sg > 1 & - - else: - print("problem with study group declaration, please review study!") - - non_factors_as_string = ';'.join(non_factors) - print(acc_num, " \t ", max_nb_study_group, " \t ", calc_nb_sg, - " \t ", design, " \t ", count_mat["source"], " \t ", - count_mat["sample"], " \t ", sampling_event, " \t ", - non_factors_as_string) - - data_element = {"study_key": acc_num, - "total_study_groups": max_nb_study_group, - "sources": count_mat["source"], - "samples": count_mat["sample"], - "inferred_study_design": design, - "sampling": sampling_event, - "spurious_factors": non_factors_as_string} - data.append(data_element) - - fh.writelines(str.join('\t', (acc_num, str(max_nb_study_group), - str(calc_nb_sg), design, - str(count_mat["source"]), - str(count_mat["sample"]), - sampling_event, non_factors_as_string))) - fh.writelines("\n") - - try: - to_unicode = unicode - except NameError: - to_unicode = str - # Write JSON file - with io.open('data.json', 'w', encoding='utf8') as outfile: - str_ = json.dumps(data, - indent=4, sort_keys=True, - separators=(',', ': '), ensure_ascii=False) - outfile.write(to_unicode(str_)) - - -if __name__ == '__main__': - main() From 484c5557b6e71b1e51a5da9bec2fd1cc772db4ab Mon Sep 17 00:00:00 2001 From: zigur Date: Tue, 17 Nov 2020 17:22:02 +0000 Subject: [PATCH 06/25] deleted unused files #368 v2 --- isatools/create/study_design_wizard.py | 1885 ------------------------ 1 file changed, 1885 deletions(-) delete mode 100644 isatools/create/study_design_wizard.py diff --git a/isatools/create/study_design_wizard.py b/isatools/create/study_design_wizard.py deleted file mode 100644 index a3bd3391..00000000 --- a/isatools/create/study_design_wizard.py +++ /dev/null @@ -1,1885 +0,0 @@ -import datetime -import json -import random -import uuid -from itertools import permutations, product - -from isatools import isatab -from isatools.isatab import dump, write_study_table_files -from isatools.model import * - - -# from random import sample - -__author__ = 'proccaserra@gmail.com' - - -# def save_study_profile(): - # TODO: code the method - # save study parameters using YAML or JSON file - # -study_type: intervention or observation - # -number_of_intervention: integer - # -agent - # -intensity - # -duration - # -study_regularity: balanced_or_imbalanced - # -study group size: integer - # -study_variable_blocking: - # -study_hard_to_change_variable: yes_no - # - - -# def load_study_profile(): - # TODO: do it - - -def get_parameters( some_json_file ): - source = json.loads( some_json_file ) - return dict( - study_type = source.get('study_type',1), - treatment_multiplicity= source['treatment_multiplicity'], - hard_to_change_factor = source['hard_to_change_factor'], - intervention_type_list = source['intervention_type_list'], - sg_size = source["sg_size"]["size"], - sg_size_toall = source["sg_size"]["applied_to_all_flag"], - sample_list = source["sample_collection_list"]["sample_type"], - assay_plan = source["assay_type"][""][""] - ) - - - -def use_default_inv(): - try: - investigation = Investigation() - investigation.identifier = "" - investigation.title = "" - investigation.description = "" - investigation.submission_date = "" - investigation.public_release_date = "" - study = Study(filename="s_study.txt", comments=[]) - study.identifier = str(uuid.uuid4()) - study.title = "boilerplate title" - study.description = "boilerplate study description (testing purpose)" - study.submission_date = str(datetime.date.today()) - study.public_release_date = str(datetime.date.today() + datetime.timedelta(days=30)) - - sample_collection_protocol = Protocol(name="sample collection", - protocol_type=OntologyAnnotation(term="sample collection")) - - study.protocols.append(sample_collection_protocol) - investigation.studies.append(study) - - contact = Person(first_name="Boiler", last_name="Plate", affiliation="boiler plate affiliation") - # roles=[OntologyAnnotation(term="submitter")] - study.contacts.append(contact) - publication = Publication(title="boiler plate publication", author_list="A. Robertson, B. Robertson") - publication.pubmed_id = "12345678" - publication.status = OntologyAnnotation(term="published") - study.publications.append(publication) - - return investigation - - except IOError: - print("error in get_number_of_factors() method") - - -def remove_duplicate_from_list(some_list): - # and some_list.contains(',') - try: - if len(some_list) > 0: - # removes trailing whitespace in a list such as a,b ,c ,c - list_values = [x.strip() for x in some_list.split(',')] - # removes any duplicate values in a list such as a,a,b,c - list_values_nodup = list(set(list_values)) - # removes any empty string supplied as is a,,c,d - # list_values_nodup = filter(bool, list_values_nodup) - else: - print("the list you have supplied is not valid, please enter a csv list") - - return list_values_nodup - - except ValueError: - print("error in value in remove_duplicate_from_list() method") - - -def compute_study_groups(factor_and_levels): - # TODO: rename compute_study_groups to compute_treatment - try: - study_groups = [dict(zip(factor_and_levels, x)) for x in product(*factor_and_levels.values())] - # print study_groups - return study_groups - except IOError: - print("error in compute_study_groups() method") - - -def get_number_of_factors(): - try: - number = input("how many study non-interventional factors (i.e variable intrinsic to the model organism such as gender or strain or ethnicity) are there? (provide an integer): ") - return number - except IOError: - print("error in get_number_of_factors() method") - - -def intervention_or_observation(): - try: - is_intervention = True - inter_or_obs = input("is the study an intervention or an observation (please select key)?" - " (intervention [1]/observation [2])") - # intervention - if inter_or_obs == "1": - is_intervention = True - - # observation - elif inter_or_obs == "2": - is_intervention = False - - else: - print("answer should be either 'intervention' or 'observation'") - print("answer not recognized, choose between 'intervention' or 'observation'") - - return is_intervention - except IOError: - print("input error in intervention_or_observation() method") - - -def single_or_repeated_treatment(): - treatment_repeat = False - try: - treatment_repeat_input = input("are study subjects exposed to a single intervention or to multiple intervention" - " (applied sequentially)? (choose either 'single [1]' or 'multiple [2]')") - if treatment_repeat_input == '1': - treatment_repeat = False - elif treatment_repeat_input == '2': - treatment_repeat = True - else: - print('invalid input, please try again') - single_or_repeated_treatment() - - return treatment_repeat - except IOError: - print("input error in single_or_repeated_treatment() method") - - -def get_repeat_number(): - try: - nbr_of_repeats_input = input("how many interventions each subject receives in total (enter an integer)? ") - nbr_of_repeats = int(nbr_of_repeats_input) - return nbr_of_repeats - except IOError: - print("get_repeat_number() method error") - - -def get_processrun_random_token(number_of_elements): - try: - # number_of_elements = input("what is the size of the plate/list? ") - run_order = [] - my_list = list(range(int(number_of_elements))) - new_list = [x + 1 for x in my_list] - # print("my list: ",new_list) - run_order = random.sample(new_list, len(new_list)) - # print("run order: ", run_order) - return run_order - - except NotImplemented: - print('something went wrong in get_processrun_random_token() method') - - -def create_control_element(some_inv, control_type, quantity, frequency): - - try: - if control_type == "1": - for entity in 1..quantity: - control_source = Source(name='solvent blank', id_=entity) - some_inv.sources.append(control_source) - if control_type == "2": - for entity in 1..quantity: - control_source = Source(name='sample preparation blank', id_=entity) - some_inv.sources.append(control_source) - if control_type == "3": - for entity in 1..quantity: - control_source = Source(name='study reference material', id_=entity) - some_inv.sources.append(control_source) - else: - print('choice not,recognised,please try again') - # create_control_element() - - except NotImplemented: - print("something went wrong in create_control_element() method") - - -def get_list_of_interventions(some_investigation): - - try: - # IMPORTANT: we will first only support symmetric arms - treatment_type_list = input("list the different intervention types (comma-separated-values from the following" - " options {chemical intervention [1], behavioral intervention [2], " - "surgical intervention [3], " - "biological intervention [4], radiological intervention [5]}): ") - treatment_type_list = remove_duplicate_from_list(treatment_type_list) - - treatment_types = {} - for treatment_type in treatment_type_list: - treatment_type.strip() - if treatment_type == "1": - treatment_types["chemical intervention"] = {"agent": [], "dose": [], "duration of exposure": []} - f1 = StudyFactor(name="agent", factor_type=OntologyAnnotation(term="perturbation agent")) - some_investigation.studies[0].factors.append(f1) - f2 = StudyFactor(name="dose", factor_type=OntologyAnnotation(term="intensity")) - some_investigation.studies[0].factors.append(f2) - f3 = StudyFactor(name="duration of exposure", factor_type=OntologyAnnotation(term="time")) - some_investigation.studies[0].factors.append(f3) - - # set_factor_as_key("chemical agent", factor_dict) - - if treatment_type == "2": - # set_factor_as_key("behavioral agent", factor_dict) - treatment_types["behavioral intervention"] = {"agent": [], - "dose": [], - "duration of exposure": []} - - if treatment_type == "3": - # set_factor_as_key("surgery", factor_dict) - treatment_types["surgical intervention"] = {"surgery procedure": [], - "dose": [], - "duration post surgery": []} - - if treatment_type == "4": - # set_factor_as_key("biological agent", factor_dict) - treatment_types["biological intervention"] = {"agent": [], - "dose": [], - "duration of exposure": []} - - if treatment_type == "5": - # set_factor_as_key("radiological agent", factor_dict) - treatment_types["radiological intervention"] = {"agent": [], - "dose": [], - "duration of exposure": []} - - return treatment_types, some_investigation - - except IOError: - print("error in get_list_of_interventions() method") -# -# """if treatment_list != "" and treatment_list.isalnum(): -# return treatment_list -# else: -# print "the treatments supplied are not valid, please enter a string: " -# """ -# -# """if treatment""" -# -# -# """def get_factors_from_treatment_type(treatment_type_list):""" - - - -def compute_treatment_sequences(treatments, num_repeats): - try: - treatment_sequences = list(permutations(treatments, num_repeats)) - return treatment_sequences - except IOError: - print("error in compute_treatment_sequences() method") - - -def get_factor_name(): - try: - factor_name = input("provide factor name: ") - if factor_name != "" and factor_name.isalnum(): - return factor_name - else: - print("the factor supplied is not valid, please enter a string: ") - except IOError: - print("error in get_factor_name() method") - - -def set_factor_as_key(factor_name, factor_dict): - try: - this_factor_dict = factor_dict - if factor_name not in factor_dict.keys(): - this_factor_dict[factor_name] = [] - else: - print("factor already declared! define a new factor") - get_factor_name() - return this_factor_dict - except IOError: - print("error in set_factor_as_key() method") - - -def set_factor_values(factor_name, factor_dict): - try: - factor_values = input("provide the factor levels associated with '" + factor_name + - "' as a list of comma separated values: ") - factor_values = remove_duplicate_from_list(factor_values) - for element in factor_values: - factor_dict[factor_name].append(element) - return factor_dict - except IOError: - print("error in set_factor_values() method") - - -def balanced_design(): - try: - balanced_design_var = input("Are all study groups of the same size, i.e have the same number of subjects? " - "(in other words, are the groups balanced)? (balanced [1]/unbalanced [2])") - if balanced_design_var == "1": - is_balanced = True - return is_balanced - elif balanced_design_var == "2": - is_balanced = False - return is_balanced - else: - print("answer should be either 'balanced' or 'unbalanced'") - print("answer not recognized, choose between 'balanced' or 'unbalanced'") - except IOError: - print("Error in balanced_design() method") - - -def full_or_fractional(): - try: - full_or_fract = input("did you use a all possible groups or only a subset? (full [1]/fractional [2])") - if full_or_fract == "1": - full_or_fract = True - elif full_or_fract == "2": - full_or_fract = False - else: - print("answer not recognized, choose between 'full' or 'fractional'") - full_or_fractional() - - return full_or_fract - - except IOError: - print("error in full_or_fractional() method") - - -def free_or_restricted_randomization(): - try: - design = "" - hardtochange = input("Are there 'hard to change' factors," - " which restrict randomization of experimental unit? (no [1]/yes [2])") - if hardtochange == "1": - # free_randomization = True - design = "factorial design" - elif hardtochange == "2": - # free_randomization = False - splitplot = input("how many 'hard to change factors'? (1/2") - if splitplot == "1": - design = "split plot design" - elif splitplot == "2": - design = "split split plot design" - else: - print("answer not recognized, choose between 'yes' or 'no'") - free_or_restricted_randomization() - - return design - - except IOError: - print("error in complete_or_restricted_randomization() method") - - -def choose_fluid_or_solid_or_both(): - - this_sample_type = input("are the samples 'solid'[1] or 'fluid'[2] or 'both'[3]? ") - if this_sample_type == "1": - return this_sample_type - # collected_samples(sample_type) - elif this_sample_type == "2": - return this_sample_type - # collected_samples(sample_type) - elif this_sample_type == "3": - return this_sample_type - # collected_samples(sample_type) - else: - print("input not recognised") - choose_fluid_or_solid_or_both() - -# def sample_collection_plan(sample_types): -# -# samples_and_events = {} -# for s_type in sample_types: -# specific_sampling_events = input("for sample type " + "'" + str(s_type) + "'," + -# " how many times each of the samples have been collected (integer): ") -# # specific_sampling_events = remove_duplicate_from_list(specific_sampling_events) -# samples_and_events[str(s_type)] = specific_sampling_events -# -# return samples_and_events - - -def sample_collection_events(some_sample_type): - - try: - sample_types = [] - samples_and_events_plan = {} - - if some_sample_type == "1": - sample_types = input("select from the following list (liver,kidney,muscle,brain,lung,flower): ") - sample_types = remove_duplicate_from_list(sample_types) - # return solid_samples - - elif some_sample_type == "2": - sample_types = input("select from the following list (urine,blood,csf,sweat,lavage): ") - # for example: blood,urine,sweat,muscle - sample_types = remove_duplicate_from_list(sample_types) - # return fluid_samples - - elif some_sample_type == "3": - sample_types = input("select from the following list (urine,blood,csf,sweat,lavage): ") - sample_types = remove_duplicate_from_list(sample_types) - # print(sample_types) - s_sample_types = input("select from the following list (liver,kidney,muscle,brain,lung,2): ") - s_sample_types = remove_duplicate_from_list(s_sample_types) - sample_types.append(s_sample_types) - - else: - print("input not recognised") - # choose_fluid_or_solid_both() - print(sample_types) - for s_type in sample_types: - specific_sampling_events = input("for sample type " + "'" + str(s_type) + "'," + - " how many times each of the samples have been collected (integer): ") - # specific_sampling_events = remove_duplicate_from_list(specific_sampling_events) - samples_and_events_plan[str(s_type)] = specific_sampling_events - print(samples_and_events_plan) - - return samples_and_events_plan - except IOError: - print("error in sample_collection_events() method") - - -# def define_sample_collection_plan(): -# def define_assay_data_plan(): - -def create_study_subjects(group_size, this_study, group_uuid, group_factor_combo, some_sampling_event_plan): - try: - - if group_size > 0: - - # sample_type = choose_fluid_or_solid_or_both() - # collected_materials = collected_sample_types(sample_type) - # sampling_plan = sample_collection_plan(collected_materials) - - for individual in range(group_size): - source_name = "studygroup_" + str(group_uuid) + "_subject#" + str(individual) - ncbitaxon = OntologySource(name="NCBITaxon", description="NCBI Taxonomy") - characteristic_organism = Characteristic(category=OntologyAnnotation(term="organism"), - value=OntologyAnnotation(term="Homo sapiens", - term_source=ncbitaxon, - term_accession="http://purl.bioontology.org/ontology/NCBITAXON/9606")) - # TODO: request taxonomic information from users - source = Source(name=source_name) - source.characteristics.append(characteristic_organism) - # print("source: ", source.name, source.characteristics[0].category.term, - # source.characteristics[0].value.term) - this_study.studies[0].sources.append(source) - - for tissue, number_of_collections in some_sampling_event_plan.items(): - - for specimen_number in range(int(number_of_collections)): - - sample_name = source_name + "_" + "sample#" + str(specimen_number) + "_" + tissue - sample_template = Sample(name=sample_name, derives_from=[source]) - characteristic_op = Characteristic(category=OntologyAnnotation(term="organism part"), - value=OntologyAnnotation(term=tissue)) - sample_template.characteristics.append(characteristic_op) - characteristic_rk = Characteristic(category=OntologyAnnotation(term="collection event rank"), - value=OntologyAnnotation(term=str(specimen_number+1))) - sample_template.characteristics.append(characteristic_rk) - # print("sample type: " + key, ", number of collection events: " + value + " times.") - - # this_study.studies[0].samples = batch_create_materials(prototype_sample, n=2) - # for sam in this_study.studies[0].samples: - # sample_name = source_name + "_" + "sample#" + str(i) - # sample = Sample(name=sample_name, derives_from=[source]) - - combo = group_factor_combo - # print("this study group from create_study_subject: ", combo) - for key, value in combo.items(): - # print("this key:", key) - for item in range(0, len(this_study.studies[0].factors)): - if key == this_study.studies[0].factors[item].name: - # print("factor name: ", this_study.studies[0].factors[item].name) - fv = FactorValue(factor_name=this_study.studies[0].factors[item], - value=OntologyAnnotation(term=combo[key])) - sample_template.factor_values.append(fv) - - # print("sample: ", sample_template.name) - this_study.studies[0].samples.append(sample_template) - process_name = "protocol_" + str(group_uuid) - sample_collection_process = Process(name=process_name, - executes_protocol=this_study.studies[0].protocols[0], - date_=datetime.date.today() + datetime.timedelta(days=-30), - performer="bob") - sample_collection_process.outputs.append(sample_template) - sample_collection_process.inputs.append(source) - - this_study.studies[0].process_sequence.append(sample_collection_process) - # print("p: ", sample_collection_process.name, sample_collection_process.executes_protocol.name, - # sample_collection_process.inputs[0].name, sample_collection_process.outputs[0].name, - # sample_collection_process.date, sample_collection_process.performer) - - return this_study - - except NotImplemented: - print("error in create_study_subject() method") - - -def set_study_arms(list_of_study_group_dictionaries, this_investigation, this_repeats): - try: - # print(this_repeats) - study_groups = {} - # forf = full_or_fractional() - bd = balanced_design() - - if bd is True and this_repeats is False: - - size = input("provide the number of subject per study group (must be an integer): ") - if size.isdigit(): - size = int(size) - if size > 0: - - study_group_size = size - stato = OntologySource(name="STATO", description="Ontology for Statistical Methods") - this_investigation.ontology_source_references.append(stato) - design_term = OntologyAnnotation(term_source=stato) - design_term.term = "balanced design" - design_term.term_accession = "http://purl.obolibrary.org/obo/STATO_0000003" - this_investigation.studies[0].design_descriptors.append(design_term) - - comment1 = Comment(name="number of study groups", value=len(list_of_study_group_dictionaries)) - comment2 = Comment(name="study group size", value=int(study_group_size)) - - this_investigation.studies[0].comments.append(comment1) - this_investigation.studies[0].comments.append(comment2) - - sample_type = choose_fluid_or_solid_or_both() - current_sampling_plan = sample_collection_events(sample_type) - - sg_index = 0 - for sg_index in range(len(list_of_study_group_dictionaries)): - study_groups["guid"] = uuid.uuid4() - study_groups["id"] = sg_index - study_groups["factor_level_combo"] = list_of_study_group_dictionaries[sg_index] - # print("this study group: ", study_groups["factor_level_combo"]) - study_groups["size"] = study_group_size - this_investigation = create_study_subjects(study_group_size, - this_investigation, - study_groups["guid"], - study_groups["factor_level_combo"], - current_sampling_plan) - else: - print("invalid input, please try again") - - elif bd is False and this_repeats is False: - for sg_index in range(len(list_of_study_group_dictionaries)): - study_groups["guid"] = uuid.uuid4() - study_groups["id"] = sg_index - study_groups["factor_level_combo"] = list_of_study_group_dictionaries[sg_index] - size = input("provide the number of subject per study group (must be an integer): ") - size = int(size) - if int(size) > 0: - study_group_size = size - stato = OntologySource(name="STATO", description="Ontology for Statistical Methods") - this_investigation.ontology_source_references.append(stato) - design_term = OntologyAnnotation(term_source=stato) - design_term.term = "unbalanced design" - design_term.term_accession = "http://purl.obolibrary.org/obo/STATO_000000X" - this_investigation.studies[0].design_descriptors.append(design_term) - - study_groups["size"] = study_group_size - sample_type = choose_fluid_or_solid_or_both() - current_sampling_plan = sample_collection_events(sample_type) - - this_investigation = create_study_subjects(study_group_size, - this_investigation, - study_groups["guid"], - study_groups["factor_level_combo"], - current_sampling_plan) - else: - print("invalid input, please try again") - - print(study_groups) - - elif bd is False and this_repeats is True: - - nb_repeats = input("state the number of consecutive treatments (integer): ") - # print study_factor_combo - sequences = compute_treatment_sequences(list_of_study_group_dictionaries, int(nb_repeats)) - print("sequences") - for sg_index in range(len(sequences)): - study_groups["guid"] = uuid.uuid4() - study_groups["id"] = sg_index - study_groups["sequence"] = sequences[sg_index] - size = input("provide the number of subject per study arm (must be an integer): ") - size = int(size) - if int(size) > 0: - study_group_size = size - study_groups["size"] = study_group_size - sample_type = choose_fluid_or_solid_or_both() - current_sampling_plan = sample_collection_events(sample_type) - this_investigation = create_study_subjects(study_group_size, - this_investigation, - study_groups["guid"], - study_groups["factor_level_combo"], - current_sampling_plan) - - else: - print("invalid input, please try again") - - else: - nb_repeats = input("state the number of consecutive treatments (integer): ") - # print study_factor_combo - sequences = compute_treatment_sequences(list_of_study_group_dictionaries, int(nb_repeats)) - print(sequences) - for sg_index in range(len(sequences)): - study_groups["guid"] = uuid.uuid4() - study_groups["id"] = sg_index - study_groups["sequence"] = sequences[sg_index] - size = input("provide the number of subject per study arm (must be an integer): ") - size = int(size) - if int(size) > 0: - study_group_size = size - study_groups["size"] = study_group_size - sample_type = choose_fluid_or_solid_or_both() - current_sampling_plan = sample_collection_events(sample_type) - this_investigation = create_study_subjects(study_group_size, - this_investigation, - study_groups["guid"], - study_groups["factor_level_combo"], - current_sampling_plan) - else: - print("invalid input, please try again") - - # print(study_groups) - - return this_investigation, current_sampling_plan - - except IOError: - print("error in set_study_arms() method") - -# def sample_collection_plan(sample_types): -# -# samples_and_events = {} -# for s_type in sample_types: -# specific_sampling_events = input("for sample type " + "'" -# + str(s_type) + "'," + -# " how many times each of the samples have been collected (integer): ") -# specific_sampling_events = remove_duplicate_from_list( -# specific_sampling_events) -# samples_and_events[str(s_type)] = specific_sampling_events -# -# return samples_and_events - -# def collection_sample_type(): -# try: -# sample_types = input("list the type of sample collected from each -# study group member as csv list: ") -# # for example: blood,urine,sweat,muscle -# sample_types = remove_duplicate_from_list(sample_types) -# return sample_types -# except IOError: -# print("error in collection_sample_type() method") - -# def collected_sample_types(some_sample_type): -# TODO implement pulling the list of allowed values from ISA configuration or -# another configuration files -# -# if some_sample_type == "1": -# sample_types = input("select from the following list (liver,kidney,muscle,brain,lung,flower): ") -# sample_types = remove_duplicate_from_list(sample_types) -# # return solid_samples -# -# elif some_sample_type == "2": -# sample_types = input("select from the following list (urine,blood,csf,sweat,lavage): ") -# # for example: blood,urine,sweat,muscle -# sample_types = remove_duplicate_from_list(sample_types) -# # return fluid_samples -# -# elif some_sample_type == "3": -# sample_types = input("select from the following list (urine,blood,csf,sweat,lavage): ") -# sample_types = remove_duplicate_from_list(sample_types) -# # print(sample_types) -# -# s_sample_types = input("select from the following list (liver,kidney,muscle,brain,lung,2): ") -# s_sample_types = remove_duplicate_from_list(s_sample_types) -# -# sample_types.append(s_sample_types) -# -# else: -# print("input not recognised") -# choose_fluid_or_solid_both() -# -# return sample_types - - -def define_assay_plan(some_investigation, some_sample_collection_events): - - try: - study_assay_plan = [] - - applies_to_all = input("will all samples be tested with the same set of assays? yes[1]/no[2]") - - if applies_to_all == "1": - initial_sample_assay_plan = input("provide assay types being used as a comma separated list:" - " [1]:transcription profiling using ngs, " - " [2]:transcription profiling using DNA microarray," - " [3]:targeted metabolite profiling using mass spectrometry," - " [4]:metabolite profiling using NMR spectroscopy? ") - - initial_sample_assay_plan = remove_duplicate_from_list(initial_sample_assay_plan) - print("initial sample assay plan:", initial_sample_assay_plan) - - for bio_material, nb_sampling_event in some_sample_collection_events.items(): - # print("biomat:", bio_material, "how many? ", nb_sampling_event) - for element in range(int(nb_sampling_event)): - for this_item in range(len(initial_sample_assay_plan)): - sample_assay_plan = {"sample_type": bio_material, - "sample_number": element+1, - "assay_type": initial_sample_assay_plan[this_item]} - study_assay_plan.append(sample_assay_plan) - print("final number of assay plans:", len(study_assay_plan)) - - elif applies_to_all == "2": - - # we need to iterate through each sample type and record the relevant assays for that sample type - - for bio_material, nb_sampling_event in some_sample_collection_events.items(): - # sample_assay_plan = {"sample_type": "", "sample_number": "", "assay_type": []} - print("biomat:", bio_material, "how many? ", nb_sampling_event) - initial_sample_assay_plan = input("select assay types being used for that sample type '" + bio_material - + "' as a comma separated list:" - " [1]:transcription profiling using ngs, " - " [2]:transcription profiling using DNA microarray," - " [3]:targeted metabolite profiling using mass spectrometry," - " [4]:metabolite profiling using NMR spectroscopy? ") - - initial_sample_assay_plan = remove_duplicate_from_list(initial_sample_assay_plan) - - to_all_of_these = input("will these assays be performed on all specimens" - " of this sample type? yes[1]/no[2]") - - if to_all_of_these == "2": - for element in range(int(nb_sampling_event)): - this_sample_assay_plan = input("select assay types being used for that sample type" - " as a comma separated list:" - " [1]:transcription profiling using ngs, " - " [2]:transcription profiling using DNA microarray," - " [3]:targeted metabolite profiling using mass spectrometry," - " [4]:metabolite profiling using NMR spectroscopy? ") - - study_assay_plan = remove_duplicate_from_list(this_sample_assay_plan) - - for this_item in range(len(initial_sample_assay_plan)): - sample_assay_plan = {"sample_type": bio_material, - "sample_number": element+1, - "assay_type": initial_sample_assay_plan[this_item]} - study_assay_plan.append(sample_assay_plan) - # [{"sample_type":"liver", "sample_number":"1", "assay_types": ["1","2","3"]}] - - print(this_sample_assay_plan[0]["sample_type"]) - - elif to_all_of_these == "1": - for element in range(int(nb_sampling_event)): - for this_item in range(len(initial_sample_assay_plan)): - sample_assay_plan = {"sample_type": bio_material, - "sample_number": element+1, - "assay_type": initial_sample_assay_plan[this_item]} - study_assay_plan.append(sample_assay_plan) - - else: - print("input not recognized, please reiterate your selection.") - - else: - print("input not recognized, please reiterate your selection.") - define_assay_plan(some_investigation, some_sample_collection_events) - print("number of assay plans from define_assay_plan(): ", len(study_assay_plan)) - - return some_investigation, study_assay_plan - - except IOError: - print("error in define_assay_plan() method") - - -def set_assay_type_topology_modifiers(this_sample_type, this_sampling_event, this_assay_type): - # TODO: refactor in order to implement modular assay specific topologies, switching between cases depending on assay - # TODO: types supplied by users with the define_assay_plan() method - try: - # this_assay_type = input( - # "which assay types are being used: [1]:transcription profiling using ngs, " - # " [2]:transcription profiling using DNA microarray," - # " [3]:targeted metabolite profiling using mass spectrometry," - # " [4]:metabolite profiling using NMR spectroscopy? ") - sample_assay_plans = [] - - # for this_assay_type in range(len(this_assay_type_array)): - with_topology_params = {"sample type": "", - "collection event": "", - "assay type": 0, - "params": { - "distinct libraries": 0, - "distinct array designs": 0, - "number of injection modes": 0, - "number of acquisition modes": 0, - "number of channels": 0, - "pulse sequences": 0, - "number of technical replicates": 0} - } - # print("this assay type:", this_assay_type_array[this_assay_type], "counter:", this_assay_type) - if int(this_assay_type) == 1: - - nb_library = input( - "how many distinct libraries per sample (provide an positive integer, default is 1)?") - nb_multiplexing_channels = input("how many labels were used (provide an positive integer, default is 1)?") - nb_technical_rep = input("how many technical replicate for each sample, default is 1?") - with_topology_params["sample type"] = this_sample_type - with_topology_params["collection event"] = this_sampling_event - with_topology_params["assay type"] = 1 - with_topology_params["params"]["distinct libraries"] = nb_library - with_topology_params["params"]["number of channels"] = nb_multiplexing_channels - with_topology_params["params"]["number of technical replicates"] = nb_technical_rep - - elif int(this_assay_type) == 2: - - nb_chip_design = input( - "how many distinct microarray designs (provide an positive integer, default is 1)?") - nb_multiplexing_channels = input("how many labels were used (provide an positive integer, default is 1)?") - nb_technical_rep = input("how many technical replicate for each sample, default is 1?") - - with_topology_params["sample type"] = this_sample_type - with_topology_params["collection event"] = this_sampling_event - with_topology_params["assay type"] = 2 - with_topology_params["params"]["distinct array designs"] = nb_chip_design - print("typology:", with_topology_params["params"]["distinct array designs"]) - with_topology_params["params"]["number of channels"] = nb_multiplexing_channels - with_topology_params["params"]["number of technical replicates"] = nb_technical_rep - - elif this_assay_type == "3": - - injection_modes = input( - "how many distinct sample introduction modes (1:FIA,2:LC,3:GC)?") - injection_modes = remove_duplicate_from_list(injection_modes) - acquisition_modes = input( - "how many distinct acquisition modes (1:negative mode, 2:positive mode) ?") - acquisition_modes = remove_duplicate_from_list(acquisition_modes) - # nb_channels = input("how many labels were used (provide an positive integer, default is 1)?") - nb_technical_rep = input("how many technical replicate for each sample, default is 1?") - - with_topology_params["sample type"] = this_sample_type - with_topology_params["collection event"] = this_sampling_event - with_topology_params["assay type"] = 3 - with_topology_params["params"]["injection modes"] = injection_modes - with_topology_params["params"]["number of channels"] = acquisition_modes - with_topology_params["params"]["number of technical replicates"] = nb_technical_rep - - elif this_assay_type == "4": - - injection_modes = input( - "how many distinct sample introduction modes (1:autoloader,2:LC,3:GC)?") - injection_modes = remove_duplicate_from_list(injection_modes) - acquisition_modes = input( - "how many distinct acquisition modes (1:COSY, 2:NOESY, 3:TOSCY, 4:CPMG ,5: INEPT, 6:HMQC, 7:WATERGATE) ?") - acquisition_modes = remove_duplicate_from_list(acquisition_modes) - # nb_multiplexing_channels = input("how many labels were used (provide an positive integer, default is 1)?") - nb_technical_rep = input("how many technical replicate for each sample, default is 1?") - - with_topology_params["sample type"] = this_sample_type - with_topology_params["collection event"] = this_sampling_event - with_topology_params["assay type"] = 4 - with_topology_params["params"]["injection modes"] = injection_modes - with_topology_params["params"]["pulse sequences"] = acquisition_modes - # typology_params["params"]["number of channels"] = acquisition_modes - with_topology_params["params"]["number of technical replicates"] = nb_technical_rep - - # else: - # print("input not recognised in set_assay_type_topology_modifiers() method") - # set_assay_type_topology_modifiers( this_sample_type, this_assay_type) - - # sample_assay_plans.append(typology_params) - - return with_topology_params - # nb_chip_design, nb_multiplexing_channels, nb_technical_rep - - except IOError: - print("error in set_assay_type_topology_modifiers() method") - - -# def create_assays(some_assay_plan,some_assay_object): -# # -# for item in range(len(assay_plan)): -# # print("assay definitions are: ", assay_definitions[item]) -# print("dealing with the first assay plan, for the specimen of sample type :", assay_plan[item]["sample type"], -# "for collection event:", assay_plan[item]["collection event"]) -# # print("sample type:", assay_definitions[item]["sample type"], -# # "| assay type: ", assay_definitions[item]["assay type"], -# # "| assay params: ", assay_definitions[item]["params"]) -# # dealing with Mass Spectrometry Applications -# if assay_plan[item]["assay type"] == 3: -# # TODO: implement get_or_create method and refactor -# ms = [a for a in new_inv.studies[0].assays if -# a.measurement_type.term == "metabolite profiling" and a.technology_type.term == "liquid chromatography mass spectrometry" and a.filename == "a_mp_lcms.txt"] -# if len(ngs) > 0: -# print("yes, exists in 1", ms) -# # if such an assay table already exists, we retrieve it -# this_assay = ms[0] -# else: -# # or print('nothing found, creating a new object)...') -# this_assay = Assay(measurement_type=OntologyAnnotation(term="metabolite profiling"), -# technology_type=OntologyAnnotation(term="liquid chromatography mass spectrometry"), -# filename="a_mp_lcms.txt") -# # the object is attached to the relevant study -# new_inv.studies[0].assays.append(this_assay) -# -# extraction_protocol = Protocol(name='metabolite extraction', -# protocol_type=OntologyAnnotation(term="material separation")) -# new_inv.studies[0].protocols.append(extraction_protocol) -# -# # lc_protocol = Protocol(name="liquid chromatography", -# # protocol_type=OntologyAnnotation(term="material separation")) -# # new_inv.studies[0].protocols.append(labeling_protocol) -# -# ms_protocol = Protocol(name='liquid chromatography mass spectrometry', -# protocol_type=OntologyAnnotation(term="data collection")) -# new_inv.studies[0].protocols.append(sequencing_protocol) -# -# index_i = 0 -# index_j = 0 -# index_k = 0 -# # for index_i, sample in enumerate(new_inv.studies[0].samples): -# some_sample_list = [sample for sample in new_inv.studies[0].samples if -# sample.characteristics[0].value.term == assay_plan[item]["sample type"] and -# sample.characteristics[1].value.term == assay_plan[item]["collection event"]] -# print("number of samples: ", len(some_sample_list)) -# extractlist_before = [ext for ext in new_inv.studies[0].assays[0].other_material if -# ext.type == "Extract Name"] -# # print("number of extracts", len(extractlist_before)) -# -# for index_i, sample in enumerate([sample for sample in new_inv.studies[0].samples if -# sample.characteristics[0].value.term == assay_plan[item][ -# "sample type"]]): -# # print("i: ", index_i, "sample: ", sample.characteristics[1].value.term) -# # print("current collection event", assay_plan[item]["collection event"]) -# if str(sample.characteristics[1].value.term) == str(assay_plan[item]["collection event"]): -# # create an extraction process that executes the extraction protocol -# extraction_process = Process(executes_protocol=[prtcl for prtcl in new_inv.studies[0].protocols -# if prtcl.name == "metabolite extraction"][0], -# performer="rick", -# date_=datetime.datetime.now()) -# -# # extraction process takes as input a sample, and produces an extract material as output -# # we make sure only the right kind of samples get assayed so we check against the sample type -# # if sample.characteristics[0].value.term == assay_plan[item]["sample type"]: -# # print("sample characteristics: ", sample.characteristics[0].value.term) -# -# extraction_process.inputs.append(sample) -# extract = Material(name=sample.name + "extract-{}".format(index_i)) -# extract.type = "Extract Name" -# extraction_process.outputs.append(extract) -# -# # this loop is meant to handle the case where several acquisition modes (e.g. Neg or positive) are used from a sample -# # TODO: include a function to obtain the relevant parameters used for data acquisition -# for index_j in range(int(assay_plan[item]["params"]["platforms"])): -# # this inner is for handling multiple runs of the same platform, i.e. tech replicates -# for index_k in range( -# int(assay_plan[item]["params"]["number of technical replicates"])): -# prtcl_name = [prtcl for prtcl in new_inv.studie s[0].protocols -# if prtcl.name == "data collection"][0] -# -# data_acq_process = Process(executes_protocol=prtcl_name, -# performer="louis", -# date_=datetime.datetime.now()) -# -# platform_name = "platform-{}".format(index_j) -# data_acq_process.name = "assay-name-{}".format(index_i) + "_" + platform_name + \ -# "_run-{}".format(index_k) -# data_acq_process.inputs.append(labeling_process.outputs[0]) -# -# # data acquisition process usually has an output data file -# datafile = DataFile( -# filename="acquired-data-{}".format(index_i) + "_" + platform_name + -# "_run-{}".format(index_k) + ".mzml.gz", -# label="MS Raw Data File") -# data_acq_process.outputs.append(datafile) -# -# # ensure Processes are linked forward and backward -# extraction_process.next_process = data_acq_process -# # labeling_process.prev_process = extraction_process -# extraction_process.next_process = data_acq_process -# # data_acq_process.prev_process = labeling_process -# data_acq_process.prev_process = extraction_process -# -# # make sure extract(library), data file, and the processes are attached to the assay -# this_assay.data_files.append(datafile) -# this_assay.other_material.append(extract) -# # this_assay.other_material.append(le) -# this_assay.process_sequence.append(extraction_process) -# # this_assay.process_sequence.append(labeling_process) -# this_assay.process_sequence.append(data_acq_process) -# -# # extractlist_after = [ext for ext in new_inv.studies[0].assays[0].other_material if -# # ext.type == "Extract Name"] -# # print("number of extracts", len(extractlist_after)) - - -# MAIN METHOD: - -def main(): - - intervention_list = [] - - intervention_check = intervention_or_observation() - - if intervention_check is True: - - try: - new_inv = use_default_inv() - repeats = single_or_repeated_treatment() - free_or_restricted_design = free_or_restricted_randomization() - assay_plan = [] - - if repeats is False and "factorial" in free_or_restricted_design: - - obi = OntologySource(name="OBI", description="Ontology for Biomedical Investigations") - new_inv.ontology_source_references.append(obi) - stato = OntologySource(name="STATO", description="Ontology for Statistical Methods") - new_inv.ontology_source_references.append(stato) - design1 = OntologyAnnotation(term_source=obi) - design1.term = "intervention design" - design1.term_accession = "http://purl.obolibrary.org/obo/OBI_0000115" - new_inv.studies[0].design_descriptors.append(design1) - design2 = OntologyAnnotation(term_source=stato) - design2.term = "full factorial design" - design2.term_accession = "http://purl.obolibrary.org/obo/STATO_0000270" - new_inv.studies[0].design_descriptors.append(design2) - - intervention_list, new_inv = get_list_of_interventions(new_inv) - - assay_plan = [] - for intervention_type in intervention_list.keys(): - # print("type of intervention: ", intervention_type) - for factor in intervention_list[intervention_type].keys(): - # print("factor :", factor) - set_factor_values(factor, intervention_list[intervention_type]) - # print("associated factor values:", intervention_list[intervention_type][factor]) - - # study_factor_combo = compute_study_groups(my_factors) - study_group_dictionaries = compute_study_groups(intervention_list[intervention_type]) - # print("study groups:", study_group_dictionaries) - new_inv, sampling_plan = set_study_arms(study_group_dictionaries, new_inv, repeats) - # print("is this correct?" , new_inv.studies[0].sources[0].name) - - new_inv, assay_plan = define_assay_plan(new_inv, sampling_plan) - - print("number of assay plans in Main: ", len(assay_plan)) - - for l in range(len(assay_plan)): - assay_plan[l] = set_assay_type_topology_modifiers(assay_plan[l]["sample_type"], - assay_plan[l]["sample_number"], - assay_plan[l]["assay_type"]) - - # applies_to_all_plan_of_that_assay_type = input("Apply this parameter selection to all plans using this assay type? [1]Yes/2[No]") - - # if applies_to_all_plan_of_that_assay_type == 1: - - - print(assay_plan[l]) - - # assay_definitions.append(set_assay_type_topology_modifiers(assay_plan[l]["sample_type"], - # assay_plan[l]["assay_type"])) - # print("assay plan: ", assay_plan[l]["sample_type"], "|", assay_plan[l]["assay_type"]) - - # for m in range(len(assay_plan[l]["assay_types"])): - - print("number of assay full definitions", len(assay_plan)) - # print(assay_definitions[0]["sample type"]) - - for item in range(len(assay_plan)): - # print("assay definitions are: ", assay_definitions[item]) - print("dealing with the first assay plan, for the specimen of sample type :", assay_plan[item]["sample type"], "for collection event:", assay_plan[item]["collection event"]) - # print("sample type:", assay_definitions[item]["sample type"], - # "| assay type: ", assay_definitions[item]["assay type"], - # "| assay params: ", assay_definitions[item]["params"]) - - if assay_plan[item]["assay type"] == 1: - # TODO: implement get_or_create method and refactor - ngs = [a for a in new_inv.studies[0].assays if a.measurement_type.term == "transcription profiling" and a.technology_type.term == "nucleic acid sequencing" and a.filename == "a_tp_ngs.txt"] - if len(ngs) > 0: - print("yes, exists in 1", ngs) - # if such an assay table already exists, we retrieve it - this_assay = ngs[0] - else: - # or print('nothing found, creating a new object)...') - this_assay = Assay(measurement_type=OntologyAnnotation(term="transcription profiling"), - technology_type=OntologyAnnotation(term="nucleic acid sequencing"), - filename="a_tp_ngs.txt") - # the object is attached to the relevant study - new_inv.studies[0].assays.append(this_assay) - - extraction_protocol = Protocol(name='RNA extraction', - protocol_type=OntologyAnnotation(term="material separation")) - new_inv.studies[0].protocols.append(extraction_protocol) - - labeling_protocol = Protocol(name="nucleic acid library preparation", - protocol_type=OntologyAnnotation(term="material labeling")) - new_inv.studies[0].protocols.append(labeling_protocol) - - sequencing_protocol = Protocol(name='nucleic acid sequencing', - protocol_type=OntologyAnnotation(term="data collection")) - new_inv.studies[0].protocols.append(sequencing_protocol) - - i = 0 - j = 0 - k = 0 - # for i, sample in enumerate(new_inv.studies[0].samples): - samplelist=[sample for sample in new_inv.studies[0].samples if - sample.characteristics[0].value.term == assay_plan[item]["sample type"] and sample.characteristics[1].value.term == assay_plan[item]["collection event"]] - # print("number of samples: ", len(samplelist)) - extractlist_before = [ext for ext in new_inv.studies[0].assays[0].other_material if ext.type == "Extract Name"] - # print("number of extracts", len(extractlist_before)) - - for i, sample in enumerate([sample for sample in new_inv.studies[0].samples if - sample.characteristics[0].value.term == assay_plan[item][ - "sample type"]]): - # print("i: ", i, "sample: ", sample.characteristics[1].value.term) - # print("current collection event", assay_plan[item]["collection event"]) - if str(sample.characteristics[1].value.term) == str(assay_plan[item]["collection event"]): - # create an extraction process that executes the extraction protocol - extraction_process = Process(executes_protocol=[prtcl for prtcl in new_inv.studies[0].protocols - if prtcl.name == "RNA extraction"][0], - performer="amy", - date_=datetime.datetime.now()) - - # extraction process takes as input a sample, and produces an extract material as output - # we make sure only the right kind of samples get assayed so we check against the sample type - # if sample.characteristics[0].value.term == assay_plan[item]["sample type"]: - # print("sample characteristics: ", sample.characteristics[0].value.term) - - extraction_process.inputs.append(sample) - extract = Material(name=sample.name+"extract-{}".format(i)) - extract.type = "Extract Name" - extraction_process.outputs.append(extract) - - # TODO: support multiplex identifiers in a future release - labeling_process = Process( - executes_protocol=[prtcl for prtcl in new_inv.studies[0].protocols - if prtcl.name == "nucleic acid library preparation"][0], - performer="xua", - date_=datetime.datetime.now() - ) - # extraction process takes as input a sample, and produces an extract material as output - labeling_process.inputs.append(extract) - le = Material(name= extract.name +"labeled-extract-{}".format(i)) - le.type = "Labeled Extract Name" - dye = Characteristic(category=OntologyAnnotation(term="label"), - value=OntologyAnnotation(term="none")) - le.characteristics.append(dye) - labeling_process.outputs.append(le) - - # this loop is meant to handle the case where several libraries are produced from a sample - # TODO: include a function to obtain the relevant parameters used for library creation - for j in range(int(assay_plan[item]["params"]["distinct libraries"])): - # this inner is for handling multiple runs of the same library, ie tech replicates - for k in range( - int(assay_plan[item]["params"]["number of technical replicates"])): - prtcl_name = [prtcl for prtcl in new_inv.studies[0].protocols - if prtcl.name == "nucleic acid sequencing"][0] - - data_acq_process = Process(executes_protocol=prtcl_name, - performer="louis", - date_=datetime.datetime.now()) - - library_name = "library-{}".format(j) - data_acq_process.name = "assay-name-{}".format(i) + "_" + library_name + \ - "_run-{}".format(k) - data_acq_process.inputs.append(labeling_process.outputs[0]) - - # data acquisition process usually has an output data file - datafile = DataFile( - filename="sequence-data-{}".format(i) + "_" + library_name + - "_run-{}".format(k) + ".fastq.gz", - label="Raw Data File") - data_acq_process.outputs.append(datafile) - - # ensure Processes are linked forward and backward - extraction_process.next_process = labeling_process - labeling_process.prev_process = extraction_process - labeling_process.next_process = data_acq_process - data_acq_process.prev_process = labeling_process - - # make sure extract(library), data file, and the processes are attached to the assay - this_assay.data_files.append(datafile) - this_assay.other_material.append(extract) - this_assay.other_material.append(le) - this_assay.process_sequence.append(extraction_process) - this_assay.process_sequence.append(labeling_process) - this_assay.process_sequence.append(data_acq_process) - - extractlist_after = [ext for ext in new_inv.studies[0].assays[0].other_material if - ext.type == "Extract Name"] - print("number of extracts", len(extractlist_after)) - - elif assay_plan[item]["assay type"] == 2: - # TODO: refactor to rely on a specific function handling assay create (create_assays() method) - tx = [a for a in new_inv.studies[0].assays if a.measurement_type.term == "transcription profiling" and a.technology_type.term == "DNA microarray"] - if len(tx) > 0: - print("yes, exists in 2", tx) - this_assay = tx[0] - else: - this_assay = Assay(measurement_type=OntologyAnnotation(term="transcription profiling"), - technology_type=OntologyAnnotation(term="DNA microarray"), - filename="a_tp_microarray.txt") - # attach the assay to the study - new_inv.studies[0].assays.append(this_assay) - - extraction_protocol = Protocol(name='RNA extraction', - protocol_type=OntologyAnnotation(term="material separation")) - new_inv.studies[0].protocols.append(extraction_protocol) - - labeling_protocol = Protocol(name="nucleic acid labeling", - protocol_type=OntologyAnnotation(term="material labeling")) - new_inv.studies[0].protocols.append(labeling_protocol) - - hyb_protocol = Protocol(name='nucleic acid hybridization', - protocol_type=OntologyAnnotation(term="nucleic acid hybridization")) - - new_inv.studies[0].protocols.append(hyb_protocol) - - i = 0 - j = 0 - k = 0 - # for i, sample in enumerate(new_inv.studies[0].samples): - for i, sample in enumerate([sample for sample in new_inv.studies[0].samples if - sample.characteristics[0].value.term == assay_plan[item]["sample type"]]): - - if str(sample.characteristics[1].value.term) == str(assay_plan[item]["collection event"]): - # print("i: ", i, "sample: ", sample.characteristics[0].value.term) - - # create an extraction process that executes the extraction protocol - # [prtcl for prtcl in inv.studies[0].protocols if prtcl.name == "RNA extraction"][0] - - extraction_process = Process(executes_protocol=[prtcl for prtcl in new_inv.studies[0].protocols - if prtcl.name == "RNA extraction"][0], - performer="amy", - date_=datetime.datetime.now()) - - # extraction process takes as input a sample, and produces an extract material as output - # if sample.characteristics[0].value.term == assay_plan[item]["sample type"]: - - extraction_process.inputs.append(sample) - extract = Material(name="extract-{}".format(i)) - extract.type = "Extract Name" - extraction_process.outputs.append(extract) - - labeling_process = Process(executes_protocol=[prtcl for prtcl in new_inv.studies[0].protocols - if prtcl.name == "nucleic acid labeling"][0], - performer="xua", - date_=datetime.datetime.now() - ) - - # extraction process takes as input a sample, and produces an extract material as output - labeling_process.inputs.append(extract) - le = Material(name="labeled-extract-{}".format(i)) - le.type = "Labeled Extract Name" - dye = Characteristic(category=OntologyAnnotation(term="label"), - value=OntologyAnnotation(term="biotin")) - le.characteristics.append(dye) - labeling_process.outputs.append(le) - - # create a data acquisition process that executes a data acquisition protocol - # print('number of array-design: ', - # assay_definitions[item][0]["params"]["distinct array designs"]) - #assay_modifier1) - # print('number of technical replicates:', - # assay_definitions[item][0]["params"]["number of technical replicates"]) - # assay_modifier3) - - for j in range(int(assay_plan[item]["params"]["distinct array designs"])): - - for k in range(int(assay_plan[item]["params"]["number of technical replicates"])): - - prtcl_name = [prtcl for prtcl in new_inv.studies[0].protocols - if prtcl.name == "nucleic acid hybridization"][0] - - data_acq_process = Process(executes_protocol=prtcl_name, - performer="louis", - date_=datetime.datetime.now()) - - array_design_name = "arraydesign-{}".format(j) - data_acq_process.array_design_ref = OntologyAnnotation(term=array_design_name) - # print("with array_design: ", array_design_name) - array_design_as_pv = ParameterValue( - category=ProtocolParameter(parameter_name=OntologyAnnotation(term="array_design_ref")), - value=OntologyAnnotation(term=array_design_name)) - data_acq_process.parameter_values.append(array_design_as_pv) - - # print("data acquisition protocol name:", prtcl_name.name) - # print("replicate: ", k) - data_acq_process.name = "assay-name-{}".format(i) + "_" + array_design_name +\ - "_run-{}".format(k) - data_acq_process.array_design_ref = array_design_name - # print(data_acq_process.name) - data_acq_process.inputs.append(labeling_process.outputs[0]) - - # process usually has an output data file - datafile = DataFile(filename="microarray-data-{}".format(i) + "_" + array_design_name + - "_run-{}".format(k), - label="Array Data File") - data_acq_process.outputs.append(datafile) - - # ensure Processes are linked forward and backward - extraction_process.next_process = labeling_process - labeling_process.prev_process = extraction_process - labeling_process.next_process = data_acq_process - data_acq_process.prev_process = labeling_process - - # make sure the extract, data file, and the processes are attached to the assay - this_assay.data_files.append(datafile) - this_assay.other_material.append(extract) - this_assay.other_material.append(le) - this_assay.process_sequence.append(extraction_process) - this_assay.process_sequence.append(labeling_process) - this_assay.process_sequence.append(data_acq_process) - - elif assay_plan[item]["assay type"] == 3: - # TODO: implement get_or_create method and refactor - - if len(assay_plan[item]["params"]["injection modes"]) > 0: - inj_mode = "" - acq_mode = "" - for inj_mode_code in range(len(assay_plan[item]["params"]["injection modes"])): - if inj_mode_code == 0: - print("YAY, this is FIA") - inj_mode = "FIA" - elif inj_mode_code == 1: - inj_mode = "LC" - elif inj_mode_code == 2: - inj_mode = "GC" - else: - print("error, injection method not recognized)") - - for acq_mode_code in range(len(assay_plan[item]["params"]["number of channels"])): - if acq_mode_code == 0: - acq_mode = "positive" - elif acq_mode_code == 1: - acq_mode = "negative" - else: - print("error, injection method not recognized)") - - techname = inj_mode + "-" + acq_mode + " mass spectrometry" - - ms_filename = "a_mp_" + inj_mode + "_" + acq_mode + "_ms.txt" - - ms = [a for a in new_inv.studies[0].assays if - a.measurement_type.term == "metabolite profiling" - and a.technology_type.term == techname and a.filename == ms_filename] - if len(ms) > 0: - print("yes, exists in :", ms) - # if such an assay table already exists, we retrieve it - this_assay = ms[0] - else: - # or print('nothing found, creating a new object)...') - this_assay = Assay(measurement_type=OntologyAnnotation(term="metabolite profiling"), - technology_type=OntologyAnnotation(term=techname), - filename=ms_filename) - - new_inv.studies[0].assays.append(this_assay) - - extraction_protocol = Protocol(name='metabolite extraction', - protocol_type=OntologyAnnotation(term="material separation")) - new_inv.studies[0].protocols.append(extraction_protocol) - - # lc_protocol = Protocol(name="liquid chromatography", - # protocol_type=OntologyAnnotation(term="material separation")) - # new_inv.studies[0].protocols.append(labeling_protocol) - - ms_protocol = Protocol(name=inj_mode + "-" + acq_mode +' mass spectrometry', - protocol_type=OntologyAnnotation(term="mass spectrometry")) - - randomized_run_order = ProtocolParameter(parameter_name=OntologyAnnotation(term="randomized run order")) - inj_param = ProtocolParameter(parameter_name=OntologyAnnotation(term="injection mode")) - - if inj_mode == 1 or inj_mode == 2: - ch_instr = ProtocolParameter(parameter_name=OntologyAnnotation(term="chromatography instrument")) - ch_column = ProtocolParameter(parameter_name=OntologyAnnotation(term="chromatography column")) - ch_elu_p = ProtocolParameter(parameter_name=OntologyAnnotation(term="elution program")) - ms_protocol.parameters.append(ch_instr) - ms_protocol.parameters.append(ch_column) - ms_protocol.parameters.append(ch_elu_p) - ms_instr = ProtocolParameter(parameter_name=OntologyAnnotation(term="mass spectrometry instrument")) - acq_param = ProtocolParameter(parameter_name=OntologyAnnotation(term="scan polarity")) - - ms_protocol.parameters.append(randomized_run_order) - ms_protocol.parameters.append(inj_param) - ms_protocol.parameters.append(ms_instr) - ms_protocol.parameters.append(acq_param) - - new_inv.studies[0].protocols.append(ms_protocol) - - index_i = 0 - index_j = 0 - index_k = 0 - randomized_order = [] - # for index_i, sample in enumerate(new_inv.studies[0].samples): - #some_sample_list = [sample for sample in new_inv.studies[0].samples if - # sample.characteristics[0].value.term == assay_plan[item]["sample type"] and - # sample.characteristics[1].value.term == assay_plan[item]["collection event"]] - #print("number of samples: ", len(some_sample_list)) - #extractlist_before = [ext for ext in new_inv.studies[0].assays[0].other_material if - # ext.type == "Extract Name"] - # print("number of extracts", len(extractlist_before)) - expected_total_number_run = len([sample for sample in new_inv.studies[0].samples if - sample.characteristics[0].value.term == assay_plan[item][ - "sample type"]]) \ - * int( - assay_plan[item]["params"]["number of technical replicates"]) - # * len(assay_plan[item]["params"]["number of channels"]) \ - - print(len([sample for sample in new_inv.studies[0].samples if - sample.characteristics[0].value.term == assay_plan[item][ - "sample type"]])) - print("expected size:", expected_total_number_run) - - len(assay_plan) * int( - assay_plan[item]["params"]["number of technical replicates"]) - - randomized_order = get_processrun_random_token(expected_total_number_run) - counter = -1 - for index_i, sample in enumerate([sample for sample in new_inv.studies[0].samples if - sample.characteristics[0].value.term == assay_plan[item][ - "sample type"]]): - # print("i: ", index_i, "sample: ", sample.characteristics[1].value.term) - # print("current collection event", assay_plan[item]["collection event"]) - if str(sample.characteristics[1].value.term) == str(assay_plan[item]["collection event"]): - # create an extraction process that executes the extraction protocol - extraction_process = Process(executes_protocol=[prtcl for prtcl in new_inv.studies[0].protocols - if prtcl.name == "metabolite extraction"][0], - performer="rick", - date_=datetime.datetime.now()) - - # extraction process takes as input a sample, and produces an extract material as output - # we make sure only the right kind of samples get assayed so we check against the sample type - # if sample.characteristics[0].value.term == assay_plan[item]["sample type"]: - # print("sample characteristics: ", sample.characteristics[0].value.term) - - extraction_process.inputs.append(sample) - extract = Material(name=sample.name + "extract-{}".format(index_i)) - extract.type = "Extract Name" - extraction_process.outputs.append(extract) - - # this loop is meant to handle the case where several acquisition modes (e.g. Neg or positive) are used from a sample - # TODO: include a function to obtain the relevant parameters used for data acquisition - #for index_j in range(int(assay_plan[item]["params"]["injection modes"])): - # this inner is for handling multiple runs of the same mode, i.e. tech replicates - for index_k in range( - int(assay_plan[item]["params"]["number of technical replicates"])): - prtcl_name = [prtcl for prtcl in new_inv.studies[0].protocols - if prtcl.name == inj_mode + "-" + acq_mode + ' mass spectrometry'][0] - data_acq_process = Process(executes_protocol=prtcl_name, - performer="louis", - date_=datetime.datetime.now()) - counter = counter+1 - # print(counter, randomized_order[counter]) - run_order = randomized_order[counter] - pv_run_order = ParameterValue(category=ProtocolParameter( - parameter_name=OntologyAnnotation(term="randomized run order")), - value=OntologyAnnotation(term=str(run_order))) - - pv_1 = ParameterValue(category=ProtocolParameter(parameter_name=OntologyAnnotation(term="injection mode")),value=OntologyAnnotation(term=inj_mode)) - - # if we are dealing with liquid or gas "C"hromatography - if "C" in inj_mode: - pv_1a = ParameterValue(category=ProtocolParameter(parameter_name=OntologyAnnotation(term="chromatography instrument")),value=OntologyAnnotation(term="Agilent Q12324A")) - pv_1b = ParameterValue(category=ProtocolParameter(parameter_name=OntologyAnnotation(term="chromatography column")),value=OntologyAnnotation(term="AB Hydroxyapatite")) - pv_1c = ParameterValue(category=ProtocolParameter(parameter_name=OntologyAnnotation(term="elution program")),value=OntologyAnnotation(term="Acetonitrile 90%, water 10% for 30 min, flow rate: 1ml/min")) - - data_acq_process.parameter_values.append(pv_1a) - data_acq_process.parameter_values.append(pv_1b) - data_acq_process.parameter_values.append(pv_1c) - - pv_2 = ParameterValue(category=ProtocolParameter(parameter_name=OntologyAnnotation(term="mass spectrometry instrument")),value=OntologyAnnotation(term="Agilent QTOF")) - pv_3 = ParameterValue(category=ProtocolParameter(parameter_name=OntologyAnnotation(term="scan polarity")),value=OntologyAnnotation(term=acq_mode)) - - data_acq_process.parameter_values.append(pv_1) - data_acq_process.parameter_values.append(pv_2) - data_acq_process.parameter_values.append(pv_3) - data_acq_process.parameter_values.append(pv_run_order) - - - # platform_name = "platform-{}".format(index_j) - platform_name = "platform-" + inj_mode - data_acq_process.name = "assay-name-{}".format(index_i) + "_" + platform_name + \ - "_run-{}".format(index_k) - data_acq_process.inputs.append(extraction_process.outputs[0]) - - # data acquisition process usually has an output data file - datafile = DataFile( - filename="acquired-data-{}".format(index_i) + "_" + platform_name + - "_run-{}".format(index_k) + ".mzml.gz", - label="Raw Spectral Data File") - data_acq_process.outputs.append(datafile) - - # ensure Processes are linked forward and backward - extraction_process.next_process = data_acq_process - # labeling_process.prev_process = extraction_process - extraction_process.next_process = data_acq_process - # data_acq_process.prev_process = labeling_process - data_acq_process.prev_process = extraction_process - - # make sure extract(library), data file, and the processes are attached to the assay - this_assay.data_files.append(datafile) - this_assay.other_material.append(extract) - # this_assay.other_material.append(le) - this_assay.process_sequence.append(extraction_process) - # this_assay.process_sequence.append(labeling_process) - this_assay.process_sequence.append(data_acq_process) - # for NMR: - elif assay_plan[item]["assay type"] == 4: - #TODO: implement get_or_create method and refactor - - if len(assay_plan[item]["params"]["injection modes"]) > 0: - inj_mode = "" - acq_mode = "" - for inj_mode_code in range( - len(assay_plan[item]["params"]["injection modes"])): - if inj_mode_code == 0: - print("YAY, this is autoloader") - inj_mode = "autoloader" - elif inj_mode_code == 1: - inj_mode = "LC" - elif inj_mode_code == 2: - inj_mode = "GC" - else: - print("error, injection method not recognized)") - - for acq_mode_code in range( - len(assay_plan[item]["params"]["pulse sequences"])): - print("CODE:", assay_plan[item]["params"]["pulse sequences"]) - if assay_plan[item]["params"]["pulse sequences"][acq_mode_code] == "1": - acq_mode = "COSY" - elif assay_plan[item]["params"]["pulse sequences"][acq_mode_code] == "2": - acq_mode = "NOESY" - elif assay_plan[item]["params"]["pulse sequences"][acq_mode_code] == "3": - acq_mode = "TOSCY" - elif assay_plan[item]["params"]["pulse sequences"][acq_mode_code] == "3": - acq_mode = "CPMG" - elif assay_plan[item]["params"]["pulse sequences"][acq_mode_code] == "4": - acq_mode = "INEPT" - elif assay_plan[item]["params"]["pulse sequences"][acq_mode_code] == "5": - acq_mode = "HMQC" - elif assay_plan[item]["params"]["pulse sequences"][acq_mode_code] == "6": - acq_mode = "WATERGATE" - else: - print("error, injection method not recognized)") - - techname = inj_mode + "-" + acq_mode + " nmr spectroscopy" - - nmr_filename = "a_mp_" + inj_mode + "_" + acq_mode + "_nmr.txt" - - nmr = [a for a in new_inv.studies[0].assays if - a.measurement_type.term == "metabolite profiling" - and a.technology_type.term == techname and a.filename == nmr_filename] - if len(nmr) > 0: - print("yes, exists in :", nmr) - # if such an assay table already exists, we retrieve it - this_assay = nmr[0] - else: - # or print('nothing found, creating a new object)...') - this_assay = Assay(measurement_type=OntologyAnnotation( - term="metabolite profiling"), - technology_type=OntologyAnnotation( - term=techname), - filename=nmr_filename) - - new_inv.studies[0].assays.append(this_assay) - - extraction_protocol = Protocol(name='metabolite extraction', - protocol_type=OntologyAnnotation( - term="material separation")) - new_inv.studies[0].protocols.append(extraction_protocol) - - # lc_protocol = Protocol(name="liquid chromatography", - # protocol_type=OntologyAnnotation(term="material separation")) - # new_inv.studies[0].protocols.append(labeling_protocol) - - nmr_protocol = Protocol( - name=inj_mode + "-" + acq_mode + ' nmr spectroscopy', - protocol_type=OntologyAnnotation( - term="nmr spectroscopy")) - inj_param = ProtocolParameter( - parameter_name=OntologyAnnotation( - term="injection mode")) - if inj_mode == 1 or inj_mode == 2: - ch_instr = ProtocolParameter( - parameter_name=OntologyAnnotation( - term="chromatography instrument")) - ch_column = ProtocolParameter( - parameter_name=OntologyAnnotation( - term="chromatography column")) - ch_elu_p = ProtocolParameter( - parameter_name=OntologyAnnotation( - term="elution program")) - nmr_protocol.parameters.append(ch_instr) - nmr_protocol.parameters.append(ch_column) - nmr_protocol.parameters.append(ch_elu_p) - nmr_instr = ProtocolParameter( - parameter_name=OntologyAnnotation( - term="nmr spectroscopy instrument")) - nmr_probe = ProtocolParameter(parameter_name=(OntologyAnnotation(term="NMR probe"))) - acq_param = ProtocolParameter( - parameter_name=OntologyAnnotation(term="pulse sequence")) - - nmr_protocol.parameters.append(inj_param) - nmr_protocol.parameters.append(nmr_instr) - nmr_protocol.parameters.append(nmr_probe) - nmr_protocol.parameters.append(acq_param) - - new_inv.studies[0].protocols.append(nmr_protocol) - - index_i = 0 - index_j = 0 - index_k = 0 - # for index_i, sample in enumerate(new_inv.studies[0].samples): - # some_sample_list = [sample for sample in new_inv.studies[0].samples if - # sample.characteristics[0].value.term == assay_plan[item]["sample type"] and - # sample.characteristics[1].value.term == assay_plan[item]["collection event"]] - # print("number of samples: ", len(some_sample_list)) - # extractlist_before = [ext for ext in new_inv.studies[0].assays[0].other_material if - # ext.type == "Extract Name"] - # print("number of extracts", len(extractlist_before)) - - for index_i, sample in enumerate([sample for sample in - new_inv.studies[0].materials[ - 'samples'] if - sample.characteristics[ - 0].value.term == - assay_plan[item][ - "sample type"]]): - # print("i: ", index_i, "sample: ", sample.characteristics[1].value.term) - # print("current collection event", assay_plan[item]["collection event"]) - if str(sample.characteristics[1].value.term) == str( - assay_plan[item]["collection event"]): - # create an extraction process that executes the extraction protocol - extraction_process = Process(executes_protocol= - [prtcl for prtcl in - new_inv.studies[ - 0].protocols - if - prtcl.name == "metabolite extraction"][ - 0], - performer="rick", - date_=datetime.datetime.now()) - - # extraction process takes as input a sample, and produces an extract material as output - # we make sure only the right kind of samples get assayed so we check against the sample type - # if sample.characteristics[0].value.term == assay_plan[item]["sample type"]: - # print("sample characteristics: ", sample.characteristics[0].value.term) - - extraction_process.inputs.append(sample) - extract = Material( - name=sample.name + "extract-{}".format(index_i)) - extract.type = "Extract Name" - extraction_process.outputs.append(extract) - - # this loop is meant to handle the case where several acquisition modes (e.g. Neg or positive) are used from a sample - # TODO: include a function to obtain the relevant parameters used for data acquisition - # for index_j in range(int(assay_plan[item]["params"]["injection modes"])): - # this inner is for handling multiple runs of the same mode, i.e. tech replicates - for index_k in range( - int(assay_plan[item]["params"][ - "number of technical replicates"])): - prtcl_name = \ - [prtcl for prtcl in new_inv.studies[0].protocols - if - prtcl.name == inj_mode + "-" + acq_mode + ' nmr spectroscopy'][ - 0] - data_acq_process = Process( - executes_protocol=prtcl_name, - performer="mitsuko", - date_=datetime.datetime.now()) - pv_1 = ParameterValue(category=ProtocolParameter( - parameter_name=OntologyAnnotation( - term="injection mode")), - value=OntologyAnnotation( - term=inj_mode)) - - # if we are dealing with liquid or gas "C"hromatography - if "C" in inj_mode: - pv_1a = ParameterValue( - category=ProtocolParameter( - parameter_name=OntologyAnnotation( - term="chromatography instrument")), - value=OntologyAnnotation( - term="Agilent Q12324A")) - pv_1b = ParameterValue( - category=ProtocolParameter( - parameter_name=OntologyAnnotation( - term="chromatography column")), - value=OntologyAnnotation( - term="AB Hydroxyapatite")) - pv_1c = ParameterValue( - category=ProtocolParameter( - parameter_name=OntologyAnnotation( - term="elution program")), - value=OntologyAnnotation( - term="acetonitrile 90%, water 10% for 30 min, flow rate: 1ml/min")) - - pv_1d = ParameterValue( - category=ProtocolParameter( - parameter_name=OntologyAnnotation( - term="NMR probe")), - value=OntologyAnnotation( - term="flow probe")) - - data_acq_process.parameter_values.append(pv_1a) - data_acq_process.parameter_values.append(pv_1b) - data_acq_process.parameter_values.append(pv_1c) - data_acq_process.parameter_values.append(pv_1d) - - else: - pv_1d = ParameterValue( - category=ProtocolParameter( - parameter_name=OntologyAnnotation( - term="NMR probe")), - value=OntologyAnnotation( - term="non-flow probe")) - - - - pv_2 = ParameterValue(category=ProtocolParameter( - parameter_name=OntologyAnnotation( - term="nmr spectroscopy instrument")), - value=OntologyAnnotation( - term="Bruker Avance III")) - pv_3 = ParameterValue(category=ProtocolParameter( - parameter_name=OntologyAnnotation( - term="pulse sequence")), - value=OntologyAnnotation( - term=acq_mode)) - - data_acq_process.parameter_values.append(pv_1) - data_acq_process.parameter_values.append(pv_2) - data_acq_process.parameter_values.append(pv_3) - - # platform_name = "platform-{}".format(index_j) - platform_name = "platform-" + inj_mode - data_acq_process.name = "assay-name-{}".format( - index_i) + "_" + platform_name + \ - "_run-{}".format(index_k) - data_acq_process.inputs.append( - extraction_process.outputs[0]) - - # data acquisition process usually has an output data file - datafile = DataFile( - filename="acquired-data-{}".format( - index_i) + "_" + platform_name + - "_run-{}".format(index_k) + ".nmrml.gz", - label="Free Induction Decay Data File") - data_acq_process.outputs.append(datafile) - - # ensure Processes are linked forward and backward - extraction_process.next_process = data_acq_process - # labeling_process.prev_process = extraction_process - extraction_process.next_process = data_acq_process - # data_acq_process.prev_process = labeling_process - data_acq_process.prev_process = extraction_process - - # make sure extract(library), data file, and the processes are attached to the assay - this_assay.data_files.append(datafile) - this_assay.other_material.append( - extract) - # this_assay.other_material.append(le) - this_assay.process_sequence.append( - extraction_process) - # this_assay.process_sequence.append(labeling_process) - this_assay.process_sequence.append(data_acq_process) - # else: - # print("no luck :(") - - elif repeats is True and "factorial" in free_or_restricted_design: - - obi = OntologySource(name="OBI", description="Ontology for Biomedical Investigations") - new_inv.ontology_source_references.append(obi) - stato = OntologySource(name="STATO", description="Ontology for Statistical Methods") - new_inv.ontology_source_references.append(stato) - design1 = OntologyAnnotation(term_source=obi) - design1.term = "intervention design" - design1.term_accession = "http://purl.obolibrary.org/obo/OBI_0000115" - new_inv.studies[0].design_descriptors.append(design1) - design2 = OntologyAnnotation(term_source=stato) - design2.term = "full factorial design" - design2.term_accession = "http://purl.obolibrary.org/obo/STATO_0000270" - new_inv.studies[0].design_descriptors.append(design2) - design3 = OntologyAnnotation(term_source=obi) - design3.term = "repeated measures design" - design3.term_accession = "http://purl.obolibrary.org/obo/OBI_0500002" - new_inv.studies[0].design_descriptors.append(design3) - - intervention_list, new_inv = get_list_of_interventions(new_inv) - - assay_plan = [] - for intervention_type in intervention_list.keys(): - # print("type of intervention: ", intervention_type) - for factor in intervention_list[intervention_type].keys(): - # print("factor :", factor) - set_factor_values(factor, intervention_list[intervention_type]) - # print("associated factor values:", intervention_list[intervention_type][factor]) - - dump(isa_obj=new_inv, output_path='./') - - except NotImplemented: - print("we have recognized a cross over design & repeated treatment case, which is not yet fully implemented") - print("error in create_study_subject() method") - - # my_factors = {} - # study_group_dictionaries = [] - # number_of_repeats = get_repeat_number() - # intervention_list = get_list_of_interventions() - # """factors_for_treatment = get_factors_from_treatment_type(intervention_list)""" - # for intervention_type in intervention_list.keys(): - # print("type of intervention: ", intervention_type) - # for factor in intervention_list[intervention_type].keys(): - # print("factor :", factor) - # set_factor_values(factor, intervention_list[intervention_type]) - # print("associated factor values:", intervention_list[intervention_type][factor]) - # - # study_group_dictionaries.append(compute_study_groups(intervention_list[intervention_type])) - # print("study groups:", list_of_study_group_dictionaries) - # # set_study_arms() - # - # # for intervention in intervention_list: - # # int_dict = dict - # # set_factor_values(intervention, int_dict) - # print(compute_treatment_sequences(list_of_study_group_dictionaries, number_of_repeats)) - # # treatment_arms = compute_treatment_sequences(intervention_list, number_of_repeats) - # new_inv = set_study_arms(number_of_repeats) - # - # # for element in range(len(treatment_arms)): - - else: - try: - new_inv = use_default_inv() - - obi = OntologySource(name="OBI", description="Ontology for Biomedical Investigations") - new_inv.ontology_source_references.append(obi) - stato = OntologySource(name="STATO", description="Ontology for Statistical Methods") - new_inv.ontology_source_references.append(stato) - omiabis = OntologySource(name="OMIABIS", description="an ontological version of MIABIS (Minimum Information About BIobank data Sharing)") - new_inv.ontology_source_references.append(obi) - - design1 = OntologyAnnotation(term_source=obi) - design1.term = "observation design" - design1.term_accession = "http://purl.obolibrary.org/obo/OBI_0300311" - new_inv.studies[0].design_descriptors.append(design1) - design2 = OntologyAnnotation(term_source=omiabis) - design2.term = "cohort study design" - design2.term_accession = "http://purl.obolibrary.org/obo/OMIABIS_0001020" - new_inv.studies[0].design_descriptors.append(design2) - - # get_study_group() - # get_study_temporal_span() - # get_sample_collection_plan() - # get_assay_plan() - - except NotImplemented: - print("we have recognized an observation study, which is not yet fully implemented") - print("error in create_study_subject() method") - -if __name__ == '__main__': - main() From ea600940412876a48f73dba0c73bbae437fe3159 Mon Sep 17 00:00:00 2001 From: zigur Date: Tue, 17 Nov 2020 17:39:45 +0000 Subject: [PATCH 07/25] deleted unused files #368 v3 --- isatools/create/data.json | 2351 ------------------------------------- 1 file changed, 2351 deletions(-) delete mode 100644 isatools/create/data.json diff --git a/isatools/create/data.json b/isatools/create/data.json deleted file mode 100644 index b0ac1d52..00000000 --- a/isatools/create/data.json +++ /dev/null @@ -1,2351 +0,0 @@ -[ - { - "inferred_study_design": "full factorial design", - "samples": 132, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS1", - "total_study_groups": 4 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 30, - "sampling": "single sampling", - "sources": 30, - "spurious_factors": "", - "study_key": "MTBLS10", - "total_study_groups": 12 - }, - { - "inferred_study_design": "none", - "samples": 15, - "sampling": "single sampling", - "sources": 15, - "spurious_factors": "", - "study_key": "MTBLS100", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 11, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS102", - "total_study_groups": 3 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS103 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS104 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not find protocol matching '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS105 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 36, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "factor: Phytohormones ", - "study_key": "MTBLS107", - "total_study_groups": 3 - }, - { - "inferred_study_design": "full factorial design", - "samples": 36, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "factor: Phytohormones ", - "study_key": "MTBLS108", - "total_study_groups": 3 - }, - { - "inferred_study_design": "full factorial design", - "samples": 36, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "factor: Phytohormones ", - "study_key": "MTBLS109", - "total_study_groups": 3 - }, - { - "inferred_study_design": "full factorial design", - "samples": 107, - "sampling": "multiple/repeated samping", - "sources": 3, - "spurious_factors": "factor: Total Light_100pc_after_28d ;factor: Medium_after_initiation ;factor: Total Light_0pc_after_28d ;factor: Container_after_initiation ;factor: Total Light_100pc_after_07d ;factor: Plants/Container_Quantity_after_14d ;factor: Total Light_0pc_after_initiation ;factor: Total Light_100pc_after_initiation ;factor: Total Light_0pc_after_07d ;factor: Medium_after_14d ;factor: Container_after_14d ", - "study_key": "MTBLS11", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 36, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "factor: Phytohormones ", - "study_key": "MTBLS110", - "total_study_groups": 3 - }, - { - "inferred_study_design": "full factorial design", - "samples": 36, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "factor: Phytohormones ", - "study_key": "MTBLS111", - "total_study_groups": 3 - }, - { - "inferred_study_design": "full factorial design", - "samples": 296, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS112", - "total_study_groups": 60 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 114, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "", - "study_key": "MTBLS113", - "total_study_groups": 648 - }, - { - "inferred_study_design": "full factorial design", - "samples": 15, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS114", - "total_study_groups": 3 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS116 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 26, - "sampling": "single sampling", - "sources": 26, - "spurious_factors": "factor: Replicate ;factor: Time point ;factor: Infection ;factor: Replicate ;factor: Time point ;factor: Infection ", - "study_key": "MTBLS117", - "total_study_groups": 12 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS118 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS119 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 69, - "sampling": "multiple/repeated samping", - "sources": 3, - "spurious_factors": "factor: Medium_after_initiation ;factor: Medium_after_14d ;factor: Container_after_initiation ;factor: Total Light_100pc_after_07d ;factor: Plants/Container_Quantity_after_14d ;factor: Total Light_0pc_after_initiation ;factor: Total Light_100pc_after_initiation ;factor: Total Light_0pc_after_07d ;factor: Container_after_14d ", - "study_key": "MTBLS12", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS120 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 1596, - "sampling": "single sampling", - "sources": 1596, - "spurious_factors": "", - "study_key": "MTBLS123", - "total_study_groups": 60 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 1138, - "sampling": "single sampling", - "sources": 1138, - "spurious_factors": "factor: Gender ;factor: Age ", - "study_key": "MTBLS124", - "total_study_groups": 10430784 - }, - { - "inferred_study_design": "full factorial design", - "samples": 60, - "sampling": "multiple/repeated samping", - "sources": 12, - "spurious_factors": "", - "study_key": "MTBLS125", - "total_study_groups": 20 - }, - { - "inferred_study_design": "full factorial design", - "samples": 72, - "sampling": "multiple/repeated samping", - "sources": 24, - "spurious_factors": "", - "study_key": "MTBLS126", - "total_study_groups": 12 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 26, - "sampling": "multiple/repeated samping", - "sources": 3, - "spurious_factors": "factor: Cell line ;factor: Cell line ", - "study_key": "MTBLS127", - "total_study_groups": 6 - }, - { - "inferred_study_design": "full factorial design", - "samples": 16, - "sampling": "single sampling", - "sources": 16, - "spurious_factors": "", - "study_key": "MTBLS128", - "total_study_groups": 16 - }, - { - "inferred_study_design": "full factorial design", - "samples": 24, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "factor: Time ;factor: Time ", - "study_key": "MTBLS129", - "total_study_groups": 2 - }, - { - "inferred_study_design": "full factorial design", - "samples": 83, - "sampling": "multiple/repeated samping", - "sources": 3, - "spurious_factors": "factor: Medium_after_initiation ;factor: Medium_after_14d ;factor: Container_after_initiation ;factor: Total Light_100pc_after_07d ;factor: Plants/Container_Quantity_after_14d ;factor: Total Light_0pc_after_initiation ;factor: Total Light_100pc_after_initiation ;factor: Total Light_0pc_after_07d ;factor: Container_after_14d ", - "study_key": "MTBLS13", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 36, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS131", - "total_study_groups": 12 - }, - { - "inferred_study_design": "full factorial design", - "samples": 36, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS132", - "total_study_groups": 12 - }, - { - "inferred_study_design": "none", - "samples": 14, - "sampling": "multiple/repeated samping", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS133", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": 15, - "sampling": "multiple/repeated samping", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS134", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 41, - "sampling": "multiple/repeated samping", - "sources": 3, - "spurious_factors": "", - "study_key": "MTBLS137", - "total_study_groups": 9 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 96, - "sampling": "multiple/repeated samping", - "sources": 16, - "spurious_factors": "factor: Temperature_after_04d ;factor: Salt_Quantity_after_initiation ;factor: RelativeHumidity_after_04d ;factor: Container_after_initiation ;factor: Watering_after_04d ;factor: Container_after_04d ;factor: Medium_after_initiation ;factor: Total Light_Off_after_04d ;factor: Total Light_On_after_04d ;factor: Medium_after_04d ", - "study_key": "MTBLS14", - "total_study_groups": 8 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 44, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "", - "study_key": "MTBLS140", - "total_study_groups": 64 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not find protocol matching '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS143 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 72, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS144", - "total_study_groups": 12 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS146 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": 978, - "sampling": "single sampling", - "sources": 978, - "spurious_factors": "", - "study_key": "MTBLS147", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS148 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 422, - "sampling": "multiple/repeated samping", - "sources": 6, - "spurious_factors": "", - "study_key": "MTBLS149", - "total_study_groups": 6 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 95, - "sampling": "multiple/repeated samping", - "sources": 16, - "spurious_factors": "factor: Temperature_after_04d ;factor: Salt_Quantity_after_initiation ;factor: RelativeHumidity_after_04d ;factor: Container_after_initiation ;factor: Watering_after_04d ;factor: Container_after_04d ;factor: Medium_after_initiation ;factor: Total Light_Off_after_04d ;factor: Total Light_On_after_04d ;factor: Medium_after_04d ", - "study_key": "MTBLS15", - "total_study_groups": 8 - }, - { - "inferred_study_design": "full factorial design", - "samples": 162, - "sampling": "multiple/repeated samping", - "sources": 1, - "spurious_factors": "factor: Peturbation ;factor: Time Point ;factor: Cell Type ", - "study_key": "MTBLS150", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 120, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS151", - "total_study_groups": 150 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 134, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "", - "study_key": "MTBLS152", - "total_study_groups": 28 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 24, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "factor: control ;factor: time ;factor: inorganic phosphate ;factor: control ", - "study_key": "MTBLS154", - "total_study_groups": 16 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 34, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "factor: control ", - "study_key": "MTBLS155", - "total_study_groups": 12 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS156 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS157 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 83, - "sampling": "multiple/repeated samping", - "sources": 14, - "spurious_factors": "factor: Temperature_after_04d ;factor: Salt_Quantity_after_initiation ;factor: RelativeHumidity_after_04d ;factor: Container_after_initiation ;factor: Watering_after_04d ;factor: Container_after_04d ;factor: Medium_after_initiation ;factor: Total Light_Off_after_04d ;factor: Total Light_On_after_04d ;factor: Medium_after_04d ", - "study_key": "MTBLS16", - "total_study_groups": 8 - }, - { - "inferred_study_design": "full factorial design", - "samples": 60, - "sampling": "multiple/repeated samping", - "sources": 223, - "spurious_factors": "", - "study_key": "MTBLS160", - "total_study_groups": 60 - }, - { - "inferred_study_design": "full factorial design", - "samples": 117, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "", - "study_key": "MTBLS161", - "total_study_groups": 2 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 64, - "sampling": "multiple/repeated samping", - "sources": 3, - "spurious_factors": "", - "study_key": "MTBLS162", - "total_study_groups": 17856 - }, - { - "inferred_study_design": "none", - "samples": 24, - "sampling": "single sampling", - "sources": 24, - "spurious_factors": "", - "study_key": "MTBLS163", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 390, - "sampling": "multiple/repeated samping", - "sources": 51, - "spurious_factors": "", - "study_key": "MTBLS164", - "total_study_groups": 45 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 57, - "sampling": "multiple/repeated samping", - "sources": 37, - "spurious_factors": "factor: Panda mother ;factor: Mother age ", - "study_key": "MTBLS165", - "total_study_groups": 261 - }, - { - "inferred_study_design": "none", - "samples": 14, - "sampling": "multiple/repeated samping", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS166", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 63, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS168", - "total_study_groups": 3520 - }, - { - "inferred_study_design": "full factorial design", - "samples": 14, - "sampling": "single sampling", - "sources": 14, - "spurious_factors": "", - "study_key": "MTBLS169", - "total_study_groups": 2 - }, - { - "inferred_study_design": "full factorial design", - "samples": 1050, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS17", - "total_study_groups": 12 - }, - { - "inferred_study_design": "full factorial design", - "samples": 68, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS170", - "total_study_groups": 6 - }, - { - "inferred_study_design": "full factorial design", - "samples": 24, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS171", - "total_study_groups": 6 - }, - { - "inferred_study_design": "none", - "samples": 140, - "sampling": "single sampling", - "sources": 140, - "spurious_factors": "", - "study_key": "MTBLS172", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 131, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS173", - "total_study_groups": 16 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 18, - "sampling": "single sampling", - "sources": 18, - "spurious_factors": "factor: Metabolic syndrome ", - "study_key": "MTBLS174", - "total_study_groups": 3520 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 96, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS175", - "total_study_groups": 8 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS176 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 42, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS177", - "total_study_groups": 112 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 180, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS178", - "total_study_groups": 18 - }, - { - "inferred_study_design": "full factorial design", - "samples": 41, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS185", - "total_study_groups": 3 - }, - { - "inferred_study_design": "full factorial design", - "samples": 342, - "sampling": "single sampling", - "sources": 342, - "spurious_factors": "", - "study_key": "MTBLS187", - "total_study_groups": 24 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 7, - "sampling": "single sampling", - "sources": 7, - "spurious_factors": "", - "study_key": "MTBLS188", - "total_study_groups": 10 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 192, - "sampling": "multiple/repeated samping", - "sources": 5, - "spurious_factors": "factor: drug intervention ;factor: drug intervention ", - "study_key": "MTBLS189", - "total_study_groups": 6 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not find protocol matching '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS19 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 56, - "sampling": "single sampling", - "sources": 56, - "spurious_factors": "", - "study_key": "MTBLS191", - "total_study_groups": 4 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 311, - "sampling": "multiple/repeated samping", - "sources": 3, - "spurious_factors": "", - "study_key": "MTBLS193", - "total_study_groups": 192 - }, - { - "inferred_study_design": "full factorial design", - "samples": 60, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS194", - "total_study_groups": 4 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 168, - "sampling": "single sampling", - "sources": 168, - "spurious_factors": "", - "study_key": "MTBLS196", - "total_study_groups": 18 - }, - { - "inferred_study_design": "full factorial design", - "samples": 130, - "sampling": "multiple/repeated samping", - "sources": 13, - "spurious_factors": "", - "study_key": "MTBLS197", - "total_study_groups": 2 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 504, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS198", - "total_study_groups": 48 - }, - { - "inferred_study_design": "full factorial design", - "samples": 16, - "sampling": "single sampling", - "sources": 16, - "spurious_factors": "", - "study_key": "MTBLS2", - "total_study_groups": 4 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 148, - "sampling": "multiple/repeated samping", - "sources": 3, - "spurious_factors": "factor: Material sample ;factor: Material sample ", - "study_key": "MTBLS20", - "total_study_groups": 2 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS200 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 63, - "sampling": "single sampling", - "sources": 63, - "spurious_factors": "", - "study_key": "MTBLS202", - "total_study_groups": 6 - }, - { - "inferred_study_design": "full factorial design", - "samples": 50, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS203", - "total_study_groups": 5 - }, - { - "inferred_study_design": "full factorial design", - "samples": 27, - "sampling": "single sampling", - "sources": 27, - "spurious_factors": "", - "study_key": "MTBLS208", - "total_study_groups": 8 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not find protocol matching '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS209 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS21 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 62, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "", - "study_key": "MTBLS210", - "total_study_groups": 4 - }, - { - "inferred_study_design": "full factorial design", - "samples": 87, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS212", - "total_study_groups": 24 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS213 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 15, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS214", - "total_study_groups": 15 - }, - { - "inferred_study_design": "full factorial design", - "samples": 231, - "sampling": "single sampling", - "sources": 231, - "spurious_factors": "factor: Ionisation ;factor: Ionisation ", - "study_key": "MTBLS215", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 36, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS216", - "total_study_groups": 6 - }, - { - "inferred_study_design": "full factorial design", - "samples": 34, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS217", - "total_study_groups": 6 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 134, - "sampling": "multiple/repeated samping", - "sources": 3, - "spurious_factors": "", - "study_key": "MTBLS218", - "total_study_groups": 16 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 62, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS219", - "total_study_groups": 9 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 30, - "sampling": "multiple/repeated samping", - "sources": 5, - "spurious_factors": "factor: Medium_after_initiation ;factor: Salt_Quantity_after_initiation ;factor: Container_after_initiation ;factor: Watering_after_initiation ", - "study_key": "MTBLS22", - "total_study_groups": 120 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS224 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 134, - "sampling": "multiple/repeated samping", - "sources": 29, - "spurious_factors": "", - "study_key": "MTBLS225", - "total_study_groups": 40 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 64, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "", - "study_key": "MTBLS226", - "total_study_groups": 12 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS227 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 19, - "sampling": "single sampling", - "sources": 19, - "spurious_factors": "", - "study_key": "MTBLS228", - "total_study_groups": 10 - }, - { - "inferred_study_design": "full factorial design", - "samples": 39, - "sampling": "single sampling", - "sources": 39, - "spurious_factors": "", - "study_key": "MTBLS229", - "total_study_groups": 20 - }, - { - "inferred_study_design": "full factorial design", - "samples": 12, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS23", - "total_study_groups": 2 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 44, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS233", - "total_study_groups": 192 - }, - { - "inferred_study_design": "full factorial design", - "samples": 33, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS234", - "total_study_groups": 33 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 16, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS235", - "total_study_groups": 78 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 240, - "sampling": "multiple/repeated samping", - "sources": 114, - "spurious_factors": "", - "study_key": "MTBLS237", - "total_study_groups": 3139184885760 - }, - { - "inferred_study_design": "full factorial design", - "samples": 106, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "factor: Time Collected Urine sample ", - "study_key": "MTBLS24", - "total_study_groups": 2 - }, - { - "inferred_study_design": "full factorial design", - "samples": 16, - "sampling": "multiple/repeated samping", - "sources": 8, - "spurious_factors": "", - "study_key": "MTBLS240", - "total_study_groups": 2 - }, - { - "inferred_study_design": "full factorial design", - "samples": 465, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS242", - "total_study_groups": 5 - }, - { - "inferred_study_design": "full factorial design", - "samples": 22, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS243", - "total_study_groups": 2 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 30, - "sampling": "multiple/repeated samping", - "sources": 1, - "spurious_factors": "factor: Substrate ;factor: Labeling ;factor: Substrate ;factor: Labeling ", - "study_key": "MTBLS247", - "total_study_groups": 4 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 250, - "sampling": "multiple/repeated samping", - "sources": 124, - "spurious_factors": "factor: Baecke modified ;factor: Packs-years ;factor: Inspiratory capacity / Total Lung capacity ", - "study_key": "MTBLS249", - "total_study_groups": 668224566533597895713700836288057359530593353728000000000000 - }, - { - "inferred_study_design": "none", - "samples": 8, - "sampling": "multiple/repeated samping", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS25", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 86, - "sampling": "single sampling", - "sources": 86, - "spurious_factors": "", - "study_key": "MTBLS253", - "total_study_groups": 10 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 35, - "sampling": "multiple/repeated samping", - "sources": 16, - "spurious_factors": "", - "study_key": "MTBLS256", - "total_study_groups": 10 - }, - { - "inferred_study_design": "full factorial design", - "samples": 75, - "sampling": "multiple/repeated samping", - "sources": 4, - "spurious_factors": "", - "study_key": "MTBLS259", - "total_study_groups": 3 - }, - { - "inferred_study_design": "full factorial design", - "samples": 18, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "factor: Genotype ;factor: Genotype ;factor: Genotype ", - "study_key": "MTBLS26", - "total_study_groups": 3 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS260 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 6, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS263", - "total_study_groups": 12 - }, - { - "inferred_study_design": "full factorial design", - "samples": 48, - "sampling": "multiple/repeated samping", - "sources": 4, - "spurious_factors": "", - "study_key": "MTBLS264", - "total_study_groups": 48 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 30, - "sampling": "single sampling", - "sources": 30, - "spurious_factors": "", - "study_key": "MTBLS265", - "total_study_groups": 60 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 30, - "sampling": "single sampling", - "sources": 30, - "spurious_factors": "", - "study_key": "MTBLS266", - "total_study_groups": 60 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 30, - "sampling": "single sampling", - "sources": 30, - "spurious_factors": "", - "study_key": "MTBLS267", - "total_study_groups": 60 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 204, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS270", - "total_study_groups": 27 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 219, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "", - "study_key": "MTBLS272", - "total_study_groups": 16 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 111, - "sampling": "multiple/repeated samping", - "sources": 23, - "spurious_factors": "", - "study_key": "MTBLS273", - "total_study_groups": 243 - }, - { - "inferred_study_design": "full factorial design", - "samples": 18, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS274", - "total_study_groups": 3 - }, - { - "inferred_study_design": "full factorial design", - "samples": 20, - "sampling": "single sampling", - "sources": 20, - "spurious_factors": "", - "study_key": "MTBLS275", - "total_study_groups": 4 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS276 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 105, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS277", - "total_study_groups": 168 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 33, - "sampling": "single sampling", - "sources": 33, - "spurious_factors": "", - "study_key": "MTBLS278", - "total_study_groups": 9 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 88, - "sampling": "single sampling", - "sources": 88, - "spurious_factors": "", - "study_key": "MTBLS279", - "total_study_groups": 10 - }, - { - "inferred_study_design": "full factorial design", - "samples": 2010, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS28", - "total_study_groups": 24 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 88, - "sampling": "single sampling", - "sources": 88, - "spurious_factors": "", - "study_key": "MTBLS280", - "total_study_groups": 10 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 30, - "sampling": "single sampling", - "sources": 30, - "spurious_factors": "", - "study_key": "MTBLS281", - "total_study_groups": 15 - }, - { - "inferred_study_design": "full factorial design", - "samples": 13, - "sampling": "single sampling", - "sources": 13, - "spurious_factors": "", - "study_key": "MTBLS282", - "total_study_groups": 13 - }, - { - "inferred_study_design": "full factorial design", - "samples": 36, - "sampling": "multiple/repeated samping", - "sources": 6, - "spurious_factors": "", - "study_key": "MTBLS283", - "total_study_groups": 36 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 81, - "sampling": "single sampling", - "sources": 81, - "spurious_factors": "", - "study_key": "MTBLS286", - "total_study_groups": 30 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 81, - "sampling": "single sampling", - "sources": 81, - "spurious_factors": "", - "study_key": "MTBLS287", - "total_study_groups": 30 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 81, - "sampling": "single sampling", - "sources": 81, - "spurious_factors": "", - "study_key": "MTBLS288", - "total_study_groups": 30 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 108, - "sampling": "multiple/repeated samping", - "sources": 47, - "spurious_factors": "", - "study_key": "MTBLS289", - "total_study_groups": 19035 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 1704, - "sampling": "single sampling", - "sources": 1704, - "spurious_factors": "factor: Treatment ;factor: Timepoint ;factor: Level measurement type ;factor: Cell number ", - "study_key": "MTBLS29", - "total_study_groups": 3200 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 95, - "sampling": "single sampling", - "sources": 95, - "spurious_factors": "", - "study_key": "MTBLS290", - "total_study_groups": 296 - }, - { - "inferred_study_design": "full factorial design", - "samples": 8, - "sampling": "single sampling", - "sources": 8, - "spurious_factors": "", - "study_key": "MTBLS291", - "total_study_groups": 5 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 196, - "sampling": "multiple/repeated samping", - "sources": 97, - "spurious_factors": "factor: siRNA ", - "study_key": "MTBLS292", - "total_study_groups": 36864 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 15, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS293", - "total_study_groups": 40 - }, - { - "inferred_study_design": "full factorial design", - "samples": 147, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS295", - "total_study_groups": 4 - }, - { - "inferred_study_design": "full factorial design", - "samples": 192, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "factor: metabolomics/lipidomics ;factor: metabolomics/lipidomics ;factor: metabolomics/lipidomics ;factor: metabolomics/lipidomics ", - "study_key": "MTBLS296", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS297 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 60, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS298", - "total_study_groups": 504 - }, - { - "inferred_study_design": "full factorial design", - "samples": 63, - "sampling": "single sampling", - "sources": 63, - "spurious_factors": "", - "study_key": "MTBLS3", - "total_study_groups": 6 - }, - { - "inferred_study_design": "full factorial design", - "samples": 300, - "sampling": "multiple/repeated samping", - "sources": 5, - "spurious_factors": "", - "study_key": "MTBLS30", - "total_study_groups": 300 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 75, - "sampling": "single sampling", - "sources": 75, - "spurious_factors": "", - "study_key": "MTBLS303", - "total_study_groups": 40 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 129, - "sampling": "multiple/repeated samping", - "sources": 128, - "spurious_factors": "", - "study_key": "MTBLS306", - "total_study_groups": 336 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 89, - "sampling": "multiple/repeated samping", - "sources": 34, - "spurious_factors": "", - "study_key": "MTBLS307", - "total_study_groups": 189 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS309 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 168, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS31", - "total_study_groups": 72 - }, - { - "inferred_study_design": "full factorial design", - "samples": 288, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS311", - "total_study_groups": 288 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS312 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": 4, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "", - "study_key": "MTBLS313", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Study Factor from Factor Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS315 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 30, - "sampling": "multiple/repeated samping", - "sources": 10, - "spurious_factors": "", - "study_key": "MTBLS316", - "total_study_groups": 30 - }, - { - "inferred_study_design": "none", - "samples": 1, - "sampling": "single sampling", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS317", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 121, - "sampling": "multiple/repeated samping", - "sources": 119, - "spurious_factors": "", - "study_key": "MTBLS319", - "total_study_groups": 40 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 23, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS32", - "total_study_groups": 20 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 109, - "sampling": "multiple/repeated samping", - "sources": 108, - "spurious_factors": "", - "study_key": "MTBLS320", - "total_study_groups": 40 - }, - { - "inferred_study_design": "full factorial design", - "samples": 25, - "sampling": "single sampling", - "sources": 25, - "spurious_factors": "", - "study_key": "MTBLS321", - "total_study_groups": 2 - }, - { - "inferred_study_design": "full factorial design", - "samples": 39, - "sampling": "single sampling", - "sources": 39, - "spurious_factors": "", - "study_key": "MTBLS327", - "total_study_groups": 4 - }, - { - "inferred_study_design": "none", - "samples": 30, - "sampling": "single sampling", - "sources": 30, - "spurious_factors": "", - "study_key": "MTBLS328", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 25, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS33", - "total_study_groups": 16 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 26, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS333", - "total_study_groups": 4320 - }, - { - "inferred_study_design": "none", - "samples": 140, - "sampling": "multiple/repeated samping", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS335", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS336 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 27, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS337", - "total_study_groups": 4 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 222, - "sampling": "single sampling", - "sources": 222, - "spurious_factors": "", - "study_key": "MTBLS338", - "total_study_groups": 57 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 36, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS34", - "total_study_groups": 63 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS340 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 55, - "sampling": "multiple/repeated samping", - "sources": 19, - "spurious_factors": "", - "study_key": "MTBLS341", - "total_study_groups": 4 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 129, - "sampling": "single sampling", - "sources": 129, - "spurious_factors": "factor: Facility ;factor: Strain ;factor: Sex ", - "study_key": "MTBLS345", - "total_study_groups": 32 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 18, - "sampling": "single sampling", - "sources": 18, - "spurious_factors": "", - "study_key": "MTBLS35", - "total_study_groups": 54 - }, - { - "inferred_study_design": "full factorial design", - "samples": 24, - "sampling": "single sampling", - "sources": 24, - "spurious_factors": "factor: Extract ;factor: Extract ", - "study_key": "MTBLS350", - "total_study_groups": 12 - }, - { - "inferred_study_design": "full factorial design", - "samples": 730, - "sampling": "multiple/repeated samping", - "sources": 3, - "spurious_factors": "", - "study_key": "MTBLS352", - "total_study_groups": 4 - }, - { - "inferred_study_design": "full factorial design", - "samples": 239, - "sampling": "single sampling", - "sources": 239, - "spurious_factors": "", - "study_key": "MTBLS354", - "total_study_groups": 2 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS355 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not find protocol matching '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS358 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 27, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS359", - "total_study_groups": 27 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 165, - "sampling": "multiple/repeated samping", - "sources": 7, - "spurious_factors": "", - "study_key": "MTBLS36", - "total_study_groups": 294 - }, - { - "inferred_study_design": "none", - "samples": 4, - "sampling": "single sampling", - "sources": 4, - "spurious_factors": "", - "study_key": "MTBLS362", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 140, - "sampling": "single sampling", - "sources": 140, - "spurious_factors": "", - "study_key": "MTBLS364", - "total_study_groups": 270 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 20, - "sampling": "single sampling", - "sources": 20, - "spurious_factors": "", - "study_key": "MTBLS366", - "total_study_groups": 18 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not find protocol matching '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS368 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 12, - "sampling": "single sampling", - "sources": 12, - "spurious_factors": "", - "study_key": "MTBLS37", - "total_study_groups": 4 - }, - { - "inferred_study_design": "none", - "samples": 59, - "sampling": "multiple/repeated samping", - "sources": 30, - "spurious_factors": "", - "study_key": "MTBLS370", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 10, - "sampling": "multiple/repeated samping", - "sources": 30, - "spurious_factors": "factor: Geographic location ;factor: Geographic location ", - "study_key": "MTBLS372", - "total_study_groups": 32 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 14655, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "", - "study_key": "MTBLS373", - "total_study_groups": 14650 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS374 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not find protocol matching '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS376 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": 1, - "sampling": "single sampling", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS378", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": 57, - "sampling": "multiple/repeated samping", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS38", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 1, - "sampling": "single sampling", - "sources": 1, - "spurious_factors": "factor: spike-in concentration ", - "study_key": "MTBLS381", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 145, - "sampling": "single sampling", - "sources": 145, - "spurious_factors": "", - "study_key": "MTBLS384", - "total_study_groups": 36 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS385 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 27, - "sampling": "single sampling", - "sources": 27, - "spurious_factors": "", - "study_key": "MTBLS39", - "total_study_groups": 27 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 60, - "sampling": "multiple/repeated samping", - "sources": 10, - "spurious_factors": "", - "study_key": "MTBLS4", - "total_study_groups": 2 - }, - { - "inferred_study_design": "full factorial design", - "samples": 51, - "sampling": "single sampling", - "sources": 51, - "spurious_factors": "", - "study_key": "MTBLS40", - "total_study_groups": 4 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 49, - "sampling": "single sampling", - "sources": 49, - "spurious_factors": "", - "study_key": "MTBLS403", - "total_study_groups": 40 - }, - { - "inferred_study_design": "none", - "samples": 211, - "sampling": "single sampling", - "sources": 211, - "spurious_factors": "", - "study_key": "MTBLS404", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 28, - "sampling": "multiple/repeated samping", - "sources": 5, - "spurious_factors": "factor: Medium_after_initiation ;factor: Salt_Quantity_after_initiation ;factor: Container_after_initiation ;factor: Watering_after_initiation ", - "study_key": "MTBLS41", - "total_study_groups": 120 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 76, - "sampling": "single sampling", - "sources": 76, - "spurious_factors": "", - "study_key": "MTBLS414", - "total_study_groups": 40 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS415 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS419 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 30, - "sampling": "multiple/repeated samping", - "sources": 5, - "spurious_factors": "factor: Medium_after_initiation ;factor: Salt_Quantity_after_initiation ;factor: Container_after_initiation ;factor: Watering_after_initiation ", - "study_key": "MTBLS42", - "total_study_groups": 120 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 68, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "factor: Sex ", - "study_key": "MTBLS422", - "total_study_groups": 27 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS424 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 96, - "sampling": "single sampling", - "sources": 96, - "spurious_factors": "", - "study_key": "MTBLS427", - "total_study_groups": 2 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS428 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 47, - "sampling": "multiple/repeated samping", - "sources": 7, - "spurious_factors": "", - "study_key": "MTBLS43", - "total_study_groups": 480 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS433 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS435 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 46, - "sampling": "multiple/repeated samping", - "sources": 7, - "spurious_factors": "", - "study_key": "MTBLS44", - "total_study_groups": 480 - }, - { - "inferred_study_design": "full factorial design", - "samples": 54, - "sampling": "single sampling", - "sources": 54, - "spurious_factors": "", - "study_key": "MTBLS45", - "total_study_groups": 4 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 153, - "sampling": "multiple/repeated samping", - "sources": 10, - "spurious_factors": "factor: Experiment ;factor: Experiment ;factor: Treatment ;factor: Replicate ;factor: Experiment ;factor: Treatment ;factor: Experiment ;factor: Treatment ;factor: Experiment ;factor: Treatment ;factor: Experiment ;factor: Experiment ;factor: Timepoint ;factor: Experiment ;factor: Experiment ;factor: Experiment ", - "study_key": "MTBLS459", - "total_study_groups": 48 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS46 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 15, - "sampling": "single sampling", - "sources": 15, - "spurious_factors": "", - "study_key": "MTBLS461", - "total_study_groups": 18 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 239, - "sampling": "multiple/repeated samping", - "sources": 7, - "spurious_factors": "factor: Batch ;factor: Batch ;factor: Batch ;factor: Batch ;factor: Batch ", - "study_key": "MTBLS464", - "total_study_groups": 108 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 659, - "sampling": "single sampling", - "sources": 659, - "spurious_factors": "", - "study_key": "MTBLS47", - "total_study_groups": 318 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 34, - "sampling": "multiple/repeated samping", - "sources": 17, - "spurious_factors": "", - "study_key": "MTBLS472", - "total_study_groups": 20 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 33, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "", - "study_key": "MTBLS49", - "total_study_groups": 20 - }, - { - "inferred_study_design": "full factorial design", - "samples": 35, - "sampling": "multiple/repeated samping", - "sources": 9, - "spurious_factors": "factor: CarbonDioxide_Quantity_after_initiation ;factor: CarbonDioxide_Quantity_after_01d ", - "study_key": "MTBLS5", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS52 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 40, - "sampling": "multiple/repeated samping", - "sources": 7, - "spurious_factors": "", - "study_key": "MTBLS54", - "total_study_groups": 480 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 183, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "factor: Storage time ;factor: Storage time ", - "study_key": "MTBLS55", - "total_study_groups": 48 - }, - { - "inferred_study_design": "full factorial design", - "samples": 54, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS56", - "total_study_groups": 10 - }, - { - "inferred_study_design": "full factorial design", - "samples": 48, - "sampling": "single sampling", - "sources": 48, - "spurious_factors": "", - "study_key": "MTBLS57", - "total_study_groups": 48 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS59 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 19, - "sampling": "single sampling", - "sources": 19, - "spurious_factors": "factor: Relative Intensity ;factor: Retention Time Modulation ;factor: Skewing ;factor: Min Number Ions ;factor: Gap Penalty ", - "study_key": "MTBLS6", - "total_study_groups": 48 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 44, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "factor: Ectonucleotidase inhibition ;factor: Gender ", - "study_key": "MTBLS61", - "total_study_groups": 12 - }, - { - "inferred_study_design": "full factorial design", - "samples": 12, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS67", - "total_study_groups": 4 - }, - { - "inferred_study_design": "full factorial design", - "samples": 18, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "", - "study_key": "MTBLS69", - "total_study_groups": 3 - }, - { - "inferred_study_design": "full factorial design", - "samples": 113, - "sampling": "multiple/repeated samping", - "sources": 3, - "spurious_factors": "factor: Total Light_100pc_after_28d ;factor: Medium_after_initiation ;factor: Total Light_0pc_after_28d ;factor: Container_after_initiation ;factor: Total Light_100pc_after_07d ;factor: Plants/Container_Quantity_after_14d ;factor: Total Light_0pc_after_initiation ;factor: Total Light_100pc_after_initiation ;factor: Total Light_0pc_after_07d ;factor: Medium_after_14d ;factor: Container_after_14d ", - "study_key": "MTBLS7", - "total_study_groups": 1 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not find protocol matching '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS71 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 1252, - "sampling": "multiple/repeated samping", - "sources": 383, - "spurious_factors": "", - "study_key": "MTBLS72", - "total_study_groups": 2 - }, - { - "inferred_study_design": "full factorial design", - "samples": 108, - "sampling": "multiple/repeated samping", - "sources": 27, - "spurious_factors": "", - "study_key": "MTBLS74", - "total_study_groups": 4 - }, - { - "inferred_study_design": "full factorial design", - "samples": 48, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "", - "study_key": "MTBLS75", - "total_study_groups": 48 - }, - { - "inferred_study_design": "full factorial design", - "samples": 73, - "sampling": "single sampling", - "sources": 73, - "spurious_factors": "", - "study_key": "MTBLS77", - "total_study_groups": 8 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: '\t' expected after '\"'", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS79 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 126, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "", - "study_key": "MTBLS8", - "total_study_groups": 18 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 12, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "factor: Age at sacrifice ;factor: Age at sacrifice ;factor: Age at sacrifice ", - "study_key": "MTBLS81", - "total_study_groups": 576 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 34, - "sampling": "multiple/repeated samping", - "sources": 2, - "spurious_factors": "", - "study_key": "MTBLS85", - "total_study_groups": 40 - }, - { - "inferred_study_design": "none", - "samples": "_", - "sampling": " reason: ('Could not resolve Protocol parameter from Parameter Value '", - "sources": "_", - "spurious_factors": "", - "study_key": "MTBLS86 load FAIL", - "total_study_groups": 1 - }, - { - "inferred_study_design": "full factorial design", - "samples": 14, - "sampling": "multiple/repeated samping", - "sources": 4, - "spurious_factors": "", - "study_key": "MTBLS87", - "total_study_groups": 5 - }, - { - "inferred_study_design": "full factorial design", - "samples": 12, - "sampling": "ERROR LIKELY: check source declaration", - "sources": 1, - "spurious_factors": "", - "study_key": "MTBLS88", - "total_study_groups": 3 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 968, - "sampling": "single sampling", - "sources": 968, - "spurious_factors": "factor: Age ", - "study_key": "MTBLS90", - "total_study_groups": 738816 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 138, - "sampling": "single sampling", - "sources": 138, - "spurious_factors": "", - "study_key": "MTBLS91", - "total_study_groups": 27 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 447, - "sampling": "single sampling", - "sources": 447, - "spurious_factors": "", - "study_key": "MTBLS92", - "total_study_groups": 192000 - }, - { - "inferred_study_design": "fractional factorial design", - "samples": 2139, - "sampling": "single sampling", - "sources": 2139, - "spurious_factors": "", - "study_key": "MTBLS93", - "total_study_groups": 1956740800 - } -] \ No newline at end of file From 0ddcd89fc9fa8219d2c3ba3543dd6ee5fca10c8f Mon Sep 17 00:00:00 2001 From: zigur Date: Tue, 17 Nov 2020 17:40:17 +0000 Subject: [PATCH 08/25] renamed isatools.create.models to isatools.create.model --- .travis.yml | 2 +- isatools/create/assay_templates.py | 2 +- isatools/create/connectors.py | 2 +- isatools/examples/createFromSamplePlan.py | 2 +- tests/test_create_connectors.py | 2 +- tests/test_create_models_json.py | 2 +- tests/test_create_models_study_design.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7684dc7b..54052510 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ install: - pip install --upgrade pip && pip install -r requirements.txt - pip install tox-travis script: -#- flake8 --show-source --exclude=.svn,CVS,.bzr,.hg,.git,isatab_configurator.py,study_design_wizard.py isatools +#- flake8 --show-source --exclude=.svn,CVS,.bzr,.hg,.git,isatab_configurator.py isatools - behave --no-capture --no-capture-stderr --format=progress features/isa-file-handler.feature - python -m unittest discover -s tests/ - coverage run -m unittest discover -s tests/ diff --git a/isatools/create/assay_templates.py b/isatools/create/assay_templates.py index fde8a681..05ac0691 100644 --- a/isatools/create/assay_templates.py +++ b/isatools/create/assay_templates.py @@ -1,4 +1,4 @@ -from isatools.create.models import * +from isatools.create.model import * NAME = 'name' diff --git a/isatools/create/connectors.py b/isatools/create/connectors.py index ede27417..22a15c92 100644 --- a/isatools/create/connectors.py +++ b/isatools/create/connectors.py @@ -1,5 +1,5 @@ from isatools.model import OntologyAnnotation, OntologySource, FactorValue, Characteristic -from isatools.create.models import StudyDesign, NonTreatment, Treatment, StudyCell, StudyArm, SampleAndAssayPlan +from isatools.create.model import StudyDesign, NonTreatment, Treatment, StudyCell, StudyArm, SampleAndAssayPlan from isatools.create.constants import SCREEN, INTERVENTIONS, BASE_FACTORS, SAMPLE, ORGANISM_PART from collections import OrderedDict diff --git a/isatools/examples/createFromSamplePlan.py b/isatools/examples/createFromSamplePlan.py index b077dcaa..4e6f0384 100644 --- a/isatools/examples/createFromSamplePlan.py +++ b/isatools/examples/createFromSamplePlan.py @@ -1,7 +1,7 @@ from __future__ import absolute_import from isatools import isatab -from isatools.create.models import ( +from isatools.create.model import ( IsaModelObjectFactory, SampleAssayPlan, TreatmentFactory, diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py index 7c99444f..f225499b 100644 --- a/tests/test_create_connectors.py +++ b/tests/test_create_connectors.py @@ -20,7 +20,7 @@ Study, Investigation ) -from isatools.create.models import ( +from isatools.create.model import ( StudyDesign, StudyArm, StudyCell, diff --git a/tests/test_create_models_json.py b/tests/test_create_models_json.py index 3f2186fc..1c7aac93 100644 --- a/tests/test_create_models_json.py +++ b/tests/test_create_models_json.py @@ -14,7 +14,7 @@ ParameterValue, OntologySource ) -from isatools.create.models import ( +from isatools.create.model import ( NonTreatment, Treatment, StudyCell, diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py index f9544a51..de026a8c 100644 --- a/tests/test_create_models_study_design.py +++ b/tests/test_create_models_study_design.py @@ -21,7 +21,7 @@ Assay, Process ) -from isatools.create.models import ( +from isatools.create.model import ( NonTreatment, Treatment, TreatmentFactory, From eaebbc629a0d272ceed7c231a7833c9bcba0b6f4 Mon Sep 17 00:00:00 2001 From: zigur Date: Tue, 17 Nov 2020 18:00:25 +0000 Subject: [PATCH 09/25] clean-up for naming coventions #370 --- isatools/create/model.py | 198 ++++++++++++----------- tests/test_create_models_json.py | 46 +++--- tests/test_create_models_study_design.py | 4 +- 3 files changed, 125 insertions(+), 123 deletions(-) diff --git a/isatools/create/model.py b/isatools/create/model.py index 5008aa01..5ca90835 100644 --- a/isatools/create/model.py +++ b/isatools/create/model.py @@ -2145,8 +2145,12 @@ def _generate_samples_and_assays(self, sources_map, sampling_protocol, performer @staticmethod def _increment_counter_by_node_type(counter, node): if isinstance(node, ProductNode): - counter[node.type] = counter[node.type] + 1 if node.type in counter else 1 - # FIXME do we need a check by node.name for DATA_FILE? + # use node.name for DATA_FILE, node.type for other Product Nodes + if node.type == DATA_FILE: + counter[node.name] = counter[node.name] + 1 if node.name in counter else 1 + else: + counter[node.type] = counter[node.type] + 1 if node.type in counter else 1 + if isinstance(node, ProtocolNode): # the attribute "name" should contain the same value as "protocol_type.term" counter[node.name] = counter[node.name] + 1 if node.name in counter else 1 @@ -2161,8 +2165,7 @@ def _generate_isa_elements_from_node( other_materials=None, data_files=None, previous_items=None, - ix=0, - jx=0, + start_node_index=0, counter=None ): if counter is None: @@ -2175,10 +2178,10 @@ def _generate_isa_elements_from_node( other_materials = [] if processes is None: processes = [] - log.debug('# processes: {0} - ix: {1}'.format(len(processes), ix)) + log.debug('# processes: {0} - ix: {1}'.format(len(processes), start_node_index)) counter = StudyDesign._increment_counter_by_node_type(counter, node) - item = isa_objects_factory( - node, assay_file_prefix, ix, counter, + item = StudyDesign._isa_objects_factory( + node, assay_file_prefix, start_node_index, counter, measurement_type=assay_graph.measurement_type, technology_type=assay_graph.technology_type ) @@ -2195,13 +2198,12 @@ def _generate_isa_elements_from_node( else next_node.replicates if isinstance(next_node, ProtocolNode) \ else 1 for jj in range(size): - jx = ii * size + jj - log.debug('ii = {0} - jj = {1} - jx = {2}'.format(ii, jj, jx)) + log.debug('ii = {} - jj = {}'.format(ii, jj)) # counter += 1 processes, other_materials, data_files, next_item, counter = \ StudyDesign._generate_isa_elements_from_node( next_node, assay_graph, assay_file_prefix, processes, other_materials, data_files, - [item], ix=ix, jx=jx, counter=counter + [item], start_node_index=start_node_index, counter=counter ) if isinstance(node, ProtocolNode): item.outputs.append(next_item) @@ -2218,7 +2220,7 @@ def _generate_isa_elements_from_node( assert isinstance(previous_process, Process) assert isinstance(item, Process) log.debug('linking process {0} to process {1}'.format(previous_process.name, item.name)) - plink(previous_process, item) # TODO this does not work + plink(previous_process, item) # TODO check if this generates any issue return processes, other_materials, data_files, item, counter @staticmethod @@ -2256,7 +2258,7 @@ def generate_assay(assay_graph, assay_samples): ix = i * len(assay_samples) * size + j * size + k log.debug('i = {0}, j = {1}, k={2}, ix={3}'.format(i, j, k, ix)) processes, other_materials, data_files, _, __ = StudyDesign._generate_isa_elements_from_node( - node, assay_graph, assay_graph.id, ix=ix, jx=0, counter=None, processes=[], other_materials=[], + node, assay_graph, assay_graph.id, start_node_index=ix, counter=None, processes=[], other_materials=[], data_files=[], previous_items=[sample] ) assay.other_material.extend(other_materials) @@ -2266,6 +2268,92 @@ def generate_assay(assay_graph, assay_samples): len(data_files))) return assay + @staticmethod + def _isa_objects_factory( + node, + assay_file_prefix, + start_node_index, + counter, + measurement_type=None, + technology_type=None, + performer=DEFAULT_PERFORMER + ): + """ + This method generates an ISA element from an ISA node + :param technology_type: + :param measurement_type: + :param node: SequenceNode - can be either a ProductNode or a ProtocolNode + :param assay_file_prefix: str + :param start_node_index: int the index of the starting node in the graph + :param counter: dict containing the counts for this specific subgraph + :param performer: str/Person + :return: either a Sample or a Material or a DataFile. So far only RawDataFile is supported among files + """ + if isinstance(node, ProtocolNode): + return Process( + name='{}_{}-{}-{}'.format( + urlify(node.name), assay_file_prefix, start_node_index, counter[node.name] + ), + executes_protocol=node, + performer=performer, + parameter_values=node.parameter_values, + inputs=[], + outputs=[], + ) + if isinstance(node, ProductNode): + if node.type == SAMPLE: + return Sample( + name='{}-{}-Sample{}'.format(assay_file_prefix, start_node_index, counter[SAMPLE]), + characteristics=node.characteristics + ) + if node.type == EXTRACT: + return Extract( + name='{}-{}-Extract{}'.format(assay_file_prefix, start_node_index, counter[EXTRACT]), + characteristics=node.characteristics + ) + if node.type == LABELED_EXTRACT: + return LabeledExtract( + name='{}-{}-LE{}'.format(assay_file_prefix, start_node_index, counter[LABELED_EXTRACT]), + characteristics=node.characteristics + ) + # under the hypothesis that we deal only with raw data files + # derived data file would require a completely separate approach + if node.type == DATA_FILE: + try: + log.debug('Assay conf. found: {}; {};'.format( + measurement_type, technology_type) + ) + m_type_term = measurement_type.term if isinstance(measurement_type, OntologyAnnotation) \ + else measurement_type + t_type_term = technology_type.term if isinstance(technology_type, OntologyAnnotation) \ + else technology_type + curr_assay_opt = next( + opt for opt in assays_opts if opt['measurement type'] == m_type_term and + opt['technology type'] == t_type_term + ) + log.debug('Assay conf. found: {}; {}; {};'.format( + measurement_type, technology_type, curr_assay_opt) + ) + isa_class = globals()[curr_assay_opt['raw data file'].replace(' ', '')] + assert isa_class in {RawDataFile, RawSpectralDataFile} + return isa_class( + filename='{}_{}-{}-{}'.format( + urlify(node.name), + assay_file_prefix, + start_node_index, + counter[node.name] + ) + ) + except StopIteration: + return RawDataFile( + filename='{}_{}-{}-{}'.format( + urlify(node.name), + assay_file_prefix, + start_node_index, + counter[node.name] + ) + ) + def generate_isa_study(self): """ this is the core method to return the fully populated ISA Study object from the StudyDesign @@ -2515,92 +2603,6 @@ def _generate_quality_control_samples(quality_control, study_cell, sample_size=0 log.debug("Completed post-batch samples") return qc_sources, qc_samples_pre_run, qc_samples_interspersed, qc_samples_post_run, qc_processes -# TODO: should I move this inside the StudyDesign class? -def isa_objects_factory( - node, - assay_file_prefix, - ix, - counter, - measurement_type=None, - technology_type=None, - performer=DEFAULT_PERFORMER -): - """ - This method generates an ISA element from an ISA node - :param technology_type: - :param measurement_type: - :param node: SequenceNode - can be either a ProductNode or a ProtocolNode - :param assay_file_prefix: str - :param ix: int the index of the starting node in the graph - :param counter: dict containing the counts for this specific subgraph - :param performer: str/Person - :return: either a Sample or a Material or a DataFile. So far only RawDataFile is supported among files - """ - if isinstance(node, ProtocolNode): - return Process( - name='{}_{}-{}-{}'.format( - urlify(node.name), assay_file_prefix, ix, counter[node.name] - ), # FIXME!! - executes_protocol=node, - performer=performer, - parameter_values=node.parameter_values, - inputs=[], - outputs=[], - ) - if isinstance(node, ProductNode): - if node.type == SAMPLE: - return Sample( - name='{}-{}-Sample{}'.format(assay_file_prefix, ix, counter[SAMPLE]), - characteristics=node.characteristics - ) - if node.type == EXTRACT: - return Extract( - name='{}-{}-Extract{}'.format(assay_file_prefix, ix, counter[EXTRACT]), - characteristics=node.characteristics - ) - if node.type == LABELED_EXTRACT: - return LabeledExtract( - name='{}-{}-LE{}'.format(assay_file_prefix, ix, counter[LABELED_EXTRACT]), - characteristics=node.characteristics - ) - # under the hypothesis that we deal only with raw data files - # derived data file would require a completely separate approach - if node.type == DATA_FILE: - try: - log.debug('Assay conf. found: {}; {};'.format( - measurement_type, technology_type) - ) - m_type_term = measurement_type.term if isinstance(measurement_type, OntologyAnnotation) \ - else measurement_type - t_type_term = technology_type.term if isinstance(technology_type, OntologyAnnotation) \ - else technology_type - curr_assay_opt = next( - opt for opt in assays_opts if opt['measurement type'] == m_type_term and - opt['technology type'] == t_type_term - ) - log.debug('Assay conf. found: {}; {}; {};'.format( - measurement_type, technology_type, curr_assay_opt) - ) - isa_class = globals()[curr_assay_opt['raw data file'].replace(' ', '')] - assert isa_class in {RawDataFile, RawSpectralDataFile} - return isa_class( - filename='{}_{}-{}-{}'.format( - urlify(node.name), - assay_file_prefix, - ix, - counter[node.type] # FIXME should this be changed to "counter[node.name]"? - ) - ) - except StopIteration: - return RawDataFile( - filename='{}_{}-{}-{}'.format( - urlify(node.name), - assay_file_prefix, - ix, - counter[node.type] # FIXME should this be changed to "counter[node.name]"? - ) - ) - class StudyDesignEncoder(json.JSONEncoder): diff --git a/tests/test_create_models_json.py b/tests/test_create_models_json.py index 1c7aac93..d8d74ac3 100644 --- a/tests/test_create_models_json.py +++ b/tests/test_create_models_json.py @@ -154,7 +154,7 @@ class OntologyAnnotationTest(unittest.TestCase): def test_simple_ontology_annotation(self): annotation = OntologyAnnotation(term="aspirin") annotation_json = json.dumps(annotation, cls=OntologyAnnotationEncoder, sort_keys=True, indent=4) - print(annotation_json) + log.debug(annotation_json) self.assertEqual(json.loads(annotation_json), {"term": "aspirin"}) @@ -461,7 +461,7 @@ def test_encode_single_treatment_cell_with_ontology_annotations(self): te1.factor_values = [f1v1, f2v1, f3v1] cell = StudyCell(name='test_cell', elements=(te1, )) json_cell = json.loads(json.dumps(cell, cls=StudyCellEncoder)) - print(json.dumps(cell, cls=StudyCellEncoder, indent=4, sort_keys=True)) + log.debug(json.dumps(cell, cls=StudyCellEncoder, indent=4, sort_keys=True)) for factor_value_dict in json_cell['elements'][0]['factorValues']: self.assertIsNotNone(factor_value_dict['value']) @@ -495,9 +495,9 @@ def test_decode_multi_treatment_cell(self): actual_cell = decoder.loads(json_text) self.assertEqual(len(self.cell_multi_elements_padded.elements), len(actual_cell.elements)) for i in range(len(actual_cell.elements)): - print(i) - print(actual_cell.elements[i]) - print(self.cell_multi_elements_padded.elements[i]) + log.debug(i) + log.debug(actual_cell.elements[i]) + log.debug(self.cell_multi_elements_padded.elements[i]) self.assertEqual(self.cell_multi_elements_padded.elements[i], actual_cell.elements[i]) self.assertEqual(self.cell_multi_elements_padded, actual_cell) @@ -566,10 +566,10 @@ def test_decode_dna_rna_extraction_plan(self): self.assertEqual(self.plan.sample_plan, actual_plan.sample_plan) unmatched_expected = self.plan.assay_plan - actual_plan.assay_plan unmatched_actual = actual_plan.assay_plan - self.plan.assay_plan - print(unmatched_actual) - print(unmatched_expected) + log.debug(unmatched_actual) + log.debug(unmatched_expected) if unmatched_expected and unmatched_actual: - print('here we are') + log.debug('here we are') unmatched_expected_el = unmatched_expected.pop() unmatched_actual_el = unmatched_actual.pop() self.assertEqual(unmatched_expected_el.id, unmatched_actual_el.id) @@ -578,7 +578,7 @@ def test_decode_dna_rna_extraction_plan(self): self.assertEqual(repr(unmatched_expected_el.links), repr(unmatched_actual_el.links)) self.assertEqual(repr(unmatched_expected_el), repr(unmatched_actual_el)) self.assertEqual(unmatched_expected_el, unmatched_actual_el) - print('all these test passed') + log.debug('all these test passed') self.assertEqual(self.plan.assay_plan, actual_plan.assay_plan) self.assertEqual(self.plan.sample_to_assay_map, actual_plan.sample_to_assay_map) self.assertEqual(self.plan, actual_plan) @@ -607,7 +607,7 @@ def test_encode_sample_and_assay_plan_with_ontology_annotations(self): sample2assay_plan = {input_material: [nmr_assay_graph]} sap1.sample_to_assay_map = sample2assay_plan actual_json_plan = json.loads(json.dumps(sap1, cls=SampleAndAssayPlanEncoder)) - print(json.dumps(sap1, cls=SampleAndAssayPlanEncoder, indent=4, sort_keys=True)) + log.debug(json.dumps(sap1, cls=SampleAndAssayPlanEncoder, indent=4, sort_keys=True)) assay_node_json = next(node for node in actual_json_plan["assayPlan"][0]["nodes"] if node["@id"] == "nmr_spectroscopy_000_000") for param_val_json in assay_node_json["parameterValues"]: @@ -625,8 +625,8 @@ def test_encode_arm_with_single_element_cells(self): with open(os.path.join(os.path.dirname(__file__), 'data', 'json', 'create', 'study-arm-with-single-element-cells.json')) as expected_json_fp: expected_json_arm = json.load(expected_json_fp) - print('expected source type is {}'.format(expected_json_arm['sourceType'])) - print('actual source type is {}'.format(actual_json_arm['sourceType'])) + log.debug('expected source type is {}'.format(expected_json_arm['sourceType'])) + log.debug('actual source type is {}'.format(actual_json_arm['sourceType'])) self.assertEqual(ordered(actual_json_arm["sourceType"]), ordered(expected_json_arm["sourceType"])) self.assertEqual(ordered(actual_json_arm), ordered(expected_json_arm)) @@ -726,13 +726,13 @@ def test_decode_study_design_with_three_arms(self): self.assertEqual(self.three_arm_study_design.name, actual_study_design.name) """ for i, arm in enumerate(self.three_arm_study_design.study_arms): - print("comparing study arm #{0} - {1}".format(i, arm.name)) - print("Difference:\n") + log.debug("comparing study arm #{0} - {1}".format(i, arm.name)) + log.debug("Difference:\n") difflib.ndiff(arm, actual_study_design.study_arms[i]) - print("\nExpected:\n") - print(arm) - print("\nActual:\n") - print(actual_study_design.study_arms[i]) + log.debug("\nExpected:\n") + log.debug(arm) + log.debug("\nActual:\n") + log.debug(actual_study_design.study_arms[i]) self.assertEqual(arm, actual_study_design.study_arms[i]) self.assertEqual(self.three_arm_study_design.study_arms[0], actual_study_design.study_arms[0]) self.assertEqual(self.three_arm_study_design.study_arms[1], actual_study_design.study_arms[1]) @@ -740,13 +740,13 @@ def test_decode_study_design_with_three_arms(self): self.assertEqual(expected_third_arm.name, actual_study_design.study_arms[2].name) self.assertEqual(expected_third_arm.group_size, actual_study_design.study_arms[2].group_size) - # print("Arm map:") - # print(list(actual_study_design.study_arms[2].arm_map.keys())) + # log.debug("Arm map:") + # log.debug(list(actual_study_design.study_arms[2].arm_map.keys())) i = 0 for cell, sample_assay_plan in expected_third_arm.arm_map.items(): - print("testing cell {0}".format(cell.name)) - print(cell) - print(list(actual_study_design.study_arms[2].arm_map.keys())[i]) + log.debug("testing cell {0}".format(cell.name)) + log.debug(cell) + log.debug(list(actual_study_design.study_arms[2].arm_map.keys())[i]) self.assertTrue(cell in actual_study_design.study_arms[2].arm_map) self.assertEqual(sample_assay_plan, actual_study_design.study_arms[2].arm_map[cell]) i = i + 1 diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py index de026a8c..e2879a07 100644 --- a/tests/test_create_models_study_design.py +++ b/tests/test_create_models_study_design.py @@ -93,7 +93,7 @@ def test_init_and_propeties(self): def test_repr(self): print(self.non_treatment.duration) self.assertEqual(repr(self.non_treatment), - "isatools.create.models.NonTreatment(type='screen', duration=isatools.model.FactorValue(" + "isatools.create.model.NonTreatment(type='screen', duration=isatools.model.FactorValue(" "factor_name=isatools.model.StudyFactor(name='DURATION', " "factor_type=isatools.model.OntologyAnnotation(term='time', term_source=None, " "term_accession='', comments=[]), comments=[]), value=10.0, " @@ -125,7 +125,7 @@ def setUp(self): def test_repr(self): self.assertEqual(repr(self.treatment), - "isatools.create.models.Treatment(type=chemical intervention, " + "isatools.create.model.Treatment(type=chemical intervention, " "factor_values=[isatools.model.FactorValue(factor_name=isatools.model.StudyFactor(name='AGENT'" ", factor_type=isatools.model.OntologyAnnotation(term='perturbation agent', term_source=None, " "term_accession='', comments=[]), comments=[]), value='nitroglycerin', unit=None), " From 2d2d343b0c2207e8c3aca7d4af3ba4143821be1f Mon Sep 17 00:00:00 2001 From: zigur Date: Wed, 18 Nov 2020 09:36:31 +0000 Subject: [PATCH 10/25] extending travis waiting time --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 54052510..196afc07 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ install: script: #- flake8 --show-source --exclude=.svn,CVS,.bzr,.hg,.git,isatab_configurator.py isatools - behave --no-capture --no-capture-stderr --format=progress features/isa-file-handler.feature -- python -m unittest discover -s tests/ +- travis_wait python -m unittest discover -s tests/ - coverage run -m unittest discover -s tests/ - coverage report -m branches: From 1ea6eb999bf92c3dd8d105c0e2e243ac3fc5b4fb Mon Sep 17 00:00:00 2001 From: zigur Date: Wed, 18 Nov 2020 18:55:51 +0000 Subject: [PATCH 11/25] fixing assay name prefix #370 --- isatools/create/model.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/isatools/create/model.py b/isatools/create/model.py index 5ca90835..88853a3b 100644 --- a/isatools/create/model.py +++ b/isatools/create/model.py @@ -377,7 +377,6 @@ def check_follow_up(): } func = switcher.get(new_element.type, lambda: False) # lines = inspect.getsource(func) - # print('Element type: {element_type} \nfunc: {func}'.format(element_type=new_element.type, func=lines)) return func() @staticmethod @@ -1428,8 +1427,8 @@ def from_sample_and_assay_plan_dict(cls, name, sample_type_dicts, *assay_plan_di assay_plan_dict, # FIXME: this id cannot work as it is id_=str(uuid.uuid4()) if use_guids - else assay_plan_dict['id'] if 'id' in assay_plan_dict - else '{0}{1}'.format( + else '{}{}'.format(ASSAY_GRAPH_PREFIX, assay_plan_dict['id']) if 'id' in assay_plan_dict + else '{}{}'.format( ASSAY_GRAPH_PREFIX, str(i).zfill(n_digits(len(assay_plan_dicts))) ), quality_control=quality_controls[i] if len(quality_controls) > i else None From 73233bfce761d152a3007bb4031d8095d033b7a5 Mon Sep 17 00:00:00 2001 From: zigur Date: Wed, 18 Nov 2020 18:55:59 +0000 Subject: [PATCH 12/25] cleanup --- isatools/isatab.py | 3 --- isatools/utils.py | 29 +++++++++++------------------ tests/test_mw2isa.py | 1 - 3 files changed, 11 insertions(+), 22 deletions(-) diff --git a/isatools/isatab.py b/isatools/isatab.py index 136e83ce..5f845912 100644 --- a/isatools/isatab.py +++ b/isatools/isatab.py @@ -5681,7 +5681,6 @@ def pbar(x): return x # don't drop duplicates for _, object_series in pbar(DF.iterrows()): # if _ == 0: - # print('processing: ', object_series[object_label]) protocol_ref = str(object_series[object_label]) process_key = process_keygen( protocol_ref, column_group, _cg, DF.columns, @@ -5855,8 +5854,6 @@ def pbar(x): return x data_node.generated_from.append( sample_node_context) - # print('key sequence = ', process_key_sequence) - # Link the processes in each sequence for pair in pairwise(process_key_sequence): left = processes[pair[0]] # get process on left of pair diff --git a/isatools/utils.py b/isatools/utils.py index 7bf1ebf2..113ad01d 100644 --- a/isatools/utils.py +++ b/isatools/utils.py @@ -6,20 +6,12 @@ import logging import os import re -import shutil import sys -import tempfile import uuid from functools import reduce from zipfile import ZipFile - import pandas as pd -# import modin.pandas as pd_modin - -from mzml2isa.mzml import MzMLFile - from isatools import isatab -# from isatools.create import create_from_galaxy_parameters from isatools.model import ( DerivedSpectralDataFile, ISAModelAttributeError, @@ -141,7 +133,7 @@ def insert_distinct_parameter(table_fp, protocol_ref_to_unpool): break if name_header is not None: - print('Are you sure you want to add a column of hash values in {}? ' + log.debug('Are you sure you want to add a column of hash values in {}? ' 'Y/(N)'.format(name_header)) confirm = input() if confirm == 'Y': @@ -149,7 +141,7 @@ def insert_distinct_parameter(table_fp, protocol_ref_to_unpool): table_fp.seek(0) df.to_csv(table_fp, index=None, header=headers, sep='\t') else: - print('Could not find appropriate column to fill with hashes') + log.debug('Could not find appropriate column to fill with hashes') def contains(small_list, big_list): @@ -357,9 +349,9 @@ def check_loadable(tab_dir_root): x.startswith('MTBLS')]: try: isatab.load(os.path.join(tab_dir_root, mtbls_dir)) - print('{} load OK'.format(mtbls_dir)) + log.debug('{} load OK'.format(mtbls_dir)) except Exception as e: - print('{0} load FAIL, reason: {1}'.format(mtbls_dir, e)) + log.debug('{0} load FAIL, reason: {1}'.format(mtbls_dir, e)) def compute_study_factors_on_mtbls(tab_dir_root): @@ -390,6 +382,7 @@ def compute_study_factors_on_mtbls(tab_dir_root): pass +# TODO: is this any useful at all? (by Massi 18/11/2020) class IsaTabAnalyzer(object): """A utility to analyze ISA-Tabs""" @@ -503,7 +496,7 @@ def generate_study_design_report(self, get_num_study_groups=True, .drop_duplicates())) )) except Exception as e: - print("error in query, {}".format(e)) + log.debug("error in query, {}".format(e)) study_design_report[-1]['assays'].append(assay_report) return study_design_report @@ -574,7 +567,7 @@ def batch_fix_isatabs(settings): :return: None """ for table_file_path in settings.keys(): - print('Fixing {table_file_path}...'.format( + log.debug('Fixing {table_file_path}...'.format( table_file_path=table_file_path)) fixer = IsaTabFixer(table_file_path=table_file_path) fixer.fix_factor( @@ -897,8 +890,8 @@ def remove_unused_protocols(self): process.executes_protocol.name) except KeyError: pass - print('Unused protocols: {}'.format(unused_protocol_names)) - print('Location of unused protocols: {}'.format( + log.info('Unused protocols: {}'.format(unused_protocol_names)) + log.info('Location of unused protocols: {}'.format( list(map(lambda pr: True if pr.name in unused_protocol_names else False, study.protocols)) )) # remove these protocols from study.protocols @@ -910,9 +903,9 @@ def remove_unused_protocols(self): study.protocols = clean_protocols_list """ clean_protocols = [pr for pr in study.protocols if pr.name not in unused_protocol_names] - print('Clean protocol list: {}'.format([pr.name for pr in clean_protocols])) + log.info('Clean protocol list: {}'.format([pr.name for pr in clean_protocols])) study.protocols = clean_protocols - print('Clean study.protocols: {}'.format([pr.name for pr in study.protocols])) + log.info('Clean study.protocols: {}'.format([pr.name for pr in study.protocols])) isatab.dump( investigation, output_path=os.path.dirname(self.path), i_file_name='{filename}.fix'.format( diff --git a/tests/test_mw2isa.py b/tests/test_mw2isa.py index 5496b311..3182b09c 100644 --- a/tests/test_mw2isa.py +++ b/tests/test_mw2isa.py @@ -31,7 +31,6 @@ def test_conversion(self): log.info("conversion successful, invoking the validator for " + study_id) with open(os.path.join(self._tmp_dir, study_id, 'i_investigation.txt')) as fp: report = isatab.validate(fp) - print(report) if len(report['errors']) > 0: self.fail("conversion successful but validation failed") else: From 387b126d827a2973bb7ff38e957b8dacbf43314f Mon Sep 17 00:00:00 2001 From: zigur Date: Wed, 18 Nov 2020 19:12:45 +0000 Subject: [PATCH 13/25] cleanup of print() v2 --- isatools/model.py | 4 ++-- isatools/sampletab.py | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/isatools/model.py b/isatools/model.py index b5c95694..f3f763eb 100644 --- a/isatools/model.py +++ b/isatools/model.py @@ -22,7 +22,6 @@ from collections.abc import Iterable import networkx as nx - from isatools.errors import ISAModelAttributeError @@ -441,6 +440,7 @@ def add_ontology_source_reference(self, name='', version='', version: OntologySource version description: OntologySource description file: OntologySource file + comments: list """ c = OntologySource(name=name, version=version, description=description, file=file, comments=comments) @@ -1749,7 +1749,7 @@ def add_factor(self, name, factor_type): def del_factor(self, name, are_you_sure=False): if self.get_factor(name=name) is None: log.warning( - 'A factor with name "{}" hasnot been found in the study' + 'A factor with name "{}" has not been found in the study' .format(name)) else: if are_you_sure: # force user to say yes, to be sure to be sure diff --git a/isatools/sampletab.py b/isatools/sampletab.py index 08a500b4..c34bf98e 100644 --- a/isatools/sampletab.py +++ b/isatools/sampletab.py @@ -11,7 +11,6 @@ import numpy as np import pandas as pd -# import modin.pandas as pd_modin from progressbar import ETA, Bar, ProgressBar, SimpleProgress @@ -149,7 +148,7 @@ def get_value(object_column, column_group, object_series, try: value.term_source = ontology_source_map[term_source_value] except KeyError: - print('term source: ', term_source_value, ' not found') + log.warning('term source: ', term_source_value, ' not found') term_accession_value = str(object_series[offset_2r_col]) @@ -183,8 +182,8 @@ def get_value(object_column, column_group, object_series, unit_term_value.term_source = \ ontology_source_map[unit_term_source_value] except KeyError: - print('term source: ', unit_term_source_value, - ' not found') + log.warning('term source: ', unit_term_source_value, + ' not found') term_accession_value = object_series[offset_3r_col] From 51c146c2df3567ceb659c04b2d4ba228aa718f22 Mon Sep 17 00:00:00 2001 From: zigur Date: Thu, 19 Nov 2020 18:08:12 +0000 Subject: [PATCH 14/25] removed printout message #374 --- isatools/net/biocrates2isatab.py | 39 +++++++-------------- isatools/net/mtbls-assay-definition2dict.py | 5 +-- 2 files changed, 14 insertions(+), 30 deletions(-) diff --git a/isatools/net/biocrates2isatab.py b/isatools/net/biocrates2isatab.py index f2a2af1e..ea214f56 100644 --- a/isatools/net/biocrates2isatab.py +++ b/isatools/net/biocrates2isatab.py @@ -2,14 +2,7 @@ """Functions for importing from BioCrates""" from time import time import os - -# os.environ["MODIN_ENGINE"] = "ray" -# os.environ["MODIN_CPUS"] = "4" import pandas as pd - -# import ray -# ray.init(num_cpus=1) -# import modin.pandas as pd import glob import logging @@ -40,8 +33,6 @@ os.path.dirname( os.path.abspath(__file__)), 'resources', 'saxon9', 'saxon9he.jar') -print(DEFAULT_SAXON_EXECUTABLE) - BIOCRATES_DIR = os.path.join(os.path.dirname(__file__), 'resources', 'biocrates') @@ -190,12 +181,12 @@ def biocrates_to_isatab_convert(biocrates_filename, saxon_jar_path=DEFAULT_SAXON buffer = BytesIO() destination_dir = os.path.abspath(dir_name) - print('Destination dir is: ' + destination_dir) + logger.debug('Destination dir is: ' + destination_dir) logger.info('Destination dir is: ' + destination_dir) if os.path.exists(destination_dir): logger.debug('Removing dir' + destination_dir) - print('Removing dir' + destination_dir) + logger.debug('Removing dir' + destination_dir) rmtree(destination_dir) try: @@ -212,12 +203,12 @@ def biocrates_to_isatab_convert(biocrates_filename, saxon_jar_path=DEFAULT_SAXON logger.error("isatools.convert.biocrates2isatab: " "CalledProcessError caught ", err.returncode) - print(err) + logger.debug(err) with ZipFile(buffer, 'w') as zip_file: # use relative dir_name to avoid absolute path on file names zipdir(dir_name, zip_file) - print("!", zip_file.namelist()) + logger.debug("!", zip_file.namelist()) # clean up the target directory after the ZIP file has been closed # rmtree(destination_dir) @@ -278,7 +269,7 @@ def writeOutToFile(plate, polarity, usedop, platebarcode, output_dir, if len(pos_injection) > 0: filename = 'm_MTBLSXXX_' + usedop + '_' + platebarcode + '_' + polarity.lower() \ + '_maf.txt' - print("filename: ", filename) + logger.debug("filename: ", filename) with open(os.path.join(output_dir, filename), 'w') as file_handler: # writing out the header file_handler.write('metabolite_identification') @@ -331,18 +322,18 @@ def complete_MAF(maf_stub): def add_sample_metadata(sample_info_file, input_study_file): S_STUDY_LOC = os.path.join(DESTINATION_DIR, input_study_file) - print("study file location:", S_STUDY_LOC) + logger.debug("study file location:", S_STUDY_LOC) # data = pd_modin.read_csv(S_STUDY_LOC, sep='\t') data = pd.read_csv(S_STUDY_LOC, sep='\t') - print("study file:", data) + logger.debug("study file:", data) SAMPLE_METADATA_LOC = os.path.join(SAMPLE_METADATA_INPUT_DIR, sample_info_file) - print("sample metadata file location:", SAMPLE_METADATA_LOC) + logger.debug("sample metadata file location:", SAMPLE_METADATA_LOC) # sample_desc = pd_modin.read_csv(SAMPLE_METADATA_LOC) sample_desc = pd.read_csv(SAMPLE_METADATA_LOC) - print("sample metadata: ", sample_desc) + logger.debug("sample metadata: ", sample_desc) # data.join(sample_desc, on='Characteristics[barcode identifier]') @@ -351,7 +342,7 @@ def add_sample_metadata(sample_info_file, input_study_file): # result = pd_modin.merge(data, sample_desc, on='Characteristics[barcode identifier]', left_index=True, how='outer') result = pd.merge(data, sample_desc, on='Characteristics[barcode identifier]', left_index=True, how='outer') cols = result.columns.tolist() - print(cols) + logger.debug(cols) result = result[['Source Name', 'Material Type', 'Characteristics[barcode identifier]', 'internal_ID', 'resolute_ID', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', @@ -389,11 +380,7 @@ def add_sample_metadata(sample_info_file, input_study_file): 'Term Accession Number.1': 'Term Accession Number' }) - - # print("results:", result) - result.to_csv(S_STUDY_LOC , sep='\t', encoding='utf-8', index=False) - - + result.to_csv(S_STUDY_LOC, sep='\t', encoding='utf-8', index=False) def parseSample(biocrates_filename): @@ -417,7 +404,7 @@ def parseSample(biocrates_filename): plates = soup.find_all('plate') for plate in plates: usedop = plate.get('usedop') - # print(usedop) + # logger.debug(usedop) platebarcode = plate.get('platebarcode') # extracting the the distinct column labels, metabolites, # and rawdatafilename collect the data into a dictionary @@ -437,7 +424,7 @@ def parseSample(biocrates_filename): parseSample(biocrates_filename='biocrates-merged-output.xml') add_sample_metadata('EX0003_sample_metadata.csv', 's_study_biocrates.txt') end = time() - print('The conversion took {:.2f} s.'.format(end - start)) + logger.debug('The conversion took {:.2f} s.'.format(end - start)) # parseSample(sys.argv[1]) # uncomment to run test # merged = merge_biocrates_files("/Users/Philippe/Documents/git/biocrates-DATA/Biocrates-TUM/input-Biocrates-XML-files/all-biocrates-xml-files/") diff --git a/isatools/net/mtbls-assay-definition2dict.py b/isatools/net/mtbls-assay-definition2dict.py index 2e750162..b9358c8a 100644 --- a/isatools/net/mtbls-assay-definition2dict.py +++ b/isatools/net/mtbls-assay-definition2dict.py @@ -2,8 +2,6 @@ import csv from collections import OrderedDict import pandas as pd -import modin.pandas as pd_modin -import json import rdflib from rdflib import * @@ -20,8 +18,6 @@ MTBLS_ASSAY_DEF_FILE = os.path.join(MTBLS_DIR, MTBLS_FILE) -print(MTBLS_ASSAY_DEF_FILE) - xls = pd.ExcelFile(MTBLS_CV_FILE) logging.basicConfig() @@ -48,6 +44,7 @@ def load_terms_from_owl(): return class_labels, subclasses + def build_params(record, assay_dictionary, datafr): # vocab_graph = rdflib.Graph() From 8c7a44b3abd1479501585855424d4c4e2bd2dc37 Mon Sep 17 00:00:00 2001 From: zigur Date: Thu, 19 Nov 2020 18:08:41 +0000 Subject: [PATCH 15/25] add support for other RawDataFile types --- isatools/create/constants.py | 2 +- isatools/create/model.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/isatools/create/constants.py b/isatools/create/constants.py index c883c853..1cf55218 100644 --- a/isatools/create/constants.py +++ b/isatools/create/constants.py @@ -67,7 +67,7 @@ SAMPLE_PREFIX = 'SMP' EXTRACT_PREFIX = 'EXTR' LABELED_EXTRACT_PREFIX = 'LBLEXTR' -ASSAY_GRAPH_PREFIX = 'ASSAY' +ASSAY_GRAPH_PREFIX = 'AT' # AT stands for Assay Type # constants specific to the sampling plan in the study generation from the study design RUN_ORDER = 'run order' diff --git a/isatools/create/model.py b/isatools/create/model.py index 88853a3b..eee1080c 100644 --- a/isatools/create/model.py +++ b/isatools/create/model.py @@ -23,7 +23,7 @@ SCREEN, RUN_IN, WASHOUT, FOLLOW_UP, ELEMENT_TYPES, INTERVENTIONS, DURATION_FACTOR, BASE_FACTORS, SOURCE, SAMPLE, EXTRACT, LABELED_EXTRACT, DATA_FILE, GROUP_PREFIX, SUBJECT_PREFIX, SAMPLE_PREFIX, - EXTRACT_PREFIX, LABELED_EXTRACT_PREFIX, ASSAY_GRAPH_PREFIX, + ASSAY_GRAPH_PREFIX, RUN_ORDER, STUDY_CELL, assays_opts, DEFAULT_SOURCE_TYPE, SOURCE_QC_SOURCE_NAME, QC_SAMPLE_NAME, QC_SAMPLE_TYPE_PRE_RUN, QC_SAMPLE_TYPE_POST_RUN, @@ -47,6 +47,8 @@ DataFile, RawDataFile, RawSpectralDataFile, # this is required for the module to work + FreeInductionDecayDataFile, + ArrayDataFile, Extract, LabeledExtract, plink @@ -2334,7 +2336,10 @@ def _isa_objects_factory( measurement_type, technology_type, curr_assay_opt) ) isa_class = globals()[curr_assay_opt['raw data file'].replace(' ', '')] - assert isa_class in {RawDataFile, RawSpectralDataFile} + assert isa_class in { + # expand this set if needed + RawDataFile, RawSpectralDataFile, ArrayDataFile, FreeInductionDecayDataFile + } return isa_class( filename='{}_{}-{}-{}'.format( urlify(node.name), From 8329115a6d30672a85d104cfa3c6dac87515322a Mon Sep 17 00:00:00 2001 From: zigur Date: Mon, 23 Nov 2020 16:43:09 +0000 Subject: [PATCH 16/25] fixing TypeError: expected string or bytes-like object --- isatools/create/model.py | 16 +++++++++------- tests/test_create_connectors.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/isatools/create/model.py b/isatools/create/model.py index eee1080c..65402335 100644 --- a/isatools/create/model.py +++ b/isatools/create/model.py @@ -1025,15 +1025,17 @@ def generate_assay_plan_from_dict(cls, assay_plan_dict, if node_key in ('id', 'name', 'selected_sample_types', 'measurement_type', 'technology_type'): continue + node_name = node_key.term if isinstance(node_key, OntologyAnnotation) else node_key + if isinstance(node_params, list): # the node is a ProductNode for i, node_params_dict in enumerate(node_params): for j, prev_node in enumerate(previous_nodes): # log.debug('count: {0}, prev_node: {1}'.format(j, prev_node.id)) product_node = ProductNode( id_=str(uuid.uuid4()) if use_guids else '{0}_{1}_{2}'.format( - re.sub(r'\s+', '_', node_key), str(i).zfill(3), str(j).zfill(3) + re.sub(r'\s+', '_', node_name), str(i).zfill(3), str(j).zfill(3) ), - name=node_key, node_type=node_params_dict['node_type'], size=node_params_dict['size'], + name=node_name, node_type=node_params_dict['node_type'], size=node_params_dict['size'], characteristics=[ Characteristic(category=node_params_dict['characteristics_category'], value=node_params_dict['characteristics_value']) @@ -1051,13 +1053,13 @@ def generate_assay_plan_from_dict(cls, assay_plan_dict, pv_names, pv_all_values = list(node_params.keys()), list(node_params.values()) pv_combinations = itertools.product(*[val for val in pv_all_values]) for i, pv_combination in enumerate(pv_combinations): - # log.debug('pv_combination: {0}'.format(pv_combination)) + log.debug('pv_combination: {0}'.format(pv_combination)) if not previous_nodes: protocol_node = ProtocolNode( id_=str(uuid.uuid4()) if use_guids else '{0}_{1}'.format( - re.sub(r'\s+', '_', node_key), str(i).zfill(3) + re.sub(r'\s+', '_', node_name), str(i).zfill(ZFILL_WIDTH) ), - name=node_key, protocol_type=node_key, + name=node_name, protocol_type=node_key, parameter_values=[ ParameterValue(category=ProtocolParameter(parameter_name=pv_names[ix]), value=pv) @@ -1072,9 +1074,9 @@ def generate_assay_plan_from_dict(cls, assay_plan_dict, # log.debug('count: {0}, prev_node: {1}'.format(j, prev_node.id)) protocol_node = ProtocolNode( id_=str(uuid.uuid4()) if use_guids else '{0}_{1}_{2}'.format( - re.sub(r'\s+', '_', node_key), str(i).zfill(3), str(j).zfill(3) + re.sub(r'\s+', '_', node_name), str(i).zfill(3), str(j).zfill(3) ), - name=node_key, protocol_type=node_key, + name=node_name, protocol_type=node_key, parameter_values=[ ParameterValue(category=ProtocolParameter(parameter_name=pv_names[ix]), value=pv) diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py index f225499b..f51d8d90 100644 --- a/tests/test_create_connectors.py +++ b/tests/test_create_connectors.py @@ -143,4 +143,19 @@ def test_generate_study_design_from_config_with_observational_factors(self): inv_dict = json.loads(inv_json) self.assertIsInstance(inv_dict, dict) data_frames = isatab.dump_tables_to_dataframes(investigation) + self.assertIsInstance(data_frames, dict) + + def test_generate_study_design_from_config_with_observational_factors_and_ontology_annotations(self): + ds_design_config = self._load_config('study-design-crossover-onto-annotated-ms-and-nnmr.json') + design = generate_study_design_from_config(ds_design_config) + self.assertIsInstance(design, StudyDesign) + investigation = Investigation(studies=[design.generate_isa_study()]) + json.dumps( + investigation, + cls=ISAJSONEncoder, + sort_keys=True, + indent=4, + separators=(',', ': ') + ) + data_frames = isatab.dump_tables_to_dataframes(investigation) self.assertIsInstance(data_frames, dict) \ No newline at end of file From 2479e02ea83fafd60e650e2c73d7a2fab7c3dc14 Mon Sep 17 00:00:00 2001 From: zigur Date: Mon, 23 Nov 2020 16:43:29 +0000 Subject: [PATCH 17/25] cleanup --- isatools/sampletab.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/isatools/sampletab.py b/isatools/sampletab.py index c34bf98e..3ba23b51 100644 --- a/isatools/sampletab.py +++ b/isatools/sampletab.py @@ -143,7 +143,7 @@ def get_value(object_column, column_group, object_series, term_source_value = object_series[offset_1r_col] - if term_source_value is not '': + if term_source_value != '': try: value.term_source = ontology_source_map[term_source_value] @@ -152,7 +152,7 @@ def get_value(object_column, column_group, object_series, term_accession_value = str(object_series[offset_2r_col]) - if term_accession_value is not '': + if term_accession_value != '': value.term_accession = term_accession_value return value, None @@ -176,7 +176,7 @@ def get_value(object_column, column_group, object_series, unit_term_source_value = object_series[offset_2r_col] - if unit_term_source_value is not '': + if unit_term_source_value != '': try: unit_term_value.term_source = \ @@ -187,7 +187,7 @@ def get_value(object_column, column_group, object_series, term_accession_value = object_series[offset_3r_col] - if term_accession_value is not '': + if term_accession_value != '': unit_term_value.term_accession = term_accession_value return cell_value, unit_term_value From 442f8dea4fce2f928f979a2a2ece8954148d46f7 Mon Sep 17 00:00:00 2001 From: zigur Date: Mon, 23 Nov 2020 18:17:49 +0000 Subject: [PATCH 18/25] changed naming convention #370 --- isatools/create/model.py | 18 +++++++++--------- tests/test_create_connectors.py | 6 +++++- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/isatools/create/model.py b/isatools/create/model.py index 65402335..61709f7b 100644 --- a/isatools/create/model.py +++ b/isatools/create/model.py @@ -2294,8 +2294,8 @@ def _isa_objects_factory( """ if isinstance(node, ProtocolNode): return Process( - name='{}_{}-{}-{}'.format( - urlify(node.name), assay_file_prefix, start_node_index, counter[node.name] + name='{}-S{}-{}-Acquisition-R{}'.format( + assay_file_prefix, start_node_index, urlify(node.name), counter[node.name] ), executes_protocol=node, performer=performer, @@ -2306,17 +2306,17 @@ def _isa_objects_factory( if isinstance(node, ProductNode): if node.type == SAMPLE: return Sample( - name='{}-{}-Sample{}'.format(assay_file_prefix, start_node_index, counter[SAMPLE]), + name='{}-S{}-Sample-R{}'.format(assay_file_prefix, start_node_index, counter[SAMPLE]), characteristics=node.characteristics ) if node.type == EXTRACT: return Extract( - name='{}-{}-Extract{}'.format(assay_file_prefix, start_node_index, counter[EXTRACT]), + name='{}-S{}-Extract-R{}'.format(assay_file_prefix, start_node_index, counter[EXTRACT]), characteristics=node.characteristics ) if node.type == LABELED_EXTRACT: return LabeledExtract( - name='{}-{}-LE{}'.format(assay_file_prefix, start_node_index, counter[LABELED_EXTRACT]), + name='{}-S{}-LE-R{}'.format(assay_file_prefix, start_node_index, counter[LABELED_EXTRACT]), characteristics=node.characteristics ) # under the hypothesis that we deal only with raw data files @@ -2343,19 +2343,19 @@ def _isa_objects_factory( RawDataFile, RawSpectralDataFile, ArrayDataFile, FreeInductionDecayDataFile } return isa_class( - filename='{}_{}-{}-{}'.format( - urlify(node.name), + filename='{}-S{}-{}-R{}'.format( assay_file_prefix, start_node_index, + urlify(node.name), counter[node.name] ) ) except StopIteration: return RawDataFile( - filename='{}_{}-{}-{}'.format( - urlify(node.name), + filename='{}-S{}-{}-R{}'.format( assay_file_prefix, start_node_index, + urlify(node.name), counter[node.name] ) ) diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py index f51d8d90..51a7f0e0 100644 --- a/tests/test_create_connectors.py +++ b/tests/test_create_connectors.py @@ -150,6 +150,9 @@ def test_generate_study_design_from_config_with_observational_factors_and_ontolo design = generate_study_design_from_config(ds_design_config) self.assertIsInstance(design, StudyDesign) investigation = Investigation(studies=[design.generate_isa_study()]) + self.assertIsInstance(investigation.studies[0], Study) + """ + # removed because it takes too long on CI and not really needed. json.dumps( investigation, cls=ISAJSONEncoder, @@ -158,4 +161,5 @@ def test_generate_study_design_from_config_with_observational_factors_and_ontolo separators=(',', ': ') ) data_frames = isatab.dump_tables_to_dataframes(investigation) - self.assertIsInstance(data_frames, dict) \ No newline at end of file + self.assertIsInstance(data_frames, dict) + """ \ No newline at end of file From b65f0fb28042c1d3298f1e91a87e5fa6d74d7e92 Mon Sep 17 00:00:00 2001 From: zigur Date: Tue, 24 Nov 2020 11:19:44 +0000 Subject: [PATCH 19/25] removed spurious print statement #374 --- isatools/magetab.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/isatools/magetab.py b/isatools/magetab.py index 51813858..1aa8e1e4 100644 --- a/isatools/magetab.py +++ b/isatools/magetab.py @@ -35,9 +35,9 @@ try: log = logging.getLogger('isatools') - print("LOG:", log) + log.debug("LOG:", log) except IOError as ioe: - print("error:", ioe) + log.error("error:", ioe) def _get_sdrf_filenames(ISA): From 9a665db2441c7792ef4fee74f3065128a1a16245 Mon Sep 17 00:00:00 2001 From: zigur Date: Tue, 24 Nov 2020 11:29:17 +0000 Subject: [PATCH 20/25] counters starting from 1 --- isatools/create/model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/isatools/create/model.py b/isatools/create/model.py index 61709f7b..eca7016b 100644 --- a/isatools/create/model.py +++ b/isatools/create/model.py @@ -2052,7 +2052,7 @@ def _generate_sources(self, ontology_source_references): for subj_n in (str(ix).zfill(digits) for ix in range(1, s_arm.group_size + 1)): src = copy.copy(source_prototype) src.name = self._idgen_sources( - s_arm.numeric_id if s_arm.numeric_id > -1 else s_ix, + s_arm.numeric_id if s_arm.numeric_id > -1 else s_ix + 1, # start counting from 1 subj_n ) srcs.add(src) @@ -2103,7 +2103,7 @@ def _generate_samples_and_assays(self, sources_map, sampling_protocol, performer isinstance(sample_type.value, OntologyAnnotation) else sample_type.value for samp_idx in range(0, sampling_size): sample = Sample( - name=self._idgen_samples(source.name, cell.name, str(samp_idx+1), sample_term), + name=self._idgen_samples(source.name, cell.name, str(samp_idx + 1), sample_term), factor_values=factor_values, characteristics=[sample_type], derives_from=[source] ) sample_batches[sample_node].append(sample) @@ -2261,8 +2261,8 @@ def generate_assay(assay_graph, assay_samples): ix = i * len(assay_samples) * size + j * size + k log.debug('i = {0}, j = {1}, k={2}, ix={3}'.format(i, j, k, ix)) processes, other_materials, data_files, _, __ = StudyDesign._generate_isa_elements_from_node( - node, assay_graph, assay_graph.id, start_node_index=ix, counter=None, processes=[], other_materials=[], - data_files=[], previous_items=[sample] + node, assay_graph, assay_graph.id, start_node_index=ix + 1, counter=None, processes=[], + other_materials=[], data_files=[], previous_items=[sample] ) assay.other_material.extend(other_materials) assay.process_sequence.extend(processes) From 4c07cab78d7c8cca5652f66090e3e20bd5cb8a40 Mon Sep 17 00:00:00 2001 From: zigur Date: Tue, 24 Nov 2020 11:46:11 +0000 Subject: [PATCH 21/25] expanded assay list --- isatools/create/model.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/isatools/create/model.py b/isatools/create/model.py index eca7016b..37ec19bb 100644 --- a/isatools/create/model.py +++ b/isatools/create/model.py @@ -46,9 +46,17 @@ Material, DataFile, RawDataFile, - RawSpectralDataFile, # this is required for the module to work + RawSpectralDataFile, FreeInductionDecayDataFile, ArrayDataFile, + DerivedDataFile, + DerivedSpectralDataFile, + DerivedArrayDataFile, + ProteinAssignmentFile, + PeptideAssignmentFile, + DerivedArrayDataMatrixFile, + PostTranslationalModificationAssignmentFile, + AcquisitionParameterDataFile, Extract, LabeledExtract, plink @@ -2340,7 +2348,10 @@ def _isa_objects_factory( isa_class = globals()[curr_assay_opt['raw data file'].replace(' ', '')] assert isa_class in { # expand this set if needed - RawDataFile, RawSpectralDataFile, ArrayDataFile, FreeInductionDecayDataFile + RawDataFile, RawSpectralDataFile, ArrayDataFile, FreeInductionDecayDataFile, + DerivedDataFile, DerivedSpectralDataFile, DerivedArrayDataFile, + ProteinAssignmentFile, PeptideAssignmentFile, DerivedArrayDataMatrixFile, + PostTranslationalModificationAssignmentFile, AcquisitionParameterDataFile } return isa_class( filename='{}-S{}-{}-R{}'.format( From 746fde3f67aafc0f0a9cc0e3014ea4854c035baf Mon Sep 17 00:00:00 2001 From: zigur Date: Tue, 24 Nov 2020 11:49:21 +0000 Subject: [PATCH 22/25] removed spurious print statement v2 #374 --- isatools/magetab.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/isatools/magetab.py b/isatools/magetab.py index 1aa8e1e4..0f3d0d6f 100644 --- a/isatools/magetab.py +++ b/isatools/magetab.py @@ -15,8 +15,6 @@ from itertools import zip_longest import numpy as np import pandas as pd -# import modin.pandas as pd_modin - from isatools import isatab from isatools.model import ( Assay, @@ -33,11 +31,7 @@ ) import logging -try: - log = logging.getLogger('isatools') - log.debug("LOG:", log) -except IOError as ioe: - log.error("error:", ioe) +log = logging.getLogger('isatools') def _get_sdrf_filenames(ISA): From 138e90cf9a36677079b7dc03c6d3bc490848e126 Mon Sep 17 00:00:00 2001 From: zigur Date: Mon, 14 Dec 2020 17:12:32 +0000 Subject: [PATCH 23/25] refactoring connectors to harmonize them to Datascriptor; support for chained protocols #373 --- isatools/create/connectors.py | 26 ++++++---- isatools/create/constants.py | 2 +- isatools/create/errors.py | 7 ++- isatools/create/model.py | 60 ++++++++++++++++++++++-- tests/test_create_connectors.py | 23 +++++---- tests/test_create_models_json.py | 21 ++++++--- tests/test_create_models_study_design.py | 10 ++++ 7 files changed, 113 insertions(+), 36 deletions(-) diff --git a/isatools/create/connectors.py b/isatools/create/connectors.py index 22a15c92..badc0de4 100644 --- a/isatools/create/connectors.py +++ b/isatools/create/connectors.py @@ -1,6 +1,6 @@ from isatools.model import OntologyAnnotation, OntologySource, FactorValue, Characteristic from isatools.create.model import StudyDesign, NonTreatment, Treatment, StudyCell, StudyArm, SampleAndAssayPlan -from isatools.create.constants import SCREEN, INTERVENTIONS, BASE_FACTORS, SAMPLE, ORGANISM_PART +from isatools.create.constants import SCREEN, INTERVENTIONS, BASE_FACTORS, SAMPLE, ORGANISM_PART, DEFAULT_SOURCE_TYPE from collections import OrderedDict AGENT = 'agent' @@ -265,7 +265,7 @@ def generate_study_design_from_config(study_design_config): :return: isatools.create.StudyDesign """ arms = [] - for arm_ix, arm_dict in enumerate(study_design_config['selectedArms']): + for arm_ix, arm_dict in enumerate(study_design_config['arms']['selected']): arm_map = OrderedDict() for epoch_ix, epoch_dict in enumerate(arm_dict['epochs']): element_ids = epoch_dict.get('elements', []) @@ -273,7 +273,7 @@ def generate_study_design_from_config(study_design_config): _generate_element(element_dict) for element_dict in filter( lambda el: el['id'] in element_ids, - study_design_config['generatedStudyDesign']['elements'] + study_design_config['elements'] ) ] cell_name = 'A{}E{}'.format(arm_ix, epoch_ix) @@ -288,9 +288,8 @@ def generate_study_design_from_config(study_design_config): assay_ord_dicts = [ generate_assay_ord_dict_from_config( ds_assay_config, arm_dict['name'], epoch_ix - ) for ds_assay_config in study_design_config['assayConfigs'] - if study_design_config['selectedAssayTypes'][ds_assay_config['name']] and - ds_assay_config['selectedCells'][arm_dict['name']][epoch_ix] is True + ) for ds_assay_config in study_design_config['assayPlan'] + if ds_assay_config['selectedCells'][arm_dict['name']][epoch_ix] is True ] sa_plan_name = 'SAP_A{}E{}'.format(arm_ix, epoch_ix) # TODO this method will probably need some rework to bind a sample type to a specific assay plan @@ -298,13 +297,17 @@ def generate_study_design_from_config(study_design_config): sa_plan_name, sample_type_dicts, *assay_ord_dicts ) arm_map[cell] = sa_plan + source_type = Characteristic( + category=DEFAULT_SOURCE_TYPE.category, + value=_map_ontology_annotation( + arm_dict.get('subjectType', None) or study_design_config.get('subjectType', None) + ) + ) arm = StudyArm( name=arm_dict['name'], # should we generate a Characteristic if subjectType is an OntologyAnnotation? - source_type=_map_ontology_annotation( - arm_dict.get('subjectType', None) or study_design_config.get('subjectType', None) - ), + source_type=source_type, source_characteristics=[ _generate_characteristics_from_observational_factor( obs_factor_dict @@ -315,6 +318,9 @@ def generate_study_design_from_config(study_design_config): ) arms.append(arm) return StudyDesign( - name=study_design_config['generatedStudyDesign']['type'], + # TODO should we actually add the properties 'name' and ''description' to the study design? + name=study_design_config['name'], + description=study_design_config.get('description', None), + design_type=_map_ontology_annotation(study_design_config['designType']), study_arms=arms ) diff --git a/isatools/create/constants.py b/isatools/create/constants.py index 1cf55218..42e19c57 100644 --- a/isatools/create/constants.py +++ b/isatools/create/constants.py @@ -4,7 +4,7 @@ from isatools.model import OntologyAnnotation, StudyFactor, OntologySource, Characteristic SCREEN = 'screen' -RUN_IN = 'run in' +RUN_IN = 'run-in' WASHOUT = 'washout' FOLLOW_UP = 'follow-up' ELEMENT_TYPES = dict(SCREEN=SCREEN, RUN_IN=RUN_IN, WASHOUT=WASHOUT, FOLLOW_UP=FOLLOW_UP) diff --git a/isatools/create/errors.py b/isatools/create/errors.py index 5d0bf933..ce6b595c 100644 --- a/isatools/create/errors.py +++ b/isatools/create/errors.py @@ -23,8 +23,9 @@ # ERROR MESSAGES: ASSAY GRAPH INVALID_NODE_ERROR = 'Node must be instance of isatools.create.models.SequenceNode. {0} provided' -INVALID_LINK_ERROR = "The link to be added is not valid. Link that can be created are " \ - "ProductNode->ProtocolNode or ProtocolNode->ProductNode." +# INVALID_LINK_ERROR = "The link to be added is not valid. Link that can be created are ProductNode->ProtocolNode +# or ProtocolNode->ProductNode." +INVALID_LINK_ERROR = 'ProductNode->ProductNode links are not allowed in an assay workflow.' INVALID_MEASUREMENT_TYPE_ERROR = '{0} is an invalid value for measurement_type. ' \ 'Please provide an OntologyAnnotation or string.' INVALID_TECHNOLOGY_TYPE_ERROR = '{0} is an invalid value for technology_type. ' \ @@ -50,6 +51,8 @@ # ERROR MESSAGES: STUDY DESIGN NAME_PROPERTY_ASSIGNMENT_ERROR = 'The value assigned to \'name\' must be a string' +DESIGN_TYPE_PROPERTY_ASSIGNMENT_ERROR = 'The value assigned to \'design_type\' must be a string or OntologyAnnotation' +DESCRIPTION_PROPERTY_ASSIGNMENT_ERROR = 'The value assigned to \'description\' must be text (i.e. string)' STUDY_ARM_PROPERTY_ASSIGNMENT_ERROR = 'The value assigned to \'study_arms\' must be an iterable' ADD_STUDY_ARM_PARAMETER_TYPE_ERROR = 'Not a valid study arm' ADD_STUDY_ARM_NAME_ALREADY_PRESENT_ERROR = 'A StudyArm with the same name is already present in the StudyDesign' diff --git a/isatools/create/model.py b/isatools/create/model.py index 37ec19bb..79d86aec 100644 --- a/isatools/create/model.py +++ b/isatools/create/model.py @@ -136,7 +136,7 @@ class NonTreatment(Element): def __init__(self, element_type=ELEMENT_TYPES['SCREEN'], duration_value=0.0, duration_unit=None): super(NonTreatment, self).__init__() if element_type not in ELEMENT_TYPES.values(): - raise ValueError('element treatment type provided: ') + raise ValueError('element treatment type provided: {}'.format(element_type)) self.__type = element_type if not isinstance(duration_value, Number): raise ValueError('duration_value must be a Number. Value provided is {0}'.format(duration_value)) @@ -1161,8 +1161,11 @@ def links(self): for target_node in target_nodes) def add_link(self, start_node, target_node): + """ if not (isinstance(start_node, ProductNode) and isinstance(target_node, ProtocolNode)) and \ not (isinstance(start_node, ProtocolNode) and isinstance(target_node, ProductNode)): + """ + if isinstance(start_node, ProductNode) and isinstance(target_node, ProductNode): raise TypeError(errors.INVALID_LINK_ERROR) if start_node not in self.__graph_dict.keys() or target_node not in self.__graph_dict.keys(): raise ValueError(errors.MISSING_NODE_ERROR) @@ -1923,7 +1926,14 @@ class StudyDesign(object): StudyArms of different lengths (i.e. different number of cells) are allowed. """ - def __init__(self, name='Study Design', source_type=DEFAULT_SOURCE_TYPE, study_arms=None): + def __init__( + self, + name='Study Design', + design_type=None, + description=None, + source_type=DEFAULT_SOURCE_TYPE, + study_arms=None + ): """ :param name: str :param source_type: str or OntologyAnnotation @@ -1931,11 +1941,17 @@ def __init__(self, name='Study Design', source_type=DEFAULT_SOURCE_TYPE, study_a """ self.__study_arms = set() self.__name = name if isinstance(name, str) else 'Study Design' + self.__design_type = None + self.__description = None self.__source_type = None self.source_type = source_type if study_arms: self.study_arms = study_arms + if description: + self.description = description + if design_type: + self.design_type = design_type @property def name(self): @@ -1947,6 +1963,26 @@ def name(self, name): raise AttributeError(errors.NAME_PROPERTY_ASSIGNMENT_ERROR) self.__name = name + @property + def description(self): + return self.__description + + @description.setter + def description(self, description): + if not isinstance(description, str): + raise AttributeError(errors.DESCRIPTION_PROPERTY_ASSIGNMENT_ERROR) + self.__description = description + + @property + def design_type(self): + return self.__design_type + + @design_type.setter + def design_type(self, design_type): + if not isinstance(design_type, (str, OntologyAnnotation)): + raise AttributeError(errors.DESIGN_TYPE_PROPERTY_ASSIGNMENT_ERROR) + self.__design_type = design_type + @property def source_type(self): return self.__source_type @@ -2402,15 +2438,21 @@ def generate_isa_study(self): def __repr__(self): return '{0}.{1}(' \ 'name={name}, ' \ + 'design_type={design_type}, ' \ + 'description={description} ' \ + 'source_type={source_type}, ' \ 'study_arms={study_arms}' \ ')'.format(self.__class__.__module__, self.__class__.__name__, study_arms=self.study_arms, - name=self.name) + name=self.name, design_type=self.design_type, description=self.description, + source_type=self.source_type) def __str__(self): return """{0}( name={name}, + description={description}, study_arms={study_arms} )""".format(self.__class__.__name__, + description=self.description, study_arms=[arm.name for arm in sorted(self.study_arms)], name=self.name) @@ -2626,6 +2668,7 @@ class StudyDesignEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, StudyDesign): arm_encoder = StudyArmEncoder() + onto_encoder = OntologyAnnotationEncoder() study_arms_dict = { arm.name: arm_encoder.default(arm) for arm in obj.study_arms } @@ -2634,6 +2677,8 @@ def default(self, obj): arm.pop('name') return { 'name': obj.name, + 'designType': onto_encoder.ontology_annotation(obj.design_type), + 'description': obj.description, 'studyArms': study_arms_dict } @@ -2650,7 +2695,14 @@ def loads(self, json_text): arm_dict['name'] = name study_arms = {self.arm_decoder.loads_arm(arm_dict) for arm_dict in json_dict["studyArms"].values()} - study_design = StudyDesign(name=json_dict['name'], study_arms=study_arms) + study_design = StudyDesign( + name=json_dict['name'], + description=json_dict['description'], + design_type=CharacteristicDecoder.loads_ontology_annotation(json_dict['designType']) if isinstance( + json_dict['designType'], dict + ) else json_dict['designType'], + study_arms=study_arms + ) return study_design diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py index 51a7f0e0..6a6887ff 100644 --- a/tests/test_create_connectors.py +++ b/tests/test_create_connectors.py @@ -83,9 +83,9 @@ def _load_config(file_name): return ds_design_config def test_generate_assay_ord_dict_from_datascriptor_config(self): - ds_design_config = self._load_config('study-design-3-repeated-treatment.json') - assay_config = ds_design_config['assayConfigs'][0] - test_arm_name = 'Arm_0' + ds_design_config = self._load_config('factorial-study-design-12-arms-blood-saliva-genomeseq-ms.json') + assay_config = ds_design_config['assayPlan'][0] + test_arm_name = ds_design_config['arms']['selected'][0]['name'] test_epoch_no = -1 # last epoch, follow-up assay_odict = generate_assay_ord_dict_from_config(assay_config, test_arm_name, test_epoch_no) self.assertIsInstance(assay_odict, OrderedDict) @@ -93,10 +93,10 @@ def test_generate_assay_ord_dict_from_datascriptor_config(self): self.assertIsInstance(assay_graph, AssayGraph) def test_generate_study_design_from_config(self): - ds_design_config = self._load_config('study-design-3-repeated-treatment.json') + ds_design_config = self._load_config('factorial-study-design-12-arms-blood-saliva-genomeseq-ms.json') design = generate_study_design_from_config(ds_design_config) self.assertIsInstance(design, StudyDesign) - self.assertEqual(len(design.study_arms), len(ds_design_config['selectedArms'])) + self.assertEqual(len(design.study_arms), len(ds_design_config['arms']['selected'])) for arm in design.study_arms: self.assertIsInstance(arm, StudyArm) for cell, samp_ass_plan in arm.arm_map.items(): @@ -117,8 +117,8 @@ def test_generate_study_design_from_config(self): self.assertIsInstance(data_frames, dict) self.assertGreater(len(data_frames), 1) - def test_generate_study_design_from_config_with_observational_factors(self): - ds_design_config = self._load_config('study-design-with-observational-factors.json') + def test_generate_study_design_from_config_with_observational_factors_and_ontology_annotations(self): + ds_design_config = self._load_config('crossover-study-design-4-arms-blood-derma-nmr-ms.json') design = generate_study_design_from_config(ds_design_config) self.assertIsInstance(design, StudyDesign) for ix, arm in enumerate(design.study_arms): @@ -145,14 +145,13 @@ def test_generate_study_design_from_config_with_observational_factors(self): data_frames = isatab.dump_tables_to_dataframes(investigation) self.assertIsInstance(data_frames, dict) - def test_generate_study_design_from_config_with_observational_factors_and_ontology_annotations(self): - ds_design_config = self._load_config('study-design-crossover-onto-annotated-ms-and-nnmr.json') + def test_generate_study_design_from_config_with_chained_protocols_and_ontology_annotations(self): + ds_design_config = self._load_config('crossover-study-design-4-arms-blood-derma-nmr-ms-chipseq.json') design = generate_study_design_from_config(ds_design_config) self.assertIsInstance(design, StudyDesign) investigation = Investigation(studies=[design.generate_isa_study()]) self.assertIsInstance(investigation.studies[0], Study) - """ - # removed because it takes too long on CI and not really needed. + self.assertEqual(len(investigation.studies[0].assays), len(ds_design_config['assayPlan'])) json.dumps( investigation, cls=ISAJSONEncoder, @@ -162,4 +161,4 @@ def test_generate_study_design_from_config_with_observational_factors_and_ontolo ) data_frames = isatab.dump_tables_to_dataframes(investigation) self.assertIsInstance(data_frames, dict) - """ \ No newline at end of file + self.assertEqual(len(data_frames), len(ds_design_config['assayPlan']) + 1) diff --git a/tests/test_create_models_json.py b/tests/test_create_models_json.py index d8d74ac3..eed12506 100644 --- a/tests/test_create_models_json.py +++ b/tests/test_create_models_json.py @@ -666,8 +666,8 @@ def test_decode_arm_with_multi_element_cells_mouse(self): json_text = json.dumps(json.load(expected_json_fp)) actual_arm = decoder.loads(json_text) self.assertIsInstance(actual_arm, StudyArm) - log.info('Expected Arm source type: {}'.format(self.multi_treatment_cell_arm_mouse.source_type)) - log.info('Actual Arm source type: {}'.format(actual_arm.source_type)) + log.debug('Expected Arm source type: {}'.format(self.multi_treatment_cell_arm_mouse.source_type)) + log.debug('Actual Arm source type: {}'.format(actual_arm.source_type)) self.assertEqual(self.multi_treatment_cell_arm_mouse, actual_arm) @@ -675,13 +675,20 @@ class StudyDesignEncoderTest(BaseTestCase): def setUp(self): super(StudyDesignEncoderTest, self).setUp() - self.three_arm_study_design = StudyDesign(name=TEST_STUDY_DESIGN_NAME_THREE_ARMS, study_arms={ - self.single_treatment_cell_arm, - self.single_treatment_cell_arm_01, - self.single_treatment_cell_arm_02 + self.three_arm_study_design = StudyDesign( + name=TEST_STUDY_DESIGN_NAME_THREE_ARMS, + description='This is a study design with three single-element arms', + design_type='unspecified design', + study_arms={ + self.single_treatment_cell_arm, + self.single_treatment_cell_arm_01, + self.single_treatment_cell_arm_02 }) self.multi_element_cell_two_arm_study_design = StudyDesign( - name=TEST_STUDY_DESIGN_NAME_TWO_ARMS_MULTI_ELEMENT_CELLS, study_arms=[ + name=TEST_STUDY_DESIGN_NAME_TWO_ARMS_MULTI_ELEMENT_CELLS, + description='This is a study design with two multi-element arms', + design_type='unspecified design', + study_arms=[ self.multi_treatment_cell_arm, self.multi_treatment_cell_arm_01 ]) diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py index e2879a07..e942fe47 100644 --- a/tests/test_create_models_study_design.py +++ b/tests/test_create_models_study_design.py @@ -1621,6 +1621,16 @@ def test_name_property(self): self.study_design.name = 128 self.assertEqual(ex_cm.exception.args[0], errors.NAME_PROPERTY_ASSIGNMENT_ERROR) + def test_description_property(self): + test_study_description = 'some description in here' + self.study_design.description = test_study_description + self.assertEqual(self.study_design.description, test_study_description) + + def test_design_type_property(self): + test_study_design_type = 'factorial design' + self.study_design.design_type = test_study_design_type + self.assertEqual(self.study_design.design_type, test_study_design_type) + def test_study_arms_property(self): pass From b6d2446144130ef46c11249c912fb62437e36f62 Mon Sep 17 00:00:00 2001 From: zigur Date: Tue, 15 Dec 2020 11:44:10 +0000 Subject: [PATCH 24/25] adding study title, description and design type on creation #373 --- isatools/create/model.py | 7 ++++++- tests/test_create_connectors.py | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/isatools/create/model.py b/isatools/create/model.py index 79d86aec..ac7673ea 100644 --- a/isatools/create/model.py +++ b/isatools/create/model.py @@ -2416,7 +2416,12 @@ def generate_isa_study(self): 'study-creator-config.yaml')) as yaml_file: config = yaml.load(yaml_file, Loader=yaml.FullLoader) study_config = config['study'] - study = Study(filename=urlify(study_config['filename'])) + study = Study( + title=self.name, + filename=urlify(study_config['filename']), + description=self.description, + design_descriptors=[self.design_type] if isinstance(self.design_type, OntologyAnnotation) else None + ) study.ontology_source_references = [ OntologySource(**study_config['ontology_source_references'][0]) ] diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py index 6a6887ff..1530ce60 100644 --- a/tests/test_create_connectors.py +++ b/tests/test_create_connectors.py @@ -104,6 +104,11 @@ def test_generate_study_design_from_config(self): self.assertIsInstance(samp_ass_plan, SampleAndAssayPlan) study = design.generate_isa_study() self.assertIsInstance(study, Study) + self.assertEqual(study.title, ds_design_config['name']) + self.assertEqual(study.description, ds_design_config['description']) + self.assertIsInstance(study.design_descriptors[0], OntologyAnnotation) + self.assertEqual(study.design_descriptors[0].term, ds_design_config['designType']['term']) + self.assertEqual(study.design_descriptors[0].term_accession, ds_design_config['designType']['iri']) investigation = Investigation(studies=[study]) inv_json = json.dumps( investigation, From 47c366e57fee284318620310486949e6e5fa0f28 Mon Sep 17 00:00:00 2001 From: zigur Date: Tue, 15 Dec 2020 12:27:20 +0000 Subject: [PATCH 25/25] support for study identifier --- isatools/create/constants.py | 4 ++++ isatools/create/model.py | 6 ++++-- tests/test_create_connectors.py | 2 ++ tests/test_create_models_study_design.py | 7 +++++-- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/isatools/create/constants.py b/isatools/create/constants.py index 42e19c57..f260d7e0 100644 --- a/isatools/create/constants.py +++ b/isatools/create/constants.py @@ -104,3 +104,7 @@ # Default performer DEFAULT_PERFORMER = 'Unknown' + +# Default study identifier +DEFAULT_STUDY_IDENTIFIER = 's_01' +DEFAULT_INVESTIGATION_IDENTIFIER = 'i_01' diff --git a/isatools/create/model.py b/isatools/create/model.py index ac7673ea..bd07bd87 100644 --- a/isatools/create/model.py +++ b/isatools/create/model.py @@ -27,7 +27,8 @@ RUN_ORDER, STUDY_CELL, assays_opts, DEFAULT_SOURCE_TYPE, SOURCE_QC_SOURCE_NAME, QC_SAMPLE_NAME, QC_SAMPLE_TYPE_PRE_RUN, QC_SAMPLE_TYPE_POST_RUN, - QC_SAMPLE_TYPE_INTERSPERSED, ZFILL_WIDTH, DEFAULT_PERFORMER + QC_SAMPLE_TYPE_INTERSPERSED, ZFILL_WIDTH, DEFAULT_PERFORMER, + DEFAULT_STUDY_IDENTIFIER ) from isatools.model import ( StudyFactor, @@ -2407,7 +2408,7 @@ def _isa_objects_factory( ) ) - def generate_isa_study(self): + def generate_isa_study(self, identifier=None): """ this is the core method to return the fully populated ISA Study object from the StudyDesign :return: isatools.model.Study @@ -2417,6 +2418,7 @@ def generate_isa_study(self): config = yaml.load(yaml_file, Loader=yaml.FullLoader) study_config = config['study'] study = Study( + identifier=identifier or DEFAULT_STUDY_IDENTIFIER, title=self.name, filename=urlify(study_config['filename']), description=self.description, diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py index 1530ce60..3f996e7e 100644 --- a/tests/test_create_connectors.py +++ b/tests/test_create_connectors.py @@ -27,6 +27,7 @@ SampleAndAssayPlan, AssayGraph ) +from isatools.create.constants import DEFAULT_STUDY_IDENTIFIER from isatools.isajson import ISAJSONEncoder from tests.create_sample_assay_plan_odicts import ms_assay_dict, annotated_ms_assay_dict @@ -105,6 +106,7 @@ def test_generate_study_design_from_config(self): study = design.generate_isa_study() self.assertIsInstance(study, Study) self.assertEqual(study.title, ds_design_config['name']) + self.assertEqual(study.identifier, DEFAULT_STUDY_IDENTIFIER) self.assertEqual(study.description, ds_design_config['description']) self.assertIsInstance(study.design_descriptors[0], OntologyAnnotation) self.assertEqual(study.design_descriptors[0].term, ds_design_config['designType']['term']) diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py index e942fe47..0f76a433 100644 --- a/tests/test_create_models_study_design.py +++ b/tests/test_create_models_study_design.py @@ -40,7 +40,7 @@ from isatools.create.constants import ( SCREEN, RUN_IN, WASHOUT, FOLLOW_UP, ELEMENT_TYPES, INTERVENTIONS, DURATION_FACTOR, BASE_FACTORS_, BASE_FACTORS, SOURCE, SAMPLE, EXTRACT, LABELED_EXTRACT, default_ontology_source_reference, - DEFAULT_SOURCE_TYPE, QC_SAMPLE_TYPE_PRE_RUN, QC_SAMPLE_TYPE_INTERSPERSED + DEFAULT_SOURCE_TYPE, QC_SAMPLE_TYPE_PRE_RUN, QC_SAMPLE_TYPE_INTERSPERSED, DEFAULT_STUDY_IDENTIFIER ) from tests.create_sample_assay_plan_odicts import sample_list, ms_assay_dict, lcdad_assay_dict, nmr_assay_dict @@ -1768,6 +1768,7 @@ def test_generate_isa_study_single_arm_single_cell_elements(self): study_design = StudyDesign(study_arms=(single_arm,)) study = study_design.generate_isa_study() self.assertIsInstance(study, Study) + self.assertEqual(study.identifier, DEFAULT_STUDY_IDENTIFIER) self.assertEqual(study.filename, study_config['filename']) self.assertEqual(len(study.sources), single_arm.group_size) for source in study.sources: @@ -1829,7 +1830,9 @@ def test_generate_isa_study_two_arms_single_cell_elements(self): (self.cell_follow_up_01, self.nmr_sample_assay_plan) ])) study_design = StudyDesign(study_arms=(first_arm, second_arm)) - study = study_design.generate_isa_study() + study_identifier = 'st_001' + study = study_design.generate_isa_study(identifier=study_identifier) + self.assertEqual(study.identifier, study_identifier) self.assertEqual(len(study.assays), 2) expected_num_of_samples_nmr_plan_first_arm = reduce( lambda acc_value, sample_node: acc_value + sample_node.size,