diff --git a/isatools/isatab.py b/isatools/isatab.py index c80ade79..1e85dd6e 100644 --- a/isatools/isatab.py +++ b/isatools/isatab.py @@ -5703,7 +5703,12 @@ def pbar(x): return x output_node_index = find_gt(node_cols, object_label_index) output_proc_index = find_gt(proc_cols, object_label_index) - if output_proc_index < output_node_index > -1: + post_chained_protocol = any( + col_name for col_name in DF.columns[(object_label_index + 1): output_node_index].values + if col_name.startswith('Protocol REF') + ) + + if (output_proc_index < output_node_index > -1 and not post_chained_protocol) or (output_proc_index > output_node_index): output_node_label = DF.columns[output_node_index] output_node_value = str( @@ -5727,7 +5732,12 @@ def pbar(x): return x input_node_index = find_lt(node_cols, object_label_index) input_proc_index = find_lt(proc_cols, object_label_index) - if input_proc_index < input_node_index > -1: + previous_chained_protocol = any( + col_name for col_name in DF.columns[input_node_index: (object_label_index - 1)].values + if col_name.startswith('Protocol REF') + ) + + if input_proc_index < input_node_index > -1 and not previous_chained_protocol: input_node_label = DF.columns[input_node_index] input_node_value = str(object_series[input_node_label]) diff --git a/tests/test_isatab.py b/tests/test_isatab.py index 019d346b..b2fca9a6 100644 --- a/tests/test_isatab.py +++ b/tests/test_isatab.py @@ -1589,6 +1589,39 @@ def test_isatab_factor_value_parsing_issue270(self): for sample in s.samples: self.assertGreater(len(sample.factor_values), 0) + def test_isatab_protocol_chain_parsing(self): + logging.info("Testing") + with open(os.path.join(self._tab_data_dir, 'BII-S-3', 'i_gilbert.txt'), + encoding='utf-8') as fp: + investigation = isatab.load(fp) + self.assertIsInstance(investigation, Investigation) + study = investigation.studies[0] + nucleotide_sequencing_assay = next( + assay for assay in study.assays if assay.technology_type.term == 'nucleotide sequencing' + ) + nucl_ac_extraction_process = next( + proc for proc in nucleotide_sequencing_assay.process_sequence + if proc.executes_protocol.name == 'nucleic acid extraction - standard procedure 2' + ) + gen_dna_extraction_process = next( + proc for proc in nucleotide_sequencing_assay.process_sequence + if proc.executes_protocol.name == 'genomic DNA extraction - standard procedure 4' + ) + extract = next( + mat for mat in nucleotide_sequencing_assay.materials['other_material'] if mat.name == 'GSM255770.e1' + ) + self.assertTrue(nucl_ac_extraction_process.next_process is gen_dna_extraction_process) + self.assertEqual(len(gen_dna_extraction_process.outputs), 1) + self.assertFalse(nucl_ac_extraction_process.outputs) + self.assertTrue(gen_dna_extraction_process.outputs[0] is extract) + self.assertTrue(nucl_ac_extraction_process.inputs) + self.assertFalse(gen_dna_extraction_process.inputs) + # FIXME characteristics are not loaded into the extract name + # self.assertTrue(extract.characteristics) + dumps_out = isatab.dumps(investigation) + expected_chained_protocol_snippet = """Sample Name\tProtocol REF\tProtocol REF\tExtract Name""" + self.assertIn(expected_chained_protocol_snippet, dumps_out) + class TestTransposedTabParser(unittest.TestCase): @@ -1615,30 +1648,3 @@ def test_parse(self): 'header': ['label1', 'label2'] } self.assertEqual(ttable_dict, expected_ttable) - - -class UnitTestIsaStudyGroups(): - - def setUp(self): - self.fp = open(os.path.join(self._tab_data_dir, 'MTBLS404', 'i_sacurine.txt'), encoding='utf-8') - self.i_df = isatab.load_investigation(fp=self.fp) - for i, study_df in enumerate(self.i_df['studies']): - study_filename = study_df.iloc[0]['Study File Name'] - self.s_fp = open(os.path.join(os.path.dirname(self.fp.name), study_filename), encoding='utf-8') - self.study_sample_table = isatab.load_table(self.s_fp) - self.study_sample_table.filename = study_filename - - def tearDown(self): - self.fp.close() - self.s_fp.close() - - def test_get_num_study_groups(self): - num_study_groups = isatab.get_num_study_groups(self.study_sample_table, self.study_filename) - self.assertEqual(num_study_groups, 1) - - def test_check_study_groups(self): - self.assertTrue(isatab.NUMBER_OF_STUDY_GROUPS in self.study_df.columns) - study_group_sizes = self.study_df[isatab.NUMBER_OF_STUDY_GROUPS] - study_group_size_in_comment = next(iter(study_group_sizes)) - self.assertTrue(isatab.check_study_groups(self.study_sample_table, self.study_filename, study_group_size_in_comment)) -