Merge pull request #408 from ISA-tools/protocol-chain-isatab-load

@terazus @proccaserra Merging this because the test successfully loads and then dumps the BII-S-3 dataset. Characteristics are still not attached to material nodes, but that is a separate issue, spotted today.
ISA-tools · Jul 7, 2021 · 9e96b29 · 9e96b29
2 parents 7949765 + 0450899
commit 9e96b29
Show file tree

Hide file tree

Showing 2 changed files with 45 additions and 29 deletions.
diff --git a/isatools/isatab.py b/isatools/isatab.py
@@ -5703,7 +5703,12 @@ def pbar(x): return x
                     output_node_index = find_gt(node_cols, object_label_index)
                     output_proc_index = find_gt(proc_cols, object_label_index)
 
-                    if output_proc_index < output_node_index > -1:
+                    post_chained_protocol = any(
+                        col_name for col_name in DF.columns[(object_label_index + 1): output_node_index].values
+                        if col_name.startswith('Protocol REF')
+                    )
+
+                    if (output_proc_index < output_node_index > -1 and not post_chained_protocol) or (output_proc_index > output_node_index):
 
                         output_node_label = DF.columns[output_node_index]
                         output_node_value = str(
@@ -5727,7 +5732,12 @@ def pbar(x): return x
                     input_node_index = find_lt(node_cols, object_label_index)
                     input_proc_index = find_lt(proc_cols, object_label_index)
 
-                    if input_proc_index < input_node_index > -1:
+                    previous_chained_protocol = any(
+                        col_name for col_name in DF.columns[input_node_index: (object_label_index - 1)].values
+                        if col_name.startswith('Protocol REF')
+                    )
+
+                    if input_proc_index < input_node_index > -1 and not previous_chained_protocol:
 
                         input_node_label = DF.columns[input_node_index]
                         input_node_value = str(object_series[input_node_label])

diff --git a/tests/test_isatab.py b/tests/test_isatab.py
@@ -1589,6 +1589,39 @@ def test_isatab_factor_value_parsing_issue270(self):
             for sample in s.samples:
                 self.assertGreater(len(sample.factor_values), 0)
 
+    def test_isatab_protocol_chain_parsing(self):
+        logging.info("Testing")
+        with open(os.path.join(self._tab_data_dir, 'BII-S-3', 'i_gilbert.txt'),
+                  encoding='utf-8') as fp:
+            investigation = isatab.load(fp)
+            self.assertIsInstance(investigation, Investigation)
+            study = investigation.studies[0]
+            nucleotide_sequencing_assay = next(
+                assay for assay in study.assays if assay.technology_type.term == 'nucleotide sequencing'
+            )
+            nucl_ac_extraction_process = next(
+                proc for proc in nucleotide_sequencing_assay.process_sequence
+                if proc.executes_protocol.name == 'nucleic acid extraction - standard procedure 2'
+            )
+            gen_dna_extraction_process = next(
+                proc for proc in nucleotide_sequencing_assay.process_sequence
+                if proc.executes_protocol.name == 'genomic DNA extraction - standard procedure 4'
+            )
+            extract = next(
+                mat for mat in nucleotide_sequencing_assay.materials['other_material'] if mat.name == 'GSM255770.e1'
+            )
+            self.assertTrue(nucl_ac_extraction_process.next_process is gen_dna_extraction_process)
+            self.assertEqual(len(gen_dna_extraction_process.outputs), 1)
+            self.assertFalse(nucl_ac_extraction_process.outputs)
+            self.assertTrue(gen_dna_extraction_process.outputs[0] is extract)
+            self.assertTrue(nucl_ac_extraction_process.inputs)
+            self.assertFalse(gen_dna_extraction_process.inputs)
+            # FIXME characteristics are not loaded into the extract name
+            # self.assertTrue(extract.characteristics)
+            dumps_out = isatab.dumps(investigation)
+            expected_chained_protocol_snippet = """Sample Name\tProtocol REF\tProtocol REF\tExtract Name"""
+            self.assertIn(expected_chained_protocol_snippet, dumps_out)
+
 
 class TestTransposedTabParser(unittest.TestCase):
 
@@ -1615,30 +1648,3 @@ def test_parse(self):
             'header': ['label1', 'label2']
         }
         self.assertEqual(ttable_dict, expected_ttable)
-
-
-class UnitTestIsaStudyGroups():
-
-    def setUp(self):
-        self.fp = open(os.path.join(self._tab_data_dir, 'MTBLS404', 'i_sacurine.txt'), encoding='utf-8')
-        self.i_df = isatab.load_investigation(fp=self.fp)
-        for i, study_df in enumerate(self.i_df['studies']):
-            study_filename = study_df.iloc[0]['Study File Name']
-            self.s_fp = open(os.path.join(os.path.dirname(self.fp.name), study_filename), encoding='utf-8')
-            self.study_sample_table = isatab.load_table(self.s_fp)
-            self.study_sample_table.filename = study_filename
-
-    def tearDown(self):
-        self.fp.close()
-        self.s_fp.close()
-
-    def test_get_num_study_groups(self):
-        num_study_groups = isatab.get_num_study_groups(self.study_sample_table, self.study_filename)
-        self.assertEqual(num_study_groups, 1)
-
-    def test_check_study_groups(self):
-        self.assertTrue(isatab.NUMBER_OF_STUDY_GROUPS in self.study_df.columns)
-        study_group_sizes = self.study_df[isatab.NUMBER_OF_STUDY_GROUPS]
-        study_group_size_in_comment = next(iter(study_group_sizes))
-        self.assertTrue(isatab.check_study_groups(self.study_sample_table, self.study_filename, study_group_size_in_comment))
-