Skip to content

Commit

Permalink
Merge pull request #408 from ISA-tools/protocol-chain-isatab-load
Browse files Browse the repository at this point in the history
@terazus @proccaserra merging this as the test successfully loads and then dumps the BII-S-3 dataset.
Characteristics are not attached to material nodes, but this is a separate issue spotted today
  • Loading branch information
Zigur committed Jul 7, 2021
2 parents 7949765 + 0450899 commit 9e96b29
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 29 deletions.
14 changes: 12 additions & 2 deletions isatools/isatab.py
Original file line number Diff line number Diff line change
Expand Up @@ -5703,7 +5703,12 @@ def pbar(x): return x
output_node_index = find_gt(node_cols, object_label_index)
output_proc_index = find_gt(proc_cols, object_label_index)

if output_proc_index < output_node_index > -1:
post_chained_protocol = any(
col_name for col_name in DF.columns[(object_label_index + 1): output_node_index].values
if col_name.startswith('Protocol REF')
)

if (output_proc_index < output_node_index > -1 and not post_chained_protocol) or (output_proc_index > output_node_index):

output_node_label = DF.columns[output_node_index]
output_node_value = str(
Expand All @@ -5727,7 +5732,12 @@ def pbar(x): return x
input_node_index = find_lt(node_cols, object_label_index)
input_proc_index = find_lt(proc_cols, object_label_index)

if input_proc_index < input_node_index > -1:
previous_chained_protocol = any(
col_name for col_name in DF.columns[input_node_index: (object_label_index - 1)].values
if col_name.startswith('Protocol REF')
)

if input_proc_index < input_node_index > -1 and not previous_chained_protocol:

input_node_label = DF.columns[input_node_index]
input_node_value = str(object_series[input_node_label])
Expand Down
60 changes: 33 additions & 27 deletions tests/test_isatab.py
Original file line number Diff line number Diff line change
Expand Up @@ -1589,6 +1589,39 @@ def test_isatab_factor_value_parsing_issue270(self):
for sample in s.samples:
self.assertGreater(len(sample.factor_values), 0)

def test_isatab_protocol_chain_parsing(self):
    """Check that two consecutive ``Protocol REF`` columns load as a process chain.

    Loads the BII-S-3 investigation (``i_gilbert.txt``), picks the
    'nucleotide sequencing' assay of the first study and verifies that:

    * the 'nucleic acid extraction' process is linked to the
      'genomic DNA extraction' process through ``next_process``;
    * only the first process of the chain has inputs and only the last
      one has outputs (the extract ``GSM255770.e1``);
    * dumping the investigation writes the two ``Protocol REF`` columns
      back to back between ``Sample Name`` and ``Extract Name``.
    """
    logging.info("Testing")
    with open(os.path.join(self._tab_data_dir, 'BII-S-3', 'i_gilbert.txt'),
              encoding='utf-8') as fp:
        investigation = isatab.load(fp)
        self.assertIsInstance(investigation, Investigation)
        study = investigation.studies[0]
        nucleotide_sequencing_assay = next(
            assay for assay in study.assays
            if assay.technology_type.term == 'nucleotide sequencing'
        )
        nucl_ac_extraction_process = next(
            proc for proc in nucleotide_sequencing_assay.process_sequence
            if proc.executes_protocol.name == 'nucleic acid extraction - standard procedure 2'
        )
        gen_dna_extraction_process = next(
            proc for proc in nucleotide_sequencing_assay.process_sequence
            if proc.executes_protocol.name == 'genomic DNA extraction - standard procedure 4'
        )
        extract = next(
            mat for mat in nucleotide_sequencing_assay.materials['other_material']
            if mat.name == 'GSM255770.e1'
        )
        # assertIs reports both objects on failure, unlike assertTrue(a is b).
        self.assertIs(nucl_ac_extraction_process.next_process, gen_dna_extraction_process)
        self.assertEqual(len(gen_dna_extraction_process.outputs), 1)
        self.assertFalse(nucl_ac_extraction_process.outputs)
        self.assertIs(gen_dna_extraction_process.outputs[0], extract)
        self.assertTrue(nucl_ac_extraction_process.inputs)
        self.assertFalse(gen_dna_extraction_process.inputs)
        # FIXME characteristics are not loaded into the extract name
        # self.assertTrue(extract.characteristics)
        dumps_out = isatab.dumps(investigation)
        expected_chained_protocol_snippet = """Sample Name\tProtocol REF\tProtocol REF\tExtract Name"""
        self.assertIn(expected_chained_protocol_snippet, dumps_out)


class TestTransposedTabParser(unittest.TestCase):

Expand All @@ -1615,30 +1648,3 @@ def test_parse(self):
'header': ['label1', 'label2']
}
self.assertEqual(ttable_dict, expected_ttable)


class UnitTestIsaStudyGroups():
    """Study-group checks run against the MTBLS404 ISA-Tab dataset.

    NOTE(review): this class does not derive from ``unittest.TestCase``, so
    the ``assert*`` helpers used below are not defined on it and it is
    presumably not picked up by the test runner - confirm whether that is
    intentional before enabling it.
    NOTE(review): ``self._tab_data_dir`` is read in ``setUp`` but never
    assigned in this class; it has to be provided from elsewhere.
    """

    def setUp(self):
        """Load the investigation and the sample table of each listed study.

        When several studies are listed, the attributes set here end up
        describing the last one.
        """
        self.fp = open(os.path.join(self._tab_data_dir, 'MTBLS404', 'i_sacurine.txt'), encoding='utf-8')
        self.i_df = isatab.load_investigation(fp=self.fp)
        # Track every study file opened so tearDown can close all of them,
        # not only the last one.
        self._study_fps = []
        for study_df in self.i_df['studies']:
            study_filename = study_df.iloc[0]['Study File Name']
            self.s_fp = open(os.path.join(os.path.dirname(self.fp.name), study_filename), encoding='utf-8')
            self._study_fps.append(self.s_fp)
            self.study_sample_table = isatab.load_table(self.s_fp)
            self.study_sample_table.filename = study_filename
            # The test methods read these from the instance, so they must be
            # stored on self rather than left as loop locals.
            self.study_df = study_df
            self.study_filename = study_filename

    def tearDown(self):
        """Close the investigation file and every study file opened by setUp."""
        self.fp.close()
        for study_fp in self._study_fps:
            study_fp.close()

    def test_get_num_study_groups(self):
        """The loaded study sample table is expected to contain one study group."""
        num_study_groups = isatab.get_num_study_groups(self.study_sample_table, self.study_filename)
        self.assertEqual(num_study_groups, 1)

    def test_check_study_groups(self):
        """The study-group count declared in the investigation must pass the check."""
        self.assertTrue(isatab.NUMBER_OF_STUDY_GROUPS in self.study_df.columns)
        study_group_sizes = self.study_df[isatab.NUMBER_OF_STUDY_GROUPS]
        study_group_size_in_comment = next(iter(study_group_sizes))
        self.assertTrue(isatab.check_study_groups(self.study_sample_table, self.study_filename, study_group_size_in_comment))

0 comments on commit 9e96b29

Please sign in to comment.