From b8f2f55b4e046f1841ffba7d6fb190c87918375f Mon Sep 17 00:00:00 2001 From: zigur Date: Mon, 1 Feb 2021 16:14:43 +0000 Subject: [PATCH] extracted get_column_header() function to fix bug on protocol type mapping --- isatools/isatab.py | 90 ++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 51 deletions(-) diff --git a/isatools/isatab.py b/isatools/isatab.py index 4e1a3a11..a9f5933e 100644 --- a/isatools/isatab.py +++ b/isatools/isatab.py @@ -1406,33 +1406,11 @@ def flatten(l): return [item for sublist in l for item in sublist] protrefcount += 1 columns += flatten(map(lambda x: get_pv_columns(olabel, x), node.parameter_values)) - oname_label = None if node.executes_protocol.protocol_type: - if node.executes_protocol.protocol_type.term.lower() in \ - protocol_types_dict["nucleic acid sequencing"][SYNONYMS] \ - + protocol_types_dict["phenotyping"][SYNONYMS]: - oname_label = "Assay Name" - elif node.executes_protocol.protocol_type.term.lower() in \ - protocol_types_dict["data collection"][SYNONYMS]: - oname_label = "Scan Name" - elif node.executes_protocol.protocol_type.term.lower() in \ - protocol_types_dict["mass spectrometry"][SYNONYMS]: - oname_label = "MS Assay Name" - elif node.executes_protocol.protocol_type.term.lower() in \ - protocol_types_dict["nmr spectroscopy"][SYNONYMS]: - oname_label = "NMR Assay Name" - elif node.executes_protocol.protocol_type.term.lower() in \ - protocol_types_dict["data transformation"][SYNONYMS]: - oname_label = "Data Transformation Name" - elif node.executes_protocol.protocol_type.term.lower() in \ - protocol_types_dict["sequence analysis data transformation"][SYNONYMS]: - oname_label = "Normalization Name" - elif node.executes_protocol.protocol_type.term.lower() in \ - protocol_types_dict["normalization"][SYNONYMS]: - oname_label = "Normalization Name" - if node.executes_protocol.protocol_type.term.lower() \ - == "unknown protocol": - oname_label = "Unknown Protocol Name" + oname_label = 
get_column_header( + node.executes_protocol.protocol_type.term, + protocol_types_dict + ) if oname_label is not None: columns.append(oname_label) elif node.executes_protocol.protocol_type.term.lower() \ @@ -1492,32 +1470,11 @@ def pbar(x): return x node.executes_protocol.name ) df_dict[olabel][-1] = node.executes_protocol.name - oname_label = None if node.executes_protocol.protocol_type: - if node.executes_protocol.protocol_type.term.lower() in \ - protocol_types_dict["nucleic acid sequencing"][SYNONYMS]: - oname_label = "Assay Name" - elif node.executes_protocol.protocol_type.term.lower() in \ - protocol_types_dict["data collection"][SYNONYMS]: - oname_label = "Scan Name" - elif node.executes_protocol.protocol_type.term.lower() in \ - protocol_types_dict["mass spectrometry"][SYNONYMS]: - oname_label = "MS Assay Name" - elif node.executes_protocol.protocol_type.term.lower() in \ - protocol_types_dict["nmr spectroscopy"][SYNONYMS]: - oname_label = "NMR Assay Name" - elif node.executes_protocol.protocol_type.term.lower() in \ - protocol_types_dict["data transformation"][SYNONYMS]: - oname_label = "Data Transformation Name" - elif node.executes_protocol.protocol_type.term.lower() in \ - protocol_types_dict["sequence analysis data transformation"][SYNONYMS]: - oname_label = "Data Transformation Name" - elif node.executes_protocol.protocol_type.term.lower() in \ - protocol_types_dict["normalization"][SYNONYMS]: - oname_label = "Normalization Name" - if node.executes_protocol.protocol_type.term.lower() == \ - "unknown protocol": - oname_label = "Unknown Protocol Name" + oname_label = get_column_header( + node.executes_protocol.protocol_type.term, + protocol_types_dict + ) if oname_label is not None: df_dict[oname_label][-1] = node.name elif node.executes_protocol.protocol_type.term.lower() in \ @@ -1643,6 +1600,37 @@ def pbar(x): return x encoding='utf-8') +def get_column_header(protocol_type_term, protocol_types_dict): + column_header = None + if 
protocol_type_term.lower() in \ + protocol_types_dict["nucleic acid sequencing"][SYNONYMS] \ + + protocol_types_dict["phenotyping"][SYNONYMS] \ + + protocol_types_dict["data acquisition"][SYNONYMS]: + column_header = "Assay Name" + elif protocol_type_term.lower() in \ + protocol_types_dict["data collection"][SYNONYMS]: + column_header = "Scan Name" + elif protocol_type_term.lower() in \ + protocol_types_dict["mass spectrometry"][SYNONYMS]: + column_header = "MS Assay Name" + elif protocol_type_term.lower() in \ + protocol_types_dict["nmr spectroscopy"][SYNONYMS]: + column_header = "NMR Assay Name" + elif protocol_type_term.lower() in \ + protocol_types_dict["data transformation"][SYNONYMS] \ + + protocol_types_dict["sequence analysis data transformation"][SYNONYMS] \ + + protocol_types_dict["metabolite identification"][SYNONYMS] \ + + protocol_types_dict["protein identification"][SYNONYMS]: + column_header = "Data Transformation Name" + elif protocol_type_term.lower() in \ + protocol_types_dict["normalization"][SYNONYMS]: + column_header = "Normalization Name" + if protocol_type_term.lower() \ + == "unknown protocol": + column_header = "Unknown Protocol Name" + return column_header + + def get_value_columns(label, x): """Generates the appropriate columns based on the value of the object. For example, if the object's .value value is an OntologyAnnotation,