Merge pull request #380 from ISA-tools/file-extension

File extension
ISA-tools · Jan 18, 2021 · 702ad66 · 702ad66
2 parents 75d864a + baf177f
commit 702ad66
Show file tree

Hide file tree

Showing 6 changed files with 88 additions and 33 deletions.
diff --git a/isatools/create/connectors.py b/isatools/create/connectors.py
@@ -1,6 +1,15 @@
 from isatools.model import OntologyAnnotation, OntologySource, FactorValue, Characteristic
 from isatools.create.model import StudyDesign, NonTreatment, Treatment, StudyCell, StudyArm, SampleAndAssayPlan
-from isatools.create.constants import SCREEN, INTERVENTIONS, BASE_FACTORS, SAMPLE, ORGANISM_PART, DEFAULT_SOURCE_TYPE
+from isatools.create.constants import (
+    SCREEN,
+    INTERVENTIONS,
+    BASE_FACTORS,
+    SAMPLE,
+    ORGANISM_PART,
+    DEFAULT_SOURCE_TYPE,
+    DATA_FILE,
+    DEFAULT_EXTENSION
+)
 from collections import OrderedDict
 
 AGENT = 'agent'
@@ -238,6 +247,8 @@ def generate_assay_ord_dict_from_config(datascriptor_assay_config, arm_name, epo
                     ]
         elif 'node_type' in node:
             # this is a product node
+            extension = node['extension']['value'] if 'extension' in node else DEFAULT_EXTENSION \
+                if node['node_type'] == DATA_FILE else None
             if "characteristics_value" in node:
                 prepared_nodes = [
                     dict(
@@ -246,13 +257,18 @@ def generate_assay_ord_dict_from_config(datascriptor_assay_config, arm_name, epo
                                                                           expand_strings=True),
                         characteristics_value=_map_ontology_annotation(value),
                         size=node.get('size', 1),
-                        is_input_to_next_protocols=node['is_input_to_next_protocols']['value']
+                        is_input_to_next_protocols=node['is_input_to_next_protocols']['value'],
+                        extension=extension
                     ) for value in node["characteristics_value"]["values"]
                 ]
             else:
-                prepared_nodes = [dict(node_type=node['node_type'],
-                                       size=node.get('size', 1),
-                                       is_input_to_next_protocols=node['is_input_to_next_protocols']['value'])]
+                prepared_nodes = [dict(
+                    node_type=node['node_type'],
+                    size=node.get('size', 1),
+                    is_input_to_next_protocols=node['is_input_to_next_protocols']['value'],
+                    extension=extension
+                )]
+
         res[_map_ontology_annotation(name)] = prepared_nodes
     return res
 
@@ -297,13 +313,12 @@ def generate_study_design_from_config(study_design_config):
                 sa_plan_name, sample_type_dicts, *assay_ord_dicts
             )
             arm_map[cell] = sa_plan
-            source_type = Characteristic(
-                category=DEFAULT_SOURCE_TYPE.category,
-                value=_map_ontology_annotation(
-                    arm_dict.get('subjectType', None) or study_design_config.get('subjectType', None)
-                )
+        source_type = Characteristic(
+            category=DEFAULT_SOURCE_TYPE.category,
+            value=_map_ontology_annotation(
+                arm_dict.get('subjectType', None) or study_design_config.get('subjectType', None)
             )
-
+        )
         arm = StudyArm(
             name=arm_dict['name'],
             # should we generate a Characteristic if subjectType is an OntologyAnnotation?
@@ -318,7 +333,6 @@ def generate_study_design_from_config(study_design_config):
         )
         arms.append(arm)
     return StudyDesign(
-        # TODO should we actually add the properties 'name' and ''description' to the study design?
         name=study_design_config['name'],
         description=study_design_config.get('description', None),
         design_type=_map_ontology_annotation(study_design_config['designType']),

diff --git a/isatools/create/constants.py b/isatools/create/constants.py
@@ -108,3 +108,6 @@
 # Default study identifier
 DEFAULT_STUDY_IDENTIFIER = 's_01'
 DEFAULT_INVESTIGATION_IDENTIFIER = 'i_01'
+
+# Default file extension for DATA_FILE nodes, given without the leading dot
+DEFAULT_EXTENSION = 'raw'
diff --git a/isatools/create/errors.py b/isatools/create/errors.py
@@ -10,6 +10,7 @@
 PRODUCT_NODE_NAME_ERROR = 'ProductNode name must be a string, {0} supplied of type {1}'
 SIZE_ERROR = 'ProductNode size must be a natural number, i.e integer >= 0'
 CHARACTERISTIC_TYPE_ERROR = 'A characteristic must be either a string or a Characteristic, {0} supplied'
+PRODUCT_NODE_EXTENSION_ERROR = 'ProductNode extension must be either a string or an OntologyAnnotation.'
 
 # ERROR MESSAGES: QC SAMPLE (QUALITY CONTROL)
 QC_SAMPLE_TYPE_ERROR = 'qc_sample_type must be one of {0}'

diff --git a/isatools/create/model.py b/isatools/create/model.py
@@ -770,30 +770,42 @@ class ProductNode(SequenceNode):
     """
     ALLOWED_TYPES = {SOURCE, SAMPLE, EXTRACT, LABELED_EXTRACT, DATA_FILE}
 
-    def __init__(self, id_=str(uuid.uuid4()), node_type=SOURCE, name='', characteristics=[], size=0):
+    def __init__(self, id_=str(uuid.uuid4()), node_type=SOURCE, name='', characteristics=[], size=0, extension=None):
+        """
+        ProductNode constructor method
+        :param id_: an identifier for the ProductNode
+        :param node_type: str - the type of ProductNode. Must be one of the allowed types.
+        :param name: str - the name of the ProductNode
+        :param characteristics: list<Characteristics> - characteristics of the node
+        :param size: int - the size of the node; must be a natural number (integer >= 0)
+        :param extension: str/OntologyAnnotation - an extension to be appended to the elements generated from this
+                          ProductNode, e.g. the file extension (without the leading dot) of a DATA_FILE node
+        """
         super().__init__()
         self.__id = id_
         self.__type = None
         self.__name = None
         self.__characteristics = []
         self.__size = None
+        self.__extension = None
         self.type = node_type
         self.name = name
         self.characteristics = characteristics
         self.size = size
+        if extension:
+            self.extension = extension
 
     def __repr__(self):
         return '{0}.{1}(id={2.id}, type={2.type}, name={2.name}, ' \
-               'characteristics={2.characteristics}, size={2.size})'.format(
+               'characteristics={2.characteristics}, size={2.size}, ' \
+               'extension={2.extension})'.format(
                 self.__class__.__module__, self.__class__.__name__, self)
 
     def __str__(self):
         return """{0}(
-        id={1.id}, 
-        type={1.type}, 
-        name={1.name}, 
-        characteristics={1.characteristics}, 
-        size={1.size}
+        id={1.id},
+        type={1.type},
+        name={1.name},
         )""".format(self.__class__.__name__, self)
 
     def __hash__(self):
@@ -802,7 +814,7 @@ def __hash__(self):
     def __eq__(self, other):
         return isinstance(other, ProductNode) and self.id == other.id and self.type == other.type \
                and self.name == other.name and self.characteristics == other.characteristics \
-               and self.size == other.size
+               and self.size == other.size and self.extension == other.extension
 
     def __ne__(self, other):
         return not self == other
@@ -862,6 +874,16 @@ def size(self, size):
             raise AttributeError(errors.SIZE_ERROR)
         self.__size = size
 
+    @property
+    def extension(self):
+        return self.__extension
+
+    @extension.setter
+    def extension(self, extension):
+        if not isinstance(extension, (str, OntologyAnnotation)):
+            raise AttributeError(errors.PRODUCT_NODE_EXTENSION_ERROR)
+        self.__extension = extension
+
 
 class QualityControlSource(Source):
     pass
@@ -1045,6 +1067,7 @@ def generate_assay_plan_from_dict(cls, assay_plan_dict,
                                 re.sub(r'\s+', '_', node_name), str(i).zfill(3), str(j).zfill(3)
                             ),
                             name=node_name, node_type=node_params_dict['node_type'], size=node_params_dict['size'],
+                            extension=node_params_dict.get('extension', None),
                             characteristics=[
                                 Characteristic(category=node_params_dict['characteristics_category'],
                                                value=node_params_dict['characteristics_value'])
@@ -2390,12 +2413,14 @@ def _isa_objects_factory(
                         ProteinAssignmentFile, PeptideAssignmentFile, DerivedArrayDataMatrixFile,
                         PostTranslationalModificationAssignmentFile, AcquisitionParameterDataFile
                     }
+                    file_extension = '.{}'.format(node.extension) if node.extension else ''
                     return isa_class(
-                        filename='{}-S{}-{}-R{}'.format(
+                        filename='{}-S{}-{}-R{}{}'.format(
                             assay_file_prefix,
                             start_node_index,
                             urlify(node.name),
-                            counter[node.name]
+                            counter[node.name],
+                            file_extension
                         )
                     )
                 except StopIteration:

diff --git a/requirements.txt b/requirements.txt
@@ -1,30 +1,30 @@
-wheel~=0.35.1
-setuptools~=50.3.2
-numpy~=1.19.4
+wheel~=0.36.2
+setuptools~=51.1.2
+numpy~=1.19.5
 jsonschema~=3.2.0
-pandas~=1.1.4
+pandas~=1.2.0
 networkx~=2.5
-lxml~=4.6.1
-requests~=2.24.0
+lxml~=4.6.2
+requests~=2.25.1
 iso8601~=0.1.13
-chardet~=3.0.4
+chardet~=4.0.0
 jinja2~=2.11.2
 beautifulsoup4~=4.9.3
 mzml2isa==1.0.3
 #-e git+http://github.com/ISA-tools/mzml2isa@5866b3e8e185ddbb3b784e4d6a2ef6fbbcfcb256#egg=mzml2isa
 #-e git+http://github.com/ISA-tools/isa-api@4e38b09abac60c6acb787169e6eaeac0ac35c1ae#egg=isatools
 biopython~=1.78
 progressbar2~=3.53.1
-deepdiff~=5.0.2
+deepdiff~=5.2.1
 PyYAML~=5.3.1
 bokeh~=2.2.3
 # test dependencies
 # tox==3.14.0
 # nose==1.3.7
-certifi==2020.11.8
+certifi==2020.12.5
 flake8==3.8.4
 ddt==1.4.1
 behave==1.2.6
-httpretty==1.0.2
+httpretty==1.0.5
 sure==1.4.11
-coveralls~=2.1.2
+coveralls~=3.0.0
diff --git a/tests/test_create_connectors.py b/tests/test_create_connectors.py
@@ -159,6 +159,18 @@ def test_generate_study_design_from_config_with_chained_protocols_and_ontology_a
         investigation = Investigation(studies=[design.generate_isa_study()])
         self.assertIsInstance(investigation.studies[0], Study)
         self.assertEqual(len(investigation.studies[0].assays), len(ds_design_config['assayPlan']))
+        ms_assay = next(
+            assay for assay in investigation.studies[0].assays if assay.filename.endswith('mass-spectrometry.txt')
+        )
+        self.assertTrue(
+            all(data_file.filename.split('.')[-1] == 'mzML' for data_file in ms_assay.data_files)
+        )
+        nmr_assay = next(
+            assay for assay in investigation.studies[0].assays if assay.filename.endswith('NMR-spectroscopy.txt')
+        )
+        self.assertTrue(
+            all(data_file.filename.split('.')[-1] == 'raw' for data_file in nmr_assay.data_files)
+        )
         json.dumps(
             investigation,
             cls=ISAJSONEncoder,