Skip to content

Commit

Permalink
Merge pull request #380 from ISA-tools/file-extension
Browse files Browse the repository at this point in the history
File extension
  • Loading branch information
Zigur committed Jan 18, 2021
2 parents 75d864a + baf177f commit 702ad66
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 33 deletions.
38 changes: 26 additions & 12 deletions isatools/create/connectors.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
from isatools.model import OntologyAnnotation, OntologySource, FactorValue, Characteristic
from isatools.create.model import StudyDesign, NonTreatment, Treatment, StudyCell, StudyArm, SampleAndAssayPlan
from isatools.create.constants import SCREEN, INTERVENTIONS, BASE_FACTORS, SAMPLE, ORGANISM_PART, DEFAULT_SOURCE_TYPE
from isatools.create.constants import (
SCREEN,
INTERVENTIONS,
BASE_FACTORS,
SAMPLE,
ORGANISM_PART,
DEFAULT_SOURCE_TYPE,
DATA_FILE,
DEFAULT_EXTENSION
)
from collections import OrderedDict

AGENT = 'agent'
Expand Down Expand Up @@ -238,6 +247,8 @@ def generate_assay_ord_dict_from_config(datascriptor_assay_config, arm_name, epo
]
elif 'node_type' in node:
# this is a product node
extension = node['extension']['value'] if 'extension' in node else DEFAULT_EXTENSION \
if node['node_type'] == DATA_FILE else None
if "characteristics_value" in node:
prepared_nodes = [
dict(
Expand All @@ -246,13 +257,18 @@ def generate_assay_ord_dict_from_config(datascriptor_assay_config, arm_name, epo
expand_strings=True),
characteristics_value=_map_ontology_annotation(value),
size=node.get('size', 1),
is_input_to_next_protocols=node['is_input_to_next_protocols']['value']
is_input_to_next_protocols=node['is_input_to_next_protocols']['value'],
extension=extension
) for value in node["characteristics_value"]["values"]
]
else:
prepared_nodes = [dict(node_type=node['node_type'],
size=node.get('size', 1),
is_input_to_next_protocols=node['is_input_to_next_protocols']['value'])]
prepared_nodes = [dict(
node_type=node['node_type'],
size=node.get('size', 1),
is_input_to_next_protocols=node['is_input_to_next_protocols']['value'],
extension=extension
)]

res[_map_ontology_annotation(name)] = prepared_nodes
return res

Expand Down Expand Up @@ -297,13 +313,12 @@ def generate_study_design_from_config(study_design_config):
sa_plan_name, sample_type_dicts, *assay_ord_dicts
)
arm_map[cell] = sa_plan
source_type = Characteristic(
category=DEFAULT_SOURCE_TYPE.category,
value=_map_ontology_annotation(
arm_dict.get('subjectType', None) or study_design_config.get('subjectType', None)
)
source_type = Characteristic(
category=DEFAULT_SOURCE_TYPE.category,
value=_map_ontology_annotation(
arm_dict.get('subjectType', None) or study_design_config.get('subjectType', None)
)

)
arm = StudyArm(
name=arm_dict['name'],
# should we generate a Characteristic if subjectType is an OntologyAnnotation?
Expand All @@ -318,7 +333,6 @@ def generate_study_design_from_config(study_design_config):
)
arms.append(arm)
return StudyDesign(
# TODO should we actually add the properties 'name' and 'description' to the study design?
name=study_design_config['name'],
description=study_design_config.get('description', None),
design_type=_map_ontology_annotation(study_design_config['designType']),
Expand Down
3 changes: 3 additions & 0 deletions isatools/create/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,6 @@
# Default study identifier
DEFAULT_STUDY_IDENTIFIER = 's_01'
DEFAULT_INVESTIGATION_IDENTIFIER = 'i_01'

# Default file extension (no dot required)
DEFAULT_EXTENSION = 'raw'
1 change: 1 addition & 0 deletions isatools/create/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
PRODUCT_NODE_NAME_ERROR = 'ProductNode name must be a string, {0} supplied of type {1}'
SIZE_ERROR = 'ProductNode size must be a natural number, i.e integer >= 0'
CHARACTERISTIC_TYPE_ERROR = 'A characteristic must be either a string or a Characteristic, {0} supplied'
PRODUCT_NODE_EXTENSION_ERROR = 'ProductNode extension must be either a string or an OntologyAnnotation.'

# ERROR MESSAGES: QC SAMPLE (QUALITY CONTROL)
QC_SAMPLE_TYPE_ERROR = 'qc_sample_type must be one of {0}'
Expand Down
45 changes: 35 additions & 10 deletions isatools/create/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,30 +770,42 @@ class ProductNode(SequenceNode):
"""
ALLOWED_TYPES = {SOURCE, SAMPLE, EXTRACT, LABELED_EXTRACT, DATA_FILE}

def __init__(self, id_=str(uuid.uuid4()), node_type=SOURCE, name='', characteristics=[], size=0):
def __init__(self, id_=str(uuid.uuid4()), node_type=SOURCE, name='', characteristics=None, size=0,
             extension=None):
    """
    ProductNode constructor method
    :param id_: an identifier for the ProductNode
    :param node_type: str - the type of ProductNode. Must be one of the allowed types.
    :param name: str - the name of the ProductNode
    :param characteristics: list<Characteristics> - characteristics of the node. When omitted (or None) a new
                            empty list is used for this instance.
    :param size: int
    :param extension: str/OntologyAnnotation - an extension to be appended to the elements generated from this
                      ProductNode. It can be used to specify the file extension for a DATA_FILE node. Falsy
                      values (None, '') leave the extension unset.
    """
    # NOTE(review): the default for id_ is evaluated once, when this function is defined, so every node
    # created without an explicit id_ receives the same identifier. Left unchanged here because equality
    # compares ids and callers may depend on it -- confirm before switching to a per-instance default.
    super().__init__()
    self.__id = id_
    self.__type = None
    self.__name = None
    self.__characteristics = []
    self.__size = None
    self.__extension = None
    # the assignments below go through the property setters
    self.type = node_type
    self.name = name
    # the fallback list is created here, not in the signature, so no list object is shared across instances
    self.characteristics = [] if characteristics is None else characteristics
    self.size = size
    # the extension setter only accepts str/OntologyAnnotation, so it is skipped when nothing was supplied
    if extension:
        self.extension = extension

def __repr__(self):
return '{0}.{1}(id={2.id}, type={2.type}, name={2.name}, ' \
'characteristics={2.characteristics}, size={2.size})'.format(
'characteristics={2.characteristics}, size={2.size}, ' \
'extension={2.extension})'.format(
self.__class__.__module__, self.__class__.__name__, self)

def __str__(self):
return """{0}(
id={1.id},
type={1.type},
name={1.name},
characteristics={1.characteristics},
size={1.size}
id={1.id},
type={1.type},
name={1.name},
)""".format(self.__class__.__name__, self)

def __hash__(self):
Expand All @@ -802,7 +814,7 @@ def __hash__(self):
def __eq__(self, other):
return isinstance(other, ProductNode) and self.id == other.id and self.type == other.type \
and self.name == other.name and self.characteristics == other.characteristics \
and self.size == other.size
and self.size == other.size and self.extension == other.extension

def __ne__(self, other):
    """Inequality is the logical negation of ``==`` for this node."""
    if self == other:
        return False
    return True
Expand Down Expand Up @@ -862,6 +874,16 @@ def size(self, size):
raise AttributeError(errors.SIZE_ERROR)
self.__size = size

@property
def extension(self):
    """Return the extension currently stored on this node."""
    return self.__extension

@extension.setter
def extension(self, extension):
    """
    Store a new extension after checking its type.
    :param extension: str/OntologyAnnotation - the value to store
    :raises AttributeError: if the value is neither a str nor an OntologyAnnotation
    """
    if isinstance(extension, (str, OntologyAnnotation)):
        self.__extension = extension
    else:
        raise AttributeError(errors.PRODUCT_NODE_EXTENSION_ERROR)


class QualityControlSource(Source):
    """A ``Source`` subclass that declares no members of its own."""
Expand Down Expand Up @@ -1045,6 +1067,7 @@ def generate_assay_plan_from_dict(cls, assay_plan_dict,
re.sub(r'\s+', '_', node_name), str(i).zfill(3), str(j).zfill(3)
),
name=node_name, node_type=node_params_dict['node_type'], size=node_params_dict['size'],
extension=node_params_dict.get('extension', None),
characteristics=[
Characteristic(category=node_params_dict['characteristics_category'],
value=node_params_dict['characteristics_value'])
Expand Down Expand Up @@ -2390,12 +2413,14 @@ def _isa_objects_factory(
ProteinAssignmentFile, PeptideAssignmentFile, DerivedArrayDataMatrixFile,
PostTranslationalModificationAssignmentFile, AcquisitionParameterDataFile
}
file_extension = '.{}'.format(node.extension) if node.extension else ''
return isa_class(
filename='{}-S{}-{}-R{}'.format(
filename='{}-S{}-{}-R{}{}'.format(
assay_file_prefix,
start_node_index,
urlify(node.name),
counter[node.name]
counter[node.name],
file_extension
)
)
except StopIteration:
Expand Down
22 changes: 11 additions & 11 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
wheel~=0.35.1
setuptools~=50.3.2
numpy~=1.19.4
wheel~=0.36.2
setuptools~=51.1.2
numpy~=1.19.5
jsonschema~=3.2.0
pandas~=1.1.4
pandas~=1.2.0
networkx~=2.5
lxml~=4.6.1
requests~=2.24.0
lxml~=4.6.2
requests~=2.25.1
iso8601~=0.1.13
chardet~=3.0.4
chardet~=4.0.0
jinja2~=2.11.2
beautifulsoup4~=4.9.3
mzml2isa==1.0.3
#-e git+http://github.com/ISA-tools/mzml2isa@5866b3e8e185ddbb3b784e4d6a2ef6fbbcfcb256#egg=mzml2isa
#-e git+http://github.com/ISA-tools/isa-api@4e38b09abac60c6acb787169e6eaeac0ac35c1ae#egg=isatools
biopython~=1.78
progressbar2~=3.53.1
deepdiff~=5.0.2
deepdiff~=5.2.1
PyYAML~=5.3.1
bokeh~=2.2.3
# test dependencies
# tox==3.14.0
# nose==1.3.7
certifi==2020.11.8
certifi==2020.12.5
flake8==3.8.4
ddt==1.4.1
behave==1.2.6
httpretty==1.0.2
httpretty==1.0.5
sure==1.4.11
coveralls~=2.1.2
coveralls~=3.0.0
12 changes: 12 additions & 0 deletions tests/test_create_connectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,18 @@ def test_generate_study_design_from_config_with_chained_protocols_and_ontology_a
investigation = Investigation(studies=[design.generate_isa_study()])
self.assertIsInstance(investigation.studies[0], Study)
self.assertEqual(len(investigation.studies[0].assays), len(ds_design_config['assayPlan']))
ms_assay = next(
assay for assay in investigation.studies[0].assays if assay.filename.endswith('mass-spectrometry.txt')
)
self.assertTrue(
all(data_file.filename.split('.')[-1] == 'mzML' for data_file in ms_assay.data_files)
)
nmr_assay = next(
assay for assay in investigation.studies[0].assays if assay.filename.endswith('NMR-spectroscopy.txt')
)
self.assertTrue(
all(data_file.filename.split('.')[-1] == 'raw' for data_file in nmr_assay.data_files)
)
json.dumps(
investigation,
cls=ISAJSONEncoder,
Expand Down

0 comments on commit 702ad66

Please sign in to comment.