Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

File extension #380

Merged
merged 4 commits into from
Jan 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 26 additions & 12 deletions isatools/create/connectors.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
from isatools.model import OntologyAnnotation, OntologySource, FactorValue, Characteristic
from isatools.create.model import StudyDesign, NonTreatment, Treatment, StudyCell, StudyArm, SampleAndAssayPlan
from isatools.create.constants import SCREEN, INTERVENTIONS, BASE_FACTORS, SAMPLE, ORGANISM_PART, DEFAULT_SOURCE_TYPE
from isatools.create.constants import (
SCREEN,
INTERVENTIONS,
BASE_FACTORS,
SAMPLE,
ORGANISM_PART,
DEFAULT_SOURCE_TYPE,
DATA_FILE,
DEFAULT_EXTENSION
)
from collections import OrderedDict

AGENT = 'agent'
Expand Down Expand Up @@ -238,6 +247,8 @@ def generate_assay_ord_dict_from_config(datascriptor_assay_config, arm_name, epo
]
elif 'node_type' in node:
# this is a product node
extension = node['extension']['value'] if 'extension' in node else DEFAULT_EXTENSION \
if node['node_type'] == DATA_FILE else None
if "characteristics_value" in node:
prepared_nodes = [
dict(
Expand All @@ -246,13 +257,18 @@ def generate_assay_ord_dict_from_config(datascriptor_assay_config, arm_name, epo
expand_strings=True),
characteristics_value=_map_ontology_annotation(value),
size=node.get('size', 1),
is_input_to_next_protocols=node['is_input_to_next_protocols']['value']
is_input_to_next_protocols=node['is_input_to_next_protocols']['value'],
extension=extension
) for value in node["characteristics_value"]["values"]
]
else:
prepared_nodes = [dict(node_type=node['node_type'],
size=node.get('size', 1),
is_input_to_next_protocols=node['is_input_to_next_protocols']['value'])]
prepared_nodes = [dict(
node_type=node['node_type'],
size=node.get('size', 1),
is_input_to_next_protocols=node['is_input_to_next_protocols']['value'],
extension=extension
)]

res[_map_ontology_annotation(name)] = prepared_nodes
return res

Expand Down Expand Up @@ -297,13 +313,12 @@ def generate_study_design_from_config(study_design_config):
sa_plan_name, sample_type_dicts, *assay_ord_dicts
)
arm_map[cell] = sa_plan
source_type = Characteristic(
category=DEFAULT_SOURCE_TYPE.category,
value=_map_ontology_annotation(
arm_dict.get('subjectType', None) or study_design_config.get('subjectType', None)
)
source_type = Characteristic(
category=DEFAULT_SOURCE_TYPE.category,
value=_map_ontology_annotation(
arm_dict.get('subjectType', None) or study_design_config.get('subjectType', None)
)

)
arm = StudyArm(
name=arm_dict['name'],
# should we generate a Characteristic if subjectType is an OntologyAnnotation?
Expand All @@ -318,7 +333,6 @@ def generate_study_design_from_config(study_design_config):
)
arms.append(arm)
return StudyDesign(
# TODO should we actually add the properties 'name' and 'description' to the study design?
name=study_design_config['name'],
description=study_design_config.get('description', None),
design_type=_map_ontology_annotation(study_design_config['designType']),
Expand Down
3 changes: 3 additions & 0 deletions isatools/create/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,6 @@
# Default study identifier
DEFAULT_STUDY_IDENTIFIER = 's_01'
DEFAULT_INVESTIGATION_IDENTIFIER = 'i_01'

# Default file extension for data file nodes (without the leading dot; the dot is added when the filename is built)
DEFAULT_EXTENSION = 'raw'
1 change: 1 addition & 0 deletions isatools/create/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
PRODUCT_NODE_NAME_ERROR = 'ProductNode name must be a string, {0} supplied of type {1}'
SIZE_ERROR = 'ProductNode size must be a natural number, i.e integer >= 0'
CHARACTERISTIC_TYPE_ERROR = 'A characteristic must be either a string or a Characteristic, {0} supplied'
PRODUCT_NODE_EXTENSION_ERROR = 'ProductNode extension must be either a string or an OntologyAnnotation.'

# ERROR MESSAGES: QC SAMPLE (QUALITY CONTROL)
QC_SAMPLE_TYPE_ERROR = 'qc_sample_type must be one of {0}'
Expand Down
45 changes: 35 additions & 10 deletions isatools/create/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,30 +770,42 @@ class ProductNode(SequenceNode):
"""
ALLOWED_TYPES = {SOURCE, SAMPLE, EXTRACT, LABELED_EXTRACT, DATA_FILE}

def __init__(self, id_=None, node_type=SOURCE, name='', characteristics=None, size=0, extension=None):
    """
    ProductNode constructor method
    :param id_: an identifier for the ProductNode. When None (the default) a new UUID4 string is generated
                for this instance.
    :param node_type: str - the type of ProductNode. Must be one of the allowed types.
    :param name: str - the name of the ProductNode
    :param characteristics: list<Characteristics> - characteristics of the node. When None (the default) an
                            empty list is used.
    :param size: int
    :param extension: str/OntologyAnnotation - an extension to be appended to the elements generated from this
                      ProductNode. It can be used to specify the file extension for a DATA_FILE node
    """
    super().__init__()
    # The identifier is generated here, per call. A `str(uuid.uuid4())` default in the signature is evaluated
    # only once, when the function is defined, so every node created without an explicit id_ would share
    # the very same identifier.
    self.__id = str(uuid.uuid4()) if id_ is None else id_
    self.__type = None
    self.__name = None
    self.__characteristics = []
    self.__size = None
    self.__extension = None
    # The assignments below go through the property setters of the class.
    self.type = node_type
    self.name = name
    # A new list per call, so that instances never share one mutable default object.
    self.characteristics = [] if characteristics is None else characteristics
    self.size = size
    # Falsy values (None, '') leave the extension unset instead of going through the type-checking setter.
    if extension:
        self.extension = extension

def __repr__(self):
    """
    Build the representation string of the node: module and class name followed by the id, type, name,
    characteristics, size and extension properties.
    """
    template = (
        '{0}.{1}(id={2.id}, type={2.type}, name={2.name}, '
        'characteristics={2.characteristics}, size={2.size}, '
        'extension={2.extension})'
    )
    klass = self.__class__
    return template.format(klass.__module__, klass.__name__, self)

def __str__(self):
return """{0}(
id={1.id},
type={1.type},
name={1.name},
characteristics={1.characteristics},
size={1.size}
id={1.id},
type={1.type},
name={1.name},
)""".format(self.__class__.__name__, self)

def __hash__(self):
Expand All @@ -802,7 +814,7 @@ def __hash__(self):
def __eq__(self, other):
    """
    Two nodes are equal when `other` is a ProductNode and id, type, name, characteristics, size and
    extension all compare equal.
    """
    # Anything that is not a ProductNode can never be equal to this node.
    if not isinstance(other, ProductNode):
        return False
    return (
        self.id == other.id
        and self.type == other.type
        and self.name == other.name
        and self.characteristics == other.characteristics
        and self.size == other.size
        and self.extension == other.extension
    )

def __ne__(self, other):
return not self == other
Expand Down Expand Up @@ -862,6 +874,16 @@ def size(self, size):
raise AttributeError(errors.SIZE_ERROR)
self.__size = size

@property
def extension(self):
    """Return the extension currently stored on this node."""
    return self.__extension

@extension.setter
def extension(self, extension):
    """
    Store the extension after checking its type.
    :param extension: str/OntologyAnnotation - the extension to store
    :raises AttributeError: if extension is neither a str nor an OntologyAnnotation
    """
    if isinstance(extension, (str, OntologyAnnotation)):
        self.__extension = extension
    else:
        raise AttributeError(errors.PRODUCT_NODE_EXTENSION_ERROR)


class QualityControlSource(Source):
pass
Expand Down Expand Up @@ -1045,6 +1067,7 @@ def generate_assay_plan_from_dict(cls, assay_plan_dict,
re.sub(r'\s+', '_', node_name), str(i).zfill(3), str(j).zfill(3)
),
name=node_name, node_type=node_params_dict['node_type'], size=node_params_dict['size'],
extension=node_params_dict.get('extension', None),
characteristics=[
Characteristic(category=node_params_dict['characteristics_category'],
value=node_params_dict['characteristics_value'])
Expand Down Expand Up @@ -2390,12 +2413,14 @@ def _isa_objects_factory(
ProteinAssignmentFile, PeptideAssignmentFile, DerivedArrayDataMatrixFile,
PostTranslationalModificationAssignmentFile, AcquisitionParameterDataFile
}
file_extension = '.{}'.format(node.extension) if node.extension else ''
return isa_class(
filename='{}-S{}-{}-R{}'.format(
filename='{}-S{}-{}-R{}{}'.format(
assay_file_prefix,
start_node_index,
urlify(node.name),
counter[node.name]
counter[node.name],
file_extension
)
)
except StopIteration:
Expand Down
22 changes: 11 additions & 11 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
wheel~=0.35.1
setuptools~=50.3.2
numpy~=1.19.4
wheel~=0.36.2
setuptools~=51.1.2
numpy~=1.19.5
jsonschema~=3.2.0
pandas~=1.1.4
pandas~=1.2.0
networkx~=2.5
lxml~=4.6.1
requests~=2.24.0
lxml~=4.6.2
requests~=2.25.1
iso8601~=0.1.13
chardet~=3.0.4
chardet~=4.0.0
jinja2~=2.11.2
beautifulsoup4~=4.9.3
mzml2isa==1.0.3
#-e git+http://github.com/ISA-tools/mzml2isa@5866b3e8e185ddbb3b784e4d6a2ef6fbbcfcb256#egg=mzml2isa
#-e git+http://github.com/ISA-tools/isa-api@4e38b09abac60c6acb787169e6eaeac0ac35c1ae#egg=isatools
biopython~=1.78
progressbar2~=3.53.1
deepdiff~=5.0.2
deepdiff~=5.2.1
PyYAML~=5.3.1
bokeh~=2.2.3
# test dependencies
# tox==3.14.0
# nose==1.3.7
certifi==2020.11.8
certifi==2020.12.5
flake8==3.8.4
ddt==1.4.1
behave==1.2.6
httpretty==1.0.2
httpretty==1.0.5
sure==1.4.11
coveralls~=2.1.2
coveralls~=3.0.0
12 changes: 12 additions & 0 deletions tests/test_create_connectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,18 @@ def test_generate_study_design_from_config_with_chained_protocols_and_ontology_a
investigation = Investigation(studies=[design.generate_isa_study()])
self.assertIsInstance(investigation.studies[0], Study)
self.assertEqual(len(investigation.studies[0].assays), len(ds_design_config['assayPlan']))
ms_assay = next(
assay for assay in investigation.studies[0].assays if assay.filename.endswith('mass-spectrometry.txt')
)
self.assertTrue(
all(data_file.filename.split('.')[-1] == 'mzML' for data_file in ms_assay.data_files)
)
nmr_assay = next(
assay for assay in investigation.studies[0].assays if assay.filename.endswith('NMR-spectroscopy.txt')
)
self.assertTrue(
all(data_file.filename.split('.')[-1] == 'raw' for data_file in nmr_assay.data_files)
)
json.dumps(
investigation,
cls=ISAJSONEncoder,
Expand Down