Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle type refs in _map_refs #166

Merged
merged 9 commits into from Sep 6, 2019
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
55 changes: 33 additions & 22 deletions cidc_schemas/json_validation.py
Expand Up @@ -60,28 +60,39 @@ def _map_refs(node: dict, on_refs: Callable[[str], dict]) -> dict:
Note: _map_refs is shallow, i.e., if calling `on_refs` on a node produces
a new node that contains refs, those refs will not be resolved.
"""
if isinstance(node, collections.abc.Mapping) and '$ref' in node:
extra_keys = set(node.keys()).difference({'$ref', '$comment'})
if extra_keys:
# As for json-schema.org:
# "... You will always use $ref as the only key in an object:
# any other keys you put there will be ignored by the validator."
# So we raise on that, to notify schema creator that s/he should not
# expect those additional keys to be verified by schema validator.
raise Exception(f"Schema node with '$ref' should not contain anything else (besides '$comment' for docs). \
\nOn: {node} \nOffending keys {extra_keys}")

# We found a ref, so return it mapped through `on_refs`
new_node = on_refs(node['$ref'])
# Plus concatenated new and old '$comment' fields
# which should be just ignored anyways.
if '$comment' in new_node or '$comment' in node:
new_node['$comment'] = new_node.get('$comment', '') + node.get('$comment', '')
return new_node
elif isinstance(node, collections.abc.Mapping):
# Look for all refs in this mapping
for k, v in node.items():
node[k] = _map_refs(v, on_refs)
if isinstance(node, collections.abc.Mapping):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how this works — I thought _map_refs would be called whenever the JSON schema walker walks into a node with '$ref'. From that understanding, it would not be called at all on a node like {"type_ref": "..."}

Copy link
Contributor Author

@jacoblurye jacoblurye Sep 6, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think _map_refs is the JSON schema walker you're referring to. On the next line, we look for $refs and type_refs, then handle each case slightly differently. The test test_json_validation.test_map_refs might be a bit helpful, too.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

never mind

if '$ref' in node or 'type_ref' in node:
ref_key = '$ref' if '$ref' in node else 'type_ref'

if ref_key == '$ref':
extra_keys = set(node.keys()).difference({'$ref', '$comment'})
if extra_keys:
# As for json-schema.org:
# "... You will always use $ref as the only key in an object:
# any other keys you put there will be ignored by the validator."
# So we raise on that, to notify schema creator that s/he should not
# expect those additional keys to be verified by schema validator.
raise Exception(f"Schema node with '$ref' should not contain anything else (besides '$comment' for docs). \
\nOn: {node} \nOffending keys {extra_keys}")

# We found a ref, so return it mapped through `on_refs`
new_node = on_refs(node[ref_key])

if ref_key == 'type_ref':
# For type_ref's, we don't want to clobber the other properties in node,
# so merge new_node and node.
new_node.update(node)

# Plus concatenated new and old '$comment' fields
# which should be just ignored anyways.
if '$comment' in new_node or '$comment' in node:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As comments contain development-related TODOs and the like, and we're now sending links to these docs to people, I think we should keep comments to ourselves and not surface them visibly in the docs.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nevermind

new_node['$comment'] = new_node.get(
'$comment', '') + node.get('$comment', '')
return new_node
else:
# Look for all refs further down in this mapping
for k, v in node.items():
node[k] = _map_refs(v, on_refs)
elif isinstance(node, (list, tuple)):
# Look for all refs in this list
for i in range(len(node)):
Expand Down
3 changes: 2 additions & 1 deletion cidc_schemas/schemas/assays/components/local_file.json
Expand Up @@ -6,8 +6,9 @@
"description": "A type to store local file paths",
"properties": {
"file_path": {
"$id": "local_file_path",
"type": "string",
"description": "Path to a file on a users computer."
"description": "Path to a file on a user's computer."
}
},
"required": []
Expand Down
6 changes: 3 additions & 3 deletions cidc_schemas/schemas/templates/metadata/olink_template.json
Expand Up @@ -36,7 +36,7 @@
"combined file": {
"merge_pointer": "0/study/study_npx",
"is_artifact" : 1,
"type_ref": "assays/components/local_file.json"
"type_ref": "assays/components/local_file.json#properties/file_path"
},
"combined file npx manager version": {
"merge_pointer": "0/study/npx_manager_version",
Expand All @@ -53,13 +53,13 @@
"merge_pointer": "0/files/assay_npx",
"is_artifact" : 1,
"gcs_prefix_format": "{chip_barcode}",
"type_ref": "assays/components/local_file.json"
"type_ref": "assays/components/local_file.json#properties/file_path"
},
"raw ct file": {
"merge_pointer": "0/files/assay_raw_ct",
"is_artifact" : 1,
"gcs_prefix_format": "{chip_barcode}",
"type_ref": "assays/components/local_file.json"
"type_ref": "assays/components/local_file.json#properties/file_path"
},
"run date": {
"merge_pointer": "0/run_date",
Expand Down
6 changes: 3 additions & 3 deletions cidc_schemas/schemas/templates/metadata/wes_template.json
Expand Up @@ -87,19 +87,19 @@
"merge_pointer": "0/files/fastq_1",
"gcs_prefix_format": "{cimac_participant_id}/{cimac_sample_id}/{cimac_aliquot_id}",
"is_artifact" : 1,
"type_ref": "assays/components/local_file.json"
"type_ref": "assays/components/local_file.json#properties/file_path"
},
"reverse fastq": {
"merge_pointer": "0/files/fastq_2",
"gcs_prefix_format": "{cimac_participant_id}/{cimac_sample_id}/{cimac_aliquot_id}",
"is_artifact" : 1,
"type_ref": "assays/components/local_file.json"
"type_ref": "assays/components/local_file.json#properties/file_path"
},
"read group mapping file": {
"merge_pointer": "0/files/read_group_mapping_file",
"gcs_prefix_format": "{cimac_participant_id}/{cimac_sample_id}/{cimac_aliquot_id}",
"is_artifact" : 1,
"type_ref": "assays/components/local_file.json"
"type_ref": "assays/components/local_file.json#properties/file_path"
}
}
}
Expand Down
42 changes: 21 additions & 21 deletions cidc_schemas/template.py
Expand Up @@ -25,7 +25,8 @@ def _get_template_path_map() -> dict:
if os.path.isdir(abs_type_dir):
for template_file in os.listdir(abs_type_dir):
template_type = template_file.replace('_template.json', '')
template_schema_path = os.path.join(abs_type_dir, template_file)
template_schema_path = os.path.join(
abs_type_dir, template_file)
path_map[template_type] = template_schema_path

return path_map
Expand Down Expand Up @@ -65,7 +66,6 @@ def generate_all_templates(target_dir: str):
target_path = os.path.join(target_subdir, template_xlsx_file)
generate_empty_template(schema_path, target_path)


class Template:
"""
Configuration describing a manifest or assay template
Expand Down Expand Up @@ -137,7 +137,6 @@ def process_fields(schema: dict) -> dict:

return processed_worksheet


@staticmethod
def _get_ref_coerce(ref: str):
"""
Expand Down Expand Up @@ -174,7 +173,10 @@ def _get_coerce(t: str, object_id = None):
and determines the best python
function to type the value.
"""

# if it's an artifact that will be loaded through local file
# we just return uuid as value
if object_id == "local_file_path":
return lambda _: str(uuid.uuid4())
if t == 'string':
return str
elif t == 'integer':
Expand All @@ -183,15 +185,9 @@ def _get_coerce(t: str, object_id = None):
return float
elif t == 'boolean':
return bool

# if it's an artifact that will be loaded through local file
# we just return uuid as value
elif t == 'object' and object_id == "local_file":
return lambda _: str(uuid.uuid4())
else:
raise NotImplementedError(f"no coercion available for type:{t}")


def _load_keylookup(self) -> dict:
"""
The excel spreadsheet uses human friendly (no _) names
Expand All @@ -209,14 +205,19 @@ def _load_keylookup(self) -> dict:
# create a key lookup dictionary
key_lu = {}

def _add_coerce(field_def:dict) -> dict:
def _add_coerce(field_def: dict) -> dict:
# checks if we have a cast func for that 'type_ref'
if 'value_template_format' in field_def:
coerce = lambda x: field_def['value_template_format'].format_map(x)
coerce = lambda x: field_def['value_template_format'].format_map(x)
if 'type' in field_def:
coerce = self._get_coerce(field_def['type'])
if '$id' in field_def:
coerce = self._get_coerce(
field_def['type'], field_def['$id'])
else:
coerce = self._get_coerce(field_def['type'])
else:
coerce = self._get_ref_coerce(field_def.get('type_ref') or field_def['$ref'])
coerce = self._get_ref_coerce(
field_def.get('type_ref') or field_def['$ref'])

return dict(coerce=coerce, **field_def)

Expand All @@ -232,7 +233,7 @@ def _add_coerce(field_def:dict) -> dict:
# (as for template.schema) - "merge_pointer" and "type_ref"

# load the data columns
for section_key, section_def in ws_schema.get('data_columns', {}).items():
for section_key, section_def in ws_schema.get('data_columns', {}).items():
for column_key, column_def in section_def.items():

# populate lookup
Expand All @@ -244,10 +245,10 @@ def _add_coerce(field_def:dict) -> dict:

# XlTemplateReader only knows how to format these types of sections
VALID_WS_SECTIONS = set(['preamble_rows',
'data_columns',
'prism_preamble_object_pointer',
'prism_data_object_pointer',
'prism_preamble_object_schema'])
'data_columns',
'prism_preamble_object_pointer',
'prism_data_object_pointer',
'prism_preamble_object_schema'])

@staticmethod
def _validate_worksheet(ws_title: str, ws_schema: dict):
Expand Down Expand Up @@ -276,8 +277,7 @@ def from_json(template_schema_path: str, schema_root: str = SCHEMA_DIR):
template_schema_path {str} -- path to the template schema file
schema_root {str} -- path to the directory where all schemas are stored
"""
template_schema = load_and_validate_schema(
template_schema_path, schema_root)
template_schema = load_and_validate_schema(template_schema_path, schema_root)

return Template(template_schema, name=template_schema_path)

Expand Down
49 changes: 48 additions & 1 deletion docs/docs/artifacts.artifact_bam.html
Expand Up @@ -87,6 +87,8 @@ <h3>Properties</h3>








Expand Down Expand Up @@ -119,6 +121,29 @@ <h3>Properties</h3>



</td>
</tr>



<tr>
<td>

<span id="data_format">
data_format
</span>


</td>
<td width=55%>



</td>
<td>

&lt; const: <strong>BAM</strong> &gt;

</td>
</tr>

Expand All @@ -145,7 +170,8 @@ <h3>artifact_bam.full.json</h3>
"uploaded_timestamp",
"file_size_bytes",
"md5_hash",
"artifact_category"
"artifact_category",
"data_format"
]
},
{
Expand Down Expand Up @@ -175,6 +201,24 @@ <h3>artifact_bam.full.json</h3>
],
"type": "string"
},
"data_format": {
"description": "Data Format.",
"enum": [
"FASTA",
"FASTQ",
"IMAGE",
"VCF",
"CSV",
"XLSX",
"NPX",
"BAM",
"MAF",
"BINARY",
"TEXT",
"[NOT SET]"
],
"type": "string"
},
"file_name": {
"$comment": "_____!!!_____ What if file name contains PHI? _____!!!_____",
"description": "The name of the file with extension",
Expand Down Expand Up @@ -218,6 +262,9 @@ <h3>artifact_bam.full.json</h3>
},
{
"properties": {
"data_format": {
"const": "BAM"
},
"info": {
"type": "object"
}
Expand Down