Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for arrays as "type"; Allow strings as IHC "numbers" #452

Merged
merged 7 commits into from May 3, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion cidc_schemas/__init__.py
Expand Up @@ -2,4 +2,4 @@

__author__ = """James Lindsay"""
__email__ = "jlindsay@jimmy.harvard.edu"
__version__ = "0.24.23"
__version__ = "0.24.24"
29 changes: 8 additions & 21 deletions cidc_schemas/schemas/assays/ihc_assay.json
Expand Up @@ -39,6 +39,7 @@
"record": {
"type": "object",
"additionalProperties": false,

"properties": {
"cimac_id": {
"$comment": "Id of a sample within this clinical trial, that this assay record is based upon.",
Expand All @@ -55,45 +56,31 @@
},
"tumor_proportion_score": {
"description": "Tumor Proportion Score (TPS) is the percentage of viable tumor cells showing marker staining relative to all viable tumor cells. (0-1)",
"type": "number",
"minimum": 0,
"maximum": 1
"type": ["number", "string"]
},
"combined_positive_score": {
"description": "Combined Positive Score (CPS) is the percentage of marker staining cells (tumor cells and cells that are non-tumor) relative to all viable tumor cells. (0-1)",
"type": "number",
"minimum": 0,
"maximum": 1
"type": ["number", "string"]
},
"inflammatory_cells": {
"description": "Percentage of inflammatory cells (non-tumor cells) showing marker staining relative to all inflammatory cells. (0-1)",
"type": "number",
"minimum": 0,
"maximum": 1
"type": ["number", "string"]
},
"positive_inflammatory_cell_area": {
"description": "Area of PD-L1+ Inflammatory Cells over the area of TSI + IT as a percentage. (0-1)",
"type": "number",
"minimum": 0,
"maximum": 1
"type": ["number", "string"]
},
"intensity": {
"description": "A measure of the intensity or brightness of the protein. (0-3)",
"type": "number",
"minimum": 0,
"maximum": 3
"type": ["number", "string"]
},
"percentage_expression": {
"description": "A percentage of the relevant cells considered positive. (0-100)",
"type": "number",
"minimum": 0,
"maximum": 100
"type": ["number", "string"]
},
"h_score": {
"description": "A summation of the percentage of area stained at each intensity level multiplied by the weighted intensity. (0-300)",
"type": "integer",
"minimum": 0,
"maximum": 300
"type": ["integer", "string"]
},
"comment": {
"description": "A text comment regarding this slide.",
Expand Down
43 changes: 41 additions & 2 deletions cidc_schemas/template.py
Expand Up @@ -21,6 +21,7 @@
Callable,
)
from collections import OrderedDict, defaultdict
from pandas import to_numeric

from .constants import SCHEMA_DIR, TEMPLATE_DIR
from .json_validation import _load_dont_validate_schema
Expand Down Expand Up @@ -619,7 +620,7 @@ def process_value(
except Exception as e:
_field_name = self.merge_pointer.rsplit("/", 1)[-1]
raise ParsingException(
f"Cannot extract {_field_name} from {self.key_name} value: {raw_val!r}"
f"Cannot extract {_field_name} from {self.key_name} value: {raw_val!r}\n{e}"
) from e

# or set/update value in-place in data_obj dictionary
Expand Down Expand Up @@ -910,7 +911,45 @@ def _get_typed_entry_coerce(entry: dict):
if entry.get("$id") in ["local_file_path", "local_file_path_list"]:
return Template._gen_upload_placeholder_uuid

return Template._get_simple_type_coerce(entry["type"])
if isinstance(entry["type"], list):
return Template._get_list_type_coerce(entry["type"])
else:
return Template._get_simple_type_coerce(entry["type"])

@staticmethod
def _get_list_type_coerce(type_list: List[str]) -> Callable:
    """
    Build a coercion function for a schema entry whose "type" is a list.

    One simple coercion function is looked up per listed type (via
    `Template._get_simple_type_coerce`). The returned callable applies each
    of them to the raw value and then selects a single result:

      * the "string" result is discarded whenever any other type succeeded;
      * if both "integer" and "number" succeeded and "boolean" did not,
        the "integer" result is returned;
      * if exactly one candidate remains, it is returned.

    Args:
        type_list: the type names listed under the entry's "type" key.

    Returns:
        A one-argument callable mapping a raw value to its coerced value.
        That callable raises ParsingException when no coercion succeeds,
        or when several succeed and none of the rules above picks one.
    """
    coerce_fns: Dict[str, Callable] = {
        t: Template._get_simple_type_coerce(t) for t in type_list
    }

    def coerce(val):
        # Try every candidate type, remembering both successes and failures
        # so that the final error message can explain what went wrong.
        values, errors = {}, {}
        for type_name, coerce_fn in coerce_fns.items():
            try:
                values[type_name] = coerce_fn(val)
            except Exception as e:
                errors[type_name] = e

        if len(values) > 1 and "string" in values:
            # if there's something else, assert we don't want the string
            values.pop("string")
        if "integer" in values and "number" in values and "boolean" not in values:
            # both numeric coercions succeeded: prefer the integer result.
            # If a boolean coercion succeeded as well, no preference is
            # applied and the ambiguity is reported below.
            return values["integer"]

        if len(values) == 1:
            # if there's only one possible conversion, that's the one we want
            return next(iter(values.values()))
        if values:
            # if there are multiple, we don't know which one to pick
            raise ParsingException(
                f"Multiple valid coercions detected, unable to choose between: {values}"
            )
        # if there are none, error
        raise ParsingException(f"No valid coercion found: {errors}")

    return coerce

@staticmethod
def _get_simple_type_coerce(t: str):
Expand Down
7 changes: 6 additions & 1 deletion cidc_schemas/template_writer.py
Expand Up @@ -291,7 +291,12 @@ def _get_legend_typeformat(property_schema: dict):
return "Enum"

try:
property_type = property_schema["type"].capitalize()
if isinstance(property_schema["type"], list):
property_type = ", ".join(
[t.capitalize() for t in property_schema["type"]]
)
else:
property_type = property_schema["type"].capitalize()
except KeyError:
raise KeyError(
f"Property schema is missing type annotation:\n{property_schema}"
Expand Down
Binary file modified template_examples/ihc_template.xlsx
Binary file not shown.
4 changes: 3 additions & 1 deletion tests/prism/cidc_test_data/assay_data.py
@@ -1,3 +1,5 @@
from pandas import to_numeric
scvannost marked this conversation as resolved.
Show resolved Hide resolved

from cidc_schemas.prism import SUPPORTED_ASSAYS, PROTOCOL_ID_FIELD_NAME

from .utils import (
Expand Down Expand Up @@ -447,7 +449,7 @@ def ihc() -> PrismTestData:
}
},
"marker_positive": "positive",
"tumor_proportion_score": 0.2,
"tumor_proportion_score": "NE",
"intensity": 3.0,
"percentage_expression": 100.0,
"h_score": 300,
Expand Down