Skip to content

Commit

Permalink
Merge 7f0eed7 into ad986ee
Browse files Browse the repository at this point in the history
  • Loading branch information
dmichaels-harvard committed Feb 5, 2024
2 parents ad986ee + 7f0eed7 commit 0af57c5
Show file tree
Hide file tree
Showing 11 changed files with 1,100 additions and 81 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ Change Log
----------


8.7.2
=====

* Changes to itemize SMaHT submission ingestion create/update/diff situation (portal_object_utils).
* Changes to structured_data to handle property deletes (portal_object_utils).


8.7.1
=====

Expand Down
22 changes: 20 additions & 2 deletions dcicutils/data_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,22 @@
# Forward type references for type hints.
Excel = Type["Excel"]

# Cell value(s) indicating property deletion.
_CELL_DELETION_VALUES = ["*delete*"]


class _CellDeletionSentinal(str):
    """
    Special cell deletion sentinel value.

    This is a str subclass whose value is always the first entry of _CELL_DELETION_VALUES,
    so it compares equal to that plain string. Copying (shallow or deep) returns the very
    same instance, so the sentinel survives copy.copy/copy.deepcopy of any data containing it.
    """

    def __new__(cls):
        # No value argument is accepted; the string content is fixed.
        return super(_CellDeletionSentinal, cls).__new__(cls, _CELL_DELETION_VALUES[0])

    def __copy__(self):
        # Without this, copy.copy() falls back to __reduce_ex__, which (via str.__getnewargs__)
        # calls __new__ with the string value as an extra argument and raises a TypeError.
        return self

    def __deepcopy__(self, memo):  # noqa
        # Make sure that on deepcopy the sentinel remains the same object.
        return self


class RowReader(abc.ABC):

CELL_DELETION_SENTINEL = _CellDeletionSentinal()

def __init__(self):
self.header = None
self.row_number = 0
Expand Down Expand Up @@ -45,8 +58,13 @@ def is_comment_row(self, row: Union[List[Optional[Any]], Tuple[Optional[Any]]])
def is_terminating_row(self, row: Union[List[Optional[Any]], Tuple[Optional[Any]]]) -> bool:
    """
    Tells whether the given row should be treated as a terminating row.
    This implementation ignores the row and always returns False.
    """
    return False

def cell_value(self, value: Optional[Any]) -> Optional[Any]:
return str(value).strip() if value is not None else ""
def cell_value(self, value: Optional[Any]) -> str:
    """
    Returns the normalized (string) form of the given raw cell value:
    - None yields the empty string;
    - otherwise the value is converted to a string and stripped of surrounding whitespace;
    - if that stripped string is one of _CELL_DELETION_VALUES then the shared
      RowReader.CELL_DELETION_SENTINEL object is returned instead.
    """
    if value is None:
        return ""
    text = str(value).strip()
    if text in _CELL_DELETION_VALUES:
        return RowReader.CELL_DELETION_SENTINEL
    return text

def open(self) -> None:
pass
Expand Down
27 changes: 23 additions & 4 deletions dcicutils/misc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
This file contains functions that might be generally useful.
"""

from collections import namedtuple
import contextlib
import datetime
import functools
Expand All @@ -17,6 +18,7 @@
import rfc3986.validators
import rfc3986.exceptions
import time
import uuid
import warnings
import webtest # importing the library makes it easier to mock testing

Expand Down Expand Up @@ -1148,16 +1150,22 @@ def remove_suffix(suffix: str, text: str, required: bool = False):
return text[:len(text)-len(suffix)]


def remove_empty_properties(data: Optional[Union[list, dict]],
                            isempty: Optional[Callable] = None,
                            isempty_array_element: Optional[Callable] = None) -> None:
    """
    Removes, in place and recursively, the "empty" properties of the given dictionary or list.

    :param data: The dictionary or list to prune; any other type (including None) is left alone.
    :param isempty: Optional callable taking a dictionary property value and returning True if
        that property should be removed. If not callable, a value is considered empty if it
        is None, an empty string, an empty dictionary, or an empty list.
    :param isempty_array_element: Optional callable taking a list element and returning True if
        that element should be removed from its list. If not callable, list elements are never
        removed (though dictionaries/lists within a list are still pruned recursively).
    :return: None; the given data is modified in place.
    """
    def _isempty(value: Any) -> bool:  # noqa
        return isempty(value) if callable(isempty) else value in [None, "", {}, []]
    if isinstance(data, dict):
        # Iterate over a snapshot of the keys as we delete from the dictionary as we go.
        for key in list(data.keys()):
            if _isempty(value := data[key]):
                del data[key]
            else:
                remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element)
    elif isinstance(data, list):
        for item in data:
            remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element)
        if callable(isempty_array_element):
            # Slice assignment so that the caller's list object itself is updated.
            data[:] = [item for item in data if not isempty_array_element(item)]


class ObsoleteError(Exception):
Expand Down Expand Up @@ -1519,6 +1527,17 @@ def create_dict(**kwargs) -> dict:
return result


def create_readonly_object(**kwargs):
    """
    Returns a new/unique object instance with readonly properties equal to the given kwargs.
    Each call defines a fresh namedtuple class, named "readonlyclass_" followed by a random
    UUID (without dashes), having one field per keyword argument, and returns an instance of it.
    """
    class_name = f"readonlyclass_{uuid.uuid4().hex}"
    field_names = " ".join(kwargs)
    return namedtuple(class_name, field_names)(**kwargs)


def is_c4_arn(arn: str) -> bool:
"""
Returns True iff the given (presumed) AWS ARN string value looks like it
Expand Down
182 changes: 160 additions & 22 deletions dcicutils/portal_object_utils.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,40 @@
from copy import deepcopy
from functools import lru_cache
from typing import List, Optional, Tuple, Type, Union
from typing import Any, List, Optional, Tuple, Type, Union
from dcicutils.data_readers import RowReader
from dcicutils.misc_utils import create_readonly_object
from dcicutils.portal_utils import Portal
from dcicutils.schema_utils import Schema

PortalObject = Type["PortalObject"] # Forward type reference for type hints.


class PortalObject:

def __init__(self, portal: Portal, portal_object: dict, portal_object_type: Optional[str] = None) -> None:
self._portal = portal
_PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL

def __init__(self, portal_object: dict, portal: Portal = None, type: Optional[str] = None) -> None:
self._data = portal_object
self._type = portal_object_type if isinstance(portal_object_type, str) and portal_object_type else None
self._portal = portal
self._type = type if isinstance(type, str) and type else None

@property
def data(self):
def data(self) -> dict:
return self._data

@property
def portal(self) -> Optional[Portal]:
    """
    The Portal given to this object at construction time, or None if none was given.
    """
    return self._portal

@property
@lru_cache(maxsize=1)
def type(self):
def type(self) -> Optional[str]:
return self._type or Portal.get_schema_type(self._data)

@property
@lru_cache(maxsize=1)
def types(self):
return self._type or Portal.get_schema_types(self._data)
def types(self) -> Optional[List[str]]:
return [self._type] if self._type else Portal.get_schema_types(self._data)

@property
@lru_cache(maxsize=1)
Expand All @@ -33,19 +43,22 @@ def uuid(self) -> Optional[str]:

@property
@lru_cache(maxsize=1)
def schema(self):
return self._portal.get_schema(self.type)
def schema(self) -> Optional[dict]:
return self._portal.get_schema(self.type) if self._portal else None

def copy(self) -> PortalObject:
    """
    Returns a new PortalObject wrapping a deep copy of this object's data,
    and having the same portal and the same (resolved) type as this object.
    """
    copied_data = deepcopy(self.data)
    return PortalObject(copied_data, portal=self.portal, type=self.type)

@property
@lru_cache(maxsize=1)
def identifying_properties(self) -> List[str]:
def identifying_properties(self) -> Optional[List[str]]:
"""
Returns the list of all identifying property names of this Portal object which actually have values.
Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
"""
if not (schema := self.schema) or not (schema_identifying_properties := schema.get("identifyingProperties")):
return []
return None
identifying_properties = []
for identifying_property in schema_identifying_properties:
if identifying_property not in ["uuid", "identifier", "aliases"]:
Expand All @@ -57,17 +70,21 @@ def identifying_properties(self) -> List[str]:
identifying_properties.insert(0, "uuid")
if "aliases" in schema_identifying_properties and self._data.get("aliases"):
identifying_properties.append("aliases")
return identifying_properties
return identifying_properties or None

@property
@lru_cache(maxsize=1)
def identifying_paths(self) -> List[str]:
def identifying_paths(self) -> Optional[List[str]]:
"""
Returns a list of the possible Portal URL paths identifying this Portal object.
"""
if not (identifying_properties := self.identifying_properties):
return []
identifying_paths = []
if not (identifying_properties := self.identifying_properties):
if self.uuid:
if self.type:
identifying_paths.append(f"/{self.type}/{self.uuid}")
identifying_paths.append(f"/{self.uuid}")
return identifying_paths
for identifying_property in identifying_properties:
if (identifying_value := self._data.get(identifying_property)):
if identifying_property == "uuid":
Expand All @@ -82,12 +99,14 @@ def identifying_paths(self) -> List[str]:
# not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
elif isinstance(identifying_value, list):
for identifying_value_item in identifying_value:
identifying_paths.append(f"/{self.type}/{identifying_value_item}")
if self.type:
identifying_paths.append(f"/{self.type}/{identifying_value_item}")
identifying_paths.append(f"/{identifying_value_item}")
else:
identifying_paths.append(f"/{self.type}/{identifying_value}")
if self.type:
identifying_paths.append(f"/{self.type}/{identifying_value}")
identifying_paths.append(f"/{identifying_value}")
return identifying_paths
return identifying_paths or None

@property
@lru_cache(maxsize=1)
Expand All @@ -104,9 +123,128 @@ def lookup_identifying_path(self) -> Optional[str]:

def _lookup(self, raw: bool = False) -> Tuple[Optional[PortalObject], Optional[str]]:
try:
for identifying_path in self.identifying_paths:
if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200):
return PortalObject(self._portal, value.json(), self.type if raw else None), identifying_path
if identifying_paths := self.identifying_paths:
for identifying_path in identifying_paths:
if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200):
return PortalObject(value.json(),
portal=self._portal, type=self.type if raw else None), identifying_path
except Exception:
pass
return None, self.identifying_path

def compare(self, value: Union[dict, PortalObject],
            consider_refs: bool = False, resolved_refs: List[dict] = None) -> dict:
    """
    Compares the data of this Portal object against the given value, which may be either
    another PortalObject or a plain dictionary, and returns the dictionary of differences
    as computed by PortalObject._compare; returns an empty dictionary for any other type.

    If consider_refs is truthy and resolved_refs is a list, then the comparison is done
    using a copy of this object with its references normalized (see normalized_refs),
    rather than using the data of this object as-is.
    """
    use_normalized_refs = consider_refs and isinstance(resolved_refs, list)
    # This side is deliberately determined before looking at the given value.
    this_data = self.normalized_refs(refs=resolved_refs).data if use_normalized_refs else self.data
    if isinstance(value, PortalObject):
        return PortalObject._compare(this_data, value.data)
    if isinstance(value, dict):
        return PortalObject._compare(this_data, value)
    return {}

@staticmethod
def _compare(a: Any, b: Any, _path: Optional[str] = None) -> dict:
    """
    Recursively compares a against b and returns a dictionary of the differences found,
    keyed by the path of the differing item: nested dictionary keys are joined with "."
    and list positions are appended as "#index". Each dictionary value is a readonly object
    with these properties: value, creating_value, updating_value, deleting_value.

    - Creating: item is in a but not in b; value is the item from a.
    - Updating: item differs between a and b; value is from a, updating_value is from b.
    - Deleting: item in a is the property deletion sentinel; value is the item from b.

    Only items present in a are examined; keys/elements which are only in b are not reported.
    """
    deletion_sentinel = PortalObject._PROPERTY_DELETION_SENTINEL

    def make_diff(value: Any, creating: bool = False, updating: Any = None, deleting: bool = False) -> object:
        return create_readonly_object(value=value,
                                      creating_value=creating, updating_value=updating, deleting_value=deleting)

    diffs = {}
    if isinstance(a, dict) and isinstance(b, dict):
        for key in a:
            path = f"{_path}.{key}" if _path else key
            if key in b:
                diffs.update(PortalObject._compare(a[key], b[key], _path=path))
            elif a[key] != deletion_sentinel:
                # A deletion of a property which does not exist (in b) anyways is not a difference.
                diffs[path] = make_diff(a[key], creating=True)
    elif isinstance(a, list) and isinstance(b, list):
        # Ignore order of array elements; not absolutely technically correct but suits our purpose.
        for index, item in enumerate(a):
            path = f"{_path or ''}#{index}"
            has_counterpart = index < len(b)
            if isinstance(item, (dict, list)):
                # Nested containers are compared positionally.
                if has_counterpart:
                    diffs.update(PortalObject._compare(item, b[index], _path=path))
                else:
                    diffs[path] = make_diff(item, creating=True)
            elif item not in b:
                if item != deletion_sentinel:
                    if has_counterpart:
                        diffs[path] = make_diff(item, updating=b[index])
                    else:
                        diffs[path] = make_diff(item, creating=True)
                elif has_counterpart:
                    diffs[path] = make_diff(b[index], deleting=True)
    elif a != b:
        if a == deletion_sentinel:
            diffs[_path] = make_diff(b, deleting=True)
        else:
            diffs[_path] = make_diff(a, updating=b)
    return diffs

def normalize_refs(self, refs: List[dict]) -> None:
    """
    Converts, in place, any (linkTo) references within this Portal object which are paths
    (e.g. /SubmissionCenter/uwsc_gcc) into uuid style references
    (e.g. d1b67068-300f-483f-bfe8-63d23c93801f).

    The given "refs" is assumed to be a list of dictionaries, each containing a "path" and a
    "uuid" property; typically (for our first usage of this function) this is the value of
    structured_data.StructuredDataSet.resolved_refs_with_uuid. If there are no "refs" (None or
    empty), or if the specified reference is not found in this list, then the reference will
    be looked up via Portal calls (via Portal.get_metadata).

    Use the normalized_refs function to get a modified copy rather than changing this object.
    """
    PortalObject._normalize_refs(self.data, refs=refs, schema=self.schema, portal=self.portal)

def normalized_refs(self, refs: List[dict]) -> PortalObject:
    """
    Same as normalize_refs except that this Portal object is left unchanged; the reference
    normalization is applied to a copy of this object (see copy), and that copy is returned.
    """
    normalized_copy = self.copy()
    normalized_copy.normalize_refs(refs)
    return normalized_copy

@staticmethod
def _normalize_refs(value: Any, refs: List[dict], schema: dict, portal: Portal, _path: Optional[str] = None) -> Any:
    """
    Recursive workhorse for normalize_refs. Walks the given value; dictionaries and lists are
    updated in place (and returned), and for any other (truthy) value whose property, located
    in the given schema via its path, has a "linkTo", returns the uuid for the reference if it
    can be found, either in the given refs list or else via the given portal; otherwise
    returns the value unchanged. Paths are formed as for _compare, i.e. dictionary keys
    joined with "." and list positions appended as "#index".
    """
    if not value or not isinstance(schema, dict):
        return value
    if isinstance(value, dict):
        for key in value:
            child_path = f"{_path}.{key}" if _path else key
            value[key] = PortalObject._normalize_refs(value[key], refs=refs,
                                                      schema=schema, portal=portal, _path=child_path)
        return value
    if isinstance(value, list):
        for index, element in enumerate(value):
            child_path = f"{_path or ''}#{index}"
            value[index] = PortalObject._normalize_refs(element, refs=refs,
                                                        schema=schema, portal=portal, _path=child_path)
        return value
    property_schema = Schema.get_property_by_path(schema, _path)
    if not property_schema:
        return value
    link_to = property_schema.get("linkTo")
    if not link_to:
        return value
    ref_path = f"/{link_to}/{value}"
    known_refs = refs if isinstance(refs, list) else []
    matching_uuids = [ref.get("uuid") for ref in known_refs if ref.get("path") == ref_path]
    if matching_uuids and matching_uuids[0]:
        return matching_uuids[0]
    # Here our (linkTo) reference appears not to be in the given refs; if these refs came
    # from structured_data.StructuredDataSet.resolved_refs_with_uuid (in the context of
    # smaht-submitr, which is the typical/first use case for this function) then this could
    # be because the reference was to an internal object, i.e. another object existing within
    # the data/spreadsheet being submitted. In any case, we don't have the associated uuid
    # so let us look it up here.
    if isinstance(portal, Portal):
        ref_object = portal.get_metadata(ref_path)
        if ref_object:
            ref_uuid = ref_object.get("uuid")
            if ref_uuid:
                return ref_uuid
    return value
Loading

0 comments on commit 0af57c5

Please sign in to comment.