Skip to content

Commit

Permalink
Merge 3e9fe18 into 6359cca
Browse files Browse the repository at this point in the history
  • Loading branch information
dmichaels-harvard committed Mar 31, 2024
2 parents 6359cca + 3e9fe18 commit 41c6540
Show file tree
Hide file tree
Showing 8 changed files with 213 additions and 71 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@ Change Log
----------


8.8.2
=====
* Support for ExtraFiles pseudo-type, to handle extra_files in smaht-submitr.
* Minor structured_data fix related to counting unresolved references;
not functionally consequential; only incorrect user feedback in smaht-submitr.
* Support in structured_data for norefs (completely ignore references).
* Minor fix in portal_object_utils.PortalObject._compare for lists.
* Minor structured_data changes for smaht-submitr validation/submission progress tracking.
* Minor structured_data code cleanup.
* Added progress_constants for sharing between smaht-submitr, snovault, smaht-portal;
not ideal living here but driving us nuts maintaining in separate locations.


8.8.1
=====
* Changes to troubleshooting utility script view-portal-object.
Expand Down
11 changes: 9 additions & 2 deletions dcicutils/data_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,11 @@ def file(self) -> Optional[str]:
def warnings(self) -> List[str]:
warnings = []
if self._warning_empty_headers:
warnings.append({"src": create_dict(file=self.file),
if hasattr(self, "sheet_name") and self.sheet_name:
src = {"sheet": self.sheet_name}
else:
src = {"file": self.file}
warnings.append({"src": src,
"warning": "Empty header column encountered; ignoring it and all subsequent columns."})
if self._warning_extra_values:
for row_number in self._warning_extra_values:
Expand Down Expand Up @@ -181,7 +185,10 @@ def is_hidden_sheet(self, sheet: openpyxl.worksheet.worksheet.Worksheet) -> bool
if not self._include_hidden_sheets:
if sheet.sheet_state == "hidden":
return True
if sheet.title.startswith("(") and sheet.title.endswith(")"):
if ((sheet.title.startswith("(") and sheet.title.endswith(")")) or
(sheet.title.startswith("[") and sheet.title.endswith("]")) or
(sheet.title.startswith("{") and sheet.title.endswith("}")) or
(sheet.title.startswith("<") and sheet.title.endswith(">"))): # noqa
return True
return False

Expand Down
12 changes: 6 additions & 6 deletions dcicutils/portal_object_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,17 +139,17 @@ def diff_deleting(value: Any) -> object: # noqa
if not isinstance(a[index], dict) and not isinstance(a[index], list):
if a[index] not in b:
if a[index] != PortalObject._PROPERTY_DELETION_SENTINEL:
if index < len(b):
diffs[path] = diff_updating(a[index], b[index])
else:
diffs[path] = diff_creating(a[index])
diffs[path] = diff_creating(a[index])
else:
if index < len(b):
diffs[path] = diff_deleting(b[index])
diffs[path] = diff_deleting(b[index])
elif index < len(b):
diffs.update(PortalObject._compare(a[index], b[index], _path=path))
else:
diffs[path] = diff_creating(a[index])
for index in range(len(b)):
path = f"{_path or ''}#{index}.deleting"
if b[index] not in a:
diffs[path] = diff_deleting(b[index])
elif a != b:
if a == PortalObject._PROPERTY_DELETION_SENTINEL:
diffs[_path] = diff_deleting(b)
Expand Down
10 changes: 8 additions & 2 deletions dcicutils/portal_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ def ping(self) -> bool:
except Exception:
return False

@lru_cache(maxsize=100)
def get_schema(self, schema_name: str) -> Optional[dict]:
return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)

Expand All @@ -308,8 +309,13 @@ def get_schemas(self) -> dict:
return self.get("/profiles/").json()

@staticmethod
@lru_cache(maxsize=100)
def schema_name(name: str) -> str:
return to_camel_case(name.replace(" ", "") if not name.endswith(".json") else name[:-5])
name = os.path.basename(name).replace(" ", "") if isinstance(name, str) else ""
if (dot := name.rfind(".")) > 0:
name = name[0:dot]
return to_camel_case(name)
# return to_camel_case(name.replace(" ", "") if not name.endswith(".json") else name[:-5])

def is_schema_type(self, schema_name_or_portal_object: Union[str, dict], target_schema_name: str,
_schemas_super_type_map: Optional[list] = None) -> bool:
Expand Down Expand Up @@ -398,7 +404,7 @@ def list_breadth_first(super_type_map: dict, super_type_name: str) -> dict:
super_type_map_flattened[super_type_name] = list_breadth_first(super_type_map, super_type_name)
return super_type_map_flattened

@lru_cache(maxsize=64)
@lru_cache(maxsize=100)
def get_schema_subtype_names(self, type_name: str) -> List[str]:
if not (schemas_super_type_map := self.get_schemas_super_type_map()):
return []
Expand Down
79 changes: 79 additions & 0 deletions dcicutils/progress_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from datetime import datetime, timezone
from enum import Enum

# Constants for progress tracking for smaht-submitr.
# Here only to share between smaht-portal, snovault, and smaht-submitr.


class _Enum(Enum):
    """Enum base class whose members read as their plain value on class attribute access.

    Each member carries a ``__get__`` hook, so looking one up on its enumeration
    class (e.g. ``SomeEnum.MEMBER``) yields ``MEMBER.value`` rather than the
    member object. Iterating the class still yields member objects, which is
    why ``values()`` is provided to obtain the plain values.
    """

    def __get__(self, instance, owner):
        # Descriptor hook: invoked when a member is fetched as a class attribute.
        return self.value

    @classmethod
    def values(cls):
        """Return a list of the values of all members of this enumeration."""
        return [member.value for member in cls]


class PROGRESS_INGESTER(_Enum):
    """Progress-tracking key names for the ingester.

    The values are plain strings shared between smaht-submitr, snovault and
    smaht-portal, so they are reproduced verbatim; note that a few values do not
    mirror their member names exactly (e.g. ``PARSE_LOAD_INITIATE`` is
    ``"ingester_parse_initiate"``).
    """

    VALIDATION = "ingester_validation"
    QUEUED = "ingester_queued"
    INITIATE = "ingester_initiate"
    DONE = "ingester_done"
    PARSE_LOAD_INITIATE = "ingester_parse_initiate"
    PARSE_LOAD_DONE = "ingester_parse_done"
    VALIDATE_LOAD_INITIATE = "ingester_validate_initiate"
    VALIDATE_LOAD_DONE = "ingester_validate_done"
    LOADXL_INITIATE = "ingester_loadxl_initiate"
    LOADXL_DONE = "ingester_loadxl_done"
    MESSAGE = "ingester_message"
    MESSAGE_VERBOSE = "ingester_message_verbose"
    MESSAGE_DEBUG = "ingester_message_debug"

    @staticmethod
    def NOW() -> str:
        """Return the current timestamp string produced by the module-level ``_NOW``.

        Defined as a static method (a descriptor), so the Enum machinery does
        not turn it into a member; call it as ``PROGRESS_INGESTER.NOW()``.
        """
        return _NOW()


class PROGRESS_PARSE(_Enum):
    """Progress-tracking key names for the parse (load and analyze) phases.

    The values are plain strings shared between smaht-submitr, snovault and
    smaht-portal, so they are reproduced verbatim; note that the ``LOAD_*``
    members map to ``"parse_*"`` values and that ``ANALYZE_COUNT_ITEMS`` is
    ``"parse_analyze_objects"``.
    """

    LOAD_START = "parse_start"
    LOAD_ITEM = "parse_item"
    LOAD_DONE = "parse_done"
    LOAD_COUNT_SHEETS = "parse_sheets"
    LOAD_COUNT_ROWS = "parse_rows"
    LOAD_COUNT_REFS = "parse_refs"
    LOAD_COUNT_REFS_FOUND = "parse_refs_found"
    LOAD_COUNT_REFS_NOT_FOUND = "parse_refs_not_found"
    LOAD_COUNT_REFS_LOOKUP = "parse_refs_lookup"
    LOAD_COUNT_REFS_LOOKUP_CACHE_HIT = "parse_refs_lookup_cache_hit"
    LOAD_COUNT_REFS_EXISTS_CACHE_HIT = "parse_refs_exists_cache_hit"
    LOAD_COUNT_REFS_INVALID = "parse_refs_invalid"
    ANALYZE_START = "parse_analyze_start"
    ANALYZE_COUNT_TYPES = "parse_analyze_types"
    ANALYZE_COUNT_ITEMS = "parse_analyze_objects"
    ANALYZE_COUNT_LOOKUP = "parse_analyze_lookups"
    ANALYZE_CREATE = "parse_analyze_create"
    ANALYZE_UPDATE = "parse_analyze_update"
    ANALYZE_DONE = "parse_analyze_done"
    MESSAGE = "parse_message"
    MESSAGE_VERBOSE = "parse_message_verbose"
    MESSAGE_DEBUG = "parse_message_debug"

    @staticmethod
    def NOW() -> str:
        """Return the current timestamp string produced by the module-level ``_NOW``.

        Defined as a static method (a descriptor), so the Enum machinery does
        not turn it into a member; call it as ``PROGRESS_PARSE.NOW()``.
        """
        return _NOW()


class PROGRESS_LOADXL(_Enum):
    """Progress-tracking key names for the loadxl phase.

    The values are plain strings shared between smaht-submitr, snovault and
    smaht-portal, so they are reproduced verbatim; note that ``GET`` is
    ``"loadxl_lookup"`` rather than mirroring its member name.
    """

    START = "loadxl_start"
    START_SECOND_ROUND = "loadxl_start_second_round"
    ITEM = "loadxl_item"
    ITEM_SECOND_ROUND = "loadxl_item_second_round"
    GET = "loadxl_lookup"
    POST = "loadxl_post"
    PATCH = "loadxl_patch"
    ERROR = "loadxl_error"
    DONE = "loadxl_done"
    TOTAL = "loadxl_total"
    MESSAGE = "loadxl_message"
    MESSAGE_VERBOSE = "loadxl_message_verbose"
    MESSAGE_DEBUG = "loadxl_message_debug"

    @staticmethod
    def NOW() -> str:
        """Return the current timestamp string produced by the module-level ``_NOW``.

        Defined as a static method (a descriptor), so the Enum machinery does
        not turn it into a member; call it as ``PROGRESS_LOADXL.NOW()``.
        """
        return _NOW()


def _NOW() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SS.ffffffZ``.

    Uses the timezone-aware ``datetime.now(timezone.utc)`` instead of the naive
    ``datetime.utcnow()``, which is deprecated as of Python 3.12. The format
    string has no offset directive, so the resulting text is unchanged.
    """
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")

0 comments on commit 41c6540

Please sign in to comment.