Skip to content

Commit

Permalink
Merge 1448c7a into 9e4bfb7
Browse files Browse the repository at this point in the history
  • Loading branch information
dmichaels-harvard committed Apr 27, 2023
2 parents 9e4bfb7 + 1448c7a commit e4beb16
Show file tree
Hide file tree
Showing 12 changed files with 1,101 additions and 6 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/main-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ jobs:
- uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Install Python dependencies for publish
run: pip install requests toml
- name: Publish
env:
PYPI_USER: ${{ secrets.PYPI_USER }}
Expand Down
16 changes: 16 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,22 @@ Change Log
because it pulls in the ``docopt`` library, which needs ``2to3``, and would fail.


7.3.0
=====

* In ``dcicutils.env_utils`` added function ``get_portal_url``;
first usage of which was in foursight-core. 2023-04-16.

* Added ``dcicutils.ssl_certificate_utils``;
first usage of which was in foursight-core. 2023-04-16.

* Added ``dcicutils.scripts.publish_to_pypi``; 2023-04-24.

* Added ``dcicutils.function_cache_decorator``; 2023-04-24;
  intended to help simplify some caching in foursight-core APIs in the future.



7.2.0
=====

Expand Down
11 changes: 9 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,17 @@ update: # updates dependencies
poetry update

publish:
scripts/publish
# New Python based publish script (2023-04-25).
poetry run publish-to-pypi

publish-for-ga:
scripts/publish --noconfirm
# New Python based publish script (2023-04-25).
# For some reason, we have NOT been able to get the required pip install of
# requests and toml to "take" when used with the poetry run publish-to-pypi
# command - either here or in main-publish.yml; we still get a module not
# found error for requests in GA; so invoking directly with python.
# poetry run publish-to-pypi --noconfirm
python -m dcicutils.scripts.publish_to_pypi --noconfirm

help:
@make info
Expand Down
10 changes: 10 additions & 0 deletions dcicutils/env_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,16 @@ def _find_public_url_entry(envname):
return entry


def get_portal_url(envname: EnvName) -> UrlString:
    """
    Returns the Portal URL for the given environment name.

    Effectively the same as get_env_real_url (below), except that the
    environment name is first normalized via full_env_name before the lookup.
    NOTE(review): the original comment claimed this variant does not actually
    access the URL (get_env_real_url is said to fetch the health page), yet
    this simply delegates to get_env_real_url - confirm the intended
    difference with the get_env_real_url implementation.

    :param envname: the environment name (short or full form).
    :return: the Portal URL for that environment.
    """
    normalized_env_name = full_env_name(envname)
    return get_env_real_url(normalized_env_name)


@if_orchestrated
def get_env_real_url(envname: EnvName) -> UrlString:

Expand Down
149 changes: 149 additions & 0 deletions dcicutils/function_cache_decorator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
from collections import OrderedDict
from datetime import datetime, timedelta
import functools
import json
import sys


def function_cache(*decorator_args, **decorator_kwargs):
    """
    Exactly analogous to the functools.lru_cache decorator, but also allows
    specifying that if the return value of the function is None then no caching
    is to be done; use do_not_cache_none=True (or nocache_none=True) as a
    decorator argument to do this.

    Also like @lru_cache, this supports the maxsize decorator argument, as well
    as the cache_info and cache_clear functions on the decorated function.
    The maxsize can be specified either as the first positional argument to the
    decorator or as a maxsize kwarg to the decorator.

    In addition we support a time_to_live (ttl) decorator kwarg which can be
    specified as a timedelta type. If the cached value for the function is older
    than the specified ttl then it is considered stale and the function will be
    called to get a fresh value. And there is a separate time_to_live_none
    (ttl_none) which does the same as ttl but applies only if the cached value
    is None.

    There is also a serialize_key (serialize) decorator kwarg which, if
    specified as True, will serialize the arguments (args and kwargs) of the
    function call and use that value, converted to a string, as the key for
    caching the function result; this allows caching for functions which take
    non-hashable structured types (i.e. dict or list) as arguments, which
    normally would not be possible, i.e. e.g. in which case this error would be
    generated: TypeError: unhashable type: 'dict'

    And a custom key decorator kwarg may be specified as a lambda/callable
    which computes the key by which the function results should be cached; it
    is passed the exact same arguments as the function itself.

    Looked/tried and could not find a way to do this using @lru_cache, and also
    had issues trying to wrap @lru_cache with this functionality. First created
    (April 2023) to try to simplify some of the caching in foursight-core APIs.
    """
    # Maps cache key -> {"value": <cached result>, "timestamp": <datetime>};
    # insertion/access order is maintained for LRU eviction.
    cache = OrderedDict()
    nhits = nmisses = 0

    # Distinguish bare usage (@function_cache) from parameterized usage
    # (@function_cache(...)): a bare usage passes the target function itself as
    # the single positional argument. Lambdas are excluded so that a callable
    # may still be passed as a positional decorator argument.
    if len(decorator_args) == 1 and callable(decorator_args[0]) and decorator_args[0].__name__ != "<lambda>":
        decorator_invoked_without_args = True
        decorator_target_function = decorator_args[0]
        decorator_args = decorator_args[1:]
    else:
        decorator_invoked_without_args = False
        decorator_target_function = None

    maxsize = sys.maxsize
    nocache_none = False
    ttl = None
    ttl_none = None
    key = None
    serialize_key = False

    if decorator_args:
        maxsize_arg = decorator_args[0]
        if isinstance(maxsize_arg, int) and maxsize_arg > 0:
            maxsize = maxsize_arg
    if decorator_kwargs:
        maxsize_kwarg = decorator_kwargs.get("maxsize")
        if isinstance(maxsize_kwarg, int) and maxsize_kwarg > 0:
            maxsize = maxsize_kwarg
        nocache_none_kwarg = decorator_kwargs.get("do_not_cache_none", decorator_kwargs.get("nocache_none"))
        if isinstance(nocache_none_kwarg, bool):
            nocache_none = nocache_none_kwarg
        ttl_kwarg = decorator_kwargs.get("time_to_live", decorator_kwargs.get("ttl"))
        if isinstance(ttl_kwarg, timedelta):
            ttl = ttl_kwarg
        ttl_none_kwarg = decorator_kwargs.get("time_to_live_none", decorator_kwargs.get("ttl_none"))
        if isinstance(ttl_none_kwarg, timedelta):
            ttl_none = ttl_none_kwarg
        key_kwarg = decorator_kwargs.get("key")
        if callable(key_kwarg):
            key = key_kwarg
        serialize_key_kwarg = decorator_kwargs.get("serialize_key", decorator_kwargs.get("serialize"))
        if isinstance(serialize_key_kwarg, bool):
            serialize_key = serialize_key_kwarg

    def function_cache_decorator_registration(wrapped_function):

        # functools.wraps preserves __name__/__doc__/etc of the decorated
        # function, matching @lru_cache behavior (which this aims to mirror).
        @functools.wraps(wrapped_function)
        def function_wrapper(*args, **kwargs):

            # Cache key: the custom key callable if given, otherwise the
            # positional args plus kwargs sorted by name (so that keyword
            # argument order does not affect the key).
            cache_key = key(*args, **kwargs) if key else args + tuple(sorted(kwargs.items()))
            if serialize_key:
                cache_key = json.dumps(cache_key, default=str, separators=(",", ":"))
            cached = cache.get(cache_key, None)
            now = None

            if cached is not None:

                if ttl or ttl_none:
                    now = datetime.now()

                def is_stale():
                    if ttl and now > cached["timestamp"] + ttl:
                        return True
                    if ttl_none and cached["value"] is None and now > cached["timestamp"] + ttl_none:
                        return True
                    return False

                if not is_stale():
                    nonlocal nhits
                    nhits += 1
                    cache.move_to_end(cache_key)  # mark as most recently used
                    return cached["value"]

            nonlocal nmisses
            nmisses += 1
            value = wrapped_function(*args, **kwargs)

            if value is not None or not nocache_none:
                # Only evict the least recently used entry when actually ADDING
                # a new entry at capacity; overwriting an existing (stale) entry
                # does not grow the cache, so nothing needs evicting then.
                # (Fixes spurious eviction of an unrelated entry on refresh.)
                if cache_key not in cache and len(cache) >= maxsize:
                    cache.popitem(last=False)
                if not now:
                    now = datetime.now()
                cache[cache_key] = {"value": value, "timestamp": now}
                # A refreshed (overwritten) entry keeps its old position in an
                # OrderedDict, so explicitly make it most recently used.
                cache.move_to_end(cache_key)

            return value

        def cache_info():
            """Returns a dict of cache statistics (hits, misses, size, etc)."""
            info = {
                "hits": nhits,
                "misses": nmisses,
                "size": len(cache)
            }
            if maxsize != sys.maxsize:
                info["maxsize"] = maxsize
            if ttl:
                info["ttl"] = ttl
            if ttl_none:
                info["ttl_none"] = ttl_none
            if serialize_key:
                info["serialize"] = serialize_key
            return info

        def cache_clear():
            """Empties the cache and resets the hit/miss statistics."""
            nonlocal nhits, nmisses
            nhits = nmisses = 0
            cache.clear()

        function_wrapper.cache_info = cache_info
        function_wrapper.cache_clear = cache_clear
        return function_wrapper

    if decorator_invoked_without_args:
        return function_cache_decorator_registration(decorator_target_function)
    return function_cache_decorator_registration

0 comments on commit e4beb16

Please sign in to comment.