diff --git a/.gitignore b/.gitignore index 5712cc2d..800fb9f0 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,5 @@ cove/lib/org-ids.json cove/lib/org-ids.json.lock chromedriver/ src/ +/requests_cache_dir/*.pkl +/requests_cache_dir/*.sqlite diff --git a/cove_iati/lib/iati.py b/cove_iati/lib/iati.py index 8388c8a2..c78cdf06 100644 --- a/cove_iati/lib/iati.py +++ b/cove_iati/lib/iati.py @@ -14,6 +14,7 @@ from cove_iati.lib.exceptions import UnrecognisedFileTypeXML from cove_iati.lib.process_codelists import invalid_embedded_codelist_values, invalid_non_embedded_codelist_values from .schema import SchemaIATI +from django.conf import settings def get_tree(data_file): @@ -496,9 +497,9 @@ def check_activity_org_refs(tree): root = tree.getroot() try: - publisher_request = requests.get("https://codelists.codeforiati.org/api/json/en/ReportingOrganisation.json") + publisher_request = settings.REQUESTS_SESSION_WITH_CACHING.get("https://codelists.codeforiati.org/api/json/en/ReportingOrganisation.json") publisher_request.raise_for_status() - registration_agency_request = requests.get("https://codelists.codeforiati.org/api/json/en/OrganisationRegistrationAgency.json") + registration_agency_request = settings.REQUESTS_SESSION_WITH_CACHING.get("https://codelists.codeforiati.org/api/json/en/OrganisationRegistrationAgency.json") registration_agency_request.raise_for_status() except requests.RequestException: return {"error": "Unable to fetch data to do organisation checks", 'not_found_orgs_count': 0} diff --git a/cove_iati/settings.py b/cove_iati/settings.py index 24cd4e03..e470c811 100644 --- a/cove_iati/settings.py +++ b/cove_iati/settings.py @@ -1,15 +1,18 @@ import os import environ +from datetime import timedelta # Needs a noqa comment to come after the above import from cove import settings # noqa: E408 +from requests_cache import FileCache, CachedSession BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) env = environ.Env( # set default values and 
casting DB_NAME=(str, os.path.join(BASE_DIR, 'db.sqlite3')), SENTRY_DSN=(str, ''), + REQUESTS_CACHE_DIR=(str, os.path.join(BASE_DIR, 'requests_cache_dir')) ) # We use the setting to choose whether to show the section about Sentry in the @@ -107,3 +110,18 @@ # https://github.com/OpenDataServices/cove/issues/1098 FILE_UPLOAD_PERMISSIONS = 0o644 + + +# Note: when deploying a new version of this app, the cache directory should be cleared, +# as new versions of the requests & requests_cache libraries may use incompatible cache formats. +# https://requests-cache.readthedocs.io/en/latest/user_guide/troubleshooting.html#potential-issues +REQUESTS_SESSION_WITH_CACHING = CachedSession( + 'iati_cove_cache', + backend=FileCache( + cache_name=env('REQUESTS_CACHE_DIR') + ), + expire_after=timedelta(days=1), # Expire responses after one day + allowable_methods=['GET'], # Only cache GET requests + allowable_codes=[200, 400], # Cache responses with status 200 or 400 + stale_if_error=True, # In case of request errors, use stale cache data if possible +) diff --git a/requests_cache_dir/.gitkeep b/requests_cache_dir/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/requirements.in b/requirements.in index cf03f4dc..151a68d8 100644 --- a/requirements.in +++ b/requirements.in @@ -6,6 +6,7 @@ libcoveweb django-bootstrap3 django-debug-toolbar requests +requests-cache cached-property dealer django-environ diff --git a/requirements.txt b/requirements.txt index 5de0994e..8f7c4d48 100755 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,9 @@ # pip-compile requirements.in # attrs==21.2.0 - # via jsonschema + # via + # jsonschema + # requests-cache backports-datetime-fromisoformat==1.0.0 # via flattentool bleach==3.3.0 @@ -63,6 +65,8 @@ importlib-metadata==2.1.1 # via # django-bootstrap3 # jsonschema +itsdangerous==2.0.1 + # via requests-cache json-merge-patch==0.2 # via -r requirements.in jsonref==0.2 @@ -107,13 +111,18 @@ pytz==2021.1 # 
via # django # flattentool +pyyaml==5.4.1 + # via requests-cache rangedict==0.1.6 # via -r requirements.in +requests-cache==0.7.5 + # via -r requirements.in requests==2.25.1 # via # -r requirements.in # libcove # libcoveweb + # requests-cache rfc3987==1.3.8 # via # -r requirements.in @@ -129,6 +138,7 @@ six==1.16.0 # bleach # jsonschema # python-dateutil + # url-normalize # zodb sqlparse==0.4.1 # via @@ -142,6 +152,8 @@ transaction==3.0.1 # via zodb uc-rfc6266-parser==0.1.0 # via libcoveweb +url-normalize==1.4.3 + # via requests-cache urllib3==1.26.6 # via # requests diff --git a/requirements_dev.txt b/requirements_dev.txt index eebef3f2..a13e7ae2 100755 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -11,6 +11,7 @@ attrs==21.2.0 # hypothesis # jsonschema # pytest + # requests-cache babel==2.9.1 # via sphinx backports-datetime-fromisoformat==1.0.0 @@ -102,6 +103,8 @@ importlib-metadata==2.1.1 # pytest iniconfig==1.1.1 # via pytest +itsdangerous==2.0.1 + # via requests-cache jinja2==3.0.1 # via sphinx json-merge-patch==0.2 @@ -189,16 +192,21 @@ pytz==2021.1 # babel # django # flattentool +pyyaml==5.4.1 + # via requests-cache rangedict==0.1.6 # via -r requirements.in recommonmark==0.7.1 # via -r requirements_dev.in +requests-cache==0.7.5 + # via -r requirements.in requests==2.25.1 # via # -r requirements.in # coveralls # libcove # libcoveweb + # requests-cache # sphinx # transifex-client rfc3987==1.3.8 @@ -220,6 +228,7 @@ six==1.16.0 # libsass # python-dateutil # transifex-client + # url-normalize # zodb smmap==4.0.0 # via gitdb @@ -268,6 +277,8 @@ typing-extensions==3.10.0.0 # via gitpython uc-rfc6266-parser==0.1.0 # via libcoveweb +url-normalize==1.4.3 + # via requests-cache urllib3==1.26.6 # via # requests