From c32f7669cd40daffdfb2607907a98bc2eabd1201 Mon Sep 17 00:00:00 2001 From: Teemu Kataja Date: Thu, 5 Sep 2019 09:13:01 +0300 Subject: [PATCH 01/33] update ga4gh service info endpoint, with tests and docs --- beacon_api/__init__.py | 5 ++++- beacon_api/api/info.py | 17 +++++++++++------ beacon_api/conf/__init__.py | 2 ++ beacon_api/conf/config.ini | 9 ++++++++- deploy/test/auth_test.ini | 7 +++++++ deploy/test/integ_test.py | 4 ++-- docs/example.rst | 12 ++++++++++-- docs/instructions.rst | 4 ++-- docs/optionals.rst | 2 +- tests/test.ini | 7 +++++++ 10 files changed, 54 insertions(+), 15 deletions(-) diff --git a/beacon_api/__init__.py b/beacon_api/__init__.py index f8bf597a..2d06d62e 100644 --- a/beacon_api/__init__.py +++ b/beacon_api/__init__.py @@ -26,7 +26,6 @@ __createtime__ = CONFIG_INFO.createtime __updatetime__ = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ') # Every restart of the application means an update to it - __org_id__ = CONFIG_INFO.org_id __org_name__ = CONFIG_INFO.org_name __org_description__ = CONFIG_INFO.org_description @@ -37,3 +36,7 @@ __org_info__ = {'orgInfo': CONFIG_INFO.org_info} __sample_queries__ = SAMPLE_QUERIES + +# GA4GH Discovery +__service_type__ = f'{CONFIG_INFO.service_type}:{__apiVersion__}' +__service_env__ = CONFIG_INFO.environment diff --git a/beacon_api/api/info.py b/beacon_api/api/info.py index 379e5c97..634a72d1 100644 --- a/beacon_api/api/info.py +++ b/beacon_api/api/info.py @@ -9,7 +9,7 @@ from .. import __apiVersion__, __title__, __version__, __description__, __url__, __alturl__, __handover_beacon__ from .. import __createtime__, __updatetime__, __org_id__, __org_name__, __org_description__ from .. import __org_address__, __org_logoUrl__, __org_welcomeUrl__, __org_info__, __org_contactUrl__ -from .. import __sample_queries__, __handover_drs__, __docs_url__ +from .. import __sample_queries__, __handover_drs__, __docs_url__, __service_type__, __service_env__ from ..utils.data_query import fetch_dataset_metadata from ..extensions.handover import make_handover from aiocache import cached @@ -26,13 +26,18 @@ async def ga4gh_info(host): # TO DO implement some fallback mechanism for ID 'id': '.'.join(reversed(host.split('.'))), 'name': __title__, - "type": "urn:ga4gh:beacon", + "type": __service_type__, 'description': __description__, - 'documentationUrl': __docs_url__, - "organization": __org_id__, + "organization": { + "name": __org_name__, + "url": __org_welcomeUrl__, + }, 'contactUrl': __org_contactUrl__, - 'version': __version__, - "extension": {} + 'documentationUrl': __docs_url__, + 'createdAt': __createtime__, + 'updatedAt': __updatetime__, + 'environment': __service_env__, + 'version': __version__ } return beacon_info diff --git a/beacon_api/conf/__init__.py b/beacon_api/conf/__init__.py index 0b66620c..336f7119 100644 --- a/beacon_api/conf/__init__.py +++ b/beacon_api/conf/__init__.py @@ -33,6 +33,8 @@ def parse_config_file(path): 'url': config.get('beacon_api_info', 'url'), 'alturl': config.get('beacon_api_info', 'alturl'), 'createtime': config.get('beacon_api_info', 'createtime'), + 'service_type': config.get('beacon_api_info', 'service_type'), + 'environment': config.get('beacon_api_info', 'environment'), 'org_id': config.get('organisation_info', 'org_id'), 'org_name': config.get('organisation_info', 'org_name'), 'org_description': config.get('organisation_info', 'org_description'), diff --git a/beacon_api/conf/config.ini b/beacon_api/conf/config.ini index 850dc69f..1a01eebc 100644 --- a/beacon_api/conf/config.ini +++ b/beacon_api/conf/config.ini @@ -7,7 +7,7 @@ title=GA4GHBeacon at CSC # Version of the Beacon implementation -version=1.4.0 +version=1.4.1 # Author of this software author=CSC developers @@ -41,6 +41,13 @@ alturl= # Datetime when this Beacon was created createtime=2018-07-25T00:00:00Z +# GA4GH Discovery type `groupId` and `artifactId`, joined in /service-info with apiVersion +# See https://github.com/ga4gh-discovery/ga4gh-service-registry for more information and possible values +service_type=org.ga4gh:beacon + +# GA4GH Discovery server environment, possible values: prod, dev, test +environment=prod + [organisation_info] # Globally unique identifier for organisation that hosts this Beacon service diff --git a/deploy/test/auth_test.ini b/deploy/test/auth_test.ini index ad464b86..0a5f27c2 100644 --- a/deploy/test/auth_test.ini +++ b/deploy/test/auth_test.ini @@ -40,6 +40,13 @@ alturl=https://ega-archive.org/ # Datetime when this Beacon was created createtime=2018-07-25T00:00:00Z +# GA4GH Discovery type `groupId` and `artifactId`, joined in /service-info with apiVersion +# See https://github.com/ga4gh-discovery/ga4gh-service-registry for more information and possible values +service_type=org.ga4gh:beacon + +# GA4GH Discovery server environment, possible values: prod, dev, test +environment=prod + [handover_info] # The base url for all handovers diff --git a/deploy/test/integ_test.py b/deploy/test/integ_test.py index 1c03a775..6666b3c0 100644 --- a/deploy/test/integ_test.py +++ b/deploy/test/integ_test.py @@ -703,6 +703,6 @@ async def test_32(): data = await resp.json() # GA4GH Discovery Service-Info is small and its length should be between 3 and 6, when the Beacon info is very long # https://github.com/ga4gh-discovery/service-info/blob/develop/service-info.yaml - assert 5 <= len(data) <= 9, 'Service info size error' # ga4gh service-info has 5 required keys and at most 9 with optionals - assert data['type'] == 'urn:ga4gh:beacon', 'Service type error' # a new key used in beacon network + assert len(data) >= 5, 'Service info size error' # ga4gh service-info has 5 required keys, and option to add custom keys + assert data['type'].startswith('org.ga4gh:beacon'), 'Service type error' # a new key used in beacon network assert resp.status == 200, 'HTTP Status code error' diff --git a/docs/example.rst b/docs/example.rst index 0359439c..7a5fdb56 100644 --- a/docs/example.rst +++ b/docs/example.rst @@ -115,10 +115,18 @@ Example Response: { "id": "localhost:5050", "name": "GA4GHBeacon at CSC", + "type": "org.ga4gh:beacon:1.1.0", "description": "Beacon API Web Server based on the GA4GH Beacon API", - "documentationUrl": "https://beacon-python.readthedocs.io/en/latest/", + "organization": { + "name": "CSC - IT Center for Science", + "url": "https://www.csc.fi/" + }, "contactUrl": "https://www.csc.fi/contact-info", - "version": "1.4.0" + "documentationUrl": "https://beacon-network.readthedocs.io/en/latest/", + "createdAt": "2019-09-04T12:00:00Z", + "updatedAt": "2019-09-05T05:55:18Z", + "environment": "prod", + "version": "1.4.1" } Query Endpoint diff --git a/docs/instructions.rst b/docs/instructions.rst index 14522834..84e0f8d3 100644 --- a/docs/instructions.rst +++ b/docs/instructions.rst @@ -69,7 +69,7 @@ pointing to the location of the file using `CONFIG_FILE` environment variable. .. literalinclude:: /../beacon_api/conf/config.ini :language: python - :lines: 1-68 + :lines: 1-75 .. _oauth2: @@ -81,7 +81,7 @@ The configuration variables reside in the same `CONFIG_FILE` as described above .. literalinclude:: /../beacon_api/conf/config.ini :language: python - :lines: 90-116 + :lines: 97-123 * ``server`` should point to an API that returns a public key, which can be used to validate the received JWTBearer token. * ``issuers`` is a string of comma separated values, e.g. `one,two,three` without spaces. The issuers string should contain diff --git a/docs/optionals.rst b/docs/optionals.rst index a5c2d67b..e209848c 100644 --- a/docs/optionals.rst +++ b/docs/optionals.rst @@ -17,7 +17,7 @@ The handover protocol can be configured in ``config.ini`` as follows: .. literalinclude:: /../beacon_api/conf/config.ini :language: python - :lines: 71-87 + :lines: 78-94 .. note:: Handover protocol is disabled by default, as shown by the commented out ``drs`` variable. This variable should point to the server, which serves the additional data. To enable the handover protocol, uncomment the ``drs`` variable. diff --git a/tests/test.ini b/tests/test.ini index 8a541212..bc5d48ad 100644 --- a/tests/test.ini +++ b/tests/test.ini @@ -40,6 +40,13 @@ alturl=https://ega-archive.org/ # Datetime when this Beacon was created createtime=2018-07-25T00:00:00Z +# GA4GH Discovery type `groupId` and `artifactId`, joined in /service-info with apiVersion +# See https://github.com/ga4gh-discovery/ga4gh-service-registry for more information and possible values +service_type=org.ga4gh:beacon + +# GA4GH Discovery server environment, possible values: prod, dev, test +environment=test + [handover_info] # The base url for all handovers From 51afbbde7911a761d0835c0d7d9941e2a7eb774e Mon Sep 17 00:00:00 2001 From: Ott Oopkaup Date: Mon, 7 Oct 2019 14:38:16 +0300 Subject: [PATCH 02/33] Fix integer overflow in beacon_dataset_counts_table Simple fix in init.sql currently seems all that is needed. When updating callcounts and variantcounts variant count is likely to overflow on even medium-sized datasets. --- data/init.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/init.sql b/data/init.sql index 53c327b7..e2862870 100644 --- a/data/init.sql +++ b/data/init.sql @@ -29,7 +29,7 @@ variantcount: SELECT count(*) FROM beacon_data_table; CREATE TABLE IF NOT EXISTS beacon_dataset_counts_table ( datasetId VARCHAR(128), callCount INTEGER DEFAULT NULL, - variantCount INTEGER DEFAULT NULL + variantCount BIGINT DEFAULT NULL ); CREATE TABLE IF NOT EXISTS beacon_data_table ( From f3534f49467140a4d6831ec5192b46bf99479153 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 11 Oct 2019 17:04:20 +0300 Subject: [PATCH 03/33] fix typose deploy docs --- docs/deploy.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/deploy.rst b/docs/deploy.rst index 07f284cd..e8858988 100644 --- a/docs/deploy.rst +++ b/docs/deploy.rst @@ -76,7 +76,7 @@ For use with Kubernetes we provide ``YAML`` configuration. role: beacon spec: containers: - - image: cscfi/beacon + - image: cscfi/beacon-python imagePullPolicy: Always name: beacon ports: @@ -88,8 +88,9 @@ For use with Kubernetes we provide ``YAML`` configuration. name: data volumes: - name: data - persistentVolumeClaim: - claimName: beaconpy + # change below with preferred volume class + hostPath: + path: /local/disk/path --- apiVersion: v1 kind: Service From 2b3ae666822dbf584aa897d03ba2014fed14bc02 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 11 Oct 2019 17:52:58 +0300 Subject: [PATCH 04/33] test exceptions db calls --- tests/test_data_query.py | 29 ++++++++++++++++++++++++++++- tests/test_db_load.py | 20 ++++++++++++++++++++ tests/test_mate_name.py | 14 +++++++++++++- 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/tests/test_data_query.py b/tests/test_data_query.py index 2f6317a6..0fae3981 100644 --- a/tests/test_data_query.py +++ b/tests/test_data_query.py @@ -1,4 +1,5 @@ import asynctest +import aiohttp from unittest import mock from beacon_api.utils.data_query import filter_exists, transform_record from beacon_api.utils.data_query import transform_misses, transform_metadata, find_datasets, add_handover @@ -6,7 +7,7 @@ from beacon_api.extensions.handover import make_handover from datetime import datetime from beacon_api.utils.data_query import handle_wildcard -from .test_db_load import Connection +from .test_db_load import Connection, ConnectionException class Record: @@ -154,6 +155,13 @@ async def test_datasets_access_call_public(self): # in Connection() class self.assertEqual(result, (['mock:public:id'], [], [])) + async def test_datasets_access_call_exception(self): + """Test db call of getting public datasets access with exception.""" + pool = asynctest.CoroutineMock() + pool.acquire().__aenter__.return_value = ConnectionException() + with self.assertRaises(aiohttp.web_exceptions.HTTPInternalServerError): + await fetch_datasets_access(pool, None) + async def test_datasets_access_call_registered(self): """Test db call of getting registered datasets access.""" pool = asynctest.CoroutineMock() @@ -195,6 +203,13 @@ async def test_fetch_dataset_metadata_call(self): # in Connection() class self.assertEqual(result, []) + async def test_fetch_dataset_metadata_call_exception(self): + """Test db call of getting datasets metadata with exception.""" + pool = asynctest.CoroutineMock() + pool.acquire().__aenter__.return_value = ConnectionException() + with self.assertRaises(aiohttp.web_exceptions.HTTPInternalServerError): + await fetch_dataset_metadata(pool, None, None) + async def test_fetch_filtered_dataset_call(self): """Test db call for retrieving main data.""" pool = asynctest.CoroutineMock() @@ -212,6 +227,18 @@ async def test_fetch_filtered_dataset_call(self): result_miss = await fetch_filtered_dataset(pool, assembly_id, position, chromosome, reference, alternate, None, None, True) self.assertEqual(result_miss, []) + async def test_fetch_filtered_dataset_call_exception(self): + """Test db call of retrieving main data with exception.""" + assembly_id = 'GRCh38' + position = (10, 20, None, None, None, None) + chromosome = 1 + reference = 'A' + alternate = ('DUP', None) + pool = asynctest.CoroutineMock() + pool.acquire().__aenter__.return_value = ConnectionException() + with self.assertRaises(aiohttp.web_exceptions.HTTPInternalServerError): + await fetch_filtered_dataset(pool, assembly_id, position, chromosome, reference, alternate, None, None, False) + def test_handle_wildcard(self): """Test PostgreSQL wildcard handling.""" sequence1 = 'ATCG' diff --git a/tests/test_db_load.py b/tests/test_db_load.py index f69467da..b31f2149 100644 --- a/tests/test_db_load.py +++ b/tests/test_db_load.py @@ -119,6 +119,26 @@ def transaction(self, *args, **kwargs): return Transaction(*args, **kwargs) +class ConnectionException: + """Class Connection with Exception. + + Mock this from asyncpg. + """ + + def __init__(self): + """Initialize class.""" + pass + + def transaction(self, *args, **kwargs): + """Mimic transaction.""" + return Transaction(*args, **kwargs) + + @asyncio.coroutine + def prepare(self, query): + """Mimic prepare.""" + return Exception + + class DatabaseTestCase(asynctest.TestCase): """Test database operations.""" diff --git a/tests/test_mate_name.py b/tests/test_mate_name.py index ca7d6d86..028c91f7 100644 --- a/tests/test_mate_name.py +++ b/tests/test_mate_name.py @@ -1,6 +1,7 @@ import asynctest +import aiohttp from beacon_api.extensions.mate_name import find_fusion, fetch_fusion_dataset -from .test_db_load import Connection +from .test_db_load import Connection, ConnectionException class TestDataQueryFunctions(asynctest.TestCase): @@ -41,6 +42,17 @@ async def test_fetch_fusion_dataset_call(self): result_miss = await fetch_fusion_dataset(pool, assembly_id, position, chromosome, reference, None, None, None, True) self.assertEqual(result_miss, []) + async def test_fetch_fusion_dataset_call_exception(self): + """Test db call for retrieving mate data with exception.""" + pool = asynctest.CoroutineMock() + pool.acquire().__aenter__.return_value = ConnectionException() + assembly_id = 'GRCh38' + position = (10, 20, None, None, None, None) + chromosome = 1 + reference = 'A' + with self.assertRaises(aiohttp.web_exceptions.HTTPInternalServerError): + await fetch_fusion_dataset(pool, assembly_id, position, chromosome, reference, None, None, None, False) + if __name__ == '__main__': asynctest.main() From a337e6a1770ed8308bf5b6a5417f524e562a60c7 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 11 Oct 2019 18:08:21 +0300 Subject: [PATCH 05/33] checking the request here is a bit redundant --- beacon_api/utils/validate.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/beacon_api/utils/validate.py b/beacon_api/utils/validate.py index 151694fe..2157e8f7 100644 --- a/beacon_api/utils/validate.py +++ b/beacon_api/utils/validate.py @@ -132,8 +132,6 @@ def token_auth(): """ @web.middleware async def token_middleware(request, handler): - if not isinstance(request, web.Request): - raise BeaconBadRequest(request, request.host, "invalid request", "This does not seem a valid HTTP Request.") if request.path in ['/query'] and 'Authorization' in request.headers: _, obj = await parse_request_object(request) try: From 0bc388335a138294cf594e3261cd0f05e0d45bf2 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 11 Oct 2019 18:08:49 +0300 Subject: [PATCH 06/33] fail coverage under 80% --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index c8597cb0..333d5bd0 100644 --- a/tox.ini +++ b/tox.ini @@ -36,7 +36,7 @@ deps = .[test] -rrequirements.txt # Stop after first failure -commands = py.test -x --cov=beacon_api tests/ +commands = py.test -x --cov=beacon_api tests/ --cov-fail-under=80 python {toxinidir}/tests/coveralls.py [travis] From ce54db15f21dadc6f4e01abce7724541ae1d7cb7 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 11 Oct 2019 18:09:47 +0300 Subject: [PATCH 07/33] verify aud claim in a function --- beacon_api/utils/validate.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/beacon_api/utils/validate.py b/beacon_api/utils/validate.py index 2157e8f7..dae9aadd 100644 --- a/beacon_api/utils/validate.py +++ b/beacon_api/utils/validate.py @@ -124,6 +124,19 @@ def token_scheme_check(token, scheme, obj, host): raise BeaconUnauthorised(obj, host, "invalid_token", 'Token cannot be empty.') +def verify_aud_claim(): + """Verify audience claim.""" + aud = [] + verify_aud = OAUTH2_CONFIG.verify_aud # Option to skip verification of `aud` claim + if verify_aud: + aud = os.environ.get('JWT_AUD', OAUTH2_CONFIG.audience) # List of intended audiences of token + # if verify_aud is set to True, we expect that a desired aud is then supplied. + # However, if verify_aud=True and no aud is supplied, we use aud=[None] which will fail for + # all tokens as a security measure. If aud=[], all tokens will pass (as is the default value). + aud = aud.split(',') if aud is not None else [None] + return verify_aud, aud + + def token_auth(): """Check if token is valid and authenticate. @@ -145,14 +158,7 @@ async def token_middleware(request, handler): # Token decoding parameters key = await get_key() # JWK used to decode token with - aud = [] - verify_aud = OAUTH2_CONFIG.verify_aud # Option to skip verification of `aud` claim - if verify_aud: - aud = os.environ.get('JWT_AUD', OAUTH2_CONFIG.audience) # List of intended audiences of token - # if verify_aud is set to True, we expect that a desired aud is then supplied. - # However, if verify_aud=True and no aud is supplied, we use aud=[None] which will fail for - # all tokens as a security measure. If aud=[], all tokens will pass (as is the default value). - aud = aud.split(',') if aud is not None else [None] + verify_aud, aud = verify_aud_claim() # Prepare JWTClaims validation # can be populated with claims that are required to be present in the payload of the token claims_options = { @@ -193,14 +199,15 @@ async def token_middleware(request, handler): # currently if a token is valid that means request is authenticated "authenticated": True} return await handler(request) + # Testing the exceptions is done in integration tests except MissingClaimError as e: - raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Missing claim(s): {e}') + raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Missing claim(s): {e}') # pragma: no cover except ExpiredTokenError as e: - raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Expired signature: {e}') + raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Expired signature: {e}') # pragma: no cover except InvalidClaimError as e: - raise BeaconForbidden(obj, request.host, f'Token info not corresponding with claim: {e}') + raise BeaconForbidden(obj, request.host, f'Token info not corresponding with claim: {e}') # pragma: no cover except InvalidTokenError as e: - raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Invalid authorization token: {e}') + raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Invalid authorization token: {e}') # pragma: no cover else: request["token"] = {"bona_fide_status": False, "permissions": None, From 026a60250a954a24cdc9cce7b08b664accc39ad0 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 11 Oct 2019 18:10:28 +0300 Subject: [PATCH 08/33] test token scheme just to be sure --- tests/test_basic.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_basic.py b/tests/test_basic.py index 0606d4df..3a4c0c08 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -3,6 +3,7 @@ from beacon_api.utils.db_load import parse_arguments, init_beacon_db, main from beacon_api.conf.config import init_db_pool from beacon_api.api.query import access_resolution +from beacon_api.utils.validate import token_scheme_check from beacon_api.permissions.ga4gh import get_ga4gh_controlled, get_ga4gh_bona_fide from .test_app import PARAMS from testfixtures import TempDirectory @@ -105,6 +106,12 @@ def test_main_db(self, mock_init): main() mock_init.assert_called() + def test_token_scheme_check_bad(self): + """Test token scheme no token.""" + # This might never happen, yet let prepare for it + with self.assertRaises(aiohttp.web_exceptions.HTTPUnauthorized): + token_scheme_check(None, 'https', {}, 'localhost') + def test_access_resolution_base(self): """Test assumptions for access resolution. From c9a64a36f429fdf2caff039159c0b2777dc533f0 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 11 Oct 2019 18:26:10 +0300 Subject: [PATCH 09/33] fix docs for validate --- docs/permissions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/permissions.rst b/docs/permissions.rst index ce23362c..bb70bc75 100644 --- a/docs/permissions.rst +++ b/docs/permissions.rst @@ -61,7 +61,7 @@ The permissions are then passed in :meth:`beacon_api.utils.validate` as illustra .. literalinclude:: /../beacon_api/utils/validate.py :language: python :dedent: 16 - :lines: 175-188 + :lines: 179-192 If there is no claim for GA4GH permissions as illustrated above, they will not be added to ``controlled_datasets``. From 8224cadf4b9fa31c6e2a828f3aa85b5cdcebb3ed Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 11 Oct 2019 19:04:36 +0300 Subject: [PATCH 10/33] exclude lines teste in integration tests --- .coveragerc | 1 + 1 file changed, 1 insertion(+) diff --git a/.coveragerc b/.coveragerc index 7e254e0b..0fb74614 100644 --- a/.coveragerc +++ b/.coveragerc @@ -14,6 +14,7 @@ omit = [report] # Regexes for lines to exclude from consideration exclude_lines = + pragma: no cover # Don't complain about missing debug-only code: def __repr__ if self\.debug From 3f42e2f877e9e082b3b5ee5d7a6002c5fbfebe9d Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 11 Oct 2019 19:56:25 +0300 Subject: [PATCH 11/33] fixes for db_load and its tests --- beacon_api/utils/db_load.py | 9 ++-- tests/test_db_load.py | 84 +++++++++++++++++++++++++++---------- 2 files changed, 68 insertions(+), 25 deletions(-) diff --git a/beacon_api/utils/db_load.py b/beacon_api/utils/db_load.py index 8ccac2af..679cdfa6 100644 --- a/beacon_api/utils/db_load.py +++ b/beacon_api/utils/db_load.py @@ -222,11 +222,11 @@ async def load_metadata(self, vcf, metafile, datafile): LOG.info(f'Calculate number of samples from {datafile}') len_samples = len(vcf.samples) LOG.info(f'Parse metadata from {metafile}') - with open(metafile, 'r') as metafile: + with open(metafile, 'r') as meta_file: # read metadata from given JSON file # TO DO: parse metadata directly from datafile if possible - LOG.info(metafile) - metadata = json.load(metafile) + LOG.info(meta_file) + metadata = json.load(meta_file) LOG.info(metadata) LOG.info('Metadata has been parsed') try: @@ -255,7 +255,8 @@ async def load_metadata(self, vcf, metafile, datafile): LOG.error(f'AN ERROR OCCURRED WHILE ATTEMPTING TO INSERT METADATA -> {e}') except Exception as e: LOG.error(f'AN ERROR OCCURRED WHILE ATTEMPTING TO PARSE METADATA -> {e}') - return metadata['datasetId'] + else: + return metadata['datasetId'] def _chunks(self, iterable, size): """Chunk records. diff --git a/tests/test_db_load.py b/tests/test_db_load.py index b31f2149..9f5c84ac 100644 --- a/tests/test_db_load.py +++ b/tests/test_db_load.py @@ -11,7 +11,7 @@ class Variant: Mock this for Variant calculations. """ - def __init__(self, ALT, REF, INF, call_rate, var_type, num_called): + def __init__(self, ALT, REF, INF, call_rate, var_type, num_called, is_sv=False): """Initialize class.""" self.INFO = INF self.ALT = ALT @@ -19,7 +19,7 @@ def __init__(self, ALT, REF, INF, call_rate, var_type, num_called): self.call_rate = call_rate self.var_type = var_type self.num_called = num_called - self.is_sv = False + self.is_sv = is_sv class INFO: @@ -28,12 +28,13 @@ class INFO: Mock this for storing VCF info. """ - def __init__(self, AC, VT, AN): + def __init__(self, AC, VT, AN, AF, SVTYPE=None): """Initialize class.""" self.AC = AC self.VT = VT self.AN = AN - self.AF = None + self.AF = AF + self.SVTYPE = SVTYPE def get(self, key): """Inside `__getitem__` method.""" @@ -133,6 +134,10 @@ def transaction(self, *args, **kwargs): """Mimic transaction.""" return Transaction(*args, **kwargs) + async def execute(self, query, *args): + """Mimic execute.""" + return Exception + @asyncio.coroutine def prepare(self, query): """Mimic prepare.""" @@ -170,7 +175,8 @@ def setUp(self): #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 19 112 . A G 10 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 - 20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.""" + 20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. + chrM 15011 . T C . PASS . GT:GQ:DP:RO:QR:AO:QA:GL 1:160:970:0:0:968:31792:-2860.58,0 1:160:970:0:0:968:31792:-2860.58,0""" self.datafile = self._dir.write('data.csv', self.data.encode('utf-8')) def tearDown(self): @@ -184,6 +190,18 @@ async def test_rchop(self, db_mock): await self._db.connection() result = self._db._rchop('INS:ME:LINE1', ":LINE1") self.assertEqual('INS:ME', result) + result_no_ending = self._db._rchop('INS', ":LINE1") + self.assertEqual('INS', result_no_ending) + + @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') + async def test_handle_type(self, db_mock): + """Test handle type.""" + db_mock.return_value = Connection() + await self._db.connection() + result = self._db._handle_type(1, int) + self.assertEqual([1], result) + result_tuple = self._db._handle_type((0.1, 0.2), float) + self.assertEqual([0.1, 0.2], result_tuple) @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') async def test_bnd_parts(self, db_mock): @@ -225,6 +243,16 @@ async def test_create_tables(self, db_mock, mock_log): # Should assert logs mock_log.info.assert_called_with('Tables have been created') + @asynctest.mock.patch('beacon_api.utils.db_load.LOG') + @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') + async def test_create_tables_exception(self, db_mock, mock_log): + """Test creating tables exception.""" + db_mock.return_value = ConnectionException() + await self._db.connection() + await self._db.create_tables('sql.init') + log = "AN ERROR OCCURRED WHILE ATTEMPTING TO CREATE TABLES -> [Errno 2] No such file or directory: 'sql.init'" + mock_log.error.assert_called_with(log) + @asynctest.mock.patch('beacon_api.utils.db_load.LOG') @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') @asynctest.mock.patch('beacon_api.utils.db_load.VCF') @@ -250,6 +278,18 @@ async def test_load_metadata(self, mock_vcf, db_mock, mock_log): mock_log.info.mock_calls = [f'Parsing metadata from {metafile}', 'Metadata has been parsed'] + @asynctest.mock.patch('beacon_api.utils.db_load.LOG') + @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') + async def test_load_metadata_exception(self, db_mock, mock_log): + """Test load metadata error.""" + db_mock.return_value = ConnectionException() + await self._db.connection() + vcf = asynctest.mock.MagicMock(name='samples') + vcf.samples.return_value = [1, 2, 3] + await self._db.load_metadata(vcf, 'meta.are', 'datafile') + log = "AN ERROR OCCURRED WHILE ATTEMPTING TO PARSE METADATA -> [Errno 2] No such file or directory: 'meta.are'" + mock_log.error.assert_called_with(log) + @asynctest.mock.patch('beacon_api.utils.db_load.LOG') @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') async def test_load_datafile(self, db_mock, mock_log): @@ -277,12 +317,6 @@ async def test_insert_variants(self, db_mock, mock_log): mock_log.info.mock_calls = [f'Received 1 variants for insertion to DATASET1', 'Insert variants into the database'] - # This was the case when BeaconDB() was initiated with a URL parameter, now it happens with environment variables - # def test_bad_init(self): - # """Capture error in case of anything wrong with initializing BeaconDB.""" - # with self.assertRaises(TypeError): - # BeaconDB() - @asynctest.mock.patch('beacon_api.utils.db_load.LOG') @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') async def test_close(self, db_mock, mock_log): @@ -299,18 +333,26 @@ async def test_unpack(self, db_mock, mock_log): """Test database URL fetching.""" db_mock.return_value = Connection() await self._db.connection() - inf1 = INFO((1), 'S', 3) - variant = Variant(['C'], 'T', inf1, 0.7, 'snp', 3) - result = self._db._unpack(variant) + inf1 = INFO((1), 'S', 3, None) + variant_1 = Variant(['C'], 'T', inf1, 0.7, 'snp', 3) + result = self._db._unpack(variant_1) self.assertEqual(([0.3333333333333333], [1], ['SNP'], ['C'], 3, []), result) - inf2 = INFO(1, 'S', 3) - variant = Variant(['AT', 'A'], 'ATA', inf2, 0.7, 'snp', 3) - result = self._db._unpack(variant) + inf2 = INFO(1, 'S', 3, None) + variant_2 = Variant(['AT', 'A'], 'ATA', inf2, 0.7, 'snp', 3) + result = self._db._unpack(variant_2) self.assertEqual(([0.3333333333333333], [1], ['DEL', 'DEL'], ['AT', 'A'], 3, []), result) - inf3 = INFO((1), 'S', 3) - variant = Variant(['TC'], 'T', inf3, 0.7, 'snp', 3) - result = self._db._unpack(variant) - self.assertEqual(([0.3333333333333333], [1], ['INS'], ['TC'], 3, []), result) + inf3 = INFO((1), 'S', 3, 0.5) + variant_3 = Variant(['TC'], 'T', inf3, 0.7, 'snp', 3) + result = self._db._unpack(variant_3) + self.assertEqual(([0.5], [1], ['INS'], ['TC'], 3, []), result) + inf4 = INFO((1), '', 3, None, 'BND') + variant_4 = Variant(['TC'], 'T', inf4, 0.7, 'snp', 3) + result = self._db._unpack(variant_4) + self.assertEqual(([0.3333333333333333], [1], ['SNP'], ['TC'], 3, []), result) + inf5 = INFO((1), 'S', 3, None, '') + variant_5 = Variant(['TC'], 'T', inf5, 0.7, 'ins', 3) + result5 = self._db._unpack(variant_5) + self.assertEqual(([0.3333333333333333], [1], ['INS'], ['TC'], 3, []), result5) @asynctest.mock.patch('beacon_api.utils.db_load.LOG') @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') From 9b946774931fa2c8f9e6cb2d5308965cb66227a9 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 15 Oct 2019 14:14:11 +0300 Subject: [PATCH 12/33] update jsonschema package --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7ea21431..1b167433 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ aiohttp aiohttp_cors asyncpg -jsonschema==3.0.2 +jsonschema Cython numpy cyvcf2==0.10.1; python_version < '3.7' diff --git a/setup.py b/setup.py index 5987b3b9..ab7b6dae 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ 'Programming Language :: Python :: 3.7', ], install_requires=['aiohttp', 'asyncpg', 'authlib', - 'jsonschema==3.0.2', 'gunicorn'], + 'jsonschema', 'gunicorn'], extras_require={ 'test': ['coverage', 'pytest', 'pytest-cov', 'coveralls', 'testfixtures', 'tox', From c01be2eeae78f6353897833352a51789bcbbe9ea Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 16 Oct 2019 08:19:55 +0300 Subject: [PATCH 13/33] outdated pipfile --- Pipfile | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 Pipfile diff --git a/Pipfile b/Pipfile deleted file mode 100644 index d3fda1c9..00000000 --- a/Pipfile +++ /dev/null @@ -1,18 +0,0 @@ -[[source]] -url = "https://pypi.org/simple" -verify_ssl = true -name = "pypi" - -[dev-packages] - -[packages] -aiohttp = "*" -asyncpg = "*" -cryptography = "*" -jsonschema = "==3.0.0" -"cyvcf2" = "*" -PyJWT = "*" -Cython = "*" - -[requires] -python_version = "3.6" From cff5bcbcb29bee6b32da7d8373bd6fb1b800a4cb Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 16 Oct 2019 08:21:45 +0300 Subject: [PATCH 14/33] bump db_load coverage --- tests/test_db_load.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_db_load.py b/tests/test_db_load.py index 9f5c84ac..c70cd8fe 100644 --- a/tests/test_db_load.py +++ b/tests/test_db_load.py @@ -333,12 +333,12 @@ async def test_unpack(self, db_mock, mock_log): """Test database URL fetching.""" db_mock.return_value = Connection() await self._db.connection() - inf1 = INFO((1), 'S', 3, None) - variant_1 = Variant(['C'], 'T', inf1, 0.7, 'snp', 3) + inf1 = INFO((1), 'i', 3, None) + variant_1 = Variant(['C'], 'T', inf1, 0.7, 'indel', 3) result = self._db._unpack(variant_1) self.assertEqual(([0.3333333333333333], [1], ['SNP'], ['C'], 3, []), result) - inf2 = INFO(1, 'S', 3, None) - variant_2 = Variant(['AT', 'A'], 'ATA', inf2, 0.7, 'snp', 3) + inf2 = INFO(1, 'M', 3, None) + variant_2 = Variant(['AT', 'A'], 'ATA', inf2, 0.7, 'mnp', 3) result = self._db._unpack(variant_2) self.assertEqual(([0.3333333333333333], [1], ['DEL', 'DEL'], ['AT', 'A'], 3, []), result) inf3 = INFO((1), 'S', 3, 0.5) From b5aa2b2d4281a564113e18739d0b91b516b2ee56 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 16 Oct 2019 09:02:22 +0300 Subject: [PATCH 15/33] updates to tox --- tox.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 333d5bd0..e1bb94c4 100644 --- a/tox.ini +++ b/tox.ini @@ -16,6 +16,7 @@ commands = sphinx-build -W -c docs/ -b html docs/ docs/_build/html [testenv:bandit] skip_install = true +; plain search for known vulnerable code deps = bandit commands = bandit -r beacon_api/ -c .bandit.yml @@ -23,7 +24,7 @@ commands = bandit -r beacon_api/ -c .bandit.yml [testenv:flake8] skip_install = true deps = - pydocstyle==3.0.0 + pydocstyle flake8 flake8-docstrings commands = flake8 . From 90c8bc19d480d1f99db9acccc2f903680e789888 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 16 Oct 2019 09:59:03 +0300 Subject: [PATCH 16/33] no invalid request is ever going to reach this --- beacon_api/utils/validate.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/beacon_api/utils/validate.py b/beacon_api/utils/validate.py index dae9aadd..4c647c76 100644 --- a/beacon_api/utils/validate.py +++ b/beacon_api/utils/validate.py @@ -76,8 +76,6 @@ def wrapper(func): @wraps(func) async def wrapped(*args): request = args[-1] - if not isinstance(request, web.Request): - raise BeaconBadRequest(request, request.host, "invalid request", "This does not seem a valid HTTP Request.") try: _, obj = await parse_request_object(request) except Exception: From f1c538a770b0ecadd56e6d10b6e4c19b09937ff3 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 16 Oct 2019 10:02:39 +0300 Subject: [PATCH 17/33] fix bug in handover --- beacon_api/__init__.py | 2 +- beacon_api/extensions/handover.py | 4 ++-- tests/test.ini | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/beacon_api/__init__.py b/beacon_api/__init__.py index 2d06d62e..f92eecc2 100644 --- a/beacon_api/__init__.py +++ b/beacon_api/__init__.py @@ -13,7 +13,7 @@ __license__ = CONFIG_INFO.license __copyright__ = CONFIG_INFO.copyright __docs_url__ = CONFIG_INFO.docs_url -__handover_drs__ = CONFIG_INFO.handover_drs +__handover_drs__ = CONFIG_INFO.handover_drs.rstrip("/") __handover_datasets__ = CONFIG_INFO.handover_datasets __handover_beacon__ = CONFIG_INFO.handover_beacon __handover_base__ = CONFIG_INFO.handover_base diff --git a/beacon_api/extensions/handover.py b/beacon_api/extensions/handover.py index 5f25f33c..245caa8d 100644 --- a/beacon_api/extensions/handover.py +++ b/beacon_api/extensions/handover.py @@ -22,7 +22,7 @@ def make_handover(paths, datasetIds, chr='', start=0, end=0, ref='', alt='', var for dataset in set(datasetIds): handovers.append({"handoverType": {"id": "CUSTOM", "label": label}, "description": desc, - "url": __handover_drs__ + path.format(dataset=dataset, chr=chr, start=start, - end=end, ref=ref, alt=alt)}) + "url": __handover_drs__ + "/" + path.format(dataset=dataset, chr=chr, start=start, + end=end, ref=ref, alt=alt)}) return handovers diff --git a/tests/test.ini b/tests/test.ini index bc5d48ad..060c87d6 100644 --- a/tests/test.ini +++ b/tests/test.ini @@ -50,7 +50,7 @@ environment=test [handover_info] # The base url for all handovers -drs=https://examplebrowser.org +drs=https://examplebrowser.org/ # Make the handovers 1- or 0-based handover_base = 1 From dc614f1068e527a607ca14b369594acf58f366a8 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 16 Oct 2019 10:12:31 +0300 Subject: [PATCH 18/33] test actual db responses --- tests/test_data_query.py | 31 +++++++++++++++++++++++++++++-- tests/test_mate_name.py | 29 +++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/tests/test_data_query.py b/tests/test_data_query.py index 0fae3981..587f0e90 100644 --- a/tests/test_data_query.py +++ b/tests/test_data_query.py @@ -213,7 +213,11 @@ async def test_fetch_dataset_metadata_call_exception(self): async def test_fetch_filtered_dataset_call(self): """Test db call for retrieving main data.""" pool = asynctest.CoroutineMock() - pool.acquire().__aenter__.return_value = Connection() + db_response = {"referenceBases": '', "alternateBases": '', "variantType": "", + "referenceName": 'Chr38', + "frequency": 0, "callCount": 0, "sampleCount": 0, "variantCount": 0, + "start": 0, "end": 0, "accessType": "PUBLIC", "datasetId": "test"} + pool.acquire().__aenter__.return_value = Connection(accessData=[db_response]) assembly_id = 'GRCh38' position = (10, 20, None, None, None, None) chromosome = 1 @@ -223,7 +227,30 @@ async def test_fetch_filtered_dataset_call(self): # for now it can return empty dataset # in order to get a response we will have to mock it # in Connection() class - self.assertEqual(result, []) + expected = {'referenceName': 'Chr38', 'callCount': 0, 'sampleCount': 0, 'variantCount': 0, 'datasetId': 'test', + 'referenceBases': '', 'alternateBases': '', 'variantType': '', 'start': 0, 'end': 0, 'frequency': 0, + 'info': {'accessType': 'PUBLIC'}, + 'datasetHandover': [{'handoverType': {'id': 'CUSTOM', 'label': 'Variants'}, + 'description': 'browse the variants matched by the query', + 'url': 'https://examplebrowser.org/dataset/test/browser/variant/Chr38-1--'}, + {'handoverType': {'id': 'CUSTOM', 'label': 'Region'}, + 'description': 'browse data of the region matched by the query', + 'url': 'https://examplebrowser.org/dataset/test/browser/region/Chr38-1-1'}, + {'handoverType': {'id': 'CUSTOM', 'label': 'Data'}, + 'description': 'retrieve information of the datasets', + 'url': 'https://examplebrowser.org/dataset/test/browser'}]} + + self.assertEqual(result, [expected]) + + async def test_fetch_filtered_dataset_call_misses(self): + """Test db call for retrieving miss data.""" + pool = asynctest.CoroutineMock() + pool.acquire().__aenter__.return_value = Connection() # db_response is [] + assembly_id = 'GRCh38' + position = (10, 20, None, None, None, None) + chromosome = 1 + reference = 'A' + alternate = ('DUP', None) result_miss = await fetch_filtered_dataset(pool, assembly_id, position, chromosome, reference, alternate, None, None, True) self.assertEqual(result_miss, []) diff --git a/tests/test_mate_name.py b/tests/test_mate_name.py index 028c91f7..2265a9e9 100644 --- a/tests/test_mate_name.py +++ b/tests/test_mate_name.py @@ -29,7 +29,11 @@ async def test_find_fusion(self, mock_filtered): async def test_fetch_fusion_dataset_call(self): """Test db call for retrieving mate data.""" pool = asynctest.CoroutineMock() - pool.acquire().__aenter__.return_value = Connection() + db_response = {"referenceBases": '', "alternateBases": '', "variantType": "", + "referenceName": 'Chr38', + "frequency": 0, "callCount": 0, "sampleCount": 0, "variantCount": 0, + "start": 0, "end": 0, "accessType": "PUBLIC", "datasetId": "test"} + pool.acquire().__aenter__.return_value = Connection(accessData=[db_response]) assembly_id = 'GRCh38' position = (10, 20, None, None, None, None) chromosome = 1 @@ -38,7 +42,28 @@ async def test_fetch_fusion_dataset_call(self): # for now it can return empty dataset # in order to get a response we will have to mock it # in Connection() class - self.assertEqual(result, []) + expected = {'referenceName': 'Chr38', 'callCount': 0, 'sampleCount': 0, 'variantCount': 0, 'datasetId': 'test', + 'referenceBases': '', 'alternateBases': '', 'variantType': '', 'start': 0, 'end': 0, 'frequency': 0, + 'info': {'accessType': 'PUBLIC'}, + 'datasetHandover': [{'handoverType': {'id': 'CUSTOM', 'label': 'Variants'}, + 'description': 'browse the variants matched by the query', + 'url': 'https://examplebrowser.org/dataset/test/browser/variant/Chr38-1--'}, + {'handoverType': {'id': 'CUSTOM', 'label': 'Region'}, + 'description': 'browse data of the region matched by the query', + 'url': 'https://examplebrowser.org/dataset/test/browser/region/Chr38-1-1'}, + {'handoverType': {'id': 'CUSTOM', 'label': 'Data'}, + 'description': 'retrieve information of the datasets', + 'url': 'https://examplebrowser.org/dataset/test/browser'}]} + self.assertEqual(result, [expected]) + + async def test_fetch_fusion_dataset_call_miss(self): + """Test db call for retrieving mate miss data.""" + pool = asynctest.CoroutineMock() + pool.acquire().__aenter__.return_value = Connection() + assembly_id = 'GRCh38' + position = (10, 20, None, None, None, None) + chromosome = 1 + reference = 'A' result_miss = await fetch_fusion_dataset(pool, assembly_id, position, chromosome, reference, None, None, None, True) self.assertEqual(result_miss, []) From 0f281d6e4ba019079244adeeb72bbb786067da28 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 16 Oct 2019 10:13:11 +0300 Subject: [PATCH 19/33] do not test some lines as of problematic to unit test --- beacon_api/app.py | 3 ++- beacon_api/utils/validate.py | 6 ++++-- tests/test_basic.py | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/beacon_api/app.py b/beacon_api/app.py index 7b97801d..cac8d399 100644 --- a/beacon_api/app.py +++ b/beacon_api/app.py @@ -82,7 +82,8 @@ async def initialize(app): async def destroy(app): """Upon server close, close the DB connection pool.""" - await app['pool'].close() + # will defer this to asyncpg + await app['pool'].close() # pragma: no cover def set_cors(server): diff --git a/beacon_api/utils/validate.py b/beacon_api/utils/validate.py index 4c647c76..817eaedb 100644 --- a/beacon_api/utils/validate.py +++ b/beacon_api/utils/validate.py @@ -54,7 +54,8 @@ def set_defaults(validator, properties, instance, schema): for error in validate_properties( validator, properties, instance, schema, ): - yield error + # Difficult to unit test + yield error # pragma: no cover return validators.extend( validator_class, {"properties": set_defaults}, @@ -119,7 +120,8 @@ def token_scheme_check(token, scheme, obj, host): raise BeaconUnauthorised(obj, host, "invalid_token", 'Invalid token scheme, Bearer required.') if token is None: - raise BeaconUnauthorised(obj, host, "invalid_token", 'Token cannot be empty.') + # Might never happen + raise BeaconUnauthorised(obj, host, "invalid_token", 'Token cannot be empty.') # pragma: no cover def verify_aud_claim(): diff --git a/tests/test_basic.py b/tests/test_basic.py index 3a4c0c08..3e35a502 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -108,9 +108,9 @@ def test_main_db(self, mock_init): def test_token_scheme_check_bad(self): """Test token scheme no token.""" - # This might never happen, yet let prepare for it + # This might never happen, yet lets prepare for it with self.assertRaises(aiohttp.web_exceptions.HTTPUnauthorized): - token_scheme_check(None, 'https', {}, 'localhost') + token_scheme_check("", 'https', {}, 'localhost') def test_access_resolution_base(self): """Test assumptions for access resolution. From 9275ecb3f01df2e180f4c4baa193e11971bf9b4c Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 16 Oct 2019 10:13:33 +0300 Subject: [PATCH 20/33] test db load close db connection exception --- tests/test_db_load.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_db_load.py b/tests/test_db_load.py index c70cd8fe..8fc78045 100644 --- a/tests/test_db_load.py +++ b/tests/test_db_load.py @@ -327,6 +327,16 @@ async def test_close(self, db_mock, mock_log): mock_log.info.mock_calls = ['Mark the database connection to be closed', 'The database connection has been closed'] + @asynctest.mock.patch('beacon_api.utils.db_load.LOG') + @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') + async def test_close_error(self, db_mock, mock_log): + """Test database URL close error.""" + db_mock.return_value = ConnectionException() + await self._db.connection() + await self._db.close() + log = "AN ERROR OCCURRED WHILE ATTEMPTING TO CLOSE DATABASE CONNECTION -> 'ConnectionException' object has no attribute 'close'" + mock_log.error.assert_called_with(log) + @asynctest.mock.patch('beacon_api.utils.db_load.LOG') @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') async def test_unpack(self, db_mock, mock_log): From 1b4cf69d767c6a1d54e110eed1f13735c35e905f Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 16 Oct 2019 10:39:30 +0300 Subject: [PATCH 21/33] test aud claim --- tests/test_basic.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/test_basic.py b/tests/test_basic.py index 3e35a502..f8709227 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -3,10 +3,11 @@ from beacon_api.utils.db_load import parse_arguments, init_beacon_db, main from beacon_api.conf.config import init_db_pool from beacon_api.api.query import access_resolution -from beacon_api.utils.validate import token_scheme_check +from beacon_api.utils.validate import token_scheme_check, verify_aud_claim from beacon_api.permissions.ga4gh import get_ga4gh_controlled, get_ga4gh_bona_fide from .test_app import PARAMS from testfixtures import TempDirectory +from test.support import EnvironmentVarGuard def mock_token(bona_fide, permissions, auth): @@ -106,6 +107,16 @@ def test_main_db(self, mock_init): main() mock_init.assert_called() + def test_aud_claim(self): + """Test aud claim function.""" + env = EnvironmentVarGuard() + env.set('JWT_AUD', "aud1,aud2") + result = verify_aud_claim() + # Because it is false we expect it not to be parsed + expected = (False, []) + self.assertEqual(result, expected) + env.unset('JWT_AUD') + def test_token_scheme_check_bad(self): """Test token scheme no token.""" # This might never happen, yet lets prepare for it From aab89eacca72dad0b31be4947e392dde04fb5a0e Mon Sep 17 00:00:00 2001 From: Teemu Kataja Date: Mon, 23 Sep 2019 11:21:51 +0300 Subject: [PATCH 22/33] first test at parsing new ga4gh passports #130 --- beacon_api/permissions/ga4gh.py | 344 +++++++++++++++++++++++++------- beacon_api/utils/validate.py | 17 +- 2 files changed, 286 insertions(+), 75 deletions(-) diff --git a/beacon_api/permissions/ga4gh.py b/beacon_api/permissions/ga4gh.py index ce873f20..d8fe3499 100644 --- a/beacon_api/permissions/ga4gh.py +++ b/beacon_api/permissions/ga4gh.py @@ -1,106 +1,253 @@ """Parse permissions and statuses from ELIXIR token for GA4GH claim. -Current implementation is based on https://docs.google.com/document/d/11Wg-uL75ypU5eNu2p_xh9gspmbGtmLzmdq5VfPHBirE +Current implementation is based on https://github.com/ga4gh/data-security/blob/master/AAI/AAIConnectProfile.md#embedded-document-token-format -The JWT contains GA4GH DURI claims in the following form: +The ELIXIR AAI JWT payload contains a GA4GH Passport claim in the scope: .. code-block:: javascript { - "ga4gh.userinfo_claims": [ - "ga4gh.AffiliationAndRole", - "ga4gh.ControlledAccessGrants", - "ga4gh.AcceptedTermsAndPolicies", - "ga4gh.ResearcherStatus" + "scope": "ga4gh_passport_v1", + ... + } + +The token is then intended to be delivered to the /userinfo endpoint at ELIXIR AAI, which will respond with a list of +assorted third party JWTs that need to be sifted through to find the relevant tokens. Initially it can not be determined +which tokens contain the desired information. + +.. code-block:: javascript + + { + "ga4gh_passport_v1": [ + "JWT", + "JWT", + "JWT", + ... ] } -The actual dataset permissions are then requested from /userinfo endpoint, and take the form of: +Each third party token (JWT, RFC 7519) consists of three parts separated by dots, in the following manner: `header.payload.signature`. +This module processes the assorted tokens to extract the information they carry and to validate that data. + +The process is carried out as such: +1. Take token (JWT) +2. Decode token +3a. Extract `type` key from the payload portion and check if the token type is of interest +3b. If the token is of the desired type, continue, if not, discard this token and move to the next one +4. Extract `jku` key from the header portion (value is a URL that returns a JWK public key set) +5. Decode the complete token with the received public key +6. Validate the token claims +7. Extract the sought-after value from the `ga4gh_visa_v1` object's `value` key + +Dataset permissions are read from GA4GH RI claims of the type "ControlledAccessGrants" .. code-block:: javascript { - "ga4gh": { - "ControlledAccessGrants": [ - { - "value": "https://www.ebi.ac.uk/ega/EGAD000000000001", - "source": "https://ega-archive.org/dacs/EGAC00000000001", - "by": "dac", - "authoriser": "john.doe@dac.org", - "asserted": 1546300800, - "expires": 1577836800 - } - ] + "ga4gh_visa_v1": { + "type": "ControlledAccessGrants", + "value": "https://www.ebi.ac.uk/ega/EGAD000000000001", + "source": "https://ega-archive.org/dacs/EGAC00000000001", + "by": "dac", + "asserted": 1546300800, + "expires": 1577836800 } } -The statuses are also requested from /userinfo endpoint, and take the form of: +Bona Fide status is read from GA4GH RI claims of the type "AcceptedTermsAndPolicies" and "ResearcherStatus", each being in their respective tokens. .. code-block:: javascript { - "ga4gh": { - "AcceptedTermsAndPolicies": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "self", - "asserted": 1539069213, - "expires": 4694742813 - } - ], - "ResearcherStatus": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "peer", - "asserted": 1539017776, - "expires": 1593165413 - } - ] + "ga4gh_visa_v1": { + "type": "AcceptedTermsAndPolicies", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1539069213, + "expires": 4694742813 + } + } + + { + "ga4gh_visa_v1": { + "type": "ResearcherStatus", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "peer", + "asserted": 1539017776, + "expires": 1593165413 } } """ +import base64 +import json + import aiohttp +from authlib.jose import jwt +from authlib.jose.errors import MissingClaimError, InvalidClaimError, ExpiredTokenError, InvalidTokenError + from ..api.exceptions import BeaconServerError from ..utils.logging import LOG from ..conf import OAUTH2_CONFIG +async def decode_passport(encoded_passport): + """Return decoded header and payload from encoded passport JWT. + + Public-key-less decoding inspired by the PyJWT library https://github.com/jpadilla/pyjwt + """ + LOG.debug('Decoding GA4GH passport.') + + # Convert the token string into bytes for processing, and split it into segments + encoded_passport = encoded_passport.encode('utf-8') # `header.payload.signature` + data, _ = encoded_passport.rsplit(b'.', 1) # data contains header and payload segments, the ignored segment is the signature segment + segments = data.split(b'.', 1) # [header, payload] + + # Intermediary container + verified_segments = [] + # Handle different implementations of public exponent + # 65537 is recommended, but some use 65536 instead + # pad the missing bytes, if needed + for segment in segments: + rem = len(segment) % 4 + if rem > 0: + segment += b'=' * (4 - rem) + verified_segments.append(segment) + + # Decode the verified token segments + decoded_segments = [base64.urlsafe_b64decode(seg) for seg in verified_segments] + + # Convert the decoded segment bytes into dicts for easy access + decoded_data = [json.loads(seg.decode('utf-8')) for seg in decoded_segments] + + return decoded_data + + +async def get_ga4gh_permissions(token): + """Retrieve GA4GH passports (JWTs) from ELIXIR AAI and process them into tangible permissions.""" + LOG.info('Handling permissions.') + + # Return variables + dataset_permissions = [] + bona_fide_status = False + + # Intermediary containers + dataset_passports = [] # [(token, header)] + bona_fide_passports = [] # [(token, header, payload)] + + # Get encoded passports (assorted JWTs) from /userinfo + encoded_passports = await retrieve_user_data(token) + + # Pre-process the assorted passports (JWTs) for dataset permissions and bona fide status parsing + if encoded_passports: + # Decode the passports and check their type + for encoded_passport in encoded_passports: + # Decode passport + header, payload = await decode_passport(encoded_passport) + # Sort passports that carry dataset permissions + if payload.get('ga4gh_visa_v1', {}).get('type') == 'ControlledAccessGrants': + dataset_passports.append((encoded_passport, header)) + # Sort passports that MAY carry bona fide status information + if payload.get('ga4gh_visa_v1', {}).get('type') in ['AcceptedTermsAndPolicies', 'ResearcherStatus']: + bona_fide_passports.append((encoded_passport, header, payload)) + + # Parse dataset passports to extract dataset permissions and validate them + dataset_permissions = await get_ga4gh_controlled(dataset_passports) + # Parse bona fide passports to extract bona fide status and validate them + bona_fide_status = await get_ga4gh_bona_fide(bona_fide_passports) + + return dataset_permissions, bona_fide_status + + async def retrieve_user_data(token): """Retrieve GA4GH user data.""" + LOG.debug('Contacting ELIXIR AAI /userinfo.') headers = {"Authorization": f"Bearer {token}"} try: async with aiohttp.ClientSession(headers=headers) as session: async with session.get(OAUTH2_CONFIG.userinfo) as r: json_body = await r.json() LOG.info('Retrieve GA4GH user data from ELIXIR AAI.') - return json_body.get("ga4gh", None) + return json_body.get("ga4gh_visa_v1", None) except Exception: raise BeaconServerError("Could not retrieve GA4GH user data from ELIXIR AAI.") -async def get_ga4gh_controlled(token, token_claim): - """Retrieve datasets from GA4GH permissions JWT claim.""" +async def get_jwk(url): + """Get JWK set keys to validate JWT.""" + LOG.debug('Retrieving JWK.') + try: + async with aiohttp.ClientSession() as session: + async with session.get(url) as r: + # This can be a single key or a list of JWK + return await r.json() + except Exception: + # This is not a fatal error, it just means that we are unable to validate the permissions, + # but the process should continue even if the validation of one token fails + LOG.error(f'Could not retrieve JWK from {url}') + pass + + +async def get_ga4gh_controlled(passports): + """Retrieve dataset permissions from GA4GH passport visas.""" # We only want to get datasets once, thus the set which prevents duplicates - LOG.info("Parsing GA4GH dataset permissions claims.") + LOG.info("Parsing GA4GH dataset permissions.") datasets = set() - # Check if the token contains a claim for GA4GH permissions - if 'ga4gh.ControlledAccessGrants' in token_claim: - # Contact /userinfo with token to get GA4GH permissions - ga4gh = await retrieve_user_data(token) - # If the /userinfo endpoint responded with user data, retrieve permissions and parse them - if 'ControlledAccessGrants' in ga4gh: - # Extract dataset key and split by `/` to remove potential URL prefix - # the dataset id in the resulting list will always be the last element - datasets.update([p["value"].split('/')[-1] for p in ga4gh["ControlledAccessGrants"] if "value" in p]) + + # JWT decoding and validation settings + # The `aud` claim will be ignored, because Beacon has no prior knowledge + # as to where the token has originated from, and is therefore unable to + # verify the intended audience. Other claims will be validated as per usual. + claims_options = { + "aud": { + "essential": False + } + } + + for passport in passports: + # passport[0] -> token + # passport[1] -> decoded unverified header + # Attempt to decode the token and validate its contents + # None of the exceptions are fatal, and will not raise an exception + # Because even if the validation of one passport fails, the query + # Should still continue in case other passports are valid + try: + # Get JWK for this passport from a third party provider + # The JWK will be requested from a URL that is given in the `jku` claim in the header + passport_key = await get_jwk(passport[1].get('jku')) + # Decode the JWT using public key + decoded_passport = jwt.decode(passport[0], passport_key, claims_options=claims_options) + # Validate the JWT signature + decoded_passport.validate() + # Extract dataset id from validated passport + # The dataset value will be of form `https://institution.org/urn:dataset:1000` + # the extracted dataset will always be the last list element when split with `/` + dataset = decoded_passport.get('ga4gh_visa_v1', {}).get('value').split('/')[-1] + # Add dataset to set + datasets.update(dataset) + except MissingClaimError as e: + LOG.error(f'Missing claim(s): {e}') + pass + except ExpiredTokenError as e: + LOG.error(f'Expired signature: {e}') + pass + except InvalidClaimError as e: + LOG.error(f'Token info not corresponding with claim: {e}') + pass + except InvalidTokenError as e: + LOG.error(f'Invalid authorization token: {e}') + pass + except AttributeError as e: + LOG.error(f'GA4GH Visa did not contain a value: {e}') + pass return datasets -async def get_ga4gh_bona_fide(token, token_claim): +async def get_ga4gh_bona_fide(passports): """Retrieve Bona Fide status from GA4GH JWT claim.""" LOG.info("Parsing GA4GH bona fide claims.") @@ -108,21 +255,84 @@ async def get_ga4gh_bona_fide(token, token_claim): terms = False status = False - # Check if the token contains claims for GA4GH Bona Fide - if 'ga4gh.AcceptedTermsAndPolicies' in token_claim and 'ga4gh.ResearcherStatus' in token_claim: - # Contact /userinfo with token to get confirmation of Bona Fide status - ga4gh = await retrieve_user_data(token) - # If the /userinfo endpoint responded with user data, retrieve statuses and agreements and parse them - if 'AcceptedTermsAndPolicies' in ga4gh: - for accepted_terms in ga4gh["AcceptedTermsAndPolicies"]: - if accepted_terms.get("value") == OAUTH2_CONFIG.bona_fide_value: + # JWT decoding and validation settings + # The `aud` claim will be ignored, because Beacon has no prior knowledge + # as to where the token has originated from, and is therefore unable to + # verify the intended audience. Other claims will be validated as per usual. + claims_options = { + "aud": { + "essential": False + } + } + + for passport in passports: + # passport[0] -> token + # passport[1] -> decoded unverified header + # passport[2] -> decoded unverified payload + # Attempt to decode the token and validate its contents + # None of the exceptions are fatal, and will not raise an exception + # Because even if the validation of one passport fails, the query + # Should still continue in case other passports are valid + # Check for the `type` of visa to determine if to look for `terms` or `status` + # + # CHECK FOR TERMS + if passport[2].get('ga4gh_visa_v1', {}).get('type') == 'AcceptedTermsAndPolicies': + # Check if the visa contains a bona fide value + if passport[2].get('ga4gh_visa_v1', {}).get('value') == OAUTH2_CONFIG.bona_fide_value: + # This passport has the correct type and value, next step is to validate it + try: + # Get JWK for this passport from a third party provider + # The JWK will be requested from a URL that is given in the `jku` claim in the header + passport_key = await get_jwk(passport[1].get('jku')) + # Decode the JWT using public key + decoded_passport = jwt.decode(passport[0], passport_key, claims_options=claims_options) + # Validate the JWT signature + decoded_passport.validate() + # The token is validated, therefore the terms are accepted terms = True - if 'ResearcherStatus' in ga4gh: - for researcher_status in ga4gh["ResearcherStatus"]: - if researcher_status.get("value") == OAUTH2_CONFIG.bona_fide_value: + except MissingClaimError as e: + LOG.error(f'Missing claim(s): {e}') + pass + except ExpiredTokenError as e: + LOG.error(f'Expired signature: {e}') + pass + except InvalidClaimError as e: + LOG.error(f'Token info not corresponding with claim: {e}') + pass + except InvalidTokenError as e: + LOG.error(f'Invalid authorization token: {e}') + pass + # + # CHECK FOR STATUS + if passport[2].get('ga4gh_visa_v1', {}).get('type') == 'ResearcherStatus': + # Check if the visa contains a bona fide value + if passport[2].get('ga4gh_visa_v1', {}).get('value') == OAUTH2_CONFIG.bona_fide_value: + # This passport has the correct type and value, next step is to validate it + try: + # Get JWK for this passport from a third party provider + # The JWK will be requested from a URL that is given in the `jku` claim in the header + passport_key = await get_jwk(passport[1].get('jku')) + # Decode the JWT using public key + decoded_passport = jwt.decode(passport[0], passport_key, claims_options=claims_options) + # Validate the JWT signature + decoded_passport.validate() + # The token is validated, therefore the status is accepted status = True - if terms and status: - # User has agreed to terms and has been recognized by a peer, return True for Bona Fide status - return True + except MissingClaimError as e: + LOG.error(f'Missing claim(s): {e}') + pass + except ExpiredTokenError as e: + LOG.error(f'Expired signature: {e}') + pass + except InvalidClaimError as e: + LOG.error(f'Token info not corresponding with claim: {e}') + pass + except InvalidTokenError as e: + LOG.error(f'Invalid authorization token: {e}') + pass + + if terms and status: + # User has agreed to terms and has been recognized by a peer, return True for Bona Fide status + return True return False diff --git a/beacon_api/utils/validate.py b/beacon_api/utils/validate.py index 817eaedb..e1c42a3c 100644 --- a/beacon_api/utils/validate.py +++ b/beacon_api/utils/validate.py @@ -13,7 +13,7 @@ from aiocache.serializers import JsonSerializer from ..api.exceptions import BeaconUnauthorised, BeaconBadRequest, BeaconForbidden, BeaconServerError from ..conf import OAUTH2_CONFIG -from ..permissions.ga4gh import get_ga4gh_controlled, get_ga4gh_bona_fide +from ..permissions.ga4gh import get_ga4gh_permissions from jsonschema import Draft7Validator, validators from jsonschema.exceptions import ValidationError @@ -176,22 +176,23 @@ async def token_middleware(request, handler): } try: - decodedData = jwt.decode(token, key, claims_options=claims_options) # decode the token - decodedData.validate() # validate the token contents + decoded_data = jwt.decode(token, key, claims_options=claims_options) # decode the token + decoded_data.validate() # validate the token contents LOG.info('Auth Token Decoded.') - LOG.info(f'Identified as {decodedData["sub"]} user by {decodedData["iss"]}.') + LOG.info(f'Identified as {decoded_data["sub"]} user by {decoded_data["iss"]}.') # for now the permissions just reflects that the data can be decoded from token # the bona fide status is checked against ELIXIR AAI by default or the URL from config # the bona_fide_status is specific to ELIXIR Tokens + # + # Retrieve GA4GH Passports from /userinfo and process them into dataset permissions and bona fide status + dataset_permissions, bona_fide_status = await get_ga4gh_permissions(token) + # controlled_datasets = set() # currently we offer module for parsing GA4GH permissions, but multiple claims and providers can be utilised # by updating the set, meaning replicating the line below with the permissions function and its associated claim # For GA4GH DURI permissions (ELIXIR Permissions API 2.0) - controlled_datasets.update(await get_ga4gh_controlled(token, - decodedData["ga4gh_userinfo_claims"]) if "ga4gh_userinfo_claims" in decodedData else {}) + controlled_datasets.update(dataset_permissions) all_controlled = list(controlled_datasets) if bool(controlled_datasets) else None - # For Bona Fide status in GA4GH format - bona_fide_status = await get_ga4gh_bona_fide(token, decodedData["ga4gh_userinfo_claims"]) if "ga4gh_userinfo_claims" in decodedData else False request["token"] = {"bona_fide_status": bona_fide_status, # permissions key will hold the actual permissions found in the token/userinfo e.g. GA4GH permissions "permissions": all_controlled, From ff4151b5483421829dda3dd03a52d3b94b351b8c Mon Sep 17 00:00:00 2001 From: Teemu Kataja Date: Tue, 24 Sep 2019 09:41:24 +0300 Subject: [PATCH 23/33] fix controlled datasets, move validation and decoding into function #130 --- beacon_api/permissions/ga4gh.py | 176 +++++++++++++------------------- 1 file changed, 73 insertions(+), 103 deletions(-) diff --git a/beacon_api/permissions/ga4gh.py b/beacon_api/permissions/ga4gh.py index d8fe3499..16d7d2d7 100644 --- a/beacon_api/permissions/ga4gh.py +++ b/beacon_api/permissions/ga4gh.py @@ -33,7 +33,7 @@ 1. Take token (JWT) 2. Decode token 3a. Extract `type` key from the payload portion and check if the token type is of interest -3b. If the token is of the desired type, continue, if not, discard this token and move to the next one +3b. If the token is of the desired type, add it to list and continue, if not, discard this token and move to the next one 4. Extract `jku` key from the header portion (value is a URL that returns a JWK public key set) 5. Decode the complete token with the received public key 6. Validate the token claims @@ -171,7 +171,7 @@ async def retrieve_user_data(token): async with session.get(OAUTH2_CONFIG.userinfo) as r: json_body = await r.json() LOG.info('Retrieve GA4GH user data from ELIXIR AAI.') - return json_body.get("ga4gh_visa_v1", None) + return json_body.get("ga4gh_passport_v1", None) except Exception: raise BeaconServerError("Could not retrieve GA4GH user data from ELIXIR AAI.") @@ -191,11 +191,17 @@ async def get_jwk(url): pass -async def get_ga4gh_controlled(passports): - """Retrieve dataset permissions from GA4GH passport visas.""" - # We only want to get datasets once, thus the set which prevents duplicates - LOG.info("Parsing GA4GH dataset permissions.") - datasets = set() +async def validate_passport(passport): + """Decode a passport and validate its contents.""" + LOG.debug('Validating passport.') + + # Passports from `get_ga4gh_controlled()` will be of form + # passport[0] -> encoded passport (JWT) + # passport[1] -> unverified decoded header (contains `jku`) + # Passports from `get_bona_fide_status()` will be of form + # passport[0] -> encoded passport (JWT) + # passport[1] -> unverified decoded header (contains `jku`) + # passport[2] -> unverified decoded payload # JWT decoding and validation settings # The `aud` claim will be ignored, because Beacon has no prior knowledge @@ -207,42 +213,51 @@ async def get_ga4gh_controlled(passports): } } + # Attempt to decode the token and validate its contents + # None of the exceptions are fatal, and will not raise an exception + # Because even if the validation of one passport fails, the query + # Should still continue in case other passports are valid + try: + # Get JWK for this passport from a third party provider + # The JWK will be requested from a URL that is given in the `jku` claim in the header + passport_key = await get_jwk(passport[1].get('jku')) + # Decode the JWT using public key + decoded_passport = jwt.decode(passport[0], passport_key, claims_options=claims_options) + # Validate the JWT signature + decoded_passport.validate() + # Return decoded and validated payload contents + return decoded_passport + except MissingClaimError as e: + LOG.error(f'Missing claim(s): {e}') + pass + except ExpiredTokenError as e: + LOG.error(f'Expired signature: {e}') + pass + except InvalidClaimError as e: + LOG.error(f'Token info not corresponding with claim: {e}') + pass + except InvalidTokenError as e: + LOG.error(f'Invalid authorization token: {e}') + pass + + return None + + +async def get_ga4gh_controlled(passports): + """Retrieve dataset permissions from GA4GH passport visas.""" + # We only want to get datasets once, thus the set which prevents duplicates + LOG.info("Parsing GA4GH dataset permissions.") + datasets = set() + for passport in passports: - # passport[0] -> token - # passport[1] -> decoded unverified header - # Attempt to decode the token and validate its contents - # None of the exceptions are fatal, and will not raise an exception - # Because even if the validation of one passport fails, the query - # Should still continue in case other passports are valid - try: - # Get JWK for this passport from a third party provider - # The JWK will be requested from a URL that is given in the `jku` claim in the header - passport_key = await get_jwk(passport[1].get('jku')) - # Decode the JWT using public key - decoded_passport = jwt.decode(passport[0], passport_key, claims_options=claims_options) - # Validate the JWT signature - decoded_passport.validate() - # Extract dataset id from validated passport - # The dataset value will be of form `https://institution.org/urn:dataset:1000` - # the extracted dataset will always be the last list element when split with `/` - dataset = decoded_passport.get('ga4gh_visa_v1', {}).get('value').split('/')[-1] - # Add dataset to set - datasets.update(dataset) - except MissingClaimError as e: - LOG.error(f'Missing claim(s): {e}') - pass - except ExpiredTokenError as e: - LOG.error(f'Expired signature: {e}') - pass - except InvalidClaimError as e: - LOG.error(f'Token info not corresponding with claim: {e}') - pass - except InvalidTokenError as e: - LOG.error(f'Invalid authorization token: {e}') - pass - except AttributeError as e: - LOG.error(f'GA4GH Visa did not contain a value: {e}') - pass + # Decode passport and validate its contents + validated_passport = await validate_passport(passport) + # Extract dataset id from validated passport + # The dataset value will be of form `https://institution.org/urn:dataset:1000` + # the extracted dataset will always be the last list element when split with `/` + dataset = validated_passport.get('ga4gh_visa_v1', {}).get('value').split('/')[-1] + # Add dataset to set + datasets.add(dataset) return datasets @@ -255,24 +270,7 @@ async def get_ga4gh_bona_fide(passports): terms = False status = False - # JWT decoding and validation settings - # The `aud` claim will be ignored, because Beacon has no prior knowledge - # as to where the token has originated from, and is therefore unable to - # verify the intended audience. Other claims will be validated as per usual. - claims_options = { - "aud": { - "essential": False - } - } - for passport in passports: - # passport[0] -> token - # passport[1] -> decoded unverified header - # passport[2] -> decoded unverified payload - # Attempt to decode the token and validate its contents - # None of the exceptions are fatal, and will not raise an exception - # Because even if the validation of one passport fails, the query - # Should still continue in case other passports are valid # Check for the `type` of visa to determine if to look for `terms` or `status` # # CHECK FOR TERMS @@ -280,56 +278,28 @@ async def get_ga4gh_bona_fide(passports): # Check if the visa contains a bona fide value if passport[2].get('ga4gh_visa_v1', {}).get('value') == OAUTH2_CONFIG.bona_fide_value: # This passport has the correct type and value, next step is to validate it - try: - # Get JWK for this passport from a third party provider - # The JWK will be requested from a URL that is given in the `jku` claim in the header - passport_key = await get_jwk(passport[1].get('jku')) - # Decode the JWT using public key - decoded_passport = jwt.decode(passport[0], passport_key, claims_options=claims_options) - # Validate the JWT signature - decoded_passport.validate() - # The token is validated, therefore the terms are accepted - terms = True - except MissingClaimError as e: - LOG.error(f'Missing claim(s): {e}') - pass - except ExpiredTokenError as e: - LOG.error(f'Expired signature: {e}') - pass - except InvalidClaimError as e: - LOG.error(f'Token info not corresponding with claim: {e}') - pass - except InvalidTokenError as e: - LOG.error(f'Invalid authorization token: {e}') - pass + # + # Decode passport and validate its contents + # If the validation passes, terms will be set to True + # If the validation fails, an exception will be raised + # (and ignored since it's not fatal), and terms will remain False + await validate_passport(passport) + # The token is validated, therefore the terms are accepted + terms = True # # CHECK FOR STATUS if passport[2].get('ga4gh_visa_v1', {}).get('type') == 'ResearcherStatus': # Check if the visa contains a bona fide value if passport[2].get('ga4gh_visa_v1', {}).get('value') == OAUTH2_CONFIG.bona_fide_value: # This passport has the correct type and value, next step is to validate it - try: - # Get JWK for this passport from a third party provider - # The JWK will be requested from a URL that is given in the `jku` claim in the header - passport_key = await get_jwk(passport[1].get('jku')) - # Decode the JWT using public key - decoded_passport = jwt.decode(passport[0], passport_key, claims_options=claims_options) - # Validate the JWT signature - decoded_passport.validate() - # The token is validated, therefore the status is accepted - status = True - except MissingClaimError as e: - LOG.error(f'Missing claim(s): {e}') - pass - except ExpiredTokenError as e: - LOG.error(f'Expired signature: {e}') - pass - except InvalidClaimError as e: - LOG.error(f'Token info not corresponding with claim: {e}') - pass - except InvalidTokenError as e: - LOG.error(f'Invalid authorization token: {e}') - pass + # + # Decode passport and validate its contents + # If the validation passes, status will be set to True + # If the validation fails, an exception will be raised + # (and ignored since it's not fatal), and status will remain False + await validate_passport(passport) + # The token is validated, therefore the status is accepted + status = True if terms and status: # User has agreed to terms and has been recognized by a peer, return True for Bona Fide status From 5e663b1780e7e638495c7f89cb2c2f33c518c1c8 Mon Sep 17 00:00:00 2001 From: Teemu Kataja Date: Mon, 14 Oct 2019 10:17:44 +0300 Subject: [PATCH 24/33] check for scopes before parsing token --- beacon_api/utils/validate.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/beacon_api/utils/validate.py b/beacon_api/utils/validate.py index e1c42a3c..61601feb 100644 --- a/beacon_api/utils/validate.py +++ b/beacon_api/utils/validate.py @@ -185,8 +185,19 @@ async def token_middleware(request, handler): # the bona_fide_status is specific to ELIXIR Tokens # # Retrieve GA4GH Passports from /userinfo and process them into dataset permissions and bona fide status - dataset_permissions, bona_fide_status = await get_ga4gh_permissions(token) + bona_fide_status = False + dataset_permissions = set() + required_scopes = ['openid', 'ga4gh_passport_v1'] + token_scopes = decoded_data.get('scope').split(' ') + LOG.info(f'Required scopes: {required_scopes}') + LOG.info(f'Token scopes: {token_scopes}') + LOG.info(f'Bona fide before: {bona_fide_status}') + LOG.info(f'Permissions before: {dataset_permissions}') + if all(scope in token_scopes for scope in required_scopes): + dataset_permissions, bona_fide_status = await get_ga4gh_permissions(token) # + LOG.info(f'Bona fide after: {bona_fide_status}') + LOG.info(f'Permissions after: {dataset_permissions}') controlled_datasets = set() # currently we offer module for parsing GA4GH permissions, but multiple claims and providers can be utilised # by updating the set, meaning replicating the line below with the permissions function and its associated claim From a3405510392ab5359dabef2c96e02308d9faaf14 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 21 Oct 2019 00:29:33 +0300 Subject: [PATCH 25/33] fix documentation link --- beacon_api/permissions/ga4gh.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beacon_api/permissions/ga4gh.py b/beacon_api/permissions/ga4gh.py index 16d7d2d7..0b072c09 100644 --- a/beacon_api/permissions/ga4gh.py +++ b/beacon_api/permissions/ga4gh.py @@ -1,6 +1,6 @@ """Parse permissions and statuses from ELIXIR token for GA4GH claim. -Current implementation is based on https://github.com/ga4gh/data-security/blob/master/AAI/AAIConnectProfile.md#embedded-document-token-format +Current implementation is based on https://github.com/ga4gh/data-security/blob/master/AAI/AAIConnectProfile.md The ELIXIR AAI JWT payload contains a GA4GH Passport claim in the scope: From 3245e6e9ff94a0c083cdab263fbd238892e25512 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 1 Nov 2019 18:15:45 +0200 Subject: [PATCH 26/33] update ga4gh with fixes --- beacon_api/api/exceptions.py | 83 +++++++------- beacon_api/permissions/ga4gh.py | 89 +++++++-------- beacon_api/utils/validate.py | 16 +-- tests/test_app.py | 6 +- tests/test_basic.py | 192 ++++++++++++++++---------------- tests/test_response.py | 26 ++--- 6 files changed, 197 insertions(+), 215 deletions(-) diff --git a/beacon_api/api/exceptions.py b/beacon_api/api/exceptions.py index 32604b22..c538c137 100644 --- a/beacon_api/api/exceptions.py +++ b/beacon_api/api/exceptions.py @@ -10,37 +10,35 @@ from ..conf import CONFIG_INFO -class BeaconError(Exception): - """BeaconError Exception specific class. +def process_exception_data(request, host, error_code, error): + """Return request data as dictionary. Generates custom exception messages based on request parameters. """ - - def __init__(self, request, host, error_code, error): - """Return request data as dictionary.""" - self.data = {'beaconId': '.'.join(reversed(host.split('.'))), - "apiVersion": __apiVersion__, - 'exists': None, - 'error': {'errorCode': error_code, - 'errorMessage': error}, - 'alleleRequest': {'referenceName': request.get("referenceName", None), - 'referenceBases': request.get("referenceBases", None), - 'includeDatasetResponses': request.get("includeDatasetResponses", "NONE"), - 'assemblyId': request.get("assemblyId", None)}, - # showing empty datasetsAlleRsponse as no datasets found - # A null/None would represent no data while empty array represents - # none found or error and corresponds with exists null/None - 'datasetAlleleResponses': []} - # include datasetIds only if they are specified - # as per specification if they don't exist all datatsets will be queried - # Only one of `alternateBases` or `variantType` is required, validated by schema - oneof_fields = ["alternateBases", "variantType", "start", "end", "startMin", "startMax", - "endMin", "endMax", "datasetIds"] - self.data['alleleRequest'].update({k: request.get(k) for k in oneof_fields if k in request}) - return self.data - - -class BeaconBadRequest(BeaconError): + data = {'beaconId': '.'.join(reversed(host.split('.'))), + "apiVersion": __apiVersion__, + 'exists': None, + 'error': {'errorCode': error_code, + 'errorMessage': error}, + 'alleleRequest': {'referenceName': request.get("referenceName", None), + 'referenceBases': request.get("referenceBases", None), + 'includeDatasetResponses': request.get("includeDatasetResponses", "NONE"), + 'assemblyId': request.get("assemblyId", None)}, + # showing empty datasetsAlleRsponse as no datasets found + # A null/None would represent no data while empty array represents + # none found or error and corresponds with exists null/None + 'datasetAlleleResponses': []} + # include datasetIds only if they are specified + # as per specification if they don't exist all datatsets will be queried + # Only one of `alternateBases` or `variantType` is required, validated by schema + oneof_fields = ["alternateBases", "variantType", "start", "end", "startMin", "startMax", + "endMin", "endMax", "datasetIds"] + data['alleleRequest'].update({k: request.get(k) for k in oneof_fields if k in request}) + + return data + + +class BeaconBadRequest(web.HTTPBadRequest): """Exception returns with 400 code and a custom error message. The method is called if one of the required parameters are missing or invalid. @@ -49,13 +47,12 @@ class BeaconBadRequest(BeaconError): def __init__(self, request, host, error): """Return custom bad request exception.""" - data = super().__init__(request, host, 400, error) - - LOG.error(f'400 ERROR MESSAGE: {error}') - raise web.HTTPBadRequest(content_type="application/json", text=json.dumps(data)) + data = process_exception_data(request, host, 400, error) + super().__init__(text=json.dumps(data), content_type="application/json") + LOG.error(f'401 ERROR MESSAGE: {error}') -class BeaconUnauthorised(BeaconError): +class BeaconUnauthorised(web.HTTPUnauthorized): """HTTP Exception returns with 401 code with a custom error message. The method is called if the user is not registered or if the token from the authentication has expired. @@ -64,17 +61,17 @@ class BeaconUnauthorised(BeaconError): def __init__(self, request, host, error, error_message): """Return custom unauthorized exception.""" - data = super().__init__(request, host, 401, error) + data = process_exception_data(request, host, 401, error) headers_401 = {"WWW-Authenticate": f"Bearer realm=\"{CONFIG_INFO.url}\"\n\ error=\"{error}\"\n\ error_description=\"{error_message}\""} + super().__init__(content_type="application/json", text=json.dumps(data), + # we use auth scheme Bearer by default + headers=headers_401) LOG.error(f'401 ERROR MESSAGE: {error}') - raise web.HTTPUnauthorized(content_type="application/json", text=json.dumps(data), - # we use auth scheme Bearer by default - headers=headers_401) -class BeaconForbidden(BeaconError): +class BeaconForbidden(web.HTTPForbidden): """HTTP Exception returns with 403 code with the error message. `'Resource not granted for authenticated user or resource protected for all users.'`. @@ -84,13 +81,12 @@ class BeaconForbidden(BeaconError): def __init__(self, request, host, error): """Return custom forbidden exception.""" - data = super().__init__(request, host, 403, error) - + data = process_exception_data(request, host, 403, error) + super().__init__(content_type="application/json", text=json.dumps(data)) LOG.error(f'403 ERROR MESSAGE: {error}') - raise web.HTTPForbidden(content_type="application/json", text=json.dumps(data)) -class BeaconServerError(BeaconError): +class BeaconServerError(web.HTTPInternalServerError): """HTTP Exception returns with 500 code with the error message. The 500 error is not specified by the Beacon API, thus as simple error would do. @@ -100,6 +96,5 @@ def __init__(self, error): """Return custom forbidden exception.""" data = {'errorCode': 500, 'errorMessage': error} - + super().__init__(content_type="application/json", text=json.dumps(data)) LOG.error(f'500 ERROR MESSAGE: {error}') - raise web.HTTPInternalServerError(content_type="application/json", text=json.dumps(data)) diff --git a/beacon_api/permissions/ga4gh.py b/beacon_api/permissions/ga4gh.py index 0b072c09..25597826 100644 --- a/beacon_api/permissions/ga4gh.py +++ b/beacon_api/permissions/ga4gh.py @@ -7,7 +7,7 @@ .. code-block:: javascript { - "scope": "ga4gh_passport_v1", + "scope": "openid ga4gh_passport_v1", ... } @@ -87,13 +87,25 @@ import aiohttp from authlib.jose import jwt -from authlib.jose.errors import MissingClaimError, InvalidClaimError, ExpiredTokenError, InvalidTokenError from ..api.exceptions import BeaconServerError from ..utils.logging import LOG from ..conf import OAUTH2_CONFIG +async def check_ga4gh_token(decoded_data, token, bona_fide_status, dataset_permissions): + """Check the token for GA4GH claims.""" + if 'scope' in decoded_data: + ga4gh_scopes = ['openid', 'ga4gh_passport_v1'] + token_scopes = decoded_data.get('scope').split(' ') + LOG.info(f'GA4H Required scopes: {ga4gh_scopes}') + LOG.info(f'Token scopes: {token_scopes}') + LOG.info(f'Bona fide before: {bona_fide_status}') + LOG.info(f'Permissions before: {dataset_permissions}') + if all(scope in token_scopes for scope in ga4gh_scopes): + dataset_permissions, bona_fide_status = await get_ga4gh_permissions(token) + + async def decode_passport(encoded_passport): """Return decoded header and payload from encoded passport JWT. @@ -148,10 +160,11 @@ async def get_ga4gh_permissions(token): # Decode passport header, payload = await decode_passport(encoded_passport) # Sort passports that carry dataset permissions - if payload.get('ga4gh_visa_v1', {}).get('type') == 'ControlledAccessGrants': + pass_type = payload.get('ga4gh_visa_v1', {}).get('type') + if pass_type == 'ControlledAccessGrants': dataset_passports.append((encoded_passport, header)) # Sort passports that MAY carry bona fide status information - if payload.get('ga4gh_visa_v1', {}).get('type') in ['AcceptedTermsAndPolicies', 'ResearcherStatus']: + if pass_type in ['AcceptedTermsAndPolicies', 'ResearcherStatus']: bona_fide_passports.append((encoded_passport, header, payload)) # Parse dataset passports to extract dataset permissions and validate them @@ -227,20 +240,8 @@ async def validate_passport(passport): decoded_passport.validate() # Return decoded and validated payload contents return decoded_passport - except MissingClaimError as e: - LOG.error(f'Missing claim(s): {e}') - pass - except ExpiredTokenError as e: - LOG.error(f'Expired signature: {e}') - pass - except InvalidClaimError as e: - LOG.error(f'Token info not corresponding with claim: {e}') - pass - except InvalidTokenError as e: - LOG.error(f'Invalid authorization token: {e}') - pass - - return None + except Exception as e: + LOG.error(f"Something went wrong when processing JWT tokens: {e}") async def get_ga4gh_controlled(passports): @@ -274,35 +275,31 @@ async def get_ga4gh_bona_fide(passports): # Check for the `type` of visa to determine if to look for `terms` or `status` # # CHECK FOR TERMS - if passport[2].get('ga4gh_visa_v1', {}).get('type') == 'AcceptedTermsAndPolicies': - # Check if the visa contains a bona fide value - if passport[2].get('ga4gh_visa_v1', {}).get('value') == OAUTH2_CONFIG.bona_fide_value: - # This passport has the correct type and value, next step is to validate it - # - # Decode passport and validate its contents - # If the validation passes, terms will be set to True - # If the validation fails, an exception will be raised - # (and ignored since it's not fatal), and terms will remain False - await validate_passport(passport) - # The token is validated, therefore the terms are accepted - terms = True + passport_type = passport[2].get('ga4gh_visa_v1', {}).get('type') + passport_value = passport[2].get('ga4gh_visa_v1', {}).get('value') + if passport_type in 'AcceptedTermsAndPolicies' and passport_value == OAUTH2_CONFIG.bona_fide_value: + # This passport has the correct type and value, next step is to validate it + # + # Decode passport and validate its contents + # If the validation passes, terms will be set to True + # If the validation fails, an exception will be raised + # (and ignored since it's not fatal), and terms will remain False + await validate_passport(passport) + # The token is validated, therefore the terms are accepted + terms = True # # CHECK FOR STATUS - if passport[2].get('ga4gh_visa_v1', {}).get('type') == 'ResearcherStatus': + if passport_value == OAUTH2_CONFIG.bona_fide_value and passport_type == 'ResearcherStatus': # Check if the visa contains a bona fide value - if passport[2].get('ga4gh_visa_v1', {}).get('value') == OAUTH2_CONFIG.bona_fide_value: - # This passport has the correct type and value, next step is to validate it - # - # Decode passport and validate its contents - # If the validation passes, status will be set to True - # If the validation fails, an exception will be raised - # (and ignored since it's not fatal), and status will remain False - await validate_passport(passport) - # The token is validated, therefore the status is accepted - status = True - - if terms and status: - # User has agreed to terms and has been recognized by a peer, return True for Bona Fide status - return True + # This passport has the correct type and value, next step is to validate it + # + # Decode passport and validate its contents + # If the validation passes, status will be set to True + # If the validation fails, an exception will be raised + # (and ignored since it's not fatal), and status will remain False + await validate_passport(passport) + # The token is validated, therefore the status is accepted + status = True - return False + # User has agreed to terms and has been recognized by a peer, return True for Bona Fide status + return terms and status diff --git a/beacon_api/utils/validate.py b/beacon_api/utils/validate.py index 61601feb..75397bc8 100644 --- a/beacon_api/utils/validate.py +++ b/beacon_api/utils/validate.py @@ -13,7 +13,7 @@ from aiocache.serializers import JsonSerializer from ..api.exceptions import BeaconUnauthorised, BeaconBadRequest, BeaconForbidden, BeaconServerError from ..conf import OAUTH2_CONFIG -from ..permissions.ga4gh import get_ga4gh_permissions +from ..permissions.ga4gh import check_ga4gh_token from jsonschema import Draft7Validator, validators from jsonschema.exceptions import ValidationError @@ -183,19 +183,13 @@ async def token_middleware(request, handler): # for now the permissions just reflects that the data can be decoded from token # the bona fide status is checked against ELIXIR AAI by default or the URL from config # the bona_fide_status is specific to ELIXIR Tokens - # + dataset_permissions, bona_fide_status = [], '' + # Retrieve GA4GH Passports from /userinfo and process them into dataset permissions and bona fide status bona_fide_status = False dataset_permissions = set() - required_scopes = ['openid', 'ga4gh_passport_v1'] - token_scopes = decoded_data.get('scope').split(' ') - LOG.info(f'Required scopes: {required_scopes}') - LOG.info(f'Token scopes: {token_scopes}') - LOG.info(f'Bona fide before: {bona_fide_status}') - LOG.info(f'Permissions before: {dataset_permissions}') - if all(scope in token_scopes for scope in required_scopes): - dataset_permissions, bona_fide_status = await get_ga4gh_permissions(token) - # + check_ga4gh_token(decoded_data, token, bona_fide_status, dataset_permissions) + LOG.info(f'Bona fide after: {bona_fide_status}') LOG.info(f'Permissions after: {dataset_permissions}') controlled_datasets = set() diff --git a/tests/test_app.py b/tests/test_app.py index 5f8e2d97..ae5f63b8 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -37,14 +37,14 @@ def generate_token(issuer): "iss": issuer, "aud": "audience", "exp": 9999999999, - "sub": "smth@elixir-europe.org" + "sub": "smth@smth.org" } token = jwt.encode(header, payload, pem).decode('utf-8') return token, pem def generate_bad_token(): - """Mock ELIXIR AAI token.""" + """Mock AAI token.""" pem = { "kty": "oct", "kid": "018c0ae5-4d9b-471b-bfd6-eef314bc7037", @@ -286,7 +286,7 @@ async def test_invalid_scheme_get_query(self): @asynctest.mock.patch('beacon_api.app.query_request_handler', side_effect=json.dumps(PARAMS)) @unittest_run_loop async def test_valid_token_get_query(self, mock_handler, mock_object): - """Test valid token GET query endpoint, invalid scheme.""" + """Test valid token GET query endpoint.""" token = os.environ.get('TOKEN') resp = await self.client.request("POST", "/query", data=json.dumps(PARAMS), diff --git a/tests/test_basic.py b/tests/test_basic.py index f8709227..c55b9870 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -290,104 +290,100 @@ def test_access_resolution_controlled_never_reached2(self): with self.assertRaises(aiohttp.web_exceptions.HTTPForbidden): access_resolution(request, token, host, [], [], [8]) - @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') - async def test_ga4gh_controlled(self, userinfo): - """Test ga4gh permissions claim parsing.""" - userinfo.return_value = { - "ControlledAccessGrants": [ - { - "value": "https://www.ebi.ac.uk/ega/EGAD000000000001", - "source": "https://ega-archive.org/dacs/EGAC00000000001", - "by": "dac", - "authoriser": "john.doe@dac.org", - "asserted": 1546300800, - "expires": 1577836800 - }, - { - "value": "https://www.ebi.ac.uk/ega/EGAD000000000002", - "source": "https://ega-archive.org/dacs/EGAC00000000001", - "by": "dac", - "authoriser": "john.doe@dac.org", - "asserted": 1546300800, - "expires": 1577836800 - }, - { - "value": "no-prefix-dataset", - "source": "https://ega-archive.org/dacs/EGAC00000000001", - "by": "dac", - "authoriser": "john.doe@dac.org", - "asserted": 1546300800, - "expires": 1577836800 - } - ] - } - # Good test: claims OK, userinfo OK - token_claim = ["ga4gh.ControlledAccessGrants"] - token = 'this_is_a_jwt' - datasets = await get_ga4gh_controlled(token, token_claim) - self.assertEqual(datasets, {'EGAD000000000001', 'EGAD000000000002', 'no-prefix-dataset'}) # has permissions - # Bad test: no claims, userinfo OK - token_claim = [] - token = 'this_is_a_jwt' - datasets = await get_ga4gh_controlled(token, token_claim) - self.assertEqual(datasets, set()) # doesn't have permissions - # Bad test: claims OK, no userinfo - userinfo.return_value = {} - token_claim = ["ga4gh.ControlledAccessGrants"] - token = 'this_is_a_jwt' - datasets = await get_ga4gh_controlled(token, token_claim) - self.assertEqual(datasets, set()) # doesn't have permissions - # Bad test: no claims, no userinfo - token_claim = [] - token = 'this_is_a_jwt' - datasets = await get_ga4gh_controlled(token, token_claim) - self.assertEqual(datasets, set()) # doesn't have permissions - - @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') - async def test_ga4gh_bona_fide(self, userinfo): - """Test ga4gh statuses claim parsing.""" - userinfo.return_value = { - "AcceptedTermsAndPolicies": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "self", - "asserted": 1539069213, - "expires": 4694742813 - } - ], - "ResearcherStatus": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "peer", - "asserted": 1539017776, - "expires": 1593165413 - } - ] - } - # Good test: claims OK, userinfo OK - token_claim = ["ga4gh.AcceptedTermsAndPolicies", "ga4gh.ResearcherStatus"] - token = 'this_is_a_jwt' - bona_fide_status = await get_ga4gh_bona_fide(token, token_claim) - self.assertEqual(bona_fide_status, True) # has bona fide - # Bad test: no claims, userinfo OK - token_claim = [] - token = 'this_is_a_jwt' - bona_fide_status = await get_ga4gh_bona_fide(token, token_claim) - self.assertEqual(bona_fide_status, False) # doesn't have bona fide - # Bad test: claims OK, no userinfo - userinfo.return_value = {} - token_claim = ["ga4gh.AcceptedTermsAndPolicies", "ga4gh.ResearcherStatus"] - token = 'this_is_a_jwt' - bona_fide_status = await get_ga4gh_bona_fide(token, token_claim) - self.assertEqual(bona_fide_status, False) # doesn't have bona fide - # Bad test: no claims, no userinfo - userinfo.return_value = {} - token_claim = [] - token = 'this_is_a_jwt' - bona_fide_status = await get_ga4gh_bona_fide(token, token_claim) - self.assertEqual(bona_fide_status, False) # doesn't have bona fide + # @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') + # async def test_ga4gh_controlled(self, userinfo): + # """Test ga4gh permissions claim parsing.""" + # userinfo.return_value = { + # "ControlledAccessGrants": [ + # { + # "value": "https://www.ebi.ac.uk/ega/EGAD000000000001", + # "source": "https://ega-archive.org/dacs/EGAC00000000001", + # "by": "dac", + # "authoriser": "john.doe@dac.org", + # "asserted": 1546300800, + # "expires": 1577836800 + # }, + # { + # "value": "https://www.ebi.ac.uk/ega/EGAD000000000002", + # "source": "https://ega-archive.org/dacs/EGAC00000000001", + # "by": "dac", + # "authoriser": "john.doe@dac.org", + # "asserted": 1546300800, + # "expires": 1577836800 + # }, + # { + # "value": "no-prefix-dataset", + # "source": "https://ega-archive.org/dacs/EGAC00000000001", + # "by": "dac", + # "authoriser": "john.doe@dac.org", + # "asserted": 1546300800, + # "expires": 1577836800 + # } + # ] + # } + # # Good test: claims OK, userinfo OK + # token_claim = ["ga4gh.ControlledAccessGrants"] + # token = 'this_is_a_jwt' + # datasets = await get_ga4gh_controlled(token, token_claim) + # self.assertEqual(datasets, {'EGAD000000000001', 'EGAD000000000002', 'no-prefix-dataset'}) # has permissions + # # Bad test: no claims, userinfo OK + # token_claim = [] + # token = 'this_is_a_jwt' + # datasets = await get_ga4gh_controlled(token, token_claim) + # self.assertEqual(datasets, set()) # doesn't have permissions + # # Bad test: claims OK, no userinfo + # userinfo.return_value = {} + # token_claim = ["ga4gh.ControlledAccessGrants"] + # token = 'this_is_a_jwt' + # datasets = await get_ga4gh_controlled(token, token_claim) + # self.assertEqual(datasets, set()) # doesn't have permissions + # # Bad test: no claims, no userinfo + # token_claim = [] + # token = 'this_is_a_jwt' + # datasets = await get_ga4gh_controlled(token, token_claim) + # self.assertEqual(datasets, set()) # doesn't have permissions + + # @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') + # async def test_ga4gh_bona_fide(self, userinfo): + # """Test ga4gh statuses claim parsing.""" + # userinfo.return_value = { + # "AcceptedTermsAndPolicies": [ + # { + # "value": "https://doi.org/10.1038/s41431-018-0219-y", + # "source": "https://ga4gh.org/duri/no_org", + # "by": "self", + # "asserted": 1539069213, + # "expires": 4694742813 + # } + # ], + # "ResearcherStatus": [ + # { + # "value": "https://doi.org/10.1038/s41431-018-0219-y", + # "source": "https://ga4gh.org/duri/no_org", + # "by": "peer", + # "asserted": 1539017776, + # "expires": 1593165413 + # } + # ] + # } + # # Good test: claims OK, userinfo OK + # passports = ["ga4gh.AcceptedTermsAndPolicies", "ga4gh.ResearcherStatus"] + # bona_fide_status = await get_ga4gh_bona_fide(passports) + # self.assertEqual(bona_fide_status, True) # has bona fide + # # Bad test: no claims, userinfo OK + # passports = [] + # bona_fide_status = await get_ga4gh_bona_fide(passports) + # self.assertEqual(bona_fide_status, False) # doesn't have bona fide + # # Bad test: claims OK, no userinfo + # userinfo.return_value = {} + # passports = ["ga4gh.AcceptedTermsAndPolicies", "ga4gh.ResearcherStatus"] + # bona_fide_status = await get_ga4gh_bona_fide(passports) + # self.assertEqual(bona_fide_status, False) # doesn't have bona fide + # # Bad test: no claims, no userinfo + # userinfo.return_value = {} + # passports = [] + # bona_fide_status = await get_ga4gh_bona_fide(passports) + # self.assertEqual(bona_fide_status, False) # doesn't have bona fide if __name__ == '__main__': diff --git a/tests/test_response.py b/tests/test_response.py index daf54e4a..3f9c8c66 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -110,19 +110,19 @@ async def test_bad_retrieve_user_data(self, m): with self.assertRaises(aiohttp.web_exceptions.HTTPInternalServerError): await retrieve_user_data('bad_token') - @aioresponses() - async def test_bad_none_retrieve_user_data(self, m): - """Test a failing userdata call because response didn't have ga4gh format.""" - m.get("http://test.csc.fi/userinfo", payload={"not_ga4gh": [{}]}) - user_data = await retrieve_user_data('good_token') - self.assertEqual(user_data, None) - - @aioresponses() - async def test_good_retrieve_user_data(self, m): - """Test a passing call to retrieve user data.""" - m.get("http://test.csc.fi/userinfo", payload={"ga4gh": [{}]}) - user_data = await retrieve_user_data('good_token') - self.assertEqual(user_data, [{}]) + # @aioresponses() + # async def test_bad_none_retrieve_user_data(self, m): + # """Test a failing userdata call because response didn't have ga4gh format.""" + # m.get("http://test.csc.fi/userinfo", payload={"not_ga4gh": [{}]}) + # user_data = await retrieve_user_data('good_token') + # self.assertEqual(user_data, None) + + # @aioresponses() + # async def test_good_retrieve_user_data(self, m): + # """Test a passing call to retrieve user data.""" + # m.get("http://test.csc.fi/userinfo", payload={"ga4gh": [{}]}) + # user_data = await retrieve_user_data('good_token') + # self.assertEqual(user_data, [{}]) @aioresponses() async def test_get_key(self, m): From 85e885f13586bd4a545224fcc60689719834fc82 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 1 Nov 2019 19:43:05 +0200 Subject: [PATCH 27/33] fix ga4gh unit tests --- beacon_api/permissions/ga4gh.py | 2 +- beacon_api/utils/validate.py | 2 +- tests/test_basic.py | 108 +++++++++----------------------- 3 files changed, 33 insertions(+), 79 deletions(-) diff --git a/beacon_api/permissions/ga4gh.py b/beacon_api/permissions/ga4gh.py index 25597826..6506b2c5 100644 --- a/beacon_api/permissions/ga4gh.py +++ b/beacon_api/permissions/ga4gh.py @@ -161,7 +161,7 @@ async def get_ga4gh_permissions(token): header, payload = await decode_passport(encoded_passport) # Sort passports that carry dataset permissions pass_type = payload.get('ga4gh_visa_v1', {}).get('type') - if pass_type == 'ControlledAccessGrants': + if pass_type == 'ControlledAccessGrants': # nosec dataset_passports.append((encoded_passport, header)) # Sort passports that MAY carry bona fide status information if pass_type in ['AcceptedTermsAndPolicies', 'ResearcherStatus']: diff --git a/beacon_api/utils/validate.py b/beacon_api/utils/validate.py index 75397bc8..83c8d115 100644 --- a/beacon_api/utils/validate.py +++ b/beacon_api/utils/validate.py @@ -188,7 +188,7 @@ async def token_middleware(request, handler): # Retrieve GA4GH Passports from /userinfo and process them into dataset permissions and bona fide status bona_fide_status = False dataset_permissions = set() - check_ga4gh_token(decoded_data, token, bona_fide_status, dataset_permissions) + await check_ga4gh_token(decoded_data, token, bona_fide_status, dataset_permissions) LOG.info(f'Bona fide after: {bona_fide_status}') LOG.info(f'Permissions after: {dataset_permissions}') diff --git a/tests/test_basic.py b/tests/test_basic.py index c55b9870..07292b30 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -290,42 +290,12 @@ def test_access_resolution_controlled_never_reached2(self): with self.assertRaises(aiohttp.web_exceptions.HTTPForbidden): access_resolution(request, token, host, [], [], [8]) - # @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') - # async def test_ga4gh_controlled(self, userinfo): - # """Test ga4gh permissions claim parsing.""" - # userinfo.return_value = { - # "ControlledAccessGrants": [ - # { - # "value": "https://www.ebi.ac.uk/ega/EGAD000000000001", - # "source": "https://ega-archive.org/dacs/EGAC00000000001", - # "by": "dac", - # "authoriser": "john.doe@dac.org", - # "asserted": 1546300800, - # "expires": 1577836800 - # }, - # { - # "value": "https://www.ebi.ac.uk/ega/EGAD000000000002", - # "source": "https://ega-archive.org/dacs/EGAC00000000001", - # "by": "dac", - # "authoriser": "john.doe@dac.org", - # "asserted": 1546300800, - # "expires": 1577836800 - # }, - # { - # "value": "no-prefix-dataset", - # "source": "https://ega-archive.org/dacs/EGAC00000000001", - # "by": "dac", - # "authoriser": "john.doe@dac.org", - # "asserted": 1546300800, - # "expires": 1577836800 - # } - # ] - # } - # # Good test: claims OK, userinfo OK - # token_claim = ["ga4gh.ControlledAccessGrants"] - # token = 'this_is_a_jwt' - # datasets = await get_ga4gh_controlled(token, token_claim) - # self.assertEqual(datasets, {'EGAD000000000001', 'EGAD000000000002', 'no-prefix-dataset'}) # has permissions + async def test_ga4gh_controlled(self): + """Test ga4gh permissions claim parsing.""" + # Good test: claims OK, userinfo OK + token_claim = [] + datasets = await get_ga4gh_controlled(token_claim) + self.assertEqual(datasets, set()) # has permissions # # Bad test: no claims, userinfo OK # token_claim = [] # token = 'this_is_a_jwt' @@ -343,47 +313,31 @@ def test_access_resolution_controlled_never_reached2(self): # datasets = await get_ga4gh_controlled(token, token_claim) # self.assertEqual(datasets, set()) # doesn't have permissions - # @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') - # async def test_ga4gh_bona_fide(self, userinfo): - # """Test ga4gh statuses claim parsing.""" - # userinfo.return_value = { - # "AcceptedTermsAndPolicies": [ - # { - # "value": "https://doi.org/10.1038/s41431-018-0219-y", - # "source": "https://ga4gh.org/duri/no_org", - # "by": "self", - # "asserted": 1539069213, - # "expires": 4694742813 - # } - # ], - # "ResearcherStatus": [ - # { - # "value": "https://doi.org/10.1038/s41431-018-0219-y", - # "source": "https://ga4gh.org/duri/no_org", - # "by": "peer", - # "asserted": 1539017776, - # "expires": 1593165413 - # } - # ] - # } - # # Good test: claims OK, userinfo OK - # passports = ["ga4gh.AcceptedTermsAndPolicies", "ga4gh.ResearcherStatus"] - # bona_fide_status = await get_ga4gh_bona_fide(passports) - # self.assertEqual(bona_fide_status, True) # has bona fide - # # Bad test: no claims, userinfo OK - # passports = [] - # bona_fide_status = await get_ga4gh_bona_fide(passports) - # self.assertEqual(bona_fide_status, False) # doesn't have bona fide - # # Bad test: claims OK, no userinfo - # userinfo.return_value = {} - # passports = ["ga4gh.AcceptedTermsAndPolicies", "ga4gh.ResearcherStatus"] - # bona_fide_status = await get_ga4gh_bona_fide(passports) - # self.assertEqual(bona_fide_status, False) # doesn't have bona fide - # # Bad test: no claims, no userinfo - # userinfo.return_value = {} - # passports = [] - # bona_fide_status = await get_ga4gh_bona_fide(passports) - # self.assertEqual(bona_fide_status, False) # doesn't have bona fide + @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') + async def test_ga4gh_bona_fide(self, userinfo): + """Test ga4gh statuses claim parsing.""" + passports = [("enc", "header", { + "ga4gh_visa_v1": {"type": "AcceptedTermsAndPolicies", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1539069213, + "expires": 4694742813} + }), + ("enc", "header", { + "ga4gh_visa_v1": {"type": "ResearcherStatus", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "peer", + "asserted": 1539017776, + "expires": 1593165413}})] + # Good test: claims OK, userinfo OK + bona_fide_status = await get_ga4gh_bona_fide(passports) + self.assertEqual(bona_fide_status, True) # has bona fide + # Bad test: no claims, userinfo OK + passports_empty = [] + bona_fide_status = await get_ga4gh_bona_fide(passports_empty) + self.assertEqual(bona_fide_status, False) # doesn't have bona fide if __name__ == '__main__': From 52c51afd8f2f1170638ede79a6935eed4945305a Mon Sep 17 00:00:00 2001 From: Teemu Kataja Date: Tue, 5 Nov 2019 10:22:00 +0200 Subject: [PATCH 28/33] fix permissions delivery --- beacon_api/permissions/ga4gh.py | 9 +++++---- beacon_api/utils/validate.py | 12 +++--------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/beacon_api/permissions/ga4gh.py b/beacon_api/permissions/ga4gh.py index 6506b2c5..73ff5512 100644 --- a/beacon_api/permissions/ga4gh.py +++ b/beacon_api/permissions/ga4gh.py @@ -95,16 +95,17 @@ async def check_ga4gh_token(decoded_data, token, bona_fide_status, dataset_permissions): """Check the token for GA4GH claims.""" + LOG.debug('Checking GA4GH claims from scope.') + if 'scope' in decoded_data: ga4gh_scopes = ['openid', 'ga4gh_passport_v1'] token_scopes = decoded_data.get('scope').split(' ') - LOG.info(f'GA4H Required scopes: {ga4gh_scopes}') - LOG.info(f'Token scopes: {token_scopes}') - LOG.info(f'Bona fide before: {bona_fide_status}') - LOG.info(f'Permissions before: {dataset_permissions}') + if all(scope in token_scopes for scope in ga4gh_scopes): dataset_permissions, bona_fide_status = await get_ga4gh_permissions(token) + return dataset_permissions, bona_fide_status + async def decode_passport(encoded_passport): """Return decoded header and payload from encoded passport JWT. diff --git a/beacon_api/utils/validate.py b/beacon_api/utils/validate.py index 83c8d115..d1b3c827 100644 --- a/beacon_api/utils/validate.py +++ b/beacon_api/utils/validate.py @@ -183,19 +183,13 @@ async def token_middleware(request, handler): # for now the permissions just reflects that the data can be decoded from token # the bona fide status is checked against ELIXIR AAI by default or the URL from config # the bona_fide_status is specific to ELIXIR Tokens - dataset_permissions, bona_fide_status = [], '' - # Retrieve GA4GH Passports from /userinfo and process them into dataset permissions and bona fide status - bona_fide_status = False - dataset_permissions = set() - await check_ga4gh_token(decoded_data, token, bona_fide_status, dataset_permissions) - - LOG.info(f'Bona fide after: {bona_fide_status}') - LOG.info(f'Permissions after: {dataset_permissions}') - controlled_datasets = set() + dataset_permissions, bona_fide_status = set(), False + dataset_permissions, bona_fide_status = await check_ga4gh_token(decoded_data, token, bona_fide_status, dataset_permissions) # currently we offer module for parsing GA4GH permissions, but multiple claims and providers can be utilised # by updating the set, meaning replicating the line below with the permissions function and its associated claim # For GA4GH DURI permissions (ELIXIR Permissions API 2.0) + controlled_datasets = set() controlled_datasets.update(dataset_permissions) all_controlled = list(controlled_datasets) if bool(controlled_datasets) else None request["token"] = {"bona_fide_status": bona_fide_status, From d43543bb13b4a453e5d1211824f0309d79a33d67 Mon Sep 17 00:00:00 2001 From: Teemu Kataja Date: Tue, 5 Nov 2019 14:09:10 +0200 Subject: [PATCH 29/33] unit tests for ga4gh permissions --- tests/test_basic.py | 169 ++++++++++++++++++++++++++++++++++------- tests/test_response.py | 44 +++++++---- 2 files changed, 173 insertions(+), 40 deletions(-) diff --git a/tests/test_basic.py b/tests/test_basic.py index 07292b30..c1d8b2bf 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -4,10 +4,16 @@ from beacon_api.conf.config import init_db_pool from beacon_api.api.query import access_resolution from beacon_api.utils.validate import token_scheme_check, verify_aud_claim -from beacon_api.permissions.ga4gh import get_ga4gh_controlled, get_ga4gh_bona_fide +from beacon_api.permissions.ga4gh import get_ga4gh_controlled, get_ga4gh_bona_fide, validate_passport +from beacon_api.permissions.ga4gh import check_ga4gh_token, decode_passport, get_ga4gh_permissions +from deploy.test.mock_auth import generate_token from .test_app import PARAMS from testfixtures import TempDirectory from test.support import EnvironmentVarGuard +# Hack to import a function from beyond top-level package +# used for test_decode_passport() to get a real token +import sys +sys.path.append("..") def mock_token(bona_fide, permissions, auth): @@ -17,6 +23,21 @@ def mock_token(bona_fide, permissions, auth): "authenticated": auth} +class MockDecodedPassport: + """Mock JWT.""" + + def __init__(self, validated=True): + """Initialise mock JWT.""" + self.validated = validated + + def validate(self): + """Invoke validate.""" + if self.validated: + return True + else: + raise Exception + + class MockBeaconDB: """BeaconDB mock. @@ -290,31 +311,37 @@ def test_access_resolution_controlled_never_reached2(self): with self.assertRaises(aiohttp.web_exceptions.HTTPForbidden): access_resolution(request, token, host, [], [], [8]) - async def test_ga4gh_controlled(self): + @asynctest.mock.patch('beacon_api.permissions.ga4gh.validate_passport') + async def test_ga4gh_controlled(self, m_validation): """Test ga4gh permissions claim parsing.""" - # Good test: claims OK, userinfo OK - token_claim = [] - datasets = await get_ga4gh_controlled(token_claim) - self.assertEqual(datasets, set()) # has permissions - # # Bad test: no claims, userinfo OK - # token_claim = [] - # token = 'this_is_a_jwt' - # datasets = await get_ga4gh_controlled(token, token_claim) - # self.assertEqual(datasets, set()) # doesn't have permissions - # # Bad test: claims OK, no userinfo - # userinfo.return_value = {} - # token_claim = ["ga4gh.ControlledAccessGrants"] - # token = 'this_is_a_jwt' - # datasets = await get_ga4gh_controlled(token, token_claim) - # self.assertEqual(datasets, set()) # doesn't have permissions - # # Bad test: no claims, no userinfo - # token_claim = [] - # token = 'this_is_a_jwt' - # datasets = await get_ga4gh_controlled(token, token_claim) - # self.assertEqual(datasets, set()) # doesn't have permissions - - @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') - async def test_ga4gh_bona_fide(self, userinfo): + # Test: no passports, no permissions + datasets = await get_ga4gh_controlled([]) + self.assertEqual(datasets, set()) + # Test: 1 passport, 1 unique dataset, 1 permission + passport = {"ga4gh_visa_v1": {"type": "ControlledAccessGrants", + "value": "https://institution.org/EGAD01", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1539069213, + "expires": 4694742813}} + m_validation.return_value = passport + dataset = await get_ga4gh_controlled([{}]) # one passport + self.assertEqual(dataset, {'EGAD01'}) + # Test: 2 passports, 1 unique dataset, 1 permission (permissions must not be duplicated) + passport = {"ga4gh_visa_v1": {"type": "ControlledAccessGrants", + "value": "https://institution.org/EGAD01", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1539069213, + "expires": 4694742813}} + m_validation.return_value = passport + dataset = await get_ga4gh_controlled([{}, {}]) # two passports + self.assertEqual(dataset, {'EGAD01'}) + # Test: 2 passports, 2 unique datasets, 2 permissions + # Can't test this case with the current design! + # Would need a way for validate_passport() to mock two different results + + async def test_ga4gh_bona_fide(self): """Test ga4gh statuses claim parsing.""" passports = [("enc", "header", { "ga4gh_visa_v1": {"type": "AcceptedTermsAndPolicies", @@ -331,14 +358,102 @@ async def test_ga4gh_bona_fide(self, userinfo): "by": "peer", "asserted": 1539017776, "expires": 1593165413}})] - # Good test: claims OK, userinfo OK + # Good test: both required passport types contained the correct value bona_fide_status = await get_ga4gh_bona_fide(passports) self.assertEqual(bona_fide_status, True) # has bona fide - # Bad test: no claims, userinfo OK + # Bad test: missing passports of required type passports_empty = [] bona_fide_status = await get_ga4gh_bona_fide(passports_empty) self.assertEqual(bona_fide_status, False) # doesn't have bona fide + @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_jwk') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.jwt') + async def test_validate_passport(self, m_jwt, m_jwk): + """Test passport validation.""" + m_jwk.return_value = 'jwk' + # Test: validation passed + m_jwt.return_value = MockDecodedPassport() + await validate_passport({}) + # + # This test doesn't work + # + # # Test: validation failed + # m_jwt.return_value = MockDecodedPassport(validated=False) + # with self.assertRaises(Exception): + # await validate_passport({}) + + @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_ga4gh_permissions') + async def test_check_ga4gh_token(self, m_get_perms): + """Test token scopes.""" + # Test: no scope found + decoded_data = {} + dataset_permissions, bona_fide_status = await check_ga4gh_token(decoded_data, {}, False, set()) + self.assertEqual(dataset_permissions, set()) + self.assertEqual(bona_fide_status, False) + # Test: scope is ok, but no claims + decoded_data = {'scope': ''} + dataset_permissions, bona_fide_status = await check_ga4gh_token(decoded_data, {}, False, set()) + self.assertEqual(dataset_permissions, set()) + self.assertEqual(bona_fide_status, False) + # Test: scope is ok, claims are ok + m_get_perms.return_value = {'EGAD01'}, True + decoded_data = {'scope': 'openid ga4gh_passport_v1'} + dataset_permissions, bona_fide_status = await check_ga4gh_token(decoded_data, {}, False, set()) + self.assertEqual(dataset_permissions, {'EGAD01'}) + self.assertEqual(bona_fide_status, True) + + async def test_decode_passport(self): + """Test key-less JWT decoding.""" + _, token, _ = generate_token() + header, payload = await decode_passport(token) + self.assertEqual(header.get('alg'), 'RS256') + self.assertEqual(payload.get('iss'), 'http://test.csc.fi') + + @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_ga4gh_bona_fide') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_ga4gh_controlled') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.decode_passport') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') + async def test_get_ga4gh_permissions(self, m_userinfo, m_decode, m_controlled, m_bonafide): + """Test GA4GH permissions main function.""" + # Test: no data (nothing) + m_userinfo.return_value = [{}] + header = {} + payload = {} + m_decode.return_value = header, payload + m_controlled.return_value = set() + m_bonafide.return_value = False + dataset_permissions, bona_fide_status = await get_ga4gh_permissions('token') + self.assertEqual(dataset_permissions, set()) + self.assertEqual(bona_fide_status, False) + # Test: permissions + m_userinfo.return_value = [{}] + header = {} + payload = { + 'ga4gh_visa_v1': { + 'type': 'ControlledAccessGrants' + } + } + m_decode.return_value = header, payload + m_controlled.return_value = {'EGAD01'} + m_bonafide.return_value = False + dataset_permissions, bona_fide_status = await get_ga4gh_permissions('token') + self.assertEqual(dataset_permissions, {'EGAD01'}) + self.assertEqual(bona_fide_status, False) + # Test: bona fide + m_userinfo.return_value = [{}] + header = {} + payload = { + 'ga4gh_visa_v1': { + 'type': 'ResearcherStatus' + } + } + m_decode.return_value = header, payload + m_controlled.return_value = set() + m_bonafide.return_value = True + dataset_permissions, bona_fide_status = await get_ga4gh_permissions('token') + self.assertEqual(dataset_permissions, set()) + self.assertEqual(bona_fide_status, True) + if __name__ == '__main__': asynctest.main() diff --git a/tests/test_response.py b/tests/test_response.py index 3f9c8c66..9172a65e 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -3,7 +3,7 @@ import asynctest from beacon_api.schemas import load_schema from beacon_api.utils.validate import get_key -from beacon_api.permissions.ga4gh import retrieve_user_data +from beacon_api.permissions.ga4gh import retrieve_user_data, get_jwk import jsonschema import json import aiohttp @@ -110,19 +110,19 @@ async def test_bad_retrieve_user_data(self, m): with self.assertRaises(aiohttp.web_exceptions.HTTPInternalServerError): await retrieve_user_data('bad_token') - # @aioresponses() - # async def test_bad_none_retrieve_user_data(self, m): - # """Test a failing userdata call because response didn't have ga4gh format.""" - # m.get("http://test.csc.fi/userinfo", payload={"not_ga4gh": [{}]}) - # user_data = await retrieve_user_data('good_token') - # self.assertEqual(user_data, None) + @aioresponses() + async def test_bad_none_retrieve_user_data(self, m): + """Test a failing userdata call because response didn't have ga4gh format.""" + m.get("http://test.csc.fi/userinfo", payload={"not_ga4gh": [{}]}) + user_data = await retrieve_user_data('good_token') + self.assertEqual(user_data, None) - # @aioresponses() - # async def test_good_retrieve_user_data(self, m): - # """Test a passing call to retrieve user data.""" - # m.get("http://test.csc.fi/userinfo", payload={"ga4gh": [{}]}) - # user_data = await retrieve_user_data('good_token') - # self.assertEqual(user_data, [{}]) + @aioresponses() + async def test_good_retrieve_user_data(self, m): + """Test a passing call to retrieve user data.""" + m.get("http://test.csc.fi/userinfo", payload={"ga4gh_passport_v1": [{}]}) + user_data = await retrieve_user_data('good_token') + self.assertEqual(user_data, [{}]) @aioresponses() async def test_get_key(self, m): @@ -146,6 +146,24 @@ async def test_get_key(self, m): self.assertTrue(isinstance(result, dict)) self.assertTrue(result["keys"][0]['alg'], 'RSA256') + @aioresponses() + async def test_get_jwk(self, m): + """Test get JWK.""" + data = { + "keys": [ + { + "alg": "RS256", + "kty": "RSA", + "use": "sig", + "n": "public_key", + "e": "AQAB" + } + ]} + m.get("http://test.csc.fi/jwk", payload=data) + result = await get_jwk('http://test.csc.fi/jwk') + self.assertTrue(isinstance(result, dict)) + self.assertTrue(result["keys"][0]['alg'], 'RSA256') + @asynctest.mock.patch('beacon_api.utils.validate.OAUTH2_CONFIG', return_value={'server': None}) async def test_bad_get_key(self, oauth_none): """Test bad test_get_key.""" From e8de1cd35854d16956bc1b3322a68bfc7d9098ca Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 5 Nov 2019 17:17:31 +0200 Subject: [PATCH 30/33] small fixes in unit tests --- beacon_api/utils/validate.py | 2 +- tests/test_basic.py | 25 +++++++++++-------------- tests/test_db_load.py | 2 +- tests/test_response.py | 6 ++++++ 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/beacon_api/utils/validate.py b/beacon_api/utils/validate.py index d1b3c827..2bf37de4 100644 --- a/beacon_api/utils/validate.py +++ b/beacon_api/utils/validate.py @@ -206,7 +206,7 @@ async def token_middleware(request, handler): raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Expired signature: {e}') # pragma: no cover except InvalidClaimError as e: raise BeaconForbidden(obj, request.host, f'Token info not corresponding with claim: {e}') # pragma: no cover - except InvalidTokenError as e: + except InvalidTokenError as e: # pragma: no cover raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Invalid authorization token: {e}') # pragma: no cover else: request["token"] = {"bona_fide_status": False, diff --git a/tests/test_basic.py b/tests/test_basic.py index c1d8b2bf..584eaa73 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -6,14 +6,9 @@ from beacon_api.utils.validate import token_scheme_check, verify_aud_claim from beacon_api.permissions.ga4gh import get_ga4gh_controlled, get_ga4gh_bona_fide, validate_passport from beacon_api.permissions.ga4gh import check_ga4gh_token, decode_passport, get_ga4gh_permissions -from deploy.test.mock_auth import generate_token -from .test_app import PARAMS +from .test_app import PARAMS, generate_token from testfixtures import TempDirectory from test.support import EnvironmentVarGuard -# Hack to import a function from beyond top-level package -# used for test_decode_passport() to get a real token -import sys -sys.path.append("..") def mock_token(bona_fide, permissions, auth): @@ -368,19 +363,21 @@ async def test_ga4gh_bona_fide(self): @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_jwk') @asynctest.mock.patch('beacon_api.permissions.ga4gh.jwt') - async def test_validate_passport(self, m_jwt, m_jwk): + @asynctest.mock.patch('beacon_api.permissions.ga4gh.LOG') + async def test_validate_passport(self, mock_log, m_jwt, m_jwk): """Test passport validation.""" m_jwk.return_value = 'jwk' # Test: validation passed m_jwt.return_value = MockDecodedPassport() await validate_passport({}) - # - # This test doesn't work - # + # # Test: validation failed - # m_jwt.return_value = MockDecodedPassport(validated=False) + m_jwt.return_value = MockDecodedPassport(validated=False) # with self.assertRaises(Exception): - # await validate_passport({}) + await validate_passport({}) + # we are not raising the exception we are just doing a log + # need to assert the log called + mock_log.error.assert_called_with("Something went wrong when processing JWT tokens: 1") @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_ga4gh_permissions') async def test_check_ga4gh_token(self, m_get_perms): @@ -404,9 +401,9 @@ async def test_check_ga4gh_token(self, m_get_perms): async def test_decode_passport(self): """Test key-less JWT decoding.""" - _, token, _ = generate_token() + token, _ = generate_token('http://test.csc.fi') header, payload = await decode_passport(token) - self.assertEqual(header.get('alg'), 'RS256') + self.assertEqual(header.get('alg'), 'HS256') self.assertEqual(payload.get('iss'), 'http://test.csc.fi') @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_ga4gh_bona_fide') diff --git a/tests/test_db_load.py b/tests/test_db_load.py index 8fc78045..cbdbea23 100644 --- a/tests/test_db_load.py +++ b/tests/test_db_load.py @@ -308,7 +308,7 @@ async def test_load_datafile(self, db_mock, mock_log): @asynctest.mock.patch('beacon_api.utils.db_load.LOG') @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') async def test_insert_variants(self, db_mock, mock_log): - """Test load_datafile.""" + """Test insert variants.""" db_mock.return_value = Connection() await self._db.connection() db_mock.assert_called() diff --git a/tests/test_response.py b/tests/test_response.py index 9172a65e..ef55e970 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -164,6 +164,12 @@ async def test_get_jwk(self, m): self.assertTrue(isinstance(result, dict)) self.assertTrue(result["keys"][0]['alg'], 'RSA256') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.LOG') + async def test_get_jwk_bad(self, mock_log): + """Test get JWK exception log.""" + await get_jwk('http://test.csc.fi/jwk') + mock_log.error.assert_called_with("Could not retrieve JWK from http://test.csc.fi/jwk") + @asynctest.mock.patch('beacon_api.utils.validate.OAUTH2_CONFIG', return_value={'server': None}) async def test_bad_get_key(self, oauth_none): """Test bad test_get_key.""" From c7fa2bc26f62df354ae7d8945743f81a79f3a174 Mon Sep 17 00:00:00 2001 From: Teemu Kataja Date: Wed, 6 Nov 2019 10:38:50 +0200 Subject: [PATCH 31/33] update mockauth for integration tests, fix bug in integtest17 and typo in integtest32 --- deploy/test/integ_test.py | 4 +- deploy/test/mock_auth.py | 120 ++++++++++++++++++++++++-------------- 2 files changed, 77 insertions(+), 47 deletions(-) diff --git a/deploy/test/integ_test.py b/deploy/test/integ_test.py index 6666b3c0..6dc2c2d4 100644 --- a/deploy/test/integ_test.py +++ b/deploy/test/integ_test.py @@ -389,7 +389,7 @@ async def test_17(): async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: data = await resp.json() assert data['exists'] is True, sys.exit('Query POST Endpoint Error!') - assert len(data['datasetAlleleResponses']) == 1, sys.exit('Should be able to retrieve both requested.') + assert len(data['datasetAlleleResponses']) == 2, sys.exit('Should be able to retrieve both requested.') async def test_18(): @@ -701,7 +701,7 @@ async def test_32(): async with aiohttp.ClientSession() as session: async with session.get('http://localhost:5050/service-info') as resp: data = await resp.json() - # GA4GH Discovery Service-Info is small and its length should be between 3 and 6, when the Beacon info is very long + # GA4GH Discovery Service-Info is small and its length should be at least 5 (required keys), when the Beacon info is very long # https://github.com/ga4gh-discovery/service-info/blob/develop/service-info.yaml assert len(data) >= 5, 'Service info size error' # ga4gh service-info has 5 required keys, and option to add custom keys assert data['type'].startswith('org.ga4gh:beacon'), 'Service type error' # a new key used in beacon network diff --git a/deploy/test/mock_auth.py b/deploy/test/mock_auth.py index 429c230f..b5143d28 100644 --- a/deploy/test/mock_auth.py +++ b/deploy/test/mock_auth.py @@ -27,17 +27,11 @@ def generate_token(): "sub": "requester@elixir-europe.org", "aud": ["aud2", "aud3"], "azp": "azp", - "scope": "openid ga4gh", + "scope": "openid ga4gh_passport_v1", "iss": "http://test.csc.fi", "exp": 9999999999, "iat": 1561621913, - "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102", - "ga4gh_userinfo_claims": [ - "ga4gh.AffiliationAndRole", - "ga4gh.ControlledAccessGrants", - "ga4gh.AcceptedTermsAndPolicies", - "ga4gh.ResearcherStatus" - ] + "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102" } empty_payload = { "sub": "requester@elixir-europe.org", @@ -46,11 +40,77 @@ def generate_token(): "iat": 1547794655, "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102" } + # Craft 4 passports, 2 for bona fide status and 2 for dataset permissions + # passport for bona fide: terms + passport_terms = { + "iss": "http://test.csc.fi", + "sub": "requester@elixir-europe.org", + "ga4gh_visa_v1": { + "type": "AcceptedTermsAndPolicies", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "dac", + "asserted": 1568699331 + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "bed0aff9-29b1-452c-b776-a6f2200b6db1" + } + # passport for bona fide: status + passport_status = { + "iss": "http://test.csc.fi", + "sub": "requester@elixir-europe.org", + "ga4gh_visa_v1": { + "type": "ResearcherStatus", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "peer", + "asserted": 1568699331 + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "722ddde1-617d-4651-992d-f0fdde77bf29" + } + # passport for dataset permissions 1 + passport_dataset1 = { + "iss": "http://test.csc.fi", + "sub": "requester@elixir-europe.org", + "ga4gh_visa_v1": { + "type": "ControlledAccessGrants", + "value": "https://www.ebi.ac.uk/ega/urn:hg:1000genome:controlled", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1568699331 + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "d1d7b521-bd6b-433d-b2d5-3d874aab9d55" + } + # passport for dataset permissions 2 + passport_dataset2 = { + "iss": "http://test.csc.fi", + "sub": "requester@elixir-europe.org", + "ga4gh_visa_v1": { + "type": "ControlledAccessGrants", + "value": "https://www.ebi.ac.uk/ega/urn:hg:1000genome:controlled1", + "source": "https://ga4gh.org/duri/no_org", + "by": "dac", + "asserted": 1568699331 + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "9fa600d6-4148-47c1-b708-36c4ba2e980e" + } public_jwk = jwk.dumps(public_key, kty='RSA') private_jwk = jwk.dumps(pem, kty='RSA') dataset_encoded = jwt.encode(header, dataset_payload, private_jwk).decode('utf-8') empty_encoded = jwt.encode(header, empty_payload, private_jwk).decode('utf-8') - return (public_jwk, dataset_encoded, empty_encoded) + passport_terms_encoded = jwt.encode(header, passport_terms, private_jwk).decode('utf-8') + passport_status_encoded = jwt.encode(header, passport_status, private_jwk).decode('utf-8') + passport_dataset1_encoded = jwt.encode(header, passport_dataset1, private_jwk).decode('utf-8') + passport_dataset2_encoded = jwt.encode(header, passport_dataset2, private_jwk).decode('utf-8') + return (public_jwk, dataset_encoded, empty_encoded, passport_terms_encoded, passport_status_encoded, + passport_dataset1_encoded, passport_dataset2_encoded) DATA = generate_token() @@ -75,42 +135,12 @@ async def userinfo(request): data = {} else: data = { - "ga4gh": { - "AcceptedTermsAndPolicies": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "self", - "asserted": 1539069213, - "expires": 9999999999 - } - ], - "ResearcherStatus": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "peer", - "asserted": 1539017776, - "expires": 9999999999 - } - ], - "ControlledAccessGrants": [ - { - "value": "https://www.ebi.ac.uk/ega/urn:hg:1000genome", - "source": "https://ga4gh.org/duri/no_org", - "by": "dac", - "asserted": 1559893314, - "expires": 9999999999 - }, - { - "value": "https://www.ebi.ac.uk/ega/urn:hg:1000genome:controlled", - "source": "https://ga4gh.org/duri/no_org", - "by": "dac", - "asserted": 1559897355, - "expires": 9999999999 - } - ] - } + "ga4gh_passport_v1": [ + DATA[3], + DATA[4], + DATA[5], + DATA[6] + ] } return web.json_response(data) From bd5ab2de1bdb02a8084335c2a7de36347824498c Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 6 Nov 2019 12:02:15 +0200 Subject: [PATCH 32/33] update permissions docs with ga4gh implementation --- docs/permissions.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/permissions.rst b/docs/permissions.rst index bb70bc75..079d4b01 100644 --- a/docs/permissions.rst +++ b/docs/permissions.rst @@ -10,11 +10,14 @@ As per Beacon specification there are three types of permissions: e.g. ELIXIR bona_fide or researcher status. Requires a JWT Token; * ``CONTROLLED`` - data available for users that have been granted access to a protected resource by a Data Access Committee (DAC). +.. note:: In this page we are illustrating permissions according to: + `GA4GH Authentication and Authorization Infrastructure (AAI) OpenID Connect Profile `_. Registered Data --------------- For retrieving ``REGISTERED`` permissions the function below forwards the TOKEN to another server +(e.g ELIXIR ``userinfo`` endpoint) that validates the information in the token is for a registered user/token and retrieves a JSON message that contains data regarding the Bona Fide status. Custom servers can be set up to mimic this functionality. @@ -28,7 +31,7 @@ researcher. .. literalinclude:: /../beacon_api/permissions/ga4gh.py :language: python - :lines: 103-128 + :lines: 267-305 .. note:: The ``ga4gh.AcceptedTermsAndPolicies`` and ``ga4gh.ResearcherStatus`` keys' values must be equal to those mandated by GA4GH. @@ -47,21 +50,22 @@ there is no standard way for delivering access to datasets via JWT Tokens and each AAI authority provides different claims with different structures. By default we include :meth:`beacon_api.permissions.ga4gh` add-on that offers the means to retrieve -permissions following the `GA4GH format `_ via a token provided by ELIXIR AAI. +permissions following the `GA4GH format `_ +via a token provided by ELIXIR AAI. If a token contains ``ga4gh_userinfo_claims`` JWT claim with ``ga4gh.ControlledAccessGrants``, these are parsed and retrieved as illustrated in: .. literalinclude:: /../beacon_api/permissions/ga4gh.py :language: python - :lines: 85-100 + :lines: 248-264 The permissions are then passed in :meth:`beacon_api.utils.validate` as illustrated below: .. literalinclude:: /../beacon_api/utils/validate.py :language: python :dedent: 16 - :lines: 179-192 + :lines: 183-200 If there is no claim for GA4GH permissions as illustrated above, they will not be added to ``controlled_datasets``. From 78538039622e4babdf32c8605b6a917f76214d86 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 6 Nov 2019 12:03:34 +0200 Subject: [PATCH 33/33] bump version to 1.5.0 --- beacon_api/conf/config.ini | 2 +- deploy/test/auth_test.ini | 2 +- docs/example.rst | 2 +- tests/test.ini | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/beacon_api/conf/config.ini b/beacon_api/conf/config.ini index 1a01eebc..b6bd73c9 100644 --- a/beacon_api/conf/config.ini +++ b/beacon_api/conf/config.ini @@ -7,7 +7,7 @@ title=GA4GHBeacon at CSC # Version of the Beacon implementation -version=1.4.1 +version=1.5.0 # Author of this software author=CSC developers diff --git a/deploy/test/auth_test.ini b/deploy/test/auth_test.ini index 0a5f27c2..57ea9200 100644 --- a/deploy/test/auth_test.ini +++ b/deploy/test/auth_test.ini @@ -7,7 +7,7 @@ title=GA4GHBeacon at CSC # Version of the Beacon implementation -version=1.4.0 +version=1.5.0 # Author of this software author=CSC developers diff --git a/docs/example.rst b/docs/example.rst index 7a5fdb56..07f02e19 100644 --- a/docs/example.rst +++ b/docs/example.rst @@ -126,7 +126,7 @@ Example Response: "createdAt": "2019-09-04T12:00:00Z", "updatedAt": "2019-09-05T05:55:18Z", "environment": "prod", - "version": "1.4.1" + "version": "1.5.0" } Query Endpoint diff --git a/tests/test.ini b/tests/test.ini index 060c87d6..15bced49 100644 --- a/tests/test.ini +++ b/tests/test.ini @@ -7,7 +7,7 @@ title=GA4GHBeacon at CSC # Version of the Beacon implementation -version=1.4.0 +version=1.5.0 # Author of this software author=CSC developers