diff --git a/beacon_api/api/exceptions.py b/beacon_api/api/exceptions.py index 32604b22..c538c137 100644 --- a/beacon_api/api/exceptions.py +++ b/beacon_api/api/exceptions.py @@ -10,37 +10,35 @@ from ..conf import CONFIG_INFO -class BeaconError(Exception): - """BeaconError Exception specific class. +def process_exception_data(request, host, error_code, error): + """Return request data as dictionary. Generates custom exception messages based on request parameters. """ - - def __init__(self, request, host, error_code, error): - """Return request data as dictionary.""" - self.data = {'beaconId': '.'.join(reversed(host.split('.'))), - "apiVersion": __apiVersion__, - 'exists': None, - 'error': {'errorCode': error_code, - 'errorMessage': error}, - 'alleleRequest': {'referenceName': request.get("referenceName", None), - 'referenceBases': request.get("referenceBases", None), - 'includeDatasetResponses': request.get("includeDatasetResponses", "NONE"), - 'assemblyId': request.get("assemblyId", None)}, - # showing empty datasetsAlleRsponse as no datasets found - # A null/None would represent no data while empty array represents - # none found or error and corresponds with exists null/None - 'datasetAlleleResponses': []} - # include datasetIds only if they are specified - # as per specification if they don't exist all datatsets will be queried - # Only one of `alternateBases` or `variantType` is required, validated by schema - oneof_fields = ["alternateBases", "variantType", "start", "end", "startMin", "startMax", - "endMin", "endMax", "datasetIds"] - self.data['alleleRequest'].update({k: request.get(k) for k in oneof_fields if k in request}) - return self.data - - -class BeaconBadRequest(BeaconError): + data = {'beaconId': '.'.join(reversed(host.split('.'))), + "apiVersion": __apiVersion__, + 'exists': None, + 'error': {'errorCode': error_code, + 'errorMessage': error}, + 'alleleRequest': {'referenceName': request.get("referenceName", None), + 'referenceBases': request.get("referenceBases", None), + 'includeDatasetResponses': request.get("includeDatasetResponses", "NONE"), + 'assemblyId': request.get("assemblyId", None)}, + # showing empty datasetsAlleRsponse as no datasets found + # A null/None would represent no data while empty array represents + # none found or error and corresponds with exists null/None + 'datasetAlleleResponses': []} + # include datasetIds only if they are specified + # as per specification if they don't exist all datatsets will be queried + # Only one of `alternateBases` or `variantType` is required, validated by schema + oneof_fields = ["alternateBases", "variantType", "start", "end", "startMin", "startMax", + "endMin", "endMax", "datasetIds"] + data['alleleRequest'].update({k: request.get(k) for k in oneof_fields if k in request}) + + return data + + +class BeaconBadRequest(web.HTTPBadRequest): """Exception returns with 400 code and a custom error message. The method is called if one of the required parameters are missing or invalid. @@ -49,13 +47,12 @@ class BeaconBadRequest(BeaconError): def __init__(self, request, host, error): """Return custom bad request exception.""" - data = super().__init__(request, host, 400, error) - - LOG.error(f'400 ERROR MESSAGE: {error}') - raise web.HTTPBadRequest(content_type="application/json", text=json.dumps(data)) + data = process_exception_data(request, host, 400, error) + super().__init__(text=json.dumps(data), content_type="application/json") + LOG.error(f'401 ERROR MESSAGE: {error}') -class BeaconUnauthorised(BeaconError): +class BeaconUnauthorised(web.HTTPUnauthorized): """HTTP Exception returns with 401 code with a custom error message. The method is called if the user is not registered or if the token from the authentication has expired. @@ -64,17 +61,17 @@ class BeaconUnauthorised(BeaconError): def __init__(self, request, host, error, error_message): """Return custom unauthorized exception.""" - data = super().__init__(request, host, 401, error) + data = process_exception_data(request, host, 401, error) headers_401 = {"WWW-Authenticate": f"Bearer realm=\"{CONFIG_INFO.url}\"\n\ error=\"{error}\"\n\ error_description=\"{error_message}\""} + super().__init__(content_type="application/json", text=json.dumps(data), + # we use auth scheme Bearer by default + headers=headers_401) LOG.error(f'401 ERROR MESSAGE: {error}') - raise web.HTTPUnauthorized(content_type="application/json", text=json.dumps(data), - # we use auth scheme Bearer by default - headers=headers_401) -class BeaconForbidden(BeaconError): +class BeaconForbidden(web.HTTPForbidden): """HTTP Exception returns with 403 code with the error message. `'Resource not granted for authenticated user or resource protected for all users.'`. @@ -84,13 +81,12 @@ class BeaconForbidden(BeaconError): def __init__(self, request, host, error): """Return custom forbidden exception.""" - data = super().__init__(request, host, 403, error) - + data = process_exception_data(request, host, 403, error) + super().__init__(content_type="application/json", text=json.dumps(data)) LOG.error(f'403 ERROR MESSAGE: {error}') - raise web.HTTPForbidden(content_type="application/json", text=json.dumps(data)) -class BeaconServerError(BeaconError): +class BeaconServerError(web.HTTPInternalServerError): """HTTP Exception returns with 500 code with the error message. The 500 error is not specified by the Beacon API, thus as simple error would do. @@ -100,6 +96,5 @@ def __init__(self, error): """Return custom forbidden exception.""" data = {'errorCode': 500, 'errorMessage': error} - + super().__init__(content_type="application/json", text=json.dumps(data)) LOG.error(f'500 ERROR MESSAGE: {error}') - raise web.HTTPInternalServerError(content_type="application/json", text=json.dumps(data)) diff --git a/beacon_api/conf/config.ini b/beacon_api/conf/config.ini index 1a01eebc..b6bd73c9 100644 --- a/beacon_api/conf/config.ini +++ b/beacon_api/conf/config.ini @@ -7,7 +7,7 @@ title=GA4GHBeacon at CSC # Version of the Beacon implementation -version=1.4.1 +version=1.5.0 # Author of this software author=CSC developers diff --git a/beacon_api/permissions/ga4gh.py b/beacon_api/permissions/ga4gh.py index ce873f20..73ff5512 100644 --- a/beacon_api/permissions/ga4gh.py +++ b/beacon_api/permissions/ga4gh.py @@ -1,106 +1,270 @@ """Parse permissions and statuses from ELIXIR token for GA4GH claim. -Current implementation is based on https://docs.google.com/document/d/11Wg-uL75ypU5eNu2p_xh9gspmbGtmLzmdq5VfPHBirE +Current implementation is based on https://github.com/ga4gh/data-security/blob/master/AAI/AAIConnectProfile.md -The JWT contains GA4GH DURI claims in the following form: +The ELIXIR AAI JWT payload contains a GA4GH Passport claim in the scope: .. code-block:: javascript { - "ga4gh.userinfo_claims": [ - "ga4gh.AffiliationAndRole", - "ga4gh.ControlledAccessGrants", - "ga4gh.AcceptedTermsAndPolicies", - "ga4gh.ResearcherStatus" + "scope": "openid ga4gh_passport_v1", + ... + } + +The token is then intended to be delivered to the /userinfo endpoint at ELIXIR AAI, which will respond with a list of +assorted third party JWTs that need to be sifted through to find the relevant tokens. Initially it can not be determined +which tokens contain the desired information. + +.. code-block:: javascript + + { + "ga4gh_passport_v1": [ + "JWT", + "JWT", + "JWT", + ... ] } -The actual dataset permissions are then requested from /userinfo endpoint, and take the form of: +Each third party token (JWT, RFC 7519) consists of three parts separated by dots, in the following manner: `header.payload.signature`. +This module processes the assorted tokens to extract the information they carry and to validate that data. + +The process is carried out as such: +1. Take token (JWT) +2. Decode token +3a. Extract `type` key from the payload portion and check if the token type is of interest +3b. If the token is of the desired type, add it to list and continue, if not, discard this token and move to the next one +4. Extract `jku` key from the header portion (value is a URL that returns a JWK public key set) +5. Decode the complete token with the received public key +6. Validate the token claims +7. Extract the sought-after value from the `ga4gh_visa_v1` object's `value` key + +Dataset permissions are read from GA4GH RI claims of the type "ControlledAccessGrants" .. code-block:: javascript { - "ga4gh": { - "ControlledAccessGrants": [ - { - "value": "https://www.ebi.ac.uk/ega/EGAD000000000001", - "source": "https://ega-archive.org/dacs/EGAC00000000001", - "by": "dac", - "authoriser": "john.doe@dac.org", - "asserted": 1546300800, - "expires": 1577836800 - } - ] + "ga4gh_visa_v1": { + "type": "ControlledAccessGrants", + "value": "https://www.ebi.ac.uk/ega/EGAD000000000001", + "source": "https://ega-archive.org/dacs/EGAC00000000001", + "by": "dac", + "asserted": 1546300800, + "expires": 1577836800 } } -The statuses are also requested from /userinfo endpoint, and take the form of: +Bona Fide status is read from GA4GH RI claims of the type "AcceptedTermsAndPolicies" and "ResearcherStatus", each being in their respective tokens. .. code-block:: javascript { - "ga4gh": { - "AcceptedTermsAndPolicies": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "self", - "asserted": 1539069213, - "expires": 4694742813 - } - ], - "ResearcherStatus": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "peer", - "asserted": 1539017776, - "expires": 1593165413 - } - ] + "ga4gh_visa_v1": { + "type": "AcceptedTermsAndPolicies", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1539069213, + "expires": 4694742813 + } + } + + { + "ga4gh_visa_v1": { + "type": "ResearcherStatus", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "peer", + "asserted": 1539017776, + "expires": 1593165413 } } """ +import base64 +import json + import aiohttp +from authlib.jose import jwt + from ..api.exceptions import BeaconServerError from ..utils.logging import LOG from ..conf import OAUTH2_CONFIG +async def check_ga4gh_token(decoded_data, token, bona_fide_status, dataset_permissions): + """Check the token for GA4GH claims.""" + LOG.debug('Checking GA4GH claims from scope.') + + if 'scope' in decoded_data: + ga4gh_scopes = ['openid', 'ga4gh_passport_v1'] + token_scopes = decoded_data.get('scope').split(' ') + + if all(scope in token_scopes for scope in ga4gh_scopes): + dataset_permissions, bona_fide_status = await get_ga4gh_permissions(token) + + return dataset_permissions, bona_fide_status + + +async def decode_passport(encoded_passport): + """Return decoded header and payload from encoded passport JWT. + + Public-key-less decoding inspired by the PyJWT library https://github.com/jpadilla/pyjwt + """ + LOG.debug('Decoding GA4GH passport.') + + # Convert the token string into bytes for processing, and split it into segments + encoded_passport = encoded_passport.encode('utf-8') # `header.payload.signature` + data, _ = encoded_passport.rsplit(b'.', 1) # data contains header and payload segments, the ignored segment is the signature segment + segments = data.split(b'.', 1) # [header, payload] + + # Intermediary container + verified_segments = [] + # Handle different implementations of public exponent + # 65537 is recommended, but some use 65536 instead + # pad the missing bytes, if needed + for segment in segments: + rem = len(segment) % 4 + if rem > 0: + segment += b'=' * (4 - rem) + verified_segments.append(segment) + + # Decode the verified token segments + decoded_segments = [base64.urlsafe_b64decode(seg) for seg in verified_segments] + + # Convert the decoded segment bytes into dicts for easy access + decoded_data = [json.loads(seg.decode('utf-8')) for seg in decoded_segments] + + return decoded_data + + +async def get_ga4gh_permissions(token): + """Retrieve GA4GH passports (JWTs) from ELIXIR AAI and process them into tangible permissions.""" + LOG.info('Handling permissions.') + + # Return variables + dataset_permissions = [] + bona_fide_status = False + + # Intermediary containers + dataset_passports = [] # [(token, header)] + bona_fide_passports = [] # [(token, header, payload)] + + # Get encoded passports (assorted JWTs) from /userinfo + encoded_passports = await retrieve_user_data(token) + + # Pre-process the assorted passports (JWTs) for dataset permissions and bona fide status parsing + if encoded_passports: + # Decode the passports and check their type + for encoded_passport in encoded_passports: + # Decode passport + header, payload = await decode_passport(encoded_passport) + # Sort passports that carry dataset permissions + pass_type = payload.get('ga4gh_visa_v1', {}).get('type') + if pass_type == 'ControlledAccessGrants': # nosec + dataset_passports.append((encoded_passport, header)) + # Sort passports that MAY carry bona fide status information + if pass_type in ['AcceptedTermsAndPolicies', 'ResearcherStatus']: + bona_fide_passports.append((encoded_passport, header, payload)) + + # Parse dataset passports to extract dataset permissions and validate them + dataset_permissions = await get_ga4gh_controlled(dataset_passports) + # Parse bona fide passports to extract bona fide status and validate them + bona_fide_status = await get_ga4gh_bona_fide(bona_fide_passports) + + return dataset_permissions, bona_fide_status + + async def retrieve_user_data(token): """Retrieve GA4GH user data.""" + LOG.debug('Contacting ELIXIR AAI /userinfo.') headers = {"Authorization": f"Bearer {token}"} try: async with aiohttp.ClientSession(headers=headers) as session: async with session.get(OAUTH2_CONFIG.userinfo) as r: json_body = await r.json() LOG.info('Retrieve GA4GH user data from ELIXIR AAI.') - return json_body.get("ga4gh", None) + return json_body.get("ga4gh_passport_v1", None) except Exception: raise BeaconServerError("Could not retrieve GA4GH user data from ELIXIR AAI.") -async def get_ga4gh_controlled(token, token_claim): - """Retrieve datasets from GA4GH permissions JWT claim.""" +async def get_jwk(url): + """Get JWK set keys to validate JWT.""" + LOG.debug('Retrieving JWK.') + try: + async with aiohttp.ClientSession() as session: + async with session.get(url) as r: + # This can be a single key or a list of JWK + return await r.json() + except Exception: + # This is not a fatal error, it just means that we are unable to validate the permissions, + # but the process should continue even if the validation of one token fails + LOG.error(f'Could not retrieve JWK from {url}') + pass + + +async def validate_passport(passport): + """Decode a passport and validate its contents.""" + LOG.debug('Validating passport.') + + # Passports from `get_ga4gh_controlled()` will be of form + # passport[0] -> encoded passport (JWT) + # passport[1] -> unverified decoded header (contains `jku`) + # Passports from `get_bona_fide_status()` will be of form + # passport[0] -> encoded passport (JWT) + # passport[1] -> unverified decoded header (contains `jku`) + # passport[2] -> unverified decoded payload + + # JWT decoding and validation settings + # The `aud` claim will be ignored, because Beacon has no prior knowledge + # as to where the token has originated from, and is therefore unable to + # verify the intended audience. Other claims will be validated as per usual. + claims_options = { + "aud": { + "essential": False + } + } + + # Attempt to decode the token and validate its contents + # None of the exceptions are fatal, and will not raise an exception + # Because even if the validation of one passport fails, the query + # Should still continue in case other passports are valid + try: + # Get JWK for this passport from a third party provider + # The JWK will be requested from a URL that is given in the `jku` claim in the header + passport_key = await get_jwk(passport[1].get('jku')) + # Decode the JWT using public key + decoded_passport = jwt.decode(passport[0], passport_key, claims_options=claims_options) + # Validate the JWT signature + decoded_passport.validate() + # Return decoded and validated payload contents + return decoded_passport + except Exception as e: + LOG.error(f"Something went wrong when processing JWT tokens: {e}") + + +async def get_ga4gh_controlled(passports): + """Retrieve dataset permissions from GA4GH passport visas.""" # We only want to get datasets once, thus the set which prevents duplicates - LOG.info("Parsing GA4GH dataset permissions claims.") + LOG.info("Parsing GA4GH dataset permissions.") datasets = set() - # Check if the token contains a claim for GA4GH permissions - if 'ga4gh.ControlledAccessGrants' in token_claim: - # Contact /userinfo with token to get GA4GH permissions - ga4gh = await retrieve_user_data(token) - # If the /userinfo endpoint responded with user data, retrieve permissions and parse them - if 'ControlledAccessGrants' in ga4gh: - # Extract dataset key and split by `/` to remove potential URL prefix - # the dataset id in the resulting list will always be the last element - datasets.update([p["value"].split('/')[-1] for p in ga4gh["ControlledAccessGrants"] if "value" in p]) + + for passport in passports: + # Decode passport and validate its contents + validated_passport = await validate_passport(passport) + # Extract dataset id from validated passport + # The dataset value will be of form `https://institution.org/urn:dataset:1000` + # the extracted dataset will always be the last list element when split with `/` + dataset = validated_passport.get('ga4gh_visa_v1', {}).get('value').split('/')[-1] + # Add dataset to set + datasets.add(dataset) return datasets -async def get_ga4gh_bona_fide(token, token_claim): +async def get_ga4gh_bona_fide(passports): """Retrieve Bona Fide status from GA4GH JWT claim.""" LOG.info("Parsing GA4GH bona fide claims.") @@ -108,21 +272,35 @@ async def get_ga4gh_bona_fide(token, token_claim): terms = False status = False - # Check if the token contains claims for GA4GH Bona Fide - if 'ga4gh.AcceptedTermsAndPolicies' in token_claim and 'ga4gh.ResearcherStatus' in token_claim: - # Contact /userinfo with token to get confirmation of Bona Fide status - ga4gh = await retrieve_user_data(token) - # If the /userinfo endpoint responded with user data, retrieve statuses and agreements and parse them - if 'AcceptedTermsAndPolicies' in ga4gh: - for accepted_terms in ga4gh["AcceptedTermsAndPolicies"]: - if accepted_terms.get("value") == OAUTH2_CONFIG.bona_fide_value: - terms = True - if 'ResearcherStatus' in ga4gh: - for researcher_status in ga4gh["ResearcherStatus"]: - if researcher_status.get("value") == OAUTH2_CONFIG.bona_fide_value: - status = True - if terms and status: - # User has agreed to terms and has been recognized by a peer, return True for Bona Fide status - return True - - return False + for passport in passports: + # Check for the `type` of visa to determine if to look for `terms` or `status` + # + # CHECK FOR TERMS + passport_type = passport[2].get('ga4gh_visa_v1', {}).get('type') + passport_value = passport[2].get('ga4gh_visa_v1', {}).get('value') + if passport_type in 'AcceptedTermsAndPolicies' and passport_value == OAUTH2_CONFIG.bona_fide_value: + # This passport has the correct type and value, next step is to validate it + # + # Decode passport and validate its contents + # If the validation passes, terms will be set to True + # If the validation fails, an exception will be raised + # (and ignored since it's not fatal), and terms will remain False + await validate_passport(passport) + # The token is validated, therefore the terms are accepted + terms = True + # + # CHECK FOR STATUS + if passport_value == OAUTH2_CONFIG.bona_fide_value and passport_type == 'ResearcherStatus': + # Check if the visa contains a bona fide value + # This passport has the correct type and value, next step is to validate it + # + # Decode passport and validate its contents + # If the validation passes, status will be set to True + # If the validation fails, an exception will be raised + # (and ignored since it's not fatal), and status will remain False + await validate_passport(passport) + # The token is validated, therefore the status is accepted + status = True + + # User has agreed to terms and has been recognized by a peer, return True for Bona Fide status + return terms and status diff --git a/beacon_api/utils/validate.py b/beacon_api/utils/validate.py index 817eaedb..2bf37de4 100644 --- a/beacon_api/utils/validate.py +++ b/beacon_api/utils/validate.py @@ -13,7 +13,7 @@ from aiocache.serializers import JsonSerializer from ..api.exceptions import BeaconUnauthorised, BeaconBadRequest, BeaconForbidden, BeaconServerError from ..conf import OAUTH2_CONFIG -from ..permissions.ga4gh import get_ga4gh_controlled, get_ga4gh_bona_fide +from ..permissions.ga4gh import check_ga4gh_token from jsonschema import Draft7Validator, validators from jsonschema.exceptions import ValidationError @@ -176,22 +176,22 @@ async def token_middleware(request, handler): } try: - decodedData = jwt.decode(token, key, claims_options=claims_options) # decode the token - decodedData.validate() # validate the token contents + decoded_data = jwt.decode(token, key, claims_options=claims_options) # decode the token + decoded_data.validate() # validate the token contents LOG.info('Auth Token Decoded.') - LOG.info(f'Identified as {decodedData["sub"]} user by {decodedData["iss"]}.') + LOG.info(f'Identified as {decoded_data["sub"]} user by {decoded_data["iss"]}.') # for now the permissions just reflects that the data can be decoded from token # the bona fide status is checked against ELIXIR AAI by default or the URL from config # the bona_fide_status is specific to ELIXIR Tokens - controlled_datasets = set() + # Retrieve GA4GH Passports from /userinfo and process them into dataset permissions and bona fide status + dataset_permissions, bona_fide_status = set(), False + dataset_permissions, bona_fide_status = await check_ga4gh_token(decoded_data, token, bona_fide_status, dataset_permissions) # currently we offer module for parsing GA4GH permissions, but multiple claims and providers can be utilised # by updating the set, meaning replicating the line below with the permissions function and its associated claim # For GA4GH DURI permissions (ELIXIR Permissions API 2.0) - controlled_datasets.update(await get_ga4gh_controlled(token, - decodedData["ga4gh_userinfo_claims"]) if "ga4gh_userinfo_claims" in decodedData else {}) + controlled_datasets = set() + controlled_datasets.update(dataset_permissions) all_controlled = list(controlled_datasets) if bool(controlled_datasets) else None - # For Bona Fide status in GA4GH format - bona_fide_status = await get_ga4gh_bona_fide(token, decodedData["ga4gh_userinfo_claims"]) if "ga4gh_userinfo_claims" in decodedData else False request["token"] = {"bona_fide_status": bona_fide_status, # permissions key will hold the actual permissions found in the token/userinfo e.g. GA4GH permissions "permissions": all_controlled, @@ -206,7 +206,7 @@ async def token_middleware(request, handler): raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Expired signature: {e}') # pragma: no cover except InvalidClaimError as e: raise BeaconForbidden(obj, request.host, f'Token info not corresponding with claim: {e}') # pragma: no cover - except InvalidTokenError as e: + except InvalidTokenError as e: # pragma: no cover raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Invalid authorization token: {e}') # pragma: no cover else: request["token"] = {"bona_fide_status": False, diff --git a/deploy/test/auth_test.ini b/deploy/test/auth_test.ini index 0a5f27c2..57ea9200 100644 --- a/deploy/test/auth_test.ini +++ b/deploy/test/auth_test.ini @@ -7,7 +7,7 @@ title=GA4GHBeacon at CSC # Version of the Beacon implementation -version=1.4.0 +version=1.5.0 # Author of this software author=CSC developers diff --git a/deploy/test/integ_test.py b/deploy/test/integ_test.py index 6666b3c0..6dc2c2d4 100644 --- a/deploy/test/integ_test.py +++ b/deploy/test/integ_test.py @@ -389,7 +389,7 @@ async def test_17(): async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: data = await resp.json() assert data['exists'] is True, sys.exit('Query POST Endpoint Error!') - assert len(data['datasetAlleleResponses']) == 1, sys.exit('Should be able to retrieve both requested.') + assert len(data['datasetAlleleResponses']) == 2, sys.exit('Should be able to retrieve both requested.') async def test_18(): @@ -701,7 +701,7 @@ async def test_32(): async with aiohttp.ClientSession() as session: async with session.get('http://localhost:5050/service-info') as resp: data = await resp.json() - # GA4GH Discovery Service-Info is small and its length should be between 3 and 6, when the Beacon info is very long + # GA4GH Discovery Service-Info is small and its length should be at least 5 (required keys), when the Beacon info is very long # https://github.com/ga4gh-discovery/service-info/blob/develop/service-info.yaml assert len(data) >= 5, 'Service info size error' # ga4gh service-info has 5 required keys, and option to add custom keys assert data['type'].startswith('org.ga4gh:beacon'), 'Service type error' # a new key used in beacon network diff --git a/deploy/test/mock_auth.py b/deploy/test/mock_auth.py index 429c230f..b5143d28 100644 --- a/deploy/test/mock_auth.py +++ b/deploy/test/mock_auth.py @@ -27,17 +27,11 @@ def generate_token(): "sub": "requester@elixir-europe.org", "aud": ["aud2", "aud3"], "azp": "azp", - "scope": "openid ga4gh", + "scope": "openid ga4gh_passport_v1", "iss": "http://test.csc.fi", "exp": 9999999999, "iat": 1561621913, - "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102", - "ga4gh_userinfo_claims": [ - "ga4gh.AffiliationAndRole", - "ga4gh.ControlledAccessGrants", - "ga4gh.AcceptedTermsAndPolicies", - "ga4gh.ResearcherStatus" - ] + "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102" } empty_payload = { "sub": "requester@elixir-europe.org", @@ -46,11 +40,77 @@ def generate_token(): "iat": 1547794655, "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102" } + # Craft 4 passports, 2 for bona fide status and 2 for dataset permissions + # passport for bona fide: terms + passport_terms = { + "iss": "http://test.csc.fi", + "sub": "requester@elixir-europe.org", + "ga4gh_visa_v1": { + "type": "AcceptedTermsAndPolicies", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "dac", + "asserted": 1568699331 + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "bed0aff9-29b1-452c-b776-a6f2200b6db1" + } + # passport for bona fide: status + passport_status = { + "iss": "http://test.csc.fi", + "sub": "requester@elixir-europe.org", + "ga4gh_visa_v1": { + "type": "ResearcherStatus", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "peer", + "asserted": 1568699331 + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "722ddde1-617d-4651-992d-f0fdde77bf29" + } + # passport for dataset permissions 1 + passport_dataset1 = { + "iss": "http://test.csc.fi", + "sub": "requester@elixir-europe.org", + "ga4gh_visa_v1": { + "type": "ControlledAccessGrants", + "value": "https://www.ebi.ac.uk/ega/urn:hg:1000genome:controlled", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1568699331 + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "d1d7b521-bd6b-433d-b2d5-3d874aab9d55" + } + # passport for dataset permissions 2 + passport_dataset2 = { + "iss": "http://test.csc.fi", + "sub": "requester@elixir-europe.org", + "ga4gh_visa_v1": { + "type": "ControlledAccessGrants", + "value": "https://www.ebi.ac.uk/ega/urn:hg:1000genome:controlled1", + "source": "https://ga4gh.org/duri/no_org", + "by": "dac", + "asserted": 1568699331 + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "9fa600d6-4148-47c1-b708-36c4ba2e980e" + } public_jwk = jwk.dumps(public_key, kty='RSA') private_jwk = jwk.dumps(pem, kty='RSA') dataset_encoded = jwt.encode(header, dataset_payload, private_jwk).decode('utf-8') empty_encoded = jwt.encode(header, empty_payload, private_jwk).decode('utf-8') - return (public_jwk, dataset_encoded, empty_encoded) + passport_terms_encoded = jwt.encode(header, passport_terms, private_jwk).decode('utf-8') + passport_status_encoded = jwt.encode(header, passport_status, private_jwk).decode('utf-8') + passport_dataset1_encoded = jwt.encode(header, passport_dataset1, private_jwk).decode('utf-8') + passport_dataset2_encoded = jwt.encode(header, passport_dataset2, private_jwk).decode('utf-8') + return (public_jwk, dataset_encoded, empty_encoded, passport_terms_encoded, passport_status_encoded, + passport_dataset1_encoded, passport_dataset2_encoded) DATA = generate_token() @@ -75,42 +135,12 @@ async def userinfo(request): data = {} else: data = { - "ga4gh": { - "AcceptedTermsAndPolicies": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "self", - "asserted": 1539069213, - "expires": 9999999999 - } - ], - "ResearcherStatus": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "peer", - "asserted": 1539017776, - "expires": 9999999999 - } - ], - "ControlledAccessGrants": [ - { - "value": "https://www.ebi.ac.uk/ega/urn:hg:1000genome", - "source": "https://ga4gh.org/duri/no_org", - "by": "dac", - "asserted": 1559893314, - "expires": 9999999999 - }, - { - "value": "https://www.ebi.ac.uk/ega/urn:hg:1000genome:controlled", - "source": "https://ga4gh.org/duri/no_org", - "by": "dac", - "asserted": 1559897355, - "expires": 9999999999 - } - ] - } + "ga4gh_passport_v1": [ + DATA[3], + DATA[4], + DATA[5], + DATA[6] + ] } return web.json_response(data) diff --git a/docs/example.rst b/docs/example.rst index 7a5fdb56..07f02e19 100644 --- a/docs/example.rst +++ b/docs/example.rst @@ -126,7 +126,7 @@ Example Response: "createdAt": "2019-09-04T12:00:00Z", "updatedAt": "2019-09-05T05:55:18Z", "environment": "prod", - "version": "1.4.1" + "version": "1.5.0" } Query Endpoint diff --git a/docs/permissions.rst b/docs/permissions.rst index bb70bc75..079d4b01 100644 --- a/docs/permissions.rst +++ b/docs/permissions.rst @@ -10,11 +10,14 @@ As per Beacon specification there are three types of permissions: e.g. ELIXIR bona_fide or researcher status. Requires a JWT Token; * ``CONTROLLED`` - data available for users that have been granted access to a protected resource by a Data Access Committee (DAC). +.. note:: In this page we are illustrating permissions according to: + `GA4GH Authentication and Authorization Infrastructure (AAI) OpenID Connect Profile `_. Registered Data --------------- For retrieving ``REGISTERED`` permissions the function below forwards the TOKEN to another server +(e.g ELIXIR ``userinfo`` endpoint) that validates the information in the token is for a registered user/token and retrieves a JSON message that contains data regarding the Bona Fide status. Custom servers can be set up to mimic this functionality. @@ -28,7 +31,7 @@ researcher. .. literalinclude:: /../beacon_api/permissions/ga4gh.py :language: python - :lines: 103-128 + :lines: 267-305 .. note:: The ``ga4gh.AcceptedTermsAndPolicies`` and ``ga4gh.ResearcherStatus`` keys' values must be equal to those mandated by GA4GH. @@ -47,21 +50,22 @@ there is no standard way for delivering access to datasets via JWT Tokens and each AAI authority provides different claims with different structures. By default we include :meth:`beacon_api.permissions.ga4gh` add-on that offers the means to retrieve -permissions following the `GA4GH format `_ via a token provided by ELIXIR AAI. +permissions following the `GA4GH format `_ +via a token provided by ELIXIR AAI. If a token contains ``ga4gh_userinfo_claims`` JWT claim with ``ga4gh.ControlledAccessGrants``, these are parsed and retrieved as illustrated in: .. literalinclude:: /../beacon_api/permissions/ga4gh.py :language: python - :lines: 85-100 + :lines: 248-264 The permissions are then passed in :meth:`beacon_api.utils.validate` as illustrated below: .. literalinclude:: /../beacon_api/utils/validate.py :language: python :dedent: 16 - :lines: 179-192 + :lines: 183-200 If there is no claim for GA4GH permissions as illustrated above, they will not be added to ``controlled_datasets``. diff --git a/tests/test.ini b/tests/test.ini index 060c87d6..15bced49 100644 --- a/tests/test.ini +++ b/tests/test.ini @@ -7,7 +7,7 @@ title=GA4GHBeacon at CSC # Version of the Beacon implementation -version=1.4.0 +version=1.5.0 # Author of this software author=CSC developers diff --git a/tests/test_app.py b/tests/test_app.py index 5f8e2d97..ae5f63b8 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -37,14 +37,14 @@ def generate_token(issuer): "iss": issuer, "aud": "audience", "exp": 9999999999, - "sub": "smth@elixir-europe.org" + "sub": "smth@smth.org" } token = jwt.encode(header, payload, pem).decode('utf-8') return token, pem def generate_bad_token(): - """Mock ELIXIR AAI token.""" + """Mock AAI token.""" pem = { "kty": "oct", "kid": "018c0ae5-4d9b-471b-bfd6-eef314bc7037", @@ -286,7 +286,7 @@ async def test_invalid_scheme_get_query(self): @asynctest.mock.patch('beacon_api.app.query_request_handler', side_effect=json.dumps(PARAMS)) @unittest_run_loop async def test_valid_token_get_query(self, mock_handler, mock_object): - """Test valid token GET query endpoint, invalid scheme.""" + """Test valid token GET query endpoint.""" token = os.environ.get('TOKEN') resp = await self.client.request("POST", "/query", data=json.dumps(PARAMS), diff --git a/tests/test_basic.py b/tests/test_basic.py index f8709227..584eaa73 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -4,8 +4,9 @@ from beacon_api.conf.config import init_db_pool from beacon_api.api.query import access_resolution from beacon_api.utils.validate import token_scheme_check, verify_aud_claim -from beacon_api.permissions.ga4gh import get_ga4gh_controlled, get_ga4gh_bona_fide -from .test_app import PARAMS +from beacon_api.permissions.ga4gh import get_ga4gh_controlled, get_ga4gh_bona_fide, validate_passport +from beacon_api.permissions.ga4gh import check_ga4gh_token, decode_passport, get_ga4gh_permissions +from .test_app import PARAMS, generate_token from testfixtures import TempDirectory from test.support import EnvironmentVarGuard @@ -17,6 +18,21 @@ def mock_token(bona_fide, permissions, auth): "authenticated": auth} +class MockDecodedPassport: + """Mock JWT.""" + + def __init__(self, validated=True): + """Initialise mock JWT.""" + self.validated = validated + + def validate(self): + """Invoke validate.""" + if self.validated: + return True + else: + raise Exception + + class MockBeaconDB: """BeaconDB mock. @@ -290,105 +306,151 @@ def test_access_resolution_controlled_never_reached2(self): with self.assertRaises(aiohttp.web_exceptions.HTTPForbidden): access_resolution(request, token, host, [], [], [8]) - @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') - async def test_ga4gh_controlled(self, userinfo): + @asynctest.mock.patch('beacon_api.permissions.ga4gh.validate_passport') + async def test_ga4gh_controlled(self, m_validation): """Test ga4gh permissions claim parsing.""" - userinfo.return_value = { - "ControlledAccessGrants": [ - { - "value": "https://www.ebi.ac.uk/ega/EGAD000000000001", - "source": "https://ega-archive.org/dacs/EGAC00000000001", - "by": "dac", - "authoriser": "john.doe@dac.org", - "asserted": 1546300800, - "expires": 1577836800 - }, - { - "value": "https://www.ebi.ac.uk/ega/EGAD000000000002", - "source": "https://ega-archive.org/dacs/EGAC00000000001", - "by": "dac", - "authoriser": "john.doe@dac.org", - "asserted": 1546300800, - "expires": 1577836800 - }, - { - "value": "no-prefix-dataset", - "source": "https://ega-archive.org/dacs/EGAC00000000001", - "by": "dac", - "authoriser": "john.doe@dac.org", - "asserted": 1546300800, - "expires": 1577836800 - } - ] - } - # Good test: claims OK, userinfo OK - token_claim = ["ga4gh.ControlledAccessGrants"] - token = 'this_is_a_jwt' - datasets = await get_ga4gh_controlled(token, token_claim) - self.assertEqual(datasets, {'EGAD000000000001', 'EGAD000000000002', 'no-prefix-dataset'}) # has permissions - # Bad test: no claims, userinfo OK - token_claim = [] - token = 'this_is_a_jwt' - datasets = await get_ga4gh_controlled(token, token_claim) - self.assertEqual(datasets, set()) # doesn't have permissions - # Bad test: claims OK, no userinfo - userinfo.return_value = {} - token_claim = ["ga4gh.ControlledAccessGrants"] - token = 'this_is_a_jwt' - datasets = await get_ga4gh_controlled(token, token_claim) - self.assertEqual(datasets, set()) # doesn't have permissions - # Bad test: no claims, no userinfo - token_claim = [] - token = 'this_is_a_jwt' - datasets = await get_ga4gh_controlled(token, token_claim) - self.assertEqual(datasets, set()) # doesn't have permissions - - @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') - async def test_ga4gh_bona_fide(self, userinfo): + # Test: no passports, no permissions + datasets = await get_ga4gh_controlled([]) + self.assertEqual(datasets, set()) + # Test: 1 passport, 1 unique dataset, 1 permission + passport = {"ga4gh_visa_v1": {"type": "ControlledAccessGrants", + "value": "https://institution.org/EGAD01", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1539069213, + "expires": 4694742813}} + m_validation.return_value = passport + dataset = await get_ga4gh_controlled([{}]) # one passport + self.assertEqual(dataset, {'EGAD01'}) + # Test: 2 passports, 1 unique dataset, 1 permission (permissions must not be duplicated) + passport = {"ga4gh_visa_v1": {"type": "ControlledAccessGrants", + "value": "https://institution.org/EGAD01", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1539069213, + "expires": 4694742813}} + m_validation.return_value = passport + dataset = await get_ga4gh_controlled([{}, {}]) # two passports + self.assertEqual(dataset, {'EGAD01'}) + # Test: 2 passports, 2 unique datasets, 2 permissions + # Can't test this case with the current design! + # Would need a way for validate_passport() to mock two different results + + async def test_ga4gh_bona_fide(self): """Test ga4gh statuses claim parsing.""" - userinfo.return_value = { - "AcceptedTermsAndPolicies": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "self", - "asserted": 1539069213, - "expires": 4694742813 - } - ], - "ResearcherStatus": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "peer", - "asserted": 1539017776, - "expires": 1593165413 - } - ] - } - # Good test: claims OK, userinfo OK - token_claim = ["ga4gh.AcceptedTermsAndPolicies", "ga4gh.ResearcherStatus"] - token = 'this_is_a_jwt' - bona_fide_status = await get_ga4gh_bona_fide(token, token_claim) + passports = [("enc", "header", { + "ga4gh_visa_v1": {"type": "AcceptedTermsAndPolicies", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1539069213, + "expires": 4694742813} + }), + ("enc", "header", { + "ga4gh_visa_v1": {"type": "ResearcherStatus", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "peer", + "asserted": 1539017776, + "expires": 1593165413}})] + # Good test: both required passport types contained the correct value + bona_fide_status = await get_ga4gh_bona_fide(passports) self.assertEqual(bona_fide_status, True) # has bona fide - # Bad test: no claims, userinfo OK - token_claim = [] - token = 'this_is_a_jwt' - bona_fide_status = await get_ga4gh_bona_fide(token, token_claim) - self.assertEqual(bona_fide_status, False) # doesn't have bona fide - # Bad test: claims OK, no userinfo - userinfo.return_value = {} - token_claim = ["ga4gh.AcceptedTermsAndPolicies", "ga4gh.ResearcherStatus"] - token = 'this_is_a_jwt' - bona_fide_status = await get_ga4gh_bona_fide(token, token_claim) - self.assertEqual(bona_fide_status, False) # doesn't have bona fide - # Bad test: no claims, no userinfo - userinfo.return_value = {} - token_claim = [] - token = 'this_is_a_jwt' - bona_fide_status = await get_ga4gh_bona_fide(token, token_claim) + # Bad test: missing passports of required type + passports_empty = [] + bona_fide_status = await get_ga4gh_bona_fide(passports_empty) self.assertEqual(bona_fide_status, False) # doesn't have bona fide + @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_jwk') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.jwt') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.LOG') + async def test_validate_passport(self, mock_log, m_jwt, m_jwk): + """Test passport validation.""" + m_jwk.return_value = 'jwk' + # Test: validation passed + m_jwt.return_value = MockDecodedPassport() + await validate_passport({}) + + # # Test: validation failed + m_jwt.return_value = MockDecodedPassport(validated=False) + # with self.assertRaises(Exception): + await validate_passport({}) + # we are not raising the exception we are just doing a log + # need to assert the log called + mock_log.error.assert_called_with("Something went wrong when processing JWT tokens: 1") + + @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_ga4gh_permissions') + async def test_check_ga4gh_token(self, m_get_perms): + """Test token scopes.""" + # Test: no scope found + decoded_data = {} + dataset_permissions, bona_fide_status = await check_ga4gh_token(decoded_data, {}, False, set()) + self.assertEqual(dataset_permissions, set()) + self.assertEqual(bona_fide_status, False) + # Test: scope is ok, but no claims + decoded_data = {'scope': ''} + dataset_permissions, bona_fide_status = await check_ga4gh_token(decoded_data, {}, False, set()) + self.assertEqual(dataset_permissions, set()) + self.assertEqual(bona_fide_status, False) + # Test: scope is ok, claims are ok + m_get_perms.return_value = {'EGAD01'}, True + decoded_data = {'scope': 'openid ga4gh_passport_v1'} + dataset_permissions, bona_fide_status = await check_ga4gh_token(decoded_data, {}, False, set()) + self.assertEqual(dataset_permissions, {'EGAD01'}) + self.assertEqual(bona_fide_status, True) + + async def test_decode_passport(self): + """Test key-less JWT decoding.""" + token, _ = generate_token('http://test.csc.fi') + header, payload = await decode_passport(token) + self.assertEqual(header.get('alg'), 'HS256') + self.assertEqual(payload.get('iss'), 'http://test.csc.fi') + + @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_ga4gh_bona_fide') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_ga4gh_controlled') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.decode_passport') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') + async def test_get_ga4gh_permissions(self, m_userinfo, m_decode, m_controlled, m_bonafide): + """Test GA4GH permissions main function.""" + # Test: no data (nothing) + m_userinfo.return_value = [{}] + header = {} + payload = {} + m_decode.return_value = header, payload + m_controlled.return_value = set() + m_bonafide.return_value = False + dataset_permissions, bona_fide_status = await get_ga4gh_permissions('token') + self.assertEqual(dataset_permissions, set()) + self.assertEqual(bona_fide_status, False) + # Test: permissions + m_userinfo.return_value = [{}] + header = {} + payload = { + 'ga4gh_visa_v1': { + 'type': 'ControlledAccessGrants' + } + } + m_decode.return_value = header, payload + m_controlled.return_value = {'EGAD01'} + m_bonafide.return_value = False + dataset_permissions, bona_fide_status = await get_ga4gh_permissions('token') + self.assertEqual(dataset_permissions, {'EGAD01'}) + self.assertEqual(bona_fide_status, False) + # Test: bona fide + m_userinfo.return_value = [{}] + header = {} + payload = { + 'ga4gh_visa_v1': { + 'type': 'ResearcherStatus' + } + } + m_decode.return_value = header, payload + m_controlled.return_value = set() + m_bonafide.return_value = True + dataset_permissions, bona_fide_status = await get_ga4gh_permissions('token') + self.assertEqual(dataset_permissions, set()) + self.assertEqual(bona_fide_status, True) + if __name__ == '__main__': asynctest.main() diff --git a/tests/test_db_load.py b/tests/test_db_load.py index 8fc78045..cbdbea23 100644 --- a/tests/test_db_load.py +++ b/tests/test_db_load.py @@ -308,7 +308,7 @@ async def test_load_datafile(self, db_mock, mock_log): @asynctest.mock.patch('beacon_api.utils.db_load.LOG') @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') async def test_insert_variants(self, db_mock, mock_log): - """Test load_datafile.""" + """Test insert variants.""" db_mock.return_value = Connection() await self._db.connection() db_mock.assert_called() diff --git a/tests/test_response.py b/tests/test_response.py index daf54e4a..ef55e970 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -3,7 +3,7 @@ import asynctest from beacon_api.schemas import load_schema from beacon_api.utils.validate import get_key -from beacon_api.permissions.ga4gh import retrieve_user_data +from beacon_api.permissions.ga4gh import retrieve_user_data, get_jwk import jsonschema import json import aiohttp @@ -120,7 +120,7 @@ async def test_bad_none_retrieve_user_data(self, m): @aioresponses() async def test_good_retrieve_user_data(self, m): """Test a passing call to retrieve user data.""" - m.get("http://test.csc.fi/userinfo", payload={"ga4gh": [{}]}) + m.get("http://test.csc.fi/userinfo", payload={"ga4gh_passport_v1": [{}]}) user_data = await retrieve_user_data('good_token') self.assertEqual(user_data, [{}]) @@ -146,6 +146,30 @@ async def test_get_key(self, m): self.assertTrue(isinstance(result, dict)) self.assertTrue(result["keys"][0]['alg'], 'RSA256') + @aioresponses() + async def test_get_jwk(self, m): + """Test get JWK.""" + data = { + "keys": [ + { + "alg": "RS256", + "kty": "RSA", + "use": "sig", + "n": "public_key", + "e": "AQAB" + } + ]} + m.get("http://test.csc.fi/jwk", payload=data) + result = await get_jwk('http://test.csc.fi/jwk') + self.assertTrue(isinstance(result, dict)) + self.assertTrue(result["keys"][0]['alg'], 'RSA256') + + @asynctest.mock.patch('beacon_api.permissions.ga4gh.LOG') + async def test_get_jwk_bad(self, mock_log): + """Test get JWK exception log.""" + await get_jwk('http://test.csc.fi/jwk') + mock_log.error.assert_called_with("Could not retrieve JWK from http://test.csc.fi/jwk") + @asynctest.mock.patch('beacon_api.utils.validate.OAUTH2_CONFIG', return_value={'server': None}) async def test_bad_get_key(self, oauth_none): """Test bad test_get_key."""