diff --git a/.coveragerc b/.coveragerc index 7e254e0b..0fb74614 100644 --- a/.coveragerc +++ b/.coveragerc @@ -14,6 +14,7 @@ omit = [report] # Regexes for lines to exclude from consideration exclude_lines = + pragma: no cover # Don't complain about missing debug-only code: def __repr__ if self\.debug diff --git a/Pipfile b/Pipfile deleted file mode 100644 index d3fda1c9..00000000 --- a/Pipfile +++ /dev/null @@ -1,18 +0,0 @@ -[[source]] -url = "https://pypi.org/simple" -verify_ssl = true -name = "pypi" - -[dev-packages] - -[packages] -aiohttp = "*" -asyncpg = "*" -cryptography = "*" -jsonschema = "==3.0.0" -"cyvcf2" = "*" -PyJWT = "*" -Cython = "*" - -[requires] -python_version = "3.6" diff --git a/beacon_api/__init__.py b/beacon_api/__init__.py index f8bf597a..f92eecc2 100644 --- a/beacon_api/__init__.py +++ b/beacon_api/__init__.py @@ -13,7 +13,7 @@ __license__ = CONFIG_INFO.license __copyright__ = CONFIG_INFO.copyright __docs_url__ = CONFIG_INFO.docs_url -__handover_drs__ = CONFIG_INFO.handover_drs +__handover_drs__ = CONFIG_INFO.handover_drs.rstrip("/") __handover_datasets__ = CONFIG_INFO.handover_datasets __handover_beacon__ = CONFIG_INFO.handover_beacon __handover_base__ = CONFIG_INFO.handover_base @@ -26,7 +26,6 @@ __createtime__ = CONFIG_INFO.createtime __updatetime__ = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ') # Every restart of the application means an update to it - __org_id__ = CONFIG_INFO.org_id __org_name__ = CONFIG_INFO.org_name __org_description__ = CONFIG_INFO.org_description @@ -37,3 +36,7 @@ __org_info__ = {'orgInfo': CONFIG_INFO.org_info} __sample_queries__ = SAMPLE_QUERIES + +# GA4GH Discovery +__service_type__ = f'{CONFIG_INFO.service_type}:{__apiVersion__}' +__service_env__ = CONFIG_INFO.environment diff --git a/beacon_api/api/exceptions.py b/beacon_api/api/exceptions.py index 32604b22..c538c137 100644 --- a/beacon_api/api/exceptions.py +++ b/beacon_api/api/exceptions.py @@ -10,37 +10,35 @@ from ..conf import CONFIG_INFO -class 
BeaconError(Exception): - """BeaconError Exception specific class. +def process_exception_data(request, host, error_code, error): + """Return request data as dictionary. Generates custom exception messages based on request parameters. """ - - def __init__(self, request, host, error_code, error): - """Return request data as dictionary.""" - self.data = {'beaconId': '.'.join(reversed(host.split('.'))), - "apiVersion": __apiVersion__, - 'exists': None, - 'error': {'errorCode': error_code, - 'errorMessage': error}, - 'alleleRequest': {'referenceName': request.get("referenceName", None), - 'referenceBases': request.get("referenceBases", None), - 'includeDatasetResponses': request.get("includeDatasetResponses", "NONE"), - 'assemblyId': request.get("assemblyId", None)}, - # showing empty datasetsAlleRsponse as no datasets found - # A null/None would represent no data while empty array represents - # none found or error and corresponds with exists null/None - 'datasetAlleleResponses': []} - # include datasetIds only if they are specified - # as per specification if they don't exist all datatsets will be queried - # Only one of `alternateBases` or `variantType` is required, validated by schema - oneof_fields = ["alternateBases", "variantType", "start", "end", "startMin", "startMax", - "endMin", "endMax", "datasetIds"] - self.data['alleleRequest'].update({k: request.get(k) for k in oneof_fields if k in request}) - return self.data - - -class BeaconBadRequest(BeaconError): + data = {'beaconId': '.'.join(reversed(host.split('.'))), + "apiVersion": __apiVersion__, + 'exists': None, + 'error': {'errorCode': error_code, + 'errorMessage': error}, + 'alleleRequest': {'referenceName': request.get("referenceName", None), + 'referenceBases': request.get("referenceBases", None), + 'includeDatasetResponses': request.get("includeDatasetResponses", "NONE"), + 'assemblyId': request.get("assemblyId", None)}, + # showing empty datasetsAlleRsponse as no datasets found + # A null/None would 
represent no data while empty array represents + none found or error and corresponds with exists null/None + 'datasetAlleleResponses': []} + # include datasetIds only if they are specified + # as per specification if they don't exist all datasets will be queried + # Only one of `alternateBases` or `variantType` is required, validated by schema + oneof_fields = ["alternateBases", "variantType", "start", "end", "startMin", "startMax", + "endMin", "endMax", "datasetIds"] + data['alleleRequest'].update({k: request.get(k) for k in oneof_fields if k in request}) + + return data + + +class BeaconBadRequest(web.HTTPBadRequest): """Exception returns with 400 code and a custom error message. The method is called if one of the required parameters are missing or invalid. @@ -49,13 +47,12 @@ class BeaconBadRequest(BeaconError): def __init__(self, request, host, error): """Return custom bad request exception.""" - data = super().__init__(request, host, 400, error) - - LOG.error(f'400 ERROR MESSAGE: {error}') - raise web.HTTPBadRequest(content_type="application/json", text=json.dumps(data)) + data = process_exception_data(request, host, 400, error) + super().__init__(text=json.dumps(data), content_type="application/json") + LOG.error(f'400 ERROR MESSAGE: {error}') -class BeaconUnauthorised(BeaconError): +class BeaconUnauthorised(web.HTTPUnauthorized): """HTTP Exception returns with 401 code with a custom error message. The method is called if the user is not registered or if the token from the authentication has expired. 
@@ -64,17 +61,17 @@ class BeaconUnauthorised(BeaconError): def __init__(self, request, host, error, error_message): """Return custom unauthorized exception.""" - data = super().__init__(request, host, 401, error) + data = process_exception_data(request, host, 401, error) headers_401 = {"WWW-Authenticate": f"Bearer realm=\"{CONFIG_INFO.url}\"\n\ error=\"{error}\"\n\ error_description=\"{error_message}\""} + super().__init__(content_type="application/json", text=json.dumps(data), + # we use auth scheme Bearer by default + headers=headers_401) LOG.error(f'401 ERROR MESSAGE: {error}') - raise web.HTTPUnauthorized(content_type="application/json", text=json.dumps(data), - # we use auth scheme Bearer by default - headers=headers_401) -class BeaconForbidden(BeaconError): +class BeaconForbidden(web.HTTPForbidden): """HTTP Exception returns with 403 code with the error message. `'Resource not granted for authenticated user or resource protected for all users.'`. @@ -84,13 +81,12 @@ class BeaconForbidden(BeaconError): def __init__(self, request, host, error): """Return custom forbidden exception.""" - data = super().__init__(request, host, 403, error) - + data = process_exception_data(request, host, 403, error) + super().__init__(content_type="application/json", text=json.dumps(data)) LOG.error(f'403 ERROR MESSAGE: {error}') - raise web.HTTPForbidden(content_type="application/json", text=json.dumps(data)) -class BeaconServerError(BeaconError): +class BeaconServerError(web.HTTPInternalServerError): """HTTP Exception returns with 500 code with the error message. The 500 error is not specified by the Beacon API, thus as simple error would do. 
@@ -100,6 +96,5 @@ def __init__(self, error): """Return custom forbidden exception.""" data = {'errorCode': 500, 'errorMessage': error} - + super().__init__(content_type="application/json", text=json.dumps(data)) LOG.error(f'500 ERROR MESSAGE: {error}') - raise web.HTTPInternalServerError(content_type="application/json", text=json.dumps(data)) diff --git a/beacon_api/api/info.py b/beacon_api/api/info.py index 379e5c97..634a72d1 100644 --- a/beacon_api/api/info.py +++ b/beacon_api/api/info.py @@ -9,7 +9,7 @@ from .. import __apiVersion__, __title__, __version__, __description__, __url__, __alturl__, __handover_beacon__ from .. import __createtime__, __updatetime__, __org_id__, __org_name__, __org_description__ from .. import __org_address__, __org_logoUrl__, __org_welcomeUrl__, __org_info__, __org_contactUrl__ -from .. import __sample_queries__, __handover_drs__, __docs_url__ +from .. import __sample_queries__, __handover_drs__, __docs_url__, __service_type__, __service_env__ from ..utils.data_query import fetch_dataset_metadata from ..extensions.handover import make_handover from aiocache import cached @@ -26,13 +26,18 @@ async def ga4gh_info(host): # TO DO implement some fallback mechanism for ID 'id': '.'.join(reversed(host.split('.'))), 'name': __title__, - "type": "urn:ga4gh:beacon", + "type": __service_type__, 'description': __description__, - 'documentationUrl': __docs_url__, - "organization": __org_id__, + "organization": { + "name": __org_name__, + "url": __org_welcomeUrl__, + }, 'contactUrl': __org_contactUrl__, - 'version': __version__, - "extension": {} + 'documentationUrl': __docs_url__, + 'createdAt': __createtime__, + 'updatedAt': __updatetime__, + 'environment': __service_env__, + 'version': __version__ } return beacon_info diff --git a/beacon_api/app.py b/beacon_api/app.py index 7b97801d..cac8d399 100644 --- a/beacon_api/app.py +++ b/beacon_api/app.py @@ -82,7 +82,8 @@ async def initialize(app): async def destroy(app): """Upon server close, close 
the DB connection pool.""" - await app['pool'].close() + # will defer this to asyncpg + await app['pool'].close() # pragma: no cover def set_cors(server): diff --git a/beacon_api/conf/__init__.py b/beacon_api/conf/__init__.py index 0b66620c..336f7119 100644 --- a/beacon_api/conf/__init__.py +++ b/beacon_api/conf/__init__.py @@ -33,6 +33,8 @@ def parse_config_file(path): 'url': config.get('beacon_api_info', 'url'), 'alturl': config.get('beacon_api_info', 'alturl'), 'createtime': config.get('beacon_api_info', 'createtime'), + 'service_type': config.get('beacon_api_info', 'service_type'), + 'environment': config.get('beacon_api_info', 'environment'), 'org_id': config.get('organisation_info', 'org_id'), 'org_name': config.get('organisation_info', 'org_name'), 'org_description': config.get('organisation_info', 'org_description'), diff --git a/beacon_api/conf/config.ini b/beacon_api/conf/config.ini index 850dc69f..b6bd73c9 100644 --- a/beacon_api/conf/config.ini +++ b/beacon_api/conf/config.ini @@ -7,7 +7,7 @@ title=GA4GHBeacon at CSC # Version of the Beacon implementation -version=1.4.0 +version=1.5.0 # Author of this software author=CSC developers @@ -41,6 +41,13 @@ alturl= # Datetime when this Beacon was created createtime=2018-07-25T00:00:00Z +# GA4GH Discovery type `groupId` and `artifactId`, joined in /service-info with apiVersion +# See https://github.com/ga4gh-discovery/ga4gh-service-registry for more information and possible values +service_type=org.ga4gh:beacon + +# GA4GH Discovery server environment, possible values: prod, dev, test +environment=prod + [organisation_info] # Globally unique identifier for organisation that hosts this Beacon service diff --git a/beacon_api/extensions/handover.py b/beacon_api/extensions/handover.py index 5f25f33c..245caa8d 100644 --- a/beacon_api/extensions/handover.py +++ b/beacon_api/extensions/handover.py @@ -22,7 +22,7 @@ def make_handover(paths, datasetIds, chr='', start=0, end=0, ref='', alt='', var for dataset in 
set(datasetIds): handovers.append({"handoverType": {"id": "CUSTOM", "label": label}, "description": desc, - "url": __handover_drs__ + path.format(dataset=dataset, chr=chr, start=start, - end=end, ref=ref, alt=alt)}) + "url": __handover_drs__ + "/" + path.format(dataset=dataset, chr=chr, start=start, + end=end, ref=ref, alt=alt)}) return handovers diff --git a/beacon_api/permissions/ga4gh.py b/beacon_api/permissions/ga4gh.py index ce873f20..73ff5512 100644 --- a/beacon_api/permissions/ga4gh.py +++ b/beacon_api/permissions/ga4gh.py @@ -1,106 +1,270 @@ """Parse permissions and statuses from ELIXIR token for GA4GH claim. -Current implementation is based on https://docs.google.com/document/d/11Wg-uL75ypU5eNu2p_xh9gspmbGtmLzmdq5VfPHBirE +Current implementation is based on https://github.com/ga4gh/data-security/blob/master/AAI/AAIConnectProfile.md -The JWT contains GA4GH DURI claims in the following form: +The ELIXIR AAI JWT payload contains a GA4GH Passport claim in the scope: .. code-block:: javascript { - "ga4gh.userinfo_claims": [ - "ga4gh.AffiliationAndRole", - "ga4gh.ControlledAccessGrants", - "ga4gh.AcceptedTermsAndPolicies", - "ga4gh.ResearcherStatus" + "scope": "openid ga4gh_passport_v1", + ... + } + +The token is then intended to be delivered to the /userinfo endpoint at ELIXIR AAI, which will respond with a list of +assorted third party JWTs that need to be sifted through to find the relevant tokens. Initially it can not be determined +which tokens contain the desired information. + +.. code-block:: javascript + + { + "ga4gh_passport_v1": [ + "JWT", + "JWT", + "JWT", + ... ] } -The actual dataset permissions are then requested from /userinfo endpoint, and take the form of: +Each third party token (JWT, RFC 7519) consists of three parts separated by dots, in the following manner: `header.payload.signature`. +This module processes the assorted tokens to extract the information they carry and to validate that data. + +The process is carried out as such: +1. 
Take token (JWT) +2. Decode token +3a. Extract `type` key from the payload portion and check if the token type is of interest +3b. If the token is of the desired type, add it to list and continue, if not, discard this token and move to the next one +4. Extract `jku` key from the header portion (value is a URL that returns a JWK public key set) +5. Decode the complete token with the received public key +6. Validate the token claims +7. Extract the sought-after value from the `ga4gh_visa_v1` object's `value` key + +Dataset permissions are read from GA4GH RI claims of the type "ControlledAccessGrants" .. code-block:: javascript { - "ga4gh": { - "ControlledAccessGrants": [ - { - "value": "https://www.ebi.ac.uk/ega/EGAD000000000001", - "source": "https://ega-archive.org/dacs/EGAC00000000001", - "by": "dac", - "authoriser": "john.doe@dac.org", - "asserted": 1546300800, - "expires": 1577836800 - } - ] + "ga4gh_visa_v1": { + "type": "ControlledAccessGrants", + "value": "https://www.ebi.ac.uk/ega/EGAD000000000001", + "source": "https://ega-archive.org/dacs/EGAC00000000001", + "by": "dac", + "asserted": 1546300800, + "expires": 1577836800 } } -The statuses are also requested from /userinfo endpoint, and take the form of: +Bona Fide status is read from GA4GH RI claims of the type "AcceptedTermsAndPolicies" and "ResearcherStatus", each being in their respective tokens. .. 
code-block:: javascript { - "ga4gh": { - "AcceptedTermsAndPolicies": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "self", - "asserted": 1539069213, - "expires": 4694742813 - } - ], - "ResearcherStatus": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "peer", - "asserted": 1539017776, - "expires": 1593165413 - } - ] + "ga4gh_visa_v1": { + "type": "AcceptedTermsAndPolicies", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1539069213, + "expires": 4694742813 + } + } + + { + "ga4gh_visa_v1": { + "type": "ResearcherStatus", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "peer", + "asserted": 1539017776, + "expires": 1593165413 } } """ +import base64 +import json + import aiohttp +from authlib.jose import jwt + from ..api.exceptions import BeaconServerError from ..utils.logging import LOG from ..conf import OAUTH2_CONFIG +async def check_ga4gh_token(decoded_data, token, bona_fide_status, dataset_permissions): + """Check the token for GA4GH claims.""" + LOG.debug('Checking GA4GH claims from scope.') + + if 'scope' in decoded_data: + ga4gh_scopes = ['openid', 'ga4gh_passport_v1'] + token_scopes = decoded_data.get('scope').split(' ') + + if all(scope in token_scopes for scope in ga4gh_scopes): + dataset_permissions, bona_fide_status = await get_ga4gh_permissions(token) + + return dataset_permissions, bona_fide_status + + +async def decode_passport(encoded_passport): + """Return decoded header and payload from encoded passport JWT. 
+ + Public-key-less decoding inspired by the PyJWT library https://github.com/jpadilla/pyjwt + """ + LOG.debug('Decoding GA4GH passport.') + + # Convert the token string into bytes for processing, and split it into segments + encoded_passport = encoded_passport.encode('utf-8') # `header.payload.signature` + data, _ = encoded_passport.rsplit(b'.', 1) # data contains header and payload segments, the ignored segment is the signature segment + segments = data.split(b'.', 1) # [header, payload] + + # Intermediary container + verified_segments = [] + # Handle different implementations of public exponent + # 65537 is recommended, but some use 65536 instead + # pad the missing bytes, if needed + for segment in segments: + rem = len(segment) % 4 + if rem > 0: + segment += b'=' * (4 - rem) + verified_segments.append(segment) + + # Decode the verified token segments + decoded_segments = [base64.urlsafe_b64decode(seg) for seg in verified_segments] + + # Convert the decoded segment bytes into dicts for easy access + decoded_data = [json.loads(seg.decode('utf-8')) for seg in decoded_segments] + + return decoded_data + + +async def get_ga4gh_permissions(token): + """Retrieve GA4GH passports (JWTs) from ELIXIR AAI and process them into tangible permissions.""" + LOG.info('Handling permissions.') + + # Return variables + dataset_permissions = [] + bona_fide_status = False + + # Intermediary containers + dataset_passports = [] # [(token, header)] + bona_fide_passports = [] # [(token, header, payload)] + + # Get encoded passports (assorted JWTs) from /userinfo + encoded_passports = await retrieve_user_data(token) + + # Pre-process the assorted passports (JWTs) for dataset permissions and bona fide status parsing + if encoded_passports: + # Decode the passports and check their type + for encoded_passport in encoded_passports: + # Decode passport + header, payload = await decode_passport(encoded_passport) + # Sort passports that carry dataset permissions + pass_type = 
payload.get('ga4gh_visa_v1', {}).get('type') + if pass_type == 'ControlledAccessGrants': # nosec + dataset_passports.append((encoded_passport, header)) + # Sort passports that MAY carry bona fide status information + if pass_type in ['AcceptedTermsAndPolicies', 'ResearcherStatus']: + bona_fide_passports.append((encoded_passport, header, payload)) + + # Parse dataset passports to extract dataset permissions and validate them + dataset_permissions = await get_ga4gh_controlled(dataset_passports) + # Parse bona fide passports to extract bona fide status and validate them + bona_fide_status = await get_ga4gh_bona_fide(bona_fide_passports) + + return dataset_permissions, bona_fide_status + + async def retrieve_user_data(token): """Retrieve GA4GH user data.""" + LOG.debug('Contacting ELIXIR AAI /userinfo.') headers = {"Authorization": f"Bearer {token}"} try: async with aiohttp.ClientSession(headers=headers) as session: async with session.get(OAUTH2_CONFIG.userinfo) as r: json_body = await r.json() LOG.info('Retrieve GA4GH user data from ELIXIR AAI.') - return json_body.get("ga4gh", None) + return json_body.get("ga4gh_passport_v1", None) except Exception: raise BeaconServerError("Could not retrieve GA4GH user data from ELIXIR AAI.") -async def get_ga4gh_controlled(token, token_claim): - """Retrieve datasets from GA4GH permissions JWT claim.""" +async def get_jwk(url): + """Get JWK set keys to validate JWT.""" + LOG.debug('Retrieving JWK.') + try: + async with aiohttp.ClientSession() as session: + async with session.get(url) as r: + # This can be a single key or a list of JWK + return await r.json() + except Exception: + # This is not a fatal error, it just means that we are unable to validate the permissions, + # but the process should continue even if the validation of one token fails + LOG.error(f'Could not retrieve JWK from {url}') + pass + + +async def validate_passport(passport): + """Decode a passport and validate its contents.""" + LOG.debug('Validating passport.') 
+ + # Passports from `get_ga4gh_controlled()` will be of form + # passport[0] -> encoded passport (JWT) + # passport[1] -> unverified decoded header (contains `jku`) + # Passports from `get_bona_fide_status()` will be of form + # passport[0] -> encoded passport (JWT) + # passport[1] -> unverified decoded header (contains `jku`) + # passport[2] -> unverified decoded payload + + # JWT decoding and validation settings + # The `aud` claim will be ignored, because Beacon has no prior knowledge + # as to where the token has originated from, and is therefore unable to + # verify the intended audience. Other claims will be validated as per usual. + claims_options = { + "aud": { + "essential": False + } + } + + # Attempt to decode the token and validate its contents + # None of the exceptions are fatal, and will not raise an exception + # Because even if the validation of one passport fails, the query + # Should still continue in case other passports are valid + try: + # Get JWK for this passport from a third party provider + # The JWK will be requested from a URL that is given in the `jku` claim in the header + passport_key = await get_jwk(passport[1].get('jku')) + # Decode the JWT using public key + decoded_passport = jwt.decode(passport[0], passport_key, claims_options=claims_options) + # Validate the JWT signature + decoded_passport.validate() + # Return decoded and validated payload contents + return decoded_passport + except Exception as e: + LOG.error(f"Something went wrong when processing JWT tokens: {e}") + + +async def get_ga4gh_controlled(passports): + """Retrieve dataset permissions from GA4GH passport visas.""" # We only want to get datasets once, thus the set which prevents duplicates - LOG.info("Parsing GA4GH dataset permissions claims.") + LOG.info("Parsing GA4GH dataset permissions.") datasets = set() - # Check if the token contains a claim for GA4GH permissions - if 'ga4gh.ControlledAccessGrants' in token_claim: - # Contact /userinfo with token to get 
GA4GH permissions - ga4gh = await retrieve_user_data(token) - # If the /userinfo endpoint responded with user data, retrieve permissions and parse them - if 'ControlledAccessGrants' in ga4gh: - # Extract dataset key and split by `/` to remove potential URL prefix - # the dataset id in the resulting list will always be the last element - datasets.update([p["value"].split('/')[-1] for p in ga4gh["ControlledAccessGrants"] if "value" in p]) + + for passport in passports: + # Decode passport and validate its contents + validated_passport = await validate_passport(passport) + # Skip passports that failed validation (validate_passport returns None on failure) + if validated_passport: + # The dataset value will be of form `https://institution.org/urn:dataset:1000` + # the extracted dataset will always be the last list element when split with `/` + dataset = validated_passport.get('ga4gh_visa_v1', {}).get('value').split('/')[-1] + datasets.add(dataset) return datasets -async def get_ga4gh_bona_fide(token, token_claim): +async def get_ga4gh_bona_fide(passports): """Retrieve Bona Fide status from GA4GH JWT claim.""" LOG.info("Parsing GA4GH bona fide claims.") @@ -108,21 +272,35 @@ async def get_ga4gh_bona_fide(token, token_claim): terms = False status = False - # Check if the token contains claims for GA4GH Bona Fide - if 'ga4gh.AcceptedTermsAndPolicies' in token_claim and 'ga4gh.ResearcherStatus' in token_claim: - # Contact /userinfo with token to get confirmation of Bona Fide status - ga4gh = await retrieve_user_data(token) - # If the /userinfo endpoint responded with user data, retrieve statuses and agreements and parse them - if 'AcceptedTermsAndPolicies' in ga4gh: - for accepted_terms in ga4gh["AcceptedTermsAndPolicies"]: - if accepted_terms.get("value") == OAUTH2_CONFIG.bona_fide_value: - terms = True - if 'ResearcherStatus' in ga4gh: - for researcher_status in ga4gh["ResearcherStatus"]: - if researcher_status.get("value") == OAUTH2_CONFIG.bona_fide_value: - status = True - if terms and status: - # User has 
agreed to terms and has been recognized by a peer, return True for Bona Fide status - return True - - return False + for passport in passports: + # Check for the `type` of visa to determine if to look for `terms` or `status` + # + # CHECK FOR TERMS + passport_type = passport[2].get('ga4gh_visa_v1', {}).get('type') + passport_value = passport[2].get('ga4gh_visa_v1', {}).get('value') + if passport_type == 'AcceptedTermsAndPolicies' and passport_value == OAUTH2_CONFIG.bona_fide_value: + # This passport has the correct type and value, next step is to validate it + # + # Decode passport and validate its contents + # If the validation passes, terms will be set to True + # If the validation fails, an exception will be raised + # (and ignored since it's not fatal), and terms will remain False + await validate_passport(passport) + # The token is validated, therefore the terms are accepted + terms = True + # + # CHECK FOR STATUS + if passport_value == OAUTH2_CONFIG.bona_fide_value and passport_type == 'ResearcherStatus': + # Check if the visa contains a bona fide value + # This passport has the correct type and value, next step is to validate it + # + # Decode passport and validate its contents + # If the validation passes, status will be set to True + # If the validation fails, an exception will be raised + # (and ignored since it's not fatal), and status will remain False + await validate_passport(passport) + # The token is validated, therefore the status is accepted + status = True + + # User has agreed to terms and has been recognized by a peer, return True for Bona Fide status + return terms and status diff --git a/beacon_api/utils/db_load.py b/beacon_api/utils/db_load.py index 8ccac2af..679cdfa6 100644 --- a/beacon_api/utils/db_load.py +++ b/beacon_api/utils/db_load.py @@ -222,11 +222,11 @@ async def load_metadata(self, vcf, metafile, datafile): LOG.info(f'Calculate number of samples from {datafile}') len_samples = len(vcf.samples) LOG.info(f'Parse metadata from 
{metafile}') - with open(metafile, 'r') as metafile: + with open(metafile, 'r') as meta_file: # read metadata from given JSON file # TO DO: parse metadata directly from datafile if possible - LOG.info(metafile) - metadata = json.load(metafile) + LOG.info(meta_file) + metadata = json.load(meta_file) LOG.info(metadata) LOG.info('Metadata has been parsed') try: @@ -255,7 +255,8 @@ async def load_metadata(self, vcf, metafile, datafile): LOG.error(f'AN ERROR OCCURRED WHILE ATTEMPTING TO INSERT METADATA -> {e}') except Exception as e: LOG.error(f'AN ERROR OCCURRED WHILE ATTEMPTING TO PARSE METADATA -> {e}') - return metadata['datasetId'] + else: + return metadata['datasetId'] def _chunks(self, iterable, size): """Chunk records. diff --git a/beacon_api/utils/validate.py b/beacon_api/utils/validate.py index 151694fe..2bf37de4 100644 --- a/beacon_api/utils/validate.py +++ b/beacon_api/utils/validate.py @@ -13,7 +13,7 @@ from aiocache.serializers import JsonSerializer from ..api.exceptions import BeaconUnauthorised, BeaconBadRequest, BeaconForbidden, BeaconServerError from ..conf import OAUTH2_CONFIG -from ..permissions.ga4gh import get_ga4gh_controlled, get_ga4gh_bona_fide +from ..permissions.ga4gh import check_ga4gh_token from jsonschema import Draft7Validator, validators from jsonschema.exceptions import ValidationError @@ -54,7 +54,8 @@ def set_defaults(validator, properties, instance, schema): for error in validate_properties( validator, properties, instance, schema, ): - yield error + # Difficult to unit test + yield error # pragma: no cover return validators.extend( validator_class, {"properties": set_defaults}, @@ -76,8 +77,6 @@ def wrapper(func): @wraps(func) async def wrapped(*args): request = args[-1] - if not isinstance(request, web.Request): - raise BeaconBadRequest(request, request.host, "invalid request", "This does not seem a valid HTTP Request.") try: _, obj = await parse_request_object(request) except Exception: @@ -121,7 +120,21 @@ def 
token_scheme_check(token, scheme, obj, host): raise BeaconUnauthorised(obj, host, "invalid_token", 'Invalid token scheme, Bearer required.') if token is None: - raise BeaconUnauthorised(obj, host, "invalid_token", 'Token cannot be empty.') + # Might never happen + raise BeaconUnauthorised(obj, host, "invalid_token", 'Token cannot be empty.') # pragma: no cover + + +def verify_aud_claim(): + """Verify audience claim.""" + aud = [] + verify_aud = OAUTH2_CONFIG.verify_aud # Option to skip verification of `aud` claim + if verify_aud: + aud = os.environ.get('JWT_AUD', OAUTH2_CONFIG.audience) # List of intended audiences of token + # if verify_aud is set to True, we expect that a desired aud is then supplied. + # However, if verify_aud=True and no aud is supplied, we use aud=[None] which will fail for + # all tokens as a security measure. If aud=[], all tokens will pass (as is the default value). + aud = aud.split(',') if aud is not None else [None] + return verify_aud, aud def token_auth(): @@ -132,8 +145,6 @@ def token_auth(): """ @web.middleware async def token_middleware(request, handler): - if not isinstance(request, web.Request): - raise BeaconBadRequest(request, request.host, "invalid request", "This does not seem a valid HTTP Request.") if request.path in ['/query'] and 'Authorization' in request.headers: _, obj = await parse_request_object(request) try: @@ -147,14 +158,7 @@ async def token_middleware(request, handler): # Token decoding parameters key = await get_key() # JWK used to decode token with - aud = [] - verify_aud = OAUTH2_CONFIG.verify_aud # Option to skip verification of `aud` claim - if verify_aud: - aud = os.environ.get('JWT_AUD', OAUTH2_CONFIG.audience) # List of intended audiences of token - # if verify_aud is set to True, we expect that a desired aud is then supplied. - # However, if verify_aud=True and no aud is supplied, we use aud=[None] which will fail for - # all tokens as a security measure. 
If aud=[], all tokens will pass (as is the default value). - aud = aud.split(',') if aud is not None else [None] + verify_aud, aud = verify_aud_claim() # Prepare JWTClaims validation # can be populated with claims that are required to be present in the payload of the token claims_options = { @@ -172,22 +176,22 @@ async def token_middleware(request, handler): } try: - decodedData = jwt.decode(token, key, claims_options=claims_options) # decode the token - decodedData.validate() # validate the token contents + decoded_data = jwt.decode(token, key, claims_options=claims_options) # decode the token + decoded_data.validate() # validate the token contents LOG.info('Auth Token Decoded.') - LOG.info(f'Identified as {decodedData["sub"]} user by {decodedData["iss"]}.') + LOG.info(f'Identified as {decoded_data["sub"]} user by {decoded_data["iss"]}.') # for now the permissions just reflects that the data can be decoded from token # the bona fide status is checked against ELIXIR AAI by default or the URL from config # the bona_fide_status is specific to ELIXIR Tokens - controlled_datasets = set() + # Retrieve GA4GH Passports from /userinfo and process them into dataset permissions and bona fide status + dataset_permissions, bona_fide_status = set(), False + dataset_permissions, bona_fide_status = await check_ga4gh_token(decoded_data, token, bona_fide_status, dataset_permissions) # currently we offer module for parsing GA4GH permissions, but multiple claims and providers can be utilised # by updating the set, meaning replicating the line below with the permissions function and its associated claim # For GA4GH DURI permissions (ELIXIR Permissions API 2.0) - controlled_datasets.update(await get_ga4gh_controlled(token, - decodedData["ga4gh_userinfo_claims"]) if "ga4gh_userinfo_claims" in decodedData else {}) + controlled_datasets = set() + controlled_datasets.update(dataset_permissions) all_controlled = list(controlled_datasets) if bool(controlled_datasets) else None - # For Bona 
Fide status in GA4GH format - bona_fide_status = await get_ga4gh_bona_fide(token, decodedData["ga4gh_userinfo_claims"]) if "ga4gh_userinfo_claims" in decodedData else False request["token"] = {"bona_fide_status": bona_fide_status, # permissions key will hold the actual permissions found in the token/userinfo e.g. GA4GH permissions "permissions": all_controlled, @@ -195,14 +199,15 @@ async def token_middleware(request, handler): # currently if a token is valid that means request is authenticated "authenticated": True} return await handler(request) + # Testing the exceptions is done in integration tests except MissingClaimError as e: - raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Missing claim(s): {e}') + raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Missing claim(s): {e}') # pragma: no cover except ExpiredTokenError as e: - raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Expired signature: {e}') + raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Expired signature: {e}') # pragma: no cover except InvalidClaimError as e: - raise BeaconForbidden(obj, request.host, f'Token info not corresponding with claim: {e}') - except InvalidTokenError as e: - raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Invalid authorization token: {e}') + raise BeaconForbidden(obj, request.host, f'Token info not corresponding with claim: {e}') # pragma: no cover + except InvalidTokenError as e: # pragma: no cover + raise BeaconUnauthorised(obj, request.host, "invalid_token", f'Invalid authorization token: {e}') # pragma: no cover else: request["token"] = {"bona_fide_status": False, "permissions": None, diff --git a/data/init.sql b/data/init.sql index 53c327b7..e2862870 100644 --- a/data/init.sql +++ b/data/init.sql @@ -29,7 +29,7 @@ variantcount: SELECT count(*) FROM beacon_data_table; CREATE TABLE IF NOT EXISTS beacon_dataset_counts_table ( datasetId VARCHAR(128), callCount INTEGER DEFAULT NULL, - variantCount 
INTEGER DEFAULT NULL + variantCount BIGINT DEFAULT NULL ); CREATE TABLE IF NOT EXISTS beacon_data_table ( diff --git a/deploy/test/auth_test.ini b/deploy/test/auth_test.ini index ad464b86..57ea9200 100644 --- a/deploy/test/auth_test.ini +++ b/deploy/test/auth_test.ini @@ -7,7 +7,7 @@ title=GA4GHBeacon at CSC # Version of the Beacon implementation -version=1.4.0 +version=1.5.0 # Author of this software author=CSC developers @@ -40,6 +40,13 @@ alturl=https://ega-archive.org/ # Datetime when this Beacon was created createtime=2018-07-25T00:00:00Z +# GA4GH Discovery type `groupId` and `artifactId`, joined in /service-info with apiVersion +# See https://github.com/ga4gh-discovery/ga4gh-service-registry for more information and possible values +service_type=org.ga4gh:beacon + +# GA4GH Discovery server environment, possible values: prod, dev, test +environment=prod + [handover_info] # The base url for all handovers diff --git a/deploy/test/integ_test.py b/deploy/test/integ_test.py index 1c03a775..6dc2c2d4 100644 --- a/deploy/test/integ_test.py +++ b/deploy/test/integ_test.py @@ -389,7 +389,7 @@ async def test_17(): async with session.post('http://localhost:5050/query', data=json.dumps(payload)) as resp: data = await resp.json() assert data['exists'] is True, sys.exit('Query POST Endpoint Error!') - assert len(data['datasetAlleleResponses']) == 1, sys.exit('Should be able to retrieve both requested.') + assert len(data['datasetAlleleResponses']) == 2, sys.exit('Should be able to retrieve both requested.') async def test_18(): @@ -701,8 +701,8 @@ async def test_32(): async with aiohttp.ClientSession() as session: async with session.get('http://localhost:5050/service-info') as resp: data = await resp.json() - # GA4GH Discovery Service-Info is small and its length should be between 3 and 6, when the Beacon info is very long + # GA4GH Discovery Service-Info is small and its length should be at least 5 (required keys), when the Beacon info is very long # 
https://github.com/ga4gh-discovery/service-info/blob/develop/service-info.yaml - assert 5 <= len(data) <= 9, 'Service info size error' # ga4gh service-info has 5 required keys and at most 9 with optionals - assert data['type'] == 'urn:ga4gh:beacon', 'Service type error' # a new key used in beacon network + assert len(data) >= 5, 'Service info size error' # ga4gh service-info has 5 required keys, and option to add custom keys + assert data['type'].startswith('org.ga4gh:beacon'), 'Service type error' # a new key used in beacon network assert resp.status == 200, 'HTTP Status code error' diff --git a/deploy/test/mock_auth.py b/deploy/test/mock_auth.py index 429c230f..b5143d28 100644 --- a/deploy/test/mock_auth.py +++ b/deploy/test/mock_auth.py @@ -27,17 +27,11 @@ def generate_token(): "sub": "requester@elixir-europe.org", "aud": ["aud2", "aud3"], "azp": "azp", - "scope": "openid ga4gh", + "scope": "openid ga4gh_passport_v1", "iss": "http://test.csc.fi", "exp": 9999999999, "iat": 1561621913, - "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102", - "ga4gh_userinfo_claims": [ - "ga4gh.AffiliationAndRole", - "ga4gh.ControlledAccessGrants", - "ga4gh.AcceptedTermsAndPolicies", - "ga4gh.ResearcherStatus" - ] + "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102" } empty_payload = { "sub": "requester@elixir-europe.org", @@ -46,11 +40,77 @@ def generate_token(): "iat": 1547794655, "jti": "6ad7aa42-3e9c-4833-bd16-765cb80c2102" } + # Craft 4 passports, 2 for bona fide status and 2 for dataset permissions + # passport for bona fide: terms + passport_terms = { + "iss": "http://test.csc.fi", + "sub": "requester@elixir-europe.org", + "ga4gh_visa_v1": { + "type": "AcceptedTermsAndPolicies", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "dac", + "asserted": 1568699331 + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "bed0aff9-29b1-452c-b776-a6f2200b6db1" + } + # passport for bona fide: status + passport_status = { + "iss": 
"http://test.csc.fi", + "sub": "requester@elixir-europe.org", + "ga4gh_visa_v1": { + "type": "ResearcherStatus", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "peer", + "asserted": 1568699331 + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "722ddde1-617d-4651-992d-f0fdde77bf29" + } + # passport for dataset permissions 1 + passport_dataset1 = { + "iss": "http://test.csc.fi", + "sub": "requester@elixir-europe.org", + "ga4gh_visa_v1": { + "type": "ControlledAccessGrants", + "value": "https://www.ebi.ac.uk/ega/urn:hg:1000genome:controlled", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1568699331 + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "d1d7b521-bd6b-433d-b2d5-3d874aab9d55" + } + # passport for dataset permissions 2 + passport_dataset2 = { + "iss": "http://test.csc.fi", + "sub": "requester@elixir-europe.org", + "ga4gh_visa_v1": { + "type": "ControlledAccessGrants", + "value": "https://www.ebi.ac.uk/ega/urn:hg:1000genome:controlled1", + "source": "https://ga4gh.org/duri/no_org", + "by": "dac", + "asserted": 1568699331 + }, + "iat": 1571144438, + "exp": 99999999999, + "jti": "9fa600d6-4148-47c1-b708-36c4ba2e980e" + } public_jwk = jwk.dumps(public_key, kty='RSA') private_jwk = jwk.dumps(pem, kty='RSA') dataset_encoded = jwt.encode(header, dataset_payload, private_jwk).decode('utf-8') empty_encoded = jwt.encode(header, empty_payload, private_jwk).decode('utf-8') - return (public_jwk, dataset_encoded, empty_encoded) + passport_terms_encoded = jwt.encode(header, passport_terms, private_jwk).decode('utf-8') + passport_status_encoded = jwt.encode(header, passport_status, private_jwk).decode('utf-8') + passport_dataset1_encoded = jwt.encode(header, passport_dataset1, private_jwk).decode('utf-8') + passport_dataset2_encoded = jwt.encode(header, passport_dataset2, private_jwk).decode('utf-8') + return (public_jwk, dataset_encoded, empty_encoded, 
passport_terms_encoded, passport_status_encoded, + passport_dataset1_encoded, passport_dataset2_encoded) DATA = generate_token() @@ -75,42 +135,12 @@ async def userinfo(request): data = {} else: data = { - "ga4gh": { - "AcceptedTermsAndPolicies": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "self", - "asserted": 1539069213, - "expires": 9999999999 - } - ], - "ResearcherStatus": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "peer", - "asserted": 1539017776, - "expires": 9999999999 - } - ], - "ControlledAccessGrants": [ - { - "value": "https://www.ebi.ac.uk/ega/urn:hg:1000genome", - "source": "https://ga4gh.org/duri/no_org", - "by": "dac", - "asserted": 1559893314, - "expires": 9999999999 - }, - { - "value": "https://www.ebi.ac.uk/ega/urn:hg:1000genome:controlled", - "source": "https://ga4gh.org/duri/no_org", - "by": "dac", - "asserted": 1559897355, - "expires": 9999999999 - } - ] - } + "ga4gh_passport_v1": [ + DATA[3], + DATA[4], + DATA[5], + DATA[6] + ] } return web.json_response(data) diff --git a/docs/deploy.rst b/docs/deploy.rst index 07f284cd..e8858988 100644 --- a/docs/deploy.rst +++ b/docs/deploy.rst @@ -76,7 +76,7 @@ For use with Kubernetes we provide ``YAML`` configuration. role: beacon spec: containers: - - image: cscfi/beacon + - image: cscfi/beacon-python imagePullPolicy: Always name: beacon ports: @@ -88,8 +88,9 @@ For use with Kubernetes we provide ``YAML`` configuration. 
name: data volumes: - name: data - persistentVolumeClaim: - claimName: beaconpy + # change below with preferred volume class + hostPath: + path: /local/disk/path --- apiVersion: v1 kind: Service diff --git a/docs/example.rst b/docs/example.rst index 0359439c..07f02e19 100644 --- a/docs/example.rst +++ b/docs/example.rst @@ -115,10 +115,18 @@ Example Response: { "id": "localhost:5050", "name": "GA4GHBeacon at CSC", + "type": "org.ga4gh:beacon:1.1.0", "description": "Beacon API Web Server based on the GA4GH Beacon API", - "documentationUrl": "https://beacon-python.readthedocs.io/en/latest/", + "organization": { + "name": "CSC - IT Center for Science", + "url": "https://www.csc.fi/" + }, "contactUrl": "https://www.csc.fi/contact-info", - "version": "1.4.0" + "documentationUrl": "https://beacon-network.readthedocs.io/en/latest/", + "createdAt": "2019-09-04T12:00:00Z", + "updatedAt": "2019-09-05T05:55:18Z", + "environment": "prod", + "version": "1.5.0" } Query Endpoint diff --git a/docs/instructions.rst b/docs/instructions.rst index 14522834..84e0f8d3 100644 --- a/docs/instructions.rst +++ b/docs/instructions.rst @@ -69,7 +69,7 @@ pointing to the location of the file using `CONFIG_FILE` environment variable. .. literalinclude:: /../beacon_api/conf/config.ini :language: python - :lines: 1-68 + :lines: 1-75 .. _oauth2: @@ -81,7 +81,7 @@ The configuration variables reside in the same `CONFIG_FILE` as described above .. literalinclude:: /../beacon_api/conf/config.ini :language: python - :lines: 90-116 + :lines: 97-123 * ``server`` should point to an API that returns a public key, which can be used to validate the received JWTBearer token. * ``issuers`` is a string of comma separated values, e.g. `one,two,three` without spaces. 
The issuers string should contain diff --git a/docs/optionals.rst b/docs/optionals.rst index a5c2d67b..e209848c 100644 --- a/docs/optionals.rst +++ b/docs/optionals.rst @@ -17,7 +17,7 @@ The handover protocol can be configured in ``config.ini`` as follows: .. literalinclude:: /../beacon_api/conf/config.ini :language: python - :lines: 71-87 + :lines: 78-94 .. note:: Handover protocol is disabled by default, as shown by the commented out ``drs`` variable. This variable should point to the server, which serves the additional data. To enable the handover protocol, uncomment the ``drs`` variable. diff --git a/docs/permissions.rst b/docs/permissions.rst index ce23362c..079d4b01 100644 --- a/docs/permissions.rst +++ b/docs/permissions.rst @@ -10,11 +10,14 @@ As per Beacon specification there are three types of permissions: e.g. ELIXIR bona_fide or researcher status. Requires a JWT Token; * ``CONTROLLED`` - data available for users that have been granted access to a protected resource by a Data Access Committee (DAC). +.. note:: In this page we are illustrating permissions according to: + `GA4GH Authentication and Authorization Infrastructure (AAI) OpenID Connect Profile `_. Registered Data --------------- For retrieving ``REGISTERED`` permissions the function below forwards the TOKEN to another server +(e.g ELIXIR ``userinfo`` endpoint) that validates the information in the token is for a registered user/token and retrieves a JSON message that contains data regarding the Bona Fide status. Custom servers can be set up to mimic this functionality. @@ -28,7 +31,7 @@ researcher. .. literalinclude:: /../beacon_api/permissions/ga4gh.py :language: python - :lines: 103-128 + :lines: 267-305 .. note:: The ``ga4gh.AcceptedTermsAndPolicies`` and ``ga4gh.ResearcherStatus`` keys' values must be equal to those mandated by GA4GH. 
@@ -47,21 +50,22 @@ there is no standard way for delivering access to datasets via JWT Tokens and each AAI authority provides different claims with different structures. By default we include :meth:`beacon_api.permissions.ga4gh` add-on that offers the means to retrieve -permissions following the `GA4GH format `_ via a token provided by ELIXIR AAI. +permissions following the `GA4GH format `_ +via a token provided by ELIXIR AAI. If a token contains ``ga4gh_userinfo_claims`` JWT claim with ``ga4gh.ControlledAccessGrants``, these are parsed and retrieved as illustrated in: .. literalinclude:: /../beacon_api/permissions/ga4gh.py :language: python - :lines: 85-100 + :lines: 248-264 The permissions are then passed in :meth:`beacon_api.utils.validate` as illustrated below: .. literalinclude:: /../beacon_api/utils/validate.py :language: python :dedent: 16 - :lines: 175-188 + :lines: 183-200 If there is no claim for GA4GH permissions as illustrated above, they will not be added to ``controlled_datasets``. 
diff --git a/requirements.txt b/requirements.txt index 7ea21431..1b167433 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ aiohttp aiohttp_cors asyncpg -jsonschema==3.0.2 +jsonschema Cython numpy cyvcf2==0.10.1; python_version < '3.7' diff --git a/setup.py b/setup.py index 5987b3b9..ab7b6dae 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ 'Programming Language :: Python :: 3.7', ], install_requires=['aiohttp', 'asyncpg', 'authlib', - 'jsonschema==3.0.2', 'gunicorn'], + 'jsonschema', 'gunicorn'], extras_require={ 'test': ['coverage', 'pytest', 'pytest-cov', 'coveralls', 'testfixtures', 'tox', diff --git a/tests/test.ini b/tests/test.ini index 8a541212..15bced49 100644 --- a/tests/test.ini +++ b/tests/test.ini @@ -7,7 +7,7 @@ title=GA4GHBeacon at CSC # Version of the Beacon implementation -version=1.4.0 +version=1.5.0 # Author of this software author=CSC developers @@ -40,10 +40,17 @@ alturl=https://ega-archive.org/ # Datetime when this Beacon was created createtime=2018-07-25T00:00:00Z +# GA4GH Discovery type `groupId` and `artifactId`, joined in /service-info with apiVersion +# See https://github.com/ga4gh-discovery/ga4gh-service-registry for more information and possible values +service_type=org.ga4gh:beacon + +# GA4GH Discovery server environment, possible values: prod, dev, test +environment=test + [handover_info] # The base url for all handovers -drs=https://examplebrowser.org +drs=https://examplebrowser.org/ # Make the handovers 1- or 0-based handover_base = 1 diff --git a/tests/test_app.py b/tests/test_app.py index 5f8e2d97..ae5f63b8 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -37,14 +37,14 @@ def generate_token(issuer): "iss": issuer, "aud": "audience", "exp": 9999999999, - "sub": "smth@elixir-europe.org" + "sub": "smth@smth.org" } token = jwt.encode(header, payload, pem).decode('utf-8') return token, pem def generate_bad_token(): - """Mock ELIXIR AAI token.""" + """Mock AAI token.""" pem = { "kty": "oct", "kid": 
"018c0ae5-4d9b-471b-bfd6-eef314bc7037", @@ -286,7 +286,7 @@ async def test_invalid_scheme_get_query(self): @asynctest.mock.patch('beacon_api.app.query_request_handler', side_effect=json.dumps(PARAMS)) @unittest_run_loop async def test_valid_token_get_query(self, mock_handler, mock_object): - """Test valid token GET query endpoint, invalid scheme.""" + """Test valid token GET query endpoint.""" token = os.environ.get('TOKEN') resp = await self.client.request("POST", "/query", data=json.dumps(PARAMS), diff --git a/tests/test_basic.py b/tests/test_basic.py index 0606d4df..584eaa73 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -3,9 +3,12 @@ from beacon_api.utils.db_load import parse_arguments, init_beacon_db, main from beacon_api.conf.config import init_db_pool from beacon_api.api.query import access_resolution -from beacon_api.permissions.ga4gh import get_ga4gh_controlled, get_ga4gh_bona_fide -from .test_app import PARAMS +from beacon_api.utils.validate import token_scheme_check, verify_aud_claim +from beacon_api.permissions.ga4gh import get_ga4gh_controlled, get_ga4gh_bona_fide, validate_passport +from beacon_api.permissions.ga4gh import check_ga4gh_token, decode_passport, get_ga4gh_permissions +from .test_app import PARAMS, generate_token from testfixtures import TempDirectory +from test.support import EnvironmentVarGuard def mock_token(bona_fide, permissions, auth): @@ -15,6 +18,21 @@ def mock_token(bona_fide, permissions, auth): "authenticated": auth} +class MockDecodedPassport: + """Mock JWT.""" + + def __init__(self, validated=True): + """Initialise mock JWT.""" + self.validated = validated + + def validate(self): + """Invoke validate.""" + if self.validated: + return True + else: + raise Exception + + class MockBeaconDB: """BeaconDB mock. 
@@ -105,6 +123,22 @@ def test_main_db(self, mock_init): main() mock_init.assert_called() + def test_aud_claim(self): + """Test aud claim function.""" + env = EnvironmentVarGuard() + env.set('JWT_AUD', "aud1,aud2") + result = verify_aud_claim() + # Because it is false we expect it not to be parsed + expected = (False, []) + self.assertEqual(result, expected) + env.unset('JWT_AUD') + + def test_token_scheme_check_bad(self): + """Test token scheme no token.""" + # This might never happen, yet lets prepare for it + with self.assertRaises(aiohttp.web_exceptions.HTTPUnauthorized): + token_scheme_check("", 'https', {}, 'localhost') + def test_access_resolution_base(self): """Test assumptions for access resolution. @@ -272,105 +306,151 @@ def test_access_resolution_controlled_never_reached2(self): with self.assertRaises(aiohttp.web_exceptions.HTTPForbidden): access_resolution(request, token, host, [], [], [8]) - @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') - async def test_ga4gh_controlled(self, userinfo): + @asynctest.mock.patch('beacon_api.permissions.ga4gh.validate_passport') + async def test_ga4gh_controlled(self, m_validation): """Test ga4gh permissions claim parsing.""" - userinfo.return_value = { - "ControlledAccessGrants": [ - { - "value": "https://www.ebi.ac.uk/ega/EGAD000000000001", - "source": "https://ega-archive.org/dacs/EGAC00000000001", - "by": "dac", - "authoriser": "john.doe@dac.org", - "asserted": 1546300800, - "expires": 1577836800 - }, - { - "value": "https://www.ebi.ac.uk/ega/EGAD000000000002", - "source": "https://ega-archive.org/dacs/EGAC00000000001", - "by": "dac", - "authoriser": "john.doe@dac.org", - "asserted": 1546300800, - "expires": 1577836800 - }, - { - "value": "no-prefix-dataset", - "source": "https://ega-archive.org/dacs/EGAC00000000001", - "by": "dac", - "authoriser": "john.doe@dac.org", - "asserted": 1546300800, - "expires": 1577836800 - } - ] - } - # Good test: claims OK, userinfo OK - token_claim = 
["ga4gh.ControlledAccessGrants"] - token = 'this_is_a_jwt' - datasets = await get_ga4gh_controlled(token, token_claim) - self.assertEqual(datasets, {'EGAD000000000001', 'EGAD000000000002', 'no-prefix-dataset'}) # has permissions - # Bad test: no claims, userinfo OK - token_claim = [] - token = 'this_is_a_jwt' - datasets = await get_ga4gh_controlled(token, token_claim) - self.assertEqual(datasets, set()) # doesn't have permissions - # Bad test: claims OK, no userinfo - userinfo.return_value = {} - token_claim = ["ga4gh.ControlledAccessGrants"] - token = 'this_is_a_jwt' - datasets = await get_ga4gh_controlled(token, token_claim) - self.assertEqual(datasets, set()) # doesn't have permissions - # Bad test: no claims, no userinfo - token_claim = [] - token = 'this_is_a_jwt' - datasets = await get_ga4gh_controlled(token, token_claim) - self.assertEqual(datasets, set()) # doesn't have permissions - - @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') - async def test_ga4gh_bona_fide(self, userinfo): + # Test: no passports, no permissions + datasets = await get_ga4gh_controlled([]) + self.assertEqual(datasets, set()) + # Test: 1 passport, 1 unique dataset, 1 permission + passport = {"ga4gh_visa_v1": {"type": "ControlledAccessGrants", + "value": "https://institution.org/EGAD01", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1539069213, + "expires": 4694742813}} + m_validation.return_value = passport + dataset = await get_ga4gh_controlled([{}]) # one passport + self.assertEqual(dataset, {'EGAD01'}) + # Test: 2 passports, 1 unique dataset, 1 permission (permissions must not be duplicated) + passport = {"ga4gh_visa_v1": {"type": "ControlledAccessGrants", + "value": "https://institution.org/EGAD01", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1539069213, + "expires": 4694742813}} + m_validation.return_value = passport + dataset = await get_ga4gh_controlled([{}, {}]) # two passports + 
self.assertEqual(dataset, {'EGAD01'}) + # Test: 2 passports, 2 unique datasets, 2 permissions + # Can't test this case with the current design! + # Would need a way for validate_passport() to mock two different results + + async def test_ga4gh_bona_fide(self): """Test ga4gh statuses claim parsing.""" - userinfo.return_value = { - "AcceptedTermsAndPolicies": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "self", - "asserted": 1539069213, - "expires": 4694742813 - } - ], - "ResearcherStatus": [ - { - "value": "https://doi.org/10.1038/s41431-018-0219-y", - "source": "https://ga4gh.org/duri/no_org", - "by": "peer", - "asserted": 1539017776, - "expires": 1593165413 - } - ] - } - # Good test: claims OK, userinfo OK - token_claim = ["ga4gh.AcceptedTermsAndPolicies", "ga4gh.ResearcherStatus"] - token = 'this_is_a_jwt' - bona_fide_status = await get_ga4gh_bona_fide(token, token_claim) + passports = [("enc", "header", { + "ga4gh_visa_v1": {"type": "AcceptedTermsAndPolicies", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "self", + "asserted": 1539069213, + "expires": 4694742813} + }), + ("enc", "header", { + "ga4gh_visa_v1": {"type": "ResearcherStatus", + "value": "https://doi.org/10.1038/s41431-018-0219-y", + "source": "https://ga4gh.org/duri/no_org", + "by": "peer", + "asserted": 1539017776, + "expires": 1593165413}})] + # Good test: both required passport types contained the correct value + bona_fide_status = await get_ga4gh_bona_fide(passports) self.assertEqual(bona_fide_status, True) # has bona fide - # Bad test: no claims, userinfo OK - token_claim = [] - token = 'this_is_a_jwt' - bona_fide_status = await get_ga4gh_bona_fide(token, token_claim) - self.assertEqual(bona_fide_status, False) # doesn't have bona fide - # Bad test: claims OK, no userinfo - userinfo.return_value = {} - token_claim = ["ga4gh.AcceptedTermsAndPolicies", 
"ga4gh.ResearcherStatus"] - token = 'this_is_a_jwt' - bona_fide_status = await get_ga4gh_bona_fide(token, token_claim) - self.assertEqual(bona_fide_status, False) # doesn't have bona fide - # Bad test: no claims, no userinfo - userinfo.return_value = {} - token_claim = [] - token = 'this_is_a_jwt' - bona_fide_status = await get_ga4gh_bona_fide(token, token_claim) + # Bad test: missing passports of required type + passports_empty = [] + bona_fide_status = await get_ga4gh_bona_fide(passports_empty) self.assertEqual(bona_fide_status, False) # doesn't have bona fide + @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_jwk') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.jwt') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.LOG') + async def test_validate_passport(self, mock_log, m_jwt, m_jwk): + """Test passport validation.""" + m_jwk.return_value = 'jwk' + # Test: validation passed + m_jwt.return_value = MockDecodedPassport() + await validate_passport({}) + + # # Test: validation failed + m_jwt.return_value = MockDecodedPassport(validated=False) + # with self.assertRaises(Exception): + await validate_passport({}) + # we are not raising the exception we are just doing a log + # need to assert the log called + mock_log.error.assert_called_with("Something went wrong when processing JWT tokens: 1") + + @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_ga4gh_permissions') + async def test_check_ga4gh_token(self, m_get_perms): + """Test token scopes.""" + # Test: no scope found + decoded_data = {} + dataset_permissions, bona_fide_status = await check_ga4gh_token(decoded_data, {}, False, set()) + self.assertEqual(dataset_permissions, set()) + self.assertEqual(bona_fide_status, False) + # Test: scope is ok, but no claims + decoded_data = {'scope': ''} + dataset_permissions, bona_fide_status = await check_ga4gh_token(decoded_data, {}, False, set()) + self.assertEqual(dataset_permissions, set()) + self.assertEqual(bona_fide_status, False) + # Test: scope 
is ok, claims are ok + m_get_perms.return_value = {'EGAD01'}, True + decoded_data = {'scope': 'openid ga4gh_passport_v1'} + dataset_permissions, bona_fide_status = await check_ga4gh_token(decoded_data, {}, False, set()) + self.assertEqual(dataset_permissions, {'EGAD01'}) + self.assertEqual(bona_fide_status, True) + + async def test_decode_passport(self): + """Test key-less JWT decoding.""" + token, _ = generate_token('http://test.csc.fi') + header, payload = await decode_passport(token) + self.assertEqual(header.get('alg'), 'HS256') + self.assertEqual(payload.get('iss'), 'http://test.csc.fi') + + @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_ga4gh_bona_fide') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.get_ga4gh_controlled') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.decode_passport') + @asynctest.mock.patch('beacon_api.permissions.ga4gh.retrieve_user_data') + async def test_get_ga4gh_permissions(self, m_userinfo, m_decode, m_controlled, m_bonafide): + """Test GA4GH permissions main function.""" + # Test: no data (nothing) + m_userinfo.return_value = [{}] + header = {} + payload = {} + m_decode.return_value = header, payload + m_controlled.return_value = set() + m_bonafide.return_value = False + dataset_permissions, bona_fide_status = await get_ga4gh_permissions('token') + self.assertEqual(dataset_permissions, set()) + self.assertEqual(bona_fide_status, False) + # Test: permissions + m_userinfo.return_value = [{}] + header = {} + payload = { + 'ga4gh_visa_v1': { + 'type': 'ControlledAccessGrants' + } + } + m_decode.return_value = header, payload + m_controlled.return_value = {'EGAD01'} + m_bonafide.return_value = False + dataset_permissions, bona_fide_status = await get_ga4gh_permissions('token') + self.assertEqual(dataset_permissions, {'EGAD01'}) + self.assertEqual(bona_fide_status, False) + # Test: bona fide + m_userinfo.return_value = [{}] + header = {} + payload = { + 'ga4gh_visa_v1': { + 'type': 'ResearcherStatus' + } + } + 
m_decode.return_value = header, payload + m_controlled.return_value = set() + m_bonafide.return_value = True + dataset_permissions, bona_fide_status = await get_ga4gh_permissions('token') + self.assertEqual(dataset_permissions, set()) + self.assertEqual(bona_fide_status, True) + if __name__ == '__main__': asynctest.main() diff --git a/tests/test_data_query.py b/tests/test_data_query.py index 2f6317a6..587f0e90 100644 --- a/tests/test_data_query.py +++ b/tests/test_data_query.py @@ -1,4 +1,5 @@ import asynctest +import aiohttp from unittest import mock from beacon_api.utils.data_query import filter_exists, transform_record from beacon_api.utils.data_query import transform_misses, transform_metadata, find_datasets, add_handover @@ -6,7 +7,7 @@ from beacon_api.extensions.handover import make_handover from datetime import datetime from beacon_api.utils.data_query import handle_wildcard -from .test_db_load import Connection +from .test_db_load import Connection, ConnectionException class Record: @@ -154,6 +155,13 @@ async def test_datasets_access_call_public(self): # in Connection() class self.assertEqual(result, (['mock:public:id'], [], [])) + async def test_datasets_access_call_exception(self): + """Test db call of getting public datasets access with exception.""" + pool = asynctest.CoroutineMock() + pool.acquire().__aenter__.return_value = ConnectionException() + with self.assertRaises(aiohttp.web_exceptions.HTTPInternalServerError): + await fetch_datasets_access(pool, None) + async def test_datasets_access_call_registered(self): """Test db call of getting registered datasets access.""" pool = asynctest.CoroutineMock() @@ -195,10 +203,21 @@ async def test_fetch_dataset_metadata_call(self): # in Connection() class self.assertEqual(result, []) + async def test_fetch_dataset_metadata_call_exception(self): + """Test db call of getting datasets metadata with exception.""" + pool = asynctest.CoroutineMock() + pool.acquire().__aenter__.return_value = ConnectionException() + 
with self.assertRaises(aiohttp.web_exceptions.HTTPInternalServerError): + await fetch_dataset_metadata(pool, None, None) + async def test_fetch_filtered_dataset_call(self): """Test db call for retrieving main data.""" pool = asynctest.CoroutineMock() - pool.acquire().__aenter__.return_value = Connection() + db_response = {"referenceBases": '', "alternateBases": '', "variantType": "", + "referenceName": 'Chr38', + "frequency": 0, "callCount": 0, "sampleCount": 0, "variantCount": 0, + "start": 0, "end": 0, "accessType": "PUBLIC", "datasetId": "test"} + pool.acquire().__aenter__.return_value = Connection(accessData=[db_response]) assembly_id = 'GRCh38' position = (10, 20, None, None, None, None) chromosome = 1 @@ -208,10 +227,45 @@ async def test_fetch_filtered_dataset_call(self): # for now it can return empty dataset # in order to get a response we will have to mock it # in Connection() class - self.assertEqual(result, []) + expected = {'referenceName': 'Chr38', 'callCount': 0, 'sampleCount': 0, 'variantCount': 0, 'datasetId': 'test', + 'referenceBases': '', 'alternateBases': '', 'variantType': '', 'start': 0, 'end': 0, 'frequency': 0, + 'info': {'accessType': 'PUBLIC'}, + 'datasetHandover': [{'handoverType': {'id': 'CUSTOM', 'label': 'Variants'}, + 'description': 'browse the variants matched by the query', + 'url': 'https://examplebrowser.org/dataset/test/browser/variant/Chr38-1--'}, + {'handoverType': {'id': 'CUSTOM', 'label': 'Region'}, + 'description': 'browse data of the region matched by the query', + 'url': 'https://examplebrowser.org/dataset/test/browser/region/Chr38-1-1'}, + {'handoverType': {'id': 'CUSTOM', 'label': 'Data'}, + 'description': 'retrieve information of the datasets', + 'url': 'https://examplebrowser.org/dataset/test/browser'}]} + + self.assertEqual(result, [expected]) + + async def test_fetch_filtered_dataset_call_misses(self): + """Test db call for retrieving miss data.""" + pool = asynctest.CoroutineMock() + 
pool.acquire().__aenter__.return_value = Connection() # db_response is [] + assembly_id = 'GRCh38' + position = (10, 20, None, None, None, None) + chromosome = 1 + reference = 'A' + alternate = ('DUP', None) result_miss = await fetch_filtered_dataset(pool, assembly_id, position, chromosome, reference, alternate, None, None, True) self.assertEqual(result_miss, []) + async def test_fetch_filtered_dataset_call_exception(self): + """Test db call of retrieving main data with exception.""" + assembly_id = 'GRCh38' + position = (10, 20, None, None, None, None) + chromosome = 1 + reference = 'A' + alternate = ('DUP', None) + pool = asynctest.CoroutineMock() + pool.acquire().__aenter__.return_value = ConnectionException() + with self.assertRaises(aiohttp.web_exceptions.HTTPInternalServerError): + await fetch_filtered_dataset(pool, assembly_id, position, chromosome, reference, alternate, None, None, False) + def test_handle_wildcard(self): """Test PostgreSQL wildcard handling.""" sequence1 = 'ATCG' diff --git a/tests/test_db_load.py b/tests/test_db_load.py index f69467da..cbdbea23 100644 --- a/tests/test_db_load.py +++ b/tests/test_db_load.py @@ -11,7 +11,7 @@ class Variant: Mock this for Variant calculations. """ - def __init__(self, ALT, REF, INF, call_rate, var_type, num_called): + def __init__(self, ALT, REF, INF, call_rate, var_type, num_called, is_sv=False): """Initialize class.""" self.INFO = INF self.ALT = ALT @@ -19,7 +19,7 @@ def __init__(self, ALT, REF, INF, call_rate, var_type, num_called): self.call_rate = call_rate self.var_type = var_type self.num_called = num_called - self.is_sv = False + self.is_sv = is_sv class INFO: @@ -28,12 +28,13 @@ class INFO: Mock this for storing VCF info. 
""" - def __init__(self, AC, VT, AN): + def __init__(self, AC, VT, AN, AF, SVTYPE=None): """Initialize class.""" self.AC = AC self.VT = VT self.AN = AN - self.AF = None + self.AF = AF + self.SVTYPE = SVTYPE def get(self, key): """Inside `__getitem__` method.""" @@ -119,6 +120,30 @@ def transaction(self, *args, **kwargs): return Transaction(*args, **kwargs) +class ConnectionException: + """Class Connection with Exception. + + Mock this from asyncpg. + """ + + def __init__(self): + """Initialize class.""" + pass + + def transaction(self, *args, **kwargs): + """Mimic transaction.""" + return Transaction(*args, **kwargs) + + async def execute(self, query, *args): + """Mimic execute.""" + return Exception + + @asyncio.coroutine + def prepare(self, query): + """Mimic prepare.""" + return Exception + + class DatabaseTestCase(asynctest.TestCase): """Test database operations.""" @@ -150,7 +175,8 @@ def setUp(self): #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 19 112 . A G 10 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 - 20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.""" + 20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. + chrM 15011 . T C . PASS . 
GT:GQ:DP:RO:QR:AO:QA:GL 1:160:970:0:0:968:31792:-2860.58,0 1:160:970:0:0:968:31792:-2860.58,0""" self.datafile = self._dir.write('data.csv', self.data.encode('utf-8')) def tearDown(self): @@ -164,6 +190,18 @@ async def test_rchop(self, db_mock): await self._db.connection() result = self._db._rchop('INS:ME:LINE1', ":LINE1") self.assertEqual('INS:ME', result) + result_no_ending = self._db._rchop('INS', ":LINE1") + self.assertEqual('INS', result_no_ending) + + @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') + async def test_handle_type(self, db_mock): + """Test handle type.""" + db_mock.return_value = Connection() + await self._db.connection() + result = self._db._handle_type(1, int) + self.assertEqual([1], result) + result_tuple = self._db._handle_type((0.1, 0.2), float) + self.assertEqual([0.1, 0.2], result_tuple) @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') async def test_bnd_parts(self, db_mock): @@ -205,6 +243,16 @@ async def test_create_tables(self, db_mock, mock_log): # Should assert logs mock_log.info.assert_called_with('Tables have been created') + @asynctest.mock.patch('beacon_api.utils.db_load.LOG') + @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') + async def test_create_tables_exception(self, db_mock, mock_log): + """Test creating tables exception.""" + db_mock.return_value = ConnectionException() + await self._db.connection() + await self._db.create_tables('sql.init') + log = "AN ERROR OCCURRED WHILE ATTEMPTING TO CREATE TABLES -> [Errno 2] No such file or directory: 'sql.init'" + mock_log.error.assert_called_with(log) + @asynctest.mock.patch('beacon_api.utils.db_load.LOG') @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') @asynctest.mock.patch('beacon_api.utils.db_load.VCF') @@ -230,6 +278,18 @@ async def test_load_metadata(self, mock_vcf, db_mock, mock_log): mock_log.info.mock_calls = [f'Parsing metadata from {metafile}', 'Metadata has been parsed'] + 
@asynctest.mock.patch('beacon_api.utils.db_load.LOG') + @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') + async def test_load_metadata_exception(self, db_mock, mock_log): + """Test load metadata error.""" + db_mock.return_value = ConnectionException() + await self._db.connection() + vcf = asynctest.mock.MagicMock(name='samples') + vcf.samples.return_value = [1, 2, 3] + await self._db.load_metadata(vcf, 'meta.are', 'datafile') + log = "AN ERROR OCCURRED WHILE ATTEMPTING TO PARSE METADATA -> [Errno 2] No such file or directory: 'meta.are'" + mock_log.error.assert_called_with(log) + @asynctest.mock.patch('beacon_api.utils.db_load.LOG') @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') async def test_load_datafile(self, db_mock, mock_log): @@ -248,7 +308,7 @@ async def test_load_datafile(self, db_mock, mock_log): @asynctest.mock.patch('beacon_api.utils.db_load.LOG') @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') async def test_insert_variants(self, db_mock, mock_log): - """Test load_datafile.""" + """Test insert variants.""" db_mock.return_value = Connection() await self._db.connection() db_mock.assert_called() @@ -257,12 +317,6 @@ async def test_insert_variants(self, db_mock, mock_log): mock_log.info.mock_calls = [f'Received 1 variants for insertion to DATASET1', 'Insert variants into the database'] - # This was the case when BeaconDB() was initiated with a URL parameter, now it happens with environment variables - # def test_bad_init(self): - # """Capture error in case of anything wrong with initializing BeaconDB.""" - # with self.assertRaises(TypeError): - # BeaconDB() - @asynctest.mock.patch('beacon_api.utils.db_load.LOG') @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') async def test_close(self, db_mock, mock_log): @@ -273,24 +327,42 @@ async def test_close(self, db_mock, mock_log): mock_log.info.mock_calls = ['Mark the database connection to be closed', 'The database connection has been 
closed'] + @asynctest.mock.patch('beacon_api.utils.db_load.LOG') + @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') + async def test_close_error(self, db_mock, mock_log): + """Test database connection close error.""" + db_mock.return_value = ConnectionException() + await self._db.connection() + await self._db.close() + log = "AN ERROR OCCURRED WHILE ATTEMPTING TO CLOSE DATABASE CONNECTION -> 'ConnectionException' object has no attribute 'close'" + mock_log.error.assert_called_with(log) + @asynctest.mock.patch('beacon_api.utils.db_load.LOG') @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') async def test_unpack(self, db_mock, mock_log): """Test database URL fetching.""" db_mock.return_value = Connection() await self._db.connection() - inf1 = INFO((1), 'S', 3) - variant = Variant(['C'], 'T', inf1, 0.7, 'snp', 3) - result = self._db._unpack(variant) + inf1 = INFO((1), 'i', 3, None) + variant_1 = Variant(['C'], 'T', inf1, 0.7, 'indel', 3) + result = self._db._unpack(variant_1) self.assertEqual(([0.3333333333333333], [1], ['SNP'], ['C'], 3, []), result) - inf2 = INFO(1, 'S', 3) - variant = Variant(['AT', 'A'], 'ATA', inf2, 0.7, 'snp', 3) - result = self._db._unpack(variant) + inf2 = INFO(1, 'M', 3, None) + variant_2 = Variant(['AT', 'A'], 'ATA', inf2, 0.7, 'mnp', 3) + result = self._db._unpack(variant_2) self.assertEqual(([0.3333333333333333], [1], ['DEL', 'DEL'], ['AT', 'A'], 3, []), result) - inf3 = INFO((1), 'S', 3) - variant = Variant(['TC'], 'T', inf3, 0.7, 'snp', 3) - result = self._db._unpack(variant) - self.assertEqual(([0.3333333333333333], [1], ['INS'], ['TC'], 3, []), result) + inf3 = INFO((1), 'S', 3, 0.5) + variant_3 = Variant(['TC'], 'T', inf3, 0.7, 'snp', 3) + result = self._db._unpack(variant_3) + self.assertEqual(([0.5], [1], ['INS'], ['TC'], 3, []), result) + inf4 = INFO((1), '', 3, None, 'BND') + variant_4 = Variant(['TC'], 'T', inf4, 0.7, 'snp', 3) + result = self._db._unpack(variant_4) + 
self.assertEqual(([0.3333333333333333], [1], ['SNP'], ['TC'], 3, []), result) + inf5 = INFO((1), 'S', 3, None, '') + variant_5 = Variant(['TC'], 'T', inf5, 0.7, 'ins', 3) + result5 = self._db._unpack(variant_5) + self.assertEqual(([0.3333333333333333], [1], ['INS'], ['TC'], 3, []), result5) @asynctest.mock.patch('beacon_api.utils.db_load.LOG') @asynctest.mock.patch('beacon_api.utils.db_load.asyncpg.connect') diff --git a/tests/test_mate_name.py b/tests/test_mate_name.py index ca7d6d86..2265a9e9 100644 --- a/tests/test_mate_name.py +++ b/tests/test_mate_name.py @@ -1,6 +1,7 @@ import asynctest +import aiohttp from beacon_api.extensions.mate_name import find_fusion, fetch_fusion_dataset -from .test_db_load import Connection +from .test_db_load import Connection, ConnectionException class TestDataQueryFunctions(asynctest.TestCase): @@ -28,7 +29,11 @@ async def test_find_fusion(self, mock_filtered): async def test_fetch_fusion_dataset_call(self): """Test db call for retrieving mate data.""" pool = asynctest.CoroutineMock() - pool.acquire().__aenter__.return_value = Connection() + db_response = {"referenceBases": '', "alternateBases": '', "variantType": "", + "referenceName": 'Chr38', + "frequency": 0, "callCount": 0, "sampleCount": 0, "variantCount": 0, + "start": 0, "end": 0, "accessType": "PUBLIC", "datasetId": "test"} + pool.acquire().__aenter__.return_value = Connection(accessData=[db_response]) assembly_id = 'GRCh38' position = (10, 20, None, None, None, None) chromosome = 1 @@ -37,10 +42,42 @@ async def test_fetch_fusion_dataset_call(self): # for now it can return empty dataset # in order to get a response we will have to mock it # in Connection() class - self.assertEqual(result, []) + expected = {'referenceName': 'Chr38', 'callCount': 0, 'sampleCount': 0, 'variantCount': 0, 'datasetId': 'test', + 'referenceBases': '', 'alternateBases': '', 'variantType': '', 'start': 0, 'end': 0, 'frequency': 0, + 'info': {'accessType': 'PUBLIC'}, + 'datasetHandover': 
[{'handoverType': {'id': 'CUSTOM', 'label': 'Variants'}, + 'description': 'browse the variants matched by the query', + 'url': 'https://examplebrowser.org/dataset/test/browser/variant/Chr38-1--'}, + {'handoverType': {'id': 'CUSTOM', 'label': 'Region'}, + 'description': 'browse data of the region matched by the query', + 'url': 'https://examplebrowser.org/dataset/test/browser/region/Chr38-1-1'}, + {'handoverType': {'id': 'CUSTOM', 'label': 'Data'}, + 'description': 'retrieve information of the datasets', + 'url': 'https://examplebrowser.org/dataset/test/browser'}]} + self.assertEqual(result, [expected]) + + async def test_fetch_fusion_dataset_call_miss(self): + """Test db call for retrieving mate miss data.""" + pool = asynctest.CoroutineMock() + pool.acquire().__aenter__.return_value = Connection() + assembly_id = 'GRCh38' + position = (10, 20, None, None, None, None) + chromosome = 1 + reference = 'A' result_miss = await fetch_fusion_dataset(pool, assembly_id, position, chromosome, reference, None, None, None, True) self.assertEqual(result_miss, []) + async def test_fetch_fusion_dataset_call_exception(self): + """Test db call for retrieving mate data with exception.""" + pool = asynctest.CoroutineMock() + pool.acquire().__aenter__.return_value = ConnectionException() + assembly_id = 'GRCh38' + position = (10, 20, None, None, None, None) + chromosome = 1 + reference = 'A' + with self.assertRaises(aiohttp.web_exceptions.HTTPInternalServerError): + await fetch_fusion_dataset(pool, assembly_id, position, chromosome, reference, None, None, None, False) + if __name__ == '__main__': asynctest.main() diff --git a/tests/test_response.py b/tests/test_response.py index daf54e4a..ef55e970 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -3,7 +3,7 @@ import asynctest from beacon_api.schemas import load_schema from beacon_api.utils.validate import get_key -from beacon_api.permissions.ga4gh import retrieve_user_data +from beacon_api.permissions.ga4gh import 
retrieve_user_data, get_jwk import jsonschema import json import aiohttp @@ -120,7 +120,7 @@ async def test_bad_none_retrieve_user_data(self, m): @aioresponses() async def test_good_retrieve_user_data(self, m): """Test a passing call to retrieve user data.""" - m.get("http://test.csc.fi/userinfo", payload={"ga4gh": [{}]}) + m.get("http://test.csc.fi/userinfo", payload={"ga4gh_passport_v1": [{}]}) user_data = await retrieve_user_data('good_token') self.assertEqual(user_data, [{}]) @@ -146,6 +146,30 @@ async def test_get_key(self, m): self.assertTrue(isinstance(result, dict)) self.assertTrue(result["keys"][0]['alg'], 'RSA256') + @aioresponses() + async def test_get_jwk(self, m): + """Test get JWK.""" + data = { + "keys": [ + { + "alg": "RS256", + "kty": "RSA", + "use": "sig", + "n": "public_key", + "e": "AQAB" + } + ]} + m.get("http://test.csc.fi/jwk", payload=data) + result = await get_jwk('http://test.csc.fi/jwk') + self.assertTrue(isinstance(result, dict)) + self.assertTrue(result["keys"][0]['alg'], 'RSA256') + + @asynctest.mock.patch('beacon_api.permissions.ga4gh.LOG') + async def test_get_jwk_bad(self, mock_log): + """Test get JWK exception log.""" + await get_jwk('http://test.csc.fi/jwk') + mock_log.error.assert_called_with("Could not retrieve JWK from http://test.csc.fi/jwk") + @asynctest.mock.patch('beacon_api.utils.validate.OAUTH2_CONFIG', return_value={'server': None}) async def test_bad_get_key(self, oauth_none): """Test bad test_get_key.""" diff --git a/tox.ini b/tox.ini index c8597cb0..e1bb94c4 100644 --- a/tox.ini +++ b/tox.ini @@ -16,6 +16,7 @@ commands = sphinx-build -W -c docs/ -b html docs/ docs/_build/html [testenv:bandit] skip_install = true +; plain search for known vulnerable code deps = bandit commands = bandit -r beacon_api/ -c .bandit.yml @@ -23,7 +24,7 @@ commands = bandit -r beacon_api/ -c .bandit.yml [testenv:flake8] skip_install = true deps = - pydocstyle==3.0.0 + pydocstyle flake8 flake8-docstrings commands = flake8 . 
@@ -36,7 +37,7 @@ deps = .[test] -rrequirements.txt # Stop after first failure -commands = py.test -x --cov=beacon_api tests/ +commands = py.test -x --cov=beacon_api tests/ --cov-fail-under=80 python {toxinidir}/tests/coveralls.py [travis]