diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 4ba20a407..7869e0a77 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.18.13 +current_version = 1.19.0 commit = True tag = False tag_name = {new_version} diff --git a/.gitignore b/.gitignore index 655efcb35..7307b17a2 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,7 @@ ubuntu-bionic-18.04-cloudimg-console.log ## vim temp files *.swp + +## Python temp files +**/__pycache__ +**/*.py[cod] diff --git a/CHANGES.md b/CHANGES.md index b501495c6..3fe9d7d2c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -16,6 +16,38 @@ [//]: # (list changes here, using '-' for each new entry, remove this when items are added) +[1.19.0](https://github.com/bird-house/birdhouse-deploy/tree/1.19.0) (2022-06-08) +------------------------------------------------------------------------------------------------------------------ + +## Changes: + +- Magpie/Twitcher: update `magpie` service + from [3.21.0](https://github.com/Ouranosinc/Magpie/tree/3.21.0) + to [3.26.0](https://github.com/Ouranosinc/Magpie/tree/3.26.0) and + bundled `twitcher` from [0.6.2](https://github.com/bird-house/twitcher/tree/v0.6.2) + to [0.7.0](https://github.com/bird-house/twitcher/tree/v0.7.0). + + - Adds [Service Hooks](https://pavics-magpie.readthedocs.io/en/latest/configuration.html#service-hooks) allowing + Twitcher to apply HTTP pre-request/post-response modifications to requested services and resources in accordance + with `MagpieAdapter` implementation and using plugin Python scripts when matched against specific request parameters. + + - Using *Service Hooks*, inject ``X-WPS-Output-Context`` header in Weaver job submission requests through the proxied + request by Twitcher and `MagpieAdapter`. This header contains the user ID that indicates to Weaver where to store + job output results, allowing to save them in the corresponding user's workspace directory under `wpsoutputs` path.
+ More details found in PR https://github.com/bird-house/birdhouse-deploy/pull/244. + + - Using *Service Hooks*, filter processes returned by Weaver in JSON response from ``/processes`` endpoint using + respective permissions applied onto each ``/processes/{processID}`` for the requesting user. Users will only be able + to see processes for which they have read access to retrieve the process description. + More details found in PR https://github.com/bird-house/birdhouse-deploy/pull/245. + + - Using *Service Hooks*, automatically apply permissions for the user that successfully deployed a Weaver process + using ``POST /processes`` request, granting it direct access to this process during process listing, process + description request and for submitting job execution of this process. + Only this user deploying the process will have access to it until further permissions are added in Magpie to share + or publish it with other users, groups and/or publicly. The user must have the necessary permission to deploy a new + process in the first place. More details found in PR https://github.com/bird-house/birdhouse-deploy/pull/247. + [1.18.13](https://github.com/bird-house/birdhouse-deploy/tree/1.18.13) (2022-06-07) ------------------------------------------------------------------------------------------------------------------ @@ -3351,4 +3383,3 @@ Prior Versions All versions prior to [1.7.0](https://github.com/bird-house/birdhouse-deploy/tree/1.7.0) were not officially tagged. Is it strongly recommended employing later versions to ensure better traceability of changes that could impact behavior and potential issues on new server instances. - diff --git a/README.rst b/README.rst index 48ca857e2..4b1495d7c 100644 --- a/README.rst +++ b/README.rst @@ -14,13 +14,13 @@ for a full-fledged production platform. * - releases - | |latest-version| |commits-since| -.. |commits-since| image:: https://img.shields.io/github/commits-since/bird-house/birdhouse-deploy/1.18.13.svg +.. 
|commits-since| image:: https://img.shields.io/github/commits-since/bird-house/birdhouse-deploy/1.19.0.svg :alt: Commits since latest release - :target: https://github.com/bird-house/birdhouse-deploy/compare/1.18.13...master + :target: https://github.com/bird-house/birdhouse-deploy/compare/1.19.0...master -.. |latest-version| image:: https://img.shields.io/badge/tag-1.18.13-blue.svg?style=flat +.. |latest-version| image:: https://img.shields.io/badge/tag-1.19.0-blue.svg?style=flat :alt: Latest Tag - :target: https://github.com/bird-house/birdhouse-deploy/tree/1.18.13 + :target: https://github.com/bird-house/birdhouse-deploy/tree/1.19.0 .. |readthedocs| image:: https://readthedocs.org/projects/birdhouse-deploy/badge/?version=latest :alt: ReadTheDocs Build Status (latest version) diff --git a/birdhouse/components/weaver/config/magpie/config.yml.template b/birdhouse/components/weaver/config/magpie/config.yml.template index 85d821038..6298aca70 100644 --- a/birdhouse/components/weaver/config/magpie/config.yml.template +++ b/birdhouse/components/weaver/config/magpie/config.yml.template @@ -10,6 +10,35 @@ providers: c4i: false type: api # FIXME: 'ades' when https://github.com/Ouranosinc/Magpie/issues/360 implemented sync_type: api + # hook locations must be the paths as mounted inside the Twitcher container, since hooks are run within that container + # see following for hooks details: + # - https://github.com/Ouranosinc/Magpie/blob/master/config/providers.cfg + # - https://pavics-magpie.readthedocs.io/en/latest/configuration.html#service-hooks + hooks: + # when a job is created in weaver, apply the header that will nest output results under user's context directory + # see also: + # - https://pavics-weaver.readthedocs.io/en/latest/processes.html?highlight=x-wps-output-context#outputs-location + # the paths below are equivalent, but with more or less specific reference to the requested service/process + - type: request + path: "/providers/[\\w_-]+/processes/[\\w_-]+/jobs" + method:
POST + target: /opt/birdhouse/src/magpie/hooks/weaver_hooks.py:add_x_wps_output_context + - type: request + path: "/processes/[\\w_-]+/jobs" + method: POST + target: /opt/birdhouse/src/magpie/hooks/weaver_hooks.py:add_x_wps_output_context + - type: request + path: "/jobs" + method: POST + target: /opt/birdhouse/src/magpie/hooks/weaver_hooks.py:add_x_wps_output_context + - type: response + path: "/processes" + method: GET + target: /opt/birdhouse/src/magpie/hooks/weaver_hooks.py:filter_allowed_processes + - type: response + path: "/processes" + method: POST + target: /opt/birdhouse/src/magpie/hooks/weaver_hooks.py:allow_user_deployed_processes # FIXME: remove when https://github.com/Ouranosinc/Magpie/issues/360 implemented, see 'default.env' ${WEAVER_WPS_NAME}: @@ -66,9 +95,10 @@ permissions: action: create # Process deployment (write) and listing (read) + # use 'read-match' to allow only listing, and not describe underlying processes (require 'read' on them individually) - service: ${WEAVER_MANAGER_NAME} resource: /processes # GET is processes listing, POST is deploy: only allow view by anonymous - permission: read # under '/processes/...', JSON 'DescribeProcess', POST job submit, GET results, etc. + permission: read-match # under '/processes/...', JSON 'DescribeProcess', POST job submit, GET results, etc. group: anonymous action: create diff --git a/birdhouse/components/weaver/config/magpie/weaver_hooks.py b/birdhouse/components/weaver/config/magpie/weaver_hooks.py new file mode 100644 index 000000000..045cace3b --- /dev/null +++ b/birdhouse/components/weaver/config/magpie/weaver_hooks.py @@ -0,0 +1,281 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +These hooks will be running within Twitcher, using MagpieAdapter context, applied for Weaver requests. + +The code below can make use of any package that is installed by Magpie/Twitcher. + +.. 
seealso:: + Documentation about Magpie/Twitcher request/response hooks is available here: + https://pavics-magpie.readthedocs.io/en/latest/configuration.html#service-hooks +""" + +import json +from typing import TYPE_CHECKING + +import transaction + +from magpie.api.management.resource import resource_utils as ru +from magpie.api.management.user import user_utils as uu +from magpie.api.requests import get_user, get_service_matchdict_checked +from magpie.constants import get_constant +from magpie.models import Route +from magpie.permissions import Access, Permission, PermissionSet, Scope +from magpie.utils import get_header, get_logger + +if TYPE_CHECKING: + from pyramid.request import Request + from pyramid.response import Response + + from magpie.adapter import HookContext + +LOGGER = get_logger("birdhouse-weaver-hooks") + + +def is_admin(request): + # type: (Request) -> bool + admin_group = get_constant("MAGPIE_ADMIN_GROUP", settings_container=request) + if not request.user: # no user authenticated (public) + return False + return admin_group in [group.group_name for group in request.user.groups] + + +def add_x_wps_output_context(request): + # type: (Request) -> Request + """ + Apply the ``X-WPS-Output-Context`` for saving outputs in the user-context WPS-outputs directory. 
+ """ + header = get_header("X-WPS-Output-Context", request.headers) + # if explicitly provided, ensure it is permitted (admin allow any, otherwise self-user reference only) + if header is not None: + if request.user is None: + header = "public" + else: + if not is_admin(request): + # override disallowed writing to other location + # otherwise, up to admin to have written something sensible + header = "user-" + str(request.user.id) + else: + if request.user is None: + header = "public" + else: + header = "user-" + str(request.user.id) + request.headers["X-WPS-Output-Context"] = header + return request + + +def filter_allowed_processes(response, context): + # type: (Response, HookContext) -> Response + """ + Filter processes returned by Weaver response according to allowed resources by user. + + Following are sample (clipped) JSON body that can be expected from Weaver (or any OGC API - Processes). + + Using ``GET https:///processes`` + + .. code-block:: json + :caption: Detailed process listing from Weaver (other fields than 'processes' are removed for concise example). + + { + "processes": [ + { + "id": "ColibriFlyingpigeon_SubsetBbox", + "title": "ColibriFlyingpigeon_SubsetBbox", + "mutable": true, + "keywords": [ + "application" + ], + "metadata": [], + "jobControlOptions": [ + "async-execute" + ], + "outputTransmission": [ + "reference", + "value" + ], + "processDescriptionURL": "https:///processes/ColibriFlyingpigeon_SubsetBbox", + "processEndpointWPS1": "https:///ows/wps", + "executeEndpoint": "https:///processes/ColibriFlyingpigeon_SubsetBbox/jobs" + } + ] + } + + Using ``GET https:///processes?detail=false`` + + .. code-block:: json + :caption: Simple process listing from Weaver (other fields than 'processes' are removed for concise example). 
+ + { + "description": "Listing of available processes successful.", + "processes": [ + "CatFile", + "ColibriFlyingpigeon_SubsetBbox" + ], + "page": 0, + "total": 2 + } + + """ + if "application/json" in response.content_type: + body = response.json + if "processes" in body: + if is_admin(response.request): # don't waste time checking permissions, full access anyway + return response + + # depending on 'detail' query, processes can be returned as list of IDs or nested JSON summaries + processes = { + proc if isinstance(proc, str) else proc.get("id"): proc + for proc in body["processes"] + } + + # only need 2 first levels ('processes' and each process 'id' under it) + children = ru.get_resource_children(context.resource, response.request.db, limit_depth=2) + proc_res = None + for res in children.values(): + if res["node"].resource_name == "processes": + # if nothing under 'processes' resource, then guarantee no permissions, done check (NOTE(review): listing is returned unfiltered here, while processes without a resource are dropped in the loop below; confirm intended) + if not res["children"]: + return response + proc_res = res + break + if not proc_res: + return response # 'processes' itself does not exist, no permissions possible and done check + + allowed_processes = [] + known_processes = proc_res["children"].values() + known_processes = {res["node"].resource_name: res for res in known_processes} + request_user = get_user(response.request) + for proc_name in processes: + if proc_name not in known_processes: + continue # do not bother checking missing resource + child_proc = known_processes[proc_name]["node"] + perms = context.service.effective_permissions(request_user, child_proc, [Permission.READ]) + if perms[0].access == Access.ALLOW: + proc = processes[proc_name] + allowed_processes.append(proc) + + # override collected and permitted processes access by user + body["processes"] = allowed_processes + + # WARNING: + # JSON generated from 'body' attribute cannot be overridden directly (computed inline). + # Also, since we override, must set any Content header accordingly with modifications.
+ data = json.dumps(body).encode("UTF-8") + response.body = data + c_len = len(data) + response.content_length = c_len + response.headers["Content-Length"] = str(c_len) + + return response + + +def allow_user_deployed_processes(response): + # type: (Response) -> Response + """ + Add the user permissions to read (listing and description) and execute the process for the deploying user. + + This will grant access to the process definition by the user that deployed it until they desire to make it public. + At a later time, a request to the appropriate group to share (restricted group) or to publish publicly (anonymous) + could be made to create the relevant permissions to describe or execute the process by other users. + + Expected response format from service: + + .. code-block:: json + { + "processSummary": { + "id": "", + "..." + } + } + + If any failure occurs, simply return the response to let deployment succeed, but user will not receive access to it + automatically. Manual update of permissions would be necessary by platform administrator via Magpie. 
+ """ + p_id = "" + u_name = "" + try: + # only apply permission if deployment was successful + if "application/json" in response.content_type and response.status_code == 201: + body = response.json + info = body.get("processSummary", {}) or body.get("process", {}) # bw-compat + p_id = info.get("id") + if not (p_id and isinstance(p_id, str)): + return response + + # user is not necessarily admin + # in fact, this operation is only needed if non-admin, since admin has full access anyway + request = response.request + if is_admin(request): + return response + user = request.user + # if deploy endpoint was made public, then even anonymous could deploy (not recommended, but possible) + if not user: + user = get_user(request) + u_name = user.user_name + + # note: matchdict reference of Twitcher owsproxy view is used, just so happens to be same name as Magpie + service = get_service_matchdict_checked(request) + + # find the nested resource matching: "weaver/processes/<process-id>" + children = ru.get_resource_children(service, request.db, limit_depth=2) + p_res = None + for res in children.values(): + if res["node"].resource_name == "processes": + processes_res_id = res["node"].resource_id + for child_res in res["children"].values(): + if child_res["node"].resource_name == p_id: + p_res = child_res["node"] + break + break + else: + # resource 'processes' should already exist, but create it if somehow missing + # otherwise, it will be impossible to create '<process-id>' under it + resp = ru.create_resource("processes", None, Route.resource_type_name, service.resource_id, request.db) + processes_res_id = resp.json["resource"]["resource_id"] + + # note: + # since this is running within a *response* hook, the request transaction is already handled + # define a new transaction to create new resources + with transaction.manager: + + # if '<process-id>' somehow already exists, use it + if p_res is None: + resp = ru.create_resource(p_id, None, Route.resource_type_name, processes_res_id, request.db) + p_res_id =
resp.json["resource"]["resource_id"] + p_res = ru.ResourceService.by_resource_id(p_res_id, request.db) + if not p_res: + LOGGER.warning( + "Failed creation of permissions for user [%s] to access deployed process [%s] in Weaver. " + "Could not retrieve resource matching deployed process!", u_name, p_id + ) + return response + + # apply necessary permissions to give full access to the deployed process to the user + # override permissions to undo what could have been previously applied (only if already existed) + p_desc = PermissionSet(Permission.READ, Access.ALLOW, Scope.RECURSIVE) # describe proc + jobs statuses + p_exec = PermissionSet(Permission.WRITE, Access.ALLOW, Scope.RECURSIVE) # edit process + execute jobs + r_desc = uu.create_user_resource_permission_response(user, p_res, p_desc, request.db, overwrite=True) + r_exec = uu.create_user_resource_permission_response(user, p_res, p_exec, request.db, overwrite=True) + + # submit transaction results (new resources and permissions) + transaction.commit() + + # sanity check + if r_desc.status_code in [200, 201] and r_exec.status_code in [200, 201]: + LOGGER.info( + "Successful creation of permissions for user [%s] to access deployed process [%s] in Weaver.", + u_name, p_id + ) + else: + statuses = [r_desc.status_code, r_exec.status_code] + LOGGER.warning( + "Failed creation of permissions for user [%s] to access deployed process [%s] in Weaver. " + "Permission creation returned unexpected statuses: %s", u_name, p_id, statuses + ) + except Exception as exc: + LOGGER.error( + "Failed creation of permissions for user [%s] to access deployed process [%s] in Weaver. "
" + "Unexpected exception occurred: [%s]", u_name, p_id, str(exc) + ) + + return response diff --git a/birdhouse/components/weaver/docker-compose-extra.yml b/birdhouse/components/weaver/docker-compose-extra.yml index 2e16bb60f..84d16c375 100644 --- a/birdhouse/components/weaver/docker-compose-extra.yml +++ b/birdhouse/components/weaver/docker-compose-extra.yml @@ -23,12 +23,19 @@ services: magpie: volumes: # NOTE: - # Although file use the "config.yml" format, it is very important to pass it as independent/duplicate reference - # provider/permissions ".cfg" files. This is because Magpie will not parse multiple "config.yml" files - # additively with other component's ".cfg" files, as "config.yml" are intended for unique-combined definitions. - # Data structure within "config.yml" is the same as within the respective sections in typical ".cfg" files. - - ./components/weaver/config/magpie/config.yml:/opt/local/src/magpie/config/permissions/weaver-permissions.cfg:ro - - ./components/weaver/config/magpie/config.yml:/opt/local/src/magpie/config/providers/weaver-provider.cfg:ro + # Although file uses the "config.yml" format, it is very important to pass it as independent/duplicate reference + # provider/permissions config files. This is because 'MAGPIE_CONFIG_PATH' is not used to allow parsing multiple + # config files for each extendable service, using loading of all configuration files found in mount directories. 
+ - ./components/weaver/config/magpie/config.yml:/opt/local/src/magpie/config/permissions/weaver-permissions.yml:ro + - ./components/weaver/config/magpie/config.yml:/opt/local/src/magpie/config/providers/weaver-provider.yml:ro + + # extend twitcher with MagpieAdapter hooks employed for weaver proxied requests + twitcher: + volumes: + # NOTE: MagpieAdapter hooks are defined within Magpie config, but it is actually Twitcher proxy that runs them + # target mount location depends on main docker-compose 'MAGPIE_PROVIDERS_CONFIG_PATH' environment variable + - ./components/weaver/config/magpie/config.yml:/opt/birdhouse/src/magpie/config/weaver-config.yml:ro + - ./components/weaver/config/magpie/weaver_hooks.py:/opt/birdhouse/src/magpie/hooks/weaver_hooks.py:ro # Image 'weaver' is the API side of the application weaver: diff --git a/birdhouse/default.env b/birdhouse/default.env index c35ff8995..214887eb0 100644 --- a/birdhouse/default.env +++ b/birdhouse/default.env @@ -22,7 +22,7 @@ export GEOSERVER_IMAGE="pavics/geoserver:2.19.0-kartoza-build20210329" export BASH_IMAGE="bash:5.1.4" # Tag version that will be used to update Magpie API, Magpie CLI, and matching Twitcher with Magpie Adapter -export MAGPIE_VERSION=3.21.0 +export MAGPIE_VERSION=3.26.0 # Root directory under which all data persistence should be nested under export DATA_PERSIST_ROOT="/data" diff --git a/birdhouse/docker-compose.yml b/birdhouse/docker-compose.yml index 121e2678b..7dc212f14 100644 --- a/birdhouse/docker-compose.yml +++ b/birdhouse/docker-compose.yml @@ -333,7 +333,8 @@ services: environment: TWITCHER_PROTECTED_URL: https://${PAVICS_FQDN_PUBLIC}${TWITCHER_PROTECTED_PATH} # target directories to allow loading multiple config files of corresponding category - # each compose override should volume mount its files in the matching directories + # each compose override should volume mount its files inside the matching directories + # (note: DO NOT use 'MAGPIE_CONFIG_PATH' that would disable multi-config 
loading capability) MAGPIE_PROVIDERS_CONFIG_PATH: "/opt/local/src/magpie/config/providers" MAGPIE_PERMISSIONS_CONFIG_PATH: "/opt/local/src/magpie/config/permissions" MAGPIE_POSTGRES_HOST: postgres-magpie @@ -358,6 +359,14 @@ services: container_name: twitcher ports: - "8000:8000" + environment: + # target directories to allow loading multiple config files of corresponding category + # each compose override should volume mount its files inside the below directory + # (note: DO NOT use 'MAGPIE_CONFIG_PATH' that would disable multi-config loading capability) + # Only 'providers' sections are used to employ 'request/response hooks' with 'MagpieAdapter'. + # Hooks are defined within Magpie config, but it is actually Twitcher proxy that runs them. + # Other Magpie components are unknown and ignored by Twitcher itself. + MAGPIE_PROVIDERS_CONFIG_PATH: "/opt/birdhouse/src/magpie/config" env_file: - ./config/postgres-magpie/credentials.env depends_on: