From 7b46380148da0db41aba05487fdf051f0548591c Mon Sep 17 00:00:00 2001 From: Andrew Davison Date: Thu, 16 Nov 2023 12:23:24 +0100 Subject: [PATCH] restore the functionality to specify a Drive folder as the job code location. --- .gitlab-ci.yml | 20 +++--------------- api/deployment/nginx-app-staging.conf | 4 ++++ api/simqueue/data_repositories.py | 30 +++++++++++++++++++++++++++ api/simqueue/resources/for_users.py | 13 +++++++++--- api/simqueue/settings.py | 2 ++ 5 files changed, 49 insertions(+), 20 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 19eeb30..37cdbfc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,20 +17,6 @@ build_job_queue_production: tags: - shell-runner -build_job_queue_staging: - stage: build - only: - variables: - - $CI_COMMIT_BRANCH == "staging" - script: - - bash get_build_info.sh staging - - docker build -f job_manager/Dockerfile.staging -t docker-registry.ebrains.eu/neuromorphic/nmpi_queue_server:staging . - - echo $DOCKER_REGISTRY_USER - - docker login -u $DOCKER_REGISTRY_USER -p $DOCKER_REGISTRY_SECRET docker-registry.ebrains.eu - - docker push docker-registry.ebrains.eu/neuromorphic/nmpi_queue_server:staging - tags: - - shell-runner - build_quotas_production: stage: build only: @@ -65,9 +51,9 @@ build_job_queue_v3_staging: stage: build only: variables: - - $CI_COMMIT_BRANCH == "api-v3" + - $CI_COMMIT_BRANCH == "staging" script: - - bash get_build_info.sh api-v3 + - bash get_build_info.sh staging - docker build -f api/deployment/Dockerfile.staging -t docker-registry.ebrains.eu/neuromorphic/nmpi_queue_server_v3:staging api - echo $DOCKER_REGISTRY_USER - docker login -u $DOCKER_REGISTRY_USER -p $DOCKER_REGISTRY_SECRET docker-registry.ebrains.eu @@ -80,7 +66,7 @@ test_job_queue_v3_staging: stage: test only: variables: - - $CI_COMMIT_BRANCH == "api-v3" + - $CI_COMMIT_BRANCH == "staging" script: - python3 -m pip install -r api/requirements.txt.lock - python3 -m pip install -r api/requirements_testing.txt diff --git 
a/api/deployment/nginx-app-staging.conf b/api/deployment/nginx-app-staging.conf index 2b038fb..c44e79d 100644 --- a/api/deployment/nginx-app-staging.conf +++ b/api/deployment/nginx-app-staging.conf @@ -35,4 +35,8 @@ server { alias /home/docker/site/static; } + location /tmp_download { + alias /home/docker/site/tmp_download; + } + } diff --git a/api/simqueue/data_repositories.py b/api/simqueue/data_repositories.py index 2f5b902..fc2e722 100644 --- a/api/simqueue/data_repositories.py +++ b/api/simqueue/data_repositories.py @@ -1,9 +1,12 @@ import os +import uuid from urllib.request import urlretrieve, urlcleanup, HTTPError from urllib.parse import urlparse from ebrains_drive.client import DriveApiClient, BucketApiClient from ebrains_drive.exceptions import DoesNotExist +from . import settings + class SourceFileDoesNotExist(Exception): pass @@ -143,6 +146,33 @@ def _delete(cls, collab_name, path, access_token): dir_obj = target_repository.get_dir(path) dir_obj.delete() + @classmethod + def get_download_url(cls, drive_uri, user): + access_token = user.token["access_token"] + ebrains_drive_client = DriveApiClient(token=access_token) + assert drive_uri.startswith("drive://") + path = drive_uri[len("drive://") :] + + collab_name, *path_parts = path.split("/") + remote_path = "/".join([""] + path_parts) + + target_repository = ebrains_drive_client.repos.get_repo_by_url(collab_name) + try: + dir_obj = target_repository.get_dir(remote_path) + # todo: add option to overwrite files + except DoesNotExist: + raise SourceFileDoesNotExist(drive_uri) + # generate a random but repeatable name for the temporary file + os.makedirs(settings.TMP_FILE_ROOT, exist_ok=True) + zipfile_name = f"{uuid.uuid5(uuid.NAMESPACE_URL, drive_uri)}.zip" + if zipfile_name not in os.listdir(settings.TMP_FILE_ROOT): + # download zip of Drive directory contents + local_zipfile_path = os.path.join(settings.TMP_FILE_ROOT, zipfile_name) + _response = dir_obj.download(local_zipfile_path) + # todo: check
the response + + return f"{settings.TMP_FILE_URL}/{zipfile_name}" + class EBRAINSBucket: name = "EBRAINS Bucket" diff --git a/api/simqueue/resources/for_users.py b/api/simqueue/resources/for_users.py index ca18e76..fd78b57 100644 --- a/api/simqueue/resources/for_users.py +++ b/api/simqueue/resources/for_users.py @@ -25,7 +25,7 @@ Session, SessionStatus, ) -from ..data_repositories import SourceFileDoesNotExist, SourceFileIsTooBig +from ..data_repositories import SourceFileDoesNotExist, SourceFileIsTooBig, EBRAINSDrive from .. import db, oauth, utils from ..globals import PROVIDER_QUEUE_NAMES @@ -362,7 +362,11 @@ def normalize_code(code, collab, user): In all other cases, the function returns the value of `code` unchanged. """ - raise NotImplementedError + if code.startswith("drive://"): + # todo: add original `code` field to job provenance + return EBRAINSDrive.get_download_url(code, user) + else: + return code @router.post("/jobs/", response_model=AcceptedJob, status_code=status_codes.HTTP_201_CREATED) @@ -376,7 +380,10 @@ async def create_job( get_user_task = asyncio.create_task(oauth.User.from_token(token.credentials)) user = await get_user_task if (as_admin and user.is_admin) or user.can_edit(job.collab): - job.code = normalize_code(job.code, job.collab, user) + try: + job.code = normalize_code(job.code, job.collab, user) + except SourceFileDoesNotExist as err: + raise HTTPException(status_code=status_codes.HTTP_400_BAD_REQUEST, detail=str(err)) proceed = await utils.check_quotas(job.collab, job.hardware_platform, user=user.username) if proceed: accepted_job = await db.create_job(user_id=user.username, job=job.to_db()) diff --git a/api/simqueue/settings.py b/api/simqueue/settings.py index a033229..cbef7e1 100644 --- a/api/simqueue/settings.py +++ b/api/simqueue/settings.py @@ -13,3 +13,5 @@ BASE_URL = os.environ.get("NMPI_BASE_URL") # ADMIN_GROUP_ID = "" AUTHENTICATION_TIMEOUT = 20 +TMP_FILE_URL = os.environ.get("NMPI_BASE_URL") + "/tmp_download" +TMP_FILE_ROOT 
= "tmp_download"