diff --git a/.gitignore b/.gitignore index ac4206b..3cf1707 100644 --- a/.gitignore +++ b/.gitignore @@ -165,3 +165,7 @@ cython_debug/ # --- **/ro-crate-metadata.json +sapporo_example/run +sapporo_example/results +sapporo_example/ro-crate-metadata.json +sapporo_example/ro-crate-metadata_failed.json diff --git a/sapporo_example/complete/ro-crate-metadata.json b/sapporo_example/complete/ro-crate-metadata.json deleted file mode 100644 index 6637d8b..0000000 --- a/sapporo_example/complete/ro-crate-metadata.json +++ /dev/null @@ -1,130 +0,0 @@ -{ - "@context": "https://w3id.org/ro/crate/1.1/context", - "@graph": [ - { - "@id": "./", - "@type": "Dataset", - "hasPart": [ - { - "@id": "outputs/ERR034597_1.small_fastqc.html" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.1U.fq" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.2P.fq" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.2U.fq" - }, - { - "@id": "outputs/ERR034597_2.small_fastqc.html" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.1P.fq" - }, - { - "@id": "outputs/" - } - ], - "name": "example research project", - "datePublished": "2023-03-14T04:20:36.202+00:00" - }, - { - "@id": "ro-crate-metadata.json", - "@type": "CreativeWork", - "conformsTo": { - "@id": "https://w3id.org/ro/crate/1.1" - }, - "about": { - "@id": "./" - } - }, - { - "@id": "outputs/ERR034597_1.small_fastqc.html", - "@type": "File", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "name": "ERR034597_1.small_fastqc.html" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.1U.fq", - "@type": "File", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "name": "ERR034597_1.small.fq.trimmed.1U.fq", - "contentSize": "187289B", - "sha256": "f4ca88b95062fe9335b80826929aa2760571d5c6bd1be5c6c00000dc006af852" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.2P.fq", - "@type": "File", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "name": "ERR034597_1.small.fq.trimmed.2P.fq", - "contentSize": "5560582B", - "sha256": "f551bf14128f5035fe3c87e37b3260c7804b0d9716e181020dbcc5f4a4e32af2" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.2U.fq", - "@type": "File", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "name": "ERR034597_1.small.fq.trimmed.2U.fq", - "contentSize": "80131B", - "sha256": "2725ebbb861b73b563839e5fc9d576bf359c0a9f69cb59514077c2222db00c45" - }, - { - "@id": "outputs/ERR034597_2.small_fastqc.html", - "@type": "File", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "name": "ERR034597_2.small_fastqc.html" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.1P.fq", - "@type": "File", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "name": "ERR034597_1.small.fq.trimmed.1P.fq", - "contentSize": "5566650B", - "sha256": "1c1ce97df2ce1199dc57a15963e5e3bfcc128dd635c5870b60daa512ec8ccdaa" - }, - { - "@id": "outputs/", - "@type": "Dataset", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "name": "outputs", - "hasPart": [ - { - "@id": "outputs/ERR034597_1.small_fastqc.html" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.1U.fq" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.2P.fq" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.2U.fq" - }, - { - "@id": "outputs/ERR034597_2.small_fastqc.html" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.1P.fq" - } - ] - }, - { - "@id": "#sapporo-run", - "@type": "SapporoRun", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "workflow_params": "{\"fastq_1\":{\"location\":\"ERR034597_1.small.fq.gz\",\"class\":\"File\"},\"fastq_2\":{\"location\":\"ERR034597_2.small.fq.gz\",\"class\":\"File\"},\"nthreads\":2}", - "workflow_type": "CWL", - "workflow_type_version": "v1.0", - "workflow_engine_name": "cwltool", - "workflow_url": "https://raw.githubusercontent.com/sapporo-wes/sapporo-service/main/tests/resources/cwltool/trimming_and_qc.cwl", - "workflow_attachment": "[{\"file_name\": \"ERR034597_2.small.fq.gz\", \"file_url\": \"https://raw.githubusercontent.com/sapporo-wes/sapporo-service/main/tests/resources/cwltool/ERR034597_2.small.fq.gz\"}, {\"file_name\": \"ERR034597_1.small.fq.gz\", \"file_url\": \"https://raw.githubusercontent.com/sapporo-wes/sapporo-service/main/tests/resources/cwltool/ERR034597_1.small.fq.gz\"}]", - "sapporo_location": "http://sapporo-service:1122", - "state": "COMPLETE", - "outputs": { - "@id": "outputs/" - } - } - ] -} \ No newline at end of file diff --git a/sapporo_example/compose.yml b/sapporo_example/compose.yml new file mode 100644 index 0000000..8d088d1 --- /dev/null +++ b/sapporo_example/compose.yml @@ -0,0 +1,32 @@ +version: "3.5" +services: + nii-dg: + build: + context: .. + dockerfile: Dockerfile-api + image: nii-dg + container_name: nii-dg + ports: + - 0.0.0.0:5000:5000 + restart: on-failure + networks: + - nii-dg-network + sapporo-service: + image: ghcr.io/sapporo-wes/sapporo-service:1.4.9 + container_name: sapporo-service + volumes: + - ${PWD}/run:${PWD}/run + - /var/run/docker.sock:/var/run/docker.sock + - /tmp:/tmp + environment: + - SAPPORO_DEBUG=False + - SAPPORO_RUN_DIR=${PWD}/run + ports: + - 0.0.0.0:1122:1122 + restart: on-failure + networks: + - nii-dg-network + +networks: + nii-dg-network: + name: nii-dg-network diff --git a/sapporo_example/download_results.py b/sapporo_example/download_results.py new file mode 100644 index 0000000..f9a403d --- /dev/null +++ b/sapporo_example/download_results.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +import argparse +import json +from pathlib import Path +from typing import List +from urllib.request import urlopen + + +def fetch_output_file_list(run_id: str, endpoint: str) -> List[str]: + with urlopen(f"{endpoint.rstrip('/')}/runs/{run_id}") as response: + result = json.load(response) + return [output["file_name"] for output in result["outputs"]] + + +def download_results(endpoint: str, run_id: str, output_dir: Path) -> None: + output_dir.mkdir(parents=True, exist_ok=True) + output_file_list = fetch_output_file_list(run_id, endpoint) + + for output_file in output_file_list: + with urlopen(f"{endpoint.rstrip('/')}/runs/{run_id}/data/outputs/{output_file}") as response: + file_path = output_dir.joinpath("outputs").joinpath(output_file).resolve() + file_path.parent.mkdir(parents=True, exist_ok=True) + with open(file_path, "wb") as f: + f.write(response.read()) + + # download run_request.json + with urlopen(f"{endpoint.rstrip('/')}/runs/{run_id}/data/run_request.json") as response: + file_path = output_dir.joinpath("run_request.json").resolve() + with open(file_path, "wb") as f: + f.write(response.read()) + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("endpoint", help="The endpoint to send the request (default: http://localhost:1122)", + default="http://localhost:1122", nargs="?") + parser.add_argument("run_id", help="The RUN_ID of the executed workflow") + parser.add_argument("output_dir", help="The output directory to save the results (default: ./results)", + default="./results", nargs="?") + args = parser.parse_args() + + download_results(args.endpoint, args.run_id, Path(args.output_dir).resolve()) + + +if __name__ == "__main__": + main() diff --git a/sapporo_example/execute_workflow.py b/sapporo_example/execute_workflow.py new file mode 100644 index 0000000..2ca8f5f --- /dev/null +++ b/sapporo_example/execute_workflow.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +import argparse +import json +from urllib.parse import urlencode +from urllib.request import Request, urlopen + + +def execute_workflow(endpoint: str) -> None: + data = { + "workflow_params": json.dumps({ + "fastq_1": {"location": "ERR034597_1.small.fq.gz", "class": "File"}, + "fastq_2": {"location": "ERR034597_2.small.fq.gz", "class": "File"}, + "nthreads": 2 + }), + "workflow_type": "CWL", + "workflow_type_version": "v1.0", + "workflow_url": "https://raw.githubusercontent.com/sapporo-wes/sapporo-service/main/tests/resources/cwltool/trimming_and_qc.cwl", + "workflow_engine_name": "cwltool", + "workflow_attachment": json.dumps([ + {"file_url": "https://raw.githubusercontent.com/sapporo-wes/sapporo-service/main/tests/resources/cwltool/ERR034597_2.small.fq.gz", "file_name": "ERR034597_2.small.fq.gz"}, + {"file_url": "https://raw.githubusercontent.com/sapporo-wes/sapporo-service/main/tests/resources/cwltool/ERR034597_1.small.fq.gz", "file_name": "ERR034597_1.small.fq.gz"} + ]) + } + + data = urlencode(data).encode("utf-8") # type: ignore + headers = {"Content-Type": "application/x-www-form-urlencoded"} + # remove trailing slash from endpoint + request = Request(f"{endpoint.rstrip('/')}/runs", data=data, headers=headers) # type: ignore + + with urlopen(request) as response: + result = json.load(response) + print(result) + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("endpoint", help="The endpoint to send the request (default: http://localhost:1122)", + default="http://localhost:1122", nargs="?") + args = parser.parse_args() + execute_workflow(args.endpoint) + + +if __name__ == "__main__": + main() diff --git a/sapporo_example/failed/ro-crate-metadata.json b/sapporo_example/failed/ro-crate-metadata.json deleted file mode 100644 index e047173..0000000 --- a/sapporo_example/failed/ro-crate-metadata.json +++ /dev/null @@ -1,134 +0,0 @@ -{ - "@context": "https://w3id.org/ro/crate/1.1/context", - "@graph": [ - { - "@id": "./", - "@type": "Dataset", - "hasPart": [ - { - "@id": "outputs/ERR034597_1.small_fastqc.html" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.1U.fq" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.2P.fq" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.2U.fq" - }, - { - "@id": "outputs/ERR034597_2.small_fastqc.html" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.1P.fq" - }, - { - "@id": "outputs/" - } - ], - "name": "example research project", - "datePublished": "2023-03-14T04:24:35.757+00:00" - }, - { - "@id": "ro-crate-metadata.json", - "@type": "CreativeWork", - "conformsTo": { - "@id": "https://w3id.org/ro/crate/1.1" - }, - "about": { - "@id": "./" - } - }, - { - "@id": "outputs/ERR034597_1.small_fastqc.html", - "@type": "File", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "name": "ERR034597_1.small_fastqc.html", - "contentSize": "592393B", - "sha256": "e8b481c75d81f97080d8d61b9a21558c91e94488cc89dd20d09de4f7171df32d" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.1U.fq", - "@type": "File", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "name": "ERR034597_1.small.fq.trimmed.1U.fq", - "contentSize": "187289B", - "sha256": "f4ca88b95062fe9335b80826929aa2760571d5c6bd1be5c6c00000dc006af852" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.2P.fq", - "@type": "File", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "name": "ERR034597_1.small.fq.trimmed.2P.fq", - "contentSize": "5560582B", - "sha256": "f551bf14128f5035fe3c87e37b3260c7804b0d9716e181020dbcc5f4a4e32af2" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.2U.fq", - "@type": "File", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "name": "ERR034597_1.small.fq.trimmed.2U.fq", - "contentSize": "80131B", - "sha256": "2725ebbb861b73b563839e5fc9d576bf359c0a9f69cb59514077c2222db00c45" - }, - { - "@id": "outputs/ERR034597_2.small_fastqc.html", - "@type": "File", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "name": "ERR034597_2.small_fastqc.html", - "contentSize": "592565B", - "sha256": "0f977665918cfebd9be552bb670c7c78cb70df83f1eb6af3303381062f9ff8d4" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.1P.fq", - "@type": "File", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "name": "ERR034597_1.small.fq.trimmed.1P.fq", - "contentSize": "5566650B", - "sha256": "1c1ce97df2ce1199dc57a15963e5e3bfcc128dd635c5870b60daa512ec8ccdaa" - }, - { - "@id": "outputs/", - "@type": "Dataset", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "name": "outputs", - "hasPart": [ - { - "@id": "outputs/ERR034597_1.small_fastqc.html" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.1U.fq" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.2P.fq" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.2U.fq" - }, - { - "@id": "outputs/ERR034597_2.small_fastqc.html" - }, - { - "@id": "outputs/ERR034597_1.small.fq.trimmed.1P.fq" - } - ] - }, - { - "@id": "#sapporo-run", - "@type": "SapporoRun", - "@context": "https://raw.githubusercontent.com/NII-DG/nii_dg/main/schema/context/sapporo.jsonld", - "workflow_params": "{\"fastq_1\":{\"location\":\"ERR034597_1.small.fq.gz\",\"class\":\"File\"},\"fastq_2\":{\"location\":\"ERR034597_2.small.fq.gz\",\"class\":\"File\"},\"nthreads\":2}", - "workflow_type": "CWL", - "workflow_type_version": "v1.0", - "workflow_engine_name": "cwltool", - "workflow_url": "https://raw.githubusercontent.com/sapporo-wes/sapporo-service/main/tests/resources/cwltool/trimming_and_qc.cwl", - "workflow_attachment": "[{\"file_name\": \"ERR034597_2.small.fq.gz\", \"file_url\": \"https://raw.githubusercontent.com/sapporo-wes/sapporo-service/main/tests/resources/cwltool/ERR034597_2.small.fq.gz\"}, {\"file_name\": \"ERR034597_1.small.fq.gz\", \"file_url\": \"https://raw.githubusercontent.com/sapporo-wes/sapporo-service/main/tests/resources/cwltool/ERR034597_1.small.fq.gz\"}]", - "sapporo_location": "http://sapporo-service:1122", - "state": "COMPLETE", - "outputs": { - "@id": "outputs/" - } - } - ] -} \ No newline at end of file diff --git a/sapporo_example/initial_run.py b/sapporo_example/initial_run.py deleted file mode 100644 index 48f7f13..0000000 --- a/sapporo_example/initial_run.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python3 -# coding: utf-8 - -import json -import subprocess -from pathlib import Path -from typing import Any, Dict - -import requests - -from nii_dg.utils import download_file_from_url, get_sapporo_run_status - - -def get_initial_run_id(sh_path: Path) -> str: - proc = subprocess.run(["sh", sh_path], text=True, capture_output=True, check=True) - req_dict = json.loads(proc.stdout) - return req_dict["run_id"] - - -def get_run_results(run_id: str, endpoint: str) -> Dict[str, Any]: - run_result = requests.get(endpoint + "/runs/" + run_id, timeout=(10, 30)) - - return run_result.json() - - -def execute_initial_run(sh_path: Path, run_dir: Path, endpoint: str) -> None: - run_id = get_initial_run_id(sh_path) - state = get_sapporo_run_status(run_id, endpoint) - - if state not in ["COMPLETE", "EXECUTOR_ERROR"]: - raise ValueError("Initial execution didn't run successfully.") - run_results = get_run_results(run_id, endpoint) - - file_list = ["outputs/" + file_dict["file_name"] for file_dict in run_results["outputs"]] - file_list += ["run_request.json"] - - for file_name in file_list: - download_file_from_url(endpoint + "/runs/" + run_id + "/data/" + file_name, run_dir.joinpath(file_name)) - - -if __name__ == "__main__": - dir_path = Path(__file__).with_name("initial_run_dir") - sapporo_endpoint = "http://sapporo-service:1122" - - execute_initial_run(Path(__file__).with_name("initial_run.sh"), dir_path, sapporo_endpoint) diff --git a/sapporo_example/initial_run.sh b/sapporo_example/initial_run.sh deleted file mode 100644 index 5e6e83d..0000000 --- a/sapporo_example/initial_run.sh +++ /dev/null @@ -1,8 +0,0 @@ -curl -X POST \ - -F 'workflow_params={"fastq_1":{"location":"ERR034597_1.small.fq.gz","class":"File"},"fastq_2":{"location":"ERR034597_2.small.fq.gz","class":"File"},"nthreads":2}' \ - -F 'workflow_type=CWL' \ - -F 'workflow_type_version=v1.0' \ - -F 'workflow_url=https://raw.githubusercontent.com/sapporo-wes/sapporo-service/main/tests/resources/cwltool/trimming_and_qc.cwl' \ - -F 'workflow_engine_name=cwltool' \ - -F 'workflow_attachment=[{"file_url": "https://raw.githubusercontent.com/sapporo-wes/sapporo-service/main/tests/resources/cwltool/ERR034597_2.small.fq.gz","file_name": "ERR034597_2.small.fq.gz"},{"file_url": "https://raw.githubusercontent.com/sapporo-wes/sapporo-service/main/tests/resources/cwltool/ERR034597_1.small.fq.gz","file_name": "ERR034597_1.small.fq.gz"}]' \ - http://localhost:1122/runs diff --git a/sapporo_example/initial_run_dir/outputs/ERR034597_1.small.fq.trimmed.1P.fq b/sapporo_example/initial_run_dir/outputs/ERR034597_1.small.fq.trimmed.1P.fq deleted file mode 100644 index 24f9382..0000000 --- a/sapporo_example/initial_run_dir/outputs/ERR034597_1.small.fq.trimmed.1P.fq +++ /dev/null @@ -1,94556 +0,0 @@ -@ERR034597.3 FCB06A1ABXX:6:1101:1190:1997#ACTTGAAT/1 -TAAATCCAAACTATAGCTGTCGTTTTTATCATTTCCCAATACAGAAGCTTTTCTTTTGCTTGTTCGTAATGCTACACTTTTTGTTCAGCT -+ -HHHHHHHGHHHFGFHHHHHFHHHHHHHHHCEHHHHGFFHHHHHHEHDHHHEHEEHHFDHHHEHHBFCAEEHHHFHHFHHHHF?HEHFCBF -@ERR034597.4 FCB06A1ABXX:6:1101:1447:1971#ACTTGAAT/1 -CTTTTGACCGCTAAGAACCAAGGTCTTACCTGTTTTTTCAGCTTTTTCTTCTCCTTTTTGCTAAGATGAGCATAGGGATCATCTGCCT -+ -626<<79