From 58afc1d6c4f5b1c70fc27045eca35b8197613557 Mon Sep 17 00:00:00 2001
From: Bjoern Gruening
Date: Thu, 22 Dec 2016 00:22:51 +0100
Subject: [PATCH 1/3] add new script to populate the container

---
Review notes (text between '---' and the diffstat is ignored by git am):
 * Line structure of the series was restored; indentation inside the
   hunks is reconstructed from the code's syntax.
 * setup_data_libraries.py was revised during review:
   - yaml.load -> yaml.safe_load (no arbitrary object construction),
   - the job polling loop now stops on any terminal job state instead of
     spinning forever when an upload job ends in 'error',
   - the log message names the library that is actually created,
   - the inner loop variable no longer shadows the server `url`,
   - -i/--infile is required, an empty description file is a no-op.
   Diffstat and hunk header reflect the new line count; the blob ids on
   the 'index' lines are still those of the original submission.

 import_workflows.py     | 12 ++++++
 library_data.yaml       |  5 +++
 setup_data_libraries.py | 94 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 111 insertions(+)
 create mode 100644 import_workflows.py
 create mode 100644 library_data.yaml
 create mode 100644 setup_data_libraries.py

diff --git a/import_workflows.py b/import_workflows.py
new file mode 100644
index 0000000..565b471
--- /dev/null
+++ b/import_workflows.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+
+import os
+from bioblend import galaxy
+admin_email = os.environ.get('GALAXY_DEFAULT_ADMIN_USER', 'admin@galaxy.org')
+admin_pass = os.environ.get('GALAXY_DEFAULT_ADMIN_PASSWORD', 'admin')
+url = "http://localhost:8080"
+gi = galaxy.GalaxyInstance(url=url, email=admin_email, password=admin_pass)
+
+wf = galaxy.workflows.WorkflowClient(gi)
+wf.import_workflow_from_local_path('/galaxy-dist/GraphClust_two.ga')
+wf.import_workflow_from_local_path('/galaxy-dist/GraphClust_one.ga')
diff --git a/library_data.yaml b/library_data.yaml
new file mode 100644
index 0000000..991b642
--- /dev/null
+++ b/library_data.yaml
@@ -0,0 +1,5 @@
+libraries:
+    - name: "Test data"
+      files:
+        - https://raw.githubusercontent.com/eteriSokhoyan/test-data/master/cliques-high-representatives.fa
+        - https://raw.githubusercontent.com/eteriSokhoyan/test-data/master/cliques-low-representatives.fa
diff --git a/setup_data_libraries.py b/setup_data_libraries.py
new file mode 100644
index 0000000..65db95e
--- /dev/null
+++ b/setup_data_libraries.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+
+import os
+import time
+import yaml
+import argparse
+import subprocess
+import logging as log
+from bioblend import galaxy
+from subprocess import CalledProcessError
+
+# Job states in which Galaxy considers a job finished.
+TERMINAL_JOB_STATES = ('ok', 'error', 'deleted')
+
+
+def main( data ):
+    """
+    Load files into a Galaxy data library.
+
+    data: open file (or string) holding the YAML description. It must
+    contain a top-level 'libraries' list whose entries have a 'name'
+    (used as folder name) and a list of 'files' (URLs to download).
+    """
+
+    log.info("Importing data libraries.")
+
+    url = "http://localhost:8080"
+    # The environment variables are set by the parent container
+    admin_email = os.environ.get('GALAXY_DEFAULT_ADMIN_USER', 'admin@galaxy.org')
+    admin_pass = os.environ.get('GALAXY_DEFAULT_ADMIN_PASSWORD', 'admin')
+
+    # Establish connection to galaxy instance
+    gi = galaxy.GalaxyInstance(url=url, email=admin_email, password=admin_pass)
+
+    jc = galaxy.jobs.JobsClient(gi)
+
+    # safe_load is sufficient for plain lists/mappings/strings and refuses
+    # YAML tags that would construct arbitrary Python objects.
+    libraries = yaml.safe_load(data) or {}
+
+    folders = dict()
+    for lib in libraries.get('libraries') or []:
+        folders[lib['name']] = lib['files']
+
+    if folders:
+        log.info("Create 'Training Data' library.")
+        lib = gi.libraries.create_library('Training Data', 'Data pulled from online archives.')
+        lib_id = lib['id']
+
+        for fname, file_urls in folders.items():
+            log.info("Creating folder: %s", fname)
+            folder = gi.libraries.create_folder( lib_id, fname )
+            for file_url in file_urls:
+                gi.libraries.upload_file_from_url(
+                    lib_id,
+                    file_url,
+                    folder_id = folder[0]['id'],
+                )
+
+        # Poll until every job has finished. Failed and deleted jobs count as
+        # finished: they never reach 'ok', so waiting for that would hang.
+        while True:
+            states = [job['state'] for job in jc.get_jobs()]
+            if all(state in TERMINAL_JOB_STATES for state in states):
+                break
+            time.sleep(3)
+
+        failed = [state for state in states if state != 'ok']
+        if failed:
+            log.warning("%d job(s) did not finish in state 'ok'.", len(failed))
+
+        # NOTE(review): fixed grace period kept from the original; presumably
+        # it lets Galaxy settle before the container build continues.
+        time.sleep(20)
+        log.info("Finished importing test data.")
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Populate the Galaxy data library with test data.'
+    )
+    parser.add_argument("-v", "--verbose", help="Increase output verbosity.",
+                        action="store_true")
+    # Required: without a description file there is nothing to import.
+    parser.add_argument('-i', '--infile', type=argparse.FileType('r'), required=True)
+
+    #TODO: Add options to override the admin_user and admin_password + specify
+    # files to upload via command line interface.
+
+    args = parser.parse_args()
+    if args.verbose:
+        log.basicConfig(level=log.DEBUG)
+
+    main( args.infile )

From 2a2770f18a3889a91a4e664b3daea349f0a9736e Mon Sep 17 00:00:00 2001
From: Bjoern Gruening
Date: Thu, 22 Dec 2016 00:23:02 +0100
Subject: [PATCH 2/3] add new script to Dockerfile

---
Review note: the -/+ pair in the first hunk differs only in whitespace;
the exact difference was lost when the series was re-flowed, so verify
that hunk against the target Dockerfile before applying.

 Dockerfile | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index d179d10..0981004 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,7 +10,7 @@ ENV ENABLE_TTS_INSTALL True
 # Enable Conda dependency resolution
 ENV GALAXY_CONFIG_CONDA_AUTO_INSTALL=True \
     GALAXY_CONFIG_CONDA_AUTO_INIT=True \
-    GALAXY_CONFIG_USE_CACHED_DEPENDENCY_MANAGER=True
+    GALAXY_CONFIG_USE_CACHED_DEPENDENCY_MANAGER=True
 
 # Install tools
 ADD graphclust.yml $GALAXY_ROOT/tools.yaml
@@ -18,3 +18,19 @@ RUN install-tools $GALAXY_ROOT/tools.yaml && \
     /tool_deps/_conda/bin/conda clean --tarballs
 
 ADD tour_graphclust_wf.yaml $GALAXY_ROOT/config/plugins/tours/graphclust.wf.yaml
+
+# Data libraries
+ADD setup_data_libraries.py $GALAXY_ROOT/setup_data_libraries.py
+ADD library_data.yaml $GALAXY_ROOT/library_data.yaml
+
+# Hacky script to import workflows into Galaxy after installation. I would argue this step is redundant.
+ADD import_workflows.py $GALAXY_ROOT/import_workflows.py
+ADD GraphClust_one.ga $GALAXY_ROOT/GraphClust_one.ga
+ADD GraphClust_two.ga $GALAXY_ROOT/GraphClust_two.ga
+
+# Download training data and populate the data library
+RUN startup_lite && \
+    sleep 30 && \
+    . $GALAXY_VIRTUAL_ENV/bin/activate && \
+    python $GALAXY_ROOT/setup_data_libraries.py -i $GALAXY_ROOT/library_data.yaml && \
+    python $GALAXY_ROOT/import_workflows.py

From f142158b4de7a28af1ea96623e2352ec0e738c88 Mon Sep 17 00:00:00 2001
From: Bjoern Gruening
Date: Thu, 22 Dec 2016 00:54:34 +0100
Subject: [PATCH 3/3] fix path

---
 import_workflows.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/import_workflows.py b/import_workflows.py
index 565b471..7d9687b 100644
--- a/import_workflows.py
+++ b/import_workflows.py
@@ -8,5 +8,5 @@
 gi = galaxy.GalaxyInstance(url=url, email=admin_email, password=admin_pass)
 
 wf = galaxy.workflows.WorkflowClient(gi)
-wf.import_workflow_from_local_path('/galaxy-dist/GraphClust_two.ga')
-wf.import_workflow_from_local_path('/galaxy-dist/GraphClust_one.ga')
+wf.import_workflow_from_local_path('/galaxy-central/GraphClust_two.ga')
+wf.import_workflow_from_local_path('/galaxy-central/GraphClust_one.ga')