diff --git a/croo/cromwell_metadata.py b/croo/cromwell_metadata.py
index 2772b9c..173a62b 100644
--- a/croo/cromwell_metadata.py
+++ b/croo/cromwell_metadata.py
@@ -9,7 +9,7 @@
 import re
 import json
 import caper
-from caper.caper_uri import CaperURI
+from autouri import AutoURI
 from collections import OrderedDict, namedtuple
 from .dag import DAG
@@ -61,7 +61,7 @@ def find_files_in_dict(d):
         elif isinstance(v, str):
             maybe_files.append((v, (-1,)))
     for f, shard_idx in maybe_files:
-        if CaperURI(f).is_valid_uri():
+        if AutoURI(f).is_valid:
             files.append((k, f, shard_idx))
     return files
diff --git a/croo/croo.py b/croo/croo.py
index e0e4d7a..551d103 100755
--- a/croo/croo.py
+++ b/croo/croo.py
@@ -8,16 +8,20 @@
 import os
 import sys
+import logging
 import json
 import re
 import caper
-from caper.caper_uri import init_caper_uri, CaperURI, URI_LOCAL
-
+from autouri import AutoURI, AbsPath, GCSURI, S3URI
 from .croo_args import parse_croo_arguments
 from .croo_html_report import CrooHtmlReport
 from .cromwell_metadata import CromwellMetadata

+logging.basicConfig(level=logging.INFO, format='%(asctime)s|%(name)s|%(levelname)s| %(message)s')
+logger = logging.getLogger(__name__)
+
+
 class Croo(object):
     """Cromwell output organizer (croo)
@@ -29,23 +33,39 @@ class Croo(object):
     KEY_INPUT = 'inputs'

     def __init__(self, metadata_json, out_def_json, out_dir,
+                 tmp_dir,
                  soft_link=True,
                  ucsc_genome_db=None,
                  ucsc_genome_pos=None,
-                 no_graph=False):
+                 use_presigned_url_s3=False,
+                 use_presigned_url_gcs=False,
+                 duration_presigned_url_s3=0,
+                 duration_presigned_url_gcs=0,
+                 public_gcs=False,
+                 gcp_private_key=None,
+                 map_path_to_url=None,
+                 no_checksum=False):
         """Initialize croo with output definition JSON
+
+        Args:
+            soft_link:
+                DO NOT MAKE A COPY of the original Cromwell output
+                (source) on out_dir (destination).
+                Try to soft-link it if both src and dest are on local storage.
+                Otherwise, original Cromwell outputs will just be referenced.
         """
+        self._tmp_dir = tmp_dir
         if isinstance(metadata_json, dict):
             self._metadata = metadata_json
         else:
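+            # localize_on() downloads a remote URI (gs://, s3://, http(s)://)
+            # into tmp_dir and returns the local path; a local file path
+            # should pass through unchanged.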
-            f = CaperURI(metadata_json).get_local_file()
+            f = AutoURI(metadata_json).localize_on(self._tmp_dir)
             with open(f, 'r') as fp:
                 self._metadata = json.loads(fp.read())
         if isinstance(self._metadata, list):
             if len(self._metadata) > 1:
-                print('[Croo] Warning: multiple metadata JSON objects '
-                      'found in metadata JSON file. Taking the first '
-                      'one...')
+                logger.warning(
+                    'Multiple metadata JSON objects found in metadata '
+                    'JSON file. Taking the first one...')
             elif len(self._metadata) == 0:
                 raise Exception('metadata JSON file is empty')
             self._metadata = self._metadata[0]
@@ -53,7 +73,15 @@
         self._cm = CromwellMetadata(self._metadata)
         self._ucsc_genome_db = ucsc_genome_db
         self._ucsc_genome_pos = ucsc_genome_pos
-        self._no_graph = no_graph
+
+        self._use_presigned_url_s3 = use_presigned_url_s3
+        self._use_presigned_url_gcs = use_presigned_url_gcs
+        self._duration_presigned_url_s3 = duration_presigned_url_s3
+        self._duration_presigned_url_gcs = duration_presigned_url_gcs
+        self._public_gcs = public_gcs
+        self._gcp_private_key = gcp_private_key
+        self._map_path_to_url = map_path_to_url
+        self._no_checksum = no_checksum

         if isinstance(out_def_json, dict):
             self._out_def_json = out_def_json
@@ -66,7 +94,7 @@
                     'add "#CROO out_def [URL_OR_CLOUD_URI]" '
                     'to your WDL')
                 out_def_json = out_def_json_file_from_wdl
-            f = CaperURI(out_def_json).get_local_file()
+            f = AutoURI(out_def_json).localize_on(self._tmp_dir)
             with open(f, 'r') as fp:
                 self._out_def_json = json.loads(fp.read())
@@ -89,6 +117,13 @@ def organize_output(self):
             workflow_id=self._cm.get_workflow_id(),
             dag=self._task_graph,
             task_graph_template=self._task_graph_template,
+            public_gcs=self._public_gcs,
+            gcp_private_key=self._gcp_private_key,
+            use_presigned_url_gcs=self._use_presigned_url_gcs,
+            use_presigned_url_s3=self._use_presigned_url_s3,
+            duration_presigned_url_s3=self._duration_presigned_url_s3,
+            duration_presigned_url_gcs=self._duration_presigned_url_gcs,
+            map_path_to_url=self._map_path_to_url,
             ucsc_genome_db=self._ucsc_genome_db,
             ucsc_genome_pos=self._ucsc_genome_pos)
@@ -105,7 +140,7 @@ def organize_output(self):
             full_path = node.output_path
             shard_idx = node.shard_idx

-            if node_format is not None and not self._no_graph:
+            if node_format is not None:
                 interpreted_node_format = Croo.__interpret_inline_exp(
                     node_format, full_path, shard_idx)
                 if subgraph is not None:
@@ -141,26 +176,51 @@ def organize_output(self):
                 if k != output_name:
                     continue

+                target_uri = full_path
                 if path is not None:
                     interpreted_path = Croo.__interpret_inline_exp(
                         path, full_path, shard_idx)
-                    # write to output directory
-                    target_uri = os.path.join(self._out_dir,
-                                              interpreted_path)
-                    # if soft_link, target_uri changes to original source
-                    target_uri = CaperURI(full_path).copy(
-                        target_uri=target_uri,
-                        soft_link=self._soft_link)
-                else:
-                    target_uri = full_path
+                    au = AutoURI(full_path)
+                    target_path = os.path.join(self._out_dir, interpreted_path)
+
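+                    # soft-linking is possible only when both source and
+                    # destination are on local storage (AbsPath); for cloud
+                    # URIs the original output is referenced as-is.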
+                    if self._soft_link:
+                        au_target = AutoURI(target_path)
+                        if isinstance(au, AbsPath) and isinstance(au_target, AbsPath):
+                            au.soft_link(target_path, force=True)
+                            target_uri = target_path
+                        else:
+                            target_uri = full_path
+                    else:
+                        target_uri = au.cp(
+                            target_path,
+                            no_checksum=self._no_checksum,
+                            make_md5_file=True)

                 # get presigned URLs if possible
+                target_url = None
                 if path is not None or table_item is not None \
                         or ucsc_track is not None or node_format is not None:
-                    target_url = CaperURI(target_uri).get_url()
-                else:
-                    target_url = None
+                    u = AutoURI(target_uri)
+
+                    if isinstance(u, GCSURI):
+                        if self._public_gcs:
+                            target_url = u.get_public_url()
+
+                        elif self._use_presigned_url_gcs:
+                            target_url = u.get_presigned_url(
+                                duration=self._duration_presigned_url_gcs,
+                                private_key_file=self._gcp_private_key)
+
+                    elif isinstance(u, S3URI):
+                        if self._use_presigned_url_s3:
+                            target_url = u.get_presigned_url(
+                                duration=self._duration_presigned_url_s3)
+
+                    elif isinstance(u, AbsPath):
+                        if self._map_path_to_url:
+                            target_url = u.get_mapped_url(
+                                map_path_to_url=self._map_path_to_url)

                 if table_item is not None:
                     interpreted_table_item = Croo.__interpret_inline_exp(
@@ -174,7 +234,7 @@
                         ucsc_track, full_path, shard_idx)
                     report.add_to_ucsc_track(target_url,
                                              interpreted_ucsc_track)
-                if node_format is not None and not self._no_graph:
+                if node_format is not None:
                     interpreted_node_format = Croo.__interpret_inline_exp(
                         node_format, full_path, shard_idx)
                     if subgraph is not None:
@@ -239,69 +299,31 @@ def __interpret_inline_exp(s, full_path, shard_idx):
         return result

-def init_dirs_args(args):
-    """More initialization for out/tmp directories since tmp
-    directory is important for inter-storage transfer using
-    CaperURI
-    """
-    if args['out_dir'].startswith(('http://', 'https://')):
-        raise ValueError('URL is not allowed for --out-dir')
-    elif args['out_dir'].startswith(('gs://', 's3://')):
-        if args.get('tmp_dir') is None:
-            args['tmp_dir'] = os.path.join(os.getcwd(), '.croo_tmp')
-    else:
-        args['out_dir'] = os.path.abspath(os.path.expanduser(args['out_dir']))
-        os.makedirs(args['out_dir'], exist_ok=True)
-
-        if args.get('tmp_dir') is None:
-            args['tmp_dir'] = os.path.join(args['out_dir'], '.croo_tmp')
-
-    # make temp dir
-    os.makedirs(args['tmp_dir'], exist_ok=True)
-
-    mapping_path_to_url = None
-    if args.get('tsv_mapping_path_to_url') is not None:
-        mapping_path_to_url = {}
-        f = os.path.expanduser(args.get('tsv_mapping_path_to_url'))
-        with open(f, 'r') as fp:
-            lines = fp.read().strip('\n').split('\n')
-            for line in lines:
-                k, v = line.split('\t')
-                mapping_path_to_url[k] = v
-
-    # init caper uri to transfer files across various storages
-    # e.g. gs:// to s3://, http:// to local, ...
-    init_caper_uri(
-        tmp_dir=args['tmp_dir'],
-        tmp_s3_bucket=None,
-        tmp_gcs_bucket=None,
-        http_user=args.get('http_user'),
-        http_password=args.get('http_password'),
-        use_gsutil_over_aws_s3=args.get('use_gsutil_over_aws_s3'),
-        use_presigned_url_s3=args.get('use_presigned_url_s3'),
-        use_presigned_url_gcs=args.get('use_presigned_url_gcs'),
-        gcp_private_key_file=args.get('gcp_private_key'),
-        public_gcs=args.get('public_gcs'),
-        duration_sec_presigned_url_s3=args.get('duration_presigned_url_s3'),
-        duration_sec_presigned_url_gcs=args.get('duration_presigned_url_gcs'),
-        mapping_path_to_url=mapping_path_to_url,
-        verbose=True)

 def main():
-    # parse arguments. note that args is a dict
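+    # parse_croo_arguments() returns a dict and already runs
+    # check_args(), init_dirs() and init_autouri() internally.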
     args = parse_croo_arguments()

-    # init out/tmp dirs and CaperURI for inter-storage transfer
-    init_dirs_args(args)
+    if args['verbose']:
+        logger.setLevel('INFO')
+    elif args['debug']:
+        logger.setLevel('DEBUG')

     co = Croo(
         metadata_json=args['metadata_json'],
         out_def_json=args['out_def_json'],
         out_dir=args['out_dir'],
+        tmp_dir=args['tmp_dir'],
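+        # '--method link' soft-links outputs instead of copying them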
+        soft_link=args['method'] == 'link',
         ucsc_genome_db=args['ucsc_genome_db'],
         ucsc_genome_pos=args['ucsc_genome_pos'],
-        soft_link=args['method'] == 'link',
-        no_graph=args['no_graph'])
+        use_presigned_url_s3=args['use_presigned_url_s3'],
+        use_presigned_url_gcs=args['use_presigned_url_gcs'],
+        duration_presigned_url_s3=args['duration_presigned_url_s3'],
+        duration_presigned_url_gcs=args['duration_presigned_url_gcs'],
+        public_gcs=args['public_gcs'],
+        gcp_private_key=args['gcp_private_key'],
+        map_path_to_url=args['mapping_path_to_url'],
+        no_checksum=args['no_checksum'])

     co.organize_output()
diff --git a/croo/croo_args.py b/croo/croo_args.py
index 51d7a39..fd56f23 100755
--- a/croo/croo_args.py
+++ b/croo/croo_args.py
@@ -7,13 +7,14 @@
 """
 import argparse
+import csv
+import os
 import sys
-from caper.caper_uri import (
-    MAX_DURATION_SEC_PRESIGNED_URL_S3,
-    MAX_DURATION_SEC_PRESIGNED_URL_GCS)
+from autouri import S3URI, GCSURI
+from autouri import logger as autouri_logger

-__version__ = '0.3.4'
+__version__ = '0.4.0'

 def parse_croo_arguments():
     """Argument parser for Cromwell Output Organizer (Croo)
@@ -36,9 +37,6 @@
         'Original output files will be kept in Cromwell\'s output '
         'directory. '
         '"copy" makes copies of Cromwell\'s original outputs')
-    p.add_argument(
-        '--no-graph', action='store_true',
-        help='No task graph.')
     p.add_argument(
         '--ucsc-genome-db',
         help='UCSC genome browser\'s "db=" parameter. '
@@ -63,12 +61,12 @@
         'Google Cloud Platform (GCP). This key will be used to make '
         'presigned URLs on files on gs://.')
     p.add_argument(
-        '--duration-presigned-url-s3',
-        default=MAX_DURATION_SEC_PRESIGNED_URL_S3,
+        '--duration-presigned-url-s3', type=int,
+        default=S3URI.DURATION_PRESIGNED_URL,
         help='Duration for presigned URLs for files on s3:// in seconds.')
     p.add_argument(
-        '--duration-presigned-url-gcs',
-        default=MAX_DURATION_SEC_PRESIGNED_URL_GCS,
+        '--duration-presigned-url-gcs', type=int,
+        default=GCSURI.DURATION_PRESIGNED_URL,
         help='Duration for presigned URLs for files on gs:// in seconds.')
     p.add_argument(
         '--tsv-mapping-path-to-url',
@@ -87,17 +85,25 @@
         'stored here. You can clean it up but will lose all cached files '
         'so that remote files will be re-downloaded.')
     p.add_argument(
-        '--use-gsutil-over-aws-s3', action='store_true',
-        help='Use gsutil instead of aws s3 CLI even for S3 buckets.')
-    p.add_argument(
-        '--http-user',
-        help='Username to download data from private URLs')
+        '--use-gsutil-for-s3', action='store_true',
+        help='Use gsutil for direct transfer between GCS and S3 buckets. '
+             'Make sure that you have "gsutil" installed and configured '
+             'to have access to credentials for GCS and S3 '
+             '(e.g. ~/.boto or ~/.aws/credentials)')
     p.add_argument(
-        '--http-password',
-        help='Password to download data from private URLs')
+        '--no-checksum', action='store_true',
+        help='Always overwrite on output directory/bucket (--out-dir) '
+             'even if md5-identical files (or soft links) already exist there. '
+             'Md5 hash/filename/filesize checking will be skipped.')
     p.add_argument('-v', '--version', action='store_true',
                    help='Show version')
+    group_log_level = p.add_mutually_exclusive_group()
+    group_log_level.add_argument('-V', '--verbose', action='store_true',
+                                 help='Prints all logs >= INFO level')
+    group_log_level.add_argument('-D', '--debug', action='store_true',
+                                 help='Prints all logs >= DEBUG level')
+
     if '-v' in sys.argv or '--version' in sys.argv:
         print(__version__)
         p.exit()
@@ -111,20 +117,92 @@
     if args.version is not None and args.version:
         print(__version__)
         p.exit()
-    check_args(args)

     # convert to dict
-    return vars(args)
+    d_args = vars(args)
+
+    check_args(d_args)
+    init_dirs(d_args)
+    init_autouri(d_args)
+
+    return d_args

 def check_args(args):
-    if args.use_presigned_url_gcs and args.gcp_private_key is None:
+    """Check that command line arguments are valid
+
+    Args:
+        args:
+            dict of command line arguments
+    """
+    if args['use_presigned_url_gcs'] and args['gcp_private_key'] is None:
         raise ValueError(
             'Define --gcp-private-key to use presigned URLs on GCS'
             ' (--use-presigned-url-gcs).')
-    if args.public_gcs and args.use_presigned_url_gcs:
+
+    if args['public_gcs'] and args['use_presigned_url_gcs']:
         raise ValueError(
             'Public GCS bucket (--public-gcs) cannot be presigned '
             '(--use-presigned-url-gcs and --gcp-private-key). '
             'Choose one of them.')

+    if args['tmp_dir'] is None:
+        pass
+    elif args['tmp_dir'].startswith(('http://', 'https://')):
+        raise ValueError('URL is not allowed for --tmp-dir')
+    elif args['tmp_dir'].startswith(('gs://', 's3://')):
+        raise ValueError('Cloud URI is not allowed for --tmp-dir')
+
+    if args['out_dir'].startswith(('http://', 'https://')):
+        raise ValueError('URL is not allowed for --out-dir')
+
+
+def init_dirs(args):
+    """Initialize out/tmp directories. The tmp directory is important
+    for inter-storage transfers with autouri.
+
+    Args:
+        args:
+            dict of command line arguments
+    """
+    if args['out_dir'].startswith(('gs://', 's3://')):
+        if args['tmp_dir'] is None:
+            args['tmp_dir'] = os.path.join(os.getcwd(), '.croo_tmp')
+    else:
+        args['out_dir'] = os.path.abspath(os.path.expanduser(args['out_dir']))
+        os.makedirs(args['out_dir'], exist_ok=True)
+        if args['tmp_dir'] is None:
+            args['tmp_dir'] = os.path.join(args['out_dir'], '.croo_tmp')
+
+    if args['tmp_dir'] is not None:
+        args['tmp_dir'] = os.path.abspath(os.path.expanduser(args['tmp_dir']))
+
+
+def init_autouri(args):
+    """Initialize autouri and its logger
+
+    Args:
+        args:
+            dict of command line arguments
+    """
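+    # gsutil supports direct transfer between gs:// and s3://
+    # (see the --use-gsutil-for-s3 help text)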
+    GCSURI.init_gcsuri(
+        use_gsutil_for_s3=args['use_gsutil_for_s3'])
+
+    # autouri's path-to-URL mapping
+    if args['tsv_mapping_path_to_url'] is not None:
+        mapping_path_to_url = {}
+        f = os.path.expanduser(args['tsv_mapping_path_to_url'])
+        with open(f, newline='') as fp:
+            reader = csv.reader(fp, delimiter='\t')
+            for line in reader:
+                mapping_path_to_url[line[0]] = line[1]
+        args['mapping_path_to_url'] = mapping_path_to_url
+    else:
+        args['mapping_path_to_url'] = None
+
+    # autouri's logger
+    if args['verbose']:
+        autouri_logger.setLevel('INFO')
+    elif args['debug']:
+        autouri_logger.setLevel('DEBUG')
diff --git a/croo/croo_html_report.py b/croo/croo_html_report.py
index 94b946d..66dd781 100755
--- a/croo/croo_html_report.py
+++ b/croo/croo_html_report.py
@@ -9,7 +9,7 @@
 from .croo_html_report_tracks import CrooHtmlReportUCSCTracks
 from .croo_html_report_file_table import CrooHtmlReportFileTable
 from .croo_html_report_task_graph import CrooHtmlReportTaskGraph
-from caper.caper_uri import CaperURI
+from autouri import AutoURI

 class CrooHtmlReport(object):
@@ -31,13 +31,32 @@ class CrooHtmlReport(object):
     def __init__(self, out_dir, workflow_id, dag,
                  task_graph_template=None,
+                 public_gcs=None,
+                 gcp_private_key=None,
+                 use_presigned_url_gcs=False,
+                 use_presigned_url_s3=False,
+                 duration_presigned_url_s3=None,
+                 duration_presigned_url_gcs=None,
+                 map_path_to_url=None,
                  ucsc_genome_db=None,
                  ucsc_genome_pos=None):
         self._out_dir = out_dir
         self._workflow_id = workflow_id
+        self._public_gcs = public_gcs
+        self._gcp_private_key = gcp_private_key
+        self._use_presigned_url_gcs = use_presigned_url_gcs
+        self._use_presigned_url_s3 = use_presigned_url_s3
+        self._duration_presigned_url_s3 = duration_presigned_url_s3
+        self._duration_presigned_url_gcs = duration_presigned_url_gcs
+        self._map_path_to_url = map_path_to_url
         self._ucsc_tracks = CrooHtmlReportUCSCTracks(
             out_dir=out_dir,
             workflow_id=workflow_id,
+            public_gcs=public_gcs,
+            gcp_private_key=gcp_private_key,
+            use_presigned_url_gcs=use_presigned_url_gcs,
+            use_presigned_url_s3=use_presigned_url_s3,
+            map_path_to_url=map_path_to_url,
             ucsc_genome_db=ucsc_genome_db,
             ucsc_genome_pos=ucsc_genome_pos)
         self._file_table = CrooHtmlReportFileTable(
@@ -79,5 +98,5 @@ def save_to_file(self):
             self._out_dir,
             CrooHtmlReport.REPORT_HTML.format(
                 workflow_id=self._workflow_id))
-        CaperURI(uri_report).write_str_to_file(html)
+        AutoURI(uri_report).write(html)
         return html
\ No newline at end of file
diff --git a/croo/croo_html_report_file_table.py b/croo/croo_html_report_file_table.py
index 8019775..991843b 100755
--- a/croo/croo_html_report_file_table.py
+++ b/croo/croo_html_report_file_table.py
@@ -5,7 +5,7 @@
 """
 import os
-from caper.caper_uri import CaperURI, URI_LOCAL, URI_URL
+from autouri import AutoURI

 class CrooHtmlReportFileTable(object):
@@ -134,5 +134,5 @@ def dir_first(s):
             self._out_dir,
             CrooHtmlReportFileTable.FILETABLE_TSV.format(
                 workflow_id=self._workflow_id))
-        CaperURI(uri_filetable).write_str_to_file(contents)
+        AutoURI(uri_filetable).write(contents)
         return table_contents
diff --git a/croo/croo_html_report_task_graph.py b/croo/croo_html_report_task_graph.py
index 2f18b4b..f19855c 100755
--- a/croo/croo_html_report_task_graph.py
+++ b/croo/croo_html_report_task_graph.py
@@ -8,7 +8,10 @@
 import os
+import logging
 from copy import deepcopy
 from base64 import b64encode
-from caper.caper_uri import CaperURI, URI_LOCAL, URI_URL
+from graphviz import Source
+from graphviz.backend import ExecutableNotFound
+from autouri import AutoURI
+
+# use a module-level logger instead of importing one from .croo,
+# which would be a circular import (croo imports this module via
+# croo_html_report before its own logger is defined)
+logger = logging.getLogger(__name__)

 class CrooHtmlReportTaskGraph(object):
@@ -26,6 +29,7 @@ def __init__(self, out_dir, workflow_id, dag, template_d):
             A template dict that will be converted to a template dot file for graphviz
             This dot file will be converted into SVG and finally be embedded in HTML
             Refer to the function caper.dict_tool.dict_to_dot_str() for details
+            https://github.com/ENCODE-DCC/caper/blob/master/caper/dict_tool.py#L190
         """
         self._out_dir = out_dir
         self._workflow_id = workflow_id
@@ -49,9 +53,9 @@ def get_html_body_str(self):
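+        # __make_svg() returns None if the graphviz "dot" executable is
+        # not available; the task graph section is then omitted.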
         if svg_contents is None:
             return ''
         else:
-            head = '<br>Task graph\n'
-            tail = '<br><br>'
+            head = 'Task graph<br>\n'
             img = svg_contents
+            tail = '<br><br>'
             return head + img + tail

     def __make_svg(self):
@@ -61,7 +65,6 @@
         """
         if not self._items:
             return None
-        from graphviz import Source

         # define callback functions for node format, href, subgraph
         def fnc_node_format(n):
@@ -89,15 +92,26 @@ def fnc_subgraph(n):
             template=self._template_d)
         # temporary dot, svg from graphviz.Source.render
         tmp_dot = '_tmp_.dot'
-        svg = Source(dot_str, format='svg').render(
-            filename=tmp_dot)
+
+        try:
+            svg = Source(dot_str, format='svg').render(
+                filename=tmp_dot)
+        except (ExecutableNotFound, FileNotFoundError):
+            logger.info(
+                'Rendering with graphviz failed. Task graph will not be '
+                'available. Check that graphviz is installed correctly so '
+                'that the "dot" executable exists on your PATH. '
+                '"pip install graphviz" does not install "dot" itself. '
+                'Use a system-level installer instead, '
+                'e.g. sudo apt-get install graphviz.')
+            return None

         # save to DOT
         uri_dot = os.path.join(
             self._out_dir,
             CrooHtmlReportTaskGraph.TASK_GRAPH_DOT.format(
                 workflow_id=self._workflow_id))
-        CaperURI(uri_dot).write_str_to_file(dot_str)
+        AutoURI(uri_dot).write(dot_str)

         # save to SVG
         with open(svg, 'r') as fp:
             svg_contents = fp.read()
         uri_svg = os.path.join(
             self._out_dir,
             CrooHtmlReportTaskGraph.TASK_GRAPH_SVG.format(
                 workflow_id=self._workflow_id))
-        CaperURI(uri_svg).write_str_to_file(svg_contents)
+        AutoURI(uri_svg).write(svg_contents)
         os.remove(tmp_dot)
         os.remove(svg)
diff --git a/croo/croo_html_report_tracks.py b/croo/croo_html_report_tracks.py
index cd58a07..e7cc80e 100755
--- a/croo/croo_html_report_tracks.py
+++ b/croo/croo_html_report_tracks.py
@@ -7,7 +7,7 @@
 import os
 import urllib.parse
-from caper.caper_uri import CaperURI
+from autouri import AutoURI, GCSURI, S3URI, AbsPath

 class CrooHtmlReportUCSCTracks(object):
@@ -18,7 +18,7 @@ class CrooHtmlReportUCSCTracks(object):
     UCSC_BROWSER_QUERY_URL = 'http://genome.ucsc.edu/cgi-bin/hgTracks?db={db}&ignoreCookie=1{extra_param}&hgct_customText={encoded}'
     UCSC_BROWSER_TEXT_FORMAT = '{track_line} bigDataUrl="{url}"\n'
     HTML_TRACK_HUB_LINK = """
-<a href="{url}">UCSC browser tracks</a><br>
+<a href="{url}">{title}</a><br>
 """
     HTML_TRACK_HUB_TEXT = """
@@ -32,10 +32,24 @@ class CrooHtmlReportUCSCTracks(object):
 """

     def __init__(self, out_dir, workflow_id,
+                 public_gcs=None,
+                 gcp_private_key=None,
+                 use_presigned_url_gcs=False,
+                 use_presigned_url_s3=False,
+                 duration_presigned_url_s3=None,
+                 duration_presigned_url_gcs=None,
+                 map_path_to_url=None,
                  ucsc_genome_db=None,
                  ucsc_genome_pos=None):
         self._out_dir = out_dir
         self._workflow_id = workflow_id
+        self._public_gcs = public_gcs
+        self._gcp_private_key = gcp_private_key
+        self._use_presigned_url_gcs = use_presigned_url_gcs
+        self._use_presigned_url_s3 = use_presigned_url_s3
+        self._duration_presigned_url_s3 = duration_presigned_url_s3
+        self._duration_presigned_url_gcs = duration_presigned_url_gcs
+        self._map_path_to_url = map_path_to_url
         self._ucsc_genome_db = ucsc_genome_db
         self._ucsc_genome_pos = ucsc_genome_pos
         self._items = []
@@ -60,36 +74,60 @@ def get_html_body_str(self):
         else:
             extra_param = ''

-        encoded = urllib.parse.quote(txt)
+        # save track hub text to TXT on the output directory/bucket;
+        # linking to this file is more reliable than embedding the whole
+        # hub text in a very long hgct_customText URL
+        uri_txt = os.path.join(
+            self._out_dir,
+            CrooHtmlReportUCSCTracks.UCSC_TRACKS_TXT.format(
+                workflow_id=self._workflow_id))
+        u = AutoURI(uri_txt)
+        u.write(txt)
+
""" def __init__(self, out_dir, workflow_id, + public_gcs=None, + gcp_private_key=None, + use_presigned_url_gcs=False, + use_presigned_url_s3=False, + duration_presigned_url_s3=None, + duration_presigned_url_gcs=None, + map_path_to_url=None, ucsc_genome_db=None, ucsc_genome_pos=None): self._out_dir = out_dir self._workflow_id = workflow_id + self._public_gcs = public_gcs + self._gcp_private_key = gcp_private_key + self._use_presigned_url_gcs = use_presigned_url_gcs + self._use_presigned_url_s3 = use_presigned_url_s3 + self._duration_presigned_url_s3 = duration_presigned_url_s3 + self._duration_presigned_url_gcs = duration_presigned_url_gcs + self._map_path_to_url = map_path_to_url self._ucsc_genome_db = ucsc_genome_db self._ucsc_genome_pos = ucsc_genome_pos self._items = [] @@ -60,36 +74,60 @@ def get_html_body_str(self): else: extra_param = '' - encoded = urllib.parse.quote(txt) + # save to TXT + uri_txt = os.path.join( + self._out_dir, + CrooHtmlReportUCSCTracks.UCSC_TRACKS_TXT.format( + workflow_id=self._workflow_id)) + + # localize TXT + # long URL doesn't work + u = AutoURI(uri_txt) + u.write(txt) + + url_trackhub_txt_file = None + if isinstance(u, GCSURI): + if self._public_gcs: + url_trackhub_txt_file = u.get_public_url() + + elif self._use_presigned_url_gcs: + url_trackhub_txt_file = u.get_presigned_url( + duration=self._duration_presigned_url_gcs, + private_key_file=self._gcp_private_key) + + elif isinstance(u, S3URI): + if self._use_presigned_url_s3: + url_trackhub_txt_file = u.get_presigned_url( + duration=self._duration_presigned_url_s3) + + elif isinstance(u, AbsPath): + if self._map_path_to_url: + url_trackhub_txt_file = u.get_mapped_url( + map_path_to_url=self._map_path_to_url) + html = '' + url = CrooHtmlReportUCSCTracks.UCSC_BROWSER_QUERY_URL.format( db=self._ucsc_genome_db, extra_param=extra_param, - encoded=encoded) - - html = '' + encoded=urllib.parse.quote(txt)) html += CrooHtmlReportUCSCTracks.HTML_TRACK_HUB_LINK.format( + title='UCSC browser tracks', url=url) - html += CrooHtmlReportUCSCTracks.HTML_TRACK_HUB_TEXT.format( - title='UCSC track hub plain text', - txt=txt) - html += CrooHtmlReportUCSCTracks.HTML_TRACK_HUB_TEXT.format( - title='UCSC track hub encoded URL ' - '(Use this for browser\'s parameter &hgct_customText=)', - txt=encoded) - # save to TXT - uri_txt = os.path.join( - self._out_dir, - CrooHtmlReportUCSCTracks.UCSC_TRACKS_TXT.format( - workflow_id=self._workflow_id)) - CaperURI(uri_txt).write_str_to_file(txt) + if url_trackhub_txt_file is not None: + url = CrooHtmlReportUCSCTracks.UCSC_BROWSER_QUERY_URL.format( + db=self._ucsc_genome_db, + extra_param=extra_param, + encoded=urllib.parse.quote(url_trackhub_txt_file)) - # save to URL - uri_url = os.path.join( - self._out_dir, - CrooHtmlReportUCSCTracks.UCSC_TRACKS_URL.format( - workflow_id=self._workflow_id)) - CaperURI(uri_url).write_str_to_file(url) + html += CrooHtmlReportUCSCTracks.HTML_TRACK_HUB_LINK.format( + title='UCSC browser tracks (if the above link does not work)', + url=url) + + html += CrooHtmlReportUCSCTracks.HTML_TRACK_HUB_TEXT.format( + title='UCSC track hub plain text. Paste it directly to custom track edit box ' + 'on UCSC genome browser.', + txt=txt) return html diff --git a/setup.py b/setup.py index 3dad79c..4d5b49e 100644 --- a/setup.py +++ b/setup.py @@ -21,5 +21,5 @@ 'License :: OSI Approved :: MIT License', 'Operating System :: POSIX :: Linux', ], - install_requires=['caper>=0.6.1', 'graphviz'] + install_requires=['autouri>=0.1.1', 'graphviz'] )