From 34bd1e80c364ec2dec49a8407e37f19e4d31aaec Mon Sep 17 00:00:00 2001
From: Laurence Rowe
Date: Fri, 21 Feb 2020 14:40:57 -0800
Subject: [PATCH] igv visualization

Add the igv.org web app as a visualization target next to UCSC, Ensembl
and hic.

* vis_igv.py (new): the `igv.json` view on Dataset and the `batch_igv`
  route (/batch-igv.json). Both require exactly one `assembly` query
  parameter and return {'reference': assembly, 'tracks': [...]}. Only
  bigBed and bigWig files are mapped to tracks (see TRACK_MAP).
* region_search.py: add an 'IGV' entry to the per-assembly browser URLs,
  pointing https://igv.org/app/ at the batch_igv session URL, plus
  `locus` when a position is known.
* vis_defines.js: register 'IGV' in browserFileTypes / browserOrder and
  build the igv.org link from the dataset's @@igv.json session URL.
* vis_defines.py: browsers_available() adds 'IGV' when a file of the
  assembly passes file_igv_viewable().
* visualization.py: include the new module.
---
Notes (ignored by `git am`):

- This copy was restored from a whitespace-collapsed version of the
  patch. Indentation of context lines and the position of blank context
  lines are a best-effort reconstruction; run `git apply --check` and
  adjust whitespace if a hunk is rejected.
- vis_igv.py differs from the collapsed copy: one-line docstrings were
  added and `params['field']` is now `sorted(ALL_FIELDS)`, so the `index`
  blob id for that file is stale.
- NOTE(review): vis_defines.js sends the UCSC assembly name as
  `assembly`, while filetracks() compares that value with each file's
  own `assembly`. If the two names can differ (presumably e.g. a GRCh
  name vs. its UCSC alias - confirm against ASSEMBLY_DETAILS) the
  dataset view returns no tracks. region_search.py sends the unmapped
  assembly instead.
- NOTE(review): full_set gains lowercase 'igv' but browsers.add() uses
  'IGV'; confirm which casing the other browsers.add() calls use.

 src/encoded/region_search.py                 |   8 ++
 src/encoded/static/components/vis_defines.js |   8 ++
 src/encoded/vis_defines.py                   |   5 +-
 src/encoded/vis_igv.py                       | 128 +++++++++++++++++++
 src/encoded/visualization.py                 |   1 +
 5 files changed, 149 insertions(+), 1 deletion(-)
 create mode 100644 src/encoded/vis_igv.py

diff --git a/src/encoded/region_search.py b/src/encoded/region_search.py
index d93c56a72f6..05bd3238385 100644
--- a/src/encoded/region_search.py
+++ b/src/encoded/region_search.py
@@ -797,6 +797,14 @@ def search_result_actions(request, doc_types, es_results, position=None):
                     ensembl_url = vis_format_url("ensembl", hub_url, assembly, pos)
                     if ensembl_url is not None:
                         browser_urls['Ensembl'] = ensembl_url
+                    igv_search_params = request.params.dict_of_lists()
+                    igv_search_params['assembly'] = [assembly]
+                    igv_params = {
+                        "sessionURL": request.route_url('batch_igv', _query=igv_search_params),
+                    }
+                    if pos is not None:
+                        igv_params['locus'] = pos
+                    browser_urls['IGV'] = 'https://igv.org/app/?' + urlencode(igv_params)
                     if browser_urls:
                         viz[assembly] = browser_urls
                         #actions.setdefault('visualize_batch', {})[assembly] =\
diff --git a/src/encoded/static/components/vis_defines.js b/src/encoded/static/components/vis_defines.js
index 26ba83889ba..756ea069e50 100644
--- a/src/encoded/static/components/vis_defines.js
+++ b/src/encoded/static/components/vis_defines.js
@@ -81,6 +81,7 @@ const ASSEMBLY_DETAILS = {
  * File types allowed for each browser.
  */
 const browserFileTypes = {
+    IGV: [],
     UCSC: [],
     Ensembl: ['bigWig', 'bigBed'],
     hic: ['hic'],
@@ -100,6 +101,12 @@ const browserFileTypes = {
 export const visOpenBrowser = (dataset, browser, assembly, files, datasetUrl) => {
     let href;
     switch (browser) {
+    case 'IGV': {
+        const ucscAssembly = (ASSEMBLY_DETAILS[assembly] || {}).ucsc_assembly || assembly;
+        const sessionURL = `${datasetUrl}@@igv.json?assembly=${encodeURIComponent(ucscAssembly)}`;
+        href = `https://igv.org/app/?sessionURL=${encodeURIComponent(sessionURL)}`;
+        break;
+    }
     case 'UCSC': {
         // UCSC does not use `files` under any circumstances.
         const ucscAssembly = ASSEMBLY_DETAILS[assembly].ucsc_assembly;
@@ -192,6 +199,7 @@ const browserOrder = [
     'UCSC',
     'hic',
     'Ensembl',
+    'IGV',
 ];
 
 
diff --git a/src/encoded/vis_defines.py b/src/encoded/vis_defines.py
index 5d433a8e5cd..c6e56399455 100644
--- a/src/encoded/vis_defines.py
+++ b/src/encoded/vis_defines.py
@@ -13,6 +13,7 @@ from snovault.elasticsearch.interfaces import ELASTIC_SEARCH
 
 import time
 from pkg_resources import resource_filename
+from .vis_igv import file_igv_viewable
 
 import logging
 
@@ -1659,7 +1660,7 @@ def browsers_available(
     elif item_type not in VISIBLE_DATASET_TYPES_LC:
         return []
     browsers = set()
-    full_set = {'ucsc', 'ensembl', 'hic'}
+    full_set = {'ucsc', 'ensembl', 'hic', 'igv'}
     file_assemblies = None
     file_types = None
     if request is not None:
@@ -1681,6 +1682,8 @@ def browsers_available(
             file_assemblies = visualizable_assemblies(assemblies, files)
         if file_types is None:
             continue
+        if any(file_igv_viewable(obj) for obj in (files or []) if obj.get('assembly') == assembly):
+            browsers.add('IGV')
         if ('ucsc' not in browsers
                 and 'ucsc_assembly' in mapped_assembly.keys()
                 and not BROWSER_FILE_TYPES['ucsc'].isdisjoint(file_types)):
diff --git a/src/encoded/vis_igv.py b/src/encoded/vis_igv.py
new file mode 100644
index 00000000000..627bd865bdd
--- /dev/null
+++ b/src/encoded/vis_igv.py
@@ -0,0 +1,128 @@
+from pyramid.httpexceptions import HTTPUnprocessableEntity
+from pyramid.view import view_config
+from snovault import TYPES
+from urllib.parse import urljoin, urlencode
+
+
+def includeme(config):
+    '''Associated views routes'''
+    config.add_route('batch_igv', '/batch-igv.json')
+    config.scan(__name__)
+
+
+@view_config(name='igv.json', context='.types.dataset.Dataset', request_method='GET', permission='view')
+def dataset_igv(context, request):
+    '''Build the session dict ('reference', 'tracks') for one dataset and exactly one assembly.'''
+    # Disable rendering as html.
+    request.accept = 'application/json'
+    assemblies = request.params.getall('assembly')
+    if len(assemblies) != 1:
+        return HTTPUnprocessableEntity("Must specify exactly one assembly")
+    assembly = assemblies[0]
+    obj = request.embed('/%s' % context.uuid, as_user=True)
+    index_mapping = request.registry[TYPES]["File"].schema.get('file_format_index_file_extension', {})
+    tracks = list(itertracks(assembly, request.url, index_mapping, [obj]))
+    return {
+        'reference': assembly,
+        'tracks': tracks,
+    }
+
+
+@view_config(route_name='batch_igv', request_method='GET', permission='search')
+def search_igv(context, request):
+    '''Build the session dict from the @graph of a search (region-search when 'region' is given).'''
+    # Disable rendering as html.
+    request.accept = 'application/json'
+    params = request.params.dict_of_lists()
+    assemblies = params.get('assembly', [])
+    if len(assemblies) != 1:
+        return HTTPUnprocessableEntity("Must specify exactly one assembly")
+    assembly = assemblies[0]
+    view = 'region-search' if 'region' in request.params else 'search'
+    params['field'] = sorted(ALL_FIELDS)  # sorted: a set would give an unstable query string
+    params['frame'] = []
+    params['limit'] = ['all']
+    results = request.embed('/%s/?%s' % (view, urlencode(params, doseq=True)), as_user=True)
+    index_mapping = request.registry[TYPES]["File"].schema.get('file_format_index_file_extension', {})
+    tracks = list(itertracks(assembly, request.url, index_mapping, results['@graph']))
+    return {
+        'reference': assembly,
+        'tracks': tracks,
+    }
+
+
+TRACK_MAP = {
+    'bigBed': { 'type': 'annotation', 'format': 'bigBed' },
+    'bigWig': { 'type': 'wig', 'format': 'bigWig' },
+
+    ## These all require tabix / bam index files to be available for efficient visualization.
+    # ('bed', 'narrowPeak'): { 'type': 'annotation', 'format': 'narrowPeak' },
+    # ('bed', 'broadPeak'): { 'type': 'annotation', 'format': 'broadPeak' },
+    # 'gff3': { 'type': 'annotation', 'format': 'gff3' },
+    # 'gtf': { 'type': 'annotation', 'format': 'gtf' },
+    # 'bam': { 'type': 'alignment', 'format': 'bam' },
+    # 'vcf': { 'type': 'variant', 'format': 'vcf' },
+    # ('bed', 'bedGraph'): { 'type': 'wig', 'format': 'bedGraph' },
+
+    ## I don't think wig has an index file format, just use bedGraph or bigWig.
+    # 'wig': { 'type': 'wig', 'format': 'wig' },
+}
+
+FILE_FIELDS = {
+    'file_format',
+    'file_format_type',
+    'href',
+    'accession',
+    'assembly',
+}
+DATASET_FIELDS = {
+    'accession',
+} | {f'files.{field}' for field in FILE_FIELDS}
+ALL_FIELDS = FILE_FIELDS | DATASET_FIELDS
+
+
+def itertracks(assembly, baseurl, index_mapping, results):
+    '''Yield track dicts for File results and for the files of Dataset results.'''
+    for result in results:
+        if 'File' in result['@type']:
+            yield from filetracks(assembly, baseurl, index_mapping, result)
+        elif 'Dataset' in result['@type']:
+            for obj in result.get('files', []):
+                yield from filetracks(assembly, baseurl, index_mapping, obj)
+
+
+def filetracks(assembly, baseurl, index_mapping, obj):
+    '''Yield one track dict if obj matches assembly and has a TRACK_MAP format, href and accession.'''
+    if obj.get('assembly') != assembly:
+        return
+    file_format = obj.get('file_format')
+    file_format_type = obj.get('file_format_type')
+    base = TRACK_MAP.get((file_format, file_format_type))
+    if base is None:
+        base = TRACK_MAP.get(file_format)
+    if base is None:
+        return
+    if 'href' not in obj or 'accession' not in obj:
+        return
+    url = urljoin(baseurl, obj['href'])
+    data = {
+        "name": obj['accession'],
+        "url": url,
+    }
+    ext = index_mapping.get(file_format)
+    if ext:
+        data["indexURL"] = url + ext
+    data.update(base)
+    yield data
+
+
+def file_igv_viewable(obj):
+    '''Return True if obj has href, accession and assembly and its format is found in TRACK_MAP.'''
+    if 'href' not in obj or 'accession' not in obj or 'assembly' not in obj:
+        return False
+    file_format = obj.get('file_format')
+    file_format_type = obj.get('file_format_type')
+    base = TRACK_MAP.get((file_format, file_format_type))
+    if base is None:
+        base = TRACK_MAP.get(file_format)
+    return base is not None
diff --git a/src/encoded/visualization.py b/src/encoded/visualization.py
index 48ff4c8a946..68efec7d802 100644
--- a/src/encoded/visualization.py
+++ b/src/encoded/visualization.py
@@ -32,6 +32,7 @@ log.setLevel(logging.INFO)
 
 
 def includeme(config):
+    config.include('.vis_igv')
     config.add_route('batch_hub', '/batch_hub/{search_params}/{txt}')
     config.add_route('batch_hub:trackdb', '/batch_hub/{search_params}/{assembly}/{txt}')
     config.scan(__name__)