diff --git a/src/JBrowse/Model/BGZip/BGZBlob.js b/src/JBrowse/Model/BGZip/BGZBlob.js
index 3244251f8d..21cbf4a118 100644
--- a/src/JBrowse/Model/BGZip/BGZBlob.js
+++ b/src/JBrowse/Model/BGZip/BGZBlob.js
@@ -135,4 +135,4 @@ var BGZBlob = declare( null,
 });
 
 return BGZBlob;
-});
\ No newline at end of file
+});
diff --git a/src/JBrowse/Store/SeqFeature/GFF3Tabix.js b/src/JBrowse/Store/SeqFeature/GFF3Tabix.js
index 79db2d697c..114f3fc4f8 100644
--- a/src/JBrowse/Store/SeqFeature/GFF3Tabix.js
+++ b/src/JBrowse/Store/SeqFeature/GFF3Tabix.js
@@ -71,9 +71,9 @@ return declare( [ SeqFeatureStore, DeferredStatsMixin, DeferredFeaturesMixin, In
         })
 
         // start our global stats estimation
-        this.getHeader()
+        this.indexedData.featureCount('nonexistent') // resolves once the tabix index has loaded; the count itself is unused
            .then(
-                header => {
+                () => {
                    this._deferred.features.resolve({ success: true })
                    this._estimateGlobalStats()
                        .then(
@@ -88,33 +88,8 @@ return declare( [ SeqFeatureStore, DeferredStatsMixin, DeferredFeaturesMixin, In
            )
    },
 
-    getHeader() {
-        if (this._parsedHeader) return this._parsedHeader
-
-        this._parsedHeader = new Deferred()
-        const reject = this._parsedHeader.reject.bind(this._parsedHeader)
-
-        this.indexedData.indexLoaded
-            .then( () => {
-                const maxFetch = this.indexedData.index.firstDataLine
-                    ? this.indexedData.index.firstDataLine.block + this.indexedData.data.blockSize - 1
-                    : null
-
-                this.indexedData.data.read(
-                    0,
-                    maxFetch,
-                    bytes => this._parsedHeader.resolve( this.header ),
-                    reject
-                );
-            },
-            reject
-            )
-
-        return this._parsedHeader
-    },
-
     _getFeatures(query, featureCallback, finishedCallback, errorCallback, allowRedispatch = true) {
-        this.getHeader().then(
+        this.indexedData.featureCount('nonexistent').then( // index-load barrier, as above
            () => {
                const lines = []
                this.indexedData.getLines(
diff --git a/src/JBrowse/Store/SeqFeature/IndexedStatsEstimationMixin.js b/src/JBrowse/Store/SeqFeature/IndexedStatsEstimationMixin.js
index ec0752b803..06d050d635 100644
--- a/src/JBrowse/Store/SeqFeature/IndexedStatsEstimationMixin.js
+++ b/src/JBrowse/Store/SeqFeature/IndexedStatsEstimationMixin.js
@@ -20,24 +20,21 @@ return declare( GlobalStats, {
      * estimate the feature density of the store.
      * @private
      */
-    _estimateGlobalStats: function( refseq ) {
-        refseq = refseq || this.refSeq;
-        var featCount;
-        if(this.indexedData) {
-            featCount = this.indexedData.featureCount(refseq.name);
-        } else if(this.bam) {
-            var chr = refseq.name;
-            chr = this.browser.regularizeReferenceName( chr );
-            var chrId = this.bam.chrToIndex && this.bam.chrToIndex[chr];
-            featCount = this.bam.index.featureCount(chrId, true);
+    async _estimateGlobalStats(refseq) {
+        refseq = refseq || this.refSeq
+        let featCount
+        if (this.indexedData) {
+            featCount = await this.indexedData.featureCount(refseq.name)
+        } else if (this.bam) {
+            const chr = this.browser.regularizeReferenceName(refseq.name)
+            const chrId = this.bam.chrToIndex && this.bam.chrToIndex[chr]
+            featCount = await this.bam.index.featureCount(chrId, true)
         }
-        if(featCount == -1) {
-            return this.inherited(arguments);
+        if (featCount == -1) {
+            return this.inherited('_estimateGlobalStats', arguments) // name passed explicitly; dojo cannot infer the callee inside an async method
         }
-        var density = featCount / (refseq.end - refseq.start);
-        var deferred = new Deferred();
-        deferred.resolve({ featureDensity: density });
-        return deferred;
+        const featureDensity = featCount / (refseq.end - refseq.start)
+        return { featureDensity }
     }
 });
 
diff --git a/src/JBrowse/Store/SeqFeature/VCFTabix.js b/src/JBrowse/Store/SeqFeature/VCFTabix.js
index 8dcff3f045..fcfbe4bdaf 100644
--- a/src/JBrowse/Store/SeqFeature/VCFTabix.js
+++ b/src/JBrowse/Store/SeqFeature/VCFTabix.js
@@ -1,3 +1,7 @@
+const promisify = cjsRequire('util.promisify')
+const zlib = cjsRequire('zlib')
+const gunzip = promisify(zlib.gunzip)
+
 define([
            'dojo/_base/declare',
            'dojo/_base/lang',
@@ -29,7 +33,7 @@ define([
 // files don't actually have an end coordinate, so we have to make it
 // here. also convert coordinates to interbase.
 var VCFIndexedFile = declare( TabixIndexedFile, {
-    parseLine: function() {
+    parseLine() {
         var i = this.inherited( arguments );
         if( i ) {
             var ret = i.fields[7].match(/^END=(\d+)|;END=(\d+)/);
@@ -43,7 +47,7 @@ var VCFIndexedFile = declare( TabixIndexedFile, {
 
 return declare( [ SeqFeatureStore, DeferredStatsMixin, DeferredFeaturesMixin, IndexedStatsEstimationMixin, VCFParser ],
 {
-    constructor: function( args ) {
+    constructor( args ) {
         var thisB = this;
 
         var csiBlob, tbiBlob;
@@ -69,6 +73,8 @@ return declare( [ SeqFeatureStore, DeferredStatsMixin, DeferredFeaturesMixin, In
             { expectRanges: true }
         );
 
+        this.fileBlob = fileBlob
+
         this.indexedData = new VCFIndexedFile(
             {
                 tbi: tbiBlob,
@@ -97,35 +103,15 @@ return declare( [ SeqFeatureStore, DeferredStatsMixin, DeferredFeaturesMixin, In
     },
 
     /** fetch and parse the VCF header lines */
-    getVCFHeader: function() {
-        var thisB = this;
-        return this._parsedHeader || ( this._parsedHeader = function() {
-            var d = new Deferred();
-            var reject = lang.hitch( d, 'reject' );
-
-            thisB.indexedData.indexLoaded.then( function() {
-                var maxFetch = thisB.indexedData.index.firstDataLine
-                    ? thisB.indexedData.index.firstDataLine.block + thisB.indexedData.data.blockSize - 1
-                    : null;
-
-                thisB.indexedData.data.read(
-                    0,
-                    maxFetch,
-                    function( bytes ) {
-                        thisB.parseHeader( new Uint8Array( bytes ) );
-                        d.resolve( thisB.header );
-                    },
-                    reject
-                );
-            },
-            reject
-            );
-
-            return d;
-        }.call(this));
+    getVCFHeader() {
+        if (!this._parsedHeader) {
+            this._parsedHeader = this.indexedData.getHeader()
+                .then(headerBytes => this.parseHeader(headerBytes)) // a Buffer, i.e. a Uint8Array, as parseHeader expects
+        }
+        return this._parsedHeader
     },
 
-    _getFeatures: function( query, featureCallback, finishedCallback, errorCallback ) {
+    _getFeatures( query, featureCallback, finishedCallback, errorCallback ) {
         var thisB = this;
         thisB.getVCFHeader().then( function() {
             thisB.indexedData.getLines(
@@ -152,12 +138,12 @@ return declare( [ SeqFeatureStore, DeferredStatsMixin, DeferredFeaturesMixin, In
      * smart enough to regularize reference sequence names, while
      * others are not.
      */
-    hasRefSeq: function( seqName, callback, errorCallback ) {
+    hasRefSeq( seqName, callback, errorCallback ) {
         return this.indexedData.index.hasRefSeq( seqName, callback, errorCallback );
     },
 
-    saveStore: function() {
+    saveStore() {
         return {
             urlTemplate: this.config.file.url,
             tbiUrlTemplate: ((this.config.tbi)||{}).url,
 
diff --git a/src/JBrowse/Store/TabixIndexedFile.js b/src/JBrowse/Store/TabixIndexedFile.js
index 3f06418a0c..b82e0f0525 100644
--- a/src/JBrowse/Store/TabixIndexedFile.js
+++ b/src/JBrowse/Store/TabixIndexedFile.js
@@ -1,231 +1,62 @@
+const { TabixIndexedFile } = cjsRequire('@gmod/tabix')
+
+// this is basically just an adaptor to @gmod/tabix TabixIndexedFile now
 define([
            'dojo/_base/declare',
-           'dojo/_base/array',
-           'JBrowse/Util',
-           'JBrowse/Util/TextIterator',
-           'JBrowse/Store/LRUCache',
-           'JBrowse/Errors',
-           'JBrowse/Model/BGZip/BGZBlob',
-           'JBrowse/Model/TabixIndex',
-           'JBrowse/Model/CSIIndex'
+           'JBrowse/Model/BlobFilehandleWrapper'
        ],
        function(
            declare,
-           array,
-           Util,
-           TextIterator,
-           LRUCache,
-           Errors,
-           BGZBlob,
-           TabixIndex,
-           CSIIndex
+           BlobFilehandleWrapper
        ) {
 
 return declare( null, {
 
    constructor: function( args ) {
        this.browser = args.browser;
-        if(args.tbi) {
-            this.index = new TabixIndex({ blob: new BGZBlob( args.tbi ), browser: args.browser } );
-        } else if(args.csi) {
-            this.index = new CSIIndex({ blob: new BGZBlob( args.csi ), browser: args.browser } );
-        }
-
-        this.data = new BGZBlob( args.file );
-        this.indexLoaded = this.index.load();
-
-        this.chunkSizeLimit = args.chunkSizeLimit || 2000000;
-    },
-
-    getLines: function( ref, min, max, itemCallback, finishCallback, errorCallback ) {
-        var thisB = this;
-        var args = Array.prototype.slice.call(arguments);
-        this.indexLoaded.then(function() {
-            thisB._fetch.apply( thisB, args );
-        }, errorCallback);
-    },
-    featureCount: function(refSeq) {
-        return this.index.featureCount(refSeq);
-    },
-
-    _fetch: function( ref, min, max, itemCallback, finishCallback, errorCallback ) {
-        errorCallback = errorCallback || function(e) { console.error(e, e.stack); };
-
-        var chunks = this.index.blocksForRange( ref, min, max);
-        if ( ! chunks ) {
-            errorCallback('Error in index fetch ('+[ref,min,max].join(',')+')');
-            return;
-        }
-
-        // toString function is used by the cache for making cache keys
-        chunks.toString = chunks.toUniqueString = function() {
-            return this.join(', ');
-        };
-
-        // check the chunks for any that are over the size limit. if
-        // any are, don't fetch any of them
-        for( var i = 0; i<chunks.length; i++ ) {
-            var size = chunks[i].fetchedSize();
-            if( size > this.chunkSizeLimit ) {
-                errorCallback( new Errors.DataOverflow('Too much data. Chunk size '+Util.commifyNumber(size)+' bytes exceeds chunkSizeLimit of '+Util.commifyNumber(this.chunkSizeLimit)+'.' ) );
-                return;
-            }
-        }
-        var fetchError;
-        try {
-            this._fetchChunkData(
-                chunks,
-                ref,
-                min,
-                max,
-                itemCallback,
-                finishCallback,
-                errorCallback
-            );
-        } catch( e ) {
-            errorCallback( e );
-        }
+        this.data = new TabixIndexedFile({
+            filehandle: new BlobFilehandleWrapper(args.file),
+            tbiFilehandle: args.tbi && new BlobFilehandleWrapper(args.tbi),
+            csiFilehandle: args.csi && new BlobFilehandleWrapper(args.csi),
+            chunkSizeLimit: args.chunkSizeLimit || 2000000,
+            renameRefSeqs: n => this.browser.regularizeReferenceName(n)
+        })
    },
 
-    _fetchChunkData: function( chunks, ref, min, max, itemCallback, endCallback, errorCallback ) {
-        var thisB = this;
-
-        if( ! chunks.length ) {
-            endCallback();
-            return;
-        }
-
-        var allItems = [];
-        var chunksProcessed = 0;
-
-        var cache = this.chunkCache = this.chunkCache || new LRUCache({
-            name: 'TabixIndexedFileChunkedCache',
-            fillCallback: dojo.hitch( this, '_readChunkItems' ),
-            sizeFunction: function( chunkItems ) {
-                return chunkItems.length;
-            },
-            maxSize: 100000 // cache up to 100,000 items
-        });
-
-        var regRef = this.browser.regularizeReferenceName( ref );
-
-        var haveError;
-        array.forEach( chunks, function( c ) {
-            cache.get( c, function( chunkItems, e ) {
-                if( e && !haveError )
-                    errorCallback( e );
-                if(( haveError = haveError || e )) {
-                    return;
-                }
-
-                for( var i = 0; i< chunkItems.length; i++ ) {
-                    var item = chunkItems[i];
-                    if( item._regularizedRef == regRef ) {
-                        // on the right ref seq
-                        if( item.start > max ) // past end of range, can stop iterating
-                            break;
-                        else if( item.end >= min ) // must be in range
-                            itemCallback( item );
-                    }
-                }
-                if( ++chunksProcessed == chunks.length ) {
-                    endCallback();
-                }
-            });
-        });
+    getMetadata() {
+        return this.data.getMetadata()
    },
 
-    _readChunkItems: function( chunk, callback ) {
-        var items = [];
-        this.data.read(chunk.minv.block, chunk.maxv.block - chunk.minv.block + 1, ( data ) => {
-            data = new Uint8Array(data);
-            //console.log( 'reading chunk %d compressed, %d uncompressed', chunk.maxv.block-chunk.minv.block+65536, data.length );
-            var lineIterator = new TextIterator.FromBytes({ bytes: data, offset: 0 });
-            try {
-                this._parseItems(
-                    lineIterator,
-                    function(i) { items.push(i); },
-                    function() { callback(items); }
-                );
-            } catch( e ) {
-                callback( null, e );
-            }
-        },
-        function(e) {
-            callback( null, e );
-        });
+    getHeader() {
+        return this.data.getHeaderBuffer()
    },
 
-    _parseItems: function( lineIterator, itemCallback, finishCallback ) {
-        var that = this;
-        var itemCount = 0;
-
-        var maxItemsWithoutYielding = 300;
-        while ( true ) {
-            // if we've read no more than a certain number of items this cycle, read another one
-            if( itemCount <= maxItemsWithoutYielding ) {
-                var item = this.parseItem( lineIterator );
-                if( item ) {
-                    itemCallback( item );
-                    itemCount++;
-                }
-                else {
-                    finishCallback();
-                    return;
-                }
-            }
-            // if we're not done but we've read a good chunk of
-            // items, schedule the rest of our work in a timeout to continue
-            // later, avoiding blocking any UI stuff that needs to be done
-            else {
-                window.setTimeout( function() {
-                    that._parseItems( lineIterator, itemCallback, finishCallback );
-                }, 1);
-                return;
-            }
-        }
+    featureCount(refSeq) {
+        return this.data.lineCount(refSeq)
    },
 
-    parseItem: function( iterator ) {
-        var metaChar = this.index.metaChar;
-        var line, item, fileOffset;
-        do {
-            fileOffset = iterator.getOffset();
-            line = iterator.getline();
-        } while( line && ( line.charAt(0) == metaChar // meta line, skip
-                           || line.charAt( line.length - 1 ) != "\n" // no newline at the end, incomplete
-                           || ! ( item = this.tryParseLine( line, fileOffset ) ) // line could not be parsed
-                         )
-               );
-
-        if( line && item )
-            return item;
-
-        return null;
+    getLines( ref, min, max, itemCallback, finishCallback, errorCallback ) {
+        this.data.getMetadata()
+            .then( metadata => {
+                const regularizedReferenceName = this.browser.regularizeReferenceName(ref)
+                return this.data.getLines(regularizedReferenceName, min, max, (line, fileOffset) => {
+                    itemCallback(this.parseLine(metadata, line, fileOffset))
+                })
+            })
+            .then(finishCallback, errorCallback || (e => { console.error(e, e.stack) }))
    },
 
-    tryParseLine: function( line, fileOffset ) {
-        try {
-            return this.parseLine( line, fileOffset );
-        } catch(e) {
-            //console.warn('parse failed: "'+line+'"');
-            return null;
+    parseLine({columnNumbers}, line, fileOffset) {
+        const fields = line.split("\t")
+        return { // note: index column numbers are 1-based
+            ref: fields[columnNumbers.ref - 1],
+            _regularizedRef: this.browser.regularizeReferenceName(fields[columnNumbers.ref - 1]),
+            start: parseInt(fields[columnNumbers.start - 1], 10),
+            end: parseInt(fields[columnNumbers.end - 1], 10),
+            fields,
+            fileOffset,
        }
-    },
-
-    parseLine: function( line, fileOffset ) {
-        var fields = line.split( "\t" );
-        fields[fields.length-1] = fields[fields.length-1].replace(/\n$/,''); // trim off the newline
-        var item = { // note: index column numbers are 1-based
-            ref: fields[this.index.columnNumbers.ref-1],
-            _regularizedRef: this.browser.regularizeReferenceName( fields[this.index.columnNumbers.ref-1] ),
-            start: parseInt(fields[this.index.columnNumbers.start-1]),
-            end: parseInt(fields[this.index.columnNumbers.end-1]),
-            fields: fields,
-            fileOffset: fileOffset,
-        };
-        return item;
    }
 });
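
Reviewer note, with a usage sketch (not part of the patch). The code below drives the rewritten TabixIndexedFile adaptor end to end using only the methods this diff defines (getHeader, featureCount, getLines); the browser object and the volvox file URLs are hypothetical stand-ins, not anything introduced by the patch.

    // hypothetical usage sketch: exercising the new @gmod/tabix-backed
    // adaptor the same way VCFTabix.js does internally
    define([
        'JBrowse/Store/TabixIndexedFile',
        'JBrowse/Model/XHRBlob'
    ], function( TabixIndexedFile, XHRBlob ) {

        return function dumpRegion( browser ) { // browser must provide regularizeReferenceName()
            var store = new TabixIndexedFile({
                browser: browser,
                file: new XHRBlob('volvox.test.vcf.gz'),    // hypothetical data file
                tbi:  new XHRBlob('volvox.test.vcf.gz.tbi') // its tabix index
            })

            // getHeader() resolves with the raw header bytes (a Buffer)
            store.getHeader()
                .then( function( headerBytes ) {
                    console.log( 'header is ' + headerBytes.length + ' bytes' )
                })

            // featureCount() wraps @gmod/tabix lineCount() and resolves to -1
            // for reference sequences the index does not know about
            store.featureCount('ctgA')
                .then( function( count ) { console.log( 'ctgA lines: ' + count ) })

            // getLines() regularizes the ref name, then streams parsed
            // { ref, start, end, fields, fileOffset } items
            store.getLines(
                'ctgA', 0, 50000,
                function( item ) { console.log( item.start, item.end ) },
                function() { console.log( 'done' ) },
                function( err ) { console.error( err ) }
            )
        }
    })

Two things the sketch surfaces that may deserve follow-up in this PR: VCFTabix.hasRefSeq() still reaches for this.indexedData.index.hasRefSeq(), but the rewritten adaptor no longer exposes an index property, so that path looks like it would now fail; and the promisify/zlib/gunzip requires added at the top of VCFTabix.js are not used in any hunk shown here, so it is worth confirming they are needed elsewhere in the file.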
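
On the IndexedStatsEstimationMixin change: _estimateGlobalStats() is now an async method, so it returns a native Promise of a plain { featureDensity } object rather than a pre-resolved dojo Deferred; both are thenables, so existing .then() consumers keep working. A minimal sketch of the new contract, assuming a store that mixes this in (the caller and refSeq values here are invented for illustration):

    // hypothetical caller; `store` mixes in IndexedStatsEstimationMixin and
    // has an indexedData with the promise-returning featureCount() above
    function logDensity( store ) {
        var refSeq = { name: 'ctgA', start: 0, end: 50001 } // invented coordinates
        store._estimateGlobalStats( refSeq )
            .then( function( stats ) {
                // featureDensity = featureCount / (refSeq.end - refSeq.start)
                console.log( 'features per bp: ' + stats.featureDensity )
            })
    }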