Skip to content

Commit

Permalink
bunch of work on tabix gff3 perf
Browse files Browse the repository at this point in the history
  • Loading branch information
rbuels committed Sep 8, 2018
1 parent 769a4e9 commit 1e09073
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 13 deletions.
13 changes: 12 additions & 1 deletion src/JBrowse/Store/SeqFeature.js
Expand Up @@ -79,14 +79,16 @@ return declare( Store,
} else {
throw new Error('invalid topLevelFeatures configuration value',confVal)
}
if (typesList.length)
if (typesList.length) {
this._topLevelFeatureTypes = typesList
this._topLevelFeaturesTransform = features => {
let resultFeatures = []
features.forEach( feature => {
resultFeatures.push(...this._findSubfeaturesWithTypes(typesList,feature))
})
return resultFeatures
}
}
}

if (this._topLevelFeaturesTransform) {
Expand All @@ -98,6 +100,15 @@ return declare( Store,
}
},

_isTopLevelFeatureType(featureType) {
if (this._topLevelFeatureTypes) {
return this._topLevelFeatureTypes.includes(featureType)
} else if (this._topLevelFeaturesTransform) {
throw new Error('custom top-level feature transforms not supported in this use case')
}
return true
},

_evalConf: function( confVal, confKey ) {
// evaluate callbacks as functions
return typeof confVal == 'function' ? confVal.call( this, this ) : confVal;
Expand Down
17 changes: 12 additions & 5 deletions src/JBrowse/Store/SeqFeature/GFF3Tabix.js
@@ -1,4 +1,4 @@
import gff from '@gmod/gff'
const gff = cjsRequire('@gmod/gff').default

define([
'dojo/_base/declare',
Expand Down Expand Up @@ -102,14 +102,21 @@ return declare( [ SeqFeatureStore, DeferredStatsMixin, DeferredFeaturesMixin, In
if (allowRedispatch && lines.length) {
let minStart = Infinity
let maxEnd = -Infinity
lines.forEach( line => {
if(!this.dontRedispatch.includes(line.fields[2])) {
let start = line.start-1 // tabix indexes are 1-based
lines.forEach(line => {
const featureType = line.fields[2]
// only expand redispatch range if the feature is not in dontRedispatch,
// and is a top-level feature
if(
!this.dontRedispatch.includes(featureType) &&
this._isTopLevelFeatureType(featureType)
) {
let start = line.start-1 // gff is 1-based
if (start < minStart) minStart = start
if (line.end > maxEnd) maxEnd = line.end
}
})
if (maxEnd > query.end || minStart < query.start) {
// console.log(`redispatching ${query.start}-${query.end} => ${minStart}-${maxEnd}`)
let newQuery = Object.assign({},query,{ start: minStart, end: maxEnd })
// make a new feature callback to only return top-level features
// in the original query range
Expand Down Expand Up @@ -223,7 +230,7 @@ return declare( [ SeqFeatureStore, DeferredStatsMixin, DeferredFeaturesMixin, In
* others are not.
*/
hasRefSeq( seqName, callback, errorCallback ) {
// NOTE(review): the two return statements below appear to be the removed and
// added lines of this commit's diff rendered without +/- markers; as literal
// code only the first could ever execute. Confirm against the repository.
// Variant 1: delegates to the `index` member of this.indexedData.
return this.indexedData.index.hasRefSeq( seqName, callback, errorCallback );
// Variant 2: delegates to this.indexedData itself, forwarding the same
// three arguments unchanged and returning whatever the delegate returns.
return this.indexedData.hasRefSeq( seqName, callback, errorCallback );
},

saveStore() {
Expand Down
28 changes: 21 additions & 7 deletions src/JBrowse/Store/TabixIndexedFile.js
Expand Up @@ -3,10 +3,12 @@ const { TabixIndexedFile } = cjsRequire('@gmod/tabix')
// this is basically just an adaptor to @gmod/tabix TabixIndexedFile now
define([
'dojo/_base/declare',
'JBrowse/Errors',
'JBrowse/Model/BlobFilehandleWrapper'
],
function(
declare,
Errors,
BlobFilehandleWrapper,
) {

Expand All @@ -32,30 +34,42 @@ return declare( null, {
return this.data.getHeaderBuffer()
},

/**
 * Return whatever this.data.lineCount reports for a reference sequence name.
 *
 * NOTE(review): two versions of this method are interleaved below (two
 * signature lines, one closing brace). This looks like the before/after
 * lines of a diff shown without +/- markers — confirm which one is current.
 */
featureCount(refSeq) {
// first version: hands the caller's name to lineCount untouched
return this.data.lineCount(refSeq);
featureCount(ref) {
// second version: pass the name through the browser's
// regularizeReferenceName first, as getLines in this file also does
const regularizeReferenceName = this.browser.regularizeReferenceName(ref)
return this.data.lineCount(regularizeReferenceName);
},

/**
 * Stream the lines for a region to itemCallback, one parsed record per line.
 *
 * Waits for this.data.getMetadata(), regularizes `ref` with the browser's
 * regularizeReferenceName, then calls this.data.getLines with that name plus
 * `min` and `max`. Each line is converted with this.parseLine before being
 * passed to itemCallback. Nothing is returned; completion and failure are
 * signalled through finishCallback / errorCallback.
 *
 * NOTE(review): some statements appear twice in slightly different forms
 * (the inner getLines call and the final .then). They look like the
 * before/after lines of a diff shown without +/- markers — confirm which
 * set is current before editing.
 *
 * @param ref             reference sequence name as supplied by the caller
 * @param min             passed through to this.data.getLines
 * @param max             passed through to this.data.getLines
 * @param itemCallback    called with the parsed record for each line
 * @param finishCallback  fulfillment handler of the promise chain
 * @param errorCallback   optional; called with the error when the chain rejects
 */
getLines( ref, min, max, itemCallback, finishCallback, errorCallback ) {
this.data.getMetadata()
.then( metadata => {
const regularizeReferenceName = this.browser.regularizeReferenceName(ref)
// variant 1: the per-line callback receives only the line text
return this.data.getLines(regularizeReferenceName, min, max, line => {
itemCallback(this.parseLine(metadata, line))
// variant 2: the per-line callback also receives a file offset, which is
// forwarded to parseLine
return this.data.getLines(regularizeReferenceName, min, max, (line, fileOffset) => {
itemCallback(this.parseLine(metadata, line, fileOffset))
})
})
// variant 1: use errorCallback if given, otherwise log the error and its stack
.then(finishCallback, errorCallback || (e => { console.error(e, e.stack) }))
// variant 2: same fallback to console.error, but errors whose message contains
// 'Too much data' are replaced with an Errors.DataOverflow before being handed
// to errorCallback. Because the handler is the second argument of the same
// .then as finishCallback, it does not see errors thrown by finishCallback.
.then(finishCallback, error => {
if (errorCallback) {
if (error.message && error.message.indexOf('Too much data') >= 0) {
error = new Errors.DataOverflow(error.message)
}
errorCallback(error)
} else
console.error(error)
})
},

/**
 * Split one tab-delimited line into a record object.
 *
 * The returned object carries the raw reference name, its regularized form
 * (via this.browser.regularizeReferenceName), integer start and end taken
 * from the columns named in `columnNumbers`, the full `fields` array, and a
 * file offset.
 *
 * NOTE(review): two signature lines and two `fileOffset` entries are
 * interleaved below. They look like the before/after lines of a diff shown
 * without +/- markers — confirm which form is current.
 *
 * @param columnNumbers  destructured from the first argument; 1-based column
 *                       positions with `ref`, `start` and `end` members
 * @param line           the line text, split on tab characters
 * @param fileOffset     (second signature only) offset to store on the record
 * @throws {Error} (second signature only) when fileOffset fails the >= 0 check
 */
parseLine({columnNumbers}, line) {
parseLine({columnNumbers}, line, fileOffset) {
const fields = line.split("\t")
// written as a negated >= so that undefined and NaN are rejected along with
// negative numbers (both compare false against 0)
if (!(fileOffset >= 0)) {
throw new Error(`invalid tabix file offset ${fileOffset}`)
}
return { // note: index column numbers are 1-based
ref: fields[columnNumbers.ref - 1],
_regularizedRef: this.browser.regularizeReferenceName(fields[columnNumbers.ref - 1]),
// NOTE(review): parseInt is called without a radix on the next two lines
start: parseInt(fields[columnNumbers.start - 1]),
end: parseInt(fields[columnNumbers.end - 1]),
fields,
// first form reads the offset from a property of the line value ...
fileOffset: line.fileOffset,
// ... second form uses the fileOffset parameter directly
fileOffset,
}
}

Expand Down

0 comments on commit 1e09073

Please sign in to comment.