Commit c836da5
mostly-working Tabix index reader and new BGZBlob model for reading bgzipped files
rbuels committed Mar 5, 2013
1 parent 9d067bc commit c836da5
Showing 6 changed files with 440 additions and 1 deletion.
3 changes: 3 additions & 0 deletions .gitmodules
@@ -31,3 +31,6 @@
[submodule "src/json-schema"]
    path = src/json-schema
    url = git://github.com/kriszyp/json-schema.git
[submodule "src/jDataView"]
    path = src/jDataView
    url = git@github.com:rbuels/jDataView.git
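
The hunk above registers jDataView as a new git submodule. After pulling this commit, the submodule would typically be fetched with the standard command git submodule update --init src/jDataView (the command is illustrative and not part of the commit itself).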
138 changes: 138 additions & 0 deletions src/JBrowse/Model/BGZBlob.js
@@ -0,0 +1,138 @@
/**
 * File blob in Heng Li's `bgzip` (BGZF) format: a series of
 * concatenated gzip blocks, each holding at most 64 KiB of
 * uncompressed data, so that regions of the file can be inflated
 * independently.
 */
define( [
            'dojo/_base/declare',
            'jszlib/inflate',
            'jszlib/arrayCopy'
        ],
        function(
            declare,
            inflate,
            arrayCopy
        ) {

var BGZBlob = declare( null,
{
    constructor: function( blob ) {
        this.blob = blob;
    },

    // maximum size of a BGZF block (64 KiB)
    blockSize: 1<<16,

    // return a new BGZBlob over a byte range of the underlying blob
    slice: function(s, l) {
        return new BGZBlob( this.blob.slice( s, l ) );
    },

    // fetch the whole underlying blob, inflate it, and pass the
    // uncompressed data to the callback
    fetch: function( callback, failCallback ) {
        this.blob.fetch(
            this._wrap( callback ),
            failCallback
        );
    },

    // read and inflate the BGZF blocks that start within `length`
    // bytes of compressed-file `offset`, passing the concatenated
    // uncompressed data to the callback
    read: function( offset, length, callback, failCallback ) {
        this.blob.read( offset,
                        length + this.blockSize, //< need to over-fetch by a whole block size
                        this._wrap( callback, length ),
                        failCallback
                      );
    },

    // make a callback that inflates fetched BGZF data before handing
    // it to the original callback
    _wrap: function( callback, maxLen ) {
        var thisB = this;
        return function( bgzData ) {
            callback( thisB.unbgzf( bgzData, maxLen ) );
        };
    },

    // read a little-endian 32-bit integer from byte array `ba` at `offset`
    readInt: function(ba, offset) {
        return (ba[offset + 3] << 24) | (ba[offset + 2] << 16) | (ba[offset + 1] << 8) | (ba[offset]);
    },

    // read a little-endian 16-bit unsigned integer
    readShort: function(ba, offset) {
        return (ba[offset + 1] << 8) | (ba[offset]);
    },

    // read a 32-bit float (host byte order, normally little-endian)
    readFloat: function(ba, offset) {
        var temp = new Uint8Array( 4 );
        for( var i = 0; i<4; i++ ) {
            temp[i] = ba[offset+i];
        }
        var fa = new Float32Array( temp.buffer );
        return fa[0];
    },

    // inflate the BGZF blocks that begin before compressed offset
    // `lim` in the ArrayBuffer `data`, returning the concatenated
    // uncompressed data
    unbgzf: function(data, lim) {
        lim = Math.min( lim || Infinity, data.byteLength - 27);
        var oBlockList = [];
        var totalSize = 0;

        // `ptr` is an in/out pointer: inflate() advances ptr[0] past the
        // deflate data, and the `+= 8` skips each block's trailing CRC32
        // and ISIZE fields to reach the next block header
        for( var ptr = [0]; ptr[0] < lim; ptr[0] += 8) {

            var ba = new Uint8Array( data, ptr[0], 18 );

            // check the bgzf block magic
            if( !( ba[0] == 31 && ba[1] == 139 ) ) {
                console.error( 'invalid BGZF block header, skipping', ba );
                break;
            }

            // the deflate data starts after the 12-byte gzip header
            // (whose last 2 bytes, at offset 10, are XLEN) plus the
            // XLEN-byte extra field
            var xlen = this.readShort( ba, 10 );
            var compressedDataOffset = ptr[0] + 12 + xlen;

            // var inPtr = ptr[0];
            // var bSize = Utils.readShort( ba, 16 );
            // var logLength = Math.min(data.byteLength-ptr[0], 40);
            // console.log( xlen, bSize, bSize - xlen - 19, new Uint8Array( data, ptr[0], logLength ), logLength );

            var unc;
            try {
                unc = inflate(
                    data,
                    compressedDataOffset,
                    data.byteLength - compressedDataOffset,
                    ptr
                );
            } catch( inflateError ) {
                // if we have a buffer error and we have already
                // inflated some data, there is probably just an
                // incomplete BGZF block at the end of the data, so
                // ignore it and stop inflating
                if( /^Z_BUF_ERROR/.test(inflateError.statusString) && oBlockList.length ) {
                    break;
                }
                // otherwise it's some other kind of real error
                else {
                    throw inflateError;
                }
            }
            if( unc.byteLength ) {
                totalSize += unc.byteLength;
                oBlockList.push( unc );
            }
            // else {
            //     console.error( 'BGZF decompression failed for block ', compressedDataOffset, data.byteLength-compressedDataOffset, [inPtr] );
            // }
        }

        // a single block can be returned as-is; multiple blocks are
        // copied into one contiguous buffer
        if (oBlockList.length == 1) {
            return oBlockList[0];
        } else {
            var out = new Uint8Array(totalSize);
            var cursor = 0;
            for (var i = 0; i < oBlockList.length; ++i) {
                var b = new Uint8Array(oBlockList[i]);
                arrayCopy(b, 0, out, cursor, b.length);
                cursor += b.length;
            }
            return out.buffer;
        }
    }
});

return BGZBlob;
});
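
A minimal usage sketch (not part of this commit) of how the new model might be wired up: BGZBlob wraps any underlying blob object exposing the same fetch/read/slice interface, such as JBrowse's existing XHRBlob model; the file path here is purely illustrative.

require( [ 'JBrowse/Model/XHRBlob', 'JBrowse/Model/BGZBlob' ],
         function( XHRBlob, BGZBlob ) {

    // wrap an HTTP-fetched blob so that reads come back inflated
    var blob = new BGZBlob( new XHRBlob( 'data/example.vcf.gz' ) );

    // inflate the BGZF blocks that start in the first 1000 bytes of
    // the compressed file, and log how much uncompressed data came back
    blob.read( 0, 1000,
               function( data ) {
                   console.log( 'got', data.byteLength, 'uncompressed bytes' );
               },
               function( error ) {
                   console.error( error );
               }
             );
});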
