Re-instate block merging, but with a size limit
Increases performance a lot for queries that involve many blocks.

Reverts "No merge adjacent blocks and fix unzip bug related to this",
commit 584adc4.
rbuels committed Jul 25, 2019
1 parent 2dedbf1 commit bb25513
Showing 5 changed files with 35 additions and 6 deletions.
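
For readers skimming the diffs below, a minimal standalone sketch of the re-instated merge pass (the function name and the plain-object chunk shape are illustrative; the real code operates on Chunk/VirtualOffset instances inside blocksForRange and rewrites the off array in place):

// Sketch only: coalesce adjacent chunks whose compressed blocks abut,
// as long as the merged block span stays under the size cap (30 MB here).
const MAX_MERGED_BLOCK_SIZE = 30 * 2 ** 20

function mergeAdjacentChunks(chunks) {
  if (chunks.length === 0) return chunks
  const merged = [chunks[0]]
  for (let i = 1; i < chunks.length; i += 1) {
    const last = merged[merged.length - 1]
    const next = chunks[i]
    const abut = last.maxv.blockPosition === next.minv.blockPosition
    const withinCap =
      next.maxv.blockPosition - last.minv.blockPosition <= MAX_MERGED_BLOCK_SIZE
    if (abut && withinCap) last.maxv = next.maxv
    else merged.push(next)
  }
  return merged
}

Fewer, larger chunks mean fewer separate reads and decompression passes when a query spans many blocks, which is presumably where the performance win described in the commit message comes from.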
src/csi.js (12 additions, 1 deletion)
@@ -5,7 +5,7 @@ const { unzip } = require('./unzip')
 const VirtualOffset = require('./virtualOffset')
 const Chunk = require('./chunk')
 
-const { longToNumber, checkAbortSignal } = require('./util')
+const { longToNumber, checkAbortSignal, canMergeBlocks } = require('./util')
 
 const CSI1_MAGIC = 21582659 // CSI\1
 const CSI2_MAGIC = 38359875 // CSI\2
@@ -295,6 +295,17 @@ class CSI
     for (let i = 1; i < numOffsets; i += 1)
       if (off[i - 1].maxv.compareTo(off[i].minv) >= 0)
         off[i - 1].maxv = off[i].minv
+    // merge adjacent blocks
+    l = 0
+    for (let i = 1; i < numOffsets; i += 1) {
+      if (canMergeBlocks(off[l], off[i])) off[l].maxv = off[i].maxv
+      else {
+        l += 1
+        off[l].minv = off[i].minv
+        off[l].maxv = off[i].maxv
+      }
+    }
+    numOffsets = l + 1
 
     return off.slice(0, numOffsets)
   }
src/tbi.js (12 additions, 1 deletion)
@@ -8,7 +8,7 @@ const { unzip } = require('./unzip')
 const TBI_MAGIC = 21578324 // TBI\1
 const TAD_LIDX_SHIFT = 14
 
-const { longToNumber, checkAbortSignal } = require('./util')
+const { longToNumber, checkAbortSignal, canMergeBlocks } = require('./util')
 
 /**
  * calculate the list of bins that may overlap with region [beg,end) (zero-based half-open)
@@ -274,6 +274,17 @@ class TabixIndex
     for (let i = 1; i < numOffsets; i += 1)
       if (off[i - 1].maxv.compareTo(off[i].minv) >= 0)
         off[i - 1].maxv = off[i].minv
+    // merge adjacent blocks
+    l = 0
+    for (let i = 1; i < numOffsets; i += 1) {
+      if (canMergeBlocks(off[l], off[i])) off[l].maxv = off[i].maxv
+      else {
+        l += 1
+        off[l].minv = off[i].minv
+        off[l].maxv = off[i].maxv
+      }
+    }
+    numOffsets = l + 1
 
     return off.slice(0, numOffsets)
   }
src/unzip.js (0 additions, 3 deletions)
@@ -49,9 +49,6 @@ function unzipChunk(inputData, chunk) {
     }
     if (fileStartingOffset + pos >= chunk.maxv.blockPosition) {
       // this is the last chunk, trim it and stop decompressing
-      // note if it is the same block is minv it subtracts that already
-      // trimmed part of the slice length
-
       decompressedBlocks[decompressedBlocks.length - 1] = decompressedBlocks[
         decompressedBlocks.length - 1
       ].slice(
src/util.js (10 additions, 0 deletions)
@@ -1,3 +1,5 @@
+const MAX_MERGED_BLOCK_SIZE = 30 * 2 ** 20 // 30 MB
+
 module.exports = {
   longToNumber(long) {
     if (
@@ -35,4 +37,12 @@ module.exports = {
       }
     }
   },
+
+  canMergeBlocks(block1, block2) {
+    return (
+      block1.maxv.blockPosition === block2.minv.blockPosition &&
+      block2.maxv.blockPosition - block1.minv.blockPosition <=
+        MAX_MERGED_BLOCK_SIZE
+    )
+  },
 }
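
As a quick illustration of the new helper with made-up offsets (canMergeBlocks only reads the blockPosition fields, so plain objects of that shape are enough; assumed to run from the repo root):

// Hypothetical values, for illustration only.
const { canMergeBlocks } = require('./src/util')

const a = { minv: { blockPosition: 0 }, maxv: { blockPosition: 65536 } }
const b = { minv: { blockPosition: 65536 }, maxv: { blockPosition: 131072 } }
const c = { minv: { blockPosition: 131072 }, maxv: { blockPosition: 64 * 2 ** 20 } }

console.log(canMergeBlocks(a, b)) // true: blocks abut, merged span is 128 KiB
console.log(canMergeBlocks(b, c)) // false: merged span would be ~64 MB, over the 30 MB cap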
test/csi.test.js (1 addition, 1 deletion)
@@ -47,7 +47,7 @@ describe('csi index', () => {
     let blocks = await ti.blocksForRange('1', 1, 4000)
     expect(blocks.length).toEqual(0)
     blocks = await ti.blocksForRange('1', 0, 2000046092)
-    expect(blocks.length).toEqual(4)
+    expect(blocks.length).toEqual(1)
     expect(blocks[0].minv.blockPosition).toEqual(0)
     expect(blocks[0].minv.dataPosition).toEqual(2560)
     // console.log( blocks );
