Skip to content

Commit

Permalink
Fix pseudobin parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Oct 12, 2018
1 parent 4fb24cc commit d5796df
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 10 deletions.
34 changes: 24 additions & 10 deletions src/bai.js
@@ -1,7 +1,9 @@
const Long = require('long')
const VirtualOffset = require('./virtualOffset')
const Chunk = require('./chunk')

const BAI_MAGIC = 21578050 // BAI\1
const { longToNumber } = require('./util')

class BAI {
/**
Expand All @@ -23,15 +25,20 @@ class BAI {
}
}

parsePseudoBin(bytes, offset) {
const lineCount = longToNumber(
Long.fromBytesLE(bytes.slice(offset + 20, offset + 28), true),
)
return { lineCount }
}

/**
 * Report the line count recorded in the index for one reference sequence.
 *
 * NOTE(review): this text comes from a rendered diff whose +/- markers are not
 * visible, so two bodies are interleaved below: one that looks up
 * binIndex[binLimit + 1] and one that reads index.stats. As shown, `ret` is
 * declared with const twice in the same scope and everything after the first
 * unconditional return cannot run — confirm against the real file which body
 * is current (presumably the second, which matches the `stats` field that
 * parse() stores on each index entry).
 *
 * @param {number} refId - key into the parsed `indices` collection
 * @returns {Promise<number>} the line count, or -1 when none is available
 */
async lineCount(refId) {
// --- first body (presumably the lines removed by this commit) ---
const indexData = await this.parse()
if (!indexData) return -1
const indexes = indexData.indices[refId]
if (!indexes) return -1
const depth = 5
// (1 << 18) - 1 = 262143, divided by 7 gives 37449
const binLimit = ((1 << ((depth + 1) * 3)) - 1) / 7
// looks up the bin numbered one past binLimit
const ret = indexes.binIndex[binLimit + 1]
return ret ? ret[ret.length - 1].minv.dataPosition : -1
// --- second body (presumably the lines added by this commit) ---
const index = (await this.parse()).indices[refId]
if (!index) {
return -1
}
// stats may be undefined; fall back to {} so the property read below is safe
const ret = index.stats || {}
return ret.lineCount === undefined ? -1 : ret.lineCount
}

// memoize
Expand All @@ -46,18 +53,25 @@ class BAI {
}

data.refCount = bytes.readInt32LE(4)
const depth = 5
const binLimit = ((1 << ((depth + 1) * 3)) - 1) / 7

// read the indexes for each reference sequence
data.indices = new Array(data.refCount)
let currOffset = 8
for (let i = 0; i < data.refCount; i += 1) {
// the binning index
const binCount = bytes.readInt32LE(currOffset)
let stats

currOffset += 4
const binIndex = {}
for (let j = 0; j < binCount; j += 1) {
const bin = bytes.readUInt32LE(currOffset)
if (bin > binLimit) {
stats = this.parsePseudoBin(bytes, currOffset + 4)
}

const chunkCount = bytes.readInt32LE(currOffset + 4)
currOffset += 8
const chunks = new Array(chunkCount)
Expand All @@ -82,7 +96,7 @@ class BAI {

currOffset += nintv * 8

data.indices[i] = { binIndex }
data.indices[i] = { binIndex, stats }
}

return data
Expand Down Expand Up @@ -166,7 +180,7 @@ class BAI {
* the given reference sequence ID, false otherwise
*/
async hasRefSeq(seqId) {
// NOTE(review): two return statements appear here because this text comes
// from a rendered diff whose +/- markers are not visible; presumably the
// first is the removed line and the second its replacement. As written,
// only the first return can execute — confirm against the real file.
return !!(await this.parse()).indices[seqId]
// Substitutes {} for a missing entry so reading binIndex cannot throw, then
// coerces the binIndex value to a boolean.
return !!((await this.parse()).indices[seqId] || {}).binIndex
}

/**
Expand Down
16 changes: 16 additions & 0 deletions test/bai.test.js
Expand Up @@ -18,6 +18,22 @@ describe('index formats', () => {
expect(await ti.hasRefSeq(0)).toEqual(true)
})
})

// Checks BAI parsing against a larger index fixture stored under ./data.
describe('index human data', () => {
  it('loads 1000 genomes bai', async () => {
    const fixturePath = require.resolve(
      './data/HG00096.chrom20.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.bai',
    )
    const baiIndex = new BAI({ filehandle: new LocalFile(fixturePath) })

    // parse() must flag the data as a BAI index
    const parsed = await baiIndex.parse()
    expect(parsed.bai).toEqual(true)

    // refId 19 is presumably chromosome 20 (0-based), judging by the fixture
    // file name — verify against the BAM header if this ever changes
    const hasReference = await baiIndex.hasRefSeq(19)
    expect(hasReference).toEqual(true)

    const count = await baiIndex.lineCount(19)
    expect(count).toEqual(2924253)
  })
})
describe('bam header', () => {
it('loads volvox-sorted.bam', async () => {
const ti = new BAM({
Expand Down
Binary file not shown.

0 comments on commit d5796df

Please sign in to comment.