Skip to content

Commit

Permalink
Fix CRAM mismatches calculation regression in v2.2.0 (#3342)
Browse files Browse the repository at this point in the history
* Fix regression on CIGAR/mismatch generation in CRAM files

* Small test harness for readfeatures

* Make sure mismatches and cigar match between bam and cram

* Add test files
  • Loading branch information
cmdcolin committed Nov 21, 2022
1 parent 8a7c3bf commit 4fef1dd
Show file tree
Hide file tree
Showing 14 changed files with 438 additions and 234 deletions.
9 changes: 9 additions & 0 deletions plugins/alignments/src/BamAdapter/MismatchParser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -144,13 +144,15 @@ test('more skip', () => {
"altbase": "G",
"base": "A",
"length": 1,
"qual": undefined,
"start": 6,
"type": "mismatch",
},
{
"altbase": "C",
"base": "A",
"length": 1,
"qual": undefined,
"start": 11,
"type": "mismatch",
},
Expand All @@ -164,27 +166,31 @@ test('more skip', () => {
"altbase": "G",
"base": "C",
"length": 1,
"qual": undefined,
"start": 32,
"type": "mismatch",
},
{
"altbase": "A",
"base": "C",
"length": 1,
"qual": undefined,
"start": 34,
"type": "mismatch",
},
{
"altbase": "C",
"base": "C",
"length": 1,
"qual": undefined,
"start": 40,
"type": "mismatch",
},
{
"altbase": "A",
"base": "C",
"length": 1,
"qual": undefined,
"start": 46,
"type": "mismatch",
},
Expand All @@ -198,20 +204,23 @@ test('more skip', () => {
"altbase": "A",
"base": "G",
"length": 1,
"qual": undefined,
"start": 52,
"type": "mismatch",
},
{
"altbase": "G",
"base": "G",
"length": 1,
"qual": undefined,
"start": 68,
"type": "mismatch",
},
{
"altbase": "G",
"base": "G",
"length": 1,
"qual": undefined,
"start": 70,
"type": "mismatch",
},
Expand Down
5 changes: 1 addition & 4 deletions plugins/alignments/src/BamAdapter/MismatchParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -195,10 +195,7 @@ export function mdToMismatches(
}
const s = getTemplateCoordLocal(curr.start)
curr.base = seq[s] || 'X'
const qualScore = qual?.[s]
if (qualScore) {
curr.qual = qualScore
}
curr.qual = qual?.[s]
curr.altbase = token
nextRecord()
}
Expand Down
107 changes: 107 additions & 0 deletions plugins/alignments/src/CombinationTest.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import PluginManager from '@jbrowse/core/PluginManager'
import { toArray } from 'rxjs/operators'
import { LocalFile } from 'generic-filehandle'
import { getSubAdapterType } from '@jbrowse/core/data_adapters/dataAdapterCache'

import CramAdapter from './CramAdapter/CramAdapter'
import BamAdapter from './BamAdapter/BamAdapter'
import { SequenceAdapter } from './CramAdapter/CramTestAdapters'

import cramConfigSchema from './CramAdapter/configSchema'
import bamConfigSchema from './BamAdapter/configSchema'

const pluginManager = new PluginManager()

const getVolvoxSequenceSubAdapter: getSubAdapterType = async () => {
return {
dataAdapter: new SequenceAdapter(
new LocalFile(require.resolve('../test_data/volvox.fa')),
),
sessionIds: new Set(),
}
}

async function getFeats(f1: string, f2: string) {
const cramAdapter = new CramAdapter(
cramConfigSchema.create({
cramLocation: {
localPath: require.resolve(f1),
},
craiLocation: {
localPath: require.resolve(f1 + '.crai'),
},
}),
getVolvoxSequenceSubAdapter,
pluginManager,
)

const bamAdapter = new BamAdapter(
bamConfigSchema.create({
bamLocation: {
localPath: require.resolve(f2),
},
index: {
location: {
localPath: require.resolve(f2 + '.bai'),
},
},
}),
)
const query = {
assemblyName: 'volvox',
refName: 'ctgA',
start: 1,
end: 10200,
}
const bamFeatures = bamAdapter.getFeatures(query)
const cramFeatures = cramAdapter.getFeatures(query)
const bamFeaturesArray = await bamFeatures.pipe(toArray()).toPromise()
const cramFeaturesArray = await cramFeatures.pipe(toArray()).toPromise()
return { bamFeaturesArray, cramFeaturesArray }
}

type M = { start: number }

async function cigarCheck(f: string) {
const { cramFeaturesArray, bamFeaturesArray } = await getFeats(
`../test_data/${f}.cram`,
`../test_data/${f}.bam`,
)
const cramMap = Object.fromEntries(
cramFeaturesArray.map(f => [f.get('name'), f.get('CIGAR')]),
)
const bamMap = Object.fromEntries(
bamFeaturesArray.map(f => [f.get('name'), f.get('CIGAR')]),
)
expect(bamMap).toEqual(cramMap)
}

async function mismatchesCheck(f: string) {
const { cramFeaturesArray, bamFeaturesArray } = await getFeats(
`../test_data/${f}.cram`,
`../test_data/${f}.bam`,
)
const cramMap = Object.fromEntries(
cramFeaturesArray.map(f => [
f.get('name'),
f.get('mismatches').sort((a: M, b: M) => b.start - a.start),
]),
)
const bamMap = Object.fromEntries(
bamFeaturesArray.map(f => [
f.get('name'),
f.get('mismatches').sort((a: M, b: M) => b.start - a.start),
]),
)
expect(bamMap).toEqual(cramMap)
}

test('match CIGAR across file types', async () => {
await cigarCheck('volvox-sorted')
await cigarCheck('volvox-long-reads.fastq.sorted')
})

test('mismatches same across file types', async () => {
await mismatchesCheck('volvox-sorted')
await mismatchesCheck('volvox-long-reads.fastq.sorted')
})
Loading

0 comments on commit 4fef1dd

Please sign in to comment.