Skip to content

Commit

Permalink
Merge pull request #1630 from GMOD/gwas
Browse files Browse the repository at this point in the history
Get column names from BED tabix files and other utils for external jbrowse-plugin-gwas support
  • Loading branch information
rbuels committed Feb 19, 2021
2 parents 9a8d75a + cd7be9b commit e3ad824
Show file tree
Hide file tree
Showing 9 changed files with 256 additions and 1,545 deletions.
34 changes: 30 additions & 4 deletions plugins/bed/src/BedTabixAdapter/BedTabixAdapter.test.ts
Expand Up @@ -27,7 +27,7 @@ test('adapter can fetch features from volvox-bed12.bed.gz', async () => {

const featuresArray = await features.pipe(toArray()).toPromise()
const featuresJsonArray = featuresArray.map(f => f.toJSON())
expect(featuresJsonArray).toMatchSnapshot()
expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot()
})

test('adapter can fetch features from volvox.sort.bed.gz simple bed3', async () => {
Expand Down Expand Up @@ -55,7 +55,7 @@ test('adapter can fetch features from volvox.sort.bed.gz simple bed3', async ()

const featuresArray = await features.pipe(toArray()).toPromise()
const featuresJsonArray = featuresArray.map(f => f.toJSON())
expect(featuresJsonArray).toMatchSnapshot()
expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot()
})

test('adapter can fetch features bed with autosql', async () => {
Expand Down Expand Up @@ -123,7 +123,7 @@ test('adapter can fetch features bed with autosql', async () => {

const featuresArray = await features.pipe(toArray()).toPromise()
const featuresJsonArray = featuresArray.map(f => f.toJSON())
expect(featuresJsonArray).toMatchSnapshot()
expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot()
})

test('adapter can fetch bed with header', async () => {
Expand Down Expand Up @@ -155,5 +155,31 @@ test('adapter can fetch bed with header', async () => {

const featuresArray = await features.pipe(toArray()).toPromise()
const featuresJsonArray = featuresArray.map(f => f.toJSON())
expect(featuresJsonArray).toMatchSnapshot()
expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot()
})

// Reads the gwas.bed.gz fixture through BedTabixAdapter and snapshots the
// first ten features returned for refName '1', positions 0 to 100,000.
test('adapter can use gwas header', async () => {
  const bedGzPath = require.resolve('./test_data/gwas.bed.gz')
  const tbiPath = require.resolve('./test_data/gwas.bed.gz.tbi')

  const config = MyConfigSchema.create({
    bedGzLocation: { localPath: bedGzPath },
    index: { location: { localPath: tbiPath } },
  })
  const adapter = new BedTabixAdapter(config)

  const featureStream = adapter.getFeatures({
    refName: '1',
    start: 0,
    end: 100_000,
    assemblyName: 'hg19',
  })

  // Collect the whole observable, serialise every feature, then keep only the
  // leading ten entries so the stored snapshot stays small.
  const collected = await featureStream.pipe(toArray()).toPromise()
  const serialised = collected.map(feature => feature.toJSON())
  const firstTen = serialised.slice(0, 10)
  expect(firstTen).toMatchSnapshot()
})
56 changes: 48 additions & 8 deletions plugins/bed/src/BedTabixAdapter/BedTabixAdapter.ts
Expand Up @@ -19,6 +19,10 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter {

protected bed: TabixIndexedFile

protected columnNames: string[]

protected scoreColumn: string

public static capabilities = ['getFeatures', 'getRefNames']

public constructor(config: Instance<typeof MyConfigSchema>) {
Expand All @@ -40,7 +44,8 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter {
tbiFilehandle: indexType !== 'CSI' ? openLocation(location) : undefined,
chunkCacheSize: 50 * 2 ** 20,
})

this.columnNames = readConfObject(config, 'columnNames')
this.scoreColumn = readConfObject(config, 'scoreColumn')
this.parser = new BED({ autoSql })
}

Expand All @@ -52,18 +57,48 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter {
return this.bed.getHeader()
}

/**
 * Turns one tab-delimited line into a plain object keyed by column name.
 *
 * The column at index `i` of `line` is stored under `fields[i]`. Columns that
 * have no corresponding entry in `fields` (the line has more columns than
 * names were supplied) are skipped; otherwise they would all be written to a
 * single bogus `"undefined"` key, with the last surplus column winning.
 *
 * @param fields - column names, positionally aligned with the line's columns
 * @param line - a single tab-separated record
 * @returns an object mapping each named column to its raw string value
 */
defaultParser(fields: string[], line: string) {
  return Object.fromEntries(
    line
      .split('\t')
      .map((value, i) => [fields[i], value])
      // drop columns for which no name was provided
      .filter(([name]) => name !== undefined),
  )
}

/**
 * Resolves the column names to use for parsed lines.
 *
 * Returns `this.columnNames` when it is non-empty. Otherwise reads the file
 * header via `this.bed.getHeader()`, takes its last non-empty line and, if
 * that line contains a tab, drops its first character (presumably a leading
 * comment marker such as '#' -- confirm against the files in use), splits on
 * tabs and trims each name.
 *
 * @returns the column names, or null when none are configured and the last
 * header line is missing or contains no tab
 */
async getNames() {
  const configured = this.columnNames
  if (configured.length) {
    return configured
  }

  const headerText = await this.bed.getHeader()
  const headerLines = headerText.split('\n').filter(l => !!l)
  const lastLine = headerLines[headerLines.length - 1]

  // No usable definition line: the header is empty or not tab-delimited.
  if (!lastLine || !lastLine.includes('\t')) {
    return null
  }

  const withoutFirstChar = lastLine.slice(1)
  return withoutFirstChar.split('\t').map(name => name.trim())
}

public getFeatures(query: Region, opts: BaseOptions = {}) {
return ObservableCreate<Feature>(async observer => {
const meta = await this.bed.getMetadata()
const { columnNumbers } = meta
const colRef = columnNumbers.ref - 1
const colStart = columnNumbers.start - 1
const colEnd = columnNumbers.end - 1
// colSame handles special case for tabix where a single column is both
// the start and end, this is assumed to be covering the base at this
// position (e.g. tabix -s 1 -b 2 -e 2) begin and end are same
const colSame = colStart === colEnd ? 1 : 0
const names = await this.getNames()
await this.bed.getLines(query.refName, query.start, query.end, {
lineCallback: (line: string, fileOffset: number) => {
const l = line.split('\t')
const refName = l[0]
const start = +l[1]
const end = +l[2]
const uniqueId = `bed-${fileOffset}`
const data = this.parser.parseLine(line, {
uniqueId,
})
const refName = l[colRef]
const start = +l[colStart]

const end = +l[colEnd] + colSame
const uniqueId = `${this.id}-${fileOffset}`
const data = names
? this.defaultParser(names, line)
: this.parser.parseLine(line, { uniqueId })

const { blockCount, blockSizes, blockStarts, chromStarts } = data

Expand All @@ -84,6 +119,10 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter {
})
}
}

if (this.scoreColumn) {
data.score = data[this.scoreColumn]
}
delete data.chrom
delete data.chromStart
delete data.chromEnd
Expand All @@ -92,6 +131,7 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter {
start,
end,
refName,
uniqueId,
})
const r = f.get('thickStart') ? ucscProcessedTranscript(f) : f
observer.next(r)
Expand Down

0 comments on commit e3ad824

Please sign in to comment.