Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add plaintext bed support with BedAdapter #3015

Merged
merged 1 commit into from
Jun 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions plugins/bed/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@
},
"dependencies": {
"@babel/runtime": "^7.17.9",
"@flatten-js/interval-tree": "^1.0.15",
"@gmod/bbi": "^1.0.35",
"@gmod/bed": "^2.0.6",
"@gmod/bgzf-filehandle": "^1.4.3",
"@gmod/tabix": "^1.5.2"
},
"peerDependencies": {
Expand Down
170 changes: 170 additions & 0 deletions plugins/bed/src/BedAdapter/BedAdapter.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import { toArray } from 'rxjs/operators'
import BedAdapter from './BedAdapter'
import MyConfigSchema from './configSchema'

import { TextDecoder } from 'web-encoding'
if (!window.TextDecoder) {
window.TextDecoder = TextDecoder
}

test('adapter can fetch features from volvox-bed12.bed', async () => {
const adapter = new BedAdapter(
MyConfigSchema.create({
bedLocation: {
localPath: require.resolve('./test_data/volvox-bed12.bed'),
locationType: 'LocalPathLocation',
},
}),
)

const features = adapter.getFeatures({
refName: 'ctgA',
start: 0,
end: 20000,
assemblyName: 'volvox',
})
expect(await adapter.hasDataForRefName('ctgA')).toBe(true)
expect(await adapter.hasDataForRefName('ctgB')).toBe(false)

const featuresArray = await features.pipe(toArray()).toPromise()
const featuresJsonArray = featuresArray.map(f => f.toJSON())
expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot()
})

test('adapter can fetch features from volvox.sort.bed simple bed3', async () => {
const adapter = new BedAdapter(
MyConfigSchema.create({
bedLocation: {
localPath: require.resolve('./test_data/volvox.sort.bed'),
locationType: 'LocalPathLocation',
},
}),
)

const features = adapter.getFeatures({
refName: 'contigA',
start: 0,
end: 20000,
assemblyName: 'volvox',
})
expect(await adapter.hasDataForRefName('contigA')).toBe(true)
expect(await adapter.hasDataForRefName('ctgB')).toBe(false)

const featuresArray = await features.pipe(toArray()).toPromise()
const featuresJsonArray = featuresArray.map(f => f.toJSON())
expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot()
})

test('adapter can fetch features bed with autosql', async () => {
const adapter = new BedAdapter(
MyConfigSchema.create({
bedLocation: {
localPath: require.resolve('./test_data/volvox-autosql.bed'),
locationType: 'LocalPathLocation',
},

autoSql: `table gdcCancer
"somatic variants converted from MAF files obtained through the NCI GDC"
(
string chrom; "Chromosome (or contig, scaffold, etc.)"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item"
uint score; "Score from 0-1000"
char[1] strand; "+ or -"
uint thickStart; "Start of where display should be thick (start codon)"
uint thickEnd; "End of where display should be thick (stop codon)"
uint reserved; "Used as itemRgb as of 2004-11-22"
int blockCount; "Number of blocks"
int[blockCount] blockSizes; "Comma separated list of block sizes"
int[blockCount] chromStarts; "Start positions relative to chromStart"
string sampleCount; "Number of samples with this variant"
string freq; "Variant frequency"
lstring Hugo_Symbol; "Hugo symbol"
lstring Entrez_Gene_Id; "Entrez Gene Id"
lstring Variant_Classification; "Class of variant"
lstring Variant_Type; "Type of variant"
lstring Reference_Allele; "Reference allele"
lstring Tumor_Seq_Allele1; "Tumor allele 1"
lstring Tumor_Seq_Allele2; "Tumor allele 2"
lstring dbSNP_RS; "dbSNP RS number"
lstring dbSNP_Val_Status; "dbSNP validation status"
lstring days_to_death; "Number of days till death"
lstring cigarettes_per_day; "Number of cigarettes per day"
lstring weight; "Weight"
lstring alcohol_history; "Any alcohol consumption?"
lstring alcohol_intensity; "Frequency of alcohol consumption"
lstring bmi; "Body mass index"
lstring years_smoked; "Number of years smoked"
lstring height; "Height"
lstring gender; "Gender"
lstring project_id; "TCGA Project id"
lstring ethnicity; "Ethnicity"
lstring Tumor_Sample_Barcode; "Tumor sample barcode"
lstring Matched_Norm_Sample_Barcode; "Matcheds normal sample barcode"
lstring case_id; "Case ID number"
)`,
}),
)
const features = adapter.getFeatures({
refName: 'ctgA',
start: 0,
end: 20000,
assemblyName: 'volvox',
})
expect(await adapter.hasDataForRefName('ctgA')).toBe(true)
expect(await adapter.hasDataForRefName('ctgB')).toBe(false)

const featuresArray = await features.pipe(toArray()).toPromise()
const featuresJsonArray = featuresArray.map(f => f.toJSON())
expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot()
})

test('adapter can fetch bed with header', async () => {
const adapter = new BedAdapter(
MyConfigSchema.create({
bedLocation: {
localPath: require.resolve('./test_data/volvox.sort.with.header.bed'),
locationType: 'LocalPathLocation',
},
}),
)

const features = adapter.getFeatures({
refName: 'contigA',
start: 0,
end: 20000,
assemblyName: 'volvox',
})
expect(await adapter.hasDataForRefName('contigA')).toBe(true)
expect(await adapter.hasDataForRefName('ctgB')).toBe(false)

const featuresArray = await features.pipe(toArray()).toPromise()
const featuresJsonArray = featuresArray.map(f => f.toJSON())
expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot()
})

test('adapter can use gwas header', async () => {
const adapter = new BedAdapter(
MyConfigSchema.create({
bedLocation: {
localPath: require.resolve('./test_data/gwas.bed'),
locationType: 'LocalPathLocation',
},
colRef: 0,
colStart: 1,
colEnd: 1,
}),
)

const features = adapter.getFeatures({
refName: '1',
start: 0,
end: 100_000,
assemblyName: 'hg19',
})

const featuresArray = await features.pipe(toArray()).toPromise()
const featuresJsonArray = featuresArray.map(f => f.toJSON())
expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot()
})
172 changes: 172 additions & 0 deletions plugins/bed/src/BedAdapter/BedAdapter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
import BED from '@gmod/bed'
import {
BaseFeatureDataAdapter,
BaseOptions,
} from '@jbrowse/core/data_adapters/BaseAdapter'
import { openLocation } from '@jbrowse/core/util/io'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import { Region, Feature } from '@jbrowse/core/util'
import { featureData } from '../util'
import IntervalTree from '@flatten-js/interval-tree'
import { unzip } from '@gmod/bgzf-filehandle'

function isGzip(buf: Buffer) {
return buf[0] === 31 && buf[1] === 139 && buf[2] === 8
}

export default class BedAdapter extends BaseFeatureDataAdapter {
protected bedFeatures?: Promise<{
header: string
features: Record<string, string[]>
parser: typeof BED
columnNames: string[]
scoreColumn: string
colRef: number
colStart: number
colEnd: number
}>

protected intervalTrees: {
[key: string]: Promise<IntervalTree | undefined> | undefined
} = {}

public static capabilities = ['getFeatures', 'getRefNames']

private async loadDataP(opts: BaseOptions = {}) {
const pm = this.pluginManager
const bedLoc = this.getConf('bedLocation')
const buf = await openLocation(bedLoc, pm).readFile(opts)
const buffer = isGzip(buf) ? await unzip(buf) : buf
// 512MB max chrome string length is 512MB
if (buffer.length > 536_870_888) {
throw new Error('Data exceeds maximum string length (512MB)')
}
const data = new TextDecoder('utf8', { fatal: true }).decode(buffer)
const lines = data.split('\n').filter(f => !!f)
const headerLines = []
let i = 0
for (; i < lines.length && lines[i].startsWith('#'); i++) {
headerLines.push(lines[i])
}
const header = headerLines.join('\n')
const features = {} as Record<string, string[]>
for (; i < lines.length; i++) {
const line = lines[i]
const tab = line.indexOf('\t')
const refName = line.slice(0, tab)
if (!features[refName]) {
features[refName] = []
}
features[refName].push(line)
}

const autoSql = this.getConf('autoSql') as string
const parser = new BED({ autoSql })
const columnNames = this.getConf('columnNames')
const scoreColumn = this.getConf('scoreColumn')
const colRef = this.getConf('colRef')
const colStart = this.getConf('colStart')
const colEnd = this.getConf('colEnd')

return {
header,
features,
parser,
columnNames,
scoreColumn,
colRef,
colStart,
colEnd,
}
}

private async loadData(opts: BaseOptions = {}) {
if (!this.bedFeatures) {
this.bedFeatures = this.loadDataP(opts).catch(e => {
this.bedFeatures = undefined
throw e
})
}

return this.bedFeatures
}

public async getRefNames(opts: BaseOptions = {}) {
const { features } = await this.loadData(opts)
return Object.keys(features)
}

async getHeader(opts: BaseOptions = {}) {
const { header } = await this.loadData(opts)
return header
}

async getNames() {
const { header, columnNames } = await this.loadData()
if (columnNames.length) {
return columnNames
}
const defs = header.split('\n').filter(f => !!f)
const defline = defs[defs.length - 1]
return defline?.includes('\t')
? defline
.slice(1)
.split('\t')
.map(field => field.trim())
: undefined
}

private async loadFeatureIntervalTreeHelper(refName: string) {
const { colRef, colStart, colEnd, features, parser, scoreColumn } =
await this.loadData()
const lines = features[refName]
if (!lines) {
return undefined
}
const names = await this.getNames()

const intervalTree = new IntervalTree()
const ret = lines.map((f, i) => {
const uniqueId = `${this.id}-${refName}-${i}`
return featureData(
f,
colRef,
colStart,
colEnd,
scoreColumn,
parser,
uniqueId,
names,
)
})

for (let i = 0; i < ret.length; i++) {
const obj = ret[i]
intervalTree.insert([obj.get('start'), obj.get('end')], obj)
}
return intervalTree
}

private async loadFeatureIntervalTree(refName: string) {
if (!this.intervalTrees[refName]) {
this.intervalTrees[refName] = this.loadFeatureIntervalTreeHelper(
refName,
).catch(e => {
this.intervalTrees[refName] = undefined
throw e
})
}
return this.intervalTrees[refName]
}

public getFeatures(query: Region, opts: BaseOptions = {}) {
return ObservableCreate<Feature>(async observer => {
const { start, end, refName } = query
const intervalTree = await this.loadFeatureIntervalTree(refName)
intervalTree?.search([start, end]).forEach(f => observer.next(f))
observer.complete()
}, opts.signal)
}

public freeResources(): void {}
}
Loading