-
Notifications
You must be signed in to change notification settings - Fork 60
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add bedtabix tests for plaintext bed adapter
- Loading branch information
Showing
20 changed files
with
1,374 additions
and
204 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
import { toArray } from 'rxjs/operators' | ||
import BedAdapter from './BedAdapter' | ||
import MyConfigSchema from './configSchema' | ||
|
||
import { TextDecoder } from 'web-encoding' | ||
if (!window.TextDecoder) { | ||
window.TextDecoder = TextDecoder | ||
} | ||
|
||
test('adapter can fetch features from volvox-bed12.bed', async () => { | ||
const adapter = new BedAdapter( | ||
MyConfigSchema.create({ | ||
bedLocation: { | ||
localPath: require.resolve('./test_data/volvox-bed12.bed'), | ||
locationType: 'LocalPathLocation', | ||
}, | ||
}), | ||
) | ||
|
||
const features = adapter.getFeatures({ | ||
refName: 'ctgA', | ||
start: 0, | ||
end: 20000, | ||
assemblyName: 'volvox', | ||
}) | ||
expect(await adapter.hasDataForRefName('ctgA')).toBe(true) | ||
expect(await adapter.hasDataForRefName('ctgB')).toBe(false) | ||
|
||
const featuresArray = await features.pipe(toArray()).toPromise() | ||
const featuresJsonArray = featuresArray.map(f => f.toJSON()) | ||
expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot() | ||
}) | ||
|
||
test('adapter can fetch features from volvox.sort.bed simple bed3', async () => { | ||
const adapter = new BedAdapter( | ||
MyConfigSchema.create({ | ||
bedLocation: { | ||
localPath: require.resolve('./test_data/volvox.sort.bed'), | ||
locationType: 'LocalPathLocation', | ||
}, | ||
}), | ||
) | ||
|
||
const features = adapter.getFeatures({ | ||
refName: 'contigA', | ||
start: 0, | ||
end: 20000, | ||
assemblyName: 'volvox', | ||
}) | ||
expect(await adapter.hasDataForRefName('contigA')).toBe(true) | ||
expect(await adapter.hasDataForRefName('ctgB')).toBe(false) | ||
|
||
const featuresArray = await features.pipe(toArray()).toPromise() | ||
const featuresJsonArray = featuresArray.map(f => f.toJSON()) | ||
expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot() | ||
}) | ||
|
||
test('adapter can fetch features bed with autosql', async () => { | ||
const adapter = new BedAdapter( | ||
MyConfigSchema.create({ | ||
bedLocation: { | ||
localPath: require.resolve('./test_data/volvox-autosql.bed'), | ||
locationType: 'LocalPathLocation', | ||
}, | ||
|
||
autoSql: `table gdcCancer | ||
"somatic variants converted from MAF files obtained through the NCI GDC" | ||
( | ||
string chrom; "Chromosome (or contig, scaffold, etc.)" | ||
uint chromStart; "Start position in chromosome" | ||
uint chromEnd; "End position in chromosome" | ||
string name; "Name of item" | ||
uint score; "Score from 0-1000" | ||
char[1] strand; "+ or -" | ||
uint thickStart; "Start of where display should be thick (start codon)" | ||
uint thickEnd; "End of where display should be thick (stop codon)" | ||
uint reserved; "Used as itemRgb as of 2004-11-22" | ||
int blockCount; "Number of blocks" | ||
int[blockCount] blockSizes; "Comma separated list of block sizes" | ||
int[blockCount] chromStarts; "Start positions relative to chromStart" | ||
string sampleCount; "Number of samples with this variant" | ||
string freq; "Variant frequency" | ||
lstring Hugo_Symbol; "Hugo symbol" | ||
lstring Entrez_Gene_Id; "Entrez Gene Id" | ||
lstring Variant_Classification; "Class of variant" | ||
lstring Variant_Type; "Type of variant" | ||
lstring Reference_Allele; "Reference allele" | ||
lstring Tumor_Seq_Allele1; "Tumor allele 1" | ||
lstring Tumor_Seq_Allele2; "Tumor allele 2" | ||
lstring dbSNP_RS; "dbSNP RS number" | ||
lstring dbSNP_Val_Status; "dbSNP validation status" | ||
lstring days_to_death; "Number of days till death" | ||
lstring cigarettes_per_day; "Number of cigarettes per day" | ||
lstring weight; "Weight" | ||
lstring alcohol_history; "Any alcohol consumption?" | ||
lstring alcohol_intensity; "Frequency of alcohol consumption" | ||
lstring bmi; "Body mass index" | ||
lstring years_smoked; "Number of years smoked" | ||
lstring height; "Height" | ||
lstring gender; "Gender" | ||
lstring project_id; "TCGA Project id" | ||
lstring ethnicity; "Ethnicity" | ||
lstring Tumor_Sample_Barcode; "Tumor sample barcode" | ||
lstring Matched_Norm_Sample_Barcode; "Matcheds normal sample barcode" | ||
lstring case_id; "Case ID number" | ||
)`, | ||
}), | ||
) | ||
const features = adapter.getFeatures({ | ||
refName: 'ctgA', | ||
start: 0, | ||
end: 20000, | ||
assemblyName: 'volvox', | ||
}) | ||
expect(await adapter.hasDataForRefName('ctgA')).toBe(true) | ||
expect(await adapter.hasDataForRefName('ctgB')).toBe(false) | ||
|
||
const featuresArray = await features.pipe(toArray()).toPromise() | ||
const featuresJsonArray = featuresArray.map(f => f.toJSON()) | ||
expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot() | ||
}) | ||
|
||
test('adapter can fetch bed with header', async () => { | ||
const adapter = new BedAdapter( | ||
MyConfigSchema.create({ | ||
bedLocation: { | ||
localPath: require.resolve('./test_data/volvox.sort.with.header.bed'), | ||
locationType: 'LocalPathLocation', | ||
}, | ||
}), | ||
) | ||
|
||
const features = adapter.getFeatures({ | ||
refName: 'contigA', | ||
start: 0, | ||
end: 20000, | ||
assemblyName: 'volvox', | ||
}) | ||
expect(await adapter.hasDataForRefName('contigA')).toBe(true) | ||
expect(await adapter.hasDataForRefName('ctgB')).toBe(false) | ||
|
||
const featuresArray = await features.pipe(toArray()).toPromise() | ||
const featuresJsonArray = featuresArray.map(f => f.toJSON()) | ||
expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot() | ||
}) | ||
|
||
test('adapter can use gwas header', async () => { | ||
const adapter = new BedAdapter( | ||
MyConfigSchema.create({ | ||
bedLocation: { | ||
localPath: require.resolve('./test_data/gwas.bed'), | ||
locationType: 'LocalPathLocation', | ||
}, | ||
colRef: 0, | ||
colStart: 1, | ||
colEnd: 1, | ||
}), | ||
) | ||
|
||
const features = adapter.getFeatures({ | ||
refName: '1', | ||
start: 0, | ||
end: 100_000, | ||
assemblyName: 'hg19', | ||
}) | ||
|
||
const featuresArray = await features.pipe(toArray()).toPromise() | ||
const featuresJsonArray = featuresArray.map(f => f.toJSON()) | ||
expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot() | ||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
import BED from '@gmod/bed' | ||
import { | ||
BaseFeatureDataAdapter, | ||
BaseOptions, | ||
} from '@jbrowse/core/data_adapters/BaseAdapter' | ||
import { openLocation } from '@jbrowse/core/util/io' | ||
import { ObservableCreate } from '@jbrowse/core/util/rxjs' | ||
import { Region, Feature } from '@jbrowse/core/util' | ||
import { featureData } from '../util' | ||
import IntervalTree from '@flatten-js/interval-tree' | ||
import { unzip } from '@gmod/bgzf-filehandle' | ||
|
||
function isGzip(buf: Buffer) { | ||
return buf[0] === 31 && buf[1] === 139 && buf[2] === 8 | ||
} | ||
|
||
export default class BedAdapter extends BaseFeatureDataAdapter { | ||
protected bedFeatures?: Promise<{ | ||
header: string | ||
features: Record<string, string[]> | ||
parser: typeof BED | ||
columnNames: string[] | ||
scoreColumn: string | ||
colRef: number | ||
colStart: number | ||
colEnd: number | ||
}> | ||
|
||
protected intervalTrees: { | ||
[key: string]: Promise<IntervalTree | undefined> | undefined | ||
} = {} | ||
|
||
public static capabilities = ['getFeatures', 'getRefNames'] | ||
|
||
private async loadDataP(opts: BaseOptions = {}) { | ||
const pm = this.pluginManager | ||
const bedLoc = this.getConf('bedLocation') | ||
const buf = await openLocation(bedLoc, pm).readFile(opts) | ||
const buffer = isGzip(buf) ? await unzip(buf) : buf | ||
// 512MB max chrome string length is 512MB | ||
if (buffer.length > 536_870_888) { | ||
throw new Error('Data exceeds maximum string length (512MB)') | ||
} | ||
const data = new TextDecoder('utf8', { fatal: true }).decode(buffer) | ||
const lines = data.split('\n').filter(f => !!f) | ||
const headerLines = [] | ||
let i = 0 | ||
for (; i < lines.length && lines[i].startsWith('#'); i++) { | ||
headerLines.push(lines[i]) | ||
} | ||
const header = headerLines.join('\n') | ||
const features = {} as Record<string, string[]> | ||
for (; i < lines.length; i++) { | ||
const line = lines[i] | ||
const tab = line.indexOf('\t') | ||
const refName = line.slice(0, tab) | ||
if (!features[refName]) { | ||
features[refName] = [] | ||
} | ||
features[refName].push(line) | ||
} | ||
|
||
const autoSql = this.getConf('autoSql') as string | ||
const parser = new BED({ autoSql }) | ||
const columnNames = this.getConf('columnNames') | ||
const scoreColumn = this.getConf('scoreColumn') | ||
const colRef = this.getConf('colRef') | ||
const colStart = this.getConf('colStart') | ||
const colEnd = this.getConf('colEnd') | ||
|
||
return { | ||
header, | ||
features, | ||
parser, | ||
columnNames, | ||
scoreColumn, | ||
colRef, | ||
colStart, | ||
colEnd, | ||
} | ||
} | ||
|
||
private async loadData(opts: BaseOptions = {}) { | ||
if (!this.bedFeatures) { | ||
this.bedFeatures = this.loadDataP(opts).catch(e => { | ||
this.bedFeatures = undefined | ||
throw e | ||
}) | ||
} | ||
|
||
return this.bedFeatures | ||
} | ||
|
||
public async getRefNames(opts: BaseOptions = {}) { | ||
const { features } = await this.loadData(opts) | ||
return Object.keys(features) | ||
} | ||
|
||
async getHeader(opts: BaseOptions = {}) { | ||
const { header } = await this.loadData(opts) | ||
return header | ||
} | ||
|
||
async getNames() { | ||
const { header, columnNames } = await this.loadData() | ||
if (columnNames.length) { | ||
return columnNames | ||
} | ||
const defs = header.split('\n').filter(f => !!f) | ||
const defline = defs[defs.length - 1] | ||
return defline?.includes('\t') | ||
? defline | ||
.slice(1) | ||
.split('\t') | ||
.map(field => field.trim()) | ||
: undefined | ||
} | ||
|
||
private async loadFeatureIntervalTreeHelper(refName: string) { | ||
const { colRef, colStart, colEnd, features, parser, scoreColumn } = | ||
await this.loadData() | ||
const lines = features[refName] | ||
if (!lines) { | ||
return undefined | ||
} | ||
const names = await this.getNames() | ||
|
||
const intervalTree = new IntervalTree() | ||
const ret = lines.map((f, i) => { | ||
const uniqueId = `${this.id}-${refName}-${i}` | ||
return featureData( | ||
f, | ||
colRef, | ||
colStart, | ||
colEnd, | ||
scoreColumn, | ||
parser, | ||
uniqueId, | ||
names, | ||
) | ||
}) | ||
|
||
for (let i = 0; i < ret.length; i++) { | ||
const obj = ret[i] | ||
intervalTree.insert([obj.get('start'), obj.get('end')], obj) | ||
} | ||
return intervalTree | ||
} | ||
|
||
private async loadFeatureIntervalTree(refName: string) { | ||
if (!this.intervalTrees[refName]) { | ||
this.intervalTrees[refName] = this.loadFeatureIntervalTreeHelper( | ||
refName, | ||
).catch(e => { | ||
this.intervalTrees[refName] = undefined | ||
throw e | ||
}) | ||
} | ||
return this.intervalTrees[refName] | ||
} | ||
|
||
public getFeatures(query: Region, opts: BaseOptions = {}) { | ||
return ObservableCreate<Feature>(async observer => { | ||
const { start, end, refName } = query | ||
const intervalTree = await this.loadFeatureIntervalTree(refName) | ||
intervalTree?.search([start, end]).forEach(f => observer.next(f)) | ||
observer.complete() | ||
}, opts.signal) | ||
} | ||
|
||
public freeResources(): void {} | ||
} |
Oops, something went wrong.