diff --git a/plugins/bed/src/BedAdapter/BedAdapter.test.ts b/plugins/bed/src/BedAdapter/BedAdapter.test.ts new file mode 100644 index 0000000000..d772b882e3 --- /dev/null +++ b/plugins/bed/src/BedAdapter/BedAdapter.test.ts @@ -0,0 +1,170 @@ +import { toArray } from 'rxjs/operators' +import BedAdapter from './BedAdapter' +import MyConfigSchema from './configSchema' + +import { TextDecoder } from 'web-encoding' +if (!window.TextDecoder) { + window.TextDecoder = TextDecoder +} + +test('adapter can fetch features from volvox-bed12.bed', async () => { + const adapter = new BedAdapter( + MyConfigSchema.create({ + bedLocation: { + localPath: require.resolve('./test_data/volvox-bed12.bed'), + locationType: 'LocalPathLocation', + }, + }), + ) + + const features = adapter.getFeatures({ + refName: 'ctgA', + start: 0, + end: 20000, + assemblyName: 'volvox', + }) + expect(await adapter.hasDataForRefName('ctgA')).toBe(true) + expect(await adapter.hasDataForRefName('ctgB')).toBe(false) + + const featuresArray = await features.pipe(toArray()).toPromise() + const featuresJsonArray = featuresArray.map(f => f.toJSON()) + expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot() +}) + +test('adapter can fetch features from volvox.sort.bed simple bed3', async () => { + const adapter = new BedAdapter( + MyConfigSchema.create({ + bedLocation: { + localPath: require.resolve('./test_data/volvox.sort.bed'), + locationType: 'LocalPathLocation', + }, + }), + ) + + const features = adapter.getFeatures({ + refName: 'contigA', + start: 0, + end: 20000, + assemblyName: 'volvox', + }) + expect(await adapter.hasDataForRefName('contigA')).toBe(true) + expect(await adapter.hasDataForRefName('ctgB')).toBe(false) + + const featuresArray = await features.pipe(toArray()).toPromise() + const featuresJsonArray = featuresArray.map(f => f.toJSON()) + expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot() +}) + +test('adapter can fetch features bed with autosql', async () => { + const adapter = new BedAdapter( + MyConfigSchema.create({ + bedLocation: { + localPath: require.resolve('./test_data/volvox-autosql.bed'), + locationType: 'LocalPathLocation', + }, + + autoSql: `table gdcCancer +"somatic variants converted from MAF files obtained through the NCI GDC" + ( + string chrom; "Chromosome (or contig, scaffold, etc.)" + uint chromStart; "Start position in chromosome" + uint chromEnd; "End position in chromosome" + string name; "Name of item" + uint score; "Score from 0-1000" + char[1] strand; "+ or -" + uint thickStart; "Start of where display should be thick (start codon)" + uint thickEnd; "End of where display should be thick (stop codon)" + uint reserved; "Used as itemRgb as of 2004-11-22" + int blockCount; "Number of blocks" + int[blockCount] blockSizes; "Comma separated list of block sizes" + int[blockCount] chromStarts; "Start positions relative to chromStart" + string sampleCount; "Number of samples with this variant" + string freq; "Variant frequency" + lstring Hugo_Symbol; "Hugo symbol" + lstring Entrez_Gene_Id; "Entrez Gene Id" + lstring Variant_Classification; "Class of variant" + lstring Variant_Type; "Type of variant" + lstring Reference_Allele; "Reference allele" + lstring Tumor_Seq_Allele1; "Tumor allele 1" + lstring Tumor_Seq_Allele2; "Tumor allele 2" + lstring dbSNP_RS; "dbSNP RS number" + lstring dbSNP_Val_Status; "dbSNP validation status" + lstring days_to_death; "Number of days till death" + lstring cigarettes_per_day; "Number of cigarettes per day" + lstring weight; "Weight" + lstring alcohol_history; "Any alcohol consumption?" + lstring alcohol_intensity; "Frequency of alcohol consumption" + lstring bmi; "Body mass index" + lstring years_smoked; "Number of years smoked" + lstring height; "Height" + lstring gender; "Gender" + lstring project_id; "TCGA Project id" + lstring ethnicity; "Ethnicity" + lstring Tumor_Sample_Barcode; "Tumor sample barcode" + lstring Matched_Norm_Sample_Barcode; "Matcheds normal sample barcode" + lstring case_id; "Case ID number" +)`, + }), + ) + const features = adapter.getFeatures({ + refName: 'ctgA', + start: 0, + end: 20000, + assemblyName: 'volvox', + }) + expect(await adapter.hasDataForRefName('ctgA')).toBe(true) + expect(await adapter.hasDataForRefName('ctgB')).toBe(false) + + const featuresArray = await features.pipe(toArray()).toPromise() + const featuresJsonArray = featuresArray.map(f => f.toJSON()) + expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot() +}) + +test('adapter can fetch bed with header', async () => { + const adapter = new BedAdapter( + MyConfigSchema.create({ + bedLocation: { + localPath: require.resolve('./test_data/volvox.sort.with.header.bed'), + locationType: 'LocalPathLocation', + }, + }), + ) + + const features = adapter.getFeatures({ + refName: 'contigA', + start: 0, + end: 20000, + assemblyName: 'volvox', + }) + expect(await adapter.hasDataForRefName('contigA')).toBe(true) + expect(await adapter.hasDataForRefName('ctgB')).toBe(false) + + const featuresArray = await features.pipe(toArray()).toPromise() + const featuresJsonArray = featuresArray.map(f => f.toJSON()) + expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot() +}) + +test('adapter can use gwas header', async () => { + const adapter = new BedAdapter( + MyConfigSchema.create({ + bedLocation: { + localPath: require.resolve('./test_data/gwas.bed'), + locationType: 'LocalPathLocation', + }, + colRef: 0, + colStart: 1, + colEnd: 1, + }), + ) + + const features = adapter.getFeatures({ + refName: '1', + start: 0, + end: 100_000, + assemblyName: 'hg19', + }) + + const featuresArray = await features.pipe(toArray()).toPromise() + const featuresJsonArray = featuresArray.map(f => f.toJSON()) + expect(featuresJsonArray.slice(0, 10)).toMatchSnapshot() +}) diff --git a/plugins/bed/src/BedAdapter/BedAdapter.ts b/plugins/bed/src/BedAdapter/BedAdapter.ts new file mode 100644 index 0000000000..b6d8ebd477 --- /dev/null +++ b/plugins/bed/src/BedAdapter/BedAdapter.ts @@ -0,0 +1,177 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import BED from '@gmod/bed' +import { + BaseFeatureDataAdapter, + BaseOptions, +} from '@jbrowse/core/data_adapters/BaseAdapter' +import { openLocation } from '@jbrowse/core/util/io' +import { ObservableCreate } from '@jbrowse/core/util/rxjs' +import { Region, Feature } from '@jbrowse/core/util' +import { featureData } from '../util' +import IntervalTree from '@flatten-js/interval-tree' +import { unzip } from '@gmod/bgzf-filehandle' + +function isGzip(buf: Buffer) { + return buf[0] === 31 && buf[1] === 139 && buf[2] === 8 +} + +export default class BedAdapter extends BaseFeatureDataAdapter { + protected bedFeatures?: Promise<{ + header: string + features: Record + parser: typeof BED + columnNames: string[] + scoreColumn: string + colRef: number + colStart: number + colEnd: number + }> + + protected intervalTrees: { + [key: string]: Promise | undefined + } = {} + + public static capabilities = ['getFeatures', 'getRefNames'] + + private async loadDataP(opts: BaseOptions = {}) { + const pm = this.pluginManager + const bedLoc = this.getConf('bedLocation') + const buf = await openLocation(bedLoc, pm).readFile(opts) + const buffer = isGzip(buf) ? await unzip(buf) : buf + // 512MB max chrome string length is 512MB + if (buffer.length > 536_870_888) { + throw new Error('Data exceeds maximum string length (512MB)') + } + const data = new TextDecoder('utf8', { fatal: true }).decode(buffer) + const lines = data.split('\n').filter(f => !!f) + const headerLines = [] + let i = 0 + for (; i < lines.length && lines[i].startsWith('#'); i++) { + headerLines.push(lines[i]) + } + const header = headerLines.join('\n') + const features = {} as Record + for (; i < lines.length; i++) { + const line = lines[i] + const tab = line.indexOf('\t') + const refName = line.slice(0, tab) + if (!features[refName]) { + features[refName] = [] + } + features[refName].push(line) + } + + const autoSql = this.getConf('autoSql') as string + const parser = new BED({ autoSql }) + const columnNames = this.getConf('columnNames') + const scoreColumn = this.getConf('scoreColumn') + const colRef = this.getConf('colRef') + const colStart = this.getConf('colStart') + const colEnd = this.getConf('colEnd') + + return { + header, + features, + parser, + columnNames, + scoreColumn, + colRef, + colStart, + colEnd, + } + } + + private async loadData(opts: BaseOptions = {}) { + if (!this.bedFeatures) { + this.bedFeatures = this.loadDataP(opts).catch(e => { + this.bedFeatures = undefined + throw e + }) + } + + return this.bedFeatures + } + + public async getRefNames(opts: BaseOptions = {}) { + const { features } = await this.loadData(opts) + return Object.keys(features) + } + + async getHeader(opts: BaseOptions = {}) { + const { header } = await this.loadData(opts) + return header + } + + defaultParser(fields: string[], line: string) { + return Object.fromEntries(line.split('\t').map((f, i) => [fields[i], f])) + } + + async getNames() { + const { header, columnNames } = await this.loadData() + if (columnNames.length) { + return columnNames + } + const defs = header.split('\n').filter(f => !!f) + const defline = defs[defs.length - 1] + return defline?.includes('\t') + ? defline + .slice(1) + .split('\t') + .map(field => field.trim()) + : undefined + } + + private async loadFeatureIntervalTreeHelper(refName: string) { + const { colRef, colStart, colEnd, features, parser, scoreColumn } = + await this.loadData() + const lines = features[refName] + if (!lines) { + return undefined + } + const names = await this.getNames() + + const intervalTree = new IntervalTree() + const ret = lines.map((f, i) => { + const uniqueId = `${this.id}-${refName}-${i}` + return featureData( + f, + colRef, + colStart, + colEnd, + scoreColumn, + parser, + uniqueId, + names, + ) + }) + + for (let i = 0; i < ret.length; i++) { + const obj = ret[i] + intervalTree.insert([obj.get('start'), obj.get('end')], obj) + } + return intervalTree + } + + private async loadFeatureIntervalTree(refName: string) { + if (!this.intervalTrees[refName]) { + this.intervalTrees[refName] = this.loadFeatureIntervalTreeHelper( + refName, + ).catch(e => { + this.intervalTrees[refName] = undefined + throw e + }) + } + return this.intervalTrees[refName] + } + + public getFeatures(query: Region, opts: BaseOptions = {}) { + return ObservableCreate(async observer => { + const { start, end, refName } = query + const intervalTree = await this.loadFeatureIntervalTree(refName) + intervalTree?.search([start, end]).forEach(f => observer.next(f)) + observer.complete() + }, opts.signal) + } + + public freeResources(): void {} +} diff --git a/plugins/bed/src/BedAdapter/__snapshots__/BedAdapter.test.ts.snap b/plugins/bed/src/BedAdapter/__snapshots__/BedAdapter.test.ts.snap new file mode 100644 index 0000000000..d3ebe037c2 --- /dev/null +++ b/plugins/bed/src/BedAdapter/__snapshots__/BedAdapter.test.ts.snap @@ -0,0 +1,437 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`adapter can fetch bed with header 1`] = ` +Array [ + Object { + "end": 3009, + "refName": "contigA", + "start": 3000, + "strand": 0, + "uniqueId": "test-contigA-0", + }, + Object { + "end": 3114, + "refName": "contigA", + "start": 3105, + "strand": 0, + "uniqueId": "test-contigA-1", + }, + Object { + "end": 3161, + "refName": "contigA", + "start": 3152, + "strand": 0, + "uniqueId": "test-contigA-2", + }, + Object { + "end": 3180, + "refName": "contigA", + "start": 3171, + "strand": 0, + "uniqueId": "test-contigA-3", + }, + Object { + "end": 3183, + "refName": "contigA", + "start": 3174, + "strand": 0, + "uniqueId": "test-contigA-4", + }, + Object { + "end": 3222, + "refName": "contigA", + "start": 3213, + "strand": 0, + "uniqueId": "test-contigA-5", + }, + Object { + "end": 3474, + "refName": "contigA", + "start": 3465, + "strand": 0, + "uniqueId": "test-contigA-6", + }, + Object { + "end": 3804, + "refName": "contigA", + "start": 3795, + "strand": 0, + "uniqueId": "test-contigA-7", + }, + Object { + "end": 4044, + "refName": "contigA", + "start": 4035, + "strand": 0, + "uniqueId": "test-contigA-8", + }, + Object { + "end": 4082, + "refName": "contigA", + "start": 4073, + "strand": 0, + "uniqueId": "test-contigA-9", + }, +] +`; + +exports[`adapter can fetch features bed with autosql 1`] = ` +Array [ + Object { + "Entrez_Gene_Id": "2782", + "Hugo_Symbol": "GNB1", + "Matched_Norm_Sample_Barcode": "TCGA-OR-A5KB-11A-11D-A30A-10", + "Reference_Allele": "T", + "Tumor_Sample_Barcode": "TCGA-OR-A5KB-01A-11D-A30A-10", + "Tumor_Seq_Allele1": "T", + "Tumor_Seq_Allele2": "G", + "Variant_Classification": "Splice_Region", + "Variant_Type": "SNP", + "alcohol_history": "--", + "alcohol_intensity": "--", + "bmi": "--", + "case_id": "09454ed6-64bc-4a35-af44-7c4344623d45", + "cigarettes_per_day": "--", + "days_to_death": "--", + "dbSNP_RS": "novel", + "dbSNP_Val_Status": "", + "end": 10, + "ethnicity": "not hispanic or latino", + "freq": "0.0108695652174", + "gender": "female", + "height": "--", + "name": "T>G", + "project_id": "TCGA-ACC", + "refName": "ctgA", + "reserved": "0,0,0", + "sampleCount": "1", + "score": 1, + "start": 1, + "strand": 0, + "subfeatures": Array [ + Object { + "end": 2, + "parentId": "test-ctgA-0", + "start": 1, + "strand": 0, + "type": "three_prime_UTR", + "uniqueId": "test-ctgA-0-0", + }, + ], + "type": "mRNA", + "uniqueId": "test-ctgA-0", + "weight": "--", + "years_smoked": "--", + }, +] +`; + +exports[`adapter can fetch features from volvox.sort.bed simple bed3 1`] = ` +Array [ + Object { + "end": 3009, + "refName": "contigA", + "start": 3000, + "strand": 0, + "uniqueId": "test-contigA-0", + }, + Object { + "end": 3114, + "refName": "contigA", + "start": 3105, + "strand": 0, + "uniqueId": "test-contigA-1", + }, + Object { + "end": 3161, + "refName": "contigA", + "start": 3152, + "strand": 0, + "uniqueId": "test-contigA-2", + }, + Object { + "end": 3180, + "refName": "contigA", + "start": 3171, + "strand": 0, + "uniqueId": "test-contigA-3", + }, + Object { + "end": 3183, + "refName": "contigA", + "start": 3174, + "strand": 0, + "uniqueId": "test-contigA-4", + }, + Object { + "end": 3222, + "refName": "contigA", + "start": 3213, + "strand": 0, + "uniqueId": "test-contigA-5", + }, + Object { + "end": 3474, + "refName": "contigA", + "start": 3465, + "strand": 0, + "uniqueId": "test-contigA-6", + }, + Object { + "end": 3804, + "refName": "contigA", + "start": 3795, + "strand": 0, + "uniqueId": "test-contigA-7", + }, + Object { + "end": 4044, + "refName": "contigA", + "start": 4035, + "strand": 0, + "uniqueId": "test-contigA-8", + }, + Object { + "end": 4082, + "refName": "contigA", + "start": 4073, + "strand": 0, + "uniqueId": "test-contigA-9", + }, +] +`; + +exports[`adapter can fetch features from volvox-bed12.bed 1`] = ` +Array [ + Object { + "end": 9000, + "itemRgb": "0,0,0", + "name": "EDEN.1", + "refName": "ctgA", + "score": 1000, + "start": 1049, + "strand": 1, + "subfeatures": Array [ + Object { + "end": 1200, + "parentId": "test-ctgA-0", + "start": 1049, + "strand": 1, + "type": "five_prime_UTR", + "uniqueId": "test-ctgA-0-0", + }, + Object { + "end": 1500, + "parentId": "test-ctgA-0", + "start": 1200, + "strand": 1, + "type": "CDS", + "uniqueId": "test-ctgA-0-1", + }, + Object { + "end": 3902, + "parentId": "test-ctgA-0", + "start": 2999, + "strand": 1, + "type": "CDS", + "uniqueId": "test-ctgA-0-2", + }, + Object { + "end": 5500, + "parentId": "test-ctgA-0", + "start": 4999, + "strand": 1, + "type": "CDS", + "uniqueId": "test-ctgA-0-3", + }, + Object { + "end": 7608, + "parentId": "test-ctgA-0", + "start": 6999, + "strand": 1, + "type": "CDS", + "uniqueId": "test-ctgA-0-4", + }, + Object { + "end": 9000, + "parentId": "test-ctgA-0", + "start": 7608, + "strand": 1, + "type": "three_prime_UTR", + "uniqueId": "test-ctgA-0-5", + }, + ], + "type": "mRNA", + "uniqueId": "test-ctgA-0", + }, + Object { + "end": 9000, + "itemRgb": "0,0,0", + "name": "EDEN.2", + "refName": "ctgA", + "score": 1000, + "start": 1049, + "strand": 1, + "subfeatures": Array [ + Object { + "end": 1200, + "parentId": "test-ctgA-1", + "start": 1049, + "strand": 1, + "type": "five_prime_UTR", + "uniqueId": "test-ctgA-1-0", + }, + Object { + "end": 1500, + "parentId": "test-ctgA-1", + "start": 1200, + "strand": 1, + "type": "CDS", + "uniqueId": "test-ctgA-1-1", + }, + Object { + "end": 5500, + "parentId": "test-ctgA-1", + "start": 4999, + "strand": 1, + "type": "CDS", + "uniqueId": "test-ctgA-1-2", + }, + Object { + "end": 7608, + "parentId": "test-ctgA-1", + "start": 6999, + "strand": 1, + "type": "CDS", + "uniqueId": "test-ctgA-1-3", + }, + Object { + "end": 9000, + "parentId": "test-ctgA-1", + "start": 7608, + "strand": 1, + "type": "three_prime_UTR", + "uniqueId": "test-ctgA-1-4", + }, + ], + "type": "mRNA", + "uniqueId": "test-ctgA-1", + }, + Object { + "end": 9000, + "itemRgb": "0,0,0", + "name": "EDEN.3", + "refName": "ctgA", + "score": 1000, + "start": 1299, + "strand": 1, + "subfeatures": Array [ + Object { + "end": 1500, + "parentId": "test-ctgA-2", + "start": 1299, + "strand": 1, + "type": "five_prime_UTR", + "uniqueId": "test-ctgA-2-0", + }, + Object { + "end": 3300, + "parentId": "test-ctgA-2", + "start": 2999, + "strand": 1, + "type": "five_prime_UTR", + "uniqueId": "test-ctgA-2-1", + }, + Object { + "end": 3902, + "parentId": "test-ctgA-2", + "start": 3300, + "strand": 1, + "type": "CDS", + "uniqueId": "test-ctgA-2-2", + }, + Object { + "end": 5500, + "parentId": "test-ctgA-2", + "start": 4999, + "strand": 1, + "type": "CDS", + "uniqueId": "test-ctgA-2-3", + }, + Object { + "end": 7600, + "parentId": "test-ctgA-2", + "start": 6999, + "strand": 1, + "type": "CDS", + "uniqueId": "test-ctgA-2-4", + }, + Object { + "end": 9000, + "parentId": "test-ctgA-2", + "start": 7600, + "strand": 1, + "type": "three_prime_UTR", + "uniqueId": "test-ctgA-2-5", + }, + ], + "type": "mRNA", + "uniqueId": "test-ctgA-2", + }, + Object { + "end": 23000, + "itemRgb": "0,0,0", + "name": "rna-Apple3", + "refName": "ctgA", + "score": 1000, + "start": 17399, + "strand": 1, + "subfeatures": Array [ + Object { + "end": 17999, + "parentId": "test-ctgA-3", + "start": 17399, + "strand": 1, + "type": "five_prime_UTR", + "uniqueId": "test-ctgA-3-0", + }, + Object { + "end": 18800, + "parentId": "test-ctgA-3", + "start": 17999, + "strand": 1, + "type": "CDS", + "uniqueId": "test-ctgA-3-1", + }, + Object { + "end": 19500, + "parentId": "test-ctgA-3", + "start": 18999, + "strand": 1, + "type": "CDS", + "uniqueId": "test-ctgA-3-2", + }, + Object { + "end": 21200, + "parentId": "test-ctgA-3", + "start": 20999, + "strand": 1, + "type": "CDS", + "uniqueId": "test-ctgA-3-3", + }, + Object { + "end": 23000, + "parentId": "test-ctgA-3", + "start": 21200, + "strand": 1, + "type": "three_prime_UTR", + "uniqueId": "test-ctgA-3-4", + }, + ], + "type": "mRNA", + "uniqueId": "test-ctgA-3", + }, +] +`; + +exports[`adapter can use gwas header 1`] = `Array []`; diff --git a/plugins/bed/src/BedAdapter/configSchema.ts b/plugins/bed/src/BedAdapter/configSchema.ts new file mode 100644 index 0000000000..3f90262d40 --- /dev/null +++ b/plugins/bed/src/BedAdapter/configSchema.ts @@ -0,0 +1,45 @@ +import { ConfigurationSchema } from '@jbrowse/core/configuration' + +export default ConfigurationSchema( + 'BedAdapter', + { + bedLocation: { + type: 'fileLocation', + defaultValue: { uri: '/path/to/my.bed.gz', locationType: 'UriLocation' }, + }, + + columnNames: { + type: 'stringArray', + description: 'List of column names', + defaultValue: [], + }, + + scoreColumn: { + type: 'string', + description: 'The column to use as a "score" attribute', + defaultValue: '', + }, + + autoSql: { + type: 'string', + description: 'The autoSql definition for the data fields in the file', + defaultValue: '', + }, + colRef: { + type: 'number', + description: 'The column to use as a "refName" attribute', + defaultValue: 0, + }, + colStart: { + type: 'number', + description: 'The column to use as a "start" attribute', + defaultValue: 1, + }, + colEnd: { + type: 'number', + description: 'The column to use as a "end" attribute', + defaultValue: 2, + }, + }, + { explicitlyTyped: true }, +) diff --git a/plugins/bed/src/BedAdapter/index.ts b/plugins/bed/src/BedAdapter/index.ts new file mode 100644 index 0000000000..7703441d29 --- /dev/null +++ b/plugins/bed/src/BedAdapter/index.ts @@ -0,0 +1 @@ +export { default as configSchema } from './configSchema' diff --git a/plugins/bed/src/BedAdapter/test_data/gwas.bed b/plugins/bed/src/BedAdapter/test_data/gwas.bed new file mode 100644 index 0000000000..8f0f6191ce --- /dev/null +++ b/plugins/bed/src/BedAdapter/test_data/gwas.bed @@ -0,0 +1,20 @@ +#chrom pos rsid ref alt neg_log_pvalue beta stderr_beta alt_allele_freq +1 721290 rs12565286 G C 0.275233 . . . +1 752566 rs3094315 G A 0.0315638 . . . +1 775659 rs2905035 A G 0.183626 . . . +1 777122 rs2980319 A T 0.207258 . . . +1 779322 rs4040617 A G 0.0326393 . . . +1 780785 rs2977612 T A 0.0225965 . . . +1 785050 rs2905062 G A 0.0346572 . . . +1 785989 rs2980300 T C 0.00638744 . . . +1 798959 rs11240777 G A 0.49853 . . . +1 990380 . C A,G,T 0.136915 . . . +1 998501 rs3813193 G C 0.450384 . . . +1 1003629 rs4075116 C T 0.881074 . . . +1 1005806 rs3934834 C T 0.756218 . . . +1 1017170 rs3766193 C G 0.336111 . . . +1 1017197 rs3766192 C T 0.381743 . . . +1 1017587 rs3766191 C T 0.775467 . . . +1 1018562 rs9442371 C T 0.233364 . . . +1 1018704 rs9442372 A G 0.248721 . . . +1 1021346 rs10907177 A G 0.842846 . . . diff --git a/plugins/bed/src/BedAdapter/test_data/volvox-autosql.bed b/plugins/bed/src/BedAdapter/test_data/volvox-autosql.bed new file mode 100644 index 0000000000..4bc6378578 --- /dev/null +++ b/plugins/bed/src/BedAdapter/test_data/volvox-autosql.bed @@ -0,0 +1 @@ +ctgA 1 10 T>G 1 . 1815756 1815757 0,0,0 1 1 0 1 0.0108695652174 GNB1 2782 Splice_Region SNP T T G novel -- -- -- -- -- -- -- -- female TCGA-ACC not hispanic or latino TCGA-OR-A5KB-01A-11D-A30A-10 TCGA-OR-A5KB-11A-11D-A30A-10 09454ed6-64bc-4a35-af44-7c4344623d45 diff --git a/plugins/bed/src/BedAdapter/test_data/volvox-bed12.bed b/plugins/bed/src/BedAdapter/test_data/volvox-bed12.bed new file mode 100644 index 0000000000..1b08fb2b49 --- /dev/null +++ b/plugins/bed/src/BedAdapter/test_data/volvox-bed12.bed @@ -0,0 +1,4 @@ +ctgA 1049 9000 EDEN.1 1000 + 1200 7608 0,0,0 4 451,903,501,2001 0,1950,3950,5950 EDEN EDEN - - - +ctgA 1049 9000 EDEN.2 1000 + 1200 7608 0,0,0 3 451,501,2001 0,3950,5950 EDEN EDEN - - - +ctgA 1299 9000 EDEN.3 1000 + 3300 7600 0,0,0 4 201,903,501,2001 0,1700,3700,5700 EDEN EDEN - - - +ctgA 17399 23000 rna-Apple3 1000 + 17999 21200 0,0,0 3 1401,501,2001 0,1600,3600 rna-Apple3 rna-Apple3 - - - diff --git a/plugins/bed/src/BedAdapter/test_data/volvox.sort.bed b/plugins/bed/src/BedAdapter/test_data/volvox.sort.bed new file mode 100644 index 0000000000..4024add388 --- /dev/null +++ b/plugins/bed/src/BedAdapter/test_data/volvox.sort.bed @@ -0,0 +1,109 @@ +contigA 3000 3009 +contigA 3105 3114 +contigA 3152 3161 +contigA 3171 3180 +contigA 3174 3183 +contigA 3213 3222 +contigA 3465 3474 +contigA 3795 3804 +contigA 4035 4044 +contigA 4073 4082 +contigA 4074 4083 +contigA 4166 4175 +contigA 4179 4188 +contigA 4204 4213 +contigA 4248 4257 +contigA 4260 4269 +contigA 4577 4586 +contigA 4828 4837 +contigA 4901 4910 +contigA 5006 5015 +contigA 5051 5060 +contigA 5201 5210 +contigA 5260 5269 +contigA 5281 5290 +contigA 5349 5358 +contigA 5353 5362 +contigA 5423 5432 +contigA 5457 5466 +contigA 5548 5557 +contigA 5626 5635 +contigA 5835 5844 +contigA 5909 5918 +contigA 5927 5936 +contigA 5969 5978 +contigA 6070 6079 +contigA 6185 6194 +contigA 6285 6294 +contigA 6363 6372 +contigA 6559 6568 +contigA 6834 6843 +contigA 6908 6917 +contigA 6918 6927 +contigA 7141 7150 +contigA 7173 7182 +contigA 7569 7578 +contigA 7619 7628 +contigA 7654 7663 +contigA 7671 7680 +contigA 7707 7716 +contigA 7739 7748 +contigA 8077 8086 +contigA 8128 8137 +contigA 8132 8141 +contigA 8210 8219 +contigA 8267 8276 +contigA 8328 8337 +contigA 8524 8533 +contigA 8659 8668 +contigA 8660 8669 +contigA 8742 8751 +contigA 8811 8820 +contigA 8816 8825 +contigA 8964 8973 +contigA 9048 9057 +contigA 9054 9063 +contigA 9136 9145 +contigA 9243 9252 +contigA 9320 9329 +contigA 9366 9375 +contigA 9525 9534 +contigA 9580 9589 +contigA 9610 9619 +contigA 9744 9753 +contigA 9753 9762 +contigA 9759 9768 +contigA 9807 9816 +contigA 9853 9862 +contigA 9884 9893 +contigA 9920 9929 +contigA 10043 10052 +contigA 10216 10225 +contigA 10217 10226 +contigA 10252 10261 +contigA 10384 10393 +contigA 10498 10507 +contigA 10585 10594 +contigA 10684 10693 +contigA 10781 10790 +contigA 10875 10884 +contigA 11004 11013 +contigA 11095 11104 +contigA 11182 11191 +contigA 11207 11216 +contigA 11241 11250 +contigA 11335 11344 +contigA 11340 11349 +contigA 11535 11544 +contigA 11543 11552 +contigA 11549 11558 +contigA 11653 11662 +contigA 11675 11684 +contigA 11689 11698 +contigA 11774 11783 +contigA 11832 11841 +contigA 12386 12395 +contigA 12408 12417 +contigA 12441 12450 +contigA 12530 12539 +contigA 12738 12747 diff --git a/plugins/bed/src/BedAdapter/test_data/volvox.sort.with.header.bed b/plugins/bed/src/BedAdapter/test_data/volvox.sort.with.header.bed new file mode 100644 index 0000000000..215d294bdb --- /dev/null +++ b/plugins/bed/src/BedAdapter/test_data/volvox.sort.with.header.bed @@ -0,0 +1,121 @@ +#this is a test header +#this is a test header +#this is a test header +#this is a test header +#this is a test header +#this is a test header +#this is a test header +#this is a test header +#this is a test header +#this is a test header +#this is a test header +#this is a test header +contigA 3000 3009 +contigA 3105 3114 +contigA 3152 3161 +contigA 3171 3180 +contigA 3174 3183 +contigA 3213 3222 +contigA 3465 3474 +contigA 3795 3804 +contigA 4035 4044 +contigA 4073 4082 +contigA 4074 4083 +contigA 4166 4175 +contigA 4179 4188 +contigA 4204 4213 +contigA 4248 4257 +contigA 4260 4269 +contigA 4577 4586 +contigA 4828 4837 +contigA 4901 4910 +contigA 5006 5015 +contigA 5051 5060 +contigA 5201 5210 +contigA 5260 5269 +contigA 5281 5290 +contigA 5349 5358 +contigA 5353 5362 +contigA 5423 5432 +contigA 5457 5466 +contigA 5548 5557 +contigA 5626 5635 +contigA 5835 5844 +contigA 5909 5918 +contigA 5927 5936 +contigA 5969 5978 +contigA 6070 6079 +contigA 6185 6194 +contigA 6285 6294 +contigA 6363 6372 +contigA 6559 6568 +contigA 6834 6843 +contigA 6908 6917 +contigA 6918 6927 +contigA 7141 7150 +contigA 7173 7182 +contigA 7569 7578 +contigA 7619 7628 +contigA 7654 7663 +contigA 7671 7680 +contigA 7707 7716 +contigA 7739 7748 +contigA 8077 8086 +contigA 8128 8137 +contigA 8132 8141 +contigA 8210 8219 +contigA 8267 8276 +contigA 8328 8337 +contigA 8524 8533 +contigA 8659 8668 +contigA 8660 8669 +contigA 8742 8751 +contigA 8811 8820 +contigA 8816 8825 +contigA 8964 8973 +contigA 9048 9057 +contigA 9054 9063 +contigA 9136 9145 +contigA 9243 9252 +contigA 9320 9329 +contigA 9366 9375 +contigA 9525 9534 +contigA 9580 9589 +contigA 9610 9619 +contigA 9744 9753 +contigA 9753 9762 +contigA 9759 9768 +contigA 9807 9816 +contigA 9853 9862 +contigA 9884 9893 +contigA 9920 9929 +contigA 10043 10052 +contigA 10216 10225 +contigA 10217 10226 +contigA 10252 10261 +contigA 10384 10393 +contigA 10498 10507 +contigA 10585 10594 +contigA 10684 10693 +contigA 10781 10790 +contigA 10875 10884 +contigA 11004 11013 +contigA 11095 11104 +contigA 11182 11191 +contigA 11207 11216 +contigA 11241 11250 +contigA 11335 11344 +contigA 11340 11349 +contigA 11535 11544 +contigA 11543 11552 +contigA 11549 11558 +contigA 11653 11662 +contigA 11675 11684 +contigA 11689 11698 +contigA 11774 11783 +contigA 11832 11841 +contigA 12386 12395 +contigA 12408 12417 +contigA 12441 12450 +contigA 12530 12539 +contigA 12738 12747 diff --git a/plugins/bed/src/BedTabixAdapter/BedTabixAdapter.ts b/plugins/bed/src/BedTabixAdapter/BedTabixAdapter.ts index d10a7aee18..4e1ff9742d 100644 --- a/plugins/bed/src/BedTabixAdapter/BedTabixAdapter.ts +++ b/plugins/bed/src/BedTabixAdapter/BedTabixAdapter.ts @@ -4,15 +4,13 @@ import { BaseFeatureDataAdapter, BaseOptions, } from '@jbrowse/core/data_adapters/BaseAdapter' -import { FileLocation, Region } from '@jbrowse/core/util/types' import { openLocation } from '@jbrowse/core/util/io' import { ObservableCreate } from '@jbrowse/core/util/rxjs' -import SimpleFeature, { Feature } from '@jbrowse/core/util/simpleFeature' +import { FileLocation, Region, Feature } from '@jbrowse/core/util' import { TabixIndexedFile } from '@gmod/tabix' -import { readConfObject } from '@jbrowse/core/configuration' -import { ucscProcessedTranscript } from '../util' +import { featureData } from '../util' import PluginManager from '@jbrowse/core/PluginManager' -import { AnyConfigurationModel } from '@jbrowse/core/configuration/configurationSchema' +import { AnyConfigurationModel } from '@jbrowse/core/configuration' import { getSubAdapterType } from '@jbrowse/core/data_adapters/dataAdapterCache' export default class BedTabixAdapter extends BaseFeatureDataAdapter { @@ -32,31 +30,20 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter { pluginManager?: PluginManager, ) { super(config, getSubAdapter, pluginManager) - const bedGzLocation = readConfObject( - config, - 'bedGzLocation', - ) as FileLocation - const index = readConfObject(config, 'index') as { - indexType?: string - location: FileLocation - } - const autoSql = readConfObject(config, 'autoSql') as string - const { location, indexType } = index + const bedGzLoc = this.getConf('bedGzLocation') as FileLocation + const type = this.getConf(['index', 'indexType']) + const loc = this.getConf(['index', 'location']) + const autoSql = this.getConf('autoSql') + const pm = this.pluginManager this.bed = new TabixIndexedFile({ - filehandle: openLocation(bedGzLocation, this.pluginManager), - csiFilehandle: - indexType === 'CSI' - ? openLocation(location, this.pluginManager) - : undefined, - tbiFilehandle: - indexType !== 'CSI' - ? openLocation(location, this.pluginManager) - : undefined, + filehandle: openLocation(bedGzLoc, pm), + csiFilehandle: type === 'CSI' ? openLocation(loc, pm) : undefined, + tbiFilehandle: type !== 'CSI' ? openLocation(loc, pm) : undefined, chunkCacheSize: 50 * 2 ** 20, }) - this.columnNames = readConfObject(config, 'columnNames') - this.scoreColumn = readConfObject(config, 'scoreColumn') + this.columnNames = this.getConf('columnNames') + this.scoreColumn = this.getConf('scoreColumn') this.parser = new BED({ autoSql }) } @@ -68,10 +55,6 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter { return this.bed.getHeader() } - defaultParser(fields: string[], line: string) { - return Object.fromEntries(line.split('\t').map((f, i) => [fields[i], f])) - } - async getNames() { if (this.columnNames.length) { return this.columnNames @@ -97,55 +80,21 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter { // colSame handles special case for tabix where a single column is both // the start and end, this is assumed to be covering the base at this // position (e.g. tabix -s 1 -b 2 -e 2) begin and end are same - const colSame = colStart === colEnd ? 1 : 0 const names = await this.getNames() await this.bed.getLines(query.refName, query.start, query.end, { - lineCallback: (line: string, fileOffset: number) => { - const l = line.split('\t') - const refName = l[colRef] - const start = +l[colStart] - - const end = +l[colEnd] + colSame - const uniqueId = `${this.id}-${fileOffset}` - const data = names - ? this.defaultParser(names, line) - : this.parser.parseLine(line, { uniqueId }) - - const { blockCount, blockSizes, blockStarts, chromStarts } = data - - if (blockCount) { - const starts = chromStarts || blockStarts || [] - const sizes = blockSizes - const blocksOffset = start - data.subfeatures = [] - - for (let b = 0; b < blockCount; b += 1) { - const bmin = (starts[b] || 0) + blocksOffset - const bmax = bmin + (sizes[b] || 0) - data.subfeatures.push({ - uniqueId: `${uniqueId}-${b}`, - start: bmin, - end: bmax, - type: 'block', - }) - } - } - - if (this.scoreColumn) { - data.score = +data[this.scoreColumn] - } - delete data.chrom - delete data.chromStart - delete data.chromEnd - const f = new SimpleFeature({ - ...data, - start, - end, - refName, - uniqueId, - }) - const r = f.get('thickStart') ? ucscProcessedTranscript(f) : f - observer.next(r) + lineCallback: (line, fileOffset) => { + observer.next( + featureData( + line, + colRef, + colStart, + colEnd, + this.scoreColumn, + this.parser, + `${this.id}-${fileOffset}`, + names, + ), + ) }, signal: opts.signal, }) diff --git a/plugins/bed/src/index.ts b/plugins/bed/src/index.ts index 00bfc6441d..0ed1c2890d 100644 --- a/plugins/bed/src/index.ts +++ b/plugins/bed/src/index.ts @@ -3,12 +3,13 @@ import Plugin from '@jbrowse/core/Plugin' import PluginManager from '@jbrowse/core/PluginManager' import { configSchema as bigBedAdapterConfigSchema } from './BigBedAdapter' import { configSchema as bedTabixAdapterConfigSchema } from './BedTabixAdapter' +import { configSchema as bedAdapterConfigSchema } from './BedAdapter' import { FileLocation } from '@jbrowse/core/util/types' import { + getFileName, makeIndex, makeIndexType, AdapterGuesser, - getFileName, } from '@jbrowse/core/util/tracks' export default class BedPlugin extends Plugin { @@ -59,6 +60,16 @@ export default class BedPlugin extends Plugin { import('./BedTabixAdapter/BedTabixAdapter').then(r => r.default), }), ) + + pluginManager.addAdapterType( + () => + new AdapterType({ + name: 'BedAdapter', + configSchema: bedAdapterConfigSchema, + getAdapterClass: () => + import('./BedAdapter/BedAdapter').then(r => r.default), + }), + ) pluginManager.addToExtensionPoint( 'Core-guessAdapterForLocation', (adapterGuesser: AdapterGuesser) => { @@ -85,5 +96,27 @@ export default class BedPlugin extends Plugin { } }, ) + + pluginManager.addToExtensionPoint( + 'Core-guessAdapterForLocation', + (adapterGuesser: AdapterGuesser) => { + return ( + file: FileLocation, + index?: FileLocation, + adapterHint?: string, + ) => { + const regexGuess = /\.bed$/i + const adapterName = 'BedAdapter' + const fileName = getFileName(file) + if (regexGuess.test(fileName) || adapterHint === adapterName) { + return { + type: adapterName, + bedLocation: file, + } + } + return adapterGuesser(file, index, adapterHint) + } + }, + ) } } diff --git a/plugins/bed/src/util.ts b/plugins/bed/src/util.ts index 009c3c2ca0..cf57bcde92 100644 --- a/plugins/bed/src/util.ts +++ b/plugins/bed/src/util.ts @@ -1,4 +1,5 @@ -import SimpleFeature, { Feature } from '@jbrowse/core/util/simpleFeature' +import { SimpleFeature, Feature } from '@jbrowse/core/util' +import BED from '@gmod/bed' export function ucscProcessedTranscript(feature: Feature) { const children = feature.children() @@ -97,11 +98,9 @@ export function ucscProcessedTranscript(feature: Feature) { }) } }) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const newData: Record = {} - feature.tags().forEach(tag => { - newData[tag] = feature.get(tag) - }) + const newData = Object.fromEntries( + feature.tags().map(tag => [tag, feature.get(tag)]), + ) newData.subfeatures = newChildren newData.type = 'mRNA' newData.uniqueId = feature.id() @@ -120,3 +119,63 @@ export function ucscProcessedTranscript(feature: Feature) { }) return newFeature } + +function defaultParser(fields: string[], line: string) { + return Object.fromEntries(line.split('\t').map((f, i) => [fields[i], f])) +} + +export function featureData( + line: string, + colRef: number, + colStart: number, + colEnd: number, + scoreColumn: string, + parser: typeof BED, + uniqueId: string, + names?: string[], +) { + const l = line.split('\t') + const refName = l[colRef] + const start = +l[colStart] + const colSame = colStart === colEnd ? 1 : 0 + + const end = +l[colEnd] + colSame + const data = names + ? defaultParser(names, line) + : parser.parseLine(line, { uniqueId }) + + const { blockCount, blockSizes, blockStarts, chromStarts } = data + + if (blockCount) { + const starts = chromStarts || blockStarts || [] + const sizes = blockSizes + const blocksOffset = start + data.subfeatures = [] + + for (let b = 0; b < blockCount; b += 1) { + const bmin = (starts[b] || 0) + blocksOffset + const bmax = bmin + (sizes[b] || 0) + data.subfeatures.push({ + uniqueId: `${uniqueId}-${b}`, + start: bmin, + end: bmax, + type: 'block', + }) + } + } + + if (scoreColumn) { + data.score = +data[scoreColumn] + } + delete data.chrom + delete data.chromStart + delete data.chromEnd + const f = new SimpleFeature({ + ...data, + start, + end, + refName, + uniqueId, + }) + return f.get('thickStart') ? ucscProcessedTranscript(f) : f +} diff --git a/plugins/gff3/src/Gff3Adapter/Gff3Adapter.ts b/plugins/gff3/src/Gff3Adapter/Gff3Adapter.ts index 38c597fe15..883ae4fc5a 100644 --- a/plugins/gff3/src/Gff3Adapter/Gff3Adapter.ts +++ b/plugins/gff3/src/Gff3Adapter/Gff3Adapter.ts @@ -3,7 +3,6 @@ import { BaseOptions, } from '@jbrowse/core/data_adapters/BaseAdapter' import { NoAssemblyRegion } from '@jbrowse/core/util/types' -import { readConfObject } from '@jbrowse/core/configuration' import { openLocation } from '@jbrowse/core/util/io' import { ObservableCreate } from '@jbrowse/core/util/rxjs' import IntervalTree from '@flatten-js/interval-tree' @@ -23,16 +22,14 @@ export default class extends BaseFeatureDataAdapter { }> private async loadDataP() { - const buffer = await openLocation( - readConfObject(this.config, 'gffLocation'), - this.pluginManager, - ).readFile() - const buf = isGzip(buffer) ? await unzip(buffer) : buffer + const pm = this.pluginManager + const buf = await openLocation(this.getConf('gffLocation'), pm).readFile() + const buffer = isGzip(buf) ? await unzip(buf) : buf // 512MB max chrome string length is 512MB - if (buf.length > 536_870_888) { + if (buffer.length > 536_870_888) { throw new Error('Data exceeds maximum string length (512MB)') } - const data = new TextDecoder('utf8', { fatal: true }).decode(buf) + const data = new TextDecoder('utf8', { fatal: true }).decode(buffer) const lines = data.split('\n') const headerLines = [] for (let i = 0; i < lines.length && lines[i].startsWith('#'); i++) { diff --git a/plugins/gtf/src/GtfAdapter/GtfAdapter.ts b/plugins/gtf/src/GtfAdapter/GtfAdapter.ts index a71abbcc48..c08f51f2b4 100644 --- a/plugins/gtf/src/GtfAdapter/GtfAdapter.ts +++ b/plugins/gtf/src/GtfAdapter/GtfAdapter.ts @@ -23,11 +23,9 @@ export default class extends BaseFeatureDataAdapter { [key: string]: Promise | undefined } = {} - private async loadDataP() { - const buffer = await openLocation( - this.getConf('gtfLocation'), - this.pluginManager, - ).readFile() + private async loadDataP(opts: BaseOptions = {}) { + const gtfLoc = this.getConf('gtfLocation') + const buffer = await openLocation(gtfLoc, this.pluginManager).readFile(opts) const buf = isGzip(buffer) ? await unzip(buffer) : buffer // 512MB max chrome string length is 512MB @@ -39,7 +37,12 @@ export default class extends BaseFeatureDataAdapter { const lines = data.split('\n').filter(f => !!f && !f.startsWith('#')) const feats = {} as { [key: string]: string[] } for (let i = 0; i < lines.length; i++) { - const refName = lines[i].split('\t')[0] + const line = lines[i] + if (line.startsWith('#')) { + continue + } + const tab = line.indexOf('\t') + const refName = line.slice(0, tab) if (!feats[refName]) { feats[refName] = [] } @@ -49,9 +52,9 @@ export default class extends BaseFeatureDataAdapter { return { feats } } - private async loadData() { + private async loadData(opts: BaseOptions = {}) { if (!this.gtfFeatures) { - this.gtfFeatures = this.loadDataP().catch(e => { + this.gtfFeatures = this.loadDataP(opts).catch(e => { this.gtfFeatures = undefined throw e }) @@ -61,7 +64,7 @@ export default class extends BaseFeatureDataAdapter { } public async getRefNames(opts: BaseOptions = {}) { - const { feats } = await this.loadData() + const { feats } = await this.loadData(opts) return Object.keys(feats) } @@ -83,7 +86,7 @@ export default class extends BaseFeatureDataAdapter { (f, i) => new SimpleFeature({ data: featureData(f), - id: `${this.id}-offset-${i}`, + id: `${this.id}-${refName}-${i}`, }), ) diff --git a/plugins/gtf/src/GtfAdapter/__snapshots__/GtfAdapter.test.ts.snap b/plugins/gtf/src/GtfAdapter/__snapshots__/GtfAdapter.test.ts.snap index 60bba240e6..6a065accdf 100644 --- a/plugins/gtf/src/GtfAdapter/__snapshots__/GtfAdapter.test.ts.snap +++ b/plugins/gtf/src/GtfAdapter/__snapshots__/GtfAdapter.test.ts.snap @@ -18,7 +18,7 @@ Array [ "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.1", - "parentId": "test-offset-0", + "parentId": "test-ctgA-0", "phase": 0, "refName": "ctgA", "source": "example", @@ -26,14 +26,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.1", "type": "exon", - "uniqueId": "test-offset-0-0", + "uniqueId": "test-ctgA-0-0", }, Object { "end": 1200, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.1", - "parentId": "test-offset-0", + "parentId": "test-ctgA-0", "phase": 0, "refName": "ctgA", "source": "example", @@ -41,14 +41,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.1", "type": "five_prime_UTR", - "uniqueId": "test-offset-0-1", + "uniqueId": "test-ctgA-0-1", }, Object { "end": 1500, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.1", - "parentId": "test-offset-0", + "parentId": "test-ctgA-0", "phase": 0, "refName": "ctgA", "source": "example", @@ -56,14 +56,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.1", "type": "CDS", - "uniqueId": "test-offset-0-2", + "uniqueId": "test-ctgA-0-2", }, Object { "end": 3902, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.1", - "parentId": "test-offset-0", + "parentId": "test-ctgA-0", "phase": 0, "refName": "ctgA", "source": "example", @@ -71,14 +71,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.1", "type": "CDS", - "uniqueId": "test-offset-0-3", + "uniqueId": "test-ctgA-0-3", }, Object { "end": 3902, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.1", - "parentId": "test-offset-0", + "parentId": "test-ctgA-0", "phase": 0, "refName": "ctgA", "source": "example", @@ -86,14 +86,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.1", "type": "exon", - "uniqueId": "test-offset-0-4", + "uniqueId": "test-ctgA-0-4", }, Object { "end": 5500, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.1", - "parentId": "test-offset-0", + "parentId": "test-ctgA-0", "phase": 0, "refName": "ctgA", "source": "example", @@ -101,14 +101,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.1", "type": "CDS", - "uniqueId": "test-offset-0-5", + "uniqueId": "test-ctgA-0-5", }, Object { "end": 5500, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.1", - "parentId": "test-offset-0", + "parentId": "test-ctgA-0", "phase": 0, "refName": "ctgA", "source": "example", @@ -116,14 +116,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.1", "type": "exon", - "uniqueId": "test-offset-0-6", + "uniqueId": "test-ctgA-0-6", }, Object { "end": 7608, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.1", - "parentId": "test-offset-0", + "parentId": "test-ctgA-0", "phase": 0, "refName": "ctgA", "source": "example", @@ -131,14 +131,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.1", "type": "CDS", - "uniqueId": "test-offset-0-7", + "uniqueId": "test-ctgA-0-7", }, Object { "end": 9000, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.1", - "parentId": "test-offset-0", + "parentId": "test-ctgA-0", "phase": 0, "refName": "ctgA", "source": "example", @@ -146,14 +146,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.1", "type": "exon", - "uniqueId": "test-offset-0-8", + "uniqueId": "test-ctgA-0-8", }, Object { "end": 9000, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.1", - "parentId": "test-offset-0", + "parentId": "test-ctgA-0", "phase": 0, "refName": "ctgA", "source": "example", @@ -161,12 +161,12 @@ Array [ "strand": 1, "transcript_id": "EDEN.1", "type": "three_prime_UTR", - "uniqueId": "test-offset-0-9", + "uniqueId": "test-ctgA-0-9", }, ], "transcript_id": "EDEN.1", "type": "transcript", - "uniqueId": "test-offset-0", + "uniqueId": "test-ctgA-0", }, Object { "end": 9000, @@ -184,7 +184,7 @@ Array [ "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.2", - "parentId": "test-offset-1", + "parentId": "test-ctgA-1", "phase": 0, "refName": "ctgA", "source": "example", @@ -192,14 +192,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.2", "type": "exon", - "uniqueId": "test-offset-1-0", + "uniqueId": "test-ctgA-1-0", }, Object { "end": 1200, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.2", - "parentId": "test-offset-1", + "parentId": "test-ctgA-1", "phase": 0, "refName": "ctgA", "source": "example", @@ -207,14 +207,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.2", "type": "five_prime_UTR", - "uniqueId": "test-offset-1-1", + "uniqueId": "test-ctgA-1-1", }, Object { "end": 1500, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.2", - "parentId": "test-offset-1", + "parentId": "test-ctgA-1", "phase": 0, "refName": "ctgA", "source": "example", @@ -222,14 +222,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.2", "type": "CDS", - "uniqueId": "test-offset-1-2", + "uniqueId": "test-ctgA-1-2", }, Object { "end": 5500, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.2", - "parentId": "test-offset-1", + "parentId": "test-ctgA-1", "phase": 0, "refName": "ctgA", "source": "example", @@ -237,14 +237,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.2", "type": "CDS", - "uniqueId": "test-offset-1-3", + "uniqueId": "test-ctgA-1-3", }, Object { "end": 5500, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.2", - "parentId": "test-offset-1", + "parentId": "test-ctgA-1", "phase": 0, "refName": "ctgA", "source": "example", @@ -252,14 +252,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.2", "type": "exon", - "uniqueId": "test-offset-1-4", + "uniqueId": "test-ctgA-1-4", }, Object { "end": 7608, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.2", - "parentId": "test-offset-1", + "parentId": "test-ctgA-1", "phase": 0, "refName": "ctgA", "source": "example", @@ -267,14 +267,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.2", "type": "CDS", - "uniqueId": "test-offset-1-5", + "uniqueId": "test-ctgA-1-5", }, Object { "end": 9000, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.2", - "parentId": "test-offset-1", + "parentId": "test-ctgA-1", "phase": 0, "refName": "ctgA", "source": "example", @@ -282,14 +282,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.2", "type": "exon", - "uniqueId": "test-offset-1-6", + "uniqueId": "test-ctgA-1-6", }, Object { "end": 9000, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.2", - "parentId": "test-offset-1", + "parentId": "test-ctgA-1", "phase": 0, "refName": "ctgA", "source": "example", @@ -297,12 +297,12 @@ Array [ "strand": 1, "transcript_id": "EDEN.2", "type": "three_prime_UTR", - "uniqueId": "test-offset-1-7", + "uniqueId": "test-ctgA-1-7", }, ], "transcript_id": "EDEN.2", "type": "transcript", - "uniqueId": "test-offset-1", + "uniqueId": "test-ctgA-1", }, Object { "end": 9000, @@ -320,7 +320,7 @@ Array [ "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.3", - "parentId": "test-offset-2", + "parentId": "test-ctgA-2", "phase": 0, "refName": "ctgA", "source": "example", @@ -328,14 +328,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.3", "type": "exon", - "uniqueId": "test-offset-2-0", + "uniqueId": "test-ctgA-2-0", }, Object { "end": 1500, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.3", - "parentId": "test-offset-2", + "parentId": "test-ctgA-2", "phase": 1, "refName": "ctgA", "source": "example", @@ -343,14 +343,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.3", "type": "five_prime_UTR", - "uniqueId": "test-offset-2-1", + "uniqueId": "test-ctgA-2-1", }, Object { "end": 3902, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.3", - "parentId": "test-offset-2", + "parentId": "test-ctgA-2", "phase": 0, "refName": "ctgA", "source": "example", @@ -358,14 +358,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.3", "type": "exon", - "uniqueId": "test-offset-2-2", + "uniqueId": "test-ctgA-2-2", }, Object { "end": 3300, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.3", - "parentId": "test-offset-2", + "parentId": "test-ctgA-2", "phase": 1, "refName": "ctgA", "source": "example", @@ -373,14 +373,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.3", "type": "five_prime_UTR", - "uniqueId": "test-offset-2-3", + "uniqueId": "test-ctgA-2-3", }, Object { "end": 3902, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.3", - "parentId": "test-offset-2", + "parentId": "test-ctgA-2", "phase": 0, "refName": "ctgA", "source": "example", @@ -388,14 +388,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.3", "type": "CDS", - "uniqueId": "test-offset-2-4", + "uniqueId": "test-ctgA-2-4", }, Object { "end": 5500, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.3", - "parentId": "test-offset-2", + "parentId": "test-ctgA-2", "phase": 1, "refName": "ctgA", "source": "example", @@ -403,14 +403,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.3", "type": "CDS", - "uniqueId": "test-offset-2-5", + "uniqueId": "test-ctgA-2-5", }, Object { "end": 5500, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.3", - "parentId": "test-offset-2", + "parentId": "test-ctgA-2", "phase": 0, "refName": "ctgA", "source": "example", @@ -418,14 +418,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.3", "type": "exon", - "uniqueId": "test-offset-2-6", + "uniqueId": "test-ctgA-2-6", }, Object { "end": 7600, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.3", - "parentId": "test-offset-2", + "parentId": "test-ctgA-2", "phase": 1, "refName": "ctgA", "source": "example", @@ -433,14 +433,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.3", "type": "CDS", - "uniqueId": "test-offset-2-7", + "uniqueId": "test-ctgA-2-7", }, Object { "end": 9000, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.3", - "parentId": "test-offset-2", + "parentId": "test-ctgA-2", "phase": 0, "refName": "ctgA", "source": "example", @@ -448,14 +448,14 @@ Array [ "strand": 1, "transcript_id": "EDEN.3", "type": "exon", - "uniqueId": "test-offset-2-8", + "uniqueId": "test-ctgA-2-8", }, Object { "end": 9000, "gene_id": "EDEN", "gene_name": "EDEN", "name": "EDEN.3", - "parentId": "test-offset-2", + "parentId": "test-ctgA-2", "phase": 1, "refName": "ctgA", "source": "example", @@ -463,12 +463,12 @@ Array [ "strand": 1, "transcript_id": "EDEN.3", "type": "three_prime_UTR", - "uniqueId": "test-offset-2-9", + "uniqueId": "test-ctgA-2-9", }, ], "transcript_id": "EDEN.3", "type": "transcript", - "uniqueId": "test-offset-2", + "uniqueId": "test-ctgA-2", }, Object { "end": 11500, @@ -482,7 +482,7 @@ Array [ Object { "end": 11500, "name": "Apple1", - "parentId": "test-offset-3", + "parentId": "test-ctgA-3", "phase": 0, "refName": "ctgA", "source": "bare_predicted", @@ -490,12 +490,12 @@ Array [ "strand": 1, "transcript_id": "Apple1", "type": "CDS", - "uniqueId": "test-offset-3-0", + "uniqueId": "test-ctgA-3-0", }, ], "transcript_id": "Apple1", "type": "transcript", - "uniqueId": "test-offset-3", + "uniqueId": "test-ctgA-3", }, Object { "end": 17000, @@ -509,7 +509,7 @@ Array [ Object { "end": 13800, "name": "cds-Apple2", - "parentId": "test-offset-4", + "parentId": "test-ctgA-4", "phase": 0, "refName": "ctgA", "source": "predicted", @@ -517,12 +517,12 @@ Array [ "strand": 1, "transcript_id": "cds-Apple2", "type": "CDS", - "uniqueId": "test-offset-4-0", + "uniqueId": "test-ctgA-4-0", }, Object { "end": 15500, "name": "cds-Apple2", - "parentId": "test-offset-4", + "parentId": "test-ctgA-4", "phase": 1, "refName": "ctgA", "source": "predicted", @@ -530,12 +530,12 @@ Array [ "strand": 1, "transcript_id": "cds-Apple2", "type": "CDS", - "uniqueId": "test-offset-4-1", + "uniqueId": "test-ctgA-4-1", }, Object { "end": 17000, "name": "cds-Apple2", - "parentId": "test-offset-4", + "parentId": "test-ctgA-4", "phase": 2, "refName": "ctgA", "source": "predicted", @@ -543,12 +543,12 @@ Array [ "strand": 1, "transcript_id": "cds-Apple2", "type": "CDS", - "uniqueId": "test-offset-4-2", + "uniqueId": "test-ctgA-4-2", }, ], "transcript_id": "cds-Apple2", "type": "transcript", - "uniqueId": "test-offset-4", + "uniqueId": "test-ctgA-4", }, Object { "end": 23000, @@ -562,7 +562,7 @@ Array [ Object { "end": 17999, "name": "rna-Apple3", - "parentId": "test-offset-5", + "parentId": "test-ctgA-5", "phase": 0, "refName": "ctgA", "source": "exonerate", @@ -570,12 +570,12 @@ Array [ "strand": 1, "transcript_id": "rna-Apple3", "type": "UTR", - "uniqueId": "test-offset-5-0", + "uniqueId": "test-ctgA-5-0", }, Object { "end": 18800, "name": "rna-Apple3", - "parentId": "test-offset-5", + "parentId": "test-ctgA-5", "phase": 0, "refName": "ctgA", "source": "exonerate", @@ -583,12 +583,12 @@ Array [ "strand": 1, "transcript_id": "rna-Apple3", "type": "exon", - "uniqueId": "test-offset-5-1", + "uniqueId": "test-ctgA-5-1", }, Object { "end": 18800, "name": "rna-Apple3", - "parentId": "test-offset-5", + "parentId": "test-ctgA-5", "phase": 0, "refName": "ctgA", "source": "exonerate", @@ -596,12 +596,12 @@ Array [ "strand": 1, "transcript_id": "rna-Apple3", "type": "CDS", - "uniqueId": "test-offset-5-2", + "uniqueId": "test-ctgA-5-2", }, Object { "end": 19500, "name": "rna-Apple3", - "parentId": "test-offset-5", + "parentId": "test-ctgA-5", "phase": 0, "refName": "ctgA", "source": "exonerate", @@ -609,12 +609,12 @@ Array [ "strand": 1, "transcript_id": "rna-Apple3", "type": "CDS", - "uniqueId": "test-offset-5-3", + "uniqueId": "test-ctgA-5-3", }, Object { "end": 19500, "name": "rna-Apple3", - "parentId": "test-offset-5", + "parentId": "test-ctgA-5", "phase": 0, "refName": "ctgA", "source": "exonerate", @@ -622,12 +622,12 @@ Array [ "strand": 1, "transcript_id": "rna-Apple3", "type": "exon", - "uniqueId": "test-offset-5-4", + "uniqueId": "test-ctgA-5-4", }, Object { "end": 21200, "name": "rna-Apple3", - "parentId": "test-offset-5", + "parentId": "test-ctgA-5", "phase": 0, "refName": "ctgA", "source": "exonerate", @@ -635,12 +635,12 @@ Array [ "strand": 1, "transcript_id": "rna-Apple3", "type": "CDS", - "uniqueId": "test-offset-5-5", + "uniqueId": "test-ctgA-5-5", }, Object { "end": 23000, "name": "rna-Apple3", - "parentId": "test-offset-5", + "parentId": "test-ctgA-5", "phase": 0, "refName": "ctgA", "source": "exonerate", @@ -648,12 +648,12 @@ Array [ "strand": 1, "transcript_id": "rna-Apple3", "type": "exon", - "uniqueId": "test-offset-5-6", + "uniqueId": "test-ctgA-5-6", }, Object { "end": 23000, "name": "rna-Apple3", - "parentId": "test-offset-5", + "parentId": "test-ctgA-5", "phase": 0, "refName": "ctgA", "source": "exonerate", @@ -661,12 +661,12 @@ Array [ "strand": 1, "transcript_id": "rna-Apple3", "type": "UTR", - "uniqueId": "test-offset-5-7", + "uniqueId": "test-ctgA-5-7", }, ], "transcript_id": "rna-Apple3", "type": "transcript", - "uniqueId": "test-offset-5", + "uniqueId": "test-ctgA-5", }, ] `; @@ -696,7 +696,7 @@ Array [ "gene_source": "ensembl", "gene_version": "1", "name": "ENSVPAT00000000407", - "parentId": "test-offset-0", + "parentId": "test-GeneScaffold_1-0", "phase": 0, "refName": "GeneScaffold_1", "source": "ensembl", @@ -707,7 +707,7 @@ Array [ "transcript_source": "ensembl", "transcript_version": "1", "type": "exon", - "uniqueId": "test-offset-0-0", + "uniqueId": "test-GeneScaffold_1-0-0", }, Object { "end": 107812, @@ -719,7 +719,7 @@ Array [ "gene_source": "ensembl", "gene_version": "1", "name": "ENSVPAT00000000407", - "parentId": "test-offset-0", + "parentId": "test-GeneScaffold_1-0", "phase": 0, "refName": "GeneScaffold_1", "source": "ensembl", @@ -730,7 +730,7 @@ Array [ "transcript_source": "ensembl", "transcript_version": "1", "type": "exon", - "uniqueId": "test-offset-0-1", + "uniqueId": "test-GeneScaffold_1-0-1", }, Object { "end": 107877, @@ -742,7 +742,7 @@ Array [ "gene_source": "ensembl", "gene_version": "1", "name": "ENSVPAT00000000407", - "parentId": "test-offset-0", + "parentId": "test-GeneScaffold_1-0", "phase": 0, "refName": "GeneScaffold_1", "source": "ensembl", @@ -753,7 +753,7 @@ Array [ "transcript_source": "ensembl", "transcript_version": "1", "type": "exon", - "uniqueId": "test-offset-0-2", + "uniqueId": "test-GeneScaffold_1-0-2", }, Object { "end": 107895, @@ -765,7 +765,7 @@ Array [ "gene_source": "ensembl", "gene_version": "1", "name": "ENSVPAT00000000407", - "parentId": "test-offset-0", + "parentId": "test-GeneScaffold_1-0", "phase": 0, "refName": "GeneScaffold_1", "source": "ensembl", @@ -776,7 +776,7 @@ Array [ "transcript_source": "ensembl", "transcript_version": "1", "type": "exon", - "uniqueId": "test-offset-0-3", + "uniqueId": "test-GeneScaffold_1-0-3", }, Object { "end": 107917, @@ -788,7 +788,7 @@ Array [ "gene_source": "ensembl", "gene_version": "1", "name": "ENSVPAT00000000407", - "parentId": "test-offset-0", + "parentId": "test-GeneScaffold_1-0", "phase": 0, "refName": "GeneScaffold_1", "source": "ensembl", @@ -799,7 +799,7 @@ Array [ "transcript_source": "ensembl", "transcript_version": "1", "type": "exon", - "uniqueId": "test-offset-0-4", + "uniqueId": "test-GeneScaffold_1-0-4", }, Object { "end": 107987, @@ -811,7 +811,7 @@ Array [ "gene_source": "ensembl", "gene_version": "1", "name": "ENSVPAT00000000407", - "parentId": "test-offset-0", + "parentId": "test-GeneScaffold_1-0", "phase": 0, "refName": "GeneScaffold_1", "source": "ensembl", @@ -822,7 +822,7 @@ Array [ "transcript_source": "ensembl", "transcript_version": "1", "type": "exon", - "uniqueId": "test-offset-0-5", + "uniqueId": "test-GeneScaffold_1-0-5", }, ], "transcript_biotype": "pseudogene", @@ -830,7 +830,7 @@ Array [ "transcript_source": "ensembl", "transcript_version": "1", "type": "transcript", - "uniqueId": "test-offset-0", + "uniqueId": "test-GeneScaffold_1-0", }, ] `; diff --git a/products/jbrowse-web/src/__snapshots__/jbrowseModel.test.ts.snap b/products/jbrowse-web/src/__snapshots__/jbrowseModel.test.ts.snap index 1586ca2922..e0f3a2a6f4 100644 --- a/products/jbrowse-web/src/__snapshots__/jbrowseModel.test.ts.snap +++ b/products/jbrowse-web/src/__snapshots__/jbrowseModel.test.ts.snap @@ -1966,6 +1966,36 @@ Object { "trackId": "bedtabix_genes", "type": "FeatureTrack", }, + Object { + "adapter": Object { + "bedLocation": Object { + "internetAccountId": undefined, + "internetAccountPreAuthorization": undefined, + "locationType": "UriLocation", + "uri": "volvox-bed12.bed", + }, + "type": "BedAdapter", + }, + "assemblyNames": Array [ + "volvox", + ], + "category": Array [ + "Miscellaneous", + ], + "displays": Array [ + Object { + "displayId": "bed_genes-LinearBasicDisplay", + "type": "LinearBasicDisplay", + }, + Object { + "displayId": "bed_genes-LinearArcDisplay", + "type": "LinearArcDisplay", + }, + ], + "name": "Bed genes", + "trackId": "bed_genes", + "type": "FeatureTrack", + }, Object { "adapter": Object { "bigWigLocation": Object { diff --git a/test_data/volvox/config.json b/test_data/volvox/config.json index 101f2ffed1..28d5f98726 100644 --- a/test_data/volvox/config.json +++ b/test_data/volvox/config.json @@ -1030,6 +1030,19 @@ } } }, + { + "type": "FeatureTrack", + "trackId": "bed_genes", + "name": "Bed genes", + "assemblyNames": ["volvox"], + "category": ["Miscellaneous"], + "adapter": { + "type": "BedAdapter", + "bedLocation": { + "uri": "volvox-bed12.bed" + } + } + }, { "type": "QuantitativeTrack", "trackId": "LrM3WWJR0tj", diff --git a/test_data/volvox/volvox-bed12.bed b/test_data/volvox/volvox-bed12.bed new file mode 100644 index 0000000000..1b08fb2b49 --- /dev/null +++ b/test_data/volvox/volvox-bed12.bed @@ -0,0 +1,4 @@ +ctgA 1049 9000 EDEN.1 1000 + 1200 7608 0,0,0 4 451,903,501,2001 0,1950,3950,5950 EDEN EDEN - - - +ctgA 1049 9000 EDEN.2 1000 + 1200 7608 0,0,0 3 451,501,2001 0,3950,5950 EDEN EDEN - - - +ctgA 1299 9000 EDEN.3 1000 + 3300 7600 0,0,0 4 201,903,501,2001 0,1700,3700,5700 EDEN EDEN - - - +ctgA 17399 23000 rna-Apple3 1000 + 17999 21200 0,0,0 3 1401,501,2001 0,1600,3600 rna-Apple3 rna-Apple3 - - -