Skip to content

Commit

Permalink
Add aggregation to BigBedAdapter to group bigGenePred transcripts (#4456
Browse files Browse the repository at this point in the history
)
  • Loading branch information
cmdcolin committed Jun 19, 2024
1 parent f541a28 commit c470c16
Show file tree
Hide file tree
Showing 8 changed files with 742 additions and 614 deletions.
1 change: 0 additions & 1 deletion packages/core/util/simpleFeature.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ export default class SimpleFeature implements Feature {
`invalid feature data, end less than start. end: ${this.data.end} start: ${this.data.start}`,
)
}

if (this.data.subfeatures) {
this.subfeatures = this.data.subfeatures?.map(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
Expand Down
268 changes: 182 additions & 86 deletions plugins/bed/src/BigBedAdapter/BigBedAdapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,22 @@ import {
import { Region } from '@jbrowse/core/util/types'
import { openLocation } from '@jbrowse/core/util/io'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import SimpleFeature, { Feature } from '@jbrowse/core/util/simpleFeature'
import { map, mergeAll } from 'rxjs/operators'
import {
doesIntersect2,
max,
min,
Feature,
SimpleFeature,
} from '@jbrowse/core/util'
import { Observer } from 'rxjs'
import { SimpleFeatureSerializedNoId } from '@jbrowse/core/util/simpleFeature'

// locals
import { isUCSC, makeBlocks, ucscProcessedTranscript } from '../util'
import {
isUcscProcessedTranscript,
makeBlocks,
ucscProcessedTranscript,
} from '../util'

export default class BigBedAdapter extends BaseFeatureDataAdapter {
private cached?: Promise<{ bigbed: BigBed; header: Header; parser: BED }>
Expand Down Expand Up @@ -43,7 +54,6 @@ export default class BigBedAdapter extends BaseFeatureDataAdapter {

async getHeader(opts?: BaseOptions) {
const { parser, header } = await this.configure(opts)
// @ts-expect-error
const { version, fileType } = header
const { fields, ...rest } = parser.autoSql
return {
Expand All @@ -56,96 +66,182 @@ export default class BigBedAdapter extends BaseFeatureDataAdapter {
}
}

public getFeatures(region: Region, opts: BaseOptions = {}) {
public async getFeaturesHelper(
query: Region,
opts: BaseOptions = {},
observer: Observer<Feature>,
allowRedispatch: boolean,
originalQuery = query,
) {
const { signal } = opts
const scoreColumn = this.getConf('scoreColumn')
return ObservableCreate<Feature>(async observer => {
try {
const { parser, bigbed } = await this.configure(opts)
const ob = await bigbed.getFeatureStream(
region.refName,
region.start,
region.end,
{
signal,
basesPerSpan: region.end - region.start,
},
const aggregateField = this.getConf('aggregateField')
const { parser, bigbed } = await this.configure(opts)
const feats = await bigbed.getFeatures(
query.refName,
query.start,
query.end,
{
signal,
basesPerSpan: query.end - query.start,
},
)
if (allowRedispatch && feats.length) {
let minStart = Infinity
let maxEnd = -Infinity
for (const feat of feats) {
if (feat.start < minStart) {
minStart = feat.start
}
if (feat.end > maxEnd) {
maxEnd = feat.end
}
}
if (maxEnd > query.end || minStart < query.start) {
await this.getFeaturesHelper(
{ ...query, start: minStart, end: maxEnd },
opts,
observer,
false,
query,
)
ob.pipe(
mergeAll(),
map(feat => {
const data = parser.parseLine(
`${region.refName}\t${feat.start}\t${feat.end}\t${feat.rest}`,
{
uniqueId: feat.uniqueId!,
},
)
return
}
}

const parentAggregation = {} as Record<
string,
SimpleFeatureSerializedNoId[]
>

if (feat.uniqueId === undefined) {
throw new Error('invalid bbi feature')
}
const {
uniqueId,
type,
chromStart,
chromStarts,
blockStarts,
blockCount,
blockSizes,
chromEnd,
thickStart,
thickEnd,
chrom,
score,
...rest
} = data
if (feats.some(f => f.uniqueId === undefined)) {
throw new Error('found uniqueId undefined')
}
for (const feat of feats) {
const data = parser.parseLine(
`${query.refName}\t${feat.start}\t${feat.end}\t${feat.rest}`,
{ uniqueId: feat.uniqueId! },
)

const aggr = data[aggregateField]
if (!parentAggregation[aggr]) {
parentAggregation[aggr] = []
}
const {
uniqueId,
type,
chromStart,
chromStarts,
blockStarts,
blockCount,
blockSizes,
chromEnd,
thickStart,
thickEnd,
chrom,
score,
...rest
} = data

const subfeatures = blockCount
? makeBlocks({
chromStarts,
blockStarts,
blockCount,
blockSizes,
uniqueId,
refName: region.refName,
start: feat.start,
})
: []
const subfeatures = blockCount
? makeBlocks({
chromStarts,
blockStarts,
blockCount,
blockSizes,
uniqueId,
refName: query.refName,
start: feat.start,
})
: []

// collection of heuristics for suggesting that this feature should
// be converted to a gene, CNV bigbed has many gene like features
// including thickStart and blockCount but no strand
return new SimpleFeature({
if (isUcscProcessedTranscript(data)) {
const f = ucscProcessedTranscript({
...rest,
uniqueId,
type,
start: feat.start,
end: feat.end,
refName: query.refName,
score: scoreColumn ? +data[scoreColumn] : score,
chromStarts,
blockCount,
blockSizes,
thickStart,
thickEnd,
subfeatures,
})
if (aggr) {
parentAggregation[aggr].push(f)
} else {
if (
doesIntersect2(
f.start,
f.end,
originalQuery.start,
originalQuery.end,
)
) {
observer.next(
new SimpleFeature({ id: `${this.id}-${uniqueId}`, data: f }),
)
}
}
} else {
if (
doesIntersect2(
feat.start,
feat.end,
originalQuery.start,
originalQuery.end,
)
) {
observer.next(
new SimpleFeature({
id: `${this.id}-${uniqueId}`,
data: isUCSC(data)
? ucscProcessedTranscript({
...rest,
uniqueId,
type,
start: feat.start,
end: feat.end,
refName: region.refName,
score: scoreColumn ? +data[scoreColumn] : score,
chromStarts,
blockCount,
blockSizes,
thickStart,
thickEnd,
subfeatures,
})
: {
...rest,
uniqueId,
type,
start: feat.start,
score: scoreColumn ? +data[scoreColumn] : score,
end: feat.end,
refName: region.refName,
subfeatures,
},
})
data: {
...rest,
uniqueId,
type,
start: feat.start,
score: scoreColumn ? +data[scoreColumn] : score,
end: feat.end,
refName: query.refName,
subfeatures,
},
}),
)
}
}
}

Object.entries(parentAggregation).map(([name, subfeatures]) => {
const s = min(subfeatures.map(f => f.start))
const e = max(subfeatures.map(f => f.end))
if (doesIntersect2(s, e, originalQuery.start, originalQuery.end)) {
const { uniqueId, strand } = subfeatures[0]
observer.next(
new SimpleFeature({
id: `${this.id}-${uniqueId}-parent`,
data: {
type: 'gene',
subfeatures,
strand,
name,
start: s,
end: e,
refName: query.refName,
},
}),
).subscribe(observer)
)
}
})
observer.complete()
}
public getFeatures(query: Region, opts: BaseOptions = {}) {
return ObservableCreate<Feature>(async observer => {
try {
await this.getFeaturesHelper(query, opts, observer, true)
} catch (e) {
observer.error(e)
}
Expand Down
Loading

0 comments on commit c470c16

Please sign in to comment.