Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new pairwise indexed PAF adapter format with CLI creation workflow #3859

Merged
merged 12 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ jobs:
name: Test and typecheck on node 20.x and ubuntu-latest
runs-on: ubuntu-latest
steps:
- run: sudo apt-get -y install tabix
- uses: actions/checkout@v4
- name: Use Node.js 20.x
uses: actions/setup-node@v4
with:
node-version: '20.5.1'
node-version: '20'
- name: Install deps (with cache)
uses: bahmutov/npm-install@v1
- name: Test codebase
Expand All @@ -29,7 +30,7 @@ jobs:
- name: Use Node.js 20
uses: actions/setup-node@v4
with:
node-version: '20.5.1'
node-version: '20'
- name: Install website deps (with cache)
uses: bahmutov/npm-install@v1
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/website.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: '20.5.1'
node-version: '20'
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
Expand Down
4 changes: 3 additions & 1 deletion plugins/bed/src/BedTabixAdapter/BedTabixAdapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ import { openLocation } from '@jbrowse/core/util/io'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import { FileLocation, Region, Feature } from '@jbrowse/core/util'
import { TabixIndexedFile } from '@gmod/tabix'
import { featureData } from '../util'
import PluginManager from '@jbrowse/core/PluginManager'
import { AnyConfigurationModel } from '@jbrowse/core/configuration'
import { getSubAdapterType } from '@jbrowse/core/data_adapters/dataAdapterCache'

// locals
import { featureData } from '../util'

export default class BedTabixAdapter extends BaseFeatureDataAdapter {
private parser: BED

Expand Down
1 change: 1 addition & 0 deletions plugins/comparative-adapters/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
},
"dependencies": {
"@gmod/bgzf-filehandle": "^1.4.3",
"@gmod/tabix": "^1.5.6",
"generic-filehandle": "^3.0.0"
},
"peerDependencies": {
Expand Down
21 changes: 12 additions & 9 deletions plugins/comparative-adapters/src/PAFAdapter/PAFAdapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ import { unzip } from '@gmod/bgzf-filehandle'
import { MismatchParser } from '@jbrowse/plugin-alignments'

// locals
import SyntenyFeature from './SyntenyFeature'
import { isGzip, parseLineByLine } from '../util'
import SyntenyFeature from '../SyntenyFeature'
import {
getWeightedMeans,
flipCigar,
swapIndelCigar,
parsePAFLine,
PAFRecord,
} from './util'
isGzip,
parseLineByLine,
} from '../util'
import { getWeightedMeans, PAFRecord } from './util'

const { parseCigar } = MismatchParser

Expand Down Expand Up @@ -102,8 +102,12 @@ export default class PAFAdapter extends BaseFeatureDataAdapter {

// The index of the assembly name in the query list corresponds to the
// adapter in the subadapters list
const index = assemblyNames.indexOf(query.assemblyName)
const { start: qstart, end: qend, refName: qref, assemblyName } = query
const index = assemblyNames.indexOf(assemblyName)

// if the getFeatures::query is on the query assembly, flip orientation
// of data
const flip = index === 0
if (index === -1) {
console.warn(`${assemblyName} not found in this adapter`)
observer.complete()
Expand All @@ -117,9 +121,8 @@ export default class PAFAdapter extends BaseFeatureDataAdapter {
let mateName = ''
let mateStart = 0
let mateEnd = 0
const flip = index === 0
const assemblyName = assemblyNames[+!flip]
if (index === 0) {

if (flip) {
start = r.qstart
end = r.qend
refName = r.qname
Expand Down
63 changes: 0 additions & 63 deletions plugins/comparative-adapters/src/PAFAdapter/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,66 +107,3 @@ function weightedMean(tuples: [number, number][]) {
)
return valueSum / weightSum
}

/**
 * Parses a single tab-separated PAF line into a record.
 *
 * Columns 1, 3-6 and 8-12 become the named fields below (columns 2 and 7 are
 * skipped). Every column after the 12th is treated as an optional tag: the
 * text before the first ':' is the tag name, and the value is whatever follows
 * the three characters starting at that ':' (i.e. a `name:X:value` layout).
 *
 * @param line - one line of PAF text, without a trailing newline
 * @returns the parsed record; numeric columns are converted with unary-plus
 * semantics, and tags are merged into `extra` after the three fixed stats so a
 * tag with the same name overrides them
 */
export function parsePAFLine(line: string) {
  const columns = line.split('\t')
  const [qname, , qstart, qend, strand, tname, , tstart, tend] = columns
  const [numMatches, blockLen, mappingQual] = columns.slice(9, 12)

  // collect [name, value] pairs for the optional trailing tags
  const tagEntries: [string, string][] = []
  for (const tag of columns.slice(12)) {
    const colon = tag.indexOf(':')
    tagEntries.push([tag.slice(0, colon), tag.slice(colon + 3)])
  }
  const tags = Object.fromEntries(tagEntries)

  const direction = strand === '-' ? -1 : 1
  const record = {
    tname,
    tstart: Number(tstart),
    tend: Number(tend),
    qname,
    qstart: Number(qstart),
    qend: Number(qend),
    strand: direction,
    extra: {
      numMatches: Number(numMatches),
      blockLen: Number(blockLen),
      mappingQual: Number(mappingQual),
      ...tags,
    },
  }
  return record as PAFRecord
}

/**
 * Reverses a tokenized CIGAR and swaps insertions with deletions.
 *
 * The input is read as consecutive [length, op] pairs, walking from the end of
 * the array towards the start two items at a time. Each pair is emitted in the
 * same [length, op] order, with 'D' replaced by 'I' and 'I' replaced by 'D'.
 *
 * @param cigar - flat array of alternating length and operation tokens
 * @returns a new flat array with the pairs in reverse order
 */
export function flipCigar(cigar: string[]) {
  const flipped: string[] = []
  let pos = cigar.length - 2
  while (pos >= 0) {
    const len = cigar[pos]
    const op = cigar[pos + 1]
    const swappedOp = op === 'D' ? 'I' : op === 'I' ? 'D' : op
    flipped.push(len)
    flipped.push(swappedOp)
    pos -= 2
  }
  return flipped
}

/**
 * Swaps insertion and deletion operations in a CIGAR string without changing
 * the order of operations.
 *
 * Every 'D' becomes 'I' and every 'I' becomes 'D'. Any 'K' present in the
 * input is also emitted as 'I'; all other characters are copied through.
 *
 * @param cigar - CIGAR string
 * @returns the CIGAR string with 'I' and 'D' exchanged
 */
export function swapIndelCigar(cigar: string) {
  let swapped = ''
  for (const ch of cigar) {
    if (ch === 'D' || ch === 'K') {
      swapped += 'I'
    } else if (ch === 'I') {
      swapped += 'D'
    } else {
      swapped += ch
    }
  }
  return swapped
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import {
BaseFeatureDataAdapter,
BaseOptions,
} from '@jbrowse/core/data_adapters/BaseAdapter'
import { FileLocation, Region } from '@jbrowse/core/util/types'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import { Feature } from '@jbrowse/core/util'
import { AnyConfigurationModel } from '@jbrowse/core/configuration'
import { getSubAdapterType } from '@jbrowse/core/data_adapters/dataAdapterCache'
import PluginManager from '@jbrowse/core/PluginManager'
import { openLocation } from '@jbrowse/core/util/io'
import { TabixIndexedFile } from '@gmod/tabix'

// locals
import { parsePAFLine } from '../util'
import SyntenyFeature from '../SyntenyFeature'

/**
 * Options type used for the `opts` argument of this adapter's getFeatures:
 * the base adapter options plus an optional configuration model.
 */
interface PAFOptions extends BaseOptions {
config?: AnyConfigurationModel
}

/**
 * Feature adapter backed by a tabix-indexed pairwise PAF ("pif") file.
 *
 * Reference names stored in the tabix index carry a one-letter prefix: names
 * starting with 'q' are used for the first configured assembly and names
 * starting with 't' for the second. The prefix is added when querying and
 * stripped again before names are handed back to callers.
 */
export default class PAFAdapter extends BaseFeatureDataAdapter {
  public static capabilities = ['getFeatures', 'getRefNames']

  protected pif: TabixIndexedFile

  /**
   * Opens the pif.gz file and its index as configured by the `pifGzLocation`
   * and `index` slots. A CSI index is used when `index.indexType` is 'CSI';
   * any other value is treated as TBI.
   */
  public constructor(
    config: AnyConfigurationModel,
    getSubAdapter?: getSubAdapterType,
    pluginManager?: PluginManager,
  ) {
    super(config, getSubAdapter, pluginManager)
    const pifGzLoc = this.getConf('pifGzLocation') as FileLocation
    const type = this.getConf(['index', 'indexType'])
    const loc = this.getConf(['index', 'location'])
    const pm = this.pluginManager

    this.pif = new TabixIndexedFile({
      filehandle: openLocation(pifGzLoc, pm),
      csiFilehandle: type === 'CSI' ? openLocation(loc, pm) : undefined,
      tbiFilehandle: type !== 'CSI' ? openLocation(loc, pm) : undefined,
      // 50 * 2^20; NOTE(review): presumably a byte count (50 MiB) — confirm
      // against the @gmod/tabix documentation
      chunkCacheSize: 50 * 2 ** 20,
    })
  }

  /**
   * Returns the header reported by the underlying tabix file.
   */
  async getHeader() {
    return this.pif.getHeader()
  }

  /**
   * Returns the configured `assemblyNames`, or
   * `[queryAssembly, targetAssembly]` when that array is empty. Index 0 is
   * therefore the query assembly and index 1 the target assembly.
   */
  getAssemblyNames(): string[] {
    const assemblyNames = this.getConf('assemblyNames') as string[]
    if (assemblyNames.length === 0) {
      return [
        this.getConf('queryAssembly') as string,
        this.getConf('targetAssembly') as string,
      ]
    }
    return assemblyNames
  }

  /**
   * Always reports that data may exist; no lookup is performed.
   */
  public async hasDataForRefName() {
    return true
  }

  /**
   * Lists the reference names available for the assembly of the first region
   * in `opts.regions`.
   *
   * @param opts - base options; `regions[0].assemblyName` selects the assembly
   * @returns the 'q'-prefixed names (prefix removed) for the first assembly,
   * the 't'-prefixed names (prefix removed) for the second assembly, or an
   * empty array when the assembly is not one of this adapter's assemblies
   * @throws Error when no region or no assembly name is supplied
   */
  async getRefNames(opts: BaseOptions & { regions?: Region[] } = {}) {
    // `?.` after the index so an empty `regions` array reaches the explicit
    // error below instead of failing with a TypeError on `undefined`
    const r1 = opts.regions?.[0]?.assemblyName
    if (!r1) {
      throw new Error('no assembly name provided')
    }

    const idx = this.getAssemblyNames().indexOf(r1)
    const names = await this.pif.getReferenceSequenceNames(opts)
    if (idx === 0) {
      return names.filter(n => n.startsWith('q')).map(n => n.slice(1))
    } else if (idx === 1) {
      return names.filter(n => n.startsWith('t')).map(n => n.slice(1))
    }
    return []
  }

  /**
   * Streams synteny features overlapping `query`.
   *
   * The tabix file is queried under 'q' + refName when the query is on the
   * first (query) assembly and under 't' + refName otherwise. When the query's
   * assembly is not one of this adapter's assemblies, a warning is logged and
   * the observable completes without emitting anything.
   *
   * @param query - region to fetch; its `assemblyName` selects the orientation
   * @param opts - options; `opts.signal` is forwarded to the tabix query
   * @returns an observable of features, each carrying a `mate` that describes
   * the matching interval on the other assembly
   */
  getFeatures(query: Region, opts: PAFOptions = {}) {
    return ObservableCreate<Feature>(async observer => {
      const { assemblyName } = query

      const assemblyNames = this.getAssemblyNames()
      const index = assemblyNames.indexOf(assemblyName)

      // an unknown assembly must not fall through to the 't' data
      if (index === -1) {
        console.warn(`${assemblyName} not found in this adapter`)
        observer.complete()
        return
      }

      const flip = index === 0
      const letter = flip ? 'q' : 't'

      await this.pif.getLines(letter + query.refName, query.start, query.end, {
        lineCallback: (line, fileOffset) => {
          const r = parsePAFLine(line)
          // drop the one-letter q/t prefix carried by the first column
          const refName = r.qname.slice(1)
          const start = r.qstart
          const end = r.qend
          const mateName = r.tname
          const mateStart = r.tstart
          const mateEnd = r.tend

          const { extra, strand } = r
          // defaults apply when the parsed record lacks these fields; `cg` is
          // pulled out so it is exposed as CIGAR rather than spread via `rest`
          const { numMatches = 0, blockLen = 1, cg, ...rest } = extra

          observer.next(
            new SyntenyFeature({
              uniqueId: fileOffset + assemblyName,
              assemblyName,
              start,
              end,
              type: 'match',
              refName,
              strand,
              ...rest,
              CIGAR: cg,
              syntenyId: fileOffset,
              identity: numMatches / blockLen,
              numMatches,
              blockLen,
              mate: {
                start: mateStart,
                end: mateEnd,
                refName: mateName,
                // the other assembly: index 1 when querying index 0, else 0
                assemblyName: assemblyNames[+flip],
              },
            }),
          )
        },
        signal: opts.signal,
      })

      observer.complete()
    })
  }

  /**
   * No-op; this adapter does nothing when asked to free resources.
   */
  freeResources(/* { query } */): void {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import { ConfigurationSchema } from '@jbrowse/core/configuration'
import { types } from 'mobx-state-tree'

/**
* #config PairwiseIndexedPAFAdapter
* configuration for the adapter that reads a pairwise tabix indexed PAF (pif)
*/
function x() {} // eslint-disable-line @typescript-eslint/no-unused-vars

// Configuration schema for the PairwiseIndexedPAFAdapter.
//
// Assemblies can be given either as the `assemblyNames` array or as the
// separate `queryAssembly` / `targetAssembly` slots. The adapter treats index
// 0 of `assemblyNames` as the query assembly and index 1 as the target
// assembly (its fallback builds the array as [queryAssembly, targetAssembly]),
// so the slot description states that order.
const PairwiseIndexedPAFAdapter = ConfigurationSchema(
  'PairwiseIndexedPAFAdapter',
  {
    /**
     * #slot
     */
    assemblyNames: {
      type: 'stringArray',
      defaultValue: [],
      description:
        'Array of assembly names to use for this file. The query assembly name is the first value in the array, target assembly name is the second',
    },
    /**
     * #slot
     */
    targetAssembly: {
      type: 'string',
      defaultValue: '',
      description: 'Alternative to assemblyNames: the target assembly name',
    },
    /**
     * #slot
     */
    queryAssembly: {
      type: 'string',
      defaultValue: '',
      description: 'Alternative to assemblyNames: the query assembly name',
    },
    /**
     * #slot
     */
    pifGzLocation: {
      type: 'fileLocation',
      description: 'location of pairwise tabix indexed PAF (pif)',
      defaultValue: {
        uri: '/path/to/data/file.pif.gz',
        locationType: 'UriLocation',
      },
    },
    /**
     * #slot
     */
    index: ConfigurationSchema('TabixIndex', {
      /**
       * #slot index.indexType
       */
      indexType: {
        model: types.enumeration('IndexType', ['TBI', 'CSI']),
        type: 'stringEnum',
        defaultValue: 'TBI',
      },
      /**
       * #slot index.location
       */
      location: {
        type: 'fileLocation',
        defaultValue: {
          uri: '/path/to/my.paf.gz.tbi',
          locationType: 'UriLocation',
        },
      },
    }),
  },
  { explicitlyTyped: true },
)

export default PairwiseIndexedPAFAdapter
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import PluginManager from '@jbrowse/core/PluginManager'
import AdapterType from '@jbrowse/core/pluggableElementTypes/AdapterType'
import configSchema from './configSchema'

/**
 * Registers the 'PairwiseIndexedPAFAdapter' adapter type with the given plugin
 * manager. The adapter is marked as hidden from the GUI, and its class is
 * loaded with a dynamic import only when requested.
 *
 * @param pluginManager - plugin manager to register the adapter type on
 */
export default (pluginManager: PluginManager) => {
  pluginManager.addAdapterType(() => {
    const adapterType = new AdapterType({
      name: 'PairwiseIndexedPAFAdapter',
      displayName: 'Pairwise indexed PAF adapter',
      configSchema,
      adapterMetadata: { hiddenFromGUI: true },
      getAdapterClass: async () => {
        const adapterModule = await import('./PairwiseIndexedPAFAdapter')
        return adapterModule.default
      },
    })
    return adapterType
  })
}
Loading
Loading