-
Notifications
You must be signed in to change notification settings - Fork 60
/
vcfAdapter.ts
87 lines (76 loc) · 2.27 KB
/
vcfAdapter.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import { createGunzip } from 'zlib'
import readline from 'readline'
import { Track, decodeURIComponentNoThrow } from '../util'
import { getLocalOrRemoteStream } from './common'
import { checkAbortSignal } from '@jbrowse/core/util'
/**
 * Parses the INFO column of a VCF data line into a key → value map.
 *
 * Entries are separated by `;`. For `KEY=VALUE` entries the value is passed
 * through `decodeURIComponentNoThrow`, trimmed, and its comma-separated items
 * are re-joined with single spaces. Entries without a value (flags) are stored
 * with `undefined`. If a key occurs more than once, the last occurrence wins.
 *
 * A Map is used rather than a plain object so that looking up an attribute
 * named e.g. `constructor` can never resolve to an Object.prototype member.
 */
function parseVcfInfo(info: string): Map<string, string | undefined> {
  const fields = new Map<string, string | undefined>()
  for (const rawEntry of info.split(';')) {
    const entry = rawEntry.trim()
    if (!entry) {
      continue
    }
    const [key = '', val] = entry.split('=')
    fields.set(
      key.trim(),
      val
        ? decodeURIComponentNoThrow(val).trim().split(',').join(' ')
        : undefined,
    )
  }
  return fields
}

/**
 * Streams a VCF file line by line and yields one text-index record per
 * variant ID.
 *
 * Each yielded string has the form
 * `["<loc>"|"<trackId>"|"<id>"|"<attr>"...] <id>\n`, where every quoted field
 * is URI-component-encoded, `<loc>` is `ref:pos..end` (`end` is the INFO `END`
 * value when present, otherwise `pos + 1`), and `<attr>` are the values of the
 * requested INFO attributes that are present on the line.
 *
 * Header lines (starting with `#`), blank or truncated lines, and lines whose
 * ID column is `.` or empty produce no records. A comma-separated ID column
 * produces one record per non-empty ID.
 *
 * @param config - track configuration; only `trackId` is read
 * @param attributesToIndex - INFO keys whose values are appended to each record
 * @param inLocation - location handed to `getLocalOrRemoteStream`; when it ends
 *   in `.gz` or `.bgz` the stream is piped through gunzip
 * @param outLocation - second argument handed to `getLocalOrRemoteStream`
 * @param typesToExclude - not used by this adapter
 * @param quiet - not used by this adapter
 * @param statusCallback - called on every chunk read from the source stream
 *   with the integer percentage of bytes received so far, as a string; `0` is
 *   reported when the total size is not a positive number
 * @param signal - optional abort signal, passed to `checkAbortSignal` once per
 *   input line and again before each yield
 */
export async function* indexVcf(
  config: Track,
  attributesToIndex: string[],
  inLocation: string,
  outLocation: string,
  typesToExclude: string[],
  quiet: boolean,
  statusCallback: (message: string) => void,
  signal?: AbortSignal,
) {
  const { trackId } = config
  let receivedBytes = 0
  const { totalBytes, stream } = await getLocalOrRemoteStream(
    inLocation,
    outLocation,
  )
  stream.on('data', chunk => {
    receivedBytes += chunk.length
    // Avoid reporting "NaN"/"Infinity" when the total size is unknown or zero
    const progress =
      totalBytes > 0 ? Math.round((receivedBytes / totalBytes) * 100) : 0
    statusCallback(`${progress}`)
  })

  // The dot is escaped so that only a real ".gz"/".bgz" extension triggers
  // decompression (an unescaped dot would also match e.g. "sample_gz")
  const gzStream = /\.b?gz$/.test(inLocation)
    ? stream.pipe(createGunzip())
    : stream
  const rl = readline.createInterface({
    input: gzStream,
  })

  // Loop-invariant, so encoded once instead of once per yielded record
  const encodedTrackId = encodeURIComponent(trackId)

  for await (const line of rl) {
    // Checked per line so cancellation is noticed even during long runs of
    // lines that yield nothing (headers, variants without an ID)
    checkAbortSignal(signal)
    if (line.startsWith('#')) {
      continue
    }

    // VCF columns: CHROM POS ID REF ALT QUAL FILTER INFO; only four are needed.
    // A missing INFO column is treated as empty instead of throwing.
    const [ref, pos, id, , , , , info = ''] = line.split('\t')

    // Skip blank/truncated lines and variants with no ID before doing any
    // INFO parsing
    if (!ref || !pos || !id || id === '.') {
      continue
    }

    const fields = parseVcfInfo(info)
    const end = fields.get('END')
    const encodedLoc = encodeURIComponent(`${ref}:${pos}..${end || +pos + 1}`)
    const encodedInfoAttrs = attributesToIndex
      .map(attr => fields.get(attr))
      .filter((f): f is string => !!f)
      .map(a => encodeURIComponent(a))

    for (const variantId of id.split(',')) {
      // An empty entry (e.g. "rs1,,rs2") has nothing to search for
      if (!variantId) {
        continue
      }
      // Fields are JSON-quoted and joined with '|'. encodeURIComponent output
      // never contains ',', '"' or '\', so this is the same text as
      // JSON.stringify([...]) with its separating commas replaced by '|'.
      const record = [
        encodedLoc,
        encodedTrackId,
        encodeURIComponent(variantId),
        ...encodedInfoAttrs,
      ]
        .map(f => JSON.stringify(f))
        .join('|')

      checkAbortSignal(signal)
      yield `[${record}] ${variantId}\n`
    }
  }
}