-
Notifications
You must be signed in to change notification settings - Fork 60
/
gff3Adapter.ts
98 lines (85 loc) · 2.4 KB
/
gff3Adapter.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import { SingleBar, Presets } from 'cli-progress'
import { createGunzip } from 'zlib'
import readline from 'readline'
// locals
import { Track } from '../base'
import { decodeURIComponentNoThrow, getLocalOrRemoteStream } from '../util'
/**
 * Streams a GFF3 file line by line and yields one text-index record per
 * feature that carries at least one of the requested attributes.
 *
 * Each yielded string has the form `<record> <words>\n`, where `<record>` is a
 * JSON array of URI-encoded values (`locStr`, `trackId`, then each matched
 * attribute value) with its commas replaced by `|`, and `<words>` is the
 * de-duplicated, space-joined list of the matched attribute values.
 *
 * Parsing stops at the first line starting with `>` (start of an embedded
 * FASTA section). Blank lines and lines starting with `#` are skipped, as are
 * features whose type is listed in `typesToExclude`.
 *
 * @param config - track configuration; only `trackId` is read here
 * @param attributesToIndex - column-9 attribute names whose values are indexed
 * @param inLocation - location of the GFF3 file; a `.gz`/`.bgz` suffix causes
 *   the stream to be gunzipped
 * @param outLocation - forwarded unchanged to `getLocalOrRemoteStream`
 *   (NOTE(review): its exact meaning is defined by that helper — confirm there)
 * @param typesToExclude - feature types (column 3) that must not be indexed
 * @param quiet - when true, no progress bar is shown
 * @returns an async generator of newline-terminated index records
 */
export async function* indexGff3({
  config,
  attributesToIndex,
  inLocation,
  outLocation,
  typesToExclude,
  quiet,
}: {
  config: Track
  attributesToIndex: string[]
  inLocation: string
  outLocation: string
  typesToExclude: string[]
  quiet: boolean
}): AsyncGenerator<string, void, unknown> {
  const { trackId } = config

  // progress bar code was aided by blog post at
  // https://webomnizz.com/download-a-file-with-progressbar-using-node-js/
  const progressBar = new SingleBar(
    {
      format: '{bar} ' + trackId + ' {percentage}% | ETA: {eta}s',
      etaBuffer: 2000,
    },
    Presets.shades_classic,
  )

  let receivedBytes = 0
  const { totalBytes, stream } = await getLocalOrRemoteStream(
    inLocation,
    outLocation,
  )

  if (!quiet) {
    progressBar.start(totalBytes, 0)
  }

  // Progress is measured on the raw (possibly still compressed) byte stream.
  // The listener is always attached so stream behavior does not depend on
  // `quiet`; only the bar update itself is skipped when the bar was never
  // started.
  stream.on('data', chunk => {
    receivedBytes += chunk.length
    if (!quiet) {
      progressBar.update(receivedBytes)
    }
  })

  // The dot is escaped so only a real ".gz" / ".bgz" extension triggers gunzip
  const isGzipped = /\.b?gz$/.test(inLocation)
  const rl = readline.createInterface({
    input: isGzipped ? stream.pipe(createGunzip()) : stream,
  })

  // Hoisted out of the per-line loop: constant-time membership checks
  const excludedTypes = new Set(typesToExclude)

  try {
    for await (const line of rl) {
      if (!line.trim() || line.startsWith('#')) {
        continue
      }
      if (line.startsWith('>')) {
        // beginning of the embedded FASTA section, no more features follow
        break
      }

      // A malformed line with fewer than 9 columns gets an empty attribute
      // column instead of throwing on `undefined.split`
      const [seq_id, , type, start, end, , , , col9 = ''] = line.split('\t')
      if (excludedTypes.has(type)) {
        continue
      }

      const locStr = `${seq_id}:${start}..${end}`

      // Turns the gff3 attribute column into a map, converting
      // comma-separated multi-values into space separated strings. A Map is
      // used so attribute names such as "constructor" or "toString" can never
      // resolve to inherited Object.prototype members.
      const col9attrs = new Map<string, string>()
      for (const rawField of col9.split(';')) {
        const field = rawField.trim()
        // Split on the FIRST '=' only so values containing '=' are kept
        // whole; fields without any '=' (including empty ones) have no value
        // to index and are skipped.
        const eq = field.indexOf('=')
        if (eq === -1) {
          continue
        }
        const key = field.slice(0, eq).trim()
        const val = field.slice(eq + 1)
        col9attrs.set(
          key,
          decodeURIComponentNoThrow(val).trim().split(',').join(' '),
        )
      }

      const attrs = attributesToIndex
        .map(attr => col9attrs.get(attr))
        .filter((f): f is string => !!f)

      if (attrs.length) {
        // Every element is URI-encoded (so it contains no literal commas);
        // the only commas in the JSON text are the array separators, which
        // are swapped for '|'.
        const record = JSON.stringify([
          encodeURIComponent(locStr),
          encodeURIComponent(trackId),
          ...attrs.map(a => encodeURIComponent(a)),
        ]).replaceAll(',', '|')
        yield `${record} ${[...new Set(attrs)].join(' ')}\n`
      }
    }
  } finally {
    // Runs on normal completion, on a thrown error, and when the consumer
    // stops iterating early, so a started progress bar is always stopped.
    if (!quiet) {
      progressBar.stop()
    }
  }
}