-
Notifications
You must be signed in to change notification settings - Fork 9
/
htsget.ts
157 lines (146 loc) · 4.46 KB
/
htsget.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import { unzip } from '@gmod/bgzf-filehandle'
import { BaseOpts, BamOpts } from './util'
import BamFile, { BAM_MAGIC } from './bamFile'
import Chunk from './chunk'
import { parseHeaderText } from './sam'
/**
 * One entry of the `urls` array in an htsget ticket, as consumed by `concat`.
 */
interface HtsgetChunk {
  /** Location of the data block: either a fetchable URL or an inline `data:` URI. */
  url: string
  /** Optional request headers to send when fetching `url`. */
  headers?: { [name: string]: string }
}
/**
 * Retrieves every block listed in an htsget ticket, passes each one through
 * `unzip`, and joins the results (in input order) into a single Buffer.
 *
 * - `data:` URLs are decoded inline: the text after the first comma is treated
 *   as base64.
 * - Any other URL is fetched. `opts` is spread into the fetch options, and the
 *   request headers are `opts.headers` overlaid with the chunk's own headers.
 *   A `Referer` header (any letter case) supplied by the chunk is dropped
 *   before the request is made.
 *
 * @param arr - the blocks to retrieve
 * @param opts - extra options forwarded to every `fetch` call
 * @returns the concatenation of all unzipped blocks
 * @throws Error when a fetch responds with a non-ok status
 */
async function concat(arr: HtsgetChunk[], opts?: Record<string, any>) {
  const blocks = await Promise.all(
    arr.map(async chunk => {
      // `headers` is optional on a chunk; default it so that a chunk without
      // headers does not blow up when we filter them below
      const { url, headers = {} } = chunk
      if (url.startsWith('data:')) {
        return Buffer.from(url.split(',')[1], 'base64')
      }

      // remove referer header, it is not even allowed to be specified. header
      // names are case-insensitive, so match `Referer` as well as `referer`
      const forwarded: Record<string, string> = {}
      for (const [name, value] of Object.entries(headers)) {
        if (name.toLowerCase() !== 'referer') {
          forwarded[name] = value
        }
      }

      const response = await fetch(url, {
        ...opts,
        headers: { ...opts?.headers, ...forwarded },
      })
      if (!response.ok) {
        throw new Error(
          `HTTP ${response.status} fetching ${url}: ${await response.text()}`,
        )
      }
      return Buffer.from(await response.arrayBuffer())
    }),
  )

  return Buffer.concat(await Promise.all(blocks.map(elt => unzip(elt))))
}
/**
 * A BamFile variant that reads alignments from an htsget endpoint located at
 * `${baseUrl}/${trackId}` rather than through the base class's own file
 * access.
 *
 * `getHeader` fills in `chrToIndex`/`indexToChr`; until that has happened
 * `streamRecordsForRange` cannot resolve any reference name and yields an
 * empty result.
 */
export default class HtsgetFile extends BamFile {
  private baseUrl: string

  private trackId: string

  /**
   * @param args.baseUrl - htsget endpoint prefix, joined to `trackId` with `/`
   * @param args.trackId - identifier of the dataset under `baseUrl`
   */
  constructor(args: { trackId: string; baseUrl: string }) {
    super({ htsget: true })
    this.baseUrl = args.baseUrl
    this.trackId = args.trackId
  }

  /**
   * Requests an htsget ticket for the given region, downloads the data blocks
   * it lists and delegates record parsing to `_fetchChunkFeatures`.
   *
   * @param chr - reference sequence name as it appears in the BAM header
   * @param min - start coordinate, sent as the `start` query parameter
   * @param max - end coordinate, sent as the `end` query parameter
   * @param opts - spread into the `fetch` options and passed on to
   *   `_fetchChunkFeatures`
   * @throws Error when the ticket request responds with a non-ok status
   */
  async *streamRecordsForRange(
    chr: string,
    min: number,
    max: number,
    opts?: BamOpts,
  ) {
    const base = `${this.baseUrl}/${this.trackId}`
    // reference names may legally contain characters such as '#', '&' or '+'
    // that would otherwise be interpreted as URL syntax
    const refName = encodeURIComponent(chr)
    const url = `${base}?referenceName=${refName}&start=${min}&end=${max}&format=BAM`
    const chrId = this.chrToIndex?.[chr]
    if (chrId === undefined) {
      // unknown reference name, or header not loaded yet
      yield []
    } else {
      const result = await fetch(url, { ...opts })
      if (!result.ok) {
        throw new Error(
          `HTTP ${result.status} fetching ${url}: ${await result.text()}`,
        )
      }
      const data = await result.json()
      // NOTE(review): the first URL is skipped, presumably because it holds
      // the BAM header block rather than alignment records -- confirm
      const uncba = await concat(data.htsget.urls.slice(1), opts)

      yield* this._fetchChunkFeatures(
        [
          // fake stuff to pretend to be a Chunk
          {
            buffer: uncba,
            _fetchedSize: undefined,
            bin: 0,
            compareTo() {
              return 0
            },
            toUniqueString() {
              return `${chr}_${min}_${max}`
            },
            fetchedSize() {
              return 0
            },
            minv: {
              dataPosition: 0,
              blockPosition: 0,
              compareTo: () => 0,
            },
            maxv: {
              dataPosition: Number.MAX_SAFE_INTEGER,
              blockPosition: 0,
              compareTo: () => 0,
            },
            toString() {
              return `${chr}_${min}_${max}`
            },
          },
        ],
        chrId,
        min,
        max,
        opts,
      )
    }
  }

  /**
   * Returns the data already attached to the chunk instead of reading
   * anything; the position arrays are always empty.
   *
   * @throws Error when the chunk carries no `buffer`
   */
  async _readChunk({ chunk }: { chunk: Chunk; opts: BaseOpts }) {
    if (!chunk.buffer) {
      throw new Error('expected chunk.buffer in htsget')
    }
    return { data: chunk.buffer, cpositions: [], dpositions: [], chunk }
  }

  /**
   * Downloads the header via an htsget `class=header` request, parses the SAM
   * header text and records the reference name <-> id mappings on the
   * instance (`chrToIndex`, `indexToChr`).
   *
   * @param opts - passed to `fetch` and to `concat`
   * @returns the parsed SAM header lines
   * @throws Error when the request responds with a non-ok status, or when the
   *   returned data does not start with the BAM magic number
   */
  async getHeader(opts: BaseOpts = {}) {
    const url = `${this.baseUrl}/${this.trackId}?referenceName=na&class=header`
    const result = await fetch(url, opts)
    if (!result.ok) {
      throw new Error(
        `HTTP ${result.status} fetching ${url}: ${await result.text()}`,
      )
    }
    const data = await result.json()
    const uncba = await concat(data.htsget.urls, opts)

    // need at least the 4-byte magic and the 4-byte header length; a shorter
    // payload would otherwise surface as an opaque RangeError from the reads
    if (uncba.length < 8 || uncba.readInt32LE(0) !== BAM_MAGIC) {
      throw new Error('Not a BAM file')
    }
    const headLen = uncba.readInt32LE(4)

    const headerText = uncba.toString('utf8', 8, 8 + headLen)
    const samHeader = parseHeaderText(headerText)

    // use the @SQ lines in the header to figure out the
    // mapping between ref ID numbers and names
    const idToName: { refName: string; length: number }[] = []
    const nameToId: Record<string, number> = {}
    const sqLines = samHeader.filter(l => l.tag === 'SQ')
    for (const [refId, sqLine] of sqLines.entries()) {
      let refName = ''
      let length = 0
      for (const item of sqLine.data) {
        if (item.tag === 'SN') {
          refName = item.value
        } else if (item.tag === 'LN') {
          length = +item.value
        }
      }
      nameToId[refName] = refId
      idToName[refId] = { refName, length }
    }
    this.chrToIndex = nameToId
    this.indexToChr = idToName
    return samHeader
  }
}