/
LazyTrie.js
265 lines (235 loc) · 8.7 KB
/
LazyTrie.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
define( ['dojo/_base/declare','JBrowse/Util'], function( declare, Util ) {

// ---- private helpers (module scope, not part of the public prototype) ----

// A lazy (not yet loaded) node is [count, edgeString]; the numeric first
// element is what distinguishes it from a loaded node.
function isLazyNode(node) {
    return typeof node[0] === 'number';
}

// A loaded node is [edgeString, data, child, child, ...].
function isLoadedNode(node) {
    return typeof node[0] === 'string';
}

// Edge string of a child node, whether it is loaded or still lazy.
function edgeString(node) {
    return isLazyNode(node) ? node[1] : node[0];
}

// True when a loaded node carries a data item. Falsy data such as 0 or ""
// still counts as data; only null/undefined mean "no data item".
function hasData(node) {
    return node[1] !== null && node[1] !== undefined;
}

// Depth-first collection of the data items in the loaded part of a subtree.
function collectValues(node, out) {
    if (hasData(node)) {
        out.push(node[1]);
    }
    for (var i = 2; i < node.length; i++) {
        // lazy placeholders hold [count, edgeString], not data: skip them
        if (isLoadedNode(node[i])) {
            collectValues(node[i], out);
        }
    }
}

// Depth-first collection of [key, data] pairs in the loaded part of a subtree.
function collectMappings(prefix, node, out) {
    if (hasData(node)) {
        out.push([prefix, node[1]]);
    }
    for (var i = 2; i < node.length; i++) {
        if (isLoadedNode(node[i])) {
            collectMappings(prefix + node[i][0], node[i], out);
        }
    }
}

// Re-run every query that was issued before the root finished loading.
function runPendingQueries(trie) {
    var queued = trie._pendingQueries || [];
    trie._pendingQueries = [];
    for (var i = 0; i < queued.length; i++) {
        trie.findPath.apply(trie, queued[i]);
    }
}

// The root could not be loaded: answer every waiting query with "not found"
// and remember the failure so later queries do not wait forever.
function failPendingQueries(trie) {
    var queued = trie._pendingQueries || [];
    trie._pendingQueries = [];
    trie.loadFailed = true;
    for (var i = 0; i < queued.length; i++) {
        queued[i][2]();
    }
}

return declare('JBrowse.Store.LazyTrie', null,
/**
 * @lends JBrowse.Store.LazyTrie.prototype
 */
{
    /**
     * <pre>
     * Implements a lazy PATRICIA tree.
     * This structure is a map where the keys are strings.  The map supports
     * fast queries by key string prefix ("show me all the values for keys
     * that start with "abc").  It also supports lazily loading subtrees.
     *
     * Each edge is labeled with a substring of a key string.
     * Each node in the tree has one or more children, each of which
     * represents a potential completion of the string formed by
     * concatenating all of the edge strings from that node up to the root.
     *   Nodes also have zero or one data items.
     * Leaves have zero or one data items.
     *
     * Each loaded node is an array.
     *   element 0 is the edge string;
     *   element 1 is the data item, or null if there is none;
     *   any further elements are the child nodes, sorted lexicographically
     *     by their edge string
     *
     * Each lazy node is an array where the first element is the number of
     * data items in the subtree rooted at that node, and the second element
     * is the edge string for that node.
     *   When the lazy node is loaded, the lazy array gets replaced with
     *   a loaded node array; lazy nodes and loaded nodes can be
     *   distinguished by:
     *     "string" == typeof loaded_node[0]
     *     "number" == typeof lazy_node[0]
     *
     * e.g., for the mappings:
     *   abc   => 0
     *   abcd  => 1
     *   abce  => "baz"
     *   abfoo => [3, 4]
     *   abbar (subtree to be loaded lazily)
     *
     * the structure is:
     *
     * [, , ["ab", ,
     *        [3, "bar"],
     *        ["c", 0, ["d", 1],
     *                 ["e", "baz"]],
     *        ["foo", [3, 4]]
     *      ]
     * ]
     *
     * The main goals for this structure were to minimize the JSON size on
     * the wire (so, no type tags in the JSON to distinguish loaded nodes,
     * lazy nodes, and leaves) while supporting lazy loading and reasonably
     * fast lookups.
     *
     * The root is fetched asynchronously from rootURL.  Queries issued
     * before it arrives are queued and replayed, in order, once it has
     * loaded; if it cannot be loaded they are answered via their
     * not-found callbacks.
     * </pre>
     *
     * @constructs
     * @param {String} rootURL URL of the JSON file holding the root node
     * @param {String} chunkTempl URL template for lazily loaded subtrees;
     *   "{Chunk}" in it is replaced by the key prefix of the subtree
     */
    constructor: function(rootURL, chunkTempl) {
        this.rootURL = rootURL;
        this.chunkTempl = chunkTempl;
        this._pendingQueries = [];
        var trie = this;
        dojo.xhrGet({url: rootURL,
                     handleAs: "json",
                     load: function(o) {
                         if (!o) {
                             console.log("failed to load trie");
                             failPendingQueries(trie);
                             return;
                         }
                         trie.root = o;
                         trie.extra = o[0];
                         runPendingQueries(trie);
                     },
                     error: function(err) {
                         // NOTE(review): dojo appears to route exceptions
                         // thrown inside `load` to this handler as well; if
                         // the root is already set the request itself
                         // succeeded, so leave the state alone.
                         if (!trie.root) {
                             console.log("failed to load trie");
                             failPendingQueries(trie);
                         }
                         return err;
                     }
        });
    },

    /**
     * Build the URL of the lazily loaded subtree for a key prefix.
     * @param {String} prefix key prefix identifying the subtree
     * @returns {String} chunk URL resolved against the root URL
     */
    chunkUrl: function(prefix) {
        // split/join inserts the prefix verbatim; String.replace would
        // interpret "$&", "$1", ... sequences occurring in the prefix
        var chunkUrl = this.chunkTempl.split("{Chunk}").join(prefix);
        return Util.resolveUrl(this.rootURL, chunkUrl);
    },

    /**
     * Concatenate the edge strings along a path of child indices, starting
     * at the root.  Stops after the first lazy node on the path, since
     * nothing below it is loaded.
     * @param {Array} path child indices, one per tree level
     * @returns {String} the key prefix spelled out by the path
     */
    pathToPrefix: function(path) {
        var node = this.root;
        var result = "";
        for (var i = 0; i < path.length; i++) {
            var child = node[path[i]];
            if (isLazyNode(child)) {
                result += child[1];
                break;
            }
            if (isLoadedNode(child)) {
                result += child[0];
            }
            node = child;
        }
        return result;
    },

    /**
     * Find all loaded data items whose keys start with the query.
     * @param {String} query key prefix to search for
     * @param {Function} callback called with an array of data items
     * @param {Function} [notfoundCallback] called with no arguments when
     *   no key starts with the query
     */
    valuesFromPrefix: function(query, callback, notfoundCallback) {
        var trie = this;
        this.findNode(query, function(prefix, node) {
            callback(trie.valuesFromNode(node));
        }, notfoundCallback);
    },

    /**
     * Find all loaded [key, data] pairs whose keys start with the query.
     * @param {String} query key prefix to search for
     * @param {Function} callback called with an array of [key, data] pairs
     * @param {Function} [notfoundCallback] called with no arguments when
     *   no key starts with the query
     */
    mappingsFromPrefix: function(query, callback, notfoundCallback) {
        var trie = this;
        this.findNode(query, function(prefix, node) {
            callback(trie.mappingsFromNode(prefix, node));
        }, notfoundCallback);
    },

    /**
     * Collect the [key, data] pairs stored in the loaded part of a subtree.
     * Lazy (unloaded) children are skipped.
     * @param {String} prefix key spelled out by the path to the node
     * @param {Array} node loaded node at the top of the subtree
     * @returns {Array} [key, data] pairs in depth-first order
     */
    mappingsFromNode: function(prefix, node) {
        var results = [];
        collectMappings(prefix, node, results);
        return results;
    },

    /**
     * Collect the data items stored in the loaded part of a subtree.
     * Lazy (unloaded) children are skipped; their arrays hold a count and
     * an edge string rather than data.
     * @param {Array} node loaded node at the top of the subtree
     * @returns {Array} data items in depth-first order
     */
    valuesFromNode: function(node) {
        var results = [];
        collectValues(node, results);
        return results;
    },

    /**
     * Look up the data item stored under exactly this key
     * (compared case-insensitively).
     * @param {String} key the key to look up
     * @param {Function} callback called with the data item when found
     * @param {Function} [notfoundCallback] called with no arguments when
     *   there is no data item for the key, including when the key is only
     *   a prefix of stored keys
     */
    exactMatch: function(key, callback, notfoundCallback ) {
        notfoundCallback = notfoundCallback || function() {};
        var wanted = key.toLowerCase();
        this.findNode(key,
                      function(prefix, node) {
                          if (prefix.toLowerCase() === wanted && hasData(node)) {
                              callback(node[1]);
                          } else {
                              notfoundCallback();
                          }
                      },
                      notfoundCallback
                     );
    },

    /**
     * Find the node at the top of the subtree holding every key that
     * starts with the query.
     * @param {String} query key prefix to search for
     * @param {Function} foundCallback called with (prefix, node), where
     *   prefix is the full key prefix spelled out by the path to the node
     * @param {Function} [notfoundCallback] called with no arguments when
     *   no key starts with the query
     */
    findNode: function(query, foundCallback, notfoundCallback ) {
        notfoundCallback = notfoundCallback || function() {};
        var trie = this;
        this.findPath(query, function(path) {
            var node = trie.root;
            for (var i = 0; i < path.length; i++) {
                node = node[path[i]];
            }
            foundCallback(trie.pathToPrefix(path), node);
        }, notfoundCallback);
    },

    /**
     * Walk down from the root following the (lower-cased) query, loading
     * lazy subtrees on the way as needed.
     * @param {String} query key prefix to search for
     * @param {Function} foundCallback called with the array of child
     *   indices leading from the root to the matching node
     * @param {Function} [notfoundCallback] called with no arguments when
     *   no key starts with the query, or when a needed subtree (or the
     *   root) could not be loaded
     */
    findPath: function(query, foundCallback, notfoundCallback) {
        notfoundCallback = notfoundCallback || function() {};

        if (!this.root) {
            if (this.loadFailed) {
                notfoundCallback();
                return;
            }
            // root not here yet: queue the query (every one of them, not
            // just the latest) until the root has loaded
            if (!this._pendingQueries) {
                this._pendingQueries = [];
            }
            this._pendingQueries.push([query, foundCallback, notfoundCallback]);
            return;
        }

        query = query.toLowerCase();
        var trie = this;
        var node = this.root;
        var qStart = 0;
        var path = [];

        while (true) {
            var childIndex = this.binarySearch(node, query.charAt(qStart));
            if (childIndex < 0) {
                notfoundCallback();
                return;
            }
            path.push(childIndex);

            if (isLazyNode(node[childIndex])) {
                // fetch the subtree, splice it in, and restart the search
                this._loadChunk(node, childIndex,
                                this.chunkUrl(this.pathToPrefix(path)),
                                function() {
                                    trie.findPath(query, foundCallback,
                                                  notfoundCallback);
                                },
                                notfoundCallback);
                return;
            }

            node = node[childIndex];

            // if the current edge string doesn't match the relevant part
            // of the query string, then there's no match
            var edge = node[0];
            var compareLength = Math.min(edge.length, query.length - qStart);
            if (query.substring(qStart, qStart + compareLength)
                !== edge.substring(0, compareLength)) {
                notfoundCallback();
                return;
            }

            qStart += edge.length;
            if (qStart >= query.length) {
                // we've reached the end of the query string, and we
                // have some matches
                foundCallback(path);
                return;
            }
        }
    },

    /**
     * Fetch a lazy subtree and replace its placeholder in the parent node.
     * @private
     * @param {Array} parent loaded node holding the lazy placeholder
     * @param {Number} childIndex index of the placeholder within parent
     * @param {String} url URL of the JSON chunk
     * @param {Function} onLoaded called once the chunk is in place
     * @param {Function} onFailed called when the chunk cannot be loaded
     */
    _loadChunk: function(parent, childIndex, url, onLoaded, onFailed) {
        var chunkLoaded = false;
        dojo.xhrGet({url: url,
                     handleAs: "json",
                     load: function(chunk) {
                         if (!chunk) {
                             console.log("failed to load trie chunk " + url);
                             onFailed();
                             return;
                         }
                         chunkLoaded = true;
                         parent[childIndex] = chunk;
                         onLoaded();
                     },
                     error: function(err) {
                         // NOTE(review): dojo appears to route exceptions
                         // thrown inside `load` here too; only report a
                         // failure if the chunk itself never arrived, so
                         // callers are not notified twice.
                         if (!chunkLoaded) {
                             console.log("failed to load trie chunk " + url);
                             onFailed();
                         }
                         return err;
                     }
        });
    },

    /**
     * Binary search among the children of a node for the one whose edge
     * string starts with the given character.
     * @param {Array} a node array; children start at index 2
     * @param {String} firstChar character to look for
     * @returns {Number} index of the matching child, or
     *   -(insertionPoint + 1) (always negative) when there is none
     */
    binarySearch: function(a, firstChar) {
        var low = 2; // skip edge string (in 0) and data item (in 1)
        var high = a.length - 1;
        while (low <= high) {
            var mid = (low + high) >>> 1;
            var midVal = edgeString(a[mid]).charAt(0);
            if (midVal < firstChar) {
                low = mid + 1;
            } else if (midVal > firstChar) {
                high = mid - 1;
            } else {
                return mid; // key found
            }
        }
        return -(low + 1); // key not found.
    }
}); });
/*
Copyright (c) 2007-2009 The Evolutionary Software Foundation
Created by Mitchell Skinner <mitch_skinner@berkeley.edu>
This package and its accompanying libraries are free software; you can
redistribute it and/or modify it under the terms of the LGPL (either
version 2.1, or at your option, any later version) or the Artistic
License 2.0. Refer to LICENSE for the full license text.
*/