/
sparse_index_reader_base.h
414 lines (339 loc) · 11.1 KB
/
sparse_index_reader_base.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
/**
* @file sparse_index_reader_base.h
*
* @section LICENSE
*
* The MIT License
*
* @copyright Copyright (c) 2017-2021 TileDB, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
* @section DESCRIPTION
*
* This file defines class SparseIndexReaderBase.
*/
#ifndef TILEDB_SPARSE_INDEX_READER_BASE_H
#define TILEDB_SPARSE_INDEX_READER_BASE_H
#include <queue>
#include "reader_base.h"
#include "tiledb/common/common.h"
#include "tiledb/common/status.h"
#include "tiledb/sm/array_schema/dimension.h"
#include "tiledb/sm/query/query_condition.h"
#include "tiledb/sm/query/readers/result_cell_slab.h"
namespace tiledb {
namespace sm {
class Array;
class ArraySchema;
class MemoryTracker;
class StorageManager;
class Subarray;
/**
 * Position inside a fragment, stored as a tile index and a cell index.
 *
 * The type is move-only: copying is disabled through
 * DISABLE_COPY_AND_COPY_ASSIGN and moves are implemented with swap().
 */
class FragIdx {
 public:
  /* ********************************* */
  /*     CONSTRUCTORS & DESTRUCTORS    */
  /* ********************************* */

  /** Default constructor. Both indexes start at zero. */
  FragIdx() = default;

  /**
   * Value constructor.
   *
   * @param tile_idx Tile index.
   * @param cell_idx Cell index.
   */
  FragIdx(uint64_t tile_idx, uint64_t cell_idx)
      : tile_idx_(tile_idx)
      , cell_idx_(cell_idx) {
  }

  /** Move constructor. */
  FragIdx(FragIdx&& other) noexcept {
    // The members of this object were just zero-initialized by their default
    // member initializers, so the swap reads well-defined values and leaves
    // `other` with zeroed indexes.
    swap(other);
  }

  /** Move-assign operator. */
  FragIdx& operator=(FragIdx&& other) noexcept {
    // Swap with the argument; `other` receives the previous values of this
    // object.
    swap(other);
    return *this;
  }

  DISABLE_COPY_AND_COPY_ASSIGN(FragIdx);

  /* ********************************* */
  /*          PUBLIC METHODS           */
  /* ********************************* */

  /**
   * Swaps the contents (all field values) of this object with the given
   * object.
   *
   * @param frag_tile_idx Object to swap with.
   */
  void swap(FragIdx& frag_tile_idx) noexcept {
    std::swap(tile_idx_, frag_tile_idx.tile_idx_);
    std::swap(cell_idx_, frag_tile_idx.cell_idx_);
  }

  /* ********************************* */
  /*         PUBLIC ATTRIBUTES         */
  /* ********************************* */

  /** Tile index. */
  uint64_t tile_idx_{0};

  /** Cell index. */
  uint64_t cell_idx_{0};
};
/**
 * Identifies a tile by fragment index and tile index.
 *
 * The type is move-only: copying is disabled through
 * DISABLE_COPY_AND_COPY_ASSIGN and moves are implemented with swap().
 */
class IgnoredTile {
 public:
  /* ********************************* */
  /*     CONSTRUCTORS & DESTRUCTORS    */
  /* ********************************* */

  /** Default constructor. Both indexes start at zero. */
  IgnoredTile() = default;

  /**
   * Value constructor.
   *
   * @param frag_idx Fragment index.
   * @param tile_idx Tile index.
   */
  IgnoredTile(uint64_t frag_idx, uint64_t tile_idx)
      : frag_idx_(frag_idx)
      , tile_idx_(tile_idx) {
  }

  /** Move constructor. */
  IgnoredTile(IgnoredTile&& other) noexcept {
    // The members of this object were just zero-initialized by their default
    // member initializers, so the swap reads well-defined values and leaves
    // `other` with zeroed indexes.
    swap(other);
  }

  /** Move-assign operator. */
  IgnoredTile& operator=(IgnoredTile&& other) noexcept {
    // Swap with the argument; `other` receives the previous values of this
    // object.
    swap(other);
    return *this;
  }

  DISABLE_COPY_AND_COPY_ASSIGN(IgnoredTile);

  /* ********************************* */
  /*          PUBLIC METHODS           */
  /* ********************************* */

  /** Returns the fragment index. */
  inline uint64_t frag_idx() const {
    return frag_idx_;
  }

  /** Returns the tile index. */
  inline uint64_t tile_idx() const {
    return tile_idx_;
  }

  /** Two objects are equal when both the fragment and tile indexes match. */
  bool operator==(const IgnoredTile& v) const {
    return frag_idx_ == v.frag_idx_ && tile_idx_ == v.tile_idx_;
  }

  /**
   * Swaps the contents (all field values) of this object with the given
   * object.
   *
   * @param other Object to swap with.
   */
  void swap(IgnoredTile& other) noexcept {
    std::swap(frag_idx_, other.frag_idx_);
    std::swap(tile_idx_, other.tile_idx_);
  }

  /* ********************************* */
  /*            ATTRIBUTES             */
  /* ********************************* */

  // NOTE(review): these members sit in the public section although they were
  // labelled as private attributes. Access is left unchanged here so existing
  // users keep compiling; prefer the frag_idx()/tile_idx() accessors.

  /** Fragment index. */
  uint64_t frag_idx_{0};

  /** Tile index. */
  uint64_t tile_idx_{0};
};
/** Hash functor for IgnoredTile, usable with unordered containers. */
struct ignored_tile_hash {
  /**
   * Computes the hash of an ignored tile.
   *
   * The two component hashes are mixed with an order-dependent combine step.
   * A plain XOR is symmetric, so (a, b) and (b, a) would always collide and
   * every (k, k) pair would produce the same value.
   *
   * @param v Tile to hash.
   * @return Hash value built from the fragment and tile indexes.
   */
  size_t operator()(IgnoredTile const& v) const {
    const std::size_t h1 = std::hash<uint64_t>()(v.frag_idx());
    const std::size_t h2 = std::hash<uint64_t>()(v.tile_idx());
    // Golden-ratio based combine (same scheme as boost::hash_combine). The
    // constant is cast explicitly because size_t may be narrower than 64 bits.
    const std::size_t golden =
        static_cast<std::size_t>(0x9e3779b97f4a7c15ULL);
    return h1 ^ (h2 + golden + (h1 << 6) + (h1 >> 2));
  }
};
/**
* Processes read queries.
*
* Base class, derived from ReaderBase, that declares the read state, the
* memory budget bookkeeping members and the helper methods listed below.
*/
class SparseIndexReaderBase : public ReaderBase {
public:
/* ********************************* */
/* TYPE DEFINITIONS */
/* ********************************* */
/** The state for a read sparse global order query. */
struct ReadState {
/** The position (tile index and cell index) inside of each fragment. */
std::vector<FragIdx> frag_idx_;
/** Is the reader done with the query. */
bool done_adding_result_tiles_;
};
/* ********************************* */
/* CONSTRUCTORS & DESTRUCTORS */
/* ********************************* */
/** Constructor. */
SparseIndexReaderBase(
stats::Stats* stats,
shared_ptr<Logger> logger,
StorageManager* storage_manager,
Array* array,
Config& config,
std::unordered_map<std::string, QueryBuffer>& buffers,
Subarray& subarray,
Layout layout,
QueryCondition& condition);
/**
* Destructor.
*
* NOTE(review): not declared virtual here; confirm that ReaderBase declares a
* virtual destructor if objects are deleted through a base pointer.
*/
~SparseIndexReaderBase() = default;
/* ********************************* */
/* PUBLIC METHODS */
/* ********************************* */
/**
* Returns the current read state (read-only overload).
*
* @return pointer to the read state.
*/
const ReadState* read_state() const;
/**
* Returns the current read state (mutable overload).
*
* @return pointer to the read state.
*/
ReadState* read_state();
/**
* Initializes the reader.
*
* @return Status.
*/
Status init();
/**
* Resize the output buffers to the correct size after copying.
*
* @param cells_copied Number of cells copied.
*
* @return Status.
*/
Status resize_output_buffers(uint64_t cells_copied);
protected:
/* ********************************* */
/* PROTECTED ATTRIBUTES */
/* ********************************* */
/** Read state. */
ReadState read_state_;
/** Have we loaded all tiles for this fragment. */
std::vector<uint8_t> all_tiles_loaded_;
/** Dimension names. */
std::vector<std::string> dim_names_;
/** Are dimensions var sized. */
std::vector<bool> is_dim_var_size_;
/** Reverse sorted vector, per fragment, of tile ranges in the subarray, if
* set. */
std::vector<std::vector<std::pair<uint64_t, uint64_t>>> result_tile_ranges_;
/** Have we loaded the initial data. */
bool initial_data_loaded_;
/** Total memory budget. */
uint64_t memory_budget_;
/** Mutex protecting memory budget variables. */
std::mutex mem_budget_mtx_;
/** Memory tracker object for the array. */
MemoryTracker* array_memory_tracker_;
/** Memory used for coordinates tiles. */
uint64_t memory_used_for_coords_total_;
/** Memory used for query condition tiles. */
uint64_t memory_used_qc_tiles_total_;
/** Memory used for result tile ranges. */
uint64_t memory_used_result_tile_ranges_;
/** How much of the memory budget is reserved for coords. */
double memory_budget_ratio_coords_;
/** How much of the memory budget is reserved for query condition. */
double memory_budget_ratio_query_condition_;
/** How much of the memory budget is reserved for tile ranges. */
double memory_budget_ratio_tile_ranges_;
/** How much of the memory budget is reserved for array data. */
double memory_budget_ratio_array_data_;
/** Are we in elements mode. */
bool elements_mode_;
/** Names of dim/attr loaded for query condition. */
std::vector<std::string> qc_loaded_attr_names_;
/** Names of dim/attr loaded for query condition, stored as a set. */
std::unordered_set<std::string> qc_loaded_attr_names_set_;
/** Are the user's buffers full. */
bool buffers_full_;
/** List of tiles to ignore. */
std::unordered_set<IgnoredTile, ignored_tile_hash> ignored_tiles_;
/* ********************************* */
/* PROTECTED METHODS */
/* ********************************* */
/**
* Return how many cells were copied to the user's buffers so far.
*
* @param names Attribute/dimensions to compute for.
*
* @return Number of cells copied.
*/
uint64_t cells_copied(const std::vector<std::string>& names);
/**
* Get the coordinate tiles size for a dimension.
*
* @tparam BitmapType Bitmap type parameter. NOTE(review): its exact role is
* not visible in this header; confirm against the implementation.
* @param include_coords Include coordinates or not in the calculation.
* @param dim_num Number of dimensions.
* @param f Fragment index.
* @param t Tile index.
*
* @return Status and an optional pair (tiles_size, tiles_size_qc).
*/
template <class BitmapType>
tuple<Status, optional<std::pair<uint64_t, uint64_t>>> get_coord_tiles_size(
bool include_coords, unsigned dim_num, unsigned f, uint64_t t);
/**
* Load tile offsets and result tile ranges.
*
* @param include_coords Are coords included.
* @return Status.
*/
Status load_initial_data(bool include_coords);
/**
* Read and unfilter coord tiles.
*
* @param include_coords Include coordinates or not.
* @param result_tiles The result tiles to process.
*
* @return Status.
*/
Status read_and_unfilter_coords(
bool include_coords, const std::vector<ResultTile*>& result_tiles);
/**
* Compute tile bitmaps.
*
* @tparam BitmapType Bitmap type parameter. NOTE(review): its exact role is
* not visible in this header; confirm against the implementation.
* @param result_tiles Result tiles to process.
*
* @return Status.
*/
template <class BitmapType>
Status compute_tile_bitmaps(std::vector<ResultTile*>& result_tiles);
/**
* Apply query condition.
*
* @tparam ResultTileType Result tile type parameter. NOTE(review): confirm
* its role against the implementation.
* @tparam BitmapType Bitmap type parameter. NOTE(review): confirm its role
* against the implementation.
* @param result_tiles Result tiles to process.
*
* @return Status.
*/
template <class ResultTileType, class BitmapType>
Status apply_query_condition(std::vector<ResultTile*>& result_tiles);
/**
* Read and unfilter as many attributes as can fit in the memory budget and
* return what was loaded. Also keep the 'buffer_idx' updated to keep track
* of progress.
*
* NOTE(review): the earlier wording said names are returned in
* 'names_to_copy', but the signature returns a vector of uint64_t
* ('index_to_copy'); confirm which is intended.
*
* @param memory_budget Memory budget allowed for this operation.
* @param names Attribute/dimensions to compute for.
* @param mem_usage_per_attr Computed per attribute memory usage.
* @param buffer_idx Stores/return the current buffer index in process.
* @param result_tiles Result tiles to process.
*
* @return Status, index_to_copy.
*/
tuple<Status, optional<std::vector<uint64_t>>> read_and_unfilter_attributes(
const uint64_t memory_budget,
const std::vector<std::string>& names,
const std::vector<uint64_t>& mem_usage_per_attr,
uint64_t* buffer_idx,
std::vector<ResultTile*>& result_tiles);
/**
* Adds an extra offset in the end of the offsets buffer indicating the
* returned data size if an attribute is var-sized.
*
* @return Status.
*/
Status add_extra_offset();
/**
* Remove a result tile range for a specific fragment.
*
* @param f Fragment index.
*/
void remove_result_tile_range(uint64_t f);
};
} // namespace sm
} // namespace tiledb
#endif // TILEDB_SPARSE_INDEX_READER_BASE_H