@@ -298,6 +298,92 @@ struct ft_handle {
298
298
PAIR_ATTR make_ftnode_pair_attr (FTNODE node);
299
299
PAIR_ATTR make_invalid_pair_attr (void );
300
300
301
+ //
302
+ // Value stored in ftnode_fetch_extra::type. It tells the
303
+ // partial fetch callback which piece of the node
304
+ // is needed by the ydb.
305
+ //
306
+ enum ftnode_fetch_type {
307
+ ftnode_fetch_none = 1 , // no partitions needed. Numbering starts at 1, so no enumerator has the value 0.
308
+ ftnode_fetch_subset, // some subset of partitions needed
309
+ ftnode_fetch_prefetch, // this is part of a prefetch call
310
+ ftnode_fetch_all, // every partition is needed
311
+ ftnode_fetch_keymatch, // one child is needed, provided it holds both keys
312
+ };
313
+
314
+ // Info passed to cachetable fetch callbacks to say which parts of a node
315
+ // should be fetched (perhaps a subset, perhaps the whole thing, depending
316
+ // on the operation). Also carries the I/O accounting fields declared below.
317
+ class ftnode_fetch_extra {
318
+ public:
319
+ // Used when the whole node must be in memory, such as for flushes.
320
+ void create_for_full_read (FT ft);
321
+
322
+ // A subset of the children is necessary. Used by point queries.
323
+ void create_for_subset_read (FT ft, ft_search *search, const DBT *left, const DBT *right,
324
+ bool left_is_neg_infty, bool right_is_pos_infty,
325
+ bool disable_prefetching, bool read_all_partitions);
326
+
327
+ // No partitions are necessary - only pivots and/or subtree estimates.
328
+ // Currently used for stat64.
329
+ void create_for_min_read (FT ft);
330
+
331
+ // Used to prefetch partitions that fall within the bounds given by the cursor.
332
+ void create_for_prefetch (FT ft, struct ft_cursor *cursor);
333
+
334
+ // Only a portion of the node (within a key range) is required.
335
+ // Used by keysrange when the left and right keys are in the same basement node.
336
+ void create_for_keymatch (FT ft, const DBT *left, const DBT *right,
337
+ bool disable_prefetching, bool read_all_partitions);
338
+
339
+ void destroy (void ); // NOTE(review): what this releases is not visible in this header - confirm against the definition
340
+
341
+ // return: true if a specific childnum is required to be in memory
342
+ bool wants_child_available (int childnum) const ;
343
+
344
+ // return: the childnum of the leftmost child that is required to be in memory
345
+ int leftmost_child_wanted (FTNODE node) const ;
346
+
347
+ // return: the childnum of the rightmost child that is required to be in memory
348
+ int rightmost_child_wanted (FTNODE node) const ;
349
+
350
+ // needed for reading a node off disk
351
+ FT ft;
352
+
353
+ enum ftnode_fetch_type type; // which piece of the node is needed; see enum ftnode_fetch_type
354
+
355
+ // used in the case where type == ftnode_fetch_subset
356
+ // parameters needed to find out which child needs to be decompressed (so it can be read)
357
+ ft_search *search;
358
+ DBT range_lock_left_key, range_lock_right_key;
359
+ bool left_is_neg_infty, right_is_pos_infty;
360
+
361
+ // states whether we should try to aggressively fetch basement nodes
362
+ // that are not specifically needed for the current query,
363
+ // but may be needed for other cursor operations the user is doing.
364
+ // For example, if we have not disabled prefetching,
365
+ // and the user is doing a dictionary-wide scan, then
366
+ // even though a query may only want one basement node,
367
+ // we fetch all basement nodes in a leaf node.
368
+ bool disable_prefetching;
369
+
370
+ // this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback
371
+ // these callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it
372
+ int child_to_read;
373
+
374
+ // when we read internal nodes, we want to read all the data off disk in one I/O
375
+ // then we'll treat it as normal and only decompress the needed partitions etc.
376
+ bool read_all_partitions;
377
+
378
+ // Accounting: How many bytes were read, and how much time did we spend doing I/O?
379
+ uint64_t bytes_read;
380
+ tokutime_t io_time;
381
+ tokutime_t decompress_time;
382
+ tokutime_t deserialize_time;
383
+
384
+ private:
385
+ void _create_internal (FT ft_); // presumably shared setup used by the create_for_* methods - TODO confirm against the definition
386
+ };
301
387
302
388
// Only exported for tests.
303
389
// Cachetable callbacks for ftnodes.
@@ -333,47 +419,6 @@ STAT64INFO_S toku_get_and_clear_basement_stats(FTNODE leafnode);
333
419
334
420
void toku_verify_or_set_counts (FTNODE);
335
421
336
- //
337
- // Helper function to fill a ftnode_fetch_extra with data
338
- // that will tell the fetch callback that the entire node is
339
- // necessary. Used in cases where the entire node
340
- // is required, such as for flushes.
341
- //
342
- void fill_bfe_for_full_read (struct ftnode_fetch_extra *bfe, FT ft);
343
-
344
- //
345
- // Helper function to fill a ftnode_fetch_extra with data
346
- // that will tell the fetch callback that an explicit range of children is
347
- // necessary. Used in cases where the portion of the node that is required
348
- // is known in advance, e.g. for keysrange when the left and right key
349
- // are in the same basement node.
350
- //
351
- void fill_bfe_for_keymatch (struct ftnode_fetch_extra *bfe, FT ft,
352
- const DBT *left, const DBT *right,
353
- bool disable_prefetching, bool read_all_partitions);
354
- //
355
- // Helper function to fill a ftnode_fetch_extra with data
356
- // that will tell the fetch callback that some subset of the node
357
- // necessary. Used in cases where some of the node is required
358
- // such as for a point query.
359
- //
360
- void fill_bfe_for_subset_read (struct ftnode_fetch_extra *bfe, FT ft, ft_search *search,
361
- const DBT *left, const DBT *right,
362
- bool left_is_neg_infty, bool right_is_pos_infty,
363
- bool disable_prefetching, bool read_all_partitions);
364
-
365
- //
366
- // Helper function to fill a ftnode_fetch_extra with data
367
- // that will tell the fetch callback that no partitions are
368
- // necessary, only the pivots and/or subtree estimates.
369
- // Currently used for stat64.
370
- //
371
- void fill_bfe_for_min_read (struct ftnode_fetch_extra *bfe, FT ft);
372
-
373
- void fill_bfe_for_prefetch (struct ftnode_fetch_extra *bfe, FT ft, struct ft_cursor *cursor);
374
-
375
- void destroy_bfe_for_prefetch (struct ftnode_fetch_extra *bfe);
376
-
377
422
// TODO: consider moving this to ft/pivotkeys.cc
378
423
class pivot_bounds {
379
424
public:
@@ -396,11 +441,6 @@ class pivot_bounds {
396
441
const DBT _upper_bound_inclusive;
397
442
};
398
443
399
- // TODO: move into the ftnode_fetch_extra class
400
- bool toku_bfe_wants_child_available (struct ftnode_fetch_extra * bfe, int childnum);
401
- int toku_bfe_leftmost_child_wanted (struct ftnode_fetch_extra *bfe, FTNODE node);
402
- int toku_bfe_rightmost_child_wanted (struct ftnode_fetch_extra *bfe, FTNODE node);
403
-
404
444
// allocate a block number
405
445
// allocate and initialize a ftnode
406
446
// put the ftnode into the cache table
@@ -584,7 +624,7 @@ typedef struct {
584
624
TOKU_ENGINE_STATUS_ROW_S status[FT_STATUS_NUM_ROWS];
585
625
} FT_STATUS_S, *FT_STATUS;
586
626
587
- void toku_ft_status_update_pivot_fetch_reason (struct ftnode_fetch_extra *bfe);
627
+ void toku_ft_status_update_pivot_fetch_reason (ftnode_fetch_extra *bfe);
588
628
void toku_ft_status_update_flush_reason (FTNODE node, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written, tokutime_t write_time, bool for_checkpoint);
589
629
void toku_ft_status_update_serialize_times (FTNODE node, tokutime_t serialize_time, tokutime_t compress_time);
590
630
void toku_ft_status_update_deserialize_times (FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time);
0 commit comments