Skip to content

Commit 45794ea

Browse files
committed
FT-279 Clean up ftnode_fetch_extra struct and, most importantly, its
initialization code
1 parent 3e8a298 commit 45794ea

30 files changed

+387
-461
lines changed

ft/ft-cachetable-wrappers.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ toku_pin_ftnode_for_query(
209209
UNLOCKERS unlockers,
210210
ANCESTORS ancestors,
211211
const pivot_bounds &bounds,
212-
FTNODE_FETCH_EXTRA bfe,
212+
ftnode_fetch_extra *bfe,
213213
bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this
214214
FTNODE *node_p,
215215
bool* msgs_applied)
@@ -322,7 +322,7 @@ toku_pin_ftnode_with_dep_nodes(
322322
FT ft,
323323
BLOCKNUM blocknum,
324324
uint32_t fullhash,
325-
FTNODE_FETCH_EXTRA bfe,
325+
ftnode_fetch_extra *bfe,
326326
pair_lock_type lock_type,
327327
uint32_t num_dependent_nodes,
328328
FTNODE *dependent_nodes,
@@ -364,7 +364,7 @@ toku_pin_ftnode_with_dep_nodes(
364364
void toku_pin_ftnode(FT ft,
365365
BLOCKNUM blocknum,
366366
uint32_t fullhash,
367-
FTNODE_FETCH_EXTRA bfe,
367+
ftnode_fetch_extra *bfe,
368368
pair_lock_type lock_type,
369369
FTNODE *node_p,
370370
bool move_messages) {

ft/ft-cachetable-wrappers.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ toku_pin_ftnode_for_query(
148148
UNLOCKERS unlockers,
149149
ANCESTORS ancestors,
150150
const pivot_bounds &bounds,
151-
FTNODE_FETCH_EXTRA bfe,
151+
ftnode_fetch_extra *bfe,
152152
bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this
153153
FTNODE *node_p,
154154
bool* msgs_applied
@@ -159,7 +159,7 @@ void toku_pin_ftnode(
159159
FT ft,
160160
BLOCKNUM blocknum,
161161
uint32_t fullhash,
162-
FTNODE_FETCH_EXTRA bfe,
162+
ftnode_fetch_extra *bfe,
163163
pair_lock_type lock_type,
164164
FTNODE *node_p,
165165
bool move_messages
@@ -171,7 +171,7 @@ void toku_pin_ftnode_with_dep_nodes(
171171
FT ft,
172172
BLOCKNUM blocknum,
173173
uint32_t fullhash,
174-
FTNODE_FETCH_EXTRA bfe,
174+
ftnode_fetch_extra *bfe,
175175
pair_lock_type lock_type,
176176
uint32_t num_dependent_nodes,
177177
FTNODE *dependent_nodes,

ft/ft-flusher.cc

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -491,8 +491,8 @@ ct_maybe_merge_child(struct flusher_advice *fa,
491491
uint32_t fullhash;
492492
CACHEKEY root;
493493
toku_calculate_root_offset_pointer(ft, &root, &fullhash);
494-
struct ftnode_fetch_extra bfe;
495-
fill_bfe_for_full_read(&bfe, ft);
494+
ftnode_fetch_extra bfe;
495+
bfe.create_for_full_read(ft);
496496
toku_pin_ftnode(ft, root, fullhash, &bfe, PL_WRITE_EXPENSIVE, &root_node, true);
497497
toku_ftnode_assert_fully_in_memory(root_node);
498498
}
@@ -1075,8 +1075,8 @@ ft_split_child(
10751075

10761076
static void bring_node_fully_into_memory(FTNODE node, FT ft) {
10771077
if (!toku_ftnode_fully_in_memory(node)) {
1078-
struct ftnode_fetch_extra bfe;
1079-
fill_bfe_for_full_read(&bfe, ft);
1078+
ftnode_fetch_extra bfe;
1079+
bfe.create_for_full_read(ft);
10801080
toku_cachetable_pf_pinned_pair(
10811081
node,
10821082
toku_ftnode_pf_callback,
@@ -1379,8 +1379,8 @@ ft_merge_child(
13791379
FTNODE childa, childb;
13801380
{
13811381
uint32_t childfullhash = compute_child_fullhash(ft->cf, node, childnuma);
1382-
struct ftnode_fetch_extra bfe;
1383-
fill_bfe_for_full_read(&bfe, ft);
1382+
ftnode_fetch_extra bfe;
1383+
bfe.create_for_full_read(ft);
13841384
toku_pin_ftnode_with_dep_nodes(ft, BP_BLOCKNUM(node, childnuma), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &node, &childa, true);
13851385
}
13861386
// for test
@@ -1390,8 +1390,8 @@ ft_merge_child(
13901390
dep_nodes[0] = node;
13911391
dep_nodes[1] = childa;
13921392
uint32_t childfullhash = compute_child_fullhash(ft->cf, node, childnumb);
1393-
struct ftnode_fetch_extra bfe;
1394-
fill_bfe_for_full_read(&bfe, ft);
1393+
ftnode_fetch_extra bfe;
1394+
bfe.create_for_full_read(ft);
13951395
toku_pin_ftnode_with_dep_nodes(ft, BP_BLOCKNUM(node, childnumb), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 2, dep_nodes, &childb, true);
13961396
}
13971397

@@ -1520,10 +1520,10 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
15201520
ft->blocktable.verify_blocknum_allocated(targetchild);
15211521
uint32_t childfullhash = compute_child_fullhash(ft->cf, parent, childnum);
15221522
FTNODE child;
1523-
struct ftnode_fetch_extra bfe;
1523+
ftnode_fetch_extra bfe;
15241524
// Note that we don't read the entire node into memory yet.
15251525
// The idea is let's try to do the minimum work before releasing the parent lock
1526-
fill_bfe_for_min_read(&bfe, ft);
1526+
bfe.create_for_min_read(ft);
15271527
toku_pin_ftnode_with_dep_nodes(ft, targetchild, childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &parent, &child, true);
15281528

15291529
// for test

ft/ft-hot-flusher.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,8 +328,8 @@ toku_ft_hot_optimize(FT_HANDLE ft_handle, DBT* left, DBT* right,
328328
// Get root node (the first parent of each successive HOT
329329
// call.)
330330
toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash);
331-
struct ftnode_fetch_extra bfe;
332-
fill_bfe_for_full_read(&bfe, ft_handle->ft);
331+
ftnode_fetch_extra bfe;
332+
bfe.create_for_full_read(ft_handle->ft);
333333
toku_pin_ftnode(ft_handle->ft,
334334
(BLOCKNUM) root_key,
335335
fullhash,

ft/ft-internal.h

Lines changed: 87 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,92 @@ struct ft_handle {
298298
PAIR_ATTR make_ftnode_pair_attr(FTNODE node);
299299
PAIR_ATTR make_invalid_pair_attr(void);
300300

301+
//
302+
// Field in ftnode_fetch_extra that tells the
303+
// partial fetch callback what piece of the node
304+
// is needed by the ydb
305+
//
306+
enum ftnode_fetch_type {
307+
ftnode_fetch_none = 1, // no partitions needed.
308+
ftnode_fetch_subset, // some subset of partitions needed
309+
ftnode_fetch_prefetch, // this is part of a prefetch call
310+
ftnode_fetch_all, // every partition is needed
311+
ftnode_fetch_keymatch, // one child is needed if it holds both keys
312+
};
313+
314+
// Info passed to cachetable fetch callbacks to say which parts of a node
315+
// should be fetched (perhaps a subset, perhaps the whole thing, depending
316+
// on operation)
317+
class ftnode_fetch_extra {
318+
public:
319+
// Used when the whole node must be in memory, such as for flushes.
320+
void create_for_full_read(FT ft);
321+
322+
// A subset of children are necessary. Used by point queries.
323+
void create_for_subset_read(FT ft, ft_search *search, const DBT *left, const DBT *right,
324+
bool left_is_neg_infty, bool right_is_pos_infty,
325+
bool disable_prefetching, bool read_all_partitions);
326+
327+
// No partitions are necessary - only pivots and/or subtree estimates.
328+
// Currently used for stat64.
329+
void create_for_min_read(FT ft);
330+
331+
// Used to prefetch partitions that fall within the bounds given by the cursor.
332+
void create_for_prefetch(FT ft, struct ft_cursor *cursor);
333+
334+
// Only a portion of the node (within a keyrange) is required.
335+
// Used by keysrange when the left and right key are in the same basement node.
336+
void create_for_keymatch(FT ft, const DBT *left, const DBT *right,
337+
bool disable_prefetching, bool read_all_partitions);
338+
339+
void destroy(void);
340+
341+
// return: true if a specific childnum is required to be in memory
342+
bool wants_child_available(int childnum) const;
343+
344+
// return: the childnum of the leftmost child that is required to be in memory
345+
int leftmost_child_wanted(FTNODE node) const;
346+
347+
// return: the childnum of the rightmost child that is required to be in memory
348+
int rightmost_child_wanted(FTNODE node) const;
349+
350+
// needed for reading a node off disk
351+
FT ft;
352+
353+
enum ftnode_fetch_type type;
354+
355+
// used in the case where type == ftnode_fetch_subset
356+
// parameters needed to find out which child needs to be decompressed (so it can be read)
357+
ft_search *search;
358+
DBT range_lock_left_key, range_lock_right_key;
359+
bool left_is_neg_infty, right_is_pos_infty;
360+
361+
// states if we should try to aggressively fetch basement nodes
362+
// that are not specifically needed for current query,
363+
// but may be needed for other cursor operations user is doing
364+
// For example, if we have not disabled prefetching,
365+
// and the user is doing a dictionary wide scan, then
366+
// even though a query may only want one basement node,
367+
// we fetch all basement nodes in a leaf node.
368+
bool disable_prefetching;
369+
370+
// this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback
371+
// these callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it
372+
int child_to_read;
373+
374+
// when we read internal nodes, we want to read all the data off disk in one I/O
375+
// then we'll treat it as normal and only decompress the needed partitions etc.
376+
bool read_all_partitions;
377+
378+
// Accounting: How many bytes were read, and how much time did we spend doing I/O?
379+
uint64_t bytes_read;
380+
tokutime_t io_time;
381+
tokutime_t decompress_time;
382+
tokutime_t deserialize_time;
383+
384+
private:
385+
void _create_internal(FT ft_);
386+
};
301387

302388
// Only exported for tests.
303389
// Cachetable callbacks for ftnodes.
@@ -333,47 +419,6 @@ STAT64INFO_S toku_get_and_clear_basement_stats(FTNODE leafnode);
333419

334420
void toku_verify_or_set_counts(FTNODE);
335421

336-
//
337-
// Helper function to fill a ftnode_fetch_extra with data
338-
// that will tell the fetch callback that the entire node is
339-
// necessary. Used in cases where the entire node
340-
// is required, such as for flushes.
341-
//
342-
void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT ft);
343-
344-
//
345-
// Helper function to fill a ftnode_fetch_extra with data
346-
// that will tell the fetch callback that an explicit range of children is
347-
// necessary. Used in cases where the portion of the node that is required
348-
// is known in advance, e.g. for keysrange when the left and right key
349-
// are in the same basement node.
350-
//
351-
void fill_bfe_for_keymatch(struct ftnode_fetch_extra *bfe, FT ft,
352-
const DBT *left, const DBT *right,
353-
bool disable_prefetching, bool read_all_partitions);
354-
//
355-
// Helper function to fill a ftnode_fetch_extra with data
356-
// that will tell the fetch callback that some subset of the node is
357-
// necessary. Used in cases where some of the node is required
358-
// such as for a point query.
359-
//
360-
void fill_bfe_for_subset_read(struct ftnode_fetch_extra *bfe, FT ft, ft_search *search,
361-
const DBT *left, const DBT *right,
362-
bool left_is_neg_infty, bool right_is_pos_infty,
363-
bool disable_prefetching, bool read_all_partitions);
364-
365-
//
366-
// Helper function to fill a ftnode_fetch_extra with data
367-
// that will tell the fetch callback that no partitions are
368-
// necessary, only the pivots and/or subtree estimates.
369-
// Currently used for stat64.
370-
//
371-
void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT ft);
372-
373-
void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, FT ft, struct ft_cursor *cursor);
374-
375-
void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe);
376-
377422
// TODO: consider moving this to ft/pivotkeys.cc
378423
class pivot_bounds {
379424
public:
@@ -396,11 +441,6 @@ class pivot_bounds {
396441
const DBT _upper_bound_inclusive;
397442
};
398443

399-
// TODO: move into the ftnode_fetch_extra class
400-
bool toku_bfe_wants_child_available (struct ftnode_fetch_extra* bfe, int childnum);
401-
int toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node);
402-
int toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node);
403-
404444
// allocate a block number
405445
// allocate and initialize a ftnode
406446
// put the ftnode into the cache table
@@ -584,7 +624,7 @@ typedef struct {
584624
TOKU_ENGINE_STATUS_ROW_S status[FT_STATUS_NUM_ROWS];
585625
} FT_STATUS_S, *FT_STATUS;
586626

587-
void toku_ft_status_update_pivot_fetch_reason(struct ftnode_fetch_extra *bfe);
627+
void toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe);
588628
void toku_ft_status_update_flush_reason(FTNODE node, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written, tokutime_t write_time, bool for_checkpoint);
589629
void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time);
590630
void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time);

0 commit comments

Comments
 (0)