Skip to content

Commit

Permalink
Merge branch 'bb-10.0-vicentiu' into 10.0
Browse files Browse the repository at this point in the history
Includes Percona XtraDB and TokuDB 5.6.36-82.1
  • Loading branch information
cvicentiu committed Aug 4, 2017
2 parents d85d6c9 + d2a1908 commit a346a56
Show file tree
Hide file tree
Showing 127 changed files with 34,128 additions and 514 deletions.
2 changes: 1 addition & 1 deletion storage/tokudb/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SET(TOKUDB_VERSION 5.6.36-82.0)
SET(TOKUDB_VERSION 5.6.36-82.1)
# PerconaFT only supports x86-64 and cmake-2.8.9+
IF(CMAKE_VERSION VERSION_LESS "2.8.9")
MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB")
Expand Down
10 changes: 10 additions & 0 deletions storage/tokudb/PerconaFT/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ project(TokuDB)
set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")

# detect when we are being built as a subproject
# (presumably MYSQL_PROJECT_NAME_DOCSTRING is only defined by the
# MySQL/MariaDB top-level build — TODO confirm against the server's
# top-level CMakeLists.txt)
if (DEFINED MYSQL_PROJECT_NAME_DOCSTRING)
# mark every translation unit below as being built inside the engine
add_definitions( -DMYSQL_TOKUDB_ENGINE=1)
# Only a Debug build with debug-sync enabled needs the server's own
# headers; pull in the top-level include/ and sql/ directories then.
# NOTE(review): CMAKE_SOURCE_DIR is the root of the *whole* build tree
# (the server), which is intentional here since we are a subproject.
if ((CMAKE_BUILD_TYPE MATCHES "Debug") AND
(CMAKE_CXX_FLAGS_DEBUG MATCHES " -DENABLED_DEBUG_SYNC"))
include_directories(${CMAKE_SOURCE_DIR}/include)
include_directories(${CMAKE_SOURCE_DIR}/sql)
endif ()
endif ()

## Versions of gcc >= 4.9.0 require special version of 'ar' and 'ranlib' for
## link-time optimizations to work properly.
##
Expand Down
5 changes: 3 additions & 2 deletions storage/tokudb/PerconaFT/buildheader/make_tdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ static void print_db_env_struct (void) {
"int (*dirtool_attach)(DB_ENV *, DB_TXN *, const char *, const char *)",
"int (*dirtool_detach)(DB_ENV *, DB_TXN *, const char *)",
"int (*dirtool_move)(DB_ENV *, DB_TXN *, const char *, const char *)",
"void (*kill_waiter)(DB_ENV *, void *extra)",
NULL};

sort_and_dump_fields("db_env", true, extra);
Expand Down Expand Up @@ -548,8 +549,8 @@ static void print_db_txn_struct (void) {
"int (*abort_with_progress)(DB_TXN*, TXN_PROGRESS_POLL_FUNCTION, void*)",
"int (*xa_prepare) (DB_TXN*, TOKU_XA_XID *, uint32_t flags)",
"uint64_t (*id64) (DB_TXN*)",
"void (*set_client_id)(DB_TXN *, uint64_t client_id)",
"uint64_t (*get_client_id)(DB_TXN *)",
"void (*set_client_id)(DB_TXN *, uint64_t client_id, void *client_extra)",
"void (*get_client_id)(DB_TXN *, uint64_t *client_id, void **client_extra)",
"bool (*is_prepared)(DB_TXN *)",
"DB_TXN *(*get_child)(DB_TXN *)",
"uint64_t (*get_start_time)(DB_TXN *)",
Expand Down
3 changes: 3 additions & 0 deletions storage/tokudb/PerconaFT/cmake_modules/TokuThirdParty.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ ExternalProject_Add(build_snappy
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_AR=${CMAKE_AR}
-DCMAKE_NM=${CMAKE_NM}
-DCMAKE_RANLIB=${CMAKE_RANLIB}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
${USE_PROJECT_CMAKE_MODULE_PATH}
Expand Down
9 changes: 8 additions & 1 deletion storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,10 @@ int toku_cachetable_openf (CACHEFILE *cfptr, CACHETABLE ct, const char *fname_in

// Effect: Return the cachefile's file name relative to the environment
// directory, or nullptr when no cachefile is given.
// The stray unconditional return left by the diff overlay (which made
// the null check unreachable) is removed; the nullptr-safe form is the
// intended behavior.
char *
toku_cachefile_fname_in_env (CACHEFILE cf) {
    if (cf) {
        return cf->fname_in_env;
    }
    return nullptr;
}

void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env) {
Expand Down Expand Up @@ -2890,6 +2893,10 @@ toku_cachefile_get_cachetable(CACHEFILE cf) {
return cf->cachetable;
}

// Effect: Return the cachefile that owns this cachetable PAIR.
CACHEFILE toku_pair_get_cachefile(PAIR pair) {
return pair->cachefile;
}

//Only called by ft_end_checkpoint
//Must have access to cf->fd (must be protected)
void toku_cachefile_fsync(CACHEFILE cf) {
Expand Down
3 changes: 3 additions & 0 deletions storage/tokudb/PerconaFT/ft/cachetable/cachetable.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,9 @@ void *toku_cachefile_get_userdata(CACHEFILE);
CACHETABLE toku_cachefile_get_cachetable(CACHEFILE cf);
// Effect: Get the cachetable.

CACHEFILE toku_pair_get_cachefile(PAIR);
// Effect: Get the cachefile of the pair

void toku_cachetable_swap_pair_values(PAIR old_pair, PAIR new_pair);
// Effect: Swaps the value_data of old_pair and new_pair.
// Requires: both old_pair and new_pair to be pinned with write locks.
Expand Down
97 changes: 77 additions & 20 deletions storage/tokudb/PerconaFT/ft/ft-ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -651,8 +651,12 @@ void toku_ftnode_clone_callback(void *value_data,
// set new pair attr if necessary
if (node->height == 0) {
*new_attr = make_ftnode_pair_attr(node);
node->logical_rows_delta = 0;
cloned_node->logical_rows_delta = 0;
for (int i = 0; i < node->n_children; i++) {
if (BP_STATE(node, i) == PT_AVAIL) {
BLB_LRD(node, i) = 0;
BLB_LRD(cloned_node, i) = 0;
}
}
} else {
new_attr->is_valid = false;
}
Expand Down Expand Up @@ -700,9 +704,26 @@ void toku_ftnode_flush_callback(CACHEFILE UU(cachefile),
if (ftnode->height == 0) {
FT_STATUS_INC(FT_FULL_EVICTIONS_LEAF, 1);
FT_STATUS_INC(FT_FULL_EVICTIONS_LEAF_BYTES, node_size);
if (!ftnode->dirty) {
toku_ft_adjust_logical_row_count(
ft, -ftnode->logical_rows_delta);

// A leaf node (height == 0) is being evicted (!keep_me) and is
// not a checkpoint clone (!is_clone). This leaf node may have
// had messages applied to satisfy a query, but was never
// actually dirtied (!ftnode->dirty && !write_me). **Note that
// if (write_me) would persist the node and clear the dirty
flag **. This message application may have updated the tree's
// logical row count. Since these message applications are not
persisted, we need to undo the logical row count adjustments as
// they may occur again in the future if/when the node is
// re-read from disk for another query or change.
if (!ftnode->dirty && !write_me) {
int64_t lrc_delta = 0;
for (int i = 0; i < ftnode->n_children; i++) {
if (BP_STATE(ftnode, i) == PT_AVAIL) {
lrc_delta -= BLB_LRD(ftnode, i);
BLB_LRD(ftnode, i) = 0;
}
}
toku_ft_adjust_logical_row_count(ft, lrc_delta);
}
} else {
FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF, 1);
Expand All @@ -711,17 +732,18 @@ void toku_ftnode_flush_callback(CACHEFILE UU(cachefile),
toku_free(*disk_data);
} else {
if (ftnode->height == 0) {
// No need to adjust logical row counts when flushing a clone
// as they should have been zeroed out anyway when cloned.
// Clones are 'copies' of work already done so doing it again
// (adjusting row counts) would be redundant and leads to
// inaccurate counts.
for (int i = 0; i < ftnode->n_children; i++) {
if (BP_STATE(ftnode, i) == PT_AVAIL) {
BASEMENTNODE bn = BLB(ftnode, i);
toku_ft_decrease_stats(&ft->in_memory_stats,
bn->stat64_delta);
}
}
if (!ftnode->dirty) {
toku_ft_adjust_logical_row_count(
ft, -ftnode->logical_rows_delta);
}
}
}
toku_ftnode_free(&ftnode);
Expand All @@ -748,24 +770,48 @@ toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe)
}
}

int toku_ftnode_fetch_callback (CACHEFILE UU(cachefile), PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash,
void **ftnode_pv, void** disk_data, PAIR_ATTR *sizep, int *dirtyp, void *extraargs) {
int toku_ftnode_fetch_callback(CACHEFILE UU(cachefile),
PAIR p,
int fd,
BLOCKNUM blocknum,
uint32_t fullhash,
void **ftnode_pv,
void **disk_data,
PAIR_ATTR *sizep,
int *dirtyp,
void *extraargs) {
assert(extraargs);
assert(*ftnode_pv == NULL);
FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data;
assert(*ftnode_pv == nullptr);
FTNODE_DISK_DATA *ndd = (FTNODE_DISK_DATA *)disk_data;
ftnode_fetch_extra *bfe = (ftnode_fetch_extra *)extraargs;
FTNODE *node=(FTNODE*)ftnode_pv;
FTNODE *node = (FTNODE *)ftnode_pv;
// deserialize the node, must pass the bfe in because we cannot
// evaluate what piece of the the node is necessary until we get it at
// least partially into memory
int r = toku_deserialize_ftnode_from(fd, blocknum, fullhash, node, ndd, bfe);
int r =
toku_deserialize_ftnode_from(fd, blocknum, fullhash, node, ndd, bfe);
if (r != 0) {
if (r == TOKUDB_BAD_CHECKSUM) {
fprintf(stderr,
"Checksum failure while reading node in file %s.\n",
toku_cachefile_fname_in_env(cachefile));
fprintf(
stderr,
"%s:%d:toku_ftnode_fetch_callback - "
"file[%s], blocknum[%ld], toku_deserialize_ftnode_from "
"failed with a checksum error.\n",
__FILE__,
__LINE__,
toku_cachefile_fname_in_env(cachefile),
blocknum.b);
} else {
fprintf(stderr, "Error deserializing node, errno = %d", r);
fprintf(
stderr,
"%s:%d:toku_ftnode_fetch_callback - "
"file[%s], blocknum[%ld], toku_deserialize_ftnode_from "
"failed with %d.\n",
__FILE__,
__LINE__,
toku_cachefile_fname_in_env(cachefile),
blocknum.b,
r);
}
// make absolutely sure we crash before doing anything else.
abort();
Expand All @@ -774,7 +820,8 @@ int toku_ftnode_fetch_callback (CACHEFILE UU(cachefile), PAIR p, int fd, BLOCKNU
if (r == 0) {
*sizep = make_ftnode_pair_attr(*node);
(*node)->ct_pair = p;
*dirtyp = (*node)->dirty; // deserialize could mark the node as dirty (presumably for upgrade)
*dirtyp = (*node)->dirty; // deserialize could mark the node as dirty
// (presumably for upgrade)
}
return r;
}
Expand Down Expand Up @@ -947,6 +994,16 @@ int toku_ftnode_pe_callback(void *ftnode_pv,
basements_to_destroy[num_basements_to_destroy++] = bn;
toku_ft_decrease_stats(&ft->in_memory_stats,
bn->stat64_delta);
// A basement node is being partially evicted.
// This basement node may have had messages applied to it to
// satisfy a query, but was never actually dirtied.
// This message application may have updated the trees
// logical row count. Since these message applications are
// not being persisted, we need to undo the logical row count
// adjustments as they may occur again in the future if/when
// the node is re-read from disk for another query or change.
toku_ft_adjust_logical_row_count(ft,
-bn->logical_rows_delta);
set_BNULL(node, i);
BP_STATE(node, i) = PT_ON_DISK;
num_partial_evictions++;
Expand Down
3 changes: 2 additions & 1 deletion storage/tokudb/PerconaFT/ft/ft.cc
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,8 @@ int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_handle, CACHEFILE cf, LSN
}

int fd = toku_cachefile_get_fd(cf);
int r = toku_deserialize_ft_from(fd, max_acceptable_lsn, &ft);
const char *fn = toku_cachefile_fname_in_env(cf);
int r = toku_deserialize_ft_from(fd, fn, max_acceptable_lsn, &ft);
if (r == TOKUDB_BAD_CHECKSUM) {
fprintf(stderr, "Checksum failure while reading header in file %s.\n", toku_cachefile_fname_in_env(cf));
assert(false); // make absolutely sure we crash before doing anything else
Expand Down
19 changes: 9 additions & 10 deletions storage/tokudb/PerconaFT/ft/node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ void toku_destroy_ftnode_internals(FTNODE node) {
if (node->height > 0) {
destroy_nonleaf_childinfo(BNC(node,i));
} else {
paranoid_invariant(BLB_LRD(node, i) == 0);
destroy_basement_node(BLB(node, i));
}
} else if (BP_STATE(node,i) == PT_COMPRESSED) {
Expand Down Expand Up @@ -386,15 +387,15 @@ static void bnc_apply_messages_to_basement_node(
const pivot_bounds &
bounds, // contains pivot key bounds of this basement node
txn_gc_info *gc_info,
bool *msgs_applied,
int64_t* logical_rows_delta) {
bool *msgs_applied) {
int r;
NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum);

// Determine the offsets in the message trees between which we need to
// apply messages from this buffer
STAT64INFO_S stats_delta = {0, 0};
uint64_t workdone_this_ancestor = 0;
int64_t logical_rows_delta = 0;

uint32_t stale_lbi, stale_ube;
if (!bn->stale_ancestor_messages_applied) {
Expand Down Expand Up @@ -470,7 +471,7 @@ static void bnc_apply_messages_to_basement_node(
gc_info,
&workdone_this_ancestor,
&stats_delta,
logical_rows_delta);
&logical_rows_delta);
}
} else if (stale_lbi == stale_ube) {
// No stale messages to apply, we just apply fresh messages, and mark
Expand All @@ -482,7 +483,7 @@ static void bnc_apply_messages_to_basement_node(
.gc_info = gc_info,
.workdone = &workdone_this_ancestor,
.stats_to_update = &stats_delta,
.logical_rows_delta = logical_rows_delta};
.logical_rows_delta = &logical_rows_delta};
if (fresh_ube - fresh_lbi > 0)
*msgs_applied = true;
r = bnc->fresh_message_tree
Expand All @@ -503,7 +504,7 @@ static void bnc_apply_messages_to_basement_node(
.gc_info = gc_info,
.workdone = &workdone_this_ancestor,
.stats_to_update = &stats_delta,
.logical_rows_delta = logical_rows_delta};
.logical_rows_delta = &logical_rows_delta};

r = bnc->stale_message_tree
.iterate_on_range<struct iterate_do_bn_apply_msg_extra,
Expand All @@ -521,6 +522,8 @@ static void bnc_apply_messages_to_basement_node(
if (stats_delta.numbytes || stats_delta.numrows) {
toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta);
}
toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
bn->logical_rows_delta += logical_rows_delta;
}

static void
Expand All @@ -534,7 +537,6 @@ apply_ancestors_messages_to_bn(
bool* msgs_applied
)
{
int64_t logical_rows_delta = 0;
BASEMENTNODE curr_bn = BLB(node, childnum);
const pivot_bounds curr_bounds = bounds.next_bounds(node, childnum);
for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) {
Expand All @@ -547,16 +549,13 @@ apply_ancestors_messages_to_bn(
curr_ancestors->childnum,
curr_bounds,
gc_info,
msgs_applied,
&logical_rows_delta
msgs_applied
);
// We don't want to check this ancestor node again if the
// next time we query it, the msn hasn't changed.
curr_bn->max_msn_applied = curr_ancestors->node->max_msn_applied_to_node_on_disk;
}
}
toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
node->logical_rows_delta += logical_rows_delta;
// At this point, we know all the stale messages above this
// basement node have been applied, and any new messages will be
// fresh, so we don't need to look at stale messages for this
Expand Down
20 changes: 15 additions & 5 deletions storage/tokudb/PerconaFT/ft/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,11 +175,6 @@ struct ftnode {
int height;
int dirty;
uint32_t fullhash;
// current count of rows add or removed as a result of message application
// to this node as a basement, irrelevant for internal nodes, gets reset
// when node is undirtied. Used to back out tree scoped LRC id node is
// evicted but not persisted
int64_t logical_rows_delta;

// for internal nodes, if n_children==fanout+1 then the tree needs to be
// rebalanced. for leaf nodes, represents number of basement nodes
Expand Down Expand Up @@ -211,6 +206,10 @@ struct ftnode_leaf_basement_node {
unsigned int seqinsert; // number of sequential inserts to this leaf
MSN max_msn_applied; // max message sequence number applied
bool stale_ancestor_messages_applied;
// current count of rows added or removed as a result of message application
// to this basement node, gets reset when node is undirtied.
// Used to back out tree-scoped LRC if node is evicted but not persisted
int64_t logical_rows_delta;
STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk
};
typedef struct ftnode_leaf_basement_node *BASEMENTNODE;
Expand Down Expand Up @@ -385,6 +384,16 @@ enum reactivity toku_ftnode_get_reactivity(FT ft, FTNODE node);
enum reactivity toku_ftnode_get_nonleaf_reactivity(FTNODE node, unsigned int fanout);
enum reactivity toku_ftnode_get_leaf_reactivity(FTNODE node, uint32_t nodesize);

// Effect: Return the name (within the environment) of the cachefile
// backing this node, or nullptr when the node has no cachetable pair
// or the pair carries no cachefile.
inline const char* toku_ftnode_get_cachefile_fname_in_env(FTNODE node) {
    CACHEFILE cf =
        node->ct_pair ? toku_pair_get_cachefile(node->ct_pair) : nullptr;
    return cf ? toku_cachefile_fname_in_env(cf) : nullptr;
}

/**
* Finds the next child for HOT to flush to, given that everything up to
* and including k has been flattened.
Expand Down Expand Up @@ -577,3 +586,4 @@ static inline void set_BSB(FTNODE node, int i, struct sub_block *sb) {
#define BLB_DATA(node,i) (&(BLB(node,i)->data_buffer))
#define BLB_NBYTESINDATA(node,i) (BLB_DATA(node,i)->get_disk_size())
#define BLB_SEQINSERT(node,i) (BLB(node,i)->seqinsert)
#define BLB_LRD(node, i) (BLB(node,i)->logical_rows_delta)
Loading

0 comments on commit a346a56

Please sign in to comment.