Skip to content

Commit

Permalink
5.6.35-80.0
Browse files Browse the repository at this point in the history
  • Loading branch information
cvicentiu committed Mar 4, 2017
1 parent d4f0686 commit d71df7e
Show file tree
Hide file tree
Showing 20 changed files with 756 additions and 83 deletions.
2 changes: 1 addition & 1 deletion storage/tokudb/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SET(TOKUDB_VERSION 5.6.34-79.1)
SET(TOKUDB_VERSION 5.6.35-80.0)
# PerconaFT only supports x86-64 and cmake-2.8.9+
IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND
NOT CMAKE_VERSION VERSION_LESS "2.8.9")
Expand Down
23 changes: 13 additions & 10 deletions storage/tokudb/PerconaFT/ft/ft-ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -651,10 +651,8 @@ void toku_ftnode_clone_callback(void *value_data,
// set new pair attr if necessary
if (node->height == 0) {
*new_attr = make_ftnode_pair_attr(node);
for (int i = 0; i < node->n_children; i++) {
BLB(node, i)->logical_rows_delta = 0;
BLB(cloned_node, i)->logical_rows_delta = 0;
}
node->logical_rows_delta = 0;
cloned_node->logical_rows_delta = 0;
} else {
new_attr->is_valid = false;
}
Expand Down Expand Up @@ -702,6 +700,10 @@ void toku_ftnode_flush_callback(CACHEFILE UU(cachefile),
if (ftnode->height == 0) {
FT_STATUS_INC(FT_FULL_EVICTIONS_LEAF, 1);
FT_STATUS_INC(FT_FULL_EVICTIONS_LEAF_BYTES, node_size);
if (!ftnode->dirty) {
toku_ft_adjust_logical_row_count(
ft, -ftnode->logical_rows_delta);
}
} else {
FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF, 1);
FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF_BYTES, node_size);
Expand All @@ -714,11 +716,12 @@ void toku_ftnode_flush_callback(CACHEFILE UU(cachefile),
BASEMENTNODE bn = BLB(ftnode, i);
toku_ft_decrease_stats(&ft->in_memory_stats,
bn->stat64_delta);
if (!ftnode->dirty)
toku_ft_adjust_logical_row_count(
ft, -bn->logical_rows_delta);
}
}
if (!ftnode->dirty) {
toku_ft_adjust_logical_row_count(
ft, -ftnode->logical_rows_delta);
}
}
}
toku_ftnode_free(&ftnode);
Expand Down Expand Up @@ -944,8 +947,6 @@ int toku_ftnode_pe_callback(void *ftnode_pv,
basements_to_destroy[num_basements_to_destroy++] = bn;
toku_ft_decrease_stats(&ft->in_memory_stats,
bn->stat64_delta);
toku_ft_adjust_logical_row_count(ft,
-bn->logical_rows_delta);
set_BNULL(node, i);
BP_STATE(node, i) = PT_ON_DISK;
num_partial_evictions++;
Expand Down Expand Up @@ -2652,7 +2653,7 @@ static std::unique_ptr<char[], decltype(&toku_free)> toku_file_get_parent_dir(
return result;
}

static bool toku_create_subdirs_if_needed(const char *path) {
bool toku_create_subdirs_if_needed(const char *path) {
static const mode_t dir_mode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP |
S_IWGRP | S_IXGRP | S_IROTH | S_IXOTH;

Expand Down Expand Up @@ -4563,6 +4564,8 @@ int toku_ft_rename_iname(DB_TXN *txn,
bs_new_name);
}

if (!toku_create_subdirs_if_needed(new_iname_full.get()))
return get_error_errno();
r = toku_os_rename(old_iname_full.get(), new_iname_full.get());
if (r != 0)
return r;
Expand Down
5 changes: 5 additions & 0 deletions storage/tokudb/PerconaFT/ft/ft-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -288,3 +288,8 @@ void toku_ft_set_direct_io(bool direct_io_on);
void toku_ft_set_compress_buffers_before_eviction(bool compress_buffers);

void toku_note_deserialized_basement_node(bool fixed_key_size);

// Creates all directories for the path if necessary,
// returns true if all dirs are created successfully or
// all dirs exist, false otherwise.
bool toku_create_subdirs_if_needed(const char* path);
3 changes: 2 additions & 1 deletion storage/tokudb/PerconaFT/ft/logger/recover.cc
Original file line number Diff line number Diff line change
Expand Up @@ -987,7 +987,8 @@ static int toku_recover_frename(struct logtype_frename *l, RECOVER_ENV renv) {
return 1;

if (old_exist && !new_exist &&
(toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 ||
(!toku_create_subdirs_if_needed(new_iname_full.get()) ||
toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 ||
toku_fsync_directory(old_iname_full.get()) == -1 ||
toku_fsync_directory(new_iname_full.get()) == -1))
return 1;
Expand Down
18 changes: 10 additions & 8 deletions storage/tokudb/PerconaFT/ft/node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -386,15 +386,15 @@ static void bnc_apply_messages_to_basement_node(
const pivot_bounds &
bounds, // contains pivot key bounds of this basement node
txn_gc_info *gc_info,
bool *msgs_applied) {
bool *msgs_applied,
int64_t* logical_rows_delta) {
int r;
NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum);

// Determine the offsets in the message trees between which we need to
// apply messages from this buffer
STAT64INFO_S stats_delta = {0, 0};
uint64_t workdone_this_ancestor = 0;
int64_t logical_rows_delta = 0;

uint32_t stale_lbi, stale_ube;
if (!bn->stale_ancestor_messages_applied) {
Expand Down Expand Up @@ -470,7 +470,7 @@ static void bnc_apply_messages_to_basement_node(
gc_info,
&workdone_this_ancestor,
&stats_delta,
&logical_rows_delta);
logical_rows_delta);
}
} else if (stale_lbi == stale_ube) {
// No stale messages to apply, we just apply fresh messages, and mark
Expand All @@ -482,7 +482,7 @@ static void bnc_apply_messages_to_basement_node(
.gc_info = gc_info,
.workdone = &workdone_this_ancestor,
.stats_to_update = &stats_delta,
.logical_rows_delta = &logical_rows_delta};
.logical_rows_delta = logical_rows_delta};
if (fresh_ube - fresh_lbi > 0)
*msgs_applied = true;
r = bnc->fresh_message_tree
Expand All @@ -503,7 +503,7 @@ static void bnc_apply_messages_to_basement_node(
.gc_info = gc_info,
.workdone = &workdone_this_ancestor,
.stats_to_update = &stats_delta,
.logical_rows_delta = &logical_rows_delta};
.logical_rows_delta = logical_rows_delta};

r = bnc->stale_message_tree
.iterate_on_range<struct iterate_do_bn_apply_msg_extra,
Expand All @@ -521,8 +521,6 @@ static void bnc_apply_messages_to_basement_node(
if (stats_delta.numbytes || stats_delta.numrows) {
toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta);
}
toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
bn->logical_rows_delta += logical_rows_delta;
}

static void
Expand All @@ -536,6 +534,7 @@ apply_ancestors_messages_to_bn(
bool* msgs_applied
)
{
int64_t logical_rows_delta = 0;
BASEMENTNODE curr_bn = BLB(node, childnum);
const pivot_bounds curr_bounds = bounds.next_bounds(node, childnum);
for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) {
Expand All @@ -548,13 +547,16 @@ apply_ancestors_messages_to_bn(
curr_ancestors->childnum,
curr_bounds,
gc_info,
msgs_applied
msgs_applied,
&logical_rows_delta
);
// We don't want to check this ancestor node again if the
// next time we query it, the msn hasn't changed.
curr_bn->max_msn_applied = curr_ancestors->node->max_msn_applied_to_node_on_disk;
}
}
toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
node->logical_rows_delta += logical_rows_delta;
// At this point, we know all the stale messages above this
// basement node have been applied, and any new messages will be
// fresh, so we don't need to look at stale messages for this
Expand Down
54 changes: 33 additions & 21 deletions storage/tokudb/PerconaFT/ft/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,36 +157,49 @@ class ftnode_pivot_keys {

// TODO: class me up
struct ftnode {
MSN max_msn_applied_to_node_on_disk; // max_msn_applied that will be written to disk
// max_msn_applied that will be written to disk
MSN max_msn_applied_to_node_on_disk;
unsigned int flags;
BLOCKNUM blocknum; // Which block number is this node?
int layout_version; // What version of the data structure?
int layout_version_original; // different (<) from layout_version if upgraded from a previous version (useful for debugging)
int layout_version_read_from_disk; // transient, not serialized to disk, (useful for debugging)
uint32_t build_id; // build_id (svn rev number) of software that wrote this node to disk
int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */
int dirty;
// Which block number is this node?
BLOCKNUM blocknum;
// What version of the data structure?
int layout_version;
// different (<) from layout_version if upgraded from a previous version
// (useful for debugging)
int layout_version_original;
// transient, not serialized to disk, (useful for debugging)
int layout_version_read_from_disk;
// build_id (svn rev number) of software that wrote this node to disk
uint32_t build_id;
// height is always >= 0. 0 for leaf, >0 for nonleaf.
int height;
int dirty;
uint32_t fullhash;
// current count of rows added or removed as a result of message application
// to this node as a basement, irrelevant for internal nodes, gets reset
// when node is undirtied. Used to back out tree scoped LRC if node is
// evicted but not persisted
int64_t logical_rows_delta;

// for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced.
// for leaf nodes, represents number of basement nodes
// for internal nodes, if n_children==fanout+1 then the tree needs to be
// rebalanced. for leaf nodes, represents number of basement nodes
int n_children;
ftnode_pivot_keys pivotkeys;

// What's the oldest referenced xid that this node knows about? The real oldest
// referenced xid might be younger, but this is our best estimate. We use it
// as a heuristic to transition provisional mvcc entries from provisional to
// committed (from implicity committed to really committed).
// What's the oldest referenced xid that this node knows about? The real
// oldest referenced xid might be younger, but this is our best estimate.
// We use it as a heuristic to transition provisional mvcc entries from
// provisional to committed (from implicitly committed to really committed).
//
// A better heuristic would be the oldest live txnid, but we use this since it
// still works well most of the time, and its readily available on the inject
// code path.
// A better heuristic would be the oldest live txnid, but we use this since
// it still works well most of the time, and it's readily available on the
// inject code path.
TXNID oldest_referenced_xid_known;

// array of size n_children, consisting of ftnode partitions
// each one is associated with a child
// for internal nodes, the ith partition corresponds to the ith message buffer
// for leaf nodes, the ith partition corresponds to the ith basement node
// each one is associated with a child. for internal nodes, the ith
// partition corresponds to the ith message buffer; for leaf nodes, the ith
// partition corresponds to the ith basement node
struct ftnode_partition *bp;
struct ctpair *ct_pair;
};
Expand All @@ -199,7 +212,6 @@ struct ftnode_leaf_basement_node {
MSN max_msn_applied; // max message sequence number applied
bool stale_ancestor_messages_applied;
STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk
int64_t logical_rows_delta;
};
typedef struct ftnode_leaf_basement_node *BASEMENTNODE;

Expand Down
3 changes: 1 addition & 2 deletions storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -996,7 +996,6 @@ BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) {
bn->seqinsert = orig_bn->seqinsert;
bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied;
bn->stat64_delta = orig_bn->stat64_delta;
bn->logical_rows_delta = orig_bn->logical_rows_delta;
bn->data_buffer.clone(&orig_bn->data_buffer);
return bn;
}
Expand All @@ -1007,7 +1006,6 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) {
bn->seqinsert = 0;
bn->stale_ancestor_messages_applied = false;
bn->stat64_delta = ZEROSTATS;
bn->logical_rows_delta = 0;
bn->data_buffer.init_zero();
return bn;
}
Expand Down Expand Up @@ -1432,6 +1430,7 @@ static FTNODE alloc_ftnode_for_deserialize(uint32_t fullhash, BLOCKNUM blocknum)
node->fullhash = fullhash;
node->blocknum = blocknum;
node->dirty = 0;
node->logical_rows_delta = 0;
node->bp = nullptr;
node->oldest_referenced_xid_known = TXNID_NONE;
return node;
Expand Down
3 changes: 2 additions & 1 deletion storage/tokudb/PerconaFT/ft/txn/roll.cc
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,8 @@ int toku_rollback_frename(BYTESTRING old_iname,
return 1;

if (!old_exist && new_exist &&
(toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 ||
(!toku_create_subdirs_if_needed(old_iname_full.get()) ||
toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 ||
toku_fsync_directory(new_iname_full.get()) == -1 ||
toku_fsync_directory(old_iname_full.get()) == -1))
return 1;
Expand Down
5 changes: 0 additions & 5 deletions storage/tokudb/PerconaFT/util/dmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,6 @@ class dmt {

void convert_from_tree_to_array(void);

__attribute__((nonnull(2,5)))
void delete_internal(subtree *const subtreep, const uint32_t idx, subtree *const subtree_replace, subtree **const rebalance_subtree);

template<typename iterate_extra_t,
Expand Down Expand Up @@ -627,16 +626,12 @@ class dmt {
__attribute__((nonnull))
void rebalance(subtree *const subtree);

__attribute__((nonnull))
static void copyout(uint32_t *const outlen, dmtdata_t *const out, const dmt_node *const n);

__attribute__((nonnull))
static void copyout(uint32_t *const outlen, dmtdata_t **const out, dmt_node *const n);

__attribute__((nonnull))
static void copyout(uint32_t *const outlen, dmtdata_t *const out, const uint32_t len, const dmtdata_t *const stored_value_ptr);

__attribute__((nonnull))
static void copyout(uint32_t *const outlen, dmtdata_t **const out, const uint32_t len, dmtdata_t *const stored_value_ptr);

template<typename dmtcmp_t,
Expand Down
2 changes: 0 additions & 2 deletions storage/tokudb/PerconaFT/util/omt.h
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,6 @@ class omt {
* By taking ownership of the array, we save a malloc and memcpy,
* and possibly a free (if the caller is done with the array).
*/
__attribute__((nonnull))
void create_steal_sorted_array(omtdata_t **const values, const uint32_t numvalues, const uint32_t new_capacity);

/**
Expand Down Expand Up @@ -667,7 +666,6 @@ class omt {

void set_at_internal(const subtree &subtree, const omtdata_t &value, const uint32_t idx);

__attribute__((nonnull(2,5)))
void delete_internal(subtree *const subtreep, const uint32_t idx, omt_node *const copyn, subtree **const rebalance_subtree);

template<typename iterate_extra_t,
Expand Down
Loading

0 comments on commit d71df7e

Please sign in to comment.