diff --git a/.bzrignore b/.bzrignore index 4142df6575fd6..522630264eca9 100644 --- a/.bzrignore +++ b/.bzrignore @@ -1148,289 +1148,24 @@ extra/jemalloc/jemalloc-* extra/jemalloc/build *.tdb storage/tokudb/ft-index/CTestCustom.cmake -storage/tokudb/ft-index/DartConfiguration.tcl -storage/tokudb/ft-index/ctags-stamp -storage/tokudb/ft-index/valgrind.suppressions storage/tokudb/ft-index/xz storage/tokudb/ft-index/buildheader/db.h storage/tokudb/ft-index/buildheader/make_tdb storage/tokudb/ft-index/buildheader/runcat.sh -storage/tokudb/ft-index/ft/ftverify storage/tokudb/ft-index/ft/log_code.cc storage/tokudb/ft-index/ft/log_header.h storage/tokudb/ft-index/ft/log_print.cc storage/tokudb/ft-index/ft/logformat -storage/tokudb/ft-index/ft/tdb-recover -storage/tokudb/ft-index/ft/tdb_logprint -storage/tokudb/ft-index/ft/tokuftdump -storage/tokudb/ft-index/ft/tests/benchmark-test -storage/tokudb/ft-index/ft/tests/block_allocator_test -storage/tokudb/ft-index/ft/tests/bnc-insert-benchmark -storage/tokudb/ft-index/ft/tests/cachetable-4357 -storage/tokudb/ft-index/ft/tests/cachetable-4365 -storage/tokudb/ft-index/ft/tests/cachetable-5097 -storage/tokudb/ft-index/ft/tests/cachetable-5978 -storage/tokudb/ft-index/ft/tests/cachetable-5978-2 -storage/tokudb/ft-index/ft/tests/cachetable-all-write -storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pending -storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pinned-nodes -storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-prefetched-nodes -storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-test -storage/tokudb/ft-index/ft/tests/cachetable-checkpointer-class -storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint -storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint2 -storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-attrs-accumulate -storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-empty-cachetable -storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-everything-pinned -storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing -storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-same-fullhash -storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-simple -storage/tokudb/ft-index/ft/tests/cachetable-clock-all-pinned -storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction -storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction2 -storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction3 -storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction4 -storage/tokudb/ft-index/ft/tests/cachetable-clone-checkpoint -storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch -storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch-pinned-node -storage/tokudb/ft-index/ft/tests/cachetable-clone-pin-nonblocking -storage/tokudb/ft-index/ft/tests/cachetable-clone-unpin-remove -storage/tokudb/ft-index/ft/tests/cachetable-count-pinned-test -storage/tokudb/ft-index/ft/tests/cachetable-debug-test -storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test -storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test2 -storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test -storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test2 -storage/tokudb/ft-index/ft/tests/cachetable-evictor-class -storage/tokudb/ft-index/ft/tests/cachetable-fd-test -storage/tokudb/ft-index/ft/tests/cachetable-fetch-inducing-evictor -storage/tokudb/ft-index/ft/tests/cachetable-flush-during-cleaner -storage/tokudb/ft-index/ft/tests/cachetable-flush-test 
-storage/tokudb/ft-index/ft/tests/cachetable-getandpin-test -storage/tokudb/ft-index/ft/tests/cachetable-kibbutz_and_flush_cachefile -storage/tokudb/ft-index/ft/tests/cachetable-partial-fetch -storage/tokudb/ft-index/ft/tests/cachetable-pin-checkpoint -storage/tokudb/ft-index/ft/tests/cachetable-pin-nonblocking-checkpoint-clean -storage/tokudb/ft-index/ft/tests/cachetable-prefetch-checkpoint-test -storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-leak-test -storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-test -storage/tokudb/ft-index/ft/tests/cachetable-prefetch-flowcontrol-test -storage/tokudb/ft-index/ft/tests/cachetable-prefetch-getandpin-test -storage/tokudb/ft-index/ft/tests/cachetable-prefetch-maybegetandpin-test -storage/tokudb/ft-index/ft/tests/cachetable-prefetch2-test -storage/tokudb/ft-index/ft/tests/cachetable-put-checkpoint -storage/tokudb/ft-index/ft/tests/cachetable-put-test -storage/tokudb/ft-index/ft/tests/cachetable-rwlock-test -storage/tokudb/ft-index/ft/tests/cachetable-simple-clone -storage/tokudb/ft-index/ft/tests/cachetable-simple-clone2 -storage/tokudb/ft-index/ft/tests/cachetable-simple-maybe-get-pin -storage/tokudb/ft-index/ft/tests/cachetable-simple-pin -storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-cheap -storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-dep-nodes -storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking -storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking-cheap -storage/tokudb/ft-index/ft/tests/cachetable-simple-put-dep-nodes -storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin -storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin-nonblocking -storage/tokudb/ft-index/ft/tests/cachetable-simple-unpin-remove-checkpoint -storage/tokudb/ft-index/ft/tests/cachetable-simple-verify -storage/tokudb/ft-index/ft/tests/cachetable-test -storage/tokudb/ft-index/ft/tests/cachetable-unpin-and-remove-test -storage/tokudb/ft-index/ft/tests/cachetable-unpin-remove-and-checkpoint -storage/tokudb/ft-index/ft/tests/cachetable-unpin-test -storage/tokudb/ft-index/ft/tests/cachetable-writer-thread-limit -storage/tokudb/ft-index/ft/tests/comparator-test -storage/tokudb/ft-index/ft/tests/compress-test -storage/tokudb/ft-index/ft/tests/dbufio-test -storage/tokudb/ft-index/ft/tests/dbufio-test-destroy -storage/tokudb/ft-index/ft/tests/fifo-test -storage/tokudb/ft-index/ft/tests/ft-bfe-query -storage/tokudb/ft-index/ft/tests/ft-clock-test -storage/tokudb/ft-index/ft/tests/ft-serialize-benchmark -storage/tokudb/ft-index/ft/tests/ft-serialize-sub-block-test -storage/tokudb/ft-index/ft/tests/ft-serialize-test -storage/tokudb/ft-index/ft/tests/ft-test -storage/tokudb/ft-index/ft/tests/ft-test-cursor -storage/tokudb/ft-index/ft/tests/ft-test-cursor-2 -storage/tokudb/ft-index/ft/tests/ft-test-header -storage/tokudb/ft-index/ft/tests/ft-test0 -storage/tokudb/ft-index/ft/tests/ft-test1 -storage/tokudb/ft-index/ft/tests/ft-test2 -storage/tokudb/ft-index/ft/tests/ft-test3 -storage/tokudb/ft-index/ft/tests/ft-test4 -storage/tokudb/ft-index/ft/tests/ft-test5 -storage/tokudb/ft-index/ft/tests/ftloader-test -storage/tokudb/ft-index/ft/tests/ftloader-test-bad-generate -storage/tokudb/ft-index/ft/tests/ftloader-test-extractor -storage/tokudb/ft-index/ft/tests/ftloader-test-extractor-errors -storage/tokudb/ft-index/ft/tests/ftloader-test-merge-files-dbufio -storage/tokudb/ft-index/ft/tests/ftloader-test-open -storage/tokudb/ft-index/ft/tests/ftloader-test-vm -storage/tokudb/ft-index/ft/tests/ftloader-test-writer 
-storage/tokudb/ft-index/ft/tests/ftloader-test-writer-errors -storage/tokudb/ft-index/ft/tests/is_empty -storage/tokudb/ft-index/ft/tests/keyrange -storage/tokudb/ft-index/ft/tests/keytest -storage/tokudb/ft-index/ft/tests/le-cursor-provdel -storage/tokudb/ft-index/ft/tests/le-cursor-right -storage/tokudb/ft-index/ft/tests/le-cursor-walk -storage/tokudb/ft-index/ft/tests/list-test -storage/tokudb/ft-index/ft/tests/log-test -storage/tokudb/ft-index/ft/tests/log-test-maybe-trim -storage/tokudb/ft-index/ft/tests/log-test2 -storage/tokudb/ft-index/ft/tests/log-test3 -storage/tokudb/ft-index/ft/tests/log-test4 -storage/tokudb/ft-index/ft/tests/log-test5 -storage/tokudb/ft-index/ft/tests/log-test6 -storage/tokudb/ft-index/ft/tests/log-test7 -storage/tokudb/ft-index/ft/tests/logcursor-bad-checksum -storage/tokudb/ft-index/ft/tests/logcursor-empty-logdir -storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile -storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-2 -storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-3 -storage/tokudb/ft-index/ft/tests/logcursor-print -storage/tokudb/ft-index/ft/tests/logcursor-timestamp -storage/tokudb/ft-index/ft/tests/logfilemgr-create-destroy -storage/tokudb/ft-index/ft/tests/logfilemgr-print -storage/tokudb/ft-index/ft/tests/make-tree -storage/tokudb/ft-index/ft/tests/minicron-test -storage/tokudb/ft-index/ft/tests/msnfilter -storage/tokudb/ft-index/ft/tests/omt-test -storage/tokudb/ft-index/ft/tests/orthopush-flush -storage/tokudb/ft-index/ft/tests/pqueue-test -storage/tokudb/ft-index/ft/tests/queue-test -storage/tokudb/ft-index/ft/tests/quicklz-test -storage/tokudb/ft-index/ft/tests/recovery-bad-last-entry -storage/tokudb/ft-index/ft/tests/recovery-cbegin -storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend -storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend-hello -storage/tokudb/ft-index/ft/tests/recovery-cend-cbegin -storage/tokudb/ft-index/ft/tests/recovery-datadir-is-file -storage/tokudb/ft-index/ft/tests/recovery-empty -storage/tokudb/ft-index/ft/tests/recovery-fopen-missing-file -storage/tokudb/ft-index/ft/tests/recovery-hello -storage/tokudb/ft-index/ft/tests/recovery-lsn-error-during-forward-scan -storage/tokudb/ft-index/ft/tests/recovery-no-datadir -storage/tokudb/ft-index/ft/tests/recovery-no-log -storage/tokudb/ft-index/ft/tests/recovery-no-logdir -storage/tokudb/ft-index/ft/tests/recovery-test5123 -storage/tokudb/ft-index/ft/tests/shortcut -storage/tokudb/ft-index/ft/tests/subblock-test-checksum -storage/tokudb/ft-index/ft/tests/subblock-test-compression -storage/tokudb/ft-index/ft/tests/subblock-test-index -storage/tokudb/ft-index/ft/tests/subblock-test-size -storage/tokudb/ft-index/ft/tests/test-assert -storage/tokudb/ft-index/ft/tests/test-bjm -storage/tokudb/ft-index/ft/tests/test-checkpoint-during-flush -storage/tokudb/ft-index/ft/tests/test-checkpoint-during-merge -storage/tokudb/ft-index/ft/tests/test-checkpoint-during-rebalance -storage/tokudb/ft-index/ft/tests/test-checkpoint-during-split -storage/tokudb/ft-index/ft/tests/test-del-inorder -storage/tokudb/ft-index/ft/tests/test-dirty-flushes-on-cleaner -storage/tokudb/ft-index/ft/tests/test-dump-ft -storage/tokudb/ft-index/ft/tests/test-flushes-on-cleaner -storage/tokudb/ft-index/ft/tests/test-ft-overflow -storage/tokudb/ft-index/ft/tests/test-hot-with-bounds -storage/tokudb/ft-index/ft/tests/test-inc-split -storage/tokudb/ft-index/ft/tests/test-leafentry-child-txn -storage/tokudb/ft-index/ft/tests/test-leafentry-nested 
-storage/tokudb/ft-index/ft/tests/test-merges-on-cleaner -storage/tokudb/ft-index/ft/tests/test-oldest-referenced-xid-flush -storage/tokudb/ft-index/ft/tests/test-pick-child-to-flush -storage/tokudb/ft-index/ft/tests/test-txn-child-manager -storage/tokudb/ft-index/ft/tests/test1308a -storage/tokudb/ft-index/ft/tests/test3681 -storage/tokudb/ft-index/ft/tests/test3856 -storage/tokudb/ft-index/ft/tests/test3884 -storage/tokudb/ft-index/ft/tests/test4115 -storage/tokudb/ft-index/ft/tests/test4244 -storage/tokudb/ft-index/ft/tests/test_block_allocator_merge -storage/tokudb/ft-index/ft/tests/test_logcursor -storage/tokudb/ft-index/ft/tests/test_oexcl -storage/tokudb/ft-index/ft/tests/test_toku_malloc_plain_free -storage/tokudb/ft-index/ft/tests/upgrade_test_simple -storage/tokudb/ft-index/ft/tests/verify-bad-msn -storage/tokudb/ft-index/ft/tests/verify-bad-pivots -storage/tokudb/ft-index/ft/tests/verify-dup-in-leaf -storage/tokudb/ft-index/ft/tests/verify-dup-pivots -storage/tokudb/ft-index/ft/tests/verify-misrouted-msgs -storage/tokudb/ft-index/ft/tests/verify-unsorted-leaf -storage/tokudb/ft-index/ft/tests/verify-unsorted-pivots -storage/tokudb/ft-index/ft/tests/x1764-test -storage/tokudb/ft-index/ft/tests/xid_lsn_independent -storage/tokudb/ft-index/ft/tests/ybt-test -storage/tokudb/ft-index/locktree/tests/concurrent_tree_create_destroy -storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_acquire_release -storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_remove -storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_serial_large -storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_remove_all -storage/tokudb/ft-index/locktree/tests/lock_request_create_set -storage/tokudb/ft-index/locktree/tests/lock_request_get_set_keys -storage/tokudb/ft-index/locktree/tests/lock_request_start_deadlock -storage/tokudb/ft-index/locktree/tests/lock_request_start_pending -storage/tokudb/ft-index/locktree/tests/locktree_conflicts -storage/tokudb/ft-index/locktree/tests/locktree_create_destroy -storage/tokudb/ft-index/locktree/tests/locktree_infinity -storage/tokudb/ft-index/locktree/tests/locktree_misc -storage/tokudb/ft-index/locktree/tests/locktree_overlapping_relock -storage/tokudb/ft-index/locktree/tests/locktree_simple_lock -storage/tokudb/ft-index/locktree/tests/locktree_single_txnid_optimization -storage/tokudb/ft-index/locktree/tests/manager_create_destroy -storage/tokudb/ft-index/locktree/tests/manager_locktree_map -storage/tokudb/ft-index/locktree/tests/manager_params -storage/tokudb/ft-index/locktree/tests/manager_reference_release_lt -storage/tokudb/ft-index/locktree/tests/manager_status -storage/tokudb/ft-index/locktree/tests/range_buffer_test -storage/tokudb/ft-index/locktree/tests/txnid_set_test -storage/tokudb/ft-index/locktree/tests/wfg_test storage/tokudb/ft-index/portability/merge_archives_tokuportability_static.cmake +storage/tokudb/ft-index/portability/toku_config.h storage/tokudb/ft-index/portability/tokuportability_static_depends.cc -storage/tokudb/ft-index/portability/tests/test-active-cpus -storage/tokudb/ft-index/portability/tests/test-cache-line-boundary-fails -storage/tokudb/ft-index/portability/tests/test-cpu-freq -storage/tokudb/ft-index/portability/tests/test-cpu-freq-openlimit17 -storage/tokudb/ft-index/portability/tests/test-fair-rwlock -storage/tokudb/ft-index/portability/tests/test-filesystem-sizes -storage/tokudb/ft-index/portability/tests/test-flock -storage/tokudb/ft-index/portability/tests/test-fsync 
-storage/tokudb/ft-index/portability/tests/test-fsync-directory -storage/tokudb/ft-index/portability/tests/test-gettime -storage/tokudb/ft-index/portability/tests/test-gettimeofday -storage/tokudb/ft-index/portability/tests/test-hugepage -storage/tokudb/ft-index/portability/tests/test-max-data -storage/tokudb/ft-index/portability/tests/test-memory-status -storage/tokudb/ft-index/portability/tests/test-pagesize -storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rdlock -storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rwr -storage/tokudb/ft-index/portability/tests/test-pwrite4g -storage/tokudb/ft-index/portability/tests/test-snprintf -storage/tokudb/ft-index/portability/tests/test-stat -storage/tokudb/ft-index/portability/tests/test-toku-malloc -storage/tokudb/ft-index/portability/tests/test-xid -storage/tokudb/ft-index/portability/tests/try-assert-zero -storage/tokudb/ft-index/portability/tests/try-assert0 -storage/tokudb/ft-index/portability/tests/try-leak-lost -storage/tokudb/ft-index/portability/tests/try-leak-reachable -storage/tokudb/ft-index/portability/tests/try-uninit storage/tokudb/ft-index/src/merge_archives_tokufractaltree_static.cmake storage/tokudb/ft-index/src/tokufractaltree_static_depends.cc -storage/tokudb/ft-index/src/tests/recovery_fileops_unit_dir -storage/tokudb/ft-index/toku_include/toku_config.h -storage/tokudb/ft-index/util/tests/marked-omt-test -storage/tokudb/ft-index/util/tests/omt-tmpl-test -storage/tokudb/ft-index/util/tests/sort-tmpl-test -storage/tokudb/ft-index/util/tests/test-kibbutz -storage/tokudb/ft-index/util/tests/test-kibbutz2 -storage/tokudb/ft-index/util/tests/test-rwlock -storage/tokudb/ft-index/util/tests/test-rwlock-cheapness -storage/tokudb/ft-index/util/tests/test_circular_buffer -storage/tokudb/ft-index/util/tests/test_doubly_linked_list -storage/tokudb/ft-index/util/tests/test_partitioned_counter -storage/tokudb/ft-index/util/tests/test_partitioned_counter_5833 -storage/tokudb/ft-index/util/tests/threadpool-test -storage/tokudb/ft-index/util/tests/threadpool-testrunf -storage/tokudb/ft-index/utils/tokudb_dump -storage/tokudb/ft-index/utils/tokudb_gen -storage/tokudb/ft-index/utils/tokudb_load +storage/tokudb/ft-index/tools/ba_replay +storage/tokudb/ft-index/tools/ftverify +storage/tokudb/ft-index/tools/tdb-recover +storage/tokudb/ft-index/tools/tdb_logprint +storage/tokudb/ft-index/tools/tokudb_dump +storage/tokudb/ft-index/tools/tokuftdump libmysql/libmysql_versions.ld scripts/mysql_config.pl diff --git a/sql/slave.h b/sql/slave.h index 6b4bcffe10941..c220f88161982 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -230,22 +230,16 @@ pthread_handler_t handle_slave_sql(void *arg); bool net_request_file(NET* net, const char* fname); extern bool volatile abort_loop; -extern Master_info main_mi, *active_mi; /* active_mi for multi-master */ -extern LIST master_list; +extern Master_info *active_mi; /* active_mi for multi-master */ extern my_bool replicate_same_server_id; extern int disconnect_slave_event_count, abort_slave_event_count ; /* the master variables are defaults read from my.cnf or command line */ -extern uint master_port, master_connect_retry, report_port; -extern char * master_user, *master_password, *master_host; +extern uint report_port; extern char *master_info_file, *report_user; extern char *report_host, *report_password; -extern my_bool master_ssl; -extern char *master_ssl_ca, *master_ssl_capath, *master_ssl_cert; -extern char *master_ssl_cipher, *master_ssl_key; - extern I_List threads; #else diff --git 
a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt index 2951a8d3a7e3d..71530d883c228 100644 --- a/storage/tokudb/CMakeLists.txt +++ b/storage/tokudb/CMakeLists.txt @@ -75,15 +75,12 @@ set_cflags_if_supported(-Wno-missing-field-initializers) ADD_SUBDIRECTORY(ft-index) -# TODO: clean up includes in ft-index INCLUDE_DIRECTORIES(ft-index) -INCLUDE_DIRECTORIES(ft-index/include) INCLUDE_DIRECTORIES(ft-index/portability) -INCLUDE_DIRECTORIES(ft-index/toku_include) INCLUDE_DIRECTORIES(ft-index/util) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/ft-index) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/ft-index/buildheader) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/ft-index/toku_include) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/ft-index/portability) SET(TOKUDB_PLUGIN_DYNAMIC "ha_tokudb") SET(TOKUDB_SOURCES ha_tokudb.cc) diff --git a/storage/tokudb/README.md b/storage/tokudb/README.md index e49e26f118d5a..ff1773fc2b0a4 100644 --- a/storage/tokudb/README.md +++ b/storage/tokudb/README.md @@ -1,17 +1,17 @@ TokuDB ====== -TokuDB is a high-performance, transactional storage engine for MySQL and +TokuDB is a high-performance, write optimized, transactional storage engine for MySQL and MariaDB. For more details, see our [product page][products]. -This repository contains the MySQL plugin that uses the [TokuKV][tokukv] +This repository contains the MySQL plugin that uses the [TokuFT][tokuft] core. There are also patches to the MySQL and MariaDB kernels, available in our forks of [mysql][mysql] and [mariadb][mariadb]. [products]: http://www.tokutek.com/products/tokudb-for-mysql/ -[tokukv]: http://github.com/Tokutek/ft-index +[tokuft]: http://github.com/Tokutek/ft-index [mysql]: http://github.com/Tokutek/mysql [mariadb]: http://github.com/Tokutek/mariadb diff --git a/storage/tokudb/ft-index/CMakeLists.txt b/storage/tokudb/ft-index/CMakeLists.txt index ce10c0a0219eb..c6846dae6790e 100644 --- a/storage/tokudb/ft-index/CMakeLists.txt +++ b/storage/tokudb/ft-index/CMakeLists.txt @@ -51,14 +51,12 @@ if (USE_VALGRIND AND NOT VALGRIND_INCLUDE_DIR MATCHES NOTFOUND) ) endif() include_directories( - ${CMAKE_CURRENT_SOURCE_DIR}/include - ${CMAKE_CURRENT_SOURCE_DIR}/toku_include ${CMAKE_CURRENT_SOURCE_DIR}/portability ${CMAKE_CURRENT_SOURCE_DIR} ## so you can include from inside src/ ${CMAKE_CURRENT_BINARY_DIR} ## for logging code ) ## include where config.h will be generated -include_directories(${CMAKE_CURRENT_BINARY_DIR}/toku_include) +include_directories(${CMAKE_CURRENT_BINARY_DIR}/portability) ## build db.h and include where it will be generated add_subdirectory(buildheader) @@ -76,12 +74,7 @@ add_subdirectory(portability) add_subdirectory(ft) add_subdirectory(locktree) add_subdirectory(src) -add_subdirectory(utils) - -## subdirectories that just install things -#add_subdirectory(include) -add_subdirectory(toku_include) -#add_subdirectory(examples) +add_subdirectory(tools) INSTALL_DOCUMENTATION(README.md README-TOKUDB COMPONENT Server) diff --git a/storage/tokudb/ft-index/CTestCustom.cmake b/storage/tokudb/ft-index/CTestCustom.cmake new file mode 100644 index 0000000000000..62b592a5149d2 --- /dev/null +++ b/storage/tokudb/ft-index/CTestCustom.cmake @@ -0,0 +1,241 @@ +cmake_policy(SET CMP0012 NEW) + +## these tests shouldn't run with valgrind +list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE + ft/bnc-insert-benchmark + ft/ft_loader-test-extractor-1 + ft/ft_loader-test-extractor-2 + ft/ft_loader-test-extractor-3 + ft/upgrade_test_simple + portability/test-cache-line-boundary-fails + 
portability/try-leak-lost + portability/try-leak-reachable + portability/try-leak-uninit + util/helgrind_test_circular_buffer + util/helgrind_test_partitioned_counter + util/helgrind_test_partitioned_counter_5833 + ydb/diskfull.tdb + ydb/drd_test_4015.tdb + ydb/drd_test_groupcommit_count.tdb + ydb/filesize.tdb + ydb/helgrind_helgrind1.tdb + ydb/helgrind_helgrind2.tdb + ydb/helgrind_helgrind3.tdb + ydb/helgrind_test_groupcommit_count.tdb + ydb/hot-optimize-table-tests.tdb + ydb/insert-dup-prelock.tdb + ydb/loader-cleanup-test2.tdb + ydb/loader-cleanup-test3.tdb + ydb/loader-stress-test4.tdb + ydb/maxsize-for-loader-B.tdb + ydb/openlimit17.tdb + ydb/openlimit17-locktree.tdb + ydb/preload-db-nested.tdb + ydb/stress-gc.tdb + ydb/stress-gc2.tdb + ydb/stress-test.tdb + ydb/test-5138.tdb + ydb/test-prepare.tdb + ydb/test-prepare2.tdb + ydb/test-prepare3.tdb + ydb/test-recover1.tdb + ydb/test-recover2.tdb + ydb/test-recover3.tdb + ydb/test-xa-prepare.tdb + ydb/test4573-logtrim.tdb + ydb/test_3645.tdb + ydb/test_groupcommit_perf.tdb + ydb/test_large_update_broadcast_small_cachetable.tdb + ydb/test_update_broadcast_stress.tdb + ydb/test_update_stress.tdb + ydb/upgrade-test-4.tdb + ) + +if (NOT @RUN_HELGRIND_TESTS@) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE + util/helgrind_test_circular_buffer + util/helgrind_test_partitioned_counter + util/helgrind_test_partitioned_counter_5833 + ydb/helgrind_helgrind1.tdb + ydb/helgrind_helgrind2.tdb + ydb/helgrind_helgrind3.tdb + ydb/helgrind_test_groupcommit_count.tdb + ) +endif () + +if (NOT @RUN_DRD_TESTS@) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE + ydb/drd_test_groupcommit_count.tdb + ydb/drd_test_4015.tdb + ) +endif () + +## osx's pthreads prefer writers, so this test will deadlock +if (@CMAKE_SYSTEM_NAME@ STREQUAL Darwin) + list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE portability/test-pthread-rwlock-rwr) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE portability/test-pthread-rwlock-rwr) +endif () + +## tests that are supposed to crash will generate memcheck failures +set(tests_that_should_fail + ft/test-assertA + ft/test-assertB + portability/try-assert-zero + portability/try-assert0 + ydb/recover-missing-dbfile-2.abortrecover + ydb/recover-missing-dbfile.abortrecover + ydb/test_db_no_env.tdb + ydb/test_truncate_txn_abort.tdb + ) +list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${tests_that_should_fail}) + +## don't run drd stress tests with valgrind either (because that would do valgrind twice) +set(stress_tests + test_stress0.tdb + test_stress1.tdb + test_stress2.tdb + test_stress3.tdb + test_stress4.tdb + test_stress5.tdb + test_stress6.tdb + test_stress7.tdb + test_stress_hot_indexing.tdb + test_stress_openclose.tdb + test_stress_with_verify.tdb + ) +foreach(test ${stress_tests}) + list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE + ydb/drd_tiny_${test} + ydb/drd_mid_${test} + ydb/drd_large_${test} + ) + if(NOT @RUN_LONG_TESTS@) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE + ydb/drd_large_${test} + ) + endif() + if (NOT @RUN_DRD_TESTS@) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE + ydb/drd_tiny_${test} + ydb/drd_mid_${test} + ydb/drd_large_${test} + ) + endif () +endforeach(test) + +## upgrade stress tests are 5 minutes long, don't need to run them always +if(NOT @RUN_LONG_TESTS@) + foreach(test ${stress_tests}) + if (NOT ${test} MATCHES test_stress_openclose) + foreach(oldver 4.2.0 5.0.8 5.2.7 6.0.0 6.1.0 6.5.1 6.6.3) + foreach(p_or_s pristine stressed) + if (NOT (${test} MATCHES test_stress4 AND ${p_or_s} MATCHES stressed)) + foreach(size 2000) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE 
ydb/${test}/upgrade/${oldver}/${p_or_s}/${size}) + endforeach(size) + endif () + endforeach(p_or_s) + endforeach(oldver) + endif () + endforeach(test) +endif() + +set(tdb_tests_that_should_fail "ydb/${stress_tests}") +string(REGEX REPLACE ";" ";ydb/" stress_tests "${stress_tests}") + +set(recover_stress_tests + ydb/recover-test_stress1.abortrecover + ydb/recover-test_stress2.abortrecover + ydb/recover-test_stress3.abortrecover + ydb/recover-test_stress_openclose.abortrecover + ) + +## we run stress tests separately, only run them if asked to +if(NOT @RUN_STRESS_TESTS@) + list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${stress_tests} ${recover_stress_tests}) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${stress_tests} ${recover_stress_tests}) +endif() + +set(perf_tests + ydb/perf_checkpoint_var.tdb + ydb/perf_cursor_nop.tdb + ydb/perf_malloc_free.tdb + ydb/perf_nop.tdb + ydb/perf_ptquery.tdb + ydb/perf_ptquery2.tdb + ydb/perf_read_write.tdb + ydb/perf_xmalloc_free.tdb + ) + +## we also don't need to run perf tests every time +if(NOT @RUN_PERF_TESTS@) + list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${perf_tests}) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${perf_tests}) +endif() + +## don't run perf tests with valgrind (that's slow) +file(GLOB perf_test_srcs RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/src/tests" perf_*.cc) +string(REGEX REPLACE "\\.cc(;|$)" ".tdb\\1" perf_tests "${perf_test_srcs}") +set(tdb_tests_that_should_fail "ydb/${perf_tests}") +string(REGEX REPLACE ";" ";ydb/" perf_tests "${perf_tests}") +list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${perf_tests}) + +## these tests fail often and aren't helpful +set(known_failing_tests + ydb/diskfull.tdb + ) +list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${known_failing_tests}) +list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${known_failing_tests}) + +## these tests take a long time, only run them if asked to +set(long_running_tests + ft/is_empty + ft/upgrade_test_simple + ydb/checkpoint_1.tdb + ydb/checkpoint_stress.tdb + ydb/hotindexer-with-queries.tdb + ydb/hot-optimize-table-tests.tdb + ydb/loader-cleanup-test0.tdb + ydb/loader-cleanup-test0z.tdb + ydb/loader-cleanup-test2.tdb + ydb/loader-cleanup-test2z.tdb + ydb/loader-stress-test4.tdb + ydb/loader-stress-test4z.tdb + ydb/manyfiles.tdb + ydb/preload-db-nested.tdb + ydb/recover_stress.tdb + ydb/root_fifo_1.tdb + ydb/root_fifo_2.tdb + ydb/root_fifo_31.tdb + ydb/root_fifo_32.tdb + ydb/stress-gc.tdb + ydb/stress-test.tdb + ydb/test3529.tdb + ydb/test_logmax.tdb + ydb/test_txn_nested2.tdb + ydb/test_update_broadcast_stress.tdb + ydb/test_update_stress.tdb + ) +if(NOT @RUN_LONG_TESTS@) + list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${long_running_tests}) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${long_running_tests}) +endif() + +## ignore log_print.cc in coverage report +list(APPEND CTEST_CUSTOM_COVERAGE_EXCLUDE "log_print.cc") + +list(APPEND CTEST_CUSTOM_WARNING_EXCEPTION + # don't complain about warnings in xz source + "xz-4.999.9beta/src/liblzma" + # don't complain about clang missing warnings from xz code + "clang: warning: unknown warning option" + # don't complain about warnings in jemalloc source + "jemalloc/src" + "jemalloc/internal" + # don't complain about valgrind headers leaving things unused + "valgrind/valgrind.h" + "valgrind/memcheck.h" + # don't complain about ranlib or libtool on empty archive + "has no symbols" + "the table of contents is empty" + ) diff --git a/storage/tokudb/ft-index/README-TOKUDB b/storage/tokudb/ft-index/README-TOKUDB index 68fb40b36712d..7d70059a912f8 100644 --- 
a/storage/tokudb/ft-index/README-TOKUDB +++ b/storage/tokudb/ft-index/README-TOKUDB @@ -25,7 +25,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/README.md b/storage/tokudb/ft-index/README.md index 72b8988165a4d..2914ff9be2c00 100644 --- a/storage/tokudb/ft-index/README.md +++ b/storage/tokudb/ft-index/README.md @@ -1,16 +1,16 @@ -TokuKV +TokuFT ====== -TokuKV is a high-performance, transactional key-value store, used in the +TokuFT is a high-performance, transactional key-value store, used in the TokuDB storage engine for MySQL and MariaDB and in TokuMX, the high-performance MongoDB distribution. -TokuKV is provided as a shared library with an interface similar to +TokuFT is provided as a shared library with an interface similar to Berkeley DB. To build the full MySQL product, see the instructions for [Tokutek/ft-engine][ft-engine]. To build TokuMX, see the instructions -for [Tokutek/mongo][mongo]. This document covers TokuKV only. +for [Tokutek/mongo][mongo]. This document covers TokuFT only. [ft-engine]: https://github.com/Tokutek/ft-engine [mongo]: https://github.com/Tokutek/mongo @@ -19,7 +19,7 @@ for [Tokutek/mongo][mongo]. This document covers TokuKV only. Building -------- -TokuKV is built using CMake >= 2.8.9. Out-of-source builds are +TokuFT is built using CMake >= 2.8.9. Out-of-source builds are recommended. You need a C++11 compiler, though only GCC >= 4.7 and Apple's Clang are tested. You also need zlib development packages (`yum install zlib-devel` or `apt-get install zlib1g-dev`). @@ -35,7 +35,6 @@ mkdir build cd build CC=gcc47 CXX=g++47 cmake \ -D CMAKE_BUILD_TYPE=Debug \ - -D USE_BDB=OFF \ -D BUILD_TESTING=OFF \ -D USE_VALGRIND=OFF \ -D CMAKE_INSTALL_PREFIX=../prefix/ \ @@ -50,14 +49,14 @@ to that if you are planning to run benchmarks or in production. ### Platforms -TokuKV is supported on 64-bit Centos, should work on other 64-bit linux -distributions, and may work on OSX 10.8 and FreeBSD. TokuKV is not +TokuFT is supported on 64-bit Centos, should work on other 64-bit linux +distributions, and may work on OSX 10.8 and FreeBSD. TokuFT is not supported on 32-bit systems. [Transparent hugepages][transparent-hugepages] is a feature in newer linux kernel versions that causes problems for the memory usage tracking -calculations in TokuKV and can lead to memory overcommit. If you have -this feature enabled, TokuKV will not start, and you should turn it off. +calculations in TokuFT and can lead to memory overcommit. If you have +this feature enabled, TokuFT will not start, and you should turn it off. If you want to run with transparent hugepages on, you can set an environment variable `TOKU_HUGE_PAGES_OK=1`, but only do this for testing, and only with a small cache size. @@ -68,31 +67,26 @@ and only with a small cache size. Examples -------- -There are some sample programs that can use either TokuKV or Berkeley DB +There are some sample programs that can use either TokuFT or Berkeley DB in the `examples/` directory. Follow the above instructions to build and -install TokuKV, and then look in the installed `examples/` directory for +install TokuFT, and then look in the installed `examples/` directory for instructions on building and running them. Testing ------- -TokuKV uses CTest for testing. The CDash testing dashboard is not +TokuFT uses CTest for testing. 
The CDash testing dashboard is not currently public, but you can run the tests without submitting them. There are some large data files not stored in the git repository, that will be made available soon. For now, the tests that use these files will not run. -Many of the tests are linked with both TokuKV and Berkeley DB, as a sanity -check on the tests themselves. To build these tests, you will need -Berkeley DB and its header files installed. If you do not have Berkeley -DB installed, just don't pass `USE_BDB=ON`. - In the build directory from above: ```sh -cmake -D BUILD_TESTING=ON [-D USE_BDB=ON] .. +cmake -D BUILD_TESTING=ON .. ctest -D ExperimentalStart \ -D ExperimentalConfigure \ -D ExperimentalBuild \ @@ -103,7 +97,7 @@ ctest -D ExperimentalStart \ Contributing ------------ -Please report bugs in TokuKV here on github. +Please report bugs in TokuFT to the [issue tracker][jira]. We have two publicly accessible mailing lists for TokuDB: @@ -121,11 +115,13 @@ and two for TokuMX: We are also available on IRC on freenode.net, in the #tokutek channel. +[jira]: https://tokutek.atlassian.net/browse/FT/ + License ------- -TokuKV is available under the GPL version 2, with slight modifications. +TokuFT is available under the GPL version 2, with slight modifications. See [README-TOKUDB][license]. [license]: http://github.com/Tokutek/ft-index/blob/master/README-TOKUDB diff --git a/storage/tokudb/ft-index/buildheader/make_tdb.cc b/storage/tokudb/ft-index/buildheader/make_tdb.cc index 037822894cf1a..9890b8ed34bd2 100644 --- a/storage/tokudb/ft-index/buildheader/make_tdb.cc +++ b/storage/tokudb/ft-index/buildheader/make_tdb.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -341,8 +341,8 @@ static void print_defines (void) { dodefine_from_track(txn_flags, DB_TXN_READ_ONLY); } - /* TOKUDB specific error codes*/ - printf("/* TOKUDB specific error codes */\n"); + /* TokuFT specific error codes*/ + printf("/* TokuFT specific error codes */\n"); dodefine(TOKUDB_OUT_OF_LOCKS); dodefine(TOKUDB_SUCCEEDED_EARLY); dodefine(TOKUDB_FOUND_BUT_REJECTED); @@ -422,7 +422,7 @@ static void print_db_env_struct (void) { "int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. */", "int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. */", "int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */", - "int (*checkpointing_resume) (DB_ENV*) /* Alert tokudb 'postpone' is no longer necessary */", + "int (*checkpointing_resume) (DB_ENV*) /* Alert tokuft that 'postpone' is no longer necessary */", "int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */", "int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */", "int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. 
*/", @@ -465,6 +465,7 @@ static void print_db_env_struct (void) { "void (*set_loader_memory_size)(DB_ENV *env, uint64_t (*get_loader_memory_size_callback)(void))", "uint64_t (*get_loader_memory_size)(DB_ENV *env)", "void (*set_killed_callback)(DB_ENV *env, uint64_t default_killed_time_msec, uint64_t (*get_killed_time_callback)(uint64_t default_killed_time_msec), int (*killed_callback)(void))", + "void (*do_backtrace) (DB_ENV *env)", NULL}; sort_and_dump_fields("db_env", true, extra); @@ -545,6 +546,7 @@ static void print_db_struct (void) { "int (*change_fanout)(DB *db, uint32_t fanout)", "int (*get_fanout)(DB *db, uint32_t *fanout)", "int (*set_fanout)(DB *db, uint32_t fanout)", + "int (*set_memcmp_magic)(DB *db, uint8_t magic)", "int (*set_indexer)(DB*, DB_INDEXER*)", "void (*get_indexer)(DB*, DB_INDEXER**)", "int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going)", @@ -573,11 +575,10 @@ static void print_db_txn_struct (void) { STRUCT_SETUP(DB_TXN, prepare, "int (*%s) (DB_TXN*, uint8_t gid[DB_GID_SIZE])"); STRUCT_SETUP(DB_TXN, discard, "int (*%s) (DB_TXN*, uint32_t)"); STRUCT_SETUP(DB_TXN, id, "uint32_t (*%s) (DB_TXN *)"); - STRUCT_SETUP(DB_TXN, mgrp, "DB_ENV *%s /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/"); + STRUCT_SETUP(DB_TXN, mgrp, "DB_ENV *%s /* In TokuFT, mgrp is a DB_ENV, not a DB_TXNMGR */"); STRUCT_SETUP(DB_TXN, parent, "DB_TXN *%s"); const char *extra[] = { "int (*txn_stat)(DB_TXN *, struct txn_stat **)", - "struct toku_list open_txns", "int (*commit_with_progress)(DB_TXN*, uint32_t, TXN_PROGRESS_POLL_FUNCTION, void*)", "int (*abort_with_progress)(DB_TXN*, TXN_PROGRESS_POLL_FUNCTION, void*)", "int (*xa_prepare) (DB_TXN*, TOKU_XA_XID *)", @@ -614,6 +615,7 @@ static void print_dbc_struct (void) { "int (*c_set_bounds)(DBC*, const DBT*, const DBT*, bool pre_acquire, int out_of_range_error)", "void (*c_set_check_interrupt_callback)(DBC*, bool (*)(void*), void *)", "void (*c_remove_restriction)(DBC*)", + "char _internal[512]", NULL}; sort_and_dump_fields("dbc", false, extra); } @@ -635,12 +637,11 @@ int main (int argc, char *const argv[] __attribute__((__unused__))) { //printf("#include \n"); printf("#if defined(__cplusplus) || defined(__cilkplusplus)\nextern \"C\" {\n#endif\n"); - printf("#define TOKUDB 1\n"); printf("#define DB_VERSION_MAJOR %d\n", DB_VERSION_MAJOR); printf("#define DB_VERSION_MINOR %d\n", DB_VERSION_MINOR); - printf("/* As of r40364 (post TokuDB 5.2.7), the patch version number is 100+ the BDB header patch version number.*/\n"); + printf("/* As of r40364 (post TokuFT 5.2.7), the patch version number is 100+ the BDB header patch version number.*/\n"); printf("#define DB_VERSION_PATCH %d\n", 100+DB_VERSION_PATCH); - printf("#define DB_VERSION_STRING \"Tokutek: TokuDB %d.%d.%d\"\n", DB_VERSION_MAJOR, DB_VERSION_MINOR, 100+DB_VERSION_PATCH); + printf("#define DB_VERSION_STRING \"Tokutek: TokuFT %d.%d.%d\"\n", DB_VERSION_MAJOR, DB_VERSION_MINOR, 100+DB_VERSION_PATCH); #ifndef DB_GID_SIZE #define DB_GID_SIZE DB_XIDDATASIZE @@ -654,7 +655,6 @@ int main (int argc, char *const argv[] __attribute__((__unused__))) { " char data[DB_GID_SIZE];\n" "} TOKU_XA_XID;\n"); - //Typedef toku_off_t printf("#ifndef TOKU_OFF_T_DEFINED\n" "#define TOKU_OFF_T_DEFINED\n" "typedef int64_t toku_off_t;\n" @@ -673,7 +673,10 @@ int main (int argc, char *const argv[] __attribute__((__unused__))) { printf("typedef uint32_t db_recno_t;\n"); printf("typedef int(*YDB_CALLBACK_FUNCTION)(DBT const*, DBT 
const*, void*);\n"); - printf("#include \n"); + printf("struct simple_dbt {\n"); + printf(" uint32_t len;\n"); + printf(" void *data;\n"); + printf("};\n"); //stat64 printf("typedef struct __toku_db_btree_stat64 {\n"); diff --git a/storage/tokudb/ft-index/cmake/merge_archives_unix.cmake.in b/storage/tokudb/ft-index/cmake/merge_archives_unix.cmake.in index e52baa6421f45..66e23a824bdf3 100644 --- a/storage/tokudb/ft-index/cmake/merge_archives_unix.cmake.in +++ b/storage/tokudb/ft-index/cmake/merge_archives_unix.cmake.in @@ -26,15 +26,54 @@ SET(CMAKE_RANLIB "@CMAKE_RANLIB@") SET(TEMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/merge_archives_${TARGET}) MAKE_DIRECTORY(${TEMP_DIR}) -# Extract each archive to its own subdirectory(avoid object filename clashes) +# Extract each archive to its own subdirectory(avoid object filename +# clashes) Since the lib may contain objects with the same name, we first +# list the archive contents, then uniquify the object names as we extract +# them. FOREACH(LIB ${STATIC_LIBS}) GET_FILENAME_COMPONENT(NAME_NO_EXT ${LIB} NAME_WE) SET(TEMP_SUBDIR ${TEMP_DIR}/${NAME_NO_EXT}) MAKE_DIRECTORY(${TEMP_SUBDIR}) EXECUTE_PROCESS( - COMMAND ${CMAKE_AR} -x ${LIB} - WORKING_DIRECTORY ${TEMP_SUBDIR} + COMMAND ${CMAKE_AR} -t ${LIB} + OUTPUT_VARIABLE LIB_OBJS ) + STRING(REGEX REPLACE "\n" ";" LIB_OBJ_LIST "${LIB_OBJS}") + STRING(REGEX REPLACE ";$" "" LIB_OBJ_LIST "${LIB_OBJ_LIST}") + + LIST(LENGTH LIB_OBJ_LIST LENGTH_WITH_DUPS) + SET(LIB_OBJ_LIST_NO_DUPS ${LIB_OBJ_LIST}) + IF (LENGTH_WITH_DUPS GREATER 0) + LIST(REMOVE_DUPLICATES LIB_OBJ_LIST_NO_DUPS) + ENDIF () + LIST(LENGTH LIB_OBJ_LIST_NO_DUPS LENGTH_WITHOUT_DUPS) + + IF(LENGTH_WITH_DUPS EQUAL LENGTH_WITHOUT_DUPS) + # Optimization for when lib doesn't actually have duplicate object + # names, we can just extract everything. + EXECUTE_PROCESS( + COMMAND ${CMAKE_AR} -x ${LIB} + WORKING_DIRECTORY ${TEMP_SUBDIR} + ) + ELSE() + LIST(SORT LIB_OBJ_LIST) + SET(SAME_OBJ_COUNT 1) + SET(LAST_OBJ_NAME) + FOREACH(OBJ ${LIB_OBJ_LIST}) + IF(OBJ STREQUAL LAST_OBJ_NAME) + GET_FILENAME_COMPONENT(OBJ_NO_EXT ${OBJ} NAME_WE) + FILE(RENAME "${TEMP_SUBDIR}/${OBJ}" "${TEMP_SUBDIR}/${OBJ_NO_EXT}.${SAME_OBJ_COUNT}.o") + MATH(EXPR SAME_OBJ_COUNT "${SAME_OBJ_COUNT}+1") + ELSE() + SET(SAME_OBJ_COUNT 1) + ENDIF() + SET(LAST_OBJ_NAME "${OBJ}") + EXECUTE_PROCESS( + COMMAND ${CMAKE_AR} -xN ${SAME_OBJ_COUNT} ${LIB} ${OBJ} + WORKING_DIRECTORY ${TEMP_SUBDIR} + ) + ENDFOREACH() + ENDIF() FILE(GLOB_RECURSE LIB_OBJECTS "${TEMP_SUBDIR}/*.o") SET(OBJECTS ${OBJECTS} ${LIB_OBJECTS}) @@ -51,11 +90,7 @@ ENDFOREACH() FILE(TO_NATIVE_PATH ${TARGET_LOCATION} ${TARGET_LOCATION}) # Now pack the objects into library with ar. 
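Referring back to the make_tdb.cc hunk above: the generated db.h gains two new entry points, `int (*set_memcmp_magic)(DB *db, uint8_t magic)` on DB and `void (*do_backtrace)(DB_ENV *env)` on DB_ENV. A minimal sketch of how client code might invoke them, assuming an already-created environment and dictionary (the two member signatures come from the diff; the setup, the magic value, and any ordering constraints are assumptions, not part of this patch):

```c
/* Illustrative only: the two member signatures are taken from the diff above;
 * the setup, error handling, and the magic value are assumptions. */
#include <assert.h>
#include <db.h>   /* the header generated by buildheader/make_tdb */

static void exercise_new_entries(DB_ENV *env, DB *db) {
    /* Tag this dictionary's keys with a magic byte (new DB method). */
    int r = db->set_memcmp_magic(db, 0x2a);
    assert(r == 0);

    /* Ask the engine to dump a backtrace, e.g. for diagnostics (new DB_ENV method). */
    env->do_backtrace(env);
}
```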
EXECUTE_PROCESS( - COMMAND ${CMAKE_AR} -r ${TARGET_LOCATION} ${ALL_OBJECTS} - WORKING_DIRECTORY ${TEMP_DIR} -) -EXECUTE_PROCESS( - COMMAND ${CMAKE_RANLIB} ${TARGET_LOCATION} + COMMAND ${CMAKE_AR} rcs ${TARGET_LOCATION} ${ALL_OBJECTS} WORKING_DIRECTORY ${TEMP_DIR} ) diff --git a/storage/tokudb/ft-index/cmake_modules/FindBDB.cmake b/storage/tokudb/ft-index/cmake_modules/FindBDB.cmake deleted file mode 100644 index 495f2e87b3e5d..0000000000000 --- a/storage/tokudb/ft-index/cmake_modules/FindBDB.cmake +++ /dev/null @@ -1,27 +0,0 @@ -# - Try to find BDB -# Once done this will define -# BDB_FOUND - System has BDB -# BDB_INCLUDE_DIRS - The BDB include directories -# BDB_LIBRARIES - The libraries needed to use BDB -# BDB_DEFINITIONS - Compiler switches required for using BDB - -find_path(BDB_INCLUDE_DIR db.h) - -find_library(BDB_LIBRARY NAMES db libdb) - -include(CheckSymbolExists) -## check if the found bdb has DB_TXN_SNAPSHOT -set(CMAKE_REQUIRED_INCLUDES ${BDB_INCLUDE_DIR}) -check_symbol_exists(DB_TXN_SNAPSHOT "db.h" HAVE_DB_TXN_SNAPSHOT) -if(HAVE_DB_TXN_SNAPSHOT) - set(BDB_INCLUDE_DIRS ${BDB_INCLUDE_DIR}) - set(BDB_LIBRARIES ${BDB_LIBRARY}) - - include(FindPackageHandleStandardArgs) - # handle the QUIETLY and REQUIRED arguments and set BDB_FOUND to TRUE - # if all listed variables are TRUE - find_package_handle_standard_args(BDB DEFAULT_MSG - BDB_LIBRARY BDB_INCLUDE_DIR) - - mark_as_advanced(BDB_INCLUDE_DIR BDB_LIBRARY) -endif() diff --git a/storage/tokudb/ft-index/cmake_modules/TokuBuildTagDatabases.cmake b/storage/tokudb/ft-index/cmake_modules/TokuBuildTagDatabases.cmake deleted file mode 100644 index e764ad30c07b6..0000000000000 --- a/storage/tokudb/ft-index/cmake_modules/TokuBuildTagDatabases.cmake +++ /dev/null @@ -1,128 +0,0 @@ -## set up lists of sources and headers for tags -file(GLOB_RECURSE all_srcs - buildheader/*.cc - db-benchmark-test/*.cc - ft/*.cc - include/*.cc - locktree/*.cc - portability/*.cc - src/*.cc - toku_include/*.cc - utils/*.cc - util/*.cc - db-benchmark-test/*.cc - ) -list(APPEND all_srcs - ${CMAKE_CURRENT_BINARY_DIR}/ft/log_code.cc - ${CMAKE_CURRENT_BINARY_DIR}/ft/log_print.cc - ) -file(GLOB_RECURSE all_hdrs - buildheader/*.h - db-benchmark-test/*.h - ft/*.h - include/*.h - locktree/*.h - portability/*.h - src/*.h - toku_include/*.h - utils/*.h - util/*.h - db-benchmark-test/*.h - ) -list(APPEND all_hdrs - ${CMAKE_CURRENT_BINARY_DIR}/toku_include/toku_config.h - ${CMAKE_CURRENT_BINARY_DIR}/buildheader/db.h - ${CMAKE_CURRENT_BINARY_DIR}/ft/log_header.h - ) - -option(USE_CTAGS "Build the ctags database." ON) -if (USE_CTAGS AND - # Macs by default are not case-sensitive, so tags and TAGS clobber each other. Do etags and not ctags in that case, because Emacs is superior. :P - (NOT APPLE OR NOT USE_ETAGS)) - find_program(CTAGS "ctags") - if (NOT CTAGS MATCHES NOTFOUND) - add_custom_command( - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/tags" - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/ctags-stamp" - COMMAND ${CTAGS} -o tags ${all_srcs} ${all_hdrs} - COMMAND touch "${CMAKE_CURRENT_BINARY_DIR}/ctags-stamp" - DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") - add_custom_target(build_ctags ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/tags" ctags-stamp) - endif () -endif () - -option(USE_ETAGS "Build the etags database." 
ON) -if (USE_ETAGS) - find_program(ETAGS "etags") - if (NOT ETAGS MATCHES NOTFOUND) - add_custom_command( - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/TAGS" - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/etags-stamp" - COMMAND ${ETAGS} -o TAGS ${all_srcs} ${all_hdrs} - COMMAND touch "${CMAKE_CURRENT_BINARY_DIR}/etags-stamp" - DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") - add_custom_target(build_etags ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/TAGS" etags-stamp) - endif () -endif () - -option(USE_CSCOPE "Build the cscope database." ON) -if (USE_CSCOPE) - find_program(CSCOPE "cscope") - if (NOT CSCOPE MATCHES NOTFOUND) - file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/cscope.files" "") - foreach(file ${all_srcs} ${all_hdrs}) - file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/cscope.files" "${file}\n") - endforeach(file) - add_custom_command( - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/cscope.out" - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/cscope.in.out" - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/cscope.po.out" - COMMAND ${CSCOPE} -b -q -R -i"${CMAKE_CURRENT_BINARY_DIR}/cscope.files" -I"${CMAKE_CURRENT_SOURCE_DIR}" -I"${CMAKE_CURRENT_SOURCE_DIR}/include" -I"${CMAKE_CURRENT_SOURCE_DIR}/toku_include" -I"${CMAKE_CURRENT_SOURCE_DIR}/portability" -I"${CMAKE_CURRENT_SOURCE_DIR}/ft" -I"${CMAKE_CURRENT_SOURCE_DIR}/src" -I"${CMAKE_CURRENT_SOURCE_DIR}/locktree" -I"${CMAKE_CURRENT_SOURCE_DIR}/utils" -I"${CMAKE_CURRENT_SOURCE_DIR}/db-benchmark-test" -I"${CMAKE_CURRENT_BINARY_DIR}" -I"${CMAKE_CURRENT_BINARY_DIR}/toku_include" -I"${CMAKE_CURRENT_BINARY_DIR}/buildheader" - DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") - add_custom_target(build_cscope.out ALL DEPENDS - "${CMAKE_CURRENT_SOURCE_DIR}/cscope.out" - "${CMAKE_CURRENT_SOURCE_DIR}/cscope.in.out" - "${CMAKE_CURRENT_SOURCE_DIR}/cscope.po.out") - endif () -endif () - -option(USE_GTAGS "Build the gtags database." ON) -if (USE_GTAGS) - find_program(GTAGS "gtags") - if (NOT GTAGS MATCHES NOTFOUND) - file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/gtags.files" "") - foreach(file ${all_srcs} ${all_hdrs}) - file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/gtags.files" "${file}\n") - endforeach(file) - add_custom_command( - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GTAGS" - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GRTAGS" - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GPATH" - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GSYMS" - COMMAND ${GTAGS} -f "${CMAKE_CURRENT_BINARY_DIR}/gtags.files" - DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") - add_custom_target(build_GTAGS ALL DEPENDS - "${CMAKE_CURRENT_SOURCE_DIR}/GTAGS" - "${CMAKE_CURRENT_SOURCE_DIR}/GRTAGS" - "${CMAKE_CURRENT_SOURCE_DIR}/GPATH" - "${CMAKE_CURRENT_SOURCE_DIR}/GSYMS") - endif () -endif () - -option(USE_MKID "Build the idutils database." 
ON) -if (USE_MKID) - find_program(MKID "mkid") - if (NOT MKID MATCHES NOTFOUND) - add_custom_command( - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/ID" - COMMAND ${MKID} ${all_srcs} ${all_hdrs} - DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") - add_custom_target(build_MKID ALL DEPENDS - "${CMAKE_CURRENT_SOURCE_DIR}/ID") - endif () -endif () diff --git a/storage/tokudb/ft-index/cmake_modules/TokuFeatureDetection.cmake b/storage/tokudb/ft-index/cmake_modules/TokuFeatureDetection.cmake index 59dff0aadd460..e7fd27525d594 100644 --- a/storage/tokudb/ft-index/cmake_modules/TokuFeatureDetection.cmake +++ b/storage/tokudb/ft-index/cmake_modules/TokuFeatureDetection.cmake @@ -2,11 +2,6 @@ find_package(Threads) find_package(ZLIB REQUIRED) -option(USE_BDB "Build some tools and tests with bdb (requires a proper BerkeleyDB include directory and library)." ON) -if(USE_BDB) - find_package(BDB REQUIRED) -endif() - option(USE_VALGRIND "Build to run safely under valgrind (often slower)." ON) if(USE_VALGRIND) find_package(Valgrind REQUIRED) diff --git a/storage/tokudb/ft-index/cmake_modules/TokuSetupCTest.cmake b/storage/tokudb/ft-index/cmake_modules/TokuSetupCTest.cmake index 9e6c9d4834c10..5b6882cc4a162 100644 --- a/storage/tokudb/ft-index/cmake_modules/TokuSetupCTest.cmake +++ b/storage/tokudb/ft-index/cmake_modules/TokuSetupCTest.cmake @@ -94,8 +94,6 @@ if (BUILD_TESTING OR BUILD_FT_TESTS OR BUILD_SRC_TESTS) ## set up full valgrind suppressions file (concatenate the suppressions files) file(READ ft/valgrind.suppressions valgrind_suppressions) file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/valgrind.suppressions" "${valgrind_suppressions}") - file(READ src/tests/bdb.suppressions bdb_suppressions) - file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/valgrind.suppressions" "${bdb_suppressions}") file(READ bash.suppressions bash_suppressions) file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/valgrind.suppressions" "${bash_suppressions}") diff --git a/storage/tokudb/ft-index/examples/CMakeLists.txt b/storage/tokudb/ft-index/examples/CMakeLists.txt deleted file mode 100644 index 01ad01aa8d2fe..0000000000000 --- a/storage/tokudb/ft-index/examples/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -# detect when we are being built as a subproject -if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING) - install( - FILES - db-insert.c - db-insert-multiple.c - db-scan.c - db-update.c - Makefile - README.examples - DESTINATION - examples - COMPONENT - tokukv_examples - ) -endif () \ No newline at end of file diff --git a/storage/tokudb/ft-index/examples/Makefile b/storage/tokudb/ft-index/examples/Makefile deleted file mode 100644 index 7f11d23dfd80c..0000000000000 --- a/storage/tokudb/ft-index/examples/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -SRCS = $(wildcard *.c) -TARGETS = $(patsubst %.c,%,$(SRCS)) $(patsubst %.c,%-bdb,$(SRCS)) -CPPFLAGS = -I../include -D_GNU_SOURCE -CFLAGS = -g -std=c99 -Wall -Wextra -Werror -Wno-missing-field-initializers -ifeq ($(USE_STATIC_LIBS),1) -LIBTOKUDB = tokufractaltree_static -LIBTOKUPORTABILITY = tokuportability_static -else -LIBTOKUDB = tokufractaltree -LIBTOKUPORTABILITY = tokuportability -endif -LDFLAGS = -L../lib -l$(LIBTOKUDB) -l$(LIBTOKUPORTABILITY) -Wl,-rpath,../lib -lpthread -lz -ldl - -default local: $(TARGETS) - -%: %.c - $(CC) $(CPPFLAGS) $(CFLAGS) $^ -o $@ $(LDFLAGS) - -%-bdb: %.c - $(CC) -D_GNU_SOURCE -DBDB $(CFLAGS) $^ -o $@ -ldb - -check: $(TARGETS) - ./db-insert -x && ./db-scan --lwc --prelock --prelockflag - -checknox: $(TARGETS) 
- ./db-insert && ./db-scan --nox --lwc --prelock --prelockflag - -clean: - rm -rf $(TARGETS) bench.* update.env.* insertm.env.* diff --git a/storage/tokudb/ft-index/examples/README.examples b/storage/tokudb/ft-index/examples/README.examples deleted file mode 100644 index 2fc6071d6869d..0000000000000 --- a/storage/tokudb/ft-index/examples/README.examples +++ /dev/null @@ -1,85 +0,0 @@ -The examples includes a pair of programs that can be compiled to use either the Berkeley DB library or the Tokutek Fractal Tree index library. - -Note: The file formats are different from TokuDB and Berkley DB. Thus -you cannot access a database created by Berkeley DB using the Tokutek -DB, or vice-versa. - -db-insert is a program that inserts random key-value pairs into a database. - -db-scan is a program that scans through the key-value pairs, reading every row, from a database. - -db-update is a program that upserts key-value pairs into a database. If the key already exists it increment a count in the value. - -db-insert-multiple is a program and inserts key-value pairs into multiple databases. This is is now TokuDB maintains consistent -secondary databases. - -To build it and run it (it's been tested on Fedora 10): -$ make (Makes the binaries) -Run the insertion workload under TokuDB: -$ ./db-insert -Run the insertion workload under BDB: -$ ./db-insert-bdb - -Here is what the output looks like (this on a Thinkpad X61s laptop -running Fedora 10). BDB is a little faster for sequential insertions -(the first three columns), but much much slower for random insertions -(the next 3 columns), so that TokuDB is faster on combined workload. - -$ ./db-insert -serial and random insertions of 1048576 per batch -serial 2.609965s 401759/s random 10.983798s 95466/s cumulative 13.593869s 154272/s -serial 3.053433s 343409/s random 12.008670s 87318/s cumulative 28.656115s 146367/s -serial 5.198312s 201715/s random 15.087426s 69500/s cumulative 48.954605s 128516/s -serial 6.096396s 171999/s random 13.550688s 77382/s cumulative 68.638321s 122215/s -Shutdown 4.025110s -Total time 72.677498s for 8388608 insertions = 115422/s -$ ./db-insert-bdb -serial and random insertions of 1048576 per batch -serial 2.623888s 399627/s random 8.770850s 119552/s cumulative 11.394805s 184045/s -serial 3.081946s 340232/s random 21.046589s 49822/s cumulative 35.523434s 118071/s -serial 14.160498s 74049/s random 497.117523s 2109/s cumulative 546.804504s 11506/s -serial 1.534212s 683462/s random 1128.525146s 929/s cumulative 1676.863892s 5003/s -Shutdown 195.879242s -Total time 1872.746582s for 8388608 insertions = 4479/s - -The files are smaller for TokuDB than BDB. - -$ ls -lh bench.tokudb/ -total 39M --rwxrwxr-x 1 bradley bradley 39M 2009-07-28 15:36 bench.db -$ ls -lh bench.bdb/ -total 322M --rw-r--r-- 1 bradley bradley 322M 2009-07-28 16:14 bench.db - -When scanning the table, one can run out of locks with BDB. There are ways around it (increase the lock table size). - -$ ./db-scan-bdb --nox -Lock table is out of available object entries -db-scan-bdb: db-scan.c:177: scanscan_hwc: Assertion `r==(-30988)' failed. -Aborted - -TokuDB is fine on a big table scan. - -$ ./db-scan --nox -Scan 33162304 bytes (2072644 rows) in 7.924463s at 4.184801MB/s -Scan 33162304 bytes (2072644 rows) in 3.062239s at 10.829431MB/s -0:3 1:53 2:56 -miss=3 hit=53 wait_reading=0 wait=0 -VmPeak: 244668 kB -VmHWM: 68096 kB -VmRSS: 1232 kB - -The update-bdb program upserts 1B rows into a BDB database. 
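The removed example programs drive the engine through its Berkeley-DB-style C API (db_env_create, db_create, DB->put, and so on). As a rough sketch of the insert pattern that db-insert follows, with flag choices, file names, and sizes assumed rather than copied from the deleted sources:

```c
/* Rough sketch of the insert pattern used by the (removed) example programs.
 * Illustrative only: it follows the Berkeley-DB-style API the README describes;
 * the flags and names below are assumptions, not code from db-insert.c. */
#include <assert.h>
#include <string.h>
#include <db.h>

int main(void) {
    DB_ENV *env;
    DB *db;
    int r;

    r = db_env_create(&env, 0);                          assert(r == 0);
    /* assumes the environment directory ./bench.tokudb already exists */
    r = env->open(env, "bench.tokudb",
                  DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL |
                  DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN, 0755);
    assert(r == 0);

    r = db_create(&db, env, 0);                          assert(r == 0);
    r = db->open(db, NULL, "bench.db", NULL, DB_BTREE, DB_CREATE, 0644);
    assert(r == 0);

    long k = 42, v = 1;            /* 8-byte key and value, as in the examples */
    DBT key, val;
    memset(&key, 0, sizeof key); key.data = &k; key.size = sizeof k;
    memset(&val, 0, sizeof val); val.data = &v; val.size = sizeof v;
    r = db->put(db, NULL, &key, &val, 0);                assert(r == 0);

    r = db->close(db, 0);                                assert(r == 0);
    r = env->close(env, 0);                              assert(r == 0);
    return 0;
}
```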
When the database gets larger than memory, the throughput -should tank since every update needs to read a block from the storage system. The storage system becomes the performance -bottleneck. The program uses 1 1GB cache in front of the kernel's file system buffer cache. The program should hit the wall -at about 300M rows on a machine with 16GB of memory since keys are 8 bytes and values are 8 bytes in size. - -$ ./db-update-bdb - -The update program upserts 1B rows into a TokuDB database. Throughput should be not degrade significantly since the cost -of the storage system reads is amortized over 1000's of update operations. One should expect TokuDB to be at least 50 times -faster than BDB. - -$ ./db-update - -There isn't much documentation for the Tokutek Fractal Tree index library, but most of the API is like Berkeley DB's. diff --git a/storage/tokudb/ft-index/examples/db-insert-multiple.c b/storage/tokudb/ft-index/examples/db-insert-multiple.c deleted file mode 100644 index e77dd94547f2d..0000000000000 --- a/storage/tokudb/ft-index/examples/db-insert-multiple.c +++ /dev/null @@ -1,510 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. 
- - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -// measure the performance of insertions into multiple dictionaries using ENV->put_multiple -// the table schema is t(a bigint, b bigint, c bigint, d bigint, primary key(a), key(b), key(c,d), clustering key(d)) -// the primary key(a) is represented with key=a and value=b,c,d -// the key(b) index is represented with key=b,a and no value -// the key(c,d) index is represented with key=c,d,a and no value -// the clustering key(d) is represented with key=d,a and value=b,c -// a is auto increment -// b, c and d are random - -#include "../include/toku_config.h" -#include -#include -#include -#include -#include -#include -#include -#if defined(HAVE_BYTESWAP_H) -# include -#elif defined(HAVE_LIBKERN_OSBYTEORDER_H) -# include -# define bswap_64 OSSwapInt64 -#endif -#include -#include "db.h" - -static int force_multiple = 1; - -struct table { - int ndbs; - DB **dbs; -#if defined(TOKUDB) - DBT *mult_keys; - DBT *mult_vals; - uint32_t *mult_flags; -#endif -}; - -#if defined(TOKUDB) -static void table_init_dbt(DBT *dbt, size_t length) { - dbt->flags = DB_DBT_USERMEM; - dbt->data = malloc(length); - dbt->ulen = length; - dbt->size = 0; -} - -static void table_destroy_dbt(DBT *dbt) { - free(dbt->data); -} -#endif - -static void table_init(struct table *t, int ndbs, DB **dbs, size_t key_length __attribute__((unused)), size_t val_length __attribute__((unused))) { - t->ndbs = ndbs; - t->dbs = dbs; -#if defined(TOKUDB) - t->mult_keys = calloc(ndbs, sizeof (DBT)); - int i; - for (i = 0; i < ndbs; i++) - table_init_dbt(&t->mult_keys[i], key_length); - t->mult_vals = calloc(ndbs, sizeof (DBT)); - for (i = 0; i < ndbs; i++) - table_init_dbt(&t->mult_vals[i], val_length); - t->mult_flags = calloc(ndbs, sizeof (uint32_t)); - for (i = 0; i < ndbs; i++) - t->mult_flags[i] = 0; -#endif -} - -static void table_destroy(struct table *t) { -#if defined(TOKUDB) - int i; - for (i = 0; i < t->ndbs; i++) - table_destroy_dbt(&t->mult_keys[i]); - free(t->mult_keys); - for (i = 0; i < t->ndbs; i++) - table_destroy_dbt(&t->mult_vals[i]); - free(t->mult_vals); - free(t->mult_flags); -#else - assert(t); -#endif -} - -static int verbose = 0; - -static long random64(void) { - return ((long)random() << 32LL) + (long)random(); -} - -static long htonl64(long x) { -#if BYTE_ORDER == LITTLE_ENDIAN - return bswap_64(x); -#else -#error -#endif -} - -#if defined(TOKUDB) -static int my_generate_row_for_put(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val) { - assert(src_db); - assert(dest_key->flags == DB_DBT_USERMEM && dest_key->ulen >= 4 * 8); - assert(dest_val->flags == DB_DBT_USERMEM && dest_val->ulen >= 4 * 8); - int index_num; - assert(dest_db->descriptor->dbt.size == sizeof index_num); - memcpy(&index_num, dest_db->descriptor->dbt.data, sizeof index_num); - switch (htonl(index_num) % 4) { - case 0: - // dest_key = src_key - dest_key->size = src_key->size; - memcpy(dest_key->data, src_key->data, src_key->size); - // dest_val = src_val - dest_val->size = src_val->size; - memcpy(dest_val->data, src_val->data, src_val->size); - break; - case 1: - // dest_key = b,a - dest_key->size = 2 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 0, 8); - memcpy((char *)dest_key->data + 8, (char *)src_key->data + 0, 8); - // dest_val = null - dest_val->size = 0; - break; - case 2: - // dest_key = c,d,a - dest_key->size = 3 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 8, 8); - memcpy((char *)dest_key->data + 8, (char *)src_val->data + 16, 8); - memcpy((char 
*)dest_key->data + 16, (char *)src_key->data + 0, 8); - // dest_val = null - dest_val->size = 0; - break; - case 3: - // dest_key = d,a - dest_key->size = 2 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 16, 8); - memcpy((char *)dest_key->data + 8, (char *)src_key->data + 0, 8); - // dest_val = b,c - dest_val->size = 2 * 8; - memcpy((char *)dest_val->data + 0, (char *)src_val->data + 0, 8); - memcpy((char *)dest_val->data + 8, (char *)src_val->data + 8, 8); - break; - default: - assert(0); - } - return 0; -} - -#else - -static int my_secondary_key(DB *db, const DBT *src_key, const DBT *src_val, DBT *dest_key) { - assert(dest_key->flags == 0 && dest_key->data == NULL); - dest_key->flags = DB_DBT_APPMALLOC; - dest_key->data = malloc(4 * 8); assert(dest_key->data); - switch ((intptr_t)db->app_private % 4) { - case 0: - // dest_key = src_key - dest_key->size = src_key->size; - memcpy(dest_key->data, src_key->data, src_key->size); - break; - case 1: - // dest_key = b,a - dest_key->size = 2 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 0, 8); - memcpy((char *)dest_key->data + 8, (char *)src_key->data + 0, 8); - break; - case 2: - // dest_key = c,d,a - dest_key->size = 3 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 8, 8); - memcpy((char *)dest_key->data + 8, (char *)src_val->data + 16, 8); - memcpy((char *)dest_key->data + 16, (char *)src_key->data + 0, 8); - break; - case 3: - // dest_key = d,a,b,c - dest_key->size = 4 * 8; - memcpy((char *)dest_key->data + 0, (char *)src_val->data + 16, 8); - memcpy((char *)dest_key->data + 8, (char *)src_key->data + 0, 8); - memcpy((char *)dest_key->data + 16, (char *)src_val->data + 0, 8); - memcpy((char *)dest_key->data + 24, (char *)src_val->data + 8, 8); - break; - default: - assert(0); - } - return 0; -} -#endif - -static void insert_row(DB_ENV *db_env, struct table *t, DB_TXN *txn, long a, long b, long c, long d) { - int r; - - // generate the primary key - char key_buffer[8]; - a = htonl64(a); - memcpy(key_buffer, &a, sizeof a); - - // generate the primary value - char val_buffer[3*8]; - b = htonl64(b); - memcpy(val_buffer+0, &b, sizeof b); - c = htonl64(c); - memcpy(val_buffer+8, &c, sizeof c); - d = htonl64(d); - memcpy(val_buffer+16, &d, sizeof d); - - DBT key = { .data = key_buffer, .size = sizeof key_buffer }; - DBT value = { .data = val_buffer, .size = sizeof val_buffer }; -#if defined(TOKUDB) - if (!force_multiple && t->ndbs == 1) { - r = t->dbs[0]->put(t->dbs[0], txn, &key, &value, t->mult_flags[0]); assert(r == 0); - } else { - r = db_env->put_multiple(db_env, t->dbs[0], txn, &key, &value, t->ndbs, &t->dbs[0], t->mult_keys, t->mult_vals, t->mult_flags); assert(r == 0); - } -#else - assert(db_env); - r = t->dbs[0]->put(t->dbs[0], txn, &key, &value, 0); assert(r == 0); -#endif -} - -static inline float tdiff (struct timeval *a, struct timeval *b) { - return (a->tv_sec - b->tv_sec) +1e-6*(a->tv_usec - b->tv_usec); -} - -static void insert_all(DB_ENV *db_env, struct table *t, long nrows, long max_rows_per_txn, long key_range, long rows_per_report, bool do_txn) { - int r; - - struct timeval tstart; - r = gettimeofday(&tstart, NULL); assert(r == 0); - struct timeval tlast = tstart; - DB_TXN *txn = NULL; - if (do_txn) { - r = db_env->txn_begin(db_env, NULL, &txn, 0); assert(r == 0); - } - long n_rows_per_txn = 0; - long rowi; - for (rowi = 0; rowi < nrows; rowi++) { - long a = rowi; - long b = random64() % key_range; - long c = random64() % key_range; - long d = random64() % key_range; - 
insert_row(db_env, t, txn, a, b, c, d); - n_rows_per_txn++; - - // maybe commit - if (do_txn && n_rows_per_txn == max_rows_per_txn) { - r = txn->commit(txn, 0); assert(r == 0); - r = db_env->txn_begin(db_env, NULL, &txn, 0); assert(r == 0); - n_rows_per_txn = 0; - } - - // maybe report performance - if (((rowi + 1) % rows_per_report) == 0) { - struct timeval tnow; - r = gettimeofday(&tnow, NULL); assert(r == 0); - float last_time = tdiff(&tnow, &tlast); - float total_time = tdiff(&tnow, &tstart); - printf("%ld %.3f %.0f/s %.0f/s\n", rowi + 1, last_time, rows_per_report/last_time, rowi/total_time); fflush(stdout); - tlast = tnow; - } - } - - if (do_txn) { - r = txn->commit(txn, 0); assert(r == 0); - } - struct timeval tnow; - r = gettimeofday(&tnow, NULL); assert(r == 0); - printf("total %ld %.3f %.0f/s\n", nrows, tdiff(&tnow, &tstart), nrows/tdiff(&tnow, &tstart)); fflush(stdout); -} - -int main(int argc, char *argv[]) { -#if defined(TOKDUB) - char *db_env_dir = "insertm.env.tokudb"; -#else - char *db_env_dir = "insertm.env.bdb"; -#endif - int db_env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG; - long rows = 100000000; - long rows_per_txn = 1000; - long rows_per_report = 100000; - long key_range = 100000; - bool do_txn = true; - u_int32_t pagesize = 0; - u_int64_t cachesize = 1000000000; - int ndbs = 4; -#if defined(TOKUDB) - u_int32_t checkpoint_period = 60; -#endif - - int i; - for (i = 1; i < argc; i++) { - char *arg = argv[i]; - if (strcmp(arg, "--verbose") == 0) { - verbose++; - continue; - } - if (strcmp(arg, "--ndbs") == 0 && i+1 < argc) { - ndbs = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--rows") == 0 && i+1 < argc) { - rows = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--rows_per_txn") == 0 && i+1 < argc) { - rows_per_txn = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--rows_per_report") == 0 && i+1 < argc) { - rows_per_report = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--key_range") == 0 && i+1 < argc) { - key_range = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--txn") == 0 && i+1 < argc) { - do_txn = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--pagesize") == 0 && i+1 < argc) { - pagesize = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--cachesize") == 0 && i+1 < argc) { - cachesize = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--force_multiple") == 0 && i+1 < argc) { - force_multiple = atoi(argv[++i]); - continue; - } -#if defined(TOKUDB) - if (strcmp(arg, "--checkpoint_period") == 0 && i+1 < argc) { - checkpoint_period = atoi(argv[++i]); - continue; - } -#endif - - assert(0); - } - - int r; - char rm_cmd[strlen(db_env_dir) + strlen("rm -rf ") + 1]; - snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", db_env_dir); - r = system(rm_cmd); assert(r == 0); - - r = mkdir(db_env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert(r == 0); - - // create and open the env - DB_ENV *db_env = NULL; - r = db_env_create(&db_env, 0); assert(r == 0); - if (!do_txn) - db_env_open_flags &= ~(DB_INIT_TXN | DB_INIT_LOG); - if (cachesize) { - const u_int64_t gig = 1 << 30; - r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); - } -#if defined(TOKUDB) - r = db_env->set_generate_row_callback_for_put(db_env, my_generate_row_for_put); assert(r == 0); -#endif - r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if defined(TOKUDB) - if (checkpoint_period) { - r = 
db_env->checkpointing_set_period(db_env, checkpoint_period); assert(r == 0); - u_int32_t period; - r = db_env->checkpointing_get_period(db_env, &period); assert(r == 0 && period == checkpoint_period); - } -#endif - - - // create the db - DB *dbs[ndbs]; - for (i = 0; i < ndbs; i++) { - DB *db = NULL; - r = db_create(&db, db_env, 0); assert(r == 0); - DB_TXN *create_txn = NULL; - if (do_txn) { - r = db_env->txn_begin(db_env, NULL, &create_txn, 0); assert(r == 0); - } - if (pagesize) { - r = db->set_pagesize(db, pagesize); assert(r == 0); - } - char db_filename[32]; sprintf(db_filename, "test%d", i); - r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); - -#if defined(TOKUDB) - DESCRIPTOR_S new_descriptor; - int index_num = htonl(i); - new_descriptor.dbt.data = &index_num; - new_descriptor.dbt.size = sizeof i; - r = db->change_descriptor(db, create_txn, &new_descriptor.dbt, 0); assert(r == 0); -#else - db->app_private = (void *) (intptr_t) i; - if (i > 0) { - r = dbs[0]->associate(dbs[0], create_txn, db, my_secondary_key, 0); assert(r == 0); - } -#endif - if (do_txn) { - r = create_txn->commit(create_txn, 0); assert(r == 0); - } - dbs[i] = db; - } - - // insert all rows - struct table table; - table_init(&table, ndbs, dbs, 4 * 8, 4 * 8); - - insert_all(db_env, &table, rows, rows_per_txn, key_range, rows_per_report, do_txn); - - table_destroy(&table); - - // shutdown - for (i = 0; i < ndbs; i++) { - DB *db = dbs[i]; - r = db->close(db, 0); assert(r == 0); db = NULL; - } - r = db_env->close(db_env, 0); assert(r == 0); db_env = NULL; - - return 0; -} diff --git a/storage/tokudb/ft-index/examples/db-insert.c b/storage/tokudb/ft-index/examples/db-insert.c deleted file mode 100644 index 87cd9d35e21b4..0000000000000 --- a/storage/tokudb/ft-index/examples/db-insert.c +++ /dev/null @@ -1,610 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
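db-insert.c, which follows, batches its puts into transactions: after items_per_transaction insertions its insert() helper commits and immediately begins a new transaction. A condensed sketch of that batching pattern, assuming an already-open transactional environment and database, a hypothetical batch size, and a stand-in for the real key generation:

/* Sketch of the commit-every-N-rows batching used by the benchmark below;
 * environment/db setup, prelocking, and the real key layout are omitted. */
#include <assert.h>
#include <string.h>
#include <db.h>

static void insert_batched(DB_ENV *env, DB *db, long nrows, long batch) {
    DB_TXN *txn = NULL;
    int r = env->txn_begin(env, NULL, &txn, 0); assert(r == 0);
    for (long i = 0; i < nrows; i++) {
        long long k = i, v = i;                  /* stand-in for the real keys */
        DBT key, val;
        memset(&key, 0, sizeof key); key.data = &k; key.size = sizeof k;
        memset(&val, 0, sizeof val); val.data = &v; val.size = sizeof v;
        r = db->put(db, txn, &key, &val, 0); assert(r == 0);
        if ((i + 1) % batch == 0) {              /* close out this batch */
            r = txn->commit(txn, 0); assert(r == 0);
            r = env->txn_begin(env, NULL, &txn, 0); assert(r == 0);
        }
    }
    r = txn->commit(txn, 0); assert(r == 0);
}

Batching keeps the per-row transaction overhead small while still bounding how much work a single transaction (and its locks) can accumulate.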
- -#include -#include -#include -// Define BDB if you want to compile this to use Berkeley DB -#include -#include -#ifdef BDB -#include -#include -#define DIRSUF bdb -#else -#include -#define DIRSUF tokudb -#endif - -#include -#include -#include -#include -#include - -static inline float toku_tdiff (struct timeval *a, struct timeval *b) { - return (a->tv_sec - b->tv_sec) +1e-6*(a->tv_usec - b->tv_usec); -} - -#if !defined(DB_PRELOCKED_WRITE) -#define NO_DB_PRELOCKED -#define DB_PRELOCKED_WRITE 0 -#endif - -int verbose=1; - -enum { SERIAL_SPACING = 1<<6 }; -enum { DEFAULT_ITEMS_TO_INSERT_PER_ITERATION = 1<<20 }; -enum { DEFAULT_ITEMS_PER_TRANSACTION = 1<<14 }; - -static void insert (long long v); -#define CKERR(r) ({ int __r = r; if (__r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, __r, db_strerror(r)); assert(__r==0); }) -#define CKERR2(r,rexpect) if (r!=rexpect) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, r, db_strerror(r)); assert(r==rexpect); - -/* default test parameters */ -int keysize = sizeof (long long); -int valsize = sizeof (long long); -int pagesize = 0; -long long cachesize = 1000000000; // 1GB -int dupflags = 0; -int noserial = 0; // Don't do the serial stuff -int norandom = 0; // Don't do the random stuff -int prelock = 0; -int prelockflag = 0; -int items_per_transaction = DEFAULT_ITEMS_PER_TRANSACTION; -int items_per_iteration = DEFAULT_ITEMS_TO_INSERT_PER_ITERATION; -int finish_child_first = 0; // Commit or abort child first (before doing so to the parent). No effect if child does not exist. -int singlex_child = 0; // Do a single transaction, but do all work with a child -int singlex = 0; // Do a single transaction -int singlex_create = 0; // Create the db using the single transaction (only valid if singlex) -int insert1first = 0; // insert 1 before doing the rest -int do_transactions = 0; -int if_transactions_do_logging = DB_INIT_LOG; // set this to zero if we want no logging when transactions are used -int do_abort = 0; -int n_insertions_since_txn_began=0; -int env_open_flags = DB_CREATE|DB_PRIVATE|DB_INIT_MPOOL; -u_int32_t put_flags = 0; -double compressibility = -1; // -1 means make it very compressible. 1 means use random bits everywhere. 2 means half the bits are random. -int do_append = 0; -u_int32_t checkpoint_period = 60; - -static void do_prelock(DB* db, DB_TXN* txn) { - if (prelock) { -#if !defined(NO_DB_PRELOCKED) - int r = db->pre_acquire_table_lock(db, txn); - assert(r==0); -#else - (void) db; (void) txn; -#endif - } -} - -#define STRINGIFY2(s) #s -#define STRINGIFY(s) STRINGIFY2(s) -const char *dbdir = "./bench." 
STRINGIFY(DIRSUF); -char *dbfilename = "bench.db"; -char *dbname; - -DB_ENV *dbenv; -DB *db; -DB_TXN *parenttid=0; -DB_TXN *tid=0; - - -static void benchmark_setup (void) { - int r; - - if (!do_append) { - char unlink_cmd[strlen(dbdir) + strlen("rm -rf ") + 1]; - snprintf(unlink_cmd, sizeof(unlink_cmd), "rm -rf %s", dbdir); - //printf("unlink_cmd=%s\n", unlink_cmd); - system(unlink_cmd); - - if (strcmp(dbdir, ".") != 0) { - r = mkdir(dbdir,S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); - assert(r == 0); - } - } - - r = db_env_create(&dbenv, 0); - assert(r == 0); - -#if !defined(TOKUDB) -#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 4 - if (dbenv->set_lk_max) { - r = dbenv->set_lk_max(dbenv, items_per_transaction*2); - assert(r==0); - } -#elif (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 7) || DB_VERSION_MAJOR >= 5 - if (dbenv->set_lk_max_locks) { - r = dbenv->set_lk_max_locks(dbenv, items_per_transaction*2); - assert(r==0); - } - if (dbenv->set_lk_max_lockers) { - r = dbenv->set_lk_max_lockers(dbenv, items_per_transaction*2); - assert(r==0); - } - if (dbenv->set_lk_max_objects) { - r = dbenv->set_lk_max_objects(dbenv, items_per_transaction*2); - assert(r==0); - } -#else -#error -#endif -#endif - - if (dbenv->set_cachesize) { - r = dbenv->set_cachesize(dbenv, cachesize / (1024*1024*1024), cachesize % (1024*1024*1024), 1); - if (r != 0) - printf("WARNING: set_cachesize %d\n", r); - } - { - r = dbenv->open(dbenv, dbdir, env_open_flags, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); - assert(r == 0); - } - -#if defined(TOKUDB) - if (checkpoint_period) { - printf("set checkpoint_period %u\n", checkpoint_period); - r = dbenv->checkpointing_set_period(dbenv, checkpoint_period); assert(r == 0); - u_int32_t period; - r = dbenv->checkpointing_get_period(dbenv, &period); assert(r == 0 && period == checkpoint_period); - } -#endif - - r = db_create(&db, dbenv, 0); - assert(r == 0); - - if (do_transactions) { - r=dbenv->txn_begin(dbenv, 0, &tid, 0); CKERR(r); - } - if (pagesize && db->set_pagesize) { - r = db->set_pagesize(db, pagesize); - assert(r == 0); - } - if (dupflags) { - r = db->set_flags(db, dupflags); - assert(r == 0); - } - r = db->open(db, tid, dbfilename, NULL, DB_BTREE, DB_CREATE, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); - if (r!=0) fprintf(stderr, "errno=%d, %s\n", errno, strerror(errno)); - assert(r == 0); - if (insert1first) { - if (do_transactions) { - r=tid->commit(tid, 0); - assert(r==0); - tid = NULL; - r=dbenv->txn_begin(dbenv, 0, &tid, 0); CKERR(r); - } - insert(-1); - if (singlex) { - r=tid->commit(tid, 0); - assert(r==0); - tid = NULL; - r=dbenv->txn_begin(dbenv, 0, &tid, 0); CKERR(r); - } - } - else if (singlex && !singlex_create) { - r=tid->commit(tid, 0); - assert(r==0); - tid = NULL; - r=dbenv->txn_begin(dbenv, 0, &tid, 0); CKERR(r); - } - if (do_transactions) { - if (singlex) - do_prelock(db, tid); - else { - r=tid->commit(tid, 0); - assert(r==0); - tid = NULL; - } - } - if (singlex_child) { - parenttid = tid; - tid = NULL; - r=dbenv->txn_begin(dbenv, parenttid, &tid, 0); CKERR(r); - } - -} - -static void benchmark_shutdown (void) { - int r; - - if (do_transactions && singlex && !insert1first && (singlex_create || prelock)) { -#if defined(TOKUDB) - //There should be a single 'truncate' in the rollback instead of many 'insert' entries. - struct txn_stat *s; - r = tid->txn_stat(tid, &s); - assert(r==0); - //TODO: #1125 Always do the test after performance testing is done. 
- if (singlex_child) fprintf(stderr, "SKIPPED 'small rollback' test for child txn\n"); - else - assert(s->rollback_raw_count < 100); // gross test, not worth investigating details - free(s); - //system("ls -l bench.tokudb"); -#endif - } - if (do_transactions && singlex) { - if (!singlex_child || finish_child_first) { - assert(tid); - r = (do_abort ? tid->abort(tid) : tid->commit(tid, 0)); assert(r==0); - tid = NULL; - } - if (singlex_child) { - assert(parenttid); - r = (do_abort ? parenttid->abort(parenttid) : parenttid->commit(parenttid, 0)); assert(r==0); - parenttid = NULL; - } - else - assert(!parenttid); - } - assert(!tid); - assert(!parenttid); - - r = db->close(db, 0); - assert(r == 0); - r = dbenv->close(dbenv, 0); - assert(r == 0); -} - -static void long_long_to_array (unsigned char *a, int array_size, unsigned long long l) { - int i; - for (i=0; i<8 && i>(56-8*i))&0xff; -} - -static DBT *fill_dbt(DBT *dbt, const void *data, int size) { - memset(dbt, 0, sizeof *dbt); - dbt->size = size; - dbt->data = (void *) data; - return dbt; -} - -// Fill array with 0's if compressibilty==-1, otherwise fill array with data that is likely to compress by a factor of compressibility. -static void fill_array (unsigned char *data, int size) { - memset(data, 0, size); - if (compressibility>0) { - int i; - for (i=0; iput(db, tid, fill_dbt(&kt, kc, keysize), fill_dbt(&vt, vc, valsize), put_flags); - CKERR(r); - if (do_transactions) { - if (n_insertions_since_txn_began>=items_per_transaction && !singlex) { - n_insertions_since_txn_began=0; - r = tid->commit(tid, 0); assert(r==0); - tid = NULL; - r=dbenv->txn_begin(dbenv, 0, &tid, 0); assert(r==0); - do_prelock(db, tid); - n_insertions_since_txn_began=0; - } - n_insertions_since_txn_began++; - } -} - -static void serial_insert_from (long long from) { - long long i; - if (do_transactions && !singlex) { - int r = dbenv->txn_begin(dbenv, 0, &tid, 0); assert(r==0); - do_prelock(db, tid); - { - DBT k,v; - r=db->put(db, tid, fill_dbt(&k, "a", 1), fill_dbt(&v, "b", 1), put_flags); - CKERR(r); - } - } - for (i=0; icommit(tid, 0); assert(r==0); - tid=NULL; - } -} - -static long long llrandom (void) { - return (((long long)(random()))<<32) + random(); -} - -static void random_insert_below (long long below) { - long long i; - if (do_transactions && !singlex) { - int r = dbenv->txn_begin(dbenv, 0, &tid, 0); assert(r==0); - do_prelock(db, tid); - } - for (i=0; icommit(tid, 0); assert(r==0); - tid=NULL; - } -} - -static void biginsert (long long n_elements, struct timeval *starttime) { - long long i; - struct timeval t1,t2; - int iteration; - for (i=0, iteration=0; i= argc) return print_usage(argv[0]); - items_per_transaction = strtoll(argv[++i], &endptr, 10); assert(*endptr == 0); - } else if (strcmp(arg, "--abort") == 0) { - do_abort = 1; - } else if (strcmp(arg, "--periter") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - items_per_iteration = strtoll(argv[++i], &endptr, 10); assert(*endptr == 0); - } else if (strcmp(arg, "--cachesize") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - cachesize = strtoll(argv[++i], &endptr, 10); assert(*endptr == 0); - } else if (strcmp(arg, "--keysize") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - keysize = atoi(argv[++i]); - } else if (strcmp(arg, "--valsize") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - valsize = atoi(argv[++i]); - } else if (strcmp(arg, "--pagesize") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - pagesize = atoi(argv[++i]); - } else if (strcmp(arg, 
"--env") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - dbdir = argv[++i]; - } else if (strcmp(arg, "--prelock") == 0) { - prelock=1; - } else if (strcmp(arg, "--prelockflag") == 0) { - prelock=1; - prelockflag=1; - } else if (strcmp(arg, "--srandom") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - srandom(atoi(argv[++i])); - } else if (strcmp(arg, "--append") == 0) { - do_append = 1; - } else if (strcmp(arg, "--checkpoint-period") == 0) { - if (i+1 >= argc) return print_usage(argv[9]); - checkpoint_period = (u_int32_t) atoi(argv[++i]); - } else if (strcmp(arg, "--unique_checks") == 0) { - if (i+1 >= argc) return print_usage(argv[0]); - int unique_checks = atoi(argv[++i]); - if (unique_checks) - put_flags = DB_NOOVERWRITE; - else - put_flags = 0; - } else { - return print_usage(argv[0]); - } - } - if (do_transactions) { - env_open_flags |= DB_INIT_TXN | if_transactions_do_logging | DB_INIT_LOCK; - } - if (do_transactions && prelockflag) { - put_flags |= DB_PRELOCKED_WRITE; - } - if (i -#include -#include -#include -#include -#ifdef BDB -#include -#define DIRSUF bdb -#else -#include -#define DIRSUF tokudb -#endif -#include -#include -#include -#include -#include -#include -#include - -static const char *pname; -static enum run_mode { RUN_HWC, RUN_LWC, RUN_VERIFY, RUN_RANGE} run_mode = RUN_HWC; -static int do_txns=1, prelock=0, prelockflag=0; -static u_int32_t lock_flag = 0; -static long limitcount=-1; -static u_int32_t cachesize = 127*1024*1024; -static u_int64_t start_range = 0, end_range = 0; -static int n_experiments = 2; -static int bulk_fetch = 1; - -static int print_usage (const char *argv0) { - fprintf(stderr, "Usage:\n%s [--verify-lwc | --lwc | --nohwc] [--prelock] [--prelockflag] [--prelockwriteflag] [--env DIR]\n", argv0); - fprintf(stderr, " --verify-lwc means to run the light weight cursor and the heavyweight cursor to verify that they get the same answer.\n"); - fprintf(stderr, " --lwc run light weight cursors instead of heavy weight cursors\n"); - fprintf(stderr, " --prelock acquire a read lock on the entire table before running\n"); - fprintf(stderr, " --prelockflag pass DB_PRELOCKED to the the cursor get operation whenever the locks have been acquired\n"); - fprintf(stderr, " --prelockwriteflag pass DB_PRELOCKED_WRITE to the cursor get operation\n"); - fprintf(stderr, " --nox no transactions (no locking)\n"); - fprintf(stderr, " --count COUNT read the first COUNT rows and then stop.\n"); - fprintf(stderr, " --cachesize N set the env cachesize to N bytes\n"); - fprintf(stderr, " --srandom N srandom(N)\n"); - fprintf(stderr, " --env DIR put db files in DIR instead of default\n"); - fprintf(stderr, " --bulk_fetch 0|1 do bulk fetch on lwc operations (default: 1)\n"); - return 1; -} - -static DB_ENV *env; -static DB *db; -static DB_TXN *tid=0; - -#define STRINGIFY2(s) #s -#define STRINGIFY(s) STRINGIFY2(s) -static const char *dbdir = "./bench." STRINGIFY(DIRSUF); /* DIRSUF is passed in as a -D argument to the compiler. 
*/ -static int env_open_flags_yesx = DB_CREATE|DB_PRIVATE|DB_INIT_MPOOL|DB_INIT_TXN|DB_INIT_LOG|DB_INIT_LOCK; -static int env_open_flags_nox = DB_CREATE|DB_PRIVATE|DB_INIT_MPOOL; -static char *dbfilename = "bench.db"; - - -static void parse_args (int argc, const char *argv[]) { - pname=argv[0]; - argc--; argv++; - int specified_run_mode=0; - while (argc>0) { - if (strcmp(*argv,"--verify-lwc")==0) { - if (specified_run_mode && run_mode!=RUN_VERIFY) { two_modes: fprintf(stderr, "You specified two run modes\n"); exit(1); } - run_mode = RUN_VERIFY; - } else if (strcmp(*argv, "--lwc")==0) { - if (specified_run_mode && run_mode!=RUN_LWC) goto two_modes; - run_mode = RUN_LWC; - } else if (strcmp(*argv, "--hwc")==0) { - if (specified_run_mode && run_mode!=RUN_VERIFY) goto two_modes; - run_mode = RUN_HWC; - } else if (strcmp(*argv, "--prelock")==0) prelock=1; -#ifdef TOKUDB - else if (strcmp(*argv, "--prelockflag")==0) { prelockflag=1; lock_flag = DB_PRELOCKED; } - else if (strcmp(*argv, "--prelockwriteflag")==0) { prelockflag=1; lock_flag = DB_PRELOCKED_WRITE; } -#endif - else if (strcmp(*argv, "--nox")==0) { do_txns=0; } - else if (strcmp(*argv, "--count")==0) { - char *end; - argc--; argv++; - errno=0; limitcount=strtol(*argv, &end, 10); assert(errno==0); - printf("Limiting count to %ld\n", limitcount); - } else if (strcmp(*argv, "--cachesize")==0 && argc>0) { - char *end; - argc--; argv++; - cachesize=(u_int32_t)strtol(*argv, &end, 10); - } else if (strcmp(*argv, "--env") == 0) { - argc--; argv++; - if (argc==0) exit(print_usage(pname)); - dbdir = *argv; - } else if (strcmp(*argv, "--range") == 0 && argc > 2) { - run_mode = RUN_RANGE; - argc--; argv++; - start_range = strtoll(*argv, NULL, 10); - argc--; argv++; - end_range = strtoll(*argv, NULL, 10); - } else if (strcmp(*argv, "--experiments") == 0 && argc > 1) { - argc--; argv++; - n_experiments = strtol(*argv, NULL, 10); - } else if (strcmp(*argv, "--srandom") == 0 && argc > 1) { - argc--; argv++; - srandom(atoi(*argv)); - } else if (strcmp(*argv, "--bulk_fetch") == 0 && argc > 1) { - argc--; argv++; - bulk_fetch = atoi(*argv); - } else { - exit(print_usage(pname)); - } - argc--; argv++; - } - //Prelocking is meaningless without transactions - if (do_txns==0) { - prelockflag=0; - lock_flag=0; - prelock=0; - } -} - -static void scanscan_setup (void) { - int r; - r = db_env_create(&env, 0); assert(r==0); - r = env->set_cachesize(env, 0, cachesize, 1); assert(r==0); - r = env->open(env, dbdir, do_txns? 
env_open_flags_yesx : env_open_flags_nox, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); assert(r==0); - r = db_create(&db, env, 0); assert(r==0); - if (do_txns) { - r = env->txn_begin(env, 0, &tid, 0); assert(r==0); - } - r = db->open(db, tid, dbfilename, NULL, DB_BTREE, 0, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); assert(r==0); -#ifdef TOKUDB - if (prelock) { - r = db->pre_acquire_table_lock(db, tid); - assert(r==0); - } -#endif -} - -static void scanscan_shutdown (void) { - int r; - r = db->close(db, 0); assert(r==0); - if (do_txns) { - r = tid->commit(tid, 0); assert(r==0); - } - r = env->close(env, 0); assert(r==0); -} - -static double gettime (void) { - struct timeval tv; - int r = gettimeofday(&tv, 0); - assert(r==0); - return tv.tv_sec + 1e-6*tv.tv_usec; -} - -static void scanscan_hwc (void) { - int r; - int counter=0; - for (counter=0; countercursor(db, tid, &dbc, 0); assert(r==0); - memset(&k, 0, sizeof(k)); - memset(&v, 0, sizeof(v)); - u_int32_t c_get_flags = DB_NEXT; - if (prelockflag && (counter || prelock)) { - c_get_flags |= lock_flag; - } - while (0 == (r = dbc->c_get(dbc, &k, &v, c_get_flags))) { - - //printf("r=%d\n", r); - - totalbytes += k.size + v.size; - rowcounter++; - if (limitcount>0 && rowcounter>=limitcount) break; - } - assert(r==DB_NOTFOUND); - r = dbc->c_close(dbc); assert(r==0); - double thistime = gettime(); - double tdiff = thistime-prevtime; - printf("Scan %lld bytes (%d rows) in %9.6fs at %9fMB/s\n", totalbytes, rowcounter, tdiff, 1e-6*totalbytes/tdiff); - } -} - -#ifdef TOKUDB - -struct extra_count { - long long totalbytes; - int rowcounter; -}; - -static int counttotalbytes (DBT const *key, DBT const *data, void *extrav) { - struct extra_count *e=extrav; - e->totalbytes += key->size + data->size; - e->rowcounter++; - return bulk_fetch ? TOKUDB_CURSOR_CONTINUE : 0; -} - -static void scanscan_lwc (void) { - int r; - int counter=0; - for (counter=0; countercursor(db, tid, &dbc, 0); assert(r==0); - u_int32_t f_flags = 0; - if (prelockflag && (counter || prelock)) { - f_flags |= lock_flag; - } - long rowcounter=0; - while (0 == (r = dbc->c_getf_next(dbc, f_flags, counttotalbytes, &e))) { - rowcounter++; - if (limitcount>0 && rowcounter>=limitcount) break; - } - r = dbc->c_close(dbc); assert(r==0); - double thistime = gettime(); - double tdiff = thistime-prevtime; - printf("LWC Scan %lld bytes (%d rows) in %9.6fs at %9fMB/s\n", e.totalbytes, e.rowcounter, tdiff, 1e-6*e.totalbytes/tdiff); - } -} -#endif - -static void scanscan_range (void) { - int r; - - double texperiments[n_experiments]; - u_int64_t k = 0; - char kv[8]; - DBT key, val; - - int counter; - for (counter = 0; counter < n_experiments; counter++) { - - if (1) { //if ((counter&1) == 0) { - makekey: - // generate a random key in the key range - k = (start_range + (random() % (end_range - start_range))) * (1<<6); - int i; - for (i = 0; i < 8; i++) - kv[i] = k >> (56-8*i); - } - memset(&key, 0, sizeof key); key.data = &kv, key.size = sizeof kv; - memset(&val, 0, sizeof val); - - double tstart = gettime(); - - DBC *dbc; - r = db->cursor(db, tid, &dbc, 0); assert(r==0); - - // set the cursor to the random key - r = dbc->c_get(dbc, &key, &val, DB_SET_RANGE+lock_flag); - if (r != 0) { - assert(r == DB_NOTFOUND); - printf("%s:%d %" PRIu64 "\n", __FUNCTION__, __LINE__, k); - goto makekey; - } - -#ifdef TOKUDB - // do the range scan - long rowcounter = 0; - struct extra_count e = {0,0}; - while (limitcount > 0 && rowcounter < limitcount) { - r = dbc->c_getf_next(dbc, prelockflag ? 
lock_flag : 0, counttotalbytes, &e); - if (r != 0) - break; - rowcounter++; - } -#endif - - r = dbc->c_close(dbc); - assert(r==0); - - texperiments[counter] = gettime() - tstart; - printf("%" PRIu64 " %f\n", k, texperiments[counter]); fflush(stdout); - } - - // print the times - double tsum = 0.0, tmin = 0.0, tmax = 0.0; - for (counter = 0; counter < n_experiments; counter++) { - if (counter==0 || texperiments[counter] < tmin) - tmin = texperiments[counter]; - if (counter==0 || texperiments[counter] > tmax) - tmax = texperiments[counter]; - tsum += texperiments[counter]; - } - printf("%f %f %f/%d = %f\n", tmin, tmax, tsum, n_experiments, tsum / n_experiments); -} - -#ifdef TOKUDB - -struct extra_verify { - long long totalbytes; - int rowcounter; - DBT k,v; // the k and v are gotten using the old cursor -}; - -static int -checkbytes (DBT const *key, DBT const *data, void *extrav) { - struct extra_verify *e=extrav; - e->totalbytes += key->size + data->size; - e->rowcounter++; - assert(e->k.size == key->size); - assert(e->v.size == data->size); - assert(memcmp(e->k.data, key->data, key->size)==0); - assert(memcmp(e->v.data, data->data, data->size)==0); - assert(e->k.data != key->data); - assert(e->v.data != data->data); - return 0; -} - - -static void scanscan_verify (void) { - int r; - int counter=0; - for (counter=0; countercursor(db, tid, &dbc1, 0); assert(r==0); - r = db->cursor(db, tid, &dbc2, 0); assert(r==0); - memset(&v.k, 0, sizeof(v.k)); - memset(&v.v, 0, sizeof(v.v)); - u_int32_t f_flags = 0; - u_int32_t c_get_flags = DB_NEXT; - if (prelockflag && (counter || prelock)) { - f_flags |= lock_flag; - c_get_flags |= lock_flag; - } - while (1) { - int r1,r2; - r2 = dbc1->c_get(dbc1, &v.k, &v.v, c_get_flags); - r1 = dbc2->c_getf_next(dbc2, f_flags, checkbytes, &v); - assert(r1==r2); - if (r1) break; - } - r = dbc1->c_close(dbc1); assert(r==0); - r = dbc2->c_close(dbc2); assert(r==0); - double thistime = gettime(); - double tdiff = thistime-prevtime; - printf("verify %lld bytes (%d rows) in %9.6fs at %9fMB/s\n", v.totalbytes, v.rowcounter, tdiff, 1e-6*v.totalbytes/tdiff); - } -} - -#endif - -int main (int argc, const char *argv[]) { - - parse_args(argc,argv); - - scanscan_setup(); - switch (run_mode) { - case RUN_HWC: scanscan_hwc(); break; -#ifdef TOKUDB - case RUN_LWC: scanscan_lwc(); break; - case RUN_VERIFY: scanscan_verify(); break; -#endif - case RUN_RANGE: scanscan_range(); break; - default: assert(0); break; - } - scanscan_shutdown(); - - return 0; -} diff --git a/storage/tokudb/ft-index/examples/db-update.c b/storage/tokudb/ft-index/examples/db-update.c deleted file mode 100644 index e2ab1ecdce41c..0000000000000 --- a/storage/tokudb/ft-index/examples/db-update.c +++ /dev/null @@ -1,379 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
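db-update.c, which follows, implements the upsert two ways: through TokuDB's update callback (DB->update together with the my_update_callback seen further down) and, for BDB, as an explicit read-modify-write. A stripped-down sketch of the read-modify-write path, keeping the program's 4-byte network-order counter but dropping the second key column, the timestamp column, and most error handling; the helper name is made up:

/* Sketch of the get-then-put upsert: if the key exists, add `delta` to the
 * stored counter, otherwise insert it with `delta` as the initial count. */
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>
#include <db.h>

static void upsert_counter(DB *db, DB_TXN *txn, DBT *key, int delta) {
    int r;
    DBT old; memset(&old, 0, sizeof old);
    r = db->get(db, txn, key, &old, 0);
    int c = delta;
    if (r == 0) {                                /* key exists: read old counter */
        uint32_t oldc;
        memcpy(&oldc, old.data, sizeof oldc);
        c += (int) ntohl(oldc);
    } else {
        assert(r == DB_NOTFOUND);
    }
    uint32_t newc = htonl((uint32_t) c);
    DBT val = { .data = &newc, .size = sizeof newc };
    r = db->put(db, txn, key, &val, 0); assert(r == 0);
}

With the callback path only the delta travels down the tree and the read is deferred inside it, which is what lets the README above claim that storage reads are amortized over thousands of updates.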
- -// measure the performance of a simulated "insert on duplicate key update" operation -// the table schema is t(a int, b int, c int, d int, primary key(a, b)) -// a and b are random -// c is the sum of the observations -// d is the first observation - -#include -#include -#include -#include -#include -#include -#include -#include -#include "db.h" - -static size_t key_size = 8; -static size_t val_size = 8; -static int verbose = 0; - -static void db_error(const DB_ENV *env, const char *prefix, const char *msg) { - printf("%s: %p %s %s\n", __FUNCTION__, env, prefix, msg); -} - -static int get_int(void *p) { - int v; - memcpy(&v, p, sizeof v); - return htonl(v); -} - -#if defined(TOKUDB) -static int my_update_callback(DB *db, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra) { - assert(db); - assert(key); - if (old_val == NULL) { - // insert new_val = extra - set_val(extra, set_extra); - } else { - if (verbose) printf("u"); - // update new_val = old_val + extra - assert(old_val->size == val_size && extra->size == val_size); - char new_val_buffer[val_size]; - memcpy(new_val_buffer, old_val->data, sizeof new_val_buffer); - int newc = htonl(get_int(old_val->data) + get_int(extra->data)); // newc = oldc + newc - memcpy(new_val_buffer, &newc, sizeof newc); - DBT new_val = { .data = new_val_buffer, .size = sizeof new_val_buffer }; - set_val(&new_val, set_extra); - } - return 0; -} -#endif - -static void insert_and_update(DB *db, DB_TXN *txn, int a, int b, int c, int d, bool do_update_callback) { -#if !defined(TOKUDB) - assert(!do_update_callback); -#endif - int r; - - // generate the key - assert(key_size >= 8); - char key_buffer[key_size]; - int newa = htonl(a); - memcpy(key_buffer, &newa, sizeof newa); - int newb = htonl(b); - memcpy(key_buffer+4, &newb, sizeof newb); - - // generate the value - assert(val_size >= 8); - char val_buffer[val_size]; - int newc = htonl(c); - memcpy(val_buffer, &newc, sizeof newc); - int newd = htonl(d); - memcpy(val_buffer+4, &newd, sizeof newd); - -#if defined(TOKUDB) - if (do_update_callback) { - // extra = value_buffer, implicit combine column c update function - DBT key = { .data = key_buffer, .size = sizeof key_buffer }; - DBT extra = { .data = val_buffer, .size = sizeof val_buffer }; - r = db->update(db, txn, &key, &extra, 0); assert(r == 0); - } else -#endif - { - DBT key = { .data = key_buffer, .size = sizeof key_buffer }; - DBT value = { .data = val_buffer, .size = sizeof val_buffer }; - DBT oldvalue = { }; - r = db->get(db, txn, &key, &oldvalue, 0); - assert(r == 0 || r == DB_NOTFOUND); - if (r == 0) { - // update it - if (verbose) printf("U"); - int oldc = get_int(oldvalue.data); - newc = htonl(oldc + c); // newc = oldc + newc - memcpy(val_buffer, &newc, sizeof newc); - r = db->put(db, txn, &key, &value, 0); - assert(r == 0); - } else if (r == DB_NOTFOUND) { - r = db->put(db, txn, &key, &value, 0); - assert(r == 0); - } - } -} - -static inline float tdiff (struct timeval *a, struct timeval *b) { - return (a->tv_sec - b->tv_sec) +1e-6*(a->tv_usec - b->tv_usec); -} - -static void insert_and_update_all(DB_ENV *db_env, DB *db, long nrows, long max_rows_per_txn, int key_range, long rows_per_report, bool do_update_callback, bool do_txn) { - int r; - struct timeval tstart; - r = gettimeofday(&tstart, NULL); assert(r == 0); - struct timeval tlast = tstart; - DB_TXN *txn = NULL; - if (do_txn) { - r = db_env->txn_begin(db_env, NULL, &txn, 0); assert(r == 0); - } - long n_rows_per_txn = 0; - 
long rowi; - for (rowi = 0; rowi < nrows; rowi++) { - int a = random() % key_range; - int b = random() % key_range; - int c = 1; - int d = 0; // timestamp - insert_and_update(db, txn, a, b, c, d, do_update_callback); - n_rows_per_txn++; - - // maybe commit - if (do_txn && n_rows_per_txn == max_rows_per_txn) { - r = txn->commit(txn, 0); assert(r == 0); - r = db_env->txn_begin(db_env, NULL, &txn, 0); assert(r == 0); - n_rows_per_txn = 0; - } - - // maybe report performance - if (((rowi + 1) % rows_per_report) == 0) { - struct timeval tnow; - r = gettimeofday(&tnow, NULL); assert(r == 0); - float last_time = tdiff(&tnow, &tlast); - float total_time = tdiff(&tnow, &tstart); - printf("%ld %.3f %.0f/s %.0f/s\n", rowi + 1, last_time, rows_per_report/last_time, rowi/total_time); fflush(stdout); - tlast = tnow; - } - } - - if (do_txn) { - r = txn->commit(txn, 0); assert(r == 0); - } - struct timeval tnow; - r = gettimeofday(&tnow, NULL); assert(r == 0); - printf("total %ld %.3f %.0f/s\n", nrows, tdiff(&tnow, &tstart), nrows/tdiff(&tnow, &tstart)); fflush(stdout); -} - -int main(int argc, char *argv[]) { -#if defined(TOKUDB) - char *db_env_dir = "update.env.tokudb"; -#else - char *db_env_dir = "update.env.bdb"; -#endif - int db_env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG; - char *db_filename = "update.db"; - long rows = 1000000000; - long rows_per_txn = 100; - long rows_per_report = 100000; - int key_range = 1000000; -#if defined(TOKUDB) - bool do_update_callback = true; -#else - bool do_update_callback = false; -#endif - bool do_txn = false; - u_int64_t cachesize = 1000000000; - u_int32_t pagesize = 0; -#if defined(TOKUDB) - u_int32_t checkpoint_period = 60; -#endif - - int i; - for (i = 1; i < argc; i++) { - char *arg = argv[i]; - if (strcmp(arg, "--verbose") == 0) { - verbose++; - continue; - } - if (strcmp(arg, "--rows") == 0 && i+1 < argc) { - rows = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--rows_per_txn") == 0 && i+1 < argc) { - rows_per_txn = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--rows_per_report") == 0 && i+1 < argc) { - rows_per_report = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--key_range") == 0 && i+1 < argc) { - key_range = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--txn") == 0 && i+1 < argc) { - do_txn = atoi(argv[++i]) != 0; - continue; - } - if (strcmp(arg, "--pagesize") == 0 && i+1 < argc) { - pagesize = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--cachesize") == 0 && i+1 < argc) { - cachesize = atol(argv[++i]); - continue; - } - if (strcmp(arg, "--update_callback") == 0 && i+1 < argc) { - do_update_callback = atoi(argv[++i]) != 0; - continue; - } - if (strcmp(arg, "--key_size") == 0 && i+1 < argc) { - key_size = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--val_size") == 0 && i+1 < argc) { - val_size = atoi(argv[++i]); - continue; - } -#if defined(TOKUDB) - if (strcmp(arg, "--checkpoint_period") == 0 && i+1 < argc) { - checkpoint_period = atoi(argv[++i]); - continue; - } -#endif - - assert(0); - } - - int r; - char rm_cmd[strlen(db_env_dir) + strlen("rm -rf ") + 1]; - snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", db_env_dir); - r = system(rm_cmd); assert(r == 0); - - r = mkdir(db_env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert(r == 0); - - // create and open the env - DB_ENV *db_env = NULL; - r = db_env_create(&db_env, 0); assert(r == 0); -#if defined(TOKUDB) - db_env->set_update(db_env, my_update_callback); -#endif - if (cachesize) { - if 
(verbose) printf("cachesize %llu\n", (unsigned long long)cachesize); - const u_int64_t gig = 1 << 30; - r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); - } - if (!do_txn) - db_env_open_flags &= ~(DB_INIT_TXN | DB_INIT_LOG); - db_env->set_errcall(db_env, db_error); - if (verbose) printf("env %s\n", db_env_dir); - r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if defined(TOKUDB) - if (checkpoint_period) { - r = db_env->checkpointing_set_period(db_env, checkpoint_period); assert(r == 0); - u_int32_t period; - r = db_env->checkpointing_get_period(db_env, &period); assert(r == 0 && period == checkpoint_period); - } -#endif - - // create the db - DB *db = NULL; - r = db_create(&db, db_env, 0); assert(r == 0); - DB_TXN *create_txn = NULL; - if (do_txn) { - r = db_env->txn_begin(db_env, NULL, &create_txn, 0); assert(r == 0); - } - if (pagesize) { - r = db->set_pagesize(db, pagesize); assert(r == 0); - } - r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); - if (do_txn) { - r = create_txn->commit(create_txn, 0); assert(r == 0); - } - - // insert on duplicate key update - insert_and_update_all(db_env, db, rows, rows_per_txn, key_range, rows_per_report, do_update_callback, do_txn); - - // shutdown - r = db->close(db, 0); assert(r == 0); db = NULL; - r = db_env->close(db_env, 0); assert(r == 0); db_env = NULL; - - return 0; -} diff --git a/storage/tokudb/ft-index/ft/CMakeLists.txt b/storage/tokudb/ft-index/ft/CMakeLists.txt index 3627f1c3dfee5..a433c7fc3a7d3 100644 --- a/storage/tokudb/ft-index/ft/CMakeLists.txt +++ b/storage/tokudb/ft-index/ft/CMakeLists.txt @@ -7,15 +7,17 @@ set_source_files_properties( "${CMAKE_CURRENT_BINARY_DIR}/log_header.h" PROPERTIES GENERATED TRUE) -add_executable(logformat logformat.cc) +add_executable(logformat logger/logformat.cc) target_link_libraries(logformat ${LIBTOKUPORTABILITY}_static) +add_space_separated_property(TARGET logformat LINK_FLAGS --coverage) + add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/log_code.cc" OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/log_print.cc" OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/log_header.h" COMMAND $ . 
- DEPENDS logformat + DEPENDS logger/logformat ) add_custom_target( generate_log_code @@ -23,55 +25,52 @@ add_custom_target( ) set(FT_SOURCES - background_job_manager - block_allocator - block_table bndata - cachetable - checkpoint - compress - dbufio - fifo + cachetable/background_job_manager + cachetable/cachetable + cachetable/checkpoint + cursor ft ft-cachetable-wrappers ft-flusher ft-hot-flusher - ftloader - ftloader-callback - ft_msg - ft_node-serialize - ft-node-deserialize ft-ops - ft-serialize ft-test-helpers ft-verify - key + loader/callbacks + loader/dbufio + loader/loader + loader/pqueue leafentry le-cursor - logcursor - logfilemgr - logger - log_upgrade - memarena - minicron - omt - pqueue - queue - quicklz - recover - rollback - rollback-apply - rollback-ct-callbacks - rollback_log_node_cache - roll - sub_block - txn - txn_child_manager - txn_manager + logger/logcursor + logger/logfilemgr + logger/logger + logger/log_upgrade + logger/recover + msg + msg_buffer + node + pivotkeys + serialize/block_allocator + serialize/block_allocator_strategy + serialize/block_table + serialize/compress + serialize/ft_node-serialize + serialize/ft-node-deserialize + serialize/ft-serialize + serialize/quicklz + serialize/sub_block + txn/rollback + txn/rollback-apply + txn/rollback-ct-callbacks + txn/rollback_log_node_cache + txn/roll + txn/txn + txn/txn_child_manager + txn/txn_manager + txn/xids ule - x1764 - xids - ybt "${CMAKE_CURRENT_BINARY_DIR}/log_code" "${CMAKE_CURRENT_BINARY_DIR}/log_print" ) @@ -88,24 +87,7 @@ add_dependencies(ft_static install_tdb_h generate_log_code build_lzma) ## link with lzma (which should be static) and link dependers with zlib target_link_libraries(ft LINK_PRIVATE util_static lzma ${LIBTOKUPORTABILITY}) -target_link_libraries(ft LINK_PUBLIC ${ZLIB_LIBRARY} ) +target_link_libraries(ft LINK_PUBLIC z) target_link_libraries(ft_static LINK_PRIVATE lzma) -## build the bins in this directory -foreach(tool tokuftdump tdb_logprint tdb-recover ftverify) - add_executable(${tool} ${tool}.cc) - add_dependencies(${tool} install_tdb_h) - target_link_libraries(${tool} ft_static util_static ${ZLIB_LIBRARY} lzma ${LIBTOKUPORTABILITY}_static ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) - add_space_separated_property(TARGET ${tool} COMPILE_FLAGS -fvisibility=hidden) -endforeach(tool) - -# link in math.h library just for this tool. -target_link_libraries(ftverify m) - -install( - TARGETS tokuftdump - COMPONENT Server - DESTINATION ${INSTALL_BINDIR} - ) - add_subdirectory(tests) diff --git a/storage/tokudb/ft-index/ft/block_allocator.cc b/storage/tokudb/ft-index/ft/block_allocator.cc deleted file mode 100644 index a16df3537600c..0000000000000 --- a/storage/tokudb/ft-index/ft/block_allocator.cc +++ /dev/null @@ -1,473 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#ident "$Id$" - -#include "block_allocator.h" -#include -#include -#include -#include -#include - -// Here's a very simple implementation. -// It's not very fast at allocating or freeing. -// Previous implementation used next_fit, but now use first_fit since we are moving blocks around to reduce file size. - -struct block_allocator { - uint64_t reserve_at_beginning; // How much to reserve at the beginning - uint64_t alignment; // Block alignment - uint64_t n_blocks; // How many blocks - uint64_t blocks_array_size; // How big is the blocks_array. Must be >= n_blocks. - struct block_allocator_blockpair *blocks_array; // These blocks are sorted by address. - uint64_t n_bytes_in_use; // including the reserve_at_beginning -}; - -void -block_allocator_validate (BLOCK_ALLOCATOR ba) { - uint64_t i; - uint64_t n_bytes_in_use = ba->reserve_at_beginning; - for (i=0; in_blocks; i++) { - n_bytes_in_use += ba->blocks_array[i].size; - if (i>0) { - assert(ba->blocks_array[i].offset > ba->blocks_array[i-1].offset); - assert(ba->blocks_array[i].offset >= ba->blocks_array[i-1].offset + ba->blocks_array[i-1].size ); - } - } - assert(n_bytes_in_use == ba->n_bytes_in_use); -} - -#if 0 -#define VALIDATE(b) block_allocator_validate(b) -#else -#define VALIDATE(b) ((void)0) -#endif - -#if 0 -void -block_allocator_print (BLOCK_ALLOCATOR ba) { - uint64_t i; - for (i=0; in_blocks; i++) { - printf("%" PRId64 ":%" PRId64 " ", ba->blocks_array[i].offset, ba->blocks_array[i].size); - } - printf("\n"); - VALIDATE(ba); -} -#endif - -void -create_block_allocator (BLOCK_ALLOCATOR *ba, uint64_t reserve_at_beginning, uint64_t alignment) { - assert(alignment>=512 && 0==(alignment%512)); // the alignment must be at least 512 and aligned with 512 to make DIRECT_IO happy. - BLOCK_ALLOCATOR XMALLOC(result); - result->reserve_at_beginning = reserve_at_beginning; - result->alignment = alignment; - result->n_blocks = 0; - result->blocks_array_size = 1; - XMALLOC_N(result->blocks_array_size, result->blocks_array); - result->n_bytes_in_use = reserve_at_beginning; - *ba = result; - VALIDATE(result); -} - -void -destroy_block_allocator (BLOCK_ALLOCATOR *bap) { - BLOCK_ALLOCATOR ba = *bap; - *bap = 0; - toku_free(ba->blocks_array); - toku_free(ba); -} - -static void -grow_blocks_array_by (BLOCK_ALLOCATOR ba, uint64_t n_to_add) { - if (ba->n_blocks + n_to_add > ba->blocks_array_size) { - uint64_t new_size = ba->n_blocks + n_to_add; - uint64_t at_least = ba->blocks_array_size * 2; - if (at_least > new_size) { - new_size = at_least; - } - ba->blocks_array_size = new_size; - XREALLOC_N(ba->blocks_array_size, ba->blocks_array); - } -} - - -static void -grow_blocks_array (BLOCK_ALLOCATOR ba) { - grow_blocks_array_by(ba, 1); -} - -void -block_allocator_merge_blockpairs_into (uint64_t d, struct block_allocator_blockpair dst[/*d*/], - uint64_t s, const struct block_allocator_blockpair src[/*s*/]) -{ - uint64_t tail = d+s; - while (d>0 && s>0) { - struct block_allocator_blockpair *dp = &dst[d-1]; - struct block_allocator_blockpair const *sp = &src[s-1]; - struct block_allocator_blockpair *tp = &dst[tail-1]; - assert(tail>0); - if (dp->offset > sp->offset) { - *tp = *dp; - d--; - tail--; - } else { - *tp = *sp; - s--; - tail--; - } - } - while (d>0) { - struct block_allocator_blockpair *dp = &dst[d-1]; - struct block_allocator_blockpair *tp = &dst[tail-1]; - *tp = *dp; - d--; - tail--; - } - while (s>0) { - struct block_allocator_blockpair const *sp = &src[s-1]; - struct block_allocator_blockpair *tp = &dst[tail-1]; - *tp = *sp; - s--; - tail--; - } 
-} - -static int -compare_blockpairs (const void *av, const void *bv) { - const struct block_allocator_blockpair *a = (const struct block_allocator_blockpair *) av; - const struct block_allocator_blockpair *b = (const struct block_allocator_blockpair *) bv; - if (a->offset < b->offset) return -1; - if (a->offset > b->offset) return +1; - return 0; -} - -void -block_allocator_alloc_blocks_at (BLOCK_ALLOCATOR ba, uint64_t n_blocks, struct block_allocator_blockpair pairs[/*n_blocks*/]) -// See the documentation in block_allocator.h -{ - VALIDATE(ba); - qsort(pairs, n_blocks, sizeof(*pairs), compare_blockpairs); - for (uint64_t i=0; i= ba->reserve_at_beginning); - assert(pairs[i].offset%ba->alignment == 0); - ba->n_bytes_in_use += pairs[i].size; - invariant(pairs[i].size > 0); //Allocator does not support size 0 blocks. See block_allocator_free_block. - } - grow_blocks_array_by(ba, n_blocks); - block_allocator_merge_blockpairs_into(ba->n_blocks, ba->blocks_array, - n_blocks, pairs); - ba->n_blocks += n_blocks; - VALIDATE(ba); -} - -void -block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t offset) { - struct block_allocator_blockpair p = {.offset = offset, .size=size}; - // Just do a linear search for the block. - // This data structure is a sorted array (no gaps or anything), so the search isn't really making this any slower than the insertion. - // To speed up the insertion when opening a file, we provide the block_allocator_alloc_blocks_at function. - block_allocator_alloc_blocks_at(ba, 1, &p); -} - -static inline uint64_t -align (uint64_t value, BLOCK_ALLOCATOR ba) -// Effect: align a value by rounding up. -{ - return ((value+ba->alignment-1)/ba->alignment)*ba->alignment; -} - -void block_allocator_alloc_block(BLOCK_ALLOCATOR ba, uint64_t size, uint64_t *offset) -// Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512). -{ - invariant(size > 0); //Allocator does not support size 0 blocks. See block_allocator_free_block. - grow_blocks_array(ba); - ba->n_bytes_in_use += size; - if (ba->n_blocks==0) { - assert(ba->n_bytes_in_use == ba->reserve_at_beginning + size); // we know exactly how many are in use - ba->blocks_array[0].offset = align(ba->reserve_at_beginning, ba); - ba->blocks_array[0].size = size; - *offset = ba->blocks_array[0].offset; - ba->n_blocks++; - return; - } - // Implement first fit. - { - uint64_t end_of_reserve = align(ba->reserve_at_beginning, ba); - if (end_of_reserve + size <= ba->blocks_array[0].offset ) { - // Check to see if the space immediately after the reserve is big enough to hold the new block. - struct block_allocator_blockpair *bp = &ba->blocks_array[0]; - memmove(bp+1, bp, (ba->n_blocks)*sizeof(*bp)); - bp[0].offset = end_of_reserve; - bp[0].size = size; - ba->n_blocks++; - *offset = end_of_reserve; - VALIDATE(ba); - return; - } - } - for (uint64_t blocknum = 0; blocknum +1 < ba->n_blocks; blocknum ++) { - // Consider the space after blocknum - struct block_allocator_blockpair *bp = &ba->blocks_array[blocknum]; - uint64_t this_offset = bp[0].offset; - uint64_t this_size = bp[0].size; - uint64_t answer_offset = align(this_offset + this_size, ba); - if (answer_offset + size > bp[1].offset) continue; // The block we want doesn't fit after this block. - // It fits, so allocate it here. 
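For reference, block_allocator_alloc_blocks_at above sorts the incoming pairs with qsort and then folds them into the already-sorted blocks array using block_allocator_merge_blockpairs_into, which merges from the tails so no unread destination entry is overwritten. A minimal standalone sketch of that in-place tail merge follows; the blockpair type and merge_blockpairs_into name here are illustrative stand-ins, not the removed TokuDB definitions.

#include <cstdint>
#include <cstdio>
#include <vector>

struct blockpair {
    uint64_t offset;
    uint64_t size;
};

// Merge src (s sorted entries) into dst, which already holds d sorted entries
// in dst[0..d) and has room for d+s entries. Walking both inputs from their
// tails means nothing in dst is overwritten before it has been read.
static void merge_blockpairs_into(uint64_t d, blockpair dst[],
                                  uint64_t s, const blockpair src[]) {
    uint64_t tail = d + s;
    while (d > 0 && s > 0) {
        blockpair *tp = &dst[--tail];
        if (dst[d - 1].offset > src[s - 1].offset) {
            *tp = dst[--d];
        } else {
            *tp = src[--s];
        }
    }
    while (d > 0) dst[--tail] = dst[--d];
    while (s > 0) dst[--tail] = src[--s];
}

int main() {
    std::vector<blockpair> dst = {{0, 10}, {100, 10}, {300, 10}};
    std::vector<blockpair> src = {{50, 10}, {200, 10}};
    uint64_t d = dst.size(), s = src.size();
    dst.resize(d + s);                      // make room for the merged result
    merge_blockpairs_into(d, dst.data(), s, src.data());
    for (const blockpair &p : dst) {
        printf("%llu:%llu ", (unsigned long long)p.offset, (unsigned long long)p.size);
    }
    printf("\n");                           // 0:10 50:10 100:10 200:10 300:10
    return 0;
}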
- memmove(bp+2, bp+1, (ba->n_blocks - blocknum -1)*sizeof(*bp)); - bp[1].offset = answer_offset; - bp[1].size = size; - ba->n_blocks++; - *offset = answer_offset; - VALIDATE(ba); - return; - } - // It didn't fit anywhere, so fit it on the end. - assert(ba->n_blocks < ba->blocks_array_size); - struct block_allocator_blockpair *bp = &ba->blocks_array[ba->n_blocks]; - uint64_t answer_offset = align(bp[-1].offset+bp[-1].size, ba); - bp->offset = answer_offset; - bp->size = size; - ba->n_blocks++; - *offset = answer_offset; - VALIDATE(ba); -} - -static int64_t -find_block (BLOCK_ALLOCATOR ba, uint64_t offset) -// Find the index in the blocks array that has a particular offset. Requires that the block exist. -// Use binary search so it runs fast. -{ - VALIDATE(ba); - if (ba->n_blocks==1) { - assert(ba->blocks_array[0].offset == offset); - return 0; - } - uint64_t lo = 0; - uint64_t hi = ba->n_blocks; - while (1) { - assert(loblocks_array[mid].offset; - //printf("lo=%" PRId64 " hi=%" PRId64 " mid=%" PRId64 " thisoff=%" PRId64 " offset=%" PRId64 "\n", lo, hi, mid, thisoff, offset); - if (thisoff < offset) { - lo = mid+1; - } else if (thisoff > offset) { - hi = mid; - } else { - return mid; - } - } -} - -// To support 0-sized blocks, we need to include size as an input to this function. -// All 0-sized blocks at the same offset can be considered identical, but -// a 0-sized block can share offset with a non-zero sized block. -// The non-zero sized block is not exchangable with a zero sized block (or vice versa), -// so inserting 0-sized blocks can cause corruption here. -void -block_allocator_free_block (BLOCK_ALLOCATOR ba, uint64_t offset) { - VALIDATE(ba); - int64_t bn = find_block(ba, offset); - assert(bn>=0); // we require that there is a block with that offset. Might as well abort if no such block exists. - ba->n_bytes_in_use -= ba->blocks_array[bn].size; - memmove(&ba->blocks_array[bn], &ba->blocks_array[bn+1], (ba->n_blocks-bn-1) * sizeof(struct block_allocator_blockpair)); - ba->n_blocks--; - VALIDATE(ba); -} - -uint64_t -block_allocator_block_size (BLOCK_ALLOCATOR ba, uint64_t offset) { - int64_t bn = find_block(ba, offset); - assert(bn>=0); // we require that there is a block with that offset. Might as well abort if no such block exists. - return ba->blocks_array[bn].size; -} - -uint64_t -block_allocator_allocated_limit (BLOCK_ALLOCATOR ba) { - if (ba->n_blocks==0) return ba->reserve_at_beginning; - else { - struct block_allocator_blockpair *last = &ba->blocks_array[ba->n_blocks-1]; - return last->offset + last->size; - } -} - -int -block_allocator_get_nth_block_in_layout_order (BLOCK_ALLOCATOR ba, uint64_t b, uint64_t *offset, uint64_t *size) -// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. -// Return the offset and size of the block with that number. -// Return 0 if there is a block that big, return nonzero if b is too big. 
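find_block above locates a block by its starting offset with a binary search over the offset-sorted array, and block_allocator_free_block then closes the gap with a memmove. A small self-contained sketch of that lookup-and-remove pattern is shown below; the types and function names are simplified stand-ins, not the removed implementation.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>

struct blockpair { uint64_t offset; uint64_t size; };

// Binary search for the entry whose offset equals 'offset'.
// The array is sorted by offset and the block is required to exist.
static int64_t find_block(const blockpair *blocks, uint64_t n_blocks, uint64_t offset) {
    uint64_t lo = 0, hi = n_blocks;
    while (true) {
        assert(lo < hi);                    // otherwise no such block exists
        uint64_t mid = (lo + hi) / 2;
        uint64_t thisoff = blocks[mid].offset;
        if (thisoff < offset)      lo = mid + 1;
        else if (thisoff > offset) hi = mid;
        else                       return (int64_t)mid;
    }
}

// Remove the block starting at 'offset', shifting the tail of the array down.
static void free_block(blockpair *blocks, uint64_t *n_blocks, uint64_t offset) {
    int64_t bn = find_block(blocks, *n_blocks, offset);
    memmove(&blocks[bn], &blocks[bn + 1], (*n_blocks - bn - 1) * sizeof(blockpair));
    (*n_blocks)--;
}

int main() {
    blockpair blocks[] = {{0, 16}, {64, 32}, {128, 8}};
    uint64_t n = 3;
    printf("size at 64 = %llu\n",
           (unsigned long long)blocks[find_block(blocks, n, 64)].size);
    free_block(blocks, &n, 64);             // now {0,16} and {128,8} remain
    printf("blocks left = %llu\n", (unsigned long long)n);
    return 0;
}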
-{ - if (b==0) { - *offset=0; - *size =ba->reserve_at_beginning; - return 0; - } else if (b > ba->n_blocks) { - return -1; - } else { - *offset=ba->blocks_array[b-1].offset; - *size =ba->blocks_array[b-1].size; - return 0; - } -} - -void -block_allocator_get_unused_statistics(BLOCK_ALLOCATOR ba, TOKU_DB_FRAGMENTATION report) { - //Requires: report->file_size_bytes is filled in - //Requires: report->data_bytes is filled in - //Requires: report->checkpoint_bytes_additional is filled in - - assert(ba->n_bytes_in_use == report->data_bytes + report->checkpoint_bytes_additional); - - report->unused_bytes = 0; - report->unused_blocks = 0; - report->largest_unused_block = 0; - if (ba->n_blocks > 0) { - //Deal with space before block 0 and after reserve: - { - struct block_allocator_blockpair *bp = &ba->blocks_array[0]; - assert(bp->offset >= align(ba->reserve_at_beginning, ba)); - uint64_t free_space = bp->offset - align(ba->reserve_at_beginning, ba); - if (free_space > 0) { - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - - //Deal with space between blocks: - for (uint64_t blocknum = 0; blocknum +1 < ba->n_blocks; blocknum ++) { - // Consider the space after blocknum - struct block_allocator_blockpair *bp = &ba->blocks_array[blocknum]; - uint64_t this_offset = bp[0].offset; - uint64_t this_size = bp[0].size; - uint64_t end_of_this_block = align(this_offset+this_size, ba); - uint64_t next_offset = bp[1].offset; - uint64_t free_space = next_offset - end_of_this_block; - if (free_space > 0) { - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - - //Deal with space after last block - { - struct block_allocator_blockpair *bp = &ba->blocks_array[ba->n_blocks-1]; - uint64_t this_offset = bp[0].offset; - uint64_t this_size = bp[0].size; - uint64_t end_of_this_block = align(this_offset+this_size, ba); - if (end_of_this_block < report->file_size_bytes) { - uint64_t free_space = report->file_size_bytes - end_of_this_block; - assert(free_space > 0); - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - } - else { - //No blocks. Just the reserve. 
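block_allocator_get_unused_statistics above walks the sorted blocks and measures three kinds of gaps: before the first block (after the aligned reserve), between consecutive blocks, and between the last block and the end of the file. A condensed sketch of that gap accounting follows, with illustrative names and a fixed alignment; it is not the removed implementation.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct blockpair { uint64_t offset; uint64_t size; };

static uint64_t align_up(uint64_t v, uint64_t alignment) {
    return ((v + alignment - 1) / alignment) * alignment;
}

struct frag_report {
    uint64_t unused_bytes = 0;
    uint64_t unused_blocks = 0;
    uint64_t largest_unused_block = 0;
};

// Account one gap of free space, if any.
static void note_gap(frag_report *r, uint64_t gap) {
    if (gap == 0) return;
    r->unused_bytes += gap;
    r->unused_blocks++;
    r->largest_unused_block = std::max(r->largest_unused_block, gap);
}

// blocks must be sorted by offset; reserve is the space kept at the start of the file.
static frag_report unused_statistics(const std::vector<blockpair> &blocks,
                                     uint64_t reserve, uint64_t alignment,
                                     uint64_t file_size) {
    frag_report r;
    uint64_t prev_end = align_up(reserve, alignment);
    for (const blockpair &b : blocks) {
        note_gap(&r, b.offset - prev_end);                 // gap before this block
        prev_end = align_up(b.offset + b.size, alignment);
    }
    if (prev_end < file_size) note_gap(&r, file_size - prev_end);  // tail of the file
    return r;
}

int main() {
    std::vector<blockpair> blocks = {{4096, 1000}, {12288, 4096}};
    frag_report r = unused_statistics(blocks, 4096, 4096, 32768);
    printf("unused=%llu in %llu gaps, largest=%llu\n",
           (unsigned long long)r.unused_bytes,
           (unsigned long long)r.unused_blocks,
           (unsigned long long)r.largest_unused_block);
    return 0;
}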
- uint64_t end_of_this_block = align(ba->reserve_at_beginning, ba); - if (end_of_this_block < report->file_size_bytes) { - uint64_t free_space = report->file_size_bytes - end_of_this_block; - assert(free_space > 0); - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } -} diff --git a/storage/tokudb/ft-index/ft/block_allocator.h b/storage/tokudb/ft-index/ft/block_allocator.h deleted file mode 100644 index 289e7251c84f8..0000000000000 --- a/storage/tokudb/ft-index/ft/block_allocator.h +++ /dev/null @@ -1,230 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef BLOCK_ALLOCATOR_H -#define BLOCK_ALLOCATOR_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
- - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "fttypes.h" - - -#define BLOCK_ALLOCATOR_ALIGNMENT 4096 -// How much must be reserved at the beginning for the block? -// The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root. -// So 4096 should be enough. -#define BLOCK_ALLOCATOR_HEADER_RESERVE 4096 -#if (BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT) != 0 -#error -#endif - -// Block allocator. -// Overview: A block allocator manages the allocation of variable-sized blocks. -// The translation of block numbers to addresses is handled elsewhere. -// The allocation of block numbers is handled elsewhere. - -// We can create a block allocator. -// When creating a block allocator we also specify a certain-sized -// block at the beginning that is preallocated (and cannot be allocated -// or freed) - -// We can allocate blocks of a particular size at a particular location. -// We can allocate blocks of a particular size at a location chosen by the allocator. -// We can free blocks. -// We can determine the size of a block. - - -#define BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE (2*BLOCK_ALLOCATOR_HEADER_RESERVE) - -typedef struct block_allocator *BLOCK_ALLOCATOR; - -void create_block_allocator (BLOCK_ALLOCATOR * ba, uint64_t reserve_at_beginning, uint64_t alignment); -// Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. -// All blocks be start on a multiple of ALIGNMENT. -// Aborts if we run out of memory. -// Parameters -// ba (OUT): Result stored here. -// reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. -// alignment (IN) Block alignment. - -void destroy_block_allocator (BLOCK_ALLOCATOR *ba); -// Effect: Destroy a block allocator at *ba. -// Also, set *ba=NULL. -// Rationale: If there was only one copy of the pointer, this kills that copy too. 
-// Paramaters: -// ba (IN/OUT): - - -void block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t offset); -// Effect: Allocate a block of the specified size at a particular offset. -// Aborts if anything goes wrong. -// The performance of this function may be as bad as Theta(N), where N is the number of blocks currently in use. -// Usage note: To allocate several blocks (e.g., when opening a BRT), use block_allocator_alloc_blocks_at(). -// Requires: The resulting block may not overlap any other allocated block. -// And the offset must be a multiple of the block alignment. -// Parameters: -// ba (IN/OUT): The block allocator. (Modifies ba.) -// size (IN): The size of the block. -// offset (IN): The location of the block. - - -struct block_allocator_blockpair { - uint64_t offset; - uint64_t size; -}; -void block_allocator_alloc_blocks_at (BLOCK_ALLOCATOR ba, uint64_t n_blocks, struct block_allocator_blockpair *pairs); -// Effect: Take pairs in any order, and add them all, as if we did block_allocator_alloc_block() on each pair. -// This should run in time O(N + M log M) where N is the number of blocks in ba, and M is the number of new blocks. -// Modifies: pairs (sorts them). - -void block_allocator_alloc_block (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t *offset); -// Effect: Allocate a block of the specified size at an address chosen by the allocator. -// Aborts if anything goes wrong. -// The block address will be a multiple of the alignment. -// Parameters: -// ba (IN/OUT): The block allocator. (Modifies ba.) -// size (IN): The size of the block. (The size does not have to be aligned.) -// offset (OUT): The location of the block. - -void block_allocator_free_block (BLOCK_ALLOCATOR ba, uint64_t offset); -// Effect: Free the block at offset. -// Requires: There must be a block currently allocated at that offset. -// Parameters: -// ba (IN/OUT): The block allocator. (Modifies ba.) -// offset (IN): The offset of the block. - - -uint64_t block_allocator_block_size (BLOCK_ALLOCATOR ba, uint64_t offset); -// Effect: Return the size of the block that starts at offset. -// Requires: There must be a block currently allocated at that offset. -// Parameters: -// ba (IN/OUT): The block allocator. (Modifies ba.) -// offset (IN): The offset of the block. - -void block_allocator_validate (BLOCK_ALLOCATOR ba); -// Effect: Check to see if the block allocator is OK. This may take a long time. -// Usage Hints: Probably only use this for unit tests. - -void block_allocator_print (BLOCK_ALLOCATOR ba); -// Effect: Print information about the block allocator. -// Rationale: This is probably useful only for debugging. - -uint64_t block_allocator_allocated_limit (BLOCK_ALLOCATOR ba); -// Effect: Return the unallocated block address of "infinite" size. -// That is, return the smallest address that is above all the allocated blocks. -// Rationale: When writing the root FIFO we don't know how big the block is. -// So we start at the "infinite" block, write the fifo, and then -// allocate_block_at of the correct size and offset to account for the root FIFO. - -int block_allocator_get_nth_block_in_layout_order (BLOCK_ALLOCATOR ba, uint64_t b, uint64_t *offset, uint64_t *size); -// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. -// Return the offset and size of the block with that number. -// Return 0 if there is a block that big, return nonzero if b is too big. 
-// Rationale: This is probably useful only for tests. - -void block_allocator_get_unused_statistics(BLOCK_ALLOCATOR ba, TOKU_DB_FRAGMENTATION report); -// Effect: Fill in report to indicate how the file is used. -// Requires: -// report->file_size_bytes is filled in -// report->data_bytes is filled in -// report->checkpoint_bytes_additional is filled in - -void block_allocator_merge_blockpairs_into (uint64_t d, struct block_allocator_blockpair dst[/*d*/], - uint64_t s, const struct block_allocator_blockpair src[/*s*/]); -// Effect: Merge dst[d] and src[s] into dst[d+s], merging in place. -// Initially dst and src hold sorted arrays (sorted by increasing offset). -// Finally dst contains all d+s elements sorted in order. -// Requires: -// dst and src are sorted. -// dst must be large enough. -// No blocks may overlap. -// Rationale: This is exposed so it can be tested by a glass box tester. Otherwise it would be static (file-scope) function inside block_allocator.c - - -#endif diff --git a/storage/tokudb/ft-index/ft/block_table.cc b/storage/tokudb/ft-index/ft/block_table.cc deleted file mode 100644 index 3e0bae1709ad7..0000000000000 --- a/storage/tokudb/ft-index/ft/block_table.cc +++ /dev/null @@ -1,1199 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. 
- - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include -#include "ft-internal.h" // ugly but pragmatic, need access to dirty bits while holding translation lock -#include "fttypes.h" -#include "block_table.h" -#include "memory.h" -#include "toku_assert.h" -#include -#include "block_allocator.h" -#include "rbuf.h" -#include "wbuf.h" -#include - -//When the translation (btt) is stored on disk: -// In Header: -// size_on_disk -// location_on_disk -// In block translation table (in order): -// smallest_never_used_blocknum -// blocknum_freelist_head -// array -// a checksum -struct translation { //This is the BTT (block translation table) - enum translation_type type; - int64_t length_of_array; //Number of elements in array (block_translation). always >= smallest_never_used_blocknum - BLOCKNUM smallest_never_used_blocknum; - BLOCKNUM blocknum_freelist_head; // next (previously used) unused blocknum (free list) - struct block_translation_pair *block_translation; - - // Where and how big is the block translation vector stored on disk. 
- // size_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].size - // location_on is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff -}; - -static const BLOCKNUM freelist_null = {-1}; // in a freelist, this indicates end of list -static const DISKOFF size_is_free = (DISKOFF)-1; // value of block_translation_pair.size if blocknum is unused -static const DISKOFF diskoff_unused = (DISKOFF)-2; // value of block_translation_pair.u.diskoff if blocknum is used but does not yet have a diskblock - -/******** - * There are three copies of the translation table (btt) in the block table: - * - * checkpointed Is initialized by deserializing from disk, - * and is the only version ever read from disk. - * When read from disk it is copied to current. - * It is immutable. It can be replaced by an inprogress btt. - * - * inprogress Is only filled by copying from current, - * and is the only version ever serialized to disk. - * (It is serialized to disk on checkpoint and clean shutdown.) - * At end of checkpoint it replaces 'checkpointed'. - * During a checkpoint, any 'pending' dirty writes will update - * inprogress. - * - * current Is initialized by copying from checkpointed, - * is the only version ever modified while the database is in use, - * and is the only version ever copied to inprogress. - * It is never stored on disk. - ********/ - - -struct block_table { - struct translation current; // The current translation is the one used by client threads. It is not represented on disk. - struct translation inprogress; // the translation used by the checkpoint currently in progress. If the checkpoint thread allocates a block, it must also update the current translation. - struct translation checkpointed; // the translation for the data that shall remain inviolate on disk until the next checkpoint finishes, after which any blocks used only in this translation can be freed. - - // The in-memory data structure for block allocation. There is no on-disk data structure for block allocation. - // Note: This is *allocation* not *translation*. The block_allocator is unaware of which blocks are used for which translation, but simply allocates and deallocates blocks. - BLOCK_ALLOCATOR block_allocator; - toku_mutex_t mutex; - struct nb_mutex safe_file_size_lock; - bool checkpoint_skipped; - uint64_t safe_file_size; -}; - -//forward decls -static int64_t calculate_size_on_disk (struct translation *t); -static inline bool translation_prevents_freeing (struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair); -static inline void lock_for_blocktable (BLOCK_TABLE bt); -static inline void unlock_for_blocktable (BLOCK_TABLE bt); - - - -static void -ft_set_dirty(FT ft, bool for_checkpoint){ - toku_mutex_assert_locked(&ft->blocktable->mutex); - paranoid_invariant(ft->h->type == FT_CURRENT); - if (for_checkpoint) { - paranoid_invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); - ft->checkpoint_header->dirty = 1; - } - else { - ft->h->dirty = 1; - } -} - -static void -maybe_truncate_file(BLOCK_TABLE bt, int fd, uint64_t size_needed_before) { - toku_mutex_assert_locked(&bt->mutex); - uint64_t new_size_needed = block_allocator_allocated_limit(bt->block_allocator); - //Save a call to toku_os_get_file_size (kernel call) if unlikely to be useful. - if (new_size_needed < size_needed_before && new_size_needed < bt->safe_file_size) { - nb_mutex_lock(&bt->safe_file_size_lock, &bt->mutex); - - // Must hold safe_file_size_lock to change safe_file_size. 
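The comment block above describes the three copies of the translation table and how they move during a checkpoint: current is the only one mutated during normal operation, inprogress is a snapshot of current taken when the checkpoint starts and is the only one serialized, and checkpointed is replaced by inprogress once the checkpoint completes. A minimal sketch of that lifecycle follows, with a toy translation type and no locking or disk I/O; the names are illustrative, not the removed implementation.

#include <cassert>
#include <cstdint>
#include <vector>

// Toy stand-in for one blocknum -> (offset, size) mapping.
struct pair_t { int64_t diskoff; int64_t size; };

struct translation_t {
    std::vector<pair_t> map;     // indexed by blocknum
};

struct block_table_t {
    translation_t current;       // mutated by clients between checkpoints
    translation_t inprogress;    // snapshot serialized by the running checkpoint
    translation_t checkpointed;  // what the last completed checkpoint wrote
    bool checkpoint_running = false;
};

// Checkpoint begin: snapshot current into inprogress.
static void note_start_checkpoint(block_table_t *bt) {
    assert(!bt->checkpoint_running);
    bt->inprogress = bt->current;        // copy; current keeps changing afterwards
    bt->checkpoint_running = true;
}

// Checkpoint end: inprogress becomes the new checkpointed translation.
static void note_end_checkpoint(block_table_t *bt) {
    assert(bt->checkpoint_running);
    bt->checkpointed = std::move(bt->inprogress);
    bt->inprogress = translation_t{};
    bt->checkpoint_running = false;
}

int main() {
    block_table_t bt;
    bt.current.map = {{4096, 512}};
    note_start_checkpoint(&bt);
    bt.current.map[0] = {8192, 1024};    // client writes keep updating 'current'
    note_end_checkpoint(&bt);
    assert(bt.checkpointed.map[0].diskoff == 4096);   // checkpoint saw the snapshot
    assert(bt.current.map[0].diskoff == 8192);
    return 0;
}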
- if (new_size_needed < bt->safe_file_size) { - int64_t safe_file_size_before = bt->safe_file_size; - // Not safe to use the 'to-be-truncated' portion until truncate is done. - bt->safe_file_size = new_size_needed; - unlock_for_blocktable(bt); - - uint64_t size_after; - toku_maybe_truncate_file(fd, new_size_needed, safe_file_size_before, &size_after); - lock_for_blocktable(bt); - - bt->safe_file_size = size_after; - } - nb_mutex_unlock(&bt->safe_file_size_lock); - } -} - -void -toku_maybe_truncate_file_on_open(BLOCK_TABLE bt, int fd) { - lock_for_blocktable(bt); - maybe_truncate_file(bt, fd, bt->safe_file_size); - unlock_for_blocktable(bt); -} - - -static void -copy_translation(struct translation * dst, struct translation * src, enum translation_type newtype) { - paranoid_invariant(src->length_of_array >= src->smallest_never_used_blocknum.b); //verify invariant - paranoid_invariant(newtype==TRANSLATION_DEBUG || - (src->type == TRANSLATION_CURRENT && newtype == TRANSLATION_INPROGRESS) || - (src->type == TRANSLATION_CHECKPOINTED && newtype == TRANSLATION_CURRENT)); - dst->type = newtype; - dst->smallest_never_used_blocknum = src->smallest_never_used_blocknum; - dst->blocknum_freelist_head = src->blocknum_freelist_head; - // destination btt is of fixed size. Allocate+memcpy the exact length necessary. - dst->length_of_array = dst->smallest_never_used_blocknum.b; - XMALLOC_N(dst->length_of_array, dst->block_translation); - memcpy(dst->block_translation, - src->block_translation, - dst->length_of_array * sizeof(*dst->block_translation)); - //New version of btt is not yet stored on disk. - dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size = 0; - dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = diskoff_unused; -} - -int64_t -toku_block_get_blocks_in_use_unlocked(BLOCK_TABLE bt) { - BLOCKNUM b; - struct translation *t = &bt->current; - int64_t num_blocks = 0; - { - //Reserved blocknums do not get upgraded; They are part of the header. - for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) { - if (t->block_translation[b.b].size != size_is_free) { - num_blocks++; - } - } - } - return num_blocks; -} - -static void -maybe_optimize_translation(struct translation *t) { - //Reduce 'smallest_never_used_blocknum.b' (completely free blocknums instead of just - //on a free list. Doing so requires us to regenerate the free list. - //This is O(n) work, so do it only if you're already doing that. - - BLOCKNUM b; - paranoid_invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); - //Calculate how large the free suffix is. - int64_t freed; - { - for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS; b.b--) { - if (t->block_translation[b.b-1].size != size_is_free) { - break; - } - } - freed = t->smallest_never_used_blocknum.b - b.b; - } - if (freed>0) { - t->smallest_never_used_blocknum.b = b.b; - if (t->length_of_array/4 > t->smallest_never_used_blocknum.b) { - //We're using more memory than necessary to represent this now. Reduce. - uint64_t new_length = t->smallest_never_used_blocknum.b * 2; - XREALLOC_N(new_length, t->block_translation); - t->length_of_array = new_length; - //No need to zero anything out. - } - - //Regenerate free list. 
- t->blocknum_freelist_head.b = freelist_null.b; - for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) { - if (t->block_translation[b.b].size == size_is_free) { - t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; - t->blocknum_freelist_head = b; - } - } - } -} - -// block table must be locked by caller of this function -void -toku_block_translation_note_start_checkpoint_unlocked (BLOCK_TABLE bt) { - toku_mutex_assert_locked(&bt->mutex); - // Copy current translation to inprogress translation. - paranoid_invariant(bt->inprogress.block_translation == NULL); - //We're going to do O(n) work to copy the translation, so we - //can afford to do O(n) work by optimizing the translation - maybe_optimize_translation(&bt->current); - copy_translation(&bt->inprogress, &bt->current, TRANSLATION_INPROGRESS); - - bt->checkpoint_skipped = false; -} - -//#define PRNTF(str, b, siz, ad, bt) printf("%s[%d] %s %" PRId64 " %" PRId64 " %" PRId64 "\n", __FUNCTION__, __LINE__, str, b, siz, ad); fflush(stdout); if (bt) block_allocator_validate(((BLOCK_TABLE)(bt))->block_allocator); -//Debugging function -#define PRNTF(str, b, siz, ad, bt) - -void toku_block_translation_note_skipped_checkpoint (BLOCK_TABLE bt) { - //Purpose, alert block translation that the checkpoint was skipped, e.x. for a non-dirty header - lock_for_blocktable(bt); - paranoid_invariant_notnull(bt->inprogress.block_translation); - bt->checkpoint_skipped = true; - unlock_for_blocktable(bt); -} - -// Purpose: free any disk space used by previous checkpoint that isn't in use by either -// - current state -// - in-progress checkpoint -// capture inprogress as new checkpointed. -// For each entry in checkpointBTT -// if offset does not match offset in inprogress -// assert offset does not match offset in current -// free (offset,len) from checkpoint -// move inprogress to checkpoint (resetting type) -// inprogress = NULL -void -toku_block_translation_note_end_checkpoint (BLOCK_TABLE bt, int fd) { - // Free unused blocks - lock_for_blocktable(bt); - uint64_t allocated_limit_at_start = block_allocator_allocated_limit(bt->block_allocator); - paranoid_invariant_notnull(bt->inprogress.block_translation); - if (bt->checkpoint_skipped) { - toku_free(bt->inprogress.block_translation); - memset(&bt->inprogress, 0, sizeof(bt->inprogress)); - goto end; - } - - //Make certain inprogress was allocated space on disk - assert(bt->inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0); - assert(bt->inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > 0); - - { - int64_t i; - struct translation *t = &bt->checkpointed; - - for (i = 0; i < t->length_of_array; i++) { - struct block_translation_pair *pair = &t->block_translation[i]; - if (pair->size > 0 && !translation_prevents_freeing(&bt->inprogress, make_blocknum(i), pair)) { - assert(!translation_prevents_freeing(&bt->current, make_blocknum(i), pair)); - PRNTF("free", i, pair->size, pair->u.diskoff, bt); - block_allocator_free_block(bt->block_allocator, pair->u.diskoff); - } - } - toku_free(bt->checkpointed.block_translation); - bt->checkpointed = bt->inprogress; - bt->checkpointed.type = TRANSLATION_CHECKPOINTED; - memset(&bt->inprogress, 0, sizeof(bt->inprogress)); - maybe_truncate_file(bt, fd, allocated_limit_at_start); - } -end: - unlock_for_blocktable(bt); -} - -__attribute__((nonnull,const)) -static inline bool -is_valid_blocknum(struct translation *t, BLOCKNUM b) { - //Sanity check: Verify invariant - 
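maybe_optimize_translation above lowers smallest_never_used_blocknum by scanning the tail of the array for free entries, shrinks the array when it has become mostly slack, and then rebuilds the free list from scratch. A compact sketch of that trim-and-rebuild idea over a toy array follows; the names and sentinel values are illustrative, not the removed code.

#include <cstdint>
#include <cstdio>
#include <vector>

static const int64_t FREE_SIZE = -1;    // stand-in for TokuDB's size_is_free marker

struct entry_t {
    int64_t size;                       // FREE_SIZE if the blocknum is unused
    int64_t next_free = -1;             // free-list link while unused
};

struct translation_t {
    std::vector<entry_t> entries;       // entries[0..smallest_never_used) are in play
    int64_t smallest_never_used;
    int64_t freelist_head = -1;
};

static void optimize_translation(translation_t *t) {
    // 1) Drop the suffix of free blocknums: they need not be tracked at all.
    while (t->smallest_never_used > 0 &&
           t->entries[t->smallest_never_used - 1].size == FREE_SIZE) {
        t->smallest_never_used--;
    }
    // 2) Shrink the backing array if it is now mostly slack.
    if ((int64_t)t->entries.size() / 4 > t->smallest_never_used) {
        t->entries.resize(t->smallest_never_used * 2);
    }
    // 3) Rebuild the free list over the remaining free entries.
    t->freelist_head = -1;
    for (int64_t b = 0; b < t->smallest_never_used; b++) {
        if (t->entries[b].size == FREE_SIZE) {
            t->entries[b].next_free = t->freelist_head;
            t->freelist_head = b;
        }
    }
}

int main() {
    translation_t t;
    t.entries = {{100}, {FREE_SIZE}, {200}, {FREE_SIZE}, {FREE_SIZE}, {FREE_SIZE}};
    t.smallest_never_used = 6;
    optimize_translation(&t);
    printf("smallest_never_used=%lld freelist_head=%lld\n",
           (long long)t.smallest_never_used, (long long)t.freelist_head);   // 3 and 1
    return 0;
}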
paranoid_invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); - return b.b >= 0 && b.b < t->smallest_never_used_blocknum.b; -} - -static inline void -verify_valid_blocknum (struct translation *UU(t), BLOCKNUM UU(b)) { - paranoid_invariant(is_valid_blocknum(t, b)); -} - -__attribute__((nonnull,const)) -static inline bool -is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b) { - //Sanity check: Verify invariant - paranoid_invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); - return b.b >= RESERVED_BLOCKNUMS && b.b < t->smallest_never_used_blocknum.b; -} - -//Can be freed -static inline void -verify_valid_freeable_blocknum (struct translation *UU(t), BLOCKNUM UU(b)) { - paranoid_invariant(is_valid_freeable_blocknum(t, b)); -} - -static void -blocktable_lock_init (BLOCK_TABLE bt) { - memset(&bt->mutex, 0, sizeof(bt->mutex)); - toku_mutex_init(&bt->mutex, NULL); -} - -static void -blocktable_lock_destroy (BLOCK_TABLE bt) { - toku_mutex_destroy(&bt->mutex); -} - -static inline void -lock_for_blocktable (BLOCK_TABLE bt) { - // Locks the blocktable_mutex. - toku_mutex_lock(&bt->mutex); -} - -static inline void -unlock_for_blocktable (BLOCK_TABLE bt) { - toku_mutex_unlock(&bt->mutex); -} - -void -toku_ft_lock (FT ft) { - BLOCK_TABLE bt = ft->blocktable; - lock_for_blocktable(bt); -} - -void -toku_ft_unlock (FT ft) { - BLOCK_TABLE bt = ft->blocktable; - toku_mutex_assert_locked(&bt->mutex); - unlock_for_blocktable(bt); -} - -// Also used only in brt-serialize-test. -void -toku_block_free(BLOCK_TABLE bt, uint64_t offset) { - lock_for_blocktable(bt); -PRNTF("freeSOMETHINGunknown", 0L, 0L, offset, bt); - block_allocator_free_block(bt->block_allocator, offset); - unlock_for_blocktable(bt); -} - -static int64_t -calculate_size_on_disk (struct translation *t) { - int64_t r = (8 + // smallest_never_used_blocknum - 8 + // blocknum_freelist_head - t->smallest_never_used_blocknum.b * 16 + // Array - 4); // 4 for checksum - return r; -} - -// We cannot free the disk space allocated to this blocknum if it is still in use by the given translation table. 
-static inline bool -translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair) { - return (t->block_translation && - b.b < t->smallest_never_used_blocknum.b && - old_pair->u.diskoff == t->block_translation[b.b].u.diskoff); -} - -static void -blocknum_realloc_on_disk_internal (BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint) { - toku_mutex_assert_locked(&bt->mutex); - ft_set_dirty(ft, for_checkpoint); - - struct translation *t = &bt->current; - struct block_translation_pair old_pair = t->block_translation[b.b]; -PRNTF("old", b.b, old_pair.size, old_pair.u.diskoff, bt); - //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint - bool cannot_free = (bool) - ((!for_checkpoint && translation_prevents_freeing(&bt->inprogress, b, &old_pair)) || - translation_prevents_freeing(&bt->checkpointed, b, &old_pair)); - if (!cannot_free && old_pair.u.diskoff!=diskoff_unused) { -PRNTF("Freed", b.b, old_pair.size, old_pair.u.diskoff, bt); - block_allocator_free_block(bt->block_allocator, old_pair.u.diskoff); - } - - uint64_t allocator_offset = diskoff_unused; - t->block_translation[b.b].size = size; - if (size > 0) { - // Allocate a new block if the size is greater than 0, - // if the size is just 0, offset will be set to diskoff_unused - block_allocator_alloc_block(bt->block_allocator, size, &allocator_offset); - } - t->block_translation[b.b].u.diskoff = allocator_offset; - *offset = allocator_offset; - -PRNTF("New", b.b, t->block_translation[b.b].size, t->block_translation[b.b].u.diskoff, bt); - //Update inprogress btt if appropriate (if called because Pending bit is set). - if (for_checkpoint) { - paranoid_invariant(b.b < bt->inprogress.length_of_array); - bt->inprogress.block_translation[b.b] = t->block_translation[b.b]; - } -} - -static void -ensure_safe_write_unlocked(BLOCK_TABLE bt, int fd, DISKOFF block_size, DISKOFF block_offset) { - // Requires: holding bt->mutex - uint64_t size_needed = block_size + block_offset; - if (size_needed > bt->safe_file_size) { - // Must hold safe_file_size_lock to change safe_file_size. - nb_mutex_lock(&bt->safe_file_size_lock, &bt->mutex); - if (size_needed > bt->safe_file_size) { - unlock_for_blocktable(bt); - - int64_t size_after; - toku_maybe_preallocate_in_file(fd, size_needed, bt->safe_file_size, &size_after); - - lock_for_blocktable(bt); - bt->safe_file_size = size_after; - } - nb_mutex_unlock(&bt->safe_file_size_lock); - } -} - -void -toku_blocknum_realloc_on_disk (BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint) { - lock_for_blocktable(bt); - struct translation *t = &bt->current; - verify_valid_freeable_blocknum(t, b); - blocknum_realloc_on_disk_internal(bt, b, size, offset, ft, for_checkpoint); - - ensure_safe_write_unlocked(bt, fd, size, *offset); - unlock_for_blocktable(bt); -} - -__attribute__((nonnull,const)) -static inline bool -pair_is_unallocated(struct block_translation_pair *pair) { - return pair->size == 0 && pair->u.diskoff == diskoff_unused; -} - -static void blocknum_alloc_translation_on_disk_unlocked(BLOCK_TABLE bt) -// Effect: figure out where to put the inprogress btt on disk, allocate space for it there. -// The space must be 512-byte aligned (both the starting address and the size). -// As a result, the allcoated space may be a little bit bigger (up to the next 512-byte boundary) than the actual btt. 
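Both blocknum_realloc_on_disk_internal and toku_block_translation_note_end_checkpoint hinge on the translation_prevents_freeing test above: an old disk region may be handed back to the block allocator only if no other translation still maps that same offset for that blocknum. A small sketch of that decision over toy mappings follows; the names and the flat vector representation are illustrative, not the removed code.

#include <cstdint>
#include <cstdio>
#include <vector>

static const int64_t UNUSED = -2;   // stand-in for diskoff_unused

// diskoff per blocknum; UNUSED means "no disk block assigned".
using translation_t = std::vector<int64_t>;

// A translation prevents freeing an old mapping if it still points at the same offset.
static bool prevents_freeing(const translation_t &t, size_t blocknum, int64_t old_diskoff) {
    return blocknum < t.size() && t[blocknum] == old_diskoff;
}

// Decide, for each entry of the retiring checkpointed translation, whether its
// disk space may be returned to the block allocator.
static void end_checkpoint_free(const translation_t &old_checkpointed,
                                const translation_t &inprogress,
                                const translation_t &current) {
    for (size_t b = 0; b < old_checkpointed.size(); b++) {
        int64_t off = old_checkpointed[b];
        if (off == UNUSED) continue;
        if (prevents_freeing(inprogress, b, off) || prevents_freeing(current, b, off)) {
            continue;                       // still referenced; keep the disk space
        }
        printf("free disk space at offset %lld (blocknum %zu)\n", (long long)off, b);
    }
}

int main() {
    translation_t checkpointed = {4096, 8192, 12288};
    translation_t inprogress   = {4096, 20480, 12288};   // blocknum 1 was rewritten
    translation_t current      = {4096, 24576, 12288};
    end_checkpoint_free(checkpointed, inprogress, current);  // frees only offset 8192
    return 0;
}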
-{ - toku_mutex_assert_locked(&bt->mutex); - - struct translation *t = &bt->inprogress; - paranoid_invariant_notnull(t->block_translation); - BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); - //Each inprogress is allocated only once - paranoid_invariant(pair_is_unallocated(&t->block_translation[b.b])); - - //Allocate a new block - int64_t size = calculate_size_on_disk(t); - uint64_t offset; - block_allocator_alloc_block(bt->block_allocator, size, &offset); -PRNTF("blokAllokator", 1L, size, offset, bt); - t->block_translation[b.b].u.diskoff = offset; - t->block_translation[b.b].size = size; -} - -void toku_serialize_translation_to_wbuf(BLOCK_TABLE bt, int fd, struct wbuf *w, - int64_t *address, int64_t *size) -// Effect: Fills wbuf (which starts uninitialized) with bt -// A clean shutdown runs checkpoint start so that current and inprogress are copies. -// The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needd) -// The address is guaranteed to be 512-byte aligned, but the size is not guaranteed. -// It *is* guaranteed that we can read up to the next 512-byte boundary, however -{ - lock_for_blocktable(bt); - struct translation *t = &bt->inprogress; - - BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); - blocknum_alloc_translation_on_disk_unlocked(bt); // The allocated block must be 512-byte aligned to make O_DIRECT happy. - uint64_t size_translation = calculate_size_on_disk(t); - uint64_t size_aligned = roundup_to_multiple(512, size_translation); - assert((int64_t)size_translation==t->block_translation[b.b].size); - { - //Init wbuf - if (0) - printf("%s:%d writing translation table of size_translation %" PRIu64 " at %" PRId64 "\n", __FILE__, __LINE__, size_translation, t->block_translation[b.b].u.diskoff); - char *XMALLOC_N_ALIGNED(512, size_aligned, buf); - for (uint64_t i=size_translation; ismallest_never_used_blocknum); - wbuf_BLOCKNUM(w, t->blocknum_freelist_head); - int64_t i; - for (i=0; ismallest_never_used_blocknum.b; i++) { - if (0) - printf("%s:%d %" PRId64 ",%" PRId64 "\n", __FILE__, __LINE__, t->block_translation[i].u.diskoff, t->block_translation[i].size); - wbuf_DISKOFF(w, t->block_translation[i].u.diskoff); - wbuf_DISKOFF(w, t->block_translation[i].size); - } - uint32_t checksum = x1764_finish(&w->checksum); - wbuf_int(w, checksum); - *address = t->block_translation[b.b].u.diskoff; - *size = size_translation; - assert((*address)%512 == 0); - - ensure_safe_write_unlocked(bt, fd, size_aligned, *address); - unlock_for_blocktable(bt); -} - - -// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?) -static void -translate_blocknum_to_offset_size_unlocked(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { - struct translation *t = &bt->current; - verify_valid_blocknum(t, b); - if (offset) *offset = t->block_translation[b.b].u.diskoff; - if (size) *size = t->block_translation[b.b].size; -} - -// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?) 
-void -toku_translate_blocknum_to_offset_size(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { - lock_for_blocktable(bt); - translate_blocknum_to_offset_size_unlocked(bt, b, offset, size); - unlock_for_blocktable(bt); -} - -//Only called by toku_allocate_blocknum -static void -maybe_expand_translation (struct translation *t) { -// Effect: expand the array to maintain size invariant -// given that one more never-used blocknum will soon be used. - if (t->length_of_array <= t->smallest_never_used_blocknum.b) { - //expansion is necessary - uint64_t new_length = t->smallest_never_used_blocknum.b * 2; - XREALLOC_N(new_length, t->block_translation); - uint64_t i; - for (i = t->length_of_array; i < new_length; i++) { - t->block_translation[i].u.next_free_blocknum = freelist_null; - t->block_translation[i].size = size_is_free; - } - t->length_of_array = new_length; - } -} - -void -toku_allocate_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *res, FT ft) { - toku_mutex_assert_locked(&bt->mutex); - BLOCKNUM result; - struct translation * t = &bt->current; - if (t->blocknum_freelist_head.b == freelist_null.b) { - // no previously used blocknums are available - // use a never used blocknum - maybe_expand_translation(t); //Ensure a never used blocknums is available - result = t->smallest_never_used_blocknum; - t->smallest_never_used_blocknum.b++; - } else { // reuse a previously used blocknum - result = t->blocknum_freelist_head; - BLOCKNUM next = t->block_translation[result.b].u.next_free_blocknum; - t->blocknum_freelist_head = next; - } - //Verify the blocknum is free - paranoid_invariant(t->block_translation[result.b].size == size_is_free); - //blocknum is not free anymore - t->block_translation[result.b].u.diskoff = diskoff_unused; - t->block_translation[result.b].size = 0; - verify_valid_freeable_blocknum(t, result); - *res = result; - ft_set_dirty(ft, false); -} - -void -toku_allocate_blocknum(BLOCK_TABLE bt, BLOCKNUM *res, FT ft) { - lock_for_blocktable(bt); - toku_allocate_blocknum_unlocked(bt, res, ft); - unlock_for_blocktable(bt); -} - -static void -free_blocknum_in_translation(struct translation *t, BLOCKNUM b) -{ - verify_valid_freeable_blocknum(t, b); - paranoid_invariant(t->block_translation[b.b].size != size_is_free); - - PRNTF("free_blocknum", b.b, t->block_translation[b.b].size, t->block_translation[b.b].u.diskoff, bt); - t->block_translation[b.b].size = size_is_free; - t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; - t->blocknum_freelist_head = b; -} - -static void -free_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *bp, FT ft, bool for_checkpoint) { -// Effect: Free a blocknum. -// If the blocknum holds the only reference to a block on disk, free that block - toku_mutex_assert_locked(&bt->mutex); - BLOCKNUM b = *bp; - bp->b = 0; //Remove caller's reference. - - struct block_translation_pair old_pair = bt->current.block_translation[b.b]; - - free_blocknum_in_translation(&bt->current, b); - if (for_checkpoint) { - paranoid_invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); - free_blocknum_in_translation(&bt->inprogress, b); - } - - //If the size is 0, no disk block has ever been assigned to this blocknum. 
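toku_allocate_blocknum_unlocked above hands out blocknums by popping the free list when possible and otherwise bumping smallest_never_used_blocknum, doubling the array as needed; free_blocknum_in_translation pushes a released blocknum back on that list. A standalone sketch of the allocate/free cycle follows; the types are simplified and there is no locking or dirty-flag handling, so this is illustrative rather than the removed code.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

static const int64_t FREE_SIZE = -1;       // marks an unused blocknum
static const int64_t FREELIST_NULL = -1;   // end of the free list

struct entry_t {
    int64_t size = FREE_SIZE;              // 0 == allocated but no disk block yet
    int64_t next_free = FREELIST_NULL;     // valid only while the blocknum is free
};

struct translation_t {
    std::vector<entry_t> entries;
    int64_t smallest_never_used = 0;
    int64_t freelist_head = FREELIST_NULL;
};

static int64_t allocate_blocknum(translation_t *t) {
    int64_t result;
    if (t->freelist_head == FREELIST_NULL) {
        // No previously used blocknum is free: take a never-used one,
        // growing the array (by doubling) if necessary.
        if ((int64_t)t->entries.size() <= t->smallest_never_used) {
            t->entries.resize(t->entries.empty() ? 1 : t->entries.size() * 2);
        }
        result = t->smallest_never_used++;
    } else {
        // Reuse the head of the free list.
        result = t->freelist_head;
        t->freelist_head = t->entries[result].next_free;
    }
    assert(t->entries[result].size == FREE_SIZE);
    t->entries[result].size = 0;            // allocated, no disk block assigned yet
    return result;
}

static void free_blocknum(translation_t *t, int64_t b) {
    assert(t->entries[b].size != FREE_SIZE);
    t->entries[b].size = FREE_SIZE;
    t->entries[b].next_free = t->freelist_head;
    t->freelist_head = b;
}

int main() {
    translation_t t;
    int64_t a = allocate_blocknum(&t);      // 0, from the never-used range
    int64_t b = allocate_blocknum(&t);      // 1
    free_blocknum(&t, a);
    int64_t c = allocate_blocknum(&t);      // reuses 0 from the free list
    printf("a=%lld b=%lld c=%lld\n", (long long)a, (long long)b, (long long)c);
    return 0;
}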
- if (old_pair.size > 0) { - //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint - bool cannot_free = (bool) - (translation_prevents_freeing(&bt->inprogress, b, &old_pair) || - translation_prevents_freeing(&bt->checkpointed, b, &old_pair)); - if (!cannot_free) { -PRNTF("free_blocknum_free", b.b, old_pair.size, old_pair.u.diskoff, bt); - block_allocator_free_block(bt->block_allocator, old_pair.u.diskoff); - } - } - else { - paranoid_invariant(old_pair.size==0); - paranoid_invariant(old_pair.u.diskoff == diskoff_unused); - } - ft_set_dirty(ft, for_checkpoint); -} - -void -toku_free_blocknum(BLOCK_TABLE bt, BLOCKNUM *bp, FT ft, bool for_checkpoint) { - lock_for_blocktable(bt); - free_blocknum_unlocked(bt, bp, ft, for_checkpoint); - unlock_for_blocktable(bt); -} - -//Verify there are no free blocks. -void -toku_block_verify_no_free_blocknums(BLOCK_TABLE UU(bt)) { - paranoid_invariant(bt->current.blocknum_freelist_head.b == freelist_null.b); -} - -// Frees blocknums that have a size of 0 and unused diskoff -// Currently used for eliminating unused cached rollback log nodes -void -toku_free_unused_blocknums(BLOCK_TABLE bt, BLOCKNUM root) { - lock_for_blocktable(bt); - int64_t smallest = bt->current.smallest_never_used_blocknum.b; - for (int64_t i=RESERVED_BLOCKNUMS; i < smallest; i++) { - if (i == root.b) { - continue; - } - BLOCKNUM b = make_blocknum(i); - if (bt->current.block_translation[b.b].size == 0) { - invariant(bt->current.block_translation[b.b].u.diskoff == diskoff_unused); - free_blocknum_in_translation(&bt->current, b); - } - } - unlock_for_blocktable(bt); -} - -__attribute__((nonnull,const,unused)) -static inline bool -no_data_blocks_except_root(BLOCK_TABLE bt, BLOCKNUM root) { - bool ok = true; - lock_for_blocktable(bt); - int64_t smallest = bt->current.smallest_never_used_blocknum.b; - if (root.b < RESERVED_BLOCKNUMS) { - ok = false; - goto cleanup; - } - int64_t i; - for (i=RESERVED_BLOCKNUMS; i < smallest; i++) { - if (i == root.b) { - continue; - } - BLOCKNUM b = make_blocknum(i); - if (bt->current.block_translation[b.b].size != size_is_free) { - ok = false; - goto cleanup; - } - } - cleanup: - unlock_for_blocktable(bt); - return ok; -} - -//Verify there are no data blocks except root. -// TODO(leif): This actually takes a lock, but I don't want to fix all the callers right now. -void -toku_block_verify_no_data_blocks_except_root(BLOCK_TABLE UU(bt), BLOCKNUM UU(root)) { - paranoid_invariant(no_data_blocks_except_root(bt, root)); -} - -__attribute__((nonnull,const,unused)) -static inline bool -blocknum_allocated(BLOCK_TABLE bt, BLOCKNUM b) { - lock_for_blocktable(bt); - struct translation *t = &bt->current; - verify_valid_blocknum(t, b); - bool ok = t->block_translation[b.b].size != size_is_free; - unlock_for_blocktable(bt); - return ok; -} - -//Verify a blocknum is currently allocated. 
-void -toku_verify_blocknum_allocated(BLOCK_TABLE UU(bt), BLOCKNUM UU(b)) { - paranoid_invariant(blocknum_allocated(bt, b)); -} - -//Only used by toku_dump_translation table (debug info) -static void -dump_translation(FILE *f, struct translation *t) { - if (t->block_translation) { - BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); - fprintf(f, " length_of_array[%" PRId64 "]", t->length_of_array); - fprintf(f, " smallest_never_used_blocknum[%" PRId64 "]", t->smallest_never_used_blocknum.b); - fprintf(f, " blocknum_free_list_head[%" PRId64 "]", t->blocknum_freelist_head.b); - fprintf(f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size); - fprintf(f, " location_on_disk[%" PRId64 "]\n", t->block_translation[b.b].u.diskoff); - int64_t i; - for (i=0; ilength_of_array; i++) { - fprintf(f, " %" PRId64 ": %" PRId64 " %" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size); - } - fprintf(f, "\n"); - } - else fprintf(f, " does not exist\n"); -} - -//Only used by toku_ft_dump which is only for debugging purposes -// "pretty" just means we use tabs so we can parse output easier later -void -toku_dump_translation_table_pretty(FILE *f, BLOCK_TABLE bt) { - lock_for_blocktable(bt); - struct translation *t = &bt->checkpointed; - assert(t->block_translation != nullptr); - for (int64_t i = 0; i < t->length_of_array; ++i) { - fprintf(f, "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size); - } - unlock_for_blocktable(bt); -} - -//Only used by toku_ft_dump which is only for debugging purposes -void -toku_dump_translation_table(FILE *f, BLOCK_TABLE bt) { - lock_for_blocktable(bt); - fprintf(f, "Current block translation:"); - dump_translation(f, &bt->current); - fprintf(f, "Checkpoint in progress block translation:"); - dump_translation(f, &bt->inprogress); - fprintf(f, "Checkpointed block translation:"); - dump_translation(f, &bt->checkpointed); - unlock_for_blocktable(bt); -} - -//Only used by ftdump -void -toku_blocknum_dump_translation(BLOCK_TABLE bt, BLOCKNUM b) { - lock_for_blocktable(bt); - - struct translation *t = &bt->current; - if (b.b < t->length_of_array) { - struct block_translation_pair *bx = &t->block_translation[b.b]; - printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n", b.b, bx->u.diskoff, bx->size); - } - unlock_for_blocktable(bt); -} - - -//Must not call this function when anything else is using the blocktable. -//No one may use the blocktable afterwards. -void -toku_blocktable_destroy(BLOCK_TABLE *btp) { - BLOCK_TABLE bt = *btp; - *btp = NULL; - if (bt->current.block_translation) toku_free(bt->current.block_translation); - if (bt->inprogress.block_translation) toku_free(bt->inprogress.block_translation); - if (bt->checkpointed.block_translation) toku_free(bt->checkpointed.block_translation); - - destroy_block_allocator(&bt->block_allocator); - blocktable_lock_destroy(bt); - nb_mutex_destroy(&bt->safe_file_size_lock); - toku_free(bt); -} - - -static BLOCK_TABLE -blocktable_create_internal (void) { -// Effect: Fill it in, including the translation table, which is uninitialized - BLOCK_TABLE XCALLOC(bt); - blocktable_lock_init(bt); - nb_mutex_init(&bt->safe_file_size_lock); - - //There are two headers, so we reserve space for two. - uint64_t reserve_per_header = BLOCK_ALLOCATOR_HEADER_RESERVE; - - //Must reserve in multiples of BLOCK_ALLOCATOR_ALIGNMENT - //Round up the per-header usage if necessary. - //We want each header aligned. 
- uint64_t remainder = BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT; - if (remainder!=0) { - reserve_per_header += BLOCK_ALLOCATOR_ALIGNMENT; - reserve_per_header -= remainder; - } - assert(2*reserve_per_header == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - create_block_allocator(&bt->block_allocator, - BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, - BLOCK_ALLOCATOR_ALIGNMENT); - return bt; -} - - - -static void -translation_default(struct translation *t) { // destination into which to create a default translation - t->type = TRANSLATION_CHECKPOINTED; - t->smallest_never_used_blocknum = make_blocknum(RESERVED_BLOCKNUMS); - t->length_of_array = t->smallest_never_used_blocknum.b; - t->blocknum_freelist_head = freelist_null; - XMALLOC_N(t->length_of_array, t->block_translation); - int64_t i; - for (i = 0; i < t->length_of_array; i++) { - t->block_translation[i].size = 0; - t->block_translation[i].u.diskoff = diskoff_unused; - } -} - - -static int -translation_deserialize_from_buffer(struct translation *t, // destination into which to deserialize - DISKOFF location_on_disk, //Location of translation_buffer - uint64_t size_on_disk, - unsigned char * translation_buffer) { // buffer with serialized translation - int r = 0; - assert(location_on_disk!=0); - t->type = TRANSLATION_CHECKPOINTED; - { - // check the checksum - uint32_t x1764 = x1764_memory(translation_buffer, size_on_disk - 4); - uint64_t offset = size_on_disk - 4; - //printf("%s:%d read from %ld (x1764 offset=%ld) size=%ld\n", __FILE__, __LINE__, block_translation_address_on_disk, offset, block_translation_size_on_disk); - uint32_t stored_x1764 = toku_dtoh32(*(int*)(translation_buffer + offset)); - if (x1764 != stored_x1764) { - fprintf(stderr, "Translation table checksum failure: calc=0x%08x read=0x%08x\n", x1764, stored_x1764); - r = TOKUDB_BAD_CHECKSUM; - goto exit; - } - } - struct rbuf rt; - rt.buf = translation_buffer; - rt.ndone = 0; - rt.size = size_on_disk-4;//4==checksum - - t->smallest_never_used_blocknum = rbuf_blocknum(&rt); - t->length_of_array = t->smallest_never_used_blocknum.b; - assert(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); - t->blocknum_freelist_head = rbuf_blocknum(&rt); - XMALLOC_N(t->length_of_array, t->block_translation); - int64_t i; - for (i=0; i < t->length_of_array; i++) { - t->block_translation[i].u.diskoff = rbuf_diskoff(&rt); - t->block_translation[i].size = rbuf_diskoff(&rt); -PRNTF("ReadIn", i, t->block_translation[i].size, t->block_translation[i].u.diskoff, NULL); - } - assert(calculate_size_on_disk(t) == (int64_t)size_on_disk); - assert(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == (int64_t)size_on_disk); - assert(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == location_on_disk); -exit: - return r; -} - -// We just initialized a translation, inform block allocator to reserve space for each blocknum in use. -static void -blocktable_note_translation (BLOCK_ALLOCATOR allocator, struct translation *t) { - //This is where the space for them will be reserved (in addition to normal blocks). - //See RESERVED_BLOCKNUMS - - // Previously this added blocks one at a time. Now we make an array and pass it in so it can be sorted and merged. See #3218. 
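// Aside: translation_deserialize_from_buffer above validates a checksum stored in the last 4 bytes of the serialized
// translation before it parses anything. The sketch below shows that pattern with a stand-in checksum (TokuDB's real
// x1764 is not reproduced here) and host-endian reads for simplicity; it is illustrative only.
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t stand_in_checksum32(const unsigned char *buf, size_t len) {
    uint32_t h = 2166136261u;                    // FNV-1a, purely as a placeholder
    for (size_t i = 0; i < len; i++) { h ^= buf[i]; h *= 16777619u; }
    return h;
}

// Returns 0 if the trailing checksum matches the payload, -1 otherwise.
static int check_trailing_checksum(const unsigned char *buf, size_t size) {
    if (size < 4) return -1;
    uint32_t computed = stand_in_checksum32(buf, size - 4);
    uint32_t stored;
    memcpy(&stored, buf + size - 4, sizeof(stored));
    if (computed != stored) {
        fprintf(stderr, "checksum failure: calc=0x%08x read=0x%08x\n", computed, stored);
        return -1;
    }
    return 0;
}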
- struct block_allocator_blockpair *XMALLOC_N(t->smallest_never_used_blocknum.b, pairs); - uint64_t n_pairs = 0; - for (int64_t i=0; i<t->smallest_never_used_blocknum.b; i++) { - struct block_translation_pair pair = t->block_translation[i]; - if (pair.size > 0) { - paranoid_invariant(pair.u.diskoff != diskoff_unused); - int cur_pair = n_pairs++; - pairs[cur_pair] = (struct block_allocator_blockpair) { .offset = (uint64_t) pair.u.diskoff, - .size = (uint64_t) pair.size }; - } - } - block_allocator_alloc_blocks_at(allocator, n_pairs, pairs); - toku_free(pairs); -} - - -// Fill in the checkpointed translation from buffer, and copy checkpointed to current. -// The one read from disk is the last known checkpointed one, so we are keeping it in -// place and then setting current (which is never stored on disk) for current use. -// The translation_buffer has translation only, we create the rest of the block_table. -int -toku_blocktable_create_from_buffer(int fd, - BLOCK_TABLE *btp, - DISKOFF location_on_disk, //Location of translation_buffer - DISKOFF size_on_disk, - unsigned char *translation_buffer) { - BLOCK_TABLE bt = blocktable_create_internal(); - int r = translation_deserialize_from_buffer(&bt->checkpointed, location_on_disk, size_on_disk, translation_buffer); - if (r != 0) { - goto exit; - } - blocktable_note_translation(bt->block_allocator, &bt->checkpointed); - // we just filled in checkpointed, now copy it to current. - copy_translation(&bt->current, &bt->checkpointed, TRANSLATION_CURRENT); - - int64_t file_size; - r = toku_os_get_file_size(fd, &file_size); - lazy_assert_zero(r); - invariant(file_size >= 0); - bt->safe_file_size = file_size; - - *btp = bt; -exit: - return r; -} - - -void -toku_blocktable_create_new(BLOCK_TABLE *btp) { - BLOCK_TABLE bt = blocktable_create_internal(); - translation_default(&bt->checkpointed); // create default btt (empty except for reserved blocknums) - blocktable_note_translation(bt->block_allocator, &bt->checkpointed); - // we just created a default checkpointed, now copy it to current.
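// Aside: blocktable_note_translation above gathers every in-use (offset, size) pair and registers the whole batch
// with the block allocator in one call, so the allocator can sort and merge them in a single pass instead of doing
// one insertion per block. A simplified restatement; BlockPair and reserve_blocks_at are hypothetical stand-ins for
// the allocator interface.
#include <algorithm>
#include <cstdint>
#include <vector>

struct BlockPair { uint64_t offset; uint64_t size; };

static void reserve_blocks_at(std::vector<BlockPair> pairs) {
    // A real allocator would record these regions as allocated; sorting by offset
    // lets it merge the batch into its internal structures in one pass.
    std::sort(pairs.begin(), pairs.end(),
              [](const BlockPair &a, const BlockPair &b) { return a.offset < b.offset; });
}

static void note_translation(const std::vector<BlockPair> &translation) {
    std::vector<BlockPair> in_use;
    for (const BlockPair &p : translation) {
        if (p.size > 0) {               // size 0 means no disk block was assigned
            in_use.push_back(p);
        }
    }
    reserve_blocks_at(std::move(in_use));
}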
- copy_translation(&bt->current, &bt->checkpointed, TRANSLATION_CURRENT); - - *btp = bt; -} - -int -toku_blocktable_iterate (BLOCK_TABLE bt, enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only) { - struct translation *src; - - int r = 0; - switch (type) { - case TRANSLATION_CURRENT: src = &bt->current; break; - case TRANSLATION_INPROGRESS: src = &bt->inprogress; break; - case TRANSLATION_CHECKPOINTED: src = &bt->checkpointed; break; - default: r = EINVAL; break; - } - struct translation fakecurrent; - struct translation *t = &fakecurrent; - if (r==0) { - lock_for_blocktable(bt); - copy_translation(t, src, TRANSLATION_DEBUG); - t->block_translation[RESERVED_BLOCKNUM_TRANSLATION] = - src->block_translation[RESERVED_BLOCKNUM_TRANSLATION]; - unlock_for_blocktable(bt); - int64_t i; - for (i=0; ismallest_never_used_blocknum.b; i++) { - struct block_translation_pair pair = t->block_translation[i]; - if (data_only && i< RESERVED_BLOCKNUMS) continue; - if (used_only && pair.size <= 0) continue; - r = f(make_blocknum(i), pair.size, pair.u.diskoff, extra); - if (r!=0) break; - } - toku_free(t->block_translation); - } - return r; -} - -typedef struct { - int64_t used_space; - int64_t total_space; -} frag_extra; - -static int -frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extra) { - frag_extra *info = (frag_extra *) extra; - - if (size + address > info->total_space) - info->total_space = size + address; - info->used_space += size; - return 0; -} - -void -toku_blocktable_internal_fragmentation (BLOCK_TABLE bt, int64_t *total_sizep, int64_t *used_sizep) { - frag_extra info = {0,0}; - int r = toku_blocktable_iterate(bt, TRANSLATION_CHECKPOINTED, frag_helper, &info, false, true); - assert_zero(r); - - if (total_sizep) *total_sizep = info.total_space; - if (used_sizep) *used_sizep = info.used_space; -} - -void -toku_realloc_descriptor_on_disk_unlocked(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT ft) { - toku_mutex_assert_locked(&bt->mutex); - BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR); - blocknum_realloc_on_disk_internal(bt, b, size, offset, ft, false); -} - -void -toku_realloc_descriptor_on_disk(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT ft, int fd) { - lock_for_blocktable(bt); - toku_realloc_descriptor_on_disk_unlocked(bt, size, offset, ft); - - ensure_safe_write_unlocked(bt, fd, size, *offset); - unlock_for_blocktable(bt); -} - -void -toku_get_descriptor_offset_size(BLOCK_TABLE bt, DISKOFF *offset, DISKOFF *size) { - lock_for_blocktable(bt); - BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR); - translate_blocknum_to_offset_size_unlocked(bt, b, offset, size); - unlock_for_blocktable(bt); -} - -void -toku_block_table_get_fragmentation_unlocked(BLOCK_TABLE bt, TOKU_DB_FRAGMENTATION report) { - //Requires: blocktable lock is held. - //Requires: report->file_size_bytes is already filled in. - - //Count the headers. 
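// Aside: toku_blocktable_internal_fragmentation above derives its numbers from a simple rule: "total" is the largest
// end offset of any used block and "used" is the sum of block sizes, so the difference is the space lost to holes.
// A standalone restatement with a hypothetical UsedBlock type:
#include <cstdint>
#include <vector>

struct UsedBlock { int64_t offset; int64_t size; };

static void internal_fragmentation(const std::vector<UsedBlock> &blocks,
                                   int64_t *total_space, int64_t *used_space) {
    int64_t total = 0, used = 0;
    for (const UsedBlock &b : blocks) {
        if (b.offset + b.size > total) total = b.offset + b.size;
        used += b.size;
    }
    if (total_space) *total_space = total;
    if (used_space)  *used_space  = used;
}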
- report->data_bytes = BLOCK_ALLOCATOR_HEADER_RESERVE; - report->data_blocks = 1; - report->checkpoint_bytes_additional = BLOCK_ALLOCATOR_HEADER_RESERVE; - report->checkpoint_blocks_additional = 1; - - struct translation *current = &bt->current; - int64_t i; - for (i = 0; i < current->length_of_array; i++) { - struct block_translation_pair *pair = &current->block_translation[i]; - if (pair->size > 0) { - report->data_bytes += pair->size; - report->data_blocks++; - } - } - struct translation *checkpointed = &bt->checkpointed; - for (i = 0; i < checkpointed->length_of_array; i++) { - struct block_translation_pair *pair = &checkpointed->block_translation[i]; - if (pair->size > 0 && - !(i < current->length_of_array && - current->block_translation[i].size > 0 && - current->block_translation[i].u.diskoff == pair->u.diskoff) - ) { - report->checkpoint_bytes_additional += pair->size; - report->checkpoint_blocks_additional++; - } - } - struct translation *inprogress = &bt->inprogress; - for (i = 0; i < inprogress->length_of_array; i++) { - struct block_translation_pair *pair = &inprogress->block_translation[i]; - if (pair->size > 0 && - !(i < current->length_of_array && - current->block_translation[i].size > 0 && - current->block_translation[i].u.diskoff == pair->u.diskoff) && - !(i < checkpointed->length_of_array && - checkpointed->block_translation[i].size > 0 && - checkpointed->block_translation[i].u.diskoff == pair->u.diskoff) - ) { - report->checkpoint_bytes_additional += pair->size; - report->checkpoint_blocks_additional++; - } - } - - block_allocator_get_unused_statistics(bt->block_allocator, report); -} - -void -toku_blocktable_get_info64(BLOCK_TABLE bt, struct ftinfo64 *s) { - lock_for_blocktable(bt); - - struct translation *current = &bt->current; - s->num_blocks_allocated = current->length_of_array; - s->num_blocks_in_use = 0; - s->size_allocated = 0; - s->size_in_use = 0; - - for (int64_t i = 0; i < current->length_of_array; ++i) { - struct block_translation_pair *block = &current->block_translation[i]; - if (block->size != size_is_free) { - ++s->num_blocks_in_use; - s->size_in_use += block->size; - if (block->u.diskoff != diskoff_unused) { - uint64_t limit = block->u.diskoff + block->size; - if (limit > s->size_allocated) { - s->size_allocated = limit; - } - } - } - } - - unlock_for_blocktable(bt); -} - -int -toku_blocktable_iterate_translation_tables(BLOCK_TABLE bt, uint64_t checkpoint_count, - int (*iter)(uint64_t checkpoint_count, - int64_t total_num_rows, - int64_t blocknum, - int64_t diskoff, - int64_t size, - void *extra), - void *iter_extra) { - int error = 0; - lock_for_blocktable(bt); - - int64_t total_num_rows = bt->current.length_of_array + bt->checkpointed.length_of_array; - for (int64_t i = 0; error == 0 && i < bt->current.length_of_array; ++i) { - struct block_translation_pair *block = &bt->current.block_translation[i]; - error = iter(checkpoint_count, total_num_rows, i, block->u.diskoff, block->size, iter_extra); - } - for (int64_t i = 0; error == 0 && i < bt->checkpointed.length_of_array; ++i) { - struct block_translation_pair *block = &bt->checkpointed.block_translation[i]; - error = iter(checkpoint_count - 1, total_num_rows, i, block->u.diskoff, block->size, iter_extra); - } - - unlock_for_blocktable(bt); - return error; -} diff --git a/storage/tokudb/ft-index/ft/block_table.h b/storage/tokudb/ft-index/ft/block_table.h deleted file mode 100644 index a9f17ad0e7e9b..0000000000000 --- a/storage/tokudb/ft-index/ft/block_table.h +++ /dev/null @@ -1,176 +0,0 @@ -/* -*- mode: C++;
c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef BLOCKTABLE_H -#define BLOCKTABLE_H -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. 
If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "fttypes.h" - - -typedef struct block_table *BLOCK_TABLE; - -//Needed by tests, ftdump -struct block_translation_pair { - union { // If in the freelist, use next_free_blocknum, otherwise diskoff. - DISKOFF diskoff; - BLOCKNUM next_free_blocknum; - } u; - DISKOFF size; // set to 0xFFFFFFFFFFFFFFFF for free -}; - -void toku_blocktable_create_new(BLOCK_TABLE *btp); -int toku_blocktable_create_from_buffer(int fd, BLOCK_TABLE *btp, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer); -void toku_blocktable_destroy(BLOCK_TABLE *btp); - -void toku_ft_lock(FT h); -void toku_ft_unlock(FT h); - -void toku_block_translation_note_start_checkpoint_unlocked(BLOCK_TABLE bt); -void toku_block_translation_note_end_checkpoint(BLOCK_TABLE bt, int fd); -void toku_block_translation_note_skipped_checkpoint(BLOCK_TABLE bt); -void toku_maybe_truncate_file_on_open(BLOCK_TABLE bt, int fd); - -//Blocknums -void toku_allocate_blocknum(BLOCK_TABLE bt, BLOCKNUM *res, FT h); -void toku_allocate_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *res, FT h); -void toku_free_blocknum(BLOCK_TABLE bt, BLOCKNUM *b, FT h, bool for_checkpoint); -void toku_verify_blocknum_allocated(BLOCK_TABLE bt, BLOCKNUM b); -void toku_block_verify_no_data_blocks_except_root(BLOCK_TABLE bt, BLOCKNUM root); -void toku_free_unused_blocknums(BLOCK_TABLE bt, BLOCKNUM root); -void toku_block_verify_no_free_blocknums(BLOCK_TABLE bt); -void toku_realloc_descriptor_on_disk(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT h, int fd); -void toku_realloc_descriptor_on_disk_unlocked(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, FT h); -void toku_get_descriptor_offset_size(BLOCK_TABLE bt, DISKOFF *offset, DISKOFF *size); - -//Blocks and Blocknums -void toku_blocknum_realloc_on_disk(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint); -void toku_translate_blocknum_to_offset_size(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF *offset, DISKOFF *size); - -//Serialization -void toku_serialize_translation_to_wbuf(BLOCK_TABLE bt, int fd, struct wbuf *w, int64_t *address, int64_t *size); - -void toku_block_table_swap_for_redirect(BLOCK_TABLE old_bt, BLOCK_TABLE new_bt); - - -//DEBUG ONLY (ftdump included), tests included -void toku_blocknum_dump_translation(BLOCK_TABLE bt, BLOCKNUM b); -void toku_dump_translation_table_pretty(FILE *f, BLOCK_TABLE bt); -void toku_dump_translation_table(FILE *f, BLOCK_TABLE bt); -void toku_block_free(BLOCK_TABLE bt, uint64_t offset); -typedef int(*BLOCKTABLE_CALLBACK)(BLOCKNUM b, 
int64_t size, int64_t address, void *extra); -enum translation_type {TRANSLATION_NONE=0, - TRANSLATION_CURRENT, - TRANSLATION_INPROGRESS, - TRANSLATION_CHECKPOINTED, - TRANSLATION_DEBUG}; - -int toku_blocktable_iterate(BLOCK_TABLE bt, enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only); -void toku_blocktable_internal_fragmentation(BLOCK_TABLE bt, int64_t *total_sizep, int64_t *used_sizep); - -void toku_block_table_get_fragmentation_unlocked(BLOCK_TABLE bt, TOKU_DB_FRAGMENTATION report); -//Requires: blocktable lock is held. -//Requires: report->file_size_bytes is already filled in. - -int64_t toku_block_get_blocks_in_use_unlocked(BLOCK_TABLE bt); - -void toku_blocktable_get_info64(BLOCK_TABLE, struct ftinfo64 *); - -int toku_blocktable_iterate_translation_tables(BLOCK_TABLE, uint64_t, int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), void *); - -//Unmovable reserved first, then reallocable. -// We reserve one blocknum for the translation table itself. -enum {RESERVED_BLOCKNUM_NULL =0, - RESERVED_BLOCKNUM_TRANSLATION=1, - RESERVED_BLOCKNUM_DESCRIPTOR =2, - RESERVED_BLOCKNUMS}; - - -#endif - diff --git a/storage/tokudb/ft-index/ft/bndata.cc b/storage/tokudb/ft-index/ft/bndata.cc index b68a05c119a80..a277e52aa0bd0 100644 --- a/storage/tokudb/ft-index/ft/bndata.cc +++ b/storage/tokudb/ft-index/ft/bndata.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,47 +89,198 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include +#include +#include -static uint32_t klpair_size(KLPAIR klpair){ - return sizeof(*klpair) + klpair->keylen + leafentry_memsize(get_le_from_klpair(klpair)); -} - -static uint32_t klpair_disksize(KLPAIR klpair){ - return sizeof(*klpair) + klpair->keylen + leafentry_disksize(get_le_from_klpair(klpair)); +using namespace toku; +uint32_t bn_data::klpair_disksize(const uint32_t klpair_len, const klpair_struct *klpair) const { + return sizeof(*klpair) + keylen_from_klpair_len(klpair_len) + leafentry_disksize(get_le_from_klpair(klpair)); } void bn_data::init_zero() { toku_mempool_zero(&m_buffer_mempool); + m_disksize_of_keys = 0; } void bn_data::initialize_empty() { - toku_mempool_zero(&m_buffer_mempool); - m_buffer.create_no_array(); + init_zero(); + m_buffer.create(); +} + +void bn_data::add_key(uint32_t keylen) { + m_disksize_of_keys += sizeof(keylen) + keylen; +} + +void bn_data::add_keys(uint32_t n_keys, uint32_t combined_klpair_len) { + invariant(n_keys * sizeof(uint32_t) <= combined_klpair_len); + m_disksize_of_keys += combined_klpair_len; +} + +void bn_data::remove_key(uint32_t keylen) { + m_disksize_of_keys -= sizeof(keylen) + keylen; +} + +// Deserialize from format optimized for keys being inlined. +// Currently only supports fixed-length keys. 
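// Aside: add_key/add_keys/remove_key above keep m_disksize_of_keys equal to the serialized size of all keys, a 4-byte
// length prefix plus the key bytes for each entry, so get_disk_size() can later report keys plus leafentry bytes
// without rescanning. A minimal restatement (KeyDiskSizeTracker is a hypothetical name, not the bn_data API):
#include <cstdint>

class KeyDiskSizeTracker {
    uint64_t disksize_of_keys_ = 0;
public:
    void add_key(uint32_t keylen)    { disksize_of_keys_ += sizeof(uint32_t) + keylen; }
    void remove_key(uint32_t keylen) { disksize_of_keys_ -= sizeof(uint32_t) + keylen; }
    // Serialized size = key bytes (with prefixes) + leafentry bytes in the mempool.
    uint64_t disk_size(uint64_t val_bytes_used) const {
        return disksize_of_keys_ + val_bytes_used;
    }
};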
+void bn_data::initialize_from_separate_keys_and_vals(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version UU(), + uint32_t key_data_size, uint32_t val_data_size, bool all_keys_same_length, + uint32_t fixed_klpair_length) { + paranoid_invariant(version >= FT_LAYOUT_VERSION_26); // Support was added @26 + uint32_t ndone_before = rb->ndone; + init_zero(); + invariant(all_keys_same_length); // Until otherwise supported. + const void *keys_src; + rbuf_literal_bytes(rb, &keys_src, key_data_size); + //Generate dmt + this->m_buffer.create_from_sorted_memory_of_fixed_size_elements( + keys_src, num_entries, key_data_size, fixed_klpair_length); + toku_mempool_construct(&this->m_buffer_mempool, val_data_size); + + const void *vals_src; + rbuf_literal_bytes(rb, &vals_src, val_data_size); + + if (num_entries > 0) { + void *vals_dest = toku_mempool_malloc(&this->m_buffer_mempool, val_data_size); + paranoid_invariant_notnull(vals_dest); + memcpy(vals_dest, vals_src, val_data_size); + } + + add_keys(num_entries, num_entries * fixed_klpair_length); + + toku_note_deserialized_basement_node(all_keys_same_length); + + invariant(rb->ndone - ndone_before == data_size); +} + +static int +wbufwriteleafentry(const void* key, const uint32_t keylen, const LEAFENTRY &le, const uint32_t UU(idx), struct wbuf * const wb) { + // need to pack the leafentry as it was in versions + // where the key was integrated into it (< 26) + uint32_t begin_spot UU() = wb->ndone; + uint32_t le_disk_size = leafentry_disksize(le); + wbuf_nocrc_uint8_t(wb, le->type); + wbuf_nocrc_uint32_t(wb, keylen); + if (le->type == LE_CLEAN) { + wbuf_nocrc_uint32_t(wb, le->u.clean.vallen); + wbuf_nocrc_literal_bytes(wb, key, keylen); + wbuf_nocrc_literal_bytes(wb, le->u.clean.val, le->u.clean.vallen); + } + else { + paranoid_invariant(le->type == LE_MVCC); + wbuf_nocrc_uint32_t(wb, le->u.mvcc.num_cxrs); + wbuf_nocrc_uint8_t(wb, le->u.mvcc.num_pxrs); + wbuf_nocrc_literal_bytes(wb, key, keylen); + wbuf_nocrc_literal_bytes(wb, le->u.mvcc.xrs, le_disk_size - (1 + 4 + 1)); + } + uint32_t end_spot UU() = wb->ndone; + paranoid_invariant((end_spot - begin_spot) == keylen + sizeof(keylen) + le_disk_size); + return 0; +} + +void bn_data::serialize_to_wbuf(struct wbuf *const wb) { + prepare_to_serialize(); + serialize_header(wb); + if (m_buffer.value_length_is_fixed()) { + serialize_rest(wb); + } else { + // + // iterate over leafentries and place them into the buffer + // + iterate(wb); + } +} + +// If we have fixed-length keys, we prepare the dmt and mempool. +// The mempool is prepared by removing any fragmented space and ordering leafentries in the same order as their keys. +void bn_data::prepare_to_serialize(void) { + if (m_buffer.value_length_is_fixed()) { + m_buffer.prepare_for_serialize(); + dmt_compress_kvspace(0, nullptr, true); // Gets it ready for easy serialization. 
+ } } -void bn_data::initialize_from_data(uint32_t num_entries, unsigned char *buf, uint32_t data_size) { +void bn_data::serialize_header(struct wbuf *wb) const { + bool fixed = m_buffer.value_length_is_fixed(); + + //key_data_size + wbuf_nocrc_uint(wb, m_disksize_of_keys); + //val_data_size + wbuf_nocrc_uint(wb, toku_mempool_get_used_size(&m_buffer_mempool)); + //fixed_klpair_length + wbuf_nocrc_uint(wb, m_buffer.get_fixed_length()); + // all_keys_same_length + wbuf_nocrc_uint8_t(wb, fixed); + // keys_vals_separate + wbuf_nocrc_uint8_t(wb, fixed); +} + +void bn_data::serialize_rest(struct wbuf *wb) const { + //Write keys + invariant(m_buffer.value_length_is_fixed()); //Assumes prepare_to_serialize was called + m_buffer.serialize_values(m_disksize_of_keys, wb); + + //Write leafentries + //Just ran dmt_compress_kvspace so there is no fragmentation and also leafentries are in sorted order. + paranoid_invariant(toku_mempool_get_frag_size(&m_buffer_mempool) == 0); + uint32_t val_data_size = toku_mempool_get_used_size(&m_buffer_mempool); + wbuf_nocrc_literal_bytes(wb, toku_mempool_get_base(&m_buffer_mempool), val_data_size); +} + +// Deserialize from rbuf +void bn_data::deserialize_from_rbuf(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version) { + uint32_t key_data_size = data_size; // overallocate if < version 26 (best guess that is guaranteed not too small) + uint32_t val_data_size = data_size; // overallocate if < version 26 (best guess that is guaranteed not too small) + + bool all_keys_same_length = false; + bool keys_vals_separate = false; + uint32_t fixed_klpair_length = 0; + + // In version 25 and older there is no header. Skip reading header for old version. + if (version >= FT_LAYOUT_VERSION_26) { + uint32_t ndone_before = rb->ndone; + key_data_size = rbuf_int(rb); + val_data_size = rbuf_int(rb); + fixed_klpair_length = rbuf_int(rb); // 0 if !all_keys_same_length + all_keys_same_length = rbuf_char(rb); + keys_vals_separate = rbuf_char(rb); + invariant(all_keys_same_length == keys_vals_separate); // Until we support otherwise + uint32_t header_size = rb->ndone - ndone_before; + data_size -= header_size; + invariant(header_size == HEADER_LENGTH); + if (keys_vals_separate) { + invariant(fixed_klpair_length >= sizeof(klpair_struct) || num_entries == 0); + initialize_from_separate_keys_and_vals(num_entries, rb, data_size, version, + key_data_size, val_data_size, all_keys_same_length, + fixed_klpair_length); + return; + } + } + // Version >= 26 and version 25 deserialization are now identical except that <= 25 might allocate too much memory. 
+ const void *bytes; + rbuf_literal_bytes(rb, &bytes, data_size); + const unsigned char *CAST_FROM_VOIDP(buf, bytes); if (data_size == 0) { invariant_zero(num_entries); } - KLPAIR *XMALLOC_N(num_entries, array); // create array of pointers to leafentries - unsigned char *newmem = NULL; - // add same wiggle room that toku_mempool_construct would, 25% extra - uint32_t allocated_bytes = data_size + data_size/4; - CAST_FROM_VOIDP(newmem, toku_xmalloc(allocated_bytes)); - unsigned char* curr_src_pos = buf; + init_zero(); + klpair_dmt_t::builder dmt_builder; + dmt_builder.create(num_entries, key_data_size); + + // TODO(leif): clean this up (#149) + unsigned char *newmem = nullptr; + // add 25% extra wiggle room + uint32_t allocated_bytes_vals = val_data_size + (val_data_size / 4); + CAST_FROM_VOIDP(newmem, toku_xmalloc(allocated_bytes_vals)); + const unsigned char* curr_src_pos = buf; unsigned char* curr_dest_pos = newmem; for (uint32_t i = 0; i < num_entries; i++) { - KLPAIR curr_kl = (KLPAIR)curr_dest_pos; - array[i] = curr_kl; - uint8_t curr_type = curr_src_pos[0]; curr_src_pos++; // first thing we do is lay out the key, // to do so, we must extract it from the leafentry // and write it in uint32_t keylen = 0; - void* keyp = NULL; + const void* keyp = nullptr; keylen = *(uint32_t *)curr_src_pos; curr_src_pos += sizeof(uint32_t); uint32_t clean_vallen = 0; @@ -150,12 +301,10 @@ void bn_data::initialize_from_data(uint32_t num_entries, unsigned char *buf, uin keyp = curr_src_pos; curr_src_pos += keylen; } - // now that we have the keylen and the key, we can copy it - // into the destination - *(uint32_t *)curr_dest_pos = keylen; - curr_dest_pos += sizeof(keylen); - memcpy(curr_dest_pos, keyp, keylen); - curr_dest_pos += keylen; + uint32_t le_offset = curr_dest_pos - newmem; + dmt_builder.append(klpair_dmtwriter(keylen, le_offset, keyp)); + add_key(keylen); + // now curr_dest_pos is pointing to where the leafentry should be packed curr_dest_pos[0] = curr_type; curr_dest_pos++; @@ -173,31 +322,44 @@ void bn_data::initialize_from_data(uint32_t num_entries, unsigned char *buf, uin *(uint8_t *)curr_dest_pos = num_pxrs; curr_dest_pos += sizeof(num_pxrs); // now we need to pack the rest of the data - uint32_t num_rest_bytes = leafentry_rest_memsize(num_pxrs, num_cxrs, curr_src_pos); + uint32_t num_rest_bytes = leafentry_rest_memsize(num_pxrs, num_cxrs, const_cast(curr_src_pos)); memcpy(curr_dest_pos, curr_src_pos, num_rest_bytes); curr_dest_pos += num_rest_bytes; curr_src_pos += num_rest_bytes; } } - uint32_t num_bytes_read UU() = (uint32_t)(curr_src_pos - buf); - paranoid_invariant( num_bytes_read == data_size); - uint32_t num_bytes_written = curr_dest_pos - newmem; - paranoid_invariant( num_bytes_written == data_size); - toku_mempool_init(&m_buffer_mempool, newmem, (size_t)(num_bytes_written), allocated_bytes); - - // destroy old omt that was created by toku_create_empty_bn(), so we can create a new one - m_buffer.destroy(); - m_buffer.create_steal_sorted_array(&array, num_entries, num_entries); + dmt_builder.build(&this->m_buffer); + toku_note_deserialized_basement_node(m_buffer.value_length_is_fixed()); + + uint32_t num_bytes_read = (uint32_t)(curr_src_pos - buf); + invariant(num_bytes_read == data_size); + + uint32_t num_bytes_written = curr_dest_pos - newmem + m_disksize_of_keys; + invariant(num_bytes_written == data_size); + toku_mempool_init(&m_buffer_mempool, newmem, (size_t)(curr_dest_pos - newmem), allocated_bytes_vals); + + invariant(get_disk_size() == data_size); + // Versions older than 
26 might have allocated too much memory. Try to shrink the mempool now that we + // know how much memory we need. + if (version < FT_LAYOUT_VERSION_26) { + // Unnecessary after version 26 + // Reallocate smaller mempool to save memory + invariant_zero(toku_mempool_get_frag_size(&m_buffer_mempool)); + toku_mempool_realloc_larger(&m_buffer_mempool, toku_mempool_get_used_size(&m_buffer_mempool)); + } } uint64_t bn_data::get_memory_size() { uint64_t retval = 0; + //TODO: Maybe ask for memory_size instead of mempool_footprint (either this todo or the next) // include fragmentation overhead but do not include space in the // mempool that has not yet been allocated for leaf entries size_t poolsize = toku_mempool_footprint(&m_buffer_mempool); - invariant(poolsize >= get_disk_size()); retval += poolsize; + // This one includes not-yet-allocated for nodes (just like old constant-key omt) + //TODO: Maybe ask for mempool_footprint instead of memory_size. retval += m_buffer.memory_size(); + invariant(retval >= get_disk_size()); return retval; } @@ -205,169 +367,264 @@ void bn_data::delete_leafentry ( uint32_t idx, uint32_t keylen, uint32_t old_le_size - ) + ) { + remove_key(keylen); m_buffer.delete_at(idx); - toku_mempool_mfree(&m_buffer_mempool, 0, old_le_size + keylen + sizeof(keylen)); // Must pass 0, since le is no good any more. + toku_mempool_mfree(&m_buffer_mempool, nullptr, old_le_size); } /* mempool support */ -struct omt_compressor_state { +struct dmt_compressor_state { struct mempool *new_kvspace; - KLPAIR *newvals; + class bn_data *bd; }; -static int move_it (const KLPAIR &klpair, const uint32_t idx, struct omt_compressor_state * const oc) { - uint32_t size = klpair_size(klpair); - KLPAIR CAST_FROM_VOIDP(newdata, toku_mempool_malloc(oc->new_kvspace, size, 1)); +static int move_it (const uint32_t, klpair_struct *klpair, const uint32_t idx UU(), struct dmt_compressor_state * const oc) { + LEAFENTRY old_le = oc->bd->get_le_from_klpair(klpair); + uint32_t size = leafentry_memsize(old_le); + void* newdata = toku_mempool_malloc(oc->new_kvspace, size); paranoid_invariant_notnull(newdata); // we do this on a fresh mempool, so nothing bad should happen - memcpy(newdata, klpair, size); - oc->newvals[idx] = newdata; + memcpy(newdata, old_le, size); + klpair->le_offset = toku_mempool_get_offset_from_pointer_and_base(oc->new_kvspace, newdata); return 0; } -// Compress things, and grow the mempool if needed. -void bn_data::omt_compress_kvspace(size_t added_size, void **maybe_free) { - uint32_t total_size_needed = toku_mempool_get_used_space(&m_buffer_mempool) + added_size; - // set the new mempool size to be twice of the space we actually need. - // On top of the 25% that is padded within toku_mempool_construct (which we - // should consider getting rid of), that should be good enough. - struct mempool new_kvspace; - toku_mempool_construct(&new_kvspace, 2*total_size_needed); - uint32_t numvals = omt_size(); - KLPAIR *XMALLOC_N(numvals, newvals); - struct omt_compressor_state oc = { &new_kvspace, newvals }; +// Compress things, and grow or shrink the mempool if needed. +// May (always if force_compress) have a side effect of putting contents of mempool in sorted order. +void bn_data::dmt_compress_kvspace(size_t added_size, void **maybe_free, bool force_compress) { + uint32_t total_size_needed = toku_mempool_get_used_size(&m_buffer_mempool) + added_size; - m_buffer.iterate_on_range< decltype(oc), move_it >(0, omt_size(), &oc); + // If there is no fragmentation, e.g. 
in serial inserts, we can just increase the size + // of the mempool and move things over with a cheap memcpy. If force_compress is true, + // the caller needs the side effect that all contents are put in sorted order. + bool do_compress = toku_mempool_get_frag_size(&m_buffer_mempool) > 0 || force_compress; - m_buffer.destroy(); - m_buffer.create_steal_sorted_array(&newvals, numvals, numvals); + void *old_mempool_base = toku_mempool_get_base(&m_buffer_mempool); + struct mempool new_kvspace; + if (do_compress) { + size_t requested_size = force_compress ? total_size_needed : ((total_size_needed * 3) / 2); + toku_mempool_construct(&new_kvspace, requested_size); + struct dmt_compressor_state oc = { &new_kvspace, this }; + m_buffer.iterate_ptr< decltype(oc), move_it >(&oc); + } else { + toku_mempool_construct(&new_kvspace, total_size_needed); + size_t old_offset_limit = toku_mempool_get_offset_limit(&m_buffer_mempool); + void *new_mempool_base = toku_mempool_malloc(&new_kvspace, old_offset_limit); + memcpy(new_mempool_base, old_mempool_base, old_offset_limit); + } if (maybe_free) { - *maybe_free = m_buffer_mempool.base; + *maybe_free = old_mempool_base; } else { - toku_free(m_buffer_mempool.base); + toku_free(old_mempool_base); } m_buffer_mempool = new_kvspace; } // Effect: Allocate a new object of size SIZE in MP. If MP runs out of space, allocate new a new mempool space, and copy all the items // from the OMT (which items refer to items in the old mempool) into the new mempool. -// If MAYBE_FREE is NULL then free the old mempool's space. +// If MAYBE_FREE is nullptr then free the old mempool's space. // Otherwise, store the old mempool's space in maybe_free. -KLPAIR bn_data::mempool_malloc_from_omt(size_t size, void **maybe_free) { - void *v = toku_mempool_malloc(&m_buffer_mempool, size, 1); - if (v == NULL) { - omt_compress_kvspace(size, maybe_free); - v = toku_mempool_malloc(&m_buffer_mempool, size, 1); +LEAFENTRY bn_data::mempool_malloc_and_update_dmt(size_t size, void **maybe_free) { + void *v = toku_mempool_malloc(&m_buffer_mempool, size); + if (v == nullptr) { + dmt_compress_kvspace(size, maybe_free, false); + v = toku_mempool_malloc(&m_buffer_mempool, size); paranoid_invariant_notnull(v); } - return (KLPAIR)v; + return (LEAFENTRY)v; } -//TODO: probably not free the "maybe_free" right away? void bn_data::get_space_for_overwrite( uint32_t idx, - const void* keyp, - uint32_t keylen, + const void* keyp UU(), + uint32_t keylen UU(), + uint32_t old_keylen, uint32_t old_le_size, uint32_t new_size, - LEAFENTRY* new_le_space + LEAFENTRY* new_le_space, + void **const maybe_free ) { - void* maybe_free = nullptr; - uint32_t size_alloc = new_size + keylen + sizeof(keylen); - KLPAIR new_kl = mempool_malloc_from_omt( - size_alloc, - &maybe_free - ); - uint32_t size_freed = old_le_size + keylen + sizeof(keylen); - toku_mempool_mfree(&m_buffer_mempool, nullptr, size_freed); // Must pass nullptr, since le is no good any more. 
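// Aside: dmt_compress_kvspace above has two paths. When there is fragmentation (or the caller needs values laid out
// in key order), each live leafentry is copied into a fresh pool and its stored offset is updated; when there are no
// holes, the pool is simply grown and the used prefix carried over unchanged, which keeps every stored offset valid.
// A simplified restatement with hypothetical types (ValueRef, compress_value_pool), not the real mempool interface:
#include <cstdint>
#include <vector>

struct ValueRef { uint32_t offset; uint32_t size; };   // like le_offset plus leafentry size

static void compress_value_pool(std::vector<unsigned char> &pool,
                                size_t frag_bytes, size_t added_size,
                                bool force_compress,
                                std::vector<ValueRef> &refs) {
    if (frag_bytes > 0 || force_compress) {
        // Rewrite live values one by one, in reference order, fixing stored offsets.
        std::vector<unsigned char> new_pool;
        new_pool.reserve(pool.size() + added_size);
        for (ValueRef &r : refs) {
            uint32_t new_off = (uint32_t)new_pool.size();
            new_pool.insert(new_pool.end(),
                            pool.begin() + r.offset,
                            pool.begin() + r.offset + r.size);
            r.offset = new_off;
        }
        pool.swap(new_pool);
    } else {
        // No holes: growing the pool in place leaves every stored offset valid.
        pool.reserve(pool.size() + added_size);
    }
}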
- new_kl->keylen = keylen; - memcpy(new_kl->key_le, keyp, keylen); - m_buffer.set_at(new_kl, idx); - *new_le_space = get_le_from_klpair(new_kl); - // free at end, so that the keyp and keylen - // passed in is still valid - if (maybe_free) { - toku_free(maybe_free); - } + *maybe_free = nullptr; + LEAFENTRY new_le = mempool_malloc_and_update_dmt(new_size, maybe_free); + toku_mempool_mfree(&m_buffer_mempool, nullptr, old_le_size); + klpair_struct* klp = nullptr; + uint32_t klpair_len; + int r = m_buffer.fetch(idx, &klpair_len, &klp); + invariant_zero(r); + paranoid_invariant(klp!=nullptr); + // Old key length should be consistent with what is stored in the DMT + invariant(keylen_from_klpair_len(klpair_len) == old_keylen); + + size_t new_le_offset = toku_mempool_get_offset_from_pointer_and_base(&this->m_buffer_mempool, new_le); + paranoid_invariant(new_le_offset <= UINT32_MAX - new_size); // Not using > 4GB + klp->le_offset = new_le_offset; + + paranoid_invariant(new_le == get_le_from_klpair(klp)); + *new_le_space = new_le; } -//TODO: probably not free the "maybe_free" right away? void bn_data::get_space_for_insert( uint32_t idx, const void* keyp, uint32_t keylen, size_t size, - LEAFENTRY* new_le_space + LEAFENTRY* new_le_space, + void **const maybe_free ) { - void* maybe_free = nullptr; - uint32_t size_alloc = size + keylen + sizeof(keylen); - KLPAIR new_kl = mempool_malloc_from_omt( - size_alloc, - &maybe_free - ); - new_kl->keylen = keylen; - memcpy(new_kl->key_le, keyp, keylen); - m_buffer.insert_at(new_kl, idx); - *new_le_space = get_le_from_klpair(new_kl); - // free at end, so that the keyp and keylen - // passed in is still valid (you never know if - // it was part of the old mempool, this is just - // safer). - if (maybe_free) { - toku_free(maybe_free); - } + add_key(keylen); + + *maybe_free = nullptr; + LEAFENTRY new_le = mempool_malloc_and_update_dmt(size, maybe_free); + size_t new_le_offset = toku_mempool_get_offset_from_pointer_and_base(&this->m_buffer_mempool, new_le); + + klpair_dmtwriter kl(keylen, new_le_offset, keyp); + m_buffer.insert_at(kl, idx); + + *new_le_space = new_le; } -void bn_data::move_leafentries_to( - BN_DATA dest_bd, - uint32_t lbi, //lower bound inclusive - uint32_t ube //upper bound exclusive - ) -//Effect: move leafentries in the range [lbi, ube) from this to src_omt to newly created dest_omt -{ - paranoid_invariant(lbi < ube); - paranoid_invariant(ube <= omt_size()); - KLPAIR *XMALLOC_N(ube-lbi, newklpointers); // create new omt - - size_t mpsize = toku_mempool_get_used_space(&m_buffer_mempool); // overkill, but safe - struct mempool *dest_mp = &dest_bd->m_buffer_mempool; - struct mempool *src_mp = &m_buffer_mempool; - toku_mempool_construct(dest_mp, mpsize); - - uint32_t i = 0; - for (i = lbi; i < ube; i++) { - KLPAIR curr_kl= 0; - m_buffer.fetch(i, &curr_kl); - - size_t kl_size = klpair_size(curr_kl); - KLPAIR new_kl = NULL; - CAST_FROM_VOIDP(new_kl, toku_mempool_malloc(dest_mp, kl_size, 1)); - memcpy(new_kl, curr_kl, kl_size); - newklpointers[i-lbi] = new_kl; - toku_mempool_mfree(src_mp, curr_kl, kl_size); +class split_klpairs_extra { + bn_data *const m_left_bn; + bn_data *const m_right_bn; + klpair_dmt_t::builder *const m_left_builder; + klpair_dmt_t::builder *const m_right_builder; + struct mempool *const m_left_dest_mp; + uint32_t m_split_at; + + struct mempool *left_dest_mp(void) const { return m_left_dest_mp; } + struct mempool *right_dest_mp(void) const { return &m_right_bn->m_buffer_mempool; } + + void copy_klpair(const uint32_t klpair_len, const 
klpair_struct &klpair, + klpair_dmt_t::builder *const builder, + struct mempool *const dest_mp, + bn_data *const bn) { + LEAFENTRY old_le = m_left_bn->get_le_from_klpair(&klpair); + size_t le_size = leafentry_memsize(old_le); + + void *new_le = toku_mempool_malloc(dest_mp, le_size); + paranoid_invariant_notnull(new_le); + memcpy(new_le, old_le, le_size); + size_t le_offset = toku_mempool_get_offset_from_pointer_and_base(dest_mp, new_le); + size_t keylen = keylen_from_klpair_len(klpair_len); + builder->append(klpair_dmtwriter(keylen, le_offset, klpair.key)); + + bn->add_key(keylen); + } + + int move_leafentry(const uint32_t klpair_len, const klpair_struct &klpair, const uint32_t idx) { + m_left_bn->remove_key(keylen_from_klpair_len(klpair_len)); + + if (idx < m_split_at) { + copy_klpair(klpair_len, klpair, m_left_builder, left_dest_mp(), m_left_bn); + } else { + copy_klpair(klpair_len, klpair, m_right_builder, right_dest_mp(), m_right_bn); + } + return 0; } - dest_bd->m_buffer.create_steal_sorted_array(&newklpointers, ube-lbi, ube-lbi); - // now remove the elements from src_omt - for (i=ube-1; i >= lbi; i--) { - m_buffer.delete_at(i); + public: + split_klpairs_extra(bn_data *const left_bn, bn_data *const right_bn, + klpair_dmt_t::builder *const left_builder, + klpair_dmt_t::builder *const right_builder, + struct mempool *const left_new_mp, + uint32_t split_at) + : m_left_bn(left_bn), + m_right_bn(right_bn), + m_left_builder(left_builder), + m_right_builder(right_builder), + m_left_dest_mp(left_new_mp), + m_split_at(split_at) {} + static int cb(const uint32_t klpair_len, const klpair_struct &klpair, const uint32_t idx, split_klpairs_extra *const thisp) { + return thisp->move_leafentry(klpair_len, klpair, idx); } +}; + +void bn_data::split_klpairs( + bn_data* right_bd, + uint32_t split_at //lower bound inclusive for right_bd + ) +{ + // We use move_leafentries_to during a split, and the split algorithm should never call this + // if it's splitting on a boundary, so there must be some leafentries in the range to move. + paranoid_invariant(split_at < num_klpairs()); + + right_bd->init_zero(); + + size_t mpsize = toku_mempool_get_used_size(&m_buffer_mempool); // overkill, but safe + + struct mempool new_left_mp; + toku_mempool_construct(&new_left_mp, mpsize); + + struct mempool *right_mp = &right_bd->m_buffer_mempool; + toku_mempool_construct(right_mp, mpsize); + + klpair_dmt_t::builder left_dmt_builder; + left_dmt_builder.create(split_at, m_disksize_of_keys); // overkill, but safe (builder will realloc at the end) + + klpair_dmt_t::builder right_dmt_builder; + right_dmt_builder.create(num_klpairs() - split_at, m_disksize_of_keys); // overkill, but safe (builder will realloc at the end) + + split_klpairs_extra extra(this, right_bd, &left_dmt_builder, &right_dmt_builder, &new_left_mp, split_at); + + int r = m_buffer.iterate(&extra); + invariant_zero(r); + + m_buffer.destroy(); + toku_mempool_destroy(&m_buffer_mempool); + + m_buffer_mempool = new_left_mp; + + left_dmt_builder.build(&m_buffer); + right_dmt_builder.build(&right_bd->m_buffer); + + // Potentially shrink memory pool for destination. 
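// Aside: split_klpairs above moves entries with index >= split_at into the right-hand basement node and rebuilds the
// left-hand node's pool so both stay compact. Reduced to standard containers (Pair and split_pairs are hypothetical
// names), the shape of the operation is:
#include <cstdint>
#include <string>
#include <vector>

struct Pair { std::string key; std::string val; };

static void split_pairs(std::vector<Pair> &node, uint32_t split_at,
                        std::vector<Pair> *right_out) {
    right_out->assign(node.begin() + split_at, node.end());   // right gets the tail
    node.erase(node.begin() + split_at, node.end());          // left keeps the head
    node.shrink_to_fit();                                     // like shrinking the mempools
    right_out->shrink_to_fit();
}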
+ // We overallocated ("overkill") above + struct mempool *const left_mp = &m_buffer_mempool; + paranoid_invariant_zero(toku_mempool_get_frag_size(left_mp)); + toku_mempool_realloc_larger(left_mp, toku_mempool_get_used_size(left_mp)); + paranoid_invariant_zero(toku_mempool_get_frag_size(right_mp)); + toku_mempool_realloc_larger(right_mp, toku_mempool_get_used_size(right_mp)); } uint64_t bn_data::get_disk_size() { - return toku_mempool_get_used_space(&m_buffer_mempool); + return m_disksize_of_keys + + toku_mempool_get_used_size(&m_buffer_mempool); +} + +struct verify_le_in_mempool_state { + size_t offset_limit; + class bn_data *bd; +}; + +static int verify_le_in_mempool (const uint32_t, klpair_struct *klpair, const uint32_t idx UU(), struct verify_le_in_mempool_state * const state) { + invariant(klpair->le_offset < state->offset_limit); + + LEAFENTRY le = state->bd->get_le_from_klpair(klpair); + uint32_t size = leafentry_memsize(le); + + size_t end_offset = klpair->le_offset+size; + + invariant(end_offset <= state->offset_limit); + return 0; } +//This is a debug-only (paranoid) verification. +//Verifies the dmt is valid, and all leafentries are entirely in the mempool's memory. void bn_data::verify_mempool(void) { - // TODO: implement something + //Verify the dmt itself <- paranoid and slow + m_buffer.verify(); + + verify_le_in_mempool_state state = { .offset_limit = toku_mempool_get_offset_limit(&m_buffer_mempool), .bd = this }; + //Verify every leafentry pointed to by the keys in the dmt are fully inside the mempool + m_buffer.iterate_ptr< decltype(state), verify_le_in_mempool >(&state); } -uint32_t bn_data::omt_size(void) const { +uint32_t bn_data::num_klpairs(void) const { return m_buffer.size(); } @@ -375,40 +632,54 @@ void bn_data::destroy(void) { // The buffer may have been freed already, in some cases. m_buffer.destroy(); toku_mempool_destroy(&m_buffer_mempool); + m_disksize_of_keys = 0; } -//TODO: Splitting key/val requires changing this -void bn_data::replace_contents_with_clone_of_sorted_array( +void bn_data::set_contents_as_clone_of_sorted_array( uint32_t num_les, const void** old_key_ptrs, uint32_t* old_keylens, - LEAFENTRY* old_les, - size_t *le_sizes, - size_t mempool_size - ) + LEAFENTRY* old_les, + size_t *le_sizes, + size_t total_key_size, + size_t total_le_size + ) { - toku_mempool_construct(&m_buffer_mempool, mempool_size); - KLPAIR *XMALLOC_N(num_les, le_array); + //Enforce "just created" invariant. 
+ paranoid_invariant_zero(m_disksize_of_keys); + paranoid_invariant_zero(num_klpairs()); + paranoid_invariant_null(toku_mempool_get_base(&m_buffer_mempool)); + paranoid_invariant_zero(toku_mempool_get_size(&m_buffer_mempool)); + + toku_mempool_construct(&m_buffer_mempool, total_le_size); + m_buffer.destroy(); + m_disksize_of_keys = 0; + + klpair_dmt_t::builder dmt_builder; + dmt_builder.create(num_les, total_key_size); + for (uint32_t idx = 0; idx < num_les; idx++) { - KLPAIR new_kl = (KLPAIR)toku_mempool_malloc( - &m_buffer_mempool, - le_sizes[idx] + old_keylens[idx] + sizeof(uint32_t), - 1); // point to new location - new_kl->keylen = old_keylens[idx]; - memcpy(new_kl->key_le, old_key_ptrs[idx], new_kl->keylen); - memcpy(get_le_from_klpair(new_kl), old_les[idx], le_sizes[idx]); - CAST_FROM_VOIDP(le_array[idx], new_kl); + void* new_le = toku_mempool_malloc(&m_buffer_mempool, le_sizes[idx]); + paranoid_invariant_notnull(new_le); + memcpy(new_le, old_les[idx], le_sizes[idx]); + size_t le_offset = toku_mempool_get_offset_from_pointer_and_base(&m_buffer_mempool, new_le); + dmt_builder.append(klpair_dmtwriter(old_keylens[idx], le_offset, old_key_ptrs[idx])); + add_key(old_keylens[idx]); } - //TODO: Splitting key/val requires changing this; keys are stored in old omt.. cannot delete it yet? - m_buffer.destroy(); - m_buffer.create_steal_sorted_array(&le_array, num_les, num_les); + dmt_builder.build(&this->m_buffer); +} + +LEAFENTRY bn_data::get_le_from_klpair(const klpair_struct *klpair) const { + void * ptr = toku_mempool_get_pointer_from_base_and_offset(&this->m_buffer_mempool, klpair->le_offset); + LEAFENTRY CAST_FROM_VOIDP(le, ptr); + return le; } // get info about a single leafentry by index int bn_data::fetch_le(uint32_t idx, LEAFENTRY *le) { - KLPAIR klpair = NULL; - int r = m_buffer.fetch(idx, &klpair); + klpair_struct* klpair = nullptr; + int r = m_buffer.fetch(idx, nullptr, &klpair); if (r == 0) { *le = get_le_from_klpair(klpair); } @@ -416,59 +687,41 @@ int bn_data::fetch_le(uint32_t idx, LEAFENTRY *le) { } int bn_data::fetch_klpair(uint32_t idx, LEAFENTRY *le, uint32_t *len, void** key) { - KLPAIR klpair = NULL; - int r = m_buffer.fetch(idx, &klpair); + klpair_struct* klpair = nullptr; + uint32_t klpair_len; + int r = m_buffer.fetch(idx, &klpair_len, &klpair); if (r == 0) { - *len = klpair->keylen; - *key = klpair->key_le; + *len = keylen_from_klpair_len(klpair_len); + *key = klpair->key; *le = get_le_from_klpair(klpair); } return r; } int bn_data::fetch_klpair_disksize(uint32_t idx, size_t *size) { - KLPAIR klpair = NULL; - int r = m_buffer.fetch(idx, &klpair); + klpair_struct* klpair = nullptr; + uint32_t klpair_len; + int r = m_buffer.fetch(idx, &klpair_len, &klpair); if (r == 0) { - *size = klpair_disksize(klpair); + *size = klpair_disksize(klpair_len, klpair); } return r; } -int bn_data::fetch_le_key_and_len(uint32_t idx, uint32_t *len, void** key) { - KLPAIR klpair = NULL; - int r = m_buffer.fetch(idx, &klpair); +int bn_data::fetch_key_and_len(uint32_t idx, uint32_t *len, void** key) { + klpair_struct* klpair = nullptr; + uint32_t klpair_len; + int r = m_buffer.fetch(idx, &klpair_len, &klpair); if (r == 0) { - *len = klpair->keylen; - *key = klpair->key_le; + *len = keylen_from_klpair_len(klpair_len); + *key = klpair->key; } return r; } - -struct mp_pair { - void* orig_base; - void* new_base; - klpair_omt_t* omt; -}; - -static int fix_mp_offset(const KLPAIR &klpair, const uint32_t idx, struct mp_pair * const p) { - char* old_value = (char *) klpair; - char *new_value = 
old_value - (char *)p->orig_base + (char *)p->new_base; - p->omt->set_at((KLPAIR)new_value, idx); - return 0; -} - void bn_data::clone(bn_data* orig_bn_data) { toku_mempool_clone(&orig_bn_data->m_buffer_mempool, &m_buffer_mempool); m_buffer.clone(orig_bn_data->m_buffer); - struct mp_pair p; - p.orig_base = toku_mempool_get_base(&orig_bn_data->m_buffer_mempool); - p.new_base = toku_mempool_get_base(&m_buffer_mempool); - p.omt = &m_buffer; - - int r = m_buffer.iterate_on_range(0, omt_size(), &p); - invariant_zero(r); + this->m_disksize_of_keys = orig_bn_data->m_disksize_of_keys; } - diff --git a/storage/tokudb/ft-index/ft/bndata.h b/storage/tokudb/ft-index/ft/bndata.h index 37e80c329677f..0cded5de5fc51 100644 --- a/storage/tokudb/ft-index/ft/bndata.h +++ b/storage/tokudb/ft-index/ft/bndata.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,169 +88,299 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - #pragma once -#include -#include "leafentry.h" -#include - -#if 0 //for implementation -static int -UU() verify_in_mempool(OMTVALUE lev, uint32_t UU(idx), void *mpv) -{ - LEAFENTRY CAST_FROM_VOIDP(le, lev); - struct mempool *CAST_FROM_VOIDP(mp, mpv); - int r = toku_mempool_inrange(mp, le, leafentry_memsize(le)); - lazy_assert(r); - return 0; -} - toku_omt_iterate(bn->buffer, verify_in_mempool, &bn->buffer_mempool); +#include "util/dmt.h" +#include "util/mempool.h" -#endif +#include "ft/leafentry.h" +#include "ft/serialize/wbuf.h" +// Key/leafentry pair stored in a dmt. The key is inlined, the offset (in leafentry mempool) is stored for the leafentry. struct klpair_struct { - uint32_t keylen; - uint8_t key_le[0]; // key, followed by le + uint32_t le_offset; //Offset of leafentry (in leafentry mempool) + uint8_t key[0]; // key, followed by le }; -typedef struct klpair_struct *KLPAIR; - -static inline LEAFENTRY get_le_from_klpair(KLPAIR klpair){ - uint32_t keylen = klpair->keylen; - LEAFENTRY le = (LEAFENTRY)(klpair->key_le + keylen); - return le; +static constexpr uint32_t keylen_from_klpair_len(const uint32_t klpair_len) { + return klpair_len - __builtin_offsetof(klpair_struct, key); } -template -static int wrappy_fun_find(const KLPAIR &klpair, const omtcmp_t &extra) { - //TODO: kill this function when we split, and/or use toku_fill_dbt + +static_assert(__builtin_offsetof(klpair_struct, key) == 1*sizeof(uint32_t), "klpair alignment issues"); +static_assert(__builtin_offsetof(klpair_struct, key) == sizeof(klpair_struct), "klpair size issues"); + +// A wrapper for the heaviside function provided to dmt->find*. +// Needed because the heaviside functions provided to bndata do not know about the internal types. +// Alternative to this wrapper is to expose accessor functions and rewrite all the external heaviside functions. 
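// Aside: in the new layout described above, a dmt entry holds a 4-byte offset into the value mempool followed by the
// inline key bytes, and the key length is recovered from the entry's stored length rather than kept in a separate
// field. A standalone restatement; klpair_sketch and the helper names are hypothetical, for illustration only.
#include <cstddef>
#include <cstdint>

struct klpair_sketch {
    uint32_t le_offset;   // where the leafentry lives in the value mempool
    uint8_t  key[1];      // key bytes, inlined; variable length in practice
};

static inline uint32_t keylen_from_entry_len(uint32_t entry_len) {
    return entry_len - offsetof(klpair_sketch, key);
}

static inline const unsigned char *leafentry_from_klpair(const unsigned char *pool_base,
                                                         const klpair_sketch *kl) {
    return pool_base + kl->le_offset;
}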
+template +static int klpair_find_wrapper(const uint32_t klpair_len, const klpair_struct &klpair, const dmtcmp_t &extra) { DBT kdbt; - kdbt.data = klpair->key_le; - kdbt.size = klpair->keylen; + kdbt.data = const_cast(reinterpret_cast(klpair.key)); + kdbt.size = keylen_from_klpair_len(klpair_len); return h(kdbt, extra); } +template +struct klpair_iterate_extra { + public: + inner_iterate_extra_t *inner; + const class bn_data * bd; +}; + +// A wrapper for the high-order function provided to dmt->iterate* +// Needed because the heaviside functions provided to bndata do not know about the internal types. +// Alternative to this wrapper is to expose accessor functions and rewrite all the external heaviside functions. template -static int wrappy_fun_iterate(const KLPAIR &klpair, const uint32_t idx, iterate_extra_t *const extra) { - uint32_t keylen = klpair->keylen; - void* key = klpair->key_le; - LEAFENTRY le = get_le_from_klpair(klpair); - return h(key, keylen, le, idx, extra); + int (*f)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t idx, iterate_extra_t *const)> +static int klpair_iterate_wrapper(const uint32_t klpair_len, const klpair_struct &klpair, const uint32_t idx, klpair_iterate_extra *const extra) { + const void* key = &klpair.key; + LEAFENTRY le = extra->bd->get_le_from_klpair(&klpair); + return f(key, keylen_from_klpair_len(klpair_len), le, idx, extra->inner); +} + + +namespace toku { +// dmt writer for klpair_struct +class klpair_dmtwriter { + public: + // Return the size needed for the klpair_struct that this dmtwriter represents + size_t get_size(void) const { + return sizeof(klpair_struct) + this->keylen; + } + // Write the klpair_struct this dmtwriter represents to a destination + void write_to(klpair_struct *const dest) const { + dest->le_offset = this->le_offset; + memcpy(dest->key, this->keyp, this->keylen); + } + + klpair_dmtwriter(uint32_t _keylen, uint32_t _le_offset, const void* _keyp) + : keylen(_keylen), le_offset(_le_offset), keyp(_keyp) {} + klpair_dmtwriter(const uint32_t klpair_len, klpair_struct *const src) + : keylen(keylen_from_klpair_len(klpair_len)), le_offset(src->le_offset), keyp(src->key) {} + private: + const uint32_t keylen; + const uint32_t le_offset; + const void* keyp; +}; } -typedef toku::omt klpair_omt_t; +typedef toku::dmt klpair_dmt_t; // This class stores the data associated with a basement node class bn_data { public: + // Initialize an empty bn_data _without_ a dmt backing. + // Externally only used for deserialization. void init_zero(void); + + // Initialize an empty bn_data _with_ a dmt void initialize_empty(void); - void initialize_from_data(uint32_t num_entries, unsigned char *buf, uint32_t data_size); - // globals + + // Deserialize a bn_data from rbuf. + // This is the entry point for deserialization. + void deserialize_from_rbuf(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version); + + // Retrieve the memory footprint of this basement node. + // May over or under count: see Tokutek/ft-index#136 + // Also see dmt's implementation. uint64_t get_memory_size(void); + + // Get the serialized size of this basement node. 
uint64_t get_disk_size(void); + + // Perform (paranoid) verification that all leafentries are fully contained within the mempool void verify_mempool(void); - // Interact with "omt" - uint32_t omt_size(void) const; + // size() of key dmt + uint32_t num_klpairs(void) const; + // iterate() on key dmt (and associated leafentries) template - int omt_iterate(iterate_extra_t *const iterate_extra) const { - return omt_iterate_on_range(0, omt_size(), iterate_extra); + int iterate(iterate_extra_t *const iterate_extra) const { + return iterate_on_range(0, num_klpairs(), iterate_extra); } + // iterate_on_range() on key dmt (and associated leafentries) template - int omt_iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const iterate_extra) const { - return m_buffer.iterate_on_range< iterate_extra_t, wrappy_fun_iterate >(left, right, iterate_extra); + int iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const iterate_extra) const { + klpair_iterate_extra klpair_extra = { iterate_extra, this }; + return m_buffer.iterate_on_range< klpair_iterate_extra, klpair_iterate_wrapper >(left, right, &klpair_extra); } - template - int find_zero(const omtcmp_t &extra, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const { - KLPAIR klpair = NULL; - int r = m_buffer.find_zero< omtcmp_t, wrappy_fun_find >(extra, &klpair, idxp); + // find_zero() on key dmt + template + int find_zero(const dmtcmp_t &extra, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const { + klpair_struct* klpair = nullptr; + uint32_t klpair_len; + int r = m_buffer.find_zero< dmtcmp_t, klpair_find_wrapper >(extra, &klpair_len, &klpair, idxp); if (r == 0) { if (value) { *value = get_le_from_klpair(klpair); } if (key) { - paranoid_invariant(keylen != NULL); - *key = klpair->key_le; - *keylen = klpair->keylen; + paranoid_invariant_notnull(keylen); + *key = klpair->key; + *keylen = keylen_from_klpair_len(klpair_len); } else { - paranoid_invariant(keylen == NULL); + paranoid_invariant_null(keylen); } } return r; } - template - int find(const omtcmp_t &extra, int direction, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const { - KLPAIR klpair = NULL; - int r = m_buffer.find< omtcmp_t, wrappy_fun_find >(extra, direction, &klpair, idxp); + // find() on key dmt (and associated leafentries) + template + int find(const dmtcmp_t &extra, int direction, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const { + klpair_struct* klpair = nullptr; + uint32_t klpair_len; + int r = m_buffer.find< dmtcmp_t, klpair_find_wrapper >(extra, direction, &klpair_len, &klpair, idxp); if (r == 0) { if (value) { *value = get_le_from_klpair(klpair); } if (key) { - paranoid_invariant(keylen != NULL); - *key = klpair->key_le; - *keylen = klpair->keylen; + paranoid_invariant_notnull(keylen); + *key = klpair->key; + *keylen = keylen_from_klpair_len(klpair_len); } else { - paranoid_invariant(keylen == NULL); + paranoid_invariant_null(keylen); } } return r; } - // get info about a single leafentry by index + // Fetch leafentry by index + __attribute__((__nonnull__)) int fetch_le(uint32_t idx, LEAFENTRY *le); + // Fetch (leafentry, key, keylen) by index + __attribute__((__nonnull__)) int fetch_klpair(uint32_t idx, LEAFENTRY *le, uint32_t *len, void** key); + // Fetch (serialized size of leafentry, key, and keylen) by index + __attribute__((__nonnull__)) int fetch_klpair_disksize(uint32_t idx, size_t *size); - int 
fetch_le_key_and_len(uint32_t idx, uint32_t *len, void** key); + // Fetch (key, keylen) by index + __attribute__((__nonnull__)) + int fetch_key_and_len(uint32_t idx, uint32_t *len, void** key); - // Interact with another bn_data - void move_leafentries_to(BN_DATA dest_bd, - uint32_t lbi, //lower bound inclusive - uint32_t ube //upper bound exclusive - ); + // Move leafentries (and associated key/keylens) from this basement node to dest_bd + // Moves indexes [lbi-ube) + __attribute__((__nonnull__)) + void split_klpairs(bn_data* dest_bd, uint32_t first_index_for_dest); + // Destroy this basement node and free memory. void destroy(void); - // Replaces contents, into brand new mempool. - // Returns old mempool base, expects caller to free it. - void replace_contents_with_clone_of_sorted_array( + // Uses sorted array as input for this basement node. + // Expects this to be a basement node just initialized with initialize_empty() + void set_contents_as_clone_of_sorted_array( uint32_t num_les, const void** old_key_ptrs, uint32_t* old_keylens, LEAFENTRY* old_les, size_t *le_sizes, - size_t mempool_size + size_t total_key_size, + size_t total_le_size ); + // Make this basement node a clone of orig_bn_data. + // orig_bn_data still owns all its memory (dmt, mempool) + // this basement node will have a new dmt, mempool containing same data. void clone(bn_data* orig_bn_data); + + // Delete klpair index idx with provided keylen and old leafentry with size old_le_size void delete_leafentry ( uint32_t idx, uint32_t keylen, uint32_t old_le_size ); - void get_space_for_overwrite(uint32_t idx, const void* keyp, uint32_t keylen, uint32_t old_size, uint32_t new_size, LEAFENTRY* new_le_space); - void get_space_for_insert(uint32_t idx, const void* keyp, uint32_t keylen, size_t size, LEAFENTRY* new_le_space); + + // Allocates space in the mempool to store a new leafentry. + // This may require reorganizing the mempool and updating the dmt. + __attribute__((__nonnull__)) + void get_space_for_overwrite(uint32_t idx, const void* keyp, uint32_t keylen, uint32_t old_keylen, uint32_t old_size, + uint32_t new_size, LEAFENTRY* new_le_space, void **const maybe_free); + + // Allocates space in the mempool to store a new leafentry + // and inserts a new key into the dmt + // This may require reorganizing the mempool and updating the dmt. + __attribute__((__nonnull__)) + void get_space_for_insert(uint32_t idx, const void* keyp, uint32_t keylen, size_t size, LEAFENTRY* new_le_space, void **const maybe_free); + + // Gets a leafentry given a klpair from this basement node. + LEAFENTRY get_le_from_klpair(const klpair_struct *klpair) const; + + void serialize_to_wbuf(struct wbuf *const wb); + + // Prepares this basement node for serialization. + // Must be called before serializing this basement node. + // Between calling prepare_to_serialize and actually serializing, the basement node may not be modified + void prepare_to_serialize(void); + + // Serialize the basement node header to a wbuf + // Requires prepare_to_serialize() to have been called first. + void serialize_header(struct wbuf *wb) const; + + // Serialize all keys and leafentries to a wbuf + // Requires prepare_to_serialize() (and serialize_header()) has been called first. + // Currently only supported when all keys are fixed-length. 
+ void serialize_rest(struct wbuf *wb) const; + + static const uint32_t HEADER_LENGTH = 0 + + sizeof(uint32_t) // key_data_size + + sizeof(uint32_t) // val_data_size + + sizeof(uint32_t) // fixed_key_length + + sizeof(uint8_t) // all_keys_same_length + + sizeof(uint8_t) // keys_vals_separate + + 0; private: - // Private functions - KLPAIR mempool_malloc_from_omt(size_t size, void **maybe_free); - void omt_compress_kvspace(size_t added_size, void **maybe_free); - klpair_omt_t m_buffer; // pointers to individual leaf entries + // split_klpairs_extra should be a local class in split_klpairs, but + // the dmt template parameter for iterate needs linkage, so it has to be a + // separate class, but we want it to be able to call e.g. add_key + friend class split_klpairs_extra; + + // Allocates space in the mempool. + // If there is insufficient space, the mempool is enlarged and leafentries may be shuffled to reduce fragmentation. + // If shuffling happens, the offsets stored in the dmt are updated. + LEAFENTRY mempool_malloc_and_update_dmt(size_t size, void **maybe_free); + + // Change the size of the mempool to support what is already in it, plus added_size. + // possibly "compress" by shuffling leafentries around to reduce fragmentation to 0. + // If fragmentation is already 0 and force_compress is not true, shuffling may be skipped. + // If shuffling happens, leafentries will be stored in the mempool in sorted order. + void dmt_compress_kvspace(size_t added_size, void **maybe_free, bool force_compress); + + // Note that a key was added (for maintaining disk-size of this basement node) + void add_key(uint32_t keylen); + + // Note that multiple keys were added (for maintaining disk-size of this basement node) + void add_keys(uint32_t n_keys, uint32_t combined_klpair_len); + + // Note that a key was removed (for maintaining disk-size of this basement node) + void remove_key(uint32_t keylen); + + klpair_dmt_t m_buffer; // pointers to individual leaf entries struct mempool m_buffer_mempool; // storage for all leaf entries friend class bndata_bugfix_test; -}; + // Get the serialized size of a klpair. + // As of Jan 14, 2014, serialized size of a klpair is independent of whether this basement node has fixed-length keys. + uint32_t klpair_disksize(const uint32_t klpair_len, const klpair_struct *klpair) const; + + // The disk/memory size of all keys. (Note that the size of memory for the leafentries is maintained by m_buffer_mempool) + size_t m_disksize_of_keys; + + // Deserialize this basement node from rbuf + // all keys will be first followed by all leafentries (both in sorted order) + void initialize_from_separate_keys_and_vals(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version, + uint32_t key_data_size, uint32_t val_data_size, bool all_keys_same_length, + uint32_t fixed_klpair_length); +}; diff --git a/storage/tokudb/ft-index/ft/background_job_manager.cc b/storage/tokudb/ft-index/ft/cachetable/background_job_manager.cc similarity index 97% rename from storage/tokudb/ft-index/ft/background_job_manager.cc rename to storage/tokudb/ft-index/ft/cachetable/background_job_manager.cc index 9a9c9467106a9..8db05018d3ceb 100644 --- a/storage/tokudb/ft-index/ft/background_job_manager.cc +++ b/storage/tokudb/ft-index/ft/cachetable/background_job_manager.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -89,11 +89,12 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "background_job_manager.h" -#include "toku_config.h" +#include #include #include +#include "cachetable/background_job_manager.h" + struct background_job_manager_struct { bool accepting_jobs; uint32_t num_jobs; diff --git a/storage/tokudb/ft-index/ft/background_job_manager.h b/storage/tokudb/ft-index/ft/cachetable/background_job_manager.h similarity index 97% rename from storage/tokudb/ft-index/ft/background_job_manager.h rename to storage/tokudb/ft-index/ft/cachetable/background_job_manager.h index 5474a569454dc..d977abae4183c 100644 --- a/storage/tokudb/ft-index/ft/background_job_manager.h +++ b/storage/tokudb/ft-index/ft/cachetable/background_job_manager.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef BACKGROUND_JOB_MANAGER_H -#define BACKGROUND_JOB_MANAGER_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -130,5 +130,3 @@ void bjm_remove_background_job(BACKGROUND_JOB_MANAGER bjm); // has completed, bjm_add_background_job returns an error. // void bjm_wait_for_jobs_to_finish(BACKGROUND_JOB_MANAGER bjm); - -#endif diff --git a/storage/tokudb/ft-index/ft/cachetable-internal.h b/storage/tokudb/ft-index/ft/cachetable/cachetable-internal.h similarity index 98% rename from storage/tokudb/ft-index/ft/cachetable-internal.h rename to storage/tokudb/ft-index/ft/cachetable/cachetable-internal.h index a02449f3c0712..d5dc3ffa5fb11 100644 --- a/storage/tokudb/ft-index/ft/cachetable-internal.h +++ b/storage/tokudb/ft-index/ft/cachetable/cachetable-internal.h @@ -1,9 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TokuDB_cachetable_internal_h -#define TokuDB_cachetable_internal_h - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -33,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,10 +87,12 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." -#include "background_job_manager.h" +#include "cachetable/background_job_manager.h" #include #include #include @@ -179,8 +178,6 @@ class pair_list; // Maps to a file on disk. // struct cachefile { - CACHEFILE next; - CACHEFILE prev; // these next two fields are protected by cachetable's list lock // they are managed whenever we add or remove a pair from // the cachetable. As of Riddler, this linked list is only used to @@ -440,14 +437,12 @@ class cachefile_list { bool evict_some_stale_pair(evictor* ev); void free_stale_data(evictor* ev); // access to these fields are protected by the lock - CACHEFILE m_active_head; // head of CACHEFILEs that are active - CACHEFILE m_stale_head; // head of CACHEFILEs that are stale - CACHEFILE m_stale_tail; // tail of CACHEFILEs that are stale FILENUM m_next_filenum_to_use; uint32_t m_next_hash_id_to_use; toku_pthread_rwlock_t m_lock; // this field is publoc so we are still POD toku::omt m_active_filenum; toku::omt m_active_fileid; + toku::omt m_stale_fileid; private: CACHEFILE find_cachefile_in_list_unlocked(CACHEFILE start, struct fileid* fileid); }; @@ -521,8 +516,8 @@ class evictor { void add_pair_attr(PAIR_ATTR attr); void remove_pair_attr(PAIR_ATTR attr); void change_pair_attr(PAIR_ATTR old_attr, PAIR_ATTR new_attr); - void add_to_size_current(long size); - void remove_from_size_current(long size); + void add_cloned_data_size(long size); + void remove_cloned_data_size(long size); uint64_t reserve_memory(double fraction, uint64_t upper_bound); void release_reserved_memory(uint64_t reserved_memory); void run_eviction_thread(); @@ -536,6 +531,8 @@ class evictor { void get_state(long *size_current_ptr, long *size_limit_ptr); void fill_engine_status(); private: + void add_to_size_current(long size); + void remove_from_size_current(long size); void run_eviction(); bool run_eviction_on_pair(PAIR p); void try_evict_pair(PAIR p); @@ -551,6 +548,7 @@ class evictor { pair_list* m_pl; cachefile_list* m_cf_list; int64_t m_size_current; // the sum of the sizes of the pairs in the cachetable + int64_t m_size_cloned_data; // stores amount of cloned data we have, only used for engine status // changes to these two values are protected // by ev_thread_lock int64_t m_size_reserved; // How much memory is reserved (e.g., by the loader) @@ -654,5 +652,3 @@ struct cachetable { char *env_dir; }; - -#endif // End of header guardian. diff --git a/storage/tokudb/ft-index/ft/cachetable.cc b/storage/tokudb/ft-index/ft/cachetable/cachetable.cc similarity index 95% rename from storage/tokudb/ft-index/ft/cachetable.cc rename to storage/tokudb/ft-index/ft/cachetable/cachetable.cc index b489c9525f128..feda4abc76a5f 100644 --- a/storage/tokudb/ft-index/ft/cachetable.cc +++ b/storage/tokudb/ft-index/ft/cachetable/cachetable.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,24 +89,26 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
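[Editor's note: an illustrative sketch, under stated assumptions, of the accounting pattern the evictor gains in this patch: cloned-data bytes get their own counter and are also folded into size_current, so engine status can report the cloned total separately. demo_evictor_accounting and count values are hypothetical; the real code uses toku_sync_fetch_and_add rather than std::atomic.]

#include <atomic>
#include <cstdio>

class demo_evictor_accounting {
public:
    void add_cloned_data_size(long size) {
        m_size_cloned_data.fetch_add(size, std::memory_order_relaxed);
        add_to_size_current(size);
    }
    void remove_cloned_data_size(long size) {
        m_size_cloned_data.fetch_sub(size, std::memory_order_relaxed);
        remove_from_size_current(size);
    }
    long size_current() const { return m_size_current.load(); }
    long size_cloned()  const { return m_size_cloned_data.load(); }
private:
    // size_current updates are private now, as in the patched evictor class.
    void add_to_size_current(long size)      { m_size_current.fetch_add(size, std::memory_order_relaxed); }
    void remove_from_size_current(long size) { m_size_current.fetch_sub(size, std::memory_order_relaxed); }
    std::atomic<long> m_size_current{0};
    std::atomic<long> m_size_cloned_data{0};
};

int main(void) {
    demo_evictor_accounting acct;
    acct.add_cloned_data_size(4096);      // a pair's value was cloned for checkpoint
    printf("current=%ld cloned=%ld\n", acct.size_current(), acct.size_cloned());
    acct.remove_cloned_data_size(4096);   // cloned copy written out and freed
    printf("current=%ld cloned=%ld\n", acct.size_current(), acct.size_cloned());
    return 0;
}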
-#include -#include #include #include #include -#include "cachetable.h" -#include -#include "checkpoint.h" -#include "log-internal.h" -#include "cachetable-internal.h" -#include -#include + +#include +#include #include #include +#include +#include #include -#include -#include -#include + +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/cachetable-internal.h" +#include "ft/cachetable/checkpoint.h" +#include "ft/logger/log-internal.h" +#include "util/rwlock.h" +#include "util/scoped_malloc.h" +#include "util/status.h" +#include "util/context.h" /////////////////////////////////////////////////////////////////////////////////// // Engine status @@ -127,7 +129,7 @@ static CACHETABLE_STATUS_S ct_status; // Note, toku_cachetable_get_status() is below, after declaration of cachetable. -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ct_status, k, c, t, "cachetable: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ct_status, k, c, t, "cachetable: " l, inc) static void status_init(void) { @@ -144,6 +146,7 @@ status_init(void) { STATUS_INIT(CT_SIZE_LEAF, CACHETABLE_SIZE_LEAF, UINT64, "size leaf", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_SIZE_ROLLBACK, CACHETABLE_SIZE_ROLLBACK, UINT64, "size rollback", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_SIZE_CACHEPRESSURE, CACHETABLE_SIZE_CACHEPRESSURE, UINT64, "size cachepressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(CT_SIZE_CLONED, CACHETABLE_SIZE_CLONED, UINT64, "size currently cloned data for checkpoint", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_EVICTIONS, CACHETABLE_EVICTIONS, UINT64, "evictions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_CLEANER_EXECUTIONS, CACHETABLE_CLEANER_EXECUTIONS, UINT64, "cleaner executions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_CLEANER_PERIOD, CACHETABLE_CLEANER_PERIOD, UINT64, "cleaner period", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); @@ -370,7 +373,7 @@ toku_cachetable_set_env_dir(CACHETABLE ct, const char *env_dir) { // What cachefile goes with particular iname (iname relative to env)? // The transaction that is adding the reference might not have a reference -// to the brt, therefore the cachefile might be closing. +// to the ft, therefore the cachefile might be closing. // If closing, we want to return that it is not there, but must wait till after // the close has finished. // Once the close has finished, there must not be a cachefile with that name @@ -380,7 +383,7 @@ int toku_cachefile_of_iname_in_env (CACHETABLE ct, const char *iname_in_env, CAC } // What cachefile goes with particular fd? -// This function can only be called if the brt is still open, so file must +// This function can only be called if the ft is still open, so file must // still be open int toku_cachefile_of_filenum (CACHETABLE ct, FILENUM filenum, CACHEFILE *cf) { return ct->cf_list.cachefile_of_filenum(filenum, cf); @@ -642,7 +645,7 @@ static void cachetable_free_pair(PAIR p) { cachetable_evictions++; PAIR_ATTR new_attr = p->attr; // Note that flush_callback is called with write_me false, so the only purpose of this - // call is to tell the brt layer to evict the node (keep_me is false). + // call is to tell the ft layer to evict the node (keep_me is false). 
// Also, because we have already removed the PAIR from the cachetable in // cachetable_remove_pair, we cannot pass in p->cachefile and p->cachefile->fd // for the first two parameters, as these may be invalid (#5171), so, we @@ -704,7 +707,7 @@ static void cachetable_only_write_locked_data( p->disk_data = disk_data; if (is_clone) { p->cloned_value_data = NULL; - ev->remove_from_size_current(p->cloned_value_size); + ev->remove_cloned_data_size(p->cloned_value_size); p->cloned_value_size = 0; } } @@ -949,7 +952,7 @@ clone_pair(evictor* ev, PAIR p) { ev->change_pair_attr(old_attr, new_attr); } p->cloned_value_size = clone_size; - ev->add_to_size_current(p->cloned_value_size); + ev->add_cloned_data_size(p->cloned_value_size); } static void checkpoint_cloned_pair(void* extra) { @@ -1302,8 +1305,6 @@ void toku_cachetable_pf_pinned_pair( pair_unlock(p); } - -// NOW A TEST ONLY FUNCTION!!! int toku_cachetable_get_and_pin ( CACHEFILE cachefile, CACHEKEY key, @@ -1573,7 +1574,7 @@ static bool try_pin_pair( return try_again; } -int toku_cachetable_get_and_pin_with_dep_pairs_batched ( +int toku_cachetable_get_and_pin_with_dep_pairs ( CACHEFILE cachefile, CACHEKEY key, uint32_t fullhash, @@ -1589,7 +1590,7 @@ int toku_cachetable_get_and_pin_with_dep_pairs_batched ( PAIR* dependent_pairs, enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs ) -// See cachetable.h +// See cachetable/cachetable.h { CACHETABLE ct = cachefile->cachetable; bool wait = false; @@ -1766,43 +1767,6 @@ int toku_cachetable_get_and_pin_with_dep_pairs_batched ( return 0; } -int toku_cachetable_get_and_pin_with_dep_pairs ( - CACHEFILE cachefile, - CACHEKEY key, - uint32_t fullhash, - void**value, - long *sizep, - CACHETABLE_WRITE_CALLBACK write_callback, - CACHETABLE_FETCH_CALLBACK fetch_callback, - CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, - CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, - pair_lock_type lock_type, - void* read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback - uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint - PAIR* dependent_pairs, - enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs - ) -// See cachetable.h -{ - int r = toku_cachetable_get_and_pin_with_dep_pairs_batched( - cachefile, - key, - fullhash, - value, - sizep, - write_callback, - fetch_callback, - pf_req_callback, - pf_callback, - lock_type, - read_extraargs, - num_dependent_pairs, - dependent_pairs, - dependent_dirty - ); - return r; -} - // Lookup a key in the cachetable. If it is found and it is not being written, then // acquire a read lock on the pair, update the LRU list, and return sucess. // @@ -2048,7 +2012,7 @@ maybe_pin_pair( return retval; } -int toku_cachetable_get_and_pin_nonblocking_batched( +int toku_cachetable_get_and_pin_nonblocking( CACHEFILE cf, CACHEKEY key, uint32_t fullhash, @@ -2062,7 +2026,7 @@ int toku_cachetable_get_and_pin_nonblocking_batched( void *read_extraargs, UNLOCKERS unlockers ) -// See cachetable.h. +// See cachetable/cachetable.h. 
{ CACHETABLE ct = cf->cachetable; assert(lock_type == PL_READ || @@ -2200,40 +2164,6 @@ int toku_cachetable_get_and_pin_nonblocking_batched( abort(); } -int toku_cachetable_get_and_pin_nonblocking ( - CACHEFILE cf, - CACHEKEY key, - uint32_t fullhash, - void**value, - long* sizep, - CACHETABLE_WRITE_CALLBACK write_callback, - CACHETABLE_FETCH_CALLBACK fetch_callback, - CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, - CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, - pair_lock_type lock_type, - void *read_extraargs, - UNLOCKERS unlockers - ) -// See cachetable.h. -{ - int r = 0; - r = toku_cachetable_get_and_pin_nonblocking_batched( - cf, - key, - fullhash, - value, - sizep, - write_callback, - fetch_callback, - pf_req_callback, - pf_callback, - lock_type, - read_extraargs, - unlockers - ); - return r; -} - struct cachefile_prefetch_args { PAIR p; CACHETABLE_FETCH_CALLBACK fetch_callback; @@ -2279,7 +2209,7 @@ int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, uint32_t fullhash, CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, void *read_extraargs, bool *doing_prefetch) -// Effect: See the documentation for this function in cachetable.h +// Effect: See the documentation for this function in cachetable/cachetable.h { int r = 0; PAIR p = NULL; @@ -2582,6 +2512,11 @@ toku_cachetable_minicron_shutdown(CACHETABLE ct) { ct->cl.destroy(); } +void toku_cachetable_prepare_close(CACHETABLE ct UU()) { + extern bool toku_serialize_in_parallel; + toku_serialize_in_parallel = true; +} + /* Requires that it all be flushed. */ void toku_cachetable_close (CACHETABLE *ctp) { CACHETABLE ct = *ctp; @@ -3708,6 +3643,7 @@ int evictor::init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KI m_size_reserved = unreservable_memory(_size_limit); m_size_current = 0; + m_size_cloned_data = 0; m_size_evicting = 0; m_size_nonleaf = create_partitioned_counter(); @@ -3842,6 +3778,22 @@ void evictor::remove_from_size_current(long size) { (void) toku_sync_fetch_and_sub(&m_size_current, size); } +// +// Adds the size of cloned data to necessary variables in the evictor +// +void evictor::add_cloned_data_size(long size) { + (void) toku_sync_fetch_and_add(&m_size_cloned_data, size); + add_to_size_current(size); +} + +// +// Removes the size of cloned data to necessary variables in the evictor +// +void evictor::remove_cloned_data_size(long size) { + (void) toku_sync_fetch_and_sub(&m_size_cloned_data, size); + remove_from_size_current(size); +} + // // TODO: (Zardosht) comment this function // @@ -4406,6 +4358,7 @@ void evictor::fill_engine_status() { STATUS_VALUE(CT_SIZE_LEAF) = read_partitioned_counter(m_size_leaf); STATUS_VALUE(CT_SIZE_ROLLBACK) = read_partitioned_counter(m_size_rollback); STATUS_VALUE(CT_SIZE_CACHEPRESSURE) = read_partitioned_counter(m_size_cachepressure); + STATUS_VALUE(CT_SIZE_CLONED) = m_size_cloned_data; STATUS_VALUE(CT_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_wait_pressure_count); STATUS_VALUE(CT_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_wait_pressure_time); STATUS_VALUE(CT_LONG_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_long_wait_pressure_count); @@ -4487,43 +4440,48 @@ void checkpointer::increment_num_txns() { m_checkpoint_num_txns++; } -// -// Update the user data in any cachefiles in our checkpoint list. 
-// -void checkpointer::update_cachefiles() { - CACHEFILE cf; - for(cf = m_cf_list->m_active_head; cf; cf=cf->next) { +struct iterate_begin_checkpoint { + LSN lsn_of_checkpoint_in_progress; + iterate_begin_checkpoint(LSN lsn) : lsn_of_checkpoint_in_progress(lsn) { } + static int fn(const CACHEFILE &cf, const uint32_t UU(idx), struct iterate_begin_checkpoint *info) { assert(cf->begin_checkpoint_userdata); if (cf->for_checkpoint) { - cf->begin_checkpoint_userdata(m_lsn_of_checkpoint_in_progress, - cf->userdata); + cf->begin_checkpoint_userdata(info->lsn_of_checkpoint_in_progress, cf->userdata); } + return 0; } +}; + +// +// Update the user data in any cachefiles in our checkpoint list. +// +void checkpointer::update_cachefiles() { + struct iterate_begin_checkpoint iterate(m_lsn_of_checkpoint_in_progress); + int r = m_cf_list->m_active_fileid.iterate(&iterate); + assert_zero(r); } +struct iterate_note_pin { + static int fn(const CACHEFILE &cf, uint32_t UU(idx), void **UU(extra)) { + assert(cf->note_pin_by_checkpoint); + cf->note_pin_by_checkpoint(cf, cf->userdata); + cf->for_checkpoint = true; + return 0; + } +}; + // // Sets up and kicks off a checkpoint. // void checkpointer::begin_checkpoint() { // 1. Initialize the accountability counters. - m_checkpoint_num_files = 0; m_checkpoint_num_txns = 0; // 2. Make list of cachefiles to be included in the checkpoint. - // TODO: How do we remove the non-lock cachetable reference here? m_cf_list->read_lock(); - for (CACHEFILE cf = m_cf_list->m_active_head; cf; cf = cf->next) { - // The caller must serialize open, close, and begin checkpoint. - // So we should never see a closing cachefile here. - // Is there an assert we can add here? - - // Putting this check here so that this method may be called - // by cachetable tests. - assert(cf->note_pin_by_checkpoint); - cf->note_pin_by_checkpoint(cf, cf->userdata); - cf->for_checkpoint = true; - m_checkpoint_num_files++; - } + m_cf_list->m_active_fileid.iterate(nullptr); + m_checkpoint_num_files = m_cf_list->m_active_fileid.size(); m_cf_list->read_unlock(); // 3. Create log entries for this checkpoint. @@ -4548,6 +4506,14 @@ void checkpointer::begin_checkpoint() { m_list->write_pending_exp_unlock(); } +struct iterate_log_fassociate { + static int fn(const CACHEFILE &cf, uint32_t UU(idx), void **UU(extra)) { + assert(cf->log_fassociate_during_checkpoint); + cf->log_fassociate_during_checkpoint(cf, cf->userdata); + return 0; + } +}; + // // Assuming the logger exists, this will write out the folloing // information to the log. @@ -4571,10 +4537,7 @@ void checkpointer::log_begin_checkpoint() { m_lsn_of_checkpoint_in_progress = begin_lsn; // Log the list of open dictionaries. - for (CACHEFILE cf = m_cf_list->m_active_head; cf; cf = cf->next) { - assert(cf->log_fassociate_during_checkpoint); - cf->log_fassociate_during_checkpoint(cf, cf->userdata); - } + m_cf_list->m_active_fileid.iterate(nullptr); // Write open transactions to the log. 
r = toku_txn_manager_iter_over_live_txns( @@ -4632,7 +4595,8 @@ void checkpointer::remove_background_job() { } void checkpointer::end_checkpoint(void (*testcallback_f)(void*), void* testextra) { - CACHEFILE *XMALLOC_N(m_checkpoint_num_files, checkpoint_cfs); + toku::scoped_malloc checkpoint_cfs_buf(m_checkpoint_num_files * sizeof(CACHEFILE)); + CACHEFILE *checkpoint_cfs = reinterpret_cast(checkpoint_cfs_buf.get()); this->fill_checkpoint_cfs(checkpoint_cfs); this->checkpoint_pending_pairs(); @@ -4644,22 +4608,33 @@ void checkpointer::end_checkpoint(void (*testcallback_f)(void*), void* testextr this->log_end_checkpoint(); this->end_checkpoint_userdata(checkpoint_cfs); - //Delete list of cachefiles in the checkpoint, + // Delete list of cachefiles in the checkpoint, this->remove_cachefiles(checkpoint_cfs); - toku_free(checkpoint_cfs); } -void checkpointer::fill_checkpoint_cfs(CACHEFILE* checkpoint_cfs) { - m_cf_list->read_lock(); - uint32_t curr_index = 0; - for (CACHEFILE cf = m_cf_list->m_active_head; cf; cf = cf->next) { +struct iterate_checkpoint_cfs { + CACHEFILE *checkpoint_cfs; + uint32_t checkpoint_num_files; + uint32_t curr_index; + iterate_checkpoint_cfs(CACHEFILE *cfs, uint32_t num_files) : + checkpoint_cfs(cfs), checkpoint_num_files(num_files), curr_index(0) { + } + static int fn(const CACHEFILE &cf, uint32_t UU(idx), struct iterate_checkpoint_cfs *info) { if (cf->for_checkpoint) { - assert(curr_index < m_checkpoint_num_files); - checkpoint_cfs[curr_index] = cf; - curr_index++; + assert(info->curr_index < info->checkpoint_num_files); + info->checkpoint_cfs[info->curr_index] = cf; + info->curr_index++; } + return 0; } - assert(curr_index == m_checkpoint_num_files); +}; + +void checkpointer::fill_checkpoint_cfs(CACHEFILE* checkpoint_cfs) { + struct iterate_checkpoint_cfs iterate(checkpoint_cfs, m_checkpoint_num_files); + + m_cf_list->read_lock(); + m_cf_list->m_active_fileid.iterate(&iterate); + assert(iterate.curr_index == m_checkpoint_num_files); m_cf_list->read_unlock(); } @@ -4744,19 +4719,18 @@ void checkpointer::remove_cachefiles(CACHEFILE* checkpoint_cfs) { static_assert(std::is_pod::value, "cachefile_list isn't POD"); void cachefile_list::init() { - m_active_head = NULL; - m_stale_head = NULL; - m_stale_tail = NULL; m_next_filenum_to_use.fileid = 0; m_next_hash_id_to_use = 0; toku_pthread_rwlock_init(&m_lock, NULL); m_active_filenum.create(); m_active_fileid.create(); + m_stale_fileid.create(); } void cachefile_list::destroy() { m_active_filenum.destroy(); m_active_fileid.destroy(); + m_stale_fileid.destroy(); toku_pthread_rwlock_destroy(&m_lock); } @@ -4775,34 +4749,31 @@ void cachefile_list::write_lock() { void cachefile_list::write_unlock() { toku_pthread_rwlock_wrunlock(&m_lock); } -int cachefile_list::cachefile_of_iname_in_env(const char *iname_in_env, CACHEFILE *cf) { - read_lock(); - CACHEFILE extant; - int r; - r = ENOENT; - for (extant = m_active_head; extant; extant = extant->next) { - if (extant->fname_in_env && - !strcmp(extant->fname_in_env, iname_in_env)) { - *cf = extant; - r = 0; - break; + +struct iterate_find_iname { + const char *iname_in_env; + CACHEFILE found_cf; + iterate_find_iname(const char *iname) : iname_in_env(iname), found_cf(nullptr) { } + static int fn(const CACHEFILE &cf, uint32_t UU(idx), struct iterate_find_iname *info) { + if (cf->fname_in_env && strcmp(cf->fname_in_env, info->iname_in_env) == 0) { + info->found_cf = cf; + return -1; } + return 0; } - read_unlock(); - return r; -} +}; + +int cachefile_list::cachefile_of_iname_in_env(const 
char *iname_in_env, CACHEFILE *cf) { + struct iterate_find_iname iterate(iname_in_env); -int cachefile_list::cachefile_of_filenum(FILENUM filenum, CACHEFILE *cf) { read_lock(); - CACHEFILE extant; - int r = ENOENT; - *cf = NULL; - for (extant = m_active_head; extant; extant = extant->next) { - if (extant->filenum.fileid==filenum.fileid) { - *cf = extant; - r = 0; - break; - } + int r = m_active_fileid.iterate(&iterate); + if (iterate.found_cf != nullptr) { + assert(strcmp(iterate.found_cf->fname_in_env, iname_in_env) == 0); + *cf = iterate.found_cf; + r = 0; + } else { + r = ENOENT; } read_unlock(); return r; @@ -4819,20 +4790,23 @@ static int cachefile_find_by_filenum(const CACHEFILE &a_cf, const FILENUM &b) { } } +int cachefile_list::cachefile_of_filenum(FILENUM filenum, CACHEFILE *cf) { + read_lock(); + int r = m_active_filenum.find_zero(filenum, cf, nullptr); + if (r == DB_NOTFOUND) { + r = ENOENT; + } else { + invariant_zero(r); + } + read_unlock(); + return r; +} + static int cachefile_find_by_fileid(const CACHEFILE &a_cf, const struct fileid &b) { return toku_fileid_cmp(a_cf->fileid, b); } void cachefile_list::add_cf_unlocked(CACHEFILE cf) { - invariant(cf->next == NULL); - invariant(cf->prev == NULL); - cf->next = m_active_head; - cf->prev = NULL; - if (m_active_head) { - m_active_head->prev = cf; - } - m_active_head = cf; - int r; r = m_active_filenum.insert(cf, cf->filenum, nullptr); assert_zero(r); @@ -4842,36 +4816,13 @@ void cachefile_list::add_cf_unlocked(CACHEFILE cf) { void cachefile_list::add_stale_cf(CACHEFILE cf) { write_lock(); - invariant(cf->next == NULL); - invariant(cf->prev == NULL); - - cf->next = m_stale_head; - cf->prev = NULL; - if (m_stale_head) { - m_stale_head->prev = cf; - } - m_stale_head = cf; - if (m_stale_tail == NULL) { - m_stale_tail = cf; - } + int r = m_stale_fileid.insert(cf, cf->fileid, nullptr); + assert_zero(r); write_unlock(); } void cachefile_list::remove_cf(CACHEFILE cf) { write_lock(); - invariant(m_active_head != NULL); - if (cf->next) { - cf->next->prev = cf->prev; - } - if (cf->prev) { - cf->prev->next = cf->next; - } - if (cf == m_active_head) { - invariant(cf->prev == NULL); - m_active_head = cf->next; - } - cf->prev = NULL; - cf->next = NULL; uint32_t idx; int r; @@ -4889,24 +4840,12 @@ void cachefile_list::remove_cf(CACHEFILE cf) { } void cachefile_list::remove_stale_cf_unlocked(CACHEFILE cf) { - invariant(m_stale_head != NULL); - invariant(m_stale_tail != NULL); - if (cf->next) { - cf->next->prev = cf->prev; - } - if (cf->prev) { - cf->prev->next = cf->next; - } - if (cf == m_stale_head) { - invariant(cf->prev == NULL); - m_stale_head = cf->next; - } - if (cf == m_stale_tail) { - invariant(cf->next == NULL); - m_stale_tail = cf->prev; - } - cf->prev = NULL; - cf->next = NULL; + uint32_t idx; + int r; + r = m_stale_fileid.find_zero(cf->fileid, nullptr, &idx); + assert_zero(r); + r = m_stale_fileid.delete_at(idx); + assert_zero(r); } FILENUM cachefile_list::reserve_filenum() { @@ -4922,11 +4861,6 @@ FILENUM cachefile_list::reserve_filenum() { break; } FILENUM filenum = m_next_filenum_to_use; -#if TOKU_DEBUG_PARANOID - for (CACHEFILE extant = m_active_head; extant; extant = extant->next) { - assert(filenum.fileid != extant->filenum.fileid); - } -#endif m_next_filenum_to_use.fileid++; write_unlock(); return filenum; @@ -4938,91 +4872,77 @@ uint32_t cachefile_list::get_new_hash_id_unlocked() { return retval; } -CACHEFILE cachefile_list::find_cachefile_in_list_unlocked( - CACHEFILE start, - struct fileid* fileid - ) -{ - CACHEFILE retval = 
NULL; - for (CACHEFILE extant = start; extant; extant = extant->next) { - if (toku_fileids_are_equal(&extant->fileid, fileid)) { - // Clients must serialize cachefile open, close, and unlink - // So, during open, we should never see a closing cachefile - // or one that has been marked as unlink on close. - assert(!extant->unlink_on_close); - retval = extant; - goto exit; - } - } -exit: - return retval; -} - CACHEFILE cachefile_list::find_cachefile_unlocked(struct fileid* fileid) { CACHEFILE cf = nullptr; int r = m_active_fileid.find_zero(*fileid, &cf, nullptr); if (r == 0) { assert(!cf->unlink_on_close); } -#if TOKU_DEBUG_PARANOID - assert(cf == find_cachefile_in_list_unlocked(m_active_head, fileid)); -#endif return cf; } CACHEFILE cachefile_list::find_stale_cachefile_unlocked(struct fileid* fileid) { - return find_cachefile_in_list_unlocked(m_stale_head, fileid); + CACHEFILE cf = nullptr; + int r = m_stale_fileid.find_zero(*fileid, &cf, nullptr); + if (r == 0) { + assert(!cf->unlink_on_close); + } + return cf; } void cachefile_list::verify_unused_filenum(FILENUM filenum) { int r = m_active_filenum.find_zero(filenum, nullptr, nullptr); assert(r == DB_NOTFOUND); -#if TOKU_DEBUG_PARANOID - for (CACHEFILE extant = m_active_head; extant; extant = extant->next) { - invariant(extant->filenum.fileid != filenum.fileid); - } -#endif } // returns true if some eviction ran, false otherwise bool cachefile_list::evict_some_stale_pair(evictor* ev) { - PAIR p = NULL; - CACHEFILE cf_to_destroy = NULL; write_lock(); - if (m_stale_tail == NULL) { + if (m_stale_fileid.size() == 0) { write_unlock(); return false; } - p = m_stale_tail->cf_head; + + CACHEFILE stale_cf = nullptr; + int r = m_stale_fileid.fetch(0, &stale_cf); + assert_zero(r); + // we should not have a cf in the stale list // that does not have any pairs + PAIR p = stale_cf->cf_head; paranoid_invariant(p != NULL); - evict_pair_from_cachefile(p); // now that we have evicted something, // let's check if the cachefile is needed anymore - if (m_stale_tail->cf_head == NULL) { - cf_to_destroy = m_stale_tail; - remove_stale_cf_unlocked(m_stale_tail); + // + // it is not needed if the latest eviction caused + // the cf_head for that cf to become null + bool destroy_cf = stale_cf->cf_head == nullptr; + if (destroy_cf) { + remove_stale_cf_unlocked(stale_cf); } write_unlock(); ev->remove_pair_attr(p->attr); cachetable_free_pair(p); - if (cf_to_destroy) { - cachefile_destroy(cf_to_destroy); + if (destroy_cf) { + cachefile_destroy(stale_cf); } return true; } void cachefile_list::free_stale_data(evictor* ev) { write_lock(); - while (m_stale_tail != NULL) { - PAIR p = m_stale_tail->cf_head; + while (m_stale_fileid.size() != 0) { + CACHEFILE stale_cf = nullptr; + int r = m_stale_fileid.fetch(0, &stale_cf); + assert_zero(r); + // we should not have a cf in the stale list // that does not have any pairs + PAIR p = stale_cf->cf_head; paranoid_invariant(p != NULL); evict_pair_from_cachefile(p); @@ -5031,10 +4951,9 @@ void cachefile_list::free_stale_data(evictor* ev) { // now that we have evicted something, // let's check if the cachefile is needed anymore - if (m_stale_tail->cf_head == NULL) { - CACHEFILE cf_to_destroy = m_stale_tail; - remove_stale_cf_unlocked(m_stale_tail); - cachefile_destroy(cf_to_destroy); + if (stale_cf->cf_head == NULL) { + remove_stale_cf_unlocked(stale_cf); + cachefile_destroy(stale_cf); } } write_unlock(); diff --git a/storage/tokudb/ft-index/ft/cachetable.h b/storage/tokudb/ft-index/ft/cachetable/cachetable.h similarity index 91% rename 
from storage/tokudb/ft-index/ft/cachetable.h rename to storage/tokudb/ft-index/ft/cachetable/cachetable.h index c654927aa211d..a1ea83406a142 100644 --- a/storage/tokudb/ft-index/ft/cachetable.h +++ b/storage/tokudb/ft-index/ft/cachetable/cachetable.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef CACHETABLE_H -#define CACHETABLE_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,12 +87,17 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include -#include "fttypes.h" -#include "minicron.h" + +#include "ft/logger/logger.h" +#include "ft/serialize/block_table.h" +#include "ft/txn/txn.h" +#include "util/minicron.h" // Maintain a cache mapping from cachekeys to values (void*) // Some of the keys can be pinned. Don't pin too many or for too long. @@ -111,6 +114,42 @@ PATENT RIGHTS GRANT: typedef BLOCKNUM CACHEKEY; +class checkpointer; +typedef class checkpointer *CHECKPOINTER; +typedef struct cachetable *CACHETABLE; +typedef struct cachefile *CACHEFILE; +typedef struct ctpair *PAIR; + +// This struct hold information about values stored in the cachetable. +// As one can tell from the names, we are probably violating an +// abstraction layer by placing names. +// +// The purpose of having this struct is to have a way for the +// cachetable to accumulate the some totals we are interested in. +// Breaking this abstraction layer by having these names was the +// easiest way. +// +typedef struct pair_attr_s { + long size; // size PAIR's value takes in memory + long nonleaf_size; // size if PAIR is a nonleaf node, 0 otherwise, used only for engine status + long leaf_size; // size if PAIR is a leaf node, 0 otherwise, used only for engine status + long rollback_size; // size of PAIR is a rollback node, 0 otherwise, used only for engine status + long cache_pressure_size; // amount PAIR contributes to cache pressure, is sum of buffer sizes and workdone counts + bool is_valid; +} PAIR_ATTR; + +static inline PAIR_ATTR make_pair_attr(long size) { + PAIR_ATTR result={ + .size = size, + .nonleaf_size = 0, + .leaf_size = 0, + .rollback_size = 0, + .cache_pressure_size = 0, + .is_valid = true + }; + return result; +} + void toku_set_cleaner_period (CACHETABLE ct, uint32_t new_period); uint32_t toku_get_cleaner_period_unlocked (CACHETABLE ct); void toku_set_cleaner_iterations (CACHETABLE ct, uint32_t new_iterations); @@ -122,7 +161,7 @@ uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct); // create and initialize a cache table // size_limit is the upper limit on the size of the size of the values in the table // pass 0 if you want the default -int toku_cachetable_create(CACHETABLE *result, long size_limit, LSN initial_lsn, TOKULOGGER); +int toku_cachetable_create(CACHETABLE *result, long size_limit, LSN initial_lsn, struct tokulogger *logger); // Create a new cachetable. // Effects: a new cachetable is created and initialized. 
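[Editor's note: a short usage sketch for the PAIR_ATTR struct and make_pair_attr() moved into cachetable.h above. The struct and helper are copied from the header so the example is self-contained; the running totals and the 1 MiB leaf size are hypothetical values for illustration only.]

#include <cstdio>

typedef struct pair_attr_s {
    long size;
    long nonleaf_size;
    long leaf_size;
    long rollback_size;
    long cache_pressure_size;
    bool is_valid;
} PAIR_ATTR;

static inline PAIR_ATTR make_pair_attr(long size) {
    PAIR_ATTR result = {
        .size = size,
        .nonleaf_size = 0,
        .leaf_size = 0,
        .rollback_size = 0,
        .cache_pressure_size = 0,
        .is_valid = true
    };
    return result;
}

int main(void) {
    // A freshly fetched leaf node: only the total size is known up front;
    // the per-kind fields are filled in later by the ft layer.
    PAIR_ATTR attr = make_pair_attr(1L << 20);
    attr.leaf_size = attr.size;

    long size_current = 0, size_leaf = 0;   // evictor-style running totals
    size_current += attr.size;
    size_leaf    += attr.leaf_size;
    printf("size_current=%ld size_leaf=%ld\n", size_current, size_leaf);
    return 0;
}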
@@ -147,15 +186,20 @@ int toku_cachefile_of_iname_in_env (CACHETABLE ct, const char *iname_in_env, CAC // Return the filename char *toku_cachefile_fname_in_cwd (CACHEFILE cf); -void toku_cachetable_begin_checkpoint (CHECKPOINTER cp, TOKULOGGER); +void toku_cachetable_begin_checkpoint (CHECKPOINTER cp, struct tokulogger *logger); -void toku_cachetable_end_checkpoint(CHECKPOINTER cp, TOKULOGGER logger, +void toku_cachetable_end_checkpoint(CHECKPOINTER cp, struct tokulogger *logger, void (*testcallback_f)(void*), void * testextra); + // Shuts down checkpoint thread // Requires no locks be held that are taken by the checkpoint function void toku_cachetable_minicron_shutdown(CACHETABLE ct); +// Prepare to close the cachetable. This informs the cachetable that it is about to be closed +// so that it can tune its checkpoint resource use. +void toku_cachetable_prepare_close(CACHETABLE ct); + // Close the cachetable. // Effects: All of the memory objects are flushed to disk, and the cachetable is destroyed. void toku_cachetable_close(CACHETABLE *ct); @@ -344,28 +388,6 @@ void toku_cachetable_put(CACHEFILE cf, CACHEKEY key, uint32_t fullhash, // then the required PAIRs are written to disk for checkpoint. // KEY PROPERTY OF DEPENDENT PAIRS: They are already locked by the client // Returns: 0 if the memory object is in memory, otherwise an error number. -// Rationale: -// begin_batched_pin and end_batched_pin take and release a read lock on the pair list. -// Normally, that would be done within this get_and_pin, but we want to pin multiple nodes with a single acquisition of the read lock. -int toku_cachetable_get_and_pin_with_dep_pairs_batched ( - CACHEFILE cachefile, - CACHEKEY key, - uint32_t fullhash, - void**value, - long *sizep, - CACHETABLE_WRITE_CALLBACK write_callback, - CACHETABLE_FETCH_CALLBACK fetch_callback, - CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, - CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, - pair_lock_type lock_type, - void* read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback - uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint - PAIR* dependent_pairs, - enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs - ); - -// Effect: call toku_cachetable_get_and_pin_with_dep_pairs_batched once, -// wrapped in begin_batched_pin and end_batched_pin calls. int toku_cachetable_get_and_pin_with_dep_pairs ( CACHEFILE cachefile, CACHEKEY key, @@ -383,7 +405,6 @@ int toku_cachetable_get_and_pin_with_dep_pairs ( enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs ); - // Get and pin a memory object. // Effects: If the memory object is in the cachetable acquire the PAIR lock on it. // Otherwise, fetch it from storage by calling the fetch callback. If the fetch @@ -417,15 +438,13 @@ struct unlockers { bool locked; void (*f)(void* extra); void *extra; - UNLOCKERS next; + struct unlockers *next; }; +typedef struct unlockers *UNLOCKERS; // Effect: If the block is in the cachetable, then return it. // Otherwise call the functions in unlockers, fetch the data (but don't pin it, since we'll just end up pinning it again later), and return TOKUDB_TRY_AGAIN. -// Rationale: -// begin_batched_pin and end_batched_pin take and release a read lock on the pair list. -// Normally, that would be done within this get_and_pin, but we want to pin multiple nodes with a single acquisition of the read lock. 
-int toku_cachetable_get_and_pin_nonblocking_batched ( +int toku_cachetable_get_and_pin_nonblocking ( CACHEFILE cf, CACHEKEY key, uint32_t fullhash, @@ -440,23 +459,6 @@ int toku_cachetable_get_and_pin_nonblocking_batched ( UNLOCKERS unlockers ); -// Effect: call toku_cachetable_get_and_pin_nonblocking_batched once, -// wrapped in begin_batched_pin and end_batched_pin calls. -int toku_cachetable_get_and_pin_nonblocking ( - CACHEFILE cf, - CACHEKEY key, - uint32_t fullhash, - void**value, - long *sizep, - CACHETABLE_WRITE_CALLBACK write_callback, - CACHETABLE_FETCH_CALLBACK fetch_callback, - CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback __attribute__((unused)), - CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback __attribute__((unused)), - pair_lock_type lock_type, - void *read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback - UNLOCKERS unlockers - ); - int toku_cachetable_maybe_get_and_pin (CACHEFILE, CACHEKEY, uint32_t /*fullhash*/, pair_lock_type, void**); // Effect: Maybe get and pin a memory object. // This function is similar to the get_and_pin function except that it @@ -549,15 +551,15 @@ void toku_cachefile_unlink_on_close(CACHEFILE cf); bool toku_cachefile_is_unlink_on_close(CACHEFILE cf); // Return the logger associated with the cachefile -TOKULOGGER toku_cachefile_logger (CACHEFILE); +struct tokulogger *toku_cachefile_logger(CACHEFILE cf); // Return the filenum associated with the cachefile -FILENUM toku_cachefile_filenum (CACHEFILE); +FILENUM toku_cachefile_filenum(CACHEFILE cf); // Effect: Return a 32-bit hash key. The hash key shall be suitable for using with bitmasking for a table of size power-of-two. -uint32_t toku_cachetable_hash (CACHEFILE cachefile, CACHEKEY key); +uint32_t toku_cachetable_hash(CACHEFILE cf, CACHEKEY key); -uint32_t toku_cachefile_fullhash_of_header (CACHEFILE cachefile); +uint32_t toku_cachefile_fullhash_of_header(CACHEFILE cf); // debug functions @@ -599,6 +601,7 @@ typedef enum { CT_SIZE_LEAF, // number of bytes in cachetable belonging to leaf nodes CT_SIZE_ROLLBACK, // number of bytes in cachetable belonging to rollback nodes CT_SIZE_CACHEPRESSURE, // number of bytes causing cache pressure (sum of buffers and workdone counters) + CT_SIZE_CLONED, // number of bytes of cloned data in the system CT_EVICTIONS, CT_CLEANER_EXECUTIONS, // number of times the cleaner thread's loop has executed CT_CLEANER_PERIOD, @@ -644,5 +647,3 @@ void toku_pair_list_set_lock_size(uint32_t num_locks); // layer. __attribute__((const,nonnull)) bool toku_ctpair_is_write_locked(PAIR pair); - -#endif /* CACHETABLE_H */ diff --git a/storage/tokudb/ft-index/ft/checkpoint.cc b/storage/tokudb/ft-index/ft/cachetable/checkpoint.cc similarity index 97% rename from storage/tokudb/ft-index/ft/checkpoint.cc rename to storage/tokudb/ft-index/ft/cachetable/checkpoint.cc index bc4629a1d0884..492893ddc7b69 100644 --- a/storage/tokudb/ft-index/ft/checkpoint.cc +++ b/storage/tokudb/ft-index/ft/cachetable/checkpoint.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -126,17 +126,18 @@ PATENT RIGHTS GRANT: * *****/ -#include #include -#include "fttypes.h" -#include "cachetable.h" -#include "log-internal.h" -#include "logger.h" -#include "checkpoint.h" -#include -#include -#include +#include "portability/toku_portability.h" +#include "portability/toku_atomic.h" + +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/checkpoint.h" +#include "ft/ft.h" +#include "ft/logger/log-internal.h" +#include "ft/logger/recover.h" +#include "util/frwlock.h" +#include "util/status.h" /////////////////////////////////////////////////////////////////////////////////// // Engine status @@ -146,7 +147,7 @@ PATENT RIGHTS GRANT: static CHECKPOINT_STATUS_S cp_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(cp_status, k, c, t, "checkpoint: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(cp_status, k, c, t, "checkpoint: " l, inc) static void status_init(void) { diff --git a/storage/tokudb/ft-index/ft/checkpoint.h b/storage/tokudb/ft-index/ft/cachetable/checkpoint.h similarity index 96% rename from storage/tokudb/ft-index/ft/checkpoint.h rename to storage/tokudb/ft-index/ft/cachetable/checkpoint.h index 9e1725af91b5e..57a41210e838e 100644 --- a/storage/tokudb/ft-index/ft/checkpoint.h +++ b/storage/tokudb/ft-index/ft/cachetable/checkpoint.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_CHECKPOINT_H -#define TOKU_CHECKPOINT_H /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,17 +86,19 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "$Id$" -#include "cachetable.h" - #include -void toku_set_checkpoint_period(CACHETABLE ct, uint32_t new_period); +#include "ft/cachetable/cachetable.h" + //Effect: Change [end checkpoint (n) - begin checkpoint (n+1)] delay to // new_period seconds. 0 means disable. +void toku_set_checkpoint_period(CACHETABLE ct, uint32_t new_period); uint32_t toku_get_checkpoint_period_unlocked(CACHETABLE ct); @@ -160,13 +160,11 @@ typedef enum {SCHEDULED_CHECKPOINT = 0, // "normal" checkpoint taken on check // Callbacks are called during checkpoint procedure while checkpoint_safe lock is still held. // Callbacks are primarily intended for use in testing. // caller_id identifies why the checkpoint is being taken. -int toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger, - void (*callback_f)(void*), void * extra, - void (*callback2_f)(void*), void * extra2, +int toku_checkpoint(CHECKPOINTER cp, struct tokulogger *logger, + void (*callback_f)(void *extra), void *extra, + void (*callback2_f)(void *extra2), void *extra2, checkpoint_caller_t caller_id); - - /****** * These functions are called from the ydb level. * They return status information and have no side effects. 
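[Editor's note: an illustrative sketch of the callback convention used by toku_checkpoint() as declared in checkpoint.h above: two optional hooks, each with its own extra pointer, fired at fixed points during the checkpoint. demo_checkpoint, demo_caller_t, and the points at which the hooks fire are hypothetical simplifications, not the library's implementation.]

#include <cstdio>

typedef enum { SCHEDULED_CHECKPOINT = 0, CLIENT_CHECKPOINT = 1 } demo_caller_t;

static int demo_checkpoint(void (*callback_f)(void *extra), void *extra,
                           void (*callback2_f)(void *extra2), void *extra2,
                           demo_caller_t caller_id) {
    printf("begin checkpoint (caller=%d)\n", caller_id);
    if (callback_f) callback_f(extra);      // hook 1: hypothetically, after pending pairs are marked
    printf("write dirty pairs\n");
    if (callback2_f) callback2_f(extra2);   // hook 2: hypothetically, before the end-checkpoint log entry
    printf("end checkpoint\n");
    return 0;
}

static void count_hook(void *extra) { ++*static_cast<int *>(extra); }

int main(void) {
    int hits = 0;
    demo_checkpoint(count_hook, &hits, count_hook, &hits, SCHEDULED_CHECKPOINT);
    printf("hooks fired: %d\n", hits);
    return 0;
}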
@@ -200,6 +198,3 @@ typedef struct { } CHECKPOINT_STATUS_S, *CHECKPOINT_STATUS; void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS stat); - - -#endif diff --git a/storage/tokudb/ft-index/ft/checksum-benchmarks/adler32.cc b/storage/tokudb/ft-index/ft/checksum-benchmarks/adler32.cc deleted file mode 100644 index e151cb9c6802e..0000000000000 --- a/storage/tokudb/ft-index/ft/checksum-benchmarks/adler32.cc +++ /dev/null @@ -1,236 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. 
This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include -#include -#include -#include -#include -#include -#include - -const unsigned int prime = 2000000011; - -unsigned int karprabin (unsigned char *datac, int N) { - assert(N%4==0); - unsigned int *data=(unsigned int*)datac; - N=N/4; - int i; - unsigned int result=0; - for (i=0; itv_sec-start->tv_sec) +1e-6*(end->tv_usec - start->tv_usec); -} - -int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) { - struct timeval start, end; - const int N=2<<20; - unsigned char *data=malloc(N); - int i; - assert(data); - for (i=0; i #include -#include -#include +#include "portability/memory.h" + +#include "util/dbt.h" + +typedef int (*ft_compare_func)(DB *db, const DBT *a, const DBT *b); + +int toku_keycompare(const void *key1, uint32_t key1len, const void *key2, uint32_t key2len); + +int toku_builtin_compare_fun (DB *, const DBT *, const DBT*) __attribute__((__visibility__("default"))); namespace toku { -// a comparator object encapsulates the data necessary for -// comparing two keys in a fractal tree. it further understands -// that points may be positive or negative infinity. - -class comparator { -public: - void set_descriptor(DESCRIPTOR desc) { - m_fake_db.cmp_descriptor = desc; - } - - void create(ft_compare_func cmp, DESCRIPTOR desc) { - m_cmp = cmp; - memset(&m_fake_db, 0, sizeof(m_fake_db)); - m_fake_db.cmp_descriptor = desc; - } - - int compare(const DBT *a, const DBT *b) { - if (toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b)) { - return toku_dbt_infinite_compare(a, b); - } else { - return m_cmp(&m_fake_db, a, b); + // a comparator object encapsulates the data necessary for + // comparing two keys in a fractal tree. it further understands + // that points may be positive or negative infinity. + + class comparator { + void init(ft_compare_func cmp, DESCRIPTOR desc, uint8_t memcmp_magic) { + _cmp = cmp; + _fake_db->cmp_descriptor = desc; + _memcmp_magic = memcmp_magic; + } + + public: + // This magic value is reserved to mean that the magic has not been set. + static const uint8_t MEMCMP_MAGIC_NONE = 0; + + void create(ft_compare_func cmp, DESCRIPTOR desc, uint8_t memcmp_magic = MEMCMP_MAGIC_NONE) { + XCALLOC(_fake_db); + init(cmp, desc, memcmp_magic); + } + + // inherit the attributes of another comparator, but keep our own + // copy of fake_db that is owned separately from the one given. 
+ void inherit(const comparator &cmp) { + invariant_notnull(_fake_db); + invariant_notnull(cmp._cmp); + invariant_notnull(cmp._fake_db); + init(cmp._cmp, cmp._fake_db->cmp_descriptor, cmp._memcmp_magic); + } + + // like inherit, but doesn't require that the this comparator + // was already created + void create_from(const comparator &cmp) { + XCALLOC(_fake_db); + inherit(cmp); + } + + void destroy() { + toku_free(_fake_db); + } + + const DESCRIPTOR_S *get_descriptor() const { + return _fake_db->cmp_descriptor; + } + + ft_compare_func get_compare_func() const { + return _cmp; + } + + uint8_t get_memcmp_magic() const { + return _memcmp_magic; + } + + bool valid() const { + return _cmp != nullptr; + } + + inline bool dbt_has_memcmp_magic(const DBT *dbt) const { + return *reinterpret_cast(dbt->data) == _memcmp_magic; + } + + int operator()(const DBT *a, const DBT *b) const { + if (__builtin_expect(toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b), 0)) { + return toku_dbt_infinite_compare(a, b); + } else if (_memcmp_magic != MEMCMP_MAGIC_NONE + // If `a' has the memcmp magic.. + && dbt_has_memcmp_magic(a) + // ..then we expect `b' to also have the memcmp magic + && __builtin_expect(dbt_has_memcmp_magic(b), 1)) { + return toku_builtin_compare_fun(nullptr, a, b); + } else { + // yikes, const sadness here + return _cmp(const_cast(_fake_db), a, b); + } } - } -private: - struct __toku_db m_fake_db; - ft_compare_func m_cmp; -}; + private: + DB *_fake_db; + ft_compare_func _cmp; + uint8_t _memcmp_magic; + }; } /* namespace toku */ diff --git a/storage/tokudb/ft-index/ft/cursor.cc b/storage/tokudb/ft-index/ft/cursor.cc new file mode 100644 index 0000000000000..9814a49416b1c --- /dev/null +++ b/storage/tokudb/ft-index/ft/cursor.cc @@ -0,0 +1,505 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#include "ft/ft-internal.h" + +#include "ft/cursor.h" +#include "ft/leafentry.h" +#include "ft/txn/txn.h" +#include "util/dbt.h" + +int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN ttxn, + bool is_snapshot_read, + bool disable_prefetching, + bool is_temporary) { + if (is_snapshot_read) { + invariant(ttxn != NULL); + int accepted = toku_txn_reads_txnid(ft_handle->ft->h->root_xid_that_created, ttxn); + if (accepted != TOKUDB_ACCEPT) { + invariant(accepted == 0); + return TOKUDB_MVCC_DICTIONARY_TOO_NEW; + } + } + + memset(cursor, 0, sizeof(*cursor)); + cursor->ft_handle = ft_handle; + cursor->ttxn = ttxn; + cursor->is_snapshot_read = is_snapshot_read; + cursor->disable_prefetching = disable_prefetching; + cursor->is_temporary = is_temporary; + return 0; +} + +void toku_ft_cursor_destroy(FT_CURSOR cursor) { + toku_destroy_dbt(&cursor->key); + toku_destroy_dbt(&cursor->val); + toku_destroy_dbt(&cursor->range_lock_left_key); + toku_destroy_dbt(&cursor->range_lock_right_key); +} + +// deprecated, should only be used by tests +int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *cursorptr, TOKUTXN ttxn, + bool is_snapshot_read, bool disable_prefetching) { + FT_CURSOR XCALLOC(cursor); + int r = toku_ft_cursor_create(ft_handle, cursor, ttxn, is_snapshot_read, disable_prefetching, false); + if (r == 0) { + *cursorptr = cursor; + } else { + toku_free(cursor); + } + return r; +} + +// deprecated, should only be used by tests +void toku_ft_cursor_close(FT_CURSOR cursor) { + toku_ft_cursor_destroy(cursor); + toku_free(cursor); +} + +void toku_ft_cursor_remove_restriction(FT_CURSOR cursor) { + cursor->out_of_range_error = 0; + cursor->direction = 0; +} + +void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR cursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra) { + cursor->interrupt_cb = cb; + cursor->interrupt_cb_extra = extra; +} + +void toku_ft_cursor_set_leaf_mode(FT_CURSOR cursor) { + cursor->is_leaf_mode = true; +} + +int toku_ft_cursor_is_leaf_mode(FT_CURSOR cursor) { + return cursor->is_leaf_mode; +} + +// TODO: Rename / cleanup - this has nothing to do with locking +void toku_ft_cursor_set_range_lock(FT_CURSOR cursor, + const DBT *left, const DBT *right, + bool left_is_neg_infty, bool right_is_pos_infty, + int out_of_range_error) { + // Destroy any existing keys and then clone the given left, right keys + toku_destroy_dbt(&cursor->range_lock_left_key); + if (left_is_neg_infty) { + cursor->left_is_neg_infty = true; + } else { + toku_clone_dbt(&cursor->range_lock_left_key, *left); + } + + toku_destroy_dbt(&cursor->range_lock_right_key); + if (right_is_pos_infty) { + cursor->right_is_pos_infty = true; + } else { + toku_clone_dbt(&cursor->range_lock_right_key, *right); + } + + // TOKUDB_FOUND_BUT_REJECTED is a DB_NOTFOUND with instructions to stop looking. (Faster) + cursor->out_of_range_error = out_of_range_error == DB_NOTFOUND ? TOKUDB_FOUND_BUT_REJECTED : out_of_range_error; + cursor->direction = 0; +} + +void toku_ft_cursor_set_prefetching(FT_CURSOR cursor) { + cursor->prefetching = true; +} + +bool toku_ft_cursor_prefetching(FT_CURSOR cursor) { + return cursor->prefetching; +} + +//Return true if cursor is uninitialized. false otherwise. 
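For illustration (a sketch, not part of the patch): scanning a dictionary with the test-only cursor helpers above, assuming an open FT_HANDLE h and that FT_GET_CALLBACK_FUNCTION has the same shape as the static getf helpers in this file; print_pair and scan_all are hypothetical names, and <cstdio>/<cassert> are assumed to be available.

// Hypothetical callback with the same shape as ft_cursor_current_getf below.
static int print_pair(uint32_t keylen, const void *key,
                      uint32_t vallen, const void *val,
                      void *extra, bool lock_only) {
    (void) val; (void) extra;
    if (!lock_only && key != nullptr) {
        printf("key: %u bytes, val: %u bytes\n", keylen, vallen);
    }
    return 0;
}

static void scan_all(FT_HANDLE h) {
    FT_CURSOR c;
    int r = toku_ft_cursor(h, &c, NULL, false, false);    // test-only constructor
    assert(r == 0);
    r = toku_ft_cursor_first(c, print_pair, nullptr);
    while (r == 0) {
        r = toku_ft_cursor_next(c, print_pair, nullptr);  // stops on DB_NOTFOUND or an error
    }
    toku_ft_cursor_close(c);
}

The next helper, toku_ft_cursor_not_set(), is the predicate such a caller can use to tell whether the cursor currently points at a key/value pair.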
+bool toku_ft_cursor_not_set(FT_CURSOR cursor) { + assert((cursor->key.data==NULL) == (cursor->val.data==NULL)); + return (bool)(cursor->key.data == NULL); +} + +struct ft_cursor_search_struct { + FT_GET_CALLBACK_FUNCTION getf; + void *getf_v; + FT_CURSOR cursor; + ft_search *search; +}; + +/* search for the first kv pair that matches the search object */ +static int ft_cursor_search(FT_CURSOR cursor, ft_search *search, + FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool can_bulk_fetch) { + int r = toku_ft_search(cursor->ft_handle, search, getf, getf_v, cursor, can_bulk_fetch); + return r; +} + +static inline int compare_k_x(FT_HANDLE ft_handle, const DBT *k, const DBT *x) { + return ft_handle->ft->cmp(k, x); +} + +int toku_ft_cursor_compare_one(const ft_search &UU(search), const DBT *UU(x)) { + return 1; +} + +static int ft_cursor_compare_set(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); + return compare_k_x(ft_handle, search.k, x) <= 0; /* return min xy: kv <= xy */ +} + +static int +ft_cursor_current_getf(uint32_t keylen, const void *key, + uint32_t vallen, const void *val, + void *v, bool lock_only) { + struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); + int r; + if (key==NULL) { + r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); + } else { + FT_CURSOR cursor = bcss->cursor; + DBT newkey; + toku_fill_dbt(&newkey, key, keylen); + if (compare_k_x(cursor->ft_handle, &cursor->key, &newkey) != 0) { + r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); // This was once DB_KEYEMPTY + if (r==0) r = TOKUDB_FOUND_BUT_REJECTED; + } + else + r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only); + } + return r; +} + +static int ft_cursor_compare_next(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); + return compare_k_x(ft_handle, search.k, x) < 0; /* return min xy: kv < xy */ +} + +int toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + if (toku_ft_cursor_not_set(cursor)) { + return EINVAL; + } + cursor->direction = 0; + if (op == DB_CURRENT) { + struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, 0}; + ft_search search; + ft_search_init(&search, ft_cursor_compare_set, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle); + int r = toku_ft_search(cursor->ft_handle, &search, ft_cursor_current_getf, &bcss, cursor, false); + ft_search_finish(&search); + return r; + } + return getf(cursor->key.size, cursor->key.data, cursor->val.size, cursor->val.data, getf_v, false); // ft_cursor_copyout(cursor, outkey, outval); +} + +int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = 0; + ft_search search; + ft_search_init(&search, toku_ft_cursor_compare_one, FT_SEARCH_LEFT, nullptr, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, false); + ft_search_finish(&search); + return r; +} + +int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = 0; + ft_search search; + ft_search_init(&search, toku_ft_cursor_compare_one, FT_SEARCH_RIGHT, nullptr, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, false); + ft_search_finish(&search); + return r; +} + +int toku_ft_cursor_check_restricted_range(FT_CURSOR c, const void *key, uint32_t keylen) { + if (c->out_of_range_error) { + FT ft = c->ft_handle->ft; + DBT found_key; + toku_fill_dbt(&found_key, 
key, keylen); + if ((!c->left_is_neg_infty && c->direction <= 0 && ft->cmp(&found_key, &c->range_lock_left_key) < 0) || + (!c->right_is_pos_infty && c->direction >= 0 && ft->cmp(&found_key, &c->range_lock_right_key) > 0)) { + invariant(c->out_of_range_error); + return c->out_of_range_error; + } + } + // Reset cursor direction to mitigate risk if some query type doesn't set the direction. + // It is always correct to check both bounds (which happens when direction==0) but it can be slower. + c->direction = 0; + return 0; +} + +int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_data *bd, + FT_GET_CALLBACK_FUNCTION getf, void *getf_v, + uint32_t *keylen, void **key, uint32_t *vallen, void **val) { + int r = 0; + // if we are searching towards the end, limit is last element + // if we are searching towards the beginning, limit is the first element + uint32_t limit = (direction > 0) ? (bd->num_klpairs() - 1) : 0; + + //Starting with the prev, find the first real (non-provdel) leafentry. + while (index != limit) { + index += direction; + LEAFENTRY le; + void* foundkey = NULL; + uint32_t foundkeylen = 0; + + r = bd->fetch_klpair(index, &le, &foundkeylen, &foundkey); + invariant_zero(r); + + if (toku_ft_cursor_is_leaf_mode(cursor) || !le_val_is_del(le, cursor->is_snapshot_read, cursor->ttxn)) { + le_extract_val( + le, + toku_ft_cursor_is_leaf_mode(cursor), + cursor->is_snapshot_read, + cursor->ttxn, + vallen, + val + ); + *key = foundkey; + *keylen = foundkeylen; + + cursor->direction = direction; + r = toku_ft_cursor_check_restricted_range(cursor, *key, *keylen); + if (r!=0) { + paranoid_invariant(r == cursor->out_of_range_error); + // We already got at least one entry from the bulk fetch. + // Return 0 (instead of out of range error). 
+ r = 0; + break; + } + r = getf(*keylen, *key, *vallen, *val, getf_v, false); + if (r == TOKUDB_CURSOR_CONTINUE) { + continue; + } + else { + break; + } + } + } + + return r; +} + +int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = +1; + ft_search search; + ft_search_init(&search, ft_cursor_compare_next, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, true); + ft_search_finish(&search); + if (r == 0) { + toku_ft_cursor_set_prefetching(cursor); + } + return r; +} + +static int ft_cursor_search_eq_k_x_getf(uint32_t keylen, const void *key, + uint32_t vallen, const void *val, + void *v, bool lock_only) { + struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); + int r; + if (key==NULL) { + r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, false); + } else { + FT_CURSOR cursor = bcss->cursor; + DBT newkey; + toku_fill_dbt(&newkey, key, keylen); + if (compare_k_x(cursor->ft_handle, bcss->search->k, &newkey) == 0) { + r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only); + } else { + r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); + if (r==0) r = TOKUDB_FOUND_BUT_REJECTED; + } + } + return r; +} + +/* search for the kv pair that matches the search object and is equal to k */ +static int ft_cursor_search_eq_k_x(FT_CURSOR cursor, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, search}; + int r = toku_ft_search(cursor->ft_handle, search, ft_cursor_search_eq_k_x_getf, &bcss, cursor, false); + return r; +} + +static int ft_cursor_compare_prev(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); + return compare_k_x(ft_handle, search.k, x) > 0; /* return max xy: kv > xy */ +} + +int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = -1; + ft_search search; + ft_search_init(&search, ft_cursor_compare_prev, FT_SEARCH_RIGHT, &cursor->key, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, true); + ft_search_finish(&search); + return r; +} + +int toku_ft_cursor_compare_set_range(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); + return compare_k_x(ft_handle, search.k, x) <= 0; /* return kv <= xy */ +} + +int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = 0; + ft_search search; + ft_search_init(&search, toku_ft_cursor_compare_set_range, FT_SEARCH_LEFT, key, nullptr, cursor->ft_handle); + int r = ft_cursor_search_eq_k_x(cursor, &search, getf, getf_v); + ft_search_finish(&search); + return r; +} + +int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction = 0; + ft_search search; + ft_search_init(&search, toku_ft_cursor_compare_set_range, FT_SEARCH_LEFT, key, key_bound, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, false); + ft_search_finish(&search); + return r; +} + +static int ft_cursor_compare_set_range_reverse(const ft_search &search, const DBT *x) { + FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context); + return compare_k_x(ft_handle, search.k, x) >= 0; /* return kv >= xy */ +} + +int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + cursor->direction 
= 0; + ft_search search; + ft_search_init(&search, ft_cursor_compare_set_range_reverse, FT_SEARCH_RIGHT, key, nullptr, cursor->ft_handle); + int r = ft_cursor_search(cursor, &search, getf, getf_v, false); + ft_search_finish(&search); + return r; +} + +//TODO: When tests have been rewritten, get rid of this function. +//Only used by tests. +int toku_ft_cursor_get (FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags) { + int op = get_flags & DB_OPFLAGS_MASK; + if (get_flags & ~DB_OPFLAGS_MASK) + return EINVAL; + + switch (op) { + case DB_CURRENT: + case DB_CURRENT_BINDING: + return toku_ft_cursor_current(cursor, op, getf, getf_v); + case DB_FIRST: + return toku_ft_cursor_first(cursor, getf, getf_v); + case DB_LAST: + return toku_ft_cursor_last(cursor, getf, getf_v); + case DB_NEXT: + if (toku_ft_cursor_not_set(cursor)) { + return toku_ft_cursor_first(cursor, getf, getf_v); + } else { + return toku_ft_cursor_next(cursor, getf, getf_v); + } + case DB_PREV: + if (toku_ft_cursor_not_set(cursor)) { + return toku_ft_cursor_last(cursor, getf, getf_v); + } else { + return toku_ft_cursor_prev(cursor, getf, getf_v); + } + case DB_SET: + return toku_ft_cursor_set(cursor, key, getf, getf_v); + case DB_SET_RANGE: + return toku_ft_cursor_set_range(cursor, key, nullptr, getf, getf_v); + default: ;// Fall through + } + return EINVAL; +} + +void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval) { + *pkey = &cursor->key; + *pval = &cursor->val; +} + +bool toku_ft_cursor_uninitialized(FT_CURSOR c) { + return toku_ft_cursor_not_set(c); +} + +int toku_ft_lookup(FT_HANDLE ft_handle, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) { + FT_CURSOR cursor; + int r = toku_ft_cursor(ft_handle, &cursor, NULL, false, false); + if (r != 0) { + return r; + } + + r = toku_ft_cursor_set(cursor, k, getf, getf_v); + + toku_ft_cursor_close(cursor); + return r; +} diff --git a/storage/tokudb/ft-index/ft/ft-search.h b/storage/tokudb/ft-index/ft/cursor.h similarity index 59% rename from storage/tokudb/ft-index/ft/ft-search.h rename to storage/tokudb/ft-index/ft/cursor.h index 9c26be456de62..da2b3d5c8a161 100644 --- a/storage/tokudb/ft-index/ft/ft-search.h +++ b/storage/tokudb/ft-index/ft/cursor.h @@ -1,6 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" + /* COPYING CONDITIONS NOTICE: @@ -29,8 +29,8 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,30 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#ifndef FT_SEARCH_H -#define FT_SEARCH_H - +#pragma once + +#include + +#include "ft/ft-internal.h" + +/* an ft cursor is represented as a kv pair in a tree */ +struct ft_cursor { + FT_HANDLE ft_handle; + DBT key, val; // The key-value pair that the cursor currently points to + DBT range_lock_left_key, range_lock_right_key; + bool prefetching; + bool left_is_neg_infty, right_is_pos_infty; + bool is_snapshot_read; // true if query is read_committed, false otherwise + bool is_leaf_mode; + bool disable_prefetching; + bool is_temporary; + int out_of_range_error; + int direction; + TOKUTXN ttxn; + FT_CHECK_INTERRUPT_CALLBACK interrupt_cb; + void *interrupt_cb_extra; +}; +typedef struct ft_cursor *FT_CURSOR; enum ft_search_direction_e { FT_SEARCH_LEFT = 1, /* search left -> right, finds min xy as defined by the compare function */ @@ -109,7 +127,7 @@ typedef int (*ft_search_compare_func_t)(const struct ft_search &, const DBT *); /* the search object contains the compare function, search direction, and the kv pair that is used in the compare function. the context is the user's private data */ -typedef struct ft_search { +struct ft_search { ft_search_compare_func_t compare; enum ft_search_direction_e direction; const DBT *k; @@ -137,22 +155,83 @@ typedef struct ft_search { // way out with a DB_NOTFOUND we ought to unpin those nodes. See #3528. DBT pivot_bound; const DBT *k_bound; -} ft_search_t; +}; /* initialize the search compare object */ -static inline ft_search_t *ft_search_init(ft_search_t *so, ft_search_compare_func_t compare, enum ft_search_direction_e direction, - const DBT *k, const DBT *k_bound, void *context) { - so->compare = compare; - so->direction = direction; - so->k = k; - so->context = context; - toku_init_dbt(&so->pivot_bound); - so->k_bound = k_bound; - return so; +static inline ft_search *ft_search_init(ft_search *search, ft_search_compare_func_t compare, + enum ft_search_direction_e direction, + const DBT *k, const DBT *k_bound, void *context) { + search->compare = compare; + search->direction = direction; + search->k = k; + search->context = context; + toku_init_dbt(&search->pivot_bound); + search->k_bound = k_bound; + return search; } -static inline void ft_search_finish(ft_search_t *so) { - toku_destroy_dbt(&so->pivot_bound); +static inline void ft_search_finish(ft_search *search) { + toku_destroy_dbt(&search->pivot_bound); } -#endif + +int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN txn, + bool is_snapshot_read, + bool disable_prefetching, + bool is_temporary); + +void toku_ft_cursor_destroy(FT_CURSOR cursor); + +int toku_ft_lookup(FT_HANDLE ft_h, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +void toku_ft_cursor_set_prefetching(FT_CURSOR cursor); + +bool toku_ft_cursor_prefetching(FT_CURSOR cursor); + +bool toku_ft_cursor_not_set(FT_CURSOR cursor); + +void toku_ft_cursor_set_leaf_mode(FT_CURSOR cursor); + +void toku_ft_cursor_remove_restriction(FT_CURSOR cursor); + +void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR cursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra); + +int toku_ft_cursor_is_leaf_mode(FT_CURSOR cursor); + +void toku_ft_cursor_set_range_lock(FT_CURSOR, const DBT *, const DBT *, bool, bool, int); + +int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int 
toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); + +bool toku_ft_cursor_uninitialized(FT_CURSOR cursor) __attribute__ ((warn_unused_result)); + +void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval); + +int toku_ft_cursor_check_restricted_range(FT_CURSOR cursor, const void *key, uint32_t keylen); + +int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_data *bd, + FT_GET_CALLBACK_FUNCTION getf, void *getf_v, + uint32_t *keylen, void **key, uint32_t *vallen, void **val); + +// used by get_key_after_bytes +int toku_ft_cursor_compare_one(const ft_search &search, const DBT *x); +int toku_ft_cursor_compare_set_range(const ft_search &search, const DBT *x); + +// deprecated, should only be used by tests, and eventually removed +int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *ftcursor_p, TOKUTXN txn, bool, bool) __attribute__ ((warn_unused_result)); +void toku_ft_cursor_close(FT_CURSOR cursor); +int toku_ft_cursor_get(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags); +int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn); diff --git a/storage/tokudb/ft-index/ft/fifo.cc b/storage/tokudb/ft-index/ft/fifo.cc deleted file mode 100644 index 7f6e1778df65c..0000000000000 --- a/storage/tokudb/ft-index/ft/fifo.cc +++ /dev/null @@ -1,254 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "fifo.h" -#include "xids.h" -#include "ybt.h" -#include -#include - -struct fifo { - int n_items_in_fifo; - char *memory; // An array of bytes into which fifo_entries are embedded. - int memory_size; // How big is fifo_memory - int memory_used; // How many bytes are in use? 
-}; - -const int fifo_initial_size = 4096; -static void fifo_init(struct fifo *fifo) { - fifo->n_items_in_fifo = 0; - fifo->memory = 0; - fifo->memory_size = 0; - fifo->memory_used = 0; -} - -__attribute__((const,nonnull)) -static int fifo_entry_size(struct fifo_entry *entry) { - return sizeof (struct fifo_entry) + entry->keylen + entry->vallen - + xids_get_size(&entry->xids_s) - - sizeof(XIDS_S); //Prevent double counting from fifo_entry+xids_get_size -} - -__attribute__((const,nonnull)) -size_t toku_ft_msg_memsize_in_fifo(FT_MSG cmd) { - // This must stay in sync with fifo_entry_size because that's what we - // really trust. But sometimes we only have an in-memory FT_MSG, not - // a serialized fifo_entry so we have to fake it. - return sizeof (struct fifo_entry) + cmd->u.id.key->size + cmd->u.id.val->size - + xids_get_size(cmd->xids) - - sizeof(XIDS_S); -} - -int toku_fifo_create(FIFO *ptr) { - struct fifo *XMALLOC(fifo); - if (fifo == 0) return ENOMEM; - fifo_init(fifo); - *ptr = fifo; - return 0; -} - -void toku_fifo_resize(FIFO fifo, size_t new_size) { - XREALLOC_N(new_size, fifo->memory); - fifo->memory_size = new_size; -} - -void toku_fifo_free(FIFO *ptr) { - FIFO fifo = *ptr; - if (fifo->memory) toku_free(fifo->memory); - fifo->memory=0; - toku_free(fifo); - *ptr = 0; -} - -int toku_fifo_n_entries(FIFO fifo) { - return fifo->n_items_in_fifo; -} - -static int next_power_of_two (int n) { - int r = 4096; - while (r < n) { - r*=2; - assert(r>0); - } - return r; -} - -int toku_fifo_enq(FIFO fifo, const void *key, unsigned int keylen, const void *data, unsigned int datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, int32_t *dest) { - int need_space_here = sizeof(struct fifo_entry) - + keylen + datalen - + xids_get_size(xids) - - sizeof(XIDS_S); //Prevent double counting - int need_space_total = fifo->memory_used+need_space_here; - if (fifo->memory == NULL || need_space_total > fifo->memory_size) { - // resize the fifo to the next power of 2 greater than the needed space - int next_2 = next_power_of_two(need_space_total); - toku_fifo_resize(fifo, next_2); - } - struct fifo_entry *entry = (struct fifo_entry *)(fifo->memory + fifo->memory_used); - fifo_entry_set_msg_type(entry, type); - entry->msn = msn; - xids_cpy(&entry->xids_s, xids); - entry->is_fresh = is_fresh; - entry->keylen = keylen; - unsigned char *e_key = xids_get_end_of_array(&entry->xids_s); - memcpy(e_key, key, keylen); - entry->vallen = datalen; - memcpy(e_key + keylen, data, datalen); - if (dest) { - *dest = fifo->memory_used; - } - fifo->n_items_in_fifo++; - fifo->memory_used += need_space_here; - return 0; -} - -int toku_fifo_iterate_internal_start(FIFO UU(fifo)) { return 0; } -int toku_fifo_iterate_internal_has_more(FIFO fifo, int off) { return off < fifo->memory_used; } -int toku_fifo_iterate_internal_next(FIFO fifo, int off) { - struct fifo_entry *e = (struct fifo_entry *)(fifo->memory + off); - return off + fifo_entry_size(e); -} -struct fifo_entry * toku_fifo_iterate_internal_get_entry(FIFO fifo, int off) { - return (struct fifo_entry *)(fifo->memory + off); -} -size_t toku_fifo_internal_entry_memsize(struct fifo_entry *e) { - return fifo_entry_size(e); -} - -void toku_fifo_iterate (FIFO fifo, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, void*), void *arg) { - FIFO_ITERATE(fifo, - key, keylen, data, datalen, type, msn, xids, is_fresh, - f(key,keylen,data,datalen,type,msn,xids,is_fresh, arg)); -} - -unsigned int 
toku_fifo_buffer_size_in_use (FIFO fifo) { - return fifo->memory_used; -} - -unsigned long toku_fifo_memory_size_in_use(FIFO fifo) { - return sizeof(*fifo)+fifo->memory_used; -} - -unsigned long toku_fifo_memory_footprint(FIFO fifo) { - size_t size_used = toku_memory_footprint(fifo->memory, fifo->memory_used); - long rval = sizeof(*fifo) + size_used; - return rval; -} - -DBT *fill_dbt_for_fifo_entry(DBT *dbt, const struct fifo_entry *entry) { - return toku_fill_dbt(dbt, xids_get_end_of_array((XIDS) &entry->xids_s), entry->keylen); -} - -struct fifo_entry *toku_fifo_get_entry(FIFO fifo, int off) { - return toku_fifo_iterate_internal_get_entry(fifo, off); -} - -void toku_fifo_clone(FIFO orig_fifo, FIFO* cloned_fifo) { - struct fifo *XMALLOC(new_fifo); - assert(new_fifo); - new_fifo->n_items_in_fifo = orig_fifo->n_items_in_fifo; - new_fifo->memory_used = orig_fifo->memory_used; - new_fifo->memory_size = new_fifo->memory_used; - XMALLOC_N(new_fifo->memory_size, new_fifo->memory); - memcpy( - new_fifo->memory, - orig_fifo->memory, - new_fifo->memory_size - ); - *cloned_fifo = new_fifo; -} - -bool toku_are_fifos_same(FIFO fifo1, FIFO fifo2) { - return ( - fifo1->memory_used == fifo2->memory_used && - memcmp(fifo1->memory, fifo2->memory, fifo1->memory_used) == 0 - ); -} diff --git a/storage/tokudb/ft-index/ft/fifo.h b/storage/tokudb/ft-index/ft/fifo.h deleted file mode 100644 index e178ec6de2469..0000000000000 --- a/storage/tokudb/ft-index/ft/fifo.h +++ /dev/null @@ -1,193 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FIFO_H -#define FIFO_H -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. 
- This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "fttypes.h" -#include "xids-internal.h" -#include "xids.h" - - -// If the fifo_entry is unpacked, the compiler aligns the xids array and we waste a lot of space -#if TOKU_WINDOWS -#pragma pack(push, 1) -#endif - -struct __attribute__((__packed__)) fifo_entry { - unsigned int keylen; - unsigned int vallen; - unsigned char type; - bool is_fresh; - MSN msn; - XIDS_S xids_s; -}; - -// get and set the brt message type for a fifo entry. -// it is internally stored as a single unsigned char. 
-static inline enum ft_msg_type -fifo_entry_get_msg_type(const struct fifo_entry * entry) -{ - enum ft_msg_type msg_type; - msg_type = (enum ft_msg_type) entry->type; - return msg_type; -} - -static inline void -fifo_entry_set_msg_type(struct fifo_entry * entry, - enum ft_msg_type msg_type) -{ - unsigned char type = (unsigned char) msg_type; - entry->type = type; -} - -#if TOKU_WINDOWS -#pragma pack(pop) -#endif - -typedef struct fifo *FIFO; - -int toku_fifo_create(FIFO *); - -void toku_fifo_resize(FIFO fifo, size_t new_size); - -void toku_fifo_free(FIFO *); - -int toku_fifo_n_entries(FIFO); - -int toku_fifo_enq (FIFO, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, int32_t *dest); - -unsigned int toku_fifo_buffer_size_in_use (FIFO fifo); -unsigned long toku_fifo_memory_size_in_use(FIFO fifo); // return how much memory in the fifo holds useful data - -unsigned long toku_fifo_memory_footprint(FIFO fifo); // return how much memory the fifo occupies - -//These two are problematic, since I don't want to malloc() the bytevecs, but dequeueing the fifo frees the memory. -//int toku_fifo_peek_deq (FIFO, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, uint32_t *type, TXNID *xid); -//int toku_fifo_peek_deq_cmdstruct (FIFO, FT_MSG, DBT*, DBT*); // fill in the FT_MSG, using the two DBTs for the DBT part. -void toku_fifo_iterate(FIFO, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, void*), void*); - -#define FIFO_ITERATE(fifo,keyvar,keylenvar,datavar,datalenvar,typevar,msnvar,xidsvar,is_freshvar,body) ({ \ - for (int fifo_iterate_off = toku_fifo_iterate_internal_start(fifo); \ - toku_fifo_iterate_internal_has_more(fifo, fifo_iterate_off); \ - fifo_iterate_off = toku_fifo_iterate_internal_next(fifo, fifo_iterate_off)) { \ - struct fifo_entry *e = toku_fifo_iterate_internal_get_entry(fifo, fifo_iterate_off); \ - ITEMLEN keylenvar = e->keylen; \ - ITEMLEN datalenvar = e->vallen; \ - enum ft_msg_type typevar = fifo_entry_get_msg_type(e); \ - MSN msnvar = e->msn; \ - XIDS xidsvar = &e->xids_s; \ - bytevec keyvar = xids_get_end_of_array(xidsvar); \ - bytevec datavar = (const uint8_t*)keyvar + e->keylen; \ - bool is_freshvar = e->is_fresh; \ - body; \ - } }) - -#define FIFO_CURRENT_ENTRY_MEMSIZE toku_fifo_internal_entry_memsize(e) - -// Internal functions for the iterator. -int toku_fifo_iterate_internal_start(FIFO fifo); -int toku_fifo_iterate_internal_has_more(FIFO fifo, int off); -int toku_fifo_iterate_internal_next(FIFO fifo, int off); -struct fifo_entry * toku_fifo_iterate_internal_get_entry(FIFO fifo, int off); -size_t toku_fifo_internal_entry_memsize(struct fifo_entry *e) __attribute__((const,nonnull)); -size_t toku_ft_msg_memsize_in_fifo(FT_MSG cmd) __attribute__((const,nonnull)); - -DBT *fill_dbt_for_fifo_entry(DBT *dbt, const struct fifo_entry *entry); -struct fifo_entry *toku_fifo_get_entry(FIFO fifo, int off); - -void toku_fifo_clone(FIFO orig_fifo, FIFO* cloned_fifo); - -bool toku_are_fifos_same(FIFO fifo1, FIFO fifo2); - - - - -#endif diff --git a/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.cc b/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.cc index 16edf068d4dc6..b8bee800f3654 100644 --- a/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.cc +++ b/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,12 +89,13 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include +#include "ft/serialize/block_table.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-flusher.h" +#include "ft/ft-internal.h" +#include "ft/ft.h" +#include "ft/node.h" -#include -#include -#include -#include #include static void @@ -103,23 +104,23 @@ ftnode_get_key_and_fullhash( uint32_t* fullhash, void* extra) { - FT h = (FT) extra; - BLOCKNUM name; - toku_allocate_blocknum(h->blocktable, &name, h); - *cachekey = name; - *fullhash = toku_cachetable_hash(h->cf, name); + FT ft = (FT) extra; + BLOCKNUM blocknum; + ft->blocktable.allocate_blocknum(&blocknum, ft); + *cachekey = blocknum; + *fullhash = toku_cachetable_hash(ft->cf, blocknum); } void cachetable_put_empty_node_with_dep_nodes( - FT h, + FT ft, uint32_t num_dependent_nodes, FTNODE* dependent_nodes, - BLOCKNUM* name, //output + BLOCKNUM* blocknum, //output uint32_t* fullhash, //output FTNODE* result) { - FTNODE XMALLOC(new_node); + FTNODE XCALLOC(new_node); PAIR dependent_pairs[num_dependent_nodes]; enum cachetable_dirty dependent_dirty_bits[num_dependent_nodes]; for (uint32_t i = 0; i < num_dependent_nodes; i++) { @@ -128,18 +129,18 @@ cachetable_put_empty_node_with_dep_nodes( } toku_cachetable_put_with_dep_pairs( - h->cf, + ft->cf, ftnode_get_key_and_fullhash, new_node, make_pair_attr(sizeof(FTNODE)), - get_write_callbacks_for_node(h), - h, + get_write_callbacks_for_node(ft), + ft, num_dependent_nodes, dependent_pairs, dependent_dirty_bits, - name, + blocknum, fullhash, - toku_node_save_ct_pair); + toku_ftnode_save_ct_pair); *result = new_node; } @@ -153,13 +154,13 @@ create_new_ftnode_with_dep_nodes( FTNODE* dependent_nodes) { uint32_t fullhash = 0; - BLOCKNUM name; + BLOCKNUM blocknum; cachetable_put_empty_node_with_dep_nodes( ft, num_dependent_nodes, dependent_nodes, - &name, + &blocknum, &fullhash, result); @@ -170,7 +171,7 @@ create_new_ftnode_with_dep_nodes( toku_initialize_empty_ftnode( *result, - name, + blocknum, height, n_children, ft->h->layout_version, @@ -201,14 +202,14 @@ toku_create_new_ftnode ( // then a PL_WRITE_CHEAP lock is grabbed // int -toku_pin_ftnode_batched( - FT_HANDLE brt, +toku_pin_ftnode_for_query( + FT_HANDLE ft_handle, BLOCKNUM blocknum, uint32_t fullhash, UNLOCKERS unlockers, ANCESTORS ancestors, - const PIVOT_BOUNDS bounds, - FTNODE_FETCH_EXTRA bfe, + const pivot_bounds &bounds, + ftnode_fetch_extra *bfe, bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this FTNODE *node_p, bool* msgs_applied) @@ -225,13 +226,13 @@ toku_pin_ftnode_batched( paranoid_invariant(bfe->type == ftnode_fetch_subset); } - int r = toku_cachetable_get_and_pin_nonblocking_batched( - brt->ft->cf, + int r = toku_cachetable_get_and_pin_nonblocking( + ft_handle->ft->cf, blocknum, fullhash, &node_v, NULL, - get_write_callbacks_for_node(brt->ft), + get_write_callbacks_for_node(ft_handle->ft), toku_ftnode_fetch_callback, toku_ftnode_pf_req_callback, toku_ftnode_pf_callback, @@ -245,7 +246,7 @@ toku_pin_ftnode_batched( 
node = static_cast(node_v); if (apply_ancestor_messages && node->height == 0) { needs_ancestors_messages = toku_ft_leaf_needs_ancestors_messages( - brt->ft, + ft_handle->ft, node, ancestors, bounds, @@ -255,20 +256,20 @@ toku_pin_ftnode_batched( if (needs_ancestors_messages) { toku::context apply_messages_ctx(CTX_MESSAGE_APPLICATION); - toku_unpin_ftnode_read_only(brt->ft, node); - int rr = toku_cachetable_get_and_pin_nonblocking_batched( - brt->ft->cf, - blocknum, - fullhash, - &node_v, - NULL, - get_write_callbacks_for_node(brt->ft), - toku_ftnode_fetch_callback, - toku_ftnode_pf_req_callback, - toku_ftnode_pf_callback, - PL_WRITE_CHEAP, - bfe, //read_extraargs - unlockers); + toku_unpin_ftnode_read_only(ft_handle->ft, node); + int rr = toku_cachetable_get_and_pin_nonblocking( + ft_handle->ft->cf, + blocknum, + fullhash, + &node_v, + NULL, + get_write_callbacks_for_node(ft_handle->ft), + toku_ftnode_fetch_callback, + toku_ftnode_pf_req_callback, + toku_ftnode_pf_callback, + PL_WRITE_CHEAP, + bfe, //read_extraargs + unlockers); if (rr != 0) { assert(rr == TOKUDB_TRY_AGAIN); // Any other error and we should bomb out ASAP. r = TOKUDB_TRY_AGAIN; @@ -276,7 +277,7 @@ toku_pin_ftnode_batched( } node = static_cast(node_v); toku_apply_ancestors_messages_to_node( - brt, + ft_handle, node, ancestors, bounds, @@ -317,54 +318,14 @@ toku_pin_ftnode_batched( } void -toku_pin_ftnode_off_client_thread_and_maybe_move_messages( - FT h, - BLOCKNUM blocknum, - uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, - pair_lock_type lock_type, - uint32_t num_dependent_nodes, - FTNODE* dependent_nodes, - FTNODE *node_p, - bool move_messages) -{ - toku_pin_ftnode_off_client_thread_batched_and_maybe_move_messages( - h, - blocknum, - fullhash, - bfe, - lock_type, - num_dependent_nodes, - dependent_nodes, - node_p, - move_messages - ); -} - -void -toku_pin_ftnode_off_client_thread( - FT h, - BLOCKNUM blocknum, - uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, - pair_lock_type lock_type, - uint32_t num_dependent_nodes, - FTNODE* dependent_nodes, - FTNODE *node_p) -{ - toku_pin_ftnode_off_client_thread_and_maybe_move_messages( - h, blocknum, fullhash, bfe, lock_type, num_dependent_nodes, dependent_nodes, node_p, true); -} - -void -toku_pin_ftnode_off_client_thread_batched_and_maybe_move_messages( - FT h, +toku_pin_ftnode_with_dep_nodes( + FT ft, BLOCKNUM blocknum, uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, + ftnode_fetch_extra *bfe, pair_lock_type lock_type, uint32_t num_dependent_nodes, - FTNODE* dependent_nodes, + FTNODE *dependent_nodes, FTNODE *node_p, bool move_messages) { @@ -376,13 +337,13 @@ toku_pin_ftnode_off_client_thread_batched_and_maybe_move_messages( dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty; } - int r = toku_cachetable_get_and_pin_with_dep_pairs_batched( - h->cf, + int r = toku_cachetable_get_and_pin_with_dep_pairs( + ft->cf, blocknum, fullhash, &node_v, NULL, - get_write_callbacks_for_node(h), + get_write_callbacks_for_node(ft), toku_ftnode_fetch_callback, toku_ftnode_pf_req_callback, toku_ftnode_pf_callback, @@ -392,27 +353,22 @@ toku_pin_ftnode_off_client_thread_batched_and_maybe_move_messages( dependent_pairs, dependent_dirty_bits ); - assert(r==0); + invariant_zero(r); FTNODE node = (FTNODE) node_v; - if ((lock_type != PL_READ) && node->height > 0 && move_messages) { - toku_move_ftnode_messages_to_stale(h, node); + if (lock_type != PL_READ && node->height > 0 && move_messages) { + toku_move_ftnode_messages_to_stale(ft, node); } *node_p = node; } -void 
-toku_pin_ftnode_off_client_thread_batched( - FT h, - BLOCKNUM blocknum, - uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, - pair_lock_type lock_type, - uint32_t num_dependent_nodes, - FTNODE* dependent_nodes, - FTNODE *node_p) -{ - toku_pin_ftnode_off_client_thread_batched_and_maybe_move_messages( - h, blocknum, fullhash, bfe, lock_type, num_dependent_nodes, dependent_nodes, node_p, true); +void toku_pin_ftnode(FT ft, + BLOCKNUM blocknum, + uint32_t fullhash, + ftnode_fetch_extra *bfe, + pair_lock_type lock_type, + FTNODE *node_p, + bool move_messages) { + toku_pin_ftnode_with_dep_nodes(ft, blocknum, fullhash, bfe, lock_type, 0, nullptr, node_p, move_messages); } int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, pair_lock_type lock_type, FTNODE *nodep) { @@ -429,24 +385,12 @@ int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, pai return r; } -void -toku_unpin_ftnode_off_client_thread(FT ft, FTNODE node) -{ - int r = toku_cachetable_unpin( - ft->cf, - node->ct_pair, - (enum cachetable_dirty) node->dirty, - make_ftnode_pair_attr(node) - ); - assert(r==0); -} - -void -toku_unpin_ftnode(FT ft, FTNODE node) -{ - // printf("%*sUnpin %ld\n", 8-node->height, "", node->thisnodename.b); - //VERIFY_NODE(brt,node); - toku_unpin_ftnode_off_client_thread(ft, node); +void toku_unpin_ftnode(FT ft, FTNODE node) { + int r = toku_cachetable_unpin(ft->cf, + node->ct_pair, + static_cast(node->dirty), + make_ftnode_pair_attr(node)); + invariant_zero(r); } void @@ -460,3 +404,25 @@ toku_unpin_ftnode_read_only(FT ft, FTNODE node) ); assert(r==0); } + +void toku_ftnode_swap_pair_values(FTNODE a, FTNODE b) +// Effect: Swap the blocknum, fullhash, and PAIR for for a and b +// Requires: Both nodes are pinned +{ + BLOCKNUM tmp_blocknum = a->blocknum; + uint32_t tmp_fullhash = a->fullhash; + PAIR tmp_pair = a->ct_pair; + + a->blocknum = b->blocknum; + a->fullhash = b->fullhash; + a->ct_pair = b->ct_pair; + + b->blocknum = tmp_blocknum; + b->fullhash = tmp_fullhash; + b->ct_pair = tmp_pair; + + // A and B swapped pair pointers, but we still have to swap + // the actual pair values (ie: the FTNODEs they represent) + // in the cachetable. + toku_cachetable_swap_pair_values(a->ct_pair, b->ct_pair); +} diff --git a/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.h b/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.h index dd43d0ec0332c..72056e4d58c3e 100644 --- a/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.h +++ b/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_CACHETABLE_WRAPPERS_H -#define FT_CACHETABLE_WRAPPERS_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,11 +87,14 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
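Before the header changes, an illustrative aside (not part of the patch): the simplified pin/unpin pattern these wrappers now expose, assuming an FT ft open in the cachetable and an ftnode_fetch_extra bfe already initialized for a full fetch; visit_node is a hypothetical helper, and the PL_READ / read-only-unpin pairing is an assumption of the sketch.

// Sketch: pin one node read-only, inspect it, and unpin it.
static void visit_node(FT ft, BLOCKNUM blocknum, ftnode_fetch_extra *bfe) {
    uint32_t fullhash = toku_cachetable_hash(ft->cf, blocknum);
    FTNODE node = nullptr;
    // No dependent nodes, shared lock, and don't move stale messages.
    toku_pin_ftnode(ft, blocknum, fullhash, bfe, PL_READ, &node, false);
    // ... read-only inspection of `node` goes here ...
    toku_unpin_ftnode_read_only(ft, node);
}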
-#include -#include "cachetable.h" +#include "ft/cachetable/cachetable.h" +#include "ft/ft-internal.h" +#include "ft/node.h" /** * Put an empty node (that is, no fields filled) into the cachetable. @@ -102,7 +103,7 @@ PATENT RIGHTS GRANT: */ void cachetable_put_empty_node_with_dep_nodes( - FT h, + FT ft, uint32_t num_dependent_nodes, FTNODE* dependent_nodes, BLOCKNUM* name, //output @@ -117,7 +118,7 @@ cachetable_put_empty_node_with_dep_nodes( */ void create_new_ftnode_with_dep_nodes( - FT h, + FT ft, FTNODE *result, int height, int n_children, @@ -138,52 +139,42 @@ toku_create_new_ftnode ( int n_children ); -/** - * Batched version of toku_pin_ftnode, see cachetable batched API for more - * details. - */ +// This function returns a pinned ftnode to the caller. int -toku_pin_ftnode_batched( - FT_HANDLE brt, +toku_pin_ftnode_for_query( + FT_HANDLE ft_h, BLOCKNUM blocknum, uint32_t fullhash, UNLOCKERS unlockers, ANCESTORS ancestors, - const PIVOT_BOUNDS pbounds, - FTNODE_FETCH_EXTRA bfe, + const pivot_bounds &bounds, + ftnode_fetch_extra *bfe, bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this FTNODE *node_p, bool* msgs_applied ); -/** - * Unfortunately, this function is poorly named - * as over time, client threads have also started - * calling this function. - * This function returns a pinned ftnode to the caller. - * Unlike toku_pin_ftnode, this function blocks until the node is pinned. - */ -void -toku_pin_ftnode_off_client_thread( - FT h, +// Pins an ftnode without dependent pairs +void toku_pin_ftnode( + FT ft, BLOCKNUM blocknum, uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, + ftnode_fetch_extra *bfe, pair_lock_type lock_type, - uint32_t num_dependent_nodes, - FTNODE* dependent_nodes, - FTNODE *node_p + FTNODE *node_p, + bool move_messages ); -void -toku_pin_ftnode_off_client_thread_and_maybe_move_messages( - FT h, +// Pins an ftnode with dependent pairs +// Unlike toku_pin_ftnode_for_query, this function blocks until the node is pinned. +void toku_pin_ftnode_with_dep_nodes( + FT ft, BLOCKNUM blocknum, uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, + ftnode_fetch_extra *bfe, pair_lock_type lock_type, uint32_t num_dependent_nodes, - FTNODE* dependent_nodes, + FTNODE *dependent_nodes, FTNODE *node_p, bool move_messages ); @@ -195,53 +186,10 @@ toku_pin_ftnode_off_client_thread_and_maybe_move_messages( int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, pair_lock_type lock_type, FTNODE *nodep); /** - * Batched version of toku_pin_ftnode_off_client_thread, see cachetable - * batched API for more details. + * Effect: Unpin an ftnode. */ -void -toku_pin_ftnode_off_client_thread_batched_and_maybe_move_messages( - FT h, - BLOCKNUM blocknum, - uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, - pair_lock_type lock_type, - uint32_t num_dependent_nodes, - FTNODE* dependent_nodes, - FTNODE *node_p, - bool move_messages - ); - -/** - * Batched version of toku_pin_ftnode_off_client_thread, see cachetable - * batched API for more details. - */ -void -toku_pin_ftnode_off_client_thread_batched( - FT h, - BLOCKNUM blocknum, - uint32_t fullhash, - FTNODE_FETCH_EXTRA bfe, - pair_lock_type lock_type, - uint32_t num_dependent_nodes, - FTNODE* dependent_nodes, - FTNODE *node_p - ); - -/** - * Effect: Unpin a brt node. Used for - * nodes that were pinned off client thread. - */ -void -toku_unpin_ftnode_off_client_thread(FT h, FTNODE node); - -/** - * Effect: Unpin a brt node. 
- * Used for nodes pinned on a client thread - */ -void -toku_unpin_ftnode(FT h, FTNODE node); - -void -toku_unpin_ftnode_read_only(FT ft, FTNODE node); +void toku_unpin_ftnode(FT ft, FTNODE node); +void toku_unpin_ftnode_read_only(FT ft, FTNODE node); -#endif +// Effect: Swaps pair values of two pinned nodes +void toku_ftnode_swap_pair_values(FTNODE nodea, FTNODE nodeb); diff --git a/storage/tokudb/ft-index/ft/ft-flusher-internal.h b/storage/tokudb/ft-index/ft/ft-flusher-internal.h index 512f5ffd27d7e..f26b2d56ef531 100644 --- a/storage/tokudb/ft-index/ft/ft-flusher-internal.h +++ b/storage/tokudb/ft-index/ft/ft-flusher-internal.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_FLUSHER_INTERNAL_H -#define FT_FLUSHER_INTERNAL_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,11 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include - #define flt_flush_before_applying_inbox 1 #define flt_flush_before_child_pin 2 #define ft_flush_aflter_child_pin 3 @@ -115,7 +113,7 @@ typedef struct flusher_advice FLUSHER_ADVICE; * Cleaner thread merging leaf nodes: follow down to a key * Hot optimize table: follow down to the right of a key */ -typedef int (*FA_PICK_CHILD)(FT h, FTNODE parent, void* extra); +typedef int (*FA_PICK_CHILD)(FT ft, FTNODE parent, void* extra); /** * Decide whether to call `toku_ft_flush_some_child` on the child if it is @@ -139,7 +137,7 @@ typedef bool (*FA_SHOULD_RECURSIVELY_FLUSH)(FTNODE child, void* extra); * Hot optimize table: just do the merge */ typedef void (*FA_MAYBE_MERGE_CHILD)(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, @@ -172,7 +170,7 @@ typedef void (*FA_UPDATE_STATUS)(FTNODE child, int dirtied, void* extra); * by `ft_split_child`. If -1 is returned, `ft_split_child` defaults to * the old behavior. */ -typedef int (*FA_PICK_CHILD_AFTER_SPLIT)(FT h, +typedef int (*FA_PICK_CHILD_AFTER_SPLIT)(FT ft, FTNODE node, int childnuma, int childnumb, @@ -223,18 +221,16 @@ dont_destroy_basement_nodes(void* extra); void default_merge_child(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, void* extra); int -default_pick_child_after_split(FT h, +default_pick_child_after_split(FT ft, FTNODE parent, int childnuma, int childnumb, void *extra); - -#endif // End of header guardian. diff --git a/storage/tokudb/ft-index/ft/ft-flusher.cc b/storage/tokudb/ft-index/ft/ft-flusher.cc index 20975c7aa0117..4db92fa9d2b9c 100644 --- a/storage/tokudb/ft-index/ft/ft-flusher.cc +++ b/storage/tokudb/ft-index/ft/ft-flusher.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,22 +89,25 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. 
All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-internal.h" +#include "ft/ft-flusher.h" +#include "ft/ft-flusher-internal.h" +#include "ft/node.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/ft_node-serialize.h" +#include "portability/toku_assert.h" +#include "portability/toku_atomic.h" +#include "util/status.h" +#include "util/context.h" /* Status is intended for display to humans to help understand system behavior. * It does not need to be perfectly thread-safe. */ static FT_FLUSHER_STATUS_S ft_flusher_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ft_flusher_status, k, c, t, "brt flusher: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ft_flusher_status, k, c, t, "ft flusher: " l, inc) #define STATUS_VALUE(x) ft_flusher_status.status[x].value.num void toku_ft_flusher_status_init(void) { @@ -179,25 +182,21 @@ static int find_heaviest_child(FTNODE node) { int max_child = 0; - int max_weight = toku_bnc_nbytesinbuf(BNC(node, 0)) + BP_WORKDONE(node, 0); - int i; - - if (0) printf("%s:%d weights: %d", __FILE__, __LINE__, max_weight); - paranoid_invariant(node->n_children>0); - for (i=1; in_children; i++) { -#ifdef TOKU_DEBUG_PARANOID - if (BP_WORKDONE(node,i)) { - assert(toku_bnc_nbytesinbuf(BNC(node,i)) > 0); + uint64_t max_weight = toku_bnc_nbytesinbuf(BNC(node, 0)) + BP_WORKDONE(node, 0); + + invariant(node->n_children > 0); + for (int i = 1; i < node->n_children; i++) { + uint64_t bytes_in_buf = toku_bnc_nbytesinbuf(BNC(node, i)); + uint64_t workdone = BP_WORKDONE(node, i); + if (workdone > 0) { + invariant(bytes_in_buf > 0); } -#endif - int this_weight = toku_bnc_nbytesinbuf(BNC(node,i)) + BP_WORKDONE(node,i);; - if (0) printf(" %d", this_weight); + uint64_t this_weight = bytes_in_buf + workdone; if (max_weight < this_weight) { max_child = i; max_weight = this_weight; } } - if (0) printf("\n"); return max_child; } @@ -235,7 +234,7 @@ update_flush_status(FTNODE child, int cascades) { } static void -maybe_destroy_child_blbs(FTNODE node, FTNODE child, FT h) +maybe_destroy_child_blbs(FTNODE node, FTNODE child, FT ft) { // If the node is already fully in memory, as in upgrade, we don't // need to destroy the basement nodes because they are all equally @@ -247,7 +246,7 @@ maybe_destroy_child_blbs(FTNODE node, FTNODE child, FT h) if (BP_STATE(child, i) == PT_AVAIL && node->max_msn_applied_to_node_on_disk.msn < BLB_MAX_MSN_APPLIED(child, i).msn) { - toku_evict_bn_from_memory(child, i, h); + toku_evict_bn_from_memory(child, i, ft); } } } @@ -255,14 +254,14 @@ maybe_destroy_child_blbs(FTNODE node, FTNODE child, FT h) static void ft_merge_child( - FT h, + FT ft, FTNODE node, int childnum_to_merge, bool *did_react, struct flusher_advice *fa); static int -pick_heaviest_child(FT UU(h), +pick_heaviest_child(FT UU(ft), FTNODE parent, void* UU(extra)) { @@ -307,11 +306,11 @@ static bool recurse_if_child_is_gorged(FTNODE child, void* extra) { struct flush_status_update_extra *fste = (flush_status_update_extra *)extra; - return toku_ft_nonleaf_is_gorged(child, fste->nodesize); + return 
toku_ftnode_nonleaf_is_gorged(child, fste->nodesize); } int -default_pick_child_after_split(FT UU(h), +default_pick_child_after_split(FT UU(ft), FTNODE UU(parent), int UU(childnuma), int UU(childnumb), @@ -322,7 +321,7 @@ default_pick_child_after_split(FT UU(h), void default_merge_child(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, @@ -334,13 +333,13 @@ default_merge_child(struct flusher_advice *fa, // we are just going to unpin child and // let ft_merge_child pin it again // - toku_unpin_ftnode_off_client_thread(h, child); + toku_unpin_ftnode(ft, child); // // // it is responsibility of ft_merge_child to unlock parent // bool did_react; - ft_merge_child(h, parent, childnum, &did_react, fa); + ft_merge_child(ft, parent, childnum, &did_react, fa); } void @@ -397,7 +396,7 @@ struct ctm_extra { }; static int -ctm_pick_child(FT h, +ctm_pick_child(FT ft, FTNODE parent, void* extra) { @@ -405,13 +404,8 @@ ctm_pick_child(FT h, int childnum; if (parent->height == 1 && ctme->is_last_child) { childnum = parent->n_children - 1; - } - else { - childnum = toku_ftnode_which_child( - parent, - &ctme->target_key, - &h->cmp_descriptor, - h->compare_fun); + } else { + childnum = toku_ftnode_which_child(parent, &ctme->target_key, ft->cmp); } return childnum; } @@ -428,7 +422,7 @@ ctm_update_status( static void ctm_maybe_merge_child(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, @@ -437,19 +431,19 @@ ctm_maybe_merge_child(struct flusher_advice *fa, if (child->height == 0) { (void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED), 1); } - default_merge_child(fa, h, parent, childnum, child, extra); + default_merge_child(fa, ft, parent, childnum, child, extra); } static void ct_maybe_merge_child(struct flusher_advice *fa, - FT h, + FT ft, FTNODE parent, int childnum, FTNODE child, void* extra) { if (child->height > 0) { - default_merge_child(fa, h, parent, childnum, child, extra); + default_merge_child(fa, ft, parent, childnum, child, extra); } else { struct ctm_extra ctme; @@ -471,8 +465,7 @@ ct_maybe_merge_child(struct flusher_advice *fa, ctme.is_last_child = false; pivot_to_save = childnum; } - const DBT *pivot = &parent->childkeys[pivot_to_save]; - toku_clone_dbt(&ctme.target_key, *pivot); + toku_clone_dbt(&ctme.target_key, parent->pivotkeys.get_pivot(pivot_to_save)); // at this point, ctme is properly setup, now we can do the merge struct flusher_advice new_fa; @@ -486,24 +479,24 @@ ct_maybe_merge_child(struct flusher_advice *fa, default_pick_child_after_split, &ctme); - toku_unpin_ftnode_off_client_thread(h, parent); - toku_unpin_ftnode_off_client_thread(h, child); + toku_unpin_ftnode(ft, parent); + toku_unpin_ftnode(ft, child); FTNODE root_node = NULL; { uint32_t fullhash; CACHEKEY root; - toku_calculate_root_offset_pointer(h, &root, &fullhash); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, h); - toku_pin_ftnode_off_client_thread(h, root, fullhash, &bfe, PL_WRITE_EXPENSIVE, 0, NULL, &root_node); - toku_assert_entire_node_in_memory(root_node); + toku_calculate_root_offset_pointer(ft, &root, &fullhash); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + toku_pin_ftnode(ft, root, fullhash, &bfe, PL_WRITE_EXPENSIVE, &root_node, true); + toku_ftnode_assert_fully_in_memory(root_node); } (void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED), 1); (void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING), 1); 
- toku_ft_flush_some_child(h, root_node, &new_fa); + toku_ft_flush_some_child(ft, root_node, &new_fa); (void) toku_sync_fetch_and_sub(&STATUS_VALUE(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING), 1); @@ -545,13 +538,12 @@ ct_flusher_advice_init(struct flusher_advice *fa, struct flush_status_update_ext // a leaf node that is not entirely in memory. If so, then // we cannot be sure if the node is reactive. // -static bool may_node_be_reactive(FT ft, FTNODE node) +static bool ft_ftnode_may_be_reactive(FT ft, FTNODE node) { if (node->height == 0) { return true; - } - else { - return (get_nonleaf_reactivity(node, ft->h->fanout) != RE_STABLE); + } else { + return toku_ftnode_get_nonleaf_reactivity(node, ft->h->fanout) != RE_STABLE; } } @@ -565,6 +557,7 @@ static bool may_node_be_reactive(FT ft, FTNODE node) */ static void handle_split_of_child( + FT ft, FTNODE node, int childnum, FTNODE childa, @@ -575,40 +568,49 @@ handle_split_of_child( paranoid_invariant(node->height>0); paranoid_invariant(0 <= childnum); paranoid_invariant(childnum < node->n_children); - toku_assert_entire_node_in_memory(node); - toku_assert_entire_node_in_memory(childa); - toku_assert_entire_node_in_memory(childb); + toku_ftnode_assert_fully_in_memory(node); + toku_ftnode_assert_fully_in_memory(childa); + toku_ftnode_assert_fully_in_memory(childb); NONLEAF_CHILDINFO old_bnc = BNC(node, childnum); paranoid_invariant(toku_bnc_nbytesinbuf(old_bnc)==0); - int cnum; WHEN_NOT_GCOV( - if (toku_ft_debug_mode) { - int i; - printf("%s:%d Child %d splitting on %s\n", __FILE__, __LINE__, childnum, (char*)splitk->data); - printf("%s:%d oldsplitkeys:", __FILE__, __LINE__); - for(i=0; i<node->n_children-1; i++) printf(" %s", (char *) node->childkeys[i].data); - printf("\n"); - } - ) + if (toku_ft_debug_mode) { + printf("%s:%d Child %d splitting on %s\n", __FILE__, __LINE__, childnum, (char*)splitk->data); + printf("%s:%d oldsplitkeys:", __FILE__, __LINE__); + for(int i = 0; i < node->n_children - 1; i++) printf(" %s", (char *) node->pivotkeys.get_pivot(i).data); + printf("\n"); + } + ) node->dirty = 1; XREALLOC_N(node->n_children+1, node->bp); - XREALLOC_N(node->n_children, node->childkeys); // Slide the children over. // suppose n_children is 10 and childnum is 5, meaning node->childnum[5] just got split // this moves node->bp[6] through node->bp[9] over to // node->bp[7] through node->bp[10] - for (cnum=node->n_children; cnum>childnum+1; cnum--) { + for (int cnum=node->n_children; cnum>childnum+1; cnum--) { node->bp[cnum] = node->bp[cnum-1]; } memset(&node->bp[childnum+1],0,sizeof(node->bp[0])); node->n_children++; - paranoid_invariant(BP_BLOCKNUM(node, childnum).b==childa->thisnodename.b); // use the same child + paranoid_invariant(BP_BLOCKNUM(node, childnum).b==childa->blocknum.b); // use the same child + + // We never set the rightmost blocknum to be the root. + // Instead, we wait for the root to split and let promotion initialize the rightmost + // blocknum to be the first non-root leaf node on the right extreme to receive an insert. + invariant(ft->h->root_blocknum.b != ft->rightmost_blocknum.b); + if (childa->blocknum.b == ft->rightmost_blocknum.b) { + // The rightmost leaf (a) split into (a) and (b). We want (b) to swap pair values + // with (a), now that it is the new rightmost leaf. This keeps the rightmost blocknum + // constant, the same way we keep the root blocknum constant.
+ toku_ftnode_swap_pair_values(childa, childb); + BP_BLOCKNUM(node, childnum) = childa->blocknum; + } - BP_BLOCKNUM(node, childnum+1) = childb->thisnodename; - BP_WORKDONE(node, childnum+1) = 0; + BP_BLOCKNUM(node, childnum+1) = childb->blocknum; + BP_WORKDONE(node, childnum+1) = 0; BP_STATE(node,childnum+1) = PT_AVAIL; NONLEAF_CHILDINFO new_bnc = toku_create_empty_nl(); @@ -620,29 +622,21 @@ handle_split_of_child( } set_BNC(node, childnum+1, new_bnc); - // Slide the keys over - { - for (cnum=node->n_children-2; cnum>childnum; cnum--) { - toku_copy_dbt(&node->childkeys[cnum], node->childkeys[cnum-1]); - } - //if (logger) assert((t->flags&TOKU_DB_DUPSORT)==0); // the setpivot is wrong for TOKU_DB_DUPSORT, so recovery will be broken. - toku_copy_dbt(&node->childkeys[childnum], *splitk); - node->totalchildkeylens += splitk->size; - } + // Insert the new split key , sliding the other keys over + node->pivotkeys.insert_at(splitk, childnum); WHEN_NOT_GCOV( - if (toku_ft_debug_mode) { - int i; - printf("%s:%d splitkeys:", __FILE__, __LINE__); - for(i=0; in_children-2; i++) printf(" %s", (char*)node->childkeys[i].data); - printf("\n"); - } - ) + if (toku_ft_debug_mode) { + printf("%s:%d splitkeys:", __FILE__, __LINE__); + for (int i = 0; i < node->n_children - 2; i++) printf(" %s", (char *) node->pivotkeys.get_pivot(i).data); + printf("\n"); + } + ) /* Keep pushing to the children, but not if the children would require a pushdown */ - toku_assert_entire_node_in_memory(node); - toku_assert_entire_node_in_memory(childa); - toku_assert_entire_node_in_memory(childb); + toku_ftnode_assert_fully_in_memory(node); + toku_ftnode_assert_fully_in_memory(childa); + toku_ftnode_assert_fully_in_memory(childb); VERIFY_NODE(t, node); VERIFY_NODE(t, childa); @@ -667,7 +661,7 @@ ftleaf_disk_size(FTNODE node) // Effect: get the disk size of a leafentry { paranoid_invariant(node->height == 0); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); uint64_t retval = 0; for (int i = 0; i < node->n_children; i++) { retval += BLB_DATA(node, i)->get_disk_size(); @@ -689,16 +683,16 @@ ftleaf_get_split_loc( switch (split_mode) { case SPLIT_LEFT_HEAVY: { *num_left_bns = node->n_children; - *num_left_les = BLB_DATA(node, *num_left_bns - 1)->omt_size(); + *num_left_les = BLB_DATA(node, *num_left_bns - 1)->num_klpairs(); if (*num_left_les == 0) { *num_left_bns = node->n_children - 1; - *num_left_les = BLB_DATA(node, *num_left_bns - 1)->omt_size(); + *num_left_les = BLB_DATA(node, *num_left_bns - 1)->num_klpairs(); } goto exit; } case SPLIT_RIGHT_HEAVY: { *num_left_bns = 1; - *num_left_les = BLB_DATA(node, 0)->omt_size() ? 1 : 0; + *num_left_les = BLB_DATA(node, 0)->num_klpairs() ? 
1 : 0; goto exit; } case SPLIT_EVENLY: { @@ -707,8 +701,8 @@ ftleaf_get_split_loc( uint64_t sumlesizes = ftleaf_disk_size(node); uint32_t size_so_far = 0; for (int i = 0; i < node->n_children; i++) { - BN_DATA bd = BLB_DATA(node, i); - uint32_t n_leafentries = bd->omt_size(); + bn_data* bd = BLB_DATA(node, i); + uint32_t n_leafentries = bd->num_klpairs(); for (uint32_t j=0; j < n_leafentries; j++) { size_t size_this_le; int rr = bd->fetch_klpair_disksize(j, &size_this_le); @@ -725,7 +719,7 @@ ftleaf_get_split_loc( (*num_left_les)--; } else if (*num_left_bns > 1) { (*num_left_bns)--; - *num_left_les = BLB_DATA(node, *num_left_bns - 1)->omt_size(); + *num_left_les = BLB_DATA(node, *num_left_bns - 1)->num_klpairs(); } else { // we are trying to split a leaf with only one // leafentry in it @@ -743,8 +737,6 @@ ftleaf_get_split_loc( return; } -// TODO: (Zardosht) possibly get rid of this function and use toku_omt_split_at in -// ftleaf_split static void move_leafentries( BASEMENTNODE dest_bn, @@ -754,13 +746,14 @@ move_leafentries( ) //Effect: move leafentries in the range [lbi, upe) from src_omt to newly created dest_omt { - src_bn->data_buffer.move_leafentries_to(&dest_bn->data_buffer, lbi, ube); + invariant(ube == src_bn->data_buffer.num_klpairs()); + src_bn->data_buffer.split_klpairs(&dest_bn->data_buffer, lbi); } static void ftnode_finalize_split(FTNODE node, FTNODE B, MSN max_msn_applied_to_node) { // Effect: Finalizes a split by updating some bits and dirtying both nodes - toku_assert_entire_node_in_memory(node); - toku_assert_entire_node_in_memory(B); + toku_ftnode_assert_fully_in_memory(node); + toku_ftnode_assert_fully_in_memory(B); verify_all_in_mempool(node); verify_all_in_mempool(B); @@ -776,7 +769,7 @@ static void ftnode_finalize_split(FTNODE node, FTNODE B, MSN max_msn_applied_to_ void ftleaf_split( - FT h, + FT ft, FTNODE node, FTNODE *nodea, FTNODE *nodeb, @@ -825,7 +818,7 @@ ftleaf_split( // So, we must call this before evaluating // those two values cachetable_put_empty_node_with_dep_nodes( - h, + ft, num_dependent_nodes, dependent_nodes, &name, @@ -839,7 +832,7 @@ ftleaf_split( paranoid_invariant(node->height==0); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); verify_all_in_mempool(node); MSN max_msn_applied_to_node = node->max_msn_applied_to_node_on_disk; @@ -851,7 +844,7 @@ ftleaf_split( ftleaf_get_split_loc(node, split_mode, &num_left_bns, &num_left_les); { // did we split right on the boundary between basement nodes? 
- const bool split_on_boundary = (num_left_les == 0) || (num_left_les == (int) BLB_DATA(node, num_left_bns - 1)->omt_size()); + const bool split_on_boundary = (num_left_les == 0) || (num_left_les == (int) BLB_DATA(node, num_left_bns - 1)->num_klpairs()); // Now we know where we are going to break it // the two nodes will have a total of n_children+1 basement nodes // and n_children-1 pivots @@ -881,13 +874,12 @@ ftleaf_split( name, 0, num_children_in_b, - h->h->layout_version, - h->h->flags); + ft->h->layout_version, + ft->h->flags); B->fullhash = fullhash; } else { B = *nodeb; - REALLOC_N(num_children_in_b-1, B->childkeys); REALLOC_N(num_children_in_b, B->bp); B->n_children = num_children_in_b; for (int i = 0; i < num_children_in_b; i++) { @@ -912,7 +904,7 @@ ftleaf_split( move_leafentries(BLB(B, curr_dest_bn_index), BLB(node, curr_src_bn_index), num_left_les, // first row to be moved to B - BLB_DATA(node, curr_src_bn_index)->omt_size() // number of rows in basement to be split + BLB_DATA(node, curr_src_bn_index)->num_klpairs() // number of rows in basement to be split ); BLB_MAX_MSN_APPLIED(B, curr_dest_bn_index) = BLB_MAX_MSN_APPLIED(node, curr_src_bn_index); curr_dest_bn_index++; @@ -939,32 +931,21 @@ ftleaf_split( // the child index in the original node that corresponds to the // first node in the right node of the split - int base_index = num_left_bns - (split_on_boundary ? 0 : 1); - // make pivots in B - for (int i=0; i < num_children_in_b-1; i++) { - toku_copy_dbt(&B->childkeys[i], node->childkeys[i+base_index]); - B->totalchildkeylens += node->childkeys[i+base_index].size; - node->totalchildkeylens -= node->childkeys[i+base_index].size; - toku_init_dbt(&node->childkeys[i+base_index]); - } - if (split_on_boundary && num_left_bns < node->n_children) { - if (splitk) { - toku_copy_dbt(splitk, node->childkeys[num_left_bns - 1]); - } else { - toku_destroy_dbt(&node->childkeys[num_left_bns - 1]); - } + int split_idx = num_left_bns - (split_on_boundary ? 0 : 1); + node->pivotkeys.split_at(split_idx, &B->pivotkeys); + if (split_on_boundary && num_left_bns < node->n_children && splitk) { + toku_copyref_dbt(splitk, node->pivotkeys.get_pivot(num_left_bns - 1)); } else if (splitk) { - BN_DATA bd = BLB_DATA(node, num_left_bns - 1); + bn_data* bd = BLB_DATA(node, num_left_bns - 1); uint32_t keylen; void *key; - int rr = bd->fetch_le_key_and_len(bd->omt_size() - 1, &keylen, &key); + int rr = bd->fetch_key_and_len(bd->num_klpairs() - 1, &keylen, &key); invariant_zero(rr); toku_memdup_dbt(splitk, key, keylen); } node->n_children = num_children_in_node; REALLOC_N(num_children_in_node, node->bp); - REALLOC_N(num_children_in_node-1, node->childkeys); } ftnode_finalize_split(node, B, max_msn_applied_to_node); @@ -974,7 +955,7 @@ ftleaf_split( void ft_nonleaf_split( - FT h, + FT ft, FTNODE node, FTNODE *nodea, FTNODE *nodeb, @@ -984,7 +965,7 @@ ft_nonleaf_split( { //VERIFY_NODE(t,node); STATUS_VALUE(FT_FLUSHER_SPLIT_NONLEAF)++; - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); int old_n_children = node->n_children; int n_children_in_a = old_n_children/2; int n_children_in_b = old_n_children-n_children_in_a; @@ -992,14 +973,12 @@ ft_nonleaf_split( FTNODE B; paranoid_invariant(node->height>0); paranoid_invariant(node->n_children>=2); // Otherwise, how do we split? We need at least two children to split. 
*/ - create_new_ftnode_with_dep_nodes(h, &B, node->height, n_children_in_b, num_dependent_nodes, dependent_nodes); + create_new_ftnode_with_dep_nodes(ft, &B, node->height, n_children_in_b, num_dependent_nodes, dependent_nodes); { /* The first n_children_in_a go into node a. * That means that the first n_children_in_a-1 keys go into node a. * The splitter key is key number n_children_in_a */ - int i; - - for (i=n_children_in_a; ibp[targchild] = node->bp[i]; memset(&node->bp[i], 0, sizeof(node->bp[0])); - - // Delete a child, removing the preceeding pivot key. The child number must be > 0 - { - paranoid_invariant(i>0); - if (i>n_children_in_a) { - toku_copy_dbt(&B->childkeys[targchild-1], node->childkeys[i-1]); - B->totalchildkeylens += node->childkeys[i-1].size; - node->totalchildkeylens -= node->childkeys[i-1].size; - toku_init_dbt(&node->childkeys[i-1]); - } - } } - node->n_children=n_children_in_a; + // the split key for our parent is the rightmost pivot key in node + node->pivotkeys.split_at(n_children_in_a, &B->pivotkeys); + toku_clone_dbt(splitk, node->pivotkeys.get_pivot(n_children_in_a - 1)); + node->pivotkeys.delete_at(n_children_in_a - 1); - toku_copy_dbt(splitk, node->childkeys[n_children_in_a-1]); - node->totalchildkeylens -= node->childkeys[n_children_in_a-1].size; - - REALLOC_N(n_children_in_a, node->bp); - REALLOC_N(n_children_in_a-1, node->childkeys); + node->n_children = n_children_in_a; + REALLOC_N(node->n_children, node->bp); } ftnode_finalize_split(node, B, max_msn_applied_to_node); @@ -1048,7 +1016,7 @@ ft_nonleaf_split( // static void ft_split_child( - FT h, + FT ft, FTNODE node, int childnum, FTNODE child, @@ -1067,12 +1035,12 @@ ft_split_child( dep_nodes[0] = node; dep_nodes[1] = child; if (child->height==0) { - ftleaf_split(h, child, &nodea, &nodeb, &splitk, true, split_mode, 2, dep_nodes); + ftleaf_split(ft, child, &nodea, &nodeb, &splitk, true, split_mode, 2, dep_nodes); } else { - ft_nonleaf_split(h, child, &nodea, &nodeb, &splitk, 2, dep_nodes); + ft_nonleaf_split(ft, child, &nodea, &nodeb, &splitk, 2, dep_nodes); } // printf("%s:%d child did split\n", __FILE__, __LINE__); - handle_split_of_child (node, childnum, nodea, nodeb, &splitk); + handle_split_of_child (ft, node, childnum, nodea, nodeb, &splitk); // for test call_flusher_thread_callback(flt_flush_during_split); @@ -1081,42 +1049,44 @@ ft_split_child( // now we need to unlock node, // and possibly continue // flushing one of the children - int picked_child = fa->pick_child_after_split(h, node, childnum, childnum + 1, fa->extra); - toku_unpin_ftnode_off_client_thread(h, node); + int picked_child = fa->pick_child_after_split(ft, node, childnum, childnum + 1, fa->extra); + toku_unpin_ftnode(ft, node); if (picked_child == childnum || (picked_child < 0 && nodea->height > 0 && fa->should_recursively_flush(nodea, fa->extra))) { - toku_unpin_ftnode_off_client_thread(h, nodeb); - toku_ft_flush_some_child(h, nodea, fa); + toku_unpin_ftnode(ft, nodeb); + toku_ft_flush_some_child(ft, nodea, fa); } else if (picked_child == childnum + 1 || (picked_child < 0 && nodeb->height > 0 && fa->should_recursively_flush(nodeb, fa->extra))) { - toku_unpin_ftnode_off_client_thread(h, nodea); - toku_ft_flush_some_child(h, nodeb, fa); + toku_unpin_ftnode(ft, nodea); + toku_ft_flush_some_child(ft, nodeb, fa); } else { - toku_unpin_ftnode_off_client_thread(h, nodea); - toku_unpin_ftnode_off_client_thread(h, nodeb); + toku_unpin_ftnode(ft, nodea); + toku_unpin_ftnode(ft, nodeb); } + + toku_destroy_dbt(&splitk); } static void 
bring_node_fully_into_memory(FTNODE node, FT ft) { - if (!is_entire_node_in_memory(node)) { - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); + if (!toku_ftnode_fully_in_memory(node)) { + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); toku_cachetable_pf_pinned_pair( node, toku_ftnode_pf_callback, &bfe, ft->cf, - node->thisnodename, - toku_cachetable_hash(ft->cf, node->thisnodename) + node->blocknum, + toku_cachetable_hash(ft->cf, node->blocknum) ); } } static void flush_this_child( - FT h, + FT ft, FTNODE node, FTNODE child, int childnum, @@ -1124,14 +1094,14 @@ flush_this_child( // Effect: Push everything in the CHILDNUMth buffer of node down into the child. { update_flush_status(child, 0); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); if (fa->should_destroy_basement_nodes(fa)) { - maybe_destroy_child_blbs(node, child, h); + maybe_destroy_child_blbs(node, child, ft); } - bring_node_fully_into_memory(child, h); - toku_assert_entire_node_in_memory(child); + bring_node_fully_into_memory(child, ft); + toku_ftnode_assert_fully_in_memory(child); paranoid_invariant(node->height>0); - paranoid_invariant(child->thisnodename.b!=0); + paranoid_invariant(child->blocknum.b!=0); // VERIFY_NODE does not work off client thread as of now //VERIFY_NODE(t, child); node->dirty = 1; @@ -1143,7 +1113,7 @@ flush_this_child( // now we have a bnc to flush to the child. pass down the parent's // oldest known referenced xid as we flush down to the child. - toku_bnc_flush_to_child(h, bnc, child, node->oldest_referenced_xid_known); + toku_bnc_flush_to_child(ft, bnc, child, node->oldest_referenced_xid_known); destroy_nonleaf_childinfo(bnc); } @@ -1151,8 +1121,8 @@ static void merge_leaf_nodes(FTNODE a, FTNODE b) { STATUS_VALUE(FT_FLUSHER_MERGE_LEAF)++; - toku_assert_entire_node_in_memory(a); - toku_assert_entire_node_in_memory(b); + toku_ftnode_assert_fully_in_memory(a); + toku_ftnode_assert_fully_in_memory(b); paranoid_invariant(a->height == 0); paranoid_invariant(b->height == 0); paranoid_invariant(a->n_children > 0); @@ -1168,58 +1138,53 @@ merge_leaf_nodes(FTNODE a, FTNODE b) a->dirty = 1; b->dirty = 1; - BN_DATA a_last_bd = BLB_DATA(a, a->n_children-1); + bn_data* a_last_bd = BLB_DATA(a, a->n_children-1); // this bool states if the last basement node in a has any items or not // If it does, then it stays in the merge. 
If it does not, the last basement node // of a gets eliminated because we do not have a pivot to store for it (because it has no elements) - const bool a_has_tail = a_last_bd->omt_size() > 0; + const bool a_has_tail = a_last_bd->num_klpairs() > 0; - // move each basement node from b to a - // move the pivots, adding one of what used to be max(a) - // move the estimates int num_children = a->n_children + b->n_children; if (!a_has_tail) { - uint lastchild = a->n_children-1; + int lastchild = a->n_children - 1; BASEMENTNODE bn = BLB(a, lastchild); - { - // verify that last basement in a is empty, then destroy mempool - size_t used_space = a_last_bd->get_disk_size(); - invariant_zero(used_space); - } + + // verify that last basement in a is empty, then destroy mempool + size_t used_space = a_last_bd->get_disk_size(); + invariant_zero(used_space); destroy_basement_node(bn); - set_BNULL(a, a->n_children-1); + set_BNULL(a, lastchild); num_children--; - } - - //realloc pivots and basement nodes in a - REALLOC_N(num_children, a->bp); - REALLOC_N(num_children-1, a->childkeys); - - // fill in pivot for what used to be max of node 'a', if it is needed - if (a_has_tail) { + if (lastchild < a->pivotkeys.num_pivots()) { + a->pivotkeys.delete_at(lastchild); + } + } else { + // fill in pivot for what used to be max of node 'a', if it is needed uint32_t keylen; void *key; - int rr = a_last_bd->fetch_le_key_and_len(a_last_bd->omt_size() - 1, &keylen, &key); - invariant_zero(rr); - toku_memdup_dbt(&a->childkeys[a->n_children-1], key, keylen); - a->totalchildkeylens += keylen; + int r = a_last_bd->fetch_key_and_len(a_last_bd->num_klpairs() - 1, &keylen, &key); + invariant_zero(r); + DBT pivotkey; + toku_fill_dbt(&pivotkey, key, keylen); + a->pivotkeys.replace_at(&pivotkey, a->n_children - 1); } + // realloc basement nodes in `a' + REALLOC_N(num_children, a->bp); + + // move each basement node from b to a uint32_t offset = a_has_tail ? a->n_children : a->n_children - 1; for (int i = 0; i < b->n_children; i++) { - a->bp[i+offset] = b->bp[i]; - memset(&b->bp[i],0,sizeof(b->bp[0])); - if (i < (b->n_children-1)) { - toku_copy_dbt(&a->childkeys[i+offset], b->childkeys[i]); - toku_init_dbt(&b->childkeys[i]); - } + a->bp[i + offset] = b->bp[i]; + memset(&b->bp[i], 0, sizeof(b->bp[0])); } - a->totalchildkeylens += b->totalchildkeylens; - a->n_children = num_children; + + // append b's pivots to a's pivots + a->pivotkeys.append(b->pivotkeys); // now that all the data has been moved from b to a, we can destroy the data in b - // b can remain untouched, as it will be destroyed later - b->totalchildkeylens = 0; + a->n_children = num_children; + b->pivotkeys.destroy(); b->n_children = 0; } @@ -1243,7 +1208,7 @@ static void maybe_merge_pinned_leaf_nodes( FTNODE a, FTNODE b, - DBT *parent_splitk, + const DBT *parent_splitk, bool *did_merge, bool *did_rebalance, DBT *splitk, @@ -1256,7 +1221,7 @@ maybe_merge_pinned_leaf_nodes( { unsigned int sizea = toku_serialize_ftnode_size(a); unsigned int sizeb = toku_serialize_ftnode_size(b); - uint32_t num_leafentries = get_leaf_num_entries(a) + get_leaf_num_entries(b); + uint32_t num_leafentries = toku_ftnode_leaf_num_entries(a) + toku_ftnode_leaf_num_entries(b); if (num_leafentries > 1 && (sizea + sizeb)*4 > (nodesize*3)) { // the combined size is more than 3/4 of a node, so don't merge them. *did_merge = false; @@ -1267,7 +1232,6 @@ maybe_merge_pinned_leaf_nodes( return; } // one is less than 1/4 of a node, and together they are more than 3/4 of a node. 
- toku_destroy_dbt(parent_splitk); // We don't need the parent_splitk any more. If we need a splitk (if we don't merge) we'll malloc a new one. *did_rebalance = true; balance_leaf_nodes(a, b, splitk); } else { @@ -1275,7 +1239,6 @@ maybe_merge_pinned_leaf_nodes( *did_merge = true; *did_rebalance = false; toku_init_dbt(splitk); - toku_destroy_dbt(parent_splitk); // if we are merging, the splitk gets freed. merge_leaf_nodes(a, b); } } @@ -1289,28 +1252,20 @@ maybe_merge_pinned_nonleaf_nodes( bool *did_rebalance, DBT *splitk) { - toku_assert_entire_node_in_memory(a); - toku_assert_entire_node_in_memory(b); - paranoid_invariant(parent_splitk->data); + toku_ftnode_assert_fully_in_memory(a); + toku_ftnode_assert_fully_in_memory(b); + invariant_notnull(parent_splitk->data); + int old_n_children = a->n_children; int new_n_children = old_n_children + b->n_children; + XREALLOC_N(new_n_children, a->bp); - memcpy(a->bp + old_n_children, - b->bp, - b->n_children*sizeof(b->bp[0])); - memset(b->bp,0,b->n_children*sizeof(b->bp[0])); - - XREALLOC_N(new_n_children-1, a->childkeys); - toku_copy_dbt(&a->childkeys[old_n_children-1], *parent_splitk); - a->totalchildkeylens += parent_splitk->size; - for (int i = 0; i < b->n_children - 1; ++i) { - toku_copy_dbt(&a->childkeys[old_n_children + i], b->childkeys[i]); - a->totalchildkeylens += b->childkeys[i].size; - toku_init_dbt(&b->childkeys[i]); - } - a->n_children = new_n_children; + memcpy(a->bp + old_n_children, b->bp, b->n_children * sizeof(b->bp[0])); + memset(b->bp, 0, b->n_children * sizeof(b->bp[0])); - b->totalchildkeylens = 0; + a->pivotkeys.insert_at(parent_splitk, old_n_children - 1); + a->pivotkeys.append(b->pivotkeys); + a->n_children = new_n_children; b->n_children = 0; a->dirty = 1; @@ -1326,7 +1281,7 @@ maybe_merge_pinned_nonleaf_nodes( static void maybe_merge_pinned_nodes( FTNODE parent, - DBT *parent_splitk, + const DBT *parent_splitk, FTNODE a, FTNODE b, bool *did_merge, @@ -1343,7 +1298,7 @@ maybe_merge_pinned_nodes( // For nonleaf nodes, we distribute the children evenly. That may leave one or both of the nodes overfull, but that's OK. // If we distribute, we set *splitk to a malloced pivot key. // Parameters: -// t The BRT. +// t The FT. // parent The parent of the two nodes to be split. // parent_splitk The pivot key between a and b. This is either free()'d or returned in *splitk. // a The first node to merge. 
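The leaf merge policy in maybe_merge_pinned_leaf_nodes above reduces to a three-way size test. The restatement below is illustrative only: decide_leaf_merge and leaf_merge_decision are hypothetical names, and the keep_both branch is inferred from the surrounding comments rather than shown verbatim in this hunk.

    enum class leaf_merge_decision { keep_both, rebalance, merge };

    // sizea/sizeb are the serialized sizes of the two leaves; nodesize is the
    // target node size from the ft header.
    static leaf_merge_decision decide_leaf_merge(unsigned int sizea, unsigned int sizeb,
                                                 uint32_t num_leafentries, unsigned int nodesize) {
        if (num_leafentries > 1 && (sizea + sizeb) * 4 > nodesize * 3) {
            // the combined size is more than 3/4 of a node, so don't merge them
            if (sizea * 4 > nodesize && sizeb * 4 > nodesize) {
                return leaf_merge_decision::keep_both;    // neither is under 1/4 of a node
            }
            return leaf_merge_decision::rebalance;        // one is under 1/4, together over 3/4
        }
        return leaf_merge_decision::merge;                // small enough to live in one node
    }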
@@ -1354,9 +1309,9 @@ maybe_merge_pinned_nodes( { MSN msn_max; paranoid_invariant(a->height == b->height); - toku_assert_entire_node_in_memory(parent); - toku_assert_entire_node_in_memory(a); - toku_assert_entire_node_in_memory(b); + toku_ftnode_assert_fully_in_memory(parent); + toku_ftnode_assert_fully_in_memory(a); + toku_ftnode_assert_fully_in_memory(b); parent->dirty = 1; // just to make sure { MSN msna = a->max_msn_applied_to_node_on_disk; @@ -1377,13 +1332,9 @@ maybe_merge_pinned_nodes( } } -static void merge_remove_key_callback( - BLOCKNUM *bp, - bool for_checkpoint, - void *extra) -{ - FT h = (FT) extra; - toku_free_blocknum(h->blocktable, bp, h, for_checkpoint); +static void merge_remove_key_callback(BLOCKNUM *bp, bool for_checkpoint, void *extra) { + FT ft = (FT) extra; + ft->blocktable.free_blocknum(bp, ft, for_checkpoint); } // @@ -1392,7 +1343,7 @@ static void merge_remove_key_callback( // static void ft_merge_child( - FT h, + FT ft, FTNODE node, int childnum_to_merge, bool *did_react, @@ -1401,7 +1352,7 @@ ft_merge_child( // this function should not be called // if the child is not mergable paranoid_invariant(node->n_children > 1); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); int childnuma,childnumb; if (childnum_to_merge > 0) { @@ -1423,10 +1374,10 @@ ft_merge_child( FTNODE childa, childb; { - uint32_t childfullhash = compute_child_fullhash(h->cf, node, childnuma); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, h); - toku_pin_ftnode_off_client_thread(h, BP_BLOCKNUM(node, childnuma), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &node, &childa); + uint32_t childfullhash = compute_child_fullhash(ft->cf, node, childnuma); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + toku_pin_ftnode_with_dep_nodes(ft, BP_BLOCKNUM(node, childnuma), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &node, &childa, true); } // for test call_flusher_thread_callback(flt_flush_before_pin_second_node_for_merge); @@ -1434,17 +1385,17 @@ ft_merge_child( FTNODE dep_nodes[2]; dep_nodes[0] = node; dep_nodes[1] = childa; - uint32_t childfullhash = compute_child_fullhash(h->cf, node, childnumb); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, h); - toku_pin_ftnode_off_client_thread(h, BP_BLOCKNUM(node, childnumb), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 2, dep_nodes, &childb); + uint32_t childfullhash = compute_child_fullhash(ft->cf, node, childnumb); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + toku_pin_ftnode_with_dep_nodes(ft, BP_BLOCKNUM(node, childnumb), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 2, dep_nodes, &childb, true); } if (toku_bnc_n_entries(BNC(node,childnuma))>0) { - flush_this_child(h, node, childa, childnuma, fa); + flush_this_child(ft, node, childa, childnuma, fa); } if (toku_bnc_n_entries(BNC(node,childnumb))>0) { - flush_this_child(h, node, childb, childnumb, fa); + flush_this_child(ft, node, childb, childnumb, fa); } // now we have both children pinned in main memory, and cachetable locked, @@ -1454,26 +1405,14 @@ ft_merge_child( { DBT splitk; toku_init_dbt(&splitk); - DBT *old_split_key = &node->childkeys[childnuma]; - unsigned int deleted_size = old_split_key->size; - maybe_merge_pinned_nodes(node, &node->childkeys[childnuma], childa, childb, &did_merge, &did_rebalance, &splitk, h->h->nodesize); - if (childa->height>0) { - for (int i=0; i+1n_children; i++) { - paranoid_invariant(childa->childkeys[i].data); - } - } + const DBT old_split_key = node->pivotkeys.get_pivot(childnuma); + 
maybe_merge_pinned_nodes(node, &old_split_key, childa, childb, &did_merge, &did_rebalance, &splitk, ft->h->nodesize); //toku_verify_estimates(t,childa); // the tree did react if a merge (did_merge) or rebalance (new spkit key) occurred *did_react = (bool)(did_merge || did_rebalance); - if (did_merge) { - paranoid_invariant(!splitk.data); - } else { - paranoid_invariant(splitk.data); - } - - node->totalchildkeylens -= deleted_size; // The key was free()'d inside the maybe_merge_pinned_nodes. if (did_merge) { + invariant_null(splitk.data); NONLEAF_CHILDINFO remaining_bnc = BNC(node, childnuma); NONLEAF_CHILDINFO merged_bnc = BNC(node, childnumb); for (unsigned int i = 0; i < (sizeof remaining_bnc->flow) / (sizeof remaining_bnc->flow[0]); ++i) { @@ -1486,11 +1425,16 @@ ft_merge_child( &node->bp[childnumb+1], (node->n_children-childnumb)*sizeof(node->bp[0])); REALLOC_N(node->n_children, node->bp); - memmove(&node->childkeys[childnuma], - &node->childkeys[childnuma+1], - (node->n_children-childnumb)*sizeof(node->childkeys[0])); - REALLOC_N(node->n_children-1, node->childkeys); - paranoid_invariant(BP_BLOCKNUM(node, childnuma).b == childa->thisnodename.b); + node->pivotkeys.delete_at(childnuma); + + // Handle a merge of the rightmost leaf node. + if (did_merge && childb->blocknum.b == ft->rightmost_blocknum.b) { + invariant(childb->blocknum.b != ft->h->root_blocknum.b); + toku_ftnode_swap_pair_values(childa, childb); + BP_BLOCKNUM(node, childnuma) = childa->blocknum; + } + + paranoid_invariant(BP_BLOCKNUM(node, childnuma).b == childa->blocknum.b); childa->dirty = 1; // just to make sure childb->dirty = 1; // just to make sure } else { @@ -1499,10 +1443,11 @@ ft_merge_child( // pretty far down the tree) // If we didn't merge the nodes, then we need the correct pivot. 
- toku_copy_dbt(&node->childkeys[childnuma], splitk); - node->totalchildkeylens += node->childkeys[childnuma].size; + invariant_notnull(splitk.data); + node->pivotkeys.replace_at(&splitk, childnuma); node->dirty = 1; } + toku_destroy_dbt(&splitk); } // // now we possibly flush the children @@ -1513,10 +1458,10 @@ ft_merge_child( // merge_remove_key_callback will free the blocknum int rrb = toku_cachetable_unpin_and_remove( - h->cf, + ft->cf, childb->ct_pair, merge_remove_key_callback, - h + ft ); assert_zero(rrb); @@ -1525,7 +1470,7 @@ ft_merge_child( // unlock the parent paranoid_invariant(node->dirty); - toku_unpin_ftnode_off_client_thread(h, node); + toku_unpin_ftnode(ft, node); } else { // for test @@ -1533,14 +1478,14 @@ ft_merge_child( // unlock the parent paranoid_invariant(node->dirty); - toku_unpin_ftnode_off_client_thread(h, node); - toku_unpin_ftnode_off_client_thread(h, childb); + toku_unpin_ftnode(ft, node); + toku_unpin_ftnode(ft, childb); } if (childa->height > 0 && fa->should_recursively_flush(childa, fa->extra)) { - toku_ft_flush_some_child(h, childa, fa); + toku_ft_flush_some_child(ft, childa, fa); } else { - toku_unpin_ftnode_off_client_thread(h, childa); + toku_unpin_ftnode(ft, childa); } } @@ -1557,7 +1502,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) int dirtied = 0; NONLEAF_CHILDINFO bnc = NULL; paranoid_invariant(parent->height>0); - toku_assert_entire_node_in_memory(parent); + toku_ftnode_assert_fully_in_memory(parent); TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known; // pick the child we want to flush to @@ -1568,14 +1513,14 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // get the child into memory BLOCKNUM targetchild = BP_BLOCKNUM(parent, childnum); - toku_verify_blocknum_allocated(ft->blocktable, targetchild); + ft->blocktable.verify_blocknum_allocated(targetchild); uint32_t childfullhash = compute_child_fullhash(ft->cf, parent, childnum); FTNODE child; - struct ftnode_fetch_extra bfe; + ftnode_fetch_extra bfe; // Note that we don't read the entire node into memory yet. // The idea is let's try to do the minimum work before releasing the parent lock - fill_bfe_for_min_read(&bfe, ft); - toku_pin_ftnode_off_client_thread(ft, targetchild, childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &parent, &child); + bfe.create_for_min_read(ft); + toku_pin_ftnode_with_dep_nodes(ft, targetchild, childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &parent, &child, true); // for test call_flusher_thread_callback(ft_flush_aflter_child_pin); @@ -1588,10 +1533,9 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // Let's do a quick check to see if the child may be reactive // If the child cannot be reactive, then we can safely unlock // the parent before finishing reading in the entire child node. 
- bool may_child_be_reactive = may_node_be_reactive(ft, child); + bool may_child_be_reactive = ft_ftnode_may_be_reactive(ft, child); - paranoid_invariant(child->thisnodename.b!=0); - //VERIFY_NODE(brt, child); + paranoid_invariant(child->blocknum.b!=0); // only do the following work if there is a flush to perform if (toku_bnc_n_entries(BNC(parent, childnum)) > 0 || parent->height == 1) { @@ -1614,7 +1558,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // reactive, we can unpin the parent // if (!may_child_be_reactive) { - toku_unpin_ftnode_off_client_thread(ft, parent); + toku_unpin_ftnode(ft, parent); parent = NULL; } @@ -1630,9 +1574,9 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // we wont be splitting/merging child // and we have already replaced the bnc // for the root with a fresh one - enum reactivity child_re = get_node_reactivity(ft, child); + enum reactivity child_re = toku_ftnode_get_reactivity(ft, child); if (parent && child_re == RE_STABLE) { - toku_unpin_ftnode_off_client_thread(ft, parent); + toku_unpin_ftnode(ft, parent); parent = NULL; } @@ -1660,7 +1604,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // let's get the reactivity of the child again, // it is possible that the flush got rid of some values // and now the parent is no longer reactive - child_re = get_node_reactivity(ft, child); + child_re = toku_ftnode_get_reactivity(ft, child); // if the parent has been unpinned above, then // this is our only option, even if the child is not stable // if the child is not stable, we'll handle it the next @@ -1671,7 +1615,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) ) { if (parent) { - toku_unpin_ftnode_off_client_thread(ft, parent); + toku_unpin_ftnode(ft, parent); parent = NULL; } // @@ -1681,7 +1625,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) toku_ft_flush_some_child(ft, child, fa); } else { - toku_unpin_ftnode_off_client_thread(ft, child); + toku_unpin_ftnode(ft, child); } } else if (child_re == RE_FISSIBLE) { @@ -1705,6 +1649,78 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) } } +void toku_bnc_flush_to_child(FT ft, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known) { + paranoid_invariant(bnc); + + TOKULOGGER logger = toku_cachefile_logger(ft->cf); + TXN_MANAGER txn_manager = logger != nullptr ? 
toku_logger_get_txn_manager(logger) : nullptr; + TXNID oldest_referenced_xid_for_simple_gc = TXNID_NONE; + + txn_manager_state txn_state_for_gc(txn_manager); + bool do_garbage_collection = child->height == 0 && txn_manager != nullptr; + if (do_garbage_collection) { + txn_state_for_gc.init(); + oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager); + } + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_for_simple_gc, + child->oldest_referenced_xid_known, + true); + struct flush_msg_fn { + FT ft; + FTNODE child; + NONLEAF_CHILDINFO bnc; + txn_gc_info *gc_info; + + STAT64INFO_S stats_delta; + size_t remaining_memsize = bnc->msg_buffer.buffer_size_in_use(); + + flush_msg_fn(FT t, FTNODE n, NONLEAF_CHILDINFO nl, txn_gc_info *g) : + ft(t), child(n), bnc(nl), gc_info(g), remaining_memsize(bnc->msg_buffer.buffer_size_in_use()) { + stats_delta = { 0, 0 }; + } + int operator()(const ft_msg &msg, bool is_fresh) { + size_t flow_deltas[] = { 0, 0 }; + size_t memsize_in_buffer = message_buffer::msg_memsize_in_buffer(msg); + if (remaining_memsize <= bnc->flow[0]) { + // this message is in the current checkpoint's worth of + // the end of the message buffer + flow_deltas[0] = memsize_in_buffer; + } else if (remaining_memsize <= bnc->flow[0] + bnc->flow[1]) { + // this message is in the last checkpoint's worth of the + // end of the message buffer + flow_deltas[1] = memsize_in_buffer; + } + toku_ftnode_put_msg( + ft->cmp, + ft->update_fun, + child, + -1, + msg, + is_fresh, + gc_info, + flow_deltas, + &stats_delta + ); + remaining_memsize -= memsize_in_buffer; + return 0; + } + } flush_fn(ft, child, bnc, &gc_info); + bnc->msg_buffer.iterate(flush_fn); + + child->oldest_referenced_xid_known = parent_oldest_referenced_xid_known; + + invariant(flush_fn.remaining_memsize == 0); + if (flush_fn.stats_delta.numbytes || flush_fn.stats_delta.numrows) { + toku_ft_update_stats(&ft->in_memory_stats, flush_fn.stats_delta); + } + if (do_garbage_collection) { + size_t buffsize = bnc->msg_buffer.buffer_size_in_use(); + // may be misleading if there's a broadcast message in there + toku_ft_status_note_msg_bytes_out(buffsize); + } +} + static void update_cleaner_status( FTNODE node, @@ -1822,11 +1838,11 @@ toku_ftnode_cleaner_callback( void *extraargs) { FTNODE node = (FTNODE) ftnode_pv; - invariant(node->thisnodename.b == blocknum.b); + invariant(node->blocknum.b == blocknum.b); invariant(node->fullhash == fullhash); invariant(node->height > 0); // we should never pick a leaf node (for now at least) - FT h = (FT) extraargs; - bring_node_fully_into_memory(node, h); + FT ft = (FT) extraargs; + bring_node_fully_into_memory(node, ft); int childnum = find_heaviest_child(node); update_cleaner_status(node, childnum); @@ -1834,16 +1850,16 @@ toku_ftnode_cleaner_callback( if (toku_bnc_nbytesinbuf(BNC(node, childnum)) > 0) { struct flusher_advice fa; struct flush_status_update_extra fste; - ct_flusher_advice_init(&fa, &fste, h->h->nodesize); - toku_ft_flush_some_child(h, node, &fa); + ct_flusher_advice_init(&fa, &fste, ft->h->nodesize); + toku_ft_flush_some_child(ft, node, &fa); } else { - toku_unpin_ftnode_off_client_thread(h, node); + toku_unpin_ftnode(ft, node); } return 0; } struct flusher_extra { - FT h; + FT ft; FTNODE node; NONLEAF_CHILDINFO bnc; TXNID parent_oldest_referenced_xid_known; @@ -1868,12 +1884,12 @@ static void flush_node_fun(void *fe_v) // destroyed its basement nodes if necessary, so we now need to either // read them back in, or just do the regular 
partial fetch. If we // don't, that means fe->node is a parent, so we need to do this anyway. - bring_node_fully_into_memory(fe->node,fe->h); + bring_node_fully_into_memory(fe->node,fe->ft); fe->node->dirty = 1; struct flusher_advice fa; struct flush_status_update_extra fste; - flt_flusher_advice_init(&fa, &fste, fe->h->h->nodesize); + flt_flusher_advice_init(&fa, &fste, fe->ft->h->nodesize); if (fe->bnc) { // In this case, we have a bnc to flush to a node @@ -1882,7 +1898,7 @@ static void flush_node_fun(void *fe_v) call_flusher_thread_callback(flt_flush_before_applying_inbox); toku_bnc_flush_to_child( - fe->h, + fe->ft, fe->bnc, fe->node, fe->parent_oldest_referenced_xid_known @@ -1893,11 +1909,11 @@ static void flush_node_fun(void *fe_v) // If so, call toku_ft_flush_some_child on the node (because this flush intends to // pass a meaningful oldest referenced xid for simple garbage collection), and it is the // responsibility of the flush to unlock the node. otherwise, we unlock it here. - if (fe->node->height > 0 && toku_ft_nonleaf_is_gorged(fe->node, fe->h->h->nodesize)) { - toku_ft_flush_some_child(fe->h, fe->node, &fa); + if (fe->node->height > 0 && toku_ftnode_nonleaf_is_gorged(fe->node, fe->ft->h->nodesize)) { + toku_ft_flush_some_child(fe->ft, fe->node, &fa); } else { - toku_unpin_ftnode_off_client_thread(fe->h,fe->node); + toku_unpin_ftnode(fe->ft,fe->node); } } else { @@ -1905,25 +1921,25 @@ static void flush_node_fun(void *fe_v) // bnc, which means we are tasked with flushing some // buffer in the node. // It is the responsibility of flush some child to unlock the node - toku_ft_flush_some_child(fe->h, fe->node, &fa); + toku_ft_flush_some_child(fe->ft, fe->node, &fa); } - remove_background_job_from_cf(fe->h->cf); + remove_background_job_from_cf(fe->ft->cf); toku_free(fe); } static void place_node_and_bnc_on_background_thread( - FT h, + FT ft, FTNODE node, NONLEAF_CHILDINFO bnc, TXNID parent_oldest_referenced_xid_known) { struct flusher_extra *XMALLOC(fe); - fe->h = h; + fe->ft = ft; fe->node = node; fe->bnc = bnc; fe->parent_oldest_referenced_xid_known = parent_oldest_referenced_xid_known; - cachefile_kibbutz_enq(h->cf, flush_node_fun, fe); + cachefile_kibbutz_enq(ft->cf, flush_node_fun, fe); } // @@ -1939,7 +1955,7 @@ place_node_and_bnc_on_background_thread( // child needs to be split/merged), then we place the parent on the background thread. // The parent will be unlocked on the background thread // -void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent) +void toku_ft_flush_node_on_background_thread(FT ft, FTNODE parent) { toku::context flush_ctx(CTX_FLUSH); TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known; @@ -1953,24 +1969,24 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent) // see if we can pin the child // FTNODE child; - uint32_t childfullhash = compute_child_fullhash(h->cf, parent, childnum); - int r = toku_maybe_pin_ftnode_clean(h, BP_BLOCKNUM(parent, childnum), childfullhash, PL_WRITE_EXPENSIVE, &child); + uint32_t childfullhash = compute_child_fullhash(ft->cf, parent, childnum); + int r = toku_maybe_pin_ftnode_clean(ft, BP_BLOCKNUM(parent, childnum), childfullhash, PL_WRITE_EXPENSIVE, &child); if (r != 0) { // In this case, we could not lock the child, so just place the parent on the background thread // In the callback, we will use toku_ft_flush_some_child, which checks to // see if we should blow away the old basement nodes. 
- place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known); + place_node_and_bnc_on_background_thread(ft, parent, NULL, parent_oldest_referenced_xid_known); } else { // // successfully locked child // - bool may_child_be_reactive = may_node_be_reactive(h, child); + bool may_child_be_reactive = ft_ftnode_may_be_reactive(ft, child); if (!may_child_be_reactive) { // We're going to unpin the parent, so before we do, we must // check to see if we need to blow away the basement nodes to // keep the MSN invariants intact. - maybe_destroy_child_blbs(parent, child, h); + maybe_destroy_child_blbs(parent, child, ft); // // can detach buffer and unpin root here @@ -1988,17 +2004,17 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent) // so, because we know for sure the child is not // reactive, we can unpin the parent // - place_node_and_bnc_on_background_thread(h, child, bnc, parent_oldest_referenced_xid_known); - toku_unpin_ftnode(h, parent); + place_node_and_bnc_on_background_thread(ft, child, bnc, parent_oldest_referenced_xid_known); + toku_unpin_ftnode(ft, parent); } else { // because the child may be reactive, we need to // put parent on background thread. // As a result, we unlock the child here. - toku_unpin_ftnode(h, child); + toku_unpin_ftnode(ft, child); // Again, we'll have the parent on the background thread, so // we don't need to destroy the basement nodes yet. - place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known); + place_node_and_bnc_on_background_thread(ft, parent, NULL, parent_oldest_referenced_xid_known); } } } diff --git a/storage/tokudb/ft-index/ft/ft-flusher.h b/storage/tokudb/ft-index/ft/ft-flusher.h index 35371af99e81c..47bf4e7cf77e2 100644 --- a/storage/tokudb/ft-index/ft/ft-flusher.h +++ b/storage/tokudb/ft-index/ft/ft-flusher.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_FLUSHER_H -#define FT_FLUSHER_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,11 +86,12 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
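The flush_msg_fn functor added to toku_bnc_flush_to_child above charges each flushed message against the buffer's two checkpoint flow counters. The helper below is a hypothetical restatement of that classification (classify_flow is not part of the patch), assuming flow[0] and flow[1] hold the current and previous checkpoint's worth of buffered bytes as the comments in the functor state.

    // Messages within the last flow[0] bytes of the buffer count toward the current
    // checkpoint's flow; the next flow[1] bytes count toward the previous checkpoint's.
    static void classify_flow(size_t remaining_memsize, size_t memsize_in_buffer,
                              const size_t flow[2], size_t flow_deltas[2]) {
        flow_deltas[0] = 0;
        flow_deltas[1] = 0;
        if (remaining_memsize <= flow[0]) {
            flow_deltas[0] = memsize_in_buffer;
        } else if (remaining_memsize <= flow[0] + flow[1]) {
            flow_deltas[1] = memsize_in_buffer;
        }
    }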
-// This must be first to make the 64-bit file mode work right in Linux -#include "fttypes.h" +#include "ft/ft-internal.h" typedef enum { FT_FLUSHER_CLEANER_TOTAL_NODES = 0, // total number of nodes whose buffers are potentially flushed by cleaner thread @@ -124,7 +123,7 @@ typedef enum { FT_FLUSHER_SPLIT_NONLEAF, // number of nonleaf nodes split FT_FLUSHER_MERGE_LEAF, // number of times leaf nodes are merged FT_FLUSHER_MERGE_NONLEAF, // number of times nonleaf nodes are merged - FT_FLUSHER_BALANCE_LEAF, // number of times a leaf node is balanced inside brt + FT_FLUSHER_BALANCE_LEAF, // number of times a leaf node is balanced FT_FLUSHER_STATUS_NUM_ROWS } ft_flusher_status_entry; @@ -152,10 +151,31 @@ toku_flusher_thread_set_callback( * Puts a workitem on the flusher thread queue, scheduling the node to be * flushed by toku_ft_flush_some_child. */ -void -toku_ft_flush_node_on_background_thread( +void toku_ft_flush_node_on_background_thread(FT ft, FTNODE parent); + +enum split_mode { + SPLIT_EVENLY, + SPLIT_LEFT_HEAVY, + SPLIT_RIGHT_HEAVY +}; + + +// Given pinned node and pinned child, split child into two +// and update node with information about its new child. +void toku_ft_split_child( FT ft, - FTNODE parent + FTNODE node, + int childnum, + FTNODE child, + enum split_mode split_mode + ); + +// Given pinned node, merge childnum with a neighbor and update node with +// information about the change +void toku_ft_merge_child( + FT ft, + FTNODE node, + int childnum ); /** @@ -166,9 +186,10 @@ toku_ft_flush_node_on_background_thread( * nodea is the left node that results from the split * splitk is the right-most key of nodea */ +// TODO: Rename toku_ft_leaf_split void ftleaf_split( - FT h, + FT ft, FTNODE node, FTNODE *nodea, FTNODE *nodeb, @@ -189,8 +210,9 @@ ftleaf_split( * but it does not guarantee that the resulting nodes are smaller than nodesize. */ void +// TODO: Rename toku_ft_nonleaf_split ft_nonleaf_split( - FT h, + FT ft, FTNODE node, FTNODE *nodea, FTNODE *nodeb, @@ -199,8 +221,6 @@ ft_nonleaf_split( FTNODE* dependent_nodes ); - - /************************************************************************ * HOT optimize, should perhaps be factored out to its own header file * ************************************************************************ @@ -230,8 +250,6 @@ void toku_ft_hot_get_status(FT_HOT_STATUS); * we go until the end of the FT. */ int -toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right, - int (*progress_callback)(void *extra, float progress), - void *progress_extra, uint64_t* loops_run); - -#endif // End of header guardian. +toku_ft_hot_optimize(FT_HANDLE ft_h, DBT* left, DBT* right, + int (*progress_callback)(void *extra, float progress), + void *progress_extra, uint64_t* loops_run); diff --git a/storage/tokudb/ft-index/ft/ft-hot-flusher.cc b/storage/tokudb/ft-index/ft/ft-hot-flusher.cc index 0c9691b1e4d8b..55230e75da095 100644 --- a/storage/tokudb/ft-index/ft/ft-hot-flusher.cc +++ b/storage/tokudb/ft-index/ft/ft-hot-flusher.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,14 +89,15 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
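toku_ft_split_child, declared above, now takes the split_mode enum introduced in the same hunk. A small self-contained illustration of what the three modes are intended to do to the split point follows, under the assumption (for illustration only) that a "heavy" side keeps all but one entry; the real ftleaf_split and ft_nonleaf_split work on basement nodes and serialized sizes, not entry counts.

#include <cassert>
#include <cstddef>

enum split_mode { SPLIT_EVENLY, SPLIT_LEFT_HEAVY, SPLIT_RIGHT_HEAVY };

// How many entries stay in the left node after a split.
// Assumption for illustration only: a "heavy" side keeps all but one entry.
static size_t left_count_after_split(size_t n_entries, split_mode mode) {
    assert(n_entries >= 2);                 // a one-entry node has nothing to split
    switch (mode) {
    case SPLIT_LEFT_HEAVY:  return n_entries - 1;
    case SPLIT_RIGHT_HEAVY: return 1;
    case SPLIT_EVENLY:
    default:                return n_entries / 2;
    }
}

int main() {
    // A rightmost sequential insert pattern wants SPLIT_LEFT_HEAVY: the fresh right
    // sibling starts nearly empty and absorbs the inserts that follow.
    assert(left_count_after_split(6, SPLIT_LEFT_HEAVY) == 5);
    assert(left_count_after_split(6, SPLIT_RIGHT_HEAVY) == 1);
    assert(left_count_after_split(6, SPLIT_EVENLY) == 3);
    return 0;
}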
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include -#include -#include -#include -#include -#include -#include +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-flusher.h" +#include "ft/ft-flusher-internal.h" +#include "ft/ft-internal.h" +#include "ft/node.h" +#include "portability/toku_atomic.h" +#include "util/context.h" +#include "util/status.h" // Member Descirption: // 1. highest_pivot_key - this is the key that corresponds to the @@ -119,7 +120,7 @@ struct hot_flusher_extra { static FT_HOT_STATUS_S hot_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(hot_status, k, c, t, "hot: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(hot_status, k, c, t, "hot: " l, inc) #define STATUS_VALUE(x) hot_status.status[x].value.num @@ -168,7 +169,7 @@ hot_set_start_key(struct hot_flusher_extra *flusher, const DBT* start) } static int -hot_just_pick_child(FT h, +hot_just_pick_child(FT ft, FTNODE parent, struct hot_flusher_extra *flusher) { @@ -183,10 +184,7 @@ hot_just_pick_child(FT h, childnum = 0; } else { // Find the pivot boundary. - childnum = toku_ftnode_hot_next_child(parent, - &flusher->highest_pivot_key, - &h->cmp_descriptor, - h->compare_fun); + childnum = toku_ftnode_hot_next_child(parent, &flusher->highest_pivot_key, ft->cmp); } return childnum; @@ -201,19 +199,19 @@ hot_update_flusher_keys(FTNODE parent, // child node. if (childnum < (parent->n_children - 1)) { toku_destroy_dbt(&flusher->max_current_key); - toku_clone_dbt(&flusher->max_current_key, parent->childkeys[childnum]); + toku_clone_dbt(&flusher->max_current_key, parent->pivotkeys.get_pivot(childnum)); } } // Picks which child toku_ft_flush_some_child will use for flushing and // recursion. static int -hot_pick_child(FT h, +hot_pick_child(FT ft, FTNODE parent, void *extra) { struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra; - int childnum = hot_just_pick_child(h, parent, flusher); + int childnum = hot_just_pick_child(ft, parent, flusher); // Now we determine the percentage of the tree flushed so far. @@ -243,14 +241,14 @@ hot_update_status(FTNODE UU(child), // one to flush into. This gives it a chance to do that, and update the // keys it maintains. static int -hot_pick_child_after_split(FT h, +hot_pick_child_after_split(FT ft, FTNODE parent, int childnuma, int childnumb, void *extra) { struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra; - int childnum = hot_just_pick_child(h, parent, flusher); + int childnum = hot_just_pick_child(ft, parent, flusher); assert(childnum == childnuma || childnum == childnumb); hot_update_flusher_keys(parent, childnum, flusher); if (parent->height == 1) { @@ -298,9 +296,9 @@ hot_flusher_destroy(struct hot_flusher_extra *flusher) // Entry point for Hot Optimize Table (HOT). Note, this function is // not recursive. It iterates over root-to-leaf paths. 
int -toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right, - int (*progress_callback)(void *extra, float progress), - void *progress_extra, uint64_t* loops_run) +toku_ft_hot_optimize(FT_HANDLE ft_handle, DBT* left, DBT* right, + int (*progress_callback)(void *extra, float progress), + void *progress_extra, uint64_t* loops_run) { toku::context flush_ctx(CTX_FLUSH); @@ -316,7 +314,7 @@ toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right, // start of HOT operation (void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_HOT_NUM_STARTED), 1); - toku_ft_note_hot_begin(brt); + toku_ft_note_hot_begin(ft_handle); // Higher level logic prevents a dictionary from being deleted or // truncated during a hot optimize operation. Doing so would violate @@ -329,18 +327,17 @@ toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right, { // Get root node (the first parent of each successive HOT // call.) - toku_calculate_root_offset_pointer(brt->ft, &root_key, &fullhash); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, brt->ft); - toku_pin_ftnode_off_client_thread(brt->ft, - (BLOCKNUM) root_key, - fullhash, - &bfe, - PL_WRITE_EXPENSIVE, - 0, - NULL, - &root); - toku_assert_entire_node_in_memory(root); + toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); + toku_pin_ftnode(ft_handle->ft, + (BLOCKNUM) root_key, + fullhash, + &bfe, + PL_WRITE_EXPENSIVE, + &root, + true); + toku_ftnode_assert_fully_in_memory(root); } // Prepare HOT diagnostics. @@ -365,12 +362,12 @@ toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right, // This should recurse to the bottom of the tree and then // return. if (root->height > 0) { - toku_ft_flush_some_child(brt->ft, root, &advice); + toku_ft_flush_some_child(ft_handle->ft, root, &advice); } else { // Since there are no children to flush, we should abort // the HOT call. flusher.rightmost_leaf_seen = 1; - toku_unpin_ftnode_off_client_thread(brt->ft, root); + toku_unpin_ftnode(ft_handle->ft, root); } // Set the highest pivot key seen here, since the parent may @@ -386,8 +383,7 @@ toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right, else if (right) { // if we have flushed past the bounds set for us, // set rightmost_leaf_seen so we exit - FAKE_DB(db, &brt->ft->cmp_descriptor); - int cmp = brt->ft->compare_fun(&db, &flusher.max_current_key, right); + int cmp = ft_handle->ft->cmp(&flusher.max_current_key, right); if (cmp > 0) { flusher.rightmost_leaf_seen = 1; } @@ -417,7 +413,7 @@ toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right, if (r == 0) { success = true; } { - toku_ft_note_hot_complete(brt, success, msn_at_start_of_hot); + toku_ft_note_hot_complete(ft_handle, success, msn_at_start_of_hot); } if (success) { diff --git a/storage/tokudb/ft-index/ft/ft-internal.h b/storage/tokudb/ft-index/ft/ft-internal.h index 730bf1e1547fc..3cd3970557101 100644 --- a/storage/tokudb/ft-index/ft/ft-internal.h +++ b/storage/tokudb/ft-index/ft/ft-internal.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_INTERNAL_H -#define FT_INTERNAL_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,11 +87,22 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. 
All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "toku_config.h" -#include +#include "portability/toku_config.h" +#include "portability/toku_list.h" +#include "portability/toku_race_tools.h" + +#include "ft/cachetable/cachetable.h" +#include "ft/comparator.h" +#include "ft/ft.h" +#include "ft/ft-ops.h" +#include "ft/node.h" +#include "ft/serialize/block_table.h" +#include "ft/txn/rollback.h" // Symbol TOKUDB_REVISION is not defined by fractal-tree makefiles, so // BUILD_ID of 1000 indicates development build of main, not a release build. @@ -103,361 +112,24 @@ PATENT RIGHTS GRANT: #error #endif -#include "ft_layout_version.h" -#include "block_allocator.h" -#include "cachetable.h" -#include "fifo.h" -#include "ft-ops.h" -#include "toku_list.h" -#include -#include "leafentry.h" -#include "block_table.h" -#include "compress.h" -#include -#include -#include "bndata.h" - -enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */ -enum { FT_CMD_OVERHEAD = (2 + sizeof(MSN)) }; // the type plus freshness plus MSN +struct ft_search; + enum { FT_DEFAULT_FANOUT = 16 }; enum { FT_DEFAULT_NODE_SIZE = 4 * 1024 * 1024 }; enum { FT_DEFAULT_BASEMENT_NODE_SIZE = 128 * 1024 }; -// -// Field in ftnode_fetch_extra that tells the -// partial fetch callback what piece of the node -// is needed by the ydb -// -enum ftnode_fetch_type { - ftnode_fetch_none=1, // no partitions needed. - ftnode_fetch_subset, // some subset of partitions needed - ftnode_fetch_prefetch, // this is part of a prefetch call - ftnode_fetch_all, // every partition is needed - ftnode_fetch_keymatch, // one child is needed if it holds both keys -}; - -static bool is_valid_ftnode_fetch_type(enum ftnode_fetch_type type) UU(); -static bool is_valid_ftnode_fetch_type(enum ftnode_fetch_type type) { - switch (type) { - case ftnode_fetch_none: - case ftnode_fetch_subset: - case ftnode_fetch_prefetch: - case ftnode_fetch_all: - case ftnode_fetch_keymatch: - return true; - default: - return false; - } -} - -// -// An extra parameter passed to cachetable functions -// That is used in all types of fetch callbacks. -// The contents help the partial fetch and fetch -// callbacks retrieve the pieces of a node necessary -// for the ensuing operation (flush, query, ...) -// -struct ftnode_fetch_extra { - enum ftnode_fetch_type type; - // needed for reading a node off disk - FT h; - // used in the case where type == ftnode_fetch_subset - // parameters needed to find out which child needs to be decompressed (so it can be read) - ft_search_t* search; - DBT range_lock_left_key, range_lock_right_key; - bool left_is_neg_infty, right_is_pos_infty; - // states if we should try to aggressively fetch basement nodes - // that are not specifically needed for current query, - // but may be needed for other cursor operations user is doing - // For example, if we have not disabled prefetching, - // and the user is doing a dictionary wide scan, then - // even though a query may only want one basement node, - // we fetch all basement nodes in a leaf node. 
- bool disable_prefetching; - // this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback - // thi callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it - int child_to_read; - // when we read internal nodes, we want to read all the data off disk in one I/O - // then we'll treat it as normal and only decompress the needed partitions etc. - - bool read_all_partitions; - // Accounting: How many bytes were read, and how much time did we spend doing I/O? - uint64_t bytes_read; - tokutime_t io_time; - tokutime_t decompress_time; - tokutime_t deserialize_time; -}; - -struct toku_fifo_entry_key_msn_heaviside_extra { - DESCRIPTOR desc; - ft_compare_func cmp; - FIFO fifo; - const DBT *key; - MSN msn; -}; - -// comparison function for inserting messages into a -// ftnode_nonleaf_childinfo's message_tree -int -toku_fifo_entry_key_msn_heaviside(const int32_t &v, const struct toku_fifo_entry_key_msn_heaviside_extra &extra); - -struct toku_fifo_entry_key_msn_cmp_extra { - DESCRIPTOR desc; - ft_compare_func cmp; - FIFO fifo; -}; - -// same thing for qsort_r -int -toku_fifo_entry_key_msn_cmp(const struct toku_fifo_entry_key_msn_cmp_extra &extrap, const int &a, const int &b); - -typedef toku::omt off_omt_t; -typedef toku::omt marked_off_omt_t; - -// data of an available partition of a nonleaf ftnode -struct ftnode_nonleaf_childinfo { - FIFO buffer; - off_omt_t broadcast_list; - marked_off_omt_t fresh_message_tree; - off_omt_t stale_message_tree; - uint64_t flow[2]; // current and last checkpoint -}; - -unsigned int toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc); -int toku_bnc_n_entries(NONLEAF_CHILDINFO bnc); -long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc); -long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc); -void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp); -void toku_bnc_empty(NONLEAF_CHILDINFO bnc); -void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known); -bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull)); -bool toku_ft_nonleaf_is_gorged(FTNODE node, uint32_t nodesize); - -enum reactivity get_nonleaf_reactivity(FTNODE node, unsigned int fanout); -enum reactivity get_node_reactivity(FT ft, FTNODE node); -uint32_t get_leaf_num_entries(FTNODE node); - -// data of an available partition of a leaf ftnode -struct ftnode_leaf_basement_node { - bn_data data_buffer; - unsigned int seqinsert; // number of sequential inserts to this leaf - MSN max_msn_applied; // max message sequence number applied - bool stale_ancestor_messages_applied; - STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk -}; - -enum pt_state { // declare this to be packed so that when used below it will only take 1 byte. 
- PT_INVALID = 0, - PT_ON_DISK = 1, - PT_COMPRESSED = 2, - PT_AVAIL = 3}; - -enum ftnode_child_tag { - BCT_INVALID = 0, - BCT_NULL, - BCT_SUBBLOCK, - BCT_LEAF, - BCT_NONLEAF -}; - -typedef struct ftnode_child_pointer { - union { - struct sub_block *subblock; - struct ftnode_nonleaf_childinfo *nonleaf; - struct ftnode_leaf_basement_node *leaf; - } u; - enum ftnode_child_tag tag; -} FTNODE_CHILD_POINTER; - - -struct ftnode_disk_data { - // - // stores the offset to the beginning of the partition on disk from the ftnode, and the length, needed to read a partition off of disk - // the value is only meaningful if the node is clean. If the node is dirty, then the value is meaningless - // The START is the distance from the end of the compressed node_info data, to the beginning of the compressed partition - // The SIZE is the size of the compressed partition. - // Rationale: We cannot store the size from the beginning of the node since we don't know how big the header will be. - // However, later when we are doing aligned writes, we won't be able to store the size from the end since we want things to align. - uint32_t start; - uint32_t size; -}; -#define BP_START(node_dd,i) ((node_dd)[i].start) -#define BP_SIZE(node_dd,i) ((node_dd)[i].size) - - -// a ftnode partition, associated with a child of a node -struct ftnode_partition { - // the following three variables are used for nonleaf nodes - // for leaf nodes, they are meaningless - BLOCKNUM blocknum; // blocknum of child - - // How many bytes worth of work was performed by messages in each buffer. - uint64_t workdone; - - // - // pointer to the partition. Depending on the state, they may be different things - // if state == PT_INVALID, then the node was just initialized and ptr == NULL - // if state == PT_ON_DISK, then ptr == NULL - // if state == PT_COMPRESSED, then ptr points to a struct sub_block* - // if state == PT_AVAIL, then ptr is: - // a struct ftnode_nonleaf_childinfo for internal nodes, - // a struct ftnode_leaf_basement_node for leaf nodes - // - struct ftnode_child_pointer ptr; - // - // at any time, the partitions may be in one of the following three states (stored in pt_state): - // PT_INVALID - means that the partition was just initialized - // PT_ON_DISK - means that the partition is not in memory and needs to be read from disk. To use, must read off disk and decompress - // PT_COMPRESSED - means that the partition is compressed in memory. To use, must decompress - // PT_AVAIL - means the partition is decompressed and in memory - // - enum pt_state state; // make this an enum to make debugging easier. - - // clock count used to for pe_callback to determine if a node should be evicted or not - // for now, saturating the count at 1 - uint8_t clock_count; -}; - -struct ftnode { - MSN max_msn_applied_to_node_on_disk; // max_msn_applied that will be written to disk - unsigned int flags; - BLOCKNUM thisnodename; // Which block number is this node? - int layout_version; // What version of the data structure? - int layout_version_original; // different (<) from layout_version if upgraded from a previous version (useful for debugging) - int layout_version_read_from_disk; // transient, not serialized to disk, (useful for debugging) - uint32_t build_id; // build_id (svn rev number) of software that wrote this node to disk - int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */ - int dirty; - uint32_t fullhash; - int n_children; //for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced. 
- // for leaf nodes, represents number of basement nodes - unsigned int totalchildkeylens; - DBT *childkeys; /* Pivot keys. Child 0's keys are <= childkeys[0]. Child 1's keys are <= childkeys[1]. - Child 1's keys are > childkeys[0]. */ - - // What's the oldest referenced xid that this node knows about? The real oldest - // referenced xid might be younger, but this is our best estimate. We use it - // as a heuristic to transition provisional mvcc entries from provisional to - // committed (from implicity committed to really committed). - // - // A better heuristic would be the oldest live txnid, but we use this since it - // still works well most of the time, and its readily available on the inject - // code path. - TXNID oldest_referenced_xid_known; - - // array of size n_children, consisting of ftnode partitions - // each one is associated with a child - // for internal nodes, the ith partition corresponds to the ith message buffer - // for leaf nodes, the ith partition corresponds to the ith basement node - struct ftnode_partition *bp; - PAIR ct_pair; -}; - -// ftnode partition macros -// BP stands for ftnode_partition -#define BP_BLOCKNUM(node,i) ((node)->bp[i].blocknum) -#define BP_STATE(node,i) ((node)->bp[i].state) -#define BP_WORKDONE(node, i)((node)->bp[i].workdone) - -// -// macros for managing a node's clock -// Should be managed by ft-ops.c, NOT by serialize/deserialize -// - -// -// BP_TOUCH_CLOCK uses a compare and swap because multiple threads -// that have a read lock on an internal node may try to touch the clock -// simultaneously -// -#define BP_TOUCH_CLOCK(node, i) ((node)->bp[i].clock_count = 1) -#define BP_SWEEP_CLOCK(node, i) ((node)->bp[i].clock_count = 0) -#define BP_SHOULD_EVICT(node, i) ((node)->bp[i].clock_count == 0) -// not crazy about having these two here, one is for the case where we create new -// nodes, such as in splits and creating new roots, and the other is for when -// we are deserializing a node and not all bp's are touched -#define BP_INIT_TOUCHED_CLOCK(node, i) ((node)->bp[i].clock_count = 1) -#define BP_INIT_UNTOUCHED_CLOCK(node, i) ((node)->bp[i].clock_count = 0) - -// internal node macros -static inline void set_BNULL(FTNODE node, int i) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - node->bp[i].ptr.tag = BCT_NULL; -} -static inline bool is_BNULL (FTNODE node, int i) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - return node->bp[i].ptr.tag == BCT_NULL; -} -static inline NONLEAF_CHILDINFO BNC(FTNODE node, int i) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - FTNODE_CHILD_POINTER p = node->bp[i].ptr; - paranoid_invariant(p.tag==BCT_NONLEAF); - return p.u.nonleaf; -} -static inline void set_BNC(FTNODE node, int i, NONLEAF_CHILDINFO nl) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; - p->tag = BCT_NONLEAF; - p->u.nonleaf = nl; -} - -static inline BASEMENTNODE BLB(FTNODE node, int i) { - paranoid_invariant(i >= 0); - // The optimizer really doesn't like it when we compare - // i to n_children as signed integers. So we assert that - // n_children is in fact positive before doing a comparison - // on the values forcibly cast to unsigned ints. 
- paranoid_invariant(node->n_children > 0); - paranoid_invariant((unsigned) i < (unsigned) node->n_children); - FTNODE_CHILD_POINTER p = node->bp[i].ptr; - paranoid_invariant(p.tag==BCT_LEAF); - return p.u.leaf; -} -static inline void set_BLB(FTNODE node, int i, BASEMENTNODE bn) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; - p->tag = BCT_LEAF; - p->u.leaf = bn; -} - -static inline SUB_BLOCK BSB(FTNODE node, int i) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - FTNODE_CHILD_POINTER p = node->bp[i].ptr; - paranoid_invariant(p.tag==BCT_SUBBLOCK); - return p.u.subblock; -} -static inline void set_BSB(FTNODE node, int i, SUB_BLOCK sb) { - paranoid_invariant(i >= 0); - paranoid_invariant(i < node->n_children); - FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; - p->tag = BCT_SUBBLOCK; - p->u.subblock = sb; -} - -// ftnode leaf basementnode macros, -#define BLB_MAX_MSN_APPLIED(node,i) (BLB(node,i)->max_msn_applied) -#define BLB_MAX_DSN_APPLIED(node,i) (BLB(node,i)->max_dsn_applied) -#define BLB_DATA(node,i) (&(BLB(node,i)->data_buffer)) -#define BLB_NBYTESINDATA(node,i) (BLB_DATA(node,i)->get_disk_size()) -#define BLB_SEQINSERT(node,i) (BLB(node,i)->seqinsert) - -/* pivot flags (must fit in 8 bits) */ -enum { - FT_PIVOT_TRUNC = 4, - FT_PIVOT_FRONT_COMPRESS = 8, -}; +// We optimize for a sequential insert pattern if 100 consecutive injections +// happen into the rightmost leaf node due to promotion. +enum { FT_SEQINSERT_SCORE_THRESHOLD = 100 }; uint32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum); -// The brt_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata. - -enum ft_type {FT_CURRENT=1, FT_CHECKPOINT_INPROGRESS}; +enum ft_type { + FT_CURRENT = 1, + FT_CHECKPOINT_INPROGRESS +}; +// The ft_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata. struct ft_header { enum ft_type type; @@ -470,7 +142,7 @@ struct ft_header { // LSN of creation of "checkpoint-begin" record in log. LSN checkpoint_lsn; - // see brt_layout_version.h. maybe don't need this if we assume + // see serialize/ft_layout_version.h. maybe don't need this if we assume // it's always the current version after deserializing const int layout_version; // different (<) from layout_version if upgraded from a previous @@ -504,7 +176,7 @@ struct ft_header { enum toku_compression_method compression_method; unsigned int fanout; - // Current Minimum MSN to be used when upgrading pre-MSN BRT's. + // Current Minimum MSN to be used when upgrading pre-MSN FT's. // This is decremented from our currnt MIN_MSN so as not to clash // with any existing 'normal' MSN's. MSN highest_unused_msn_for_upgrade; @@ -525,8 +197,9 @@ struct ft_header { STAT64INFO_S on_disk_stats; }; +typedef struct ft_header *FT_HEADER; -// brt_header is always the current version. +// ft_header is always the current version. struct ft { FT_HEADER h; FT_HEADER checkpoint_header; @@ -536,20 +209,23 @@ struct ft { CACHEFILE cf; // unique id for dictionary DICTIONARY_ID dict_id; - ft_compare_func compare_fun; - ft_update_func update_fun; // protected by locktree DESCRIPTOR_S descriptor; - // protected by locktree and user. User - // makes sure this is only changed - // when no activity on tree + + // protected by locktree and user. 
+ // User makes sure this is only changed when no activity on tree DESCRIPTOR_S cmp_descriptor; + // contains a pointer to cmp_descriptor (above) - their lifetimes are bound + toku::comparator cmp; + + // the update function always utilizes the cmp_descriptor, not the regular one + ft_update_func update_fun; // These are not read-only: // protected by blocktable lock - BLOCK_TABLE blocktable; + block_table blocktable; // protected by atomic builtins STAT64INFO_S in_memory_stats; @@ -572,13 +248,29 @@ struct ft { // is this ft a blackhole? if so, all messages are dropped. bool blackhole; + + // The blocknum of the rightmost leaf node in the tree. Stays constant through splits + // and merges using pair-swapping (like the root node, see toku_ftnode_swap_pair_values()) + // + // This field only transitions from RESERVED_BLOCKNUM_NULL to non-null, never back. + // We initialize it when promotion inserts into a non-root leaf node on the right extreme. + // We use the blocktable lock to protect the initialize transition, though it's not really + // necessary since all threads should be setting it to the same value. We maintain that invariant + // on first initialization, see ft_set_or_verify_rightmost_blocknum() + BLOCKNUM rightmost_blocknum; + + // sequential access pattern heuristic + // - when promotion pushes a message directly into the rightmost leaf, the score goes up. + // - if the score is high enough, we optimistically attempt to insert directly into the rightmost leaf + // - if our attempt fails because the key was not in range of the rightmost leaf, we reset the score back to 0 + uint32_t seqinsert_score; }; // Allocate a DB struct off the stack and only set its comparison // descriptor. We don't bother setting any other fields because // the comparison function doesn't need it, and we would like to // reduce the CPU work done per comparison. 
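The rightmost_blocknum and seqinsert_score fields added to struct ft above implement the sequential-insert shortcut. A minimal model of the score bookkeeping is sketched below, reusing the FT_SEQINSERT_SCORE_THRESHOLD value of 100 declared earlier in this header; the real promotion path also distinguishes out-of-range failures from reactive-child failures, which is collapsed into a single boolean here.

#include <cassert>
#include <cstdint>

enum { FT_SEQINSERT_SCORE_THRESHOLD = 100 };

struct seqinsert_tracker {
    uint32_t score = 0;

    // Called after promotion delivers a message: bump the score only when the
    // message landed in the rightmost leaf, otherwise start over.
    void note_insert(bool landed_in_rightmost_leaf) {
        score = landed_in_rightmost_leaf ? score + 1 : 0;
    }

    // Once the workload looks sequential enough, try injecting straight into
    // the rightmost leaf instead of descending from the root.
    bool should_try_rightmost_shortcut() const {
        return score >= FT_SEQINSERT_SCORE_THRESHOLD;
    }
};

int main() {
    seqinsert_tracker t;
    for (int i = 0; i < FT_SEQINSERT_SCORE_THRESHOLD; i++) {
        t.note_insert(true);
    }
    assert(t.should_try_rightmost_shortcut());
    t.note_insert(false);               // one out-of-range key resets the heuristic
    assert(!t.should_try_rightmost_shortcut());
    return 0;
}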
-#define FAKE_DB(db, desc) struct __toku_db db; do { db.cmp_descriptor = desc; } while (0) +#define FAKE_DB(db, desc) struct __toku_db db; do { db.cmp_descriptor = const_cast(desc); } while (0) struct ft_options { unsigned int nodesize; @@ -586,6 +278,7 @@ struct ft_options { enum toku_compression_method compression_method; unsigned int fanout; unsigned int flags; + uint8_t memcmp_magic; ft_compare_func compare_fun; ft_update_func update_fun; }; @@ -605,439 +298,172 @@ struct ft_handle { PAIR_ATTR make_ftnode_pair_attr(FTNODE node); PAIR_ATTR make_invalid_pair_attr(void); -/* serialization code */ -void -toku_create_compressed_partition_from_available( - FTNODE node, - int childnum, - enum toku_compression_method compression_method, - SUB_BLOCK sb - ); -void rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize); -int toku_serialize_ftnode_to_memory (FTNODE node, - FTNODE_DISK_DATA* ndd, - unsigned int basementnodesize, - enum toku_compression_method compression_method, - bool do_rebalancing, - bool in_parallel, - /*out*/ size_t *n_bytes_to_write, - /*out*/ size_t *n_uncompressed_bytes, - /*out*/ char **bytes_to_write); -int toku_serialize_ftnode_to(int fd, BLOCKNUM, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT h, bool for_checkpoint); -int toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized, - FT h, bool for_checkpoint); -void toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized); -int toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT h); -int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra* bfe); -int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fetch_extra *bfe); -int toku_deserialize_ftnode_from (int fd, BLOCKNUM off, uint32_t /*fullhash*/, FTNODE *ftnode, FTNODE_DISK_DATA* ndd, struct ftnode_fetch_extra* bfe); - -// For verifying old, non-upgraded nodes (versions 13 and 14). -int -decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum); -// - -//////////////// TODO: Move these function declarations -int -deserialize_ft_from_fd_into_rbuf(int fd, - toku_off_t offset_of_header, - struct rbuf *rb, - uint64_t *checkpoint_count, - LSN *checkpoint_lsn, - uint32_t * version_p); +// +// Field in ftnode_fetch_extra that tells the +// partial fetch callback what piece of the node +// is needed by the ydb +// +enum ftnode_fetch_type { + ftnode_fetch_none = 1, // no partitions needed. + ftnode_fetch_subset, // some subset of partitions needed + ftnode_fetch_prefetch, // this is part of a prefetch call + ftnode_fetch_all, // every partition is needed + ftnode_fetch_keymatch, // one child is needed if it holds both keys +}; -int -deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version); +// Info passed to cachetable fetch callbacks to say which parts of a node +// should be fetched (perhaps a subset, perhaps the whole thing, depending +// on operation) +class ftnode_fetch_extra { +public: + // Used when the whole node must be in memory, such as for flushes. + void create_for_full_read(FT ft); -void read_block_from_fd_into_rbuf( - int fd, - BLOCKNUM blocknum, - FT h, - struct rbuf *rb - ); + // A subset of children are necessary. Used by point queries. 
+ void create_for_subset_read(FT ft, ft_search *search, const DBT *left, const DBT *right, + bool left_is_neg_infty, bool right_is_pos_infty, + bool disable_prefetching, bool read_all_partitions); -int -read_compressed_sub_block(struct rbuf *rb, struct sub_block *sb); + // No partitions are necessary - only pivots and/or subtree estimates. + // Currently used for stat64. + void create_for_min_read(FT ft); -int -verify_ftnode_sub_block (struct sub_block *sb); + // Used to prefetch partitions that fall within the bounds given by the cursor. + void create_for_prefetch(FT ft, struct ft_cursor *cursor); -void -just_decompress_sub_block(struct sub_block *sb); + // Only a portion of the node (within a keyrange) is required. + // Used by keysrange when the left and right key are in the same basement node. + void create_for_keymatch(FT ft, const DBT *left, const DBT *right, + bool disable_prefetching, bool read_all_partitions); -/* Beginning of ft-node-deserialize.c helper functions. */ -void initialize_ftnode(FTNODE node, BLOCKNUM blocknum); -int read_and_check_magic(struct rbuf *rb); -int read_and_check_version(FTNODE node, struct rbuf *rb); -void read_node_info(FTNODE node, struct rbuf *rb, int version); -void allocate_and_read_partition_offsets(FTNODE node, struct rbuf *rb, FTNODE_DISK_DATA *ndd); -int check_node_info_checksum(struct rbuf *rb); -void read_legacy_node_info(FTNODE node, struct rbuf *rb, int version); -int check_legacy_end_checksum(struct rbuf *rb); -/* End of ft-node-deserialization.c helper functions. */ + void destroy(void); -unsigned int toku_serialize_ftnode_size(FTNODE node); /* How much space will it take? */ + // return: true if a specific childnum is required to be in memory + bool wants_child_available(int childnum) const; -void toku_verify_or_set_counts(FTNODE); + // return: the childnum of the leftmost child that is required to be in memory + int leftmost_child_wanted(FTNODE node) const; -size_t toku_serialize_ft_size (FT_HEADER h); -void toku_serialize_ft_to (int fd, FT_HEADER h, BLOCK_TABLE blocktable, CACHEFILE cf); -void toku_serialize_ft_to_wbuf ( - struct wbuf *wbuf, - FT_HEADER h, - DISKOFF translation_location_on_disk, - DISKOFF translation_size_on_disk - ); -int toku_deserialize_ft_from (int fd, LSN max_acceptable_lsn, FT *ft); -void toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISKOFF offset); -void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTOR desc); -BASEMENTNODE toku_create_empty_bn(void); -BASEMENTNODE toku_create_empty_bn_no_buffer(void); // create a basement node with a null buffer. 
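The create_for_* constructors above replace the old fill_bfe_for_* helpers; each one fixes the fetch type, which in turn decides how much of a node the cachetable callbacks must bring into memory. A compact sketch of that mapping follows, with one boolean per partition instead of the real PT_ON_DISK/PT_COMPRESSED/PT_AVAIL states, and with the child and range already resolved to partition indexes (the real code resolves them against the node's pivot keys).

#include <cassert>
#include <vector>

// Mirrors the fetch types used by ftnode_fetch_extra; values as in the header above.
enum ftnode_fetch_type {
    ftnode_fetch_none = 1,   // pivots only (e.g. stat64)
    ftnode_fetch_subset,     // just the child a search will descend into
    ftnode_fetch_prefetch,   // the children covered by a cursor's range bounds
    ftnode_fetch_all,        // every partition (flushes, full reads)
    ftnode_fetch_keymatch,   // the child holding both lookup keys
};

// Which partitions must be in memory for a given fetch type.
// Assumption: child_to_read and [left, right] are already partition indexes.
static std::vector<bool> partitions_wanted(ftnode_fetch_type type, int n_children,
                                           int child_to_read, int left, int right) {
    std::vector<bool> wanted(n_children, false);
    switch (type) {
    case ftnode_fetch_all:
        wanted.assign(n_children, true);
        break;
    case ftnode_fetch_subset:
    case ftnode_fetch_keymatch:
        wanted[child_to_read] = true;   // keymatch may really span two adjacent children
        break;
    case ftnode_fetch_prefetch:
        for (int i = left; i <= right; i++) {
            wanted[i] = true;
        }
        break;
    case ftnode_fetch_none:
    default:
        break;                          // only pivots/subtree estimates are needed
    }
    return wanted;
}

int main() {
    assert(partitions_wanted(ftnode_fetch_all, 4, -1, 0, 0)[3]);
    assert(partitions_wanted(ftnode_fetch_subset, 4, 2, 0, 0)[2]);
    std::vector<bool> pre = partitions_wanted(ftnode_fetch_prefetch, 4, -1, 1, 3);
    assert(!pre[0] && pre[1] && pre[3]);
    assert(!partitions_wanted(ftnode_fetch_none, 4, -1, 0, 0)[0]);
    return 0;
}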
-NONLEAF_CHILDINFO toku_clone_nl(NONLEAF_CHILDINFO orig_childinfo); -BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn); -NONLEAF_CHILDINFO toku_create_empty_nl(void); -// FIXME needs toku prefix -void destroy_basement_node (BASEMENTNODE bn); -// FIXME needs toku prefix -void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl); -void toku_destroy_ftnode_internals(FTNODE node); -void toku_ftnode_free (FTNODE *node); -bool is_entire_node_in_memory(FTNODE node); -void toku_assert_entire_node_in_memory(FTNODE node); + // return: the childnum of the rightmost child that is required to be in memory + int rightmost_child_wanted(FTNODE node) const; -// append a child node to a parent node -void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey); + // needed for reading a node off disk + FT ft; -// append a cmd to a nonleaf node child buffer -void toku_ft_append_to_child_buffer(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val); + enum ftnode_fetch_type type; -STAT64INFO_S toku_get_and_clear_basement_stats(FTNODE leafnode); + // used in the case where type == ftnode_fetch_subset + // parameters needed to find out which child needs to be decompressed (so it can be read) + ft_search *search; + DBT range_lock_left_key, range_lock_right_key; + bool left_is_neg_infty, right_is_pos_infty; -//#define SLOW -#ifdef SLOW -#define VERIFY_NODE(t,n) (toku_verify_or_set_counts(n), toku_verify_estimates(t,n)) -#else -#define VERIFY_NODE(t,n) ((void)0) -#endif + // states if we should try to aggressively fetch basement nodes + // that are not specifically needed for current query, + // but may be needed for other cursor operations user is doing + // For example, if we have not disabled prefetching, + // and the user is doing a dictionary wide scan, then + // even though a query may only want one basement node, + // we fetch all basement nodes in a leaf node. + bool disable_prefetching; -void toku_ft_status_update_pivot_fetch_reason(struct ftnode_fetch_extra *bfe); -void toku_ft_status_update_flush_reason(FTNODE node, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written, tokutime_t write_time, bool for_checkpoint); -void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time); -void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time); + // this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback + // thi callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it + int child_to_read; + + // when we read internal nodes, we want to read all the data off disk in one I/O + // then we'll treat it as normal and only decompress the needed partitions etc. + bool read_all_partitions; + + // Accounting: How many bytes were read, and how much time did we spend doing I/O? + uint64_t bytes_read; + tokutime_t io_time; + tokutime_t decompress_time; + tokutime_t deserialize_time; + +private: + void _create_internal(FT ft_); +}; +// Only exported for tests. +// Cachetable callbacks for ftnodes. 
void toku_ftnode_clone_callback(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* write_extraargs); void toku_ftnode_checkpoint_complete_callback(void *value_data); -void toku_ftnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, void *ftnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone); -int toku_ftnode_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int*dirty, void*extraargs); +void toku_ftnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM blocknum, void *ftnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone); +int toku_ftnode_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int*dirty, void*extraargs); void toku_ftnode_pe_est_callback(void* ftnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* write_extraargs); int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *extraargs, void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra); bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs); int toku_ftnode_pf_callback(void* ftnode_pv, void* UU(disk_data), void* read_extraargs, int fd, PAIR_ATTR* sizep); int toku_ftnode_cleaner_callback( void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *extraargs); -void toku_evict_bn_from_memory(FTNODE node, int childnum, FT h); -BASEMENTNODE toku_detach_bn(FTNODE node, int childnum); - -// Given pinned node and pinned child, split child into two -// and update node with information about its new child. -void toku_ft_split_child( - FT h, - FTNODE node, - int childnum, - FTNODE child, - enum split_mode split_mode - ); -// Given pinned node, merge childnum with a neighbor and update node with -// information about the change -void toku_ft_merge_child( - FT ft, - FTNODE node, - int childnum - ); -static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT h) { - CACHETABLE_WRITE_CALLBACK wc; - wc.flush_callback = toku_ftnode_flush_callback; - wc.pe_est_callback = toku_ftnode_pe_est_callback; - wc.pe_callback = toku_ftnode_pe_callback; - wc.cleaner_callback = toku_ftnode_cleaner_callback; - wc.clone_callback = toku_ftnode_clone_callback; - wc.checkpoint_complete_callback = toku_ftnode_checkpoint_complete_callback; - wc.write_extraargs = h; - return wc; -} - -static const FTNODE null_ftnode=0; - -/* a brt cursor is represented as a kv pair in a tree */ -struct ft_cursor { - struct toku_list cursors_link; - FT_HANDLE ft_handle; - DBT key, val; // The key-value pair that the cursor currently points to - DBT range_lock_left_key, range_lock_right_key; - bool prefetching; - bool left_is_neg_infty, right_is_pos_infty; - bool is_snapshot_read; // true if query is read_committed, false otherwise - bool is_leaf_mode; - bool disable_prefetching; - bool is_temporary; - int out_of_range_error; - int direction; - TOKUTXN ttxn; - FT_CHECK_INTERRUPT_CALLBACK interrupt_cb; - void *interrupt_cb_extra; -}; -// -// Helper function to fill a ftnode_fetch_extra with data -// that will tell the fetch callback that the entire node is -// necessary. 
Used in cases where the entire node -// is required, such as for flushes. -// -static inline void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT h) { - bfe->type = ftnode_fetch_all; - bfe->h = h; - bfe->search = NULL; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - bfe->left_is_neg_infty = false; - bfe->right_is_pos_infty = false; - bfe->child_to_read = -1; - bfe->disable_prefetching = false; - bfe->read_all_partitions = false; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} +CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT ft); -// -// Helper function to fill a ftnode_fetch_extra with data -// that will tell the fetch callback that an explicit range of children is -// necessary. Used in cases where the portion of the node that is required -// is known in advance, e.g. for keysrange when the left and right key -// are in the same basement node. -// -static inline void fill_bfe_for_keymatch( - struct ftnode_fetch_extra *bfe, - FT h, - const DBT *left, - const DBT *right, - bool disable_prefetching, - bool read_all_partitions - ) -{ - paranoid_invariant(h->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_keymatch; - bfe->h = h; - bfe->search = nullptr; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - if (left) { - toku_copyref_dbt(&bfe->range_lock_left_key, *left); - } - - if (right) { - toku_copyref_dbt(&bfe->range_lock_right_key, *right); - } - bfe->left_is_neg_infty = left == nullptr; - bfe->right_is_pos_infty = right == nullptr; - bfe->child_to_read = -1; - bfe->disable_prefetching = disable_prefetching; - bfe->read_all_partitions = read_all_partitions; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} +// This is only exported for tests. +// append a child node to a parent node +void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey); -// -// Helper function to fill a ftnode_fetch_extra with data -// that will tell the fetch callback that some subset of the node -// necessary. Used in cases where some of the node is required -// such as for a point query. -// -static inline void fill_bfe_for_subset_read( - struct ftnode_fetch_extra *bfe, - FT h, - ft_search_t* search, - const DBT *left, - const DBT *right, - bool left_is_neg_infty, - bool right_is_pos_infty, - bool disable_prefetching, - bool read_all_partitions - ) -{ - paranoid_invariant(h->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_subset; - bfe->h = h; - bfe->search = search; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - if (left) { - toku_copyref_dbt(&bfe->range_lock_left_key, *left); - } - if (right) { - toku_copyref_dbt(&bfe->range_lock_right_key, *right); - } - bfe->left_is_neg_infty = left_is_neg_infty; - bfe->right_is_pos_infty = right_is_pos_infty; - bfe->child_to_read = -1; - bfe->disable_prefetching = disable_prefetching; - bfe->read_all_partitions = read_all_partitions; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} +// This is only exported for tests. 
+// append a message to a nonleaf node child buffer +void toku_ft_append_to_child_buffer(const toku::comparator &cmp, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val); -// -// Helper function to fill a ftnode_fetch_extra with data -// that will tell the fetch callback that no partitions are -// necessary, only the pivots and/or subtree estimates. -// Currently used for stat64. -// -static inline void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT h) { - paranoid_invariant(h->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_none; - bfe->h = h; - bfe->search = NULL; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - bfe->left_is_neg_infty = false; - bfe->right_is_pos_infty = false; - bfe->child_to_read = -1; - bfe->disable_prefetching = false; - bfe->read_all_partitions = false; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} - -static inline void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe) { - paranoid_invariant(bfe->type == ftnode_fetch_prefetch); - toku_destroy_dbt(&bfe->range_lock_left_key); - toku_destroy_dbt(&bfe->range_lock_right_key); -} - -// this is in a strange place because it needs the cursor struct to be defined -static inline void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, - FT h, - FT_CURSOR c) { - paranoid_invariant(h->h->type == FT_CURRENT); - bfe->type = ftnode_fetch_prefetch; - bfe->h = h; - bfe->search = NULL; - toku_init_dbt(&bfe->range_lock_left_key); - toku_init_dbt(&bfe->range_lock_right_key); - const DBT *left = &c->range_lock_left_key; - if (left->data) { - toku_clone_dbt(&bfe->range_lock_left_key, *left); - } - const DBT *right = &c->range_lock_right_key; - if (right->data) { - toku_clone_dbt(&bfe->range_lock_right_key, *right); - } - bfe->left_is_neg_infty = c->left_is_neg_infty; - bfe->right_is_pos_infty = c->right_is_pos_infty; - bfe->child_to_read = -1; - bfe->disable_prefetching = c->disable_prefetching; - bfe->read_all_partitions = false; - bfe->bytes_read = 0; - bfe->io_time = 0; - bfe->deserialize_time = 0; - bfe->decompress_time = 0; -} - -struct ancestors { - FTNODE node; // This is the root node if next is NULL. - int childnum; // which buffer holds messages destined to the node whose ancestors this list represents. - ANCESTORS next; // Parent of this node (so next->node.(next->childnum) refers to this node). -}; -struct pivot_bounds { - const DBT * const lower_bound_exclusive; - const DBT * const upper_bound_inclusive; // NULL to indicate negative or positive infinity (which are in practice exclusive since there are now transfinite keys in messages). 
-}; +STAT64INFO_S toku_get_and_clear_basement_stats(FTNODE leafnode); -__attribute__((nonnull)) -void toku_move_ftnode_messages_to_stale(FT ft, FTNODE node); -void toku_apply_ancestors_messages_to_node (FT_HANDLE t, FTNODE node, ANCESTORS ancestors, struct pivot_bounds const * const bounds, bool* msgs_applied, int child_to_read); -__attribute__((nonnull)) -bool toku_ft_leaf_needs_ancestors_messages(FT ft, FTNODE node, ANCESTORS ancestors, struct pivot_bounds const * const bounds, MSN *const max_msn_in_path, int child_to_read); -__attribute__((nonnull)) -void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read); +//#define SLOW +#ifdef SLOW +#define VERIFY_NODE(t,n) (toku_verify_or_set_counts(n), toku_verify_estimates(t,n)) +#else +#define VERIFY_NODE(t,n) ((void)0) +#endif -__attribute__((const,nonnull)) -size_t toku_ft_msg_memsize_in_fifo(FT_MSG cmd); +void toku_verify_or_set_counts(FTNODE); -int -toku_ft_search_which_child( - DESCRIPTOR desc, - ft_compare_func cmp, - FTNODE node, - ft_search_t *search - ); +// TODO: consider moving this to ft/pivotkeys.cc +class pivot_bounds { +public: + pivot_bounds(const DBT &lbe_dbt, const DBT &ubi_dbt); -bool -toku_bfe_wants_child_available (struct ftnode_fetch_extra* bfe, int childnum); + pivot_bounds next_bounds(FTNODE node, int childnum) const; -int -toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node); -int -toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node); + const DBT *lbe() const; + const DBT *ubi() const; + + static pivot_bounds infinite_bounds(); + +private: + DBT _prepivotkey(FTNODE node, int childnum, const DBT &lbe_dbt) const; + DBT _postpivotkey(FTNODE node, int childnum, const DBT &ubi_dbt) const; + + // if toku_dbt_is_empty() is true for either bound, then it represents + // negative or positive infinity (which are exclusive in practice) + const DBT _lower_bound_exclusive; + const DBT _upper_bound_inclusive; +}; // allocate a block number // allocate and initialize a ftnode // put the ftnode into the cache table -void toku_create_new_ftnode (FT_HANDLE t, FTNODE *result, int height, int n_children); - -// Effect: Fill in N as an empty ftnode. -void toku_initialize_empty_ftnode (FTNODE n, BLOCKNUM nodename, int height, int num_children, - int layout_version, unsigned int flags); - -int toku_ftnode_which_child(FTNODE node, const DBT *k, - DESCRIPTOR desc, ft_compare_func cmp) - __attribute__((__warn_unused_result__)); - -/** - * Finds the next child for HOT to flush to, given that everything up to - * and including k has been flattened. - * - * If k falls between pivots in node, then we return the childnum where k - * lies. - * - * If k is equal to some pivot, then we return the next (to the right) - * childnum. - */ -int toku_ftnode_hot_next_child(FTNODE node, - const DBT *k, - DESCRIPTOR desc, - ft_compare_func cmp); +void toku_create_new_ftnode(FT_HANDLE ft_handle, FTNODE *result, int height, int n_children); /* Stuff for testing */ // toku_testsetup_initialize() must be called before any other test_setup_xxx() functions are called. void toku_testsetup_initialize(void); -int toku_testsetup_leaf(FT_HANDLE brt, BLOCKNUM *blocknum, int n_children, char **keys, int *keylens); -int toku_testsetup_nonleaf (FT_HANDLE brt, int height, BLOCKNUM *diskoff, int n_children, BLOCKNUM *children, char **keys, int *keylens); -int toku_testsetup_root(FT_HANDLE brt, BLOCKNUM); -int toku_testsetup_get_sersize(FT_HANDLE brt, BLOCKNUM); // Return the size on disk. 
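The pivot_bounds class above carries the (exclusive lower, inclusive upper) key interval a node inherits from its ancestors, and next_bounds narrows it to one child's slice using the parent's pivots. A sketch over integer keys follows, where an empty optional plays the role of the empty DBT meaning negative or positive infinity; the real class stores DBTs and compares with the ft's comparator.

#include <cassert>
#include <optional>
#include <vector>

// Integer model of pivot_bounds: nullopt plays the role of the empty DBT,
// i.e. negative or positive infinity.
struct bounds {
    std::optional<int> lower_exclusive;
    std::optional<int> upper_inclusive;

    // Narrow the interval to child `childnum` of a node with the given pivots,
    // mirroring pivot_bounds::next_bounds (prepivotkey / postpivotkey).
    bounds next_bounds(const std::vector<int> &pivots, int childnum) const {
        bounds b;
        b.lower_exclusive = (childnum > 0) ? std::optional<int>(pivots[childnum - 1])
                                           : lower_exclusive;
        b.upper_inclusive = (childnum < (int)pivots.size()) ? std::optional<int>(pivots[childnum])
                                                            : upper_inclusive;
        return b;
    }
};

int main() {
    bounds infinite;                         // like pivot_bounds::infinite_bounds()
    std::vector<int> pivots{100, 200, 300};  // a node with 4 children
    bounds middle = infinite.next_bounds(pivots, 1);
    assert(middle.lower_exclusive == 100 && middle.upper_inclusive == 200);
    bounds rightmost = infinite.next_bounds(pivots, 3);
    assert(rightmost.lower_exclusive == 300 && !rightmost.upper_inclusive.has_value());
    return 0;
}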
-int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM, const char *key, int keylen, const char *val, int vallen); -int toku_testsetup_insert_to_nonleaf (FT_HANDLE brt, BLOCKNUM, enum ft_msg_type, const char *key, int keylen, const char *val, int vallen); +int toku_testsetup_leaf(FT_HANDLE ft_h, BLOCKNUM *blocknum, int n_children, char **keys, int *keylens); +int toku_testsetup_nonleaf (FT_HANDLE ft_h, int height, BLOCKNUM *blocknum, int n_children, BLOCKNUM *children, char **keys, int *keylens); +int toku_testsetup_root(FT_HANDLE ft_h, BLOCKNUM); +int toku_testsetup_get_sersize(FT_HANDLE ft_h, BLOCKNUM); // Return the size on disk. +int toku_testsetup_insert_to_leaf (FT_HANDLE ft_h, BLOCKNUM, const char *key, int keylen, const char *val, int vallen); +int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_h, BLOCKNUM, enum ft_msg_type, const char *key, int keylen, const char *val, int vallen); void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t); -// toku_ft_root_put_cmd() accepts non-constant cmd because this is where we set the msn -void toku_ft_root_put_cmd(FT h, FT_MSG_S * cmd, txn_gc_info *gc_info); +void toku_ft_root_put_msg(FT ft, const ft_msg &msg, txn_gc_info *gc_info); -void -toku_get_node_for_verify( - BLOCKNUM blocknum, - FT_HANDLE brt, - FTNODE* nodep - ); +// TODO: Rename +void toku_get_node_for_verify(BLOCKNUM blocknum, FT_HANDLE ft_h, FTNODE* nodep); int -toku_verify_ftnode (FT_HANDLE brt, +toku_verify_ftnode (FT_HANDLE ft_h, MSN rootmsn, MSN parentmsn_with_messages, bool messages_exist_above, FTNODE node, int height, const DBT *lesser_pivot, // Everything in the subtree should be > lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.) @@ -1185,6 +611,11 @@ typedef enum { FT_PRO_NUM_STOP_LOCK_CHILD, FT_PRO_NUM_STOP_CHILD_INMEM, FT_PRO_NUM_DIDNT_WANT_PROMOTE, + FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, // how many basement nodes were deserialized with a fixed keysize + FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, // how many basement nodes were deserialized with a variable keysize + FT_PRO_RIGHTMOST_LEAF_SHORTCUT_SUCCESS, + FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_POS, + FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE, FT_STATUS_NUM_ROWS } ft_status_entry; @@ -1193,61 +624,37 @@ typedef struct { TOKU_ENGINE_STATUS_ROW_S status[FT_STATUS_NUM_ROWS]; } FT_STATUS_S, *FT_STATUS; -void toku_ft_get_status(FT_STATUS); +void toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe); +void toku_ft_status_update_flush_reason(FTNODE node, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written, tokutime_t write_time, bool for_checkpoint); +void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time); +void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time); +void toku_ft_status_note_msn_discard(void); +void toku_ft_status_note_update(bool broadcast); +void toku_ft_status_note_msg_bytes_out(size_t buffsize); +void toku_ft_status_note_ftnode(int height, bool created); // created = false means destroyed -void -toku_ft_bn_apply_cmd_once ( - BASEMENTNODE bn, - const FT_MSG cmd, - uint32_t idx, - LEAFENTRY le, - txn_gc_info *gc_info, - uint64_t *workdonep, - STAT64INFO stats_to_update - ); - -void -toku_ft_bn_apply_cmd ( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - BASEMENTNODE bn, - FT_MSG cmd, - txn_gc_info *gc_info, - uint64_t *workdone, - STAT64INFO stats_to_update - ); - -void -toku_ft_leaf_apply_cmd ( - ft_compare_func 
compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - FTNODE node, - int target_childnum, - FT_MSG cmd, - txn_gc_info *gc_info, - uint64_t *workdone, - STAT64INFO stats_to_update - ); - -void -toku_ft_node_put_cmd ( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - FTNODE node, - int target_childnum, - FT_MSG cmd, - bool is_fresh, - txn_gc_info *gc_info, - size_t flow_deltas[], - STAT64INFO stats_to_update - ); +void toku_ft_get_status(FT_STATUS); void toku_flusher_thread_set_callback(void (*callback_f)(int, void*), void* extra); -int toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h) __attribute__((nonnull)); -int toku_upgrade_msn_from_root_to_header(int fd, FT h) __attribute__((nonnull)); - -#endif +// For upgrade +int toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft) __attribute__((nonnull)); +int toku_upgrade_msn_from_root_to_header(int fd, FT ft) __attribute__((nonnull)); + +// A callback function is invoked with the key, and the data. +// The pointers (to the bytevecs) must not be modified. The data must be copied out before the callback function returns. +// Note: In the thread-safe version, the ftnode remains locked while the callback function runs. So return soon, and don't call the ft code from the callback function. +// If the callback function returns a nonzero value (an error code), then that error code is returned from the get function itself. +// The cursor object will have been updated (so that if result==0 the current value is the value being passed) +// (If r!=0 then the cursor won't have been updated.) +// If r!=0, it's up to the callback function to return that value of r. +// A 'key' pointer of NULL means that element is not found (effectively infinity or +// -infinity depending on direction) +// When lock_only is false, the callback does optional lock tree locking and then processes the key and val. +// When lock_only is true, the callback only does optional lock tree locking. +typedef int (*FT_GET_CALLBACK_FUNCTION)(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only); + +typedef bool (*FT_CHECK_INTERRUPT_CALLBACK)(void *extra); + +struct ft_cursor; +int toku_ft_search(FT_HANDLE ft_handle, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, struct ft_cursor *ftcursor, bool can_bulk_fetch); diff --git a/storage/tokudb/ft-index/ft/ft-ops.cc b/storage/tokudb/ft-index/ft/ft-ops.cc index d61af8716e38e..bf845d2c38db9 100644 --- a/storage/tokudb/ft-index/ft/ft-ops.cc +++ b/storage/tokudb/ft-index/ft/ft-ops.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -168,7 +168,7 @@ Split_or_merge (node, childnum) { return; If the child needs to be merged (it's a leaf with too little stuff (less than 1/4 full) or a nonleaf with too little fanout (less than 1/4) fetch node, the child and a sibling of the child into main memory. - move all messages from the node to the two children (so that the FIFOs are empty) + move all messages from the node to the two children (so that the message buffers are empty) If the two siblings together fit into one node then merge the two siblings. 
fixup the node to point at one child @@ -200,20 +200,24 @@ basement nodes, bulk fetch, and partial fetch: */ -#include "checkpoint.h" -#include "ft.h" -#include "ft-cachetable-wrappers.h" -#include "ft-flusher.h" -#include "ft-internal.h" -#include "ft_layout_version.h" -#include "key.h" -#include "log-internal.h" -#include "sub_block.h" -#include "txn_manager.h" -#include "leafentry.h" -#include "xids.h" -#include "ft_msg.h" -#include "ule.h" +#include "ft/cachetable/checkpoint.h" +#include "ft/cursor.h" +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-flusher.h" +#include "ft/ft-internal.h" +#include "ft/msg.h" +#include "ft/leafentry.h" +#include "ft/logger/log-internal.h" +#include "ft/node.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/sub_block.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_layout_version.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/txn/txn_manager.h" +#include "ft/ule.h" +#include "ft/txn/xids.h" #include @@ -228,14 +232,12 @@ basement nodes, bulk fetch, and partial fetch: #include -static const uint32_t this_version = FT_LAYOUT_VERSION; - /* Status is intended for display to humans to help understand system behavior. * It does not need to be perfectly thread-safe. */ static FT_STATUS_S ft_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ft_status, k, c, t, "brt: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ft_status, k, c, t, "ft: " l, inc) static toku_mutex_t ft_open_close_lock; @@ -367,6 +369,11 @@ status_init(void) STATUS_INIT(FT_PRO_NUM_STOP_LOCK_CHILD, PROMOTION_STOPPED_CHILD_LOCKED_OR_NOT_IN_MEMORY, PARCOUNT, "promotion: stopped because the child was locked or not at all in memory", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(FT_PRO_NUM_STOP_CHILD_INMEM, PROMOTION_STOPPED_CHILD_NOT_FULLY_IN_MEMORY, PARCOUNT, "promotion: stopped because the child was not fully in memory", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(FT_PRO_NUM_DIDNT_WANT_PROMOTE, PROMOTION_STOPPED_AFTER_LOCKING_CHILD, PARCOUNT, "promotion: stopped anyway, after locking the child", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, BASEMENT_DESERIALIZATION_FIXED_KEY, PARCOUNT, "basement nodes deserialized with fixed-keysize", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, BASEMENT_DESERIALIZATION_VARIABLE_KEY, PARCOUNT, "basement nodes deserialized with variable-keysize", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_SUCCESS, nullptr, PARCOUNT, "promotion: succeeded in using the rightmost leaf shortcut", TOKU_ENGINE_STATUS); + STATUS_INIT(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_POS, nullptr, PARCOUNT, "promotion: tried the rightmost leaf shorcut but failed (out-of-bounds)", TOKU_ENGINE_STATUS); + STATUS_INIT(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE,nullptr, PARCOUNT, "promotion: tried the rightmost leaf shorcut but failed (child reactive)", TOKU_ENGINE_STATUS); ft_status.initialized = true; } @@ -419,172 +426,82 @@ toku_ft_get_status(FT_STATUS s) { } \ } while (0) -bool is_entire_node_in_memory(FTNODE node) { - for (int i = 0; i < node->n_children; i++) { - if(BP_STATE(node,i) != PT_AVAIL) { - return false; - } - } - return true; -} - -void -toku_assert_entire_node_in_memory(FTNODE UU() node) { - paranoid_invariant(is_entire_node_in_memory(node)); -} -uint32_t -get_leaf_num_entries(FTNODE node) { - uint32_t result = 0; - int i; - 
toku_assert_entire_node_in_memory(node); - for ( i = 0; i < node->n_children; i++) { - result += BLB_DATA(node, i)->omt_size(); +void toku_note_deserialized_basement_node(bool fixed_key_size) { + if (fixed_key_size) { + STATUS_INC(FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, 1); + } else { + STATUS_INC(FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, 1); } - return result; } -static enum reactivity -get_leaf_reactivity (FTNODE node, uint32_t nodesize) { - enum reactivity re = RE_STABLE; - toku_assert_entire_node_in_memory(node); - paranoid_invariant(node->height==0); - unsigned int size = toku_serialize_ftnode_size(node); - if (size > nodesize && get_leaf_num_entries(node) > 1) { - re = RE_FISSIBLE; - } - else if ((size*4) < nodesize && !BLB_SEQINSERT(node, node->n_children-1)) { - re = RE_FUSIBLE; - } - return re; +static void ft_verify_flags(FT UU(ft), FTNODE UU(node)) { + paranoid_invariant(ft->h->flags == node->flags); } -enum reactivity -get_nonleaf_reactivity(FTNODE node, unsigned int fanout) { +int toku_ft_debug_mode = 0; + +uint32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum) { paranoid_invariant(node->height>0); - int n_children = node->n_children; - if (n_children > (int) fanout) return RE_FISSIBLE; - if (n_children*4 < (int) fanout) return RE_FUSIBLE; - return RE_STABLE; + paranoid_invariant(childnum<node->n_children); + return toku_cachetable_hash(cf, BP_BLOCKNUM(node, childnum)); } -enum reactivity -get_node_reactivity(FT ft, FTNODE node) { - toku_assert_entire_node_in_memory(node); - if (node->height==0) - return get_leaf_reactivity(node, ft->h->nodesize); - else - return get_nonleaf_reactivity(node, ft->h->fanout); -} +// +// pivot bounds +// TODO: move me to ft/node.cc? +// -unsigned int -toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc) -{ - return toku_fifo_buffer_size_in_use(bnc->buffer); +pivot_bounds::pivot_bounds(const DBT &lbe_dbt, const DBT &ubi_dbt) : + _lower_bound_exclusive(lbe_dbt), _upper_bound_inclusive(ubi_dbt) { } -// return true if the size of the buffers plus the amount of work done is large enough. (But return false if there is nothing to be flushed (the buffers empty)).
-bool -toku_ft_nonleaf_is_gorged (FTNODE node, uint32_t nodesize) { - uint64_t size = toku_serialize_ftnode_size(node); - - bool buffers_are_empty = true; - toku_assert_entire_node_in_memory(node); - // - // the nonleaf node is gorged if the following holds true: - // - the buffers are non-empty - // - the total workdone by the buffers PLUS the size of the buffers - // is greater than nodesize (which as of Maxwell should be - // 4MB) - // - paranoid_invariant(node->height > 0); - for (int child = 0; child < node->n_children; ++child) { - size += BP_WORKDONE(node, child); - } - for (int child = 0; child < node->n_children; ++child) { - if (toku_bnc_nbytesinbuf(BNC(node, child)) > 0) { - buffers_are_empty = false; - break; - } - } - return ((size > nodesize) - && - (!buffers_are_empty)); -} +pivot_bounds pivot_bounds::infinite_bounds() { + DBT dbt; + toku_init_dbt(&dbt); -static void ft_verify_flags(FT UU(ft), FTNODE UU(node)) { - paranoid_invariant(ft->h->flags == node->flags); + // infinity is represented by an empty dbt + invariant(toku_dbt_is_empty(&dbt)); + return pivot_bounds(dbt, dbt); } -int toku_ft_debug_mode = 0; - -uint32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum) { - paranoid_invariant(node->height>0); - paranoid_invariant(childnumn_children); - return toku_cachetable_hash(cf, BP_BLOCKNUM(node, childnum)); +const DBT *pivot_bounds::lbe() const { + return &_lower_bound_exclusive; } -int -toku_bnc_n_entries(NONLEAF_CHILDINFO bnc) -{ - return toku_fifo_n_entries(bnc->buffer); +const DBT *pivot_bounds::ubi() const { + return &_upper_bound_inclusive; } -static const DBT *prepivotkey (FTNODE node, int childnum, const DBT * const lower_bound_exclusive) { - if (childnum==0) - return lower_bound_exclusive; - else { - return &node->childkeys[childnum-1]; +DBT pivot_bounds::_prepivotkey(FTNODE node, int childnum, const DBT &lbe_dbt) const { + if (childnum == 0) { + return lbe_dbt; + } else { + return node->pivotkeys.get_pivot(childnum - 1); } } -static const DBT *postpivotkey (FTNODE node, int childnum, const DBT * const upper_bound_inclusive) { - if (childnum+1 == node->n_children) - return upper_bound_inclusive; - else { - return &node->childkeys[childnum]; +DBT pivot_bounds::_postpivotkey(FTNODE node, int childnum, const DBT &ubi_dbt) const { + if (childnum + 1 == node->n_children) { + return ubi_dbt; + } else { + return node->pivotkeys.get_pivot(childnum); } } -static struct pivot_bounds next_pivot_keys (FTNODE node, int childnum, struct pivot_bounds const * const old_pb) { - struct pivot_bounds pb = {.lower_bound_exclusive = prepivotkey(node, childnum, old_pb->lower_bound_exclusive), - .upper_bound_inclusive = postpivotkey(node, childnum, old_pb->upper_bound_inclusive)}; - return pb; -} -// how much memory does this child buffer consume? -long -toku_bnc_memory_size(NONLEAF_CHILDINFO bnc) -{ - return (sizeof(*bnc) + - toku_fifo_memory_footprint(bnc->buffer) + - bnc->fresh_message_tree.memory_size() + - bnc->stale_message_tree.memory_size() + - bnc->broadcast_list.memory_size()); +pivot_bounds pivot_bounds::next_bounds(FTNODE node, int childnum) const { + return pivot_bounds(_prepivotkey(node, childnum, _lower_bound_exclusive), + _postpivotkey(node, childnum, _upper_bound_inclusive)); } -// how much memory in this child buffer holds useful data? -// originally created solely for use by test program(s). 
-long -toku_bnc_memory_used(NONLEAF_CHILDINFO bnc) -{ - return (sizeof(*bnc) + - toku_fifo_memory_size_in_use(bnc->buffer) + - bnc->fresh_message_tree.memory_size() + - bnc->stale_message_tree.memory_size() + - bnc->broadcast_list.memory_size()); -} +//////////////////////////////////////////////////////////////////////////////// -static long -get_avail_internal_node_partition_size(FTNODE node, int i) -{ +static long get_avail_internal_node_partition_size(FTNODE node, int i) { paranoid_invariant(node->height > 0); return toku_bnc_memory_size(BNC(node, i)); } - -static long -ftnode_cachepressure_size(FTNODE node) -{ +static long ftnode_cachepressure_size(FTNODE node) { long retval = 0; bool totally_empty = true; if (node->height == 0) { @@ -625,7 +542,7 @@ ftnode_memory_size (FTNODE node) int n_children = node->n_children; retval += sizeof(*node); retval += (n_children)*(sizeof(node->bp[0])); - retval += node->totalchildkeylens; + retval += node->pivotkeys.total_size(); // now calculate the sizes of the partitions for (int i = 0; i < n_children; i++) { @@ -691,54 +608,145 @@ next_dict_id(void) { return d; } -// -// Given a bfe and a childnum, returns whether the query that constructed the bfe -// wants the child available. -// Requires: bfe->child_to_read to have been set -// -bool -toku_bfe_wants_child_available (struct ftnode_fetch_extra* bfe, int childnum) -{ - return bfe->type == ftnode_fetch_all || - (bfe->child_to_read == childnum && - (bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_keymatch)); +// TODO: This isn't so pretty +void ftnode_fetch_extra::_create_internal(FT ft_) { + ft = ft_; + type = ftnode_fetch_none; + search = nullptr; + + toku_init_dbt(&range_lock_left_key); + toku_init_dbt(&range_lock_right_key); + left_is_neg_infty = false; + right_is_pos_infty = false; + + // -1 means 'unknown', which is the correct default state + child_to_read = -1; + disable_prefetching = false; + read_all_partitions = false; + + bytes_read = 0; + io_time = 0; + deserialize_time = 0; + decompress_time = 0; } -int -toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node) -{ - paranoid_invariant(bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch || bfe->type == ftnode_fetch_keymatch); - if (bfe->left_is_neg_infty) { +void ftnode_fetch_extra::create_for_full_read(FT ft_) { + _create_internal(ft_); + + type = ftnode_fetch_all; +} + +void ftnode_fetch_extra::create_for_keymatch(FT ft_, const DBT *left, const DBT *right, + bool disable_prefetching_, bool read_all_partitions_) { + _create_internal(ft_); + invariant(ft->h->type == FT_CURRENT); + + type = ftnode_fetch_keymatch; + if (left != nullptr) { + toku_copyref_dbt(&range_lock_left_key, *left); + } + if (right != nullptr) { + toku_copyref_dbt(&range_lock_right_key, *right); + } + left_is_neg_infty = left == nullptr; + right_is_pos_infty = right == nullptr; + disable_prefetching = disable_prefetching_; + read_all_partitions = read_all_partitions_; +} + +void ftnode_fetch_extra::create_for_subset_read(FT ft_, ft_search *search_, + const DBT *left, const DBT *right, + bool left_is_neg_infty_, bool right_is_pos_infty_, + bool disable_prefetching_, bool read_all_partitions_) { + _create_internal(ft_); + invariant(ft->h->type == FT_CURRENT); + + type = ftnode_fetch_subset; + search = search_; + if (left != nullptr) { + toku_copyref_dbt(&range_lock_left_key, *left); + } + if (right != nullptr) { + toku_copyref_dbt(&range_lock_right_key, *right); + } + left_is_neg_infty = left_is_neg_infty_; + 
right_is_pos_infty = right_is_pos_infty_; + disable_prefetching = disable_prefetching_; + read_all_partitions = read_all_partitions_; +} + +void ftnode_fetch_extra::create_for_min_read(FT ft_) { + _create_internal(ft_); + invariant(ft->h->type == FT_CURRENT); + + type = ftnode_fetch_none; +} + +void ftnode_fetch_extra::create_for_prefetch(FT ft_, struct ft_cursor *cursor) { + _create_internal(ft_); + invariant(ft->h->type == FT_CURRENT); + + type = ftnode_fetch_prefetch; + const DBT *left = &cursor->range_lock_left_key; + if (left->data) { + toku_clone_dbt(&range_lock_left_key, *left); + } + const DBT *right = &cursor->range_lock_right_key; + if (right->data) { + toku_clone_dbt(&range_lock_right_key, *right); + } + left_is_neg_infty = cursor->left_is_neg_infty; + right_is_pos_infty = cursor->right_is_pos_infty; + disable_prefetching = cursor->disable_prefetching; +} + +void ftnode_fetch_extra::destroy(void) { + toku_destroy_dbt(&range_lock_left_key); + toku_destroy_dbt(&range_lock_right_key); +} + +// Requires: child_to_read to have been set +bool ftnode_fetch_extra::wants_child_available(int childnum) const { + return type == ftnode_fetch_all || + (child_to_read == childnum && + (type == ftnode_fetch_subset || type == ftnode_fetch_keymatch)); +} + +int ftnode_fetch_extra::leftmost_child_wanted(FTNODE node) const { + paranoid_invariant(type == ftnode_fetch_subset || + type == ftnode_fetch_prefetch || + type == ftnode_fetch_keymatch); + if (left_is_neg_infty) { return 0; - } else if (bfe->range_lock_left_key.data == nullptr) { + } else if (range_lock_left_key.data == nullptr) { return -1; } else { - return toku_ftnode_which_child(node, &bfe->range_lock_left_key, &bfe->h->cmp_descriptor, bfe->h->compare_fun); + return toku_ftnode_which_child(node, &range_lock_left_key, ft->cmp); } } -int -toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node) -{ - paranoid_invariant(bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch || bfe->type == ftnode_fetch_keymatch); - if (bfe->right_is_pos_infty) { +int ftnode_fetch_extra::rightmost_child_wanted(FTNODE node) const { + paranoid_invariant(type == ftnode_fetch_subset || + type == ftnode_fetch_prefetch || + type == ftnode_fetch_keymatch); + if (right_is_pos_infty) { return node->n_children - 1; - } else if (bfe->range_lock_right_key.data == nullptr) { + } else if (range_lock_right_key.data == nullptr) { return -1; } else { - return toku_ftnode_which_child(node, &bfe->range_lock_right_key, &bfe->h->cmp_descriptor, bfe->h->compare_fun); + return toku_ftnode_which_child(node, &range_lock_right_key, ft->cmp); } } static int -ft_cursor_rightmost_child_wanted(FT_CURSOR cursor, FT_HANDLE brt, FTNODE node) +ft_cursor_rightmost_child_wanted(FT_CURSOR cursor, FT_HANDLE ft_handle, FTNODE node) { if (cursor->right_is_pos_infty) { return node->n_children - 1; } else if (cursor->range_lock_right_key.data == nullptr) { return -1; } else { - return toku_ftnode_which_child(node, &cursor->range_lock_right_key, &brt->ft->cmp_descriptor, brt->ft->compare_fun); + return toku_ftnode_which_child(node, &cursor->range_lock_right_key, ft_handle->ft->cmp); } } @@ -789,45 +797,6 @@ void toku_ft_status_update_flush_reason(FTNODE node, } } -static void ftnode_update_disk_stats( - FTNODE ftnode, - FT ft, - bool for_checkpoint - ) -{ - STAT64INFO_S deltas = ZEROSTATS; - // capture deltas before rebalancing basements for serialization - deltas = toku_get_and_clear_basement_stats(ftnode); - // locking not necessary here with respect to 
checkpointing - // in Clayface (because of the pending lock and cachetable lock - // in toku_cachetable_begin_checkpoint) - // essentially, if we are dealing with a for_checkpoint - // parameter in a function that is called by the flush_callback, - // then the cachetable needs to ensure that this is called in a safe - // manner that does not interfere with the beginning - // of a checkpoint, which it does with the cachetable lock - // and pending lock - toku_ft_update_stats(&ft->h->on_disk_stats, deltas); - if (for_checkpoint) { - toku_ft_update_stats(&ft->checkpoint_header->on_disk_stats, deltas); - } -} - -static void ftnode_clone_partitions(FTNODE node, FTNODE cloned_node) { - for (int i = 0; i < node->n_children; i++) { - BP_BLOCKNUM(cloned_node,i) = BP_BLOCKNUM(node,i); - paranoid_invariant(BP_STATE(node,i) == PT_AVAIL); - BP_STATE(cloned_node,i) = PT_AVAIL; - BP_WORKDONE(cloned_node, i) = BP_WORKDONE(node, i); - if (node->height == 0) { - set_BLB(cloned_node, i, toku_clone_bn(BLB(node,i))); - } - else { - set_BNC(cloned_node, i, toku_clone_nl(BNC(node,i))); - } - } -} - void toku_ftnode_checkpoint_complete_callback(void *value_data) { FTNODE node = static_cast(value_data); if (node->height > 0) { @@ -851,20 +820,20 @@ void toku_ftnode_clone_callback( ) { FTNODE node = static_cast(value_data); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); FT ft = static_cast(write_extraargs); FTNODE XCALLOC(cloned_node); if (node->height == 0) { // set header stats, must be done before rebalancing - ftnode_update_disk_stats(node, ft, for_checkpoint); + toku_ftnode_update_disk_stats(node, ft, for_checkpoint); // rebalance the leaf node - rebalance_ftnode_leaf(node, ft->h->basementnodesize); + toku_ftnode_leaf_rebalance(node, ft->h->basementnodesize); } cloned_node->oldest_referenced_xid_known = node->oldest_referenced_xid_known; cloned_node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_on_disk; cloned_node->flags = node->flags; - cloned_node->thisnodename = node->thisnodename; + cloned_node->blocknum = node->blocknum; cloned_node->layout_version = node->layout_version; cloned_node->layout_version_original = node->layout_version_original; cloned_node->layout_version_read_from_disk = node->layout_version_read_from_disk; @@ -873,16 +842,17 @@ void toku_ftnode_clone_callback( cloned_node->dirty = node->dirty; cloned_node->fullhash = node->fullhash; cloned_node->n_children = node->n_children; - cloned_node->totalchildkeylens = node->totalchildkeylens; - XMALLOC_N(node->n_children-1, cloned_node->childkeys); XMALLOC_N(node->n_children, cloned_node->bp); // clone pivots - for (int i = 0; i < node->n_children-1; i++) { - toku_clone_dbt(&cloned_node->childkeys[i], node->childkeys[i]); + cloned_node->pivotkeys.create_from_pivot_keys(node->pivotkeys); + if (node->height > 0) { + // need to move messages here so that we don't serialize stale + // messages to the fresh tree - ft verify code complains otherwise. 
+ toku_move_ftnode_messages_to_stale(ft, node); } // clone partition - ftnode_clone_partitions(node, cloned_node); + toku_ftnode_clone_partitions(node, cloned_node); // clear dirty bit node->dirty = 0; @@ -899,12 +869,10 @@ void toku_ftnode_clone_callback( *cloned_value_data = cloned_node; } -static void ft_leaf_run_gc(FT ft, FTNODE node); - void toku_ftnode_flush_callback( CACHEFILE UU(cachefile), int fd, - BLOCKNUM nodename, + BLOCKNUM blocknum, void *ftnode_v, void** disk_data, void *extraargs, @@ -916,20 +884,23 @@ void toku_ftnode_flush_callback( bool is_clone ) { - FT h = (FT) extraargs; + FT ft = (FT) extraargs; FTNODE ftnode = (FTNODE) ftnode_v; FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data; - assert(ftnode->thisnodename.b==nodename.b); + assert(ftnode->blocknum.b == blocknum.b); int height = ftnode->height; if (write_me) { - toku_assert_entire_node_in_memory(ftnode); - if (height == 0) { - ft_leaf_run_gc(h, ftnode); - } - if (height == 0 && !is_clone) { - ftnode_update_disk_stats(ftnode, h, for_checkpoint); + toku_ftnode_assert_fully_in_memory(ftnode); + if (height > 0 && !is_clone) { + // cloned nodes already had their stale messages moved, see toku_ftnode_clone_callback() + toku_move_ftnode_messages_to_stale(ft, ftnode); + } else if (height == 0) { + toku_ftnode_leaf_run_gc(ft, ftnode); + if (!is_clone) { + toku_ftnode_update_disk_stats(ftnode, ft, for_checkpoint); + } } - int r = toku_serialize_ftnode_to(fd, ftnode->thisnodename, ftnode, ndd, !is_clone, h, for_checkpoint); + int r = toku_serialize_ftnode_to(fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint); assert_zero(r); ftnode->layout_version_read_from_disk = FT_LAYOUT_VERSION; } @@ -950,7 +921,7 @@ void toku_ftnode_flush_callback( for (int i = 0; i < ftnode->n_children; i++) { if (BP_STATE(ftnode,i) == PT_AVAIL) { BASEMENTNODE bn = BLB(ftnode, i); - toku_ft_decrease_stats(&h->in_memory_stats, bn->stat64_delta); + toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); } } } @@ -963,7 +934,7 @@ void toku_ftnode_flush_callback( } void -toku_ft_status_update_pivot_fetch_reason(struct ftnode_fetch_extra *bfe) +toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe) { if (bfe->type == ftnode_fetch_prefetch) { STATUS_INC(FT_NUM_PIVOTS_FETCHED_PREFETCH, 1); @@ -980,17 +951,17 @@ toku_ft_status_update_pivot_fetch_reason(struct ftnode_fetch_extra *bfe) } } -int toku_ftnode_fetch_callback (CACHEFILE UU(cachefile), PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, +int toku_ftnode_fetch_callback (CACHEFILE UU(cachefile), PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash, void **ftnode_pv, void** disk_data, PAIR_ATTR *sizep, int *dirtyp, void *extraargs) { assert(extraargs); assert(*ftnode_pv == NULL); FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data; - struct ftnode_fetch_extra *bfe = (struct ftnode_fetch_extra *)extraargs; + ftnode_fetch_extra *bfe = (ftnode_fetch_extra *)extraargs; FTNODE *node=(FTNODE*)ftnode_pv; // deserialize the node, must pass the bfe in because we cannot // evaluate what piece of the the node is necessary until we get it at // least partially into memory - int r = toku_deserialize_ftnode_from(fd, nodename, fullhash, node, ndd, bfe); + int r = toku_deserialize_ftnode_from(fd, blocknum, fullhash, node, ndd, bfe); if (r != 0) { if (r == TOKUDB_BAD_CHECKSUM) { fprintf(stderr, @@ -1071,10 +1042,7 @@ void toku_ftnode_pe_est_callback( } // replace the child buffer with a compressed version of itself. 
-// @return the old child buffer -static NONLEAF_CHILDINFO -compress_internal_node_partition(FTNODE node, int i, enum toku_compression_method compression_method) -{ +static void compress_internal_node_partition(FTNODE node, int i, enum toku_compression_method compression_method) { // if we should evict, compress the // message buffer into a sub_block assert(BP_STATE(node, i) == PT_AVAIL); @@ -1083,29 +1051,9 @@ compress_internal_node_partition(FTNODE node, int i, enum toku_compression_metho sub_block_init(sb); toku_create_compressed_partition_from_available(node, i, compression_method, sb); - // now set the state to compressed and return the old, available partition - NONLEAF_CHILDINFO bnc = BNC(node, i); + // now set the state to compressed set_BSB(node, i, sb); BP_STATE(node,i) = PT_COMPRESSED; - return bnc; -} - -void toku_evict_bn_from_memory(FTNODE node, int childnum, FT h) { - // free the basement node - assert(!node->dirty); - BASEMENTNODE bn = BLB(node, childnum); - toku_ft_decrease_stats(&h->in_memory_stats, bn->stat64_delta); - destroy_basement_node(bn); - set_BNULL(node, childnum); - BP_STATE(node, childnum) = PT_ON_DISK; -} - -BASEMENTNODE toku_detach_bn(FTNODE node, int childnum) { - assert(BP_STATE(node, childnum) == PT_AVAIL); - BASEMENTNODE bn = BLB(node, childnum); - set_BNULL(node, childnum); - BP_STATE(node, childnum) = PT_ON_DISK; - return bn; } // callback for partially evicting a node @@ -1140,18 +1088,27 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext for (int i = 0; i < node->n_children; i++) { if (BP_STATE(node,i) == PT_AVAIL) { if (BP_SHOULD_EVICT(node,i)) { - NONLEAF_CHILDINFO bnc; - if (ft_compress_buffers_before_eviction) { - // When partially evicting, always compress with quicklz - bnc = compress_internal_node_partition( + NONLEAF_CHILDINFO bnc = BNC(node, i); + if (ft_compress_buffers_before_eviction && + // We may not serialize and compress a partition in memory if its + // in memory layout version is different than what's on disk (and + // therefore requires upgrade). + // + // Auto-upgrade code assumes that if a node's layout version read + // from disk is not current, it MUST require upgrade. Breaking + // this rule would cause upgrade code to upgrade this partition + // again after we serialize it as the current version, which is bad. + node->layout_version == node->layout_version_read_from_disk) { + toku_ft_bnc_move_messages_to_stale(ft, bnc); + compress_internal_node_partition( node, i, + // Always compress with quicklz TOKU_QUICKLZ_METHOD ); } else { // We're not compressing buffers before eviction. Simply // detach the buffer and set the child's state to on-disk. 
- bnc = BNC(node, i); set_BNULL(node, i); BP_STATE(node, i) = PT_ON_DISK; } @@ -1259,9 +1216,9 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) { // placeholder for now bool retval = false; FTNODE node = (FTNODE) ftnode_pv; - struct ftnode_fetch_extra *bfe = (struct ftnode_fetch_extra *) read_extraargs; + ftnode_fetch_extra *bfe = (ftnode_fetch_extra *) read_extraargs; // - // The three types of fetches that the brt layer may request are: + // The three types of fetches that the ft layer may request are: // - ftnode_fetch_none: no partitions are necessary (example use: stat64) // - ftnode_fetch_subset: some subset is necessary (example use: toku_ft_search) // - ftnode_fetch_all: entire node is necessary (example use: flush, split, merge) @@ -1289,11 +1246,9 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) { // we can possibly require is a single basement node // we find out what basement node the query cares about // and check if it is available - paranoid_invariant(bfe->h->compare_fun); paranoid_invariant(bfe->search); bfe->child_to_read = toku_ft_search_which_child( - &bfe->h->cmp_descriptor, - bfe->h->compare_fun, + bfe->ft->cmp, node, bfe->search ); @@ -1305,8 +1260,8 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) { // makes no sense to have prefetching disabled // and still call this function paranoid_invariant(!bfe->disable_prefetching); - int lc = toku_bfe_leftmost_child_wanted(bfe, node); - int rc = toku_bfe_rightmost_child_wanted(bfe, node); + int lc = bfe->leftmost_child_wanted(node); + int rc = bfe->rightmost_child_wanted(node); for (int i = lc; i <= rc; ++i) { if (BP_STATE(node, i) != PT_AVAIL) { retval = true; @@ -1318,10 +1273,9 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) { // we can possibly require is a single basement node // we find out what basement node the query cares about // and check if it is available - paranoid_invariant(bfe->h->compare_fun); if (node->height == 0) { - int left_child = toku_bfe_leftmost_child_wanted(bfe, node); - int right_child = toku_bfe_rightmost_child_wanted(bfe, node); + int left_child = bfe->leftmost_child_wanted(node); + int right_child = bfe->rightmost_child_wanted(node); if (left_child == right_child) { bfe->child_to_read = left_child; unsafe_touch_clock(node,bfe->child_to_read); @@ -1338,7 +1292,7 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) { static void ft_status_update_partial_fetch_reason( - struct ftnode_fetch_extra* bfe, + ftnode_fetch_extra *bfe, int childnum, enum pt_state state, bool is_leaf @@ -1437,13 +1391,41 @@ void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize } } +void toku_ft_status_note_msn_discard(void) { + STATUS_INC(FT_MSN_DISCARDS, 1); +} + +void toku_ft_status_note_update(bool broadcast) { + if (broadcast) { + STATUS_INC(FT_UPDATES_BROADCAST, 1); + } else { + STATUS_INC(FT_UPDATES, 1); + } +} + +void toku_ft_status_note_msg_bytes_out(size_t buffsize) { + STATUS_INC(FT_MSG_BYTES_OUT, buffsize); + STATUS_INC(FT_MSG_BYTES_CURR, -buffsize); +} +void toku_ft_status_note_ftnode(int height, bool created) { + if (created) { + if (height == 0) { + STATUS_INC(FT_CREATE_LEAF, 1); + } else { + STATUS_INC(FT_CREATE_NONLEAF, 1); + } + } else { + // created = false means destroyed + } +} + // callback for partially reading a node // could have just used toku_ftnode_fetch_callback, but wanted to separate the two cases to separate functions int 
toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraargs, int fd, PAIR_ATTR* sizep) { int r = 0; FTNODE node = (FTNODE) ftnode_pv; FTNODE_DISK_DATA ndd = (FTNODE_DISK_DATA) disk_data; - struct ftnode_fetch_extra *bfe = (struct ftnode_fetch_extra *) read_extraargs; + ftnode_fetch_extra *bfe = (ftnode_fetch_extra *) read_extraargs; // there must be a reason this is being called. If we get a garbage type or the type is ftnode_fetch_none, // then something went wrong assert((bfe->type == ftnode_fetch_subset) || (bfe->type == ftnode_fetch_all) || (bfe->type == ftnode_fetch_prefetch) || (bfe->type == ftnode_fetch_keymatch)); @@ -1453,8 +1435,8 @@ int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraar (bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch) ) { - lc = toku_bfe_leftmost_child_wanted(bfe, node); - rc = toku_bfe_rightmost_child_wanted(bfe, node); + lc = bfe->leftmost_child_wanted(node); + rc = bfe->rightmost_child_wanted(node); } else { lc = -1; rc = -1; @@ -1463,7 +1445,7 @@ int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraar if (BP_STATE(node,i) == PT_AVAIL) { continue; } - if ((lc <= i && i <= rc) || toku_bfe_wants_child_available(bfe, i)) { + if ((lc <= i && i <= rc) || bfe->wants_child_available(i)) { enum pt_state state = BP_STATE(node, i); if (state == PT_COMPRESSED) { r = toku_deserialize_bp_from_compressed(node, i, bfe); @@ -1478,7 +1460,7 @@ int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraar if (r == TOKUDB_BAD_CHECKSUM) { fprintf(stderr, "Checksum failure while reading node partition in file %s.\n", - toku_cachefile_fname_in_env(bfe->h->cf)); + toku_cachefile_fname_in_env(bfe->ft->cf)); } else { fprintf(stderr, "Error while reading node partition %d\n", @@ -1493,118 +1475,8 @@ int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraar return 0; } -struct cmd_leafval_heaviside_extra { - ft_compare_func compare_fun; - DESCRIPTOR desc; - DBT const * const key; -}; - -//TODO: #1125 optimize -static int -toku_cmd_leafval_heaviside(DBT const &kdbt, const struct cmd_leafval_heaviside_extra &be) { - FAKE_DB(db, be.desc); - DBT const * const key = be.key; - return be.compare_fun(&db, &kdbt, key); -} - -static int -ft_compare_pivot(DESCRIPTOR desc, ft_compare_func cmp, const DBT *key, const DBT *pivot) -{ - int r; - FAKE_DB(db, desc); - r = cmp(&db, key, pivot); - return r; -} - - -// destroys the internals of the ftnode, but it does not free the values -// that are stored -// this is common functionality for toku_ftnode_free and rebalance_ftnode_leaf -// MUST NOT do anything besides free the structures that have been allocated -void toku_destroy_ftnode_internals(FTNODE node) -{ - for (int i=0; in_children-1; i++) { - toku_destroy_dbt(&node->childkeys[i]); - } - toku_free(node->childkeys); - node->childkeys = NULL; - - for (int i=0; i < node->n_children; i++) { - if (BP_STATE(node,i) == PT_AVAIL) { - if (node->height > 0) { - destroy_nonleaf_childinfo(BNC(node,i)); - } else { - destroy_basement_node(BLB(node, i)); - } - } else if (BP_STATE(node,i) == PT_COMPRESSED) { - SUB_BLOCK sb = BSB(node,i); - toku_free(sb->compressed_ptr); - toku_free(sb); - } else { - paranoid_invariant(is_BNULL(node, i)); - } - set_BNULL(node, i); - } - toku_free(node->bp); - node->bp = NULL; -} - -/* Frees a node, including all the stuff in the hash table. 
*/ -void toku_ftnode_free(FTNODE *nodep) { - FTNODE node = *nodep; - if (node->height == 0) { - STATUS_INC(FT_DESTROY_LEAF, 1); - } else { - STATUS_INC(FT_DESTROY_NONLEAF, 1); - } - toku_destroy_ftnode_internals(node); - toku_free(node); - *nodep = nullptr; -} - -void -toku_initialize_empty_ftnode (FTNODE n, BLOCKNUM nodename, int height, int num_children, int layout_version, unsigned int flags) -// Effect: Fill in N as an empty ftnode. -{ - paranoid_invariant(layout_version != 0); - paranoid_invariant(height >= 0); - - if (height == 0) { - STATUS_INC(FT_CREATE_LEAF, 1); - } else { - STATUS_INC(FT_CREATE_NONLEAF, 1); - } - - n->max_msn_applied_to_node_on_disk = ZERO_MSN; // correct value for root node, harmless for others - n->flags = flags; - n->thisnodename = nodename; - n->layout_version = layout_version; - n->layout_version_original = layout_version; - n->layout_version_read_from_disk = layout_version; - n->height = height; - n->totalchildkeylens = 0; - n->childkeys = 0; - n->bp = 0; - n->n_children = num_children; - n->oldest_referenced_xid_known = TXNID_NONE; - - if (num_children > 0) { - XMALLOC_N(num_children-1, n->childkeys); - XMALLOC_N(num_children, n->bp); - for (int i = 0; i < num_children; i++) { - BP_BLOCKNUM(n,i).b=0; - BP_STATE(n,i) = PT_INVALID; - BP_WORKDONE(n,i) = 0; - BP_INIT_TOUCHED_CLOCK(n, i); - set_BNULL(n,i); - if (height > 0) { - set_BNC(n, i, toku_create_empty_nl()); - } else { - set_BLB(n, i, toku_create_empty_bn()); - } - } - } - n->dirty = 1; // special case exception, it's okay to mark as dirty because the basements are empty +int toku_msg_leafval_heaviside(DBT const &kdbt, const struct toku_msg_leafval_heaviside_extra &be) { + return be.cmp(&kdbt, be.key); } static void @@ -1615,14 +1487,12 @@ ft_init_new_root(FT ft, FTNODE oldroot, FTNODE *newrootp) { FTNODE newroot; - BLOCKNUM old_blocknum = oldroot->thisnodename; + BLOCKNUM old_blocknum = oldroot->blocknum; uint32_t old_fullhash = oldroot->fullhash; - PAIR old_pair = oldroot->ct_pair; int new_height = oldroot->height+1; uint32_t new_fullhash; BLOCKNUM new_blocknum; - PAIR new_pair = NULL; cachetable_put_empty_node_with_dep_nodes( ft, @@ -1632,7 +1502,6 @@ ft_init_new_root(FT ft, FTNODE oldroot, FTNODE *newrootp) &new_fullhash, &newroot ); - new_pair = newroot->ct_pair; assert(newroot); assert(new_height > 0); @@ -1644,22 +1513,18 @@ ft_init_new_root(FT ft, FTNODE oldroot, FTNODE *newrootp) ft->h->layout_version, ft->h->flags ); + newroot->fullhash = new_fullhash; MSN msna = oldroot->max_msn_applied_to_node_on_disk; newroot->max_msn_applied_to_node_on_disk = msna; BP_STATE(newroot,0) = PT_AVAIL; newroot->dirty = 1; - // now do the "switcheroo" - BP_BLOCKNUM(newroot,0) = new_blocknum; - newroot->thisnodename = old_blocknum; - newroot->fullhash = old_fullhash; - newroot->ct_pair = old_pair; - - oldroot->thisnodename = new_blocknum; - oldroot->fullhash = new_fullhash; - oldroot->ct_pair = new_pair; - - toku_cachetable_swap_pair_values(old_pair, new_pair); + // Set the first child to have the new blocknum, + // and then swap newroot with oldroot. The new root + // will inherit the hash/blocknum/pair from oldroot, + // keeping the root blocknum constant. 
+ BP_BLOCKNUM(newroot, 0) = new_blocknum; + toku_ftnode_swap_pair_values(newroot, oldroot); toku_ft_split_child( ft, @@ -1672,3286 +1537,1809 @@ ft_init_new_root(FT ft, FTNODE oldroot, FTNODE *newrootp) // ft_split_child released locks on newroot // and oldroot, so now we repin and // return to caller - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); - toku_pin_ftnode_off_client_thread( + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + toku_pin_ftnode( ft, old_blocknum, old_fullhash, &bfe, PL_WRITE_EXPENSIVE, // may_modify_node - 0, - NULL, - newrootp + newrootp, + true ); } -static void -init_childinfo(FTNODE node, int childnum, FTNODE child) { - BP_BLOCKNUM(node,childnum) = child->thisnodename; - BP_STATE(node,childnum) = PT_AVAIL; - BP_WORKDONE(node, childnum) = 0; - set_BNC(node, childnum, toku_create_empty_nl()); -} +static void inject_message_in_locked_node( + FT ft, + FTNODE node, + int childnum, + const ft_msg &msg, + size_t flow_deltas[], + txn_gc_info *gc_info + ) +{ + // No guarantee that we're the writer, but oh well. + // TODO(leif): Implement "do I have the lock or is it someone else?" + // check in frwlock. Should be possible with TOKU_PTHREAD_DEBUG, nop + // otherwise. + invariant(toku_ctpair_is_write_locked(node->ct_pair)); + toku_ftnode_assert_fully_in_memory(node); -static void -init_childkey(FTNODE node, int childnum, const DBT *pivotkey) { - toku_clone_dbt(&node->childkeys[childnum], *pivotkey); - node->totalchildkeylens += pivotkey->size; -} + // Take the newer of the two oldest referenced xid values from the node and gc_info. + // The gc_info usually has a newer value, because we got it at the top of this call + // stack from the txn manager. But sometimes the node has a newer value, if some + // other thread sees a newer value and writes to this node before we got the lock. + if (gc_info->oldest_referenced_xid_for_implicit_promotion > node->oldest_referenced_xid_known) { + node->oldest_referenced_xid_known = gc_info->oldest_referenced_xid_for_implicit_promotion; + } else if (gc_info->oldest_referenced_xid_for_implicit_promotion < node->oldest_referenced_xid_known) { + gc_info->oldest_referenced_xid_for_implicit_promotion = node->oldest_referenced_xid_known; + } -// Used only by test programs: append a child node to a parent node -void -toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey) { - int childnum = node->n_children; - node->n_children++; - XREALLOC_N(node->n_children, node->bp); - init_childinfo(node, childnum, child); - XREALLOC_N(node->n_children-1, node->childkeys); - if (pivotkey) { - invariant(childnum > 0); - init_childkey(node, childnum-1, pivotkey); + // Get the MSN from the header. Now that we have a write lock on the + // node we're injecting into, we know no other thread will get an MSN + // after us and get that message into our subtree before us. + MSN msg_msn = { .msn = toku_sync_add_and_fetch(&ft->h->max_msn_in_ft.msn, 1) }; + ft_msg msg_with_msn(msg.kdbt(), msg.vdbt(), msg.type(), msg_msn, msg.xids()); + paranoid_invariant(msg_with_msn.msn().msn > node->max_msn_applied_to_node_on_disk.msn); + + STAT64INFO_S stats_delta = {0,0}; + toku_ftnode_put_msg( + ft->cmp, + ft->update_fun, + node, + childnum, + msg_with_msn, + true, + gc_info, + flow_deltas, + &stats_delta + ); + if (stats_delta.numbytes || stats_delta.numrows) { + toku_ft_update_stats(&ft->in_memory_stats, stats_delta); } - node->dirty = 1; -} + // + // assumption is that toku_ftnode_put_msg will + // mark the node as dirty. 
+ // enforcing invariant here. + // + paranoid_invariant(node->dirty != 0); -void -toku_ft_bn_apply_cmd_once ( - BASEMENTNODE bn, - const FT_MSG cmd, - uint32_t idx, - LEAFENTRY le, - txn_gc_info *gc_info, - uint64_t *workdone, - STAT64INFO stats_to_update - ) -// Effect: Apply cmd to leafentry (msn is ignored) -// Calculate work done by message on leafentry and add it to caller's workdone counter. -// idx is the location where it goes -// le is old leafentry -{ - size_t newsize=0, oldsize=0, workdone_this_le=0; - LEAFENTRY new_le=0; - int64_t numbytes_delta = 0; // how many bytes of user data (not including overhead) were added or deleted from this row - int64_t numrows_delta = 0; // will be +1 or -1 or 0 (if row was added or deleted or not) - uint32_t key_storage_size = ft_msg_get_keylen(cmd) + sizeof(uint32_t); - if (le) { - oldsize = leafentry_memsize(le) + key_storage_size; + // update some status variables + if (node->height != 0) { + size_t msgsize = msg.total_size(); + STATUS_INC(FT_MSG_BYTES_IN, msgsize); + STATUS_INC(FT_MSG_BYTES_CURR, msgsize); + STATUS_INC(FT_MSG_NUM, 1); + if (ft_msg_type_applies_all(msg.type())) { + STATUS_INC(FT_MSG_NUM_BROADCAST, 1); + } } - - // toku_le_apply_msg() may call mempool_malloc_from_omt() to allocate more space. - // That means le is guaranteed to not cause a sigsegv but it may point to a mempool that is - // no longer in use. We'll have to release the old mempool later. - toku_le_apply_msg( - cmd, - le, - &bn->data_buffer, - idx, - gc_info, - &new_le, - &numbytes_delta - ); - newsize = new_le ? (leafentry_memsize(new_le) + + key_storage_size) : 0; - if (le && new_le) { - workdone_this_le = (oldsize > newsize ? oldsize : newsize); // work done is max of le size before and after message application + // verify that msn of latest message was captured in root node + paranoid_invariant(msg_with_msn.msn().msn == node->max_msn_applied_to_node_on_disk.msn); - } else { // we did not just replace a row, so ... - if (le) { - // ... we just deleted a row ... - workdone_this_le = oldsize; - numrows_delta = -1; + if (node->blocknum.b == ft->rightmost_blocknum.b) { + if (ft->seqinsert_score < FT_SEQINSERT_SCORE_THRESHOLD) { + // we promoted to the rightmost leaf node and the seqinsert score has not yet saturated. + toku_sync_fetch_and_add(&ft->seqinsert_score, 1); } - if (new_le) { - // ... or we just added a row - workdone_this_le = newsize; - numrows_delta = 1; - } - } - if (workdone) { // test programs may call with NULL - *workdone += workdone_this_le; + } else if (ft->seqinsert_score != 0) { + // we promoted to something other than the rightmost leaf node and the score should reset + ft->seqinsert_score = 0; } - // now update stat64 statistics - bn->stat64_delta.numrows += numrows_delta; - bn->stat64_delta.numbytes += numbytes_delta; - // the only reason stats_to_update may be null is for tests - if (stats_to_update) { - stats_to_update->numrows += numrows_delta; - stats_to_update->numbytes += numbytes_delta; + // if we call toku_ft_flush_some_child, then that function unpins the root + // otherwise, we unpin ourselves + if (node->height > 0 && toku_ftnode_nonleaf_is_gorged(node, ft->h->nodesize)) { + toku_ft_flush_node_on_background_thread(ft, node); + } + else { + toku_unpin_ftnode(ft, node); } - } -static const uint32_t setval_tag = 0xee0ccb99; // this was gotten by doing "cat /dev/random|head -c4|od -x" to get a random number. We want to make sure that the user actually passes us the setval_extra_s that we passed in. 
-struct setval_extra_s { - uint32_t tag; - bool did_set_val; - int setval_r; // any error code that setval_fun wants to return goes here. - // need arguments for toku_ft_bn_apply_cmd_once - BASEMENTNODE bn; - MSN msn; // captured from original message, not currently used - XIDS xids; - const DBT *key; - uint32_t idx; - LEAFENTRY le; - txn_gc_info *gc_info; - uint64_t * workdone; // set by toku_ft_bn_apply_cmd_once() - STAT64INFO stats_to_update; -}; - -/* - * If new_val == NULL, we send a delete message instead of an insert. - * This happens here instead of in do_delete() for consistency. - * setval_fun() is called from handlerton, passing in svextra_v - * from setval_extra_s input arg to brt->update_fun(). - */ -static void setval_fun (const DBT *new_val, void *svextra_v) { - struct setval_extra_s *CAST_FROM_VOIDP(svextra, svextra_v); - paranoid_invariant(svextra->tag==setval_tag); - paranoid_invariant(!svextra->did_set_val); - svextra->did_set_val = true; +// seqinsert_loc is a bitmask. +// The root counts as being both on the "left extreme" and on the "right extreme". +// Therefore, at the root, you're at LEFT_EXTREME | RIGHT_EXTREME. +typedef char seqinsert_loc; +static const seqinsert_loc NEITHER_EXTREME = 0; +static const seqinsert_loc LEFT_EXTREME = 1; +static const seqinsert_loc RIGHT_EXTREME = 2; - { - // can't leave scope until toku_ft_bn_apply_cmd_once if - // this is a delete - DBT val; - FT_MSG_S msg = { FT_NONE, svextra->msn, svextra->xids, - .u = { .id = {svextra->key, NULL} } }; - if (new_val) { - msg.type = FT_INSERT; - msg.u.id.val = new_val; - } else { - msg.type = FT_DELETE_ANY; - toku_init_dbt(&val); - msg.u.id.val = &val; - } - toku_ft_bn_apply_cmd_once(svextra->bn, &msg, - svextra->idx, svextra->le, - svextra->gc_info, - svextra->workdone, svextra->stats_to_update); - svextra->setval_r = 0; - } -} - -// We are already past the msn filter (in toku_ft_bn_apply_cmd(), which calls do_update()), -// so capturing the msn in the setval_extra_s is not strictly required. The alternative -// would be to put a dummy msn in the messages created by setval_fun(), but preserving -// the original msn seems cleaner and it preserves accountability at a lower layer. 
-static int do_update(ft_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE bn, FT_MSG cmd, uint32_t idx, - LEAFENTRY le, - void* keydata, - uint32_t keylen, - txn_gc_info *gc_info, - uint64_t * workdone, - STAT64INFO stats_to_update) { - LEAFENTRY le_for_update; - DBT key; - const DBT *keyp; - const DBT *update_function_extra; - DBT vdbt; - const DBT *vdbtp; - - // the location of data depends whether this is a regular or - // broadcast update - if (cmd->type == FT_UPDATE) { - // key is passed in with command (should be same as from le) - // update function extra is passed in with command - STATUS_INC(FT_UPDATES, 1); - keyp = cmd->u.id.key; - update_function_extra = cmd->u.id.val; - } else if (cmd->type == FT_UPDATE_BROADCAST_ALL) { - // key is not passed in with broadcast, it comes from le - // update function extra is passed in with command - paranoid_invariant(le); // for broadcast updates, we just hit all leafentries - // so this cannot be null - paranoid_invariant(keydata); - paranoid_invariant(keylen); - paranoid_invariant(cmd->u.id.key->size == 0); - STATUS_INC(FT_UPDATES_BROADCAST, 1); - keyp = toku_fill_dbt(&key, keydata, keylen); - update_function_extra = cmd->u.id.val; - } else { - abort(); - } - - if (le && !le_latest_is_del(le)) { - // if the latest val exists, use it, and we'll use the leafentry later - uint32_t vallen; - void *valp = le_latest_val_and_len(le, &vallen); - vdbtp = toku_fill_dbt(&vdbt, valp, vallen); - } else { - // otherwise, the val and leafentry are both going to be null - vdbtp = NULL; - } - le_for_update = le; - - struct setval_extra_s setval_extra = {setval_tag, false, 0, bn, cmd->msn, cmd->xids, - keyp, idx, le_for_update, gc_info, - workdone, stats_to_update}; - // call handlerton's brt->update_fun(), which passes setval_extra to setval_fun() - FAKE_DB(db, desc); - int r = update_fun( - &db, - keyp, - vdbtp, - update_function_extra, - setval_fun, &setval_extra - ); - - if (r == 0) { r = setval_extra.setval_r; } - return r; -} - -// Should be renamed as something like "apply_cmd_to_basement()." -void -toku_ft_bn_apply_cmd ( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - BASEMENTNODE bn, - FT_MSG cmd, - txn_gc_info *gc_info, - uint64_t *workdone, - STAT64INFO stats_to_update - ) +static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int childnum, seqinsert_loc loc) // Effect: -// Put a cmd into a leaf. -// Calculate work done by message on leafnode and add it to caller's workdone counter. -// The leaf could end up "too big" or "too small". The caller must fix that up. +// If child needs to be split or merged, do that. 
+// parent and child will be unlocked if this happens +// Requires: parent and child are read locked +// Returns: +// true if relocking is needed +// false otherwise { - LEAFENTRY storeddata; - void* key = NULL; - uint32_t keylen = 0; - - uint32_t omt_size; - int r; - struct cmd_leafval_heaviside_extra be = {compare_fun, desc, cmd->u.id.key}; - - unsigned int doing_seqinsert = bn->seqinsert; - bn->seqinsert = 0; - - switch (cmd->type) { - case FT_INSERT_NO_OVERWRITE: - case FT_INSERT: { - uint32_t idx; - if (doing_seqinsert) { - idx = bn->data_buffer.omt_size(); - DBT kdbt; - r = bn->data_buffer.fetch_le_key_and_len(idx-1, &kdbt.size, &kdbt.data); - if (r != 0) goto fz; - int cmp = toku_cmd_leafval_heaviside(kdbt, be); - if (cmp >= 0) goto fz; - r = DB_NOTFOUND; - } else { - fz: - r = bn->data_buffer.find_zero( - be, - &storeddata, - &key, - &keylen, - &idx - ); - } - if (r==DB_NOTFOUND) { - storeddata = 0; - } else { - assert_zero(r); - } - toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, gc_info, workdone, stats_to_update); - - // if the insertion point is within a window of the right edge of - // the leaf then it is sequential - // window = min(32, number of leaf entries/16) + enum reactivity re = toku_ftnode_get_reactivity(ft, child); + enum reactivity newre; + BLOCKNUM child_blocknum; + uint32_t child_fullhash; + switch (re) { + case RE_STABLE: + return false; + case RE_FISSIBLE: { - uint32_t s = bn->data_buffer.omt_size(); - uint32_t w = s / 16; - if (w == 0) w = 1; - if (w > 32) w = 32; - - // within the window? - if (s - idx <= w) - bn->seqinsert = doing_seqinsert + 1; - } - break; - } - case FT_DELETE_ANY: - case FT_ABORT_ANY: - case FT_COMMIT_ANY: { - uint32_t idx; - // Apply to all the matches - - r = bn->data_buffer.find_zero( - be, - &storeddata, - &key, - &keylen, - &idx - ); - if (r == DB_NOTFOUND) break; - assert_zero(r); - toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, gc_info, workdone, stats_to_update); - - break; - } - case FT_OPTIMIZE_FOR_UPGRADE: - // fall through so that optimize_for_upgrade performs rest of the optimize logic - case FT_COMMIT_BROADCAST_ALL: - case FT_OPTIMIZE: - // Apply to all leafentries - omt_size = bn->data_buffer.omt_size(); - for (uint32_t idx = 0; idx < omt_size; ) { - DBT curr_keydbt; - void* curr_keyp = NULL; - uint32_t curr_keylen = 0; - r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_keyp); - assert_zero(r); - toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen); - // because this is a broadcast message, we need - // to fill the key in the msg that we pass into toku_ft_bn_apply_cmd_once - cmd->u.id.key = &curr_keydbt; - int deleted = 0; - if (!le_is_clean(storeddata)) { //If already clean, nothing to do. - toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, gc_info, workdone, stats_to_update); - uint32_t new_omt_size = bn->data_buffer.omt_size(); - if (new_omt_size != omt_size) { - paranoid_invariant(new_omt_size+1 == omt_size); - //Item was deleted. - deleted = 1; + // We only have a read lock on the parent. We need to drop both locks, and get write locks. 
+ BLOCKNUM parent_blocknum = parent->blocknum; + uint32_t parent_fullhash = toku_cachetable_hash(ft->cf, parent_blocknum); + int parent_height = parent->height; + int parent_n_children = parent->n_children; + toku_unpin_ftnode_read_only(ft, child); + toku_unpin_ftnode_read_only(ft, parent); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + FTNODE newparent, newchild; + toku_pin_ftnode(ft, parent_blocknum, parent_fullhash, &bfe, PL_WRITE_CHEAP, &newparent, true); + if (newparent->height != parent_height || newparent->n_children != parent_n_children || + childnum >= newparent->n_children || toku_bnc_n_entries(BNC(newparent, childnum))) { + // If the height changed or childnum is now off the end, something clearly got split or merged out from under us. + // If something got injected in this node, then it got split or merged and we shouldn't be splitting it. + // But we already unpinned the child so we need to have the caller re-try the pins. + toku_unpin_ftnode_read_only(ft, newparent); + return true; + } + // It's ok to reuse the same childnum because if we get something + // else we need to split, well, that's crazy, but let's go ahead + // and split it. + child_blocknum = BP_BLOCKNUM(newparent, childnum); + child_fullhash = compute_child_fullhash(ft->cf, newparent, childnum); + toku_pin_ftnode_with_dep_nodes(ft, child_blocknum, child_fullhash, &bfe, PL_WRITE_CHEAP, 1, &newparent, &newchild, true); + newre = toku_ftnode_get_reactivity(ft, newchild); + if (newre == RE_FISSIBLE) { + enum split_mode split_mode; + if (newparent->height == 1 && (loc & LEFT_EXTREME) && childnum == 0) { + split_mode = SPLIT_RIGHT_HEAVY; + } else if (newparent->height == 1 && (loc & RIGHT_EXTREME) && childnum == newparent->n_children - 1) { + split_mode = SPLIT_LEFT_HEAVY; + } else { + split_mode = SPLIT_EVENLY; } + toku_ft_split_child(ft, newparent, childnum, newchild, split_mode); + } else { + // some other thread already got it, just unpin and tell the + // caller to retry + toku_unpin_ftnode_read_only(ft, newchild); + toku_unpin_ftnode_read_only(ft, newparent); } - if (deleted) - omt_size--; - else - idx++; + return true; } - paranoid_invariant(bn->data_buffer.omt_size() == omt_size); - - break; - case FT_COMMIT_BROADCAST_TXN: - case FT_ABORT_BROADCAST_TXN: - // Apply to all leafentries if txn is represented - omt_size = bn->data_buffer.omt_size(); - for (uint32_t idx = 0; idx < omt_size; ) { - DBT curr_keydbt; - void* curr_keyp = NULL; - uint32_t curr_keylen = 0; - r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_keyp); - assert_zero(r); - toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen); - // because this is a broadcast message, we need - // to fill the key in the msg that we pass into toku_ft_bn_apply_cmd_once - cmd->u.id.key = &curr_keydbt; - int deleted = 0; - if (le_has_xids(storeddata, cmd->xids)) { - toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, gc_info, workdone, stats_to_update); - uint32_t new_omt_size = bn->data_buffer.omt_size(); - if (new_omt_size != omt_size) { - paranoid_invariant(new_omt_size+1 == omt_size); - //Item was deleted. 
- deleted = 1; + case RE_FUSIBLE: + { + if (parent->height == 1) { + // prevent re-merging of recently unevenly-split nodes + if (((loc & LEFT_EXTREME) && childnum <= 1) || + ((loc & RIGHT_EXTREME) && childnum >= parent->n_children - 2)) { + return false; } } - if (deleted) - omt_size--; - else - idx++; - } - paranoid_invariant(bn->data_buffer.omt_size() == omt_size); - break; - case FT_UPDATE: { - uint32_t idx; - r = bn->data_buffer.find_zero( - be, - &storeddata, - &key, - &keylen, - &idx - ); - if (r==DB_NOTFOUND) { - { - //Point to msg's copy of the key so we don't worry about le being freed - //TODO: 46 MAYBE Get rid of this when le_apply message memory is better handled - key = cmd->u.id.key->data; - keylen = cmd->u.id.key->size; + int parent_height = parent->height; + BLOCKNUM parent_blocknum = parent->blocknum; + uint32_t parent_fullhash = toku_cachetable_hash(ft->cf, parent_blocknum); + toku_unpin_ftnode_read_only(ft, child); + toku_unpin_ftnode_read_only(ft, parent); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + FTNODE newparent, newchild; + toku_pin_ftnode(ft, parent_blocknum, parent_fullhash, &bfe, PL_WRITE_CHEAP, &newparent, true); + if (newparent->height != parent_height || childnum >= newparent->n_children) { + // looks like this is the root and it got merged, let's just start over (like in the split case above) + toku_unpin_ftnode_read_only(ft, newparent); + return true; } - r = do_update(update_fun, desc, bn, cmd, idx, NULL, NULL, 0, gc_info, workdone, stats_to_update); - } else if (r==0) { - r = do_update(update_fun, desc, bn, cmd, idx, storeddata, key, keylen, gc_info, workdone, stats_to_update); - } // otherwise, a worse error, just return it - break; - } - case FT_UPDATE_BROADCAST_ALL: { - // apply to all leafentries. - uint32_t idx = 0; - uint32_t num_leafentries_before; - while (idx < (num_leafentries_before = bn->data_buffer.omt_size())) { - void* curr_key = nullptr; - uint32_t curr_keylen = 0; - r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_key); - assert_zero(r); - - //TODO: 46 replace this with something better than cloning key - // TODO: (Zardosht) This may be unnecessary now, due to how the key - // is handled in the bndata. Investigate and determine - char clone_mem[curr_keylen]; // only lasts one loop, alloca would overflow (end of function) - memcpy((void*)clone_mem, curr_key, curr_keylen); - curr_key = (void*)clone_mem; - - // This is broken below. Have a compilation error checked - // in as a reminder - r = do_update(update_fun, desc, bn, cmd, idx, storeddata, curr_key, curr_keylen, gc_info, workdone, stats_to_update); - assert_zero(r); - - if (num_leafentries_before == bn->data_buffer.omt_size()) { - // we didn't delete something, so increment the index. - idx++; + child_blocknum = BP_BLOCKNUM(newparent, childnum); + child_fullhash = compute_child_fullhash(ft->cf, newparent, childnum); + toku_pin_ftnode_with_dep_nodes(ft, child_blocknum, child_fullhash, &bfe, PL_READ, 1, &newparent, &newchild, true); + newre = toku_ftnode_get_reactivity(ft, newchild); + if (newre == RE_FUSIBLE && newparent->n_children >= 2) { + toku_unpin_ftnode_read_only(ft, newchild); + toku_ft_merge_child(ft, newparent, childnum); + } else { + // Could be a weird case where newparent has only one + // child. In this case, we want to inject here but we've + // already unpinned the caller's copy of parent so we have + // to ask them to re-pin, or they could (very rarely) + // dereferenced memory in a freed node. 
TODO: we could + // give them back the copy of the parent we pinned. + // + // Otherwise, some other thread already got it, just unpin + // and tell the caller to retry + toku_unpin_ftnode_read_only(ft, newchild); + toku_unpin_ftnode_read_only(ft, newparent); } + return true; } - break; - } - case FT_NONE: break; // don't do anything } - - return; + abort(); } -static inline int -key_msn_cmp(const DBT *a, const DBT *b, const MSN amsn, const MSN bmsn, - DESCRIPTOR descriptor, ft_compare_func key_cmp) +static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t fullhash, const ft_msg &msg, size_t flow_deltas[], txn_gc_info *gc_info) +// Effect: +// Inject message into the node at this blocknum (cachekey). +// Gets a write lock on the node for you. { - FAKE_DB(db, descriptor); - int r = key_cmp(&db, a, b); - if (r == 0) { - if (amsn.msn > bmsn.msn) { - r = +1; - } else if (amsn.msn < bmsn.msn) { - r = -1; - } else { - r = 0; - } - } - return r; + toku::context inject_ctx(CTX_MESSAGE_INJECTION); + FTNODE node; + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + toku_pin_ftnode(ft, cachekey, fullhash, &bfe, PL_WRITE_CHEAP, &node, true); + toku_ftnode_assert_fully_in_memory(node); + paranoid_invariant(node->fullhash==fullhash); + ft_verify_flags(ft, node); + inject_message_in_locked_node(ft, node, -1, msg, flow_deltas, gc_info); } -int -toku_fifo_entry_key_msn_heaviside(const int32_t &offset, const struct toku_fifo_entry_key_msn_heaviside_extra &extra) +__attribute__((const)) +static inline bool should_inject_in_node(seqinsert_loc loc, int height, int depth) +// We should inject directly in a node if: +// - it's a leaf, or +// - it's a height 1 node not at either extreme, or +// - it's a depth 2 node not at either extreme { - const struct fifo_entry *query = toku_fifo_get_entry(extra.fifo, offset); - DBT qdbt; - const DBT *query_key = fill_dbt_for_fifo_entry(&qdbt, query); - const DBT *target_key = extra.key; - return key_msn_cmp(query_key, target_key, query->msn, extra.msn, - extra.desc, extra.cmp); + return (height == 0 || (loc == NEITHER_EXTREME && (height <= 1 || depth >= 2))); } -int -toku_fifo_entry_key_msn_cmp(const struct toku_fifo_entry_key_msn_cmp_extra &extra, const int32_t &ao, const int32_t &bo) -{ - const struct fifo_entry *a = toku_fifo_get_entry(extra.fifo, ao); - const struct fifo_entry *b = toku_fifo_get_entry(extra.fifo, bo); - DBT adbt, bdbt; - const DBT *akey = fill_dbt_for_fifo_entry(&adbt, a); - const DBT *bkey = fill_dbt_for_fifo_entry(&bdbt, b); - return key_msn_cmp(akey, bkey, a->msn, b->msn, - extra.desc, extra.cmp); -} - -void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp) -// Effect: Enqueue the message represented by the parameters into the -// bnc's buffer, and put it in either the fresh or stale message tree, -// or the broadcast list. -// -// This is only exported for tests. 
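should_inject_in_node above compresses the promotion cut-off into one predicate. A self-contained sketch of the same rule (hypothetical names; it assumes the seqinsert_loc convention used elsewhere in this patch, where the root counts as both the left and right extreme):

    #include <cstdio>

    // Illustration only: same bitmask idea as seqinsert_loc.
    typedef char loc_t;
    static const loc_t neither_extreme = 0, left_extreme = 1, right_extreme = 2;

    static bool inject_here(loc_t loc, int height, int depth) {
        // Leaves always take the message directly; an interior node takes it only
        // when we are off the sequential-insert edges and have already reached
        // height 1 or depth 2, whichever comes first.
        return height == 0 || (loc == neither_extreme && (height <= 1 || depth >= 2));
    }

    int main(void) {
        printf("leaf on the right edge : %d\n", inject_here(right_extreme, 0, 5));                 // 1
        printf("height-1, middle       : %d\n", inject_here(neither_extreme, 1, 1));               // 1
        printf("depth-2, middle        : %d\n", inject_here(neither_extreme, 3, 2));               // 1
        printf("height-2 near the root : %d\n", inject_here(neither_extreme, 2, 1));               // 0, keep promoting
        printf("interior on an extreme : %d\n", inject_here(left_extreme | right_extreme, 2, 1));  // 0, keep promoting
        return 0;
    }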
+static void ft_set_or_verify_rightmost_blocknum(FT ft, BLOCKNUM b) +// Given: 'b', the _definitive_ and constant rightmost blocknum of 'ft' { - int32_t offset; - int r = toku_fifo_enq(bnc->buffer, key, keylen, data, datalen, type, msn, xids, is_fresh, &offset); - assert_zero(r); - if (ft_msg_type_applies_once(type)) { - DBT keydbt; - struct toku_fifo_entry_key_msn_heaviside_extra extra = { .desc = desc, .cmp = cmp, .fifo = bnc->buffer, .key = toku_fill_dbt(&keydbt, key, keylen), .msn = msn }; - if (is_fresh) { - r = bnc->fresh_message_tree.insert(offset, extra, nullptr); - assert_zero(r); - } else { - r = bnc->stale_message_tree.insert(offset, extra, nullptr); - assert_zero(r); + if (ft->rightmost_blocknum.b == RESERVED_BLOCKNUM_NULL) { + toku_ft_lock(ft); + if (ft->rightmost_blocknum.b == RESERVED_BLOCKNUM_NULL) { + ft->rightmost_blocknum = b; } - } else { - invariant(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)); - const uint32_t idx = bnc->broadcast_list.size(); - r = bnc->broadcast_list.insert_at(offset, idx); - assert_zero(r); + toku_ft_unlock(ft); } + // The rightmost blocknum only transitions from RESERVED_BLOCKNUM_NULL to non-null. + // If it's already set, verify that the stored value is consistent with 'b' + invariant(ft->rightmost_blocknum.b == b.b); } -// append a cmd to a nonleaf node's child buffer -// should be static, but used by test programs -void toku_ft_append_to_child_buffer(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val) { - paranoid_invariant(BP_STATE(node,childnum) == PT_AVAIL); - toku_bnc_insert_msg(BNC(node, childnum), key->data, key->size, val->data, val->size, type, msn, xids, is_fresh, desc, compare_fun); - node->dirty = 1; +bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) { + static const double factor = 0.125; + const uint64_t flow_threshold = ft->h->nodesize * factor; + return bnc->flow[0] >= flow_threshold || bnc->flow[1] >= flow_threshold; } -static void ft_nonleaf_cmd_once_to_child(ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int target_childnum, FT_MSG cmd, bool is_fresh, size_t flow_deltas[]) -// Previously we had passive aggressive promotion, but that causes a lot of I/O a the checkpoint. So now we are just putting it in the buffer here. -// Also we don't worry about the node getting overfull here. It's the caller's problem. +static void push_something_in_subtree( + FT ft, + FTNODE subtree_root, + int target_childnum, + const ft_msg &msg, + size_t flow_deltas[], + txn_gc_info *gc_info, + int depth, + seqinsert_loc loc, + bool just_did_split_or_merge + ) +// Effects: +// Assign message an MSN from ft->h. +// Put message in the subtree rooted at node. Due to promotion the message may not be injected directly in this node. +// Unlock node or schedule it to be unlocked (after a background flush). +// Either way, the caller is not responsible for unlocking node. +// Requires: +// subtree_root is read locked and fully in memory. +// Notes: +// In Ming, the basic rules of promotion are as follows: +// Don't promote broadcast messages. +// Don't promote past non-empty buffers. +// Otherwise, promote at most to height 1 or depth 2 (whichever is highest), as far as the birdie asks you to promote. +// We don't promote to leaves because injecting into leaves is expensive, mostly because of #5605 and some of #5552. 
+// We don't promote past depth 2 because we found that gives us enough parallelism without costing us too much pinning work. +// +// This is true with the following caveats: +// We always promote all the way to the leaves on the rightmost and leftmost edges of the tree, for sequential insertions. +// (That means we can promote past depth 2 near the edges of the tree.) +// +// When the birdie is still saying we should promote, we use get_and_pin so that we wait to get the node. +// If the birdie doesn't say to promote, we try maybe_get_and_pin. If we get the node cheaply, and it's dirty, we promote anyway. { - unsigned int childnum = (target_childnum >= 0 - ? target_childnum - : toku_ftnode_which_child(node, cmd->u.id.key, desc, compare_fun)); - toku_ft_append_to_child_buffer(compare_fun, desc, node, childnum, cmd->type, cmd->msn, cmd->xids, is_fresh, cmd->u.id.key, cmd->u.id.val); - NONLEAF_CHILDINFO bnc = BNC(node, childnum); - bnc->flow[0] += flow_deltas[0]; - bnc->flow[1] += flow_deltas[1]; -} - -/* Find the leftmost child that may contain the key. - * If the key exists it will be in the child whose number - * is the return value of this function. - */ -int toku_ftnode_which_child(FTNODE node, const DBT *k, - DESCRIPTOR desc, ft_compare_func cmp) { - // a funny case of no pivots - if (node->n_children <= 1) return 0; + toku_ftnode_assert_fully_in_memory(subtree_root); + if (should_inject_in_node(loc, subtree_root->height, depth)) { + switch (depth) { + case 0: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_0, 1); break; + case 1: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_1, 1); break; + case 2: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_2, 1); break; + case 3: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_3, 1); break; + default: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break; + } + // If the target node is a non-root leaf node on the right extreme, + // set the rightmost blocknum. We know there are no messages above us + // because promotion would not chose to inject directly into this leaf + // otherwise. We explicitly skip the root node because then we don't have + // to worry about changing the rightmost blocknum when the root splits. + if (subtree_root->height == 0 && loc == RIGHT_EXTREME && subtree_root->blocknum.b != ft->h->root_blocknum.b) { + ft_set_or_verify_rightmost_blocknum(ft, subtree_root->blocknum); + } + inject_message_in_locked_node(ft, subtree_root, target_childnum, msg, flow_deltas, gc_info); + } else { + int r; + int childnum; + NONLEAF_CHILDINFO bnc; - // check the last key to optimize seq insertions - int n = node->n_children-1; - int c = ft_compare_pivot(desc, cmp, k, &node->childkeys[n-1]); - if (c > 0) return n; + // toku_ft_root_put_msg should not have called us otherwise. + paranoid_invariant(ft_msg_type_applies_once(msg.type())); - // binary search the pivots - int lo = 0; - int hi = n-1; // skip the last one, we checked it above - int mi; - while (lo < hi) { - mi = (lo + hi) / 2; - c = ft_compare_pivot(desc, cmp, k, &node->childkeys[mi]); - if (c > 0) { - lo = mi+1; - continue; - } - if (c < 0) { - hi = mi; - continue; + childnum = (target_childnum >= 0 ? target_childnum + : toku_ftnode_which_child(subtree_root, msg.kdbt(), ft->cmp)); + bnc = BNC(subtree_root, childnum); + + if (toku_bnc_n_entries(bnc) > 0) { + // The buffer is non-empty, give up on promoting. + STATUS_INC(FT_PRO_NUM_STOP_NONEMPTY_BUF, 1); + goto relock_and_push_here; } - return mi; - } - return lo; -} -// Used for HOT. 
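The "birdie" in the notes above is toku_bnc_should_promote: a child buffer asks for promotion once either of its two flow counters (roughly, bytes enqueued during the current and previous checkpoint periods) reaches an eighth of the node size. A rough standalone sketch of that threshold, with a hypothetical struct and the assumption that nodesize is in bytes:

    #include <cstdint>
    #include <cstdio>

    // Hypothetical stand-in for NONLEAF_CHILDINFO's flow counters.
    struct child_flow { uint64_t flow[2]; };

    static bool wants_promotion(const child_flow &bnc, uint64_t nodesize) {
        const uint64_t threshold = nodesize / 8;   // the factor 0.125 above
        return bnc.flow[0] >= threshold || bnc.flow[1] >= threshold;
    }

    int main(void) {
        child_flow cold = {{ 1 << 10, 2 << 10 }};      // a few KB of recent traffic
        child_flow hot  = {{ 600 << 10, 0 }};          // heavy traffic this checkpoint period
        // With 4MB nodes the threshold is 512KB, so this prints "cold: 0  hot: 1".
        printf("cold: %d  hot: %d\n",
               wants_promotion(cold, 4 << 20),
               wants_promotion(hot,  4 << 20));
        return 0;
    }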
-int -toku_ftnode_hot_next_child(FTNODE node, - const DBT *k, - DESCRIPTOR desc, - ft_compare_func cmp) { - int low = 0; - int hi = node->n_children - 1; - int mi; - while (low < hi) { - mi = (low + hi) / 2; - int r = ft_compare_pivot(desc, cmp, k, &node->childkeys[mi]); - if (r > 0) { - low = mi + 1; - } else if (r < 0) { - hi = mi; + seqinsert_loc next_loc; + if ((loc & LEFT_EXTREME) && childnum == 0) { + next_loc = LEFT_EXTREME; + } else if ((loc & RIGHT_EXTREME) && childnum == subtree_root->n_children - 1) { + next_loc = RIGHT_EXTREME; } else { - // if they were exactly equal, then we want the sub-tree under - // the next pivot. - return mi + 1; + next_loc = NEITHER_EXTREME; } - } - invariant(low == hi); - return low; -} - -// TODO Use this function to clean up other places where bits of messages are passed around -// such as toku_bnc_insert_msg() and the call stack above it. -static uint64_t -ft_msg_size(FT_MSG msg) { - size_t keyval_size = msg->u.id.key->size + msg->u.id.val->size; - size_t xids_size = xids_get_serialize_size(msg->xids); - return keyval_size + KEY_VALUE_OVERHEAD + FT_CMD_OVERHEAD + xids_size; -} -static void -ft_nonleaf_cmd_all (ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, FT_MSG cmd, bool is_fresh, size_t flow_deltas[]) -// Effect: Put the cmd into a nonleaf node. We put it into all children, possibly causing the children to become reactive. -// We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. -// The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) -{ - for (int i = 0; i < node->n_children; i++) { - ft_nonleaf_cmd_once_to_child(compare_fun, desc, node, i, cmd, is_fresh, flow_deltas); - } -} + if (next_loc == NEITHER_EXTREME && subtree_root->height <= 1) { + // Never promote to leaf nodes except on the edges + STATUS_INC(FT_PRO_NUM_STOP_H1, 1); + goto relock_and_push_here; + } -static bool -ft_msg_applies_once(FT_MSG cmd) -{ - return ft_msg_type_applies_once(cmd->type); -} + { + const BLOCKNUM child_blocknum = BP_BLOCKNUM(subtree_root, childnum); + ft->blocktable.verify_blocknum_allocated(child_blocknum); + const uint32_t child_fullhash = toku_cachetable_hash(ft->cf, child_blocknum); -static bool -ft_msg_applies_all(FT_MSG cmd) -{ - return ft_msg_type_applies_all(cmd->type); -} + FTNODE child; + { + const int child_height = subtree_root->height - 1; + const int child_depth = depth + 1; + // If we're locking a leaf, or a height 1 node or depth 2 + // node in the middle, we know we won't promote further + // than that, so just get a write lock now. + const pair_lock_type lock_type = (should_inject_in_node(next_loc, child_height, child_depth) + ? PL_WRITE_CHEAP + : PL_READ); + if (next_loc != NEITHER_EXTREME || (toku_bnc_should_promote(ft, bnc) && depth <= 1)) { + // If we're on either extreme, or the birdie wants to + // promote and we're in the top two levels of the + // tree, don't stop just because someone else has the + // node locked. 
+ ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + if (lock_type == PL_WRITE_CHEAP) { + // We intend to take the write lock for message injection + toku::context inject_ctx(CTX_MESSAGE_INJECTION); + toku_pin_ftnode(ft, child_blocknum, child_fullhash, &bfe, lock_type, &child, true); + } else { + // We're going to keep promoting + toku::context promo_ctx(CTX_PROMO); + toku_pin_ftnode(ft, child_blocknum, child_fullhash, &bfe, lock_type, &child, true); + } + } else { + r = toku_maybe_pin_ftnode_clean(ft, child_blocknum, child_fullhash, lock_type, &child); + if (r != 0) { + // We couldn't get the child cheaply, so give up on promoting. + STATUS_INC(FT_PRO_NUM_STOP_LOCK_CHILD, 1); + goto relock_and_push_here; + } + if (toku_ftnode_fully_in_memory(child)) { + // toku_pin_ftnode... touches the clock but toku_maybe_pin_ftnode... doesn't. + // This prevents partial eviction. + for (int i = 0; i < child->n_children; ++i) { + BP_TOUCH_CLOCK(child, i); + } + } else { + // We got the child, but it's not fully in memory. Give up on promoting. + STATUS_INC(FT_PRO_NUM_STOP_CHILD_INMEM, 1); + goto unlock_child_and_push_here; + } + } + } + paranoid_invariant_notnull(child); -static bool -ft_msg_does_nothing(FT_MSG cmd) -{ - return ft_msg_type_does_nothing(cmd->type); -} + if (!just_did_split_or_merge) { + BLOCKNUM subtree_root_blocknum = subtree_root->blocknum; + uint32_t subtree_root_fullhash = toku_cachetable_hash(ft->cf, subtree_root_blocknum); + const bool did_split_or_merge = process_maybe_reactive_child(ft, subtree_root, child, childnum, loc); + if (did_split_or_merge) { + // Need to re-pin this node and try at this level again. + FTNODE newparent; + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); // should be fully in memory, we just split it + toku_pin_ftnode(ft, subtree_root_blocknum, subtree_root_fullhash, &bfe, PL_READ, &newparent, true); + push_something_in_subtree(ft, newparent, -1, msg, flow_deltas, gc_info, depth, loc, true); + return; + } + } -static void -ft_nonleaf_put_cmd (ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, int target_childnum, FT_MSG cmd, bool is_fresh, size_t flow_deltas[]) -// Effect: Put the cmd into a nonleaf node. We may put it into a child, possibly causing the child to become reactive. -// We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. -// The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) -// -{ + if (next_loc != NEITHER_EXTREME || child->dirty || toku_bnc_should_promote(ft, bnc)) { + push_something_in_subtree(ft, child, -1, msg, flow_deltas, gc_info, depth + 1, next_loc, false); + toku_sync_fetch_and_add(&bnc->flow[0], flow_deltas[0]); + // The recursive call unpinned the child, but + // we're responsible for unpinning subtree_root. 
+ toku_unpin_ftnode_read_only(ft, subtree_root); + return; + } - // - // see comments in toku_ft_leaf_apply_cmd - // to understand why we handle setting - // node->max_msn_applied_to_node_on_disk here, - // and don't do it in toku_ft_node_put_cmd - // - MSN cmd_msn = cmd->msn; - invariant(cmd_msn.msn > node->max_msn_applied_to_node_on_disk.msn); - node->max_msn_applied_to_node_on_disk = cmd_msn; - - if (ft_msg_applies_once(cmd)) { - ft_nonleaf_cmd_once_to_child(compare_fun, desc, node, target_childnum, cmd, is_fresh, flow_deltas); - } else if (ft_msg_applies_all(cmd)) { - ft_nonleaf_cmd_all(compare_fun, desc, node, cmd, is_fresh, flow_deltas); - } else { - paranoid_invariant(ft_msg_does_nothing(cmd)); + STATUS_INC(FT_PRO_NUM_DIDNT_WANT_PROMOTE, 1); + unlock_child_and_push_here: + // We locked the child, but we decided not to promote. + // Unlock the child, and fall through to the next case. + toku_unpin_ftnode_read_only(ft, child); + } + relock_and_push_here: + // Give up on promoting. + // We have subtree_root read-locked and we don't have a child locked. + // Drop the read lock, grab a write lock, and inject here. + { + // Right now we have a read lock on subtree_root, but we want + // to inject into it so we get a write lock instead. + BLOCKNUM subtree_root_blocknum = subtree_root->blocknum; + uint32_t subtree_root_fullhash = toku_cachetable_hash(ft->cf, subtree_root_blocknum); + toku_unpin_ftnode_read_only(ft, subtree_root); + switch (depth) { + case 0: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_0, 1); break; + case 1: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_1, 1); break; + case 2: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_2, 1); break; + case 3: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_3, 1); break; + default: + STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break; + } + inject_message_at_this_blocknum(ft, subtree_root_blocknum, subtree_root_fullhash, msg, flow_deltas, gc_info); + } } } -// Garbage collect one leaf entry. -static void -ft_basement_node_gc_once(BASEMENTNODE bn, - uint32_t index, - void* keyp, - uint32_t keylen, - LEAFENTRY leaf_entry, - txn_gc_info *gc_info, - STAT64INFO_S * delta) +void toku_ft_root_put_msg( + FT ft, + const ft_msg &msg, + txn_gc_info *gc_info + ) +// Effect: +// - assign msn to message and update msn in the header +// - push the message into the ft + +// As of Clayface, the root blocknum is a constant, so preventing a +// race between message injection and the split of a root is the job +// of the cachetable's locking rules. +// +// We also hold the MO lock for a number of reasons, but an important +// one is to make sure that a begin_checkpoint may not start while +// this code is executing. A begin_checkpoint does (at least) two things +// that can interfere with the operations here: +// - Copies the header to a checkpoint header. Because we may change +// the max_msn_in_ft below, we don't want the header to be copied in +// the middle of these operations. +// - Takes note of the log's LSN. Because this put operation has +// already been logged, this message injection must be included +// in any checkpoint that contains this put's logentry. +// Holding the mo lock throughout this function ensures that fact. { - paranoid_invariant(leaf_entry); + toku::context promo_ctx(CTX_PROMO); - // Don't run garbage collection on non-mvcc leaf entries. - if (leaf_entry->type != LE_MVCC) { - goto exit; + // blackhole fractal trees drop all messages, so do nothing. + if (ft->blackhole) { + return; } - // Don't run garbage collection if this leafentry decides it's not worth it. 
- if (!toku_le_worth_running_garbage_collection(leaf_entry, gc_info)) { - goto exit; - } + FTNODE node; - LEAFENTRY new_leaf_entry; - new_leaf_entry = NULL; - - // The mempool doesn't free itself. When it allocates new memory, - // this pointer will be set to the older memory that must now be - // freed. - void * maybe_free; - maybe_free = NULL; - - // These will represent the number of bytes and rows changed as - // part of the garbage collection. - int64_t numbytes_delta; - int64_t numrows_delta; - toku_le_garbage_collect(leaf_entry, - &bn->data_buffer, - index, - keyp, - keylen, - gc_info, - &new_leaf_entry, - &numbytes_delta); - - numrows_delta = 0; - if (new_leaf_entry) { - numrows_delta = 0; - } else { - numrows_delta = -1; + uint32_t fullhash; + CACHEKEY root_key; + toku_calculate_root_offset_pointer(ft, &root_key, &fullhash); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + + size_t flow_deltas[] = { message_buffer::msg_memsize_in_buffer(msg), 0 }; + + pair_lock_type lock_type; + lock_type = PL_READ; // try first for a read lock + // If we need to split the root, we'll have to change from a read lock + // to a write lock and check again. We change the variable lock_type + // and jump back to here. + change_lock_type: + // get the root node + toku_pin_ftnode(ft, root_key, fullhash, &bfe, lock_type, &node, true); + toku_ftnode_assert_fully_in_memory(node); + paranoid_invariant(node->fullhash==fullhash); + ft_verify_flags(ft, node); + + // First handle a reactive root. + // This relocking for split algorithm will cause every message + // injection thread to change lock type back and forth, when only one + // of them needs to in order to handle the split. That's not great, + // but root splits are incredibly rare. + enum reactivity re = toku_ftnode_get_reactivity(ft, node); + switch (re) { + case RE_STABLE: + case RE_FUSIBLE: // cannot merge anything at the root + if (lock_type != PL_READ) { + // We thought we needed to split, but someone else got to + // it before us. Downgrade to a read lock. + toku_unpin_ftnode_read_only(ft, node); + lock_type = PL_READ; + goto change_lock_type; + } + break; + case RE_FISSIBLE: + if (lock_type == PL_READ) { + // Here, we only have a read lock on the root. In order + // to split it, we need a write lock, but in the course of + // gaining the write lock, someone else may have gotten in + // before us and split it. So we upgrade to a write lock + // and check again. + toku_unpin_ftnode_read_only(ft, node); + lock_type = PL_WRITE_CHEAP; + goto change_lock_type; + } else { + // We have a write lock, now we can split. + ft_init_new_root(ft, node, &node); + // Then downgrade back to a read lock, and we can finally + // do the injection. + toku_unpin_ftnode(ft, node); + lock_type = PL_READ; + STATUS_INC(FT_PRO_NUM_ROOT_SPLIT, 1); + goto change_lock_type; + } + break; } + // If we get to here, we have a read lock and the root doesn't + // need to be split. It's safe to inject the message. + paranoid_invariant(lock_type == PL_READ); + // We cannot assert that we have the read lock because frwlock asserts + // that its mutex is locked when we check if there are any readers. + // That wouldn't give us a strong guarantee that we have the read lock + // anyway. - // If we created a new mempool buffer we must free the - // old/original buffer. - if (maybe_free) { - toku_free(maybe_free); + // Now, either inject here or promote. 
We decide based on a heuristic: + if (node->height == 0 || !ft_msg_type_applies_once(msg.type())) { + // If the root's a leaf or we're injecting a broadcast, drop the read lock and inject here. + toku_unpin_ftnode_read_only(ft, node); + STATUS_INC(FT_PRO_NUM_ROOT_H0_INJECT, 1); + inject_message_at_this_blocknum(ft, root_key, fullhash, msg, flow_deltas, gc_info); + } else if (node->height > 1) { + // If the root's above height 1, we are definitely eligible for promotion. + push_something_in_subtree(ft, node, -1, msg, flow_deltas, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false); + } else { + // The root's height 1. We may be eligible for promotion here. + // On the extremes, we want to promote, in the middle, we don't. + int childnum = toku_ftnode_which_child(node, msg.kdbt(), ft->cmp); + if (childnum == 0 || childnum == node->n_children - 1) { + // On the extremes, promote. We know which childnum we're going to, so pass that down too. + push_something_in_subtree(ft, node, childnum, msg, flow_deltas, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false); + } else { + // At height 1 in the middle, don't promote, drop the read lock and inject here. + toku_unpin_ftnode_read_only(ft, node); + STATUS_INC(FT_PRO_NUM_ROOT_H1_INJECT, 1); + inject_message_at_this_blocknum(ft, root_key, fullhash, msg, flow_deltas, gc_info); + } } +} - // Update stats. - bn->stat64_delta.numrows += numrows_delta; - bn->stat64_delta.numbytes += numbytes_delta; - delta->numrows += numrows_delta; - delta->numbytes += numbytes_delta; +// TODO: Remove me, I'm boring. +static int ft_compare_keys(FT ft, const DBT *a, const DBT *b) +// Effect: Compare two keys using the given fractal tree's comparator/descriptor +{ + return ft->cmp(a, b); +} -exit: - return; +static LEAFENTRY bn_get_le_and_key(BASEMENTNODE bn, int idx, DBT *key) +// Effect: Gets the i'th leafentry from the given basement node and +// fill its key in *key +// Requires: The i'th leafentry exists. +{ + LEAFENTRY le; + uint32_t le_len; + void *le_key; + int r = bn->data_buffer.fetch_klpair(idx, &le, &le_len, &le_key); + invariant_zero(r); + toku_fill_dbt(key, le_key, le_len); + return le; } -// Garbage collect all leaf entries for a given basement node. -static void -basement_node_gc_all_les(BASEMENTNODE bn, - txn_gc_info *gc_info, - STAT64INFO_S * delta) +static LEAFENTRY ft_leaf_leftmost_le_and_key(FTNODE leaf, DBT *leftmost_key) +// Effect: If a leftmost key exists in the given leaf, toku_fill_dbt() +// the key into *leftmost_key +// Requires: Leaf is fully in memory and pinned for read or write. +// Return: leafentry if it exists, nullptr otherwise { - int r = 0; - uint32_t index = 0; - uint32_t num_leafentries_before; - while (index < (num_leafentries_before = bn->data_buffer.omt_size())) { - void* keyp = NULL; - uint32_t keylen = 0; - LEAFENTRY leaf_entry; - r = bn->data_buffer.fetch_klpair(index, &leaf_entry, &keylen, &keyp); - assert_zero(r); - ft_basement_node_gc_once( - bn, - index, - keyp, - keylen, - leaf_entry, - gc_info, - delta - ); - // Check if the leaf entry was deleted or not. - if (num_leafentries_before == bn->data_buffer.omt_size()) { - ++index; + for (int i = 0; i < leaf->n_children; i++) { + BASEMENTNODE bn = BLB(leaf, i); + if (bn->data_buffer.num_klpairs() > 0) { + // Get the first (leftmost) leafentry and its key + return bn_get_le_and_key(bn, 0, leftmost_key); } } + return nullptr; } -// Garbage collect all leaf entires in all basement nodes. 
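toku_ft_root_put_msg above pins the root optimistically with a read lock and only upgrades when the root turns out to be fissible, re-checking reactivity after every change of lock type. A compressed sketch of that relock-and-recheck loop with stand-in types (not the real cachetable API):

    #include <cstdio>

    // Hypothetical stand-ins; only the locking protocol is being illustrated.
    enum lock_t { lock_read, lock_write_cheap };
    enum react_t { re_stable, re_fissible };
    struct fake_root { react_t re; int splits; };

    static void pin(fake_root &, lock_t) {}
    static void unpin(fake_root &) {}
    static void split_root(fake_root &r) { r.re = re_stable; r.splits++; }

    static void root_put(fake_root &root) {
        lock_t lock = lock_read;                 // optimistic: most puts never split the root
        for (;;) {
            pin(root, lock);
            if (root.re == re_fissible) {
                if (lock == lock_read) {         // need a write lock: drop, upgrade, re-check
                    unpin(root);
                    lock = lock_write_cheap;
                    continue;
                }
                split_root(root);                // write-locked, safe to split
                unpin(root);
                lock = lock_read;                // downgrade and re-check before injecting
                continue;
            }
            if (lock != lock_read) {             // someone else split it first: downgrade
                unpin(root);
                lock = lock_read;
                continue;
            }
            break;                               // read-locked, stable root: inject or promote
        }
        unpin(root);
        printf("root splits performed: %d\n", root.splits);
    }

    int main(void) {
        fake_root r = { re_fissible, 0 };
        root_put(r);                             // prints "root splits performed: 1"
        return 0;
    }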
-static void -ft_leaf_gc_all_les(FT ft, FTNODE node, txn_gc_info *gc_info) +static LEAFENTRY ft_leaf_rightmost_le_and_key(FTNODE leaf, DBT *rightmost_key) +// Effect: If a rightmost key exists in the given leaf, toku_fill_dbt() +// the key into *rightmost_key +// Requires: Leaf is fully in memory and pinned for read or write. +// Return: leafentry if it exists, nullptr otherwise +{ + for (int i = leaf->n_children - 1; i >= 0; i--) { + BASEMENTNODE bn = BLB(leaf, i); + size_t num_les = bn->data_buffer.num_klpairs(); + if (num_les > 0) { + // Get the last (rightmost) leafentry and its key + return bn_get_le_and_key(bn, num_les - 1, rightmost_key); + } + } + return nullptr; +} + +static int ft_leaf_get_relative_key_pos(FT ft, FTNODE leaf, const DBT *key, bool *nondeleted_key_found, int *target_childnum) +// Effect: Determines what the relative position of the given key is with +// respect to a leaf node, and if it exists. +// Requires: Leaf is fully in memory and pinned for read or write. +// Requires: target_childnum is non-null +// Return: < 0 if key is less than the leftmost key in the leaf OR the relative position is unknown, for any reason. +// 0 if key is in the bounds [leftmost_key, rightmost_key] for this leaf or the leaf is empty +// > 0 if key is greater than the rightmost key in the leaf +// *nondeleted_key_found is set (if non-null) if the target key was found and is not deleted, unmodified otherwise +// *target_childnum is set to the child that (does or would) contain the key, if calculated, unmodified otherwise { - toku_assert_entire_node_in_memory(node); - paranoid_invariant_zero(node->height); - // Loop through each leaf entry, garbage collecting as we go. - for (int i = 0; i < node->n_children; ++i) { - // Perform the garbage collection. - BASEMENTNODE bn = BLB(node, i); - STAT64INFO_S delta; - delta.numrows = 0; - delta.numbytes = 0; - basement_node_gc_all_les(bn, gc_info, &delta); - toku_ft_update_stats(&ft->in_memory_stats, delta); + DBT rightmost_key; + LEAFENTRY rightmost_le = ft_leaf_rightmost_le_and_key(leaf, &rightmost_key); + if (rightmost_le == nullptr) { + // If we can't get a rightmost key then the leaf is empty. + // In such a case, we don't have any information about what keys would be in this leaf. + // We have to assume the leaf node that would contain this key is to the left. + return -1; } -} + // We have a rightmost leafentry, so it must exist in some child node + invariant(leaf->n_children > 0); -static void -ft_leaf_run_gc(FT ft, FTNODE node) { - TOKULOGGER logger = toku_cachefile_logger(ft->cf); - if (logger) { - TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger); - txn_manager_state txn_state_for_gc(txn_manager); - txn_state_for_gc.init(); - TXNID oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager); - - // Perform full garbage collection. - // - // - txn_state_for_gc - // a fresh snapshot of the transaction system. - // - oldest_referenced_xid_for_simple_gc - // the oldest xid in any live list as of right now - suitible for simple gc - // - node->oldest_referenced_xid_known - // the last known oldest referenced xid for this node and any unapplied messages. 
- // it is a lower bound on the actual oldest referenced xid - but becasue there - // may be abort messages above us, we need to be careful to only use this value - // for implicit promotion (as opposed to the oldest referenced xid for simple gc) - // - // The node has its own oldest referenced xid because it must be careful not to implicitly promote - // provisional entries for transactions that are no longer live, but may have abort messages - // somewhere above us in the tree. - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_for_simple_gc, - node->oldest_referenced_xid_known, - true); - ft_leaf_gc_all_les(ft, node, &gc_info); + int relative_pos = 0; + int c = ft_compare_keys(ft, key, &rightmost_key); + if (c > 0) { + relative_pos = 1; + *target_childnum = leaf->n_children - 1; + } else if (c == 0) { + if (nondeleted_key_found != nullptr && !le_latest_is_del(rightmost_le)) { + *nondeleted_key_found = true; + } + relative_pos = 0; + *target_childnum = leaf->n_children - 1; + } else { + // The key is less than the rightmost. It may still be in bounds if it's >= the leftmost. + DBT leftmost_key; + LEAFENTRY leftmost_le = ft_leaf_leftmost_le_and_key(leaf, &leftmost_key); + invariant_notnull(leftmost_le); // Must exist because a rightmost exists + c = ft_compare_keys(ft, key, &leftmost_key); + if (c > 0) { + if (nondeleted_key_found != nullptr) { + // The caller wants to know if a nondeleted key can be found. + LEAFENTRY target_le; + int childnum = toku_ftnode_which_child(leaf, key, ft->cmp); + BASEMENTNODE bn = BLB(leaf, childnum); + struct toku_msg_leafval_heaviside_extra extra(ft->cmp, key); + int r = bn->data_buffer.find_zero( + extra, + &target_le, + nullptr, nullptr, nullptr + ); + *target_childnum = childnum; + if (r == 0 && !le_latest_is_del(leftmost_le)) { + *nondeleted_key_found = true; + } + } + relative_pos = 0; + } else if (c == 0) { + if (nondeleted_key_found != nullptr && !le_latest_is_del(leftmost_le)) { + *nondeleted_key_found = true; + } + relative_pos = 0; + *target_childnum = 0; + } else { + relative_pos = -1; + } } -} -void toku_bnc_flush_to_child( - FT ft, - NONLEAF_CHILDINFO bnc, - FTNODE child, - TXNID parent_oldest_referenced_xid_known - ) -{ - paranoid_invariant(bnc); - STAT64INFO_S stats_delta = {0,0}; - size_t remaining_memsize = toku_fifo_buffer_size_in_use(bnc->buffer); + return relative_pos; +} - TOKULOGGER logger = toku_cachefile_logger(ft->cf); - TXN_MANAGER txn_manager = logger != nullptr ? 
toku_logger_get_txn_manager(logger) : nullptr; - TXNID oldest_referenced_xid_for_simple_gc = TXNID_NONE; +static void ft_insert_directly_into_leaf(FT ft, FTNODE leaf, int target_childnum, DBT *key, DBT *val, + XIDS message_xids, enum ft_msg_type type, txn_gc_info *gc_info); +static int getf_nothing(uint32_t, const void *, uint32_t, const void *, void *, bool); - txn_manager_state txn_state_for_gc(txn_manager); - bool do_garbage_collection = child->height == 0 && txn_manager != nullptr; - if (do_garbage_collection) { - txn_state_for_gc.init(); - oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager); - } - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_for_simple_gc, - child->oldest_referenced_xid_known, - true); - FIFO_ITERATE( - bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - ({ - DBT hk,hv; - FT_MSG_S ftcmd = { type, msn, xids, .u = { .id = { toku_fill_dbt(&hk, key, keylen), - toku_fill_dbt(&hv, val, vallen) } } }; - size_t flow_deltas[] = { 0, 0 }; - if (remaining_memsize <= bnc->flow[0]) { - // this message is in the current checkpoint's worth of - // the end of the fifo - flow_deltas[0] = FIFO_CURRENT_ENTRY_MEMSIZE; - } else if (remaining_memsize <= bnc->flow[0] + bnc->flow[1]) { - // this message is in the last checkpoint's worth of the - // end of the fifo - flow_deltas[1] = FIFO_CURRENT_ENTRY_MEMSIZE; - } - toku_ft_node_put_cmd( - ft->compare_fun, - ft->update_fun, - &ft->cmp_descriptor, - child, - -1, - &ftcmd, - is_fresh, - &gc_info, - flow_deltas, - &stats_delta - ); - remaining_memsize -= FIFO_CURRENT_ENTRY_MEMSIZE; - })); - child->oldest_referenced_xid_known = parent_oldest_referenced_xid_known; - - invariant(remaining_memsize == 0); - if (stats_delta.numbytes || stats_delta.numrows) { - toku_ft_update_stats(&ft->in_memory_stats, stats_delta); - } - if (do_garbage_collection) { - size_t buffsize = toku_fifo_buffer_size_in_use(bnc->buffer); - STATUS_INC(FT_MSG_BYTES_OUT, buffsize); - // may be misleading if there's a broadcast message in there - STATUS_INC(FT_MSG_BYTES_CURR, -buffsize); - } -} - -bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) { - static const double factor = 0.125; - const uint64_t flow_threshold = ft->h->nodesize * factor; - return bnc->flow[0] >= flow_threshold || bnc->flow[1] >= flow_threshold; -} - -void -toku_ft_node_put_cmd ( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - FTNODE node, - int target_childnum, - FT_MSG cmd, - bool is_fresh, - txn_gc_info *gc_info, - size_t flow_deltas[], - STAT64INFO stats_to_update - ) -// Effect: Push CMD into the subtree rooted at NODE. -// If NODE is a leaf, then -// put CMD into leaf, applying it to the leafentries -// If NODE is a nonleaf, then push the cmd into the FIFO(s) of the relevent child(ren). -// The node may become overfull. That's not our problem. +static int ft_maybe_insert_into_rightmost_leaf(FT ft, DBT *key, DBT *val, XIDS message_xids, enum ft_msg_type type, + txn_gc_info *gc_info, bool unique) +// Effect: Pins the rightmost leaf node and attempts to do an insert. +// There are three reasons why we may not succeed. +// - The rightmost leaf is too full and needs a split. +// - The key to insert is not within the provable bounds of this leaf node. +// - The key is within bounds, but it already exists. 
+// Return: 0 if this function did insert, DB_KEYEXIST if a unique key constraint exists and +// some nondeleted leafentry with the same key exists +// < 0 if this function did not insert, for a reason other than DB_KEYEXIST. +// Note: Treat this function as a possible, but not necessary, optimization for insert. +// Rationale: We want O(1) insertions down the rightmost path of the tree. { - toku_assert_entire_node_in_memory(node); + int r = -1; + + uint32_t rightmost_fullhash; + BLOCKNUM rightmost_blocknum = ft->rightmost_blocknum; + FTNODE rightmost_leaf = nullptr; + + // Don't do the optimization if our heuristic suggests that + // the insertion pattern is not sequential. + if (ft->seqinsert_score < FT_SEQINSERT_SCORE_THRESHOLD) { + goto cleanup; + } + + // We know the seqinsert score is high enough that we should + // attempt to directly insert into the rightmost leaf. Because + // the score is non-zero, the rightmost blocknum must have been + // set. See inject_message_in_locked_node(), which only increases + // the score if the target node blocknum == rightmost_blocknum + invariant(rightmost_blocknum.b != RESERVED_BLOCKNUM_NULL); + + // Pin the rightmost leaf with a write lock. + rightmost_fullhash = toku_cachetable_hash(ft->cf, rightmost_blocknum); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + toku_pin_ftnode(ft, rightmost_blocknum, rightmost_fullhash, &bfe, PL_WRITE_CHEAP, &rightmost_leaf, true); + + // The rightmost blocknum never changes once it is initialized to something + // other than null. Verify that the pinned node has the correct blocknum. + invariant(rightmost_leaf->blocknum.b == rightmost_blocknum.b); + + // If the rightmost leaf is reactive, bail out and let the normal promotion pass + // take care of it. This also ensures that if any of our ancestors are reactive, + // they'll be taken care of too. + if (toku_ftnode_get_leaf_reactivity(rightmost_leaf, ft->h->nodesize) != RE_STABLE) { + STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE, 1); + goto cleanup; + } + + // The groundwork has been laid for an insertion directly into the rightmost + // leaf node. We know that it is pinned for write, fully in memory, has + // no messages above it, and is not reactive. // - // see comments in toku_ft_leaf_apply_cmd - // to understand why we don't handle setting - // node->max_msn_applied_to_node_on_disk here, - // and instead defer to these functions - // - if (node->height==0) { - toku_ft_leaf_apply_cmd(compare_fun, update_fun, desc, node, target_childnum, cmd, gc_info, nullptr, stats_to_update); + // Now, two more things must be true for this insertion to actually happen: + // 1. The key to insert is within the bounds of this leaf node, or to the right. + // 2. If there is a uniqueness constraint, it passes. + bool nondeleted_key_found; + int relative_pos; + int target_childnum; + + nondeleted_key_found = false; + target_childnum = -1; + relative_pos = ft_leaf_get_relative_key_pos(ft, rightmost_leaf, key, + unique ?
&nondeleted_key_found : nullptr, + &target_childnum); + if (relative_pos >= 0) { + STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_SUCCESS, 1); + if (unique && nondeleted_key_found) { + r = DB_KEYEXIST; + } else { + ft_insert_directly_into_leaf(ft, rightmost_leaf, target_childnum, + key, val, message_xids, type, gc_info); + r = 0; + } } else { - ft_nonleaf_put_cmd(compare_fun, desc, node, target_childnum, cmd, is_fresh, flow_deltas); + STATUS_INC(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_POS, 1); + r = -1; } -} -static const struct pivot_bounds infinite_bounds = {.lower_bound_exclusive=NULL, - .upper_bound_inclusive=NULL}; +cleanup: + // If we did the insert, the rightmost leaf was unpinned for us. + if (r != 0 && rightmost_leaf != nullptr) { + toku_unpin_ftnode(ft, rightmost_leaf); + } + return r; +} + +static void ft_txn_log_insert(FT ft, DBT *key, DBT *val, TOKUTXN txn, bool do_logging, enum ft_msg_type type); -// Effect: applies the cmd to the leaf if the appropriate basement node is in memory. -// This function is called during message injection and/or flushing, so the entire -// node MUST be in memory. -void toku_ft_leaf_apply_cmd( - ft_compare_func compare_fun, - ft_update_func update_fun, - DESCRIPTOR desc, - FTNODE node, - int target_childnum, // which child to inject to, or -1 if unknown - FT_MSG cmd, - txn_gc_info *gc_info, - uint64_t *workdone, - STAT64INFO stats_to_update - ) -{ - VERIFY_NODE(t, node); - toku_assert_entire_node_in_memory(node); +int toku_ft_insert_unique(FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool do_logging) { +// Effect: Insert a unique key-val pair into the fractal tree. +// Return: 0 on success, DB_KEYEXIST if the overwrite constraint failed + XIDS message_xids = txn != nullptr ? toku_txn_get_xids(txn) : toku_xids_get_root_xids(); - // - // Because toku_ft_leaf_apply_cmd is called with the intent of permanently - // applying a message to a leaf node (meaning the message is permanently applied - // and will be purged from the system after this call, as opposed to - // toku_apply_ancestors_messages_to_node, which applies a message - // for a query, but the message may still reside in the system and - // be reapplied later), we mark the node as dirty and - // take the opportunity to update node->max_msn_applied_to_node_on_disk. - // - node->dirty = 1; + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); + txn_manager_state txn_state_for_gc(txn_manager); - // - // we cannot blindly update node->max_msn_applied_to_node_on_disk, - // we must check to see if the msn is greater that the one already stored, - // because the cmd may have already been applied earlier (via - // toku_apply_ancestors_messages_to_node) to answer a query - // - // This is why we handle node->max_msn_applied_to_node_on_disk both here - // and in ft_nonleaf_put_cmd, as opposed to in one location, toku_ft_node_put_cmd. - // - MSN cmd_msn = cmd->msn; - if (cmd_msn.msn > node->max_msn_applied_to_node_on_disk.msn) { - node->max_msn_applied_to_node_on_disk = cmd_msn; - } - - if (ft_msg_applies_once(cmd)) { - unsigned int childnum = (target_childnum >= 0 - ? 
target_childnum - : toku_ftnode_which_child(node, cmd->u.id.key, desc, compare_fun)); - BASEMENTNODE bn = BLB(node, childnum); - if (cmd->msn.msn > bn->max_msn_applied.msn) { - bn->max_msn_applied = cmd->msn; - toku_ft_bn_apply_cmd(compare_fun, - update_fun, - desc, - bn, - cmd, - gc_info, - workdone, - stats_to_update); + TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_estimate, + // no messages above us, we can implicitly promote uxrs based on this xid + oldest_referenced_xid_estimate, + true); + int r = ft_maybe_insert_into_rightmost_leaf(ft_h->ft, key, val, message_xids, FT_INSERT, &gc_info, true); + if (r != 0 && r != DB_KEYEXIST) { + // Default to a regular unique check + insert algorithm if we couldn't + // do it based on the rightmost leaf alone. + int lookup_r = toku_ft_lookup(ft_h, key, getf_nothing, nullptr); + if (lookup_r == DB_NOTFOUND) { + toku_ft_send_insert(ft_h, key, val, message_xids, FT_INSERT, &gc_info); + r = 0; } else { - STATUS_INC(FT_MSN_DISCARDS, 1); + r = DB_KEYEXIST; } } - else if (ft_msg_applies_all(cmd)) { - for (int childnum=0; childnumn_children; childnum++) { - if (cmd->msn.msn > BLB(node, childnum)->max_msn_applied.msn) { - BLB(node, childnum)->max_msn_applied = cmd->msn; - toku_ft_bn_apply_cmd(compare_fun, - update_fun, - desc, - BLB(node, childnum), - cmd, - gc_info, - workdone, - stats_to_update); - } else { - STATUS_INC(FT_MSN_DISCARDS, 1); - } - } + + if (r == 0) { + ft_txn_log_insert(ft_h->ft, key, val, txn, do_logging, FT_INSERT); } - else if (!ft_msg_does_nothing(cmd)) { - abort(); + return r; +} + +// Effect: Insert the key-val pair into an ft. +void toku_ft_insert (FT_HANDLE ft_handle, DBT *key, DBT *val, TOKUTXN txn) { + toku_ft_maybe_insert(ft_handle, key, val, txn, false, ZERO_LSN, true, FT_INSERT); +} + +void toku_ft_load_recovery(TOKUTXN txn, FILENUM old_filenum, char const * new_iname, int do_fsync, int do_log, LSN *load_lsn) { + paranoid_invariant(txn); + toku_txn_force_fsync_on_commit(txn); //If the txn commits, the commit MUST be in the log + //before the (old) file is actually unlinked + TOKULOGGER logger = toku_txn_logger(txn); + + BYTESTRING new_iname_bs = {.len=(uint32_t) strlen(new_iname), .data=(char*)new_iname}; + toku_logger_save_rollback_load(txn, old_filenum, &new_iname_bs); + if (do_log && logger) { + TXNID_PAIR xid = toku_txn_get_txnid(txn); + toku_log_load(logger, load_lsn, do_fsync, txn, xid, old_filenum, new_iname_bs); } - VERIFY_NODE(t, node); } -static void inject_message_in_locked_node( - FT ft, - FTNODE node, - int childnum, - FT_MSG_S *cmd, - size_t flow_deltas[], - txn_gc_info *gc_info - ) +// 2954 +// this function handles the tasks needed to be recoverable +// - write to rollback log +// - write to recovery log +void toku_ft_hot_index_recovery(TOKUTXN txn, FILENUMS filenums, int do_fsync, int do_log, LSN *hot_index_lsn) { - // No guarantee that we're the writer, but oh well. - // TODO(leif): Implement "do I have the lock or is it someone else?" - // check in frwlock. Should be possible with TOKU_PTHREAD_DEBUG, nop - // otherwise. - invariant(toku_ctpair_is_write_locked(node->ct_pair)); - toku_assert_entire_node_in_memory(node); + paranoid_invariant(txn); + TOKULOGGER logger = toku_txn_logger(txn); - // Take the newer of the two oldest referenced xid values from the node and gc_info. - // The gc_info usually has a newer value, because we got it at the top of this call - // stack from the txn manager. 
But sometimes the node has a newer value, if some - // other thread sees a newer value and writes to this node before we got the lock. - if (gc_info->oldest_referenced_xid_for_implicit_promotion > node->oldest_referenced_xid_known) { - node->oldest_referenced_xid_known = gc_info->oldest_referenced_xid_for_implicit_promotion; - } else if (gc_info->oldest_referenced_xid_for_implicit_promotion < node->oldest_referenced_xid_known) { - gc_info->oldest_referenced_xid_for_implicit_promotion = node->oldest_referenced_xid_known; + // write to the rollback log + toku_logger_save_rollback_hot_index(txn, &filenums); + if (do_log && logger) { + TXNID_PAIR xid = toku_txn_get_txnid(txn); + // write to the recovery log + toku_log_hot_index(logger, hot_index_lsn, do_fsync, txn, xid, filenums); } +} - // Get the MSN from the header. Now that we have a write lock on the - // node we're injecting into, we know no other thread will get an MSN - // after us and get that message into our subtree before us. - cmd->msn.msn = toku_sync_add_and_fetch(&ft->h->max_msn_in_ft.msn, 1); - paranoid_invariant(cmd->msn.msn > node->max_msn_applied_to_node_on_disk.msn); - STAT64INFO_S stats_delta = {0,0}; - toku_ft_node_put_cmd( - ft->compare_fun, - ft->update_fun, - &ft->cmp_descriptor, - node, - childnum, - cmd, - true, - gc_info, - flow_deltas, - &stats_delta - ); - if (stats_delta.numbytes || stats_delta.numrows) { - toku_ft_update_stats(&ft->in_memory_stats, stats_delta); +// Effect: Optimize the ft. +void toku_ft_optimize (FT_HANDLE ft_h) { + TOKULOGGER logger = toku_cachefile_logger(ft_h->ft->cf); + if (logger) { + TXNID oldest = toku_txn_manager_get_oldest_living_xid(logger->txn_manager); + + XIDS root_xids = toku_xids_get_root_xids(); + XIDS message_xids; + if (oldest == TXNID_NONE_LIVING) { + message_xids = root_xids; + } + else { + int r = toku_xids_create_child(root_xids, &message_xids, oldest); + invariant(r == 0); + } + + DBT key; + DBT val; + toku_init_dbt(&key); + toku_init_dbt(&val); + ft_msg msg(&key, &val, FT_OPTIMIZE, ZERO_MSN, message_xids); + + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); + txn_manager_state txn_state_for_gc(txn_manager); + + TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_estimate, + // no messages above us, we can implicitly promote uxrs based on this xid + oldest_referenced_xid_estimate, + true); + toku_ft_root_put_msg(ft_h->ft, msg, &gc_info); + toku_xids_destroy(&message_xids); } - // - // assumption is that toku_ft_node_put_cmd will - // mark the node as dirty. - // enforcing invariant here. - // - paranoid_invariant(node->dirty != 0); +} - // TODO: Why not at height 0? 
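ft_maybe_insert_into_rightmost_leaf above takes the shortcut only when ft_leaf_get_relative_key_pos reports the key at or to the right of the pinned leaf's leftmost key (a non-negative return), and turns a uniqueness hit into DB_KEYEXIST. A toy integer model of that contract, with hypothetical helpers and return codes rather than the real comparator or error values:

    #include <cstdio>

    static const int r_ok = 0, r_keyexist = 1, r_no_shortcut = -1;   // hypothetical codes

    // Toy model: the rightmost leaf provably covers keys in [lo, hi].
    static int relative_pos(int key, int lo, int hi) {
        if (key < lo) return -1;   // can't prove the key belongs here: give up
        if (key > hi) return +1;   // past the rightmost key: still this leaf's territory
        return 0;                  // inside the leaf's proven bounds
    }

    static int maybe_insert_rightmost(int key, int lo, int hi, bool unique, bool key_present) {
        if (relative_pos(key, lo, hi) < 0) return r_no_shortcut;
        if (unique && key_present) return r_keyexist;
        return r_ok;               // insert directly into the pinned leaf
    }

    int main(void) {
        printf("%d %d %d\n",
               maybe_insert_rightmost(99, 10, 50, true, false),   //  0: sequential append
               maybe_insert_rightmost(30, 10, 50, true, true),    //  1: unique constraint hit
               maybe_insert_rightmost(5,  10, 50, true, false));  // -1: fall back to the normal path
        return 0;
    }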
- // update some status variables - if (node->height != 0) { - uint64_t msgsize = ft_msg_size(cmd); - STATUS_INC(FT_MSG_BYTES_IN, msgsize); - STATUS_INC(FT_MSG_BYTES_CURR, msgsize); - STATUS_INC(FT_MSG_NUM, 1); - if (ft_msg_applies_all(cmd)) { - STATUS_INC(FT_MSG_NUM_BROADCAST, 1); +void toku_ft_load(FT_HANDLE ft_handle, TOKUTXN txn, char const * new_iname, int do_fsync, LSN *load_lsn) { + FILENUM old_filenum = toku_cachefile_filenum(ft_handle->ft->cf); + int do_log = 1; + toku_ft_load_recovery(txn, old_filenum, new_iname, do_fsync, do_log, load_lsn); +} + +// ft actions for logging hot index filenums +void toku_ft_hot_index(FT_HANDLE ft_handle __attribute__ ((unused)), TOKUTXN txn, FILENUMS filenums, int do_fsync, LSN *lsn) { + int do_log = 1; + toku_ft_hot_index_recovery(txn, filenums, do_fsync, do_log, lsn); +} + +void +toku_ft_log_put (TOKUTXN txn, FT_HANDLE ft_handle, const DBT *key, const DBT *val) { + TOKULOGGER logger = toku_txn_logger(txn); + if (logger) { + BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; + BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; + TXNID_PAIR xid = toku_txn_get_txnid(txn); + toku_log_enq_insert(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft_handle->ft->cf), xid, keybs, valbs); + } +} + +void +toku_ft_log_put_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *fts, uint32_t num_fts, const DBT *key, const DBT *val) { + assert(txn); + assert(num_fts > 0); + TOKULOGGER logger = toku_txn_logger(txn); + if (logger) { + FILENUM fnums[num_fts]; + uint32_t i; + for (i = 0; i < num_fts; i++) { + fnums[i] = toku_cachefile_filenum(fts[i]->ft->cf); } + FILENUMS filenums = {.num = num_fts, .filenums = fnums}; + BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; + BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; + TXNID_PAIR xid = toku_txn_get_txnid(txn); + FILENUM src_filenum = src_ft ? toku_cachefile_filenum(src_ft->ft->cf) : FILENUM_NONE; + toku_log_enq_insert_multiple(logger, (LSN*)0, 0, txn, src_filenum, filenums, xid, keybs, valbs); } +} - // verify that msn of latest message was captured in root node - paranoid_invariant(cmd->msn.msn == node->max_msn_applied_to_node_on_disk.msn); +TXN_MANAGER toku_ft_get_txn_manager(FT_HANDLE ft_h) { + TOKULOGGER logger = toku_cachefile_logger(ft_h->ft->cf); + return logger != nullptr ? toku_logger_get_txn_manager(logger) : nullptr; +} - // if we call toku_ft_flush_some_child, then that function unpins the root - // otherwise, we unpin ourselves - if (node->height > 0 && toku_ft_nonleaf_is_gorged(node, ft->h->nodesize)) { - toku_ft_flush_node_on_background_thread(ft, node); +TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h) { + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); + return txn_manager != nullptr ? 
toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager) : TXNID_NONE; +} + +static void ft_txn_log_insert(FT ft, DBT *key, DBT *val, TOKUTXN txn, bool do_logging, enum ft_msg_type type) { + paranoid_invariant(type == FT_INSERT || type == FT_INSERT_NO_OVERWRITE); + + //By default use committed messages + TXNID_PAIR xid = toku_txn_get_txnid(txn); + if (txn) { + BYTESTRING keybs = {key->size, (char *) key->data}; + toku_logger_save_rollback_cmdinsert(txn, toku_cachefile_filenum(ft->cf), &keybs); + toku_txn_maybe_note_ft(txn, ft); } - else { - toku_unpin_ftnode(ft, node); + TOKULOGGER logger = toku_txn_logger(txn); + if (do_logging && logger) { + BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; + BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; + if (type == FT_INSERT) { + toku_log_enq_insert(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft->cf), xid, keybs, valbs); + } + else { + toku_log_enq_insert_no_overwrite(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft->cf), xid, keybs, valbs); + } } } -// seqinsert_loc is a bitmask. -// The root counts as being both on the "left extreme" and on the "right extreme". -// Therefore, at the root, you're at LEFT_EXTREME | RIGHT_EXTREME. -typedef char seqinsert_loc; -static const seqinsert_loc NEITHER_EXTREME = 0; -static const seqinsert_loc LEFT_EXTREME = 1; -static const seqinsert_loc RIGHT_EXTREME = 2; +void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, enum ft_msg_type type) { + ft_txn_log_insert(ft_h->ft, key, val, txn, do_logging, type); -static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int childnum, seqinsert_loc loc) -// Effect: -// If child needs to be split or merged, do that. -// parent and child will be unlocked if this happens -// also, the batched pin will have ended if this happens -// Requires: parent and child are read locked -// Returns: -// true if relocking is needed -// false otherwise + LSN treelsn; + if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { + // do nothing + } else { + XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids(); + + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); + txn_manager_state txn_state_for_gc(txn_manager); + + TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_estimate, + // no messages above us, we can implicitly promote uxrs based on this xid + oldest_referenced_xid_estimate, + txn != nullptr ? !txn->for_recovery : false); + int r = ft_maybe_insert_into_rightmost_leaf(ft_h->ft, key, val, message_xids, FT_INSERT, &gc_info, false); + if (r != 0) { + toku_ft_send_insert(ft_h, key, val, message_xids, type, &gc_info); + } + } +} + +static void ft_insert_directly_into_leaf(FT ft, FTNODE leaf, int target_childnum, DBT *key, DBT *val, + XIDS message_xids, enum ft_msg_type type, txn_gc_info *gc_info) +// Effect: Insert directly into a leaf node a fractal tree. Does not do any logging. +// Requires: Leaf is fully in memory and pinned for write. +// Requires: If this insertion were to happen through the root node, the promotion +// algorithm would have selected the given leaf node as the point of injection. +// That means this function relies on the current implementation of promotion. 
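toku_ft_maybe_insert above drops the message when a valid operation LSN is at or below the tree's checkpoint LSN, presumably so a recovery replay does not re-apply work already reflected at the last checkpoint. A minimal sketch of that guard, using a hypothetical lsn_t rather than the real LSN type:

    #include <cstdint>
    #include <cstdio>

    struct lsn_t { uint64_t lsn; };   // hypothetical stand-in

    static bool should_apply(bool oplsn_valid, lsn_t oplsn, lsn_t tree_checkpoint_lsn) {
        // A valid operation LSN at or before the checkpoint means the tree
        // already contains this operation's effect; skip it.
        return !(oplsn_valid && oplsn.lsn <= tree_checkpoint_lsn.lsn);
    }

    int main(void) {
        lsn_t ckpt = { 1000 };
        printf("%d %d %d\n",
               should_apply(true,  lsn_t{ 900 },  ckpt),   // 0: already durable, do nothing
               should_apply(true,  lsn_t{ 1001 }, ckpt),   // 1: newer than the checkpoint, apply
               should_apply(false, lsn_t{ 0 },    ckpt));  // 1: normal (non-recovery) insert
        return 0;
    }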
{ - enum reactivity re = get_node_reactivity(ft, child); - enum reactivity newre; - BLOCKNUM child_blocknum; - uint32_t child_fullhash; - switch (re) { - case RE_STABLE: - return false; - case RE_FISSIBLE: - { - // We only have a read lock on the parent. We need to drop both locks, and get write locks. - BLOCKNUM parent_blocknum = parent->thisnodename; - uint32_t parent_fullhash = toku_cachetable_hash(ft->cf, parent_blocknum); - int parent_height = parent->height; - int parent_n_children = parent->n_children; - toku_unpin_ftnode_read_only(ft, child); - toku_unpin_ftnode_read_only(ft, parent); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); - FTNODE newparent, newchild; - toku_pin_ftnode_off_client_thread_batched(ft, parent_blocknum, parent_fullhash, &bfe, PL_WRITE_CHEAP, 0, nullptr, &newparent); - if (newparent->height != parent_height || newparent->n_children != parent_n_children || - childnum >= newparent->n_children || toku_bnc_n_entries(BNC(newparent, childnum))) { - // If the height changed or childnum is now off the end, something clearly got split or merged out from under us. - // If something got injected in this node, then it got split or merged and we shouldn't be splitting it. - // But we already unpinned the child so we need to have the caller re-try the pins. - toku_unpin_ftnode_read_only(ft, newparent); - return true; - } - // It's ok to reuse the same childnum because if we get something - // else we need to split, well, that's crazy, but let's go ahead - // and split it. - child_blocknum = BP_BLOCKNUM(newparent, childnum); - child_fullhash = compute_child_fullhash(ft->cf, newparent, childnum); - toku_pin_ftnode_off_client_thread_batched(ft, child_blocknum, child_fullhash, &bfe, PL_WRITE_CHEAP, 1, &newparent, &newchild); - newre = get_node_reactivity(ft, newchild); - if (newre == RE_FISSIBLE) { - enum split_mode split_mode; - if (newparent->height == 1 && (loc & LEFT_EXTREME) && childnum == 0) { - split_mode = SPLIT_RIGHT_HEAVY; - } else if (newparent->height == 1 && (loc & RIGHT_EXTREME) && childnum == newparent->n_children - 1) { - split_mode = SPLIT_LEFT_HEAVY; - } else { - split_mode = SPLIT_EVENLY; - } - toku_ft_split_child(ft, newparent, childnum, newchild, split_mode); - } else { - // some other thread already got it, just unpin and tell the - // caller to retry - toku_unpin_ftnode_read_only(ft, newchild); - toku_unpin_ftnode_read_only(ft, newparent); - } - return true; - } - case RE_FUSIBLE: - { - if (parent->height == 1) { - // prevent re-merging of recently unevenly-split nodes - if (((loc & LEFT_EXTREME) && childnum <= 1) || - ((loc & RIGHT_EXTREME) && childnum >= parent->n_children - 2)) { - return false; - } - } - - int parent_height = parent->height; - BLOCKNUM parent_blocknum = parent->thisnodename; - uint32_t parent_fullhash = toku_cachetable_hash(ft->cf, parent_blocknum); - toku_unpin_ftnode_read_only(ft, child); - toku_unpin_ftnode_read_only(ft, parent); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); - FTNODE newparent, newchild; - toku_pin_ftnode_off_client_thread_batched(ft, parent_blocknum, parent_fullhash, &bfe, PL_WRITE_CHEAP, 0, nullptr, &newparent); - if (newparent->height != parent_height || childnum >= newparent->n_children) { - // looks like this is the root and it got merged, let's just start over (like in the split case above) - toku_unpin_ftnode_read_only(ft, newparent); - return true; - } - child_blocknum = BP_BLOCKNUM(newparent, childnum); - child_fullhash = compute_child_fullhash(ft->cf, newparent, 
childnum); - toku_pin_ftnode_off_client_thread_batched(ft, child_blocknum, child_fullhash, &bfe, PL_READ, 1, &newparent, &newchild); - newre = get_node_reactivity(ft, newchild); - if (newre == RE_FUSIBLE && newparent->n_children >= 2) { - toku_unpin_ftnode_read_only(ft, newchild); - toku_ft_merge_child(ft, newparent, childnum); - } else { - // Could be a weird case where newparent has only one - // child. In this case, we want to inject here but we've - // already unpinned the caller's copy of parent so we have - // to ask them to re-pin, or they could (very rarely) - // dereferenced memory in a freed node. TODO: we could - // give them back the copy of the parent we pinned. - // - // Otherwise, some other thread already got it, just unpin - // and tell the caller to retry - toku_unpin_ftnode_read_only(ft, newchild); - toku_unpin_ftnode_read_only(ft, newparent); - } - return true; - } - } - abort(); -} - -static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t fullhash, FT_MSG_S *cmd, size_t flow_deltas[], txn_gc_info *gc_info) -// Effect: -// Inject cmd into the node at this blocknum (cachekey). -// Gets a write lock on the node for you. -{ - toku::context inject_ctx(CTX_MESSAGE_INJECTION); - FTNODE node; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); - toku_pin_ftnode_off_client_thread_batched(ft, cachekey, fullhash, &bfe, PL_WRITE_CHEAP, 0, NULL, &node); - toku_assert_entire_node_in_memory(node); - paranoid_invariant(node->fullhash==fullhash); - ft_verify_flags(ft, node); - inject_message_in_locked_node(ft, node, -1, cmd, flow_deltas, gc_info); -} - -__attribute__((const)) -static inline bool should_inject_in_node(seqinsert_loc loc, int height, int depth) -// We should inject directly in a node if: -// - it's a leaf, or -// - it's a height 1 node not at either extreme, or -// - it's a depth 2 node not at either extreme -{ - return (height == 0 || (loc == NEITHER_EXTREME && (height <= 1 || depth >= 2))); -} - -static void push_something_in_subtree( - FT ft, - FTNODE subtree_root, - int target_childnum, - FT_MSG_S *cmd, - size_t flow_deltas[], - txn_gc_info *gc_info, - int depth, - seqinsert_loc loc, - bool just_did_split_or_merge - ) -// Effects: -// Assign cmd an MSN from ft->h. -// Put cmd in the subtree rooted at node. Due to promotion the message may not be injected directly in this node. -// Unlock node or schedule it to be unlocked (after a background flush). -// Either way, the caller is not responsible for unlocking node. -// Requires: -// subtree_root is read locked and fully in memory. -// Notes: -// In Ming, the basic rules of promotion are as follows: -// Don't promote broadcast messages. -// Don't promote past non-empty buffers. -// Otherwise, promote at most to height 1 or depth 2 (whichever is highest), as far as the birdie asks you to promote. -// We don't promote to leaves because injecting into leaves is expensive, mostly because of #5605 and some of #5552. -// We don't promote past depth 2 because we found that gives us enough parallelism without costing us too much pinning work. -// -// This is true with the following caveats: -// We always promote all the way to the leaves on the rightmost and leftmost edges of the tree, for sequential insertions. -// (That means we can promote past depth 2 near the edges of the tree.) -// -// When the birdie is still saying we should promote, we use get_and_pin so that we wait to get the node. -// If the birdie doesn't say to promote, we try maybe_get_and_pin. 
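/*
 * Editorial sketch (annotation, not part of the patch): a standalone rendering of the
 * injection-depth rule that should_inject_in_node() above encodes. The enum and driver
 * below are simplified stand-ins for the real seqinsert_loc and tree types; the rule
 * itself is taken from the code: inject directly into a leaf, or into a height-1 /
 * depth-2 node that is not on either edge of the tree, so that edge paths (sequential
 * inserts) can keep promoting all the way down.
 */
#include <cstdio>

enum seqinsert_loc_sketch {
    NEITHER_EXTREME_S = 0,
    LEFT_EXTREME_S    = 1,
    RIGHT_EXTREME_S   = 2,
};

static bool should_inject_in_node_sketch(int loc, int height, int depth) {
    // Leaves always take the message; otherwise only "middle" nodes at
    // height <= 1 or depth >= 2 do, so edge paths keep promoting.
    return height == 0 ||
           (loc == NEITHER_EXTREME_S && (height <= 1 || depth >= 2));
}

int main(void) {
    printf("middle, height 1, depth 2: %d\n",
           should_inject_in_node_sketch(NEITHER_EXTREME_S, 1, 2)); // 1: inject here
    printf("middle, height 2, depth 1: %d\n",
           should_inject_in_node_sketch(NEITHER_EXTREME_S, 2, 1)); // 0: promote further
    printf("right edge, height 1, depth 2: %d\n",
           should_inject_in_node_sketch(RIGHT_EXTREME_S, 1, 2));   // 0: keep promoting toward the leaf
    return 0;
}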
If we get the node cheaply, and it's dirty, we promote anyway. -{ - toku_assert_entire_node_in_memory(subtree_root); - if (should_inject_in_node(loc, subtree_root->height, depth)) { - switch (depth) { - case 0: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_0, 1); break; - case 1: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_1, 1); break; - case 2: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_2, 1); break; - case 3: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_3, 1); break; - default: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break; - } - inject_message_in_locked_node(ft, subtree_root, target_childnum, cmd, flow_deltas, gc_info); - } else { - int r; - int childnum; - NONLEAF_CHILDINFO bnc; - - // toku_ft_root_put_cmd should not have called us otherwise. - paranoid_invariant(ft_msg_applies_once(cmd)); - - childnum = (target_childnum >= 0 ? target_childnum - : toku_ftnode_which_child(subtree_root, cmd->u.id.key, &ft->cmp_descriptor, ft->compare_fun)); - bnc = BNC(subtree_root, childnum); - - if (toku_bnc_n_entries(bnc) > 0) { - // The buffer is non-empty, give up on promoting. - STATUS_INC(FT_PRO_NUM_STOP_NONEMPTY_BUF, 1); - goto relock_and_push_here; - } - - seqinsert_loc next_loc; - if ((loc & LEFT_EXTREME) && childnum == 0) { - next_loc = LEFT_EXTREME; - } else if ((loc & RIGHT_EXTREME) && childnum == subtree_root->n_children - 1) { - next_loc = RIGHT_EXTREME; - } else { - next_loc = NEITHER_EXTREME; - } - - if (next_loc == NEITHER_EXTREME && subtree_root->height <= 1) { - // Never promote to leaf nodes except on the edges - STATUS_INC(FT_PRO_NUM_STOP_H1, 1); - goto relock_and_push_here; - } - - { - const BLOCKNUM child_blocknum = BP_BLOCKNUM(subtree_root, childnum); - toku_verify_blocknum_allocated(ft->blocktable, child_blocknum); - const uint32_t child_fullhash = toku_cachetable_hash(ft->cf, child_blocknum); - - FTNODE child; - { - const int child_height = subtree_root->height - 1; - const int child_depth = depth + 1; - // If we're locking a leaf, or a height 1 node or depth 2 - // node in the middle, we know we won't promote further - // than that, so just get a write lock now. - const pair_lock_type lock_type = (should_inject_in_node(next_loc, child_height, child_depth) - ? PL_WRITE_CHEAP - : PL_READ); - if (next_loc != NEITHER_EXTREME || (toku_bnc_should_promote(ft, bnc) && depth <= 1)) { - // If we're on either extreme, or the birdie wants to - // promote and we're in the top two levels of the - // tree, don't stop just because someone else has the - // node locked. - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); - if (lock_type == PL_WRITE_CHEAP) { - // We intend to take the write lock for message injection - toku::context inject_ctx(CTX_MESSAGE_INJECTION); - toku_pin_ftnode_off_client_thread_batched(ft, child_blocknum, child_fullhash, &bfe, lock_type, 0, nullptr, &child); - } else { - // We're going to keep promoting - toku::context promo_ctx(CTX_PROMO); - toku_pin_ftnode_off_client_thread_batched(ft, child_blocknum, child_fullhash, &bfe, lock_type, 0, nullptr, &child); - } - } else { - r = toku_maybe_pin_ftnode_clean(ft, child_blocknum, child_fullhash, lock_type, &child); - if (r != 0) { - // We couldn't get the child cheaply, so give up on promoting. - STATUS_INC(FT_PRO_NUM_STOP_LOCK_CHILD, 1); - goto relock_and_push_here; - } - if (is_entire_node_in_memory(child)) { - // toku_pin_ftnode... touches the clock but toku_maybe_pin_ftnode... doesn't. - // This prevents partial eviction. 
- for (int i = 0; i < child->n_children; ++i) { - BP_TOUCH_CLOCK(child, i); - } - } else { - // We got the child, but it's not fully in memory. Give up on promoting. - STATUS_INC(FT_PRO_NUM_STOP_CHILD_INMEM, 1); - goto unlock_child_and_push_here; - } - } - } - paranoid_invariant_notnull(child); - - if (!just_did_split_or_merge) { - BLOCKNUM subtree_root_blocknum = subtree_root->thisnodename; - uint32_t subtree_root_fullhash = toku_cachetable_hash(ft->cf, subtree_root_blocknum); - const bool did_split_or_merge = process_maybe_reactive_child(ft, subtree_root, child, childnum, loc); - if (did_split_or_merge) { - // Need to re-pin this node and try at this level again. - FTNODE newparent; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); // should be fully in memory, we just split it - toku_pin_ftnode_off_client_thread_batched(ft, subtree_root_blocknum, subtree_root_fullhash, &bfe, PL_READ, 0, nullptr, &newparent); - push_something_in_subtree(ft, newparent, -1, cmd, flow_deltas, gc_info, depth, loc, true); - return; - } - } - - if (next_loc != NEITHER_EXTREME || child->dirty || toku_bnc_should_promote(ft, bnc)) { - push_something_in_subtree(ft, child, -1, cmd, flow_deltas, gc_info, depth + 1, next_loc, false); - toku_sync_fetch_and_add(&bnc->flow[0], flow_deltas[0]); - // The recursive call unpinned the child, but - // we're responsible for unpinning subtree_root. - toku_unpin_ftnode_read_only(ft, subtree_root); - return; - } - - STATUS_INC(FT_PRO_NUM_DIDNT_WANT_PROMOTE, 1); - unlock_child_and_push_here: - // We locked the child, but we decided not to promote. - // Unlock the child, and fall through to the next case. - toku_unpin_ftnode_read_only(ft, child); - } - relock_and_push_here: - // Give up on promoting. - // We have subtree_root read-locked and we don't have a child locked. - // Drop the read lock, grab a write lock, and inject here. - { - // Right now we have a read lock on subtree_root, but we want - // to inject into it so we get a write lock instead. - BLOCKNUM subtree_root_blocknum = subtree_root->thisnodename; - uint32_t subtree_root_fullhash = toku_cachetable_hash(ft->cf, subtree_root_blocknum); - toku_unpin_ftnode_read_only(ft, subtree_root); - switch (depth) { - case 0: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_0, 1); break; - case 1: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_1, 1); break; - case 2: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_2, 1); break; - case 3: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_3, 1); break; - default: - STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break; - } - inject_message_at_this_blocknum(ft, subtree_root_blocknum, subtree_root_fullhash, cmd, flow_deltas, gc_info); - } - } -} - -void toku_ft_root_put_cmd( - FT ft, - FT_MSG_S *cmd, - txn_gc_info *gc_info - ) -// Effect: -// - assign msn to cmd and update msn in the header -// - push the cmd into the ft - -// As of Clayface, the root blocknum is a constant, so preventing a -// race between message injection and the split of a root is the job -// of the cachetable's locking rules. -// -// We also hold the MO lock for a number of reasons, but an important -// one is to make sure that a begin_checkpoint may not start while -// this code is executing. A begin_checkpoint does (at least) two things -// that can interfere with the operations here: -// - Copies the header to a checkpoint header. Because we may change -// the max_msn_in_ft below, we don't want the header to be copied in -// the middle of these operations. -// - Takes note of the log's LSN. 
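/*
 * Editorial sketch (annotation, not part of the patch): how the "extreme" flags narrow
 * as a promoted message walks down the tree. The root call passes
 * LEFT_EXTREME | RIGHT_EXTREME; at each level a flag survives only if the message keeps
 * hugging that edge (childnum 0 on the left, n_children - 1 on the right). The names
 * here are simplified stand-ins for the real seqinsert_loc values.
 */
#include <cstdio>

enum { NEITHER_S = 0, LEFT_S = 1, RIGHT_S = 2 };

static int next_loc_sketch(int loc, int childnum, int n_children) {
    if ((loc & LEFT_S) && childnum == 0) return LEFT_S;
    if ((loc & RIGHT_S) && childnum == n_children - 1) return RIGHT_S;
    return NEITHER_S;
}

int main(void) {
    int loc = LEFT_S | RIGHT_S;              // at the root, both edges are still possible
    loc = next_loc_sketch(loc, 7, 8);        // rightmost child: only RIGHT survives
    printf("after level 1: %d\n", loc);      // prints 2
    loc = next_loc_sketch(loc, 3, 8);        // middle child: no longer on an edge
    printf("after level 2: %d\n", loc);      // prints 0
    return 0;
}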
Because this put operation has -// already been logged, this message injection must be included -// in any checkpoint that contains this put's logentry. -// Holding the mo lock throughout this function ensures that fact. -{ - toku::context promo_ctx(CTX_PROMO); - - // blackhole fractal trees drop all messages, so do nothing. - if (ft->blackhole) { - return; - } - - FTNODE node; - - uint32_t fullhash; - CACHEKEY root_key; - toku_calculate_root_offset_pointer(ft, &root_key, &fullhash); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, ft); - - size_t flow_deltas[] = { toku_ft_msg_memsize_in_fifo(cmd), 0 }; - - pair_lock_type lock_type; - lock_type = PL_READ; // try first for a read lock - // If we need to split the root, we'll have to change from a read lock - // to a write lock and check again. We change the variable lock_type - // and jump back to here. - change_lock_type: - // get the root node - toku_pin_ftnode_off_client_thread_batched(ft, root_key, fullhash, &bfe, lock_type, 0, NULL, &node); - toku_assert_entire_node_in_memory(node); - paranoid_invariant(node->fullhash==fullhash); - ft_verify_flags(ft, node); - - // First handle a reactive root. - // This relocking for split algorithm will cause every message - // injection thread to change lock type back and forth, when only one - // of them needs to in order to handle the split. That's not great, - // but root splits are incredibly rare. - enum reactivity re = get_node_reactivity(ft, node); - switch (re) { - case RE_STABLE: - case RE_FUSIBLE: // cannot merge anything at the root - if (lock_type != PL_READ) { - // We thought we needed to split, but someone else got to - // it before us. Downgrade to a read lock. - toku_unpin_ftnode_read_only(ft, node); - lock_type = PL_READ; - goto change_lock_type; - } - break; - case RE_FISSIBLE: - if (lock_type == PL_READ) { - // Here, we only have a read lock on the root. In order - // to split it, we need a write lock, but in the course of - // gaining the write lock, someone else may have gotten in - // before us and split it. So we upgrade to a write lock - // and check again. - toku_unpin_ftnode_read_only(ft, node); - lock_type = PL_WRITE_CHEAP; - goto change_lock_type; - } else { - // We have a write lock, now we can split. - ft_init_new_root(ft, node, &node); - // Then downgrade back to a read lock, and we can finally - // do the injection. - toku_unpin_ftnode_off_client_thread(ft, node); - lock_type = PL_READ; - STATUS_INC(FT_PRO_NUM_ROOT_SPLIT, 1); - goto change_lock_type; - } - break; - } - // If we get to here, we have a read lock and the root doesn't - // need to be split. It's safe to inject the message. - paranoid_invariant(lock_type == PL_READ); - // We cannot assert that we have the read lock because frwlock asserts - // that its mutex is locked when we check if there are any readers. - // That wouldn't give us a strong guarantee that we have the read lock - // anyway. - - // Now, either inject here or promote. We decide based on a heuristic: - if (node->height == 0 || !ft_msg_applies_once(cmd)) { - // If the root's a leaf or we're injecting a broadcast, drop the read lock and inject here. - toku_unpin_ftnode_read_only(ft, node); - STATUS_INC(FT_PRO_NUM_ROOT_H0_INJECT, 1); - inject_message_at_this_blocknum(ft, root_key, fullhash, cmd, flow_deltas, gc_info); - } else if (node->height > 1) { - // If the root's above height 1, we are definitely eligible for promotion. 
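/*
 * Editorial sketch (annotation, not part of the patch): the relock-and-recheck pattern
 * used by the root injection code above. It pins the root with a read lock first, and
 * only when the root looks fissible does it drop the lock, retake it for writing, and
 * re-evaluate, because another thread may have performed the split in the window where
 * no lock was held; after splitting it goes back to a read lock before injecting. The
 * shared_mutex below is only a stand-in for the cachetable pair lock (PL_READ /
 * PL_WRITE_CHEAP), not the real locking API.
 */
#include <mutex>
#include <shared_mutex>

struct root_sketch {
    std::shared_mutex lock;
    bool needs_split = true;
    void split() { needs_split = false; }
};

static void inject_with_possible_split(root_sketch &root) {
    for (;;) {
        {
            std::shared_lock<std::shared_mutex> rd(root.lock);
            if (!root.needs_split) {
                // inject under the read lock and return
                return;
            }
        }
        // Upgrade: drop the read lock, take the write lock, and re-check,
        // since the state may have changed while we held no lock at all.
        std::unique_lock<std::shared_mutex> wr(root.lock);
        if (root.needs_split) {
            root.split();
        }
        // Loop back and re-pin for reading before injecting.
    }
}

int main(void) {
    root_sketch r;
    inject_with_possible_split(r);
    return 0;
}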
- push_something_in_subtree(ft, node, -1, cmd, flow_deltas, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false); - } else { - // The root's height 1. We may be eligible for promotion here. - // On the extremes, we want to promote, in the middle, we don't. - int childnum = toku_ftnode_which_child(node, cmd->u.id.key, &ft->cmp_descriptor, ft->compare_fun); - if (childnum == 0 || childnum == node->n_children - 1) { - // On the extremes, promote. We know which childnum we're going to, so pass that down too. - push_something_in_subtree(ft, node, childnum, cmd, flow_deltas, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false); - } else { - // At height 1 in the middle, don't promote, drop the read lock and inject here. - toku_unpin_ftnode_read_only(ft, node); - STATUS_INC(FT_PRO_NUM_ROOT_H1_INJECT, 1); - inject_message_at_this_blocknum(ft, root_key, fullhash, cmd, flow_deltas, gc_info); - } - } -} - -// Effect: Insert the key-val pair into brt. -void toku_ft_insert (FT_HANDLE brt, DBT *key, DBT *val, TOKUTXN txn) { - toku_ft_maybe_insert(brt, key, val, txn, false, ZERO_LSN, true, FT_INSERT); -} - -void toku_ft_load_recovery(TOKUTXN txn, FILENUM old_filenum, char const * new_iname, int do_fsync, int do_log, LSN *load_lsn) { - paranoid_invariant(txn); - toku_txn_force_fsync_on_commit(txn); //If the txn commits, the commit MUST be in the log - //before the (old) file is actually unlinked - TOKULOGGER logger = toku_txn_logger(txn); - - BYTESTRING new_iname_bs = {.len=(uint32_t) strlen(new_iname), .data=(char*)new_iname}; - toku_logger_save_rollback_load(txn, old_filenum, &new_iname_bs); - if (do_log && logger) { - TXNID_PAIR xid = toku_txn_get_txnid(txn); - toku_log_load(logger, load_lsn, do_fsync, txn, xid, old_filenum, new_iname_bs); - } -} - -// 2954 -// this function handles the tasks needed to be recoverable -// - write to rollback log -// - write to recovery log -void toku_ft_hot_index_recovery(TOKUTXN txn, FILENUMS filenums, int do_fsync, int do_log, LSN *hot_index_lsn) -{ - paranoid_invariant(txn); - TOKULOGGER logger = toku_txn_logger(txn); - - // write to the rollback log - toku_logger_save_rollback_hot_index(txn, &filenums); - if (do_log && logger) { - TXNID_PAIR xid = toku_txn_get_txnid(txn); - // write to the recovery log - toku_log_hot_index(logger, hot_index_lsn, do_fsync, txn, xid, filenums); - } -} - -// Effect: Optimize the ft. 
-void toku_ft_optimize (FT_HANDLE ft_h) { - TOKULOGGER logger = toku_cachefile_logger(ft_h->ft->cf); - if (logger) { - TXNID oldest = toku_txn_manager_get_oldest_living_xid(logger->txn_manager); - - XIDS root_xids = xids_get_root_xids(); - XIDS message_xids; - if (oldest == TXNID_NONE_LIVING) { - message_xids = root_xids; - } - else { - int r = xids_create_child(root_xids, &message_xids, oldest); - invariant(r == 0); - } - - DBT key; - DBT val; - toku_init_dbt(&key); - toku_init_dbt(&val); - FT_MSG_S ftcmd = { FT_OPTIMIZE, ZERO_MSN, message_xids, .u = { .id = {&key,&val} } }; - - TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); - txn_manager_state txn_state_for_gc(txn_manager); - - TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_estimate, - // no messages above us, we can implicitly promote uxrs based on this xid - oldest_referenced_xid_estimate, - true); - toku_ft_root_put_cmd(ft_h->ft, &ftcmd, &gc_info); - xids_destroy(&message_xids); - } -} - -void toku_ft_load(FT_HANDLE brt, TOKUTXN txn, char const * new_iname, int do_fsync, LSN *load_lsn) { - FILENUM old_filenum = toku_cachefile_filenum(brt->ft->cf); - int do_log = 1; - toku_ft_load_recovery(txn, old_filenum, new_iname, do_fsync, do_log, load_lsn); -} - -// ft actions for logging hot index filenums -void toku_ft_hot_index(FT_HANDLE brt __attribute__ ((unused)), TOKUTXN txn, FILENUMS filenums, int do_fsync, LSN *lsn) { - int do_log = 1; - toku_ft_hot_index_recovery(txn, filenums, do_fsync, do_log, lsn); -} - -void -toku_ft_log_put (TOKUTXN txn, FT_HANDLE brt, const DBT *key, const DBT *val) { - TOKULOGGER logger = toku_txn_logger(txn); - if (logger) { - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; - TXNID_PAIR xid = toku_txn_get_txnid(txn); - toku_log_enq_insert(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(brt->ft->cf), xid, keybs, valbs); - } -} - -void -toku_ft_log_put_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *brts, uint32_t num_fts, const DBT *key, const DBT *val) { - assert(txn); - assert(num_fts > 0); - TOKULOGGER logger = toku_txn_logger(txn); - if (logger) { - FILENUM fnums[num_fts]; - uint32_t i; - for (i = 0; i < num_fts; i++) { - fnums[i] = toku_cachefile_filenum(brts[i]->ft->cf); - } - FILENUMS filenums = {.num = num_fts, .filenums = fnums}; - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; - TXNID_PAIR xid = toku_txn_get_txnid(txn); - FILENUM src_filenum = src_ft ? toku_cachefile_filenum(src_ft->ft->cf) : FILENUM_NONE; - toku_log_enq_insert_multiple(logger, (LSN*)0, 0, txn, src_filenum, filenums, xid, keybs, valbs); - } -} - -TXN_MANAGER toku_ft_get_txn_manager(FT_HANDLE ft_h) { - TOKULOGGER logger = toku_cachefile_logger(ft_h->ft->cf); - return logger != nullptr ? toku_logger_get_txn_manager(logger) : nullptr; -} - -TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h) { - TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); - return txn_manager != nullptr ? 
toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager) : TXNID_NONE; -} - -void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, enum ft_msg_type type) { - paranoid_invariant(type==FT_INSERT || type==FT_INSERT_NO_OVERWRITE); - XIDS message_xids = xids_get_root_xids(); //By default use committed messages - TXNID_PAIR xid = toku_txn_get_txnid(txn); - if (txn) { - BYTESTRING keybs = {key->size, (char *) key->data}; - toku_logger_save_rollback_cmdinsert(txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs); - toku_txn_maybe_note_ft(txn, ft_h->ft); - message_xids = toku_txn_get_xids(txn); - } - TOKULOGGER logger = toku_txn_logger(txn); - if (do_logging && logger) { - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; - if (type == FT_INSERT) { - toku_log_enq_insert(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft_h->ft->cf), xid, keybs, valbs); - } - else { - toku_log_enq_insert_no_overwrite(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft_h->ft->cf), xid, keybs, valbs); - } - } - - LSN treelsn; - if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { - // do nothing - } else { - TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); - txn_manager_state txn_state_for_gc(txn_manager); - - TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_estimate, - // no messages above us, we can implicitly promote uxrs based on this xid - oldest_referenced_xid_estimate, - txn != nullptr ? !txn->for_recovery : false); - toku_ft_send_insert(ft_h, key, val, message_xids, type, &gc_info); - } -} - -static void -ft_send_update_msg(FT_HANDLE ft_h, FT_MSG_S *msg, TOKUTXN txn) { - msg->xids = (txn - ? toku_txn_get_xids(txn) - : xids_get_root_xids()); - - TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); - txn_manager_state txn_state_for_gc(txn_manager); - - TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_estimate, - // no messages above us, we can implicitly promote uxrs based on this xid - oldest_referenced_xid_estimate, - txn != nullptr ? 
!txn->for_recovery : false); - toku_ft_root_put_cmd(ft_h->ft, msg, &gc_info); -} - -void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra, - TOKUTXN txn, bool oplsn_valid, LSN oplsn, - bool do_logging) { - TXNID_PAIR xid = toku_txn_get_txnid(txn); - if (txn) { - BYTESTRING keybs = { key->size, (char *) key->data }; - toku_logger_save_rollback_cmdupdate( - txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs); - toku_txn_maybe_note_ft(txn, ft_h->ft); - } - - TOKULOGGER logger; - logger = toku_txn_logger(txn); - if (do_logging && logger) { - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - BYTESTRING extrabs = {.len=update_function_extra->size, - .data = (char *) update_function_extra->data}; - toku_log_enq_update(logger, NULL, 0, txn, - toku_cachefile_filenum(ft_h->ft->cf), - xid, keybs, extrabs); - } - - LSN treelsn; - if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { - // do nothing - } else { - FT_MSG_S msg = { FT_UPDATE, ZERO_MSN, NULL, - .u = { .id = { key, update_function_extra } } }; - ft_send_update_msg(ft_h, &msg, txn); - } -} - -void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra, - TOKUTXN txn, bool oplsn_valid, LSN oplsn, - bool do_logging, bool is_resetting_op) { - TXNID_PAIR xid = toku_txn_get_txnid(txn); - uint8_t resetting = is_resetting_op ? 1 : 0; - if (txn) { - toku_logger_save_rollback_cmdupdatebroadcast(txn, toku_cachefile_filenum(ft_h->ft->cf), resetting); - toku_txn_maybe_note_ft(txn, ft_h->ft); - } - - TOKULOGGER logger; - logger = toku_txn_logger(txn); - if (do_logging && logger) { - BYTESTRING extrabs = {.len=update_function_extra->size, - .data = (char *) update_function_extra->data}; - toku_log_enq_updatebroadcast(logger, NULL, 0, txn, - toku_cachefile_filenum(ft_h->ft->cf), - xid, extrabs, resetting); - } - - //TODO(yoni): remove treelsn here and similar calls (no longer being used) - LSN treelsn; - if (oplsn_valid && - oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { - - } else { - DBT nullkey; - const DBT *nullkeyp = toku_init_dbt(&nullkey); - FT_MSG_S msg = { FT_UPDATE_BROADCAST_ALL, ZERO_MSN, NULL, - .u = { .id = { nullkeyp, update_function_extra } } }; - ft_send_update_msg(ft_h, &msg, txn); - } -} - -void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info) { - FT_MSG_S ftcmd = { type, ZERO_MSN, xids, .u = { .id = { key, val } } }; - toku_ft_root_put_cmd(brt->ft, &ftcmd, gc_info); -} - -void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info) { - DBT val; - FT_MSG_S ftcmd = { FT_COMMIT_ANY, ZERO_MSN, xids, .u = { .id = { key, toku_init_dbt(&val) } } }; - toku_ft_root_put_cmd(brt->ft, &ftcmd, gc_info); -} - -void toku_ft_delete(FT_HANDLE brt, DBT *key, TOKUTXN txn) { - toku_ft_maybe_delete(brt, key, txn, false, ZERO_LSN, true); -} - -void -toku_ft_log_del(TOKUTXN txn, FT_HANDLE brt, const DBT *key) { - TOKULOGGER logger = toku_txn_logger(txn); - if (logger) { - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - TXNID_PAIR xid = toku_txn_get_txnid(txn); - toku_log_enq_delete_any(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(brt->ft->cf), xid, keybs); - } -} - -void -toku_ft_log_del_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *brts, uint32_t num_fts, const DBT *key, const DBT *val) { - assert(txn); - assert(num_fts > 0); - TOKULOGGER logger = toku_txn_logger(txn); - if (logger) { - FILENUM fnums[num_fts]; - uint32_t i; - 
for (i = 0; i < num_fts; i++) { - fnums[i] = toku_cachefile_filenum(brts[i]->ft->cf); - } - FILENUMS filenums = {.num = num_fts, .filenums = fnums}; - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; - TXNID_PAIR xid = toku_txn_get_txnid(txn); - FILENUM src_filenum = src_ft ? toku_cachefile_filenum(src_ft->ft->cf) : FILENUM_NONE; - toku_log_enq_delete_multiple(logger, (LSN*)0, 0, txn, src_filenum, filenums, xid, keybs, valbs); - } -} - -void toku_ft_maybe_delete(FT_HANDLE ft_h, DBT *key, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging) { - XIDS message_xids = xids_get_root_xids(); //By default use committed messages - TXNID_PAIR xid = toku_txn_get_txnid(txn); - if (txn) { - BYTESTRING keybs = {key->size, (char *) key->data}; - toku_logger_save_rollback_cmddelete(txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs); - toku_txn_maybe_note_ft(txn, ft_h->ft); - message_xids = toku_txn_get_xids(txn); - } - TOKULOGGER logger = toku_txn_logger(txn); - if (do_logging && logger) { - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - toku_log_enq_delete_any(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft_h->ft->cf), xid, keybs); - } - - LSN treelsn; - if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { - // do nothing - } else { - TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); - txn_manager_state txn_state_for_gc(txn_manager); - - TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_estimate, - // no messages above us, we can implicitly promote uxrs based on this xid - oldest_referenced_xid_estimate, - txn != nullptr ? !txn->for_recovery : false); - toku_ft_send_delete(ft_h, key, message_xids, &gc_info); - } -} - -void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info) { - DBT val; toku_init_dbt(&val); - FT_MSG_S ftcmd = { FT_DELETE_ANY, ZERO_MSN, xids, .u = { .id = { key, &val } } }; - toku_ft_root_put_cmd(brt->ft, &ftcmd, gc_info); -} - -/* ******************** open,close and create ********************** */ - -// Test only function (not used in running system). This one has no env -int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *ft_handle_p, int nodesize, - int basementnodesize, - enum toku_compression_method compression_method, - CACHETABLE cachetable, TOKUTXN txn, - int (*compare_fun)(DB *, const DBT*,const DBT*)) { - FT_HANDLE brt; - const int only_create = 0; - - toku_ft_handle_create(&brt); - toku_ft_handle_set_nodesize(brt, nodesize); - toku_ft_handle_set_basementnodesize(brt, basementnodesize); - toku_ft_handle_set_compression_method(brt, compression_method); - toku_ft_handle_set_fanout(brt, 16); - toku_ft_set_bt_compare(brt, compare_fun); - - int r = toku_ft_handle_open(brt, fname, is_create, only_create, cachetable, txn); - if (r != 0) { - return r; - } - - *ft_handle_p = brt; - return r; -} - -static bool use_direct_io = true; - -void toku_ft_set_direct_io (bool direct_io_on) { - use_direct_io = direct_io_on; -} - -static inline int ft_open_maybe_direct(const char *filename, int oflag, int mode) { - if (use_direct_io) { - return toku_os_open_direct(filename, oflag, mode); - } else { - return toku_os_open(filename, oflag, mode); - } -} - -static const mode_t file_mode = S_IRUSR+S_IWUSR+S_IRGRP+S_IWGRP+S_IROTH+S_IWOTH; - -// open a file for use by the brt -// Requires: File does not exist. 
-static int ft_create_file(FT_HANDLE UU(brt), const char *fname, int *fdp) { - int r; - int fd; - int er; - fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode); - assert(fd==-1); - if ((er = get_maybe_error_errno()) != ENOENT) { - return er; - } - fd = ft_open_maybe_direct(fname, O_RDWR | O_CREAT | O_BINARY, file_mode); - if (fd==-1) { - r = get_error_errno(); - return r; - } - - r = toku_fsync_directory(fname); - if (r == 0) { - *fdp = fd; - } else { - int rr = close(fd); - assert_zero(rr); - } - return r; -} - -// open a file for use by the brt. if the file does not exist, error -static int ft_open_file(const char *fname, int *fdp) { - int fd; - fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode); - if (fd==-1) { - return get_error_errno(); - } - *fdp = fd; - return 0; -} - -void -toku_ft_handle_set_compression_method(FT_HANDLE t, enum toku_compression_method method) -{ - if (t->ft) { - toku_ft_set_compression_method(t->ft, method); - } - else { - t->options.compression_method = method; - } -} - -void -toku_ft_handle_get_compression_method(FT_HANDLE t, enum toku_compression_method *methodp) -{ - if (t->ft) { - toku_ft_get_compression_method(t->ft, methodp); - } - else { - *methodp = t->options.compression_method; - } + ft_msg msg(key, val, type, ZERO_MSN, message_xids); + size_t flow_deltas[] = { 0, 0 }; + inject_message_in_locked_node(ft, leaf, target_childnum, msg, flow_deltas, gc_info); } -void -toku_ft_handle_set_fanout(FT_HANDLE ft_handle, unsigned int fanout) -{ - if (ft_handle->ft) { - toku_ft_set_fanout(ft_handle->ft, fanout); - } - else { - ft_handle->options.fanout = fanout; - } -} +static void +ft_send_update_msg(FT_HANDLE ft_h, const ft_msg &msg, TOKUTXN txn) { + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); + txn_manager_state txn_state_for_gc(txn_manager); -void -toku_ft_handle_get_fanout(FT_HANDLE ft_handle, unsigned int *fanout) -{ - if (ft_handle->ft) { - toku_ft_get_fanout(ft_handle->ft, fanout); - } - else { - *fanout = ft_handle->options.fanout; - } -} -static int -verify_builtin_comparisons_consistent(FT_HANDLE t, uint32_t flags) { - if ((flags & TOKU_DB_KEYCMP_BUILTIN) && (t->options.compare_fun != toku_builtin_compare_fun)) - return EINVAL; - return 0; + TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_estimate, + // no messages above us, we can implicitly promote uxrs based on this xid + oldest_referenced_xid_estimate, + txn != nullptr ? 
!txn->for_recovery : false); + toku_ft_root_put_msg(ft_h->ft, msg, &gc_info); } -// -// See comments in toku_db_change_descriptor to understand invariants -// in the system when this function is called -// -void toku_ft_change_descriptor( - FT_HANDLE ft_h, - const DBT* old_descriptor, - const DBT* new_descriptor, - bool do_log, - TOKUTXN txn, - bool update_cmp_descriptor - ) -{ - DESCRIPTOR_S new_d; - - // if running with txns, save to rollback + write to recovery log +void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra, + TOKUTXN txn, bool oplsn_valid, LSN oplsn, + bool do_logging) { + TXNID_PAIR xid = toku_txn_get_txnid(txn); if (txn) { - // put information into rollback file - BYTESTRING old_desc_bs = { old_descriptor->size, (char *) old_descriptor->data }; - BYTESTRING new_desc_bs = { new_descriptor->size, (char *) new_descriptor->data }; - toku_logger_save_rollback_change_fdescriptor( - txn, - toku_cachefile_filenum(ft_h->ft->cf), - &old_desc_bs - ); + BYTESTRING keybs = { key->size, (char *) key->data }; + toku_logger_save_rollback_cmdupdate( + txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs); toku_txn_maybe_note_ft(txn, ft_h->ft); - - if (do_log) { - TOKULOGGER logger = toku_txn_logger(txn); - TXNID_PAIR xid = toku_txn_get_txnid(txn); - toku_log_change_fdescriptor( - logger, NULL, 0, - txn, - toku_cachefile_filenum(ft_h->ft->cf), - xid, - old_desc_bs, - new_desc_bs, - update_cmp_descriptor - ); - } - } - - // write new_descriptor to header - new_d.dbt = *new_descriptor; - toku_ft_update_descriptor(ft_h->ft, &new_d); - // very infrequent operation, worth precise threadsafe count - STATUS_INC(FT_DESCRIPTOR_SET, 1); - - if (update_cmp_descriptor) { - toku_ft_update_cmp_descriptor(ft_h->ft); - } -} - -static void -toku_ft_handle_inherit_options(FT_HANDLE t, FT ft) { - struct ft_options options = { - .nodesize = ft->h->nodesize, - .basementnodesize = ft->h->basementnodesize, - .compression_method = ft->h->compression_method, - .fanout = ft->h->fanout, - .flags = ft->h->flags, - .compare_fun = ft->compare_fun, - .update_fun = ft->update_fun - }; - t->options = options; - t->did_set_flags = true; -} - -// This is the actual open, used for various purposes, such as normal use, recovery, and redirect. -// fname_in_env is the iname, relative to the env_dir (data_dir is already in iname as prefix). -// The checkpointed version (checkpoint_lsn) of the dictionary must be no later than max_acceptable_lsn . -// Requires: The multi-operation client lock must be held to prevent a checkpoint from occuring. 
-static int -ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, FILENUM use_filenum, DICTIONARY_ID use_dictionary_id, LSN max_acceptable_lsn) { - int r; - bool txn_created = false; - char *fname_in_cwd = NULL; - CACHEFILE cf = NULL; - FT ft = NULL; - bool did_create = false; - toku_ft_open_close_lock(); - - if (ft_h->did_set_flags) { - r = verify_builtin_comparisons_consistent(ft_h, ft_h->options.flags); - if (r!=0) { goto exit; } - } - - assert(is_create || !only_create); - FILENUM reserved_filenum; - reserved_filenum = use_filenum; - fname_in_cwd = toku_cachetable_get_fname_in_cwd(cachetable, fname_in_env); - bool was_already_open; - { - int fd = -1; - r = ft_open_file(fname_in_cwd, &fd); - if (reserved_filenum.fileid == FILENUM_NONE.fileid) { - reserved_filenum = toku_cachetable_reserve_filenum(cachetable); - } - if (r==ENOENT && is_create) { - did_create = true; - if (txn) { - BYTESTRING bs = { .len=(uint32_t) strlen(fname_in_env), .data = (char*)fname_in_env }; - toku_logger_save_rollback_fcreate(txn, reserved_filenum, &bs); // bs is a copy of the fname relative to the environment - } - txn_created = (bool)(txn!=NULL); - toku_logger_log_fcreate(txn, fname_in_env, reserved_filenum, file_mode, ft_h->options.flags, ft_h->options.nodesize, ft_h->options.basementnodesize, ft_h->options.compression_method); - r = ft_create_file(ft_h, fname_in_cwd, &fd); - if (r) { goto exit; } - } - if (r) { goto exit; } - r=toku_cachetable_openfd_with_filenum(&cf, cachetable, fd, fname_in_env, reserved_filenum, &was_already_open); - if (r) { goto exit; } - } - assert(ft_h->options.nodesize>0); - if (is_create) { - r = toku_read_ft_and_store_in_cachefile(ft_h, cf, max_acceptable_lsn, &ft); - if (r==TOKUDB_DICTIONARY_NO_HEADER) { - toku_ft_create(&ft, &ft_h->options, cf, txn); - } - else if (r!=0) { - goto exit; - } - else if (only_create) { - assert_zero(r); - r = EEXIST; - goto exit; - } - // if we get here, then is_create was true but only_create was false, - // so it is ok for toku_read_ft_and_store_in_cachefile to have read - // the header via toku_read_ft_and_store_in_cachefile - } else { - r = toku_read_ft_and_store_in_cachefile(ft_h, cf, max_acceptable_lsn, &ft); - if (r) { goto exit; } - } - if (!ft_h->did_set_flags) { - r = verify_builtin_comparisons_consistent(ft_h, ft_h->options.flags); - if (r) { goto exit; } - } else if (ft_h->options.flags != ft->h->flags) { /* if flags have been set then flags must match */ - r = EINVAL; - goto exit; } - toku_ft_handle_inherit_options(ft_h, ft); - if (!was_already_open) { - if (!did_create) { //Only log the fopen that OPENs the file. If it was already open, don't log. 
- toku_logger_log_fopen(txn, fname_in_env, toku_cachefile_filenum(cf), ft_h->options.flags); - } - } - int use_reserved_dict_id; - use_reserved_dict_id = use_dictionary_id.dictid != DICTIONARY_ID_NONE.dictid; - if (!was_already_open) { - DICTIONARY_ID dict_id; - if (use_reserved_dict_id) { - dict_id = use_dictionary_id; - } - else { - dict_id = next_dict_id(); - } - ft->dict_id = dict_id; + TOKULOGGER logger; + logger = toku_txn_logger(txn); + if (do_logging && logger) { + BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; + BYTESTRING extrabs = {.len=update_function_extra->size, + .data = (char *) update_function_extra->data}; + toku_log_enq_update(logger, NULL, 0, txn, + toku_cachefile_filenum(ft_h->ft->cf), + xid, keybs, extrabs); } - else { - // dict_id is already in header - if (use_reserved_dict_id) { - assert(ft->dict_id.dictid == use_dictionary_id.dictid); - } + + LSN treelsn; + if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { + // do nothing + } else { + XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids(); + ft_msg msg(key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids); + ft_send_update_msg(ft_h, msg, txn); } - assert(ft); - assert(ft->dict_id.dictid != DICTIONARY_ID_NONE.dictid); - assert(ft->dict_id.dictid < dict_id_serial); +} - // important note here, - // after this point, where we associate the header - // with the brt, the function is not allowed to fail - // Code that handles failure (located below "exit"), - // depends on this - toku_ft_note_ft_handle_open(ft, ft_h); - if (txn_created) { - assert(txn); - toku_txn_maybe_note_ft(txn, ft); +void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra, + TOKUTXN txn, bool oplsn_valid, LSN oplsn, + bool do_logging, bool is_resetting_op) { + TXNID_PAIR xid = toku_txn_get_txnid(txn); + uint8_t resetting = is_resetting_op ? 1 : 0; + if (txn) { + toku_logger_save_rollback_cmdupdatebroadcast(txn, toku_cachefile_filenum(ft_h->ft->cf), resetting); + toku_txn_maybe_note_ft(txn, ft_h->ft); } - //Opening a brt may restore to previous checkpoint. Truncate if necessary. - { - int fd = toku_cachefile_get_fd (ft->cf); - toku_maybe_truncate_file_on_open(ft->blocktable, fd); + TOKULOGGER logger; + logger = toku_txn_logger(txn); + if (do_logging && logger) { + BYTESTRING extrabs = {.len=update_function_extra->size, + .data = (char *) update_function_extra->data}; + toku_log_enq_updatebroadcast(logger, NULL, 0, txn, + toku_cachefile_filenum(ft_h->ft->cf), + xid, extrabs, resetting); } - r = 0; -exit: - if (fname_in_cwd) { - toku_free(fname_in_cwd); + //TODO(yoni): remove treelsn here and similar calls (no longer being used) + LSN treelsn; + if (oplsn_valid && + oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { + + } else { + DBT empty_dbt; + XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids(); + ft_msg msg(toku_init_dbt(&empty_dbt), update_function_extra, FT_UPDATE_BROADCAST_ALL, ZERO_MSN, message_xids); + ft_send_update_msg(ft_h, msg, txn); } - if (r != 0 && cf) { - if (ft) { - // we only call toku_ft_note_ft_handle_open - // when the function succeeds, so if we are here, - // then that means we have a reference to the header - // but we have not linked it to this brt. So, - // we can simply try to remove the header. 
- // We don't need to unlink this brt from the header - toku_ft_grab_reflock(ft); - bool needed = toku_ft_needed_unlocked(ft); - toku_ft_release_reflock(ft); - if (!needed) { - // close immediately. - toku_ft_evict_from_memory(ft, false, ZERO_LSN); - } - } - else { - toku_cachefile_close(&cf, false, ZERO_LSN); +} + +void toku_ft_send_insert(FT_HANDLE ft_handle, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info) { + ft_msg msg(key, val, type, ZERO_MSN, xids); + toku_ft_root_put_msg(ft_handle->ft, msg, gc_info); +} + +void toku_ft_send_commit_any(FT_HANDLE ft_handle, DBT *key, XIDS xids, txn_gc_info *gc_info) { + DBT val; + ft_msg msg(key, toku_init_dbt(&val), FT_COMMIT_ANY, ZERO_MSN, xids); + toku_ft_root_put_msg(ft_handle->ft, msg, gc_info); +} + +void toku_ft_delete(FT_HANDLE ft_handle, DBT *key, TOKUTXN txn) { + toku_ft_maybe_delete(ft_handle, key, txn, false, ZERO_LSN, true); +} + +void +toku_ft_log_del(TOKUTXN txn, FT_HANDLE ft_handle, const DBT *key) { + TOKULOGGER logger = toku_txn_logger(txn); + if (logger) { + BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; + TXNID_PAIR xid = toku_txn_get_txnid(txn); + toku_log_enq_delete_any(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft_handle->ft->cf), xid, keybs); + } +} + +void +toku_ft_log_del_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *fts, uint32_t num_fts, const DBT *key, const DBT *val) { + assert(txn); + assert(num_fts > 0); + TOKULOGGER logger = toku_txn_logger(txn); + if (logger) { + FILENUM fnums[num_fts]; + uint32_t i; + for (i = 0; i < num_fts; i++) { + fnums[i] = toku_cachefile_filenum(fts[i]->ft->cf); } + FILENUMS filenums = {.num = num_fts, .filenums = fnums}; + BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; + BYTESTRING valbs = {.len=val->size, .data=(char *) val->data}; + TXNID_PAIR xid = toku_txn_get_txnid(txn); + FILENUM src_filenum = src_ft ? toku_cachefile_filenum(src_ft->ft->cf) : FILENUM_NONE; + toku_log_enq_delete_multiple(logger, (LSN*)0, 0, txn, src_filenum, filenums, xid, keybs, valbs); } - toku_ft_open_close_unlock(); - return r; } -// Open a brt for the purpose of recovery, which requires that the brt be open to a pre-determined FILENUM -// and may require a specific checkpointed version of the file. -// (dict_id is assigned by the ft_handle_open() function.) 
-int -toku_ft_handle_open_recovery(FT_HANDLE t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, FILENUM use_filenum, LSN max_acceptable_lsn) { - int r; - assert(use_filenum.fileid != FILENUM_NONE.fileid); - r = ft_handle_open(t, fname_in_env, is_create, only_create, cachetable, - txn, use_filenum, DICTIONARY_ID_NONE, max_acceptable_lsn); - return r; +void toku_ft_maybe_delete(FT_HANDLE ft_h, DBT *key, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging) { + XIDS message_xids = toku_xids_get_root_xids(); //By default use committed messages + TXNID_PAIR xid = toku_txn_get_txnid(txn); + if (txn) { + BYTESTRING keybs = {key->size, (char *) key->data}; + toku_logger_save_rollback_cmddelete(txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs); + toku_txn_maybe_note_ft(txn, ft_h->ft); + message_xids = toku_txn_get_xids(txn); + } + TOKULOGGER logger = toku_txn_logger(txn); + if (do_logging && logger) { + BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; + toku_log_enq_delete_any(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(ft_h->ft->cf), xid, keybs); + } + + LSN treelsn; + if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { + // do nothing + } else { + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(ft_h); + txn_manager_state txn_state_for_gc(txn_manager); + + TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h); + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_estimate, + // no messages above us, we can implicitly promote uxrs based on this xid + oldest_referenced_xid_estimate, + txn != nullptr ? !txn->for_recovery : false); + toku_ft_send_delete(ft_h, key, message_xids, &gc_info); + } } -// Open a brt in normal use. The FILENUM and dict_id are assigned by the ft_handle_open() function. -// Requires: The multi-operation client lock must be held to prevent a checkpoint from occuring. -int -toku_ft_handle_open(FT_HANDLE t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn) { - int r; - r = ft_handle_open(t, fname_in_env, is_create, only_create, cachetable, txn, FILENUM_NONE, DICTIONARY_ID_NONE, MAX_LSN); - return r; +void toku_ft_send_delete(FT_HANDLE ft_handle, DBT *key, XIDS xids, txn_gc_info *gc_info) { + DBT val; toku_init_dbt(&val); + ft_msg msg(key, toku_init_dbt(&val), FT_DELETE_ANY, ZERO_MSN, xids); + toku_ft_root_put_msg(ft_handle->ft, msg, gc_info); } -// clone an ft handle. the cloned handle has a new dict_id but refers to the same fractal tree -int -toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn) { - FT_HANDLE result_ft_handle; - toku_ft_handle_create(&result_ft_handle); +/* ******************** open,close and create ********************** */ - // we're cloning, so the handle better have an open ft and open cf - invariant(ft_handle->ft); - invariant(ft_handle->ft->cf); +// Test only function (not used in running system). This one has no env +int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *ft_handle_p, int nodesize, + int basementnodesize, + enum toku_compression_method compression_method, + CACHETABLE cachetable, TOKUTXN txn, + int (*compare_fun)(DB *, const DBT*,const DBT*)) { + FT_HANDLE ft_handle; + const int only_create = 0; - // inherit the options of the ft whose handle is being cloned. 
- toku_ft_handle_inherit_options(result_ft_handle, ft_handle->ft); + toku_ft_handle_create(&ft_handle); + toku_ft_handle_set_nodesize(ft_handle, nodesize); + toku_ft_handle_set_basementnodesize(ft_handle, basementnodesize); + toku_ft_handle_set_compression_method(ft_handle, compression_method); + toku_ft_handle_set_fanout(ft_handle, 16); + toku_ft_set_bt_compare(ft_handle, compare_fun); - // we can clone the handle by creating a new handle with the same fname - CACHEFILE cf = ft_handle->ft->cf; - CACHETABLE ct = toku_cachefile_get_cachetable(cf); - const char *fname_in_env = toku_cachefile_fname_in_env(cf); - int r = toku_ft_handle_open(result_ft_handle, fname_in_env, false, false, ct, txn); + int r = toku_ft_handle_open(ft_handle, fname, is_create, only_create, cachetable, txn); if (r != 0) { - toku_ft_handle_close(result_ft_handle); - result_ft_handle = NULL; + return r; } - *cloned_ft_handle = result_ft_handle; - return r; -} -// Open a brt in normal use. The FILENUM and dict_id are assigned by the ft_handle_open() function. -int -toku_ft_handle_open_with_dict_id( - FT_HANDLE t, - const char *fname_in_env, - int is_create, - int only_create, - CACHETABLE cachetable, - TOKUTXN txn, - DICTIONARY_ID use_dictionary_id - ) -{ - int r; - r = ft_handle_open( - t, - fname_in_env, - is_create, - only_create, - cachetable, - txn, - FILENUM_NONE, - use_dictionary_id, - MAX_LSN - ); + *ft_handle_p = ft_handle; return r; } -DICTIONARY_ID -toku_ft_get_dictionary_id(FT_HANDLE brt) { - FT h = brt->ft; - DICTIONARY_ID dict_id = h->dict_id; - return dict_id; -} +static bool use_direct_io = true; -void toku_ft_set_flags(FT_HANDLE ft_handle, unsigned int flags) { - ft_handle->did_set_flags = true; - ft_handle->options.flags = flags; +void toku_ft_set_direct_io (bool direct_io_on) { + use_direct_io = direct_io_on; } -void toku_ft_get_flags(FT_HANDLE ft_handle, unsigned int *flags) { - *flags = ft_handle->options.flags; +static inline int ft_open_maybe_direct(const char *filename, int oflag, int mode) { + if (use_direct_io) { + return toku_os_open_direct(filename, oflag, mode); + } else { + return toku_os_open(filename, oflag, mode); + } } -void toku_ft_get_maximum_advised_key_value_lengths (unsigned int *max_key_len, unsigned int *max_val_len) -// return the maximum advisable key value lengths. The brt doesn't enforce these. -{ - *max_key_len = 32*1024; - *max_val_len = 32*1024*1024; +static const mode_t file_mode = S_IRUSR+S_IWUSR+S_IRGRP+S_IWGRP+S_IROTH+S_IWOTH; + +// open a file for use by the ft +// Requires: File does not exist. +static int ft_create_file(FT_HANDLE UU(ft_handle), const char *fname, int *fdp) { + int r; + int fd; + int er; + fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode); + assert(fd==-1); + if ((er = get_maybe_error_errno()) != ENOENT) { + return er; + } + fd = ft_open_maybe_direct(fname, O_RDWR | O_CREAT | O_BINARY, file_mode); + if (fd==-1) { + r = get_error_errno(); + return r; + } + + r = toku_fsync_directory(fname); + if (r == 0) { + *fdp = fd; + } else { + int rr = close(fd); + assert_zero(rr); + } + return r; } +// open a file for use by the ft. 
if the file does not exist, error +static int ft_open_file(const char *fname, int *fdp) { + int fd; + fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode); + if (fd==-1) { + return get_error_errno(); + } + *fdp = fd; + return 0; +} -void toku_ft_handle_set_nodesize(FT_HANDLE ft_handle, unsigned int nodesize) { - if (ft_handle->ft) { - toku_ft_set_nodesize(ft_handle->ft, nodesize); +void +toku_ft_handle_set_compression_method(FT_HANDLE t, enum toku_compression_method method) +{ + if (t->ft) { + toku_ft_set_compression_method(t->ft, method); } else { - ft_handle->options.nodesize = nodesize; + t->options.compression_method = method; } } -void toku_ft_handle_get_nodesize(FT_HANDLE ft_handle, unsigned int *nodesize) { - if (ft_handle->ft) { - toku_ft_get_nodesize(ft_handle->ft, nodesize); +void +toku_ft_handle_get_compression_method(FT_HANDLE t, enum toku_compression_method *methodp) +{ + if (t->ft) { + toku_ft_get_compression_method(t->ft, methodp); } else { - *nodesize = ft_handle->options.nodesize; + *methodp = t->options.compression_method; } } -void toku_ft_handle_set_basementnodesize(FT_HANDLE ft_handle, unsigned int basementnodesize) { +void +toku_ft_handle_set_fanout(FT_HANDLE ft_handle, unsigned int fanout) +{ if (ft_handle->ft) { - toku_ft_set_basementnodesize(ft_handle->ft, basementnodesize); + toku_ft_set_fanout(ft_handle->ft, fanout); } else { - ft_handle->options.basementnodesize = basementnodesize; + ft_handle->options.fanout = fanout; } } -void toku_ft_handle_get_basementnodesize(FT_HANDLE ft_handle, unsigned int *basementnodesize) { +void +toku_ft_handle_get_fanout(FT_HANDLE ft_handle, unsigned int *fanout) +{ if (ft_handle->ft) { - toku_ft_get_basementnodesize(ft_handle->ft, basementnodesize); + toku_ft_get_fanout(ft_handle->ft, fanout); } else { - *basementnodesize = ft_handle->options.basementnodesize; + *fanout = ft_handle->options.fanout; } } -void toku_ft_set_bt_compare(FT_HANDLE brt, int (*bt_compare)(DB*, const DBT*, const DBT*)) { - brt->options.compare_fun = bt_compare; -} - -void toku_ft_set_redirect_callback(FT_HANDLE brt, on_redirect_callback redir_cb, void* extra) { - brt->redirect_callback = redir_cb; - brt->redirect_callback_extra = extra; -} - -void toku_ft_set_update(FT_HANDLE brt, ft_update_func update_fun) { - brt->options.update_fun = update_fun; -} - -ft_compare_func toku_ft_get_bt_compare (FT_HANDLE brt) { - return brt->options.compare_fun; -} - -static void -ft_remove_handle_ref_callback(FT UU(ft), void *extra) { - FT_HANDLE CAST_FROM_VOIDP(handle, extra); - toku_list_remove(&handle->live_ft_handle_link); -} - -// close an ft handle during normal operation. the underlying ft may or may not close, -// depending if there are still references. an lsn for this close will come from the logger. -void -toku_ft_handle_close(FT_HANDLE ft_handle) { - // There are error paths in the ft_handle_open that end with ft_handle->ft==NULL. - FT ft = ft_handle->ft; - if (ft) { - const bool oplsn_valid = false; - toku_ft_remove_reference(ft, oplsn_valid, ZERO_LSN, ft_remove_handle_ref_callback, ft_handle); +// The memcmp magic byte may be set on a per fractal tree basis to communicate +// that if two keys begin with this byte, they may be compared with the builtin +// key comparison function. This greatly optimizes certain in-memory workloads, +// such as lookups by OID primary key in TokuMX. 
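/*
 * Editorial sketch (annotation, not part of the patch): what the memcmp magic described
 * above buys a comparator. If both keys begin with the agreed-upon magic byte, they can
 * be ordered with a plain byte-wise comparison and the user comparator can be skipped.
 * The key type, the shorter-key-first tie-break, and the stand-in user comparator below
 * are assumptions made for the sketch, not the real toku comparator class.
 */
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>

struct key_sketch { const uint8_t *data; size_t size; };

static int lex_compare(const key_sketch &a, const key_sketch &b) {
    int c = memcmp(a.data, b.data, std::min(a.size, b.size));
    if (c != 0) return c;
    if (a.size < b.size) return -1;
    if (a.size > b.size) return  1;
    return 0;
}

// Stand-in for a user-supplied comparison function (imagine one that has to
// deserialize the key before comparing).
static int user_compare(const key_sketch &a, const key_sketch &b) {
    return lex_compare(a, b);
}

static int compare_with_magic(const key_sketch &a, const key_sketch &b, uint8_t magic) {
    if (a.size > 0 && b.size > 0 && a.data[0] == magic && b.data[0] == magic) {
        // Both keys carry the magic prefix, so a plain byte comparison is safe.
        return lex_compare(a, b);
    }
    return user_compare(a, b);
}

int main(void) {
    const uint8_t k1[] = {0x42, 1, 2}, k2[] = {0x42, 1, 3};
    key_sketch a = {k1, sizeof k1}, b = {k2, sizeof k2};
    printf("%d\n", compare_with_magic(a, b, 0x42) < 0);  // prints 1
    return 0;
}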
+int toku_ft_handle_set_memcmp_magic(FT_HANDLE ft_handle, uint8_t magic) { + if (magic == comparator::MEMCMP_MAGIC_NONE) { + return EINVAL; } - toku_free(ft_handle); -} - -// close an ft handle during recovery. the underlying ft must close, and will use the given lsn. -void -toku_ft_handle_close_recovery(FT_HANDLE ft_handle, LSN oplsn) { - FT ft = ft_handle->ft; - // the ft must exist if closing during recovery. error paths during - // open for recovery should close handles using toku_ft_handle_close() - assert(ft); - const bool oplsn_valid = true; - toku_ft_remove_reference(ft, oplsn_valid, oplsn, ft_remove_handle_ref_callback, ft_handle); - toku_free(ft_handle); -} - -// TODO: remove this, callers should instead just use toku_ft_handle_close() -int -toku_close_ft_handle_nolsn (FT_HANDLE ft_handle, char** UU(error_string)) { - toku_ft_handle_close(ft_handle); + if (ft_handle->ft != nullptr) { + // if the handle is already open, then we cannot set the memcmp magic + // (because it may or may not have been set by someone else already) + return EINVAL; + } + ft_handle->options.memcmp_magic = magic; return 0; } -void toku_ft_handle_create(FT_HANDLE *ft_handle_ptr) { - FT_HANDLE XMALLOC(brt); - memset(brt, 0, sizeof *brt); - toku_list_init(&brt->live_ft_handle_link); - brt->options.flags = 0; - brt->did_set_flags = false; - brt->options.nodesize = FT_DEFAULT_NODE_SIZE; - brt->options.basementnodesize = FT_DEFAULT_BASEMENT_NODE_SIZE; - brt->options.compression_method = TOKU_DEFAULT_COMPRESSION_METHOD; - brt->options.fanout = FT_DEFAULT_FANOUT; - brt->options.compare_fun = toku_builtin_compare_fun; - brt->options.update_fun = NULL; - *ft_handle_ptr = brt; -} - -/* ************* CURSORS ********************* */ - -static inline void -ft_cursor_cleanup_dbts(FT_CURSOR c) { - toku_destroy_dbt(&c->key); - toku_destroy_dbt(&c->val); -} - -// -// This function is used by the leafentry iterators. -// returns TOKUDB_ACCEPT if live transaction context is allowed to read a value -// that is written by transaction with LSN of id -// live transaction context may read value if either id is the root ancestor of context, or if -// id was committed before context's snapshot was taken. 
-// For id to be committed before context's snapshot was taken, the following must be true: -// - id < context->snapshot_txnid64 AND id is not in context's live root transaction list -// For the above to NOT be true: -// - id > context->snapshot_txnid64 OR id is in context's live root transaction list -// static int -does_txn_read_entry(TXNID id, TOKUTXN context) { - int rval; - TXNID oldest_live_in_snapshot = toku_get_oldest_in_live_root_txn_list(context); - if (oldest_live_in_snapshot == TXNID_NONE && id < context->snapshot_txnid64) { - rval = TOKUDB_ACCEPT; - } - else if (id < oldest_live_in_snapshot || id == context->txnid.parent_id64) { - rval = TOKUDB_ACCEPT; - } - else if (id > context->snapshot_txnid64 || toku_is_txn_in_live_root_txn_list(*context->live_root_txn_list, id)) { - rval = 0; - } - else { - rval = TOKUDB_ACCEPT; - } - return rval; -} - -static inline void -ft_cursor_extract_val(LEAFENTRY le, - FT_CURSOR cursor, - uint32_t *vallen, - void **val) { - if (toku_ft_cursor_is_leaf_mode(cursor)) { - *val = le; - *vallen = leafentry_memsize(le); - } else if (cursor->is_snapshot_read) { - int r = le_iterate_val( - le, - does_txn_read_entry, - val, - vallen, - cursor->ttxn - ); - lazy_assert_zero(r); - } else { - *val = le_latest_val_and_len(le, vallen); +verify_builtin_comparisons_consistent(FT_HANDLE t, uint32_t flags) { + if ((flags & TOKU_DB_KEYCMP_BUILTIN) && (t->options.compare_fun != toku_builtin_compare_fun)) { + return EINVAL; } + return 0; } -int toku_ft_cursor ( - FT_HANDLE brt, - FT_CURSOR *cursorptr, - TOKUTXN ttxn, - bool is_snapshot_read, - bool disable_prefetching +// +// See comments in toku_db_change_descriptor to understand invariants +// in the system when this function is called +// +void toku_ft_change_descriptor( + FT_HANDLE ft_h, + const DBT* old_descriptor, + const DBT* new_descriptor, + bool do_log, + TOKUTXN txn, + bool update_cmp_descriptor ) { - if (is_snapshot_read) { - invariant(ttxn != NULL); - int accepted = does_txn_read_entry(brt->ft->h->root_xid_that_created, ttxn); - if (accepted!=TOKUDB_ACCEPT) { - invariant(accepted==0); - return TOKUDB_MVCC_DICTIONARY_TOO_NEW; - } - } - FT_CURSOR XCALLOC(cursor); - cursor->ft_handle = brt; - cursor->prefetching = false; - toku_init_dbt(&cursor->range_lock_left_key); - toku_init_dbt(&cursor->range_lock_right_key); - cursor->left_is_neg_infty = false; - cursor->right_is_pos_infty = false; - cursor->is_snapshot_read = is_snapshot_read; - cursor->is_leaf_mode = false; - cursor->ttxn = ttxn; - cursor->disable_prefetching = disable_prefetching; - cursor->is_temporary = false; - *cursorptr = cursor; - return 0; -} + DESCRIPTOR_S new_d; -void toku_ft_cursor_remove_restriction(FT_CURSOR ftcursor) { - ftcursor->out_of_range_error = 0; - ftcursor->direction = 0; -} + // if running with txns, save to rollback + write to recovery log + if (txn) { + // put information into rollback file + BYTESTRING old_desc_bs = { old_descriptor->size, (char *) old_descriptor->data }; + BYTESTRING new_desc_bs = { new_descriptor->size, (char *) new_descriptor->data }; + toku_logger_save_rollback_change_fdescriptor( + txn, + toku_cachefile_filenum(ft_h->ft->cf), + &old_desc_bs + ); + toku_txn_maybe_note_ft(txn, ft_h->ft); -void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR ftcursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra) { - ftcursor->interrupt_cb = cb; - ftcursor->interrupt_cb_extra = extra; -} + if (do_log) { + TOKULOGGER logger = toku_txn_logger(txn); + TXNID_PAIR xid = toku_txn_get_txnid(txn); + toku_log_change_fdescriptor( + 
logger, NULL, 0, + txn, + toku_cachefile_filenum(ft_h->ft->cf), + xid, + old_desc_bs, + new_desc_bs, + update_cmp_descriptor + ); + } + } + // write new_descriptor to header + new_d.dbt = *new_descriptor; + toku_ft_update_descriptor(ft_h->ft, &new_d); + // very infrequent operation, worth precise threadsafe count + STATUS_INC(FT_DESCRIPTOR_SET, 1); -void -toku_ft_cursor_set_temporary(FT_CURSOR ftcursor) { - ftcursor->is_temporary = true; + if (update_cmp_descriptor) { + toku_ft_update_cmp_descriptor(ft_h->ft); + } } -void -toku_ft_cursor_set_leaf_mode(FT_CURSOR ftcursor) { - ftcursor->is_leaf_mode = true; +static void +toku_ft_handle_inherit_options(FT_HANDLE t, FT ft) { + struct ft_options options = { + .nodesize = ft->h->nodesize, + .basementnodesize = ft->h->basementnodesize, + .compression_method = ft->h->compression_method, + .fanout = ft->h->fanout, + .flags = ft->h->flags, + .memcmp_magic = ft->cmp.get_memcmp_magic(), + .compare_fun = ft->cmp.get_compare_func(), + .update_fun = ft->update_fun + }; + t->options = options; + t->did_set_flags = true; } -int -toku_ft_cursor_is_leaf_mode(FT_CURSOR ftcursor) { - return ftcursor->is_leaf_mode; -} +// This is the actual open, used for various purposes, such as normal use, recovery, and redirect. +// fname_in_env is the iname, relative to the env_dir (data_dir is already in iname as prefix). +// The checkpointed version (checkpoint_lsn) of the dictionary must be no later than max_acceptable_lsn . +// Requires: The multi-operation client lock must be held to prevent a checkpoint from occuring. +static int +ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, FILENUM use_filenum, DICTIONARY_ID use_dictionary_id, LSN max_acceptable_lsn) { + int r; + bool txn_created = false; + char *fname_in_cwd = NULL; + CACHEFILE cf = NULL; + FT ft = NULL; + bool did_create = false; + toku_ft_open_close_lock(); -void -toku_ft_cursor_set_range_lock(FT_CURSOR cursor, const DBT *left, const DBT *right, - bool left_is_neg_infty, bool right_is_pos_infty, - int out_of_range_error) -{ - // Destroy any existing keys and then clone the given left, right keys - toku_destroy_dbt(&cursor->range_lock_left_key); - if (left_is_neg_infty) { - cursor->left_is_neg_infty = true; - } else { - toku_clone_dbt(&cursor->range_lock_left_key, *left); + if (ft_h->did_set_flags) { + r = verify_builtin_comparisons_consistent(ft_h, ft_h->options.flags); + if (r!=0) { goto exit; } } - toku_destroy_dbt(&cursor->range_lock_right_key); - if (right_is_pos_infty) { - cursor->right_is_pos_infty = true; + assert(is_create || !only_create); + FILENUM reserved_filenum; + reserved_filenum = use_filenum; + fname_in_cwd = toku_cachetable_get_fname_in_cwd(cachetable, fname_in_env); + bool was_already_open; + { + int fd = -1; + r = ft_open_file(fname_in_cwd, &fd); + if (reserved_filenum.fileid == FILENUM_NONE.fileid) { + reserved_filenum = toku_cachetable_reserve_filenum(cachetable); + } + if (r==ENOENT && is_create) { + did_create = true; + if (txn) { + BYTESTRING bs = { .len=(uint32_t) strlen(fname_in_env), .data = (char*)fname_in_env }; + toku_logger_save_rollback_fcreate(txn, reserved_filenum, &bs); // bs is a copy of the fname relative to the environment + } + txn_created = (bool)(txn!=NULL); + toku_logger_log_fcreate(txn, fname_in_env, reserved_filenum, file_mode, ft_h->options.flags, ft_h->options.nodesize, ft_h->options.basementnodesize, ft_h->options.compression_method); + r = ft_create_file(ft_h, fname_in_cwd, &fd); + if 
(r) { goto exit; } + } + if (r) { goto exit; } + r=toku_cachetable_openfd_with_filenum(&cf, cachetable, fd, fname_in_env, reserved_filenum, &was_already_open); + if (r) { goto exit; } + } + assert(ft_h->options.nodesize>0); + if (is_create) { + r = toku_read_ft_and_store_in_cachefile(ft_h, cf, max_acceptable_lsn, &ft); + if (r==TOKUDB_DICTIONARY_NO_HEADER) { + toku_ft_create(&ft, &ft_h->options, cf, txn); + } + else if (r!=0) { + goto exit; + } + else if (only_create) { + assert_zero(r); + r = EEXIST; + goto exit; + } + // if we get here, then is_create was true but only_create was false, + // so it is ok for toku_read_ft_and_store_in_cachefile to have read + // the header via toku_read_ft_and_store_in_cachefile } else { - toku_clone_dbt(&cursor->range_lock_right_key, *right); + r = toku_read_ft_and_store_in_cachefile(ft_h, cf, max_acceptable_lsn, &ft); + if (r) { goto exit; } } - - // TOKUDB_FOUND_BUT_REJECTED is a DB_NOTFOUND with instructions to stop looking. (Faster) - cursor->out_of_range_error = out_of_range_error == DB_NOTFOUND ? TOKUDB_FOUND_BUT_REJECTED : out_of_range_error; - cursor->direction = 0; -} - -void toku_ft_cursor_close(FT_CURSOR cursor) { - ft_cursor_cleanup_dbts(cursor); - toku_destroy_dbt(&cursor->range_lock_left_key); - toku_destroy_dbt(&cursor->range_lock_right_key); - toku_free(cursor); -} - -static inline void ft_cursor_set_prefetching(FT_CURSOR cursor) { - cursor->prefetching = true; -} - -static inline bool ft_cursor_prefetching(FT_CURSOR cursor) { - return cursor->prefetching; -} - -//Return true if cursor is uninitialized. false otherwise. -static bool -ft_cursor_not_set(FT_CURSOR cursor) { - assert((cursor->key.data==NULL) == (cursor->val.data==NULL)); - return (bool)(cursor->key.data == NULL); -} - -// -// -// -// -// -// -// -// -// -// TODO: ask Yoni why second parameter here is not const -// -// -// -// -// -// -// -// -// -static int -heaviside_from_search_t(const DBT &kdbt, ft_search_t &search) { - int cmp = search.compare(search, - search.k ? &kdbt : 0); - // The search->compare function returns only 0 or 1 - switch (search.direction) { - case FT_SEARCH_LEFT: return cmp==0 ? -1 : +1; - case FT_SEARCH_RIGHT: return cmp==0 ? +1 : -1; // Because the comparison runs backwards for right searches. + if (!ft_h->did_set_flags) { + r = verify_builtin_comparisons_consistent(ft_h, ft_h->options.flags); + if (r) { goto exit; } + } else if (ft_h->options.flags != ft->h->flags) { /* if flags have been set then flags must match */ + r = EINVAL; + goto exit; } - abort(); return 0; -} + // Ensure that the memcmp magic bits are consistent, if set. + if (ft->cmp.get_memcmp_magic() != toku::comparator::MEMCMP_MAGIC_NONE && + ft_h->options.memcmp_magic != toku::comparator::MEMCMP_MAGIC_NONE && + ft_h->options.memcmp_magic != ft->cmp.get_memcmp_magic()) { + r = EINVAL; + goto exit; + } + toku_ft_handle_inherit_options(ft_h, ft); -// -// Returns true if the value that is to be read is empty. -// -static inline int -is_le_val_del(LEAFENTRY le, FT_CURSOR ftcursor) { - int rval; - if (ftcursor->is_snapshot_read) { - bool is_del; - le_iterate_is_del( - le, - does_txn_read_entry, - &is_del, - ftcursor->ttxn - ); - rval = is_del; + if (!was_already_open) { + if (!did_create) { //Only log the fopen that OPENs the file. If it was already open, don't log. 
+ toku_logger_log_fopen(txn, fname_in_env, toku_cachefile_filenum(cf), ft_h->options.flags); + } + } + int use_reserved_dict_id; + use_reserved_dict_id = use_dictionary_id.dictid != DICTIONARY_ID_NONE.dictid; + if (!was_already_open) { + DICTIONARY_ID dict_id; + if (use_reserved_dict_id) { + dict_id = use_dictionary_id; + } + else { + dict_id = next_dict_id(); + } + ft->dict_id = dict_id; } else { - rval = le_latest_is_del(le); + // dict_id is already in header + if (use_reserved_dict_id) { + assert(ft->dict_id.dictid == use_dictionary_id.dictid); + } } - return rval; -} - -struct store_fifo_offset_extra { - int32_t *offsets; - int i; -}; - -int store_fifo_offset(const int32_t &offset, const uint32_t UU(idx), struct store_fifo_offset_extra *const extra) __attribute__((nonnull(3))); -int store_fifo_offset(const int32_t &offset, const uint32_t UU(idx), struct store_fifo_offset_extra *const extra) -{ - extra->offsets[extra->i] = offset; - extra->i++; - return 0; -} + assert(ft); + assert(ft->dict_id.dictid != DICTIONARY_ID_NONE.dictid); + assert(ft->dict_id.dictid < dict_id_serial); -/** - * Given pointers to offsets within a FIFO where we can find messages, - * figure out the MSN of each message, and compare those MSNs. Returns 1, - * 0, or -1 if a is larger than, equal to, or smaller than b. - */ -int fifo_offset_msn_cmp(FIFO &fifo, const int32_t &ao, const int32_t &bo); -int fifo_offset_msn_cmp(FIFO &fifo, const int32_t &ao, const int32_t &bo) -{ - const struct fifo_entry *a = toku_fifo_get_entry(fifo, ao); - const struct fifo_entry *b = toku_fifo_get_entry(fifo, bo); - if (a->msn.msn > b->msn.msn) { - return +1; - } - if (a->msn.msn < b->msn.msn) { - return -1; + // important note here, + // after this point, where we associate the header + // with the ft_handle, the function is not allowed to fail + // Code that handles failure (located below "exit"), + // depends on this + toku_ft_note_ft_handle_open(ft, ft_h); + if (txn_created) { + assert(txn); + toku_txn_maybe_note_ft(txn, ft); } - return 0; -} - -/** - * Given a fifo_entry, either decompose it into its parameters and call - * toku_ft_bn_apply_cmd, or discard it, based on its MSN and the MSN of the - * basement node. - */ -static void -do_bn_apply_cmd(FT_HANDLE t, BASEMENTNODE bn, struct fifo_entry *entry, txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update) -{ - // The messages are being iterated over in (key,msn) order or just in - // msn order, so all the messages for one key, from one buffer, are in - // ascending msn order. So it's ok that we don't update the basement - // node's msn until the end. 
- if (entry->msn.msn > bn->max_msn_applied.msn) { - ITEMLEN keylen = entry->keylen; - ITEMLEN vallen = entry->vallen; - enum ft_msg_type type = fifo_entry_get_msg_type(entry); - MSN msn = entry->msn; - const XIDS xids = (XIDS) &entry->xids_s; - bytevec key = xids_get_end_of_array(xids); - bytevec val = (uint8_t*)key + entry->keylen; - - DBT hk; - toku_fill_dbt(&hk, key, keylen); - DBT hv; - FT_MSG_S ftcmd = { type, msn, xids, .u = { .id = { &hk, toku_fill_dbt(&hv, val, vallen) } } }; - toku_ft_bn_apply_cmd( - t->ft->compare_fun, - t->ft->update_fun, - &t->ft->cmp_descriptor, - bn, - &ftcmd, - gc_info, - workdone, - stats_to_update - ); - } else { - STATUS_INC(FT_MSN_DISCARDS, 1); - } - // We must always mark entry as stale since it has been marked - // (using omt::iterate_and_mark_range) - // It is possible to call do_bn_apply_cmd even when it won't apply the message because - // the node containing it could have been evicted and brought back in. - entry->is_fresh = false; -} - -struct iterate_do_bn_apply_cmd_extra { - FT_HANDLE t; - BASEMENTNODE bn; - NONLEAF_CHILDINFO bnc; - txn_gc_info *gc_info; - uint64_t *workdone; - STAT64INFO stats_to_update; -}; - -int iterate_do_bn_apply_cmd(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_cmd_extra *const e) __attribute__((nonnull(3))); -int iterate_do_bn_apply_cmd(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_cmd_extra *const e) -{ - struct fifo_entry *entry = toku_fifo_get_entry(e->bnc->buffer, offset); - do_bn_apply_cmd(e->t, e->bn, entry, e->gc_info, e->workdone, e->stats_to_update); - return 0; -} -/** - * Given the bounds of the basement node to which we will apply messages, - * find the indexes within message_tree which contain the range of - * relevant messages. - * - * The message tree contains offsets into the buffer, where messages are - * found. The pivot_bounds are the lower bound exclusive and upper bound - * inclusive, because they come from pivot keys in the tree. We want OMT - * indices, which must have the lower bound be inclusive and the upper - * bound exclusive. We will get these by telling toku_omt_find to look - * for something strictly bigger than each of our pivot bounds. - * - * Outputs the OMT indices in lbi (lower bound inclusive) and ube (upper - * bound exclusive). - */ -template -static void -find_bounds_within_message_tree( - DESCRIPTOR desc, /// used for cmp - ft_compare_func cmp, /// used to compare keys - const find_bounds_omt_t &message_tree, /// tree holding FIFO offsets, in which we want to look for indices - FIFO buffer, /// buffer in which messages are found - struct pivot_bounds const * const bounds, /// key bounds within the basement node we're applying messages to - uint32_t *lbi, /// (output) "lower bound inclusive" (index into message_tree) - uint32_t *ube /// (output) "upper bound exclusive" (index into message_tree) - ) -{ - int r = 0; + // Opening an ft may restore to previous checkpoint. + // Truncate if necessary. + { + int fd = toku_cachefile_get_fd (ft->cf); + ft->blocktable.maybe_truncate_file_on_open(fd); + } - if (bounds->lower_bound_exclusive) { - // By setting msn to MAX_MSN and by using direction of +1, we will - // get the first message greater than (in (key, msn) order) any - // message (with any msn) with the key lower_bound_exclusive. - // This will be a message we want to try applying, so it is the - // "lower bound inclusive" within the message_tree. 
- struct toku_fifo_entry_key_msn_heaviside_extra lbi_extra; - ZERO_STRUCT(lbi_extra); - lbi_extra.desc = desc; - lbi_extra.cmp = cmp; - lbi_extra.fifo = buffer; - lbi_extra.key = bounds->lower_bound_exclusive; - lbi_extra.msn = MAX_MSN; - int32_t found_lb; - r = message_tree.template find(lbi_extra, +1, &found_lb, lbi); - if (r == DB_NOTFOUND) { - // There is no relevant data (the lower bound is bigger than - // any message in this tree), so we have no range and we're - // done. - *lbi = 0; - *ube = 0; - return; - } - if (bounds->upper_bound_inclusive) { - // Check if what we found for lbi is greater than the upper - // bound inclusive that we have. If so, there are no relevant - // messages between these bounds. - const DBT *ubi = bounds->upper_bound_inclusive; - const int32_t offset = found_lb; - DBT found_lbidbt; - fill_dbt_for_fifo_entry(&found_lbidbt, toku_fifo_get_entry(buffer, offset)); - FAKE_DB(db, desc); - int c = cmp(&db, &found_lbidbt, ubi); - // These DBTs really are both inclusive bounds, so we need - // strict inequality in order to determine that there's - // nothing between them. If they're equal, then we actually - // need to apply the message pointed to by lbi, and also - // anything with the same key but a bigger msn. - if (c > 0) { - *lbi = 0; - *ube = 0; - return; + r = 0; +exit: + if (fname_in_cwd) { + toku_free(fname_in_cwd); + } + if (r != 0 && cf) { + if (ft) { + // we only call toku_ft_note_ft_handle_open + // when the function succeeds, so if we are here, + // then that means we have a reference to the header + // but we have not linked it to this ft. So, + // we can simply try to remove the header. + // We don't need to unlink this ft from the header + toku_ft_grab_reflock(ft); + bool needed = toku_ft_needed_unlocked(ft); + toku_ft_release_reflock(ft); + if (!needed) { + // close immediately. + toku_ft_evict_from_memory(ft, false, ZERO_LSN); } } - } else { - // No lower bound given, it's negative infinity, so we start at - // the first message in the OMT. - *lbi = 0; - } - if (bounds->upper_bound_inclusive) { - // Again, we use an msn of MAX_MSN and a direction of +1 to get - // the first thing bigger than the upper_bound_inclusive key. - // This is therefore the smallest thing we don't want to apply, - // and toku_omt_iterate_on_range will not examine it. - struct toku_fifo_entry_key_msn_heaviside_extra ube_extra; - ZERO_STRUCT(ube_extra); - ube_extra.desc = desc; - ube_extra.cmp = cmp; - ube_extra.fifo = buffer; - ube_extra.key = bounds->upper_bound_inclusive; - ube_extra.msn = MAX_MSN; - r = message_tree.template find(ube_extra, +1, nullptr, ube); - if (r == DB_NOTFOUND) { - // Couldn't find anything in the buffer bigger than our key, - // so we need to look at everything up to the end of - // message_tree. - *ube = message_tree.size(); + else { + toku_cachefile_close(&cf, false, ZERO_LSN); } - } else { - // No upper bound given, it's positive infinity, so we need to go - // through the end of the OMT. - *ube = message_tree.size(); } + toku_ft_open_close_unlock(); + return r; } -/** - * For each message in the ancestor's buffer (determined by childnum) that - * is key-wise between lower_bound_exclusive and upper_bound_inclusive, - * apply the message to the basement node. We treat the bounds as minus - * or plus infinity respectively if they are NULL. Do not mark the node - * as dirty (preserve previous state of 'dirty' bit). 
- */ -static void -bnc_apply_messages_to_basement_node( - FT_HANDLE t, // used for comparison function - BASEMENTNODE bn, // where to apply messages - FTNODE ancestor, // the ancestor node where we can find messages to apply - int childnum, // which child buffer of ancestor contains messages we want - struct pivot_bounds const * const bounds, // contains pivot key bounds of this basement node - txn_gc_info *gc_info, - bool* msgs_applied - ) -{ +// Open an ft for the purpose of recovery, which requires that the ft be open to a pre-determined FILENUM +// and may require a specific checkpointed version of the file. +// (dict_id is assigned by the ft_handle_open() function.) +int +toku_ft_handle_open_recovery(FT_HANDLE t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, FILENUM use_filenum, LSN max_acceptable_lsn) { int r; - NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum); - - // Determine the offsets in the message trees between which we need to - // apply messages from this buffer - STAT64INFO_S stats_delta = {0,0}; - uint64_t workdone_this_ancestor = 0; - - uint32_t stale_lbi, stale_ube; - if (!bn->stale_ancestor_messages_applied) { - find_bounds_within_message_tree(&t->ft->cmp_descriptor, t->ft->compare_fun, bnc->stale_message_tree, bnc->buffer, bounds, &stale_lbi, &stale_ube); - } else { - stale_lbi = 0; - stale_ube = 0; - } - uint32_t fresh_lbi, fresh_ube; - find_bounds_within_message_tree(&t->ft->cmp_descriptor, t->ft->compare_fun, bnc->fresh_message_tree, bnc->buffer, bounds, &fresh_lbi, &fresh_ube); - - // We now know where all the messages we must apply are, so one of the - // following 4 cases will do the application, depending on which of - // the lists contains relevant messages: - // - // 1. broadcast messages and anything else, or a mix of fresh and stale - // 2. only fresh messages - // 3. only stale messages - if (bnc->broadcast_list.size() > 0 || - (stale_lbi != stale_ube && fresh_lbi != fresh_ube)) { - // We have messages in multiple trees, so we grab all - // the relevant messages' offsets and sort them by MSN, then apply - // them in MSN order. - const int buffer_size = ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) + bnc->broadcast_list.size()); - toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t)); - int32_t *offsets = reinterpret_cast(offsets_buf.get()); - struct store_fifo_offset_extra sfo_extra = { .offsets = offsets, .i = 0 }; - - // Populate offsets array with offsets to stale messages - r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &sfo_extra); - assert_zero(r); - - // Then store fresh offsets, and mark them to be moved to stale later. - r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &sfo_extra); - assert_zero(r); + assert(use_filenum.fileid != FILENUM_NONE.fileid); + r = ft_handle_open(t, fname_in_env, is_create, only_create, cachetable, + txn, use_filenum, DICTIONARY_ID_NONE, max_acceptable_lsn); + return r; +} - // Store offsets of all broadcast messages. - r = bnc->broadcast_list.iterate(&sfo_extra); - assert_zero(r); - invariant(sfo_extra.i == buffer_size); +// Open an ft in normal use. The FILENUM and dict_id are assigned by the ft_handle_open() function. +// Requires: The multi-operation client lock must be held to prevent a checkpoint from occuring. 
+int +toku_ft_handle_open(FT_HANDLE t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn) { + int r; + r = ft_handle_open(t, fname_in_env, is_create, only_create, cachetable, txn, FILENUM_NONE, DICTIONARY_ID_NONE, MAX_LSN); + return r; +} - // Sort by MSN. - r = toku::sort::mergesort_r(offsets, buffer_size, bnc->buffer); - assert_zero(r); +// clone an ft handle. the cloned handle has a new dict_id but refers to the same fractal tree +int +toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn) { + FT_HANDLE result_ft_handle; + toku_ft_handle_create(&result_ft_handle); - // Apply the messages in MSN order. - for (int i = 0; i < buffer_size; ++i) { - *msgs_applied = true; - struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offsets[i]); - do_bn_apply_cmd(t, bn, entry, gc_info, &workdone_this_ancestor, &stats_delta); - } - } else if (stale_lbi == stale_ube) { - // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later. - struct iterate_do_bn_apply_cmd_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta }; - if (fresh_ube - fresh_lbi > 0) *msgs_applied = true; - r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &iter_extra); - assert_zero(r); - } else { - invariant(fresh_lbi == fresh_ube); - // No fresh messages to apply, we just apply stale messages. + // we're cloning, so the handle better have an open ft and open cf + invariant(ft_handle->ft); + invariant(ft_handle->ft->cf); - if (stale_ube - stale_lbi > 0) *msgs_applied = true; - struct iterate_do_bn_apply_cmd_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta }; + // inherit the options of the ft whose handle is being cloned. + toku_ft_handle_inherit_options(result_ft_handle, ft_handle->ft); - r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &iter_extra); - assert_zero(r); - } - // - // update stats - // - if (workdone_this_ancestor > 0) { - (void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor); - } - if (stats_delta.numbytes || stats_delta.numrows) { - toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta); + // we can clone the handle by creating a new handle with the same fname + CACHEFILE cf = ft_handle->ft->cf; + CACHETABLE ct = toku_cachefile_get_cachetable(cf); + const char *fname_in_env = toku_cachefile_fname_in_env(cf); + int r = toku_ft_handle_open(result_ft_handle, fname_in_env, false, false, ct, txn); + if (r != 0) { + toku_ft_handle_close(result_ft_handle); + result_ft_handle = NULL; } + *cloned_ft_handle = result_ft_handle; + return r; } -static void -apply_ancestors_messages_to_bn( +// Open an ft in normal use. The FILENUM and dict_id are assigned by the ft_handle_open() function. 
+int +toku_ft_handle_open_with_dict_id( FT_HANDLE t, - FTNODE node, - int childnum, - ANCESTORS ancestors, - struct pivot_bounds const * const bounds, - txn_gc_info *gc_info, - bool* msgs_applied + const char *fname_in_env, + int is_create, + int only_create, + CACHETABLE cachetable, + TOKUTXN txn, + DICTIONARY_ID use_dictionary_id ) { - BASEMENTNODE curr_bn = BLB(node, childnum); - struct pivot_bounds curr_bounds = next_pivot_keys(node, childnum, bounds); - for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { - if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > curr_bn->max_msn_applied.msn) { - paranoid_invariant(BP_STATE(curr_ancestors->node, curr_ancestors->childnum) == PT_AVAIL); - bnc_apply_messages_to_basement_node( - t, - curr_bn, - curr_ancestors->node, - curr_ancestors->childnum, - &curr_bounds, - gc_info, - msgs_applied - ); - // We don't want to check this ancestor node again if the - // next time we query it, the msn hasn't changed. - curr_bn->max_msn_applied = curr_ancestors->node->max_msn_applied_to_node_on_disk; - } - } - // At this point, we know all the stale messages above this - // basement node have been applied, and any new messages will be - // fresh, so we don't need to look at stale messages for this - // basement node, unless it gets evicted (and this field becomes - // false when it's read in again). - curr_bn->stale_ancestor_messages_applied = true; + int r; + r = ft_handle_open( + t, + fname_in_env, + is_create, + only_create, + cachetable, + txn, + FILENUM_NONE, + use_dictionary_id, + MAX_LSN + ); + return r; } -void -toku_apply_ancestors_messages_to_node ( - FT_HANDLE t, - FTNODE node, - ANCESTORS ancestors, - struct pivot_bounds const * const bounds, - bool* msgs_applied, - int child_to_read - ) -// Effect: -// Bring a leaf node up-to-date according to all the messages in the ancestors. -// If the leaf node is already up-to-date then do nothing. -// If the leaf node is not already up-to-date, then record the work done -// for that leaf in each ancestor. -// Requires: -// This is being called when pinning a leaf node for the query path. -// The entire root-to-leaf path is pinned and appears in the ancestors list. -{ - VERIFY_NODE(t, node); - paranoid_invariant(node->height == 0); +DICTIONARY_ID +toku_ft_get_dictionary_id(FT_HANDLE ft_handle) { + FT ft = ft_handle->ft; + return ft->dict_id; +} - TXN_MANAGER txn_manager = toku_ft_get_txn_manager(t); - txn_manager_state txn_state_for_gc(txn_manager); +void toku_ft_set_flags(FT_HANDLE ft_handle, unsigned int flags) { + ft_handle->did_set_flags = true; + ft_handle->options.flags = flags; +} - TXNID oldest_referenced_xid_for_simple_gc = toku_ft_get_oldest_referenced_xid_estimate(t); - txn_gc_info gc_info(&txn_state_for_gc, - oldest_referenced_xid_for_simple_gc, - node->oldest_referenced_xid_known, - true); - if (!node->dirty && child_to_read >= 0) { - paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); - apply_ancestors_messages_to_bn( - t, - node, - child_to_read, - ancestors, - bounds, - &gc_info, - msgs_applied - ); - } - else { - // know we are a leaf node - // An important invariant: - // We MUST bring every available basement node for a dirty node up to date. - // flushing on the cleaner thread depends on this. This invariant - // allows the cleaner thread to just pick an internal node and flush it - // as opposed to being forced to start from the root. 
- for (int i = 0; i < node->n_children; i++) { - if (BP_STATE(node, i) != PT_AVAIL) { continue; } - apply_ancestors_messages_to_bn( - t, - node, - i, - ancestors, - bounds, - &gc_info, - msgs_applied - ); - } - } - VERIFY_NODE(t, node); +void toku_ft_get_flags(FT_HANDLE ft_handle, unsigned int *flags) { + *flags = ft_handle->options.flags; } -static bool bn_needs_ancestors_messages( - FT ft, - FTNODE node, - int childnum, - struct pivot_bounds const * const bounds, - ANCESTORS ancestors, - MSN* max_msn_applied - ) +void toku_ft_get_maximum_advised_key_value_lengths (unsigned int *max_key_len, unsigned int *max_val_len) +// return the maximum advisable key value lengths. The ft doesn't enforce these. { - BASEMENTNODE bn = BLB(node, childnum); - struct pivot_bounds curr_bounds = next_pivot_keys(node, childnum, bounds); - bool needs_ancestors_messages = false; - for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { - if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > bn->max_msn_applied.msn) { - paranoid_invariant(BP_STATE(curr_ancestors->node, curr_ancestors->childnum) == PT_AVAIL); - NONLEAF_CHILDINFO bnc = BNC(curr_ancestors->node, curr_ancestors->childnum); - if (bnc->broadcast_list.size() > 0) { - needs_ancestors_messages = true; - goto cleanup; - } - if (!bn->stale_ancestor_messages_applied) { - uint32_t stale_lbi, stale_ube; - find_bounds_within_message_tree(&ft->cmp_descriptor, - ft->compare_fun, - bnc->stale_message_tree, - bnc->buffer, - &curr_bounds, - &stale_lbi, - &stale_ube); - if (stale_lbi < stale_ube) { - needs_ancestors_messages = true; - goto cleanup; - } - } - uint32_t fresh_lbi, fresh_ube; - find_bounds_within_message_tree(&ft->cmp_descriptor, - ft->compare_fun, - bnc->fresh_message_tree, - bnc->buffer, - &curr_bounds, - &fresh_lbi, - &fresh_ube); - if (fresh_lbi < fresh_ube) { - needs_ancestors_messages = true; - goto cleanup; - } - if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > max_msn_applied->msn) { - max_msn_applied->msn = curr_ancestors->node->max_msn_applied_to_node_on_disk.msn; - } - } + *max_key_len = 32*1024; + *max_val_len = 32*1024*1024; +} + + +void toku_ft_handle_set_nodesize(FT_HANDLE ft_handle, unsigned int nodesize) { + if (ft_handle->ft) { + toku_ft_set_nodesize(ft_handle->ft, nodesize); + } + else { + ft_handle->options.nodesize = nodesize; } -cleanup: - return needs_ancestors_messages; } -bool toku_ft_leaf_needs_ancestors_messages( - FT ft, - FTNODE node, - ANCESTORS ancestors, - struct pivot_bounds const * const bounds, - MSN *const max_msn_in_path, - int child_to_read - ) -// Effect: Determine whether there are messages in a node's ancestors -// which must be applied to it. These messages are in the correct -// keyrange for any available basement nodes, and are in nodes with the -// correct max_msn_applied_to_node_on_disk. -// Notes: -// This is an approximate query. -// Output: -// max_msn_in_path: max of "max_msn_applied_to_node_on_disk" over -// ancestors. This is used later to update basement nodes' -// max_msn_applied values in case we don't do the full algorithm. -// Returns: -// true if there may be some such messages -// false only if there are definitely no such messages -// Rationale: -// When we pin a node with a read lock, we want to quickly determine if -// we should exchange it for a write lock in preparation for applying -// messages. If there are no messages, we don't need the write lock. 
-{ - paranoid_invariant(node->height == 0); - bool needs_ancestors_messages = false; - // child_to_read may be -1 in test cases - if (!node->dirty && child_to_read >= 0) { - paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); - needs_ancestors_messages = bn_needs_ancestors_messages( - ft, - node, - child_to_read, - bounds, - ancestors, - max_msn_in_path - ); +void toku_ft_handle_get_nodesize(FT_HANDLE ft_handle, unsigned int *nodesize) { + if (ft_handle->ft) { + toku_ft_get_nodesize(ft_handle->ft, nodesize); } else { - for (int i = 0; i < node->n_children; ++i) { - if (BP_STATE(node, i) != PT_AVAIL) { continue; } - needs_ancestors_messages = bn_needs_ancestors_messages( - ft, - node, - i, - bounds, - ancestors, - max_msn_in_path - ); - if (needs_ancestors_messages) { - goto cleanup; - } - } + *nodesize = ft_handle->options.nodesize; } -cleanup: - return needs_ancestors_messages; } -void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read) { - invariant(node->height == 0); - if (!node->dirty && child_to_read >= 0) { - paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); - BASEMENTNODE bn = BLB(node, child_to_read); - if (max_msn_applied.msn > bn->max_msn_applied.msn) { - // see comment below - (void) toku_sync_val_compare_and_swap(&bn->max_msn_applied.msn, bn->max_msn_applied.msn, max_msn_applied.msn); - } +void toku_ft_handle_set_basementnodesize(FT_HANDLE ft_handle, unsigned int basementnodesize) { + if (ft_handle->ft) { + toku_ft_set_basementnodesize(ft_handle->ft, basementnodesize); } else { - for (int i = 0; i < node->n_children; ++i) { - if (BP_STATE(node, i) != PT_AVAIL) { continue; } - BASEMENTNODE bn = BLB(node, i); - if (max_msn_applied.msn > bn->max_msn_applied.msn) { - // This function runs in a shared access context, so to silence tools - // like DRD, we use a CAS and ignore the result. - // Any threads trying to update these basement nodes should be - // updating them to the same thing (since they all have a read lock on - // the same root-to-leaf path) so this is safe. 
- (void) toku_sync_val_compare_and_swap(&bn->max_msn_applied.msn, bn->max_msn_applied.msn, max_msn_applied.msn); - } - } + ft_handle->options.basementnodesize = basementnodesize; } } -struct copy_to_stale_extra { - FT ft; - NONLEAF_CHILDINFO bnc; -}; +void toku_ft_handle_get_basementnodesize(FT_HANDLE ft_handle, unsigned int *basementnodesize) { + if (ft_handle->ft) { + toku_ft_get_basementnodesize(ft_handle->ft, basementnodesize); + } + else { + *basementnodesize = ft_handle->options.basementnodesize; + } +} -int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra) __attribute__((nonnull(3))); -int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra) -{ - struct fifo_entry *entry = toku_fifo_get_entry(extra->bnc->buffer, offset); - DBT keydbt; - DBT *key = fill_dbt_for_fifo_entry(&keydbt, entry); - struct toku_fifo_entry_key_msn_heaviside_extra heaviside_extra = { .desc = &extra->ft->cmp_descriptor, .cmp = extra->ft->compare_fun, .fifo = extra->bnc->buffer, .key = key, .msn = entry->msn }; - int r = extra->bnc->stale_message_tree.insert(offset, heaviside_extra, nullptr); - invariant_zero(r); - return 0; +void toku_ft_set_bt_compare(FT_HANDLE ft_handle, int (*bt_compare)(DB*, const DBT*, const DBT*)) { + ft_handle->options.compare_fun = bt_compare; } -__attribute__((nonnull)) -void -toku_move_ftnode_messages_to_stale(FT ft, FTNODE node) { - invariant(node->height > 0); - for (int i = 0; i < node->n_children; ++i) { - if (BP_STATE(node, i) != PT_AVAIL) { - continue; - } - NONLEAF_CHILDINFO bnc = BNC(node, i); - // We can't delete things out of the fresh tree inside the above - // procedures because we're still looking at the fresh tree. Instead - // we have to move messages after we're done looking at it. - struct copy_to_stale_extra cts_extra = { .ft = ft, .bnc = bnc }; - int r = bnc->fresh_message_tree.iterate_over_marked(&cts_extra); - invariant_zero(r); - bnc->fresh_message_tree.delete_all_marked(); - } -} - -static int cursor_check_restricted_range(FT_CURSOR c, bytevec key, ITEMLEN keylen) { - if (c->out_of_range_error) { - FT ft = c->ft_handle->ft; - FAKE_DB(db, &ft->cmp_descriptor); - DBT found_key; - toku_fill_dbt(&found_key, key, keylen); - if ((!c->left_is_neg_infty && c->direction <= 0 && ft->compare_fun(&db, &found_key, &c->range_lock_left_key) < 0) || - (!c->right_is_pos_infty && c->direction >= 0 && ft->compare_fun(&db, &found_key, &c->range_lock_right_key) > 0)) { - invariant(c->out_of_range_error); - return c->out_of_range_error; - } +void toku_ft_set_redirect_callback(FT_HANDLE ft_handle, on_redirect_callback redir_cb, void* extra) { + ft_handle->redirect_callback = redir_cb; + ft_handle->redirect_callback_extra = extra; +} + +void toku_ft_set_update(FT_HANDLE ft_handle, ft_update_func update_fun) { + ft_handle->options.update_fun = update_fun; +} + +const toku::comparator &toku_ft_get_comparator(FT_HANDLE ft_handle) { + invariant_notnull(ft_handle->ft); + return ft_handle->ft->cmp; +} + +static void +ft_remove_handle_ref_callback(FT UU(ft), void *extra) { + FT_HANDLE CAST_FROM_VOIDP(handle, extra); + toku_list_remove(&handle->live_ft_handle_link); +} + +static void ft_handle_close(FT_HANDLE ft_handle, bool oplsn_valid, LSN oplsn) { + FT ft = ft_handle->ft; + // There are error paths in the ft_handle_open that end with ft_handle->ft == nullptr. 
+ if (ft != nullptr) { + toku_ft_remove_reference(ft, oplsn_valid, oplsn, ft_remove_handle_ref_callback, ft_handle); } - // Reset cursor direction to mitigate risk if some query type doesn't set the direction. - // It is always correct to check both bounds (which happens when direction==0) but it can be slower. - c->direction = 0; + toku_free(ft_handle); +} + +// close an ft handle during normal operation. the underlying ft may or may not close, +// depending if there are still references. an lsn for this close will come from the logger. +void toku_ft_handle_close(FT_HANDLE ft_handle) { + ft_handle_close(ft_handle, false, ZERO_LSN); +} + +// close an ft handle during recovery. the underlying ft must close, and will use the given lsn. +void toku_ft_handle_close_recovery(FT_HANDLE ft_handle, LSN oplsn) { + // the ft must exist if closing during recovery. error paths during + // open for recovery should close handles using toku_ft_handle_close() + invariant_notnull(ft_handle->ft); + ft_handle_close(ft_handle, true, oplsn); +} + +// TODO: remove this, callers should instead just use toku_ft_handle_close() +int toku_close_ft_handle_nolsn(FT_HANDLE ft_handle, char **UU(error_string)) { + toku_ft_handle_close(ft_handle); return 0; } -static int -ft_cursor_shortcut ( - FT_CURSOR cursor, - int direction, - uint32_t index, - bn_data* bd, - FT_GET_CALLBACK_FUNCTION getf, - void *getf_v, - uint32_t *keylen, - void **key, - uint32_t *vallen, - void **val - ); +void toku_ft_handle_create(FT_HANDLE *ft_handle_ptr) { + FT_HANDLE XMALLOC(ft_handle); + memset(ft_handle, 0, sizeof *ft_handle); + toku_list_init(&ft_handle->live_ft_handle_link); + ft_handle->options.flags = 0; + ft_handle->did_set_flags = false; + ft_handle->options.nodesize = FT_DEFAULT_NODE_SIZE; + ft_handle->options.basementnodesize = FT_DEFAULT_BASEMENT_NODE_SIZE; + ft_handle->options.compression_method = TOKU_DEFAULT_COMPRESSION_METHOD; + ft_handle->options.fanout = FT_DEFAULT_FANOUT; + ft_handle->options.compare_fun = toku_builtin_compare_fun; + ft_handle->options.update_fun = NULL; + *ft_handle_ptr = ft_handle; +} + +/******************************* search ***************************************/ // Return true if this key is within the search bound. If there is no search bound then the tree search continues. static bool search_continue(ft_search *search, void *key, uint32_t key_len) { bool result = true; if (search->direction == FT_SEARCH_LEFT && search->k_bound) { FT_HANDLE CAST_FROM_VOIDP(ft_handle, search->context); - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); DBT this_key = { .data = key, .size = key_len }; // search continues if this key <= key bound - result = (ft_handle->ft->compare_fun(&db, &this_key, search->k_bound) <= 0); + result = (ft_handle->ft->cmp(&this_key, search->k_bound) <= 0); } return result; } +static int heaviside_from_search_t(const DBT &kdbt, ft_search &search) { + int cmp = search.compare(search, + search.k ? &kdbt : 0); + // The search->compare function returns only 0 or 1 + switch (search.direction) { + case FT_SEARCH_LEFT: return cmp==0 ? -1 : +1; + case FT_SEARCH_RIGHT: return cmp==0 ? +1 : -1; // Because the comparison runs backwards for right searches. + } + abort(); return 0; +} + // This is a bottom layer of the search functions. 
static int ft_search_basement_node( BASEMENTNODE bn, - ft_search_t *search, + ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool *doprefetch, @@ -4959,7 +3347,7 @@ ft_search_basement_node( bool can_bulk_fetch ) { - // Now we have to convert from ft_search_t to the heaviside function with a direction. What a pain... + // Now we have to convert from ft_search to the heaviside function with a direction. What a pain... int direction; switch (search->direction) { @@ -4984,7 +3372,7 @@ ok: ; if (toku_ft_cursor_is_leaf_mode(ftcursor)) goto got_a_good_value; // leaf mode cursors see all leaf entries - if (is_le_val_del(le,ftcursor)) { + if (le_val_is_del(le, ftcursor->is_snapshot_read, ftcursor->ttxn)) { // Provisionally deleted stuff is gone. // So we need to scan in the direction to see if we can find something. // Every 100 deleted leaf entries check if the leaf's key is within the search bounds. @@ -4992,7 +3380,7 @@ ok: ; switch (search->direction) { case FT_SEARCH_LEFT: idx++; - if (idx >= bn->data_buffer.omt_size() || + if (idx >= bn->data_buffer.num_klpairs() || ((n_deleted % 64) == 0 && !search_continue(search, key, keylen))) { if (ftcursor->interrupt_cb && ftcursor->interrupt_cb(ftcursor->interrupt_cb_extra)) { return TOKUDB_INTERRUPTED; @@ -5014,7 +3402,9 @@ ok: ; } r = bn->data_buffer.fetch_klpair(idx, &le, &keylen, &key); assert_zero(r); // we just validated the index - if (!is_le_val_del(le,ftcursor)) goto got_a_good_value; + if (!le_val_is_del(le, ftcursor->is_snapshot_read, ftcursor->ttxn)) { + goto got_a_good_value; + } } } got_a_good_value: @@ -5022,42 +3412,31 @@ ok: ; uint32_t vallen; void *val; - ft_cursor_extract_val(le, - ftcursor, - &vallen, - &val - ); - r = cursor_check_restricted_range(ftcursor, key, keylen); - if (r==0) { + le_extract_val(le, toku_ft_cursor_is_leaf_mode(ftcursor), + ftcursor->is_snapshot_read, ftcursor->ttxn, + &vallen, &val); + r = toku_ft_cursor_check_restricted_range(ftcursor, key, keylen); + if (r == 0) { r = getf(keylen, key, vallen, val, getf_v, false); } - if (r==0 || r == TOKUDB_CURSOR_CONTINUE) { + if (r == 0 || r == TOKUDB_CURSOR_CONTINUE) { // // IMPORTANT: bulk fetch CANNOT go past the current basement node, // because there is no guarantee that messages have been applied // to other basement nodes, as part of #5770 // if (r == TOKUDB_CURSOR_CONTINUE && can_bulk_fetch) { - r = ft_cursor_shortcut( - ftcursor, - direction, - idx, - &bn->data_buffer, - getf, - getf_v, - &keylen, - &key, - &vallen, - &val - ); + r = toku_ft_cursor_shortcut(ftcursor, direction, idx, &bn->data_buffer, + getf, getf_v, &keylen, &key, &vallen, &val); } - ft_cursor_cleanup_dbts(ftcursor); + toku_destroy_dbt(&ftcursor->key); + toku_destroy_dbt(&ftcursor->val); if (!ftcursor->is_temporary) { toku_memdup_dbt(&ftcursor->key, key, keylen); toku_memdup_dbt(&ftcursor->val, val, vallen); } - //The search was successful. Prefetching can continue. + // The search was successful. Prefetching can continue. 
*doprefetch = true; } } @@ -5067,9 +3446,9 @@ ok: ; static int ft_search_node ( - FT_HANDLE brt, + FT_HANDLE ft_handle, FTNODE node, - ft_search_t *search, + ft_search *search, int child_to_search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, @@ -5077,17 +3456,17 @@ ft_search_node ( FT_CURSOR ftcursor, UNLOCKERS unlockers, ANCESTORS, - struct pivot_bounds const * const bounds, + const pivot_bounds &bounds, bool can_bulk_fetch ); static int -ftnode_fetch_callback_and_free_bfe(CACHEFILE cf, PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int *dirtyp, void *extraargs) +ftnode_fetch_callback_and_free_bfe(CACHEFILE cf, PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int *dirtyp, void *extraargs) { - int r = toku_ftnode_fetch_callback(cf, p, fd, nodename, fullhash, ftnode_pv, disk_data, sizep, dirtyp, extraargs); - struct ftnode_fetch_extra *CAST_FROM_VOIDP(ffe, extraargs); - destroy_bfe_for_prefetch(ffe); - toku_free(ffe); + int r = toku_ftnode_fetch_callback(cf, p, fd, blocknum, fullhash, ftnode_pv, disk_data, sizep, dirtyp, extraargs); + ftnode_fetch_extra *CAST_FROM_VOIDP(bfe, extraargs); + bfe->destroy(); + toku_free(bfe); return r; } @@ -5095,32 +3474,44 @@ static int ftnode_pf_callback_and_free_bfe(void *ftnode_pv, void* disk_data, void *read_extraargs, int fd, PAIR_ATTR *sizep) { int r = toku_ftnode_pf_callback(ftnode_pv, disk_data, read_extraargs, fd, sizep); - struct ftnode_fetch_extra *CAST_FROM_VOIDP(ffe, read_extraargs); - destroy_bfe_for_prefetch(ffe); - toku_free(ffe); + ftnode_fetch_extra *CAST_FROM_VOIDP(bfe, read_extraargs); + bfe->destroy(); + toku_free(bfe); return r; } +CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT ft) { + CACHETABLE_WRITE_CALLBACK wc; + wc.flush_callback = toku_ftnode_flush_callback; + wc.pe_est_callback = toku_ftnode_pe_est_callback; + wc.pe_callback = toku_ftnode_pe_callback; + wc.cleaner_callback = toku_ftnode_cleaner_callback; + wc.clone_callback = toku_ftnode_clone_callback; + wc.checkpoint_complete_callback = toku_ftnode_checkpoint_complete_callback; + wc.write_extraargs = ft; + return wc; +} + static void -ft_node_maybe_prefetch(FT_HANDLE brt, FTNODE node, int childnum, FT_CURSOR ftcursor, bool *doprefetch) { +ft_node_maybe_prefetch(FT_HANDLE ft_handle, FTNODE node, int childnum, FT_CURSOR ftcursor, bool *doprefetch) { // the number of nodes to prefetch const int num_nodes_to_prefetch = 1; // if we want to prefetch in the tree // then prefetch the next children if there are any - if (*doprefetch && ft_cursor_prefetching(ftcursor) && !ftcursor->disable_prefetching) { - int rc = ft_cursor_rightmost_child_wanted(ftcursor, brt, node); + if (*doprefetch && toku_ft_cursor_prefetching(ftcursor) && !ftcursor->disable_prefetching) { + int rc = ft_cursor_rightmost_child_wanted(ftcursor, ft_handle, node); for (int i = childnum + 1; (i <= childnum + num_nodes_to_prefetch) && (i <= rc); i++) { BLOCKNUM nextchildblocknum = BP_BLOCKNUM(node, i); - uint32_t nextfullhash = compute_child_fullhash(brt->ft->cf, node, i); - struct ftnode_fetch_extra *MALLOC(bfe); - fill_bfe_for_prefetch(bfe, brt->ft, ftcursor); + uint32_t nextfullhash = compute_child_fullhash(ft_handle->ft->cf, node, i); + ftnode_fetch_extra *XCALLOC(bfe); + bfe->create_for_prefetch(ft_handle->ft, ftcursor); bool doing_prefetch = false; toku_cachefile_prefetch( - brt->ft->cf, + ft_handle->ft->cf, nextchildblocknum, nextfullhash, - 
get_write_callbacks_for_node(brt->ft), + get_write_callbacks_for_node(ft_handle->ft), ftnode_fetch_callback_and_free_bfe, toku_ftnode_pf_req_callback, ftnode_pf_callback_and_free_bfe, @@ -5128,7 +3519,7 @@ ft_node_maybe_prefetch(FT_HANDLE brt, FTNODE node, int childnum, FT_CURSOR ftcur &doing_prefetch ); if (!doing_prefetch) { - destroy_bfe_for_prefetch(bfe); + bfe->destroy(); toku_free(bfe); } *doprefetch = false; @@ -5141,16 +3532,17 @@ struct unlock_ftnode_extra { FTNODE node; bool msgs_applied; }; + // When this is called, the cachetable lock is held static void unlock_ftnode_fun (void *v) { struct unlock_ftnode_extra *x = NULL; CAST_FROM_VOIDP(x, v); - FT_HANDLE brt = x->ft_handle; + FT_HANDLE ft_handle = x->ft_handle; FTNODE node = x->node; // CT lock is held int r = toku_cachetable_unpin_ct_prelocked_no_flush( - brt->ft->cf, + ft_handle->ft->cf, node->ct_pair, (enum cachetable_dirty) node->dirty, x->msgs_applied ? make_ftnode_pair_attr(node) : make_invalid_pair_attr() @@ -5160,23 +3552,22 @@ unlock_ftnode_fun (void *v) { /* search in a node's child */ static int -ft_search_child(FT_HANDLE brt, FTNODE node, int childnum, ft_search_t *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool *doprefetch, FT_CURSOR ftcursor, UNLOCKERS unlockers, - ANCESTORS ancestors, struct pivot_bounds const * const bounds, bool can_bulk_fetch) +ft_search_child(FT_HANDLE ft_handle, FTNODE node, int childnum, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool *doprefetch, FT_CURSOR ftcursor, UNLOCKERS unlockers, + ANCESTORS ancestors, const pivot_bounds &bounds, bool can_bulk_fetch) // Effect: Search in a node's child. Searches are read-only now (at least as far as the hardcopy is concerned). { struct ancestors next_ancestors = {node, childnum, ancestors}; BLOCKNUM childblocknum = BP_BLOCKNUM(node,childnum); - uint32_t fullhash = compute_child_fullhash(brt->ft->cf, node, childnum); + uint32_t fullhash = compute_child_fullhash(ft_handle->ft->cf, node, childnum); FTNODE childnode = nullptr; // If the current node's height is greater than 1, then its child is an internal node. // Therefore, to warm the cache better (#5798), we want to read all the partitions off disk in one shot. bool read_all_partitions = node->height > 1; - struct ftnode_fetch_extra bfe; - fill_bfe_for_subset_read( - &bfe, - brt->ft, + ftnode_fetch_extra bfe; + bfe.create_for_subset_read( + ft_handle->ft, search, &ftcursor->range_lock_left_key, &ftcursor->range_lock_right_key, @@ -5187,7 +3578,7 @@ ft_search_child(FT_HANDLE brt, FTNODE node, int childnum, ft_search_t *search, F ); bool msgs_applied = false; { - int rr = toku_pin_ftnode_batched(brt, childblocknum, fullhash, + int rr = toku_pin_ftnode_for_query(ft_handle, childblocknum, fullhash, unlockers, &next_ancestors, bounds, &bfe, @@ -5197,27 +3588,24 @@ ft_search_child(FT_HANDLE brt, FTNODE node, int childnum, ft_search_t *search, F if (rr==TOKUDB_TRY_AGAIN) { return rr; } - // We end the batch before applying ancestor messages if we get - // all the way to a leaf. 
invariant_zero(rr); } - struct unlock_ftnode_extra unlock_extra = {brt,childnode,msgs_applied}; - struct unlockers next_unlockers = {true, unlock_ftnode_fun, (void*)&unlock_extra, unlockers}; - - int r = ft_search_node(brt, childnode, search, bfe.child_to_read, getf, getf_v, doprefetch, ftcursor, &next_unlockers, &next_ancestors, bounds, can_bulk_fetch); + struct unlock_ftnode_extra unlock_extra = { ft_handle, childnode, msgs_applied }; + struct unlockers next_unlockers = { true, unlock_ftnode_fun, (void *) &unlock_extra, unlockers }; + int r = ft_search_node(ft_handle, childnode, search, bfe.child_to_read, getf, getf_v, doprefetch, ftcursor, &next_unlockers, &next_ancestors, bounds, can_bulk_fetch); if (r!=TOKUDB_TRY_AGAIN) { // maybe prefetch the next child if (r == 0 && node->height == 1) { - ft_node_maybe_prefetch(brt, node, childnum, ftcursor, doprefetch); + ft_node_maybe_prefetch(ft_handle, node, childnum, ftcursor, doprefetch); } assert(next_unlockers.locked); if (msgs_applied) { - toku_unpin_ftnode(brt->ft, childnode); + toku_unpin_ftnode(ft_handle->ft, childnode); } else { - toku_unpin_ftnode_read_only(brt->ft, childnode); + toku_unpin_ftnode_read_only(ft_handle->ft, childnode); } } else { // try again. @@ -5230,10 +3618,10 @@ ft_search_child(FT_HANDLE brt, FTNODE node, int childnum, ft_search_t *search, F // the node was not unpinned, so we unpin it here if (next_unlockers.locked) { if (msgs_applied) { - toku_unpin_ftnode(brt->ft, childnode); + toku_unpin_ftnode(ft_handle->ft, childnode); } else { - toku_unpin_ftnode_read_only(brt->ft, childnode); + toku_unpin_ftnode_read_only(ft_handle->ft, childnode); } } } @@ -5242,19 +3630,13 @@ ft_search_child(FT_HANDLE brt, FTNODE node, int childnum, ft_search_t *search, F } static inline int -search_which_child_cmp_with_bound(DB *db, ft_compare_func cmp, FTNODE node, int childnum, ft_search_t *search, DBT *dbt) -{ - return cmp(db, toku_copy_dbt(dbt, node->childkeys[childnum]), &search->pivot_bound); +search_which_child_cmp_with_bound(const toku::comparator &cmp, FTNODE node, int childnum, + ft_search *search, DBT *dbt) { + return cmp(toku_copyref_dbt(dbt, node->pivotkeys.get_pivot(childnum)), &search->pivot_bound); } int -toku_ft_search_which_child( - DESCRIPTOR desc, - ft_compare_func cmp, - FTNODE node, - ft_search_t *search - ) -{ +toku_ft_search_which_child(const toku::comparator &cmp, FTNODE node, ft_search *search) { if (node->n_children <= 1) return 0; DBT pivotkey; @@ -5264,7 +3646,7 @@ toku_ft_search_which_child( int mi; while (lo < hi) { mi = (lo + hi) / 2; - toku_copy_dbt(&pivotkey, node->childkeys[mi]); + node->pivotkeys.fill_pivot(mi, &pivotkey); // search->compare is really strange, and only works well with a // linear search, it makes binary search a pita. 
// @@ -5289,10 +3671,9 @@ toku_ft_search_which_child( // ready to return something, if the pivot is bounded, we have to move // over a bit to get away from what we've already searched if (search->pivot_bound.data != nullptr) { - FAKE_DB(db, desc); if (search->direction == FT_SEARCH_LEFT) { while (lo < node->n_children - 1 && - search_which_child_cmp_with_bound(&db, cmp, node, lo, search, &pivotkey) <= 0) { + search_which_child_cmp_with_bound(cmp, node, lo, search, &pivotkey) <= 0) { // searching left to right, if the comparison says the // current pivot (lo) is left of or equal to our bound, // don't search that child again @@ -5300,11 +3681,11 @@ toku_ft_search_which_child( } } else { while (lo > 0 && - search_which_child_cmp_with_bound(&db, cmp, node, lo - 1, search, &pivotkey) >= 0) { + search_which_child_cmp_with_bound(cmp, node, lo - 1, search, &pivotkey) >= 0) { // searching right to left, same argument as just above // (but we had to pass lo - 1 because the pivot between lo // and the thing just less than it is at that position in - // the childkeys array) + // the pivot keys array) lo--; } } @@ -5316,17 +3697,17 @@ static void maybe_search_save_bound( FTNODE node, int child_searched, - ft_search_t *search) + ft_search *search) { int p = (search->direction == FT_SEARCH_LEFT) ? child_searched : child_searched - 1; if (p >= 0 && p < node->n_children-1) { toku_destroy_dbt(&search->pivot_bound); - toku_clone_dbt(&search->pivot_bound, node->childkeys[p]); + toku_clone_dbt(&search->pivot_bound, node->pivotkeys.get_pivot(p)); } } // Returns true if there are still children left to search in this node within the search bound (if any). -static bool search_try_again(FTNODE node, int child_to_search, ft_search_t *search) { +static bool search_try_again(FTNODE node, int child_to_search, ft_search *search) { bool try_again = false; if (search->direction == FT_SEARCH_LEFT) { if (child_to_search < node->n_children-1) { @@ -5334,8 +3715,7 @@ static bool search_try_again(FTNODE node, int child_to_search, ft_search_t *sear // if there is a search bound and the bound is within the search pivot then continue the search if (search->k_bound) { FT_HANDLE CAST_FROM_VOIDP(ft_handle, search->context); - FAKE_DB(db, &ft_handle->ft->cmp_descriptor); - try_again = (ft_handle->ft->compare_fun(&db, search->k_bound, &search->pivot_bound) > 0); + try_again = (ft_handle->ft->cmp(search->k_bound, &search->pivot_bound) > 0); } } } else if (search->direction == FT_SEARCH_RIGHT) { @@ -5347,9 +3727,9 @@ static bool search_try_again(FTNODE node, int child_to_search, ft_search_t *sear static int ft_search_node( - FT_HANDLE brt, + FT_HANDLE ft_handle, FTNODE node, - ft_search_t *search, + ft_search *search, int child_to_search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, @@ -5357,7 +3737,7 @@ ft_search_node( FT_CURSOR ftcursor, UNLOCKERS unlockers, ANCESTORS ancestors, - struct pivot_bounds const * const bounds, + const pivot_bounds &bounds, bool can_bulk_fetch ) { @@ -5369,10 +3749,10 @@ ft_search_node( // At this point, we must have the necessary partition available to continue the search // assert(BP_STATE(node,child_to_search) == PT_AVAIL); - const struct pivot_bounds next_bounds = next_pivot_keys(node, child_to_search, bounds); + const pivot_bounds next_bounds = bounds.next_bounds(node, child_to_search); if (node->height > 0) { r = ft_search_child( - brt, + ft_handle, node, child_to_search, search, @@ -5382,7 +3762,7 @@ ft_search_node( ftcursor, unlockers, ancestors, - &next_bounds, + next_bounds, can_bulk_fetch ); } 
@@ -5411,12 +3791,8 @@ ft_search_node( // we have a new pivotkey if (node->height == 0) { // when we run off the end of a basement, try to lock the range up to the pivot. solves #3529 - const DBT *pivot = nullptr; - if (search->direction == FT_SEARCH_LEFT) { - pivot = next_bounds.upper_bound_inclusive; // left -> right - } else { - pivot = next_bounds.lower_bound_exclusive; // right -> left - } + const DBT *pivot = search->direction == FT_SEARCH_LEFT ? next_bounds.ubi() : // left -> right + next_bounds.lbe(); // right -> left if (pivot != nullptr) { int rr = getf(pivot->size, pivot->data, 0, nullptr, getf_v, true); if (rr != 0) { @@ -5444,14 +3820,13 @@ ft_search_node( return r; } -static int -toku_ft_search (FT_HANDLE brt, ft_search_t *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, FT_CURSOR ftcursor, bool can_bulk_fetch) +int toku_ft_search(FT_HANDLE ft_handle, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, FT_CURSOR ftcursor, bool can_bulk_fetch) // Effect: Perform a search. Associate cursor with a leaf if possible. // All searches are performed through this function. { int r; uint trycount = 0; // How many tries did it take to get the result? - FT ft = brt->ft; + FT ft = ft_handle->ft; toku::context search_ctx(CTX_SEARCH); @@ -5470,12 +3845,12 @@ toku_ft_search (FT_HANDLE brt, ft_search_t *search, FT_GET_CALLBACK_FUNCTION get // the appropriate partition of the child we are using is in memory. // So, here are the steps for a search (and this applies to this function // as well as ft_search_child: - // - Take the search parameter, and create a ftnode_fetch_extra, that will be used by toku_pin_ftnode(_holding_lock) - // - Call toku_pin_ftnode(_holding_lock) with the bfe as the extra for the fetch callback (in case the node is not at all in memory) + // - Take the search parameter, and create a ftnode_fetch_extra, that will be used by toku_pin_ftnode + // - Call toku_pin_ftnode with the bfe as the extra for the fetch callback (in case the node is not at all in memory) // and the partial fetch callback (in case the node is perhaps partially in memory) to the fetch the node // - This eventually calls either toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback depending on whether the node is in // memory at all or not. - // - Within these functions, the "ft_search_t search" parameter is used to evaluate which child the search is interested in. + // - Within these functions, the "ft_search search" parameter is used to evaluate which child the search is interested in. // If the node is not in memory at all, toku_ftnode_fetch_callback will read the node and decompress only the partition for the // relevant child, be it a message buffer or basement node. If the node is in memory, then toku_ftnode_pf_req_callback // will tell the cachetable that a partial fetch is required if and only if the relevant child is not in memory. If the relevant child @@ -5485,9 +3860,8 @@ toku_ft_search (FT_HANDLE brt, ft_search_t *search, FT_GET_CALLBACK_FUNCTION get // - At this point, toku_ftnode_pin_holding_lock has returned, with bfe.child_to_read set, // - ft_search_node is called, assuming that the node and its relevant partition are in memory. 
// - struct ftnode_fetch_extra bfe; - fill_bfe_for_subset_read( - &bfe, + ftnode_fetch_extra bfe; + bfe.create_for_subset_read( ft, search, &ftcursor->range_lock_left_key, @@ -5502,28 +3876,27 @@ toku_ft_search (FT_HANDLE brt, ft_search_t *search, FT_GET_CALLBACK_FUNCTION get uint32_t fullhash; CACHEKEY root_key; toku_calculate_root_offset_pointer(ft, &root_key, &fullhash); - toku_pin_ftnode_off_client_thread_batched( + toku_pin_ftnode( ft, root_key, fullhash, &bfe, PL_READ, // may_modify_node set to false, because root cannot change during search - 0, - NULL, - &node + &node, + true ); } uint tree_height = node->height + 1; // How high is the tree? This is the height of the root node plus one (leaf is at height 0). - struct unlock_ftnode_extra unlock_extra = {brt,node,false}; + struct unlock_ftnode_extra unlock_extra = {ft_handle,node,false}; struct unlockers unlockers = {true, unlock_ftnode_fun, (void*)&unlock_extra, (UNLOCKERS)NULL}; { bool doprefetch = false; //static int counter = 0; counter++; - r = ft_search_node(brt, node, search, bfe.child_to_read, getf, getf_v, &doprefetch, ftcursor, &unlockers, (ANCESTORS)NULL, &infinite_bounds, can_bulk_fetch); + r = ft_search_node(ft_handle, node, search, bfe.child_to_read, getf, getf_v, &doprefetch, ftcursor, &unlockers, (ANCESTORS)NULL, pivot_bounds::infinite_bounds(), can_bulk_fetch); if (r==TOKUDB_TRY_AGAIN) { // there are two cases where we get TOKUDB_TRY_AGAIN // case 1 is when some later call to toku_pin_ftnode returned @@ -5532,7 +3905,7 @@ toku_ft_search (FT_HANDLE brt, ft_search_t *search, FT_GET_CALLBACK_FUNCTION get // some piece of a node that it needed was not in memory. // In this case, the node was not unpinned, so we unpin it here if (unlockers.locked) { - toku_unpin_ftnode_read_only(brt->ft, node); + toku_unpin_ftnode_read_only(ft_handle->ft, node); } goto try_again; } else { @@ -5541,7 +3914,7 @@ toku_ft_search (FT_HANDLE brt, ft_search_t *search, FT_GET_CALLBACK_FUNCTION get } assert(unlockers.locked); - toku_unpin_ftnode_read_only(brt->ft, node); + toku_unpin_ftnode_read_only(ft_handle->ft, node); //Heaviside function (+direction) queries define only a lower or upper @@ -5577,363 +3950,20 @@ toku_ft_search (FT_HANDLE brt, ft_search_t *search, FT_GET_CALLBACK_FUNCTION get return r; } -struct ft_cursor_search_struct { - FT_GET_CALLBACK_FUNCTION getf; - void *getf_v; - FT_CURSOR cursor; - ft_search_t *search; -}; - -/* search for the first kv pair that matches the search object */ -static int -ft_cursor_search(FT_CURSOR cursor, ft_search_t *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool can_bulk_fetch) -{ - int r = toku_ft_search(cursor->ft_handle, search, getf, getf_v, cursor, can_bulk_fetch); - return r; -} - -static inline int compare_k_x(FT_HANDLE brt, const DBT *k, const DBT *x) { - FAKE_DB(db, &brt->ft->cmp_descriptor); - return brt->ft->compare_fun(&db, k, x); -} - -static int -ft_cursor_compare_one(const ft_search_t &search __attribute__((__unused__)), const DBT *x __attribute__((__unused__))) -{ - return 1; -} - -static int ft_cursor_compare_set(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(brt, search.context); - return compare_k_x(brt, search.k, x) <= 0; /* return min xy: kv <= xy */ -} - -static int -ft_cursor_current_getf(ITEMLEN keylen, bytevec key, - ITEMLEN vallen, bytevec val, - void *v, bool lock_only) { - struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); - int r; - if (key==NULL) { - r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); - } else { - FT_CURSOR 
cursor = bcss->cursor; - DBT newkey; - toku_fill_dbt(&newkey, key, keylen); - if (compare_k_x(cursor->ft_handle, &cursor->key, &newkey) != 0) { - r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); // This was once DB_KEYEMPTY - if (r==0) r = TOKUDB_FOUND_BUT_REJECTED; - } - else - r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only); - } - return r; -} - -int -toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - if (ft_cursor_not_set(cursor)) - return EINVAL; - cursor->direction = 0; - if (op == DB_CURRENT) { - struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, 0}; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_set, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle); - int r = toku_ft_search(cursor->ft_handle, &search, ft_cursor_current_getf, &bcss, cursor, false); - ft_search_finish(&search); - return r; - } - return getf(cursor->key.size, cursor->key.data, cursor->val.size, cursor->val.data, getf_v, false); // ft_cursor_copyout(cursor, outkey, outval); -} - -int -toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_one, FT_SEARCH_LEFT, nullptr, nullptr, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, false); - ft_search_finish(&search); - return r; -} - -int -toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_one, FT_SEARCH_RIGHT, nullptr, nullptr, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, false); - ft_search_finish(&search); - return r; -} - -static int ft_cursor_compare_next(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(brt, search.context); - return compare_k_x(brt, search.k, x) < 0; /* return min xy: kv < xy */ -} - -static int -ft_cursor_shortcut ( - FT_CURSOR cursor, - int direction, - uint32_t index, - bn_data* bd, - FT_GET_CALLBACK_FUNCTION getf, - void *getf_v, - uint32_t *keylen, - void **key, - uint32_t *vallen, - void **val - ) -{ - int r = 0; - // if we are searching towards the end, limit is last element - // if we are searching towards the beginning, limit is the first element - uint32_t limit = (direction > 0) ? (bd->omt_size() - 1) : 0; - - //Starting with the prev, find the first real (non-provdel) leafentry. - while (index != limit) { - index += direction; - LEAFENTRY le; - void* foundkey = NULL; - uint32_t foundkeylen = 0; - - r = bd->fetch_klpair(index, &le, &foundkeylen, &foundkey); - invariant_zero(r); - - if (toku_ft_cursor_is_leaf_mode(cursor) || !is_le_val_del(le, cursor)) { - ft_cursor_extract_val( - le, - cursor, - vallen, - val - ); - *key = foundkey; - *keylen = foundkeylen; - - cursor->direction = direction; - r = cursor_check_restricted_range(cursor, *key, *keylen); - if (r!=0) { - paranoid_invariant(r == cursor->out_of_range_error); - // We already got at least one entry from the bulk fetch. - // Return 0 (instead of out of range error). 
- r = 0; - break; - } - r = getf(*keylen, *key, *vallen, *val, getf_v, false); - if (r == TOKUDB_CURSOR_CONTINUE) { - continue; - } - else { - break; - } - } - } - - return r; -} - -int -toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = +1; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_next, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, true); - ft_search_finish(&search); - if (r == 0) ft_cursor_set_prefetching(cursor); - return r; -} - -static int -ft_cursor_search_eq_k_x_getf(ITEMLEN keylen, bytevec key, - ITEMLEN vallen, bytevec val, - void *v, bool lock_only) { - struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v); - int r; - if (key==NULL) { - r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, false); - } else { - FT_CURSOR cursor = bcss->cursor; - DBT newkey; - toku_fill_dbt(&newkey, key, keylen); - if (compare_k_x(cursor->ft_handle, bcss->search->k, &newkey) == 0) { - r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only); - } else { - r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); - if (r==0) r = TOKUDB_FOUND_BUT_REJECTED; - } - } - return r; -} - -/* search for the kv pair that matches the search object and is equal to k */ -static int -ft_cursor_search_eq_k_x(FT_CURSOR cursor, ft_search_t *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, search}; - int r = toku_ft_search(cursor->ft_handle, search, ft_cursor_search_eq_k_x_getf, &bcss, cursor, false); - return r; -} - -static int ft_cursor_compare_prev(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(brt, search.context); - return compare_k_x(brt, search.k, x) > 0; /* return max xy: kv > xy */ -} - -int -toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = -1; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_prev, FT_SEARCH_RIGHT, &cursor->key, nullptr, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, true); - ft_search_finish(&search); - return r; -} - -static int ft_cursor_compare_set_range(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(brt, search.context); - return compare_k_x(brt, search.k, x) <= 0; /* return kv <= xy */ -} - -int -toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_set_range, FT_SEARCH_LEFT, key, nullptr, cursor->ft_handle); - int r = ft_cursor_search_eq_k_x(cursor, &search, getf, getf_v); - ft_search_finish(&search); - return r; -} - -int -toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - ft_search_init(&search, ft_cursor_compare_set_range, FT_SEARCH_LEFT, key, key_bound, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, false); - ft_search_finish(&search); - return r; -} - -static int ft_cursor_compare_set_range_reverse(const ft_search_t &search, const DBT *x) { - FT_HANDLE CAST_FROM_VOIDP(brt, search.context); - return compare_k_x(brt, search.k, x) >= 0; /* return kv >= xy */ -} - -int -toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - cursor->direction = 0; - ft_search_t search; - 
ft_search_init(&search, ft_cursor_compare_set_range_reverse, FT_SEARCH_RIGHT, key, nullptr, cursor->ft_handle); - int r = ft_cursor_search(cursor, &search, getf, getf_v, false); - ft_search_finish(&search); - return r; -} - - -//TODO: When tests have been rewritten, get rid of this function. -//Only used by tests. -int -toku_ft_cursor_get (FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags) -{ - int op = get_flags & DB_OPFLAGS_MASK; - if (get_flags & ~DB_OPFLAGS_MASK) - return EINVAL; - - switch (op) { - case DB_CURRENT: - case DB_CURRENT_BINDING: - return toku_ft_cursor_current(cursor, op, getf, getf_v); - case DB_FIRST: - return toku_ft_cursor_first(cursor, getf, getf_v); - case DB_LAST: - return toku_ft_cursor_last(cursor, getf, getf_v); - case DB_NEXT: - if (ft_cursor_not_set(cursor)) { - return toku_ft_cursor_first(cursor, getf, getf_v); - } else { - return toku_ft_cursor_next(cursor, getf, getf_v); - } - case DB_PREV: - if (ft_cursor_not_set(cursor)) { - return toku_ft_cursor_last(cursor, getf, getf_v); - } else { - return toku_ft_cursor_prev(cursor, getf, getf_v); - } - case DB_SET: - return toku_ft_cursor_set(cursor, key, getf, getf_v); - case DB_SET_RANGE: - return toku_ft_cursor_set_range(cursor, key, nullptr, getf, getf_v); - default: ;// Fall through - } - return EINVAL; -} - -void -toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval) -// Effect: Retrieves a pointer to the DBTs for the current key and value. -// Requires: The caller may not modify the DBTs or the memory at which they points. -// Requires: The caller must be in the context of a -// FT_GET_(STRADDLE_)CALLBACK_FUNCTION -{ - *pkey = &cursor->key; - *pval = &cursor->val; -} - -//We pass in toku_dbt_fake to the search functions, since it will not pass the -//key(or val) to the heaviside function if key(or val) is NULL. -//It is not used for anything else, -//the actual 'extra' information for the heaviside function is inside the -//wrapper. 
-static const DBT __toku_dbt_fake = {}; -static const DBT* const toku_dbt_fake = &__toku_dbt_fake; - -bool toku_ft_cursor_uninitialized(FT_CURSOR c) { - return ft_cursor_not_set(c); -} - - -/* ********************************* lookup **************************************/ - -int -toku_ft_lookup (FT_HANDLE brt, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) -{ - int r, rr; - FT_CURSOR cursor; - - rr = toku_ft_cursor(brt, &cursor, NULL, false, false); - if (rr != 0) return rr; - - int op = DB_SET; - r = toku_ft_cursor_get(cursor, k, getf, getf_v, op); - - toku_ft_cursor_close(cursor); - - return r; -} - /* ********************************* delete **************************************/ static int -getf_nothing (ITEMLEN UU(keylen), bytevec UU(key), ITEMLEN UU(vallen), bytevec UU(val), void *UU(pair_v), bool UU(lock_only)) { +getf_nothing (uint32_t UU(keylen), const void *UU(key), uint32_t UU(vallen), const void *UU(val), void *UU(pair_v), bool UU(lock_only)) { return 0; } -int -toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn) { +int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn) { int r; int unchecked_flags = flags; bool error_if_missing = (bool) !(flags&DB_DELETE_ANY); unchecked_flags &= ~DB_DELETE_ANY; if (unchecked_flags!=0) r = EINVAL; - else if (ft_cursor_not_set(cursor)) r = EINVAL; + else if (toku_ft_cursor_not_set(cursor)) r = EINVAL; else { r = 0; if (error_if_missing) { @@ -5948,21 +3978,18 @@ toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn) { /* ********************* keyrange ************************ */ - struct keyrange_compare_s { FT ft; const DBT *key; }; -static int -keyrange_compare (DBT const &kdbt, const struct keyrange_compare_s &s) { - // TODO: maybe put a const fake_db in the header - FAKE_DB(db, &s.ft->cmp_descriptor); - return s.ft->compare_fun(&db, &kdbt, s.key); +// TODO: Remove me, I'm boring +static int keyrange_compare(DBT const &kdbt, const struct keyrange_compare_s &s) { + return s.ft->cmp(&kdbt, s.key); } static void -keysrange_in_leaf_partition (FT_HANDLE brt, FTNODE node, +keysrange_in_leaf_partition (FT_HANDLE ft_handle, FTNODE node, DBT* key_left, DBT* key_right, int left_child_number, int right_child_number, uint64_t estimated_num_rows, uint64_t *less, uint64_t* equal_left, uint64_t* middle, @@ -5979,7 +4006,7 @@ keysrange_in_leaf_partition (FT_HANDLE brt, FTNODE node, if (BP_STATE(node, left_child_number) == PT_AVAIL) { int r; // The partition is in main memory then get an exact count. - struct keyrange_compare_s s_left = {brt->ft, key_left}; + struct keyrange_compare_s s_left = {ft_handle->ft, key_left}; BASEMENTNODE bn = BLB(node, left_child_number); uint32_t idx_left = 0; // if key_left is NULL then set r==-1 and idx==0. @@ -5987,11 +4014,11 @@ keysrange_in_leaf_partition (FT_HANDLE brt, FTNODE node, *less = idx_left; *equal_left = (r==0) ? 
1 : 0; - uint32_t size = bn->data_buffer.omt_size(); + uint32_t size = bn->data_buffer.num_klpairs(); uint32_t idx_right = size; r = -1; if (single_basement && key_right) { - struct keyrange_compare_s s_right = {brt->ft, key_right}; + struct keyrange_compare_s s_right = {ft_handle->ft, key_right}; r = bn->data_buffer.find_zero(s_right, nullptr, nullptr, nullptr, &idx_right); } *middle = idx_right - idx_left - *equal_left; @@ -6016,27 +4043,27 @@ keysrange_in_leaf_partition (FT_HANDLE brt, FTNODE node, } static int -toku_ft_keysrange_internal (FT_HANDLE brt, FTNODE node, +toku_ft_keysrange_internal (FT_HANDLE ft_handle, FTNODE node, DBT* key_left, DBT* key_right, bool may_find_right, uint64_t* less, uint64_t* equal_left, uint64_t* middle, uint64_t* equal_right, uint64_t* greater, bool* single_basement_node, uint64_t estimated_num_rows, - struct ftnode_fetch_extra *min_bfe, // set up to read a minimal read. - struct ftnode_fetch_extra *match_bfe, // set up to read a basement node iff both keys in it - struct unlockers *unlockers, ANCESTORS ancestors, struct pivot_bounds const * const bounds) + ftnode_fetch_extra *min_bfe, // set up to read a minimal read. + ftnode_fetch_extra *match_bfe, // set up to read a basement node iff both keys in it + struct unlockers *unlockers, ANCESTORS ancestors, const pivot_bounds &bounds) // Implementation note: Assign values to less, equal, and greater, and then on the way out (returning up the stack) we add more values in. { int r = 0; // if KEY is NULL then use the leftmost key. - int left_child_number = key_left ? toku_ftnode_which_child (node, key_left, &brt->ft->cmp_descriptor, brt->ft->compare_fun) : 0; + int left_child_number = key_left ? toku_ftnode_which_child (node, key_left, ft_handle->ft->cmp) : 0; int right_child_number = node->n_children; // Sentinel that does not equal left_child_number. if (may_find_right) { - right_child_number = key_right ? toku_ftnode_which_child (node, key_right, &brt->ft->cmp_descriptor, brt->ft->compare_fun) : node->n_children - 1; + right_child_number = key_right ? toku_ftnode_which_child (node, key_right, ft_handle->ft->cmp) : node->n_children - 1; } uint64_t rows_per_child = estimated_num_rows / node->n_children; if (node->height == 0) { - keysrange_in_leaf_partition(brt, node, key_left, key_right, left_child_number, right_child_number, + keysrange_in_leaf_partition(ft_handle, node, key_left, key_right, left_child_number, right_child_number, rows_per_child, less, equal_left, middle, equal_right, greater, single_basement_node); *less += rows_per_child * left_child_number; @@ -6049,12 +4076,12 @@ toku_ft_keysrange_internal (FT_HANDLE brt, FTNODE node, // do the child. 
struct ancestors next_ancestors = {node, left_child_number, ancestors}; BLOCKNUM childblocknum = BP_BLOCKNUM(node, left_child_number); - uint32_t fullhash = compute_child_fullhash(brt->ft->cf, node, left_child_number); + uint32_t fullhash = compute_child_fullhash(ft_handle->ft->cf, node, left_child_number); FTNODE childnode; bool msgs_applied = false; bool child_may_find_right = may_find_right && left_child_number == right_child_number; - r = toku_pin_ftnode_batched( - brt, + r = toku_pin_ftnode_for_query( + ft_handle, childblocknum, fullhash, unlockers, @@ -6069,13 +4096,13 @@ toku_ft_keysrange_internal (FT_HANDLE brt, FTNODE node, if (r != TOKUDB_TRY_AGAIN) { assert_zero(r); - struct unlock_ftnode_extra unlock_extra = {brt,childnode,false}; + struct unlock_ftnode_extra unlock_extra = {ft_handle,childnode,false}; struct unlockers next_unlockers = {true, unlock_ftnode_fun, (void*)&unlock_extra, unlockers}; - const struct pivot_bounds next_bounds = next_pivot_keys(node, left_child_number, bounds); + const pivot_bounds next_bounds = bounds.next_bounds(node, left_child_number); - r = toku_ft_keysrange_internal(brt, childnode, key_left, key_right, child_may_find_right, + r = toku_ft_keysrange_internal(ft_handle, childnode, key_left, key_right, child_may_find_right, less, equal_left, middle, equal_right, greater, single_basement_node, - rows_per_child, min_bfe, match_bfe, &next_unlockers, &next_ancestors, &next_bounds); + rows_per_child, min_bfe, match_bfe, &next_unlockers, &next_ancestors, next_bounds); if (r != TOKUDB_TRY_AGAIN) { assert_zero(r); @@ -6087,14 +4114,14 @@ toku_ft_keysrange_internal (FT_HANDLE brt, FTNODE node, } assert(unlockers->locked); - toku_unpin_ftnode_read_only(brt->ft, childnode); + toku_unpin_ftnode_read_only(ft_handle->ft, childnode); } } } return r; } -void toku_ft_keysrange(FT_HANDLE brt, DBT* key_left, DBT* key_right, uint64_t *less_p, uint64_t* equal_left_p, uint64_t* middle_p, uint64_t* equal_right_p, uint64_t* greater_p, bool* middle_3_exact_p) +void toku_ft_keysrange(FT_HANDLE ft_handle, DBT* key_left, DBT* key_right, uint64_t *less_p, uint64_t* equal_left_p, uint64_t* middle_p, uint64_t* equal_right_p, uint64_t* greater_p, bool* middle_3_exact_p) // Effect: Return an estimate of the number of keys to the left, the number equal (to left key), number between keys, number equal to right key, and the number to the right of both keys. // The values are an estimate. // If you perform a keyrange on two keys that are in the same basement, equal_less, middle, and equal_right will be exact. @@ -6108,7 +4135,7 @@ void toku_ft_keysrange(FT_HANDLE brt, DBT* key_left, DBT* key_right, uint64_t *l // Simplify internals by only supporting key_right != null when key_left != null // If key_right != null and key_left == null, then swap them and fix up numbers. 
uint64_t less = 0, equal_left = 0, middle = 0, equal_right = 0, greater = 0; - toku_ft_keysrange(brt, key_right, nullptr, &less, &equal_left, &middle, &equal_right, &greater, middle_3_exact_p); + toku_ft_keysrange(ft_handle, key_right, nullptr, &less, &equal_left, &middle, &equal_right, &greater, middle_3_exact_p); *less_p = 0; *equal_left_p = 0; *middle_p = less; @@ -6119,10 +4146,10 @@ void toku_ft_keysrange(FT_HANDLE brt, DBT* key_left, DBT* key_right, uint64_t *l return; } paranoid_invariant(!(!key_left && key_right)); - struct ftnode_fetch_extra min_bfe; - struct ftnode_fetch_extra match_bfe; - fill_bfe_for_min_read(&min_bfe, brt->ft); // read pivot keys but not message buffers - fill_bfe_for_keymatch(&match_bfe, brt->ft, key_left, key_right, false, false); // read basement node only if both keys in it. + ftnode_fetch_extra min_bfe; + ftnode_fetch_extra match_bfe; + min_bfe.create_for_min_read(ft_handle->ft); // read pivot keys but not message buffers + match_bfe.create_for_keymatch(ft_handle->ft, key_left, key_right, false, false); // read basement node only if both keys in it. try_again: { uint64_t less = 0, equal_left = 0, middle = 0, equal_right = 0, greater = 0; @@ -6131,31 +4158,30 @@ void toku_ft_keysrange(FT_HANDLE brt, DBT* key_left, DBT* key_right, uint64_t *l { uint32_t fullhash; CACHEKEY root_key; - toku_calculate_root_offset_pointer(brt->ft, &root_key, &fullhash); - toku_pin_ftnode_off_client_thread_batched( - brt->ft, + toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash); + toku_pin_ftnode( + ft_handle->ft, root_key, fullhash, &match_bfe, PL_READ, // may_modify_node, cannot change root during keyrange - 0, - NULL, - &node + &node, + true ); } - struct unlock_ftnode_extra unlock_extra = {brt,node,false}; + struct unlock_ftnode_extra unlock_extra = {ft_handle,node,false}; struct unlockers unlockers = {true, unlock_ftnode_fun, (void*)&unlock_extra, (UNLOCKERS)NULL}; { int r; - int64_t numrows = brt->ft->in_memory_stats.numrows; + int64_t numrows = ft_handle->ft->in_memory_stats.numrows; if (numrows < 0) numrows = 0; // prevent appearance of a negative number - r = toku_ft_keysrange_internal (brt, node, key_left, key_right, true, + r = toku_ft_keysrange_internal (ft_handle, node, key_left, key_right, true, &less, &equal_left, &middle, &equal_right, &greater, &single_basement_node, numrows, - &min_bfe, &match_bfe, &unlockers, (ANCESTORS)NULL, &infinite_bounds); + &min_bfe, &match_bfe, &unlockers, (ANCESTORS)NULL, pivot_bounds::infinite_bounds()); assert(r == 0 || r == TOKUDB_TRY_AGAIN); if (r == TOKUDB_TRY_AGAIN) { assert(!unlockers.locked); @@ -6168,10 +4194,10 @@ void toku_ft_keysrange(FT_HANDLE brt, DBT* key_left, DBT* key_right, uint64_t *l invariant_zero(greater); uint64_t less2 = 0, equal_left2 = 0, middle2 = 0, equal_right2 = 0, greater2 = 0; bool ignore; - r = toku_ft_keysrange_internal (brt, node, key_right, nullptr, false, + r = toku_ft_keysrange_internal (ft_handle, node, key_right, nullptr, false, &less2, &equal_left2, &middle2, &equal_right2, &greater2, &ignore, numrows, - &min_bfe, &match_bfe, &unlockers, (ANCESTORS)nullptr, &infinite_bounds); + &min_bfe, &match_bfe, &unlockers, (ANCESTORS)nullptr, pivot_bounds::infinite_bounds()); assert(r == 0 || r == TOKUDB_TRY_AGAIN); if (r == TOKUDB_TRY_AGAIN) { assert(!unlockers.locked); @@ -6198,7 +4224,7 @@ void toku_ft_keysrange(FT_HANDLE brt, DBT* key_left, DBT* key_right, uint64_t *l } } assert(unlockers.locked); - toku_unpin_ftnode_read_only(brt->ft, node); + 
toku_unpin_ftnode_read_only(ft_handle->ft, node); if (!key_right) { paranoid_invariant_zero(equal_right); paranoid_invariant_zero(greater); @@ -6247,7 +4273,7 @@ static int get_key_after_bytes_in_basementnode(FT ft, BASEMENTNODE bn, const DBT assert(r == 0 || r == DB_NOTFOUND); } struct get_key_after_bytes_iterate_extra iter_extra = {skip_len, skipped, callback, cb_extra}; - r = bn->data_buffer.omt_iterate_on_range(idx_left, bn->data_buffer.omt_size(), &iter_extra); + r = bn->data_buffer.iterate_on_range(idx_left, bn->data_buffer.num_klpairs(), &iter_extra); // Invert the sense of r == 0 (meaning the iterate finished, which means we didn't find what we wanted) if (r == 1) { @@ -6258,16 +4284,16 @@ static int get_key_after_bytes_in_basementnode(FT ft, BASEMENTNODE bn, const DBT return r; } -static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search_t *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped); +static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, ftnode_fetch_extra *bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped); -static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search_t *search, int childnum, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { +static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, ftnode_fetch_extra *bfe, ft_search *search, int childnum, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { int r; struct ancestors next_ancestors = {node, childnum, ancestors}; BLOCKNUM childblocknum = BP_BLOCKNUM(node, childnum); uint32_t fullhash = compute_child_fullhash(ft->cf, node, childnum); FTNODE child; bool msgs_applied = false; - r = toku_pin_ftnode_batched(ft_h, childblocknum, fullhash, unlockers, &next_ancestors, bounds, bfe, false, &child, &msgs_applied); + r = toku_pin_ftnode_for_query(ft_h, childblocknum, fullhash, unlockers, &next_ancestors, bounds, bfe, false, &child, &msgs_applied); paranoid_invariant(!msgs_applied); if (r == TOKUDB_TRY_AGAIN) { return r; @@ -6275,13 +4301,13 @@ static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLO assert_zero(r); struct unlock_ftnode_extra unlock_extra = {ft_h, child, false}; struct unlockers next_unlockers = {true, unlock_ftnode_fun, (void *) &unlock_extra, unlockers}; - const struct pivot_bounds next_bounds = next_pivot_keys(node, childnum, bounds); - return get_key_after_bytes_in_subtree(ft_h, ft, child, &next_unlockers, &next_ancestors, &next_bounds, bfe, search, subtree_bytes, start_key, skip_len, callback, cb_extra, skipped); + const pivot_bounds next_bounds = bounds.next_bounds(node, childnum); + return get_key_after_bytes_in_subtree(ft_h, ft, child, &next_unlockers, &next_ancestors, next_bounds, bfe, search, subtree_bytes, start_key, skip_len, callback, cb_extra, 
skipped); } -static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, PIVOT_BOUNDS bounds, FTNODE_FETCH_EXTRA bfe, ft_search_t *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { +static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, ftnode_fetch_extra *bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) { int r; - int childnum = toku_ft_search_which_child(&ft->cmp_descriptor, ft->compare_fun, node, search); + int childnum = toku_ft_search_which_child(ft->cmp, node, search); const uint64_t child_subtree_bytes = subtree_bytes / node->n_children; if (node->height == 0) { r = DB_NOTFOUND; @@ -6297,7 +4323,8 @@ static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UN } else { *skipped += child_subtree_bytes; if (*skipped >= skip_len && i < node->n_children - 1) { - callback(&node->childkeys[i], *skipped, cb_extra); + DBT pivot; + callback(node->pivotkeys.fill_pivot(i, &pivot), *skipped, cb_extra); r = 0; } // Otherwise, r is still DB_NOTFOUND. If this is the last @@ -6342,20 +4369,20 @@ int toku_ft_get_key_after_bytes(FT_HANDLE ft_h, const DBT *start_key, uint64_t s // an error code otherwise { FT ft = ft_h->ft; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, ft); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft); while (true) { FTNODE root; { uint32_t fullhash; CACHEKEY root_key; toku_calculate_root_offset_pointer(ft, &root_key, &fullhash); - toku_pin_ftnode_off_client_thread_batched(ft, root_key, fullhash, &bfe, PL_READ, 0, nullptr, &root); + toku_pin_ftnode(ft, root_key, fullhash, &bfe, PL_READ, &root, true); } struct unlock_ftnode_extra unlock_extra = {ft_h, root, false}; struct unlockers unlockers = {true, unlock_ftnode_fun, (void*)&unlock_extra, (UNLOCKERS) nullptr}; - ft_search_t search; - ft_search_init(&search, (start_key == nullptr ? ft_cursor_compare_one : ft_cursor_compare_set_range), FT_SEARCH_LEFT, start_key, nullptr, ft_h); + ft_search search; + ft_search_init(&search, (start_key == nullptr ? toku_ft_cursor_compare_one : toku_ft_cursor_compare_set_range), FT_SEARCH_LEFT, start_key, nullptr, ft_h); int r; // We can't do this because of #5768, there may be dictionaries in the wild that have negative stats. 
This won't affect mongo so it's ok: @@ -6365,7 +4392,7 @@ int toku_ft_get_key_after_bytes(FT_HANDLE ft_h, const DBT *start_key, uint64_t s numbytes = 0; } uint64_t skipped = 0; - r = get_key_after_bytes_in_subtree(ft_h, ft, root, &unlockers, nullptr, &infinite_bounds, &bfe, &search, (uint64_t) numbytes, start_key, skip_len, callback, cb_extra, &skipped); + r = get_key_after_bytes_in_subtree(ft_h, ft, root, &unlockers, nullptr, pivot_bounds::infinite_bounds(), &bfe, &search, (uint64_t) numbytes, start_key, skip_len, callback, cb_extra, &skipped); assert(!unlockers.locked); if (r != TOKUDB_TRY_AGAIN) { if (r == DB_NOTFOUND) { @@ -6378,16 +4405,16 @@ int toku_ft_get_key_after_bytes(FT_HANDLE ft_h, const DBT *start_key, uint64_t s } //Test-only wrapper for the old one-key range function -void toku_ft_keyrange(FT_HANDLE brt, DBT *key, uint64_t *less, uint64_t *equal, uint64_t *greater) { +void toku_ft_keyrange(FT_HANDLE ft_handle, DBT *key, uint64_t *less, uint64_t *equal, uint64_t *greater) { uint64_t zero_equal_right, zero_greater; bool ignore; - toku_ft_keysrange(brt, key, nullptr, less, equal, greater, &zero_equal_right, &zero_greater, &ignore); + toku_ft_keysrange(ft_handle, key, nullptr, less, equal, greater, &zero_equal_right, &zero_greater, &ignore); invariant_zero(zero_equal_right); invariant_zero(zero_greater); } -void toku_ft_handle_stat64 (FT_HANDLE brt, TOKUTXN UU(txn), struct ftstat64_s *s) { - toku_ft_stat64(brt->ft, s); +void toku_ft_handle_stat64 (FT_HANDLE ft_handle, TOKUTXN UU(txn), struct ftstat64_s *s) { + toku_ft_stat64(ft_handle->ft, s); } void toku_ft_handle_get_fractal_tree_info64(FT_HANDLE ft_h, struct ftinfo64 *s) { @@ -6400,23 +4427,22 @@ int toku_ft_handle_iterate_fractal_tree_block_map(FT_HANDLE ft_h, int (*iter)(ui /* ********************* debugging dump ************************ */ static int -toku_dump_ftnode (FILE *file, FT_HANDLE brt, BLOCKNUM blocknum, int depth, const DBT *lorange, const DBT *hirange) { +toku_dump_ftnode (FILE *file, FT_HANDLE ft_handle, BLOCKNUM blocknum, int depth, const DBT *lorange, const DBT *hirange) { int result=0; FTNODE node; - toku_get_node_for_verify(blocknum, brt, &node); - result=toku_verify_ftnode(brt, brt->ft->h->max_msn_in_ft, brt->ft->h->max_msn_in_ft, false, node, -1, lorange, hirange, NULL, NULL, 0, 1, 0); - uint32_t fullhash = toku_cachetable_hash(brt->ft->cf, blocknum); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, brt->ft); - toku_pin_ftnode_off_client_thread( - brt->ft, + toku_get_node_for_verify(blocknum, ft_handle, &node); + result=toku_verify_ftnode(ft_handle, ft_handle->ft->h->max_msn_in_ft, ft_handle->ft->h->max_msn_in_ft, false, node, -1, lorange, hirange, NULL, NULL, 0, 1, 0); + uint32_t fullhash = toku_cachetable_hash(ft_handle->ft->cf, blocknum); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); + toku_pin_ftnode( + ft_handle->ft, blocknum, fullhash, &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->fullhash==fullhash); fprintf(file, "%*sNode=%p\n", depth, "", node); @@ -6427,23 +4453,30 @@ toku_dump_ftnode (FILE *file, FT_HANDLE brt, BLOCKNUM blocknum, int depth, const int i; for (i=0; i+1< node->n_children; i++) { fprintf(file, "%*spivotkey %d =", depth+1, "", i); - toku_print_BYTESTRING(file, node->childkeys[i].size, (char *) node->childkeys[i].data); + toku_print_BYTESTRING(file, node->pivotkeys.get_pivot(i).size, (char *) node->pivotkeys.get_pivot(i).data); fprintf(file, "\n"); } for (i=0; i< node->n_children; i++) { if (node->height > 0) { 
NONLEAF_CHILDINFO bnc = BNC(node, i); fprintf(file, "%*schild %d buffered (%d entries):", depth+1, "", i, toku_bnc_n_entries(bnc)); - FIFO_ITERATE(bnc->buffer, key, keylen, data, datalen, type, msn, xids, UU(is_fresh), - { - data=data; datalen=datalen; keylen=keylen; - fprintf(file, "%*s xid=%" PRIu64 " %u (type=%d) msn=0x%" PRIu64 "\n", depth+2, "", xids_get_innermost_xid(xids), (unsigned)toku_dtoh32(*(int*)key), type, msn.msn); - //assert(strlen((char*)key)+1==keylen); - //assert(strlen((char*)data)+1==datalen); - }); + struct print_msg_fn { + FILE *file; + int depth; + print_msg_fn(FILE *f, int d) : file(f), depth(d) { } + int operator()(const ft_msg &msg, bool UU(is_fresh)) { + fprintf(file, "%*s xid=%" PRIu64 " %u (type=%d) msn=0x%" PRIu64 "\n", + depth+2, "", + toku_xids_get_innermost_xid(msg.xids()), + static_cast<unsigned>(toku_dtoh32(*(int*)msg.kdbt()->data)), + msg.type(), msg.msn().msn); + return 0; + } + } print_fn(file, depth); + bnc->msg_buffer.iterate(print_fn); } else { - int size = BLB_DATA(node, i)->omt_size(); + int size = BLB_DATA(node, i)->num_klpairs(); if (0) for (int j=0; jn_children; i++) { fprintf(file, "%*schild %d\n", depth, "", i); if (i>0) { - char *CAST_FROM_VOIDP(key, node->childkeys[i-1].data); - fprintf(file, "%*spivot %d len=%u %u\n", depth+1, "", i-1, node->childkeys[i-1].size, (unsigned)toku_dtoh32(*(int*)key)); + char *CAST_FROM_VOIDP(key, node->pivotkeys.get_pivot(i - 1).data); + fprintf(file, "%*spivot %d len=%u %u\n", depth+1, "", i-1, node->pivotkeys.get_pivot(i - 1).size, (unsigned)toku_dtoh32(*(int*)key)); } - toku_dump_ftnode(file, brt, BP_BLOCKNUM(node, i), depth+4, - (i==0) ? lorange : &node->childkeys[i-1], - (i==node->n_children-1) ? hirange : &node->childkeys[i]); + DBT x, y; + toku_dump_ftnode(file, ft_handle, BP_BLOCKNUM(node, i), depth+4, + (i==0) ? lorange : node->pivotkeys.fill_pivot(i - 1, &x), + (i==node->n_children-1) ?
hirange : node->pivotkeys.fill_pivot(i, &y)); } } } - toku_unpin_ftnode_off_client_thread(brt->ft, node); + toku_unpin_ftnode(ft_handle->ft, node); return result; } -int toku_dump_ft (FILE *f, FT_HANDLE brt) { - int r; - assert(brt->ft); - toku_dump_translation_table(f, brt->ft->blocktable); - { - uint32_t fullhash = 0; - CACHEKEY root_key; - toku_calculate_root_offset_pointer(brt->ft, &root_key, &fullhash); - r = toku_dump_ftnode(f, brt, root_key, 0, 0, 0); - } - return r; +int toku_dump_ft(FILE *f, FT_HANDLE ft_handle) { + FT ft = ft_handle->ft; + invariant_notnull(ft); + ft->blocktable.dump_translation_table(f); + + uint32_t fullhash = 0; + CACHEKEY root_key; + toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash); + return toku_dump_ftnode(f, ft_handle, root_key, 0, 0, 0); } int toku_ft_layer_init(void) { @@ -6577,24 +4609,21 @@ void toku_ft_unlink(FT_HANDLE handle) { toku_cachefile_unlink_on_close(cf); } -int -toku_ft_get_fragmentation(FT_HANDLE brt, TOKU_DB_FRAGMENTATION report) { - int r; - - int fd = toku_cachefile_get_fd(brt->ft->cf); - toku_ft_lock(brt->ft); +int toku_ft_get_fragmentation(FT_HANDLE ft_handle, TOKU_DB_FRAGMENTATION report) { + int fd = toku_cachefile_get_fd(ft_handle->ft->cf); + toku_ft_lock(ft_handle->ft); int64_t file_size; - r = toku_os_get_file_size(fd, &file_size); - if (r==0) { + int r = toku_os_get_file_size(fd, &file_size); + if (r == 0) { report->file_size_bytes = file_size; - toku_block_table_get_fragmentation_unlocked(brt->ft->blocktable, report); + ft_handle->ft->blocktable.get_fragmentation_unlocked(report); } - toku_ft_unlock(brt->ft); + toku_ft_unlock(ft_handle->ft); return r; } -static bool is_empty_fast_iter (FT_HANDLE brt, FTNODE node) { +static bool is_empty_fast_iter (FT_HANDLE ft_handle, FTNODE node) { if (node->height > 0) { for (int childnum=0; childnum<node->n_children; childnum++) { if (toku_bnc_nbytesinbuf(BNC(node, childnum)) != 0) { @@ -6603,31 +4632,30 @@ static bool is_empty_fast_iter (FT_HANDLE brt, FTNODE node) { FTNODE childnode; { BLOCKNUM childblocknum = BP_BLOCKNUM(node,childnum); - uint32_t fullhash = compute_child_fullhash(brt->ft->cf, node, childnum); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, brt->ft); + uint32_t fullhash = compute_child_fullhash(ft_handle->ft->cf, node, childnum); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); // don't need to pass in dependent nodes as we are not // modifying nodes we are pinning - toku_pin_ftnode_off_client_thread( - brt->ft, + toku_pin_ftnode( + ft_handle->ft, childblocknum, fullhash, &bfe, PL_READ, // may_modify_node set to false, as nodes not modified - 0, - NULL, - &childnode + &childnode, + true ); } - int child_is_empty = is_empty_fast_iter(brt, childnode); - toku_unpin_ftnode(brt->ft, childnode); + int child_is_empty = is_empty_fast_iter(ft_handle, childnode); + toku_unpin_ftnode(ft_handle->ft, childnode); if (!child_is_empty) return 0; } return 1; } else { - // leaf: If the omt is empty, we are happy. + // leaf: If the dmt is empty, we are happy. for (int i = 0; i < node->n_children; i++) { - if (BLB_DATA(node, i)->omt_size()) { + if (BLB_DATA(node, i)->num_klpairs()) { return false; } } @@ -6635,7 +4663,7 @@ static bool is_empty_fast_iter (FT_HANDLE brt, FTNODE node) { } } -bool toku_ft_is_empty_fast (FT_HANDLE brt) +bool toku_ft_is_empty_fast (FT_HANDLE ft_handle) // A fast check to see if the tree is empty. If there are any messages or leafentries, we consider the tree to be nonempty.
It's possible that those // messages and leafentries would all optimize away and that the tree is empty, but we'll say it is nonempty. { @@ -6643,22 +4671,21 @@ bool toku_ft_is_empty_fast (FT_HANDLE brt) FTNODE node; { CACHEKEY root_key; - toku_calculate_root_offset_pointer(brt->ft, &root_key, &fullhash); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, brt->ft); - toku_pin_ftnode_off_client_thread( - brt->ft, + toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); + toku_pin_ftnode( + ft_handle->ft, root_key, fullhash, &bfe, PL_READ, // may_modify_node set to false, node does not change - 0, - NULL, - &node + &node, + true ); } - bool r = is_empty_fast_iter(brt, node); - toku_unpin_ftnode(brt->ft, node); + bool r = is_empty_fast_iter(ft_handle, node); + toku_unpin_ftnode(ft_handle->ft, node); return r; } @@ -6682,6 +4709,26 @@ int toku_ft_strerror_r(int error, char *buf, size_t buflen) } } +int toku_keycompare(const void *key1, uint32_t key1len, const void *key2, uint32_t key2len) { + int comparelen = key1len < key2len ? key1len : key2len; + int c = memcmp(key1, key2, comparelen); + if (__builtin_expect(c != 0, 1)) { + return c; + } else { + if (key1len < key2len) { + return -1; + } else if (key1len > key2len) { + return 1; + } else { + return 0; + } + } +} + +int toku_builtin_compare_fun(DB *db __attribute__((__unused__)), const DBT *a, const DBT*b) { + return toku_keycompare(a->data, a->size, b->data, b->size); +} + #include <toku_race_tools.h> void __attribute__((__constructor__)) toku_ft_helgrind_ignore(void); void diff --git a/storage/tokudb/ft-index/ft/ft-ops.h b/storage/tokudb/ft-index/ft/ft-ops.h index a98923e992443..c45e0c71ef50a 100644 --- a/storage/tokudb/ft-index/ft/ft-ops.h +++ b/storage/tokudb/ft-index/ft/ft-ops.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_OPS_H -#define FT_OPS_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,33 +86,22 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." // This must be first to make the 64-bit file mode work right in Linux #define _FILE_OFFSET_BITS 64 -#include "fttypes.h" -#include "ybt.h" + #include <db.h> -#include "cachetable.h" -#include "log.h" -#include "ft-search.h" -#include "compress.h" - -// A callback function is invoked with the key, and the data. -// The pointers (to the bytevecs) must not be modified. The data must be copied out before the callback function returns. -// Note: In the thread-safe version, the brt node remains locked while the callback function runs. So return soon, and don't call the BRT code from the callback function. -// If the callback function returns a nonzero value (an error code), then that error code is returned from the get function itself.
-// The cursor object will have been updated (so that if result==0 the current value is the value being passed) -// (If r!=0 then the cursor won't have been updated.) -// If r!=0, it's up to the callback function to return that value of r. -// A 'key' bytevec of NULL means that element is not found (effectively infinity or -// -infinity depending on direction) -// When lock_only is false, the callback does optional lock tree locking and then processes the key and val. -// When lock_only is true, the callback only does optional lock tree locking. -typedef int(*FT_GET_CALLBACK_FUNCTION)(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only); - -typedef bool(*FT_CHECK_INTERRUPT_CALLBACK)(void* extra); + +#include "ft/cachetable/cachetable.h" +#include "ft/comparator.h" +#include "ft/msg.h" +#include "util/dbt.h" + +typedef struct ft_handle *FT_HANDLE; int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result)); @@ -125,7 +112,7 @@ int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int node // ANY operations. to update the cmp descriptor after any operations have already happened, all handles // and transactions must close and reopen before the change, then you can update the cmp descriptor void toku_ft_change_descriptor(FT_HANDLE t, const DBT* old_descriptor, const DBT* new_descriptor, bool do_log, TOKUTXN txn, bool update_cmp_descriptor); -uint32_t toku_serialize_descriptor_size(const DESCRIPTOR desc); +uint32_t toku_serialize_descriptor_size(DESCRIPTOR desc); void toku_ft_handle_create(FT_HANDLE *ft); void toku_ft_set_flags(FT_HANDLE, unsigned int flags); @@ -139,11 +126,13 @@ void toku_ft_handle_set_compression_method(FT_HANDLE, enum toku_compression_meth void toku_ft_handle_get_compression_method(FT_HANDLE, enum toku_compression_method *); void toku_ft_handle_set_fanout(FT_HANDLE, unsigned int fanout); void toku_ft_handle_get_fanout(FT_HANDLE, unsigned int *fanout); +int toku_ft_handle_set_memcmp_magic(FT_HANDLE, uint8_t magic); -void toku_ft_set_bt_compare(FT_HANDLE, ft_compare_func); -ft_compare_func toku_ft_get_bt_compare (FT_HANDLE brt); +void toku_ft_set_bt_compare(FT_HANDLE ft_handle, ft_compare_func cmp_func); +const toku::comparator &toku_ft_get_comparator(FT_HANDLE ft_handle); -void toku_ft_set_redirect_callback(FT_HANDLE brt, on_redirect_callback redir_cb, void* extra); +typedef void (*on_redirect_callback)(FT_HANDLE ft_handle, void *extra); +void toku_ft_set_redirect_callback(FT_HANDLE ft_handle, on_redirect_callback cb, void *extra); // How updates (update/insert/deletes) work: // There are two flavers of upsertdels: Singleton and broadcast. @@ -181,7 +170,10 @@ void toku_ft_set_redirect_callback(FT_HANDLE brt, on_redirect_callback redir_cb, // Implementation note: Acquires a write lock on the entire database. // This function works by sending an BROADCAST-UPDATE message containing // the key and the extra. 
-void toku_ft_set_update(FT_HANDLE brt, ft_update_func update_fun); +typedef int (*ft_update_func)(DB *db, const DBT *key, const DBT *old_val, const DBT *extra, + void (*set_val)(const DBT *new_val, void *set_extra), + void *set_extra); +void toku_ft_set_update(FT_HANDLE ft_h, ft_update_func update_fun); int toku_ft_handle_open(FT_HANDLE, const char *fname_in_env, int is_create, int only_create, CACHETABLE ct, TOKUTXN txn) __attribute__ ((warn_unused_result)); @@ -197,9 +189,17 @@ void toku_ft_handle_close(FT_HANDLE ft_handle); // close an ft handle during recovery. the underlying ft must close, and will use the given lsn. void toku_ft_handle_close_recovery(FT_HANDLE ft_handle, LSN oplsn); +// At the ydb layer, a DICTIONARY_ID uniquely identifies an open dictionary. +// With the introduction of the loader (ticket 2216), it is possible for the file that holds +// an open dictionary to change, so these are now separate and independent unique identifiers (see FILENUM) +struct DICTIONARY_ID { + uint64_t dictid; +}; +static const DICTIONARY_ID DICTIONARY_ID_NONE = { .dictid = 0 }; + int toku_ft_handle_open_with_dict_id( - FT_HANDLE t, + FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only_create, @@ -208,86 +208,57 @@ toku_ft_handle_open_with_dict_id( DICTIONARY_ID use_dictionary_id ) __attribute__ ((warn_unused_result)); -int toku_ft_lookup (FT_HANDLE brt, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); +// Effect: Insert a key and data pair into an ft +void toku_ft_insert (FT_HANDLE ft_h, DBT *k, DBT *v, TOKUTXN txn); -// Effect: Insert a key and data pair into a brt -void toku_ft_insert (FT_HANDLE brt, DBT *k, DBT *v, TOKUTXN txn); +// Returns: 0 if the key was inserted, DB_KEYEXIST if the key already exists +int toku_ft_insert_unique(FT_HANDLE ft, DBT *k, DBT *v, TOKUTXN txn, bool do_logging); // Effect: Optimize the ft -void toku_ft_optimize (FT_HANDLE brt); +void toku_ft_optimize (FT_HANDLE ft_h); -// Effect: Insert a key and data pair into a brt if the oplsn is newer than the brt lsn. This function is called during recovery. -void toku_ft_maybe_insert (FT_HANDLE brt, DBT *k, DBT *v, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, enum ft_msg_type type); +// Effect: Insert a key and data pair into an ft if the oplsn is newer than the ft's lsn. This function is called during recovery. +void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *k, DBT *v, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, enum ft_msg_type type); -// Effect: Send an update message into a brt. This function is called +// Effect: Send an update message into an ft. This function is called // during recovery. -void toku_ft_maybe_update(FT_HANDLE brt, const DBT *key, const DBT *update_function_extra, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging); +void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging); -// Effect: Send a broadcasting update message into a brt. This function +// Effect: Send a broadcasting update message into an ft. This function // is called during recovery. 
-void toku_ft_maybe_update_broadcast(FT_HANDLE brt, const DBT *update_function_extra, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, bool is_resetting_op); +void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, bool is_resetting_op); void toku_ft_load_recovery(TOKUTXN txn, FILENUM old_filenum, char const * new_iname, int do_fsync, int do_log, LSN *load_lsn); -void toku_ft_load(FT_HANDLE brt, TOKUTXN txn, char const * new_iname, int do_fsync, LSN *get_lsn); +void toku_ft_load(FT_HANDLE ft_h, TOKUTXN txn, char const * new_iname, int do_fsync, LSN *get_lsn); void toku_ft_hot_index_recovery(TOKUTXN txn, FILENUMS filenums, int do_fsync, int do_log, LSN *hot_index_lsn); -void toku_ft_hot_index(FT_HANDLE brt, TOKUTXN txn, FILENUMS filenums, int do_fsync, LSN *lsn); +void toku_ft_hot_index(FT_HANDLE ft_h, TOKUTXN txn, FILENUMS filenums, int do_fsync, LSN *lsn); -void toku_ft_log_put_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *brts, uint32_t num_fts, const DBT *key, const DBT *val); -void toku_ft_log_put (TOKUTXN txn, FT_HANDLE brt, const DBT *key, const DBT *val); -void toku_ft_log_del_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *brts, uint32_t num_fts, const DBT *key, const DBT *val); -void toku_ft_log_del (TOKUTXN txn, FT_HANDLE brt, const DBT *key); +void toku_ft_log_put_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *fts, uint32_t num_fts, const DBT *key, const DBT *val); +void toku_ft_log_put (TOKUTXN txn, FT_HANDLE ft_h, const DBT *key, const DBT *val); +void toku_ft_log_del_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *fts, uint32_t num_fts, const DBT *key, const DBT *val); +void toku_ft_log_del (TOKUTXN txn, FT_HANDLE ft_h, const DBT *key); -// Effect: Delete a key from a brt -void toku_ft_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn); +// Effect: Delete a key from an ft +void toku_ft_delete (FT_HANDLE ft_h, DBT *k, TOKUTXN txn); -// Effect: Delete a key from a brt if the oplsn is newer than the brt lsn. This function is called during recovery. -void toku_ft_maybe_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging); +// Effect: Delete a key from an ft if the oplsn is newer than the ft lsn. This function is called during recovery. 
+void toku_ft_maybe_delete (FT_HANDLE ft_h, DBT *k, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging); TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h); -TXN_MANAGER toku_ft_get_txn_manager(FT_HANDLE ft_h); +struct txn_manager *toku_ft_get_txn_manager(FT_HANDLE ft_h); -void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info); -void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info); -void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info); +struct txn_gc_info; +void toku_ft_send_insert(FT_HANDLE ft_h, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info); +void toku_ft_send_delete(FT_HANDLE ft_h, DBT *key, XIDS xids, txn_gc_info *gc_info); +void toku_ft_send_commit_any(FT_HANDLE ft_h, DBT *key, XIDS xids, txn_gc_info *gc_info); int toku_close_ft_handle_nolsn (FT_HANDLE, char **error_string) __attribute__ ((warn_unused_result)); -int toku_dump_ft (FILE *,FT_HANDLE brt) __attribute__ ((warn_unused_result)); +int toku_dump_ft (FILE *,FT_HANDLE ft_h) __attribute__ ((warn_unused_result)); extern int toku_ft_debug_mode; -int toku_verify_ft (FT_HANDLE brt) __attribute__ ((warn_unused_result)); -int toku_verify_ft_with_progress (FT_HANDLE brt, int (*progress_callback)(void *extra, float progress), void *extra, int verbose, int keep_going) __attribute__ ((warn_unused_result)); - -typedef struct ft_cursor *FT_CURSOR; -int toku_ft_cursor (FT_HANDLE, FT_CURSOR*, TOKUTXN, bool, bool) __attribute__ ((warn_unused_result)); -void toku_ft_cursor_set_leaf_mode(FT_CURSOR); -// Sets a boolean on the brt cursor that prevents uncessary copying of -// the cursor duing a one query. -void toku_ft_cursor_set_temporary(FT_CURSOR); -void toku_ft_cursor_remove_restriction(FT_CURSOR); -void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR ftcursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra); -int toku_ft_cursor_is_leaf_mode(FT_CURSOR); -void toku_ft_cursor_set_range_lock(FT_CURSOR, const DBT *, const DBT *, bool, bool, int); - -// get is deprecated in favor of the individual functions below -int toku_ft_cursor_get (FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags) __attribute__ ((warn_unused_result)); - -int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); -int toku_ft_cursor_get_both_range(FT_CURSOR cursor, DBT *key, DBT *val, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); 
-int toku_ft_cursor_get_both_range_reverse(FT_CURSOR cursor, DBT *key, DBT *val, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); - -int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN) __attribute__ ((warn_unused_result)); -void toku_ft_cursor_close (FT_CURSOR curs); -bool toku_ft_cursor_uninitialized(FT_CURSOR c) __attribute__ ((warn_unused_result)); - -void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval); +int toku_verify_ft (FT_HANDLE ft_h) __attribute__ ((warn_unused_result)); +int toku_verify_ft_with_progress (FT_HANDLE ft_h, int (*progress_callback)(void *extra, float progress), void *extra, int verbose, int keep_going) __attribute__ ((warn_unused_result)); DICTIONARY_ID toku_ft_get_dictionary_id(FT_HANDLE); @@ -298,8 +269,8 @@ enum ft_flags { TOKU_DB_VALCMP_BUILTIN_13 = (1<<3), }; -void toku_ft_keyrange(FT_HANDLE brt, DBT *key, uint64_t *less, uint64_t *equal, uint64_t *greater); -void toku_ft_keysrange(FT_HANDLE brt, DBT* key_left, DBT* key_right, uint64_t *less_p, uint64_t* equal_left_p, uint64_t* middle_p, uint64_t* equal_right_p, uint64_t* greater_p, bool* middle_3_exact_p); +void toku_ft_keyrange(FT_HANDLE ft_h, DBT *key, uint64_t *less, uint64_t *equal, uint64_t *greater); +void toku_ft_keysrange(FT_HANDLE ft_h, DBT* key_left, DBT* key_right, uint64_t *less_p, uint64_t* equal_left_p, uint64_t* middle_p, uint64_t* equal_right_p, uint64_t* greater_p, bool* middle_3_exact_p); int toku_ft_get_key_after_bytes(FT_HANDLE ft_h, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *end_key, uint64_t actually_skipped, void *extra), void *cb_extra); @@ -341,16 +312,16 @@ void toku_maybe_preallocate_in_file (int fd, int64_t size, int64_t expected_size // Effect: make the file bigger by either doubling it or growing by 16MiB whichever is less, until it is at least size // Return 0 on success, otherwise an error number. -int toku_ft_get_fragmentation(FT_HANDLE brt, TOKU_DB_FRAGMENTATION report) __attribute__ ((warn_unused_result)); +int toku_ft_get_fragmentation(FT_HANDLE ft_h, TOKU_DB_FRAGMENTATION report) __attribute__ ((warn_unused_result)); -bool toku_ft_is_empty_fast (FT_HANDLE brt) __attribute__ ((warn_unused_result)); +bool toku_ft_is_empty_fast (FT_HANDLE ft_h) __attribute__ ((warn_unused_result)); // Effect: Return true if there are no messages or leaf entries in the tree. If so, it's empty. If there are messages or leaf entries, we say it's not empty // even though if we were to optimize the tree it might turn out that they are empty. int toku_ft_strerror_r(int error, char *buf, size_t buflen); // Effect: LIke the XSI-compliant strerorr_r, extended to db_strerror(). // If error>=0 then the result is to do strerror_r(error, buf, buflen), that is fill buf with a descriptive error message. -// If error<0 then return a TokuDB-specific error code. For unknown cases, we return -1 and set errno=EINVAL, even for cases that *should* be known. (Not all DB errors are known by this function which is a bug.) +// If error<0 then return a TokuFT-specific error code. For unknown cases, we return -1 and set errno=EINVAL, even for cases that *should* be known. (Not all DB errors are known by this function which is a bug.) 
extern bool garbage_collection_debug; @@ -358,4 +329,4 @@ extern bool garbage_collection_debug; void toku_ft_set_direct_io(bool direct_io_on); void toku_ft_set_compress_buffers_before_eviction(bool compress_buffers); -#endif +void toku_note_deserialized_basement_node(bool fixed_key_size); diff --git a/storage/tokudb/ft-index/ft/ft-test-helpers.cc b/storage/tokudb/ft-index/ft/ft-test-helpers.cc index 135603147d94f..dc0b77099fa3c 100644 --- a/storage/tokudb/ft-index/ft/ft-test-helpers.cc +++ b/storage/tokudb/ft-index/ft/ft-test-helpers.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,14 +89,15 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "ft-cachetable-wrappers.h" -#include "ft-flusher.h" -#include "ft-internal.h" -#include "ft.h" -#include "fttypes.h" -#include "ule.h" +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-internal.h" +#include "ft/ft-flusher.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/node.h" +#include "ft/ule.h" -// dummymsn needed to simulate msn because messages are injected at a lower level than toku_ft_root_put_cmd() +// dummymsn needed to simulate msn because messages are injected at a lower level than toku_ft_root_put_msg() #define MIN_DUMMYMSN ((MSN) {(uint64_t)1 << 62}) static MSN dummymsn; static int testsetup_initialized = 0; @@ -119,62 +120,71 @@ next_dummymsn(void) { bool ignore_if_was_already_open; -int toku_testsetup_leaf(FT_HANDLE brt, BLOCKNUM *blocknum, int n_children, char **keys, int *keylens) { +int toku_testsetup_leaf(FT_HANDLE ft_handle, BLOCKNUM *blocknum, int n_children, char **keys, int *keylens) { FTNODE node; assert(testsetup_initialized); - toku_create_new_ftnode(brt, &node, 0, n_children); - int i; - for (i=0; ichildkeys[i], keys[i], keylens[i]); - node->totalchildkeylens += keylens[i]; + DBT *XMALLOC_N(n_children - 1, pivotkeys); + for (int i = 0; i + 1 < n_children; i++) { + toku_memdup_dbt(&pivotkeys[i], keys[i], keylens[i]); + } + node->pivotkeys.create_from_dbts(pivotkeys, n_children - 1); + for (int i = 0; i + 1 < n_children; i++) { + toku_destroy_dbt(&pivotkeys[i]); } + toku_free(pivotkeys); - *blocknum = node->thisnodename; - toku_unpin_ftnode(brt->ft, node); + *blocknum = node->blocknum; + toku_unpin_ftnode(ft_handle->ft, node); return 0; } // Don't bother to clean up carefully if something goes wrong. (E.g., it's OK to have malloced stuff that hasn't been freed.) 
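Editor's note: the rewritten toku_testsetup_leaf above makes the pivot-key ownership explicit: the helper duplicates each caller key into a temporary DBT, hands the array to node->pivotkeys.create_from_dbts(), and then immediately destroys and frees its temporaries, which indicates create_from_dbts keeps its own copies. Below is a self-contained sketch of that copy-in/clean-up discipline using plain malloc'd buffers; kv_buf, consume_copies, and build_pivots are hypothetical stand-ins so the sketch compiles outside the tree, not TokuFT functions.

#include <cstdlib>
#include <cstring>
#include <vector>

struct kv_buf { void *data; size_t size; };           // stand-in for a DBT

// Hypothetical consumer: like create_from_dbts above, it is assumed to deep-copy
// whatever it keeps, so the caller stays responsible for freeing its temporaries.
static void consume_copies(const std::vector<kv_buf> &bufs) { (void) bufs; }

static void build_pivots(char **keys, int *keylens, int n_children) {  // assumes n_children >= 1
    std::vector<kv_buf> tmp(n_children - 1);
    for (int i = 0; i + 1 < n_children; i++) {        // duplicate each caller key
        tmp[i].size = (size_t) keylens[i];
        tmp[i].data = std::malloc(tmp[i].size);
        std::memcpy(tmp[i].data, keys[i], tmp[i].size);
    }
    consume_copies(tmp);                              // callee copies what it keeps
    for (auto &b : tmp) std::free(b.data);            // caller frees every temporary
}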
-int toku_testsetup_nonleaf (FT_HANDLE brt, int height, BLOCKNUM *blocknum, int n_children, BLOCKNUM *children, char **keys, int *keylens) { +int toku_testsetup_nonleaf (FT_HANDLE ft_handle, int height, BLOCKNUM *blocknum, int n_children, BLOCKNUM *children, char **keys, int *keylens) { FTNODE node; assert(testsetup_initialized); - toku_create_new_ftnode(brt, &node, height, n_children); - int i; - for (i=0; ichildkeys[i], keys[i], keylens[i]); - node->totalchildkeylens += keylens[i]; + DBT *XMALLOC_N(n_children - 1, pivotkeys); + for (int i = 0; i + 1 < n_children; i++) { + toku_memdup_dbt(&pivotkeys[i], keys[i], keylens[i]); + } + node->pivotkeys.create_from_dbts(pivotkeys, n_children - 1); + for (int i = 0; i + 1 < n_children; i++) { + toku_destroy_dbt(&pivotkeys[i]); } - *blocknum = node->thisnodename; - toku_unpin_ftnode(brt->ft, node); + toku_free(pivotkeys); + + *blocknum = node->blocknum; + toku_unpin_ftnode(ft_handle->ft, node); return 0; } -int toku_testsetup_root(FT_HANDLE brt, BLOCKNUM blocknum) { +int toku_testsetup_root(FT_HANDLE ft_handle, BLOCKNUM blocknum) { assert(testsetup_initialized); - brt->ft->h->root_blocknum = blocknum; + ft_handle->ft->h->root_blocknum = blocknum; return 0; } -int toku_testsetup_get_sersize(FT_HANDLE brt, BLOCKNUM diskoff) // Return the size on disk +int toku_testsetup_get_sersize(FT_HANDLE ft_handle, BLOCKNUM diskoff) // Return the size on disk { assert(testsetup_initialized); void *node_v; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, brt->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); int r = toku_cachetable_get_and_pin( - brt->ft->cf, diskoff, - toku_cachetable_hash(brt->ft->cf, diskoff), + ft_handle->ft->cf, diskoff, + toku_cachetable_hash(ft_handle->ft->cf, diskoff), &node_v, NULL, - get_write_callbacks_for_node(brt->ft), + get_write_callbacks_for_node(ft_handle->ft), toku_ftnode_fetch_callback, toku_ftnode_pf_req_callback, toku_ftnode_pf_callback, @@ -184,25 +194,25 @@ int toku_testsetup_get_sersize(FT_HANDLE brt, BLOCKNUM diskoff) // Return the si assert(r==0); FTNODE CAST_FROM_VOIDP(node, node_v); int size = toku_serialize_ftnode_size(node); - toku_unpin_ftnode(brt->ft, node); + toku_unpin_ftnode(ft_handle->ft, node); return size; } -int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char *key, int keylen, const char *val, int vallen) { +int toku_testsetup_insert_to_leaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, const char *key, int keylen, const char *val, int vallen) { void *node_v; int r; assert(testsetup_initialized); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, brt->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); r = toku_cachetable_get_and_pin( - brt->ft->cf, + ft_handle->ft->cf, blocknum, - toku_cachetable_hash(brt->ft->cf, blocknum), + toku_cachetable_hash(ft_handle->ft->cf, blocknum), &node_v, NULL, - get_write_callbacks_for_node(brt->ft), + get_write_callbacks_for_node(ft_handle->ft), toku_ftnode_fetch_callback, toku_ftnode_pf_req_callback, toku_ftnode_pf_callback, @@ -214,30 +224,26 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char toku_verify_or_set_counts(node); assert(node->height==0); - DBT keydbt,valdbt; - MSN msn = next_dummymsn(); - FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), - .u = { .id = { toku_fill_dbt(&keydbt, key, keylen), - toku_fill_dbt(&valdbt, val, vallen) } } }; + DBT kdbt, vdbt; + ft_msg msg(toku_fill_dbt(&kdbt, key, keylen), toku_fill_dbt(&vdbt, val, 
vallen), + FT_INSERT, next_dummymsn(), toku_xids_get_root_xids()); static size_t zero_flow_deltas[] = { 0, 0 }; txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true); - toku_ft_node_put_cmd ( - brt->ft->compare_fun, - brt->ft->update_fun, - &brt->ft->cmp_descriptor, - node, - -1, - &cmd, - true, - &gc_info, - zero_flow_deltas, - NULL - ); + toku_ftnode_put_msg(ft_handle->ft->cmp, + ft_handle->ft->update_fun, + node, + -1, + msg, + true, + &gc_info, + zero_flow_deltas, + NULL + ); toku_verify_or_set_counts(node); - toku_unpin_ftnode(brt->ft, node); + toku_unpin_ftnode(ft_handle->ft, node); return 0; } @@ -252,35 +258,34 @@ testhelper_string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t) { - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); - toku_pin_ftnode_off_client_thread( + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); + toku_pin_ftnode( t->ft, b, toku_cachetable_hash(t->ft->cf, b), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - node + node, + true ); } -int toku_testsetup_insert_to_nonleaf (FT_HANDLE brt, BLOCKNUM blocknum, enum ft_msg_type cmdtype, const char *key, int keylen, const char *val, int vallen) { +int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, enum ft_msg_type msgtype, const char *key, int keylen, const char *val, int vallen) { void *node_v; int r; assert(testsetup_initialized); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, brt->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); r = toku_cachetable_get_and_pin( - brt->ft->cf, + ft_handle->ft->cf, blocknum, - toku_cachetable_hash(brt->ft->cf, blocknum), + toku_cachetable_hash(ft_handle->ft->cf, blocknum), &node_v, NULL, - get_write_callbacks_for_node(brt->ft), + get_write_callbacks_for_node(ft_handle->ft), toku_ftnode_fetch_callback, toku_ftnode_pf_req_callback, toku_ftnode_pf_callback, @@ -292,21 +297,22 @@ int toku_testsetup_insert_to_nonleaf (FT_HANDLE brt, BLOCKNUM blocknum, enum ft_ assert(node->height>0); DBT k; - int childnum = toku_ftnode_which_child(node, - toku_fill_dbt(&k, key, keylen), - &brt->ft->cmp_descriptor, brt->ft->compare_fun); + int childnum = toku_ftnode_which_child(node, toku_fill_dbt(&k, key, keylen), ft_handle->ft->cmp); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); MSN msn = next_dummymsn(); - toku_bnc_insert_msg(BNC(node, childnum), key, keylen, val, vallen, cmdtype, msn, xids_0, true, NULL, testhelper_string_key_cmp); + toku::comparator cmp; + cmp.create(testhelper_string_key_cmp, nullptr); + toku_bnc_insert_msg(BNC(node, childnum), key, keylen, val, vallen, msgtype, msn, xids_0, true, cmp); + cmp.destroy(); // Hack to get the test working. The problem is that this test // is directly queueing something in a FIFO instead of - // using brt APIs. + // using ft APIs. node->max_msn_applied_to_node_on_disk = msn; node->dirty = 1; // Also hack max_msn_in_ft - brt->ft->h->max_msn_in_ft = msn; + ft_handle->ft->h->max_msn_in_ft = msn; - toku_unpin_ftnode(brt->ft, node); + toku_unpin_ftnode(ft_handle->ft, node); return 0; } diff --git a/storage/tokudb/ft-index/ft/ft-verify.cc b/storage/tokudb/ft-index/ft/ft-verify.cc index fd65c2711deff..cbb5159e276fd 100644 --- a/storage/tokudb/ft-index/ft/ft-verify.cc +++ b/storage/tokudb/ft-index/ft/ft-verify.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -/* Verify a BRT. */ +/* Verify an FT. */ /* Check: * The tree is of uniform depth (and the height is correct at every node) * For each pivot key: the max of the stuff to the left is <= the pivot key < the min of the stuff to the right. @@ -97,31 +97,30 @@ PATENT RIGHTS GRANT: * For each nonleaf node: All the messages have keys that are between the associated pivot keys ( left_pivot_key < message <= right_pivot_key) */ -#include "ft-cachetable-wrappers.h" -#include "ft-internal.h" -#include "ft.h" +#include "ft/serialize/block_table.h" +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-internal.h" +#include "ft/node.h" static int -compare_pairs (FT_HANDLE brt, const DBT *a, const DBT *b) { - FAKE_DB(db, &brt->ft->cmp_descriptor); - int cmp = brt->ft->compare_fun(&db, a, b); - return cmp; +compare_pairs (FT_HANDLE ft_handle, const DBT *a, const DBT *b) { + return ft_handle->ft->cmp(a, b); } static int -compare_pair_to_key (FT_HANDLE brt, const DBT *a, bytevec key, ITEMLEN keylen) { +compare_pair_to_key (FT_HANDLE ft_handle, const DBT *a, const void *key, uint32_t keylen) { DBT y; - FAKE_DB(db, &brt->ft->cmp_descriptor); - int cmp = brt->ft->compare_fun(&db, a, toku_fill_dbt(&y, key, keylen)); - return cmp; + return ft_handle->ft->cmp(a, toku_fill_dbt(&y, key, keylen)); } static int -verify_msg_in_child_buffer(FT_HANDLE brt, enum ft_msg_type type, MSN msn, bytevec key, ITEMLEN keylen, bytevec UU(data), ITEMLEN UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot) +verify_msg_in_child_buffer(FT_HANDLE ft_handle, enum ft_msg_type type, MSN msn, const void *key, uint32_t keylen, const void *UU(data), uint32_t UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot) __attribute__((warn_unused_result)); +UU() static int -verify_msg_in_child_buffer(FT_HANDLE brt, enum ft_msg_type type, MSN msn, bytevec key, ITEMLEN keylen, bytevec UU(data), ITEMLEN UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot) { +verify_msg_in_child_buffer(FT_HANDLE ft_handle, enum ft_msg_type type, MSN msn, const void *key, uint32_t keylen, const void *UU(data), uint32_t UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot) { int result = 0; if (msn.msn == ZERO_MSN.msn) result = EINVAL; @@ -135,12 +134,12 @@ verify_msg_in_child_buffer(FT_HANDLE brt, enum ft_msg_type type, MSN msn, byteve case FT_COMMIT_ANY: // verify key in bounds if (lesser_pivot) { - int compare = compare_pair_to_key(brt, lesser_pivot, key, keylen); + int compare = compare_pair_to_key(ft_handle, lesser_pivot, key, keylen); if (compare >= 0) result = EINVAL; } if (result == 0 && greatereq_pivot) { - int compare = compare_pair_to_key(brt, greatereq_pivot, key, keylen); + int compare = compare_pair_to_key(ft_handle, greatereq_pivot, key, keylen); if (compare < 0) result = EINVAL; } @@ -152,14 +151,15 @@ verify_msg_in_child_buffer(FT_HANDLE brt, enum ft_msg_type type, MSN msn, byteve static DBT get_ith_key_dbt 
(BASEMENTNODE bn, int i) { DBT kdbt; - int r = bn->data_buffer.fetch_le_key_and_len(i, &kdbt.size, &kdbt.data); + int r = bn->data_buffer.fetch_key_and_len(i, &kdbt.size, &kdbt.data); invariant_zero(r); // this is a bad failure if it happens. return kdbt; } #define VERIFY_ASSERTION(predicate, i, string) ({ \ if(!(predicate)) { \ - if (verbose) { \ + (void) verbose; \ + if (true) { \ fprintf(stderr, "%s:%d: Looking at child %d of block %" PRId64 ": %s\n", __FILE__, __LINE__, i, blocknum.b, string); \ } \ result = TOKUDB_NEEDS_REPAIR; \ @@ -169,7 +169,7 @@ get_ith_key_dbt (BASEMENTNODE bn, int i) { struct count_msgs_extra { int count; MSN msn; - FIFO fifo; + message_buffer *msg_buffer; }; // template-only function, but must be extern @@ -177,15 +177,16 @@ int count_msgs(const int32_t &offset, const uint32_t UU(idx), struct count_msgs_ __attribute__((nonnull(3))); int count_msgs(const int32_t &offset, const uint32_t UU(idx), struct count_msgs_extra *const e) { - const struct fifo_entry *entry = toku_fifo_get_entry(e->fifo, offset); - if (entry->msn.msn == e->msn.msn) { + MSN msn; + e->msg_buffer->get_message_key_msn(offset, nullptr, &msn); + if (msn.msn == e->msn.msn) { e->count++; } return 0; } struct verify_message_tree_extra { - FIFO fifo; + message_buffer *msg_buffer; bool broadcast; bool is_fresh; int i; @@ -202,20 +203,22 @@ int verify_message_tree(const int32_t &offset, const uint32_t UU(idx), struct ve BLOCKNUM blocknum = e->blocknum; int keep_going_on_failure = e->keep_going_on_failure; int result = 0; - const struct fifo_entry *entry = toku_fifo_get_entry(e->fifo, offset); + DBT k, v; + ft_msg msg = e->msg_buffer->get_message(offset, &k, &v); + bool is_fresh = e->msg_buffer->get_freshness(offset); if (e->broadcast) { - VERIFY_ASSERTION(ft_msg_type_applies_all((enum ft_msg_type) entry->type) || ft_msg_type_does_nothing((enum ft_msg_type) entry->type), + VERIFY_ASSERTION(ft_msg_type_applies_all((enum ft_msg_type) msg.type()) || ft_msg_type_does_nothing((enum ft_msg_type) msg.type()), e->i, "message found in broadcast list that is not a broadcast"); } else { - VERIFY_ASSERTION(ft_msg_type_applies_once((enum ft_msg_type) entry->type), + VERIFY_ASSERTION(ft_msg_type_applies_once((enum ft_msg_type) msg.type()), e->i, "message found in fresh or stale message tree that does not apply once"); if (e->is_fresh) { if (e->messages_have_been_moved) { - VERIFY_ASSERTION(entry->is_fresh, + VERIFY_ASSERTION(is_fresh, e->i, "message found in fresh message tree that is not fresh"); } } else { - VERIFY_ASSERTION(!entry->is_fresh, + VERIFY_ASSERTION(!is_fresh, e->i, "message found in stale message tree that is fresh"); } } @@ -235,15 +238,15 @@ int verify_marked_messages(const int32_t &offset, const uint32_t UU(idx), struct BLOCKNUM blocknum = e->blocknum; int keep_going_on_failure = e->keep_going_on_failure; int result = 0; - const struct fifo_entry *entry = toku_fifo_get_entry(e->fifo, offset); - VERIFY_ASSERTION(!entry->is_fresh, e->i, "marked message found in the fresh message tree that is fresh"); + bool is_fresh = e->msg_buffer->get_freshness(offset); + VERIFY_ASSERTION(!is_fresh, e->i, "marked message found in the fresh message tree that is fresh"); done: return result; } template static int -verify_sorted_by_key_msn(FT_HANDLE brt, FIFO fifo, const verify_omt_t &mt) { +verify_sorted_by_key_msn(FT_HANDLE ft_handle, message_buffer *msg_buffer, const verify_omt_t &mt) { int result = 0; size_t last_offset = 0; for (uint32_t i = 0; i < mt.size(); i++) { @@ -251,12 +254,8 @@ 
verify_sorted_by_key_msn(FT_HANDLE brt, FIFO fifo, const verify_omt_t &mt) { int r = mt.fetch(i, &offset); assert_zero(r); if (i > 0) { - struct toku_fifo_entry_key_msn_cmp_extra extra; - ZERO_STRUCT(extra); - extra.desc = &brt->ft->cmp_descriptor; - extra.cmp = brt->ft->compare_fun; - extra.fifo = fifo; - if (toku_fifo_entry_key_msn_cmp(extra, last_offset, offset) >= 0) { + struct toku_msg_buffer_key_msn_cmp_extra extra(ft_handle->ft->cmp, msg_buffer); + if (toku_msg_buffer_key_msn_cmp(extra, last_offset, offset) >= 0) { result = TOKUDB_NEEDS_REPAIR; break; } @@ -268,15 +267,9 @@ verify_sorted_by_key_msn(FT_HANDLE brt, FIFO fifo, const verify_omt_t &mt) { template static int -count_eq_key_msn(FT_HANDLE brt, FIFO fifo, const count_omt_t &mt, const DBT *key, MSN msn) { - struct toku_fifo_entry_key_msn_heaviside_extra extra; - ZERO_STRUCT(extra); - extra.desc = &brt->ft->cmp_descriptor; - extra.cmp = brt->ft->compare_fun; - extra.fifo = fifo; - extra.key = key; - extra.msn = msn; - int r = mt.template find_zero(extra, nullptr, nullptr); +count_eq_key_msn(FT_HANDLE ft_handle, message_buffer *msg_buffer, const count_omt_t &mt, const DBT *key, MSN msn) { + struct toku_msg_buffer_key_msn_heaviside_extra extra(ft_handle->ft->cmp, msg_buffer, key, msn); + int r = mt.template find_zero(extra, nullptr, nullptr); int count; if (r == 0) { count = 1; @@ -290,28 +283,100 @@ count_eq_key_msn(FT_HANDLE brt, FIFO fifo, const count_omt_t &mt, const DBT *key void toku_get_node_for_verify( BLOCKNUM blocknum, - FT_HANDLE brt, + FT_HANDLE ft_handle, FTNODE* nodep ) { - uint32_t fullhash = toku_cachetable_hash(brt->ft->cf, blocknum); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, brt->ft); - toku_pin_ftnode_off_client_thread_and_maybe_move_messages( - brt->ft, + uint32_t fullhash = toku_cachetable_hash(ft_handle->ft->cf, blocknum); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); + toku_pin_ftnode( + ft_handle->ft, blocknum, fullhash, &bfe, PL_WRITE_EXPENSIVE, // may_modify_node - 0, - NULL, nodep, false ); } +struct verify_msg_fn { + FT_HANDLE ft_handle; + NONLEAF_CHILDINFO bnc; + const DBT *curr_less_pivot; + const DBT *curr_geq_pivot; + BLOCKNUM blocknum; + MSN this_msn; + int verbose; + int keep_going_on_failure; + bool messages_have_been_moved; + + MSN last_msn; + int msg_i; + int result = 0; // needed by VERIFY_ASSERTION + + verify_msg_fn(FT_HANDLE handle, NONLEAF_CHILDINFO nl, const DBT *less, const DBT *geq, + BLOCKNUM b, MSN tmsn, int v, int k, bool m) : + ft_handle(handle), bnc(nl), curr_less_pivot(less), curr_geq_pivot(geq), + blocknum(b), this_msn(tmsn), verbose(v), keep_going_on_failure(k), messages_have_been_moved(m), last_msn(ZERO_MSN), msg_i(0) { + } + + int operator()(const ft_msg &msg, bool is_fresh) { + enum ft_msg_type type = (enum ft_msg_type) msg.type(); + MSN msn = msg.msn(); + XIDS xid = msg.xids(); + const void *key = msg.kdbt()->data; + const void *data = msg.vdbt()->data; + uint32_t keylen = msg.kdbt()->size; + uint32_t datalen = msg.vdbt()->size; + + int r = verify_msg_in_child_buffer(ft_handle, type, msn, key, keylen, data, datalen, xid, + curr_less_pivot, + curr_geq_pivot); + VERIFY_ASSERTION(r == 0, msg_i, "A message in the buffer is out of place"); + VERIFY_ASSERTION((msn.msn > last_msn.msn), msg_i, "msn per msg must be monotonically increasing toward newer messages in buffer"); + VERIFY_ASSERTION((msn.msn <= this_msn.msn), msg_i, "all messages must have msn within limit of this node's max_msn_applied_to_node_in_memory"); + if 
(ft_msg_type_applies_once(type)) { + int count; + DBT keydbt; + toku_fill_dbt(&keydbt, key, keylen); + int total_count = 0; + count = count_eq_key_msn(ft_handle, &bnc->msg_buffer, bnc->fresh_message_tree, toku_fill_dbt(&keydbt, key, keylen), msn); + total_count += count; + if (is_fresh) { + VERIFY_ASSERTION(count == 1, msg_i, "a fresh message was not found in the fresh message tree"); + } else if (messages_have_been_moved) { + VERIFY_ASSERTION(count == 0, msg_i, "a stale message was found in the fresh message tree"); + } + VERIFY_ASSERTION(count <= 1, msg_i, "a message was found multiple times in the fresh message tree"); + count = count_eq_key_msn(ft_handle, &bnc->msg_buffer, bnc->stale_message_tree, &keydbt, msn); + + total_count += count; + if (is_fresh) { + VERIFY_ASSERTION(count == 0, msg_i, "a fresh message was found in the stale message tree"); + } else if (messages_have_been_moved) { + VERIFY_ASSERTION(count == 1, msg_i, "a stale message was not found in the stale message tree"); + } + VERIFY_ASSERTION(count <= 1, msg_i, "a message was found multiple times in the stale message tree"); + + VERIFY_ASSERTION(total_count <= 1, msg_i, "a message was found in both message trees (or more than once in a single tree)"); + VERIFY_ASSERTION(total_count >= 1, msg_i, "a message was not found in either message tree"); + } else { + VERIFY_ASSERTION(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type), msg_i, "a message was found that does not apply either to all or to only one key"); + struct count_msgs_extra extra = { .count = 0, .msn = msn, .msg_buffer = &bnc->msg_buffer }; + bnc->broadcast_list.iterate(&extra); + VERIFY_ASSERTION(extra.count == 1, msg_i, "a broadcast message was not found in the broadcast list"); + } + last_msn = msn; + msg_i++; +done: + return result; + } +}; + static int -toku_verify_ftnode_internal(FT_HANDLE brt, +toku_verify_ftnode_internal(FT_HANDLE ft_handle, MSN rootmsn, MSN parentmsn_with_messages, bool messages_exist_above, FTNODE node, int height, const DBT *lesser_pivot, // Everything in the subtree should be > lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.) @@ -320,10 +385,10 @@ toku_verify_ftnode_internal(FT_HANDLE brt, { int result=0; MSN this_msn; - BLOCKNUM blocknum = node->thisnodename; + BLOCKNUM blocknum = node->blocknum; //printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); this_msn = node->max_msn_applied_to_node_on_disk; if (height >= 0) { @@ -334,74 +399,40 @@ toku_verify_ftnode_internal(FT_HANDLE brt, } // Verify that all the pivot keys are in order. 
for (int i = 0; i < node->n_children-2; i++) { - int compare = compare_pairs(brt, &node->childkeys[i], &node->childkeys[i+1]); + DBT x, y; + int compare = compare_pairs(ft_handle, node->pivotkeys.fill_pivot(i, &x), node->pivotkeys.fill_pivot(i + 1, &y)); VERIFY_ASSERTION(compare < 0, i, "Value is >= the next value"); } // Verify that all the pivot keys are lesser_pivot < pivot <= greatereq_pivot for (int i = 0; i < node->n_children-1; i++) { + DBT x; if (lesser_pivot) { - int compare = compare_pairs(brt, lesser_pivot, &node->childkeys[i]); + int compare = compare_pairs(ft_handle, lesser_pivot, node->pivotkeys.fill_pivot(i, &x)); VERIFY_ASSERTION(compare < 0, i, "Pivot is >= the lower-bound pivot"); } if (greatereq_pivot) { - int compare = compare_pairs(brt, greatereq_pivot, &node->childkeys[i]); + int compare = compare_pairs(ft_handle, greatereq_pivot, node->pivotkeys.fill_pivot(i, &x)); VERIFY_ASSERTION(compare >= 0, i, "Pivot is < the upper-bound pivot"); } } for (int i = 0; i < node->n_children; i++) { - const DBT *curr_less_pivot = (i==0) ? lesser_pivot : &node->childkeys[i-1]; - const DBT *curr_geq_pivot = (i==node->n_children-1) ? greatereq_pivot : &node->childkeys[i]; + DBT x, y; + const DBT *curr_less_pivot = (i==0) ? lesser_pivot : node->pivotkeys.fill_pivot(i - 1, &x); + const DBT *curr_geq_pivot = (i==node->n_children-1) ? greatereq_pivot : node->pivotkeys.fill_pivot(i, &y); if (node->height > 0) { - MSN last_msn = ZERO_MSN; - // Verify that messages in the buffers are in the right place. NONLEAF_CHILDINFO bnc = BNC(node, i); - VERIFY_ASSERTION(verify_sorted_by_key_msn(brt, bnc->buffer, bnc->fresh_message_tree) == 0, i, "fresh_message_tree"); - VERIFY_ASSERTION(verify_sorted_by_key_msn(brt, bnc->buffer, bnc->stale_message_tree) == 0, i, "stale_message_tree"); - FIFO_ITERATE(bnc->buffer, key, keylen, data, datalen, itype, msn, xid, is_fresh, - ({ - enum ft_msg_type type = (enum ft_msg_type) itype; - int r = verify_msg_in_child_buffer(brt, type, msn, key, keylen, data, datalen, xid, - curr_less_pivot, - curr_geq_pivot); - VERIFY_ASSERTION(r==0, i, "A message in the buffer is out of place"); - VERIFY_ASSERTION((msn.msn > last_msn.msn), i, "msn per msg must be monotonically increasing toward newer messages in buffer"); - VERIFY_ASSERTION((msn.msn <= this_msn.msn), i, "all messages must have msn within limit of this node's max_msn_applied_to_node_in_memory"); - if (ft_msg_type_applies_once(type)) { - int count; - DBT keydbt; - toku_fill_dbt(&keydbt, key, keylen); - int total_count = 0; - count = count_eq_key_msn(brt, bnc->buffer, bnc->fresh_message_tree, toku_fill_dbt(&keydbt, key, keylen), msn); - total_count += count; - if (is_fresh) { - VERIFY_ASSERTION(count == 1, i, "a fresh message was not found in the fresh message tree"); - } else if (messages_have_been_moved) { - VERIFY_ASSERTION(count == 0, i, "a stale message was found in the fresh message tree"); - } - VERIFY_ASSERTION(count <= 1, i, "a message was found multiple times in the fresh message tree"); - count = count_eq_key_msn(brt, bnc->buffer, bnc->stale_message_tree, &keydbt, msn); - - total_count += count; - if (is_fresh) { - VERIFY_ASSERTION(count == 0, i, "a fresh message was found in the stale message tree"); - } else if (messages_have_been_moved) { - VERIFY_ASSERTION(count == 1, i, "a stale message was not found in the stale message tree"); - } - VERIFY_ASSERTION(count <= 1, i, "a message was found multiple times in the stale message tree"); - - VERIFY_ASSERTION(total_count <= 1, i, "a message was found in both 
message trees (or more than once in a single tree)"); - VERIFY_ASSERTION(total_count >= 1, i, "a message was not found in either message tree"); - } else { - VERIFY_ASSERTION(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type), i, "a message was found that does not apply either to all or to only one key"); - struct count_msgs_extra extra = { .count = 0, .msn = msn, .fifo = bnc->buffer }; - bnc->broadcast_list.iterate(&extra); - VERIFY_ASSERTION(extra.count == 1, i, "a broadcast message was not found in the broadcast list"); - } - last_msn = msn; - })); - struct verify_message_tree_extra extra = { .fifo = bnc->buffer, .broadcast = false, .is_fresh = true, .i = i, .verbose = verbose, .blocknum = node->thisnodename, .keep_going_on_failure = keep_going_on_failure, .messages_have_been_moved = messages_have_been_moved }; - int r = bnc->fresh_message_tree.iterate(&extra); + // Verify that messages in the buffers are in the right place. + VERIFY_ASSERTION(verify_sorted_by_key_msn(ft_handle, &bnc->msg_buffer, bnc->fresh_message_tree) == 0, i, "fresh_message_tree"); + VERIFY_ASSERTION(verify_sorted_by_key_msn(ft_handle, &bnc->msg_buffer, bnc->stale_message_tree) == 0, i, "stale_message_tree"); + + verify_msg_fn verify_msg(ft_handle, bnc, curr_less_pivot, curr_geq_pivot, + blocknum, this_msn, verbose, keep_going_on_failure, messages_have_been_moved); + int r = bnc->msg_buffer.iterate(verify_msg); + if (r != 0) { result = r; goto done; } + + struct verify_message_tree_extra extra = { .msg_buffer = &bnc->msg_buffer, .broadcast = false, .is_fresh = true, .i = i, .verbose = verbose, .blocknum = node->blocknum, .keep_going_on_failure = keep_going_on_failure, .messages_have_been_moved = messages_have_been_moved }; + r = bnc->fresh_message_tree.iterate(&extra); if (r != 0) { result = r; goto done; } extra.is_fresh = false; r = bnc->stale_message_tree.iterate(&extra); @@ -424,20 +455,20 @@ toku_verify_ftnode_internal(FT_HANDLE brt, } else { BASEMENTNODE bn = BLB(node, i); - for (uint32_t j = 0; j < bn->data_buffer.omt_size(); j++) { + for (uint32_t j = 0; j < bn->data_buffer.num_klpairs(); j++) { VERIFY_ASSERTION((rootmsn.msn >= this_msn.msn), 0, "leaf may have latest msn, but cannot be greater than root msn"); DBT kdbt = get_ith_key_dbt(bn, j); if (curr_less_pivot) { - int compare = compare_pairs(brt, curr_less_pivot, &kdbt); + int compare = compare_pairs(ft_handle, curr_less_pivot, &kdbt); VERIFY_ASSERTION(compare < 0, j, "The leafentry is >= the lower-bound pivot"); } if (curr_geq_pivot) { - int compare = compare_pairs(brt, curr_geq_pivot, &kdbt); + int compare = compare_pairs(ft_handle, curr_geq_pivot, &kdbt); VERIFY_ASSERTION(compare >= 0, j, "The leafentry is < the upper-bound pivot"); } if (0 < j) { DBT prev_key_dbt = get_ith_key_dbt(bn, j-1); - int compare = compare_pairs(brt, &prev_key_dbt, &kdbt); + int compare = compare_pairs(ft_handle, &prev_key_dbt, &kdbt); VERIFY_ASSERTION(compare < 0, j, "Adjacent leafentries are out of order"); } } @@ -451,7 +482,7 @@ toku_verify_ftnode_internal(FT_HANDLE brt, // input is a pinned node, on exit, node is unpinned int -toku_verify_ftnode (FT_HANDLE brt, +toku_verify_ftnode (FT_HANDLE ft_handle, MSN rootmsn, MSN parentmsn_with_messages, bool messages_exist_above, FTNODE node, int height, const DBT *lesser_pivot, // Everything in the subtree should be > lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.) 
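Editor's note: the leaf-node half of toku_verify_ftnode_internal above boils down to three ordering invariants per basement node (alongside the MSN bound against the root): the lower-bound pivot must be strictly less than every key, every key must be at most the upper-bound pivot, and adjacent leaf entries must be strictly increasing. The sketch below restates those invariants over plain integers; the real code compares DBTs through the FT's comparator and reports violations via VERIFY_ASSERTION rather than returning a bool.

#include <cstddef>
#include <optional>
#include <vector>

// Returns true when the keys satisfy the leaf ordering invariants checked above.
static bool leaf_keys_ok(const std::vector<int> &keys,
                         std::optional<int> lower,    // curr_less_pivot, if any
                         std::optional<int> upper) {  // curr_geq_pivot, if any
    for (std::size_t j = 0; j < keys.size(); j++) {
        if (lower && !(*lower < keys[j])) return false;       // violated: key <= lower-bound pivot
        if (upper && !(keys[j] <= *upper)) return false;      // violated: key > upper-bound pivot
        if (j > 0 && !(keys[j - 1] < keys[j])) return false;  // violated: adjacent entries out of order
    }
    return true;
}

// leaf_keys_ok({10, 20, 30}, 5, 30) is true; leaf_keys_ok({10, 10}, 5, 30) is false
// because adjacent entries are not strictly increasing.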
@@ -462,7 +493,7 @@ toku_verify_ftnode (FT_HANDLE brt, MSN this_msn; //printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); this_msn = node->max_msn_applied_to_node_on_disk; int result = 0; @@ -471,15 +502,15 @@ toku_verify_ftnode (FT_HANDLE brt, // Otherwise we'll just do the next call result = toku_verify_ftnode_internal( - brt, rootmsn, parentmsn_with_messages, messages_exist_above, node, height, lesser_pivot, greatereq_pivot, + ft_handle, rootmsn, parentmsn_with_messages, messages_exist_above, node, height, lesser_pivot, greatereq_pivot, verbose, keep_going_on_failure, false); if (result != 0 && (!keep_going_on_failure || result != TOKUDB_NEEDS_REPAIR)) goto done; } if (node->height > 0) { - toku_move_ftnode_messages_to_stale(brt->ft, node); + toku_move_ftnode_messages_to_stale(ft_handle->ft, node); } result2 = toku_verify_ftnode_internal( - brt, rootmsn, parentmsn_with_messages, messages_exist_above, node, height, lesser_pivot, greatereq_pivot, + ft_handle, rootmsn, parentmsn_with_messages, messages_exist_above, node, height, lesser_pivot, greatereq_pivot, verbose, keep_going_on_failure, true); if (result == 0) { result = result2; @@ -490,15 +521,16 @@ toku_verify_ftnode (FT_HANDLE brt, if (recurse && node->height > 0) { for (int i = 0; i < node->n_children; i++) { FTNODE child_node; - toku_get_node_for_verify(BP_BLOCKNUM(node, i), brt, &child_node); - int r = toku_verify_ftnode(brt, rootmsn, + toku_get_node_for_verify(BP_BLOCKNUM(node, i), ft_handle, &child_node); + DBT x, y; + int r = toku_verify_ftnode(ft_handle, rootmsn, (toku_bnc_n_entries(BNC(node, i)) > 0 ? this_msn : parentmsn_with_messages), messages_exist_above || toku_bnc_n_entries(BNC(node, i)) > 0, child_node, node->height-1, - (i==0) ? lesser_pivot : &node->childkeys[i-1], - (i==node->n_children-1) ? greatereq_pivot : &node->childkeys[i], + (i==0) ? lesser_pivot : node->pivotkeys.fill_pivot(i - 1, &x), + (i==node->n_children-1) ? 
greatereq_pivot : node->pivotkeys.fill_pivot(i, &y), progress_callback, progress_extra, recurse, verbose, keep_going_on_failure); if (r) { @@ -508,7 +540,7 @@ toku_verify_ftnode (FT_HANDLE brt, } } done: - toku_unpin_ftnode(brt->ft, node); + toku_unpin_ftnode(ft_handle->ft, node); if (result == 0 && progress_callback) result = progress_callback(progress_extra, 0.0); @@ -517,26 +549,26 @@ toku_verify_ftnode (FT_HANDLE brt, } int -toku_verify_ft_with_progress (FT_HANDLE brt, int (*progress_callback)(void *extra, float progress), void *progress_extra, int verbose, int keep_on_going) { - assert(brt->ft); +toku_verify_ft_with_progress (FT_HANDLE ft_handle, int (*progress_callback)(void *extra, float progress), void *progress_extra, int verbose, int keep_on_going) { + assert(ft_handle->ft); FTNODE root_node = NULL; { uint32_t root_hash; CACHEKEY root_key; - toku_calculate_root_offset_pointer(brt->ft, &root_key, &root_hash); - toku_get_node_for_verify(root_key, brt, &root_node); + toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &root_hash); + toku_get_node_for_verify(root_key, ft_handle, &root_node); } - int r = toku_verify_ftnode(brt, brt->ft->h->max_msn_in_ft, brt->ft->h->max_msn_in_ft, false, root_node, -1, NULL, NULL, progress_callback, progress_extra, 1, verbose, keep_on_going); + int r = toku_verify_ftnode(ft_handle, ft_handle->ft->h->max_msn_in_ft, ft_handle->ft->h->max_msn_in_ft, false, root_node, -1, NULL, NULL, progress_callback, progress_extra, 1, verbose, keep_on_going); if (r == 0) { - toku_ft_lock(brt->ft); - brt->ft->h->time_of_last_verification = time(NULL); - brt->ft->h->dirty = 1; - toku_ft_unlock(brt->ft); + toku_ft_lock(ft_handle->ft); + ft_handle->ft->h->time_of_last_verification = time(NULL); + ft_handle->ft->h->dirty = 1; + toku_ft_unlock(ft_handle->ft); } return r; } int -toku_verify_ft (FT_HANDLE brt) { - return toku_verify_ft_with_progress(brt, NULL, NULL, 0, 0); +toku_verify_ft (FT_HANDLE ft_handle) { + return toku_verify_ft_with_progress(ft_handle, NULL, NULL, 0, 0); } diff --git a/storage/tokudb/ft-index/ft/ft.cc b/storage/tokudb/ft-index/ft/ft.cc index 0fd43065f1a2e..fd3960b64f6ea 100644 --- a/storage/tokudb/ft-index/ft/ft.cc +++ b/storage/tokudb/ft-index/ft/ft.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,12 +89,15 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "ft.h" -#include "ft-internal.h" -#include "ft-cachetable-wrappers.h" -#include "log-internal.h" - -#include +#include "ft/serialize/block_table.h" +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-internal.h" +#include "ft/logger/log-internal.h" +#include "ft/log_header.h" +#include "ft/node.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_node-serialize.h" #include #include @@ -107,10 +110,10 @@ toku_reset_root_xid_that_created(FT ft, TXNID new_root_xid_that_created) { // hold lock around setting and clearing of dirty bit // (see cooperative use of dirty bit in ft_begin_checkpoint()) - toku_ft_lock (ft); + toku_ft_lock(ft); ft->h->root_xid_that_created = new_root_xid_that_created; ft->h->dirty = 1; - toku_ft_unlock (ft); + toku_ft_unlock(ft); } static void @@ -118,9 +121,10 @@ ft_destroy(FT ft) { //header and checkpoint_header have same Blocktable pointer //cannot destroy since it is still in use by CURRENT assert(ft->h->type == FT_CURRENT); - toku_blocktable_destroy(&ft->blocktable); - if (ft->descriptor.dbt.data) toku_free(ft->descriptor.dbt.data); - if (ft->cmp_descriptor.dbt.data) toku_free(ft->cmp_descriptor.dbt.data); + ft->blocktable.destroy(); + ft->cmp.destroy(); + toku_destroy_dbt(&ft->descriptor.dbt); + toku_destroy_dbt(&ft->cmp_descriptor.dbt); toku_ft_destroy_reflock(ft); toku_free(ft->h); } @@ -187,7 +191,7 @@ ft_log_fassociate_during_checkpoint (CACHEFILE cf, void *header_v) { } // Maps to cf->begin_checkpoint_userdata -// Create checkpoint-in-progress versions of header and translation (btt) (and fifo for now...). +// Create checkpoint-in-progress versions of header and translation (btt) // Has access to fd (it is protected). // // Not reentrant for a single FT (see ft_checkpoint) @@ -199,7 +203,7 @@ static void ft_begin_checkpoint (LSN checkpoint_lsn, void *header_v) { assert(ft->checkpoint_header == NULL); ft_copy_for_checkpoint_unlocked(ft, checkpoint_lsn); ft->h->dirty = 0; // this is only place this bit is cleared (in currentheader) - toku_block_translation_note_start_checkpoint_unlocked(ft->blocktable); + ft->blocktable.note_start_checkpoint_unlocked(); toku_ft_unlock (ft); } @@ -235,8 +239,6 @@ ft_hack_highest_unused_msn_for_upgrade_for_checkpoint(FT ft) { static void ft_checkpoint (CACHEFILE cf, int fd, void *header_v) { FT ft = (FT) header_v; FT_HEADER ch = ft->checkpoint_header; - //printf("%s:%d allocated_limit=%lu writing queue to %lu\n", __FILE__, __LINE__, - // block_allocator_allocated_limit(h->block_allocator), h->unused_blocks.b*h->nodesize); assert(ch); assert(ch->type == FT_CHECKPOINT_INPROGRESS); if (ch->dirty) { // this is only place this bit is tested (in checkpoint_header) @@ -251,16 +253,15 @@ static void ft_checkpoint (CACHEFILE cf, int fd, void *header_v) { ft_hack_highest_unused_msn_for_upgrade_for_checkpoint(ft); // write translation and header to disk (or at least to OS internal buffer) - toku_serialize_ft_to(fd, ch, ft->blocktable, ft->cf); + toku_serialize_ft_to(fd, ch, &ft->blocktable, ft->cf); ch->dirty = 0; // this is only place this bit is cleared (in checkpoint_header) // fsync the cachefile toku_cachefile_fsync(cf); ft->h->checkpoint_count++; // checkpoint succeeded, next checkpoint will save to alternate header location ft->h->checkpoint_lsn = ch->checkpoint_lsn; //Header updated. 
- } - else { - toku_block_translation_note_skipped_checkpoint(ft->blocktable); + } else { + ft->blocktable.note_skipped_checkpoint(); } } @@ -268,14 +269,12 @@ static void ft_checkpoint (CACHEFILE cf, int fd, void *header_v) { // free unused disk space // (i.e. tell BlockAllocator to liberate blocks used by previous checkpoint). // Must have access to fd (protected) -static void ft_end_checkpoint (CACHEFILE UU(cachefile), int fd, void *header_v) { +static void ft_end_checkpoint(CACHEFILE UU(cf), int fd, void *header_v) { FT ft = (FT) header_v; assert(ft->h->type == FT_CURRENT); - toku_block_translation_note_end_checkpoint(ft->blocktable, fd); - if (ft->checkpoint_header) { - toku_free(ft->checkpoint_header); - ft->checkpoint_header = NULL; - } + ft->blocktable.note_end_checkpoint(fd); + toku_free(ft->checkpoint_header); + ft->checkpoint_header = nullptr; } // maps to cf->close_userdata @@ -360,11 +359,6 @@ static void ft_note_unpin_by_checkpoint (CACHEFILE UU(cachefile), void *header_v // End of Functions that are callbacks to the cachefile ///////////////////////////////////////////////////////////////////////// -void toku_node_save_ct_pair(CACHEKEY UU(key), void *value_data, PAIR p) { - FTNODE CAST_FROM_VOIDP(node, value_data); - node->ct_pair = p; -} - static void setup_initial_ft_root_node(FT ft, BLOCKNUM blocknum) { FTNODE XCALLOC(node); toku_initialize_empty_ftnode(node, blocknum, 0, 1, ft->h->layout_version, ft->h->flags); @@ -375,7 +369,7 @@ static void setup_initial_ft_root_node(FT ft, BLOCKNUM blocknum) { toku_cachetable_put(ft->cf, blocknum, fullhash, node, make_ftnode_pair_attr(node), get_write_callbacks_for_node(ft), - toku_node_save_ct_pair); + toku_ftnode_save_ct_pair); toku_unpin_ftnode(ft, node); } @@ -386,7 +380,8 @@ static void ft_init(FT ft, FT_OPTIONS options, CACHEFILE cf) { toku_list_init(&ft->live_ft_handles); - ft->compare_fun = options->compare_fun; + // intuitively, the comparator points to the FT's cmp descriptor + ft->cmp.create(options->compare_fun, &ft->cmp_descriptor, options->memcmp_magic); ft->update_fun = options->update_fun; if (ft->cf != NULL) { @@ -407,7 +402,7 @@ static void ft_init(FT ft, FT_OPTIONS options, CACHEFILE cf) { ft_note_pin_by_checkpoint, ft_note_unpin_by_checkpoint); - toku_block_verify_no_free_blocknums(ft->blocktable); + ft->blocktable.verify_no_free_blocknums(); } @@ -451,55 +446,48 @@ void toku_ft_create(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn) { invariant(ftp); FT XCALLOC(ft); - memset(&ft->descriptor, 0, sizeof(ft->descriptor)); - memset(&ft->cmp_descriptor, 0, sizeof(ft->cmp_descriptor)); - ft->h = ft_header_create(options, make_blocknum(0), (txn ? txn->txnid.parent_id64: TXNID_NONE)); toku_ft_init_reflock(ft); // Assign blocknum for root block, also dirty the header - toku_blocktable_create_new(&ft->blocktable); - toku_allocate_blocknum(ft->blocktable, &ft->h->root_blocknum, ft); + ft->blocktable.create(); + ft->blocktable.allocate_blocknum(&ft->h->root_blocknum, ft); ft_init(ft, options, cf); *ftp = ft; } -// TODO: (Zardosht) get rid of brt parameter -int toku_read_ft_and_store_in_cachefile (FT_HANDLE brt, CACHEFILE cf, LSN max_acceptable_lsn, FT *header) +// TODO: (Zardosht) get rid of ft parameter +int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_handle, CACHEFILE cf, LSN max_acceptable_lsn, FT *header) // If the cachefile already has the header, then just get it. // If the cachefile has not been initialized, then don't modify anything. 
// max_acceptable_lsn is the latest acceptable checkpointed version of the file. { - { - FT h; - if ((h = (FT) toku_cachefile_get_userdata(cf))!=0) { - *header = h; - assert(brt->options.update_fun == h->update_fun); - assert(brt->options.compare_fun == h->compare_fun); - return 0; - } + FT ft = nullptr; + if ((ft = (FT) toku_cachefile_get_userdata(cf)) != nullptr) { + *header = ft; + assert(ft_handle->options.update_fun == ft->update_fun); + return 0; } - FT h = nullptr; - int r; - { - int fd = toku_cachefile_get_fd(cf); - r = toku_deserialize_ft_from(fd, max_acceptable_lsn, &h); - if (r == TOKUDB_BAD_CHECKSUM) { - fprintf(stderr, "Checksum failure while reading header in file %s.\n", toku_cachefile_fname_in_env(cf)); - assert(false); // make absolutely sure we crash before doing anything else - } + + int fd = toku_cachefile_get_fd(cf); + int r = toku_deserialize_ft_from(fd, max_acceptable_lsn, &ft); + if (r == TOKUDB_BAD_CHECKSUM) { + fprintf(stderr, "Checksum failure while reading header in file %s.\n", toku_cachefile_fname_in_env(cf)); + assert(false); // make absolutely sure we crash before doing anything else + } else if (r != 0) { + return r; } - if (r!=0) return r; - // GCC 4.8 seems to get confused by the gotos in the deserialize code and think h is maybe uninitialized. - invariant_notnull(h); - h->cf = cf; - h->compare_fun = brt->options.compare_fun; - h->update_fun = brt->options.update_fun; + + invariant_notnull(ft); + // intuitively, the comparator points to the FT's cmp descriptor + ft->cmp.create(ft_handle->options.compare_fun, &ft->cmp_descriptor, ft_handle->options.memcmp_magic); + ft->update_fun = ft_handle->options.update_fun; + ft->cf = cf; toku_cachefile_set_userdata(cf, - (void*)h, + reinterpret_cast(ft), ft_log_fassociate_during_checkpoint, ft_close, ft_free, @@ -508,7 +496,7 @@ int toku_read_ft_and_store_in_cachefile (FT_HANDLE brt, CACHEFILE cf, LSN max_ac ft_end_checkpoint, ft_note_pin_by_checkpoint, ft_note_unpin_by_checkpoint); - *header = h; + *header = ft; return 0; } @@ -550,22 +538,22 @@ void toku_ft_evict_from_memory(FT ft, bool oplsn_valid, LSN oplsn) { } // Verifies there exists exactly one ft handle and returns it. -FT_HANDLE toku_ft_get_only_existing_ft_handle(FT h) { +FT_HANDLE toku_ft_get_only_existing_ft_handle(FT ft) { FT_HANDLE ft_handle_ret = NULL; - toku_ft_grab_reflock(h); - assert(toku_list_num_elements_est(&h->live_ft_handles) == 1); - ft_handle_ret = toku_list_struct(toku_list_head(&h->live_ft_handles), struct ft_handle, live_ft_handle_link); - toku_ft_release_reflock(h); + toku_ft_grab_reflock(ft); + assert(toku_list_num_elements_est(&ft->live_ft_handles) == 1); + ft_handle_ret = toku_list_struct(toku_list_head(&ft->live_ft_handles), struct ft_handle, live_ft_handle_link); + toku_ft_release_reflock(ft); return ft_handle_ret; } -// Purpose: set fields in brt_header to capture accountability info for start of HOT optimize. +// Purpose: set fields in ft_header to capture accountability info for start of HOT optimize. // Note: HOT accountability variables in header are modified only while holding header lock. // (Header lock is really needed for touching the dirty bit, but it's useful and // convenient here for keeping the HOT variables threadsafe.) 
void -toku_ft_note_hot_begin(FT_HANDLE brt) { - FT ft = brt->ft; +toku_ft_note_hot_begin(FT_HANDLE ft_handle) { + FT ft = ft_handle->ft; time_t now = time(NULL); // hold lock around setting and clearing of dirty bit @@ -578,11 +566,11 @@ toku_ft_note_hot_begin(FT_HANDLE brt) { } -// Purpose: set fields in brt_header to capture accountability info for end of HOT optimize. +// Purpose: set fields in ft_header to capture accountability info for end of HOT optimize. // Note: See note for toku_ft_note_hot_begin(). void -toku_ft_note_hot_complete(FT_HANDLE brt, bool success, MSN msn_at_start_of_hot) { - FT ft = brt->ft; +toku_ft_note_hot_complete(FT_HANDLE ft_handle, bool success, MSN msn_at_start_of_hot) { + FT ft = ft_handle->ft; time_t now = time(NULL); toku_ft_lock(ft); @@ -620,6 +608,7 @@ toku_ft_init(FT ft, .compression_method = compression_method, .fanout = fanout, .flags = 0, + .memcmp_magic = 0, .compare_fun = NULL, .update_fun = NULL }; @@ -628,29 +617,29 @@ toku_ft_init(FT ft, ft->h->checkpoint_lsn = checkpoint_lsn; } -// Open a brt for use by redirect. The new brt must have the same dict_id as the old_ft passed in. (FILENUM is assigned by the ft_handle_open() function.) +// Open an ft for use by redirect. The new ft must have the same dict_id as the old_ft passed in. (FILENUM is assigned by the ft_handle_open() function.) static int -ft_handle_open_for_redirect(FT_HANDLE *new_ftp, const char *fname_in_env, TOKUTXN txn, FT old_h) { - FT_HANDLE t; - assert(old_h->dict_id.dictid != DICTIONARY_ID_NONE.dictid); - toku_ft_handle_create(&t); - toku_ft_set_bt_compare(t, old_h->compare_fun); - toku_ft_set_update(t, old_h->update_fun); - toku_ft_handle_set_nodesize(t, old_h->h->nodesize); - toku_ft_handle_set_basementnodesize(t, old_h->h->basementnodesize); - toku_ft_handle_set_compression_method(t, old_h->h->compression_method); - toku_ft_handle_set_fanout(t, old_h->h->fanout); - CACHETABLE ct = toku_cachefile_get_cachetable(old_h->cf); - int r = toku_ft_handle_open_with_dict_id(t, fname_in_env, 0, 0, ct, txn, old_h->dict_id); +ft_handle_open_for_redirect(FT_HANDLE *new_ftp, const char *fname_in_env, TOKUTXN txn, FT old_ft) { + FT_HANDLE ft_handle; + assert(old_ft->dict_id.dictid != DICTIONARY_ID_NONE.dictid); + toku_ft_handle_create(&ft_handle); + toku_ft_set_bt_compare(ft_handle, old_ft->cmp.get_compare_func()); + toku_ft_set_update(ft_handle, old_ft->update_fun); + toku_ft_handle_set_nodesize(ft_handle, old_ft->h->nodesize); + toku_ft_handle_set_basementnodesize(ft_handle, old_ft->h->basementnodesize); + toku_ft_handle_set_compression_method(ft_handle, old_ft->h->compression_method); + toku_ft_handle_set_fanout(ft_handle, old_ft->h->fanout); + CACHETABLE ct = toku_cachefile_get_cachetable(old_ft->cf); + int r = toku_ft_handle_open_with_dict_id(ft_handle, fname_in_env, 0, 0, ct, txn, old_ft->dict_id); if (r != 0) { goto cleanup; } - assert(t->ft->dict_id.dictid == old_h->dict_id.dictid); - *new_ftp = t; + assert(ft_handle->ft->dict_id.dictid == old_ft->dict_id.dictid); + *new_ftp = ft_handle; cleanup: if (r != 0) { - toku_ft_handle_close(t); + toku_ft_handle_close(ft_handle); } return r; } @@ -658,81 +647,81 @@ ft_handle_open_for_redirect(FT_HANDLE *new_ftp, const char *fname_in_env, TOKUTX // This function performs most of the work to redirect a dictionary to different file. // It is called for redirect and to abort a redirect. (This function is almost its own inverse.) 
static int -dictionary_redirect_internal(const char *dst_fname_in_env, FT src_h, TOKUTXN txn, FT *dst_hp) { +dictionary_redirect_internal(const char *dst_fname_in_env, FT src_ft, TOKUTXN txn, FT *dst_ftp) { int r; - FILENUM src_filenum = toku_cachefile_filenum(src_h->cf); + FILENUM src_filenum = toku_cachefile_filenum(src_ft->cf); FILENUM dst_filenum = FILENUM_NONE; - FT dst_h = NULL; + FT dst_ft = NULL; struct toku_list *list; - // open a dummy brt based off of + // open a dummy ft based off of // dst_fname_in_env to get the header - // then we will change all the brt's to have - // their headers point to dst_h instead of src_h + // then we will change all the ft's to have + // their headers point to dst_ft instead of src_ft FT_HANDLE tmp_dst_ft = NULL; - r = ft_handle_open_for_redirect(&tmp_dst_ft, dst_fname_in_env, txn, src_h); + r = ft_handle_open_for_redirect(&tmp_dst_ft, dst_fname_in_env, txn, src_ft); if (r != 0) { goto cleanup; } - dst_h = tmp_dst_ft->ft; + dst_ft = tmp_dst_ft->ft; // some sanity checks on dst_filenum - dst_filenum = toku_cachefile_filenum(dst_h->cf); + dst_filenum = toku_cachefile_filenum(dst_ft->cf); assert(dst_filenum.fileid!=FILENUM_NONE.fileid); assert(dst_filenum.fileid!=src_filenum.fileid); //Cannot be same file. - // for each live brt, brt->ft is currently src_h + // for each live ft_handle, ft_handle->ft is currently src_ft // we want to change it to dummy_dst - toku_ft_grab_reflock(src_h); - while (!toku_list_empty(&src_h->live_ft_handles)) { - list = src_h->live_ft_handles.next; + toku_ft_grab_reflock(src_ft); + while (!toku_list_empty(&src_ft->live_ft_handles)) { + list = src_ft->live_ft_handles.next; FT_HANDLE src_handle = NULL; src_handle = toku_list_struct(list, struct ft_handle, live_ft_handle_link); toku_list_remove(&src_handle->live_ft_handle_link); - toku_ft_note_ft_handle_open(dst_h, src_handle); + toku_ft_note_ft_handle_open(dst_ft, src_handle); if (src_handle->redirect_callback) { src_handle->redirect_callback(src_handle, src_handle->redirect_callback_extra); } } - assert(dst_h); - // making sure that we are not leaking src_h - assert(toku_ft_needed_unlocked(src_h)); - toku_ft_release_reflock(src_h); + assert(dst_ft); + // making sure that we are not leaking src_ft + assert(toku_ft_needed_unlocked(src_ft)); + toku_ft_release_reflock(src_ft); toku_ft_handle_close(tmp_dst_ft); - *dst_hp = dst_h; + *dst_ftp = dst_ft; cleanup: return r; } -//This is the 'abort redirect' function. The redirect of old_h to new_h was done -//and now must be undone, so here we redirect new_h back to old_h. +//This is the 'abort redirect' function. The redirect of old_ft to new_ft was done +//and now must be undone, so here we redirect new_ft back to old_ft. int -toku_dictionary_redirect_abort(FT old_h, FT new_h, TOKUTXN txn) { - char *old_fname_in_env = toku_cachefile_fname_in_env(old_h->cf); +toku_dictionary_redirect_abort(FT old_ft, FT new_ft, TOKUTXN txn) { + char *old_fname_in_env = toku_cachefile_fname_in_env(old_ft->cf); int r; { - FILENUM old_filenum = toku_cachefile_filenum(old_h->cf); - FILENUM new_filenum = toku_cachefile_filenum(new_h->cf); + FILENUM old_filenum = toku_cachefile_filenum(old_ft->cf); + FILENUM new_filenum = toku_cachefile_filenum(new_ft->cf); assert(old_filenum.fileid!=new_filenum.fileid); //Cannot be same file. - //No living brts in old header. - toku_ft_grab_reflock(old_h); - assert(toku_list_empty(&old_h->live_ft_handles)); - toku_ft_release_reflock(old_h); + //No living fts in old header. 
+ toku_ft_grab_reflock(old_ft); + assert(toku_list_empty(&old_ft->live_ft_handles)); + toku_ft_release_reflock(old_ft); } - FT dst_h; - // redirect back from new_h to old_h - r = dictionary_redirect_internal(old_fname_in_env, new_h, txn, &dst_h); + FT dst_ft; + // redirect back from new_ft to old_ft + r = dictionary_redirect_internal(old_fname_in_env, new_ft, txn, &dst_ft); if (r == 0) { - assert(dst_h == old_h); + assert(dst_ft == old_ft); } return r; } @@ -740,13 +729,13 @@ toku_dictionary_redirect_abort(FT old_h, FT new_h, TOKUTXN txn) { /**** * on redirect or abort: * if redirect txn_note_doing_work(txn) - * if redirect connect src brt to txn (txn modified this brt) - * for each src brt - * open brt to dst file (create new brt struct) - * if redirect connect dst brt to txn - * redirect db to new brt - * redirect cursors to new brt - * close all src brts + * if redirect connect src ft to txn (txn modified this ft) + * for each src ft + * open ft to dst file (create new ft struct) + * if redirect connect dst ft to txn + * redirect db to new ft + * redirect cursors to new ft + * close all src fts * if redirect make rollback log entry * * on commit: @@ -758,21 +747,21 @@ int toku_dictionary_redirect (const char *dst_fname_in_env, FT_HANDLE old_ft_h, TOKUTXN txn) { // Input args: // new file name for dictionary (relative to env) -// old_ft_h is a live brt of open handle ({DB, BRT} pair) that currently refers to old dictionary file. +// old_ft_h is a live ft of open handle ({DB, FT_HANDLE} pair) that currently refers to old dictionary file. // (old_ft_h may be one of many handles to the dictionary.) // txn that created the loader // Requires: // multi operation lock is held. -// The brt is open. (which implies there can be no zombies.) +// The ft is open. (which implies there can be no zombies.) // The new file must be a valid dictionary. -// The block size and flags in the new file must match the existing BRT. +// The block size and flags in the new file must match the existing FT. // The new file must already have its descriptor in it (and it must match the existing descriptor). // Effect: // Open new FTs (and related header and cachefile) to the new dictionary file with a new FILENUM. -// Redirect all DBs that point to brts that point to the old file to point to brts that point to the new file. +// Redirect all DBs that point to fts that point to the old file to point to fts that point to the new file. // Copy the dictionary id (dict_id) from the header of the original file to the header of the new file. // Create a rollback log entry. -// The original BRT, header, cachefile and file remain unchanged. They will be cleaned up on commmit. +// The original FT, header, cachefile and file remain unchanged. They will be cleaned up on commmit. 
// If the txn aborts, then this operation will be undone int r; @@ -881,18 +870,17 @@ toku_ft_stat64 (FT ft, struct ftstat64_s *s) { s->verify_time_sec = ft->h->time_of_last_verification; } -void -toku_ft_get_fractal_tree_info64(FT ft, struct ftinfo64 *s) { - toku_blocktable_get_info64(ft->blocktable, s); +void toku_ft_get_fractal_tree_info64(FT ft, struct ftinfo64 *info) { + ft->blocktable.get_info64(info); } int toku_ft_iterate_fractal_tree_block_map(FT ft, int (*iter)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*), void *iter_extra) { uint64_t this_checkpoint_count = ft->h->checkpoint_count; - return toku_blocktable_iterate_translation_tables(ft->blocktable, this_checkpoint_count, iter, iter_extra); + return ft->blocktable.iterate_translation_tables(this_checkpoint_count, iter, iter_extra); } void -toku_ft_update_descriptor(FT ft, DESCRIPTOR d) +toku_ft_update_descriptor(FT ft, DESCRIPTOR desc) // Effect: Changes the descriptor in a tree (log the change, make sure it makes it to disk eventually). // requires: the ft is fully user-opened with a valid cachefile. // descriptor updates cannot happen in parallel for an FT @@ -900,7 +888,7 @@ toku_ft_update_descriptor(FT ft, DESCRIPTOR d) { assert(ft->cf); int fd = toku_cachefile_get_fd(ft->cf); - toku_ft_update_descriptor_with_fd(ft, d, fd); + toku_ft_update_descriptor_with_fd(ft, desc, fd); } // upadate the descriptor for an ft and serialize it using @@ -909,41 +897,30 @@ toku_ft_update_descriptor(FT ft, DESCRIPTOR d) // update a descriptor before the ft is fully opened and has // a valid cachefile. void -toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR d, int fd) { +toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR desc, int fd) { // the checksum is four bytes, so that's where the magic number comes from // make space for the new descriptor and write it out to disk DISKOFF offset, size; - size = toku_serialize_descriptor_size(d) + 4; - toku_realloc_descriptor_on_disk(ft->blocktable, size, &offset, ft, fd); - toku_serialize_descriptor_contents_to_fd(fd, d, offset); + size = toku_serialize_descriptor_size(desc) + 4; + ft->blocktable.realloc_descriptor_on_disk(size, &offset, ft, fd); + toku_serialize_descriptor_contents_to_fd(fd, desc, offset); // cleanup the old descriptor and set the in-memory descriptor to the new one - if (ft->descriptor.dbt.data) { - toku_free(ft->descriptor.dbt.data); - } - ft->descriptor.dbt.size = d->dbt.size; - ft->descriptor.dbt.data = toku_memdup(d->dbt.data, d->dbt.size); + toku_destroy_dbt(&ft->descriptor.dbt); + toku_clone_dbt(&ft->descriptor.dbt, desc->dbt); } -void -toku_ft_update_cmp_descriptor(FT ft) { - if (ft->cmp_descriptor.dbt.data != NULL) { - toku_free(ft->cmp_descriptor.dbt.data); - } - ft->cmp_descriptor.dbt.size = ft->descriptor.dbt.size; - ft->cmp_descriptor.dbt.data = toku_xmemdup( - ft->descriptor.dbt.data, - ft->descriptor.dbt.size - ); +void toku_ft_update_cmp_descriptor(FT ft) { + // cleanup the old cmp descriptor and clone it as the in-memory descriptor + toku_destroy_dbt(&ft->cmp_descriptor.dbt); + toku_clone_dbt(&ft->cmp_descriptor.dbt, ft->descriptor.dbt); } -DESCRIPTOR -toku_ft_get_descriptor(FT_HANDLE ft_handle) { +DESCRIPTOR toku_ft_get_descriptor(FT_HANDLE ft_handle) { return &ft_handle->ft->descriptor; } -DESCRIPTOR -toku_ft_get_cmp_descriptor(FT_HANDLE ft_handle) { +DESCRIPTOR toku_ft_get_cmp_descriptor(FT_HANDLE ft_handle) { return &ft_handle->ft->cmp_descriptor; } @@ -1068,8 +1045,8 @@ garbage_helper(BLOCKNUM blocknum, int64_t UU(size), int64_t UU(address), void *e struct 
garbage_helper_extra *CAST_FROM_VOIDP(info, extra); FTNODE node; FTNODE_DISK_DATA ndd; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, info->ft); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(info->ft); int fd = toku_cachefile_get_fd(info->ft->cf); int r = toku_deserialize_ftnode_from(fd, blocknum, 0, &node, &ndd, &bfe); if (r != 0) { @@ -1079,8 +1056,8 @@ garbage_helper(BLOCKNUM blocknum, int64_t UU(size), int64_t UU(address), void *e goto exit; } for (int i = 0; i < node->n_children; ++i) { - BN_DATA bd = BLB_DATA(node, i); - r = bd->omt_iterate(info); + bn_data* bd = BLB_DATA(node, i); + r = bd->iterate(info); if (r != 0) { goto exit; } @@ -1103,7 +1080,7 @@ void toku_ft_get_garbage(FT ft, uint64_t *total_space, uint64_t *used_space) { .total_space = 0, .used_space = 0 }; - toku_blocktable_iterate(ft->blocktable, TRANSLATION_CHECKPOINTED, garbage_helper, &info, true, true); + ft->blocktable.iterate(block_table::TRANSLATION_CHECKPOINTED, garbage_helper, &info, true, true); *total_space = info.total_space; *used_space = info.used_space; } @@ -1113,8 +1090,6 @@ void toku_ft_get_garbage(FT ft, uint64_t *total_space, uint64_t *used_space) { #error #endif - - #define xstr(X) str(X) #define str(X) #X #define static_version_string xstr(DB_VERSION_MAJOR) "." \ @@ -1124,10 +1099,9 @@ void toku_ft_get_garbage(FT ft, uint64_t *total_space, uint64_t *used_space) { struct toku_product_name_strings_struct toku_product_name_strings; char toku_product_name[TOKU_MAX_PRODUCT_NAME_LENGTH]; -void -tokudb_update_product_name_strings(void) { - //DO ALL STRINGS HERE.. maybe have a separate FT layer version as well - { // Version string +void tokuft_update_product_name_strings(void) { + // DO ALL STRINGS HERE.. maybe have a separate FT layer version as well + { int n = snprintf(toku_product_name_strings.db_version, sizeof(toku_product_name_strings.db_version), "%s %s", toku_product_name, static_version_string); @@ -1179,7 +1153,7 @@ toku_single_process_lock(const char *lock_dir, const char *which, int *lockfd) { *lockfd = toku_os_lock_file(lockfname); if (*lockfd < 0) { int e = get_error_errno(); - fprintf(stderr, "Couldn't start tokudb because some other tokudb process is using the same directory [%s] for [%s]\n", lock_dir, which); + fprintf(stderr, "Couldn't start tokuft because some other tokuft process is using the same directory [%s] for [%s]\n", lock_dir, which); return e; } return 0; @@ -1197,10 +1171,10 @@ toku_single_process_unlock(int *lockfd) { return 0; } -int tokudb_num_envs = 0; +int tokuft_num_envs = 0; int db_env_set_toku_product_name(const char *name) { - if (tokudb_num_envs > 0) { + if (tokuft_num_envs > 0) { return EINVAL; } if (!name || strlen(name) < 1) { @@ -1211,7 +1185,7 @@ db_env_set_toku_product_name(const char *name) { } if (strncmp(toku_product_name, name, sizeof(toku_product_name))) { strcpy(toku_product_name, name); - tokudb_update_product_name_strings(); + tokuft_update_product_name_strings(); } return 0; } diff --git a/storage/tokudb/ft-index/ft/ft.h b/storage/tokudb/ft-index/ft/ft.h index 92d1ba0b5ea83..336845475cc66 100644 --- a/storage/tokudb/ft-index/ft/ft.h +++ b/storage/tokudb/ft-index/ft/ft.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_H -#define FT_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,17 +86,20 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "fttypes.h" -#include "ybt.h" #include -#include "cachetable.h" -#include "log.h" -#include "ft-search.h" -#include "ft-ops.h" -#include "compress.h" + +#include "ft/cachetable/cachetable.h" +#include "ft/ft-ops.h" +#include "ft/logger/log.h" +#include "util/dbt.h" + +typedef struct ft *FT; +typedef struct ft_options *FT_OPTIONS; // unlink a ft from the filesystem with or without a txn. // if with a txn, then the unlink happens on commit. @@ -110,10 +111,13 @@ void toku_ft_destroy_reflock(FT ft); void toku_ft_grab_reflock(FT ft); void toku_ft_release_reflock(FT ft); +void toku_ft_lock(struct ft *ft); +void toku_ft_unlock(struct ft *ft); + void toku_ft_create(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn); -void toku_ft_free (FT h); +void toku_ft_free (FT ft); -int toku_read_ft_and_store_in_cachefile (FT_HANDLE brt, CACHEFILE cf, LSN max_acceptable_lsn, FT *header); +int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_h, CACHEFILE cf, LSN max_acceptable_lsn, FT *header); void toku_ft_note_ft_handle_open(FT ft, FT_HANDLE live); bool toku_ft_needed_unlocked(FT ft); @@ -123,10 +127,10 @@ bool toku_ft_has_one_reference_unlocked(FT ft); // will have to read in the ft in a new cachefile and new FT object. void toku_ft_evict_from_memory(FT ft, bool oplsn_valid, LSN oplsn); -FT_HANDLE toku_ft_get_only_existing_ft_handle(FT h); +FT_HANDLE toku_ft_get_only_existing_ft_handle(FT ft); -void toku_ft_note_hot_begin(FT_HANDLE brt); -void toku_ft_note_hot_complete(FT_HANDLE brt, bool success, MSN msn_at_start_of_hot); +void toku_ft_note_hot_begin(FT_HANDLE ft_h); +void toku_ft_note_hot_complete(FT_HANDLE ft_h, bool success, MSN msn_at_start_of_hot); void toku_ft_init( @@ -142,29 +146,29 @@ toku_ft_init( int toku_dictionary_redirect_abort(FT old_h, FT new_h, TOKUTXN txn) __attribute__ ((warn_unused_result)); int toku_dictionary_redirect (const char *dst_fname_in_env, FT_HANDLE old_ft, TOKUTXN txn); -void toku_reset_root_xid_that_created(FT h, TXNID new_root_xid_that_created); +void toku_reset_root_xid_that_created(FT ft, TXNID new_root_xid_that_created); // Reset the root_xid_that_created field to the given value. // This redefines which xid created the dictionary. 
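One structural change in ft.h worth calling out: the classic FT_H include guard removed above (its matching #endif is dropped near the end of this header's diff) is replaced by #pragma once. Both idioms prevent multiple inclusion; #pragma once is non-standard but widely supported. For reference, with the guard macro name taken from the old header:

    /* old ft.h: classic include guard */
    #ifndef FT_H
    #define FT_H
    /* ... declarations ... */
    #endif

    /* new ft.h: single-inclusion pragma */
    #pragma once
    /* ... declarations ... */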
-void toku_ft_add_txn_ref(FT h); -void toku_ft_remove_txn_ref(FT h); +void toku_ft_add_txn_ref(FT ft); +void toku_ft_remove_txn_ref(FT ft); -void toku_calculate_root_offset_pointer ( FT h, CACHEKEY* root_key, uint32_t *roothash); -void toku_ft_set_new_root_blocknum(FT h, CACHEKEY new_root_key); -LSN toku_ft_checkpoint_lsn(FT h) __attribute__ ((warn_unused_result)); -void toku_ft_stat64 (FT h, struct ftstat64_s *s); -void toku_ft_get_fractal_tree_info64 (FT h, struct ftinfo64 *s); +void toku_calculate_root_offset_pointer (FT ft, CACHEKEY* root_key, uint32_t *roothash); +void toku_ft_set_new_root_blocknum(FT ft, CACHEKEY new_root_key); +LSN toku_ft_checkpoint_lsn(FT ft) __attribute__ ((warn_unused_result)); +void toku_ft_stat64 (FT ft, struct ftstat64_s *s); +void toku_ft_get_fractal_tree_info64 (FT ft, struct ftinfo64 *s); int toku_ft_iterate_fractal_tree_block_map(FT ft, int (*iter)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*), void *iter_extra); // unconditionally set the descriptor for an open FT. can't do this when // any operation has already occurred on the ft. // see toku_ft_change_descriptor(), which is the transactional version // used by the ydb layer. it better describes the client contract. -void toku_ft_update_descriptor(FT ft, DESCRIPTOR d); +void toku_ft_update_descriptor(FT ft, DESCRIPTOR desc); // use this version if the FT is not fully user-opened with a valid cachefile. // this is a clean hack to get deserialization code to update a descriptor // while the FT and cf are in the process of opening, for upgrade purposes -void toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR d, int fd); +void toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR desc, int fd); void toku_ft_update_cmp_descriptor(FT ft); // get the descriptor for a ft. safe to read as long as clients honor the @@ -174,9 +178,17 @@ void toku_ft_update_cmp_descriptor(FT ft); DESCRIPTOR toku_ft_get_descriptor(FT_HANDLE ft_handle); DESCRIPTOR toku_ft_get_cmp_descriptor(FT_HANDLE ft_handle); +typedef struct { + // delta versions in basements could be negative + int64_t numrows; + int64_t numbytes; +} STAT64INFO_S, *STAT64INFO; +static const STAT64INFO_S ZEROSTATS = { .numrows = 0, .numbytes = 0}; + void toku_ft_update_stats(STAT64INFO headerstats, STAT64INFO_S delta); void toku_ft_decrease_stats(STAT64INFO headerstats, STAT64INFO_S delta); +typedef void (*remove_ft_ref_callback)(FT ft, void *extra); void toku_ft_remove_reference(FT ft, bool oplsn_valid, LSN oplsn, remove_ft_ref_callback remove_ref, void *extra); @@ -189,7 +201,6 @@ void toku_ft_set_compression_method(FT ft, enum toku_compression_method method); void toku_ft_get_compression_method(FT ft, enum toku_compression_method *methodp); void toku_ft_set_fanout(FT ft, unsigned int fanout); void toku_ft_get_fanout(FT ft, unsigned int *fanout); -void toku_node_save_ct_pair(CACHEKEY UU(key), void *value_data, PAIR p); // mark the ft as a blackhole. any message injections will be a no op. void toku_ft_set_blackhole(FT_HANDLE ft_handle); @@ -198,15 +209,17 @@ void toku_ft_set_blackhole(FT_HANDLE ft_handle); // The difference between the two is MVCC garbage. void toku_ft_get_garbage(FT ft, uint64_t *total_space, uint64_t *used_space); +// TODO: Should be in portability int get_num_cores(void); + +// TODO: Use the cachetable's worker pool instead of something managed by the FT... 
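The toku_ft_get_garbage() comment above notes that the difference between total space and used space is MVCC garbage. A minimal caller-side sketch, assuming an open FT named ft is in scope (the variable name is assumed; the signature is the one declared above):

    uint64_t total_space = 0, used_space = 0;
    toku_ft_get_garbage(ft, &total_space, &used_space);
    uint64_t garbage_bytes = total_space - used_space;   // MVCC garbage, per the comment above
    double garbage_ratio = total_space ? (double)garbage_bytes / (double)total_space : 0.0;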
struct toku_thread_pool *get_ft_pool(void); -void dump_bad_block(unsigned char *vp, uint64_t size); +// TODO: Should be in portability int toku_single_process_lock(const char *lock_dir, const char *which, int *lockfd); - int toku_single_process_unlock(int *lockfd); -void tokudb_update_product_name_strings(void); +void tokuft_update_product_name_strings(void); #define TOKU_MAX_PRODUCT_NAME_LENGTH (256) extern char toku_product_name[TOKU_MAX_PRODUCT_NAME_LENGTH]; @@ -219,5 +232,4 @@ struct toku_product_name_strings_struct { }; extern struct toku_product_name_strings_struct toku_product_name_strings; -extern int tokudb_num_envs; -#endif +extern int tokuft_num_envs; diff --git a/storage/tokudb/ft-index/ft/fttypes.h b/storage/tokudb/ft-index/ft/fttypes.h deleted file mode 100644 index 052d634288348..0000000000000 --- a/storage/tokudb/ft-index/ft/fttypes.h +++ /dev/null @@ -1,382 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FTTYPES_H -#define FTTYPES_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
- - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include -#ifndef _XOPEN_SOURCE -#define _XOPEN_SOURCE 500 -#endif -#define _FILE_OFFSET_BITS 64 - -#include "toku_assert.h" -#include -#include - - -// Use the C++ bool and constants (true false), rather than BOOL, TRUE, and FALSE. - -typedef struct ft_handle *FT_HANDLE; -typedef struct ftnode *FTNODE; -typedef struct ftnode_disk_data *FTNODE_DISK_DATA; -typedef struct ftnode_leaf_basement_node *BASEMENTNODE; -typedef struct ftnode_nonleaf_childinfo *NONLEAF_CHILDINFO; -typedef struct sub_block *SUB_BLOCK; -typedef struct ft *FT; -typedef struct ft_header *FT_HEADER; -typedef struct ft_options *FT_OPTIONS; - -struct wbuf; -struct dbuf; - -typedef unsigned int ITEMLEN; -typedef const void *bytevec; - -typedef int64_t DISKOFF; /* Offset in a disk. -1 is the NULL pointer. */ -typedef uint64_t TXNID; - -typedef struct txnid_pair_s { - TXNID parent_id64; - TXNID child_id64; -} TXNID_PAIR; - - -#define TXNID_NONE_LIVING ((TXNID)0) -#define TXNID_NONE ((TXNID)0) -#define TXNID_MAX ((TXNID)-1) - -static const TXNID_PAIR TXNID_PAIR_NONE = { .parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE }; - -typedef struct blocknum_s { int64_t b; } BLOCKNUM; // make a struct so that we will notice type problems. -typedef struct gid_s { uint8_t *gid; } GID; // the gid is of size [DB_GID_SIZE] -typedef TOKU_XA_XID *XIDP; // this is the type that's passed to the logger code (so that we don't have to copy all 152 bytes when only a subset are even valid.) -#define ROLLBACK_NONE ((BLOCKNUM){0}) - -static inline BLOCKNUM make_blocknum(int64_t b) { BLOCKNUM result={b}; return result; } - -// This struct hold information about values stored in the cachetable. -// As one can tell from the names, we are probably violating an -// abstraction layer by placing names. 
-// -// The purpose of having this struct is to have a way for the -// cachetable to accumulate the some totals we are interested in. -// Breaking this abstraction layer by having these names was the -// easiest way. -// -typedef struct pair_attr_s { - long size; // size PAIR's value takes in memory - long nonleaf_size; // size if PAIR is a nonleaf node, 0 otherwise, used only for engine status - long leaf_size; // size if PAIR is a leaf node, 0 otherwise, used only for engine status - long rollback_size; // size of PAIR is a rollback node, 0 otherwise, used only for engine status - long cache_pressure_size; // amount PAIR contributes to cache pressure, is sum of buffer sizes and workdone counts - bool is_valid; -} PAIR_ATTR; - -static inline PAIR_ATTR make_pair_attr(long size) { - PAIR_ATTR result={ - .size = size, - .nonleaf_size = 0, - .leaf_size = 0, - .rollback_size = 0, - .cache_pressure_size = 0, - .is_valid = true - }; - return result; -} - -typedef struct { - uint32_t len; - char *data; -} BYTESTRING; - -/* Log Sequence Number (LSN) - * Make the LSN be a struct instead of an integer so that we get better type checking. */ -typedef struct __toku_lsn { uint64_t lsn; } LSN; -#define ZERO_LSN ((LSN){0}) -#define MAX_LSN ((LSN){UINT64_MAX}) - -/* Message Sequence Number (MSN) - * Make the MSN be a struct instead of an integer so that we get better type checking. */ -typedef struct __toku_msn { uint64_t msn; } MSN; -#define ZERO_MSN ((MSN){0}) // dummy used for message construction, to be filled in when msg is applied to tree -#define MIN_MSN ((MSN){(uint64_t)1 << 62}) // first 2^62 values reserved for messages created before Dr. No (for upgrade) -#define MAX_MSN ((MSN){UINT64_MAX}) - -typedef struct { - int64_t numrows; // delta versions in basements could be negative - int64_t numbytes; -} STAT64INFO_S, *STAT64INFO; - -static const STAT64INFO_S ZEROSTATS = {0,0}; - -/* At the brt layer, a FILENUM uniquely identifies an open file. - * At the ydb layer, a DICTIONARY_ID uniquely identifies an open dictionary. - * With the introduction of the loader (ticket 2216), it is possible for the file that holds - * an open dictionary to change, so these are now separate and independent unique identifiers. - */ -typedef struct {uint32_t fileid;} FILENUM; -#define FILENUM_NONE ((FILENUM){UINT32_MAX}) - -typedef struct {uint64_t dictid;} DICTIONARY_ID; -#define DICTIONARY_ID_NONE ((DICTIONARY_ID){0}) - -typedef struct { - uint32_t num; - FILENUM *filenums; -} FILENUMS; - -typedef struct tokulogger *TOKULOGGER; -typedef struct txn_manager *TXN_MANAGER; -#define NULL_LOGGER ((TOKULOGGER)0) -typedef struct tokutxn *TOKUTXN; -typedef struct txninfo *TXNINFO; -#define NULL_TXN ((TOKUTXN)0) - -struct logged_btt_pair { - DISKOFF off; - int32_t size; -}; - -typedef struct cachetable *CACHETABLE; -typedef struct cachefile *CACHEFILE; -typedef struct ctpair *PAIR; -typedef class checkpointer *CHECKPOINTER; -typedef class bn_data *BN_DATA; - -/* tree command types */ -enum ft_msg_type { - FT_NONE = 0, - FT_INSERT = 1, - FT_DELETE_ANY = 2, // Delete any matching key. This used to be called FT_DELETE. - //FT_DELETE_BOTH = 3, - FT_ABORT_ANY = 4, // Abort any commands on any matching key. - //FT_ABORT_BOTH = 5, // Abort commands that match both the key and the value - FT_COMMIT_ANY = 6, - //FT_COMMIT_BOTH = 7, - FT_COMMIT_BROADCAST_ALL = 8, // Broadcast to all leafentries, (commit all transactions). - FT_COMMIT_BROADCAST_TXN = 9, // Broadcast to all leafentries, (commit specific transaction). 
- FT_ABORT_BROADCAST_TXN = 10, // Broadcast to all leafentries, (commit specific transaction). - FT_INSERT_NO_OVERWRITE = 11, - FT_OPTIMIZE = 12, // Broadcast - FT_OPTIMIZE_FOR_UPGRADE = 13, // same as FT_OPTIMIZE, but record version number in leafnode - FT_UPDATE = 14, - FT_UPDATE_BROADCAST_ALL = 15 -}; - -static inline bool -ft_msg_type_applies_once(enum ft_msg_type type) -{ - bool ret_val; - switch (type) { - case FT_INSERT_NO_OVERWRITE: - case FT_INSERT: - case FT_DELETE_ANY: - case FT_ABORT_ANY: - case FT_COMMIT_ANY: - case FT_UPDATE: - ret_val = true; - break; - case FT_COMMIT_BROADCAST_ALL: - case FT_COMMIT_BROADCAST_TXN: - case FT_ABORT_BROADCAST_TXN: - case FT_OPTIMIZE: - case FT_OPTIMIZE_FOR_UPGRADE: - case FT_UPDATE_BROADCAST_ALL: - case FT_NONE: - ret_val = false; - break; - default: - assert(false); - } - return ret_val; -} - -static inline bool -ft_msg_type_applies_all(enum ft_msg_type type) -{ - bool ret_val; - switch (type) { - case FT_NONE: - case FT_INSERT_NO_OVERWRITE: - case FT_INSERT: - case FT_DELETE_ANY: - case FT_ABORT_ANY: - case FT_COMMIT_ANY: - case FT_UPDATE: - ret_val = false; - break; - case FT_COMMIT_BROADCAST_ALL: - case FT_COMMIT_BROADCAST_TXN: - case FT_ABORT_BROADCAST_TXN: - case FT_OPTIMIZE: - case FT_OPTIMIZE_FOR_UPGRADE: - case FT_UPDATE_BROADCAST_ALL: - ret_val = true; - break; - default: - assert(false); - } - return ret_val; -} - -static inline bool -ft_msg_type_does_nothing(enum ft_msg_type type) -{ - return (type == FT_NONE); -} - -typedef struct xids_t *XIDS; -typedef struct fifo_msg_t *FIFO_MSG; -/* tree commands */ -struct ft_msg { - enum ft_msg_type type; - MSN msn; // message sequence number - XIDS xids; - union { - /* insert or delete */ - struct ft_cmd_insert_delete { - const DBT *key; // for insert, delete, upsertdel - const DBT *val; // for insert, delete, (and it is the "extra" for upsertdel, upsertdel_broadcast_all) - } id; - } u; -}; -// Message sent into brt to implement command (insert, delete, etc.) -// This structure supports nested transactions, and obsoletes ft_msg. -typedef struct ft_msg FT_MSG_S; -typedef struct ft_msg *FT_MSG; - -typedef int (*ft_compare_func)(DB *, const DBT *, const DBT *); -typedef void (*setval_func)(const DBT *, void *); -typedef int (*ft_update_func)(DB *, const DBT *, const DBT *, const DBT *, setval_func, void *); -typedef void (*on_redirect_callback)(FT_HANDLE, void*); -typedef void (*remove_ft_ref_callback)(FT, void*); - -#define UU(x) x __attribute__((__unused__)) - -typedef struct memarena *MEMARENA; -typedef struct rollback_log_node *ROLLBACK_LOG_NODE; -typedef struct serialized_rollback_log_node *SERIALIZED_ROLLBACK_LOG_NODE; - -// -// Types of snapshots that can be taken by a tokutxn -// - TXN_SNAPSHOT_NONE: means that there is no snapshot. Reads do not use snapshot reads. 
-// used for SERIALIZABLE and READ UNCOMMITTED -// - TXN_SNAPSHOT_ROOT: means that all tokutxns use their root transaction's snapshot -// used for REPEATABLE READ -// - TXN_SNAPSHOT_CHILD: means that each child tokutxn creates its own snapshot -// used for READ COMMITTED -// - -typedef enum __TXN_SNAPSHOT_TYPE { - TXN_SNAPSHOT_NONE=0, - TXN_SNAPSHOT_ROOT=1, - TXN_SNAPSHOT_CHILD=2 -} TXN_SNAPSHOT_TYPE; - -typedef struct ancestors *ANCESTORS; -typedef struct pivot_bounds const * const PIVOT_BOUNDS; -typedef struct ftnode_fetch_extra *FTNODE_FETCH_EXTRA; -typedef struct unlockers *UNLOCKERS; - -enum reactivity { - RE_STABLE, - RE_FUSIBLE, - RE_FISSIBLE -}; - -enum split_mode { - SPLIT_EVENLY, - SPLIT_LEFT_HEAVY, - SPLIT_RIGHT_HEAVY -}; - -#endif diff --git a/storage/tokudb/ft-index/ft/hash-benchmarks/hash-benchmark-manually-open.cc b/storage/tokudb/ft-index/ft/hash-benchmarks/hash-benchmark-manually-open.cc deleted file mode 100644 index 308bb35248289..0000000000000 --- a/storage/tokudb/ft-index/ft/hash-benchmarks/hash-benchmark-manually-open.cc +++ /dev/null @@ -1,378 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
- - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -/* Benchmark various hash functions. */ - -#include -#include -#include -#include -#include - -#define N 200000000 -char *buf; - -static double tdiff (struct timeval *a, struct timeval *b) { - return a->tv_sec - b->tv_sec + (1e-6)*(a->tv_usec - b->tv_usec); -} - -#define measure_bandwidth(str, body) ({ \ - int c; \ - struct timeval start,end; \ - gettimeofday(&start, 0); \ - body; \ - gettimeofday(&end, 0); \ - double diff = tdiff(&end, &start); \ - printf("%s=%08x %d bytes in %8.6fs for %8.3fMB/s\n", str, c, N, diff, N*(1e-6)/diff); \ - }) - -int sum32 (int start, void *buf, int bytecount) { - int *ibuf = buf; - assert(bytecount%4==0); - while (bytecount>0) { - start+=*ibuf; - ibuf++; - bytecount-=4; - } - return start; -} - -static const uint32_t m = 0x5bd1e995; -static const int r = 24; -static const uint32_t seed = 0x3dd3b51a; - -#define USE_ZERO_CHECKSUM 0 - -static uint32_t MurmurHash2 ( const void * key, int len) -{ - if (USE_ZERO_CHECKSUM) return 0; - - // 'm' and 'r' are mixing constants generated offline. - // They're not really 'magic', they just happen to work well. - - - // Initialize the hash to a 'random' value - - uint32_t h = seed; - - // Mix 4 bytes at a time into the hash - - const unsigned char * data = (const unsigned char *)key; - - while(len >= 4) - { - uint32_t k = *(uint32_t *)data; - - k *= m; - k ^= k >> r; - k *= m; - - h *= m; - h ^= k; - - data += 4; - len -= 4; - } - - // Handle the last few bytes of the input array - - switch(len) - { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; - h *= m; - }; - - // Do a few final mixes of the hash to ensure the last few - // bytes are well-incorporated. 
- - h ^= h >> 29; - h *= m; - h ^= h >> 31; - - return h; -} - -struct murmur { - int n_bytes_in_k; // How many bytes in k - uint32_t k; // These are the extra bytes. Bytes are shifted into the low-order bits. - uint32_t h; // The hash so far (up to the most recent 4-byte boundary) -}; - -void murmur_init (struct murmur *mm) { - mm->n_bytes_in_k=0; - mm->k =0; - mm->h = seed; -} - -#define MIX() ({ k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }) -#define LD1() data[0] -#define LD2() ((data[0]<<8) | data[1]) -#define LD3() ((data[0]<<16) | (data[1]<<8) | data[2]) -#define ADD1_0() (mm->k = LD1()) -#define ADD1() (mm->k = (k<<8) | LD1()) -#define ADD2_0() (mm->k = LD2()) -#define ADD2() (mm->k = (k<<16) | LD2()) -#define ADD3_0() (mm->k = LD3()) -#define ADD3() (mm->k = (k<<24) | LD3()) - -void murmur_add (struct murmur *mm, const void * key, unsigned int len) { - if (USE_ZERO_CHECKSUM) return; - if (len==0) return; - const int n_bytes_in_k = mm->n_bytes_in_k; - uint32_t k = mm->k; - const unsigned char *data = key; - uint32_t h = mm->h; - switch (n_bytes_in_k) { - case 0: - switch (len) { - case 1: ADD1_0(); mm->n_bytes_in_k = 1; mm->h=h; return; - case 2: ADD2_0(); mm->n_bytes_in_k = 2; mm->h=h; return; - case 3: ADD3_0(); mm->n_bytes_in_k = 3; mm->h=h; return; - default: break; - } - break; - case 1: - switch (len) { - case 1: ADD1(); mm->n_bytes_in_k = 2; mm->h=h; return; - case 2: ADD2(); mm->n_bytes_in_k = 3; mm->h=h; return; - case 3: ADD3(); mm->n_bytes_in_k = 0; MIX(); mm->h=h; return; - default: ADD3(); mm->n_bytes_in_k = 0; MIX(); len-=3; data+=3; break; - } - break; - case 2: - switch (len) { - case 1: ADD1(); mm->n_bytes_in_k = 3; mm->h=h; return; - case 2: ADD2(); mm->n_bytes_in_k = 0; MIX(); mm->h=h; return; - default: ADD2(); mm->n_bytes_in_k = 0; MIX(); len-=2; data+=2; break; - } - break; - case 3: - switch (len) { - case 1: ADD1(); mm->n_bytes_in_k = 0; MIX(); mm->h=h; return; - default: ADD1(); mm->n_bytes_in_k = 0; MIX(); len--; data++; break; - } - break; - default: assert(0); - } - - // We've used up the partial bytes at the beginning of k. - assert(mm->n_bytes_in_k==0); - while (len >= 4) { - uint32_t k = toku_dtoh32(*(uint32_t *)data); - //printf(" oldh=%08x k=%08x", h, k); - - k *= m; - k ^= k >> r; - k *= m; - - h *= m; - h ^= k; - - data += 4; - len -= 4; - //printf(" h=%08x\n", h); - } - mm->h=h; - //printf("%s:%d h=%08x\n", __FILE__, __LINE__, h); - { - uint32_t k=0; - switch (len) { - case 3: k = *data << 16; data++; - case 2: k |= *data << 8; data++; - case 1: k |= *data; - } - mm->k = k; - mm->n_bytes_in_k = len; - //printf("now extra=%08x (%d bytes) n_bytes=%d\n", mm->k, len, mm->n_bytes_in_k); - - } -} - -uint32_t murmur_finish (struct murmur *mm) { - if (USE_ZERO_CHECKSUM) return 0; - uint32_t h = mm->h; - if (mm->n_bytes_in_k>0) { - h ^= mm->k; - h *= m; - } - if (0) { - // The real murmur function does this extra mixing at the end. We don't need that for fingerprint. 
- h ^= h >> 29; - h *= m; - h ^= h >> 31; - } - return h; -} - -struct sum84 { - uint32_t sum; - int i; -}; -void sum84_init (struct sum84 *s) { s->sum=0; s->i=0; }; -void sum84_add (struct sum84 *s, char *buf, int count) { - while (s->i%4!=0 && count>0) { - char v = *buf; - s->sum ^= v << (s->i%4)*8; - buf++; count--; s->i++; - } - while (count>4) { - s->sum ^= *(int*)buf; - buf+=4; count-=4; - } - while (count>0) { - char v = *buf; - s->sum ^= v << (s->i%4)*8; - buf++; count--; s->i++; - } -} -int sum84_finish (struct sum84 *s) { - return s->sum; -} - -uint32_t xor8_add (uint32_t x, char *buf, int count) { - while (count>4) { - x ^= *(int*)buf; - buf+=4; count-=4; - } - while (count>0) { - char v = *buf; - x ^= v; - buf++; count--; - } - return x; -} -uint32_t xor8_finish (uint32_t x) { - return (x ^ (x>>8) ^ (x>>16) ^ (x>>24))&0xff; -} - -uint64_t xor8_64_add (uint64_t x, char *buf, int count) { - while (count>8) { - x ^= *(uint64_t*)buf; - buf+=8; count-=8; - } - while (count>0) { - char v = *buf; - x ^= v; - buf++; count--; - } - return x; -} -uint32_t xor8_64_finish (uint64_t x) { - return (x ^ (x>>8) ^ (x>>16) ^ (x>>24) ^ (x>>32) ^ (x>>40) ^ (x>>48) ^ (x>>56))&0xff; -} - -static void measure_bandwidths (void) { - measure_bandwidth("crc32 ", c=crc32(0, buf, N)); - measure_bandwidth("sum32 ", c=sum32(0, buf, N)); - measure_bandwidth("murmur ", c=MurmurHash2(buf, N)); - measure_bandwidth("murmurf ", ({ struct murmur mm; murmur_init(&mm); murmur_add(&mm, buf, N); c=murmur_finish(&mm); })); - measure_bandwidth("sum84 ", ({ struct sum84 s; sum84_init(&s); sum84_add(&s, buf, N); c=sum84_finish(&s); })); - measure_bandwidth("xor32 ", ({ c=0; int j; for(j=0; j -#include -#include -#include -#include -#include - -#define N 200000000 -#define PRINT 0 -//#define N 128 -//#define PRINT 1 -unsigned char *buf; - -static double tdiff (struct timeval *a, struct timeval *b) { - return a->tv_sec - b->tv_sec + (1e-6)*(a->tv_usec - b->tv_usec); -} - -#define measure_bandwidth(str, body) ({ \ - int c; \ - struct timeval start,end; \ - gettimeofday(&start, 0); \ - body; \ - gettimeofday(&end, 0); \ - double diff = tdiff(&end, &start); \ - printf("%s=%08x %d bytes in %8.6fs for %8.3fMB/s\n", str, c, N, diff, N*(1e-6)/diff); \ - }) - -int sum32 (void *buf, int bytecount) { - int *ibuf = buf; - int start = 0; - assert(bytecount%4==0); - while (bytecount>0) { - start+=*ibuf; - ibuf++; - bytecount-=4; - } - return start; -} - -uint64_t sum64 (void *buf, int bytecount) { - uint64_t *ibuf = buf; - uint64_t start = 0; - assert(bytecount%8==0); - while (bytecount>0) { - start+=*ibuf; - ibuf++; - bytecount-=8; - } - return start; -} - -static const uint32_t m = 0x5bd1e995; -static const int r = 24; -static const uint32_t seed = 0x3dd3b51a; - -#define USE_ZERO_CHECKSUM 0 - -static uint32_t MurmurHash2 ( const void * key, int len) -{ - if (USE_ZERO_CHECKSUM) return 0; - - // 'm' and 'r' are mixing constants generated offline. - // They're not really 'magic', they just happen to work well. 
- - - // Initialize the hash to a 'random' value - - uint32_t h = seed; - - // Mix 4 bytes at a time into the hash - - const unsigned char * data = (const unsigned char *)key; - - while(len >= 4) - { - uint32_t k = *(uint32_t *)data; - - k *= m; - k ^= k >> r; - k *= m; - - h *= m; - h ^= k; - - data += 4; - len -= 4; - } - - // Handle the last few bytes of the input array - - switch(len) - { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; - h *= m; - }; - - // Do a few final mixes of the hash to ensure the last few - // bytes are well-incorporated. - - h ^= h >> 29; - h *= m; - h ^= h >> 31; - - return h; -} - -struct murmur { - int n_bytes_in_k; // How many bytes in k - uint32_t k; // These are the extra bytes. Bytes are shifted into the low-order bits. - uint32_t h; // The hash so far (up to the most recent 4-byte boundary) -}; - -void murmur_init (struct murmur *mm) { - mm->n_bytes_in_k=0; - mm->k =0; - mm->h = seed; -} - -inline void murmur_add (struct murmur *mm, const void * key, unsigned int len) { - if (USE_ZERO_CHECKSUM) return; - assert(mm->n_bytes_in_k<4); - const unsigned char *data = key; - uint32_t h = mm->h; - { - int n_bytes_in_k = mm->n_bytes_in_k; - if (n_bytes_in_k>0) { - uint32_t k = mm->k; - while (n_bytes_in_k<4 && len>0) { - k = (k << 8) | *data; - n_bytes_in_k++; - data++; - len--; - } - if (n_bytes_in_k==4) { - //printf(" oldh=%08x k=%08x", h, k); - k *= m; - k ^= k >> r; - k *= m; - h *= m; - h ^= k; - mm->n_bytes_in_k = 0; - mm->k=0; - //printf(" h=%08x\n", h); - } else { - assert(len==0); - mm->n_bytes_in_k = n_bytes_in_k; - mm->k = k; - mm->h = h; - return; - } - } - } - // We've used up the partial bytes at the beginning of k. - assert(mm->n_bytes_in_k==0); - while (len >= 4) { - uint32_t k = toku_dtoh32(*(uint32_t *)data); - //printf(" oldh=%08x k=%08x", h, k); - - k *= m; - k ^= k >> r; - k *= m; - - h *= m; - h ^= k; - - data += 4; - len -= 4; - //printf(" h=%08x\n", h); - } - mm->h=h; - //printf("%s:%d h=%08x\n", __FILE__, __LINE__, h); - { - uint32_t k=0; - switch (len) { - case 3: k = *data << 16; data++; - case 2: k |= *data << 8; data++; - case 1: k |= *data; - } - mm->k = k; - mm->n_bytes_in_k = len; - //printf("now extra=%08x (%d bytes) n_bytes=%d\n", mm->k, len, mm->n_bytes_in_k); - - } -} - -uint32_t murmur_finish (struct murmur *mm) { - if (USE_ZERO_CHECKSUM) return 0; - uint32_t h = mm->h; - if (mm->n_bytes_in_k>0) { - h ^= mm->k; - h *= m; - } - if (0) { - // The real murmur function does this extra mixing at the end. We don't need that for fingerprint. 
- h ^= h >> 29; - h *= m; - h ^= h >> 31; - } - return h; -} - -struct sum84 { - uint32_t sum; - int i; -}; -void sum84_init (struct sum84 *s) { s->sum=0; s->i=0; }; -void sum84_add (struct sum84 *s, unsigned char *buf, int count) { - while (s->i%4!=0 && count>0) { - char v = *buf; - s->sum ^= v << (s->i%4)*8; - buf++; count--; s->i++; - } - while (count>4) { - s->sum ^= *(int*)buf; - buf+=4; count-=4; - } - while (count>0) { - char v = *buf; - s->sum ^= v << (s->i%4)*8; - buf++; count--; s->i++; - } -} -int sum84_finish (struct sum84 *s) { - return s->sum; -} - -uint32_t xor8_add (uint32_t x, unsigned char *buf, int count) { - while (count>4) { - x ^= *(int*)buf; - buf+=4; count-=4; - } - while (count>0) { - char v = *buf; - x ^= v; - buf++; count--; - } - return x; -} -uint32_t xor8_finish (uint32_t x) { - return (x ^ (x>>8) ^ (x>>16) ^ (x>>24))&0xff; -} - -uint64_t xor8_64_add (uint64_t x, unsigned char *buf, int count) { - while (count>8) { - x ^= *(uint64_t*)buf; - buf+=8; count-=8; - } - while (count>0) { - char v = *buf; - x ^= v; - buf++; count--; - } - return x; -} -uint32_t xor8_64_finish (uint64_t x) { - return (x ^ (x>>8) ^ (x>>16) ^ (x>>24) ^ (x>>32) ^ (x>>40) ^ (x>>48) ^ (x>>56))&0xff; -} - -#define BYFOUR -typedef long int ptrdiff_t; -typedef unsigned int u4; - -static const unsigned long crc_table[8][256] = -{ - { - 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, - 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, - 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, - 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, - 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, - 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, - 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, - 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, - 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, - 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, - 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, - 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, - 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, - 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, - 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, - 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, - 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, - 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, - 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, - 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, - 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, - 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, - 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, - 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, - 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, - 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, - 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, - 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, - 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, - 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, - 0x196c3671UL, 0x6e6b06e7UL, 
0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, - 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, - 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, - 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, - 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, - 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, - 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, - 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, - 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, - 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, - 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, - 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, - 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, - 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, - 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, - 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, - 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, - 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, - 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, - 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, - 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, - 0x2d02ef8dUL - - }, - { - 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, - 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, - 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, - 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, - 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, - 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, - 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, - 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, - 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, - 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, - 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, - 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, - 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, - 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, - 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, - 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, - 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, - 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, - 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, - 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, - 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, - 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, - 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, - 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, - 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, - 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, - 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, - 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, - 0x9736d747UL, 0x8e2de606UL, 
0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, - 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, - 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, - 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, - 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, - 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, - 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, - 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, - 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, - 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, - 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, - 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, - 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, - 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, - 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, - 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, - 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, - 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, - 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, - 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, - 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, - 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, - 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, - 0x9324fd72UL - }, - { - 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, - 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, - 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, - 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, - 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, - 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, - 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, - 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, - 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, - 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, - 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, - 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, - 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, - 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, - 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, - 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, - 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, - 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, - 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, - 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, - 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, - 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, - 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, - 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, - 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, - 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, - 0xe2b1cfeeUL, 0xe373a5d9UL, 
0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, - 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, - 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, - 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, - 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, - 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, - 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, - 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, - 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, - 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, - 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, - 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, - 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, - 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, - 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, - 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, - 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, - 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, - 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, - 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, - 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, - 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, - 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, - 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, - 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, - 0xbe9834edUL - }, - { - 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, - 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, - 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, - 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, - 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, - 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, - 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, - 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, - 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, - 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, - 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, - 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, - 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, - 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, - 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, - 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, - 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, - 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, - 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, - 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, - 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, - 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, - 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, - 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, - 0xae8b8873UL, 0x1637ef16UL, 
0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, - 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, - 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, - 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, - 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, - 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, - 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, - 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, - 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, - 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, - 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, - 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, - 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, - 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, - 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, - 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, - 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, - 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, - 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, - 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, - 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, - 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, - 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, - 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, - 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, - 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, - 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, - 0xde0506f1UL - }, - { - 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, - 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, - 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, - 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, - 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, - 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, - 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, - 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, - 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, - 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, - 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, - 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, - 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, - 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, - 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, - 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, - 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, - 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, - 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, - 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, - 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, - 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, - 0x5f4c0aaaUL, 0xc97c0dddUL, 
0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, - 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, - 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, - 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, - 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, - 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, - 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, - 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, - 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, - 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, - 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, - 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, - 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, - 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, - 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, - 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, - 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, - 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, - 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, - 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, - 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, - 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, - 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, - 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, - 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, - 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, - 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, - 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, - 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, - 0x8def022dUL - }, - { - 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, - 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, - 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, - 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, - 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, - 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, - 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, - 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, - 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, - 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, - 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, - 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, - 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, - 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, - 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, - 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, - 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, - 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, - 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, - 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, - 0xa3ae9101UL, 0xe29f8a18UL, 
0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, - 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, - 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, - 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, - 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, - 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, - 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, - 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, - 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, - 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, - 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, - 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, - 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, - 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, - 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, - 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, - 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, - 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, - 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, - 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, - 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, - 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, - 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, - 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, - 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, - 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, - 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, - 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, - 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, - 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, - 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, - 0x72fd2493UL - }, - { - 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, - 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, - 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, - 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, - 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, - 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, - 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, - 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, - 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, - 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, - 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, - 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, - 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, - 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, - 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, - 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, - 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, - 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, - 0x66ab2b61UL, 0x51c1e960UL, 
0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, - 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, - 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, - 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, - 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, - 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, - 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, - 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, - 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, - 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, - 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, - 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, - 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, - 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, - 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, - 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, - 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, - 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, - 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, - 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, - 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, - 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, - 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, - 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, - 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, - 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, - 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, - 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, - 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, - 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, - 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, - 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, - 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, - 0xed3498beUL - }, - { - 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, - 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, - 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, - 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, - 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, - 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, - 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, - 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, - 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, - 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, - 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, - 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, - 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, - 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, - 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, - 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, - 0xa20f0dcbUL, 0xc768b173UL, 
0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, - 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, - 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, - 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, - 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, - 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, - 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, - 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, - 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, - 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, - 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, - 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, - 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, - 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, - 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, - 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, - 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, - 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, - 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, - 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, - 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, - 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, - 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, - 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, - 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, - 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, - 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, - 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, - 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, - 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, - 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, - 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, - 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, - 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, - 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, - 0xf10605deUL - - } -}; -inline static unsigned long crc32_little (unsigned long, const unsigned char *, unsigned); -inline static unsigned long crc32_big (unsigned long, const unsigned char *, unsigned); - -#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) -#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 - -/* ========================================================================= */ -inline unsigned long ZEXPORT crc32_local(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - unsigned len; -{ - if (buf == Z_NULL) return 0UL; - -#ifdef DYNAMIC_CRC_TABLE - if (crc_table_empty) - make_crc_table(); -#endif /* DYNAMIC_CRC_TABLE */ - -#ifdef BYFOUR - if (sizeof(void *) == sizeof(ptrdiff_t)) { - u4 endian; - - endian = 1; - if (*((unsigned char *)(&endian))) - return crc32_little(crc, buf, len); - else - return crc32_big(crc, buf, len); - } -#endif /* BYFOUR */ - crc = crc ^ 0xffffffffUL; - while (len >= 8) { - DO8; - len -= 8; - } - if (len) do { - DO1; - } while (--len); - return crc ^ 0xffffffffUL; -} - 
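The DO1/DO8 macros above implement the standard single-table, byte-at-a-time CRC-32 update; the layout of this deleted code closely follows zlib's crc32.c, and the additional 256-entry tables exist only so that the word-at-a-time BYFOUR paths below (DOLIT4/DOBIG4) can fold four bytes per round of lookups. A minimal standalone sketch of the single-table form, assuming the usual reflected polynomial 0xedb88320 (the names crc_table0, make_crc_table0 and crc32_bytewise are illustrative, not taken from the deleted code):

    #include <stddef.h>
    #include <stdint.h>

    static uint32_t crc_table0[256];

    /* Build the lookup table for the reflected CRC-32 polynomial. */
    static void make_crc_table0(void) {
        for (uint32_t n = 0; n < 256; n++) {
            uint32_t c = n;
            for (int k = 0; k < 8; k++)
                c = (c & 1) ? 0xedb88320u ^ (c >> 1) : (c >> 1);
            crc_table0[n] = c;
        }
    }

    /* One table lookup per input byte; the loop body is what DO1 expands to. */
    static uint32_t crc32_bytewise(uint32_t crc, const unsigned char *buf, size_t len) {
        crc ^= 0xffffffffu;                                      /* pre-invert, as crc32_local does  */
        while (len--)
            crc = crc_table0[(crc ^ *buf++) & 0xff] ^ (crc >> 8);
        return crc ^ 0xffffffffu;                                /* post-invert before returning     */
    }

The BYFOUR variants that follow trade table space for speed: they read an aligned 32-bit word and replace four dependent single-byte steps with four independent lookups into tables [0]..[3] (little-endian path) or [4]..[7] (big-endian path).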
-#ifdef BYFOUR -# define REV(w) (((w)>>24)+(((w)>>8)&0xff00)+ \ - (((w)&0xff00)<<8)+(((w)&0xff)<<24)) -#define local static - -/* ========================================================================= */ -#define DOLIT4 c ^= *buf4++; \ - c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ - crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] -#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 - -/* ========================================================================= */ -inline local unsigned long crc32_little(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - unsigned len; -{ - register u4 c; - register const u4 FAR *buf4; - - c = (u4)crc; - c = ~c; - while (len && ((ptrdiff_t)buf & 3)) { - c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); - len--; - } - - buf4 = (const u4 FAR *)(const void FAR *)buf; - while (len >= 32) { - DOLIT32; - len -= 32; - } - while (len >= 4) { - DOLIT4; - len -= 4; - } - buf = (const unsigned char FAR *)buf4; - - if (len) do { - c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); - } while (--len); - c = ~c; - return (unsigned long)c; -} - -/* ========================================================================= */ -#define DOBIG4 c ^= *++buf4; \ - c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ - crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] -#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 - -/* ========================================================================= */ -inline local unsigned long crc32_big(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - unsigned len; -{ - register u4 c; - register const u4 FAR *buf4; - - c = REV((u4)crc); - c = ~c; - while (len && ((ptrdiff_t)buf & 3)) { - c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); - len--; - } - - buf4 = (const u4 FAR *)(const void FAR *)buf; - buf4--; - while (len >= 32) { - DOBIG32; - len -= 32; - } - while (len >= 4) { - DOBIG4; - len -= 4; - } - buf4++; - buf = (const unsigned char FAR *)buf4; - - if (len) do { - c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); - } while (--len); - c = ~c; - return (unsigned long)(REV(c)); -} - -#endif /* BYFOUR */ - -// Character-by-character implementation of x17. 
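The x17 checksum that the rest of this deleted benchmark code exercises is the simple recurrence c = c*17 + byte over the input. The 64-bit flavor (l17_fast64 and the streaming struct l1764, presumably the ancestor of the x1764 checksum used elsewhere in the tree) applies the same recurrence to little-endian 64-bit words and keeps only the low 32 bits of the accumulator; the buffering in l1764_add exists so that adds of arbitrary length produce the same value as one pass over a contiguous buffer. A minimal sketch of both forms, assuming a little-endian host as the original does (function names here are illustrative, not taken from the deleted code):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Byte-at-a-time x17: c = c*17 + byte. */
    static uint32_t x17_bytes(const void *buf, size_t len) {
        const unsigned char *p = (const unsigned char *) buf;
        uint32_t c = 0;
        while (len--)
            c = c * 17 + *p++;
        return c;
    }

    /* Word-at-a-time variant: the same recurrence over little-endian 64-bit
     * words, truncated to 32 bits at the end (len assumed to be a multiple of 8). */
    static uint32_t x17_words64(const void *buf, size_t len) {
        const unsigned char *p = (const unsigned char *) buf;
        uint64_t sum = 0;
        for (size_t i = 0; i + 8 <= len; i += 8) {
            uint64_t w;
            memcpy(&w, p + i, sizeof w);   /* reads the word as *(uint64_t*) would on a little-endian host */
            sum = sum * 17 + w;
        }
        return (uint32_t)(sum & 0xffffffffu);
    }

Note that the byte and word forms are deliberately different checksums (a factor of 17 per byte versus per 8-byte word), which is why the benchmark asserts equality only among variants of the same 64-bit definition.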
-static uint32_t x17c (uint32_t c, void *buf, int len) { - int i; - unsigned char *cbuf=buf; - for (i=0; i2) { - uint16_t s = *(uint16_t*)cbuf; - c = c*17*17 + (s&0xff)*17 + (s>>8); - len-=2; cbuf+=2; - } - while (len) { - c = c*17 + *cbuf; - len--; cbuf++; - } - return c; -} - -static uint32_t x17i (uint32_t c, void *buf, int len) { - unsigned char *cbuf=buf; - while ((((long)cbuf)&3 ) && len) { - c = c*17 + *cbuf; - len--; cbuf++; - } - while (len>4) { - uint32_t l = *(uint32_t*)cbuf; - c = c*17*17*17*17 + (l&0xff)*17*17*17 + ((l>>8)&0xff)*17*17 + ((l>>16)&0xff)*17 + ((l>>24)&0xff); - len-=4; cbuf+=4; - } - while (len) { - c = c*17 + *cbuf; - len--; cbuf++; - } - return c; -} - -uint32_t l17_fast64 (const void *buf, int len) { - assert(len%8==0); - const uint64_t *lbuf=buf; - uint64_t c=0; - while (len>0) { - c = c*17 + *lbuf; - if (PRINT) printf("%d: c=%016lx sum=%016lx\n", __LINE__, *lbuf, c); - lbuf++; - len-=8; - } - return c&0xFFFFFFFF; -} -struct l1764 { - uint64_t sum; - uint64_t input; - int n_input_bytes; -}; -void l1764_init(struct l1764 *l) { - l->sum=0; - l->input=0; - l->n_input_bytes=0; -} -inline void l1764_add (struct l1764 *l, const void *vbuf, int len) { - if (PRINT) printf("%d: n_input_bytes=%d len=%d\n", __LINE__, l->n_input_bytes, len); - int n_input_bytes = l->n_input_bytes; - const unsigned char *cbuf = vbuf; - // Special case short inputs - if (len==1) { - uint64_t input = l->input | ((uint64_t)(*cbuf))<<(8*n_input_bytes); - n_input_bytes++; - if (n_input_bytes==8) { - l->sum = l->sum*17 + input; - l->n_input_bytes = 0; - l->input = 0; - } else { - l->input = input; - l->n_input_bytes = n_input_bytes; - } - return; - } else if (len==2) { - uint64_t input = l->input; - uint64_t thisv = ((uint64_t)(*(uint16_t*)cbuf)); - if (n_input_bytes==7) { - l->sum = l->sum*17 + (input | (thisv<<(8*7))); - l->input = thisv>>8; - l->n_input_bytes = 1; - } else if (n_input_bytes==6) { - l->sum = l->sum*17 + (input | (thisv<<(8*6))); - l->input = 0; - l->n_input_bytes = 0; - } else { - l->input = input | (thisv<<(8*n_input_bytes)); - l->n_input_bytes += 2; - } - return; - } - - uint64_t sum; - //assert(len>=0); - if (n_input_bytes) { - uint64_t input = l->input; - if (len>=8) { - sum = l->sum; - while (len>=8) { - uint64_t thisv = *(uint64_t*)cbuf; - input |= thisv<<(8*n_input_bytes); - sum = sum*17 + input; - if (PRINT) printf("%d: input=%016lx sum=%016lx\n", __LINE__, input, sum); - input = thisv>>(8*(8-n_input_bytes)); - if (PRINT) printf("%d: input=%016lx\n", __LINE__, input); - len-=8; - cbuf+=8; - // n_input_bytes remains unchanged - if (PRINT) printf("%d: n_input_bytes=%d len=%d\n", __LINE__, l->n_input_bytes, len); - } - l->sum = sum; - } - if (len>=4) { - uint64_t thisv = *(uint32_t*)cbuf; - if (n_input_bytes<4) { - input |= thisv<<(8*n_input_bytes); - if (PRINT) printf("%d: input=%016lx\n", __LINE__, input); - n_input_bytes+=4; - } else { - input |= thisv<<(8*n_input_bytes); - l->sum = l->sum*17 + input; - if (PRINT) printf("%d: input=%016lx sum=%016lx\n", __LINE__, input, l->sum); - input = thisv>>(8*(8-n_input_bytes)); - n_input_bytes-=4; - if (PRINT) printf("%d: input=%016lx n_input_bytes=%d\n", __LINE__, input, n_input_bytes); - } - len-=4; - cbuf+=4; - if (PRINT) printf("%d: len=%d\n", __LINE__, len); - } - //assert(n_input_bytes<=8); - while (n_input_bytes<8 && len) { - input |= ((uint64_t)(*cbuf))<<(8*n_input_bytes); - n_input_bytes++; - cbuf++; - len--; - } - //assert(len>=0); - if (n_input_bytes<8) { - //assert(len==0); - l->input = input; - l->n_input_bytes = 
n_input_bytes; - if (PRINT) printf("%d: n_input_bytes=%d\n", __LINE__, l->n_input_bytes); - return; - } - sum = l->sum*17 + input; - } else { - //assert(len>=0); - sum = l->sum; - } - //assert(len>=0); - while (len>=8) { - sum = sum*17 + *(uint64_t*)cbuf; - cbuf+=8; - len -=8; - } - l->sum = sum; - n_input_bytes = 0; - uint64_t input; - l->n_input_bytes = len; - // Surprisingly, the loop is the fastest on bradley's laptop. - if (1) { - int i; - input=0; - for (i=0; i=4) { input = ((uint64_t)(*(uint32_t*)(cbuf))); cbuf+=4; len-=4; i=4;} - if (len>=2) { input |= ((uint64_t)(*(uint16_t*)(cbuf)))<<(i*8); cbuf+=2; len-=2; i+=2; } - if (len>=1) { input |= ((uint64_t)(*(uint8_t *)(cbuf)))<<(i*8); /*cbuf+=1; len-=1; i++;*/ } - } - l->input = input; - if (PRINT) printf("%d: n_input_bytes=%d\n", __LINE__, l->n_input_bytes); -} -uint32_t l1764_finish (struct l1764 *l) { - if (PRINT) printf("%d: n_input_bytes=%d\n", __LINE__, l->n_input_bytes); - assert(l->n_input_bytes==0); - return (l->sum)&0xffffffff; -} - -uint32_t l17_fast (const void *buf, int len) { - assert(len%4==0); - const uint32_t *lbuf=buf; - uint32_t c=0; - while (len>0) { - c = c*17 + *lbuf; - lbuf++; - len-=4; - } - return c; -} - -struct l17 { - uint32_t sum; - uint32_t input; - int input_len; -}; -void l17_init (struct l17 *l17) { - l17->sum=0; - l17->input=0; - l17->input_len=0; -} -void l17_add (struct l17 *l17, const void *buf, int len) { - const unsigned char *cbuf=buf; - while (l17->input_len%4 && len) { - l17->input_len |= (*cbuf<<(8*(3-l17->input_len))); - } -} - - -#define Nu N - -static void measure_bandwidths (void) { - int canon; // what is the results supposed to be. - measure_bandwidth("l17fast ", c=l17_fast(buf, N)); - measure_bandwidth("l17fast64", canon=c=l17_fast64(buf, N)); - measure_bandwidth("l17f64i ", ({ struct l1764 l; l1764_init(&l); l1764_add(&l, buf, Nu); c=l1764_finish(&l); assert(canon==c); })); - measure_bandwidth("l17f64ib1", ({ struct l1764 l; l1764_init(&l); int j; for(j=0; j - -#if 0 -int toku_keycompare (bytevec key1b, ITEMLEN key1len, bytevec key2b, ITEMLEN key2len) { - const unsigned char *key1 = key1b; - const unsigned char *key2 = key2b; - while (key1len > 0 && key2len > 0) { - unsigned char b1 = key1[0]; - unsigned char b2 = key2[0]; - if (b1b2) return 1; - key1len--; key1++; - key2len--; key2++; - } - if (key1lenkey2len) return 1; - return 0; -} - -#elif 0 -int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len) { - if (key1len==key2len) { - return memcmp(key1,key2,key1len); - } else if (key1len=0) return 1; /* If the keys are the same up to 2's length, then return 1 since key1 is longer than key2 */ - else return -1; - } -} -#elif 0 -/* This one looks tighter, but it does use memcmp... */ -int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len) { - int comparelen = key1len0; - k1++, k2++, comparelen--) { - if (*k1 != *k2) { - return (int)*k1-(int)*k2; - } - } - if (key1lenkey2len) return 1; - return 0; -} -#else -/* unroll that one four times */ -// when a and b are chars, return a-b is safe here because return type is int. No over/underflow possible. 
-int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len) { - int comparelen = key1len4; - k1+=4, k2+=4, comparelen-=4) { - { int v1=k1[0], v2=k2[0]; if (v1!=v2) return v1-v2; } - { int v1=k1[1], v2=k2[1]; if (v1!=v2) return v1-v2; } - { int v1=k1[2], v2=k2[2]; if (v1!=v2) return v1-v2; } - { int v1=k1[3], v2=k2[3]; if (v1!=v2) return v1-v2; } - } - for (; - comparelen>0; - k1++, k2++, comparelen--) { - if (*k1 != *k2) { - return (int)*k1-(int)*k2; - } - } - if (key1lenkey2len) return 1; - return 0; -} - -#endif - -int -toku_builtin_compare_fun (DB *db __attribute__((__unused__)), const DBT *a, const DBT*b) { - return toku_keycompare(a->data, a->size, b->data, b->size); -} diff --git a/storage/tokudb/ft-index/ft/key.h b/storage/tokudb/ft-index/ft/key.h deleted file mode 100644 index cf32e9d72496a..0000000000000 --- a/storage/tokudb/ft-index/ft/key.h +++ /dev/null @@ -1,104 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_KEY_H -#define TOKU_KEY_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
- - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "ybt.h" -#include "fttypes.h" - -int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len); - -void toku_test_keycompare (void) ; - -int toku_builtin_compare_fun (DB *, const DBT *, const DBT*) __attribute__((__visibility__("default"))); - -#endif diff --git a/storage/tokudb/ft-index/ft/le-cursor.cc b/storage/tokudb/ft-index/ft/le-cursor.cc index b08fc62632c01..f840c021fd26e 100644 --- a/storage/tokudb/ft-index/ft/le-cursor.cc +++ b/storage/tokudb/ft-index/ft/le-cursor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,9 +89,10 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "ft.h" -#include "ft-internal.h" -#include "le-cursor.h" +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/le-cursor.h" +#include "ft/cursor.h" // A LE_CURSOR is a special purpose FT_CURSOR that: // - enables prefetching @@ -100,10 +101,6 @@ PATENT RIGHTS GRANT: // A LE_CURSOR is good for scanning a FT from beginning to end. Useful for hot indexing. struct le_cursor { - // TODO: remove DBs from the ft layer comparison function - // so this is never necessary - // use a fake db for comparisons. 
- struct __toku_db fake_db; FT_CURSOR ft_cursor; bool neg_infinity; // true when the le cursor is positioned at -infinity (initial setting) bool pos_infinity; // true when the le cursor is positioned at +infinity (when _next returns DB_NOTFOUND) @@ -123,8 +120,6 @@ toku_le_cursor_create(LE_CURSOR *le_cursor_result, FT_HANDLE ft_handle, TOKUTXN toku_ft_cursor_set_leaf_mode(le_cursor->ft_cursor); le_cursor->neg_infinity = false; le_cursor->pos_infinity = true; - // zero out the fake DB. this is a rare operation so it's not too slow. - memset(&le_cursor->fake_db, 0, sizeof(le_cursor->fake_db)); } } @@ -169,13 +164,9 @@ toku_le_cursor_is_key_greater_or_equal(LE_CURSOR le_cursor, const DBT *key) { } else if (le_cursor->pos_infinity) { result = false; // all keys are less than +infinity } else { - // get the comparison function and descriptor from the cursor's ft - FT_HANDLE ft_handle = le_cursor->ft_cursor->ft_handle; - ft_compare_func keycompare = toku_ft_get_bt_compare(ft_handle); - le_cursor->fake_db.cmp_descriptor = toku_ft_get_cmp_descriptor(ft_handle); + FT ft = le_cursor->ft_cursor->ft_handle->ft; // get the current position from the cursor and compare it to the given key. - DBT *cursor_key = &le_cursor->ft_cursor->key; - int r = keycompare(&le_cursor->fake_db, cursor_key, key); + int r = ft->cmp(&le_cursor->ft_cursor->key, key); if (r <= 0) { result = true; // key is right of the cursor key } else { diff --git a/storage/tokudb/ft-index/ft/le-cursor.h b/storage/tokudb/ft-index/ft/le-cursor.h index eac5a4e5de4d5..2fc5e09bc2c36 100644 --- a/storage/tokudb/ft-index/ft/le-cursor.h +++ b/storage/tokudb/ft-index/ft/le-cursor.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,13 +86,12 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef LE_CURSOR_H -#define LE_CURSOR_H - -#include "ft-ops.h" +#include "ft/ft-internal.h" // A leaf entry cursor (LE_CURSOR) is a special type of FT_CURSOR that visits all of the leaf entries in a tree // and returns the leaf entry to the caller. It maintains a copy of the key that it was last positioned over to @@ -104,10 +103,10 @@ PATENT RIGHTS GRANT: typedef struct le_cursor *LE_CURSOR; -// Create a leaf cursor for a tree (brt) within a transaction (txn) +// Create a leaf cursor for a tree (ft_h) within a transaction (txn) // Success: returns 0, stores the LE_CURSOR in the le_cursor_result // Failure: returns a non-zero error number -int toku_le_cursor_create(LE_CURSOR *le_cursor_result, FT_HANDLE brt, TOKUTXN txn); +int toku_le_cursor_create(LE_CURSOR *le_cursor_result, FT_HANDLE ft_h, TOKUTXN txn); // Close and free the LE_CURSOR void toku_le_cursor_close(LE_CURSOR le_cursor); @@ -127,5 +126,3 @@ bool toku_le_cursor_is_key_greater_or_equal(LE_CURSOR le_cursor, const DBT *key) // extracts position of le_cursor into estimate. Responsibility of caller to handle // thread safety. 
Caller (the indexer), does so by ensuring indexer lock is held void toku_le_cursor_update_estimate(LE_CURSOR le_cursor, DBT* estimate); - -#endif diff --git a/storage/tokudb/ft-index/ft/leafentry.cc b/storage/tokudb/ft-index/ft/leafentry.cc index bcd3cf01b0cc6..075f29fa1911e 100644 --- a/storage/tokudb/ft-index/ft/leafentry.cc +++ b/storage/tokudb/ft-index/ft/leafentry.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "wbuf.h" +#include "serialize/wbuf.h" #include "leafentry.h" void wbuf_nocrc_LEAFENTRY(struct wbuf *w, LEAFENTRY le) { diff --git a/storage/tokudb/ft-index/ft/leafentry.h b/storage/tokudb/ft-index/ft/leafentry.h index b664d1675a184..eddd49481fb50 100644 --- a/storage/tokudb/ft-index/ft/leafentry.h +++ b/storage/tokudb/ft-index/ft/leafentry.h @@ -1,9 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_LEAFENTRY_H -#define TOKU_LEAFENTRY_H - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -33,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,17 +87,19 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include #include +#include -#include "txn_manager.h" -#include "rbuf.h" -#include "x1764.h" -#include "omt.h" +#include "ft/txn/txn_manager.h" +#include "ft/serialize/rbuf.h" +#include "ft/msg.h" /* Memory format of packed leaf entry @@ -212,6 +211,7 @@ void wbuf_nocrc_LEAFENTRY(struct wbuf *w, LEAFENTRY le); int print_klpair (FILE *outf, const void* key, uint32_t keylen, LEAFENTRY v); // Print a leafentry out in human-readable form. int le_latest_is_del(LEAFENTRY le); // Return true if it is a provisional delete. 
+int le_val_is_del(LEAFENTRY le, bool is_snapshot_read, TOKUTXN txn); // Returns true if the value that is to be read is empty bool le_is_clean(LEAFENTRY le); //Return how many xids exist (0 does not count) bool le_has_xids(LEAFENTRY le, XIDS xids); // Return true transaction represented by xids is still provisional in this leafentry (le's xid stack is a superset or equal to xids) void* le_latest_val (LEAFENTRY le); // Return the latest val (return NULL for provisional deletes) @@ -228,10 +228,13 @@ uint64_t le_outermost_uncommitted_xid (LEAFENTRY le); // r|r!=0&&r!=TOKUDB_ACCEPT: Quit early, return r, because something unexpected went wrong (error case) typedef int(*LE_ITERATE_CALLBACK)(TXNID id, TOKUTXN context); -int le_iterate_is_del(LEAFENTRY le, LE_ITERATE_CALLBACK f, bool *is_empty, TOKUTXN context); - int le_iterate_val(LEAFENTRY le, LE_ITERATE_CALLBACK f, void** valpp, uint32_t *vallenp, TOKUTXN context); +void le_extract_val(LEAFENTRY le, + // should we return the entire leafentry as the val? + bool is_leaf_mode, bool is_snapshot_read, + TOKUTXN ttxn, uint32_t *vallen, void **val); + size_t leafentry_disksize_13(LEAFENTRY_13 le); @@ -242,11 +245,14 @@ toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry, // NULL if there was no stored size_t *new_leafentry_memorysize, LEAFENTRY *new_leafentry_p); +class bn_data; + void -toku_le_apply_msg(FT_MSG msg, +toku_le_apply_msg(const ft_msg &msg, LEAFENTRY old_leafentry, // NULL if there was no stored data. bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced + uint32_t old_keylen, txn_gc_info *gc_info, LEAFENTRY *new_leafentry_p, int64_t * numbytes_delta_p); @@ -262,6 +268,3 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry, txn_gc_info *gc_info, LEAFENTRY *new_leaf_entry, int64_t * numbytes_delta_p); - -#endif /* TOKU_LEAFENTRY_H */ - diff --git a/storage/tokudb/ft-index/ft/ftloader-callback.cc b/storage/tokudb/ft-index/ft/loader/callbacks.cc similarity index 98% rename from storage/tokudb/ft-index/ft/ftloader-callback.cc rename to storage/tokudb/ft-index/ft/loader/callbacks.cc index 3472d294551bf..40069c144f6a1 100644 --- a/storage/tokudb/ft-index/ft/ftloader-callback.cc +++ b/storage/tokudb/ft-index/ft/loader/callbacks.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,8 +95,8 @@ PATENT RIGHTS GRANT: #include #include -#include "ftloader-internal.h" -#include "ybt.h" +#include "loader/loader-internal.h" +#include "util/dbt.h" static void error_callback_lock(ft_loader_error_callback loader_error) { toku_mutex_lock(&loader_error->mutex); diff --git a/storage/tokudb/ft-index/ft/dbufio.cc b/storage/tokudb/ft-index/ft/loader/dbufio.cc similarity index 98% rename from storage/tokudb/ft-index/ft/dbufio.cc rename to storage/tokudb/ft-index/ft/loader/dbufio.cc index 69b3bd8e936e4..c3f72e14ab128 100644 --- a/storage/tokudb/ft-index/ft/dbufio.cc +++ b/storage/tokudb/ft-index/ft/loader/dbufio.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,16 +89,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "dbufio.h" -#include "fttypes.h" -#include #include -#include -#include "memory.h" #include -#include "ftloader-internal.h" -#include "ft-internal.h" -#include "ft.h" +#include + +#include "portability/toku_assert.h" +#include "portability/memory.h" + +#include "ft/ft-internal.h" +#include "ft/serialize/ft_node-serialize.h" +#include "loader/dbufio.h" +#include "loader/loader-internal.h" struct dbufio_file { // i/o thread owns these diff --git a/storage/tokudb/ft-index/ft/dbufio.h b/storage/tokudb/ft-index/ft/loader/dbufio.h similarity index 98% rename from storage/tokudb/ft-index/ft/dbufio.h rename to storage/tokudb/ft-index/ft/loader/dbufio.h index 0762bf9a8c6d7..da31f22277dcf 100644 --- a/storage/tokudb/ft-index/ft/dbufio.h +++ b/storage/tokudb/ft-index/ft/loader/dbufio.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_DBUFIO_H -#define TOKU_DBUFIO_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #include @@ -108,5 +108,3 @@ int dbufio_fileset_read (DBUFIO_FILESET bfs, int filenum, void *buf_v, size_t co int panic_dbufio_fileset(DBUFIO_FILESET, int error); void dbufio_print(DBUFIO_FILESET); - -#endif diff --git a/storage/tokudb/ft-index/ft/ftloader-internal.h b/storage/tokudb/ft-index/ft/loader/loader-internal.h similarity index 89% rename from storage/tokudb/ft-index/ft/ftloader-internal.h rename to storage/tokudb/ft-index/ft/loader/loader-internal.h index 8ceac6527e647..ea1b9c5afa389 100644 --- a/storage/tokudb/ft-index/ft/ftloader-internal.h +++ b/storage/tokudb/ft-index/ft/loader/loader-internal.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FTLOADER_INTERNAL_H -#define FTLOADER_INTERNAL_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,27 +86,30 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #include -#include "fttypes.h" -#include "ftloader.h" -#include "queue.h" -#include -#include "dbufio.h" - -enum { EXTRACTOR_QUEUE_DEPTH = 2, - FILE_BUFFER_SIZE = 1<<24, - MIN_ROWSET_MEMORY = 1<<23, - MIN_MERGE_FANIN = 2, - FRACTAL_WRITER_QUEUE_DEPTH = 3, - FRACTAL_WRITER_ROWSETS = FRACTAL_WRITER_QUEUE_DEPTH + 2, - DBUFIO_DEPTH = 2, - TARGET_MERGE_BUF_SIZE = 1<<24, // we'd like the merge buffer to be this big. 
- MIN_MERGE_BUF_SIZE = 1<<20, // always use at least this much - MAX_UNCOMPRESSED_BUF = MIN_MERGE_BUF_SIZE -}; +#include "portability/toku_pthread.h" + +#include "loader/dbufio.h" +#include "loader/loader.h" +#include "util/queue.h" + +enum { + EXTRACTOR_QUEUE_DEPTH = 2, + FILE_BUFFER_SIZE = 1<<24, + MIN_ROWSET_MEMORY = 1<<23, + MIN_MERGE_FANIN = 2, + FRACTAL_WRITER_QUEUE_DEPTH = 3, + FRACTAL_WRITER_ROWSETS = FRACTAL_WRITER_QUEUE_DEPTH + 2, + DBUFIO_DEPTH = 2, + TARGET_MERGE_BUF_SIZE = 1<<24, // we'd like the merge buffer to be this big. + MIN_MERGE_BUF_SIZE = 1<<20, // always use at least this much + MAX_UNCOMPRESSED_BUF = MIN_MERGE_BUF_SIZE +}; /* These functions are exported to allow the tests to compile. */ @@ -257,7 +258,7 @@ struct ft_loader_s { int progress_callback_result; // initially zero, if any call to the poll function callback returns nonzero, we save the result here (and don't call the poll callback function again). - LSN load_lsn; //LSN of the fsynced 'load' log entry. Write this LSN (as checkpoint_lsn) in brt headers made by this loader. + LSN load_lsn; //LSN of the fsynced 'load' log entry. Write this LSN (as checkpoint_lsn) in ft headers made by this loader. TXNID load_root_xid; //(Root) transaction that performed the load. QUEUE *fractal_queues; // an array of work queues, one for each secondary index. @@ -280,7 +281,7 @@ uint64_t toku_ft_loader_get_n_rows(FTLOADER bl); struct fractal_thread_args { FTLOADER bl; const DESCRIPTOR descriptor; - int fd; // write the brt into tfd. + int fd; // write the ft into fd. int progress_allocation; QUEUE q; uint64_t total_disksize_estimate; @@ -312,17 +313,17 @@ int toku_merge_some_files_using_dbufio (const bool to_q, FIDX dest_data, QUEUE q int ft_loader_sort_and_write_rows (struct rowset *rows, struct merge_fileset *fs, FTLOADER bl, int which_db, DB *dest_db, ft_compare_func); // This is probably only for testing. 
-int toku_loader_write_brt_from_q_in_C (FTLOADER bl, - const DESCRIPTOR descriptor, - int fd, // write to here - int progress_allocation, - QUEUE q, - uint64_t total_disksize_estimate, - int which_db, - uint32_t target_nodesize, - uint32_t target_basementnodesize, - enum toku_compression_method target_compression_method, - uint32_t fanout); +int toku_loader_write_ft_from_q_in_C (FTLOADER bl, + const DESCRIPTOR descriptor, + int fd, // write to here + int progress_allocation, + QUEUE q, + uint64_t total_disksize_estimate, + int which_db, + uint32_t target_nodesize, + uint32_t target_basementnodesize, + enum toku_compression_method target_compression_method, + uint32_t fanout); int ft_loader_mergesort_row_array (struct row rows[/*n*/], int n, int which_db, DB *dest_db, ft_compare_func, FTLOADER, struct rowset *); @@ -339,7 +340,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp, CACHETABLE cachetable, generate_row_for_put_func g, DB *src_db, - int N, FT_HANDLE brts[/*N*/], DB* dbs[/*N*/], + int N, FT_HANDLE ft_hs[/*N*/], DB* dbs[/*N*/], const char *new_fnames_in_env[/*N*/], ft_compare_func bt_compare_functions[/*N*/], const char *temp_file_template, @@ -362,5 +363,3 @@ int toku_ft_loader_get_error(FTLOADER bl, int *loader_errno); void ft_loader_lock_init(FTLOADER bl); void ft_loader_lock_destroy(FTLOADER bl); void ft_loader_set_fractal_workers_count_from_c(FTLOADER bl); - -#endif // FTLOADER_INTERNAL_H diff --git a/storage/tokudb/ft-index/ft/ftloader.cc b/storage/tokudb/ft-index/ft/loader/loader.cc similarity index 96% rename from storage/tokudb/ft-index/ft/ftloader.cc rename to storage/tokudb/ft-index/ft/loader/loader.cc index 497f3138059fb..a6f41cd6b547c 100644 --- a/storage/tokudb/ft-index/ft/ftloader.cc +++ b/storage/tokudb/ft-index/ft/loader/loader.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -91,9 +91,7 @@ PATENT RIGHTS GRANT: #include -#if !TOKU_WINDOWS #include -#endif #include #include @@ -101,16 +99,21 @@ PATENT RIGHTS GRANT: #include #include #include -#include "x1764.h" -#include "ftloader-internal.h" -#include "ft-internal.h" -#include "sub_block.h" -#include "sub_block_map.h" -#include "pqueue.h" -#include "dbufio.h" -#include "leafentry.h" -#include "log-internal.h" -#include "ft.h" + +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/leafentry.h" +#include "ft/loader/loader-internal.h" +#include "ft/loader/pqueue.h" +#include "ft/loader/dbufio.h" +#include "ft/logger/log-internal.h" +#include "ft/node.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/serialize/sub_block.h" + +#include "util/x1764.h" static size_t (*os_fwrite_fun)(const void *,size_t,size_t,FILE*)=NULL; void ft_loader_set_os_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)) { @@ -423,7 +426,7 @@ void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error) { destroy_rowset(&bl->primary_rowset); if (bl->primary_rowset_queue) { - queue_destroy(bl->primary_rowset_queue); + toku_queue_destroy(bl->primary_rowset_queue); bl->primary_rowset_queue = nullptr; } @@ -541,7 +544,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp, CACHETABLE cachetable, generate_row_for_put_func g, DB *src_db, - int N, FT_HANDLE brts[/*N*/], DB* dbs[/*N*/], + int N, FT_HANDLE fts[/*N*/], DB* dbs[/*N*/], const char *new_fnames_in_env[/*N*/], ft_compare_func bt_compare_functions[/*N*/], const char *temp_file_template, @@ -585,11 +588,11 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp, #define SET_TO_MY_STRDUP(lval, s) do { char *v = toku_strdup(s); if (!v) { int r = get_error_errno(); toku_ft_loader_internal_destroy(bl, true); return r; } lval = v; } while (0) MY_CALLOC_N(N, bl->root_xids_that_created); - for (int i=0; iroot_xids_that_created[i]=brts[i]->ft->h->root_xid_that_created; + for (int i=0; iroot_xids_that_created[i]=fts[i]->ft->h->root_xid_that_created; MY_CALLOC_N(N, bl->dbs); - for (int i=0; idbs[i]=dbs[i]; + for (int i=0; idbs[i]=dbs[i]; MY_CALLOC_N(N, bl->descriptors); - for (int i=0; idescriptors[i]=&brts[i]->ft->descriptor; + for (int i=0; idescriptors[i]=&fts[i]->ft->descriptor; MY_CALLOC_N(N, bl->new_fnames_in_env); for (int i=0; inew_fnames_in_env[i], new_fnames_in_env[i]); MY_CALLOC_N(N, bl->extracted_datasizes); // the calloc_n zeroed everything, which is what we want @@ -629,7 +632,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp, int r = init_rowset(&bl->primary_rowset, memory_per_rowset_during_extract(bl)); if (r!=0) { toku_ft_loader_internal_destroy(bl, true); return r; } } - { int r = queue_create(&bl->primary_rowset_queue, EXTRACTOR_QUEUE_DEPTH); + { int r = toku_queue_create(&bl->primary_rowset_queue, EXTRACTOR_QUEUE_DEPTH); if (r!=0) { toku_ft_loader_internal_destroy(bl, true); return r; } } { @@ -641,11 +644,11 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp, return 0; } -int toku_ft_loader_open (/* out */ FTLOADER *blp, +int toku_ft_loader_open (FTLOADER *blp, /* out */ CACHETABLE cachetable, generate_row_for_put_func g, DB *src_db, - int N, FT_HANDLE brts[/*N*/], DB* dbs[/*N*/], + int N, FT_HANDLE fts[/*N*/], DB* dbs[/*N*/], const char *new_fnames_in_env[/*N*/], ft_compare_func bt_compare_functions[/*N*/], const char *temp_file_template, @@ -655,9 +658,9 @@ int toku_ft_loader_open (/* out */ FTLOADER *blp, uint64_t 
reserve_memory_size, bool compress_intermediates, bool allow_puts) { -// Effect: called by DB_ENV->create_loader to create a brt loader. +// Effect: called by DB_ENV->create_loader to create an ft loader. // Arguments: -// blp Return the brt loader here. +// blp Return a ft loader ("bulk loader") here. // g The function for generating a row // src_db The source database. Needed by g. May be NULL if that's ok with g. // N The number of dbs to create. @@ -672,7 +675,7 @@ int toku_ft_loader_open (/* out */ FTLOADER *blp, int result = 0; { int r = toku_ft_loader_internal_init(blp, cachetable, g, src_db, - N, brts, dbs, + N, fts, dbs, new_fnames_in_env, bt_compare_functions, temp_file_template, @@ -1138,7 +1141,7 @@ static void* extractor_thread (void *blv) { while (1) { void *item; { - int rq = queue_deq(bl->primary_rowset_queue, &item, NULL, NULL); + int rq = toku_queue_deq(bl->primary_rowset_queue, &item, NULL, NULL); if (rq==EOF) break; invariant(rq==0); // other errors are arbitrarily bad. } @@ -1169,7 +1172,7 @@ static void enqueue_for_extraction (FTLOADER bl) { struct rowset *XMALLOC(enqueue_me); *enqueue_me = bl->primary_rowset; zero_rowset(&bl->primary_rowset); - int r = queue_enq(bl->primary_rowset_queue, (void*)enqueue_me, 1, NULL); + int r = toku_queue_enq(bl->primary_rowset_queue, (void*)enqueue_me, 1, NULL); resource_assert_zero(r); } @@ -1206,7 +1209,7 @@ finish_extractor (FTLOADER bl) { } //printf("%s:%d please finish extraction\n", __FILE__, __LINE__); { - int r = queue_eof(bl->primary_rowset_queue); + int r = toku_queue_eof(bl->primary_rowset_queue); invariant(r==0); } //printf("%s:%d joining\n", __FILE__, __LINE__); @@ -1218,7 +1221,7 @@ finish_extractor (FTLOADER bl) { bl->extractor_live = false; } { - int r = queue_destroy(bl->primary_rowset_queue); + int r = toku_queue_destroy(bl->primary_rowset_queue); invariant(r==0); bl->primary_rowset_queue = nullptr; } @@ -1378,7 +1381,7 @@ static int process_primary_rows (FTLOADER bl, struct rowset *primary_rowset) { } int toku_ft_loader_put (FTLOADER bl, DBT *key, DBT *val) -/* Effect: Put a key-value pair into the brt loader. Called by DB_LOADER->put(). +/* Effect: Put a key-value pair into the ft loader. Called by DB_LOADER->put(). * Return value: 0 on success, an error number otherwise. 
*/ { @@ -1882,7 +1885,7 @@ int toku_merge_some_files_using_dbufio (const bool to_q, FIDX dest_data, QUEUE q if (to_q) { if (row_wont_fit(output_rowset, keys[mini].size + vals[mini].size)) { { - int r = queue_enq(q, (void*)output_rowset, 1, NULL); + int r = toku_queue_enq(q, (void*)output_rowset, 1, NULL); if (r!=0) { result = r; break; @@ -1958,7 +1961,7 @@ int toku_merge_some_files_using_dbufio (const bool to_q, FIDX dest_data, QUEUE q } if (result==0 && to_q) { - int r = queue_enq(q, (void*)output_rowset, 1, NULL); + int r = toku_queue_enq(q, (void*)output_rowset, 1, NULL); if (r!=0) result = r; else @@ -2149,7 +2152,7 @@ int merge_files (struct merge_fileset *fs, if (result) ft_loader_set_panic(bl, result, true, which_db, nullptr, nullptr); { - int r = queue_eof(output_q); + int r = toku_queue_eof(output_q); if (r!=0 && result==0) result = r; } // It's conceivable that the progress_allocation could be nonzero (for example if bl->N==0) @@ -2219,16 +2222,16 @@ struct dbout { int64_t n_translations_limit; struct translation *translation; toku_mutex_t mutex; - FT h; + FT ft; }; -static inline void dbout_init(struct dbout *out, FT h) { +static inline void dbout_init(struct dbout *out, FT ft) { out->fd = -1; out->current_off = 0; out->n_translations = out->n_translations_limit = 0; out->translation = NULL; toku_mutex_init(&out->mutex, NULL); - out->h = h; + out->ft = ft; } static inline void dbout_destroy(struct dbout *out) { @@ -2345,12 +2348,12 @@ static struct leaf_buf *start_leaf (struct dbout *out, const DESCRIPTOR UU(desc) lbuf->nkeys = lbuf->ndata = lbuf->dsize = 0; lbuf->off = 0; - lbuf->xids = xids_get_root_xids(); + lbuf->xids = toku_xids_get_root_xids(); if (xid != TXNID_NONE) { XIDS new_xids = NULL; - int r = xids_create_child(lbuf->xids, &new_xids, xid); + int r = toku_xids_create_child(lbuf->xids, &new_xids, xid); assert(r == 0 && new_xids); - xids_destroy(&lbuf->xids); + toku_xids_destroy(&lbuf->xids); lbuf->xids = new_xids; } @@ -2371,7 +2374,7 @@ static int write_header (struct dbout *out, long long translation_location_on_di static void drain_writer_q(QUEUE q) { void *item; while (1) { - int r = queue_deq(q, &item, NULL, NULL); + int r = toku_queue_deq(q, &item, NULL, NULL); if (r == EOF) break; invariant(r == 0); @@ -2501,7 +2504,7 @@ static int toku_loader_write_ft_from_q (FTLOADER bl, while (result == 0) { void *item; { - int rr = queue_deq(q, &item, NULL, NULL); + int rr = toku_queue_deq(q, &item, NULL, NULL); if (rr == EOF) break; if (rr != 0) { ft_loader_set_panic(bl, rr, true, which_db, nullptr, nullptr); @@ -2614,7 +2617,7 @@ static int toku_loader_write_ft_from_q (FTLOADER bl, { invariant(sts.n_subtrees==1); - out.h->h->root_blocknum = make_blocknum(sts.subtrees[0].block); + out.ft->h->root_blocknum = make_blocknum(sts.subtrees[0].block); toku_free(sts.subtrees); sts.subtrees = NULL; // write the descriptor @@ -2630,7 +2633,7 @@ static int toku_loader_write_ft_from_q (FTLOADER bl, char *XMALLOC_N(desc_size, buf); wbuf_init(&wbuf, buf, desc_size); toku_serialize_descriptor_contents_to_wbuf(&wbuf, descriptor); - uint32_t checksum = x1764_finish(&wbuf.checksum); + uint32_t checksum = toku_x1764_finish(&wbuf.checksum); wbuf_int(&wbuf, checksum); invariant(wbuf.ndone==desc_size); r = toku_os_write(out.fd, wbuf.buf, wbuf.ndone); @@ -2681,17 +2684,17 @@ static int toku_loader_write_ft_from_q (FTLOADER bl, return result; } -int toku_loader_write_brt_from_q_in_C (FTLOADER bl, - const DESCRIPTOR descriptor, - int fd, // write to here - int progress_allocation, - QUEUE q, - 
uint64_t total_disksize_estimate, - int which_db, - uint32_t target_nodesize, - uint32_t target_basementnodesize, - enum toku_compression_method target_compression_method, - uint32_t target_fanout) +int toku_loader_write_ft_from_q_in_C (FTLOADER bl, + const DESCRIPTOR descriptor, + int fd, // write to here + int progress_allocation, + QUEUE q, + uint64_t total_disksize_estimate, + int which_db, + uint32_t target_nodesize, + uint32_t target_basementnodesize, + enum toku_compression_method target_compression_method, + uint32_t target_fanout) // This is probably only for testing. { target_nodesize = target_nodesize == 0 ? default_loader_nodesize : target_nodesize; @@ -2723,7 +2726,7 @@ static int loader_do_i (FTLOADER bl, struct rowset *rows = &(bl->rows[which_db]); invariant(rows->data==NULL); // the rows should be all cleaned up already - int r = queue_create(&bl->fractal_queues[which_db], FRACTAL_WRITER_QUEUE_DEPTH); + int r = toku_queue_create(&bl->fractal_queues[which_db], FRACTAL_WRITER_QUEUE_DEPTH); if (r) goto error; { @@ -2767,7 +2770,7 @@ static int loader_do_i (FTLOADER bl, r = toku_pthread_create(bl->fractal_threads+which_db, NULL, fractal_thread, (void*)&fta); if (r) { - int r2 __attribute__((__unused__)) = queue_destroy(bl->fractal_queues[which_db]); + int r2 __attribute__((__unused__)) = toku_queue_destroy(bl->fractal_queues[which_db]); // ignore r2, since we already have an error bl->fractal_queues[which_db] = nullptr; goto error; @@ -2788,7 +2791,7 @@ static int loader_do_i (FTLOADER bl, if (r == 0) r = fta.errno_result; } } else { - queue_eof(bl->fractal_queues[which_db]); + toku_queue_eof(bl->fractal_queues[which_db]); r = toku_loader_write_ft_from_q(bl, descriptor, fd, progress_allocation, bl->fractal_queues[which_db], bl->extracted_datasizes[which_db], which_db, target_nodesize, target_basementnodesize, target_compression_method, target_fanout); @@ -2797,7 +2800,7 @@ static int loader_do_i (FTLOADER bl, error: // this is the cleanup code. Even if r==0 (no error) we fall through to here. 
if (bl->fractal_queues[which_db]) { - int r2 = queue_destroy(bl->fractal_queues[which_db]); + int r2 = toku_queue_destroy(bl->fractal_queues[which_db]); invariant(r2==0); bl->fractal_queues[which_db] = nullptr; } @@ -2938,17 +2941,13 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int // #3588 TODO just make a clean ule and append it to the omt // #3588 TODO can do the rebalancing here and avoid a lot of work later FTNODE leafnode = lbuf->node; - uint32_t idx = BLB_DATA(leafnode, 0)->omt_size(); - DBT thekey = { .data = key, .size = (uint32_t) keylen }; - DBT theval = { .data = val, .size = (uint32_t) vallen }; - FT_MSG_S cmd = { .type = FT_INSERT, - .msn = ZERO_MSN, - .xids = lbuf->xids, - .u = { .id = { &thekey, &theval } } }; - uint64_t workdone=0; + uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs(); + DBT kdbt, vdbt; + ft_msg msg(toku_fill_dbt(&kdbt, key, keylen), toku_fill_dbt(&vdbt, val, vallen), FT_INSERT, ZERO_MSN, lbuf->xids); + uint64_t workdone = 0; // there's no mvcc garbage in a bulk-loaded FT, so there's no need to pass useful gc info txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true); - toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, &workdone, stats_to_update); + toku_ft_bn_apply_msg_once(BLB(leafnode,0), msg, idx, keylen, NULL, &gc_info, &workdone, stats_to_update); } static int write_literal(struct dbout *out, void*data, size_t len) { @@ -2988,7 +2987,7 @@ static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progr toku_free(serialized_leaf); } toku_ftnode_free(&lbuf->node); - xids_destroy(&lbuf->xids); + toku_xids_destroy(&lbuf->xids); toku_free(lbuf); //printf("Nodewrite %d (%.1f%%):", progress_allocation, 100.0*progress_allocation/PROGRESS_MAX); @@ -3013,7 +3012,7 @@ static int write_translation_table (struct dbout *out, long long *off_of_transla putbuf_int64(&ttable, out->translation[i].off); putbuf_int64(&ttable, out->translation[i].size); } - unsigned int checksum = x1764_memory(ttable.buf, ttable.off); + unsigned int checksum = toku_x1764_memory(ttable.buf, ttable.off); putbuf_int32(&ttable, checksum); // pad it to 512 zeros long long encoded_length = ttable.off; @@ -3036,7 +3035,7 @@ static int write_translation_table (struct dbout *out, long long *off_of_transla static int write_header (struct dbout *out, long long translation_location_on_disk, long long translation_size_on_disk) { int result = 0; - size_t size = toku_serialize_ft_size(out->h->h); + size_t size = toku_serialize_ft_size(out->ft->h); size_t alloced_size = roundup_to_multiple(512, size); struct wbuf wbuf; char *MALLOC_N_ALIGNED(512, alloced_size, buf); @@ -3044,8 +3043,8 @@ write_header (struct dbout *out, long long translation_location_on_disk, long lo result = get_error_errno(); } else { wbuf_init(&wbuf, buf, size); - out->h->h->on_disk_stats = out->h->in_memory_stats; - toku_serialize_ft_to_wbuf(&wbuf, out->h->h, translation_location_on_disk, translation_size_on_disk); + out->ft->h->on_disk_stats = out->ft->in_memory_stats; + toku_serialize_ft_to_wbuf(&wbuf, out->ft->h, translation_location_on_disk, translation_size_on_disk); for (size_t i=size; itotalchildkeylens = 0; - for (int i=0; ichildkeys[i], pivots[i]); - node->totalchildkeylens += pivots[i].size; - } + node->pivotkeys.create_from_dbts(pivots, n_children - 1); assert(node->bp); for (int i=0; ichildkeys[i].data); } for (int i=0; ibp); - toku_free(node->childkeys); + node->pivotkeys.destroy(); toku_free(node); toku_free(ndd); toku_free(subtree_info); diff 
--git a/storage/tokudb/ft-index/ft/ftloader.h b/storage/tokudb/ft-index/ft/loader/loader.h similarity index 91% rename from storage/tokudb/ft-index/ft/ftloader.h rename to storage/tokudb/ft-index/ft/loader/loader.h index 2243ddd080c20..4ef45dea0ac3e 100644 --- a/storage/tokudb/ft-index/ft/ftloader.h +++ b/storage/tokudb/ft-index/ft/loader/loader.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FTLOADER_H -#define FTLOADER_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,9 +87,16 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include "ft/txn/txn.h" +#include "ft/cachetable/cachetable.h" +#include "ft/comparator.h" +#include "ft/ft-ops.h" + // The loader callbacks are C functions and need to be defined as such typedef void (*ft_loader_error_func)(DB *, int which_db, int err, DBT *key, DBT *val, void *extra); @@ -102,13 +107,13 @@ typedef struct ft_loader_s *FTLOADER; int toku_ft_loader_open (FTLOADER *bl, CACHETABLE cachetable, - generate_row_for_put_func g, - DB *src_db, - int N, - FT_HANDLE brts[/*N*/], DB* dbs[/*N*/], - const char * new_fnames_in_env[/*N*/], - ft_compare_func bt_compare_functions[/*N*/], - const char *temp_file_template, + generate_row_for_put_func g, + DB *src_db, + int N, + FT_HANDLE ft_hs[/*N*/], DB* dbs[/*N*/], + const char * new_fnames_in_env[/*N*/], + ft_compare_func bt_compare_functions[/*N*/], + const char *temp_file_template, LSN load_lsn, TOKUTXN txn, bool reserve_memory, @@ -131,5 +136,3 @@ void toku_ft_loader_set_size_factor (uint32_t factor); void ft_loader_set_os_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)); size_t ft_loader_leafentry_size(size_t key_size, size_t val_size, TXNID xid); - -#endif // FTLOADER_H diff --git a/storage/tokudb/ft-index/ft/pqueue.cc b/storage/tokudb/ft-index/ft/loader/pqueue.cc similarity index 98% rename from storage/tokudb/ft-index/ft/pqueue.cc rename to storage/tokudb/ft-index/ft/loader/pqueue.cc index fa76551b81ff5..c50664f5e45e8 100644 --- a/storage/tokudb/ft-index/ft/pqueue.cc +++ b/storage/tokudb/ft-index/ft/loader/pqueue.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -92,8 +92,8 @@ PATENT RIGHTS GRANT: #include #include "toku_os.h" #include "ft-internal.h" -#include "ftloader-internal.h" -#include "pqueue.h" +#include "loader/loader-internal.h" +#include "loader/pqueue.h" #define pqueue_left(i) ((i) << 1) #define pqueue_right(i) (((i) << 1) + 1) diff --git a/storage/tokudb/ft-index/ft/pqueue.h b/storage/tokudb/ft-index/ft/loader/pqueue.h similarity index 97% rename from storage/tokudb/ft-index/ft/pqueue.h rename to storage/tokudb/ft-index/ft/loader/pqueue.h index cd550d70572d4..43df70e97ff9e 100644 --- a/storage/tokudb/ft-index/ft/pqueue.h +++ b/storage/tokudb/ft-index/ft/loader/pqueue.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_PQUEUE_H -#define TOKU_PQUEUE_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -121,6 +121,3 @@ void pqueue_free(pqueue_t *q); size_t pqueue_size(pqueue_t *q); int pqueue_insert(pqueue_t *q, pqueue_node_t *d); int pqueue_pop(pqueue_t *q, pqueue_node_t **d); - - -#endif //TOKU_PQUEUE_H diff --git a/storage/tokudb/ft-index/ft/locking-benchmarks/mfence-benchmark.cc b/storage/tokudb/ft-index/ft/locking-benchmarks/mfence-benchmark.cc deleted file mode 100644 index 0c8290d8d2f11..0000000000000 --- a/storage/tokudb/ft-index/ft/locking-benchmarks/mfence-benchmark.cc +++ /dev/null @@ -1,217 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -/* Time {m,l,s}fence vs.xchgl for a memory barrier. 
*/ - -/* Timing numbers: - * Intel T2500 2GHZ - -do1 9.0ns/loop -mfence: 29.0ns/loop (marginal cost= 20.0ns) -sfence: 17.3ns/loop (marginal cost= 8.3ns) -lfence: 23.6ns/loop (marginal cost= 14.6ns) - xchgl: 35.8ns/loop (marginal cost= 26.8ns) - -* AMD Athlon 64 X2 Dual Core Processor 4200+ - Timings are more crazy - -do1 20.6ns/loop -mfence: 12.9ns/loop (marginal cost= -7.6ns) -sfence: 8.4ns/loop (marginal cost= -12.1ns) -lfence: 20.2ns/loop (marginal cost= -0.3ns) - xchgl: 16.6ns/loop (marginal cost= -3.9ns) - -do1 13.0ns/loop -mfence: 25.6ns/loop (marginal cost= 12.6ns) -sfence: 21.0ns/loop (marginal cost= 8.1ns) -lfence: 12.9ns/loop (marginal cost= -0.1ns) - xchgl: 29.3ns/loop (marginal cost= 16.3ns) - -*/ - - -#include -#include -#include - -enum { COUNT = 100000000 }; - -static inline void xchgl (void) { - { - /* - * According to the Intel Architecture Software Developer's - * Manual, Volume 3: System Programming Guide - * (http://www.intel.com/design/pro/manuals/243192.htm), page - * 7-6, "For the P6 family processors, locked operations - * serialize all outstanding load and store operations (that - * is, wait for them to complete)." - * Since xchg is locked by default, it is one way to do membar. - */ - int x=0, y; - asm volatile ("xchgl %0,%1" :"=r" (x) :"m" (y), "0" (x) :"memory"); - } -} - -static inline void mfence (void) { - asm volatile ("mfence":::"memory"); -} - -static inline void lfence (void) { - asm volatile ("lfence":::"memory"); -} - -static inline void sfence (void) { - asm volatile ("sfence":::"memory"); -} - -int lock_for_lock_and_unlock; -static inline void lock_and_unlock (void) { - (void)toku_sync_lock_test_and_set(&lock_for_lock_and_unlock, 1); - toku_sync_lock_release(&lock_for_lock_and_unlock); -} - - -double tdiff (struct timeval *start, struct timeval *end) { - return ((end->tv_sec-start->tv_sec + 1e-6*(end->tv_usec + start->tv_usec))/COUNT)*1e9; -} - -double nop_cost; - -void do1 (volatile int *x) { - int i; - struct timeval start, end; - gettimeofday(&start, 0); - for (i=0; i -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -float tdiff (struct timeval *start, struct timeval *end) { - return 1e6*(end->tv_sec-start->tv_sec) +(end->tv_usec - start->tv_usec); -} - -#define FILE "process.data" - -int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) { - int r; - int fd; - void *p; - fd=open(FILE, O_CREAT|O_RDWR|O_TRUNC, 0666); assert(fd>=0); - int i; - for (i=0; i<4096; i++) { - r=write(fd, "\000", 1); - assert(r==1); - } - p=mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - if (p==MAP_FAILED) { - printf("err=%d %s (EPERM=%d)\n", errno, strerror(errno), EPERM); - } - assert(p!=MAP_FAILED); - r=close(fd); assert(r==0); - - pthread_rwlockattr_t attr; - pthread_rwlock_t *lock=p; - r=pthread_rwlockattr_init(&attr); assert(r==0); - r=pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); assert(r==0); - r=pthread_rwlock_init(lock, &attr); assert(r==0); - r=pthread_rwlock_init(lock+1, &attr); assert(r==0); - - r=pthread_rwlock_wrlock(lock); - - pid_t pid; - if ((pid=fork())==0) { - // I'm the child - r = munmap(p, 4096); assert(r==0); - fd = open(FILE, O_RDWR, 0666); assert(fd>=0); - p=mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - assert(p!=MAP_FAILED); - r=close(fd); assert(r==0); - - printf("A0\n"); - r=pthread_rwlock_wrlock(lock); - printf("C\n"); - sleep(1); - - r=pthread_rwlock_unlock(lock); - printf("D\n"); - - 
r=pthread_rwlock_rdlock(lock); - printf("E0\n"); - sleep(1); - - } else { - printf("A1\n"); - sleep(1); - printf("B\n"); - r=pthread_rwlock_unlock(lock); // release the lock grabbed before the fork - assert(r==0); - - sleep(1); - r=pthread_rwlock_rdlock(lock); - assert(r==0); - printf("E1\n"); - sleep(1); - - int status; - pid_t waited=wait(&status); - assert(waited==pid); - } - return 0; - - -#if 0 - - int j; - int i; - int r; - struct timeval start, end; - for (j=0; j<3; j++) { - for (i=0; i -#include -#include -#include -#include -#include - -float tdiff (struct timeval *start, struct timeval *end) { - return 1e6*(end->tv_sec-start->tv_sec) +(end->tv_usec - start->tv_usec); -} - -/* My own rwlock implementation. */ -struct brwl { - int mutex; - int state; // 0 for unlocked, -1 for a writer, otherwise many readers -}; - -static inline int xchg(volatile int *ptr, int x) -{ - __asm__("xchgl %0,%1" :"=r" (x) :"m" (*(ptr)), "0" (x) :"memory"); - return x; -} - -static inline void sfence (void) { - asm volatile ("sfence":::"memory"); -} - -static inline void brwl_rlock_fence (struct brwl *l) { - while (xchg(&l->mutex, 1)) ; - l->state++; - sfence(); - l->mutex=0; -} - -static inline void brwl_rlock_xchg (struct brwl *l) { - while (xchg(&l->mutex, 1)) ; - l->state++; - xchg(&l->mutex, 0); -} - -// Something wrong with the compiler for longs -static inline long -fetch_and_add (volatile long *p, long incr) -{ - long result = incr; - - __asm__ __volatile__ ("lock; xaddl %0, %1" : - "+r" (result), "+m" (*p) : : "memory"); - return result; -} - -static inline int -fetch_and_add_i (volatile int *p, int incr) -{ - int result = incr; - - __asm__ __volatile__ ("lock; xadd %0, %1" : - "+r" (result), "+m" (*p) : : "memory"); - return result; -} - -static inline int -gcc_fetch_and_add_i (volatile int *p, int incr) -{ - return toku_sync_fetch_and_add(p, incr); -} - -static inline long -gcc_fetch_and_add_l (volatile long *p, long incr) -{ - return toku_sync_fetch_and_add(p, incr); -} - -// Something wrong with the compiler for longs -/* Returns nonzero if the comparison succeeded. */ -static inline long -compare_and_swap_full(volatile long *addr, - long old, long new_val) -{ - char result; - __asm__ __volatile__("lock; cmpxchgl %2, %0; setz %1" - : "+m"(*(addr)), "=q"(result) - : "r" (new_val), "a"(old) : "memory"); - return (int) result; -} - -/* Returns nonzero if the comparison succeeded. */ -// Atomically compare *addr to old_val, and replace *addr by new_val -// if the first comparison succeeds. Returns nonzero if the comparison -// succeeded and *addr was updated. 
-static inline int -compare_and_swap_full_i(volatile int *addr, - int old, int new_val) -{ - char result; - __asm__ __volatile__("lock; cmpxchg %2, %0; setz %1" - : "+m"(*(addr)), "=q"(result) - : "r" (new_val), "a"(old) : "memory"); - return (int) result; -} - -enum {K=100000}; -pthread_rwlock_t rwlocks[K]; -struct brwl blocks[K]; -pthread_mutex_t mlocks[K]; -long lvals[K]; -int ivals[K]; - -#define TIME(s, i, init, body) ({ \ - int j_tmp; \ - printf("%-24s", s); \ - for (j_tmp=0; j_tmp<3; j_tmp++) { \ - struct timeval start,end; \ - int i; \ - for (i=0; i -#include -#include -#include -#include -#include -#include -#include -#include - -float tdiff (struct timeval *start, struct timeval *end) { - return 1e6*(end->tv_sec-start->tv_sec) +(end->tv_usec - start->tv_usec); -} - -unsigned long long rtdiff (unsigned long long a, unsigned long long b) { - return (b-a); -} - -/* Simple function to check the return code and exit the program - if the function call failed - */ -static void compResults(char *string, int rc) { - if (rc) { - printf("Error on : %s, rc=%d", - string, rc); - exit(EXIT_FAILURE); - } - return; -} - -pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER; - -void *rdlockThread(void *arg __attribute__((unused))) -{ - int rc; - int count=0; - - unsigned long long t_start, t_end; - - printf("Entered thread, getting read lock with mp wait\n"); - Retry: - - t_start = rdtsc(); - rc = pthread_rwlock_tryrdlock(&rwlock); - t_end = rdtsc(); - printf("pthread_rwlock_tryrdlock took %llu clocks\n", rtdiff(t_start,t_end)); - if (rc == EBUSY) { - if (count >= 10) { - printf("Retried too many times, failure!\n"); - - exit(EXIT_FAILURE); - } - ++count; - printf("Could not get lock, do other work, then RETRY...\n"); - sleep(1); - goto Retry; - } - compResults("pthread_rwlock_tryrdlock() 1\n", rc); - - sleep(2); - - printf("unlock the read lock\n"); - t_start = rdtsc(); - rc = pthread_rwlock_unlock(&rwlock); - t_end = rdtsc(); - compResults("pthread_rwlock_unlock()\n", rc); - printf("Took %llu clocks\n", rtdiff(t_start, t_end)); - - printf("Secondary thread complete\n"); - return NULL; -} - -int main(int argc __attribute__((unused)), char **argv) -{ - int rc=0; - pthread_t thread; - unsigned long long t_start, t_end; - - printf("Enter Testcase - %s\n", argv[0]); - - t_start = rdtsc(); - t_end = rdtsc(); - printf("nop Took %llu clocks\n", rtdiff(t_start, t_end)); - - { - int N=1000; - int i; - printf("Main, get and release the write lock %d times\n", N); - t_start = rdtsc(); - for (i=0; i -#include -#include -#include -#include -#include -#include - -float tdiff (struct timeval *start, struct timeval *end) { - return 1e6*(end->tv_sec-start->tv_sec) +(end->tv_usec - start->tv_usec); -} - -/* Simple function to check the return code and exit the program - if the function call failed - */ -static void compResults(char *string, int rc) { - if (rc) { - printf("Error on : %s, rc=%d", - string, rc); - exit(EXIT_FAILURE); - } - return; -} - -pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER; - -void *rdlockThread(void *arg __attribute__((unused))) -{ - int rc; - int count=0; - - struct timeval start, end; - - printf("Entered thread, getting read lock with mp wait\n"); - Retry: - - gettimeofday(&start, 0); - rc = pthread_rwlock_tryrdlock(&rwlock); - gettimeofday(&end, 0); - printf("pthread_rwlock_tryrdlock took %9.3fus\n", tdiff(&start,&end)); - if (rc == EBUSY) { - if (count >= 10) { - printf("Retried too many times, failure!\n"); - - exit(EXIT_FAILURE); - } - ++count; - printf("Could not get lock, do 
other work, then RETRY...\n"); - sleep(1); - goto Retry; - } - compResults("pthread_rwlock_tryrdlock() 1\n", rc); - - sleep(2); - - printf("unlock the read lock\n"); - gettimeofday(&start, 0); - rc = pthread_rwlock_unlock(&rwlock); - gettimeofday(&end, 0); - compResults("pthread_rwlock_unlock()\n", rc); - printf("%lu.%6lu to %lu.%6lu is %9.2f\n", start.tv_sec, start.tv_usec, end.tv_sec, end.tv_usec, tdiff(&start, &end)); - - printf("Secondary thread complete\n"); - return NULL; -} - -int main(int argc __attribute__((unused)), char **argv) -{ - int rc=0; - pthread_t thread; - struct timeval start, end; - - printf("Enter Testcase - %s\n", argv[0]); - - gettimeofday(&start, 0); - gettimeofday(&end, 0); - printf("nop Took %9.2f\n", tdiff(&start, &end)); - - { - int N=1000; - int i; - printf("Main, get and release the write lock %d times\n", N); - gettimeofday(&start, 0); - for (i=0; i #include #include -#include "ft-internal.h" -#include "log.h" -#include "toku_list.h" -#include "memarena.h" -#include "logfilemgr.h" -#include "txn.h" -#include "txn_manager.h" -#include -#include -#include "rollback_log_node_cache.h" -#include "txn_child_manager.h" + +#include "portability/toku_list.h" +#include "portability/toku_pthread.h" +#include "ft/ft-internal.h" +#include "ft/logger/log.h" +#include "ft/logger/logfilemgr.h" +#include "ft/txn/txn.h" +#include "ft/txn/txn_manager.h" +#include "ft/txn/rollback_log_node_cache.h" + +#include "util/memarena.h" +#include "util/omt.h" using namespace toku; // Locking for the logger @@ -117,6 +118,7 @@ using namespace toku; #define LOGGER_MIN_BUF_SIZE (1<<24) +// TODO: Remove mylock, it has no value struct mylock { toku_mutex_t lock; }; @@ -155,7 +157,7 @@ struct tokulogger { DIR *dir; // descriptor for directory int fd; CACHETABLE ct; - int lg_max; // The size of the single file in the log. Default is 100MB in TokuDB + int lg_max; // The size of the single file in the log. Default is 100MB. // To access these, you must have the input lock LSN lsn; // the next available lsn @@ -179,8 +181,6 @@ struct tokulogger { tokutime_t time_spent_writing_to_disk; // how much tokutime did we spend writing to disk? uint64_t num_wait_buf_long; // how many times we waited >= 100ms for the in buf - void (*remove_finalize_callback) (DICTIONARY_ID, void*); // ydb-level callback to be called when a transaction that ... - void * remove_finalize_callback_extra; // ... deletes a file is committed or when one that creates a file is aborted. CACHEFILE rollback_cachefile; rollback_log_node_cache rollback_cache; TXN_MANAGER txn_manager; @@ -188,99 +188,7 @@ struct tokulogger { int toku_logger_find_next_unused_log_file(const char *directory, long long *result); int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_logfiles); - -struct txn_roll_info { - // these are number of rollback nodes and rollback entries for this txn. - // - // the current rollback node below has sequence number num_rollback_nodes - 1 - // (because they are numbered 0...num-1). often, the current rollback is - // already set to this block num, which means it exists and is available to - // log some entries. 
if the current rollback is NONE and the number of - // rollback nodes for this transaction is non-zero, then we will use - // the number of rollback nodes to know which sequence number to assign - // to a new one we create - uint64_t num_rollback_nodes; - uint64_t num_rollentries; - uint64_t num_rollentries_processed; - uint64_t rollentry_raw_count; // the total count of every byte in the transaction and all its children. - - // spilled rollback nodes are rollback nodes that were gorged by this - // transaction, retired, and saved in a list. - - // the spilled rollback head is the block number of the first rollback node - // that makes up the rollback log chain - BLOCKNUM spilled_rollback_head; - // the spilled rollback is the block number of the last rollback node that - // makes up the rollback log chain. - BLOCKNUM spilled_rollback_tail; - // the current rollback node block number we may use. if this is ROLLBACK_NONE, - // then we need to create one and set it here before using it. - BLOCKNUM current_rollback; -}; - -struct tokutxn { - // These don't change after create: - - TXNID_PAIR txnid; - - uint64_t snapshot_txnid64; // this is the lsn of the snapshot - const TXN_SNAPSHOT_TYPE snapshot_type; - const bool for_recovery; - const TOKULOGGER logger; - const TOKUTXN parent; - // The child txn is protected by the child_txn_manager lock - // and by the user contract. The user contract states (and is - // enforced at the ydb layer) that a child txn should not be created - // while another child exists. The txn_child_manager will protect - // other threads from trying to read this value while another - // thread commits/aborts the child - TOKUTXN child; - // statically allocated child manager, if this - // txn is a root txn, this manager will be used and set to - // child_manager for this transaction and all of its children - txn_child_manager child_manager_s; - // child manager for this transaction, all of its children, - // and all of its ancestors - txn_child_manager* child_manager; - // These don't change but they're created in a way that's hard to make - // strictly const. - DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn - xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started. - XIDS xids; // Represents the xid list - - TOKUTXN snapshot_next; - TOKUTXN snapshot_prev; - - bool begin_was_logged; - bool declared_read_only; // true if the txn was declared read only when began - // These are not read until a commit, prepare, or abort starts, and - // they're "monotonic" (only go false->true) during operation: - bool do_fsync; - bool force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. (commit means root txn) - - // Not used until commit, prepare, or abort starts: - LSN do_fsync_lsn; - TOKU_XA_XID xa_xid; // for prepared transactions - TXN_PROGRESS_POLL_FUNCTION progress_poll_fun; - void *progress_poll_fun_extra; - - toku_mutex_t txn_lock; - // Protected by the txn lock: - omt open_fts; // a collection of the fts that we touched. Indexed by filenum. 
- struct txn_roll_info roll_info; // Info used to manage rollback entries - - // mutex that protects the transition of the state variable - // the rest of the variables are used by the txn code and - // hot indexing to ensure that when hot indexing is processing a - // leafentry, a TOKUTXN cannot dissappear or change state out from - // underneath it - toku_mutex_t state_lock; - toku_cond_t state_cond; - TOKUTXN_STATE state; - uint32_t num_pin; // number of threads (all hot indexes) that want this - // txn to not transition to commit or abort - uint64_t client_id; -}; +void toku_logger_free_logfiles (char **logfiles, int n_logfiles); static inline int txn_has_current_rollback_log(TOKUTXN txn) { @@ -369,5 +277,3 @@ static inline char *fixup_fname(BYTESTRING *f) { fname[f->len]=0; return fname; } - -#endif diff --git a/storage/tokudb/ft-index/ft/log.h b/storage/tokudb/ft-index/ft/logger/log.h similarity index 91% rename from storage/tokudb/ft-index/ft/log.h rename to storage/tokudb/ft-index/ft/logger/log.h index 418fc83475126..180f118765b8d 100644 --- a/storage/tokudb/ft-index/ft/log.h +++ b/storage/tokudb/ft-index/ft/logger/log.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_LOGGGER_H -#define TOKU_LOGGGER_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,34 +87,27 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
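The txn_roll_info comments removed from log-internal.h above describe how rollback nodes are sequence-numbered: nodes are numbered 0..num_rollback_nodes-1, the currently open node carries the highest sequence number, and when there is no current node the running count determines which sequence number a newly created node receives. The following is a loose, hypothetical sketch of just that numbering rule; the real structure tracks BLOCKNUMs and the spilled head/tail chain as well, which are omitted here.

    #include <cstdint>
    #include <cassert>

    const int64_t ROLLBACK_NONE = -1;

    struct roll_info {
        uint64_t num_rollback_nodes;  // rollback nodes created so far for this txn
        int64_t  current_rollback;    // sequence number of the open node, or ROLLBACK_NONE
    };

    // Return the sequence number new rollback entries should be logged to,
    // creating a fresh node if none is currently open.
    static int64_t current_or_new_rollback_seq(roll_info *ri) {
        if (ri->current_rollback != ROLLBACK_NONE) {
            // the open node is always the most recently created one
            assert((uint64_t) ri->current_rollback == ri->num_rollback_nodes - 1);
            return ri->current_rollback;
        }
        // no open node: the count tells us the next sequence number to hand out
        int64_t seq = (int64_t) ri->num_rollback_nodes;
        ri->num_rollback_nodes++;
        ri->current_rollback = seq;
        return seq;
    }

    int main(void) {
        roll_info ri = { 0, ROLLBACK_NONE };
        current_or_new_rollback_seq(&ri);                // creates node 0
        ri.current_rollback = ROLLBACK_NONE;             // pretend node 0 was closed/spilled
        int64_t seq = current_or_new_rollback_seq(&ri);  // creates node 1
        return seq == 1 ? 0 : 1;
    }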
-#include +#include #include -#include -#include "fttypes.h" -#include "memory.h" -#include "x1764.h" +#include "portability/memory.h" +#include "portability/toku_portability.h" + +#include "ft/logger/recover.h" +#include "ft/txn/rollback.h" +#include "ft/txn/txn.h" +#include "util/bytestring.h" struct roll_entry; -#include "logger.h" -#include "rollback.h" -#include "recover.h" -#include "txn.h" - -static inline int toku_copy_BYTESTRING(BYTESTRING *target, BYTESTRING val) { - target->len = val.len; - target->data = (char *) toku_memdup(val.data, (size_t)val.len); - if (target->data==0) { - return get_error_errno(); - } - return 0; -} static inline void toku_free_TXNID(TXNID txnid __attribute__((__unused__))) {} static inline void toku_free_TXNID_PAIR(TXNID_PAIR txnid __attribute__((__unused__))) {} + static inline void toku_free_LSN(LSN lsn __attribute__((__unused__))) {} static inline void toku_free_uint64_t(uint64_t u __attribute__((__unused__))) {} static inline void toku_free_uint32_t(uint32_t u __attribute__((__unused__))) {} @@ -130,6 +121,3 @@ static inline void toku_free_FILENUMS(FILENUMS val) { toku_free(val.filenums); } int toku_maybe_upgrade_log (const char *env_dir, const char *log_dir, LSN * lsn_of_clean_shutdown, bool * upgrade_in_progress); uint64_t toku_log_upgrade_get_footprint(void); - - -#endif diff --git a/storage/tokudb/ft-index/ft/log_upgrade.cc b/storage/tokudb/ft-index/ft/logger/log_upgrade.cc similarity index 94% rename from storage/tokudb/ft-index/ft/log_upgrade.cc rename to storage/tokudb/ft-index/ft/logger/log_upgrade.cc index 31dbdb04d7ea1..6631759fae090 100644 --- a/storage/tokudb/ft-index/ft/log_upgrade.cc +++ b/storage/tokudb/ft-index/ft/logger/log_upgrade.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -92,8 +92,8 @@ PATENT RIGHTS GRANT: #include #include "log-internal.h" -#include "logcursor.h" -#include "checkpoint.h" +#include "logger/logcursor.h" +#include "cachetable/checkpoint.h" static uint64_t footprint = 0; // for debug and accountability @@ -209,10 +209,7 @@ verify_clean_shutdown_of_log_version_old(const char *log_dir, LSN * last_lsn, TX r = toku_logcursor_destroy(&cursor); assert(r == 0); cleanup_no_logcursor: - for(int i=0;i #include @@ -167,11 +167,8 @@ static int lc_open_logfile(TOKULOGCURSOR lc, int index) { lc->cur_fp = fopen(lc->logfiles[index], "rb"); if ( lc->cur_fp == NULL ) return DB_NOTFOUND; - // debug printf("%s:%d %s %p %u\n", __FUNCTION__, __LINE__, lc->logfiles[index], lc->buffer, (unsigned) lc->buffer_size); -#if !TOKU_WINDOWS //Windows reads logs fastest if we use default settings (not use setvbuf to change buffering) r = setvbuf(lc->cur_fp, (char *) lc->buffer, _IOFBF, lc->buffer_size); assert(r == 0); -#endif // position fp past header, ignore 0 length file (t:2384) unsigned int version=0; if ( lc_file_len(lc->logfiles[index]) >= 12 ) { @@ -194,7 +191,7 @@ static int lc_check_lsn(TOKULOGCURSOR lc, int dir) { // int index = lc->cur_logfiles_index; // fprintf(stderr, "Bad LSN: %d %s direction = %d, lsn.lsn = %" PRIu64 ", cur_lsn.lsn=%" PRIu64 "\n", // index, lc->logfiles[index], dir, lsn.lsn, lc->cur_lsn.lsn); - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) printf("DB_RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, 0); return LC_LSN_ERROR; } @@ -280,11 +277,7 @@ int toku_logcursor_destroy(TOKULOGCURSOR *lc) { (*lc)->entry_valid = false; } r = lc_close_cur_logfile(*lc); - int lf; - for(lf=0;lf<(*lc)->n_logfiles;lf++) { - if ( (*lc)->logfiles[lf] ) toku_free((*lc)->logfiles[lf]); - } - if ( (*lc)->logfiles ) toku_free((*lc)->logfiles); + toku_logger_free_logfiles((*lc)->logfiles, (*lc)->n_logfiles); if ( (*lc)->logdir ) toku_free((*lc)->logdir); if ( (*lc)->buffer ) toku_free((*lc)->buffer); toku_free(*lc); @@ -310,10 +303,10 @@ static int lc_log_read(TOKULOGCURSOR lc) toku_log_free_log_entry_resources(&(lc->entry)); time_t tnow = time(NULL); if (r==DB_BADFORMAT) { - fprintf(stderr, "%.24s Tokudb bad log format in %s\n", ctime(&tnow), lc->logfiles[lc->cur_logfiles_index]); + fprintf(stderr, "%.24s TokuFT bad log format in %s\n", ctime(&tnow), lc->logfiles[lc->cur_logfiles_index]); } else { - fprintf(stderr, "%.24s Tokudb unexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), lc->logfiles[lc->cur_logfiles_index]); + fprintf(stderr, "%.24s TokuFT unexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), lc->logfiles[lc->cur_logfiles_index]); } } return r; @@ -342,10 +335,10 @@ static int lc_log_read_backward(TOKULOGCURSOR lc) toku_log_free_log_entry_resources(&(lc->entry)); time_t tnow = time(NULL); if (r==DB_BADFORMAT) { - fprintf(stderr, "%.24s Tokudb bad log format in %s\n", ctime(&tnow), lc->logfiles[lc->cur_logfiles_index]); + fprintf(stderr, "%.24s TokuFT bad log format in %s\n", ctime(&tnow), lc->logfiles[lc->cur_logfiles_index]); } else { - fprintf(stderr, "%.24s Tokudb uUnexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), lc->logfiles[lc->cur_logfiles_index]); + fprintf(stderr, "%.24s TokuFT uUnexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), lc->logfiles[lc->cur_logfiles_index]); } } return r; @@ -463,10 +456,10 @@ int toku_logcursor_last(TOKULOGCURSOR lc, struct log_entry **le) { // probably a corrupted last log entry due to a crash // try scanning forward from 
the beginning to find the last good entry time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery repairing log\n", ctime(&tnow)); + fprintf(stderr, "%.24s TokuFT recovery repairing log\n", ctime(&tnow)); r = lc_fix_bad_logfile(lc); if ( r != 0 ) { - fprintf(stderr, "%.24s Tokudb recovery repair unsuccessful\n", ctime(&tnow)); + fprintf(stderr, "%.24s TokuFT recovery repair unsuccessful\n", ctime(&tnow)); return DB_BADFORMAT; } // try reading again diff --git a/storage/tokudb/ft-index/ft/logcursor.h b/storage/tokudb/ft-index/ft/logger/logcursor.h similarity index 97% rename from storage/tokudb/ft-index/ft/logcursor.h rename to storage/tokudb/ft-index/ft/logger/logcursor.h index f374f6c287428..15774fb11d383 100644 --- a/storage/tokudb/ft-index/ft/logcursor.h +++ b/storage/tokudb/ft-index/ft/logger/logcursor.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKULOGCURSOR_H -#define TOKULOGCURSOR_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,12 +87,13 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include - struct toku_logcursor; typedef struct toku_logcursor *TOKULOGCURSOR; @@ -127,6 +126,3 @@ int toku_logcursor_last(const TOKULOGCURSOR lc, struct log_entry **le); int toku_logcursor_log_exists(const TOKULOGCURSOR lc); void toku_logcursor_print(TOKULOGCURSOR lc); - - -#endif // TOKULOGCURSOR_H diff --git a/storage/tokudb/ft-index/ft/logfilemgr.cc b/storage/tokudb/ft-index/ft/logger/logfilemgr.cc similarity index 97% rename from storage/tokudb/ft-index/ft/logfilemgr.cc rename to storage/tokudb/ft-index/ft/logger/logfilemgr.cc index 917760abc6c65..04d091ae1bc36 100644 --- a/storage/tokudb/ft-index/ft/logfilemgr.cc +++ b/storage/tokudb/ft-index/ft/logger/logfilemgr.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,9 +89,9 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
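The logcursor hunk above drops the Windows-only exception around setvbuf, so lc_open_logfile now always hands stdio a large private buffer for sequential log scans. A small sketch of that buffering setup follows, using only standard stdio calls; the 1 MiB size and file handling are illustrative, not the values the logcursor actually uses.

    #include <cstdio>
    #include <cstdlib>

    int main(int argc, char **argv) {
        if (argc < 2) return 1;
        FILE *f = fopen(argv[1], "rb");
        if (f == NULL) return 1;

        size_t bufsize = 1 << 20;                 // 1 MiB; purely illustrative
        char *buf = (char *) malloc(bufsize);
        if (buf != NULL) {
            // setvbuf must run after fopen and before any other I/O on the stream
            setvbuf(f, buf, _IOFBF, bufsize);
        }

        // ... sequential scan of the log file would go here ...

        fclose(f);
        free(buf);   // the buffer must stay alive until fclose, so free it only afterwards
        return 0;
    }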
-#include "log-internal.h" -#include "logcursor.h" -#include "logfilemgr.h" +#include "logger/log-internal.h" +#include "logger/logcursor.h" +#include "logger/logfilemgr.h" // for now, implement with singlely-linked-list // first = oldest (delete from beginning) @@ -186,10 +186,7 @@ int toku_logfilemgr_init(TOKULOGFILEMGR lfm, const char *log_dir, TXNID *last_xi toku_logfilemgr_add_logfile_info(lfm, lf_info); toku_logcursor_destroy(&cursor); } - for(int i=0;i - // this is the basic information we need to keep per logfile struct toku_logfile_info { int64_t index; @@ -118,6 +117,3 @@ LSN toku_logfilemgr_get_last_lsn(TOKULOGFILEMGR lfm); void toku_logfilemgr_update_last_lsn(TOKULOGFILEMGR lfm, LSN lsn); void toku_logfilemgr_print(TOKULOGFILEMGR lfm); - - -#endif //TOKULOGFILEMGR_H diff --git a/storage/tokudb/ft-index/ft/logformat.cc b/storage/tokudb/ft-index/ft/logger/logformat.cc similarity index 97% rename from storage/tokudb/ft-index/ft/logformat.cc rename to storage/tokudb/ft-index/ft/logger/logformat.cc index aceedec4c5eb2..698b612c0781b 100644 --- a/storage/tokudb/ft-index/ft/logformat.cc +++ b/storage/tokudb/ft-index/ft/logger/logformat.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -195,7 +195,7 @@ const struct logtype logtypes[] = { {"BYTESTRING", "iname", 0}, // pathname of file {"uint8_t", "unlink_on_close", 0}, NULLFIELD}, IGNORE_LOG_BEGIN}, - //We do not use a TXNINFO struct since recovery log has + //We do not use a txninfo struct since recovery log has //FILENUMS and TOKUTXN has FTs (for open_fts) {"xstillopen", 's', FA{{"TXNID_PAIR", "xid", 0}, {"TXNID_PAIR", "parentxid", 0}, @@ -536,7 +536,7 @@ generate_log_writer (void) { if (strcmp(field_type->name, "timestamp") == 0) fprintf(cf, " if (timestamp == 0) timestamp = toku_get_timestamp();\n"); fprintf(cf, " wbuf_nocrc_%s(&wbuf, %s);\n", field_type->type, field_type->name)); - fprintf(cf, " wbuf_nocrc_int(&wbuf, x1764_memory(wbuf.buf, wbuf.ndone));\n"); + fprintf(cf, " wbuf_nocrc_int(&wbuf, toku_x1764_memory(wbuf.buf, wbuf.ndone));\n"); fprintf(cf, " wbuf_nocrc_int(&wbuf, buflen);\n"); fprintf(cf, " assert(wbuf.ndone==buflen);\n"); fprintf(cf, " logger->inbuf.n_in_buf += buflen;\n"); @@ -558,7 +558,7 @@ generate_log_reader (void) { fprintf(cf, " uint32_t checksum_in_file, len_in_file;\n"); fprintf(cf, " r=toku_fread_uint32_t_nocrclen(infile, &checksum_in_file); actual_len+=4; if (r!=0) return r;\n"); fprintf(cf, " r=toku_fread_uint32_t_nocrclen(infile, &len_in_file); actual_len+=4; if (r!=0) return r;\n"); - fprintf(cf, " if (checksum_in_file!=x1764_finish(checksum) || len_in_file!=actual_len || len1 != len_in_file) return DB_BADFORMAT;\n"); + fprintf(cf, " if (checksum_in_file!=toku_x1764_finish(checksum) || len_in_file!=actual_len || len1 != len_in_file) return DB_BADFORMAT;\n"); fprintf(cf, " return 0;\n"); fprintf(cf, "}\n\n"); }); @@ -568,12 +568,12 @@ generate_log_reader (void) { fprintf(cf, " uint32_t len1; int r;\n"); fprintf(cf, " uint32_t ignorelen=0;\n"); fprintf(cf, " struct x1764 checksum;\n"); - fprintf(cf, " x1764_init(&checksum);\n"); + fprintf(cf, " toku_x1764_init(&checksum);\n"); fprintf(cf, " r = toku_fread_uint32_t(infile, &len1, &checksum, &ignorelen); if (r!=0) return r;\n"); fprintf(cf, " int cmd=fgetc(infile);\n"); fprintf(cf, " if (cmd==EOF) return EOF;\n"); fprintf(cf, " char cmdchar = (char)cmd;\n"); - fprintf(cf, " 
x1764_add(&checksum, &cmdchar, 1);\n"); + fprintf(cf, " toku_x1764_add(&checksum, &cmdchar, 1);\n"); fprintf(cf, " le->cmd=(enum lt_cmd)cmd;\n"); fprintf(cf, " switch ((enum lt_cmd)cmd) {\n"); DO_LOGTYPES(lt, { @@ -639,14 +639,14 @@ generate_logprint (void) { fprintf(pf, " uint32_t len1, crc_in_file;\n"); fprintf(pf, " uint32_t ignorelen=0;\n"); fprintf(pf, " struct x1764 checksum;\n"); - fprintf(pf, " x1764_init(&checksum);\n"); + fprintf(pf, " toku_x1764_init(&checksum);\n"); fprintf(pf, " r=toku_fread_uint32_t(f, &len1, &checksum, &ignorelen);\n"); fprintf(pf, " if (r==EOF) return EOF;\n"); fprintf(pf, " cmd=fgetc(f);\n"); fprintf(pf, " if (cmd==EOF) return DB_BADFORMAT;\n"); fprintf(pf, " uint32_t len_in_file, len=1+4; // cmd + len1\n"); fprintf(pf, " char charcmd = (char)cmd;\n"); - fprintf(pf, " x1764_add(&checksum, &charcmd, 1);\n"); + fprintf(pf, " toku_x1764_add(&checksum, &charcmd, 1);\n"); fprintf(pf, " switch ((enum lt_cmd)cmd) {\n"); DO_LOGTYPES(lt, { if (strlen(lt->name)>maxnamelen) maxnamelen=strlen(lt->name); }); DO_LOGTYPES(lt, { @@ -664,7 +664,7 @@ generate_logprint (void) { fprintf(pf, "); if (r!=0) return r;\n"); }); fprintf(pf, " {\n"); - fprintf(pf, " uint32_t actual_murmur = x1764_finish(&checksum);\n"); + fprintf(pf, " uint32_t actual_murmur = toku_x1764_finish(&checksum);\n"); fprintf(pf, " r = toku_fread_uint32_t_nocrclen (f, &crc_in_file); len+=4; if (r!=0) return r;\n"); fprintf(pf, " fprintf(outf, \" crc=%%08x\", crc_in_file);\n"); fprintf(pf, " if (crc_in_file!=actual_murmur) fprintf(outf, \" checksum=%%08x\", actual_murmur);\n"); @@ -798,7 +798,7 @@ generate_rollbacks (void) { fprintf(cf, " }\n assert(0);\n return 0;\n"); fprintf(cf, "}\n"); - fprintf2(cf, hf, "int toku_parse_rollback(unsigned char *buf, uint32_t n_bytes, struct roll_entry **itemp, MEMARENA ma)"); + fprintf2(cf, hf, "int toku_parse_rollback(unsigned char *buf, uint32_t n_bytes, struct roll_entry **itemp, memarena *ma)"); fprintf(hf, ";\n"); fprintf(cf, " {\n assert(n_bytes>0);\n struct roll_entry *item;\n enum rt_cmd cmd = (enum rt_cmd)(buf[0]);\n size_t mem_needed;\n"); fprintf(cf, " struct rbuf rc = {buf, n_bytes, 1};\n"); @@ -806,7 +806,7 @@ generate_rollbacks (void) { DO_ROLLBACKS(lt, { fprintf(cf, " case RT_%s:\n", lt->name); fprintf(cf, " mem_needed = sizeof(item->u.%s) + __builtin_offsetof(struct roll_entry, u.%s);\n", lt->name, lt->name); - fprintf(cf, " CAST_FROM_VOIDP(item, malloc_in_memarena(ma, mem_needed));\n"); + fprintf(cf, " CAST_FROM_VOIDP(item, ma->malloc_from_arena(mem_needed));\n"); fprintf(cf, " item->cmd = cmd;\n"); DO_FIELDS(field_type, lt, fprintf(cf, " rbuf_ma_%s(&rc, ma, &item->u.%s.%s);\n", field_type->type, lt->name, field_type->name)); fprintf(cf, " *itemp = item;\n"); @@ -849,16 +849,15 @@ int main (int argc, const char *const argv[]) { pf = fopen(printpath, "w"); assert(pf!=0); fprintf2(cf, hf, "/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */\n"); fprintf2(cf, hf, "// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:\n"); - fprintf(hf, "#ifndef LOG_HEADER_H\n"); - fprintf(hf, "#define LOG_HEADER_H\n"); + fprintf(hf, "#pragma once\n"); fprintf2(cf, hf, "/* Do not edit this file. This code generated by logformat.c. Copyright (c) 2007-2013 Tokutek Inc. */\n"); fprintf2(cf, hf, "#ident \"Copyright (c) 2007-2013 Tokutek Inc. 
All rights reserved.\"\n"); fprintf2(cf, pf, "#include \n"); fprintf2(cf, pf, "#include \n"); - fprintf2(cf, pf, "#include \n"); - fprintf2(cf, pf, "#include \n"); + fprintf2(cf, pf, "#include \n"); fprintf(hf, "#include \n"); - fprintf(hf, "#include \n"); + fprintf(hf, "#include \n"); + fprintf(hf, "#include \n"); generate_enum(); generate_log_struct(); generate_dispatch(); @@ -867,7 +866,6 @@ int main (int argc, const char *const argv[]) { generate_rollbacks(); generate_log_entry_functions(); generate_logprint(); - fprintf(hf, "#endif\n"); { int r=fclose(hf); assert(r==0); r=fclose(cf); assert(r==0); diff --git a/storage/tokudb/ft-index/ft/logger.cc b/storage/tokudb/ft-index/ft/logger/logger.cc similarity index 94% rename from storage/tokudb/ft-index/ft/logger.cc rename to storage/tokudb/ft-index/ft/logger/logger.cc index 4d1872d19a5c4..2296a2b43f876 100644 --- a/storage/tokudb/ft-index/ft/logger.cc +++ b/storage/tokudb/ft-index/ft/logger/logger.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,12 +94,13 @@ PATENT RIGHTS GRANT: #include #include -#include "ft.h" -#include "log-internal.h" -#include "txn_manager.h" -#include "rollback_log_node_cache.h" -#include "huge_page_detection.h" -#include +#include "ft/serialize/block_table.h" +#include "ft/ft.h" +#include "ft/logger/log-internal.h" +#include "ft/txn/txn_manager.h" +#include "ft/txn/rollback_log_node_cache.h" + +#include "util/status.h" static const int log_format_version=TOKU_LOG_VERSION; @@ -151,8 +152,8 @@ static bool is_a_logfile_any_version (const char *name, uint64_t *number_result, // added for #2424, improved for #2521 static bool is_a_logfile (const char *name, long long *number_result) { bool rval; - uint64_t result= 0; - uint32_t version= 0; + uint64_t result; + uint32_t version; rval = is_a_logfile_any_version(name, &result, &version); if (rval && version != TOKU_LOG_VERSION) rval = false; @@ -164,18 +165,12 @@ static bool is_a_logfile (const char *name, long long *number_result) { // TODO: can't fail int toku_logger_create (TOKULOGGER *resultp) { - if (complain_and_return_true_if_huge_pages_are_enabled()) { - *resultp = NULL; - errno = TOKUDB_HUGE_PAGES_ENABLED; - return TOKUDB_HUGE_PAGES_ENABLED; - } TOKULOGGER CALLOC(result); if (result==0) return get_error_errno(); result->is_open=false; result->write_log_files = true; result->trim_log_files = true; result->directory=0; - result->remove_finalize_callback = NULL; // fd is uninitialized on purpose // ct is uninitialized on purpose result->lg_max = 100<<20; // 100MB default @@ -187,7 +182,7 @@ int toku_logger_create (TOKULOGGER *resultp) { result->last_completed_checkpoint_lsn = ZERO_LSN; // next_log_file_number is uninitialized // n_in_file is uninitialized - result->write_block_size = FT_DEFAULT_NODE_SIZE; // default logging size is the same as the default brt block size + result->write_block_size = FT_DEFAULT_NODE_SIZE; // default logging size is the same as the default ft block size toku_logfilemgr_create(&result->logfilemgr); *resultp=result; ml_init(&result->input_lock); @@ -234,7 +229,7 @@ toku_logger_open_with_last_xid(const char *directory, TOKULOGGER logger, TXNID l if (logger->is_open) return EINVAL; int r; - TXNID last_xid_if_clean_shutdown= TXNID_NONE; + TXNID last_xid_if_clean_shutdown = TXNID_NONE; r = toku_logfilemgr_init(logger->logfilemgr, directory, &last_xid_if_clean_shutdown); if 
( r!=0 ) return r; @@ -274,32 +269,30 @@ bool toku_logger_rollback_is_open (TOKULOGGER logger) { #define MAX_CACHED_ROLLBACK_NODES 4096 -void -toku_logger_initialize_rollback_cache(TOKULOGGER logger, FT ft) { - toku_free_unused_blocknums(ft->blocktable, ft->h->root_blocknum); +void toku_logger_initialize_rollback_cache(TOKULOGGER logger, FT ft) { + ft->blocktable.free_unused_blocknums(ft->h->root_blocknum); logger->rollback_cache.init(MAX_CACHED_ROLLBACK_NODES); } -int -toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool create) { +int toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool create) { assert(logger->is_open); assert(!logger->rollback_cachefile); - FT_HANDLE t = NULL; // Note, there is no DB associated with this BRT. - toku_ft_handle_create(&t); - int r = toku_ft_handle_open(t, toku_product_name_strings.rollback_cachefile, create, create, cachetable, NULL_TXN); + FT_HANDLE ft_handle = nullptr; // Note, there is no DB associated with this FT. + toku_ft_handle_create(&ft_handle); + int r = toku_ft_handle_open(ft_handle, toku_product_name_strings.rollback_cachefile, create, create, cachetable, nullptr); if (r == 0) { - logger->rollback_cachefile = t->ft->cf; - toku_logger_initialize_rollback_cache(logger, t->ft); - - //Verify it is empty - //Must have no data blocks (rollback logs or otherwise). - toku_block_verify_no_data_blocks_except_root(t->ft->blocktable, t->ft->h->root_blocknum); - bool is_empty; - is_empty = toku_ft_is_empty_fast(t); + FT ft = ft_handle->ft; + logger->rollback_cachefile = ft->cf; + toku_logger_initialize_rollback_cache(logger, ft_handle->ft); + + // Verify it is empty + // Must have no data blocks (rollback logs or otherwise). + ft->blocktable.verify_no_data_blocks_except_root(ft->h->root_blocknum); + bool is_empty = toku_ft_is_empty_fast(ft_handle); assert(is_empty); } else { - toku_ft_handle_close(t); + toku_ft_handle_close(ft_handle); } return r; } @@ -313,15 +306,15 @@ void toku_logger_close_rollback_check_empty(TOKULOGGER logger, bool clean_shutdo CACHEFILE cf = logger->rollback_cachefile; // stored in logger at rollback cachefile open if (cf) { FT_HANDLE ft_to_close; - { //Find "brt" + { //Find "ft_to_close" logger->rollback_cache.destroy(); FT CAST_FROM_VOIDP(ft, toku_cachefile_get_userdata(cf)); if (clean_shutdown) { //Verify it is safe to close it. assert(!ft->h->dirty); //Must not be dirty. - toku_free_unused_blocknums(ft->blocktable, ft->h->root_blocknum); - //Must have no data blocks (rollback logs or otherwise). - toku_block_verify_no_data_blocks_except_root(ft->blocktable, ft->h->root_blocknum); + ft->blocktable.free_unused_blocknums(ft->h->root_blocknum); + // Must have no data blocks (rollback logs or otherwise). + ft->blocktable.verify_no_data_blocks_except_root(ft->h->root_blocknum); assert(!ft->h->dirty); } else { ft->h->dirty = 0; @@ -428,7 +421,7 @@ wait_till_output_available (TOKULOGGER logger) // Implementation hint: Use a pthread_cond_wait. 
// Entry: Holds the output_condition_lock (but not the inlock) // Exit: Holds the output_condition_lock and logger->output_is_available -// +// { tokutime_t t0 = toku_time_now(); while (!logger->output_is_available) { @@ -497,7 +490,7 @@ release_output (TOKULOGGER logger, LSN fsynced_lsn) toku_cond_broadcast(&logger->output_condition); toku_mutex_unlock(&logger->output_condition_lock); } - + static void swap_inbuf_outbuf (TOKULOGGER logger) // Effect: Swap the inbuf and outbuf @@ -634,7 +627,7 @@ int toku_logger_find_next_unused_log_file(const char *directory, long long *resu if (d==0) return get_error_errno(); while ((de=readdir(d))) { if (de==0) return get_error_errno(); - long long thisl; + long long thisl = -1; if ( is_a_logfile(de->d_name, &thisl) ) { if ((long long)thisl > maxf) maxf = thisl; } @@ -700,7 +693,7 @@ int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_lo while ((de=readdir(d))) { uint64_t thisl; uint32_t version_ignore; - if ( !(is_a_logfile_any_version(de->d_name, &thisl, &version_ignore)) ) continue; //#2424: Skip over files that don't match the exact logfile template + if ( !(is_a_logfile_any_version(de->d_name, &thisl, &version_ignore)) ) continue; //#2424: Skip over files that don't match the exact logfile template if (n_results+1>=result_limit) { result_limit*=2; XREALLOC_N(result_limit, result); @@ -714,7 +707,7 @@ int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_lo // which are one character longer than old log file names ("xxx.tokulog2"). The comparison function // won't look beyond the terminating NUL, so an extra character in the comparison string doesn't matter. // Allow room for terminating NUL after "xxx.tokulog13" even if result[0] is of form "xxx.tokulog2." - int width = sizeof(result[0]+2); + int width = sizeof(result[0]+2); qsort(result, n_results, width, logfilenamecompare); *resultp = result; *n_logfiles = n_results; @@ -722,6 +715,12 @@ int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_lo return d ? closedir(d) : 0; } +void toku_logger_free_logfiles(char **logfiles, int n_logfiles) { + for (int i = 0; i < n_logfiles; i++) + toku_free(logfiles[i]); + toku_free(logfiles); +} + static int open_logfile (TOKULOGGER logger) // Entry and Exit: This thread has permission to modify the output. 
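The wait_till_output_available and release_output hunks above show the logger's output-permission hand-off: waiters loop on a predicate under pthread_cond_wait, and the releaser flips the flag and broadcasts. A minimal, self-contained sketch of that pattern follows; output_token and its field names are simplified stand-ins for the logger's output_condition_lock, output_condition, and output_is_available.

    #include <pthread.h>

    struct output_token {
        pthread_mutex_t lock;
        pthread_cond_t  cond;
        bool available;
    };

    static void grab_output(output_token *t) {
        pthread_mutex_lock(&t->lock);
        while (!t->available) {                // re-check the predicate after every wakeup
            pthread_cond_wait(&t->cond, &t->lock);
        }
        t->available = false;                  // this thread now owns permission to write
        pthread_mutex_unlock(&t->lock);
    }

    static void release_output(output_token *t) {
        pthread_mutex_lock(&t->lock);
        t->available = true;
        pthread_cond_broadcast(&t->cond);      // wake all waiters; one wins the predicate check
        pthread_mutex_unlock(&t->lock);
    }

    int main(void) {
        output_token t;
        t.available = true;
        pthread_mutex_init(&t.lock, NULL);
        pthread_cond_init(&t.cond, NULL);

        grab_output(&t);
        // ... swap inbuf/outbuf and write the log buffer here ...
        release_output(&t);
        return 0;
    }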
{ @@ -730,7 +729,7 @@ static int open_logfile (TOKULOGGER logger) snprintf(fname, fnamelen, "%s/log%012lld.tokulog%d", logger->directory, logger->next_log_file_number, TOKU_LOG_VERSION); long long index = logger->next_log_file_number; if (logger->write_log_files) { - logger->fd = open(fname, O_CREAT+O_WRONLY+O_TRUNC+O_EXCL+O_BINARY, S_IRUSR+S_IWUSR); + logger->fd = open(fname, O_CREAT+O_WRONLY+O_TRUNC+O_EXCL+O_BINARY, S_IRUSR+S_IWUSR); if (logger->fd==-1) { return get_error_errno(); } @@ -748,7 +747,7 @@ static int open_logfile (TOKULOGGER logger) if ( logger->write_log_files ) { TOKULOGFILEINFO XMALLOC(lf_info); lf_info->index = index; - lf_info->maxlsn = logger->written_lsn; + lf_info->maxlsn = logger->written_lsn; lf_info->version = TOKU_LOG_VERSION; toku_logfilemgr_add_logfile_info(logger->logfilemgr, lf_info); } @@ -777,7 +776,7 @@ void toku_logger_maybe_trim_log(TOKULOGGER logger, LSN trim_lsn) int n_logfiles = toku_logfilemgr_num_logfiles(lfm); TOKULOGFILEINFO lf_info = NULL; - + if ( logger->write_log_files && logger->trim_log_files) { while ( n_logfiles > 1 ) { // don't delete current logfile uint32_t log_version; @@ -857,7 +856,7 @@ void toku_logger_maybe_fsync(TOKULOGGER logger, LSN lsn, int do_fsync, bool hold } static void -logger_write_buffer(TOKULOGGER logger, LSN *fsynced_lsn) +logger_write_buffer(TOKULOGGER logger, LSN *fsynced_lsn) // Entry: Holds the input lock and permission to modify output. // Exit: Holds only the permission to modify output. // Effect: Write the buffers to the output. If DO_FSYNC is true, then fsync. @@ -885,7 +884,7 @@ int toku_logger_restart(TOKULOGGER logger, LSN lastlsn) // close the log file if ( logger->write_log_files) { // fsyncs don't work to /dev/null - toku_file_fsync_without_accounting(logger->fd); + toku_file_fsync_without_accounting(logger->fd); } r = close(logger->fd); assert(r == 0); logger->fd = -1; @@ -908,7 +907,7 @@ void toku_logger_log_fcreate (TOKUTXN txn, const char *fname, FILENUM filenum, u if (txn) { BYTESTRING bs_fname = { .len = (uint32_t) strlen(fname), .data = (char *) fname }; // fsync log on fcreate - toku_log_fcreate (txn->logger, (LSN*)0, 1, txn, toku_txn_get_txnid(txn), filenum, + toku_log_fcreate (txn->logger, (LSN*)0, 1, txn, toku_txn_get_txnid(txn), filenum, bs_fname, mode, treeflags, nodesize, basementnodesize, compression_method); } } @@ -946,7 +945,7 @@ int toku_fread_uint8_t (FILE *f, uint8_t *v, struct x1764 *mm, uint32_t *len) { int vi=fgetc(f); if (vi==EOF) return -1; uint8_t vc=(uint8_t)vi; - x1764_add(mm, &vc, 1); + toku_x1764_add(mm, &vc, 1); (*len)++; *v = vc; return 0; @@ -1011,8 +1010,8 @@ int toku_fread_TXNID (FILE *f, TXNID *txnid, struct x1764 *checksum, uint32_t } int toku_fread_TXNID_PAIR (FILE *f, TXNID_PAIR *txnid, struct x1764 *checksum, uint32_t *len) { - TXNID parent= TXNID_NONE; - TXNID child= TXNID_NONE; + TXNID parent; + TXNID child; int r; r = toku_fread_TXNID(f, &parent, checksum, len); if (r != 0) { return r; } r = toku_fread_TXNID(f, &child, checksum, len); if (r != 0) { return r; } @@ -1115,7 +1114,7 @@ int toku_logprint_XIDP (FILE *outf, FILE *inf, const char *fieldname, struct x17 XIDP vp; int r = toku_fread_XIDP(inf, &vp, checksum, len); if (r!=0) return r; - fprintf(outf, "%s={formatID=0x%lx gtrid_length=%ld bqual_length=%ld data=", fieldname, vp->formatID, vp->gtrid_length, vp->bqual_length); + fprintf(outf, " %s={formatID=0x%lx gtrid_length=%ld bqual_length=%ld data=", fieldname, vp->formatID, vp->gtrid_length, vp->bqual_length); toku_print_bytes(outf, vp->gtrid_length + 
vp->bqual_length, vp->data); fprintf(outf, "}"); toku_free(vp); @@ -1294,7 +1293,7 @@ static int peek_at_log (TOKULOGGER logger, char* filename, LSN *first_lsn) { if (logger->write_log_files) printf("couldn't open: %s\n", strerror(er)); return er; } - enum { SKIP = 12+1+4 }; // read the 12 byte header, the first cmd, and the first len + enum { SKIP = 12+1+4 }; // read the 12 byte header, the first message, and the first len unsigned char header[SKIP+8]; int r = read(fd, header, SKIP+8); if (r!=SKIP+8) return 0; // cannot determine that it's archivable, so we'll assume no. If a later-log is archivable is then this one will be too. @@ -1346,7 +1345,7 @@ int toku_logger_log_archive (TOKULOGGER logger, char ***logs_p, int flags) { for (i=all_n_logs-2; i>=0; i--) { // start at all_n_logs-2 because we never archive the most recent log r = peek_at_log(logger, all_logs[i], &earliest_lsn_in_logfile); if (r!=0) continue; // In case of error, just keep going - + if (earliest_lsn_in_logfile.lsn <= save_lsn.lsn) { break; } @@ -1398,18 +1397,18 @@ void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn) { static LOGGER_STATUS_S logger_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(logger_status, k, c, t, "logger: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(logger_status, k, c, t, "logger: " l, inc) static void status_init(void) { // Note, this function initializes the keyname, type, and legend fields. // Value fields are initialized to zero by compiler. STATUS_INIT(LOGGER_NEXT_LSN, nullptr, UINT64, "next LSN", TOKU_ENGINE_STATUS); - STATUS_INIT(LOGGER_NUM_WRITES, LOGGER_WRITES, UINT64, "writes", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(LOGGER_BYTES_WRITTEN, LOGGER_WRITES_BYTES, UINT64, "writes (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(LOGGER_UNCOMPRESSED_BYTES_WRITTEN, LOGGER_WRITES_UNCOMPRESSED_BYTES, UINT64, "writes (uncompressed bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(LOGGER_TOKUTIME_WRITES, LOGGER_WRITES_SECONDS, TOKUTIME, "writes (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); - STATUS_INIT(LOGGER_WAIT_BUF_LONG, LOGGER_WAIT_LONG, UINT64, "count", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(LOGGER_NUM_WRITES, LOGGER_WRITES, UINT64, "writes", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(LOGGER_BYTES_WRITTEN, LOGGER_WRITES_BYTES, UINT64, "writes (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(LOGGER_UNCOMPRESSED_BYTES_WRITTEN, LOGGER_WRITES_UNCOMPRESSED_BYTES, UINT64, "writes (uncompressed bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(LOGGER_TOKUTIME_WRITES, LOGGER_WRITES_SECONDS, TOKUTIME, "writes (seconds)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); + STATUS_INIT(LOGGER_WAIT_BUF_LONG, LOGGER_WAIT_LONG, UINT64, "number of long logger write operations", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); logger_status.initialized = true; } #undef STATUS_INIT @@ -1435,7 +1434,7 @@ toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS statp) { ////////////////////////////////////////////////////////////////////////////////////////////////////// -// Used for upgrade: +// Used for upgrade: // if any valid log files exist in log_dir, then // set *found_any_logs to true and set *version_found to version number of latest log int diff --git a/storage/tokudb/ft-index/ft/logger.h b/storage/tokudb/ft-index/ft/logger/logger.h similarity index 81% rename from storage/tokudb/ft-index/ft/logger.h rename to storage/tokudb/ft-index/ft/logger/logger.h index 
acbb5961d1ed9..83e6c9a73785e 100644 --- a/storage/tokudb/ft-index/ft/logger.h +++ b/storage/tokudb/ft-index/ft/logger/logger.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_LOGGER_H -#define TOKU_LOGGER_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,17 +87,26 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "fttypes.h" -#include "ft_layout_version.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/ft_layout_version.h" +#include "ft/txn/txn.h" + +typedef struct tokulogger *TOKULOGGER; enum { TOKU_LOG_VERSION_1 = 1, TOKU_LOG_VERSION_2 = 2, //After 2 we linked the log version to the FT_LAYOUT VERSION. //So it went from 2 to 13 (3-12 do not exist) + TOKU_LOG_VERSION_24 = 24, + TOKU_LOG_VERSION_25 = 25, // change rollinclude rollback log entry + TOKU_LOG_VERSION_26 = 26, // no change from 25 + TOKU_LOG_VERSION_27 = 27, // no change from 26 TOKU_LOG_VERSION = FT_LAYOUT_VERSION, TOKU_LOG_MIN_SUPPORTED_VERSION = FT_LAYOUT_MIN_SUPPORTED_VERSION, }; @@ -109,8 +116,8 @@ int toku_logger_open (const char *directory, TOKULOGGER logger); int toku_logger_open_with_last_xid(const char *directory, TOKULOGGER logger, TXNID last_xid); void toku_logger_shutdown(TOKULOGGER logger); int toku_logger_close(TOKULOGGER *loggerp); -void toku_logger_initialize_rollback_cache(TOKULOGGER logger, FT ft); -int toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool create); +void toku_logger_initialize_rollback_cache(TOKULOGGER logger, struct ft *ft); +int toku_logger_open_rollback(TOKULOGGER logger, struct cachetable *ct, bool create); void toku_logger_close_rollback(TOKULOGGER logger); void toku_logger_close_rollback_check_empty(TOKULOGGER logger, bool clean_shutdown); bool toku_logger_rollback_is_open (TOKULOGGER); // return true iff the rollback is open. @@ -118,7 +125,7 @@ bool toku_logger_rollback_is_open (TOKULOGGER); // return true iff the rollback void toku_logger_fsync (TOKULOGGER logger); void toku_logger_fsync_if_lsn_not_fsynced(TOKULOGGER logger, LSN lsn); int toku_logger_is_open(TOKULOGGER logger); -void toku_logger_set_cachetable (TOKULOGGER logger, CACHETABLE ct); +void toku_logger_set_cachetable (TOKULOGGER logger, struct cachetable *ct); int toku_logger_set_lg_max(TOKULOGGER logger, uint32_t lg_max); int toku_logger_get_lg_max(TOKULOGGER logger, uint32_t *lg_maxp); int toku_logger_set_lg_bsize(TOKULOGGER logger, uint32_t bsize); @@ -139,10 +146,24 @@ int toku_logger_restart(TOKULOGGER logger, LSN lastlsn); // given LSN and delete them. void toku_logger_maybe_trim_log(TOKULOGGER logger, LSN oldest_open_lsn); +// At the ft layer, a FILENUM uniquely identifies an open file. 
+struct FILENUM { + uint32_t fileid; +}; +static const FILENUM FILENUM_NONE = { .fileid = UINT32_MAX }; + +struct FILENUMS { + uint32_t num; + FILENUM *filenums; +}; + void toku_logger_log_fcreate(TOKUTXN txn, const char *fname, FILENUM filenum, uint32_t mode, uint32_t flags, uint32_t nodesize, uint32_t basementnodesize, enum toku_compression_method compression_method); void toku_logger_log_fdelete(TOKUTXN txn, FILENUM filenum); void toku_logger_log_fopen(TOKUTXN txn, const char * fname, FILENUM filenum, uint32_t treeflags); +// the log generation code requires a typedef if we want to pass by pointer +typedef TOKU_XA_XID *XIDP; + int toku_fread_uint8_t (FILE *f, uint8_t *v, struct x1764 *mm, uint32_t *len); int toku_fread_uint32_t_nocrclen (FILE *f, uint32_t *v); int toku_fread_uint32_t (FILE *f, uint32_t *v, struct x1764 *checksum, uint32_t *len); @@ -258,8 +279,63 @@ void toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS s); int toku_get_version_of_logs_on_disk(const char *log_dir, bool *found_any_logs, uint32_t *version_found); -TXN_MANAGER toku_logger_get_txn_manager(TOKULOGGER logger); - -static const TOKULOGGER NULL_logger __attribute__((__unused__)) = NULL; - -#endif /* TOKU_LOGGER_H */ +struct txn_manager *toku_logger_get_txn_manager(TOKULOGGER logger); + +// For serialize / deserialize + +#include "ft/serialize/wbuf.h" + +static inline void wbuf_nocrc_FILENUM(struct wbuf *wb, FILENUM fileid) { + wbuf_nocrc_uint(wb, fileid.fileid); +} + +static inline void wbuf_FILENUM(struct wbuf *wb, FILENUM fileid) { + wbuf_uint(wb, fileid.fileid); +} + +static inline void wbuf_nocrc_FILENUMS(struct wbuf *wb, FILENUMS v) { + wbuf_nocrc_uint(wb, v.num); + for (uint32_t i = 0; i < v.num; i++) { + wbuf_nocrc_FILENUM(wb, v.filenums[i]); + } +} + +static inline void wbuf_FILENUMS(struct wbuf *wb, FILENUMS v) { + wbuf_uint(wb, v.num); + for (uint32_t i = 0; i < v.num; i++) { + wbuf_FILENUM(wb, v.filenums[i]); + } +} + +static inline void wbuf_nocrc_XIDP (struct wbuf *w, TOKU_XA_XID *xid) { + wbuf_nocrc_uint32_t(w, xid->formatID); + wbuf_nocrc_uint8_t(w, xid->gtrid_length); + wbuf_nocrc_uint8_t(w, xid->bqual_length); + wbuf_nocrc_literal_bytes(w, xid->data, xid->gtrid_length+xid->bqual_length); +} + +#include "ft/serialize/rbuf.h" + +static inline void rbuf_FILENUM(struct rbuf *rb, FILENUM *filenum) { + filenum->fileid = rbuf_int(rb); +} +static inline void rbuf_ma_FILENUM(struct rbuf *rb, memarena *UU(ma), FILENUM *filenum) { + rbuf_FILENUM(rb, filenum); +} + +static inline void rbuf_FILENUMS(struct rbuf *rb, FILENUMS *filenums) { + filenums->num = rbuf_int(rb); + XMALLOC_N(filenums->num, filenums->filenums); + for (uint32_t i = 0; i < filenums->num; i++) { + rbuf_FILENUM(rb, &(filenums->filenums[i])); + } +} + +static inline void rbuf_ma_FILENUMS(struct rbuf *rb, memarena *ma, FILENUMS *filenums) { + rbuf_ma_uint32_t(rb, ma, &(filenums->num)); + filenums->filenums = (FILENUM *) ma->malloc_from_arena(filenums->num * sizeof(FILENUM)); + assert(filenums->filenums != NULL); + for (uint32_t i = 0; i < filenums->num; i++) { + rbuf_ma_FILENUM(rb, ma, &(filenums->filenums[i])); + } +} diff --git a/storage/tokudb/ft-index/ft/recover.cc b/storage/tokudb/ft-index/ft/logger/recover.cc similarity index 92% rename from storage/tokudb/ft-index/ft/recover.cc rename to storage/tokudb/ft-index/ft/logger/recover.cc index 79e5c7d1d90a7..ca284568f07bd 100644 --- a/storage/tokudb/ft-index/ft/recover.cc +++ b/storage/tokudb/ft-index/ft/logger/recover.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - 
TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,16 +89,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include "ft.h" -#include "log-internal.h" -#include "logcursor.h" -#include "cachetable.h" -#include "checkpoint.h" -#include "txn_manager.h" +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/checkpoint.h" +#include "ft/ft.h" +#include "ft/log_header.h" +#include "ft/logger/log-internal.h" +#include "ft/logger/logcursor.h" +#include "ft/txn/txn_manager.h" +#include "util/omt.h" -int tokudb_recovery_trace = 0; // turn on recovery tracing, default off. +int tokuft_recovery_trace = 0; // turn on recovery tracing, default off. //#define DO_VERIFY_COUNTS #ifdef DO_VERIFY_COUNTS @@ -154,9 +155,9 @@ struct file_map_tuple { struct __toku_db fake_db; }; -static void file_map_tuple_init(struct file_map_tuple *tuple, FILENUM filenum, FT_HANDLE brt, char *iname) { +static void file_map_tuple_init(struct file_map_tuple *tuple, FILENUM filenum, FT_HANDLE ft_handle, char *iname) { tuple->filenum = filenum; - tuple->ft_handle = brt; + tuple->ft_handle = ft_handle; tuple->iname = iname; // use a fake DB for comparisons, using the ft's cmp descriptor memset(&tuple->fake_db, 0, sizeof(tuple->fake_db)); @@ -171,9 +172,9 @@ static void file_map_tuple_destroy(struct file_map_tuple *tuple) { } } -// Map filenum to brt +// Map filenum to ft_handle struct file_map { - OMT filenums; + toku::omt *filenums; }; // The recovery environment @@ -199,31 +200,33 @@ typedef struct recover_env *RECOVER_ENV; static void file_map_init(struct file_map *fmap) { - int r = toku_omt_create(&fmap->filenums); - assert(r == 0); + XMALLOC(fmap->filenums); + fmap->filenums->create(); } static void file_map_destroy(struct file_map *fmap) { - toku_omt_destroy(&fmap->filenums); + fmap->filenums->destroy(); + toku_free(fmap->filenums); + fmap->filenums = nullptr; } static uint32_t file_map_get_num_dictionaries(struct file_map *fmap) { - return toku_omt_size(fmap->filenums); + return fmap->filenums->size(); } static void file_map_close_dictionaries(struct file_map *fmap, LSN oplsn) { int r; while (1) { - uint32_t n = toku_omt_size(fmap->filenums); - if (n == 0) + uint32_t n = fmap->filenums->size(); + if (n == 0) { break; - OMTVALUE v; - r = toku_omt_fetch(fmap->filenums, n-1, &v); + } + struct file_map_tuple *tuple; + r = fmap->filenums->fetch(n - 1, &tuple); assert(r == 0); - r = toku_omt_delete_at(fmap->filenums, n-1); + r = fmap->filenums->delete_at(n - 1); assert(r == 0); - struct file_map_tuple *CAST_FROM_VOIDP(tuple, v); assert(tuple->ft_handle); // Logging is on again, but we must pass the right LSN into close. 
if (tuple->ft_handle) { // it's a DB, not a rollback file @@ -234,27 +237,29 @@ static void file_map_close_dictionaries(struct file_map *fmap, LSN oplsn) { } } -static int file_map_h(OMTVALUE omtv, void *v) { - struct file_map_tuple *CAST_FROM_VOIDP(a, omtv); - FILENUM *CAST_FROM_VOIDP(b, v); - if (a->filenum.fileid < b->fileid) return -1; - if (a->filenum.fileid > b->fileid) return +1; - return 0; +static int file_map_h(struct file_map_tuple *const &a, const FILENUM &b) { + if (a->filenum.fileid < b.fileid) { + return -1; + } else if (a->filenum.fileid > b.fileid) { + return 1; + } else { + return 0; + } } -static int file_map_insert (struct file_map *fmap, FILENUM fnum, FT_HANDLE brt, char *iname) { +static int file_map_insert (struct file_map *fmap, FILENUM fnum, FT_HANDLE ft_handle, char *iname) { struct file_map_tuple *XMALLOC(tuple); - file_map_tuple_init(tuple, fnum, brt, iname); - int r = toku_omt_insert(fmap->filenums, tuple, file_map_h, &fnum, NULL); + file_map_tuple_init(tuple, fnum, ft_handle, iname); + int r = fmap->filenums->insert(tuple, fnum, nullptr); return r; } static void file_map_remove(struct file_map *fmap, FILENUM fnum) { - OMTVALUE v; uint32_t idx; - int r = toku_omt_find_zero(fmap->filenums, file_map_h, &fnum, &v, &idx); + uint32_t idx; + struct file_map_tuple *tuple; + int r = fmap->filenums->find_zero(fnum, &tuple, &idx); if (r == 0) { - struct file_map_tuple *CAST_FROM_VOIDP(tuple, v); - r = toku_omt_delete_at(fmap->filenums, idx); + r = fmap->filenums->delete_at(idx); file_map_tuple_destroy(tuple); toku_free(tuple); } @@ -262,14 +267,15 @@ static void file_map_remove(struct file_map *fmap, FILENUM fnum) { // Look up file info: given FILENUM, return file_map_tuple (or DB_NOTFOUND) static int file_map_find(struct file_map *fmap, FILENUM fnum, struct file_map_tuple **file_map_tuple) { - OMTVALUE v; uint32_t idx; - int r = toku_omt_find_zero(fmap->filenums, file_map_h, &fnum, &v, &idx); + uint32_t idx; + struct file_map_tuple *tuple; + int r = fmap->filenums->find_zero(fnum, &tuple, &idx); if (r == 0) { - struct file_map_tuple *CAST_FROM_VOIDP(tuple, v); assert(tuple->filenum.fileid == fnum.fileid); *file_map_tuple = tuple; + } else { + assert(r == DB_NOTFOUND); } - else assert(r==DB_NOTFOUND); return r; } @@ -311,7 +317,7 @@ static int recover_env_init (RECOVER_ENV renv, renv->cp = toku_cachetable_get_checkpointer(renv->ct); toku_dbt_array_init(&renv->dest_keys, 1); toku_dbt_array_init(&renv->dest_vals, 1); - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) fprintf(stderr, "%s:%d\n", __FUNCTION__, __LINE__); return r; } @@ -319,7 +325,7 @@ static int recover_env_init (RECOVER_ENV renv, static void recover_env_cleanup (RECOVER_ENV renv) { int r; - assert(toku_omt_size(renv->fmap.filenums)==0); + invariant_zero(renv->fmap.filenums->size()); file_map_destroy(&renv->fmap); if (renv->destroy_logger_at_end) { @@ -338,7 +344,7 @@ static void recover_env_cleanup (RECOVER_ENV renv) { toku_dbt_array_destroy(&renv->dest_keys); toku_dbt_array_destroy(&renv->dest_vals); - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) fprintf(stderr, "%s:%d\n", __FUNCTION__, __LINE__); } @@ -350,48 +356,48 @@ static const char *recover_state(RECOVER_ENV renv) { static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, bool must_create, int UU(mode), BYTESTRING *bs_iname, FILENUM filenum, uint32_t treeflags, TOKUTXN txn, uint32_t nodesize, uint32_t basementnodesize, enum toku_compression_method compression_method, LSN max_acceptable_lsn) { int r = 0; - FT_HANDLE brt = 
NULL; + FT_HANDLE ft_handle = NULL; char *iname = fixup_fname(bs_iname); - toku_ft_handle_create(&brt); - toku_ft_set_flags(brt, treeflags); + toku_ft_handle_create(&ft_handle); + toku_ft_set_flags(ft_handle, treeflags); if (nodesize != 0) { - toku_ft_handle_set_nodesize(brt, nodesize); + toku_ft_handle_set_nodesize(ft_handle, nodesize); } if (basementnodesize != 0) { - toku_ft_handle_set_basementnodesize(brt, basementnodesize); + toku_ft_handle_set_basementnodesize(ft_handle, basementnodesize); } if (compression_method != TOKU_DEFAULT_COMPRESSION_METHOD) { - toku_ft_handle_set_compression_method(brt, compression_method); + toku_ft_handle_set_compression_method(ft_handle, compression_method); } // set the key compare functions if (!(treeflags & TOKU_DB_KEYCMP_BUILTIN) && renv->bt_compare) { - toku_ft_set_bt_compare(brt, renv->bt_compare); + toku_ft_set_bt_compare(ft_handle, renv->bt_compare); } if (renv->update_function) { - toku_ft_set_update(brt, renv->update_function); + toku_ft_set_update(ft_handle, renv->update_function); } // TODO mode (FUTURE FEATURE) //mode = mode; - r = toku_ft_handle_open_recovery(brt, iname, must_create, must_create, renv->ct, txn, filenum, max_acceptable_lsn); + r = toku_ft_handle_open_recovery(ft_handle, iname, must_create, must_create, renv->ct, txn, filenum, max_acceptable_lsn); if (r != 0) { //Note: If ft_handle_open fails, then close_ft will NOT write a header to disk. //No need to provide lsn, so use the regular toku_ft_handle_close function - toku_ft_handle_close(brt); + toku_ft_handle_close(ft_handle); toku_free(iname); if (r == ENOENT) //Not an error to simply be missing. r = 0; return r; } - file_map_insert(&renv->fmap, filenum, brt, iname); + file_map_insert(&renv->fmap, filenum, ft_handle, iname); return 0; } @@ -417,7 +423,7 @@ static int toku_recover_begin_checkpoint (struct logtype_begin_checkpoint *l, RE r = 0; // ignore it (log only has a begin checkpoint) break; default: - fprintf(stderr, "Tokudb recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); + fprintf(stderr, "TokuFT recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); abort(); break; } @@ -427,7 +433,7 @@ static int toku_recover_begin_checkpoint (struct logtype_begin_checkpoint *l, RE static int toku_recover_backward_begin_checkpoint (struct logtype_begin_checkpoint *l, RECOVER_ENV renv) { int r; time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery bw_begin_checkpoint at %" PRIu64 " timestamp %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, recover_state(renv)); + fprintf(stderr, "%.24s TokuFT recovery bw_begin_checkpoint at %" PRIu64 " timestamp %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, recover_state(renv)); switch (renv->ss.ss) { case BACKWARD_NEWER_CHECKPOINT_END: // incomplete checkpoint, nothing to do @@ -439,13 +445,13 @@ static int toku_recover_backward_begin_checkpoint (struct logtype_begin_checkpoi renv->ss.checkpoint_begin_timestamp = l->timestamp; renv->goforward = true; tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery turning around at begin checkpoint %" PRIu64 " time %" PRIu64 "\n", + fprintf(stderr, "%.24s TokuFT recovery turning around at begin checkpoint %" PRIu64 " time %" PRIu64 "\n", ctime(&tnow), l->lsn.lsn, renv->ss.checkpoint_end_timestamp - renv->ss.checkpoint_begin_timestamp); r = 0; break; default: - fprintf(stderr, "Tokudb recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); + fprintf(stderr, "TokuFT 
recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); abort(); break; } @@ -475,7 +481,7 @@ static int toku_recover_end_checkpoint (struct logtype_end_checkpoint *l, RECOVE static int toku_recover_backward_end_checkpoint (struct logtype_end_checkpoint *l, RECOVER_ENV renv) { time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery bw_end_checkpoint at %" PRIu64 " timestamp %" PRIu64 " xid %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, l->lsn_begin_checkpoint.lsn, recover_state(renv)); + fprintf(stderr, "%.24s TokuFT recovery bw_end_checkpoint at %" PRIu64 " timestamp %" PRIu64 " xid %" PRIu64 " (%s)\n", ctime(&tnow), l->lsn.lsn, l->timestamp, l->lsn_begin_checkpoint.lsn, recover_state(renv)); switch (renv->ss.ss) { case BACKWARD_NEWER_CHECKPOINT_END: renv->ss.ss = BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END; @@ -484,12 +490,12 @@ static int toku_recover_backward_end_checkpoint (struct logtype_end_checkpoint * renv->ss.checkpoint_end_timestamp = l->timestamp; return 0; case BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END: - fprintf(stderr, "Tokudb recovery %s:%d Should not see two end_checkpoint log entries without an intervening begin_checkpoint\n", __FILE__, __LINE__); + fprintf(stderr, "TokuFT recovery %s:%d Should not see two end_checkpoint log entries without an intervening begin_checkpoint\n", __FILE__, __LINE__); abort(); default: break; } - fprintf(stderr, "Tokudb recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); + fprintf(stderr, "TokuFT recovery %s: %d Unknown checkpoint state %d\n", __FILE__, __LINE__, (int)renv->ss.ss); abort(); } @@ -826,7 +832,7 @@ static int toku_recover_fcreate (struct logtype_fcreate *l, RECOVER_ENV renv) { if (r != 0) { int er = get_error_errno(); if (er != ENOENT) { - fprintf(stderr, "Tokudb recovery %s:%d unlink %s %d\n", __FUNCTION__, __LINE__, iname, er); + fprintf(stderr, "TokuFT recovery %s:%d unlink %s %d\n", __FUNCTION__, __LINE__, iname, er); toku_free(iname); return r; } @@ -1253,7 +1259,7 @@ static int toku_recover_backward_hot_index(struct logtype_hot_index *UU(l), RECO // Effects: If there are no log files, or if there is a clean "shutdown" at // the end of the log, then we don't need recovery to run. // Returns: true if we need recovery, otherwise false. 
-int tokudb_needs_recovery(const char *log_dir, bool ignore_log_empty) { +int tokuft_needs_recovery(const char *log_dir, bool ignore_log_empty) { int needs_recovery; int r; TOKULOGCURSOR logcursor = NULL; @@ -1377,7 +1383,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di struct log_entry *le = NULL; time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery starting in env %s\n", ctime(&tnow), env_dir); + fprintf(stderr, "%.24s TokuFT recovery starting in env %s\n", ctime(&tnow), env_dir); char org_wd[1000]; { @@ -1398,7 +1404,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di r = toku_logcursor_last(logcursor, &le); if (r != 0) { - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) fprintf(stderr, "RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, r); rr = DB_RUNRECOVERY; goto errorexit; } @@ -1413,10 +1419,10 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di toku_struct_stat buf; if (toku_stat(env_dir, &buf)!=0) { rr = get_error_errno(); - fprintf(stderr, "%.24s Tokudb recovery error: directory does not exist: %s\n", ctime(&tnow), env_dir); + fprintf(stderr, "%.24s TokuFT recovery error: directory does not exist: %s\n", ctime(&tnow), env_dir); goto errorexit; } else if (!S_ISDIR(buf.st_mode)) { - fprintf(stderr, "%.24s Tokudb recovery error: this file is supposed to be a directory, but is not: %s\n", ctime(&tnow), env_dir); + fprintf(stderr, "%.24s TokuFT recovery error: this file is supposed to be a directory, but is not: %s\n", ctime(&tnow), env_dir); rr = ENOTDIR; goto errorexit; } } @@ -1425,13 +1431,13 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di tnow = time(NULL); time_t tlast; tlast = tnow; - fprintf(stderr, "%.24s Tokudb recovery scanning backward from %" PRIu64 "\n", ctime(&tnow), lastlsn.lsn); + fprintf(stderr, "%.24s TokuFT recovery scanning backward from %" PRIu64 "\n", ctime(&tnow), lastlsn.lsn); for (unsigned i=0; 1; i++) { // get the previous log entry (first time gets the last one) le = NULL; r = toku_logcursor_prev(logcursor, &le); - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) recover_trace_le(__FUNCTION__, __LINE__, r, le); if (r != 0) { if (r == DB_NOTFOUND) @@ -1445,7 +1451,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di tnow = time(NULL); if (tnow - tlast >= TOKUDB_RECOVERY_PROGRESS_TIME) { thislsn = toku_log_entry_get_lsn(le); - fprintf(stderr, "%.24s Tokudb recovery scanning backward from %" PRIu64 " at %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, recover_state(renv)); + fprintf(stderr, "%.24s TokuFT recovery scanning backward from %" PRIu64 " at %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, recover_state(renv)); tlast = tnow; } } @@ -1454,10 +1460,10 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di assert(renv->ss.ss == BACKWARD_BETWEEN_CHECKPOINT_BEGIN_END || renv->ss.ss == BACKWARD_NEWER_CHECKPOINT_END); logtype_dispatch_assign(le, toku_recover_backward_, r, renv); - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) recover_trace_le(__FUNCTION__, __LINE__, r, le); if (r != 0) { - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) fprintf(stderr, "DB_RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, r); rr = DB_RUNRECOVERY; goto errorexit; @@ -1474,7 +1480,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di assert(le); thislsn = 
toku_log_entry_get_lsn(le); tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery starts scanning forward to %" PRIu64 " from %" PRIu64 " left %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv)); + fprintf(stderr, "%.24s TokuFT recovery starts scanning forward to %" PRIu64 " from %" PRIu64 " left %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv)); for (unsigned i=0; 1; i++) { @@ -1483,7 +1489,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di tnow = time(NULL); if (tnow - tlast >= TOKUDB_RECOVERY_PROGRESS_TIME) { thislsn = toku_log_entry_get_lsn(le); - fprintf(stderr, "%.24s Tokudb recovery scanning forward to %" PRIu64 " at %" PRIu64 " left %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv)); + fprintf(stderr, "%.24s TokuFT recovery scanning forward to %" PRIu64 " at %" PRIu64 " left %" PRIu64 " (%s)\n", ctime(&tnow), lastlsn.lsn, thislsn.lsn, lastlsn.lsn - thislsn.lsn, recover_state(renv)); tlast = tnow; } } @@ -1492,10 +1498,10 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di assert(renv->ss.ss == FORWARD_BETWEEN_CHECKPOINT_BEGIN_END || renv->ss.ss == FORWARD_NEWER_CHECKPOINT_END); logtype_dispatch_assign(le, toku_recover_, r, renv); - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) recover_trace_le(__FUNCTION__, __LINE__, r, le); if (r != 0) { - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) fprintf(stderr, "DB_RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, r); rr = DB_RUNRECOVERY; goto errorexit; @@ -1504,7 +1510,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di // get the next log entry le = NULL; r = toku_logcursor_next(logcursor, &le); - if (tokudb_recovery_trace) + if (tokuft_recovery_trace) recover_trace_le(__FUNCTION__, __LINE__, r, le); if (r != 0) { if (r == DB_NOTFOUND) @@ -1532,7 +1538,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di uint32_t n = recover_get_num_live_txns(renv); if (n > 0) { tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery has %" PRIu32 " live transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : ""); + fprintf(stderr, "%.24s TokuFT recovery has %" PRIu32 " live transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : ""); } } recover_abort_all_live_txns(renv); @@ -1540,7 +1546,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di uint32_t n = recover_get_num_live_txns(renv); if (n > 0) { tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery has %" PRIu32 " prepared transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : ""); + fprintf(stderr, "%.24s TokuFT recovery has %" PRIu32 " prepared transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : ""); } } @@ -1549,7 +1555,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di n = file_map_get_num_dictionaries(&renv->fmap); if (n > 0) { tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery closing %" PRIu32 " dictionar%s\n", ctime(&tnow), n, n > 1 ? "ies" : "y"); + fprintf(stderr, "%.24s TokuFT recovery closing %" PRIu32 " dictionar%s\n", ctime(&tnow), n, n > 1 ? 
"ies" : "y"); } file_map_close_dictionaries(&renv->fmap, lastlsn); @@ -1561,17 +1567,17 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di // checkpoint tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery making a checkpoint\n", ctime(&tnow)); + fprintf(stderr, "%.24s TokuFT recovery making a checkpoint\n", ctime(&tnow)); r = toku_checkpoint(renv->cp, renv->logger, NULL, NULL, NULL, NULL, RECOVERY_CHECKPOINT); assert(r == 0); tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery done\n", ctime(&tnow)); + fprintf(stderr, "%.24s TokuFT recovery done\n", ctime(&tnow)); return 0; errorexit: tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb recovery failed %d\n", ctime(&tnow), rr); + fprintf(stderr, "%.24s TokuFT recovery failed %d\n", ctime(&tnow), rr); if (logcursor) { r = toku_logcursor_destroy(&logcursor); @@ -1596,7 +1602,7 @@ toku_recover_unlock(int lockfd) { return toku_single_process_unlock(&lockfd_copy); } -int tokudb_recover(DB_ENV *env, +int tokuft_recover(DB_ENV *env, prepared_txn_callback_t prepared_txn_callback, keep_cachetable_callback_t keep_cachetable_callback, TOKULOGGER logger, @@ -1614,7 +1620,7 @@ int tokudb_recover(DB_ENV *env, return r; int rr = 0; - if (tokudb_needs_recovery(log_dir, false)) { + if (tokuft_needs_recovery(log_dir, false)) { struct recover_env renv; r = recover_env_init(&renv, env_dir, @@ -1643,7 +1649,7 @@ int tokudb_recover(DB_ENV *env, // Return 0 if recovery log exists, ENOENT if log is missing int -tokudb_recover_log_exists(const char * log_dir) { +tokuft_recover_log_exists(const char * log_dir) { int r; TOKULOGCURSOR logcursor; diff --git a/storage/tokudb/ft-index/ft/recover.h b/storage/tokudb/ft-index/ft/logger/recover.h similarity index 78% rename from storage/tokudb/ft-index/ft/recover.h rename to storage/tokudb/ft-index/ft/logger/recover.h index 342445d09acea..f08abc442007e 100644 --- a/storage/tokudb/ft-index/ft/recover.h +++ b/storage/tokudb/ft-index/ft/logger/recover.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKURECOVER_H -#define TOKURECOVER_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,55 +87,53 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include +#include #include -#include -#include "fttypes.h" -#include "memory.h" -#include "x1764.h" +#include "portability/memory.h" +#include "portability/toku_portability.h" +#include "ft/comparator.h" +#include "ft/ft-ops.h" +#include "util/x1764.h" -typedef void (*prepared_txn_callback_t)(DB_ENV*, TOKUTXN); -typedef void (*keep_cachetable_callback_t)(DB_ENV*, CACHETABLE); +typedef void (*prepared_txn_callback_t)(DB_ENV *env, struct tokutxn *txn); +typedef void (*keep_cachetable_callback_t)(DB_ENV *env, struct cachetable *ct); -// Run tokudb recovery from the log +// Run tokuft recovery from the log // Returns 0 if success -int tokudb_recover (DB_ENV *env, - prepared_txn_callback_t prepared_txn_callback, - keep_cachetable_callback_t keep_cachetable_callback, - TOKULOGGER logger, - const char *env_dir, const char *log_dir, - ft_compare_func bt_compare, - ft_update_func update_function, - generate_row_for_put_func generate_row_for_put, - generate_row_for_del_func generate_row_for_del, - size_t cachetable_size); - -// Effect: Check the tokudb logs to determine whether or not we need to run recovery. +int tokuft_recover(DB_ENV *env, + prepared_txn_callback_t prepared_txn_callback, + keep_cachetable_callback_t keep_cachetable_callback, + struct tokulogger *logger, + const char *env_dir, + const char *log_dir, + ft_compare_func bt_compare, + ft_update_func update_function, + generate_row_for_put_func generate_row_for_put, + generate_row_for_del_func generate_row_for_del, + size_t cachetable_size); + +// Effect: Check the tokuft logs to determine whether or not we need to run recovery. // If the log is empty or if there is a clean shutdown at the end of the log, then we // dont need to run recovery. // Returns: true if we need recovery, otherwise false. -int tokudb_needs_recovery(const char *logdir, bool ignore_empty_log); +int tokuft_needs_recovery(const char *logdir, bool ignore_empty_log); // Return 0 if recovery log exists, ENOENT if log is missing -int tokudb_recover_log_exists(const char * log_dir); +int tokuft_recover_log_exists(const char * log_dir); // For test only - set callbacks for recovery testing void toku_recover_set_callback (void (*)(void*), void*); void toku_recover_set_callback2 (void (*)(void*), void*); -extern int tokudb_recovery_trace; +extern int tokuft_recovery_trace; int toku_recover_lock (const char *lock_dir, int *lockfd); int toku_recover_unlock(int lockfd); - -static const prepared_txn_callback_t NULL_prepared_txn_callback __attribute__((__unused__)) = NULL; -static const keep_cachetable_callback_t NULL_keep_cachetable_callback __attribute__((__unused__)) = NULL; - - -#endif // TOKURECOVER_H diff --git a/storage/tokudb/ft-index/ft/ft_msg.cc b/storage/tokudb/ft-index/ft/msg.cc similarity index 62% rename from storage/tokudb/ft-index/ft/ft_msg.cc rename to storage/tokudb/ft-index/ft/msg.cc index f03ae2a417c9f..1fedbe745af3c 100644 --- a/storage/tokudb/ft-index/ft/ft_msg.cc +++ b/storage/tokudb/ft-index/ft/msg.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,46 +88,84 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
+#include "portability/toku_portability.h" -#include -#include "fttypes.h" -#include "xids.h" -#include "ft_msg.h" +#include "ft/msg.h" +#include "ft/txn/xids.h" +#include "util/dbt.h" +ft_msg::ft_msg(const DBT *key, const DBT *val, enum ft_msg_type t, MSN m, XIDS x) : + _key(key ? *key : toku_empty_dbt()), + _val(val ? *val : toku_empty_dbt()), + _type(t), _msn(m), _xids(x) { +} + +ft_msg ft_msg::deserialize_from_rbuf(struct rbuf *rb, XIDS *x, bool *is_fresh) { + const void *keyp, *valp; + uint32_t keylen, vallen; + enum ft_msg_type t = (enum ft_msg_type) rbuf_char(rb); + *is_fresh = rbuf_char(rb); + MSN m = rbuf_MSN(rb); + toku_xids_create_from_buffer(rb, x); + rbuf_bytes(rb, &keyp, &keylen); + rbuf_bytes(rb, &valp, &vallen); + + DBT k, v; + return ft_msg(toku_fill_dbt(&k, keyp, keylen), toku_fill_dbt(&v, valp, vallen), t, m, *x); +} + +ft_msg ft_msg::deserialize_from_rbuf_v13(struct rbuf *rb, MSN m, XIDS *x) { + const void *keyp, *valp; + uint32_t keylen, vallen; + enum ft_msg_type t = (enum ft_msg_type) rbuf_char(rb); + toku_xids_create_from_buffer(rb, x); + rbuf_bytes(rb, &keyp, &keylen); + rbuf_bytes(rb, &valp, &vallen); + + DBT k, v; + return ft_msg(toku_fill_dbt(&k, keyp, keylen), toku_fill_dbt(&v, valp, vallen), t, m, *x); +} -uint32_t -ft_msg_get_keylen(FT_MSG ft_msg) { - uint32_t rval = ft_msg->u.id.key->size; - return rval; +const DBT *ft_msg::kdbt() const { + return &_key; } -uint32_t -ft_msg_get_vallen(FT_MSG ft_msg) { - uint32_t rval = ft_msg->u.id.val->size; - return rval; +const DBT *ft_msg::vdbt() const { + return &_val; } -XIDS -ft_msg_get_xids(FT_MSG ft_msg) { - XIDS rval = ft_msg->xids; - return rval; +enum ft_msg_type ft_msg::type() const { + return _type; } -void * -ft_msg_get_key(FT_MSG ft_msg) { - void * rval = ft_msg->u.id.key->data; - return rval; +MSN ft_msg::msn() const { + return _msn; } -void * -ft_msg_get_val(FT_MSG ft_msg) { - void * rval = ft_msg->u.id.val->data; - return rval; +XIDS ft_msg::xids() const { + return _xids; +} + +size_t ft_msg::total_size() const { + // Must store two 4-byte lengths + static const size_t key_val_overhead = 8; + + // 1 byte type, 1 byte freshness, then 8 byte MSN + static const size_t msg_overhead = 2 + sizeof(MSN); + + static const size_t total_overhead = key_val_overhead + msg_overhead; + + const size_t keyval_size = _key.size + _val.size; + const size_t xids_size = toku_xids_get_serialize_size(xids()); + return total_overhead + keyval_size + xids_size; } -enum ft_msg_type -ft_msg_get_type(FT_MSG ft_msg) { - enum ft_msg_type rval = ft_msg->type; - return rval; +void ft_msg::serialize_to_wbuf(struct wbuf *wb, bool is_fresh) const { + wbuf_nocrc_char(wb, (unsigned char) _type); + wbuf_nocrc_char(wb, (unsigned char) is_fresh); + wbuf_MSN(wb, _msn); + wbuf_nocrc_xids(wb, _xids); + wbuf_nocrc_bytes(wb, _key.data, _key.size); + wbuf_nocrc_bytes(wb, _val.data, _val.size); } diff --git a/storage/tokudb/ft-index/ft/omt.cc b/storage/tokudb/ft-index/ft/msg.h similarity index 51% rename from storage/tokudb/ft-index/ft/omt.cc rename to storage/tokudb/ft-index/ft/msg.h index 0e52947633018..3a26f0683996d 100644 --- a/storage/tokudb/ft-index/ft/omt.cc +++ b/storage/tokudb/ft-index/ft/msg.h @@ -1,5 +1,11 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* The purpose of this file is to provide access to the ft_msg, + * which is the ephemeral version of the messages that lives in + * a message buffer. 
+ */ + #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -29,7 +35,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,145 +92,155 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#pragma once -#include -#include -#include #include -#include "omt.h" +#include "portability/toku_assert.h" +#include "portability/toku_stdint.h" -int -toku_omt_create_steal_sorted_array(OMT *omtp, OMTVALUE **valuesp, uint32_t numvalues, uint32_t capacity) { - OMT XMALLOC(omt); - omt->create_steal_sorted_array(valuesp, numvalues, capacity); - *omtp = omt; - return 0; -} +#include "ft/txn/xids.h" -//TODO: Put all omt API functions here. -int toku_omt_create (OMT *omtp) { - OMT XMALLOC(omt); - omt->create(); - *omtp = omt; - return 0; -} +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -void toku_omt_destroy(OMT *omtp) { - OMT omt=*omtp; - omt->destroy(); - toku_free(omt); - *omtp=NULL; -} +// Message Sequence Number (MSN) +typedef struct __toku_msn { uint64_t msn; } MSN; + +// dummy used for message construction, to be filled in when msg is applied to tree +static const MSN ZERO_MSN = { .msn = 0 }; + +// first 2^62 values reserved for messages created before Dr. No (for upgrade) +static const MSN MIN_MSN = { .msn = 1ULL << 62 }; +static const MSN MAX_MSN = { .msn = UINT64_MAX }; + +/* tree command types */ +enum ft_msg_type { + FT_NONE = 0, + FT_INSERT = 1, + FT_DELETE_ANY = 2, // Delete any matching key. This used to be called FT_DELETE. + //FT_DELETE_BOTH = 3, + FT_ABORT_ANY = 4, // Abort any commands on any matching key. + //FT_ABORT_BOTH = 5, // Abort commands that match both the key and the value + FT_COMMIT_ANY = 6, + //FT_COMMIT_BOTH = 7, + FT_COMMIT_BROADCAST_ALL = 8, // Broadcast to all leafentries, (commit all transactions). + FT_COMMIT_BROADCAST_TXN = 9, // Broadcast to all leafentries, (commit specific transaction). + FT_ABORT_BROADCAST_TXN = 10, // Broadcast to all leafentries, (commit specific transaction). 
+ FT_INSERT_NO_OVERWRITE = 11, + FT_OPTIMIZE = 12, // Broadcast + FT_OPTIMIZE_FOR_UPGRADE = 13, // same as FT_OPTIMIZE, but record version number in leafnode + FT_UPDATE = 14, + FT_UPDATE_BROADCAST_ALL = 15 +}; -uint32_t toku_omt_size(OMT V) { - return V->size(); +static inline bool +ft_msg_type_applies_once(enum ft_msg_type type) +{ + bool ret_val; + switch (type) { + case FT_INSERT_NO_OVERWRITE: + case FT_INSERT: + case FT_DELETE_ANY: + case FT_ABORT_ANY: + case FT_COMMIT_ANY: + case FT_UPDATE: + ret_val = true; + break; + case FT_COMMIT_BROADCAST_ALL: + case FT_COMMIT_BROADCAST_TXN: + case FT_ABORT_BROADCAST_TXN: + case FT_OPTIMIZE: + case FT_OPTIMIZE_FOR_UPGRADE: + case FT_UPDATE_BROADCAST_ALL: + case FT_NONE: + ret_val = false; + break; + default: + assert(false); + } + return ret_val; +} + +static inline bool +ft_msg_type_applies_all(enum ft_msg_type type) +{ + bool ret_val; + switch (type) { + case FT_NONE: + case FT_INSERT_NO_OVERWRITE: + case FT_INSERT: + case FT_DELETE_ANY: + case FT_ABORT_ANY: + case FT_COMMIT_ANY: + case FT_UPDATE: + ret_val = false; + break; + case FT_COMMIT_BROADCAST_ALL: + case FT_COMMIT_BROADCAST_TXN: + case FT_ABORT_BROADCAST_TXN: + case FT_OPTIMIZE: + case FT_OPTIMIZE_FOR_UPGRADE: + case FT_UPDATE_BROADCAST_ALL: + ret_val = true; + break; + default: + assert(false); + } + return ret_val; } -int toku_omt_create_from_sorted_array(OMT *omtp, OMTVALUE *values, uint32_t numvalues) { - OMT XMALLOC(omt); - omt->create_from_sorted_array(values, numvalues); - *omtp=omt; - return 0; +static inline bool +ft_msg_type_does_nothing(enum ft_msg_type type) +{ + return (type == FT_NONE); } -int toku_omt_insert_at(OMT omt, OMTVALUE value, uint32_t index) { - return omt->insert_at(value, index); -} +class ft_msg { +public: + ft_msg(const DBT *key, const DBT *val, enum ft_msg_type t, MSN m, XIDS x); -int toku_omt_set_at (OMT omt, OMTVALUE value, uint32_t index) { - return omt->set_at(value, index); -} + enum ft_msg_type type() const; -int toku_omt_delete_at(OMT omt, uint32_t index) { - return omt->delete_at(index); -} + MSN msn() const; -int toku_omt_fetch(OMT omt, uint32_t i, OMTVALUE *v) { - return omt->fetch(i, v); -} + XIDS xids() const; -struct functor { - int (*f)(OMTVALUE, uint32_t, void *); - void *v; -}; -static_assert(std::is_pod::value, "not POD"); + const DBT *kdbt() const; -int call_functor(const OMTVALUE &v, uint32_t idx, functor *const ftor); -int call_functor(const OMTVALUE &v, uint32_t idx, functor *const ftor) { - return ftor->f(const_cast(v), idx, ftor->v); -} - -int toku_omt_iterate(OMT omt, int (*f)(OMTVALUE, uint32_t, void*), void*v) { - struct functor ftor = { .f = f, .v = v }; - return omt->iterate(&ftor); -} - -int toku_omt_iterate_on_range(OMT omt, uint32_t left, uint32_t right, int (*f)(OMTVALUE, uint32_t, void*), void*v) { - struct functor ftor = { .f = f, .v = v }; - return omt->iterate_on_range(left, right, &ftor); -} + const DBT *vdbt() const; -struct heftor { - int (*h)(OMTVALUE, void *v); - void *v; -}; -static_assert(std::is_pod::value, "not POD"); + size_t total_size() const; -int call_heftor(const OMTVALUE &v, const heftor &htor); -int call_heftor(const OMTVALUE &v, const heftor &htor) { - return htor.h(const_cast(v), htor.v); -} + void serialize_to_wbuf(struct wbuf *wb, bool is_fresh) const; -int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v, uint32_t *index) { - struct heftor htor = { .h = h, .v = v }; - return omt->insert(value, htor, index); -} + // deserialization goes through a static factory function so 
the ft msg + // API stays completely const and there's no default constructor + static ft_msg deserialize_from_rbuf(struct rbuf *rb, XIDS *xids, bool *is_fresh); -int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, uint32_t *index) { - struct heftor htor = { .h = h, .v = extra }; - return V->find_zero(htor, value, index); -} + // Version 13/14 messages did not have an msn - so `m' is the MSN + // that will be assigned to the message that gets deserialized. + static ft_msg deserialize_from_rbuf_v13(struct rbuf *rb, MSN m, XIDS *xids); -int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, uint32_t *index) { - struct heftor htor = { .h = h, .v = extra }; - return V->find(htor, direction, value, index); -} +private: + const DBT _key; + const DBT _val; + enum ft_msg_type _type; + MSN _msn; + XIDS _xids; +}; -int toku_omt_split_at(OMT omt, OMT *newomtp, uint32_t index) { - OMT XMALLOC(newomt); - int r = omt->split_at(newomt, index); - if (r != 0) { - toku_free(newomt); - } else { - *newomtp = newomt; - } - return r; -} +// For serialize / deserialize -int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomtp) { - OMT XMALLOC(newomt); - newomt->merge(leftomt, rightomt); - toku_free(leftomt); - toku_free(rightomt); - *newomtp = newomt; - return 0; -} +#include "ft/serialize/wbuf.h" -int toku_omt_clone_noptr(OMT *dest, OMT src) { - OMT XMALLOC(omt); - omt->clone(*src); - *dest = omt; - return 0; +static inline void wbuf_MSN(struct wbuf *wb, MSN msn) { + wbuf_ulonglong(wb, msn.msn); } -void toku_omt_clear(OMT omt) { - omt->clear(); -} +#include "ft/serialize/rbuf.h" -size_t toku_omt_memory_size (OMT omt) { - return omt->memory_size(); +static inline MSN rbuf_MSN(struct rbuf *rb) { + MSN msn = { .msn = rbuf_ulonglong(rb) }; + return msn; } - diff --git a/storage/tokudb/ft-index/ft/msg_buffer.cc b/storage/tokudb/ft-index/ft/msg_buffer.cc new file mode 100644 index 0000000000000..3a72fdb709071 --- /dev/null +++ b/storage/tokudb/ft-index/ft/msg_buffer.cc @@ -0,0 +1,318 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. 
+ +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +#include "ft/msg_buffer.h" +#include "util/dbt.h" + +void message_buffer::create() { + _num_entries = 0; + _memory = nullptr; + _memory_size = 0; + _memory_used = 0; +} + +void message_buffer::clone(message_buffer *src) { + _num_entries = src->_num_entries; + _memory_used = src->_memory_used; + _memory_size = src->_memory_size; + XMALLOC_N(_memory_size, _memory); + memcpy(_memory, src->_memory, _memory_size); +} + +void message_buffer::destroy() { + if (_memory != nullptr) { + toku_free(_memory); + } +} + +void message_buffer::deserialize_from_rbuf(struct rbuf *rb, + int32_t **fresh_offsets, int32_t *nfresh, + int32_t **stale_offsets, int32_t *nstale, + int32_t **broadcast_offsets, int32_t *nbroadcast) { + // read the number of messages in this buffer + int n_in_this_buffer = rbuf_int(rb); + if (fresh_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *fresh_offsets); + } + if (stale_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *stale_offsets); + } + if (broadcast_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *broadcast_offsets); + } + + _resize(rb->size + 64); // rb->size is a good hint for how big the buffer will be + + // deserialize each message individually, noting whether it was fresh + // and putting its buffer offset in the appropriate offsets array + for (int i = 0; i < n_in_this_buffer; i++) { + XIDS xids; + bool is_fresh; + const ft_msg msg = ft_msg::deserialize_from_rbuf(rb, &xids, &is_fresh); + + int32_t *dest; + if (ft_msg_type_applies_once(msg.type())) { + if (is_fresh) { + dest = fresh_offsets ? *fresh_offsets + (*nfresh)++ : nullptr; + } else { + dest = stale_offsets ? *stale_offsets + (*nstale)++ : nullptr; + } + } else { + invariant(ft_msg_type_applies_all(msg.type()) || ft_msg_type_does_nothing(msg.type())); + dest = broadcast_offsets ? *broadcast_offsets + (*nbroadcast)++ : nullptr; + } + + enqueue(msg, is_fresh, dest); + toku_xids_destroy(&xids); + } + + invariant(_num_entries == n_in_this_buffer); +} + +MSN message_buffer::deserialize_from_rbuf_v13(struct rbuf *rb, + MSN *highest_unused_msn_for_upgrade, + int32_t **fresh_offsets, int32_t *nfresh, + int32_t **broadcast_offsets, int32_t *nbroadcast) { + // read the number of messages in this buffer + int n_in_this_buffer = rbuf_int(rb); + if (fresh_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *fresh_offsets); + } + if (broadcast_offsets != nullptr) { + XMALLOC_N(n_in_this_buffer, *broadcast_offsets); + } + + // Atomically decrement the header's MSN count by the number + // of messages in the buffer. + MSN highest_msn_in_this_buffer = { + .msn = toku_sync_sub_and_fetch(&highest_unused_msn_for_upgrade->msn, n_in_this_buffer) + }; + + // Create the message buffers from the deserialized buffer. + for (int i = 0; i < n_in_this_buffer; i++) { + XIDS xids; + // There were no stale messages at this version, so call it fresh. + const bool is_fresh = true; + + // Increment our MSN, the last message should have the + // newest/highest MSN. See above for a full explanation. + highest_msn_in_this_buffer.msn++; + const ft_msg msg = ft_msg::deserialize_from_rbuf_v13(rb, highest_msn_in_this_buffer, &xids); + + int32_t *dest; + if (ft_msg_type_applies_once(msg.type())) { + dest = fresh_offsets ? *fresh_offsets + (*nfresh)++ : nullptr; + } else { + invariant(ft_msg_type_applies_all(msg.type()) || ft_msg_type_does_nothing(msg.type())); + dest = broadcast_offsets ? 
*broadcast_offsets + (*nbroadcast)++ : nullptr; + } + + enqueue(msg, is_fresh, dest); + toku_xids_destroy(&xids); + } + + return highest_msn_in_this_buffer; +} + +void message_buffer::_resize(size_t new_size) { + XREALLOC_N(new_size, _memory); + _memory_size = new_size; +} + +static int next_power_of_two (int n) { + int r = 4096; + while (r < n) { + r*=2; + assert(r>0); + } + return r; +} + +struct message_buffer::buffer_entry *message_buffer::get_buffer_entry(int32_t offset) const { + return (struct buffer_entry *) (_memory + offset); +} + +void message_buffer::enqueue(const ft_msg &msg, bool is_fresh, int32_t *offset) { + int need_space_here = msg_memsize_in_buffer(msg); + int need_space_total = _memory_used + need_space_here; + if (_memory == nullptr || need_space_total > _memory_size) { + // resize the buffer to the next power of 2 greater than the needed space + int next_2 = next_power_of_two(need_space_total); + _resize(next_2); + } + uint32_t keylen = msg.kdbt()->size; + uint32_t datalen = msg.vdbt()->size; + struct buffer_entry *entry = get_buffer_entry(_memory_used); + entry->type = (unsigned char) msg.type(); + entry->msn = msg.msn(); + toku_xids_cpy(&entry->xids_s, msg.xids()); + entry->is_fresh = is_fresh; + unsigned char *e_key = toku_xids_get_end_of_array(&entry->xids_s); + entry->keylen = keylen; + memcpy(e_key, msg.kdbt()->data, keylen); + entry->vallen = datalen; + memcpy(e_key + keylen, msg.vdbt()->data, datalen); + if (offset) { + *offset = _memory_used; + } + _num_entries++; + _memory_used += need_space_here; +} + +void message_buffer::set_freshness(int32_t offset, bool is_fresh) { + struct buffer_entry *entry = get_buffer_entry(offset); + entry->is_fresh = is_fresh; +} + +bool message_buffer::get_freshness(int32_t offset) const { + struct buffer_entry *entry = get_buffer_entry(offset); + return entry->is_fresh; +} + +ft_msg message_buffer::get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const { + struct buffer_entry *entry = get_buffer_entry(offset); + uint32_t keylen = entry->keylen; + uint32_t vallen = entry->vallen; + enum ft_msg_type type = (enum ft_msg_type) entry->type; + MSN msn = entry->msn; + const XIDS xids = (XIDS) &entry->xids_s; + const void *key = toku_xids_get_end_of_array(xids); + const void *val = (uint8_t *) key + entry->keylen; + return ft_msg(toku_fill_dbt(keydbt, key, keylen), toku_fill_dbt(valdbt, val, vallen), type, msn, xids); +} + +void message_buffer::get_message_key_msn(int32_t offset, DBT *key, MSN *msn) const { + struct buffer_entry *entry = get_buffer_entry(offset); + if (key != nullptr) { + toku_fill_dbt(key, toku_xids_get_end_of_array((XIDS) &entry->xids_s), entry->keylen); + } + if (msn != nullptr) { + *msn = entry->msn; + } +} + +int message_buffer::num_entries() const { + return _num_entries; +} + +size_t message_buffer::buffer_size_in_use() const { + return _memory_used; +} + +size_t message_buffer::memory_size_in_use() const { + return sizeof(*this) + _memory_used; +} + +size_t message_buffer::memory_footprint() const { + return sizeof(*this) + toku_memory_footprint(_memory, _memory_used); +} + +bool message_buffer::equals(message_buffer *other) const { + return (_memory_used == other->_memory_used && + memcmp(_memory, other->_memory, _memory_used) == 0); +} + +void message_buffer::serialize_to_wbuf(struct wbuf *wb) const { + wbuf_nocrc_int(wb, _num_entries); + struct msg_serialize_fn { + struct wbuf *wb; + msg_serialize_fn(struct wbuf *w) : wb(w) { } + int operator()(const ft_msg &msg, bool is_fresh) { + 
msg.serialize_to_wbuf(wb, is_fresh); + return 0; + } + } serialize_fn(wb); + iterate(serialize_fn); +} + +size_t message_buffer::msg_memsize_in_buffer(const ft_msg &msg) { + const uint32_t keylen = msg.kdbt()->size; + const uint32_t datalen = msg.vdbt()->size; + const size_t xidslen = toku_xids_get_size(msg.xids()); + return sizeof(struct buffer_entry) + keylen + datalen + xidslen - sizeof(XIDS_S); +} diff --git a/storage/tokudb/ft-index/ft/xids-internal.h b/storage/tokudb/ft-index/ft/msg_buffer.h similarity index 53% rename from storage/tokudb/ft-index/ft/xids-internal.h rename to storage/tokudb/ft-index/ft/msg_buffer.h index f0e1b97c7332c..b63b4a354b28e 100644 --- a/storage/tokudb/ft-index/ft/xids-internal.h +++ b/storage/tokudb/ft-index/ft/msg_buffer.h @@ -1,10 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef XIDS_INTERNAL_H -#define XIDS_INTERNAL_H - -#ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -33,8 +29,8 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: @@ -90,29 +86,96 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#pragma once + +#include "ft/msg.h" +#include "ft/txn/xids.h" +#include "util/dbt.h" + +class message_buffer { +public: + void create(); + + void clone(message_buffer *dst); + + void destroy(); + + // effect: deserializes a message buffer from the given rbuf + // returns: *fresh_offsets (etc) malloc'd to be num_entries large and + // populated with *nfresh (etc) offsets in the message buffer + // requires: if fresh_offsets (etc) != nullptr, then nfresh != nullptr + void deserialize_from_rbuf(struct rbuf *rb, + int32_t **fresh_offsets, int32_t *nfresh, + int32_t **stale_offsets, int32_t *nstale, + int32_t **broadcast_offsets, int32_t *nbroadcast); + + // effect: deserializes a message buffer whose messages are at version 13/14 + // returns: similar to deserialize_from_rbuf(), excpet there are no stale messages + // and each message is assigned a sequential value from *highest_unused_msn_for_upgrade, + // which is modified as needed using toku_sync_fech_and_sub() + // returns: the highest MSN assigned to any message in this buffer + // requires: similar to deserialize_from_rbuf(), and highest_unused_msn_for_upgrade != nullptr + MSN deserialize_from_rbuf_v13(struct rbuf *rb, + MSN *highest_unused_msn_for_upgrade, + int32_t **fresh_offsets, int32_t *nfresh, + int32_t **broadcast_offsets, int32_t *nbroadcast); + + void enqueue(const ft_msg &msg, bool is_fresh, int32_t *offset); + + void set_freshness(int32_t offset, bool is_fresh); + + bool get_freshness(int32_t offset) const; + + ft_msg get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const; + + void get_message_key_msn(int32_t offset, DBT *key, MSN *msn) const; + + int num_entries() const; + + size_t buffer_size_in_use() const; + + size_t memory_size_in_use() const; + + size_t memory_footprint() const; + + template + int iterate(F &fn) const { + for (int32_t offset = 0; offset < 
_memory_used; ) { + DBT k, v; + const ft_msg msg = get_message(offset, &k, &v); + bool is_fresh = get_freshness(offset); + int r = fn(msg, is_fresh); + if (r != 0) { + return r; + } + offset += msg_memsize_in_buffer(msg); + } + return 0; + } + bool equals(message_buffer *other) const; -// Variable size list of transaction ids (known in design doc as xids<>). -// ids[0] is the outermost transaction. -// ids[num_xids - 1] is the innermost transaction. -// Should only be accessed by accessor functions xids_xxx, not directly. + void serialize_to_wbuf(struct wbuf *wb) const; -// If the xids struct is unpacked, the compiler aligns the ids[] and we waste a lot of space -#if TOKU_WINDOWS -#pragma pack(push, 1) -#endif + static size_t msg_memsize_in_buffer(const ft_msg &msg); -typedef struct __attribute__((__packed__)) xids_t { - uint8_t num_xids; // maximum value of MAX_TRANSACTION_RECORDS - 1 ... - // ... because transaction 0 is implicit - TXNID ids[]; -} XIDS_S; +private: + void _resize(size_t new_size); -#if TOKU_WINDOWS -#pragma pack(pop) -#endif + // If this isn't packged, the compiler aligns the xids array and we waste a lot of space + struct __attribute__((__packed__)) buffer_entry { + unsigned int keylen; + unsigned int vallen; + unsigned char type; + bool is_fresh; + MSN msn; + XIDS_S xids_s; + }; + struct buffer_entry *get_buffer_entry(int32_t offset) const; -#endif + int _num_entries; + char *_memory; // An array of bytes into which buffer entries are embedded. + int _memory_size; // How big is _memory + int _memory_used; // How many bytes are in use? +}; diff --git a/storage/tokudb/ft-index/ft/node.cc b/storage/tokudb/ft-index/ft/node.cc new file mode 100644 index 0000000000000..f6a8c0bb2b379 --- /dev/null +++ b/storage/tokudb/ft-index/ft/node.cc @@ -0,0 +1,1980 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/node.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" +#include "util/scoped_malloc.h" +#include "util/sort.h" + +// Effect: Fill in N as an empty ftnode. 
+// TODO: Rename toku_ftnode_create +void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM blocknum, int height, int num_children, int layout_version, unsigned int flags) { + paranoid_invariant(layout_version != 0); + paranoid_invariant(height >= 0); + + n->max_msn_applied_to_node_on_disk = ZERO_MSN; // correct value for root node, harmless for others + n->flags = flags; + n->blocknum = blocknum; + n->layout_version = layout_version; + n->layout_version_original = layout_version; + n->layout_version_read_from_disk = layout_version; + n->height = height; + n->pivotkeys.create_empty(); + n->bp = 0; + n->n_children = num_children; + n->oldest_referenced_xid_known = TXNID_NONE; + + if (num_children > 0) { + XMALLOC_N(num_children, n->bp); + for (int i = 0; i < num_children; i++) { + BP_BLOCKNUM(n,i).b=0; + BP_STATE(n,i) = PT_INVALID; + BP_WORKDONE(n,i) = 0; + BP_INIT_TOUCHED_CLOCK(n, i); + set_BNULL(n,i); + if (height > 0) { + set_BNC(n, i, toku_create_empty_nl()); + } else { + set_BLB(n, i, toku_create_empty_bn()); + } + } + } + n->dirty = 1; // special case exception, it's okay to mark as dirty because the basements are empty + + toku_ft_status_note_ftnode(height, true); +} + +// destroys the internals of the ftnode, but it does not free the values +// that are stored +// this is common functionality for toku_ftnode_free and rebalance_ftnode_leaf +// MUST NOT do anything besides free the structures that have been allocated +void toku_destroy_ftnode_internals(FTNODE node) { + node->pivotkeys.destroy(); + for (int i = 0; i < node->n_children; i++) { + if (BP_STATE(node,i) == PT_AVAIL) { + if (node->height > 0) { + destroy_nonleaf_childinfo(BNC(node,i)); + } else { + destroy_basement_node(BLB(node, i)); + } + } else if (BP_STATE(node,i) == PT_COMPRESSED) { + SUB_BLOCK sb = BSB(node,i); + toku_free(sb->compressed_ptr); + toku_free(sb); + } else { + paranoid_invariant(is_BNULL(node, i)); + } + set_BNULL(node, i); + } + toku_free(node->bp); + node->bp = NULL; +} + +/* Frees a node, including all the stuff in the hash table. 
*/ +void toku_ftnode_free(FTNODE *nodep) { + FTNODE node = *nodep; + toku_ft_status_note_ftnode(node->height, false); + toku_destroy_ftnode_internals(node); + toku_free(node); + *nodep = nullptr; +} + +void toku_ftnode_update_disk_stats(FTNODE ftnode, FT ft, bool for_checkpoint) { + STAT64INFO_S deltas = ZEROSTATS; + // capture deltas before rebalancing basements for serialization + deltas = toku_get_and_clear_basement_stats(ftnode); + // locking not necessary here with respect to checkpointing + // in Clayface (because of the pending lock and cachetable lock + // in toku_cachetable_begin_checkpoint) + // essentially, if we are dealing with a for_checkpoint + // parameter in a function that is called by the flush_callback, + // then the cachetable needs to ensure that this is called in a safe + // manner that does not interfere with the beginning + // of a checkpoint, which it does with the cachetable lock + // and pending lock + toku_ft_update_stats(&ft->h->on_disk_stats, deltas); + if (for_checkpoint) { + toku_ft_update_stats(&ft->checkpoint_header->on_disk_stats, deltas); + } +} + +void toku_ftnode_clone_partitions(FTNODE node, FTNODE cloned_node) { + for (int i = 0; i < node->n_children; i++) { + BP_BLOCKNUM(cloned_node,i) = BP_BLOCKNUM(node,i); + paranoid_invariant(BP_STATE(node,i) == PT_AVAIL); + BP_STATE(cloned_node,i) = PT_AVAIL; + BP_WORKDONE(cloned_node, i) = BP_WORKDONE(node, i); + if (node->height == 0) { + set_BLB(cloned_node, i, toku_clone_bn(BLB(node,i))); + } else { + set_BNC(cloned_node, i, toku_clone_nl(BNC(node,i))); + } + } +} + +void toku_evict_bn_from_memory(FTNODE node, int childnum, FT ft) { + // free the basement node + assert(!node->dirty); + BASEMENTNODE bn = BLB(node, childnum); + toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); + destroy_basement_node(bn); + set_BNULL(node, childnum); + BP_STATE(node, childnum) = PT_ON_DISK; +} + +BASEMENTNODE toku_detach_bn(FTNODE node, int childnum) { + assert(BP_STATE(node, childnum) == PT_AVAIL); + BASEMENTNODE bn = BLB(node, childnum); + set_BNULL(node, childnum); + BP_STATE(node, childnum) = PT_ON_DISK; + return bn; +} + +// +// Orthopush +// + +struct store_msg_buffer_offset_extra { + int32_t *offsets; + int i; +}; + +int store_msg_buffer_offset(const int32_t &offset, const uint32_t UU(idx), struct store_msg_buffer_offset_extra *const extra) __attribute__((nonnull(3))); +int store_msg_buffer_offset(const int32_t &offset, const uint32_t UU(idx), struct store_msg_buffer_offset_extra *const extra) +{ + extra->offsets[extra->i] = offset; + extra->i++; + return 0; +} + +/** + * Given pointers to offsets within a message buffer where we can find messages, + * figure out the MSN of each message, and compare those MSNs. Returns 1, + * 0, or -1 if a is larger than, equal to, or smaller than b. + */ +int msg_buffer_offset_msn_cmp(message_buffer &msg_buffer, const int32_t &ao, const int32_t &bo); +int msg_buffer_offset_msn_cmp(message_buffer &msg_buffer, const int32_t &ao, const int32_t &bo) +{ + MSN amsn, bmsn; + msg_buffer.get_message_key_msn(ao, nullptr, &amsn); + msg_buffer.get_message_key_msn(bo, nullptr, &bmsn); + if (amsn.msn > bmsn.msn) { + return +1; + } + if (amsn.msn < bmsn.msn) { + return -1; + } + return 0; +} + +/** + * Given a message buffer and and offset, apply the message with toku_ft_bn_apply_msg, or discard it, + * based on its MSN and the MSN of the basement node. 
+ */ +static void +do_bn_apply_msg(FT_HANDLE ft_handle, BASEMENTNODE bn, message_buffer *msg_buffer, int32_t offset, + txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update) { + DBT k, v; + ft_msg msg = msg_buffer->get_message(offset, &k, &v); + + // The messages are being iterated over in (key,msn) order or just in + // msn order, so all the messages for one key, from one buffer, are in + // ascending msn order. So it's ok that we don't update the basement + // node's msn until the end. + if (msg.msn().msn > bn->max_msn_applied.msn) { + toku_ft_bn_apply_msg( + ft_handle->ft->cmp, + ft_handle->ft->update_fun, + bn, + msg, + gc_info, + workdone, + stats_to_update + ); + } else { + toku_ft_status_note_msn_discard(); + } + + // We must always mark message as stale since it has been marked + // (using omt::iterate_and_mark_range) + // It is possible to call do_bn_apply_msg even when it won't apply the message because + // the node containing it could have been evicted and brought back in. + msg_buffer->set_freshness(offset, false); +} + + +struct iterate_do_bn_apply_msg_extra { + FT_HANDLE t; + BASEMENTNODE bn; + NONLEAF_CHILDINFO bnc; + txn_gc_info *gc_info; + uint64_t *workdone; + STAT64INFO stats_to_update; +}; + +int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_msg_extra *const e) __attribute__((nonnull(3))); +int iterate_do_bn_apply_msg(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_msg_extra *const e) +{ + do_bn_apply_msg(e->t, e->bn, &e->bnc->msg_buffer, offset, e->gc_info, e->workdone, e->stats_to_update); + return 0; +} + +/** + * Given the bounds of the basement node to which we will apply messages, + * find the indexes within message_tree which contain the range of + * relevant messages. + * + * The message tree contains offsets into the buffer, where messages are + * found. The pivot_bounds are the lower bound exclusive and upper bound + * inclusive, because they come from pivot keys in the tree. We want OMT + * indices, which must have the lower bound be inclusive and the upper + * bound exclusive. We will get these by telling omt::find to look + * for something strictly bigger than each of our pivot bounds. + * + * Outputs the OMT indices in lbi (lower bound inclusive) and ube (upper + * bound exclusive). + */ +template +static void +find_bounds_within_message_tree( + const toku::comparator &cmp, + const find_bounds_omt_t &message_tree, /// tree holding message buffer offsets, in which we want to look for indices + message_buffer *msg_buffer, /// message buffer in which messages are found + const pivot_bounds &bounds, /// key bounds within the basement node we're applying messages to + uint32_t *lbi, /// (output) "lower bound inclusive" (index into message_tree) + uint32_t *ube /// (output) "upper bound exclusive" (index into message_tree) + ) +{ + int r = 0; + + if (!toku_dbt_is_empty(bounds.lbe())) { + // By setting msn to MAX_MSN and by using direction of +1, we will + // get the first message greater than (in (key, msn) order) any + // message (with any msn) with the key lower_bound_exclusive. + // This will be a message we want to try applying, so it is the + // "lower bound inclusive" within the message_tree. 
+ struct toku_msg_buffer_key_msn_heaviside_extra lbi_extra(cmp, msg_buffer, bounds.lbe(), MAX_MSN); + int32_t found_lb; + r = message_tree.template find(lbi_extra, +1, &found_lb, lbi); + if (r == DB_NOTFOUND) { + // There is no relevant data (the lower bound is bigger than + // any message in this tree), so we have no range and we're + // done. + *lbi = 0; + *ube = 0; + return; + } + if (!toku_dbt_is_empty(bounds.ubi())) { + // Check if what we found for lbi is greater than the upper + // bound inclusive that we have. If so, there are no relevant + // messages between these bounds. + const DBT *ubi = bounds.ubi(); + const int32_t offset = found_lb; + DBT found_lbidbt; + msg_buffer->get_message_key_msn(offset, &found_lbidbt, nullptr); + int c = cmp(&found_lbidbt, ubi); + // These DBTs really are both inclusive bounds, so we need + // strict inequality in order to determine that there's + // nothing between them. If they're equal, then we actually + // need to apply the message pointed to by lbi, and also + // anything with the same key but a bigger msn. + if (c > 0) { + *lbi = 0; + *ube = 0; + return; + } + } + } else { + // No lower bound given, it's negative infinity, so we start at + // the first message in the OMT. + *lbi = 0; + } + if (!toku_dbt_is_empty(bounds.ubi())) { + // Again, we use an msn of MAX_MSN and a direction of +1 to get + // the first thing bigger than the upper_bound_inclusive key. + // This is therefore the smallest thing we don't want to apply, + // and omt::iterate_on_range will not examine it. + struct toku_msg_buffer_key_msn_heaviside_extra ube_extra(cmp, msg_buffer, bounds.ubi(), MAX_MSN); + r = message_tree.template find(ube_extra, +1, nullptr, ube); + if (r == DB_NOTFOUND) { + // Couldn't find anything in the buffer bigger than our key, + // so we need to look at everything up to the end of + // message_tree. + *ube = message_tree.size(); + } + } else { + // No upper bound given, it's positive infinity, so we need to go + // through the end of the OMT. + *ube = message_tree.size(); + } +} + +/** + * For each message in the ancestor's buffer (determined by childnum) that + * is key-wise between lower_bound_exclusive and upper_bound_inclusive, + * apply the message to the basement node. We treat the bounds as minus + * or plus infinity respectively if they are NULL. Do not mark the node + * as dirty (preserve previous state of 'dirty' bit). 
+ */ +static void +bnc_apply_messages_to_basement_node( + FT_HANDLE t, // used for comparison function + BASEMENTNODE bn, // where to apply messages + FTNODE ancestor, // the ancestor node where we can find messages to apply + int childnum, // which child buffer of ancestor contains messages we want + const pivot_bounds &bounds, // contains pivot key bounds of this basement node + txn_gc_info *gc_info, + bool* msgs_applied + ) +{ + int r; + NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum); + + // Determine the offsets in the message trees between which we need to + // apply messages from this buffer + STAT64INFO_S stats_delta = {0,0}; + uint64_t workdone_this_ancestor = 0; + + uint32_t stale_lbi, stale_ube; + if (!bn->stale_ancestor_messages_applied) { + find_bounds_within_message_tree(t->ft->cmp, bnc->stale_message_tree, &bnc->msg_buffer, bounds, &stale_lbi, &stale_ube); + } else { + stale_lbi = 0; + stale_ube = 0; + } + uint32_t fresh_lbi, fresh_ube; + find_bounds_within_message_tree(t->ft->cmp, bnc->fresh_message_tree, &bnc->msg_buffer, bounds, &fresh_lbi, &fresh_ube); + + // We now know where all the messages we must apply are, so one of the + // following 4 cases will do the application, depending on which of + // the lists contains relevant messages: + // + // 1. broadcast messages and anything else, or a mix of fresh and stale + // 2. only fresh messages + // 3. only stale messages + if (bnc->broadcast_list.size() > 0 || + (stale_lbi != stale_ube && fresh_lbi != fresh_ube)) { + // We have messages in multiple trees, so we grab all + // the relevant messages' offsets and sort them by MSN, then apply + // them in MSN order. + const int buffer_size = ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) + bnc->broadcast_list.size()); + toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t)); + int32_t *offsets = reinterpret_cast(offsets_buf.get()); + struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 }; + + // Populate offsets array with offsets to stale messages + r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &sfo_extra); + assert_zero(r); + + // Then store fresh offsets, and mark them to be moved to stale later. + r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &sfo_extra); + assert_zero(r); + + // Store offsets of all broadcast messages. + r = bnc->broadcast_list.iterate(&sfo_extra); + assert_zero(r); + invariant(sfo_extra.i == buffer_size); + + // Sort by MSN. + toku::sort::mergesort_r(offsets, buffer_size, bnc->msg_buffer); + + // Apply the messages in MSN order. + for (int i = 0; i < buffer_size; ++i) { + *msgs_applied = true; + do_bn_apply_msg(t, bn, &bnc->msg_buffer, offsets[i], gc_info, &workdone_this_ancestor, &stats_delta); + } + } else if (stale_lbi == stale_ube) { + // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later. + struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta }; + if (fresh_ube - fresh_lbi > 0) *msgs_applied = true; + r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &iter_extra); + assert_zero(r); + } else { + invariant(fresh_lbi == fresh_ube); + // No fresh messages to apply, we just apply stale messages. 
+ + if (stale_ube - stale_lbi > 0) *msgs_applied = true; + struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta }; + + r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &iter_extra); + assert_zero(r); + } + // + // update stats + // + if (workdone_this_ancestor > 0) { + (void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor); + } + if (stats_delta.numbytes || stats_delta.numrows) { + toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta); + } +} + +static void +apply_ancestors_messages_to_bn( + FT_HANDLE t, + FTNODE node, + int childnum, + ANCESTORS ancestors, + const pivot_bounds &bounds, + txn_gc_info *gc_info, + bool* msgs_applied + ) +{ + BASEMENTNODE curr_bn = BLB(node, childnum); + const pivot_bounds curr_bounds = bounds.next_bounds(node, childnum); + for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { + if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > curr_bn->max_msn_applied.msn) { + paranoid_invariant(BP_STATE(curr_ancestors->node, curr_ancestors->childnum) == PT_AVAIL); + bnc_apply_messages_to_basement_node( + t, + curr_bn, + curr_ancestors->node, + curr_ancestors->childnum, + curr_bounds, + gc_info, + msgs_applied + ); + // We don't want to check this ancestor node again if the + // next time we query it, the msn hasn't changed. + curr_bn->max_msn_applied = curr_ancestors->node->max_msn_applied_to_node_on_disk; + } + } + // At this point, we know all the stale messages above this + // basement node have been applied, and any new messages will be + // fresh, so we don't need to look at stale messages for this + // basement node, unless it gets evicted (and this field becomes + // false when it's read in again). + curr_bn->stale_ancestor_messages_applied = true; +} + +void +toku_apply_ancestors_messages_to_node ( + FT_HANDLE t, + FTNODE node, + ANCESTORS ancestors, + const pivot_bounds &bounds, + bool* msgs_applied, + int child_to_read + ) +// Effect: +// Bring a leaf node up-to-date according to all the messages in the ancestors. +// If the leaf node is already up-to-date then do nothing. +// If the leaf node is not already up-to-date, then record the work done +// for that leaf in each ancestor. +// Requires: +// This is being called when pinning a leaf node for the query path. +// The entire root-to-leaf path is pinned and appears in the ancestors list. +{ + VERIFY_NODE(t, node); + paranoid_invariant(node->height == 0); + + TXN_MANAGER txn_manager = toku_ft_get_txn_manager(t); + txn_manager_state txn_state_for_gc(txn_manager); + + TXNID oldest_referenced_xid_for_simple_gc = toku_ft_get_oldest_referenced_xid_estimate(t); + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_for_simple_gc, + node->oldest_referenced_xid_known, + true); + if (!node->dirty && child_to_read >= 0) { + paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); + apply_ancestors_messages_to_bn( + t, + node, + child_to_read, + ancestors, + bounds, + &gc_info, + msgs_applied + ); + } + else { + // know we are a leaf node + // An important invariant: + // We MUST bring every available basement node for a dirty node up to date. + // flushing on the cleaner thread depends on this. This invariant + // allows the cleaner thread to just pick an internal node and flush it + // as opposed to being forced to start from the root. 
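A minimal standalone model of the ancestor walk performed by apply_ancestors_messages_to_bn above may help: only ancestors whose on-disk max MSN exceeds the basement's own max MSN can hold messages the basement has not yet seen, so everything else is skipped outright. The `Ancestor` struct and field names below are simplified stand-ins, not the real FTNODE/ANCESTORS types.

```cpp
// Simplified model of the ancestor walk: skip ancestors whose max MSN the
// basement node has already caught up with. Illustrative types only.
#include <cstdint>
#include <iostream>

struct Ancestor {
    uint64_t max_msn_on_disk;
    const Ancestor *next;      // next ancestor, closer to the root
};

void apply_from_ancestors(uint64_t &bn_max_msn_applied, const Ancestor *ancestors) {
    for (const Ancestor *a = ancestors; a != nullptr; a = a->next) {
        if (a->max_msn_on_disk > bn_max_msn_applied) {
            // ... apply the relevant buffered messages from this ancestor ...
            // then remember that everything up to its MSN has been applied.
            bn_max_msn_applied = a->max_msn_on_disk;
        }
    }
}

int main() {
    Ancestor root   = {50, nullptr};
    Ancestor parent = {80, &root};
    uint64_t bn_msn = 60;
    apply_from_ancestors(bn_msn, &parent);
    std::cout << bn_msn << "\n";   // 80: parent applied, root (msn 50) skipped
}
```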
+ for (int i = 0; i < node->n_children; i++) { + if (BP_STATE(node, i) != PT_AVAIL) { continue; } + apply_ancestors_messages_to_bn( + t, + node, + i, + ancestors, + bounds, + &gc_info, + msgs_applied + ); + } + } + VERIFY_NODE(t, node); +} + +static bool bn_needs_ancestors_messages( + FT ft, + FTNODE node, + int childnum, + const pivot_bounds &bounds, + ANCESTORS ancestors, + MSN* max_msn_applied + ) +{ + BASEMENTNODE bn = BLB(node, childnum); + const pivot_bounds curr_bounds = bounds.next_bounds(node, childnum); + bool needs_ancestors_messages = false; + for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { + if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > bn->max_msn_applied.msn) { + paranoid_invariant(BP_STATE(curr_ancestors->node, curr_ancestors->childnum) == PT_AVAIL); + NONLEAF_CHILDINFO bnc = BNC(curr_ancestors->node, curr_ancestors->childnum); + if (bnc->broadcast_list.size() > 0) { + needs_ancestors_messages = true; + goto cleanup; + } + if (!bn->stale_ancestor_messages_applied) { + uint32_t stale_lbi, stale_ube; + find_bounds_within_message_tree(ft->cmp, + bnc->stale_message_tree, + &bnc->msg_buffer, + curr_bounds, + &stale_lbi, + &stale_ube); + if (stale_lbi < stale_ube) { + needs_ancestors_messages = true; + goto cleanup; + } + } + uint32_t fresh_lbi, fresh_ube; + find_bounds_within_message_tree(ft->cmp, + bnc->fresh_message_tree, + &bnc->msg_buffer, + curr_bounds, + &fresh_lbi, + &fresh_ube); + if (fresh_lbi < fresh_ube) { + needs_ancestors_messages = true; + goto cleanup; + } + if (curr_ancestors->node->max_msn_applied_to_node_on_disk.msn > max_msn_applied->msn) { + max_msn_applied->msn = curr_ancestors->node->max_msn_applied_to_node_on_disk.msn; + } + } + } +cleanup: + return needs_ancestors_messages; +} + +bool toku_ft_leaf_needs_ancestors_messages( + FT ft, + FTNODE node, + ANCESTORS ancestors, + const pivot_bounds &bounds, + MSN *const max_msn_in_path, + int child_to_read + ) +// Effect: Determine whether there are messages in a node's ancestors +// which must be applied to it. These messages are in the correct +// keyrange for any available basement nodes, and are in nodes with the +// correct max_msn_applied_to_node_on_disk. +// Notes: +// This is an approximate query. +// Output: +// max_msn_in_path: max of "max_msn_applied_to_node_on_disk" over +// ancestors. This is used later to update basement nodes' +// max_msn_applied values in case we don't do the full algorithm. +// Returns: +// true if there may be some such messages +// false only if there are definitely no such messages +// Rationale: +// When we pin a node with a read lock, we want to quickly determine if +// we should exchange it for a write lock in preparation for applying +// messages. If there are no messages, we don't need the write lock. 
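Before the implementation below, here is a hedged sketch of the caller-side pattern the Rationale describes: pin the leaf with a read lock, run the cheap approximate check, and only take the expensive message-application path when the check says relevant messages may exist. The helper names (`leaf_needs_ancestors_messages`, `apply_ancestors_messages`, `update_basement_max_msn`) are placeholders, not the actual cachetable/ft API.

```cpp
// Hypothetical caller-side sketch: decide whether a read-locked leaf must be
// upgraded to a write lock before answering a query.
#include <iostream>

struct LeafNode { bool dirty = false; };

// Stand-ins for the real pin/check/apply calls.
bool leaf_needs_ancestors_messages(const LeafNode &) { return true; }  // approximate check
void apply_ancestors_messages(LeafNode &leaf) { leaf.dirty = true; }   // needs a write lock
void update_basement_max_msn(LeafNode &) {}                            // cheap, read lock suffices

void query_path_pin(LeafNode &leaf) {
    // 1. Pin the leaf with a read (shared) lock -- cheap, concurrent readers allowed.
    if (leaf_needs_ancestors_messages(leaf)) {
        // 2. Only if the approximate check says "maybe" do we pay for a write
        //    lock and push ancestor messages down into the basement nodes.
        apply_ancestors_messages(leaf);
    } else {
        // 3. No relevant messages: stay on the read lock and just record the
        //    max MSN seen on the path so the check stays cheap next time.
        update_basement_max_msn(leaf);
    }
}

int main() {
    LeafNode leaf;
    query_path_pin(leaf);
    std::cout << (leaf.dirty ? "applied messages\n" : "read-only path\n");
}
```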
+{ + paranoid_invariant(node->height == 0); + bool needs_ancestors_messages = false; + // child_to_read may be -1 in test cases + if (!node->dirty && child_to_read >= 0) { + paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); + needs_ancestors_messages = bn_needs_ancestors_messages( + ft, + node, + child_to_read, + bounds, + ancestors, + max_msn_in_path + ); + } + else { + for (int i = 0; i < node->n_children; ++i) { + if (BP_STATE(node, i) != PT_AVAIL) { continue; } + needs_ancestors_messages = bn_needs_ancestors_messages( + ft, + node, + i, + bounds, + ancestors, + max_msn_in_path + ); + if (needs_ancestors_messages) { + goto cleanup; + } + } + } +cleanup: + return needs_ancestors_messages; +} + +void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read) { + invariant(node->height == 0); + if (!node->dirty && child_to_read >= 0) { + paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); + BASEMENTNODE bn = BLB(node, child_to_read); + if (max_msn_applied.msn > bn->max_msn_applied.msn) { + // see comment below + (void) toku_sync_val_compare_and_swap(&bn->max_msn_applied.msn, bn->max_msn_applied.msn, max_msn_applied.msn); + } + } + else { + for (int i = 0; i < node->n_children; ++i) { + if (BP_STATE(node, i) != PT_AVAIL) { continue; } + BASEMENTNODE bn = BLB(node, i); + if (max_msn_applied.msn > bn->max_msn_applied.msn) { + // This function runs in a shared access context, so to silence tools + // like DRD, we use a CAS and ignore the result. + // Any threads trying to update these basement nodes should be + // updating them to the same thing (since they all have a read lock on + // the same root-to-leaf path) so this is safe. + (void) toku_sync_val_compare_and_swap(&bn->max_msn_applied.msn, bn->max_msn_applied.msn, max_msn_applied.msn); + } + } + } +} + +struct copy_to_stale_extra { + FT ft; + NONLEAF_CHILDINFO bnc; +}; + +int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra) __attribute__((nonnull(3))); +int copy_to_stale(const int32_t &offset, const uint32_t UU(idx), struct copy_to_stale_extra *const extra) +{ + MSN msn; + DBT key; + extra->bnc->msg_buffer.get_message_key_msn(offset, &key, &msn); + struct toku_msg_buffer_key_msn_heaviside_extra heaviside_extra(extra->ft->cmp, &extra->bnc->msg_buffer, &key, msn); + int r = extra->bnc->stale_message_tree.insert(offset, heaviside_extra, nullptr); + invariant_zero(r); + return 0; +} + +void toku_ft_bnc_move_messages_to_stale(FT ft, NONLEAF_CHILDINFO bnc) { + struct copy_to_stale_extra cts_extra = { .ft = ft, .bnc = bnc }; + int r = bnc->fresh_message_tree.iterate_over_marked(&cts_extra); + invariant_zero(r); + bnc->fresh_message_tree.delete_all_marked(); +} + +void toku_move_ftnode_messages_to_stale(FT ft, FTNODE node) { + invariant(node->height > 0); + for (int i = 0; i < node->n_children; ++i) { + if (BP_STATE(node, i) != PT_AVAIL) { + continue; + } + NONLEAF_CHILDINFO bnc = BNC(node, i); + // We can't delete things out of the fresh tree inside the above + // procedures because we're still looking at the fresh tree. Instead + // we have to move messages after we're done looking at it. 
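The `toku_sync_val_compare_and_swap` calls in `toku_ft_bn_update_max_msn` above use a "CAS and ignore the result" idiom for a benign race: every racing reader tries to publish the same value, so losing the race is harmless. A self-contained sketch of the idiom, with `std::atomic` standing in for the toku_sync_* builtins:

```cpp
// Benign-race publication: several readers may install the same max MSN; a
// compare-and-swap whose result we discard keeps race detectors quiet.
#include <atomic>
#include <cstdint>
#include <iostream>
#include <thread>
#include <vector>

std::atomic<uint64_t> max_msn_applied{0};

void maybe_raise_max_msn(uint64_t msn_seen_on_path) {
    uint64_t cur = max_msn_applied.load(std::memory_order_relaxed);
    if (msn_seen_on_path > cur) {
        // All racing threads hold a read lock on the same root-to-leaf path,
        // so they all try to install the same value; losing the race is fine.
        (void)max_msn_applied.compare_exchange_strong(cur, msn_seen_on_path,
                                                      std::memory_order_relaxed);
    }
}

int main() {
    std::vector<std::thread> readers;
    for (int i = 0; i < 4; i++) readers.emplace_back(maybe_raise_max_msn, 42);
    for (auto &t : readers) t.join();
    std::cout << max_msn_applied.load() << "\n";   // 42
}
```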
+ toku_ft_bnc_move_messages_to_stale(ft, bnc); + } +} + +// +// Balance // Availibility // Size + +struct rebalance_array_info { + uint32_t offset; + LEAFENTRY *le_array; + uint32_t *key_sizes_array; + const void **key_ptr_array; + static int fn(const void* key, const uint32_t keylen, const LEAFENTRY &le, + const uint32_t idx, struct rebalance_array_info *const ai) { + ai->le_array[idx+ai->offset] = le; + ai->key_sizes_array[idx+ai->offset] = keylen; + ai->key_ptr_array[idx+ai->offset] = key; + return 0; + } +}; + +// There must still be at least one child +// Requires that all messages in buffers above have been applied. +// Because all messages above have been applied, setting msn of all new basements +// to max msn of existing basements is correct. (There cannot be any messages in +// buffers above that still need to be applied.) +void toku_ftnode_leaf_rebalance(FTNODE node, unsigned int basementnodesize) { + + assert(node->height == 0); + assert(node->dirty); + + uint32_t num_orig_basements = node->n_children; + // Count number of leaf entries in this leaf (num_le). + uint32_t num_le = 0; + for (uint32_t i = 0; i < num_orig_basements; i++) { + num_le += BLB_DATA(node, i)->num_klpairs(); + } + + uint32_t num_alloc = num_le ? num_le : 1; // simplify logic below by always having at least one entry per array + + // Create an array of OMTVALUE's that store all the pointers to all the data. + // Each element in leafpointers is a pointer to a leaf. + toku::scoped_malloc leafpointers_buf(sizeof(LEAFENTRY) * num_alloc); + LEAFENTRY *leafpointers = reinterpret_cast(leafpointers_buf.get()); + leafpointers[0] = NULL; + + toku::scoped_malloc key_pointers_buf(sizeof(void *) * num_alloc); + const void **key_pointers = reinterpret_cast(key_pointers_buf.get()); + key_pointers[0] = NULL; + + toku::scoped_malloc key_sizes_buf(sizeof(uint32_t) * num_alloc); + uint32_t *key_sizes = reinterpret_cast(key_sizes_buf.get()); + + // Capture pointers to old mempools' buffers (so they can be destroyed) + toku::scoped_malloc old_bns_buf(sizeof(BASEMENTNODE) * num_orig_basements); + BASEMENTNODE *old_bns = reinterpret_cast(old_bns_buf.get()); + old_bns[0] = NULL; + + uint32_t curr_le = 0; + for (uint32_t i = 0; i < num_orig_basements; i++) { + bn_data* bd = BLB_DATA(node, i); + struct rebalance_array_info ai {.offset = curr_le, .le_array = leafpointers, .key_sizes_array = key_sizes, .key_ptr_array = key_pointers }; + bd->iterate(&ai); + curr_le += bd->num_klpairs(); + } + + // Create an array that will store indexes of new pivots. + // Each element in new_pivots is the index of a pivot key. + // (Allocating num_le of them is overkill, but num_le is an upper bound.) + toku::scoped_malloc new_pivots_buf(sizeof(uint32_t) * num_alloc); + uint32_t *new_pivots = reinterpret_cast(new_pivots_buf.get()); + new_pivots[0] = 0; + + // Each element in le_sizes is the size of the leafentry pointed to by leafpointers. + toku::scoped_malloc le_sizes_buf(sizeof(size_t) * num_alloc); + size_t *le_sizes = reinterpret_cast(le_sizes_buf.get()); + le_sizes[0] = 0; + + // Create an array that will store the size of each basement. + // This is the sum of the leaf sizes of all the leaves in that basement. + // We don't know how many basements there will be, so we use num_le as the upper bound. 
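The pivot-picking loop that follows is essentially a greedy bin-packing pass: accumulate leaf entries into the current basement until adding the next one would exceed basementnodesize, then cut a pivot. A simplified, self-contained sketch of that heuristic (entry sizes and the 100-byte cap are illustrative numbers, not TokuFT defaults):

```cpp
// Greedy packing of leaf entries into basement nodes of bounded size.
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<uint32_t> pick_pivots(const std::vector<uint32_t> &entry_sizes,
                                  uint32_t basementnodesize) {
    std::vector<uint32_t> pivots;         // index of the last entry in each full basement
    uint32_t in_curr = 0, size_so_far = 0;
    for (uint32_t i = 0; i < entry_sizes.size(); i++) {
        if (size_so_far + entry_sizes[i] > basementnodesize && in_curr != 0) {
            pivots.push_back(i - 1);      // close the current basement before entry i
            in_curr = 0;
            size_so_far = 0;
        }
        in_curr++;
        size_so_far += entry_sizes[i];
    }
    return pivots;                        // pivots.size() + 1 basement nodes
}

int main() {
    std::vector<uint32_t> sizes = {40, 40, 50, 10, 90, 20};
    for (uint32_t p : pick_pivots(sizes, 100)) std::cout << p << " ";  // prints "1 3 4"
    std::cout << "\n";
}
```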
+ + // Sum of all le sizes in a single basement + toku::scoped_calloc bn_le_sizes_buf(sizeof(size_t) * num_alloc); + size_t *bn_le_sizes = reinterpret_cast(bn_le_sizes_buf.get()); + + // Sum of all key sizes in a single basement + toku::scoped_calloc bn_key_sizes_buf(sizeof(size_t) * num_alloc); + size_t *bn_key_sizes = reinterpret_cast(bn_key_sizes_buf.get()); + + // TODO 4050: All these arrays should be combined into a single array of some bn_info struct (pivot, msize, num_les). + // Each entry is the number of leafentries in this basement. (Again, num_le is overkill upper baound.) + toku::scoped_malloc num_les_this_bn_buf(sizeof(uint32_t) * num_alloc); + uint32_t *num_les_this_bn = reinterpret_cast(num_les_this_bn_buf.get()); + num_les_this_bn[0] = 0; + + // Figure out the new pivots. + // We need the index of each pivot, and for each basement we need + // the number of leaves and the sum of the sizes of the leaves (memory requirement for basement). + uint32_t curr_pivot = 0; + uint32_t num_le_in_curr_bn = 0; + uint32_t bn_size_so_far = 0; + for (uint32_t i = 0; i < num_le; i++) { + uint32_t curr_le_size = leafentry_disksize((LEAFENTRY) leafpointers[i]); + le_sizes[i] = curr_le_size; + if ((bn_size_so_far + curr_le_size + sizeof(uint32_t) + key_sizes[i] > basementnodesize) && (num_le_in_curr_bn != 0)) { + // cap off the current basement node to end with the element before i + new_pivots[curr_pivot] = i-1; + curr_pivot++; + num_le_in_curr_bn = 0; + bn_size_so_far = 0; + } + num_le_in_curr_bn++; + num_les_this_bn[curr_pivot] = num_le_in_curr_bn; + bn_le_sizes[curr_pivot] += curr_le_size; + bn_key_sizes[curr_pivot] += sizeof(uint32_t) + key_sizes[i]; // uint32_t le_offset + bn_size_so_far += curr_le_size + sizeof(uint32_t) + key_sizes[i]; + } + // curr_pivot is now the total number of pivot keys in the leaf node + int num_pivots = curr_pivot; + int num_children = num_pivots + 1; + + // now we need to fill in the new basement nodes and pivots + + // TODO: (Zardosht) this is an ugly thing right now + // Need to figure out how to properly deal with seqinsert. + // I am not happy with how this is being + // handled with basement nodes + uint32_t tmp_seqinsert = BLB_SEQINSERT(node, num_orig_basements - 1); + + // choose the max msn applied to any basement as the max msn applied to all new basements + MSN max_msn = ZERO_MSN; + for (uint32_t i = 0; i < num_orig_basements; i++) { + MSN curr_msn = BLB_MAX_MSN_APPLIED(node,i); + max_msn = (curr_msn.msn > max_msn.msn) ? 
curr_msn : max_msn; + } + // remove the basement node in the node, we've saved a copy + for (uint32_t i = 0; i < num_orig_basements; i++) { + // save a reference to the old basement nodes + // we will need them to ensure that the memory + // stays intact + old_bns[i] = toku_detach_bn(node, i); + } + // Now destroy the old basements, but do not destroy leaves + toku_destroy_ftnode_internals(node); + + // now reallocate pieces and start filling them in + invariant(num_children > 0); + + node->n_children = num_children; + XCALLOC_N(num_children, node->bp); // allocate pointers to basements (bp) + for (int i = 0; i < num_children; i++) { + set_BLB(node, i, toku_create_empty_bn()); // allocate empty basements and set bp pointers + } + + // now we start to fill in the data + + // first the pivots + toku::scoped_malloc pivotkeys_buf(num_pivots * sizeof(DBT)); + DBT *pivotkeys = reinterpret_cast(pivotkeys_buf.get()); + for (int i = 0; i < num_pivots; i++) { + uint32_t size = key_sizes[new_pivots[i]]; + const void *key = key_pointers[new_pivots[i]]; + toku_fill_dbt(&pivotkeys[i], key, size); + } + node->pivotkeys.create_from_dbts(pivotkeys, num_pivots); + + uint32_t baseindex_this_bn = 0; + // now the basement nodes + for (int i = 0; i < num_children; i++) { + // put back seqinsert + BLB_SEQINSERT(node, i) = tmp_seqinsert; + + // create start (inclusive) and end (exclusive) boundaries for data of basement node + uint32_t curr_start = (i==0) ? 0 : new_pivots[i-1]+1; // index of first leaf in basement + uint32_t curr_end = (i==num_pivots) ? num_le : new_pivots[i]+1; // index of first leaf in next basement + uint32_t num_in_bn = curr_end - curr_start; // number of leaves in this basement + + // create indexes for new basement + invariant(baseindex_this_bn == curr_start); + uint32_t num_les_to_copy = num_les_this_bn[i]; + invariant(num_les_to_copy == num_in_bn); + + bn_data* bd = BLB_DATA(node, i); + bd->set_contents_as_clone_of_sorted_array( + num_les_to_copy, + &key_pointers[baseindex_this_bn], + &key_sizes[baseindex_this_bn], + &leafpointers[baseindex_this_bn], + &le_sizes[baseindex_this_bn], + bn_key_sizes[i], // Total key sizes + bn_le_sizes[i] // total le sizes + ); + + BP_STATE(node,i) = PT_AVAIL; + BP_TOUCH_CLOCK(node,i); + BLB_MAX_MSN_APPLIED(node,i) = max_msn; + baseindex_this_bn += num_les_to_copy; // set to index of next bn + } + node->max_msn_applied_to_node_on_disk = max_msn; + + // destroy buffers of old mempools + for (uint32_t i = 0; i < num_orig_basements; i++) { + destroy_basement_node(old_bns[i]); + } +} + +bool toku_ftnode_fully_in_memory(FTNODE node) { + for (int i = 0; i < node->n_children; i++) { + if (BP_STATE(node,i) != PT_AVAIL) { + return false; + } + } + return true; +} + +void toku_ftnode_assert_fully_in_memory(FTNODE UU(node)) { + paranoid_invariant(toku_ftnode_fully_in_memory(node)); +} + +uint32_t toku_ftnode_leaf_num_entries(FTNODE node) { + toku_ftnode_assert_fully_in_memory(node); + uint32_t num_entries = 0; + for (int i = 0; i < node->n_children; i++) { + num_entries += BLB_DATA(node, i)->num_klpairs(); + } + return num_entries; +} + +enum reactivity toku_ftnode_get_leaf_reactivity(FTNODE node, uint32_t nodesize) { + enum reactivity re = RE_STABLE; + toku_ftnode_assert_fully_in_memory(node); + paranoid_invariant(node->height==0); + unsigned int size = toku_serialize_ftnode_size(node); + if (size > nodesize && toku_ftnode_leaf_num_entries(node) > 1) { + re = RE_FISSIBLE; + } else if ((size*4) < nodesize && !BLB_SEQINSERT(node, node->n_children-1)) { + re = 
RE_FUSIBLE; + } + return re; +} + +enum reactivity toku_ftnode_get_nonleaf_reactivity(FTNODE node, unsigned int fanout) { + paranoid_invariant(node->height > 0); + int n_children = node->n_children; + if (n_children > (int) fanout) { + return RE_FISSIBLE; + } + if (n_children * 4 < (int) fanout) { + return RE_FUSIBLE; + } + return RE_STABLE; +} + +enum reactivity toku_ftnode_get_reactivity(FT ft, FTNODE node) { + toku_ftnode_assert_fully_in_memory(node); + if (node->height == 0) { + return toku_ftnode_get_leaf_reactivity(node, ft->h->nodesize); + } else { + return toku_ftnode_get_nonleaf_reactivity(node, ft->h->fanout); + } +} + +unsigned int toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc) { + return bnc->msg_buffer.buffer_size_in_use(); +} + +// Return true if the size of the buffers plus the amount of work done is large enough. +// Return false if there is nothing to be flushed (the buffers empty). +bool toku_ftnode_nonleaf_is_gorged(FTNODE node, uint32_t nodesize) { + uint64_t size = toku_serialize_ftnode_size(node); + + bool buffers_are_empty = true; + toku_ftnode_assert_fully_in_memory(node); + // + // the nonleaf node is gorged if the following holds true: + // - the buffers are non-empty + // - the total workdone by the buffers PLUS the size of the buffers + // is greater than nodesize (which as of Maxwell should be + // 4MB) + // + paranoid_invariant(node->height > 0); + for (int child = 0; child < node->n_children; ++child) { + size += BP_WORKDONE(node, child); + } + for (int child = 0; child < node->n_children; ++child) { + if (toku_bnc_nbytesinbuf(BNC(node, child)) > 0) { + buffers_are_empty = false; + break; + } + } + return ((size > nodesize) + && + (!buffers_are_empty)); +} + +int toku_bnc_n_entries(NONLEAF_CHILDINFO bnc) { + return bnc->msg_buffer.num_entries(); +} + +// how much memory does this child buffer consume? +long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc) { + return (sizeof(*bnc) + + bnc->msg_buffer.memory_footprint() + + bnc->fresh_message_tree.memory_size() + + bnc->stale_message_tree.memory_size() + + bnc->broadcast_list.memory_size()); +} + +// how much memory in this child buffer holds useful data? +// originally created solely for use by test program(s). +long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc) { + return (sizeof(*bnc) + + bnc->msg_buffer.memory_size_in_use() + + bnc->fresh_message_tree.memory_size() + + bnc->stale_message_tree.memory_size() + + bnc->broadcast_list.memory_size()); +} + +// +// Garbage collection +// Message injection +// Message application +// + +// Used only by test programs: append a child node to a parent node +void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey) { + int childnum = node->n_children; + node->n_children++; + REALLOC_N(node->n_children, node->bp); + BP_BLOCKNUM(node,childnum) = child->blocknum; + BP_STATE(node,childnum) = PT_AVAIL; + BP_WORKDONE(node, childnum) = 0; + set_BNC(node, childnum, toku_create_empty_nl()); + if (pivotkey) { + invariant(childnum > 0); + node->pivotkeys.insert_at(pivotkey, childnum - 1); + } + node->dirty = 1; +} + +void +toku_ft_bn_apply_msg_once ( + BASEMENTNODE bn, + const ft_msg &msg, + uint32_t idx, + uint32_t le_keylen, + LEAFENTRY le, + txn_gc_info *gc_info, + uint64_t *workdone, + STAT64INFO stats_to_update + ) +// Effect: Apply msg to leafentry (msn is ignored) +// Calculate work done by message on leafentry and add it to caller's workdone counter. 
+// idx is the location where it goes +// le is old leafentry +{ + size_t newsize=0, oldsize=0, workdone_this_le=0; + LEAFENTRY new_le=0; + int64_t numbytes_delta = 0; // how many bytes of user data (not including overhead) were added or deleted from this row + int64_t numrows_delta = 0; // will be +1 or -1 or 0 (if row was added or deleted or not) + uint32_t key_storage_size = msg.kdbt()->size + sizeof(uint32_t); + if (le) { + oldsize = leafentry_memsize(le) + key_storage_size; + } + + // toku_le_apply_msg() may call bn_data::mempool_malloc_and_update_dmt() to allocate more space. + // That means le is guaranteed to not cause a sigsegv but it may point to a mempool that is + // no longer in use. We'll have to release the old mempool later. + toku_le_apply_msg( + msg, + le, + &bn->data_buffer, + idx, + le_keylen, + gc_info, + &new_le, + &numbytes_delta + ); + // at this point, we cannot trust cmd->u.id.key to be valid. + // The dmt may have realloced its mempool and freed the one containing key. + + newsize = new_le ? (leafentry_memsize(new_le) + + key_storage_size) : 0; + if (le && new_le) { + workdone_this_le = (oldsize > newsize ? oldsize : newsize); // work done is max of le size before and after message application + + } else { // we did not just replace a row, so ... + if (le) { + // ... we just deleted a row ... + workdone_this_le = oldsize; + numrows_delta = -1; + } + if (new_le) { + // ... or we just added a row + workdone_this_le = newsize; + numrows_delta = 1; + } + } + if (workdone) { // test programs may call with NULL + *workdone += workdone_this_le; + } + + // now update stat64 statistics + bn->stat64_delta.numrows += numrows_delta; + bn->stat64_delta.numbytes += numbytes_delta; + // the only reason stats_to_update may be null is for tests + if (stats_to_update) { + stats_to_update->numrows += numrows_delta; + stats_to_update->numbytes += numbytes_delta; + } + +} + +static const uint32_t setval_tag = 0xee0ccb99; // this was gotten by doing "cat /dev/random|head -c4|od -x" to get a random number. We want to make sure that the user actually passes us the setval_extra_s that we passed in. +struct setval_extra_s { + uint32_t tag; + bool did_set_val; + int setval_r; // any error code that setval_fun wants to return goes here. + // need arguments for toku_ft_bn_apply_msg_once + BASEMENTNODE bn; + MSN msn; // captured from original message, not currently used + XIDS xids; + const DBT *key; + uint32_t idx; + uint32_t le_keylen; + LEAFENTRY le; + txn_gc_info *gc_info; + uint64_t * workdone; // set by toku_ft_bn_apply_msg_once() + STAT64INFO stats_to_update; +}; + +/* + * If new_val == NULL, we send a delete message instead of an insert. + * This happens here instead of in do_delete() for consistency. + * setval_fun() is called from handlerton, passing in svextra_v + * from setval_extra_s input arg to ft->update_fun(). + */ +static void setval_fun (const DBT *new_val, void *svextra_v) { + struct setval_extra_s *CAST_FROM_VOIDP(svextra, svextra_v); + paranoid_invariant(svextra->tag==setval_tag); + paranoid_invariant(!svextra->did_set_val); + svextra->did_set_val = true; + + { + // can't leave scope until toku_ft_bn_apply_msg_once if + // this is a delete + DBT val; + ft_msg msg(svextra->key, + new_val ? new_val : toku_init_dbt(&val), + new_val ? 
FT_INSERT : FT_DELETE_ANY, + svextra->msn, svextra->xids); + toku_ft_bn_apply_msg_once(svextra->bn, msg, + svextra->idx, svextra->le_keylen, svextra->le, + svextra->gc_info, + svextra->workdone, svextra->stats_to_update); + svextra->setval_r = 0; + } +} + +// We are already past the msn filter (in toku_ft_bn_apply_msg(), which calls do_update()), +// so capturing the msn in the setval_extra_s is not strictly required. The alternative +// would be to put a dummy msn in the messages created by setval_fun(), but preserving +// the original msn seems cleaner and it preserves accountability at a lower layer. +static int do_update(ft_update_func update_fun, const DESCRIPTOR_S *desc, BASEMENTNODE bn, const ft_msg &msg, uint32_t idx, + LEAFENTRY le, + void* keydata, + uint32_t keylen, + txn_gc_info *gc_info, + uint64_t * workdone, + STAT64INFO stats_to_update) { + LEAFENTRY le_for_update; + DBT key; + const DBT *keyp; + const DBT *update_function_extra; + DBT vdbt; + const DBT *vdbtp; + + // the location of data depends whether this is a regular or + // broadcast update + if (msg.type() == FT_UPDATE) { + // key is passed in with command (should be same as from le) + // update function extra is passed in with command + keyp = msg.kdbt(); + update_function_extra = msg.vdbt(); + } else { + invariant(msg.type() == FT_UPDATE_BROADCAST_ALL); + // key is not passed in with broadcast, it comes from le + // update function extra is passed in with command + paranoid_invariant(le); // for broadcast updates, we just hit all leafentries + // so this cannot be null + paranoid_invariant(keydata); + paranoid_invariant(keylen); + paranoid_invariant(msg.kdbt()->size == 0); + keyp = toku_fill_dbt(&key, keydata, keylen); + update_function_extra = msg.vdbt(); + } + toku_ft_status_note_update(msg.type() == FT_UPDATE_BROADCAST_ALL); + + if (le && !le_latest_is_del(le)) { + // if the latest val exists, use it, and we'll use the leafentry later + uint32_t vallen; + void *valp = le_latest_val_and_len(le, &vallen); + vdbtp = toku_fill_dbt(&vdbt, valp, vallen); + } else { + // otherwise, the val and leafentry are both going to be null + vdbtp = NULL; + } + le_for_update = le; + + struct setval_extra_s setval_extra = {setval_tag, false, 0, bn, msg.msn(), msg.xids(), + keyp, idx, keylen, le_for_update, gc_info, + workdone, stats_to_update}; + // call handlerton's ft->update_fun(), which passes setval_extra to setval_fun() + FAKE_DB(db, desc); + int r = update_fun( + &db, + keyp, + vdbtp, + update_function_extra, + setval_fun, &setval_extra + ); + + if (r == 0) { r = setval_extra.setval_r; } + return r; +} + +// Should be renamed as something like "apply_msg_to_basement()." +void +toku_ft_bn_apply_msg ( + const toku::comparator &cmp, + ft_update_func update_fun, + BASEMENTNODE bn, + const ft_msg &msg, + txn_gc_info *gc_info, + uint64_t *workdone, + STAT64INFO stats_to_update + ) +// Effect: +// Put a msg into a leaf. +// Calculate work done by message on leafnode and add it to caller's workdone counter. +// The leaf could end up "too big" or "too small". The caller must fix that up. 
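The `do_update()`/`setval_fun()` pair above implements a callback-style update protocol: the engine hands the user's update function the old value plus an extra argument, and the function reports the new value (or a delete) by invoking a callback. A minimal sketch of that flow, assuming simplified types rather than the real `ft_update_func` signature:

```cpp
// Callback-style update protocol, illustrative types only.
#include <functional>
#include <iostream>
#include <optional>
#include <string>

using SetVal = std::function<void(const std::optional<std::string> &new_val)>;

// A user-supplied update function: given the old value and an "extra"
// argument, it decides the new value by invoking the set_val callback.
int example_update_fun(const std::optional<std::string> &old_val,
                       const std::string &extra, const SetVal &set_val) {
    if (!old_val) {
        set_val(extra);               // row absent: insert the extra as the value
    } else if (extra.empty()) {
        set_val(std::nullopt);        // empty extra: delete the row
    } else {
        set_val(*old_val + extra);    // otherwise append -- an "upsert"
    }
    return 0;
}

int main() {
    std::optional<std::string> row = "abc";
    // Engine side: wrap the apply step in a callback, then run the user function.
    SetVal set_val = [&](const std::optional<std::string> &new_val) {
        // In TokuFT this becomes an insert or delete message applied to the
        // basement node; here we just mutate the row in place.
        row = new_val;
    };
    example_update_fun(row, "def", set_val);
    std::cout << row.value_or("<deleted>") << "\n";   // prints "abcdef"
}
```

If the update function never invokes the callback, no message is generated and the row is left as it was.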
+{ + LEAFENTRY storeddata; + void* key = NULL; + uint32_t keylen = 0; + + uint32_t num_klpairs; + int r; + struct toku_msg_leafval_heaviside_extra be(cmp, msg.kdbt()); + + unsigned int doing_seqinsert = bn->seqinsert; + bn->seqinsert = 0; + + switch (msg.type()) { + case FT_INSERT_NO_OVERWRITE: + case FT_INSERT: { + uint32_t idx; + if (doing_seqinsert) { + idx = bn->data_buffer.num_klpairs(); + DBT kdbt; + r = bn->data_buffer.fetch_key_and_len(idx-1, &kdbt.size, &kdbt.data); + if (r != 0) goto fz; + int c = toku_msg_leafval_heaviside(kdbt, be); + if (c >= 0) goto fz; + r = DB_NOTFOUND; + } else { + fz: + r = bn->data_buffer.find_zero( + be, + &storeddata, + &key, + &keylen, + &idx + ); + } + if (r==DB_NOTFOUND) { + storeddata = 0; + } else { + assert_zero(r); + } + toku_ft_bn_apply_msg_once(bn, msg, idx, keylen, storeddata, gc_info, workdone, stats_to_update); + + // if the insertion point is within a window of the right edge of + // the leaf then it is sequential + // window = min(32, number of leaf entries/16) + { + uint32_t s = bn->data_buffer.num_klpairs(); + uint32_t w = s / 16; + if (w == 0) w = 1; + if (w > 32) w = 32; + + // within the window? + if (s - idx <= w) + bn->seqinsert = doing_seqinsert + 1; + } + break; + } + case FT_DELETE_ANY: + case FT_ABORT_ANY: + case FT_COMMIT_ANY: { + uint32_t idx; + // Apply to all the matches + + r = bn->data_buffer.find_zero( + be, + &storeddata, + &key, + &keylen, + &idx + ); + if (r == DB_NOTFOUND) break; + assert_zero(r); + toku_ft_bn_apply_msg_once(bn, msg, idx, keylen, storeddata, gc_info, workdone, stats_to_update); + + break; + } + case FT_OPTIMIZE_FOR_UPGRADE: + // fall through so that optimize_for_upgrade performs rest of the optimize logic + case FT_COMMIT_BROADCAST_ALL: + case FT_OPTIMIZE: + // Apply to all leafentries + num_klpairs = bn->data_buffer.num_klpairs(); + for (uint32_t idx = 0; idx < num_klpairs; ) { + void* curr_keyp = NULL; + uint32_t curr_keylen = 0; + r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_keyp); + assert_zero(r); + int deleted = 0; + if (!le_is_clean(storeddata)) { //If already clean, nothing to do. + // message application code needs a key in order to determine how much + // work was done by this message. since this is a broadcast message, + // we have to create a new message whose key is the current le's key. + DBT curr_keydbt; + ft_msg curr_msg(toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen), + msg.vdbt(), msg.type(), msg.msn(), msg.xids()); + toku_ft_bn_apply_msg_once(bn, curr_msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update); + // at this point, we cannot trust msg.kdbt to be valid. + uint32_t new_dmt_size = bn->data_buffer.num_klpairs(); + if (new_dmt_size != num_klpairs) { + paranoid_invariant(new_dmt_size + 1 == num_klpairs); + //Item was deleted. + deleted = 1; + } + } + if (deleted) + num_klpairs--; + else + idx++; + } + paranoid_invariant(bn->data_buffer.num_klpairs() == num_klpairs); + + break; + case FT_COMMIT_BROADCAST_TXN: + case FT_ABORT_BROADCAST_TXN: + // Apply to all leafentries if txn is represented + num_klpairs = bn->data_buffer.num_klpairs(); + for (uint32_t idx = 0; idx < num_klpairs; ) { + void* curr_keyp = NULL; + uint32_t curr_keylen = 0; + r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_keyp); + assert_zero(r); + int deleted = 0; + if (le_has_xids(storeddata, msg.xids())) { + // message application code needs a key in order to determine how much + // work was done by this message. 
since this is a broadcast message, + // we have to create a new message whose key is the current le's key. + DBT curr_keydbt; + ft_msg curr_msg(toku_fill_dbt(&curr_keydbt, curr_keyp, curr_keylen), + msg.vdbt(), msg.type(), msg.msn(), msg.xids()); + toku_ft_bn_apply_msg_once(bn, curr_msg, idx, curr_keylen, storeddata, gc_info, workdone, stats_to_update); + uint32_t new_dmt_size = bn->data_buffer.num_klpairs(); + if (new_dmt_size != num_klpairs) { + paranoid_invariant(new_dmt_size + 1 == num_klpairs); + //Item was deleted. + deleted = 1; + } + } + if (deleted) + num_klpairs--; + else + idx++; + } + paranoid_invariant(bn->data_buffer.num_klpairs() == num_klpairs); + + break; + case FT_UPDATE: { + uint32_t idx; + r = bn->data_buffer.find_zero( + be, + &storeddata, + &key, + &keylen, + &idx + ); + if (r==DB_NOTFOUND) { + { + //Point to msg's copy of the key so we don't worry about le being freed + //TODO: 46 MAYBE Get rid of this when le_apply message memory is better handled + key = msg.kdbt()->data; + keylen = msg.kdbt()->size; + } + r = do_update(update_fun, cmp.get_descriptor(), bn, msg, idx, NULL, NULL, 0, gc_info, workdone, stats_to_update); + } else if (r==0) { + r = do_update(update_fun, cmp.get_descriptor(), bn, msg, idx, storeddata, key, keylen, gc_info, workdone, stats_to_update); + } // otherwise, a worse error, just return it + break; + } + case FT_UPDATE_BROADCAST_ALL: { + // apply to all leafentries. + uint32_t idx = 0; + uint32_t num_leafentries_before; + while (idx < (num_leafentries_before = bn->data_buffer.num_klpairs())) { + void* curr_key = nullptr; + uint32_t curr_keylen = 0; + r = bn->data_buffer.fetch_klpair(idx, &storeddata, &curr_keylen, &curr_key); + assert_zero(r); + + //TODO: 46 replace this with something better than cloning key + // TODO: (Zardosht) This may be unnecessary now, due to how the key + // is handled in the bndata. Investigate and determine + char clone_mem[curr_keylen]; // only lasts one loop, alloca would overflow (end of function) + memcpy((void*)clone_mem, curr_key, curr_keylen); + curr_key = (void*)clone_mem; + + // This is broken below. Have a compilation error checked + // in as a reminder + r = do_update(update_fun, cmp.get_descriptor(), bn, msg, idx, storeddata, curr_key, curr_keylen, gc_info, workdone, stats_to_update); + assert_zero(r); + + if (num_leafentries_before == bn->data_buffer.num_klpairs()) { + // we didn't delete something, so increment the index. 
+ idx++; + } + } + break; + } + case FT_NONE: break; // don't do anything + } + + return; +} + +static inline int +key_msn_cmp(const DBT *a, const DBT *b, const MSN amsn, const MSN bmsn, const toku::comparator &cmp) { + int r = cmp(a, b); + if (r == 0) { + if (amsn.msn > bmsn.msn) { + r = +1; + } else if (amsn.msn < bmsn.msn) { + r = -1; + } else { + r = 0; + } + } + return r; +} + +int toku_msg_buffer_key_msn_heaviside(const int32_t &offset, const struct toku_msg_buffer_key_msn_heaviside_extra &extra) { + MSN query_msn; + DBT query_key; + extra.msg_buffer->get_message_key_msn(offset, &query_key, &query_msn); + return key_msn_cmp(&query_key, extra.key, query_msn, extra.msn, extra.cmp); +} + +int toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra &extra, const int32_t &ao, const int32_t &bo) { + MSN amsn, bmsn; + DBT akey, bkey; + extra.msg_buffer->get_message_key_msn(ao, &akey, &amsn); + extra.msg_buffer->get_message_key_msn(bo, &bkey, &bmsn); + return key_msn_cmp(&akey, &bkey, amsn, bmsn, extra.cmp); +} + +// Effect: Enqueue the message represented by the parameters into the +// bnc's buffer, and put it in either the fresh or stale message tree, +// or the broadcast list. +static void bnc_insert_msg(NONLEAF_CHILDINFO bnc, const ft_msg &msg, bool is_fresh, const toku::comparator &cmp) { + int r = 0; + int32_t offset; + bnc->msg_buffer.enqueue(msg, is_fresh, &offset); + enum ft_msg_type type = msg.type(); + if (ft_msg_type_applies_once(type)) { + DBT key; + toku_fill_dbt(&key, msg.kdbt()->data, msg.kdbt()->size); + struct toku_msg_buffer_key_msn_heaviside_extra extra(cmp, &bnc->msg_buffer, &key, msg.msn()); + if (is_fresh) { + r = bnc->fresh_message_tree.insert(offset, extra, nullptr); + assert_zero(r); + } else { + r = bnc->stale_message_tree.insert(offset, extra, nullptr); + assert_zero(r); + } + } else { + invariant(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)); + const uint32_t idx = bnc->broadcast_list.size(); + r = bnc->broadcast_list.insert_at(offset, idx); + assert_zero(r); + } +} + +// This is only exported for tests. +void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, uint32_t keylen, const void *data, uint32_t datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const toku::comparator &cmp) +{ + DBT k, v; + ft_msg msg(toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, data, datalen), type, msn, xids); + bnc_insert_msg(bnc, msg, is_fresh, cmp); +} + +// append a msg to a nonleaf node's child buffer +static void ft_append_msg_to_child_buffer(const toku::comparator &cmp, FTNODE node, + int childnum, const ft_msg &msg, bool is_fresh) { + paranoid_invariant(BP_STATE(node,childnum) == PT_AVAIL); + bnc_insert_msg(BNC(node, childnum), msg, is_fresh, cmp); + node->dirty = 1; +} + +// This is only exported for tests. +void toku_ft_append_to_child_buffer(const toku::comparator &cmp, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val) { + ft_msg msg(key, val, type, msn, xids); + ft_append_msg_to_child_buffer(cmp, node, childnum, msg, is_fresh); +} + +static void ft_nonleaf_msg_once_to_child(const toku::comparator &cmp, FTNODE node, int target_childnum, const ft_msg &msg, bool is_fresh, size_t flow_deltas[]) +// Previously we had passive aggressive promotion, but that causes a lot of I/O a the checkpoint. So now we are just putting it in the buffer here. +// Also we don't worry about the node getting overfull here. It's the caller's problem. 
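`key_msn_cmp` above defines the total order used by the fresh and stale message trees: compare keys first, then break ties by MSN so that later messages for the same key sort after earlier ones. A tiny standalone illustration (simplified types, not the TokuFT comparator):

```cpp
// (key, msn) ordering: primary compare on key, ties broken by MSN.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct KeyMsn { std::string key; uint64_t msn; };

int key_msn_cmp(const KeyMsn &a, const KeyMsn &b) {
    if (int c = a.key.compare(b.key)) return c < 0 ? -1 : +1;
    if (a.msn != b.msn) return a.msn < b.msn ? -1 : +1;
    return 0;
}

int main() {
    std::vector<KeyMsn> msgs = {{"k2", 7}, {"k1", 9}, {"k1", 3}, {"k2", 5}};
    std::sort(msgs.begin(), msgs.end(),
              [](const KeyMsn &a, const KeyMsn &b) { return key_msn_cmp(a, b) < 0; });
    for (const auto &m : msgs) {
        std::cout << m.key << " msn=" << m.msn << "\n";   // k1/3, k1/9, k2/5, k2/7
    }
}
```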
+{ + unsigned int childnum = (target_childnum >= 0 + ? target_childnum + : toku_ftnode_which_child(node, msg.kdbt(), cmp)); + ft_append_msg_to_child_buffer(cmp, node, childnum, msg, is_fresh); + NONLEAF_CHILDINFO bnc = BNC(node, childnum); + bnc->flow[0] += flow_deltas[0]; + bnc->flow[1] += flow_deltas[1]; +} + +// TODO: Remove me, I'm boring. +static int ft_compare_pivot(const toku::comparator &cmp, const DBT *key, const DBT *pivot) { + return cmp(key, pivot); +} + +/* Find the leftmost child that may contain the key. + * If the key exists it will be in the child whose number + * is the return value of this function. + */ +int toku_ftnode_which_child(FTNODE node, const DBT *k, const toku::comparator &cmp) { + // a funny case of no pivots + if (node->n_children <= 1) return 0; + + DBT pivot; + + // check the last key to optimize seq insertions + int n = node->n_children-1; + int c = ft_compare_pivot(cmp, k, node->pivotkeys.fill_pivot(n - 1, &pivot)); + if (c > 0) return n; + + // binary search the pivots + int lo = 0; + int hi = n-1; // skip the last one, we checked it above + int mi; + while (lo < hi) { + mi = (lo + hi) / 2; + c = ft_compare_pivot(cmp, k, node->pivotkeys.fill_pivot(mi, &pivot)); + if (c > 0) { + lo = mi+1; + continue; + } + if (c < 0) { + hi = mi; + continue; + } + return mi; + } + return lo; +} + +// Used for HOT. +int toku_ftnode_hot_next_child(FTNODE node, const DBT *k, const toku::comparator &cmp) { + DBT pivot; + int low = 0; + int hi = node->n_children - 1; + int mi; + while (low < hi) { + mi = (low + hi) / 2; + int r = ft_compare_pivot(cmp, k, node->pivotkeys.fill_pivot(mi, &pivot)); + if (r > 0) { + low = mi + 1; + } else if (r < 0) { + hi = mi; + } else { + // if they were exactly equal, then we want the sub-tree under + // the next pivot. + return mi + 1; + } + } + invariant(low == hi); + return low; +} + +void toku_ftnode_save_ct_pair(CACHEKEY UU(key), void *value_data, PAIR p) { + FTNODE CAST_FROM_VOIDP(node, value_data); + node->ct_pair = p; +} + +static void +ft_nonleaf_msg_all(const toku::comparator &cmp, FTNODE node, const ft_msg &msg, bool is_fresh, size_t flow_deltas[]) +// Effect: Put the message into a nonleaf node. We put it into all children, possibly causing the children to become reactive. +// We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. +// The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) +{ + for (int i = 0; i < node->n_children; i++) { + ft_nonleaf_msg_once_to_child(cmp, node, i, msg, is_fresh, flow_deltas); + } +} + +static void +ft_nonleaf_put_msg(const toku::comparator &cmp, FTNODE node, int target_childnum, const ft_msg &msg, bool is_fresh, size_t flow_deltas[]) +// Effect: Put the message into a nonleaf node. We may put it into a child, possibly causing the child to become reactive. +// We don't do the splitting and merging. That's up to the caller after doing all the puts it wants to do. +// The re_array[i] gets set to the reactivity of any modified child i. (And there may be several such children.) 
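toku_ftnode_which_child above first probes the last pivot (a cheap win for sequential inserts) and then binary searches for the leftmost child whose range may contain the key. The sketch below shows the same routing logic over a plain vector of integer pivots, purely to illustrate the search shape; it is not the ft-index code.

#include <vector>
#include <cassert>

// Returns the index of the leftmost child whose key range may contain k,
// given n_children = pivots.size() + 1.
static int which_child(const std::vector<int> &pivots, int k) {
    int n = static_cast<int>(pivots.size());
    if (n == 0) return 0;                  // one child, no pivots
    if (k > pivots[n - 1]) return n;       // sequential-insert fast path: last child
    int lo = 0, hi = n - 1;                // binary search the remaining pivots
    while (lo < hi) {
        int mi = (lo + hi) / 2;
        if (k > pivots[mi]) {
            lo = mi + 1;
        } else {
            hi = mi;
        }
    }
    return lo;
}

int main() {
    std::vector<int> pivots = {10, 20, 30};   // children cover (-inf,10], (10,20], (20,30], (30,+inf)
    assert(which_child(pivots, 5) == 0);
    assert(which_child(pivots, 10) == 0);     // keys equal to a pivot go to the leftmost such child
    assert(which_child(pivots, 25) == 2);
    assert(which_child(pivots, 99) == 3);
    return 0;
}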
+// +{ + + // + // see comments in toku_ft_leaf_apply_msg + // to understand why we handle setting + // node->max_msn_applied_to_node_on_disk here, + // and don't do it in toku_ftnode_put_msg + // + MSN msg_msn = msg.msn(); + invariant(msg_msn.msn > node->max_msn_applied_to_node_on_disk.msn); + node->max_msn_applied_to_node_on_disk = msg_msn; + + if (ft_msg_type_applies_once(msg.type())) { + ft_nonleaf_msg_once_to_child(cmp, node, target_childnum, msg, is_fresh, flow_deltas); + } else if (ft_msg_type_applies_all(msg.type())) { + ft_nonleaf_msg_all(cmp, node, msg, is_fresh, flow_deltas); + } else { + paranoid_invariant(ft_msg_type_does_nothing(msg.type())); + } +} + +// Garbage collect one leaf entry. +static void +ft_basement_node_gc_once(BASEMENTNODE bn, + uint32_t index, + void* keyp, + uint32_t keylen, + LEAFENTRY leaf_entry, + txn_gc_info *gc_info, + STAT64INFO_S * delta) +{ + paranoid_invariant(leaf_entry); + + // Don't run garbage collection on non-mvcc leaf entries. + if (leaf_entry->type != LE_MVCC) { + goto exit; + } + + // Don't run garbage collection if this leafentry decides it's not worth it. + if (!toku_le_worth_running_garbage_collection(leaf_entry, gc_info)) { + goto exit; + } + + LEAFENTRY new_leaf_entry; + new_leaf_entry = NULL; + + // The mempool doesn't free itself. When it allocates new memory, + // this pointer will be set to the older memory that must now be + // freed. + void * maybe_free; + maybe_free = NULL; + + // These will represent the number of bytes and rows changed as + // part of the garbage collection. + int64_t numbytes_delta; + int64_t numrows_delta; + toku_le_garbage_collect(leaf_entry, + &bn->data_buffer, + index, + keyp, + keylen, + gc_info, + &new_leaf_entry, + &numbytes_delta); + + numrows_delta = 0; + if (new_leaf_entry) { + numrows_delta = 0; + } else { + numrows_delta = -1; + } + + // If we created a new mempool buffer we must free the + // old/original buffer. + if (maybe_free) { + toku_free(maybe_free); + } + + // Update stats. + bn->stat64_delta.numrows += numrows_delta; + bn->stat64_delta.numbytes += numbytes_delta; + delta->numrows += numrows_delta; + delta->numbytes += numbytes_delta; + +exit: + return; +} + +// Garbage collect all leaf entries for a given basement node. +static void +basement_node_gc_all_les(BASEMENTNODE bn, + txn_gc_info *gc_info, + STAT64INFO_S * delta) +{ + int r = 0; + uint32_t index = 0; + uint32_t num_leafentries_before; + while (index < (num_leafentries_before = bn->data_buffer.num_klpairs())) { + void* keyp = NULL; + uint32_t keylen = 0; + LEAFENTRY leaf_entry; + r = bn->data_buffer.fetch_klpair(index, &leaf_entry, &keylen, &keyp); + assert_zero(r); + ft_basement_node_gc_once( + bn, + index, + keyp, + keylen, + leaf_entry, + gc_info, + delta + ); + // Check if the leaf entry was deleted or not. + if (num_leafentries_before == bn->data_buffer.num_klpairs()) { + ++index; + } + } +} + +// Garbage collect all leaf entires in all basement nodes. +static void +ft_leaf_gc_all_les(FT ft, FTNODE node, txn_gc_info *gc_info) +{ + toku_ftnode_assert_fully_in_memory(node); + paranoid_invariant_zero(node->height); + // Loop through each leaf entry, garbage collecting as we go. + for (int i = 0; i < node->n_children; ++i) { + // Perform the garbage collection. 
+ BASEMENTNODE bn = BLB(node, i); + STAT64INFO_S delta; + delta.numrows = 0; + delta.numbytes = 0; + basement_node_gc_all_les(bn, gc_info, &delta); + toku_ft_update_stats(&ft->in_memory_stats, delta); + } +} + +void toku_ftnode_leaf_run_gc(FT ft, FTNODE node) { + TOKULOGGER logger = toku_cachefile_logger(ft->cf); + if (logger) { + TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger); + txn_manager_state txn_state_for_gc(txn_manager); + txn_state_for_gc.init(); + TXNID oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager); + + // Perform full garbage collection. + // + // - txn_state_for_gc + // a fresh snapshot of the transaction system. + // - oldest_referenced_xid_for_simple_gc + // the oldest xid in any live list as of right now - suitible for simple gc + // - node->oldest_referenced_xid_known + // the last known oldest referenced xid for this node and any unapplied messages. + // it is a lower bound on the actual oldest referenced xid - but becasue there + // may be abort messages above us, we need to be careful to only use this value + // for implicit promotion (as opposed to the oldest referenced xid for simple gc) + // + // The node has its own oldest referenced xid because it must be careful not to implicitly promote + // provisional entries for transactions that are no longer live, but may have abort messages + // somewhere above us in the tree. + txn_gc_info gc_info(&txn_state_for_gc, + oldest_referenced_xid_for_simple_gc, + node->oldest_referenced_xid_known, + true); + ft_leaf_gc_all_les(ft, node, &gc_info); + } +} + +void +toku_ftnode_put_msg ( + const toku::comparator &cmp, + ft_update_func update_fun, + FTNODE node, + int target_childnum, + const ft_msg &msg, + bool is_fresh, + txn_gc_info *gc_info, + size_t flow_deltas[], + STAT64INFO stats_to_update + ) +// Effect: Push message into the subtree rooted at NODE. +// If NODE is a leaf, then +// put message into leaf, applying it to the leafentries +// If NODE is a nonleaf, then push the message into the message buffer(s) of the relevent child(ren). +// The node may become overfull. That's not our problem. +{ + toku_ftnode_assert_fully_in_memory(node); + // + // see comments in toku_ft_leaf_apply_msg + // to understand why we don't handle setting + // node->max_msn_applied_to_node_on_disk here, + // and instead defer to these functions + // + if (node->height==0) { + toku_ft_leaf_apply_msg(cmp, update_fun, node, target_childnum, msg, gc_info, nullptr, stats_to_update); + } else { + ft_nonleaf_put_msg(cmp, node, target_childnum, msg, is_fresh, flow_deltas); + } +} + +// Effect: applies the message to the leaf if the appropriate basement node is in memory. +// This function is called during message injection and/or flushing, so the entire +// node MUST be in memory. 
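The MSN comparison in toku_ft_leaf_apply_msg below is what makes message application idempotent: a message is applied to a basement node only if its MSN is newer than the largest MSN that node has already seen, otherwise it is discarded. The toy version below shows only that gate, with a plain counter standing in for MSN and an integer sum standing in for the basement node's contents; both are assumptions for illustration.

#include <cstdint>
#include <cassert>

struct toy_basement {
    uint64_t max_msn_applied = 0;
    int64_t sum = 0;   // stands in for the basement node's data
};

// Apply an "add" message at most once: replaying the same MSN is a no-op.
static bool apply_if_newer(toy_basement &bn, uint64_t msn, int64_t delta) {
    if (msn <= bn.max_msn_applied) {
        return false;   // already applied (e.g. earlier, on a query path): discard
    }
    bn.max_msn_applied = msn;
    bn.sum += delta;
    return true;
}

int main() {
    toy_basement bn;
    assert(apply_if_newer(bn, 1, 10));
    assert(!apply_if_newer(bn, 1, 10));   // replay of MSN 1 is discarded
    assert(apply_if_newer(bn, 2, 5));
    assert(bn.sum == 15);
    return 0;
}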
+void toku_ft_leaf_apply_msg( + const toku::comparator &cmp, + ft_update_func update_fun, + FTNODE node, + int target_childnum, // which child to inject to, or -1 if unknown + const ft_msg &msg, + txn_gc_info *gc_info, + uint64_t *workdone, + STAT64INFO stats_to_update + ) +{ + VERIFY_NODE(t, node); + toku_ftnode_assert_fully_in_memory(node); + + // + // Because toku_ft_leaf_apply_msg is called with the intent of permanently + // applying a message to a leaf node (meaning the message is permanently applied + // and will be purged from the system after this call, as opposed to + // toku_apply_ancestors_messages_to_node, which applies a message + // for a query, but the message may still reside in the system and + // be reapplied later), we mark the node as dirty and + // take the opportunity to update node->max_msn_applied_to_node_on_disk. + // + node->dirty = 1; + + // + // we cannot blindly update node->max_msn_applied_to_node_on_disk, + // we must check to see if the msn is greater that the one already stored, + // because the message may have already been applied earlier (via + // toku_apply_ancestors_messages_to_node) to answer a query + // + // This is why we handle node->max_msn_applied_to_node_on_disk both here + // and in ft_nonleaf_put_msg, as opposed to in one location, toku_ftnode_put_msg. + // + MSN msg_msn = msg.msn(); + if (msg_msn.msn > node->max_msn_applied_to_node_on_disk.msn) { + node->max_msn_applied_to_node_on_disk = msg_msn; + } + + if (ft_msg_type_applies_once(msg.type())) { + unsigned int childnum = (target_childnum >= 0 + ? target_childnum + : toku_ftnode_which_child(node, msg.kdbt(), cmp)); + BASEMENTNODE bn = BLB(node, childnum); + if (msg.msn().msn > bn->max_msn_applied.msn) { + bn->max_msn_applied = msg.msn(); + toku_ft_bn_apply_msg(cmp, + update_fun, + bn, + msg, + gc_info, + workdone, + stats_to_update); + } else { + toku_ft_status_note_msn_discard(); + } + } + else if (ft_msg_type_applies_all(msg.type())) { + for (int childnum=0; childnumn_children; childnum++) { + if (msg.msn().msn > BLB(node, childnum)->max_msn_applied.msn) { + BLB(node, childnum)->max_msn_applied = msg.msn(); + toku_ft_bn_apply_msg(cmp, + update_fun, + BLB(node, childnum), + msg, + gc_info, + workdone, + stats_to_update); + } else { + toku_ft_status_note_msn_discard(); + } + } + } + else if (!ft_msg_type_does_nothing(msg.type())) { + invariant(ft_msg_type_does_nothing(msg.type())); + } + VERIFY_NODE(t, node); +} + diff --git a/storage/tokudb/ft-index/ft/node.h b/storage/tokudb/ft-index/ft/node.h new file mode 100644 index 0000000000000..7b1b4023d84ab --- /dev/null +++ b/storage/tokudb/ft-index/ft/node.h @@ -0,0 +1,588 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#include "ft/bndata.h" +#include "ft/comparator.h" +#include "ft/ft.h" +#include "ft/msg_buffer.h" + +/* Pivot keys. + * Child 0's keys are <= pivotkeys[0]. + * Child 1's keys are <= pivotkeys[1]. + * Child 1's keys are > pivotkeys[0]. 
+ * etc + */ +class ftnode_pivot_keys { +public: + // effect: create an empty set of pivot keys + void create_empty(); + + // effect: create pivot keys by copying the given DBT array + void create_from_dbts(const DBT *keys, int n); + + // effect: create pivot keys as a clone of an existing set of pivotkeys + void create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys); + + void destroy(); + + // effect: deserialize pivot keys previously serialized by serialize_to_wbuf() + void deserialize_from_rbuf(struct rbuf *rb, int n); + + // returns: unowned DBT representing the i'th pivot key + DBT get_pivot(int i) const; + + // effect: fills a DBT with the i'th pivot key + // returns: the given dbt + DBT *fill_pivot(int i, DBT *dbt) const; + + // effect: insert a pivot into the i'th position, shifting others to the right + void insert_at(const DBT *key, int i); + + // effect: append pivotkeys to the end of our own pivot keys + void append(const ftnode_pivot_keys &pivotkeys); + + // effect: replace the pivot at the i'th position + void replace_at(const DBT *key, int i); + + // effect: removes the i'th pivot key, shifting others to the left + void delete_at(int i); + + // effect: split the pivot keys, removing all pivots at position greater + // than or equal to `i' and storing them in *other + // requires: *other is empty (size == 0) + void split_at(int i, ftnode_pivot_keys *other); + + // effect: serialize pivot keys to a wbuf + // requires: wbuf has at least ftnode_pivot_keys::total_size() bytes available + void serialize_to_wbuf(struct wbuf *wb) const; + + int num_pivots() const; + + // return: the total size of this data structure + size_t total_size() const; + + // return: the sum of the keys sizes of each pivot (for serialization) + size_t serialized_size() const; + +private: + inline size_t _align4(size_t x) const { + return roundup_to_multiple(4, x); + } + + // effect: create pivot keys, in fixed key format, by copying the given key array + void _create_from_fixed_keys(const char *fixedkeys, size_t fixed_keylen, int n); + + char *_fixed_key(int i) const { + return &_fixed_keys[i * _fixed_keylen_aligned]; + } + + bool _fixed_format() const { + return _fixed_keys != nullptr; + } + + void sanity_check() const; + + void _insert_at_dbt(const DBT *key, int i); + void _append_dbt(const ftnode_pivot_keys &pivotkeys); + void _replace_at_dbt(const DBT *key, int i); + void _delete_at_dbt(int i); + void _split_at_dbt(int i, ftnode_pivot_keys *other); + + void _insert_at_fixed(const DBT *key, int i); + void _append_fixed(const ftnode_pivot_keys &pivotkeys); + void _replace_at_fixed(const DBT *key, int i); + void _delete_at_fixed(int i); + void _split_at_fixed(int i, ftnode_pivot_keys *other); + + // adds/destroys keys at a certain index (in dbt format), + // maintaining _total_size, but not _num_pivots + void _add_key_dbt(const DBT *key, int i); + void _destroy_key_dbt(int i); + + // conversions to and from packed key array format + void _convert_to_dbt_format(); + void _convert_to_fixed_format(); + + // If every key is _fixed_keylen long, then _fixed_key is a + // packed array of keys.. + char *_fixed_keys; + // The actual length of the fixed key + size_t _fixed_keylen; + // The aligned length that we use for fixed key storage + size_t _fixed_keylen_aligned; + + // ..otherwise _fixed_keys is null and we store an array of dbts, + // each representing a key. this is simpler but less cache-efficient. 
+ DBT *_dbt_keys; + + int _num_pivots; + size_t _total_size; +}; + +// TODO: class me up +struct ftnode { + MSN max_msn_applied_to_node_on_disk; // max_msn_applied that will be written to disk + unsigned int flags; + BLOCKNUM blocknum; // Which block number is this node? + int layout_version; // What version of the data structure? + int layout_version_original; // different (<) from layout_version if upgraded from a previous version (useful for debugging) + int layout_version_read_from_disk; // transient, not serialized to disk, (useful for debugging) + uint32_t build_id; // build_id (svn rev number) of software that wrote this node to disk + int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */ + int dirty; + uint32_t fullhash; + + // for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced. + // for leaf nodes, represents number of basement nodes + int n_children; + ftnode_pivot_keys pivotkeys; + + // What's the oldest referenced xid that this node knows about? The real oldest + // referenced xid might be younger, but this is our best estimate. We use it + // as a heuristic to transition provisional mvcc entries from provisional to + // committed (from implicity committed to really committed). + // + // A better heuristic would be the oldest live txnid, but we use this since it + // still works well most of the time, and its readily available on the inject + // code path. + TXNID oldest_referenced_xid_known; + + // array of size n_children, consisting of ftnode partitions + // each one is associated with a child + // for internal nodes, the ith partition corresponds to the ith message buffer + // for leaf nodes, the ith partition corresponds to the ith basement node + struct ftnode_partition *bp; + struct ctpair *ct_pair; +}; +typedef struct ftnode *FTNODE; + +// data of an available partition of a leaf ftnode +struct ftnode_leaf_basement_node { + bn_data data_buffer; + unsigned int seqinsert; // number of sequential inserts to this leaf + MSN max_msn_applied; // max message sequence number applied + bool stale_ancestor_messages_applied; + STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk +}; +typedef struct ftnode_leaf_basement_node *BASEMENTNODE; + +enum pt_state { // declare this to be packed so that when used below it will only take 1 byte. + PT_INVALID = 0, + PT_ON_DISK = 1, + PT_COMPRESSED = 2, + PT_AVAIL = 3}; + +enum ftnode_child_tag { + BCT_INVALID = 0, + BCT_NULL, + BCT_SUBBLOCK, + BCT_LEAF, + BCT_NONLEAF +}; + +typedef toku::omt off_omt_t; +typedef toku::omt marked_off_omt_t; + +// data of an available partition of a nonleaf ftnode +struct ftnode_nonleaf_childinfo { + message_buffer msg_buffer; + off_omt_t broadcast_list; + marked_off_omt_t fresh_message_tree; + off_omt_t stale_message_tree; + uint64_t flow[2]; // current and last checkpoint +}; +typedef struct ftnode_nonleaf_childinfo *NONLEAF_CHILDINFO; + +typedef struct ftnode_child_pointer { + union { + struct sub_block *subblock; + struct ftnode_nonleaf_childinfo *nonleaf; + struct ftnode_leaf_basement_node *leaf; + } u; + enum ftnode_child_tag tag; +} FTNODE_CHILD_POINTER; + +struct ftnode_disk_data { + // + // stores the offset to the beginning of the partition on disk from the ftnode, and the length, needed to read a partition off of disk + // the value is only meaningful if the node is clean. 
If the node is dirty, then the value is meaningless + // The START is the distance from the end of the compressed node_info data, to the beginning of the compressed partition + // The SIZE is the size of the compressed partition. + // Rationale: We cannot store the size from the beginning of the node since we don't know how big the header will be. + // However, later when we are doing aligned writes, we won't be able to store the size from the end since we want things to align. + uint32_t start; + uint32_t size; +}; +typedef struct ftnode_disk_data *FTNODE_DISK_DATA; + +// TODO: Turn these into functions instead of macros +#define BP_START(node_dd,i) ((node_dd)[i].start) +#define BP_SIZE(node_dd,i) ((node_dd)[i].size) + +// a ftnode partition, associated with a child of a node +struct ftnode_partition { + // the following three variables are used for nonleaf nodes + // for leaf nodes, they are meaningless + BLOCKNUM blocknum; // blocknum of child + + // How many bytes worth of work was performed by messages in each buffer. + uint64_t workdone; + + // + // pointer to the partition. Depending on the state, they may be different things + // if state == PT_INVALID, then the node was just initialized and ptr == NULL + // if state == PT_ON_DISK, then ptr == NULL + // if state == PT_COMPRESSED, then ptr points to a struct sub_block* + // if state == PT_AVAIL, then ptr is: + // a struct ftnode_nonleaf_childinfo for internal nodes, + // a struct ftnode_leaf_basement_node for leaf nodes + // + struct ftnode_child_pointer ptr; + // + // at any time, the partitions may be in one of the following three states (stored in pt_state): + // PT_INVALID - means that the partition was just initialized + // PT_ON_DISK - means that the partition is not in memory and needs to be read from disk. To use, must read off disk and decompress + // PT_COMPRESSED - means that the partition is compressed in memory. To use, must decompress + // PT_AVAIL - means the partition is decompressed and in memory + // + enum pt_state state; // make this an enum to make debugging easier. + + // clock count used to for pe_callback to determine if a node should be evicted or not + // for now, saturating the count at 1 + uint8_t clock_count; +}; + +// +// TODO: Fix all these names +// Organize declarations +// Fix widespread parameter ordering inconsistencies +// +BASEMENTNODE toku_create_empty_bn(void); +BASEMENTNODE toku_create_empty_bn_no_buffer(void); // create a basement node with a null buffer. 
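The clock_count field in ftnode_partition above is a one-bit second-chance clock: readers set it, the sweeper clears it, and a partition becomes an eviction candidate only if it has not been touched since the previous sweep. Below is a small standalone sketch of that policy; the struct and function names are illustrative, not the ft-index ones.

#include <array>
#include <cstdint>
#include <cassert>

struct toy_partition {
    uint8_t clock_count = 0;   // saturating at 1, like ftnode_partition::clock_count
    bool in_memory = true;
};

// A sweep gives every partition one more chance: partitions that were not
// touched since the previous sweep are evicted, the rest have their bit cleared.
template <size_t N>
static int sweep_and_evict(std::array<toy_partition, N> &parts) {
    int evicted = 0;
    for (auto &p : parts) {
        if (!p.in_memory) continue;
        if (p.clock_count == 0) {
            p.in_memory = false;   // cold since last sweep: evict
            evicted++;
        } else {
            p.clock_count = 0;     // clear the bit; evict next time if still cold
        }
    }
    return evicted;
}

int main() {
    std::array<toy_partition, 3> parts{};
    parts[1].clock_count = 1;              // partition 1 was recently read
    assert(sweep_and_evict(parts) == 2);   // partitions 0 and 2 evicted, 1 survives
    assert(parts[1].in_memory);
    assert(sweep_and_evict(parts) == 1);   // partition 1 goes on the next sweep if untouched
    return 0;
}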
+NONLEAF_CHILDINFO toku_clone_nl(NONLEAF_CHILDINFO orig_childinfo); +BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn); +NONLEAF_CHILDINFO toku_create_empty_nl(void); +void destroy_basement_node (BASEMENTNODE bn); +void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl); +void toku_destroy_ftnode_internals(FTNODE node); +void toku_ftnode_free (FTNODE *node); +bool toku_ftnode_fully_in_memory(FTNODE node); +void toku_ftnode_assert_fully_in_memory(FTNODE node); +void toku_evict_bn_from_memory(FTNODE node, int childnum, FT ft); +BASEMENTNODE toku_detach_bn(FTNODE node, int childnum); +void toku_ftnode_update_disk_stats(FTNODE ftnode, FT ft, bool for_checkpoint); +void toku_ftnode_clone_partitions(FTNODE node, FTNODE cloned_node); + +void toku_initialize_empty_ftnode(FTNODE node, BLOCKNUM blocknum, int height, int num_children, + int layout_version, unsigned int flags); + +int toku_ftnode_which_child(FTNODE node, const DBT *k, const toku::comparator &cmp); +void toku_ftnode_save_ct_pair(CACHEKEY key, void *value_data, PAIR p); + +// +// TODO: put the heaviside functions into their respective 'struct .*extra;' namespaces +// +struct toku_msg_buffer_key_msn_heaviside_extra { + const toku::comparator &cmp; + message_buffer *msg_buffer; + const DBT *key; + MSN msn; + toku_msg_buffer_key_msn_heaviside_extra(const toku::comparator &c, message_buffer *mb, const DBT *k, MSN m) : + cmp(c), msg_buffer(mb), key(k), msn(m) { + } +}; +int toku_msg_buffer_key_msn_heaviside(const int32_t &v, const struct toku_msg_buffer_key_msn_heaviside_extra &extra); + +struct toku_msg_buffer_key_msn_cmp_extra { + const toku::comparator &cmp; + message_buffer *msg_buffer; + toku_msg_buffer_key_msn_cmp_extra(const toku::comparator &c, message_buffer *mb) : + cmp(c), msg_buffer(mb) { + } +}; +int toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra &extrap, const int &a, const int &b); + +struct toku_msg_leafval_heaviside_extra { + const toku::comparator &cmp; + DBT const *const key; + toku_msg_leafval_heaviside_extra(const toku::comparator &c, const DBT *k) : + cmp(c), key(k) { + } +}; +int toku_msg_leafval_heaviside(DBT const &kdbt, const struct toku_msg_leafval_heaviside_extra &be); + +unsigned int toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc); +int toku_bnc_n_entries(NONLEAF_CHILDINFO bnc); +long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc); +long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc); +void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, uint32_t keylen, const void *data, uint32_t datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const toku::comparator &cmp); +void toku_bnc_empty(NONLEAF_CHILDINFO bnc); +void toku_bnc_flush_to_child(FT ft, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known); +bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull)); + +bool toku_ftnode_nonleaf_is_gorged(FTNODE node, uint32_t nodesize); +uint32_t toku_ftnode_leaf_num_entries(FTNODE node); +void toku_ftnode_leaf_rebalance(FTNODE node, unsigned int basementnodesize); + +void toku_ftnode_leaf_run_gc(FT ft, FTNODE node); + +enum reactivity { + RE_STABLE, + RE_FUSIBLE, + RE_FISSIBLE +}; + +enum reactivity toku_ftnode_get_reactivity(FT ft, FTNODE node); +enum reactivity toku_ftnode_get_nonleaf_reactivity(FTNODE node, unsigned int fanout); +enum reactivity toku_ftnode_get_leaf_reactivity(FTNODE node, uint32_t nodesize); + +/** + * Finds the next child for HOT to flush to, given that everything up to + * and including k has been 
flattened. + * + * If k falls between pivots in node, then we return the childnum where k + * lies. + * + * If k is equal to some pivot, then we return the next (to the right) + * childnum. + */ +int toku_ftnode_hot_next_child(FTNODE node, const DBT *k, const toku::comparator &cmp); + +void toku_ftnode_put_msg(const toku::comparator &cmp, ft_update_func update_fun, + FTNODE node, int target_childnum, + const ft_msg &msg, bool is_fresh, txn_gc_info *gc_info, + size_t flow_deltas[], STAT64INFO stats_to_update); + +void toku_ft_bn_apply_msg_once(BASEMENTNODE bn, const ft_msg &msg, uint32_t idx, + uint32_t le_keylen, LEAFENTRY le, txn_gc_info *gc_info, + uint64_t *workdonep, STAT64INFO stats_to_update); + +void toku_ft_bn_apply_msg(const toku::comparator &cmp, ft_update_func update_fun, + BASEMENTNODE bn, const ft_msg &msg, txn_gc_info *gc_info, + uint64_t *workdone, STAT64INFO stats_to_update); + +void toku_ft_leaf_apply_msg(const toku::comparator &cmp, ft_update_func update_fun, + FTNODE node, int target_childnum, + const ft_msg &msg, txn_gc_info *gc_info, + uint64_t *workdone, STAT64INFO stats_to_update); + +// +// Message management for orthopush +// + +struct ancestors { + // This is the root node if next is NULL (since the root has no ancestors) + FTNODE node; + // Which buffer holds messages destined to the node whose ancestors this list represents. + int childnum; + struct ancestors *next; +}; +typedef struct ancestors *ANCESTORS; + +void toku_ft_bnc_move_messages_to_stale(FT ft, NONLEAF_CHILDINFO bnc); + +void toku_move_ftnode_messages_to_stale(FT ft, FTNODE node); + +// TODO: Should ft_handle just be FT? +class pivot_bounds; +void toku_apply_ancestors_messages_to_node(FT_HANDLE t, FTNODE node, ANCESTORS ancestors, + const pivot_bounds &bounds, + bool *msgs_applied, int child_to_read); + +bool toku_ft_leaf_needs_ancestors_messages(FT ft, FTNODE node, ANCESTORS ancestors, + const pivot_bounds &bounds, + MSN *const max_msn_in_path, int child_to_read); + +void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read); + +struct ft_search; +int toku_ft_search_which_child(const toku::comparator &cmp, FTNODE node, ft_search *search); + +// +// internal node inline functions +// TODO: Turn the macros into real functions +// + +static inline void set_BNULL(FTNODE node, int i) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + node->bp[i].ptr.tag = BCT_NULL; +} + +static inline bool is_BNULL (FTNODE node, int i) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + return node->bp[i].ptr.tag == BCT_NULL; +} + +static inline NONLEAF_CHILDINFO BNC(FTNODE node, int i) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER p = node->bp[i].ptr; + paranoid_invariant(p.tag==BCT_NONLEAF); + return p.u.nonleaf; +} + +static inline void set_BNC(FTNODE node, int i, NONLEAF_CHILDINFO nl) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; + p->tag = BCT_NONLEAF; + p->u.nonleaf = nl; +} + +static inline BASEMENTNODE BLB(FTNODE node, int i) { + paranoid_invariant(i >= 0); + // The optimizer really doesn't like it when we compare + // i to n_children as signed integers. So we assert that + // n_children is in fact positive before doing a comparison + // on the values forcibly cast to unsigned ints. 
+ paranoid_invariant(node->n_children > 0); + paranoid_invariant((unsigned) i < (unsigned) node->n_children); + FTNODE_CHILD_POINTER p = node->bp[i].ptr; + paranoid_invariant(p.tag==BCT_LEAF); + return p.u.leaf; +} + +static inline void set_BLB(FTNODE node, int i, BASEMENTNODE bn) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; + p->tag = BCT_LEAF; + p->u.leaf = bn; +} + +static inline struct sub_block *BSB(FTNODE node, int i) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER p = node->bp[i].ptr; + paranoid_invariant(p.tag==BCT_SUBBLOCK); + return p.u.subblock; +} + +static inline void set_BSB(FTNODE node, int i, struct sub_block *sb) { + paranoid_invariant(i >= 0); + paranoid_invariant(i < node->n_children); + FTNODE_CHILD_POINTER *p = &node->bp[i].ptr; + p->tag = BCT_SUBBLOCK; + p->u.subblock = sb; +} + +// ftnode partition macros +// BP stands for ftnode_partition +#define BP_BLOCKNUM(node,i) ((node)->bp[i].blocknum) +#define BP_STATE(node,i) ((node)->bp[i].state) +#define BP_WORKDONE(node, i)((node)->bp[i].workdone) + +// +// macros for managing a node's clock +// Should be managed by ft-ops.c, NOT by serialize/deserialize +// + +// +// BP_TOUCH_CLOCK uses a compare and swap because multiple threads +// that have a read lock on an internal node may try to touch the clock +// simultaneously +// +#define BP_TOUCH_CLOCK(node, i) ((node)->bp[i].clock_count = 1) +#define BP_SWEEP_CLOCK(node, i) ((node)->bp[i].clock_count = 0) +#define BP_SHOULD_EVICT(node, i) ((node)->bp[i].clock_count == 0) +// not crazy about having these two here, one is for the case where we create new +// nodes, such as in splits and creating new roots, and the other is for when +// we are deserializing a node and not all bp's are touched +#define BP_INIT_TOUCHED_CLOCK(node, i) ((node)->bp[i].clock_count = 1) +#define BP_INIT_UNTOUCHED_CLOCK(node, i) ((node)->bp[i].clock_count = 0) + +// ftnode leaf basementnode macros, +#define BLB_MAX_MSN_APPLIED(node,i) (BLB(node,i)->max_msn_applied) +#define BLB_MAX_DSN_APPLIED(node,i) (BLB(node,i)->max_dsn_applied) +#define BLB_DATA(node,i) (&(BLB(node,i)->data_buffer)) +#define BLB_NBYTESINDATA(node,i) (BLB_DATA(node,i)->get_disk_size()) +#define BLB_SEQINSERT(node,i) (BLB(node,i)->seqinsert) diff --git a/storage/tokudb/ft-index/ft/omt.h b/storage/tokudb/ft-index/ft/omt.h deleted file mode 100644 index b8f87790bd320..0000000000000 --- a/storage/tokudb/ft-index/ft/omt.h +++ /dev/null @@ -1,416 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#if !defined(TOKU_OMT_H) -#define TOKU_OMT_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- - -// Order Maintenance Tree (OMT) -// -// Maintains a collection of totally ordered values, where each value has an integer weight. -// The OMT is a mutable datatype. -// -// The Abstraction: -// -// An OMT is a vector of values, $V$, where $|V|$ is the length of the vector. -// The vector is numbered from $0$ to $|V|-1$. -// Each value has a weight. The weight of the $i$th element is denoted $w(V_i)$. -// -// We can create a new OMT, which is the empty vector. -// -// We can insert a new element $x$ into slot $i$, changing $V$ into $V'$ where -// $|V'|=1+|V|$ and -// -// V'_j = V_j if $ji$. -// -// We can specify $i$ using a kind of function instead of as an integer. -// Let $b$ be a function mapping from values to nonzero integers, such that -// the signum of $b$ is monotically increasing. -// We can specify $i$ as the minimum integer such that $b(V_i)>0$. -// -// We look up a value using its index, or using a Heaviside function. -// For lookups, we allow $b$ to be zero for some values, and again the signum of $b$ must be monotonically increasing. -// When lookup up values, we can look up -// $V_i$ where $i$ is the minimum integer such that $b(V_i)=0$. (With a special return code if no such value exists.) -// (Rationale: Ordinarily we want $i$ to be unique. But for various reasons we want to allow multiple zeros, and we want the smallest $i$ in that case.) -// $V_i$ where $i$ is the minimum integer such that $b(V_i)>0$. (Or an indication that no such value exists.) -// $V_i$ where $i$ is the maximum integer such that $b(V_i)<0$. (Or an indication that no such value exists.) -// -// When looking up a value using a Heaviside function, we get the value and its index. -// -// We can also split an OMT into two OMTs, splitting the weight of the values evenly. -// Find a value $j$ such that the values to the left of $j$ have about the same total weight as the values to the right of $j$. -// The resulting two OMTs contain the values to the left of $j$ and the values to the right of $j$ respectively. -// All of the values from the original OMT go into one of the new OMTs. -// If the weights of the values don't split exactly evenly, then the implementation has the freedom to choose whether -// the new left OMT or the new right OMT is larger. -// -// Performance: -// Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$ calls to the Heaviside function. -// The memory required is O(|V|). -// -// The programming API: - -//typedef struct value *OMTVALUE; // A slight improvement over using void*. -#include -typedef void *OMTVALUE; -typedef toku::omt *OMT; - - -int toku_omt_create (OMT *omtp); -// Effect: Create an empty OMT. Stores it in *omtp. -// Requires: omtp != NULL -// Returns: -// 0 success -// ENOMEM out of memory (and doesn't modify *omtp) -// Performance: constant time. - -int toku_omt_create_from_sorted_array(OMT *omtp, OMTVALUE *values, uint32_t numvalues); -// Effect: Create a OMT containing values. The number of values is in numvalues. -// Stores the new OMT in *omtp. -// Requires: omtp != NULL -// Requires: values != NULL -// Requires: values is sorted -// Returns: -// 0 success -// ENOMEM out of memory (and doesn't modify *omtp) -// Performance: time=O(numvalues) -// Rational: Normally to insert N values takes O(N lg N) amortized time. -// If the N values are known in advance, are sorted, and -// the structure is empty, we can batch insert them much faster. 
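The lookups described above are all phrased in terms of a heaviside function whose signum is monotonically increasing over the vector. The sketch below shows a find_zero-style search over a sorted std::vector<int>, where the heaviside function is simply v - target; the names and the boolean return convention are assumptions for the sketch, not the OMT API.

#include <vector>
#include <cassert>

// Find the smallest index i such that h(V_i) >= 0, where h(v) = v - target.
// Returns true and sets *idx if h(V_i) == 0 at that index, false otherwise
// (mirroring the 0 / DB_NOTFOUND split described above).
static bool find_zero(const std::vector<int> &v, int target, size_t *idx) {
    size_t lo = 0, hi = v.size();
    while (lo < hi) {
        size_t mi = (lo + hi) / 2;
        if (v[mi] - target < 0) {
            lo = mi + 1;
        } else {
            hi = mi;
        }
    }
    *idx = lo;                        // may be v.size() if h never reaches zero
    return lo < v.size() && v[lo] == target;
}

int main() {
    std::vector<int> v = {2, 4, 4, 8};
    size_t idx;
    assert(find_zero(v, 4, &idx) && idx == 1);    // smallest i with h == 0
    assert(!find_zero(v, 5, &idx) && idx == 3);   // h first becomes positive at index 3
    assert(!find_zero(v, 9, &idx) && idx == 4);   // no such i: idx == size
    return 0;
}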
- -int toku_omt_create_steal_sorted_array(OMT *omtp, OMTVALUE **valuesp, uint32_t numvalues, uint32_t steal_capacity); -// Effect: Create an OMT containing values. The number of values is in numvalues. -// On success the OMT takes ownership of *valuesp array, and sets valuesp=NULL. -// Requires: omtp != NULL -// Requires: valuesp != NULL -// Requires: *valuesp is sorted -// Requires: *valuesp was allocated with toku_malloc -// Requires: Capacity of the *valuesp array is <= steal_capacity -// Requires: On success, *valuesp may not be accessed again by the caller. -// Returns: -// 0 success -// ENOMEM out of memory (and doesn't modify *omtp) -// EINVAL *valuesp == NULL or numvalues > capacity -// Performance: time=O(1) -// Rational: toku_omt_create_from_sorted_array takes O(numvalues) time. -// By taking ownership of the array, we save a malloc and memcpy, -// and possibly a free (if the caller is done with the array). - -void toku_omt_destroy(OMT *omtp); -// Effect: Destroy an OMT, freeing all its memory. -// Does not free the OMTVALUEs stored in the OMT. -// Those values may be freed before or after calling toku_omt_destroy. -// Also sets *omtp=NULL. -// Requires: omtp != NULL -// Requires: *omtp != NULL -// Rationale: The usage is to do something like -// toku_omt_destroy(&s->omt); -// and now s->omt will have a NULL pointer instead of a dangling freed pointer. -// Rationale: Returns no values since free() cannot fail. -// Rationale: Does not free the OMTVALUEs to reduce complexity. -// Performance: time=O(toku_omt_size(*omtp)) - -uint32_t toku_omt_size(OMT V); -// Effect: return |V|. -// Requires: V != NULL -// Performance: time=O(1) - -int toku_omt_iterate_on_range(OMT omt, uint32_t left, uint32_t right, int (*f)(OMTVALUE, uint32_t, void*), void*v); -// Effect: Iterate over the values of the omt, from left to right, calling f on each value. -// The second argument passed to f is the index of the value. -// The third argument passed to f is v. -// The indices run from 0 (inclusive) to toku_omt_size(omt) (exclusive). -// We will iterate only over [left,right) -// -// Requires: omt != NULL -// left <= right -// Requires: f != NULL -// Returns: -// If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by toku_omt_iterate. -// If f always returns zero, then toku_omt_iterate returns 0. -// Requires: Don't modify omt while running. (E.g., f may not insert or delete values form omt.) -// Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in omt. -// Rational: Although the functional iterator requires defining another function (as opposed to C++ style iterator), it is much easier to read. - -int toku_omt_iterate(OMT omt, int (*f)(OMTVALUE, uint32_t, void*), void*v); -// Effect: Iterate over the values of the omt, from left to right, calling f on each value. -// The second argument passed to f is the index of the value. -// The third argument passed to f is v. -// The indices run from 0 (inclusive) to toku_omt_size(omt) (exclusive). -// Requires: omt != NULL -// Requires: f != NULL -// Returns: -// If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by toku_omt_iterate. -// If f always returns zero, then toku_omt_iterate returns 0. -// Requires: Don't modify omt while running. (E.g., f may not insert or delete values form omt.) -// Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in omt. 
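As a concrete (non-OMT) illustration of this callback style, the sketch below applies the same iterate-and-stop-on-nonzero contract to a plain std::vector; the value type and the example callback are assumptions made only for this sketch.

#include <vector>
#include <cstdint>
#include <cstdio>

// Iterate left to right, passing (value, index, extra) to f; stop early and
// return f's value as soon as f returns nonzero, otherwise return 0.
static int iterate(const std::vector<int> &v,
                   int (*f)(int value, uint32_t idx, void *extra), void *extra) {
    for (uint32_t i = 0; i < v.size(); i++) {
        int r = f(v[i], i, extra);
        if (r != 0) return r;
    }
    return 0;
}

static int stop_at_negative(int value, uint32_t idx, void *extra) {
    if (value < 0) {
        *static_cast<uint32_t *>(extra) = idx;
        return -1;   // nonzero stops the iteration
    }
    return 0;
}

int main() {
    std::vector<int> v = {3, 7, -2, 9};
    uint32_t where = 0;
    int r = iterate(v, stop_at_negative, &where);
    std::printf("r=%d stopped at index %u\n", r, where);   // prints r=-1, index 2
    return 0;
}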
-// Rational: Although the functional iterator requires defining another function (as opposed to C++ style iterator), it is much easier to read. - -int toku_omt_insert_at(OMT omt, OMTVALUE value, uint32_t idx); -// Effect: Increases indexes of all items at slot >= index by 1. -// Insert value into the position at index. -// -// Returns: -// 0 success -// EINVAL if index>toku_omt_size(omt) -// ENOMEM -// On error, omt is unchanged. -// Performance: time=O(\log N) amortized time. -// Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now. - -int toku_omt_set_at (OMT omt, OMTVALUE value, uint32_t idx); -// Effect: Replaces the item at index with value. -// Returns: -// 0 success -// EINVAL if index>=toku_omt_size(omt) -// On error, omt i sunchanged. -// Performance: time=O(\log N) -// Rationale: The BRT needs to be able to replace a value with another copy of the same value (allocated in a different location) - -int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v, uint32_t *idx); -// Effect: Insert value into the OMT. -// If there is some i such that $h(V_i, v)=0$ then returns DB_KEYEXIST. -// Otherwise, let i be the minimum value such that $h(V_i, v)>0$. -// If no such i exists, then let i be |V| -// Then this has the same effect as -// omt_insert_at(tree, value, i); -// If index!=NULL then i is stored in *index -// Requires: The signum of h must be monotonically increasing. -// Returns: -// 0 success -// DB_KEYEXIST the key is present (h was equal to zero for some value) -// ENOMEM -// On nonzero return, omt is unchanged. -// On nonzero non-DB_KEYEXIST return, *index is unchanged. -// Performance: time=O(\log N) amortized. -// Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now. - -int toku_omt_delete_at(OMT omt, uint32_t idx); -// Effect: Delete the item in slot index. -// Decreases indexes of all items at slot >= index by 1. -// Returns -// 0 success -// EINVAL if index>=toku_omt_size(omt) -// On error, omt is unchanged. -// Rationale: To delete an item, first find its index using toku_omt_find, then delete it. -// Performance: time=O(\log N) amortized. - -int toku_omt_fetch (OMT V, uint32_t i, OMTVALUE *v); -// Effect: Set *v=V_i -// If c!=NULL then set c's abstract offset to i. -// Requires: v != NULL -// Returns -// 0 success -// EINVAL if index>=toku_omt_size(omt) -// On nonzero return, *v is unchanged, and c (if nonnull) is either -// invalidated or unchanged. -// Performance: time=O(\log N) -// Implementation Notes: It is possible that c was previously valid and was -// associated with a different OMT. If c is changed by this -// function, the function must remove c's association with the old -// OMT, and associate it with the new OMT. - -int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, uint32_t *idx); -// Effect: Find the smallest i such that h(V_i, extra)>=0 -// If there is such an i and h(V_i,extra)==0 then set *index=i and return 0. -// If there is such an i and h(V_i,extra)>0 then set *index=i and return DB_NOTFOUND. -// If there is no such i then set *index=toku_omt_size(V) and return DB_NOTFOUND. -// Requires: index!=NULL - -int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, uint32_t *idx); -// Effect: -// If direction >0 then find the smallest i such that h(V_i,extra)>0. 
-// If direction <0 then find the largest i such that h(V_i,extra)<0. -// (Direction may not be equal to zero.) -// If value!=NULL then store V_i in *value -// If index!=NULL then store i in *index. -// Requires: The signum of h is monotically increasing. -// Returns -// 0 success -// DB_NOTFOUND no such value is found. -// On nonzero return, *value and *index are unchanged, and c (if nonnull) is either invalidated -// or unchanged. -// Performance: time=O(\log N) -// Rationale: -// Here's how to use the find function to find various things -// Cases for find: -// find first value: ( h(v)=+1, direction=+1 ) -// find last value ( h(v)=-1, direction=-1 ) -// find first X ( h(v)=(v< x) ? -1 : 1 direction=+1 ) -// find last X ( h(v)=(v<=x) ? -1 : 1 direction=-1 ) -// find X or successor to X ( same as find first X. ) -// -// Rationale: To help understand heaviside functions and behavor of find: -// There are 7 kinds of heaviside functions. -// The signus of the h must be monotonically increasing. -// Given a function of the following form, A is the element -// returned for direction>0, B is the element returned -// for direction<0, C is the element returned for -// direction==0 (see find_zero) (with a return of 0), and D is the element -// returned for direction==0 (see find_zero) with a return of DB_NOTFOUND. -// If any of A, B, or C are not found, then asking for the -// associated direction will return DB_NOTFOUND. -// See find_zero for more information. -// -// Let the following represent the signus of the heaviside function. -// -// -...- -// A -// D -// -// +...+ -// B -// D -// -// 0...0 -// C -// -// -...-0...0 -// AC -// -// 0...0+...+ -// C B -// -// -...-+...+ -// AB -// D -// -// -...-0...0+...+ -// AC B - -int toku_omt_split_at(OMT omt, OMT *newomt, uint32_t idx); -// Effect: Create a new OMT, storing it in *newomt. -// The values to the right of index (starting at index) are moved to *newomt. -// Requires: omt != NULL -// Requires: newomt != NULL -// Returns -// 0 success, -// EINVAL if index > toku_omt_size(omt) -// ENOMEM -// On nonzero return, omt and *newomt are unmodified. -// Performance: time=O(n) -// Rationale: We don't need a split-evenly operation. We need to split items so that their total sizes -// are even, and other similar splitting criteria. It's easy to split evenly by calling toku_omt_size(), and dividing by two. - -int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomt); -// Effect: Appends leftomt and rightomt to produce a new omt. -// Sets *newomt to the new omt. -// On success, leftomt and rightomt destroyed,. -// Returns 0 on success -// ENOMEM on out of memory. -// On error, nothing is modified. -// Performance: time=O(n) is acceptable, but one can imagine implementations that are O(\log n) worst-case. - -int toku_omt_clone_noptr(OMT *dest, OMT src); -// Effect: Creates a copy of an omt. -// Sets *dest to the clone -// Each element is assumed to be stored directly in the omt, that is, the OMTVALUEs are not pointers, they are data. Thus no extra memory allocation is required. -// Returns 0 on success -// ENOMEM on out of memory. -// On error, nothing is modified. -// Performance: time between O(n) and O(n log n), depending how long it -// takes to traverse src. - -void toku_omt_clear(OMT omt); -// Effect: Set the tree to be empty. -// Note: Will not reallocate or resize any memory, since returning void precludes calling malloc. 
-// Performance: time=O(1) - -size_t toku_omt_memory_size (OMT omt); -// Effect: Return the size (in bytes) of the omt, as it resides in main memory. Don't include any of the OMTVALUES. - - - -#endif /* #ifndef TOKU_OMT_H */ - diff --git a/storage/tokudb/ft-index/ft/pivotkeys.cc b/storage/tokudb/ft-index/ft/pivotkeys.cc new file mode 100644 index 0000000000000..cf37777d89265 --- /dev/null +++ b/storage/tokudb/ft-index/ft/pivotkeys.cc @@ -0,0 +1,491 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. 
This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include "portability/memory.h" + +#include "ft/node.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" + +void ftnode_pivot_keys::create_empty() { + _num_pivots = 0; + _total_size = 0; + _fixed_keys = nullptr; + _fixed_keylen = 0; + _fixed_keylen_aligned = 0; + _dbt_keys = nullptr; +} + +void ftnode_pivot_keys::create_from_dbts(const DBT *keys, int n) { + create_empty(); + _num_pivots = n; + + // see if every key has the same length + bool keys_same_size = true; + for (int i = 1; i < _num_pivots; i++) { + if (keys[i].size != keys[i - 1].size) { + keys_same_size = false; + break; + } + } + + if (keys_same_size && _num_pivots > 0) { + // if so, store pivots in a tightly packed array of fixed length keys + _fixed_keylen = keys[0].size; + _fixed_keylen_aligned = _align4(_fixed_keylen); + _total_size = _fixed_keylen_aligned * _num_pivots; + XMALLOC_N_ALIGNED(64, _total_size, _fixed_keys); + for (int i = 0; i < _num_pivots; i++) { + invariant(keys[i].size == _fixed_keylen); + memcpy(_fixed_key(i), keys[i].data, _fixed_keylen); + } + } else { + // otherwise we'll just store the pivots in an array of dbts + XMALLOC_N_ALIGNED(64, _num_pivots, _dbt_keys); + for (int i = 0; i < _num_pivots; i++) { + size_t size = keys[i].size; + toku_memdup_dbt(&_dbt_keys[i], keys[i].data, size); + _total_size += size; + } + } + + sanity_check(); +} + +void ftnode_pivot_keys::_create_from_fixed_keys(const char *fixedkeys, size_t fixed_keylen, int n) { + create_empty(); + _num_pivots = n; + _fixed_keylen = fixed_keylen; + _fixed_keylen_aligned = _align4(fixed_keylen); + _total_size = _fixed_keylen_aligned * _num_pivots; + XMEMDUP_N(_fixed_keys, fixedkeys, _total_size); +} + +// effect: create pivot keys as a clone of an existing set of pivotkeys +void ftnode_pivot_keys::create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys) { + if (pivotkeys._fixed_format()) { + _create_from_fixed_keys(pivotkeys._fixed_keys, pivotkeys._fixed_keylen, pivotkeys._num_pivots); + } else { + create_from_dbts(pivotkeys._dbt_keys, pivotkeys._num_pivots); + } + + sanity_check(); +} + +void ftnode_pivot_keys::destroy() { + if (_dbt_keys != nullptr) { + for (int i = 0; i < _num_pivots; i++) { + toku_destroy_dbt(&_dbt_keys[i]); + } + toku_free(_dbt_keys); + _dbt_keys = nullptr; + } + if (_fixed_keys != nullptr) { + toku_free(_fixed_keys); + _fixed_keys = nullptr; + } + _fixed_keylen = 0; + 
_fixed_keylen_aligned = 0; + _num_pivots = 0; + _total_size = 0; +} + +void ftnode_pivot_keys::_convert_to_fixed_format() { + invariant(!_fixed_format()); + + // convert to a tightly packed array of fixed length keys + _fixed_keylen = _dbt_keys[0].size; + _fixed_keylen_aligned = _align4(_fixed_keylen); + _total_size = _fixed_keylen_aligned * _num_pivots; + XMALLOC_N_ALIGNED(64, _total_size, _fixed_keys); + for (int i = 0; i < _num_pivots; i++) { + invariant(_dbt_keys[i].size == _fixed_keylen); + memcpy(_fixed_key(i), _dbt_keys[i].data, _fixed_keylen); + } + + // destroy the dbt array format + for (int i = 0; i < _num_pivots; i++) { + toku_destroy_dbt(&_dbt_keys[i]); + } + toku_free(_dbt_keys); + _dbt_keys = nullptr; + + invariant(_fixed_format()); + sanity_check(); +} + +void ftnode_pivot_keys::_convert_to_dbt_format() { + invariant(_fixed_format()); + + // convert to an aray of dbts + REALLOC_N_ALIGNED(64, _num_pivots, _dbt_keys); + for (int i = 0; i < _num_pivots; i++) { + toku_memdup_dbt(&_dbt_keys[i], _fixed_key(i), _fixed_keylen); + } + // pivots sizes are not aligned up dbt format + _total_size = _num_pivots * _fixed_keylen; + + // destroy the fixed key format + toku_free(_fixed_keys); + _fixed_keys = nullptr; + _fixed_keylen = 0; + _fixed_keylen_aligned = 0; + + invariant(!_fixed_format()); + sanity_check(); +} + +void ftnode_pivot_keys::deserialize_from_rbuf(struct rbuf *rb, int n) { + _num_pivots = n; + _total_size = 0; + _fixed_keys = nullptr; + _fixed_keylen = 0; + _dbt_keys = nullptr; + + XMALLOC_N_ALIGNED(64, _num_pivots, _dbt_keys); + bool keys_same_size = true; + for (int i = 0; i < _num_pivots; i++) { + const void *pivotkeyptr; + uint32_t size; + rbuf_bytes(rb, &pivotkeyptr, &size); + toku_memdup_dbt(&_dbt_keys[i], pivotkeyptr, size); + _total_size += size; + if (i > 0 && keys_same_size && _dbt_keys[i].size != _dbt_keys[i - 1].size) { + // not all keys are the same size, we'll stick to the dbt array format + keys_same_size = false; + } + } + + if (keys_same_size && _num_pivots > 0) { + _convert_to_fixed_format(); + } + + sanity_check(); +} + +DBT ftnode_pivot_keys::get_pivot(int i) const { + paranoid_invariant(i < _num_pivots); + if (_fixed_format()) { + paranoid_invariant(i * _fixed_keylen_aligned < _total_size); + DBT dbt; + toku_fill_dbt(&dbt, _fixed_key(i), _fixed_keylen); + return dbt; + } else { + return _dbt_keys[i]; + } +} + +DBT *ftnode_pivot_keys::fill_pivot(int i, DBT *dbt) const { + paranoid_invariant(i < _num_pivots); + if (_fixed_format()) { + toku_fill_dbt(dbt, _fixed_key(i), _fixed_keylen); + } else { + toku_copyref_dbt(dbt, _dbt_keys[i]); + } + return dbt; +} + +void ftnode_pivot_keys::_add_key_dbt(const DBT *key, int i) { + toku_clone_dbt(&_dbt_keys[i], *key); + _total_size += _dbt_keys[i].size; +} + +void ftnode_pivot_keys::_destroy_key_dbt(int i) { + invariant(_total_size >= _dbt_keys[i].size); + _total_size -= _dbt_keys[i].size; + toku_destroy_dbt(&_dbt_keys[i]); +} + +void ftnode_pivot_keys::_insert_at_dbt(const DBT *key, int i) { + // make space for a new pivot, slide existing keys to the right + REALLOC_N_ALIGNED(64, _num_pivots + 1, _dbt_keys); + memmove(&_dbt_keys[i + 1], &_dbt_keys[i], (_num_pivots - i) * sizeof(DBT)); + _add_key_dbt(key, i); +} + +void ftnode_pivot_keys::_insert_at_fixed(const DBT *key, int i) { + REALLOC_N_ALIGNED(64, (_num_pivots + 1) * _fixed_keylen_aligned, _fixed_keys); + // TODO: This is not going to be valgrind-safe, because we do not initialize the space + // between _fixed_keylen and _fixed_keylen_aligned (but we 
probably should) + memmove(_fixed_key(i + 1), _fixed_key(i), (_num_pivots - i) * _fixed_keylen_aligned); + memcpy(_fixed_key(i), key->data, _fixed_keylen); + _total_size += _fixed_keylen_aligned; +} + +void ftnode_pivot_keys::insert_at(const DBT *key, int i) { + invariant(i <= _num_pivots); // it's ok to insert at the end, so we check <= n + + // if the new key doesn't have the same size, we can't be in fixed format + if (_fixed_format() && key->size != _fixed_keylen) { + _convert_to_dbt_format(); + } + + if (_fixed_format()) { + _insert_at_fixed(key, i); + } else { + _insert_at_dbt(key, i); + } + _num_pivots++; + + invariant(total_size() > 0); +} + +void ftnode_pivot_keys::_append_dbt(const ftnode_pivot_keys &pivotkeys) { + REALLOC_N_ALIGNED(64, _num_pivots + pivotkeys._num_pivots, _dbt_keys); + bool other_fixed = pivotkeys._fixed_format(); + for (int i = 0; i < pivotkeys._num_pivots; i++) { + size_t size = other_fixed ? pivotkeys._fixed_keylen : + pivotkeys._dbt_keys[i].size; + toku_memdup_dbt(&_dbt_keys[_num_pivots + i], + other_fixed ? pivotkeys._fixed_key(i) : + pivotkeys._dbt_keys[i].data, + size); + _total_size += size; + } +} + +void ftnode_pivot_keys::_append_fixed(const ftnode_pivot_keys &pivotkeys) { + if (pivotkeys._fixed_format() && pivotkeys._fixed_keylen == _fixed_keylen) { + // other pivotkeys have the same fixed keylen + REALLOC_N_ALIGNED(64, (_num_pivots + pivotkeys._num_pivots) * _fixed_keylen_aligned, _fixed_keys); + memcpy(_fixed_key(_num_pivots), pivotkeys._fixed_keys, pivotkeys._total_size); + _total_size += pivotkeys._total_size; + } else { + // must convert to dbt format, other pivotkeys have different length'd keys + _convert_to_dbt_format(); + _append_dbt(pivotkeys); + } +} + +void ftnode_pivot_keys::append(const ftnode_pivot_keys &pivotkeys) { + if (_fixed_format()) { + _append_fixed(pivotkeys); + } else { + _append_dbt(pivotkeys); + } + _num_pivots += pivotkeys._num_pivots; + + sanity_check(); +} + +void ftnode_pivot_keys::_replace_at_dbt(const DBT *key, int i) { + _destroy_key_dbt(i); + _add_key_dbt(key, i); +} + +void ftnode_pivot_keys::_replace_at_fixed(const DBT *key, int i) { + if (key->size == _fixed_keylen) { + memcpy(_fixed_key(i), key->data, _fixed_keylen); + } else { + // must convert to dbt format, replacement key has different length + _convert_to_dbt_format(); + _replace_at_dbt(key, i); + } +} + +void ftnode_pivot_keys::replace_at(const DBT *key, int i) { + if (i < _num_pivots) { + if (_fixed_format()) { + _replace_at_fixed(key, i); + } else { + _replace_at_dbt(key, i); + } + } else { + invariant(i == _num_pivots); // appending to the end is ok + insert_at(key, i); + } + invariant(total_size() > 0); +} + +void ftnode_pivot_keys::_delete_at_fixed(int i) { + memmove(_fixed_key(i), _fixed_key(i + 1), (_num_pivots - 1 - i) * _fixed_keylen_aligned); + _total_size -= _fixed_keylen_aligned; +} + +void ftnode_pivot_keys::_delete_at_dbt(int i) { + // slide over existing keys, then shrink down to size + _destroy_key_dbt(i); + memmove(&_dbt_keys[i], &_dbt_keys[i + 1], (_num_pivots - 1 - i) * sizeof(DBT)); + REALLOC_N_ALIGNED(64, _num_pivots - 1, _dbt_keys); +} + +void ftnode_pivot_keys::delete_at(int i) { + invariant(i < _num_pivots); + + if (_fixed_format()) { + _delete_at_fixed(i); + } else { + _delete_at_dbt(i); + } + + _num_pivots--; +} + +void ftnode_pivot_keys::_split_at_fixed(int i, ftnode_pivot_keys *other) { + // recreate the other set of pivots from index >= i + other->_create_from_fixed_keys(_fixed_key(i), _fixed_keylen, _num_pivots - i); + + // 
shrink down to size + _total_size = i * _fixed_keylen_aligned; + REALLOC_N_ALIGNED(64, _total_size, _fixed_keys); +} + +void ftnode_pivot_keys::_split_at_dbt(int i, ftnode_pivot_keys *other) { + // recreate the other set of pivots from index >= i + other->create_from_dbts(&_dbt_keys[i], _num_pivots - i); + + // destroy everything greater, shrink down to size + for (int k = i; k < _num_pivots; k++) { + _destroy_key_dbt(k); + } + REALLOC_N_ALIGNED(64, i, _dbt_keys); +} + +void ftnode_pivot_keys::split_at(int i, ftnode_pivot_keys *other) { + if (i < _num_pivots) { + if (_fixed_format()) { + _split_at_fixed(i, other); + } else { + _split_at_dbt(i, other); + } + _num_pivots = i; + } + + sanity_check(); +} + +void ftnode_pivot_keys::serialize_to_wbuf(struct wbuf *wb) const { + bool fixed = _fixed_format(); + size_t written = 0; + for (int i = 0; i < _num_pivots; i++) { + size_t size = fixed ? _fixed_keylen : _dbt_keys[i].size; + invariant(size); + wbuf_nocrc_bytes(wb, fixed ? _fixed_key(i) : _dbt_keys[i].data, size); + written += size; + } + invariant(written == serialized_size()); +} + +int ftnode_pivot_keys::num_pivots() const { + // if we have fixed size keys, the number of pivots should be consistent + paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen_aligned * _num_pivots)); + return _num_pivots; +} + +size_t ftnode_pivot_keys::total_size() const { + // if we have fixed size keys, the total size should be consistent + paranoid_invariant(_fixed_keys == nullptr || (_total_size == _fixed_keylen_aligned * _num_pivots)); + return _total_size; +} + +size_t ftnode_pivot_keys::serialized_size() const { + // we only return the size that will be used when serialized, so we calculate based + // on the fixed keylen and not the aligned keylen. + return _fixed_format() ? _num_pivots * _fixed_keylen : _total_size; +} + +void ftnode_pivot_keys::sanity_check() const { + if (_fixed_format()) { + invariant(_dbt_keys == nullptr); + invariant(_fixed_keylen_aligned == _align4(_fixed_keylen)); + invariant(_num_pivots * _fixed_keylen <= _total_size); + invariant(_num_pivots * _fixed_keylen_aligned == _total_size); + } else { + invariant(_num_pivots == 0 || _dbt_keys != nullptr); + size_t size = 0; + for (int i = 0; i < _num_pivots; i++) { + size += _dbt_keys[i].size; + } + invariant(size == _total_size); + } +} diff --git a/storage/tokudb/ft-index/ft/serialize/block_allocator.cc b/storage/tokudb/ft-index/ft/serialize/block_allocator.cc new file mode 100644 index 0000000000000..6af0ae82b05b7 --- /dev/null +++ b/storage/tokudb/ft-index/ft/serialize/block_allocator.cc @@ -0,0 +1,513 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+#ident "$Id$" + +#include + +#include + +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_stdint.h" +#include "portability/toku_stdlib.h" + +#include "ft/serialize/block_allocator.h" +#include "ft/serialize/block_allocator_strategy.h" + +#if TOKU_DEBUG_PARANOID +#define VALIDATE() validate() +#else +#define VALIDATE() +#endif + +static FILE *ba_trace_file = nullptr; + +void block_allocator::maybe_initialize_trace(void) { + const char *ba_trace_path = getenv("TOKU_BA_TRACE_PATH"); + if (ba_trace_path != nullptr) { + ba_trace_file = toku_os_fopen(ba_trace_path, "w"); + if (ba_trace_file == nullptr) { + fprintf(stderr, "tokuft: error: block allocator trace path found in environment (%s), " + "but it could not be opened for writing (errno %d)\n", + ba_trace_path, get_maybe_error_errno()); + } else { + fprintf(stderr, "tokuft: block allocator tracing enabled, path: %s\n", ba_trace_path); + } + } +} + +void block_allocator::maybe_close_trace() { + if (ba_trace_file != nullptr) { + int r = toku_os_fclose(ba_trace_file); + if (r != 0) { + fprintf(stderr, "tokuft: error: block allocator trace file did not close properly (r %d, errno %d)\n", + r, get_maybe_error_errno()); + } else { + fprintf(stderr, "tokuft: block allocator tracing finished, file closed successfully\n"); + } + } +} + +void block_allocator::_create_internal(uint64_t reserve_at_beginning, uint64_t alignment) { + // the alignment must be at least 512 and aligned with 512 to work with direct I/O + assert(alignment >= 512 && (alignment % 512) == 0); + + _reserve_at_beginning = reserve_at_beginning; + _alignment = alignment; + _n_blocks = 0; + _blocks_array_size = 1; + XMALLOC_N(_blocks_array_size, _blocks_array); + _n_bytes_in_use = reserve_at_beginning; + _strategy = BA_STRATEGY_FIRST_FIT; + + memset(&_trace_lock, 0, sizeof(toku_mutex_t)); + toku_mutex_init(&_trace_lock, nullptr); + + VALIDATE(); +} + +void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) { + _create_internal(reserve_at_beginning, alignment); + _trace_create(); +} + +void block_allocator::destroy() { + toku_free(_blocks_array); + _trace_destroy(); + toku_mutex_destroy(&_trace_lock); +} + +void block_allocator::set_strategy(enum allocation_strategy strategy) { + _strategy = strategy; +} + +void block_allocator::grow_blocks_array_by(uint64_t n_to_add) { + if (_n_blocks + n_to_add > _blocks_array_size) { + uint64_t new_size = _n_blocks + n_to_add; + uint64_t at_least = _blocks_array_size * 2; + if (at_least > new_size) { + new_size = at_least; + } + _blocks_array_size = new_size; + XREALLOC_N(_blocks_array_size, _blocks_array); + } +} + +void block_allocator::grow_blocks_array() { + grow_blocks_array_by(1); +} + +void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment, + struct blockpair *pairs, uint64_t n_blocks) { + _create_internal(reserve_at_beginning, alignment); + + _n_blocks = n_blocks; + grow_blocks_array_by(_n_blocks); + memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair)); + std::sort(_blocks_array, _blocks_array + _n_blocks); + for (uint64_t i = 0; i < _n_blocks; i++) { + // Allocator does not support size 0 blocks. See block_allocator_free_block. 
+ invariant(_blocks_array[i].size > 0); + invariant(_blocks_array[i].offset >= _reserve_at_beginning); + invariant(_blocks_array[i].offset % _alignment == 0); + + _n_bytes_in_use += _blocks_array[i].size; + } + + VALIDATE(); + + _trace_create_from_blockpairs(); +} + +// Effect: align a value by rounding up. +static inline uint64_t align(uint64_t value, uint64_t ba_alignment) { + return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; +} + +struct block_allocator::blockpair * +block_allocator::choose_block_to_alloc_after(size_t size, uint64_t heat) { + switch (_strategy) { + case BA_STRATEGY_FIRST_FIT: + return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment); + case BA_STRATEGY_BEST_FIT: + return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment); + case BA_STRATEGY_HEAT_ZONE: + return block_allocator_strategy::heat_zone(_blocks_array, _n_blocks, size, _alignment, heat); + case BA_STRATEGY_PADDED_FIT: + return block_allocator_strategy::padded_fit(_blocks_array, _n_blocks, size, _alignment); + default: + abort(); + } +} + +// Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512). +void block_allocator::alloc_block(uint64_t size, uint64_t heat, uint64_t *offset) { + struct blockpair *bp; + + // Allocator does not support size 0 blocks. See block_allocator_free_block. + invariant(size > 0); + + grow_blocks_array(); + _n_bytes_in_use += size; + + uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment); + + if (_n_blocks == 0) { + // First and only block + assert(_n_bytes_in_use == _reserve_at_beginning + size); // we know exactly how many are in use + _blocks_array[0].offset = align(_reserve_at_beginning, _alignment); + _blocks_array[0].size = size; + *offset = _blocks_array[0].offset; + goto done; + } else if (end_of_reserve + size <= _blocks_array[0].offset ) { + // Check to see if the space immediately after the reserve is big enough to hold the new block. + bp = &_blocks_array[0]; + memmove(bp + 1, bp, _n_blocks * sizeof(*bp)); + bp[0].offset = end_of_reserve; + bp[0].size = size; + *offset = end_of_reserve; + goto done; + } + + bp = choose_block_to_alloc_after(size, heat); + if (bp != nullptr) { + // our allocation strategy chose the space after `bp' to fit the new block + uint64_t answer_offset = align(bp->offset + bp->size, _alignment); + uint64_t blocknum = bp - _blocks_array; + invariant(&_blocks_array[blocknum] == bp); + invariant(blocknum < _n_blocks); + memmove(bp + 2, bp + 1, (_n_blocks - blocknum - 1) * sizeof(*bp)); + bp[1].offset = answer_offset; + bp[1].size = size; + *offset = answer_offset; + } else { + // It didn't fit anywhere, so fit it on the end. + assert(_n_blocks < _blocks_array_size); + bp = &_blocks_array[_n_blocks]; + uint64_t answer_offset = align(bp[-1].offset + bp[-1].size, _alignment); + bp->offset = answer_offset; + bp->size = size; + *offset = answer_offset; + } + +done: + _n_blocks++; + VALIDATE(); + + _trace_alloc(size, heat, *offset); +} + +// Find the index in the blocks array that has a particular offset. Requires that the block exist. +// Use binary search so it runs fast. +int64_t block_allocator::find_block(uint64_t offset) { + VALIDATE(); + if (_n_blocks == 1) { + assert(_blocks_array[0].offset == offset); + return 0; + } + + uint64_t lo = 0; + uint64_t hi = _n_blocks; + while (1) { + assert(lo < hi); // otherwise no such block exists. 
+ uint64_t mid = (lo + hi) / 2; + uint64_t thisoff = _blocks_array[mid].offset; + if (thisoff < offset) { + lo = mid + 1; + } else if (thisoff > offset) { + hi = mid; + } else { + return mid; + } + } +} + +// To support 0-sized blocks, we need to include size as an input to this function. +// All 0-sized blocks at the same offset can be considered identical, but +// a 0-sized block can share offset with a non-zero sized block. +// The non-zero sized block is not exchangable with a zero sized block (or vice versa), +// so inserting 0-sized blocks can cause corruption here. +void block_allocator::free_block(uint64_t offset) { + VALIDATE(); + int64_t bn = find_block(offset); + assert(bn >= 0); // we require that there is a block with that offset. + _n_bytes_in_use -= _blocks_array[bn].size; + memmove(&_blocks_array[bn], &_blocks_array[bn + 1], + (_n_blocks - bn - 1) * sizeof(struct blockpair)); + _n_blocks--; + VALIDATE(); + + _trace_free(offset); +} + +uint64_t block_allocator::block_size(uint64_t offset) { + int64_t bn = find_block(offset); + assert(bn >=0); // we require that there is a block with that offset. + return _blocks_array[bn].size; +} + +uint64_t block_allocator::allocated_limit() const { + if (_n_blocks == 0) { + return _reserve_at_beginning; + } else { + struct blockpair *last = &_blocks_array[_n_blocks - 1]; + return last->offset + last->size; + } +} + +// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. +// Return the offset and size of the block with that number. +// Return 0 if there is a block that big, return nonzero if b is too big. +int block_allocator::get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size) { + if (b ==0 ) { + *offset = 0; + *size = _reserve_at_beginning; + return 0; + } else if (b > _n_blocks) { + return -1; + } else { + *offset =_blocks_array[b - 1].offset; + *size =_blocks_array[b - 1].size; + return 0; + } +} + +// Requires: report->file_size_bytes is filled in +// Requires: report->data_bytes is filled in +// Requires: report->checkpoint_bytes_additional is filled in +void block_allocator::get_unused_statistics(TOKU_DB_FRAGMENTATION report) { + assert(_n_bytes_in_use == report->data_bytes + report->checkpoint_bytes_additional); + + report->unused_bytes = 0; + report->unused_blocks = 0; + report->largest_unused_block = 0; + if (_n_blocks > 0) { + //Deal with space before block 0 and after reserve: + { + struct blockpair *bp = &_blocks_array[0]; + assert(bp->offset >= align(_reserve_at_beginning, _alignment)); + uint64_t free_space = bp->offset - align(_reserve_at_beginning, _alignment); + if (free_space > 0) { + report->unused_bytes += free_space; + report->unused_blocks++; + if (free_space > report->largest_unused_block) { + report->largest_unused_block = free_space; + } + } + } + + //Deal with space between blocks: + for (uint64_t blocknum = 0; blocknum +1 < _n_blocks; blocknum ++) { + // Consider the space after blocknum + struct blockpair *bp = &_blocks_array[blocknum]; + uint64_t this_offset = bp[0].offset; + uint64_t this_size = bp[0].size; + uint64_t end_of_this_block = align(this_offset+this_size, _alignment); + uint64_t next_offset = bp[1].offset; + uint64_t free_space = next_offset - end_of_this_block; + if (free_space > 0) { + report->unused_bytes += free_space; + report->unused_blocks++; + if (free_space > report->largest_unused_block) { + report->largest_unused_block = free_space; + } + } + } + + //Deal with space after last block + 
{ + struct blockpair *bp = &_blocks_array[_n_blocks-1]; + uint64_t this_offset = bp[0].offset; + uint64_t this_size = bp[0].size; + uint64_t end_of_this_block = align(this_offset+this_size, _alignment); + if (end_of_this_block < report->file_size_bytes) { + uint64_t free_space = report->file_size_bytes - end_of_this_block; + assert(free_space > 0); + report->unused_bytes += free_space; + report->unused_blocks++; + if (free_space > report->largest_unused_block) { + report->largest_unused_block = free_space; + } + } + } + } else { + // No blocks. Just the reserve. + uint64_t end_of_this_block = align(_reserve_at_beginning, _alignment); + if (end_of_this_block < report->file_size_bytes) { + uint64_t free_space = report->file_size_bytes - end_of_this_block; + assert(free_space > 0); + report->unused_bytes += free_space; + report->unused_blocks++; + if (free_space > report->largest_unused_block) { + report->largest_unused_block = free_space; + } + } + } +} + +void block_allocator::get_statistics(TOKU_DB_FRAGMENTATION report) { + report->data_bytes = _n_bytes_in_use; + report->data_blocks = _n_blocks; + report->file_size_bytes = 0; + report->checkpoint_bytes_additional = 0; + get_unused_statistics(report); +} + +void block_allocator::validate() const { + uint64_t n_bytes_in_use = _reserve_at_beginning; + for (uint64_t i = 0; i < _n_blocks; i++) { + n_bytes_in_use += _blocks_array[i].size; + if (i > 0) { + assert(_blocks_array[i].offset > _blocks_array[i - 1].offset); + assert(_blocks_array[i].offset >= _blocks_array[i - 1].offset + _blocks_array[i - 1].size ); + } + } + assert(n_bytes_in_use == _n_bytes_in_use); +} + +// Tracing + +void block_allocator::_trace_create(void) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + fprintf(ba_trace_file, "ba_trace_create %p %" PRIu64 " %" PRIu64 "\n", + this, _reserve_at_beginning, _alignment); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} + +void block_allocator::_trace_create_from_blockpairs(void) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + fprintf(ba_trace_file, "ba_trace_create_from_blockpairs %p %" PRIu64 " %" PRIu64 " ", + this, _reserve_at_beginning, _alignment); + for (uint64_t i = 0; i < _n_blocks; i++) { + fprintf(ba_trace_file, "[%" PRIu64 " %" PRIu64 "] ", + _blocks_array[i].offset, _blocks_array[i].size); + } + fprintf(ba_trace_file, "\n"); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} + +void block_allocator::_trace_destroy(void) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + fprintf(ba_trace_file, "ba_trace_destroy %p\n", this); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} + +void block_allocator::_trace_alloc(uint64_t size, uint64_t heat, uint64_t offset) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + fprintf(ba_trace_file, "ba_trace_alloc %p %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + this, size, heat, offset); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} + +void block_allocator::_trace_free(uint64_t offset) { + if (ba_trace_file != nullptr) { + toku_mutex_lock(&_trace_lock); + fprintf(ba_trace_file, "ba_trace_free %p %" PRIu64 "\n", this, offset); + toku_mutex_unlock(&_trace_lock); + + fflush(ba_trace_file); + } +} diff --git a/storage/tokudb/ft-index/ft/serialize/block_allocator.h b/storage/tokudb/ft-index/ft/serialize/block_allocator.h new file mode 100644 index 0000000000000..b50dadc9e5616 --- /dev/null +++ 
b/storage/tokudb/ft-index/ft/serialize/block_allocator.h @@ -0,0 +1,267 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include "portability/toku_pthread.h" +#include "portability/toku_stdint.h" + +// Block allocator. +// +// A block allocator manages the allocation of variable-sized blocks. +// The translation of block numbers to addresses is handled elsewhere. +// The allocation of block numbers is handled elsewhere. +// +// When creating a block allocator we also specify a certain-sized +// block at the beginning that is preallocated (and cannot be allocated or freed) +// +// We can allocate blocks of a particular size at a particular location. +// We can allocate blocks of a particular size at a location chosen by the allocator. +// We can free blocks. +// We can determine the size of a block. + +class block_allocator { +public: + static const size_t BLOCK_ALLOCATOR_ALIGNMENT = 4096; + + // How much must be reserved at the beginning for the block? + // The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root. + // So 4096 should be enough. + static const size_t BLOCK_ALLOCATOR_HEADER_RESERVE = 4096; + + static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == 0, + "block allocator header must have proper alignment"); + + static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = BLOCK_ALLOCATOR_HEADER_RESERVE * 2; + + enum allocation_strategy { + BA_STRATEGY_FIRST_FIT = 1, + BA_STRATEGY_BEST_FIT, + BA_STRATEGY_PADDED_FIT, + BA_STRATEGY_HEAT_ZONE + }; + + struct blockpair { + uint64_t offset; + uint64_t size; + blockpair(uint64_t o, uint64_t s) : + offset(o), size(s) { + } + int operator<(const struct blockpair &rhs) const { + return offset < rhs.offset; + } + int operator<(const uint64_t &o) const { + return offset < o; + } + }; + + // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. + // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT) + // All blocks be start on a multiple of ALIGNMENT. + // Aborts if we run out of memory. + // Parameters + // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. + // alignment (IN) Block alignment. + void create(uint64_t reserve_at_beginning, uint64_t alignment); + + // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. 
+ // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT) + // The allocator is initialized to contain `n_blocks' of blockpairs, taken from `pairs' + // All blocks be start on a multiple of ALIGNMENT. + // Aborts if we run out of memory. + // Parameters + // pairs, unowned array of pairs to copy + // n_blocks, Size of pairs array + // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. + // alignment (IN) Block alignment. + void create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment, + struct blockpair *pairs, uint64_t n_blocks); + + // Effect: Destroy this block allocator + void destroy(); + + // Effect: Set the allocation strategy that the allocator should use + // Requires: No other threads are operating on this block allocator + void set_strategy(enum allocation_strategy strategy); + + // Effect: Allocate a block of the specified size at an address chosen by the allocator. + // Aborts if anything goes wrong. + // The block address will be a multiple of the alignment. + // Parameters: + // size (IN): The size of the block. (The size does not have to be aligned.) + // offset (OUT): The location of the block. + // heat (IN): A higher heat means we should be prepared to free this block soon (perhaps in the next checkpoint) + // Heat values are lexiographically ordered (like integers), but their specific values are arbitrary + void alloc_block(uint64_t size, uint64_t heat, uint64_t *offset); + + // Effect: Free the block at offset. + // Requires: There must be a block currently allocated at that offset. + // Parameters: + // offset (IN): The offset of the block. + void free_block(uint64_t offset); + + // Effect: Return the size of the block that starts at offset. + // Requires: There must be a block currently allocated at that offset. + // Parameters: + // offset (IN): The offset of the block. + uint64_t block_size(uint64_t offset); + + // Effect: Check to see if the block allocator is OK. This may take a long time. + // Usage Hints: Probably only use this for unit tests. + // TODO: Private? + void validate() const; + + // Effect: Return the unallocated block address of "infinite" size. + // That is, return the smallest address that is above all the allocated blocks. + uint64_t allocated_limit() const; + + // Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. + // Return the offset and size of the block with that number. + // Return 0 if there is a block that big, return nonzero if b is too big. + // Rationale: This is probably useful only for tests. + int get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size); + + // Effect: Fill in report to indicate how the file is used. + // Requires: + // report->file_size_bytes is filled in + // report->data_bytes is filled in + // report->checkpoint_bytes_additional is filled in + void get_unused_statistics(TOKU_DB_FRAGMENTATION report); + + // Effect: Fill in report->data_bytes with the number of bytes in use + // Fill in report->data_blocks with the number of blockpairs in use + // Fill in unused statistics using this->get_unused_statistics() + // Requires: + // report->file_size is ignored on return + // report->checkpoint_bytes_additional is ignored on return + void get_statistics(TOKU_DB_FRAGMENTATION report); + + // Block allocator tracing. + // - Enabled by setting TOKU_BA_TRACE_PATH to the file that the trace file + // should be written to. 
+ // - Trace may be replayed by ba_trace_replay tool in tools/ directory + // eg: "cat mytracefile | ba_trace_replay" + static void maybe_initialize_trace(); + static void maybe_close_trace(); + +private: + void _create_internal(uint64_t reserve_at_beginning, uint64_t alignment); + void grow_blocks_array_by(uint64_t n_to_add); + void grow_blocks_array(); + int64_t find_block(uint64_t offset); + struct blockpair *choose_block_to_alloc_after(size_t size, uint64_t heat); + + // Tracing + toku_mutex_t _trace_lock; + void _trace_create(void); + void _trace_create_from_blockpairs(void); + void _trace_destroy(void); + void _trace_alloc(uint64_t size, uint64_t heat, uint64_t offset); + void _trace_free(uint64_t offset); + + // How much to reserve at the beginning + uint64_t _reserve_at_beginning; + // Block alignment + uint64_t _alignment; + // How many blocks + uint64_t _n_blocks; + // How big is the blocks_array. Must be >= n_blocks. + uint64_t _blocks_array_size; + // These blocks are sorted by address. + struct blockpair *_blocks_array; + // Including the reserve_at_beginning + uint64_t _n_bytes_in_use; + // The allocation strategy are we using + enum allocation_strategy _strategy; +}; diff --git a/storage/tokudb/ft-index/ft/serialize/block_allocator_strategy.cc b/storage/tokudb/ft-index/ft/serialize/block_allocator_strategy.cc new file mode 100644 index 0000000000000..f896a41aaba8b --- /dev/null +++ b/storage/tokudb/ft-index/ft/serialize/block_allocator_strategy.cc @@ -0,0 +1,274 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. 
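
Returning briefly to the block_allocator interface declared in block_allocator.h above: as a rough orientation aid, the sketch below exercises only calls that appear in that header (create, alloc_block, block_size, allocated_limit, free_block, destroy). The reserve size, alignment, heat values, and block sizes are arbitrary illustration values, not taken from the tree, and the snippet assumes it is compiled inside the ft-index tree so that the header and its dependencies resolve.

#include <assert.h>
#include <stdint.h>

#include "ft/serialize/block_allocator.h"

// Illustrative only: one possible call sequence against the interface above.
static void block_allocator_usage_sketch(void) {
    block_allocator ba;
    ba.create(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
              block_allocator::BLOCK_ALLOCATOR_ALIGNMENT);

    uint64_t off_a, off_b;
    ba.alloc_block(1000, 0, &off_a);  // sizes need not be aligned; heat 0 = cold
    ba.alloc_block(4096, 1, &off_b);  // heat is only a hint; default strategy is first fit

    assert(ba.block_size(off_a) == 1000);
    assert(ba.allocated_limit() >= off_b + 4096);

    ba.free_block(off_a);             // offsets must refer to currently allocated blocks
    ba.free_block(off_b);
    ba.destroy();
}
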
+ +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#include + +#include + +#include "portability/toku_assert.h" + +#include "ft/serialize/block_allocator_strategy.h" + +static uint64_t _align(uint64_t value, uint64_t ba_alignment) { + return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; +} + +static uint64_t _roundup_to_power_of_two(uint64_t value) { + uint64_t r = 4096; + while (r < value) { + r *= 2; + invariant(r > 0); + } + return r; +} + +// First fit block allocation +static struct block_allocator::blockpair * +_first_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment, + uint64_t max_padding) { + if (n_blocks == 1) { + // won't enter loop, can't underflow the direction < 0 case + return nullptr; + } + + struct block_allocator::blockpair *bp = &blocks_array[0]; + for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0; + n_spaces_to_check--, bp++) { + // Consider the space after bp + uint64_t padded_alignment = max_padding != 0 ? 
_align(max_padding, alignment) : alignment; + uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment); + if (possible_offset + size <= bp[1].offset) { // bp[1] is always valid since bp < &blocks_array[n_blocks-1] + invariant(bp - blocks_array < (int64_t) n_blocks); + return bp; + } + } + return nullptr; +} + +static struct block_allocator::blockpair * +_first_fit_bw(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment, + uint64_t max_padding, struct block_allocator::blockpair *blocks_array_limit) { + if (n_blocks == 1) { + // won't enter loop, can't underflow the direction < 0 case + return nullptr; + } + + struct block_allocator::blockpair *bp = &blocks_array[-1]; + for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0; + n_spaces_to_check--, bp--) { + // Consider the space after bp + uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment; + uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment); + if (&bp[1] < blocks_array_limit && possible_offset + size <= bp[1].offset) { + invariant(blocks_array - bp < (int64_t) n_blocks); + return bp; + } + } + return nullptr; +} + +struct block_allocator::blockpair * +block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment) { + return _first_fit(blocks_array, n_blocks, size, alignment, 0); +} + +// Best fit block allocation +struct block_allocator::blockpair * +block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment) { + struct block_allocator::blockpair *best_bp = nullptr; + uint64_t best_hole_size = 0; + for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) { + // Consider the space after blocknum + struct block_allocator::blockpair *bp = &blocks_array[blocknum]; + uint64_t possible_offset = _align(bp->offset + bp->size, alignment); + uint64_t possible_end_offset = possible_offset + size; + if (possible_end_offset <= bp[1].offset) { + // It fits here. Is it the best fit? + uint64_t hole_size = bp[1].offset - possible_end_offset; + if (best_bp == nullptr || hole_size < best_hole_size) { + best_hole_size = hole_size; + best_bp = bp; + } + } + } + return best_bp; +} + +static uint64_t padded_fit_alignment = 4096; + +// TODO: These compiler specific directives should be abstracted in a portability header +// portability/toku_compiler.h? +__attribute__((__constructor__)) +static void determine_padded_fit_alignment_from_env(void) { + // TODO: Should be in portability as 'toku_os_getenv()?' + const char *s = getenv("TOKU_BA_PADDED_FIT_ALIGNMENT"); + if (s != nullptr && strlen(s) > 0) { + const int64_t alignment = strtoll(s, nullptr, 10); + if (alignment <= 0) { + fprintf(stderr, "tokuft: error: block allocator padded fit alignment found in environment (%s), " + "but it's out of range (should be an integer > 0). defaulting to %" PRIu64 "\n", + s, padded_fit_alignment); + } else { + padded_fit_alignment = _roundup_to_power_of_two(alignment); + fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n", + padded_fit_alignment); + } + } +} + +// First fit into a block that is oversized by up to max_padding. +// The hope is that if we purposefully waste a bit of space at allocation +// time we'll be more likely to reuse this block later. 
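
Before the padded_fit entry point, here is a standalone illustration of the difference between plain first fit and the padded variant described in the comment above: both scan the sorted block array and test the gap after each block, but padded fit rounds the candidate offset up to a coarser step (max_padding rounded up to the alignment), deliberately leaving slack so the hole is more likely to be reusable later. Everything in the sketch is simplified and the names are invented; the real code operates on block_allocator::blockpair and is shown in _first_fit above.

#include <cstdint>
#include <cstdio>
#include <vector>

// Simplified stand-in for block_allocator::blockpair; illustration only.
struct used_block { uint64_t offset; uint64_t size; };

// Same rounding formula as the allocator's align()/_align helpers.
static uint64_t round_up(uint64_t v, uint64_t a) {
    return ((v + a - 1) / a) * a;
}

// Index of the block after which `size` bytes fit, or -1 if no gap works.
// max_padding == 0 gives plain first fit; a nonzero max_padding coarsens the
// candidate offset, mimicking padded fit.
static int first_fit_sketch(const std::vector<used_block> &blocks, uint64_t size,
                            uint64_t alignment, uint64_t max_padding) {
    uint64_t step = max_padding != 0 ? round_up(max_padding, alignment) : alignment;
    for (size_t i = 0; i + 1 < blocks.size(); i++) {
        uint64_t candidate = round_up(blocks[i].offset + blocks[i].size, step);
        if (candidate + size <= blocks[i + 1].offset) {
            return (int) i;
        }
    }
    return -1;
}

int main(void) {
    // Made-up layout: a 5632-byte hole between a block ending at 4608 and one starting at 10240.
    std::vector<used_block> blocks = { {0, 4608}, {10240, 512} };
    printf("plain first fit: %d\n", first_fit_sketch(blocks, 4096, 512, 0));    // 0: bytes 4608..8704 fit
    printf("padded fit:      %d\n", first_fit_sketch(blocks, 4096, 512, 4096)); // -1: 8192..12288 does not
    return 0;
}
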
+struct block_allocator::blockpair * +block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment) { + return _first_fit(blocks_array, n_blocks, size, alignment, padded_fit_alignment); +} + +static double hot_zone_threshold = 0.85; + +// TODO: These compiler specific directives should be abstracted in a portability header +// portability/toku_compiler.h? +__attribute__((__constructor__)) +static void determine_hot_zone_threshold_from_env(void) { + // TODO: Should be in portability as 'toku_os_getenv()?' + const char *s = getenv("TOKU_BA_HOT_ZONE_THRESHOLD"); + if (s != nullptr && strlen(s) > 0) { + const double hot_zone = strtod(s, nullptr); + if (hot_zone < 1 || hot_zone > 99) { + fprintf(stderr, "tokuft: error: block allocator hot zone threshold found in environment (%s), " + "but it's out of range (should be an integer 1 through 99). defaulting to 85\n", s); + hot_zone_threshold = 85 / 100; + } else { + fprintf(stderr, "tokuft: setting block allocator hot zone threshold to %s\n", s); + hot_zone_threshold = hot_zone / 100; + } + } +} + +struct block_allocator::blockpair * +block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment, + uint64_t heat) { + if (heat > 0) { + struct block_allocator::blockpair *bp, *boundary_bp; + + // Hot allocation. Find the beginning of the hot zone. + boundary_bp = &blocks_array[n_blocks - 1]; + uint64_t highest_offset = _align(boundary_bp->offset + boundary_bp->size, alignment); + uint64_t hot_zone_offset = static_cast(hot_zone_threshold * highest_offset); + + boundary_bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset); + uint64_t blocks_in_zone = (blocks_array + n_blocks) - boundary_bp; + uint64_t blocks_outside_zone = boundary_bp - blocks_array; + invariant(blocks_in_zone + blocks_outside_zone == n_blocks); + + if (blocks_in_zone > 0) { + // Find the first fit in the hot zone, going forward. + bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, 0); + if (bp != nullptr) { + return bp; + } + } + if (blocks_outside_zone > 0) { + // Find the first fit in the cold zone, going backwards. + bp = _first_fit_bw(boundary_bp, blocks_outside_zone, size, alignment, 0, &blocks_array[n_blocks]); + if (bp != nullptr) { + return bp; + } + } + } else { + // Cold allocations are simply first-fit from the beginning. + return _first_fit(blocks_array, n_blocks, size, alignment, 0); + } + return nullptr; +} diff --git a/storage/tokudb/ft-index/include/tdb-internal.h b/storage/tokudb/ft-index/ft/serialize/block_allocator_strategy.h similarity index 79% rename from storage/tokudb/ft-index/include/tdb-internal.h rename to storage/tokudb/ft-index/ft/serialize/block_allocator_strategy.h index 42f6358287176..3b7c0bafe4e9f 100644 --- a/storage/tokudb/ft-index/include/tdb-internal.h +++ b/storage/tokudb/ft-index/ft/serialize/block_allocator_strategy.h @@ -1,8 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -#ifndef _TDB_INTERNAL_H -#define _TDB_INTERNAL_H /* COPYING CONDITIONS NOTICE: @@ -32,8 +29,8 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2014 Tokutek, Inc. DISCLAIMER: @@ -89,22 +86,30 @@ PATENT RIGHTS GRANT: under this License. 
*/ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#include "toku_list.h" -// Included by db.h, defines some internal structures. These structures are inlined in some versions of db.h -// the types DB_TXN and so forth have been defined. +#include -//// This list structure is repeated here (from toku_list.h) so that the db.h file will be standalone. Any code that depends on this list matching the structure in toku_list.h -//// will get flagged by the compiler if someone changes one but not the other. See #2276. -//struct toku_list { -// struct toku_list *next, *prev; -//}; +#include "ft/serialize/block_allocator.h" -struct simple_dbt { - uint32_t len; - void *data; -}; +// Block allocation strategy implementations + +class block_allocator_strategy { +public: + static struct block_allocator::blockpair * + first_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment); + + static struct block_allocator::blockpair * + best_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment); -// end of _TDB_INTERNAL_H: -#endif + static struct block_allocator::blockpair * + padded_fit(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment); + + static struct block_allocator::blockpair * + heat_zone(struct block_allocator::blockpair *blocks_array, + uint64_t n_blocks, uint64_t size, uint64_t alignment, + uint64_t heat); +}; diff --git a/storage/tokudb/ft-index/ft/serialize/block_table.cc b/storage/tokudb/ft-index/ft/serialize/block_table.cc new file mode 100644 index 0000000000000..561f03a8871f9 --- /dev/null +++ b/storage/tokudb/ft-index/ft/serialize/block_table.cc @@ -0,0 +1,1046 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_portability.h" +#include "portability/toku_pthread.h" + +// ugly but pragmatic, need access to dirty bits while holding translation lock +// TODO: Refactor this (possibly with FT-301) +#include "ft/ft-internal.h" + +// TODO: reorganize this dependency (FT-303) +#include "ft/ft-ops.h" // for toku_maybe_truncate_file +#include "ft/serialize/block_table.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" +#include "ft/serialize/block_allocator.h" + +#include "util/nb_mutex.h" +#include "util/scoped_malloc.h" + +// indicates the end of a freelist +static const BLOCKNUM freelist_null = { -1 }; + +// value of block_translation_pair.size if blocknum is unused +static const DISKOFF size_is_free = (DISKOFF) -1; + +// value of block_translation_pair.u.diskoff if blocknum is used but does not yet have a diskblock +static const DISKOFF diskoff_unused = (DISKOFF) -2; + +void block_table::_mutex_lock() { + toku_mutex_lock(&_mutex); +} + +void block_table::_mutex_unlock() { + toku_mutex_unlock(&_mutex); +} + +// TODO: Move lock to FT +void toku_ft_lock(FT ft) { + block_table *bt = &ft->blocktable; + bt->_mutex_lock(); +} + +// TODO: Move lock to FT +void toku_ft_unlock(FT ft) { + block_table *bt = &ft->blocktable; + toku_mutex_assert_locked(&bt->_mutex); + bt->_mutex_unlock(); +} + +// There are two headers: the reserve must fit them both and be suitably aligned. +static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE % + block_allocator::BLOCK_ALLOCATOR_ALIGNMENT == 0, + "Block allocator's header reserve must be suitibly aligned"); +static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 == + block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + "Block allocator's total header reserve must exactly fit two headers"); + +// does NOT initialize the block allocator: the caller is responsible +void block_table::_create_internal() { + memset(&_current, 0, sizeof(struct translation)); + memset(&_inprogress, 0, sizeof(struct translation)); + memset(&_checkpointed, 0, sizeof(struct translation)); + memset(&_mutex, 0, sizeof(_mutex)); + toku_mutex_init(&_mutex, nullptr); + nb_mutex_init(&_safe_file_size_lock); +} + +// Fill in the checkpointed translation from buffer, and copy checkpointed to current. +// The one read from disk is the last known checkpointed one, so we are keeping it in +// place and then setting current (which is never stored on disk) for current use. +// The translation_buffer has translation only, we create the rest of the block_table. 
+int block_table::create_from_buffer(int fd, + DISKOFF location_on_disk, //Location of translation_buffer + DISKOFF size_on_disk, + unsigned char *translation_buffer) { + // Does not initialize the block allocator + _create_internal(); + + // Deserialize the translation and copy it to current + int r = _translation_deserialize_from_buffer(&_checkpointed, + location_on_disk, size_on_disk, + translation_buffer); + if (r != 0) { + return r; + } + _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT); + + // Determine the file size + int64_t file_size; + r = toku_os_get_file_size(fd, &file_size); + lazy_assert_zero(r); + invariant(file_size >= 0); + _safe_file_size = file_size; + + // Gather the non-empty translations and use them to create the block allocator + toku::scoped_malloc pairs_buf(_checkpointed.smallest_never_used_blocknum.b * + sizeof(struct block_allocator::blockpair)); + struct block_allocator::blockpair *CAST_FROM_VOIDP(pairs, pairs_buf.get()); + uint64_t n_pairs = 0; + for (int64_t i = 0; i < _checkpointed.smallest_never_used_blocknum.b; i++) { + struct block_translation_pair pair = _checkpointed.block_translation[i]; + if (pair.size > 0) { + invariant(pair.u.diskoff != diskoff_unused); + pairs[n_pairs++] = block_allocator::blockpair(pair.u.diskoff, pair.size); + } + } + + _bt_block_allocator.create_from_blockpairs(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + block_allocator::BLOCK_ALLOCATOR_ALIGNMENT, + pairs, n_pairs); + + return 0; +} + +void block_table::create() { + // Does not initialize the block allocator + _create_internal(); + + _checkpointed.type = TRANSLATION_CHECKPOINTED; + _checkpointed.smallest_never_used_blocknum = make_blocknum(RESERVED_BLOCKNUMS); + _checkpointed.length_of_array = _checkpointed.smallest_never_used_blocknum.b; + _checkpointed.blocknum_freelist_head = freelist_null; + XMALLOC_N(_checkpointed.length_of_array, _checkpointed.block_translation); + for (int64_t i = 0; i < _checkpointed.length_of_array; i++) { + _checkpointed.block_translation[i].size = 0; + _checkpointed.block_translation[i].u.diskoff = diskoff_unused; + } + + // we just created a default checkpointed, now copy it to current. + _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT); + + // Create an empty block allocator. + _bt_block_allocator.create(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + block_allocator::BLOCK_ALLOCATOR_ALIGNMENT); +} + +// TODO: Refactor with FT-303 +static void ft_set_dirty(FT ft, bool for_checkpoint) { + invariant(ft->h->type == FT_CURRENT); + if (for_checkpoint) { + invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); + ft->checkpoint_header->dirty = 1; + } else { + ft->h->dirty = 1; + } +} + +void block_table::_maybe_truncate_file(int fd, uint64_t size_needed_before) { + toku_mutex_assert_locked(&_mutex); + uint64_t new_size_needed = _bt_block_allocator.allocated_limit(); + //Save a call to toku_os_get_file_size (kernel call) if unlikely to be useful. + if (new_size_needed < size_needed_before && new_size_needed < _safe_file_size) { + nb_mutex_lock(&_safe_file_size_lock, &_mutex); + + // Must hold _safe_file_size_lock to change _safe_file_size. + if (new_size_needed < _safe_file_size) { + int64_t safe_file_size_before = _safe_file_size; + // Not safe to use the 'to-be-truncated' portion until truncate is done. 
+ _safe_file_size = new_size_needed; + _mutex_unlock(); + + uint64_t size_after; + toku_maybe_truncate_file(fd, new_size_needed, safe_file_size_before, &size_after); + _mutex_lock(); + + _safe_file_size = size_after; + } + nb_mutex_unlock(&_safe_file_size_lock); + } +} + +void block_table::maybe_truncate_file_on_open(int fd) { + _mutex_lock(); + _maybe_truncate_file(fd, _safe_file_size); + _mutex_unlock(); +} + +void block_table::_copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype) { + // We intend to malloc a fresh block, so the incoming translation should be empty + invariant_null(dst->block_translation); + + invariant(src->length_of_array >= src->smallest_never_used_blocknum.b); + invariant(newtype == TRANSLATION_DEBUG || + (src->type == TRANSLATION_CURRENT && newtype == TRANSLATION_INPROGRESS) || + (src->type == TRANSLATION_CHECKPOINTED && newtype == TRANSLATION_CURRENT)); + dst->type = newtype; + dst->smallest_never_used_blocknum = src->smallest_never_used_blocknum; + dst->blocknum_freelist_head = src->blocknum_freelist_head; + + // destination btt is of fixed size. Allocate + memcpy the exact length necessary. + dst->length_of_array = dst->smallest_never_used_blocknum.b; + XMALLOC_N(dst->length_of_array, dst->block_translation); + memcpy(dst->block_translation, src->block_translation, dst->length_of_array * sizeof(*dst->block_translation)); + + // New version of btt is not yet stored on disk. + dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size = 0; + dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = diskoff_unused; +} + +int64_t block_table::get_blocks_in_use_unlocked() { + BLOCKNUM b; + struct translation *t = &_current; + int64_t num_blocks = 0; + { + //Reserved blocknums do not get upgraded; They are part of the header. + for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) { + if (t->block_translation[b.b].size != size_is_free) { + num_blocks++; + } + } + } + return num_blocks; +} + +void block_table::_maybe_optimize_translation(struct translation *t) { + //Reduce 'smallest_never_used_blocknum.b' (completely free blocknums instead of just + //on a free list. Doing so requires us to regenerate the free list. + //This is O(n) work, so do it only if you're already doing that. + + BLOCKNUM b; + paranoid_invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); + //Calculate how large the free suffix is. + int64_t freed; + { + for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS; b.b--) { + if (t->block_translation[b.b-1].size != size_is_free) { + break; + } + } + freed = t->smallest_never_used_blocknum.b - b.b; + } + if (freed>0) { + t->smallest_never_used_blocknum.b = b.b; + if (t->length_of_array/4 > t->smallest_never_used_blocknum.b) { + //We're using more memory than necessary to represent this now. Reduce. + uint64_t new_length = t->smallest_never_used_blocknum.b * 2; + XREALLOC_N(new_length, t->block_translation); + t->length_of_array = new_length; + //No need to zero anything out. + } + + //Regenerate free list. 
+ t->blocknum_freelist_head.b = freelist_null.b; + for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) { + if (t->block_translation[b.b].size == size_is_free) { + t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; + t->blocknum_freelist_head = b; + } + } + } +} + +// block table must be locked by caller of this function +void block_table::note_start_checkpoint_unlocked() { + toku_mutex_assert_locked(&_mutex); + + // We're going to do O(n) work to copy the translation, so we + // can afford to do O(n) work by optimizing the translation + _maybe_optimize_translation(&_current); + + // Copy current translation to inprogress translation. + _copy_translation(&_inprogress, &_current, TRANSLATION_INPROGRESS); + + _checkpoint_skipped = false; +} + +void block_table::note_skipped_checkpoint() { + //Purpose, alert block translation that the checkpoint was skipped, e.x. for a non-dirty header + _mutex_lock(); + paranoid_invariant_notnull(_inprogress.block_translation); + _checkpoint_skipped = true; + _mutex_unlock(); +} + +// Purpose: free any disk space used by previous checkpoint that isn't in use by either +// - current state +// - in-progress checkpoint +// capture inprogress as new checkpointed. +// For each entry in checkpointBTT +// if offset does not match offset in inprogress +// assert offset does not match offset in current +// free (offset,len) from checkpoint +// move inprogress to checkpoint (resetting type) +// inprogress = NULL +void block_table::note_end_checkpoint(int fd) { + // Free unused blocks + _mutex_lock(); + uint64_t allocated_limit_at_start = _bt_block_allocator.allocated_limit(); + paranoid_invariant_notnull(_inprogress.block_translation); + if (_checkpoint_skipped) { + toku_free(_inprogress.block_translation); + memset(&_inprogress, 0, sizeof(_inprogress)); + goto end; + } + + //Make certain inprogress was allocated space on disk + assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0); + assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > 0); + + { + struct translation *t = &_checkpointed; + for (int64_t i = 0; i < t->length_of_array; i++) { + struct block_translation_pair *pair = &t->block_translation[i]; + if (pair->size > 0 && !_translation_prevents_freeing(&_inprogress, make_blocknum(i), pair)) { + assert(!_translation_prevents_freeing(&_current, make_blocknum(i), pair)); + _bt_block_allocator.free_block(pair->u.diskoff); + } + } + toku_free(_checkpointed.block_translation); + _checkpointed = _inprogress; + _checkpointed.type = TRANSLATION_CHECKPOINTED; + memset(&_inprogress, 0, sizeof(_inprogress)); + _maybe_truncate_file(fd, allocated_limit_at_start); + } +end: + _mutex_unlock(); +} + +bool block_table::_is_valid_blocknum(struct translation *t, BLOCKNUM b) { + invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); + return b.b >= 0 && b.b < t->smallest_never_used_blocknum.b; +} + +void block_table::_verify_valid_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) { + invariant(_is_valid_blocknum(t, b)); +} + +bool block_table::_is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b) { + invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); + return b.b >= RESERVED_BLOCKNUMS && b.b < t->smallest_never_used_blocknum.b; +} + +// should be freeable +void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) { + invariant(_is_valid_freeable_blocknum(t, b)); +} + +// Also used only in ft-serialize-test. 
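Taken together, the note_* entry points above implement the checkpoint protocol for the block translation table. A hedged sketch of the calling order, using the toku_ft_lock()/toku_ft_unlock() friends defined earlier; the surrounding header and fsync work is elided and the function name is an assumption:

    static void checkpoint_translation_sketch(FT ft, int fd) {
        block_table *bt = &ft->blocktable;
        toku_ft_lock(ft);                        // note_start_checkpoint_unlocked() asserts this lock is held
        bt->note_start_checkpoint_unlocked();    // snapshot: current -> inprogress
        toku_ft_unlock(ft);
        struct wbuf w;                           // starts uninitialized; serialize_translation_to_wbuf() fills it
        int64_t addr, size;
        bt->serialize_translation_to_wbuf(fd, &w, &addr, &size);
        // ... the real checkpoint writes w.buf at addr, writes the header, and fsyncs ...
        bt->note_end_checkpoint(fd);             // promote inprogress -> checkpointed, free now-unused blocks
        // A checkpoint that turns out to be unnecessary would call bt->note_skipped_checkpoint() instead.
    }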
+void block_table::block_free(uint64_t offset) { + _mutex_lock(); + _bt_block_allocator.free_block(offset); + _mutex_unlock(); +} + +int64_t block_table::_calculate_size_on_disk(struct translation *t) { + return 8 + // smallest_never_used_blocknum + 8 + // blocknum_freelist_head + t->smallest_never_used_blocknum.b * 16 + // Array + 4; // 4 for checksum +} + +// We cannot free the disk space allocated to this blocknum if it is still in use by the given translation table. +bool block_table::_translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair) { + return t->block_translation && + b.b < t->smallest_never_used_blocknum.b && + old_pair->u.diskoff == t->block_translation[b.b].u.diskoff; +} + +void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint, uint64_t heat) { + toku_mutex_assert_locked(&_mutex); + ft_set_dirty(ft, for_checkpoint); + + struct translation *t = &_current; + struct block_translation_pair old_pair = t->block_translation[b.b]; + //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint + bool cannot_free = (bool) + ((!for_checkpoint && _translation_prevents_freeing(&_inprogress, b, &old_pair)) || + _translation_prevents_freeing(&_checkpointed, b, &old_pair)); + if (!cannot_free && old_pair.u.diskoff!=diskoff_unused) { + _bt_block_allocator.free_block(old_pair.u.diskoff); + } + + uint64_t allocator_offset = diskoff_unused; + t->block_translation[b.b].size = size; + if (size > 0) { + // Allocate a new block if the size is greater than 0, + // if the size is just 0, offset will be set to diskoff_unused + _bt_block_allocator.alloc_block(size, heat, &allocator_offset); + } + t->block_translation[b.b].u.diskoff = allocator_offset; + *offset = allocator_offset; + + //Update inprogress btt if appropriate (if called because Pending bit is set). + if (for_checkpoint) { + paranoid_invariant(b.b < _inprogress.length_of_array); + _inprogress.block_translation[b.b] = t->block_translation[b.b]; + } +} + +void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset) { + // Requires: holding _mutex + uint64_t size_needed = block_size + block_offset; + if (size_needed > _safe_file_size) { + // Must hold _safe_file_size_lock to change _safe_file_size. + nb_mutex_lock(&_safe_file_size_lock, &_mutex); + if (size_needed > _safe_file_size) { + _mutex_unlock(); + + int64_t size_after; + toku_maybe_preallocate_in_file(fd, size_needed, _safe_file_size, &size_after); + + _mutex_lock(); + _safe_file_size = size_after; + } + nb_mutex_unlock(&_safe_file_size_lock); + } +} + +void block_table::realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint, uint64_t heat) { + _mutex_lock(); + struct translation *t = &_current; + _verify_valid_freeable_blocknum(t, b); + _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint, heat); + + _ensure_safe_write_unlocked(fd, size, *offset); + _mutex_unlock(); +} + +bool block_table::_pair_is_unallocated(struct block_translation_pair *pair) { + return pair->size == 0 && pair->u.diskoff == diskoff_unused; +} + +// Effect: figure out where to put the inprogress btt on disk, allocate space for it there. +// The space must be 512-byte aligned (both the starting address and the size). +// As a result, the allcoated space may be a little bit bigger (up to the next 512-byte boundary) than the actual btt. 
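To make the size accounting above concrete: _calculate_size_on_disk() charges 8 bytes each for smallest_never_used_blocknum and blocknum_freelist_head, 16 bytes per blocknum for its (diskoff, size) pair, and 4 bytes of checksum, and the comment below then requires rounding that figure up to a 512-byte boundary. A small illustrative sketch (helper names are assumptions, not part of the patch):

    static inline int64_t translation_bytes_sketch(int64_t n_blocknums) {
        return 8 + 8 + 16 * n_blocknums + 4;      // e.g. 64 blocknums -> 1044 bytes
    }
    static inline uint64_t roundup_512_sketch(uint64_t n) {
        return (n + 511) & ~(uint64_t) 511;       // 1044 -> 1536, the 512-byte-aligned size actually written
    }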
+void block_table::_alloc_inprogress_translation_on_disk_unlocked() {
+    toku_mutex_assert_locked(&_mutex);
+
+    struct translation *t = &_inprogress;
+    paranoid_invariant_notnull(t->block_translation);
+    BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
+    //Each inprogress is allocated only once
+    paranoid_invariant(_pair_is_unallocated(&t->block_translation[b.b]));
+
+    //Allocate a new block
+    int64_t size = _calculate_size_on_disk(t);
+    uint64_t offset;
+    _bt_block_allocator.alloc_block(size, 0, &offset);
+    t->block_translation[b.b].u.diskoff = offset;
+    t->block_translation[b.b].size = size;
+}
+
+// Effect: Serializes the blocktable to a wbuf (which starts uninitialized)
+// A clean shutdown runs checkpoint start so that current and inprogress are copies.
+// The resulting wbuf buffer is guaranteed to be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needed)
+// The address is guaranteed to be 512-byte aligned, but the size is not guaranteed.
+// It *is* guaranteed that we can read up to the next 512-byte boundary, however
+void block_table::serialize_translation_to_wbuf(int fd, struct wbuf *w,
+                                                int64_t *address, int64_t *size) {
+    _mutex_lock();
+    struct translation *t = &_inprogress;
+
+    BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
+    _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block must be 512-byte aligned to make O_DIRECT happy.
+    uint64_t size_translation = _calculate_size_on_disk(t);
+    uint64_t size_aligned = roundup_to_multiple(512, size_translation);
+    assert((int64_t)size_translation == t->block_translation[b.b].size);
+    {
+        //Init wbuf
+        if (0)
+            printf("%s:%d writing translation table of size_translation %" PRIu64 " at %" PRId64 "\n", __FILE__, __LINE__, size_translation, t->block_translation[b.b].u.diskoff);
+        char *XMALLOC_N_ALIGNED(512, size_aligned, buf);
+        for (uint64_t i = size_translation; i < size_aligned; i++) buf[i] = 0;
+        wbuf_init(w, buf, size_aligned);
+    }
+    wbuf_BLOCKNUM(w, t->smallest_never_used_blocknum);
+    wbuf_BLOCKNUM(w, t->blocknum_freelist_head);
+    int64_t i;
+    for (i = 0; i < t->smallest_never_used_blocknum.b; i++) {
+        if (0)
+            printf("%s:%d %" PRId64 ",%" PRId64 "\n", __FILE__, __LINE__, t->block_translation[i].u.diskoff, t->block_translation[i].size);
+        wbuf_DISKOFF(w, t->block_translation[i].u.diskoff);
+        wbuf_DISKOFF(w, t->block_translation[i].size);
+    }
+    uint32_t checksum = toku_x1764_finish(&w->checksum);
+    wbuf_int(w, checksum);
+    *address = t->block_translation[b.b].u.diskoff;
+    *size = size_translation;
+    assert((*address)%512 == 0);
+
+    _ensure_safe_write_unlocked(fd, size_aligned, *address);
+    _mutex_unlock();
+}
+
+// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?)
+void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) {
+    struct translation *t = &_current;
+    _verify_valid_blocknum(t, b);
+    if (offset) {
+        *offset = t->block_translation[b.b].u.diskoff;
+    }
+    if (size) {
+        *size = t->block_translation[b.b].size;
+    }
+}
+
+// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?)
+void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) {
+    _mutex_lock();
+    _translate_blocknum_to_offset_size_unlocked(b, offset, size);
+    _mutex_unlock();
+}
+
+// Only called by toku_allocate_blocknum
+// Effect: expand the array to maintain size invariant
+// given that one more never-used blocknum will soon be used.
+void block_table::_maybe_expand_translation(struct translation *t) { + if (t->length_of_array <= t->smallest_never_used_blocknum.b) { + //expansion is necessary + uint64_t new_length = t->smallest_never_used_blocknum.b * 2; + XREALLOC_N(new_length, t->block_translation); + uint64_t i; + for (i = t->length_of_array; i < new_length; i++) { + t->block_translation[i].u.next_free_blocknum = freelist_null; + t->block_translation[i].size = size_is_free; + } + t->length_of_array = new_length; + } +} + +void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) { + toku_mutex_assert_locked(&_mutex); + BLOCKNUM result; + struct translation *t = &_current; + if (t->blocknum_freelist_head.b == freelist_null.b) { + // no previously used blocknums are available + // use a never used blocknum + _maybe_expand_translation(t); //Ensure a never used blocknums is available + result = t->smallest_never_used_blocknum; + t->smallest_never_used_blocknum.b++; + } else { // reuse a previously used blocknum + result = t->blocknum_freelist_head; + BLOCKNUM next = t->block_translation[result.b].u.next_free_blocknum; + t->blocknum_freelist_head = next; + } + //Verify the blocknum is free + paranoid_invariant(t->block_translation[result.b].size == size_is_free); + //blocknum is not free anymore + t->block_translation[result.b].u.diskoff = diskoff_unused; + t->block_translation[result.b].size = 0; + _verify_valid_freeable_blocknum(t, result); + *res = result; + ft_set_dirty(ft, false); +} + +void block_table::allocate_blocknum(BLOCKNUM *res, FT ft) { + _mutex_lock(); + _allocate_blocknum_unlocked(res, ft); + _mutex_unlock(); +} + +void block_table::_free_blocknum_in_translation(struct translation *t, BLOCKNUM b) { + _verify_valid_freeable_blocknum(t, b); + paranoid_invariant(t->block_translation[b.b].size != size_is_free); + + t->block_translation[b.b].size = size_is_free; + t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; + t->blocknum_freelist_head = b; +} + +// Effect: Free a blocknum. +// If the blocknum holds the only reference to a block on disk, free that block +void block_table::_free_blocknum_unlocked(BLOCKNUM *bp, FT ft, bool for_checkpoint) { + toku_mutex_assert_locked(&_mutex); + BLOCKNUM b = *bp; + bp->b = 0; //Remove caller's reference. + + struct block_translation_pair old_pair = _current.block_translation[b.b]; + + _free_blocknum_in_translation(&_current, b); + if (for_checkpoint) { + paranoid_invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); + _free_blocknum_in_translation(&_inprogress, b); + } + + //If the size is 0, no disk block has ever been assigned to this blocknum. + if (old_pair.size > 0) { + //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint + bool cannot_free = (bool) + (_translation_prevents_freeing(&_inprogress, b, &old_pair) || + _translation_prevents_freeing(&_checkpointed, b, &old_pair)); + if (!cannot_free) { + _bt_block_allocator.free_block(old_pair.u.diskoff); + } + } + else { + paranoid_invariant(old_pair.size==0); + paranoid_invariant(old_pair.u.diskoff == diskoff_unused); + } + ft_set_dirty(ft, for_checkpoint); +} + +void block_table::free_blocknum(BLOCKNUM *bp, FT ft, bool for_checkpoint) { + _mutex_lock(); + _free_blocknum_unlocked(bp, ft, for_checkpoint); + _mutex_unlock(); +} + +// Verify there are no free blocks. 
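As a usage illustration of the allocate/realloc/free entry points defined above, a hedged sketch of one blocknum's lifecycle; the 4096-byte size, the heat value of 0, and the function name are assumptions for illustration only:

    static void blocknum_lifecycle_sketch(block_table *bt, FT ft, int fd) {
        BLOCKNUM b;
        bt->allocate_blocknum(&b, ft);            // reuses the freelist head when one is available
        DISKOFF offset;
        bt->realloc_on_disk(b, 4096, &offset, ft, fd, false /*for_checkpoint*/, 0 /*heat*/);
        // ... write 4096 bytes of node data at 'offset' ...
        bt->free_blocknum(&b, ft, false);         // size becomes size_is_free; the blocknum returns to the freelist
    }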
+void block_table::verify_no_free_blocknums() { + invariant(_current.blocknum_freelist_head.b == freelist_null.b); +} + +// Frees blocknums that have a size of 0 and unused diskoff +// Currently used for eliminating unused cached rollback log nodes +void block_table::free_unused_blocknums(BLOCKNUM root) { + _mutex_lock(); + int64_t smallest = _current.smallest_never_used_blocknum.b; + for (int64_t i=RESERVED_BLOCKNUMS; i < smallest; i++) { + if (i == root.b) { + continue; + } + BLOCKNUM b = make_blocknum(i); + if (_current.block_translation[b.b].size == 0) { + invariant(_current.block_translation[b.b].u.diskoff == diskoff_unused); + _free_blocknum_in_translation(&_current, b); + } + } + _mutex_unlock(); +} + +bool block_table::_no_data_blocks_except_root(BLOCKNUM root) { + bool ok = true; + _mutex_lock(); + int64_t smallest = _current.smallest_never_used_blocknum.b; + if (root.b < RESERVED_BLOCKNUMS) { + ok = false; + goto cleanup; + } + for (int64_t i = RESERVED_BLOCKNUMS; i < smallest; i++) { + if (i == root.b) { + continue; + } + BLOCKNUM b = make_blocknum(i); + if (_current.block_translation[b.b].size != size_is_free) { + ok = false; + goto cleanup; + } + } + cleanup: + _mutex_unlock(); + return ok; +} + +// Verify there are no data blocks except root. +// TODO(leif): This actually takes a lock, but I don't want to fix all the callers right now. +void block_table::verify_no_data_blocks_except_root(BLOCKNUM UU(root)) { + paranoid_invariant(_no_data_blocks_except_root(root)); +} + +bool block_table::_blocknum_allocated(BLOCKNUM b) { + _mutex_lock(); + struct translation *t = &_current; + _verify_valid_blocknum(t, b); + bool ok = t->block_translation[b.b].size != size_is_free; + _mutex_unlock(); + return ok; +} + +// Verify a blocknum is currently allocated. 
+void block_table::verify_blocknum_allocated(BLOCKNUM UU(b)) {
+    paranoid_invariant(_blocknum_allocated(b));
+}
+
+// Only used by toku_dump_translation table (debug info)
+void block_table::_dump_translation_internal(FILE *f, struct translation *t) {
+    if (t->block_translation) {
+        BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
+        fprintf(f, " length_of_array[%" PRId64 "]", t->length_of_array);
+        fprintf(f, " smallest_never_used_blocknum[%" PRId64 "]", t->smallest_never_used_blocknum.b);
+        fprintf(f, " blocknum_free_list_head[%" PRId64 "]", t->blocknum_freelist_head.b);
+        fprintf(f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size);
+        fprintf(f, " location_on_disk[%" PRId64 "]\n", t->block_translation[b.b].u.diskoff);
+        int64_t i;
+        for (i = 0; i < t->length_of_array; i++) {
+            fprintf(f, " %" PRId64 ": %" PRId64 " %" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size);
+        }
+        fprintf(f, "\n");
+    } else {
+        fprintf(f, " does not exist\n");
+    }
+}
+
+// Only used by toku_ft_dump which is only for debugging purposes
+// "pretty" just means we use tabs so we can parse output easier later
+void block_table::dump_translation_table_pretty(FILE *f) {
+    _mutex_lock();
+    struct translation *t = &_checkpointed;
+    assert(t->block_translation != nullptr);
+    for (int64_t i = 0; i < t->length_of_array; ++i) {
+        fprintf(f, "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size);
+    }
+    _mutex_unlock();
+}
+
+// Only used by toku_ft_dump which is only for debugging purposes
+void block_table::dump_translation_table(FILE *f) {
+    _mutex_lock();
+    fprintf(f, "Current block translation:");
+    _dump_translation_internal(f, &_current);
+    fprintf(f, "Checkpoint in progress block translation:");
+    _dump_translation_internal(f, &_inprogress);
+    fprintf(f, "Checkpointed block translation:");
+    _dump_translation_internal(f, &_checkpointed);
+    _mutex_unlock();
+}
+
+// Only used by ftdump
+void block_table::blocknum_dump_translation(BLOCKNUM b) {
+    _mutex_lock();
+
+    struct translation *t = &_current;
+    if (b.b < t->length_of_array) {
+        struct block_translation_pair *bx = &t->block_translation[b.b];
+        printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n", b.b, bx->u.diskoff, bx->size);
+    }
+    _mutex_unlock();
+}
+
+// Must not call this function when anything else is using the blocktable.
+// No one may use the blocktable afterwards.
+void block_table::destroy(void) {
+    // TODO: translation.destroy();
+    toku_free(_current.block_translation);
+    toku_free(_inprogress.block_translation);
+    toku_free(_checkpointed.block_translation);
+
+    _bt_block_allocator.destroy();
+    toku_mutex_destroy(&_mutex);
+    nb_mutex_destroy(&_safe_file_size_lock);
+}
+
+int block_table::_translation_deserialize_from_buffer(struct translation *t,
+                                                      DISKOFF location_on_disk,
+                                                      uint64_t size_on_disk,
+                                                      // out: buffer with serialized translation
+                                                      unsigned char *translation_buffer) {
+    int r = 0;
+    assert(location_on_disk != 0);
+    t->type = TRANSLATION_CHECKPOINTED;
+
+    // check the checksum
+    uint32_t x1764 = toku_x1764_memory(translation_buffer, size_on_disk - 4);
+    uint64_t offset = size_on_disk - 4;
+    uint32_t stored_x1764 = toku_dtoh32(*(int*)(translation_buffer + offset));
+    if (x1764 != stored_x1764) {
+        fprintf(stderr, "Translation table checksum failure: calc=0x%08x read=0x%08x\n", x1764, stored_x1764);
+        r = TOKUDB_BAD_CHECKSUM;
+        goto exit;
+    }
+
+    struct rbuf rb;
+    rb.buf = translation_buffer;
+    rb.ndone = 0;
+    rb.size = size_on_disk-4;//4==checksum
+
+    t->smallest_never_used_blocknum = rbuf_blocknum(&rb);
+    t->length_of_array = t->smallest_never_used_blocknum.b;
+    invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS);
+    t->blocknum_freelist_head = rbuf_blocknum(&rb);
+    XMALLOC_N(t->length_of_array, t->block_translation);
+    for (int64_t i = 0; i < t->length_of_array; i++) {
+        t->block_translation[i].u.diskoff = rbuf_DISKOFF(&rb);
+        t->block_translation[i].size = rbuf_DISKOFF(&rb);
+    }
+    invariant(_calculate_size_on_disk(t) == (int64_t) size_on_disk);
+    invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == (int64_t) size_on_disk);
+    invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == location_on_disk);
+
+exit:
+    return r;
+}
+
+int block_table::iterate(enum translation_type type,
+                         BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only) {
+    struct translation *src;
+
+    int r = 0;
+    switch (type) {
+        case TRANSLATION_CURRENT:
+            src = &_current;
+            break;
+        case TRANSLATION_INPROGRESS:
+            src = &_inprogress;
+            break;
+        case TRANSLATION_CHECKPOINTED:
+            src = &_checkpointed;
+            break;
+        default:
+            r = EINVAL;
+    }
+
+    struct translation fakecurrent;
+    memset(&fakecurrent, 0, sizeof(struct translation));
+
+    struct translation *t = &fakecurrent;
+    if (r == 0) {
+        _mutex_lock();
+        _copy_translation(t, src, TRANSLATION_DEBUG);
+        t->block_translation[RESERVED_BLOCKNUM_TRANSLATION] =
+            src->block_translation[RESERVED_BLOCKNUM_TRANSLATION];
+        _mutex_unlock();
+        int64_t i;
+        for (i = 0; i < t->smallest_never_used_blocknum.b; i++) {
+            struct block_translation_pair pair = t->block_translation[i];
+            if (data_only && i < RESERVED_BLOCKNUMS) continue;
+            if (used_only && pair.size <= 0) continue;
+            r = f(make_blocknum(i), pair.size, pair.u.diskoff, extra);
+            if (r != 0) break;
+        }
+        toku_free(t->block_translation);
+    }
+    return r;
+}
+
+typedef struct {
+    int64_t used_space;
+    int64_t total_space;
+} frag_extra;
+
+static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extra) {
+    frag_extra *info = (frag_extra *) extra;
+
+    if (size + address > info->total_space)
+        info->total_space = size + address;
+    info->used_space += size;
+    return 0;
+}
+
+void block_table::internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep) {
+    frag_extra info = { 0, 0 };
+    int r = iterate(TRANSLATION_CHECKPOINTED, frag_helper, &info, false, true);
+    assert_zero(r);
+
+    if (total_sizep) *total_sizep = info.total_space;
+    if (used_sizep) *used_sizep = info.used_space;
+}
+
+void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, FT ft) {
+    toku_mutex_assert_locked(&_mutex);
+    BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR);
+    _realloc_on_disk_internal(b, size, offset, ft, false, 0);
+}
+
+void block_table::realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, FT ft, int fd) {
+    _mutex_lock();
+    _realloc_descriptor_on_disk_unlocked(size, offset, ft);
+    _ensure_safe_write_unlocked(fd, size, *offset);
+    _mutex_unlock();
+}
+
+void block_table::get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size) {
+    _mutex_lock();
+    BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR);
+    _translate_blocknum_to_offset_size_unlocked(b, offset, size);
+    _mutex_unlock();
+}
+
+void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) {
+    // Requires: blocktable lock is held.
+    // Requires: report->file_size_bytes is already filled in.
+
+    // Count the headers.
+    report->data_bytes = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+    report->data_blocks = 1;
+    report->checkpoint_bytes_additional = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+    report->checkpoint_blocks_additional = 1;
+
+    struct translation *current = &_current;
+    for (int64_t i = 0; i < current->length_of_array; i++) {
+        struct block_translation_pair *pair = &current->block_translation[i];
+        if (pair->size > 0) {
+            report->data_bytes += pair->size;
+            report->data_blocks++;
+        }
+    }
+
+    struct translation *checkpointed = &_checkpointed;
+    for (int64_t i = 0; i < checkpointed->length_of_array; i++) {
+        struct block_translation_pair *pair = &checkpointed->block_translation[i];
+        if (pair->size > 0 && !(i < current->length_of_array &&
+                                current->block_translation[i].size > 0 &&
+                                current->block_translation[i].u.diskoff == pair->u.diskoff)) {
+            report->checkpoint_bytes_additional += pair->size;
+            report->checkpoint_blocks_additional++;
+        }
+    }
+
+    struct translation *inprogress = &_inprogress;
+    for (int64_t i = 0; i < inprogress->length_of_array; i++) {
+        struct block_translation_pair *pair = &inprogress->block_translation[i];
+        if (pair->size > 0 && !(i < current->length_of_array &&
+                                current->block_translation[i].size > 0 &&
+                                current->block_translation[i].u.diskoff == pair->u.diskoff) &&
+                              !(i < checkpointed->length_of_array &&
+                                checkpointed->block_translation[i].size > 0 &&
+                                checkpointed->block_translation[i].u.diskoff == pair->u.diskoff)) {
+            report->checkpoint_bytes_additional += pair->size;
+            report->checkpoint_blocks_additional++;
+        }
+    }
+
+    _bt_block_allocator.get_unused_statistics(report);
+}
+
+void block_table::get_info64(struct ftinfo64 *s) {
+    _mutex_lock();
+
+    struct translation *current = &_current;
+    s->num_blocks_allocated = current->length_of_array;
+    s->num_blocks_in_use = 0;
+    s->size_allocated = 0;
+    s->size_in_use = 0;
+
+    for (int64_t i = 0; i < current->length_of_array; ++i) {
+        struct block_translation_pair *block = &current->block_translation[i];
+        if (block->size != size_is_free) {
+            ++s->num_blocks_in_use;
+            s->size_in_use += block->size;
+            if (block->u.diskoff != diskoff_unused) {
+                uint64_t limit = block->u.diskoff + block->size;
+                if (limit > s->size_allocated) {
+                    s->size_allocated = limit;
+                }
+            }
+        }
+    }
+
+    _mutex_unlock();
+}
+
+int block_table::iterate_translation_tables(uint64_t checkpoint_count,
+                                            int (*iter)(uint64_t checkpoint_count,
+                                                        int64_t total_num_rows,
+                                                        int64_t blocknum,
+                                                        int64_t diskoff,
+                                                        int64_t size,
+                                                        void *extra),
+                                            void
*iter_extra) { + int error = 0; + _mutex_lock(); + + int64_t total_num_rows = _current.length_of_array + _checkpointed.length_of_array; + for (int64_t i = 0; error == 0 && i < _current.length_of_array; ++i) { + struct block_translation_pair *block = &_current.block_translation[i]; + error = iter(checkpoint_count, total_num_rows, i, block->u.diskoff, block->size, iter_extra); + } + for (int64_t i = 0; error == 0 && i < _checkpointed.length_of_array; ++i) { + struct block_translation_pair *block = &_checkpointed.block_translation[i]; + error = iter(checkpoint_count - 1, total_num_rows, i, block->u.diskoff, block->size, iter_extra); + } + + _mutex_unlock(); + return error; +} diff --git a/storage/tokudb/ft-index/ft/serialize/block_table.h b/storage/tokudb/ft-index/ft/serialize/block_table.h new file mode 100644 index 0000000000000..534befaf42632 --- /dev/null +++ b/storage/tokudb/ft-index/ft/serialize/block_table.h @@ -0,0 +1,338 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
+ + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2014 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include "portability/toku_stdint.h" +#include "portability/toku_pthread.h" + +#include "ft/serialize/block_allocator.h" +#include "util/nb_mutex.h" + +struct ft; + +typedef struct blocknum_s { int64_t b; } BLOCKNUM; + +// Offset in a disk. -1 is the 'null' pointer. +typedef int64_t DISKOFF; + +// Unmovable reserved first, then reallocable. +// We reserve one blocknum for the translation table itself. +enum { + RESERVED_BLOCKNUM_NULL = 0, + RESERVED_BLOCKNUM_TRANSLATION = 1, + RESERVED_BLOCKNUM_DESCRIPTOR = 2, + RESERVED_BLOCKNUMS +}; + +typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra); + +static inline BLOCKNUM make_blocknum(int64_t b) { + BLOCKNUM result = { .b = b }; + return result; +} +static const BLOCKNUM ROLLBACK_NONE = { .b = 0 }; + +/** + * There are three copies of the translation table (btt) in the block table: + * + * checkpointed Is initialized by deserializing from disk, + * and is the only version ever read from disk. + * When read from disk it is copied to current. + * It is immutable. It can be replaced by an inprogress btt. + * + * inprogress Is only filled by copying from current, + * and is the only version ever serialized to disk. + * (It is serialized to disk on checkpoint and clean shutdown.) + * At end of checkpoint it replaces 'checkpointed'. + * During a checkpoint, any 'pending' dirty writes will update + * inprogress. + * + * current Is initialized by copying from checkpointed, + * is the only version ever modified while the database is in use, + * and is the only version ever copied to inprogress. + * It is never stored on disk. 
+ */ +class block_table { +public: + enum translation_type { + TRANSLATION_NONE = 0, + TRANSLATION_CURRENT, + TRANSLATION_INPROGRESS, + TRANSLATION_CHECKPOINTED, + TRANSLATION_DEBUG + }; + + void create(); + + int create_from_buffer(int fd, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer); + + void destroy(); + + // Checkpointing + void note_start_checkpoint_unlocked(); + void note_end_checkpoint(int fd); + void note_skipped_checkpoint(); + void maybe_truncate_file_on_open(int fd); + + // Blocknums + void allocate_blocknum(BLOCKNUM *res, struct ft *ft); + void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint, uint64_t heat); + void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint); + void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); + void free_unused_blocknums(BLOCKNUM root); + void realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, struct ft *ft, int fd); + void get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size); + + // External verfication + void verify_blocknum_allocated(BLOCKNUM b); + void verify_no_data_blocks_except_root(BLOCKNUM root); + void verify_no_free_blocknums(); + + // Serialization + void serialize_translation_to_wbuf(int fd, struct wbuf *w, int64_t *address, int64_t *size); + + // DEBUG ONLY (ftdump included), tests included + void blocknum_dump_translation(BLOCKNUM b); + void dump_translation_table_pretty(FILE *f); + void dump_translation_table(FILE *f); + void block_free(uint64_t offset); + + int iterate(enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only); + void internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep); + + // Requires: blocktable lock is held. + // Requires: report->file_size_bytes is already filled in. + void get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report); + + int64_t get_blocks_in_use_unlocked(); + + void get_info64(struct ftinfo64 *); + + int iterate_translation_tables(uint64_t, int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), void *); + +private: + struct block_translation_pair { + // If in the freelist, use next_free_blocknum, otherwise diskoff. + union { + DISKOFF diskoff; + BLOCKNUM next_free_blocknum; + } u; + + // Set to 0xFFFFFFFFFFFFFFFF for free + DISKOFF size; + }; + + // This is the BTT (block translation table) + // When the translation (btt) is stored on disk: + // In Header: + // size_on_disk + // location_on_disk + // In block translation table (in order): + // smallest_never_used_blocknum + // blocknum_freelist_head + // array + // a checksum + struct translation { + enum translation_type type; + + // Number of elements in array (block_translation). 
always >= smallest_never_used_blocknum + int64_t length_of_array; + BLOCKNUM smallest_never_used_blocknum; + + // Next (previously used) unused blocknum (free list) + BLOCKNUM blocknum_freelist_head; + struct block_translation_pair *block_translation; + + // size_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].size + // location_on is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff + }; + + void _create_internal(); + int _translation_deserialize_from_buffer(struct translation *t, // destination into which to deserialize + DISKOFF location_on_disk, // location of translation_buffer + uint64_t size_on_disk, + unsigned char * translation_buffer); // buffer with serialized translation + + void _copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype); + void _maybe_optimize_translation(struct translation *t); + void _maybe_expand_translation(struct translation *t); + bool _translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair); + void _free_blocknum_in_translation(struct translation *t, BLOCKNUM b); + int64_t _calculate_size_on_disk(struct translation *t); + bool _pair_is_unallocated(struct block_translation_pair *pair); + void _alloc_inprogress_translation_on_disk_unlocked(); + void _dump_translation_internal(FILE *f, struct translation *t); + + // Blocknum management + void _allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft); + void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft, bool for_checkpoint); + void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, struct ft *ft); + void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint, uint64_t heat); + void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); + + // File management + void _maybe_truncate_file(int fd, uint64_t size_needed_before); + void _ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset); + + // Verification + bool _is_valid_blocknum(struct translation *t, BLOCKNUM b); + void _verify_valid_blocknum(struct translation *t, BLOCKNUM b); + bool _is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b); + void _verify_valid_freeable_blocknum(struct translation *t, BLOCKNUM b); + bool _no_data_blocks_except_root(BLOCKNUM root); + bool _blocknum_allocated(BLOCKNUM b); + + // Locking + // + // TODO: Move the lock to the FT + void _mutex_lock(); + void _mutex_unlock(); + + // The current translation is the one used by client threads. + // It is not represented on disk. + struct translation _current; + + // The translation used by the checkpoint currently in progress. + // If the checkpoint thread allocates a block, it must also update the current translation. + struct translation _inprogress; + + // The translation for the data that shall remain inviolate on disk until the next checkpoint finishes, + // after which any blocks used only in this translation can be freed. + struct translation _checkpointed; + + // The in-memory data structure for block allocation. + // There is no on-disk data structure for block allocation. + // Note: This is *allocation* not *translation* - the block allocator is unaware of which + // blocks are used for which translation, but simply allocates and deallocates blocks. 
+ block_allocator _bt_block_allocator; + toku_mutex_t _mutex; + struct nb_mutex _safe_file_size_lock; + bool _checkpoint_skipped; + uint64_t _safe_file_size; + + // Because the lock is in a weird place right now + friend void toku_ft_lock(struct ft *ft); + friend void toku_ft_unlock(struct ft *ft); +}; + +// For serialize / deserialize + +#include "ft/serialize/wbuf.h" + +static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { + wbuf_ulonglong(w, b.b); +} + +static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { + wbuf_nocrc_ulonglong(w, b.b); +} + +static inline void wbuf_DISKOFF(struct wbuf *wb, DISKOFF off) { + wbuf_ulonglong(wb, (uint64_t) off); +} + +#include "ft/serialize/rbuf.h" + +static inline DISKOFF rbuf_DISKOFF(struct rbuf *rb) { + return rbuf_ulonglong(rb); +} + +static inline BLOCKNUM rbuf_blocknum(struct rbuf *rb) { + BLOCKNUM result = make_blocknum(rbuf_longlong(rb)); + return result; +} + +static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, memarena *UU(ma), BLOCKNUM *blocknum) { + *blocknum = rbuf_blocknum(rb); +} diff --git a/storage/tokudb/ft-index/ft/compress.cc b/storage/tokudb/ft-index/ft/serialize/compress.cc similarity index 96% rename from storage/tokudb/ft-index/ft/compress.cc rename to storage/tokudb/ft-index/ft/serialize/compress.cc index 2b0187e0b4f92..e905220026b35 100644 --- a/storage/tokudb/ft-index/ft/compress.cc +++ b/storage/tokudb/ft-index/ft/serialize/compress.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -165,11 +165,12 @@ void toku_compress (enum toku_compression_method a, assert(1 <= *destLen); *destLen = 1; } else { - qlz_state_compress *XCALLOC(qsc); + toku::scoped_calloc qsc_buf(sizeof(qlz_state_compress)); + qlz_state_compress *qsc = reinterpret_cast(qsc_buf.get()); size_t actual_destlen = qlz_compress(source, (char*)(dest+1), sourceLen, qsc); - assert(actual_destlen +1 <= *destLen); - *destLen = actual_destlen+1; // add one for the rfc1950-style header byte. - toku_free(qsc); + assert(actual_destlen + 1 <= *destLen); + // add one for the rfc1950-style header byte. + *destLen = actual_destlen + 1; } // Fill in that first byte dest[0] = TOKU_QUICKLZ_METHOD + (QLZ_COMPRESSION_LEVEL << 4); diff --git a/storage/tokudb/ft-index/ft/compress.h b/storage/tokudb/ft-index/ft/serialize/compress.h similarity index 98% rename from storage/tokudb/ft-index/ft/compress.h rename to storage/tokudb/ft-index/ft/serialize/compress.h index bc25b55be8bf7..8b3bb2185b628 100644 --- a/storage/tokudb/ft-index/ft/compress.h +++ b/storage/tokudb/ft-index/ft/serialize/compress.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,13 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
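One detail worth calling out from the toku_compress() hunk above: the first output byte encodes the compression method in its low nibble and the quicklz level in its high nibble. A hedged decoding sketch (these helpers are illustrative, not the library's API):

    static inline int method_from_header_byte_sketch(unsigned char b0) {
        return b0 & 0x0f;          // e.g. TOKU_QUICKLZ_METHOD
    }
    static inline int qlz_level_from_header_byte_sketch(unsigned char b0) {
        return (b0 >> 4) & 0x0f;   // e.g. QLZ_COMPRESSION_LEVEL
    }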
-#ifndef TOKU_COMPRESS_H -#define TOKU_COMPRESS_H - - #include #include @@ -131,5 +129,3 @@ void toku_decompress (Bytef *dest, uLongf destLen, // This function can decompress data compressed with either zlib or quicklz compression methods (calling toku_compress(), which puts an appropriate header on so we know which it is.) // Requires: destLen is equal to the actual decompressed size of the data. // Requires: The source must have been properly compressed. - -#endif diff --git a/storage/tokudb/ft-index/ft/ft-node-deserialize.cc b/storage/tokudb/ft-index/ft/serialize/ft-node-deserialize.cc similarity index 96% rename from storage/tokudb/ft-index/ft/ft-node-deserialize.cc rename to storage/tokudb/ft-index/ft/serialize/ft-node-deserialize.cc index 4f869419266f7..4e55c222eb71f 100644 --- a/storage/tokudb/ft-index/ft/ft-node-deserialize.cc +++ b/storage/tokudb/ft-index/ft/serialize/ft-node-deserialize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,8 +89,9 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include +#include "ft/node.h" +#include "ft/ft-internal.h" +#include "ft/serialize/ft_node-serialize.h" /* * ft-node-deserialize.c - @@ -111,7 +112,7 @@ void initialize_ftnode(FTNODE node, BLOCKNUM blocknum) { node->fullhash = 0xDEADBEEF; // Is this 'spoof' ok? - node->thisnodename = blocknum; + node->blocknum = blocknum; node->dirty = 0; node->bp = NULL; // Can we use this initialization as a correctness assert in @@ -132,7 +133,7 @@ int read_and_check_magic(struct rbuf *rb) { int r = 0; - bytevec magic; + const void *magic; rbuf_literal_bytes(rb, &magic, 8); if (memcmp(magic, "tokuleaf", 8)!=0 && memcmp(magic, "tokunode", 8)!=0) { @@ -193,7 +194,7 @@ check_node_info_checksum(struct rbuf *rb) { int r = 0; // Verify checksum of header stored. - uint32_t checksum = x1764_memory(rb->buf, rb->ndone); + uint32_t checksum = toku_x1764_memory(rb->buf, rb->ndone); uint32_t stored_checksum = rbuf_int(rb); if (stored_checksum != checksum) { @@ -229,7 +230,7 @@ check_legacy_end_checksum(struct rbuf *rb) { int r = 0; uint32_t expected_xsum = rbuf_int(rb); - uint32_t actual_xsum = x1764_memory(rb->buf, rb->size - 4); + uint32_t actual_xsum = toku_x1764_memory(rb->buf, rb->size - 4); if (expected_xsum != actual_xsum) { r = TOKUDB_BAD_CHECKSUM; } diff --git a/storage/tokudb/ft-index/ft/ft-serialize.cc b/storage/tokudb/ft-index/ft/serialize/ft-serialize.cc similarity index 88% rename from storage/tokudb/ft-index/ft/ft-serialize.cc rename to storage/tokudb/ft-index/ft/serialize/ft-serialize.cc index e6fbe0a2ce42e..4e4475922556e 100644 --- a/storage/tokudb/ft-index/ft/ft-serialize.cc +++ b/storage/tokudb/ft-index/ft/serialize/ft-serialize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,13 +89,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "compress.h" -#include "ft.h" -#include "ft-internal.h" +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/msg.h" +#include "ft/serialize/block_allocator.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/compress.h" +#include "ft/serialize/ft-serialize.h" // not version-sensitive because we only serialize a descriptor using the current layout_version uint32_t -toku_serialize_descriptor_size(const DESCRIPTOR desc) { +toku_serialize_descriptor_size(DESCRIPTOR desc) { //Checksum NOT included in this. Checksum only exists in header's version. uint32_t size = 4; // four bytes for size of descriptor size += desc->dbt.size; @@ -103,7 +107,7 @@ toku_serialize_descriptor_size(const DESCRIPTOR desc) { } static uint32_t -deserialize_descriptor_size(const DESCRIPTOR desc, int layout_version) { +deserialize_descriptor_size(DESCRIPTOR desc, int layout_version) { //Checksum NOT included in this. Checksum only exists in header's version. uint32_t size = 4; // four bytes for size of descriptor if (layout_version == FT_LAYOUT_VERSION_13) @@ -112,8 +116,7 @@ deserialize_descriptor_size(const DESCRIPTOR desc, int layout_version) { return size; } -void -toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTOR desc) { +void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, DESCRIPTOR desc) { wbuf_bytes(wb, desc->dbt.data, desc->dbt.size); } @@ -121,7 +124,7 @@ toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTOR des //descriptor. //Descriptors are NOT written during the header checkpoint process. void -toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISKOFF offset) { +toku_serialize_descriptor_contents_to_fd(int fd, DESCRIPTOR desc, DISKOFF offset) { // make the checksum int64_t size = toku_serialize_descriptor_size(desc)+4; //4 for checksum int64_t size_aligned = roundup_to_multiple(512, size); @@ -132,7 +135,7 @@ toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISKOFF toku_serialize_descriptor_contents_to_wbuf(&w, desc); { //Add checksum - uint32_t checksum = x1764_finish(&w.checksum); + uint32_t checksum = toku_x1764_finish(&w.checksum); wbuf_int(&w, checksum); } lazy_assert(w.ndone==w.size); @@ -146,32 +149,24 @@ toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISKOFF static void deserialize_descriptor_from_rbuf(struct rbuf *rb, DESCRIPTOR desc, int layout_version) { if (layout_version <= FT_LAYOUT_VERSION_13) { - // in older versions of TokuDB the Descriptor had a 4 byte + // in older versions of tokuft, the descriptor had a 4 byte // version, which we skip over (void) rbuf_int(rb); } uint32_t size; - bytevec data; + const void *data; rbuf_bytes(rb, &data, &size); - bytevec data_copy = data; - if (size > 0) { - data_copy = toku_memdup(data, size); //Cannot keep the reference from rbuf. Must copy. 
- lazy_assert(data_copy); - } else { - lazy_assert(size==0); - data_copy = NULL; - } - toku_fill_dbt(&desc->dbt, data_copy, size); + toku_memdup_dbt(&desc->dbt, data, size); } static int -deserialize_descriptor_from(int fd, BLOCK_TABLE bt, DESCRIPTOR desc, int layout_version) { +deserialize_descriptor_from(int fd, block_table *bt, DESCRIPTOR desc, int layout_version) { int r = 0; DISKOFF offset; DISKOFF size; - unsigned char *dbuf = NULL; - toku_get_descriptor_offset_size(bt, &offset, &size); + unsigned char *dbuf = nullptr; + bt->get_descriptor_offset_size(&offset, &size); memset(desc, 0, sizeof(*desc)); if (size > 0) { lazy_assert(size>=4); //4 for checksum @@ -185,7 +180,7 @@ deserialize_descriptor_from(int fd, BLOCK_TABLE bt, DESCRIPTOR desc, int layout_ } { // check the checksum - uint32_t x1764 = x1764_memory(dbuf, size-4); + uint32_t x1764 = toku_x1764_memory(dbuf, size-4); //printf("%s:%d read from %ld (x1764 offset=%ld) size=%ld\n", __FILE__, __LINE__, block_translation_address_on_disk, offset, block_translation_size_on_disk); uint32_t stored_x1764 = toku_dtoh32(*(int*)(dbuf + size-4)); if (x1764 != stored_x1764) { @@ -195,12 +190,10 @@ deserialize_descriptor_from(int fd, BLOCK_TABLE bt, DESCRIPTOR desc, int layout_ goto exit; } } - { - struct rbuf rb = {.buf = dbuf, .size = (unsigned int) size, .ndone = 0}; - //Not temporary; must have a toku_memdup'd copy. - deserialize_descriptor_from_rbuf(&rb, desc, layout_version); - } - lazy_assert(deserialize_descriptor_size(desc, layout_version)+4 == size); + + struct rbuf rb = { .buf = dbuf, .size = (unsigned int) size, .ndone = 0 }; + deserialize_descriptor_from_rbuf(&rb, desc, layout_version); + lazy_assert(deserialize_descriptor_size(desc, layout_version) + 4 == size); toku_free(dbuf); } } @@ -210,7 +203,7 @@ deserialize_descriptor_from(int fd, BLOCK_TABLE bt, DESCRIPTOR desc, int layout_ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) // Effect: Deserialize the ft header. -// We deserialize brt header only once and then share everything with all the brts. +// We deserialize ft_header only once and then share everything with all the FTs. { int r; FT ft = NULL; @@ -222,7 +215,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) //Verification of initial elements. 
//Check magic number - bytevec magic; + const void *magic; rbuf_literal_bytes(rb, &magic, 8); lazy_assert(memcmp(magic,"tokudata",8)==0); @@ -244,7 +237,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) size = rbuf_network_int(rb); lazy_assert(size == rb->size); - bytevec tmp_byte_order_check; + const void *tmp_byte_order_check; lazy_assert((sizeof tmp_byte_order_check) >= 8); rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order int64_t byte_order_stored; @@ -254,13 +247,13 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) uint64_t checkpoint_count; checkpoint_count = rbuf_ulonglong(rb); LSN checkpoint_lsn; - checkpoint_lsn = rbuf_lsn(rb); + checkpoint_lsn = rbuf_LSN(rb); unsigned nodesize; nodesize = rbuf_int(rb); DISKOFF translation_address_on_disk; - translation_address_on_disk = rbuf_diskoff(rb); + translation_address_on_disk = rbuf_DISKOFF(rb); DISKOFF translation_size_on_disk; - translation_size_on_disk = rbuf_diskoff(rb); + translation_size_on_disk = rbuf_DISKOFF(rb); lazy_assert(translation_address_on_disk > 0); lazy_assert(translation_size_on_disk > 0); @@ -281,11 +274,10 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) assert(readsz <= (ssize_t)size_to_read); } // Create table and read in data. - r = toku_blocktable_create_from_buffer(fd, - &ft->blocktable, - translation_address_on_disk, - translation_size_on_disk, - tbuf); + r = ft->blocktable.create_from_buffer(fd, + translation_address_on_disk, + translation_size_on_disk, + tbuf); toku_free(tbuf); if (r != 0) { goto exit; @@ -353,7 +345,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) time_of_last_optimize_begin = rbuf_ulonglong(rb); time_of_last_optimize_end = rbuf_ulonglong(rb); count_of_optimize_in_progress = rbuf_int(rb); - msn_at_start_of_last_completed_optimize = rbuf_msn(rb); + msn_at_start_of_last_completed_optimize = rbuf_MSN(rb); } enum toku_compression_method compression_method; @@ -362,7 +354,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_19) { unsigned char method = rbuf_char(rb); compression_method = (enum toku_compression_method) method; - highest_unused_msn_for_upgrade = rbuf_msn(rb); + highest_unused_msn_for_upgrade = rbuf_MSN(rb); } else { // we hard coded zlib until 5.2, then quicklz in 5.2 if (ft->layout_version_read_from_disk < FT_LAYOUT_VERSION_18) { @@ -375,7 +367,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) MSN max_msn_in_ft; max_msn_in_ft = ZERO_MSN; // We'll upgrade it from the root node later if necessary if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_21) { - max_msn_in_ft = rbuf_msn(rb); + max_msn_in_ft = rbuf_MSN(rb); } (void) rbuf_int(rb); //Read in checksum and ignore (already verified). 
@@ -433,13 +425,14 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) } invariant((uint32_t) ft->layout_version_read_from_disk == version); - r = deserialize_descriptor_from(fd, ft->blocktable, &ft->descriptor, version); + r = deserialize_descriptor_from(fd, &ft->blocktable, &ft->descriptor, version); if (r != 0) { goto exit; } - // copy descriptor to cmp_descriptor for #4541 - ft->cmp_descriptor.dbt.size = ft->descriptor.dbt.size; - ft->cmp_descriptor.dbt.data = toku_xmemdup(ft->descriptor.dbt.data, ft->descriptor.dbt.size); + + // initialize for svn #4541 + toku_clone_dbt(&ft->cmp_descriptor.dbt, ft->descriptor.dbt); + // Version 13 descriptors had an extra 4 bytes that we don't read // anymore. Since the header is going to think it's the current // version if it gets written out, we need to write the descriptor in @@ -462,6 +455,8 @@ serialize_ft_min_size (uint32_t version) { size_t size = 0; switch(version) { + case FT_LAYOUT_VERSION_27: + case FT_LAYOUT_VERSION_26: case FT_LAYOUT_VERSION_25: case FT_LAYOUT_VERSION_24: case FT_LAYOUT_VERSION_23: @@ -517,7 +512,7 @@ serialize_ft_min_size (uint32_t version) { abort(); } - lazy_assert(size <= BLOCK_ALLOCATOR_HEADER_RESERVE); + lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE); return size; } @@ -559,7 +554,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd, rbuf_init(rb, prefix, prefix_size); //Check magic number - bytevec magic; + const void *magic; rbuf_literal_bytes(rb, &magic, 8); if (memcmp(magic,"tokudata",8)!=0) { if ((*(uint64_t*)magic) == 0) { @@ -594,7 +589,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd, //If too big, it is corrupt. We would probably notice during checksum //but may have to do a multi-gigabyte malloc+read to find out. //If its too small reading rbuf would crash, so verify. 
- if (size > BLOCK_ALLOCATOR_HEADER_RESERVE || size < min_header_size) { + if (size > block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE || size < min_header_size) { r = TOKUDB_DICTIONARY_NO_HEADER; goto exit; } @@ -623,7 +618,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd, //Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function changed) uint32_t calculated_x1764; - calculated_x1764 = x1764_memory(rb->buf, rb->size-4); + calculated_x1764 = toku_x1764_memory(rb->buf, rb->size-4); uint32_t stored_x1764; stored_x1764 = toku_dtoh32(*(int*)(rb->buf+rb->size-4)); if (calculated_x1764 != stored_x1764) { @@ -633,7 +628,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd, } //Verify byte order - bytevec tmp_byte_order_check; + const void *tmp_byte_order_check; lazy_assert((sizeof toku_byte_order_host) == 8); rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order int64_t byte_order_stored; @@ -645,7 +640,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd, //Load checkpoint count *checkpoint_count = rbuf_ulonglong(rb); - *checkpoint_lsn = rbuf_lsn(rb); + *checkpoint_lsn = rbuf_LSN(rb); //Restart at beginning during regular deserialization rb->ndone = 0; @@ -667,11 +662,11 @@ toku_deserialize_ft_from(int fd, { struct rbuf rb_0; struct rbuf rb_1; - uint64_t checkpoint_count_0; - uint64_t checkpoint_count_1; + uint64_t checkpoint_count_0 = 0; + uint64_t checkpoint_count_1 = 0; LSN checkpoint_lsn_0; LSN checkpoint_lsn_1; - uint32_t version_0, version_1, version = 0; + uint32_t version_0 = 0, version_1 = 0, version = 0; bool h0_acceptable = false; bool h1_acceptable = false; struct rbuf *rb = NULL; @@ -683,7 +678,7 @@ toku_deserialize_ft_from(int fd, h0_acceptable = true; } - toku_off_t header_1_off = BLOCK_ALLOCATOR_HEADER_RESERVE; + toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; r1 = deserialize_ft_from_fd_into_rbuf(fd, header_1_off, &rb_1, &checkpoint_count_1, &checkpoint_lsn_1, &version_1); if (r1 == 0 && checkpoint_lsn_1.lsn <= max_acceptable_lsn.lsn) { h1_acceptable = true; @@ -762,7 +757,7 @@ toku_deserialize_ft_from(int fd, size_t toku_serialize_ft_size (FT_HEADER h) { size_t size = serialize_ft_min_size(h->layout_version); //There is no dynamic data. - lazy_assert(size <= BLOCK_ALLOCATOR_HEADER_RESERVE); + lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE); return size; } @@ -803,28 +798,30 @@ void toku_serialize_ft_to_wbuf ( wbuf_char(wbuf, (unsigned char) h->compression_method); wbuf_MSN(wbuf, h->highest_unused_msn_for_upgrade); wbuf_MSN(wbuf, h->max_msn_in_ft); - uint32_t checksum = x1764_finish(&wbuf->checksum); + uint32_t checksum = toku_x1764_finish(&wbuf->checksum); wbuf_int(wbuf, checksum); lazy_assert(wbuf->ndone == wbuf->size); } -void toku_serialize_ft_to (int fd, FT_HEADER h, BLOCK_TABLE blocktable, CACHEFILE cf) { +void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) { lazy_assert(h->type==FT_CHECKPOINT_INPROGRESS); struct wbuf w_translation; int64_t size_translation; int64_t address_translation; - //Must serialize translation first, to get address,size for header. - toku_serialize_translation_to_wbuf(blocktable, fd, &w_translation, - &address_translation, - &size_translation); - assert(size_translation == w_translation.ndone); // the bytes written are the size - assert(w_translation.size % 512 == 0); // the number of bytes available in the buffer is 0 mod 512, and those last bytes are all initialized. + // Must serialize translation first, to get address,size for header. 
+ bt->serialize_translation_to_wbuf(fd, &w_translation, + &address_translation, + &size_translation); + assert(size_translation == w_translation.ndone); + + // the number of bytes available in the buffer is 0 mod 512, and those last bytes are all initialized. + assert(w_translation.size % 512 == 0); struct wbuf w_main; size_t size_main = toku_serialize_ft_size(h); size_t size_main_aligned = roundup_to_multiple(512, size_main); - assert(size_main_alignedcheckpoint_count & 0x1) ? 0 : BLOCK_ALLOCATOR_HEADER_RESERVE; + main_offset = (h->checkpoint_count & 0x1) ? 0 : block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; toku_os_full_pwrite(fd, w_main.buf, size_main_aligned, main_offset); toku_free(w_main.buf); toku_free(w_translation.buf); diff --git a/storage/tokudb/ft-index/ft/ft_msg.h b/storage/tokudb/ft-index/ft/serialize/ft-serialize.h similarity index 77% rename from storage/tokudb/ft-index/ft/ft_msg.h rename to storage/tokudb/ft-index/ft/serialize/ft-serialize.h index f468d7f647be0..dc8bb68ae40cd 100644 --- a/storage/tokudb/ft-index/ft/ft_msg.h +++ b/storage/tokudb/ft-index/ft/serialize/ft-serialize.h @@ -1,14 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -/* The purpose of this file is to provide access to the ft_msg, - * which is the ephemeral version of the fifo_msg. - */ - -#ifndef FT_MSG_H -#define FT_MSG_H - -#ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -37,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,31 +86,27 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- - -uint32_t ft_msg_get_keylen(FT_MSG ft_msg); - -uint32_t ft_msg_get_vallen(FT_MSG ft_msg); - -XIDS ft_msg_get_xids(FT_MSG ft_msg); - -void * ft_msg_get_key(FT_MSG ft_msg); - -void * ft_msg_get_val(FT_MSG ft_msg); - -enum ft_msg_type ft_msg_get_type(FT_MSG ft_msg); - -void ft_msg_from_fifo_msg(FT_MSG ft_msg, FIFO_MSG fifo_msg); - -#if 0 - -void ft_msg_from_dbts(FT_MSG ft_msg, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type); +#pragma once -#endif +#include "ft/ft.h" +#include "ft/serialize/block_table.h" +size_t toku_serialize_ft_size(struct ft_header *h); +void toku_serialize_ft_to(int fd, struct ft_header *h, block_table *bt, CACHEFILE cf); +void toku_serialize_ft_to_wbuf(struct wbuf *wbuf, struct ft_header *h, DISKOFF translation_location_on_disk, DISKOFF translation_size_on_disk); +void toku_serialize_descriptor_contents_to_fd(int fd, DESCRIPTOR desc, DISKOFF offset); +void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, DESCRIPTOR desc); +int toku_deserialize_ft_from(int fd, LSN max_acceptable_lsn, FT *ft); -#endif // FT_MSG_H +// TODO rename +int deserialize_ft_from_fd_into_rbuf(int fd, + toku_off_t offset_of_header, + struct rbuf *rb, + uint64_t *checkpoint_count, + LSN *checkpoint_lsn, + uint32_t *version_p); +// used by verify +// TODO rename +int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version); diff --git a/storage/tokudb/ft-index/ft/ft_layout_version.h b/storage/tokudb/ft-index/ft/serialize/ft_layout_version.h similarity index 93% rename from storage/tokudb/ft-index/ft/ft_layout_version.h rename to storage/tokudb/ft-index/ft/serialize/ft_layout_version.h index da401ea7c5519..cf16d472355ff 100644 --- a/storage/tokudb/ft-index/ft/ft_layout_version.h +++ b/storage/tokudb/ft-index/ft/serialize/ft_layout_version.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef FT_LAYOUT_VERSION_H -#define FT_LAYOUT_VERSION_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,17 +87,19 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -//Must be defined before other recursive headers could include logger.h +//Must be defined before other recursive headers could include logger/recover.h enum ft_layout_version_e { FT_LAYOUT_VERSION_5 = 5, FT_LAYOUT_VERSION_6 = 6, // Diff from 5 to 6: Add leafentry_estimate FT_LAYOUT_VERSION_7 = 7, // Diff from 6 to 7: Add exact-bit to leafentry_estimate #818, add magic to header #22, add per-subdatase flags #333 FT_LAYOUT_VERSION_8 = 8, // Diff from 7 to 8: Use murmur instead of crc32. We are going to make a simplification and stop supporting version 7 and before. Current As of Beta 1.0.6 FT_LAYOUT_VERSION_9 = 9, // Diff from 8 to 9: Variable-sized blocks and compression. 
-    FT_LAYOUT_VERSION_10 = 10, // Diff from 9 to 10: Variable number of compressed sub-blocks per block, disk byte order == intel byte order, Subtree estimates instead of just leafentry estimates, translation table, dictionary descriptors, checksum in header, subdb support removed from brt layer
+    FT_LAYOUT_VERSION_10 = 10, // Diff from 9 to 10: Variable number of compressed sub-blocks per block, disk byte order == intel byte order, Subtree estimates instead of just leafentry estimates, translation table, dictionary descriptors, checksum in header, subdb support removed from ft layer
     FT_LAYOUT_VERSION_11 = 11, // Diff from 10 to 11: Nested transaction leafentries (completely redesigned). FT_CMDs on disk now support XIDS (multiple txnids) instead of exactly one.
     FT_LAYOUT_VERSION_12 = 12, // Diff from 11 to 12: Added FT_CMD 'FT_INSERT_NO_OVERWRITE', compressed block format, num old blocks
     FT_LAYOUT_VERSION_13 = 13, // Diff from 12 to 13: Fixed loader pivot bug, added build_id to every node, timestamps to ft
@@ -107,8 +107,8 @@ enum ft_layout_version_e {
     FT_LAYOUT_VERSION_15 = 15, // Diff from 14 to 15: basement nodes, last verification time
     FT_LAYOUT_VERSION_16 = 16, // Dr. No: No subtree estimates, partition layout information represented more transparently.
                                // ALERT ALERT ALERT: version 16 never released to customers, internal and beta use only
-    FT_LAYOUT_VERSION_17 = 17, // Dr. No: Add STAT64INFO_S to brt_header
-    FT_LAYOUT_VERSION_18 = 18, // Dr. No: Add HOT info to brt_header
+    FT_LAYOUT_VERSION_17 = 17, // Dr. No: Add STAT64INFO_S to ft header
+    FT_LAYOUT_VERSION_18 = 18, // Dr. No: Add HOT info to ft header
     FT_LAYOUT_VERSION_19 = 19, // Doofenshmirtz: Add compression method, highest_unused_msn_for_upgrade
     FT_LAYOUT_VERSION_20 = 20, // Deadshot: Add compression method to log_fcreate,
                                // mgr_last_xid after begin checkpoint,
@@ -119,6 +119,8 @@ enum ft_layout_version_e {
     FT_LAYOUT_VERSION_23 = 23, // Ming: Fix upgrade path #5902
     FT_LAYOUT_VERSION_24 = 24, // Riddler: change logentries that log transactions to store TXNID_PAIRs instead of TXNIDs
     FT_LAYOUT_VERSION_25 = 25, // SecretSquirrel: ROLLBACK_LOG_NODES (on disk and in memory) now just use blocknum (instead of blocknum + hash) to point to other log nodes. same for xstillopen log entry
+    FT_LAYOUT_VERSION_26 = 26, // Hojo: basements store key/vals separately on disk for fixed klpair length BNs
+    FT_LAYOUT_VERSION_27 = 27, // serialize message trees with nonleaf buffers to avoid key, msn sort on deserialize
     FT_NEXT_VERSION, // the version after the current version
     FT_LAYOUT_VERSION = FT_NEXT_VERSION-1, // A hack so I don't have to change this line.
     FT_LAYOUT_MIN_SUPPORTED_VERSION = FT_LAYOUT_VERSION_13, // Minimum version supported
@@ -128,5 +130,3 @@ enum ft_layout_version_e {
     FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM = FT_LAYOUT_VERSION_14,
     FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES = FT_LAYOUT_VERSION_15,
 };
-
-#endif
diff --git a/storage/tokudb/ft-index/ft/ft_node-serialize.cc b/storage/tokudb/ft-index/ft/serialize/ft_node-serialize.cc
similarity index 73%
rename from storage/tokudb/ft-index/ft/ft_node-serialize.cc
rename to storage/tokudb/ft-index/ft/serialize/ft_node-serialize.cc
index 6fb6774ee34eb..8e6e27b34b347 100644
--- a/storage/tokudb/ft-index/ft/ft_node-serialize.cc
+++ b/storage/tokudb/ft-index/ft/serialize/ft_node-serialize.cc
@@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE:
 COPYRIGHT NOTICE:
 
-  TokuDB, Tokutek Fractal Tree Indexing Library.
+  TokuFT, Tokutek Fractal Tree Indexing Library.
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,19 +89,27 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "ft-internal.h" -#include "log-internal.h" -#include -#include -#include -#include -#include "ft.h" -#include -#include +#include "portability/toku_atomic.h" + +#include "ft/cachetable/cachetable.h" +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/node.h" +#include "ft/logger/log-internal.h" +#include "ft/txn/rollback.h" +#include "ft/serialize/block_allocator.h" +#include "ft/serialize/block_table.h" +#include "ft/serialize/compress.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/serialize/sub_block.h" +#include "util/sort.h" +#include "util/threadpool.h" +#include "util/status.h" +#include "util/scoped_malloc.h" static FT_UPGRADE_STATUS_S ft_upgrade_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ft_upgrade_status, k, c, t, "brt upgrade: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ft_upgrade_status, k, c, t, "ft upgrade: " l, inc) static void status_init(void) @@ -126,6 +134,7 @@ toku_ft_upgrade_get_status(FT_UPGRADE_STATUS s) { static int num_cores = 0; // cache the number of cores for the parallelization static struct toku_thread_pool *ft_pool = NULL; +bool toku_serialize_in_parallel; int get_num_cores(void) { return num_cores; @@ -135,18 +144,24 @@ struct toku_thread_pool *get_ft_pool(void) { return ft_pool; } -void -toku_ft_serialize_layer_init(void) { +void toku_serialize_set_parallel(bool in_parallel) { + toku_serialize_in_parallel = in_parallel; +} + +void toku_ft_serialize_layer_init(void) { num_cores = toku_os_get_number_active_processors(); - int r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r); + int r = toku_thread_pool_create(&ft_pool, num_cores); + lazy_assert_zero(r); + block_allocator::maybe_initialize_trace(); + toku_serialize_in_parallel = false; } -void -toku_ft_serialize_layer_destroy(void) { +void toku_ft_serialize_layer_destroy(void) { toku_thread_pool_destroy(&ft_pool); + block_allocator::maybe_close_trace(); } -enum {FILE_CHANGE_INCREMENT = (16<<20)}; +enum { FILE_CHANGE_INCREMENT = (16 << 20) }; static inline uint64_t alignup64(uint64_t a, uint64_t b) { @@ -193,7 +208,7 @@ toku_maybe_preallocate_in_file (int fd, int64_t size, int64_t expected_size, int // Effect: make the file bigger by either doubling it or growing by 16MiB whichever is less, until it is at least size // Return 0 on success, otherwise an error number. 
{ - int64_t file_size; + int64_t file_size = 0; //TODO(yoni): Allow variable stripe_width (perhaps from ft) for larger raids const uint64_t stripe_width = 4096; { @@ -240,9 +255,6 @@ enum { 4), // build_id }; -#include "sub_block.h" -#include "sub_block_map.h" - // uncompressed header offsets enum { uncompressed_magic_offset = 0, @@ -279,75 +291,86 @@ serialize_node_header(FTNODE node, FTNODE_DISK_DATA ndd, struct wbuf *wbuf) { wbuf_nocrc_int(wbuf, BP_SIZE (ndd, i)); // and the size } // checksum the header - uint32_t end_to_end_checksum = x1764_memory(wbuf->buf, wbuf_get_woffset(wbuf)); + uint32_t end_to_end_checksum = toku_x1764_memory(wbuf->buf, wbuf_get_woffset(wbuf)); wbuf_nocrc_int(wbuf, end_to_end_checksum); invariant(wbuf->ndone == wbuf->size); } -static int -wbufwriteleafentry(const void* key, const uint32_t keylen, const LEAFENTRY &le, const uint32_t UU(idx), struct wbuf * const wb) { - // need to pack the leafentry as it was in versions - // where the key was integrated into it - uint32_t begin_spot UU() = wb->ndone; - uint32_t le_disk_size = leafentry_disksize(le); - wbuf_nocrc_uint8_t(wb, le->type); - wbuf_nocrc_uint32_t(wb, keylen); - if (le->type == LE_CLEAN) { - wbuf_nocrc_uint32_t(wb, le->u.clean.vallen); - wbuf_nocrc_literal_bytes(wb, key, keylen); - wbuf_nocrc_literal_bytes(wb, le->u.clean.val, le->u.clean.vallen); - } - else { - paranoid_invariant(le->type == LE_MVCC); - wbuf_nocrc_uint32_t(wb, le->u.mvcc.num_cxrs); - wbuf_nocrc_uint8_t(wb, le->u.mvcc.num_pxrs); - wbuf_nocrc_literal_bytes(wb, key, keylen); - wbuf_nocrc_literal_bytes(wb, le->u.mvcc.xrs, le_disk_size - (1 + 4 + 1)); - } - uint32_t end_spot UU() = wb->ndone; - paranoid_invariant((end_spot - begin_spot) == keylen + sizeof(keylen) + le_disk_size); - return 0; -} - -static uint32_t +static uint32_t serialize_ftnode_partition_size (FTNODE node, int i) { uint32_t result = 0; paranoid_invariant(node->bp[i].state == PT_AVAIL); result++; // Byte that states what the partition is if (node->height > 0) { - result += 4; // size of bytes in buffer table - result += toku_bnc_nbytesinbuf(BNC(node, i)); + NONLEAF_CHILDINFO bnc = BNC(node, i); + // number of messages (4 bytes) plus size of the buffer + result += (4 + toku_bnc_nbytesinbuf(bnc)); + // number of offsets (4 bytes) plus an array of 4 byte offsets, for each message tree + result += (4 + (4 * bnc->fresh_message_tree.size())); + result += (4 + (4 * bnc->stale_message_tree.size())); + result += (4 + (4 * bnc->broadcast_list.size())); } else { - result += 4; // n_entries in buffer table + result += 4 + bn_data::HEADER_LENGTH; // n_entries in buffer table + basement header result += BLB_NBYTESINDATA(node, i); } result += 4; // checksum return result; } -#define FTNODE_PARTITION_OMT_LEAVES 0xaa -#define FTNODE_PARTITION_FIFO_MSG 0xbb +#define FTNODE_PARTITION_DMT_LEAVES 0xaa +#define FTNODE_PARTITION_MSG_BUFFER 0xbb -static void -serialize_nonleaf_childinfo(NONLEAF_CHILDINFO bnc, struct wbuf *wb) -{ - unsigned char ch = FTNODE_PARTITION_FIFO_MSG; +UU() static int +assert_fresh(const int32_t &offset, const uint32_t UU(idx), message_buffer *const msg_buffer) { + bool is_fresh = msg_buffer->get_freshness(offset); + assert(is_fresh); + return 0; +} + +UU() static int +assert_stale(const int32_t &offset, const uint32_t UU(idx), message_buffer *const msg_buffer) { + bool is_fresh = msg_buffer->get_freshness(offset); + assert(!is_fresh); + return 0; +} + +static void bnc_verify_message_trees(NONLEAF_CHILDINFO UU(bnc)) { +#ifdef TOKU_DEBUG_PARANOID + 
bnc->fresh_message_tree.iterate(&bnc->msg_buffer); + bnc->stale_message_tree.iterate(&bnc->msg_buffer); +#endif +} + +static int +wbuf_write_offset(const int32_t &offset, const uint32_t UU(idx), struct wbuf *const wb) { + wbuf_nocrc_int(wb, offset); + return 0; +} + +static void serialize_child_buffer(NONLEAF_CHILDINFO bnc, struct wbuf *wb) { + unsigned char ch = FTNODE_PARTITION_MSG_BUFFER; wbuf_nocrc_char(wb, ch); - // serialize the FIFO, first the number of entries, then the elements - wbuf_nocrc_int(wb, toku_bnc_n_entries(bnc)); - FIFO_ITERATE( - bnc->buffer, key, keylen, data, datalen, type, msn, xids, is_fresh, - { - paranoid_invariant((int)type>=0 && type<256); - wbuf_nocrc_char(wb, (unsigned char)type); - wbuf_nocrc_char(wb, (unsigned char)is_fresh); - wbuf_MSN(wb, msn); - wbuf_nocrc_xids(wb, xids); - wbuf_nocrc_bytes(wb, key, keylen); - wbuf_nocrc_bytes(wb, data, datalen); - }); + + // serialize the message buffer + bnc->msg_buffer.serialize_to_wbuf(wb); + + // serialize the message trees (num entries, offsets array): + // first, verify their contents are consistent with the message buffer + bnc_verify_message_trees(bnc); + + // fresh + wbuf_nocrc_int(wb, bnc->fresh_message_tree.size()); + bnc->fresh_message_tree.iterate(wb); + + // stale + wbuf_nocrc_int(wb, bnc->stale_message_tree.size()); + bnc->stale_message_tree.iterate(wb); + + // broadcast + wbuf_nocrc_int(wb, bnc->broadcast_list.size()); + bnc->broadcast_list.iterate(wb); } // @@ -357,13 +380,11 @@ serialize_nonleaf_childinfo(NONLEAF_CHILDINFO bnc, struct wbuf *wb) // static void serialize_ftnode_partition(FTNODE node, int i, struct sub_block *sb) { - if (sb->uncompressed_ptr == NULL) { - assert(sb->uncompressed_size == 0); - sb->uncompressed_size = serialize_ftnode_partition_size(node,i); - sb->uncompressed_ptr = toku_xmalloc(sb->uncompressed_size); - } else { - assert(sb->uncompressed_size > 0); - } + // Caller should have allocated memory. 
+ invariant_notnull(sb->uncompressed_ptr); + invariant(sb->uncompressed_size > 0); + paranoid_invariant(sb->uncompressed_size == serialize_ftnode_partition_size(node, i)); + // // Now put the data into sb->uncompressed_ptr // @@ -371,21 +392,18 @@ serialize_ftnode_partition(FTNODE node, int i, struct sub_block *sb) { wbuf_init(&wb, sb->uncompressed_ptr, sb->uncompressed_size); if (node->height > 0) { // TODO: (Zardosht) possibly exit early if there are no messages - serialize_nonleaf_childinfo(BNC(node, i), &wb); + serialize_child_buffer(BNC(node, i), &wb); } else { - unsigned char ch = FTNODE_PARTITION_OMT_LEAVES; - BN_DATA bd = BLB_DATA(node, i); + unsigned char ch = FTNODE_PARTITION_DMT_LEAVES; + bn_data* bd = BLB_DATA(node, i); wbuf_nocrc_char(&wb, ch); - wbuf_nocrc_uint(&wb, bd->omt_size()); + wbuf_nocrc_uint(&wb, bd->num_klpairs()); - // - // iterate over leafentries and place them into the buffer - // - bd->omt_iterate(&wb); + bd->serialize_to_wbuf(&wb); } - uint32_t end_to_end_checksum = x1764_memory(sb->uncompressed_ptr, wbuf_get_woffset(&wb)); + uint32_t end_to_end_checksum = toku_x1764_memory(sb->uncompressed_ptr, wbuf_get_woffset(&wb)); wbuf_nocrc_int(&wb, end_to_end_checksum); invariant(wb.ndone == wb.size); invariant(sb->uncompressed_size==wb.ndone); @@ -397,13 +415,13 @@ serialize_ftnode_partition(FTNODE node, int i, struct sub_block *sb) { // static void compress_ftnode_sub_block(struct sub_block *sb, enum toku_compression_method method) { - assert(sb->compressed_ptr == NULL); - set_compressed_size_bound(sb, method); - // add 8 extra bytes, 4 for compressed size, 4 for decompressed size - sb->compressed_ptr = toku_xmalloc(sb->compressed_size_bound + 8); + invariant(sb->compressed_ptr != nullptr); + invariant(sb->compressed_size_bound > 0); + paranoid_invariant(sb->compressed_size_bound == toku_compress_bound(method, sb->uncompressed_size)); + // // This probably seems a bit complicated. Here is what is going on. - // In TokuDB 5.0, sub_blocks were compressed and the compressed data + // In TokuFT 5.0, sub_blocks were compressed and the compressed data // was checksummed. The checksum did NOT include the size of the compressed data // and the size of the uncompressed data. The fields of sub_block only reference the // compressed data, and it is the responsibility of the user of the sub_block @@ -429,14 +447,14 @@ compress_ftnode_sub_block(struct sub_block *sb, enum toku_compression_method met extra[1] = toku_htod32(sb->uncompressed_size); // now checksum the entire thing sb->compressed_size += 8; // now add the eight bytes that we saved for the sizes - sb->xsum = x1764_memory(sb->compressed_ptr,sb->compressed_size); + sb->xsum = toku_x1764_memory(sb->compressed_ptr,sb->compressed_size); // // This is the end result for Dr. No and forward. For ftnodes, sb->compressed_ptr contains // two integers at the beginning, the size and uncompressed size, and then the compressed // data. sb->xsum contains the checksum of this entire thing. // - // In TokuDB 5.0, sb->compressed_ptr only contained the compressed data, sb->xsum + // In TokuFT 5.0, sb->compressed_ptr only contained the compressed data, sb->xsum // checksummed only the compressed data, and the checksumming of the sizes were not // done here. 
// @@ -457,7 +475,7 @@ serialize_ftnode_info_size(FTNODE node) retval += 4; // flags retval += 4; // height; retval += 8; // oldest_referenced_xid_known - retval += node->totalchildkeylens; // total length of pivots + retval += node->pivotkeys.serialized_size(); retval += (node->n_children-1)*4; // encode length of each pivot if (node->height > 0) { retval += node->n_children*8; // child blocknum's @@ -466,13 +484,12 @@ serialize_ftnode_info_size(FTNODE node) return retval; } -static void serialize_ftnode_info(FTNODE node, - SUB_BLOCK sb // output - ) { - assert(sb->uncompressed_size == 0); - assert(sb->uncompressed_ptr == NULL); - sb->uncompressed_size = serialize_ftnode_info_size(node); - sb->uncompressed_ptr = toku_xmalloc(sb->uncompressed_size); +static void serialize_ftnode_info(FTNODE node, SUB_BLOCK sb) { + // Memory must have been allocated by our caller. + invariant(sb->uncompressed_size > 0); + invariant_notnull(sb->uncompressed_ptr); + paranoid_invariant(sb->uncompressed_size == serialize_ftnode_info_size(node)); + struct wbuf wb; wbuf_init(&wb, sb->uncompressed_ptr, sb->uncompressed_size); @@ -481,11 +498,8 @@ static void serialize_ftnode_info(FTNODE node, wbuf_nocrc_uint(&wb, node->flags); wbuf_nocrc_int (&wb, node->height); wbuf_TXNID(&wb, node->oldest_referenced_xid_known); + node->pivotkeys.serialize_to_wbuf(&wb); - // pivot information - for (int i = 0; i < node->n_children-1; i++) { - wbuf_nocrc_bytes(&wb, node->childkeys[i].data, node->childkeys[i].size); - } // child blocks, only for internal nodes if (node->height > 0) { for (int i = 0; i < node->n_children; i++) { @@ -493,7 +507,7 @@ static void serialize_ftnode_info(FTNODE node, } } - uint32_t end_to_end_checksum = x1764_memory(sb->uncompressed_ptr, wbuf_get_woffset(&wb)); + uint32_t end_to_end_checksum = toku_x1764_memory(sb->uncompressed_ptr, wbuf_get_woffset(&wb)); wbuf_nocrc_int(&wb, end_to_end_checksum); invariant(wb.ndone == wb.size); invariant(sb->uncompressed_size==wb.ndone); @@ -507,7 +521,7 @@ toku_serialize_ftnode_size (FTNODE node) { // As of now, this seems to be called if and only if the entire node is supposed // to be in memory, so we will assert it. // - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); result += serialize_node_header_size(node); result += serialize_ftnode_info_size(node); for (int i = 0; i < node->n_children; i++) { @@ -516,204 +530,6 @@ toku_serialize_ftnode_size (FTNODE node) { return result; } -struct array_info { - uint32_t offset; - LEAFENTRY* le_array; - uint32_t* key_sizes_array; - const void** key_ptr_array; -}; - -static int -array_item(const void* key, const uint32_t keylen, const LEAFENTRY &le, const uint32_t idx, struct array_info *const ai) { - ai->le_array[idx+ai->offset] = le; - ai->key_sizes_array[idx+ai->offset] = keylen; - ai->key_ptr_array[idx+ai->offset] = key; - return 0; -} - -// There must still be at least one child -// Requires that all messages in buffers above have been applied. -// Because all messages above have been applied, setting msn of all new basements -// to max msn of existing basements is correct. (There cannot be any messages in -// buffers above that still need to be applied.) -void -rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize) -{ - assert(node->height == 0); - assert(node->dirty); - - uint32_t num_orig_basements = node->n_children; - // Count number of leaf entries in this leaf (num_le). 
- uint32_t num_le = 0; - for (uint32_t i = 0; i < num_orig_basements; i++) { - num_le += BLB_DATA(node, i)->omt_size(); - } - - uint32_t num_alloc = num_le ? num_le : 1; // simplify logic below by always having at least one entry per array - - // Create an array of OMTVALUE's that store all the pointers to all the data. - // Each element in leafpointers is a pointer to a leaf. - toku::scoped_malloc leafpointers_buf(sizeof(LEAFENTRY) * num_alloc); - LEAFENTRY *leafpointers = reinterpret_cast(leafpointers_buf.get()); - leafpointers[0] = NULL; - - toku::scoped_malloc key_pointers_buf(sizeof(void *) * num_alloc); - const void **key_pointers = reinterpret_cast(key_pointers_buf.get()); - key_pointers[0] = NULL; - - toku::scoped_malloc key_sizes_buf(sizeof(uint32_t) * num_alloc); - uint32_t *key_sizes = reinterpret_cast(key_sizes_buf.get()); - - // Capture pointers to old mempools' buffers (so they can be destroyed) - toku::scoped_malloc old_bns_buf(sizeof(BASEMENTNODE) * num_orig_basements); - BASEMENTNODE *old_bns = reinterpret_cast(old_bns_buf.get()); - old_bns[0] = NULL; - - uint32_t curr_le = 0; - for (uint32_t i = 0; i < num_orig_basements; i++) { - BN_DATA bd = BLB_DATA(node, i); - struct array_info ai {.offset = curr_le, .le_array = leafpointers, .key_sizes_array = key_sizes, .key_ptr_array = key_pointers }; - bd->omt_iterate(&ai); - curr_le += bd->omt_size(); - } - - // Create an array that will store indexes of new pivots. - // Each element in new_pivots is the index of a pivot key. - // (Allocating num_le of them is overkill, but num_le is an upper bound.) - toku::scoped_malloc new_pivots_buf(sizeof(uint32_t) * num_alloc); - uint32_t *new_pivots = reinterpret_cast(new_pivots_buf.get()); - new_pivots[0] = 0; - - // Each element in le_sizes is the size of the leafentry pointed to by leafpointers. - toku::scoped_malloc le_sizes_buf(sizeof(size_t) * num_alloc); - size_t *le_sizes = reinterpret_cast(le_sizes_buf.get()); - le_sizes[0] = 0; - - // Create an array that will store the size of each basement. - // This is the sum of the leaf sizes of all the leaves in that basement. - // We don't know how many basements there will be, so we use num_le as the upper bound. - toku::scoped_malloc bn_sizes_buf(sizeof(size_t) * num_alloc); - size_t *bn_sizes = reinterpret_cast(bn_sizes_buf.get()); - bn_sizes[0] = 0; - - // TODO 4050: All these arrays should be combined into a single array of some bn_info struct (pivot, msize, num_les). - // Each entry is the number of leafentries in this basement. (Again, num_le is overkill upper baound.) - toku::scoped_malloc num_les_this_bn_buf(sizeof(uint32_t) * num_alloc); - uint32_t *num_les_this_bn = reinterpret_cast(num_les_this_bn_buf.get()); - num_les_this_bn[0] = 0; - - // Figure out the new pivots. - // We need the index of each pivot, and for each basement we need - // the number of leaves and the sum of the sizes of the leaves (memory requirement for basement). 
- uint32_t curr_pivot = 0; - uint32_t num_le_in_curr_bn = 0; - uint32_t bn_size_so_far = 0; - for (uint32_t i = 0; i < num_le; i++) { - uint32_t curr_le_size = leafentry_disksize((LEAFENTRY) leafpointers[i]); - le_sizes[i] = curr_le_size; - if ((bn_size_so_far + curr_le_size > basementnodesize) && (num_le_in_curr_bn != 0)) { - // cap off the current basement node to end with the element before i - new_pivots[curr_pivot] = i-1; - curr_pivot++; - num_le_in_curr_bn = 0; - bn_size_so_far = 0; - } - num_le_in_curr_bn++; - num_les_this_bn[curr_pivot] = num_le_in_curr_bn; - bn_size_so_far += curr_le_size + sizeof(uint32_t) + key_sizes[i]; - bn_sizes[curr_pivot] = bn_size_so_far; - } - // curr_pivot is now the total number of pivot keys in the leaf node - int num_pivots = curr_pivot; - int num_children = num_pivots + 1; - - // now we need to fill in the new basement nodes and pivots - - // TODO: (Zardosht) this is an ugly thing right now - // Need to figure out how to properly deal with seqinsert. - // I am not happy with how this is being - // handled with basement nodes - uint32_t tmp_seqinsert = BLB_SEQINSERT(node, num_orig_basements - 1); - - // choose the max msn applied to any basement as the max msn applied to all new basements - MSN max_msn = ZERO_MSN; - for (uint32_t i = 0; i < num_orig_basements; i++) { - MSN curr_msn = BLB_MAX_MSN_APPLIED(node,i); - max_msn = (curr_msn.msn > max_msn.msn) ? curr_msn : max_msn; - } - // remove the basement node in the node, we've saved a copy - for (uint32_t i = 0; i < num_orig_basements; i++) { - // save a reference to the old basement nodes - // we will need them to ensure that the memory - // stays intact - old_bns[i] = toku_detach_bn(node, i); - } - // Now destroy the old basements, but do not destroy leaves - toku_destroy_ftnode_internals(node); - - // now reallocate pieces and start filling them in - invariant(num_children > 0); - node->totalchildkeylens = 0; - - XCALLOC_N(num_pivots, node->childkeys); // allocate pointers to pivot structs - node->n_children = num_children; - XCALLOC_N(num_children, node->bp); // allocate pointers to basements (bp) - for (int i = 0; i < num_children; i++) { - set_BLB(node, i, toku_create_empty_bn()); // allocate empty basements and set bp pointers - } - - // now we start to fill in the data - - // first the pivots - for (int i = 0; i < num_pivots; i++) { - uint32_t keylen = key_sizes[new_pivots[i]]; - const void *key = key_pointers[new_pivots[i]]; - toku_memdup_dbt(&node->childkeys[i], key, keylen); - node->totalchildkeylens += keylen; - } - - uint32_t baseindex_this_bn = 0; - // now the basement nodes - for (int i = 0; i < num_children; i++) { - // put back seqinsert - BLB_SEQINSERT(node, i) = tmp_seqinsert; - - // create start (inclusive) and end (exclusive) boundaries for data of basement node - uint32_t curr_start = (i==0) ? 0 : new_pivots[i-1]+1; // index of first leaf in basement - uint32_t curr_end = (i==num_pivots) ? 
num_le : new_pivots[i]+1; // index of first leaf in next basement - uint32_t num_in_bn = curr_end - curr_start; // number of leaves in this basement - - // create indexes for new basement - invariant(baseindex_this_bn == curr_start); - uint32_t num_les_to_copy = num_les_this_bn[i]; - invariant(num_les_to_copy == num_in_bn); - - // construct mempool for this basement - size_t size_this_bn = bn_sizes[i]; - - BN_DATA bd = BLB_DATA(node, i); - bd->replace_contents_with_clone_of_sorted_array( - num_les_to_copy, - &key_pointers[baseindex_this_bn], - &key_sizes[baseindex_this_bn], - &leafpointers[baseindex_this_bn], - &le_sizes[baseindex_this_bn], - size_this_bn - ); - - BP_STATE(node,i) = PT_AVAIL; - BP_TOUCH_CLOCK(node,i); - BLB_MAX_MSN_APPLIED(node,i) = max_msn; - baseindex_this_bn += num_les_to_copy; // set to index of next bn - } - node->max_msn_applied_to_node_on_disk = max_msn; - - // destroy buffers of old mempools - for (uint32_t i = 0; i < num_orig_basements; i++) { - destroy_basement_node(old_bns[i]); - } -} // end of rebalance_ftnode_leaf() - struct serialize_times { tokutime_t serialize_time; tokutime_t compress_time; @@ -878,34 +694,50 @@ int toku_serialize_ftnode_to_memory(FTNODE node, // The resulting buffer is guaranteed to be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needed). // 512-byte padding is for O_DIRECT to work. { - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); if (do_rebalancing && node->height == 0) { - rebalance_ftnode_leaf(node, basementnodesize); + toku_ftnode_leaf_rebalance(node, basementnodesize); } const int npartitions = node->n_children; // Each partition represents a compressed sub block // For internal nodes, a sub block is a message buffer // For leaf nodes, a sub block is a basement node - toku::scoped_malloc sb_buf(sizeof(struct sub_block) * npartitions); + toku::scoped_calloc sb_buf(sizeof(struct sub_block) * npartitions); struct sub_block *sb = reinterpret_cast(sb_buf.get()); XREALLOC_N(npartitions, *ndd); - struct sub_block sb_node_info; - for (int i = 0; i < npartitions; i++) { - sub_block_init(&sb[i]);; - } - sub_block_init(&sb_node_info); // // First, let's serialize and compress the individual sub blocks // - struct serialize_times st; - memset(&st, 0, sizeof(st)); + + // determine how large our serialization and compression buffers need to be. 
+    size_t serialize_buf_size = 0, compression_buf_size = 0;
+    for (int i = 0; i < node->n_children; i++) {
+        sb[i].uncompressed_size = serialize_ftnode_partition_size(node, i);
+        sb[i].compressed_size_bound = toku_compress_bound(compression_method, sb[i].uncompressed_size);
+        serialize_buf_size += sb[i].uncompressed_size;
+        compression_buf_size += sb[i].compressed_size_bound + 8; // add 8 extra bytes, 4 for compressed size, 4 for decompressed size
+    }
+
+    // give each sub block a base pointer to enough buffer space for serialization and compression
+    toku::scoped_malloc serialize_buf(serialize_buf_size);
+    toku::scoped_malloc compression_buf(compression_buf_size);
+    for (size_t i = 0, uncompressed_offset = 0, compressed_offset = 0; i < (size_t) node->n_children; i++) {
+        sb[i].uncompressed_ptr = reinterpret_cast(serialize_buf.get()) + uncompressed_offset;
+        sb[i].compressed_ptr = reinterpret_cast(compression_buf.get()) + compressed_offset;
+        uncompressed_offset += sb[i].uncompressed_size;
+        compressed_offset += sb[i].compressed_size_bound + 8; // add 8 extra bytes, 4 for compressed size, 4 for decompressed size
+        invariant(uncompressed_offset <= serialize_buf_size);
+        invariant(compressed_offset <= compression_buf_size);
+    }
+
+    // do the actual serialization now that we have buffer space
+    struct serialize_times st = { 0, 0 };
     if (in_parallel) {
         serialize_and_compress_in_parallel(node, npartitions, compression_method, sb, &st);
-    }
-    else {
+    } else {
         serialize_and_compress_serially(node, npartitions, compression_method, sb, &st);
     }
@@ -913,16 +745,31 @@
     // Now lets create a sub-block that has the common node information,
     // This does NOT include the header
     //
+
+    // determine how large our serialization and compression buffers need to be
+    struct sub_block sb_node_info;
+    sub_block_init(&sb_node_info);
+    size_t sb_node_info_uncompressed_size = serialize_ftnode_info_size(node);
+    size_t sb_node_info_compressed_size_bound = toku_compress_bound(compression_method, sb_node_info_uncompressed_size);
+    toku::scoped_malloc sb_node_info_uncompressed_buf(sb_node_info_uncompressed_size);
+    toku::scoped_malloc sb_node_info_compressed_buf(sb_node_info_compressed_size_bound + 8); // add 8 extra bytes, 4 for compressed size, 4 for decompressed size
+    sb_node_info.uncompressed_size = sb_node_info_uncompressed_size;
+    sb_node_info.uncompressed_ptr = sb_node_info_uncompressed_buf.get();
+    sb_node_info.compressed_size_bound = sb_node_info_compressed_size_bound;
+    sb_node_info.compressed_ptr = sb_node_info_compressed_buf.get();
+
+    // do the actual serialization now that we have buffer space
     serialize_and_compress_sb_node_info(node, &sb_node_info, compression_method, &st);
+
+    //
+    // At this point, we have compressed each of our pieces into individual sub_blocks,
+    // we can put the header and all the subblocks into a single buffer and return it.
+    //
+
     // update the serialize times, ignore the header for simplicity. we captured all
     // of the partitions' serialize times so that's probably good enough.
     toku_ft_status_update_serialize_times(node, st.serialize_time, st.compress_time);
-    // now we have compressed each of our pieces into individual sub_blocks,
-    // we can put the header and all the subblocks into a single buffer
-    // and return it.
- // The total size of the node is: // size of header + disk size of the n+1 sub_block's created above uint32_t total_node_size = (serialize_node_header_size(node) // uncompressed header @@ -940,11 +787,10 @@ int toku_serialize_ftnode_to_memory(FTNODE node, total_uncompressed_size += sb[i].uncompressed_size + 4; } + // now create the final serialized node uint32_t total_buffer_size = roundup_to_multiple(512, total_node_size); // make the buffer be 512 bytes. - char *XMALLOC_N_ALIGNED(512, total_buffer_size, data); char *curr_ptr = data; - // now create the final serialized node // write the header struct wbuf wb; @@ -968,33 +814,20 @@ int toku_serialize_ftnode_to_memory(FTNODE node, curr_ptr += sizeof(sb[i].xsum); } // Zero the rest of the buffer - for (uint32_t i=total_node_size; i(*bytes_to_write) % 512 == 0); return 0; } int -toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT h, bool for_checkpoint) { +toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT ft, bool for_checkpoint) { size_t n_to_write; size_t n_uncompressed_bytes; @@ -1016,10 +849,10 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA int r = toku_serialize_ftnode_to_memory( node, ndd, - h->h->basementnodesize, - h->h->compression_method, + ft->h->basementnodesize, + ft->h->compression_method, do_rebalancing, - false, // in_parallel + toku_serialize_in_parallel, // in_parallel &n_to_write, &n_uncompressed_bytes, &compressed_buf @@ -1032,8 +865,12 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA invariant(blocknum.b>=0); DISKOFF offset; - toku_blocknum_realloc_on_disk(h->blocktable, blocknum, n_to_write, &offset, - h, fd, for_checkpoint); //dirties h + // Dirties the ft + ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, + ft, fd, for_checkpoint, + // Allocations for nodes high in the tree are considered 'hot', + // as they are likely to move again in the next checkpoint. + node->height); tokutime_t t0 = toku_time_now(); toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); @@ -1048,70 +885,119 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA } static void -deserialize_child_buffer(NONLEAF_CHILDINFO bnc, struct rbuf *rbuf, - DESCRIPTOR desc, ft_compare_func cmp) { - int r; - int n_in_this_buffer = rbuf_int(rbuf); - int32_t *fresh_offsets = NULL, *stale_offsets = NULL; - int32_t *broadcast_offsets = NULL; - int nfresh = 0, nstale = 0; - int nbroadcast_offsets = 0; - if (cmp) { - XMALLOC_N(n_in_this_buffer, stale_offsets); - XMALLOC_N(n_in_this_buffer, fresh_offsets); - XMALLOC_N(n_in_this_buffer, broadcast_offsets); - } - toku_fifo_resize(bnc->buffer, rbuf->size + 64); - for (int i = 0; i < n_in_this_buffer; i++) { - bytevec key; ITEMLEN keylen; - bytevec val; ITEMLEN vallen; - // this is weird but it's necessary to pass icc and gcc together - unsigned char ctype = rbuf_char(rbuf); - enum ft_msg_type type = (enum ft_msg_type) ctype; - bool is_fresh = rbuf_char(rbuf); - MSN msn = rbuf_msn(rbuf); - XIDS xids; - xids_create_from_buffer(rbuf, &xids); - rbuf_bytes(rbuf, &key, &keylen); /* Returns a pointer into the rbuf. 
 */
-        rbuf_bytes(rbuf, &val, &vallen);
-        int32_t *dest;
-        if (cmp) {
-            if (ft_msg_type_applies_once(type)) {
-                if (is_fresh) {
-                    dest = &fresh_offsets[nfresh];
-                    nfresh++;
-                } else {
-                    dest = &stale_offsets[nstale];
-                    nstale++;
-                }
-            } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) {
-                dest = &broadcast_offsets[nbroadcast_offsets];
-                nbroadcast_offsets++;
-            } else {
-                abort();
-            }
-        } else {
-            dest = NULL;
-        }
-        r = toku_fifo_enq(bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, dest); /* Copies the data into the fifo */
-        lazy_assert_zero(r);
-        xids_destroy(&xids);
-    }
-    invariant(rbuf->ndone == rbuf->size);
-
-    if (cmp) {
-        struct toku_fifo_entry_key_msn_cmp_extra extra = { .desc = desc, .cmp = cmp, .fifo = bnc->buffer };
-        r = toku::sort::mergesort_r(fresh_offsets, nfresh, extra);
-        assert_zero(r);
-        bnc->fresh_message_tree.destroy();
-        bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer);
-        r = toku::sort::mergesort_r(stale_offsets, nstale, extra);
-        assert_zero(r);
+sort_and_steal_offset_arrays(NONLEAF_CHILDINFO bnc,
+                             const toku::comparator &cmp,
+                             int32_t **fresh_offsets, int32_t nfresh,
+                             int32_t **stale_offsets, int32_t nstale,
+                             int32_t **broadcast_offsets, int32_t nbroadcast) {
+    // We always have fresh / broadcast offsets (even if they are empty)
+    // but we may not have stale offsets, in the case of v13 upgrade.
+    invariant(fresh_offsets != nullptr);
+    invariant(broadcast_offsets != nullptr);
+    invariant(cmp.valid());
+
+    typedef toku::sort msn_sort;
+
+    const int32_t n_in_this_buffer = nfresh + nstale + nbroadcast;
+    struct toku_msg_buffer_key_msn_cmp_extra extra(cmp, &bnc->msg_buffer);
+    msn_sort::mergesort_r(*fresh_offsets, nfresh, extra);
+    bnc->fresh_message_tree.destroy();
+    bnc->fresh_message_tree.create_steal_sorted_array(fresh_offsets, nfresh, n_in_this_buffer);
+    if (stale_offsets) {
+        msn_sort::mergesort_r(*stale_offsets, nstale, extra);
         bnc->stale_message_tree.destroy();
-        bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, n_in_this_buffer);
-        bnc->broadcast_list.destroy();
-        bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer);
+        bnc->stale_message_tree.create_steal_sorted_array(stale_offsets, nstale, n_in_this_buffer);
+    }
+    bnc->broadcast_list.destroy();
+    bnc->broadcast_list.create_steal_sorted_array(broadcast_offsets, nbroadcast, n_in_this_buffer);
+}
+
+static MSN
+deserialize_child_buffer_v13(FT ft, NONLEAF_CHILDINFO bnc, struct rbuf *rb) {
+    // We skip 'stale' offsets for upgraded nodes.
+    int32_t nfresh = 0, nbroadcast = 0;
+    int32_t *fresh_offsets = nullptr, *broadcast_offsets = nullptr;
+
+    // Only sort buffers if we have a valid comparison function. In certain scenarios,
+    // like deserialize_ft_versioned() or tokuftdump, we'll need to deserialize ftnodes
+    // for simple inspection and don't actually require that the message buffers are
+    // properly sorted. This is very ugly, but correct.
+    const bool sort = ft->cmp.valid();
+
+    MSN highest_msn_in_this_buffer =
+        bnc->msg_buffer.deserialize_from_rbuf_v13(rb, &ft->h->highest_unused_msn_for_upgrade,
+                                                  sort ? &fresh_offsets : nullptr, &nfresh,
+                                                  sort ? &broadcast_offsets : nullptr, &nbroadcast);
+
+    if (sort) {
+        sort_and_steal_offset_arrays(bnc, ft->cmp,
+                                     &fresh_offsets, nfresh,
+                                     nullptr, 0, // no stale offsets
+                                     &broadcast_offsets, nbroadcast);
+    }
+
+    return highest_msn_in_this_buffer;
+}
+
+static void
+deserialize_child_buffer_v26(NONLEAF_CHILDINFO bnc, struct rbuf *rb, const toku::comparator &cmp) {
+    int32_t nfresh = 0, nstale = 0, nbroadcast = 0;
+    int32_t *fresh_offsets, *stale_offsets, *broadcast_offsets;
+
+    // Only sort buffers if we have a valid comparison function. In certain scenarios,
+    // like deserialize_ft_versioned() or tokuftdump, we'll need to deserialize ftnodes
+    // for simple inspection and don't actually require that the message buffers are
+    // properly sorted. This is very ugly, but correct.
+    const bool sort = cmp.valid();
+
+    // read in the message buffer
+    bnc->msg_buffer.deserialize_from_rbuf(rb,
+                                          sort ? &fresh_offsets : nullptr, &nfresh,
+                                          sort ? &stale_offsets : nullptr, &nstale,
+                                          sort ? &broadcast_offsets : nullptr, &nbroadcast);
+
+    if (sort) {
+        sort_and_steal_offset_arrays(bnc, cmp,
+                                     &fresh_offsets, nfresh,
+                                     &stale_offsets, nstale,
+                                     &broadcast_offsets, nbroadcast);
+    }
+}
+
+static void
+deserialize_child_buffer(NONLEAF_CHILDINFO bnc, struct rbuf *rb) {
+    // read in the message buffer
+    bnc->msg_buffer.deserialize_from_rbuf(rb,
+                                          nullptr, nullptr, // fresh_offsets, nfresh,
+                                          nullptr, nullptr, // stale_offsets, nstale,
+                                          nullptr, nullptr); // broadcast_offsets, nbroadcast
+
+    // read in each message tree (fresh, stale, broadcast)
+    int32_t nfresh = rbuf_int(rb);
+    int32_t *XMALLOC_N(nfresh, fresh_offsets);
+    for (int i = 0; i < nfresh; i++) {
+        fresh_offsets[i] = rbuf_int(rb);
+    }
+
+    int32_t nstale = rbuf_int(rb);
+    int32_t *XMALLOC_N(nstale, stale_offsets);
+    for (int i = 0; i < nstale; i++) {
+        stale_offsets[i] = rbuf_int(rb);
+    }
+
+    int32_t nbroadcast = rbuf_int(rb);
+    int32_t *XMALLOC_N(nbroadcast, broadcast_offsets);
+    for (int i = 0; i < nbroadcast; i++) {
+        broadcast_offsets[i] = rbuf_int(rb);
+    }
+
+    // build OMTs out of each offset array
+    bnc->fresh_message_tree.destroy();
+    bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, nfresh);
+    bnc->stale_message_tree.destroy();
+    bnc->stale_message_tree.create_steal_sorted_array(&stale_offsets, nstale, nstale);
+    bnc->broadcast_list.destroy();
+    bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast, nbroadcast);
 }
 
 // dump a buffer to stderr
@@ -1177,7 +1063,7 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) {
 
 NONLEAF_CHILDINFO toku_create_empty_nl(void) {
     NONLEAF_CHILDINFO XMALLOC(cn);
-    int r = toku_fifo_create(&cn->buffer); assert_zero(r);
+    cn->msg_buffer.create();
     cn->fresh_message_tree.create_no_array();
     cn->stale_message_tree.create_no_array();
     cn->broadcast_list.create_no_array();
@@ -1185,13 +1071,16 @@ NONLEAF_CHILDINFO toku_create_empty_nl(void) {
     return cn;
 }
 
-// does NOT create OMTs, just the FIFO
+// must clone the OMTs, since we serialize them along with the message buffer
 NONLEAF_CHILDINFO toku_clone_nl(NONLEAF_CHILDINFO orig_childinfo) {
     NONLEAF_CHILDINFO XMALLOC(cn);
-    toku_fifo_clone(orig_childinfo->buffer, &cn->buffer);
+    cn->msg_buffer.clone(&orig_childinfo->msg_buffer);
     cn->fresh_message_tree.create_no_array();
+    cn->fresh_message_tree.clone(orig_childinfo->fresh_message_tree);
    cn->stale_message_tree.create_no_array();
+    cn->stale_message_tree.clone(orig_childinfo->stale_message_tree);
    cn->broadcast_list.create_no_array();
+
cn->broadcast_list.clone(orig_childinfo->broadcast_list); memset(cn->flow, 0, sizeof cn->flow); return cn; } @@ -1204,7 +1093,7 @@ void destroy_basement_node (BASEMENTNODE bn) void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl) { - toku_fifo_free(&nl->buffer); + nl->msg_buffer.destroy(); nl->fresh_message_tree.destroy(); nl->stale_message_tree.destroy(); nl->broadcast_list.destroy(); @@ -1214,13 +1103,13 @@ void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl) void read_block_from_fd_into_rbuf( int fd, BLOCKNUM blocknum, - FT h, + FT ft, struct rbuf *rb ) { // get the file offset and block size for the block DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(h->blocktable, blocknum, &offset, &size); + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); DISKOFF size_aligned = roundup_to_multiple(512, size); uint8_t *XMALLOC_N_ALIGNED(512, size_aligned, raw_block); rbuf_init(rb, raw_block, size); @@ -1236,11 +1125,12 @@ static const int read_header_heuristic_max = 32*1024; #define MIN(a,b) (((a)>(b)) ? (b) : (a)) #endif -static void read_ftnode_header_from_fd_into_rbuf_if_small_enough (int fd, BLOCKNUM blocknum, FT ft, struct rbuf *rb, struct ftnode_fetch_extra *bfe) // Effect: If the header part of the node is small enough, then read it into the rbuf. The rbuf will be allocated to be big enough in any case. -{ +static void read_ftnode_header_from_fd_into_rbuf_if_small_enough(int fd, BLOCKNUM blocknum, + FT ft, struct rbuf *rb, + ftnode_fetch_extra *bfe) { DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(ft->blocktable, blocknum, &offset, &size); + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); DISKOFF read_size = roundup_to_multiple(512, MIN(read_header_heuristic_max, size)); uint8_t *XMALLOC_N_ALIGNED(512, roundup_to_multiple(512, size), raw_block); rbuf_init(rb, raw_block, read_size); @@ -1268,11 +1158,11 @@ read_compressed_sub_block(struct rbuf *rb, struct sub_block *sb) int r = 0; sb->compressed_size = rbuf_int(rb); sb->uncompressed_size = rbuf_int(rb); - bytevec* cp = (bytevec*)&sb->compressed_ptr; + const void **cp = (const void **) &sb->compressed_ptr; rbuf_literal_bytes(rb, cp, sb->compressed_size); sb->xsum = rbuf_int(rb); // let's check the checksum - uint32_t actual_xsum = x1764_memory((char *)sb->compressed_ptr-8, 8+sb->compressed_size); + uint32_t actual_xsum = toku_x1764_memory((char *)sb->compressed_ptr-8, 8+sb->compressed_size); if (sb->xsum != actual_xsum) { r = TOKUDB_BAD_CHECKSUM; } @@ -1317,7 +1207,7 @@ verify_ftnode_sub_block (struct sub_block *sb) // first verify the checksum uint32_t data_size = sb->uncompressed_size - 4; // checksum is 4 bytes at end uint32_t stored_xsum = toku_dtoh32(*((uint32_t *)((char *)sb->uncompressed_ptr + data_size))); - uint32_t actual_xsum = x1764_memory(sb->uncompressed_ptr, data_size); + uint32_t actual_xsum = toku_x1764_memory(sb->uncompressed_ptr, data_size); if (stored_xsum != actual_xsum) { dump_bad_block((Bytef *) sb->uncompressed_ptr, sb->uncompressed_size); r = TOKUDB_BAD_CHECKSUM; @@ -1349,7 +1239,7 @@ deserialize_ftnode_info( struct rbuf rb; rbuf_init(&rb, (unsigned char *) sb->uncompressed_ptr, data_size); - node->max_msn_applied_to_node_on_disk = rbuf_msn(&rb); + node->max_msn_applied_to_node_on_disk = rbuf_MSN(&rb); (void)rbuf_int(&rb); node->flags = rbuf_int(&rb); node->height = rbuf_int(&rb); @@ -1367,20 +1257,10 @@ deserialize_ftnode_info( // n_children is now in the header, nd the allocatio of the node->bp is in deserialize_ftnode_from_rbuf. 
// now the pivots - node->totalchildkeylens = 0; if (node->n_children > 1) { - XMALLOC_N(node->n_children - 1, node->childkeys); - for (int i=0; i < node->n_children-1; i++) { - bytevec childkeyptr; - unsigned int cklen; - rbuf_bytes(&rb, &childkeyptr, &cklen); - toku_memdup_dbt(&node->childkeys[i], childkeyptr, cklen); - node->totalchildkeylens += cklen; - } - } - else { - node->childkeys = NULL; - node->totalchildkeylens = 0; + node->pivotkeys.deserialize_from_rbuf(&rb, node->n_children - 1); + } else { + node->pivotkeys.create_empty(); } // if this is an internal node, unpack the block nums, and fill in necessary fields @@ -1412,10 +1292,10 @@ setup_available_ftnode_partition(FTNODE node, int i) { } } -// Assign the child_to_read member of the bfe from the given brt node +// Assign the child_to_read member of the bfe from the given ftnode // that has been brought into memory. static void -update_bfe_using_ftnode(FTNODE node, struct ftnode_fetch_extra *bfe) +update_bfe_using_ftnode(FTNODE node, ftnode_fetch_extra *bfe) { if (bfe->type == ftnode_fetch_subset && bfe->search != NULL) { // we do not take into account prefetching yet @@ -1424,8 +1304,7 @@ update_bfe_using_ftnode(FTNODE node, struct ftnode_fetch_extra *bfe) // we find out what basement node the query cares about // and check if it is available bfe->child_to_read = toku_ft_search_which_child( - &bfe->h->cmp_descriptor, - bfe->h->compare_fun, + bfe->ft->cmp, node, bfe->search ); @@ -1435,10 +1314,9 @@ update_bfe_using_ftnode(FTNODE node, struct ftnode_fetch_extra *bfe) // we can possibly require is a single basement node // we find out what basement node the query cares about // and check if it is available - paranoid_invariant(bfe->h->compare_fun); if (node->height == 0) { - int left_child = toku_bfe_leftmost_child_wanted(bfe, node); - int right_child = toku_bfe_rightmost_child_wanted(bfe, node); + int left_child = bfe->leftmost_child_wanted(node); + int right_child = bfe->rightmost_child_wanted(node); if (left_child == right_child) { bfe->child_to_read = left_child; } @@ -1447,17 +1325,17 @@ update_bfe_using_ftnode(FTNODE node, struct ftnode_fetch_extra *bfe) } // Using the search parameters in the bfe, this function will -// initialize all of the given brt node's partitions. +// initialize all of the given ftnode's partitions. static void setup_partitions_using_bfe(FTNODE node, - struct ftnode_fetch_extra *bfe, + ftnode_fetch_extra *bfe, bool data_in_memory) { // Leftmost and Rightmost Child bounds. int lc, rc; if (bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch) { - lc = toku_bfe_leftmost_child_wanted(bfe, node); - rc = toku_bfe_rightmost_child_wanted(bfe, node); + lc = bfe->leftmost_child_wanted(node); + rc = bfe->rightmost_child_wanted(node); } else { lc = -1; rc = -1; @@ -1466,11 +1344,11 @@ setup_partitions_using_bfe(FTNODE node, // // setup memory needed for the node // - //printf("node height %d, blocknum %" PRId64 ", type %d lc %d rc %d\n", node->height, node->thisnodename.b, bfe->type, lc, rc); + //printf("node height %d, blocknum %" PRId64 ", type %d lc %d rc %d\n", node->height, node->blocknum.b, bfe->type, lc, rc); for (int i = 0; i < node->n_children; i++) { BP_INIT_UNTOUCHED_CLOCK(node,i); if (data_in_memory) { - BP_STATE(node, i) = ((toku_bfe_wants_child_available(bfe, i) || (lc <= i && i <= rc)) + BP_STATE(node, i) = ((bfe->wants_child_available(i) || (lc <= i && i <= rc)) ? 
PT_AVAIL : PT_COMPRESSED); } else { BP_STATE(node, i) = PT_ON_DISK; @@ -1494,7 +1372,7 @@ setup_partitions_using_bfe(FTNODE node, } } -static void setup_ftnode_partitions(FTNODE node, struct ftnode_fetch_extra* bfe, bool data_in_memory) +static void setup_ftnode_partitions(FTNODE node, ftnode_fetch_extra *bfe, bool data_in_memory) // Effect: Used when reading a ftnode into main memory, this sets up the partitions. // We set bfe->child_to_read as well as the BP_STATE and the data pointers (e.g., with set_BSB or set_BNULL or other set_ operations). // Arguments: Node: the node to set up. @@ -1517,8 +1395,7 @@ deserialize_ftnode_partition( struct sub_block *sb, FTNODE node, int childnum, // which partition to deserialize - DESCRIPTOR desc, - ft_compare_func cmp + const toku::comparator &cmp ) { int r = 0; @@ -1536,20 +1413,25 @@ deserialize_ftnode_partition( ch = rbuf_char(&rb); if (node->height > 0) { - assert(ch == FTNODE_PARTITION_FIFO_MSG); - deserialize_child_buffer(BNC(node, childnum), &rb, desc, cmp); + assert(ch == FTNODE_PARTITION_MSG_BUFFER); + NONLEAF_CHILDINFO bnc = BNC(node, childnum); + if (node->layout_version_read_from_disk <= FT_LAYOUT_VERSION_26) { + // Layout version <= 26 did not serialize sorted message trees to disk. + deserialize_child_buffer_v26(bnc, &rb, cmp); + } else { + deserialize_child_buffer(bnc, &rb); + } BP_WORKDONE(node, childnum) = 0; } else { - assert(ch == FTNODE_PARTITION_OMT_LEAVES); + assert(ch == FTNODE_PARTITION_DMT_LEAVES); BLB_SEQINSERT(node, childnum) = 0; uint32_t num_entries = rbuf_int(&rb); // we are now at the first byte of first leafentry data_size -= rb.ndone; // remaining bytes of leafentry data - + BASEMENTNODE bn = BLB(node, childnum); - bn->data_buffer.initialize_from_data(num_entries, &rb.buf[rb.ndone], data_size); - rb.ndone += data_size; + bn->data_buffer.deserialize_from_rbuf(num_entries, &rb, data_size, node->layout_version_read_from_disk); } assert(rb.ndone == rb.size); exit: @@ -1558,7 +1440,7 @@ deserialize_ftnode_partition( static int decompress_and_deserialize_worker(struct rbuf curr_rbuf, struct sub_block curr_sb, FTNODE node, int child, - DESCRIPTOR desc, ft_compare_func cmp, tokutime_t *decompress_time) + const toku::comparator &cmp, tokutime_t *decompress_time) { int r = 0; tokutime_t t0 = toku_time_now(); @@ -1566,7 +1448,7 @@ decompress_and_deserialize_worker(struct rbuf curr_rbuf, struct sub_block curr_s tokutime_t t1 = toku_time_now(); if (r == 0) { // at this point, sb->uncompressed_ptr stores the serialized node partition - r = deserialize_ftnode_partition(&curr_sb, node, child, desc, cmp); + r = deserialize_ftnode_partition(&curr_sb, node, child, cmp); } *decompress_time = t1 - t0; @@ -1597,7 +1479,7 @@ static FTNODE alloc_ftnode_for_deserialize(uint32_t fullhash, BLOCKNUM blocknum) // Effect: Allocate an FTNODE and fill in the values that are not read from FTNODE XMALLOC(node); node->fullhash = fullhash; - node->thisnodename = blocknum; + node->blocknum = blocknum; node->dirty = 0; node->bp = nullptr; node->oldest_referenced_xid_known = TXNID_NONE; @@ -1609,7 +1491,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, FTNODE_DISK_DATA* ndd, BLOCKNUM blocknum, uint32_t fullhash, - struct ftnode_fetch_extra *bfe, + ftnode_fetch_extra *bfe, struct rbuf *rb, int fd) // If we have enough information in the rbuf to construct a header, then do so. 
@@ -1633,7 +1515,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, goto cleanup; } - bytevec magic; + const void *magic; rbuf_literal_bytes(rb, &magic, 8); if (memcmp(magic, "tokuleaf", 8)!=0 && memcmp(magic, "tokunode", 8)!=0) { @@ -1681,7 +1563,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, } uint32_t checksum; - checksum = x1764_memory(rb->buf, rb->ndone); + checksum = toku_x1764_memory(rb->buf, rb->ndone); uint32_t stored_checksum; stored_checksum = rbuf_int(rb); if (stored_checksum != checksum) { @@ -1701,21 +1583,22 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, } // Finish reading compressed the sub_block - bytevec* cp; - cp = (bytevec*)&sb_node_info.compressed_ptr; + const void **cp; + cp = (const void **) &sb_node_info.compressed_ptr; rbuf_literal_bytes(rb, cp, sb_node_info.compressed_size); sb_node_info.xsum = rbuf_int(rb); // let's check the checksum uint32_t actual_xsum; - actual_xsum = x1764_memory((char *)sb_node_info.compressed_ptr-8, 8+sb_node_info.compressed_size); + actual_xsum = toku_x1764_memory((char *)sb_node_info.compressed_ptr-8, 8+sb_node_info.compressed_size); if (sb_node_info.xsum != actual_xsum) { r = TOKUDB_BAD_CHECKSUM; goto cleanup; } // Now decompress the subblock - sb_node_info.uncompressed_ptr = toku_xmalloc(sb_node_info.uncompressed_size); { + toku::scoped_malloc sb_node_info_buf(sb_node_info.uncompressed_size); + sb_node_info.uncompressed_ptr = sb_node_info_buf.get(); tokutime_t decompress_t0 = toku_time_now(); toku_decompress( (Bytef *) sb_node_info.uncompressed_ptr, @@ -1725,25 +1608,21 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, ); tokutime_t decompress_t1 = toku_time_now(); decompress_time = decompress_t1 - decompress_t0; - } - // at this point sb->uncompressed_ptr stores the serialized node info. - r = deserialize_ftnode_info(&sb_node_info, node); - if (r != 0) { - goto cleanup; + // at this point sb->uncompressed_ptr stores the serialized node info. + r = deserialize_ftnode_info(&sb_node_info, node); + if (r != 0) { + goto cleanup; + } } - toku_free(sb_node_info.uncompressed_ptr); - sb_node_info.uncompressed_ptr = NULL; - // Now we have the ftnode_info. We have a bunch more stuff in the // rbuf, so we might be able to store the compressed data for some // objects. // We can proceed to deserialize the individual subblocks. - paranoid_invariant(is_valid_ftnode_fetch_type(bfe->type)); // setup the memory of the partitions - // for partitions being decompressed, create either FIFO or basement node + // for partitions being decompressed, create either message buffer or basement node // for partitions staying compressed, create sub_block setup_ftnode_partitions(node, bfe, false); @@ -1763,7 +1642,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, // handle clock for (int i = 0; i < node->n_children; i++) { - if (toku_bfe_wants_child_available(bfe, i)) { + if (bfe->wants_child_available(i)) { paranoid_invariant(BP_STATE(node,i) == PT_AVAIL); BP_TOUCH_CLOCK(node,i); } @@ -1792,17 +1671,16 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, // also creates MSN's for older messages created in older versions // that did not generate MSN's for messages. These new MSN's are // generated from the root downwards, counting backwards from MIN_MSN -// and persisted in the brt header. +// and persisted in the ft header. 
static int deserialize_and_upgrade_internal_node(FTNODE node, struct rbuf *rb, - struct ftnode_fetch_extra* bfe, + ftnode_fetch_extra *bfe, STAT64INFO info) { - int r = 0; int version = node->layout_version_read_from_disk; - if(version == FT_LAST_LAYOUT_VERSION_WITH_FINGERPRINT) { + if (version == FT_LAST_LAYOUT_VERSION_WITH_FINGERPRINT) { (void) rbuf_int(rb); // 10. fingerprint } @@ -1826,18 +1704,8 @@ deserialize_and_upgrade_internal_node(FTNODE node, } } - node->childkeys = NULL; - node->totalchildkeylens = 0; - // I. Allocate keys based on number of children. - XMALLOC_N(node->n_children - 1, node->childkeys); - // II. Copy keys from buffer to allocated keys in ftnode. - for (int i = 0; i < node->n_children - 1; ++i) { - bytevec childkeyptr; - unsigned int cklen; - rbuf_bytes(rb, &childkeyptr, &cklen); // 17. child key pointers - toku_memdup_dbt(&node->childkeys[i], childkeyptr, cklen); - node->totalchildkeylens += cklen; - } + // Pivot keys + node->pivotkeys.deserialize_from_rbuf(rb, node->n_children - 1); // Create space for the child node buffers (a.k.a. partitions). XMALLOC_N(node->n_children, node->bp); @@ -1849,12 +1717,15 @@ deserialize_and_upgrade_internal_node(FTNODE node, } // Read in the child buffer maps. - struct sub_block_map child_buffer_map[node->n_children]; for (int i = 0; i < node->n_children; ++i) { - // The following fields are read in the - // sub_block_map_deserialize() call: - // 19. index 20. offset 21. size - sub_block_map_deserialize(&child_buffer_map[i], rb); + // The following fields were previously used by the `sub_block_map' + // They include: + // - 4 byte index + (void) rbuf_int(rb); + // - 4 byte offset + (void) rbuf_int(rb); + // - 4 byte size + (void) rbuf_int(rb); } // We need to setup this node's partitions, but we can't call the @@ -1866,8 +1737,8 @@ deserialize_and_upgrade_internal_node(FTNODE node, // sure we properly intitialize our partitions before filling them // in from our soon-to-be-upgraded node. update_bfe_using_ftnode(node, bfe); - struct ftnode_fetch_extra temp_bfe; - temp_bfe.type = ftnode_fetch_all; + ftnode_fetch_extra temp_bfe; + temp_bfe.create_for_full_read(nullptr); setup_partitions_using_bfe(node, &temp_bfe, true); // Cache the highest MSN generated for the message buffers. This @@ -1889,87 +1760,13 @@ deserialize_and_upgrade_internal_node(FTNODE node, // Deserialize de-compressed buffers. for (int i = 0; i < node->n_children; ++i) { NONLEAF_CHILDINFO bnc = BNC(node, i); - int n_in_this_buffer = rbuf_int(rb); // 22. node count - - int32_t *fresh_offsets = NULL; - int32_t *broadcast_offsets = NULL; - int nfresh = 0; - int nbroadcast_offsets = 0; - - if (bfe->h->compare_fun) { - XMALLOC_N(n_in_this_buffer, fresh_offsets); - // We skip 'stale' offsets for upgraded nodes. - XMALLOC_N(n_in_this_buffer, broadcast_offsets); - } - - // Atomically decrement the header's MSN count by the number - // of messages in the buffer. - MSN lowest; - uint64_t amount = n_in_this_buffer; - lowest.msn = toku_sync_sub_and_fetch(&bfe->h->h->highest_unused_msn_for_upgrade.msn, amount); + MSN highest_msn_in_this_buffer = deserialize_child_buffer_v13(bfe->ft, bnc, rb); if (highest_msn.msn == 0) { - highest_msn.msn = lowest.msn + n_in_this_buffer; - } - - // Create the FIFO entires from the deserialized buffer. - for (int j = 0; j < n_in_this_buffer; ++j) { - bytevec key; ITEMLEN keylen; - bytevec val; ITEMLEN vallen; - unsigned char ctype = rbuf_char(rb); // 23. 
message type - enum ft_msg_type type = (enum ft_msg_type) ctype; - XIDS xids; - xids_create_from_buffer(rb, &xids); // 24. XID - rbuf_bytes(rb, &key, &keylen); // 25. key - rbuf_bytes(rb, &val, &vallen); // 26. value - - // can we factor this out? - int32_t *dest; - if (bfe->h->compare_fun) { - if (ft_msg_type_applies_once(type)) { - dest = &fresh_offsets[nfresh]; - nfresh++; - } else if (ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type)) { - dest = &broadcast_offsets[nbroadcast_offsets]; - nbroadcast_offsets++; - } else { - abort(); - } - } else { - dest = NULL; - } - - // Increment our MSN, the last message should have the - // newest/highest MSN. See above for a full explanation. - lowest.msn++; - r = toku_fifo_enq(bnc->buffer, - key, - keylen, - val, - vallen, - type, - lowest, - xids, - true, - dest); - lazy_assert_zero(r); - xids_destroy(&xids); - } - - if (bfe->h->compare_fun) { - struct toku_fifo_entry_key_msn_cmp_extra extra = { .desc = &bfe->h->cmp_descriptor, - .cmp = bfe->h->compare_fun, - .fifo = bnc->buffer }; - typedef toku::sort key_msn_sort; - r = key_msn_sort::mergesort_r(fresh_offsets, nfresh, extra); - assert_zero(r); - bnc->fresh_message_tree.destroy(); - bnc->fresh_message_tree.create_steal_sorted_array(&fresh_offsets, nfresh, n_in_this_buffer); - bnc->broadcast_list.destroy(); - bnc->broadcast_list.create_steal_sorted_array(&broadcast_offsets, nbroadcast_offsets, n_in_this_buffer); + highest_msn.msn = highest_msn_in_this_buffer.msn; } } - // Assign the highest msn from our upgrade message FIFO queues. + // Assign the highest msn from our upgrade message buffers node->max_msn_applied_to_node_on_disk = highest_msn; // Since we assigned MSNs to this node's messages, we need to dirty it. node->dirty = 1; @@ -1978,7 +1775,7 @@ deserialize_and_upgrade_internal_node(FTNODE node, // still have the pointer to the buffer). if (version >= FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM) { uint32_t expected_xsum = toku_dtoh32(*(uint32_t*)(rb->buf+rb->size-4)); // 27. checksum - uint32_t actual_xsum = x1764_memory(rb->buf, rb->size-4); + uint32_t actual_xsum = toku_x1764_memory(rb->buf, rb->size-4); if (expected_xsum != actual_xsum) { fprintf(stderr, "%s:%d: Bad checksum: expected = %" PRIx32 ", actual= %" PRIx32 "\n", __FUNCTION__, @@ -1987,13 +1784,13 @@ deserialize_and_upgrade_internal_node(FTNODE node, actual_xsum); fprintf(stderr, "Checksum failure while reading node in file %s.\n", - toku_cachefile_fname_in_env(bfe->h->cf)); + toku_cachefile_fname_in_env(bfe->ft->cf)); fflush(stderr); return toku_db_badformat(); } } - return r; + return 0; } // This function takes a deserialized version 13 or 14 buffer and @@ -2001,7 +1798,7 @@ deserialize_and_upgrade_internal_node(FTNODE node, static int deserialize_and_upgrade_leaf_node(FTNODE node, struct rbuf *rb, - struct ftnode_fetch_extra* bfe, + ftnode_fetch_extra *bfe, STAT64INFO info) { int r = 0; @@ -2037,23 +1834,26 @@ deserialize_and_upgrade_leaf_node(FTNODE node, // basement node. node->n_children = 1; XMALLOC_N(node->n_children, node->bp); - // This is a malloc(0), but we need to do it in order to get a pointer - // we can free() later. - XMALLOC_N(node->n_children - 1, node->childkeys); - node->totalchildkeylens = 0; + node->pivotkeys.create_empty(); // Create one basement node to contain all the leaf entries by // setting up the single partition and updating the bfe. 
update_bfe_using_ftnode(node, bfe); - struct ftnode_fetch_extra temp_bfe; - fill_bfe_for_full_read(&temp_bfe, bfe->h); + ftnode_fetch_extra temp_bfe; + temp_bfe.create_for_full_read(bfe->ft); setup_partitions_using_bfe(node, &temp_bfe, true); // 11. Deserialize the partition maps, though they are not used in the - // newer versions of brt nodes. - struct sub_block_map part_map[npartitions]; - for (int i = 0; i < npartitions; ++i) { - sub_block_map_deserialize(&part_map[i], rb); + // newer versions of ftnodes. + for (int i = 0; i < node->n_children; ++i) { + // The following fields were previously used by the `sub_block_map' + // They include: + // - 4 byte index + (void) rbuf_int(rb); + // - 4 byte offset + (void) rbuf_int(rb); + // - 4 byte size + (void) rbuf_int(rb); } // Copy all of the leaf entries into the single basement node. @@ -2086,13 +1886,18 @@ deserialize_and_upgrade_leaf_node(FTNODE node, assert_zero(r); // Copy the pointer value straight into the OMT LEAFENTRY new_le_in_bn = nullptr; + void *maybe_free; bn->data_buffer.get_space_for_insert( i, key, keylen, new_le_size, - &new_le_in_bn + &new_le_in_bn, + &maybe_free ); + if (maybe_free) { + toku_free(maybe_free); + } memcpy(new_le_in_bn, new_le, new_le_size); toku_free(new_le); } @@ -2101,20 +1906,19 @@ deserialize_and_upgrade_leaf_node(FTNODE node, if (has_end_to_end_checksum) { data_size -= sizeof(uint32_t); } - bn->data_buffer.initialize_from_data(n_in_buf, &rb->buf[rb->ndone], data_size); - rb->ndone += data_size; + bn->data_buffer.deserialize_from_rbuf(n_in_buf, rb, data_size, node->layout_version_read_from_disk); } // Whatever this is must be less than the MSNs of every message above // it, so it's ok to take it here. - bn->max_msn_applied = bfe->h->h->highest_unused_msn_for_upgrade; + bn->max_msn_applied = bfe->ft->h->highest_unused_msn_for_upgrade; bn->stale_ancestor_messages_applied = false; node->max_msn_applied_to_node_on_disk = bn->max_msn_applied; // Checksum (end to end) is only on version 14 if (has_end_to_end_checksum) { uint32_t expected_xsum = rbuf_int(rb); // 17. checksum - uint32_t actual_xsum = x1764_memory(rb->buf, rb->size - 4); + uint32_t actual_xsum = toku_x1764_memory(rb->buf, rb->size - 4); if (expected_xsum != actual_xsum) { fprintf(stderr, "%s:%d: Bad checksum: expected = %" PRIx32 ", actual= %" PRIx32 "\n", __FUNCTION__, @@ -2123,7 +1927,7 @@ deserialize_and_upgrade_leaf_node(FTNODE node, actual_xsum); fprintf(stderr, "Checksum failure while reading node in file %s.\n", - toku_cachefile_fname_in_env(bfe->h->cf)); + toku_cachefile_fname_in_env(bfe->ft->cf)); fflush(stderr); return toku_db_badformat(); } @@ -2141,7 +1945,7 @@ deserialize_and_upgrade_leaf_node(FTNODE node, static int read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, DISKOFF offset, DISKOFF size, - FT h, + FT ft, struct rbuf *rb, /* out */ int *layout_version_p); @@ -2153,7 +1957,7 @@ static int deserialize_and_upgrade_ftnode(FTNODE node, FTNODE_DISK_DATA* ndd, BLOCKNUM blocknum, - struct ftnode_fetch_extra* bfe, + ftnode_fetch_extra *bfe, STAT64INFO info, int fd) { @@ -2164,16 +1968,14 @@ deserialize_and_upgrade_ftnode(FTNODE node, // we read the different sub-sections. 
// get the file offset and block size for the block DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(bfe->h->blocktable, - blocknum, - &offset, - &size); + bfe->ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); + struct rbuf rb; r = read_and_decompress_block_from_fd_into_rbuf(fd, blocknum, offset, size, - bfe->h, + bfe->ft, &rb, &version); if (r != 0) { @@ -2183,7 +1985,7 @@ deserialize_and_upgrade_ftnode(FTNODE node, // Re-read the magic field from the previous call, since we are // restarting with a fresh rbuf. { - bytevec magic; + const void *magic; rbuf_literal_bytes(&rb, &magic, 8); // 1. magic } @@ -2245,7 +2047,7 @@ deserialize_ftnode_from_rbuf( FTNODE_DISK_DATA* ndd, BLOCKNUM blocknum, uint32_t fullhash, - struct ftnode_fetch_extra* bfe, + ftnode_fetch_extra *bfe, STAT64INFO info, struct rbuf *rb, int fd @@ -2265,7 +2067,7 @@ deserialize_ftnode_from_rbuf( // now start reading from rbuf // first thing we do is read the header information - bytevec magic; + const void *magic; rbuf_literal_bytes(rb, &magic, 8); if (memcmp(magic, "tokuleaf", 8)!=0 && memcmp(magic, "tokunode", 8)!=0) { @@ -2313,7 +2115,7 @@ deserialize_ftnode_from_rbuf( } // verify checksum of header stored uint32_t checksum; - checksum = x1764_memory(rb->buf, rb->ndone); + checksum = toku_x1764_memory(rb->buf, rb->ndone); uint32_t stored_checksum; stored_checksum = rbuf_int(rb); if (stored_checksum != checksum) { @@ -2342,10 +2144,9 @@ deserialize_ftnode_from_rbuf( // now that the node info has been deserialized, we can proceed to deserialize // the individual sub blocks - paranoid_invariant(is_valid_ftnode_fetch_type(bfe->type)); // setup the memory of the partitions - // for partitions being decompressed, create either FIFO or basement node + // for partitions being decompressed, create either message buffer or basement node // for partitions staying compressed, create sub_block setup_ftnode_partitions(node, bfe, true); @@ -2386,7 +2187,7 @@ deserialize_ftnode_from_rbuf( // case where we read and decompress the partition tokutime_t partition_decompress_time; r = decompress_and_deserialize_worker(curr_rbuf, curr_sb, node, i, - &bfe->h->cmp_descriptor, bfe->h->compare_fun, &partition_decompress_time); + bfe->ft->cmp, &partition_decompress_time); decompress_time += partition_decompress_time; if (r != 0) { goto cleanup; @@ -2420,7 +2221,7 @@ deserialize_ftnode_from_rbuf( // NOTE: Right now, callers higher in the stack will assert on // failure, so this is OK for production. However, if we // create tools that use this function to search for errors in - // the BRT, then we will leak memory. + // the FT, then we will leak memory. 
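Aside: several readers above (the version-13/14 upgrade paths and deserialize_ftnode_from_rbuf()) verify an x1764 checksum before trusting the data; in the trailing-checksum case, the last four bytes of the block hold a checksum of everything that precedes them. A compact restatement of that check as a helper; the helper name is hypothetical, the calls are the ones used above.

// Verify the trailing end-to-end checksum of a serialized block.
static int check_end_to_end_xsum(const unsigned char *buf, size_t size) {
    uint32_t stored_xsum = toku_dtoh32(*(const uint32_t *) (buf + size - 4));
    uint32_t actual_xsum = toku_x1764_memory(buf, size - 4);
    return (stored_xsum == actual_xsum) ? 0 : TOKUDB_BAD_CHECKSUM;
}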
if (node) { toku_free(node); } @@ -2429,7 +2230,7 @@ deserialize_ftnode_from_rbuf( } int -toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra* bfe) { +toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, ftnode_fetch_extra *bfe) { int r = 0; assert(BP_STATE(node,childnum) == PT_ON_DISK); assert(node->bp[childnum].ptr.tag == BCT_NULL); @@ -2445,16 +2246,13 @@ toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, i // // get the file offset and block size for the block DISKOFF node_offset, total_node_disk_size; - toku_translate_blocknum_to_offset_size( - bfe->h->blocktable, - node->thisnodename, - &node_offset, - &total_node_disk_size - ); + bfe->ft->blocktable.translate_blocknum_to_offset_size(node->blocknum, &node_offset, &total_node_disk_size); uint32_t curr_offset = BP_START(ndd, childnum); - uint32_t curr_size = BP_SIZE (ndd, childnum); - struct rbuf rb = {.buf = NULL, .size = 0, .ndone = 0}; + uint32_t curr_size = BP_SIZE (ndd, childnum); + + struct rbuf rb; + rbuf_init(&rb, nullptr, 0); uint32_t pad_at_beginning = (node_offset+curr_offset)%512; uint32_t padded_size = roundup_to_multiple(512, pad_at_beginning + curr_size); @@ -2492,7 +2290,7 @@ toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, i // deserialize tokutime_t t2 = toku_time_now(); - r = deserialize_ftnode_partition(&curr_sb, node, childnum, &bfe->h->cmp_descriptor, bfe->h->compare_fun); + r = deserialize_ftnode_partition(&curr_sb, node, childnum, bfe->ft->cmp); tokutime_t t3 = toku_time_now(); @@ -2512,7 +2310,7 @@ toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, i // Take a ftnode partition that is in the compressed state, and make it avail int -toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fetch_extra *bfe) { +toku_deserialize_bp_from_compressed(FTNODE node, int childnum, ftnode_fetch_extra *bfe) { int r = 0; assert(BP_STATE(node, childnum) == PT_COMPRESSED); SUB_BLOCK curr_sb = BSB(node, childnum); @@ -2536,7 +2334,7 @@ toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fet tokutime_t t1 = toku_time_now(); - r = deserialize_ftnode_partition(curr_sb, node, childnum, &bfe->h->cmp_descriptor, bfe->h->compare_fun); + r = deserialize_ftnode_partition(curr_sb, node, childnum, bfe->ft->cmp); tokutime_t t2 = toku_time_now(); @@ -2557,13 +2355,13 @@ deserialize_ftnode_from_fd(int fd, uint32_t fullhash, FTNODE *ftnode, FTNODE_DISK_DATA *ndd, - struct ftnode_fetch_extra *bfe, + ftnode_fetch_extra *bfe, STAT64INFO info) { struct rbuf rb = RBUF_INITIALIZER; tokutime_t t0 = toku_time_now(); - read_block_from_fd_into_rbuf(fd, blocknum, bfe->h, &rb); + read_block_from_fd_into_rbuf(fd, blocknum, bfe->ft, &rb); tokutime_t t1 = toku_time_now(); // Decompress and deserialize the ftnode. Time statistics @@ -2579,14 +2377,14 @@ deserialize_ftnode_from_fd(int fd, return r; } -// Read brt node from file into struct. Perform version upgrade if necessary. +// Read ftnode from file into struct. Perform version upgrade if necessary. int toku_deserialize_ftnode_from (int fd, BLOCKNUM blocknum, uint32_t fullhash, FTNODE *ftnode, FTNODE_DISK_DATA* ndd, - struct ftnode_fetch_extra* bfe + ftnode_fetch_extra *bfe ) // Effect: Read a node in. If possible, read just the header. 
{ @@ -2596,7 +2394,7 @@ toku_deserialize_ftnode_from (int fd, // each function below takes the appropriate io/decompression/deserialize statistics if (!bfe->read_all_partitions) { - read_ftnode_header_from_fd_into_rbuf_if_small_enough(fd, blocknum, bfe->h, &rb, bfe); + read_ftnode_header_from_fd_into_rbuf_if_small_enough(fd, blocknum, bfe->ft, &rb, bfe); r = deserialize_ftnode_header_from_rbuf_if_small_enough(ftnode, ndd, blocknum, fullhash, bfe, &rb, fd); } else { // force us to do it the old way @@ -2628,7 +2426,7 @@ serialize_rollback_log_size(ROLLBACK_LOG_NODE log) { +8 //blocknum +8 //previous (blocknum) +8 //resident_bytecount - +8 //memarena_size_needed_to_load + +8 //memarena size +log->rollentry_resident_bytecount; return size; } @@ -2649,7 +2447,7 @@ serialize_rollback_log_node_to_buf(ROLLBACK_LOG_NODE log, char *buf, size_t calc wbuf_nocrc_BLOCKNUM(&wb, log->previous); wbuf_nocrc_ulonglong(&wb, log->rollentry_resident_bytecount); //Write down memarena size needed to restore - wbuf_nocrc_ulonglong(&wb, memarena_total_size_in_use(log->rollentry_arena)); + wbuf_nocrc_ulonglong(&wb, log->rollentry_arena.total_size_in_use()); { //Store rollback logs @@ -2705,7 +2503,7 @@ serialize_uncompressed_block_to_memory(char * uncompressed_buf, // compute the header checksum and serialize it uint32_t header_length = (char *)ptr - (char *)compressed_buf; - uint32_t xsum = x1764_memory(compressed_buf, header_length); + uint32_t xsum = toku_x1764_memory(compressed_buf, header_length); *ptr = toku_htod32(xsum); uint32_t padded_len = roundup_to_multiple(512, header_len + compressed_len); @@ -2745,7 +2543,7 @@ toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIAL int toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized, - FT h, bool for_checkpoint) { + FT ft, bool for_checkpoint) { size_t n_to_write; char *compressed_buf; struct serialized_rollback_log_node serialized_local; @@ -2757,20 +2555,26 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA serialized_log = &serialized_local; toku_serialize_rollback_log_to_memory_uncompressed(log, serialized_log); } + BLOCKNUM blocknum = serialized_log->blocknum; + invariant(blocknum.b >= 0); - //Compress and malloc buffer to write + // Compress and malloc buffer to write serialize_uncompressed_block_to_memory(serialized_log->data, - serialized_log->n_sub_blocks, serialized_log->sub_block, - h->h->compression_method, &n_to_write, &compressed_buf); + serialized_log->n_sub_blocks, + serialized_log->sub_block, + ft->h->compression_method, + &n_to_write, &compressed_buf); - { - lazy_assert(blocknum.b>=0); - DISKOFF offset; - toku_blocknum_realloc_on_disk(h->blocktable, blocknum, n_to_write, &offset, - h, fd, for_checkpoint); //dirties h - toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); - } + // Dirties the ft + DISKOFF offset; + ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, + ft, fd, for_checkpoint, + // We consider rollback log flushing the hottest possible allocation, + // since rollback logs are short-lived compared to FT nodes. 
+ INT_MAX); + + toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); toku_free(compressed_buf); if (!is_serialized) { toku_static_serialized_rollback_log_destroy(&serialized_local); @@ -2789,13 +2593,13 @@ deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log_p, return r; } - //printf("Deserializing %lld datasize=%d\n", off, datasize); - bytevec magic; + const void *magic; rbuf_literal_bytes(rb, &magic, 8); lazy_assert(!memcmp(magic, "tokuroll", 8)); result->layout_version = rbuf_int(rb); - lazy_assert(result->layout_version == FT_LAYOUT_VERSION); + lazy_assert((FT_LAYOUT_VERSION_25 <= result->layout_version && result->layout_version <= FT_LAYOUT_VERSION_27) || + (result->layout_version == FT_LAYOUT_VERSION)); result->layout_version_original = rbuf_int(rb); result->layout_version_read_from_disk = result->layout_version; result->build_id = rbuf_int(rb); @@ -2813,8 +2617,8 @@ deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log_p, result->rollentry_resident_bytecount = rbuf_ulonglong(rb); size_t arena_initial_size = rbuf_ulonglong(rb); - result->rollentry_arena = memarena_create_presized(arena_initial_size); - if (0) { died1: memarena_close(&result->rollentry_arena); goto died0; } + result->rollentry_arena.create(arena_initial_size); + if (0) { died1: result->rollentry_arena.destroy(); goto died0; } //Load rollback entries lazy_assert(rb->size > 4); @@ -2823,10 +2627,10 @@ deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log_p, while (rb->ndone < rb->size) { struct roll_entry *item; uint32_t rollback_fsize = rbuf_int(rb); //Already read 4. Rest is 4 smaller - bytevec item_vec; + const void *item_vec; rbuf_literal_bytes(rb, &item_vec, rollback_fsize-4); unsigned char* item_buf = (unsigned char*)item_vec; - r = toku_parse_rollback(item_buf, rollback_fsize-4, &item, result->rollentry_arena); + r = toku_parse_rollback(item_buf, rollback_fsize-4, &item, &result->rollentry_arena); if (r!=0) { r = toku_db_badformat(); goto died1; @@ -2855,7 +2659,7 @@ deserialize_rollback_log_from_rbuf_versioned (uint32_t version, BLOCKNUM blocknu struct rbuf *rb) { int r = 0; ROLLBACK_LOG_NODE rollback_log_node = NULL; - invariant(version==FT_LAYOUT_VERSION); //Rollback log nodes do not survive version changes. 
+ invariant((FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) || version == FT_LAYOUT_VERSION); r = deserialize_rollback_log_from_rbuf(blocknum, &rollback_log_node, rb); if (r==0) { *log = rollback_log_node; @@ -2877,7 +2681,7 @@ decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, s { // verify the header checksum uint32_t header_length = node_header_overhead + sub_block_header_size(n_sub_blocks); invariant(header_length <= raw_block_size); - uint32_t xsum = x1764_memory(raw_block, header_length); + uint32_t xsum = toku_x1764_memory(raw_block, header_length); uint32_t stored_xsum = toku_dtoh32(*(uint32_t *)(raw_block + header_length)); if (xsum != stored_xsum) { r = TOKUDB_BAD_CHECKSUM; @@ -2952,18 +2756,15 @@ decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, s return r; } -static int -decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) { +static int decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) { // This function exists solely to accomodate future changes in compression. int r = 0; - switch (version) { - case FT_LAYOUT_VERSION_13: - case FT_LAYOUT_VERSION_14: - case FT_LAYOUT_VERSION: - r = decompress_from_raw_block_into_rbuf(raw_block, raw_block_size, rb, blocknum); - break; - default: - abort(); + if ((version == FT_LAYOUT_VERSION_13 || version == FT_LAYOUT_VERSION_14) || + (FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) || + version == FT_LAYOUT_VERSION) { + r = decompress_from_raw_block_into_rbuf(raw_block, raw_block_size, rb, blocknum); + } else { + abort(); } return r; } @@ -2971,7 +2772,7 @@ decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_blo static int read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, DISKOFF offset, DISKOFF size, - FT h, + FT ft, struct rbuf *rb, /* out */ int *layout_version_p) { int r = 0; @@ -3010,7 +2811,7 @@ read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, if (r == TOKUDB_BAD_CHECKSUM) { fprintf(stderr, "Checksum failure while reading raw block in file %s.\n", - toku_cachefile_fname_in_env(h->cf)); + toku_cachefile_fname_in_env(ft->cf)); abort(); } else { r = toku_db_badformat(); @@ -3030,16 +2831,19 @@ read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, return r; } -// Read rollback log node from file into struct. Perform version upgrade if necessary. -int -toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT h) { +// Read rollback log node from file into struct. +// Perform version upgrade if necessary. 
+int toku_deserialize_rollback_log_from(int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT ft) { int layout_version = 0; int r; - struct rbuf rb = {.buf = NULL, .size = 0, .ndone = 0}; + + struct rbuf rb; + rbuf_init(&rb, nullptr, 0); // get the file offset and block size for the block DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(h->blocktable, blocknum, &offset, &size); + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); + // if the size is 0, then the blocknum is unused if (size == 0) { // blocknum is unused, just create an empty one and get out @@ -3051,7 +2855,7 @@ toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE goto cleanup; } - r = read_and_decompress_block_from_fd_into_rbuf(fd, blocknum, offset, size, h, &rb, &layout_version); + r = read_and_decompress_block_from_fd_into_rbuf(fd, blocknum, offset, size, ft, &rb, &layout_version); if (r!=0) goto cleanup; { @@ -3065,24 +2869,26 @@ toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE r = deserialize_rollback_log_from_rbuf_versioned(layout_version, blocknum, logp, &rb); cleanup: - if (rb.buf) toku_free(rb.buf); + if (rb.buf) { + toku_free(rb.buf); + } return r; } int -toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h) +toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft) { int r = 0; // 15 was the last version with subtree estimates - invariant(h->layout_version_read_from_disk <= FT_LAYOUT_VERSION_15); + invariant(ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION_15); FTNODE unused_node = NULL; FTNODE_DISK_DATA unused_ndd = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, h); - r = deserialize_ftnode_from_fd(fd, h->h->root_blocknum, 0, &unused_node, &unused_ndd, - &bfe, &h->h->on_disk_stats); - h->in_memory_stats = h->h->on_disk_stats; + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft); + r = deserialize_ftnode_from_fd(fd, ft->h->root_blocknum, 0, &unused_node, &unused_ndd, + &bfe, &ft->h->on_disk_stats); + ft->in_memory_stats = ft->h->on_disk_stats; if (unused_node) { toku_ftnode_free(&unused_node); @@ -3094,22 +2900,22 @@ toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h) } int -toku_upgrade_msn_from_root_to_header(int fd, FT h) +toku_upgrade_msn_from_root_to_header(int fd, FT ft) { int r; // 21 was the first version with max_msn_in_ft in the header - invariant(h->layout_version_read_from_disk <= FT_LAYOUT_VERSION_20); + invariant(ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION_20); FTNODE node; FTNODE_DISK_DATA ndd; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, h); - r = deserialize_ftnode_from_fd(fd, h->h->root_blocknum, 0, &node, &ndd, &bfe, nullptr); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft); + r = deserialize_ftnode_from_fd(fd, ft->h->root_blocknum, 0, &node, &ndd, &bfe, nullptr); if (r != 0) { goto exit; } - h->h->max_msn_in_ft = node->max_msn_applied_to_node_on_disk; + ft->h->max_msn_in_ft = node->max_msn_applied_to_node_on_disk; toku_ftnode_free(&node); toku_free(ndd); exit: diff --git a/storage/tokudb/ft-index/ft/memarena.h b/storage/tokudb/ft-index/ft/serialize/ft_node-serialize.h similarity index 55% rename from storage/tokudb/ft-index/ft/memarena.h rename to storage/tokudb/ft-index/ft/serialize/ft_node-serialize.h index 949521cfd06f6..319e270dd58d2 100644 --- a/storage/tokudb/ft-index/ft/memarena.h +++ b/storage/tokudb/ft-index/ft/serialize/ft_node-serialize.h @@ -1,9 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; 
indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_MEMARENA_H -#define TOKU_MEMARENA_H -#ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -32,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,45 +86,57 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -/* We have too many memory management tricks: - * memarena (this code) is for a collection of objects that cannot be moved. - * The pattern is allocate more and more stuff. - * Don't free items as you go. - * Free all the items at once. - * Then reuse the same buffer again. - * Allocated objects never move. - * A memarena (as currently implemented) is not suitable for interprocess memory sharing. No reason it couldn't be made to work though. - */ - -#include "fttypes.h" - -MEMARENA memarena_create_presized (size_t initial_size); -// Effect: Create a memarena with initial size. In case of ENOMEM, aborts. - -MEMARENA memarena_create (void); -// Effect: Create a memarena with default initial size. In case of ENOMEM, aborts. - -void memarena_clear (MEMARENA ma); -// Effect: Reset the internal state so that the allocated memory can be used again. - -void* malloc_in_memarena (MEMARENA ma, size_t size); -// Effect: Allocate some memory. The returned value remains valid until the memarena is cleared or closed. -// In case of ENOMEM, aborts. - -void *memarena_memdup (MEMARENA ma, const void *v, size_t len); - -void memarena_close(MEMARENA *ma); - -void memarena_move_buffers(MEMARENA dest, MEMARENA source); -// Effect: Move all the memory from SOURCE into DEST. When SOURCE is closed the memory won't be freed. When DEST is closed, the memory will be freed. (Unless DEST moves its memory to another memarena...) - -size_t memarena_total_memory_size (MEMARENA); -// Effect: Calculate the amount of memory used by a memory arena. 
- -size_t memarena_total_size_in_use (MEMARENA); - - -#endif +#pragma once + +#include "ft/ft.h" +#include "ft/node.h" +#include "ft/serialize/sub_block.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" +#include "ft/serialize/block_table.h" + +unsigned int toku_serialize_ftnode_size(FTNODE node); +int toku_serialize_ftnode_to_memory(FTNODE node, FTNODE_DISK_DATA *ndd, + unsigned int basementnodesize, + enum toku_compression_method compression_method, + bool do_rebalancing, bool in_parallel, + size_t *n_bytes_to_write, size_t *n_uncompressed_bytes, + char **bytes_to_write); +int toku_serialize_ftnode_to(int fd, BLOCKNUM, FTNODE node, FTNODE_DISK_DATA *ndd, bool do_rebalancing, FT ft, bool for_checkpoint); +int toku_serialize_rollback_log_to(int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized, + FT ft, bool for_checkpoint); +void toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized); + +int toku_deserialize_rollback_log_from(int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT ft); +int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, ftnode_fetch_extra *bfe); +int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, ftnode_fetch_extra *bfe); +int toku_deserialize_ftnode_from(int fd, BLOCKNUM off, uint32_t fullhash, FTNODE *node, FTNODE_DISK_DATA *ndd, ftnode_fetch_extra *bfe); + +void toku_serialize_set_parallel(bool); + +// used by nonleaf node partial eviction +void toku_create_compressed_partition_from_available(FTNODE node, int childnum, + enum toku_compression_method compression_method, SUB_BLOCK sb); + +// For verifying old, non-upgraded nodes (versions 13 and 14). +int decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum); + +// used by verify +int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version); +void read_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, FT ft, struct rbuf *rb); +int read_compressed_sub_block(struct rbuf *rb, struct sub_block *sb); +int verify_ftnode_sub_block(struct sub_block *sb); +void just_decompress_sub_block(struct sub_block *sb); + +// used by ft-node-deserialize.cc +void initialize_ftnode(FTNODE node, BLOCKNUM blocknum); +int read_and_check_magic(struct rbuf *rb); +int read_and_check_version(FTNODE node, struct rbuf *rb); +void read_node_info(FTNODE node, struct rbuf *rb, int version); +void allocate_and_read_partition_offsets(FTNODE node, struct rbuf *rb, FTNODE_DISK_DATA *ndd); +int check_node_info_checksum(struct rbuf *rb); +void read_legacy_node_info(FTNODE node, struct rbuf *rb, int version); +int check_legacy_end_checksum(struct rbuf *rb); + +// exported so the loader can dump bad blocks +void dump_bad_block(unsigned char *vp, uint64_t size); diff --git a/storage/tokudb/ft-index/ft/quicklz.cc b/storage/tokudb/ft-index/ft/serialize/quicklz.cc similarity index 99% rename from storage/tokudb/ft-index/ft/quicklz.cc rename to storage/tokudb/ft-index/ft/serialize/quicklz.cc index 62dfb8e019237..81f768ababfeb 100644 --- a/storage/tokudb/ft-index/ft/quicklz.cc +++ b/storage/tokudb/ft-index/ft/serialize/quicklz.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
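Aside: the new ft/serialize/ft_node-serialize.h header above collects the public (de)serialization entry points. A hypothetical caller, modelled on toku_upgrade_msn_from_root_to_header() earlier in ft_node-serialize.cc, showing how a fetch_extra created for a minimal read pairs with toku_deserialize_ftnode_from(); this is a sketch, not code from the patch.

// Hypothetical: read just enough of the root node to inspect its header fields.
static int read_root_node_min(int fd, FT ft, FTNODE *node, FTNODE_DISK_DATA *ndd) {
    ftnode_fetch_extra bfe;
    bfe.create_for_min_read(ft);  // no partitions are materialized up front
    return toku_deserialize_ftnode_from(fd, ft->h->root_blocknum, 0 /*fullhash*/,
                                        node, ndd, &bfe);
}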
DISCLAIMER: @@ -311,6 +311,7 @@ static __inline void memcpy_up(unsigned char *dst, const unsigned char *src, ui3 #endif } +__attribute__((unused)) static __inline void update_hash(qlz_state_decompress *state, const unsigned char *s) { #if QLZ_COMPRESSION_LEVEL == 1 diff --git a/storage/tokudb/ft-index/ft/quicklz.h b/storage/tokudb/ft-index/ft/serialize/quicklz.h similarity index 98% rename from storage/tokudb/ft-index/ft/quicklz.h rename to storage/tokudb/ft-index/ft/serialize/quicklz.h index 2f2db8cd73991..362a246994ff8 100644 --- a/storage/tokudb/ft-index/ft/quicklz.h +++ b/storage/tokudb/ft-index/ft/serialize/quicklz.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef QLZ_HEADER -#define QLZ_HEADER // Fast data compression library // Copyright (C) 2006-2011 Lasse Mikkel Reinhold @@ -228,6 +228,3 @@ int qlz_get_setting(int setting); #if defined (__cplusplus) } #endif - -#endif - diff --git a/storage/tokudb/ft-index/ft/rbuf.h b/storage/tokudb/ft-index/ft/serialize/rbuf.h similarity index 68% rename from storage/tokudb/ft-index/ft/rbuf.h rename to storage/tokudb/ft-index/ft/serialize/rbuf.h index 22e41881cd4df..c72ea6b79dbde 100644 --- a/storage/tokudb/ft-index/ft/rbuf.h +++ b/storage/tokudb/ft-index/ft/serialize/rbuf.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef RBUF_H -#define RBUF_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,16 +87,18 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include -#include "memarena.h" -#include "toku_assert.h" -#include "fttypes.h" -#include "memory.h" -#include +#include +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_htonl.h" +#include "portability/toku_portability.h" +#include "util/memarena.h" struct rbuf { unsigned char *buf; @@ -122,11 +122,11 @@ static inline unsigned char rbuf_char (struct rbuf *r) { return r->buf[r->ndone++]; } -static inline void rbuf_ma_uint8_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), uint8_t *num) { +static inline void rbuf_ma_uint8_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint8_t *num) { *num = rbuf_char(r); } -static inline void rbuf_ma_bool (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), bool *b) { +static inline void rbuf_ma_bool (struct rbuf *r, memarena *ma __attribute__((__unused__)), bool *b) { uint8_t n = rbuf_char(r); *b = (n!=0); } @@ -158,14 +158,14 @@ static unsigned int rbuf_int (struct rbuf *r) { #endif } -static inline void rbuf_literal_bytes (struct rbuf *r, bytevec *bytes, unsigned int n_bytes) { +static inline void rbuf_literal_bytes (struct rbuf *r, const void **bytes, unsigned int n_bytes) { *bytes = &r->buf[r->ndone]; r->ndone+=n_bytes; assert(r->ndone<=r->size); } /* Return a pointer into the middle of the buffer. */ -static inline void rbuf_bytes (struct rbuf *r, bytevec *bytes, unsigned int *n_bytes) +static inline void rbuf_bytes (struct rbuf *r, const void **bytes, unsigned int *n_bytes) { *n_bytes = rbuf_int(r); rbuf_literal_bytes(r, bytes, *n_bytes); @@ -181,82 +181,14 @@ static inline signed long long rbuf_longlong (struct rbuf *r) { return (signed long long)rbuf_ulonglong(r); } -static inline DISKOFF rbuf_diskoff (struct rbuf *r) { - return rbuf_ulonglong(r); -} - -static inline LSN rbuf_lsn (struct rbuf *r) { - LSN lsn = {rbuf_ulonglong(r)}; - return lsn; -} - -static inline MSN rbuf_msn (struct rbuf *r) { - MSN msn = {rbuf_ulonglong(r)}; - return msn; -} - -static inline BLOCKNUM rbuf_blocknum (struct rbuf *r) { - BLOCKNUM result = make_blocknum(rbuf_longlong(r)); - return result; -} -static inline void rbuf_ma_BLOCKNUM (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), BLOCKNUM *blocknum) { - *blocknum = rbuf_blocknum(r); -} - -static inline void rbuf_ma_uint32_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), uint32_t *num) { +static inline void rbuf_ma_uint32_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint32_t *num) { *num = rbuf_int(r); } -static inline void rbuf_ma_uint64_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), uint64_t *num) { +static inline void rbuf_ma_uint64_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint64_t *num) { *num = rbuf_ulonglong(r); } - -static inline void rbuf_TXNID (struct rbuf *r, TXNID *txnid) { - *txnid = rbuf_ulonglong(r); -} - -static inline void rbuf_TXNID_PAIR (struct rbuf *r, TXNID_PAIR *txnid) { - txnid->parent_id64 = rbuf_ulonglong(r); - txnid->child_id64 = rbuf_ulonglong(r); -} - -static inline void rbuf_ma_TXNID (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), TXNID *txnid) { - rbuf_TXNID(r, txnid); -} - -static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), TXNID_PAIR *txnid) { - rbuf_TXNID_PAIR(r, txnid); -} - -static inline void rbuf_FILENUM (struct rbuf *r, FILENUM *filenum) { - filenum->fileid = rbuf_int(r); -} -static inline void rbuf_ma_FILENUM (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), FILENUM *filenum) 
{ - rbuf_FILENUM(r, filenum); -} - -// 2954 -// Don't try to use the same space, malloc it -static inline void rbuf_FILENUMS(struct rbuf *r, FILENUMS *filenums) { - filenums->num = rbuf_int(r); - filenums->filenums = (FILENUM *) toku_malloc( filenums->num * sizeof(FILENUM) ); - assert(filenums->filenums != NULL); - for (uint32_t i=0; i < filenums->num; i++) { - rbuf_FILENUM(r, &(filenums->filenums[i])); - } -} - -// 2954 -static inline void rbuf_ma_FILENUMS (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), FILENUMS *filenums) { - rbuf_ma_uint32_t(r, ma, &(filenums->num)); - filenums->filenums = (FILENUM *) malloc_in_memarena(ma, filenums->num * sizeof(FILENUM) ); - assert(filenums->filenums != NULL); - for (uint32_t i=0; i < filenums->num; i++) { - rbuf_ma_FILENUM(r, ma, &(filenums->filenums[i])); - } -} - // Don't try to use the same space, malloc it static inline void rbuf_BYTESTRING (struct rbuf *r, BYTESTRING *bs) { bs->len = rbuf_int(r); @@ -267,14 +199,12 @@ static inline void rbuf_BYTESTRING (struct rbuf *r, BYTESTRING *bs) { r->ndone = newndone; } -static inline void rbuf_ma_BYTESTRING (struct rbuf *r, MEMARENA ma, BYTESTRING *bs) { +static inline void rbuf_ma_BYTESTRING (struct rbuf *r, memarena *ma, BYTESTRING *bs) { bs->len = rbuf_int(r); uint32_t newndone = r->ndone + bs->len; assert(newndone <= r->size); - bs->data = (char *) memarena_memdup(ma, &r->buf[r->ndone], (size_t)bs->len); + bs->data = (char *) ma->malloc_from_arena(bs->len); assert(bs->data); + memcpy(bs->data, &r->buf[r->ndone], bs->len); r->ndone = newndone; } - - -#endif diff --git a/storage/tokudb/ft-index/ft/sub_block.cc b/storage/tokudb/ft-index/ft/serialize/sub_block.cc similarity index 97% rename from storage/tokudb/ft-index/ft/sub_block.cc rename to storage/tokudb/ft-index/ft/serialize/sub_block.cc index 7dc790a61b736..1346c76b103b2 100644 --- a/storage/tokudb/ft-index/ft/sub_block.cc +++ b/storage/tokudb/ft-index/ft/serialize/sub_block.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,22 +89,21 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
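Aside: rbuf_ma_BYTESTRING() above shows the new memarena usage in this patch: allocate out of the arena with malloc_from_arena() and never free individual objects, only the whole arena (create()/destroy() appear in the rollback-log code earlier). A small sketch under that model; the helper itself is hypothetical.

#include <string.h>
#include "util/memarena.h"

// Hypothetical: copy a string into an arena; it is released only by ma->destroy().
static char *arena_copy_string(memarena *ma, const char *s, size_t len) {
    char *p = (char *) ma->malloc_from_arena(len + 1);
    memcpy(p, s, len);
    p[len] = '\0';
    return p;
}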
-#include "sub_block.h" - -#include "compress.h" -#include "quicklz.h" -#include "x1764.h" - -#include -#include -#include -#include - +#include #include #include -#include #include +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_portability.h" + +#include "ft/serialize/compress.h" +#include "ft/serialize/sub_block.h" +#include "ft/serialize/quicklz.h" +#include "util/threadpool.h" +#include "util/x1764.h" + SUB_BLOCK sub_block_creat(void) { SUB_BLOCK XMALLOC(sb); sub_block_init(sb); @@ -268,7 +267,7 @@ compress_sub_block(struct sub_block *sub_block, enum toku_compression_method met method ); // checksum it - sub_block->xsum = x1764_memory(sub_block->compressed_ptr, sub_block->compressed_size); + sub_block->xsum = toku_x1764_memory(sub_block->compressed_ptr, sub_block->compressed_size); } void * @@ -365,7 +364,7 @@ decompress_sub_block(void *compress_ptr, uint32_t compress_size, void *uncompres int result = 0; // verify checksum - uint32_t xsum = x1764_memory(compress_ptr, compress_size); + uint32_t xsum = toku_x1764_memory(compress_ptr, compress_size); if (xsum != expected_xsum) { if (verbose_decompress_sub_block) fprintf(stderr, "%s:%d xsum %u expected %u\n", __FUNCTION__, __LINE__, xsum, expected_xsum); result = EINVAL; diff --git a/storage/tokudb/ft-index/ft/sub_block.h b/storage/tokudb/ft-index/ft/serialize/sub_block.h similarity index 95% rename from storage/tokudb/ft-index/ft/sub_block.h rename to storage/tokudb/ft-index/ft/serialize/sub_block.h index 23fad83c96684..1a371c2dcd381 100644 --- a/storage/tokudb/ft-index/ft/sub_block.h +++ b/storage/tokudb/ft-index/ft/serialize/sub_block.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_SUB_BLOCK_H -#define TOKU_SUB_BLOCK_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,18 +87,19 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include "compress.h" -#include "fttypes.h" - +#include "ft/serialize/compress.h" +// TODO: Clean this abstraciton up static const int max_sub_blocks = 8; -static const int target_sub_block_size = 512*1024; +static const int target_sub_block_size = 512 * 1024; static const int max_basement_nodes = 32; -static const int max_basement_node_uncompressed_size = 256*1024; -static const int max_basement_node_compressed_size = 64*1024; +static const int max_basement_node_uncompressed_size = 256 * 1024; +static const int max_basement_node_compressed_size = 64 * 1024; struct sub_block { void *uncompressed_ptr; @@ -112,6 +111,7 @@ struct sub_block { uint32_t xsum; // sub block checksum }; +typedef struct sub_block *SUB_BLOCK; struct stored_sub_block { uint32_t uncompressed_size; @@ -212,6 +212,3 @@ int decompress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], unsigned char *compressed_data, unsigned char *uncompressed_data, int num_cores, struct toku_thread_pool *pool); extern int verbose_decompress_sub_block; - - -#endif diff --git a/storage/tokudb/ft-index/ft/wbuf.h b/storage/tokudb/ft-index/ft/serialize/wbuf.h similarity index 74% rename from storage/tokudb/ft-index/ft/wbuf.h rename to storage/tokudb/ft-index/ft/serialize/wbuf.h index 282233920a725..8c71fb16b2092 100644 --- a/storage/tokudb/ft-index/ft/wbuf.h +++ b/storage/tokudb/ft-index/ft/serialize/wbuf.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef WBUF_H -#define WBUF_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,18 +86,18 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include #include -#include - -#include "fttypes.h" -#include "x1764.h" +#include "portability/toku_htonl.h" -#define CRC_INCR +#include "util/bytestring.h" +#include "util/x1764.h" /* When serializing a value, write it into a buffer. */ /* This code requires that the buffer be big enough to hold whatever you put into it. 
*/ @@ -113,15 +111,15 @@ struct wbuf { struct x1764 checksum; // The checksum state }; -static inline void wbuf_nocrc_init (struct wbuf *w, void *buf, DISKOFF size) { +static inline void wbuf_nocrc_init (struct wbuf *w, void *buf, unsigned int size) { w->buf = (unsigned char *) buf; w->size = size; w->ndone = 0; } -static inline void wbuf_init (struct wbuf *w, void *buf, DISKOFF size) { +static inline void wbuf_init (struct wbuf *w, void *buf, unsigned int size) { wbuf_nocrc_init(w, buf, size); - x1764_init(&w->checksum); + toku_x1764_init(&w->checksum); } static inline size_t wbuf_get_woffset(struct wbuf *w) { @@ -142,7 +140,7 @@ static inline void wbuf_nocrc_uint8_t (struct wbuf *w, uint8_t ch) { static inline void wbuf_char (struct wbuf *w, unsigned char ch) { wbuf_nocrc_char (w, ch); - x1764_add(&w->checksum, &w->buf[w->ndone-1], 1); + toku_x1764_add(&w->checksum, &w->buf[w->ndone-1], 1); } //Write an int that MUST be in network order regardless of disk order @@ -150,7 +148,7 @@ static void wbuf_network_int (struct wbuf *w, int32_t i) __attribute__((__unused static void wbuf_network_int (struct wbuf *w, int32_t i) { assert(w->ndone + 4 <= w->size); *(uint32_t*)(&w->buf[w->ndone]) = toku_htonl(i); - x1764_add(&w->checksum, &w->buf[w->ndone], 4); + toku_x1764_add(&w->checksum, &w->buf[w->ndone], 4); w->ndone += 4; } @@ -176,7 +174,7 @@ static inline void wbuf_nocrc_int (struct wbuf *w, int32_t i) { static inline void wbuf_int (struct wbuf *w, int32_t i) { wbuf_nocrc_int(w, i); - x1764_add(&w->checksum, &w->buf[w->ndone-4], 4); + toku_x1764_add(&w->checksum, &w->buf[w->ndone-4], 4); } static inline void wbuf_nocrc_uint (struct wbuf *w, uint32_t i) { @@ -187,7 +185,14 @@ static inline void wbuf_uint (struct wbuf *w, uint32_t i) { wbuf_int(w, (int32_t)i); } -static inline void wbuf_nocrc_literal_bytes(struct wbuf *w, bytevec bytes_bv, uint32_t nbytes) { +static inline uint8_t* wbuf_nocrc_reserve_literal_bytes(struct wbuf *w, uint32_t nbytes) { + assert(w->ndone + nbytes <= w->size); + uint8_t * dest = w->buf + w->ndone; + w->ndone += nbytes; + return dest; +} + +static inline void wbuf_nocrc_literal_bytes(struct wbuf *w, const void *bytes_bv, uint32_t nbytes) { const unsigned char *bytes = (const unsigned char *) bytes_bv; #if 0 { int i; for (i=0; ichecksum, &w->buf[w->ndone-nbytes], nbytes); + toku_x1764_add(&w->checksum, &w->buf[w->ndone-nbytes], nbytes); } -static void wbuf_nocrc_bytes (struct wbuf *w, bytevec bytes_bv, uint32_t nbytes) { +static void wbuf_nocrc_bytes (struct wbuf *w, const void *bytes_bv, uint32_t nbytes) { wbuf_nocrc_uint(w, nbytes); wbuf_nocrc_literal_bytes(w, bytes_bv, nbytes); } -static void wbuf_bytes (struct wbuf *w, bytevec bytes_bv, uint32_t nbytes) { +static void wbuf_bytes (struct wbuf *w, const void *bytes_bv, uint32_t nbytes) { wbuf_uint(w, nbytes); wbuf_literal_bytes(w, bytes_bv, nbytes); } @@ -255,76 +260,3 @@ static inline void wbuf_nocrc_uint32_t (struct wbuf *w, uint32_t v) { static inline void wbuf_uint32_t (struct wbuf *w, uint32_t v) { wbuf_uint(w, v); } - -static inline void wbuf_DISKOFF (struct wbuf *w, DISKOFF off) { - wbuf_ulonglong(w, (uint64_t)off); -} - -static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { - wbuf_ulonglong(w, b.b); -} -static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { - wbuf_nocrc_ulonglong(w, b.b); -} - -static inline void wbuf_nocrc_TXNID (struct wbuf *w, TXNID tid) { - wbuf_nocrc_ulonglong(w, tid); -} - -static inline void wbuf_nocrc_TXNID_PAIR (struct wbuf *w, TXNID_PAIR tid) { - 
wbuf_nocrc_ulonglong(w, tid.parent_id64); - wbuf_nocrc_ulonglong(w, tid.child_id64); -} - - -static inline void wbuf_TXNID (struct wbuf *w, TXNID tid) { - wbuf_ulonglong(w, tid); -} - -static inline void wbuf_nocrc_XIDP (struct wbuf *w, XIDP xid) { - wbuf_nocrc_uint32_t(w, xid->formatID); - wbuf_nocrc_uint8_t(w, xid->gtrid_length); - wbuf_nocrc_uint8_t(w, xid->bqual_length); - wbuf_nocrc_literal_bytes(w, xid->data, xid->gtrid_length+xid->bqual_length); -} - -static inline void wbuf_nocrc_LSN (struct wbuf *w, LSN lsn) { - wbuf_nocrc_ulonglong(w, lsn.lsn); -} - -static inline void wbuf_LSN (struct wbuf *w, LSN lsn) { - wbuf_ulonglong(w, lsn.lsn); -} - -static inline void wbuf_MSN (struct wbuf *w, MSN msn) { - wbuf_ulonglong(w, msn.msn); -} - -static inline void wbuf_nocrc_FILENUM (struct wbuf *w, FILENUM fileid) { - wbuf_nocrc_uint(w, fileid.fileid); -} - -static inline void wbuf_FILENUM (struct wbuf *w, FILENUM fileid) { - wbuf_uint(w, fileid.fileid); -} - -// 2954 -static inline void wbuf_nocrc_FILENUMS (struct wbuf *w, FILENUMS v) { - wbuf_nocrc_uint(w, v.num); - uint32_t i; - for (i = 0; i < v.num; i++) { - wbuf_nocrc_FILENUM(w, v.filenums[i]); - } -} - -// 2954 -static inline void wbuf_FILENUMS (struct wbuf *w, FILENUMS v) { - wbuf_uint(w, v.num); - uint32_t i; - for (i = 0; i < v.num; i++) { - wbuf_FILENUM(w, v.filenums[i]); - } -} - - -#endif diff --git a/storage/tokudb/ft-index/ft/workset.h b/storage/tokudb/ft-index/ft/serialize/workset.h similarity index 98% rename from storage/tokudb/ft-index/ft/workset.h rename to storage/tokudb/ft-index/ft/serialize/workset.h index 27dd97780067a..4efa042b9c3df 100644 --- a/storage/tokudb/ft-index/ft/workset.h +++ b/storage/tokudb/ft-index/ft/serialize/workset.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef _TOKU_WORKSET_H -#define _TOKU_WORKSET_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
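The wbuf.h changes above keep the split between the wbuf_nocrc_* writers and their checksumming counterparts: wbuf_int() calls wbuf_nocrc_int() and then folds the four bytes just written into the running x1764 state with toku_x1764_add(), and the deleted typed helpers (wbuf_BLOCKNUM, wbuf_LSN, wbuf_FILENUM and friends) were thin wrappers over those primitives. The sketch below models that write-then-fold pattern; mini_wbuf and mini_x_add() are toy stand-ins for struct wbuf and toku_x1764_add(), and only the shape of the two write paths is taken from the header.

    // Toy model of a wbuf-style append buffer with an optional running checksum.
    // Bounds checks are asserts, as in the original header.
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    struct mini_wbuf {
        uint8_t *buf;
        size_t size;
        size_t ndone;
        uint32_t checksum;   // running checksum state, stands in for struct x1764
    };

    static void mini_x_add(uint32_t *state, const uint8_t *p, size_t n) {
        for (size_t i = 0; i < n; i++) *state = *state * 31 + p[i];
    }

    static void mini_wbuf_init(mini_wbuf *w, void *buf, size_t size) {
        w->buf = static_cast<uint8_t *>(buf);
        w->size = size;
        w->ndone = 0;
        w->checksum = 0;
    }

    // "nocrc" primitive: just append
    static void mini_wbuf_nocrc_uint32(mini_wbuf *w, uint32_t v) {
        assert(w->ndone + 4 <= w->size);
        memcpy(&w->buf[w->ndone], &v, 4);
        w->ndone += 4;
    }

    // crc variant: append, then fold the bytes just written into the checksum,
    // the same shape as wbuf_int() = wbuf_nocrc_int() + toku_x1764_add()
    static void mini_wbuf_uint32(mini_wbuf *w, uint32_t v) {
        mini_wbuf_nocrc_uint32(w, v);
        mini_x_add(&w->checksum, &w->buf[w->ndone - 4], 4);
    }

    int main(void) {
        uint8_t storage[64];
        mini_wbuf w;
        mini_wbuf_init(&w, storage, sizeof(storage));
        mini_wbuf_uint32(&w, 42);        // checksummed write
        mini_wbuf_nocrc_uint32(&w, 7);   // raw write, checksum untouched
        return (w.ndone == 8) ? 0 : 1;
    }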
@@ -187,5 +187,3 @@ workset_join(struct workset *ws) { } workset_unlock(ws); } - -#endif diff --git a/storage/tokudb/ft-index/ft/sub_block_map.h b/storage/tokudb/ft-index/ft/sub_block_map.h deleted file mode 100644 index 3c1d71078d8af..0000000000000 --- a/storage/tokudb/ft-index/ft/sub_block_map.h +++ /dev/null @@ -1,127 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef _TOKU_SUB_BLOCK_MAP_H -#define _TOKU_SUB_BLOCK_MAP_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. 
This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -// Map objects to a sequence of sub block -struct sub_block_map { - uint32_t idx; - uint32_t offset; - uint32_t size; -}; - -enum { - stored_sub_block_map_size = sizeof (struct sub_block_map), // size of a sub-block map on disk -}; - -static inline void -sub_block_map_init(struct sub_block_map *sbmap, uint32_t idx, uint32_t offset, uint32_t size) { - sbmap->idx = idx; - sbmap->offset = offset; - sbmap->size = size; -} - -static inline void -sub_block_map_serialize(struct sub_block_map *sbmap, struct wbuf *wbuf) { - wbuf_nocrc_int(wbuf, sbmap->idx); - wbuf_nocrc_int(wbuf, sbmap->offset); - wbuf_nocrc_int(wbuf, sbmap->size); -} - -static inline void -sub_block_map_deserialize(struct sub_block_map *sbmap, struct rbuf *rbuf) { - sbmap->idx = rbuf_int(rbuf); - sbmap->offset = rbuf_int(rbuf); - sbmap->size = rbuf_int(rbuf); -} - -#endif diff --git a/storage/tokudb/ft-index/ft/tests/CMakeLists.txt b/storage/tokudb/ft-index/ft/tests/CMakeLists.txt index 209155d692d1e..a363b70c5dd9f 100644 --- a/storage/tokudb/ft-index/ft/tests/CMakeLists.txt +++ b/storage/tokudb/ft-index/ft/tests/CMakeLists.txt @@ -101,6 +101,17 @@ if(BUILD_TESTING OR BUILD_FT_TESTS) set_property(TEST ft/upgrade_test_simple APPEND PROPERTY ENVIRONMENT "TOKUDB_DATA=${TOKUDB_DATA}") + # should be a file GLOB and a loop + declare_custom_tests(test-upgrade-recovery-logs) + add_ft_test_aux(test-upgrade-recovery-logs-24-clean test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-24-clean) + add_ft_test_aux(test-upgrade-recovery-logs-24-dirty test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-24-dirty) + add_ft_test_aux(test-upgrade-recovery-logs-25-clean test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-25-clean) + add_ft_test_aux(test-upgrade-recovery-logs-25-dirty test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-25-dirty) + add_ft_test_aux(test-upgrade-recovery-logs-26-clean test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-26-clean) + add_ft_test_aux(test-upgrade-recovery-logs-26-dirty test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-26-dirty) + add_ft_test_aux(test-upgrade-recovery-logs-27-clean test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-27-clean) + add_ft_test_aux(test-upgrade-recovery-logs-27-dirty test-upgrade-recovery-logs ${TOKUDB_DATA}/upgrade-recovery-logs-27-dirty) + ## give some tests, that time out 
normally, 1 hour to complete set(long_tests ft/ftloader-test-extractor-3a diff --git a/storage/tokudb/ft-index/ft/tests/benchmark-test.cc b/storage/tokudb/ft-index/ft/tests/benchmark-test.cc index 0f7a0d4f84bbe..0acb97daa878f 100644 --- a/storage/tokudb/ft-index/ft/tests/benchmark-test.cc +++ b/storage/tokudb/ft-index/ft/tests/benchmark-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -119,8 +119,8 @@ static FT_HANDLE t; static void setup (void) { int r; unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); - r = toku_open_ft_handle(fname, 1, &t, nodesize, basementnodesize, compression_method, ct, NULL_TXN, toku_builtin_compare_fun); assert(r==0); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); + r = toku_open_ft_handle(fname, 1, &t, nodesize, basementnodesize, compression_method, ct, nullptr, toku_builtin_compare_fun); assert(r==0); } static void toku_shutdown (void) { diff --git a/storage/tokudb/ft-index/src/tests/test_txn_abort9.cc b/storage/tokudb/ft-index/ft/tests/block_allocator_strategy_test.cc similarity index 52% rename from storage/tokudb/ft-index/src/tests/test_txn_abort9.cc rename to storage/tokudb/ft-index/ft/tests/block_allocator_strategy_test.cc index 48c4ae120e8cd..6879002a025e8 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_abort9.cc +++ b/storage/tokudb/ft-index/ft/tests/block_allocator_strategy_test.cc @@ -1,6 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" + /* COPYING CONDITIONS NOTICE: @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,77 +86,91 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#include "test.h" -#include - -#include -#include -#include -#include -#include - -// -static void -test_abort_close (void) { - -#ifndef USE_TDB -#if DB_VERSION_MAJOR==4 && DB_VERSION_MINOR==3 - if (verbose) fprintf(stderr, "%s does not work for BDB %d.%d. Not running\n", __FILE__, DB_VERSION_MAJOR, DB_VERSION_MINOR); - return; -#else - toku_os_recursive_delete(TOKU_TEST_FILENAME); - toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); - - int r; - DB_ENV *env; - r = db_env_create(&env, 0); assert(r == 0); - r = env->set_data_dir(env, TOKU_TEST_FILENAME); - r = env->set_lg_dir(env, TOKU_TEST_FILENAME); - env->set_errfile(env, stdout); - r = env->open(env, 0, DB_INIT_MPOOL + DB_INIT_LOG + DB_INIT_LOCK + DB_INIT_TXN + DB_PRIVATE + DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); - if (r != 0) printf("%s:%d:%d:%s\n", __FILE__, __LINE__, r, db_strerror(r)); - assert(r == 0); - - DB_TXN *txn = 0; - r = env->txn_begin(env, 0, &txn, 0); assert(r == 0); - - DB *db; - r = db_create(&db, env, 0); assert(r == 0); - r = db->open(db, txn, "test.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); assert(r == 0); - - { - toku_struct_stat statbuf; - char fullfile[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, "test.db"), &statbuf); - assert(r==0); - } - - // Close before commit. 
- r = db->close(db, 0); - r = txn->commit(txn, 0); CKERR(r); - - // Now reopen it - r = env->txn_begin(env, 0, &txn, 0); CKERR(r); - r = db_create(&db, env, 0); assert(r == 0); - r = db->open(db, txn, "test.db", 0, DB_BTREE, 0, S_IRWXU+S_IRWXG+S_IRWXO); assert(r == 0); +#include "ft/tests/test.h" + +#include "ft/serialize/block_allocator_strategy.h" + +static const uint64_t alignment = 4096; + +static void test_first_vs_best_fit(void) { + struct block_allocator::blockpair pairs[] = { + block_allocator::blockpair(1 * alignment, 6 * alignment), + // hole between 7x align -> 8x align + block_allocator::blockpair(8 * alignment, 4 * alignment), + // hole between 12x align -> 16x align + block_allocator::blockpair(16 * alignment, 1 * alignment), + block_allocator::blockpair(17 * alignment, 2 * alignment), + // hole between 19 align -> 21x align + block_allocator::blockpair(21 * alignment, 2 * alignment), + }; + const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]); - DBT k,v; - r = db->put(db, txn, dbt_init(&k, "hello", 6), dbt_init(&v, "there", 6), 0); - CKERR(r); + block_allocator::blockpair *bp; + + // first fit + bp = block_allocator_strategy::first_fit(pairs, n_blocks, 100, alignment); + assert(bp == &pairs[0]); + bp = block_allocator_strategy::first_fit(pairs, n_blocks, 4096, alignment); + assert(bp == &pairs[0]); + bp = block_allocator_strategy::first_fit(pairs, n_blocks, 3 * 4096, alignment); + assert(bp == &pairs[1]); + bp = block_allocator_strategy::first_fit(pairs, n_blocks, 5 * 4096, alignment); + assert(bp == nullptr); + + // best fit + bp = block_allocator_strategy::best_fit(pairs, n_blocks, 100, alignment); + assert(bp == &pairs[0]); + bp = block_allocator_strategy::best_fit(pairs, n_blocks, 4100, alignment); + assert(bp == &pairs[3]); + bp = block_allocator_strategy::best_fit(pairs, n_blocks, 3 * 4096, alignment); + assert(bp == &pairs[1]); + bp = block_allocator_strategy::best_fit(pairs, n_blocks, 5 * 4096, alignment); + assert(bp == nullptr); +} - r = db->close(db, 0); +static void test_padded_fit(void) { + struct block_allocator::blockpair pairs[] = { + block_allocator::blockpair(1 * alignment, 1 * alignment), + // 4096 byte hole after bp[0] + block_allocator::blockpair(3 * alignment, 1 * alignment), + // 8192 byte hole after bp[1] + block_allocator::blockpair(6 * alignment, 1 * alignment), + // 16384 byte hole after bp[2] + block_allocator::blockpair(11 * alignment, 1 * alignment), + // 32768 byte hole after bp[3] + block_allocator::blockpair(17 * alignment, 1 * alignment), + // 116kb hole after bp[4] + block_allocator::blockpair(113 * alignment, 1 * alignment), + // 256kb hole after bp[5] + block_allocator::blockpair(371 * alignment, 1 * alignment), + }; + const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]); + + block_allocator::blockpair *bp; + + // padding for a 100 byte allocation will be < than standard alignment, + // so it should fit in the first 4096 byte hole. 
+ bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 4000, alignment); + assert(bp == &pairs[0]); - r = txn->abort(txn); assert(r == 0); + // Even padded, a 12kb alloc will fit in a 16kb hole + bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 3 * alignment, alignment); + assert(bp == &pairs[2]); - r = env->close(env, 0); CKERR(r); + // would normally fit in the 116kb hole but the padding will bring it over + bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 116 * alignment, alignment); + assert(bp == &pairs[5]); -#endif -#endif + bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 127 * alignment, alignment); + assert(bp == &pairs[5]); } -int -test_main(int UU(argc), char UU(*const argv[])) { - test_abort_close(); +int test_main(int argc, const char *argv[]) { + (void) argc; + (void) argv; + + test_first_vs_best_fit(); + test_padded_fit(); + return 0; } diff --git a/storage/tokudb/ft-index/ft/tests/block_allocator_test.cc b/storage/tokudb/ft-index/ft/tests/block_allocator_test.cc index ef6f1fcdc9727..bbd170ebaabbf 100644 --- a/storage/tokudb/ft-index/ft/tests/block_allocator_test.cc +++ b/storage/tokudb/ft-index/ft/tests/block_allocator_test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,75 +88,48 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." - #include "test.h" -static void ba_alloc_at (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t offset) { - block_allocator_validate(ba); - block_allocator_alloc_block_at(ba, size*512, offset*512); - block_allocator_validate(ba); -} - -static void ba_alloc (BLOCK_ALLOCATOR ba, uint64_t size, uint64_t *answer) { - block_allocator_validate(ba); +static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) { + ba->validate(); uint64_t actual_answer; - block_allocator_alloc_block(ba, 512*size, &actual_answer); - block_allocator_validate(ba); + const uint64_t heat = random() % 2; + ba->alloc_block(512 * size, heat, &actual_answer); + ba->validate(); + assert(actual_answer%512==0); *answer = actual_answer/512; } -static void ba_free (BLOCK_ALLOCATOR ba, uint64_t offset) { - block_allocator_validate(ba); - block_allocator_free_block(ba, offset*512); - block_allocator_validate(ba); +static void ba_free(block_allocator *ba, uint64_t offset) { + ba->validate(); + ba->free_block(offset * 512); + ba->validate(); } -static void -ba_check_l (BLOCK_ALLOCATOR ba, uint64_t blocknum_in_layout_order, uint64_t expected_offset, uint64_t expected_size) -{ +static void ba_check_l(block_allocator *ba, uint64_t blocknum_in_layout_order, + uint64_t expected_offset, uint64_t expected_size) { uint64_t actual_offset, actual_size; - int r = block_allocator_get_nth_block_in_layout_order(ba, blocknum_in_layout_order, &actual_offset, &actual_size); + int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size); assert(r==0); assert(expected_offset*512 == actual_offset); assert(expected_size *512 == actual_size); } -static void -ba_check_none (BLOCK_ALLOCATOR ba, uint64_t blocknum_in_layout_order) -{ +static void ba_check_none(block_allocator *ba, uint64_t blocknum_in_layout_order) { uint64_t actual_offset, actual_size; - int r = block_allocator_get_nth_block_in_layout_order(ba, blocknum_in_layout_order, &actual_offset, &actual_size); + int r = 
ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size); assert(r==-1); } // Simple block allocator test -static void -test_ba0 (void) { - BLOCK_ALLOCATOR ba; - uint64_t b0, b1; - create_block_allocator(&ba, 100*512, 1*512); - assert(block_allocator_allocated_limit(ba)==100*512); - ba_alloc_at(ba, 50, 100); - assert(block_allocator_allocated_limit(ba)==150*512); - ba_alloc_at(ba, 25, 150); - ba_alloc (ba, 10, &b0); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, 100, 50); - ba_check_l (ba, 2, 150, 25); - ba_check_l (ba, 3, b0, 10); - ba_check_none (ba, 4); - assert(b0==175); - ba_free(ba, 150); - ba_alloc_at(ba, 10, 150); - ba_alloc(ba, 10, &b0); - assert(b0==160); - ba_alloc(ba, 10, &b0); - ba_alloc(ba, 113, &b1); - assert(113*512==block_allocator_block_size(ba, b1 *512)); - assert(10 *512==block_allocator_block_size(ba, b0 *512)); - assert(50 *512==block_allocator_block_size(ba, 100*512)); +static void test_ba0(block_allocator::allocation_strategy strategy) { + block_allocator allocator; + block_allocator *ba = &allocator; + ba->create(100*512, 1*512); + ba->set_strategy(strategy); + assert(ba->allocated_limit()==100*512); uint64_t b2, b3, b4, b5, b6, b7; ba_alloc(ba, 100, &b2); @@ -183,27 +156,28 @@ test_ba0 (void) { ba_free(ba, b4); ba_alloc(ba, 100, &b4); - destroy_block_allocator(&ba); - assert(ba==0); + ba->destroy(); } // Manually to get coverage of all the code in the block allocator. static void -test_ba1 (int n_initial) { - BLOCK_ALLOCATOR ba; - create_block_allocator(&ba, 0*512, 1*512); - int i; +test_ba1(block_allocator::allocation_strategy strategy, int n_initial) { + block_allocator allocator; + block_allocator *ba = &allocator; + ba->create(0*512, 1*512); + ba->set_strategy(strategy); + int n_blocks=0; uint64_t blocks[1000]; - for (i=0; i<1000; i++) { - if (i0) { + if (n_blocks > 0) { int blocknum = random()%n_blocks; //printf("F[%d]%ld\n", blocknum, blocks[blocknum]); ba_free(ba, blocks[blocknum]); @@ -213,19 +187,21 @@ test_ba1 (int n_initial) { } } - destroy_block_allocator(&ba); - assert(ba==0); + ba->destroy(); } // Check to see if it is first fit or best fit. 
static void test_ba2 (void) { - BLOCK_ALLOCATOR ba; + block_allocator allocator; + block_allocator *ba = &allocator; uint64_t b[6]; enum { BSIZE = 1024 }; - create_block_allocator(&ba, 100*512, BSIZE*512); - assert(block_allocator_allocated_limit(ba)==100*512); + ba->create(100*512, BSIZE*512); + ba->set_strategy(block_allocator::BA_STRATEGY_FIRST_FIT); + assert(ba->allocated_limit()==100*512); + ba_check_l (ba, 0, 0, 100); ba_check_none (ba, 1); @@ -234,16 +210,16 @@ test_ba2 (void) ba_check_l (ba, 1, BSIZE, 100); ba_check_none (ba, 2); - ba_alloc (ba, BSIZE+100, &b[1]); + ba_alloc (ba, BSIZE + 100, &b[1]); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_none (ba, 3); ba_alloc (ba, 100, &b[2]); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_none (ba, 4); @@ -252,7 +228,7 @@ test_ba2 (void) ba_alloc (ba, 100, &b[5]); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_l (ba, 4, 5*BSIZE, 100); ba_check_l (ba, 5, 6*BSIZE, 100); @@ -262,7 +238,7 @@ test_ba2 (void) ba_free (ba, 4*BSIZE); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 5*BSIZE, 100); ba_check_l (ba, 4, 6*BSIZE, 100); ba_check_l (ba, 5, 7*BSIZE, 100); @@ -273,7 +249,7 @@ test_ba2 (void) assert(b2==4*BSIZE); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_l (ba, 4, 5*BSIZE, 100); ba_check_l (ba, 5, 6*BSIZE, 100); @@ -283,7 +259,7 @@ test_ba2 (void) ba_free (ba, BSIZE); ba_free (ba, 5*BSIZE); ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 1, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 2, 4*BSIZE, 100); ba_check_l (ba, 3, 6*BSIZE, 100); ba_check_l (ba, 4, 7*BSIZE, 100); @@ -301,7 +277,7 @@ test_ba2 (void) assert(b5==5*BSIZE); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_l (ba, 4, 5*BSIZE, 100); ba_check_l (ba, 5, 6*BSIZE, 100); @@ -318,7 +294,7 @@ test_ba2 (void) assert(b8==10*BSIZE); ba_check_l (ba, 0, 0, 100); ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE+100); + ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); ba_check_l (ba, 3, 4*BSIZE, 100); ba_check_l (ba, 4, 5*BSIZE, 100); ba_check_l (ba, 5, 6*BSIZE, 100); @@ -344,15 +320,23 @@ test_ba2 (void) ba_alloc(ba, 100, &b11); assert(b11==5*BSIZE); - destroy_block_allocator(&ba); + ba->destroy(); } int test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { - test_ba0(); - test_ba1(0); - test_ba1(10); - test_ba1(20); + enum block_allocator::allocation_strategy strategies[] = { + block_allocator::BA_STRATEGY_FIRST_FIT, + block_allocator::BA_STRATEGY_BEST_FIT, + block_allocator::BA_STRATEGY_PADDED_FIT, + block_allocator::BA_STRATEGY_HEAT_ZONE, + }; + for (size_t i = 0; i < sizeof(strategies) / sizeof(strategies[0]); i++) { + test_ba0(strategies[i]); + test_ba1(strategies[i], 0); + test_ba1(strategies[i], 10); + 
test_ba1(strategies[i], 20); + } test_ba2(); return 0; } diff --git a/storage/tokudb/ft-index/ft/tests/bnc-insert-benchmark.cc b/storage/tokudb/ft-index/ft/tests/bnc-insert-benchmark.cc index 253a216e67537..bd9f28c858fb3 100644 --- a/storage/tokudb/ft-index/ft/tests/bnc-insert-benchmark.cc +++ b/storage/tokudb/ft-index/ft/tests/bnc-insert-benchmark.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -127,9 +127,9 @@ run_test(unsigned long eltsize, unsigned long nodesize, unsigned long repeat) *p = (rand() & 0xff); } } - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; - int r = xids_create_child(xids_0, &xids_123, (TXNID)123); + int r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); NONLEAF_CHILDINFO bnc; @@ -137,6 +137,9 @@ run_test(unsigned long eltsize, unsigned long nodesize, unsigned long repeat) struct timeval t[2]; gettimeofday(&t[0], NULL); + toku::comparator cmp; + cmp.create(long_key_cmp, nullptr); + for (unsigned int i = 0; i < repeat; ++i) { bnc = toku_create_empty_nl(); for (; toku_bnc_nbytesinbuf(bnc) <= nodesize; ++cur) { @@ -144,7 +147,7 @@ run_test(unsigned long eltsize, unsigned long nodesize, unsigned long repeat) &keys[cur % 1024], sizeof keys[cur % 1024], vals[cur % 1024], eltsize - (sizeof keys[cur % 1024]), FT_NONE, next_dummymsn(), xids_123, true, - NULL, long_key_cmp); assert_zero(r); + cmp); assert_zero(r); } nbytesinserted += toku_bnc_nbytesinbuf(bnc); destroy_nonleaf_childinfo(bnc); @@ -157,6 +160,8 @@ run_test(unsigned long eltsize, unsigned long nodesize, unsigned long repeat) long long unsigned eltrate = (long) (cur / dt); printf("%0.03lf MB/sec\n", mbrate); printf("%llu elts/sec\n", eltrate); + + cmp.destroy(); } int diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-4357.cc b/storage/tokudb/ft-index/ft/tests/cachetable-4357.cc index de75f6813d26c..0704914cc5bbd 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-4357.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-4357.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -115,7 +115,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-4365.cc b/storage/tokudb/ft-index/ft/tests/cachetable-4365.cc index ecaeea2d6315a..1c5a55bf12066 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-4365.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-4365.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
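The bnc-insert-benchmark hunk above switches from passing a raw compare function (long_key_cmp plus a NULL descriptor) into toku_bnc_insert_msg to constructing a toku::comparator with cmp.create(long_key_cmp, nullptr), passing the object, and tearing it down with cmp.destroy() after the run. A minimal stand-in for that comparator-object pattern is sketched below; mini_comparator, mini_dbt and bytes_cmp() are invented for the sketch and are not the real toku::comparator, DBT, or long_key_cmp.

    // Bundle a compare function with its descriptor/user data and give it an
    // explicit create()/destroy() lifecycle, the shape the benchmark adopts.
    #include <cassert>
    #include <cstring>

    struct mini_dbt { const void *data; size_t size; };

    typedef int (*compare_fn)(const mini_dbt *a, const mini_dbt *b, void *extra);

    class mini_comparator {
        compare_fn _cmp = nullptr;
        void *_extra = nullptr;
    public:
        void create(compare_fn cmp, void *extra) { _cmp = cmp; _extra = extra; }
        void destroy() { _cmp = nullptr; _extra = nullptr; }
        int operator()(const mini_dbt *a, const mini_dbt *b) const {
            return _cmp(a, b, _extra);
        }
    };

    // example compare function, analogous in role to long_key_cmp in the benchmark
    static int bytes_cmp(const mini_dbt *a, const mini_dbt *b, void *) {
        size_t n = a->size < b->size ? a->size : b->size;
        int c = memcmp(a->data, b->data, n);
        if (c != 0) return c;
        return (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
    }

    int main(void) {
        mini_comparator cmp;
        cmp.create(bytes_cmp, nullptr);   // mirrors cmp.create(long_key_cmp, nullptr)
        mini_dbt x = {"abc", 3}, y = {"abd", 3};
        assert(cmp(&x, &y) < 0);
        cmp.destroy();                    // mirrors cmp.destroy() after the timing loop
        return 0;
    }

Wrapping the function pointer and its extra argument in one object lets callers like toku_bnc_insert_msg take a single comparator parameter instead of a function pointer plus descriptor pair.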
DISCLAIMER: @@ -136,7 +136,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-5097.cc b/storage/tokudb/ft-index/ft/tests/cachetable-5097.cc index 7c958dd3049ef..5cef1f3c6d580 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-5097.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-5097.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -169,7 +169,7 @@ cachetable_test (void) { check_flush = false; dirty_flush_called = false; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::disable_ev_thread(&ct->ev); // disable eviction thread toku_os_recursive_delete(TOKU_TEST_FILENAME); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-5978-2.cc b/storage/tokudb/ft-index/ft/tests/cachetable-5978-2.cc index be7c4fb23631d..427bc2c4a4236 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-5978-2.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-5978-2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -210,7 +210,7 @@ cachetable_test (void) { int r; toku_pair_list_set_lock_size(2); // set two bucket mutexes CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-5978.cc b/storage/tokudb/ft-index/ft/tests/cachetable-5978.cc index c72d67909e1e1..11613e5a204f3 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-5978.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-5978.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -227,7 +227,7 @@ cachetable_test (void) { int r; toku_pair_list_set_lock_size(2); // set two bucket mutexes CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-all-write.cc b/storage/tokudb/ft-index/ft/tests/cachetable-all-write.cc index 3af800e7edb36..b0ebd9ed5e9b5 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-all-write.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-all-write.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -116,7 +116,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pending.cc b/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pending.cc index c9f6033da9e44..53570ec1f0a52 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pending.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pending.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include #include #include "cachetable-test.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include static int N; // how many items in the table @@ -108,13 +108,9 @@ static volatile int n_flush, n_write_me, n_keep_me, n_fetch; static void sleep_random (void) { -#if TOKU_WINDOWS - usleep(random() % 1000); //Will turn out to be almost always 1ms. -#else toku_timespec_t req = {.tv_sec = 0, .tv_nsec = random()%1000000}; //Max just under 1ms nanosleep(&req, NULL); -#endif } int expect_value = 42; // initially 42, later 43 @@ -191,7 +187,7 @@ static void checkpoint_pending(void) { if (verbose) { printf("%s:%d n=%d\n", __FUNCTION__, __LINE__, N); fflush(stdout); } const int test_limit = N; int r; - toku_cachetable_create(&ct, test_limit*sizeof(int), ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit*sizeof(int), ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; r = unlink(fname1); if (r!=0) CKERR2(get_error_errno(), ENOENT); r = toku_cachetable_openf(&cf, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pinned-nodes.cc b/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pinned-nodes.cc index cf0d4e28afda8..bacf48d01b12c 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pinned-nodes.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-pinned-nodes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -152,7 +152,7 @@ cachetable_test (void) { const int test_limit = 20; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-prefetched-nodes.cc b/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-prefetched-nodes.cc index fded78d5ba0fc..510b2fb458c9d 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-prefetched-nodes.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-prefetched-nodes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -154,7 +154,7 @@ cachetable_test (void) { const int test_limit = 20; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-test.cc index e86e7de4bb030..f1ea464d952c0 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-checkpoint-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static const int item_size = 1; @@ -145,7 +145,7 @@ static void cachetable_checkpoint_test(int n, enum cachetable_dirty dirty) { const int test_limit = n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-checkpointer-class.cc b/storage/tokudb/ft-index/ft/tests/cachetable-checkpointer-class.cc index c2adc202fb500..6b138cd0bcacb 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-checkpointer-class.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-checkpointer-class.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -#include "cachetable-internal.h" +#include "cachetable/cachetable-internal.h" #include "cachetable-test.h" // @@ -112,6 +112,14 @@ struct checkpointer_test { uint32_t k); }; +static void init_cachefile(CACHEFILE cf, int which_cf, bool for_checkpoint) { + memset(cf, 0, sizeof(*cf)); + create_dummy_functions(cf); + cf->fileid = { 0, (unsigned) which_cf }; + cf->filenum = { (unsigned) which_cf }; + cf->for_checkpoint = for_checkpoint; +} + //------------------------------------------------------------------------------ // test_begin_checkpoint() - // @@ -135,33 +143,28 @@ void checkpointer_test::test_begin_checkpoint() { // 2. Call checkpoint with ONE cachefile. //cachefile cf; struct cachefile cf; - cf.next = NULL; - cf.for_checkpoint = false; - m_cp.m_cf_list->m_active_head = &cf; - create_dummy_functions(&cf); + init_cachefile(&cf, 0, false); + m_cp.m_cf_list->add_cf_unlocked(&cf); m_cp.begin_checkpoint(); assert(m_cp.m_checkpoint_num_files == 1); assert(cf.for_checkpoint == true); + m_cp.m_cf_list->remove_cf(&cf); // 3. Call checkpoint with MANY cachefiles. 
const uint32_t count = 3; struct cachefile cfs[count]; - m_cp.m_cf_list->m_active_head = &cfs[0]; for (uint32_t i = 0; i < count; ++i) { - cfs[i].for_checkpoint = false; + init_cachefile(&cfs[i], i, false); create_dummy_functions(&cfs[i]); - if (i == count - 1) { - cfs[i].next = NULL; - } else { - cfs[i].next = &cfs[i + 1]; - } + m_cp.m_cf_list->add_cf_unlocked(&cfs[i]); } m_cp.begin_checkpoint(); assert(m_cp.m_checkpoint_num_files == count); for (uint32_t i = 0; i < count; ++i) { assert(cfs[i].for_checkpoint == true); + cfl.remove_cf(&cfs[i]); } ctbl.list.destroy(); m_cp.destroy(); @@ -195,10 +198,8 @@ void checkpointer_test::test_pending_bits() { // struct cachefile cf; cf.cachetable = &ctbl; - memset(&cf, 0, sizeof(cf)); - cf.next = NULL; - cf.for_checkpoint = true; - m_cp.m_cf_list->m_active_head = &cf; + init_cachefile(&cf, 0, true); + m_cp.m_cf_list->add_cf_unlocked(&cf); create_dummy_functions(&cf); CACHEKEY k; @@ -258,6 +259,7 @@ void checkpointer_test::test_pending_bits() { ctbl.list.destroy(); m_cp.destroy(); + cfl.remove_cf(&cf); cfl.destroy(); } @@ -337,14 +339,11 @@ void checkpointer_test::test_end_checkpoint() { cfl.init(); struct cachefile cf; - memset(&cf, 0, sizeof(cf)); - cf.next = NULL; - cf.for_checkpoint = true; - create_dummy_functions(&cf); + init_cachefile(&cf, 0, true); ZERO_STRUCT(m_cp); m_cp.init(&ctbl.list, NULL, &ctbl.ev, &cfl); - m_cp.m_cf_list->m_active_head = &cf; + m_cp.m_cf_list->add_cf_unlocked(&cf); // 2. Add data before running checkpoint. const uint32_t count = 6; @@ -394,6 +393,7 @@ void checkpointer_test::test_end_checkpoint() { assert(pp); m_cp.m_list->evict_completely(pp); } + cfl.remove_cf(&cf); m_cp.destroy(); ctbl.list.destroy(); cfl.destroy(); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint.cc b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint.cc index 7e40d3c861f4d..0b726f67306aa 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -147,7 +147,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint2.cc b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint2.cc index 4c9eacd004c62..b360d21c177d2 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint2.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-checkpoint2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
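The checkpointer-class test hunks above replace hand-wired m_active_head/next pointer manipulation with an init_cachefile() helper plus add_cf_unlocked() and remove_cf() on the cachefile list, and the assertions they keep are that begin_checkpoint() counts every active cachefile and marks each one for_checkpoint. The toy model below captures only that asserted invariant; mini_checkpointer and mini_cachefile are invented stand-ins, not the real checkpointer or cachefile_list classes.

    // Toy model of the invariant the checkpointer test asserts: begin_checkpoint()
    // walks the active cachefile list, marks each file for_checkpoint, and counts.
    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct mini_cachefile {
        unsigned fileid;
        bool for_checkpoint;
    };

    struct mini_checkpointer {
        std::vector<mini_cachefile *> active;   // stands in for the cachefile_list
        uint32_t checkpoint_num_files = 0;

        void add_cf(mini_cachefile *cf) { active.push_back(cf); }
        void remove_cf(mini_cachefile *cf) {
            for (size_t i = 0; i < active.size(); i++)
                if (active[i] == cf) { active.erase(active.begin() + i); return; }
        }
        void begin_checkpoint() {
            checkpoint_num_files = 0;
            for (mini_cachefile *cf : active) {
                cf->for_checkpoint = true;
                checkpoint_num_files++;
            }
        }
    };

    int main(void) {
        mini_checkpointer cp;
        mini_cachefile cfs[3] = {{0, false}, {1, false}, {2, false}};
        for (auto &cf : cfs) cp.add_cf(&cf);
        cp.begin_checkpoint();
        assert(cp.checkpoint_num_files == 3);
        for (auto &cf : cfs) assert(cf.for_checkpoint);
        for (auto &cf : cfs) cp.remove_cf(&cf);   // tests must unregister, as in the hunk
        return 0;
    }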
DISCLAIMER: @@ -147,7 +147,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc index 1318f342f2b62..1b7f4825e17b1 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-attrs-accumulate.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -143,7 +143,7 @@ run_test (void) { int r; CACHETABLE ct; toku_mutex_init(&attr_mutex, NULL); - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc index 3f771b5807534..8c7de0ae91439 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-empty-cachetable.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -99,7 +99,7 @@ cachetable_test (void) { const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); const char *fname1 = TOKU_TEST_FILENAME; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-everything-pinned.cc b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-everything-pinned.cc index 0a809339b8e04..8e5a3ea40fa4b 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-everything-pinned.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-everything-pinned.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -111,7 +111,7 @@ run_test (void) { const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); const char *fname1 = TOKU_TEST_FILENAME; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc index 33a603baec92e..06107b7cefde3 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-nothing-needs-flushing.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -111,7 +111,7 @@ run_test (void) { const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); const char *fname1 = TOKU_TEST_FILENAME; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-same-fullhash.cc b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-same-fullhash.cc index 485224302b056..de1cb8b612a4a 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-same-fullhash.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-same-fullhash.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -119,7 +119,7 @@ run_test (void) { const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); my_cleaner_callback_called = false; const char *fname1 = TOKU_TEST_FILENAME; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-simple.cc b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-simple.cc index 5d4fed42e505f..a50495774f3eb 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-simple.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-cleaner-thread-simple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -119,7 +119,7 @@ run_test (void) { const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); my_cleaner_callback_called = false; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-clock-all-pinned.cc b/storage/tokudb/ft-index/ft/tests/cachetable-clock-all-pinned.cc index af08020e4aa3c..9eac1304fe20f 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-clock-all-pinned.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-clock-all-pinned.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -96,7 +96,7 @@ cachetable_test (void) { int test_limit = 6; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction.cc b/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction.cc index f024a79e51dac..ac18ce8ac327d 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -143,7 +143,7 @@ cachetable_test (void) { num_entries = 0; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction2.cc b/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction2.cc index 23926241b97eb..13b941ab05433 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction2.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -186,7 +186,7 @@ cachetable_test (void) { const int test_limit = 16; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction3.cc b/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction3.cc index 735bde724d0f6..9f148af1d4395 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction3.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -202,7 +202,7 @@ cachetable_test (void) { const int test_limit = 20; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::set_hysteresis_limits(&ct->ev, test_limit, 100*test_limit); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction4.cc b/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction4.cc index 9dc1f1a5218e2..e89319c90a979 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction4.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-clock-eviction4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -181,7 +181,7 @@ cachetable_test (void) { num_entries = 0; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-clone-checkpoint.cc b/storage/tokudb/ft-index/ft/tests/cachetable-clone-checkpoint.cc index f7904ffd73d40..1fc36e06927a3 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-clone-checkpoint.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-clone-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -145,7 +145,7 @@ cachetable_test (void) { const int test_limit = 200; int r; ct = NULL; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc b/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc index 4c5e1133555fc..d22478b8e38fc 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch-pinned-node.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -139,7 +139,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch.cc b/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch.cc index fed76332a4520..92859cfac68c8 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-clone-partial-fetch.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -144,7 +144,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-clone-pin-nonblocking.cc b/storage/tokudb/ft-index/ft/tests/cachetable-clone-pin-nonblocking.cc index a56dc034202cf..d7cdbcc0854fd 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-clone-pin-nonblocking.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-clone-pin-nonblocking.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -126,7 +126,7 @@ cachetable_test (enum cachetable_dirty dirty, bool cloneable) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-clone-unpin-remove.cc b/storage/tokudb/ft-index/ft/tests/cachetable-clone-unpin-remove.cc index 1aeff2ee28e77..f6cf0ec34c734 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-clone-unpin-remove.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-clone-unpin-remove.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -137,7 +137,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-count-pinned-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-count-pinned-test.cc index d44372780545d..4dba635d22f79 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-count-pinned-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-count-pinned-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -97,7 +97,7 @@ cachetable_count_pinned_test (int n) { const int test_limit = 2*n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-debug-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-debug-test.cc index dde4a0c69b118..fda1d0ae563ea 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-debug-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-debug-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,7 +96,7 @@ cachetable_debug_test (int n) { const int test_limit = n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test.cc index 18a65729501e0..da2ff48f2eb7b 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -155,7 +155,7 @@ static void cachetable_eviction_full_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test2.cc b/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test2.cc index c8004ca1cb155..d6ba0f3b1366b 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test2.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-eviction-close-test2.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -168,7 +168,7 @@ static void cachetable_eviction_full_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test.cc index a1887fe6c9466..51540db5739a3 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -123,7 +123,7 @@ static void cachetable_predef_fetch_maybegetandpin_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test2.cc b/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test2.cc index d65048f797a83..45c10bcc552ba 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test2.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-eviction-getandpin-test2.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -130,7 +130,7 @@ static void cachetable_prefetch_maybegetandpin_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-evictor-class.cc b/storage/tokudb/ft-index/ft/tests/cachetable-evictor-class.cc index d0dff7d95702c..12e463d61d88b 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-evictor-class.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-evictor-class.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -#include "cachetable-internal.h" +#include "cachetable/cachetable-internal.h" class evictor_unit_test { public: diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-fd-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-fd-test.cc index 16b757bebdf65..8ff6ee94fbe66 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-fd-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-fd-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -98,7 +98,7 @@ cachetable_fd_test (void) { const int test_limit = 1; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_os_recursive_delete(TOKU_TEST_FILENAME); r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU); assert_zero(r); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-fetch-inducing-evictor.cc b/storage/tokudb/ft-index/ft/tests/cachetable-fetch-inducing-evictor.cc index ac3191b1a338e..089c34498b979 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-fetch-inducing-evictor.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-fetch-inducing-evictor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -113,7 +113,7 @@ cachetable_test (enum pin_evictor_test_type test_type, bool nonblocking) { const int test_limit = 7; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); evictor_test_helpers::set_hysteresis_limits(&ct->ev, test_limit, test_limit); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-flush-during-cleaner.cc b/storage/tokudb/ft-index/ft/tests/cachetable-flush-during-cleaner.cc index d4c8c85cfbaeb..237671fe28ff7 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-flush-during-cleaner.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-flush-during-cleaner.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -117,7 +117,7 @@ cachetable_test (void) { const int test_limit = 400; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_set_cleaner_period(ct, 1); const char *fname1 = TOKU_TEST_FILENAME; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-flush-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-flush-test.cc index c4c2da0577af0..2297364891be6 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-flush-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-flush-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -97,7 +97,7 @@ test_cachetable_def_flush (int n) { const int test_limit = 2*n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); toku_os_recursive_delete(TOKU_TEST_FILENAME); r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU); assert_zero(r); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-getandpin-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-getandpin-test.cc index 6165de34eb005..b3e4dfa1d9a11 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-getandpin-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-getandpin-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -134,7 +134,7 @@ cachetable_getandpin_test (int n) { const int test_limit = 1024*1024; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc b/storage/tokudb/ft-index/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc index f44414cb66724..b6f2a189e26d9 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-kibbutz_and_flush_cachefile.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -114,7 +114,7 @@ run_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-partial-fetch.cc b/storage/tokudb/ft-index/ft/tests/cachetable-partial-fetch.cc index 27f5800d06fb8..043b35ab50336 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-partial-fetch.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-partial-fetch.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -166,7 +166,7 @@ cachetable_test (void) { int r; CACHETABLE ct; bool doing_prefetch = false; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -215,7 +215,7 @@ cachetable_test (void) { // close and reopen cachefile so we can do some simple prefetch tests toku_cachefile_close(&f1, false, ZERO_LSN); toku_cachetable_close(&ct); - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); // // verify that a prefetch of the node will succeed diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-pin-checkpoint.cc b/storage/tokudb/ft-index/ft/tests/cachetable-pin-checkpoint.cc index e5022afee8877..6916e974c3b39 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-pin-checkpoint.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-pin-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -413,7 +413,7 @@ cachetable_test (void) { int r; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc b/storage/tokudb/ft-index/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc index ba4bebab32387..ca9db5e652141 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-pin-nonblocking-checkpoint-clean.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -100,7 +100,7 @@ run_test (void) { const int test_limit = 20; int r; ct = NULL; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); f1 = NULL; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-checkpoint-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-checkpoint-test.cc index 2122f61afa81a..65465339f0e04 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-checkpoint-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-checkpoint-test.cc @@ -32,7 +32,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include "cachetable-test.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" const int item_size = 1; @@ -153,7 +153,7 @@ static void cachetable_prefetch_checkpoint_test(int n, enum cachetable_dirty dir CACHETABLE ct; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-leak-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-leak-test.cc index 3153c6f3a3ca9..e817c8aa65ef8 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-leak-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-leak-test.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -139,7 +139,7 @@ static void cachetable_prefetch_close_leak_test (void) { const int test_limit = 1; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-test.cc index d013db1ab7303..e8d08c86aa117 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-close-test.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -141,7 +141,7 @@ static void cachetable_prefetch_full_test (bool partial_fetch) { expect_pf = false; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-flowcontrol-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-flowcontrol-test.cc index 6159e8eb67f92..8736b6a406518 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-flowcontrol-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-flowcontrol-test.cc @@ -32,7 +32,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include "test.h" -#include "cachetable-internal.h" +#include "cachetable/cachetable-internal.h" static int flush_calls = 0; static int flush_evict_calls = 0; @@ -152,7 +152,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), static void cachetable_prefetch_flowcontrol_test (int cachetable_size_limit) { int r; CACHETABLE ct; - toku_cachetable_create(&ct, cachetable_size_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, cachetable_size_limit, ZERO_LSN, nullptr); evictor_test_helpers::set_hysteresis_limits(&ct->ev, cachetable_size_limit, cachetable_size_limit); evictor_test_helpers::disable_ev_thread(&ct->ev); const char *fname1 = TOKU_TEST_FILENAME; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-getandpin-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-getandpin-test.cc index 9aba0fdbafa31..4ba6dff51a9aa 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-getandpin-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-getandpin-test.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -162,7 +162,7 @@ static void cachetable_prefetch_maybegetandpin_test (bool do_partial_fetch) { const int test_limit = 2; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-maybegetandpin-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-maybegetandpin-test.cc index 14c12bbb817a6..0540ab5429be8 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-maybegetandpin-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-prefetch-maybegetandpin-test.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -119,7 +119,7 @@ static void cachetable_prefetch_maybegetandpin_test (void) { const int test_limit = 1; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-prefetch2-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-prefetch2-test.cc index 6c81ce4918813..f7d348eaa3b8e 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-prefetch2-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-prefetch2-test.cc @@ -32,7 +32,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -122,7 +122,7 @@ static void cachetable_prefetch_maybegetandpin_test (void) { const int test_limit = 1; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-put-checkpoint.cc b/storage/tokudb/ft-index/ft/tests/cachetable-put-checkpoint.cc index 8691e2b93b0d3..0b316aa528f3c 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-put-checkpoint.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-put-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -545,7 +545,7 @@ cachetable_test (void) { int r; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-put-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-put-test.cc index 0280681903e3b..07765bd666e24 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-put-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-put-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,7 +96,7 @@ cachetable_put_test (int n) { const int test_limit = 2*n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-rwlock-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-rwlock-test.cc index 87014dc406ebc..7e5fb4a00bbcd 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-rwlock-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-rwlock-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone.cc b/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone.cc index fe96b4402480a..3944182084c71 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -150,7 +150,7 @@ test_clean (enum cachetable_dirty dirty, bool cloneable) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone2.cc b/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone2.cc index 7dcd2a2bb7ceb..177905e672108 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone2.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-simple-clone2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -137,7 +137,7 @@ test_clean (enum cachetable_dirty dirty, bool cloneable) { const int test_limit = 200; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-simple-close.cc b/storage/tokudb/ft-index/ft/tests/cachetable-simple-close.cc index 03c66162aab29..7a7518b78f590 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-simple-close.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-simple-close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -145,7 +145,7 @@ simple_test(bool unlink_on_close) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -214,7 +214,7 @@ static void test_pair_stays_in_cache(enum cachetable_dirty dirty) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -245,7 +245,7 @@ static void test_multiple_cachefiles(bool use_same_hash) { const int test_limit = 1000; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); char fname1[strlen(TOKU_TEST_FILENAME) + sizeof("_1")]; strcpy(fname1, TOKU_TEST_FILENAME); @@ -333,7 +333,7 @@ static void test_evictor(void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); char fname1[strlen(TOKU_TEST_FILENAME) + sizeof("_1")]; strcpy(fname1, TOKU_TEST_FILENAME); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-simple-maybe-get-pin.cc b/storage/tokudb/ft-index/ft/tests/cachetable-simple-maybe-get-pin.cc index 08c14191be658..891b70fd7d110 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-simple-maybe-get-pin.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-simple-maybe-get-pin.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -100,7 +100,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-cheap.cc b/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-cheap.cc index f5608b7572cd7..70e7a936a2633 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-cheap.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-cheap.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -125,7 +125,7 @@ run_test (pair_lock_type lock_type) { struct unlockers unlockers = {true, unlock_dummy, NULL, NULL}; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-dep-nodes.cc b/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-dep-nodes.cc index d8ced02318b95..8a87f006f6bb3 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-dep-nodes.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-dep-nodes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -158,7 +158,7 @@ cachetable_test (bool write_first, bool write_second, bool start_checkpoint) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc b/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc index cec5aff826620..e1050b2da7fdc 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking-cheap.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -164,7 +164,7 @@ run_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking.cc b/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking.cc index a96f764922630..33319b7a3689c 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin-nonblocking.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -147,7 +147,7 @@ run_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin.cc b/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin.cc index e40890ccc0442..b90b01bfd6cc6 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-simple-pin.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -139,7 +139,7 @@ run_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-simple-put-dep-nodes.cc b/storage/tokudb/ft-index/ft/tests/cachetable-simple-put-dep-nodes.cc index 1a5074a172fcf..eaeee0bb4dbd5 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-simple-put-dep-nodes.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-simple-put-dep-nodes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -173,7 +173,7 @@ cachetable_test (bool write_first, bool write_second, bool start_checkpoint) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin-nonblocking.cc b/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin-nonblocking.cc index 6a3d7c34f4a8f..aeb6437f67055 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin-nonblocking.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin-nonblocking.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -152,7 +152,7 @@ run_test (void) { int r; void *ret; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin.cc b/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin.cc index 5bbc74557552d..5f0b6eff445ae 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-simple-read-pin.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -158,7 +158,7 @@ run_test (void) { int r; void *ret; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc b/storage/tokudb/ft-index/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc index b94123ad9a65c..45d660739303a 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-simple-unpin-remove-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -114,7 +114,7 @@ cachetable_test (void) { const int test_limit = 120; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-simple-verify.cc b/storage/tokudb/ft-index/ft/tests/cachetable-simple-verify.cc index 99364660bd178..f38eb2214b66a 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-simple-verify.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-simple-verify.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ cachetable_test (void) { const int test_limit = 12; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-test.cc index e1a1bc28c65da..a040943007a29 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -118,7 +118,7 @@ static inline void test_mutex_unlock(void) { static void test_cachetable_create(void) { CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); toku_cachetable_close(&ct); } @@ -172,7 +172,7 @@ static void test_nested_pin (void) { void *vv,*vv2; const char *fname = TOKU_TEST_FILENAME; if (verbose) printf("creating cachetable\n"); - toku_cachetable_create(&t, 1, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, 1, ZERO_LSN, nullptr); toku_os_recursive_delete(fname); r = toku_cachetable_openf(&f, t, fname, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r==0); @@ -241,8 +241,6 @@ PAIR_ATTR *sizep __attribute__((__unused__)), int * dirtyp, void*extraargs) { return 0; } -#if !TOKU_WINDOWS - static void test_multi_filehandles (void) { CACHETABLE t; CACHEFILE f1,f2,f3; @@ -259,7 +257,7 @@ static void test_multi_filehandles (void) { unlink(fname1); unlink(fname2); - toku_cachetable_create(&t, 4, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, 4, ZERO_LSN, nullptr); r = toku_cachetable_openf(&f1, t, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r==0); r = link(fname1, fname2); assert(r==0); r = toku_cachetable_openf(&f2, t, fname2, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r==0); @@ -291,8 +289,6 @@ static void test_multi_filehandles (void) { toku_cachetable_close(&t); } -#endif - static void test_dirty_flush(CACHEFILE f, int UU(fd), CACHEKEY key, @@ -329,7 +325,7 @@ static void test_dirty(void) { int dirty; long long pinned; long entry_size; int r; - toku_cachetable_create(&t, 4, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, 4, ZERO_LSN, nullptr); const char *fname = TOKU_TEST_FILENAME; toku_os_recursive_delete(fname); @@ -459,7 +455,7 @@ static void test_size_resize(void) { int n = 3; long size = 1; - toku_cachetable_create(&t, n*size, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, n*size, ZERO_LSN, nullptr); const char *fname = TOKU_TEST_FILENAME; unlink(fname); @@ -513,7 +509,7 @@ static void test_size_flush(void) { const int n = 8; long long size = 1*1024*1024; - toku_cachetable_create(&t, n*size, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&t, n*size, ZERO_LSN, nullptr); const char *fname = TOKU_TEST_FILENAME; unlink(fname); @@ -587,15 +583,11 @@ test_main (int argc, const char *argv[]) { test_mutex_init(); // run tests -#if !TOKU_WINDOWS test_multi_filehandles(); -#endif test_cachetable_create(); for (i=0; i<1; i++) { test_nested_pin(); -#if !TOKU_WINDOWS test_multi_filehandles (); -#endif test_dirty(); test_size_resize(); //test_size_flush(); diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-test.h b/storage/tokudb/ft-index/ft/tests/cachetable-test.h index 6d143237c11b4..6d27a9b71bba0 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-test.h +++ b/storage/tokudb/ft-index/ft/tests/cachetable-test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
-#include "cachetable-internal.h" +#include "cachetable/cachetable-internal.h" // // Dummy callbacks for checkpointing diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-unpin-and-remove-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-unpin-and-remove-test.cc index 8e199e153d6ab..9063cdc2bcc8e 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-unpin-and-remove-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-unpin-and-remove-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -116,7 +116,7 @@ cachetable_unpin_and_remove_test (int n) { int i; CACHETABLE ct; - toku_cachetable_create(&ct, table_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, table_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -172,7 +172,7 @@ cachetable_put_evict_remove_test (int n) { int i; CACHETABLE ct; - toku_cachetable_create(&ct, table_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, table_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-unpin-remove-and-checkpoint.cc b/storage/tokudb/ft-index/ft/tests/cachetable-unpin-remove-and-checkpoint.cc index e121f2165d9c2..406df310de569 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-unpin-remove-and-checkpoint.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-unpin-remove-and-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -114,7 +114,7 @@ run_test (void) { const int test_limit = 12; int r; ct = NULL; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-unpin-test.cc b/storage/tokudb/ft-index/ft/tests/cachetable-unpin-test.cc index 4d0fe46f5f444..1d8c2b03abce9 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-unpin-test.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-unpin-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -97,7 +97,7 @@ cachetable_unpin_test (int n) { const int test_limit = 2*n; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; @@ -145,7 +145,7 @@ unpin_and_evictor_test(enum unpin_evictor_test_type test_type) { int r; CACHETABLE ct; int test_limit = 4; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/cachetable-writer-thread-limit.cc b/storage/tokudb/ft-index/ft/tests/cachetable-writer-thread-limit.cc index fe7a26e4b3a3c..92f5a1906f69a 100644 --- a/storage/tokudb/ft-index/ft/tests/cachetable-writer-thread-limit.cc +++ b/storage/tokudb/ft-index/ft/tests/cachetable-writer-thread-limit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -125,7 +125,7 @@ cachetable_test (void) { test_limit = 6; int r; CACHETABLE ct; - toku_cachetable_create(&ct, test_limit, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, test_limit, ZERO_LSN, nullptr); const char *fname1 = TOKU_TEST_FILENAME; unlink(fname1); CACHEFILE f1; diff --git a/storage/tokudb/ft-index/ft/tests/comparator-test.cc b/storage/tokudb/ft-index/ft/tests/comparator-test.cc index ad09ad0c3abeb..0ac3bd569ccdf 100644 --- a/storage/tokudb/ft-index/ft/tests/comparator-test.cc +++ b/storage/tokudb/ft-index/ft/tests/comparator-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -112,14 +112,31 @@ static void test_desc(void) { // create with d1, make sure it gets used cmp.create(magic_compare, &d1); expected_desc = &d1; - c = cmp.compare(&dbt_a, &dbt_b); + c = cmp(&dbt_a, &dbt_b); invariant(c == MAGIC); // set desc to d2, make sure it gets used - cmp.set_descriptor(&d2); + toku::comparator cmp2; + cmp2.create(magic_compare, &d2); + cmp.inherit(cmp2); expected_desc = &d2; - c = cmp.compare(&dbt_a, &dbt_b); + c = cmp(&dbt_a, &dbt_b); invariant(c == MAGIC); + cmp2.destroy(); + + // go back to using d1, but using the create_from API + toku::comparator cmp3, cmp4; + cmp3.create(magic_compare, &d1); // cmp3 has d1 + cmp4.create_from(cmp3); // cmp4 should get d1 from cmp3 + expected_desc = &d1; + c = cmp3(&dbt_a, &dbt_b); + invariant(c == MAGIC); + c = cmp4(&dbt_a, &dbt_b); + invariant(c == MAGIC); + cmp3.destroy(); + cmp4.destroy(); + + cmp.destroy(); } static int dont_compare_me_bro(DB *db, const DBT *a, const DBT *b) { @@ -137,20 +154,22 @@ static void test_infinity(void) { // should never be called and thus the dbt never actually read. 
DBT arbitrary_dbt; - c = cmp.compare(&arbitrary_dbt, toku_dbt_positive_infinity()); + c = cmp(&arbitrary_dbt, toku_dbt_positive_infinity()); invariant(c < 0); - c = cmp.compare(toku_dbt_negative_infinity(), &arbitrary_dbt); + c = cmp(toku_dbt_negative_infinity(), &arbitrary_dbt); invariant(c < 0); - c = cmp.compare(toku_dbt_positive_infinity(), &arbitrary_dbt); + c = cmp(toku_dbt_positive_infinity(), &arbitrary_dbt); invariant(c > 0); - c = cmp.compare(&arbitrary_dbt, toku_dbt_negative_infinity()); + c = cmp(&arbitrary_dbt, toku_dbt_negative_infinity()); invariant(c > 0); - c = cmp.compare(toku_dbt_negative_infinity(), toku_dbt_negative_infinity()); + c = cmp(toku_dbt_negative_infinity(), toku_dbt_negative_infinity()); invariant(c == 0); - c = cmp.compare(toku_dbt_positive_infinity(), toku_dbt_positive_infinity()); + c = cmp(toku_dbt_positive_infinity(), toku_dbt_positive_infinity()); invariant(c == 0); + + cmp.destroy(); } int main(void) { diff --git a/storage/tokudb/ft-index/ft/tests/compress-test.cc b/storage/tokudb/ft-index/ft/tests/compress-test.cc index 55b701320298f..7f7a97274c81b 100644 --- a/storage/tokudb/ft-index/ft/tests/compress-test.cc +++ b/storage/tokudb/ft-index/ft/tests/compress-test.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: #ident "$Id$" #include "test.h" -#include "compress.h" +#include "serialize/compress.h" static void test_compress_buf_method (unsigned char *buf, int i, enum toku_compression_method m) { int bound = toku_compress_bound(m, i); diff --git a/storage/tokudb/ft-index/ft/tests/dbufio-test-destroy.cc b/storage/tokudb/ft-index/ft/tests/dbufio-test-destroy.cc index 8110f9554ad83..c9984879a8603 100644 --- a/storage/tokudb/ft-index/ft/tests/dbufio-test-destroy.cc +++ b/storage/tokudb/ft-index/ft/tests/dbufio-test-destroy.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "dbufio.h" +#include "loader/dbufio.h" #include #include #include diff --git a/storage/tokudb/ft-index/ft/tests/dbufio-test.cc b/storage/tokudb/ft-index/ft/tests/dbufio-test.cc index cffc081921bae..6f562d8ac85ea 100644 --- a/storage/tokudb/ft-index/ft/tests/dbufio-test.cc +++ b/storage/tokudb/ft-index/ft/tests/dbufio-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." -#include "dbufio.h" +#include "loader/dbufio.h" #include #include #include diff --git a/storage/tokudb/ft-index/ft/tests/omt-test.cc b/storage/tokudb/ft-index/ft/tests/dmt-test.cc similarity index 64% rename from storage/tokudb/ft-index/ft/tests/omt-test.cc rename to storage/tokudb/ft-index/ft/tests/dmt-test.cc index b63d1b42de209..adc759a3c4baa 100644 --- a/storage/tokudb/ft-index/ft/tests/omt-test.cc +++ b/storage/tokudb/ft-index/ft/tests/dmt-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,10 +90,103 @@ PATENT RIGHTS GRANT: #include "test.h" -#include "omt.h" -#include +#include -typedef OMTVALUE TESTVALUE; +typedef void *DMTVALUE; + +class dmtvalue_writer { +public: + size_t get_size(void) const { + return sizeof(DMTVALUE); + } + void write_to(DMTVALUE *const dest) const { + *dest = value; + } + + dmtvalue_writer(DMTVALUE _value) + : value(_value) { + } + dmtvalue_writer(const uint32_t size UU(), DMTVALUE *const src) + : value(*src) { + paranoid_invariant(size == sizeof(DMTVALUE)); + } +private: + const DMTVALUE value; +}; + +typedef toku::dmt *DMT; + +static int dmt_insert_at(DMT dmt, DMTVALUE value, uint32_t index) { + dmtvalue_writer functor(value); + return dmt->insert_at(functor, index); +} + +static DMT dmt_create_from_sorted_array(DMTVALUE *values, uint32_t numvalues) { + DMT XMALLOC(dmt); + dmt->create(); + for (uint32_t i = 0; i < numvalues; i++) { + dmt_insert_at(dmt, values[i], i); + } + return dmt; +} + +struct heftor { + int (*h)(DMTVALUE, void *v); + void *v; +}; + +int call_heftor(const uint32_t size, const DMTVALUE &v, const heftor &htor); +int call_heftor(const uint32_t size, const DMTVALUE &v, const heftor &htor) { + invariant(size == sizeof(DMTVALUE)); + return htor.h(const_cast(v), htor.v); +} + +static int dmt_insert(DMT dmt, DMTVALUE value, int(*h)(DMTVALUE, void*v), void *v, uint32_t *index) { + struct heftor htor = { .h = h, .v = v }; + dmtvalue_writer functor(value); + return dmt->insert(functor, htor, index); +} + +static int dmt_find_zero(DMT V, int (*h)(DMTVALUE, void*extra), void*extra, DMTVALUE *value, uint32_t *index) { + struct heftor htor = { .h = h, .v = extra }; + uint32_t ignore; + return V->find_zero(htor, &ignore, value, index); +} + +static int dmt_find(DMT V, int (*h)(DMTVALUE, void*extra), void*extra, int direction, DMTVALUE *value, uint32_t *index) { + struct heftor htor = { .h = h, .v = extra }; + uint32_t ignore; + return V->find(htor, direction, &ignore, value, index); +} + +static int dmt_split_at(DMT dmt, DMT *newdmtp, uint32_t index) { + if (index > dmt->size()) { return EINVAL; } + DMT XMALLOC(newdmt); + newdmt->create(); + int r; + + for (uint32_t i = index; i < dmt->size(); i++) { + DMTVALUE v; + r = dmt->fetch(i, nullptr, &v); + invariant_zero(r); + r = dmt_insert_at(newdmt, v, i-index); + invariant_zero(r); + } + if (dmt->size() > 0) { + for (uint32_t i = dmt->size(); i > index; i--) { + r = dmt->delete_at(i - 1); + invariant_zero(r); + } + } + r = 0; + + if (r != 0) { + toku_free(newdmt); + } else { + *newdmtp = newdmt; + } + return r; +} static void parse_args (int argc, const char *argv[]) { @@ -133,26 +226,25 @@ enum close_when_done { KEEP_WHEN_DONE }; enum create_type { - STEAL_ARRAY, BATCH_INSERT, INSERT_AT, INSERT_AT_ALMOST_RANDOM, }; /* Globals */ -OMT global_omt; -TESTVALUE* 
values = NULL; -struct value* nums = NULL; +DMT global_dmt; +DMTVALUE* values = nullptr; +struct value* nums = nullptr; uint32_t length; static void cleanup_globals (void) { assert(values); toku_free(values); - values = NULL; + values = nullptr; assert(nums); toku_free(nums); - nums = NULL; + nums = nullptr; } const unsigned int random_seed = 0xFEADACBA; @@ -178,7 +270,7 @@ init_identity_values (unsigned int seed, uint32_t num_elements) { for (i = 0; i < length; i++) { nums[i].number = i; - values[i] = (TESTVALUE)&nums[i]; + values[i] = (DMTVALUE)&nums[i]; } } @@ -193,7 +285,7 @@ init_distinct_sorted_values (unsigned int seed, uint32_t num_elements) { for (i = 0; i < length; i++) { number += (uint32_t)(random() % 32) + 1; nums[i].number = number; - values[i] = (TESTVALUE)&nums[i]; + values[i] = (DMTVALUE)&nums[i]; } } @@ -229,25 +321,22 @@ static void test_close (enum close_when_done do_close) { if (do_close == KEEP_WHEN_DONE) return; assert(do_close == CLOSE_WHEN_DONE); - toku_omt_destroy(&global_omt); - assert(global_omt==NULL); + global_dmt->destroy(); + toku_free(global_dmt); + global_dmt = nullptr; } static void test_create (enum close_when_done do_close) { - int r; - global_omt = NULL; - - r = toku_omt_create(&global_omt); - CKERR(r); - assert(global_omt!=NULL); + XMALLOC(global_dmt); + global_dmt->create(); test_close(do_close); } static void test_create_size (enum close_when_done do_close) { test_create(KEEP_WHEN_DONE); - assert(toku_omt_size(global_omt) == 0); + assert(global_dmt->size() == 0); test_close(do_close); } @@ -258,24 +347,24 @@ test_create_insert_at_almost_random (enum close_when_done do_close) { uint32_t size = 0; test_create(KEEP_WHEN_DONE); - r = toku_omt_insert_at(global_omt, values[0], toku_omt_size(global_omt)+1); + r = dmt_insert_at(global_dmt, values[0], global_dmt->size()+1); CKERR2(r, EINVAL); - r = toku_omt_insert_at(global_omt, values[0], toku_omt_size(global_omt)+2); + r = dmt_insert_at(global_dmt, values[0], global_dmt->size()+2); CKERR2(r, EINVAL); for (i = 0; i < length/2; i++) { - assert(size==toku_omt_size(global_omt)); - r = toku_omt_insert_at(global_omt, values[i], i); + assert(size==global_dmt->size()); + r = dmt_insert_at(global_dmt, values[i], i); CKERR(r); - assert(++size==toku_omt_size(global_omt)); - r = toku_omt_insert_at(global_omt, values[length-1-i], i+1); + assert(++size==global_dmt->size()); + r = dmt_insert_at(global_dmt, values[length-1-i], i+1); CKERR(r); - assert(++size==toku_omt_size(global_omt)); + assert(++size==global_dmt->size()); } - r = toku_omt_insert_at(global_omt, values[0], toku_omt_size(global_omt)+1); + r = dmt_insert_at(global_dmt, values[0], global_dmt->size()+1); CKERR2(r, EINVAL); - r = toku_omt_insert_at(global_omt, values[0], toku_omt_size(global_omt)+2); + r = dmt_insert_at(global_dmt, values[0], global_dmt->size()+2); CKERR2(r, EINVAL); - assert(size==toku_omt_size(global_omt)); + assert(size==global_dmt->size()); test_close(do_close); } @@ -286,39 +375,30 @@ test_create_insert_at_sequential (enum close_when_done do_close) { uint32_t size = 0; test_create(KEEP_WHEN_DONE); - r = toku_omt_insert_at(global_omt, values[0], toku_omt_size(global_omt)+1); + r = dmt_insert_at(global_dmt, values[0], global_dmt->size()+1); CKERR2(r, EINVAL); - r = toku_omt_insert_at(global_omt, values[0], toku_omt_size(global_omt)+2); + r = dmt_insert_at(global_dmt, values[0], global_dmt->size()+2); CKERR2(r, EINVAL); for (i = 0; i < length; i++) { - assert(size==toku_omt_size(global_omt)); - r = toku_omt_insert_at(global_omt, values[i], 
i); + assert(size==global_dmt->size()); + r = dmt_insert_at(global_dmt, values[i], i); CKERR(r); - assert(++size==toku_omt_size(global_omt)); + assert(++size==global_dmt->size()); } - r = toku_omt_insert_at(global_omt, values[0], toku_omt_size(global_omt)+1); + r = dmt_insert_at(global_dmt, values[0], global_dmt->size()+1); CKERR2(r, EINVAL); - r = toku_omt_insert_at(global_omt, values[0], toku_omt_size(global_omt)+2); + r = dmt_insert_at(global_dmt, values[0], global_dmt->size()+2); CKERR2(r, EINVAL); - assert(size==toku_omt_size(global_omt)); + assert(size==global_dmt->size()); test_close(do_close); } static void test_create_from_sorted_array (enum create_type create_choice, enum close_when_done do_close) { - int r; - global_omt = NULL; + global_dmt = nullptr; if (create_choice == BATCH_INSERT) { - r = toku_omt_create_from_sorted_array(&global_omt, values, length); - CKERR(r); - } - else if (create_choice == STEAL_ARRAY) { - TESTVALUE* MALLOC_N(length, values_copy); - memcpy(values_copy, values, length*sizeof(*values)); - r = toku_omt_create_steal_sorted_array(&global_omt, &values_copy, length, length); - CKERR(r); - assert(values_copy==NULL); + global_dmt = dmt_create_from_sorted_array(values, length); } else if (create_choice == INSERT_AT) { test_create_insert_at_sequential(KEEP_WHEN_DONE); @@ -326,33 +406,35 @@ test_create_from_sorted_array (enum create_type create_choice, enum close_when_d else if (create_choice == INSERT_AT_ALMOST_RANDOM) { test_create_insert_at_almost_random(KEEP_WHEN_DONE); } - else assert(false); + else { + assert(false); + } - assert(global_omt!=NULL); + assert(global_dmt!=nullptr); test_close(do_close); } static void test_create_from_sorted_array_size (enum create_type create_choice, enum close_when_done do_close) { test_create_from_sorted_array(create_choice, KEEP_WHEN_DONE); - assert(toku_omt_size(global_omt)==length); + assert(global_dmt->size()==length); test_close(do_close); } static void -test_fetch_verify (OMT omtree, TESTVALUE* val, uint32_t len ) { +test_fetch_verify (DMT dmtree, DMTVALUE* val, uint32_t len ) { uint32_t i; int r; - TESTVALUE v = (TESTVALUE)&i; - TESTVALUE oldv = v; + DMTVALUE v = (DMTVALUE)&i; + DMTVALUE oldv = v; - assert(len == toku_omt_size(omtree)); + assert(len == dmtree->size()); for (i = 0; i < len; i++) { assert(oldv!=val[i]); - v = NULL; - r = toku_omt_fetch(omtree, i, &v); + v = nullptr; + r = dmtree->fetch(i, nullptr, &v); CKERR(r); - assert(v != NULL); + assert(v != nullptr); assert(v != oldv); assert(v == val[i]); assert(V(v)->number == V(val[i])->number); @@ -361,7 +443,7 @@ test_fetch_verify (OMT omtree, TESTVALUE* val, uint32_t len ) { for (i = len; i < len*2; i++) { v = oldv; - r = toku_omt_fetch(omtree, i, &v); + r = dmtree->fetch(i, nullptr, &v); CKERR2(r, EINVAL); assert(v == oldv); } @@ -371,30 +453,45 @@ test_fetch_verify (OMT omtree, TESTVALUE* val, uint32_t len ) { static void test_create_fetch_verify (enum create_type create_choice, enum close_when_done do_close) { test_create_from_sorted_array(create_choice, KEEP_WHEN_DONE); - test_fetch_verify(global_omt, values, length); + test_fetch_verify(global_dmt, values, length); test_close(do_close); } static int iterate_helper_error_return = 1; static int -iterate_helper (TESTVALUE v, uint32_t idx, void* extra) { - if (extra == NULL) return iterate_helper_error_return; - TESTVALUE* vals = (TESTVALUE *)extra; - assert(v != NULL); +iterate_helper (DMTVALUE v, uint32_t idx, void* extra) { + if (extra == nullptr) return iterate_helper_error_return; + DMTVALUE* vals = 
(DMTVALUE *)extra; + assert(v != nullptr); assert(v == vals[idx]); assert(V(v)->number == V(vals[idx])->number); return 0; } +struct functor { + int (*f)(DMTVALUE, uint32_t, void *); + void *v; +}; + +int call_functor(const uint32_t size, const DMTVALUE &v, uint32_t idx, functor *const ftor); +int call_functor(const uint32_t size, const DMTVALUE &v, uint32_t idx, functor *const ftor) { + invariant(size == sizeof(DMTVALUE)); + return ftor->f(const_cast(v), idx, ftor->v); +} + +static int dmt_iterate(DMT dmt, int (*f)(DMTVALUE, uint32_t, void*), void*v) { + struct functor ftor = { .f = f, .v = v }; + return dmt->iterate(&ftor); +} static void -test_iterate_verify (OMT omtree, TESTVALUE* vals, uint32_t len) { +test_iterate_verify (DMT dmtree, DMTVALUE* vals, uint32_t len) { int r; iterate_helper_error_return = 0; - r = toku_omt_iterate(omtree, iterate_helper, (void*)vals); + r = dmt_iterate(dmtree, iterate_helper, (void*)vals); CKERR(r); iterate_helper_error_return = 0xFEEDABBA; - r = toku_omt_iterate(omtree, iterate_helper, NULL); + r = dmt_iterate(dmtree, iterate_helper, nullptr); if (!len) { CKERR2(r, 0); } @@ -406,7 +503,7 @@ test_iterate_verify (OMT omtree, TESTVALUE* vals, uint32_t len) { static void test_create_iterate_verify (enum create_type create_choice, enum close_when_done do_close) { test_create_from_sorted_array(create_choice, KEEP_WHEN_DONE); - test_iterate_verify(global_omt, values, length); + test_iterate_verify(global_dmt, values, length); test_close(do_close); } @@ -431,19 +528,26 @@ permute_array (uint32_t* arr, uint32_t len) { } } +static int +dmt_set_at (DMT dmt, DMTVALUE value, uint32_t index) { + int r = dmt->delete_at(index); + if (r!=0) return r; + return dmt_insert_at(dmt, value, index); +} + static void test_create_set_at (enum create_type create_choice, enum close_when_done do_close) { uint32_t i = 0; - struct value* old_nums = NULL; + struct value* old_nums = nullptr; MALLOC_N(length, old_nums); assert(nums); - uint32_t* perm = NULL; + uint32_t* perm = nullptr; MALLOC_N(length, perm); assert(perm); - TESTVALUE* old_values = NULL; + DMTVALUE* old_values = nullptr; MALLOC_N(length, old_values); assert(old_values); @@ -459,22 +563,22 @@ test_create_set_at (enum create_type create_choice, enum close_when_done do_clos } test_create_from_sorted_array(create_choice, KEEP_WHEN_DONE); int r; - r = toku_omt_set_at (global_omt, values[0], length); + r = dmt_set_at (global_dmt, values[0], length); CKERR2(r,EINVAL); - r = toku_omt_set_at (global_omt, values[0], length+1); + r = dmt_set_at (global_dmt, values[0], length+1); CKERR2(r,EINVAL); for (i = 0; i < length; i++) { uint32_t choice = perm[i]; values[choice] = &nums[choice]; nums[choice].number = (uint32_t)random(); - r = toku_omt_set_at (global_omt, values[choice], choice); + r = dmt_set_at (global_dmt, values[choice], choice); CKERR(r); - test_iterate_verify(global_omt, values, length); - test_fetch_verify(global_omt, values, length); + test_iterate_verify(global_dmt, values, length); + test_fetch_verify(global_dmt, values, length); } - r = toku_omt_set_at (global_omt, values[0], length); + r = dmt_set_at (global_dmt, values[0], length); CKERR2(r,EINVAL); - r = toku_omt_set_at (global_omt, values[0], length+1); + r = dmt_set_at (global_dmt, values[0], length+1); CKERR2(r,EINVAL); toku_free(perm); @@ -485,8 +589,8 @@ test_create_set_at (enum create_type create_choice, enum close_when_done do_clos } static int -insert_helper (TESTVALUE value, void* extra_insert) { - TESTVALUE to_insert = (OMTVALUE)extra_insert; 
+insert_helper (DMTVALUE value, void* extra_insert) { + DMTVALUE to_insert = (DMTVALUE)extra_insert; assert(to_insert); if (V(value)->number < V(to_insert)->number) return -1; @@ -498,7 +602,7 @@ static void test_create_insert (enum close_when_done do_close) { uint32_t i = 0; - uint32_t* perm = NULL; + uint32_t* perm = nullptr; MALLOC_N(length, perm); assert(perm); @@ -510,11 +614,11 @@ test_create_insert (enum close_when_done do_close) { length = 0; while (length < size) { uint32_t choice = perm[length]; - TESTVALUE to_insert = &nums[choice]; + DMTVALUE to_insert = &nums[choice]; uint32_t idx = UINT32_MAX; - assert(length==toku_omt_size(global_omt)); - r = toku_omt_insert(global_omt, to_insert, insert_helper, to_insert, &idx); + assert(length==global_dmt->size()); + r = dmt_insert(global_dmt, to_insert, insert_helper, to_insert, &idx); CKERR(r); assert(idx <= length); if (idx > 0) { @@ -524,24 +628,24 @@ test_create_insert (enum close_when_done do_close) { assert(V(to_insert)->number < V(values[idx])->number); } length++; - assert(length==toku_omt_size(global_omt)); + assert(length==global_dmt->size()); /* Make room */ for (i = length-1; i > idx; i--) { values[i] = values[i-1]; } values[idx] = to_insert; - test_fetch_verify(global_omt, values, length); - test_iterate_verify(global_omt, values, length); + test_fetch_verify(global_dmt, values, length); + test_iterate_verify(global_dmt, values, length); idx = UINT32_MAX; - r = toku_omt_insert(global_omt, to_insert, insert_helper, to_insert, &idx); + r = dmt_insert(global_dmt, to_insert, insert_helper, to_insert, &idx); CKERR2(r, DB_KEYEXIST); assert(idx < length); assert(V(values[idx])->number == V(to_insert)->number); - assert(length==toku_omt_size(global_omt)); + assert(length==global_dmt->size()); - test_iterate_verify(global_omt, values, length); - test_fetch_verify(global_omt, values, length); + test_iterate_verify(global_dmt, values, length); + test_fetch_verify(global_dmt, values, length); } toku_free(perm); @@ -555,91 +659,118 @@ test_create_delete_at (enum create_type create_choice, enum close_when_done do_c int r = ENOSYS; test_create_from_sorted_array(create_choice, KEEP_WHEN_DONE); - assert(length == toku_omt_size(global_omt)); - r = toku_omt_delete_at(global_omt, length); + assert(length == global_dmt->size()); + r = global_dmt->delete_at(length); CKERR2(r,EINVAL); - assert(length == toku_omt_size(global_omt)); - r = toku_omt_delete_at(global_omt, length+1); + assert(length == global_dmt->size()); + r = global_dmt->delete_at(length+1); CKERR2(r,EINVAL); while (length > 0) { - assert(length == toku_omt_size(global_omt)); + assert(length == global_dmt->size()); uint32_t index_to_delete = random()%length; - r = toku_omt_delete_at(global_omt, index_to_delete); + r = global_dmt->delete_at(index_to_delete); CKERR(r); for (i = index_to_delete+1; i < length; i++) { values[i-1] = values[i]; } length--; - test_fetch_verify(global_omt, values, length); - test_iterate_verify(global_omt, values, length); + test_fetch_verify(global_dmt, values, length); + test_iterate_verify(global_dmt, values, length); } assert(length == 0); - assert(length == toku_omt_size(global_omt)); - r = toku_omt_delete_at(global_omt, length); + assert(length == global_dmt->size()); + r = global_dmt->delete_at(length); CKERR2(r, EINVAL); - assert(length == toku_omt_size(global_omt)); - r = toku_omt_delete_at(global_omt, length+1); + assert(length == global_dmt->size()); + r = global_dmt->delete_at(length+1); CKERR2(r, EINVAL); test_close(do_close); } +static int 
dmt_merge(DMT leftdmt, DMT rightdmt, DMT *newdmtp) { + DMT XMALLOC(newdmt); + newdmt->create(); + int r; + for (uint32_t i = 0; i < leftdmt->size(); i++) { + DMTVALUE v; + r = leftdmt->fetch(i, nullptr, &v); + invariant_zero(r); + r = newdmt->insert_at(v, i); + invariant_zero(r); + } + uint32_t offset = leftdmt->size(); + for (uint32_t i = 0; i < rightdmt->size(); i++) { + DMTVALUE v; + r = rightdmt->fetch(i, nullptr, &v); + invariant_zero(r); + r = newdmt->insert_at(v, i+offset); + invariant_zero(r); + } + leftdmt->destroy(); + rightdmt->destroy(); + toku_free(leftdmt); + toku_free(rightdmt); + *newdmtp = newdmt; + return 0; +} + static void test_split_merge (enum create_type create_choice, enum close_when_done do_close) { int r = ENOSYS; uint32_t i = 0; - OMT left_split = NULL; - OMT right_split = NULL; + DMT left_split = nullptr; + DMT right_split = nullptr; test_create_from_sorted_array(create_choice, KEEP_WHEN_DONE); for (i = 0; i <= length; i++) { - r = toku_omt_split_at(global_omt, &right_split, length+1); + r = dmt_split_at(global_dmt, &right_split, length+1); CKERR2(r,EINVAL); - r = toku_omt_split_at(global_omt, &right_split, length+2); + r = dmt_split_at(global_dmt, &right_split, length+2); CKERR2(r,EINVAL); // // test successful split // - r = toku_omt_split_at(global_omt, &right_split, i); + r = dmt_split_at(global_dmt, &right_split, i); CKERR(r); - left_split = global_omt; - global_omt = NULL; - assert(toku_omt_size(left_split) == i); - assert(toku_omt_size(right_split) == length - i); + left_split = global_dmt; + global_dmt = nullptr; + assert(left_split->size() == i); + assert(right_split->size() == length - i); test_fetch_verify(left_split, values, i); test_iterate_verify(left_split, values, i); test_fetch_verify(right_split, &values[i], length - i); test_iterate_verify(right_split, &values[i], length - i); // - // verify that new global_omt's cannot do bad splits + // verify that new global_dmt's cannot do bad splits // - r = toku_omt_split_at(left_split, &global_omt, i+1); + r = dmt_split_at(left_split, &global_dmt, i+1); CKERR2(r,EINVAL); - assert(toku_omt_size(left_split) == i); - assert(toku_omt_size(right_split) == length - i); - r = toku_omt_split_at(left_split, &global_omt, i+2); + assert(left_split->size() == i); + assert(right_split->size() == length - i); + r = dmt_split_at(left_split, &global_dmt, i+2); CKERR2(r,EINVAL); - assert(toku_omt_size(left_split) == i); - assert(toku_omt_size(right_split) == length - i); - r = toku_omt_split_at(right_split, &global_omt, length - i + 1); + assert(left_split->size() == i); + assert(right_split->size() == length - i); + r = dmt_split_at(right_split, &global_dmt, length - i + 1); CKERR2(r,EINVAL); - assert(toku_omt_size(left_split) == i); - assert(toku_omt_size(right_split) == length - i); - r = toku_omt_split_at(right_split, &global_omt, length - i + 1); + assert(left_split->size() == i); + assert(right_split->size() == length - i); + r = dmt_split_at(right_split, &global_dmt, length - i + 1); CKERR2(r,EINVAL); - assert(toku_omt_size(left_split) == i); - assert(toku_omt_size(right_split) == length - i); + assert(left_split->size() == i); + assert(right_split->size() == length - i); // // test merge // - r = toku_omt_merge(left_split,right_split,&global_omt); + r = dmt_merge(left_split,right_split,&global_dmt); CKERR(r); - left_split = NULL; - right_split = NULL; - assert(toku_omt_size(global_omt) == length); - test_fetch_verify(global_omt, values, length); - test_iterate_verify(global_omt, values, length); + left_split = 
nullptr; + right_split = nullptr; + assert(global_dmt->size() == length); + test_fetch_verify(global_dmt, values, length); + test_iterate_verify(global_dmt, values, length); } test_close(do_close); } @@ -693,8 +824,8 @@ typedef struct { static int -test_heaviside (OMTVALUE v_omt, void* x) { - TESTVALUE v = (OMTVALUE) v_omt; +test_heaviside (DMTVALUE v_dmt, void* x) { + DMTVALUE v = (DMTVALUE) v_dmt; h_extra* extra = (h_extra*)x; assert(v && x); assert(extra->first_zero <= extra->first_pos); @@ -712,24 +843,24 @@ heavy_extra (h_extra* extra, uint32_t first_zero, uint32_t first_pos) { } static void -test_find_dir (int dir, void* extra, int (*h)(OMTVALUE, void*), +test_find_dir (int dir, void* extra, int (*h)(DMTVALUE, void*), int r_expect, bool idx_will_change, uint32_t idx_expect, uint32_t number_expect, bool UU(cursor_valid)) { uint32_t idx = UINT32_MAX; uint32_t old_idx = idx; - TESTVALUE omt_val; + DMTVALUE dmt_val; int r; - omt_val = NULL; + dmt_val = nullptr; - /* Verify we can pass NULL value. */ - omt_val = NULL; + /* Verify we can pass nullptr value. */ + dmt_val = nullptr; idx = old_idx; if (dir == 0) { - r = toku_omt_find_zero(global_omt, h, extra, NULL, &idx); + r = dmt_find_zero(global_dmt, h, extra, nullptr, &idx); } else { - r = toku_omt_find( global_omt, h, extra, dir, NULL, &idx); + r = dmt_find( global_dmt, h, extra, dir, nullptr, &idx); } CKERR2(r, r_expect); if (idx_will_change) { @@ -738,38 +869,38 @@ test_find_dir (int dir, void* extra, int (*h)(OMTVALUE, void*), else { assert(idx == old_idx); } - assert(omt_val == NULL); + assert(dmt_val == nullptr); - /* Verify we can pass NULL idx. */ - omt_val = NULL; + /* Verify we can pass nullptr idx. */ + dmt_val = nullptr; idx = old_idx; if (dir == 0) { - r = toku_omt_find_zero(global_omt, h, extra, &omt_val, 0); + r = dmt_find_zero(global_dmt, h, extra, &dmt_val, 0); } else { - r = toku_omt_find( global_omt, h, extra, dir, &omt_val, 0); + r = dmt_find( global_dmt, h, extra, dir, &dmt_val, 0); } CKERR2(r, r_expect); assert(idx == old_idx); if (r == DB_NOTFOUND) { - assert(omt_val == NULL); + assert(dmt_val == nullptr); } else { - assert(V(omt_val)->number == number_expect); + assert(V(dmt_val)->number == number_expect); } - /* Verify we can pass NULL both. */ - omt_val = NULL; + /* Verify we can pass nullptr both. */ + dmt_val = nullptr; idx = old_idx; if (dir == 0) { - r = toku_omt_find_zero(global_omt, h, extra, NULL, 0); + r = dmt_find_zero(global_dmt, h, extra, nullptr, 0); } else { - r = toku_omt_find( global_omt, h, extra, dir, NULL, 0); + r = dmt_find( global_dmt, h, extra, dir, nullptr, 0); } CKERR2(r, r_expect); assert(idx == old_idx); - assert(omt_val == NULL); + assert(dmt_val == nullptr); } static void @@ -857,33 +988,34 @@ runtests_create_choice (enum create_type create_choice) { static void test_clone(uint32_t nelts) // Test that each clone operation gives the right data back. If nelts is -// zero, also tests that you still get a valid OMT back and that the way +// zero, also tests that you still get a valid DMT back and that the way // to deallocate it still works. 
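// A minimal usage sketch of the clone path exercised below, assuming the DMT typedef
// and dmt_insert_at() wrapper defined earlier in this test (illustration only, not a
// hunk of this patch):
//
//     DMT src;  XMALLOC(src);  src->create();
//     dmt_insert_at(src, (DMTVALUE) 0L, 0);        // populate the source
//     DMT dest; XMALLOC(dest); dest->clone(*src);  // clone copies every stored value
//     assert(dest->size() == src->size());
//     dest->destroy(); toku_free(dest);            // a clone is torn down like any dmt
//     src->destroy();  toku_free(src);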
{ - OMT src = NULL, dest = NULL; - int r; + DMT src = nullptr, dest = nullptr; + int r = 0; - r = toku_omt_create(&src); - assert_zero(r); + XMALLOC(src); + src->create(); for (long i = 0; i < nelts; ++i) { - r = toku_omt_insert_at(src, (OMTVALUE) i, i); + r = dmt_insert_at(src, (DMTVALUE) i, i); assert_zero(r); } - r = toku_omt_clone_noptr(&dest, src); - assert_zero(r); - assert(dest != NULL); - assert(toku_omt_size(dest) == nelts); + XMALLOC(dest); + dest->clone(*src); + assert(dest->size() == nelts); for (long i = 0; i < nelts; ++i) { - OMTVALUE v; + DMTVALUE v; long l; - r = toku_omt_fetch(dest, i, &v); + r = dest->fetch(i, nullptr, &v); assert_zero(r); l = (long) v; assert(l == i); } - toku_omt_destroy(&dest); - toku_omt_destroy(&src); + dest->destroy(); + toku_free(dest); + src->destroy(); + toku_free(src); } int @@ -893,7 +1025,6 @@ test_main(int argc, const char *argv[]) { test_create( CLOSE_WHEN_DONE); test_create_size( CLOSE_WHEN_DONE); runtests_create_choice(BATCH_INSERT); - runtests_create_choice(STEAL_ARRAY); runtests_create_choice(INSERT_AT); runtests_create_choice(INSERT_AT_ALMOST_RANDOM); test_clone(0); diff --git a/storage/tokudb/ft-index/ft/tests/dmt-test2.cc b/storage/tokudb/ft-index/ft/tests/dmt-test2.cc new file mode 100644 index 0000000000000..707ad9a5a7ef6 --- /dev/null +++ b/storage/tokudb/ft-index/ft/tests/dmt-test2.cc @@ -0,0 +1,373 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. 
+ +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." + +#include "test.h" + +#include + +static void +parse_args (int argc, const char *argv[]) { + const char *argv0=argv[0]; + while (argc>1) { + int resultcode=0; + if (strcmp(argv[1], "-v")==0) { + verbose++; + } else if (strcmp(argv[1], "-q")==0) { + verbose = 0; + } else if (strcmp(argv[1], "-h")==0) { + do_usage: + fprintf(stderr, "Usage:\n%s [-v|-h]\n", argv0); + exit(resultcode); + } else { + resultcode=1; + goto do_usage; + } + argc--; + argv++; + } +} +/* End ".h like" stuff. 
*/ + +struct value { + uint32_t number; +}; +#define V(x) ((struct value *)(x)) + + + +const uint32_t MAXNUM = 1024; +const uint32_t MAXLEN = 32; +char data[MAXNUM][MAXLEN]; + +struct val_type { + char c[MAXLEN]; +}; + +namespace toku { +class vwriter { + public: + size_t get_size(void) const { + size_t len = strlen(v.c); + invariant(len < sizeof(val_type)); + return len + 1; + } + void write_to(val_type *const dest) const { + strcpy(dest->c, v.c); + } + + vwriter(const char* c) { + invariant(strlen(c) < sizeof(val_type)); + strcpy(v.c, c); + } + + vwriter(const uint32_t klpair_len, val_type *const src) { + invariant(strlen(src->c) < sizeof(val_type)); + strcpy(v.c, src->c); + invariant(klpair_len == get_size()); + } + private: + val_type v; +}; +} + +/* Globals */ +typedef toku::dmt vdmt; + +const unsigned int random_seed = 0xFEADACBA; + +/////////////// + + +static void fail_one_verify(uint32_t len, uint32_t num, vdmt *v) { + val_type* fetched_data; + int count = 0; + v->verify(); + for (uint32_t i = 0; i < num; i++) { + uint32_t fetched_len; + int r = v->fetch(i-count, &fetched_len, &fetched_data); + if (r != 0 || fetched_len != len || strcmp(fetched_data->c, data[i])) { + count++; + continue; + } + } + invariant(count == 1); +} + +static void verify(uint32_t len, uint32_t num, vdmt *v) { + v->verify(); + val_type* fetched_data; + for (uint32_t i = 0; i < num; i++) { + uint32_t fetched_len; + int r = v->fetch(i, &fetched_len, &fetched_data); + CKERR(r); + invariant(fetched_len == len); + invariant(!strcmp(fetched_data->c, data[i])); + } +} + + +static void test_builder_fixed(uint32_t len, uint32_t num) { + srandom(random_seed); + assert(len > 1); + assert(len <= MAXLEN); + assert(num <= MAXNUM); + for (uint32_t i = 0; i < num; i++) { + for (uint32_t j = 0; j < len-1; j++) { + data[i][j] = random() % 255 + 1; //This way it doesn't end up being 0 and thought of as NUL + } + data[i][len-1] = '\0'; //cap it + } + + vdmt::builder builder; + builder.create(num, num * len); + + for (uint32_t i = 0; i < num; i++) { + vwriter vfun(data[i]); + builder.append(vfun); + } + invariant(builder.value_length_is_fixed()); + vdmt v; + builder.build(&v); + invariant(v.value_length_is_fixed()); + invariant(v.get_fixed_length() == len || num == 0); + + invariant(v.size() == num); + + verify(len, num, &v); + + for (uint32_t change = 0; change < num; change++) { + vdmt v2; + v2.clone(v); + v2.delete_at(change); + fail_one_verify(len, num, &v2); + + vwriter vfun(data[change]); + v2.insert_at(vfun, change); + verify(len, num, &v2); + v2.destroy(); + } + + v.destroy(); +} + +static void test_builder_variable(uint32_t len, uint32_t len2, uint32_t num) { + srandom(random_seed); + assert(len > 1); + assert(len <= MAXLEN); + assert(num <= MAXNUM); + assert(num > 3); + uint32_t which2 = random() % num; + for (uint32_t i = 0; i < num; i++) { + uint32_t thislen = i == which2 ? 
len2 : len; + for (uint32_t j = 0; j < thislen-1; j++) { + data[i][j] = random() % 255 + 1; //This way it doesn't end up being 0 and thought of as NUL + } + data[i][thislen-1] = '\0'; //cap it + } + + vdmt::builder builder; + builder.create(num, (num-1) * len + len2); + + for (uint32_t i = 0; i < num; i++) { + vwriter vfun(data[i]); + builder.append(vfun); + } + invariant(!builder.value_length_is_fixed()); + vdmt v; + builder.build(&v); + invariant(!v.value_length_is_fixed()); + + invariant(v.size() == num); + + val_type* fetched_data; + for (uint32_t i = 0; i < num; i++) { + uint32_t fetched_len; + int r = v.fetch(i, &fetched_len, &fetched_data); + CKERR(r); + if (i == which2) { + invariant(fetched_len == len2); + invariant(!strcmp(fetched_data->c, data[i])); + } else { + invariant(fetched_len == len); + invariant(!strcmp(fetched_data->c, data[i])); + } + } + + v.destroy(); +} + +static void test_create_from_sorted_memory_of_fixed_sized_elements_and_serialize(uint32_t len, uint32_t num) { + srandom(random_seed); + assert(len <= MAXLEN); + assert(num <= MAXNUM); + for (uint32_t i = 0; i < num; i++) { + for (uint32_t j = 0; j < len-1; j++) { + data[i][j] = random() % 255 + 1; //This way it doesn't end up being 0 and thought of as NUL + } + data[i][len-1] = '\0'; //cap it + } + + char *flat = (char*)toku_xmalloc(len * num); + char *p = flat; + for (uint32_t i = 0; i < num; i++) { + memcpy(p, data[i], len); + p += len; + } + vdmt v; + + v.create_from_sorted_memory_of_fixed_size_elements(flat, num, len*num, len); + invariant(v.value_length_is_fixed()); + invariant(v.get_fixed_length() == len); + + invariant(v.size() == num); + + val_type* fetched_data; + for (uint32_t i = 0; i < num; i++) { + uint32_t fetched_len; + int r = v.fetch(i, &fetched_len, &fetched_data); + CKERR(r); + invariant(fetched_len == len); + invariant(!strcmp(fetched_data->c, data[i])); + } + + char *serialized_flat = (char*)toku_xmalloc(len*num); + struct wbuf wb; + wbuf_nocrc_init(&wb, serialized_flat, len*num); + v.prepare_for_serialize(); + v.serialize_values(len*num, &wb); + invariant(!memcmp(serialized_flat, flat, len*num)); + + + if (num > 1) { + //Currently converting to dtree treats the entire thing as NOT fixed length. + //Optional additional perf here. 
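// Delete one middle element from the dmt, mirror that deletion in the flat reference
// buffer with memmove(), then re-serialize and require byte-for-byte equality over the
// remaining len*(num-1) bytes.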
+ uint32_t which = (random() % (num-1)) + 1; // Not last, not first + invariant(which > 0 && which < num-1); + v.delete_at(which); + + memmove(flat + which*len, flat+(which+1)*len, (num-which-1) * len); + v.prepare_for_serialize(); + wbuf_nocrc_init(&wb, serialized_flat, len*(num-1)); + v.serialize_values(len*(num-1), &wb); + invariant(!memcmp(serialized_flat, flat, len*(num-1))); + } + + toku_free(flat); + toku_free(serialized_flat); + + v.destroy(); +} + +int +test_main(int argc, const char *argv[]) { + parse_args(argc, argv); + // Do test with size divisible by 4 and not + test_builder_fixed(4, 0); + test_builder_fixed(5, 0); + test_builder_fixed(4, 1); + test_builder_fixed(5, 1); + test_builder_fixed(4, 100); + test_builder_fixed(5, 100); + // Do test with zero, one, or both sizes divisible + test_builder_variable(4, 8, 100); + test_builder_variable(4, 5, 100); + test_builder_variable(5, 8, 100); + test_builder_variable(5, 10, 100); + + test_create_from_sorted_memory_of_fixed_sized_elements_and_serialize(4, 0); + test_create_from_sorted_memory_of_fixed_sized_elements_and_serialize(5, 0); + test_create_from_sorted_memory_of_fixed_sized_elements_and_serialize(4, 1); + test_create_from_sorted_memory_of_fixed_sized_elements_and_serialize(5, 1); + test_create_from_sorted_memory_of_fixed_sized_elements_and_serialize(4, 100); + test_create_from_sorted_memory_of_fixed_sized_elements_and_serialize(5, 100); + + return 0; +} + diff --git a/storage/tokudb/ft-index/ft/tests/fifo-test.cc b/storage/tokudb/ft-index/ft/tests/fifo-test.cc index 0a2047ab920e1..30815160684ee 100644 --- a/storage/tokudb/ft-index/ft/tests/fifo-test.cc +++ b/storage/tokudb/ft-index/ft/tests/fifo-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -94,90 +94,95 @@ PATENT RIGHTS GRANT: #include "test.h" static void -test_fifo_create (void) { - int r; - FIFO f; +test_create (void) { + message_buffer msg_buffer; + msg_buffer.create(); + msg_buffer.destroy(); +} - f = 0; - r = toku_fifo_create(&f); - assert(r == 0); assert(f != 0); +static char *buildkey(size_t len) { + char *XMALLOC_N(len, k); + memset(k, 0, len); + return k; +} - toku_fifo_free(&f); - assert(f == 0); +static char *buildval(size_t len) { + char *XMALLOC_N(len, v); + memset(v, ~len, len); + return v; } static void -test_fifo_enq (int n) { - int r; - FIFO f; +test_enqueue(int n) { MSN startmsn = ZERO_MSN; - f = 0; - r = toku_fifo_create(&f); - assert(r == 0); assert(f != 0); - - char *thekey = 0; int thekeylen; - char *theval = 0; int thevallen; - - // this was a function but icc cant handle it -#define buildkey(len) { \ - thekeylen = len+1; \ - XREALLOC_N(thekeylen, thekey); \ - memset(thekey, len, thekeylen); \ - } - -#define buildval(len) { \ - thevallen = len+2; \ - XREALLOC_N(thevallen, theval); \ - memset(theval, ~len, thevallen); \ - } + message_buffer msg_buffer; + msg_buffer.create(); for (int i=0; isize == thekeylen); assert(memcmp(msg.kdbt()->data, thekey, msg.kdbt()->size) == 0); + assert((int) msg.vdbt()->size == thevallen); assert(memcmp(msg.vdbt()->data, theval, msg.vdbt()->size) == 0); + assert(i % 256 == (int)type); + assert((TXNID)i == toku_xids_get_innermost_xid(msg.xids())); + i += 1; + toku_free(thekey); + toku_free(theval); + return 0; + } + } checkit(startmsn, verbose); + msg_buffer.iterate(checkit); + assert(checkit.i == n); + + msg_buffer.destroy(); } int test_main(int argc, const char *argv[]) { default_parse_args(argc, argv); initialize_dummymsn(); - test_fifo_create(); - test_fifo_enq(4); - test_fifo_enq(512); + test_create(); + test_enqueue(4); + test_enqueue(512); return 0; } diff --git a/storage/tokudb/ft-index/ft/tests/ft-bfe-query.cc b/storage/tokudb/ft-index/ft/tests/ft-bfe-query.cc index 4bf02021d2360..d91ae0018844e 100644 --- a/storage/tokudb/ft-index/ft/tests/ft-bfe-query.cc +++ b/storage/tokudb/ft-index/ft/tests/ft-bfe-query.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -101,9 +101,8 @@ int64_key_cmp (DB *db UU(), const DBT *a, const DBT *b) { } static void -test_prefetch_read(int fd, FT_HANDLE UU(brt), FT brt_h) { +test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { int r; - brt_h->compare_fun = int64_key_cmp; FT_CURSOR XMALLOC(cursor); FTNODE dn = NULL; PAIR_ATTR attr; @@ -115,12 +114,12 @@ test_prefetch_read(int fd, FT_HANDLE UU(brt), FT brt_h) { cursor->right_is_pos_infty = true; cursor->disable_prefetching = false; - struct ftnode_fetch_extra bfe; + ftnode_fetch_extra bfe; // quick test to see that we have the right behavior when we set // disable_prefetching to true cursor->disable_prefetching = true; - fill_bfe_for_prefetch(&bfe, brt_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); FTNODE_DISK_DATA ndd = NULL; r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); @@ -132,21 +131,21 @@ test_prefetch_read(int fd, FT_HANDLE UU(brt), FT brt_h) { assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_ON_DISK); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); // now enable prefetching again cursor->disable_prefetching = false; - fill_bfe_for_prefetch(&bfe, brt_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); assert(BP_STATE(dn,0) == PT_AVAIL); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); assert(BP_STATE(dn,0) == PT_COMPRESSED); assert(BP_STATE(dn,1) == PT_COMPRESSED); assert(BP_STATE(dn,2) == PT_COMPRESSED); @@ -154,21 +153,21 @@ test_prefetch_read(int fd, FT_HANDLE UU(brt), FT brt_h) { assert(BP_STATE(dn,0) == PT_AVAIL); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_AVAIL); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); uint64_t left_key = 150; toku_fill_dbt(&cursor->range_lock_left_key, &left_key, sizeof(uint64_t)); cursor->left_is_neg_infty = false; - fill_bfe_for_prefetch(&bfe, brt_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_COMPRESSED); assert(BP_STATE(dn,2) == PT_COMPRESSED); @@ -176,21 +175,21 @@ test_prefetch_read(int fd, FT_HANDLE UU(brt), FT brt_h) { assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_AVAIL); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); uint64_t right_key = 151; toku_fill_dbt(&cursor->range_lock_right_key, &right_key, sizeof(uint64_t)); cursor->right_is_pos_infty = false; - fill_bfe_for_prefetch(&bfe, brt_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for 
hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_ON_DISK); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_COMPRESSED); assert(BP_STATE(dn,2) == PT_ON_DISK); @@ -198,20 +197,20 @@ test_prefetch_read(int fd, FT_HANDLE UU(brt), FT brt_h) { assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_ON_DISK); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); left_key = 100000; right_key = 100000; - fill_bfe_for_prefetch(&bfe, brt_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_COMPRESSED); @@ -219,20 +218,20 @@ test_prefetch_read(int fd, FT_HANDLE UU(brt), FT brt_h) { assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_AVAIL); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_free(ndd); toku_ftnode_free(&dn); left_key = 100; right_key = 100; - fill_bfe_for_prefetch(&bfe, brt_h, cursor); + bfe.create_for_prefetch( ft_h, cursor); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); assert(BP_STATE(dn,0) == PT_AVAIL); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_ON_DISK); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); assert(BP_STATE(dn,0) == PT_COMPRESSED); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_ON_DISK); @@ -240,7 +239,7 @@ test_prefetch_read(int fd, FT_HANDLE UU(brt), FT brt_h) { assert(BP_STATE(dn,0) == PT_AVAIL); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_ON_DISK); - destroy_bfe_for_prefetch(&bfe); + bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); @@ -248,9 +247,8 @@ test_prefetch_read(int fd, FT_HANDLE UU(brt), FT brt_h) { } static void -test_subset_read(int fd, FT_HANDLE UU(brt), FT brt_h) { +test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { int r; - brt_h->compare_fun = int64_key_cmp; FT_CURSOR XMALLOC(cursor); FTNODE dn = NULL; FTNODE_DISK_DATA ndd = NULL; @@ -262,16 +260,15 @@ test_subset_read(int fd, FT_HANDLE UU(brt), FT brt_h) { cursor->left_is_neg_infty = true; cursor->right_is_pos_infty = true; - struct ftnode_fetch_extra bfe; - uint64_t left_key = 150; uint64_t right_key = 151; DBT left, right; toku_fill_dbt(&left, &left_key, sizeof(left_key)); toku_fill_dbt(&right, &right_key, sizeof(right_key)); - fill_bfe_for_subset_read( - &bfe, - brt_h, + + ftnode_fetch_extra bfe; + bfe.create_for_subset_read( + ft_h, NULL, &left, &right, @@ -292,11 +289,11 @@ 
test_subset_read(int fd, FT_HANDLE UU(brt), FT brt_h) { assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_AVAIL); // need to call this twice because we had a subset read before, that touched the clock - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_COMPRESSED); @@ -317,11 +314,11 @@ test_subset_read(int fd, FT_HANDLE UU(brt), FT brt_h) { assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_AVAIL); // need to call this twice because we had a subset read before, that touched the clock - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_COMPRESSED); assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_COMPRESSED); assert(BP_STATE(dn,2) == PT_COMPRESSED); @@ -341,11 +338,11 @@ test_subset_read(int fd, FT_HANDLE UU(brt), FT brt_h) { assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_ON_DISK); // need to call this twice because we had a subset read before, that touched the clock - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); assert(BP_STATE(dn,0) == PT_AVAIL); assert(BP_STATE(dn,1) == PT_COMPRESSED); assert(BP_STATE(dn,2) == PT_ON_DISK); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); assert(BP_STATE(dn,0) == PT_COMPRESSED); assert(BP_STATE(dn,1) == PT_COMPRESSED); assert(BP_STATE(dn,2) == PT_ON_DISK); @@ -372,7 +369,7 @@ test_prefetching(void) { // source_ft.fd=fd; sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 1; @@ -384,10 +381,10 @@ test_prefetching(void) { uint64_t key2 = 200; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], &key1, sizeof(key1)); - toku_memdup_dbt(&sn.childkeys[1], &key2, sizeof(key2)); - sn.totalchildkeylens = sizeof(key1) + sizeof(key2); + DBT pivotkeys[2]; + toku_fill_dbt(&pivotkeys[0], &key1, sizeof(key1)); + toku_fill_dbt(&pivotkeys[1], &key2, sizeof(key2)); + sn.pivotkeys.create_from_dbts(pivotkeys, 2); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; BP_BLOCKNUM(&sn, 2).b = 40; @@ -398,23 +395,23 @@ test_prefetching(void) { set_BNC(&sn, 1, toku_create_empty_nl()); set_BNC(&sn, 2, toku_create_empty_nl()); //Create XIDS - XIDS 
xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_123, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_123, &xids_234, (TXNID)234); CKERR(r); // data in the buffers does not matter in this test //Cleanup: - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); - FT_HANDLE XMALLOC(brt); - FT XCALLOC(brt_h); - toku_ft_init(brt_h, + FT_HANDLE XMALLOC(ft); + FT XCALLOC(ft_h); + toku_ft_init(ft_h, make_blocknum(0), ZERO_LSN, TXNID_NONE, @@ -422,46 +419,42 @@ test_prefetching(void) { 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); - brt->ft = brt_h; - toku_blocktable_create_new(&brt_h->blocktable); + ft_h->cmp.create(int64_key_cmp, nullptr); + ft->ft = ft_h; + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(brt_h->blocktable, &b, brt_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(brt_h->blocktable, b, 100, &offset, brt_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(brt_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, brt->ft, false); + r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); assert(r==0); - test_prefetch_read(fd, brt, brt_h); - test_subset_read(fd, brt, brt_h); - - toku_free(sn.childkeys[0].data); - toku_free(sn.childkeys[1].data); - destroy_nonleaf_childinfo(BNC(&sn, 0)); - destroy_nonleaf_childinfo(BNC(&sn, 1)); - destroy_nonleaf_childinfo(BNC(&sn, 2)); - toku_free(sn.bp); - toku_free(sn.childkeys); - - toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&brt_h->blocktable); - toku_free(brt_h->h); - toku_free(brt_h); - toku_free(brt); + test_prefetch_read(fd, ft, ft_h); + test_subset_read(fd, ft, ft_h); + + toku_destroy_ftnode_internals(&sn); + + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); + ft_h->cmp.destroy(); + toku_free(ft_h->h); + toku_free(ft_h); + toku_free(ft); toku_free(ndd); r = close(fd); assert(r != -1); diff --git a/storage/tokudb/ft-index/ft/tests/ft-clock-test.cc b/storage/tokudb/ft-index/ft/tests/ft-clock-test.cc index ce31c04bac74b..50bb6d67ca54f 100644 --- a/storage/tokudb/ft-index/ft/tests/ft-clock-test.cc +++ b/storage/tokudb/ft-index/ft/tests/ft-clock-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: #include "test.h" - +#include "ft/cursor.h" enum ftnode_verify_type { read_all=1, @@ -115,13 +115,18 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen); + void *maybe_free = nullptr; bn->get_space_for_insert( idx, key, keylen, size_needed, - &r + &r, + &maybe_free ); + if (maybe_free) { + toku_free(maybe_free); + } resource_assert(r); r->type = LE_CLEAN; r->u.clean.vallen = vallen; @@ -139,11 +144,10 @@ le_malloc(bn_data* bn, uint32_t idx, const char *key, const char *val) static void -test1(int fd, FT brt_h, FTNODE *dn) { +test1(int fd, FT ft_h, FTNODE *dn) { int r; - struct ftnode_fetch_extra bfe_all; - brt_h->compare_fun = string_key_cmp; - fill_bfe_for_full_read(&bfe_all, brt_h); + ftnode_fetch_extra bfe_all; + bfe_all.create_for_full_read(ft_h); FTNODE_DISK_DATA ndd = NULL; r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_all); bool is_leaf = ((*dn)->height == 0); @@ -154,12 +158,12 @@ test1(int fd, FT brt_h, FTNODE *dn) { // should sweep and NOT get rid of anything PAIR_ATTR attr; memset(&attr,0,sizeof(attr)); - toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { assert(BP_STATE(*dn,i) == PT_AVAIL); } // should sweep and get compress all - toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { if (!is_leaf) { assert(BP_STATE(*dn,i) == PT_COMPRESSED); @@ -172,12 +176,12 @@ test1(int fd, FT brt_h, FTNODE *dn) { bool req = toku_ftnode_pf_req_callback(*dn, &bfe_all); assert(req); toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size); - toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { assert(BP_STATE(*dn,i) == PT_AVAIL); } // should sweep and get compress all - toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { if (!is_leaf) { assert(BP_STATE(*dn,i) == PT_COMPRESSED); @@ -190,15 +194,15 @@ test1(int fd, FT brt_h, FTNODE *dn) { req = toku_ftnode_pf_req_callback(*dn, &bfe_all); assert(req); toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size); - toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { assert(BP_STATE(*dn,i) == PT_AVAIL); } (*dn)->dirty = 1; - toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr); - toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr); - toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr); - toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { 
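// A dirty node must not lose partitions to partial eviction: every child stays PT_AVAIL.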
assert(BP_STATE(*dn,i) == PT_AVAIL); } @@ -212,20 +216,18 @@ static int search_cmp(const struct ft_search& UU(so), const DBT* UU(key)) { } static void -test2(int fd, FT brt_h, FTNODE *dn) { - struct ftnode_fetch_extra bfe_subset; +test2(int fd, FT ft_h, FTNODE *dn) { DBT left, right; DB dummy_db; memset(&dummy_db, 0, sizeof(dummy_db)); memset(&left, 0, sizeof(left)); memset(&right, 0, sizeof(right)); - ft_search_t search_t; + ft_search search; - brt_h->compare_fun = string_key_cmp; - fill_bfe_for_subset_read( - &bfe_subset, - brt_h, - ft_search_init(&search_t, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr), + ftnode_fetch_extra bfe_subset; + bfe_subset.create_for_subset_read( + ft_h, + ft_search_init(&search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr), &left, &right, true, @@ -233,6 +235,7 @@ test2(int fd, FT brt_h, FTNODE *dn) { false, false ); + FTNODE_DISK_DATA ndd = NULL; int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_subset); assert(r==0); @@ -246,11 +249,11 @@ test2(int fd, FT brt_h, FTNODE *dn) { assert(!BP_SHOULD_EVICT(*dn, 1)); PAIR_ATTR attr; memset(&attr,0,sizeof(attr)); - toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); assert(BP_STATE(*dn, 0) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED); assert(BP_STATE(*dn, 1) == PT_AVAIL); assert(BP_SHOULD_EVICT(*dn, 1)); - toku_ftnode_pe_callback(*dn, attr, brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); assert(BP_STATE(*dn, 1) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED); bool req = toku_ftnode_pf_req_callback(*dn, &bfe_subset); @@ -266,19 +269,16 @@ test2(int fd, FT brt_h, FTNODE *dn) { } static void -test3_leaf(int fd, FT brt_h, FTNODE *dn) { - struct ftnode_fetch_extra bfe_min; +test3_leaf(int fd, FT ft_h, FTNODE *dn) { DBT left, right; DB dummy_db; memset(&dummy_db, 0, sizeof(dummy_db)); memset(&left, 0, sizeof(left)); memset(&right, 0, sizeof(right)); - brt_h->compare_fun = string_key_cmp; - fill_bfe_for_min_read( - &bfe_min, - brt_h - ); + ftnode_fetch_extra bfe_min; + bfe_min.create_for_min_read(ft_h); + FTNODE_DISK_DATA ndd = NULL; int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_min); assert(r==0); @@ -304,20 +304,17 @@ test_serialize_nonleaf(void) { // source_ft.fd=fd; sn.max_msn_applied_to_node_on_disk.msn = 0; - char *hello_string; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 1; sn.n_children = 2; sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; - hello_string = toku_strdup("hello"); MALLOC_N(2, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_fill_dbt(&sn.childkeys[0], hello_string, 6); - sn.totalchildkeylens = 6; + DBT pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; BP_STATE(&sn,0) = PT_AVAIL; @@ -325,25 +322,30 @@ test_serialize_nonleaf(void) { set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); //Create XIDS - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_123, &xids_234, 
(TXNID)234); + r = toku_xids_create_child(xids_123, &xids_234, (TXNID)234); CKERR(r); - toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, NULL, string_key_cmp); - toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, NULL, string_key_cmp); - toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, NULL, string_key_cmp); - //Cleanup: - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku::comparator cmp; + cmp.create(string_key_cmp, nullptr); - FT_HANDLE XMALLOC(brt); - FT XCALLOC(brt_h); - toku_ft_init(brt_h, + toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp); + toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp); + toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp); + + //Cleanup: + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); + cmp.destroy(); + + FT_HANDLE XMALLOC(ft); + FT XCALLOC(ft_h); + toku_ft_init(ft_h, make_blocknum(0), ZERO_LSN, TXNID_NONE, @@ -351,46 +353,44 @@ test_serialize_nonleaf(void) { 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); - brt->ft = brt_h; + ft_h->cmp.create(string_key_cmp, nullptr); + ft->ft = ft_h; - toku_blocktable_create_new(&brt_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(brt_h->blocktable, &b, brt_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(brt_h->blocktable, b, 100, &offset, brt_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(brt_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, brt->ft, false); + r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); assert(r==0); - test1(fd, brt_h, &dn); - test2(fd, brt_h, &dn); + test1(fd, ft_h, &dn); + test2(fd, ft_h, &dn); - toku_free(hello_string); - destroy_nonleaf_childinfo(BNC(&sn, 0)); - destroy_nonleaf_childinfo(BNC(&sn, 1)); - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); toku_free(ndd); - toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&brt_h->blocktable); - toku_free(brt_h->h); - toku_free(brt_h); - toku_free(brt); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); + toku_free(ft_h->h); + ft_h->cmp.destroy(); + toku_free(ft_h); + toku_free(ft); r = close(fd); assert(r != -1); } @@ -406,7 +406,7 @@ test_serialize_leaf(void) { sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = 
FT_LAYOUT_VERSION; sn.height = 0; @@ -414,9 +414,8 @@ test_serialize_leaf(void) { sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "b", 2); - sn.totalchildkeylens = 2; + DBT pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); BP_STATE(&sn,0) = PT_AVAIL; BP_STATE(&sn,1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); @@ -425,9 +424,9 @@ test_serialize_leaf(void) { le_malloc(BLB_DATA(&sn, 0), 1, "b", "bval"); le_malloc(BLB_DATA(&sn, 1), 0, "x", "xval"); - FT_HANDLE XMALLOC(brt); - FT XCALLOC(brt_h); - toku_ft_init(brt_h, + FT_HANDLE XMALLOC(ft); + FT XCALLOC(ft_h); + toku_ft_init(ft_h, make_blocknum(0), ZERO_LSN, TXNID_NONE, @@ -435,48 +434,41 @@ test_serialize_leaf(void) { 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); - brt->ft = brt_h; + ft->ft = ft_h; - toku_blocktable_create_new(&brt_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(brt_h->blocktable, &b, brt_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(brt_h->blocktable, b, 100, &offset, brt_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(brt_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, brt->ft, false); + r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); assert(r==0); - test1(fd, brt_h, &dn); - test3_leaf(fd, brt_h,&dn); + test1(fd, ft_h, &dn); + test3_leaf(fd, ft_h,&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); - - toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&brt_h->blocktable); - toku_free(brt_h->h); - toku_free(brt_h); - toku_free(brt); + toku_destroy_ftnode_internals(&sn); + + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); + toku_free(ft_h->h); + toku_free(ft_h); + toku_free(ft); toku_free(ndd); r = close(fd); assert(r != -1); } diff --git a/storage/tokudb/ft-index/ft/tests/ft-serialize-benchmark.cc b/storage/tokudb/ft-index/ft/tests/ft-serialize-benchmark.cc index 42351e3108d30..82b96742cebbf 100644 --- a/storage/tokudb/ft-index/ft/tests/ft-serialize-benchmark.cc +++ b/storage/tokudb/ft-index/ft/tests/ft-serialize-benchmark.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -105,13 +105,18 @@ le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int va { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen); + void *maybe_free = nullptr; bn->get_space_for_insert( idx, key, keylen, size_needed, - &r + &r, + &maybe_free ); + if (maybe_free) { + toku_free(maybe_free); + } resource_assert(r); r->type = LE_CLEAN; r->u.clean.vallen = vallen; @@ -127,7 +132,7 @@ long_key_cmp(DB *UU(e), const DBT *a, const DBT *b) } static void -test_serialize_leaf(int valsize, int nelts, double entropy) { +test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) { // struct ft_handle source_ft; struct ftnode *sn, *dn; @@ -139,7 +144,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy) { sn->max_msn_applied_to_node_on_disk.msn = 0; sn->flags = 0x11223344; - sn->thisnodename.b = 20; + sn->blocknum.b = 20; sn->layout_version = FT_LAYOUT_VERSION; sn->layout_version_original = FT_LAYOUT_VERSION; sn->height = 0; @@ -147,8 +152,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy) { sn->dirty = 1; sn->oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn->n_children, sn->bp); - MALLOC_N(sn->n_children-1, sn->childkeys); - sn->totalchildkeylens = 0; + sn->pivotkeys.create_empty(); for (int i = 0; i < sn->n_children; ++i) { BP_STATE(sn,i) = PT_AVAIL; set_BLB(sn, i, toku_create_empty_bn()); @@ -176,14 +180,14 @@ test_serialize_leaf(int valsize, int nelts, double entropy) { ); } if (ck < 7) { - toku_memdup_dbt(&sn->childkeys[ck], &k, sizeof k); - sn->totalchildkeylens += sizeof k; + DBT pivotkey; + sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck); } } - FT_HANDLE XMALLOC(brt); - FT XCALLOC(brt_h); - toku_ft_init(brt_h, + FT_HANDLE XMALLOC(ft); + FT XCALLOC(ft_h); + toku_ft_init(ft_h, make_blocknum(0), ZERO_LSN, TXNID_NONE, @@ -191,70 +195,100 @@ test_serialize_leaf(int valsize, int nelts, double entropy) { 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); - brt->ft = brt_h; + ft_h->cmp.create(long_key_cmp, nullptr); + ft->ft = ft_h; - brt_h->compare_fun = long_key_cmp; - toku_blocktable_create_new(&brt_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(brt_h->blocktable, &b, brt_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(brt_h->blocktable, b, 100, &offset, brt_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(brt_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } + struct timeval total_start; + struct timeval total_end; + total_start.tv_sec = total_start.tv_usec = 0; + total_end.tv_sec = total_end.tv_usec = 0; struct timeval t[2]; - gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, &ndd, true, brt->ft, false); - assert(r==0); - gettimeofday(&t[1], NULL); + for (int i = 0; i < ser_runs; i++) { + gettimeofday(&t[0], NULL); + ndd = NULL; + 
sn->dirty = 1; + r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, &ndd, true, ft->ft, false); + assert(r==0); + gettimeofday(&t[1], NULL); + total_start.tv_sec += t[0].tv_sec; + total_start.tv_usec += t[0].tv_usec; + total_end.tv_sec += t[1].tv_sec; + total_end.tv_usec += t[1].tv_usec; + toku_free(ndd); + } double dt; - dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); - printf("serialize leaf: %0.05lf\n", dt); - - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, brt_h); - gettimeofday(&t[0], NULL); - FTNODE_DISK_DATA ndd2 = NULL; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); - assert(r==0); - gettimeofday(&t[1], NULL); - dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); - printf("deserialize leaf: %0.05lf\n", dt); - printf("io time %lf decompress time %lf deserialize time %lf\n", - tokutime_to_seconds(bfe.io_time), - tokutime_to_seconds(bfe.decompress_time), - tokutime_to_seconds(bfe.deserialize_time) + dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC); + dt *= 1000; + dt /= ser_runs; + printf("serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs); + + //reset + total_start.tv_sec = total_start.tv_usec = 0; + total_end.tv_sec = total_end.tv_usec = 0; + + ftnode_fetch_extra bfe; + for (int i = 0; i < deser_runs; i++) { + bfe.create_for_full_read(ft_h); + gettimeofday(&t[0], NULL); + FTNODE_DISK_DATA ndd2 = NULL; + r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); + assert(r==0); + gettimeofday(&t[1], NULL); + + total_start.tv_sec += t[0].tv_sec; + total_start.tv_usec += t[0].tv_usec; + total_end.tv_sec += t[1].tv_sec; + total_end.tv_usec += t[1].tv_usec; + + toku_ftnode_free(&dn); + toku_free(ndd2); + } + dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC); + dt *= 1000; + dt /= deser_runs; + printf("deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs); + printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (average of %d runs)\n", + tokutime_to_seconds(bfe.io_time)*1000, + tokutime_to_seconds(bfe.decompress_time)*1000, + tokutime_to_seconds(bfe.deserialize_time)*1000, + deser_runs ); - toku_ftnode_free(&dn); toku_ftnode_free(&sn); - toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&brt_h->blocktable); - toku_free(brt_h->h); - toku_free(brt_h); - toku_free(brt); - toku_free(ndd); - toku_free(ndd2); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); + ft_h->cmp.destroy(); + toku_free(ft_h->h); + toku_free(ft_h); + toku_free(ft); r = close(fd); assert(r != -1); } static void -test_serialize_nonleaf(int valsize, int nelts, double entropy) { +test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) { // struct ft_handle source_ft; struct ftnode sn, *dn; @@ -265,7 +299,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) { // source_ft.fd=fd; sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 1; @@ -273,18 +307,19 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) { sn.dirty = 1; 
sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - sn.totalchildkeylens = 0; + sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { BP_BLOCKNUM(&sn, i).b = 30 + (i*5); BP_STATE(&sn,i) = PT_AVAIL; set_BNC(&sn, i, toku_create_empty_nl()); } //Create XIDS - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); + toku::comparator cmp; + cmp.create(long_key_cmp, nullptr); int nperchild = nelts / 8; for (int ck = 0; ck < sn.n_children; ++ck) { long k; @@ -300,21 +335,22 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) { } memset(&buf[c], 0, valsize - c); - toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, next_dummymsn(), xids_123, true, NULL, long_key_cmp); + toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, next_dummymsn(), xids_123, true, cmp); } if (ck < 7) { - toku_memdup_dbt(&sn.childkeys[ck], &k, sizeof k); - sn.totalchildkeylens += sizeof k; + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck); } } //Cleanup: - xids_destroy(&xids_0); - xids_destroy(&xids_123); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + cmp.destroy(); - FT_HANDLE XMALLOC(brt); - FT XCALLOC(brt_h); - toku_ft_init(brt_h, + FT_HANDLE XMALLOC(ft); + FT XCALLOC(ft_h); + toku_ft_init(ft_h, make_blocknum(0), ZERO_LSN, TXNID_NONE, @@ -322,70 +358,66 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) { 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); - brt->ft = brt_h; + ft_h->cmp.create(long_key_cmp, nullptr); + ft->ft = ft_h; - brt_h->compare_fun = long_key_cmp; - toku_blocktable_create_new(&brt_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(brt_h->blocktable, &b, brt_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(brt_h->blocktable, b, 100, &offset, brt_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(brt_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } struct timeval t[2]; gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, brt->ft, false); + r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); assert(r==0); gettimeofday(&t[1], NULL); double dt; dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); - printf("serialize nonleaf: %0.05lf\n", dt); + dt *= 1000; + printf("serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs); - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, brt_h); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_h); gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd2 = NULL; r = toku_deserialize_ftnode_from(fd, 
make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); assert(r==0); gettimeofday(&t[1], NULL); dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); - printf("deserialize nonleaf: %0.05lf\n", dt); - printf("io time %lf decompress time %lf deserialize time %lf\n", - tokutime_to_seconds(bfe.io_time), - tokutime_to_seconds(bfe.decompress_time), - tokutime_to_seconds(bfe.deserialize_time) + dt *= 1000; + printf("deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs); + printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (IGNORED RUNS=%d)\n", + tokutime_to_seconds(bfe.io_time)*1000, + tokutime_to_seconds(bfe.decompress_time)*1000, + tokutime_to_seconds(bfe.deserialize_time)*1000, + deser_runs ); toku_ftnode_free(&dn); - - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; ++i) { - destroy_nonleaf_childinfo(BNC(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); - - toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&brt_h->blocktable); - toku_free(brt_h->h); - toku_free(brt_h); - toku_free(brt); + toku_destroy_ftnode_internals(&sn); + + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); + toku_free(ft_h->h); + ft_h->cmp.destroy(); + toku_free(ft_h); + toku_free(ft); toku_free(ndd); toku_free(ndd2); @@ -394,19 +426,32 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) { int test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { - long valsize, nelts; + const int DEFAULT_RUNS = 5; + long valsize, nelts, ser_runs = DEFAULT_RUNS, deser_runs = DEFAULT_RUNS; double entropy = 0.3; - if (argc != 3) { - fprintf(stderr, "Usage: %s \n", argv[0]); + if (argc != 3 && argc != 5) { + fprintf(stderr, "Usage: %s [ ]\n", argv[0]); + fprintf(stderr, "Default (and min) runs is %d\n", DEFAULT_RUNS); return 2; } valsize = strtol(argv[1], NULL, 0); nelts = strtol(argv[2], NULL, 0); + if (argc == 5) { + ser_runs = strtol(argv[3], NULL, 0); + deser_runs = strtol(argv[4], NULL, 0); + } + + if (ser_runs <= 0) { + ser_runs = DEFAULT_RUNS; + } + if (deser_runs <= 0) { + deser_runs = DEFAULT_RUNS; + } initialize_dummymsn(); - test_serialize_leaf(valsize, nelts, entropy); - test_serialize_nonleaf(valsize, nelts, entropy); + test_serialize_leaf(valsize, nelts, entropy, ser_runs, deser_runs); + test_serialize_nonleaf(valsize, nelts, entropy, ser_runs, deser_runs); return 0; } diff --git a/storage/tokudb/ft-index/ft/tests/ft-serialize-sub-block-test.cc b/storage/tokudb/ft-index/ft/tests/ft-serialize-sub-block-test.cc index ca8c806f04e46..f0be59a811c79 100644 --- a/storage/tokudb/ft-index/ft/tests/ft-serialize-sub-block-test.cc +++ b/storage/tokudb/ft-index/ft/tests/ft-serialize-sub-block-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -92,9 +92,9 @@ PATENT RIGHTS GRANT: #include "test.h" -// create a brt and put n rows into it -// write the brt to the file -// verify the rows in the brt +// create a ft and put n rows into it +// write the ft to the file +// verify the rows in the ft static void test_sub_block(int n) { if (verbose) printf("%s:%d %d\n", __FUNCTION__, __LINE__, n); @@ -107,14 +107,14 @@ static void test_sub_block(int n) { int error; CACHETABLE ct; - FT_HANDLE brt; + FT_HANDLE ft; int i; unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - error = toku_open_ft_handle(fname, true, &brt, nodesize, basementnodesize, compression_method, ct, null_txn, toku_builtin_compare_fun); + error = toku_open_ft_handle(fname, true, &ft, nodesize, basementnodesize, compression_method, ct, null_txn, toku_builtin_compare_fun); assert(error == 0); // insert keys 0, 1, 2, .. (n-1) @@ -124,20 +124,20 @@ static void test_sub_block(int n) { DBT key, val; toku_fill_dbt(&key, &k, sizeof k); toku_fill_dbt(&val, &v, sizeof v); - toku_ft_insert(brt, &key, &val, 0); + toku_ft_insert(ft, &key, &val, 0); assert(error == 0); } // write to the file - error = toku_close_ft_handle_nolsn(brt, 0); + error = toku_close_ft_handle_nolsn(ft, 0); assert(error == 0); - // verify the brt by walking a cursor through the rows - error = toku_open_ft_handle(fname, false, &brt, nodesize, basementnodesize, compression_method, ct, null_txn, toku_builtin_compare_fun); + // verify the ft by walking a cursor through the rows + error = toku_open_ft_handle(fname, false, &ft, nodesize, basementnodesize, compression_method, ct, null_txn, toku_builtin_compare_fun); assert(error == 0); FT_CURSOR cursor; - error = toku_ft_cursor(brt, &cursor, NULL, false, false); + error = toku_ft_cursor(ft, &cursor, NULL, false, false); assert(error == 0); for (i=0; ; i++) { @@ -155,7 +155,7 @@ static void test_sub_block(int n) { toku_ft_cursor_close(cursor); - error = toku_close_ft_handle_nolsn(brt, 0); + error = toku_close_ft_handle_nolsn(ft, 0); assert(error == 0); toku_cachetable_close(&ct); diff --git a/storage/tokudb/ft-index/ft/tests/ft-serialize-test.cc b/storage/tokudb/ft-index/ft/tests/ft-serialize-test.cc index adfd2b7efd45b..266cf50f8ce4e 100644 --- a/storage/tokudb/ft-index/ft/tests/ft-serialize-test.cc +++ b/storage/tokudb/ft-index/ft/tests/ft-serialize-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -98,57 +98,53 @@ PATENT RIGHTS GRANT: #endif static size_t -calc_le_size(int keylen, int vallen) { - return LE_CLEAN_MEMSIZE(vallen) + keylen + sizeof(uint32_t); -} - -static void le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keysize, const char *val, int valsize) { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(valsize); + void *maybe_free = nullptr; bn->get_space_for_insert( - idx, + idx, key, keysize, size_needed, - &r + &r, + &maybe_free ); + if (maybe_free) { + toku_free(maybe_free); + } resource_assert(r); r->type = LE_CLEAN; r->u.clean.vallen = valsize; memcpy(r->u.clean.val, val, valsize); + return size_needed + keysize + sizeof(uint32_t); } -static KLPAIR -le_fastmalloc(struct mempool * mp, const char *key, int keylen, const char *val, int vallen) -{ - KLPAIR kl; - size_t le_size = calc_le_size(keylen, vallen); - CAST_FROM_VOIDP(kl, toku_mempool_malloc(mp, le_size, 1)); - resource_assert(kl); - kl->keylen = keylen; - memcpy(kl->key_le, key, keylen); - LEAFENTRY le = get_le_from_klpair(kl); - le->type = LE_CLEAN; - le->u.clean.vallen = vallen; - memcpy(le->u.clean.val, val, vallen); - return kl; -} +class test_key_le_pair { + public: + uint32_t keylen; + char* keyp; + LEAFENTRY le; -static KLPAIR -le_malloc(struct mempool * mp, const char *key, const char *val) -{ - int keylen = strlen(key) + 1; - int vallen = strlen(val) + 1; - return le_fastmalloc(mp, key, keylen, val, vallen); -} + test_key_le_pair() : keylen(), keyp(), le() {} + void init(const char *_keyp, const char *_val) { + init(_keyp, strlen(_keyp) + 1, _val, strlen(_val) + 1); + } + void init(const char * _keyp, uint32_t _keylen, const char*_val, uint32_t _vallen) { + keylen = _keylen; -struct check_leafentries_struct { - int nelts; - LEAFENTRY *elts; - int i; - int (*cmp)(OMTVALUE, void *); + CAST_FROM_VOIDP(le, toku_malloc(LE_CLEAN_MEMSIZE(_vallen))); + le->type = LE_CLEAN; + le->u.clean.vallen = _vallen; + memcpy(le->u.clean.val, _val, _vallen); + + CAST_FROM_VOIDP(keyp, toku_xmemdup(_keyp, keylen)); + } + ~test_key_le_pair() { + toku_free(le); + toku_free(keyp); + } }; enum ftnode_verify_type { @@ -166,18 +162,17 @@ string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) } static void -setup_dn(enum ftnode_verify_type bft, int fd, FT brt_h, FTNODE *dn, FTNODE_DISK_DATA* ndd) { +setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_DATA* ndd) { int r; - brt_h->compare_fun = string_key_cmp; if (bft == read_all) { - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, brt_h); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_h); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe); assert(r==0); } else if (bft == read_compressed || bft == read_none) { - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, brt_h); + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft_h); r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe); assert(r==0); // assert all bp's are compressed or on disk. 
@@ -187,7 +182,7 @@ setup_dn(enum ftnode_verify_type bft, int fd, FT brt_h, FTNODE *dn, FTNODE_DISK_ // if read_none, get rid of the compressed bp's if (bft == read_none) { if ((*dn)->height == 0) { - toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); // assert all bp's are on disk for (int i = 0; i < (*dn)->n_children; i++) { if ((*dn)->height == 0) { @@ -204,7 +199,7 @@ setup_dn(enum ftnode_verify_type bft, int fd, FT brt_h, FTNODE *dn, FTNODE_DISK_ // that it is available // then run partial eviction to get it compressed PAIR_ATTR attr; - fill_bfe_for_full_read(&bfe, brt_h); + bfe.create_for_full_read(ft_h); assert(toku_ftnode_pf_req_callback(*dn, &bfe)); r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr); assert(r==0); @@ -212,21 +207,21 @@ setup_dn(enum ftnode_verify_type bft, int fd, FT brt_h, FTNODE *dn, FTNODE_DISK_ for (int i = 0; i < (*dn)->n_children; i++) { assert(BP_STATE(*dn,i) == PT_AVAIL); } - toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { // assert all bp's are still available, because we touched the clock assert(BP_STATE(*dn,i) == PT_AVAIL); // now assert all should be evicted assert(BP_SHOULD_EVICT(*dn, i)); } - toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), brt_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { assert(BP_STATE(*dn,i) == PT_COMPRESSED); } } } // now decompress them - fill_bfe_for_full_read(&bfe, brt_h); + bfe.create_for_full_read(ft_h); assert(toku_ftnode_pf_req_callback(*dn, &bfe)); PAIR_ATTR attr; r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr); @@ -243,20 +238,20 @@ setup_dn(enum ftnode_verify_type bft, int fd, FT brt_h, FTNODE *dn, FTNODE_DISK_ } } -static void write_sn_to_disk(int fd, FT_HANDLE brt, FTNODE sn, FTNODE_DISK_DATA* src_ndd, bool do_clone) { +static void write_sn_to_disk(int fd, FT_HANDLE ft, FTNODE sn, FTNODE_DISK_DATA* src_ndd, bool do_clone) { int r; if (do_clone) { void* cloned_node_v = NULL; PAIR_ATTR attr; long clone_size; - toku_ftnode_clone_callback(sn, &cloned_node_v, &clone_size, &attr, false, brt->ft); + toku_ftnode_clone_callback(sn, &cloned_node_v, &clone_size, &attr, false, ft->ft); FTNODE CAST_FROM_VOIDP(cloned_node, cloned_node_v); - r = toku_serialize_ftnode_to(fd, make_blocknum(20), cloned_node, src_ndd, false, brt->ft, false); + r = toku_serialize_ftnode_to(fd, make_blocknum(20), cloned_node, src_ndd, false, ft->ft, false); assert(r==0); toku_ftnode_free(&cloned_node); } else { - r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, src_ndd, true, brt->ft, false); + r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, src_ndd, true, ft->ft, false); assert(r==0); } } @@ -275,7 +270,7 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { sn.max_msn_applied_to_node_on_disk = PRESERIALIZE_MSN_ON_DISK; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -283,23 +278,21 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { sn.dirty = 1; sn.oldest_referenced_xid_known = 
TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "b", 2); - sn.totalchildkeylens = 2; + DBT pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); BP_STATE(&sn,0) = PT_AVAIL; BP_STATE(&sn,1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); set_BLB(&sn, 1, toku_create_empty_bn()); - KLPAIR elts[3]; le_add_to_bn(BLB_DATA(&sn, 0), 0, "a", 2, "aval", 5); le_add_to_bn(BLB_DATA(&sn, 0), 1, "b", 2, "bval", 5); le_add_to_bn(BLB_DATA(&sn, 1), 0, "x", 2, "xval", 5); BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN) { MIN_MSN.msn + 73 }); BLB_MAX_MSN_APPLIED(&sn, 1) = POSTSERIALIZE_MSN_ON_DISK; - FT_HANDLE XMALLOC(brt); - FT XCALLOC(brt_h); - toku_ft_init(brt_h, + FT_HANDLE XMALLOC(ft); + FT XCALLOC(ft_h); + toku_ft_init(ft_h, make_blocknum(0), ZERO_LSN, TXNID_NONE, @@ -307,35 +300,35 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); - brt->ft = brt_h; - toku_blocktable_create_new(&brt_h->blocktable); + ft->ft = ft_h; + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(brt_h->blocktable, &b, brt_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(brt_h->blocktable, b, 100, &offset, brt_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(brt_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; - write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); + write_sn_to_disk(fd, ft, &sn, &src_ndd, do_clone); - setup_dn(bft, fd, brt_h, &dn, &dest_ndd); + setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -346,13 +339,11 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { { // Man, this is way too ugly. This entire test suite needs to be refactored. // Create a dummy mempool and put the leaves there. Ugh. 
- struct mempool dummy_mp; - toku_mempool_construct(&dummy_mp, 1024); - elts[0] = le_malloc(&dummy_mp, "a", "aval"); - elts[1] = le_malloc(&dummy_mp, "b", "bval"); - elts[2] = le_malloc(&dummy_mp, "x", "xval"); + test_key_le_pair elts[3]; + elts[0].init("a", "aval"); + elts[1].init("b", "bval"); + elts[2].init("x", "xval"); const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(2*(npartitions-1))); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(BLB_MAX_MSN_APPLIED(dn, bn).msn == POSTSERIALIZE_MSN_ON_DISK.msn); @@ -361,40 +352,32 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { if (bn > 0) { assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); } - for (uint32_t i = 0; i < BLB_DATA(dn, bn)->omt_size(); i++) { + for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; void* curr_key; BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(get_le_from_klpair(elts[last_i]))); - assert(memcmp(curr_le, get_le_from_klpair(elts[last_i]), leafentry_memsize(curr_le)) == 0); + assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le)); + assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->childkeys[bn].data, (char*)(elts[last_i]->key_le)) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, elts[last_i].keyp) <= 0); } // TODO for later, get a key comparison here as well last_i++; } } - toku_mempool_destroy(&dummy_mp); assert(last_i == 3); } + toku_ftnode_free(&dn); + toku_destroy_ftnode_internals(&sn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); - - toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&brt_h->blocktable); - toku_free(brt_h->h); - toku_free(brt_h); - toku_free(brt); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); + toku_free(ft_h->h); + toku_free(ft_h); + toku_free(ft); toku_free(src_ndd); toku_free(dest_ndd); @@ -412,7 +395,7 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -421,8 +404,7 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - sn.totalchildkeylens = (sn.n_children-1)*sizeof(int); + sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); @@ -436,14 +418,15 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone if (i < nrows-1) { uint32_t keylen; void* curr_key; - BLB_DATA(&sn, i)->fetch_le_key_and_len(0, &keylen, &curr_key); - toku_memdup_dbt(&sn.childkeys[i], curr_key, keylen); + BLB_DATA(&sn, i)->fetch_key_and_len(0, &keylen, &curr_key); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen), i); } } - FT_HANDLE XMALLOC(brt); - FT 
XCALLOC(brt_h); - toku_ft_init(brt_h, + FT_HANDLE XMALLOC(ft); + FT XCALLOC(ft_h); + toku_ft_init(ft_h, make_blocknum(0), ZERO_LSN, TXNID_NONE, @@ -451,56 +434,51 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); - brt->ft = brt_h; - toku_blocktable_create_new(&brt_h->blocktable); + ft->ft = ft_h; + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(brt_h->blocktable, &b, brt_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(brt_h->blocktable, b, 100, &offset, brt_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(brt_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; - write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); + write_sn_to_disk(fd, ft, &sn, &src_ndd, do_clone); - setup_dn(bft, fd, brt_h, &dn, &dest_ndd); + setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); { // Man, this is way too ugly. This entire test suite needs to be refactored. // Create a dummy mempool and put the leaves there. Ugh. 
- struct mempool dummy_mp; - size_t le_size = calc_le_size(keylens, vallens); - size_t mpsize = nrows * le_size; - toku_mempool_construct(&dummy_mp, mpsize); - KLPAIR les[nrows]; + test_key_le_pair *les = new test_key_le_pair[nrows]; { char key[keylens], val[vallens]; key[keylens-1] = '\0'; for (uint32_t i = 0; i < nrows; ++i) { char c = 'a' + i; memset(key, c, keylens-1); - les[i] = le_fastmalloc(&dummy_mp, (char *) &key, sizeof(key), (char *) &val, sizeof(val)); + les[i].init((char *) &key, sizeof(key), (char *) &val, sizeof(val)); } } const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(keylens*(npartitions-1))); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(dest_ndd[bn].start > 0); @@ -508,40 +486,33 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone if (bn > 0) { assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); } - assert(BLB_DATA(dn, bn)->omt_size() > 0); - for (uint32_t i = 0; i < BLB_DATA(dn, bn)->omt_size(); i++) { + assert(BLB_DATA(dn, bn)->num_klpairs() > 0); + for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; void* curr_key; BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(get_le_from_klpair(les[last_i]))); - assert(memcmp(curr_le, get_le_from_klpair(les[last_i]), leafentry_memsize(curr_le)) == 0); + assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); + assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->childkeys[bn].data, (char*)(les[last_i]->key_le)) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, les[last_i].keyp) <= 0); } // TODO for later, get a key comparison here as well last_i++; } } - toku_mempool_destroy(&dummy_mp); assert(last_i == nrows); + delete[] les; } toku_ftnode_free(&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - toku_free(sn.childkeys); - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); + toku_destroy_ftnode_internals(&sn); - toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&brt_h->blocktable); - toku_free(brt_h->h); - toku_free(brt_h); - toku_free(brt); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); + toku_free(ft_h->h); + toku_free(ft_h); + toku_free(ft); toku_free(src_ndd); toku_free(dest_ndd); @@ -552,13 +523,12 @@ static void test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { int r; struct ftnode sn, *dn; - const int keylens = sizeof(int), vallens = sizeof(int); const uint32_t nrows = 196*1024; int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -566,22 +536,22 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; - MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - sn.totalchildkeylens = (sn.n_children-1)*sizeof(int); + XMALLOC_N(sn.n_children, sn.bp); + 
sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); } + size_t total_size = 0; for (uint32_t i = 0; i < nrows; ++i) { uint32_t key = i; uint32_t val = i; - le_add_to_bn(BLB_DATA(&sn, 0), i, (char *) &key, sizeof(key), (char *) &val, sizeof(val)); + total_size += le_add_to_bn(BLB_DATA(&sn, 0), i, (char *) &key, sizeof(key), (char *) &val, sizeof(val)); } - FT_HANDLE XMALLOC(brt); - FT XCALLOC(brt_h); - toku_ft_init(brt_h, + FT_HANDLE XMALLOC(ft); + FT XCALLOC(ft_h); + toku_ft_init(ft_h, make_blocknum(0), ZERO_LSN, TXNID_NONE, @@ -589,54 +559,49 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); - brt->ft = brt_h; + ft->ft = ft_h; - toku_blocktable_create_new(&brt_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(brt_h->blocktable, &b, brt_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(brt_h->blocktable, b, 100, &offset, brt_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(brt_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; - write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); + write_sn_to_disk(fd, ft, &sn, &src_ndd, do_clone); - setup_dn(bft, fd, brt_h, &dn, &dest_ndd); + setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); { // Man, this is way too ugly. This entire test suite needs to be refactored. // Create a dummy mempool and put the leaves there. Ugh. 
- struct mempool dummy_mp; - size_t le_size = calc_le_size(keylens, vallens); - size_t mpsize = nrows * le_size; - toku_mempool_construct(&dummy_mp, mpsize); - KLPAIR les[nrows]; + test_key_le_pair *les = new test_key_le_pair[nrows]; { int key = 0, val = 0; for (uint32_t i = 0; i < nrows; ++i, key++, val++) { - les[i] = le_fastmalloc(&dummy_mp, (char *) &key, sizeof(key), (char *) &val, sizeof(val)); + les[i].init((char *) &key, sizeof(key), (char *) &val, sizeof(val)); } } const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(sizeof(int)*(npartitions-1))); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(dest_ndd[bn].start > 0); @@ -644,17 +609,17 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { if (bn > 0) { assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); } - assert(BLB_DATA(dn, bn)->omt_size() > 0); - for (uint32_t i = 0; i < BLB_DATA(dn, bn)->omt_size(); i++) { + assert(BLB_DATA(dn, bn)->num_klpairs() > 0); + for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; void* curr_key; BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(get_le_from_klpair(les[last_i]))); - assert(memcmp(curr_le, get_le_from_klpair(les[last_i]), leafentry_memsize(curr_le)) == 0); + assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); + assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - uint32_t *CAST_FROM_VOIDP(pivot, dn->childkeys[bn].data); - void* tmp = les[last_i]->key_le; + uint32_t *CAST_FROM_VOIDP(pivot, dn->pivotkeys.get_pivot(bn).data); + void* tmp = les[last_i].keyp; uint32_t *CAST_FROM_VOIDP(item, tmp); assert(*pivot >= *item); } @@ -664,25 +629,18 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { // don't check soft_copy_is_up_to_date or seqinsert assert(BLB_DATA(dn, bn)->get_disk_size() < 128*1024); // BN_MAX_SIZE, apt to change } - toku_mempool_destroy(&dummy_mp); assert(last_i == nrows); + delete[] les; } toku_ftnode_free(&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); - - toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&brt_h->blocktable); - toku_free(brt_h->h); - toku_free(brt_h); - toku_free(brt); + toku_destroy_ftnode_internals(&sn); + + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); + toku_free(ft_h->h); + toku_free(ft_h); + toku_free(ft); toku_free(src_ndd); toku_free(dest_ndd); @@ -702,7 +660,7 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -711,8 +669,7 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - sn.totalchildkeylens = (sn.n_children-1)*8; + sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, 
i, toku_create_empty_bn()); @@ -727,9 +684,9 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) le_add_to_bn(BLB_DATA(&sn, 0), i,key, 8, val, val_size); } - FT_HANDLE XMALLOC(brt); - FT XCALLOC(brt_h); - toku_ft_init(brt_h, + FT_HANDLE XMALLOC(ft); + FT XCALLOC(ft_h); + toku_ft_init(ft_h, make_blocknum(0), ZERO_LSN, TXNID_NONE, @@ -737,46 +694,42 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); - brt->ft = brt_h; + ft->ft = ft_h; - toku_blocktable_create_new(&brt_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(brt_h->blocktable, &b, brt_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(brt_h->blocktable, b, 100, &offset, brt_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(brt_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; - write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); + write_sn_to_disk(fd, ft, &sn, &src_ndd, do_clone); - setup_dn(bft, fd, brt_h, &dn, &dest_ndd); + setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); { // Man, this is way too ugly. This entire test suite needs to be refactored. // Create a dummy mempool and put the leaves there. Ugh. 
- struct mempool dummy_mp; - size_t le_size = calc_le_size(key_size, val_size); - size_t mpsize = nrows * le_size; - toku_mempool_construct(&dummy_mp, mpsize); - KLPAIR les[nrows]; + test_key_le_pair *les = new test_key_le_pair[nrows]; { char key[key_size], val[val_size]; key[key_size-1] = '\0'; @@ -785,12 +738,11 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) char c = 'a' + i; memset(key, c, key_size-1); memset(val, c, val_size-1); - les[i] = le_fastmalloc(&dummy_mp, key, key_size, val, val_size); + les[i].init(key, key_size, val, val_size); } } const uint32_t npartitions = dn->n_children; assert(npartitions == nrows); - assert(dn->totalchildkeylens==(key_size*(npartitions-1))); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(dest_ndd[bn].start > 0); @@ -798,41 +750,34 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) if (bn > 0) { assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); } - assert(BLB_DATA(dn, bn)->omt_size() > 0); - for (uint32_t i = 0; i < BLB_DATA(dn, bn)->omt_size(); i++) { + assert(BLB_DATA(dn, bn)->num_klpairs() > 0); + for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; void* curr_key; BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(get_le_from_klpair(les[last_i]))); - assert(memcmp(curr_le, get_le_from_klpair(les[last_i]), leafentry_memsize(curr_le)) == 0); + assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); + assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->childkeys[bn].data, (char*)(les[last_i]->key_le)) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(les[last_i].keyp)) <= 0); } // TODO for later, get a key comparison here as well last_i++; } // don't check soft_copy_is_up_to_date or seqinsert } - toku_mempool_destroy(&dummy_mp); assert(last_i == 7); + delete[] les; } toku_ftnode_free(&dn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); - - toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&brt_h->blocktable); - toku_free(brt_h->h); - toku_free(brt_h); - toku_free(brt); + toku_destroy_ftnode_internals(&sn); + + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); + toku_free(ft_h->h); + toku_free(ft_h); + toku_free(ft); toku_free(src_ndd); toku_free(dest_ndd); @@ -850,7 +795,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -858,27 +803,26 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "A", 2); - toku_memdup_dbt(&sn.childkeys[1], "a", 2); - toku_memdup_dbt(&sn.childkeys[2], "a", 2); - toku_memdup_dbt(&sn.childkeys[3], "b", 2); - toku_memdup_dbt(&sn.childkeys[4], "b", 
2); - toku_memdup_dbt(&sn.childkeys[5], "x", 2); - sn.totalchildkeylens = (sn.n_children-1)*2; + DBT pivotkeys[6]; + toku_fill_dbt(&pivotkeys[0], "A", 2); + toku_fill_dbt(&pivotkeys[1], "a", 2); + toku_fill_dbt(&pivotkeys[2], "a", 2); + toku_fill_dbt(&pivotkeys[3], "b", 2); + toku_fill_dbt(&pivotkeys[4], "b", 2); + toku_fill_dbt(&pivotkeys[5], "x", 2); + sn.pivotkeys.create_from_dbts(pivotkeys, 6); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); BLB_SEQINSERT(&sn, i) = 0; } - KLPAIR elts[3]; le_add_to_bn(BLB_DATA(&sn, 1), 0, "a", 2, "aval", 5); le_add_to_bn(BLB_DATA(&sn, 3), 0, "b", 2, "bval", 5); le_add_to_bn(BLB_DATA(&sn, 5), 0, "x", 2, "xval", 5); - FT_HANDLE XMALLOC(brt); - FT XCALLOC(brt_h); - toku_ft_init(brt_h, + FT_HANDLE XMALLOC(ft); + FT XCALLOC(ft_h); + toku_ft_init(ft_h, make_blocknum(0), ZERO_LSN, TXNID_NONE, @@ -886,34 +830,34 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); - brt->ft = brt_h; + ft->ft = ft_h; - toku_blocktable_create_new(&brt_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(brt_h->blocktable, &b, brt_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(brt_h->blocktable, b, 100, &offset, brt_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(brt_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; - write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); + write_sn_to_disk(fd, ft, &sn, &src_ndd, do_clone); - setup_dn(bft, fd, brt_h, &dn, &dest_ndd); + setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -921,15 +865,14 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool assert(dn->height == 0); assert(dn->n_children>0); { + test_key_le_pair elts[3]; + // Man, this is way too ugly. This entire test suite needs to be refactored. // Create a dummy mempool and put the leaves there. Ugh. 
- struct mempool dummy_mp; - toku_mempool_construct(&dummy_mp, 1024); - elts[0] = le_malloc(&dummy_mp, "a", "aval"); - elts[1] = le_malloc(&dummy_mp, "b", "bval"); - elts[2] = le_malloc(&dummy_mp, "x", "xval"); + elts[0].init("a", "aval"); + elts[1].init("b", "bval"); + elts[2].init("x", "xval"); const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(2*(npartitions-1))); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { assert(dest_ndd[bn].start > 0); @@ -937,40 +880,32 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool if (bn > 0) { assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); } - for (uint32_t i = 0; i < BLB_DATA(dn, bn)->omt_size(); i++) { + for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; void* curr_key; BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(get_le_from_klpair(elts[last_i]))); - assert(memcmp(curr_le, get_le_from_klpair(elts[last_i]), leafentry_memsize(curr_le)) == 0); + assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le)); + assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0); if (bn < npartitions-1) { - assert(strcmp((char*)dn->childkeys[bn].data, (char*)(elts[last_i]->key_le)) <= 0); + assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(elts[last_i].keyp)) <= 0); } // TODO for later, get a key comparison here as well last_i++; } } - toku_mempool_destroy(&dummy_mp); assert(last_i == 3); } + toku_ftnode_free(&dn); + toku_destroy_ftnode_internals(&sn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); - - toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&brt_h->blocktable); - toku_free(brt_h->h); - toku_free(brt_h); - toku_free(brt); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); + toku_free(ft_h->h); + toku_free(ft_h); + toku_free(ft); toku_free(src_ndd); toku_free(dest_ndd); @@ -987,7 +922,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -995,19 +930,19 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(sn.n_children-1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "A", 2); - toku_memdup_dbt(&sn.childkeys[1], "A", 2); - toku_memdup_dbt(&sn.childkeys[2], "A", 2); - sn.totalchildkeylens = (sn.n_children-1)*2; + DBT pivotkeys[3]; + toku_fill_dbt(&pivotkeys[0], "A", 2); + toku_fill_dbt(&pivotkeys[1], "A", 2); + toku_fill_dbt(&pivotkeys[2], "A", 2); + sn.pivotkeys.create_from_dbts(pivotkeys, 3); for (int i = 0; i < sn.n_children; ++i) { BP_STATE(&sn,i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); } - FT_HANDLE XMALLOC(brt); - FT XCALLOC(brt_h); - toku_ft_init(brt_h, + FT_HANDLE XMALLOC(ft); + FT XCALLOC(ft_h); + toku_ft_init(ft_h, make_blocknum(0), ZERO_LSN, TXNID_NONE, @@ -1015,35 +950,35 @@ 
test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); - brt->ft = brt_h; + ft->ft = ft_h; - toku_blocktable_create_new(&brt_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(brt_h->blocktable, &b, brt_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(brt_h->blocktable, b, 100, &offset, brt_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(brt_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; - write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); + write_sn_to_disk(fd, ft, &sn, &src_ndd, do_clone); - setup_dn(bft, fd, brt_h, &dn, &dest_ndd); + setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); @@ -1052,32 +987,24 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b assert(dn->n_children == 1); { const uint32_t npartitions = dn->n_children; - assert(dn->totalchildkeylens==(2*(npartitions-1))); for (uint32_t i = 0; i < npartitions; ++i) { assert(dest_ndd[i].start > 0); assert(dest_ndd[i].size > 0); if (i > 0) { assert(dest_ndd[i].start >= dest_ndd[i-1].start + dest_ndd[i-1].size); } - assert(BLB_DATA(dn, i)->omt_size() == 0); + assert(BLB_DATA(dn, i)->num_klpairs() == 0); } } + toku_ftnode_free(&dn); + toku_destroy_ftnode_internals(&sn); - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); - - toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&brt_h->blocktable); - toku_free(brt_h->h); - toku_free(brt_h); - toku_free(brt); + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); + toku_free(ft_h->h); + toku_free(ft_h); + toku_free(ft); toku_free(src_ndd); toku_free(dest_ndd); @@ -1097,7 +1024,7 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { // source_ft.fd=fd; sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 1; @@ -1105,9 +1032,8 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(2, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "hello", 6); - sn.totalchildkeylens = 6; + DBT pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; BP_STATE(&sn,0) = 
PT_AVAIL; @@ -1115,25 +1041,30 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); //Create XIDS - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_123, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_123, &xids_234, (TXNID)234); CKERR(r); - toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, NULL, string_key_cmp); - toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, NULL, string_key_cmp); - toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, NULL, string_key_cmp); - //Cleanup: - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku::comparator cmp; + cmp.create(string_key_cmp, nullptr); + + toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp); + toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp); + toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp); - FT_HANDLE XMALLOC(brt); - FT XCALLOC(brt_h); - toku_ft_init(brt_h, + //Cleanup: + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); + cmp.destroy(); + + FT_HANDLE XMALLOC(ft); + FT XCALLOC(ft_h); + toku_ft_init(ft_h, make_blocknum(0), ZERO_LSN, TXNID_NONE, @@ -1141,67 +1072,63 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { 128*1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); - brt->ft = brt_h; + ft_h->cmp.create(string_key_cmp, nullptr); + ft->ft = ft_h; - toku_blocktable_create_new(&brt_h->blocktable); + ft_h->blocktable.create(); { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } //Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { - toku_allocate_blocknum(brt_h->blocktable, &b, brt_h); + ft_h->blocktable.allocate_blocknum(&b, ft_h); } assert(b.b == 20); { DISKOFF offset; DISKOFF size; - toku_blocknum_realloc_on_disk(brt_h->blocktable, b, 100, &offset, brt_h, fd, false); - assert(offset==BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); + assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_translate_blocknum_to_offset_size(brt_h->blocktable, b, &offset, &size); - assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); + assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; - write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); + write_sn_to_disk(fd, ft, &sn, &src_ndd, do_clone); - setup_dn(bft, fd, brt_h, &dn, &dest_ndd); + setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->thisnodename.b==20); + assert(dn->blocknum.b==20); assert(dn->layout_version ==FT_LAYOUT_VERSION); assert(dn->layout_version_original ==FT_LAYOUT_VERSION); assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); assert(dn->height == 1); assert(dn->n_children==2); - assert(strcmp((char*)dn->childkeys[0].data, "hello")==0); - assert(dn->childkeys[0].size==6); - 
assert(dn->totalchildkeylens==6); + assert(strcmp((char*)dn->pivotkeys.get_pivot(0).data, "hello")==0); + assert(dn->pivotkeys.get_pivot(0).size==6); assert(BP_BLOCKNUM(dn,0).b==30); assert(BP_BLOCKNUM(dn,1).b==35); - FIFO src_fifo_1 = BNC(&sn, 0)->buffer; - FIFO src_fifo_2 = BNC(&sn, 1)->buffer; - FIFO dest_fifo_1 = BNC(dn, 0)->buffer; - FIFO dest_fifo_2 = BNC(dn, 1)->buffer; + message_buffer *src_msg_buffer1 = &BNC(&sn, 0)->msg_buffer; + message_buffer *src_msg_buffer2 = &BNC(&sn, 1)->msg_buffer; + message_buffer *dest_msg_buffer1 = &BNC(dn, 0)->msg_buffer; + message_buffer *dest_msg_buffer2 = &BNC(dn, 1)->msg_buffer; - assert(toku_are_fifos_same(src_fifo_1, dest_fifo_1)); - assert(toku_are_fifos_same(src_fifo_2, dest_fifo_2)); + assert(src_msg_buffer1->equals(dest_msg_buffer1)); + assert(src_msg_buffer2->equals(dest_msg_buffer2)); toku_ftnode_free(&dn); - - toku_free(sn.childkeys[0].data); - destroy_nonleaf_childinfo(BNC(&sn, 0)); - destroy_nonleaf_childinfo(BNC(&sn, 1)); - toku_free(sn.bp); - toku_free(sn.childkeys); - - toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - toku_blocktable_destroy(&brt_h->blocktable); - toku_free(brt_h->h); - toku_free(brt_h); - toku_free(brt); + toku_destroy_ftnode_internals(&sn); + + ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.destroy(); + ft_h->cmp.destroy(); + toku_free(ft_h->h); + toku_free(ft_h); + toku_free(ft); toku_free(src_ndd); toku_free(dest_ndd); diff --git a/storage/tokudb/ft-index/ft/tests/ft-test-cursor-2.cc b/storage/tokudb/ft-index/ft/tests/ft-test-cursor-2.cc index f252c9fef4c72..6e38884d57111 100644 --- a/storage/tokudb/ft-index/ft/tests/ft-test-cursor-2.cc +++ b/storage/tokudb/ft-index/ft/tests/ft-test-cursor-2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,7 +96,7 @@ static const char *fname = TOKU_TEST_FILENAME; static TOKUTXN const null_txn = 0; static int -save_data (ITEMLEN UU(keylen), bytevec UU(key), ITEMLEN vallen, bytevec val, void *v, bool lock_only) { +save_data (uint32_t UU(keylen), const void *UU(key), uint32_t vallen, const void *val, void *v, bool lock_only) { if (lock_only) return 0; assert(key!=NULL); void **CAST_FROM_VOIDP(vp, v); @@ -106,21 +106,21 @@ save_data (ITEMLEN UU(keylen), bytevec UU(key), ITEMLEN vallen, bytevec val, voi // Verify that different cursors return different data items when a DBT is initialized to all zeros (no flags) -// Note: The BRT test used to implement DBTs with per-cursor allocated space, but there isn't any such thing any more +// Note: The ft test used to implement DBTs with per-cursor allocated space, but there isn't any such thing any more // so this test is a little bit obsolete. 
static void test_multiple_ft_cursor_dbts(int n) { if (verbose) printf("test_multiple_ft_cursors:%d\n", n); int r; CACHETABLE ct; - FT_HANDLE brt; + FT_HANDLE ft; FT_CURSOR cursors[n]; unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - r = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); + r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); int i; @@ -129,14 +129,14 @@ static void test_multiple_ft_cursor_dbts(int n) { char key[10],val[10]; snprintf(key, sizeof key, "k%04d", i); snprintf(val, sizeof val, "v%04d", i); - toku_ft_insert(brt, + toku_ft_insert(ft, toku_fill_dbt(&kbt, key, 1+strlen(key)), toku_fill_dbt(&vbt, val, 1+strlen(val)), 0); } for (i=0; idata, a->size, b->data, b->size); } -static void assert_cursor_notfound(FT_HANDLE brt, int position) { +static void assert_cursor_notfound(FT_HANDLE ft, int position) { FT_CURSOR cursor=0; int r; - r = toku_ft_cursor(brt, &cursor, NULL, false, false); + r = toku_ft_cursor(ft, &cursor, NULL, false, false); assert(r==0); struct check_pair pair = {0,0,0,0,0}; @@ -117,11 +116,11 @@ static void assert_cursor_notfound(FT_HANDLE brt, int position) { toku_ft_cursor_close(cursor); } -static void assert_cursor_value(FT_HANDLE brt, int position, long long value) { +static void assert_cursor_value(FT_HANDLE ft, int position, long long value) { FT_CURSOR cursor=0; int r; - r = toku_ft_cursor(brt, &cursor, NULL, false, false); + r = toku_ft_cursor(ft, &cursor, NULL, false, false); assert(r==0); if (test_cursor_debug && verbose) printf("key: "); @@ -133,11 +132,11 @@ static void assert_cursor_value(FT_HANDLE brt, int position, long long value) { toku_ft_cursor_close(cursor); } -static void assert_cursor_first_last(FT_HANDLE brt, long long firstv, long long lastv) { +static void assert_cursor_first_last(FT_HANDLE ft, long long firstv, long long lastv) { FT_CURSOR cursor=0; int r; - r = toku_ft_cursor(brt, &cursor, NULL, false, false); + r = toku_ft_cursor(ft, &cursor, NULL, false, false); assert(r==0); if (test_cursor_debug && verbose) printf("first key: "); @@ -162,7 +161,7 @@ static void assert_cursor_first_last(FT_HANDLE brt, long long firstv, long long static void test_ft_cursor_first(int n) { CACHETABLE ct; - FT_HANDLE brt; + FT_HANDLE ft; int r; int i; @@ -170,9 +169,9 @@ static void test_ft_cursor_first(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - r = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); + r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); /* insert a bunch of kv pairs */ @@ -184,15 +183,15 @@ static void test_ft_cursor_first(int n) { toku_fill_dbt(&kbt, key, strlen(key)+1); v = i; toku_fill_dbt(&vbt, &v, sizeof v); - toku_ft_insert(brt, &kbt, &vbt, 0); + toku_ft_insert(ft, &kbt, &vbt, 0); } if (n == 0) - assert_cursor_notfound(brt, DB_FIRST); + assert_cursor_notfound(ft, DB_FIRST); else - assert_cursor_value(brt, DB_FIRST, 0); + assert_cursor_value(ft, DB_FIRST, 0); - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); @@ -200,7 +199,7 @@ static void test_ft_cursor_first(int n) { 
static void test_ft_cursor_last(int n) { CACHETABLE ct; - FT_HANDLE brt; + FT_HANDLE ft; int r; int i; @@ -208,9 +207,9 @@ static void test_ft_cursor_last(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - r = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); + r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); /* insert keys 0, 1, .. (n-1) */ @@ -222,16 +221,16 @@ static void test_ft_cursor_last(int n) { toku_fill_dbt(&kbt, key, strlen(key)+1); v = i; toku_fill_dbt(&vbt, &v, sizeof v); - toku_ft_insert(brt, &kbt, &vbt, 0); + toku_ft_insert(ft, &kbt, &vbt, 0); assert(r==0); } if (n == 0) - assert_cursor_notfound(brt, DB_LAST); + assert_cursor_notfound(ft, DB_LAST); else - assert_cursor_value(brt, DB_LAST, n-1); + assert_cursor_value(ft, DB_LAST, n-1); - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); @@ -239,7 +238,7 @@ static void test_ft_cursor_last(int n) { static void test_ft_cursor_first_last(int n) { CACHETABLE ct; - FT_HANDLE brt; + FT_HANDLE ft; int r; int i; @@ -247,9 +246,9 @@ static void test_ft_cursor_first_last(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - r = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); + r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); /* insert a bunch of kv pairs */ @@ -262,16 +261,16 @@ static void test_ft_cursor_first_last(int n) { v = i; toku_fill_dbt(&vbt, &v, sizeof v); - toku_ft_insert(brt, &kbt, &vbt, 0); + toku_ft_insert(ft, &kbt, &vbt, 0); } if (n == 0) { - assert_cursor_notfound(brt, DB_FIRST); - assert_cursor_notfound(brt, DB_LAST); + assert_cursor_notfound(ft, DB_FIRST); + assert_cursor_notfound(ft, DB_LAST); } else - assert_cursor_first_last(brt, 0, n-1); + assert_cursor_first_last(ft, 0, n-1); - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); @@ -281,7 +280,7 @@ static void test_ft_cursor_first_last(int n) { static void test_ft_cursor_rfirst(int n) { CACHETABLE ct; - FT_HANDLE brt; + FT_HANDLE ft; int r; int i; @@ -289,9 +288,9 @@ static void test_ft_cursor_rfirst(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - r = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); + r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); /* insert keys n-1, n-2, ... 
, 0 */ @@ -304,26 +303,26 @@ static void test_ft_cursor_rfirst(int n) { toku_fill_dbt(&kbt, key, strlen(key)+1); v = i; toku_fill_dbt(&vbt, &v, sizeof v); - toku_ft_insert(brt, &kbt, &vbt, 0); + toku_ft_insert(ft, &kbt, &vbt, 0); } if (n == 0) - assert_cursor_notfound(brt, DB_FIRST); + assert_cursor_notfound(ft, DB_FIRST); else - assert_cursor_value(brt, DB_FIRST, 0); + assert_cursor_value(ft, DB_FIRST, 0); - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); } -static void assert_cursor_walk(FT_HANDLE brt, int n) { +static void assert_cursor_walk(FT_HANDLE ft, int n) { FT_CURSOR cursor=0; int i; int r; - r = toku_ft_cursor(brt, &cursor, NULL, false, false); + r = toku_ft_cursor(ft, &cursor, NULL, false, false); assert(r==0); if (test_cursor_debug && verbose) printf("key: "); @@ -345,7 +344,7 @@ static void assert_cursor_walk(FT_HANDLE brt, int n) { static void test_ft_cursor_walk(int n) { CACHETABLE ct; - FT_HANDLE brt; + FT_HANDLE ft; int r; int i; @@ -353,9 +352,9 @@ static void test_ft_cursor_walk(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - r = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); + r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); /* insert a bunch of kv pairs */ @@ -367,25 +366,25 @@ static void test_ft_cursor_walk(int n) { toku_fill_dbt(&kbt, key, strlen(key)+1); v = i; toku_fill_dbt(&vbt, &v, sizeof v); - toku_ft_insert(brt, &kbt, &vbt, 0); + toku_ft_insert(ft, &kbt, &vbt, 0); } /* walk the tree */ - assert_cursor_walk(brt, n); + assert_cursor_walk(ft, n); - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); } -static void assert_cursor_rwalk(FT_HANDLE brt, int n) { +static void assert_cursor_rwalk(FT_HANDLE ft, int n) { FT_CURSOR cursor=0; int i; int r; - r = toku_ft_cursor(brt, &cursor, NULL, false, false); + r = toku_ft_cursor(ft, &cursor, NULL, false, false); assert(r==0); if (test_cursor_debug && verbose) printf("key: "); @@ -407,7 +406,7 @@ static void assert_cursor_rwalk(FT_HANDLE brt, int n) { static void test_ft_cursor_rwalk(int n) { CACHETABLE ct; - FT_HANDLE brt; + FT_HANDLE ft; int r; int i; @@ -415,9 +414,9 @@ static void test_ft_cursor_rwalk(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - r = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); + r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); /* insert a bunch of kv pairs */ @@ -429,13 +428,13 @@ static void test_ft_cursor_rwalk(int n) { toku_fill_dbt(&kbt, &k, sizeof k); v = i; toku_fill_dbt(&vbt, &v, sizeof v); - toku_ft_insert(brt, &kbt, &vbt, 0); + toku_ft_insert(ft, &kbt, &vbt, 0); } /* walk the tree */ - assert_cursor_rwalk(brt, n); + assert_cursor_rwalk(ft, n); - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); @@ -443,7 +442,7 @@ static void test_ft_cursor_rwalk(int n) { } static int -ascending_key_string_checkf (ITEMLEN keylen, bytevec key, ITEMLEN UU(vallen), bytevec UU(val), void 
*v, bool lock_only) +ascending_key_string_checkf (uint32_t keylen, const void *key, uint32_t UU(vallen), const void *UU(val), void *v, bool lock_only) // the keys are strings. Verify that they keylen matches the key, that the keys are ascending. Use (char**)v to hold a // malloc'd previous string. { @@ -462,13 +461,13 @@ ascending_key_string_checkf (ITEMLEN keylen, bytevec key, ITEMLEN UU(vallen), by } // The keys are strings (null terminated) -static void assert_cursor_walk_inorder(FT_HANDLE brt, int n) { +static void assert_cursor_walk_inorder(FT_HANDLE ft, int n) { FT_CURSOR cursor=0; int i; int r; char *prevkey = 0; - r = toku_ft_cursor(brt, &cursor, NULL, false, false); + r = toku_ft_cursor(ft, &cursor, NULL, false, false); assert(r==0); if (test_cursor_debug && verbose) printf("key: "); @@ -488,7 +487,7 @@ static void assert_cursor_walk_inorder(FT_HANDLE brt, int n) { static void test_ft_cursor_rand(int n) { CACHETABLE ct; - FT_HANDLE brt; + FT_HANDLE ft; int r; int i; @@ -496,9 +495,9 @@ static void test_ft_cursor_rand(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - r = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); + r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); /* insert a bunch of kv pairs */ @@ -513,22 +512,22 @@ static void test_ft_cursor_rand(int n) { v = i; toku_fill_dbt(&vbt, &v, sizeof v); struct check_pair pair = {kbt.size, key, len_ignore, 0, 0}; - r = toku_ft_lookup(brt, &kbt, lookup_checkf, &pair); + r = toku_ft_lookup(ft, &kbt, lookup_checkf, &pair); if (r == 0) { assert(pair.call_count==1); if (verbose) printf("dup"); continue; } assert(pair.call_count==0); - toku_ft_insert(brt, &kbt, &vbt, 0); + toku_ft_insert(ft, &kbt, &vbt, 0); break; } } /* walk the tree */ - assert_cursor_walk_inorder(brt, n); + assert_cursor_walk_inorder(ft, n); - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); @@ -536,7 +535,7 @@ static void test_ft_cursor_rand(int n) { static void test_ft_cursor_split(int n) { CACHETABLE ct; - FT_HANDLE brt; + FT_HANDLE ft; FT_CURSOR cursor=0; int r; int keyseqnum; @@ -546,9 +545,9 @@ static void test_ft_cursor_split(int n) { unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - r = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); + r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); /* insert a bunch of kv pairs */ @@ -560,10 +559,10 @@ static void test_ft_cursor_split(int n) { toku_fill_dbt(&kbt, key, strlen(key)+1); v = keyseqnum; toku_fill_dbt(&vbt, &v, sizeof v); - toku_ft_insert(brt, &kbt, &vbt, 0); + toku_ft_insert(ft, &kbt, &vbt, 0); } - r = toku_ft_cursor(brt, &cursor, NULL, false, false); + r = toku_ft_cursor(ft, &cursor, NULL, false, false); assert(r==0); if (test_cursor_debug && verbose) printf("key: "); @@ -583,7 +582,7 @@ static void test_ft_cursor_split(int n) { toku_fill_dbt(&kbt, key, strlen(key)+1); v = keyseqnum; toku_fill_dbt(&vbt, &v, sizeof v); - toku_ft_insert(brt, &kbt, &vbt, 0); + toku_ft_insert(ft, &kbt, &vbt, 0); } if (test_cursor_debug && verbose) printf("key: "); @@ 
-601,7 +600,7 @@ static void test_ft_cursor_split(int n) { toku_ft_cursor_close(cursor); - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); @@ -612,19 +611,19 @@ static void test_multiple_ft_cursors(int n) { int r; CACHETABLE ct; - FT_HANDLE brt; + FT_HANDLE ft; FT_CURSOR cursors[n]; unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - r = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); + r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); int i; for (i=0; i %02x%02x%02x%02x\n", __FILE__, __LINE__, ((char*)kbt.data)[0], ((char*)kbt.data)[1], ((char*)kbt.data)[2], ((char*)kbt.data)[3], ((char*)vbt.data)[0], ((char*)vbt.data)[1], ((char*)vbt.data)[2], ((char*)vbt.data)[3]); - toku_ft_insert(brt, &kbt, &vbt, null_txn); + toku_ft_insert(ft, &kbt, &vbt, null_txn); } { FT_CURSOR cursor=0; - r = toku_ft_cursor(brt, &cursor, NULL, false, false); assert(r==0); + r = toku_ft_cursor(ft, &cursor, NULL, false, false); assert(r==0); for (i=0; i<2; i++) { unsigned char a[4],b[4]; @@ -489,13 +488,13 @@ static void test_wrongendian_compare (int wrong_p, unsigned int N) { } - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); } } { toku_cachetable_verify(ct); - r = toku_open_ft_handle(fname, 1, &brt, 1<<20, 1<<17, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, wrong_p ? wrong_compare_fun : toku_builtin_compare_fun); assert(r==0); + r = toku_open_ft_handle(fname, 1, &ft, 1<<20, 1<<17, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, wrong_p ? 
wrong_compare_fun : toku_builtin_compare_fun); assert(r==0); toku_cachetable_verify(ct); for (i=0; i %02x%02x%02x%02x\n", __FILE__, __LINE__, ((unsigned char*)kbt.data)[0], ((unsigned char*)kbt.data)[1], ((unsigned char*)kbt.data)[2], ((unsigned char*)kbt.data)[3], ((unsigned char*)vbt.data)[0], ((unsigned char*)vbt.data)[1], ((unsigned char*)vbt.data)[2], ((unsigned char*)vbt.data)[3]); - toku_ft_insert(brt, &kbt, &vbt, null_txn); + toku_ft_insert(ft, &kbt, &vbt, null_txn); toku_cachetable_verify(ct); } FT_CURSOR cursor=0; - r = toku_ft_cursor(brt, &cursor, NULL, false, false); assert(r==0); + r = toku_ft_cursor(ft, &cursor, NULL, false, false); assert(r==0); for (i=0; i= v */ @@ -1226,7 +1225,7 @@ static void test_new_ft_cursor_set_range(int n) { toku_ft_cursor_close(cursor); - r = toku_close_ft_handle_nolsn(brt, 0); assert(r==0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); } @@ -1236,14 +1235,14 @@ static void test_new_ft_cursor_set(int n, int cursor_op, DB *db) { int r; CACHETABLE ct; - FT_HANDLE brt; + FT_HANDLE ft; FT_CURSOR cursor=0; unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - r = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); + r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); int i; @@ -1252,10 +1251,10 @@ static void test_new_ft_cursor_set(int n, int cursor_op, DB *db) { DBT key, val; int k = toku_htonl(10*i); int v = 10*i; - toku_ft_insert(brt, toku_fill_dbt(&key, &k, sizeof k), toku_fill_dbt(&val, &v, sizeof v), 0); assert(r == 0); + toku_ft_insert(ft, toku_fill_dbt(&key, &k, sizeof k), toku_fill_dbt(&val, &v, sizeof v), 0); assert(r == 0); } - r = toku_ft_cursor(brt, &cursor, NULL, false, false); assert(r==0); + r = toku_ft_cursor(ft, &cursor, NULL, false, false); assert(r==0); /* set cursor to random keys in set { 0, 10, 20, .. 10*(n-1) } */ for (i=0; i static toku_mutex_t event_mutex = TOKU_MUTEX_INITIALIZER; @@ -224,5 +223,3 @@ static void *my_realloc(void *p, size_t n) { } return realloc(p, n); } - -#endif diff --git a/storage/tokudb/ft-index/ft/tests/ftloader-test-bad-generate.cc b/storage/tokudb/ft-index/ft/tests/ftloader-test-bad-generate.cc index 95e3c5784cbee..3cc574b759a5d 100644 --- a/storage/tokudb/ft-index/ft/tests/ftloader-test-bad-generate.cc +++ b/storage/tokudb/ft-index/ft/tests/ftloader-test-bad-generate.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,8 +94,8 @@ PATENT RIGHTS GRANT: #define DONT_DEPRECATE_MALLOC #define DONT_DEPRECATE_WRITES #include "test.h" -#include "ftloader.h" -#include "ftloader-internal.h" +#include "loader/loader.h" +#include "loader/loader-internal.h" #include "ftloader-error-injector.h" #include "memory.h" #include @@ -158,19 +158,19 @@ static void test_extractor(int nrows, int nrowsets, bool expect_fail) { // open the ft_loader. this runs the extractor. 
const int N = 1; - FT_HANDLE brts[N]; + FT_HANDLE fts[N]; DB* dbs[N]; const char *fnames[N]; ft_compare_func compares[N]; for (int i = 0; i < N; i++) { - brts[i] = NULL; + fts[i] = NULL; dbs[i] = NULL; fnames[i] = ""; compares[i] = compare_int; } FTLOADER loader; - r = toku_ft_loader_open(&loader, NULL, generate, NULL, N, brts, dbs, fnames, compares, "tempXXXXXX", ZERO_LSN, nullptr, true, 0, false, true); + r = toku_ft_loader_open(&loader, NULL, generate, NULL, N, fts, dbs, fnames, compares, "tempXXXXXX", ZERO_LSN, nullptr, true, 0, false, true); assert(r == 0); struct rowset *rowset[nrowsets]; @@ -183,7 +183,7 @@ static void test_extractor(int nrows, int nrowsets, bool expect_fail) { // feed rowsets to the extractor for (int i = 0; i < nrowsets; i++) { - r = queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); + r = toku_queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); assert(r == 0); } diff --git a/storage/tokudb/ft-index/ft/tests/ftloader-test-extractor-errors.cc b/storage/tokudb/ft-index/ft/tests/ftloader-test-extractor-errors.cc index 378eca46a5690..4418e475bac47 100644 --- a/storage/tokudb/ft-index/ft/tests/ftloader-test-extractor-errors.cc +++ b/storage/tokudb/ft-index/ft/tests/ftloader-test-extractor-errors.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,8 +95,8 @@ PATENT RIGHTS GRANT: #define DONT_DEPRECATE_MALLOC #define DONT_DEPRECATE_WRITES #include "test.h" -#include "ftloader.h" -#include "ftloader-internal.h" +#include "loader/loader.h" +#include "loader/loader-internal.h" #include "ftloader-error-injector.h" #include "memory.h" #include @@ -165,12 +165,12 @@ static void test_extractor(int nrows, int nrowsets, bool expect_fail, const char // open the ft_loader. this runs the extractor. const int N = 1; - FT_HANDLE brts[N]; + FT_HANDLE fts[N]; DB* dbs[N]; const char *fnames[N]; ft_compare_func compares[N]; for (int i = 0; i < N; i++) { - brts[i] = NULL; + fts[i] = NULL; dbs[i] = NULL; fnames[i] = ""; compares[i] = compare_int; @@ -180,7 +180,7 @@ static void test_extractor(int nrows, int nrowsets, bool expect_fail, const char sprintf(temp, "%s/%s", testdir, "tempXXXXXX"); FTLOADER loader; - r = toku_ft_loader_open(&loader, NULL, generate, NULL, N, brts, dbs, fnames, compares, "tempXXXXXX", ZERO_LSN, nullptr, true, 0, false, true); + r = toku_ft_loader_open(&loader, NULL, generate, NULL, N, fts, dbs, fnames, compares, "tempXXXXXX", ZERO_LSN, nullptr, true, 0, false, true); assert(r == 0); struct rowset *rowset[nrowsets]; @@ -201,7 +201,7 @@ static void test_extractor(int nrows, int nrowsets, bool expect_fail, const char // feed rowsets to the extractor for (int i = 0; i < nrowsets; i++) { - r = queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); + r = toku_queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); assert(r == 0); } diff --git a/storage/tokudb/ft-index/ft/tests/ftloader-test-extractor.cc b/storage/tokudb/ft-index/ft/tests/ftloader-test-extractor.cc index 9938cb12f6d05..b20dd2fb2c30a 100644 --- a/storage/tokudb/ft-index/ft/tests/ftloader-test-extractor.cc +++ b/storage/tokudb/ft-index/ft/tests/ftloader-test-extractor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -89,14 +89,14 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// The purpose of this test is to test the extractor component of the brt loader. We insert rowsets into the extractor queue and verify temp files +// The purpose of this test is to test the extractor component of the ft loader. We insert rowsets into the extractor queue and verify temp files // after the extractor is finished. #define DONT_DEPRECATE_MALLOC #define DONT_DEPRECATE_WRITES #include "test.h" -#include "ftloader.h" -#include "ftloader-internal.h" +#include "loader/loader.h" +#include "loader/loader-internal.h" #include "memory.h" #include @@ -387,12 +387,12 @@ static void test_extractor(int nrows, int nrowsets, const char *testdir) { // open the ft_loader. this runs the extractor. const int N = 1; - FT_HANDLE brts[N]; + FT_HANDLE fts[N]; DB* dbs[N]; const char *fnames[N]; ft_compare_func compares[N]; for (int i = 0; i < N; i++) { - brts[i] = NULL; + fts[i] = NULL; dbs[i] = NULL; fnames[i] = ""; compares[i] = compare_int; @@ -402,7 +402,7 @@ static void test_extractor(int nrows, int nrowsets, const char *testdir) { sprintf(temp, "%s/%s", testdir, "tempXXXXXX"); FTLOADER loader; - r = toku_ft_loader_open(&loader, NULL, generate, NULL, N, brts, dbs, fnames, compares, temp, ZERO_LSN, nullptr, true, 0, false, true); + r = toku_ft_loader_open(&loader, NULL, generate, NULL, N, fts, dbs, fnames, compares, temp, ZERO_LSN, nullptr, true, 0, false, true); assert(r == 0); struct rowset *rowset[nrowsets]; @@ -415,7 +415,7 @@ static void test_extractor(int nrows, int nrowsets, const char *testdir) { // feed rowsets to the extractor for (int i = 0; i < nrowsets; i++) { - r = queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); + r = toku_queue_enq(loader->primary_rowset_queue, rowset[i], 1, NULL); assert(r == 0); } r = toku_ft_loader_finish_extractor(loader); diff --git a/storage/tokudb/ft-index/ft/tests/ftloader-test-merge-files-dbufio.cc b/storage/tokudb/ft-index/ft/tests/ftloader-test-merge-files-dbufio.cc index 33ceb99e0b702..d450bd0092328 100644 --- a/storage/tokudb/ft-index/ft/tests/ftloader-test-merge-files-dbufio.cc +++ b/storage/tokudb/ft-index/ft/tests/ftloader-test-merge-files-dbufio.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #define DONT_DEPRECATE_MALLOC #include "test.h" -#include "ftloader-internal.h" +#include "loader/loader-internal.h" #include static int event_count, event_count_trigger; @@ -346,7 +346,7 @@ static void *consumer_thread (void *ctv) { struct consumer_thunk *cthunk = (struct consumer_thunk *)ctv; while (1) { void *item; - int r = queue_deq(cthunk->q, &item, NULL, NULL); + int r = toku_queue_deq(cthunk->q, &item, NULL, NULL); if (r==EOF) return NULL; assert(r==0); struct rowset *rowset = (struct rowset *)item; @@ -385,7 +385,7 @@ static void test (const char *directory, bool is_error) { } FTLOADER bl; - FT_HANDLE *XCALLOC_N(N_DEST_DBS, brts); + FT_HANDLE *XCALLOC_N(N_DEST_DBS, fts); DB* *XCALLOC_N(N_DEST_DBS, dbs); const char **XMALLOC_N(N_DEST_DBS, new_fnames_in_env); for (int i=0; i @@ -126,12 +126,12 @@ static void test_loader_open(int ndbs) { FTLOADER loader; // open the ft_loader. this runs the extractor. - FT_HANDLE brts[ndbs]; + FT_HANDLE fts[ndbs]; DB* dbs[ndbs]; const char *fnames[ndbs]; ft_compare_func compares[ndbs]; for (int i = 0; i < ndbs; i++) { - brts[i] = NULL; + fts[i] = NULL; dbs[i] = NULL; fnames[i] = ""; compares[i] = my_compare; @@ -143,7 +143,7 @@ static void test_loader_open(int ndbs) { for (i = 0; ; i++) { set_my_malloc_trigger(i+1); - r = toku_ft_loader_open(&loader, NULL, NULL, NULL, ndbs, brts, dbs, fnames, compares, "", ZERO_LSN, nullptr, true, 0, false, true); + r = toku_ft_loader_open(&loader, NULL, NULL, NULL, ndbs, fts, dbs, fnames, compares, "", ZERO_LSN, nullptr, true, 0, false, true); if (r == 0) break; } diff --git a/storage/tokudb/ft-index/ft/tests/ftloader-test-vm.cc b/storage/tokudb/ft-index/ft/tests/ftloader-test-vm.cc index d9a0566144deb..9dd7ffec02703 100644 --- a/storage/tokudb/ft-index/ft/tests/ftloader-test-vm.cc +++ b/storage/tokudb/ft-index/ft/tests/ftloader-test-vm.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include "test.h" -#include "cachetable.h" +#include "cachetable/cachetable.h" #include /* Test for #2755. The ft_loader is using too much VM. */ diff --git a/storage/tokudb/ft-index/ft/tests/ftloader-test-writer-errors.cc b/storage/tokudb/ft-index/ft/tests/ftloader-test-writer-errors.cc index c72b6f847a3f2..d2669aee72a58 100644 --- a/storage/tokudb/ft-index/ft/tests/ftloader-test-writer-errors.cc +++ b/storage/tokudb/ft-index/ft/tests/ftloader-test-writer-errors.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #define DONT_DEPRECATE_MALLOC #include "test.h" -#include "ftloader-internal.h" +#include "loader/loader-internal.h" #include "ftloader-error-injector.h" #include @@ -159,20 +159,20 @@ static int write_dbfile (char *tf_template, int n, char *output_name, bool expec ft_loader_fi_close_all(&bl.file_infos); QUEUE q; - r = queue_create(&q, 0xFFFFFFFF); // infinite queue. 
+ r = toku_queue_create(&q, 0xFFFFFFFF); // infinite queue. assert(r==0); r = merge_files(&fs, &bl, 0, dest_db, compare_ints, 0, q); CKERR(r); assert(fs.n_temp_files==0); QUEUE q2; - r = queue_create(&q2, 0xFFFFFFFF); // infinite queue. + r = toku_queue_create(&q2, 0xFFFFFFFF); // infinite queue. assert(r==0); size_t num_found = 0; size_t found_size_est = 0; while (1) { void *v; - r = queue_deq(q, &v, NULL, NULL); + r = toku_queue_deq(q, &v, NULL, NULL); if (r==EOF) break; struct rowset *rs = (struct rowset *)v; if (verbose) printf("v=%p\n", v); @@ -187,16 +187,16 @@ static int write_dbfile (char *tf_template, int n, char *output_name, bool expec num_found += rs->n_rows; - r = queue_enq(q2, v, 0, NULL); + r = toku_queue_enq(q2, v, 0, NULL); assert(r==0); } assert((int)num_found == n); if (!expect_error) assert(found_size_est == size_est); - r = queue_eof(q2); + r = toku_queue_eof(q2); assert(r==0); - r = queue_destroy(q); + r = toku_queue_destroy(q); assert(r==0); DESCRIPTOR_S desc; @@ -213,7 +213,7 @@ static int write_dbfile (char *tf_template, int n, char *output_name, bool expec ft_loader_set_error_function(&bl.error_callback, NULL, NULL); ft_loader_set_poll_function(&bl.poll_callback, loader_poll_callback, NULL); - result = toku_loader_write_brt_from_q_in_C(&bl, &desc, fd, 1000, q2, size_est, 0, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, 16); + result = toku_loader_write_ft_from_q_in_C(&bl, &desc, fd, 1000, q2, size_est, 0, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, 16); toku_set_func_malloc_only(NULL); toku_set_func_realloc_only(NULL); @@ -225,7 +225,7 @@ static int write_dbfile (char *tf_template, int n, char *output_name, bool expec ft_loader_destroy_poll_callback(&bl.poll_callback); ft_loader_lock_destroy(&bl); - r = queue_destroy(q2); + r = toku_queue_destroy(q2); assert(r==0); destroy_merge_fileset(&fs); diff --git a/storage/tokudb/ft-index/ft/tests/ftloader-test-writer.cc b/storage/tokudb/ft-index/ft/tests/ftloader-test-writer.cc index 53d6bde27a0cd..e57b800adad6b 100644 --- a/storage/tokudb/ft-index/ft/tests/ftloader-test-writer.cc +++ b/storage/tokudb/ft-index/ft/tests/ftloader-test-writer.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include "test.h" -#include "ftloader-internal.h" +#include "loader/loader-internal.h" #include #include @@ -129,7 +129,7 @@ static void verify_dbfile(int n, const char *name) { int r; CACHETABLE ct; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); TOKUTXN const null_txn = NULL; FT_HANDLE t = NULL; @@ -137,9 +137,9 @@ static void verify_dbfile(int n, const char *name) { toku_ft_set_bt_compare(t, compare_ints); r = toku_ft_handle_open(t, name, 0, 0, ct, null_txn); assert(r==0); - if (verbose) traceit("Verifying brt internals"); + if (verbose) traceit("Verifying ft internals"); r = toku_verify_ft(t); - if (verbose) traceit("Verified brt internals"); + if (verbose) traceit("Verified ft internals"); FT_CURSOR cursor = NULL; r = toku_ft_cursor(t, &cursor, NULL, false, false); assert(r == 0); @@ -215,20 +215,20 @@ static void test_write_dbfile (char *tf_template, int n, char *output_name, TXNI ft_loader_fi_close_all(&bl.file_infos); QUEUE q; - r = queue_create(&q, 0xFFFFFFFF); // infinite queue. + r = toku_queue_create(&q, 0xFFFFFFFF); // infinite queue. 
assert(r==0); r = merge_files(&fs, &bl, 0, dest_db, compare_ints, 0, q); CKERR(r); assert(fs.n_temp_files==0); QUEUE q2; - r = queue_create(&q2, 0xFFFFFFFF); // infinite queue. + r = toku_queue_create(&q2, 0xFFFFFFFF); // infinite queue. assert(r==0); size_t num_found = 0; size_t found_size_est = 0; while (1) { void *v; - r = queue_deq(q, &v, NULL, NULL); + r = toku_queue_deq(q, &v, NULL, NULL); if (r==EOF) break; struct rowset *rs = (struct rowset *)v; if (verbose) printf("v=%p\n", v); @@ -243,16 +243,16 @@ static void test_write_dbfile (char *tf_template, int n, char *output_name, TXNI num_found += rs->n_rows; - r = queue_enq(q2, v, 0, NULL); + r = toku_queue_enq(q2, v, 0, NULL); assert(r==0); } assert((int)num_found == n); assert(found_size_est == size_est); - r = queue_eof(q2); + r = toku_queue_eof(q2); assert(r==0); - r = queue_destroy(q); + r = toku_queue_destroy(q); assert(r==0); DESCRIPTOR_S desc; @@ -262,10 +262,10 @@ static void test_write_dbfile (char *tf_template, int n, char *output_name, TXNI assert(fd>=0); if (verbose) traceit("write to file"); - r = toku_loader_write_brt_from_q_in_C(&bl, &desc, fd, 1000, q2, size_est, 0, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, 16); + r = toku_loader_write_ft_from_q_in_C(&bl, &desc, fd, 1000, q2, size_est, 0, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, 16); assert(r==0); - r = queue_destroy(q2); + r = toku_queue_destroy(q2); assert_zero(r); destroy_merge_fileset(&fs); diff --git a/storage/tokudb/ft-index/ft/tests/ftloader-test.cc b/storage/tokudb/ft-index/ft/tests/ftloader-test.cc index 2fc2d309f6499..46271eeb4518d 100644 --- a/storage/tokudb/ft-index/ft/tests/ftloader-test.cc +++ b/storage/tokudb/ft-index/ft/tests/ftloader-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include "ftloader-internal.h" +#include "loader/loader-internal.h" #include "memory.h" #include @@ -186,7 +186,7 @@ static void test_merge_internal (int a[], int na, int b[], int nb, bool dups) { static void test_merge (void) { { int avals[]={1,2,3,4,5}; - int *bvals = NULL; //icc won't let us use a zero-sized array explicitly or by [] = {} construction. 
+ int *bvals = NULL; test_merge_internal(avals, 5, bvals, 0, false); test_merge_internal(bvals, 0, avals, 5, false); } @@ -336,7 +336,7 @@ static void verify_dbfile(int n, int sorted_keys[], const char *sorted_vals[], c int r; CACHETABLE ct; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); TOKUTXN const null_txn = NULL; FT_HANDLE t = NULL; @@ -350,7 +350,7 @@ static void verify_dbfile(int n, int sorted_keys[], const char *sorted_vals[], c size_t userdata = 0; int i; for (i=0; i=0); - r = toku_loader_write_brt_from_q_in_C(&bl, &desc, fd, 1000, q, size_est, 0, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, 16); + r = toku_loader_write_ft_from_q_in_C(&bl, &desc, fd, 1000, q, size_est, 0, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, 16); assert(r==0); destroy_merge_fileset(&fs); @@ -436,7 +436,7 @@ static void test_merge_files (const char *tf_template, const char *output_name) // verify the dbfile verify_dbfile(10, sorted_keys, sorted_vals, output_name); - r = queue_destroy(q); + r = toku_queue_destroy(q); assert(r==0); } diff --git a/storage/tokudb/ft-index/toku_include/rdtsc.h b/storage/tokudb/ft-index/ft/tests/generate-upgrade-recovery-logs.cc similarity index 71% rename from storage/tokudb/ft-index/toku_include/rdtsc.h rename to storage/tokudb/ft-index/ft/tests/generate-upgrade-recovery-logs.cc index e70f636e16990..b221e6a4fc565 100644 --- a/storage/tokudb/ft-index/toku_include/rdtsc.h +++ b/storage/tokudb/ft-index/ft/tests/generate-upgrade-recovery-logs.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,40 +88,64 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// read the processor time stamp register -#if defined __ICC +// Generate a recovery log with a checkpoint and an optional shutdown log entry. +// These logs will be used later to test recovery. 
-#define USE_RDTSC 1 -#define rdtsc _rdtsc +#include "test.h" -#elif defined __i386__ +static void generate_recovery_log(const char *testdir, bool do_shutdown) { + int r; -#define USE_RDTSC 1 + // setup the test dir + toku_os_recursive_delete(testdir); + r = toku_os_mkdir(testdir, S_IRWXU); + CKERR(r); -static inline unsigned long long rdtsc(void) { - unsigned long hi, lo; - __asm__ __volatile__ ("rdtsc\n" - "movl %%edx,%0\n" - "movl %%eax,%1" : "=r"(hi), "=r"(lo) : : "edx", "eax"); - return ((unsigned long long) hi << 32ULL) + (unsigned long long) lo; -} + // open the log + TOKULOGGER logger; + r = toku_logger_create(&logger); + CKERR(r); + r = toku_logger_open(testdir, logger); + CKERR(r); -#elif defined __x86_64__ + // log checkpoint + LSN beginlsn; + toku_log_begin_checkpoint(logger, &beginlsn, false, 0, 0); + toku_log_end_checkpoint(logger, nullptr, false, beginlsn, 0, 0, 0); -#define USE_RDTSC 1 + // log shutdown + if (do_shutdown) { + toku_log_shutdown(logger, nullptr, true, 0, 0); + } -static inline unsigned long long rdtsc(void) { - unsigned long long r; - __asm__ __volatile__ ("rdtsc\n" - "shl $32,%%rdx\n" - "or %%rdx,%%rax\n" - "movq %%rax,%0" : "=r"(r) : : "edx", "eax", "rdx", "rax"); - return r; + r = toku_logger_close(&logger); + CKERR(r); } -#else - -#define USE_RDTSC 0 - -#endif +int test_main(int argc, const char *argv[]) { + bool do_shutdown = true; + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0) { + verbose++; + continue; + } + if (strcmp(argv[i], "-q") == 0) { + if (verbose > 0) + verbose--; + continue; + } + if (strcmp(argv[i], "--clean") == 0) { + do_shutdown = true; + continue; + } + if (strcmp(argv[i], "--dirty") == 0) { + do_shutdown = false; + continue; + } + } + char testdir[256]; + sprintf(testdir, "upgrade-recovery-logs-%d-%s", TOKU_LOG_VERSION, do_shutdown ? "clean" : "dirty"); + generate_recovery_log(testdir, do_shutdown); + return 0; +} diff --git a/storage/tokudb/ft-index/ft/tests/is_empty.cc b/storage/tokudb/ft-index/ft/tests/is_empty.cc index 0927e2b5894f0..b415343fba1c0 100644 --- a/storage/tokudb/ft-index/ft/tests/is_empty.cc +++ b/storage/tokudb/ft-index/ft/tests/is_empty.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -92,13 +92,13 @@ PATENT RIGHTS GRANT: #include "test.h" #include "toku_os.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #define FILENAME "test0.ft" static void test_it (int N) { - FT_HANDLE brt; + FT_HANDLE ft; int r; toku_os_recursive_delete(TOKU_TEST_FILENAME); r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU); CKERR(r); @@ -119,18 +119,18 @@ static void test_it (int N) { TOKUTXN txn; r = toku_txn_begin_txn((DB_TXN*)NULL, (TOKUTXN)0, &txn, logger, TXN_SNAPSHOT_ROOT, false); CKERR(r); - r = toku_open_ft_handle(FILENAME, 1, &brt, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, txn, toku_builtin_compare_fun); CKERR(r); + r = toku_open_ft_handle(FILENAME, 1, &ft, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, txn, toku_builtin_compare_fun); CKERR(r); r = toku_txn_commit_txn(txn, false, NULL, NULL); CKERR(r); toku_txn_close_txn(txn); CHECKPOINTER cp = toku_cachetable_get_checkpointer(ct); r = toku_checkpoint(cp, logger, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT); CKERR(r); - r = toku_close_ft_handle_nolsn(brt, NULL); CKERR(r); + r = toku_close_ft_handle_nolsn(ft, NULL); CKERR(r); unsigned int rands[N]; for (int i=0; i static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static const char *fname = TOKU_TEST_FILENAME; static CACHETABLE ct; @@ -111,7 +110,7 @@ static void close_ft_and_ct (void) { static void open_ft_and_ct (bool unlink_old) { int r; if (unlink_old) unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &t, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); } @@ -132,8 +131,8 @@ static void reload (uint64_t limit) { enum memory_state { LEAVE_IN_MEMORY, // leave the state in main memory - CLOSE_AND_RELOAD, // close the brts and reload them into main memory (that will cause >1 partitio in many leaves.) - CLOSE_AND_REOPEN_LEAVE_ON_DISK // close the brts, reopen them, but leave the state on disk. + CLOSE_AND_RELOAD, // close the fts and reload them into main memory (that will cause >1 partitio in many leaves.) + CLOSE_AND_REOPEN_LEAVE_ON_DISK // close the fts, reopen them, but leave the state on disk. }; static void maybe_reopen (enum memory_state ms, uint64_t limit) { diff --git a/storage/tokudb/ft-index/ft/tests/keytest.cc b/storage/tokudb/ft-index/ft/tests/keytest.cc index 93896a819b458..bde2a4ca9ca9e 100644 --- a/storage/tokudb/ft-index/ft/tests/keytest.cc +++ b/storage/tokudb/ft-index/ft/tests/keytest.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,11 +88,12 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
+ #include "test.h" -#include "key.h" +#include "ft.h" -void -toku_test_keycompare (void) { +static void +test_keycompare (void) { assert(toku_keycompare("a",1, "a",1)==0); assert(toku_keycompare("aa",2, "a",1)>0); assert(toku_keycompare("a",1, "aa",2)<0); @@ -109,7 +110,7 @@ int test_main (int argc , const char *argv[]) { default_parse_args(argc, argv); - toku_test_keycompare(); + test_keycompare(); if (verbose) printf("test ok\n"); return 0; } diff --git a/storage/tokudb/ft-index/ft/tests/le-cursor-provdel.cc b/storage/tokudb/ft-index/ft/tests/le-cursor-provdel.cc index 22c7f6b864058..33729527ca1e4 100644 --- a/storage/tokudb/ft-index/ft/tests/le-cursor-provdel.cc +++ b/storage/tokudb/ft-index/ft/tests/le-cursor-provdel.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,15 +91,13 @@ PATENT RIGHTS GRANT: // test the LE_CURSOR next function with provisionally deleted rows -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "le-cursor.h" #include "test.h" -static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static int -get_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen UU(), bytevec val UU(), void *extra, bool lock_only) { +get_next_callback(uint32_t keylen, const void *key, uint32_t vallen UU(), const void *val UU(), void *extra, bool lock_only) { DBT *CAST_FROM_VOIDP(key_dbt, extra); if (!lock_only) { toku_dbt_set(keylen, key, key_dbt, NULL); @@ -138,8 +136,8 @@ create_populate_tree(const char *logdir, const char *fname, int n) { error = toku_txn_begin_txn(NULL, NULL, &txn, logger, TXN_SNAPSHOT_NONE, false); assert(error == 0); - FT_HANDLE brt = NULL; - error = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, txn, test_ft_cursor_keycompare); + FT_HANDLE ft = NULL; + error = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, txn, test_ft_cursor_keycompare); assert(error == 0); error = toku_txn_commit_txn(txn, true, NULL, NULL); @@ -158,7 +156,7 @@ create_populate_tree(const char *logdir, const char *fname, int n) { toku_fill_dbt(&key, &k, sizeof k); DBT val; toku_fill_dbt(&val, &v, sizeof v); - toku_ft_insert(brt, &key, &val, txn); + toku_ft_insert(ft, &key, &val, txn); assert(error == 0); } @@ -166,7 +164,7 @@ create_populate_tree(const char *logdir, const char *fname, int n) { assert(error == 0); toku_txn_close_txn(txn); - error = toku_close_ft_handle_nolsn(brt, NULL); + error = toku_close_ft_handle_nolsn(ft, NULL); assert(error == 0); CHECKPOINTER cp = toku_cachetable_get_checkpointer(ct); @@ -208,8 +206,8 @@ test_provdel(const char *logdir, const char *fname, int n) { error = toku_txn_begin_txn(NULL, NULL, &txn, logger, TXN_SNAPSHOT_NONE, false); assert(error == 0); - FT_HANDLE brt = NULL; - error = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, txn, test_ft_cursor_keycompare); + FT_HANDLE ft = NULL; + error = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, txn, test_ft_cursor_keycompare); assert(error == 0); error = toku_txn_commit_txn(txn, true, NULL, NULL); @@ -225,7 +223,7 @@ test_provdel(const char *logdir, const char *fname, int n) { int k = toku_htonl(i); DBT key; toku_fill_dbt(&key, &k, sizeof k); - toku_ft_delete(brt, &key, txn); + toku_ft_delete(ft, &key, txn); assert(error == 0); } @@ -234,7 +232,7 @@ test_provdel(const 
char *logdir, const char *fname, int n) { assert(error == 0); LE_CURSOR cursor = NULL; - error = toku_le_cursor_create(&cursor, brt, cursortxn); + error = toku_le_cursor_create(&cursor, ft, cursortxn); assert(error == 0); DBT key; @@ -267,7 +265,7 @@ test_provdel(const char *logdir, const char *fname, int n) { assert(error == 0); toku_txn_close_txn(txn); - error = toku_close_ft_handle_nolsn(brt, NULL); + error = toku_close_ft_handle_nolsn(ft, NULL); assert(error == 0); CHECKPOINTER cp = toku_cachetable_get_checkpointer(ct); error = toku_checkpoint(cp, logger, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT); diff --git a/storage/tokudb/ft-index/ft/tests/le-cursor-right.cc b/storage/tokudb/ft-index/ft/tests/le-cursor-right.cc index 78415855a51c0..1c6ac9eab5c05 100644 --- a/storage/tokudb/ft-index/ft/tests/le-cursor-right.cc +++ b/storage/tokudb/ft-index/ft/tests/le-cursor-right.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,15 +94,14 @@ PATENT RIGHTS GRANT: // - LE_CURSOR somewhere else -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "le-cursor.h" #include "test.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static int -get_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen UU(), bytevec val UU(), void *extra, bool lock_only) { +get_next_callback(uint32_t keylen, const void *key, uint32_t vallen UU(), const void *val UU(), void *extra, bool lock_only) { DBT *CAST_FROM_VOIDP(key_dbt, extra); if (!lock_only) { toku_dbt_set(keylen, key, key_dbt, NULL); @@ -142,8 +141,8 @@ create_populate_tree(const char *logdir, const char *fname, int n) { error = toku_txn_begin_txn(NULL, NULL, &txn, logger, TXN_SNAPSHOT_NONE, false); assert(error == 0); - FT_HANDLE brt = NULL; - error = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, txn, test_keycompare); + FT_HANDLE ft = NULL; + error = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, txn, test_keycompare); assert(error == 0); error = toku_txn_commit_txn(txn, true, NULL, NULL); @@ -162,14 +161,14 @@ create_populate_tree(const char *logdir, const char *fname, int n) { toku_fill_dbt(&key, &k, sizeof k); DBT val; toku_fill_dbt(&val, &v, sizeof v); - toku_ft_insert(brt, &key, &val, txn); + toku_ft_insert(ft, &key, &val, txn); } error = toku_txn_commit_txn(txn, true, NULL, NULL); assert(error == 0); toku_txn_close_txn(txn); - error = toku_close_ft_handle_nolsn(brt, NULL); + error = toku_close_ft_handle_nolsn(ft, NULL); assert(error == 0); CHECKPOINTER cp = toku_cachetable_get_checkpointer(ct); @@ -196,15 +195,15 @@ test_pos_infinity(const char *fname, int n) { int error; CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - FT_HANDLE brt = NULL; - error = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_keycompare); + FT_HANDLE ft = NULL; + error = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_keycompare); assert(error == 0); // position the cursor at -infinity LE_CURSOR cursor = NULL; - error = toku_le_cursor_create(&cursor, brt, NULL); + error = toku_le_cursor_create(&cursor, ft, NULL); assert(error == 0); for (int i = 0; i < 2*n; i++) { @@ -217,7 +216,7 @@ test_pos_infinity(const char 
*fname, int n) { toku_le_cursor_close(cursor); - error = toku_close_ft_handle_nolsn(brt, 0); + error = toku_close_ft_handle_nolsn(ft, 0); assert(error == 0); toku_cachetable_close(&ct); @@ -230,15 +229,15 @@ test_neg_infinity(const char *fname, int n) { int error; CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - FT_HANDLE brt = NULL; - error = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_keycompare); + FT_HANDLE ft = NULL; + error = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_keycompare); assert(error == 0); // position the LE_CURSOR at +infinity LE_CURSOR cursor = NULL; - error = toku_le_cursor_create(&cursor, brt, NULL); + error = toku_le_cursor_create(&cursor, ft, NULL); assert(error == 0); DBT key; @@ -271,7 +270,7 @@ test_neg_infinity(const char *fname, int n) { toku_le_cursor_close(cursor); - error = toku_close_ft_handle_nolsn(brt, 0); + error = toku_close_ft_handle_nolsn(ft, 0); assert(error == 0); toku_cachetable_close(&ct); @@ -284,15 +283,15 @@ test_between(const char *fname, int n) { int error; CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - FT_HANDLE brt = NULL; - error = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_keycompare); + FT_HANDLE ft = NULL; + error = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_keycompare); assert(error == 0); // position the LE_CURSOR at +infinity LE_CURSOR cursor = NULL; - error = toku_le_cursor_create(&cursor, brt, NULL); + error = toku_le_cursor_create(&cursor, ft, NULL); assert(error == 0); DBT key; @@ -337,7 +336,7 @@ test_between(const char *fname, int n) { toku_le_cursor_close(cursor); - error = toku_close_ft_handle_nolsn(brt, 0); + error = toku_close_ft_handle_nolsn(ft, 0); assert(error == 0); toku_cachetable_close(&ct); diff --git a/storage/tokudb/ft-index/ft/tests/le-cursor-walk.cc b/storage/tokudb/ft-index/ft/tests/le-cursor-walk.cc index 3e08461da6424..af26228ddfe31 100644 --- a/storage/tokudb/ft-index/ft/tests/le-cursor-walk.cc +++ b/storage/tokudb/ft-index/ft/tests/le-cursor-walk.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -91,16 +91,15 @@ PATENT RIGHTS GRANT: // test the LE_CURSOR next function -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "le-cursor.h" #include "test.h" #include static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static int -get_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen UU(), bytevec val UU(), void *extra, bool lock_only) { +get_next_callback(uint32_t keylen, const void *key, uint32_t vallen UU(), const void *val UU(), void *extra, bool lock_only) { DBT *CAST_FROM_VOIDP(key_dbt, extra); if (!lock_only) { toku_dbt_set(keylen, key, key_dbt, NULL); @@ -139,8 +138,8 @@ create_populate_tree(const char *logdir, const char *fname, int n) { error = toku_txn_begin_txn(NULL, NULL, &txn, logger, TXN_SNAPSHOT_NONE, false); assert(error == 0); - FT_HANDLE brt = NULL; - error = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, txn, test_ft_cursor_keycompare); + FT_HANDLE ft = NULL; + error = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, txn, test_ft_cursor_keycompare); assert(error == 0); error = toku_txn_commit_txn(txn, true, NULL, NULL); @@ -159,14 +158,14 @@ create_populate_tree(const char *logdir, const char *fname, int n) { toku_fill_dbt(&key, &k, sizeof k); DBT val; toku_fill_dbt(&val, &v, sizeof v); - toku_ft_insert(brt, &key, &val, txn); + toku_ft_insert(ft, &key, &val, txn); } error = toku_txn_commit_txn(txn, true, NULL, NULL); assert(error == 0); toku_txn_close_txn(txn); - error = toku_close_ft_handle_nolsn(brt, NULL); + error = toku_close_ft_handle_nolsn(ft, NULL); assert(error == 0); CHECKPOINTER cp = toku_cachetable_get_checkpointer(ct); @@ -192,14 +191,14 @@ walk_tree(const char *fname, int n) { int error; CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - FT_HANDLE brt = NULL; - error = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); + FT_HANDLE ft = NULL; + error = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(error == 0); LE_CURSOR cursor = NULL; - error = toku_le_cursor_create(&cursor, brt, NULL); + error = toku_le_cursor_create(&cursor, ft, NULL); assert(error == 0); DBT key; @@ -224,7 +223,7 @@ walk_tree(const char *fname, int n) { toku_le_cursor_close(cursor); - error = toku_close_ft_handle_nolsn(brt, 0); + error = toku_close_ft_handle_nolsn(ft, 0); assert(error == 0); toku_cachetable_close(&ct); diff --git a/storage/tokudb/ft-index/ft/tests/list-test.cc b/storage/tokudb/ft-index/ft/tests/list-test.cc index 0922b831e1f06..c7286048e2446 100644 --- a/storage/tokudb/ft-index/ft/tests/list-test.cc +++ b/storage/tokudb/ft-index/ft/tests/list-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/log-test-maybe-trim.cc b/storage/tokudb/ft-index/ft/tests/log-test-maybe-trim.cc index 6f2398eead4eb..2e12fa815795c 100644 --- a/storage/tokudb/ft-index/ft/tests/log-test-maybe-trim.cc +++ b/storage/tokudb/ft-index/ft/tests/log-test-maybe-trim.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: // verify that the log file trimmer does not delete the log file containing the // begin checkpoint when the checkpoint log entries span multiple log files. -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" int diff --git a/storage/tokudb/ft-index/ft/tests/log-test.cc b/storage/tokudb/ft-index/ft/tests/log-test.cc index 7b7005ccb2c0f..c21e43641d532 100644 --- a/storage/tokudb/ft-index/ft/tests/log-test.cc +++ b/storage/tokudb/ft-index/ft/tests/log-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/log-test2.cc b/storage/tokudb/ft-index/ft/tests/log-test2.cc index d5fb7bfe102eb..eafdd26d08ba0 100644 --- a/storage/tokudb/ft-index/ft/tests/log-test2.cc +++ b/storage/tokudb/ft-index/ft/tests/log-test2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/log-test3.cc b/storage/tokudb/ft-index/ft/tests/log-test3.cc index 69443377751f2..e9bb72fdfb058 100644 --- a/storage/tokudb/ft-index/ft/tests/log-test3.cc +++ b/storage/tokudb/ft-index/ft/tests/log-test3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/log-test4.cc b/storage/tokudb/ft-index/ft/tests/log-test4.cc index 8a0d230983e89..c4e92fe2fcc7e 100644 --- a/storage/tokudb/ft-index/ft/tests/log-test4.cc +++ b/storage/tokudb/ft-index/ft/tests/log-test4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/log-test5.cc b/storage/tokudb/ft-index/ft/tests/log-test5.cc index c6ad061aa116e..bbe24640d7ee5 100644 --- a/storage/tokudb/ft-index/ft/tests/log-test5.cc +++ b/storage/tokudb/ft-index/ft/tests/log-test5.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/log-test6.cc b/storage/tokudb/ft-index/ft/tests/log-test6.cc index 8e07365a967bb..5e2ff52ea759b 100644 --- a/storage/tokudb/ft-index/ft/tests/log-test6.cc +++ b/storage/tokudb/ft-index/ft/tests/log-test6.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/log-test7.cc b/storage/tokudb/ft-index/ft/tests/log-test7.cc index a07e6775818b1..d6ac8bd688f49 100644 --- a/storage/tokudb/ft-index/ft/tests/log-test7.cc +++ b/storage/tokudb/ft-index/ft/tests/log-test7.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/logcursor-bad-checksum.cc b/storage/tokudb/ft-index/ft/tests/logcursor-bad-checksum.cc index 74a9ec27bf5fb..d9dc5ce04dfd5 100644 --- a/storage/tokudb/ft-index/ft/tests/logcursor-bad-checksum.cc +++ b/storage/tokudb/ft-index/ft/tests/logcursor-bad-checksum.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" // log a couple of timestamp entries and verify the log by walking diff --git a/storage/tokudb/ft-index/ft/tests/logcursor-bw.cc b/storage/tokudb/ft-index/ft/tests/logcursor-bw.cc index 00fc757e83677..3d5a0d32f6239 100644 --- a/storage/tokudb/ft-index/ft/tests/logcursor-bw.cc +++ b/storage/tokudb/ft-index/ft/tests/logcursor-bw.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/logcursor-empty-logdir.cc b/storage/tokudb/ft-index/ft/tests/logcursor-empty-logdir.cc index a4822f148113b..cdce388160501 100644 --- a/storage/tokudb/ft-index/ft/tests/logcursor-empty-logdir.cc +++ b/storage/tokudb/ft-index/ft/tests/logcursor-empty-logdir.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" // a logcursor in an empty directory should not find any log entries diff --git a/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-2.cc b/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-2.cc index 5bf7269cfc9ab..665623788afa3 100644 --- a/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-2.cc +++ b/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" const int N = 2; diff --git a/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-3.cc b/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-3.cc index 85cce4e7bcde9..12bf4ba485946 100644 --- a/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-3.cc +++ b/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile-3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." 
-#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" const int N = 2; diff --git a/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile.cc b/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile.cc index 7b6de69b061f5..0cc4bd3428505 100644 --- a/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile.cc +++ b/storage/tokudb/ft-index/ft/tests/logcursor-empty-logfile.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" const int N = 2; diff --git a/storage/tokudb/ft-index/ft/tests/logcursor-fw.cc b/storage/tokudb/ft-index/ft/tests/logcursor-fw.cc index 51fa24b107816..9f0510c61183b 100644 --- a/storage/tokudb/ft-index/ft/tests/logcursor-fw.cc +++ b/storage/tokudb/ft-index/ft/tests/logcursor-fw.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/logcursor-print.cc b/storage/tokudb/ft-index/ft/tests/logcursor-print.cc index 957a7d18494bd..902dc494a664b 100644 --- a/storage/tokudb/ft-index/ft/tests/logcursor-print.cc +++ b/storage/tokudb/ft-index/ft/tests/logcursor-print.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include "test.h" -#include "logcursor.h" +#include "logger/logcursor.h" int test_main(int argc, const char *argv[]) { int r; diff --git a/storage/tokudb/ft-index/ft/tests/logcursor-timestamp.cc b/storage/tokudb/ft-index/ft/tests/logcursor-timestamp.cc index b79bd199e8f5f..a329cb49d3dee 100644 --- a/storage/tokudb/ft-index/ft/tests/logcursor-timestamp.cc +++ b/storage/tokudb/ft-index/ft/tests/logcursor-timestamp.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" static uint64_t now(void) { diff --git a/storage/tokudb/ft-index/ft/tests/logfilemgr-create-destroy.cc b/storage/tokudb/ft-index/ft/tests/logfilemgr-create-destroy.cc index 2ec8071cfed13..c7a06d90d4119 100644 --- a/storage/tokudb/ft-index/ft/tests/logfilemgr-create-destroy.cc +++ b/storage/tokudb/ft-index/ft/tests/logfilemgr-create-destroy.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -88,8 +88,9 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "test.h" -#include "logfilemgr.h" + +#include "ft/tests/test.h" +#include "ft/logger/logfilemgr.h" int test_main(int argc __attribute__((unused)), const char *argv[] __attribute__((unused))) { int r; diff --git a/storage/tokudb/ft-index/ft/tests/logfilemgr-print.cc b/storage/tokudb/ft-index/ft/tests/logfilemgr-print.cc index 883d7bf01313e..6a50cd3f091f2 100644 --- a/storage/tokudb/ft-index/ft/tests/logfilemgr-print.cc +++ b/storage/tokudb/ft-index/ft/tests/logfilemgr-print.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,8 +88,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "test.h" -#include "logfilemgr.h" +#include "ft/tests/test.h" +#include "ft/logger/logfilemgr.h" int test_main(int argc __attribute__((unused)), const char *argv[] __attribute__((unused))) { int r; diff --git a/storage/tokudb/ft-index/ft/tests/make-tree.cc b/storage/tokudb/ft-index/ft/tests/make-tree.cc index 56df485173de7..51a4b66618ae6 100644 --- a/storage/tokudb/ft-index/ft/tests/make-tree.cc +++ b/storage/tokudb/ft-index/ft/tests/make-tree.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -103,10 +103,10 @@ PATENT RIGHTS GRANT: #include "test.h" static FTNODE -make_node(FT_HANDLE brt, int height) { +make_node(FT_HANDLE ft, int height) { FTNODE node = NULL; int n_children = (height == 0) ? 
1 : 0; - toku_create_new_ftnode(brt, &node, height, n_children); + toku_create_new_ftnode(ft, &node, height, n_children); if (n_children) BP_STATE(node,0) = PT_AVAIL; return node; } @@ -119,14 +119,14 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) DBT theval; toku_fill_dbt(&theval, val, vallen); // get an index that we can use to create a new leaf entry - uint32_t idx = BLB_DATA(leafnode, 0)->omt_size(); + uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs(); MSN msn = next_dummymsn(); // apply an insert to the leaf node txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u = {.id = { &thekey, &theval }} }; - toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, NULL, NULL); + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); + toku_ft_bn_apply_msg_once(BLB(leafnode,0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); leafnode->max_msn_applied_to_node_on_disk = msn; @@ -146,29 +146,29 @@ populate_leaf(FTNODE leafnode, int seq, int n, int *minkey, int *maxkey) { } static void -insert_into_child_buffer(FT_HANDLE brt, FTNODE node, int childnum, int minkey, int maxkey) { +insert_into_child_buffer(FT_HANDLE ft, FTNODE node, int childnum, int minkey, int maxkey) { for (unsigned int val = htonl(minkey); val <= htonl(maxkey); val++) { MSN msn = next_dummymsn(); unsigned int key = htonl(val); DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); DBT theval; toku_fill_dbt(&theval, &val, sizeof val); - toku_ft_append_to_child_buffer(brt->ft->compare_fun, NULL, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, &thekey, &theval); + toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, toku_xids_get_root_xids(), true, &thekey, &theval); node->max_msn_applied_to_node_on_disk = msn; } } static FTNODE -make_tree(FT_HANDLE brt, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { +make_tree(FT_HANDLE ft, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { FTNODE node; if (height == 0) { - node = make_node(brt, 0); + node = make_node(ft, 0); populate_leaf(node, *seq, nperleaf, minkey, maxkey); *seq += nperleaf; } else { - node = make_node(brt, height); + node = make_node(ft, height); int minkeys[fanout], maxkeys[fanout]; for (int childnum = 0; childnum < fanout; childnum++) { - FTNODE child = make_tree(brt, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); + FTNODE child = make_tree(ft, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); if (childnum == 0) { toku_ft_nonleaf_append_child(node, child, NULL); } else { @@ -176,8 +176,8 @@ make_tree(FT_HANDLE brt, int height, int fanout, int nperleaf, int *seq, int *mi DBT pivotkey; toku_ft_nonleaf_append_child(node, child, toku_fill_dbt(&pivotkey, &k, sizeof k)); } - toku_unpin_ftnode(brt->ft, child); - insert_into_child_buffer(brt, node, childnum, minkeys[childnum], maxkeys[childnum]); + toku_unpin_ftnode(ft->ft, child); + insert_into_child_buffer(ft, node, childnum, minkeys[childnum], maxkeys[childnum]); } *minkey = minkeys[0]; *maxkey = maxkeys[0]; @@ -209,33 +209,33 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - // create the brt + // create the ft TOKUTXN null_txn = NULL; - FT_HANDLE brt = NULL; - r = 
toku_open_ft_handle(fname, 1, &brt, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); + FT_HANDLE ft = NULL; + r = toku_open_ft_handle(fname, 1, &ft, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r == 0); // make a tree int seq = 0, minkey, maxkey; - FTNODE newroot = make_tree(brt, height, fanout, nperleaf, &seq, &minkey, &maxkey); + FTNODE newroot = make_tree(ft, height, fanout, nperleaf, &seq, &minkey, &maxkey); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(brt->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); - brt->ft->h->max_msn_in_ft = last_dummymsn(); // capture msn of last message injected into tree + ft->ft->h->max_msn_in_ft = last_dummymsn(); // capture msn of last message injected into tree // unpin the new root - toku_unpin_ftnode(brt->ft, newroot); + toku_unpin_ftnode(ft->ft, newroot); if (do_verify) { - r = toku_verify_ft(brt); + r = toku_verify_ft(ft); assert(r == 0); } // flush to the file system - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r == 0); // shutdown the cachetable diff --git a/storage/tokudb/ft-index/ft/tests/mempool-115.cc b/storage/tokudb/ft-index/ft/tests/mempool-115.cc index 8b5d2d691958b..11960aa4ef253 100644 --- a/storage/tokudb/ft-index/ft/tests/mempool-115.cc +++ b/storage/tokudb/ft-index/ft/tests/mempool-115.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,13 +96,18 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keysize, const cha { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(valsize); + void *maybe_free = nullptr; bn->get_space_for_insert( idx, key, keysize, size_needed, - &r + &r, + &maybe_free ); + if (maybe_free) { + toku_free(maybe_free); + } resource_assert(r); r->type = LE_CLEAN; r->u.clean.vallen = valsize; @@ -113,14 +118,20 @@ static void le_overwrite(bn_data* bn, uint32_t idx, const char *key, int keysize, const char *val, int valsize) { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(valsize); + void *maybe_free = nullptr; bn->get_space_for_overwrite( idx, key, keysize, + keysize, // old_keylen size_needed, // old_le_size size_needed, - &r + &r, + &maybe_free ); + if (maybe_free) { + toku_free(maybe_free); + } resource_assert(r); r->type = LE_CLEAN; r->u.clean.vallen = valsize; @@ -138,7 +149,7 @@ class bndata_bugfix_test { // just copy this code from a previous test // don't care what it does, just want to get a node up and running sn.flags = 0x11223344; - sn.thisnodename.b = 20; + sn.blocknum.b = 20; sn.layout_version = FT_LAYOUT_VERSION; sn.layout_version_original = FT_LAYOUT_VERSION; sn.height = 0; @@ -146,9 +157,8 @@ class bndata_bugfix_test { sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; MALLOC_N(sn.n_children, sn.bp); - MALLOC_N(1, sn.childkeys); - toku_memdup_dbt(&sn.childkeys[0], "b", 2); - sn.totalchildkeylens = 2; + DBT pivotkey; + sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); BP_STATE(&sn,0) = PT_AVAIL; BP_STATE(&sn,1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); @@ -157,8 +167,6 @@ class bndata_bugfix_test { le_add_to_bn(BLB_DATA(&sn, 0), 1, "b", 2, "bval", 5); le_add_to_bn(BLB_DATA(&sn, 1), 0, "x", 2, "xval", 5); - - // now this is the test. 
If I keep getting space for overwrite // like crazy, it should expose the bug bn_data* bnd = BLB_DATA(&sn, 0); @@ -176,15 +184,7 @@ class bndata_bugfix_test { // on. It may be that some algorithm has changed. assert(new_size < 5*old_size); - - for (int i = 0; i < sn.n_children-1; ++i) { - toku_free(sn.childkeys[i].data); - } - for (int i = 0; i < sn.n_children; i++) { - destroy_basement_node(BLB(&sn, i)); - } - toku_free(sn.bp); - toku_free(sn.childkeys); + toku_destroy_ftnode_internals(&sn); } }; diff --git a/storage/tokudb/ft-index/ft/tests/msnfilter.cc b/storage/tokudb/ft-index/ft/tests/msnfilter.cc index 1c1cba86f1cb5..41615028168ea 100644 --- a/storage/tokudb/ft-index/ft/tests/msnfilter.cc +++ b/storage/tokudb/ft-index/ft/tests/msnfilter.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,17 +89,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." // Verify that a message with an old msn is ignored -// by toku_apply_cmd_to_leaf() +// by toku_apply_msg_to_leaf() // // method: // - inject valid message, verify that new value is in row -// - inject message with same msn and new value, verify that original value is still in key (verify cmd.msn == node.max_msn is rejected) +// - inject message with same msn and new value, verify that original value is still in key (verify msg.msn == node.max_msn is rejected) // - inject valid message with new value2, verify that row has new value2 -// - inject message with old msn, verify that row still has value2 (verify cmd.msn < node.max_msn is rejected) +// - inject message with old msn, verify that row still has value2 (verify msg.msn < node.max_msn is rejected) // TODO: -// - verify that no work is done by messages that should be ignored (via workdone arg to ft_leaf_put_cmd()) +// - verify that no work is done by messages that should be ignored (via workdone arg to ft_leaf_put_msg()) // - maybe get counter of messages ignored for old msn (once the counter is implemented in ft-ops.c) #include "ft-internal.h" @@ -108,16 +108,16 @@ PATENT RIGHTS GRANT: #include "test.h" static FTNODE -make_node(FT_HANDLE brt, int height) { +make_node(FT_HANDLE ft, int height) { FTNODE node = NULL; int n_children = (height == 0) ? 
1 : 0; - toku_create_new_ftnode(brt, &node, height, n_children); + toku_create_new_ftnode(ft, &node, height, n_children); if (n_children) BP_STATE(node,0) = PT_AVAIL; return node; } static void -append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *val, uint32_t vallen) { +append_leaf(FT_HANDLE ft, FTNODE leafnode, void *key, uint32_t keylen, void *val, uint32_t vallen) { assert(leafnode->height == 0); DBT thekey; toku_fill_dbt(&thekey, key, keylen); @@ -130,48 +130,48 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va // apply an insert to the leaf node MSN msn = next_dummymsn(); - brt->ft->h->max_msn_in_ft = msn; - FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft->ft->h->max_msn_in_ft = msn; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd, &gc_info, nullptr, nullptr); + toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg, &gc_info, nullptr, nullptr); { - int r = toku_ft_lookup(brt, &thekey, lookup_checkf, &pair); + int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair); assert(r==0); assert(pair.call_count==1); } - FT_MSG_S badcmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval }} }; - toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &badcmd, &gc_info, nullptr, nullptr); + ft_msg badmsg(&thekey, &badval, FT_INSERT, msn, toku_xids_get_root_xids()); + toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, badmsg, &gc_info, nullptr, nullptr); // message should be rejected for duplicate msn, row should still have original val { - int r = toku_ft_lookup(brt, &thekey, lookup_checkf, &pair); + int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair); assert(r==0); assert(pair.call_count==2); } // now verify that message with proper msn gets through msn = next_dummymsn(); - brt->ft->h->max_msn_in_ft = msn; - FT_MSG_S cmd2 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &val2 }} }; - toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd2, &gc_info, nullptr, nullptr); + ft->ft->h->max_msn_in_ft = msn; + ft_msg msg2(&thekey, &val2, FT_INSERT, msn, toku_xids_get_root_xids()); + toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg2, &gc_info, nullptr, nullptr); // message should be accepted, val should have new value { - int r = toku_ft_lookup(brt, &thekey, lookup_checkf, &pair2); + int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair2); assert(r==0); assert(pair2.call_count==1); } // now verify that message with lesser (older) msn is rejected msn.msn = msn.msn - 10; - FT_MSG_S cmd3 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval } }}; - toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd3, &gc_info, nullptr, nullptr); + ft_msg msg3(&thekey, &badval, FT_INSERT, msn, toku_xids_get_root_xids()); + toku_ft_leaf_apply_msg(ft->ft->cmp, ft->ft->update_fun, leafnode, -1, msg3, &gc_info, nullptr, nullptr); // message should be rejected, val should still have value in pair2 { - int r = toku_ft_lookup(brt, &thekey, lookup_checkf, &pair2); + int r = toku_ft_lookup(ft, &thekey, lookup_checkf, &pair2); assert(r==0); 
assert(pair2.call_count==2); } @@ -181,11 +181,11 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va } static void -populate_leaf(FT_HANDLE brt, FTNODE leafnode, int k, int v) { +populate_leaf(FT_HANDLE ft, FTNODE leafnode, int k, int v) { char vbuf[32]; // store v in a buffer large enough to dereference unaligned int's memset(vbuf, 0, sizeof vbuf); memcpy(vbuf, &v, sizeof v); - append_leaf(brt, leafnode, &k, sizeof k, vbuf, sizeof v); + append_leaf(ft, leafnode, &k, sizeof k, vbuf, sizeof v); } static void @@ -202,18 +202,18 @@ test_msnfilter(int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - // create the brt + // create the ft TOKUTXN null_txn = NULL; - FT_HANDLE brt = NULL; - r = toku_open_ft_handle(fname, 1, &brt, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); + FT_HANDLE ft = NULL; + r = toku_open_ft_handle(fname, 1, &ft, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r == 0); - FTNODE newroot = make_node(brt, 0); + FTNODE newroot = make_node(ft, 0); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(brt->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // KLUDGE: Unpin the new root so toku_ft_lookup() can pin it. (Pin lock is no longer a recursive // mutex.) Just leaving it unpinned for this test program works because it is the only @@ -221,17 +221,17 @@ test_msnfilter(int do_verify) { // node and unlock it again before and after each message injection, but that requires more // work than it's worth (setting up dummy callbacks, etc.) // - toku_unpin_ftnode(brt->ft, newroot); + toku_unpin_ftnode(ft->ft, newroot); - populate_leaf(brt, newroot, htonl(2), 1); + populate_leaf(ft, newroot, htonl(2), 1); if (do_verify) { - r = toku_verify_ft(brt); + r = toku_verify_ft(ft); assert(r == 0); } // flush to the file system - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r == 0); // shutdown the cachetable diff --git a/storage/tokudb/ft-index/ft/tests/orthopush-flush.cc b/storage/tokudb/ft-index/ft/tests/orthopush-flush.cc index cfac10fb921b9..749729838e312 100644 --- a/storage/tokudb/ft-index/ft/tests/orthopush-flush.cc +++ b/storage/tokudb/ft-index/ft/tests/orthopush-flush.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -94,25 +94,9 @@ PATENT RIGHTS GRANT: #include "ule.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static const char *fname = TOKU_TEST_FILENAME; static txn_gc_info non_mvcc_gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - -static int dummy_cmp(DB *db __attribute__((unused)), - const DBT *a, const DBT *b) { - int c; - if (a->size > b->size) { - c = memcmp(a->data, b->data, b->size); - } else if (a->size < b->size) { - c = memcmp(a->data, b->data, a->size); - } else { - return memcmp(a->data, b->data, a->size); - } - if (c == 0) { - c = a->size - b->size; - } - return c; -} +static toku::comparator dummy_cmp; // generate size random bytes into dest static void @@ -148,7 +132,7 @@ rand_bytes_limited(void *dest, int size) // generate a random message with xids and a key starting with pfx, insert // it in bnc, and save it in output params save and is_fresh_out static void -insert_random_message(NONLEAF_CHILDINFO bnc, FT_MSG_S **save, bool *is_fresh_out, XIDS xids, int pfx) +insert_random_message(NONLEAF_CHILDINFO bnc, ft_msg **save, bool *is_fresh_out, XIDS xids, int pfx) { int keylen = (random() % 128) + 16; int vallen = (random() % 128) + 16; @@ -160,23 +144,15 @@ insert_random_message(NONLEAF_CHILDINFO bnc, FT_MSG_S **save, bool *is_fresh_out MSN msn = next_dummymsn(); bool is_fresh = (random() & 0x100) == 0; - DBT *keydbt, *valdbt; - XMALLOC(keydbt); - XMALLOC(valdbt); - toku_fill_dbt(keydbt, key, keylen + (sizeof pfx)); - toku_fill_dbt(valdbt, val, vallen); - FT_MSG_S *XMALLOC(result); - result->type = FT_INSERT; - result->msn = msn; - result->xids = xids; - result->u.id.key = keydbt; - result->u.id.val = valdbt; - *save = result; + DBT keydbt, valdbt; + toku_fill_dbt(&keydbt, key, keylen + (sizeof pfx)); + toku_fill_dbt(&valdbt, val, vallen); + *save = new ft_msg(&keydbt, &valdbt, FT_INSERT, msn, xids); *is_fresh_out = is_fresh; toku_bnc_insert_msg(bnc, key, keylen + (sizeof pfx), val, vallen, FT_INSERT, msn, xids, is_fresh, - NULL, dummy_cmp); + dummy_cmp); } // generate a random message with xids and a key starting with pfx, insert @@ -209,17 +185,12 @@ insert_random_message_to_bn( valdbt = &valdbt_s; toku_fill_dbt(keydbt, key, (sizeof *pfxp) + keylen); toku_fill_dbt(valdbt, val, vallen); - FT_MSG_S msg; - msg.type = FT_INSERT; - msg.msn = msn; - msg.xids = xids; - msg.u.id.key = keydbt; - msg.u.id.val = valdbt; *keylenp = keydbt->size; *keyp = toku_xmemdup(keydbt->data, keydbt->size); + ft_msg msg(keydbt, valdbt, FT_INSERT, msn, xids); int64_t numbytes; - toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes); - toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb, &msg, &non_mvcc_gc_info, NULL, NULL); + toku_le_apply_msg(msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes); + toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb, msg, &non_mvcc_gc_info, NULL, NULL); if (msn.msn > blb->max_msn_applied.msn) { blb->max_msn_applied = msn; } @@ -259,21 +230,16 @@ insert_same_message_to_bns( valdbt = &valdbt_s; toku_fill_dbt(keydbt, key, (sizeof *pfxp) + keylen); toku_fill_dbt(valdbt, val, vallen); - FT_MSG_S msg; - msg.type = FT_INSERT; - msg.msn = msn; - msg.xids = xids; - msg.u.id.key = keydbt; - msg.u.id.val = valdbt; *keylenp = keydbt->size; *keyp = toku_xmemdup(keydbt->data, keydbt->size); + ft_msg msg(keydbt, valdbt, FT_INSERT, msn, xids); int64_t numbytes; - toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes); - toku_ft_bn_apply_cmd(t->ft->compare_fun, 
t->ft->update_fun, NULL, blb1, &msg, &non_mvcc_gc_info, NULL, NULL); + toku_le_apply_msg(msg, NULL, NULL, 0, keydbt->size, &non_mvcc_gc_info, save, &numbytes); + toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb1, msg, &non_mvcc_gc_info, NULL, NULL); if (msn.msn > blb1->max_msn_applied.msn) { blb1->max_msn_applied = msn; } - toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb2, &msg, &non_mvcc_gc_info, NULL, NULL); + toku_ft_bn_apply_msg(t->ft->cmp, t->ft->update_fun, blb2, msg, &non_mvcc_gc_info, NULL, NULL); if (msn.msn > blb2->max_msn_applied.msn) { blb2->max_msn_applied = msn; } @@ -300,7 +266,7 @@ orthopush_flush_update_fun(DB * UU(db), const DBT *UU(key), const DBT *UU(old_va // the update message will overwrite the value with something generated // here, and add one to the int pointed to by applied static void -insert_random_update_message(NONLEAF_CHILDINFO bnc, FT_MSG_S **save, bool is_fresh, XIDS xids, int pfx, int *applied, MSN *max_msn) +insert_random_update_message(NONLEAF_CHILDINFO bnc, ft_msg **save, bool is_fresh, XIDS xids, int pfx, int *applied, MSN *max_msn) { int keylen = (random() % 16) + 16; int vallen = (random() % 16) + 16; @@ -313,48 +279,38 @@ insert_random_update_message(NONLEAF_CHILDINFO bnc, FT_MSG_S **save, bool is_fre update_extra->num_applications = applied; MSN msn = next_dummymsn(); - DBT *keydbt, *valdbt; - XMALLOC(keydbt); - XMALLOC(valdbt); - toku_fill_dbt(keydbt, key, keylen + (sizeof pfx)); - toku_fill_dbt(valdbt, update_extra, sizeof *update_extra); - FT_MSG_S *XMALLOC(result); - result->type = FT_UPDATE; - result->msn = msn; - result->xids = xids; - result->u.id.key = keydbt; - result->u.id.val = valdbt; - *save = result; + DBT keydbt, valdbt; + toku_fill_dbt(&keydbt, key, keylen + (sizeof pfx)); + toku_fill_dbt(&valdbt, update_extra, sizeof *update_extra); + *save = new ft_msg(&keydbt, &valdbt, FT_UPDATE, msn, xids); toku_bnc_insert_msg(bnc, key, keylen + (sizeof pfx), update_extra, sizeof *update_extra, FT_UPDATE, msn, xids, is_fresh, - NULL, dummy_cmp); + dummy_cmp); if (msn.msn > max_msn->msn) { *max_msn = msn; } } -const int M = 1024 * 1024; - // flush from one internal node to another, where both only have one // buffer static void flush_to_internal(FT_HANDLE t) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 - FT_MSG_S **MALLOC_N(4096,child_messages); + ft_msg **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 + ft_msg **MALLOC_N(4096,child_messages); bool *MALLOC_N(4096,parent_messages_is_fresh); bool *MALLOC_N(4096,child_messages_is_fresh); memset(parent_messages_is_fresh, 0, 4096*(sizeof parent_messages_is_fresh[0])); memset(child_messages_is_fresh, 0, 4096*(sizeof child_messages_is_fresh[0])); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); NONLEAF_CHILDINFO child_bnc = toku_create_empty_nl(); @@ -384,41 +340,60 @@ flush_to_internal(FT_HANDLE t) { memset(parent_messages_present, 0, sizeof parent_messages_present); memset(child_messages_present, 0, sizeof child_messages_present); - FIFO_ITERATE(child_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - DBT keydbt; - DBT valdbt; - toku_fill_dbt(&keydbt, key, keylen); - toku_fill_dbt(&valdbt, val, vallen); - int 
found = 0; - for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && - msn.msn == parent_messages[i]->msn.msn) { - assert(parent_messages_present[i] == 0); - assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, parent_messages[i]->u.id.val) == 0); - assert(type == parent_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(parent_messages[i]->xids)); - assert(parent_messages_is_fresh[i] == is_fresh); - parent_messages_present[i]++; - found++; - } - } - for (i = 0; i < num_child_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, child_messages[i]->u.id.key) == 0 && - msn.msn == child_messages[i]->msn.msn) { - assert(child_messages_present[i] == 0); - assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, child_messages[i]->u.id.val) == 0); - assert(type == child_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[i]->xids)); - assert(child_messages_is_fresh[i] == is_fresh); - child_messages_present[i]++; - found++; - } - } - assert(found == 1); - }); + struct checkit_fn { + int num_parent_messages; + ft_msg **parent_messages; + int *parent_messages_present; + bool *parent_messages_is_fresh; + int num_child_messages; + ft_msg **child_messages; + int *child_messages_present; + bool *child_messages_is_fresh; + checkit_fn(int np, ft_msg **pm, int *npp, bool *pmf, int nc, ft_msg **cm, int *ncp, bool *cmf) : + num_parent_messages(np), parent_messages(pm), parent_messages_present(npp), parent_messages_is_fresh(pmf), + num_child_messages(nc), child_messages(cm), child_messages_present(ncp), child_messages_is_fresh(cmf) { + } + int operator()(const ft_msg &msg, bool is_fresh) { + DBT keydbt; + DBT valdbt; + toku_fill_dbt(&keydbt, msg.kdbt()->data, msg.kdbt()->size); + toku_fill_dbt(&valdbt, msg.vdbt()->data, msg.vdbt()->size); + int found = 0; + MSN msn = msg.msn(); + enum ft_msg_type type = msg.type(); + XIDS xids = msg.xids(); + for (int k = 0; k < num_parent_messages; ++k) { + if (dummy_cmp(&keydbt, parent_messages[k]->kdbt()) == 0 && + msn.msn == parent_messages[k]->msn().msn) { + assert(parent_messages_present[k] == 0); + assert(found == 0); + assert(dummy_cmp(&valdbt, parent_messages[k]->vdbt()) == 0); + assert(type == parent_messages[k]->type()); + assert(toku_xids_get_innermost_xid(xids) == toku_xids_get_innermost_xid(parent_messages[k]->xids())); + assert(parent_messages_is_fresh[k] == is_fresh); + parent_messages_present[k]++; + found++; + } + } + for (int k = 0; k < num_child_messages; ++k) { + if (dummy_cmp(&keydbt, child_messages[k]->kdbt()) == 0 && + msn.msn == child_messages[k]->msn().msn) { + assert(child_messages_present[k] == 0); + assert(found == 0); + assert(dummy_cmp(&valdbt, child_messages[k]->vdbt()) == 0); + assert(type == child_messages[k]->type()); + assert(toku_xids_get_innermost_xid(xids) == toku_xids_get_innermost_xid(child_messages[k]->xids())); + assert(child_messages_is_fresh[k] == is_fresh); + child_messages_present[k]++; + found++; + } + } + assert(found == 1); + return 0; + } + } checkit(num_parent_messages, parent_messages, parent_messages_present, parent_messages_is_fresh, + num_child_messages, child_messages, child_messages_present, child_messages_is_fresh); + child_bnc->msg_buffer.iterate(checkit); for (i = 0; i < num_parent_messages; ++i) { assert(parent_messages_present[i] == 1); @@ -427,23 +402,19 @@ flush_to_internal(FT_HANDLE t) { assert(child_messages_present[i] == 1); } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - 
xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - toku_free((DBT *) parent_messages[i]->u.id.key); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); + toku_free(parent_messages[i]->kdbt()->data); + toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { - toku_free(child_messages[i]->u.id.key->data); - toku_free((DBT *) child_messages[i]->u.id.key); - toku_free(child_messages[i]->u.id.val->data); - toku_free((DBT *) child_messages[i]->u.id.val); - toku_free(child_messages[i]); + toku_free(child_messages[i]->kdbt()->data); + toku_free(child_messages[i]->vdbt()->data); + delete child_messages[i]; } destroy_nonleaf_childinfo(parent_bnc); toku_ftnode_free(&child); @@ -458,22 +429,22 @@ static void flush_to_internal_multiple(FT_HANDLE t) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 - FT_MSG_S **MALLOC_N(4096,child_messages); + ft_msg **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 + ft_msg **MALLOC_N(4096,child_messages); bool *MALLOC_N(4096,parent_messages_is_fresh); bool *MALLOC_N(4096,child_messages_is_fresh); memset(parent_messages_is_fresh, 0, 4096*(sizeof parent_messages_is_fresh[0])); memset(child_messages_is_fresh, 0, 4096*(sizeof child_messages_is_fresh[0])); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); NONLEAF_CHILDINFO child_bncs[8]; - FT_MSG childkeys[7]; + ft_msg *childkeys[7]; int i; for (i = 0; i < 8; ++i) { child_bncs[i] = toku_create_empty_nl(); @@ -487,7 +458,7 @@ flush_to_internal_multiple(FT_HANDLE t) { insert_random_message(child_bncs[i%8], &child_messages[i], &child_messages_is_fresh[i], xids_123, i%8); total_size += toku_bnc_memory_used(child_bncs[i%8]); if (i % 8 < 7) { - if (childkeys[i%8] == NULL || dummy_cmp(NULL, child_messages[i]->u.id.key, childkeys[i%8]->u.id.key) > 0) { + if (childkeys[i%8] == NULL || dummy_cmp(child_messages[i]->kdbt(), childkeys[i%8]->kdbt()) > 0) { childkeys[i%8] = child_messages[i]; } } @@ -508,7 +479,7 @@ flush_to_internal_multiple(FT_HANDLE t) { set_BNC(child, i, child_bncs[i]); BP_STATE(child, i) = PT_AVAIL; if (i < 7) { - toku_clone_dbt(&child->childkeys[i], *childkeys[i]->u.id.key); + child->pivotkeys.insert_at(childkeys[i]->kdbt(), i); } } @@ -525,41 +496,60 @@ flush_to_internal_multiple(FT_HANDLE t) { memset(child_messages_present, 0, sizeof child_messages_present); for (int j = 0; j < 8; ++j) { - FIFO_ITERATE(child_bncs[j]->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - DBT keydbt; - DBT valdbt; - toku_fill_dbt(&keydbt, key, keylen); - toku_fill_dbt(&valdbt, val, vallen); - int found = 0; - for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && - msn.msn == parent_messages[i]->msn.msn) { - assert(parent_messages_present[i] == 0); - assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, parent_messages[i]->u.id.val) == 0); - assert(type == parent_messages[i]->type); - assert(xids_get_innermost_xid(xids) == 
xids_get_innermost_xid(parent_messages[i]->xids)); - assert(parent_messages_is_fresh[i] == is_fresh); - parent_messages_present[i]++; - found++; - } - } - for (i = 0; i < num_child_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, child_messages[i]->u.id.key) == 0 && - msn.msn == child_messages[i]->msn.msn) { - assert(child_messages_present[i] == 0); - assert(found == 0); - assert(dummy_cmp(NULL, &valdbt, child_messages[i]->u.id.val) == 0); - assert(type == child_messages[i]->type); - assert(xids_get_innermost_xid(xids) == xids_get_innermost_xid(child_messages[i]->xids)); - assert(child_messages_is_fresh[i] == is_fresh); - child_messages_present[i]++; - found++; - } - } - assert(found == 1); - }); + struct checkit_fn { + int num_parent_messages; + ft_msg **parent_messages; + int *parent_messages_present; + bool *parent_messages_is_fresh; + int num_child_messages; + ft_msg **child_messages; + int *child_messages_present; + bool *child_messages_is_fresh; + checkit_fn(int np, ft_msg **pm, int *npp, bool *pmf, int nc, ft_msg **cm, int *ncp, bool *cmf) : + num_parent_messages(np), parent_messages(pm), parent_messages_present(npp), parent_messages_is_fresh(pmf), + num_child_messages(nc), child_messages(cm), child_messages_present(ncp), child_messages_is_fresh(cmf) { + } + int operator()(const ft_msg &msg, bool is_fresh) { + DBT keydbt; + DBT valdbt; + toku_fill_dbt(&keydbt, msg.kdbt()->data, msg.kdbt()->size); + toku_fill_dbt(&valdbt, msg.vdbt()->data, msg.vdbt()->size); + int found = 0; + MSN msn = msg.msn(); + enum ft_msg_type type = msg.type(); + XIDS xids = msg.xids(); + for (int _i = 0; _i < num_parent_messages; ++_i) { + if (dummy_cmp(&keydbt, parent_messages[_i]->kdbt()) == 0 && + msn.msn == parent_messages[_i]->msn().msn) { + assert(parent_messages_present[_i] == 0); + assert(found == 0); + assert(dummy_cmp(&valdbt, parent_messages[_i]->vdbt()) == 0); + assert(type == parent_messages[_i]->type()); + assert(toku_xids_get_innermost_xid(xids) == toku_xids_get_innermost_xid(parent_messages[_i]->xids())); + assert(parent_messages_is_fresh[_i] == is_fresh); + parent_messages_present[_i]++; + found++; + } + } + for (int _i = 0; _i < num_child_messages; ++_i) { + if (dummy_cmp(&keydbt, child_messages[_i]->kdbt()) == 0 && + msn.msn == child_messages[_i]->msn().msn) { + assert(child_messages_present[_i] == 0); + assert(found == 0); + assert(dummy_cmp(&valdbt, child_messages[_i]->vdbt()) == 0); + assert(type == child_messages[_i]->type()); + assert(toku_xids_get_innermost_xid(xids) == toku_xids_get_innermost_xid(child_messages[_i]->xids())); + assert(child_messages_is_fresh[_i] == is_fresh); + child_messages_present[_i]++; + found++; + } + } + assert(found == 1); + return 0; + } + } checkit(num_parent_messages, parent_messages, parent_messages_present, parent_messages_is_fresh, + num_child_messages, child_messages, child_messages_present, child_messages_is_fresh); + child_bncs[j]->msg_buffer.iterate(checkit); } for (i = 0; i < num_parent_messages; ++i) { @@ -569,23 +559,19 @@ flush_to_internal_multiple(FT_HANDLE t) { assert(child_messages_present[i] == 1); } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - toku_free((DBT *) parent_messages[i]->u.id.key); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); 
+ toku_free(parent_messages[i]->kdbt()->data); + toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { - toku_free(child_messages[i]->u.id.key->data); - toku_free((DBT *) child_messages[i]->u.id.key); - toku_free(child_messages[i]->u.id.val->data); - toku_free((DBT *) child_messages[i]->u.id.val); - toku_free(child_messages[i]); + toku_free(child_messages[i]->kdbt()->data); + toku_free(child_messages[i]->vdbt()->data); + delete child_messages[i]; } destroy_nonleaf_childinfo(parent_bnc); toku_ftnode_free(&child); @@ -607,7 +593,7 @@ static void flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 + ft_msg **MALLOC_N(4096,parent_messages); // 128k / 32 = 4096 LEAFENTRY* child_messages = NULL; XMALLOC_N(4096,child_messages); void** key_pointers = NULL; @@ -619,11 +605,11 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { int *MALLOC_N(4096,parent_messages_applied); memset(parent_messages_applied, 0, 4096*(sizeof parent_messages_applied[0])); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); BASEMENTNODE child_blbs[8]; @@ -653,7 +639,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { total_size += child_blbs[i%8]->data_buffer.get_memory_size(); if (i % 8 < 7) { DBT keydbt; - if (childkeys[i%8].size == 0 || dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) > 0) { + if (childkeys[i%8].size == 0 || dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) > 0) { toku_fill_dbt(&childkeys[i%8], key_pointers[i], keylens[i]); } } @@ -663,7 +649,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { for (i = 0; i < num_child_messages; ++i) { DBT keydbt; if (i % 8 < 7) { - assert(dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) <= 0); + assert(dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) <= 0); } } @@ -679,13 +665,13 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { int num_parent_messages = i; for (i = 0; i < 7; ++i) { - toku_clone_dbt(&child->childkeys[i], childkeys[i]); + child->pivotkeys.insert_at(&childkeys[i], i); } if (make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { if (!parent_messages_is_fresh[i]) { - toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL); } } for (i = 0; i < 8; ++i) { @@ -717,15 +703,16 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { BP_STATE(parentnode, 0) = PT_AVAIL; parentnode->max_msn_applied_to_node_on_disk = max_parent_msn; struct ancestors ancestors = { .node = parentnode, .childnum = 0, .next = NULL }; - const struct pivot_bounds infinite_bounds = { .lower_bound_exclusive = NULL, .upper_bound_inclusive = NULL }; bool msgs_applied; - toku_apply_ancestors_messages_to_node(t, child, &ancestors, &infinite_bounds, &msgs_applied, -1); + 
toku_apply_ancestors_messages_to_node(t, child, &ancestors, pivot_bounds::infinite_bounds(), &msgs_applied, -1); - FIFO_ITERATE(parent_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - key = key; keylen = keylen; val = val; vallen = vallen; type = type; msn = msn; xids = xids; - assert(!is_fresh); - }); + struct checkit_fn { + int operator()(const ft_msg &UU(msg), bool is_fresh) { + assert(!is_fresh); + return 0; + } + } checkit; + parent_bnc->msg_buffer.iterate(checkit); invariant(parent_bnc->fresh_message_tree.size() + parent_bnc->stale_message_tree.size() == (uint32_t) num_parent_messages); @@ -734,7 +721,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { int total_messages = 0; for (i = 0; i < 8; ++i) { - total_messages += BLB_DATA(child, i)->omt_size(); + total_messages += BLB_DATA(child, i)->num_klpairs(); } assert(total_messages <= num_parent_messages + num_child_messages); @@ -747,7 +734,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { memset(parent_messages_present, 0, sizeof parent_messages_present); memset(child_messages_present, 0, sizeof child_messages_present); for (int j = 0; j < 8; ++j) { - uint32_t len = BLB_DATA(child, j)->omt_size(); + uint32_t len = BLB_DATA(child, j)->num_klpairs(); for (uint32_t idx = 0; idx < len; ++idx) { LEAFENTRY le; DBT keydbt, valdbt; @@ -763,10 +750,10 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { } int found = 0; for (i = num_parent_messages - 1; i >= 0; --i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0) { + if (dummy_cmp(&keydbt, parent_messages[i]->kdbt()) == 0) { if (found == 0) { - struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(e, parent_messages[i]->u.id.val->data); - assert(dummy_cmp(NULL, &valdbt, &e->new_val) == 0); + struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(e, parent_messages[i]->vdbt()->data); + assert(dummy_cmp(&valdbt, &e->new_val) == 0); found++; } assert(parent_messages_present[i] == 0); @@ -782,9 +769,9 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { toku_fill_dbt(&childkeydbt, key_pointers[i], keylens[i]); toku_fill_dbt(&childvaldbt, valp, vallen); } - if (dummy_cmp(NULL, &keydbt, &childkeydbt) == 0) { + if (dummy_cmp(&keydbt, &childkeydbt) == 0) { if (found == 0) { - assert(dummy_cmp(NULL, &valdbt, &childvaldbt) == 0); + assert(dummy_cmp(&valdbt, &childvaldbt) == 0); found++; } assert(child_messages_present[i] == 0); @@ -801,18 +788,16 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { assert(child_messages_present[i] == 1); } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - toku_free((DBT *) parent_messages[i]->u.id.key); - struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->u.id.val->data); + toku_free(parent_messages[i]->kdbt()->data); + struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->vdbt()->data); toku_free(extra->new_val.data); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); + toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { toku_free(child_messages[i]); @@ -837,7 +822,7 @@ static void 
flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4k + ft_msg **MALLOC_N(4096,parent_messages); // 128k / 32 = 4k LEAFENTRY* child_messages = NULL; XMALLOC_N(4096,child_messages); void** key_pointers = NULL; @@ -849,11 +834,11 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { int *MALLOC_N(4096,parent_messages_applied); memset(parent_messages_applied, 0, 4096*(sizeof parent_messages_applied[0])); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); BASEMENTNODE child_blbs[8]; @@ -879,7 +864,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { insert_random_message_to_bn(t, child_blbs[i%8], &key_pointers[i], &keylens[i], &child_messages[i], xids_123, i%8); total_size += child_blbs[i%8]->data_buffer.get_memory_size(); DBT keydbt; - if (childkeys[i%8].size == 0 || dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) > 0) { + if (childkeys[i%8].size == 0 || dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) > 0) { toku_fill_dbt(&childkeys[i%8], key_pointers[i], keylens[i]); } } @@ -887,7 +872,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { for (i = 0; i < num_child_messages; ++i) { DBT keydbt; - assert(dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) <= 0); + assert(dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &childkeys[i%8]) <= 0); } { @@ -902,14 +887,14 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { int num_parent_messages = i; for (i = 0; i < 7; ++i) { - toku_clone_dbt(&child->childkeys[i], childkeys[i]); + child->pivotkeys.insert_at(&childkeys[i], i); } if (make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0 && + if (dummy_cmp(parent_messages[i]->kdbt(), &childkeys[7]) <= 0 && !parent_messages_is_fresh[i]) { - toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL); } } for (i = 0; i < 8; ++i) { @@ -923,7 +908,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { if (make_leaf_up_to_date && - dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0 && + dummy_cmp(parent_messages[i]->kdbt(), &childkeys[7]) <= 0 && !parent_messages_is_fresh[i]) { assert(parent_messages_applied[i] == 1); } else { @@ -940,59 +925,66 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { parentnode->max_msn_applied_to_node_on_disk = max_parent_msn; struct ancestors ancestors = { .node = parentnode, .childnum = 0, .next = NULL }; DBT lbe, ubi; - const struct pivot_bounds bounds = { - .lower_bound_exclusive = toku_init_dbt(&lbe), - .upper_bound_inclusive = toku_clone_dbt(&ubi, childkeys[7]) - }; + toku_init_dbt(&lbe); + toku_clone_dbt(&ubi, childkeys[7]); + const pivot_bounds bounds(lbe, ubi); bool msgs_applied; - 
toku_apply_ancestors_messages_to_node(t, child, &ancestors, &bounds, &msgs_applied, -1); - - FIFO_ITERATE(parent_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - val = val; vallen = vallen; type = type; msn = msn; xids = xids; - DBT keydbt; - toku_fill_dbt(&keydbt, key, keylen); - if (dummy_cmp(NULL, &keydbt, &childkeys[7]) > 0) { - for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, &keydbt, parent_messages[i]->u.id.key) == 0 && - msn.msn == parent_messages[i]->msn.msn) { - assert(is_fresh == parent_messages_is_fresh[i]); - break; - } - } - } else { - assert(!is_fresh); - } - }); + toku_apply_ancestors_messages_to_node(t, child, &ancestors, bounds, &msgs_applied, -1); + + struct checkit_fn { + DBT *childkeys; + int num_parent_messages; + ft_msg **parent_messages; + bool *parent_messages_is_fresh; + checkit_fn(DBT *ck, int np, ft_msg **pm, bool *pmf) : + childkeys(ck), num_parent_messages(np), parent_messages(pm), parent_messages_is_fresh(pmf) { + } + int operator()(const ft_msg &msg, bool is_fresh) { + DBT keydbt; + toku_fill_dbt(&keydbt, msg.kdbt()->data, msg.kdbt()->size); + MSN msn = msg.msn(); + if (dummy_cmp(&keydbt, &childkeys[7]) > 0) { + for (int _i = 0; _i < num_parent_messages; ++_i) { + if (dummy_cmp(&keydbt, parent_messages[_i]->kdbt()) == 0 && + msn.msn == parent_messages[_i]->msn().msn) { + assert(is_fresh == parent_messages_is_fresh[_i]); + break; + } + } + } else { + assert(!is_fresh); + } + return 0; + } + } checkit(childkeys, num_parent_messages, parent_messages, parent_messages_is_fresh); + parent_bnc->msg_buffer.iterate(checkit); toku_ftnode_free(&parentnode); int total_messages = 0; for (i = 0; i < 8; ++i) { - total_messages += BLB_DATA(child, i)->omt_size(); + total_messages += BLB_DATA(child, i)->num_klpairs(); } assert(total_messages <= num_parent_messages + num_child_messages); for (i = 0; i < num_parent_messages; ++i) { - if (dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0) { + if (dummy_cmp(parent_messages[i]->kdbt(), &childkeys[7]) <= 0) { assert(parent_messages_applied[i] == 1); } else { assert(parent_messages_applied[i] == 0); } } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - toku_free((DBT *) parent_messages[i]->u.id.key); - struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->u.id.val->data); + toku_free(parent_messages[i]->kdbt()->data); + struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->vdbt()->data); toku_free(extra->new_val.data); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); + toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { toku_free(child_messages[i]); @@ -1019,7 +1011,7 @@ static void compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { int r; - FT_MSG_S **MALLOC_N(4096,parent_messages); // 128k / 32 = 4k + ft_msg **MALLOC_N(4096,parent_messages); // 128k / 32 = 4k LEAFENTRY* child_messages = NULL; XMALLOC_N(4096,child_messages); void** key_pointers = NULL; @@ -1031,11 +1023,11 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { int *MALLOC_N(4096,parent_messages_applied); memset(parent_messages_applied, 0, 4096*(sizeof 
parent_messages_applied[0])); - XIDS xids_0 = xids_get_root_xids(); + XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123, xids_234; - r = xids_create_child(xids_0, &xids_123, (TXNID)123); + r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); CKERR(r); - r = xids_create_child(xids_0, &xids_234, (TXNID)234); + r = toku_xids_create_child(xids_0, &xids_234, (TXNID)234); CKERR(r); BASEMENTNODE child1_blbs[8], child2_blbs[8]; @@ -1070,7 +1062,7 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { total_size += child1_blbs[i%8]->data_buffer.get_memory_size(); if (i % 8 < 7) { DBT keydbt; - if (child1keys[i%8].size == 0 || dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child1keys[i%8]) > 0) { + if (child1keys[i%8].size == 0 || dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child1keys[i%8]) > 0) { toku_fill_dbt(&child1keys[i%8], key_pointers[i], keylens[i]); toku_fill_dbt(&child2keys[i%8], key_pointers[i], keylens[i]); } @@ -1081,8 +1073,8 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { for (i = 0; i < num_child_messages; ++i) { DBT keydbt; if (i % 8 < 7) { - assert(dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child1keys[i%8]) <= 0); - assert(dummy_cmp(NULL, toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child2keys[i%8]) <= 0); + assert(dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child1keys[i%8]) <= 0); + assert(dummy_cmp(toku_fill_dbt(&keydbt, key_pointers[i], keylens[i]), &child2keys[i%8]) <= 0); } } @@ -1098,15 +1090,15 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { int num_parent_messages = i; for (i = 0; i < 7; ++i) { - toku_clone_dbt(&child1->childkeys[i], child1keys[i]); - toku_clone_dbt(&child2->childkeys[i], child2keys[i]); + child1->pivotkeys.insert_at(&child1keys[i], i); + child2->pivotkeys.insert_at(&child2keys[i], i); } if (make_leaf_up_to_date) { for (i = 0; i < num_parent_messages; ++i) { if (!parent_messages_is_fresh[i]) { - toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child1, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); - toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child2, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child1, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL); + toku_ft_leaf_apply_msg(t->ft->cmp, t->ft->update_fun, child2, -1, *parent_messages[i], &non_mvcc_gc_info, NULL, NULL); } } for (i = 0; i < 8; ++i) { @@ -1130,25 +1122,26 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { BP_STATE(parentnode, 0) = PT_AVAIL; parentnode->max_msn_applied_to_node_on_disk = max_parent_msn; struct ancestors ancestors = { .node = parentnode, .childnum = 0, .next = NULL }; - const struct pivot_bounds infinite_bounds = { .lower_bound_exclusive = NULL, .upper_bound_inclusive = NULL }; bool msgs_applied; - toku_apply_ancestors_messages_to_node(t, child2, &ancestors, &infinite_bounds, &msgs_applied, -1); + toku_apply_ancestors_messages_to_node(t, child2, &ancestors, pivot_bounds::infinite_bounds(), &msgs_applied, -1); - FIFO_ITERATE(parent_bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, - { - key = key; keylen = keylen; val = val; vallen = vallen; type = type; msn = msn; xids = xids; - assert(!is_fresh); - }); + struct checkit_fn { + int operator()(const ft_msg &UU(msg), bool is_fresh) { + assert(!is_fresh); + return 0; + } + } checkit; + 
parent_bnc->msg_buffer.iterate(checkit); invariant(parent_bnc->fresh_message_tree.size() + parent_bnc->stale_message_tree.size() == (uint32_t) num_parent_messages); toku_ftnode_free(&parentnode); for (int j = 0; j < 8; ++j) { - BN_DATA first = BLB_DATA(child1, j); - BN_DATA second = BLB_DATA(child2, j); - uint32_t len = first->omt_size(); - assert(len == second->omt_size()); + bn_data* first = BLB_DATA(child1, j); + bn_data* second = BLB_DATA(child2, j); + uint32_t len = first->num_klpairs(); + assert(len == second->num_klpairs()); for (uint32_t idx = 0; idx < len; ++idx) { LEAFENTRY le1, le2; DBT key1dbt, val1dbt, key2dbt, val2dbt; @@ -1170,23 +1163,21 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { toku_fill_dbt(&key2dbt, keyp, keylen); toku_fill_dbt(&val2dbt, valp, vallen); } - assert(dummy_cmp(NULL, &key1dbt, &key2dbt) == 0); - assert(dummy_cmp(NULL, &val1dbt, &val2dbt) == 0); + assert(dummy_cmp(&key1dbt, &key2dbt) == 0); + assert(dummy_cmp(&val1dbt, &val2dbt) == 0); } } - xids_destroy(&xids_0); - xids_destroy(&xids_123); - xids_destroy(&xids_234); + toku_xids_destroy(&xids_0); + toku_xids_destroy(&xids_123); + toku_xids_destroy(&xids_234); for (i = 0; i < num_parent_messages; ++i) { - toku_free(parent_messages[i]->u.id.key->data); - toku_free((DBT *) parent_messages[i]->u.id.key); - struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->u.id.val->data); + toku_free(parent_messages[i]->kdbt()->data); + struct orthopush_flush_update_fun_extra *CAST_FROM_VOIDP(extra, parent_messages[i]->vdbt()->data); toku_free(extra->new_val.data); - toku_free(parent_messages[i]->u.id.val->data); - toku_free((DBT *) parent_messages[i]->u.id.val); - toku_free(parent_messages[i]); + toku_free(parent_messages[i]->vdbt()->data); + delete parent_messages[i]; } for (i = 0; i < num_child_messages; ++i) { toku_free(key_pointers[i]); @@ -1219,14 +1210,32 @@ parse_args(int argc, const char *argv[]) { } } +static int cmp_fn(DB *db __attribute__((unused)), + const DBT *a, const DBT *b) { + int c; + if (a->size > b->size) { + c = memcmp(a->data, b->data, b->size); + } else if (a->size < b->size) { + c = memcmp(a->data, b->data, a->size); + } else { + return memcmp(a->data, b->data, a->size); + } + if (c == 0) { + c = a->size - b->size; + } + return c; +} + int test_main (int argc, const char *argv[]) { parse_args(argc, argv); + dummy_cmp.create(cmp_fn, nullptr); + initialize_dummymsn(); int r; CACHETABLE ct; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); unlink(fname); FT_HANDLE t; r = toku_open_ft_handle(fname, 1, &t, 128*1024, 4096, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -1256,5 +1265,7 @@ test_main (int argc, const char *argv[]) { r = toku_close_ft_handle_nolsn(t, 0); assert(r==0); toku_cachetable_close(&ct); + dummy_cmp.destroy(); + return 0; } diff --git a/storage/tokudb/ft-index/ft/tests/pqueue-test.cc b/storage/tokudb/ft-index/ft/tests/pqueue-test.cc index a42cf830c9e3d..a10fcd774830e 100644 --- a/storage/tokudb/ft-index/ft/tests/pqueue-test.cc +++ b/storage/tokudb/ft-index/ft/tests/pqueue-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
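Editor's note on the pattern used in the hunks above: the old FIFO_ITERATE macro is replaced by a functor handed to the message buffer's iterate(), where the callback is any object exposing int operator()(const ft_msg &, bool is_fresh) and a nonzero return stops iteration. The following is a minimal standalone sketch of that calling convention with simplified stand-in types (not TokuFT's real ft_msg or message_buffer classes, which live in the ft/ sources):

// Illustrative stand-ins only; the real message carries key, value, type, MSN and XIDs.
#include <cassert>
#include <utility>
#include <vector>

struct msg { int key; };                           // hypothetical simplified message

class msg_buffer_sketch {
    std::vector<std::pair<msg, bool>> entries;     // (message, is_fresh)
public:
    void enqueue(msg m, bool is_fresh) { entries.push_back({m, is_fresh}); }
    // Calls fn(msg, is_fresh) for each entry; a nonzero return stops iteration,
    // mirroring the int-returning checkit_fn functors used in the tests above.
    template <typename F>
    int iterate(F &fn) const {
        for (const auto &e : entries) {
            int r = fn(e.first, e.second);
            if (r != 0) return r;
        }
        return 0;
    }
};

int main() {
    msg_buffer_sketch buf;
    buf.enqueue({1}, false);
    buf.enqueue({2}, false);
    struct checkit_fn {
        int operator()(const msg &, bool is_fresh) {
            assert(!is_fresh);                     // same invariant the test asserts
            return 0;
        }
    } checkit;
    return buf.iterate(checkit);
}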
DISCLAIMER: @@ -91,8 +91,8 @@ PATENT RIGHTS GRANT: #include "test.h" -#include "ftloader-internal.h" -#include "pqueue.h" +#include "loader/loader-internal.h" +#include "loader/pqueue.h" int found_dup = -1; diff --git a/storage/tokudb/ft-index/ft/tests/quicklz-test.cc b/storage/tokudb/ft-index/ft/tests/quicklz-test.cc index 44bec12fb085e..2c8b88440f9d5 100644 --- a/storage/tokudb/ft-index/ft/tests/quicklz-test.cc +++ b/storage/tokudb/ft-index/ft/tests/quicklz-test.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: #ident "$Id$" #include "test.h" -#include "quicklz.h" +#include "serialize/quicklz.h" static void test_qlz_random_i (int i) { if (verbose) printf("i=%d\n", i); diff --git a/storage/tokudb/ft-index/ft/tests/recovery-bad-last-entry.cc b/storage/tokudb/ft-index/ft/tests/recovery-bad-last-entry.cc index 214218f5a1a15..80d0f2954659e 100644 --- a/storage/tokudb/ft-index/ft/tests/recovery-bad-last-entry.cc +++ b/storage/tokudb/ft-index/ft/tests/recovery-bad-last-entry.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -146,7 +146,7 @@ run_test(void) { else break; // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, diff --git a/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend-hello.cc b/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend-hello.cc index b0da2695e4710..6686ba61f6429 100644 --- a/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend-hello.cc +++ b/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend-hello.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -128,7 +128,7 @@ run_test(void) { r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend.cc b/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend.cc index b192ad64af434..ef95392381369 100644 --- a/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend.cc +++ b/storage/tokudb/ft-index/ft/tests/recovery-cbegin-cend.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -113,7 +113,7 @@ run_test(void) { r = toku_logger_close(&logger); assert(r == 0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/storage/tokudb/ft-index/ft/tests/recovery-cbegin.cc b/storage/tokudb/ft-index/ft/tests/recovery-cbegin.cc index 411684770d0c7..54d69d2a605b4 100644 --- a/storage/tokudb/ft-index/ft/tests/recovery-cbegin.cc +++ b/storage/tokudb/ft-index/ft/tests/recovery-cbegin.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -119,7 +119,7 @@ run_test(void) { r = close(devnul); assert(r==0); - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/storage/tokudb/ft-index/ft/tests/recovery-cend-cbegin.cc b/storage/tokudb/ft-index/ft/tests/recovery-cend-cbegin.cc index 8c155c35b0e01..d03b95fd9f7db 100644 --- a/storage/tokudb/ft-index/ft/tests/recovery-cend-cbegin.cc +++ b/storage/tokudb/ft-index/ft/tests/recovery-cend-cbegin.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -121,7 +121,7 @@ run_test(void) { } // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, diff --git a/storage/tokudb/ft-index/ft/tests/recovery-datadir-is-file.cc b/storage/tokudb/ft-index/ft/tests/recovery-datadir-is-file.cc index 7a8108a347e66..5df3b6bdca495 100644 --- a/storage/tokudb/ft-index/ft/tests/recovery-datadir-is-file.cc +++ b/storage/tokudb/ft-index/ft/tests/recovery-datadir-is-file.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -130,7 +130,7 @@ run_test(void) { strncat(buf, testfile, TOKU_PATH_MAX); r = system(buf); CKERR(r); } - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, diff --git a/storage/tokudb/ft-index/ft/tests/recovery-empty.cc b/storage/tokudb/ft-index/ft/tests/recovery-empty.cc index 33c7333bc54fe..37acb97e82bfb 100644 --- a/storage/tokudb/ft-index/ft/tests/recovery-empty.cc +++ b/storage/tokudb/ft-index/ft/tests/recovery-empty.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -116,7 +116,7 @@ run_test(void) { } // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/storage/tokudb/ft-index/ft/tests/recovery-fopen-missing-file.cc b/storage/tokudb/ft-index/ft/tests/recovery-fopen-missing-file.cc index 63a5f5a5fee5b..7590ea162bb66 100644 --- a/storage/tokudb/ft-index/ft/tests/recovery-fopen-missing-file.cc +++ b/storage/tokudb/ft-index/ft/tests/recovery-fopen-missing-file.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -109,7 +109,7 @@ run_test(void) { toku_log_begin_checkpoint(logger, &beginlsn, true, 0, 0); toku_log_end_checkpoint(logger, NULL, true, beginlsn, 0, 0, 0); - BYTESTRING iname = { (uint32_t) strlen("missing_tokudb_file"), (char *) "missing_tokudb_file" }; + BYTESTRING iname = { (uint32_t) strlen("missing_tokuft_file"), (char *) "missing_tokuft_file" }; FILENUM filenum = {42}; uint32_t treeflags = 0; toku_log_fopen(logger, NULL, true, iname, filenum, treeflags); @@ -122,7 +122,7 @@ run_test(void) { r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/storage/tokudb/ft-index/ft/tests/recovery-hello.cc b/storage/tokudb/ft-index/ft/tests/recovery-hello.cc index d31698f795bf7..36126c576afba 100644 --- a/storage/tokudb/ft-index/ft/tests/recovery-hello.cc +++ b/storage/tokudb/ft-index/ft/tests/recovery-hello.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -122,7 +122,7 @@ run_test(void) { r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/storage/tokudb/ft-index/ft/tests/recovery-lsn-error-during-forward-scan.cc b/storage/tokudb/ft-index/ft/tests/recovery-lsn-error-during-forward-scan.cc index 253c674ae7003..f21c307ccf5fc 100644 --- a/storage/tokudb/ft-index/ft/tests/recovery-lsn-error-during-forward-scan.cc +++ b/storage/tokudb/ft-index/ft/tests/recovery-lsn-error-during-forward-scan.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -152,7 +152,7 @@ run_test(void) { toku_recover_set_callback(recover_callback_at_turnaround, NULL); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/storage/tokudb/ft-index/ft/tests/recovery-no-datadir.cc b/storage/tokudb/ft-index/ft/tests/recovery-no-datadir.cc index 222de5bdbcbb7..b79ea03bca5b9 100644 --- a/storage/tokudb/ft-index/ft/tests/recovery-no-datadir.cc +++ b/storage/tokudb/ft-index/ft/tests/recovery-no-datadir.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -116,7 +116,7 @@ run_test(void) { r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, "/junk", TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/storage/tokudb/ft-index/ft/tests/recovery-no-log.cc b/storage/tokudb/ft-index/ft/tests/recovery-no-log.cc index ac71769e580f7..a2fd7b2e01052 100644 --- a/storage/tokudb/ft-index/ft/tests/recovery-no-log.cc +++ b/storage/tokudb/ft-index/ft/tests/recovery-no-log.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -108,7 +108,7 @@ run_test(void) { r = close(devnul); assert(r==0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, TOKU_TEST_FILENAME, TOKU_TEST_FILENAME, 0, 0, 0, NULL, 0); diff --git a/storage/tokudb/ft-index/ft/tests/recovery-no-logdir.cc b/storage/tokudb/ft-index/ft/tests/recovery-no-logdir.cc index ad72decd134b5..3e889b665a6fd 100644 --- a/storage/tokudb/ft-index/ft/tests/recovery-no-logdir.cc +++ b/storage/tokudb/ft-index/ft/tests/recovery-no-logdir.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -102,7 +102,7 @@ run_test(void) { r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU); assert(r == 0); // run recovery - r = tokudb_recover(NULL, + r = tokuft_recover(NULL, NULL_prepared_txn_callback, NULL_keep_cachetable_callback, NULL_logger, NULL, NULL, 0, 0, 0, NULL, 0); diff --git a/storage/tokudb/ft-index/ft/tests/recovery-test5123.cc b/storage/tokudb/ft-index/ft/tests/recovery-test5123.cc index 7020ea39b244c..955a842e6e9f4 100644 --- a/storage/tokudb/ft-index/ft/tests/recovery-test5123.cc +++ b/storage/tokudb/ft-index/ft/tests/recovery-test5123.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #include "test.h" #include "toku_os.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "test-ft-txns.h" diff --git a/storage/tokudb/ft-index/ft/tests/shortcut.cc b/storage/tokudb/ft-index/ft/tests/shortcut.cc index d972279cb7740..fc08868ccf0fb 100644 --- a/storage/tokudb/ft-index/ft/tests/shortcut.cc +++ b/storage/tokudb/ft-index/ft/tests/shortcut.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,7 +96,7 @@ static const char *fname = TOKU_TEST_FILENAME; static TOKUTXN const null_txn = 0; CACHETABLE ct; -FT_HANDLE brt; +FT_HANDLE ft; FT_CURSOR cursor; static int test_ft_cursor_keycompare(DB *db __attribute__((unused)), const DBT *a, const DBT *b) { @@ -108,16 +108,16 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); - r = toku_open_ft_handle(fname, 1, &brt, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); - r = toku_ft_cursor(brt, &cursor, NULL, false, false); assert(r==0); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); + r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, test_ft_cursor_keycompare); assert(r==0); + r = toku_ft_cursor(ft, &cursor, NULL, false, false); assert(r==0); int i; for (i=0; i<1000; i++) { char string[100]; snprintf(string, sizeof(string), "%04d", i); DBT key,val; - toku_ft_insert(brt, toku_fill_dbt(&key, string, 5), toku_fill_dbt(&val, string, 5), 0); + toku_ft_insert(ft, toku_fill_dbt(&key, string, 5), toku_fill_dbt(&val, string, 5), 0); } { @@ -132,7 +132,7 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute // This will invalidate due to the root counter bumping, but the OMT itself will still be valid. { DBT key, val; - toku_ft_insert(brt, toku_fill_dbt(&key, "d", 2), toku_fill_dbt(&val, "w", 2), 0); + toku_ft_insert(ft, toku_fill_dbt(&key, "d", 2), toku_fill_dbt(&val, "w", 2), 0); } { @@ -141,7 +141,7 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute } toku_ft_cursor_close(cursor); - r = toku_close_ft_handle_nolsn(brt, 0); assert(r==0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); return 0; } diff --git a/storage/tokudb/ft-index/ft/tests/subblock-test-checksum.cc b/storage/tokudb/ft-index/ft/tests/subblock-test-checksum.cc index 1885ce0f55cd7..0ba9e88ee83b7 100644 --- a/storage/tokudb/ft-index/ft/tests/subblock-test-checksum.cc +++ b/storage/tokudb/ft-index/ft/tests/subblock-test-checksum.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
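Editor's note on the shortcut.cc hunk above: the insert loop builds keys with snprintf(string, sizeof(string), "%04d", i) and passes length 5 to toku_fill_dbt, i.e. the four digits plus the terminating NUL. A small standalone check of that length arithmetic, independent of the TokuFT headers:

#include <cassert>
#include <cstdio>
#include <cstring>

int main() {
    char string[100];
    for (int i = 0; i < 1000; i++) {
        // "%04d" always yields exactly 4 digits for 0 <= i < 10000, so the
        // key/value length of 5 used in the test covers the digits plus the NUL.
        int n = snprintf(string, sizeof(string), "%04d", i);
        assert(n == 4);
        assert(strlen(string) + 1 == 5);
    }
    return 0;
}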
DISCLAIMER: @@ -91,8 +91,8 @@ PATENT RIGHTS GRANT: #include "test.h" -#include "compress.h" -#include "sub_block.h" +#include "serialize/compress.h" +#include "serialize/sub_block.h" #include #include diff --git a/storage/tokudb/ft-index/ft/tests/subblock-test-compression.cc b/storage/tokudb/ft-index/ft/tests/subblock-test-compression.cc index ccd7a4e521cab..ba3ab4113f432 100644 --- a/storage/tokudb/ft-index/ft/tests/subblock-test-compression.cc +++ b/storage/tokudb/ft-index/ft/tests/subblock-test-compression.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include -#include "sub_block.h" +#include "serialize/sub_block.h" static void test_sub_block_compression(void *buf, int total_size, int my_max_sub_blocks, int n_cores, enum toku_compression_method method) { diff --git a/storage/tokudb/ft-index/ft/tests/subblock-test-index.cc b/storage/tokudb/ft-index/ft/tests/subblock-test-index.cc index 2821429c3eb33..d6e035af6d311 100644 --- a/storage/tokudb/ft-index/ft/tests/subblock-test-index.cc +++ b/storage/tokudb/ft-index/ft/tests/subblock-test-index.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include -#include "sub_block.h" +#include "serialize/sub_block.h" static void test_sub_block_index(void) { diff --git a/storage/tokudb/ft-index/ft/tests/subblock-test-size.cc b/storage/tokudb/ft-index/ft/tests/subblock-test-size.cc index 5a226a4b443e0..e21b15f33d211 100644 --- a/storage/tokudb/ft-index/ft/tests/subblock-test-size.cc +++ b/storage/tokudb/ft-index/ft/tests/subblock-test-size.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include -#include "sub_block.h" +#include "serialize/sub_block.h" static void test_sub_block_size(int total_size) { diff --git a/storage/tokudb/ft-index/ft/tests/test-assert.cc b/storage/tokudb/ft-index/ft/tests/test-assert.cc index a06b389584d65..f6221c2d152d2 100644 --- a/storage/tokudb/ft-index/ft/tests/test-assert.cc +++ b/storage/tokudb/ft-index/ft/tests/test-assert.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/test-bjm.cc b/storage/tokudb/ft-index/ft/tests/test-bjm.cc index dc0f833992d5d..4969f8c4a8bb8 100644 --- a/storage/tokudb/ft-index/ft/tests/test-bjm.cc +++ b/storage/tokudb/ft-index/ft/tests/test-bjm.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "background_job_manager.h" +#include "cachetable/background_job_manager.h" #include "test.h" diff --git a/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-flush.cc b/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-flush.cc index e3a6116624eb7..22fbf37d500ae 100644 --- a/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-flush.cc +++ b/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-flush.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,10 +96,9 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -184,7 +183,7 @@ doit (bool after_child_pin) { toku_flusher_thread_set_callback(flusher_callback, &after_child_pin); - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink("foo1.ft_handle"); r = toku_open_ft_handle("foo1.ft_handle", 1, &t, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -228,17 +227,16 @@ doit (bool after_child_pin) { ); FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); - toku_pin_ftnode_off_client_thread( + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); + toku_pin_ftnode( t->ft, node_root, toku_cachetable_hash(t->ft->cf, node_root), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 1); assert(node->n_children == 1); @@ -249,15 +247,14 @@ doit (bool after_child_pin) { assert(checkpoint_callback_called); // now let's pin the root again and make sure it is flushed - toku_pin_ftnode_off_client_thread( + toku_pin_ftnode( t->ft, node_root, toku_cachetable_hash(t->ft->cf, node_root), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 1); assert(node->n_children == 1); @@ -285,16 +282,15 @@ doit (bool after_child_pin) { // // now pin the root, verify that we have a message in there, and that it is clean // - fill_bfe_for_full_read(&bfe, c_ft->ft); - toku_pin_ftnode_off_client_thread( + bfe.create_for_full_read(c_ft->ft); + toku_pin_ftnode( c_ft->ft, node_root, toku_cachetable_hash(c_ft->ft->cf, node_root), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 1); assert(!node->dirty); @@ -305,17 +301,16 @@ doit (bool after_child_pin) { else { assert(toku_bnc_nbytesinbuf(BNC(node, 0)) > 0); } - toku_unpin_ftnode_off_client_thread(c_ft->ft, node); + toku_unpin_ftnode(c_ft->ft, node); - toku_pin_ftnode_off_client_thread( + toku_pin_ftnode( c_ft->ft, node_leaf, toku_cachetable_hash(c_ft->ft->cf, node_root), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 0); assert(!node->dirty); @@ -326,7 +321,7 @@ doit (bool after_child_pin) { else { assert(BLB_NBYTESINDATA(node,0) == 0); } - 
toku_unpin_ftnode_off_client_thread(c_ft->ft, node); + toku_unpin_ftnode(c_ft->ft, node); struct check_pair pair1 = {2, "a", 0, NULL, 0}; DBT k; diff --git a/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-merge.cc b/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-merge.cc index c8127d7287a7d..0ad417f27121a 100644 --- a/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-merge.cc +++ b/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-merge.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,10 +96,9 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -175,7 +174,7 @@ doit (int state) { toku_flusher_thread_set_callback(flusher_callback, &state); - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink("foo2.ft_handle"); unlink("bar2.ft_handle"); // note the basement node size is 5 times the node size @@ -246,9 +245,9 @@ doit (int state) { toku_unpin_ftnode(t->ft, node); - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); - toku_pin_ftnode_off_client_thread( + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); + toku_pin_ftnode_with_dep_nodes( t->ft, node_root, toku_cachetable_hash(t->ft->cf, node_root), @@ -256,7 +255,8 @@ doit (int state) { PL_WRITE_EXPENSIVE, 0, NULL, - &node + &node, + true ); assert(node->height == 1); assert(node->n_children == 2); @@ -266,7 +266,7 @@ doit (int state) { assert(checkpoint_callback_called); // now let's pin the root again and make sure it is has merged - toku_pin_ftnode_off_client_thread( + toku_pin_ftnode_with_dep_nodes( t->ft, node_root, toku_cachetable_hash(t->ft->cf, node_root), @@ -274,7 +274,8 @@ doit (int state) { PL_WRITE_EXPENSIVE, 0, NULL, - &node + &node, + true ); assert(node->height == 1); assert(node->n_children == 1); @@ -304,8 +305,8 @@ doit (int state) { // // now pin the root, verify that the state is what we expect // - fill_bfe_for_full_read(&bfe, c_ft->ft); - toku_pin_ftnode_off_client_thread( + bfe.create_for_full_read(c_ft->ft); + toku_pin_ftnode_with_dep_nodes( c_ft->ft, node_root, toku_cachetable_hash(c_ft->ft->cf, node_root), @@ -313,7 +314,8 @@ doit (int state) { PL_WRITE_EXPENSIVE, 0, NULL, - &node + &node, + true ); assert(node->height == 1); assert(!node->dirty); @@ -331,11 +333,11 @@ doit (int state) { else { assert(false); } - toku_unpin_ftnode_off_client_thread(c_ft->ft, node); + toku_unpin_ftnode(c_ft->ft, node); // now let's verify the leaves are what we expect if (state == flt_flush_before_merge || state == flt_flush_before_pin_second_node_for_merge) { - toku_pin_ftnode_off_client_thread( + toku_pin_ftnode_with_dep_nodes( c_ft->ft, left_child, toku_cachetable_hash(c_ft->ft->cf, left_child), @@ -343,15 +345,16 @@ doit (int state) { PL_WRITE_EXPENSIVE, 0, NULL, - &node + &node, + true ); assert(node->height == 0); assert(!node->dirty); assert(node->n_children == 1); - assert(BLB_DATA(node, 0)->omt_size() == 1); - toku_unpin_ftnode_off_client_thread(c_ft->ft, node); + assert(BLB_DATA(node, 0)->num_klpairs() == 1); + toku_unpin_ftnode(c_ft->ft, node); - toku_pin_ftnode_off_client_thread( + 
toku_pin_ftnode_with_dep_nodes( c_ft->ft, right_child, toku_cachetable_hash(c_ft->ft->cf, right_child), @@ -359,16 +362,17 @@ doit (int state) { PL_WRITE_EXPENSIVE, 0, NULL, - &node + &node, + true ); assert(node->height == 0); assert(!node->dirty); assert(node->n_children == 1); - assert(BLB_DATA(node, 0)->omt_size() == 1); - toku_unpin_ftnode_off_client_thread(c_ft->ft, node); + assert(BLB_DATA(node, 0)->num_klpairs() == 1); + toku_unpin_ftnode(c_ft->ft, node); } else if (state == ft_flush_aflter_merge || state == flt_flush_before_unpin_remove) { - toku_pin_ftnode_off_client_thread( + toku_pin_ftnode_with_dep_nodes( c_ft->ft, left_child, toku_cachetable_hash(c_ft->ft->cf, left_child), @@ -376,13 +380,14 @@ doit (int state) { PL_WRITE_EXPENSIVE, 0, NULL, - &node + &node, + true ); assert(node->height == 0); assert(!node->dirty); assert(node->n_children == 1); - assert(BLB_DATA(node, 0)->omt_size() == 2); - toku_unpin_ftnode_off_client_thread(c_ft->ft, node); + assert(BLB_DATA(node, 0)->num_klpairs() == 2); + toku_unpin_ftnode(c_ft->ft, node); } else { assert(false); diff --git a/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-rebalance.cc b/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-rebalance.cc index 89176adba5c61..7870cd2fa5841 100644 --- a/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-rebalance.cc +++ b/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-rebalance.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,10 +96,9 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -175,7 +174,7 @@ doit (int state) { toku_flusher_thread_set_callback(flusher_callback, &state); - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink("foo3.ft_handle"); unlink("bar3.ft_handle"); // note the basement node size is 5 times the node size @@ -266,17 +265,16 @@ doit (int state) { toku_unpin_ftnode(t->ft, node); - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); - toku_pin_ftnode_off_client_thread( + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); + toku_pin_ftnode( t->ft, node_root, toku_cachetable_hash(t->ft->cf, node_root), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 1); assert(node->n_children == 2); @@ -286,15 +284,14 @@ doit (int state) { assert(checkpoint_callback_called); // now let's pin the root again and make sure it is has rebalanced - toku_pin_ftnode_off_client_thread( + toku_pin_ftnode( t->ft, node_root, toku_cachetable_hash(t->ft->cf, node_root), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 1); assert(node->n_children == 2); @@ -324,16 +321,15 @@ doit (int state) { // // now pin the root, verify that the state is what we expect // - fill_bfe_for_full_read(&bfe, c_ft->ft); - toku_pin_ftnode_off_client_thread( + bfe.create_for_full_read(c_ft->ft); + toku_pin_ftnode( c_ft->ft, node_root, toku_cachetable_hash(c_ft->ft->cf, node_root), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 1); assert(!node->dirty); @@ -343,41 +339,38 
@@ doit (int state) { left_child = BP_BLOCKNUM(node,0); right_child = BP_BLOCKNUM(node,1); - toku_unpin_ftnode_off_client_thread(c_ft->ft, node); + toku_unpin_ftnode(c_ft->ft, node); // now let's verify the leaves are what we expect - toku_pin_ftnode_off_client_thread( + toku_pin_ftnode( c_ft->ft, left_child, toku_cachetable_hash(c_ft->ft->cf, left_child), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 0); assert(!node->dirty); assert(node->n_children == 1); - assert(BLB_DATA(node, 0)->omt_size() == 2); - toku_unpin_ftnode_off_client_thread(c_ft->ft, node); + assert(BLB_DATA(node, 0)->num_klpairs() == 2); + toku_unpin_ftnode(c_ft->ft, node); - toku_pin_ftnode_off_client_thread( + toku_pin_ftnode( c_ft->ft, right_child, toku_cachetable_hash(c_ft->ft->cf, right_child), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 0); assert(!node->dirty); assert(node->n_children == 1); - assert(BLB_DATA(node, 0)->omt_size() == 2); - toku_unpin_ftnode_off_client_thread(c_ft->ft, node); - + assert(BLB_DATA(node, 0)->num_klpairs() == 2); + toku_unpin_ftnode(c_ft->ft, node); DBT k; struct check_pair pair1 = {2, "a", 0, NULL, 0}; diff --git a/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-split.cc b/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-split.cc index f4b08f25c42b5..8e24ae2bb4343 100644 --- a/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-split.cc +++ b/storage/tokudb/ft-index/ft/tests/test-checkpoint-during-split.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,10 +96,9 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -184,7 +183,7 @@ doit (bool after_split) { toku_flusher_thread_set_callback(flusher_callback, &after_split); - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink("foo4.ft_handle"); unlink("bar4.ft_handle"); // note the basement node size is 5 times the node size @@ -242,17 +241,16 @@ doit (bool after_split) { ); FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); - toku_pin_ftnode_off_client_thread( + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); + toku_pin_ftnode( t->ft, node_root, toku_cachetable_hash(t->ft->cf, node_root), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 1); assert(node->n_children == 1); @@ -262,15 +260,14 @@ doit (bool after_split) { assert(checkpoint_callback_called); // now let's pin the root again and make sure it is has split - toku_pin_ftnode_off_client_thread( + toku_pin_ftnode( t->ft, node_root, toku_cachetable_hash(t->ft->cf, node_root), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 1); assert(node->n_children == 2); @@ -300,16 +297,15 @@ doit (bool after_split) { // // now pin the root, verify that we have a message in there, and that it is clean // - fill_bfe_for_full_read(&bfe, c_ft->ft); - toku_pin_ftnode_off_client_thread( + bfe.create_for_full_read(c_ft->ft); + toku_pin_ftnode( c_ft->ft, node_root, 
toku_cachetable_hash(c_ft->ft->cf, node_root), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 1); assert(!node->dirty); @@ -325,58 +321,55 @@ doit (bool after_split) { left_child = BP_BLOCKNUM(node,0); assert(left_child.b == node_leaf.b); } - toku_unpin_ftnode_off_client_thread(c_ft->ft, node); + toku_unpin_ftnode(c_ft->ft, node); // now let's verify the leaves are what we expect if (after_split) { - toku_pin_ftnode_off_client_thread( + toku_pin_ftnode( c_ft->ft, left_child, toku_cachetable_hash(c_ft->ft->cf, left_child), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 0); assert(!node->dirty); assert(node->n_children == 1); - assert(BLB_DATA(node, 0)->omt_size() == 1); - toku_unpin_ftnode_off_client_thread(c_ft->ft, node); + assert(BLB_DATA(node, 0)->num_klpairs() == 1); + toku_unpin_ftnode(c_ft->ft, node); - toku_pin_ftnode_off_client_thread( + toku_pin_ftnode( c_ft->ft, right_child, toku_cachetable_hash(c_ft->ft->cf, right_child), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 0); assert(!node->dirty); assert(node->n_children == 1); - assert(BLB_DATA(node, 0)->omt_size() == 1); - toku_unpin_ftnode_off_client_thread(c_ft->ft, node); + assert(BLB_DATA(node, 0)->num_klpairs() == 1); + toku_unpin_ftnode(c_ft->ft, node); } else { - toku_pin_ftnode_off_client_thread( + toku_pin_ftnode( c_ft->ft, left_child, toku_cachetable_hash(c_ft->ft->cf, left_child), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 0); assert(!node->dirty); assert(node->n_children == 1); - assert(BLB_DATA(node, 0)->omt_size() == 2); - toku_unpin_ftnode_off_client_thread(c_ft->ft, node); + assert(BLB_DATA(node, 0)->num_klpairs() == 2); + toku_unpin_ftnode(c_ft->ft, node); } diff --git a/storage/tokudb/ft-index/ft/tests/test-del-inorder.cc b/storage/tokudb/ft-index/ft/tests/test-del-inorder.cc index 9054661fa0ead..75a1c255bd999 100644 --- a/storage/tokudb/ft-index/ft/tests/test-del-inorder.cc +++ b/storage/tokudb/ft-index/ft/tests/test-del-inorder.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,6 @@ PATENT RIGHTS GRANT: static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -109,7 +108,7 @@ doit (void) { int r; - toku_cachetable_create(&ct, 16*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 16*1024, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &t, NODESIZE, NODESIZE, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); diff --git a/storage/tokudb/ft-index/ft/tests/test-dirty-flushes-on-cleaner.cc b/storage/tokudb/ft-index/ft/tests/test-dirty-flushes-on-cleaner.cc index b62b9e063a182..a88c07c0ca170 100644 --- a/storage/tokudb/ft-index/ft/tests/test-dirty-flushes-on-cleaner.cc +++ b/storage/tokudb/ft-index/ft/tests/test-dirty-flushes-on-cleaner.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -94,16 +94,15 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; CACHETABLE ct; -FT_HANDLE brt; +FT_HANDLE ft; const char *fname = TOKU_TEST_FILENAME; static int update_func( @@ -132,13 +131,13 @@ doit (void) { int r; - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink(fname); - r = toku_open_ft_handle(fname, 1, &brt, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); + r = toku_open_ft_handle(fname, 1, &ft, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); - brt->ft->update_fun = update_func; - brt->ft->update_fun = update_func; + ft->ft->update_fun = update_func; + ft->ft->update_fun = update_func; toku_testsetup_initialize(); // must precede any other toku_testsetup calls @@ -146,16 +145,16 @@ doit (void) { pivots[0] = toku_strdup("kkkkk"); int pivot_len = 6; - r = toku_testsetup_leaf(brt, &node_leaf, 2, pivots, &pivot_len); + r = toku_testsetup_leaf(ft, &node_leaf, 2, pivots, &pivot_len); assert(r==0); - r = toku_testsetup_nonleaf(brt, 1, &node_internal, 1, &node_leaf, 0, 0); + r = toku_testsetup_nonleaf(ft, 1, &node_internal, 1, &node_leaf, 0, 0); assert(r==0); - r = toku_testsetup_nonleaf(brt, 2, &node_root, 1, &node_internal, 0, 0); + r = toku_testsetup_nonleaf(ft, 2, &node_root, 1, &node_internal, 0, 0); assert(r==0); - r = toku_testsetup_root(brt, node_root); + r = toku_testsetup_root(ft, node_root); assert(r==0); // @@ -165,7 +164,7 @@ doit (void) { // now we insert a row into each leaf node r = toku_testsetup_insert_to_leaf ( - brt, + ft, node_leaf, "a", // key 2, // keylen @@ -174,7 +173,7 @@ doit (void) { ); assert(r==0); r = toku_testsetup_insert_to_leaf ( - brt, + ft, node_leaf, "z", // key 2, // keylen @@ -187,7 +186,7 @@ doit (void) { // now we insert filler data so that the rebalance // keeps it at two nodes r = toku_testsetup_insert_to_leaf ( - brt, + ft, node_leaf, "b", // key 2, // keylen @@ -196,7 +195,7 @@ doit (void) { ); assert(r==0); r = toku_testsetup_insert_to_leaf ( - brt, + ft, node_leaf, "y", // key 2, // keylen @@ -211,7 +210,7 @@ doit (void) { // for (int i = 0; i < 100000; i++) { r = toku_testsetup_insert_to_nonleaf ( - brt, + ft, node_internal, FT_DELETE_ANY, "jj", // this key does not exist, so its message application should be a no-op @@ -226,7 +225,7 @@ doit (void) { // now insert a broadcast message into the root // r = toku_testsetup_insert_to_nonleaf ( - brt, + ft, node_root, FT_UPDATE_BROADCAST_ALL, NULL, @@ -238,28 +237,29 @@ doit (void) { // now lock and release the leaf node to make sure it is what we expect it to be. 
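Editor's note on a refactor repeated throughout these test hunks: the free functions fill_bfe_for_min_read(&bfe, ft) and fill_bfe_for_full_read(&bfe, ft) become member initializers on ftnode_fetch_extra, written as bfe.create_for_min_read(t->ft) and bfe.create_for_full_read(t->ft). A rough standalone sketch of that shape with placeholder types (everything other than the create_for_* names is hypothetical, not the real ft structures):

// Placeholder stand-ins; the real ftnode_fetch_extra is defined in the ft/ sources.
struct ft_stub {};                                 // hypothetical stand-in for struct ft

struct ftnode_fetch_extra_sketch {
    const ft_stub *ft = nullptr;
    bool full_read = false;
    // Old style: a free fill_bfe_* function populated the struct.
    // New style: the object initializes itself, as bfe.create_for_min_read(t->ft) does above.
    void create_for_min_read(const ft_stub *f)  { ft = f; full_read = false; }
    void create_for_full_read(const ft_stub *f) { ft = f; full_read = true; }
};

int main() {
    ft_stub ft;
    ftnode_fetch_extra_sketch bfe;
    bfe.create_for_min_read(&ft);                  // fetch only what a point query needs
    bfe.create_for_full_read(&ft);                 // fetch every partition of the node
    return 0;
}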
FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, brt->ft); - toku_pin_ftnode_off_client_thread( - brt->ft, + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft->ft); + toku_pin_ftnode_with_dep_nodes( + ft->ft, node_leaf, - toku_cachetable_hash(brt->ft->cf, node_leaf), + toku_cachetable_hash(ft->ft->cf, node_leaf), &bfe, PL_WRITE_EXPENSIVE, 0, NULL, - &node + &node, + true ); assert(node->dirty); assert(node->n_children == 2); assert(BP_STATE(node,0) == PT_AVAIL); assert(BP_STATE(node,1) == PT_AVAIL); - toku_unpin_ftnode_off_client_thread(brt->ft, node); + toku_unpin_ftnode(ft->ft, node); // now do a lookup on one of the keys, this should bring a leaf node up to date DBT k; struct check_pair pair = {2, "a", 0, NULL, 0}; - r = toku_ft_lookup(brt, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair); + r = toku_ft_lookup(ft, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair); assert(r==0); // @@ -268,36 +268,38 @@ doit (void) { // node is in memory and another is // on disk // - fill_bfe_for_min_read(&bfe, brt->ft); - toku_pin_ftnode_off_client_thread( - brt->ft, + bfe.create_for_min_read(ft->ft); + toku_pin_ftnode_with_dep_nodes( + ft->ft, node_leaf, - toku_cachetable_hash(brt->ft->cf, node_leaf), + toku_cachetable_hash(ft->ft->cf, node_leaf), &bfe, PL_WRITE_EXPENSIVE, 0, NULL, - &node + &node, + true ); assert(node->dirty); assert(node->n_children == 2); assert(BP_STATE(node,0) == PT_AVAIL); assert(BP_STATE(node,1) == PT_AVAIL); - toku_unpin_ftnode_off_client_thread(brt->ft, node); + toku_unpin_ftnode(ft->ft, node); // // now let us induce a clean on the internal node // - fill_bfe_for_min_read(&bfe, brt->ft); - toku_pin_ftnode_off_client_thread( - brt->ft, + bfe.create_for_min_read(ft->ft); + toku_pin_ftnode_with_dep_nodes( + ft->ft, node_internal, - toku_cachetable_hash(brt->ft->cf, node_internal), + toku_cachetable_hash(ft->ft->cf, node_internal), &bfe, PL_WRITE_EXPENSIVE, 0, NULL, - &node + &node, + true ); assert(node->dirty); @@ -307,25 +309,26 @@ doit (void) { r = toku_ftnode_cleaner_callback( node, node_internal, - toku_cachetable_hash(brt->ft->cf, node_internal), - brt->ft + toku_cachetable_hash(ft->ft->cf, node_internal), + ft->ft ); // verify that node_internal's buffer is empty - fill_bfe_for_min_read(&bfe, brt->ft); - toku_pin_ftnode_off_client_thread( - brt->ft, + bfe.create_for_min_read(ft->ft); + toku_pin_ftnode_with_dep_nodes( + ft->ft, node_internal, - toku_cachetable_hash(brt->ft->cf, node_internal), + toku_cachetable_hash(ft->ft->cf, node_internal), &bfe, PL_WRITE_EXPENSIVE, 0, NULL, - &node + &node, + true ); // check that buffers are empty assert(toku_bnc_nbytesinbuf(BNC(node, 0)) == 0); - toku_unpin_ftnode_off_client_thread(brt->ft, node); + toku_unpin_ftnode(ft->ft, node); // // now run a checkpoint to get everything clean, @@ -337,14 +340,14 @@ doit (void) { // check that lookups on the two keys is still good struct check_pair pair1 = {2, "a", 0, NULL, 0}; - r = toku_ft_lookup(brt, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair1); + r = toku_ft_lookup(ft, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair1); assert(r==0); struct check_pair pair2 = {2, "z", 0, NULL, 0}; - r = toku_ft_lookup(brt, toku_fill_dbt(&k, "z", 2), lookup_checkf, &pair2); + r = toku_ft_lookup(ft, toku_fill_dbt(&k, "z", 2), lookup_checkf, &pair2); assert(r==0); - r = toku_close_ft_handle_nolsn(brt, 0); assert(r==0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); toku_free(pivots[0]); diff --git 
a/storage/tokudb/ft-index/ft/tests/test-dump-ft.cc b/storage/tokudb/ft-index/ft/tests/test-dump-ft.cc index f18723c525eec..f1c76d0bd13d6 100644 --- a/storage/tokudb/ft-index/ft/tests/test-dump-ft.cc +++ b/storage/tokudb/ft-index/ft/tests/test-dump-ft.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,7 +94,6 @@ PATENT RIGHTS GRANT: #include "test.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; int test_main(int argc, const char *argv[]) { @@ -106,7 +105,7 @@ test_main(int argc, const char *argv[]) { FILE *f = fopen("test-dump-ft.out", "w"); unlink(n); assert(f); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(n, 1, &t, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); int i; for (i=0; i<10000; i++) { diff --git a/storage/tokudb/ft-index/ft/tests/test-flushes-on-cleaner.cc b/storage/tokudb/ft-index/ft/tests/test-flushes-on-cleaner.cc index 291d409018acf..fa00100d3ed69 100644 --- a/storage/tokudb/ft-index/ft/tests/test-flushes-on-cleaner.cc +++ b/storage/tokudb/ft-index/ft/tests/test-flushes-on-cleaner.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,16 +94,15 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; CACHETABLE ct; -FT_HANDLE brt; +FT_HANDLE ft; const char *fname = TOKU_TEST_FILENAME; static int update_func( @@ -132,13 +131,13 @@ doit (bool keep_other_bn_in_memory) { int r; - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink(fname); - r = toku_open_ft_handle(fname, 1, &brt, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); + r = toku_open_ft_handle(fname, 1, &ft, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); - brt->options.update_fun = update_func; - brt->ft->update_fun = update_func; + ft->options.update_fun = update_func; + ft->ft->update_fun = update_func; toku_testsetup_initialize(); // must precede any other toku_testsetup calls @@ -146,16 +145,16 @@ doit (bool keep_other_bn_in_memory) { pivots[0] = toku_strdup("kkkkk"); int pivot_len = 6; - r = toku_testsetup_leaf(brt, &node_leaf, 2, pivots, &pivot_len); + r = toku_testsetup_leaf(ft, &node_leaf, 2, pivots, &pivot_len); assert(r==0); - r = toku_testsetup_nonleaf(brt, 1, &node_internal, 1, &node_leaf, 0, 0); + r = toku_testsetup_nonleaf(ft, 1, &node_internal, 1, &node_leaf, 0, 0); assert(r==0); - r = toku_testsetup_nonleaf(brt, 2, &node_root, 1, &node_internal, 0, 0); + r = toku_testsetup_nonleaf(ft, 2, &node_root, 1, &node_internal, 0, 0); assert(r==0); - r = toku_testsetup_root(brt, node_root); + r = toku_testsetup_root(ft, node_root); assert(r==0); // @@ -165,7 +164,7 @@ doit (bool keep_other_bn_in_memory) { // now we insert a row into each leaf node r = toku_testsetup_insert_to_leaf ( - brt, + ft, node_leaf, "a", // key 2, // keylen @@ -174,7 
+173,7 @@ doit (bool keep_other_bn_in_memory) { ); assert(r==0); r = toku_testsetup_insert_to_leaf ( - brt, + ft, node_leaf, "z", // key 2, // keylen @@ -187,7 +186,7 @@ doit (bool keep_other_bn_in_memory) { // now we insert filler data so that the rebalance // keeps it at two nodes r = toku_testsetup_insert_to_leaf ( - brt, + ft, node_leaf, "b", // key 2, // keylen @@ -196,7 +195,7 @@ doit (bool keep_other_bn_in_memory) { ); assert(r==0); r = toku_testsetup_insert_to_leaf ( - brt, + ft, node_leaf, "y", // key 2, // keylen @@ -211,7 +210,7 @@ doit (bool keep_other_bn_in_memory) { // for (int i = 0; i < 100000; i++) { r = toku_testsetup_insert_to_nonleaf ( - brt, + ft, node_internal, FT_DELETE_ANY, "jj", // this key does not exist, so its message application should be a no-op @@ -226,7 +225,7 @@ doit (bool keep_other_bn_in_memory) { // now insert a broadcast message into the root // r = toku_testsetup_insert_to_nonleaf ( - brt, + ft, node_root, FT_UPDATE_BROADCAST_ALL, NULL, @@ -244,34 +243,33 @@ doit (bool keep_other_bn_in_memory) { assert_zero(r); // now lock and release the leaf node to make sure it is what we expect it to be. FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, brt->ft); - toku_pin_ftnode_off_client_thread( - brt->ft, + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft->ft); + toku_pin_ftnode( + ft->ft, node_leaf, - toku_cachetable_hash(brt->ft->cf, node_leaf), + toku_cachetable_hash(ft->ft->cf, node_leaf), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(!node->dirty); assert(node->n_children == 2); // a hack to get the basement nodes evicted for (int i = 0; i < 20; i++) { - toku_ftnode_pe_callback(node, make_pair_attr(0xffffffff), brt->ft, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(node, make_pair_attr(0xffffffff), ft->ft, def_pe_finalize_impl, nullptr); } // this ensures that when we do the lookups below, // that the data is read off disk assert(BP_STATE(node,0) == PT_ON_DISK); assert(BP_STATE(node,1) == PT_ON_DISK); - toku_unpin_ftnode_off_client_thread(brt->ft, node); + toku_unpin_ftnode(ft->ft, node); // now do a lookup on one of the keys, this should bring a leaf node up to date DBT k; struct check_pair pair = {2, "a", 0, NULL, 0}; - r = toku_ft_lookup(brt, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair); + r = toku_ft_lookup(ft, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair); assert(r==0); if (keep_other_bn_in_memory) { @@ -282,7 +280,7 @@ doit (bool keep_other_bn_in_memory) { // but only one should have broadcast message // applied. 
// - fill_bfe_for_full_read(&bfe, brt->ft); + bfe.create_for_full_read(ft->ft); } else { // @@ -291,17 +289,16 @@ doit (bool keep_other_bn_in_memory) { // node is in memory and another is // on disk // - fill_bfe_for_min_read(&bfe, brt->ft); + bfe.create_for_min_read(ft->ft); } - toku_pin_ftnode_off_client_thread( - brt->ft, + toku_pin_ftnode( + ft->ft, node_leaf, - toku_cachetable_hash(brt->ft->cf, node_leaf), + toku_cachetable_hash(ft->ft->cf, node_leaf), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(!node->dirty); assert(node->n_children == 2); @@ -312,21 +309,20 @@ doit (bool keep_other_bn_in_memory) { else { assert(BP_STATE(node,1) == PT_ON_DISK); } - toku_unpin_ftnode_off_client_thread(brt->ft, node); + toku_unpin_ftnode(ft->ft, node); // // now let us induce a clean on the internal node // - fill_bfe_for_min_read(&bfe, brt->ft); - toku_pin_ftnode_off_client_thread( - brt->ft, + bfe.create_for_min_read(ft->ft); + toku_pin_ftnode( + ft->ft, node_internal, - toku_cachetable_hash(brt->ft->cf, node_internal), + toku_cachetable_hash(ft->ft->cf, node_internal), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(!node->dirty); @@ -336,25 +332,24 @@ doit (bool keep_other_bn_in_memory) { r = toku_ftnode_cleaner_callback( node, node_internal, - toku_cachetable_hash(brt->ft->cf, node_internal), - brt->ft + toku_cachetable_hash(ft->ft->cf, node_internal), + ft->ft ); // verify that node_internal's buffer is empty - fill_bfe_for_min_read(&bfe, brt->ft); - toku_pin_ftnode_off_client_thread( - brt->ft, + bfe.create_for_min_read(ft->ft); + toku_pin_ftnode( + ft->ft, node_internal, - toku_cachetable_hash(brt->ft->cf, node_internal), + toku_cachetable_hash(ft->ft->cf, node_internal), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); // check that buffers are empty assert(toku_bnc_nbytesinbuf(BNC(node, 0)) == 0); - toku_unpin_ftnode_off_client_thread(brt->ft, node); + toku_unpin_ftnode(ft->ft, node); // // now run a checkpoint to get everything clean, @@ -365,14 +360,14 @@ doit (bool keep_other_bn_in_memory) { // check that lookups on the two keys is still good struct check_pair pair1 = {2, "a", 0, NULL, 0}; - r = toku_ft_lookup(brt, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair1); + r = toku_ft_lookup(ft, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair1); assert(r==0); struct check_pair pair2 = {2, "z", 0, NULL, 0}; - r = toku_ft_lookup(brt, toku_fill_dbt(&k, "z", 2), lookup_checkf, &pair2); + r = toku_ft_lookup(ft, toku_fill_dbt(&k, "z", 2), lookup_checkf, &pair2); assert(r==0); - r = toku_close_ft_handle_nolsn(brt, 0); assert(r==0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); toku_free(pivots[0]); diff --git a/storage/tokudb/ft-index/ft/tests/test-ft-overflow.cc b/storage/tokudb/ft-index/ft/tests/test-ft-overflow.cc index dee6dd3649634..d8e51b5ab7c0e 100644 --- a/storage/tokudb/ft-index/ft/tests/test-ft-overflow.cc +++ b/storage/tokudb/ft-index/ft/tests/test-ft-overflow.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -97,7 +97,6 @@ PATENT RIGHTS GRANT: static const char *fname = TOKU_TEST_FILENAME; static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static void test_overflow (void) { @@ -106,7 +105,7 @@ test_overflow (void) { uint32_t nodesize = 1<<20; int r; unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &t, nodesize, nodesize / 8, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); DBT k,v; diff --git a/storage/tokudb/ft-index/ft/tests/test-ft-txns.h b/storage/tokudb/ft-index/ft/tests/test-ft-txns.h index bc88739158974..04b2cfdf8bf56 100644 --- a/storage/tokudb/ft-index/ft/tests/test-ft-txns.h +++ b/storage/tokudb/ft-index/ft/tests/test-ft-txns.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TEST_FT_TXNS_H -#define TEST_FT_TXNS_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -136,7 +136,7 @@ static inline void test_setup_and_recover(const char *envdir, TOKULOGGER *logger CKERR(r); DB_ENV *CAST_FROM_VOIDP(ctv, (void *) &ct); // Use intermediate to avoid compiler warning. - r = tokudb_recover(ctv, + r = tokuft_recover(ctv, NULL_prepared_txn_callback, xid_lsn_keep_cachetable_callback, logger, @@ -179,5 +179,3 @@ static inline void shutdown_after_recovery(TOKULOGGER *loggerp, CACHETABLE *ctp) int r = toku_logger_close(loggerp); CKERR(r); } - -#endif /* TEST_FT_TXNS_H */ diff --git a/storage/tokudb/ft-index/ft/tests/test-hot-with-bounds.cc b/storage/tokudb/ft-index/ft/tests/test-hot-with-bounds.cc index 40ed00bcdc221..419cbd2cb5122 100644 --- a/storage/tokudb/ft-index/ft/tests/test-hot-with-bounds.cc +++ b/storage/tokudb/ft-index/ft/tests/test-hot-with-bounds.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,10 +96,9 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -113,7 +112,7 @@ doit (void) { int r; - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink(TOKU_TEST_FILENAME); r = toku_open_ft_handle(TOKU_TEST_FILENAME, 1, &t, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -181,17 +180,16 @@ doit (void) { // the root, one in each buffer, let's verify this. 
FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); - toku_pin_ftnode_off_client_thread( + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); + toku_pin_ftnode( t->ft, node_root, toku_cachetable_hash(t->ft->cf, node_root), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 1); assert(node->n_children == 3); @@ -212,16 +210,15 @@ doit (void) { // at this point, we have should have flushed // only the middle buffer, let's verify this. node = NULL; - fill_bfe_for_min_read(&bfe, t->ft); - toku_pin_ftnode_off_client_thread( + bfe.create_for_min_read(t->ft); + toku_pin_ftnode( t->ft, node_root, toku_cachetable_hash(t->ft->cf, node_root), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 1); assert(node->n_children == 3); diff --git a/storage/tokudb/ft-index/ft/tests/test-inc-split.cc b/storage/tokudb/ft-index/ft/tests/test-inc-split.cc index cafcb496f7aaf..13510855cb0ce 100644 --- a/storage/tokudb/ft-index/ft/tests/test-inc-split.cc +++ b/storage/tokudb/ft-index/ft/tests/test-inc-split.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -120,7 +120,6 @@ PATENT RIGHTS GRANT: static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -137,7 +136,7 @@ doit (int ksize __attribute__((__unused__))) { int i; int r; - toku_cachetable_create(&ct, 16*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 16*1024, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &t, NODESIZE, NODESIZE, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); diff --git a/storage/tokudb/ft-index/ft/tests/test-leafentry-child-txn.cc b/storage/tokudb/ft-index/ft/tests/test-leafentry-child-txn.cc index e55b20d6a3f60..30dd15d3e3997 100644 --- a/storage/tokudb/ft-index/ft/tests/test-leafentry-child-txn.cc +++ b/storage/tokudb/ft-index/ft/tests/test-leafentry-child-txn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,10 +91,9 @@ PATENT RIGHTS GRANT: #include #include "test.h" -#include "fttypes.h" -#include "ule.h" -#include "ule-internal.h" +#include "ft/ule.h" +#include "ft/ule-internal.h" static void init_empty_ule(ULE ule) { ule->num_cuxrs = 0; @@ -111,17 +110,6 @@ static void add_committed_entry(ULE ule, DBT *val, TXNID xid) { ule->uxrs[index].xid = xid; } -static FT_MSG_S -msg_init(enum ft_msg_type type, XIDS xids, - DBT *key, DBT *val) { - FT_MSG_S msg; - msg.type = type; - msg.xids = xids; - msg.u.id.key = key; - msg.u.id.val = val; - return msg; -} - //Test all the different things that can happen to a //committed leafentry (logical equivalent of a committed insert). 
static void @@ -144,14 +132,14 @@ run_test(void) { // test case where we apply a message and the innermost child_id // is the same as the innermost committed TXNID - XIDS root_xids = xids_get_root_xids(); + XIDS root_xids = toku_xids_get_root_xids(); TXNID root_txnid = 1000; TXNID child_id = 10; XIDS msg_xids_1; XIDS msg_xids_2; - r = xids_create_child(root_xids, &msg_xids_1, root_txnid); + r = toku_xids_create_child(root_xids, &msg_xids_1, root_txnid); assert(r==0); - r = xids_create_child(msg_xids_1, &msg_xids_2, child_id); + r = toku_xids_create_child(msg_xids_1, &msg_xids_2, child_id); assert(r==0); init_empty_ule(&ule_initial); @@ -161,45 +149,49 @@ run_test(void) { add_committed_entry(&ule_initial, &val, 10); // now do the application of xids to the ule - FT_MSG_S msg; // do a commit - msg = msg_init(FT_COMMIT_ANY, msg_xids_2, &key, &val); - test_msg_modify_ule(&ule_initial, &msg); - assert(ule->num_cuxrs == 2); - assert(ule->uxrs[0].xid == TXNID_NONE); - assert(ule->uxrs[1].xid == 10); - assert(ule->uxrs[0].valp == &val_data_one); - assert(ule->uxrs[1].valp == &val_data_two); + { + ft_msg msg(&key, &val, FT_COMMIT_ANY, ZERO_MSN, msg_xids_2); + test_msg_modify_ule(&ule_initial, msg); + assert(ule->num_cuxrs == 2); + assert(ule->uxrs[0].xid == TXNID_NONE); + assert(ule->uxrs[1].xid == 10); + assert(ule->uxrs[0].valp == &val_data_one); + assert(ule->uxrs[1].valp == &val_data_two); + } // do an abort - msg = msg_init(FT_ABORT_ANY, msg_xids_2, &key, &val); - test_msg_modify_ule(&ule_initial, &msg); - assert(ule->num_cuxrs == 2); - assert(ule->uxrs[0].xid == TXNID_NONE); - assert(ule->uxrs[1].xid == 10); - assert(ule->uxrs[0].valp == &val_data_one); - assert(ule->uxrs[1].valp == &val_data_two); + { + ft_msg msg(&key, &val, FT_ABORT_ANY, ZERO_MSN, msg_xids_2); + test_msg_modify_ule(&ule_initial, msg); + assert(ule->num_cuxrs == 2); + assert(ule->uxrs[0].xid == TXNID_NONE); + assert(ule->uxrs[1].xid == 10); + assert(ule->uxrs[0].valp == &val_data_one); + assert(ule->uxrs[1].valp == &val_data_two); + } // do an insert val.data = &val_data_three; - msg = msg_init(FT_INSERT, msg_xids_2, &key, &val); - test_msg_modify_ule(&ule_initial, &msg); - // now that message applied, verify that things are good - assert(ule->num_cuxrs == 2); - assert(ule->num_puxrs == 2); - assert(ule->uxrs[0].xid == TXNID_NONE); - assert(ule->uxrs[1].xid == 10); - assert(ule->uxrs[2].xid == 1000); - assert(ule->uxrs[3].xid == 10); - assert(ule->uxrs[0].valp == &val_data_one); - assert(ule->uxrs[1].valp == &val_data_two); - assert(ule->uxrs[2].type == XR_PLACEHOLDER); - assert(ule->uxrs[3].valp == &val_data_three); - - - xids_destroy(&msg_xids_2); - xids_destroy(&msg_xids_1); - xids_destroy(&root_xids); + { + ft_msg msg(&key, &val, FT_INSERT, ZERO_MSN, msg_xids_2); + test_msg_modify_ule(&ule_initial, msg); + // now that message applied, verify that things are good + assert(ule->num_cuxrs == 2); + assert(ule->num_puxrs == 2); + assert(ule->uxrs[0].xid == TXNID_NONE); + assert(ule->uxrs[1].xid == 10); + assert(ule->uxrs[2].xid == 1000); + assert(ule->uxrs[3].xid == 10); + assert(ule->uxrs[0].valp == &val_data_one); + assert(ule->uxrs[1].valp == &val_data_two); + assert(ule->uxrs[2].type == XR_PLACEHOLDER); + assert(ule->uxrs[3].valp == &val_data_three); + } + + toku_xids_destroy(&msg_xids_2); + toku_xids_destroy(&msg_xids_1); + toku_xids_destroy(&root_xids); } diff --git a/storage/tokudb/ft-index/ft/tests/test-leafentry-nested.cc b/storage/tokudb/ft-index/ft/tests/test-leafentry-nested.cc index b0a5dfe78179d..9253ff814c902 
100644 --- a/storage/tokudb/ft-index/ft/tests/test-leafentry-nested.cc +++ b/storage/tokudb/ft-index/ft/tests/test-leafentry-nested.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,10 +91,9 @@ PATENT RIGHTS GRANT: #include #include "test.h" -#include "fttypes.h" -#include "ule.h" -#include "ule-internal.h" +#include "ft/ule.h" +#include "ft/ule-internal.h" enum {MAX_SIZE = 256}; static XIDS nested_xids[MAX_TRANSACTION_RECORDS]; @@ -213,7 +212,7 @@ test_le_offsets (void) { static void test_ule_packs_to_nothing (ULE ule) { LEAFENTRY le; - int r = le_pack(ule, NULL, 0, NULL, 0, 0, &le); + int r = le_pack(ule, NULL, 0, NULL, 0, 0, 0, &le, nullptr); assert(r==0); assert(le==NULL); } @@ -319,7 +318,7 @@ test_le_pack_committed (void) { size_t memsize; LEAFENTRY le; - int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, &le); + int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, 0, &le, nullptr); assert(r==0); assert(le!=NULL); memsize = le_memsize_from_ule(&ule); @@ -329,7 +328,7 @@ test_le_pack_committed (void) { verify_ule_equal(&ule, &tmp_ule); LEAFENTRY tmp_le; size_t tmp_memsize; - r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, &tmp_le); + r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, 0, &tmp_le, nullptr); tmp_memsize = le_memsize_from_ule(&tmp_ule); assert(r==0); assert(tmp_memsize == memsize); @@ -377,7 +376,7 @@ test_le_pack_uncommitted (uint8_t committed_type, uint8_t prov_type, int num_pla size_t memsize; LEAFENTRY le; - int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, &le); + int r = le_pack(&ule, nullptr, 0, nullptr, 0, 0, 0, &le, nullptr); assert(r==0); assert(le!=NULL); memsize = le_memsize_from_ule(&ule); @@ -387,7 +386,7 @@ test_le_pack_uncommitted (uint8_t committed_type, uint8_t prov_type, int num_pla verify_ule_equal(&ule, &tmp_ule); LEAFENTRY tmp_le; size_t tmp_memsize; - r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, &tmp_le); + r = le_pack(&tmp_ule, nullptr, 0, nullptr, 0, 0, 0, &tmp_le, nullptr); tmp_memsize = le_memsize_from_ule(&tmp_ule); assert(r==0); assert(tmp_memsize == memsize); @@ -442,13 +441,13 @@ test_le_pack (void) { } static void -test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) { +test_le_apply(ULE ule_initial, const ft_msg &msg, ULE ule_expected) { int r; LEAFENTRY le_initial; LEAFENTRY le_expected; LEAFENTRY le_result; - r = le_pack(ule_initial, nullptr, 0, nullptr, 0, 0, &le_initial); + r = le_pack(ule_initial, nullptr, 0, nullptr, 0, 0, 0, &le_initial, nullptr); CKERR(r); size_t result_memsize = 0; @@ -458,6 +457,7 @@ test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) { le_initial, nullptr, 0, + 0, &gc_info, &le_result, &ignoreme); @@ -467,7 +467,7 @@ test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) { } size_t expected_memsize = 0; - r = le_pack(ule_expected, nullptr, 0, nullptr, 0, 0, &le_expected); + r = le_pack(ule_expected, nullptr, 0, nullptr, 0, 0, 0, &le_expected, nullptr); CKERR(r); if (le_expected) { expected_memsize = leafentry_memsize(le_expected); @@ -495,17 +495,6 @@ static const ULE_S ule_committed_delete = { .uxrs = (UXR_S *)ule_committed_delete.uxrs_static }; -static FT_MSG_S -msg_init(enum ft_msg_type type, XIDS xids, - DBT *key, DBT *val) { - FT_MSG_S msg; - msg.type = type; - msg.xids = xids; - msg.u.id.key = key; - msg.u.id.val = val; - return msg; -} - static uint32_t next_nesting_level(uint32_t current) { uint32_t rval = current + 
1; @@ -530,13 +519,13 @@ generate_committed_for(ULE ule, DBT *val) { } static void -generate_provpair_for(ULE ule, FT_MSG msg) { +generate_provpair_for(ULE ule, const ft_msg &msg) { uint32_t level; - XIDS xids = msg->xids; + XIDS xids = msg.xids(); ule->uxrs = ule->uxrs_static; ule->num_cuxrs = 1; - ule->num_puxrs = xids_get_num_xids(xids); + ule->num_puxrs = toku_xids_get_num_xids(xids); uint32_t num_uxrs = ule->num_cuxrs + ule->num_puxrs; ule->uxrs[0].type = XR_DELETE; ule->uxrs[0].vallen = 0; @@ -546,12 +535,12 @@ generate_provpair_for(ULE ule, FT_MSG msg) { ule->uxrs[level].type = XR_PLACEHOLDER; ule->uxrs[level].vallen = 0; ule->uxrs[level].valp = NULL; - ule->uxrs[level].xid = xids_get_xid(xids, level-1); + ule->uxrs[level].xid = toku_xids_get_xid(xids, level-1); } ule->uxrs[num_uxrs - 1].type = XR_INSERT; - ule->uxrs[num_uxrs - 1].vallen = msg->u.id.val->size; - ule->uxrs[num_uxrs - 1].valp = msg->u.id.val->data; - ule->uxrs[num_uxrs - 1].xid = xids_get_innermost_xid(xids); + ule->uxrs[num_uxrs - 1].vallen = msg.vdbt()->size; + ule->uxrs[num_uxrs - 1].valp = msg.vdbt()->data; + ule->uxrs[num_uxrs - 1].xid = toku_xids_get_innermost_xid(xids); } //Test all the different things that can happen to a @@ -559,7 +548,6 @@ generate_provpair_for(ULE ule, FT_MSG msg) { static void test_le_empty_apply(void) { ULE_S ule_initial = ule_committed_delete; - FT_MSG_S msg; DBT key; DBT val; @@ -584,34 +572,41 @@ test_le_empty_apply(void) { //Abort/commit of an empty le is an empty le ULE_S ule_expected = ule_committed_delete; - msg = msg_init(FT_COMMIT_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - msg = msg_init(FT_COMMIT_BROADCAST_TXN, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - - msg = msg_init(FT_ABORT_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - msg = msg_init(FT_ABORT_BROADCAST_TXN, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); + { + ft_msg msg(&key, &val, FT_COMMIT_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_COMMIT_BROADCAST_TXN, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_ABORT_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_ABORT_BROADCAST_TXN, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } } { //delete of an empty le is an empty le ULE_S ule_expected = ule_committed_delete; - msg = msg_init(FT_DELETE_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); + ft_msg msg(&key, &val, FT_DELETE_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); } { - msg = msg_init(FT_INSERT, msg_xids, &key, &val); + ft_msg msg(&key, &val, FT_INSERT, ZERO_MSN, msg_xids); ULE_S ule_expected; - generate_provpair_for(&ule_expected, &msg); - test_le_apply(&ule_initial, &msg, &ule_expected); + generate_provpair_for(&ule_expected, msg); + test_le_apply(&ule_initial, msg, &ule_expected); } { - msg = msg_init(FT_INSERT_NO_OVERWRITE, msg_xids, &key, &val); + ft_msg msg(&key, &val, FT_INSERT_NO_OVERWRITE, ZERO_MSN, msg_xids); ULE_S ule_expected; - generate_provpair_for(&ule_expected, &msg); - test_le_apply(&ule_initial, &msg, &ule_expected); + generate_provpair_for(&ule_expected, msg); + test_le_apply(&ule_initial, msg, &ule_expected); } } } @@ -619,36 +614,36 @@ test_le_empty_apply(void) { } static void 
-generate_provdel_for(ULE ule, FT_MSG msg) { +generate_provdel_for(ULE ule, const ft_msg &msg) { uint32_t level; - XIDS xids = msg->xids; + XIDS xids = msg.xids(); ule->num_cuxrs = 1; - ule->num_puxrs = xids_get_num_xids(xids); + ule->num_puxrs = toku_xids_get_num_xids(xids); uint32_t num_uxrs = ule->num_cuxrs + ule->num_puxrs; ule->uxrs[0].type = XR_INSERT; - ule->uxrs[0].vallen = msg->u.id.val->size; - ule->uxrs[0].valp = msg->u.id.val->data; + ule->uxrs[0].vallen = msg.vdbt()->size; + ule->uxrs[0].valp = msg.vdbt()->data; ule->uxrs[0].xid = TXNID_NONE; for (level = ule->num_cuxrs; level < ule->num_cuxrs + ule->num_puxrs - 1; level++) { ule->uxrs[level].type = XR_PLACEHOLDER; ule->uxrs[level].vallen = 0; ule->uxrs[level].valp = NULL; - ule->uxrs[level].xid = xids_get_xid(xids, level-1); + ule->uxrs[level].xid = toku_xids_get_xid(xids, level-1); } ule->uxrs[num_uxrs - 1].type = XR_DELETE; ule->uxrs[num_uxrs - 1].vallen = 0; ule->uxrs[num_uxrs - 1].valp = NULL; - ule->uxrs[num_uxrs - 1].xid = xids_get_innermost_xid(xids); + ule->uxrs[num_uxrs - 1].xid = toku_xids_get_innermost_xid(xids); } static void -generate_both_for(ULE ule, DBT *oldval, FT_MSG msg) { +generate_both_for(ULE ule, DBT *oldval, const ft_msg &msg) { uint32_t level; - XIDS xids = msg->xids; + XIDS xids = msg.xids(); ule->num_cuxrs = 1; - ule->num_puxrs = xids_get_num_xids(xids); + ule->num_puxrs = toku_xids_get_num_xids(xids); uint32_t num_uxrs = ule->num_cuxrs + ule->num_puxrs; ule->uxrs[0].type = XR_INSERT; ule->uxrs[0].vallen = oldval->size; @@ -658,12 +653,12 @@ generate_both_for(ULE ule, DBT *oldval, FT_MSG msg) { ule->uxrs[level].type = XR_PLACEHOLDER; ule->uxrs[level].vallen = 0; ule->uxrs[level].valp = NULL; - ule->uxrs[level].xid = xids_get_xid(xids, level-1); + ule->uxrs[level].xid = toku_xids_get_xid(xids, level-1); } ule->uxrs[num_uxrs - 1].type = XR_INSERT; - ule->uxrs[num_uxrs - 1].vallen = msg->u.id.val->size; - ule->uxrs[num_uxrs - 1].valp = msg->u.id.val->data; - ule->uxrs[num_uxrs - 1].xid = xids_get_innermost_xid(xids); + ule->uxrs[num_uxrs - 1].vallen = msg.vdbt()->size; + ule->uxrs[num_uxrs - 1].valp = msg.vdbt()->data; + ule->uxrs[num_uxrs - 1].xid = toku_xids_get_innermost_xid(xids); } //Test all the different things that can happen to a @@ -672,7 +667,6 @@ static void test_le_committed_apply(void) { ULE_S ule_initial; ule_initial.uxrs = ule_initial.uxrs_static; - FT_MSG_S msg; DBT key; DBT val; @@ -695,23 +689,30 @@ test_le_committed_apply(void) { if (nesting_level > 0) { //Commit/abort will not change a committed le ULE_S ule_expected = ule_initial; - msg = msg_init(FT_COMMIT_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - msg = msg_init(FT_COMMIT_BROADCAST_TXN, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - - msg = msg_init(FT_ABORT_ANY, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); - msg = msg_init(FT_ABORT_BROADCAST_TXN, msg_xids, &key, &val); - test_le_apply(&ule_initial, &msg, &ule_expected); + { + ft_msg msg(&key, &val, FT_COMMIT_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_COMMIT_BROADCAST_TXN, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_ABORT_ANY, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } + { + ft_msg msg(&key, &val, FT_ABORT_BROADCAST_TXN, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); + } } { - msg = 
msg_init(FT_DELETE_ANY, msg_xids, &key, &val); + ft_msg msg(&key, &val, FT_DELETE_ANY, ZERO_MSN, msg_xids); ULE_S ule_expected; ule_expected.uxrs = ule_expected.uxrs_static; - generate_provdel_for(&ule_expected, &msg); - test_le_apply(&ule_initial, &msg, &ule_expected); + generate_provdel_for(&ule_expected, msg); + test_le_apply(&ule_initial, msg, &ule_expected); } { @@ -720,11 +721,11 @@ test_le_committed_apply(void) { fillrandom(valbuf2, valsize2); DBT val2; toku_fill_dbt(&val2, valbuf2, valsize2); - msg = msg_init(FT_INSERT, msg_xids, &key, &val2); + ft_msg msg(&key, &val2, FT_INSERT, ZERO_MSN, msg_xids); ULE_S ule_expected; ule_expected.uxrs = ule_expected.uxrs_static; - generate_both_for(&ule_expected, &val, &msg); - test_le_apply(&ule_initial, &msg, &ule_expected); + generate_both_for(&ule_expected, &val, msg); + test_le_apply(&ule_initial, msg, &ule_expected); } { //INSERT_NO_OVERWRITE will not change a committed insert @@ -734,8 +735,8 @@ test_le_committed_apply(void) { fillrandom(valbuf2, valsize2); DBT val2; toku_fill_dbt(&val2, valbuf2, valsize2); - msg = msg_init(FT_INSERT_NO_OVERWRITE, msg_xids, &key, &val2); - test_le_apply(&ule_initial, &msg, &ule_expected); + ft_msg msg(&key, &val2, FT_INSERT_NO_OVERWRITE, ZERO_MSN, msg_xids); + test_le_apply(&ule_initial, msg, &ule_expected); } } } @@ -749,7 +750,7 @@ test_le_apply_messages(void) { static bool ule_worth_running_garbage_collection(ULE ule, TXNID oldest_referenced_xid_known) { LEAFENTRY le; - int r = le_pack(ule, nullptr, 0, nullptr, 0, 0, &le); CKERR(r); + int r = le_pack(ule, nullptr, 0, nullptr, 0, 0, 0, &le, nullptr); CKERR(r); invariant_notnull(le); txn_gc_info gc_info(nullptr, oldest_referenced_xid_known, oldest_referenced_xid_known, true); bool worth_running = toku_le_worth_running_garbage_collection(le, &gc_info); @@ -854,7 +855,6 @@ static void test_le_garbage_collection_birdie(void) { } static void test_le_optimize(void) { - FT_MSG_S msg; DBT key; DBT val; ULE_S ule_initial; @@ -868,11 +868,11 @@ static void test_le_optimize(void) { TXNID optimize_txnid = 1000; memset(&key, 0, sizeof(key)); memset(&val, 0, sizeof(val)); - XIDS root_xids = xids_get_root_xids(); + XIDS root_xids = toku_xids_get_root_xids(); XIDS msg_xids; - int r = xids_create_child(root_xids, &msg_xids, optimize_txnid); + int r = toku_xids_create_child(root_xids, &msg_xids, optimize_txnid); assert(r==0); - msg = msg_init(FT_OPTIMIZE, msg_xids, &key, &val); + ft_msg msg(&key, &val, FT_OPTIMIZE, ZERO_MSN, msg_xids); // // create the key @@ -897,8 +897,8 @@ static void test_le_optimize(void) { ule_expected.uxrs[0].vallen = valsize; ule_expected.uxrs[0].valp = valbuf; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); // // add another committed entry and ensure no effect @@ -915,8 +915,8 @@ static void test_le_optimize(void) { ule_expected.uxrs[1].vallen = 0; ule_expected.uxrs[1].valp = NULL; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); // // now test when there is one provisional, three cases, after, equal, and before FT_OPTIMIZE's transaction @@ -928,20 +928,20 @@ static void test_le_optimize(void) { ule_expected.num_cuxrs = 1; ule_expected.num_puxrs = 1; ule_expected.uxrs[1].xid = 1500; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + 
test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); ule_initial.uxrs[1].xid = 1000; ule_expected.uxrs[1].xid = 1000; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); ule_initial.uxrs[1].xid = 500; ule_expected.uxrs[1].xid = 500; ule_expected.num_cuxrs = 2; ule_expected.num_puxrs = 0; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); // // now test cases with two provisional @@ -962,13 +962,13 @@ static void test_le_optimize(void) { ule_expected.uxrs[2].vallen = valsize; ule_expected.uxrs[2].valp = valbuf; ule_expected.uxrs[1].xid = 1200; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); ule_initial.uxrs[1].xid = 1000; ule_expected.uxrs[1].xid = 1000; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); ule_initial.uxrs[1].xid = 800; ule_expected.uxrs[1].xid = 800; @@ -977,12 +977,12 @@ static void test_le_optimize(void) { ule_expected.uxrs[1].type = ule_initial.uxrs[2].type; ule_expected.uxrs[1].valp = ule_initial.uxrs[2].valp; ule_expected.uxrs[1].vallen = ule_initial.uxrs[2].vallen; - test_msg_modify_ule(&ule_initial,&msg); - verify_ule_equal(&ule_initial,&ule_expected); + test_msg_modify_ule(&ule_initial, msg); + verify_ule_equal(&ule_initial, &ule_expected); - xids_destroy(&msg_xids); - xids_destroy(&root_xids); + toku_xids_destroy(&msg_xids); + toku_xids_destroy(&root_xids); } //TODO: #1125 tests: @@ -1020,9 +1020,9 @@ static void test_le_optimize(void) { static void init_xids(void) { uint32_t i; - nested_xids[0] = xids_get_root_xids(); + nested_xids[0] = toku_xids_get_root_xids(); for (i = 1; i < MAX_TRANSACTION_RECORDS; i++) { - int r = xids_create_child(nested_xids[i-1], &nested_xids[i], i * 37 + random() % 36); + int r = toku_xids_create_child(nested_xids[i-1], &nested_xids[i], i * 37 + random() % 36); assert(r==0); } } @@ -1031,7 +1031,7 @@ static void destroy_xids(void) { uint32_t i; for (i = 0; i < MAX_TRANSACTION_RECORDS; i++) { - xids_destroy(&nested_xids[i]); + toku_xids_destroy(&nested_xids[i]); } } diff --git a/storage/tokudb/ft-index/ft/tests/test-merges-on-cleaner.cc b/storage/tokudb/ft-index/ft/tests/test-merges-on-cleaner.cc index d6b8d361b4756..f67cfa7873433 100644 --- a/storage/tokudb/ft-index/ft/tests/test-merges-on-cleaner.cc +++ b/storage/tokudb/ft-index/ft/tests/test-merges-on-cleaner.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -94,15 +94,14 @@ PATENT RIGHTS GRANT: #include #include "ft-flusher.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; CACHETABLE ct; -FT_HANDLE brt; +FT_HANDLE ft; const char *fname = TOKU_TEST_FILENAME; static int update_func( @@ -131,32 +130,32 @@ doit (void) { int r; - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink(fname); - r = toku_open_ft_handle(fname, 1, &brt, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); + r = toku_open_ft_handle(fname, 1, &ft, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); - brt->options.update_fun = update_func; - brt->ft->update_fun = update_func; + ft->options.update_fun = update_func; + ft->ft->update_fun = update_func; toku_testsetup_initialize(); // must precede any other toku_testsetup calls - r = toku_testsetup_leaf(brt, &node_leaf[0], 1, NULL, NULL); + r = toku_testsetup_leaf(ft, &node_leaf[0], 1, NULL, NULL); assert(r==0); - r = toku_testsetup_leaf(brt, &node_leaf[1], 1, NULL, NULL); + r = toku_testsetup_leaf(ft, &node_leaf[1], 1, NULL, NULL); assert(r==0); char* pivots[1]; pivots[0] = toku_strdup("kkkkk"); int pivot_len = 6; - r = toku_testsetup_nonleaf(brt, 1, &node_internal, 2, node_leaf, pivots, &pivot_len); + r = toku_testsetup_nonleaf(ft, 1, &node_internal, 2, node_leaf, pivots, &pivot_len); assert(r==0); - r = toku_testsetup_nonleaf(brt, 2, &node_root, 1, &node_internal, 0, 0); + r = toku_testsetup_nonleaf(ft, 2, &node_root, 1, &node_internal, 0, 0); assert(r==0); - r = toku_testsetup_root(brt, node_root); + r = toku_testsetup_root(ft, node_root); assert(r==0); // @@ -166,7 +165,7 @@ doit (void) { // now we insert a row into each leaf node r = toku_testsetup_insert_to_leaf ( - brt, + ft, node_leaf[0], "a", // key 2, // keylen @@ -175,7 +174,7 @@ doit (void) { ); assert(r==0); r = toku_testsetup_insert_to_leaf ( - brt, + ft, node_leaf[1], "z", // key 2, // keylen @@ -190,7 +189,7 @@ doit (void) { // for (int i = 0; i < 100000; i++) { r = toku_testsetup_insert_to_nonleaf ( - brt, + ft, node_internal, FT_DELETE_ANY, "jj", // this key does not exist, so its message application should be a no-op @@ -205,7 +204,7 @@ doit (void) { // now insert a broadcast message into the root // r = toku_testsetup_insert_to_nonleaf ( - brt, + ft, node_root, FT_UPDATE_BROADCAST_ALL, NULL, @@ -219,28 +218,27 @@ doit (void) { // now let us induce a clean on the internal node // FTNODE node; - toku_pin_node_with_min_bfe(&node, node_leaf[1], brt); + toku_pin_node_with_min_bfe(&node, node_leaf[1], ft); // hack to get merge going BLB_SEQINSERT(node, node->n_children-1) = false; - toku_unpin_ftnode(brt->ft, node); + toku_unpin_ftnode(ft->ft, node); // now do a lookup on one of the keys, this should bring a leaf node up to date DBT k; struct check_pair pair = {2, "a", 0, NULL, 0}; - r = toku_ft_lookup(brt, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair); + r = toku_ft_lookup(ft, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair); assert(r==0); - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, brt->ft); - toku_pin_ftnode_off_client_thread( - brt->ft, + ftnode_fetch_extra bfe; + bfe.create_for_min_read(ft->ft); + toku_pin_ftnode( + ft->ft, node_internal, - toku_cachetable_hash(brt->ft->cf, node_internal), + 
toku_cachetable_hash(ft->ft->cf, node_internal), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->n_children == 2); // we expect that this flushes its buffer, that @@ -249,27 +247,26 @@ doit (void) { r = toku_ftnode_cleaner_callback( node, node_internal, - toku_cachetable_hash(brt->ft->cf, node_internal), - brt->ft + toku_cachetable_hash(ft->ft->cf, node_internal), + ft->ft ); // verify that node_internal's buffer is empty - fill_bfe_for_min_read(&bfe, brt->ft); - toku_pin_ftnode_off_client_thread( - brt->ft, + bfe.create_for_min_read(ft->ft); + toku_pin_ftnode( + ft->ft, node_internal, - toku_cachetable_hash(brt->ft->cf, node_internal), + toku_cachetable_hash(ft->ft->cf, node_internal), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); // check that merge happened assert(node->n_children == 1); // check that buffers are empty assert(toku_bnc_nbytesinbuf(BNC(node, 0)) == 0); - toku_unpin_ftnode_off_client_thread(brt->ft, node); + toku_unpin_ftnode(ft->ft, node); // // now run a checkpoint to get everything clean, @@ -281,14 +278,14 @@ doit (void) { // check that lookups on the two keys is still good struct check_pair pair1 = {2, "a", 0, NULL, 0}; - r = toku_ft_lookup(brt, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair1); + r = toku_ft_lookup(ft, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair1); assert(r==0); struct check_pair pair2 = {2, "z", 0, NULL, 0}; - r = toku_ft_lookup(brt, toku_fill_dbt(&k, "z", 2), lookup_checkf, &pair2); + r = toku_ft_lookup(ft, toku_fill_dbt(&k, "z", 2), lookup_checkf, &pair2); assert(r==0); - r = toku_close_ft_handle_nolsn(brt, 0); assert(r==0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); toku_free(pivots[0]); diff --git a/storage/tokudb/ft-index/ft/tests/test-oldest-referenced-xid-flush.cc b/storage/tokudb/ft-index/ft/tests/test-oldest-referenced-xid-flush.cc index 32a8714f06d00..fc642eab8df31 100644 --- a/storage/tokudb/ft-index/ft/tests/test-oldest-referenced-xid-flush.cc +++ b/storage/tokudb/ft-index/ft/tests/test-oldest-referenced-xid-flush.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -131,7 +131,7 @@ static void test_oldest_referenced_xid_gets_propogated(void) { FT_HANDLE t; BLOCKNUM grandchild_leaf_blocknum, child_nonleaf_blocknum, root_blocknum; - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink("foo1.ft_handle"); r = toku_open_ft_handle("foo1.ft_handle", 1, &t, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, nullptr, toku_builtin_compare_fun); assert(r==0); @@ -167,17 +167,16 @@ static void test_oldest_referenced_xid_gets_propogated(void) { // first verify the child FTNODE node = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_min_read(&bfe, t->ft); - toku_pin_ftnode_off_client_thread( + ftnode_fetch_extra bfe; + bfe.create_for_min_read(t->ft); + toku_pin_ftnode( t->ft, child_nonleaf_blocknum, toku_cachetable_hash(t->ft->cf, child_nonleaf_blocknum), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 1); assert(node->n_children == 1); @@ -186,15 +185,14 @@ static void test_oldest_referenced_xid_gets_propogated(void) { toku_unpin_ftnode(t->ft, node); // now verify the root - keep it pinned so we can flush it below - toku_pin_ftnode_off_client_thread( + toku_pin_ftnode( t->ft, root_blocknum, toku_cachetable_hash(t->ft->cf, root_blocknum), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->height == 2); assert(node->n_children == 1); @@ -222,15 +220,14 @@ static void test_oldest_referenced_xid_gets_propogated(void) { // pin the child, verify that oldest referenced xid was // propogated from parent to child during the flush - toku_pin_ftnode_off_client_thread( + toku_pin_ftnode( t->ft, child_nonleaf_blocknum, toku_cachetable_hash(t->ft->cf, child_nonleaf_blocknum), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->oldest_referenced_xid_known == flush_xid); diff --git a/storage/tokudb/ft-index/ft/tests/test-pick-child-to-flush.cc b/storage/tokudb/ft-index/ft/tests/test-pick-child-to-flush.cc index b8b44e669f523..d2fe0ef9469d2 100644 --- a/storage/tokudb/ft-index/ft/tests/test-pick-child-to-flush.cc +++ b/storage/tokudb/ft-index/ft/tests/test-pick-child-to-flush.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -97,10 +97,9 @@ PATENT RIGHTS GRANT: #include "ft-flusher.h" #include "ft-flusher-internal.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -165,7 +164,7 @@ doit (void) { BLOCKNUM node_leaf[2]; int r; - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &t, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -189,7 +188,7 @@ doit (void) { r = toku_testsetup_root(t, node_root); assert(r==0); - char filler[900]; + char filler[900-2*bn_data::HEADER_LENGTH]; memset(filler, 0, sizeof(filler)); // now we insert filler data so that a merge does not happen r = toku_testsetup_insert_to_leaf ( @@ -245,7 +244,7 @@ doit (void) { // what we say and flushes the child we pick FTNODE node = NULL; toku_pin_node_with_min_bfe(&node, node_internal, t); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); assert(node->n_children == 2); assert(!node->dirty); assert(toku_bnc_n_entries(node->bp[0].ptr.u.nonleaf) > 0); @@ -268,7 +267,7 @@ doit (void) { assert(num_flushes_called == 1); toku_pin_node_with_min_bfe(&node, node_internal, t); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); assert(node->dirty); assert(node->n_children == 2); // child 0 should have empty buffer because it flushed @@ -287,7 +286,7 @@ doit (void) { toku_pin_node_with_min_bfe(&node, node_internal, t); assert(node->dirty); - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); assert(node->n_children == 2); // both buffers should be empty now assert(toku_bnc_n_entries(node->bp[0].ptr.u.nonleaf) == 0); @@ -305,7 +304,7 @@ doit (void) { toku_pin_node_with_min_bfe(&node, node_internal, t); assert(node->dirty); // nothing was flushed, but since we were trying to flush to a leaf, both become dirty - toku_assert_entire_node_in_memory(node); + toku_ftnode_assert_fully_in_memory(node); assert(node->n_children == 2); // both buffers should be empty now assert(toku_bnc_n_entries(node->bp[0].ptr.u.nonleaf) == 0); @@ -326,7 +325,7 @@ doit (void) { // use a for loop so to get us down both paths for (int i = 0; i < 2; i++) { toku_pin_node_with_min_bfe(&node, node_root, t); - toku_assert_entire_node_in_memory(node); // entire root is in memory + toku_ftnode_assert_fully_in_memory(node); // entire root is in memory curr_child_to_flush = i; num_flushes_called = 0; toku_ft_flush_some_child(t->ft, node, &fa); @@ -376,7 +375,7 @@ doit (void) { //now let's do the same test as above toku_pin_node_with_min_bfe(&node, node_root, t); - toku_assert_entire_node_in_memory(node); // entire root is in memory + toku_ftnode_assert_fully_in_memory(node); // entire root is in memory curr_child_to_flush = 0; num_flushes_called = 0; toku_ft_flush_some_child(t->ft, node, &fa); diff --git a/storage/tokudb/ft-index/ft/tests/test-txn-child-manager.cc b/storage/tokudb/ft-index/ft/tests/test-txn-child-manager.cc index 6ce44f0b3d318..8a67df8aa97a6 100644 --- a/storage/tokudb/ft-index/ft/tests/test-txn-child-manager.cc +++ b/storage/tokudb/ft-index/ft/tests/test-txn-child-manager.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #include "test.h" #include "toku_os.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "test-ft-txns.h" diff --git a/storage/tokudb/ft-index/src/tests/bdb-simple-deadlock-nowait.cc b/storage/tokudb/ft-index/ft/tests/test-upgrade-recovery-logs.cc similarity index 60% rename from storage/tokudb/ft-index/src/tests/bdb-simple-deadlock-nowait.cc rename to storage/tokudb/ft-index/ft/tests/test-upgrade-recovery-logs.cc index 036fb8d01151a..528e7889599b9 100644 --- a/storage/tokudb/ft-index/src/tests/bdb-simple-deadlock-nowait.cc +++ b/storage/tokudb/ft-index/ft/tests/test-upgrade-recovery-logs.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,90 +88,106 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// verify that a simle write lock deadlock is detected by the BDB locker -// A write locks L -// B write locks M -// A tries to write lock M, gets DB_LOCK_NOTGRANTED -// B tries to write lock L, gets DB_LOCK_NOTGRANTED + +// Test that recovery works correctly on a recovery log in a log directory. #include "test.h" +#include -static void simple_deadlock(DB_ENV *db_env) { +static void run_recovery(const char *testdir) { int r; - uint32_t locker_a; - r = db_env->lock_id(db_env, &locker_a); assert(r == 0); - uint32_t locker_b; - r = db_env->lock_id(db_env, &locker_b); assert(r == 0); - - DBT object_l = { .data = (char *) "L", .size = 1 }; - DBT object_m = { .data = (char *) "M", .size = 1 }; - - DB_LOCK lock_a_l; - r = db_env->lock_get(db_env, locker_a, DB_LOCK_NOWAIT, &object_l, DB_LOCK_WRITE, &lock_a_l); assert(r == 0); - - DB_LOCK lock_b_m; - r = db_env->lock_get(db_env, locker_b, DB_LOCK_NOWAIT, &object_m, DB_LOCK_WRITE, &lock_b_m); assert(r == 0); - - DB_LOCK lock_a_m; - r = db_env->lock_get(db_env, locker_a, DB_LOCK_NOWAIT, &object_m, DB_LOCK_WRITE, &lock_a_m); assert(r == DB_LOCK_NOTGRANTED); + int log_version; + char shutdown[32+1]; + r = sscanf(testdir, "upgrade-recovery-logs-%d-%32s", &log_version, shutdown); + assert(r == 2); + + char **logfiles = nullptr; + int n_logfiles = 0; + r = toku_logger_find_logfiles(testdir, &logfiles, &n_logfiles); + CKERR(r); + assert(n_logfiles > 0); + + FILE *f = fopen(logfiles[n_logfiles-1], "r"); + assert(f); + uint32_t real_log_version; + r = toku_read_logmagic(f, &real_log_version); + CKERR(r); + assert((uint32_t)log_version == (uint32_t)real_log_version); + r = fclose(f); + CKERR(r); + + toku_logger_free_logfiles(logfiles, n_logfiles); + + // test needs recovery + r = tokuft_needs_recovery(testdir, false); + if (strcmp(shutdown, "clean") == 0) { + CKERR(r); // clean does not need recovery + } else if (strncmp(shutdown, "dirty", 5) == 0) { + CKERR2(r, 1); // dirty needs recovery + } else { + CKERR(EINVAL); + } - DB_LOCK lock_b_l; - r = db_env->lock_get(db_env, locker_b, DB_LOCK_NOWAIT, &object_l, DB_LOCK_WRITE, &lock_b_l); assert(r == DB_LOCK_NOTGRANTED); + // test maybe upgrade log 
+ LSN lsn_of_clean_shutdown; + bool upgrade_in_progress; + r = toku_maybe_upgrade_log(testdir, testdir, &lsn_of_clean_shutdown, &upgrade_in_progress); + if (strcmp(shutdown, "dirty") == 0 && log_version <= 24) { + CKERR2(r, TOKUDB_UPGRADE_FAILURE); // we dont support dirty upgrade from versions <= 24 + return; + } else { + CKERR(r); + } - r = db_env->lock_put(db_env, &lock_a_l); assert(r == 0); - r = db_env->lock_put(db_env, &lock_b_m); assert(r == 0); + if (!verbose) { + // redirect stderr + int devnul = open(DEV_NULL_FILE, O_WRONLY); + assert(devnul >= 0); + int rr = toku_dup2(devnul, fileno(stderr)); + assert(rr == fileno(stderr)); + rr = close(devnul); + assert(rr == 0); + } - r = db_env->lock_id_free(db_env, locker_a); assert(r == 0); - r = db_env->lock_id_free(db_env, locker_b); assert(r == 0); + // run recovery + if (r == 0) { + r = tokuft_recover(NULL, + NULL_prepared_txn_callback, + NULL_keep_cachetable_callback, + NULL_logger, testdir, testdir, 0, 0, 0, NULL, 0); + CKERR(r); + } } -int test_main(int argc, char * const argv[]) { - uint64_t cachesize = 0; - int do_txn = 1; - const char *db_env_dir = TOKU_TEST_FILENAME; - int db_env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG | DB_THREAD; - - // parse_args(argc, argv); - for (int i = 1; i < argc; i++) { - if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) { +int test_main(int argc, const char *argv[]) { + int i = 0; + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0) { verbose++; continue; } - if (strcmp(argv[i], "-q") == 0 || strcmp(argv[i], "--quiet") == 0) { + if (strcmp(argv[i], "-q") == 0) { if (verbose > 0) verbose--; continue; } - assert(0); + break; } - - // setup env - int r; - char rm_cmd[strlen(db_env_dir) + strlen("rm -rf ") + 1]; - snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", db_env_dir); - r = system(rm_cmd); assert(r == 0); - - r = toku_os_mkdir(db_env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert(r == 0); - - DB_ENV *db_env = NULL; - r = db_env_create(&db_env, 0); assert(r == 0); - if (cachesize) { - const uint64_t gig = 1 << 30; - r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); + if (i < argc) { + const char *full_test_dir = argv[i]; + const char *test_dir = basename((char *)full_test_dir); + if (strcmp(full_test_dir, test_dir) != 0) { + int r; + char cmd[32 + strlen(full_test_dir) + strlen(test_dir)]; + sprintf(cmd, "rm -rf %s", test_dir); + r = system(cmd); + CKERR(r); + sprintf(cmd, "cp -r %s %s", full_test_dir, test_dir); + r = system(cmd); + CKERR(r); + } + run_recovery(test_dir); } - if (!do_txn) - db_env_open_flags &= ~(DB_INIT_TXN | DB_INIT_LOG); - r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if 0 && defined(USE_BDB) - r = db_env->set_lk_detect(db_env, DB_LOCK_YOUNGEST); assert(r == 0); -#endif - - // run test - simple_deadlock(db_env); - - // close env - r = db_env->close(db_env, 0); assert(r == 0); db_env = NULL; - return 0; } diff --git a/storage/tokudb/ft-index/ft/tests/test.h b/storage/tokudb/ft-index/ft/tests/test.h index ad1fc7c148eab..3170146a120ff 100644 --- a/storage/tokudb/ft-index/ft/tests/test.h +++ b/storage/tokudb/ft-index/ft/tests/test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -86,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -99,15 +101,19 @@ PATENT RIGHTS GRANT: #include #include -#include "ft.h" -#include "key.h" -#include "block_table.h" -#include "log-internal.h" -#include "logger.h" -#include "fttypes.h" -#include "ft-ops.h" -#include "cachetable.h" -#include "cachetable-internal.h" +#include "ft/serialize/block_allocator.h" +#include "ft/serialize/block_table.h" +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/cachetable-internal.h" +#include "ft/cursor.h" +#include "ft/ft.h" +#include "ft/ft-ops.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/logger/log-internal.h" +#include "ft/logger/logger.h" +#include "ft/node.h" +#include "util/bytestring.h" #define CKERR(r) ({ int __r = r; if (__r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, __r, strerror(r)); assert(__r==0); }) #define CKERR2(r,r2) do { if (r!=r2) fprintf(stderr, "%s:%d error %d %s, expected %d\n", __FILE__, __LINE__, r, strerror(r), r2); assert(r==r2); } while (0) @@ -118,15 +124,17 @@ PATENT RIGHTS GRANT: fflush(stderr); \ } while (0) -const ITEMLEN len_ignore = 0xFFFFFFFF; +const uint32_t len_ignore = 0xFFFFFFFF; +static const prepared_txn_callback_t NULL_prepared_txn_callback __attribute__((__unused__)) = NULL; +static const keep_cachetable_callback_t NULL_keep_cachetable_callback __attribute__((__unused__)) = NULL; +static const TOKULOGGER NULL_logger __attribute__((__unused__)) = NULL; -// dummymsn needed to simulate msn because test messages are injected at a lower level than toku_ft_root_put_cmd() +// dummymsn needed to simulate msn because test messages are injected at a lower level than toku_ft_root_put_msg() #define MIN_DUMMYMSN ((MSN) {(uint64_t)1<<62}) static MSN dummymsn; static int dummymsn_initialized = 0; - static void initialize_dummymsn(void) { if (dummymsn_initialized == 0) { @@ -150,14 +158,14 @@ last_dummymsn(void) { struct check_pair { - ITEMLEN keylen; // A keylen equal to 0xFFFFFFFF means don't check the keylen or the key. - bytevec key; // A NULL key means don't check the key. - ITEMLEN vallen; // Similarly for vallen and null val. - bytevec val; + uint32_t keylen; // A keylen equal to 0xFFFFFFFF means don't check the keylen or the key. + const void *key; // A NULL key means don't check the key. + uint32_t vallen; // Similarly for vallen and null val. 
+ const void *val; int call_count; }; static int -lookup_checkf (ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *pair_v, bool lock_only) { +lookup_checkf (uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *pair_v, bool lock_only) { if (!lock_only) { struct check_pair *pair = (struct check_pair *) pair_v; if (key!=NULL) { @@ -182,8 +190,8 @@ ft_lookup_and_check_nodup (FT_HANDLE t, const char *keystring, const char *valst { DBT k; toku_fill_dbt(&k, keystring, strlen(keystring) + 1); - struct check_pair pair = {(ITEMLEN) (1+strlen(keystring)), keystring, - (ITEMLEN) (1+strlen(valstring)), valstring, + struct check_pair pair = {(uint32_t) (1+strlen(keystring)), keystring, + (uint32_t) (1+strlen(valstring)), valstring, 0}; int r = toku_ft_lookup(t, &k, lookup_checkf, &pair); assert(r==0); @@ -195,7 +203,7 @@ ft_lookup_and_fail_nodup (FT_HANDLE t, char *keystring) { DBT k; toku_fill_dbt(&k, keystring, strlen(keystring) + 1); - struct check_pair pair = {(ITEMLEN) (1+strlen(keystring)), keystring, + struct check_pair pair = {(uint32_t) (1+strlen(keystring)), keystring, 0, 0, 0}; int r = toku_ft_lookup(t, &k, lookup_checkf, &pair); @@ -392,4 +400,3 @@ main(int argc, const char *argv[]) { toku_ft_layer_destroy(); return r; } - diff --git a/storage/tokudb/ft-index/ft/tests/test1308a.cc b/storage/tokudb/ft-index/ft/tests/test1308a.cc index 908c648090a05..ddbc43de7dc7d 100644 --- a/storage/tokudb/ft-index/ft/tests/test1308a.cc +++ b/storage/tokudb/ft-index/ft/tests/test1308a.cc @@ -1,6 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -// Test the first case for the bug in #1308 (brt-serialize.c:33 does the cast wrong) +// Test the first case for the bug in #1308 (ft-serialize.c:33 does the cast wrong) #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/test3681.cc b/storage/tokudb/ft-index/ft/tests/test3681.cc index ee7cc4ceff83e..db5e8232cd41c 100644 --- a/storage/tokudb/ft-index/ft/tests/test3681.cc +++ b/storage/tokudb/ft-index/ft/tests/test3681.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,25 +89,24 @@ PATENT RIGHTS GRANT: #ident "$Id$" // Test for #3681: iibench hangs. The scenario is -// * Thread 1 calls root_put_cmd, get_and_pin_root, 1 holds read lock on the root. +// * Thread 1 calls root_put_msg, get_and_pin_root, 1 holds read lock on the root. // * Thread 2 calls checkpoint, marks the root for checkpoint. // * Thread 2 calls end_checkpoint, tries to write lock the root, sets want_write, and blocks on the rwlock because there is a reader. -// * Thread 1 calls apply_cmd_to_in_memory_leaves, calls get_and_pin_if_in_memory, tries to get a read lock on the root node and blocks on the rwlock because there is a write request on the lock. +// * Thread 1 calls apply_msg_to_in_memory_leaves, calls get_and_pin_if_in_memory, tries to get a read lock on the root node and blocks on the rwlock because there is a write request on the lock. 
-#include "checkpoint.h" +#include "cachetable/checkpoint.h" #include "test.h" CACHETABLE ct; FT_HANDLE t; -static DB * const null_db = 0; static TOKUTXN const null_txn = 0; volatile bool done = false; static void setup (void) { - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); const char *fname = TOKU_TEST_FILENAME; unlink(fname); { int r = toku_open_ft_handle(fname, 1, &t, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); } diff --git a/storage/tokudb/ft-index/ft/tests/test3856.cc b/storage/tokudb/ft-index/ft/tests/test3856.cc index 6a8b1155f510d..c0b693e3421e4 100644 --- a/storage/tokudb/ft-index/ft/tests/test3856.cc +++ b/storage/tokudb/ft-index/ft/tests/test3856.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: // it used to be the case that we copied the left and right keys of a // range to be prelocked but never freed them, this test checks that they -// are freed (as of this time, this happens in destroy_bfe_for_prefetch) +// are freed (as of this time, this happens in ftnode_fetch_extra::destroy()) #include "test.h" @@ -99,7 +99,6 @@ PATENT RIGHTS GRANT: static const char *fname = TOKU_TEST_FILENAME; static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static int const nodesize = 1<<12, basementnodesize = 1<<9; static const enum toku_compression_method compression_method = TOKU_DEFAULT_COMPRESSION_METHOD; static int const count = 1000; @@ -111,7 +110,7 @@ string_cmp(DB* UU(db), const DBT *a, const DBT *b) } static int -found(ITEMLEN UU(keylen), bytevec key, ITEMLEN UU(vallen), bytevec UU(val), void *UU(extra), bool lock_only) +found(uint32_t UU(keylen), const void *key, uint32_t UU(vallen), const void *UU(val), void *UU(extra), bool lock_only) { assert(key != NULL && !lock_only); return 0; @@ -123,7 +122,7 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_ CACHETABLE ct; FT_HANDLE t; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); unlink(fname); int r = toku_open_ft_handle(fname, 1, &t, nodesize, basementnodesize, compression_method, ct, null_txn, string_cmp); assert(r==0); @@ -137,7 +136,7 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_ r = toku_close_ft_handle_nolsn(t, 0); assert(r == 0); toku_cachetable_close(&ct); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &t, nodesize, basementnodesize, compression_method, ct, null_txn, string_cmp); assert(r == 0); for (int n = 0; n < count/100; ++n) { diff --git a/storage/tokudb/ft-index/ft/tests/test3884.cc b/storage/tokudb/ft-index/ft/tests/test3884.cc index 817e0d82212c9..a4a9e8568cf48 100644 --- a/storage/tokudb/ft-index/ft/tests/test3884.cc +++ b/storage/tokudb/ft-index/ft/tests/test3884.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
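test3856's found() above, like lookup_checkf in the harness hunk at the top of this section, now uses the plain uint32_t / const void * getf signature instead of ITEMLEN / bytevec. A minimal, self-contained sketch of how these callbacks verify a lookup; the hypothetical fake_lookup stands in for toku_ft_lookup, and the struct mirrors the patched check_pair, where 0xFFFFFFFF and NULL mean "don't check this field":

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Mirrors the patched check_pair: a keylen of 0xFFFFFFFF or a NULL key means
// "don't check that field"; call_count lets the caller assert the callback ran.
struct check_pair {
    uint32_t keylen;
    const void *key;
    uint32_t vallen;
    const void *val;
    int call_count;
};

// Same shape as the patched lookup_checkf (uint32_t / const void * instead of
// ITEMLEN / bytevec).
static int lookup_checkf(uint32_t keylen, const void *key,
                         uint32_t vallen, const void *val,
                         void *pair_v, bool lock_only) {
    if (!lock_only) {
        check_pair *pair = static_cast<check_pair *>(pair_v);
        if (pair->key != nullptr && pair->keylen != 0xFFFFFFFF) {
            assert(keylen == pair->keylen);
            assert(memcmp(key, pair->key, keylen) == 0);
        }
        if (pair->val != nullptr && pair->vallen != 0xFFFFFFFF) {
            assert(vallen == pair->vallen);
            assert(memcmp(val, pair->val, vallen) == 0);
        }
        pair->call_count++;
    }
    return 0;
}

// Hypothetical stand-in for toku_ft_lookup: "finds" a fixed row and reports it
// through the callback, the way the real lookup reports the stored pair.
static int fake_lookup(const char *key,
                       int (*getf)(uint32_t, const void *, uint32_t, const void *, void *, bool),
                       void *extra) {
    const char *stored_val = "world";
    return getf((uint32_t)strlen(key) + 1, key,
                (uint32_t)strlen(stored_val) + 1, stored_val, extra, false);
}

int main() {
    const char *k = "hello", *v = "world";
    check_pair pair = {(uint32_t)strlen(k) + 1, k, (uint32_t)strlen(v) + 1, v, 0};
    int r = fake_lookup(k, lookup_checkf, &pair);
    assert(r == 0);
    assert(pair.call_count == 1);  // the callback actually fired
    printf("lookup verified\n");
    return 0;
}

In the real tests, ft_lookup_and_check_nodup() builds the same struct and asserts call_count == 1 after toku_ft_lookup() returns.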
DISCLAIMER: @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: // it used to be the case that we copied the left and right keys of a // range to be prelocked but never freed them, this test checks that they -// are freed (as of this time, this happens in destroy_bfe_for_prefetch) +// are freed (as of this time, this happens in ftnode_fetch_extra::destroy()) #include "test.h" @@ -111,7 +111,6 @@ static const int vallen = 64 - sizeof(long) - (sizeof(((LEAFENTRY)NULL)->type) #define dummy_msn_3884 ((MSN) { (uint64_t) 3884 * MIN_MSN.msn }) static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; static const char *fname = TOKU_TEST_FILENAME; static void @@ -119,13 +118,18 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keysize, const cha { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(valsize); + void *maybe_free = nullptr; bn->get_space_for_insert( idx, key, keysize, size_needed, - &r + &r, + &maybe_free ); + if (maybe_free) { + toku_free(maybe_free); + } resource_assert(r); r->type = LE_CLEAN; r->u.clean.vallen = valsize; @@ -149,12 +153,11 @@ static void setup_ftnode_header(struct ftnode *node) { node->flags = 0x11223344; - node->thisnodename.b = 20; + node->blocknum.b = 20; node->layout_version = FT_LAYOUT_VERSION; node->layout_version_original = FT_LAYOUT_VERSION; node->height = 0; node->dirty = 1; - node->totalchildkeylens = 0; node->oldest_referenced_xid_known = TXNID_NONE; } @@ -164,12 +167,12 @@ setup_ftnode_partitions(struct ftnode *node, int n_children, const MSN msn, size node->n_children = n_children; node->max_msn_applied_to_node_on_disk = msn; MALLOC_N(node->n_children, node->bp); - MALLOC_N(node->n_children - 1, node->childkeys); for (int bn = 0; bn < node->n_children; ++bn) { BP_STATE(node, bn) = PT_AVAIL; set_BLB(node, bn, toku_create_empty_bn()); BLB_MAX_MSN_APPLIED(node, bn) = msn; } + node->pivotkeys.create_empty(); } static void @@ -181,7 +184,7 @@ verify_basement_node_msns(FTNODE node, MSN expected) } // -// Maximum node size according to the BRT: 1024 (expected node size after split) +// Maximum node size according to the FT: 1024 (expected node size after split) // Maximum basement node size: 256 // Actual node size before split: 2048 // Actual basement node size before split: 256 @@ -205,38 +208,35 @@ test_split_on_boundary(void) insert_dummy_value(&sn, bn, k, i); } if (bn < sn.n_children - 1) { - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } } unlink(fname); CACHETABLE ct; - FT_HANDLE brt; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); - r = toku_open_ft_handle(fname, 1, &brt, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); + FT_HANDLE ft; + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); + r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); FTNODE nodea, nodeb; DBT splitk; // if we haven't done it right, we should hit the assert in the top of move_leafentries - ftleaf_split(brt->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL); + ftleaf_split(ft->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL); verify_basement_node_msns(nodea, dummy_msn_3884); verify_basement_node_msns(nodeb, dummy_msn_3884); - toku_unpin_ftnode(brt->ft, nodeb); - r = toku_close_ft_handle_nolsn(brt, NULL); assert(r == 0); + toku_unpin_ftnode(ft->ft, 
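The recurring change in test3884 above is pivot-key ownership: the hand-managed sn.childkeys[] array (filled via toku_memdup_dbt) and the running sn.totalchildkeylens counter are replaced by the pivotkeys member (pivotkeys.create_empty() plus pivotkeys.insert_at()), and the split key is released with toku_destroy_dbt(&splitk) instead of an explicit toku_free of splitk.data. A small self-contained sketch of why the per-call bookkeeping disappears, assuming the container owns copies of its keys and tracks their total length itself (hypothetical pivot_keys class, not TokuFT's):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for the pivotkeys class: it owns copies of its keys
// and tracks their total size itself, so callers no longer maintain a
// childkeys[] array and a separate totalchildkeylens counter by hand.
class pivot_keys {
    std::vector<std::vector<char>> _keys;
    size_t _total_size = 0;
public:
    void create_empty() { _keys.clear(); _total_size = 0; }
    void insert_at(const void *key, uint32_t keylen, size_t idx) {
        const char *p = static_cast<const char *>(key);
        _keys.insert(_keys.begin() + idx, std::vector<char>(p, p + keylen));
        _total_size += keylen;
    }
    size_t num_pivots() const { return _keys.size(); }
    size_t total_size() const { return _total_size; }
};

int main() {
    pivot_keys pk;
    pk.create_empty();
    for (uint32_t k = 0; k < 3; k++) {
        // before: toku_memdup_dbt(&sn.childkeys[k], &k, sizeof k);
        //         sn.totalchildkeylens += sizeof k;
        // after:  one insert call; the container does the accounting
        pk.insert_at(&k, sizeof k, k);
    }
    printf("%zu pivots, %zu key bytes\n", pk.num_pivots(), pk.total_size());
    return 0;
}

The other ownership fix in the same hunk is bn->get_space_for_insert() gaining a maybe_free out-parameter: the call can hand back a buffer it displaced, and le_add_to_bn() now frees it (if (maybe_free) toku_free(maybe_free)) rather than dropping it.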
nodeb); + r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } // -// Maximum node size according to the BRT: 1024 (expected node size after split) +// Maximum node size according to the FT: 1024 (expected node size after split) // Maximum basement node size: 256 (except the last) // Actual node size before split: 4095 // Actual basement node size before split: 256 (except the last, of size 2K) @@ -265,8 +265,8 @@ test_split_with_everything_on_the_left(void) k = bn * eltsperbn + i; big_val_size += insert_dummy_value(&sn, bn, k, i); } - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } else { k = bn * eltsperbn; // we want this to be as big as the rest of our data and a @@ -282,29 +282,26 @@ test_split_with_everything_on_the_left(void) unlink(fname); CACHETABLE ct; - FT_HANDLE brt; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); - r = toku_open_ft_handle(fname, 1, &brt, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); + FT_HANDLE ft; + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); + r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); FTNODE nodea, nodeb; DBT splitk; // if we haven't done it right, we should hit the assert in the top of move_leafentries - ftleaf_split(brt->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL); + ftleaf_split(ft->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL); - toku_unpin_ftnode(brt->ft, nodeb); - r = toku_close_ft_handle_nolsn(brt, NULL); assert(r == 0); + toku_unpin_ftnode(ft->ft, nodeb); + r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } // -// Maximum node size according to the BRT: 1024 (expected node size after split) +// Maximum node size according to the FT: 1024 (expected node size after split) // Maximum basement node size: 256 (except the last) // Actual node size before split: 4095 // Actual basement node size before split: 256 (except the last, of size 2K) @@ -334,8 +331,8 @@ test_split_on_boundary_of_last_node(void) k = bn * eltsperbn + i; big_val_size += insert_dummy_value(&sn, bn, k, i); } - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } else { k = bn * eltsperbn; // we want this to be slightly smaller than all the rest of @@ -354,23 +351,20 @@ test_split_on_boundary_of_last_node(void) unlink(fname); CACHETABLE ct; - FT_HANDLE brt; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); - r = toku_open_ft_handle(fname, 1, &brt, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); + FT_HANDLE ft; + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); + r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); FTNODE nodea, nodeb; DBT splitk; // if we haven't done it right, we should hit the assert in the top of move_leafentries - ftleaf_split(brt->ft, &sn, &nodea, 
&nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL); + ftleaf_split(ft->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL); - toku_unpin_ftnode(brt->ft, nodeb); - r = toku_close_ft_handle_nolsn(brt, NULL); assert(r == 0); + toku_unpin_ftnode(ft->ft, nodeb); + r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } @@ -400,8 +394,8 @@ test_split_at_begin(void) totalbytes += insert_dummy_value(&sn, bn, k, i-1); } if (bn < sn.n_children - 1) { - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } } { // now add the first element @@ -418,23 +412,20 @@ test_split_at_begin(void) unlink(fname); CACHETABLE ct; - FT_HANDLE brt; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); - r = toku_open_ft_handle(fname, 1, &brt, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); + FT_HANDLE ft; + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); + r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); FTNODE nodea, nodeb; DBT splitk; // if we haven't done it right, we should hit the assert in the top of move_leafentries - ftleaf_split(brt->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL); + ftleaf_split(ft->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL); - toku_unpin_ftnode(brt->ft, nodeb); - r = toku_close_ft_handle_nolsn(brt, NULL); assert(r == 0); + toku_unpin_ftnode(ft->ft, nodeb); + r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } @@ -471,34 +462,31 @@ test_split_at_end(void) } } if (bn < sn.n_children - 1) { - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } } unlink(fname); CACHETABLE ct; - FT_HANDLE brt; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); - r = toku_open_ft_handle(fname, 1, &brt, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); + FT_HANDLE ft; + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); + r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); FTNODE nodea, nodeb; DBT splitk; // if we haven't done it right, we should hit the assert in the top of move_leafentries - ftleaf_split(brt->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL); + ftleaf_split(ft->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL); - toku_unpin_ftnode(brt->ft, nodeb); - r = toku_close_ft_handle_nolsn(brt, NULL); assert(r == 0); + toku_unpin_ftnode(ft->ft, nodeb); + r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } -// Maximum node size according to the BRT: 1024 (expected node size after split) +// Maximum node size according to the FT: 1024 (expected node size after split) // Maximum basement node size: 256 // Actual node size before split: 2048 
// Actual basement node size before split: 256 @@ -525,33 +513,30 @@ test_split_odd_nodes(void) insert_dummy_value(&sn, bn, k, i); } if (bn < sn.n_children - 1) { - toku_memdup_dbt(&sn.childkeys[bn], &k, sizeof k); - sn.totalchildkeylens += (sizeof k); + DBT pivotkey; + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), bn); } } unlink(fname); CACHETABLE ct; - FT_HANDLE brt; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); - r = toku_open_ft_handle(fname, 1, &brt, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); + FT_HANDLE ft; + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); + r = toku_open_ft_handle(fname, 1, &ft, nodesize, bnsize, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); FTNODE nodea, nodeb; DBT splitk; // if we haven't done it right, we should hit the assert in the top of move_leafentries - ftleaf_split(brt->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL); + ftleaf_split(ft->ft, &sn, &nodea, &nodeb, &splitk, true, SPLIT_EVENLY, 0, NULL); verify_basement_node_msns(nodea, dummy_msn_3884); verify_basement_node_msns(nodeb, dummy_msn_3884); - toku_unpin_ftnode(brt->ft, nodeb); - r = toku_close_ft_handle_nolsn(brt, NULL); assert(r == 0); + toku_unpin_ftnode(ft->ft, nodeb); + r = toku_close_ft_handle_nolsn(ft, NULL); assert(r == 0); toku_cachetable_close(&ct); - if (splitk.data) { - toku_free(splitk.data); - } - + toku_destroy_dbt(&splitk); toku_destroy_ftnode_internals(&sn); } diff --git a/storage/tokudb/ft-index/ft/tests/test4115.cc b/storage/tokudb/ft-index/ft/tests/test4115.cc index 5f1d041896f8c..e24696af05765 100644 --- a/storage/tokudb/ft-index/ft/tests/test4115.cc +++ b/storage/tokudb/ft-index/ft/tests/test4115.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,7 +96,6 @@ PATENT RIGHTS GRANT: #include static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; const char *fname = TOKU_TEST_FILENAME; CACHETABLE ct; @@ -117,7 +116,7 @@ static void close_ft_and_ct (void) { static void open_ft_and_ct (bool unlink_old) { int r; if (unlink_old) unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &t, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); toku_ft_set_bt_compare(t, dont_allow_prefix); } diff --git a/storage/tokudb/ft-index/ft/tests/test4244.cc b/storage/tokudb/ft-index/ft/tests/test4244.cc index 76a5340797f88..3c2728e941e1d 100644 --- a/storage/tokudb/ft-index/ft/tests/test4244.cc +++ b/storage/tokudb/ft-index/ft/tests/test4244.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -96,7 +96,6 @@ PATENT RIGHTS GRANT: #include static TOKUTXN const null_txn = 0; -static DB * const null_db = 0; enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; @@ -110,7 +109,7 @@ doit (void) { int r; - toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 500*1024*1024, ZERO_LSN, nullptr); unlink(fname); r = toku_open_ft_handle(fname, 1, &t, NODESIZE, NODESIZE/2, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); @@ -148,23 +147,22 @@ doit (void) { // then node_internal should be huge // we pin it and verify that it is not FTNODE node; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, t->ft); - toku_pin_ftnode_off_client_thread( + ftnode_fetch_extra bfe; + bfe.create_for_full_read(t->ft); + toku_pin_ftnode( t->ft, node_internal, toku_cachetable_hash(t->ft->cf, node_internal), &bfe, PL_WRITE_EXPENSIVE, - 0, - NULL, - &node + &node, + true ); assert(node->n_children == 1); // simply assert that the buffer is less than 50MB, // we inserted 100MB of data in there. assert(toku_bnc_nbytesinbuf(BNC(node, 0)) < 50*1000*1000); - toku_unpin_ftnode_off_client_thread(t->ft, node); + toku_unpin_ftnode(t->ft, node); r = toku_close_ft_handle_nolsn(t, 0); assert(r==0); toku_cachetable_close(&ct); diff --git a/storage/tokudb/ft-index/ft/tests/test_block_allocator_merge.cc b/storage/tokudb/ft-index/ft/tests/test_block_allocator_merge.cc deleted file mode 100644 index af66c7408bf31..0000000000000 --- a/storage/tokudb/ft-index/ft/tests/test_block_allocator_merge.cc +++ /dev/null @@ -1,236 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. 
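The pinning change in test4244 above is the same one that recurs in later hunks: fill_bfe_for_full_read() plus toku_pin_ftnode_off_client_thread() / toku_unpin_ftnode_off_client_thread() collapse into the ftnode_fetch_extra member API and a single toku_pin_ftnode() / toku_unpin_ftnode() pair. Reflowed from the patched hunk for readability (a fragment of the ft test code, not a standalone program; the trailing bool is passed as true exactly as in the patch, its meaning is whatever the new toku_pin_ftnode() defines):

// before (removed by this patch):
//     struct ftnode_fetch_extra bfe;
//     fill_bfe_for_full_read(&bfe, t->ft);
//     toku_pin_ftnode_off_client_thread(t->ft, node_internal,
//                                       toku_cachetable_hash(t->ft->cf, node_internal),
//                                       &bfe, PL_WRITE_EXPENSIVE, 0, NULL, &node);
//     // ... assertions on the pinned node ...
//     toku_unpin_ftnode_off_client_thread(t->ft, node);

// after (as in the patched test4244.cc):
ftnode_fetch_extra bfe;
bfe.create_for_full_read(t->ft);
toku_pin_ftnode(
    t->ft,
    node_internal,
    toku_cachetable_hash(t->ft->cf, node_internal),
    &bfe,
    PL_WRITE_EXPENSIVE,
    &node,
    true);
assert(node->n_children == 1);
// the buffer should be well under the 100MB that was inserted
assert(toku_bnc_nbytesinbuf(BNC(node, 0)) < 50*1000*1000);
toku_unpin_ftnode(t->ft, node);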
- -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." -#include "../block_allocator.h" -#include -#include -// Test the merger. 
- -int verbose = 0; - -static void -print_array (uint64_t n, const struct block_allocator_blockpair a[/*n*/]) { - printf("{"); - for (uint64_t i=0; ioffset < b->offset) return -1; - if (a->offset > b->offset) return +1; - return 0; -} - -static void -test_merge (uint64_t an, const struct block_allocator_blockpair a[/*an*/], - uint64_t bn, const struct block_allocator_blockpair b[/*bn*/]) { - if (verbose>1) { printf("a:"); print_array(an, a); } - if (verbose>1) { printf("b:"); print_array(bn, b); } - struct block_allocator_blockpair *MALLOC_N(an+bn, q); - struct block_allocator_blockpair *MALLOC_N(an+bn, m); - if (q==0 || m==0) { - fprintf(stderr, "malloc failed, continuing\n"); - goto malloc_failed; - } - for (uint64_t i=0; i1) { printf("q:"); print_array(an+bn, q); } - if (verbose) printf("merge\n"); - block_allocator_merge_blockpairs_into(an, m, bn, b); - if (verbose) printf("compare\n"); - if (verbose>1) { printf("m:"); print_array(an+bn, m); } - for (uint64_t i=0; i #include -#include "logcursor.h" +#include "logger/logcursor.h" #include "test.h" -#include "fttypes.h" #if defined(HAVE_LIMITS_H) # include @@ -105,7 +104,6 @@ const char LOGDIR[100] = "./dir.test_logcursor"; const int FSYNC = 1; const int NO_FSYNC = 0; -const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN; const char *namea="a.db"; const char *nameb="b.db"; const char *a="a"; @@ -145,9 +143,9 @@ int test_main(int argc, const char *argv[]) { int r = 0; // start from a clean directory - char rmrf_cmd[100]; - sprintf(rmrf_cmd, "rm -rf %s", LOGDIR); - r = system(rmrf_cmd); + char rmrf_msg[100]; + sprintf(rmrf_msg, "rm -rf %s", LOGDIR); + r = system(rmrf_msg); CKERR(r); toku_os_mkdir(LOGDIR, S_IRWXU+S_IRWXG+S_IRWXO); if ( (r=create_logfiles()) !=0 ) return r; @@ -155,7 +153,7 @@ int test_main(int argc, const char *argv[]) { if ( (r=test_0()) !=0 ) return r; if ( (r=test_1()) !=0 ) return r; - r = system(rmrf_cmd); + r = system(rmrf_msg); CKERR(r); return r; } diff --git a/storage/tokudb/ft-index/ft/tests/test_oexcl.cc b/storage/tokudb/ft-index/ft/tests/test_oexcl.cc index b6eacd9236299..72fd01c2c89a1 100644 --- a/storage/tokudb/ft-index/ft/tests/test_oexcl.cc +++ b/storage/tokudb/ft-index/ft/tests/test_oexcl.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_txn_close_open_commit.cc b/storage/tokudb/ft-index/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc similarity index 51% rename from storage/tokudb/ft-index/src/tests/test_txn_close_open_commit.cc rename to storage/tokudb/ft-index/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc index 4f5060b38712c..6996215397981 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_close_open_commit.cc +++ b/storage/tokudb/ft-index/ft/tests/test_rightmost_leaf_seqinsert_heuristic.cc @@ -29,8 +29,8 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2014 Tokutek, Inc. DISCLAIMER: @@ -86,83 +86,98 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "Copyright (c) 2014 Tokutek Inc. All rights reserved." 
+ #include "test.h" -#include - -#include -#include -#include -#include -#include - -// Recreate a mysqld crash by closing and opening a db within a transaction. -// The crash occurs when writing a dirty cachetable pair, so we insert one -// row. -static void -test_txn_close_open_commit (void) { - -#ifndef USE_TDB -#if DB_VERSION_MAJOR==4 && DB_VERSION_MINOR==3 - if (verbose) fprintf(stderr, "%s does not work for BDB %d.%d. Not running\n", __FILE__, DB_VERSION_MAJOR, DB_VERSION_MINOR); - return; -#else - toku_os_recursive_delete(TOKU_TEST_FILENAME); - toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); - - int r; - DB_ENV *env; - r = db_env_create(&env, 0); assert(r == 0); - r = env->set_data_dir(env, TOKU_TEST_FILENAME); - r = env->set_lg_dir(env, TOKU_TEST_FILENAME); - env->set_errfile(env, stdout); - r = env->open(env, 0, DB_INIT_MPOOL + DB_INIT_LOG + DB_INIT_LOCK + DB_INIT_TXN + DB_PRIVATE + DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); - if (r != 0) printf("%s:%d:%d:%s\n", __FILE__, __LINE__, r, db_strerror(r)); - assert(r == 0); - - DB_TXN *txn = 0; - r = env->txn_begin(env, 0, &txn, 0); assert(r == 0); - - DB *db; - r = db_create(&db, env, 0); assert(r == 0); - r = db->open(db, txn, "test.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); assert(r == 0); - - { - toku_struct_stat statbuf; - char fullfile[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, "test.db"), &statbuf); - assert(r==0); - } +#include +#include + +// Each FT maintains a sequential insert heuristic to determine if its +// worth trying to insert directly into a well-known rightmost leaf node. +// +// The heuristic is only maintained when a rightmost leaf node is known. +// +// This test verifies that sequential inserts increase the seqinsert score +// and that a single non-sequential insert resets the score. + +static void test_seqinsert_heuristic(void) { + int r = 0; + char name[TOKU_PATH_MAX + 1]; + toku_path_join(name, 2, TOKU_TEST_FILENAME, "ftdata"); + toku_os_recursive_delete(TOKU_TEST_FILENAME); + r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU); CKERR(r); + + FT_HANDLE ft_handle; + CACHETABLE ct; + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); + r = toku_open_ft_handle(name, 1, &ft_handle, + 4*1024*1024, 64*1024, + TOKU_DEFAULT_COMPRESSION_METHOD, ct, NULL, + toku_builtin_compare_fun); CKERR(r); + FT ft = ft_handle->ft; + + int k; DBT key, val; - int k = 1, v = 1; - r = db->put(db, txn, dbt_init(&key, &k, sizeof k), dbt_init(&val, &v, sizeof v), 0); - assert(r == 0); - - // Close before commit - r = db->close(db, 0); assert(r == 0); - - r = db_create(&db, env, 0); assert(r == 0); - r = db->open(db, txn, "test.db", 0, DB_UNKNOWN, 0, S_IRWXU+S_IRWXG+S_IRWXO); assert(r == 0); - - r = txn->commit(txn, 0); assert(r == 0); - - r = db->close(db, 0); assert(r == 0); - - r = env->close(env, 0); assert(r == 0); - - { - toku_struct_stat statbuf; - char fullfile[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, "test.db"), &statbuf); - assert(r==0); + const int val_size = 1024 * 1024; + char *XMALLOC_N(val_size, val_buf); + memset(val_buf, 'x', val_size); + toku_fill_dbt(&val, val_buf, val_size); + + // Insert many rows sequentially. 
This is enough data to: + // - force the root to split (the righmost leaf will then be known) + // - raise the seqinsert score high enough to enable direct rightmost injections + const int rows_to_insert = 200; + for (int i = 0; i < rows_to_insert; i++) { + k = toku_htonl(i); + toku_fill_dbt(&key, &k, sizeof(k)); + toku_ft_insert(ft_handle, &key, &val, NULL); } -#endif -#endif + invariant(ft->rightmost_blocknum.b != RESERVED_BLOCKNUM_NULL); + invariant(ft->seqinsert_score == FT_SEQINSERT_SCORE_THRESHOLD); + + // Insert on the left extreme. The seq insert score is high enough + // that we will attempt to insert into the rightmost leaf. We won't + // be successful because key 0 won't be in the bounds of the rightmost leaf. + // This failure should reset the seqinsert score back to 0. + k = toku_htonl(0); + toku_fill_dbt(&key, &k, sizeof(k)); + toku_ft_insert(ft_handle, &key, &val, NULL); + invariant(ft->seqinsert_score == 0); + + // Insert in the middle. The score should not go up. + k = toku_htonl(rows_to_insert / 2); + toku_fill_dbt(&key, &k, sizeof(k)); + toku_ft_insert(ft_handle, &key, &val, NULL); + invariant(ft->seqinsert_score == 0); + + // Insert on the right extreme. The score should go up. + k = toku_htonl(rows_to_insert); + toku_fill_dbt(&key, &k, sizeof(k)); + toku_ft_insert(ft_handle, &key, &val, NULL); + invariant(ft->seqinsert_score == 1); + + // Insert again on the right extreme again, the score should go up. + k = toku_htonl(rows_to_insert + 1); + toku_fill_dbt(&key, &k, sizeof(k)); + toku_ft_insert(ft_handle, &key, &val, NULL); + invariant(ft->seqinsert_score == 2); + + // Insert close to, but not at, the right extreme. The score should reset. + // -- the magic number 4 derives from the fact that vals are 1mb and nodes are 4mb + k = toku_htonl(rows_to_insert - 4); + toku_fill_dbt(&key, &k, sizeof(k)); + toku_ft_insert(ft_handle, &key, &val, NULL); + invariant(ft->seqinsert_score == 0); + + toku_free(val_buf); + toku_ft_handle_close(ft_handle); + toku_cachetable_close(&ct); + toku_os_recursive_delete(TOKU_TEST_FILENAME); } -int -test_main(int UU(argc), char UU(*const argv[])) { - test_txn_close_open_commit(); +int test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + test_seqinsert_heuristic(); return 0; } diff --git a/storage/tokudb/ft-index/ft/tests/test_rightmost_leaf_split_merge.cc b/storage/tokudb/ft-index/ft/tests/test_rightmost_leaf_split_merge.cc new file mode 100644 index 0000000000000..29515d9925fc7 --- /dev/null +++ b/storage/tokudb/ft-index/ft/tests/test_rightmost_leaf_split_merge.cc @@ -0,0 +1,213 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). 
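The seqinsert test above pins down the contract of the heuristic: under a purely sequential load the score climbs until it equals FT_SEQINSERT_SCORE_THRESHOLD (after which inserts are attempted directly against the known rightmost leaf), an insert that cannot land in that leaf resets the score to zero, and inserts at the right edge start counting it back up one at a time. A self-contained model of that scoring rule, with an illustrative threshold; this is a sketch of the behaviour the test asserts, not the FT implementation:

#include <cstdint>
#include <cstdio>

// Illustrative model of the seqinsert heuristic the test exercises:
// rightmost inserts raise the score up to a threshold; an insert that does
// not land at the right edge resets it.
struct seqinsert_model {
    static const uint32_t kThreshold = 100;  // illustrative, not the real FT_SEQINSERT_SCORE_THRESHOLD
    uint32_t score = 0;
    int64_t rightmost_key = -1;              // -1: no rightmost leaf known yet

    void insert(int64_t key) {
        if (rightmost_key < 0 || key >= rightmost_key) {
            rightmost_key = key;
            if (score < kThreshold) score++;  // saturates at the threshold
        } else {
            score = 0;                        // non-sequential insert resets the heuristic
        }
    }
    bool try_rightmost_injection() const { return score >= kThreshold; }
};

int main() {
    seqinsert_model m;
    for (int64_t i = 0; i < 200; i++) m.insert(i);  // sequential load
    printf("after sequential load: score=%u inject=%d\n", m.score, m.try_rightmost_injection());
    m.insert(0);                                     // far-left insert resets the score
    printf("after left insert:     score=%u inject=%d\n", m.score, m.try_rightmost_injection());
    m.insert(300);                                   // right edge again, score climbs from zero
    printf("after right insert:    score=%u\n", m.score);
    return 0;
}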
+ + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2014 Tokutek Inc. All rights reserved." + +#include "test.h" + +#include +#include +#include + +// Promotion tracks the rightmost blocknum in the FT when a message +// is successfully promoted to a non-root leaf node on the right extreme. 
+// +// This test verifies that a split or merge of the rightmost leaf properly +// maintains the rightmost blocknum (which is constant - the pair's swap values, +// like the root blocknum). + +static void test_split_merge(void) { + int r = 0; + char name[TOKU_PATH_MAX + 1]; + toku_path_join(name, 2, TOKU_TEST_FILENAME, "ftdata"); + toku_os_recursive_delete(TOKU_TEST_FILENAME); + r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU); CKERR(r); + + FT_HANDLE ft_handle; + CACHETABLE ct; + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); + r = toku_open_ft_handle(name, 1, &ft_handle, + 4*1024*1024, 64*1024, + TOKU_DEFAULT_COMPRESSION_METHOD, ct, NULL, + toku_builtin_compare_fun); CKERR(r); + + // We have a root blocknum, but no rightmost blocknum yet. + FT ft = ft_handle->ft; + invariant(ft->h->root_blocknum.b != RESERVED_BLOCKNUM_NULL); + invariant(ft->rightmost_blocknum.b == RESERVED_BLOCKNUM_NULL); + + int k; + DBT key, val; + const int val_size = 1 * 1024 * 1024; + char *XMALLOC_N(val_size, val_buf); + memset(val_buf, 'x', val_size); + toku_fill_dbt(&val, val_buf, val_size); + + // Insert 16 rows (should induce a few splits) + const int rows_to_insert = 16; + for (int i = 0; i < rows_to_insert; i++) { + k = toku_htonl(i); + toku_fill_dbt(&key, &k, sizeof(k)); + toku_ft_insert(ft_handle, &key, &val, NULL); + } + + // rightmost blocknum should be set, because the root split and promotion + // did a rightmost insertion directly into the rightmost leaf, lazily + // initializing the rightmost blocknum. + invariant(ft->rightmost_blocknum.b != RESERVED_BLOCKNUM_NULL); + + BLOCKNUM root_blocknum = ft->h->root_blocknum; + FTNODE root_node; + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft); + toku_pin_ftnode(ft, root_blocknum, + toku_cachetable_hash(ft->cf, ft->h->root_blocknum), + &bfe, PL_WRITE_EXPENSIVE, &root_node, true); + // root blocknum should be consistent + invariant(root_node->blocknum.b == ft->h->root_blocknum.b); + // root should have split at least once, and it should now be at height 1 + invariant(root_node->n_children > 1); + invariant(root_node->height == 1); + // rightmost blocknum should no longer be the root, since the root split + invariant(ft->h->root_blocknum.b != ft->rightmost_blocknum.b); + // the right child should have the rightmost blocknum + invariant(BP_BLOCKNUM(root_node, root_node->n_children - 1).b == ft->rightmost_blocknum.b); + + BLOCKNUM rightmost_blocknum_before_merge = ft->rightmost_blocknum; + const int num_children_before_merge = root_node->n_children; + + // delete the last 6 rows. 
+ // - 1mb each, so 6mb deleted + // - should be enough to delete the entire rightmost leaf + some of its neighbor + const int rows_to_delete = 6; + toku_unpin_ftnode(ft, root_node); + for (int i = 0; i < rows_to_delete; i++) { + k = toku_htonl(rows_to_insert - i); + toku_fill_dbt(&key, &k, sizeof(k)); + toku_ft_delete(ft_handle, &key, NULL); + } + toku_pin_ftnode(ft, root_blocknum, + toku_cachetable_hash(ft->cf, root_blocknum), + &bfe, PL_WRITE_EXPENSIVE, &root_node, true); + + // - rightmost leaf should be fusible after those deletes (which were promoted directly to the leaf) + FTNODE rightmost_leaf; + toku_pin_ftnode(ft, rightmost_blocknum_before_merge, + toku_cachetable_hash(ft->cf, rightmost_blocknum_before_merge), + &bfe, PL_WRITE_EXPENSIVE, &rightmost_leaf, true); + invariant(toku_ftnode_get_reactivity(ft, rightmost_leaf) == RE_FUSIBLE); + toku_unpin_ftnode(ft, rightmost_leaf); + + // - merge the rightmost child now that it's fusible + toku_ft_merge_child(ft, root_node, root_node->n_children - 1); + toku_pin_ftnode(ft, root_blocknum, + toku_cachetable_hash(ft->cf, root_blocknum), + &bfe, PL_WRITE_EXPENSIVE, &root_node, true); + + // the merge should have worked, and the root should still be at height 1 + invariant(root_node->n_children < num_children_before_merge); + invariant(root_node->height == 1); + // the rightmost child of the root has the rightmost blocknum + invariant(BP_BLOCKNUM(root_node, root_node->n_children - 1).b == ft->rightmost_blocknum.b); + // the value for rightmost blocknum itself should not have changed + // (we keep it constant, like the root blocknum) + invariant(rightmost_blocknum_before_merge.b == ft->rightmost_blocknum.b); + + toku_unpin_ftnode(ft, root_node); + + toku_free(val_buf); + toku_ft_handle_close(ft_handle); + toku_cachetable_close(&ct); + toku_os_recursive_delete(TOKU_TEST_FILENAME); +} + +int test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + test_split_merge(); + return 0; +} diff --git a/storage/tokudb/ft-index/ft/tests/test_toku_malloc_plain_free.cc b/storage/tokudb/ft-index/ft/tests/test_toku_malloc_plain_free.cc index e7188bb040214..7f166fda83622 100644 --- a/storage/tokudb/ft-index/ft/tests/test_toku_malloc_plain_free.cc +++ b/storage/tokudb/ft-index/ft/tests/test_toku_malloc_plain_free.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/upgrade_test_simple.cc b/storage/tokudb/ft-index/ft/tests/upgrade_test_simple.cc index 31811527aa2eb..3496df47e7c60 100644 --- a/storage/tokudb/ft-index/ft/tests/upgrade_test_simple.cc +++ b/storage/tokudb/ft-index/ft/tests/upgrade_test_simple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
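test_rightmost_leaf_split_merge above checks a subtle invariant: after the deletes make the rightmost leaf fusible and toku_ft_merge_child() merges it away, ft->rightmost_blocknum still holds the same value and still names the root's last child. Per the comment in the test, the blocknum is kept constant and the cachetable pair's contents are swapped instead, the same trick used for the root blocknum. A toy, self-contained illustration of that "stable number, swapped contents" idea (hypothetical types, nothing from the cachetable):

#include <cassert>
#include <cstdio>
#include <map>
#include <string>

// Toy block table: blocknum -> node contents. Merging never renumbers the
// well-known rightmost block, it only replaces what is stored under it, so a
// cached "rightmost blocknum" stays valid across the merge.
struct toy_blocktable {
    std::map<long long, std::string> nodes;

    void merge_left_sibling_into(long long rightmost, long long victim) {
        nodes[rightmost] = nodes[victim] + "+" + nodes[rightmost];
        nodes.erase(victim);
    }
};

int main() {
    const long long rightmost_blocknum = 20;  // the number consumers remember
    toy_blocktable bt;
    bt.nodes[10] = "leaf-A";
    bt.nodes[rightmost_blocknum] = "leaf-B";
    bt.merge_left_sibling_into(rightmost_blocknum, 10);
    assert(bt.nodes.size() == 1);
    assert(bt.nodes.count(rightmost_blocknum) == 1);  // same handle, new contents
    printf("blocknum %lld -> %s\n", rightmost_blocknum, bt.nodes[rightmost_blocknum].c_str());
    return 0;
}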
DISCLAIMER: @@ -97,13 +97,12 @@ PATENT RIGHTS GRANT: #include "ft-flusher.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" static TOKUTXN const null_txn = NULL; -static DB * const null_db = NULL; static int -noop_getf(ITEMLEN UU(keylen), bytevec UU(key), ITEMLEN UU(vallen), bytevec UU(val), void *extra, bool UU(lock_only)) +noop_getf(uint32_t UU(keylen), const void *UU(key), uint32_t UU(vallen), const void *UU(val), void *extra, bool UU(lock_only)) { int *CAST_FROM_VOIDP(calledp, extra); (*calledp)++; @@ -176,7 +175,7 @@ with_open_tree(const char *fname, tree_cb cb, void *cb_extra) FT_HANDLE t; CACHETABLE ct; - toku_cachetable_create(&ct, 16*(1<<20), ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 16*(1<<20), ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 0, &t, diff --git a/storage/tokudb/ft-index/ft/tests/verify-bad-msn.cc b/storage/tokudb/ft-index/ft/tests/verify-bad-msn.cc index edb1b6262a363..a7e3beea1e5fc 100644 --- a/storage/tokudb/ft-index/ft/tests/verify-bad-msn.cc +++ b/storage/tokudb/ft-index/ft/tests/verify-bad-msn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -106,10 +106,10 @@ PATENT RIGHTS GRANT: #include "test.h" static FTNODE -make_node(FT_HANDLE brt, int height) { +make_node(FT_HANDLE ft, int height) { FTNODE node = NULL; int n_children = (height == 0) ? 1 : 0; - toku_create_new_ftnode(brt, &node, height, n_children); + toku_create_new_ftnode(ft, &node, height, n_children); if (n_children) BP_STATE(node,0) = PT_AVAIL; return node; } @@ -122,14 +122,14 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) DBT theval; toku_fill_dbt(&theval, val, vallen); // get an index that we can use to create a new leaf entry - uint32_t idx = BLB_DATA(leafnode, 0)->omt_size(); + uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs(); MSN msn = next_dummymsn(); // apply an insert to the leaf node - FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // Create bad tree (don't do following): // leafnode->max_msn_applied_to_node = msn; @@ -150,13 +150,13 @@ populate_leaf(FTNODE leafnode, int seq, int n, int *minkey, int *maxkey) { } static void -insert_into_child_buffer(FT_HANDLE brt, FTNODE node, int childnum, int minkey, int maxkey) { +insert_into_child_buffer(FT_HANDLE ft, FTNODE node, int childnum, int minkey, int maxkey) { for (unsigned int val = htonl(minkey); val <= htonl(maxkey); val++) { MSN msn = next_dummymsn(); unsigned int key = htonl(val); DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); DBT theval; toku_fill_dbt(&theval, &val, sizeof val); - toku_ft_append_to_child_buffer(brt->ft->compare_fun, NULL, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, &thekey, &theval); + toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, toku_xids_get_root_xids(), true, &thekey, &theval); // Create bad tree (don't do following): // node->max_msn_applied_to_node = msn; @@ -164,17 +164,17 @@ insert_into_child_buffer(FT_HANDLE brt, FTNODE node, int childnum, int minkey, i } static FTNODE 
-make_tree(FT_HANDLE brt, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { +make_tree(FT_HANDLE ft, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { FTNODE node; if (height == 0) { - node = make_node(brt, 0); + node = make_node(ft, 0); populate_leaf(node, *seq, nperleaf, minkey, maxkey); *seq += nperleaf; } else { - node = make_node(brt, height); + node = make_node(ft, height); int minkeys[fanout], maxkeys[fanout]; for (int childnum = 0; childnum < fanout; childnum++) { - FTNODE child = make_tree(brt, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); + FTNODE child = make_tree(ft, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); if (childnum == 0) { toku_ft_nonleaf_append_child(node, child, NULL); } else { @@ -182,8 +182,8 @@ make_tree(FT_HANDLE brt, int height, int fanout, int nperleaf, int *seq, int *mi DBT pivotkey; toku_ft_nonleaf_append_child(node, child, toku_fill_dbt(&pivotkey, &k, sizeof k)); } - toku_unpin_ftnode(brt->ft, child); - insert_into_child_buffer(brt, node, childnum, minkeys[childnum], maxkeys[childnum]); + toku_unpin_ftnode(ft->ft, child); + insert_into_child_buffer(ft, node, childnum, minkeys[childnum], maxkeys[childnum]); } *minkey = minkeys[0]; *maxkey = maxkeys[0]; @@ -212,34 +212,34 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - // create the brt + // create the ft TOKUTXN null_txn = NULL; - FT_HANDLE brt = NULL; - r = toku_open_ft_handle(fname, 1, &brt, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); + FT_HANDLE ft = NULL; + r = toku_open_ft_handle(fname, 1, &ft, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r == 0); // make a tree int seq = 0, minkey, maxkey; - FTNODE newroot = make_tree(brt, height, fanout, nperleaf, &seq, &minkey, &maxkey); + FTNODE newroot = make_tree(ft, height, fanout, nperleaf, &seq, &minkey, &maxkey); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(brt->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // Create bad tree (don't do following): // newroot->max_msn_applied_to_node = last_dummymsn(); // capture msn of last message injected into tree // unpin the new root - toku_unpin_ftnode(brt->ft, newroot); + toku_unpin_ftnode(ft->ft, newroot); if (do_verify) { - r = toku_verify_ft(brt); + r = toku_verify_ft(ft); assert(r != 0); } // flush to the file system - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r == 0); // shutdown the cachetable diff --git a/storage/tokudb/ft-index/ft/tests/verify-bad-pivots.cc b/storage/tokudb/ft-index/ft/tests/verify-bad-pivots.cc index 156fc36666f35..f36ae77a973a4 100644 --- a/storage/tokudb/ft-index/ft/tests/verify-bad-pivots.cc +++ b/storage/tokudb/ft-index/ft/tests/verify-bad-pivots.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,17 +88,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." 
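The remaining verify-* hunks below repeat one mechanical conversion: the aggregate-initialized FT_MSG_S cmd (with its .u={.id={...}} designated initializer) becomes the ft_msg constructor, toku_ft_bn_apply_cmd_once() becomes toku_ft_bn_apply_msg_once() and now also takes the key length, xids_get_root_xids() gains the toku_ prefix, and basement-node entry counts come from num_klpairs() rather than omt_size(). For readability, the core of the patched append_leaf(), reflowed straight from the hunk (a fragment of the test code, not standalone):

// get an index that we can use to create a new leaf entry
uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs();   // was: ->omt_size()

// apply an insert to the leaf node
MSN msn = next_dummymsn();
ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids());
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL);

// don't forget to dirty the node
leafnode->dirty = 1;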
-// generate a tree with bad pivots and check that brt->verify finds them +// generate a tree with bad pivots and check that ft->verify finds them #include #include "test.h" static FTNODE -make_node(FT_HANDLE brt, int height) { +make_node(FT_HANDLE ft, int height) { FTNODE node = NULL; int n_children = (height == 0) ? 1 : 0; - toku_create_new_ftnode(brt, &node, height, n_children); + toku_create_new_ftnode(ft, &node, height, n_children); if (n_children) BP_STATE(node,0) = PT_AVAIL; return node; } @@ -111,13 +111,13 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) DBT theval; toku_fill_dbt(&theval, val, vallen); // get an index that we can use to create a new leaf entry - uint32_t idx = BLB_DATA(leafnode, 0)->omt_size(); + uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs(); // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; @@ -135,17 +135,17 @@ populate_leaf(FTNODE leafnode, int seq, int n, int *minkey, int *maxkey) { } static FTNODE -make_tree(FT_HANDLE brt, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { +make_tree(FT_HANDLE ft, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { FTNODE node; if (height == 0) { - node = make_node(brt, 0); + node = make_node(ft, 0); populate_leaf(node, *seq, nperleaf, minkey, maxkey); *seq += nperleaf; } else { - node = make_node(brt, height); + node = make_node(ft, height); int minkeys[fanout], maxkeys[fanout]; for (int childnum = 0; childnum < fanout; childnum++) { - FTNODE child = make_tree(brt, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); + FTNODE child = make_tree(ft, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); if (childnum == 0) { toku_ft_nonleaf_append_child(node, child, NULL); } else { @@ -153,7 +153,7 @@ make_tree(FT_HANDLE brt, int height, int fanout, int nperleaf, int *seq, int *mi DBT pivotkey; toku_ft_nonleaf_append_child(node, child, toku_fill_dbt(&pivotkey, &k, sizeof k)); } - toku_unpin_ftnode(brt->ft, child); + toku_unpin_ftnode(ft->ft, child); } *minkey = minkeys[0]; *maxkey = maxkeys[0]; @@ -182,31 +182,31 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - // create the brt + // create the ft TOKUTXN null_txn = NULL; - FT_HANDLE brt = NULL; - r = toku_open_ft_handle(fname, 1, &brt, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); + FT_HANDLE ft = NULL; + r = toku_open_ft_handle(fname, 1, &ft, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r == 0); // make a tree int seq = 0, minkey, maxkey; - FTNODE newroot = make_tree(brt, height, fanout, nperleaf, &seq, &minkey, &maxkey); + FTNODE newroot = make_tree(ft, height, fanout, nperleaf, &seq, &minkey, &maxkey); // discard the old root block - toku_ft_set_new_root_blocknum(brt->ft, newroot->thisnodename); + 
toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root - toku_unpin_ftnode(brt->ft, newroot); + toku_unpin_ftnode(ft->ft, newroot); if (do_verify) { - r = toku_verify_ft(brt); + r = toku_verify_ft(ft); assert(r != 0); } // flush to the file system - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r == 0); // shutdown the cachetable diff --git a/storage/tokudb/ft-index/ft/tests/verify-dup-in-leaf.cc b/storage/tokudb/ft-index/ft/tests/verify-dup-in-leaf.cc index 0f9f2a1000dc2..a2c6567fdb440 100644 --- a/storage/tokudb/ft-index/ft/tests/verify-dup-in-leaf.cc +++ b/storage/tokudb/ft-index/ft/tests/verify-dup-in-leaf.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,17 +89,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." // generate a tree with a single leaf node containing duplicate keys -// check that brt verify finds them +// check that ft verify finds them #include #include "test.h" static FTNODE -make_node(FT_HANDLE brt, int height) { +make_node(FT_HANDLE ft, int height) { FTNODE node = NULL; int n_children = (height == 0) ? 1 : 0; - toku_create_new_ftnode(brt, &node, height, n_children); + toku_create_new_ftnode(ft, &node, height, n_children); if (n_children) BP_STATE(node,0) = PT_AVAIL; return node; } @@ -112,13 +112,13 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) DBT theval; toku_fill_dbt(&theval, val, vallen); // get an index that we can use to create a new leaf entry - uint32_t idx = BLB_DATA(leafnode, 0)->omt_size(); + uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs(); // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; @@ -140,33 +140,33 @@ test_dup_in_leaf(int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - // create the brt + // create the ft TOKUTXN null_txn = NULL; - FT_HANDLE brt = NULL; - r = toku_open_ft_handle(fname, 1, &brt, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); + FT_HANDLE ft = NULL; + r = toku_open_ft_handle(fname, 1, &ft, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r == 0); // discard the old root block - FTNODE newroot = make_node(brt, 0); + FTNODE newroot = make_node(ft, 0); populate_leaf(newroot, htonl(2), 1); populate_leaf(newroot, htonl(2), 2); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(brt->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root - toku_unpin_ftnode(brt->ft, newroot); + toku_unpin_ftnode(ft->ft, newroot); if (do_verify) { - r = toku_verify_ft(brt); + r = toku_verify_ft(ft); assert(r != 0); } // flush to the file system - r = toku_close_ft_handle_nolsn(brt, 0); + r = 
toku_close_ft_handle_nolsn(ft, 0); assert(r == 0); // shutdown the cachetable diff --git a/storage/tokudb/ft-index/ft/tests/verify-dup-pivots.cc b/storage/tokudb/ft-index/ft/tests/verify-dup-pivots.cc index 219f1f7da5976..4dc42a06c82ab 100644 --- a/storage/tokudb/ft-index/ft/tests/verify-dup-pivots.cc +++ b/storage/tokudb/ft-index/ft/tests/verify-dup-pivots.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,17 +88,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." -// generate a tree with duplicate pivots and check that brt->verify finds them +// generate a tree with duplicate pivots and check that ft->verify finds them #include #include "test.h" static FTNODE -make_node(FT_HANDLE brt, int height) { +make_node(FT_HANDLE ft, int height) { FTNODE node = NULL; int n_children = (height == 0) ? 1 : 0; - toku_create_new_ftnode(brt, &node, height, n_children); + toku_create_new_ftnode(ft, &node, height, n_children); if (n_children) BP_STATE(node,0) = PT_AVAIL; return node; } @@ -111,13 +111,13 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) DBT theval; toku_fill_dbt(&theval, val, vallen); // get an index that we can use to create a new leaf entry - uint32_t idx = BLB_DATA(leafnode, 0)->omt_size(); + uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs(); // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; @@ -135,17 +135,17 @@ populate_leaf(FTNODE leafnode, int seq, int n, int *minkey, int *maxkey) { } static FTNODE -make_tree(FT_HANDLE brt, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { +make_tree(FT_HANDLE ft, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { FTNODE node; if (height == 0) { - node = make_node(brt, 0); + node = make_node(ft, 0); populate_leaf(node, *seq, nperleaf, minkey, maxkey); *seq += nperleaf; } else { - node = make_node(brt, height); + node = make_node(ft, height); int minkeys[fanout], maxkeys[fanout]; for (int childnum = 0; childnum < fanout; childnum++) { - FTNODE child = make_tree(brt, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); + FTNODE child = make_tree(ft, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); if (childnum == 0) { toku_ft_nonleaf_append_child(node, child, NULL); } else { @@ -153,7 +153,7 @@ make_tree(FT_HANDLE brt, int height, int fanout, int nperleaf, int *seq, int *mi DBT pivotkey; toku_ft_nonleaf_append_child(node, child, toku_fill_dbt(&pivotkey, &k, sizeof k)); } - toku_unpin_ftnode(brt->ft, child); + toku_unpin_ftnode(ft->ft, child); } *minkey = minkeys[0]; *maxkey = maxkeys[0]; @@ -185,32 +185,32 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - // create the 
brt + // create the ft TOKUTXN null_txn = NULL; - FT_HANDLE brt = NULL; - r = toku_open_ft_handle(fname, 1, &brt, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); + FT_HANDLE ft = NULL; + r = toku_open_ft_handle(fname, 1, &ft, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r == 0); // make a tree int seq = 0, minkey, maxkey; - FTNODE newroot = make_tree(brt, height, fanout, nperleaf, &seq, &minkey, &maxkey); + FTNODE newroot = make_tree(ft, height, fanout, nperleaf, &seq, &minkey, &maxkey); // discard the old root block // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(brt->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root - toku_unpin_ftnode(brt->ft, newroot); + toku_unpin_ftnode(ft->ft, newroot); if (do_verify) { - r = toku_verify_ft(brt); + r = toku_verify_ft(ft); assert(r != 0); } // flush to the file system - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r == 0); // shutdown the cachetable diff --git a/storage/tokudb/ft-index/ft/tests/verify-misrouted-msgs.cc b/storage/tokudb/ft-index/ft/tests/verify-misrouted-msgs.cc index f68b4d72e53f8..d671dd7a7f3e8 100644 --- a/storage/tokudb/ft-index/ft/tests/verify-misrouted-msgs.cc +++ b/storage/tokudb/ft-index/ft/tests/verify-misrouted-msgs.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,17 +89,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." // generate a tree with misrouted messages in the child buffers. -// check that brt verify finds them. +// check that ft verify finds them. #include #include "test.h" static FTNODE -make_node(FT_HANDLE brt, int height) { +make_node(FT_HANDLE ft, int height) { FTNODE node = NULL; int n_children = (height == 0) ? 
1 : 0; - toku_create_new_ftnode(brt, &node, height, n_children); + toku_create_new_ftnode(ft, &node, height, n_children); if (n_children) BP_STATE(node,0) = PT_AVAIL; return node; } @@ -112,13 +112,13 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) DBT theval; toku_fill_dbt(&theval, val, vallen); // get an index that we can use to create a new leaf entry - uint32_t idx = BLB_DATA(leafnode, 0)->omt_size(); + uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs(); // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode,0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; @@ -136,7 +136,7 @@ populate_leaf(FTNODE leafnode, int seq, int n, int *minkey, int *maxkey) { } static void -insert_into_child_buffer(FT_HANDLE brt, FTNODE node, int childnum, int minkey, int maxkey) { +insert_into_child_buffer(FT_HANDLE ft, FTNODE node, int childnum, int minkey, int maxkey) { int k = htonl(maxkey); maxkey = htonl(k+1); for (unsigned int val = htonl(minkey); val <= htonl(maxkey); val++) { @@ -144,22 +144,22 @@ insert_into_child_buffer(FT_HANDLE brt, FTNODE node, int childnum, int minkey, i DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); DBT theval; toku_fill_dbt(&theval, &val, sizeof val); MSN msn = next_dummymsn(); - toku_ft_append_to_child_buffer(brt->ft->compare_fun, NULL, node, childnum, FT_INSERT, msn, xids_get_root_xids(), true, &thekey, &theval); + toku_ft_append_to_child_buffer(ft->ft->cmp, node, childnum, FT_INSERT, msn, toku_xids_get_root_xids(), true, &thekey, &theval); } } static FTNODE -make_tree(FT_HANDLE brt, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { +make_tree(FT_HANDLE ft, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { FTNODE node; if (height == 0) { - node = make_node(brt, 0); + node = make_node(ft, 0); populate_leaf(node, *seq, nperleaf, minkey, maxkey); *seq += nperleaf; } else { - node = make_node(brt, height); + node = make_node(ft, height); int minkeys[fanout], maxkeys[fanout]; for (int childnum = 0; childnum < fanout; childnum++) { - FTNODE child = make_tree(brt, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); + FTNODE child = make_tree(ft, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); if (childnum == 0) { toku_ft_nonleaf_append_child(node, child, NULL); } else { @@ -167,8 +167,8 @@ make_tree(FT_HANDLE brt, int height, int fanout, int nperleaf, int *seq, int *mi DBT pivotkey; toku_ft_nonleaf_append_child(node, child, toku_fill_dbt(&pivotkey, &k, sizeof k)); } - toku_unpin_ftnode(brt->ft, child); - insert_into_child_buffer(brt, node, childnum, minkeys[childnum], maxkeys[childnum]); + toku_unpin_ftnode(ft->ft, child); + insert_into_child_buffer(ft, node, childnum, minkeys[childnum], maxkeys[childnum]); } *minkey = minkeys[0]; *maxkey = maxkeys[0]; @@ -197,32 +197,32 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - // create the brt + // create the ft 
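Every one of the verify-*.cc tests in this patch gets the same mechanical rewrite inside its append_leaf helper: the tagged FT_MSG_S initializer becomes an ft_msg constructor, the xids_* helpers gain a toku_ prefix, omt_size() becomes num_klpairs(), the _cmd_ entry point becomes _msg_ and now takes the key length explicitly, and toku_ft_append_to_child_buffer takes the handle's cmp object in place of the old compare_fun and its extra NULL argument. A condensed sketch of the post-patch leaf helper, assembled only from calls that appear in these hunks (the name append_leaf_sketch is illustrative):

    // Insert one key/value pair into basement node 0 of a leaf node,
    // using the renamed message API exercised by the verify tests.
    static void append_leaf_sketch(FTNODE leafnode, void *key, size_t keylen,
                                   void *val, size_t vallen) {
        DBT thekey, theval;
        toku_fill_dbt(&thekey, key, keylen);
        toku_fill_dbt(&theval, val, vallen);

        // next free index: omt_size() was renamed to num_klpairs()
        uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs();

        // FT_MSG_S with designated initializers becomes an ft_msg constructor
        MSN msn = next_dummymsn();
        ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids());

        // the _cmd_ entry point is now _msg_ and takes keylen explicitly
        txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
        toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen,
                                  NULL, &gc_info, NULL, NULL);

        // don't forget to dirty the node
        leafnode->dirty = 1;
    }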
TOKUTXN null_txn = NULL; - FT_HANDLE brt = NULL; - r = toku_open_ft_handle(fname, 1, &brt, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); + FT_HANDLE ft = NULL; + r = toku_open_ft_handle(fname, 1, &ft, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r == 0); // make a tree int seq = 0, minkey, maxkey; - FTNODE newroot = make_tree(brt, height, fanout, nperleaf, &seq, &minkey, &maxkey); + FTNODE newroot = make_tree(ft, height, fanout, nperleaf, &seq, &minkey, &maxkey); // discard the old root block // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(brt->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root - toku_unpin_ftnode(brt->ft, newroot); + toku_unpin_ftnode(ft->ft, newroot); if (do_verify) { - r = toku_verify_ft(brt); + r = toku_verify_ft(ft); assert(r != 0); } // flush to the file system - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r == 0); // shutdown the cachetable diff --git a/storage/tokudb/ft-index/ft/tests/verify-unsorted-leaf.cc b/storage/tokudb/ft-index/ft/tests/verify-unsorted-leaf.cc index c1e9f6f57276f..6a1fe6d0e236b 100644 --- a/storage/tokudb/ft-index/ft/tests/verify-unsorted-leaf.cc +++ b/storage/tokudb/ft-index/ft/tests/verify-unsorted-leaf.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,17 +89,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." // generate a tree with a single leaf node containing unsorted keys -// check that brt verify finds them +// check that ft verify finds them #include #include "test.h" static FTNODE -make_node(FT_HANDLE brt, int height) { +make_node(FT_HANDLE ft, int height) { FTNODE node = NULL; int n_children = (height == 0) ? 
1 : 0; - toku_create_new_ftnode(brt, &node, height, n_children); + toku_create_new_ftnode(ft, &node, height, n_children); if (n_children) BP_STATE(node,0) = PT_AVAIL; return node; } @@ -114,13 +114,13 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) toku_fill_dbt(&theval, val, vallen); // get an index that we can use to create a new leaf entry - uint32_t idx = BLB_DATA(leafnode, 0)->omt_size(); + uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs(); // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; @@ -142,32 +142,32 @@ test_dup_in_leaf(int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - // create the brt + // create the ft TOKUTXN null_txn = NULL; - FT_HANDLE brt = NULL; - r = toku_open_ft_handle(fname, 1, &brt, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); + FT_HANDLE ft = NULL; + r = toku_open_ft_handle(fname, 1, &ft, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r == 0); // discard the old root block - FTNODE newroot = make_node(brt, 0); + FTNODE newroot = make_node(ft, 0); populate_leaf(newroot, htonl(2), 1); populate_leaf(newroot, htonl(1), 2); // set the new root to point to the new tree - toku_ft_set_new_root_blocknum(brt->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root - toku_unpin_ftnode(brt->ft, newroot); + toku_unpin_ftnode(ft->ft, newroot); if (do_verify) { - r = toku_verify_ft(brt); + r = toku_verify_ft(ft); assert(r != 0); } // flush to the file system - r = toku_close_ft_handle_nolsn(brt, 0); + r = toku_close_ft_handle_nolsn(ft, 0); assert(r == 0); // shutdown the cachetable diff --git a/storage/tokudb/ft-index/ft/tests/verify-unsorted-pivots.cc b/storage/tokudb/ft-index/ft/tests/verify-unsorted-pivots.cc index c209e679072c7..bb20733f3e1e7 100644 --- a/storage/tokudb/ft-index/ft/tests/verify-unsorted-pivots.cc +++ b/storage/tokudb/ft-index/ft/tests/verify-unsorted-pivots.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,17 +88,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." -// generate a tree with unsorted pivots and check that brt->verify finds them +// generate a tree with unsorted pivots and check that ft->verify finds them #include #include "test.h" static FTNODE -make_node(FT_HANDLE brt, int height) { +make_node(FT_HANDLE ft, int height) { FTNODE node = NULL; int n_children = (height == 0) ? 
1 : 0; - toku_create_new_ftnode(brt, &node, height, n_children); + toku_create_new_ftnode(ft, &node, height, n_children); if (n_children) BP_STATE(node,0) = PT_AVAIL; return node; } @@ -111,13 +111,13 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) DBT theval; toku_fill_dbt(&theval, val, vallen); // get an index that we can use to create a new leaf entry - uint32_t idx = BLB_DATA(leafnode, 0)->omt_size(); + uint32_t idx = BLB_DATA(leafnode, 0)->num_klpairs(); // apply an insert to the leaf node MSN msn = next_dummymsn(); - FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; + ft_msg msg(&thekey, &theval, FT_INSERT, msn, toku_xids_get_root_xids()); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false); - toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL); + toku_ft_bn_apply_msg_once(BLB(leafnode, 0), msg, idx, keylen, NULL, &gc_info, NULL, NULL); // dont forget to dirty the node leafnode->dirty = 1; @@ -135,17 +135,17 @@ populate_leaf(FTNODE leafnode, int seq, int n, int *minkey, int *maxkey) { } static FTNODE -make_tree(FT_HANDLE brt, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { +make_tree(FT_HANDLE ft, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { FTNODE node; if (height == 0) { - node = make_node(brt, 0); + node = make_node(ft, 0); populate_leaf(node, *seq, nperleaf, minkey, maxkey); *seq += nperleaf; } else { - node = make_node(brt, height); + node = make_node(ft, height); int minkeys[fanout], maxkeys[fanout]; for (int childnum = 0; childnum < fanout; childnum++) { - FTNODE child = make_tree(brt, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); + FTNODE child = make_tree(ft, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); if (childnum == 0) { toku_ft_nonleaf_append_child(node, child, NULL); } else { @@ -153,7 +153,7 @@ make_tree(FT_HANDLE brt, int height, int fanout, int nperleaf, int *seq, int *mi DBT pivotkey; toku_ft_nonleaf_append_child(node, child, toku_fill_dbt(&pivotkey, &k, sizeof k)); } - toku_unpin_ftnode(brt->ft, child); + toku_unpin_ftnode(ft->ft, child); } *minkey = minkeys[0]; *maxkey = maxkeys[0]; @@ -182,31 +182,31 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) { // create a cachetable CACHETABLE ct = NULL; - toku_cachetable_create(&ct, 0, ZERO_LSN, NULL_LOGGER); + toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); - // create the brt + // create the ft TOKUTXN null_txn = NULL; - FT_HANDLE brt = NULL; - r = toku_open_ft_handle(fname, 1, &brt, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); + FT_HANDLE ft = NULL; + r = toku_open_ft_handle(fname, 1, &ft, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r == 0); // make a tree int seq = 0, minkey, maxkey; - FTNODE newroot = make_tree(brt, height, fanout, nperleaf, &seq, &minkey, &maxkey); + FTNODE newroot = make_tree(ft, height, fanout, nperleaf, &seq, &minkey, &maxkey); // discard the old root block - toku_ft_set_new_root_blocknum(brt->ft, newroot->thisnodename); + toku_ft_set_new_root_blocknum(ft->ft, newroot->blocknum); // unpin the new root - toku_unpin_ftnode(brt->ft, newroot); + toku_unpin_ftnode(ft->ft, newroot); if (do_verify) { - r = toku_verify_ft(brt); + r = toku_verify_ft(ft); assert(r != 0); } // flush to the file system - r = toku_close_ft_handle_nolsn(brt, 0); + r = 
toku_close_ft_handle_nolsn(ft, 0); assert(r == 0); // shutdown the cachetable diff --git a/storage/tokudb/ft-index/ft/tests/xid_lsn_independent.cc b/storage/tokudb/ft-index/ft/tests/xid_lsn_independent.cc index 88dcdb17e0528..545fcf4d927db 100644 --- a/storage/tokudb/ft-index/ft/tests/xid_lsn_independent.cc +++ b/storage/tokudb/ft-index/ft/tests/xid_lsn_independent.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #include "test.h" #include "toku_os.h" -#include "checkpoint.h" +#include "cachetable/checkpoint.h" #define ENVDIR TOKU_TEST_FILENAME #include "test-ft-txns.h" @@ -119,13 +119,13 @@ static void test_xid_lsn_independent(int N) { test_setup(TOKU_TEST_FILENAME, &logger, &ct); - FT_HANDLE brt; + FT_HANDLE ft; TOKUTXN txn; r = toku_txn_begin_txn((DB_TXN*)NULL, (TOKUTXN)0, &txn, logger, TXN_SNAPSHOT_NONE, false); CKERR(r); - r = toku_open_ft_handle("ftfile", 1, &brt, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, txn, toku_builtin_compare_fun); + r = toku_open_ft_handle("ftfile", 1, &ft, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, txn, toku_builtin_compare_fun); CKERR(r); r = toku_txn_commit_txn(txn, false, NULL, NULL); @@ -143,7 +143,7 @@ static void test_xid_lsn_independent(int N) { snprintf(key, sizeof(key), "key%x.%x", rands[i], i); memset(val, 'v', sizeof(val)); val[sizeof(val)-1]=0; - toku_ft_insert(brt, toku_fill_dbt(&k, key, 1+strlen(key)), toku_fill_dbt(&v, val, 1+strlen(val)), txn); + toku_ft_insert(ft, toku_fill_dbt(&k, key, 1+strlen(key)), toku_fill_dbt(&v, val, 1+strlen(val)), txn); } { TOKUTXN txn2; @@ -172,7 +172,7 @@ static void test_xid_lsn_independent(int N) { CHECKPOINTER cp = toku_cachetable_get_checkpointer(ct); r = toku_checkpoint(cp, logger, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT); CKERR(r); - r = toku_close_ft_handle_nolsn(brt, NULL); + r = toku_close_ft_handle_nolsn(ft, NULL); CKERR(r); clean_shutdown(&logger, &ct); diff --git a/storage/tokudb/ft-index/ft/tests/ybt-test.cc b/storage/tokudb/ft-index/ft/tests/ybt-test.cc index 5e3c6f4b1a7e8..d53c03718e960 100644 --- a/storage/tokudb/ft-index/ft/tests/ybt-test.cc +++ b/storage/tokudb/ft-index/ft/tests/ybt-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -111,11 +111,11 @@ static void ybt_test0 (void) { toku_init_dbt(&t0); toku_init_dbt(&t1); { - bytevec temp1 = "hello"; + const void *temp1 = "hello"; toku_dbt_set(6, temp1, &t0, &v0); } { - bytevec temp2 = "foo"; + const void *temp2 = "foo"; toku_dbt_set( 4, temp2, &t1, &v1); } assert(t0.size==6); @@ -124,7 +124,7 @@ static void ybt_test0 (void) { assert(strcmp((char*)t1.data, "foo")==0); { - bytevec temp3 = "byebye"; + const void *temp3 = "byebye"; toku_dbt_set(7, temp3, &t1, &v0); /* Use v0, not v1 */ } // This assertion would be wrong, since v0 may have been realloc'd, and t0.data may now point @@ -141,7 +141,7 @@ static void ybt_test0 (void) { t0.flags = DB_DBT_USERMEM; t0.ulen = 0; { - bytevec temp4 = "hello"; + const void *temp4 = "hello"; toku_dbt_set(6, temp4, &t0, 0); } assert(t0.data==0); @@ -152,7 +152,7 @@ static void ybt_test0 (void) { t0.flags = DB_DBT_REALLOC; cleanup(&v0); { - bytevec temp5 = "internationalization"; + const void *temp5 = "internationalization"; toku_dbt_set(21, temp5, &t0, &v0); } assert(v0.data==0); /* Didn't change v0 */ @@ -160,7 +160,7 @@ static void ybt_test0 (void) { assert(strcmp((char*)t0.data, "internationalization")==0); { - bytevec temp6 = "provincial"; + const void *temp6 = "provincial"; toku_dbt_set(11, temp6, &t0, &v0); } assert(t0.size==11); diff --git a/storage/tokudb/ft-index/ft/tokuconst.h b/storage/tokudb/ft-index/ft/tokuconst.h deleted file mode 100644 index 73ac3a6a693b4..0000000000000 --- a/storage/tokudb/ft-index/ft/tokuconst.h +++ /dev/null @@ -1,108 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKUCONST_H -#define TOKUCONST_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. 
- -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -/* The number of transaction ids stored in the xids structure is - * represented by an 8-bit value. The value 255 is reserved. - * The constant MAX_NESTED_TRANSACTIONS is one less because - * one slot in the packed leaf entry is used for the implicit - * root transaction (id 0). 
- */ - - -enum {MAX_NESTED_TRANSACTIONS = 253}; -enum {MAX_TRANSACTION_RECORDS = MAX_NESTED_TRANSACTIONS + 1}; - - -#endif - diff --git a/storage/tokudb/ft-index/ft/txn.h b/storage/tokudb/ft-index/ft/txn.h deleted file mode 100644 index 053e24aec1a9a..0000000000000 --- a/storage/tokudb/ft-index/ft/txn.h +++ /dev/null @@ -1,227 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKUTXN_H -#define TOKUTXN_H - -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. 
This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - -#include "txn_manager.h" - -void txn_status_init(void); -void txn_status_destroy(void); - - -inline bool txn_pair_is_none(TXNID_PAIR txnid) { - return txnid.parent_id64 == TXNID_NONE && txnid.child_id64 == TXNID_NONE; -} - -inline bool txn_needs_snapshot(TXN_SNAPSHOT_TYPE snapshot_type, TOKUTXN parent) { - // we need a snapshot if the snapshot type is a child or - // if the snapshot type is root and we have no parent. - // Cases that we don't need a snapshot: when snapshot type is NONE - // or when it is ROOT and we have a parent - return (snapshot_type != TXN_SNAPSHOT_NONE && (parent==NULL || snapshot_type == TXN_SNAPSHOT_CHILD)); -} - -void toku_txn_lock(TOKUTXN txn); -void toku_txn_unlock(TOKUTXN txn); - -uint64_t toku_txn_get_root_id(TOKUTXN txn); -bool txn_declared_read_only(TOKUTXN txn); - -int toku_txn_begin_txn ( - DB_TXN *container_db_txn, - TOKUTXN parent_tokutxn, - TOKUTXN *tokutxn, - TOKULOGGER logger, - TXN_SNAPSHOT_TYPE snapshot_type, - bool read_only - ); - -DB_TXN * toku_txn_get_container_db_txn (TOKUTXN tokutxn); -void toku_txn_set_container_db_txn (TOKUTXN, DB_TXN*); - -// toku_txn_begin_with_xid is called from recovery and has no containing DB_TXN -int toku_txn_begin_with_xid ( - TOKUTXN parent_tokutxn, - TOKUTXN *tokutxn, - TOKULOGGER logger, - TXNID_PAIR xid, - TXN_SNAPSHOT_TYPE snapshot_type, - DB_TXN *container_db_txn, - bool for_recovery, - bool read_only - ); - -void toku_txn_update_xids_in_txn(TOKUTXN txn, TXNID xid); - -int toku_txn_load_txninfo (TOKUTXN txn, TXNINFO info); - -int toku_txn_commit_txn (TOKUTXN txn, int nosync, - TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); -int toku_txn_commit_with_lsn(TOKUTXN txn, int nosync, LSN oplsn, - TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); - -int toku_txn_abort_txn(TOKUTXN txn, - TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); -int toku_txn_abort_with_lsn(TOKUTXN txn, LSN oplsn, - TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); - -int toku_txn_discard_txn(TOKUTXN); - -void toku_txn_prepare_txn (TOKUTXN txn, TOKU_XA_XID *xid); -// Effect: Do the internal work of preparing a transaction (does not log the prepare record). - -void toku_txn_get_prepared_xa_xid (TOKUTXN, TOKU_XA_XID *); -// Effect: Fill in the XID information for a transaction. The caller allocates the XID and the function fills in values. 
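The header deleted here (it reappears as ft/txn/txn.h later in this patch) declares the transaction lifecycle that the tests above, such as xid_lsn_independent.cc, drive directly: begin a txn, do work under it, commit or abort, then close it. Below is a minimal sketch of that flow against an already-open FT_HANDLE, using only entry points declared in this header or exercised in the tests. Error handling is collapsed to asserts, the helper name is illustrative, and the explicit close after commit follows this header's own lifecycle comments:

    // Hedged sketch: one insert under a root transaction, then commit.
    static void insert_one_under_txn(FT_HANDLE ft, TOKULOGGER logger) {
        TOKUTXN txn = NULL;
        int r = toku_txn_begin_txn((DB_TXN *) NULL, (TOKUTXN) NULL, &txn,
                                   logger, TXN_SNAPSHOT_NONE, false /*read_only*/);
        assert(r == 0);

        char key[32], val[32];
        snprintf(key, sizeof(key), "key0");
        snprintf(val, sizeof(val), "val0");
        DBT k, v;
        toku_ft_insert(ft,
                       toku_fill_dbt(&k, key, 1 + strlen(key)),
                       toku_fill_dbt(&v, val, 1 + strlen(val)),
                       txn);

        r = toku_txn_commit_txn(txn, false, NULL, NULL);  // or toku_txn_abort_txn(txn, NULL, NULL)
        assert(r == 0);
        toku_txn_close_txn(txn);  // "Complete and destroy a txn"
    }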
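Similarly, the tokuconst.h removed a little above pins down the nesting limit these transaction routines rely on: the xids structure records its depth in an 8-bit field, the value 255 is reserved, and one of the remaining slots is taken by the implicit root transaction (id 0), which is how the header arrives at 253. A small standalone restatement of that arithmetic; only the two enum values come from the deleted header, the asserts are illustrative:

    // Values copied from the deleted tokuconst.h.
    enum { MAX_NESTED_TRANSACTIONS = 253 };
    enum { MAX_TRANSACTION_RECORDS = MAX_NESTED_TRANSACTIONS + 1 };

    // 8-bit depth field, the value 255 is reserved, so 254 records remain...
    static_assert(MAX_TRANSACTION_RECORDS == 255 - 1,
                  "one 8-bit value is reserved, leaving 254 transaction records");
    // ...and the implicit root transaction (id 0) uses one of them.
    static_assert(MAX_NESTED_TRANSACTIONS == MAX_TRANSACTION_RECORDS - 1,
                  "the root transaction occupies one record");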
- -void toku_txn_maybe_fsync_log(TOKULOGGER logger, LSN do_fsync_lsn, bool do_fsync); - -void toku_txn_get_fsync_info(TOKUTXN ttxn, bool* do_fsync, LSN* do_fsync_lsn); - -// Complete and destroy a txn -void toku_txn_close_txn(TOKUTXN txn); - -// Remove a txn from any live txn lists -void toku_txn_complete_txn(TOKUTXN txn); - -// Free the memory of a txn -void toku_txn_destroy_txn(TOKUTXN txn); - -XIDS toku_txn_get_xids (TOKUTXN); - -// Force fsync on commit -void toku_txn_force_fsync_on_commit(TOKUTXN txn); - -typedef enum { - TXN_BEGIN, // total number of transactions begun (does not include recovered txns) - TXN_READ_BEGIN, // total number of read only transactions begun (does not include recovered txns) - TXN_COMMIT, // successful commits - TXN_ABORT, - TXN_STATUS_NUM_ROWS -} txn_status_entry; - -typedef struct { - bool initialized; - TOKU_ENGINE_STATUS_ROW_S status[TXN_STATUS_NUM_ROWS]; -} TXN_STATUS_S, *TXN_STATUS; - -void toku_txn_get_status(TXN_STATUS s); - -bool toku_is_txn_in_live_root_txn_list(const xid_omt_t &live_root_txn_list, TXNID xid); - -TXNID toku_get_oldest_in_live_root_txn_list(TOKUTXN txn); - -#include "txn_state.h" - -TOKUTXN_STATE toku_txn_get_state(TOKUTXN txn); - -struct tokulogger_preplist { - TOKU_XA_XID xid; - DB_TXN *txn; -}; -int toku_logger_recover_txn (TOKULOGGER logger, struct tokulogger_preplist preplist[/*count*/], long count, /*out*/ long *retp, uint32_t flags); - -void toku_maybe_log_begin_txn_for_write_operation(TOKUTXN txn); - -// Return whether txn (or it's descendents) have done no work. -bool toku_txn_is_read_only(TOKUTXN txn); - -void toku_txn_lock_state(TOKUTXN txn); -void toku_txn_unlock_state(TOKUTXN txn); -void toku_txn_pin_live_txn_unlocked(TOKUTXN txn); -void toku_txn_unpin_live_txn(TOKUTXN txn); - -bool toku_txn_has_spilled_rollback(TOKUTXN txn); - -uint64_t toku_txn_get_client_id(TOKUTXN txn); -void toku_txn_set_client_id(TOKUTXN txn, uint64_t client_id); - -#endif //TOKUTXN_H diff --git a/storage/tokudb/ft-index/ft/roll.cc b/storage/tokudb/ft-index/ft/txn/roll.cc similarity index 93% rename from storage/tokudb/ft-index/ft/roll.cc rename to storage/tokudb/ft-index/ft/txn/roll.cc index 78fac7df875ef..affa9fa802c6d 100644 --- a/storage/tokudb/ft-index/ft/roll.cc +++ b/storage/tokudb/ft-index/ft/txn/roll.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,14 +91,13 @@ PATENT RIGHTS GRANT: /* rollback and rollforward routines. 
*/ -#include -#include "ft.h" -#include "ft-ops.h" -#include "log-internal.h" -//#include "txn_manager.h" -#include "xids.h" -#include "rollback-apply.h" +#include "ft/ft.h" +#include "ft/ft-ops.h" +#include "ft/log_header.h" +#include "ft/logger/log-internal.h" +#include "ft/txn/xids.h" +#include "ft/txn/rollback-apply.h" // functionality provided by roll.c is exposed by an autogenerated // header file, logheader.h @@ -220,9 +219,9 @@ toku_rollback_fcreate (FILENUM filenum, return 0; } -int find_ft_from_filenum (const FT &h, const FILENUM &filenum); -int find_ft_from_filenum (const FT &h, const FILENUM &filenum) { - FILENUM thisfnum = toku_cachefile_filenum(h->cf); +int find_ft_from_filenum (const FT &ft, const FILENUM &filenum); +int find_ft_from_filenum (const FT &ft, const FILENUM &filenum) { + FILENUM thisfnum = toku_cachefile_filenum(ft->cf); if (thisfnum.fileid<filenum.fileid) return -1; if (thisfnum.fileid>filenum.fileid) return +1; return 0; @@ -236,9 +235,8 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key, bool reset_root_xid_that_created) { int r = 0; //printf("%s:%d committing insert %s %s\n", __FILE__, __LINE__, key.data, data.data); - FT h; - h = NULL; - r = txn->open_fts.find_zero(filenum, &h, NULL); + FT ft = nullptr; + r = txn->open_fts.find_zero(filenum, &ft, NULL); if (r == DB_NOTFOUND) { assert(txn->for_recovery); r = 0; @@ -247,7 +245,7 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key, assert(r==0); if (oplsn.lsn != 0) { // if we are executing the recovery algorithm - LSN treelsn = toku_ft_checkpoint_lsn(h); + LSN treelsn = toku_ft_checkpoint_lsn(ft); if (oplsn.lsn <= treelsn.lsn) { // if operation was already applied to tree ... r = 0; // ... do not apply it again. goto done; @@ -258,13 +256,11 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key, XIDS xids; xids = toku_txn_get_xids(txn); { - FT_MSG_S ftcmd = { type, ZERO_MSN, xids, - .u = { .id = { (key.len > 0) - ? toku_fill_dbt(&key_dbt, key.data, key.len) - : toku_init_dbt(&key_dbt), - data - ? toku_fill_dbt(&data_dbt, data->data, data->len) - : toku_init_dbt(&data_dbt) } } }; + const DBT *kdbt = key.len > 0 ? toku_fill_dbt(&key_dbt, key.data, key.len) : + toku_init_dbt(&key_dbt); + const DBT *vdbt = data ? 
toku_fill_dbt(&data_dbt, data->data, data->len) : + toku_init_dbt(&data_dbt); + ft_msg msg(kdbt, vdbt, type, ZERO_MSN, xids); TXN_MANAGER txn_manager = toku_logger_get_txn_manager(txn->logger); txn_manager_state txn_state_for_gc(txn_manager); @@ -275,10 +271,10 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key, // no messages above us, we can implicitly promote uxrs based on this xid oldest_referenced_xid_estimate, !txn->for_recovery); - toku_ft_root_put_cmd(h, &ftcmd, &gc_info); + toku_ft_root_put_msg(ft, msg, &gc_info); if (reset_root_xid_that_created) { - TXNID new_root_xid_that_created = xids_get_outermost_xid(xids); - toku_reset_root_xid_that_created(h, new_root_xid_that_created); + TXNID new_root_xid_that_created = toku_xids_get_outermost_xid(xids); + toku_reset_root_xid_that_created(ft, new_root_xid_that_created); } } done: @@ -579,15 +575,15 @@ toku_rollback_dictionary_redirect (FILENUM old_filenum, CACHEFILE new_cf = NULL; r = toku_cachefile_of_filenum(txn->logger->ct, new_filenum, &new_cf); assert(r == 0); - FT CAST_FROM_VOIDP(new_h, toku_cachefile_get_userdata(new_cf)); + FT CAST_FROM_VOIDP(new_ft, toku_cachefile_get_userdata(new_cf)); CACHEFILE old_cf = NULL; r = toku_cachefile_of_filenum(txn->logger->ct, old_filenum, &old_cf); assert(r == 0); - FT CAST_FROM_VOIDP(old_h, toku_cachefile_get_userdata(old_cf)); + FT CAST_FROM_VOIDP(old_ft, toku_cachefile_get_userdata(old_cf)); //Redirect back from new to old. - r = toku_dictionary_redirect_abort(old_h, new_h, txn); + r = toku_dictionary_redirect_abort(old_ft, new_ft, txn); assert(r==0); } return r; diff --git a/storage/tokudb/ft-index/ft/rollback-apply.cc b/storage/tokudb/ft-index/ft/txn/rollback-apply.cc similarity index 97% rename from storage/tokudb/ft-index/ft/rollback-apply.cc rename to storage/tokudb/ft-index/ft/txn/rollback-apply.cc index d707753471ab0..258994223cc55 100644 --- a/storage/tokudb/ft-index/ft/rollback-apply.cc +++ b/storage/tokudb/ft-index/ft/txn/rollback-apply.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,9 +89,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "fttypes.h" -#include "log-internal.h" -#include "rollback-apply.h" +#include "ft/logger/log-internal.h" +#include "ft/txn/rollback-apply.h" static void poll_txn_progress_function(TOKUTXN txn, uint8_t is_commit, uint8_t stall_for_checkpoint) { if (txn->progress_poll_fun) { @@ -254,9 +253,9 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) { } child_log->newest_logentry = child_log->oldest_logentry = 0; // Put all the memarena data into the parent. - if (memarena_total_size_in_use(child_log->rollentry_arena) > 0) { + if (child_log->rollentry_arena.total_size_in_use() > 0) { // If there are no bytes to move, then just leave things alone, and let the memory be reclaimed on txn is closed. 
- memarena_move_buffers(parent_log->rollentry_arena, child_log->rollentry_arena); + child_log->rollentry_arena.move_memory(&parent_log->rollentry_arena); } // each txn tries to give back at most one rollback log node // to the cache. All other rollback log nodes for this child @@ -277,7 +276,7 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) { toku_txn_unlock(txn->parent); } - // Note the open brts, the omts must be merged + // Note the open FTs, the omts must be merged r = txn->open_fts.iterate(txn); assert(r==0); diff --git a/storage/tokudb/ft-index/ft/rollback-apply.h b/storage/tokudb/ft-index/ft/txn/rollback-apply.h similarity index 97% rename from storage/tokudb/ft-index/ft/rollback-apply.h rename to storage/tokudb/ft-index/ft/txn/rollback-apply.h index 6016b5523dfa4..3d91c154a32b6 100644 --- a/storage/tokudb/ft-index/ft/rollback-apply.h +++ b/storage/tokudb/ft-index/ft/txn/rollback-apply.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef ROLLBACK_APPLY_H -#define ROLLBACK_APPLY_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,10 +87,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - typedef int(*apply_rollback_item)(TOKUTXN txn, struct roll_entry *item, LSN lsn); int toku_commit_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn); int toku_abort_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn); @@ -100,5 +99,3 @@ int toku_abort_rollback_item (TOKUTXN txn, struct roll_entry *item, LSN lsn); int toku_rollback_commit(TOKUTXN txn, LSN lsn); int toku_rollback_abort(TOKUTXN txn, LSN lsn); int toku_rollback_discard(TOKUTXN txn); - -#endif // ROLLBACK_APPLY_H diff --git a/storage/tokudb/ft-index/ft/rollback-ct-callbacks.cc b/storage/tokudb/ft-index/ft/txn/rollback-ct-callbacks.cc similarity index 95% rename from storage/tokudb/ft-index/ft/rollback-ct-callbacks.cc rename to storage/tokudb/ft-index/ft/txn/rollback-ct-callbacks.cc index bea70a5155e84..bb60e78773517 100644 --- a/storage/tokudb/ft-index/ft/rollback-ct-callbacks.cc +++ b/storage/tokudb/ft-index/ft/txn/rollback-ct-callbacks.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,15 +89,16 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
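The rollback-apply.cc hunk above is one instance of a broader conversion in this patch: the opaque MEMARENA handle manipulated through free functions becomes a memarena object embedded in the rollback log node (see the rollback.h changes further down), with the same operations exposed as member functions. A hedged sketch of the commit-time hand-off shown in toku_rollback_commit, keeping the removed calls as comments:

    #include "ft/txn/rollback.h"  // struct rollback_log_node with its memarena member

    // Donate a committing child's rollback entries to its parent's arena.
    static void merge_child_rollback_arena(struct rollback_log_node *child_log,
                                           struct rollback_log_node *parent_log) {
        // was: memarena_total_size_in_use(child_log->rollentry_arena) > 0
        if (child_log->rollentry_arena.total_size_in_use() > 0) {
            // was: memarena_move_buffers(parent_log->rollentry_arena,
            //                            child_log->rollentry_arena)
            child_log->rollentry_arena.move_memory(&parent_log->rollentry_arena);
        }
    }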
-#include "rollback-ct-callbacks.h" +#include "portability/memory.h" +#include "portability/toku_portability.h" -#include -#include -#include "ft-internal.h" -#include "fttypes.h" -#include "memarena.h" -#include "rollback.h" +#include "ft/serialize/block_table.h" +#include "ft/ft-internal.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/txn/rollback.h" +#include "ft/txn/rollback-ct-callbacks.h" +#include "util/memarena.h" // Address used as a sentinel. Otherwise unused. static struct serialized_rollback_log_node cloned_rollback; @@ -125,8 +126,7 @@ toku_rollback_flush_unused_log( { if (write_me) { DISKOFF offset; - toku_blocknum_realloc_on_disk(ft->blocktable, logname, 0, &offset, - ft, fd, for_checkpoint); + ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint, INT_MAX); } if (!keep_me && !is_clone) { toku_free(log); diff --git a/storage/tokudb/ft-index/ft/rollback-ct-callbacks.h b/storage/tokudb/ft-index/ft/txn/rollback-ct-callbacks.h similarity index 96% rename from storage/tokudb/ft-index/ft/rollback-ct-callbacks.h rename to storage/tokudb/ft-index/ft/txn/rollback-ct-callbacks.h index aeb4650e17def..aee13f2e94d32 100644 --- a/storage/tokudb/ft-index/ft/rollback-ct-callbacks.h +++ b/storage/tokudb/ft-index/ft/txn/rollback-ct-callbacks.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef ROLLBACK_CT_CALLBACKS_H -#define ROLLBACK_CT_CALLBACKS_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,12 +87,12 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include "cachetable.h" -#include "fttypes.h" +#include "ft/cachetable/cachetable.h" void toku_rollback_flush_callback(CACHEFILE cachefile, int fd, BLOCKNUM logname, void *rollback_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool UU(is_clone)); int toku_rollback_fetch_callback(CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM logname, uint32_t fullhash, void **rollback_pv, void** UU(disk_data), PAIR_ATTR *sizep, int * UU(dirtyp), void *extraargs); @@ -123,7 +121,7 @@ int toku_rollback_cleaner_callback ( void* UU(extraargs) ); -static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_rollback_log(FT h) { +static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_rollback_log(FT ft) { CACHETABLE_WRITE_CALLBACK wc; wc.flush_callback = toku_rollback_flush_callback; wc.pe_est_callback = toku_rollback_pe_est_callback; @@ -131,9 +129,6 @@ static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_rollback_log(FT wc.cleaner_callback = toku_rollback_cleaner_callback; wc.clone_callback = toku_rollback_clone_callback; wc.checkpoint_complete_callback = nullptr; - wc.write_extraargs = h; + wc.write_extraargs = ft; return wc; } - - -#endif // ROLLBACK_CT_CALLBACKS_H diff --git a/storage/tokudb/ft-index/ft/rollback.cc b/storage/tokudb/ft-index/ft/txn/rollback.cc similarity index 93% rename from storage/tokudb/ft-index/ft/rollback.cc rename to storage/tokudb/ft-index/ft/txn/rollback.cc index 3b57049a33ef2..54a7d9b58ae90 100644 --- a/storage/tokudb/ft-index/ft/rollback.cc +++ b/storage/tokudb/ft-index/ft/txn/rollback.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -91,25 +91,21 @@ PATENT RIGHTS GRANT: #include -#include "ft.h" -#include "log-internal.h" -#include "rollback-ct-callbacks.h" +#include "ft/serialize/block_table.h" +#include "ft/ft.h" +#include "ft/logger/log-internal.h" +#include "ft/txn/rollback-ct-callbacks.h" static void rollback_unpin_remove_callback(CACHEKEY* cachekey, bool for_checkpoint, void* extra) { - FT CAST_FROM_VOIDP(h, extra); - toku_free_blocknum( - h->blocktable, - cachekey, - h, - for_checkpoint - ); + FT CAST_FROM_VOIDP(ft, extra); + ft->blocktable.free_blocknum(cachekey, ft, for_checkpoint); } void toku_rollback_log_unpin_and_remove(TOKUTXN txn, ROLLBACK_LOG_NODE log) { int r; CACHEFILE cf = txn->logger->rollback_cachefile; - FT CAST_FROM_VOIDP(h, toku_cachefile_get_userdata(cf)); - r = toku_cachetable_unpin_and_remove (cf, log->ct_pair, rollback_unpin_remove_callback, h); + FT CAST_FROM_VOIDP(ft, toku_cachefile_get_userdata(cf)); + r = toku_cachetable_unpin_and_remove (cf, log->ct_pair, rollback_unpin_remove_callback, ft); assert(r == 0); } @@ -120,13 +116,17 @@ toku_find_xid_by_xid (const TXNID &xid, const TXNID &xidfind) { return 0; } +// TODO: fix this name +// toku_rollback_malloc void *toku_malloc_in_rollback(ROLLBACK_LOG_NODE log, size_t size) { - return malloc_in_memarena(log->rollentry_arena, size); + return log->rollentry_arena.malloc_from_arena(size); } +// TODO: fix this name +// toku_rollback_memdup void *toku_memdup_in_rollback(ROLLBACK_LOG_NODE log, const void *v, size_t len) { - void *r=toku_malloc_in_rollback(log, len); - memcpy(r,v,len); + void *r = toku_malloc_in_rollback(log, len); + memcpy(r, v, len); return r; } @@ -145,8 +145,8 @@ static inline PAIR_ATTR make_rollback_pair_attr(long size) { PAIR_ATTR rollback_memory_size(ROLLBACK_LOG_NODE log) { size_t size = sizeof(*log); - if (log->rollentry_arena) { - size += memarena_total_memory_size(log->rollentry_arena); + if (&log->rollentry_arena) { + size += log->rollentry_arena.total_footprint(); } return make_rollback_pair_attr(size); } @@ -175,12 +175,10 @@ void rollback_empty_log_init(ROLLBACK_LOG_NODE log) { log->previous = make_blocknum(0); log->oldest_logentry = NULL; log->newest_logentry = NULL; - log->rollentry_arena = NULL; + log->rollentry_arena.create(0); log->rollentry_resident_bytecount = 0; } - - static void rollback_initialize_for_txn( ROLLBACK_LOG_NODE log, TOKUTXN txn, @@ -192,13 +190,14 @@ static void rollback_initialize_for_txn( log->previous = previous; log->oldest_logentry = NULL; log->newest_logentry = NULL; - log->rollentry_arena = memarena_create(); + log->rollentry_arena.create(1024); log->rollentry_resident_bytecount = 0; log->dirty = true; } +// TODO: fix this name void make_rollback_log_empty(ROLLBACK_LOG_NODE log) { - memarena_close(&log->rollentry_arena); + log->rollentry_arena.destroy(); rollback_empty_log_init(log); } @@ -217,7 +216,7 @@ static void rollback_log_create ( CACHEFILE cf = txn->logger->rollback_cachefile; FT CAST_FROM_VOIDP(ft, toku_cachefile_get_userdata(cf)); rollback_initialize_for_txn(log, txn, previous); - toku_allocate_blocknum(ft->blocktable, &log->blocknum, ft); + ft->blocktable.allocate_blocknum(&log->blocknum, ft); const uint32_t hash = toku_cachetable_hash(ft->cf, log->blocknum); *result = log; toku_cachetable_put(cf, log->blocknum, hash, @@ -267,7 +266,7 @@ int find_filenum (const FT &h, const FT &hfind) { return 0; } -//Notify a transaction that it has touched a brt. +//Notify a transaction that it has touched an ft. 
void toku_txn_maybe_note_ft (TOKUTXN txn, FT ft) { toku_txn_lock(txn); FT ftv; diff --git a/storage/tokudb/ft-index/ft/rollback.h b/storage/tokudb/ft-index/ft/txn/rollback.h similarity index 94% rename from storage/tokudb/ft-index/ft/rollback.h rename to storage/tokudb/ft-index/ft/txn/rollback.h index 2e9493b0e6bf5..c9f779e677ba3 100644 --- a/storage/tokudb/ft-index/ft/rollback.h +++ b/storage/tokudb/ft-index/ft/txn/rollback.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_ROLLBACK_H -#define TOKU_ROLLBACK_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,11 +87,19 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include "sub_block.h" +#include "ft/cachetable/cachetable.h" +#include "ft/serialize/sub_block.h" +#include "ft/txn/txn.h" + +#include "util/memarena.h" + +typedef struct rollback_log_node *ROLLBACK_LOG_NODE; +typedef struct serialized_rollback_log_node *SERIALIZED_ROLLBACK_LOG_NODE; void toku_poll_txn_progress_function(TOKUTXN txn, uint8_t is_commit, uint8_t stall_for_checkpoint); @@ -132,7 +138,7 @@ void *toku_memdup_in_rollback(ROLLBACK_LOG_NODE log, const void *v, size_t len); // if necessary. void toku_maybe_spill_rollbacks(TOKUTXN txn, ROLLBACK_LOG_NODE log); -void toku_txn_maybe_note_ft (TOKUTXN txn, FT h); +void toku_txn_maybe_note_ft (TOKUTXN txn, struct ft *ft); int toku_logger_txn_rollback_stats(TOKUTXN txn, struct txn_stat *txn_stat); int toku_find_xid_by_xid (const TXNID &xid, const TXNID &xidfind); @@ -160,7 +166,7 @@ struct rollback_log_node { BLOCKNUM previous; struct roll_entry *oldest_logentry; struct roll_entry *newest_logentry; - MEMARENA rollentry_arena; + memarena rollentry_arena; size_t rollentry_resident_bytecount; // How many bytes for the rollentries that are stored in main memory. PAIR ct_pair; }; @@ -172,6 +178,7 @@ struct serialized_rollback_log_node { BLOCKNUM blocknum; struct sub_block sub_block[max_sub_blocks]; }; +typedef struct serialized_rollback_log_node *SERIALIZED_ROLLBACK_LOG_NODE; static inline void toku_static_serialized_rollback_log_destroy(SERIALIZED_ROLLBACK_LOG_NODE log) { @@ -190,6 +197,3 @@ void make_rollback_log_empty(ROLLBACK_LOG_NODE log); static inline bool rollback_log_is_unused(ROLLBACK_LOG_NODE log) { return (log->txnid.parent_id64 == TXNID_NONE); } - - -#endif // TOKU_ROLLBACK_H diff --git a/storage/tokudb/ft-index/ft/rollback_log_node_cache.cc b/storage/tokudb/ft-index/ft/txn/rollback_log_node_cache.cc similarity index 97% rename from storage/tokudb/ft-index/ft/rollback_log_node_cache.cc rename to storage/tokudb/ft-index/ft/txn/rollback_log_node_cache.cc index b45e4c03bb847..95a54d6fd7697 100644 --- a/storage/tokudb/ft-index/ft/rollback_log_node_cache.cc +++ b/storage/tokudb/ft-index/ft/txn/rollback_log_node_cache.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,9 +90,9 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include -#include +#include -#include "rollback_log_node_cache.h" +#include "txn/rollback_log_node_cache.h" void rollback_log_node_cache::init (uint32_t max_num_avail_nodes) { XMALLOC_N(max_num_avail_nodes, m_avail_blocknums); diff --git a/storage/tokudb/ft-index/ft/rollback_log_node_cache.h b/storage/tokudb/ft-index/ft/txn/rollback_log_node_cache.h similarity index 96% rename from storage/tokudb/ft-index/ft/rollback_log_node_cache.h rename to storage/tokudb/ft-index/ft/txn/rollback_log_node_cache.h index 0db99faf23b01..4aa9daee20705 100644 --- a/storage/tokudb/ft-index/ft/rollback_log_node_cache.h +++ b/storage/tokudb/ft-index/ft/txn/rollback_log_node_cache.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_ROLLBACK_LOG_NODE_CACHE_H -#define TOKU_ROLLBACK_LOG_NODE_CACHE_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,10 +87,12 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "rollback.h" +#include "ft/txn/rollback.h" class rollback_log_node_cache { public: @@ -115,5 +115,3 @@ class rollback_log_node_cache { }; ENSURE_POD(rollback_log_node_cache); - -#endif // TOKU_ROLLBACK_LOG_NODE_CACHE_H diff --git a/storage/tokudb/ft-index/ft/txn.cc b/storage/tokudb/ft-index/ft/txn/txn.cc similarity index 95% rename from storage/tokudb/ft-index/ft/txn.cc rename to storage/tokudb/ft-index/ft/txn/txn.cc index b517b2f995df6..7b475c2c9751e 100644 --- a/storage/tokudb/ft-index/ft/txn.cc +++ b/storage/tokudb/ft-index/ft/txn/txn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,15 +90,14 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
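rollback.cc above also shows the full lifecycle of the arena now embedded in each rollback log node: create() at node initialization (a 1024-byte initial chunk for a live txn's log, 0 for the empty placeholder), malloc_from_arena() behind toku_malloc_in_rollback and toku_memdup_in_rollback, total_footprint() when sizing the cachetable pair, and destroy() when the log is emptied. A minimal standalone sketch of those member calls on a free-standing memarena; the string-duplication helper and the return types are illustrative assumptions, the method names come from the hunks above:

    #include <string.h>
    #include "util/memarena.h"  // class-based arena introduced by this patch

    // Roughly what toku_memdup_in_rollback does, against a local arena.
    static char *arena_strdup_sketch(memarena *ma, const char *s) {
        size_t len = strlen(s) + 1;
        char *copy = (char *) ma->malloc_from_arena(len);  // cf. toku_malloc_in_rollback
        memcpy(copy, s, len);
        return copy;
    }

    static void arena_lifecycle_sketch(void) {
        memarena ma;
        ma.create(1024);                          // as in rollback_initialize_for_txn
        char *entry = arena_strdup_sketch(&ma, "rollback entry payload");
        (void) entry;                             // reclaimed wholesale with the arena
        size_t footprint = ma.total_footprint();  // what rollback_memory_size reports
        (void) footprint;
        ma.destroy();                             // as in make_rollback_log_empty
    }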
-#include "ft.h" -#include "txn.h" -#include "log-internal.h" -#include "checkpoint.h" -#include "ule.h" -#include "rollback-apply.h" -#include "txn_manager.h" -#include "txn_child_manager.h" -#include +#include "ft/cachetable/checkpoint.h" +#include "ft/ft.h" +#include "ft/logger/log-internal.h" +#include "ft/ule.h" +#include "ft/txn/rollback-apply.h" +#include "ft/txn/txn.h" +#include "ft/txn/txn_manager.h" +#include "util/status.h" /////////////////////////////////////////////////////////////////////////////////// // Engine status @@ -108,7 +107,7 @@ PATENT RIGHTS GRANT: static TXN_STATUS_S txn_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(txn_status, k, c, t, "txn: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(txn_status, k, c, t, "txn: " l, inc) void txn_status_init(void) { @@ -189,13 +188,13 @@ txn_create_xids(TOKUTXN txn, TOKUTXN parent) { XIDS xids; XIDS parent_xids; if (parent == NULL) { - parent_xids = xids_get_root_xids(); + parent_xids = toku_xids_get_root_xids(); } else { parent_xids = parent->xids; } - xids_create_unknown_child(parent_xids, &xids); + toku_xids_create_unknown_child(parent_xids, &xids); TXNID finalized_xid = (parent == NULL) ? txn->txnid.parent_id64 : txn->txnid.child_id64; - xids_finalize_with_child(xids, finalized_xid); + toku_xids_finalize_with_child(xids, finalized_xid); txn->xids = xids; } @@ -218,7 +217,7 @@ toku_txn_begin_with_xid ( TOKUTXN txn; // check for case where we are trying to // create too many nested transactions - if (!read_only && parent && !xids_can_create_child(parent->xids)) { + if (!read_only && parent && !toku_xids_can_create_child(parent->xids)) { r = EINVAL; goto exit; } @@ -388,7 +387,7 @@ toku_txn_update_xids_in_txn(TOKUTXN txn, TXNID xid) //Used on recovery to recover a transaction. int -toku_txn_load_txninfo (TOKUTXN txn, TXNINFO info) { +toku_txn_load_txninfo (TOKUTXN txn, struct txninfo *info) { txn->roll_info.rollentry_raw_count = info->rollentry_raw_count; uint32_t i; for (i = 0; i < info->num_fts; i++) { @@ -618,7 +617,7 @@ int remove_txn (const FT &h, const uint32_t UU(idx), TOKUTXN const UU(txn)) return 0; } -// for every BRT in txn, remove it. +// for every ft in txn, remove it. 
static void note_txn_closing (TOKUTXN txn) { txn->open_fts.iterate(txn); } @@ -649,7 +648,7 @@ void toku_txn_complete_txn(TOKUTXN txn) { void toku_txn_destroy_txn(TOKUTXN txn) { txn->open_fts.destroy(); if (txn->xids) { - xids_destroy(&txn->xids); + toku_xids_destroy(&txn->xids); } toku_mutex_destroy(&txn->txn_lock); toku_mutex_destroy(&txn->state_lock); @@ -658,7 +657,7 @@ void toku_txn_destroy_txn(TOKUTXN txn) { } XIDS toku_txn_get_xids (TOKUTXN txn) { - if (txn==0) return xids_get_root_xids(); + if (txn==0) return toku_xids_get_root_xids(); else return txn->xids; } @@ -786,6 +785,21 @@ void toku_txn_set_client_id(TOKUTXN txn, uint64_t client_id) { txn->client_id = client_id; } +int toku_txn_reads_txnid(TXNID txnid, TOKUTXN txn) { + int r = 0; + TXNID oldest_live_in_snapshot = toku_get_oldest_in_live_root_txn_list(txn); + if (oldest_live_in_snapshot == TXNID_NONE && txnid < txn->snapshot_txnid64) { + r = TOKUDB_ACCEPT; + } else if (txnid < oldest_live_in_snapshot || txnid == txn->txnid.parent_id64) { + r = TOKUDB_ACCEPT; + } else if (txnid > txn->snapshot_txnid64 || toku_is_txn_in_live_root_txn_list(*txn->live_root_txn_list, txnid)) { + r = 0; + } else { + r = TOKUDB_ACCEPT; + } + return r; +} + int toku_txn_discard_txn(TOKUTXN txn) { int r = toku_rollback_discard(txn); return r; diff --git a/storage/tokudb/ft-index/ft/txn/txn.h b/storage/tokudb/ft-index/ft/txn/txn.h new file mode 100644 index 0000000000000..c458df3b5b2e2 --- /dev/null +++ b/storage/tokudb/ft-index/ft/txn/txn.h @@ -0,0 +1,435 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. 
+ This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include "portability/toku_stdint.h" + +#include "ft/txn/txn_state.h" +#include "ft/serialize/block_table.h" +#include "util/omt.h" + +typedef uint64_t TXNID; + +typedef struct tokutxn *TOKUTXN; + +#define TXNID_NONE_LIVING ((TXNID)0) +#define TXNID_NONE ((TXNID)0) +#define TXNID_MAX ((TXNID)-1) + +typedef struct txnid_pair_s { + TXNID parent_id64; + TXNID child_id64; +} TXNID_PAIR; + +static const TXNID_PAIR TXNID_PAIR_NONE = { .parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE }; + +// We include the child manager here beacuse it uses the TXNID / TOKUTXN types +#include "ft/txn/txn_child_manager.h" + +/* Log Sequence Number (LSN) + * Make the LSN be a struct instead of an integer so that we get better type checking. */ +typedef struct __toku_lsn { uint64_t lsn; } LSN; +static const LSN ZERO_LSN = { .lsn = 0 }; +static const LSN MAX_LSN = { .lsn = UINT64_MAX }; + +// +// Types of snapshots that can be taken by a tokutxn +// - TXN_SNAPSHOT_NONE: means that there is no snapshot. Reads do not use snapshot reads. 
+// used for SERIALIZABLE and READ UNCOMMITTED +// - TXN_SNAPSHOT_ROOT: means that all tokutxns use their root transaction's snapshot +// used for REPEATABLE READ +// - TXN_SNAPSHOT_CHILD: means that each child tokutxn creates its own snapshot +// used for READ COMMITTED +// + +typedef enum __TXN_SNAPSHOT_TYPE { + TXN_SNAPSHOT_NONE=0, + TXN_SNAPSHOT_ROOT=1, + TXN_SNAPSHOT_CHILD=2 +} TXN_SNAPSHOT_TYPE; + +typedef toku::omt txn_omt_t; +typedef toku::omt xid_omt_t; +typedef toku::omt rx_omt_t; + +inline bool txn_pair_is_none(TXNID_PAIR txnid) { + return txnid.parent_id64 == TXNID_NONE && txnid.child_id64 == TXNID_NONE; +} + +inline bool txn_needs_snapshot(TXN_SNAPSHOT_TYPE snapshot_type, struct tokutxn *parent) { + // we need a snapshot if the snapshot type is a child or + // if the snapshot type is root and we have no parent. + // Cases that we don't need a snapshot: when snapshot type is NONE + // or when it is ROOT and we have a parent + return (snapshot_type != TXN_SNAPSHOT_NONE && (parent==NULL || snapshot_type == TXN_SNAPSHOT_CHILD)); +} + +struct tokulogger; + +struct txn_roll_info { + // these are number of rollback nodes and rollback entries for this txn. + // + // the current rollback node below has sequence number num_rollback_nodes - 1 + // (because they are numbered 0...num-1). often, the current rollback is + // already set to this block num, which means it exists and is available to + // log some entries. if the current rollback is NONE and the number of + // rollback nodes for this transaction is non-zero, then we will use + // the number of rollback nodes to know which sequence number to assign + // to a new one we create + uint64_t num_rollback_nodes; + uint64_t num_rollentries; + uint64_t num_rollentries_processed; + uint64_t rollentry_raw_count; // the total count of every byte in the transaction and all its children. + + // spilled rollback nodes are rollback nodes that were gorged by this + // transaction, retired, and saved in a list. + + // the spilled rollback head is the block number of the first rollback node + // that makes up the rollback log chain + BLOCKNUM spilled_rollback_head; + + // the spilled rollback is the block number of the last rollback node that + // makes up the rollback log chain. + BLOCKNUM spilled_rollback_tail; + + // the current rollback node block number we may use. if this is ROLLBACK_NONE, + // then we need to create one and set it here before using it. + BLOCKNUM current_rollback; +}; + +struct tokutxn { + // These don't change after create: + + TXNID_PAIR txnid; + + uint64_t snapshot_txnid64; // this is the lsn of the snapshot + const TXN_SNAPSHOT_TYPE snapshot_type; + const bool for_recovery; + struct tokulogger *const logger; + struct tokutxn *const parent; + // The child txn is protected by the child_txn_manager lock + // and by the user contract. The user contract states (and is + // enforced at the ydb layer) that a child txn should not be created + // while another child exists. 
The txn_child_manager will protect + // other threads from trying to read this value while another + // thread commits/aborts the child + struct tokutxn *child; + + // statically allocated child manager, if this + // txn is a root txn, this manager will be used and set to + // child_manager for this transaction and all of its children + txn_child_manager child_manager_s; + + // child manager for this transaction, all of its children, + // and all of its ancestors + txn_child_manager* child_manager; + + // These don't change but they're created in a way that's hard to make + // strictly const. + DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn + xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started. + struct XIDS_S *xids; // Represents the xid list + + struct tokutxn *snapshot_next; + struct tokutxn *snapshot_prev; + + bool begin_was_logged; + bool declared_read_only; // true if the txn was declared read only when began + + // These are not read until a commit, prepare, or abort starts, and + // they're "monotonic" (only go false->true) during operation: + bool do_fsync; + bool force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. (commit means root txn) + + // Not used until commit, prepare, or abort starts: + LSN do_fsync_lsn; + TOKU_XA_XID xa_xid; // for prepared transactions + TXN_PROGRESS_POLL_FUNCTION progress_poll_fun; + void *progress_poll_fun_extra; + + toku_mutex_t txn_lock; + // Protected by the txn lock: + toku::omt open_fts; // a collection of the fts that we touched. Indexed by filenum. + struct txn_roll_info roll_info; // Info used to manage rollback entries + + // mutex that protects the transition of the state variable + // the rest of the variables are used by the txn code and + // hot indexing to ensure that when hot indexing is processing a + // leafentry, a TOKUTXN cannot dissappear or change state out from + // underneath it + toku_mutex_t state_lock; + toku_cond_t state_cond; + TOKUTXN_STATE state; + uint32_t num_pin; // number of threads (all hot indexes) that want this + // txn to not transition to commit or abort + uint64_t client_id; +}; +typedef struct tokutxn *TOKUTXN; + +void toku_txn_lock(struct tokutxn *txn); +void toku_txn_unlock(struct tokutxn *txn); + +uint64_t toku_txn_get_root_id(struct tokutxn *txn); +bool txn_declared_read_only(struct tokutxn *txn); + +int toku_txn_begin_txn ( + DB_TXN *container_db_txn, + struct tokutxn *parent_tokutxn, + struct tokutxn **tokutxn, + struct tokulogger *logger, + TXN_SNAPSHOT_TYPE snapshot_type, + bool read_only + ); + +DB_TXN * toku_txn_get_container_db_txn (struct tokutxn *tokutxn); +void toku_txn_set_container_db_txn(struct tokutxn *txn, DB_TXN *db_txn); + +// toku_txn_begin_with_xid is called from recovery and has no containing DB_TXN +int toku_txn_begin_with_xid ( + struct tokutxn *parent_tokutxn, + struct tokutxn **tokutxn, + struct tokulogger *logger, + TXNID_PAIR xid, + TXN_SNAPSHOT_TYPE snapshot_type, + DB_TXN *container_db_txn, + bool for_recovery, + bool read_only + ); + +void toku_txn_update_xids_in_txn(struct tokutxn *txn, TXNID xid); + +int toku_txn_load_txninfo (struct tokutxn *txn, struct txninfo *info); + +int toku_txn_commit_txn (struct tokutxn *txn, int nosync, + TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); +int toku_txn_commit_with_lsn(struct tokutxn *txn, int nosync, LSN oplsn, + TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); + +int toku_txn_abort_txn(struct tokutxn *txn, + 
TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); +int toku_txn_abort_with_lsn(struct tokutxn *txn, LSN oplsn, + TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra); + +int toku_txn_discard_txn(struct tokutxn *txn); + +void toku_txn_prepare_txn (struct tokutxn *txn, TOKU_XA_XID *xid); +// Effect: Do the internal work of preparing a transaction (does not log the prepare record). + +void toku_txn_get_prepared_xa_xid(struct tokutxn *txn, TOKU_XA_XID *xa_xid); +// Effect: Fill in the XID information for a transaction. The caller allocates the XID and the function fills in values. + +void toku_txn_maybe_fsync_log(struct tokulogger *logger, LSN do_fsync_lsn, bool do_fsync); + +void toku_txn_get_fsync_info(struct tokutxn *ttxn, bool* do_fsync, LSN* do_fsync_lsn); + +// Complete and destroy a txn +void toku_txn_close_txn(struct tokutxn *txn); + +// Remove a txn from any live txn lists +void toku_txn_complete_txn(struct tokutxn *txn); + +// Free the memory of a txn +void toku_txn_destroy_txn(struct tokutxn *txn); + +struct XIDS_S *toku_txn_get_xids(struct tokutxn *txn); + +// Force fsync on commit +void toku_txn_force_fsync_on_commit(struct tokutxn *txn); + +typedef enum { + TXN_BEGIN, // total number of transactions begun (does not include recovered txns) + TXN_READ_BEGIN, // total number of read only transactions begun (does not include recovered txns) + TXN_COMMIT, // successful commits + TXN_ABORT, + TXN_STATUS_NUM_ROWS +} txn_status_entry; + +typedef struct { + bool initialized; + TOKU_ENGINE_STATUS_ROW_S status[TXN_STATUS_NUM_ROWS]; +} TXN_STATUS_S, *TXN_STATUS; + +void toku_txn_get_status(TXN_STATUS s); + +bool toku_is_txn_in_live_root_txn_list(const xid_omt_t &live_root_txn_list, TXNID xid); + +TXNID toku_get_oldest_in_live_root_txn_list(struct tokutxn *txn); + +TOKUTXN_STATE toku_txn_get_state(struct tokutxn *txn); + +struct tokulogger_preplist { + TOKU_XA_XID xid; + DB_TXN *txn; +}; +int toku_logger_recover_txn (struct tokulogger *logger, struct tokulogger_preplist preplist[/*count*/], long count, /*out*/ long *retp, uint32_t flags); + +void toku_maybe_log_begin_txn_for_write_operation(struct tokutxn *txn); + +// Return whether txn (or it's descendents) have done no work. +bool toku_txn_is_read_only(struct tokutxn *txn); + +void toku_txn_lock_state(struct tokutxn *txn); +void toku_txn_unlock_state(struct tokutxn *txn); +void toku_txn_pin_live_txn_unlocked(struct tokutxn *txn); +void toku_txn_unpin_live_txn(struct tokutxn *txn); + +bool toku_txn_has_spilled_rollback(struct tokutxn *txn); + +uint64_t toku_txn_get_client_id(struct tokutxn *txn); +void toku_txn_set_client_id(struct tokutxn *txn, uint64_t client_id); + +// +// This function is used by the leafentry iterators. +// returns TOKUDB_ACCEPT if live transaction context is allowed to read a value +// that is written by transaction with LSN of id +// live transaction context may read value if either id is the root ancestor of context, or if +// id was committed before context's snapshot was taken. 
+// For id to be committed before context's snapshot was taken, the following must be true: +// - id < context->snapshot_txnid64 AND id is not in context's live root transaction list +// For the above to NOT be true: +// - id > context->snapshot_txnid64 OR id is in context's live root transaction list +// +int toku_txn_reads_txnid(TXNID txnid, struct tokutxn *txn); + +void txn_status_init(void); + +void txn_status_destroy(void); + +// For serialize / deserialize + +#include "ft/serialize/wbuf.h" + +static inline void wbuf_TXNID(struct wbuf *wb, TXNID txnid) { + wbuf_ulonglong(wb, txnid); +} + +static inline void wbuf_nocrc_TXNID(struct wbuf *wb, TXNID txnid) { + wbuf_nocrc_ulonglong(wb, txnid); +} + +static inline void wbuf_nocrc_TXNID_PAIR(struct wbuf *wb, TXNID_PAIR txnid) { + wbuf_nocrc_ulonglong(wb, txnid.parent_id64); + wbuf_nocrc_ulonglong(wb, txnid.child_id64); +} + +static inline void wbuf_nocrc_LSN(struct wbuf *wb, LSN lsn) { + wbuf_nocrc_ulonglong(wb, lsn.lsn); +} + +static inline void wbuf_LSN(struct wbuf *wb, LSN lsn) { + wbuf_ulonglong(wb, lsn.lsn); +} + +#include "ft/serialize/rbuf.h" + +static inline void rbuf_TXNID(struct rbuf *rb, TXNID *txnid) { + *txnid = rbuf_ulonglong(rb); +} + +static inline void rbuf_TXNID_PAIR(struct rbuf *rb, TXNID_PAIR *txnid) { + txnid->parent_id64 = rbuf_ulonglong(rb); + txnid->child_id64 = rbuf_ulonglong(rb); +} + +static inline void rbuf_ma_TXNID(struct rbuf *rb, memarena *UU(ma), TXNID *txnid) { + rbuf_TXNID(rb, txnid); +} + +static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, memarena *ma __attribute__((__unused__)), TXNID_PAIR *txnid) { + rbuf_TXNID_PAIR(r, txnid); +} + +static inline LSN rbuf_LSN(struct rbuf *rb) { + LSN lsn = { .lsn = rbuf_ulonglong(rb) }; + return lsn; +} diff --git a/storage/tokudb/ft-index/ft/txn_child_manager.cc b/storage/tokudb/ft-index/ft/txn/txn_child_manager.cc similarity index 98% rename from storage/tokudb/ft-index/ft/txn_child_manager.cc rename to storage/tokudb/ft-index/ft/txn/txn_child_manager.cc index bb74a1cb8aecf..3a006285e201e 100644 --- a/storage/tokudb/ft-index/ft/txn_child_manager.cc +++ b/storage/tokudb/ft-index/ft/txn/txn_child_manager.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,8 +89,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
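The new toku_txn_reads_txnid() above encodes the snapshot-visibility rule described in its comment. Below is a simplified standalone restatement (a sketch, not the library function): the parameters stand in for the fields the real code reads from struct tokutxn, and the real function returns TOKUDB_ACCEPT or 0 rather than a bool.

// Sketch: mirrors the branch order of toku_txn_reads_txnid() in txn.cc above.
static bool example_txnid_visible(TXNID id,
                                  TXNID snapshot_txnid64,         // reader's snapshot id
                                  TXNID oldest_live_in_snapshot,  // oldest root txn live when the snapshot was taken
                                  TXNID reader_root_id,           // reader's root (parent) xid
                                  bool id_in_live_root_list) {    // was id still live at snapshot time?
    if (oldest_live_in_snapshot == TXNID_NONE && id < snapshot_txnid64) {
        return true;   // nothing was live: anything older than the snapshot is visible
    } else if (id < oldest_live_in_snapshot || id == reader_root_id) {
        return true;   // committed before every live txn, or written by the reader's own root
    } else if (id > snapshot_txnid64 || id_in_live_root_list) {
        return false;  // committed after the snapshot, or still uncommitted at snapshot time
    } else {
        return true;
    }
}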
-#include "log-internal.h" -#include "txn_child_manager.h" +#include "ft/logger/log-internal.h" +#include "ft/txn/txn_child_manager.h" // // initialized a txn_child_manager, diff --git a/storage/tokudb/ft-index/ft/txn_child_manager.h b/storage/tokudb/ft-index/ft/txn/txn_child_manager.h similarity index 92% rename from storage/tokudb/ft-index/ft/txn_child_manager.h rename to storage/tokudb/ft-index/ft/txn/txn_child_manager.h index 07cf2ee3b5e8f..99d98e2fe59a5 100644 --- a/storage/tokudb/ft-index/ft/txn_child_manager.h +++ b/storage/tokudb/ft-index/ft/txn/txn_child_manager.h @@ -1,9 +1,7 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_TXN_CHILD_MANAGER_H -#define TOKU_TXN_CHILD_MANAGER_H -#ident "$Id: rollback.h 49033 2012-10-17 18:48:30Z zardosht $" +#ident "$Id: txn/rollback.h 49033 2012-10-17 18:48:30Z zardosht $" /* COPYING CONDITIONS NOTICE: @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,11 +87,15 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + +// We should be including ft/txn/txn.h here but that header includes this one, +// so we don't. +#include "portability/toku_pthread.h" + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "txn_manager.h" - class txn_child_manager { public: void init (TOKUTXN root); @@ -104,17 +106,15 @@ class txn_child_manager { void suspend(); void resume(); void find_tokutxn_by_xid_unlocked(TXNID_PAIR xid, TOKUTXN* result); - int iterate(txn_mgr_iter_callback cb, void* extra); + int iterate(int (*cb)(TOKUTXN txn, void *extra), void* extra); private: TXNID m_last_xid; TOKUTXN m_root; toku_mutex_t m_mutex; -friend class txn_child_manager_unit_test; + friend class txn_child_manager_unit_test; }; ENSURE_POD(txn_child_manager); - -#endif // TOKU_TXN_CHILD_MANAGER_H diff --git a/storage/tokudb/ft-index/ft/txn_manager.cc b/storage/tokudb/ft-index/ft/txn/txn_manager.cc similarity index 97% rename from storage/tokudb/ft-index/ft/txn_manager.cc rename to storage/tokudb/ft-index/ft/txn/txn_manager.cc index a155db493c176..570174f9b9f5a 100644 --- a/storage/tokudb/ft-index/ft/txn_manager.cc +++ b/storage/tokudb/ft-index/ft/txn/txn_manager.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,16 +89,15 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include +#include "portability/toku_race_tools.h" -#include - -#include "log-internal.h" -#include "txn.h" -#include "checkpoint.h" -#include "ule.h" -#include "txn_manager.h" -#include "rollback.h" +#include "ft/cachetable/checkpoint.h" +#include "ft/logger/log-internal.h" +#include "ft/ule.h" +#include "ft/txn/txn.h" +#include "ft/txn/txn_manager.h" +#include "ft/txn/rollback.h" +#include "util/omt.h" bool garbage_collection_debug = false; @@ -339,7 +338,11 @@ int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), T // Create list of root transactions that were live when this txn began. static inline void setup_live_root_txn_list(xid_omt_t* live_root_txnid, xid_omt_t* live_root_txn_list) { - live_root_txn_list->clone(*live_root_txnid); + if (live_root_txnid->size() > 0) { + live_root_txn_list->clone(*live_root_txnid); + } else { + live_root_txn_list->create_no_array(); + } } //Heaviside function to search through an OMT by a TXNID @@ -350,27 +353,6 @@ find_by_xid (const TOKUTXN &txn, const TXNID &txnidfind) { return 0; } -#if 0 -static void -omt_insert_at_end_unless_recovery(OMT omt, int (*h)(OMTVALUE, void*extra), TOKUTXN txn, OMTVALUE v, bool for_recovery) -// Effect: insert v into omt that is sorted by xid gotten from txn. -// Rationale: -// During recovery, we get txns in the order that they did their first -// write operation, which is not necessarily monotonically increasing. -// During normal operation, txns are created with strictly increasing -// txnids, so we can always insert at the end. -{ - int r; - uint32_t idx = toku_omt_size(omt); - if (for_recovery) { - r = toku_omt_find_zero(omt, h, (void *) txn->txnid64, NULL, &idx); - invariant(r==DB_NOTFOUND); - } - r = toku_omt_insert_at(omt, v, idx); - lazy_assert_zero(r); -} -#endif - static TXNID max_xid(TXNID a, TXNID b) { return a < b ? b : a; diff --git a/storage/tokudb/ft-index/ft/txn_manager.h b/storage/tokudb/ft-index/ft/txn/txn_manager.h similarity index 95% rename from storage/tokudb/ft-index/ft/txn_manager.h rename to storage/tokudb/ft-index/ft/txn/txn_manager.h index 12267297a0ed2..5df1e23115c65 100644 --- a/storage/tokudb/ft-index/ft/txn_manager.h +++ b/storage/tokudb/ft-index/ft/txn/txn_manager.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKUTXN_MANAGER_H -#define TOKUTXN_MANAGER_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,14 +87,17 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include -#include -#include "fttypes.h" -#include -#include +#include "portability/toku_portability.h" +#include "portability/toku_pthread.h" + +#include "ft/txn/txn.h" + +typedef struct txn_manager *TXN_MANAGER; struct referenced_xid_tuple { TXNID begin_id; @@ -104,10 +105,6 @@ struct referenced_xid_tuple { uint32_t references; }; -typedef toku::omt txn_omt_t; -typedef toku::omt xid_omt_t; -typedef toku::omt rx_omt_t; - struct txn_manager { toku_mutex_t txn_manager_lock; // a lock protecting this object txn_omt_t live_root_txns; // a sorted tree. @@ -123,6 +120,7 @@ struct txn_manager { TXNID last_xid_seen_for_recover; TXNID last_calculated_oldest_referenced_xid; }; +typedef struct txn_manager *TXN_MANAGER; struct txn_manager_state { txn_manager_state(TXN_MANAGER mgr) : @@ -268,5 +266,3 @@ bool toku_txn_manager_txns_exist(TXN_MANAGER mgr); void toku_txn_manager_increase_last_xid(TXN_MANAGER mgr, uint64_t increment); TXNID toku_get_youngest_live_list_txnid_for(TXNID xc, const xid_omt_t &snapshot_txnids, const rx_omt_t &referenced_xids); - -#endif // TOKUTXN_MANAGER_H diff --git a/storage/tokudb/ft-index/ft/txn_state.h b/storage/tokudb/ft-index/ft/txn/txn_state.h similarity index 97% rename from storage/tokudb/ft-index/ft/txn_state.h rename to storage/tokudb/ft-index/ft/txn/txn_state.h index d8e192edec3c0..75c3f51ce79d0 100644 --- a/storage/tokudb/ft-index/ft/txn_state.h +++ b/storage/tokudb/ft-index/ft/txn/txn_state.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#if !defined(TOKUTXN_STATE_H) -#define TOKUTXN_STATE_H // this is a separate file so that the hotindexing tests can see the txn states @@ -101,5 +101,3 @@ enum tokutxn_state { TOKUTXN_RETIRED, // txn no longer exists }; typedef enum tokutxn_state TOKUTXN_STATE; - -#endif diff --git a/storage/tokudb/ft-index/ft/xids.cc b/storage/tokudb/ft-index/ft/txn/xids.cc similarity index 79% rename from storage/tokudb/ft-index/ft/xids.cc rename to storage/tokudb/ft-index/ft/txn/xids.cc index 5733a10550f54..6308f3c0368d7 100644 --- a/storage/tokudb/ft-index/ft/xids.cc +++ b/storage/tokudb/ft-index/ft/txn/xids.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -101,18 +101,15 @@ PATENT RIGHTS GRANT: * host order. */ - #include #include -#include -#include "fttypes.h" -#include "xids.h" -#include "xids-internal.h" -#include "toku_assert.h" -#include "memory.h" -#include +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_htod.h" +#include "portability/toku_portability.h" +#include "ft/txn/xids.h" ///////////////////////////////////////////////////////////////////////////////// // This layer of abstraction (xids_xxx) understands xids<> and nothing else. @@ -124,22 +121,20 @@ PATENT RIGHTS GRANT: // the variable num_xids. 
// // The xids struct is immutable. The caller gets an initial version of XIDS -// by calling xids_get_root_xids(), which returns the constant struct +// by calling toku_xids_get_root_xids(), which returns the constant struct // representing the root transaction (id 0). When a transaction begins, // a new XIDS is created with the id of the current transaction appended to // the list. // // - // This is the xids list for a transactionless environment. // It is also the initial state of any xids list created for // nested transactions. - XIDS -xids_get_root_xids(void) { - static const struct xids_t root_xids = { +toku_xids_get_root_xids(void) { + static const struct XIDS_S root_xids = { .num_xids = 0 }; @@ -148,14 +143,13 @@ xids_get_root_xids(void) { } bool -xids_can_create_child(XIDS xids) { +toku_xids_can_create_child(XIDS xids) { invariant(xids->num_xids < MAX_TRANSACTION_RECORDS); return (xids->num_xids + 1) != MAX_TRANSACTION_RECORDS; } - int -xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p) { +toku_xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p) { // Postcondition: // xids_p points to an xids that is an exact copy of parent_xids, but with room for one more xid. int rval; @@ -174,9 +168,9 @@ xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p) { } void -xids_finalize_with_child(XIDS xids, TXNID this_xid) { +toku_xids_finalize_with_child(XIDS xids, TXNID this_xid) { // Precondition: - // - xids was created by xids_create_unknown_child + // - xids was created by toku_xids_create_unknown_child TXNID this_xid_disk = toku_htod64(this_xid); uint32_t num_child_xids = ++xids->num_xids; xids->ids[num_child_xids - 1] = this_xid_disk; @@ -185,21 +179,21 @@ xids_finalize_with_child(XIDS xids, TXNID this_xid) { // xids is immutable. This function creates a new xids by copying the // parent's list and then appending the xid of the new transaction. int -xids_create_child(XIDS parent_xids, // xids list for parent transaction - XIDS * xids_p, // xids list created - TXNID this_xid) { // xid of this transaction (new innermost) - bool can_create_child = xids_can_create_child(parent_xids); +toku_xids_create_child(XIDS parent_xids, // xids list for parent transaction + XIDS *xids_p, // xids list created + TXNID this_xid) { // xid of this transaction (new innermost) + bool can_create_child = toku_xids_can_create_child(parent_xids); if (!can_create_child) { return EINVAL; } - xids_create_unknown_child(parent_xids, xids_p); - xids_finalize_with_child(*xids_p, this_xid); + toku_xids_create_unknown_child(parent_xids, xids_p); + toku_xids_finalize_with_child(*xids_p, this_xid); return 0; } void -xids_create_from_buffer(struct rbuf *rb, // xids list for parent transaction - XIDS * xids_p) { // xids list created +toku_xids_create_from_buffer(struct rbuf *rb, // xids list for parent transaction + XIDS *xids_p) { // xids list created uint8_t num_xids = rbuf_char(rb); invariant(num_xids < MAX_TRANSACTION_RECORDS); XIDS CAST_FROM_VOIDP(xids, toku_xmalloc(sizeof(*xids) + num_xids*sizeof(xids->ids[0]))); @@ -211,61 +205,59 @@ xids_create_from_buffer(struct rbuf *rb, // xids list for parent transaction *xids_p = xids; } - void -xids_destroy(XIDS *xids_p) { - if (*xids_p != xids_get_root_xids()) toku_free(*xids_p); +toku_xids_destroy(XIDS *xids_p) { + if (*xids_p != toku_xids_get_root_xids()) toku_free(*xids_p); *xids_p = NULL; } - // Return xid at requested position. 
// If requesting an xid out of range (which will be the case if xids array is empty) // then return 0, the xid of the root transaction. TXNID -xids_get_xid(XIDS xids, uint8_t index) { - invariant(index < xids_get_num_xids(xids)); +toku_xids_get_xid(XIDS xids, uint8_t index) { + invariant(index < toku_xids_get_num_xids(xids)); TXNID rval = xids->ids[index]; rval = toku_dtoh64(rval); return rval; } uint8_t -xids_get_num_xids(XIDS xids) { +toku_xids_get_num_xids(XIDS xids) { uint8_t rval = xids->num_xids; return rval; } - // Return innermost xid TXNID -xids_get_innermost_xid(XIDS xids) { +toku_xids_get_innermost_xid(XIDS xids) { TXNID rval = TXNID_NONE; - if (xids_get_num_xids(xids)) { + if (toku_xids_get_num_xids(xids)) { // if clause above makes this cast ok - uint8_t innermost_xid = (uint8_t)(xids_get_num_xids(xids)-1); - rval = xids_get_xid(xids, innermost_xid); + uint8_t innermost_xid = (uint8_t) (toku_xids_get_num_xids(xids) - 1); + rval = toku_xids_get_xid(xids, innermost_xid); } return rval; } TXNID -xids_get_outermost_xid(XIDS xids) { +toku_xids_get_outermost_xid(XIDS xids) { TXNID rval = TXNID_NONE; - if (xids_get_num_xids(xids)) - rval = xids_get_xid(xids, 0); + if (toku_xids_get_num_xids(xids)) { + rval = toku_xids_get_xid(xids, 0); + } return rval; } void -xids_cpy(XIDS target, XIDS source) { - size_t size = xids_get_size(source); +toku_xids_cpy(XIDS target, XIDS source) { + size_t size = toku_xids_get_size(source); memcpy(target, source, size); } // return size in bytes uint32_t -xids_get_size(XIDS xids){ +toku_xids_get_size(XIDS xids) { uint32_t rval; uint8_t num_xids = xids->num_xids; rval = sizeof(*xids) + num_xids * sizeof(xids->ids[0]); @@ -273,7 +265,7 @@ xids_get_size(XIDS xids){ } uint32_t -xids_get_serialize_size(XIDS xids){ +toku_xids_get_serialize_size(XIDS xids) { uint32_t rval; uint8_t num_xids = xids->num_xids; rval = 1 + //num xids @@ -281,9 +273,8 @@ xids_get_serialize_size(XIDS xids){ return rval; } - unsigned char * -xids_get_end_of_array(XIDS xids) { +toku_xids_get_end_of_array(XIDS xids) { TXNID *r = xids->ids + xids->num_xids; return (unsigned char*)r; } @@ -297,13 +288,13 @@ void wbuf_nocrc_xids(struct wbuf *wb, XIDS xids) { } void -xids_fprintf(FILE* fp, XIDS xids) { +toku_xids_fprintf(FILE *fp, XIDS xids) { uint8_t index; - unsigned num_xids = xids_get_num_xids(xids); + unsigned num_xids = toku_xids_get_num_xids(xids); fprintf(fp, "[|%u| ", num_xids); - for (index = 0; index < xids_get_num_xids(xids); index++) { + for (index = 0; index < toku_xids_get_num_xids(xids); index++) { if (index) fprintf(fp, ","); - fprintf(fp, "%" PRIx64, xids_get_xid(xids, index)); + fprintf(fp, "%" PRIx64, toku_xids_get_xid(xids, index)); } fprintf(fp, "]"); } diff --git a/storage/tokudb/ft-index/ft/xids.h b/storage/tokudb/ft-index/ft/txn/xids.h similarity index 70% rename from storage/tokudb/ft-index/ft/xids.h rename to storage/tokudb/ft-index/ft/txn/xids.h index f7a7a87155d92..5b0e95c2eee6a 100644 --- a/storage/tokudb/ft-index/ft/xids.h +++ b/storage/tokudb/ft-index/ft/txn/xids.h @@ -12,9 +12,6 @@ * TokuWiki/Imp/TransactionsOverview. */ -#ifndef XIDS_H -#define XIDS_H - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -44,7 +41,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -101,53 +98,73 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "x1764.h" -#include "rbuf.h" -#include "wbuf.h" -#include "tokuconst.h" +#include "ft/txn/txn.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/wbuf.h" + +/* The number of transaction ids stored in the xids structure is + * represented by an 8-bit value. The value 255 is reserved. + * The constant MAX_NESTED_TRANSACTIONS is one less because + * one slot in the packed leaf entry is used for the implicit + * root transaction (id 0). + */ +enum { + MAX_NESTED_TRANSACTIONS = 253, + MAX_TRANSACTION_RECORDS = MAX_NESTED_TRANSACTIONS + 1 +}; + +// Variable size list of transaction ids (known in design doc as xids<>). +// ids[0] is the outermost transaction. +// ids[num_xids - 1] is the innermost transaction. +// Should only be accessed by accessor functions toku_xids_xxx, not directly. +// If the xids struct is unpacked, the compiler aligns the ids[] and we waste a lot of space +struct __attribute__((__packed__)) XIDS_S { + // maximum value of MAX_TRANSACTION_RECORDS - 1 because transaction 0 is implicit + uint8_t num_xids; + TXNID ids[]; +}; +typedef struct XIDS_S *XIDS; -//Retrieve an XIDS representing the root transaction. -XIDS xids_get_root_xids(void); +// Retrieve an XIDS representing the root transaction. +XIDS toku_xids_get_root_xids(void); -bool xids_can_create_child(XIDS xids); +bool toku_xids_can_create_child(XIDS xids); -void xids_cpy(XIDS target, XIDS source); +void toku_xids_cpy(XIDS target, XIDS source); //Creates an XIDS representing this transaction. //You must pass in an XIDS representing the parent of this transaction. -int xids_create_child(XIDS parent_xids, XIDS *xids_p, TXNID this_xid); +int toku_xids_create_child(XIDS parent_xids, XIDS *xids_p, TXNID this_xid); -// The following two functions (in order) are equivalent to xids_create child, +// The following two functions (in order) are equivalent to toku_xids_create child, // but allow you to do most of the work without knowing the new xid. 
-int xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p); -void xids_finalize_with_child(XIDS xids, TXNID this_xid); +int toku_xids_create_unknown_child(XIDS parent_xids, XIDS *xids_p); +void toku_xids_finalize_with_child(XIDS xids, TXNID this_xid); -void xids_create_from_buffer(struct rbuf *rb, XIDS * xids_p); +void toku_xids_create_from_buffer(struct rbuf *rb, XIDS *xids_p); -void xids_destroy(XIDS *xids_p); +void toku_xids_destroy(XIDS *xids_p); -TXNID xids_get_xid(XIDS xids, uint8_t index); +TXNID toku_xids_get_xid(XIDS xids, uint8_t index); -uint8_t xids_get_num_xids(XIDS xids); +uint8_t toku_xids_get_num_xids(XIDS xids); -TXNID xids_get_innermost_xid(XIDS xids); -TXNID xids_get_outermost_xid(XIDS xids); +TXNID toku_xids_get_innermost_xid(XIDS xids); +TXNID toku_xids_get_outermost_xid(XIDS xids); // return size in bytes -uint32_t xids_get_size(XIDS xids); +uint32_t toku_xids_get_size(XIDS xids); -uint32_t xids_get_serialize_size(XIDS xids); +uint32_t toku_xids_get_serialize_size(XIDS xids); -unsigned char *xids_get_end_of_array(XIDS xids); +unsigned char *toku_xids_get_end_of_array(XIDS xids); void wbuf_nocrc_xids(struct wbuf *wb, XIDS xids); -void xids_fprintf(FILE* fp, XIDS xids); - - - -#endif +void toku_xids_fprintf(FILE* fp, XIDS xids); diff --git a/storage/tokudb/ft-index/ft/ule-internal.h b/storage/tokudb/ft-index/ft/ule-internal.h index 5c005acf887b3..9a42ead3d900a 100644 --- a/storage/tokudb/ft-index/ft/ule-internal.h +++ b/storage/tokudb/ft-index/ft/ule-internal.h @@ -5,9 +5,6 @@ * ule mechanisms that do not belong in the public interface. */ -#ifndef TOKU_ULE_INTERNAL_H -#define TOKU_ULE_INTERNAL_H - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -37,7 +34,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,6 +91,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -136,7 +135,7 @@ typedef struct ule { // unpacked leaf entry -void test_msg_modify_ule(ULE ule, FT_MSG msg); +void test_msg_modify_ule(ULE ule, const ft_msg &msg); ////////////////////////////////////////////////////////////////////////////////////// @@ -148,14 +147,12 @@ le_pack(ULE ule, // data to be packed into new leafentry uint32_t idx, void* keyp, uint32_t keylen, + uint32_t old_keylen, uint32_t old_le_size, - LEAFENTRY * const new_leafentry_p // this is what this function creates + LEAFENTRY * const new_leafentry_p, // this is what this function creates + void **const maybe_free ); size_t le_memsize_from_ule (ULE ule); void ule_cleanup(ULE ule); - - -#endif // TOKU_ULE_H - diff --git a/storage/tokudb/ft-index/ft/ule.cc b/storage/tokudb/ft-index/ft/ule.cc index dc4198bda3d48..03ec452cbd2cf 100644 --- a/storage/tokudb/ft-index/ft/ule.cc +++ b/storage/tokudb/ft-index/ft/ule.cc @@ -31,7 +31,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -102,27 +102,27 @@ PATENT RIGHTS GRANT: // See design documentation for nested transactions at // TokuWiki/Imp/TransactionsOverview. -#include -#include "fttypes.h" -#include "ft-internal.h" - -#include - -#include "leafentry.h" -#include "xids.h" -#include "ft_msg.h" -#include "ule.h" -#include "txn_manager.h" -#include "ule-internal.h" -#include -#include -#include +#include "portability/toku_portability.h" + +#include "ft/ft-internal.h" +#include "ft/leafentry.h" +#include "ft/logger/logger.h" +#include "ft/msg.h" +#include "ft/txn/txn.h" +#include "ft/txn/txn_manager.h" +#include "ft/ule.h" +#include "ft/ule-internal.h" +#include "ft/txn/xids.h" +#include "util/bytestring.h" +#include "util/omt.h" +#include "util/partitioned_counter.h" +#include "util/scoped_malloc.h" +#include "util/status.h" #define ULE_DEBUG 0 static uint32_t ule_get_innermost_numbytes(ULE ule, uint32_t keylen); - /////////////////////////////////////////////////////////////////////////////////// // Engine status // @@ -131,7 +131,7 @@ static uint32_t ule_get_innermost_numbytes(ULE ule, uint32_t keylen); static LE_STATUS_S le_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(le_status, k, c, t, "le: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(le_status, k, c, t, "le: " l, inc) void toku_ule_status_init(void) { // Note, this function initializes the keyname, type, and legend fields. @@ -216,7 +216,7 @@ const UXR_S committed_delete = { // Local functions: static void msg_init_empty_ule(ULE ule); -static void msg_modify_ule(ULE ule, FT_MSG msg); +static void msg_modify_ule(ULE ule, const ft_msg &msg); static void ule_init_empty_ule(ULE ule); static void ule_do_implicit_promotions(ULE ule, XIDS xids); static void ule_try_promote_provisional_outermost(ULE ule, TXNID oldest_possible_live_xid); @@ -252,26 +252,28 @@ static inline size_t uxr_unpack_length_and_bit(UXR uxr, uint8_t *p); static inline size_t uxr_unpack_data(UXR uxr, uint8_t *p); static void get_space_for_le( - bn_data* data_buffer, + bn_data* data_buffer, uint32_t idx, void* keyp, uint32_t keylen, + uint32_t old_keylen, uint32_t old_le_size, - size_t size, - LEAFENTRY* new_le_space - ) + size_t size, + LEAFENTRY* new_le_space, + void **const maybe_free + ) { - if (data_buffer == NULL) { + if (data_buffer == nullptr) { CAST_FROM_VOIDP(*new_le_space, toku_xmalloc(size)); } else { // this means we are overwriting something if (old_le_size > 0) { - data_buffer->get_space_for_overwrite(idx, keyp, keylen, old_le_size, size, new_le_space); + data_buffer->get_space_for_overwrite(idx, keyp, keylen, old_keylen, old_le_size, size, new_le_space, maybe_free); } // this means we are inserting something new else { - data_buffer->get_space_for_insert(idx, keyp, keylen, size, new_le_space); + data_buffer->get_space_for_insert(idx, keyp, keylen, size, new_le_space, maybe_free); } } } @@ -326,11 +328,11 @@ xid_reads_committed_xid(TXNID tl1, TXNID xc, const xid_omt_t &snapshot_txnids, c // static void ule_simple_garbage_collection(ULE ule, txn_gc_info *gc_info) { - uint32_t curr_index = 0; - uint32_t num_entries; if (ule->num_cuxrs == 1) { - goto done; + return; } + + uint32_t curr_index = 0; if (gc_info->mvcc_needed) { // starting at the top of the committed stack, find the first // uxr with a txnid that is less than oldest_referenced_xid @@ -340,37 +342,34 @@ ule_simple_garbage_collection(ULE ule, txn_gc_info *gc_info) { break; } } - } - else { + } else { // if mvcc is not needed, we can need the top committed // value and nothing 
else curr_index = ule->num_cuxrs - 1; } + // curr_index is now set to the youngest uxr older than oldest_referenced_xid - if (curr_index == 0) { - goto done; + // so if it's not the bottom of the stack.. + if (curr_index != 0) { + // ..then we need to get rid of the entries below curr_index + uint32_t num_entries = ule->num_cuxrs + ule->num_puxrs - curr_index; + memmove(&ule->uxrs[0], &ule->uxrs[curr_index], num_entries * sizeof(ule->uxrs[0])); + ule->uxrs[0].xid = TXNID_NONE; // New 'bottom of stack' loses its TXNID + ule->num_cuxrs -= curr_index; } - - // now get rid of the entries below curr_index - num_entries = ule->num_cuxrs + ule->num_puxrs - curr_index; - memmove(&ule->uxrs[0], &ule->uxrs[curr_index], num_entries * sizeof(ule->uxrs[0])); - ule->uxrs[0].xid = TXNID_NONE; //New 'bottom of stack' loses its TXNID - ule->num_cuxrs -= curr_index; - -done:; } +// TODO: Clean this up +extern bool garbage_collection_debug; + static void ule_garbage_collect(ULE ule, const xid_omt_t &snapshot_xids, const rx_omt_t &referenced_xids, const xid_omt_t &live_root_txns) { - if (ule->num_cuxrs == 1) goto done; - // will fail if too many num_cuxrs - bool necessary_static[MAX_TRANSACTION_RECORDS]; - bool *necessary; - necessary = necessary_static; - if (ule->num_cuxrs >= MAX_TRANSACTION_RECORDS) { - XMALLOC_N(ule->num_cuxrs, necessary); + if (ule->num_cuxrs == 1) { + return; } - memset(necessary, 0, sizeof(necessary[0])*ule->num_cuxrs); + + toku::scoped_calloc necessary_buf(ule->num_cuxrs * sizeof(bool)); + bool *necessary = reinterpret_cast(necessary_buf.get()); uint32_t curr_committed_entry; curr_committed_entry = ule->num_cuxrs - 1; @@ -400,24 +399,21 @@ ule_garbage_collect(ULE ule, const xid_omt_t &snapshot_xids, const rx_omt_t &ref } tl1 = toku_get_youngest_live_list_txnid_for(xc, snapshot_xids, referenced_xids); - if (tl1 == xc) { - // if tl1 == xc, that means xc should be live and show up in - // live_root_txns, which we check above. So, if we get - // here, something is wrong. - assert(false); - } + + // if tl1 == xc, that means xc should be live and show up in live_root_txns, which we check above. + invariant(tl1 != xc); + if (tl1 == TXNID_NONE) { // set tl1 to youngest live transaction older than ule->uxrs[curr_committed_entry]->xid tl1 = get_next_older_txnid(xc, snapshot_xids); if (tl1 == TXNID_NONE) { - //Remainder is garbage, we're done + // remainder is garbage, we're done break; } } - if (garbage_collection_debug) - { + if (garbage_collection_debug) { int r = snapshot_xids.find_zero(tl1, nullptr, nullptr); - invariant(r==0); //make sure that the txn you are claiming is live is actually live + invariant_zero(r); // make sure that the txn you are claiming is live is actually live } // // tl1 should now be set @@ -431,30 +427,23 @@ ule_garbage_collect(ULE ule, const xid_omt_t &snapshot_xids, const rx_omt_t &ref curr_committed_entry--; } } - uint32_t first_free; - first_free = 0; - uint32_t i; - for (i = 0; i < ule->num_cuxrs; i++) { - //Shift values to 'delete' garbage values. + uint32_t first_free = 0; + for (uint32_t i = 0; i < ule->num_cuxrs; i++) { + // Shift values to 'delete' garbage values. 
if (necessary[i]) { ule->uxrs[first_free] = ule->uxrs[i]; first_free++; } } - uint32_t saved; - saved = first_free; + uint32_t saved = first_free; invariant(saved <= ule->num_cuxrs); invariant(saved >= 1); ule->uxrs[0].xid = TXNID_NONE; //New 'bottom of stack' loses its TXNID if (first_free != ule->num_cuxrs) { - //Shift provisional values + // Shift provisional values memmove(&ule->uxrs[first_free], &ule->uxrs[ule->num_cuxrs], ule->num_puxrs * sizeof(ule->uxrs[0])); } ule->num_cuxrs = saved; - if (necessary != necessary_static) { - toku_free(necessary); - } -done:; } static size_t ule_packed_memsize(ULE ule) { @@ -491,10 +480,11 @@ enum { // Otehrwise the new_leafentry_p points at the new leaf entry. // As of October 2011, this function always returns 0. void -toku_le_apply_msg(FT_MSG msg, +toku_le_apply_msg(const ft_msg &msg, LEAFENTRY old_leafentry, // NULL if there was no stored data. bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced + uint32_t old_keylen, // length of the any key in data_buffer txn_gc_info *gc_info, LEAFENTRY *new_leafentry_p, int64_t * numbytes_delta_p) { // change in total size of key and val, not including any overhead @@ -504,20 +494,13 @@ toku_le_apply_msg(FT_MSG msg, int64_t oldnumbytes = 0; int64_t newnumbytes = 0; uint64_t oldmemsize = 0; - uint32_t keylen = ft_msg_get_keylen(msg); - LEAFENTRY copied_old_le = NULL; - size_t old_le_size = old_leafentry ? leafentry_memsize(old_leafentry) : 0; - toku::scoped_malloc copied_old_le_buf(old_le_size); - if (old_leafentry) { - CAST_FROM_VOIDP(copied_old_le, copied_old_le_buf.get()); - memcpy(copied_old_le, old_leafentry, old_le_size); - } + uint32_t keylen = msg.kdbt()->size; if (old_leafentry == NULL) { msg_init_empty_ule(&ule); } else { oldmemsize = leafentry_memsize(old_leafentry); - le_unpack(&ule, copied_old_le); // otherwise unpack leafentry + le_unpack(&ule, old_leafentry); // otherwise unpack leafentry oldnumbytes = ule_get_innermost_numbytes(&ule, keylen); } msg_modify_ule(&ule, msg); // modify unpacked leafentry @@ -550,21 +533,29 @@ toku_le_apply_msg(FT_MSG msg, STATUS_INC(LE_APPLY_GC_BYTES_IN, size_before_gc); STATUS_INC(LE_APPLY_GC_BYTES_OUT, size_after_gc); } - int rval = le_pack( + + void *maybe_free = nullptr; + int r = le_pack( &ule, // create packed leafentry data_buffer, idx, - ft_msg_get_key(msg), // contract of this function is caller has this set, always + msg.kdbt()->data, // contract of this function is caller has this set, always keylen, // contract of this function is caller has this set, always + old_keylen, oldmemsize, - new_leafentry_p + new_leafentry_p, + &maybe_free ); - invariant_zero(rval); + invariant_zero(r); if (*new_leafentry_p) { newnumbytes = ule_get_innermost_numbytes(&ule, keylen); } *numbytes_delta_p = newnumbytes - oldnumbytes; + ule_cleanup(&ule); + if (maybe_free != nullptr) { + toku_free(maybe_free); + } } bool toku_le_worth_running_garbage_collection(LEAFENTRY le, txn_gc_info *gc_info) { @@ -621,15 +612,8 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry, ULE_S ule; int64_t oldnumbytes = 0; int64_t newnumbytes = 0; - LEAFENTRY copied_old_le = NULL; - size_t old_le_size = old_leaf_entry ? 
leafentry_memsize(old_leaf_entry) : 0; - toku::scoped_malloc copied_old_le_buf(old_le_size); - if (old_leaf_entry) { - CAST_FROM_VOIDP(copied_old_le, copied_old_le_buf.get()); - memcpy(copied_old_le, old_leaf_entry, old_le_size); - } - le_unpack(&ule, copied_old_le); + le_unpack(&ule, old_leaf_entry); oldnumbytes = ule_get_innermost_numbytes(&ule, keylen); uint32_t old_mem_size = leafentry_memsize(old_leaf_entry); @@ -654,21 +638,28 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry, STATUS_INC(LE_APPLY_GC_BYTES_OUT, size_after_gc); } + void *maybe_free = nullptr; int r = le_pack( &ule, data_buffer, idx, keyp, keylen, + keylen, // old_keylen, same because the key isn't going to change for gc old_mem_size, - new_leaf_entry + new_leaf_entry, + &maybe_free ); - assert(r == 0); + invariant_zero(r); if (*new_leaf_entry) { newnumbytes = ule_get_innermost_numbytes(&ule, keylen); } *numbytes_delta_p = newnumbytes - oldnumbytes; + ule_cleanup(&ule); + if (maybe_free != nullptr) { + toku_free(maybe_free); + } } ///////////////////////////////////////////////////////////////////////////////// @@ -686,10 +677,10 @@ msg_init_empty_ule(ULE ule) { // Purpose is to modify the unpacked leafentry in our private workspace. // static void -msg_modify_ule(ULE ule, FT_MSG msg) { - XIDS xids = ft_msg_get_xids(msg); - invariant(xids_get_num_xids(xids) < MAX_TRANSACTION_RECORDS); - enum ft_msg_type type = ft_msg_get_type(msg); +msg_modify_ule(ULE ule, const ft_msg &msg) { + XIDS xids = msg.xids(); + invariant(toku_xids_get_num_xids(xids) < MAX_TRANSACTION_RECORDS); + enum ft_msg_type type = msg.type(); if (type != FT_OPTIMIZE && type != FT_OPTIMIZE_FOR_UPGRADE) { ule_do_implicit_promotions(ule, xids); } @@ -702,9 +693,9 @@ msg_modify_ule(ULE ule, FT_MSG msg) { //fall through to FT_INSERT on purpose. } case FT_INSERT: { - uint32_t vallen = ft_msg_get_vallen(msg); + uint32_t vallen = msg.vdbt()->size; invariant(IS_VALID_LEN(vallen)); - void * valp = ft_msg_get_val(msg); + void * valp = msg.vdbt()->data; ule_apply_insert(ule, xids, vallen, valp); break; } @@ -731,25 +722,23 @@ msg_modify_ule(ULE ule, FT_MSG msg) { assert(false); // These messages don't get this far. Instead they get translated (in setval_fun in do_update) into FT_INSERT messages. break; default: - assert(false /* illegal FT_MSG.type */); + assert(false); /* illegal ft msg type */ break; } } -void -test_msg_modify_ule(ULE ule, FT_MSG msg){ +void test_msg_modify_ule(ULE ule, const ft_msg &msg){ msg_modify_ule(ule,msg); } - static void ule_optimize(ULE ule, XIDS xids) { if (ule->num_puxrs) { TXNID uncommitted = ule->uxrs[ule->num_cuxrs].xid; // outermost uncommitted TXNID oldest_living_xid = TXNID_NONE; - uint32_t num_xids = xids_get_num_xids(xids); + uint32_t num_xids = toku_xids_get_num_xids(xids); if (num_xids > 0) { invariant(num_xids==1); - oldest_living_xid = xids_get_xid(xids, 0); + oldest_living_xid = toku_xids_get_xid(xids, 0); } if (oldest_living_xid == TXNID_NONE || uncommitted < oldest_living_xid) { ule_promote_provisional_innermost_to_committed(ule); @@ -963,7 +952,7 @@ update_le_status(ULE ule, size_t memsize) { } // Purpose is to return a newly allocated leaf entry in packed format, or -// return null if leaf entry should be destroyed (if no transaction records +// return null if leaf entry should be destroyed (if no transaction records // are for inserts). // Transaction records in packed le are stored inner to outer (first xr is innermost), // with some information extracted out of the transaction records into the header. 
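For clarity, this is the caller-side protocol for the new old_keylen and maybe_free parameters of le_pack(), as toku_le_apply_msg() and toku_le_garbage_collect() use it in this patch; the surrounding variables are stand-ins for the callers' locals.

// Sketch of the le_pack() call sequence introduced here: the bn_data may hand
// back a buffer it no longer owns via maybe_free, and the caller releases it
// only after the repacked leafentry is in place.
void *maybe_free = nullptr;
int r = le_pack(&ule, data_buffer, idx,
                keyp, keylen,
                old_keylen,      // what is actually stored in data_buffer
                old_le_size,
                &new_leafentry, &maybe_free);
invariant_zero(r);
ule_cleanup(&ule);
if (maybe_free != nullptr) {
    toku_free(maybe_free);
}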
@@ -974,8 +963,10 @@ le_pack(ULE ule, // data to be packed into new leafentry uint32_t idx, void* keyp, uint32_t keylen, + uint32_t old_keylen, uint32_t old_le_size, - LEAFENTRY * const new_leafentry_p // this is what this function creates + LEAFENTRY * const new_leafentry_p, // this is what this function creates + void **const maybe_free ) { invariant(ule->num_cuxrs > 0); @@ -995,16 +986,17 @@ le_pack(ULE ule, // data to be packed into new leafentry } } if (data_buffer && old_le_size > 0) { - data_buffer->delete_leafentry(idx, keylen, old_le_size); + // must pass old_keylen and old_le_size, since that's what is actually stored in data_buffer + data_buffer->delete_leafentry(idx, old_keylen, old_le_size); } *new_leafentry_p = NULL; rval = 0; goto cleanup; } -found_insert:; +found_insert: memsize = le_memsize_from_ule(ule); LEAFENTRY new_leafentry; - get_space_for_le(data_buffer, idx, keyp, keylen, old_le_size, memsize, &new_leafentry); + get_space_for_le(data_buffer, idx, keyp, keylen, old_keylen, old_le_size, memsize, &new_leafentry, maybe_free); //p always points to first unused byte after leafentry we are packing uint8_t *p; @@ -1056,7 +1048,7 @@ found_insert:; for (i = 0; i < ule->num_cuxrs; i++) { p += uxr_pack_length_and_bit(ule->uxrs + ule->num_cuxrs - 1 - i, p); } - + //pack interesting values inner to outer if (ule->num_puxrs!=0) { UXR innermost = ule->uxrs + ule->num_cuxrs + ule->num_puxrs - 1; @@ -1094,7 +1086,7 @@ found_insert:; size_t bytes_written; bytes_written = (size_t)p - (size_t)new_leafentry; invariant(bytes_written == memsize); - + #if ULE_DEBUG if (omt) { //Disable recursive debugging. size_t memsize_verify = leafentry_memsize(new_leafentry); @@ -1342,9 +1334,9 @@ int le_latest_is_del(LEAFENTRY le) { bool le_has_xids(LEAFENTRY le, XIDS xids) { //Read num_uxrs - uint32_t num_xids = xids_get_num_xids(xids); + uint32_t num_xids = toku_xids_get_num_xids(xids); invariant(num_xids > 0); //Disallow checking for having TXNID_NONE - TXNID xid = xids_get_xid(xids, 0); + TXNID xid = toku_xids_get_xid(xids, 0); invariant(xid!=TXNID_NONE); bool rval = (le_outermost_uncommitted_xid(le) == xid); @@ -1594,13 +1586,13 @@ ule_do_implicit_promotions(ULE ule, XIDS xids) { //Optimization for (most) common case. //No commits necessary if everything is already committed. if (ule->num_puxrs > 0) { - int num_xids = xids_get_num_xids(xids); + int num_xids = toku_xids_get_num_xids(xids); invariant(num_xids>0); uint32_t max_index = ule->num_cuxrs + min_i32(ule->num_puxrs, num_xids) - 1; uint32_t ica_index = max_index; uint32_t index; for (index = ule->num_cuxrs; index <= max_index; index++) { - TXNID current_msg_xid = xids_get_xid(xids, index - ule->num_cuxrs); + TXNID current_msg_xid = toku_xids_get_xid(xids, index - ule->num_cuxrs); TXNID current_ule_xid = ule_get_xid(ule, index); if (current_msg_xid != current_ule_xid) { //ica is innermost transaction with matching xids. 
@@ -1690,7 +1682,7 @@ ule_promote_provisional_innermost_to_index(ULE ule, uint32_t index) { static void ule_apply_insert(ULE ule, XIDS xids, uint32_t vallen, void * valp) { ule_prepare_for_new_uxr(ule, xids); - TXNID this_xid = xids_get_innermost_xid(xids); // xid of transaction doing this insert + TXNID this_xid = toku_xids_get_innermost_xid(xids); // xid of transaction doing this insert ule_push_insert_uxr(ule, this_xid == TXNID_NONE, this_xid, vallen, valp); } @@ -1698,7 +1690,7 @@ ule_apply_insert(ULE ule, XIDS xids, uint32_t vallen, void * valp) { static void ule_apply_delete(ULE ule, XIDS xids) { ule_prepare_for_new_uxr(ule, xids); - TXNID this_xid = xids_get_innermost_xid(xids); // xid of transaction doing this delete + TXNID this_xid = toku_xids_get_innermost_xid(xids); // xid of transaction doing this delete ule_push_delete_uxr(ule, this_xid == TXNID_NONE, this_xid); } @@ -1709,7 +1701,7 @@ ule_apply_delete(ULE ule, XIDS xids) { // with placeholders. static void ule_prepare_for_new_uxr(ULE ule, XIDS xids) { - TXNID this_xid = xids_get_innermost_xid(xids); + TXNID this_xid = toku_xids_get_innermost_xid(xids); //This is for LOADER_USE_PUTS or transactionless environment //where messages use XIDS of 0 if (this_xid == TXNID_NONE && ule_get_innermost_xid(ule) == TXNID_NONE) { @@ -1734,7 +1726,7 @@ ule_prepare_for_new_uxr(ULE ule, XIDS xids) { // Remember, the innermost uxr can only be an insert or a delete, not a placeholder. static void ule_apply_abort(ULE ule, XIDS xids) { - TXNID this_xid = xids_get_innermost_xid(xids); // xid of transaction doing this abort + TXNID this_xid = toku_xids_get_innermost_xid(xids); // xid of transaction doing this abort invariant(this_xid!=TXNID_NONE); UXR innermost = ule_get_innermost_uxr(ule); // need to check for provisional entries in ule, otherwise @@ -1765,7 +1757,7 @@ ule_apply_broadcast_commit_all (ULE ule) { // If this transaction did modify the leafentry, then promote whatever it did. // Remember, the innermost uxr can only be an insert or a delete, not a placeholder. void ule_apply_commit(ULE ule, XIDS xids) { - TXNID this_xid = xids_get_innermost_xid(xids); // xid of transaction committing + TXNID this_xid = toku_xids_get_innermost_xid(xids); // xid of transaction committing invariant(this_xid!=TXNID_NONE); // need to check for provisional entries in ule, otherwise // there is nothing to abort, not checking this may result @@ -1907,7 +1899,7 @@ ule_add_placeholders(ULE ule, XIDS xids) { //Placeholders can be placed on top of the committed uxr. 
invariant(ule->num_cuxrs > 0); - uint32_t num_xids = xids_get_num_xids(xids); + uint32_t num_xids = toku_xids_get_num_xids(xids); // we assume that implicit promotion has happened // when we get this call, so the number of xids MUST // be greater than the number of provisional entries @@ -1915,12 +1907,12 @@ ule_add_placeholders(ULE ule, XIDS xids) { // make sure that the xids stack matches up to a certain amount // this first for loop is just debug code for (uint32_t i = 0; i < ule->num_puxrs; i++) { - TXNID current_msg_xid = xids_get_xid(xids, i); + TXNID current_msg_xid = toku_xids_get_xid(xids, i); TXNID current_ule_xid = ule_get_xid(ule, i + ule->num_cuxrs); invariant(current_msg_xid == current_ule_xid); } for (uint32_t i = ule->num_puxrs; i < num_xids-1; i++) { - TXNID current_msg_xid = xids_get_xid(xids, i); + TXNID current_msg_xid = toku_xids_get_xid(xids, i); ule_push_placeholder_uxr(ule, current_msg_xid); } } @@ -2072,7 +2064,7 @@ ule_verify_xids(ULE ule, uint32_t interesting, TXNID *xids) { // is_delp - output parameter that returns answer // context - parameter for f // -int +static int le_iterate_is_del(LEAFENTRY le, LE_ITERATE_CALLBACK f, bool *is_delp, TOKUTXN context) { #if ULE_DEBUG ULE_S ule; @@ -2140,6 +2132,27 @@ le_iterate_is_del(LEAFENTRY le, LE_ITERATE_CALLBACK f, bool *is_delp, TOKUTXN co return r; } +// +// Returns true if the value that is to be read is empty. +// +int le_val_is_del(LEAFENTRY le, bool is_snapshot_read, TOKUTXN txn) { + int rval; + if (is_snapshot_read) { + bool is_del = false; + le_iterate_is_del( + le, + toku_txn_reads_txnid, + &is_del, + txn + ); + rval = is_del; + } + else { + rval = le_latest_is_del(le); + } + return rval; +} + // // Iterates over "possible" TXNIDs in a leafentry's stack, until one is accepted by 'f'. Set // valpp and vallenp to value and length associated with accepted TXNID @@ -2260,9 +2273,27 @@ verify_is_empty:; return r; } -#if TOKU_WINDOWS -#pragma pack(push, 1) -#endif +void le_extract_val(LEAFENTRY le, + // should we return the entire leafentry as the val? + bool is_leaf_mode, bool is_snapshot_read, + TOKUTXN ttxn, uint32_t *vallen, void **val) { + if (is_leaf_mode) { + *val = le; + *vallen = leafentry_memsize(le); + } else if (is_snapshot_read) { + int r = le_iterate_val( + le, + toku_txn_reads_txnid, + val, + vallen, + ttxn + ); + lazy_assert_zero(r); + } else { + *val = le_latest_val_and_len(le, vallen); + } +} + // This is an on-disk format. static_asserts verify everything is packed and aligned correctly. struct __attribute__ ((__packed__)) leafentry_13 { struct leafentry_committed_13 { @@ -2290,9 +2321,6 @@ struct __attribute__ ((__packed__)) leafentry_13 { }; static_assert(18 == sizeof(leafentry_13), "wrong size"); static_assert(9 == __builtin_offsetof(leafentry_13, u), "wrong offset"); -#if TOKU_WINDOWS -#pragma pack(pop) -#endif //Requires: // Leafentry that ule represents should not be destroyed (is not just all deletes) @@ -2467,12 +2495,15 @@ toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry, // malloc instead of a mempool. However after supporting upgrade, // we need to use mempools and the OMT. 
rval = le_pack(&ule, // create packed leafentry - NULL, + nullptr, + 0, //only matters if we are passing in a bn_data + nullptr, //only matters if we are passing in a bn_data 0, //only matters if we are passing in a bn_data - NULL, //only matters if we are passing in a bn_data 0, //only matters if we are passing in a bn_data 0, //only matters if we are passing in a bn_data - new_leafentry_p); + new_leafentry_p, + nullptr //only matters if we are passing in a bn_data + ); ule_cleanup(&ule); *new_leafentry_memorysize = leafentry_memsize(*new_leafentry_p); return rval; diff --git a/storage/tokudb/ft-index/ft/ule.h b/storage/tokudb/ft-index/ft/ule.h index 0dd34212ff114..337abf25a5f34 100644 --- a/storage/tokudb/ft-index/ft/ule.h +++ b/storage/tokudb/ft-index/ft/ule.h @@ -6,9 +6,6 @@ * requirements of the nested transaction logic belongs here. */ -#ifndef TOKU_ULE_H -#define TOKU_ULE_H - #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -38,7 +35,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,11 +92,13 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include "leafentry.h" -#include "txn_manager.h" +#include "txn/txn_manager.h" #include void toku_ule_status_init(void); @@ -130,5 +129,3 @@ TXNID uxr_get_txnid(UXRHANDLE uxr); //1 does much slower debugging #define GARBAGE_COLLECTION_DEBUG 0 - -#endif // TOKU_ULE_H diff --git a/storage/tokudb/ft-index/ft/valgrind.suppressions b/storage/tokudb/ft-index/ft/valgrind.suppressions index b1ee166207972..d8b9b09bd1fe4 100644 --- a/storage/tokudb/ft-index/ft/valgrind.suppressions +++ b/storage/tokudb/ft-index/ft/valgrind.suppressions @@ -281,3 +281,16 @@ fun:_dl_start obj:/lib/x86_64-linux-gnu/ld-2.17.so } +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + obj:/usr/lib/libdl-2.19.so + fun:dlsym + fun:_Z19toku_memory_startupv + fun:call_init.part.0 + fun:_dl_init + obj:/usr/lib/ld-2.19.so +} + diff --git a/storage/tokudb/ft-index/ft/worker-thread-benchmarks/threadpool.h b/storage/tokudb/ft-index/ft/worker-thread-benchmarks/threadpool.h deleted file mode 100644 index e2d37b486c171..0000000000000 --- a/storage/tokudb/ft-index/ft/worker-thread-benchmarks/threadpool.h +++ /dev/null @@ -1,136 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). 
- - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-// A threadpool is a limited set of threads that can be used to apply a -// function to work contained in a work queue. The work queue is outside -// of the scope of the threadpool; the threadpool merely provides -// mechanisms to grow the number of threads in the threadpool on demand. - -typedef struct threadpool *THREADPOOL; - -// Create a new threadpool -// Effects: a new threadpool is allocated and initialized. the number of -// threads in the threadpool is limited to max_threads. initially, there -// are no threads in the pool. -// Returns: if there are no errors, the threadpool is set and zero is returned. -// Otherwise, an error number is returned. - -int threadpool_create(THREADPOOL *threadpoolptr, int max_threads); - -// Destroy a threadpool -// Effects: the calling thread joins with all of the threads in the threadpool. -// Effects: the threadpool memory is freed. -// Returns: the threadpool is set to null. - -void threadpool_destroy(THREADPOOL *threadpoolptr); - -// Maybe add a thread to the threadpool. -// Effects: the number of threads in the threadpool is expanded by 1 as long -// as the current number of threads in the threadpool is less than the max -// and there are no idle threads. -// Effects: if the thread is create, it calls the function f with argument arg -// Expects: external serialization on this function; only one thread may -// execute this function - -void threadpool_maybe_add(THREADPOOL theadpool, void *(*f)(void *), void *arg); - -// Set the current thread busy -// Effects: the threadpool keeps a count of the number of idle threads. It -// uses this count to control the creation of additional threads. - -void threadpool_set_thread_busy(THREADPOOL); - -// Set the current thread idle - -void threadpool_set_thread_idle(THREADPOOL); - -// get the current number of threads - -int threadpool_get_current_threads(THREADPOOL); diff --git a/storage/tokudb/ft-index/ft/worker-thread-benchmarks/worker-test.cc b/storage/tokudb/ft-index/ft/worker-thread-benchmarks/worker-test.cc deleted file mode 100644 index 362a3ec5604fd..0000000000000 --- a/storage/tokudb/ft-index/ft/worker-thread-benchmarks/worker-test.cc +++ /dev/null @@ -1,261 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include -#include -#include -#include -#include -#include -#include - -int usage() { - printf("measure multi-thread work scheduling overhead\n"); - printf("-nthreads N (number of worker threads, default 1)\n"); - printf("-nworkitems N (number of work items, default 1)\n"); - printf("-usleeptime N (work time, default 100)\n"); - printf("-ntests N (number of test iterations, default 1)\n"); - printf("-adaptive (use adaptive mutex locks, default no)\n"); - return 1; -} - -typedef struct workitem *WORKITEM; -struct workitem { - struct workitem *next_wq; - int usleeptime; -}; - -#include "workqueue.h" -#include "threadpool.h" - -int usleeptime = 100; - -void do_work(WORKITEM wi __attribute__((unused))) { -#if 0 - // sleep for usleeptime microseconds - usleep(usleeptime); -#else - // busy wait for usleeptime loop interations - int n = wi->usleeptime; - volatile int i; - for (i=0; ilock); assert(r == 0); - while (1) { - WORKITEM wi; - r = workqueue_deq(runner->wq, runner->lock, &wi); - if (r != 0) break; - r = pthread_mutex_unlock(runner->lock); assert(r == 0); - do_work(wi); - r = pthread_mutex_lock(runner->lock); assert(r == 0); - workqueue_enq(runner->cq, wi); - } - r = pthread_mutex_unlock(runner->lock); assert(r == 0); - return arg; -} - -static inline void lockit(pthread_mutex_t *lock, int nthreads) { - if (nthreads > 0) { - int r = pthread_mutex_lock(lock); assert(r == 0); - } -} - -static inline void unlockit(pthread_mutex_t *lock, int nthreads) { - if (nthreads > 0) { - int r = pthread_mutex_unlock(lock); assert(r == 0); - } -} - -int main(int argc, char *argv[]) { - int ntests = 1; - int nworkitems = 1; - int nthreads = 1; - int adaptive = 0; - - int r; - int i; - for (i=1; i -#include -#include -#include -#include -#include -#include - -int usage() { - printf("measure multi-thread work scheduling overhead\n"); - printf("-nworkitems N (number of work items, default 1)\n"); - printf("-usleeptime N (work time, default 100)\n"); - printf("-ntests N (number of test iterations, default 1)\n"); - return 1; -} - -typedef struct workitem *WORKITEM; -struct workitem { - int usleeptime; -}; - -cilk void do_work(WORKITEM wi) { -#if 0 - // sleep for usleeptime microseconds - usleep(wi->usleeptime); -#else - // busy wait for usleeptime loop interations - int n = wi->usleeptime; - volatile int i; - for (i=0; i -#include -#include -#include -#include - -uint64_t x1764_simple (const uint64_t *buf, size_t len) -{ - uint64_t sum=0; - for (size_t i=0; itv_sec-start->tv_sec) +1e-6*(end->tv_usec - start->tv_usec); -} - -int main (int argc, char *argv[]) { - int size = 1024*1024*4 + 8*4; - char *data = malloc(size); - for (int j=0; j<4; j++) { - struct timeval start,end,end2,end3,end4; - for (int i=0; i -void concurrent_tree::create(comparator *cmp) { +void concurrent_tree::create(const comparator *cmp) { // start with an empty root node. we do this instead of // setting m_root to null so there's always a root to lock m_root.create_root(cmp); diff --git a/storage/tokudb/ft-index/locktree/concurrent_tree.h b/storage/tokudb/ft-index/locktree/concurrent_tree.h index 740a5f1311c4e..82977bbf5f127 100644 --- a/storage/tokudb/ft-index/locktree/concurrent_tree.h +++ b/storage/tokudb/ft-index/locktree/concurrent_tree.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. 
*/ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef CONCURRENT_TREE_H -#define CONCURRENT_TREE_H - #include #include "treenode.h" @@ -174,7 +173,7 @@ class concurrent_tree { }; // effect: initialize the tree to an empty state - void create(comparator *cmp); + void create(const comparator *cmp); // effect: destroy the tree. // requires: tree is empty @@ -203,5 +202,3 @@ class concurrent_tree { #include "concurrent_tree.cc" } /* namespace toku */ - -#endif /* CONCURRENT_TREE_H */ diff --git a/storage/tokudb/ft-index/locktree/keyrange.cc b/storage/tokudb/ft-index/locktree/keyrange.cc index 0bf9790196cd6..c7cb19a597f8d 100644 --- a/storage/tokudb/ft-index/locktree/keyrange.cc +++ b/storage/tokudb/ft-index/locktree/keyrange.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,165 +91,165 @@ PATENT RIGHTS GRANT: #include "keyrange.h" -#include +#include namespace toku { -// create a keyrange by borrowing the left and right dbt -// pointers. no memory is copied. no checks for infinity needed. -void keyrange::create(const DBT *left, const DBT *right) { - init_empty(); - m_left_key = left; - m_right_key = right; -} - -// destroy the key copies. if they were never set, then destroy does nothing. -void keyrange::destroy(void) { - toku_destroy_dbt(&m_left_key_copy); - toku_destroy_dbt(&m_right_key_copy); -} - -// create a keyrange by copying the keys from the given range. -void keyrange::create_copy(const keyrange &range) { - // start with an initialized, empty range - init_empty(); - - // optimize the case where the left and right keys are the same. - // we'd like to only have one copy of the data. - if (toku_dbt_equals(range.get_left_key(), range.get_right_key())) { - set_both_keys(range.get_left_key()); - } else { - // replace our empty left and right keys with - // copies of the range's left and right keys - replace_left_key(range.get_left_key()); - replace_right_key(range.get_right_key()); + // create a keyrange by borrowing the left and right dbt + // pointers. no memory is copied. no checks for infinity needed. + void keyrange::create(const DBT *left, const DBT *right) { + init_empty(); + m_left_key = left; + m_right_key = right; } -} - -// extend this keyrange by choosing the leftmost and rightmost -// endpoints between this range and the given. replaced keys -// in this range are freed and inherited keys are copied. -void keyrange::extend(comparator *cmp, const keyrange &range) { - const DBT *range_left = range.get_left_key(); - const DBT *range_right = range.get_right_key(); - if (cmp->compare(range_left, get_left_key()) < 0) { - replace_left_key(range_left); + + // destroy the key copies. if they were never set, then destroy does nothing. + void keyrange::destroy(void) { + toku_destroy_dbt(&m_left_key_copy); + toku_destroy_dbt(&m_right_key_copy); } - if (cmp->compare(range_right, get_right_key()) > 0) { - replace_right_key(range_right); + + // create a keyrange by copying the keys from the given range. 
+ void keyrange::create_copy(const keyrange &range) { + // start with an initialized, empty range + init_empty(); + + // optimize the case where the left and right keys are the same. + // we'd like to only have one copy of the data. + if (toku_dbt_equals(range.get_left_key(), range.get_right_key())) { + set_both_keys(range.get_left_key()); + } else { + // replace our empty left and right keys with + // copies of the range's left and right keys + replace_left_key(range.get_left_key()); + replace_right_key(range.get_right_key()); + } } -} - -// how much memory does this keyrange take? -// - the size of the left and right keys -// --- ignore the fact that we may have optimized the point case. -// it complicates things for little gain. -// - the size of the keyrange class itself -uint64_t keyrange::get_memory_size(void) const { - const DBT *left_key = get_left_key(); - const DBT *right_key = get_right_key(); - return left_key->size + right_key->size + sizeof(keyrange); -} - -// compare ranges. -keyrange::comparison keyrange::compare(comparator *cmp, const keyrange &range) const { - if (cmp->compare(get_right_key(), range.get_left_key()) < 0) { - return comparison::LESS_THAN; - } else if (cmp->compare(get_left_key(), range.get_right_key()) > 0) { - return comparison::GREATER_THAN; - } else if (cmp->compare(get_left_key(), range.get_left_key()) == 0 && - cmp->compare(get_right_key(), range.get_right_key()) == 0) { - return comparison::EQUALS; - } else { - return comparison::OVERLAPS; + + // extend this keyrange by choosing the leftmost and rightmost + // endpoints between this range and the given. replaced keys + // in this range are freed and inherited keys are copied. + void keyrange::extend(const comparator &cmp, const keyrange &range) { + const DBT *range_left = range.get_left_key(); + const DBT *range_right = range.get_right_key(); + if (cmp(range_left, get_left_key()) < 0) { + replace_left_key(range_left); + } + if (cmp(range_right, get_right_key()) > 0) { + replace_right_key(range_right); + } } -} - -bool keyrange::overlaps(comparator *cmp, const keyrange &range) const { - // equality is a stronger form of overlapping. - // so two ranges "overlap" if they're either equal or just overlapping. - comparison c = compare(cmp, range); - return c == comparison::EQUALS || c == comparison::OVERLAPS; -} - -keyrange keyrange::get_infinite_range(void) { - keyrange range; - range.create(toku_dbt_negative_infinity(), toku_dbt_positive_infinity()); - return range; -} - -void keyrange::init_empty(void) { - m_left_key = nullptr; - m_right_key = nullptr; - toku_init_dbt(&m_left_key_copy); - toku_init_dbt(&m_right_key_copy); - m_point_range = false; -} - -const DBT *keyrange::get_left_key(void) const { - if (m_left_key) { - return m_left_key; - } else { - return &m_left_key_copy; + + // how much memory does this keyrange take? + // - the size of the left and right keys + // --- ignore the fact that we may have optimized the point case. + // it complicates things for little gain. + // - the size of the keyrange class itself + uint64_t keyrange::get_memory_size(void) const { + const DBT *left_key = get_left_key(); + const DBT *right_key = get_right_key(); + return left_key->size + right_key->size + sizeof(keyrange); } -} -const DBT *keyrange::get_right_key(void) const { - if (m_right_key) { - return m_right_key; - } else { - return &m_right_key_copy; + // compare ranges. 
+ keyrange::comparison keyrange::compare(const comparator &cmp, const keyrange &range) const { + if (cmp(get_right_key(), range.get_left_key()) < 0) { + return comparison::LESS_THAN; + } else if (cmp(get_left_key(), range.get_right_key()) > 0) { + return comparison::GREATER_THAN; + } else if (cmp(get_left_key(), range.get_left_key()) == 0 && + cmp(get_right_key(), range.get_right_key()) == 0) { + return comparison::EQUALS; + } else { + return comparison::OVERLAPS; + } } -} - -// copy the given once and set both the left and right pointers. -// optimization for point ranges, so the left and right ranges -// are not copied twice. -void keyrange::set_both_keys(const DBT *key) { - if (toku_dbt_is_infinite(key)) { - m_left_key = key; - m_right_key = key; - } else { - toku_clone_dbt(&m_left_key_copy, *key); - toku_copyref_dbt(&m_right_key_copy, m_left_key_copy); + + bool keyrange::overlaps(const comparator &cmp, const keyrange &range) const { + // equality is a stronger form of overlapping. + // so two ranges "overlap" if they're either equal or just overlapping. + comparison c = compare(cmp, range); + return c == comparison::EQUALS || c == comparison::OVERLAPS; } - m_point_range = true; -} - -// destroy the current left key. set and possibly copy the new one -void keyrange::replace_left_key(const DBT *key) { - // a little magic: - // - // if this is a point range, then the left and right keys share - // one copy of the data, and it lives in the left key copy. so - // if we're replacing the left key, move the real data to the - // right key copy instead of destroying it. now, the memory is - // owned by the right key and the left key may be replaced. - if (m_point_range) { - m_right_key_copy = m_left_key_copy; - } else { - toku_destroy_dbt(&m_left_key_copy); + + keyrange keyrange::get_infinite_range(void) { + keyrange range; + range.create(toku_dbt_negative_infinity(), toku_dbt_positive_infinity()); + return range; } - if (toku_dbt_is_infinite(key)) { - m_left_key = key; - } else { - toku_clone_dbt(&m_left_key_copy, *key); + void keyrange::init_empty(void) { m_left_key = nullptr; - } - m_point_range = false; -} - -// destroy the current right key. set and possibly copy the new one -void keyrange::replace_right_key(const DBT *key) { - toku_destroy_dbt(&m_right_key_copy); - if (toku_dbt_is_infinite(key)) { - m_right_key = key; - } else { - toku_clone_dbt(&m_right_key_copy, *key); m_right_key = nullptr; + toku_init_dbt(&m_left_key_copy); + toku_init_dbt(&m_right_key_copy); + m_point_range = false; + } + + const DBT *keyrange::get_left_key(void) const { + if (m_left_key) { + return m_left_key; + } else { + return &m_left_key_copy; + } + } + + const DBT *keyrange::get_right_key(void) const { + if (m_right_key) { + return m_right_key; + } else { + return &m_right_key_copy; + } + } + + // copy the given once and set both the left and right pointers. + // optimization for point ranges, so the left and right ranges + // are not copied twice. + void keyrange::set_both_keys(const DBT *key) { + if (toku_dbt_is_infinite(key)) { + m_left_key = key; + m_right_key = key; + } else { + toku_clone_dbt(&m_left_key_copy, *key); + toku_copyref_dbt(&m_right_key_copy, m_left_key_copy); + } + m_point_range = true; + } + + // destroy the current left key. set and possibly copy the new one + void keyrange::replace_left_key(const DBT *key) { + // a little magic: + // + // if this is a point range, then the left and right keys share + // one copy of the data, and it lives in the left key copy. 
so + // if we're replacing the left key, move the real data to the + // right key copy instead of destroying it. now, the memory is + // owned by the right key and the left key may be replaced. + if (m_point_range) { + m_right_key_copy = m_left_key_copy; + } else { + toku_destroy_dbt(&m_left_key_copy); + } + + if (toku_dbt_is_infinite(key)) { + m_left_key = key; + } else { + toku_clone_dbt(&m_left_key_copy, *key); + m_left_key = nullptr; + } + m_point_range = false; + } + + // destroy the current right key. set and possibly copy the new one + void keyrange::replace_right_key(const DBT *key) { + toku_destroy_dbt(&m_right_key_copy); + if (toku_dbt_is_infinite(key)) { + m_right_key = key; + } else { + toku_clone_dbt(&m_right_key_copy, *key); + m_right_key = nullptr; + } + m_point_range = false; } - m_point_range = false; -} } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/keyrange.h b/storage/tokudb/ft-index/locktree/keyrange.h index cab5866a5da14..8b8e1a743e265 100644 --- a/storage/tokudb/ft-index/locktree/keyrange.h +++ b/storage/tokudb/ft-index/locktree/keyrange.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef KEYRANGE_H -#define KEYRANGE_H - #include namespace toku { @@ -118,7 +117,7 @@ class keyrange { // effect: extends the keyrange by choosing the leftmost and rightmost // endpoints from this range and the given range. // replaced keys in this range are freed, new keys are copied. - void extend(comparator *cmp, const keyrange &range); + void extend(const comparator &cmp, const keyrange &range); // returns: the amount of memory this keyrange takes. does not account // for point optimizations or malloc overhead. @@ -144,10 +143,10 @@ class keyrange { // EQUALS if given range has the same left and right endpoints // OVERLAPS if at least one of the given range's endpoints falls // between this range's endpoints - comparison compare(comparator *cmp, const keyrange &range) const; + comparison compare(const comparator &cmp, const keyrange &range) const; // returns: true if the range and the given range are equal or overlapping - bool overlaps(comparator *cmp, const keyrange &range) const; + bool overlaps(const comparator &cmp, const keyrange &range) const; // returns: a keyrange representing -inf, +inf static keyrange get_infinite_range(void); @@ -184,5 +183,3 @@ class keyrange { }; } /* namespace toku */ - -#endif /* KEYRANGE_H */ diff --git a/storage/tokudb/ft-index/locktree/lock_request.cc b/storage/tokudb/ft-index/locktree/lock_request.cc index 362f9bfa98d2c..97fa780bb0426 100644 --- a/storage/tokudb/ft-index/locktree/lock_request.cc +++ b/storage/tokudb/ft-index/locktree/lock_request.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -89,12 +89,12 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include +#include "portability/toku_race_tools.h" -#include - -#include "locktree.h" -#include "lock_request.h" +#include "ft/txn/txn.h" +#include "locktree/locktree.h" +#include "locktree/lock_request.h" +#include "util/dbt.h" namespace toku { @@ -338,7 +338,7 @@ int lock_request::retry(void) { } void lock_request::retry_all_lock_requests(locktree *lt) { - locktree::lt_lock_request_info *info = lt->get_lock_request_info(); + lt_lock_request_info *info = lt->get_lock_request_info(); // if a thread reads this bit to be true, then it should go ahead and // take the locktree mutex and retry lock requests. we use this bit diff --git a/storage/tokudb/ft-index/locktree/lock_request.h b/storage/tokudb/ft-index/locktree/lock_request.h index 48956826547cf..d1a4c2822e00a 100644 --- a/storage/tokudb/ft-index/locktree/lock_request.h +++ b/storage/tokudb/ft-index/locktree/lock_request.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,21 +86,19 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_LOCK_REQUEST_H -#define TOKU_LOCK_REQUEST_H - #include -#include -#include -#include +#include "portability/toku_pthread.h" -#include "locktree.h" -#include "txnid_set.h" -#include "wfg.h" +#include "locktree/locktree.h" +#include "locktree/txnid_set.h" +#include "locktree/wfg.h" +#include "ft/comparator.h" namespace toku { @@ -202,7 +200,7 @@ class lock_request { // the lock request info state stored in the // locktree that this lock request is for. - struct locktree::lt_lock_request_info *m_info; + struct lt_lock_request_info *m_info; // effect: tries again to acquire the lock described by this lock request // returns: 0 if retrying the request succeeded and is now complete @@ -243,5 +241,3 @@ class lock_request { ENSURE_POD(lock_request); } /* namespace toku */ - -#endif /* TOKU_LOCK_REQUEST_H */ diff --git a/storage/tokudb/ft-index/locktree/locktree.cc b/storage/tokudb/ft-index/locktree/locktree.cc index 21b0aaa1426d9..eb9be825f4809 100644 --- a/storage/tokudb/ft-index/locktree/locktree.cc +++ b/storage/tokudb/ft-index/locktree/locktree.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -116,20 +116,16 @@ namespace toku { // but does nothing based on the value of the reference count - it is // up to the user of the locktree to destroy it when it sees fit. 
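The context above notes that the locktree keeps a reference count but takes no action based on it; the add_reference()/release_reference()/get_reference_count() helpers added in the hunks below make that contract callable. A hedged sketch of the intended usage (the caller shown here is illustrative; in this patch the locktree manager plays that role):

    lt->add_reference();
    // ... hand the locktree out, acquire and release row locks, etc. ...
    uint32_t refs = lt->release_reference();
    if (refs == 0) {
        // destroy() asserts the count is zero; freeing is the caller's job
        lt->destroy();
        toku_free(lt);   // assumes the caller allocated the locktree with toku_malloc
    }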
-void locktree::create(manager::memory_tracker *mem_tracker, DICTIONARY_ID dict_id, - DESCRIPTOR desc, ft_compare_func cmp) { - m_mem_tracker = mem_tracker; - m_mgr = mem_tracker->get_manager(); +void locktree::create(locktree_manager *mgr, DICTIONARY_ID dict_id, const comparator &cmp) { + m_mgr = mgr; m_dict_id = dict_id; - // the only reason m_cmp is malloc'd here is to prevent gdb from printing - // out an entire DB struct every time you inspect a locktree. - XCALLOC(m_cmp); - m_cmp->create(cmp, desc); + m_cmp.create_from(cmp); m_reference_count = 1; m_userdata = nullptr; + XCALLOC(m_rangetree); - m_rangetree->create(m_cmp); + m_rangetree->create(&m_cmp); m_sto_txnid = TXNID_NONE; m_sto_buffer.create(); @@ -156,14 +152,25 @@ void locktree::create(manager::memory_tracker *mem_tracker, DICTIONARY_ID dict_i void locktree::destroy(void) { invariant(m_reference_count == 0); + m_cmp.destroy(); m_rangetree->destroy(); - toku_free(m_cmp); toku_free(m_rangetree); m_sto_buffer.destroy(); - m_lock_request_info.pending_lock_requests.destroy(); } +void locktree::add_reference(void) { + (void) toku_sync_add_and_fetch(&m_reference_count, 1); +} + +uint32_t locktree::release_reference(void) { + return toku_sync_sub_and_fetch(&m_reference_count, 1); +} + +uint32_t locktree::get_reference_count(void) { + return m_reference_count; +} + // a container for a range/txnid pair struct row_lock { keyrange range; @@ -174,9 +181,8 @@ struct row_lock { // storing each row lock into the given growable array. the // caller does not own the range inside the returned row locks, // so remove from the tree with care using them as keys. -static void iterate_and_get_overlapping_row_locks( - const concurrent_tree::locked_keyrange *lkr, - GrowableArray *row_locks) { +static void iterate_and_get_overlapping_row_locks(const concurrent_tree::locked_keyrange *lkr, + GrowableArray *row_locks) { struct copy_fn_obj { GrowableArray *row_locks; bool fn(const keyrange &range, TXNID txnid) { @@ -193,7 +199,7 @@ static void iterate_and_get_overlapping_row_locks( // which txnids are conflicting, and store them in the conflicts // set, if given. static bool determine_conflicting_txnids(const GrowableArray &row_locks, - const TXNID &txnid, txnid_set *conflicts) { + const TXNID &txnid, txnid_set *conflicts) { bool conflicts_exist = false; const size_t num_overlaps = row_locks.get_size(); for (size_t i = 0; i < num_overlaps; i++) { @@ -218,19 +224,23 @@ static uint64_t row_lock_size_in_tree(const row_lock &lock) { // remove and destroy the given row lock from the locked keyrange, // then notify the memory tracker of the newly freed lock. static void remove_row_lock_from_tree(concurrent_tree::locked_keyrange *lkr, - const row_lock &lock, locktree::manager::memory_tracker *mem_tracker) { + const row_lock &lock, locktree_manager *mgr) { const uint64_t mem_released = row_lock_size_in_tree(lock); lkr->remove(lock.range); - mem_tracker->note_mem_released(mem_released); + if (mgr != nullptr) { + mgr->note_mem_released(mem_released); + } } // insert a row lock into the locked keyrange, then notify // the memory tracker of this newly acquired lock. 
static void insert_row_lock_into_tree(concurrent_tree::locked_keyrange *lkr, - const row_lock &lock, locktree::manager::memory_tracker *mem_tracker) { + const row_lock &lock, locktree_manager *mgr) { uint64_t mem_used = row_lock_size_in_tree(lock); lkr->insert(lock.range, lock.txnid); - mem_tracker->note_mem_used(mem_used); + if (mgr != nullptr) { + mgr->note_mem_used(mem_used); + } } void locktree::sto_begin(TXNID txnid) { @@ -244,15 +254,19 @@ void locktree::sto_append(const DBT *left_key, const DBT *right_key) { keyrange range; range.create(left_key, right_key); - buffer_mem = m_sto_buffer.get_num_bytes(); + buffer_mem = m_sto_buffer.total_memory_size(); m_sto_buffer.append(left_key, right_key); - delta = m_sto_buffer.get_num_bytes() - buffer_mem; - m_mem_tracker->note_mem_used(delta); + delta = m_sto_buffer.total_memory_size() - buffer_mem; + if (m_mgr != nullptr) { + m_mgr->note_mem_used(delta); + } } void locktree::sto_end(void) { - uint64_t num_bytes = m_sto_buffer.get_num_bytes(); - m_mem_tracker->note_mem_released(num_bytes); + uint64_t mem_size = m_sto_buffer.total_memory_size(); + if (m_mgr != nullptr) { + m_mgr->note_mem_released(mem_size); + } m_sto_buffer.destroy(); m_sto_buffer.create(); m_sto_txnid = TXNID_NONE; @@ -281,12 +295,11 @@ void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) { concurrent_tree sto_rangetree; concurrent_tree::locked_keyrange sto_lkr; - sto_rangetree.create(m_cmp); + sto_rangetree.create(&m_cmp); // insert all of the ranges from the single txnid buffer into a new rangtree - range_buffer::iterator iter; + range_buffer::iterator iter(&m_sto_buffer); range_buffer::iterator::record rec; - iter.create(&m_sto_buffer); while (iter.current(&rec)) { sto_lkr.prepare(&sto_rangetree); int r = acquire_lock_consolidated(&sto_lkr, @@ -314,8 +327,9 @@ void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) { invariant(!m_rangetree->is_empty()); } -bool locktree::sto_try_acquire(void *prepared_lkr, TXNID txnid, - const DBT *left_key, const DBT *right_key) { +bool locktree::sto_try_acquire(void *prepared_lkr, + TXNID txnid, + const DBT *left_key, const DBT *right_key) { if (m_rangetree->is_empty() && m_sto_buffer.is_empty() && m_sto_score >= STO_SCORE_THRESHOLD) { // We can do the optimization because the rangetree is empty, and // we know its worth trying because the sto score is big enough. @@ -344,8 +358,10 @@ bool locktree::sto_try_acquire(void *prepared_lkr, TXNID txnid, // try to acquire a lock and consolidate it with existing locks if possible // param: lkr, a prepared locked keyrange // return: 0 on success, DB_LOCK_NOTGRANTED if conflicting locks exist. -int locktree::acquire_lock_consolidated(void *prepared_lkr, TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts) { +int locktree::acquire_lock_consolidated(void *prepared_lkr, + TXNID txnid, + const DBT *left_key, const DBT *right_key, + txnid_set *conflicts) { int r = 0; concurrent_tree::locked_keyrange *lkr; @@ -361,8 +377,8 @@ int locktree::acquire_lock_consolidated(void *prepared_lkr, TXNID txnid, size_t num_overlapping_row_locks = overlapping_row_locks.get_size(); // if any overlapping row locks conflict with this request, bail out. - bool conflicts_exist = determine_conflicting_txnids( - overlapping_row_locks, txnid, conflicts); + bool conflicts_exist = determine_conflicting_txnids(overlapping_row_locks, + txnid, conflicts); if (!conflicts_exist) { // there are no conflicts, so all of the overlaps are for the requesting txnid. 
// so, we must consolidate all existing overlapping ranges and the requested @@ -371,11 +387,11 @@ int locktree::acquire_lock_consolidated(void *prepared_lkr, TXNID txnid, row_lock overlapping_lock = overlapping_row_locks.fetch_unchecked(i); invariant(overlapping_lock.txnid == txnid); requested_range.extend(m_cmp, overlapping_lock.range); - remove_row_lock_from_tree(lkr, overlapping_lock, m_mem_tracker); + remove_row_lock_from_tree(lkr, overlapping_lock, m_mgr); } row_lock new_lock = { .range = requested_range, .txnid = txnid }; - insert_row_lock_into_tree(lkr, new_lock, m_mem_tracker); + insert_row_lock_into_tree(lkr, new_lock, m_mgr); } else { r = DB_LOCK_NOTGRANTED; } @@ -388,8 +404,10 @@ int locktree::acquire_lock_consolidated(void *prepared_lkr, TXNID txnid, // acquire a lock in the given key range, inclusive. if successful, // return 0. otherwise, populate the conflicts txnid_set with the set of // transactions that conflict with this request. -int locktree::acquire_lock(bool is_write_request, TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts) { +int locktree::acquire_lock(bool is_write_request, + TXNID txnid, + const DBT *left_key, const DBT *right_key, + txnid_set *conflicts) { int r = 0; // we are only supporting write locks for simplicity @@ -410,9 +428,15 @@ int locktree::acquire_lock(bool is_write_request, TXNID txnid, return r; } -int locktree::try_acquire_lock(bool is_write_request, TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn) { - int r = m_mgr->check_current_lock_constraints(big_txn); +int locktree::try_acquire_lock(bool is_write_request, + TXNID txnid, + const DBT *left_key, const DBT *right_key, + txnid_set *conflicts, bool big_txn) { + // All ranges in the locktree must have left endpoints <= right endpoints. + // Range comparisons rely on this fact, so we make a paranoid invariant here. + paranoid_invariant(m_cmp(left_key, right_key) <= 0); + int r = m_mgr == nullptr ? 0 : + m_mgr->check_current_lock_constraints(big_txn); if (r == 0) { r = acquire_lock(is_write_request, txnid, left_key, right_key, conflicts); } @@ -420,18 +444,19 @@ int locktree::try_acquire_lock(bool is_write_request, TXNID txnid, } // the locktree silently upgrades read locks to write locks for simplicity -int locktree::acquire_read_lock(TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn) { +int locktree::acquire_read_lock(TXNID txnid, const DBT *left_key, const DBT *right_key, + txnid_set *conflicts, bool big_txn) { return acquire_write_lock(txnid, left_key, right_key, conflicts, big_txn); } -int locktree::acquire_write_lock(TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn) { +int locktree::acquire_write_lock(TXNID txnid, const DBT *left_key, const DBT *right_key, + txnid_set *conflicts, bool big_txn) { return try_acquire_lock(true, txnid, left_key, right_key, conflicts, big_txn); } -void locktree::get_conflicts(bool is_write_request, TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts) { +void locktree::get_conflicts(bool is_write_request, + TXNID txnid, const DBT *left_key, const DBT *right_key, + txnid_set *conflicts) { // because we only support write locks, ignore this bit for now. (void) is_write_request; @@ -480,8 +505,8 @@ void locktree::get_conflicts(bool is_write_request, TXNID txnid, // whole lock [1,3]. 
Now, someone else can lock 2 before our txn gets // around to unlocking 2, so we should not remove that lock. void locktree::remove_overlapping_locks_for_txnid(TXNID txnid, - const DBT *left_key, const DBT *right_key) { - + const DBT *left_key, + const DBT *right_key) { keyrange release_range; release_range.create(left_key, right_key); @@ -501,7 +526,7 @@ void locktree::remove_overlapping_locks_for_txnid(TXNID txnid, // If this isn't our lock, that's ok, just don't remove it. // See rationale above. if (lock.txnid == txnid) { - remove_row_lock_from_tree(&lkr, lock, m_mem_tracker); + remove_row_lock_from_tree(&lkr, lock, m_mgr); } } @@ -545,12 +570,14 @@ void locktree::release_locks(TXNID txnid, const range_buffer *ranges) { // locks are already released, otherwise we need to do it here. bool released = sto_try_release(txnid); if (!released) { - range_buffer::iterator iter; + range_buffer::iterator iter(ranges); range_buffer::iterator::record rec; - iter.create(ranges); while (iter.current(&rec)) { const DBT *left_key = rec.get_left_key(); const DBT *right_key = rec.get_right_key(); + // All ranges in the locktree must have left endpoints <= right endpoints. + // Range comparisons rely on this fact, so we make a paranoid invariant here. + paranoid_invariant(m_cmp(left_key, right_key) <= 0); remove_overlapping_locks_for_txnid(txnid, left_key, right_key); iter.next(); } @@ -568,8 +595,8 @@ void locktree::release_locks(TXNID txnid, const range_buffer *ranges) { // row locks, storing each one into the given array of size N, // then removing each extracted lock from the locked keyrange. static int extract_first_n_row_locks(concurrent_tree::locked_keyrange *lkr, - locktree::manager::memory_tracker *mem_tracker, - row_lock *row_locks, int num_to_extract) { + locktree_manager *mgr, + row_lock *row_locks, int num_to_extract) { struct extract_fn_obj { int num_extracted; @@ -600,7 +627,7 @@ static int extract_first_n_row_locks(concurrent_tree::locked_keyrange *lkr, int num_extracted = extract_fn.num_extracted; invariant(num_extracted <= num_to_extract); for (int i = 0; i < num_extracted; i++) { - remove_row_lock_from_tree(lkr, row_locks[i], mem_tracker); + remove_row_lock_from_tree(lkr, row_locks[i], mgr); } return num_extracted; @@ -614,10 +641,10 @@ struct txnid_range_buffer { TXNID txnid; range_buffer buffer; - static int find_by_txnid(const struct txnid_range_buffer &other_buffer, const TXNID &txnid) { - if (txnid < other_buffer.txnid) { + static int find_by_txnid(struct txnid_range_buffer *const &other_buffer, const TXNID &txnid) { + if (txnid < other_buffer->txnid) { return -1; - } else if (other_buffer.txnid == txnid) { + } else if (other_buffer->txnid == txnid) { return 0; } else { return 1; @@ -632,8 +659,8 @@ struct txnid_range_buffer { // approach works well. if there are many txnids and each // has locks in a random/alternating order, then this does // not work so well. -void locktree::escalate(manager::lt_escalate_cb after_escalate_callback, void *after_escalate_callback_extra) { - omt range_buffers; +void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_escalate_callback_extra) { + omt range_buffers; range_buffers.create(); // prepare and acquire a locked keyrange on the entire locktree @@ -658,8 +685,9 @@ void locktree::escalate(manager::lt_escalate_cb after_escalate_callback, void *a // we always remove the "first" n because we are removing n // each time we do an extraction. so this loops until its empty. 
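Several hunks in locktree.cc (sto_migrate_buffer_ranges_to_tree and release_locks above, and the escalate code that continues below) replace the two-step iterator setup (declare, then iter.create(buffer)) with direct construction from the range buffer. The resulting iteration pattern, shown as a standalone sketch using only calls visible in this patch:

    range_buffer::iterator iter(&buffer);      // construct directly from the buffer
    range_buffer::iterator::record rec;
    while (iter.current(&rec)) {
        const DBT *left_key = rec.get_left_key();
        const DBT *right_key = rec.get_right_key();
        // ... act on the range [left_key, right_key] ...
        iter.next();
    }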
- while ((num_extracted = extract_first_n_row_locks(&lkr, m_mem_tracker, - extracted_buf, num_row_locks_per_batch)) > 0) { + while ((num_extracted = + extract_first_n_row_locks(&lkr, m_mgr, extracted_buf, + num_row_locks_per_batch)) > 0) { int current_index = 0; while (current_index < num_extracted) { // every batch of extracted locks is in range-sorted order. search @@ -682,7 +710,6 @@ void locktree::escalate(manager::lt_escalate_cb after_escalate_callback, void *a // Try to find a range buffer for the current txnid. Create one if it doesn't exist. // Then, append the new escalated range to the buffer. uint32_t idx; - struct txnid_range_buffer new_range_buffer; struct txnid_range_buffer *existing_range_buffer; int r = range_buffers.find_zero( current_txnid, @@ -690,9 +717,10 @@ void locktree::escalate(manager::lt_escalate_cb after_escalate_callback, void *a &idx ); if (r == DB_NOTFOUND) { - new_range_buffer.txnid = current_txnid; - new_range_buffer.buffer.create(); - new_range_buffer.buffer.append(escalated_left_key, escalated_right_key); + struct txnid_range_buffer *XMALLOC(new_range_buffer); + new_range_buffer->txnid = current_txnid; + new_range_buffer->buffer.create(); + new_range_buffer->buffer.append(escalated_left_key, escalated_right_key); range_buffers.insert_at(new_range_buffer, idx); } else { invariant_zero(r); @@ -720,14 +748,13 @@ void locktree::escalate(manager::lt_escalate_cb after_escalate_callback, void *a invariant_zero(r); const TXNID current_txnid = current_range_buffer->txnid; - range_buffer::iterator iter; + range_buffer::iterator iter(¤t_range_buffer->buffer); range_buffer::iterator::record rec; - iter.create(¤t_range_buffer->buffer); while (iter.current(&rec)) { keyrange range; range.create(rec.get_left_key(), rec.get_right_key()); row_lock lock = { .range = range, .txnid = current_txnid }; - insert_row_lock_into_tree(&lkr, lock, m_mem_tracker); + insert_row_lock_into_tree(&lkr, lock, m_mgr); iter.next(); } @@ -737,12 +764,21 @@ void locktree::escalate(manager::lt_escalate_cb after_escalate_callback, void *a } current_range_buffer->buffer.destroy(); } + + while (range_buffers.size() > 0) { + struct txnid_range_buffer *buffer; + int r = range_buffers.fetch(0, &buffer); + invariant_zero(r); + r = range_buffers.delete_at(0); + invariant_zero(r); + toku_free(buffer); + } range_buffers.destroy(); lkr.release(); } -void *locktree::get_userdata(void) { +void *locktree::get_userdata(void) const { return m_userdata; } @@ -750,19 +786,19 @@ void locktree::set_userdata(void *userdata) { m_userdata = userdata; } -struct locktree::lt_lock_request_info *locktree::get_lock_request_info(void) { +struct lt_lock_request_info *locktree::get_lock_request_info(void) { return &m_lock_request_info; } -void locktree::set_descriptor(DESCRIPTOR desc) { - m_cmp->set_descriptor(desc); +void locktree::set_comparator(const comparator &cmp) { + m_cmp.inherit(cmp); } -locktree::manager::memory_tracker *locktree::get_mem_tracker(void) const { - return m_mem_tracker; +locktree_manager *locktree::get_manager(void) const { + return m_mgr; } -int locktree::compare(const locktree *lt) { +int locktree::compare(const locktree *lt) const { if (m_dict_id.dictid < lt->m_dict_id.dictid) { return -1; } else if (m_dict_id.dictid == lt->m_dict_id.dictid) { diff --git a/storage/tokudb/ft-index/locktree/locktree.h b/storage/tokudb/ft-index/locktree/locktree.h index a3c3b566fcff8..3e613aba7a4c1 100644 --- a/storage/tokudb/ft-index/locktree/locktree.h +++ b/storage/tokudb/ft-index/locktree/locktree.h @@ -29,7 +29,7 
@@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,14 +89,13 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_LOCKTREE_H -#define TOKU_LOCKTREE_H +#pragma once #include #include #include -#include +#include // just for DICTIONARY_ID.. #include #include @@ -105,11 +104,6 @@ PATENT RIGHTS GRANT: #include "wfg.h" #include "range_buffer.h" -#define TOKU_LOCKTREE_ESCALATOR_LAMBDA 0 -#if TOKU_LOCKTREE_ESCALATOR_LAMBDA -#include -#endif - enum { LTM_SIZE_CURRENT = 0, LTM_SIZE_LIMIT, @@ -140,70 +134,30 @@ typedef struct { namespace toku { -class lock_request; -class concurrent_tree; - -// A locktree represents the set of row locks owned by all transactions -// over an open dictionary. Read and write ranges are represented as -// a left and right key which are compared with the given descriptor -// and comparison fn. -// -// Locktrees are not created and destroyed by the user. Instead, they are -// referenced and released using the locktree manager. -// -// A sample workflow looks like this: -// - Create a manager. -// - Get a locktree by dictionaroy id from the manager. -// - Perform read/write lock acquision on the locktree, add references to -// the locktree using the manager, release locks, release references, etc. -// - ... -// - Release the final reference to the locktree. It will be destroyed. -// - Destroy the manager. - -class locktree { -public: - - // effect: Attempts to grant a read lock for the range of keys between [left_key, right_key]. - // returns: If the lock cannot be granted, return DB_LOCK_NOTGRANTED, and populate the - // given conflicts set with the txnids that hold conflicting locks in the range. - // If the locktree cannot create more locks, return TOKUDB_OUT_OF_LOCKS. - // note: Read locks cannot be shared between txnids, as one would expect. - // This is for simplicity since read locks are rare in MySQL. - int acquire_read_lock(TXNID txnid, const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn); - - // effect: Attempts to grant a write lock for the range of keys between [left_key, right_key]. - // returns: If the lock cannot be granted, return DB_LOCK_NOTGRANTED, and populate the - // given conflicts set with the txnids that hold conflicting locks in the range. - // If the locktree cannot create more locks, return TOKUDB_OUT_OF_LOCKS. - int acquire_write_lock(TXNID txnid, const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn); - - // effect: populate the conflicts set with the txnids that would preventing - // the given txnid from getting a lock on [left_key, right_key] - void get_conflicts(bool is_write_request, TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts); - - // effect: Release all of the lock ranges represented by the range buffer for a txnid. - void release_locks(TXNID txnid, const range_buffer *ranges); - - // returns: The userdata associated with this locktree, or null if it has not been set. 
- void *get_userdata(void); - - void set_userdata(void *userdata); - - void set_descriptor(DESCRIPTOR desc); - - int compare(const locktree *lt); - - DICTIONARY_ID get_dict_id() const; + class locktree; + class locktree_manager; + class lock_request; + class concurrent_tree; + + typedef int (*lt_create_cb)(locktree *lt, void *extra); + typedef void (*lt_destroy_cb)(locktree *lt); + typedef void (*lt_escalate_cb)(TXNID txnid, const locktree *lt, const range_buffer &buffer, void *extra); struct lt_counters { uint64_t wait_count, wait_time; uint64_t long_wait_count, long_wait_time; uint64_t timeout_count; + + void add(const lt_counters &rhs) { + wait_count += rhs.wait_count; + wait_time += rhs.wait_time; + long_wait_count += rhs.long_wait_count; + long_wait_time += rhs.long_wait_time; + timeout_count += rhs.timeout_count; + } }; - // The locktree stores some data for lock requests. It doesn't have to know - // how they work or even what a lock request object looks like. + // Lock request state for some locktree struct lt_lock_request_info { omt pending_lock_requests; toku_mutex_t mutex; @@ -211,46 +165,15 @@ class locktree { lt_counters counters; }; - // Private info struct for storing pending lock request state. - // Only to be used by lock requests. We store it here as - // something less opaque than usual to strike a tradeoff between - // abstraction and code complexity. It is still fairly abstract - // since the lock_request object is opaque - struct lt_lock_request_info *get_lock_request_info(void); - - class manager; - - // the escalator coordinates escalation on a set of locktrees for a bunch of threads - class escalator { - public: - void create(void); - void destroy(void); -#if TOKU_LOCKTREE_ESCALATOR_LAMBDA - void run(manager *mgr, std::function escalate_locktrees_fun); -#else - void run(manager *mgr, void (*escalate_locktrees_fun)(void *extra), void *extra); -#endif - private: - toku_mutex_t m_escalator_mutex; - toku_cond_t m_escalator_done; - bool m_escalator_running; - }; - ENSURE_POD(escalator); - - // The locktree manager manages a set of locktrees, - // one for each open dictionary. Locktrees are accessed through - // the manager, and when they are no longer needed, they can - // be released by the user. - - class manager { + // The locktree manager manages a set of locktrees, one for each open dictionary. + // Locktrees are retrieved from the manager. When they are no longer needed, they + // are be released by the user. + class locktree_manager { public: - typedef int (*lt_create_cb)(locktree *lt, void *extra); - typedef void (*lt_destroy_cb)(locktree *lt); - typedef void (*lt_escalate_cb)(TXNID txnid, const locktree *lt, const range_buffer &buffer, void *extra); - - // note: create_cb is called just after a locktree is first created. - // destroy_cb is called just before a locktree is destroyed. - void create(lt_create_cb create_cb, lt_destroy_cb destroy_cb, lt_escalate_cb, void *extra); + // param: create_cb, called just after a locktree is first created. + // destroy_cb, called just before a locktree is destroyed. + // escalate_cb, called after a locktree is escalated (with extra param) + void create(lt_create_cb create_cb, lt_destroy_cb destroy_cb, lt_escalate_cb escalate_cb, void *extra); void destroy(void); @@ -260,11 +183,10 @@ class locktree { // effect: Get a locktree from the manager. If a locktree exists with the given // dict_id, it is referenced and then returned. If one did not exist, it - // is created. 
It will use the given descriptor and comparison function - // for comparing keys, and the on_create callback passed to manager::create() - // will be called with the given extra parameter. - locktree *get_lt(DICTIONARY_ID dict_id, DESCRIPTOR desc, ft_compare_func cmp, - void *on_create_extra); + // is created. It will use the comparator for comparing keys. The on_create + // callback (passed to locktree_manager::create()) will be called with the + // given extra parameter. + locktree *get_lt(DICTIONARY_ID dict_id, const comparator &cmp, void *on_create_extra); void reference_lt(locktree *lt); @@ -272,54 +194,6 @@ class locktree { // to zero, the on_destroy callback is called before it gets destroyed. void release_lt(locktree *lt); - // The memory tracker is employed by the manager to take care of - // maintaining the current number of locks and lock memory and run - // escalation if necessary. - // - // To do this, the manager hands out a memory tracker reference to each - // locktree it creates, so that the locktrees can notify the memory - // tracker when locks are acquired and released. - - class memory_tracker { - public: - void set_manager(manager *mgr); - manager *get_manager(void); - - // effect: Determines if too many locks or too much memory is being used, - // Runs escalation on the manager if so. - // returns: 0 if there enough resources to create a new lock, or TOKUDB_OUT_OF_LOCKS - // if there are not enough resources and lock escalation failed to free up - // enough resources for a new lock. - int check_current_lock_constraints(void); - - bool over_big_threshold(void); - - void note_mem_used(uint64_t mem_used); - - void note_mem_released(uint64_t mem_freed); - - private: - manager *m_mgr; - - // returns: true if the manager of this memory tracker currently - // has more locks or lock memory than it is allowed. - // note: this is a lock-less read, and it is ok for the caller to - // get false when they should have gotten true as long as - // a subsequent call gives the correct answer. - // - // in general, if the tracker says the manager is not out of - // locks, you are clear to add O(1) locks to the system. - bool out_of_locks(void) const; - }; - ENSURE_POD(memory_tracker); - - // effect: calls the private function run_escalation(), only ok to - // do for tests. - // rationale: to get better stress test coverage, we want a way to - // deterministicly trigger lock escalation. - void run_escalation_for_test(void); - void run_escalation(void); - void get_status(LTM_STATUS status); // effect: calls the iterate function on each pending lock request @@ -333,10 +207,21 @@ class locktree { void *extra); int iterate_pending_lock_requests(lock_request_iterate_callback cb, void *extra); + // effect: Determines if too many locks or too much memory is being used, + // Runs escalation on the manager if so. + // param: big_txn, if the current transaction is 'big' (has spilled rollback logs) + // returns: 0 if there enough resources to create a new lock, or TOKUDB_OUT_OF_LOCKS + // if there are not enough resources and lock escalation failed to free up + // enough resources for a new lock. 
int check_current_lock_constraints(bool big_txn); - // Escalate locktrees touched by a txn - void escalate_lock_trees_for_txn(TXNID, locktree *lt); + bool over_big_threshold(void); + + void note_mem_used(uint64_t mem_used); + + void note_mem_released(uint64_t mem_freed); + + bool out_of_locks(void) const; // Escalate all locktrees void escalate_all_locktrees(void); @@ -344,6 +229,13 @@ class locktree { // Escalate a set of locktrees void escalate_locktrees(locktree **locktrees, int num_locktrees); + // effect: calls the private function run_escalation(), only ok to + // do for tests. + // rationale: to get better stress test coverage, we want a way to + // deterministicly trigger lock escalation. + void run_escalation_for_test(void); + void run_escalation(void); + // Add time t to the escalator's wait time statistics void add_escalator_wait_time(uint64_t t); @@ -353,7 +245,6 @@ class locktree { // tracks the current number of locks and lock memory uint64_t m_max_lock_memory; uint64_t m_current_lock_memory; - memory_tracker m_mem_tracker; struct lt_counters m_lt_counters; @@ -376,22 +267,14 @@ class locktree { void status_init(void); - // effect: Gets a locktree from the map. - // requires: Manager's mutex is held + // Manage the set of open locktrees locktree *locktree_map_find(const DICTIONARY_ID &dict_id); - - // effect: Puts a locktree into the map. - // requires: Manager's mutex is held void locktree_map_put(locktree *lt); - - // effect: Removes a locktree from the map. - // requires: Manager's mutex is held void locktree_map_remove(locktree *lt); static int find_by_dict_id(locktree *const <, const DICTIONARY_ID &dict_id); void escalator_init(void); - void escalator_destroy(void); // statistics about lock escalation. @@ -404,214 +287,290 @@ class locktree { uint64_t m_long_wait_escalation_count; uint64_t m_long_wait_escalation_time; - escalator m_escalator; + // the escalator coordinates escalation on a set of locktrees for a bunch of threads + class locktree_escalator { + public: + void create(void); + void destroy(void); + void run(locktree_manager *mgr, void (*escalate_locktrees_fun)(void *extra), void *extra); + + private: + toku_mutex_t m_escalator_mutex; + toku_cond_t m_escalator_done; + bool m_escalator_running; + }; + + locktree_escalator m_escalator; friend class manager_unit_test; }; - ENSURE_POD(manager); - manager::memory_tracker *get_mem_tracker(void) const; + // A locktree represents the set of row locks owned by all transactions + // over an open dictionary. Read and write ranges are represented as + // a left and right key which are compared with the given comparator + // + // Locktrees are not created and destroyed by the user. Instead, they are + // referenced and released using the locktree manager. + // + // A sample workflow looks like this: + // - Create a manager. + // - Get a locktree by dictionaroy id from the manager. + // - Perform read/write lock acquision on the locktree, add references to + // the locktree using the manager, release locks, release references, etc. + // - ... + // - Release the final reference to the locktree. It will be destroyed. + // - Destroy the manager. 
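
As a concrete illustration of the workflow sketched in the comment above, the following is a minimal usage sketch against the locktree_manager/locktree API introduced by this patch. It is not the ydb layer's actual call sequence: the comparator `my_cmp`, the no-op callbacks, and the dictionary id value are placeholders, and it assumes the TokuFT locktree headers are on the include path.

    #include "locktree/locktree.h"
    #include "util/dbt.h"

    // trivial stand-in callbacks matching the lt_create_cb/lt_destroy_cb/lt_escalate_cb typedefs
    static int on_create_cb(toku::locktree *, void *) { return 0; }
    static void on_destroy_cb(toku::locktree *) {}
    static void on_escalate_cb(TXNID, const toku::locktree *, const toku::range_buffer &, void *) {}

    static void locktree_lifecycle_sketch(const toku::comparator &my_cmp) {
        toku::locktree_manager mgr;
        mgr.create(on_create_cb, on_destroy_cb, on_escalate_cb, nullptr);

        DICTIONARY_ID dict_id = { .dictid = 1 };      // hypothetical dictionary id
        toku::locktree *lt = mgr.get_lt(dict_id, my_cmp, nullptr);

        // take a write lock on the point range [key, key] for one transaction
        const TXNID txnid = 100;
        DBT key;
        toku_fill_dbt(&key, "k", 1);
        toku::txnid_set conflicts;
        conflicts.create();
        int r = lt->acquire_write_lock(txnid, &key, &key, &conflicts, false);
        invariant_zero(r);

        // release the same range, then drop our reference and tear down
        toku::range_buffer ranges;
        ranges.create();
        ranges.append(&key, &key);
        lt->release_locks(txnid, &ranges);
        ranges.destroy();
        conflicts.destroy();

        mgr.release_lt(lt);      // last reference released: the locktree is destroyed
        mgr.destroy();
    }
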
+ class locktree { + public: + // effect: Creates a locktree + void create(locktree_manager *mgr, DICTIONARY_ID dict_id, const comparator &cmp); -private: - manager *m_mgr; - manager::memory_tracker *m_mem_tracker; + void destroy(void); - DICTIONARY_ID m_dict_id; + // For thread-safe, external reference counting + void add_reference(void); - // use a comparator object that encapsulates an ft compare - // function and a descriptor in a fake db. this way we can - // pass it around for easy key comparisons. - // - // since this comparator will store a pointer to a descriptor, - // the user of the locktree needs to make sure that the descriptor - // is valid for as long as the locktree. this is currently - // implemented by opening an ft_handle for this locktree and - // storing it as userdata below. - comparator *m_cmp; + // requires: the reference count is > 0 + // returns: the reference count, after decrementing it by one + uint32_t release_reference(void); - uint32_t m_reference_count; + // returns: the current reference count + uint32_t get_reference_count(void); - concurrent_tree *m_rangetree; + // effect: Attempts to grant a read lock for the range of keys between [left_key, right_key]. + // returns: If the lock cannot be granted, return DB_LOCK_NOTGRANTED, and populate the + // given conflicts set with the txnids that hold conflicting locks in the range. + // If the locktree cannot create more locks, return TOKUDB_OUT_OF_LOCKS. + // note: Read locks cannot be shared between txnids, as one would expect. + // This is for simplicity since read locks are rare in MySQL. + int acquire_read_lock(TXNID txnid, const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn); - void *m_userdata; + // effect: Attempts to grant a write lock for the range of keys between [left_key, right_key]. + // returns: If the lock cannot be granted, return DB_LOCK_NOTGRANTED, and populate the + // given conflicts set with the txnids that hold conflicting locks in the range. + // If the locktree cannot create more locks, return TOKUDB_OUT_OF_LOCKS. + int acquire_write_lock(TXNID txnid, const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn); - struct lt_lock_request_info m_lock_request_info; + // effect: populate the conflicts set with the txnids that would preventing + // the given txnid from getting a lock on [left_key, right_key] + void get_conflicts(bool is_write_request, TXNID txnid, + const DBT *left_key, const DBT *right_key, txnid_set *conflicts); - // The following fields and members prefixed with "sto_" are for - // the single txnid optimization, intended to speed up the case - // when only one transaction is using the locktree. If we know - // the locktree has only one transaction, then acquiring locks - // takes O(1) work and releasing all locks takes O(1) work. - // - // How do we know that the locktree only has a single txnid? - // What do we do if it does? - // - // When a txn with txnid T requests a lock: - // - If the tree is empty, the optimization is possible. Set the single - // txnid to T, and insert the lock range into the buffer. - // - If the tree is not empty, check if the single txnid is T. If so, - // append the lock range to the buffer. Otherwise, migrate all of - // the locks in the buffer into the rangetree on behalf of txnid T, - // and invalid the single txnid. - // - // When a txn with txnid T releases its locks: - // - If the single txnid is valid, it must be for T. Destroy the buffer. 
- // - If it's not valid, release locks the normal way in the rangetree. - // - // To carry out the optimization we need to record a single txnid - // and a range buffer for each locktree, each protected by the root - // lock of the locktree's rangetree. The root lock for a rangetree - // is grabbed by preparing a locked keyrange on the rangetree. - TXNID m_sto_txnid; - range_buffer m_sto_buffer; - - // The single txnid optimization speeds up the case when only one - // transaction is using the locktree. But it has the potential to - // hurt the case when more than one txnid exists. - // - // There are two things we need to do to make the optimization only - // optimize the case we care about, and not hurt the general case. - // - // Bound the worst-case latency for lock migration when the - // optimization stops working: - // - Idea: Stop the optimization and migrate immediate if we notice - // the single txnid has takes many locks in the range buffer. - // - Implementation: Enforce a max size on the single txnid range buffer. - // - Analysis: Choosing the perfect max value, M, is difficult to do - // without some feedback from the field. Intuition tells us that M should - // not be so small that the optimization is worthless, and it should not - // be so big that it's unreasonable to have to wait behind a thread doing - // the work of converting M buffer locks into rangetree locks. - // - // Prevent concurrent-transaction workloads from trying the optimization - // in vain: - // - Idea: Don't even bother trying the optimization if we think the - // system is in a concurrent-transaction state. - // - Implementation: Do something even simpler than detecting whether the - // system is in a concurent-transaction state. Just keep a "score" value - // and some threshold. If at any time the locktree is eligible for the - // optimization, only do it if the score is at this threshold. When you - // actually do the optimization but someone has to migrate locks in the buffer - // (expensive), then reset the score back to zero. Each time a txn - // releases locks, the score is incremented by 1. - // - Analysis: If you let the threshold be "C", then at most 1 / C txns will - // do the optimization in a concurrent-transaction system. Similarly, it - // takes at most C txns to start using the single txnid optimzation, which - // is good when the system transitions from multithreaded to single threaded. - // - // STO_BUFFER_MAX_SIZE: - // - // We choose the max value to be 1 million since most transactions are smaller - // than 1 million and we can create a rangetree of 1 million elements in - // less than a second. So we can be pretty confident that this threshold - // enables the optimization almost always, and prevents super pathological - // latency issues for the first lock taken by a second thread. - // - // STO_SCORE_THRESHOLD: - // - // A simple first guess at a good value for the score threshold is 100. - // By our analysis, we'd end up doing the optimization in vain for - // around 1% of all transactions, which seems reasonable. Further, - // if the system goes single threaded, it ought to be pretty quick - // for 100 transactions to go by, so we won't have to wait long before - // we start doing the single txind optimzation again. 
- static const int STO_BUFFER_MAX_SIZE = 50 * 1024; - static const int STO_SCORE_THRESHOLD = 100; - int m_sto_score; - - // statistics about time spent ending the STO early - uint64_t m_sto_end_early_count; - tokutime_t m_sto_end_early_time; - - // effect: begins the single txnid optimizaiton, setting m_sto_txnid - // to the given txnid. - // requires: m_sto_txnid is invalid - void sto_begin(TXNID txnid); - - // effect: append a range to the sto buffer - // requires: m_sto_txnid is valid - void sto_append(const DBT *left_key, const DBT *right_key); - - // effect: ends the single txnid optimization, releaseing any memory - // stored in the sto buffer, notifying the tracker, and - // invalidating m_sto_txnid. - // requires: m_sto_txnid is valid - void sto_end(void); - - // params: prepared_lkr is a void * to a prepared locked keyrange. see below. - // effect: ends the single txnid optimization early, migrating buffer locks - // into the rangetree, calling sto_end(), and then setting the - // sto_score back to zero. - // requires: m_sto_txnid is valid - void sto_end_early(void *prepared_lkr); - void sto_end_early_no_accounting(void *prepared_lkr); - - // params: prepared_lkr is a void * to a prepared locked keyrange. we can't use - // the real type because the compiler won't allow us to forward declare - // concurrent_tree::locked_keyrange without including concurrent_tree.h, - // which we cannot do here because it is a template implementation. - // requires: the prepared locked keyrange is for the locktree's rangetree - // requires: m_sto_txnid is valid - // effect: migrates each lock in the single txnid buffer into the locktree's - // rangetree, notifying the memory tracker as necessary. - void sto_migrate_buffer_ranges_to_tree(void *prepared_lkr); - - // effect: If m_sto_txnid is valid, then release the txnid's locks - // by ending the optimization. - // requires: If m_sto_txnid is valid, it is equal to the given txnid - // returns: True if locks were released for this txnid - bool sto_try_release(TXNID txnid); - - // params: prepared_lkr is a void * to a prepared locked keyrange. see above. - // requires: the prepared locked keyrange is for the locktree's rangetree - // effect: If m_sto_txnid is valid and equal to the given txnid, then - // append a range onto the buffer. Otherwise, if m_sto_txnid is valid - // but not equal to this txnid, then migrate the buffer's locks - // into the rangetree and end the optimization, setting the score - // back to zero. - // returns: true if the lock was acquired for this txnid - bool sto_try_acquire(void *prepared_lkr, TXNID txnid, - const DBT *left_key, const DBT *right_key); - - // Effect: - // Provides a hook for a helgrind suppression. - // Returns: - // true if m_sto_txnid is not TXNID_NONE - bool sto_txnid_is_valid_unsafe(void) const; - - // Effect: - // Provides a hook for a helgrind suppression. - // Returns: - // m_sto_score - int sto_get_score_unsafe(void )const; - - // effect: Creates a locktree that uses the given memory tracker - // to report memory usage and honor memory constraints. 
- void create(manager::memory_tracker *mem_tracker, DICTIONARY_ID dict_id, - DESCRIPTOR desc, ft_compare_func cmp); - - void destroy(void); - - void remove_overlapping_locks_for_txnid(TXNID txnid, - const DBT *left_key, const DBT *right_key); - - int acquire_lock_consolidated(void *prepared_lkr, TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts); - - int acquire_lock(bool is_write_request, TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts); - - int try_acquire_lock(bool is_write_request, TXNID txnid, - const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn); - - void escalate(manager::lt_escalate_cb after_escalate_callback, void *extra); - - friend class locktree_unit_test; - friend class manager_unit_test; - friend class lock_request_unit_test; -}; -ENSURE_POD(locktree); + // effect: Release all of the lock ranges represented by the range buffer for a txnid. + void release_locks(TXNID txnid, const range_buffer *ranges); -} /* namespace toku */ + // effect: Runs escalation on this locktree + void escalate(lt_escalate_cb after_escalate_callback, void *extra); + + // returns: The userdata associated with this locktree, or null if it has not been set. + void *get_userdata(void) const; + + void set_userdata(void *userdata); + + locktree_manager *get_manager(void) const; -#endif /* TOKU_LOCKTREE_H */ + void set_comparator(const comparator &cmp); + + int compare(const locktree *lt) const; + + DICTIONARY_ID get_dict_id() const; + + // Private info struct for storing pending lock request state. + // Only to be used by lock requests. We store it here as + // something less opaque than usual to strike a tradeoff between + // abstraction and code complexity. It is still fairly abstract + // since the lock_request object is opaque + struct lt_lock_request_info *get_lock_request_info(void); + + private: + locktree_manager *m_mgr; + DICTIONARY_ID m_dict_id; + uint32_t m_reference_count; + + // Since the memory referenced by this comparator is not owned by the + // locktree, the user must guarantee it will outlive the locktree. + // + // The ydb API accomplishes this by opening an ft_handle in the on_create + // callback, which will keep the underlying FT (and its descriptor) in memory + // for as long as the handle is open. The ft_handle is stored opaquely in the + // userdata pointer below. see locktree_manager::get_lt w/ on_create_extra + comparator m_cmp; + + concurrent_tree *m_rangetree; + + void *m_userdata; + struct lt_lock_request_info m_lock_request_info; + + // The following fields and members prefixed with "sto_" are for + // the single txnid optimization, intended to speed up the case + // when only one transaction is using the locktree. If we know + // the locktree has only one transaction, then acquiring locks + // takes O(1) work and releasing all locks takes O(1) work. + // + // How do we know that the locktree only has a single txnid? + // What do we do if it does? + // + // When a txn with txnid T requests a lock: + // - If the tree is empty, the optimization is possible. Set the single + // txnid to T, and insert the lock range into the buffer. + // - If the tree is not empty, check if the single txnid is T. If so, + // append the lock range to the buffer. Otherwise, migrate all of + // the locks in the buffer into the rangetree on behalf of txnid T, + // and invalid the single txnid. + // + // When a txn with txnid T releases its locks: + // - If the single txnid is valid, it must be for T. 
Destroy the buffer. + // - If it's not valid, release locks the normal way in the rangetree. + // + // To carry out the optimization we need to record a single txnid + // and a range buffer for each locktree, each protected by the root + // lock of the locktree's rangetree. The root lock for a rangetree + // is grabbed by preparing a locked keyrange on the rangetree. + TXNID m_sto_txnid; + range_buffer m_sto_buffer; + + // The single txnid optimization speeds up the case when only one + // transaction is using the locktree. But it has the potential to + // hurt the case when more than one txnid exists. + // + // There are two things we need to do to make the optimization only + // optimize the case we care about, and not hurt the general case. + // + // Bound the worst-case latency for lock migration when the + // optimization stops working: + // - Idea: Stop the optimization and migrate immediate if we notice + // the single txnid has takes many locks in the range buffer. + // - Implementation: Enforce a max size on the single txnid range buffer. + // - Analysis: Choosing the perfect max value, M, is difficult to do + // without some feedback from the field. Intuition tells us that M should + // not be so small that the optimization is worthless, and it should not + // be so big that it's unreasonable to have to wait behind a thread doing + // the work of converting M buffer locks into rangetree locks. + // + // Prevent concurrent-transaction workloads from trying the optimization + // in vain: + // - Idea: Don't even bother trying the optimization if we think the + // system is in a concurrent-transaction state. + // - Implementation: Do something even simpler than detecting whether the + // system is in a concurent-transaction state. Just keep a "score" value + // and some threshold. If at any time the locktree is eligible for the + // optimization, only do it if the score is at this threshold. When you + // actually do the optimization but someone has to migrate locks in the buffer + // (expensive), then reset the score back to zero. Each time a txn + // releases locks, the score is incremented by 1. + // - Analysis: If you let the threshold be "C", then at most 1 / C txns will + // do the optimization in a concurrent-transaction system. Similarly, it + // takes at most C txns to start using the single txnid optimzation, which + // is good when the system transitions from multithreaded to single threaded. + // + // STO_BUFFER_MAX_SIZE: + // + // We choose the max value to be 1 million since most transactions are smaller + // than 1 million and we can create a rangetree of 1 million elements in + // less than a second. So we can be pretty confident that this threshold + // enables the optimization almost always, and prevents super pathological + // latency issues for the first lock taken by a second thread. + // + // STO_SCORE_THRESHOLD: + // + // A simple first guess at a good value for the score threshold is 100. + // By our analysis, we'd end up doing the optimization in vain for + // around 1% of all transactions, which seems reasonable. Further, + // if the system goes single threaded, it ought to be pretty quick + // for 100 transactions to go by, so we won't have to wait long before + // we start doing the single txind optimzation again. 
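
The score/threshold heuristic described above can be illustrated in isolation with a toy model. This is not the locktree's implementation (which keeps the score in m_sto_score under the rangetree's root lock); the type and method names below are stand-ins, and starting a fresh tree out as eligible is a simplifying assumption.

    #include <stdint.h>

    struct sto_score_model {
        static const int SCORE_THRESHOLD = 100;   // "C" in the analysis above
        int score;
        uint64_t single_txnid;                    // 0 means no single txnid recorded

        sto_score_model(void) : score(SCORE_THRESHOLD), single_txnid(0) {}

        // a txn asks for a lock while the tree is empty: only start the
        // optimization if the score has climbed back up to the threshold
        bool try_begin(uint64_t txnid) {
            if (score < SCORE_THRESHOLD) {
                return false;
            }
            single_txnid = txnid;
            return true;
        }

        // a second txn showed up and the buffered locks had to be migrated:
        // reset the score so at most 1/C txns pay for a migration in vain
        void on_forced_migration(void) {
            single_txnid = 0;
            score = 0;
        }

        // every lock release bumps the score, so after C releases the
        // optimization becomes eligible again
        void on_release(void) {
            if (score < SCORE_THRESHOLD) {
                score++;
            }
        }
    };
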
+ static const int STO_BUFFER_MAX_SIZE = 50 * 1024; + static const int STO_SCORE_THRESHOLD = 100; + int m_sto_score; + + // statistics about time spent ending the STO early + uint64_t m_sto_end_early_count; + tokutime_t m_sto_end_early_time; + + // effect: begins the single txnid optimizaiton, setting m_sto_txnid + // to the given txnid. + // requires: m_sto_txnid is invalid + void sto_begin(TXNID txnid); + + // effect: append a range to the sto buffer + // requires: m_sto_txnid is valid + void sto_append(const DBT *left_key, const DBT *right_key); + + // effect: ends the single txnid optimization, releaseing any memory + // stored in the sto buffer, notifying the tracker, and + // invalidating m_sto_txnid. + // requires: m_sto_txnid is valid + void sto_end(void); + + // params: prepared_lkr is a void * to a prepared locked keyrange. see below. + // effect: ends the single txnid optimization early, migrating buffer locks + // into the rangetree, calling sto_end(), and then setting the + // sto_score back to zero. + // requires: m_sto_txnid is valid + void sto_end_early(void *prepared_lkr); + void sto_end_early_no_accounting(void *prepared_lkr); + + // params: prepared_lkr is a void * to a prepared locked keyrange. we can't use + // the real type because the compiler won't allow us to forward declare + // concurrent_tree::locked_keyrange without including concurrent_tree.h, + // which we cannot do here because it is a template implementation. + // requires: the prepared locked keyrange is for the locktree's rangetree + // requires: m_sto_txnid is valid + // effect: migrates each lock in the single txnid buffer into the locktree's + // rangetree, notifying the memory tracker as necessary. + void sto_migrate_buffer_ranges_to_tree(void *prepared_lkr); + + // effect: If m_sto_txnid is valid, then release the txnid's locks + // by ending the optimization. + // requires: If m_sto_txnid is valid, it is equal to the given txnid + // returns: True if locks were released for this txnid + bool sto_try_release(TXNID txnid); + + // params: prepared_lkr is a void * to a prepared locked keyrange. see above. + // requires: the prepared locked keyrange is for the locktree's rangetree + // effect: If m_sto_txnid is valid and equal to the given txnid, then + // append a range onto the buffer. Otherwise, if m_sto_txnid is valid + // but not equal to this txnid, then migrate the buffer's locks + // into the rangetree and end the optimization, setting the score + // back to zero. + // returns: true if the lock was acquired for this txnid + bool sto_try_acquire(void *prepared_lkr, TXNID txnid, + const DBT *left_key, const DBT *right_key); + + // Effect: + // Provides a hook for a helgrind suppression. + // Returns: + // true if m_sto_txnid is not TXNID_NONE + bool sto_txnid_is_valid_unsafe(void) const; + + // Effect: + // Provides a hook for a helgrind suppression. 
+ // Returns: + // m_sto_score + int sto_get_score_unsafe(void )const; + + void remove_overlapping_locks_for_txnid(TXNID txnid, + const DBT *left_key, const DBT *right_key); + + int acquire_lock_consolidated(void *prepared_lkr, TXNID txnid, + const DBT *left_key, const DBT *right_key, + txnid_set *conflicts); + + int acquire_lock(bool is_write_request, TXNID txnid, + const DBT *left_key, const DBT *right_key, + txnid_set *conflicts); + + int try_acquire_lock(bool is_write_request, TXNID txnid, + const DBT *left_key, const DBT *right_key, + txnid_set *conflicts, bool big_txn); + + + friend class locktree_unit_test; + friend class manager_unit_test; + friend class lock_request_unit_test; + + // engine status reaches into the locktree to read some stats + friend void locktree_manager::get_status(LTM_STATUS status); + }; + +} /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/manager.cc b/storage/tokudb/ft-index/locktree/manager.cc index 8a70144e9e422..5f69c46f7da16 100644 --- a/storage/tokudb/ft-index/locktree/manager.cc +++ b/storage/tokudb/ft-index/locktree/manager.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -100,10 +100,9 @@ PATENT RIGHTS GRANT: namespace toku { -void locktree::manager::create(lt_create_cb create_cb, lt_destroy_cb destroy_cb, lt_escalate_cb escalate_cb, void *escalate_extra) { +void locktree_manager::create(lt_create_cb create_cb, lt_destroy_cb destroy_cb, lt_escalate_cb escalate_cb, void *escalate_extra) { m_max_lock_memory = DEFAULT_MAX_LOCK_MEMORY; m_current_lock_memory = 0; - m_mem_tracker.set_manager(this); m_locktree_map.create(); m_lt_create_callback = create_cb; @@ -120,7 +119,7 @@ void locktree::manager::create(lt_create_cb create_cb, lt_destroy_cb destroy_cb, escalator_init(); } -void locktree::manager::destroy(void) { +void locktree_manager::destroy(void) { escalator_destroy(); invariant(m_current_lock_memory == 0); invariant(m_locktree_map.size() == 0); @@ -128,19 +127,19 @@ void locktree::manager::destroy(void) { toku_mutex_destroy(&m_mutex); } -void locktree::manager::mutex_lock(void) { +void locktree_manager::mutex_lock(void) { toku_mutex_lock(&m_mutex); } -void locktree::manager::mutex_unlock(void) { +void locktree_manager::mutex_unlock(void) { toku_mutex_unlock(&m_mutex); } -size_t locktree::manager::get_max_lock_memory(void) { +size_t locktree_manager::get_max_lock_memory(void) { return m_max_lock_memory; } -int locktree::manager::set_max_lock_memory(size_t max_lock_memory) { +int locktree_manager::set_max_lock_memory(size_t max_lock_memory) { int r = 0; mutex_lock(); if (max_lock_memory < m_current_lock_memory) { @@ -152,40 +151,40 @@ int locktree::manager::set_max_lock_memory(size_t max_lock_memory) { return r; } -int locktree::manager::find_by_dict_id(locktree *const <, const DICTIONARY_ID &dict_id) { - if (lt->m_dict_id.dictid < dict_id.dictid) { +int locktree_manager::find_by_dict_id(locktree *const <, const DICTIONARY_ID &dict_id) { + if (lt->get_dict_id().dictid < dict_id.dictid) { return -1; - } else if (lt->m_dict_id.dictid == dict_id.dictid) { + } else if (lt->get_dict_id().dictid == dict_id.dictid) { return 0; } else { return 1; } } -locktree *locktree::manager::locktree_map_find(const DICTIONARY_ID &dict_id) { +locktree *locktree_manager::locktree_map_find(const DICTIONARY_ID &dict_id) { locktree *lt; int r = m_locktree_map.find_zero(dict_id, <, nullptr); 
return r == 0 ? lt : nullptr; } -void locktree::manager::locktree_map_put(locktree *lt) { - int r = m_locktree_map.insert(lt, lt->m_dict_id, nullptr); +void locktree_manager::locktree_map_put(locktree *lt) { + int r = m_locktree_map.insert(lt, lt->get_dict_id(), nullptr); invariant_zero(r); } -void locktree::manager::locktree_map_remove(locktree *lt) { +void locktree_manager::locktree_map_remove(locktree *lt) { uint32_t idx; locktree *found_lt; int r = m_locktree_map.find_zero( - lt->m_dict_id, &found_lt, &idx); + lt->get_dict_id(), &found_lt, &idx); invariant_zero(r); invariant(found_lt == lt); r = m_locktree_map.delete_at(idx); invariant_zero(r); } -locktree *locktree::manager::get_lt(DICTIONARY_ID dict_id, DESCRIPTOR desc, - ft_compare_func cmp, void *on_create_extra) { +locktree *locktree_manager::get_lt(DICTIONARY_ID dict_id, + const comparator &cmp, void *on_create_extra) { // hold the mutex around searching and maybe // inserting into the locktree map @@ -194,15 +193,14 @@ locktree *locktree::manager::get_lt(DICTIONARY_ID dict_id, DESCRIPTOR desc, locktree *lt = locktree_map_find(dict_id); if (lt == nullptr) { XCALLOC(lt); - lt->create(&m_mem_tracker, dict_id, desc, cmp); - invariant(lt->m_reference_count == 1); + lt->create(this, dict_id, cmp); // new locktree created - call the on_create callback // and put it in the locktree map if (m_lt_create_callback) { int r = m_lt_create_callback(lt, on_create_extra); if (r != 0) { - (void) toku_sync_sub_and_fetch(<->m_reference_count, 1); + lt->release_reference(); lt->destroy(); toku_free(lt); lt = nullptr; @@ -220,7 +218,7 @@ locktree *locktree::manager::get_lt(DICTIONARY_ID dict_id, DESCRIPTOR desc, return lt; } -void locktree::manager::reference_lt(locktree *lt) { +void locktree_manager::reference_lt(locktree *lt) { // increment using a sync fetch and add. // the caller guarantees that the lt won't be // destroyed while we increment the count here. @@ -231,20 +229,12 @@ void locktree::manager::reference_lt(locktree *lt) { // if the manager's mutex is held, it is ok for the // reference count to transition from 0 to 1 (no race), // since we're serialized with other opens and closes. - toku_sync_fetch_and_add(<->m_reference_count, 1); + lt->add_reference(); } -static void add_lt_counters(locktree::lt_counters *x, locktree::lt_counters *y) { - x->wait_count += y->wait_count; - x->wait_time += y->wait_time; - x->long_wait_count += y->long_wait_count; - x->long_wait_time += y->long_wait_time; - x->timeout_count += y->timeout_count; -} - -void locktree::manager::release_lt(locktree *lt) { +void locktree_manager::release_lt(locktree *lt) { bool do_destroy = false; - DICTIONARY_ID dict_id = lt->m_dict_id; + DICTIONARY_ID dict_id = lt->get_dict_id(); // Release a reference on the locktree. If the count transitions to zero, // then we *may* need to do the cleanup. @@ -274,7 +264,7 @@ void locktree::manager::release_lt(locktree *lt) { // This way, if many threads transition the same locktree's reference count // from 1 to zero and wait behind the manager's mutex, only one of them will // do the actual destroy and the others will happily do nothing. - uint32_t refs = toku_sync_sub_and_fetch(<->m_reference_count, 1); + uint32_t refs = lt->release_reference(); if (refs == 0) { mutex_lock(); locktree *find_lt = locktree_map_find(dict_id); @@ -284,12 +274,12 @@ void locktree::manager::release_lt(locktree *lt) { // If the reference count is zero, it's our responsibility to remove // it and do the destroy. Otherwise, someone still wants it. 
invariant(find_lt == lt); - if (lt->m_reference_count == 0) { + if (lt->get_reference_count() == 0) { locktree_map_remove(lt); do_destroy = true; } } - add_lt_counters(&m_lt_counters, <->m_lock_request_info.counters); + m_lt_counters.add(lt->get_lock_request_info()->counters); mutex_unlock(); } @@ -303,28 +293,22 @@ void locktree::manager::release_lt(locktree *lt) { } } -// test-only version of lock escalation -#if TOKU_LOCKTREE_ESCALATOR_LAMBDA -void locktree::manager::run_escalation(void) { - m_escalator.run(this, [this] () -> void { escalate_all_locktrees(); }); -} -#else -static void manager_run_escalation_fun(void *extra) { - locktree::manager *thismanager = (locktree::manager *) extra; - thismanager->escalate_all_locktrees(); +void locktree_manager::run_escalation(void) { + struct escalation_fn { + static void run(void *extra) { + locktree_manager *mgr = (locktree_manager *) extra; + mgr->escalate_all_locktrees(); + }; + }; + m_escalator.run(this, escalation_fn::run, this); } -void locktree::manager::run_escalation(void) { - m_escalator.run(this, manager_run_escalation_fun, this); -} -#endif - -void locktree::manager::run_escalation_for_test(void) { +// test-only version of lock escalation +void locktree_manager::run_escalation_for_test(void) { run_escalation(); } -void locktree::manager::escalate_all_locktrees(void) { - if (0) fprintf(stderr, "%d %s:%u\n", toku_os_gettid(), __PRETTY_FUNCTION__, __LINE__); +void locktree_manager::escalate_all_locktrees(void) { uint64_t t0 = toku_current_time_microsec(); // get all locktrees @@ -347,47 +331,25 @@ void locktree::manager::escalate_all_locktrees(void) { add_escalator_wait_time(t1 - t0); } -void locktree::manager::memory_tracker::set_manager(manager *mgr) { - m_mgr = mgr; +void locktree_manager::note_mem_used(uint64_t mem_used) { + (void) toku_sync_fetch_and_add(&m_current_lock_memory, mem_used); } -locktree::manager *locktree::manager::memory_tracker::get_manager(void) { - return m_mgr; -} - -int locktree::manager::memory_tracker::check_current_lock_constraints(void) { - int r = 0; - // check if we're out of locks without the mutex first. then, grab the - // mutex and check again. if we're still out of locks, run escalation. - // return an error if we're still out of locks after escalation. 
- if (out_of_locks()) { - m_mgr->run_escalation(); - if (out_of_locks()) { - r = TOKUDB_OUT_OF_LOCKS; - } - } - return r; -} - -void locktree::manager::memory_tracker::note_mem_used(uint64_t mem_used) { - (void) toku_sync_fetch_and_add(&m_mgr->m_current_lock_memory, mem_used); -} - -void locktree::manager::memory_tracker::note_mem_released(uint64_t mem_released) { - uint64_t old_mem_used = toku_sync_fetch_and_sub(&m_mgr->m_current_lock_memory, mem_released); +void locktree_manager::note_mem_released(uint64_t mem_released) { + uint64_t old_mem_used = toku_sync_fetch_and_sub(&m_current_lock_memory, mem_released); invariant(old_mem_used >= mem_released); } -bool locktree::manager::memory_tracker::out_of_locks(void) const { - return m_mgr->m_current_lock_memory >= m_mgr->m_max_lock_memory; +bool locktree_manager::out_of_locks(void) const { + return m_current_lock_memory >= m_max_lock_memory; } -bool locktree::manager::memory_tracker::over_big_threshold(void) { - return m_mgr->m_current_lock_memory >= m_mgr->m_max_lock_memory / 2; +bool locktree_manager::over_big_threshold(void) { + return m_current_lock_memory >= m_max_lock_memory / 2; } -int locktree::manager::iterate_pending_lock_requests( - lock_request_iterate_callback callback, void *extra) { +int locktree_manager::iterate_pending_lock_requests(lock_request_iterate_callback callback, + void *extra) { mutex_lock(); int r = 0; size_t num_locktrees = m_locktree_map.size(); @@ -396,7 +358,7 @@ int locktree::manager::iterate_pending_lock_requests( r = m_locktree_map.fetch(i, <); invariant_zero(r); - struct lt_lock_request_info *info = <->m_lock_request_info; + struct lt_lock_request_info *info = lt->get_lock_request_info(); toku_mutex_lock(&info->mutex); size_t num_requests = info->pending_lock_requests.size(); @@ -404,7 +366,7 @@ int locktree::manager::iterate_pending_lock_requests( lock_request *req; r = info->pending_lock_requests.fetch(k, &req); invariant_zero(r); - r = callback(lt->m_dict_id, req->get_txnid(), + r = callback(lt->get_dict_id(), req->get_txnid(), req->get_left_key(), req->get_right_key(), req->get_conflicting_txnid(), req->get_start_time(), extra); } @@ -415,21 +377,25 @@ int locktree::manager::iterate_pending_lock_requests( return r; } -int locktree::manager::check_current_lock_constraints(bool big_txn) { +int locktree_manager::check_current_lock_constraints(bool big_txn) { int r = 0; - if (big_txn && m_mem_tracker.over_big_threshold()) { + if (big_txn && over_big_threshold()) { run_escalation(); - if (m_mem_tracker.over_big_threshold()) { + if (over_big_threshold()) { r = TOKUDB_OUT_OF_LOCKS; } } - if (r == 0) { - r = m_mem_tracker.check_current_lock_constraints(); + if (r == 0 && out_of_locks()) { + run_escalation(); + if (out_of_locks()) { + // return an error if we're still out of locks after escalation. 
+ r = TOKUDB_OUT_OF_LOCKS; + } } return r; } -void locktree::manager::escalator_init(void) { +void locktree_manager::escalator_init(void) { ZERO_STRUCT(m_escalation_mutex); toku_mutex_init(&m_escalation_mutex, nullptr); m_escalation_count = 0; @@ -442,12 +408,12 @@ void locktree::manager::escalator_init(void) { m_escalator.create(); } -void locktree::manager::escalator_destroy(void) { +void locktree_manager::escalator_destroy(void) { m_escalator.destroy(); toku_mutex_destroy(&m_escalation_mutex); } -void locktree::manager::add_escalator_wait_time(uint64_t t) { +void locktree_manager::add_escalator_wait_time(uint64_t t) { toku_mutex_lock(&m_escalation_mutex); m_wait_escalation_count += 1; m_wait_escalation_time += t; @@ -458,8 +424,7 @@ void locktree::manager::add_escalator_wait_time(uint64_t t) { toku_mutex_unlock(&m_escalation_mutex); } -void locktree::manager::escalate_locktrees(locktree **locktrees, int num_locktrees) { - if (0) fprintf(stderr, "%d %s:%u %d\n", toku_os_gettid(), __PRETTY_FUNCTION__, __LINE__, num_locktrees); +void locktree_manager::escalate_locktrees(locktree **locktrees, int num_locktrees) { // there are too many row locks in the system and we need to tidy up. // // a simple implementation of escalation does not attempt @@ -481,65 +446,32 @@ void locktree::manager::escalate_locktrees(locktree **locktrees, int num_locktre toku_mutex_unlock(&m_escalation_mutex); } -#if !TOKU_LOCKTREE_ESCALATOR_LAMBDA struct escalate_args { - locktree::manager *mgr; + locktree_manager *mgr; locktree **locktrees; int num_locktrees; }; -static void manager_escalate_locktrees(void *extra) { - escalate_args *args = (escalate_args *) extra; - args->mgr->escalate_locktrees(args->locktrees, args->num_locktrees); -} -#endif - -void locktree::manager::escalate_lock_trees_for_txn(TXNID txnid UU(), locktree *lt UU()) { - // get lock trees for txnid - const int num_locktrees = 1; - locktree *locktrees[1] = { lt }; - reference_lt(lt); - - // escalate these lock trees - locktree::escalator this_escalator; - this_escalator.create(); -#if TOKU_LOCKTREE_ESCALATOR_LAMBDA - this_escalator.run(this, [this,locktrees,num_locktrees] () -> void { escalate_locktrees(locktrees, num_locktrees); }); -#else - escalate_args args = { this, locktrees, num_locktrees }; - this_escalator.run(this, manager_escalate_locktrees, &args); -#endif - this_escalator.destroy(); -} - -void locktree::escalator::create(void) { +void locktree_manager::locktree_escalator::create(void) { ZERO_STRUCT(m_escalator_mutex); toku_mutex_init(&m_escalator_mutex, nullptr); toku_cond_init(&m_escalator_done, nullptr); m_escalator_running = false; } -void locktree::escalator::destroy(void) { +void locktree_manager::locktree_escalator::destroy(void) { toku_cond_destroy(&m_escalator_done); toku_mutex_destroy(&m_escalator_mutex); } -#if TOKU_LOCKTREE_ESCALATOR_LAMBDA -void locktree::escalator::run(locktree::manager *mgr, std::function escalate_locktrees_fun) { -#else - void locktree::escalator::run(locktree::manager *mgr, void (*escalate_locktrees_fun)(void *extra), void *extra) { -#endif +void locktree_manager::locktree_escalator::run(locktree_manager *mgr, void (*escalate_locktrees_fun)(void *extra), void *extra) { uint64_t t0 = toku_current_time_microsec(); toku_mutex_lock(&m_escalator_mutex); if (!m_escalator_running) { // run escalation on this thread m_escalator_running = true; toku_mutex_unlock(&m_escalator_mutex); -#if TOKU_LOCKTREE_ESCALATOR_LAMBDA - escalate_locktrees_fun(); -#else escalate_locktrees_fun(extra); -#endif 
toku_mutex_lock(&m_escalator_mutex); m_escalator_running = false; toku_cond_broadcast(&m_escalator_done); @@ -551,9 +483,9 @@ void locktree::escalator::run(locktree::manager *mgr, std::function mgr->add_escalator_wait_time(t1 - t0); } -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(status, k, c, t, "locktree: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(status, k, c, t, "locktree: " l, inc) -void locktree::manager::status_init(void) { +void locktree_manager::status_init(void) { STATUS_INIT(LTM_SIZE_CURRENT, LOCKTREE_MEMORY_SIZE, UINT64, "memory size", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(LTM_SIZE_LIMIT, LOCKTREE_MEMORY_SIZE_LIMIT, UINT64, "memory size limit", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(LTM_ESCALATION_COUNT, LOCKTREE_ESCALATION_NUM, UINT64, "number of times lock escalation ran", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); @@ -583,7 +515,7 @@ void locktree::manager::status_init(void) { #define STATUS_VALUE(x) status.status[x].value.num -void locktree::manager::get_status(LTM_STATUS statp) { +void locktree_manager::get_status(LTM_STATUS statp) { if (!status.initialized) { status_init(); } @@ -602,7 +534,6 @@ void locktree::manager::get_status(LTM_STATUS statp) { uint64_t sto_num_eligible = 0; uint64_t sto_end_early_count = 0; tokutime_t sto_end_early_time = 0; - size_t num_locktrees = 0; struct lt_counters lt_counters = {}; @@ -615,7 +546,7 @@ void locktree::manager::get_status(LTM_STATUS statp) { invariant_zero(r); if (toku_mutex_trylock(<->m_lock_request_info.mutex) == 0) { lock_requests_pending += lt->m_lock_request_info.pending_lock_requests.size(); - add_lt_counters(<_counters, <->m_lock_request_info.counters); + lt_counters.add(lt->get_lock_request_info()->counters); toku_mutex_unlock(<->m_lock_request_info.mutex); } sto_num_eligible += lt->sto_txnid_is_valid_unsafe() ? 1 : 0; diff --git a/storage/tokudb/ft-index/locktree/range_buffer.cc b/storage/tokudb/ft-index/locktree/range_buffer.cc index 5fd86a631c994..cc7bbd90afc47 100644 --- a/storage/tokudb/ft-index/locktree/range_buffer.cc +++ b/storage/tokudb/ft-index/locktree/range_buffer.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,215 +89,210 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include #include -#include -#include "range_buffer.h" +#include "portability/memory.h" + +#include "locktree/range_buffer.h" +#include "util/dbt.h" namespace toku { -bool range_buffer::record_header::left_is_infinite(void) const { - return left_neg_inf || left_pos_inf; -} - -bool range_buffer::record_header::right_is_infinite(void) const { - return right_neg_inf || right_pos_inf; -} - -void range_buffer::record_header::init(const DBT *left_key, const DBT *right_key) { - left_neg_inf = left_key == toku_dbt_negative_infinity(); - left_pos_inf = left_key == toku_dbt_positive_infinity(); - left_key_size = toku_dbt_is_infinite(left_key) ? 
0 : left_key->size; - if (right_key) { - right_neg_inf = right_key == toku_dbt_negative_infinity(); - right_pos_inf = right_key == toku_dbt_positive_infinity(); - right_key_size = toku_dbt_is_infinite(right_key) ? 0 : right_key->size; - } else { - right_neg_inf = left_neg_inf; - right_pos_inf = left_pos_inf; - right_key_size = 0; + bool range_buffer::record_header::left_is_infinite(void) const { + return left_neg_inf || left_pos_inf; + } + + bool range_buffer::record_header::right_is_infinite(void) const { + return right_neg_inf || right_pos_inf; } -} - -const DBT *range_buffer::iterator::record::get_left_key(void) const { - if (m_header.left_neg_inf) { - return toku_dbt_negative_infinity(); - } else if (m_header.left_pos_inf) { - return toku_dbt_positive_infinity(); - } else { - return &m_left_key; + + void range_buffer::record_header::init(const DBT *left_key, const DBT *right_key) { + left_neg_inf = left_key == toku_dbt_negative_infinity(); + left_pos_inf = left_key == toku_dbt_positive_infinity(); + left_key_size = toku_dbt_is_infinite(left_key) ? 0 : left_key->size; + if (right_key) { + right_neg_inf = right_key == toku_dbt_negative_infinity(); + right_pos_inf = right_key == toku_dbt_positive_infinity(); + right_key_size = toku_dbt_is_infinite(right_key) ? 0 : right_key->size; + } else { + right_neg_inf = left_neg_inf; + right_pos_inf = left_pos_inf; + right_key_size = 0; + } + } + + const DBT *range_buffer::iterator::record::get_left_key(void) const { + if (_header.left_neg_inf) { + return toku_dbt_negative_infinity(); + } else if (_header.left_pos_inf) { + return toku_dbt_positive_infinity(); + } else { + return &_left_key; + } } -} - -const DBT *range_buffer::iterator::record::get_right_key(void) const { - if (m_header.right_neg_inf) { - return toku_dbt_negative_infinity(); - } else if (m_header.right_pos_inf) { - return toku_dbt_positive_infinity(); - } else { - return &m_right_key; + + const DBT *range_buffer::iterator::record::get_right_key(void) const { + if (_header.right_neg_inf) { + return toku_dbt_negative_infinity(); + } else if (_header.right_pos_inf) { + return toku_dbt_positive_infinity(); + } else { + return &_right_key; + } } -} -size_t range_buffer::iterator::record::size(void) const { - return sizeof(record_header) + m_header.left_key_size + m_header.right_key_size; -} + size_t range_buffer::iterator::record::size(void) const { + return sizeof(record_header) + _header.left_key_size + _header.right_key_size; + } -void range_buffer::iterator::record::deserialize(const char *buf) { - size_t current = 0; + void range_buffer::iterator::record::deserialize(const char *buf) { + size_t current = 0; - // deserialize the header - memcpy(&m_header, buf, sizeof(record_header)); - current += sizeof(record_header); + // deserialize the header + memcpy(&_header, buf, sizeof(record_header)); + current += sizeof(record_header); - // deserialize the left key if necessary - if (!m_header.left_is_infinite()) { - // point the left DBT's buffer into ours - toku_fill_dbt(&m_left_key, buf + current, m_header.left_key_size); - current += m_header.left_key_size; - } + // deserialize the left key if necessary + if (!_header.left_is_infinite()) { + // point the left DBT's buffer into ours + toku_fill_dbt(&_left_key, buf + current, _header.left_key_size); + current += _header.left_key_size; + } - // deserialize the right key if necessary - if (!m_header.right_is_infinite()) { - if (m_header.right_key_size == 0) { - toku_copyref_dbt(&m_right_key, m_left_key); - } else { - 
toku_fill_dbt(&m_right_key, buf + current, m_header.right_key_size); + // deserialize the right key if necessary + if (!_header.right_is_infinite()) { + if (_header.right_key_size == 0) { + toku_copyref_dbt(&_right_key, _left_key); + } else { + toku_fill_dbt(&_right_key, buf + current, _header.right_key_size); + } } } -} - -void range_buffer::iterator::create(const range_buffer *buffer) { - m_buffer = buffer; - m_current_offset = 0; - m_current_size = 0; -} - -bool range_buffer::iterator::current(record *rec) { - if (m_current_offset < m_buffer->m_buf_current) { - rec->deserialize(m_buffer->m_buf + m_current_offset); - m_current_size = rec->size(); - return true; - } else { - return false; + + toku::range_buffer::iterator::iterator() : + _ma_chunk_iterator(nullptr), + _current_chunk_base(nullptr), + _current_chunk_offset(0), _current_chunk_max(0), + _current_rec_size(0) { } -} - -// move the iterator to the next record in the buffer -void range_buffer::iterator::next(void) { - invariant(m_current_offset < m_buffer->m_buf_current); - invariant(m_current_size > 0); - - // the next record is m_current_size bytes forward - // now, we don't know how big the current is, set it to 0. - m_current_offset += m_current_size; - m_current_size = 0; -} - -void range_buffer::create(void) { - // allocate buffer space lazily instead of on creation. this way, - // no malloc/free is done if the transaction ends up taking no locks. - m_buf = nullptr; - m_buf_size = 0; - m_buf_current = 0; - m_num_ranges = 0; -} - -void range_buffer::append(const DBT *left_key, const DBT *right_key) { - // if the keys are equal, then only one copy is stored. - if (toku_dbt_equals(left_key, right_key)) { - append_point(left_key); - } else { - append_range(left_key, right_key); + + toku::range_buffer::iterator::iterator(const range_buffer *buffer) : + _ma_chunk_iterator(&buffer->_arena), + _current_chunk_base(nullptr), + _current_chunk_offset(0), _current_chunk_max(0), + _current_rec_size(0) { + reset_current_chunk(); } - m_num_ranges++; -} -bool range_buffer::is_empty(void) const { - return m_buf == nullptr; -} + void range_buffer::iterator::reset_current_chunk() { + _current_chunk_base = _ma_chunk_iterator.current(&_current_chunk_max); + _current_chunk_offset = 0; + } -uint64_t range_buffer::get_num_bytes(void) const { - return m_buf_current; -} + bool range_buffer::iterator::current(record *rec) { + if (_current_chunk_offset < _current_chunk_max) { + const char *buf = reinterpret_cast(_current_chunk_base); + rec->deserialize(buf + _current_chunk_offset); + _current_rec_size = rec->size(); + return true; + } else { + return false; + } + } -int range_buffer::get_num_ranges(void) const { - return m_num_ranges; -} + // move the iterator to the next record in the buffer + void range_buffer::iterator::next(void) { + invariant(_current_chunk_offset < _current_chunk_max); + invariant(_current_rec_size > 0); + + // the next record is _current_rec_size bytes forward + _current_chunk_offset += _current_rec_size; + // now, we don't know how big the current is, set it to 0. + _current_rec_size = 0; + + if (_current_chunk_offset >= _current_chunk_max) { + // current chunk is exhausted, try moving to the next one + if (_ma_chunk_iterator.more()) { + _ma_chunk_iterator.next(); + reset_current_chunk(); + } + } + } -void range_buffer::destroy(void) { - if (m_buf) { - toku_free(m_buf); + void range_buffer::create(void) { + // allocate buffer space lazily instead of on creation. 
this way, + // no malloc/free is done if the transaction ends up taking no locks. + _arena.create(0); + _num_ranges = 0; } -} -void range_buffer::append_range(const DBT *left_key, const DBT *right_key) { - maybe_grow(sizeof(record_header) + left_key->size + right_key->size); + void range_buffer::append(const DBT *left_key, const DBT *right_key) { + // if the keys are equal, then only one copy is stored. + if (toku_dbt_equals(left_key, right_key)) { + invariant(left_key->size <= MAX_KEY_SIZE); + append_point(left_key); + } else { + invariant(left_key->size <= MAX_KEY_SIZE); + invariant(right_key->size <= MAX_KEY_SIZE); + append_range(left_key, right_key); + } + _num_ranges++; + } - record_header h; - h.init(left_key, right_key); + bool range_buffer::is_empty(void) const { + return total_memory_size() == 0; + } - // serialize the header - memcpy(m_buf + m_buf_current, &h, sizeof(record_header)); - m_buf_current += sizeof(record_header); + uint64_t range_buffer::total_memory_size(void) const { + return _arena.total_size_in_use(); + } - // serialize the left key if necessary - if (!h.left_is_infinite()) { - memcpy(m_buf + m_buf_current, left_key->data, left_key->size); - m_buf_current += left_key->size; + int range_buffer::get_num_ranges(void) const { + return _num_ranges; } - // serialize the right key if necessary - if (!h.right_is_infinite()) { - memcpy(m_buf + m_buf_current, right_key->data, right_key->size); - m_buf_current += right_key->size; + void range_buffer::destroy(void) { + _arena.destroy(); } -} -void range_buffer::append_point(const DBT *key) { - maybe_grow(sizeof(record_header) + key->size); + void range_buffer::append_range(const DBT *left_key, const DBT *right_key) { + size_t record_length = sizeof(record_header) + left_key->size + right_key->size; + char *buf = reinterpret_cast(_arena.malloc_from_arena(record_length)); - record_header h; - h.init(key, nullptr); + record_header h; + h.init(left_key, right_key); - // serialize the header - memcpy(m_buf + m_buf_current, &h, sizeof(record_header)); - m_buf_current += sizeof(record_header); + // serialize the header + memcpy(buf, &h, sizeof(record_header)); + buf += sizeof(record_header); - // serialize the key if necessary - if (!h.left_is_infinite()) { - memcpy(m_buf + m_buf_current, key->data, key->size); - m_buf_current += key->size; - } -} - -void range_buffer::maybe_grow(size_t size) { - static const size_t initial_size = 4096; - static const size_t aggressive_growth_threshold = 128 * 1024; - const size_t needed = m_buf_current + size; - if (m_buf_size < needed) { - if (m_buf_size == 0) { - m_buf_size = initial_size; + // serialize the left key if necessary + if (!h.left_is_infinite()) { + memcpy(buf, left_key->data, left_key->size); + buf += left_key->size; } - // aggressively grow the range buffer to the threshold, - // but only additivately increase the size after that. 
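The new append path above serializes each record straight into the memarena; reading the records back goes through the chunk-aware iterator added earlier in this file. A minimal sketch of that write/read round trip, using only the range_buffer interface shown in this patch (toku_fill_dbt() is the existing helper already used above); it is illustrative, not library code:

    // Sketch: build a buffer with one range and one point, then walk it.
    // Everything referenced here (create/append/iterator/record/destroy,
    // toku_fill_dbt) appears elsewhere in this patch; illustrative only.
    static void sketch_range_buffer_round_trip(void) {
        toku::range_buffer buffer;
        buffer.create();

        DBT left, right;
        toku_fill_dbt(&left, "a", 1);
        toku_fill_dbt(&right, "m", 1);
        buffer.append(&left, &right);     // stored as header + both keys
        buffer.append(&left, &left);      // point range: key stored once (append_point)

        toku::range_buffer::iterator iter(&buffer);
        toku::range_buffer::iterator::record rec;
        while (iter.current(&rec)) {
            const DBT *lk = rec.get_left_key();
            const DBT *rk = rec.get_right_key();   // aliases lk for the point range
            (void) lk; (void) rk;                  // a real caller would act on the range here
            iter.next();                           // advances, moving to the next chunk as needed
        }

        buffer.destroy();
    }
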
- while (m_buf_size < needed && m_buf_size < aggressive_growth_threshold) { - m_buf_size <<= 1; - } - while (m_buf_size < needed) { - m_buf_size += aggressive_growth_threshold; + + // serialize the right key if necessary + if (!h.right_is_infinite()) { + memcpy(buf, right_key->data, right_key->size); } - XREALLOC(m_buf, m_buf_size); } -} -size_t range_buffer::get_initial_size(size_t n) const { - size_t r = 4096; - while (r < n) { - r *= 2; + void range_buffer::append_point(const DBT *key) { + size_t record_length = sizeof(record_header) + key->size; + char *buf = reinterpret_cast(_arena.malloc_from_arena(record_length)); + + record_header h; + h.init(key, nullptr); + + // serialize the header + memcpy(buf, &h, sizeof(record_header)); + buf += sizeof(record_header); + + // serialize the key if necessary + if (!h.left_is_infinite()) { + memcpy(buf, key->data, key->size); + } } - return r; -} } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/range_buffer.h b/storage/tokudb/ft-index/locktree/range_buffer.h index ac019ba18ce57..7b1beb90329a6 100644 --- a/storage/tokudb/ft-index/locktree/range_buffer.h +++ b/storage/tokudb/ft-index/locktree/range_buffer.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,136 +86,126 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef RANGE_BUFFER_H -#define RANGE_BUFFER_H - -#include +#include "portability/toku_stdint.h" -#include +#include "util/dbt.h" +#include "util/memarena.h" namespace toku { -// a key range buffer represents a set of key ranges that can -// be stored, iterated over, and then destroyed all at once. - -class range_buffer { -// Private in spirit: We fail POD asserts when we try to store range_buffers in an omt. -// So make it all public, but don't touch. -public: -//private: - - // the key range buffer is a bunch of records in a row. - // each record has the following header, followed by the - // left key and right key data payload, if applicable. - - struct record_header { - bool left_neg_inf; - bool left_pos_inf; - bool right_pos_inf; - bool right_neg_inf; - uint32_t left_key_size; - uint32_t right_key_size; + // a key range buffer represents a set of key ranges that can + // be stored, iterated over, and then destroyed all at once. + class range_buffer { + private: - bool left_is_infinite(void) const; + // the key range buffer is a bunch of records in a row. + // each record has the following header, followed by the + // left key and right key data payload, if applicable. + // we limit keys to be 2^16, since we store lengths as 2 bytes. 
+ static const size_t MAX_KEY_SIZE = 1 << 16; - bool right_is_infinite(void) const; + struct record_header { + bool left_neg_inf; + bool left_pos_inf; + bool right_pos_inf; + bool right_neg_inf; + uint16_t left_key_size; + uint16_t right_key_size; - void init(const DBT *left_key, const DBT *right_key); - }; - static_assert(sizeof(record_header) == 12, "record header format is off"); - -public: + bool left_is_infinite(void) const; - // the iterator abstracts reading over a buffer of variable length - // records one by one until there are no more left. + bool right_is_infinite(void) const; - class iterator { + void init(const DBT *left_key, const DBT *right_key); + }; + static_assert(sizeof(record_header) == 8, "record header format is off"); + public: - // a record represents the user-view of a serialized key range. - // it handles positive and negative infinity and the optimized - // point range case, where left and right points share memory. - - class record { + // the iterator abstracts reading over a buffer of variable length + // records one by one until there are no more left. + class iterator { public: - // get a read-only pointer to the left key of this record's range - const DBT *get_left_key(void) const; - - // get a read-only pointer to the right key of this record's range - const DBT *get_right_key(void) const; + iterator(); + iterator(const range_buffer *buffer); - // how big is this record? this tells us where the next record is - size_t size(void) const; + // a record represents the user-view of a serialized key range. + // it handles positive and negative infinity and the optimized + // point range case, where left and right points share memory. + class record { + public: + // get a read-only pointer to the left key of this record's range + const DBT *get_left_key(void) const; - // populate a record header and point our DBT's - // buffers into ours if they are not infinite. - void deserialize(const char *buf); + // get a read-only pointer to the right key of this record's range + const DBT *get_right_key(void) const; - private: - record_header m_header; - DBT m_left_key; - DBT m_right_key; - }; - - void create(const range_buffer *buffer); + // how big is this record? this tells us where the next record is + size_t size(void) const; - // populate the given record object with the current - // the memory referred to by record is valid for only - // as long as the record exists. - bool current(record *rec); + // populate a record header and point our DBT's + // buffers into ours if they are not infinite. + void deserialize(const char *buf); - // move the iterator to the next record in the buffer - void next(void); + private: + record_header _header; + DBT _left_key; + DBT _right_key; + }; - private: - // the key range buffer we are iterating over, the current - // offset in that buffer, and the size of the current record. - const range_buffer *m_buffer; - size_t m_current_offset; - size_t m_current_size; - }; + // populate the given record object with the current + // the memory referred to by record is valid for only + // as long as the record exists. + bool current(record *rec); - // allocate buffer space lazily instead of on creation. this way, - // no malloc/free is done if the transaction ends up taking no locks. - void create(void); + // move the iterator to the next record in the buffer + void next(void); - // append a left/right key range to the buffer. - // if the keys are equal, then only one copy is stored. 
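For reference, the in-memory layout implied by the header above: each record is the 8-byte record_header followed immediately by the left key bytes and then the right key bytes, and a point range (left == right) stores the key once, leaving right_key_size at zero so the reader aliases the right key to the left. A small self-contained sketch of that size arithmetic; the struct below merely mirrors the fields shown in this diff and is not the library's type:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Illustrative mirror of record_header above: four bools plus two
    // uint16_t sizes pack into 8 bytes on the usual platforms.
    struct record_header_sketch {
        bool left_neg_inf;
        bool left_pos_inf;
        bool right_pos_inf;
        bool right_neg_inf;
        uint16_t left_key_size;
        uint16_t right_key_size;
    };
    static_assert(sizeof(record_header_sketch) == 8, "header should pack to 8 bytes");

    // A record occupies sizeof(header) + left_key_size + right_key_size bytes.
    // For a point range only one key copy is stored, so right_key_size is 0.
    static size_t record_size(uint16_t left_key_size, uint16_t right_key_size) {
        return sizeof(record_header_sketch) + left_key_size + right_key_size;
    }

    int main(void) {
        printf("range  [8-byte, 8-byte]: %zu bytes\n", record_size(8, 8));  // 24
        printf("point  [8-byte]:         %zu bytes\n", record_size(8, 0));  // 16
        return 0;
    }
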
- void append(const DBT *left_key, const DBT *right_key); + private: + void reset_current_chunk(); + + // the key range buffer we are iterating over, the current + // offset in that buffer, and the size of the current record. + memarena::chunk_iterator _ma_chunk_iterator; + const void *_current_chunk_base; + size_t _current_chunk_offset; + size_t _current_chunk_max; + size_t _current_rec_size; + }; - // is this range buffer empty? - bool is_empty(void) const; + // allocate buffer space lazily instead of on creation. this way, + // no malloc/free is done if the transaction ends up taking no locks. + void create(void); - // how many bytes are stored in this range buffer? - uint64_t get_num_bytes(void) const; + // append a left/right key range to the buffer. + // if the keys are equal, then only one copy is stored. + void append(const DBT *left_key, const DBT *right_key); - // how many ranges are stored in this range buffer? - int get_num_ranges(void) const; + // is this range buffer empty? + bool is_empty(void) const; - void destroy(void); + // how much memory is being used by this range buffer? + uint64_t total_memory_size(void) const; -//private: - char *m_buf; - size_t m_buf_size; - size_t m_buf_current; - int m_num_ranges; + // how many ranges are stored in this range buffer? + int get_num_ranges(void) const; - void append_range(const DBT *left_key, const DBT *right_key); + void destroy(void); - // append a point to the buffer. this is the space/time saving - // optimization for key ranges where left == right. - void append_point(const DBT *key); + private: + memarena _arena; + int _num_ranges; - void maybe_grow(size_t size); + void append_range(const DBT *left_key, const DBT *right_key); - // the initial size of the buffer is the next power of 2 - // greater than the first entry we insert into the buffer. - size_t get_initial_size(size_t n) const; -}; + // append a point to the buffer. this is the space/time saving + // optimization for key ranges where left == right. + void append_point(const DBT *key); + }; } /* namespace toku */ - -#endif /* RANGE_BUFFER_H */ diff --git a/storage/tokudb/ft-index/locktree/tests/concurrent_tree_create_destroy.cc b/storage/tokudb/ft-index/locktree/tests/concurrent_tree_create_destroy.cc index f6bb3987d1fc6..a1187d6e0cc8c 100644 --- a/storage/tokudb/ft-index/locktree/tests/concurrent_tree_create_destroy.cc +++ b/storage/tokudb/ft-index/locktree/tests/concurrent_tree_create_destroy.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_acquire_release.cc b/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_acquire_release.cc index ecf683ed8f85b..002df28ff9e49 100644 --- a/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_acquire_release.cc +++ b/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_acquire_release.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -126,19 +126,19 @@ void concurrent_tree_unit_test::test_lkr_acquire_release(void) { // if the subtree root does not overlap then one of its children // must exist and have an overlapping range. 
- if (!lkr.m_subtree->m_range.overlaps(&cmp, range)) { + if (!lkr.m_subtree->m_range.overlaps(cmp, range)) { treenode *left = lkr.m_subtree->m_left_child.ptr; treenode *right = lkr.m_subtree->m_right_child.ptr; if (left != nullptr) { // left exists, so if it does not overlap then the right must - if (!left->m_range.overlaps(&cmp, range)) { + if (!left->m_range.overlaps(cmp, range)) { invariant_notnull(right); - invariant(right->m_range.overlaps(&cmp, range)); + invariant(right->m_range.overlaps(cmp, range)); } } else { // no left child, so the right must exist and be overlapping invariant_notnull(right); - invariant(right->m_range.overlaps(&cmp, range)); + invariant(right->m_range.overlaps(cmp, range)); } } @@ -160,6 +160,8 @@ void concurrent_tree_unit_test::test_lkr_acquire_release(void) { lkr.release(); tree.destroy(); } + + cmp.destroy(); } } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_remove.cc b/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_remove.cc index ae71cda452678..a4c3f01f419c4 100644 --- a/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_remove.cc +++ b/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_remove.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -117,17 +117,17 @@ static void verify_unique_keys(void) { } static uint64_t check_for_range_and_count(concurrent_tree::locked_keyrange *lkr, - comparator *cmp, const keyrange &range, bool range_should_exist) { + const comparator &cmp, const keyrange &range, bool range_should_exist) { struct check_fn_obj { - comparator *cmp; + const comparator *cmp; uint64_t count; keyrange target_range; bool target_range_found; bool fn(const keyrange &query_range, TXNID txnid) { (void) txnid; - if (query_range.compare(cmp, target_range) == keyrange::comparison::EQUALS) { + if (query_range.compare(*cmp, target_range) == keyrange::comparison::EQUALS) { invariant(!target_range_found); target_range_found = true; } @@ -135,7 +135,7 @@ static uint64_t check_for_range_and_count(concurrent_tree::locked_keyrange *lkr, return true; } } check_fn; - check_fn.cmp = cmp; + check_fn.cmp = &cmp; check_fn.count = 0; check_fn.target_range = range; check_fn.target_range_found = false; @@ -174,14 +174,14 @@ void concurrent_tree_unit_test::test_lkr_insert_remove(void) { // insert an element. it should exist and the // count should be correct. lkr.insert(range, i); - n = check_for_range_and_count(&lkr, &cmp, range, true); + n = check_for_range_and_count(&lkr, cmp, range, true); if (i >= cap) { invariant(n == cap + 1); // remove an element previously inserted. it should // no longer exist and the count should be correct. 
range.create(get_ith_key_from_set(i - cap), get_ith_key_from_set(i - cap)); lkr.remove(range); - n = check_for_range_and_count(&lkr, &cmp, range, false); + n = check_for_range_and_count(&lkr, cmp, range, false); invariant(n == cap); } else { invariant(n == i + 1); @@ -193,12 +193,13 @@ void concurrent_tree_unit_test::test_lkr_insert_remove(void) { keyrange range; range.create(get_ith_key_from_set(num_keys - i - 1), get_ith_key_from_set(num_keys - i - 1)); lkr.remove(range); - n = check_for_range_and_count(&lkr, &cmp, range, false); + n = check_for_range_and_count(&lkr, cmp, range, false); invariant(n == (cap - i - 1)); } lkr.release(); tree.destroy(); + cmp.destroy(); } } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc b/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc index 5f0f81dc27546..1b3da34c9041f 100644 --- a/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc +++ b/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_insert_serial_large.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -136,6 +136,7 @@ void concurrent_tree_unit_test::test_lkr_insert_serial_large(void) { lkr.release(); tree.destroy(); + cmp.destroy(); } } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_remove_all.cc b/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_remove_all.cc index c7d5f4d3204f4..9fc67dbf5efd7 100644 --- a/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_remove_all.cc +++ b/storage/tokudb/ft-index/locktree/tests/concurrent_tree_lkr_remove_all.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -132,6 +132,8 @@ void concurrent_tree_unit_test::test_lkr_remove_all(void) { lkr.release(); tree.destroy(); } + + cmp.destroy(); } } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/concurrent_tree_unit_test.h b/storage/tokudb/ft-index/locktree/tests/concurrent_tree_unit_test.h index bda34978e5075..132dbf24cce38 100644 --- a/storage/tokudb/ft-index/locktree/tests/concurrent_tree_unit_test.h +++ b/storage/tokudb/ft-index/locktree/tests/concurrent_tree_unit_test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
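The test updates that follow all move away from the old locktree::manager/get_lt() boilerplate: each test now constructs a locktree directly and uses comparator objects that must be destroyed explicitly. A minimal sketch of that setup/teardown pattern, assuming only the interfaces already visible in this diff (locktree::create/release_reference/destroy, toku::comparator::create/destroy, and the compare_dbts/get_dbt helpers the tests already use); it shows the shape only, not a complete test:

    // Sketch: per-test locktree lifecycle under the new API, inside namespace toku
    // as in the tests below. No new library surface is introduced here.
    static void sketch_standalone_locktree_test(void) {
        comparator cmp;
        cmp.create(compare_dbts, nullptr);      // comparison fn + descriptor

        locktree lt;
        DICTIONARY_ID dict_id = { 1 };
        lt.create(nullptr, dict_id, cmp);       // no manager needed for unit tests

        const TXNID txnid_a = 1001;
        const DBT *one = get_dbt(1);
        int r = lt.acquire_write_lock(txnid_a, one, one, nullptr, false);
        invariant_zero(r);
        lt.remove_overlapping_locks_for_txnid(txnid_a, one, one);

        lt.release_reference();                 // drop the implicit reference...
        lt.destroy();                           // ...then tear the tree down
        cmp.destroy();                          // comparators now need explicit cleanup
    }
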
diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_create_set.cc b/storage/tokudb/ft-index/locktree/tests/lock_request_create_set.cc index b309d9b6fd89b..d88976add4f94 100644 --- a/storage/tokudb/ft-index/locktree/tests/lock_request_create_set.cc +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_create_set.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_get_set_keys.cc b/storage/tokudb/ft-index/locktree/tests/lock_request_get_set_keys.cc index 60300a138df54..55bb483114b63 100644 --- a/storage/tokudb/ft-index/locktree/tests/lock_request_get_set_keys.cc +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_get_set_keys.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_killed.cc b/storage/tokudb/ft-index/locktree/tests/lock_request_killed.cc index 742165c59dc69..3c2a6a35562ca 100644 --- a/storage/tokudb/ft-index/locktree/tests/lock_request_killed.cc +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_killed.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -117,13 +117,10 @@ static int my_killed_callback(void) { // make sure deadlocks are detected when a lock request starts void lock_request_unit_test::test_wait_time_callback(void) { int r; - locktree::manager mgr; - locktree *lt; - - mgr.create(nullptr, nullptr, nullptr, nullptr); + locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; lock_request request_a; @@ -136,12 +133,12 @@ void lock_request_unit_test::test_wait_time_callback(void) { const DBT *one = get_dbt(1); // a locks 'one' - request_a.set(lt, txnid_a, one, one, lock_request::type::WRITE, false); + request_a.set(<, txnid_a, one, one, lock_request::type::WRITE, false); r = request_a.start(); assert_zero(r); // b tries to lock 'one' - request_b.set(lt, txnid_b, one, one, lock_request::type::WRITE, false); + request_b.set(<, txnid_b, one, one, lock_request::type::WRITE, false); r = request_b.start(); assert(r == DB_LOCK_NOTGRANTED); @@ -162,11 +159,11 @@ void lock_request_unit_test::test_wait_time_callback(void) { request_b.destroy(); - release_lock_and_retry_requests(lt, txnid_a, one, one); + release_lock_and_retry_requests(<, txnid_a, one, one); request_a.destroy(); - mgr.release_lt(lt); - mgr.destroy(); + lt.release_reference(); + lt.destroy(); } } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_not_killed.cc b/storage/tokudb/ft-index/locktree/tests/lock_request_not_killed.cc index e49b88f9d7903..96bd2869fcfc6 100644 --- a/storage/tokudb/ft-index/locktree/tests/lock_request_not_killed.cc +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_not_killed.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -114,13 +114,10 @@ static int my_killed_callback(void) { // make sure deadlocks are detected when a lock request starts void lock_request_unit_test::test_wait_time_callback(void) { int r; - locktree::manager mgr; - locktree *lt; - - mgr.create(nullptr, nullptr, nullptr, nullptr); + locktree lt; DICTIONARY_ID dict_id = { 1 }; - lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; lock_request request_a; @@ -133,12 +130,12 @@ void lock_request_unit_test::test_wait_time_callback(void) { const DBT *one = get_dbt(1); // a locks 'one' - request_a.set(lt, txnid_a, one, one, lock_request::type::WRITE, false); + request_a.set(<, txnid_a, one, one, lock_request::type::WRITE, false); r = request_a.start(); assert_zero(r); // b tries to lock 'one' - request_b.set(lt, txnid_b, one, one, lock_request::type::WRITE, false); + request_b.set(<, txnid_b, one, one, lock_request::type::WRITE, false); r = request_b.start(); assert(r == DB_LOCK_NOTGRANTED); @@ -158,11 +155,11 @@ void lock_request_unit_test::test_wait_time_callback(void) { request_b.destroy(); - release_lock_and_retry_requests(lt, txnid_a, one, one); + release_lock_and_retry_requests(<, txnid_a, one, one); request_a.destroy(); - mgr.release_lt(lt); - mgr.destroy(); + lt.release_reference(); + lt.destroy(); } } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_start_deadlock.cc b/storage/tokudb/ft-index/locktree/tests/lock_request_start_deadlock.cc index 08bd4c14d20b1..af28b06b68260 100644 --- a/storage/tokudb/ft-index/locktree/tests/lock_request_start_deadlock.cc +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_start_deadlock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,14 +96,13 @@ namespace toku { // make sure deadlocks are detected when a lock request starts void lock_request_unit_test::test_start_deadlock(void) { int r; - locktree::manager mgr; - locktree *lt; + locktree lt; + // something short const uint64_t lock_wait_time = 10; - mgr.create(nullptr, nullptr, nullptr, nullptr); DICTIONARY_ID dict_id = { 1 }; - lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; TXNID txnid_b = 2001; @@ -119,30 +118,30 @@ void lock_request_unit_test::test_start_deadlock(void) { const DBT *two = get_dbt(2); // start and succeed 1,1 for A and 2,2 for B. - request_a.set(lt, txnid_a, one, one, lock_request::type::WRITE, false); + request_a.set(<, txnid_a, one, one, lock_request::type::WRITE, false); r = request_a.start(); invariant_zero(r); - request_b.set(lt, txnid_b, two, two, lock_request::type::WRITE, false); + request_b.set(<, txnid_b, two, two, lock_request::type::WRITE, false); r = request_b.start(); invariant_zero(r); // txnid A should not be granted a lock on 2,2, so it goes pending. 
- request_a.set(lt, txnid_a, two, two, lock_request::type::WRITE, false); + request_a.set(<, txnid_a, two, two, lock_request::type::WRITE, false); r = request_a.start(); invariant(r == DB_LOCK_NOTGRANTED); // if txnid B wants a lock on 1,1 it should deadlock with A - request_b.set(lt, txnid_b, one, one, lock_request::type::WRITE, false); + request_b.set(<, txnid_b, one, one, lock_request::type::WRITE, false); r = request_b.start(); invariant(r == DB_LOCK_DEADLOCK); // txnid C should not deadlock on either of these - it should just time out. - request_c.set(lt, txnid_c, one, one, lock_request::type::WRITE, false); + request_c.set(<, txnid_c, one, one, lock_request::type::WRITE, false); r = request_c.start(); invariant(r == DB_LOCK_NOTGRANTED); r = request_c.wait(lock_wait_time); invariant(r == DB_LOCK_NOTGRANTED); - request_c.set(lt, txnid_c, two, two, lock_request::type::WRITE, false); + request_c.set(<, txnid_c, two, two, lock_request::type::WRITE, false); r = request_c.start(); invariant(r == DB_LOCK_NOTGRANTED); r = request_c.wait(lock_wait_time); @@ -150,17 +149,18 @@ void lock_request_unit_test::test_start_deadlock(void) { // release locks for A and B, then wait on A's request which should succeed // since B just unlocked and should have completed A's pending request. - release_lock_and_retry_requests(lt, txnid_a, one, one); - release_lock_and_retry_requests(lt, txnid_b, two, two); + release_lock_and_retry_requests(<, txnid_a, one, one); + release_lock_and_retry_requests(<, txnid_b, two, two); r = request_a.wait(lock_wait_time); invariant_zero(r); - release_lock_and_retry_requests(lt, txnid_a, two, two); + release_lock_and_retry_requests(<, txnid_a, two, two); request_a.destroy(); request_b.destroy(); request_c.destroy(); - mgr.release_lt(lt); - mgr.destroy(); + + lt.release_reference(); + lt.destroy(); } } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_start_pending.cc b/storage/tokudb/ft-index/locktree/tests/lock_request_start_pending.cc index 867d5fb1bd09c..a719da64114fc 100644 --- a/storage/tokudb/ft-index/locktree/tests/lock_request_start_pending.cc +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_start_pending.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -97,13 +97,11 @@ namespace toku { // stored in the lock request set as pending. void lock_request_unit_test::test_start_pending(void) { int r; - locktree::manager mgr; - locktree *lt; + locktree lt; lock_request request; - mgr.create(nullptr, nullptr, nullptr, nullptr); DICTIONARY_ID dict_id = { 1 }; - lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; TXNID txnid_b = 2001; @@ -113,15 +111,15 @@ void lock_request_unit_test::test_start_pending(void) { const DBT *two = get_dbt(2); // take a range lock using txnid b - r = lt->acquire_write_lock(txnid_b, zero, two, nullptr, false); + r = lt.acquire_write_lock(txnid_b, zero, two, nullptr, false); invariant_zero(r); - locktree::lt_lock_request_info *info = lt->get_lock_request_info(); + lt_lock_request_info *info = lt.get_lock_request_info(); // start a lock request for 1,1 // it should fail. the request should be stored and in the pending state. 
request.create(); - request.set(lt, txnid_a, one, one, lock_request::type::WRITE, false); + request.set(<, txnid_a, one, one, lock_request::type::WRITE, false); r = request.start(); invariant(r == DB_LOCK_NOTGRANTED); invariant(info->pending_lock_requests.size() == 1); @@ -134,20 +132,21 @@ void lock_request_unit_test::test_start_pending(void) { invariant(compare_dbts(nullptr, &request.m_right_key_copy, one) == 0); // release the range lock for txnid b - locktree_unit_test::locktree_test_release_lock(lt, txnid_b, zero, two); + locktree_unit_test::locktree_test_release_lock(<, txnid_b, zero, two); // now retry the lock requests. // it should transition the request to successfully complete. - lock_request::retry_all_lock_requests(lt); + lock_request::retry_all_lock_requests(<); invariant(info->pending_lock_requests.size() == 0); invariant(request.m_state == lock_request::state::COMPLETE); invariant(request.m_complete_r == 0); - locktree_unit_test::locktree_test_release_lock(lt, txnid_a, one, one); + locktree_unit_test::locktree_test_release_lock(<, txnid_a, one, one); request.destroy(); - mgr.release_lt(lt); - mgr.destroy(); + + lt.release_reference(); + lt.destroy(); } } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_unit_test.h b/storage/tokudb/ft-index/locktree/tests/lock_request_unit_test.h index 3183bf2b734e4..8fc4a3f8df861 100644 --- a/storage/tokudb/ft-index/locktree/tests/lock_request_unit_test.h +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_unit_test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_LOCK_REQUEST_UNIT_TEST_H -#define TOKU_LOCK_REQUEST_UNIT_TEST_H - #include "test.h" #include "locktree_unit_test.h" @@ -132,5 +131,3 @@ class lock_request_unit_test { }; } - -#endif diff --git a/storage/tokudb/ft-index/locktree/tests/lock_request_wait_time_callback.cc b/storage/tokudb/ft-index/locktree/tests/lock_request_wait_time_callback.cc index e077d46139987..b583e32e117e8 100644 --- a/storage/tokudb/ft-index/locktree/tests/lock_request_wait_time_callback.cc +++ b/storage/tokudb/ft-index/locktree/tests/lock_request_wait_time_callback.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -98,12 +98,10 @@ static const uint64_t my_lock_wait_time = 10 * 1000; // 10 sec // make sure deadlocks are detected when a lock request starts void lock_request_unit_test::test_wait_time_callback(void) { int r; - locktree::manager mgr; - locktree *lt; + locktree lt; - mgr.create(nullptr, nullptr, nullptr, nullptr); DICTIONARY_ID dict_id = { 1 }; - lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + lt.create(nullptr, dict_id, dbt_comparator); TXNID txnid_a = 1001; lock_request request_a; @@ -117,12 +115,12 @@ void lock_request_unit_test::test_wait_time_callback(void) { const DBT *two = get_dbt(2); // a locks 'one' - request_a.set(lt, txnid_a, one, one, lock_request::type::WRITE, false); + request_a.set(<, txnid_a, one, one, lock_request::type::WRITE, false); r = request_a.start(); assert_zero(r); // b tries to lock 'one' - request_b.set(lt, txnid_b, one, two, lock_request::type::WRITE, false); + request_b.set(<, txnid_b, one, two, lock_request::type::WRITE, false); r = request_b.start(); assert(r == DB_LOCK_NOTGRANTED); uint64_t t_start = toku_current_time_microsec(); @@ -134,11 +132,11 @@ void lock_request_unit_test::test_wait_time_callback(void) { assert(t_delta >= my_lock_wait_time); request_b.destroy(); - release_lock_and_retry_requests(lt, txnid_a, one, one); + release_lock_and_retry_requests(<, txnid_a, one, one); request_a.destroy(); - mgr.release_lt(lt); - mgr.destroy(); + lt.release_reference(); + lt.destroy(); } } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_conflicts.cc b/storage/tokudb/ft-index/locktree/tests/locktree_conflicts.cc index 70f3a6249c482..716000d475363 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_conflicts.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_conflicts.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -105,11 +105,10 @@ namespace toku { // test write lock conflicts when read or write locks exist // test read lock conflicts when write locks exist void locktree_unit_test::test_conflicts(void) { - locktree::manager mgr; - mgr.create(nullptr, nullptr, nullptr, nullptr); - DESCRIPTOR desc = nullptr; + locktree lt; + DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, desc, compare_dbts, nullptr); + lt.create(nullptr, dict_id, dbt_comparator); int r; TXNID txnid_a = 1001; @@ -125,8 +124,8 @@ void locktree_unit_test::test_conflicts(void) { // test_run == 0 means test with read lock // test_run == 1 means test with write lock #define ACQUIRE_LOCK(txn, left, right, conflicts) \ - test_run == 0 ? lt->acquire_read_lock(txn, left, right, conflicts, false) \ - : lt->acquire_write_lock(txn, left, right, conflicts, false) + test_run == 0 ? lt.acquire_read_lock(txn, left, right, conflicts, false) \ + : lt.acquire_write_lock(txn, left, right, conflicts, false) // acquire some locks for txnid_a r = ACQUIRE_LOCK(txnid_a, one, one, nullptr); @@ -142,8 +141,8 @@ void locktree_unit_test::test_conflicts(void) { // if test_run == 0, then read locks exist. only test write locks. #define ACQUIRE_LOCK(txn, left, right, conflicts) \ sub_test_run == 0 && test_run == 1 ? 
\ - lt->acquire_read_lock(txn, left, right, conflicts, false) \ - : lt->acquire_write_lock(txn, left, right, conflicts, false) + lt.acquire_read_lock(txn, left, right, conflicts, false) \ + : lt.acquire_write_lock(txn, left, right, conflicts, false) // try to get point write locks for txnid_b, should fail r = ACQUIRE_LOCK(txnid_b, one, one, nullptr); invariant(r == DB_LOCK_NOTGRANTED); @@ -162,13 +161,13 @@ void locktree_unit_test::test_conflicts(void) { #undef ACQUIRE_LOCK } - lt->remove_overlapping_locks_for_txnid(txnid_a, one, one); - lt->remove_overlapping_locks_for_txnid(txnid_a, three, four); - invariant(no_row_locks(lt)); + lt.remove_overlapping_locks_for_txnid(txnid_a, one, one); + lt.remove_overlapping_locks_for_txnid(txnid_a, three, four); + invariant(no_row_locks(<)); } - mgr.release_lt(lt); - mgr.destroy(); + lt.release_reference(); + lt.destroy(); } } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_create_destroy.cc b/storage/tokudb/ft-index/locktree/tests/locktree_create_destroy.cc index bb3681d037a69..93bdea239cc52 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_create_destroy.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_create_destroy.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,27 +95,26 @@ namespace toku { // test simple create and destroy of the locktree void locktree_unit_test::test_create_destroy(void) { - locktree::manager mgr; - mgr.create(nullptr, nullptr, nullptr, nullptr); - DESCRIPTOR desc = nullptr; + locktree lt; DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, desc, compare_dbts, nullptr); - locktree::lt_lock_request_info *info = lt->get_lock_request_info(); + lt.create(nullptr, dict_id, dbt_comparator); + + lt_lock_request_info *info = lt.get_lock_request_info(); invariant_notnull(info); toku_mutex_lock(&info->mutex); toku_mutex_unlock(&info->mutex); - invariant(lt->m_dict_id.dictid == dict_id.dictid); - invariant(lt->m_reference_count == 1); - invariant(lt->m_rangetree != nullptr); - invariant(lt->m_userdata == nullptr); + invariant(lt.m_dict_id.dictid == dict_id.dictid); + invariant(lt.m_reference_count == 1); + invariant(lt.m_rangetree != nullptr); + invariant(lt.m_userdata == nullptr); invariant(info->pending_lock_requests.size() == 0); - invariant(lt->m_sto_end_early_count == 0); - invariant(lt->m_sto_end_early_time == 0); + invariant(lt.m_sto_end_early_count == 0); + invariant(lt.m_sto_end_early_time == 0); - mgr.release_lt(lt); - mgr.destroy(); + lt.release_reference(); + lt.destroy(); } } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_1big7lt_1small.cc b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_1big7lt_1small.cc index 7c4af67e6f8c5..02784f52bfa78 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_1big7lt_1small.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_1big7lt_1small.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -119,21 +119,7 @@ static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64 return lt->acquire_write_lock(txn_id, &left, &right, nullptr, big_txn); } -#if 0 -static locktree **big_txn_lt; -static int n_big_txn_lt; - -static int get_locktrees_touched_by_txn(TXNID txn_id UU(), void *txn_extra UU(), locktree ***ret_locktrees, int *ret_num_locktrees) { - locktree **locktrees = (locktree **) toku_malloc(n_big_txn_lt * sizeof (locktree *)); - for (int i = 0; i < n_big_txn_lt; i++) - locktrees[i] = big_txn_lt[i]; - *ret_locktrees = locktrees; - *ret_num_locktrees = n_big_txn_lt; - return 0; -} -#endif - -static void run_big_txn(locktree::manager *mgr UU(), locktree **lt, int n_lt, TXNID txn_id) { +static void run_big_txn(locktree_manager *mgr UU(), locktree **lt, int n_lt, TXNID txn_id) { int64_t last_i = -1; for (int64_t i = 0; !killed; i++) { for (int j = 0; j < n_lt; j++) { @@ -157,7 +143,7 @@ static void run_big_txn(locktree::manager *mgr UU(), locktree **lt, int n_lt, TX } struct big_arg { - locktree::manager *mgr; + locktree_manager *mgr; locktree **lt; int n_lt; TXNID txn_id; @@ -171,7 +157,7 @@ static void *big_f(void *_arg) { return arg; } -static void run_small_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t k) { +static void run_small_txn(locktree_manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t k) { int64_t i; for (i = 0; !killed; i++) { uint64_t t_start = toku_current_time_microsec(); @@ -190,7 +176,7 @@ static void run_small_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_i } struct small_arg { - locktree::manager *mgr; + locktree_manager *mgr; locktree *lt; TXNID txn_id; int64_t k; @@ -209,7 +195,7 @@ static void e_callback(TXNID txnid, const locktree *lt, const range_buffer &buff printf("%u %s %" PRIu64 " %p %d %p\n", toku_os_gettid(), __FUNCTION__, txnid, lt, buffer.get_num_ranges(), extra); } -static uint64_t get_escalation_count(locktree::manager &mgr) { +static uint64_t get_escalation_count(locktree_manager &mgr) { LTM_STATUS_S ltm_status; mgr.get_status(<m_status); @@ -251,7 +237,7 @@ int main(int argc, const char *argv[]) { int r; // create a manager - locktree::manager mgr; + locktree_manager mgr; mgr.create(nullptr, nullptr, e_callback, nullptr); mgr.set_max_lock_memory(max_lock_memory); @@ -261,16 +247,11 @@ int main(int argc, const char *argv[]) { locktree *big_lt[n_big]; for (int i = 0; i < n_big; i++) { dict_id = { next_dict_id }; next_dict_id++; - big_lt[i] = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + big_lt[i] = mgr.get_lt(dict_id, dbt_comparator, nullptr); } -#if 0 - big_txn_lt = big_lt; - n_big_txn_lt = n_big; -#endif - dict_id = { next_dict_id }; next_dict_id++; - locktree *small_lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr); + locktree *small_lt = mgr.get_lt(dict_id, dbt_comparator, nullptr); // create the worker threads struct big_arg big_arg = { &mgr, big_lt, n_big, 1000 }; diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_1lt.cc b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_1lt.cc index ab1818d2fd3c6..9509224a15f9c 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_1lt.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_1lt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -118,7 +118,7 @@ static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64 return lt->acquire_write_lock(txn_id, &left, &right, nullptr, big_txn); } -static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t start_i) { +static void run_big_txn(locktree_manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t start_i) { fprintf(stderr, "%u run_big_txn %p %" PRIu64 " %" PRId64 "\n", toku_os_gettid(), lt, txn_id, start_i); int64_t last_i = -1; for (int64_t i = start_i; !killed; i++) { @@ -141,7 +141,7 @@ static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, } struct arg { - locktree::manager *mgr; + locktree_manager *mgr; locktree *lt; TXNID txn_id; int64_t start_i; @@ -158,7 +158,7 @@ static void e_callback(TXNID txnid, const locktree *lt, const range_buffer &buff printf("%u %s %" PRIu64 " %p %d %p\n", toku_os_gettid(), __FUNCTION__, txnid, lt, buffer.get_num_ranges(), extra); } -static uint64_t get_escalation_count(locktree::manager &mgr) { +static uint64_t get_escalation_count(locktree_manager &mgr) { LTM_STATUS_S ltm_status; mgr.get_status(<m_status); @@ -205,18 +205,15 @@ int main(int argc, const char *argv[]) { int r; // create a manager - locktree::manager mgr; + locktree_manager mgr; mgr.create(nullptr, nullptr, e_callback, nullptr); mgr.set_max_lock_memory(max_lock_memory); // create lock trees - DESCRIPTOR desc[n_lt]; - DICTIONARY_ID dict_id[n_lt]; locktree *lt[n_big]; for (int i = 0; i < n_lt; i++) { - desc[i] = nullptr; - dict_id[i] = { (uint64_t)i }; - lt[i] = mgr.get_lt(dict_id[i], desc[i], compare_dbts, nullptr); + DICTIONARY_ID dict_id = { .dictid = (uint64_t) i }; + lt[i] = mgr.get_lt(dict_id, dbt_comparator, nullptr); assert(lt[i]); } diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_2lt.cc b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_2lt.cc index d9f62ca29f309..5e315edda7847 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_2lt.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_2big_2lt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -118,7 +118,7 @@ static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64 return lt->acquire_write_lock(txn_id, &left, &right, nullptr, big_txn); } -static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t start_i) { +static void run_big_txn(locktree_manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t start_i) { fprintf(stderr, "%u run_big_txn %p %" PRIu64 " %" PRId64 "\n", toku_os_gettid(), lt, txn_id, start_i); int64_t last_i = -1; for (int64_t i = start_i; !killed; i++) { @@ -141,7 +141,7 @@ static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, } struct arg { - locktree::manager *mgr; + locktree_manager *mgr; locktree *lt; TXNID txn_id; int64_t start_i; @@ -158,7 +158,7 @@ static void e_callback(TXNID txnid, const locktree *lt, const range_buffer &buff printf("%u %s %" PRIu64 " %p %d %p\n", toku_os_gettid(), __FUNCTION__, txnid, lt, buffer.get_num_ranges(), extra); } -static uint64_t get_escalation_count(locktree::manager &mgr) { +static uint64_t get_escalation_count(locktree_manager &mgr) { LTM_STATUS_S ltm_status; mgr.get_status(<m_status); @@ -205,18 +205,15 @@ int main(int argc, const char *argv[]) { int r; // create a manager - locktree::manager mgr; + locktree_manager mgr; mgr.create(nullptr, nullptr, e_callback, nullptr); mgr.set_max_lock_memory(max_lock_memory); // create lock trees - DESCRIPTOR desc[n_lt]; - DICTIONARY_ID dict_id[n_lt]; locktree *lt[n_big]; for (int i = 0; i < n_lt; i++) { - desc[i] = nullptr; - dict_id[i] = { (uint64_t)i }; - lt[i] = mgr.get_lt(dict_id[i], desc[i], compare_dbts, nullptr); + DICTIONARY_ID dict_id = { .dictid = (uint64_t)i }; + lt[i] = mgr.get_lt(dict_id, dbt_comparator, nullptr); assert(lt[i]); } diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_impossible.cc b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_impossible.cc index cdee8b9d333fa..a7d84aaf6506d 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_impossible.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_impossible.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -123,7 +123,7 @@ static void e_callback(TXNID txnid, const locktree *lt, const range_buffer &buff printf("%u %s %" PRIu64 " %p %d %p\n", toku_os_gettid(), __FUNCTION__, txnid, lt, buffer.get_num_ranges(), extra); } -static uint64_t get_escalation_count(locktree::manager &mgr) { +static uint64_t get_escalation_count(locktree_manager &mgr) { LTM_STATUS_S ltm_status; mgr.get_status(<m_status); @@ -159,7 +159,7 @@ int main(int argc, const char *argv[]) { int r; // create a manager - locktree::manager mgr; + locktree_manager mgr; mgr.create(nullptr, nullptr, e_callback, nullptr); mgr.set_max_lock_memory(max_lock_memory); @@ -167,9 +167,8 @@ int main(int argc, const char *argv[]) { const TXNID txn_b = 100; // create lock trees - DESCRIPTOR desc = nullptr; - DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, desc, compare_dbts, nullptr); + DICTIONARY_ID dict_id = { .dictid = 1 }; + locktree *lt = mgr.get_lt(dict_id, dbt_comparator, nullptr); int64_t last_i = -1; for (int64_t i = 0; ; i++) { diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_stalls.cc b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_stalls.cc index 28ae88cb3a811..9228e627e9a54 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_escalation_stalls.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_escalation_stalls.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -126,7 +126,7 @@ static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64 return lt->acquire_write_lock(txn_id, &left, &right, nullptr, big_txn); } -static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id) { +static void run_big_txn(locktree_manager *mgr UU(), locktree *lt, TXNID txn_id) { int64_t last_i = -1; for (int64_t i = 0; !killed; i++) { uint64_t t_start = toku_current_time_microsec(); @@ -144,7 +144,7 @@ static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id) locktree_release_lock(lt, txn_id, 0, last_i); // release the range 0 .. 
last_i } -static void run_small_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t k) { +static void run_small_txn(locktree_manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t k) { for (int64_t i = 0; !killed; i++) { uint64_t t_start = toku_current_time_microsec(); int r = locktree_write_lock(lt, txn_id, k, k, false); @@ -160,7 +160,7 @@ static void run_small_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_i } struct arg { - locktree::manager *mgr; + locktree_manager *mgr; locktree *lt; TXNID txn_id; int64_t k; @@ -183,7 +183,7 @@ static void e_callback(TXNID txnid, const locktree *lt, const range_buffer &buff printf("%u %s %" PRIu64 " %p %d %p\n", toku_os_gettid(), __FUNCTION__, txnid, lt, buffer.get_num_ranges(), extra); } -static uint64_t get_escalation_count(locktree::manager &mgr) { +static uint64_t get_escalation_count(locktree_manager &mgr) { LTM_STATUS_S ltm_status; mgr.get_status(<m_status); @@ -223,18 +223,16 @@ int main(int argc, const char *argv[]) { int r; // create a manager - locktree::manager mgr; + locktree_manager mgr; mgr.create(nullptr, nullptr, e_callback, nullptr); mgr.set_max_lock_memory(max_lock_memory); // create lock trees - DESCRIPTOR desc_0 = nullptr; - DICTIONARY_ID dict_id_0 = { 1 }; - locktree *lt_0 = mgr.get_lt(dict_id_0, desc_0, compare_dbts, nullptr); + DICTIONARY_ID dict_id_0 = { .dictid = 1 }; + locktree *lt_0 = mgr.get_lt(dict_id_0, dbt_comparator, nullptr); - DESCRIPTOR desc_1 = nullptr; - DICTIONARY_ID dict_id_1 = { 2 }; - locktree *lt_1 = mgr.get_lt(dict_id_1, desc_1, compare_dbts, nullptr); + DICTIONARY_ID dict_id_1 = { .dictid = 2 }; + locktree *lt_1 = mgr.get_lt(dict_id_1, dbt_comparator, nullptr); // create the worker threads struct arg big_arg = { &mgr, lt_0, 1000 }; diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_infinity.cc b/storage/tokudb/ft-index/locktree/tests/locktree_infinity.cc index 142e50992a60a..ef490b59cc230 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_infinity.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_infinity.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,11 +95,10 @@ namespace toku { // test that ranges with infinite endpoints work void locktree_unit_test::test_infinity(void) { - locktree::manager mgr; - mgr.create(nullptr, nullptr, nullptr, nullptr); - DESCRIPTOR desc = nullptr; + locktree lt; + DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, desc, compare_dbts, nullptr); + lt.create(nullptr, dict_id, dbt_comparator); int r; TXNID txnid_a = 1001; @@ -112,60 +111,60 @@ void locktree_unit_test::test_infinity(void) { const DBT max_int = max_dbt(); // txn A will lock -inf, 5. 
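Worth noting for the infinity test below: the infinite endpoints are not special key encodings but sentinel DBTs that the locktree code recognizes by pointer identity (compare the left_key == toku_dbt_negative_infinity() checks in range_buffer::record_header::init() earlier in this patch). A hedged sketch of how a caller expresses a half-open lock such as the "-inf .. 5" case exercised below, using only calls that appear in this diff:

    // Sketch only: lock everything up to and including key `five`, then release it.
    static void sketch_lock_below_five(locktree &lt, TXNID txn, const DBT *five) {
        int r = lt.acquire_write_lock(txn, toku_dbt_negative_infinity(), five,
                                      nullptr, false);
        invariant_zero(r);
        // ... work under the lock ...
        lt.remove_overlapping_locks_for_txnid(txn, toku_dbt_negative_infinity(), five);
    }
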
- r = lt->acquire_write_lock(txnid_a, toku_dbt_negative_infinity(), five, nullptr, false); + r = lt.acquire_write_lock(txnid_a, toku_dbt_negative_infinity(), five, nullptr, false); invariant(r == 0); // txn B will fail to get any lock <= 5, even min_int - r = lt->acquire_write_lock(txnid_b, five, five, nullptr, false); + r = lt.acquire_write_lock(txnid_b, five, five, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, zero, one, nullptr, false); + r = lt.acquire_write_lock(txnid_b, zero, one, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, &min_int, &min_int, nullptr, false); + r = lt.acquire_write_lock(txnid_b, &min_int, &min_int, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), &min_int, nullptr, false); + r = lt.acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), &min_int, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - lt->remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), five); + lt.remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), five); // txn A will lock 1, +inf - r = lt->acquire_write_lock(txnid_a, one, toku_dbt_positive_infinity(), nullptr, false); + r = lt.acquire_write_lock(txnid_a, one, toku_dbt_positive_infinity(), nullptr, false); invariant(r == 0); // txn B will fail to get any lock >= 1, even max_int - r = lt->acquire_write_lock(txnid_b, one, one, nullptr, false); + r = lt.acquire_write_lock(txnid_b, one, one, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, two, five, nullptr, false); + r = lt.acquire_write_lock(txnid_b, two, five, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, &max_int, &max_int, nullptr, false); + r = lt.acquire_write_lock(txnid_b, &max_int, &max_int, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, &max_int, toku_dbt_positive_infinity(), nullptr, false); + r = lt.acquire_write_lock(txnid_b, &max_int, toku_dbt_positive_infinity(), nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - lt->remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), five); + lt.remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), five); // txn A will lock -inf, +inf - r = lt->acquire_write_lock(txnid_a, toku_dbt_negative_infinity(), toku_dbt_positive_infinity(), nullptr, false); + r = lt.acquire_write_lock(txnid_a, toku_dbt_negative_infinity(), toku_dbt_positive_infinity(), nullptr, false); invariant(r == 0); // txn B will fail to get any lock - r = lt->acquire_write_lock(txnid_b, zero, one, nullptr, false); + r = lt.acquire_write_lock(txnid_b, zero, one, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, two, five, nullptr, false); + r = lt.acquire_write_lock(txnid_b, two, five, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, &min_int, &min_int, nullptr, false); + r = lt.acquire_write_lock(txnid_b, &min_int, &min_int, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, &min_int, &max_int, nullptr, false); + r = lt.acquire_write_lock(txnid_b, &min_int, &max_int, nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, &max_int, &max_int, nullptr, false); + r = lt.acquire_write_lock(txnid_b, &max_int, &max_int, nullptr, false); invariant(r == 
DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), toku_dbt_negative_infinity(), nullptr, false); + r = lt.acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), toku_dbt_negative_infinity(), nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), toku_dbt_positive_infinity(), nullptr, false); + r = lt.acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), toku_dbt_positive_infinity(), nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - r = lt->acquire_write_lock(txnid_b, toku_dbt_positive_infinity(), toku_dbt_positive_infinity(), nullptr, false); + r = lt.acquire_write_lock(txnid_b, toku_dbt_positive_infinity(), toku_dbt_positive_infinity(), nullptr, false); invariant(r == DB_LOCK_NOTGRANTED); - lt->remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), toku_dbt_positive_infinity()); + lt.remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), toku_dbt_positive_infinity()); - mgr.release_lt(lt); - mgr.destroy(); + lt.release_reference(); + lt.destroy(); } } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_misc.cc b/storage/tokudb/ft-index/locktree/tests/locktree_misc.cc index 5916914297d67..67d616867bc53 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_misc.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_misc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -107,18 +107,18 @@ static int my_compare_dbts(DB *db, const DBT *a, const DBT *b) { // test that get/set userdata works, and that get_manager() works void locktree_unit_test::test_misc(void) { - locktree::manager mgr; - mgr.create(nullptr, nullptr, nullptr, nullptr); - DESCRIPTOR desc = nullptr; + locktree lt; DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, desc, my_compare_dbts, nullptr); + toku::comparator my_dbt_comparator; + my_dbt_comparator.create(my_compare_dbts, nullptr); + lt.create(nullptr, dict_id, my_dbt_comparator); - invariant(lt->get_userdata() == nullptr); + invariant(lt.get_userdata() == nullptr); int userdata; - lt->set_userdata(&userdata); - invariant(lt->get_userdata() == &userdata); - lt->set_userdata(nullptr); - invariant(lt->get_userdata() == nullptr); + lt.set_userdata(&userdata); + invariant(lt.get_userdata() == &userdata); + lt.set_userdata(nullptr); + invariant(lt.get_userdata() == nullptr); int r; DBT dbt_a, dbt_b; @@ -126,19 +126,27 @@ void locktree_unit_test::test_misc(void) { expected_a = &dbt_a; expected_b = &dbt_b; + toku::comparator cmp_d1, cmp_d2; + cmp_d1.create(my_compare_dbts, &d1); + cmp_d2.create(my_compare_dbts, &d2); + // make sure the comparator object has the correct // descriptor when we set the locktree's descriptor - lt->set_descriptor(&d1); + lt.set_comparator(cmp_d1); expected_descriptor = &d1; - r = lt->m_cmp->compare(&dbt_a, &dbt_b); + r = lt.m_cmp(&dbt_a, &dbt_b); invariant(r == expected_comparison_magic); - lt->set_descriptor(&d2); + lt.set_comparator(cmp_d2); expected_descriptor = &d2; - r = lt->m_cmp->compare(&dbt_a, &dbt_b); + r = lt.m_cmp(&dbt_a, &dbt_b); invariant(r == expected_comparison_magic); - mgr.release_lt(lt); - mgr.destroy(); + lt.release_reference(); + lt.destroy(); + + cmp_d1.destroy(); + cmp_d2.destroy(); + my_dbt_comparator.destroy(); } } /* namespace toku */ diff --git 
a/storage/tokudb/ft-index/locktree/tests/locktree_overlapping_relock.cc b/storage/tokudb/ft-index/locktree/tests/locktree_overlapping_relock.cc index ecb710517e13c..4cf950e703786 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_overlapping_relock.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_overlapping_relock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -98,11 +98,10 @@ namespace toku { // write locks if overlapping and ensure that existing read // or write locks are consolidated by overlapping relocks. void locktree_unit_test::test_overlapping_relock(void) { - locktree::manager mgr; - mgr.create(nullptr, nullptr, nullptr, nullptr); - DESCRIPTOR desc = nullptr; + locktree lt; + DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, desc, compare_dbts, nullptr); + lt.create(nullptr, dict_id, dbt_comparator); const DBT *zero = get_dbt(0); const DBT *one = get_dbt(1); @@ -121,15 +120,15 @@ void locktree_unit_test::test_overlapping_relock(void) { // do something. at the end of the test, we release 100, 100. const TXNID the_other_txnid = 9999; const DBT *hundred = get_dbt(100); - r = lt->acquire_write_lock(the_other_txnid, hundred, hundred, nullptr, false); + r = lt.acquire_write_lock(the_other_txnid, hundred, hundred, nullptr, false); invariant(r == 0); for (int test_run = 0; test_run < 2; test_run++) { // test_run == 0 means test with read lock // test_run == 1 means test with write lock #define ACQUIRE_LOCK(txn, left, right, conflicts) \ - test_run == 0 ? lt->acquire_read_lock(txn, left, right, conflicts, false) \ - : lt->acquire_write_lock(txn, left, right, conflicts, false) + test_run == 0 ? lt.acquire_read_lock(txn, left, right, conflicts, false) \ + : lt.acquire_write_lock(txn, left, right, conflicts, false) // lock [1,1] and [2,2]. then lock [1,2]. // ensure only [1,2] exists in the tree @@ -144,7 +143,7 @@ void locktree_unit_test::test_overlapping_relock(void) { bool saw_the_other; TXNID expected_txnid; keyrange *expected_range; - comparator *cmp; + const comparator *cmp; bool fn(const keyrange &range, TXNID txnid) { if (txnid == the_other_txnid) { invariant(!saw_the_other); @@ -152,15 +151,15 @@ void locktree_unit_test::test_overlapping_relock(void) { return true; } invariant(txnid == expected_txnid); - keyrange::comparison c = range.compare(cmp, *expected_range); + keyrange::comparison c = range.compare(*cmp, *expected_range); invariant(c == keyrange::comparison::EQUALS); return true; } } verify_fn; - verify_fn.cmp = lt->m_cmp; + verify_fn.cmp = &lt.m_cmp; #define do_verify() \ - do { verify_fn.saw_the_other = false; locktree_iterate(lt, &verify_fn); } while (0) + do { verify_fn.saw_the_other = false; locktree_iterate(&lt, &verify_fn); } while (0) keyrange range; range.create(one, two); @@ -170,9 +169,9 @@ void locktree_unit_test::test_overlapping_relock(void) { // unlocking [1,1] should remove the only range, // the other unlocks shoudl do nothing.
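Taken together, the hunks above show the shape of the new test scaffolding: instead of asking a locktree::manager for a locktree keyed by a DESCRIPTOR and a raw compare callback, the tests build a toku::comparator and create the locktree directly. The following is a minimal hedged sketch of that pattern, not code from the patch; the include paths are illustrative, and compare_dbts/dbt_comparator are the helpers these tests get from their shared test.h.

```cpp
// Hedged sketch of the post-change test setup; names mirror the hunks above
// (locktree, toku::comparator, DICTIONARY_ID), not a verified build.
#include "locktree/locktree.h"   // illustrative include path

static void example_standalone_locktree(void) {
    toku::comparator cmp;
    cmp.create(compare_dbts, nullptr);     // comparator object replaces DESCRIPTOR + callback

    toku::locktree lt;
    DICTIONARY_ID dict_id = { 1 };
    lt.create(nullptr, dict_id, cmp);      // no locktree_manager needed

    // ... acquire_read_lock / acquire_write_lock / release_locks on lt ...

    lt.release_reference();                // teardown order used by the updated tests
    lt.destroy();
    cmp.destroy();
}
```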
- lt->remove_overlapping_locks_for_txnid(txnid_a, one, one); - lt->remove_overlapping_locks_for_txnid(txnid_a, two, two); - lt->remove_overlapping_locks_for_txnid(txnid_a, one, two); + lt.remove_overlapping_locks_for_txnid(txnid_a, one, one); + lt.remove_overlapping_locks_for_txnid(txnid_a, two, two); + lt.remove_overlapping_locks_for_txnid(txnid_a, one, two); // try overlapping from the right r = ACQUIRE_LOCK(txnid_a, one, three, nullptr); @@ -197,16 +196,16 @@ void locktree_unit_test::test_overlapping_relock(void) { do_verify(); // release one of the locks we acquired. this should clean up the whole range. - lt->remove_overlapping_locks_for_txnid(txnid_a, zero, four); + lt.remove_overlapping_locks_for_txnid(txnid_a, zero, four); #undef ACQUIRE_LOCK } // remove the other txnid's lock now - lt->remove_overlapping_locks_for_txnid(the_other_txnid, hundred, hundred); + lt.remove_overlapping_locks_for_txnid(the_other_txnid, hundred, hundred); - mgr.release_lt(lt); - mgr.destroy(); + lt.release_reference(); + lt.destroy(); } } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_simple_lock.cc b/storage/tokudb/ft-index/locktree/tests/locktree_simple_lock.cc index 549a44a14796d..c4ebb45537d1f 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_simple_lock.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_simple_lock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,11 +95,11 @@ namespace toku { // test simple, non-overlapping read locks and then write locks void locktree_unit_test::test_simple_lock(void) { - locktree::manager mgr; + locktree_manager mgr; mgr.create(nullptr, nullptr, nullptr, nullptr); - DESCRIPTOR desc = nullptr; - DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, desc, compare_dbts, nullptr); + + DICTIONARY_ID dict_id = { .dictid = 1 }; + locktree *lt = mgr.get_lt(dict_id, dbt_comparator, nullptr); int r; TXNID txnid_a = 1001; diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_single_txnid_optimization.cc b/storage/tokudb/ft-index/locktree/tests/locktree_single_txnid_optimization.cc index 6fdd7270f09af..17ebc3c86f9cc 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_single_txnid_optimization.cc +++ b/storage/tokudb/ft-index/locktree/tests/locktree_single_txnid_optimization.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -98,11 +98,10 @@ namespace toku { // write locks if overlapping and ensure that existing read // or write locks are consolidated by overlapping relocks. 
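Where a test still goes through the manager, as locktree_simple_lock.cc does above, the visible signature change is that locktree_manager::get_lt now takes a comparator rather than a DESCRIPTOR plus compare function. A hedged sketch of that path follows; release_lt is assumed to keep its old name, since the release call itself is not shown in this hunk.

```cpp
// Hedged sketch of the manager-based path after this change; get_lt's
// argument shape follows locktree_simple_lock.cc above.
toku::locktree_manager mgr;
mgr.create(nullptr, nullptr, nullptr, nullptr);    // create/destroy/escalate callbacks, extra

DICTIONARY_ID dict_id = { .dictid = 1 };
toku::locktree *lt = mgr.get_lt(dict_id, dbt_comparator, nullptr);  // comparator, not DESCRIPTOR

// ... exercise lt ...

mgr.release_lt(lt);                                // assumed release path, not shown in this hunk
mgr.destroy();
```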
void locktree_unit_test::test_single_txnid_optimization(void) { - locktree::manager mgr; - mgr.create(nullptr, nullptr, nullptr, nullptr); - DESCRIPTOR desc = nullptr; + locktree lt; + DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, desc, compare_dbts, nullptr); + lt.create(nullptr, dict_id, dbt_comparator); const DBT *zero = get_dbt(0); const DBT *one = get_dbt(1); @@ -124,13 +123,13 @@ void locktree_unit_test::test_single_txnid_optimization(void) { buffer.create(); #define lock_and_append_point_for_txnid_a(key) \ - r = lt->acquire_write_lock(txnid_a, key, key, nullptr, false); \ + r = lt.acquire_write_lock(txnid_a, key, key, nullptr, false); \ invariant_zero(r); \ buffer.append(key, key); #define maybe_point_locks_for_txnid_b(i) \ if (where == i) { \ - r = lt->acquire_write_lock(txnid_b, one, one, nullptr, false); \ + r = lt.acquire_write_lock(txnid_b, one, one, nullptr, false); \ invariant_zero(r); \ } @@ -143,36 +142,36 @@ void locktree_unit_test::test_single_txnid_optimization(void) { lock_and_append_point_for_txnid_a(zero); maybe_point_locks_for_txnid_b(2); - lt->release_locks(txnid_a, &buffer); + lt.release_locks(txnid_a, &buffer); // txnid b does not take a lock on iteration 3 if (where != 3) { struct verify_fn_obj { TXNID expected_txnid; keyrange *expected_range; - comparator *cmp; + const comparator *cmp; bool fn(const keyrange &range, TXNID txnid) { invariant(txnid == expected_txnid); - keyrange::comparison c = range.compare(cmp, *expected_range); + keyrange::comparison c = range.compare(*cmp, *expected_range); invariant(c == keyrange::comparison::EQUALS); return true; } } verify_fn; - verify_fn.cmp = lt->m_cmp; + verify_fn.cmp = &lt.m_cmp; keyrange range; range.create(one, one); verify_fn.expected_txnid = txnid_b; verify_fn.expected_range = &range; - locktree_iterate(lt, &verify_fn); - lt->remove_overlapping_locks_for_txnid(txnid_b, one, one); + locktree_iterate(&lt, &verify_fn); + lt.remove_overlapping_locks_for_txnid(txnid_b, one, one); } buffer.destroy(); } - mgr.release_lt(lt); - mgr.destroy(); + lt.release_reference(); + lt.destroy(); } } /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/locktree_unit_test.h b/storage/tokudb/ft-index/locktree/tests/locktree_unit_test.h index b074cc837bab2..34dbc3a7e596f 100644 --- a/storage/tokudb/ft-index/locktree/tests/locktree_unit_test.h +++ b/storage/tokudb/ft-index/locktree/tests/locktree_unit_test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
-#ifndef TOKU_LOCKTREE_UNIT_TEST_H -#define TOKU_LOCKTREE_UNIT_TEST_H - #include "test.h" #include "locktree.h" @@ -157,5 +156,3 @@ class locktree_unit_test { }; } /* namespace toku */ - -#endif /* TOKU_LOCKTREE_UNIT_TEST_H */ diff --git a/storage/tokudb/ft-index/locktree/tests/manager_create_destroy.cc b/storage/tokudb/ft-index/locktree/tests/manager_create_destroy.cc index 6f667c04a74d5..07c00c5d7b786 100644 --- a/storage/tokudb/ft-index/locktree/tests/manager_create_destroy.cc +++ b/storage/tokudb/ft-index/locktree/tests/manager_create_destroy.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,17 +94,14 @@ PATENT RIGHTS GRANT: namespace toku { void manager_unit_test::test_create_destroy(void) { - locktree::manager mgr; - locktree::manager::lt_create_cb create_callback = - (locktree::manager::lt_create_cb) (long) 1; - locktree::manager::lt_destroy_cb destroy_callback = - (locktree::manager::lt_destroy_cb) (long) 2; - locktree::manager::lt_escalate_cb escalate_callback = - (locktree::manager::lt_escalate_cb) (long) 3; + locktree_manager mgr; + lt_create_cb create_callback = (lt_create_cb) (long) 1; + lt_destroy_cb destroy_callback = (lt_destroy_cb) (long) 2; + lt_escalate_cb escalate_callback = (lt_escalate_cb) (long) 3; void *extra = (void *) (long) 4; mgr.create(create_callback, destroy_callback, escalate_callback, extra); - invariant(mgr.m_max_lock_memory == locktree::manager::DEFAULT_MAX_LOCK_MEMORY); + invariant(mgr.m_max_lock_memory == locktree_manager::DEFAULT_MAX_LOCK_MEMORY); invariant(mgr.m_current_lock_memory == 0); invariant(mgr.m_escalation_count == 0); invariant(mgr.m_escalation_time == 0); diff --git a/storage/tokudb/ft-index/locktree/tests/manager_locktree_map.cc b/storage/tokudb/ft-index/locktree/tests/manager_locktree_map.cc index 1235f15f88508..82cf1dc9f5ab6 100644 --- a/storage/tokudb/ft-index/locktree/tests/manager_locktree_map.cc +++ b/storage/tokudb/ft-index/locktree/tests/manager_locktree_map.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: namespace toku { void manager_unit_test::test_lt_map(void) { - locktree::manager mgr; + locktree_manager mgr; mgr.create(nullptr, nullptr, nullptr, nullptr); locktree aa; diff --git a/storage/tokudb/ft-index/locktree/tests/manager_params.cc b/storage/tokudb/ft-index/locktree/tests/manager_params.cc index 95642db512135..7376d91a064bb 100644 --- a/storage/tokudb/ft-index/locktree/tests/manager_params.cc +++ b/storage/tokudb/ft-index/locktree/tests/manager_params.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -95,7 +95,7 @@ namespace toku { void manager_unit_test::test_params(void) { int r; - locktree::manager mgr; + locktree_manager mgr; mgr.create(nullptr, nullptr, nullptr, nullptr); uint64_t new_max_lock_memory = 15307752356; diff --git a/storage/tokudb/ft-index/locktree/tests/manager_reference_release_lt.cc b/storage/tokudb/ft-index/locktree/tests/manager_reference_release_lt.cc index 9bf06f7784092..c2fdee49ffe28 100644 --- a/storage/tokudb/ft-index/locktree/tests/manager_reference_release_lt.cc +++ b/storage/tokudb/ft-index/locktree/tests/manager_reference_release_lt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -107,9 +107,15 @@ static void destroy_cb(locktree *lt) { (*k) = false; } +static int my_cmp(DB *UU(db), const DBT *UU(a), const DBT *UU(b)) { + return 0; +} + void manager_unit_test::test_reference_release_lt(void) { - locktree::manager mgr; + locktree_manager mgr; mgr.create(create_cb, destroy_cb, nullptr, nullptr); + toku::comparator my_comparator; + my_comparator.create(my_cmp, nullptr); DICTIONARY_ID a = { 0 }; DICTIONARY_ID b = { 1 }; @@ -117,18 +123,12 @@ void manager_unit_test::test_reference_release_lt(void) { bool aok = false; bool bok = false; bool cok = false; - - int d = 5; - DESCRIPTOR_S desc_s; - desc_s.dbt.data = &d; - desc_s.dbt.size = desc_s.dbt.ulen = sizeof(d); - desc_s.dbt.flags = DB_DBT_USERMEM; - locktree *alt = mgr.get_lt(a, &desc_s, nullptr, &aok); + locktree *alt = mgr.get_lt(a, my_comparator, &aok); invariant_notnull(alt); - locktree *blt = mgr.get_lt(b, &desc_s, nullptr, &bok); + locktree *blt = mgr.get_lt(b, my_comparator, &bok); invariant_notnull(alt); - locktree *clt = mgr.get_lt(c, &desc_s, nullptr, &cok); + locktree *clt = mgr.get_lt(c, my_comparator, &cok); invariant_notnull(alt); // three distinct locktrees should have been returned @@ -152,9 +152,9 @@ void manager_unit_test::test_reference_release_lt(void) { // get another handle on a and b, they shoudl be the same // as the original alt and blt - locktree *blt2 = mgr.get_lt(b, &desc_s, nullptr, &bok); + locktree *blt2 = mgr.get_lt(b, my_comparator, &bok); invariant(blt2 == blt); - locktree *alt2 = mgr.get_lt(a, &desc_s, nullptr, &aok); + locktree *alt2 = mgr.get_lt(a, my_comparator, &aok); invariant(alt2 == alt); // remove one ref from everything. c should die. a and b are ok. @@ -171,6 +171,7 @@ void manager_unit_test::test_reference_release_lt(void) { invariant(!aok); invariant(!bok); + my_comparator.destroy(); mgr.destroy(); } diff --git a/storage/tokudb/ft-index/locktree/tests/manager_status.cc b/storage/tokudb/ft-index/locktree/tests/manager_status.cc index 6803b5a22b398..b2f1560736a02 100644 --- a/storage/tokudb/ft-index/locktree/tests/manager_status.cc +++ b/storage/tokudb/ft-index/locktree/tests/manager_status.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -112,8 +112,7 @@ static void assert_status(LTM_STATUS ltm_status, const char *keyname, uint64_t v } void manager_unit_test::test_status(void) { - - locktree::manager mgr; + locktree_manager mgr; mgr.create(nullptr, nullptr, nullptr, nullptr); LTM_STATUS_S status; @@ -121,9 +120,8 @@ void manager_unit_test::test_status(void) { assert_status(&status, "LTM_WAIT_COUNT", 0); assert_status(&status, "LTM_TIMEOUT_COUNT", 0); - DESCRIPTOR desc = nullptr; - DICTIONARY_ID dict_id = { 1 }; - locktree *lt = mgr.get_lt(dict_id, desc, compare_dbts, nullptr); + DICTIONARY_ID dict_id = { .dictid = 1 }; + locktree *lt = mgr.get_lt(dict_id, dbt_comparator, nullptr); int r; TXNID txnid_a = 1001; TXNID txnid_b = 2001; diff --git a/storage/tokudb/ft-index/locktree/tests/manager_unit_test.h b/storage/tokudb/ft-index/locktree/tests/manager_unit_test.h index ba38b97989e42..cec640e0c2cb1 100644 --- a/storage/tokudb/ft-index/locktree/tests/manager_unit_test.h +++ b/storage/tokudb/ft-index/locktree/tests/manager_unit_test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_MANAGER_TEST_H -#define TOKU_MANAGER_TEST_H - #include #include @@ -111,5 +110,3 @@ class manager_unit_test { }; } /* namespace toku */ - -#endif /* TOKU_MANAGER_TEST_H */ diff --git a/storage/tokudb/ft-index/locktree/tests/range_buffer_test.cc b/storage/tokudb/ft-index/locktree/tests/range_buffer_test.cc index 2fbc27e0d04f0..61d14888229d1 100644 --- a/storage/tokudb/ft-index/locktree/tests/range_buffer_test.cc +++ b/storage/tokudb/ft-index/locktree/tests/range_buffer_test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
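One side effect visible in manager_create_destroy.cc above is that the callback typedefs move out of the locktree::manager class and become namespace-scope names (lt_create_cb, lt_destroy_cb, lt_escalate_cb), and the memory-limit constant is now spelled locktree_manager::DEFAULT_MAX_LOCK_MEMORY. A short hedged restatement of that setup; the callback signatures themselves are not shown in this diff, so the test's placeholder integer casts are kept.

```cpp
// Hedged restatement of the updated manager setup (written as if inside
// namespace toku, like the tests). Real code would pass actual callbacks.
locktree_manager mgr;
lt_create_cb create_cb = (lt_create_cb) (long) 1;      // placeholder, as in the test
lt_destroy_cb destroy_cb = (lt_destroy_cb) (long) 2;
lt_escalate_cb escalate_cb = (lt_escalate_cb) (long) 3;
mgr.create(create_cb, destroy_cb, escalate_cb, nullptr);
invariant(mgr.m_max_lock_memory == locktree_manager::DEFAULT_MAX_LOCK_MEMORY);
mgr.destroy();
```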
DISCLAIMER: @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include -#include +#include #include @@ -121,9 +121,8 @@ static void test_points(void) { } size_t i = 0; - range_buffer::iterator iter; + range_buffer::iterator iter(&buffer); range_buffer::iterator::record rec; - iter.create(&buffer); while (iter.current(&rec)) { const DBT *expected_point = get_dbt_by_iteration(i); invariant(compare_dbts(nullptr, expected_point, rec.get_left_key()) == 0); @@ -151,9 +150,8 @@ static void test_ranges(void) { } size_t i = 0; - range_buffer::iterator iter; + range_buffer::iterator iter(&buffer); range_buffer::iterator::record rec; - iter.create(&buffer); while (iter.current(&rec)) { const DBT *expected_left = get_dbt_by_iteration(i); const DBT *expected_right = get_dbt_by_iteration(i + 1); @@ -187,9 +185,8 @@ static void test_mixed(void) { } size_t i = 0; - range_buffer::iterator iter; + range_buffer::iterator iter(&buffer); range_buffer::iterator::record rec; - iter.create(&buffer); while (iter.current(&rec)) { const DBT *expected_left = get_dbt_by_iteration(i); const DBT *expected_right = get_dbt_by_iteration(i + 1); @@ -232,10 +229,10 @@ static void test_small_and_large_points(void) { // Append a small dbt, the buf should be able to fit it. buffer.append(&small_dbt, &small_dbt); - invariant(buffer.m_buf_size >= small_dbt.size); + invariant(buffer.total_memory_size() >= small_dbt.size); // Append a large dbt, the buf should be able to fit it. buffer.append(&large_dbt, &large_dbt); - invariant(buffer.m_buf_size >= (small_dbt.size + large_dbt.size)); + invariant(buffer.total_memory_size() >= (small_dbt.size + large_dbt.size)); toku_free(small_buf); toku_free(large_buf); diff --git a/storage/tokudb/ft-index/locktree/tests/test.h b/storage/tokudb/ft-index/locktree/tests/test.h index cf9a805543c0c..904d0d034152c 100644 --- a/storage/tokudb/ft-index/locktree/tests/test.h +++ b/storage/tokudb/ft-index/locktree/tests/test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,79 +86,90 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
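The range_buffer_test.cc hunks above capture two small API changes: the iterator is now constructed directly over the buffer instead of being create()'d afterwards, and tests query buffer.total_memory_size() rather than reading the private m_buf_size field. A hedged sketch of the new iteration loop; the advance call is an assumption, since only current() appears in the visible hunks.

```cpp
// Hedged sketch of iterating a range_buffer after this change.
toku::range_buffer buffer;
buffer.create();
buffer.append(left_key, right_key);              // left_key/right_key: caller-provided DBT pointers

toku::range_buffer::iterator iter(&buffer);      // was: iterator iter; iter.create(&buffer);
toku::range_buffer::iterator::record rec;
while (iter.current(&rec)) {
    const DBT *left = rec.get_left_key();        // as used by the point/range tests above
    (void) left;
    iter.next();                                 // assumed advance call, not shown in the hunks
}

size_t mem = buffer.total_memory_size();         // accessor replaces direct m_buf_size reads
(void) mem;
buffer.destroy();
```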
-#ifndef TOKU_TEST_H -#define TOKU_TEST_H - -#include #include +#include "ft/comparator.h" +#include "util/dbt.h" + namespace toku { -__attribute__((__unused__)) -static DBT min_dbt(void) { - static int64_t min = INT_MIN; - DBT dbt; - toku_fill_dbt(&dbt, &min, sizeof(int64_t)); - dbt.flags = DB_DBT_USERMEM; - return dbt; -} - -__attribute__((__unused__)) -static DBT max_dbt(void) { - static int64_t max = INT_MAX; - DBT dbt; - toku_fill_dbt(&dbt, &max, sizeof(int64_t)); - dbt.flags = DB_DBT_USERMEM; - return dbt; -} - -__attribute__((__unused__)) -static const DBT *get_dbt(int64_t key) { - static const int NUM_DBTS = 1000; - static bool initialized; - static int64_t static_ints[NUM_DBTS]; - static DBT static_dbts[NUM_DBTS]; - invariant(key < NUM_DBTS); - if (!initialized) { - for (int i = 0; i < NUM_DBTS; i++) { - static_ints[i] = i; - toku_fill_dbt(&static_dbts[i], - &static_ints[i], - sizeof(int64_t)); - static_dbts[i].flags = DB_DBT_USERMEM; + __attribute__((__unused__)) + static DBT min_dbt(void) { + static int64_t min = INT_MIN; + DBT dbt; + toku_fill_dbt(&dbt, &min, sizeof(int64_t)); + dbt.flags = DB_DBT_USERMEM; + return dbt; + } + + __attribute__((__unused__)) + static DBT max_dbt(void) { + static int64_t max = INT_MAX; + DBT dbt; + toku_fill_dbt(&dbt, &max, sizeof(int64_t)); + dbt.flags = DB_DBT_USERMEM; + return dbt; + } + + __attribute__((__unused__)) + static const DBT *get_dbt(int64_t key) { + static const int NUM_DBTS = 1000; + static bool initialized; + static int64_t static_ints[NUM_DBTS]; + static DBT static_dbts[NUM_DBTS]; + invariant(key < NUM_DBTS); + if (!initialized) { + for (int i = 0; i < NUM_DBTS; i++) { + static_ints[i] = i; + toku_fill_dbt(&static_dbts[i], + &static_ints[i], + sizeof(int64_t)); + static_dbts[i].flags = DB_DBT_USERMEM; + } + initialized = true; } - initialized = true; + + invariant(key < NUM_DBTS); + return &static_dbts[key]; } - invariant(key < NUM_DBTS); - return &static_dbts[key]; -} - -__attribute__((__unused__)) -static int compare_dbts(DB *db, const DBT *key1, const DBT *key2) { - (void) db; - - // this emulates what a "infinity-aware" comparator object does - if (toku_dbt_is_infinite(key1) || toku_dbt_is_infinite(key2)) { - return toku_dbt_infinite_compare(key1, key2); - } else { - invariant(key1->size == sizeof(int64_t)); - invariant(key2->size == sizeof(int64_t)); - int64_t a = *(int64_t*) key1->data; - int64_t b = *(int64_t*) key2->data; - if (a < b) { - return -1; - } else if (a == b) { - return 0; + __attribute__((__unused__)) + static int compare_dbts(DB *db, const DBT *key1, const DBT *key2) { + (void) db; + + // this emulates what a "infinity-aware" comparator object does + if (toku_dbt_is_infinite(key1) || toku_dbt_is_infinite(key2)) { + return toku_dbt_infinite_compare(key1, key2); } else { - return 1; + invariant(key1->size == sizeof(int64_t)); + invariant(key2->size == sizeof(int64_t)); + int64_t a = *(int64_t*) key1->data; + int64_t b = *(int64_t*) key2->data; + if (a < b) { + return -1; + } else if (a == b) { + return 0; + } else { + return 1; + } } } -} -} /* namespace toku */ + __attribute__((__unused__)) comparator dbt_comparator; + + __attribute__((__constructor__)) + static void construct_dbt_comparator(void) { + dbt_comparator.create(compare_dbts, nullptr); + } -#endif + __attribute__((__destructor__)) + static void destruct_dbt_comparator(void) { + dbt_comparator.destroy(); + } + +} /* namespace toku */ diff --git a/storage/tokudb/ft-index/locktree/tests/txnid_set_test.cc 
b/storage/tokudb/ft-index/locktree/tests/txnid_set_test.cc index fe442a5068348..3502b9bf049c3 100644 --- a/storage/tokudb/ft-index/locktree/tests/txnid_set_test.cc +++ b/storage/tokudb/ft-index/locktree/tests/txnid_set_test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/locktree/tests/wfg_test.cc b/storage/tokudb/ft-index/locktree/tests/wfg_test.cc index a7669135bf0ca..be3f8fa1f2057 100644 --- a/storage/tokudb/ft-index/locktree/tests/wfg_test.cc +++ b/storage/tokudb/ft-index/locktree/tests/wfg_test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/locktree/treenode.cc b/storage/tokudb/ft-index/locktree/treenode.cc index 0e8953ce89508..9853874776f81 100644 --- a/storage/tokudb/ft-index/locktree/treenode.cc +++ b/storage/tokudb/ft-index/locktree/treenode.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -99,7 +99,7 @@ void treenode::mutex_unlock(void) { toku_mutex_unlock(&m_mutex); } -void treenode::init(comparator *cmp) { +void treenode::init(const comparator *cmp) { m_txnid = TXNID_NONE; m_is_root = false; m_is_empty = true; @@ -117,7 +117,7 @@ void treenode::init(comparator *cmp) { m_right_child.set(nullptr); } -void treenode::create_root(comparator *cmp) { +void treenode::create_root(const comparator *cmp) { init(cmp); m_is_root = true; } @@ -145,10 +145,10 @@ bool treenode::is_empty(void) { } bool treenode::range_overlaps(const keyrange &range) { - return m_range.overlaps(m_cmp, range); + return m_range.overlaps(*m_cmp, range); } -treenode *treenode::alloc(comparator *cmp, const keyrange &range, TXNID txnid) { +treenode *treenode::alloc(const comparator *cmp, const keyrange &range, TXNID txnid) { treenode *XCALLOC(node); node->init(cmp); node->set_range_and_txnid(range, txnid); @@ -190,7 +190,7 @@ treenode *treenode::find_node_with_overlapping_child(const keyrange &range, // determine which child to look at based on a comparison. if we were // given a comparison hint, use that. otherwise, compare them now. - keyrange::comparison c = cmp_hint ? *cmp_hint : range.compare(m_cmp, m_range); + keyrange::comparison c = cmp_hint ? *cmp_hint : range.compare(*m_cmp, m_range); treenode *child; if (c == keyrange::comparison::LESS_THAN) { @@ -209,7 +209,7 @@ treenode *treenode::find_node_with_overlapping_child(const keyrange &range, if (child == nullptr) { return this; } else { - c = range.compare(m_cmp, child->m_range); + c = range.compare(*m_cmp, child->m_range); if (c == keyrange::comparison::EQUALS || c == keyrange::comparison::OVERLAPS) { child->mutex_unlock(); return this; @@ -225,7 +225,7 @@ treenode *treenode::find_node_with_overlapping_child(const keyrange &range, template void treenode::traverse_overlaps(const keyrange &range, F *function) { - keyrange::comparison c = range.compare(m_cmp, m_range); + keyrange::comparison c = range.compare(*m_cmp, m_range); if (c == keyrange::comparison::EQUALS) { // Doesn't matter if fn wants to keep going, there // is nothing left, so return. 
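The treenode.cc hunks above continue the comparator theme: the node now holds a const comparator pointer, and keyrange::compare and overlaps take the comparator by const reference, so each call site dereferences the stored pointer. A small hedged illustration of the resulting shape; example_node is hypothetical, with only the member type and call form taken from the hunks.

```cpp
// Hedged illustration of the const-comparator plumbing; not actual treenode code.
class example_node {
    const toku::comparator *m_cmp;               // was: comparator *m_cmp
    toku::keyrange m_range;

public:
    void init(const toku::comparator *cmp) {
        m_cmp = cmp;                             // nodes share one const comparator
    }

    bool range_overlaps(const toku::keyrange &range) {
        return m_range.overlaps(*m_cmp, range);  // compare/overlaps now take a const reference
    }
};
```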
@@ -264,7 +264,7 @@ void treenode::traverse_overlaps(const keyrange &range, F *function) { void treenode::insert(const keyrange &range, TXNID txnid) { // choose a child to check. if that child is null, then insert the new node there. // otherwise recur down that child's subtree - keyrange::comparison c = range.compare(m_cmp, m_range); + keyrange::comparison c = range.compare(*m_cmp, m_range); if (c == keyrange::comparison::LESS_THAN) { treenode *left_child = lock_and_rebalance_left(); if (left_child == nullptr) { @@ -382,7 +382,7 @@ treenode *treenode::remove(const keyrange &range) { // if the range is equal to this node's range, then just remove // the root of this subtree. otherwise search down the tree // in either the left or right children. - keyrange::comparison c = range.compare(m_cmp, m_range); + keyrange::comparison c = range.compare(*m_cmp, m_range); switch (c) { case keyrange::comparison::EQUALS: return remove_root_of_subtree(); diff --git a/storage/tokudb/ft-index/locktree/treenode.h b/storage/tokudb/ft-index/locktree/treenode.h index e48dc50d72b98..7a6880a657cd8 100644 --- a/storage/tokudb/ft-index/locktree/treenode.h +++ b/storage/tokudb/ft-index/locktree/treenode.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,20 +86,19 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TREENODE_H -#define TREENODE_H - -#include #include -#include +#include "portability/memory.h" +#include "portability/toku_pthread.h" -#include - -#include "keyrange.h" +#include "ft/comparator.h" +#include "ft/txn/txn.h" +#include "locktree/keyrange.h" namespace toku { @@ -124,7 +123,7 @@ class treenode { // - node may be unlocked if no other thread has visibility // effect: create the root node - void create_root(comparator *cmp); + void create_root(const comparator *cmp); // effect: destroys the root node void destroy_root(void); @@ -211,7 +210,7 @@ class treenode { child_ptr m_right_child; // comparator for ranges - comparator *m_cmp; + const comparator *m_cmp; // marked for the root node. the root node is never free()'d // when removed, but instead marked as empty. @@ -221,7 +220,7 @@ class treenode { bool m_is_empty; // effect: initializes an empty node with the given comparator - void init(comparator *cmp); + void init(const comparator *cmp); // requires: *parent is initialized to something meaningful. 
// requires: subtree is non-empty @@ -268,7 +267,7 @@ class treenode { treenode *maybe_rebalance(void); // returns: allocated treenode populated with a copy of the range and txnid - static treenode *alloc(comparator *cmp, const keyrange &range, TXNID txnid); + static treenode *alloc(const comparator *cmp, const keyrange &range, TXNID txnid); // requires: node is a locked root node, or an unlocked non-root node static void free(treenode *node); @@ -283,5 +282,3 @@ class treenode { #include "treenode.cc" } /* namespace toku */ - -#endif /* TREENODE_H */ diff --git a/storage/tokudb/ft-index/locktree/txnid_set.cc b/storage/tokudb/ft-index/locktree/txnid_set.cc index 598a717f933d9..f6b95c9b32fbe 100644 --- a/storage/tokudb/ft-index/locktree/txnid_set.cc +++ b/storage/tokudb/ft-index/locktree/txnid_set.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/locktree/txnid_set.h b/storage/tokudb/ft-index/locktree/txnid_set.h index d2971c5c1676c..c2c84b39c0758 100644 --- a/storage/tokudb/ft-index/locktree/txnid_set.h +++ b/storage/tokudb/ft-index/locktree/txnid_set.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,15 +86,14 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_TXNID_SET_H -#define TOKU_TXNID_SET_H - -#include +#include "ft/txn/txn.h" -#include +#include "util/omt.h" namespace toku { @@ -130,5 +129,3 @@ class txnid_set { ENSURE_POD(txnid_set); } /* namespace toku */ - -#endif /* TOKU_TXNID_SET_H */ diff --git a/storage/tokudb/ft-index/locktree/wfg.cc b/storage/tokudb/ft-index/locktree/wfg.cc index dea97d5cd43f6..e18c7f4aa26bc 100644 --- a/storage/tokudb/ft-index/locktree/wfg.cc +++ b/storage/tokudb/ft-index/locktree/wfg.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/locktree/wfg.h b/storage/tokudb/ft-index/locktree/wfg.h index 2bfd3797f9b42..99172902d2e06 100644 --- a/storage/tokudb/ft-index/locktree/wfg.h +++ b/storage/tokudb/ft-index/locktree/wfg.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,17 +86,13 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
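A change that repeats across every header touched here (locktree_unit_test.h, manager_unit_test.h, test.h, treenode.h, txnid_set.h, wfg.h, memory.h) is that the per-file TOKU_*_H include guards are dropped in favor of #pragma once, and includes are rewritten against the ft/util/locktree/portability directory layout. Schematically, using txnid_set.h's names as the example:

```cpp
// Schematic before/after for the header-guard change (guard name illustrative).
//
// Before:
//   #ifndef TOKU_TXNID_SET_H
//   #define TOKU_TXNID_SET_H
//   ...
//   #endif /* TOKU_TXNID_SET_H */
//
// After:
#pragma once

#include "ft/txn/txn.h"     // include paths now spell out the subdirectory
#include "util/omt.h"
```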
-#ifndef TOKU_WFG_H -#define TOKU_WFG_H - -#include - -#include - -#include "txnid_set.h" +#include "locktree/txnid_set.h" +#include "util/omt.h" namespace toku { @@ -159,5 +155,3 @@ class wfg { ENSURE_POD(wfg); } /* namespace toku */ - -#endif /* TOKU_WFG_H */ diff --git a/storage/tokudb/ft-index/portability/CMakeLists.txt b/storage/tokudb/ft-index/portability/CMakeLists.txt index e4364357fb047..9f84d9b03df2a 100644 --- a/storage/tokudb/ft-index/portability/CMakeLists.txt +++ b/storage/tokudb/ft-index/portability/CMakeLists.txt @@ -19,7 +19,6 @@ target_link_libraries(${LIBTOKUPORTABILITY} LINK_PUBLIC ${CMAKE_THREAD_LIBS_INIT add_library(tokuportability_static_conv STATIC ${tokuportability_srcs}) set_target_properties(tokuportability_static_conv PROPERTIES POSITION_INDEPENDENT_CODE ON) -add_dependencies(tokuportability_static_conv build_jemalloc) set(tokuportability_source_libs tokuportability_static_conv ${LIBJEMALLOC} ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) toku_merge_static_libs(${LIBTOKUPORTABILITY}_static ${LIBTOKUPORTABILITY}_static "${tokuportability_source_libs}") @@ -29,10 +28,19 @@ set_property(TARGET ${LIBTOKUPORTABILITY} tokuportability_static_conv APPEND PRO set_property(SOURCE file memory os_malloc portability toku_assert toku_rwlock APPEND PROPERTY COMPILE_DEFINITIONS TOKU_ALLOW_DEPRECATED=1) +configure_file(toku_config.h.in toku_config.h) +add_custom_target(generate_config_h DEPENDS + "${CMAKE_CURRENT_BINARY_DIR}/toku_config.h") + # detect when we are being built as a subproject if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING) install( - FILES toku_os_types.h toku_time.h + FILES toku_os_types.h toku_time.h toku_list.h toku_os.h + DESTINATION include + COMPONENT tokukv_headers + ) + install( + FILES "${CMAKE_CURRENT_BINARY_DIR}/toku_config.h" DESTINATION include COMPONENT tokukv_headers ) diff --git a/storage/tokudb/ft-index/portability/file.cc b/storage/tokudb/ft-index/portability/file.cc index b351141fe2913..6919b54e81d2a 100644 --- a/storage/tokudb/ft-index/portability/file.cc +++ b/storage/tokudb/ft-index/portability/file.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -166,10 +166,10 @@ try_again_after_handling_write_error(int fd, size_t len, ssize_t r_write) { ssize_t n = readlink(fname, symname, MY_MAX_PATH); if ((int)n == -1) - fprintf(stderr, "%.24s Tokudb No space when writing %" PRIu64 " bytes to fd=%d ", tstr, (uint64_t) len, fd); + fprintf(stderr, "%.24s TokuFT No space when writing %" PRIu64 " bytes to fd=%d ", tstr, (uint64_t) len, fd); else { tstr[n] = 0; // readlink doesn't append a NUL to the end of the buffer. - fprintf(stderr, "%.24s Tokudb No space when writing %" PRIu64 " bytes to %*s ", tstr, (uint64_t) len, (int) n, symname); + fprintf(stderr, "%.24s TokuFT No space when writing %" PRIu64 " bytes to %*s ", tstr, (uint64_t) len, (int) n, symname); } fprintf(stderr, "retry in %d second%s\n", toku_write_enospc_sleep, toku_write_enospc_sleep > 1 ? "s" : ""); fflush(stderr); diff --git a/storage/tokudb/ft-index/portability/huge_page_detection.cc b/storage/tokudb/ft-index/portability/huge_page_detection.cc index 0d376ca521ed6..c90333857c315 100644 --- a/storage/tokudb/ft-index/portability/huge_page_detection.cc +++ b/storage/tokudb/ft-index/portability/huge_page_detection.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -93,11 +93,8 @@ PATENT RIGHTS GRANT: #include #include -#include - -#include "huge_page_detection.h" - -extern "C" { +#include +#include static bool check_huge_pages_config_file(const char *fname) // Effect: Return true if huge pages are there. If so, print diagnostics. @@ -120,20 +117,6 @@ static bool check_huge_pages_config_file(const char *fname) return huge_pages_enabled; } -/* struct mapinfo { */ -/* void *addr; */ -/* size_t size; */ -/* }; */ - -/* static void* map_it(size_t size, struct mapinfo *mi, int *n_maps) { */ -/* void *r = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); */ -/* if ((long)r==-1) perror("mmap failed"); */ -/* mi[*n_maps].addr = r; */ -/* mi[*n_maps].size = size; */ -/* (*n_maps)++; */ -/* return r; */ -/* } */ - static bool check_huge_pages_in_practice(void) // Effect: Return true if huge pages appear to be defined in practice. { @@ -195,7 +178,7 @@ static bool check_huge_pages_in_practice(void) #endif } -bool complain_and_return_true_if_huge_pages_are_enabled(void) +bool toku_os_huge_pages_enabled(void) // Effect: Return true if huge pages appear to be enabled. If so, print some diagnostics to stderr. // If environment variable TOKU_HUGE_PAGES_OK is set, then don't complain. { @@ -209,4 +192,3 @@ bool complain_and_return_true_if_huge_pages_are_enabled(void) return conf1|conf2|prac; } } -} diff --git a/storage/tokudb/ft-index/portability/huge_page_detection.h b/storage/tokudb/ft-index/portability/huge_page_detection.h deleted file mode 100644 index 7efcbb2fbfcf8..0000000000000 --- a/storage/tokudb/ft-index/portability/huge_page_detection.h +++ /dev/null @@ -1,96 +0,0 @@ -/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." - - -#pragma once - -extern "C" bool complain_and_return_true_if_huge_pages_are_enabled(void); -// Effect: Return true if huge pages appear to be enabled. If so, print some diagnostics to stderr. -// If environment variable TOKU_HUGE_PAGES_OK is set, then don't complain. diff --git a/storage/tokudb/ft-index/portability/memory.cc b/storage/tokudb/ft-index/portability/memory.cc index 885a94c508ba4..568be399bb54f 100644 --- a/storage/tokudb/ft-index/portability/memory.cc +++ b/storage/tokudb/ft-index/portability/memory.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
-#include "toku_config.h" +#include #include #include @@ -221,6 +221,9 @@ toku_memory_footprint(void * p, size_t touched) { void * toku_malloc(size_t size) { + if (size > status.max_requested_size) { + status.max_requested_size = size; + } void *p = t_malloc ? t_malloc(size) : os_malloc(size); if (p) { TOKU_ANNOTATE_NEW_MEMORY(p, size); // see #4671 and https://bugs.kde.org/show_bug.cgi?id=297147 @@ -233,11 +236,15 @@ toku_malloc(size_t size) { } } else { toku_sync_add_and_fetch(&status.malloc_fail, 1); + status.last_failed_size = size; } return p; } void *toku_malloc_aligned(size_t alignment, size_t size) { + if (size > status.max_requested_size) { + status.max_requested_size = size; + } void *p = t_malloc_aligned ? t_malloc_aligned(alignment, size) : os_malloc_aligned(alignment, size); if (p) { TOKU_ANNOTATE_NEW_MEMORY(p, size); // see #4671 and https://bugs.kde.org/show_bug.cgi?id=297147 @@ -250,6 +257,7 @@ void *toku_malloc_aligned(size_t alignment, size_t size) { } } else { toku_sync_add_and_fetch(&status.malloc_fail, 1); + status.last_failed_size = size; } return p; } @@ -264,6 +272,9 @@ toku_calloc(size_t nmemb, size_t size) { void * toku_realloc(void *p, size_t size) { + if (size > status.max_requested_size) { + status.max_requested_size = size; + } size_t used_orig = p ? my_malloc_usable_size(p) : 0; void *q = t_realloc ? t_realloc(p, size) : os_realloc(p, size); if (q) { @@ -277,11 +288,15 @@ toku_realloc(void *p, size_t size) { } } else { toku_sync_add_and_fetch(&status.realloc_fail, 1); + status.last_failed_size = size; } return q; } void *toku_realloc_aligned(size_t alignment, void *p, size_t size) { + if (size > status.max_requested_size) { + status.max_requested_size = size; + } size_t used_orig = p ? my_malloc_usable_size(p) : 0; void *q = t_realloc_aligned ? t_realloc_aligned(alignment, p, size) : os_realloc_aligned(alignment, p, size); if (q) { @@ -295,6 +310,7 @@ void *toku_realloc_aligned(size_t alignment, void *p, size_t size) { } } else { toku_sync_add_and_fetch(&status.realloc_fail, 1); + status.last_failed_size = size; } return q; } @@ -329,9 +345,14 @@ toku_free(void *p) { void * toku_xmalloc(size_t size) { + if (size > status.max_requested_size) { + status.max_requested_size = size; + } void *p = t_xmalloc ? t_xmalloc(size) : os_malloc(size); - if (p == NULL) // avoid function call in common case + if (p == NULL) { // avoid function call in common case + status.last_failed_size = size; resource_assert(p); + } TOKU_ANNOTATE_NEW_MEMORY(p, size); // see #4671 and https://bugs.kde.org/show_bug.cgi?id=297147 if (toku_memory_do_stats) { size_t used = my_malloc_usable_size(p); @@ -348,8 +369,14 @@ void* toku_xmalloc_aligned(size_t alignment, size_t size) // Fail with a resource_assert if the allocation fails (don't return an error code). // Requires: alignment is a power of two. { + if (size > status.max_requested_size) { + status.max_requested_size = size; + } void *p = t_xmalloc_aligned ? t_xmalloc_aligned(alignment, size) : os_malloc_aligned(alignment,size); - resource_assert(p); + if (p == NULL) { + status.last_failed_size = size; + resource_assert(p); + } if (toku_memory_do_stats) { size_t used = my_malloc_usable_size(p); toku_sync_add_and_fetch(&status.malloc_count, 1); @@ -370,10 +397,15 @@ toku_xcalloc(size_t nmemb, size_t size) { void * toku_xrealloc(void *v, size_t size) { + if (size > status.max_requested_size) { + status.max_requested_size = size; + } size_t used_orig = v ? my_malloc_usable_size(v) : 0; void *p = t_xrealloc ? 
t_xrealloc(v, size) : os_realloc(v, size); - if (p == 0) // avoid function call in common case + if (p == 0) { // avoid function call in common case + status.last_failed_size = size; resource_assert(p); + } if (toku_memory_do_stats) { size_t used = my_malloc_usable_size(p); toku_sync_add_and_fetch(&status.realloc_count, 1); diff --git a/storage/tokudb/ft-index/toku_include/memory.h b/storage/tokudb/ft-index/portability/memory.h similarity index 92% rename from storage/tokudb/ft-index/toku_include/memory.h rename to storage/tokudb/ft-index/portability/memory.h index 215ea2e209d43..837b0a7026509 100644 --- a/storage/tokudb/ft-index/toku_include/memory.h +++ b/storage/tokudb/ft-index/portability/memory.h @@ -1,8 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: #ident "$Id$" -#ifndef MEMORY_H -#define MEMORY_H /* COPYING CONDITIONS NOTICE: @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,12 +87,13 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include - /* Tokutek memory allocation functions and macros. * These are functions for malloc and free */ @@ -207,14 +206,16 @@ void toku_set_func_realloc_only(realloc_fun_t f); void toku_set_func_free(free_fun_t f); typedef struct memory_status { - uint64_t malloc_count; // number of malloc operations - uint64_t free_count; // number of free operations - uint64_t realloc_count; // number of realloc operations - uint64_t malloc_fail; // number of malloc operations that failed - uint64_t realloc_fail; // number of realloc operations that failed - uint64_t requested; // number of bytes requested - uint64_t used; // number of bytes used (requested + overhead), obtained from malloc_usable_size() - uint64_t freed; // number of bytes freed; + uint64_t malloc_count; // number of malloc operations + uint64_t free_count; // number of free operations + uint64_t realloc_count; // number of realloc operations + uint64_t malloc_fail; // number of malloc operations that failed + uint64_t realloc_fail; // number of realloc operations that failed + uint64_t requested; // number of bytes requested + uint64_t used; // number of bytes used (requested + overhead), obtained from malloc_usable_size() + uint64_t freed; // number of bytes freed; + uint64_t max_requested_size; // largest attempted allocation size + uint64_t last_failed_size; // size of the last failed allocation attempt volatile uint64_t max_in_use; // maximum memory footprint (used - freed), approximate (not worth threadsafety overhead for exact) const char *mallocator_version; uint64_t mmap_threshold; @@ -223,5 +224,3 @@ typedef struct memory_status { void toku_memory_get_status(LOCAL_MEMORY_STATUS s); size_t toku_memory_footprint(void * p, size_t touched); - -#endif diff --git a/storage/tokudb/ft-index/portability/os_malloc.cc b/storage/tokudb/ft-index/portability/os_malloc.cc index ecc5ea9ca3c7b..c59167bd8c4eb 100644 --- a/storage/tokudb/ft-index/portability/os_malloc.cc +++ b/storage/tokudb/ft-index/portability/os_malloc.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
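The memory.cc and memory.h hunks above add two counters to memory_status: max_requested_size, a high-water mark of requested allocation sizes, and last_failed_size, the size of the most recent failed request, and every allocator entry point updates them. A condensed, hedged restatement of the pattern; the real code repeats it in toku_malloc, toku_realloc, the aligned variants, and the x* versions.

```cpp
// Hedged restatement of the counter updates added in this diff; 'status',
// os_malloc and toku_sync_add_and_fetch are the file-local names used above.
static void *example_tracked_malloc(size_t size) {
    if (size > status.max_requested_size) {
        status.max_requested_size = size;        // high-water mark of requested sizes
    }
    void *p = os_malloc(size);
    if (p == nullptr) {
        toku_sync_add_and_fetch(&status.malloc_fail, 1);
        status.last_failed_size = size;          // remember the size that failed most recently
    }
    return p;
}
```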
DISCLAIMER: @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#include "toku_config.h" +#include #include #include diff --git a/storage/tokudb/ft-index/portability/portability.cc b/storage/tokudb/ft-index/portability/portability.cc index 55437fdc886af..09c1ccd50be20 100644 --- a/storage/tokudb/ft-index/portability/portability.cc +++ b/storage/tokudb/ft-index/portability/portability.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "$Id$" -#include "toku_config.h" +#include #include #include diff --git a/storage/tokudb/ft-index/portability/tests/rwlock_condvar.h b/storage/tokudb/ft-index/portability/tests/rwlock_condvar.h index fb592175dc82f..135481f8997fe 100644 --- a/storage/tokudb/ft-index/portability/tests/rwlock_condvar.h +++ b/storage/tokudb/ft-index/portability/tests/rwlock_condvar.h @@ -33,7 +33,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-active-cpus.cc b/storage/tokudb/ft-index/portability/tests/test-active-cpus.cc index c8e1188cd0380..ed141edd0bf65 100644 --- a/storage/tokudb/ft-index/portability/tests/test-active-cpus.cc +++ b/storage/tokudb/ft-index/portability/tests/test-active-cpus.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-cache-line-boundary-fails.cc b/storage/tokudb/ft-index/portability/tests/test-cache-line-boundary-fails.cc index bff4d1bfd18ed..eb4862c225400 100644 --- a/storage/tokudb/ft-index/portability/tests/test-cache-line-boundary-fails.cc +++ b/storage/tokudb/ft-index/portability/tests/test-cache-line-boundary-fails.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,15 +89,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "toku_config.h" -#include +#include #include -#include "test.h" + +#include #include #include #include #include +#include "test.h" + int verbose = 0; static const size_t cachelinesize = 64; diff --git a/storage/tokudb/ft-index/portability/tests/test-cpu-freq-openlimit17.cc b/storage/tokudb/ft-index/portability/tests/test-cpu-freq-openlimit17.cc index ae4ec26fb97cb..04e58d49bf6e7 100644 --- a/storage/tokudb/ft-index/portability/tests/test-cpu-freq-openlimit17.cc +++ b/storage/tokudb/ft-index/portability/tests/test-cpu-freq-openlimit17.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-cpu-freq.cc b/storage/tokudb/ft-index/portability/tests/test-cpu-freq.cc index f0e991c97354d..889eb29c5c10f 100644 --- a/storage/tokudb/ft-index/portability/tests/test-cpu-freq.cc +++ b/storage/tokudb/ft-index/portability/tests/test-cpu-freq.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-filesystem-sizes.cc b/storage/tokudb/ft-index/portability/tests/test-filesystem-sizes.cc index e4466b8395294..993eaf4fea23d 100644 --- a/storage/tokudb/ft-index/portability/tests/test-filesystem-sizes.cc +++ b/storage/tokudb/ft-index/portability/tests/test-filesystem-sizes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-flock.cc b/storage/tokudb/ft-index/portability/tests/test-flock.cc index 942dc6b06861e..5ef45b1bd971d 100644 --- a/storage/tokudb/ft-index/portability/tests/test-flock.cc +++ b/storage/tokudb/ft-index/portability/tests/test-flock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-fsync-directory.cc b/storage/tokudb/ft-index/portability/tests/test-fsync-directory.cc index 8d1546fcff791..a0de1a0d88224 100644 --- a/storage/tokudb/ft-index/portability/tests/test-fsync-directory.cc +++ b/storage/tokudb/ft-index/portability/tests/test-fsync-directory.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-fsync.cc b/storage/tokudb/ft-index/portability/tests/test-fsync.cc index 843a09b5dc913..4d3be11120f22 100644 --- a/storage/tokudb/ft-index/portability/tests/test-fsync.cc +++ b/storage/tokudb/ft-index/portability/tests/test-fsync.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -221,8 +221,6 @@ time_fsyncs_many_files(int N, int bytes, int fds[/*N*/]) { } } -#if !TOKU_WINDOWS -//sync() does not appear to have an analogue on windows. 
static void time_sync_fsyncs_many_files(int N, int bytes, int fds[/*N*/]) { if (verbose>1) { @@ -277,7 +275,6 @@ time_sync_fsyncs_many_files(int N, int bytes, int fds[/*N*/]) { fflush(stdout); } } -#endif int test_main(int argc, char *const argv[]) { int i; @@ -320,9 +317,7 @@ int test_main(int argc, char *const argv[]) { time_many_fsyncs_one_file(N, bytes, fds); time_fsyncs_many_files(N, bytes, fds); -#if !TOKU_WINDOWS time_sync_fsyncs_many_files(N, bytes, fds); -#endif return 0; } diff --git a/storage/tokudb/ft-index/portability/tests/test-gettime.cc b/storage/tokudb/ft-index/portability/tests/test-gettime.cc index 70b24cd2aafb2..ce0e5cb992197 100644 --- a/storage/tokudb/ft-index/portability/tests/test-gettime.cc +++ b/storage/tokudb/ft-index/portability/tests/test-gettime.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-gettimeofday.cc b/storage/tokudb/ft-index/portability/tests/test-gettimeofday.cc index a4660d8ba4f8e..0ff77118d1c19 100644 --- a/storage/tokudb/ft-index/portability/tests/test-gettimeofday.cc +++ b/storage/tokudb/ft-index/portability/tests/test-gettimeofday.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-hugepage.cc b/storage/tokudb/ft-index/portability/tests/test-hugepage.cc index 70a6784da8b36..4aa11ee2e795f 100644 --- a/storage/tokudb/ft-index/portability/tests/test-hugepage.cc +++ b/storage/tokudb/ft-index/portability/tests/test-hugepage.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,12 +88,12 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include -#include -#include + +#include +#include int main(void) { - bool enabled = complain_and_return_true_if_huge_pages_are_enabled(); - assert(!enabled); + bool enabled = toku_os_huge_pages_enabled(); + invariant(!enabled); return 0; } diff --git a/storage/tokudb/ft-index/portability/tests/test-max-data.cc b/storage/tokudb/ft-index/portability/tests/test-max-data.cc index 459349460d633..f04b39d84215f 100644 --- a/storage/tokudb/ft-index/portability/tests/test-max-data.cc +++ b/storage/tokudb/ft-index/portability/tests/test-max-data.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
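The test-hugepage.cc hunk above swaps the old complain_and_return_true_if_huge_pages_are_enabled() call for toku_os_huge_pages_enabled() and replaces assert with invariant. The real detection logic lives in portability/huge_page_detection.cc (per the toku_os.h comment later in this patch); the following is only a hypothetical sketch of how such a check can be done on Linux by reading the transparent-hugepage sysfs knob. The function name sketch_huge_pages_enabled and the 256-byte buffer are illustrative assumptions, not TokuFT code.

// Hypothetical sketch: report whether Linux transparent huge pages are set to
// "always" by reading the sysfs knob (the currently selected mode is bracketed).
#include <cstdio>
#include <cstring>

static bool sketch_huge_pages_enabled(void) {
    FILE *f = fopen("/sys/kernel/mm/transparent_hugepage/enabled", "r");
    if (f == nullptr)
        return false;                      // knob absent: treat as disabled
    char buf[256];
    bool enabled = false;
    if (fgets(buf, sizeof buf, f) != nullptr)
        enabled = (strstr(buf, "[always]") != nullptr);
    fclose(f);
    return enabled;
}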
DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-memory-status.cc b/storage/tokudb/ft-index/portability/tests/test-memory-status.cc index 20eea248bae73..87011c4e0d0e4 100644 --- a/storage/tokudb/ft-index/portability/tests/test-memory-status.cc +++ b/storage/tokudb/ft-index/portability/tests/test-memory-status.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-pagesize.cc b/storage/tokudb/ft-index/portability/tests/test-pagesize.cc index 5f921fe492011..f9a78742e354d 100644 --- a/storage/tokudb/ft-index/portability/tests/test-pagesize.cc +++ b/storage/tokudb/ft-index/portability/tests/test-pagesize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rdlock.cc b/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rdlock.cc index a4c5dcd0128c2..9008262fa092c 100644 --- a/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rdlock.cc +++ b/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rdlock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rwr.cc b/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rwr.cc index b7a21b1fc068f..32b38421aaf5e 100644 --- a/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rwr.cc +++ b/storage/tokudb/ft-index/portability/tests/test-pthread-rwlock-rwr.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-pwrite4g.cc b/storage/tokudb/ft-index/portability/tests/test-pwrite4g.cc index 3fa21f99fa059..abd5e4ec1acef 100644 --- a/storage/tokudb/ft-index/portability/tests/test-pwrite4g.cc +++ b/storage/tokudb/ft-index/portability/tests/test-pwrite4g.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-snprintf.cc b/storage/tokudb/ft-index/portability/tests/test-snprintf.cc index 852fab29ba5e4..5f168c8d6126a 100644 --- a/storage/tokudb/ft-index/portability/tests/test-snprintf.cc +++ b/storage/tokudb/ft-index/portability/tests/test-snprintf.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-stat.cc b/storage/tokudb/ft-index/portability/tests/test-stat.cc index 8e3d18eac9a66..bedf7e7e54f39 100644 --- a/storage/tokudb/ft-index/portability/tests/test-stat.cc +++ b/storage/tokudb/ft-index/portability/tests/test-stat.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-toku-malloc.cc b/storage/tokudb/ft-index/portability/tests/test-toku-malloc.cc index 8c58823033101..48f616dd81753 100644 --- a/storage/tokudb/ft-index/portability/tests/test-toku-malloc.cc +++ b/storage/tokudb/ft-index/portability/tests/test-toku-malloc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/test-xid.cc b/storage/tokudb/ft-index/portability/tests/test-xid.cc index c71b70d499fd7..9277f984b4326 100644 --- a/storage/tokudb/ft-index/portability/tests/test-xid.cc +++ b/storage/tokudb/ft-index/portability/tests/test-xid.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "toku_config.h" +#include #include #include #include diff --git a/storage/tokudb/ft-index/portability/tests/test.h b/storage/tokudb/ft-index/portability/tests/test.h index ff71395e7d793..a3e7994957ef7 100644 --- a/storage/tokudb/ft-index/portability/tests/test.h +++ b/storage/tokudb/ft-index/portability/tests/test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/try-assert-zero.cc b/storage/tokudb/ft-index/portability/tests/try-assert-zero.cc index 6249d0b1aa511..6517f61b3c3ad 100644 --- a/storage/tokudb/ft-index/portability/tests/try-assert-zero.cc +++ b/storage/tokudb/ft-index/portability/tests/try-assert-zero.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/try-assert0.cc b/storage/tokudb/ft-index/portability/tests/try-assert0.cc index f2e1a99469bae..89fe6941138fc 100644 --- a/storage/tokudb/ft-index/portability/tests/try-assert0.cc +++ b/storage/tokudb/ft-index/portability/tests/try-assert0.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/try-leak-lost.cc b/storage/tokudb/ft-index/portability/tests/try-leak-lost.cc index fa6217f39f0ab..57bbe3589bb94 100644 --- a/storage/tokudb/ft-index/portability/tests/try-leak-lost.cc +++ b/storage/tokudb/ft-index/portability/tests/try-leak-lost.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/try-leak-reachable.cc b/storage/tokudb/ft-index/portability/tests/try-leak-reachable.cc index b17418ae67b3f..63c1dd4f756c6 100644 --- a/storage/tokudb/ft-index/portability/tests/try-leak-reachable.cc +++ b/storage/tokudb/ft-index/portability/tests/try-leak-reachable.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/tests/try-uninit.cc b/storage/tokudb/ft-index/portability/tests/try-uninit.cc index 415de3203f7a0..c763348ed2f74 100644 --- a/storage/tokudb/ft-index/portability/tests/try-uninit.cc +++ b/storage/tokudb/ft-index/portability/tests/try-uninit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/toku_assert.cc b/storage/tokudb/ft-index/portability/toku_assert.cc index f3eeba1b0f1f8..68e16699e60f9 100644 --- a/storage/tokudb/ft-index/portability/toku_assert.cc +++ b/storage/tokudb/ft-index/portability/toku_assert.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#include "toku_config.h" +#include #include #include "toku_assert.h" @@ -101,16 +101,11 @@ PATENT RIGHTS GRANT: # include #endif #include -#if !TOKU_WINDOWS #include -#endif - -#if !TOKU_WINDOWS -#define N_POINTERS 1000 // These are statically allocated so that the backtrace can run without any calls to malloc() +#define N_POINTERS 1000 static void *backtrace_pointers[N_POINTERS]; -#endif static uint64_t engine_status_num_rows = 0; @@ -123,14 +118,17 @@ toku_assert_init(void) malloc_stats_f = (malloc_stats_fun_t) dlsym(RTLD_DEFAULT, "malloc_stats"); } -// Function pointers are zero by default so asserts can be used by brt-layer tests without an environment. +// Function pointers are zero by default so asserts can be used by ft-layer tests without an environment. 
static int (*toku_maybe_get_engine_status_text_p)(char* buff, int buffsize) = 0; +static int (*toku_maybe_err_engine_status_p)(void) = 0; static void (*toku_maybe_set_env_panic_p)(int code, const char* msg) = 0; -void toku_assert_set_fpointers(int (*toku_maybe_get_engine_status_text_pointer)(char*, int), +void toku_assert_set_fpointers(int (*toku_maybe_get_engine_status_text_pointer)(char*, int), + int (*toku_maybe_err_engine_status_pointer)(void), void (*toku_maybe_set_env_panic_pointer)(int, const char*), uint64_t num_rows) { toku_maybe_get_engine_status_text_p = toku_maybe_get_engine_status_text_pointer; + toku_maybe_err_engine_status_p = toku_maybe_err_engine_status_pointer; toku_maybe_set_env_panic_p = toku_maybe_set_env_panic_pointer; engine_status_num_rows = num_rows; } @@ -138,56 +136,66 @@ void toku_assert_set_fpointers(int (*toku_maybe_get_engine_status_text_pointer)( bool toku_gdb_dump_on_assert = false; void (*do_assert_hook)(void) = NULL; -static void toku_do_backtrace_abort(void) __attribute__((noreturn)); +void db_env_do_backtrace_errfunc(toku_env_err_func errfunc, const void *env) { + // backtrace + int n = backtrace(backtrace_pointers, N_POINTERS); + errfunc(env, 0, "Backtrace: (Note: toku_do_assert=0x%p)\n", toku_do_assert); + char **syms = backtrace_symbols(backtrace_pointers, n); + if (syms) { + for (char **symstr = syms; symstr != NULL && (symstr - syms) < n; ++symstr) { + errfunc(env, 0, *symstr); + } + free(syms); + } + + if (engine_status_num_rows && toku_maybe_err_engine_status_p) { + toku_maybe_err_engine_status_p(); + } else { + errfunc(env, 0, "Engine status function not available\n"); + } + errfunc(env, 0, "Memory usage:\n"); + if (malloc_stats_f) { + malloc_stats_f(); + } -static void -toku_do_backtrace_abort(void) { + if (do_assert_hook) do_assert_hook(); + if (toku_gdb_dump_on_assert) { + toku_try_gdb_stack_trace(nullptr); + } +} +void db_env_do_backtrace(FILE *outf) { // backtrace -#if !TOKU_WINDOWS int n = backtrace(backtrace_pointers, N_POINTERS); - fprintf(stderr, "Backtrace: (Note: toku_do_assert=0x%p)\n", toku_do_assert); fflush(stderr); - backtrace_symbols_fd(backtrace_pointers, n, fileno(stderr)); -#endif + fprintf(outf, "Backtrace: (Note: toku_do_assert=0x%p)\n", toku_do_assert); fflush(outf); + backtrace_symbols_fd(backtrace_pointers, n, fileno(outf)); - fflush(stderr); + fflush(outf); if (engine_status_num_rows && toku_maybe_get_engine_status_text_p) { int buffsize = engine_status_num_rows * 128; // assume 128 characters per row (gross overestimate, should be safe) char buff[buffsize]; toku_maybe_get_engine_status_text_p(buff, buffsize); - fprintf(stderr, "Engine status:\n%s\n", buff); + fprintf(outf, "Engine status:\n%s\n", buff); + } else { + fprintf(outf, "Engine status function not available\n"); } - else - fprintf(stderr, "Engine status function not available\n"); - fprintf(stderr, "Memory usage:\n"); - fflush(stderr); // just in case malloc_stats() crashes, we still want engine status (and to know that malloc_stats() failed) + fprintf(outf, "Memory usage:\n"); + fflush(outf); // just in case malloc_stats() crashes, we still want engine status (and to know that malloc_stats() failed) if (malloc_stats_f) { malloc_stats_f(); } - fflush(stderr); + fflush(outf); if (do_assert_hook) do_assert_hook(); if (toku_gdb_dump_on_assert) { toku_try_gdb_stack_trace(nullptr); } +} -#if TOKU_WINDOWS - //Following commented methods will not always end the process (could hang). 
- //They could be unacceptable for other reasons as well (popups, - //flush buffers before quitting, etc) - // abort() - // assert(false) (assert.h assert) - // raise(SIGABRT) - // divide by 0 - // null dereference - // _exit - // exit - // ExitProcess - TerminateProcess(GetCurrentProcess(), 134); //Only way found so far to unconditionally - //Terminate the process -#endif - +__attribute__((noreturn)) +static void toku_do_backtrace_abort(void) { + db_env_do_backtrace(stderr); abort(); } diff --git a/storage/tokudb/ft-index/toku_include/toku_assert.h b/storage/tokudb/ft-index/portability/toku_assert.h similarity index 93% rename from storage/tokudb/ft-index/toku_include/toku_assert.h rename to storage/tokudb/ft-index/portability/toku_assert.h index ab9978fdf0b7b..ab5f8c1ffb494 100644 --- a/storage/tokudb/ft-index/toku_include/toku_assert.h +++ b/storage/tokudb/ft-index/portability/toku_assert.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,24 +86,25 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_ASSERT_H -#define TOKU_ASSERT_H /* The problem with assert.h: If NDEBUG is set then it doesn't execute the function, if NDEBUG isn't set then we get a branch that isn't taken. */ + /* This version will complain if NDEBUG is set. */ /* It evaluates the argument and then calls a function toku_do_assert() which takes all the hits for the branches not taken. */ -#include "toku_config.h" +#include #include #include +#include #ifdef NDEBUG #error NDEBUG should not be set #endif - static inline int get_error_errno(void); static inline int @@ -120,7 +121,8 @@ set_errno(int new_errno) void toku_assert_init(void) __attribute__((constructor)); -void toku_assert_set_fpointers(int (*toku_maybe_get_engine_status_text_pointer)(char*, int), +void toku_assert_set_fpointers(int (*toku_maybe_get_engine_status_text_pointer)(char*, int), + int (*toku_maybe_err_engine_status_pointer)(void), void (*toku_maybe_set_env_panic_pointer)(int, const char*), uint64_t num_rows); @@ -134,8 +136,15 @@ void toku_do_assert_expected_fail(uintptr_t/*expr*/, uintptr_t /*expected*/, con // #define GCOV extern void (*do_assert_hook)(void); // Set this to a function you want called after printing the assertion failure message but before calling abort(). By default this is NULL. 
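The toku_assert.cc hunk above splits the old stderr-only abort path into db_env_do_backtrace(FILE *outf) and db_env_do_backtrace_errfunc(), both built on the glibc backtrace facility. As a point of reference, here is a minimal, self-contained sketch of that facility (backtrace() and backtrace_symbols_fd() from <execinfo.h>); the helper name and the 64-frame buffer size are illustrative assumptions, not part of the patch.

// Minimal sketch of the glibc backtrace API the patch relies on (Linux/glibc).
#include <execinfo.h>
#include <cstdio>

static void sketch_print_backtrace(FILE *outf) {
    void *frames[64];                               // fixed-size frame buffer, no malloc needed
    int n = backtrace(frames, 64);                  // capture up to 64 return addresses
    backtrace_symbols_fd(frames, n, fileno(outf));  // symbolize directly to the stream's fd
    fflush(outf);
}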
+// copied here from ydb-internal.h to avoid inclusion hell, the void * is really a DB_ENV but we don't have that type here +typedef void (*toku_env_err_func)(const void * env, int error, const char *fmt, ...); +void db_env_do_backtrace_errfunc(toku_env_err_func errfunc, const void *env); +void db_env_do_backtrace(FILE *outf); -#if defined(GCOV) || TOKU_WINDOWS +#ifdef assert +# undef assert +#endif +#if defined(GCOV) #define assert(expr) toku_do_assert((expr) != 0, #expr, __FUNCTION__, __FILE__, __LINE__, get_maybe_error_errno()) #define assert_zero(expr) toku_do_assert((expr) == 0, #expr, __FUNCTION__, __FILE__, __LINE__, get_maybe_error_errno()) #define assert_equals(expr, expected) toku_do_assert((expr) == (expected), (expected), #expr, __FUNCTION__, __FILE__, __LINE__, get_maybe_error_errno()) @@ -193,5 +202,3 @@ get_error_errno(void) } extern bool toku_gdb_dump_on_assert; - -#endif diff --git a/storage/tokudb/ft-index/portability/toku_atomic.h b/storage/tokudb/ft-index/portability/toku_atomic.h index c24c7ab2607ac..075211a790ca5 100644 --- a/storage/tokudb/ft-index/portability/toku_atomic.h +++ b/storage/tokudb/ft-index/portability/toku_atomic.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -85,14 +85,14 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2012-2013 Tokutek Inc. All rights reserved." #ident "$Id$" -#ifndef TOKU_ATOMIC_H -#define TOKU_ATOMIC_H - -#include "toku_config.h" +#include #include + #include #include #include @@ -158,5 +158,3 @@ static inline bool toku_sync_bool_compare_and_swap(T *addr, U oldval, V newval) #pragma GCC poison __sync_synchronize #pragma GCC poison __sync_lock_test_and_set #pragma GCC poison __sync_release - -#endif // TOKU_ATOMIC_H diff --git a/storage/tokudb/ft-index/portability/toku_byteswap.h b/storage/tokudb/ft-index/portability/toku_byteswap.h index 8c26daf572f1c..12c76b00825c6 100644 --- a/storage/tokudb/ft-index/portability/toku_byteswap.h +++ b/storage/tokudb/ft-index/portability/toku_byteswap.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -85,13 +85,12 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2012-2013 Tokutek Inc. All rights reserved." #ident "$Id$" -#ifndef TOKU_BYTESWAP_H -#define TOKU_BYTESWAP_H - -#include "toku_config.h" +#include #if defined(HAVE_BYTESWAP_H) # include @@ -102,5 +101,3 @@ PATENT RIGHTS GRANT: # include # define bswap_64 OSSwapInt64 #endif - -#endif /* TOKU_BYTESWAP_H */ diff --git a/storage/tokudb/ft-index/toku_include/toku_config.h.in b/storage/tokudb/ft-index/portability/toku_config.h.in similarity index 100% rename from storage/tokudb/ft-index/toku_include/toku_config.h.in rename to storage/tokudb/ft-index/portability/toku_config.h.in diff --git a/storage/tokudb/ft-index/portability/toku_crash.cc b/storage/tokudb/ft-index/portability/toku_crash.cc index 2eed142229db5..123746d8f7f57 100644 --- a/storage/tokudb/ft-index/portability/toku_crash.cc +++ b/storage/tokudb/ft-index/portability/toku_crash.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/toku_crash.h b/storage/tokudb/ft-index/portability/toku_crash.h index bd45ee1a1a171..a5dd959a15d5f 100644 --- a/storage/tokudb/ft-index/portability/toku_crash.h +++ b/storage/tokudb/ft-index/portability/toku_crash.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,9 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#ifndef PORTABILITY_TOKU_CRASH_H -#define PORTABILITY_TOKU_CRASH_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include @@ -108,9 +107,6 @@ PATENT RIGHTS GRANT: // //Linux: // abort() and both assert(false) cause FILE buffers to be flushed and written to disk: Unacceptable -//Windows: -// None of them cause file buffers to be flushed/written to disk, however -// abort(), assert(false) , null dereference, and divide by 0 cause popups requiring user intervention during tests: Unacceptable // //kill -SIGKILL $pid is annoying (and so far untested) // @@ -118,11 +114,7 @@ PATENT RIGHTS GRANT: //I'm choosing raise(SIGABRT), followed by divide by 0, followed by null dereference, followed by all the others just in case one gets caught. static void __attribute__((unused, noreturn)) toku_hard_crash_on_purpose(void) { -#if TOKU_WINDOWS - TerminateProcess(GetCurrentProcess(), 137); -#else raise(SIGKILL); //Does not flush buffers on linux; cannot be caught. -#endif { int zero = 0; int infinity = 1/zero; @@ -199,5 +191,3 @@ toku_crash_and_dump_core_on_purpose(void) { } void toku_try_gdb_stack_trace(const char *gdb_path); - -#endif // PORTABILITY_TOKU_CRASH_H diff --git a/storage/tokudb/ft-index/toku_include/toku_htod.h b/storage/tokudb/ft-index/portability/toku_htod.h similarity index 97% rename from storage/tokudb/ft-index/toku_include/toku_htod.h rename to storage/tokudb/ft-index/portability/toku_htod.h index 71a3ee6e4153d..d12d45a13cfa8 100644 --- a/storage/tokudb/ft-index/toku_include/toku_htod.h +++ b/storage/tokudb/ft-index/portability/toku_htod.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,8 +86,9 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." /* Purpose of this file is to provide definitions of * Host to Disk byte transposition functions, an abstraction of @@ -106,10 +107,7 @@ PATENT RIGHTS GRANT: * HOST AND A LITTLE-ENDIAN DISK. */ -#ifndef _TOKU_HTOD_H -#define _TOKU_HTOD_H - -#include "toku_config.h" +#include #if defined(HAVE_ENDIAN_H) # include @@ -166,8 +164,3 @@ toku_htod32(uint32_t i) { #else #error Not supported #endif - - - -#endif - diff --git a/storage/tokudb/ft-index/portability/toku_htonl.h b/storage/tokudb/ft-index/portability/toku_htonl.h index 8d275ebcd0fd1..f2ba320bf1f5e 100644 --- a/storage/tokudb/ft-index/portability/toku_htonl.h +++ b/storage/tokudb/ft-index/portability/toku_htonl.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -86,16 +86,10 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." - -#ifndef _TOKU_HTONL_H -#define _TOKU_HTONL_H +#pragma once -#if !__linux__ && !__FreeBSD__ && !__sun__ -//#error -#endif +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -// TODO: This byte order stuff should all be in once place (ie: portability layer, not toku_include) #include #include @@ -106,5 +100,3 @@ static inline uint32_t toku_htonl(uint32_t i) { static inline uint32_t toku_ntohl(uint32_t i) { return ntohl(i); } - -#endif diff --git a/storage/tokudb/ft-index/toku_include/toku_list.h b/storage/tokudb/ft-index/portability/toku_list.h similarity index 97% rename from storage/tokudb/ft-index/toku_include/toku_list.h rename to storage/tokudb/ft-index/portability/toku_list.h index b39d56ebd323c..3fc96a671dd78 100644 --- a/storage/tokudb/ft-index/toku_include/toku_list.h +++ b/storage/tokudb/ft-index/portability/toku_list.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef _TOKUDB_LIST_H -#define _TOKUDB_LIST_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,13 +87,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007, 2008, 2009 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -//TODO: #1378 This is not threadsafe. Make sure when splitting locks -//that we protect these calls. - - // This toku_list is intended to be embedded in other data structures. struct toku_list { struct toku_list *next, *prev; @@ -177,7 +173,3 @@ static inline void toku_list_move(struct toku_list *newhead, struct toku_list *o #else #define toku_list_struct(p, t, f) ((t*)((char*)(p) - ((char*)&((t*)0)->f))) #endif - - - -#endif diff --git a/storage/tokudb/ft-index/toku_include/toku_os.h b/storage/tokudb/ft-index/portability/toku_os.h similarity index 96% rename from storage/tokudb/ft-index/toku_include/toku_os.h rename to storage/tokudb/ft-index/portability/toku_os.h index e53885c979154..71576d7c1dd1e 100644 --- a/storage/tokudb/ft-index/toku_include/toku_os.h +++ b/storage/tokudb/ft-index/portability/toku_os.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,16 +86,16 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
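A recurring pattern in these header hunks (toku_htonl.h, toku_list.h, and the toku_os.h hunk that follows) is replacing hand-written include guards with #pragma once. The snippet below only illustrates the two idioms; TOKU_EXAMPLE_H is a made-up guard name, not one from the tree.

// Old style, removed throughout this patch:
#ifndef TOKU_EXAMPLE_H          // hypothetical guard name, for illustration
#define TOKU_EXAMPLE_H
/* declarations */
#endif  // TOKU_EXAMPLE_H

// New style, as adopted by the renamed portability headers:
#pragma once
/* declarations */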
-#ifndef TOKU_OS_H -#define TOKU_OS_H - #include #include -#include +#include "toku_stdint.h" +#include "toku_os_types.h" // Returns: the current process id int toku_os_getpid(void) __attribute__((__visibility__("default"))); @@ -148,6 +148,9 @@ int toku_os_initialize_settings(int verbosity) __attribute__((__visibility__("d bool toku_os_is_absolute_name(const char* path) __attribute__((__visibility__("default"))); +// Return true if huge pages are enabled. See portability/huge_page_detection.cc for methodology. +bool toku_os_huge_pages_enabled(void) __attribute__((__visibility__("default"))); + // Set whether or not writes assert when ENOSPC is returned or they wait for space void toku_set_assert_on_write_enospc(int do_assert) __attribute__((__visibility__("default"))); @@ -168,13 +171,6 @@ int toku_fsync_dir_by_name_without_accounting(const char *dir_name); // Return 0 on success, otherwise an error number int toku_get_filesystem_sizes(const char *path, uint64_t *avail_size, uint64_t *free_size, uint64_t *total_size); -#if TOKU_WINDOWS -#include -#include -//Test if st_mode (from stat) is a directory -#define S_ISDIR(bitvector) (((bitvector)&_S_IFDIR)!=0) -#endif - // Portable linux 'stat' int toku_stat(const char *name, toku_struct_stat *statbuf) __attribute__((__visibility__("default"))); // Portable linux 'fstat' @@ -182,5 +178,3 @@ int toku_fstat(int fd, toku_struct_stat *statbuf) __attribute__((__visibility__( // Portable linux 'dup2' int toku_dup2(int fd, int fd2) __attribute__((__visibility__("default"))); - -#endif /* TOKU_OS_H */ diff --git a/storage/tokudb/ft-index/portability/toku_os_types.h b/storage/tokudb/ft-index/portability/toku_os_types.h index 073021e474a36..a7053374fde9c 100644 --- a/storage/tokudb/ft-index/portability/toku_os_types.h +++ b/storage/tokudb/ft-index/portability/toku_os_types.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,9 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#if !defined(TOKU_OS_TYPES_H) -#define TOKU_OS_TYPES_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include @@ -125,10 +124,6 @@ static inline bool toku_fileids_are_equal(struct fileid *a, struct fileid *b) { typedef struct stat toku_struct_stat; -// windows compat #if !defined(O_BINARY) #define O_BINARY 0 #endif - - -#endif diff --git a/storage/tokudb/ft-index/portability/toku_path.cc b/storage/tokudb/ft-index/portability/toku_path.cc index 22264b7e799bf..89b106309eb18 100644 --- a/storage/tokudb/ft-index/portability/toku_path.cc +++ b/storage/tokudb/ft-index/portability/toku_path.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/toku_path.h b/storage/tokudb/ft-index/portability/toku_path.h index 4c0df9660a948..3ee6736360f06 100644 --- a/storage/tokudb/ft-index/portability/toku_path.h +++ b/storage/tokudb/ft-index/portability/toku_path.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef PORTABILITY_TOKU_PATH_H -#define PORTABILITY_TOKU_PATH_H - #include #include #include @@ -124,5 +123,3 @@ char *toku_path_join(char *dest, int n, const char *base, ...); // There are n path components, including base. // Returns: // dest (useful for chaining function calls) - -#endif // PORTABILITY_TOKU_PATH_H diff --git a/storage/tokudb/ft-index/toku_include/toku_portability.h b/storage/tokudb/ft-index/portability/toku_portability.h similarity index 81% rename from storage/tokudb/ft-index/toku_include/toku_portability.h rename to storage/tokudb/ft-index/portability/toku_portability.h index df76e4bab78fd..9459c2d7ad3f3 100644 --- a/storage/tokudb/ft-index/toku_include/toku_portability.h +++ b/storage/tokudb/ft-index/portability/toku_portability.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_PORTABILITY_H -#define TOKU_PORTABILITY_H #include "toku_config.h" @@ -101,82 +101,16 @@ PATENT RIGHTS GRANT: # define constexpr_static_assert(a, b) static_assert(a, b) #endif -#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(__ICL)) - -#define TOKU_WINDOWS 1 -#define DEV_NULL_FILE "NUL" - -# if defined(_WIN64) -# define TOKU_WINDOWS_32 0 -# define TOKU_WINDOWS_64 1 -# else -# define TOKU_WINDOWS_32 1 -# define TOKU_WINDOWS_64 2 +#if defined(_MSC_VER) +# error "Windows is not supported." 
#endif -#else - -#define TOKU_WINDOWS 0 -#define TOKU_WINDOWS_32 0 -#define TOKU_WINDOWS_64 0 #define DEV_NULL_FILE "/dev/null" -#endif - // include here, before they get deprecated #include -#if TOKU_WINDOWS -// Windows - -#define DO_GCC_PRAGMA(x) /* Nothing */ - -#if defined(__ICL) -#define __attribute__(x) /* Nothing */ -#endif - -#include -#include "toku_stdint.h" - -#ifndef TOKU_OFF_T_DEFINED -#define TOKU_OFF_T_DEFINED -typedef int64_t toku_off_t; -#endif - -#include -#include -#include "unistd.h" -#include "misc.h" -#include "toku_pthread.h" - -#define UNUSED_WARNING(a) a=a /* To make up for missing attributes */ - -#define cast_to_typeof(v) - -#elif defined(__INTEL_COMPILER) - -#define DO_GCC_PRAGMA(x) /* Nothing */ - -#if defined(__ICC) -// Intel linux - -#include -#include -#include -#include -#include -#include - -#define static_assert(foo, bar) -#endif - -#if defined(__cplusplus) -# define cast_to_typeof(v) (decltype(v)) -#else -# define cast_to_typeof(v) (__typeof__(v)) -#endif - -#elif defined(__GNUC__) +#if defined(__GNUC__) // GCC linux #define DO_GCC_PRAGMA(x) _Pragma (#x) @@ -187,12 +121,15 @@ typedef int64_t toku_off_t; #include #include #include + #if __FreeBSD__ #include #endif + #if defined(HAVE_ALLOCA_H) # include #endif + #if defined(__cplusplus) # include #endif @@ -203,10 +140,8 @@ typedef int64_t toku_off_t; # define cast_to_typeof(v) (__typeof__(v)) #endif -#else - -#error Not ICC and not GNUC. What compiler? - +#else // __GNUC__ was not defined, so... +# error "Must use a GNUC-compatible compiler." #endif // Define some constants for Yama in case the build-machine's software is too old. @@ -248,28 +183,6 @@ extern "C" { // Deprecated functions. #if !defined(TOKU_ALLOW_DEPRECATED) -# if defined(__ICL) || defined(__ICC) // Intel Compiler -# pragma deprecated (creat, fstat, stat, getpid, syscall, sysconf, mkdir, strdup) -//# pragma poison off_t -//# pragma poison pthread_attr_t pthread_t -//# pragma poison pthread_mutexattr_t pthread_mutex_t -//# pragma poison pthread_condattr_t pthread_cond_t -//# pragma poison pthread_rwlockattr_t pthread_rwlock_t -//# pragma poison timespec -# ifndef DONT_DEPRECATE_WRITES -# pragma poison write pwrite -# endif -# ifndef DONT_DEPRECATE_MALLOC -# pragma deprecated (malloc, free, realloc) -# endif -# ifndef DONT_DEPRECATE_ERRNO -# pragma deprecated (errno) -# endif -# ifndef TOKU_WINDOWS_ALLOW_DEPRECATED -# pragma poison dup2 -# pragma poison _dup2 -# endif -# else int creat(const char *pathname, mode_t mode) __attribute__((__deprecated__)); int fstat(int fd, struct stat *buf) __attribute__((__deprecated__)); int stat(const char *path, struct stat *buf) __attribute__((__deprecated__)); @@ -279,8 +192,7 @@ int syscall(int __sysno, ...) __attribute__((__deprecated__)); # else long int syscall(long int __sysno, ...) __attribute__((__deprecated__)); # endif -// Sadly, dlmalloc needs sysconf, and on linux this causes trouble with -combine. So let the warnings show up under windows only. 
-// long int sysconf(int) __attribute__((__deprecated__)); + long int sysconf(int) __attribute__((__deprecated__)); int mkdir(const char *pathname, mode_t mode) __attribute__((__deprecated__)); int dup2(int fd, int fd2) __attribute__((__deprecated__)); int _dup2(int fd, int fd2) __attribute__((__deprecated__)); @@ -344,7 +256,6 @@ extern void *realloc(void*, size_t) __THROW __attribute__((__deprecat #pragma GCC poison __sync_synchronize #pragma GCC poison __sync_lock_test_and_set #pragma GCC poison __sync_release -# endif #endif #if defined(__cplusplus) @@ -415,17 +326,8 @@ void toku_set_func_pread (ssize_t (*)(int, void *, size_t, off_t)); int toku_portability_init(void); void toku_portability_destroy(void); -static inline uint64_t roundup_to_multiple(uint64_t alignment, uint64_t v) // Effect: Return X, where X the smallest multiple of ALIGNMENT such that X>=V. // Requires: ALIGNMENT is a power of two -{ - assert(0==(alignment&(alignment-1))); // alignment must be a power of two - uint64_t result = (v+alignment-1)&~(alignment-1); - assert(result>=v); // The result is >=V. - assert(result%alignment==0); // The result is a multiple of alignment. - assert(result #include int toku_pthread_yield(void) { diff --git a/storage/tokudb/ft-index/portability/toku_pthread.h b/storage/tokudb/ft-index/portability/toku_pthread.h index 4ec9dfa9634c5..a9dc660b6a751 100644 --- a/storage/tokudb/ft-index/portability/toku_pthread.h +++ b/storage/tokudb/ft-index/portability/toku_pthread.h @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -87,10 +87,9 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#ifndef TOKU_PTHREAD_H -#define TOKU_PTHREAD_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include @@ -422,5 +421,3 @@ static inline int toku_pthread_setspecific(toku_pthread_key_t key, void *data) { return pthread_setspecific(key, data); } - -#endif /* TOKU_PTHREAD_H */ diff --git a/storage/tokudb/ft-index/toku_include/toku_race_tools.h b/storage/tokudb/ft-index/portability/toku_race_tools.h similarity index 97% rename from storage/tokudb/ft-index/toku_include/toku_race_tools.h rename to storage/tokudb/ft-index/portability/toku_race_tools.h index 77417cb043370..b4c83b6119d08 100644 --- a/storage/tokudb/ft-index/toku_include/toku_race_tools.h +++ b/storage/tokudb/ft-index/portability/toku_race_tools.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,12 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
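The toku_portability.h hunk above drops the inline roundup_to_multiple() helper from the header; its body is the standard power-of-two round-up idiom, (v + alignment - 1) & ~(alignment - 1). A self-contained sketch of that idiom, with a worked value, is shown below; the name roundup_pow2 is an illustrative stand-in, not the TokuFT symbol.

#include <cassert>
#include <cstdint>

// Round v up to the next multiple of alignment; alignment must be a power of two.
static inline uint64_t roundup_pow2(uint64_t alignment, uint64_t v) {
    assert((alignment & (alignment - 1)) == 0);     // power-of-two check
    uint64_t result = (v + alignment - 1) & ~(alignment - 1);
    return result;                                  // e.g. roundup_pow2(512, 1000) == 1024
}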
-#ifndef TOKU_RACE_TOOLS_H -#define TOKU_RACE_TOOLS_H -#include "toku_config.h" +#include #if defined(__linux__) && USE_VALGRIND @@ -138,5 +138,3 @@ PATENT RIGHTS GRANT: # define RUNNING_ON_VALGRIND (0U) #endif - -#endif // TOKU_RACE_TOOLS_H diff --git a/storage/tokudb/ft-index/portability/toku_random.h b/storage/tokudb/ft-index/portability/toku_random.h index ab317eb6295d1..a350b171a3b0e 100644 --- a/storage/tokudb/ft-index/portability/toku_random.h +++ b/storage/tokudb/ft-index/portability/toku_random.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#ifndef TOKU_RANDOM_H -#define TOKU_RANDOM_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#include "toku_config.h" +#include #include #include #include @@ -169,5 +168,3 @@ rand_choices(struct random_data *buf, uint32_t choices) { return result; } - -#endif // TOKU_RANDOM_H diff --git a/storage/tokudb/ft-index/portability/toku_stdint.h b/storage/tokudb/ft-index/portability/toku_stdint.h index 0105c94c50ba2..806e40e612be5 100644 --- a/storage/tokudb/ft-index/portability/toku_stdint.h +++ b/storage/tokudb/ft-index/portability/toku_stdint.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,13 +86,9 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#ifndef TOKU_STDINT_H -#define TOKU_STDINT_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include #include - -#endif - diff --git a/storage/tokudb/ft-index/portability/toku_stdlib.h b/storage/tokudb/ft-index/portability/toku_stdlib.h index 9d2a7f7877897..229e3945cf9b7 100644 --- a/storage/tokudb/ft-index/portability/toku_stdlib.h +++ b/storage/tokudb/ft-index/portability/toku_stdlib.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,5 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." + #include diff --git a/storage/tokudb/ft-index/portability/toku_time.cc b/storage/tokudb/ft-index/portability/toku_time.cc index a20c45dfb0341..9783449319486 100644 --- a/storage/tokudb/ft-index/portability/toku_time.cc +++ b/storage/tokudb/ft-index/portability/toku_time.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/portability/toku_time.h b/storage/tokudb/ft-index/portability/toku_time.h index 6c522c5ad8d5c..069e67c0d2838 100644 --- a/storage/tokudb/ft-index/portability/toku_time.h +++ b/storage/tokudb/ft-index/portability/toku_time.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -86,17 +86,16 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#ifndef TOKU_TIME_H -#define TOKU_TIME_H +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "toku_config.h" + #include #include #include - static inline float toku_tdiff (struct timeval *a, struct timeval *b) { return (float)((a->tv_sec - b->tv_sec) + 1e-6 * (a->tv_usec - b->tv_usec)); } @@ -161,5 +160,3 @@ static inline uint64_t toku_current_time_microsec(void) { gettimeofday(&t, NULL); return t.tv_sec * (1UL * 1000 * 1000) + t.tv_usec; } - -#endif diff --git a/storage/tokudb/ft-index/scripts/run-nightly-coverage-tests.bash b/storage/tokudb/ft-index/scripts/run-nightly-coverage-tests.bash index 42af174876655..c96a02352ca35 100755 --- a/storage/tokudb/ft-index/scripts/run-nightly-coverage-tests.bash +++ b/storage/tokudb/ft-index/scripts/run-nightly-coverage-tests.bash @@ -12,7 +12,7 @@ cd $tokudbdir if [ ! -d build ] ; then mkdir build pushd build - CC=gcc47 CXX=g++47 cmake \ + cmake \ -D CMAKE_BUILD_TYPE=Debug \ -D USE_VALGRIND=ON \ -D TOKU_DEBUG_PARANOID=ON \ @@ -20,7 +20,6 @@ if [ ! -d build ] ; then -D USE_GTAGS=OFF \ -D USE_CSCOPE=OFF \ -D USE_ETAGS=OFF \ - -D USE_BDB=OFF \ -D USE_GCOV=ON \ -D CMAKE_LINK_DEPENDS_NO_SHARED=ON \ -G Ninja \ diff --git a/storage/tokudb/ft-index/scripts/run-nightly-drd-tests.bash b/storage/tokudb/ft-index/scripts/run-nightly-drd-tests.bash index 467c47b9cd293..39d97de218509 100755 --- a/storage/tokudb/ft-index/scripts/run-nightly-drd-tests.bash +++ b/storage/tokudb/ft-index/scripts/run-nightly-drd-tests.bash @@ -12,7 +12,7 @@ cd $tokudbdir if [ ! -d build ] ; then mkdir build pushd build - CC=gcc47 CXX=g++47 cmake \ + cmake \ -D CMAKE_BUILD_TYPE=drd \ -D USE_VALGRIND=ON \ -D TOKU_DEBUG_PARANOID=ON \ @@ -20,7 +20,6 @@ if [ ! -d build ] ; then -D USE_GTAGS=OFF \ -D USE_CSCOPE=OFF \ -D USE_ETAGS=OFF \ - -D USE_BDB=OFF \ -D CMAKE_LINK_DEPENDS_NO_SHARED=ON \ -G Ninja \ -D RUN_LONG_TESTS=ON \ diff --git a/storage/tokudb/ft-index/scripts/run-nightly-release-tests.bash b/storage/tokudb/ft-index/scripts/run-nightly-release-tests.bash index 5ac3e62b21612..af08894beb838 100755 --- a/storage/tokudb/ft-index/scripts/run-nightly-release-tests.bash +++ b/storage/tokudb/ft-index/scripts/run-nightly-release-tests.bash @@ -12,7 +12,7 @@ cd $tokudbdir if [ ! -d build ] ; then mkdir build pushd build - CC=gcc47 CXX=g++47 cmake \ + cmake \ -D CMAKE_BUILD_TYPE=Release \ -D USE_VALGRIND=ON \ -D TOKU_DEBUG_PARANOID=OFF \ @@ -20,7 +20,6 @@ if [ ! 
-d build ] ; then -D USE_GTAGS=OFF \ -D USE_CSCOPE=OFF \ -D USE_ETAGS=OFF \ - -D USE_BDB=ON \ -D CMAKE_LINK_DEPENDS_NO_SHARED=ON \ -G Ninja \ -D RUN_LONG_TESTS=ON \ @@ -41,6 +40,6 @@ ctest -j16 \ -E '/drd|/helgrind' ctest -j16 \ -D NightlyMemCheck \ - -E '^ydb/.*\.bdb|test1426\.tdb|/drd|/helgrind' + -E 'test1426\.tdb|/drd|/helgrind' set -e ctest -D NightlySubmit diff --git a/storage/tokudb/ft-index/scripts/run.db-benchmark-test.bash b/storage/tokudb/ft-index/scripts/run.db-benchmark-test.bash deleted file mode 100755 index ebd2a188f10a4..0000000000000 --- a/storage/tokudb/ft-index/scripts/run.db-benchmark-test.bash +++ /dev/null @@ -1,201 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run db-benchmark-test" - echo "[--tokudb=$tokudb" - echo "[--revision=$revision]" - echo "[--branch=$branch]" - echo "[--suffix=$suffix]" - echo "[--commit=$commit]" - echo "[--cc=$cc]" - echo "[--n=$n]" -} - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -n=100 -cc=gcc44 -ft_loader=cilk -branch=toku -revision=0 -tokudb=tokudb -suffix=. -commit=0 -svnserver=https://svn.tokutek.com/tokudb -basedir=$HOME/svn.build -builddir=$basedir/tokudb.build -system=`uname -s | tr [:upper:] [:lower:]` -arch=`uname -m | tr [:upper:] [:lower:]` -hostname=`hostname` -instancetype="" - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -if [ $cc = icc ] ; then - d=/opt/intel/bin - if [ -d $d ] ; then - export PATH=$d:$PATH - . $d/compilervars.sh intel64 - fi - d=/opt/intel/cilkutil/bin - if [ -d $d ] ; then - export PATH=$d:$PATH - fi -fi - -# require a revision -if [ $revision -eq 0 ] ; then exit 1; fi -if [ $branch = "." ] ; then branch="toku"; fi - -function append() { - local s=""; local x - for x in $*; do - if [ "$s" != "" ] ; then s=$s-$x; else s=$x; fi - done - echo $s -} - -# setup the branchrevision string -branchrevision="" -if [ $branch != "toku" ] ; then branchrevision=$(append $branchrevision $(basename $branch)); fi -if [ $tokudb != "tokudb" ] ; then branchrevision=$(append $branchrevision $tokudb); fi -branchrevision=$(append $branchrevision $revision) -if [ $suffix != "." ] ; then branchrevision=$(append $branchrevision $suffix); fi - -# goto the base directory -if [ ! -d $basedir ] ; then mkdir $basedir; fi - -pushd $basedir - -# update the build directory -if [ ! -d $builddir ] ; then mkdir $builddir; fi - -date=`date +%Y%m%d` -pushd $builddir - while [ ! -d $date ] ; do - svn mkdir $svnserver/mysql.build/$date -m "" - svn co -q $svnserver/mysql.build/$date - if [ $? -ne 0 ] ; then rm -rf $date; fi - done -popd -testresultsdir=$builddir/$date - -gccversion=`$cc --version|head -1|cut -f3 -d" "` - -runfile=$testresultsdir/db-benchmark-test-$branchrevision-$cc-$gccversion-$system-$arch-$hostname -if [ "$instancetype" != "" ] ; then runfile=$runfile-$instancetype; fi -rm -rf $runfile - -testresult="PASS" -testdir=db-benchmark-test-$branchrevision -rm -rf $testdir - -# checkout the tokudb branch -if [ $testresult = "PASS" ] ; then - retry svn export -q https://svn.tokutek.com/tokudb/$branch/$tokudb $testdir - exitcode=$? 
- if [ $exitcode != 0 ] ; then testresult="FAIL"; fi -fi - -# build it -if [ $testresult = "PASS" ] ; then - pushd $testdir - make release -s CC=$cc GCCVERSION=$gccversion FTLOADER=$ft_loader >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd - pushd $testdir/db-benchmark-test - make build.tdb CC=$cc GCCVERSION=$gccversion -s >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -# run tests -if [ $testresult = "PASS" ] ; then - let i=$n - pushd $testdir/db-benchmark-test - echo ./db-benchmark-test-tokudb -x $i >>$runfile 2>&1 - ./db-benchmark-test-tokudb -x $i >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -if [ $testresult = "PASS" ] ; then - let i=2*$n - pushd $testdir/db-benchmark-test - echo ./db-benchmark-test-tokudb -x --norandom $i >>$runfile 2>&1 - ./db-benchmark-test-tokudb -x --norandom $i >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -if [ $testresult = "PASS" ] ; then - let i=2*$n - pushd $testdir/db-benchmark-test - echo ./db-benchmark-test-tokudb -x --noserial $i >>$runfile 2>&1 - ./db-benchmark-test-tokudb -x --noserial $i >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - echo ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - ./scanscan-tokudb --lwc --prelock --prelockflag >>$runfile 2>&1 - exitcode=$? - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -# commit results -if [ $commit != 0 ] ; then - svn add $runfile - retry svn commit -m \"$testresult db-benchmark-test $branchrevision $system $arch\" $runfile -fi - -popd - -exit 0 diff --git a/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.bash b/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.bash deleted file mode 100755 index 23900424af2d6..0000000000000 --- a/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.bash +++ /dev/null @@ -1,460 +0,0 @@ -#!/bin/bash - -function usage() { - echo "run.fractal.tree.tests.bash - run the nightly fractal tree test suite" - echo "[--ftcc=$ftcc] [--ftcxx=$ftcxx] [--BDBVERSION=$BDBVERSION] [--ctest_model=$ctest_model]" - echo "[--commit=$commit] [--generator=$generator] [--toku_svnroot=$toku_svnroot]" - return 1 -} - -[ -f /etc/profile.d/gcc47.sh ] && . /etc/profile.d/gcc47.sh -[ -f /etc/profile.d/binutils222.sh ] && . 
/etc/profile.d/binutils222.sh - -set -e - -pushd $(dirname $0) &>/dev/null -SCRIPTDIR=$PWD -popd &>/dev/null -FULLTOKUDBDIR=$(dirname $SCRIPTDIR) -TOKUDBDIR=$(basename $FULLTOKUDBDIR) -BRANCHDIR=$(basename $(dirname $FULLTOKUDBDIR)) - -function make_tokudb_name() { - local tokudb_dir=$1 - local tokudb=$2 - if [ $tokudb_dir = "toku" ] ; then - echo $tokudb - else - echo $(echo $tokudb_dir-$tokudb | tr / -) - fi -} -tokudb_name=$(make_tokudb_name $BRANCHDIR $TOKUDBDIR) -export TOKUDB_NAME=$tokudb_name - -productname=$tokudb_name - -ftcc=gcc47 -ftcxx=g++47 -BDBVERSION=5.3 -ctest_model=Nightly -generator="Unix Makefiles" -toku_svnroot=$FULLTOKUDBDIR/../.. -commit=1 -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1; - fi -done - -if [[ ! ( ( $ctest_model = Nightly ) || ( $ctest_model = Experimental ) || ( $ctest_model = Continuous ) ) ]]; then - echo "--ctest_model must be Nightly, Experimental, or Continuous" - usage -fi - -BDBDIR=/usr/local/BerkeleyDB.$BDBVERSION -if [ -d $BDBDIR ] ; then - CMAKE_PREFIX_PATH=$BDBDIR:$CMAKE_PREFIX_PATH - export CMAKE_PREFIX_PATH -fi - -# delete some characters that cygwin and osx have trouble with -function sanitize() { - tr -d '[/:\\\\()]' -} - -# gather some info -svnserver=https://svn.tokutek.com/tokudb -nodename=$(uname -n) -system=$(uname -s | tr '[:upper:]' '[:lower:]' | sanitize) -release=$(uname -r | sanitize) -arch=$(uname -m | sanitize) -date=$(date +%Y%m%d) -ncpus=$([ -f /proc/cpuinfo ] && (grep bogomips /proc/cpuinfo | wc -l) || sysctl -n hw.ncpu) -njobs=$(if [ $ncpus -gt 8 ] ; then echo "$ncpus / 3" | bc ; else echo "$ncpus" ; fi) - -GCCVERSION=$($ftcc --version|head -1|cut -f3 -d" ") -export GCCVERSION -CC=$ftcc -export CC -CXX=$ftcxx -export CXX - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo `date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -if [[ $commit -eq 1 ]]; then - svnbase=~/svn.build - if [ ! -d $svnbase ] ; then mkdir $svnbase ; fi - - # checkout the build dir - buildbase=$svnbase/tokudb.build - if [ ! -d $buildbase ] ; then - mkdir $buildbase - fi - - # make the build directory, possibly on multiple machines simultaneously, there can be only one - builddir=$buildbase/$date - pushd $buildbase - set +e - svn mkdir $svnserver/tokudb.build/$date -m "" || true - retry svn co -q $svnserver/tokudb.build/$date - if [ ! 
-d $date ] ; then - exit 1 - fi - set -e - popd - - tracefilepfx=$builddir/$productname+$ftcc-$GCCVERSION+bdb-$BDBVERSION+$nodename+$system+$release+$arch -else - tracefilepfx=$FULLTOKUDBDIR/test-trace -fi - -function getsysinfo() { - tracefile=$1; shift - set +e - uname -a >$tracefile 2>&1 - ulimit -a >>$tracefile 2>&1 - cmake --version >>$tracefile 2>&1 - $ftcc -v >>$tracefile 2>&1 - $ftcxx -v >>$tracefile 2>&1 - valgrind --version >>$tracefile 2>&1 - cat /etc/issue >>$tracefile 2>&1 - cat /proc/version >>$tracefile 2>&1 - cat /proc/cpuinfo >>$tracefile 2>&1 - env >>$tracefile 2>&1 - set -e -} - -function get_latest_svn_revision() { - svn info $1 | awk -v ORS="" '/Last Changed Rev:/ { print $4 }' -} - -function my_mktemp() { - mktemp /tmp/$(whoami).$1.XXXXXXXXXX -} - -yesterday="$(date -u -d yesterday +%F) 03:59:00 +0000" - -if [[ $commit -eq 1 ]]; then - # hack to make long tests run nightly but not when run in experimental mode - longtests=ON -else - longtests=OFF -fi -################################################################################ -## run normal and valgrind on optimized build -resultsdir=$tracefilepfx-Release -mkdir $resultsdir -tracefile=$tracefilepfx-Release/trace - -getsysinfo $tracefile - -mkdir -p $FULLTOKUDBDIR/opt >/dev/null 2>&1 -cd $FULLTOKUDBDIR/opt -cmake \ - -D CMAKE_BUILD_TYPE=Release \ - -D USE_VALGRIND=ON \ - -D USE_BDB=ON \ - -D RUN_LONG_TESTS=$longtests \ - -D USE_CTAGS=OFF \ - -D USE_GTAGS=OFF \ - -D USE_ETAGS=OFF \ - -D USE_CSCOPE=OFF \ - -D TOKU_SVNROOT="$toku_svnroot" \ - -G "$generator" \ - .. 2>&1 | tee -a $tracefile -cmake --system-information $resultsdir/sysinfo -make clean -# update to yesterday exactly just before ctest does nightly update -svn up -q -r "{$yesterday}" .. -set +e -ctest -j$njobs \ - -D ${ctest_model}Start \ - -D ${ctest_model}Update \ - -D ${ctest_model}Configure \ - -D ${ctest_model}Build \ - -D ${ctest_model}Test \ - -E '/drd|/helgrind' \ - 2>&1 | tee -a $tracefile -ctest -j$njobs \ - -D ${ctest_model}MemCheck \ - -E '^ydb/.*\.bdb$|test1426.tdb|/drd|/helgrind' \ - 2>&1 | tee -a $tracefile -set -e - -cp $tracefile notes.txt -set +e -ctest -D ${ctest_model}Submit -A notes.txt \ - 2>&1 | tee -a $tracefile -set -e -rm notes.txt - -tag=$(head -n1 Testing/TAG) -cp -r Testing/$tag $resultsdir -if [[ $commit -eq 1 ]]; then - cf=$(my_mktemp ftresult) - cat "$resultsdir/trace" | awk ' -BEGIN { - errs=0; - look=0; - ORS=" "; -} -/[0-9]+% tests passed, [0-9]+ tests failed out of [0-9]+/ { - fail=$4; - total=$9; - pass=total-fail; -} -/^Memory checking results:/ { - look=1; - FS=" - "; -} -/Errors while running CTest/ { - look=0; - FS=" "; -} -{ - if (look) { - errs+=$2; - } -} -END { - print "ERRORS=" errs; - if (fail>0) { - print "FAIL=" fail - } - print "PASS=" pass -}' >"$cf" - get_latest_svn_revision $FULLTOKUDBDIR >>"$cf" - echo -n " " >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - FS=": "; -} -/Build name/ { - print $2; - exit -}' >>"$cf" - (echo; echo) >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - printit=0 -} -/[0-9]*\% tests passed, [0-9]* tests failed out of [0-9]*/ { printit=1 } -/Memory check project/ { printit=0 } -/^ Site:/ { printit=0 } -{ - if (printit) { - print $0 - } -}' >>"$cf" - svn add $resultsdir - svn commit -F "$cf" $resultsdir - rm $cf -fi - -################################################################################ -## run drd tests on debug build -resultsdir=$tracefilepfx-Debug -mkdir $resultsdir -tracefile=$tracefilepfx-Debug/trace - -getsysinfo $tracefile - -mkdir -p $FULLTOKUDBDIR/dbg 
>/dev/null 2>&1 -cd $FULLTOKUDBDIR/dbg -cmake \ - -D CMAKE_BUILD_TYPE=Debug \ - -D USE_VALGRIND=ON \ - -D USE_BDB=OFF \ - -D RUN_LONG_TESTS=$longtests \ - -D USE_CTAGS=OFF \ - -D USE_GTAGS=OFF \ - -D USE_ETAGS=OFF \ - -D USE_CSCOPE=OFF \ - -D CMAKE_C_FLAGS_DEBUG="-O1" \ - -D CMAKE_CXX_FLAGS_DEBUG="-O1" \ - -D TOKU_SVNROOT="$toku_svnroot" \ - -G "$generator" \ - .. 2>&1 | tee -a $tracefile -cmake --system-information $resultsdir/sysinfo -make clean -# update to yesterday exactly just before ctest does nightly update -svn up -q -r "{$yesterday}" .. -set +e -ctest -j$njobs \ - -D ${ctest_model}Start \ - -D ${ctest_model}Update \ - -D ${ctest_model}Configure \ - -D ${ctest_model}Build \ - -D ${ctest_model}Test \ - -R '/drd|/helgrind' \ - 2>&1 | tee -a $tracefile -set -e - -cp $tracefile notes.txt -set +e -ctest -D ${ctest_model}Submit -A notes.txt \ - 2>&1 | tee -a $tracefile -set -e -rm notes.txt - -tag=$(head -n1 Testing/TAG) -cp -r Testing/$tag $resultsdir -if [[ $commit -eq 1 ]]; then - cf=$(my_mktemp ftresult) - cat "$resultsdir/trace" | awk ' -BEGIN { - ORS=" "; -} -/[0-9]+% tests passed, [0-9]+ tests failed out of [0-9]+/ { - fail=$4; - total=$9; - pass=total-fail; -} -END { - if (fail>0) { - print "FAIL=" fail - } - print "PASS=" pass -}' >"$cf" - get_latest_svn_revision $FULLTOKUDBDIR >>"$cf" - echo -n " " >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - FS=": "; -} -/Build name/ { - print $2; - exit -}' >>"$cf" - (echo; echo) >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - printit=0 -} -/[0-9]*\% tests passed, [0-9]* tests failed out of [0-9]*/ { printit=1 } -/^ Site:/ { printit=0 } -{ - if (printit) { - print $0 - } -}' >>"$cf" - svn add $resultsdir - svn commit -F "$cf" $resultsdir - rm $cf -fi - -################################################################################ -## run gcov on debug build -resultsdir=$tracefilepfx-Coverage -mkdir $resultsdir -tracefile=$tracefilepfx-Coverage/trace - -getsysinfo $tracefile - -mkdir -p $FULLTOKUDBDIR/cov >/dev/null 2>&1 -cd $FULLTOKUDBDIR/cov -cmake \ - -D CMAKE_BUILD_TYPE=Debug \ - -D BUILD_TESTING=ON \ - -D USE_GCOV=ON \ - -D USE_BDB=OFF \ - -D RUN_LONG_TESTS=$longtests \ - -D USE_CTAGS=OFF \ - -D USE_GTAGS=OFF \ - -D USE_ETAGS=OFF \ - -D USE_CSCOPE=OFF \ - -D TOKU_SVNROOT="$toku_svnroot" \ - -G "$generator" \ - .. 2>&1 | tee -a $tracefile -cmake --system-information $resultsdir/sysinfo -make clean -# update to yesterday exactly just before ctest does nightly update -svn up -q -r "{$yesterday}" .. 
-set +e -ctest -j$njobs \ - -D ${ctest_model}Start \ - -D ${ctest_model}Update \ - -D ${ctest_model}Configure \ - -D ${ctest_model}Build \ - -D ${ctest_model}Test \ - -D ${ctest_model}Coverage \ - 2>&1 | tee -a $tracefile -set -e - -cp $tracefile notes.txt -set +e -ctest -D ${ctest_model}Submit -A notes.txt \ - 2>&1 | tee -a $tracefile -set -e -rm notes.txt - -tag=$(head -n1 Testing/TAG) -cp -r Testing/$tag $resultsdir -if [[ $commit -eq 1 ]]; then - cf=$(my_mktemp ftresult) - cat "$resultsdir/trace" | awk ' -BEGIN { - ORS=" "; -} -/Percentage Coverage:/ { - covpct=$3; -} -/[0-9]+% tests passed, [0-9]+ tests failed out of [0-9]+/ { - fail=$4; - total=$9; - pass=total-fail; -} -END { - print "COVERAGE=" covpct - if (fail>0) { - print "FAIL=" fail - } - print "PASS=" pass -}' >"$cf" - get_latest_svn_revision $FULLTOKUDBDIR >>"$cf" - echo -n " " >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - FS=": "; -} -/Build name/ { - print $2; - exit -}' >>"$cf" - (echo; echo) >>"$cf" - cat "$resultsdir/trace" | awk ' -BEGIN { - printit=0 -} -/[0-9]*\% tests passed, [0-9]* tests failed out of [0-9]*/ { printit=1 } -/^ Site:/ { printit=0 } -{ - if (printit) { - print $0 - } -}' >>"$cf" - svn add $resultsdir - svn commit -F "$cf" $resultsdir - rm $cf -fi - -exit 0 diff --git a/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.cmake b/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.cmake index f695699255d0f..64d52a567352f 100644 --- a/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.cmake +++ b/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.cmake @@ -78,26 +78,20 @@ list(APPEND CTEST_NOTES_FILES ) set(all_opts - -DBDBDIR=/usr/local/BerkeleyDB.5.3 -DBUILD_TESTING=ON -DUSE_CILK=OFF ) set(rel_opts ${all_opts} -DCMAKE_BUILD_TYPE=Release - -DINTEL_CC=ON - -DUSE_BDB=ON ) set(dbg_opts ${all_opts} -DCMAKE_BUILD_TYPE=Debug - -DINTEL_CC=ON - -DUSE_BDB=ON ) set(cov_opts ${all_opts} -DCMAKE_BUILD_TYPE=Debug - -DINTEL_CC=OFF -DUSE_GCOV=ON ) diff --git a/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.icc.bash b/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.icc.bash deleted file mode 100755 index 2c62504619e00..0000000000000 --- a/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.icc.bash +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -run.fractal.tree.tests.bash --ftcc=icc $* diff --git a/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.now.bash b/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.now.bash deleted file mode 100755 index 661548f5adabb..0000000000000 --- a/storage/tokudb/ft-index/scripts/run.fractal.tree.tests.now.bash +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -pushd $(dirname $0) &>/dev/null -SCRIPTDIR=$PWD -popd &>/dev/null - -exec $SCRIPTDIR/run.fractal.tree.tests.bash --ctest_model=Experimental --commit=0 "$@" diff --git a/storage/tokudb/ft-index/scripts/run.loader.stress.bash b/storage/tokudb/ft-index/scripts/run.loader.stress.bash deleted file mode 100755 index 1d4232c1bb3b1..0000000000000 --- a/storage/tokudb/ft-index/scripts/run.loader.stress.bash +++ /dev/null @@ -1,164 +0,0 @@ -#!/usr/bin/env bash - -function usage() { - echo "run the loader verify test" - echo "[--rows=$rows]" - echo "[--dictionaries=$dictionaries]" - echo "[--ft_loader=$ft_loader]" - echo "[--tokudb=$tokudb]" - echo "[--branch=$branch]" - echo "[--revision=$revision]" - echo "[--suffix=$suffix]" - echo "[--commit=$commit]" -} - -function retry() { - local cmd - local retries - local exitcode - cmd=$* - let retries=0 - while [ $retries -le 10 ] ; do - echo 
`date` $cmd - bash -c "$cmd" - exitcode=$? - echo `date` $cmd $exitcode $retries - let retries=retries+1 - if [ $exitcode -eq 0 ] ; then break; fi - sleep 10 - done - test $exitcode = 0 -} - -rows=100000000 -dictionaries=3 -ft_loader=cilk -tokudb=tokudb -branch=. -revision=0 -suffix=. -commit=0 -svnserver=https://svn.tokutek.com/tokudb -basedir=~/svn.build -builddir=$basedir/mysql.build -system=`uname -s | tr [:upper:] [:lower:]` -arch=`uname -m | tr [:upper:] [:lower:]` -myhost=`hostname` -instancetype="" -ftcc=gcc -have_cilk=0 - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -# require a revision -if [ $revision -eq 0 ] ; then - exit 1 -fi - -# build -if [ $ftcc = icc ] ; then - d=/opt/intel/bin - if [ -d $d ] ; then - export PATH=$d:$PATH - . $d/compilervars.sh intel64 - fi - d=/opt/intel/cilkutil/bin - if [ -d $d ] ; then - export PATH=$d:$PATH - fi -fi - -# setup the branchrevision string -if [ $branch = "." ] ; then - branchrevision=$revision -else - branchrevision=`basename $branch`-$revision -fi -if [ $suffix != "." ] ; then - branchrevision=$branchrevision-$suffix -fi - -ftccversion=$($ftcc --version|head -1|cut -f3 -d" ") - -# goto the base directory -if [ ! -d $basedir ] ; then mkdir $basedir; fi - -pushd $basedir - -# update the build directory -if [ ! -d $builddir ] ; then mkdir $builddir; fi - -date=`date +%Y%m%d` -testresultsdir=$builddir/$date -pushd $builddir - while [ ! -d $date ] ; do - svn mkdir $svnserver/mysql.build/$date -m "" - svn checkout $svnserver/mysql.build/$date - if [ $? -ne 0 ] ; then rm -rf $date; fi - done -popd - -testresult="PASS" -runfile=$testresultsdir/loader-stress-$rows-$dictionaries-$tokudb-$branchrevision-$ftcc-$ftccversion-$system-$arch-$myhost -if [ "$instancetype" != "" ] ; then runfilefile=$runfile-$instancetype; fi -rm -f $runfile - -# checkout the code -if [ -d loader-stress-$branchrevision ] ; then rm -rf loader-stress-$branchrevision; fi -mkdir loader-stress-$branchrevision - -if [ $branch = "." ] ; then branch=toku; fi - -retry svn export -r $revision -q $svnserver/$branch/$tokudb loader-stress-$branchrevision/$tokudb -exitcode=$? -if [ $exitcode != 0 ] ; then - testresult="FAIL" -fi - -if [ $testresult = "PASS" ] ; then - pushd loader-stress-$branchrevision/$tokudb - echo `date` make release -s CC=$ftcc HAVE_CILK=$have_cilk FTLOADER=$ft_loader >>$runfile - make -s release CC=$ftcc HAVE_CILK=$have_cilk FTLOADER=$ft_loader >>$runfile 2>&1 - exitcode=$? - echo `date` complete $exitcode >>$runfile - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi -if [ $testresult = "PASS" ] ; then - pushd loader-stress-$branchrevision/$tokudb/src/tests - echo `date` make loader-stress-test.tdb CC=$ftcc HAVE_CILK=$have_cilk >>$runfile - make loader-stress-test.tdb -s CC=$ftcc HAVE_CILK=$have_cilk >>$runfile 2>&1 - exitcode=$? - echo `date` complete $exitcode >>$runfile - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -# run -if [ $testresult = "PASS" ] ; then - pushd loader-stress-$branchrevision/$tokudb/src/tests - echo `date` ./loader-stress-test.tdb -v -r $rows -d $dictionaries -c >>$runfile - ./loader-stress-test.tdb -v -r $rows -d $dictionaries -c >>$runfile 2>&1 - exitcode=$? 
- echo `date` complete $exitcode >>$runfile - if [ $exitcode != 0 ] ; then testresult="FAIL"; fi - popd -fi - -if [ $commit != 0 ] ; then - svn add $runfile - retry svn commit -m \"$testresult loader stress $rows $dictionaries $tokudb $branchrevision $ftcc $ftccversion $system $arch $myhost\" $runfile -fi - -popd - -if [ $testresult = "PASS" ] ; then exitcode=0; else exitcode=1; fi -exit $exitcode diff --git a/storage/tokudb/ft-index/scripts/run.stress-tests.bash b/storage/tokudb/ft-index/scripts/run.stress-tests.bash deleted file mode 100755 index b3d0e197d7b4a..0000000000000 --- a/storage/tokudb/ft-index/scripts/run.stress-tests.bash +++ /dev/null @@ -1,332 +0,0 @@ -#!/bin/bash -# $Id$ - -DOC=<&2 - echo " [--toku_toplevel=]" 1>&2 - echo " [--log=]" 1>&2 - echo " [--savedir=]" 1>&2 -} - -# parse the command line -while [ $# -gt 0 ] ; do - arg=$1; shift - if [[ $arg =~ --(.*)=(.*) ]] ; then - ok=no - for opt in toku_toplevel log savedir - do - if [[ ${BASH_REMATCH[1]} = $opt ]] - then - ok=yes - fi - done - if [[ $ok = no ]] - then - usage; exit 1 - fi - eval ${BASH_REMATCH[1]}=${BASH_REMATCH[2]} - else - usage; exit 1 - fi -done - -src_tests="${toku_toplevel}/src/tests" -testnames=(test_stress1.tdb \ - test_stress5.tdb \ - test_stress6.tdb) -recover_testnames=(recover-test_stress1.tdb \ - recover-test_stress2.tdb \ - recover-test_stress3.tdb) - -save_failure() { - dir="$1"; shift - out="$1"; shift - envdir="$1"; shift - rev=$1; shift - exec="$1"; shift - table_size=$1; shift - cachetable_size=$1; shift - num_ptquery=$1; shift - num_update=$1; shift - phase=$1; shift - dest="${dir}/${exec}-${table_size}-${cachetable_size}-${num_ptquery}-${num_update}-${phase}-${rev}-$$" - mkdir -p "$dest" - mv $out "${dest}/output.txt" - mv core* "${dest}/" - mv $envdir "${dest}/" -} - -running=no - -run_test() { - rev=$1; shift - exec="$1"; shift - table_size="$1"; shift - cachetable_size="$1"; shift - num_ptquery="$1"; shift - num_update="$1"; shift - mylog="$1"; shift - mysavedir="$1"; shift - - rundir=$(mktemp -d ./rundir.XXXXXXXX) - tmplog=$(mktemp) - - ulimit -c unlimited - t0="$(date)" - t1="" - t2="" - envdir="../${exec}-${table_size}-${cachetable_size}-${num_ptquery}-${num_update}-$$.dir" - cd $rundir - if LD_LIBRARY_PATH=../../../lib:$LD_LIBRARY_PATH \ - ../$exec -v --only_create --num_seconds 600 --envdir "$envdir" \ - --num_elements $table_size \ - --cachetable_size $cachetable_size &> $tmplog - then - rm -f $tmplog - t1="$(date)" - if LD_LIBRARY_PATH=../../../lib:$LD_LIBRARY_PATH \ - ../$exec -v --only_stress --num_seconds 600 --no-crash_on_update_failure --envdir "$envdir" \ - --num_elements $table_size \ - --cachetable_size $cachetable_size \ - --num_ptquery_threads $num_ptquery \ - --num_update_threads $num_update &> $tmplog - then - rm -f $tmplog - t2="$(date)" - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,PASS" | tee -a "$mylog" - else - save_failure "$mysavedir" $tmplog $envdir $rev $exec $table_size $cachetable_size $num_ptquery $num_update stress - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,FAIL" | tee -a "$mylog" - fi - else - save_failure "$mysavedir" $tmplog $envdir $rev $exec $table_size $cachetable_size $num_ptquery $num_update create - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,FAIL" | tee -a "$mylog" - fi - cd .. 
- rm -rf $rundir "$envdir" -} - -loop_test() { - rev=$1; shift - exec="$1"; shift - table_size="$1"; shift - cachetable_size="$1"; shift - mylog="$1"; shift - mysavedir="$1"; shift - - ptquery_rand=0 - update_rand=0 - while [[ $running = "yes" ]] - do - num_ptquery=1 - num_update=1 - if [[ $ptquery_rand -gt 1 ]] - then - (( num_ptquery = $RANDOM % 16 )) - fi - if [[ $update_rand -gt 0 ]] - then - (( num_update = $RANDOM % 16 )) - fi - (( ptquery_rand = (ptquery_rand + 1) % 4 )) - (( update_rand = (update_rand + 1) % 2 )) - run_test $rev $exec $table_size $cachetable_size $num_ptquery $num_update $mylog $mysavedir - done -} - -run_recover_test() { - rev=$1; shift - exec="$1"; shift - table_size="$1"; shift - cachetable_size="$1"; shift - num_ptquery="$1"; shift - num_update="$1"; shift - mylog="$1"; shift - mysavedir="$1"; shift - - rundir=$(mktemp -d ./rundir.XXXXXXXX) - tmplog=$(mktemp) - - ulimit -c unlimited - t0="$(date)" - t1="" - t2="" - envdir="../${exec}-${table_size}-${cachetable_size}-${num_ptquery}-${num_update}-$$.dir" - cd $rundir - if ! LD_LIBRARY_PATH=../../../lib:$LD_LIBRARY_PATH \ - ../$exec -v --test --num_seconds 600 --no-crash_on_update_failure --envdir "$envdir" \ - --num_elements $table_size \ - --cachetable_size $cachetable_size \ - --num_ptquery_threads $num_ptquery \ - --num_update_threads $num_update &> $tmplog - then - rm -f $tmplog - t1="$(date)" - if LD_LIBRARY_PATH=../../../lib:$LD_LIBRARY_PATH \ - ../$exec -v --recover --envdir "$envdir" \ - --num_elements $table_size \ - --cachetable_size $cachetable_size &> $tmplog - then - rm -f $tmplog - t2="$(date)" - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,PASS" | tee -a "$mylog" - else - save_failure "$mysavedir" $tmplog $envdir $rev $exec $table_size $cachetable_size $num_ptquery $num_update recover - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,FAIL" | tee -a "$mylog" - fi - else - save_failure "$mysavedir" $tmplog $envdir $rev $exec $table_size $cachetable_size $num_ptquery $num_update test - echo "\"$exec\",$rev,$table_size,$cachetable_size,$num_ptquery,$num_update,$t0,$t1,$t2,FAIL" | tee -a "$mylog" - fi - cd .. - rm -rf $rundir "$envdir" -} - -loop_recover_test() { - rev=$1; shift - exec="$1"; shift - table_size="$1"; shift - cachetable_size="$1"; shift - mylog="$1"; shift - mysavedir="$1"; shift - - ptquery_rand=0 - update_rand=0 - while [[ $running = "yes" ]] - do - num_ptquery=1 - num_update=1 - if [[ $ptquery_rand -gt 1 ]] - then - (( num_ptquery = $RANDOM % 16 )) - fi - if [[ $update_rand -gt 0 ]] - then - (( num_update = $RANDOM % 16 )) - fi - (( ptquery_rand = (ptquery_rand + 1) % 4 )) - (( update_rand = (update_rand + 1) % 2 )) - run_recover_test $rev $exec $table_size $cachetable_size $num_ptquery $num_update $mylog $mysavedir - done -} - -declare -a pids=(0) -i=0 - -savepid() { - pids[$i]=$1 - (( i = i + 1 )) -} - -killchildren() { - kill ${pids[@]} || true - for exec in ${testnames[@]} ${recover_testnames[@]} - do - pkill -f $exec || true - done -} - -trap killchildren INT TERM EXIT - -mkdir -p $log -mkdir -p $savedir - -while true -do - (cd $toku_toplevel; \ - svn update; \ - make CC=icc DEBUG=0 HAVE_CILK=0 clean fastbuild; \ - make CC=icc DEBUG=0 HAVE_CILK=0 -C src/tests ${testnames[@]} ${recover_testnames[@]}) - - cd $src_tests - - rev=$(svn info ../.. 
| awk '/Revision/ { print $2 }') - - running=yes - - for exec in ${testnames[@]} - do - for table_size in 2000 200000 50000000 - do - (( small_cachetable = table_size * 50 )) - suffix="${exec}-${table_size}-${small_cachetable}-$$" - touch "${log}/${suffix}" - loop_test $rev $exec $table_size $small_cachetable "${log}/${suffix}" "${savedir}/${suffix}" & savepid $! - - suffix="${exec}-${table_size}-1000000000-$$" - touch "${log}/${suffix}" - loop_test $rev $exec $table_size 1000000000 "${log}/${suffix}" "${savedir}/${suffix}" & savepid $! - done - done - - for exec in ${recover_testnames[@]} - do - for table_size in 2000 200000 50000000 - do - (( small_cachetable = table_size * 50 )) - suffix="${exec}-${table_size}-${small_cachetable}-$$" - touch "${log}/${suffix}" - loop_recover_test $rev $exec $table_size $small_cachetable "${log}/${suffix}" "${savedir}/${suffix}" & savepid $! - - suffix="${exec}-${table_size}-1000000000-$$" - touch "${log}/${suffix}" - loop_recover_test $rev $exec $table_size 1000000000 "${log}/${suffix}" "${savedir}/${suffix}" & savepid $! - done - done - - sleep 1d - - running=no - - killchildren - - wait ${pids[@]} || true - - idx=0 - for pid in ${pids[@]} - do - pids[$idx]=0 - (( idx = idx + 1 )) - done -done diff --git a/storage/tokudb/ft-index/scripts/run.stress-tests.py b/storage/tokudb/ft-index/scripts/run.stress-tests.py index fbbf5ee647294..62edbab8f3c2f 100755 --- a/storage/tokudb/ft-index/scripts/run.stress-tests.py +++ b/storage/tokudb/ft-index/scripts/run.stress-tests.py @@ -552,7 +552,6 @@ def rebuild(tokudb, builddir, tokudb_data, cc, cxx, tests): newenv['CXX'] = cxx r = call(['cmake', '-DCMAKE_BUILD_TYPE=Debug', - '-DUSE_BDB=OFF', '-DUSE_GTAGS=OFF', '-DUSE_CTAGS=OFF', '-DUSE_ETAGS=OFF', @@ -735,6 +734,7 @@ def main(opts): 'test_stress6.tdb', 'test_stress7.tdb', 'test_stress_hot_indexing.tdb', + 'test_stress_with_verify.tdb', 'test_stress_openclose.tdb'] default_recover_testnames = ['recover-test_stress1.tdb', 'recover-test_stress2.tdb', @@ -766,8 +766,8 @@ def main(opts): help="skip the tests that don't involve upgrade [default=False]") upgrade_group.add_option('--double_upgrade', action='store_true', dest='double_upgrade', default=False, help='run the upgrade tests twice in a row [default=False]') - upgrade_group.add_option('--add_old_version', action='append', type='choice', dest='old_versions', choices=['4.2.0', '5.0.8', '5.2.7', '6.0.0', '6.1.0', '6.5.1', '6.6.3'], - help='which old versions to use for running the stress tests in upgrade mode. can be specified multiple times [options=4.2.0, 5.0.8, 5.2.7, 6.0.0, 6.1.0, 6.5.1, 6.6.3]') + upgrade_group.add_option('--add_old_version', action='append', type='choice', dest='old_versions', choices=['4.2.0', '5.0.8', '5.2.7', '6.0.0', '6.1.0', '6.5.1', '6.6.3', '7.1.6'], + help='which old versions to use for running the stress tests in upgrade mode. 
can be specified multiple times [options=4.2.0, 5.0.8, 5.2.7, 6.0.0, 6.1.0, 6.5.1, 6.6.3, 7.1.6]') upgrade_group.add_option('--old_environments_dir', type='string', dest='old_environments_dir', default=('%s/old-stress-test-envs' % default_tokudb_data), help='directory containing old version environments (should contain 5.0.8/, 5.2.7/, etc, and the environments should be in those) [default=../../tokudb.data/stress_environments]') diff --git a/storage/tokudb/ft-index/scripts/tokucilkscreen b/storage/tokudb/ft-index/scripts/tokucilkscreen deleted file mode 100755 index 91a63ec66ef64..0000000000000 --- a/storage/tokudb/ft-index/scripts/tokucilkscreen +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -# exit 1 if cilkscreen finds errors - -function cleanup() { - if [ "$logfile" != "" ] ; then rm $logfile; logfile=; fi -} - -trap cleanup SIGINT -logfile=$(mktemp /tmp/toku_cilkscreen.XXXXXXXX) -cilkscreen $* 2>$logfile -exitcode=$? -if [ $exitcode = 0 ] ; then - cat $logfile >>/dev/fd/2 - grep "No errors found by Cilkscreen" $logfile >/dev/null 2>&1 - exitcode=$? -fi -rm $logfile -exit $exitcode \ No newline at end of file diff --git a/storage/tokudb/ft-index/scripts/tokuvalgrind b/storage/tokudb/ft-index/scripts/tokuvalgrind new file mode 100755 index 0000000000000..a099a1f2ff9a1 --- /dev/null +++ b/storage/tokudb/ft-index/scripts/tokuvalgrind @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +function usage() { + echo "check for valgrind error and set the exit code" +} + +function cleanup() { + if [ "$logfile" != "" ] ; then rm $logfile; fi + exit 1 +} + +args=$* + +logfile= +createlogfile=0 +errorexitcode=1 + +while [ $# -gt 0 ] ; do + arg=$1; shift + if [[ $arg =~ "--" ]] ; then + if [[ $arg =~ --log-file=(.*) ]] ; then + logfile=${BASH_REMATCH[1]} + elif [[ $arg =~ --error-exitcode=(.*) ]] ; then + errorexitcode=${BASH_REMATCH[1]} + fi + else + break + fi +done + +if [ "$logfile" = "" ] ; then + createlogfile=1 + trap cleanup SIGINT + logfile=`mktemp /tmp/$(whoami).tokugrind.XXXXXXXX` + args="--log-file=$logfile $args" +fi + +valgrind $args +exitcode=$? +if [ $exitcode = 0 ] ; then + lines=$(wc -l <$logfile) + if [ $lines -ne 0 ] ; then + exitcode=$errorexitcode + fi +fi + +if [ $createlogfile != 0 ] ; then + cat $logfile >>/dev/stderr + rm $logfile +fi + +exit $exitcode diff --git a/storage/tokudb/ft-index/scripts/watch.stress-tests.bash b/storage/tokudb/ft-index/scripts/watch.stress-tests.bash index 86d8e495a1a24..9dccbe2bbf55f 100755 --- a/storage/tokudb/ft-index/scripts/watch.stress-tests.bash +++ b/storage/tokudb/ft-index/scripts/watch.stress-tests.bash @@ -1,3 +1,3 @@ #!/bin/bash -watch "date ; awk '{ print \$1, \$3 }' < /tmp/run.stress-tests.log | tail -n 100 | sort -k 2 | uniq -c | sort -k 3 -r -s | head -n10; echo ; echo; echo 'Failing tests:'; grep FAILED /tmp/run.stress-tests.log | sort -k 3 -r -s" +watch "date ; awk '{ print \$1, \$3 }' < /tmp/stress-tests-log | sort -k 2 | uniq -c | sort -k 3 -r -s | head -n10; echo ; echo; echo 'Failing tests:'; grep FAILED /tmp/stress-tests-log | sort -k 3 -r -s" diff --git a/storage/tokudb/ft-index/src/errors.cc b/storage/tokudb/ft-index/src/errors.cc index 4101b372a389f..fa1227b25cc33 100644 --- a/storage/tokudb/ft-index/src/errors.cc +++ b/storage/tokudb/ft-index/src/errors.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/indexer-internal.h b/storage/tokudb/ft-index/src/indexer-internal.h index a3f1f96f09682..fd648a88c8f73 100644 --- a/storage/tokudb/ft-index/src/indexer-internal.h +++ b/storage/tokudb/ft-index/src/indexer-internal.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,10 +89,9 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "$Id$" -#ifndef TOKU_INDEXER_INTERNAL_H -#define TOKU_INDEXER_INTERNAL_H +#pragma once -#include +#include #include // the indexer_commit_keys is an ordered set of keys described by a DBT in the keys array. @@ -168,5 +167,3 @@ void indexer_undo_do_init(DB_INDEXER *indexer); void indexer_undo_do_destroy(DB_INDEXER *indexer); int indexer_undo_do(DB_INDEXER *indexer, DB *hotdb, struct ule_prov_info *prov_info, DBT_ARRAY *hot_keys, DBT_ARRAY *hot_vals); - -#endif diff --git a/storage/tokudb/ft-index/src/indexer-undo-do.cc b/storage/tokudb/ft-index/src/indexer-undo-do.cc index 2201c4f37e3df..52489fb7825b9 100644 --- a/storage/tokudb/ft-index/src/indexer-undo-do.cc +++ b/storage/tokudb/ft-index/src/indexer-undo-do.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -96,13 +96,12 @@ PATENT RIGHTS GRANT: #include #include -#include #include #include #include -#include -#include -#include +#include +#include +#include #include "ydb-internal.h" #include "ydb_row_lock.h" @@ -199,7 +198,7 @@ indexer_undo_do_committed(DB_INDEXER *indexer, DB *hotdb, struct ule_prov_info * ULEHANDLE ule = prov_info->ule; // init the xids to the root xid - XIDS xids = xids_get_root_xids(); + XIDS xids = toku_xids_get_root_xids(); // scan the committed stack from bottom to top uint32_t num_committed = ule_get_num_committed(ule); @@ -280,7 +279,7 @@ indexer_undo_do_committed(DB_INDEXER *indexer, DB *hotdb, struct ule_prov_info * break; } - xids_destroy(&xids); + toku_xids_destroy(&xids); return result; } @@ -312,7 +311,7 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, struct ule_prov_info ULEHANDLE ule = prov_info->ule; // init the xids to the root xid - XIDS xids = xids_get_root_xids(); + XIDS xids = toku_xids_get_root_xids(); uint32_t num_provisional = prov_info->num_provisional; uint32_t num_committed = prov_info->num_committed; @@ -472,7 +471,7 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, struct ule_prov_info // then this will need to be handled below exit release_txns(ule, prov_states, prov_txns, indexer); exit: - xids_destroy(&xids); + toku_xids_destroy(&xids); return result; } @@ -496,16 +495,16 @@ static int indexer_set_xid(DB_INDEXER *UU(indexer), TXNID this_xid, XIDS *xids_result) { int result = 0; XIDS old_xids = *xids_result; - XIDS new_xids = xids_get_root_xids(); + XIDS new_xids = toku_xids_get_root_xids(); if (this_xid != TXNID_NONE) { XIDS child_xids; - result = xids_create_child(new_xids, &child_xids, this_xid); - xids_destroy(&new_xids); + result = toku_xids_create_child(new_xids, 
&child_xids, this_xid); + toku_xids_destroy(&new_xids); if (result == 0) new_xids = child_xids; } if (result == 0) { - xids_destroy(&old_xids); + toku_xids_destroy(&old_xids); *xids_result = new_xids; } @@ -517,9 +516,9 @@ static int indexer_append_xid(DB_INDEXER *UU(indexer), TXNID xid, XIDS *xids_result) { XIDS old_xids = *xids_result; XIDS new_xids; - int result = xids_create_child(old_xids, &new_xids, xid); + int result = toku_xids_create_child(old_xids, &new_xids, xid); if (result == 0) { - xids_destroy(&old_xids); + toku_xids_destroy(&old_xids); *xids_result = new_xids; } return result; @@ -581,8 +580,8 @@ indexer_find_prev_xr(DB_INDEXER *UU(indexer), ULEHANDLE ule, uint64_t xrindex, u return prev_found; } -// inject "delete" message into brt with logging in recovery and rollback logs, -// and making assocation between txn and brt +// inject "delete" message into ft with logging in recovery and rollback logs, +// and making assocation between txn and ft static int indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids, TOKUTXN txn) { int result = 0; @@ -630,8 +629,8 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi return result; } -// inject "insert" message into brt with logging in recovery and rollback logs, -// and making assocation between txn and brt +// inject "insert" message into ft with logging in recovery and rollback logs, +// and making assocation between txn and ft static int indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids, TOKUTXN txn) { int result = 0; @@ -650,7 +649,7 @@ indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT * } // send an insert message into the tree without rollback or recovery logging -// and without associating the txn and the brt +// and without associating the txn and the ft static int indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids) { int result = 0; @@ -682,7 +681,7 @@ indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *ho static int indexer_ft_commit(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids) { int result = 0; - if (xids_get_num_xids(xids) > 0) {// send commit only when not the root xid + if (toku_xids_get_num_xids(xids) > 0) {// send commit only when not the root xid // TEST if (indexer->i->test_commit_any) { result = indexer->i->test_commit_any(indexer, hotdb, hotkey, xids); diff --git a/storage/tokudb/ft-index/src/indexer.cc b/storage/tokudb/ft-index/src/indexer.cc index b91b738d4d422..aa821f67fbaa1 100644 --- a/storage/tokudb/ft-index/src/indexer.cc +++ b/storage/tokudb/ft-index/src/indexer.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -99,13 +99,12 @@ PATENT RIGHTS GRANT: #include "ydb-internal.h" #include #include "indexer.h" -#include #include #include #include -#include -#include -#include +#include +#include +#include #include #include "loader.h" #include @@ -118,7 +117,7 @@ PATENT RIGHTS GRANT: static INDEXER_STATUS_S indexer_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(indexer_status, k, c, t, "indexer: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(indexer_status, k, c, t, "indexer: " l, inc) static void status_init(void) { @@ -233,32 +232,25 @@ toku_indexer_unlock(DB_INDEXER* indexer) { // after grabbing the indexer lock bool toku_indexer_may_insert(DB_INDEXER* indexer, const DBT* key) { - bool retval = false; + bool may_insert = false; toku_mutex_lock(&indexer->i->indexer_estimate_lock); + // if we have no position estimate, we can't tell, so return false - if (indexer->i->position_estimate.data == NULL) { - retval = false; - } - else { - FT_HANDLE ft_handle = indexer->i->src_db->i->ft_handle; - ft_compare_func keycompare = toku_ft_get_bt_compare(ft_handle); - int r = keycompare( - indexer->i->src_db, - &indexer->i->position_estimate, - key - ); + if (indexer->i->position_estimate.data == nullptr) { + may_insert = false; + } else { + DB *db = indexer->i->src_db; + const toku::comparator &cmp = toku_ft_get_comparator(db->i->ft_handle); + int c = cmp(&indexer->i->position_estimate, key); + // if key > position_estimate, then we know the indexer cursor // is past key, and we can safely say that associated values of // key must be inserted into the indexer's db - if (r < 0) { - retval = true; - } - else { - retval = false; - } + may_insert = c < 0; } + toku_mutex_unlock(&indexer->i->indexer_estimate_lock); - return retval; + return may_insert; } void @@ -546,7 +538,7 @@ struct le_cursor_extra { // cachetable pair locks. because no txn can commit on this db, read // the provisional info for the newly read ule. static int -le_cursor_callback(ITEMLEN keylen, bytevec key, ITEMLEN UU(vallen), bytevec val, void *extra, bool lock_only) { +le_cursor_callback(uint32_t keylen, const void *key, uint32_t UU(vallen), const void *val, void *extra, bool lock_only) { if (lock_only || val == NULL) { ; // do nothing if only locking. do nothing if val==NULL, means DB_NOTFOUND } else { @@ -696,7 +688,7 @@ abort_indexer(DB_INDEXER *indexer) { } -// derived from ha_tokudb::estimate_num_rows +// derived from the handlerton's estimate_num_rows() static int update_estimated_rows(DB_INDEXER *indexer) { int error; diff --git a/storage/tokudb/ft-index/src/indexer.h b/storage/tokudb/ft-index/src/indexer.h index 3a7842af989e7..12625fdc6ea6a 100644 --- a/storage/tokudb/ft-index/src/indexer.h +++ b/storage/tokudb/ft-index/src/indexer.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,9 +89,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#ident "$Id$" -#ifndef TOKU_INDEXER_H -#define TOKU_INDEXER_H - +#pragma once // locking and unlocking functions to synchronize cursor position with // XXX_multiple APIs @@ -178,6 +176,3 @@ typedef struct { } INDEXER_STATUS_S, *INDEXER_STATUS; void toku_indexer_get_status(INDEXER_STATUS s); - - -#endif // TOKU_INDEXER_H diff --git a/storage/tokudb/ft-index/src/loader.cc b/storage/tokudb/ft-index/src/loader.cc index 53db4da092d1f..1a6bf71844397 100644 --- a/storage/tokudb/ft-index/src/loader.cc +++ b/storage/tokudb/ft-index/src/loader.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -99,8 +99,8 @@ PATENT RIGHTS GRANT: #include #include -#include -#include +#include +#include #include "ydb-internal.h" #include "ydb_db.h" @@ -119,7 +119,7 @@ enum {MAX_FILE_SIZE=256}; static LOADER_STATUS_S loader_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(loader_status, k, c, t, "loader: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(loader_status, k, c, t, "loader: " l, inc) static void status_init(void) { @@ -323,15 +323,15 @@ toku_loader_create_loader(DB_ENV *env, for (int i = 0; i < N; i++) { new_inames_in_env[i] = nullptr; } - FT_HANDLE *XMALLOC_N(N, brts); + FT_HANDLE *XMALLOC_N(N, fts); for (int i=0; ii->ft_handle; + fts[i] = dbs[i]->i->ft_handle; } LSN load_lsn; rval = locked_load_inames(env, loader_txn, N, dbs, new_inames_in_env, &load_lsn, puts_allowed); if ( rval!=0 ) { free_inames(new_inames_in_env, N); - toku_free(brts); + toku_free(fts); goto create_exit; } TOKUTXN ttxn = loader_txn ? db_txn_struct_i(loader_txn)->tokutxn : NULL; @@ -340,7 +340,7 @@ toku_loader_create_loader(DB_ENV *env, env->i->generate_row_for_put, src_db, N, - brts, dbs, + fts, dbs, (const char **)new_inames_in_env, compare_functions, loader->i->temp_file_template, @@ -352,12 +352,12 @@ toku_loader_create_loader(DB_ENV *env, puts_allowed); if ( rval!=0 ) { free_inames(new_inames_in_env, N); - toku_free(brts); + toku_free(fts); goto create_exit; } loader->i->inames_in_env = new_inames_in_env; - toku_free(brts); + toku_free(fts); if (!puts_allowed) { rval = ft_loader_close_and_redirect(loader); diff --git a/storage/tokudb/ft-index/src/loader.h b/storage/tokudb/ft-index/src/loader.h index bd8e85aed9332..c709eed3e3594 100644 --- a/storage/tokudb/ft-index/src/loader.h +++ b/storage/tokudb/ft-index/src/loader.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,8 +89,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "$Id$" -#ifndef TOKU_LOADER_H -#define TOKU_LOADER_H +#pragma once /* Create and set up a loader. 
@@ -208,6 +207,3 @@ typedef struct { void toku_loader_get_status(LOADER_STATUS s); - - -#endif diff --git a/storage/tokudb/ft-index/src/tests/CMakeLists.txt b/storage/tokudb/ft-index/src/tests/CMakeLists.txt index c37db9539b96a..06f7e5237464d 100644 --- a/storage/tokudb/ft-index/src/tests/CMakeLists.txt +++ b/storage/tokudb/ft-index/src/tests/CMakeLists.txt @@ -20,9 +20,6 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) file(GLOB transparent_upgrade_srcs RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" upgrade*.cc) - file(GLOB tdb_dontrun_srcs RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" bdb-simple-deadlock*.cc) - string(REGEX REPLACE "\\.cc(;|$)" "\\1" tdb_dontrun_tests "${tdb_dontrun_srcs}") - file(GLOB srcs RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" *.cc) list(REMOVE_ITEM srcs ${transparent_upgrade_srcs}) @@ -36,332 +33,10 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) string(REGEX REPLACE "\\.cc(;|$)" ".tdb\\1" stress_tests "${stress_test_srcs}") set(tdb_srcs ${srcs}) - list(REMOVE_ITEM tdb_srcs ${tdb_dontrun_srcs}) string(REGEX REPLACE "\\.cc(;|$)" ".tdb\\1" tdb_bins "${tdb_srcs}") list(REMOVE_ITEM tdb_srcs ${abortrecover_srcs} ${loader_srcs}) string(REGEX REPLACE "\\.cc(;|$)" ".tdb\\1" tdb_tests "${tdb_srcs}") - if(BDB_FOUND) - set(bdb_dontrun_srcs - backwards_10_each_le_and_msg - bigtxn27 - blackhole - blocking-prelock-range - blocking-set-range-reverse-0 - blocking-table-lock - bug1381 - bug627 - cachetable-race - checkpoint_callback - checkpoint_stress - cursor-isolation - cursor-set-del-rmw - cursor-set-range-rmw - db-put-simple-deadlock - del-simple - del-multiple - del-multiple-huge-primary-row - del-multiple-srcdb - directory_lock - diskfull - dump-env - env_loader_memory - env-put-multiple - env_nproc - env_startup - execute-updates - filesize - get_key_after_bytes_unit - get_last_key - helgrind1 - helgrind2 - helgrind3 - hotindexer-bw - hotindexer-error-callback - hotindexer-insert-committed-optimized - hotindexer-insert-committed - hotindexer-insert-provisional - hotindexer-lock-test - hotindexer-multiclient - hotindexer-nested-insert-committed - hotindexer-put-abort - hotindexer-put-commit - hotindexer-put-multiple - hotindexer-simple-abort - hotindexer-simple-abort-put - hotindexer-undo-do-test - hotindexer-with-queries - hot-optimize-table-tests - insert-dup-prelock - isolation - isolation-read-committed - keyrange - keyrange-merge - last-verify-time - loader-cleanup-test - loader-create-abort - loader-create-close - loader-create-commit-nproc-limit - loader-dup-test - loader-no-puts - loader-nproc-close - loader-nproc-create - loader-reference-test - loader-stress-del - loader-stress-test - loader-tpch-load - locktree_escalation_stalls - lock-pressure - manyfiles - maxsize-for-loader - multiprocess - mvcc-create-table - mvcc-many-committed - mvcc-read-committed - openlimit17 - openlimit17-locktree - openlimit17-metafiles - perf_checkpoint_var - perf_child_txn - perf_cursor_nop - perf_iibench - perf_insert - perf_insert_multiple - perf_malloc_free - perf_nop - perf_ptquery - perf_ptquery2 - perf_rangequery - perf_read_txn - perf_read_txn_single_thread - perf_read_write - perf_txn_single_thread - perf_xmalloc_free - prelock-read-read - prelock-read-write - prelock-write-read - prelock-write-write - print_engine_status - powerfail - preload-db - preload-db-nested - progress - put-del-multiple-array-indexing - put-multiple - queries_with_deletes - recover-2483 - recover-3113 - recover-5146 - recover-child-rollback - recover-compare-db - recover-compare-db-descriptor - recover-del-multiple - 
recover-del-multiple-abort - recover-del-multiple-srcdb-fdelete-all - recover-delboth-after-checkpoint - recover-delboth-checkpoint - recover-descriptor - recover-descriptor2 - recover-descriptor3 - recover-descriptor4 - recover-descriptor5 - recover-descriptor6 - recover-descriptor7 - recover-descriptor8 - recover-descriptor9 - recover-descriptor10 - recover-descriptor11 - recover-descriptor12 - recover-fclose-in-checkpoint - recover-fcreate-basementnodesize - recover-flt1 - recover-flt2 - recover-flt3 - recover-flt4 - recover-flt5 - recover-flt6 - recover-flt7 - recover-flt8 - recover-flt9 - recover-flt10 - recover-hotindexer-simple-abort-put - recover-loader-test - recover-lsn-filter-multiple - recover-put-multiple - recover-put-multiple-abort - recover-put-multiple-fdelete-all - recover-put-multiple-fdelete-some - recover-put-multiple-srcdb-fdelete-all - recover-split-checkpoint - recover-tablelock - recover-test-logsuppress - recover-test-logsuppress-put - recover-test_stress1 - recover-test_stress2 - recover-test_stress3 - recover-test_stress_openclose - recover-upgrade-db-descriptor-multihandle - recover-upgrade-db-descriptor - recover-update-multiple - recover-update-multiple-abort - recover-update_aborts - recover-update_aborts_before_checkpoint - recover-update_aborts_before_close - recover-update_changes_values - recover-update_changes_values_before_checkpoint - recover-update_changes_values_before_close - recover-update_broadcast_aborts - recover-update_broadcast_aborts2 - recover-update_broadcast_aborts3 - recover-update_broadcast_aborts_before_checkpoint - recover-update_broadcast_aborts_before_close - recover-update_broadcast_changes_values - recover-update_broadcast_changes_values2 - recover-update_broadcast_changes_values3 - recover-update_broadcast_changes_values_before_checkpoint - recover-update_broadcast_changes_values_before_close - recover-update_changes_values_before_close - recovery_fileops_stress - recovery_fileops_unit - recovery_stress - redirect - replace-into-write-lock - root_fifo_2 - root_fifo_32 - root_fifo_41 - seqinsert - shutdown-3344 - stat64 - stat64-create-modify-times - stat64_flatten - stat64-null-txn - stat64-root-changes - stress-gc - stress-gc2 - test-xa-prepare - test1324 - test1572 - test3219 - test3522 - test3522b - test3529 - test_3645 - test_3529_insert_2 - test_3529_table_lock - test_3755 - test_4015 - test_4368 - test_4657 - test_5015 - test_5469 - test-5138 - test938c - test_abort1 - test_abort4 - test_abort5 - test_bad_implicit_promotion - test_blobs_leaf_split - test_bulk_fetch - test_compression_methods - test_cmp_descriptor - test_cursor_interrupt - test_cursor_with_read_txn - test_db_change_pagesize - test_db_change_xxx - test_cursor_delete_2119 - test_db_descriptor - test_db_descriptor_named_db - test_db_txn_locks_read_uncommitted - test_get_max_row_size - test_insert_many_gc - test_iterate_live_transactions - test_iterate_pending_lock_requests - test_large_update_broadcast_small_cachetable - test_lock_timeout_callback - test_locking_with_read_txn - test_locktree_close - test_logflush - test_multiple_checkpoints_block_commit - test_query - test_read_txn_invalid_ops - test_redirect_func - test_restrict - test_row_size_supported - test_simple_read_txn - test_stress0 - test_stress1 - test_stress2 - test_stress3 - test_stress4 - test_stress5 - test_stress6 - test_stress7 - test_stress_openclose - test_stress_with_verify - test_stress_hot_indexing - test_transactional_descriptor - test_trans_desc_during_chkpt - 
test_trans_desc_during_chkpt2 - test_trans_desc_during_chkpt3 - test_trans_desc_during_chkpt4 - test_txn_abort6 - test_txn_abort8 - test_txn_abort9 - test_txn_close_open_commit - test_txn_commit8 - test_txn_nested1 - test_txn_nested2 - test_txn_nested3 - test_txn_nested4 - test_txn_nested5 - test_update_abort_works - test_update_calls_back - test_update_can_delete_elements - test_update_changes_values - test_update_nonexistent_keys - test_update_previously_deleted - test_update_stress - test_update_txn_snapshot_works_concurrently - test_update_txn_snapshot_works_correctly_with_deletes - test_update_broadcast_abort_works - test_update_broadcast_calls_back - test_update_broadcast_can_delete_elements - test_update_broadcast_changes_values - test_update_broadcast_previously_deleted - test_update_broadcast_stress - test_update_broadcast_update_fun_has_choices - test_update_broadcast_with_empty_table - test_update_broadcast_indexer - test_update_broadcast_loader - test_update_broadcast_nested_updates - test_update_nested_updates - test_update_with_empty_table - test_updates_single_key - txn-ignore - transactional_fileops - update-multiple-data-diagonal - update-multiple-key0 - update-multiple-nochange - update-multiple-with-indexer - update-multiple-with-indexer-array - update - upgrade_simple - upgrade-test-1 - upgrade-test-2 - upgrade-test-3 - upgrade-test-4 - upgrade-test-5 - upgrade-test-6 - upgrade-test-7 - zombie_db - ) - set(bdb_srcs ${srcs}) - string(REGEX REPLACE "\\.cc(;|$)" "\\1" bdb_testbases "${bdb_srcs}") - list(REMOVE_ITEM bdb_testbases ${bdb_dontrun_srcs}) - string(REGEX REPLACE "(.)(;|$)" "\\1.bdb\\2" bdb_tests "${bdb_testbases}") - set(bdb_bins ${bdb_tests}) - endif() - set(tdb_tests_that_should_fail test_db_no_env.tdb test_log8.recover @@ -376,8 +51,6 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) list(REMOVE_ITEM tdb_bins test-5138.tdb) add_executable(test-5138.tdb test-5138.cc) target_link_libraries(test-5138.tdb ${LIBTOKUDB}_static z ${LIBTOKUPORTABILITY}_static ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) - set_property(TARGET test-5138.tdb APPEND PROPERTY - COMPILE_DEFINITIONS "USE_TDB;IS_TDB=1;TOKUDB=1") add_space_separated_property(TARGET test-5138.tdb COMPILE_FLAGS -fvisibility=hidden) add_ydb_test(test-5138.tdb) @@ -390,25 +63,9 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) # We link the test with util directly so that the test code itself can use # some of those things (i.e. kibbutz in the threaded tests). 
target_link_libraries(${base}.tdb util ${LIBTOKUDB} ${LIBTOKUPORTABILITY}) - set_property(TARGET ${base}.tdb APPEND PROPERTY - COMPILE_DEFINITIONS "USE_TDB;IS_TDB=1;TOKUDB=1") add_space_separated_property(TARGET ${base}.tdb COMPILE_FLAGS -fvisibility=hidden) endforeach(bin) - if(BDB_FOUND) - foreach(bin ${bdb_bins}) - get_filename_component(base ${bin} NAME_WE) - - add_executable(${base}.bdb ${base}.cc) - set_property(TARGET ${base}.bdb APPEND PROPERTY - COMPILE_DEFINITIONS "USE_BDB;IS_TDB=0;TOKU_ALLOW_DEPRECATED") - set_target_properties(${base}.bdb PROPERTIES - INCLUDE_DIRECTORIES "${BDB_INCLUDE_DIR};${CMAKE_CURRENT_BINARY_DIR}/../../toku_include;${CMAKE_CURRENT_SOURCE_DIR}/../../toku_include;${CMAKE_CURRENT_SOURCE_DIR}/../../portability;${CMAKE_CURRENT_SOURCE_DIR}/../..") - target_link_libraries(${base}.bdb ${LIBTOKUPORTABILITY} ${BDB_LIBRARIES}) - add_space_separated_property(TARGET ${base}.bdb COMPILE_FLAGS -fvisibility=hidden) - endforeach(bin) - endif() - foreach(bin loader-cleanup-test.tdb diskfull.tdb) set_property(TARGET ${bin} APPEND PROPERTY COMPILE_DEFINITIONS DONT_DEPRECATE_WRITES) @@ -421,22 +78,6 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) endmacro(declare_custom_tests) declare_custom_tests(test1426.tdb) - if(BDB_FOUND) - macro(declare_custom_bdb_tests) - foreach(test ${ARGN}) - list(REMOVE_ITEM bdb_tests ${test}) - endforeach(test) - endmacro(declare_custom_bdb_tests) - - declare_custom_bdb_tests(test1426.bdb) - configure_file(run_test1426.sh . COPYONLY) - add_test(NAME ydb/test1426.tdb - COMMAND run_test1426.sh - $ $ - "test1426.tdb.ctest-data" "test1426.bdb.ctest-data" - $ "${BDB_INCLUDE_DIR}/../bin/db_dump") - add_dependencies(test1426.tdb tokudb_dump) - endif() string(REGEX REPLACE "\\.cc(;|$)" ".tdb\\1" recover_would_be_tdb_tests "${recover_srcs}") declare_custom_tests(${recover_would_be_tdb_tests}) @@ -519,10 +160,7 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) endforeach(av) endforeach(ov) - if (NOT (CMAKE_SYSTEM_NAME MATCHES Darwin OR - (CMAKE_CXX_COMPILER_ID STREQUAL Intel AND - CMAKE_BUILD_TYPE STREQUAL Release) - OR USE_GCOV)) + if (NOT (CMAKE_SYSTEM_NAME MATCHES Darwin OR USE_GCOV)) declare_custom_tests(helgrind1.tdb) add_test(NAME ydb/helgrind_helgrind1.tdb COMMAND valgrind --quiet --tool=helgrind --error-exitcode=1 --log-file=helgrind1.tdb.deleteme $) @@ -743,7 +381,7 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) setup_toku_test_properties(ydb/${test} ${test}) endforeach() - foreach(test ${tdb_tests} ${bdb_tests}) + foreach(test ${tdb_tests}) add_ydb_test(${test}) endforeach(test) @@ -825,16 +463,6 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) ydb/test_txn_nested5.tdb ydb/test_update_broadcast_stress.tdb ) - if (BDB_FOUND) - list(APPEND long_tests - ydb/root_fifo_1.bdb - ydb/root_fifo_31.bdb - ydb/rowsize.bdb - ydb/test_log10.bdb - ydb/test_log7.bdb - ydb/test_logmax.bdb - ) - endif (BDB_FOUND) set_tests_properties(${long_tests} PROPERTIES TIMEOUT 3600) ## some take even longer, with valgrind set(extra_long_tests @@ -850,11 +478,6 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) ydb/test3039.tdb ydb/test_update_stress.tdb ) - if (BDB_FOUND) - list(APPEND extra_long_tests - ydb/test_groupcommit_count.bdb - ) - endif (BDB_FOUND) set_tests_properties(${extra_long_tests} PROPERTIES TIMEOUT 7200) ## these really take a long time with valgrind set(phenomenally_long_tests @@ -864,10 +487,5 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) ydb/recover_stress.tdb ydb/test3529.tdb ) - if (BDB_FOUND) - list(APPEND phenomenally_long_tests - ydb/test1426.tdb - ) - endif (BDB_FOUND) 
set_tests_properties(${phenomenally_long_tests} PROPERTIES TIMEOUT 14400) endif(BUILD_TESTING OR BUILD_SRC_TESTS) diff --git a/storage/tokudb/ft-index/src/tests/bdb-simple-deadlock-detect.cc b/storage/tokudb/ft-index/src/tests/bdb-simple-deadlock-detect.cc deleted file mode 100644 index ce0dcab38a37f..0000000000000 --- a/storage/tokudb/ft-index/src/tests/bdb-simple-deadlock-detect.cc +++ /dev/null @@ -1,300 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. 
This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// verify that the BDB locker can detect deadlocks on the fly and allow -// the deadlock to be unwound by the deadlocked threads. the main thread -// polls for deadlocks with the lock_detect function. -// -// A write locks L -// B write locks M -// A tries to write lock M && B tries to write lock L -// One of A or B gets the DEADLOCK error, the other waits -// A and B release their locks - -#include "test.h" -#include "toku_pthread.h" -#include - -struct test_seq { - int state; - toku_mutex_t lock; - toku_cond_t cv; -}; - -static void test_seq_init(struct test_seq *seq) { - seq->state = 0; - toku_mutex_init(&seq->lock, NULL); - toku_cond_init(&seq->cv, NULL); -} - -static void test_seq_destroy(struct test_seq *seq) { - toku_mutex_destroy(&seq->lock); - toku_cond_destroy(&seq->cv); -} - -static void test_seq_sleep(struct test_seq *seq, int new_state) { - toku_mutex_lock(&seq->lock); - while (seq->state != new_state) { - toku_cond_wait(&seq->cv, &seq->lock); - } - toku_mutex_unlock(&seq->lock); -} - -static void test_seq_next_state(struct test_seq *seq) { - toku_mutex_lock(&seq->lock); - seq->state++; - toku_cond_broadcast(&seq->cv); - toku_mutex_unlock(&seq->lock); -} - -struct locker_args { - DB_ENV *db_env; - struct test_seq *test_seq; - int *deadlock_count; -}; - -static void *run_locker_a(void *arg) { - struct locker_args *locker_args = (struct locker_args *) arg; - DB_ENV *db_env = locker_args->db_env; - struct test_seq *test_seq = locker_args->test_seq; - int r; - - uint32_t locker_a; - r = db_env->lock_id(db_env, &locker_a); assert(r == 0); - - DBT object_l = { .data = (char *) "L", .size = 1 }; - DBT object_m = { .data = (char *) "M", .size = 1 }; - - test_seq_sleep(test_seq, 0); - DB_LOCK lock_a_l; - r = db_env->lock_get(db_env, locker_a, DB_LOCK_NOWAIT, &object_l, DB_LOCK_WRITE, &lock_a_l); assert(r == 0); - test_seq_next_state(test_seq); - - test_seq_sleep(test_seq, 2); - DB_LOCK lock_a_m; - bool m_locked = false; - r = db_env->lock_get(db_env, locker_a, 0, &object_m, DB_LOCK_WRITE, &lock_a_m); - assert(r == 0 || r == DB_LOCK_DEADLOCK); - if (r == 0) - m_locked = true; - - r = db_env->lock_put(db_env, &lock_a_l); assert(r == 0); - - if (m_locked) { - r = db_env->lock_put(db_env, &lock_a_m); assert(r == 0); - } else { - (void) toku_sync_fetch_and_add(locker_args->deadlock_count, 1); - if (verbose) printf("%s:%u m deadlock\n", __FUNCTION__, __LINE__); - } - - r = 
db_env->lock_id_free(db_env, locker_a); assert(r == 0); - - return arg; -} - -static void *run_locker_b(void *arg) { - struct locker_args *locker_args = (struct locker_args *) arg; - DB_ENV *db_env = locker_args->db_env; - struct test_seq *test_seq = locker_args->test_seq; - int r; - - uint32_t locker_b; - r = db_env->lock_id(db_env, &locker_b); assert(r == 0); - - DBT object_l = { .data = (char *) "L", .size = 1 }; - DBT object_m = { .data = (char *) "M", .size = 1 }; - - test_seq_sleep(test_seq, 1); - DB_LOCK lock_b_m; - r = db_env->lock_get(db_env, locker_b, DB_LOCK_NOWAIT, &object_m, DB_LOCK_WRITE, &lock_b_m); assert(r == 0); - test_seq_next_state(test_seq); - - test_seq_sleep(test_seq, 2); - DB_LOCK lock_b_l; - bool l_locked = false; - r = db_env->lock_get(db_env, locker_b, 0, &object_l, DB_LOCK_WRITE, &lock_b_l); - assert(r == 0 || r == DB_LOCK_DEADLOCK); - if (r == 0) - l_locked = true; - - r = db_env->lock_put(db_env, &lock_b_m); assert(r == 0); - - if (l_locked) { - r = db_env->lock_put(db_env, &lock_b_l); assert(r == 0); - } else { - (void) toku_sync_fetch_and_add(locker_args->deadlock_count, 1); - if (verbose) printf("%s:%u l deadlock\n", __FUNCTION__, __LINE__); - } - - r = db_env->lock_id_free(db_env, locker_b); assert(r == 0); - - return arg; -} - -static void simple_deadlock(DB_ENV *db_env) { - int r; - - struct test_seq test_seq; ZERO_STRUCT(test_seq); test_seq_init(&test_seq); - - int deadlock_count = 0 ; - - toku_pthread_t tid_a; - struct locker_args args_a = { db_env, &test_seq, &deadlock_count }; - r = toku_pthread_create(&tid_a, NULL, run_locker_a, &args_a); assert(r == 0); - - toku_pthread_t tid_b; - struct locker_args args_b = { db_env, &test_seq, &deadlock_count }; - r = toku_pthread_create(&tid_b, NULL, run_locker_b, &args_b); assert(r == 0); - - while (1) { - sleep(10); - int rejected = 0; - r = db_env->lock_detect(db_env, 0, DB_LOCK_YOUNGEST, &rejected); assert(r == 0); - if (verbose) - printf("%s %d\n", __FUNCTION__, rejected); - if (rejected == 0) - break; - } - - void *ret = NULL; - r = toku_pthread_join(tid_a, &ret); assert(r == 0); - r = toku_pthread_join(tid_b, &ret); assert(r == 0); - - assert(deadlock_count == 1); - - test_seq_destroy(&test_seq); -} - -int test_main(int argc, char * const argv[]) { - uint64_t cachesize = 0; - int do_txn = 1; - const char *db_env_dir = TOKU_TEST_FILENAME; - int db_env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG | DB_THREAD; - - // parse_args(argc, argv); - for (int i = 1; i < argc; i++) { - if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) { - verbose++; - continue; - } - if (strcmp(argv[i], "-q") == 0 || strcmp(argv[i], "--quiet") == 0) { - if (verbose > 0) - verbose--; - continue; - } - assert(0); - } - - // setup env - int r; - char rm_cmd[strlen(db_env_dir) + strlen("rm -rf ") + 1]; - snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", db_env_dir); - r = system(rm_cmd); assert(r == 0); - - r = toku_os_mkdir(db_env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert(r == 0); - - DB_ENV *db_env = NULL; - r = db_env_create(&db_env, 0); assert(r == 0); - if (cachesize) { - const uint64_t gig = 1 << 30; - r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); - } - if (!do_txn) - db_env_open_flags &= ~(DB_INIT_TXN | DB_INIT_LOG); - r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); - - // run test - simple_deadlock(db_env); - - // close env - r = 
db_env->close(db_env, 0); assert(r == 0); db_env = NULL; - - return 0; -} diff --git a/storage/tokudb/ft-index/src/tests/bdb-simple-deadlock-on-the-fly.cc b/storage/tokudb/ft-index/src/tests/bdb-simple-deadlock-on-the-fly.cc deleted file mode 100644 index 12386e173c339..0000000000000 --- a/storage/tokudb/ft-index/src/tests/bdb-simple-deadlock-on-the-fly.cc +++ /dev/null @@ -1,292 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. 
This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// verify that the BDB locker can detect deadlocks on the fly and allow -// the deadlock to be unwound by the deadlocked threads. we use the -// set_lk_detect function to force the locker to check for deadlocks. -// -// A write locks L -// B write locks M -// A tries to write lock M && B tries to write lock L -// One of A or B should get the DEADLOCK error, the other waits -// A and B release their locks - -#include "test.h" -#include "toku_pthread.h" -#include - -struct test_seq { - int state; - toku_mutex_t lock; - toku_cond_t cv; -}; - -static void test_seq_init(struct test_seq *seq) { - seq->state = 0; - toku_mutex_init(&seq->lock, NULL); - toku_cond_init(&seq->cv, NULL); -} - -static void test_seq_destroy(struct test_seq *seq) { - toku_mutex_destroy(&seq->lock); - toku_cond_destroy(&seq->cv); -} - -static void test_seq_sleep(struct test_seq *seq, int new_state) { - toku_mutex_lock(&seq->lock); - while (seq->state != new_state) { - toku_cond_wait(&seq->cv, &seq->lock); - } - toku_mutex_unlock(&seq->lock); -} - -static void test_seq_next_state(struct test_seq *seq) { - toku_mutex_lock(&seq->lock); - seq->state++; - toku_cond_broadcast(&seq->cv); - toku_mutex_unlock(&seq->lock); -} - -struct locker_args { - DB_ENV *db_env; - struct test_seq *test_seq; - int *deadlock_count; -}; - -static void *run_locker_a(void *arg) { - struct locker_args *locker_args = (struct locker_args *) arg; - DB_ENV *db_env = locker_args->db_env; - struct test_seq *test_seq = locker_args->test_seq; - int r; - - uint32_t locker_a; - r = db_env->lock_id(db_env, &locker_a); assert(r == 0); - - DBT object_l = { .data = (char *) "L", .size = 1 }; - DBT object_m = { .data = (char *) "M", .size = 1 }; - - test_seq_sleep(test_seq, 0); - DB_LOCK lock_a_l; - r = db_env->lock_get(db_env, locker_a, DB_LOCK_NOWAIT, &object_l, DB_LOCK_WRITE, &lock_a_l); assert(r == 0); - test_seq_next_state(test_seq); - - test_seq_sleep(test_seq, 2); - DB_LOCK lock_a_m; - bool m_locked = false; - r = db_env->lock_get(db_env, locker_a, 0, &object_m, DB_LOCK_WRITE, &lock_a_m); - assert(r == 0 || r == DB_LOCK_DEADLOCK); - if (r == 0) - m_locked = true; - - r = db_env->lock_put(db_env, &lock_a_l); assert(r == 0); - - if (m_locked) { - r = db_env->lock_put(db_env, &lock_a_m); assert(r == 0); - } else { - (void) toku_sync_fetch_and_add(locker_args->deadlock_count, 1); - if (verbose) printf("%s:%u m deadlock\n", __FUNCTION__, 
__LINE__); - } - - r = db_env->lock_id_free(db_env, locker_a); assert(r == 0); - - return arg; -} - -static void *run_locker_b(void *arg) { - struct locker_args *locker_args = (struct locker_args *) arg; - DB_ENV *db_env = locker_args->db_env; - struct test_seq *test_seq = locker_args->test_seq; - int r; - - uint32_t locker_b; - r = db_env->lock_id(db_env, &locker_b); assert(r == 0); - - DBT object_l = { .data = (char *) "L", .size = 1 }; - DBT object_m = { .data = (char *) "M", .size = 1 }; - - test_seq_sleep(test_seq, 1); - DB_LOCK lock_b_m; - r = db_env->lock_get(db_env, locker_b, DB_LOCK_NOWAIT, &object_m, DB_LOCK_WRITE, &lock_b_m); assert(r == 0); - test_seq_next_state(test_seq); - - test_seq_sleep(test_seq, 2); - DB_LOCK lock_b_l; - bool l_locked = false; - r = db_env->lock_get(db_env, locker_b, 0, &object_l, DB_LOCK_WRITE, &lock_b_l); - assert(r == 0 || r == DB_LOCK_DEADLOCK); - if (r == 0) - l_locked = true; - - r = db_env->lock_put(db_env, &lock_b_m); assert(r == 0); - - if (l_locked) { - r = db_env->lock_put(db_env, &lock_b_l); assert(r == 0); - } else { - (void) toku_sync_fetch_and_add(locker_args->deadlock_count, 1); - if (verbose) printf("%s:%u l deadlock\n", __FUNCTION__, __LINE__); - } - - r = db_env->lock_id_free(db_env, locker_b); assert(r == 0); - - return arg; -} - -static void simple_deadlock(DB_ENV *db_env) { - int r; - int deadlock_count = 0; - - struct test_seq test_seq; ZERO_STRUCT(test_seq); test_seq_init(&test_seq); - - toku_pthread_t tid_a; - struct locker_args args_a = { db_env, &test_seq, &deadlock_count }; - r = toku_pthread_create(&tid_a, NULL, run_locker_a, &args_a); assert(r == 0); - - toku_pthread_t tid_b; - struct locker_args args_b = { db_env, &test_seq, &deadlock_count }; - r = toku_pthread_create(&tid_b, NULL, run_locker_b, &args_b); assert(r == 0); - - void *ret = NULL; - r = toku_pthread_join(tid_a, &ret); assert(r == 0); - r = toku_pthread_join(tid_b, &ret); assert(r == 0); - - assert(deadlock_count == 1); - - test_seq_destroy(&test_seq); -} - -int test_main(int argc, char * const argv[]) { - uint64_t cachesize = 0; - int do_txn = 1; - const char *db_env_dir = TOKU_TEST_FILENAME; - int db_env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG | DB_THREAD; - - // parse_args(argc, argv); - for (int i = 1; i < argc; i++) { - if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) { - verbose++; - continue; - } - if (strcmp(argv[i], "-q") == 0 || strcmp(argv[i], "--quiet") == 0) { - if (verbose > 0) - verbose--; - continue; - } - assert(0); - } - - // setup env - int r; - char rm_cmd[strlen(db_env_dir) + strlen("rm -rf ") + 1]; - snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", db_env_dir); - r = system(rm_cmd); assert(r == 0); - - r = toku_os_mkdir(db_env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert(r == 0); - - DB_ENV *db_env = NULL; - r = db_env_create(&db_env, 0); assert(r == 0); - if (cachesize) { - const uint64_t gig = 1 << 30; - r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); - } - if (!do_txn) - db_env_open_flags &= ~(DB_INIT_TXN | DB_INIT_LOG); - r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if defined(USE_BDB) - r = db_env->set_lk_detect(db_env, DB_LOCK_YOUNGEST); assert(r == 0); -#endif - - // run test - simple_deadlock(db_env); - - // close env - r = db_env->close(db_env, 0); assert(r == 0); db_env = NULL; - - return 0; -} diff --git 
a/storage/tokudb/ft-index/src/tests/bdb.suppressions b/storage/tokudb/ft-index/src/tests/bdb.suppressions deleted file mode 100644 index 5e06d9a84e743..0000000000000 --- a/storage/tokudb/ft-index/src/tests/bdb.suppressions +++ /dev/null @@ -1,87 +0,0 @@ -{ - bdb_writes_undefined_data_to_disk - Memcheck:Param - pwrite64(buf) - fun:pwrite64 - fun:__os_io - obj:/lib/libdb-4.3.so - fun:__memp_bhwrite - fun:__memp_sync_int - fun:__memp_fsync - fun:__db_sync - fun:__db_refresh - fun:__db_close - fun:__fop_subdb_setup - fun:__db_open - fun:__db_open_pp -} - -{ - bdb_leaks_db_home_strdup - Memcheck:Leak - fun:_vgrZU_libcZdsoZa_malloc - fun:__os_malloc - fun:__os_strdup - fun:__db_open_pp - fun:main -} - -{ - bdb_leaks_in_db_create - Memcheck:Leak - fun:_vgrZU_libcZdsoZa_malloc - fun:__os_malloc - fun:__os_calloc - fun:__bam_db_create - fun:db_create - fun:main -} - -{ - bdb_leaks_if_you_open_twice - Memcheck:Leak - fun:_vgrZU_libcZdsoZa_malloc - fun:__os_malloc - fun:__os_strdup - fun:__db_open_pp -} - -{ - bdb_leaks_again - Memcheck:Leak - fun:_vgrZU_libcZdsoZa_malloc - fun:__os_malloc - fun:__os_strdup - fun:__db_home - obj:/lib/libdb-4.3.so - fun:__dbenv_open -} - -{ - - Memcheck:Leak - fun:_vgrZU_libcZdsoZa_malloc - fun:__os_malloc - fun:__os_calloc - fun:__bam_db_create - fun:db_create -} - -{ - - Memcheck:Leak - fun:_vgrZU_libcZdsoZa_malloc - fun:__os_malloc - fun:__os_calloc - fun:__bam_db_create - fun:db_create -} - -{ - - Memcheck:Leak - fun:_vgrZU_libcZdsoZa_malloc - fun:__os_malloc - fun:__os_calloc - fun:db_create -} diff --git a/storage/tokudb/ft-index/src/tests/big-nested-abort-abort.cc b/storage/tokudb/ft-index/src/tests/big-nested-abort-abort.cc index 0707773d6b5e8..7c6e444986a6b 100644 --- a/storage/tokudb/ft-index/src/tests/big-nested-abort-abort.cc +++ b/storage/tokudb/ft-index/src/tests/big-nested-abort-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -175,11 +175,7 @@ setup (void) { r=db_env_create(&env, 0); CKERR(r); -#ifndef TOKUDB - r=env->set_lk_max_objects(env, N); CKERR(r); -#else r=env->set_redzone(env, 0); CKERR(r); -#endif env->set_errfile(env, stderr); r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_create(&db, env, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/big-nested-abort-commit.cc b/storage/tokudb/ft-index/src/tests/big-nested-abort-commit.cc index 711f235275e45..9965a6f572582 100644 --- a/storage/tokudb/ft-index/src/tests/big-nested-abort-commit.cc +++ b/storage/tokudb/ft-index/src/tests/big-nested-abort-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -174,9 +174,6 @@ setup (void) { r=db_env_create(&env, 0); CKERR(r); -#ifndef TOKUDB - r=env->set_lk_max_objects(env, N); CKERR(r); -#endif env->set_errfile(env, stderr); r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_create(&db, env, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/big-nested-commit-abort.cc b/storage/tokudb/ft-index/src/tests/big-nested-commit-abort.cc index 0469867396d09..6e02e6e7799b5 100644 --- a/storage/tokudb/ft-index/src/tests/big-nested-commit-abort.cc +++ b/storage/tokudb/ft-index/src/tests/big-nested-commit-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -169,9 +169,6 @@ setup (void) { r=db_env_create(&env, 0); CKERR(r); -#ifndef TOKUDB - r=env->set_lk_max_objects(env, N); CKERR(r); -#endif env->set_errfile(env, stderr); r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_create(&db, env, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/big-nested-commit-commit.cc b/storage/tokudb/ft-index/src/tests/big-nested-commit-commit.cc index ad25b5e90e8bf..efd951a90d473 100644 --- a/storage/tokudb/ft-index/src/tests/big-nested-commit-commit.cc +++ b/storage/tokudb/ft-index/src/tests/big-nested-commit-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -168,13 +168,8 @@ setup (void) { r=toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_env_create(&env, 0); CKERR(r); -#ifdef TOKUDB r=env->set_redzone(env, 0); CKERR(r); -#endif -#ifndef TOKUDB - r=env->set_lk_max_objects(env, N); CKERR(r); -#endif env->set_errfile(env, stderr); r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_create(&db, env, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/ft/worker-thread-benchmarks/threadpool.cc b/storage/tokudb/ft-index/src/tests/big-shutdown.cc similarity index 60% rename from storage/tokudb/ft-index/ft/worker-thread-benchmarks/threadpool.cc rename to storage/tokudb/ft-index/src/tests/big-shutdown.cc index a1fda8c373c20..0dc576e1117ed 100644 --- a/storage/tokudb/ft-index/ft/worker-thread-benchmarks/threadpool.cc +++ b/storage/tokudb/ft-index/src/tests/big-shutdown.cc @@ -88,79 +88,102 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-#include -#include -#include -#include -#include -#include - -#include "threadpool.h" -#include - -// use gcc builtin fetch_and_add 0->no 1->yes -#define DO_ATOMIC_FETCH_AND_ADD 0 - -struct threadpool { - int max_threads; - int current_threads; - int busy_threads; - pthread_t pids[]; -}; - -int threadpool_create(THREADPOOL *threadpoolptr, int max_threads) { - size_t size = sizeof (struct threadpool) + max_threads*sizeof (pthread_t); - struct threadpool *threadpool = (struct threadpool *) malloc(size); - if (threadpool == 0) - return ENOMEM; - threadpool->max_threads = max_threads; - threadpool->current_threads = 0; - threadpool->busy_threads = 0; - int i; - for (i=0; ipids[i] = 0; - *threadpoolptr = threadpool; - return 0; -} - -void threadpool_destroy(THREADPOOL *threadpoolptr) { - struct threadpool *threadpool = *threadpoolptr; - int i; - for (i=0; icurrent_threads; i++) { - int r; void *ret; - r = pthread_join(threadpool->pids[i], &ret); - assert(r == 0); - } - *threadpoolptr = 0; - free(threadpool); -} -void threadpool_maybe_add(THREADPOOL threadpool, void *(*f)(void *), void *arg) { - if (threadpool->current_threads < threadpool->max_threads) { - int r = pthread_create(&threadpool->pids[threadpool->current_threads], 0, f, arg); - if (r == 0) { - threadpool->current_threads++; - threadpool_set_thread_busy(threadpool); +// Create a lot of dirty nodes, kick off a checkpoint, and close the environment. +// Measure the time it takes to close the environment since we are speeding up that +// function. + +#include "test.h" +#include + +// Insert max_rows key/val pairs into the db +static void do_inserts(DB_ENV *env, DB *db, uint64_t max_rows, size_t val_size) { + char val_data[val_size]; memset(val_data, 0, val_size); + int r; + DB_TXN *txn = nullptr; + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); + + for (uint64_t i = 1; i <= max_rows; i++) { + // pick a sequential key but it does not matter for this test. + uint64_t k[2] = { + htonl(i), random64(), + }; + DBT key = { .data = k, .size = sizeof k }; + DBT val = { .data = val_data, .size = (uint32_t) val_size }; + r = db->put(db, txn, &key, &val, 0); + CKERR(r); + + if ((i % 1000) == 0) { + if (verbose) + fprintf(stderr, "put %" PRIu64 "\n", i); + r = txn->commit(txn, 0); + CKERR(r); + r = env->txn_begin(env, nullptr, &txn, 0); + CKERR(r); } } -} -void threadpool_set_thread_busy(THREADPOOL threadpool) { -#if DO_ATOMIC_FETCH_AND_ADD - (void) toku_sync_fetch_and_add(&threadpool->busy_threads, 1); -#else - threadpool->busy_threads++; -#endif + r = txn->commit(txn, 0); + CKERR(r); } -void threadpool_set_thread_idle(THREADPOOL threadpool) { -#if DO_ATOMIC_FETCH_AND_ADD - (void) toku_sync_fetch_and_add(&threadpool->busy_threads, -1); -#else - threadpool->busy_threads--; -#endif +// Create a cache with a lot of dirty nodes, kick off a checkpoint, and measure the time to +// close the environment. 
+static void big_shutdown(void) { + int r; + + DB_ENV *env = nullptr; + r = db_env_create(&env, 0); + CKERR(r); + r = env->set_cachesize(env, 8, 0, 1); + CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, + DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE, + S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + DB *db = nullptr; + r = db_create(&db, env, 0); + CKERR(r); + r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + do_inserts(env, db, 1000000, 1024); + + // kick the checkpoint thread + if (verbose) + fprintf(stderr, "env->checkpointing_set_period\n"); + r = env->checkpointing_set_period(env, 2); + CKERR(r); + sleep(3); + + if (verbose) + fprintf(stderr, "db->close\n"); + r = db->close(db, 0); + CKERR(r); + + // measure the shutdown time + uint64_t tstart = toku_current_time_microsec(); + if (verbose) + fprintf(stderr, "env->close\n"); + r = env->close(env, 0); + CKERR(r); + uint64_t tend = toku_current_time_microsec(); + if (verbose) + fprintf(stderr, "env->close complete %" PRIu64 " sec\n", (tend - tstart)/1000000); } -int threadpool_get_current_threads(THREADPOOL threadpool) { - return threadpool->current_threads; +int test_main (int argc, char *const argv[]) { + default_parse_args(argc, argv); + + // init the env directory + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); + CKERR(r); + + // run the test + big_shutdown(); + + return 0; } diff --git a/storage/tokudb/ft-index/src/tests/bigtxn27.cc b/storage/tokudb/ft-index/src/tests/bigtxn27.cc index baa1e4f7e6e6a..1eedb79543d75 100644 --- a/storage/tokudb/ft-index/src/tests/bigtxn27.cc +++ b/storage/tokudb/ft-index/src/tests/bigtxn27.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/blackhole.cc b/storage/tokudb/ft-index/src/tests/blackhole.cc index 267eb8c1ba36d..34df107b15363 100644 --- a/storage/tokudb/ft-index/src/tests/blackhole.cc +++ b/storage/tokudb/ft-index/src/tests/blackhole.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: // Test that a db ignores insert messages in blackhole mode #include "test.h" -#include +#include static DB *db; static DB *blackhole_db; diff --git a/storage/tokudb/ft-index/src/tests/blocking-first-empty.cc b/storage/tokudb/ft-index/src/tests/blocking-first-empty.cc index 72267437a4afc..3fb5cae46ffdd 100644 --- a/storage/tokudb/ft-index/src/tests/blocking-first-empty.cc +++ b/storage/tokudb/ft-index/src/tests/blocking-first-empty.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -98,7 +98,6 @@ struct my_callback_context { DBT val; }; -#if TOKUDB static int blocking_first_callback(DBT const *a UU(), DBT const *b UU(), void *e UU()) { DBT const *found_key = a; DBT const *found_val = b; @@ -107,7 +106,6 @@ static int blocking_first_callback(DBT const *a UU(), DBT const *b UU(), void *e copy_dbt(&context->val, found_val); return 0; } -#endif static void blocking_first(DB_ENV *db_env, DB *db, uint64_t nrows, long sleeptime) { int r; @@ -122,11 +120,7 @@ static void blocking_first(DB_ENV *db_env, DB *db, uint64_t nrows, long sleeptim DBC *cursor = NULL; r = db->cursor(db, txn, &cursor, 0); assert(r == 0); // get a write lock on -inf +inf -#if TOKUDB r = cursor->c_getf_first(cursor, DB_RMW, blocking_first_callback, &context); assert(r == DB_NOTFOUND); -#else - r = cursor->c_get(cursor, &context.key, &context.val, DB_FIRST + DB_RMW); assert(r == DB_NOTFOUND); -#endif usleep(sleeptime); @@ -219,9 +213,7 @@ int test_main(int argc, char * const argv[]) { r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); } r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if TOKUDB r = db_env->set_lock_timeout(db_env, 30 * 1000, nullptr); assert(r == 0); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/blocking-first.cc b/storage/tokudb/ft-index/src/tests/blocking-first.cc index 1bd414a90aa62..b501f70d5bbb9 100644 --- a/storage/tokudb/ft-index/src/tests/blocking-first.cc +++ b/storage/tokudb/ft-index/src/tests/blocking-first.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -116,7 +116,6 @@ struct my_callback_context { DBT val; }; -#if TOKUDB static int blocking_first_callback(DBT const *a UU(), DBT const *b UU(), void *e UU()) { DBT const *found_key = a; DBT const *found_val = b; @@ -125,7 +124,6 @@ static int blocking_first_callback(DBT const *a UU(), DBT const *b UU(), void *e copy_dbt(&context->val, found_val); return 0; } -#endif static void blocking_first(DB_ENV *db_env, DB *db, uint64_t nrows, long sleeptime) { int r; @@ -140,11 +138,7 @@ static void blocking_first(DB_ENV *db_env, DB *db, uint64_t nrows, long sleeptim DBC *cursor = NULL; r = db->cursor(db, txn, &cursor, 0); assert(r == 0); // get a write lock on -inf ... 0 -#if TOKUDB r = cursor->c_getf_first(cursor, DB_RMW, blocking_first_callback, &context); assert(r == 0); -#else - r = cursor->c_get(cursor, &context.key, &context.val, DB_FIRST + DB_RMW); assert(r == 0); -#endif usleep(sleeptime); r = cursor->c_close(cursor); assert(r == 0); @@ -236,9 +230,7 @@ int test_main(int argc, char * const argv[]) { r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); } r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if TOKUDB r = db_env->set_lock_timeout(db_env, 30 * 1000, nullptr); assert(r == 0); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/blocking-last.cc b/storage/tokudb/ft-index/src/tests/blocking-last.cc index 5ca6ce5ec6a43..e087d9623fcee 100644 --- a/storage/tokudb/ft-index/src/tests/blocking-last.cc +++ b/storage/tokudb/ft-index/src/tests/blocking-last.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -116,7 +116,6 @@ struct my_callback_context { DBT val; }; -#if TOKUDB static int blocking_last_callback(DBT const *a UU(), DBT const *b UU(), void *e UU()) { DBT const *found_key = a; DBT const *found_val = b; @@ -125,7 +124,6 @@ static int blocking_last_callback(DBT const *a UU(), DBT const *b UU(), void *e copy_dbt(&context->val, found_val); return 0; } -#endif static void blocking_last(DB_ENV *db_env, DB *db, uint64_t nrows, long sleeptime) { int r; @@ -140,11 +138,7 @@ static void blocking_last(DB_ENV *db_env, DB *db, uint64_t nrows, long sleeptime DBC *cursor = NULL; r = db->cursor(db, txn, &cursor, 0); assert(r == 0); // get a write lock on -inf ... 0 -#if TOKUDB r = cursor->c_getf_last(cursor, DB_RMW, blocking_last_callback, &context); assert(r == 0); -#else - r = cursor->c_get(cursor, &context.key, &context.val, DB_LAST + DB_RMW); assert(r == 0); -#endif usleep(sleeptime); r = cursor->c_close(cursor); assert(r == 0); @@ -236,9 +230,7 @@ int test_main(int argc, char * const argv[]) { r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); } r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if TOKUDB r = db_env->set_lock_timeout(db_env, 30 * 1000, nullptr); assert(r == 0); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/blocking-next-prev-deadlock.cc b/storage/tokudb/ft-index/src/tests/blocking-next-prev-deadlock.cc index fb591feff2893..dac4aa1ad44dc 100644 --- a/storage/tokudb/ft-index/src/tests/blocking-next-prev-deadlock.cc +++ b/storage/tokudb/ft-index/src/tests/blocking-next-prev-deadlock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -124,7 +124,6 @@ struct my_callback_context { DBT val; }; -#if TOKUDB static int blocking_next_callback(DBT const *a UU(), DBT const *b UU(), void *e UU()) { DBT const *found_key = a; DBT const *found_val = b; @@ -133,7 +132,6 @@ static int blocking_next_callback(DBT const *a UU(), DBT const *b UU(), void *e copy_dbt(&context->val, found_val); return 0; } -#endif static void blocking_next(DB_ENV *db_env, DB *db, uint64_t nrows UU(), long sleeptime) { int r; @@ -150,11 +148,7 @@ static void blocking_next(DB_ENV *db_env, DB *db, uint64_t nrows UU(), long slee uint64_t i; for (i = 0; ; i++) { -#if TOKUDB r = cursor->c_getf_next(cursor, DB_RMW, blocking_next_callback, &context); -#else - r = cursor->c_get(cursor, &context.key, &context.val, DB_NEXT + DB_RMW); -#endif if (r != 0) break; if (verbose) @@ -196,11 +190,7 @@ static void blocking_prev(DB_ENV *db_env, DB *db, uint64_t nrows UU(), long slee uint64_t i; for (i = 0; ; i++) { -#if TOKUDB r = cursor->c_getf_prev(cursor, DB_RMW, blocking_next_callback, &context); -#else - r = cursor->c_get(cursor, &context.key, &context.val, DB_PREV + DB_RMW); -#endif if (r != 0) break; if (verbose) @@ -305,11 +295,7 @@ int test_main(int argc, char * const argv[]) { r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); } r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if TOKUDB r = db_env->set_lock_timeout(db_env, 30 * 1000, nullptr); assert(r == 0); -#else - r = db_env->set_lk_detect(db_env, DB_LOCK_YOUNGEST); assert(r == 0); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/blocking-next-prev.cc b/storage/tokudb/ft-index/src/tests/blocking-next-prev.cc index 86644a13cf5b4..5fa2f781fb75a 100644 --- a/storage/tokudb/ft-index/src/tests/blocking-next-prev.cc +++ b/storage/tokudb/ft-index/src/tests/blocking-next-prev.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -123,7 +123,6 @@ struct my_callback_context { DBT val; }; -#if TOKUDB static int blocking_next_callback(DBT const *a UU(), DBT const *b UU(), void *e UU()) { DBT const *found_key = a; DBT const *found_val = b; @@ -132,7 +131,6 @@ static int blocking_next_callback(DBT const *a UU(), DBT const *b UU(), void *e copy_dbt(&context->val, found_val); return 0; } -#endif static void blocking_next(DB_ENV *db_env, DB *db, uint64_t nrows UU(), long sleeptime) { int r; @@ -149,11 +147,7 @@ static void blocking_next(DB_ENV *db_env, DB *db, uint64_t nrows UU(), long slee uint64_t i; for (i = 0; ; i++) { -#if TOKUDB r = cursor->c_getf_next(cursor, 0, blocking_next_callback, &context); -#else - r = cursor->c_get(cursor, &context.key, &context.val, DB_NEXT); -#endif if (r != 0) break; if (verbose) @@ -199,11 +193,7 @@ static void blocking_prev(DB_ENV *db_env, DB *db, uint64_t nrows UU(), long slee uint64_t i; for (i = 0; ; i++) { -#if TOKUDB r = cursor->c_getf_prev(cursor, 0, blocking_next_callback, &context); -#else - r = cursor->c_get(cursor, &context.key, &context.val, DB_PREV); -#endif if (r != 0) break; if (verbose) diff --git a/storage/tokudb/ft-index/src/tests/blocking-prelock-range.cc b/storage/tokudb/ft-index/src/tests/blocking-prelock-range.cc index cece47cbeecd8..78d2975f81b25 100644 --- a/storage/tokudb/ft-index/src/tests/blocking-prelock-range.cc +++ b/storage/tokudb/ft-index/src/tests/blocking-prelock-range.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -181,9 +181,7 @@ int test_main(int argc, char * const argv[]) { r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); } r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if TOKUDB r = db_env->set_lock_timeout(db_env, 30 * 1000, nullptr); assert(r == 0); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/blocking-put-timeout.cc b/storage/tokudb/ft-index/src/tests/blocking-put-timeout.cc index c43bcfad5f02e..13fddb8d05b12 100644 --- a/storage/tokudb/ft-index/src/tests/blocking-put-timeout.cc +++ b/storage/tokudb/ft-index/src/tests/blocking-put-timeout.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -156,11 +156,7 @@ static void t_b(DB_ENV *db_env, DB *db, struct test_seq *seq) { DBT key = { .data = &k, .size = sizeof k }; DBT val = { .data = &k, .size = sizeof k }; r = db->put(db, txn_b, &key, &val, 0); -#if USE_BDB - assert(r == DB_LOCK_DEADLOCK); -#else assert(r == DB_LOCK_NOTGRANTED); -#endif r = txn_b->abort(txn_b); assert(r == 0); } @@ -212,31 +208,12 @@ int test_main(int argc, char * const argv[]) { r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); } r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if USE_BDB - db_timeout_t t; - r = db_env->get_timeout(db_env, &t, DB_SET_LOCK_TIMEOUT); assert(r == 0); - if (verbose) printf("lock %d\n", t); - r = db_env->get_timeout(db_env, &t, DB_SET_TXN_TIMEOUT); assert(r == 0); - if (verbose) printf("txn %d\n", t); - - r = db_env->set_timeout(db_env, 5000000, DB_SET_LOCK_TIMEOUT); assert(r == 0); - r = db_env->set_timeout(db_env, 5000000, DB_SET_TXN_TIMEOUT); assert(r == 0); - - r = db_env->get_timeout(db_env, &t, DB_SET_LOCK_TIMEOUT); assert(r == 0); - if (verbose) printf("lock %d\n", t); - r = db_env->get_timeout(db_env, &t, DB_SET_TXN_TIMEOUT); assert(r == 0); - if (verbose) printf("txn %d\n", t); - - r = db_env->set_lk_detect(db_env, DB_LOCK_EXPIRE); assert(r == 0); -#endif -#if USE_TDB uint64_t lock_timeout_msec; r = db_env->get_lock_timeout(db_env, &lock_timeout_msec); assert(r == 0); if (verbose) printf("lock timeout: %" PRIu64 "\n", lock_timeout_msec); r = db_env->set_lock_timeout(db_env, 5000, nullptr); assert(r == 0); r = db_env->get_lock_timeout(db_env, &lock_timeout_msec); assert(r == 0); if (verbose) printf("lock timeout: %" PRIu64 "\n", lock_timeout_msec); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/blocking-put-wakeup.cc b/storage/tokudb/ft-index/src/tests/blocking-put-wakeup.cc index 9c8ad60787bdb..c5052fbf8133b 100644 --- a/storage/tokudb/ft-index/src/tests/blocking-put-wakeup.cc +++ b/storage/tokudb/ft-index/src/tests/blocking-put-wakeup.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -208,9 +208,7 @@ int test_main(int argc, char * const argv[]) { r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); } r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if TOKUDB r = db_env->set_lock_timeout(db_env, 30 * 1000, nullptr); assert(r == 0); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/blocking-put.cc b/storage/tokudb/ft-index/src/tests/blocking-put.cc index afd083986d8bd..8b1cf71e35999 100644 --- a/storage/tokudb/ft-index/src/tests/blocking-put.cc +++ b/storage/tokudb/ft-index/src/tests/blocking-put.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -180,9 +180,7 @@ int test_main(int argc, char * const argv[]) { r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); } r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if TOKUDB r = db_env->set_lock_timeout(db_env, 30 * 1000, nullptr); assert(r == 0); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/blocking-set-range-0.cc b/storage/tokudb/ft-index/src/tests/blocking-set-range-0.cc index 8d2dad831a27c..8445493832fac 100644 --- a/storage/tokudb/ft-index/src/tests/blocking-set-range-0.cc +++ b/storage/tokudb/ft-index/src/tests/blocking-set-range-0.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -117,7 +117,6 @@ struct my_callback_context { DBT val; }; -#if TOKUDB static int blocking_set_range_callback(DBT const *a UU(), DBT const *b UU(), void *e UU()) { DBT const *found_key = a; DBT const *found_val = b; @@ -126,7 +125,6 @@ static int blocking_set_range_callback(DBT const *a UU(), DBT const *b UU(), voi copy_dbt(&context->val, found_val); return 0; } -#endif static void blocking_set_range(DB_ENV *db_env, DB *db, uint64_t nrows, long sleeptime, uint64_t the_key) { int r; @@ -144,11 +142,7 @@ static void blocking_set_range(DB_ENV *db_env, DB *db, uint64_t nrows, long slee uint64_t k = htonl(the_key); DBT key = { .data = &k, .size = sizeof k }; -#if TOKUDB r = cursor->c_getf_set_range(cursor, DB_RMW, &key, blocking_set_range_callback, &context); assert(r == 0); -#else - r = cursor->c_get(cursor, &key, &context.val, DB_SET_RANGE + DB_RMW); assert(r == 0); -#endif uint64_t v; assert(context.val.size == sizeof v); memcpy(&v, context.val.data, context.val.size); @@ -246,9 +240,7 @@ int test_main(int argc, char * const argv[]) { r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); } r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if TOKUDB r = db_env->set_lock_timeout(db_env, 30 * 1000, nullptr); assert(r == 0); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/blocking-set-range-n.cc b/storage/tokudb/ft-index/src/tests/blocking-set-range-n.cc index 838d18ed6d539..a37e5b2a0a730 100644 --- a/storage/tokudb/ft-index/src/tests/blocking-set-range-n.cc +++ b/storage/tokudb/ft-index/src/tests/blocking-set-range-n.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -117,7 +117,6 @@ struct my_callback_context { DBT val; }; -#if TOKUDB static int blocking_set_range_callback(DBT const *a UU(), DBT const *b UU(), void *e UU()) { DBT const *found_key = a; DBT const *found_val = b; @@ -126,7 +125,6 @@ static int blocking_set_range_callback(DBT const *a UU(), DBT const *b UU(), voi copy_dbt(&context->val, found_val); return 0; } -#endif static void blocking_set_range(DB_ENV *db_env, DB *db, uint64_t nrows, long sleeptime, uint64_t the_key) { int r; @@ -144,11 +142,7 @@ static void blocking_set_range(DB_ENV *db_env, DB *db, uint64_t nrows, long slee uint64_t k = htonl(the_key); DBT key = { .data = &k, .size = sizeof k }; -#if TOKUDB r = cursor->c_getf_set_range(cursor, DB_RMW, &key, blocking_set_range_callback, &context); assert(r == DB_NOTFOUND); -#else - r = cursor->c_get(cursor, &key, &context.val, DB_SET_RANGE + DB_RMW); assert(r == DB_NOTFOUND); -#endif usleep(sleeptime); r = cursor->c_close(cursor); assert(r == 0); @@ -241,9 +235,7 @@ int test_main(int argc, char * const argv[]) { r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); } r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if TOKUDB r = db_env->set_lock_timeout(db_env, 30 * 1000, nullptr); assert(r == 0); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/blocking-set-range-reverse-0.cc b/storage/tokudb/ft-index/src/tests/blocking-set-range-reverse-0.cc index 7d06dc4c9e00d..dc79522d629a6 100644 --- a/storage/tokudb/ft-index/src/tests/blocking-set-range-reverse-0.cc +++ b/storage/tokudb/ft-index/src/tests/blocking-set-range-reverse-0.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -142,11 +142,7 @@ static void blocking_set_range(DB_ENV *db_env, DB *db, uint64_t nrows, long slee uint64_t k = htonl(the_key); DBT key = { .data = &k, .size = sizeof k }; -#if TOKUDB r = cursor->c_getf_set_range_reverse(cursor, 0, &key, blocking_set_range_callback, &context); assert(r == 0); -#else - r = cursor->c_get(cursor, &key, &context.val, DB_SET_RANGE_REVERSE); assert(r == 0); -#endif uint64_t v; assert(context.val.size == sizeof v); memcpy(&v, context.val.data, context.val.size); @@ -244,9 +240,7 @@ int test_main(int argc, char * const argv[]) { r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); } r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if TOKUDB r = db_env->set_lock_timeout(db_env, 30 * 1000, nullptr); assert(r == 0); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/blocking-set.cc b/storage/tokudb/ft-index/src/tests/blocking-set.cc index d229321aa4320..4bb3c5dd1eb4b 100644 --- a/storage/tokudb/ft-index/src/tests/blocking-set.cc +++ b/storage/tokudb/ft-index/src/tests/blocking-set.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -112,7 +112,6 @@ static void populate(DB_ENV *db_env, DB *db, uint64_t nrows) { r = txn->commit(txn, 0); assert(r == 0); } -#if TOKUDB static int blocking_set_callback(DBT const *a UU(), DBT const *b UU(), void *e UU()) { // DBT const *found_key = a; DBT const *found_val = b; @@ -123,7 +122,6 @@ static int blocking_set_callback(DBT const *a UU(), DBT const *b UU(), void *e U memcpy(my_val->data, found_val->data, found_val->size); return 0; } -#endif static void blocking_set(DB_ENV *db_env, DB *db, uint64_t nrows, long sleeptime) { int r; @@ -140,11 +138,7 @@ static void blocking_set(DB_ENV *db_env, DB *db, uint64_t nrows, long sleeptime) uint64_t k = htonl(0); // set to key 0 DBT key = { .data = &k, .size = sizeof k }; -#if TOKUDB r = cursor->c_getf_set(cursor, DB_RMW, &key, blocking_set_callback, &val); assert(r == 0); -#else - r = cursor->c_get(cursor, &key, &val, DB_SET + DB_RMW); assert(r == 0); -#endif uint64_t v; assert(val.size == sizeof v); memcpy(&v, val.data, val.size); @@ -226,9 +220,7 @@ int test_main(int argc, char * const argv[]) { r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); } r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if TOKUDB r = db_env->set_lock_timeout(db_env, 30 * 1000, nullptr); assert(r == 0); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/blocking-table-lock.cc b/storage/tokudb/ft-index/src/tests/blocking-table-lock.cc index 2ae82319a7ed8..8258a698784de 100644 --- a/storage/tokudb/ft-index/src/tests/blocking-table-lock.cc +++ b/storage/tokudb/ft-index/src/tests/blocking-table-lock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -174,9 +174,7 @@ int test_main(int argc, char * const argv[]) { r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0); } r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if TOKUDB r = db_env->set_lock_timeout(db_env, 30 * 1000, nullptr); assert(r == 0); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/bug1381.cc b/storage/tokudb/ft-index/src/tests/bug1381.cc index c603d5e3ab200..988538ef782d2 100644 --- a/storage/tokudb/ft-index/src/tests/bug1381.cc +++ b/storage/tokudb/ft-index/src/tests/bug1381.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/cachetable-race.cc b/storage/tokudb/ft-index/src/tests/cachetable-race.cc index 1e0ffaad40c2d..0ff1fc11b4e0f 100644 --- a/storage/tokudb/ft-index/src/tests/cachetable-race.cc +++ b/storage/tokudb/ft-index/src/tests/cachetable-race.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/checkpoint1.cc b/storage/tokudb/ft-index/src/tests/checkpoint1.cc index 3620b9006a829..68300dee6fb36 100644 --- a/storage/tokudb/ft-index/src/tests/checkpoint1.cc +++ b/storage/tokudb/ft-index/src/tests/checkpoint1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -118,9 +118,7 @@ checkpoint1 (void) toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); r = db_env_create(&env, 0); CKERR(r); -#ifdef TOKUDB r = env->set_redzone(env, 0); CKERR(r); -#endif r = env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = db_create(&db, env, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/checkpoint_fairness.cc b/storage/tokudb/ft-index/src/tests/checkpoint_fairness.cc index c94e1f4b1aba3..3e76020d70b28 100644 --- a/storage/tokudb/ft-index/src/tests/checkpoint_fairness.cc +++ b/storage/tokudb/ft-index/src/tests/checkpoint_fairness.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -147,9 +147,7 @@ int test_main(int argc, char * const argv[]) { // try to starve the checkpoint { int chk_r = db_env_create(&env, 0); CKERR(chk_r); } -#ifdef USE_TDB { int chk_r = env->set_redzone(env, 0); CKERR(chk_r); } -#endif { const int size = 10+strlen(env_dir); char cmd[size]; diff --git a/storage/tokudb/ft-index/src/tests/checkpoint_stress.cc b/storage/tokudb/ft-index/src/tests/checkpoint_stress.cc index ebbaf4085fc04..1f39061bb1e78 100644 --- a/storage/tokudb/ft-index/src/tests/checkpoint_stress.cc +++ b/storage/tokudb/ft-index/src/tests/checkpoint_stress.cc @@ -27,7 +27,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -272,13 +272,10 @@ random_acts(void * d) { } } - -#if IS_TDB && !TOKU_WINDOWS return intothevoid; -#endif } -uint64_t max_windows_cachesize = 256 << 20; +uint64_t max_cachesize = 256 << 20; static void run_test (int iter, int die) { @@ -296,7 +293,7 @@ run_test (int iter, int die) { const int32_t K256 = 256 * 1024; uint64_t cachebytes = 0; cachebytes = K256 * (iter + 1) - (128 * 1024); - if (cachebytes > max_windows_cachesize) + if (cachebytes > max_cachesize) cachebytes = 0; if (iter & 2) cachebytes = 0; // use default cachesize half the time diff --git a/storage/tokudb/ft-index/src/tests/checkpoint_test.h b/storage/tokudb/ft-index/src/tests/checkpoint_test.h index e9d4290a406e2..4ea74e09edeba 100644 --- a/storage/tokudb/ft-index/src/tests/checkpoint_test.h +++ b/storage/tokudb/ft-index/src/tests/checkpoint_test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,13 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." 
#ident "$Id$" -#ifndef CHECKPOINT_TEST_H -#define CHECKPOINT_TEST_H - - DB_ENV *env; enum {MAX_NAME=128}; @@ -537,6 +535,3 @@ snapshot(DICTIONARY d, int do_checkpoint) { db_startup(d, NULL); } } - - -#endif diff --git a/storage/tokudb/ft-index/src/tests/cilktests/bar2.cilk b/storage/tokudb/ft-index/src/tests/cilktests/bar2.cilk deleted file mode 100644 index 3939cee64bc14..0000000000000 --- a/storage/tokudb/ft-index/src/tests/cilktests/bar2.cilk +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include -#include "foo2.h" - -extern "Cilk++" -int foo2 (int i) { - return i+1; -} - -int foo (int i) { - int r; - r = cilk_spawn foo2 (i+1); - cilk_sync; - return r+1; -} - -extern "C++" -void do_foo (void) { - printf("Running cilk\n"); - int r = cilk::run(&foo, 3); - printf("Done r =%d\n", r); -} diff --git a/storage/tokudb/ft-index/src/tests/cilktests/foo.cilk b/storage/tokudb/ft-index/src/tests/cilktests/foo.cilk deleted file mode 100644 index dfc5540db4b0b..0000000000000 --- a/storage/tokudb/ft-index/src/tests/cilktests/foo.cilk +++ /dev/null @@ -1,41 +0,0 @@ -#include -#include -#include -#include - -pthread_t pt[2]; -pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - -extern "C" void* start (void *extra __attribute__((__unused__))) { - { int r = pthread_mutex_lock(&mutex); assert(r==0); } - printf("T%lx got lock\n", pthread_self()); - sleep(1); - printf("T%lx releasing lock\n", pthread_self()); - { int r = pthread_mutex_unlock(&mutex); assert(r==0); } - return 0; -} - -void create_pthread(void) { - for (int i=0; i<2; i++) { - int r = pthread_create(&pt[i], 0, start, NULL); - assert(r==0); - } -} - -void join_pthread (void) { - for (int i=0; i<2; i++) { - int r = pthread_join(pt[i], NULL); - assert(r==0); - } -} - -void foo (void) { -} - -int cilk_main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) { - create_pthread(); - cilk_spawn foo(); - cilk_sync; - join_pthread(); - return 0; -} diff --git a/storage/tokudb/ft-index/src/tests/cilktests/foo2.cpp b/storage/tokudb/ft-index/src/tests/cilktests/foo2.cpp deleted file mode 100644 index edc8e7f8cf0a7..0000000000000 --- a/storage/tokudb/ft-index/src/tests/cilktests/foo2.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: expandtab:ts=8:sw=4:softtabstop=4: -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
- -#include -#include -#include -#include -#include "foo2.h" -#include "cilk.h" - -pthread_t pt[2]; -pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - -extern "C" void* start (void *extra __attribute__((__unused__))) { - { int r = pthread_mutex_lock(&mutex); assert(r==0); } - printf("T%lx got lock\n", pthread_self()); - sleep(1); - printf("T%lx releasing lock\n", pthread_self()); - { int r = pthread_mutex_unlock(&mutex); assert(r==0); } - return 0; -} - -void create_pthread(void) { - for (int i=0; i<2; i++) { - int r = pthread_create(&pt[i], 0, start, NULL); - assert(r==0); - } -} - -void join_pthread (void) { - for (int i=0; i<2; i++) { - int r = pthread_join(pt[i], NULL); - assert(r==0); - } -} - -int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) { - __cilkscreen_disable_instrumentation(); - create_pthread(); - __cilkscreen_enable_instrumentation(); - do_foo(); - join_pthread(); - return 0; -} diff --git a/storage/tokudb/ft-index/src/tests/cilktests/get.cilk b/storage/tokudb/ft-index/src/tests/cilktests/get.cilk deleted file mode 100644 index 810ab332581b0..0000000000000 --- a/storage/tokudb/ft-index/src/tests/cilktests/get.cilk +++ /dev/null @@ -1,27 +0,0 @@ -/* Perform a DB->get in a cilk thread */ - -#include -#include -#include -#include - -#define DIR __FILE__ ".dir" - -DB_ENV *env; - -void foo (void) { - printf("foo\n"); -} - -const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE; - -int cilk_main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) { - system("rm -rf " DIR); - toku_os_mkdir(DIR, 0777); - { int r = db_env_create(&env, 0); assert(r==0); } - { int r = env->open(env, DIR, envflags,S_IRWXU+S_IRWXG+S_IRWXO); assert(r==0); } - cilk_spawn foo(); - cilk_sync; - { int r = env->close(env, 0); assert(r==0); } - return 0; -} diff --git a/storage/tokudb/ft-index/src/tests/cilktests/measurecilkrun.cilk b/storage/tokudb/ft-index/src/tests/cilktests/measurecilkrun.cilk deleted file mode 100644 index b6949b7e989fb..0000000000000 --- a/storage/tokudb/ft-index/src/tests/cilktests/measurecilkrun.cilk +++ /dev/null @@ -1,106 +0,0 @@ -#include -#include -#include -#include -#include "cilk.h" -#include - -int foo (int i) { - return i+1; -} - -int fooi (void* iv) { - int *ip=(int*)iv; - return foo(*ip); -} - -extern "C++" { - -int N = 1000; - -double tdiff (struct timeval *after, struct timeval *before) -{ - return after->tv_sec - before->tv_sec + (1e-6)*(after->tv_usec - before->tv_usec); -} - - -void do_cilkrun (void) { - cilk::run(&foo, 0); -} - -static cilk::context *ctx; -void do_cilkcxt (void) { - int i = 0; - int j __attribute__((__unused__)) = ctx->run(&fooi, (void*)&i); -} - -void do_N_cilkcxt (void) { - struct timeval start,end; - gettimeofday(&start, 0); - cilk::context ctx; - int r=0; - for (int i=0; iopen(db, NULL, "bdir/b.db", NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); -#if USE_TDB CKERR(r); //Success, so need a new handle r = db->close(db, 0); CKERR(r); r = db_create(&db, env, 0); CKERR(r); -#else - assert(r != 0); -#endif char path[TOKU_PATH_MAX+1]; r = toku_os_mkdir(toku_path_join(path, 2, TOKU_TEST_FILENAME, "bdir"), 0777); assert(r == 0); r = db->open(db, NULL, "bdir/b.db", NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r); @@ -137,13 +133,6 @@ static void run_test (void) { r = db->open(db, NULL, "c.db", NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r); r = db->close(db, 0); CKERR(r); -#if 0 - // test fname with absolute path - 
r = db_create(&db, env, 0); CKERR(r); - r = db->open(db, NULL, "/tmp/d.db", NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r); - r = db->close(db, 0); CKERR(r); -#endif - r = env->close(env, 0); CKERR(r); } diff --git a/storage/tokudb/ft-index/src/tests/cursor-isolation.cc b/storage/tokudb/ft-index/src/tests/cursor-isolation.cc index 1a1450f2f630b..ec91f5b73d164 100644 --- a/storage/tokudb/ft-index/src/tests/cursor-isolation.cc +++ b/storage/tokudb/ft-index/src/tests/cursor-isolation.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/cursor-more-than-a-leaf-provdel.cc b/storage/tokudb/ft-index/src/tests/cursor-more-than-a-leaf-provdel.cc index 5b3ba8a37cb1b..4587402c28686 100644 --- a/storage/tokudb/ft-index/src/tests/cursor-more-than-a-leaf-provdel.cc +++ b/storage/tokudb/ft-index/src/tests/cursor-more-than-a-leaf-provdel.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -104,22 +104,14 @@ setup (void) { r=toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_env_create(&env, 0); CKERR(r); -#ifdef TOKUDB r=env->set_redzone(env, 0); CKERR(r); r=env->set_default_bt_compare(env, int_dbt_cmp); CKERR(r); -#endif env->set_errfile(env, stderr); -#ifdef USE_BDB - r=env->set_lk_max_objects(env, 2*num_insert); CKERR(r); -#endif r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_create(&db, env, 0); CKERR(r); r=env->txn_begin(env, 0, &txn, 0); assert(r==0); -#ifdef USE_BDB - r=db->set_bt_compare(db, int_dbt_cmp); CKERR(r); -#endif r=db->open(db, txn, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=txn->commit(txn, 0); assert(r==0); } diff --git a/storage/tokudb/ft-index/src/tests/cursor-set-del-rmw.cc b/storage/tokudb/ft-index/src/tests/cursor-set-del-rmw.cc index 38ccf11269763..79df796a6c143 100644 --- a/storage/tokudb/ft-index/src/tests/cursor-set-del-rmw.cc +++ b/storage/tokudb/ft-index/src/tests/cursor-set-del-rmw.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/cursor-set-range-rmw.cc b/storage/tokudb/ft-index/src/tests/cursor-set-range-rmw.cc index fb5dbca72b406..4f0dce02edd87 100644 --- a/storage/tokudb/ft-index/src/tests/cursor-set-range-rmw.cc +++ b/storage/tokudb/ft-index/src/tests/cursor-set-range-rmw.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/cursor-step-over-delete.cc b/storage/tokudb/ft-index/src/tests/cursor-step-over-delete.cc index 748b513589926..7c57475da6a52 100644 --- a/storage/tokudb/ft-index/src/tests/cursor-step-over-delete.cc +++ b/storage/tokudb/ft-index/src/tests/cursor-step-over-delete.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock-threads.cc b/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock-threads.cc index 004a7aad12dac..6227b602df26c 100644 --- a/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock-threads.cc +++ b/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock-threads.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -184,9 +184,6 @@ static void simple_deadlock(DB_ENV *db_env, DB *db, int do_txn, int n) { } uint32_t txn_flags = 0; -#if USE_BDB - txn_flags = DB_TXN_NOWAIT; // force no wait for BDB to avoid a bug described below -#endif DB_TXN *txn_a = NULL; if (do_txn) { @@ -209,7 +206,6 @@ static void simple_deadlock(DB_ENV *db_env, DB *db, int do_txn, int n) { test_seq_next_state(&test_seq); test_seq_sleep(&test_seq, 2); - // BDB does not time out this lock request, so the test hangs. it looks like a bug in bdb's __lock_get_internal. insert_row(db, txn_a, htonl(n-1), n-1, DB_LOCK_NOTGRANTED); test_seq_next_state(&test_seq); @@ -268,16 +264,8 @@ int test_main(int argc, char * const argv[]) { } if (!do_txn) db_env_open_flags &= ~(DB_INIT_TXN | DB_INIT_LOG); -#if USE_BDB - r = db_env->set_flags(db_env, DB_TIME_NOTGRANTED, 1); assert(r == 0); // force DB_LOCK_DEADLOCK to DB_LOCK_NOTGRANTED -#endif r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if defined(USE_TDB) r = db_env->set_lock_timeout(db_env, 0, nullptr); assert(r == 0); // no wait -#elif defined(USE_BDB) - r = db_env->set_lk_detect(db_env, DB_LOCK_YOUNGEST); assert(r == 0); - r = db_env->set_timeout(db_env, 10000, DB_SET_LOCK_TIMEOUT); assert(r == 0); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock.cc b/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock.cc index d373752994a4c..46109e9592f0d 100644 --- a/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock.cc +++ b/storage/tokudb/ft-index/src/tests/db-put-simple-deadlock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,7 +88,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-// this test demonstrates that a simple deadlock with 2 transactions on a single thread works with tokudb, hangs with bdb +// this test demonstrates that a simple deadlock with 2 transactions on a single thread works with tokudb #include "test.h" @@ -115,9 +115,6 @@ static void simple_deadlock(DB_ENV *db_env, DB *db, int do_txn, int n) { } uint32_t txn_flags = 0; -#if USE_BDB - txn_flags = DB_TXN_NOWAIT; // force no wait for BDB to avoid a bug described below -#endif DB_TXN *txn_a = NULL; if (do_txn) { @@ -133,7 +130,6 @@ static void simple_deadlock(DB_ENV *db_env, DB *db, int do_txn, int n) { insert_row(db, txn_b, htonl(n-1), n-1, 0); - // if the txn_flags is 0, then BDB does not time out this lock request, so the test hangs. it looks like a bug in bdb's __lock_get_internal. insert_row(db, txn_a, htonl(n-1), n-1, DB_LOCK_NOTGRANTED); insert_row(db, txn_b, htonl(0), 0, DB_LOCK_NOTGRANTED); @@ -148,7 +144,7 @@ int test_main(int argc, char * const argv[]) { uint64_t cachesize = 0; uint32_t pagesize = 0; int do_txn = 1; - int nrows = 1000; // for BDB, insert enough rows to create a tree with more than one page in it. this avoids a page locking conflict. + int nrows = 1000; const char *db_env_dir = TOKU_TEST_FILENAME; const char *db_filename = "simple_deadlock"; int db_env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG | DB_THREAD; @@ -187,14 +183,7 @@ int test_main(int argc, char * const argv[]) { } if (!do_txn) db_env_open_flags &= ~(DB_INIT_TXN | DB_INIT_LOG); -#if USE_BDB - r = db_env->set_flags(db_env, DB_TIME_NOTGRANTED, 1); assert(r == 0); // force DB_LOCK_DEADLOCK to DB_LOCK_NOTGRANTED -#endif r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if defined(USE_BDB) - r = db_env->set_lk_detect(db_env, DB_LOCK_YOUNGEST); assert(r == 0); - r = db_env->set_timeout(db_env, 1000, DB_SET_LOCK_TIMEOUT); assert(r == 0); -#endif // create the db DB *db = NULL; r = db_create(&db, db_env, 0); assert(r == 0); diff --git a/storage/tokudb/ft-index/src/tests/db-put-simple-lockwait.cc b/storage/tokudb/ft-index/src/tests/db-put-simple-lockwait.cc index 54682db81af38..6466bd2e9ea70 100644 --- a/storage/tokudb/ft-index/src/tests/db-put-simple-lockwait.cc +++ b/storage/tokudb/ft-index/src/tests/db-put-simple-lockwait.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -213,11 +213,7 @@ int test_main(int argc, char * const argv[]) { if (!do_txn) db_env_open_flags &= ~(DB_INIT_TXN | DB_INIT_LOG); r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if TOKUDB r = db_env->set_lock_timeout(db_env, 30 * 1000, nullptr); assert(r == 0); -#else - r = db_env->set_lk_detect(db_env, DB_LOCK_YOUNGEST); assert(r == 0); -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/db-put-update-deadlock.cc b/storage/tokudb/ft-index/src/tests/db-put-update-deadlock.cc index 1a546825fc44f..cfbf95cd59920 100644 --- a/storage/tokudb/ft-index/src/tests/db-put-update-deadlock.cc +++ b/storage/tokudb/ft-index/src/tests/db-put-update-deadlock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -91,7 +91,6 @@ PATENT RIGHTS GRANT: // for all i: T(i) reads 0, gets a read lock on 0 // for all i: T(i) writes 0, enters a deadlock // tokudb detects deadlock on the fly -// bdb detects deadlock on the fly or uses a deadlock detector // --poll runs the deadlock detector until all the txns are resolved #include "test.h" @@ -201,18 +200,6 @@ static void update_deadlock(DB_ENV *db_env, DB *db, int do_txn, int nrows, int n } #endif -#if defined(USE_BDB) - // check for deadlocks - if (poll_deadlock) { - while (n_txns > 0) { - sleep(10); - int rejected = 0; - r = db_env->lock_detect(db_env, 0, DB_LOCK_YOUNGEST, &rejected); assert(r == 0); - printf("%s rejected %d\n", __FUNCTION__, rejected); - } - } -#endif - // cleanup for (int i = 0; i < ntxns; i++) { void *ret = NULL; @@ -274,14 +261,7 @@ int test_main(int argc, char * const argv[]) { if (!do_txn) db_env_open_flags &= ~(DB_INIT_TXN | DB_INIT_LOG); r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0); -#if defined(TOKUDB) r = db_env->set_lock_timeout(db_env, 30 * 1000, nullptr); assert(r == 0); -#endif -#if defined(USE_BDB) - if (!poll_deadlock) { - r = db_env->set_lk_detect(db_env, DB_LOCK_YOUNGEST); assert(r == 0); - } -#endif // create the db DB *db = NULL; diff --git a/storage/tokudb/ft-index/src/tests/dbremove-nofile-limit.cc b/storage/tokudb/ft-index/src/tests/dbremove-nofile-limit.cc index eb5c6b80b63a0..cd8b50c1c5b08 100644 --- a/storage/tokudb/ft-index/src/tests/dbremove-nofile-limit.cc +++ b/storage/tokudb/ft-index/src/tests/dbremove-nofile-limit.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/del-multiple-huge-primary-row.cc b/storage/tokudb/ft-index/src/tests/del-multiple-huge-primary-row.cc index 9d2b2b6871b11..f0ee57228ad6a 100644 --- a/storage/tokudb/ft-index/src/tests/del-multiple-huge-primary-row.cc +++ b/storage/tokudb/ft-index/src/tests/del-multiple-huge-primary-row.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/del-multiple-srcdb.cc b/storage/tokudb/ft-index/src/tests/del-multiple-srcdb.cc index 5230caf3a4ee4..f14ba646e5905 100644 --- a/storage/tokudb/ft-index/src/tests/del-multiple-srcdb.cc +++ b/storage/tokudb/ft-index/src/tests/del-multiple-srcdb.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/del-multiple.cc b/storage/tokudb/ft-index/src/tests/del-multiple.cc index b54ff4fce72b5..7f3560fb45969 100644 --- a/storage/tokudb/ft-index/src/tests/del-multiple.cc +++ b/storage/tokudb/ft-index/src/tests/del-multiple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/del-simple.cc b/storage/tokudb/ft-index/src/tests/del-simple.cc index 34376637c9abd..6ae08607f4817 100644 --- a/storage/tokudb/ft-index/src/tests/del-simple.cc +++ b/storage/tokudb/ft-index/src/tests/del-simple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/directory_lock.cc b/storage/tokudb/ft-index/src/tests/directory_lock.cc index ed89e0049002b..c67dfab20c8c7 100644 --- a/storage/tokudb/ft-index/src/tests/directory_lock.cc +++ b/storage/tokudb/ft-index/src/tests/directory_lock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/diskfull.cc b/storage/tokudb/ft-index/src/tests/diskfull.cc index fdce56aa2518c..d52f621a17489 100644 --- a/storage/tokudb/ft-index/src/tests/diskfull.cc +++ b/storage/tokudb/ft-index/src/tests/diskfull.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/dump-env.cc b/storage/tokudb/ft-index/src/tests/dump-env.cc index 9e1459b80eca9..7815aa5bd63a3 100644 --- a/storage/tokudb/ft-index/src/tests/dump-env.cc +++ b/storage/tokudb/ft-index/src/tests/dump-env.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,6 @@ static DB_ENV *env; static DB *db; DB_TXN *txn; -const int num_insert = 25000; static void setup (void) { @@ -106,22 +105,14 @@ setup (void) { } r=db_env_create(&env, 0); CKERR(r); -#ifdef TOKUDB r=env->set_redzone(env, 0); CKERR(r); r=env->set_default_bt_compare(env, int_dbt_cmp); CKERR(r); -#endif env->set_errfile(env, stderr); -#ifdef USE_BDB - r=env->set_lk_max_objects(env, 2*num_insert); CKERR(r); -#endif r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_create(&db, env, 0); CKERR(r); r=env->txn_begin(env, 0, &txn, 0); assert(r==0); -#ifdef USE_BDB - r=db->set_bt_compare(db, int_dbt_cmp); CKERR(r); -#endif r=db->open(db, txn, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=txn->commit(txn, 0); assert(r==0); } diff --git a/storage/tokudb/ft-index/src/tests/env-put-multiple.cc b/storage/tokudb/ft-index/src/tests/env-put-multiple.cc index 0988f3d5ca2bc..75ccb0297b3a4 100644 --- a/storage/tokudb/ft-index/src/tests/env-put-multiple.cc +++ b/storage/tokudb/ft-index/src/tests/env-put-multiple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/env_loader_memory.cc b/storage/tokudb/ft-index/src/tests/env_loader_memory.cc index 106bdefd3a97e..ed19f05f9440b 100644 --- a/storage/tokudb/ft-index/src/tests/env_loader_memory.cc +++ b/storage/tokudb/ft-index/src/tests/env_loader_memory.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/env_nproc.cc b/storage/tokudb/ft-index/src/tests/env_nproc.cc index 29bc216f9c8e7..3ed60a18e6948 100644 --- a/storage/tokudb/ft-index/src/tests/env_nproc.cc +++ b/storage/tokudb/ft-index/src/tests/env_nproc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/env_startup.cc b/storage/tokudb/ft-index/src/tests/env_startup.cc index 0fe5a4abac8cb..5be8b9849a0e5 100644 --- a/storage/tokudb/ft-index/src/tests/env_startup.cc +++ b/storage/tokudb/ft-index/src/tests/env_startup.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/filesize.cc b/storage/tokudb/ft-index/src/tests/filesize.cc index c1b7ad2df3080..6b4c03a358cb3 100644 --- a/storage/tokudb/ft-index/src/tests/filesize.cc +++ b/storage/tokudb/ft-index/src/tests/filesize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -196,11 +196,7 @@ delete_n (uint32_t ah) ndelete_notfound++; else ndelete_failed++; -#ifdef USE_BDB - assert(r==0 || r==DB_NOTFOUND); -#else CKERR(r); -#endif } static void diff --git a/storage/tokudb/ft-index/src/tests/get_key_after_bytes_unit.cc b/storage/tokudb/ft-index/src/tests/get_key_after_bytes_unit.cc index 7303ebac8c890..73a6e92b28fe0 100644 --- a/storage/tokudb/ft-index/src/tests/get_key_after_bytes_unit.cc +++ b/storage/tokudb/ft-index/src/tests/get_key_after_bytes_unit.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/get_last_key.cc b/storage/tokudb/ft-index/src/tests/get_last_key.cc index 36c7ab6325910..241652928c51e 100644 --- a/storage/tokudb/ft-index/src/tests/get_last_key.cc +++ b/storage/tokudb/ft-index/src/tests/get_last_key.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/helgrind1.cc b/storage/tokudb/ft-index/src/tests/helgrind1.cc index 5e451ab2a25d4..49572197fc1d2 100644 --- a/storage/tokudb/ft-index/src/tests/helgrind1.cc +++ b/storage/tokudb/ft-index/src/tests/helgrind1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/helgrind2.cc b/storage/tokudb/ft-index/src/tests/helgrind2.cc index 608d635a54ab8..d70c4d256dfc2 100644 --- a/storage/tokudb/ft-index/src/tests/helgrind2.cc +++ b/storage/tokudb/ft-index/src/tests/helgrind2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/helgrind3.cc b/storage/tokudb/ft-index/src/tests/helgrind3.cc index 2defde37ba57c..85f909b11f035 100644 --- a/storage/tokudb/ft-index/src/tests/helgrind3.cc +++ b/storage/tokudb/ft-index/src/tests/helgrind3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/hot-optimize-table-tests.cc b/storage/tokudb/ft-index/src/tests/hot-optimize-table-tests.cc index 6a00afa4a51b3..42f0ef86e8255 100644 --- a/storage/tokudb/ft-index/src/tests/hot-optimize-table-tests.cc +++ b/storage/tokudb/ft-index/src/tests/hot-optimize-table-tests.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -103,7 +103,7 @@ const int envflags = DB_INIT_MPOOL | DB_ENV* env; unsigned int leaf_hits; -// Custom Update Function for our test BRT. +// Custom Update Function for our test FT. static int update_func(DB* UU(db), const DBT* key, @@ -148,7 +148,7 @@ hot_test_setup(void) // Remove any previous environment. toku_os_recursive_delete(TOKU_TEST_FILENAME); - // Set up a new TokuDB. + // Set up a new environment. { int chk_r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(chk_r); } { int chk_r = db_env_create(&env, 0); CKERR(chk_r); } env->set_errfile(env, stderr); @@ -266,7 +266,7 @@ test_main(int argc, char * const argv[]) default_parse_args(argc, argv); hot_test_setup(); - // Create and Open the Database/BRT + // Create and Open the Database/FT DB *db = NULL; const unsigned int BIG = 4000000; const unsigned int SMALL = 10; diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-bw.cc b/storage/tokudb/ft-index/src/tests/hotindexer-bw.cc index fa53a4062e82c..eb6b9f1b11b41 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-bw.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-bw.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -103,7 +103,6 @@ static int num_rows; static const int FORWARD = 0; static const int BACKWARD = 1; typedef int Direction; -static const int TXN_NONE = 0; static const int TXN_CREATE = 1; static const int TXN_END = 2; typedef int TxnWork; diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-error-callback.cc b/storage/tokudb/ft-index/src/tests/hotindexer-error-callback.cc index 0f0e889d5251f..18e5a0116e1e7 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-error-callback.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-error-callback.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed-optimized.cc b/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed-optimized.cc index 4acd2c57b720b..9268a5d237030 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed-optimized.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed-optimized.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed.cc b/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed.cc index b6c409be31589..81aa83ba9f7ac 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-insert-committed.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-insert-provisional.cc b/storage/tokudb/ft-index/src/tests/hotindexer-insert-provisional.cc index 911587ff3f303..509f74fbec9ca 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-insert-provisional.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-insert-provisional.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-lock-test.cc b/storage/tokudb/ft-index/src/tests/hotindexer-lock-test.cc index 615486a6496b7..16600c765746d 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-lock-test.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-lock-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-multiclient.cc b/storage/tokudb/ft-index/src/tests/hotindexer-multiclient.cc index 89da7da3b3bd4..18dc6e5e03012 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-multiclient.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-multiclient.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-nested-insert-committed.cc b/storage/tokudb/ft-index/src/tests/hotindexer-nested-insert-committed.cc index 446fae8e98376..938ee151b69db 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-nested-insert-committed.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-nested-insert-committed.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-put-abort.cc b/storage/tokudb/ft-index/src/tests/hotindexer-put-abort.cc index 35f3e317e3e95..f81336cbee4c4 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-put-abort.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-put-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-put-commit.cc b/storage/tokudb/ft-index/src/tests/hotindexer-put-commit.cc index b8177d52e131f..2863ef4754cbf 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-put-commit.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-put-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-put-multiple.cc b/storage/tokudb/ft-index/src/tests/hotindexer-put-multiple.cc index e5bb39f93ae43..05e77137ca0cb 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-put-multiple.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-put-multiple.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort-put.cc b/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort-put.cc index 41a7cc5b817c1..0aabcdbdd4aed 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort-put.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort-put.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort.cc b/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort.cc index f210e0079b569..3fddf1d319f65 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-simple-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-undo-do-test.cc b/storage/tokudb/ft-index/src/tests/hotindexer-undo-do-test.cc index e1a2070a036c2..5ef06f621558c 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-undo-do-test.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-undo-do-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -95,15 +95,12 @@ PATENT RIGHTS GRANT: #include "test.h" -#include -#include -#include -#include #include #include #include +#include + #include "indexer-internal.h" -#include struct txn { TXNID xid; diff --git a/storage/tokudb/ft-index/src/tests/hotindexer-with-queries.cc b/storage/tokudb/ft-index/src/tests/hotindexer-with-queries.cc index 7770b34ae0740..c1be755b4d62f 100644 --- a/storage/tokudb/ft-index/src/tests/hotindexer-with-queries.cc +++ b/storage/tokudb/ft-index/src/tests/hotindexer-with-queries.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/inflate.cc b/storage/tokudb/ft-index/src/tests/inflate.cc index 5a727544be5ee..30f8f2199ae47 100644 --- a/storage/tokudb/ft-index/src/tests/inflate.cc +++ b/storage/tokudb/ft-index/src/tests/inflate.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -165,11 +165,7 @@ delete_n (uint32_t ah) DBT key; dbt_init(&key, &an, 4); int r = db->del(db, NULL, &key, DB_DELETE_ANY); -#ifdef USE_BDB - assert(r==0 || r==DB_NOTFOUND); -#else CKERR(r); -#endif } static void diff --git a/storage/tokudb/ft-index/src/tests/inflate2.cc b/storage/tokudb/ft-index/src/tests/inflate2.cc index 2bb930446a262..ce594cf083472 100644 --- a/storage/tokudb/ft-index/src/tests/inflate2.cc +++ b/storage/tokudb/ft-index/src/tests/inflate2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -179,11 +179,7 @@ delete_n_now (uint32_t ah) DBT key; dbt_init(&key, &an, 4); int r = db->del(db, NULL, &key, DB_DELETE_ANY); -#ifdef USE_BDB - assert(r==0 || r==DB_NOTFOUND); -#else CKERR(r); -#endif get_n(ah, DB_NOTFOUND); } diff --git a/storage/tokudb/ft-index/src/tests/insert-dup-prelock.cc b/storage/tokudb/ft-index/src/tests/insert-dup-prelock.cc index 0771056b072e1..2ba99d0bc025c 100644 --- a/storage/tokudb/ft-index/src/tests/insert-dup-prelock.cc +++ b/storage/tokudb/ft-index/src/tests/insert-dup-prelock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/isolation-read-committed.cc b/storage/tokudb/ft-index/src/tests/isolation-read-committed.cc index ce226508d7914..c949482ca16f7 100644 --- a/storage/tokudb/ft-index/src/tests/isolation-read-committed.cc +++ b/storage/tokudb/ft-index/src/tests/isolation-read-committed.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/isolation.cc b/storage/tokudb/ft-index/src/tests/isolation.cc index 485986099e0a7..dbe4ce9cb4ac1 100644 --- a/storage/tokudb/ft-index/src/tests/isolation.cc +++ b/storage/tokudb/ft-index/src/tests/isolation.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/key-val.h b/storage/tokudb/ft-index/src/tests/key-val.h index d77b8b00e05c8..9a4512bfdaccf 100644 --- a/storage/tokudb/ft-index/src/tests/key-val.h +++ b/storage/tokudb/ft-index/src/tests/key-val.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,10 +86,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." -#ifndef KEY_VAL_H -#define KEY_VAL_H // // Functions to create unique key/value pairs, row generators, checkers, ... for each of NUM_DBS // @@ -295,8 +295,3 @@ static int UU() generate_initial_table(DB *db, DB_TXN *txn, uint32_t rows) return r; } - - - - -#endif // KEY_VAL_H diff --git a/storage/tokudb/ft-index/src/tests/keyrange-merge.cc b/storage/tokudb/ft-index/src/tests/keyrange-merge.cc index 0b5df76d731bb..b53016053ce83 100644 --- a/storage/tokudb/ft-index/src/tests/keyrange-merge.cc +++ b/storage/tokudb/ft-index/src/tests/keyrange-merge.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/keyrange.cc b/storage/tokudb/ft-index/src/tests/keyrange.cc index ee63cd7cdf5d0..85ffcd23357f7 100644 --- a/storage/tokudb/ft-index/src/tests/keyrange.cc +++ b/storage/tokudb/ft-index/src/tests/keyrange.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/last-verify-time.cc b/storage/tokudb/ft-index/src/tests/last-verify-time.cc index d3b5cf456fd3b..057a711ffeccf 100644 --- a/storage/tokudb/ft-index/src/tests/last-verify-time.cc +++ b/storage/tokudb/ft-index/src/tests/last-verify-time.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/loader-blobs-create-leaf.c.notyet b/storage/tokudb/ft-index/src/tests/loader-blobs-create-leaf.c.notyet deleted file mode 100644 index 04a26a5cc25ae..0000000000000 --- a/storage/tokudb/ft-index/src/tests/loader-blobs-create-leaf.c.notyet +++ /dev/null @@ -1,149 +0,0 @@ -// verify that the loader can deal with blobs larger than the node size - -#include "test.h" - -static int my_bt_compare(DB *db, const DBT *a, const DBT *b) { - db = db; - assert(a->size == 8 && b->size == 8); - return memcmp(a->data, b->data, 8); -} - -static int my_row_generate(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val) { - dest_db = dest_db; src_db = src_db; dest_key = dest_key; dest_val = dest_val; src_key = src_key; src_val = src_val; - assert(dest_key->flags == DB_DBT_REALLOC); - dest_key->data = toku_realloc(dest_key->data, src_key->size); - memcpy(dest_key->data, src_key->data, src_key->size); - dest_key->size = src_key->size; - assert(dest_val->flags == DB_DBT_REALLOC); - dest_val->data = toku_realloc(dest_val->data, src_val->size); - memcpy(dest_val->data, src_val->data, src_val->size); - dest_val->size = src_val->size; - return 0; -} - -static void insert(DB_LOADER *loader, int k, int val_size) { - int r; - - // generate the key - char key_buffer[8]; - memset(key_buffer, 0, sizeof key_buffer); - int newa = htonl(k); - memcpy(key_buffer, &newa, sizeof newa); - - // generate the value - char *val_buffer = toku_malloc(val_size); assert(val_buffer); - memset(val_buffer, 0, val_size); - - DBT key = { .data = key_buffer, .size = sizeof key_buffer }; - DBT value = { .data = val_buffer, .size = val_size }; - r = loader->put(loader, &key, &value); - if (DISALLOW_PUTS) { - assert(r == EINVAL); - } - else { - assert_zero(r); - } - - toku_free(val_buffer); -} - -int test_main(int argc, char * const argv[]) { - uint32_t loader_flags = 0; -#if defined(TOKUDB) - char *db_env_dir = "blobs.leafsplit.env.tokudb"; -#else - char *db_env_dir = "blobs.leafsplit.env.bdb"; -#endif - int db_env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG; - char *db_filename = "blobs.db"; - int do_txn = 1; - u_int64_t cachesize = 0; - u_int32_t pagesize = 0; - - int i; - for (i = 1; i < argc; i++) { - char *arg = argv[i]; - if (strcmp(arg, "-v") == 0 || strcmp(arg, "--verbose") == 0) { - verbose++; - continue; - } - if (strcmp(arg, "-q") == 0) { - if (verbose > 0) verbose--; - continue; - } - if (strcmp(arg, "-z") == 0) { - loader_flags |= LOADER_COMPRESS_INTERMEDIATES; - continue; - } - if (strcmp(arg, "-p") == 0) { - loader_flags |= LOADER_DISALLOW_PUTS; - continue; - } - if (strcmp(arg, "--txn") == 0 && i+1 < argc) { - do_txn = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--pagesize") == 0 && i+1 < argc) { - pagesize = atoi(argv[++i]); - continue; - } - if (strcmp(arg, "--cachesize") == 0 && i+1 < argc) { - cachesize = atol(argv[++i]); - continue; - } - - assert(0); - } - - int r; - char rm_cmd[strlen(db_env_dir) + strlen("rm -rf ") + 1]; - snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", db_env_dir); - r = system(rm_cmd); assert_zero(r); - - r = toku_os_mkdir(db_env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert_zero(r); - - // create and open the env - DB_ENV *db_env = NULL; - r = db_env_create(&db_env, 0); assert_zero(r); - if (cachesize) { - const u_int64_t gig = 1 << 30; - r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert_zero(r); - } - r = 
db_env->set_generate_row_callback_for_put(db_env, my_row_generate); assert_zero(r); - r = db_env->set_default_bt_compare(db_env, my_bt_compare); assert_zero(r); - if (!do_txn) - db_env_open_flags &= ~(DB_INIT_TXN | DB_INIT_LOG); - r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r); - - // create the db - DB *db = NULL; - r = db_create(&db, db_env, 0); assert_zero(r); - DB_TXN *create_txn = NULL; - if (do_txn) { - r = db_env->txn_begin(db_env, NULL, &create_txn, 0); assert_zero(r); - } - if (pagesize) { - r = db->set_pagesize(db, pagesize); assert_zero(r); - } - r = db->open(db, create_txn, db_filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert_zero(r); - - DB_LOADER *loader = NULL; - uint32_t db_flags = 0; - uint32_t dbt_flags = 0; - r = db_env->create_loader(db_env, create_txn, &loader, db, 1, &db, &db_flags, &dbt_flags, loader_flags); assert_zero(r); - - insert(loader, 1, 8000000); - insert(loader, 2, 1); - - r = loader->close(loader); assert_zero(r); - - if (do_txn) { - r = create_txn->commit(create_txn, 0); assert_zero(r); - } - - // shutdown - r = db->close(db, 0); assert_zero(r); db = NULL; - r = db_env->close(db_env, 0); assert_zero(r); db_env = NULL; - - return 0; -} diff --git a/storage/tokudb/ft-index/src/tests/loader-cleanup-test.cc b/storage/tokudb/ft-index/src/tests/loader-cleanup-test.cc index eaed9c4170a0e..c0f92c448ef9d 100644 --- a/storage/tokudb/ft-index/src/tests/loader-cleanup-test.cc +++ b/storage/tokudb/ft-index/src/tests/loader-cleanup-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/loader-nproc-close.cc b/storage/tokudb/ft-index/src/tests/loader-close-nproc-limit.cc similarity index 99% rename from storage/tokudb/ft-index/src/tests/loader-nproc-close.cc rename to storage/tokudb/ft-index/src/tests/loader-close-nproc-limit.cc index 3ef2b0541f7c5..262a63294fd57 100644 --- a/storage/tokudb/ft-index/src/tests/loader-nproc-close.cc +++ b/storage/tokudb/ft-index/src/tests/loader-close-nproc-limit.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/loader-create-abort.cc b/storage/tokudb/ft-index/src/tests/loader-create-abort.cc index 5856856469900..3d2cf84cefe5b 100644 --- a/storage/tokudb/ft-index/src/tests/loader-create-abort.cc +++ b/storage/tokudb/ft-index/src/tests/loader-create-abort.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/loader-create-close.cc b/storage/tokudb/ft-index/src/tests/loader-create-close.cc index 4d66a9df00460..8a2d043e51fbb 100644 --- a/storage/tokudb/ft-index/src/tests/loader-create-close.cc +++ b/storage/tokudb/ft-index/src/tests/loader-create-close.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/loader-create-commit-nproc-limit.cc b/storage/tokudb/ft-index/src/tests/loader-create-commit-nproc-limit.cc index c918c4bfd8ce7..62ba70fa4f8e3 100644 --- a/storage/tokudb/ft-index/src/tests/loader-create-commit-nproc-limit.cc +++ b/storage/tokudb/ft-index/src/tests/loader-create-commit-nproc-limit.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +88,10 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." #ident "$Id$" +// This test crashes if a failed loader creation causes the db to be corrupted by unlinking +// the underlying fractal tree files. This unlinking occurs because the txn that logs the +// load log entries is committed rather than aborted. + #include "test.h" #include #include diff --git a/storage/tokudb/ft-index/src/tests/loader-nproc-create.cc b/storage/tokudb/ft-index/src/tests/loader-create-nproc-limit.cc similarity index 99% rename from storage/tokudb/ft-index/src/tests/loader-nproc-create.cc rename to storage/tokudb/ft-index/src/tests/loader-create-nproc-limit.cc index 7a61fce7799e7..844ca2043c724 100644 --- a/storage/tokudb/ft-index/src/tests/loader-nproc-create.cc +++ b/storage/tokudb/ft-index/src/tests/loader-create-nproc-limit.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/loader-dup-test.cc b/storage/tokudb/ft-index/src/tests/loader-dup-test.cc index 5fa41809baaa6..d3bd2aabe5739 100644 --- a/storage/tokudb/ft-index/src/tests/loader-dup-test.cc +++ b/storage/tokudb/ft-index/src/tests/loader-dup-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/loader-no-puts.cc b/storage/tokudb/ft-index/src/tests/loader-no-puts.cc index 6fc20c5c8a158..c2c11a639a0bb 100644 --- a/storage/tokudb/ft-index/src/tests/loader-no-puts.cc +++ b/storage/tokudb/ft-index/src/tests/loader-no-puts.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/loader-reference-test.cc b/storage/tokudb/ft-index/src/tests/loader-reference-test.cc index 7fadcf150d434..4bb9334a71f8a 100644 --- a/storage/tokudb/ft-index/src/tests/loader-reference-test.cc +++ b/storage/tokudb/ft-index/src/tests/loader-reference-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/loader-stress-del.cc b/storage/tokudb/ft-index/src/tests/loader-stress-del.cc index 9578df6600388..c9a262222fb28 100644 --- a/storage/tokudb/ft-index/src/tests/loader-stress-del.cc +++ b/storage/tokudb/ft-index/src/tests/loader-stress-del.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/loader-stress-test.cc b/storage/tokudb/ft-index/src/tests/loader-stress-test.cc index f58b839b314d4..b9e51436632f5 100644 --- a/storage/tokudb/ft-index/src/tests/loader-stress-test.cc +++ b/storage/tokudb/ft-index/src/tests/loader-stress-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/loader-tpch-load.cc b/storage/tokudb/ft-index/src/tests/loader-tpch-load.cc index cbe3827582176..c89331a4200b4 100644 --- a/storage/tokudb/ft-index/src/tests/loader-tpch-load.cc +++ b/storage/tokudb/ft-index/src/tests/loader-tpch-load.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/locktree_escalation_stalls.cc b/storage/tokudb/ft-index/src/tests/locktree_escalation_stalls.cc index 037d2fc46a936..2a5581077d5f7 100644 --- a/storage/tokudb/ft-index/src/tests/locktree_escalation_stalls.cc +++ b/storage/tokudb/ft-index/src/tests/locktree_escalation_stalls.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/manyfiles.cc b/storage/tokudb/ft-index/src/tests/manyfiles.cc index 6445f1b7d3849..4c68f8d86fe41 100644 --- a/storage/tokudb/ft-index/src/tests/manyfiles.cc +++ b/storage/tokudb/ft-index/src/tests/manyfiles.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/maxsize-for-loader.cc b/storage/tokudb/ft-index/src/tests/maxsize-for-loader.cc index 02b21794abbaa..a95a42d4870e0 100644 --- a/storage/tokudb/ft-index/src/tests/maxsize-for-loader.cc +++ b/storage/tokudb/ft-index/src/tests/maxsize-for-loader.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/medium-nested-commit-commit.cc b/storage/tokudb/ft-index/src/tests/medium-nested-commit-commit.cc index e1c815695ccde..aab33584391da 100644 --- a/storage/tokudb/ft-index/src/tests/medium-nested-commit-commit.cc +++ b/storage/tokudb/ft-index/src/tests/medium-nested-commit-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -177,9 +177,6 @@ test_setup (void) { r=toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_env_create(&env, 0); CKERR(r); -#ifndef TOKUDB - r=env->set_lk_max_objects(env, N); CKERR(r); -#endif env->set_errfile(env, stderr); r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_create(&db, env, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/multiprocess.cc b/storage/tokudb/ft-index/src/tests/multiprocess.cc index fab0d7d389631..93b20d3ad7f5f 100644 --- a/storage/tokudb/ft-index/src/tests/multiprocess.cc +++ b/storage/tokudb/ft-index/src/tests/multiprocess.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/mvcc-create-table.cc b/storage/tokudb/ft-index/src/tests/mvcc-create-table.cc index 84f8c75db7ce8..db1d16167327d 100644 --- a/storage/tokudb/ft-index/src/tests/mvcc-create-table.cc +++ b/storage/tokudb/ft-index/src/tests/mvcc-create-table.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/mvcc-many-committed.cc b/storage/tokudb/ft-index/src/tests/mvcc-many-committed.cc index db261e6ae17c2..bbb7116b42d65 100644 --- a/storage/tokudb/ft-index/src/tests/mvcc-many-committed.cc +++ b/storage/tokudb/ft-index/src/tests/mvcc-many-committed.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/mvcc-read-committed.cc b/storage/tokudb/ft-index/src/tests/mvcc-read-committed.cc index 38a598ec5be45..6f8d3377c108d 100644 --- a/storage/tokudb/ft-index/src/tests/mvcc-read-committed.cc +++ b/storage/tokudb/ft-index/src/tests/mvcc-read-committed.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/openlimit17-locktree.cc b/storage/tokudb/ft-index/src/tests/openlimit17-locktree.cc index c83ec2543f935..e9b62752af40d 100644 --- a/storage/tokudb/ft-index/src/tests/openlimit17-locktree.cc +++ b/storage/tokudb/ft-index/src/tests/openlimit17-locktree.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/openlimit17-metafiles.cc b/storage/tokudb/ft-index/src/tests/openlimit17-metafiles.cc index 52c319af778c5..29dbeebef7c6c 100644 --- a/storage/tokudb/ft-index/src/tests/openlimit17-metafiles.cc +++ b/storage/tokudb/ft-index/src/tests/openlimit17-metafiles.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/openlimit17.cc b/storage/tokudb/ft-index/src/tests/openlimit17.cc index 0709d89a0c212..4f322a86f352d 100644 --- a/storage/tokudb/ft-index/src/tests/openlimit17.cc +++ b/storage/tokudb/ft-index/src/tests/openlimit17.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/perf_checkpoint_var.cc b/storage/tokudb/ft-index/src/tests/perf_checkpoint_var.cc index 0c9ad682eb9fb..d0d60641cb1f4 100644 --- a/storage/tokudb/ft-index/src/tests/perf_checkpoint_var.cc +++ b/storage/tokudb/ft-index/src/tests/perf_checkpoint_var.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/perf_child_txn.cc b/storage/tokudb/ft-index/src/tests/perf_child_txn.cc index 121d9dc3735f8..f6d2e8018eb35 100644 --- a/storage/tokudb/ft-index/src/tests/perf_child_txn.cc +++ b/storage/tokudb/ft-index/src/tests/perf_child_txn.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/perf_cursor_nop.cc b/storage/tokudb/ft-index/src/tests/perf_cursor_nop.cc index 71c5b8d170ebd..4f890ab0ca27f 100644 --- a/storage/tokudb/ft-index/src/tests/perf_cursor_nop.cc +++ b/storage/tokudb/ft-index/src/tests/perf_cursor_nop.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/perf_iibench.cc b/storage/tokudb/ft-index/src/tests/perf_iibench.cc index b5e094d2c152e..b9d142c65c998 100644 --- a/storage/tokudb/ft-index/src/tests/perf_iibench.cc +++ b/storage/tokudb/ft-index/src/tests/perf_iibench.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/perf_insert.cc b/storage/tokudb/ft-index/src/tests/perf_insert.cc index 31210a6e3436c..9d621b93c0cad 100644 --- a/storage/tokudb/ft-index/src/tests/perf_insert.cc +++ b/storage/tokudb/ft-index/src/tests/perf_insert.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/perf_malloc_free.cc b/storage/tokudb/ft-index/src/tests/perf_malloc_free.cc index 451bc346897c1..ee6ca92edb475 100644 --- a/storage/tokudb/ft-index/src/tests/perf_malloc_free.cc +++ b/storage/tokudb/ft-index/src/tests/perf_malloc_free.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/perf_nop.cc b/storage/tokudb/ft-index/src/tests/perf_nop.cc index 9f6b5e2ee95aa..e6d4d94640f16 100644 --- a/storage/tokudb/ft-index/src/tests/perf_nop.cc +++ b/storage/tokudb/ft-index/src/tests/perf_nop.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/perf_ptquery.cc b/storage/tokudb/ft-index/src/tests/perf_ptquery.cc index bc17d498a7b14..71922782878e9 100644 --- a/storage/tokudb/ft-index/src/tests/perf_ptquery.cc +++ b/storage/tokudb/ft-index/src/tests/perf_ptquery.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/perf_ptquery2.cc b/storage/tokudb/ft-index/src/tests/perf_ptquery2.cc index 9ae1bbab8447e..888081bdb8133 100644 --- a/storage/tokudb/ft-index/src/tests/perf_ptquery2.cc +++ b/storage/tokudb/ft-index/src/tests/perf_ptquery2.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/perf_rangequery.cc b/storage/tokudb/ft-index/src/tests/perf_rangequery.cc index 88d30049a2991..3d78dd16ab74d 100644 --- a/storage/tokudb/ft-index/src/tests/perf_rangequery.cc +++ b/storage/tokudb/ft-index/src/tests/perf_rangequery.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/perf_read_txn.cc b/storage/tokudb/ft-index/src/tests/perf_read_txn.cc index 9e62314fa5803..2825f6588ce66 100644 --- a/storage/tokudb/ft-index/src/tests/perf_read_txn.cc +++ b/storage/tokudb/ft-index/src/tests/perf_read_txn.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/perf_read_txn_single_thread.cc b/storage/tokudb/ft-index/src/tests/perf_read_txn_single_thread.cc index debb1296ae992..f36b748d853b0 100644 --- a/storage/tokudb/ft-index/src/tests/perf_read_txn_single_thread.cc +++ b/storage/tokudb/ft-index/src/tests/perf_read_txn_single_thread.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/perf_read_write.cc b/storage/tokudb/ft-index/src/tests/perf_read_write.cc index ef95e9d3aa249..f5d75f57103e6 100644 --- a/storage/tokudb/ft-index/src/tests/perf_read_write.cc +++ b/storage/tokudb/ft-index/src/tests/perf_read_write.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/perf_txn_single_thread.cc b/storage/tokudb/ft-index/src/tests/perf_txn_single_thread.cc index 789024327cbef..52e6d9d7caecc 100644 --- a/storage/tokudb/ft-index/src/tests/perf_txn_single_thread.cc +++ b/storage/tokudb/ft-index/src/tests/perf_txn_single_thread.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/powerfail.cc b/storage/tokudb/ft-index/src/tests/powerfail.cc index 63ad313e145c6..601df047d097d 100644 --- a/storage/tokudb/ft-index/src/tests/powerfail.cc +++ b/storage/tokudb/ft-index/src/tests/powerfail.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/preload-db-nested.cc b/storage/tokudb/ft-index/src/tests/preload-db-nested.cc index 0d1a3749193df..9c0c8282456bf 100644 --- a/storage/tokudb/ft-index/src/tests/preload-db-nested.cc +++ b/storage/tokudb/ft-index/src/tests/preload-db-nested.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/preload-db.cc b/storage/tokudb/ft-index/src/tests/preload-db.cc index d486af941d249..584176bc99795 100644 --- a/storage/tokudb/ft-index/src/tests/preload-db.cc +++ b/storage/tokudb/ft-index/src/tests/preload-db.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/prelock-read-read.cc b/storage/tokudb/ft-index/src/tests/prelock-read-read.cc index daa6ab108b7b7..b23c81dd11974 100644 --- a/storage/tokudb/ft-index/src/tests/prelock-read-read.cc +++ b/storage/tokudb/ft-index/src/tests/prelock-read-read.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/prelock-read-write.cc b/storage/tokudb/ft-index/src/tests/prelock-read-write.cc index 140c9e79b1cef..0a3a3fddf39ef 100644 --- a/storage/tokudb/ft-index/src/tests/prelock-read-write.cc +++ b/storage/tokudb/ft-index/src/tests/prelock-read-write.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/prelock-write-read.cc b/storage/tokudb/ft-index/src/tests/prelock-write-read.cc index 540d385b1160c..35c194c362d4f 100644 --- a/storage/tokudb/ft-index/src/tests/prelock-write-read.cc +++ b/storage/tokudb/ft-index/src/tests/prelock-write-read.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/prelock-write-write.cc b/storage/tokudb/ft-index/src/tests/prelock-write-write.cc index 8753f158648b3..d9f832cdde7ad 100644 --- a/storage/tokudb/ft-index/src/tests/prelock-write-write.cc +++ b/storage/tokudb/ft-index/src/tests/prelock-write-write.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/print_engine_status.cc b/storage/tokudb/ft-index/src/tests/print_engine_status.cc index 2f1b6b5b98d0f..34b62bd8fe661 100644 --- a/storage/tokudb/ft-index/src/tests/print_engine_status.cc +++ b/storage/tokudb/ft-index/src/tests/print_engine_status.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/progress.cc b/storage/tokudb/ft-index/src/tests/progress.cc index e1d57ec61b0c8..e6af8fb97638d 100644 --- a/storage/tokudb/ft-index/src/tests/progress.cc +++ b/storage/tokudb/ft-index/src/tests/progress.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/put-del-multiple-array-indexing.cc b/storage/tokudb/ft-index/src/tests/put-del-multiple-array-indexing.cc index af0407063f848..0a29d87369fcc 100644 --- a/storage/tokudb/ft-index/src/tests/put-del-multiple-array-indexing.cc +++ b/storage/tokudb/ft-index/src/tests/put-del-multiple-array-indexing.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/queries_with_deletes.cc b/storage/tokudb/ft-index/src/tests/queries_with_deletes.cc index eebe61e2839f4..a619e5f0f5846 100644 --- a/storage/tokudb/ft-index/src/tests/queries_with_deletes.cc +++ b/storage/tokudb/ft-index/src/tests/queries_with_deletes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-2483.cc b/storage/tokudb/ft-index/src/tests/recover-2483.cc index 0950a30407584..e31361839f884 100644 --- a/storage/tokudb/ft-index/src/tests/recover-2483.cc +++ b/storage/tokudb/ft-index/src/tests/recover-2483.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -94,8 +94,6 @@ PATENT RIGHTS GRANT: #include "test.h" -const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE; - DB_ENV *env; DB_TXN *tid; DB *db; diff --git a/storage/tokudb/ft-index/src/tests/recover-3113.cc b/storage/tokudb/ft-index/src/tests/recover-3113.cc index 67a4e1ff4d443..eeba9baf03c04 100644 --- a/storage/tokudb/ft-index/src/tests/recover-3113.cc +++ b/storage/tokudb/ft-index/src/tests/recover-3113.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-5146.cc b/storage/tokudb/ft-index/src/tests/recover-5146.cc index 3ad54539aef00..c05f9effa7d3f 100644 --- a/storage/tokudb/ft-index/src/tests/recover-5146.cc +++ b/storage/tokudb/ft-index/src/tests/recover-5146.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-checkpoint-fcreate-fdelete-fcreate.cc b/storage/tokudb/ft-index/src/tests/recover-checkpoint-fcreate-fdelete-fcreate.cc index 20fc67dd956ba..0d5d4ff20dbde 100644 --- a/storage/tokudb/ft-index/src/tests/recover-checkpoint-fcreate-fdelete-fcreate.cc +++ b/storage/tokudb/ft-index/src/tests/recover-checkpoint-fcreate-fdelete-fcreate.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-abort.cc b/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-abort.cc index 3023cc1a1a793..bed20966845b5 100644 --- a/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-abort.cc +++ b/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-commit.cc b/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-commit.cc index 33546958a3753..2dcdbf6b93988 100644 --- a/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-commit.cc +++ b/storage/tokudb/ft-index/src/tests/recover-checkpoint-fopen-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-child-rollback.cc b/storage/tokudb/ft-index/src/tests/recover-child-rollback.cc index 00f036cc174a3..62fbfbda6efe0 100644 --- a/storage/tokudb/ft-index/src/tests/recover-child-rollback.cc +++ b/storage/tokudb/ft-index/src/tests/recover-child-rollback.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-compare-db-descriptor.cc b/storage/tokudb/ft-index/src/tests/recover-compare-db-descriptor.cc index 8da0e58af8da3..2cbc54efa1716 100644 --- a/storage/tokudb/ft-index/src/tests/recover-compare-db-descriptor.cc +++ b/storage/tokudb/ft-index/src/tests/recover-compare-db-descriptor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -100,8 +100,6 @@ const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG const char *namea="a.db"; const char *nameb="b.db"; -#if USE_TDB - static int my_compare(DB *UU(db), const DBT *a, const DBT *b) { assert(db); assert(db->cmp_descriptor); @@ -112,17 +110,13 @@ static int my_compare(DB *UU(db), const DBT *a, const DBT *b) { return memcmp(a->data, b->data, a->size); } -#endif - static void change_descriptor(DB_ENV* env, DB* db) { -#if USE_TDB DBT descriptor; dbt_init(&descriptor, descriptor_contents, sizeof(descriptor_contents)); IN_TXN_COMMIT(env, NULL, txn_desc, 0, { { int chk_r = db->change_descriptor(db, txn_desc, &descriptor, DB_UPDATE_CMP_DESCRIPTOR); CKERR(chk_r); } }); -#endif } static void @@ -136,9 +130,7 @@ do_x1_shutdown (bool do_commit, bool do_abort) { DB *dba, *dbb; r = db_env_create(&env, 0); CKERR(r); r = env->set_data_dir(env, "data"); CKERR(r); -#if USE_TDB r = env->set_default_bt_compare(env, my_compare); CKERR(r); -#endif r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = db_create(&dba, env, 0); CKERR(r); r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r); @@ -181,9 +173,7 @@ do_x1_recover (bool did_commit) { r = toku_os_mkdir(datadir, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = db_env_create(&env, 0); CKERR(r); r = env->set_data_dir(env, "data"); CKERR(r); -#if USE_TDB r = env->set_default_bt_compare(env, my_compare); CKERR(r); -#endif r = env->open(env, TOKU_TEST_FILENAME, envflags|DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = db_create(&dba, env, 0); CKERR(r); r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/recover-compare-db.cc b/storage/tokudb/ft-index/src/tests/recover-compare-db.cc index 0102a36db6afc..6ce16bd479dd8 100644 --- a/storage/tokudb/ft-index/src/tests/recover-compare-db.cc +++ b/storage/tokudb/ft-index/src/tests/recover-compare-db.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -98,16 +98,12 @@ const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG const char *namea="a.db"; const char *nameb="b.db"; -#if USE_TDB - static int my_compare(DB *UU(db), const DBT *a, const DBT *b) { assert(db); assert(a->size == b->size); return memcmp(a->data, b->data, a->size); } -#endif - static void do_x1_shutdown (bool do_commit, bool do_abort) { int r; @@ -116,9 +112,7 @@ do_x1_shutdown (bool do_commit, bool do_abort) { DB_ENV *env; DB *dba, *dbb; r = db_env_create(&env, 0); CKERR(r); -#if USE_TDB r = env->set_default_bt_compare(env, my_compare); CKERR(r); -#endif r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = db_create(&dba, env, 0); CKERR(r); r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r); @@ -155,9 +149,7 @@ do_x1_recover (bool did_commit) { DB *dba, *dbb; int r; r = db_env_create(&env, 0); CKERR(r); -#if USE_TDB r = env->set_default_bt_compare(env, my_compare); CKERR(r); -#endif r = env->open(env, TOKU_TEST_FILENAME, envflags|DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = db_create(&dba, env, 0); CKERR(r); r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/recover-del-multiple-abort.cc b/storage/tokudb/ft-index/src/tests/recover-del-multiple-abort.cc index 4b8d8b44cf7cf..5a7e1710de1d7 100644 --- a/storage/tokudb/ft-index/src/tests/recover-del-multiple-abort.cc +++ b/storage/tokudb/ft-index/src/tests/recover-del-multiple-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-del-multiple-srcdb-fdelete-all.cc b/storage/tokudb/ft-index/src/tests/recover-del-multiple-srcdb-fdelete-all.cc index 3224fa66057aa..632a480583538 100644 --- a/storage/tokudb/ft-index/src/tests/recover-del-multiple-srcdb-fdelete-all.cc +++ b/storage/tokudb/ft-index/src/tests/recover-del-multiple-srcdb-fdelete-all.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-del-multiple.cc b/storage/tokudb/ft-index/src/tests/recover-del-multiple.cc index be09e29a0b871..d4c7303162a3d 100644 --- a/storage/tokudb/ft-index/src/tests/recover-del-multiple.cc +++ b/storage/tokudb/ft-index/src/tests/recover-del-multiple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-delboth-after-checkpoint.cc b/storage/tokudb/ft-index/src/tests/recover-delboth-after-checkpoint.cc index 4655b5b5065a3..323b5b64ef780 100644 --- a/storage/tokudb/ft-index/src/tests/recover-delboth-after-checkpoint.cc +++ b/storage/tokudb/ft-index/src/tests/recover-delboth-after-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-delboth-checkpoint.cc b/storage/tokudb/ft-index/src/tests/recover-delboth-checkpoint.cc index 3e674644ed5b2..4ee3f5bba66a0 100644 --- a/storage/tokudb/ft-index/src/tests/recover-delboth-checkpoint.cc +++ b/storage/tokudb/ft-index/src/tests/recover-delboth-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-descriptor.cc b/storage/tokudb/ft-index/src/tests/recover-descriptor.cc index f726d63fc0c76..df96b8cbf4565 100644 --- a/storage/tokudb/ft-index/src/tests/recover-descriptor.cc +++ b/storage/tokudb/ft-index/src/tests/recover-descriptor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-descriptor10.cc b/storage/tokudb/ft-index/src/tests/recover-descriptor10.cc index 9b747506fd36e..db73549eb1f05 100644 --- a/storage/tokudb/ft-index/src/tests/recover-descriptor10.cc +++ b/storage/tokudb/ft-index/src/tests/recover-descriptor10.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-descriptor11.cc b/storage/tokudb/ft-index/src/tests/recover-descriptor11.cc index 8a2a1f3464472..5d593af25bd36 100644 --- a/storage/tokudb/ft-index/src/tests/recover-descriptor11.cc +++ b/storage/tokudb/ft-index/src/tests/recover-descriptor11.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-descriptor12.cc b/storage/tokudb/ft-index/src/tests/recover-descriptor12.cc index b3bb25abaea3c..698fa5d2b63d4 100644 --- a/storage/tokudb/ft-index/src/tests/recover-descriptor12.cc +++ b/storage/tokudb/ft-index/src/tests/recover-descriptor12.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-descriptor2.cc b/storage/tokudb/ft-index/src/tests/recover-descriptor2.cc index 7f09f4a7c54d3..62e685962e4d1 100644 --- a/storage/tokudb/ft-index/src/tests/recover-descriptor2.cc +++ b/storage/tokudb/ft-index/src/tests/recover-descriptor2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-descriptor3.cc b/storage/tokudb/ft-index/src/tests/recover-descriptor3.cc index 87d607359f32e..58d219af9cfeb 100644 --- a/storage/tokudb/ft-index/src/tests/recover-descriptor3.cc +++ b/storage/tokudb/ft-index/src/tests/recover-descriptor3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-descriptor4.cc b/storage/tokudb/ft-index/src/tests/recover-descriptor4.cc index 192a9474b625e..37d7ca80f1f72 100644 --- a/storage/tokudb/ft-index/src/tests/recover-descriptor4.cc +++ b/storage/tokudb/ft-index/src/tests/recover-descriptor4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-descriptor5.cc b/storage/tokudb/ft-index/src/tests/recover-descriptor5.cc index 6ce30af5a703c..757116afe1973 100644 --- a/storage/tokudb/ft-index/src/tests/recover-descriptor5.cc +++ b/storage/tokudb/ft-index/src/tests/recover-descriptor5.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-descriptor6.cc b/storage/tokudb/ft-index/src/tests/recover-descriptor6.cc index b092581c3c19e..68f90b0f276c9 100644 --- a/storage/tokudb/ft-index/src/tests/recover-descriptor6.cc +++ b/storage/tokudb/ft-index/src/tests/recover-descriptor6.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-descriptor7.cc b/storage/tokudb/ft-index/src/tests/recover-descriptor7.cc index 77d5c74cc97de..9c3a44d01e559 100644 --- a/storage/tokudb/ft-index/src/tests/recover-descriptor7.cc +++ b/storage/tokudb/ft-index/src/tests/recover-descriptor7.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-descriptor8.cc b/storage/tokudb/ft-index/src/tests/recover-descriptor8.cc index d9c993237d830..ac80a3e8a297a 100644 --- a/storage/tokudb/ft-index/src/tests/recover-descriptor8.cc +++ b/storage/tokudb/ft-index/src/tests/recover-descriptor8.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-descriptor9.cc b/storage/tokudb/ft-index/src/tests/recover-descriptor9.cc index be5bf31e3d687..17da090737416 100644 --- a/storage/tokudb/ft-index/src/tests/recover-descriptor9.cc +++ b/storage/tokudb/ft-index/src/tests/recover-descriptor9.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-fassociate.cc b/storage/tokudb/ft-index/src/tests/recover-fassociate.cc index d97cfd7f84922..81a19f18cd2a4 100644 --- a/storage/tokudb/ft-index/src/tests/recover-fassociate.cc +++ b/storage/tokudb/ft-index/src/tests/recover-fassociate.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-fclose-in-checkpoint.cc b/storage/tokudb/ft-index/src/tests/recover-fclose-in-checkpoint.cc index 5058c49dc6c18..0519b9ba3c916 100644 --- a/storage/tokudb/ft-index/src/tests/recover-fclose-in-checkpoint.cc +++ b/storage/tokudb/ft-index/src/tests/recover-fclose-in-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-fcreate-basementnodesize.cc b/storage/tokudb/ft-index/src/tests/recover-fcreate-basementnodesize.cc index 2297a095e698d..253508295057e 100644 --- a/storage/tokudb/ft-index/src/tests/recover-fcreate-basementnodesize.cc +++ b/storage/tokudb/ft-index/src/tests/recover-fcreate-basementnodesize.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -99,7 +99,6 @@ static const char *namea="a.db"; uint32_t nodesizea = 0; static const char *nameb="b.db"; uint32_t nodesizeb = 32*1024; static void do_remove(DB_ENV *env, const char *filename) { -#if TOKUDB int r; DBT dname; DBT iname; @@ -112,12 +111,6 @@ static void do_remove(DB_ENV *env, const char *filename) { toku_path_join(rmpath, 2, TOKU_TEST_FILENAME, iname.data); toku_os_recursive_delete(rmpath); toku_free(iname.data); -#else - (void) env; - char rmpath[TOKU_PATH_MAX+1]; - toku_path_join(rmpath, 2, TOKU_TEST_FILENAME, filename); - toku_os_recursive_delete(rmpath); -#endif } static void run_test (void) { diff --git a/storage/tokudb/ft-index/src/tests/recover-fcreate-fclose.cc b/storage/tokudb/ft-index/src/tests/recover-fcreate-fclose.cc index 240969e5ecadc..1dfccc4c3f41e 100644 --- a/storage/tokudb/ft-index/src/tests/recover-fcreate-fclose.cc +++ b/storage/tokudb/ft-index/src/tests/recover-fcreate-fclose.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-fcreate-fdelete.cc b/storage/tokudb/ft-index/src/tests/recover-fcreate-fdelete.cc index b0ef652e90615..76605330a2340 100644 --- a/storage/tokudb/ft-index/src/tests/recover-fcreate-fdelete.cc +++ b/storage/tokudb/ft-index/src/tests/recover-fcreate-fdelete.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-fcreate-nodesize.cc b/storage/tokudb/ft-index/src/tests/recover-fcreate-nodesize.cc index d0a695b16ad83..7526c20b47409 100644 --- a/storage/tokudb/ft-index/src/tests/recover-fcreate-nodesize.cc +++ b/storage/tokudb/ft-index/src/tests/recover-fcreate-nodesize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -100,7 +100,6 @@ static const char *namea="a.db"; uint32_t nodesizea = 0; static const char *nameb="b.db"; uint32_t nodesizeb = 64*1024; static void do_remove(DB_ENV *env, const char *filename) { -#if TOKUDB int r; DBT dname; DBT iname; @@ -113,12 +112,6 @@ static void do_remove(DB_ENV *env, const char *filename) { toku_path_join(rmpath, 2, TOKU_TEST_FILENAME, iname.data); toku_os_recursive_delete(rmpath); toku_free(iname.data); -#else - (void) env; - char rmpath[TOKU_PATH_MAX+1]; - toku_path_join(rmpath, 2, TOKU_TEST_FILENAME, filename); - toku_os_recursive_delete(rmpath); -#endif } static void run_test (void) { diff --git a/storage/tokudb/ft-index/src/tests/recover-fcreate-xabort.cc b/storage/tokudb/ft-index/src/tests/recover-fcreate-xabort.cc index 9473b52f2401c..c18db167449ae 100644 --- a/storage/tokudb/ft-index/src/tests/recover-fcreate-xabort.cc +++ b/storage/tokudb/ft-index/src/tests/recover-fcreate-xabort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-flt1.cc b/storage/tokudb/ft-index/src/tests/recover-flt1.cc index 2efea643f73c9..f395ed8171d9e 100644 --- a/storage/tokudb/ft-index/src/tests/recover-flt1.cc +++ b/storage/tokudb/ft-index/src/tests/recover-flt1.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-flt10.cc b/storage/tokudb/ft-index/src/tests/recover-flt10.cc index 82e774ea96b28..ccee07680b659 100644 --- a/storage/tokudb/ft-index/src/tests/recover-flt10.cc +++ b/storage/tokudb/ft-index/src/tests/recover-flt10.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-flt2.cc b/storage/tokudb/ft-index/src/tests/recover-flt2.cc index e7151771bc8fd..bd6125e2dfe7e 100644 --- a/storage/tokudb/ft-index/src/tests/recover-flt2.cc +++ b/storage/tokudb/ft-index/src/tests/recover-flt2.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-flt3.cc b/storage/tokudb/ft-index/src/tests/recover-flt3.cc index 323eb2d501931..f4fa3344e68dc 100644 --- a/storage/tokudb/ft-index/src/tests/recover-flt3.cc +++ b/storage/tokudb/ft-index/src/tests/recover-flt3.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-flt4.cc b/storage/tokudb/ft-index/src/tests/recover-flt4.cc index a4b68f2b8dfb9..d2efee438c9cd 100644 --- a/storage/tokudb/ft-index/src/tests/recover-flt4.cc +++ b/storage/tokudb/ft-index/src/tests/recover-flt4.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-flt5.cc b/storage/tokudb/ft-index/src/tests/recover-flt5.cc index 48a5a10b707ec..d4a4c0cec003d 100644 --- a/storage/tokudb/ft-index/src/tests/recover-flt5.cc +++ b/storage/tokudb/ft-index/src/tests/recover-flt5.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-flt6.cc b/storage/tokudb/ft-index/src/tests/recover-flt6.cc index 5929ba5200a51..184e3933f6494 100644 --- a/storage/tokudb/ft-index/src/tests/recover-flt6.cc +++ b/storage/tokudb/ft-index/src/tests/recover-flt6.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-flt7.cc b/storage/tokudb/ft-index/src/tests/recover-flt7.cc index 40be856bba81e..e8fce283b7139 100644 --- a/storage/tokudb/ft-index/src/tests/recover-flt7.cc +++ b/storage/tokudb/ft-index/src/tests/recover-flt7.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-flt8.cc b/storage/tokudb/ft-index/src/tests/recover-flt8.cc index 44a7b0f4f44cc..2f1958b3025af 100644 --- a/storage/tokudb/ft-index/src/tests/recover-flt8.cc +++ b/storage/tokudb/ft-index/src/tests/recover-flt8.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-flt9.cc b/storage/tokudb/ft-index/src/tests/recover-flt9.cc index a9c89a53ab1ba..28325fbd6c50a 100644 --- a/storage/tokudb/ft-index/src/tests/recover-flt9.cc +++ b/storage/tokudb/ft-index/src/tests/recover-flt9.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-fopen-checkpoint-fclose.cc b/storage/tokudb/ft-index/src/tests/recover-fopen-checkpoint-fclose.cc index 65c63417065ce..b8019b1ec79c9 100644 --- a/storage/tokudb/ft-index/src/tests/recover-fopen-checkpoint-fclose.cc +++ b/storage/tokudb/ft-index/src/tests/recover-fopen-checkpoint-fclose.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-fopen-fclose-checkpoint.cc b/storage/tokudb/ft-index/src/tests/recover-fopen-fclose-checkpoint.cc index 6d17bb79998ec..bb750cd3c8d73 100644 --- a/storage/tokudb/ft-index/src/tests/recover-fopen-fclose-checkpoint.cc +++ b/storage/tokudb/ft-index/src/tests/recover-fopen-fclose-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-fopen-fdelete-checkpoint-fcreate.cc b/storage/tokudb/ft-index/src/tests/recover-fopen-fdelete-checkpoint-fcreate.cc index 608ebadd4a684..e745b666f86dc 100644 --- a/storage/tokudb/ft-index/src/tests/recover-fopen-fdelete-checkpoint-fcreate.cc +++ b/storage/tokudb/ft-index/src/tests/recover-fopen-fdelete-checkpoint-fcreate.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-hotindexer-simple-abort-put.cc b/storage/tokudb/ft-index/src/tests/recover-hotindexer-simple-abort-put.cc index 153b911a01845..ae99abb10829e 100644 --- a/storage/tokudb/ft-index/src/tests/recover-hotindexer-simple-abort-put.cc +++ b/storage/tokudb/ft-index/src/tests/recover-hotindexer-simple-abort-put.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-loader-test.cc b/storage/tokudb/ft-index/src/tests/recover-loader-test.cc index c9944ba040940..381a0c600baed 100644 --- a/storage/tokudb/ft-index/src/tests/recover-loader-test.cc +++ b/storage/tokudb/ft-index/src/tests/recover-loader-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-lsn-filter-multiple.cc b/storage/tokudb/ft-index/src/tests/recover-lsn-filter-multiple.cc index dc26721818d52..465f8cffab77e 100644 --- a/storage/tokudb/ft-index/src/tests/recover-lsn-filter-multiple.cc +++ b/storage/tokudb/ft-index/src/tests/recover-lsn-filter-multiple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-lsn-filter.cc b/storage/tokudb/ft-index/src/tests/recover-lsn-filter.cc index 4cd79918a86fe..9877923c50c58 100644 --- a/storage/tokudb/ft-index/src/tests/recover-lsn-filter.cc +++ b/storage/tokudb/ft-index/src/tests/recover-lsn-filter.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-missing-dbfile-2.cc b/storage/tokudb/ft-index/src/tests/recover-missing-dbfile-2.cc index 777fb3499e6d2..691ffc36162f8 100644 --- a/storage/tokudb/ft-index/src/tests/recover-missing-dbfile-2.cc +++ b/storage/tokudb/ft-index/src/tests/recover-missing-dbfile-2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -109,9 +109,7 @@ static void run_test (void) { DB_ENV *env; r = db_env_create(&env, 0); CKERR(r); -#if IS_TDB db_env_enable_engine_status(0); // disable engine status on crash because test is expected to fail -#endif r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); DB *dba; @@ -160,9 +158,7 @@ static void run_recover (void) { CKERR(r); r = db_env_create(&env, 0); CKERR(r); -#if IS_TDB db_env_enable_engine_status(0); // disable engine status on crash because test is expected to fail -#endif r = env->open(env, TOKU_TEST_FILENAME, envflags + DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); assert(r == DB_RUNRECOVERY); @@ -184,9 +180,7 @@ static void run_no_recover (void) { int r; r = db_env_create(&env, 0); CKERR(r); -#if IS_TDB db_env_enable_engine_status(0); // disable engine status on crash because test is expected to fail -#endif r = env->open(env, TOKU_TEST_FILENAME, envflags & ~DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = env->close(env, 0); CKERR(r); exit(0); diff --git a/storage/tokudb/ft-index/src/tests/recover-missing-dbfile.cc b/storage/tokudb/ft-index/src/tests/recover-missing-dbfile.cc index 1287ac71c184e..5af1644176b3c 100644 --- a/storage/tokudb/ft-index/src/tests/recover-missing-dbfile.cc +++ b/storage/tokudb/ft-index/src/tests/recover-missing-dbfile.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -109,9 +109,7 @@ static void run_test (void) { DB *dba; r = db_env_create(&env, 0); CKERR(r); -#if IS_TDB db_env_enable_engine_status(0); // disable engine status on crash because test is expected to fail -#endif r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = db_create(&dba, env, 0); CKERR(r); @@ -151,9 +149,7 @@ static void run_recover (void) { CKERR(r); r = db_env_create(&env, 0); CKERR(r); -#if IS_TDB db_env_enable_engine_status(0); // disable engine status on crash because test is expected to fail -#endif r = env->open(env, TOKU_TEST_FILENAME, envflags + DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR2(r, DB_RUNRECOVERY); @@ -175,9 +171,7 @@ static void run_no_recover (void) { int r; r = db_env_create(&env, 0); CKERR(r); -#if IS_TDB db_env_enable_engine_status(0); // disable engine status on crash because test is expected to fail -#endif r = env->open(env, TOKU_TEST_FILENAME, envflags & ~DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = env->close(env, 0); CKERR(r); exit(0); diff --git a/storage/tokudb/ft-index/src/tests/recover-missing-logfile.cc b/storage/tokudb/ft-index/src/tests/recover-missing-logfile.cc index d7b6b75d4cc4e..51681ad0ea8b1 100644 --- a/storage/tokudb/ft-index/src/tests/recover-missing-logfile.cc +++ b/storage/tokudb/ft-index/src/tests/recover-missing-logfile.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-put-multiple-abort.cc b/storage/tokudb/ft-index/src/tests/recover-put-multiple-abort.cc index c2036f6f34b99..abfa78a9283ea 100644 --- a/storage/tokudb/ft-index/src/tests/recover-put-multiple-abort.cc +++ b/storage/tokudb/ft-index/src/tests/recover-put-multiple-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-all.cc b/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-all.cc index a92db3a2a22f4..e65667a0e4f11 100644 --- a/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-all.cc +++ b/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-all.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-some.cc b/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-some.cc index 88014d208d25b..4f37a9adf6746 100644 --- a/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-some.cc +++ b/storage/tokudb/ft-index/src/tests/recover-put-multiple-fdelete-some.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-put-multiple-srcdb-fdelete-all.cc b/storage/tokudb/ft-index/src/tests/recover-put-multiple-srcdb-fdelete-all.cc index df56fa4f00bd5..e612e4d4c9aad 100644 --- a/storage/tokudb/ft-index/src/tests/recover-put-multiple-srcdb-fdelete-all.cc +++ b/storage/tokudb/ft-index/src/tests/recover-put-multiple-srcdb-fdelete-all.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-put-multiple.cc b/storage/tokudb/ft-index/src/tests/recover-put-multiple.cc index 8e4c19141bfdf..21a68384860f6 100644 --- a/storage/tokudb/ft-index/src/tests/recover-put-multiple.cc +++ b/storage/tokudb/ft-index/src/tests/recover-put-multiple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/worker-thread-benchmarks/workqueue.h b/storage/tokudb/ft-index/src/tests/recover-rollback.cc similarity index 50% rename from storage/tokudb/ft-index/ft/worker-thread-benchmarks/workqueue.h rename to storage/tokudb/ft-index/src/tests/recover-rollback.cc index ab8f856a0194b..2d976c05b5a58 100644 --- a/storage/tokudb/ft-index/ft/worker-thread-benchmarks/workqueue.h +++ b/storage/tokudb/ft-index/src/tests/recover-rollback.cc @@ -89,119 +89,174 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
-typedef struct workqueue *WORKQUEUE;
-struct workqueue {
-    WORKITEM head, tail; // head and tail of the linked list of work items
-    pthread_cond_t wait_read; // wait for read
-    int want_read; // number of threads waiting to read
-    pthread_cond_t wait_write; // wait for write
-    int want_write; // number of threads waiting to write
-    int ninq; // number of work items in the queue
-    char closed; // kicks waiting threads off of the write queue
-};
-
-// initialize a workqueue
-// expects: the workqueue is not initialized
-// effects: the workqueue is set to empty and the condition variable is initialized
-
-static void workqueue_init(WORKQUEUE wq) {
-    wq->head = wq->tail = 0;
-    int r;
-    r = pthread_cond_init(&wq->wait_read, 0); assert(r == 0);
-    wq->want_read = 0;
-    r = pthread_cond_init(&wq->wait_write, 0); assert(r == 0);
-    wq->want_write = 0;
-    wq->ninq = 0;
-    wq->closed = 0;
-}
+// Test dirty upgrade.
+// Generate a rollback log that requires recovery.
-// destroy a workqueue
-// expects: the workqueue must be initialized and empty
+#include "test.h"
-static void workqueue_destroy(WORKQUEUE wq) {
-    assert(wq->head == 0 && wq->tail == 0);
+// Insert max_rows key/val pairs into the db
+static void do_inserts(DB_TXN *txn, DB *db, uint64_t max_rows, size_t val_size) {
+    char val_data[val_size]; memset(val_data, 0, val_size);
     int r;
-    r = pthread_cond_destroy(&wq->wait_read); assert(r == 0);
-    r = pthread_cond_destroy(&wq->wait_write); assert(r == 0);
-}
-// close the workqueue
-// effects: signal any threads blocked in the workqueue
+    for (uint64_t i = 0; i < max_rows; i++) {
+        // pick a sequential key but it does not matter for this test.
+        uint64_t k[2] = {
+            htonl(i), random64(),
+        };
-static void workqueue_set_closed(WORKQUEUE wq) {
-    wq->closed = 1;
-    int r;
-    r = pthread_cond_broadcast(&wq->wait_read); assert(r == 0);
-    r = pthread_cond_broadcast(&wq->wait_write); assert(r == 0);
+        DBT key = { .data = k, .size = sizeof k };
+        DBT val = { .data = val_data, .size = (uint32_t) val_size };
+        r = db->put(db, txn, &key, &val, 0);
+        CKERR(r);
+    }
 }
-// determine whether or not the write queue is empty
-// return: 1 if the write queue is empty, otherwise 0
+static void run_test(uint64_t num_rows, size_t val_size, bool do_crash) {
+    int r;
-static int workqueue_empty(WORKQUEUE wq) {
-    return wq->head == 0;
-}
+    DB_ENV *env = nullptr;
+    r = db_env_create(&env, 0);
+    CKERR(r);
+    r = env->set_cachesize(env, 8, 0, 1);
+    CKERR(r);
+    r = env->open(env, TOKU_TEST_FILENAME,
+                  DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE,
+                  S_IRWXU+S_IRWXG+S_IRWXO);
+    CKERR(r);
-// put a work item at the tail of the write queue
-// expects: the mutex is locked
-// effects: append the workitem to the end of the write queue and signal
-//     any readers
-
-static void workqueue_enq(WORKQUEUE wq, WORKITEM workitem) {
-    workitem->next_wq = 0;
-    if (wq->tail)
-        wq->tail->next_wq = workitem;
-    else
-        wq->head = workitem;
-    wq->tail = workitem;
-    wq->ninq++;
-    if (wq->want_read) {
-        int r = pthread_cond_signal(&wq->wait_read); assert(r == 0);
-    }
-}
+    DB *db = nullptr;
+    r = db_create(&db, env, 0);
+    CKERR(r);
+    r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO);
+    CKERR(r);
-// get a workitem from the head of the write queue
-// expects: the mutex is locked
-// effects: wait until the workqueue is not empty, remove the first workitem from the
-//     write queue and return it
-// returns: 0 if success, otherwise an error
-
-static int workqueue_deq(WORKQUEUE wq, pthread_mutex_t *mutex, WORKITEM *workitemptr) {
-    while (workqueue_empty(wq)) {
-        if (wq->closed)
-            return EINVAL;
-        wq->want_read++;
-        int r = pthread_cond_wait(&wq->wait_read, mutex); assert(r == 0);
-        wq->want_read--;
-    }
-    WORKITEM workitem = wq->head;
-    wq->head = workitem->next_wq;
-    if (wq->head == 0)
-        wq->tail = 0;
-    wq->ninq--;
-    workitem->next_wq = 0;
-    *workitemptr = workitem;
-    return 0;
+    r = env->txn_checkpoint(env, 0, 0, 0);
+    CKERR(r);
+
+    DB_TXN *txn = nullptr;
+    r = env->txn_begin(env, nullptr, &txn, 0);
+    CKERR(r);
+
+    do_inserts(txn, db, num_rows, val_size);
+
+    r = env->txn_checkpoint(env, 0, 0, 0);
+    CKERR(r);
+
+    r = txn->commit(txn, 0);
+    CKERR(r);
+
+    if (do_crash)
+        assert(0); // crash on purpose
+
+    r = db->close(db, 0);
+    CKERR(r);
+
+    r = env->close(env, 0);
+    CKERR(r);
 }
-#if 0
+static void do_verify(DB_ENV *env, DB *db, uint64_t num_rows, size_t val_size UU()) {
+    int r;
+    DB_TXN *txn = nullptr;
+    r = env->txn_begin(env, nullptr, &txn, 0);
+    CKERR(r);
+
+    DBC *c = nullptr;
+    r = db->cursor(db, txn, &c, 0);
+    CKERR(r);
+
+    uint64_t i = 0;
+    while (1) {
+        DBT key = {};
+        DBT val = {};
+        r = c->c_get(c, &key, &val, DB_NEXT);
+        if (r == DB_NOTFOUND)
+            break;
+        CKERR(r);
+        assert(key.size == 16);
+        uint64_t k[2];
+        memcpy(k, key.data, key.size);
+        assert(htonl(k[0]) == i);
+        assert(val.size == val_size);
+        i++;
+    }
+    assert(i == num_rows);
-// suspend the writer thread
-// expects: the mutex is locked
+    r = c->c_close(c);
+    CKERR(r);
-static void workqueue_wait_write(WORKQUEUE wq, pthread_mutex_t *mutex) {
-    wq->want_write++;
-    int r = pthread_cond_wait(&wq->wait_write, mutex); assert(r == 0);
-    wq->want_write--;
+    r = txn->commit(txn, 0);
+    CKERR(r);
 }
-// wakeup the writer threads
-// expects: the mutex is locked
+static void run_recover(uint64_t num_rows, size_t val_size) {
+    int r;
-static void workqueue_wakeup_write(WORKQUEUE wq) {
-    if (wq->want_write) {
-        int r = pthread_cond_broadcast(&wq->wait_write); assert(r == 0);
+    DB_ENV *env = nullptr;
+    r = db_env_create(&env, 0);
+    CKERR(r);
+    r = env->set_cachesize(env, 8, 0, 1);
+    CKERR(r);
+    r = env->open(env, TOKU_TEST_FILENAME,
+                  DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE | DB_RECOVER,
+                  S_IRWXU+S_IRWXG+S_IRWXO);
+    CKERR(r);
+
+    DB *db = nullptr;
+    r = db_create(&db, env, 0);
+    CKERR(r);
+    r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, 0, S_IRWXU+S_IRWXG+S_IRWXO);
+    CKERR(r);
+
+    do_verify(env, db, num_rows, val_size);
+
+    r = db->close(db, 0);
+    CKERR(r);
+
+    r = env->close(env, 0);
+    CKERR(r);
+}
+
+int test_main (int argc, char *const argv[]) {
+    bool do_test = false;
+    bool do_recover = false;
+    bool do_crash = true;
+    uint64_t num_rows = 1;
+    size_t val_size = 1;
+
+    for (int i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "-v") == 0) {
+            verbose++;
+            continue;
+        }
+        if (strcmp(argv[i], "-q") == 0) {
+            if (verbose > 0) verbose--;
+            continue;
+        }
+        if (strcmp(argv[i], "--test") == 0) {
+            do_test = true;
+            continue;
+        }
+        if (strcmp(argv[i], "--recover") == 0) {
+            do_recover = true;
+            continue;
+        }
+        if (strcmp(argv[i], "--crash") == 0 && i+1 < argc) {
+            do_crash = atoi(argv[++i]);
+            continue;
+        }
+    }
+    if (do_test) {
+        // init the env directory
+        toku_os_recursive_delete(TOKU_TEST_FILENAME);
+        int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO);
+        CKERR(r);
+        run_test(num_rows, val_size, do_crash);
     }
+    if (do_recover) {
+        run_recover(num_rows, val_size);
+    }
+
+    return 0;
 }
-
-#endif
diff --git
a/storage/tokudb/ft-index/src/tests/recover-rollinclude.cc b/storage/tokudb/ft-index/src/tests/recover-rollinclude.cc new file mode 100644 index 0000000000000..5a3a89a4052ab --- /dev/null +++ b/storage/tokudb/ft-index/src/tests/recover-rollinclude.cc @@ -0,0 +1,274 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuDB, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
  If you or your agent or licensee institute or order
+  or agree to the institution of patent litigation against any entity
+  (including a cross-claim or counterclaim in a lawsuit) alleging that
+  THIS IMPLEMENTATION constitutes direct or contributory patent
+  infringement, or inducement of patent infringement, then any rights
+  granted to you under this License shall terminate as of the date
+  such litigation is filed.  If you or your agent or exclusive
+  licensee institute or order or agree to the institution of a PATENT
+  CHALLENGE, then Tokutek may terminate any rights granted to you
+  under this License.
+*/
+
+#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
+#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
+
+// Create a rollback log with a rollinclude log entry, crash after the txn commits and before the last checkpoint.
+// Recovery crashes 7.1.0, should succeed.
+
+#include "test.h"
+
+// Insert max_rows key/val pairs into the db
+
+// We want to force a rollinclude so we use a child transaction and insert enough rows so that it spills.
+// It spills at about 144K and 289K rows.
+static void do_inserts(DB_ENV *env, DB *db, uint64_t max_rows, size_t val_size) {
+    char val_data[val_size]; memset(val_data, 0, val_size);
+    int r;
+    DB_TXN *parent = nullptr;
+    r = env->txn_begin(env, nullptr, &parent, 0);
+    CKERR(r);
+
+    DB_TXN *child = nullptr;
+    r = env->txn_begin(env, parent, &child, 0);
+    CKERR(r);
+
+    for (uint64_t i = 0; i < max_rows; i++) {
+        // pick a sequential key but it does not matter for this test.
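+        // 16-byte key: the first 8 bytes hold htonl(i), the last 8 bytes are
+        // random filler; do_verify() below checks the key size and the first word.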
+        uint64_t k[2] = {
+            htonl(i), random64(),
+        };
+
+        DBT key = { .data = k, .size = sizeof k };
+        DBT val = { .data = val_data, .size = (uint32_t) val_size };
+        r = db->put(db, child, &key, &val, 0);
+        CKERR(r);
+
+        if (i == max_rows-1) {
+            r = child->commit(child, 0);
+            CKERR(r);
+
+            r = env->txn_checkpoint(env, 0, 0, 0);
+            CKERR(r);
+        }
+    }
+
+    r = parent->commit(parent, 0);
+    CKERR(r);
+}
+
+static void run_test(uint64_t num_rows, size_t val_size, bool do_crash) {
+    int r;
+
+    DB_ENV *env = nullptr;
+    r = db_env_create(&env, 0);
+    CKERR(r);
+    r = env->set_cachesize(env, 8, 0, 1);
+    CKERR(r);
+    r = env->open(env, TOKU_TEST_FILENAME,
+                  DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE,
+                  S_IRWXU+S_IRWXG+S_IRWXO);
+    CKERR(r);
+
+    DB *db = nullptr;
+    r = db_create(&db, env, 0);
+    CKERR(r);
+    r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO);
+    CKERR(r);
+
+    r = env->txn_checkpoint(env, 0, 0, 0);
+    CKERR(r);
+
+    do_inserts(env, db, num_rows, val_size);
+
+    if (do_crash)
+        assert(0); // crash on purpose
+
+    r = db->close(db, 0);
+    CKERR(r);
+
+    r = env->close(env, 0);
+    CKERR(r);
+}
+
+static void do_verify(DB_ENV *env, DB *db, uint64_t num_rows, size_t val_size UU()) {
+    int r;
+    DB_TXN *txn = nullptr;
+    r = env->txn_begin(env, nullptr, &txn, 0);
+    CKERR(r);
+
+    DBC *c = nullptr;
+    r = db->cursor(db, txn, &c, 0);
+    CKERR(r);
+
+    uint64_t i = 0;
+    while (1) {
+        DBT key = {};
+        DBT val = {};
+        r = c->c_get(c, &key, &val, DB_NEXT);
+        if (r == DB_NOTFOUND)
+            break;
+        CKERR(r);
+        assert(key.size == 16);
+        uint64_t k[2];
+        memcpy(k, key.data, key.size);
+        assert(htonl(k[0]) == i);
+        assert(val.size == val_size);
+        i++;
+    }
+    assert(i == num_rows);
+
+    r = c->c_close(c);
+    CKERR(r);
+
+    r = txn->commit(txn, 0);
+    CKERR(r);
+}
+
+static void run_recover(uint64_t num_rows, size_t val_size) {
+    int r;
+
+    DB_ENV *env = nullptr;
+    r = db_env_create(&env, 0);
+    CKERR(r);
+    r = env->set_cachesize(env, 8, 0, 1);
+    CKERR(r);
+    r = env->open(env, TOKU_TEST_FILENAME,
+                  DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE | DB_RECOVER,
+                  S_IRWXU+S_IRWXG+S_IRWXO);
+    CKERR(r);
+
+    DB *db = nullptr;
+    r = db_create(&db, env, 0);
+    CKERR(r);
+    r = db->open(db, nullptr, "foo.db", 0, DB_BTREE, 0, S_IRWXU+S_IRWXG+S_IRWXO);
+    CKERR(r);
+
+    do_verify(env, db, num_rows, val_size);
+
+    r = db->close(db, 0);
+    CKERR(r);
+
+    r = env->close(env, 0);
+    CKERR(r);
+}
+
+int test_main (int argc, char *const argv[]) {
+    bool do_test = false;
+    bool do_recover = false;
+    bool do_crash = true;
+    for (int i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "-v") == 0) {
+            verbose++;
+            continue;
+        }
+        if (strcmp(argv[i], "-q") == 0) {
+            if (verbose > 0) verbose--;
+            continue;
+        }
+        if (strcmp(argv[i], "--test") == 0) {
+            do_test = true;
+            continue;
+        }
+        if (strcmp(argv[i], "--recover") == 0) {
+            do_recover = true;
+            continue;
+        }
+        if (strcmp(argv[i], "--crash") == 0 && i+1 < argc) {
+            do_crash = atoi(argv[++i]);
+            continue;
+        }
+    }
+
+    uint64_t num_rows = 300000;
+    size_t val_size = 1;
+
+    if (do_test) {
+        // init the env directory
+        toku_os_recursive_delete(TOKU_TEST_FILENAME);
+        int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO);
+        CKERR(r);
+        run_test(num_rows, val_size, do_crash);
+    }
+    if (do_recover) {
+        run_recover(num_rows, val_size);
+    }
+
+    return 0;
+}
diff --git a/storage/tokudb/ft-index/src/tests/recover-split-checkpoint.cc b/storage/tokudb/ft-index/src/tests/recover-split-checkpoint.cc index
9df540aadde1d..4e6b3d16d9834 100644 --- a/storage/tokudb/ft-index/src/tests/recover-split-checkpoint.cc +++ b/storage/tokudb/ft-index/src/tests/recover-split-checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-straddle-txn-nested.cc b/storage/tokudb/ft-index/src/tests/recover-straddle-txn-nested.cc index 32e22f9607d99..03887ac19e570 100644 --- a/storage/tokudb/ft-index/src/tests/recover-straddle-txn-nested.cc +++ b/storage/tokudb/ft-index/src/tests/recover-straddle-txn-nested.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-straddle-txn.cc b/storage/tokudb/ft-index/src/tests/recover-straddle-txn.cc index a08e894001562..a728a7de17d5a 100644 --- a/storage/tokudb/ft-index/src/tests/recover-straddle-txn.cc +++ b/storage/tokudb/ft-index/src/tests/recover-straddle-txn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-tablelock.cc b/storage/tokudb/ft-index/src/tests/recover-tablelock.cc index c75574e60b298..eb2a4318a2038 100644 --- a/storage/tokudb/ft-index/src/tests/recover-tablelock.cc +++ b/storage/tokudb/ft-index/src/tests/recover-tablelock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-test-logsuppress-put.cc b/storage/tokudb/ft-index/src/tests/recover-test-logsuppress-put.cc index 14f659f72ad00..c022fdf62436c 100644 --- a/storage/tokudb/ft-index/src/tests/recover-test-logsuppress-put.cc +++ b/storage/tokudb/ft-index/src/tests/recover-test-logsuppress-put.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-test-logsuppress.cc b/storage/tokudb/ft-index/src/tests/recover-test-logsuppress.cc index 8272bbbcd51ba..020cfbd61227d 100644 --- a/storage/tokudb/ft-index/src/tests/recover-test-logsuppress.cc +++ b/storage/tokudb/ft-index/src/tests/recover-test-logsuppress.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-test1.cc b/storage/tokudb/ft-index/src/tests/recover-test1.cc index d9b7cb6fa6cac..6529d6ac968a0 100644 --- a/storage/tokudb/ft-index/src/tests/recover-test1.cc +++ b/storage/tokudb/ft-index/src/tests/recover-test1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-test2.cc b/storage/tokudb/ft-index/src/tests/recover-test2.cc index 524c197c6255a..e6bf69b92feb0 100644 --- a/storage/tokudb/ft-index/src/tests/recover-test2.cc +++ b/storage/tokudb/ft-index/src/tests/recover-test2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,7 +94,6 @@ PATENT RIGHTS GRANT: #include "test.h" -const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE; const char *namea="a.db"; DB_ENV *env; diff --git a/storage/tokudb/ft-index/src/tests/recover-test3.cc b/storage/tokudb/ft-index/src/tests/recover-test3.cc index a3de519172d7e..fa40c494a9652 100644 --- a/storage/tokudb/ft-index/src/tests/recover-test3.cc +++ b/storage/tokudb/ft-index/src/tests/recover-test3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,7 +94,6 @@ PATENT RIGHTS GRANT: #include "test.h" -const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE; const char *namea="a.db"; DB_ENV *env; diff --git a/storage/tokudb/ft-index/src/tests/recover-test_crash_in_flusher_thread.h b/storage/tokudb/ft-index/src/tests/recover-test_crash_in_flusher_thread.h index 56087ba16fa46..82d57b48867cb 100644 --- a/storage/tokudb/ft-index/src/tests/recover-test_crash_in_flusher_thread.h +++ b/storage/tokudb/ft-index/src/tests/recover-test_crash_in_flusher_thread.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -85,8 +85,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "$Id$" + #include "test.h" #include diff --git a/storage/tokudb/ft-index/src/tests/recover-test_stress1.cc b/storage/tokudb/ft-index/src/tests/recover-test_stress1.cc index 7e7be8c26f767..a45667cd8a1b9 100644 --- a/storage/tokudb/ft-index/src/tests/recover-test_stress1.cc +++ b/storage/tokudb/ft-index/src/tests/recover-test_stress1.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-test_stress2.cc b/storage/tokudb/ft-index/src/tests/recover-test_stress2.cc index e07f36dca7c5e..e78f8a222b172 100644 --- a/storage/tokudb/ft-index/src/tests/recover-test_stress2.cc +++ b/storage/tokudb/ft-index/src/tests/recover-test_stress2.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-test_stress3.cc b/storage/tokudb/ft-index/src/tests/recover-test_stress3.cc index 2a3017c4cae6e..9794271ec6b87 100644 --- a/storage/tokudb/ft-index/src/tests/recover-test_stress3.cc +++ b/storage/tokudb/ft-index/src/tests/recover-test_stress3.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-test_stress_openclose.cc b/storage/tokudb/ft-index/src/tests/recover-test_stress_openclose.cc index 52b59f96810d2..e84c9f2c9f6ba 100644 --- a/storage/tokudb/ft-index/src/tests/recover-test_stress_openclose.cc +++ b/storage/tokudb/ft-index/src/tests/recover-test_stress_openclose.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update-multiple-abort.cc b/storage/tokudb/ft-index/src/tests/recover-update-multiple-abort.cc index 1e6f57a07142c..4d0e0164aa381 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update-multiple-abort.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update-multiple-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update-multiple.cc b/storage/tokudb/ft-index/src/tests/recover-update-multiple.cc index 437f9615351b6..fe436c95a4df3 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update-multiple.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update-multiple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_aborts.cc b/storage/tokudb/ft-index/src/tests/recover-update_aborts.cc index 27e4a19cef1ed..82ffd511c2a54 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_aborts.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_aborts.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_checkpoint.cc b/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_checkpoint.cc index de3f0996d6399..46723760c88a3 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_checkpoint.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_close.cc b/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_close.cc index e1c13d92f932d..feac9dba77d99 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_close.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_aborts_before_close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts.cc b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts.cc index 9ac5bb5b186bd..05904b0ae7fb8 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts2.cc b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts2.cc index 2f05dc92c53e1..d88d483bd1716 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts2.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts3.cc b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts3.cc index 3668d7a612e03..c1f1baada138c 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts3.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_checkpoint.cc b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_checkpoint.cc index a9bc84907eda3..0768def9255a6 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_checkpoint.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_close.cc b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_close.cc index 7dd3f647cbea8..0d18ad7bacbac 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_close.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_aborts_before_close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values.cc b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values.cc index f1c61f9d7f9c9..b8cd95c91b62c 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values2.cc b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values2.cc index db0080598ebdc..f1a776bc7f509 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values2.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values3.cc b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values3.cc index 28d102579c97b..9ec99677ad02f 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values3.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_checkpoint.cc b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_checkpoint.cc index 93b40a14c27f8..8197f8ad2fef0 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_checkpoint.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_close.cc b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_close.cc index ce187c093033d..8d462e82f5707 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_close.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_broadcast_changes_values_before_close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_changes_values.cc b/storage/tokudb/ft-index/src/tests/recover-update_changes_values.cc index 94029e1c99ef0..65a55a121257f 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_changes_values.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_changes_values.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_checkpoint.cc b/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_checkpoint.cc index 0ff19f1801cae..ab97b660724da 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_checkpoint.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_checkpoint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_close.cc b/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_close.cc index 7e075b00456c6..f17edbd1317b5 100644 --- a/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_close.cc +++ b/storage/tokudb/ft-index/src/tests/recover-update_changes_values_before_close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor-multihandle.cc b/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor-multihandle.cc index 67ffd36560f52..c2b8543dba8df 100644 --- a/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor-multihandle.cc +++ b/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor-multihandle.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -105,8 +105,6 @@ const char *namea="a.db"; int verified = 0; uint32_t forced_version = 2; -#if USE_TDB - static int my_compare(DB *UU(db), const DBT *a, const DBT *b) { assert(db); assert(db->cmp_descriptor); @@ -121,18 +119,14 @@ static int my_compare(DB *UU(db), const DBT *a, const DBT *b) { return memcmp(a->data, b->data, a->size); } -#endif - static void change_descriptor(DB* db, int which, DB_ENV* env) { -#if USE_TDB DBT descriptor; size_t len = strlen(descriptor_contents[which])+1; dbt_init(&descriptor, descriptor_contents[which], len); IN_TXN_COMMIT(env, NULL, txn_desc, 0, { { int chk_r = db->change_descriptor(db, txn_desc, &descriptor, DB_UPDATE_CMP_DESCRIPTOR); CKERR(chk_r); } }); -#endif } static void @@ -146,9 +140,7 @@ do_x1_shutdown (bool do_commit, bool do_abort) { DB *dba, *dbb; r = db_env_create(&env, 0); CKERR(r); r = env->set_data_dir(env, "data"); CKERR(r); -#if USE_TDB r = env->set_default_bt_compare(env, my_compare); CKERR(r); -#endif r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = db_create(&dba, env, 0); CKERR(r); @@ -194,9 +186,7 @@ do_x1_recover (bool did_commit) { r = toku_os_mkdir(datadir, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = db_env_create(&env, 0); CKERR(r); r = env->set_data_dir(env, "data"); CKERR(r); -#if USE_TDB r = env->set_default_bt_compare(env, my_compare); CKERR(r); -#endif r = env->open(env, TOKU_TEST_FILENAME, envflags|DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = db_create(&dba, env, 0); CKERR(r); r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor.cc b/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor.cc index 31cacc9d35423..9db973bc13b73 100644 --- a/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor.cc +++ b/storage/tokudb/ft-index/src/tests/recover-upgrade-db-descriptor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -105,8 +105,6 @@ const char *namea="a.db"; int verified = 0; uint32_t forced_version = 2; -#if USE_TDB - static int my_compare(DB *UU(db), const DBT *a, const DBT *b) { assert(db); assert(db->cmp_descriptor); @@ -121,18 +119,14 @@ static int my_compare(DB *UU(db), const DBT *a, const DBT *b) { return memcmp(a->data, b->data, a->size); } -#endif - static void change_descriptor(DB* db, int which, DB_ENV* env) { -#if USE_TDB DBT descriptor; size_t len = strlen(descriptor_contents[which])+1; dbt_init(&descriptor, descriptor_contents[which], len); IN_TXN_COMMIT(env, NULL, txn_desc, 0, { { int chk_r = db->change_descriptor(db, txn_desc, &descriptor, DB_UPDATE_CMP_DESCRIPTOR); CKERR(chk_r); } }); -#endif } static void @@ -146,9 +140,7 @@ do_x1_shutdown (bool do_commit, bool do_abort) { DB *dba; r = db_env_create(&env, 0); CKERR(r); r = env->set_data_dir(env, "data"); CKERR(r); -#if USE_TDB r = env->set_default_bt_compare(env, my_compare); CKERR(r); -#endif r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = db_create(&dba, env, 0); CKERR(r); @@ -197,9 +189,7 @@ do_x1_recover (bool did_commit) { r = toku_os_mkdir(datadir, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = db_env_create(&env, 0); CKERR(r); r = env->set_data_dir(env, "data"); CKERR(r); -#if USE_TDB r = env->set_default_bt_compare(env, my_compare); CKERR(r); -#endif r = env->open(env, TOKU_TEST_FILENAME, envflags|DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = db_create(&dba, env, 0); CKERR(r); r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/recover-x1-abort.cc b/storage/tokudb/ft-index/src/tests/recover-x1-abort.cc index 473d76874e34c..c962f9c1c2926 100644 --- a/storage/tokudb/ft-index/src/tests/recover-x1-abort.cc +++ b/storage/tokudb/ft-index/src/tests/recover-x1-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-x1-commit.cc b/storage/tokudb/ft-index/src/tests/recover-x1-commit.cc index 780c4287f5882..2c0883294e420 100644 --- a/storage/tokudb/ft-index/src/tests/recover-x1-commit.cc +++ b/storage/tokudb/ft-index/src/tests/recover-x1-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-x1-nested-abort.cc b/storage/tokudb/ft-index/src/tests/recover-x1-nested-abort.cc index 415d184aa3d6a..b23235b2af80b 100644 --- a/storage/tokudb/ft-index/src/tests/recover-x1-nested-abort.cc +++ b/storage/tokudb/ft-index/src/tests/recover-x1-nested-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-x1-nested-commit.cc b/storage/tokudb/ft-index/src/tests/recover-x1-nested-commit.cc index a7fb13df76a4e..0426ac54cd7c1 100644 --- a/storage/tokudb/ft-index/src/tests/recover-x1-nested-commit.cc +++ b/storage/tokudb/ft-index/src/tests/recover-x1-nested-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-x2-abort.cc b/storage/tokudb/ft-index/src/tests/recover-x2-abort.cc index c14fa98d0b065..9335aa5e7fb43 100644 --- a/storage/tokudb/ft-index/src/tests/recover-x2-abort.cc +++ b/storage/tokudb/ft-index/src/tests/recover-x2-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recover-x2-commit.cc b/storage/tokudb/ft-index/src/tests/recover-x2-commit.cc index 373e9cf546f7d..4a2dfa8013bf2 100644 --- a/storage/tokudb/ft-index/src/tests/recover-x2-commit.cc +++ b/storage/tokudb/ft-index/src/tests/recover-x2-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recovery_fileops_stress.cc b/storage/tokudb/ft-index/src/tests/recovery_fileops_stress.cc index 5546ad53f5d8b..4ac3bccf0a2e5 100644 --- a/storage/tokudb/ft-index/src/tests/recovery_fileops_stress.cc +++ b/storage/tokudb/ft-index/src/tests/recovery_fileops_stress.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -104,7 +104,6 @@ DB** db_array; DB* states; static const int percent_do_op = 20; static const int percent_do_abort = 25; -static const int commit_abort_ratio = 3; static const int start_crashing_iter = 10; // iterations_per_crash_in_recovery should be an odd number; static const int iterations_per_crash_in_recovery = 7; diff --git a/storage/tokudb/ft-index/src/tests/recovery_fileops_unit.cc b/storage/tokudb/ft-index/src/tests/recovery_fileops_unit.cc index 0d7b33212ad3c..9c9681ae5e692 100644 --- a/storage/tokudb/ft-index/src/tests/recovery_fileops_unit.cc +++ b/storage/tokudb/ft-index/src/tests/recovery_fileops_unit.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/recovery_stress.cc b/storage/tokudb/ft-index/src/tests/recovery_stress.cc index 26190bae824f7..8e704bbc3f6ef 100644 --- a/storage/tokudb/ft-index/src/tests/recovery_stress.cc +++ b/storage/tokudb/ft-index/src/tests/recovery_stress.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include "checkpoint_test.h" -static const uint64_t max_windows_cachesize = 256 << 20; +static const uint64_t max_cachesize = 256 << 20; static const int NUM_DICTIONARIES = 1; static const int OPER_STEPS = 6; @@ -495,7 +495,7 @@ static void run_test (int iter) { uint64_t cachebytes = 0; // 0 => use default size const int32_t K256 = 256 * 1024; cachebytes = K256 * (iter + 1) - (128 * 1024); - if (cachebytes > max_windows_cachesize) + if (cachebytes > max_cachesize) cachebytes = 0; if (iter & 2) cachebytes = 0; // use default cachesize half the time diff --git a/storage/tokudb/ft-index/src/tests/redirect.cc b/storage/tokudb/ft-index/src/tests/redirect.cc index bcbe861adc0d4..9cf9d979f2f56 100644 --- a/storage/tokudb/ft-index/src/tests/redirect.cc +++ b/storage/tokudb/ft-index/src/tests/redirect.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/replace-into-write-lock.cc b/storage/tokudb/ft-index/src/tests/replace-into-write-lock.cc index 7f9ec3768ffce..77a03436407b0 100644 --- a/storage/tokudb/ft-index/src/tests/replace-into-write-lock.cc +++ b/storage/tokudb/ft-index/src/tests/replace-into-write-lock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/root_fifo_1.cc b/storage/tokudb/ft-index/src/tests/root_fifo_1.cc index fa88b3dfc4ad3..c83fe05c9fad1 100644 --- a/storage/tokudb/ft-index/src/tests/root_fifo_1.cc +++ b/storage/tokudb/ft-index/src/tests/root_fifo_1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/root_fifo_2.cc b/storage/tokudb/ft-index/src/tests/root_fifo_2.cc index 98f209bf7693f..1902774cd9924 100644 --- a/storage/tokudb/ft-index/src/tests/root_fifo_2.cc +++ b/storage/tokudb/ft-index/src/tests/root_fifo_2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -184,11 +184,7 @@ static void root_fifo_2(int n, int create_outside) { // cleanup r = env->close(env, 0); -#if TOKUDB assert(r == 0); env = null_env; -#else - printf("%s:%d env close r=%d\n", __FUNCTION__, __LINE__, r); -#endif } int test_main(int argc, char *const argv[]) { diff --git a/storage/tokudb/ft-index/src/tests/root_fifo_31.cc b/storage/tokudb/ft-index/src/tests/root_fifo_31.cc index 09f57a67ac981..495073c8e8215 100644 --- a/storage/tokudb/ft-index/src/tests/root_fifo_31.cc +++ b/storage/tokudb/ft-index/src/tests/root_fifo_31.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/root_fifo_32.cc b/storage/tokudb/ft-index/src/tests/root_fifo_32.cc index 874405ff68f7d..d75f81dc0129a 100644 --- a/storage/tokudb/ft-index/src/tests/root_fifo_32.cc +++ b/storage/tokudb/ft-index/src/tests/root_fifo_32.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/root_fifo_41.cc b/storage/tokudb/ft-index/src/tests/root_fifo_41.cc index d4f1e6554f101..91fb63985d8e4 100644 --- a/storage/tokudb/ft-index/src/tests/root_fifo_41.cc +++ b/storage/tokudb/ft-index/src/tests/root_fifo_41.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/rowsize.cc b/storage/tokudb/ft-index/src/tests/rowsize.cc index 3823232191501..7e84173006b91 100644 --- a/storage/tokudb/ft-index/src/tests/rowsize.cc +++ b/storage/tokudb/ft-index/src/tests/rowsize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -102,9 +102,7 @@ static void setup_env (void) { {int r = toku_os_mkdir(envdir, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); } {int r = db_env_create(&env, 0); CKERR(r); } //env->set_errfile(env, stderr); -#ifdef TOKUDB CKERR(env->set_redzone(env, 0)); -#endif { int r = env->open(env, envdir, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); } { int r = db_create(&db, env, 0); CKERR(r); } { int r = db->open(db, NULL, "foo.db", 0, DB_BTREE, DB_CREATE | DB_AUTO_COMMIT, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); } @@ -119,14 +117,11 @@ static void put (const char *keystring, int size, bool should_work) { DBT k, v; dbt_init(&k, keystring, 1+strlen(keystring)); dbt_init(&v, toku_xcalloc(size, 1), size); -#ifdef USE_BDB -#define DB_YES_OVERWRITE 0 -#endif static DB_TXN *txn = NULL; { int r = env->txn_begin(env, 0, &txn, 0); CKERR(r); } { int r = db->put(db, NULL, &k, &v, 0); - if (!IS_TDB || should_work) { + if (should_work) { CKERR(r); } else { assert(r!=0); diff --git a/storage/tokudb/ft-index/src/tests/run_test1426.sh b/storage/tokudb/ft-index/src/tests/run_test1426.sh deleted file mode 100755 index 832dd9935c2fe..0000000000000 --- a/storage/tokudb/ft-index/src/tests/run_test1426.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -set -e - -test $# -ge 4 - -tdbbin=$1; shift -bdbbin=$1; shift -tdbenv=$1; shift -bdbenv=$1; shift -tdbdump=$1; shift -bdbdump=$1; shift - -TOKU_TEST_FILENAME=$bdbenv $bdbbin -$bdbdump -p -h $bdbenv main > dump.bdb.1426 - -TOKU_TEST_FILENAME=$tdbenv $tdbbin -$tdbdump -x -p -h $tdbenv main > dump.tdb.1426 -diff -I db_pagesize=4096 dump.bdb.1426 dump.tdb.1426 diff --git a/storage/tokudb/ft-index/src/tests/seqinsert.cc b/storage/tokudb/ft-index/src/tests/seqinsert.cc index 8d402f2dcf551..85d20bf894100 100644 --- a/storage/tokudb/ft-index/src/tests/seqinsert.cc +++ b/storage/tokudb/ft-index/src/tests/seqinsert.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/shutdown-3344.cc b/storage/tokudb/ft-index/src/tests/shutdown-3344.cc index 6b58628790961..94716b558ece9 100644 --- a/storage/tokudb/ft-index/src/tests/shutdown-3344.cc +++ b/storage/tokudb/ft-index/src/tests/shutdown-3344.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/simple.cc b/storage/tokudb/ft-index/src/tests/simple.cc index ed55a1e847e4f..0733a005283a4 100644 --- a/storage/tokudb/ft-index/src/tests/simple.cc +++ b/storage/tokudb/ft-index/src/tests/simple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -136,9 +136,7 @@ test_main (int argc, char * const argv[]) { parse_args(argc, argv); setup(FLAGS_LOG); env->txn_checkpoint(env, 0, 0, 0); -#ifdef USE_TDB print_engine_status(env); -#endif test_shutdown(); return 0; } diff --git a/storage/tokudb/ft-index/src/tests/stat64-create-modify-times.cc b/storage/tokudb/ft-index/src/tests/stat64-create-modify-times.cc index 09c2454675725..46c25dc7208bb 100644 --- a/storage/tokudb/ft-index/src/tests/stat64-create-modify-times.cc +++ b/storage/tokudb/ft-index/src/tests/stat64-create-modify-times.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/stat64-null-txn.cc b/storage/tokudb/ft-index/src/tests/stat64-null-txn.cc index eb79901083595..3ca90823b86e6 100644 --- a/storage/tokudb/ft-index/src/tests/stat64-null-txn.cc +++ b/storage/tokudb/ft-index/src/tests/stat64-null-txn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/stat64-root-changes.cc b/storage/tokudb/ft-index/src/tests/stat64-root-changes.cc index b0c7a0131b6a2..80cf022f94644 100644 --- a/storage/tokudb/ft-index/src/tests/stat64-root-changes.cc +++ b/storage/tokudb/ft-index/src/tests/stat64-root-changes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/stat64.cc b/storage/tokudb/ft-index/src/tests/stat64.cc index 23e6ee84a657a..8e115fc20d67e 100644 --- a/storage/tokudb/ft-index/src/tests/stat64.cc +++ b/storage/tokudb/ft-index/src/tests/stat64.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/stress-gc.cc b/storage/tokudb/ft-index/src/tests/stress-gc.cc index 97e2e7309c302..c67f9b8ed4062 100644 --- a/storage/tokudb/ft-index/src/tests/stress-gc.cc +++ b/storage/tokudb/ft-index/src/tests/stress-gc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/stress-gc2.cc b/storage/tokudb/ft-index/src/tests/stress-gc2.cc index adee9ad1b3589..d0a63a874ec57 100644 --- a/storage/tokudb/ft-index/src/tests/stress-gc2.cc +++ b/storage/tokudb/ft-index/src/tests/stress-gc2.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/stress-test.cc b/storage/tokudb/ft-index/src/tests/stress-test.cc index 0774358fee156..87238a62d18f4 100644 --- a/storage/tokudb/ft-index/src/tests/stress-test.cc +++ b/storage/tokudb/ft-index/src/tests/stress-test.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/stress_openclose.h b/storage/tokudb/ft-index/src/tests/stress_openclose.h index 4e61dcef356b9..ab15960a31096 100644 --- a/storage/tokudb/ft-index/src/tests/stress_openclose.h +++ b/storage/tokudb/ft-index/src/tests/stress_openclose.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -85,6 +85,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "$Id$" diff --git a/storage/tokudb/ft-index/src/tests/test-5138.cc b/storage/tokudb/ft-index/src/tests/test-5138.cc index 1ec1d4646f93e..546fe6d9368f9 100644 --- a/storage/tokudb/ft-index/src/tests/test-5138.cc +++ b/storage/tokudb/ft-index/src/tests/test-5138.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test-nested-xopen-eclose.cc b/storage/tokudb/ft-index/src/tests/test-nested-xopen-eclose.cc index 2c5f7fae5693d..e78d2130ea319 100644 --- a/storage/tokudb/ft-index/src/tests/test-nested-xopen-eclose.cc +++ b/storage/tokudb/ft-index/src/tests/test-nested-xopen-eclose.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test-prepare.cc b/storage/tokudb/ft-index/src/tests/test-prepare.cc index a29241596f054..9033c633ea7ac 100644 --- a/storage/tokudb/ft-index/src/tests/test-prepare.cc +++ b/storage/tokudb/ft-index/src/tests/test-prepare.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -103,9 +103,7 @@ static void clean_env (const char *envdir) { static void setup_env (DB_ENV **envp, const char *envdir) { { int chk_r = db_env_create(envp, 0); CKERR(chk_r); } (*envp)->set_errfile(*envp, stderr); -#ifdef TOKUDB { int chk_r = (*envp)->set_redzone(*envp, 0); CKERR(chk_r); } -#endif { int chk_r = (*envp)->open(*envp, envdir, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE|DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(chk_r); } } diff --git a/storage/tokudb/ft-index/src/tests/test-prepare2.cc b/storage/tokudb/ft-index/src/tests/test-prepare2.cc index ce4cf4869ef87..8952f14cf3163 100644 --- a/storage/tokudb/ft-index/src/tests/test-prepare2.cc +++ b/storage/tokudb/ft-index/src/tests/test-prepare2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -105,9 +105,7 @@ static void clean_env (const char *envdir) { static void setup_env (DB_ENV **envp, const char *envdir) { { int chk_r = db_env_create(envp, 0); CKERR(chk_r); } (*envp)->set_errfile(*envp, stderr); -#ifdef TOKUDB { int chk_r = (*envp)->set_redzone(*envp, 0); CKERR(chk_r); } -#endif { int chk_r = (*envp)->open(*envp, envdir, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE|DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(chk_r); } } diff --git a/storage/tokudb/ft-index/src/tests/test-prepare3.cc b/storage/tokudb/ft-index/src/tests/test-prepare3.cc index 4cd1305031004..3643d73f41a50 100644 --- a/storage/tokudb/ft-index/src/tests/test-prepare3.cc +++ b/storage/tokudb/ft-index/src/tests/test-prepare3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -105,9 +105,7 @@ static void clean_env (const char *envdir) { static void setup_env (DB_ENV **envp, const char *envdir) { { int chk_r = db_env_create(envp, 0); CKERR(chk_r); } (*envp)->set_errfile(*envp, stderr); -#ifdef TOKUDB { int chk_r = (*envp)->set_redzone(*envp, 0); CKERR(chk_r); } -#endif { int chk_r = (*envp)->open(*envp, envdir, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE|DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(chk_r); } } diff --git a/storage/tokudb/ft-index/src/tests/test-rollinclude.cc b/storage/tokudb/ft-index/src/tests/test-rollinclude.cc index 8a4af61bf5980..6ece4beb67127 100644 --- a/storage/tokudb/ft-index/src/tests/test-rollinclude.cc +++ b/storage/tokudb/ft-index/src/tests/test-rollinclude.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test-xa-prepare.cc b/storage/tokudb/ft-index/src/tests/test-xa-prepare.cc index 751589c5d91e6..e08e7361555a3 100644 --- a/storage/tokudb/ft-index/src/tests/test-xa-prepare.cc +++ b/storage/tokudb/ft-index/src/tests/test-xa-prepare.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -103,9 +103,7 @@ static void clean_env (const char *envdir) { static void setup_env (DB_ENV **envp, const char *envdir) { { int chk_r = db_env_create(envp, 0); CKERR(chk_r); } (*envp)->set_errfile(*envp, stderr); -#ifdef TOKUDB { int chk_r = (*envp)->set_redzone(*envp, 0); CKERR(chk_r); } -#endif { int chk_r = (*envp)->open(*envp, envdir, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE|DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(chk_r); } } diff --git a/storage/tokudb/ft-index/src/tests/test-xopen-eclose.cc b/storage/tokudb/ft-index/src/tests/test-xopen-eclose.cc index f6359d8cf1add..82f2bc6d15930 100644 --- a/storage/tokudb/ft-index/src/tests/test-xopen-eclose.cc +++ b/storage/tokudb/ft-index/src/tests/test-xopen-eclose.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test.h b/storage/tokudb/ft-index/src/tests/test.h index b5ab33ccfeebd..c8e98862038e7 100644 --- a/storage/tokudb/ft-index/src/tests/test.h +++ b/storage/tokudb/ft-index/src/tests/test.h @@ -2,10 +2,6 @@ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: #ident "$Id$" -#ifndef __TEST_H -#define __TEST_H - - /* COPYING CONDITIONS NOTICE: @@ -34,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +87,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." + #include #include @@ -107,17 +106,13 @@ PATENT RIGHTS GRANT: #include "toku_assert.h" #include #include -#if defined(USE_TDB) + #include "ydb.h" //TDB uses DB_NOTFOUND for c_del and DB_CURRENT errors. #ifdef DB_KEYEMPTY #error #endif #define DB_KEYEMPTY DB_NOTFOUND -#endif -#ifndef DB_DELETE_ANY -#define DB_DELETE_ANY 0 -#endif // Certain tests fail when row locks taken for read are not shared. // This switch prevents them from failing so long as read locks are not shared. 
@@ -144,13 +139,6 @@ int verbose=0; fflush(stderr); \ } while (0) -// If the error code depends on BDB vs TDB use this -#ifdef USE_TDB -#define CKERR_depending(r,tdbexpect,bdbexpect) CKERR2(r,tdbexpect) -#else -#define CKERR_depending(r,tdbexpect,bdbexpect) CKERR2(r,bdbexpect) -#endif - static __attribute__((__unused__)) void parse_args (int argc, char * const argv[]) { const char *argv0=argv[0]; @@ -174,7 +162,6 @@ parse_args (int argc, char * const argv[]) { } } -#ifdef USE_TDB static __attribute__((__unused__)) void print_engine_status(DB_ENV * UU(env)) { if (verbose) { // verbose declared statically in this file @@ -187,9 +174,7 @@ print_engine_status(DB_ENV * UU(env)) { printf("%s", buff); } } -#endif -#ifdef USE_TDB static __attribute__((__unused__)) uint64_t get_engine_status_val(DB_ENV * UU(env), const char * keyname) { uint64_t rval = 0; @@ -213,7 +198,6 @@ get_engine_status_val(DB_ENV * UU(env), const char * keyname) { CKERR2(found, 1); return rval; } -#endif static __attribute__((__unused__)) DBT * dbt_init(DBT *dbt, const void *data, uint32_t size) { @@ -293,13 +277,8 @@ uint_dbt_cmp (DB *db, const DBT *a, const DBT *b) { return 0; } -#ifdef USE_TDB #define SET_TRACE_FILE(x) toku_set_trace_file(x) #define CLOSE_TRACE_FILE(x) toku_close_trace_file() -#else -#define SET_TRACE_FILE(x) ((void)0) -#define CLOSE_TRACE_FILE(x) ((void)0) -#endif #include @@ -361,19 +340,11 @@ void print_time_now(void) { static void UU() multiply_locks_for_n_dbs(DB_ENV *env, int num_dbs) { -#ifdef USE_TDB uint64_t current_max_lock_memory; int r = env->get_lk_max_memory(env, ¤t_max_lock_memory); CKERR(r); r = env->set_lk_max_memory(env, current_max_lock_memory * num_dbs); CKERR(r); -#else - uint32_t current_max_locks; - int r = env->get_lk_max_locks(env, ¤t_max_locks); - CKERR(r); - r = env->set_lk_max_locks(env, current_max_locks * num_dbs); - CKERR(r); -#endif } static inline void @@ -402,7 +373,6 @@ static void copy_dbt(DBT *dest, const DBT *src) { } // DBT_ARRAY is a toku-specific type -#ifdef USE_TDB UU() static int env_update_multiple_test_no_array( @@ -507,7 +477,6 @@ static int env_del_multiple_test_no_array( } return r; } -#endif /* Some macros for evaluating blocks or functions within the scope of a * transaction. 
*/ @@ -525,35 +494,14 @@ static int env_del_multiple_test_no_array( { int chk_r = (txn)->abort(txn); CKERR(chk_r); } \ }) - -int test_main (int argc, char * const argv[]); -int -#if defined(__cilkplusplus) -cilk_main(int argc, char *argv[]) -#else -main(int argc, char * const argv[]) -#endif -{ +int test_main(int argc, char *const argv[]); +int main(int argc, char *const argv[]) { int r; -#if IS_TDB && TOKU_WINDOWS - int rinit = toku_ydb_init(); - CKERR(rinit); -#endif -#if !IS_TDB && DB_VERSION_MINOR==4 && DB_VERSION_MINOR == 7 - db_env_set_func_malloc(toku_malloc); - db_env_set_func_free(toku_free); - db_env_set_func_realloc(toku_realloc); -#endif toku_os_initialize_settings(1); r = test_main(argc, argv); -#if IS_TDB && TOKU_WINDOWS - toku_ydb_destroy(); -#endif return r; } #ifndef DB_GID_SIZE #define DB_GID_SIZE DB_XIDDATASIZE #endif - -#endif // __TEST_H diff --git a/storage/tokudb/ft-index/src/tests/test1426.cc b/storage/tokudb/ft-index/src/tests/test1426.cc deleted file mode 100644 index 40ca1dee345cb..0000000000000 --- a/storage/tokudb/ft-index/src/tests/test1426.cc +++ /dev/null @@ -1,196 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. 
- - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#include "test.h" -/* Test for #1426. Make sure deletes and inserts in a FIFO work. */ -/* This test is run using a special makefile rule that runs the TDB version and the BDB version, dumps their outputs, and compares them */ - -#include -#include -#include - -// |DB_INIT_TXN| DB_INIT_LOG | DB_RECOVER -const int envflags = DB_CREATE|DB_INIT_MPOOL|DB_INIT_LOCK |DB_THREAD |DB_PRIVATE; - -DB_ENV *env; -DB *db; -DB_TXN * const null_txn = NULL; - -static void -empty_cachetable (void) -// Make all the cachetable entries clean. -// Brute force it by closing and reopening everything. 
-{ - int r; - r = db->close(db, 0); CKERR(r); - r = env->close(env, 0); CKERR(r); - r = db_env_create(&env, 0); CKERR(r); -#ifdef TOKUDB - r = env->set_cachesize(env, 0, 10000000, 1); CKERR(r); -#endif - r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); - r = db_create(&db, env, 0); CKERR(r); - r = db->open(db, null_txn, "main", 0, DB_BTREE, 0, 0666); CKERR(r); -} - -static void -do_insert_delete_fifo (void) -{ - int r; - toku_os_recursive_delete(TOKU_TEST_FILENAME); - toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); - - r = db_env_create(&env, 0); CKERR(r); -#ifdef TOKUDB - r = env->set_cachesize(env, 0, 10000000, 1); CKERR(r); -#endif - r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); - r = db_create(&db, env, 0); CKERR(r); - r = db->set_pagesize(db, 4096); CKERR(r); - r = db->open(db, null_txn, "main", 0, DB_BTREE, DB_CREATE, 0666); CKERR(r); - { - uint64_t i; - uint64_t n_deleted = 0; - uint64_t N=20000; // total number to insert - uint64_t M= 5000; // size of rolling fifo - uint64_t D= 200; // number to delete at once - for (i=0; iput(db, null_txn, dbt_init(&kt, k, keylen) , dbt_init(&vt, v, vallen), 0); CKERR(r); - } - if (i%D==0) { - // Once every D steps, delete everything until there are only M things left. - // Flush the data down the tree for all the values we will do - { - uint64_t peek_here = n_deleted; - while (peek_here + M < i) { - char k[100]; - int keylen = snprintf(k, sizeof k, "%016" PRIu64 "key", peek_here); - DBT kt; - DBT vt; - memset(&vt, 0, sizeof(vt)); - vt.flags = DB_DBT_MALLOC; - r = db->get(db, null_txn, dbt_init(&kt, k, keylen), &vt, 0); CKERR(r); - peek_here++; - toku_free(vt.data); - } - } - empty_cachetable(); - while (n_deleted + M < i) { - char k[100]; - int keylen = snprintf(k, sizeof k, "%016" PRIu64 "key", n_deleted); - DBT kt; - r = db->del(db, null_txn, dbt_init(&kt, k, keylen), 0); - if (r!=0) printf("error %d %s", r, db_strerror(r)); - CKERR(r); - n_deleted++; - empty_cachetable(); - } - } - } - } - r = db->close(db, 0); CKERR(r); - r = env->close(env, 0); CKERR(r); -} - -int -test_main (int argc, char *const argv[]) -{ - parse_args(argc, argv); - do_insert_delete_fifo(); - return 0; -} - diff --git a/storage/tokudb/ft-index/src/tests/test1572.cc b/storage/tokudb/ft-index/src/tests/test1572.cc index 73d93d587614b..0ea04a9c4013d 100644 --- a/storage/tokudb/ft-index/src/tests/test1572.cc +++ b/storage/tokudb/ft-index/src/tests/test1572.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: /* Is it feasible to run 4 billion transactions in one test in the regression tests? */ #include #include -#include +#include #include static void diff --git a/storage/tokudb/ft-index/src/tests/test1753.cc b/storage/tokudb/ft-index/src/tests/test1753.cc index 1eee0977f06a6..e50b828c92cad 100644 --- a/storage/tokudb/ft-index/src/tests/test1753.cc +++ b/storage/tokudb/ft-index/src/tests/test1753.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -99,10 +99,6 @@ DB_TXN *null_txn=0; static void do_test1753 (int do_create_on_reopen) { - if (IS_TDB==0 && DB_VERSION_MAJOR==4 && DB_VERSION_MINOR<7 && do_create_on_reopen==0) { - return; // do_create_on_reopen==0 segfaults in 4.6 - } - int r; toku_os_recursive_delete(TOKU_TEST_FILENAME); toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); diff --git a/storage/tokudb/ft-index/src/tests/test1842.cc b/storage/tokudb/ft-index/src/tests/test1842.cc index f5fccfb54bc9f..20b014d4a3300 100644 --- a/storage/tokudb/ft-index/src/tests/test1842.cc +++ b/storage/tokudb/ft-index/src/tests/test1842.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -137,9 +137,7 @@ setup_db (uint32_t dup_mode) { toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); r = db_env_create(&env, 0); CKERR(r); -#ifdef TOKUDB r = env->set_default_bt_compare(env, int_dbt_cmp); CKERR(r); -#endif r = env->open(env, TOKU_TEST_FILENAME, DB_INIT_MPOOL | DB_INIT_LOG | DB_INIT_LOCK | DB_INIT_TXN | DB_PRIVATE | DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); @@ -148,9 +146,6 @@ setup_db (uint32_t dup_mode) { r = env->txn_begin(env, 0, &txn, 0); CKERR(r); r = db_create(&db, env, 0); CKERR(r); -#ifndef TOKUDB - r=db->set_bt_compare(db, int_dbt_cmp); CKERR(r); -#endif r = db->set_flags(db, dup_mode); assert(r == 0); CKERR(r); r = db->open(db, txn, "test.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = txn->commit(txn, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/test3039.cc b/storage/tokudb/ft-index/src/tests/test3039.cc index e944eb0cd0090..35b22b374c0c4 100644 --- a/storage/tokudb/ft-index/src/tests/test3039.cc +++ b/storage/tokudb/ft-index/src/tests/test3039.cc @@ -36,7 +36,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -112,11 +112,7 @@ static DB_ENV *env = NULL; static DB *db; // BDB cannot handle big transactions by default (runs out of locks). -#ifdef TOKUDB #define N_PER_XACTION 10000 -#else -#define N_PER_XACTION 1000 -#endif static void create_db (uint64_t N) { n_rows = N; @@ -124,9 +120,7 @@ static void create_db (uint64_t N) { toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); { int r = db_env_create(&env, 0); CKERR(r); } env->set_errfile(env, stderr); -#ifdef TOKUDB env->set_redzone(env, 0); -#endif { int r = env->set_cachesize(env, 0, 400*4096, 1); CKERR(r); } { int r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); } DB_TXN *txn; diff --git a/storage/tokudb/ft-index/src/tests/test3219.cc b/storage/tokudb/ft-index/src/tests/test3219.cc index 523b3548271ba..d5de370fd60af 100644 --- a/storage/tokudb/ft-index/src/tests/test3219.cc +++ b/storage/tokudb/ft-index/src/tests/test3219.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: // This test, when run under helgrind, should detect the race problem documented in #3219. // The test: // checkpointing runs (in one thread) -// another thread does a brt lookup. +// another thread does an ft lookup. // We expect to see a lock-acquisition error. 
diff --git a/storage/tokudb/ft-index/src/tests/test3522.cc b/storage/tokudb/ft-index/src/tests/test3522.cc index fe67793e3affb..7166c561f704d 100644 --- a/storage/tokudb/ft-index/src/tests/test3522.cc +++ b/storage/tokudb/ft-index/src/tests/test3522.cc @@ -35,7 +35,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test3522b.cc b/storage/tokudb/ft-index/src/tests/test3522b.cc index 17b2df6b13a64..09c9807dd2b6f 100644 --- a/storage/tokudb/ft-index/src/tests/test3522b.cc +++ b/storage/tokudb/ft-index/src/tests/test3522b.cc @@ -36,7 +36,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test3529.cc b/storage/tokudb/ft-index/src/tests/test3529.cc index 287729451a41f..2c605c3028a44 100644 --- a/storage/tokudb/ft-index/src/tests/test3529.cc +++ b/storage/tokudb/ft-index/src/tests/test3529.cc @@ -40,7 +40,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test4573-logtrim.cc b/storage/tokudb/ft-index/src/tests/test4573-logtrim.cc index a7a121b5d9eb6..9dba89f04f191 100644 --- a/storage/tokudb/ft-index/src/tests/test4573-logtrim.cc +++ b/storage/tokudb/ft-index/src/tests/test4573-logtrim.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: const int envflags = DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE|DB_RECOVER; -const int my_lg_max = IS_TDB ? 100 : (4096*2); +const int my_lg_max = 100; int test_main (int UU(argc), char UU(*const argv[])) { int r; diff --git a/storage/tokudb/ft-index/src/tests/test5092.cc b/storage/tokudb/ft-index/src/tests/test5092.cc index 5cfc055749695..16652472b55b3 100644 --- a/storage/tokudb/ft-index/src/tests/test5092.cc +++ b/storage/tokudb/ft-index/src/tests/test5092.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -103,9 +103,7 @@ static void clean_env (const char *envdir) { static void setup_env (DB_ENV **envp, const char *envdir) { { int chk_r = db_env_create(envp, 0); CKERR(chk_r); } (*envp)->set_errfile(*envp, stderr); -#ifdef TOKUDB { int chk_r = (*envp)->set_redzone(*envp, 0); CKERR(chk_r); } -#endif { int chk_r = (*envp)->open(*envp, envdir, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE|DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(chk_r); } } diff --git a/storage/tokudb/ft-index/src/tests/test938.cc b/storage/tokudb/ft-index/src/tests/test938.cc index 5f7af0b618e23..d6896894b5400 100644 --- a/storage/tokudb/ft-index/src/tests/test938.cc +++ b/storage/tokudb/ft-index/src/tests/test938.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -215,9 +215,7 @@ test_main(int argc, char *const argv[]) { DB_TXN *txn; { r = db_env_create(&env, 0); CKERR(r); -#ifdef TOKUDB r = env->set_redzone(env, 0); CKERR(r); -#endif r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); env->set_errfile(env, stderr); r=env->txn_begin(env, 0, &txn, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/test938b.cc b/storage/tokudb/ft-index/src/tests/test938b.cc index d0d07120f4a2a..78830d8d4c82e 100644 --- a/storage/tokudb/ft-index/src/tests/test938b.cc +++ b/storage/tokudb/ft-index/src/tests/test938b.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test938c.cc b/storage/tokudb/ft-index/src/tests/test938c.cc index 6d3d0a6b6e699..154bf05862d5e 100644 --- a/storage/tokudb/ft-index/src/tests/test938c.cc +++ b/storage/tokudb/ft-index/src/tests/test938c.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -106,7 +106,7 @@ run (void) { // add (1,101) to the tree // In another concurrent txn // look up (1,102) and do DB_NEXT - // That should be fine in TokuDB. + // That should be fine in TokuFT. // It fails before #938 is fixed. // It also fails for BDB for other reasons (page-level locking vs. row-level locking) { @@ -153,9 +153,7 @@ test_main(int argc, char *const argv[]) { DB_TXN *txn; { r = db_env_create(&env, 0); CKERR(r); -#ifdef TOKUDB r = env->set_redzone(env, 0); CKERR(r); -#endif r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); env->set_errfile(env, stderr); r=env->txn_begin(env, 0, &txn, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/test_3529_insert_2.cc b/storage/tokudb/ft-index/src/tests/test_3529_insert_2.cc index 542785e007fb2..d024143415de6 100644 --- a/storage/tokudb/ft-index/src/tests/test_3529_insert_2.cc +++ b/storage/tokudb/ft-index/src/tests/test_3529_insert_2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_3529_table_lock.cc b/storage/tokudb/ft-index/src/tests/test_3529_table_lock.cc index 7a07ff4ac11b2..cb9137a10a02c 100644 --- a/storage/tokudb/ft-index/src/tests/test_3529_table_lock.cc +++ b/storage/tokudb/ft-index/src/tests/test_3529_table_lock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_3645.cc b/storage/tokudb/ft-index/src/tests/test_3645.cc index dfd8544ef4b78..1e7c3b5faf617 100644 --- a/storage/tokudb/ft-index/src/tests/test_3645.cc +++ b/storage/tokudb/ft-index/src/tests/test_3645.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_3755.cc b/storage/tokudb/ft-index/src/tests/test_3755.cc index fa6af3b46d8a6..a678352ba1983 100644 --- a/storage/tokudb/ft-index/src/tests/test_3755.cc +++ b/storage/tokudb/ft-index/src/tests/test_3755.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_4015.cc b/storage/tokudb/ft-index/src/tests/test_4015.cc index a1b8f5551555f..c0538b7976c1e 100644 --- a/storage/tokudb/ft-index/src/tests/test_4015.cc +++ b/storage/tokudb/ft-index/src/tests/test_4015.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_4368.cc b/storage/tokudb/ft-index/src/tests/test_4368.cc index f000efa7813bd..ab55a6ee173e5 100644 --- a/storage/tokudb/ft-index/src/tests/test_4368.cc +++ b/storage/tokudb/ft-index/src/tests/test_4368.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_4657.cc b/storage/tokudb/ft-index/src/tests/test_4657.cc index c7a3f7473ce11..6ab9ce56d73dd 100644 --- a/storage/tokudb/ft-index/src/tests/test_4657.cc +++ b/storage/tokudb/ft-index/src/tests/test_4657.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_5015.cc b/storage/tokudb/ft-index/src/tests/test_5015.cc index 4eb337eb88b3c..071b7f3660e0d 100644 --- a/storage/tokudb/ft-index/src/tests/test_5015.cc +++ b/storage/tokudb/ft-index/src/tests/test_5015.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_5469.cc b/storage/tokudb/ft-index/src/tests/test_5469.cc index c7e30b42c2fb6..cbbcb3721cb0c 100644 --- a/storage/tokudb/ft-index/src/tests/test_5469.cc +++ b/storage/tokudb/ft-index/src/tests/test_5469.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_789.cc b/storage/tokudb/ft-index/src/tests/test_789.cc index c99af5bd1e5b6..31cdd6ef7771f 100644 --- a/storage/tokudb/ft-index/src/tests/test_789.cc +++ b/storage/tokudb/ft-index/src/tests/test_789.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_935.cc b/storage/tokudb/ft-index/src/tests/test_935.cc index 971a1c1a85ac5..a676db32460c9 100644 --- a/storage/tokudb/ft-index/src/tests/test_935.cc +++ b/storage/tokudb/ft-index/src/tests/test_935.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_abort1.cc b/storage/tokudb/ft-index/src/tests/test_abort1.cc index 456f5811b73a7..7a8b3384ce831 100644 --- a/storage/tokudb/ft-index/src/tests/test_abort1.cc +++ b/storage/tokudb/ft-index/src/tests/test_abort1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -/* Simple test of logging. Can I start a TokuDB with logging enabled? */ +/* Simple test of logging. Can I start TokuFT with logging enabled? */ #include #include @@ -113,20 +113,6 @@ test_db_open_aborts (void) { r=db_env_create(&env, 0); assert(r==0); r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_PRIVATE|DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_create(&db, env, 0); CKERR(r); -#if 0 - { - DB_TXN *tid; - r=env->txn_begin(env, 0, &tid, 0); assert(r==0); - r=db->open(db, tid, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); - r=tid->abort(tid); assert(r==0); - } - { - toku_struct_stat buf; - r=toku_stat(ENVDIR "/foo.db", &buf); - assert(r!=0); - assert(errno==ENOENT); - } -#endif { DB_TXN *tid; r=env->txn_begin(env, 0, &tid, 0); assert(r==0); @@ -142,7 +128,6 @@ test_db_open_aborts (void) { r=tid->abort(tid); assert(r==0); } { -#if USE_TDB { DBT dname; DBT iname; @@ -152,7 +137,6 @@ test_db_open_aborts (void) { r = env->get_iname(env, &dname, &iname); CKERR2(r, DB_NOTFOUND); } -#endif toku_struct_stat statbuf; char filename[TOKU_PATH_MAX+1]; r = toku_stat(toku_path_join(filename, 2, TOKU_TEST_FILENAME, "foo.db"), &statbuf); @@ -209,7 +193,6 @@ test_db_put_aborts (void) { // The database should exist { char *filename; -#if USE_TDB { DBT dname; DBT iname; @@ -221,9 +204,6 @@ test_db_put_aborts (void) { CAST_FROM_VOIDP(filename, iname.data); assert(filename); } -#else - filename = toku_xstrdup("foo.db"); -#endif toku_struct_stat statbuf; char fullfile[TOKU_PATH_MAX+1]; r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, filename), &statbuf); diff --git a/storage/tokudb/ft-index/src/tests/test_abort2.cc b/storage/tokudb/ft-index/src/tests/test_abort2.cc index 6c63717a99df2..881bc97ad1b16 100644 --- a/storage/tokudb/ft-index/src/tests/test_abort2.cc +++ b/storage/tokudb/ft-index/src/tests/test_abort2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -149,13 +149,6 @@ do_test_abort2 (void) { r=db_create(&db, env, 0); CKERR(r); r=env->txn_begin(env, 0, &txn, 0); CKERR(r); r=db->open(db, txn, "foo.db", 0, DB_BTREE, 0, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); -#ifndef TOKUDB - { - uint32_t ps; - r=db->get_pagesize(db, &ps); CKERR(r); - assert(ps==4096); - } -#endif r=txn->commit(txn, 0); assert(r==0); r=env->txn_begin(env, 0, &txn, 0); assert(r==0); diff --git a/storage/tokudb/ft-index/src/tests/test_abort3.cc b/storage/tokudb/ft-index/src/tests/test_abort3.cc index 05f810efe450f..4542ad3b1c477 100644 --- a/storage/tokudb/ft-index/src/tests/test_abort3.cc +++ b/storage/tokudb/ft-index/src/tests/test_abort3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -120,11 +120,7 @@ static void op_delete (int i) { int r = db->del(db, txn, dbt_init(&key, hello, strlen(hello)+1), DB_DELETE_ANY); -#ifdef TOKUDB assert(r==0); -#else - assert(r==DB_NOTFOUND || r==0); -#endif } static void lookup (int i, int expect, int expectj) { diff --git a/storage/tokudb/ft-index/src/tests/test_abort4.cc b/storage/tokudb/ft-index/src/tests/test_abort4.cc index 73b82d78c4099..29581dc628551 100644 --- a/storage/tokudb/ft-index/src/tests/test_abort4.cc +++ b/storage/tokudb/ft-index/src/tests/test_abort4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -200,7 +200,6 @@ verify_and_tear_down(int close_first) { int r; { char *filename; -#if USE_TDB { DBT dname; DBT iname; @@ -212,9 +211,6 @@ verify_and_tear_down(int close_first) { CAST_FROM_VOIDP(filename, iname.data); assert(filename); } -#else - filename = toku_xstrdup("foo.db"); -#endif toku_struct_stat statbuf; char fullfile[TOKU_PATH_MAX+1]; r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, filename), &statbuf); diff --git a/storage/tokudb/ft-index/src/tests/test_abort5.cc b/storage/tokudb/ft-index/src/tests/test_abort5.cc index d8837238dd37f..34bf564d9ac92 100644 --- a/storage/tokudb/ft-index/src/tests/test_abort5.cc +++ b/storage/tokudb/ft-index/src/tests/test_abort5.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -231,7 +231,6 @@ verify_and_tear_down(int close_first) { int r; { char *filename; -#if USE_TDB { DBT dname; DBT iname; @@ -243,9 +242,6 @@ verify_and_tear_down(int close_first) { CAST_FROM_VOIDP(filename, iname.data); assert(filename); } -#else - filename = toku_xstrdup("foo.db"); -#endif toku_struct_stat statbuf; char fullfile[TOKU_PATH_MAX+1]; r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, filename), &statbuf); diff --git a/storage/tokudb/ft-index/src/tests/test_abort_delete_first.cc b/storage/tokudb/ft-index/src/tests/test_abort_delete_first.cc index 721414c90dddb..fb983474462f3 100644 --- a/storage/tokudb/ft-index/src/tests/test_abort_delete_first.cc +++ b/storage/tokudb/ft-index/src/tests/test_abort_delete_first.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -205,12 +205,12 @@ do_abort_delete_first_or_last(int N, r=db->close(db, 0); CKERR(r); r=env->close(env, 0); CKERR(r); -#if defined(TOKUDB) && defined(__unix__) - char cmd[sizeof("../../utils/tokudb_dump -h foo.db >") + 2 * TOKU_PATH_MAX]; - snprintf(cmd, sizeof(cmd), "../../utils/tokudb_dump -h %s foo.db > %s", TOKU_TEST_FILENAME, DEV_NULL_FILE); + + // Oh man, this is gross. + char cmd[sizeof("../../tools/tokudb_dump -h foo.db >") + 2 * TOKU_PATH_MAX]; + snprintf(cmd, sizeof(cmd), "../../tools/tokudb_dump -h %s foo.db > %s", TOKU_TEST_FILENAME, DEV_NULL_FILE); r=system(cmd); CKERR(r); -#endif } int diff --git a/storage/tokudb/ft-index/src/tests/test_archive0.cc b/storage/tokudb/ft-index/src/tests/test_archive0.cc index 85b444e243ca4..8ffa87e2a6dc4 100644 --- a/storage/tokudb/ft-index/src/tests/test_archive0.cc +++ b/storage/tokudb/ft-index/src/tests/test_archive0.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_archive1.cc b/storage/tokudb/ft-index/src/tests/test_archive1.cc index 1a6f521854f7a..5208a5eb1b6a9 100644 --- a/storage/tokudb/ft-index/src/tests/test_archive1.cc +++ b/storage/tokudb/ft-index/src/tests/test_archive1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -135,14 +135,7 @@ test_main (int argc, char *const argv[]) { CKERR(r); //this test no longer produces a list with any entries for TDB // - txn_checkpoint trims unused logfiles -#if IS_TDB assert(list == 0); -#else - assert(list); - assert(list[0]); - if (verbose) printf("file[0]=%s\n", list[0]); - toku_free(list); -#endif } r=db->close(db, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/test_archive2.cc b/storage/tokudb/ft-index/src/tests/test_archive2.cc index ea67a743f9228..faa73171f7ea5 100644 --- a/storage/tokudb/ft-index/src/tests/test_archive2.cc +++ b/storage/tokudb/ft-index/src/tests/test_archive2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_bad_implicit_promotion.cc b/storage/tokudb/ft-index/src/tests/test_bad_implicit_promotion.cc index 8fcff7c6132e8..c7555d2d3f60d 100644 --- a/storage/tokudb/ft-index/src/tests/test_bad_implicit_promotion.cc +++ b/storage/tokudb/ft-index/src/tests/test_bad_implicit_promotion.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_blobs_leaf_split.cc b/storage/tokudb/ft-index/src/tests/test_blobs_leaf_split.cc index dca73400644e2..eae30421f7950 100644 --- a/storage/tokudb/ft-index/src/tests/test_blobs_leaf_split.cc +++ b/storage/tokudb/ft-index/src/tests/test_blobs_leaf_split.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -112,11 +112,7 @@ static void insert(DB *db, DB_TXN *txn, int k, int val_size) { } int test_main(int argc, char * const argv[]) { -#if defined(TOKUDB) const char *db_env_dir = "dir.blobs.leafsplit.env.tdb"; -#else - const char *db_env_dir = "dir.blobs.leafsplit.env.bdb"; -#endif int db_env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG; const char *db_filename = "blobs.db"; int do_txn = 1; diff --git a/storage/tokudb/ft-index/src/tests/test_bulk_fetch.cc b/storage/tokudb/ft-index/src/tests/test_bulk_fetch.cc index 76706db6dba0d..800212a6751d9 100644 --- a/storage/tokudb/ft-index/src/tests/test_bulk_fetch.cc +++ b/storage/tokudb/ft-index/src/tests/test_bulk_fetch.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_cachesize.cc b/storage/tokudb/ft-index/src/tests/test_cachesize.cc index 2c3357b584be1..d161dd8903383 100644 --- a/storage/tokudb/ft-index/src/tests/test_cachesize.cc +++ b/storage/tokudb/ft-index/src/tests/test_cachesize.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -101,10 +101,6 @@ PATENT RIGHTS GRANT: static uint64_t size_from (uint32_t gbytes, uint32_t bytes) { -#ifdef USE_BDB - if (sizeof (intptr_t) == 4 && gbytes == 4 && bytes == 0) - return 0xffffffff; -#endif return ((uint64_t)gbytes << 30) + bytes; } @@ -119,11 +115,6 @@ expect_le (uint64_t a, uint32_t gbytes, uint32_t bytes) { uint64_t b = size_from(gbytes, bytes); if (a != b && verbose) printf("WARNING: expect %" PRIu64 " got %" PRIu64 "\n", a, b); -#ifdef USE_BDB - if (a > b) { - assert(a == 4ULL<<30 && b == a-1); return; - } -#endif assert(a <= b); } diff --git a/storage/tokudb/ft-index/src/tests/test_cmp_descriptor.cc b/storage/tokudb/ft-index/src/tests/test_cmp_descriptor.cc index 87b3da2b4cd8d..3d318ddd34629 100644 --- a/storage/tokudb/ft-index/src/tests/test_cmp_descriptor.cc +++ b/storage/tokudb/ft-index/src/tests/test_cmp_descriptor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_compression_methods.cc b/storage/tokudb/ft-index/src/tests/test_compression_methods.cc index ef73c593f56d0..272cf4f145d0f 100644 --- a/storage/tokudb/ft-index/src/tests/test_compression_methods.cc +++ b/storage/tokudb/ft-index/src/tests/test_compression_methods.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_cursor_2.cc b/storage/tokudb/ft-index/src/tests/test_cursor_2.cc index de332e6bf7599..d07eb95122e5e 100644 --- a/storage/tokudb/ft-index/src/tests/test_cursor_2.cc +++ b/storage/tokudb/ft-index/src/tests/test_cursor_2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_cursor_3.cc b/storage/tokudb/ft-index/src/tests/test_cursor_3.cc index 45c0b0b4a8da6..f9f256fc884ee 100644 --- a/storage/tokudb/ft-index/src/tests/test_cursor_3.cc +++ b/storage/tokudb/ft-index/src/tests/test_cursor_3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_cursor_DB_NEXT_no_dup.cc b/storage/tokudb/ft-index/src/tests/test_cursor_DB_NEXT_no_dup.cc index d87ff04f25d2b..125bbee9d5237 100644 --- a/storage/tokudb/ft-index/src/tests/test_cursor_DB_NEXT_no_dup.cc +++ b/storage/tokudb/ft-index/src/tests/test_cursor_DB_NEXT_no_dup.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_cursor_db_current.cc b/storage/tokudb/ft-index/src/tests/test_cursor_db_current.cc index 8a2f5bcba9326..2f8fbb9149ffc 100644 --- a/storage/tokudb/ft-index/src/tests/test_cursor_db_current.cc +++ b/storage/tokudb/ft-index/src/tests/test_cursor_db_current.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_cursor_delete2.cc b/storage/tokudb/ft-index/src/tests/test_cursor_delete2.cc index bb326c80fc513..1fcda002bc516 100644 --- a/storage/tokudb/ft-index/src/tests/test_cursor_delete2.cc +++ b/storage/tokudb/ft-index/src/tests/test_cursor_delete2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -125,7 +125,7 @@ test_cursor_delete2 (void) { r = txn->commit(txn, 0); CKERR(r); r = dbenv->txn_begin(dbenv, 0, &txn, 0); CKERR(r); - r = db->del(db, txn, dbt_init(&key, "a", 2), DB_DELETE_ANY); CKERR_depending(r,0,DB_NOTFOUND); + r = db->del(db, txn, dbt_init(&key, "a", 2), DB_DELETE_ANY); CKERR(r); r = txn->commit(txn, 0); CKERR(r); r = dbenv->txn_begin(dbenv, 0, &txn, 0); CKERR(r); @@ -139,7 +139,7 @@ test_cursor_delete2 (void) { r = dbenv->txn_begin(dbenv, 0, &txn, 0); CKERR(r); r = db->del(db, txn, dbt_init(&key, "a", 2), 0); CKERR(r); - r = db->del(db, txn, dbt_init(&key, "a", 2), DB_DELETE_ANY); CKERR_depending(r,0,DB_NOTFOUND); + r = db->del(db, txn, dbt_init(&key, "a", 2), DB_DELETE_ANY); CKERR(r); r = txn->commit(txn, 0); CKERR(r); r = db->close(db, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/test_cursor_flags.cc b/storage/tokudb/ft-index/src/tests/test_cursor_flags.cc index 1bdb3daf81cba..60ca37a0ac46d 100644 --- a/storage/tokudb/ft-index/src/tests/test_cursor_flags.cc +++ b/storage/tokudb/ft-index/src/tests/test_cursor_flags.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_cursor_interrupt.cc b/storage/tokudb/ft-index/src/tests/test_cursor_interrupt.cc index d82fc5131da22..e992f86455bee 100644 --- a/storage/tokudb/ft-index/src/tests/test_cursor_interrupt.cc +++ b/storage/tokudb/ft-index/src/tests/test_cursor_interrupt.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_cursor_nonleaf_expand.cc b/storage/tokudb/ft-index/src/tests/test_cursor_nonleaf_expand.cc index 96b83d778fcb8..6464a2fda8340 100644 --- a/storage/tokudb/ft-index/src/tests/test_cursor_nonleaf_expand.cc +++ b/storage/tokudb/ft-index/src/tests/test_cursor_nonleaf_expand.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_cursor_null.cc b/storage/tokudb/ft-index/src/tests/test_cursor_null.cc index 68a65b97e6e16..6c7bf382a8d7e 100644 --- a/storage/tokudb/ft-index/src/tests/test_cursor_null.cc +++ b/storage/tokudb/ft-index/src/tests/test_cursor_null.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_cursor_stickyness.cc b/storage/tokudb/ft-index/src/tests/test_cursor_stickyness.cc index 6ed74265fff36..62178e1413772 100644 --- a/storage/tokudb/ft-index/src/tests/test_cursor_stickyness.cc +++ b/storage/tokudb/ft-index/src/tests/test_cursor_stickyness.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_cursor_with_read_txn.cc b/storage/tokudb/ft-index/src/tests/test_cursor_with_read_txn.cc index 8435b2e1a3e69..d4e3148ec62bb 100644 --- a/storage/tokudb/ft-index/src/tests/test_cursor_with_read_txn.cc +++ b/storage/tokudb/ft-index/src/tests/test_cursor_with_read_txn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_already_exists.cc b/storage/tokudb/ft-index/src/tests/test_db_already_exists.cc index cbb98d1b3e736..ce4008a06d805 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_already_exists.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_already_exists.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_change_pagesize.cc b/storage/tokudb/ft-index/src/tests/test_db_change_pagesize.cc index 83e195093fdb8..d596782c91970 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_change_pagesize.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_change_pagesize.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_change_xxx.cc b/storage/tokudb/ft-index/src/tests/test_db_change_xxx.cc index 35170e5f9ec3a..2033cc6cb040c 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_change_xxx.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_change_xxx.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_close_no_open.cc b/storage/tokudb/ft-index/src/tests/test_db_close_no_open.cc index a9421b5745148..7f433e0939300 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_close_no_open.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_close_no_open.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_current_clobbers_db.cc b/storage/tokudb/ft-index/src/tests/test_db_current_clobbers_db.cc index d908e0c2e14eb..962d1ae525670 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_current_clobbers_db.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_current_clobbers_db.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_dbt_mem_behavior.cc b/storage/tokudb/ft-index/src/tests/test_db_dbt_mem_behavior.cc index 36ff6ca45eb13..8a0a385f82e16 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_dbt_mem_behavior.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_dbt_mem_behavior.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -223,14 +223,12 @@ test_main(int argc, char *const argv[]) { assert(!was_truncated); bool ulen_should_change = false; -#if defined(USE_TDB) if (flags[j] == DB_DBT_REALLOC) { ulen_should_change = (bool)(old_ulen < sizeof(DATA)); } else if (flags[j] == DB_DBT_MALLOC) { ulen_should_change = (bool)(old_ulen != sizeof(DATA)*2); } -#endif assert(ulen_should_change == (bool)ulen_changed); assert(size_full); assert(doclone == !small_buffer); diff --git a/storage/tokudb/ft-index/src/tests/test_db_delete.cc b/storage/tokudb/ft-index/src/tests/test_db_delete.cc index 8b4337bae854d..4ee9b0fba838a 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_delete.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_delete.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -136,9 +136,7 @@ test_db_delete (int n, int dup_mode) { /* create the dup database file */ DB_ENV *env; r = db_env_create(&env, 0); assert(r == 0); -#ifdef TOKUDB r = env->set_redzone(env, 0); assert(r == 0); -#endif r = env->open(env, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL, 0); assert(r == 0); DB *db; @@ -179,16 +177,7 @@ test_db_delete (int n, int dup_mode) { } expect_db_del(db, htonl(n), 0, DB_NOTFOUND); -#if defined(USE_TDB) expect_db_del(db, htonl(n), DB_DELETE_ANY, 0); -#endif -#if defined(USE_BDB) && defined(DB_DELETE_ANY) - #if DB_DELETE_ANY == 0 - expect_db_del(db, htonl(n), DB_DELETE_ANY, DB_NOTFOUND); - #else - expect_db_del(db, htonl(n), DB_DELETE_ANY, EINVAL); - #endif -#endif r = db->close(db, 0); assert(r == 0); r = env->close(env, 0); assert(r == 0); @@ -208,9 +197,7 @@ test_db_get_datasize0 (void) { /* create the dup database file */ DB_ENV *env; r = db_env_create(&env, 0); assert(r == 0); -#ifdef TOKUDB r = env->set_redzone(env, 0); assert(r == 0); -#endif r = env->open(env, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL, 0); assert(r == 0); DB *db; diff --git a/storage/tokudb/ft-index/src/tests/test_db_descriptor.cc b/storage/tokudb/ft-index/src/tests/test_db_descriptor.cc index de6f6f5f6088f..a9403174818b4 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_descriptor.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_descriptor.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_env_open_close.cc b/storage/tokudb/ft-index/src/tests/test_db_env_open_close.cc index 13b1166fa0b0e..4bb22a026b298 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_env_open_close.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_env_open_close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_env_open_nocreate.cc b/storage/tokudb/ft-index/src/tests/test_db_env_open_nocreate.cc index be3deef6c6dbc..a97ec7de733e6 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_env_open_nocreate.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_env_open_nocreate.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -117,11 +117,7 @@ test_main(int argc, char *const argv[]) { int do_private; for (do_private=0; do_private<2; do_private++) { -#ifdef USE_TDB if (do_private==0) continue; // See #208. -#else - if (do_private==1) continue; // See #530. BDB 4.6.21 segfaults if DB_PRIVATE is passed when no environment previously exists. -#endif int private_flags = do_private ? (DB_CREATE|DB_PRIVATE) : 0; toku_os_recursive_delete(TOKU_TEST_FILENAME); @@ -136,37 +132,12 @@ test_main(int argc, char *const argv[]) { r = db_env_create(&dbenv, 0); CKERR(r); r = dbenv->open(dbenv, TOKU_TEST_FILENAME, private_flags|DB_INIT_MPOOL, 0); -#ifdef USE_TDB - // TokuDB has no trouble opening an environment if the directory exists. + // TokuFT has no trouble opening an environment if the directory exists. 
CKERR(r); assert(r==0); -#else - if (r!=ENOENT) printf("%s:%d %d: %s\n", __FILE__, __LINE__, r,db_strerror(r)); - assert(r==ENOENT); -#endif dbenv->close(dbenv,0); // free memory } -#ifndef USE_TDB - // Now make sure that if we have a non-private DB that we can tell if it opened or not. - DB *db; - toku_os_recursive_delete(TOKU_TEST_FILENAME); - toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); - r = db_env_create(&dbenv, 0); - CKERR(r); - r = dbenv->open(dbenv, TOKU_TEST_FILENAME, DB_CREATE|DB_INIT_MPOOL, 0); - CKERR(r); - r=db_create(&db, dbenv, 0); - CKERR(r); - db->close(db, 0); - dbenv->close(dbenv,0); // free memory - r = db_env_create(&dbenv, 0); - CKERR(r); - r = dbenv->open(dbenv, TOKU_TEST_FILENAME, DB_INIT_MPOOL, 0); - CKERR(r); - dbenv->close(dbenv,0); // free memory -#endif - return 0; } diff --git a/storage/tokudb/ft-index/src/tests/test_db_env_open_open_close.cc b/storage/tokudb/ft-index/src/tests/test_db_env_open_open_close.cc index 58030826b305c..9a2d665edbfd6 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_env_open_open_close.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_env_open_open_close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -117,18 +117,7 @@ test_main(int argc, char*const* argv) { r = dbenv->open(dbenv, TOKU_TEST_FILENAME, DB_CREATE|DB_INIT_MPOOL|DB_PRIVATE, 0666); if (verbose) printf("r=%d\n", r); -#ifdef USE_TDB assert(r == EINVAL); -#elif USE_BDB -#if DB_VERSION_MAJOR >= 5 - assert(r == EINVAL); -#else - if (verbose) printf("test_db_env_open_open_close.bdb skipped. (BDB apparently does not follow the spec).\n"); - assert(r == 0); -#endif -#else -#error -#endif r = dbenv->close(dbenv, 0); assert(r == 0); diff --git a/storage/tokudb/ft-index/src/tests/test_db_env_set_errpfx.cc b/storage/tokudb/ft-index/src/tests/test_db_env_set_errpfx.cc index 7a717ea807b46..ef7bf85b52846 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_env_set_errpfx.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_env_set_errpfx.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_env_set_lg_dir.cc b/storage/tokudb/ft-index/src/tests/test_db_env_set_lg_dir.cc index bdc1b6ef91a20..0baa9185d1582 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_env_set_lg_dir.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_env_set_lg_dir.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -121,13 +121,8 @@ test_main(int argc, char *const argv[]) { r = dbenv->open(dbenv, TOKU_TEST_FILENAME, DB_INIT_TXN|DB_INIT_LOG|DB_CREATE|DB_PRIVATE|DB_INIT_MPOOL, 0); CKERR(r); -#ifdef USE_TDB - // According to the BDB man page, you may not call set_lg_dir after doing the open. 
- // Some versions of BDB don't actually check this or complain r = dbenv->set_lg_dir(dbenv, "."); assert(r == EINVAL); -#endif - r = dbenv->close(dbenv, 0); assert(r == 0); diff --git a/storage/tokudb/ft-index/src/tests/test_db_env_set_tmp_dir.cc b/storage/tokudb/ft-index/src/tests/test_db_env_set_tmp_dir.cc index f9e47e7add066..b1adbb30120a9 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_env_set_tmp_dir.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_env_set_tmp_dir.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -121,13 +121,8 @@ test_main(int argc, char *const argv[]) { r = dbenv->open(dbenv, TOKU_TEST_FILENAME, DB_CREATE|DB_PRIVATE|DB_INIT_MPOOL, 0); CKERR(r); -#ifdef USE_TDB - // According to the BDB man page, you may not call set_tmp_dir after doing the open. - // Some versions of BDB don't actually check this or complain r = dbenv->set_tmp_dir(dbenv, "."); assert(r == EINVAL); -#endif - r = dbenv->close(dbenv, 0); assert(r == 0); diff --git a/storage/tokudb/ft-index/src/tests/test_db_env_strdup_null.cc b/storage/tokudb/ft-index/src/tests/test_db_env_strdup_null.cc index aa9d50c424fef..01ba0792e4a61 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_env_strdup_null.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_env_strdup_null.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -108,13 +108,10 @@ test_main (int UU(argc), char UU(*const argv[])) { toku_os_recursive_delete(TOKU_TEST_FILENAME); r=toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); assert(r==0); r=db_env_create(&env, 0); assert(r==0); -// None of this stuff works with BDB. TDB does more error checking. -#ifdef USE_TDB r=env->set_data_dir(env, NULL); assert(r==EINVAL); r=env->open(env, TOKU_TEST_FILENAME, DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); assert(r==0); env->set_errpfx(env, NULL); assert(1); //Did not crash. r=env->set_tmp_dir(env, NULL); assert(r==EINVAL); -#endif r=env->close(env, 0); assert(r==0); return 0; } diff --git a/storage/tokudb/ft-index/src/tests/test_db_get_put_flags.cc b/storage/tokudb/ft-index/src/tests/test_db_get_put_flags.cc index 328436b4cd39e..1c716cc747a57 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_get_put_flags.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_get_put_flags.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -136,9 +136,7 @@ setup (uint32_t flags) { toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); /* Open/create primary */ r = db_env_create(&dbenv, 0); assert(r == 0); -#ifdef USE_TDB r = dbenv->set_redzone(dbenv, 0); CKERR(r); -#endif r = dbenv->open(dbenv, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL, 0); assert(r == 0); r = db_create(&dbp, dbenv, 0); CKERR(r); dbp->set_errfile(dbp,0); // Turn off those annoying errors @@ -182,13 +180,6 @@ get_bad_flags (DB* db, uint32_t flags, int r_expect, int keyint, int dataint) { assert(*(int*)data.data == dataint); } -#ifdef USE_TDB -#define EINVAL_FOR_TDB_OK_FOR_BDB EINVAL -#else -#define EINVAL_FOR_TDB_OK_FOR_BDB 0 -#endif - - PUT_TEST put_tests[] = { {0, DB_NODUPDATA, EINVAL, 0, 0}, //r_expect must change to 0, once implemented. {0, 0, 0, 0, 0}, diff --git a/storage/tokudb/ft-index/src/tests/test_db_named_delete_last.cc b/storage/tokudb/ft-index/src/tests/test_db_named_delete_last.cc index db530ad1e1847..ba63986af7977 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_named_delete_last.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_named_delete_last.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_no_env.cc b/storage/tokudb/ft-index/src/tests/test_db_no_env.cc index 61952133119c1..8f7a336af5b1a 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_no_env.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_no_env.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_open_notexist_reopen.cc b/storage/tokudb/ft-index/src/tests/test_db_open_notexist_reopen.cc index ea5002a013170..70580b8f86889 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_open_notexist_reopen.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_open_notexist_reopen.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -/* Simple test of logging. Can I start a TokuDB with logging enabled? */ +/* Simple test of logging. Can I start TokuFT with logging enabled? */ #include #include diff --git a/storage/tokudb/ft-index/src/tests/test_db_remove.cc b/storage/tokudb/ft-index/src/tests/test_db_remove.cc index f254fbee17cb5..e736784a7c4f3 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_remove.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_remove.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -114,11 +114,7 @@ static void test_db_remove (void) { // Now remove it, while it is open. 
r = env->dbremove(env, NULL, fname, 0, 0); -#ifdef USE_TDB assert(r!=0); -#else - assert(r==0); -#endif r = db1->close(db1, 0); assert(r==0); r = env->close(env, 0); assert(r == 0); diff --git a/storage/tokudb/ft-index/src/tests/test_db_remove_subdb.cc b/storage/tokudb/ft-index/src/tests/test_db_remove_subdb.cc index 90ee56278fb85..8f3e50a649c6f 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_remove_subdb.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_remove_subdb.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_set_flags.cc b/storage/tokudb/ft-index/src/tests/test_db_set_flags.cc index bd63991da8788..b106c70589fc6 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_set_flags.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_set_flags.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_subdb.cc b/storage/tokudb/ft-index/src/tests/test_db_subdb.cc index 4a65317d6c1e0..f29dd14a3fa82 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_subdb.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_subdb.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_subdb_different_flags.cc b/storage/tokudb/ft-index/src/tests/test_db_subdb_different_flags.cc index 0c30b78266519..c12e1bdfce968 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_subdb_different_flags.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_subdb_different_flags.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_db_txn_locks_nonheaviside.cc b/storage/tokudb/ft-index/src/tests/test_db_txn_locks_nonheaviside.cc index f0eb79d5527e7..381bce27596bd 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_txn_locks_nonheaviside.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_txn_locks_nonheaviside.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -236,10 +236,8 @@ setup_dbs (void) { /* Open/create primary */ r = db_env_create(&dbenv, 0); CKERR(r); -#ifdef TOKUDB r = dbenv->set_default_bt_compare(dbenv, int_dbt_cmp); CKERR(r); -#endif uint32_t env_txn_flags = DB_INIT_TXN | DB_INIT_LOCK; uint32_t env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL; r = dbenv->open(dbenv, TOKU_TEST_FILENAME, env_open_flags | env_txn_flags, 0600); @@ -247,10 +245,6 @@ setup_dbs (void) { r = db_create(&db, dbenv, 0); CKERR(r); -#ifndef TOKUDB - r = db->set_bt_compare( db, int_dbt_cmp); - CKERR(r); -#endif char a; for (a = 'a'; a <= 'z'; a++) init_txn(a); @@ -665,16 +659,6 @@ test (void) { int test_main(int argc, char *const argv[]) { parse_args(argc, argv); - if (!IS_TDB) { - if (verbose) { - printf("Warning: " __FILE__" does not work in BDB.\n"); - } - } else { - test(); - /* - test_abort(0); - test_abort(DB_DUP | DB_DUPSORT); - */ - } + test(); return 0; } diff --git a/storage/tokudb/ft-index/src/tests/test_db_txn_locks_read_uncommitted.cc b/storage/tokudb/ft-index/src/tests/test_db_txn_locks_read_uncommitted.cc index 5e9a947b9e600..0dd73590b84ea 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_txn_locks_read_uncommitted.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_txn_locks_read_uncommitted.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -178,10 +178,8 @@ setup_dbs (void) { /* Open/create primary */ r = db_env_create(&dbenv, 0); CKERR(r); -#ifdef TOKUDB r = dbenv->set_default_bt_compare(dbenv, int_dbt_cmp); CKERR(r); -#endif uint32_t env_txn_flags = DB_INIT_TXN | DB_INIT_LOCK; uint32_t env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL; r = dbenv->open(dbenv, TOKU_TEST_FILENAME, env_open_flags | env_txn_flags, 0600); @@ -189,10 +187,6 @@ setup_dbs (void) { r = db_create(&db, dbenv, 0); CKERR(r); -#ifndef TOKUDB - r = db->set_bt_compare( db, int_dbt_cmp); - CKERR(r); -#endif char a; for (a = 'a'; a <= 'z'; a++) init_txn(a, 0); @@ -256,28 +250,9 @@ table_scan(char txn, bool success) { static void table_prelock(char txn, bool success) { int r; -#if defined USE_TDB && USE_TDB r = db->pre_acquire_table_lock(db, txns[(int)txn]); if (success) CKERR(r); else CKERR2s(r, DB_LOCK_NOTGRANTED, DB_LOCK_DEADLOCK); -#else - DBT key; - DBT data; - - assert(txns[(int)txn] && cursors[(int)txn]); - r = cursors[(int)txn]->c_get(cursors[(int)txn], - dbt_init(&key, 0, 0), - dbt_init(&data, 0, 0), - DB_FIRST | DB_RMW); - while (r==0) { - r = cursors[(int)txn]->c_get(cursors[(int)txn], - dbt_init(&key, 0, 0), - dbt_init(&data, 0, 0), - DB_NEXT | DB_RMW); - } - if (success) CKERR2(r, DB_NOTFOUND); - else CKERR2s(r, DB_LOCK_NOTGRANTED, DB_LOCK_DEADLOCK); -#endif } static void diff --git a/storage/tokudb/ft-index/src/tests/test_db_version.cc b/storage/tokudb/ft-index/src/tests/test_db_version.cc index 1041db949d43c..6ce4574ae577a 100644 --- a/storage/tokudb/ft-index/src/tests/test_db_version.cc +++ b/storage/tokudb/ft-index/src/tests/test_db_version.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_env_close_flags.cc b/storage/tokudb/ft-index/src/tests/test_env_close_flags.cc index b393c5e139947..dd532627502ab 100644 --- a/storage/tokudb/ft-index/src/tests/test_env_close_flags.cc +++ b/storage/tokudb/ft-index/src/tests/test_env_close_flags.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -114,12 +114,7 @@ test_main (int argc __attribute__((__unused__)), char *const argv[] __attribute r=db_env_create(&env, 0); assert(r==0); env->set_errfile(env,0); // Turn off those annoying errors r=env->close (env, 1); - //BDB does not check this in some versions -#if defined(USE_TDB) || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 3) assert(r==EINVAL); -#else - assert(r==0); -#endif toku_os_recursive_delete(TOKU_TEST_FILENAME); r=toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); assert(r==0); @@ -135,11 +130,6 @@ test_main (int argc __attribute__((__unused__)), char *const argv[] __attribute env->set_errfile(env,0); // Turn off those annoying errors r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_PRIVATE|DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=env->close (env, 1); - //BDB does not check this. -#if defined(USE_TDB) || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 3) assert(r==EINVAL); -#else - assert(r==0); -#endif return 0; } diff --git a/storage/tokudb/ft-index/src/tests/test_env_create_db_create.cc b/storage/tokudb/ft-index/src/tests/test_env_create_db_create.cc index 0425b2191d4c7..d45bd7002ab41 100644 --- a/storage/tokudb/ft-index/src/tests/test_env_create_db_create.cc +++ b/storage/tokudb/ft-index/src/tests/test_env_create_db_create.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -104,13 +104,7 @@ test_main (int UU(argc), char UU(*const argv[])) { r = db_env_create(&env, 0); assert(r == 0); r = db_create(&db, env, 0); -// BDB doesnt' actually barf on this case. -#ifdef USE_TDB assert(r != 0); -#else - r = db->close(db, 0); - assert(r == 0); -#endif r = env->close(env, 0); assert(r == 0); return 0; diff --git a/storage/tokudb/ft-index/src/tests/test_env_open_flags.cc b/storage/tokudb/ft-index/src/tests/test_env_open_flags.cc index 7bded023a290c..1b621b8005daa 100644 --- a/storage/tokudb/ft-index/src/tests/test_env_open_flags.cc +++ b/storage/tokudb/ft-index/src/tests/test_env_open_flags.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -125,25 +125,19 @@ test_main(int argc, char *const argv[]) { toku_os_recursive_delete(TOKU_TEST_FILENAME); toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); -#ifdef USE_TDB char tracefile[TOKU_PATH_MAX+1]; toku_set_trace_file(toku_path_join(tracefile, 2, TOKU_TEST_FILENAME, "trace.tktrace")); -#endif /* test flags */ test_env_open_flags(0, ENOENT); -#ifdef TOKUDB // This one segfaults in BDB 4.6.21 test_env_open_flags(DB_PRIVATE, ENOENT); -#endif test_env_open_flags(DB_PRIVATE+DB_CREATE, 0); test_env_open_flags(DB_PRIVATE+DB_CREATE+DB_INIT_MPOOL, 0); test_env_open_flags(DB_PRIVATE+DB_RECOVER, EINVAL); test_env_open_flags(DB_PRIVATE+DB_CREATE+DB_INIT_MPOOL+DB_RECOVER, EINVAL); -#ifdef USE_TDB toku_close_trace_file(); -#endif return 0; } diff --git a/storage/tokudb/ft-index/src/tests/test_txn_abort8.cc b/storage/tokudb/ft-index/src/tests/test_equal_keys_with_different_bytes.cc similarity index 68% rename from storage/tokudb/ft-index/src/tests/test_txn_abort8.cc rename to storage/tokudb/ft-index/src/tests/test_equal_keys_with_different_bytes.cc index 176b5544c783f..d91f965ebc853 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_abort8.cc +++ b/storage/tokudb/ft-index/src/tests/test_equal_keys_with_different_bytes.cc @@ -29,8 +29,8 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: @@ -86,71 +86,64 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "Copyright (c) 2014 Tokutek Inc. All rights reserved." + +#include + #include "test.h" -#include - -#include -#include -#include -#include -#include - -// -static void -test_abort_close (void) { - -#ifndef USE_TDB -#if DB_VERSION_MAJOR==4 && DB_VERSION_MINOR==3 - if (verbose) fprintf(stderr, "%s does not work for BDB %d.%d. Not running\n", __FILE__, DB_VERSION_MAJOR, DB_VERSION_MINOR); - return; -#else - toku_os_recursive_delete(TOKU_TEST_FILENAME); - toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); +static int compare_strings_case_insensitive(DB *db, const DBT *a, const DBT *b) { + invariant_notnull(db); + return strcasecmp(reinterpret_cast(a->data), + reinterpret_cast(b->data)); +} + +static void test_equal_keys_with_different_bytes(void) { int r; - DB_ENV *env; - r = db_env_create(&env, 0); assert(r == 0); - r = env->set_data_dir(env, TOKU_TEST_FILENAME); - r = env->set_lg_dir(env, TOKU_TEST_FILENAME); - env->set_errfile(env, stdout); - r = env->open(env, 0, DB_INIT_MPOOL + DB_INIT_LOG + DB_INIT_LOCK + DB_INIT_TXN + DB_PRIVATE + DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); - if (r != 0) printf("%s:%d:%d:%s\n", __FILE__, __LINE__, r, db_strerror(r)); - assert(r == 0); - DB_TXN *txn = 0; - r = env->txn_begin(env, 0, &txn, 0); assert(r == 0); + DB_ENV *env; + r = db_env_create(&env, 0); CKERR(r); + r = env->set_default_bt_compare(env, compare_strings_case_insensitive); + r = env->open(env, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL+DB_INIT_TXN, 0); CKERR(r); DB *db; - r = db_create(&db, env, 0); assert(r == 0); - r = db->open(db, txn, "test.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); assert(r == 0); - - { - toku_struct_stat statbuf; - char fullfile[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, "test.db"), &statbuf); - assert(r==0); - } - - // Close before abort. 
- r = db->close(db, 0); - - r = txn->abort(txn); assert(r == 0); - - r = env->close(env, 0); assert(r == 0); - - { - toku_struct_stat statbuf; - char fullfile[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, "test.db"), &statbuf); - assert(r!=0); - } -#endif -#endif + r = db_create(&db, env, 0); CKERR(r); + r = db->open(db, NULL, "db", NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r); + + DBT key; + + // put 'key' + dbt_init(&key, "key", sizeof("key")); + r = db->put(db, NULL, &key, &key, 0); CKERR(r); + + // del 'KEY' - should match 'key' + dbt_init(&key, "KEY", sizeof("KEY")); + r = db->del(db, NULL, &key, 0); CKERR(r); + + DBT val; + char val_buf[10]; + dbt_init(&val, val_buf, sizeof(val_buf)); + + // search should fail for 'key' + dbt_init(&key, "key", sizeof("key")); + r = db->get(db, NULL, &key, &val, 0); CKERR2(r, DB_NOTFOUND); + + // search should fail for 'KEY' + dbt_init(&key, "KEY", sizeof("KEY")); + r = db->get(db, NULL, &key, &val, 0); CKERR2(r, DB_NOTFOUND); + + r = db->close(db, 0); CKERR(r); + r = env->close(env, 0); CKERR(r); } int -test_main(int UU(argc), char UU(*const argv[])) { - test_abort_close(); +test_main(int argc, char *const argv[]) { + parse_args(argc, argv); + + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); + + test_equal_keys_with_different_bytes(); + return 0; } diff --git a/storage/tokudb/ft-index/src/tests/test_error.cc b/storage/tokudb/ft-index/src/tests/test_error.cc index 21084e700614d..84c6289990e4b 100644 --- a/storage/tokudb/ft-index/src/tests/test_error.cc +++ b/storage/tokudb/ft-index/src/tests/test_error.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_forkjoin.cc b/storage/tokudb/ft-index/src/tests/test_forkjoin.cc index 8190a7e774587..1fb01b5371262 100644 --- a/storage/tokudb/ft-index/src/tests/test_forkjoin.cc +++ b/storage/tokudb/ft-index/src/tests/test_forkjoin.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_get_max_row_size.cc b/storage/tokudb/ft-index/src/tests/test_get_max_row_size.cc index 12fc8c1e619f8..5ddddac9bd617 100644 --- a/storage/tokudb/ft-index/src/tests/test_get_max_row_size.cc +++ b/storage/tokudb/ft-index/src/tests/test_get_max_row_size.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_get_zeroed_dbt.cc b/storage/tokudb/ft-index/src/tests/test_get_zeroed_dbt.cc index bf7848088d2c3..384f4e91f46be 100644 --- a/storage/tokudb/ft-index/src/tests/test_get_zeroed_dbt.cc +++ b/storage/tokudb/ft-index/src/tests/test_get_zeroed_dbt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
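The new test_equal_keys_with_different_bytes.cc above hinges on one property: once the environment-wide comparator (compare_strings_case_insensitive) declares two byte strings equal, they are the same key, so the del of "KEY" removes the row that was put under "key" and both later gets return DB_NOTFOUND. A tiny standalone illustration of that equivalence with strcasecmp alone, no DB API involved:

    #include <cassert>
    #include <cstring>
    #include <strings.h>   // strcasecmp

    int main(void) {
        const char *stored = "key";
        const char *probe  = "KEY";
        assert(strcmp(stored, probe) != 0);      // the bytes differ...
        assert(strcasecmp(stored, probe) == 0);  // ...but a case-insensitive comparator sees one key
        return 0;
    }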
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_groupcommit_count.cc b/storage/tokudb/ft-index/src/tests/test_groupcommit_count.cc index acf0d5f7076d8..f5bb46c35dbe7 100644 --- a/storage/tokudb/ft-index/src/tests/test_groupcommit_count.cc +++ b/storage/tokudb/ft-index/src/tests/test_groupcommit_count.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -101,12 +101,7 @@ DB_ENV *env; DB *db; int do_sync=1; -#ifdef TOKUDB #define NITER 100 -#else -// BDB is slow. Reduce the work. -#define NITER 25 -#endif static void *start_a_thread (void *i_p) { int *CAST_FROM_VOIDP(which_thread_p, i_p); @@ -214,12 +209,7 @@ do_test (int N) { } } -#ifdef TOKUDB int log_max_n_threads_over_10 = 3; -#else -// BDB is slow. Reduce the work. -int log_max_n_threads_over_10 = 2; -#endif static void my_parse_args (int argc, char *const argv[]) { @@ -267,9 +257,7 @@ test_main (int argc, char *const argv[]) { prev_count=0; db_env_set_func_fsync(do_fsync); -#if TOKUDB db_env_set_num_bucket_mutexes(32); -#endif toku_os_recursive_delete(env_path); { int r=toku_os_mkdir(env_path, S_IRWXU+S_IRWXG+S_IRWXO); assert(r==0); } diff --git a/storage/tokudb/ft-index/src/tests/test_groupcommit_perf.cc b/storage/tokudb/ft-index/src/tests/test_groupcommit_perf.cc index e7aa5071f61bf..ade56e24e4e88 100644 --- a/storage/tokudb/ft-index/src/tests/test_groupcommit_perf.cc +++ b/storage/tokudb/ft-index/src/tests/test_groupcommit_perf.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_hsoc.cc b/storage/tokudb/ft-index/src/tests/test_hsoc.cc index ada02e5e522a1..283684565017b 100644 --- a/storage/tokudb/ft-index/src/tests/test_hsoc.cc +++ b/storage/tokudb/ft-index/src/tests/test_hsoc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_insert_cursor_delete_insert.cc b/storage/tokudb/ft-index/src/tests/test_insert_cursor_delete_insert.cc index 865736d14fe1a..8b09698fcee87 100644 --- a/storage/tokudb/ft-index/src/tests/test_insert_cursor_delete_insert.cc +++ b/storage/tokudb/ft-index/src/tests/test_insert_cursor_delete_insert.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_insert_many_gc.cc b/storage/tokudb/ft-index/src/tests/test_insert_many_gc.cc index a1884d22c0c7c..be66e852021ef 100644 --- a/storage/tokudb/ft-index/src/tests/test_insert_many_gc.cc +++ b/storage/tokudb/ft-index/src/tests/test_insert_many_gc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2014 Tokutek, Inc. 
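The test_groupcommit_count.cc hunks above install a custom fsync callback with db_env_set_func_fsync(do_fsync) so the test can compare how many fsyncs happen against how many transactions commit; with group commit working, many commits share one fsync. A hedged sketch of such a counting hook, assuming the callback has the same shape as fsync itself (int (*)(int fd)) and that do_fsync in the test does something similar:

    #include <unistd.h>

    static int fsync_count = 0;

    // Counting wrapper: group commit should keep fsync_count well below
    // the number of transactions the worker threads commit.
    static int counting_fsync(int fd) {
        fsync_count++;
        return fsync(fd);
    }

    // Installed before the environment is opened, e.g.:
    //   db_env_set_func_fsync(counting_fsync);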
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_insert_memleak.cc b/storage/tokudb/ft-index/src/tests/test_insert_memleak.cc index 667221cdf0445..8168ae477a1ca 100644 --- a/storage/tokudb/ft-index/src/tests/test_insert_memleak.cc +++ b/storage/tokudb/ft-index/src/tests/test_insert_memleak.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/compression-ratio/cratio.cc b/storage/tokudb/ft-index/src/tests/test_insert_unique.cc similarity index 51% rename from storage/tokudb/ft-index/ft/compression-ratio/cratio.cc rename to storage/tokudb/ft-index/src/tests/test_insert_unique.cc index 395504d3b9282..84d1ded6db5af 100644 --- a/storage/tokudb/ft-index/ft/compression-ratio/cratio.cc +++ b/storage/tokudb/ft-index/src/tests/test_insert_unique.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,108 +88,115 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -/* Measure the extent to which we can compress a file. - * Works on version 8. */ - -#define _XOPEN_SOURCE 500 -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -toku_off_t fd_size (int fd) { - int64_t file_size; - int r = toku_os_get_file_size(fd, &file_size); - assert(r==0); - return file_size; -} +/** + * Test that unique inserts work correctly. This exercises the rightmost leaf inject optimization. 
+ */ + +#include -#define NSIZE (1<<20) -unsigned char fbuf[NSIZE]; -unsigned char cbuf[NSIZE+500]; +#include "test.h" -void -measure_header (int fd, toku_off_t off, // read header from this offset - toku_off_t *usize, // size uncompressed (but not including any padding) - toku_off_t *csize) // compressed size -{ +static char random_buf[8]; +static struct random_data random_data; + +static void test_simple_unique_insert(DB_ENV *env) { int r; - r=pread(fd, fbuf, 12, off); - assert(r==12); - assert(memcmp(fbuf,"tokudata",8)==0); - int bsize = toku_dtoh32(*(uint32_t*)(fbuf+8)); - //printf("Bsize=%d\n", bsize); - (*usize)+=bsize; - assert(bsize<=NSIZE); - r=pread(fd, fbuf, bsize, off); - assert(r==bsize); - uLongf destLen=sizeof(cbuf); - r=compress2(cbuf, &destLen, - fbuf+20, bsize-=20, // skip magic nodesize and version - 1); - assert(r==Z_OK); - destLen+=16; // account for the size and magic and version - //printf("Csize=%ld\n", destLen); - (*csize)+=destLen; + DB *db; + r = db_create(&db, env, 0); CKERR(r); + r = db->open(db, NULL, "db", NULL, DB_BTREE, DB_CREATE, 0644); CKERR(r); + + DBT key1, key2, key3; + dbt_init(&key1, "a", sizeof("a")); + dbt_init(&key2, "b", sizeof("b")); + dbt_init(&key3, "c", sizeof("c")); + r = db->put(db, NULL, &key1, &key1, DB_NOOVERWRITE); CKERR(r); + r = db->put(db, NULL, &key1, &key1, DB_NOOVERWRITE); CKERR2(r, DB_KEYEXIST); + r = db->put(db, NULL, &key3, &key3, DB_NOOVERWRITE); CKERR(r); + r = db->put(db, NULL, &key3, &key3, DB_NOOVERWRITE); CKERR2(r, DB_KEYEXIST); + r = db->put(db, NULL, &key2, &key2, DB_NOOVERWRITE); CKERR(r); + r = db->put(db, NULL, &key2, &key2, DB_NOOVERWRITE); CKERR2(r, DB_KEYEXIST); + // sanity check + r = db->put(db, NULL, &key1, &key1, DB_NOOVERWRITE); CKERR2(r, DB_KEYEXIST); + r = db->put(db, NULL, &key1, &key3, DB_NOOVERWRITE); CKERR2(r, DB_KEYEXIST); + + r = db->close(db, 0); CKERR(r); + r = env->dbremove(env, NULL, "db", NULL, 0); CKERR(r); } -void -measure_node (int fd, toku_off_t off, // read header from this offset - toku_off_t *usize, // size uncompressed (but not including any padding) - toku_off_t *csize) // compressed size -{ +static void test_large_sequential_insert_unique(DB_ENV *env) { int r; - r=pread(fd, fbuf, 24, off); - assert(r==24); - //printf("fbuf[0..7]=%c%c%c%c%c%c%c%c\n", fbuf[0], fbuf[1], fbuf[2], fbuf[3], fbuf[4], fbuf[5], fbuf[6], fbuf[7]); - assert(memcmp(fbuf,"tokuleaf",8)==0 || memcmp(fbuf, "tokunode", 8)==0); - assert(8==toku_dtoh32(*(uint32_t*)(fbuf+8))); // check file version - int bsize = toku_dtoh32(*(uint32_t*)(fbuf+20)); - //printf("Bsize=%d\n", bsize); - (*usize)+=bsize; - - assert(bsize<=NSIZE); - r=pread(fd, fbuf, bsize, off); - assert(r==bsize); - uLongf destLen=sizeof(cbuf); - r=compress2(cbuf, &destLen, - fbuf+28, bsize-=28, // skip constant header stuff - 1); - destLen += 24; // add in magic (8), version(4), lsn (8), and size (4). Actually lsn will be compressed, but ignore that for now. 
- assert(r==Z_OK); - //printf("Csize=%ld\n", destLen); - (*csize)+=destLen; + DB *db; + r = db_create(&db, env, 0); CKERR(r); + + // very small nodes/basements to make a taller tree + r = db->set_pagesize(db, 8 * 1024); CKERR(r); + r = db->set_readpagesize(db, 2 * 1024); CKERR(r); + r = db->open(db, NULL, "db", NULL, DB_BTREE, DB_CREATE, 0644); CKERR(r); + + const int val_size = 1024; + char *XMALLOC_N(val_size, val_buf); + memset(val_buf, 'k', val_size); + DBT val; + dbt_init(&val, val_buf, val_size); + + // grow a tree to about depth 3, taking sanity checks along the way + const int start_num_rows = (64 * 1024 * 1024) / val_size; + for (int i = 0; i < start_num_rows; i++) { + DBT key; + int k = toku_htonl(i); + dbt_init(&key, &k, sizeof(k)); + r = db->put(db, NULL, &key, &val, DB_NOOVERWRITE); CKERR(r); + if (i % 50 == 0) { + // sanity check - should not be able to insert this key twice in a row + r = db->put(db, NULL, &key, &val, DB_NOOVERWRITE); CKERR2(r, DB_KEYEXIST); + + // .. but re-inserting is okay, if we provisionally deleted the row + DB_TXN *txn; + r = env->txn_begin(env, NULL, &txn, 0); CKERR(r); + r = db->del(db, NULL, &key, DB_DELETE_ANY); CKERR(r); + r = db->put(db, NULL, &key, &val, DB_NOOVERWRITE); CKERR(r); + r = txn->commit(txn, 0); CKERR(r); + } + if (i > 0 && i % 250 == 0) { + // sanity check - unique checks on random keys we already inserted should + // fail (exercises middle-of-the-tree checks) + for (int check_i = 0; check_i < 4; check_i++) { + DBT rand_key; + int rand_k = toku_htonl(myrandom_r(&random_data) % i); + dbt_init(&rand_key, &rand_k, sizeof(rand_k)); + r = db->put(db, NULL, &rand_key, &val, DB_NOOVERWRITE); CKERR2(r, DB_KEYEXIST); + } + } + } + toku_free(val_buf); + r = db->close(db, 0); CKERR(r); + r = env->dbremove(env, NULL, "db", NULL, 0); CKERR(r); } +int test_main(int argc, char * const argv[]) { + default_parse_args(argc, argv); -/* The single argument is the filename to measure. */ -int main (int argc, const char *argv[]) { - assert(argc==2); - const char *fname=argv[1]; - int fd = open(fname, O_RDONLY); - assert(fd>=0); - toku_off_t fsize = fd_size(fd); - printf("fsize (uncompressed with padding)=%lld\n", (long long)fsize); + int r; + const int envflags = DB_INIT_MPOOL | DB_CREATE | DB_THREAD | + DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN | DB_PRIVATE; - toku_off_t usize=0, csize=0; - measure_header(fd, 0, &usize, &csize); + // startup + DB_ENV *env; + toku_os_recursive_delete(TOKU_TEST_FILENAME); + r = toku_os_mkdir(TOKU_TEST_FILENAME, 0755); CKERR(r); + r = db_env_create(&env, 0); CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, envflags, 0755); - toku_off_t i; - for (i=NSIZE; i+24close(env, 0); CKERR(r); - close(fd); return 0; } + diff --git a/storage/tokudb/ft-index/src/tests/test_iterate_live_transactions.cc b/storage/tokudb/ft-index/src/tests/test_iterate_live_transactions.cc index bfc90e71d07a8..dd00ddeeb9a3c 100644 --- a/storage/tokudb/ft-index/src/tests/test_iterate_live_transactions.cc +++ b/storage/tokudb/ft-index/src/tests/test_iterate_live_transactions.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
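test_insert_unique.cc above (carved out of the old compression-ratio tool) leans on the standard DB_NOOVERWRITE contract: a put with that flag returns DB_KEYEXIST when the key already exists, which is what lets it double as a uniqueness check while the tree grows. A minimal sketch of that contract, reusing the harness helper dbt_init seen in the diff and assuming an already-open handle:

    // Sketch: the invariant test_simple_unique_insert checks for each key.
    static void insert_unique_then_expect_duplicate(DB *db, const char *k) {
        DBT key;
        dbt_init(&key, k, strlen(k) + 1);
        int r = db->put(db, NULL, &key, &key, DB_NOOVERWRITE);
        assert(r == 0);              // first insert succeeds
        r = db->put(db, NULL, &key, &key, DB_NOOVERWRITE);
        assert(r == DB_KEYEXIST);    // same key again is rejected
    }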
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_iterate_pending_lock_requests.cc b/storage/tokudb/ft-index/src/tests/test_iterate_pending_lock_requests.cc index 248c346c30510..03dcce49ffdde 100644 --- a/storage/tokudb/ft-index/src/tests/test_iterate_pending_lock_requests.cc +++ b/storage/tokudb/ft-index/src/tests/test_iterate_pending_lock_requests.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_keylen_diff.cc b/storage/tokudb/ft-index/src/tests/test_keylen_diff.cc new file mode 100644 index 0000000000000..144ac5fce3eee --- /dev/null +++ b/storage/tokudb/ft-index/src/tests/test_keylen_diff.cc @@ -0,0 +1,284 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#include "test.h" + +// test a comparison function that treats certain different-lengthed keys as equal + +struct packed_key { + char type; + char k[8]; + static packed_key as_int(int v) { + packed_key k; + k.type = 0; + memcpy(k.k, &v, sizeof(int)); + return k; + } + static packed_key as_double(double v) { + packed_key k; + k.type = 1; + memcpy(k.k, &v, sizeof(double)); + return k; + } + size_t size() const { + assert(type == 0 || type == 1); + return type == 0 ? 5 : 9; + } +}; + +// the point is that keys can be packed as integers or doubles, but +// we'll treat them both as doubles for the sake of comparison. +// this means a 4 byte number could equal an 8 byte number. +static int packed_key_cmp(DB *UU(db), const DBT *a, const DBT *b) { + assert(a->size == 5 || a->size == 9); + assert(b->size == 5 || b->size == 9); + char *k1 = reinterpret_cast(a->data); + char *k2 = reinterpret_cast(b->data); + assert(*k1 == 0 || *k1 == 1); + assert(*k2 == 0 || *k2 == 1); + double v1 = *k1 == 0 ? static_cast(*reinterpret_cast(k1 + 1)) : + *reinterpret_cast(k1 + 1); + double v2 = *k2 == 0 ? 
static_cast(*reinterpret_cast(k2 + 1)) : + *reinterpret_cast(k2 + 1); + if (v1 > v2) { + return 1; + } else if (v1 < v2) { + return -1; + } else { + return 0; + } +} + +static int update_callback(DB *UU(db), const DBT *UU(key), const DBT *old_val, const DBT *extra, + void (*set_val)(const DBT *new_val, void *setval_extra), void *setval_extra) { + assert(extra != nullptr); + assert(old_val != nullptr); + assert(extra->size == 0); + assert(old_val->size == 0); + if (extra->data == nullptr) { + set_val(nullptr, setval_extra); + } else { + DBT new_val; + char empty_v; + dbt_init(&new_val, &empty_v, 0); + set_val(&new_val, setval_extra); + } + return 0; +} + +enum overwrite_method { + VIA_UPDATE_OVERWRITE_BROADCAST, + VIA_UPDATE_DELETE_BROADCAST, + VIA_UPDATE_OVERWRITE, + VIA_UPDATE_DELETE, + VIA_DELETE, + VIA_INSERT, + NUM_OVERWRITE_METHODS +}; + +static void test_keylen_diff(enum overwrite_method method, bool control_test) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); CKERR(r); + r = env->set_default_bt_compare(env, packed_key_cmp); CKERR(r); + env->set_update(env, update_callback); CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL+DB_INIT_TXN, 0); CKERR(r); + + DB *db; + r = db_create(&db, env, 0); CKERR(r); + r = db->set_pagesize(db, 16 * 1024); // smaller pages so we get a more lush tree + r = db->set_readpagesize(db, 1 * 1024); // smaller basements so we get more per leaf + r = db->open(db, nullptr, "db", nullptr, DB_BTREE, DB_CREATE, 0666); CKERR(r); + + DBT null_dbt, empty_dbt; + char empty_v; + dbt_init(&empty_dbt, &empty_v, 0); + dbt_init(&null_dbt, nullptr, 0); + + const int num_keys = 256 * 1000; + + for (int i = 0; i < num_keys; i++) { + // insert it using a 4 byte key .. + packed_key key = packed_key::as_int(i); + + DBT dbt; + dbt_init(&dbt, &key, key.size()); + r = db->put(db, nullptr, &dbt, &empty_dbt, 0); CKERR(r); + } + + // overwrite keys randomly, so we induce flushes and get better / realistic coverage + int *XMALLOC_N(num_keys, shuffled_keys); + for (int i = 0; i < num_keys; i++) { + shuffled_keys[i] = i; + } + for (int i = num_keys - 1; i >= 1; i--) { + long rnd = random64() % (i + 1); + int tmp = shuffled_keys[rnd]; + shuffled_keys[rnd] = shuffled_keys[i]; + shuffled_keys[i] = tmp; + } + + for (int i = 0; i < num_keys; i++) { + // for the control test, delete it using the same length key + // + // .. otherwise, delete it with an 8 byte key + packed_key key = control_test ? packed_key::as_int(shuffled_keys[i]) : + packed_key::as_double(shuffled_keys[i]); + + DBT dbt; + dbt_init(&dbt, &key, key.size()); + DB_TXN *txn; + env->txn_begin(env, nullptr, &txn, DB_TXN_NOSYNC); CKERR(r); + switch (method) { + case VIA_INSERT: { + r = db->put(db, txn, &dbt, &empty_dbt, 0); CKERR(r); + break; + } + case VIA_DELETE: { + // we purposefully do not pass DB_DELETE_ANY because the hidden query acts as + // a sanity check for the control test and, overall, gives better code coverage + r = db->del(db, txn, &dbt, 0); CKERR(r); + break; + } + case VIA_UPDATE_OVERWRITE: + case VIA_UPDATE_DELETE: { + r = db->update(db, txn, &dbt, method == VIA_UPDATE_DELETE ? &null_dbt : &empty_dbt, 0); CKERR(r); + break; + } + case VIA_UPDATE_OVERWRITE_BROADCAST: + case VIA_UPDATE_DELETE_BROADCAST: { + r = db->update_broadcast(db, txn, method == VIA_UPDATE_DELETE_BROADCAST ? 
&null_dbt : &empty_dbt, 0); CKERR(r); + if (i > 1 ) { // only need to test broadcast twice - one with abort, one without + txn->abort(txn); // we opened a txn so we should abort it before exiting + goto done; + } + break; + } + default: { + assert(false); + } + } + const bool abort = i % 2 == 0; + if (abort) { + txn->abort(txn); + } else { + txn->commit(txn, 0); + } + } + +done: + toku_free(shuffled_keys); + + // optimize before close to ensure that all messages are applied and any potential bugs are exposed + r = db->optimize(db); + r = db->close(db, 0); CKERR(r); + r = env->close(env, 0); CKERR(r); +} + +int +test_main(int argc, char *const argv[]) { + parse_args(argc, argv); + + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); + + for (int i = 0; i < NUM_OVERWRITE_METHODS; i++) { + enum overwrite_method method = static_cast(i); + + // control test - must pass for the 'real' test below to be interesting + printf("testing method %d (control)\n", i); + test_keylen_diff(method, true); + + // real test, actually mixes key lengths + printf("testing method %d (real)\n", i); + test_keylen_diff(method, false); + } + + return 0; +} diff --git a/storage/tokudb/ft-index/src/tests/test_kv_gen.h b/storage/tokudb/ft-index/src/tests/test_kv_gen.h index f17b6c1864140..49bb3acdb4270 100644 --- a/storage/tokudb/ft-index/src/tests/test_kv_gen.h +++ b/storage/tokudb/ft-index/src/tests/test_kv_gen.h @@ -2,10 +2,6 @@ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: #ident "$Id$" -#ifndef __TEST_KV_GEN_H -#define __TEST_KV_GEN_H - - /* COPYING CONDITIONS NOTICE: @@ -34,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" @@ -279,6 +277,3 @@ put_multiple_generate(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, con } return 0; } - - -#endif // __TEST_KV_GEN_H diff --git a/storage/tokudb/ft-index/src/tests/test_kv_limits.cc b/storage/tokudb/ft-index/src/tests/test_kv_limits.cc index 9ce236bf0ada0..70390bb28029c 100644 --- a/storage/tokudb/ft-index/src/tests/test_kv_limits.cc +++ b/storage/tokudb/ft-index/src/tests/test_kv_limits.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_large_update_broadcast_small_cachetable.cc b/storage/tokudb/ft-index/src/tests/test_large_update_broadcast_small_cachetable.cc index ea164a8ea433f..e5ccb3071b890 100644 --- a/storage/tokudb/ft-index/src/tests/test_large_update_broadcast_small_cachetable.cc +++ b/storage/tokudb/ft-index/src/tests/test_large_update_broadcast_small_cachetable.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
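The comparator in the new test_keylen_diff.cc (packed_key_cmp, shown above) deliberately treats a 5-byte int-tagged key and a 9-byte double-tagged key as equal whenever they encode the same number, which is how the test mixes key lengths for the same logical row. A small self-contained sketch of that packing and comparison rule, with plain functions and no DB handles; the 1-byte type tag and payload layouts follow the packed_key struct in the diff:

    #include <cassert>
    #include <cstring>

    // tag 0 = int payload (5 bytes significant), tag 1 = double payload (9 bytes)
    static double unpack_as_double(const char *k) {
        if (k[0] == 0) { int v;    memcpy(&v, k + 1, sizeof(v)); return (double)v; }
        else           { double v; memcpy(&v, k + 1, sizeof(v)); return v; }
    }

    static int numeric_key_cmp(const char *a, const char *b) {
        double va = unpack_as_double(a), vb = unpack_as_double(b);
        return va < vb ? -1 : (va > vb ? 1 : 0);
    }

    int main(void) {
        char as_int[9] = {0};      // tag 0 + 4-byte int (only 5 bytes significant)
        char as_double[9] = {1};   // tag 1 + 8-byte double
        int i = 42; double d = 42.0;
        memcpy(as_int + 1, &i, sizeof(i));
        memcpy(as_double + 1, &d, sizeof(d));
        // Different lengths and different bytes, yet the same key to the comparator.
        assert(numeric_key_cmp(as_int, as_double) == 0);
        return 0;
    }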
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_lock_timeout_callback.cc b/storage/tokudb/ft-index/src/tests/test_lock_timeout_callback.cc index d4aae0f95d4dc..74daae7897fed 100644 --- a/storage/tokudb/ft-index/src/tests/test_lock_timeout_callback.cc +++ b/storage/tokudb/ft-index/src/tests/test_lock_timeout_callback.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_locking_with_read_txn.cc b/storage/tokudb/ft-index/src/tests/test_locking_with_read_txn.cc index 8f3349f3c4bd4..f3cb36d1df590 100644 --- a/storage/tokudb/ft-index/src/tests/test_locking_with_read_txn.cc +++ b/storage/tokudb/ft-index/src/tests/test_locking_with_read_txn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_locktree_close.cc b/storage/tokudb/ft-index/src/tests/test_locktree_close.cc index f3d220059b636..b5735fd1495fa 100644 --- a/storage/tokudb/ft-index/src/tests/test_locktree_close.cc +++ b/storage/tokudb/ft-index/src/tests/test_locktree_close.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -106,7 +106,7 @@ test_cursor (void) { DB_ENV * env; DB *db; DB_TXN * const null_txn = 0; - const char * const fname = "test.cursor.brt"; + const char * const fname = "test.cursor.ft"; int r; /* create the dup database file */ diff --git a/storage/tokudb/ft-index/src/tests/test_log0.cc b/storage/tokudb/ft-index/src/tests/test_log0.cc index 05c0820f153f1..c597affc562ee 100644 --- a/storage/tokudb/ft-index/src/tests/test_log0.cc +++ b/storage/tokudb/ft-index/src/tests/test_log0.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -/* Simple test of logging. Can I start a TokuDB with logging enabled? */ +/* Simple test of logging. Can I start TokuFT with logging enabled? */ #include #include diff --git a/storage/tokudb/ft-index/src/tests/test_log1.cc b/storage/tokudb/ft-index/src/tests/test_log1.cc index 11e402d7be9ea..8379d20ba4517 100644 --- a/storage/tokudb/ft-index/src/tests/test_log1.cc +++ b/storage/tokudb/ft-index/src/tests/test_log1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #include "test.h" -/* Simple test of logging. Can I start a TokuDB with logging enabled? */ +/* Simple test of logging. Can I start TokuFT with logging enabled? 
*/ #include #include @@ -123,7 +123,6 @@ static void make_db (bool close_env) { CKERR(r); } char *filename; -#if USE_TDB { DBT dname; DBT iname; @@ -135,10 +134,6 @@ static void make_db (bool close_env) { CAST_FROM_VOIDP(filename, iname.data); assert(filename); } -#else - filename = toku_xstrdup("foo.db"); -#endif - r=tid->commit(tid, 0); assert(r==0); r=db->close(db, 0); assert(r==0); diff --git a/storage/tokudb/ft-index/src/tests/test_log10.cc b/storage/tokudb/ft-index/src/tests/test_log10.cc index 48535f1871ae1..053efc0f07ddb 100644 --- a/storage/tokudb/ft-index/src/tests/test_log10.cc +++ b/storage/tokudb/ft-index/src/tests/test_log10.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ /* This test_log10 inserts to a db, closes, reopens, and inserts more to db. We want to make sure that the recovery of the buffers works. */ /* Lots of stuff gets inserted. */ @@ -120,9 +120,7 @@ static void insert_some (int outeri, bool close_env) { DB *db; DB_TXN *tid; r=db_env_create(&env, 0); assert(r==0); -#if IS_TDB db_env_enable_engine_status(0); // disable engine status on crash because test is expected to fail -#endif r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE|create_flag, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_create(&db, env, 0); CKERR(r); @@ -148,13 +146,6 @@ static void insert_some (int outeri, bool close_env) { key.data = hello; key.size=strlen(hello)+1; data.data = there; data.size=strlen(there)+1; r=db->put(db, tid, &key, &data, 0); CKERR(r); -#ifndef TOKUDB - // BDB cannot handle such a big txn. - if (i%1000==999) { - r=tid->commit(tid, 0); assert(r==0); - r=env->txn_begin(env, 0, &tid, 0); assert(r==0); - } -#endif } r=tid->commit(tid, 0); assert(r==0); r=db->close(db, 0); assert(r==0); @@ -173,9 +164,7 @@ static void make_db (bool close_env) { toku_os_recursive_delete(TOKU_TEST_FILENAME); r=toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); assert(r==0); r=db_env_create(&env, 0); assert(r==0); -#if IS_TDB db_env_enable_engine_status(0); // disable engine status on crash because test is expected to fail -#endif r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_create(&db, env, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/test_log1_abort.cc b/storage/tokudb/ft-index/src/tests/test_log1_abort.cc index 4cc37daec2846..6861698740f7e 100644 --- a/storage/tokudb/ft-index/src/tests/test_log1_abort.cc +++ b/storage/tokudb/ft-index/src/tests/test_log1_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -129,15 +129,7 @@ test_main (int UU(argc), char UU(*const argv[])) { r=tid->abort(tid); assert(r==0); r=env->close(env, 0); -#ifdef USE_BDB -#if DB_VERSION_MAJOR >= 5 assert(r==0); -#else - assert(r==ENOENT); -#endif -#else - assert(r==0); -#endif { toku_struct_stat statbuf; char filename[TOKU_PATH_MAX+1]; diff --git a/storage/tokudb/ft-index/src/tests/test_log2.cc b/storage/tokudb/ft-index/src/tests/test_log2.cc index 85a3354629fa2..d2a2e6d50068a 100644 --- a/storage/tokudb/ft-index/src/tests/test_log2.cc +++ b/storage/tokudb/ft-index/src/tests/test_log2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff --git a/storage/tokudb/ft-index/src/tests/test_log2_abort.cc b/storage/tokudb/ft-index/src/tests/test_log2_abort.cc index 7991f0466432d..9ed3f8a1a6f61 100644 --- a/storage/tokudb/ft-index/src/tests/test_log2_abort.cc +++ b/storage/tokudb/ft-index/src/tests/test_log2_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Like test_log2 except abort. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff --git a/storage/tokudb/ft-index/src/tests/test_log3.cc b/storage/tokudb/ft-index/src/tests/test_log3.cc index 9e4a531a899de..a8b71fa90db8d 100644 --- a/storage/tokudb/ft-index/src/tests/test_log3.cc +++ b/storage/tokudb/ft-index/src/tests/test_log3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff --git a/storage/tokudb/ft-index/src/tests/test_log3_abort.cc b/storage/tokudb/ft-index/src/tests/test_log3_abort.cc index dc36d754f09a3..d08dab0c033f6 100644 --- a/storage/tokudb/ft-index/src/tests/test_log3_abort.cc +++ b/storage/tokudb/ft-index/src/tests/test_log3_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_log4.cc b/storage/tokudb/ft-index/src/tests/test_log4.cc index b0da26e84547e..2117907f1b0cd 100644 --- a/storage/tokudb/ft-index/src/tests/test_log4.cc +++ b/storage/tokudb/ft-index/src/tests/test_log4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. 
*/ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff --git a/storage/tokudb/ft-index/src/tests/test_log4_abort.cc b/storage/tokudb/ft-index/src/tests/test_log4_abort.cc index 4d73cda39038b..37a000858128c 100644 --- a/storage/tokudb/ft-index/src/tests/test_log4_abort.cc +++ b/storage/tokudb/ft-index/src/tests/test_log4_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_log5.cc b/storage/tokudb/ft-index/src/tests/test_log5.cc index 6a40394668b6a..82a122c045a18 100644 --- a/storage/tokudb/ft-index/src/tests/test_log5.cc +++ b/storage/tokudb/ft-index/src/tests/test_log5.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff --git a/storage/tokudb/ft-index/src/tests/test_log5_abort.cc b/storage/tokudb/ft-index/src/tests/test_log5_abort.cc index be74c14b1d4b4..dcd512abd61d4 100644 --- a/storage/tokudb/ft-index/src/tests/test_log5_abort.cc +++ b/storage/tokudb/ft-index/src/tests/test_log5_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_log6.cc b/storage/tokudb/ft-index/src/tests/test_log6.cc index 9e579d5f4e525..710519d70a7bf 100644 --- a/storage/tokudb/ft-index/src/tests/test_log6.cc +++ b/storage/tokudb/ft-index/src/tests/test_log6.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ #include diff --git a/storage/tokudb/ft-index/src/tests/test_log6_abort.cc b/storage/tokudb/ft-index/src/tests/test_log6_abort.cc index c02e61c82b08e..09db439b22b03 100644 --- a/storage/tokudb/ft-index/src/tests/test_log6_abort.cc +++ b/storage/tokudb/ft-index/src/tests/test_log6_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_log6a_abort.cc b/storage/tokudb/ft-index/src/tests/test_log6a_abort.cc index ec4490c06fd19..b5ddb6b4c3c2e 100644 --- a/storage/tokudb/ft-index/src/tests/test_log6a_abort.cc +++ b/storage/tokudb/ft-index/src/tests/test_log6a_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_log7.cc b/storage/tokudb/ft-index/src/tests/test_log7.cc index cc38ae92ae3a3..afa9a5ab82cfa 100644 --- a/storage/tokudb/ft-index/src/tests/test_log7.cc +++ b/storage/tokudb/ft-index/src/tests/test_log7.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ /* This test_log7 is like test_log5 except maxcount is larger. */ @@ -122,9 +122,6 @@ static void make_db (bool close_env) { toku_os_recursive_delete(TOKU_TEST_FILENAME); r=toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); assert(r==0); r=db_env_create(&env, 0); assert(r==0); -#ifdef TOKUDB - -#endif r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_create(&db, env, 0); CKERR(r); r=env->txn_begin(env, 0, &tid, 0); assert(r==0); @@ -147,13 +144,11 @@ static void make_db (bool close_env) { key.data = hello; key.size=strlen(hello)+1; data.data = there; data.size=strlen(there)+1; r=db->put(db, tid, &key, &data, 0); assert(r==0); -#ifndef TOKUDB // BDB cannot handle this huge transaction even with a lot of locks. if (i%1000==599) { r=tid->commit(tid, 0); assert(r==0); r=env->txn_begin(env, 0, &tid, 0); assert(r==0); } -#endif } r=tid->commit(tid, 0); assert(r==0); r=db->close(db, 0); assert(r==0); diff --git a/storage/tokudb/ft-index/src/tests/test_log8.cc b/storage/tokudb/ft-index/src/tests/test_log8.cc index a2c59400013cf..39c607b3623a1 100644 --- a/storage/tokudb/ft-index/src/tests/test_log8.cc +++ b/storage/tokudb/ft-index/src/tests/test_log8.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ /* This test_log8 inserts to a db, closes, reopens, and inserts more to db. We want to make sure that the recovery of the buffers works. 
*/ @@ -119,9 +119,7 @@ static void insert_some (int outeri, bool close_env) { DB *db; DB_TXN *tid; r=db_env_create(&env, 0); assert(r==0); -#if IS_TDB db_env_enable_engine_status(0); // disable engine status on crash because test is expected to fail -#endif r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE|create_flag, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); @@ -166,9 +164,7 @@ static void make_db (bool close_env) { toku_os_recursive_delete(TOKU_TEST_FILENAME); r=toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); assert(r==0); r=db_env_create(&env, 0); assert(r==0); -#if IS_TDB db_env_enable_engine_status(0); // disable engine status on crash because test is expected to fail -#endif r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_create(&db, env, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/test_log9.cc b/storage/tokudb/ft-index/src/tests/test_log9.cc index c61312f90d2ae..6b7f1fddb444a 100644 --- a/storage/tokudb/ft-index/src/tests/test_log9.cc +++ b/storage/tokudb/ft-index/src/tests/test_log9.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: /* Test to see if we can do logging and recovery. */ -/* This is very specific to TokuDB. It won't work with Berkeley DB. */ +/* This is very specific to TokuFT. It won't work with Berkeley DB. */ /* This test_log8 inserts to a db, closes, reopens, and inserts more to db. We want to make sure that the recovery of the buffers works. */ @@ -119,9 +119,7 @@ static void insert_some (int outeri, bool close_env) { DB *db; DB_TXN *tid; r=db_env_create(&env, 0); assert(r==0); -#if IS_TDB db_env_enable_engine_status(0); // disable engine status on crash because test is expected to fail -#endif r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE|create_flag, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_create(&db, env, 0); CKERR(r); @@ -165,9 +163,7 @@ static void make_db (bool close_env) { toku_os_recursive_delete(TOKU_TEST_FILENAME); r=toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); assert(r==0); r=db_env_create(&env, 0); assert(r==0); -#if IS_TDB db_env_enable_engine_status(0); // disable engine status on crash because test is expected to fail -#endif r=env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=db_create(&db, env, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/test_logflush.cc b/storage/tokudb/ft-index/src/tests/test_logflush.cc index f7870bee5ce92..6ea09b839164f 100644 --- a/storage/tokudb/ft-index/src/tests/test_logflush.cc +++ b/storage/tokudb/ft-index/src/tests/test_logflush.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -96,11 +96,7 @@ PATENT RIGHTS GRANT: // Return the offset static int grep_for_in_logs (const char *str) { -#ifdef TOKUDB #define lfname "log000000000000.tokulog[0-9]*" -#else -#define lfname "log.0000000001" -#endif #define COMMAND "grep -F -q" char lname[TOKU_PATH_MAX+1]; toku_path_join(lname, 2, TOKU_TEST_FILENAME, lfname); diff --git a/storage/tokudb/ft-index/src/tests/test_logmax.cc b/storage/tokudb/ft-index/src/tests/test_logmax.cc index 2ff773a043af3..89c9284ea6bb5 100644 --- a/storage/tokudb/ft-index/src/tests/test_logmax.cc +++ b/storage/tokudb/ft-index/src/tests/test_logmax.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -152,11 +152,7 @@ test_logmax (int logmax) { int effective_max; if (logmax>0) effective_max = logmax; else { -#ifdef TOKUDB effective_max = 100<<20; -#else - effective_max = 10<<20; -#endif } r=env->txn_begin(env, 0, &tid, 0); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/test_memcmp_magic.cc b/storage/tokudb/ft-index/src/tests/test_memcmp_magic.cc new file mode 100644 index 0000000000000..5ddb473a6edb5 --- /dev/null +++ b/storage/tokudb/ft-index/src/tests/test_memcmp_magic.cc @@ -0,0 +1,219 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. 
+ + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#include "test.h" + +#include "util/dbt.h" + +static void test_memcmp_magic(void) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL+DB_INIT_TXN, 0); CKERR(r); + + DB *db; + r = db_create(&db, env, 0); CKERR(r); + + // Can't set the memcmp magic to 0 (since it's used as a sentinel for `none') + r = db->set_memcmp_magic(db, 0); CKERR2(r, EINVAL); + + // Should be ok to set it more than once, even to different things, before opening. + r = db->set_memcmp_magic(db, 1); CKERR(r); + r = db->set_memcmp_magic(db, 2); CKERR(r); + r = db->open(db, NULL, "db", "db", DB_BTREE, DB_CREATE, 0666); CKERR(r); + + // Can't set the memcmp magic after opening. + r = db->set_memcmp_magic(db, 0); CKERR2(r, EINVAL); + r = db->set_memcmp_magic(db, 1); CKERR2(r, EINVAL); + + DB *db2; + r = db_create(&db2, env, 0); CKERR(r); + r = db2->set_memcmp_magic(db2, 3); CKERR(r); // ..we can try setting it to something different + // ..but it should fail to open + r = db2->open(db2, NULL, "db", "db", DB_BTREE, DB_CREATE, 0666); CKERR2(r, EINVAL); + r = db2->set_memcmp_magic(db2, 2); CKERR(r); + r = db2->open(db2, NULL, "db", "db", DB_BTREE, DB_CREATE, 0666); CKERR(r); + + r = db2->close(db2, 0); + r = db->close(db, 0); CKERR(r); + + // dbremove opens its own handle internally. ensure that the open + // operation succeeds (and so does dbremove) despite the fact the + // internal open does not set the memcmp magic + r = env->dbremove(env, NULL, "db", "db", 0); CKERR(r); + r = env->close(env, 0); CKERR(r); +} + +static int comparison_function_unused(DB *UU(db), const DBT *UU(a), const DBT *UU(b)) { + // We're testing that the memcmp magic gets used so the real + // comparison function should never get called. 
+ invariant(false); + return 0; +} + +static int getf_key_cb(const DBT *key, const DBT *UU(val), void *extra) { + DBT *dbt = reinterpret_cast<DBT *>(extra); + toku_clone_dbt(dbt, *key); + return 0; +} + +static void test_memcmp_magic_sort_order(void) { + int r; + + // Verify that randomly generated integer keys are sorted in memcmp + // order when packed as little endian, even with an environment-wide + // comparison function that sorts as though keys are big-endian ints. + + DB_ENV *env; + r = db_env_create(&env, 0); CKERR(r); + r = env->set_default_bt_compare(env, comparison_function_unused); CKERR(r); + r = env->open(env, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL+DB_INIT_TXN, 0); CKERR(r); + + const int magic = 49; + + DB *db; + r = db_create(&db, env, 0); CKERR(r); + r = db->set_memcmp_magic(db, magic); CKERR(r); + r = db->open(db, NULL, "db", "db", DB_BTREE, DB_CREATE, 0666); CKERR(r); + + for (int i = 0; i < 10000; i++) { + char buf[1 + sizeof(int)]; + // Serialize key to first have the magic byte, then the little-endian key. + int k = toku_htonl(random()); + buf[0] = magic; + memcpy(&buf[1], &k, sizeof(int)); + + DBT key; + dbt_init(&key, buf, sizeof(buf)); + r = db->put(db, NULL, &key, &key, 0); CKERR(r); + } + + DB_TXN *txn; + env->txn_begin(env, NULL, &txn, 0); + DBC *dbc; + db->cursor(db, txn, &dbc, 0); + DBT prev_dbt, curr_dbt; + memset(&curr_dbt, 0, sizeof(DBT)); + memset(&prev_dbt, 0, sizeof(DBT)); + while (dbc->c_getf_next(dbc, 0, getf_key_cb, &curr_dbt)) { + invariant(curr_dbt.size == sizeof(int)); + if (prev_dbt.data != NULL) { + // Each key should be >= to the last using memcmp + int c = memcmp(prev_dbt.data, curr_dbt.data, sizeof(int)); + invariant(c <= 0); + } + toku_destroy_dbt(&prev_dbt); + prev_dbt = curr_dbt; + } + toku_destroy_dbt(&curr_dbt); + toku_destroy_dbt(&prev_dbt); + dbc->c_close(dbc); + txn->commit(txn, 0); + + r = db->close(db, 0); CKERR(r); + + // dbremove opens its own handle internally. ensure that the open + // operation succeeds (and so does dbremove) despite the fact the + // internal open does not set the memcmp magic + r = env->dbremove(env, NULL, "db", "db", 0); CKERR(r); + r = env->close(env, 0); CKERR(r); +} + +int +test_main(int argc, char *const argv[]) { + parse_args(argc, argv); + + toku_os_recursive_delete(TOKU_TEST_FILENAME); + int r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); + + test_memcmp_magic(); + test_memcmp_magic_sort_order(); + + return 0; +} diff --git a/storage/tokudb/ft-index/src/tests/test_mostly_seq.cc b/storage/tokudb/ft-index/src/tests/test_mostly_seq.cc index 1094639e7e732..f4f8d16e3123b 100644 --- a/storage/tokudb/ft-index/src/tests/test_mostly_seq.cc +++ b/storage/tokudb/ft-index/src/tests/test_mostly_seq.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_multiple_checkpoints_block_commit.cc b/storage/tokudb/ft-index/src/tests/test_multiple_checkpoints_block_commit.cc index 6cb2637214089..5accd55dadc4e 100644 --- a/storage/tokudb/ft-index/src/tests/test_multiple_checkpoints_block_commit.cc +++ b/storage/tokudb/ft-index/src/tests/test_multiple_checkpoints_block_commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc.
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_nested.cc b/storage/tokudb/ft-index/src/tests/test_nested.cc index 1f96101b940f5..9ce288435ce37 100644 --- a/storage/tokudb/ft-index/src/tests/test_nested.cc +++ b/storage/tokudb/ft-index/src/tests/test_nested.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_nodup_set.cc b/storage/tokudb/ft-index/src/tests/test_nodup_set.cc index f6797c81ef645..81c0d258af364 100644 --- a/storage/tokudb/ft-index/src/tests/test_nodup_set.cc +++ b/storage/tokudb/ft-index/src/tests/test_nodup_set.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_query.cc b/storage/tokudb/ft-index/src/tests/test_query.cc index 1a3ee026b9a16..db199ea2b800a 100644 --- a/storage/tokudb/ft-index/src/tests/test_query.cc +++ b/storage/tokudb/ft-index/src/tests/test_query.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_rand_insert.cc b/storage/tokudb/ft-index/src/tests/test_rand_insert.cc index d87f34af28b50..76c12a9d12472 100644 --- a/storage/tokudb/ft-index/src/tests/test_rand_insert.cc +++ b/storage/tokudb/ft-index/src/tests/test_rand_insert.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_read_txn_invalid_ops.cc b/storage/tokudb/ft-index/src/tests/test_read_txn_invalid_ops.cc index f86c56637c698..93cab3cd0c0eb 100644 --- a/storage/tokudb/ft-index/src/tests/test_read_txn_invalid_ops.cc +++ b/storage/tokudb/ft-index/src/tests/test_read_txn_invalid_ops.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_redirect_func.cc b/storage/tokudb/ft-index/src/tests/test_redirect_func.cc index f5e4dacbfcdc0..2107fda309328 100644 --- a/storage/tokudb/ft-index/src/tests/test_redirect_func.cc +++ b/storage/tokudb/ft-index/src/tests/test_redirect_func.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_restrict.cc b/storage/tokudb/ft-index/src/tests/test_restrict.cc index cc1d573ca4f7d..fe71111a6bb9d 100644 --- a/storage/tokudb/ft-index/src/tests/test_restrict.cc +++ b/storage/tokudb/ft-index/src/tests/test_restrict.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_reverse_compare_fun.cc b/storage/tokudb/ft-index/src/tests/test_reverse_compare_fun.cc index 7d569c5bf97b7..f50cc6fc18e8b 100644 --- a/storage/tokudb/ft-index/src/tests/test_reverse_compare_fun.cc +++ b/storage/tokudb/ft-index/src/tests/test_reverse_compare_fun.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -155,10 +155,8 @@ test_reverse_compare (int n) { /* create the dup database file */ DB_ENV *env; r = db_env_create(&env, 0); assert(r == 0); -#ifdef TOKUDB r = env->set_default_bt_compare(env, reverse_compare); CKERR(r); -#endif r = env->open(env, TOKU_TEST_FILENAME, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL, 0); assert(r == 0); DB *db; @@ -166,10 +164,6 @@ test_reverse_compare (int n) { CKERR(r); r = db->set_pagesize(db, 4096); CKERR(r); -#ifndef TOKUDB - r = db->set_bt_compare(db, reverse_compare); - CKERR(r); -#endif r = db->open(db, null_txn, fname, "main", DB_BTREE, DB_CREATE, 0666); CKERR(r); @@ -192,10 +186,6 @@ test_reverse_compare (int n) { CKERR(r); r = db->set_pagesize(db, 4096); CKERR(r); -#ifndef TOKUDB - r = db->set_bt_compare(db, reverse_compare); - CKERR(r); -#endif r = db->open(db, null_txn, fname, "main", DB_BTREE, 0, 0666); CKERR(r); diff --git a/storage/tokudb/ft-index/src/tests/test_set_func_malloc.cc b/storage/tokudb/ft-index/src/tests/test_set_func_malloc.cc index eea9a913c1c9d..0acea21e86320 100644 --- a/storage/tokudb/ft-index/src/tests/test_set_func_malloc.cc +++ b/storage/tokudb/ft-index/src/tests/test_set_func_malloc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -153,7 +153,6 @@ test1 (void) db_env_set_func_realloc(brealloc); db_env_set_func_free(bfree); -#ifdef USE_TDB // toku_malloc isn't affected by calling the BDB set_fun_malloc calls. malloc_counter = realloc_counter = free_counter = 0; @@ -166,8 +165,6 @@ test1 (void) db_env_set_func_malloc(NULL); db_env_set_func_realloc(NULL); db_env_set_func_free(NULL); - -#endif } int diff --git a/storage/tokudb/ft-index/src/tests/test_simple_read_txn.cc b/storage/tokudb/ft-index/src/tests/test_simple_read_txn.cc index 3538c71e476be..4449a6287b837 100644 --- a/storage/tokudb/ft-index/src/tests/test_simple_read_txn.cc +++ b/storage/tokudb/ft-index/src/tests/test_simple_read_txn.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_stress0.cc b/storage/tokudb/ft-index/src/tests/test_stress0.cc index 6e3eb2e2e8910..5dbca08db48ec 100644 --- a/storage/tokudb/ft-index/src/tests/test_stress0.cc +++ b/storage/tokudb/ft-index/src/tests/test_stress0.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_stress1.cc b/storage/tokudb/ft-index/src/tests/test_stress1.cc index 9aa5c29e89b3f..81095299265e2 100644 --- a/storage/tokudb/ft-index/src/tests/test_stress1.cc +++ b/storage/tokudb/ft-index/src/tests/test_stress1.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_stress2.cc b/storage/tokudb/ft-index/src/tests/test_stress2.cc index 255dc10317bd3..cbd798f318bea 100644 --- a/storage/tokudb/ft-index/src/tests/test_stress2.cc +++ b/storage/tokudb/ft-index/src/tests/test_stress2.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_stress3.cc b/storage/tokudb/ft-index/src/tests/test_stress3.cc index 572576261af73..b47e4f812aec4 100644 --- a/storage/tokudb/ft-index/src/tests/test_stress3.cc +++ b/storage/tokudb/ft-index/src/tests/test_stress3.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_stress4.cc b/storage/tokudb/ft-index/src/tests/test_stress4.cc index 4404f1cecac62..3d420561f5ea8 100644 --- a/storage/tokudb/ft-index/src/tests/test_stress4.cc +++ b/storage/tokudb/ft-index/src/tests/test_stress4.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_stress5.cc b/storage/tokudb/ft-index/src/tests/test_stress5.cc index 053da44d66c60..a591b3400253a 100644 --- a/storage/tokudb/ft-index/src/tests/test_stress5.cc +++ b/storage/tokudb/ft-index/src/tests/test_stress5.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_stress6.cc b/storage/tokudb/ft-index/src/tests/test_stress6.cc index e3d47064d13fd..d616622353b20 100644 --- a/storage/tokudb/ft-index/src/tests/test_stress6.cc +++ b/storage/tokudb/ft-index/src/tests/test_stress6.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_stress7.cc b/storage/tokudb/ft-index/src/tests/test_stress7.cc index 5db318521ee48..cdf03ce803622 100644 --- a/storage/tokudb/ft-index/src/tests/test_stress7.cc +++ b/storage/tokudb/ft-index/src/tests/test_stress7.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -108,7 +108,7 @@ stress_table(DB_ENV *env, DB **dbp, struct cli_args *cli_args) { // if (verbose) printf("starting creation of pthreads\n"); - const int num_threads = 4 + cli_args->num_update_threads + cli_args->num_ptquery_threads; + const int num_threads = 5 + cli_args->num_update_threads + cli_args->num_ptquery_threads; struct arg myargs[num_threads]; for (int i = 0; i < num_threads; i++) { arg_init(&myargs[i], dbp, env, cli_args); @@ -129,19 +129,21 @@ stress_table(DB_ENV *env, DB **dbp, struct cli_args *cli_args) { myargs[1].operation_extra = &soe[1]; myargs[1].operation = scan_op; - // make the guy that runs HOT in the background + // make the guys that run hot optimize, keyrange, and frag stats in the background myargs[2].operation = hot_op; myargs[3].operation = keyrange_op; + myargs[4].operation = frag_op; + myargs[4].sleep_ms = 100; struct update_op_args uoe = get_update_op_args(cli_args, NULL); // make the guy that updates the db - for (int i = 4; i < 4 + cli_args->num_update_threads; ++i) { + for (int i = 5; i < 5 + cli_args->num_update_threads; ++i) { myargs[i].operation_extra = &uoe; myargs[i].operation = update_op; } // make the guy that does point queries - for (int i = 4 + cli_args->num_update_threads; i < num_threads; i++) { + for (int i = 5 + cli_args->num_update_threads; i < num_threads; i++) { myargs[i].operation = ptquery_op; } run_workers(myargs, num_threads, cli_args->num_seconds, false, cli_args); diff --git a/storage/tokudb/ft-index/src/tests/test_stress_hot_indexing.cc b/storage/tokudb/ft-index/src/tests/test_stress_hot_indexing.cc index 65e7230bba6d5..fe237f063abd6 100644 --- a/storage/tokudb/ft-index/src/tests/test_stress_hot_indexing.cc +++ b/storage/tokudb/ft-index/src/tests/test_stress_hot_indexing.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_stress_openclose.cc b/storage/tokudb/ft-index/src/tests/test_stress_openclose.cc index 55d21770b0c70..54c8e784b185a 100644 --- a/storage/tokudb/ft-index/src/tests/test_stress_openclose.cc +++ b/storage/tokudb/ft-index/src/tests/test_stress_openclose.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_stress_with_verify.cc b/storage/tokudb/ft-index/src/tests/test_stress_with_verify.cc index 3c13da4f97591..d259d09d25a64 100644 --- a/storage/tokudb/ft-index/src/tests/test_stress_with_verify.cc +++ b/storage/tokudb/ft-index/src/tests/test_stress_with_verify.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_thread_flags.cc b/storage/tokudb/ft-index/src/tests/test_thread_flags.cc index 08429d1effce3..2ff2dabab98f0 100644 --- a/storage/tokudb/ft-index/src/tests/test_thread_flags.cc +++ b/storage/tokudb/ft-index/src/tests/test_thread_flags.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_thread_insert.cc b/storage/tokudb/ft-index/src/tests/test_thread_insert.cc index c8a84196d4a35..a1044948e0e11 100644 --- a/storage/tokudb/ft-index/src/tests/test_thread_insert.cc +++ b/storage/tokudb/ft-index/src/tests/test_thread_insert.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt.cc b/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt.cc index d1844ba3f9bb2..5d2196b13f81b 100644 --- a/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt.cc +++ b/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt2.cc b/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt2.cc index dd545d339005d..3a215a8b6f2e3 100644 --- a/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt2.cc +++ b/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt3.cc b/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt3.cc index 71a9358c332c2..6644cdabaa29d 100644 --- a/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt3.cc +++ b/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt4.cc b/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt4.cc index dd545d339005d..3a215a8b6f2e3 100644 --- a/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt4.cc +++ b/storage/tokudb/ft-index/src/tests/test_trans_desc_during_chkpt4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_transactional_descriptor.cc b/storage/tokudb/ft-index/src/tests/test_transactional_descriptor.cc index 8c800784e5ccf..4f2e66a938112 100644 --- a/storage/tokudb/ft-index/src/tests/test_transactional_descriptor.cc +++ b/storage/tokudb/ft-index/src/tests/test_transactional_descriptor.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_txn_abort5.cc b/storage/tokudb/ft-index/src/tests/test_txn_abort5.cc index 27b7f056cf2ab..fb3a522c99530 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_abort5.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_abort5.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_txn_abort5a.cc b/storage/tokudb/ft-index/src/tests/test_txn_abort5a.cc index 87840fc8958be..6678a95980564 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_abort5a.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_abort5a.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_txn_abort6.cc b/storage/tokudb/ft-index/src/tests/test_txn_abort6.cc index ce481c342de72..f61aea8e0bdef 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_abort6.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_abort6.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_txn_abort7.cc b/storage/tokudb/ft-index/src/tests/test_txn_abort7.cc index 144591ca65066..f7f0840680f97 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_abort7.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_abort7.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -119,7 +119,6 @@ test_abort_create (void) { { char *filename; -#if USE_TDB { DBT dname; DBT iname; @@ -131,9 +130,6 @@ test_abort_create (void) { CAST_FROM_VOIDP(filename, iname.data); assert(filename); } -#else - filename = toku_xstrdup("test.db"); -#endif toku_struct_stat statbuf; char fullfile[TOKU_PATH_MAX+1]; r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, filename), &statbuf); @@ -145,7 +141,6 @@ test_abort_create (void) { r = txn->abort(txn); assert(r == 0); { -#if USE_TDB { DBT dname; DBT iname; @@ -155,7 +150,6 @@ test_abort_create (void) { r = env->get_iname(env, &dname, &iname); CKERR2(r, DB_NOTFOUND); } -#endif toku_struct_stat statbuf; char fullfile[TOKU_PATH_MAX+1]; r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, "test.db"), &statbuf); diff --git a/storage/tokudb/ft-index/src/tests/test_txn_begin_commit.cc b/storage/tokudb/ft-index/src/tests/test_txn_begin_commit.cc index 8b3906decb87e..7e686e3e8859a 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_begin_commit.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_begin_commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_txn_close_before_commit.cc b/storage/tokudb/ft-index/src/tests/test_txn_close_before_commit.cc index cbc9d856bd24e..24ef8a0fb729e 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_close_before_commit.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_close_before_commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_txn_close_before_prepare_commit.cc b/storage/tokudb/ft-index/src/tests/test_txn_close_before_prepare_commit.cc index 6427bf8491e04..e3b715c4ce74c 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_close_before_prepare_commit.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_close_before_prepare_commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_txn_commit8.cc b/storage/tokudb/ft-index/src/tests/test_txn_commit8.cc deleted file mode 100644 index 4f4d732dc8bbc..0000000000000 --- a/storage/tokudb/ft-index/src/tests/test_txn_commit8.cc +++ /dev/null @@ -1,156 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. 
- -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#include "test.h" -#include - -#include -#include -#include -#include -#include - -// like test_txn_abort8.c except commit -static void -test_abort_close (void) { - -#ifndef USE_TDB -#if DB_VERSION_MAJOR==4 && DB_VERSION_MINOR==3 - if (verbose) fprintf(stderr, "%s does not work for BDB %d.%d. 
Not running\n", __FILE__, DB_VERSION_MAJOR, DB_VERSION_MINOR); - return; -#else - toku_os_recursive_delete(TOKU_TEST_FILENAME); - toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); - - int r; - DB_ENV *env; - r = db_env_create(&env, 0); assert(r == 0); - r = env->set_data_dir(env, TOKU_TEST_FILENAME); - r = env->set_lg_dir(env, TOKU_TEST_FILENAME); - env->set_errfile(env, stdout); - r = env->open(env, 0, DB_INIT_MPOOL + DB_INIT_LOG + DB_INIT_LOCK + DB_INIT_TXN + DB_PRIVATE + DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); - if (r != 0) printf("%s:%d:%d:%s\n", __FILE__, __LINE__, r, db_strerror(r)); - assert(r == 0); - - DB_TXN *txn = 0; - r = env->txn_begin(env, 0, &txn, 0); assert(r == 0); - - DB *db; - r = db_create(&db, env, 0); assert(r == 0); - r = db->open(db, txn, "test.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); assert(r == 0); - - { - toku_struct_stat statbuf; - char fullfile[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, "test.db"), &statbuf); - assert(r==0); - } - - // Close before commit - r = db->close(db, 0); - - r = txn->commit(txn, 0); assert(r == 0); - - r = env->close(env, 0); assert(r == 0); - - { - toku_struct_stat statbuf; - char fullfile[TOKU_PATH_MAX+1]; - r = toku_stat(toku_path_join(fullfile, 2, TOKU_TEST_FILENAME, "test.db"), &statbuf); - assert(r==0); - } -#endif -#endif -} - -int -test_main(int UU(argc), char UU(*const argv[])) { - test_abort_close(); - return 0; -} diff --git a/storage/tokudb/ft-index/src/tests/test_txn_cursor_last.cc b/storage/tokudb/ft-index/src/tests/test_txn_cursor_last.cc index 62813ab86c3a4..82a15e0b515f4 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_cursor_last.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_cursor_last.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -291,12 +291,10 @@ test_main(int argc, char *const argv[]) { parse_args(argc, argv); - if (IS_TDB) { - test_txn_cursor_last_1(0); - test_txn_cursor_last_1(1); - test_txn_cursor_last_2(0); - test_txn_cursor_last_2(1); - } + test_txn_cursor_last_1(0); + test_txn_cursor_last_1(1); + test_txn_cursor_last_2(0); + test_txn_cursor_last_2(1); return 0; } diff --git a/storage/tokudb/ft-index/src/tests/test_txn_nested1.cc b/storage/tokudb/ft-index/src/tests/test_txn_nested1.cc index a5ba6afdfb11d..7797d88e47819 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_nested1.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_nested1.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #define MAX_NEST MAX_NESTED_TRANSACTIONS @@ -125,9 +125,7 @@ setup_db (void) { toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO); r = db_env_create(&env, 0); CKERR(r); -#ifdef TOKUDB r = env->set_default_bt_compare(env, int_dbt_cmp); CKERR(r); -#endif r = env->open(env, TOKU_TEST_FILENAME, DB_INIT_MPOOL | DB_INIT_LOG | DB_INIT_LOCK | DB_INIT_TXN | DB_PRIVATE | DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); @@ -136,9 +134,6 @@ setup_db (void) { r = env->txn_begin(env, 0, &txn, 0); CKERR(r); r = db_create(&db, env, 0); CKERR(r); -#ifndef TOKUDB - r = db->set_bt_compare(db, int_dbt_cmp); CKERR(r); -#endif r = db->open(db, txn, "test.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = txn->commit(txn, 0); CKERR(r); } @@ -219,74 +214,6 @@ test_txn_nesting (int depth) { } } - -#if 0 -static void -test_txn_abort (int insert, int secondnum) { - if (verbose) { fprintf(stderr, "%s (%s):%d [%d,%d]\n", __FILE__, __FUNCTION__, __LINE__, insert, secondnum); fflush(stderr); } - setup_db(); - - DBT key, val; - int r; - - - DB_TXN *parent = NULL, *child = NULL; - - int i = 1; - r = env->txn_begin(env, 0, &parent, 0); CKERR(r); - - //Insert something as a child - r = env->txn_begin(env, parent, &child, 0); CKERR(r); - i = 1; - r = db->put(db, child, dbt_init(&key, &i, sizeof i), dbt_init(&val, &i, sizeof i), 0); - CKERR(r); - r = child->commit(child,DB_TXN_NOSYNC); - child = NULL; - - - //delete it as a child - r = env->txn_begin(env, parent, &child, 0); CKERR(r); - i = secondnum; - if (insert) { - r = db->put(db, child, dbt_init(&key, &i, sizeof i), dbt_init(&val, &i, sizeof i), 0); - CKERR(r); - } - else { // delete - r = db->del(db, child, dbt_init(&key, &i, sizeof i), DB_DELETE_ANY); - if (IS_TDB) { - CKERR(r); - } else { - CKERR2(r, (secondnum==1 ? 0 : DB_NOTFOUND)); - } - } - r = child->commit(child,DB_TXN_NOSYNC); - child = NULL; - - r = parent->abort(parent); - CKERR(r); - parent = NULL; - - - { - DB_TXN *txn = NULL; - /* walk the db, should be empty */ - r = env->txn_begin(env, 0, &txn, 0); CKERR(r); - DBC *cursor; - r = db->cursor(db, txn, &cursor, 0); CKERR(r); - memset(&key, 0, sizeof key); - memset(&val, 0, sizeof val); - r = cursor->c_get(cursor, &key, &val, DB_FIRST); - CKERR2(r, DB_NOTFOUND); - r = cursor->c_close(cursor); CKERR(r); - r = txn->commit(txn, 0); - } - r=db->close(db, 0); CKERR(r); - r=env->close(env, 0); CKERR(r); - -} - -#endif - int test_main(int argc, char *const argv[]) { parse_args(argc, argv); diff --git a/storage/tokudb/ft-index/src/tests/test_txn_nested2.cc b/storage/tokudb/ft-index/src/tests/test_txn_nested2.cc index 542f2574c8517..f5c0d2b4e5137 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_nested2.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_nested2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -87,15 +87,18 @@ PATENT RIGHTS GRANT: */ #ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." 
-#include "test.h" -#include +#include +#include #include #include #include #include -#include -#include + +#include "src/tests/test.h" + +#include + #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE MAX_TRANSACTION_RECORDS diff --git a/storage/tokudb/ft-index/src/tests/test_txn_nested3.cc b/storage/tokudb/ft-index/src/tests/test_txn_nested3.cc index 22e5d984a7a25..16ede714422e9 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_nested3.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_nested3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE MAX_TRANSACTION_RECORDS diff --git a/storage/tokudb/ft-index/src/tests/test_txn_nested4.cc b/storage/tokudb/ft-index/src/tests/test_txn_nested4.cc index edc6430a8c705..0bca6309169ed 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_nested4.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_nested4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE MAX_TRANSACTION_RECORDS diff --git a/storage/tokudb/ft-index/src/tests/test_txn_nested5.cc b/storage/tokudb/ft-index/src/tests/test_txn_nested5.cc index df5ad696984f2..02692be3d0893 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_nested5.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_nested5.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,7 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include +#include #define MAX_NEST MAX_TRANSACTION_RECORDS #define MAX_SIZE (MAX_TRANSACTION_RECORDS + 1) diff --git a/storage/tokudb/ft-index/src/tests/test_txn_nested_abort.cc b/storage/tokudb/ft-index/src/tests/test_txn_nested_abort.cc index 2c81c91681d38..10be3fea79d58 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_nested_abort.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_nested_abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_txn_nested_abort2.cc b/storage/tokudb/ft-index/src/tests/test_txn_nested_abort2.cc index 2fbf3f6e2b2eb..6e1928b48915e 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_nested_abort2.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_nested_abort2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_txn_nested_abort3.cc b/storage/tokudb/ft-index/src/tests/test_txn_nested_abort3.cc index c53b1cc68ba9f..2fa58b86b2a25 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_nested_abort3.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_nested_abort3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_txn_nested_abort4.cc b/storage/tokudb/ft-index/src/tests/test_txn_nested_abort4.cc index 729c5ed44dd98..b412aeec884b7 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_nested_abort4.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_nested_abort4.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -160,11 +160,7 @@ test_txn_abort (int insert, int secondnum) { } else { // delete r = db->del(db, child, dbt_init(&key, &i, sizeof i), DB_DELETE_ANY); - if (IS_TDB) { CKERR(r); - } else { - CKERR2(r, (secondnum==1 ? 0 : DB_NOTFOUND)); - } } r = child->commit(child,DB_TXN_NOSYNC); child = NULL; diff --git a/storage/tokudb/ft-index/src/tests/test_txn_recover3.cc b/storage/tokudb/ft-index/src/tests/test_txn_recover3.cc index c701ed5125729..55cf772f207ae 100644 --- a/storage/tokudb/ft-index/src/tests/test_txn_recover3.cc +++ b/storage/tokudb/ft-index/src/tests/test_txn_recover3.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_unused_memory_crash.cc b/storage/tokudb/ft-index/src/tests/test_unused_memory_crash.cc index 9bb65016ba02a..9c13a08368f51 100644 --- a/storage/tokudb/ft-index/src/tests/test_unused_memory_crash.cc +++ b/storage/tokudb/ft-index/src/tests/test_unused_memory_crash.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_abort_works.cc b/storage/tokudb/ft-index/src/tests/test_update_abort_works.cc index 595b955855f93..4a0d815749b9d 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_abort_works.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_abort_works.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_broadcast_abort_works.cc b/storage/tokudb/ft-index/src/tests/test_update_broadcast_abort_works.cc index 5b9e105cc7c64..c11fffe643fdf 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_broadcast_abort_works.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_broadcast_abort_works.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_broadcast_calls_back.cc b/storage/tokudb/ft-index/src/tests/test_update_broadcast_calls_back.cc index 22bb1193f3f20..db12a74832b9b 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_broadcast_calls_back.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_broadcast_calls_back.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_broadcast_can_delete_elements.cc b/storage/tokudb/ft-index/src/tests/test_update_broadcast_can_delete_elements.cc index a54aa20da88fe..804161402ba7d 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_broadcast_can_delete_elements.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_broadcast_can_delete_elements.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_broadcast_changes_values.cc b/storage/tokudb/ft-index/src/tests/test_update_broadcast_changes_values.cc index c532d571375df..304c799de0782 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_broadcast_changes_values.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_broadcast_changes_values.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_broadcast_indexer.cc b/storage/tokudb/ft-index/src/tests/test_update_broadcast_indexer.cc index 839b42a534700..4a7fa17642486 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_broadcast_indexer.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_broadcast_indexer.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_broadcast_loader.cc b/storage/tokudb/ft-index/src/tests/test_update_broadcast_loader.cc index 704e6e08070a1..4e3db6380ae65 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_broadcast_loader.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_broadcast_loader.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_broadcast_nested_updates.cc b/storage/tokudb/ft-index/src/tests/test_update_broadcast_nested_updates.cc index 42b254b22ef05..2f858beffcd17 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_broadcast_nested_updates.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_broadcast_nested_updates.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_broadcast_previously_deleted.cc b/storage/tokudb/ft-index/src/tests/test_update_broadcast_previously_deleted.cc index 912b68f6a1e17..348dd71f94115 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_broadcast_previously_deleted.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_broadcast_previously_deleted.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_broadcast_stress.cc b/storage/tokudb/ft-index/src/tests/test_update_broadcast_stress.cc index 9da0fd8dfa857..fb294e40446d6 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_broadcast_stress.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_broadcast_stress.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_broadcast_update_fun_has_choices.cc b/storage/tokudb/ft-index/src/tests/test_update_broadcast_update_fun_has_choices.cc index 31c0dabc39cb5..6f6481d317518 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_broadcast_update_fun_has_choices.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_broadcast_update_fun_has_choices.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_broadcast_with_empty_table.cc b/storage/tokudb/ft-index/src/tests/test_update_broadcast_with_empty_table.cc index 82c69f95af80c..5aa27c10b6972 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_broadcast_with_empty_table.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_broadcast_with_empty_table.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_calls_back.cc b/storage/tokudb/ft-index/src/tests/test_update_calls_back.cc index ba64dea1463b5..4970cc6ad4c3a 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_calls_back.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_calls_back.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_can_delete_elements.cc b/storage/tokudb/ft-index/src/tests/test_update_can_delete_elements.cc index ca59008014fb1..328a569bf5686 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_can_delete_elements.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_can_delete_elements.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_changes_values.cc b/storage/tokudb/ft-index/src/tests/test_update_changes_values.cc index 623ce1a1f383f..ee346f5494754 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_changes_values.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_changes_values.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_nested_updates.cc b/storage/tokudb/ft-index/src/tests/test_update_nested_updates.cc index 28ab01ae6320b..cb44a5bff29bd 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_nested_updates.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_nested_updates.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_nonexistent_keys.cc b/storage/tokudb/ft-index/src/tests/test_update_nonexistent_keys.cc index 24a1eaf3787f8..1d609aabb9ab5 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_nonexistent_keys.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_nonexistent_keys.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_previously_deleted.cc b/storage/tokudb/ft-index/src/tests/test_update_previously_deleted.cc index 27c0164985140..7e9e4bcb5ba23 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_previously_deleted.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_previously_deleted.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_stress.cc b/storage/tokudb/ft-index/src/tests/test_update_stress.cc index 97a6bb93d6cc4..0c0d2c9926a3a 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_stress.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_stress.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_concurrently.cc b/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_concurrently.cc index 337a90e8d8889..99acf3f7f8ed9 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_concurrently.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_concurrently.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_correctly_with_deletes.cc b/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_correctly_with_deletes.cc index 641521e96bdea..61a346c928d47 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_correctly_with_deletes.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_txn_snapshot_works_correctly_with_deletes.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_update_with_empty_table.cc b/storage/tokudb/ft-index/src/tests/test_update_with_empty_table.cc index 6ed492b86e982..6d54ee93b19c1 100644 --- a/storage/tokudb/ft-index/src/tests/test_update_with_empty_table.cc +++ b/storage/tokudb/ft-index/src/tests/test_update_with_empty_table.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_updates_single_key.cc b/storage/tokudb/ft-index/src/tests/test_updates_single_key.cc index 455e82122dd9c..0b4dff69b9f8a 100644 --- a/storage/tokudb/ft-index/src/tests/test_updates_single_key.cc +++ b/storage/tokudb/ft-index/src/tests/test_updates_single_key.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/test_weakxaction.cc b/storage/tokudb/ft-index/src/tests/test_weakxaction.cc index b1ca7e19ba3fd..e99f6510a0ccb 100644 --- a/storage/tokudb/ft-index/src/tests/test_weakxaction.cc +++ b/storage/tokudb/ft-index/src/tests/test_weakxaction.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -128,9 +128,6 @@ test_autotxn (uint32_t env_flags, uint32_t db_flags) { DB_TXN *x1, *x2 = NULL; r = env->txn_begin(env, 0, &x1, DB_TXN_NOWAIT); CKERR(r); - #ifdef USE_BDB - r = env->txn_begin(env, 0, &x2, DB_TXN_NOWAIT); CKERR(r); - #endif DBT k1,k2,v1,v2; dbt_init(&k1, "hello", sizeof "hello"); dbt_init(&k2, "hello", sizeof "hello"); @@ -141,9 +138,6 @@ test_autotxn (uint32_t env_flags, uint32_t db_flags) { r = db->put(db, x1, &k1, &v1, 0); CKERR(r); r = db->get(db, x2, &k2, &v2, 0); assert(r==DB_LOCK_DEADLOCK || r==DB_LOCK_NOTGRANTED); r = x1->commit(x1, 0); CKERR(r); - #ifdef USE_BDB - r = x2->commit(x2, 0); assert(r==0); - #endif r = db->close(db, 0); CKERR(r); r = env->close(env, 0); assert(r==0); } diff --git a/storage/tokudb/ft-index/src/tests/test_zero_length_keys.cc b/storage/tokudb/ft-index/src/tests/test_zero_length_keys.cc index 37180e9e9521b..c7b4dd1ac7536 100644 --- a/storage/tokudb/ft-index/src/tests/test_zero_length_keys.cc +++ b/storage/tokudb/ft-index/src/tests/test_zero_length_keys.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/threaded_stress_test_helpers.h b/storage/tokudb/ft-index/src/tests/threaded_stress_test_helpers.h index c173d2d2d63ba..2c2525a316511 100644 --- a/storage/tokudb/ft-index/src/tests/threaded_stress_test_helpers.h +++ b/storage/tokudb/ft-index/src/tests/threaded_stress_test_helpers.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved." #ident "$Id$" @@ -102,9 +104,6 @@ PATENT RIGHTS GRANT: // with keys in the range [0, table_size - 1] unless disperse_keys is true, // then the keys are scrambled up in the integer key space. -#ifndef _THREADED_STRESS_TEST_HELPERS_H_ -#define _THREADED_STRESS_TEST_HELPERS_H_ - #include "toku_config.h" #include "test.h" @@ -123,7 +122,7 @@ PATENT RIGHTS GRANT: #include -#include +#include #include #include @@ -209,6 +208,7 @@ struct cli_args { bool nocrashstatus; // do not print engine status upon crash bool prelock_updates; // update threads perform serial updates on a prelocked range bool disperse_keys; // spread the keys out during a load (by reversing the bits in the loop index) to make a wide tree we can spread out random inserts into + bool memcmp_keys; // pack keys big endian and use the builtin key comparison function in the fractal tree bool direct_io; // use direct I/O const char *print_engine_status; // print engine status rows matching a simple regex "a|b|c", matching strings where a or b or c is a subtring. }; @@ -833,12 +833,13 @@ fill_key_buf(int64_t key, uint8_t *data, struct cli_args *args) { } invariant(key >= 0); if (args->key_size == sizeof(int)) { - const int key32 = key; + const int key32 = args->memcmp_keys ? toku_htonl(key) : key; memcpy(data, &key32, sizeof(key32)); } else { invariant(args->key_size >= sizeof(key)); - memcpy(data, &key, sizeof(key)); - memset(data + sizeof(key), 0, args->key_size - sizeof(key)); + const int64_t key64 = args->memcmp_keys ? 
toku_htonl(key) : key; + memcpy(data, &key64, sizeof(key64)); + memset(data + sizeof(key64), 0, args->key_size - sizeof(key64)); } } @@ -1076,6 +1077,16 @@ static int UU() keyrange_op(DB_TXN *txn, ARG arg, void* UU(operation_extra), voi return r; } +static int UU() frag_op(DB_TXN *UU(txn), ARG arg, void* UU(operation_extra), void *UU(stats_extra)) { + int db_index = myrandom_r(arg->random_data)%arg->cli->num_DBs; + DB *db = arg->dbp[db_index]; + + TOKU_DB_FRAGMENTATION_S frag; + int r = db->get_fragmentation(db, &frag); + invariant_zero(r); + return r; +} + static void UU() get_key_after_bytes_callback(const DBT *UU(end_key), uint64_t UU(skipped), void *UU(extra)) { // nothing } @@ -1966,7 +1977,9 @@ static int create_tables(DB_ENV **env_res, DB **db_res, int num_DBs, db_env_set_num_bucket_mutexes(env_args.num_bucket_mutexes); r = db_env_create(&env, 0); assert(r == 0); r = env->set_redzone(env, 0); CKERR(r); - r = env->set_default_bt_compare(env, bt_compare); CKERR(r); + if (!cli_args->memcmp_keys) { + r = env->set_default_bt_compare(env, bt_compare); CKERR(r); + } r = env->set_lk_max_memory(env, env_args.lk_max_memory); CKERR(r); r = env->set_cachesize(env, env_args.cachetable_size / (1 << 30), env_args.cachetable_size % (1 << 30), 1); CKERR(r); r = env->set_lg_bsize(env, env_args.rollback_node_size); CKERR(r); @@ -2164,7 +2177,9 @@ static int open_tables(DB_ENV **env_res, DB **db_res, int num_DBs, db_env_set_num_bucket_mutexes(env_args.num_bucket_mutexes); r = db_env_create(&env, 0); assert(r == 0); r = env->set_redzone(env, 0); CKERR(r); - r = env->set_default_bt_compare(env, bt_compare); CKERR(r); + if (!cli_args->memcmp_keys) { + r = env->set_default_bt_compare(env, bt_compare); CKERR(r); + } r = env->set_lk_max_memory(env, env_args.lk_max_memory); CKERR(r); env->set_update(env, env_args.update_function); r = env->set_cachesize(env, env_args.cachetable_size / (1 << 30), env_args.cachetable_size % (1 << 30), 1); CKERR(r); @@ -2282,6 +2297,7 @@ static struct cli_args UU() get_default_args(void) { .nocrashstatus = false, .prelock_updates = false, .disperse_keys = false, + .memcmp_keys = false, .direct_io = false, }; DEFAULT_ARGS.env_args.envdir = TOKU_TEST_FILENAME; @@ -2669,6 +2685,7 @@ static inline void parse_stress_test_args (int argc, char *const argv[], struct BOOL_ARG("nocrashstatus", nocrashstatus), BOOL_ARG("prelock_updates", prelock_updates), BOOL_ARG("disperse_keys", disperse_keys), + BOOL_ARG("memcmp_keys", memcmp_keys), BOOL_ARG("direct_io", direct_io), STRING_ARG("--envdir", env_args.envdir), @@ -2924,5 +2941,3 @@ UU() perf_test_main_with_cmp(struct cli_args *args, int (*cmp)(DB *, const DBT * // We want to control the row size and its compressibility. open_and_stress_tables(args, false, cmp); } - -#endif diff --git a/storage/tokudb/ft-index/src/tests/time_create_db.cc b/storage/tokudb/ft-index/src/tests/time_create_db.cc index 2cc2496f33a9f..2365df4701d8f 100644 --- a/storage/tokudb/ft-index/src/tests/time_create_db.cc +++ b/storage/tokudb/ft-index/src/tests/time_create_db.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
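The fill_key_buf change in the threaded_stress_test_helpers.h hunk above packs keys with toku_htonl whenever the new memcmp_keys option is set, so the fractal tree's builtin byte-wise comparator orders them the same way integer comparison would (which is also why the create_tables/open_tables hunks skip env->set_default_bt_compare in that mode). A minimal sketch of why big-endian packing gives that property; htonl from <arpa/inet.h> stands in for toku_htonl and the 4-byte buffer is illustrative only.

    #include <arpa/inet.h>   // htonl: host byte order to big-endian (network) order
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Pack a 32-bit key big endian so that memcmp() on the raw bytes orders
    // keys the same way integer comparison does.
    static void pack_key_be(uint32_t key, uint8_t buf[4]) {
        const uint32_t be = htonl(key);
        memcpy(buf, &be, sizeof(be));
    }

    int main(void) {
        uint8_t a[4], b[4];
        pack_key_be(255, a);   // bytes 00 00 00 ff
        pack_key_be(256, b);   // bytes 00 00 01 00
        // Most significant byte first, so byte-wise comparison agrees with
        // numeric comparison; a little-endian memcpy of the ints would not
        // (ff 00 00 00 vs 00 01 00 00 compares the wrong way round).
        assert(memcmp(a, b, 4) < 0);
        return 0;
    }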
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/transactional_fileops.cc b/storage/tokudb/ft-index/src/tests/transactional_fileops.cc index c58e5d8e8a17e..ea1f8af6e16eb 100644 --- a/storage/tokudb/ft-index/src/tests/transactional_fileops.cc +++ b/storage/tokudb/ft-index/src/tests/transactional_fileops.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/update-multiple-data-diagonal.cc b/storage/tokudb/ft-index/src/tests/update-multiple-data-diagonal.cc index f60f939dbc8b8..c39005d0f0044 100644 --- a/storage/tokudb/ft-index/src/tests/update-multiple-data-diagonal.cc +++ b/storage/tokudb/ft-index/src/tests/update-multiple-data-diagonal.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/update-multiple-key0.cc b/storage/tokudb/ft-index/src/tests/update-multiple-key0.cc index 52d672ebc6f1c..51257fd737731 100644 --- a/storage/tokudb/ft-index/src/tests/update-multiple-key0.cc +++ b/storage/tokudb/ft-index/src/tests/update-multiple-key0.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/update-multiple-nochange.cc b/storage/tokudb/ft-index/src/tests/update-multiple-nochange.cc index e814bff7d2b87..19a668a67c889 100644 --- a/storage/tokudb/ft-index/src/tests/update-multiple-nochange.cc +++ b/storage/tokudb/ft-index/src/tests/update-multiple-nochange.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer-array.cc b/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer-array.cc index 684925f98726d..9101771c4d8c5 100644 --- a/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer-array.cc +++ b/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer-array.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer.cc b/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer.cc index 62f3c7b7e76c4..444bcf1710654 100644 --- a/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer.cc +++ b/storage/tokudb/ft-index/src/tests/update-multiple-with-indexer.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/update.cc b/storage/tokudb/ft-index/src/tests/update.cc index e89a02270824b..aa0c4f0dd18c9 100644 --- a/storage/tokudb/ft-index/src/tests/update.cc +++ b/storage/tokudb/ft-index/src/tests/update.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/upgrade-test-1.cc b/storage/tokudb/ft-index/src/tests/upgrade-test-1.cc index 7897f9eb2e398..1f30ab21cf2d4 100644 --- a/storage/tokudb/ft-index/src/tests/upgrade-test-1.cc +++ b/storage/tokudb/ft-index/src/tests/upgrade-test-1.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -117,7 +117,7 @@ char *db_v4_dir = OLDDATADIR "env_preload.4.2.0.cleanshutdown"; char *db_v4_dir_node4k = OLDDATADIR "env_preload.4.2.0.node4k.cleanshutdown"; char *db_v4_dir_flat = OLDDATADIR "env_preload.4.2.0.flat.cleanshutdown"; -// HACK: Newer versions of the database/brt to use with this old +// HACK: Newer versions of the database/ft to use with this old // upgrade test code. char *db_v6_dir = OLDDATADIR "env_preload.5.0.8.cleanshutdown"; char *db_v6_dir_node4k = OLDDATADIR "env_preload.5.0.8.node4k.cleanshutdown"; @@ -213,7 +213,7 @@ static void setup(void) { } } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff --git a/storage/tokudb/ft-index/src/tests/upgrade-test-2.cc b/storage/tokudb/ft-index/src/tests/upgrade-test-2.cc index e7735ac1567f5..33003f6780bfc 100644 --- a/storage/tokudb/ft-index/src/tests/upgrade-test-2.cc +++ b/storage/tokudb/ft-index/src/tests/upgrade-test-2.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -193,7 +193,7 @@ static void setup(void) { src_db_dir = db_v5_dir; } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff --git a/storage/tokudb/ft-index/src/tests/upgrade-test-3.cc b/storage/tokudb/ft-index/src/tests/upgrade-test-3.cc index 276251d699a6a..61994a2de6653 100644 --- a/storage/tokudb/ft-index/src/tests/upgrade-test-3.cc +++ b/storage/tokudb/ft-index/src/tests/upgrade-test-3.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: // Purpose of this test is to verify that dictionaries created with 4.2.0 -// can be properly truncated with TokuDB version 5.x or later. +// can be properly truncated with TokuFT version 5.x or later. 
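The comment closing the upgrade-test-3.cc hunk above states the test's purpose: dictionaries created by 4.2.0 must still be truncatable after upgrading to TokuFT 5.x or later. A rough, hedged sketch of that kind of check against the BDB-style API; the dictionary name "foo.db", the modes, and the flag set are placeholders, not the test's actual code.

    #include <db.h>
    #include <assert.h>
    #include <stdint.h>

    // Open an environment whose data files came from an older version,
    // then truncate one dictionary inside a transaction.
    static void truncate_upgraded_dictionary(const char *envdir) {
        DB_ENV *env; DB *db; DB_TXN *txn; int r;
        r = db_env_create(&env, 0);                                          assert(r == 0);
        r = env->open(env, envdir,
                      DB_CREATE|DB_PRIVATE|DB_INIT_MPOOL|DB_INIT_TXN|
                      DB_INIT_LOG|DB_INIT_LOCK|DB_THREAD|DB_RECOVER, 0755);  assert(r == 0);

        r = db_create(&db, env, 0);                                          assert(r == 0);
        r = env->txn_begin(env, nullptr, &txn, 0);                           assert(r == 0);
        r = db->open(db, txn, "foo.db", nullptr, DB_BTREE, 0, 0644);         assert(r == 0);
        r = txn->commit(txn, 0);                                             assert(r == 0);

        uint32_t row_count = 0;
        r = env->txn_begin(env, nullptr, &txn, 0);                           assert(r == 0);
        r = db->truncate(db, txn, &row_count, 0);                            assert(r == 0);  // row_count = rows discarded
        r = txn->commit(txn, 0);                                             assert(r == 0);

        r = db->close(db, 0);                                                assert(r == 0);
        r = env->close(env, 0);                                              assert(r == 0);
    }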
#include "test.h" @@ -216,7 +216,7 @@ static void setup(void) { src_db_dir = db_v5_dir; } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff --git a/storage/tokudb/ft-index/src/tests/upgrade-test-4.cc b/storage/tokudb/ft-index/src/tests/upgrade-test-4.cc index 67380e900e1ad..0d083d9d87a62 100644 --- a/storage/tokudb/ft-index/src/tests/upgrade-test-4.cc +++ b/storage/tokudb/ft-index/src/tests/upgrade-test-4.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -306,7 +306,7 @@ static void setup(void) { src_db_dir = db_v5_dir; } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff --git a/storage/tokudb/ft-index/src/tests/upgrade-test-5.cc b/storage/tokudb/ft-index/src/tests/upgrade-test-5.cc index 564fe607d85a3..2f5d1863e5144 100644 --- a/storage/tokudb/ft-index/src/tests/upgrade-test-5.cc +++ b/storage/tokudb/ft-index/src/tests/upgrade-test-5.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -210,7 +210,7 @@ static void setup(void) { src_db_dir = db_v5_dir; } else { - fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION); + fprintf(stderr, "unsupported TokuFT version %d to upgrade\n", SRC_VERSION); assert(0); } diff --git a/storage/tokudb/ft-index/src/tests/upgrade-test-6.cc b/storage/tokudb/ft-index/src/tests/upgrade-test-6.cc index afe99ae68a381..a1e137c980f9b 100644 --- a/storage/tokudb/ft-index/src/tests/upgrade-test-6.cc +++ b/storage/tokudb/ft-index/src/tests/upgrade-test-6.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/upgrade-test-7.cc b/storage/tokudb/ft-index/src/tests/upgrade-test-7.cc index b1a17d0a07970..429f4cddf2cdc 100644 --- a/storage/tokudb/ft-index/src/tests/upgrade-test-7.cc +++ b/storage/tokudb/ft-index/src/tests/upgrade-test-7.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,8 +89,8 @@ PATENT RIGHTS GRANT: #ident "$Id$" -// Purpose of this test is to verify that an environment created by TokuDB 3.1.0 -// is properly rejected by the upgrade logic of TokuDB 5.x and later. +// Purpose of this test is to verify that an environment created by TokuFT 3.1.0 +// is properly rejected by the upgrade logic of TokuFT 5.x and later. #include "test.h" #include "toku_pthread.h" diff --git a/storage/tokudb/ft-index/src/tests/upgrade_simple.cc b/storage/tokudb/ft-index/src/tests/upgrade_simple.cc index a904846005433..678953c4ff735 100644 --- a/storage/tokudb/ft-index/src/tests/upgrade_simple.cc +++ b/storage/tokudb/ft-index/src/tests/upgrade_simple.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/xa-dirty-commit.cc b/storage/tokudb/ft-index/src/tests/xa-dirty-commit.cc index 03850b2b0264a..126a7c1453e5c 100644 --- a/storage/tokudb/ft-index/src/tests/xa-dirty-commit.cc +++ b/storage/tokudb/ft-index/src/tests/xa-dirty-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/xa-dirty-rollback.cc b/storage/tokudb/ft-index/src/tests/xa-dirty-rollback.cc index 8d28e8a762fa5..2d13e559050c8 100644 --- a/storage/tokudb/ft-index/src/tests/xa-dirty-rollback.cc +++ b/storage/tokudb/ft-index/src/tests/xa-dirty-rollback.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/xa-txn-discard-abort.cc b/storage/tokudb/ft-index/src/tests/xa-txn-discard-abort.cc index 3b71f807d442b..3365a1bb1397a 100644 --- a/storage/tokudb/ft-index/src/tests/xa-txn-discard-abort.cc +++ b/storage/tokudb/ft-index/src/tests/xa-txn-discard-abort.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/xa-txn-discard-commit.cc b/storage/tokudb/ft-index/src/tests/xa-txn-discard-commit.cc index 51b2d0670cd6b..c4d164017ae34 100644 --- a/storage/tokudb/ft-index/src/tests/xa-txn-discard-commit.cc +++ b/storage/tokudb/ft-index/src/tests/xa-txn-discard-commit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/tests/zombie_db.cc b/storage/tokudb/ft-index/src/tests/zombie_db.cc index 16d6a933451b3..56ff71f13da4a 100644 --- a/storage/tokudb/ft-index/src/tests/zombie_db.cc +++ b/storage/tokudb/ft-index/src/tests/zombie_db.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/src/toku_patent.cc b/storage/tokudb/ft-index/src/toku_patent.cc index e7b0ebe2c5649..5261b6f3a79e1 100644 --- a/storage/tokudb/ft-index/src/toku_patent.cc +++ b/storage/tokudb/ft-index/src/toku_patent.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -115,7 +115,7 @@ const char *toku_patent_string = "COPYING CONDITIONS NOTICE:\n\ \n\ COPYRIGHT NOTICE:\n\ \n\ - TokuDB, Tokutek Fractal Tree Indexing Library.\n\ + TokuFT, Tokutek Fractal Tree Indexing Library.\n\ Copyright (C) 2007-2013 Tokutek, Inc.\n\ \n\ DISCLAIMER:\n\ diff --git a/storage/tokudb/ft-index/src/ydb-internal.h b/storage/tokudb/ft-index/src/ydb-internal.h index f8c10975d5a1b..26cc8419f4a77 100644 --- a/storage/tokudb/ft-index/src/ydb-internal.h +++ b/storage/tokudb/ft-index/src/ydb-internal.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef YDB_INTERNAL_H -#define YDB_INTERNAL_H /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,19 +86,22 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "$Id$" #include #include -#include -#include -#include -// TODO: remove vanilla omt in favor of templated one -#include +#include +#include +#include +#include +#include #include +#include #include #include @@ -152,14 +153,13 @@ struct __toku_db_env_internal { unsigned long cachetable_size; CACHETABLE cachetable; TOKULOGGER logger; - toku::locktree::manager ltm; + toku::locktree_manager ltm; lock_timeout_callback lock_wait_timeout_callback; // Called when a lock request times out waiting for a lock. DB *directory; // Maps dnames to inames DB *persistent_environment; // Stores environment settings, can be used for upgrade - // TODO: toku::omt - OMT open_dbs_by_dname; // Stores open db handles, sorted first by dname and then by numerical value of pointer to the db (arbitrarily assigned memory location) - OMT open_dbs_by_dict_id; // Stores open db handles, sorted by dictionary id and then by numerical value of pointer to the db (arbitrarily assigned memory location) + toku::omt *open_dbs_by_dname; // Stores open db handles, sorted first by dname and then by numerical value of pointer to the db (arbitrarily assigned memory location) + toku::omt *open_dbs_by_dict_id; // Stores open db handles, sorted by dictionary id and then by numerical value of pointer to the db (arbitrarily assigned memory location) toku_pthread_rwlock_t open_dbs_rwlock; // rwlock that protects the OMT of open dbs. 
char *real_data_dir; // data dir used when the env is opened (relative to cwd, or absolute with leading /) @@ -192,7 +192,7 @@ struct __toku_db_env_internal { // test-only environment function for running lock escalation static inline void toku_env_run_lock_escalation_for_test(DB_ENV *env) { - toku::locktree::manager *mgr = &env->i->ltm; + toku::locktree_manager *mgr = &env->i->ltm; mgr->run_escalation_for_test(); } @@ -279,7 +279,7 @@ struct __toku_db_txn_external { #define db_txn_struct_i(x) (&((struct __toku_db_txn_external *)x)->internal_part) struct __toku_dbc_internal { - struct ft_cursor *c; + struct ft_cursor ftcursor; DB_TXN *txn; TOKU_ISOLATION iso; struct simple_dbt skey_s,sval_s; @@ -290,12 +290,21 @@ struct __toku_dbc_internal { bool rmw; }; -struct __toku_dbc_external { - struct __toku_dbc external_part; - struct __toku_dbc_internal internal_part; -}; - -#define dbc_struct_i(x) (&((struct __toku_dbc_external *)x)->internal_part) +static_assert(sizeof(__toku_dbc_internal) <= sizeof(((DBC *) nullptr)->_internal), + "__toku_dbc_internal doesn't fit in the internal portion of a DBC"); + +static inline __toku_dbc_internal *dbc_struct_i(DBC *c) { + union dbc_union { + __toku_dbc_internal *dbc_internal; + char *buf; + } u; + u.buf = c->_internal; + return u.dbc_internal; +} + +static inline struct ft_cursor *dbc_ftcursor(DBC *c) { + return &dbc_struct_i(c)->ftcursor; +} static inline int env_opened(DB_ENV *env) { @@ -315,5 +324,3 @@ txn_is_read_only(DB_TXN* txn) { void env_panic(DB_ENV * env, int cause, const char * msg); void env_note_db_opened(DB_ENV *env, DB *db); void env_note_db_closed(DB_ENV *env, DB *db); - -#endif diff --git a/storage/tokudb/ft-index/src/ydb.cc b/storage/tokudb/ft-index/src/ydb.cc index 5108f325e152c..e61bf9401753c 100644 --- a/storage/tokudb/ft-index/src/ydb.cc +++ b/storage/tokudb/ft-index/src/ydb.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,51 +92,40 @@ PATENT RIGHTS GRANT: extern const char *toku_patent_string; const char *toku_copyright_string = "Copyright (c) 2007-2013 Tokutek Inc. 
All rights reserved."; -#include -#include -#include - #include -#include #include -#include -#include -#include #include -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ydb.h" -#include "ydb-internal.h" -#include "ydb_cursor.h" -#include "ydb_row_lock.h" -#include "ydb_env_func.h" -#include "ydb_db.h" -#include "ydb_write.h" -#include "ydb_txn.h" -#include "loader.h" -#include "indexer.h" + +#include "portability/memory.h" +#include "portability/toku_assert.h" +#include "portability/toku_portability.h" +#include "portability/toku_pthread.h" +#include "portability/toku_stdlib.h" + +#include "ft/ft-flusher.h" +#include "ft/cachetable/cachetable.h" +#include "ft/cachetable/checkpoint.h" +#include "ft/logger/log.h" +#include "ft/loader/loader.h" +#include "ft/log_header.h" +#include "ft/ft.h" +#include "ft/txn/txn_manager.h" +#include "src/ydb.h" +#include "src/ydb-internal.h" +#include "src/ydb_cursor.h" +#include "src/ydb_row_lock.h" +#include "src/ydb_env_func.h" +#include "src/ydb_db.h" +#include "src/ydb_write.h" +#include "src/ydb_txn.h" +#include "src/loader.h" +#include "src/indexer.h" +#include "util/status.h" +#include "util/context.h" // Include ydb_lib.cc here so that its constructor/destructor gets put into // ydb.o, to make sure they don't get erased at link time (when linking to -// a static libtokudb.a that was compiled with gcc). See #5094. +// a static libtokufractaltree.a that was compiled with gcc). See #5094. #include "ydb_lib.cc" #ifdef TOKUTRACE @@ -197,7 +186,7 @@ typedef struct { static YDB_LAYER_STATUS_S ydb_layer_status; #define STATUS_VALUE(x) ydb_layer_status.status[x].value.num -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ydb_layer_status, k, c, t, l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ydb_layer_status, k, c, t, l, inc) static void ydb_layer_status_init (void) { @@ -233,6 +222,7 @@ static DB_ENV * volatile most_recent_env; // most recently opened env, used fo static int env_get_iname(DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt); static int toku_maybe_get_engine_status_text (char* buff, int buffsize); // for use by toku_assert +static int toku_maybe_err_engine_status (void); static void toku_maybe_set_env_panic(int code, const char * msg); // for use by toku_assert int @@ -262,14 +252,14 @@ static void env_fs_report_in_yellow(DB_ENV *UU(env)) { char tbuf[26]; time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb file system space is low\n", ctime_r(&tnow, tbuf)); fflush(stderr); + fprintf(stderr, "%.24s TokuFT file system space is low\n", ctime_r(&tnow, tbuf)); fflush(stderr); } static void env_fs_report_in_red(DB_ENV *UU(env)) { char tbuf[26]; time_t tnow = time(NULL); - fprintf(stderr, "%.24s Tokudb file system space is really low and access is restricted\n", ctime_r(&tnow, tbuf)); fflush(stderr); + fprintf(stderr, "%.24s TokuFT file system space is really low and access is restricted\n", ctime_r(&tnow, tbuf)); fflush(stderr); } static inline uint64_t @@ -278,7 +268,7 @@ env_fs_redzone(DB_ENV *env, uint64_t total) { } #define ZONEREPORTLIMIT 12 -// Check the available space in the file systems used by tokudb and erect barriers when available space gets low. +// Check the available space in the file systems used by tokuft and erect barriers when available space gets low. 
static int env_fs_poller(void *arg) { DB_ENV *env = (DB_ENV *) arg; @@ -455,7 +445,7 @@ static void keep_cachetable_callback (DB_ENV *env, CACHETABLE cachetable) static int ydb_do_recovery (DB_ENV *env) { assert(env->i->real_log_dir); - int r = tokudb_recover(env, + int r = tokuft_recover(env, toku_keep_prepared_txn_callback, keep_cachetable_callback, env->i->logger, @@ -469,33 +459,12 @@ ydb_do_recovery (DB_ENV *env) { static int needs_recovery (DB_ENV *env) { assert(env->i->real_log_dir); - int recovery_needed = tokudb_needs_recovery(env->i->real_log_dir, true); + int recovery_needed = tokuft_needs_recovery(env->i->real_log_dir, true); return recovery_needed ? DB_RUNRECOVERY : 0; } static int toku_env_txn_checkpoint(DB_ENV * env, uint32_t kbyte, uint32_t min, uint32_t flags); -// Instruct db to use the default (built-in) key comparison function -// by setting the flag bits in the db and brt structs -static int -db_use_builtin_key_cmp(DB *db) { - HANDLE_PANICKED_DB(db); - int r = 0; - if (db_opened(db)) - r = toku_ydb_do_error(db->dbenv, EINVAL, "Comparison functions cannot be set after DB open.\n"); - else if (db->i->key_compare_was_set) - r = toku_ydb_do_error(db->dbenv, EINVAL, "Key comparison function already set.\n"); - else { - uint32_t tflags; - toku_ft_get_flags(db->i->ft_handle, &tflags); - - tflags |= TOKU_DB_KEYCMP_BUILTIN; - toku_ft_set_flags(db->i->ft_handle, tflags); - db->i->key_compare_was_set = true; - } - return r; -} - // Keys used in persistent environment dictionary: // Following keys added in version 12 static const char * orig_env_ver_key = "original_version"; @@ -552,7 +521,7 @@ typedef struct { static PERSISTENT_UPGRADE_STATUS_S persistent_upgrade_status; -#define PERSISTENT_UPGRADE_STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(persistent_upgrade_status, k, c, t, "upgrade: " l, inc) +#define PERSISTENT_UPGRADE_STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(persistent_upgrade_status, k, c, t, "upgrade: " l, inc) static void persistent_upgrade_status_init (void) { @@ -702,7 +671,7 @@ capture_persistent_env_contents (DB_ENV * env, DB_TXN * txn) { // return 0 if log exists or ENOENT if log does not exist static int ydb_recover_log_exists(DB_ENV *env) { - int r = tokudb_recover_log_exists(env->i->real_log_dir); + int r = tokuft_recover_log_exists(env->i->real_log_dir); return r; } @@ -852,6 +821,12 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { goto cleanup; } + if (toku_os_huge_pages_enabled()) { + r = toku_ydb_do_error(env, TOKUDB_HUGE_PAGES_ENABLED, + "Huge pages are enabled, disable them before continuing\n"); + goto cleanup; + } + most_recent_env = NULL; assert(sizeof(time_t) == sizeof(uint64_t)); @@ -859,20 +834,20 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { HANDLE_EXTRA_FLAGS(env, flags, DB_CREATE|DB_PRIVATE|DB_INIT_LOG|DB_INIT_TXN|DB_RECOVER|DB_INIT_MPOOL|DB_INIT_LOCK|DB_THREAD); - // DB_CREATE means create if env does not exist, and Tokudb requires it because - // Tokudb requries DB_PRIVATE. + // DB_CREATE means create if env does not exist, and TokuFT requires it because + // TokuFT requries DB_PRIVATE. 
if ((flags & DB_PRIVATE) && !(flags & DB_CREATE)) { r = toku_ydb_do_error(env, ENOENT, "DB_PRIVATE requires DB_CREATE (seems gratuitous to us, but that's BDB's behavior\n"); goto cleanup; } if (!(flags & DB_PRIVATE)) { - r = toku_ydb_do_error(env, ENOENT, "TokuDB requires DB_PRIVATE\n"); + r = toku_ydb_do_error(env, ENOENT, "TokuFT requires DB_PRIVATE\n"); goto cleanup; } if ((flags & DB_INIT_LOG) && !(flags & DB_INIT_TXN)) { - r = toku_ydb_do_error(env, EINVAL, "TokuDB requires transactions for logging\n"); + r = toku_ydb_do_error(env, EINVAL, "TokuFT requires transactions for logging\n"); goto cleanup; } @@ -984,13 +959,13 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { // This is probably correct, but it will be pain... // if ((flags & DB_THREAD)==0) { -// r = toku_ydb_do_error(env, EINVAL, "TokuDB requires DB_THREAD"); +// r = toku_ydb_do_error(env, EINVAL, "TokuFT requires DB_THREAD"); // goto cleanup; // } unused_flags &= ~DB_THREAD; if (unused_flags!=0) { - r = toku_ydb_do_error(env, EINVAL, "Extra flags not understood by tokudb: %u\n", unused_flags); + r = toku_ydb_do_error(env, EINVAL, "Extra flags not understood by tokuft: %u\n", unused_flags); goto cleanup; } @@ -1029,7 +1004,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { { r = toku_db_create(&env->i->persistent_environment, env, 0); assert_zero(r); - r = db_use_builtin_key_cmp(env->i->persistent_environment); + r = toku_db_use_builtin_key_cmp(env->i->persistent_environment); assert_zero(r); r = toku_db_open_iname(env->i->persistent_environment, txn, toku_product_name_strings.environmentdictionary, DB_CREATE, mode); if (r != 0) { @@ -1067,7 +1042,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { { r = toku_db_create(&env->i->directory, env, 0); assert_zero(r); - r = db_use_builtin_key_cmp(env->i->directory); + r = toku_db_use_builtin_key_cmp(env->i->directory); assert_zero(r); r = toku_db_open_iname(env->i->directory, txn, toku_product_name_strings.fileopsdirectory, DB_CREATE, mode); if (r != 0) { @@ -1108,7 +1083,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { most_recent_env = env; uint64_t num_rows; env_get_engine_status_num_rows(env, &num_rows); - toku_assert_set_fpointers(toku_maybe_get_engine_status_text, toku_maybe_set_env_panic, num_rows); + toku_assert_set_fpointers(toku_maybe_get_engine_status_text, toku_maybe_err_engine_status, toku_maybe_set_env_panic, num_rows); } return r; } @@ -1137,7 +1112,7 @@ env_close(DB_ENV * env, uint32_t flags) { goto panic_and_quit_early; } if (env->i->open_dbs_by_dname) { //Verify that there are no open dbs. 
- if (toku_omt_size(env->i->open_dbs_by_dname) > 0) { + if (env->i->open_dbs_by_dname->size() > 0) { err_msg = "Cannot close environment due to open DBs\n"; r = toku_ydb_do_error(env, EINVAL, "%s", err_msg); goto panic_and_quit_early; @@ -1161,6 +1136,7 @@ env_close(DB_ENV * env, uint32_t flags) { } env_fsync_log_cron_destroy(env); if (env->i->cachetable) { + toku_cachetable_prepare_close(env->i->cachetable); toku_cachetable_minicron_shutdown(env->i->cachetable); if (env->i->logger) { CHECKPOINTER cp = nullptr; @@ -1218,10 +1194,14 @@ env_close(DB_ENV * env, uint32_t flags) { toku_free(env->i->real_log_dir); if (env->i->real_tmp_dir) toku_free(env->i->real_tmp_dir); - if (env->i->open_dbs_by_dname) - toku_omt_destroy(&env->i->open_dbs_by_dname); - if (env->i->open_dbs_by_dict_id) - toku_omt_destroy(&env->i->open_dbs_by_dict_id); + if (env->i->open_dbs_by_dname) { + env->i->open_dbs_by_dname->destroy(); + toku_free(env->i->open_dbs_by_dname); + } + if (env->i->open_dbs_by_dict_id) { + env->i->open_dbs_by_dict_id->destroy(); + toku_free(env->i->open_dbs_by_dict_id); + } if (env->i->dir) toku_free(env->i->dir); toku_pthread_rwlock_destroy(&env->i->open_dbs_rwlock); @@ -1230,7 +1210,7 @@ env_close(DB_ENV * env, uint32_t flags) { unlock_single_process(env); toku_free(env->i); toku_free(env); - toku_sync_fetch_and_add(&tokudb_num_envs, -1); + toku_sync_fetch_and_add(&tokuft_num_envs, -1); if (flags != 0) { r = EINVAL; } @@ -1405,7 +1385,7 @@ env_set_flags(DB_ENV * env, uint32_t flags, int onoff) { flags &= ~DB_AUTO_COMMIT; } if (flags != 0 && onoff) { - return toku_ydb_do_error(env, EINVAL, "TokuDB does not (yet) support any nonzero ENV flags other than DB_AUTO_COMMIT\n"); + return toku_ydb_do_error(env, EINVAL, "TokuFT does not (yet) support any nonzero ENV flags other than DB_AUTO_COMMIT\n"); } if (onoff) env->i->open_flags |= change; else env->i->open_flags &= ~change; @@ -1451,7 +1431,7 @@ env_get_lg_max(DB_ENV * env, uint32_t *lg_maxp) { static int env_set_lk_detect(DB_ENV * env, uint32_t UU(detect)) { HANDLE_PANICKED_ENV(env); - return toku_ydb_do_error(env, EINVAL, "TokuDB does not (yet) support set_lk_detect\n"); + return toku_ydb_do_error(env, EINVAL, "TokuFT does not (yet) support set_lk_detect\n"); } static int @@ -1796,7 +1776,7 @@ typedef struct { static FS_STATUS_S fsstat; -#define FS_STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(fsstat, k, c, t, "filesystem: " l, inc) +#define FS_STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(fsstat, k, c, t, "filesystem: " l, inc) static void fs_status_init(void) { @@ -1845,13 +1825,15 @@ fs_get_status(DB_ENV * env, fs_redzone_state * redzone_state) { // Local status struct used to get information from memory.c typedef enum { MEMORY_MALLOC_COUNT = 0, - MEMORY_FREE_COUNT, + MEMORY_FREE_COUNT, MEMORY_REALLOC_COUNT, - MEMORY_MALLOC_FAIL, - MEMORY_REALLOC_FAIL, - MEMORY_REQUESTED, - MEMORY_USED, - MEMORY_FREED, + MEMORY_MALLOC_FAIL, + MEMORY_REALLOC_FAIL, + MEMORY_REQUESTED, + MEMORY_USED, + MEMORY_FREED, + MEMORY_MAX_REQUESTED_SIZE, + MEMORY_LAST_FAILED_SIZE, MEMORY_MAX_IN_USE, MEMORY_MALLOCATOR_VERSION, MEMORY_MMAP_THRESHOLD, @@ -1865,7 +1847,7 @@ typedef struct { static MEMORY_STATUS_S memory_status; -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(memory_status, k, c, t, "memory: " l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(memory_status, k, c, t, "memory: " l, inc) static void memory_status_init(void) { @@ -1879,6 +1861,8 @@ memory_status_init(void) { STATUS_INIT(MEMORY_REQUESTED, nullptr, UINT64, "number of bytes requested", 
TOKU_ENGINE_STATUS); STATUS_INIT(MEMORY_USED, nullptr, UINT64, "number of bytes used (requested + overhead)", TOKU_ENGINE_STATUS); STATUS_INIT(MEMORY_FREED, nullptr, UINT64, "number of bytes freed", TOKU_ENGINE_STATUS); + STATUS_INIT(MEMORY_MAX_REQUESTED_SIZE, nullptr, UINT64, "largest attempted allocation size", TOKU_ENGINE_STATUS); + STATUS_INIT(MEMORY_LAST_FAILED_SIZE, nullptr, UINT64, "size of the last failed allocation attempt", TOKU_ENGINE_STATUS); STATUS_INIT(MEMORY_MAX_IN_USE, MEM_ESTIMATED_MAXIMUM_MEMORY_FOOTPRINT, UINT64, "estimated maximum memory footprint", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(MEMORY_MALLOCATOR_VERSION, nullptr, CHARSTR, "mallocator version", TOKU_ENGINE_STATUS); STATUS_INIT(MEMORY_MMAP_THRESHOLD, nullptr, UINT64, "mmap threshold", TOKU_ENGINE_STATUS); @@ -2236,6 +2220,83 @@ env_get_engine_status_text(DB_ENV * env, char * buff, int bufsiz) { return r; } +// prints engine status using toku_env_err line-by-line +static int +env_err_engine_status(DB_ENV * env) { + uint32_t stringsize = 1024; + uint64_t panic; + char panicstring[stringsize]; + uint64_t num_rows; + uint64_t max_rows; + fs_redzone_state redzone_state; + + toku_env_err(env, 0, "BUILD_ID = %d", BUILD_ID); + + (void) env_get_engine_status_num_rows (env, &max_rows); + TOKU_ENGINE_STATUS_ROW_S mystat[max_rows]; + int r = env->get_engine_status (env, mystat, max_rows, &num_rows, &redzone_state, &panic, panicstring, stringsize, TOKU_ENGINE_STATUS); + + if (r) { + toku_env_err(env, 0, "Engine status not available: "); + if (!env) { + toku_env_err(env, 0, "no environment"); + } + else if (!(env->i)) { + toku_env_err(env, 0, "environment internal struct is null"); + } + else if (!env_opened(env)) { + toku_env_err(env, 0, "environment is not open"); + } + } + else { + if (panic) { + toku_env_err(env, 0, "Env panic code: %" PRIu64, panic); + if (strlen(panicstring)) { + invariant(strlen(panicstring) <= stringsize); + toku_env_err(env, 0, "Env panic string: %s", panicstring); + } + } + + for (uint64_t row = 0; row < num_rows; row++) { + switch (mystat[row].type) { + case FS_STATE: + toku_env_err(env, 0, "%s: %" PRIu64, mystat[row].legend, mystat[row].value.num); + break; + case UINT64: + toku_env_err(env, 0, "%s: %" PRIu64, mystat[row].legend, mystat[row].value.num); + break; + case CHARSTR: + toku_env_err(env, 0, "%s: %s", mystat[row].legend, mystat[row].value.str); + break; + case UNIXTIME: + { + char tbuf[26]; + format_time((time_t*)&mystat[row].value.num, tbuf); + toku_env_err(env, 0, "%s: %s", mystat[row].legend, tbuf); + } + break; + case TOKUTIME: + { + double t = tokutime_to_seconds(mystat[row].value.num); + toku_env_err(env, 0, "%s: %.6f", mystat[row].legend, t); + } + break; + case PARCOUNT: + { + uint64_t v = read_partitioned_counter(mystat[row].value.parcount); + toku_env_err(env, 0, "%s: %" PRIu64, mystat[row].legend, v); + } + break; + default: + toku_env_err(env, 0, "%s: UNKNOWN STATUS TYPE: %d", mystat[row].legend, mystat[row].type); + break; + } + } + } + + return r; +} + // intended for use by toku_assert logic, when env is not known static int toku_maybe_get_engine_status_text (char * buff, int buffsize) { @@ -2251,6 +2312,19 @@ toku_maybe_get_engine_status_text (char * buff, int buffsize) { return r; } +static int +toku_maybe_err_engine_status (void) { + DB_ENV * env = most_recent_env; + int r; + if (engine_status_enable && env != NULL) { + r = env_err_engine_status(env); + } + else { + r = EOPNOTSUPP; + } + return r; +} + // Set panic code and panic string if not already 
panicked, // intended for use by toku_assert when about to abort(). static void @@ -2303,10 +2377,8 @@ struct ltm_iterate_requests_callback_extra { }; static int -find_db_by_dict_id(OMTVALUE v, void *dict_id_v) { - DB *db = (DB *) v; +find_db_by_dict_id(DB *const &db, const DICTIONARY_ID &dict_id_find) { DICTIONARY_ID dict_id = db->i->dict_id; - DICTIONARY_ID dict_id_find = *(DICTIONARY_ID *) dict_id_v; if (dict_id.dictid < dict_id_find.dictid) { return -1; } else if (dict_id.dictid > dict_id_find.dictid) { @@ -2318,10 +2390,9 @@ find_db_by_dict_id(OMTVALUE v, void *dict_id_v) { static DB * locked_get_db_by_dict_id(DB_ENV *env, DICTIONARY_ID dict_id) { - OMTVALUE dbv; - int r = toku_omt_find_zero(env->i->open_dbs_by_dict_id, find_db_by_dict_id, - (void *) &dict_id, &dbv, nullptr); - return r == 0 ? (DB *) dbv : nullptr; + DB *db; + int r = env->i->open_dbs_by_dict_id->find_zero(dict_id, &db, nullptr); + return r == 0 ? db : nullptr; } static int ltm_iterate_requests_callback(DICTIONARY_ID dict_id, TXNID txnid, @@ -2352,7 +2423,7 @@ env_iterate_pending_lock_requests(DB_ENV *env, return EINVAL; } - toku::locktree::manager *mgr = &env->i->ltm; + toku::locktree_manager *mgr = &env->i->ltm; ltm_iterate_requests_callback_extra e(env, callback, extra); return mgr->iterate_pending_lock_requests(ltm_iterate_requests_callback, &e); } @@ -2373,7 +2444,7 @@ struct iter_txn_row_locks_callback_extra { const int r = lt_map->fetch(which_lt, &ranges); invariant_zero(r); current_db = locked_get_db_by_dict_id(env, ranges.lt->get_dict_id()); - iter.create(ranges.buffer); + iter = toku::range_buffer::iterator(ranges.buffer); } DB_ENV *env; @@ -2471,6 +2542,17 @@ static void env_set_killed_callback(DB_ENV *env, uint64_t default_killed_time_ms env->i->killed_callback = killed_callback; } +static void env_do_backtrace(DB_ENV *env) { + if (env->i->errcall) { + db_env_do_backtrace_errfunc((toku_env_err_func) toku_env_err, (const void *) env); + } + if (env->i->errfile) { + db_env_do_backtrace((FILE *) env->i->errfile); + } else { + db_env_do_backtrace(stderr); + } +} + static int toku_env_create(DB_ENV ** envp, uint32_t flags) { int r = ENOSYS; @@ -2547,6 +2629,7 @@ toku_env_create(DB_ENV ** envp, uint32_t flags) { USENV(set_loader_memory_size); USENV(get_loader_memory_size); USENV(set_killed_callback); + USENV(do_backtrace); #undef USENV // unlocked methods @@ -2575,23 +2658,23 @@ toku_env_create(DB_ENV ** envp, uint32_t flags) { result->i->bt_compare = toku_builtin_compare_fun; r = toku_logger_create(&result->i->logger); - if (r!=0) goto cleanup; // In particular, logger_create can return the huge page error. - assert(result->i->logger); + invariant_zero(r); + invariant_notnull(result->i->logger); // Create the locktree manager, passing in the create/destroy/escalate callbacks. // The extra parameter for escalation is simply a pointer to this environment. 
// The escalate callback will need it to translate txnids to DB_TXNs result->i->ltm.create(toku_db_lt_on_create_callback, toku_db_lt_on_destroy_callback, toku_db_txn_escalate_callback, result); - r = toku_omt_create(&result->i->open_dbs_by_dname); - assert_zero(r); - r = toku_omt_create(&result->i->open_dbs_by_dict_id); - assert_zero(r); + XMALLOC(result->i->open_dbs_by_dname); + result->i->open_dbs_by_dname->create(); + XMALLOC(result->i->open_dbs_by_dict_id); + result->i->open_dbs_by_dict_id->create(); toku_pthread_rwlock_init(&result->i->open_dbs_rwlock, NULL); *envp = result; r = 0; - toku_sync_fetch_and_add(&tokudb_num_envs, 1); + toku_sync_fetch_and_add(&tokuft_num_envs, 1); cleanup: if (r!=0) { if (result) { @@ -2612,9 +2695,7 @@ DB_ENV_CREATE_FUN (DB_ENV ** envp, uint32_t flags) { // return <0 if v is earlier in omt than dbv // return >0 if v is later in omt than dbv static int -find_db_by_db_dname(OMTVALUE v, void *dbv) { - DB *db = (DB *) v; // DB* that is stored in the omt - DB *dbfind = (DB *) dbv; // extra, to be compared to v +find_db_by_db_dname(DB *const &db, DB *const &dbfind) { int cmp; const char *dname = db->i->dname; const char *dnamefind = dbfind->i->dname; @@ -2626,9 +2707,7 @@ find_db_by_db_dname(OMTVALUE v, void *dbv) { } static int -find_db_by_db_dict_id(OMTVALUE v, void *dbv) { - DB *db = (DB *) v; - DB *dbfind = (DB *) dbv; +find_db_by_db_dict_id(DB *const &db, DB *const &dbfind) { DICTIONARY_ID dict_id = db->i->dict_id; DICTIONARY_ID dict_id_find = dbfind->i->dict_id; if (dict_id.dictid < dict_id_find.dictid) { @@ -2651,20 +2730,18 @@ env_note_db_opened(DB_ENV *env, DB *db) { assert(db->i->dname); // internal (non-user) dictionary has no dname int r; - OMTVALUE v; uint32_t idx; - r = toku_omt_find_zero(env->i->open_dbs_by_dname, find_db_by_db_dname, - db, &v, &idx); + + r = env->i->open_dbs_by_dname->find_zero(db, nullptr, &idx); assert(r == DB_NOTFOUND); - r = toku_omt_insert_at(env->i->open_dbs_by_dname, db, idx); + r = env->i->open_dbs_by_dname->insert_at(db, idx); assert_zero(r); - r = toku_omt_find_zero(env->i->open_dbs_by_dict_id, find_db_by_db_dict_id, - db, &v, &idx); + r = env->i->open_dbs_by_dict_id->find_zero(db, nullptr, &idx); assert(r == DB_NOTFOUND); - r = toku_omt_insert_at(env->i->open_dbs_by_dict_id, db, idx); + r = env->i->open_dbs_by_dict_id->insert_at(db, idx); assert_zero(r); - STATUS_VALUE(YDB_LAYER_NUM_OPEN_DBS) = toku_omt_size(env->i->open_dbs_by_dname); + STATUS_VALUE(YDB_LAYER_NUM_OPEN_DBS) = env->i->open_dbs_by_dname->size(); STATUS_VALUE(YDB_LAYER_NUM_DB_OPEN)++; if (STATUS_VALUE(YDB_LAYER_NUM_OPEN_DBS) > STATUS_VALUE(YDB_LAYER_MAX_OPEN_DBS)) { STATUS_VALUE(YDB_LAYER_MAX_OPEN_DBS) = STATUS_VALUE(YDB_LAYER_NUM_OPEN_DBS); @@ -2677,58 +2754,44 @@ void env_note_db_closed(DB_ENV *env, DB *db) { toku_pthread_rwlock_wrlock(&env->i->open_dbs_rwlock); assert(db->i->dname); // internal (non-user) dictionary has no dname - assert(toku_omt_size(env->i->open_dbs_by_dname) > 0); - assert(toku_omt_size(env->i->open_dbs_by_dict_id) > 0); + assert(env->i->open_dbs_by_dname->size() > 0); + assert(env->i->open_dbs_by_dict_id->size() > 0); int r; - OMTVALUE v; uint32_t idx; - r = toku_omt_find_zero(env->i->open_dbs_by_dname, find_db_by_db_dname, - db, &v, &idx); + + r = env->i->open_dbs_by_dname->find_zero(db, nullptr, &idx); assert_zero(r); - r = toku_omt_delete_at(env->i->open_dbs_by_dname, idx); + r = env->i->open_dbs_by_dname->delete_at(idx); assert_zero(r); - r = toku_omt_find_zero(env->i->open_dbs_by_dict_id, find_db_by_db_dict_id, - db, &v, 
&idx); + r = env->i->open_dbs_by_dict_id->find_zero(db, nullptr, &idx); assert_zero(r); - r = toku_omt_delete_at(env->i->open_dbs_by_dict_id, idx); + r = env->i->open_dbs_by_dict_id->delete_at(idx); assert_zero(r); STATUS_VALUE(YDB_LAYER_NUM_DB_CLOSE)++; - STATUS_VALUE(YDB_LAYER_NUM_OPEN_DBS) = toku_omt_size(env->i->open_dbs_by_dname); + STATUS_VALUE(YDB_LAYER_NUM_OPEN_DBS) = env->i->open_dbs_by_dname->size(); toku_pthread_rwlock_wrunlock(&env->i->open_dbs_rwlock); } static int -find_open_db_by_dname (OMTVALUE v, void *dnamev) { - DB *db = (DB *) v; // DB* that is stored in the omt - int cmp; - const char *dname = db->i->dname; - const char *dnamefind = (char *) dnamev; - cmp = strcmp(dname, dnamefind); - return cmp; +find_open_db_by_dname(DB *const &db, const char *const &dnamefind) { + return strcmp(db->i->dname, dnamefind); } // return true if there is any db open with the given dname static bool env_is_db_with_dname_open(DB_ENV *env, const char *dname) { - int r; - bool rval; - OMTVALUE dbv; - uint32_t idx; + DB *db; toku_pthread_rwlock_rdlock(&env->i->open_dbs_rwlock); - r = toku_omt_find_zero(env->i->open_dbs_by_dname, find_open_db_by_dname, (void*)dname, &dbv, &idx); - if (r==0) { - DB *db = (DB *) dbv; - assert(strcmp(dname, db->i->dname) == 0); - rval = true; - } - else { - assert(r==DB_NOTFOUND); - rval = false; + int r = env->i->open_dbs_by_dname->find_zero(dname, &db, nullptr); + if (r == 0) { + invariant(strcmp(dname, db->i->dname) == 0); + } else { + invariant(r == DB_NOTFOUND); } toku_pthread_rwlock_rdunlock(&env->i->open_dbs_rwlock); - return rval; + return r == 0 ? true : false; } //We do not (yet?) support deleting subdbs by deleting the enclosing 'fname' @@ -2996,15 +3059,15 @@ db_strerror(int error) { case TOKUDB_OUT_OF_LOCKS: return "Out of locks"; case TOKUDB_DICTIONARY_TOO_OLD: - return "Dictionary too old for this version of TokuDB"; + return "Dictionary too old for this version of TokuFT"; case TOKUDB_DICTIONARY_TOO_NEW: - return "Dictionary too new for this version of TokuDB"; + return "Dictionary too new for this version of TokuFT"; case TOKUDB_CANCELED: return "User cancelled operation"; case TOKUDB_NO_DATA: return "Ran out of data (not EOF)"; case TOKUDB_HUGE_PAGES_ENABLED: - return "Transparent huge pages are enabled but TokuDB's memory allocator will oversubscribe main memory with transparent huge pages. This check can be disabled by setting the environment variable TOKU_HUGE_PAGES_OK."; + return "Transparent huge pages are enabled but TokuFT's memory allocator will oversubscribe main memory with transparent huge pages. This check can be disabled by setting the environment variable TOKU_HUGE_PAGES_OK."; } static char unknown_result[100]; // Race condition if two threads call this at the same time. However even in a bad case, it should be some sort of null-terminated string. @@ -3046,7 +3109,7 @@ env_get_iname(DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) { // TODO 2216: Patch out this (dangerous) function when loader is working and // we don't need to test the low-level redirect anymore. 
-// for use by test programs only, just a wrapper around brt call: +// for use by test programs only, just a wrapper around ft call: int toku_test_db_redirect_dictionary(DB * db, const char * dname_of_new_file, DB_TXN *dbtxn) { int r; @@ -3054,7 +3117,7 @@ toku_test_db_redirect_dictionary(DB * db, const char * dname_of_new_file, DB_TXN DBT iname_dbt; char * new_iname_in_env; - FT_HANDLE brt = db->i->ft_handle; + FT_HANDLE ft_handle = db->i->ft_handle; TOKUTXN tokutxn = db_txn_struct_i(dbtxn)->tokutxn; toku_fill_dbt(&dname_dbt, dname_of_new_file, strlen(dname_of_new_file)+1); @@ -3064,7 +3127,7 @@ toku_test_db_redirect_dictionary(DB * db, const char * dname_of_new_file, DB_TXN new_iname_in_env = (char *) iname_dbt.data; toku_multi_operation_client_lock(); //Must hold MO lock for dictionary_redirect. - r = toku_dictionary_redirect(new_iname_in_env, brt, tokutxn); + r = toku_dictionary_redirect(new_iname_in_env, ft_handle, tokutxn); toku_multi_operation_client_unlock(); toku_free(new_iname_in_env); diff --git a/storage/tokudb/ft-index/src/ydb.h b/storage/tokudb/ft-index/src/ydb.h index e7de82b5db211..fad41f382f3f5 100644 --- a/storage/tokudb/ft-index/src/ydb.h +++ b/storage/tokudb/ft-index/src/ydb.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,11 +88,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// This file defines the public interface to the ydb library - -#if !defined(TOKU_YDB_INTERFACE_H) -#define TOKU_YDB_INTERFACE_H +#pragma once // Initialize the ydb library globals. // Called when the ydb library is loaded. @@ -114,5 +111,3 @@ extern "C" uint64_t toku_test_get_latest_lsn(DB_ENV *env) __attribute__((__visib // test-only function extern "C" int toku_test_get_checkpointing_user_data_status(void) __attribute__((__visibility__("default"))); - -#endif diff --git a/storage/tokudb/ft-index/src/ydb_cursor.cc b/storage/tokudb/ft-index/src/ydb_cursor.cc index 58e13b9e50aba..c42e2fb673ea1 100644 --- a/storage/tokudb/ft-index/src/ydb_cursor.cc +++ b/storage/tokudb/ft-index/src/ydb_cursor.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
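Several headers touched in this patch (threaded_stress_test_helpers.h, ydb-internal.h, ydb.h) drop their #ifndef/#define/#endif include guards in favor of #pragma once. For headers like these the two idioms behave the same; the guard name below is the one ydb.h used before the change.

    // Traditional include guard (what ydb.h had):
    #if !defined(TOKU_YDB_INTERFACE_H)
    #define TOKU_YDB_INTERFACE_H
    /* declarations */
    #endif

    // Equivalent, shorter, and immune to copy-pasted guard names:
    #pragma once
    /* declarations */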
DISCLAIMER: @@ -97,6 +97,7 @@ PATENT RIGHTS GRANT: #include "ydb-internal.h" #include "ydb_cursor.h" #include "ydb_row_lock.h" +#include "ft/cursor.h" static YDB_C_LAYER_STATUS_S ydb_c_layer_status; #ifdef STATUS_VALUE @@ -104,7 +105,7 @@ static YDB_C_LAYER_STATUS_S ydb_c_layer_status; #endif #define STATUS_VALUE(x) ydb_c_layer_status.status[x].value.num -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ydb_c_layer_status, k, c, t, l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ydb_c_layer_status, k, c, t, l, inc) static void ydb_c_layer_status_init (void) { @@ -133,8 +134,8 @@ get_nonmain_cursor_flags(uint32_t flags) { } static inline bool -c_uninitialized(DBC* c) { - return toku_ft_cursor_uninitialized(dbc_struct_i(c)->c); +c_uninitialized(DBC *c) { + return toku_ft_cursor_uninitialized(dbc_ftcursor(c)); } typedef struct query_context_wrapped_t { @@ -200,7 +201,7 @@ typedef struct query_context_with_input_t { static void query_context_base_init(QUERY_CONTEXT_BASE context, DBC *c, uint32_t flag, bool is_write_op, YDB_CALLBACK_FUNCTION f, void *extra) { - context->c = dbc_struct_i(c)->c; + context->c = dbc_ftcursor(c); context->txn = dbc_struct_i(c)->txn; context->db = c->dbp; context->f = f; @@ -247,7 +248,7 @@ query_context_with_input_init(QUERY_CONTEXT_WITH_INPUT context, DBC *c, uint32_t context->input_val = val; } -static int c_getf_first_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_first_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static void c_query_context_init(QUERY_CONTEXT context, DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -277,7 +278,7 @@ c_getf_first(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { c_query_context_init(&context, c, flag, f, extra); while (r == 0) { //toku_ft_cursor_first will call c_getf_first_callback(..., context) (if query is successful) - r = toku_ft_cursor_first(dbc_struct_i(c)->c, c_getf_first_callback, &context); + r = toku_ft_cursor_first(dbc_ftcursor(c), c_getf_first_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -290,7 +291,7 @@ c_getf_first(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) 
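c_getf_first above shows the retry pattern the rest of this file follows for every cursor operation: run the ft-layer cursor call, and if it comes back with DB_LOCK_NOTGRANTED, wait on the range lock recorded in the query context and try again. A stripped-down sketch of that control flow; try_op and wait_for_range_lock are placeholders for the toku_ft_cursor_* call and toku_db_wait_range_lock, not new API.

    #include <db.h>   // DB_LOCK_NOTGRANTED

    // Generic form of the loop in c_getf_first/last/next/prev above.
    template <typename TryOp, typename WaitLock>
    static int getf_with_lock_retry(TryOp try_op, WaitLock wait_for_range_lock) {
        int r = 0;
        while (r == 0) {
            r = try_op();                   // e.g. toku_ft_cursor_first(ftcursor, cb, ctx)
            if (r == DB_LOCK_NOTGRANTED) {
                r = wait_for_range_lock();  // retry only if the wait succeeds (r == 0)
            } else {
                break;                      // success, DB_NOTFOUND, or a hard error
            }
        }
        return r;
    }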
static int -c_getf_first_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_first_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -313,11 +314,11 @@ c_getf_first_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, r = context->r_user_callback; } - //Give brt-layer an error (if any) to return from toku_ft_cursor_first + //Give ft-layer an error (if any) to return from toku_ft_cursor_first return r; } -static int c_getf_last_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_last_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_last(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -328,7 +329,7 @@ c_getf_last(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { c_query_context_init(&context, c, flag, f, extra); while (r == 0) { //toku_ft_cursor_last will call c_getf_last_callback(..., context) (if query is successful) - r = toku_ft_cursor_last(dbc_struct_i(c)->c, c_getf_last_callback, &context); + r = toku_ft_cursor_last(dbc_ftcursor(c), c_getf_last_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -341,7 +342,7 @@ c_getf_last(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) static int -c_getf_last_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_last_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -364,11 +365,11 @@ c_getf_last_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, v r = context->r_user_callback; } - //Give brt-layer an error (if any) to return from toku_ft_cursor_last + //Give ft-layer an error (if any) to return from toku_ft_cursor_last return r; } -static int c_getf_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_next_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_next(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -383,7 +384,7 @@ c_getf_next(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { c_query_context_init(&context, c, flag, f, extra); while (r == 0) { //toku_ft_cursor_next will call c_getf_next_callback(..., context) (if query is successful) - r = toku_ft_cursor_next(dbc_struct_i(c)->c, c_getf_next_callback, &context); + r = toku_ft_cursor_next(dbc_ftcursor(c), c_getf_next_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -397,7 +398,7 @@ c_getf_next(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) 
static int -c_getf_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_next_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -423,11 +424,11 @@ c_getf_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, v r = context->r_user_callback; } - //Give brt-layer an error (if any) to return from toku_ft_cursor_next + //Give ft-layer an error (if any) to return from toku_ft_cursor_next return r; } -static int c_getf_prev_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_prev_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_prev(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -442,7 +443,7 @@ c_getf_prev(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { c_query_context_init(&context, c, flag, f, extra); while (r == 0) { //toku_ft_cursor_prev will call c_getf_prev_callback(..., context) (if query is successful) - r = toku_ft_cursor_prev(dbc_struct_i(c)->c, c_getf_prev_callback, &context); + r = toku_ft_cursor_prev(dbc_ftcursor(c), c_getf_prev_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -456,7 +457,7 @@ c_getf_prev(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) static int -c_getf_prev_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_prev_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -481,11 +482,11 @@ c_getf_prev_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, v r = context->r_user_callback; } - //Give brt-layer an error (if any) to return from toku_ft_cursor_prev + //Give ft-layer an error (if any) to return from toku_ft_cursor_prev return r; } -static int c_getf_current_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_current_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_current(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -495,14 +496,14 @@ c_getf_current(DBC *c, uint32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) { QUERY_CONTEXT_S context; //Describes the context of this query. c_query_context_init(&context, c, flag, f, extra); //toku_ft_cursor_current will call c_getf_current_callback(..., context) (if query is successful) - int r = toku_ft_cursor_current(dbc_struct_i(c)->c, DB_CURRENT, c_getf_current_callback, &context); + int r = toku_ft_cursor_current(dbc_ftcursor(c), DB_CURRENT, c_getf_current_callback, &context); c_query_context_destroy(&context); return r; } //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) 
static int -c_getf_current_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_current_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT super_context = (QUERY_CONTEXT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -518,11 +519,11 @@ c_getf_current_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val r = 0; } - //Give brt-layer an error (if any) to return from toku_ft_cursor_current + //Give ft-layer an error (if any) to return from toku_ft_cursor_current return r; } -static int c_getf_set_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_set_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); int toku_c_getf_set(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -534,7 +535,7 @@ toku_c_getf_set(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void * query_context_with_input_init(&context, c, flag, key, NULL, f, extra); while (r == 0) { //toku_ft_cursor_set will call c_getf_set_callback(..., context) (if query is successful) - r = toku_ft_cursor_set(dbc_struct_i(c)->c, key, c_getf_set_callback, &context); + r = toku_ft_cursor_set(dbc_ftcursor(c), key, c_getf_set_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -547,7 +548,7 @@ toku_c_getf_set(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void * //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) static int -c_getf_set_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_set_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT_WITH_INPUT super_context = (QUERY_CONTEXT_WITH_INPUT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -571,11 +572,11 @@ c_getf_set_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, vo r = context->r_user_callback; } - //Give brt-layer an error (if any) to return from toku_ft_cursor_set + //Give ft-layer an error (if any) to return from toku_ft_cursor_set return r; } -static int c_getf_set_range_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_set_range_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_set_range(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -587,7 +588,7 @@ c_getf_set_range(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void query_context_with_input_init(&context, c, flag, key, NULL, f, extra); while (r == 0) { //toku_ft_cursor_set_range will call c_getf_set_range_callback(..., context) (if query is successful) - r = toku_ft_cursor_set_range(dbc_struct_i(c)->c, key, nullptr, c_getf_set_range_callback, &context); + r = toku_ft_cursor_set_range(dbc_ftcursor(c), key, nullptr, c_getf_set_range_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -600,7 +601,7 @@ c_getf_set_range(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) 
static int -c_getf_set_range_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_set_range_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT_WITH_INPUT super_context = (QUERY_CONTEXT_WITH_INPUT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -627,7 +628,7 @@ c_getf_set_range_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec v r = context->r_user_callback; } - //Give brt-layer an error (if any) to return from toku_ft_cursor_set_range + //Give ft-layer an error (if any) to return from toku_ft_cursor_set_range return r; } @@ -641,7 +642,7 @@ c_getf_set_range_with_bound(DBC *c, uint32_t flag, DBT *key, DBT *key_bound, YDB query_context_with_input_init(&context, c, flag, key, NULL, f, extra); while (r == 0) { //toku_ft_cursor_set_range will call c_getf_set_range_callback(..., context) (if query is successful) - r = toku_ft_cursor_set_range(dbc_struct_i(c)->c, key, key_bound, c_getf_set_range_callback, &context); + r = toku_ft_cursor_set_range(dbc_ftcursor(c), key, key_bound, c_getf_set_range_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -652,7 +653,7 @@ c_getf_set_range_with_bound(DBC *c, uint32_t flag, DBT *key, DBT *key_bound, YDB return r; } -static int c_getf_set_range_reverse_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool); +static int c_getf_set_range_reverse_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool); static int c_getf_set_range_reverse(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) { @@ -664,7 +665,7 @@ c_getf_set_range_reverse(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION query_context_with_input_init(&context, c, flag, key, NULL, f, extra); while (r == 0) { //toku_ft_cursor_set_range_reverse will call c_getf_set_range_reverse_callback(..., context) (if query is successful) - r = toku_ft_cursor_set_range_reverse(dbc_struct_i(c)->c, key, c_getf_set_range_reverse_callback, &context); + r = toku_ft_cursor_set_range_reverse(dbc_ftcursor(c), key, c_getf_set_range_reverse_callback, &context); if (r == DB_LOCK_NOTGRANTED) { r = toku_db_wait_range_lock(context.base.db, context.base.txn, &context.base.request); } else { @@ -677,7 +678,7 @@ c_getf_set_range_reverse(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION //result is the result of the query (i.e. 0 means found, DB_NOTFOUND, etc..) static int -c_getf_set_range_reverse_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) { +c_getf_set_range_reverse_callback(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only) { QUERY_CONTEXT_WITH_INPUT super_context = (QUERY_CONTEXT_WITH_INPUT) extra; QUERY_CONTEXT_BASE context = &super_context->base; @@ -704,18 +705,23 @@ c_getf_set_range_reverse_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, b r = context->r_user_callback; } - //Give brt-layer an error (if any) to return from toku_ft_cursor_set_range_reverse + //Give ft-layer an error (if any) to return from toku_ft_cursor_set_range_reverse return r; } -// Close a cursor. 
-int -toku_c_close(DBC * c) { + +int toku_c_close_internal(DBC *c) { HANDLE_PANICKED_DB(c->dbp); HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c); - toku_ft_cursor_close(dbc_struct_i(c)->c); + toku_ft_cursor_destroy(dbc_ftcursor(c)); toku_sdbt_cleanup(&dbc_struct_i(c)->skey_s); toku_sdbt_cleanup(&dbc_struct_i(c)->sval_s); + return 0; +} + +// Close a cursor. +int toku_c_close(DBC *c) { + toku_c_close_internal(c); toku_free(c); return 0; } @@ -739,7 +745,7 @@ c_set_bounds(DBC *dbc, const DBT *left_key, const DBT *right_key, bool pre_acqui DB *db = dbc->dbp; DB_TXN *txn = dbc_struct_i(dbc)->txn; HANDLE_PANICKED_DB(db); - toku_ft_cursor_set_range_lock(dbc_struct_i(dbc)->c, left_key, right_key, + toku_ft_cursor_set_range_lock(dbc_ftcursor(dbc), left_key, right_key, (left_key == toku_dbt_negative_infinity()), (right_key == toku_dbt_positive_infinity()), out_of_range_error); @@ -757,17 +763,16 @@ c_set_bounds(DBC *dbc, const DBT *left_key, const DBT *right_key, bool pre_acqui static void c_remove_restriction(DBC *dbc) { - toku_ft_cursor_remove_restriction(dbc_struct_i(dbc)->c); + toku_ft_cursor_remove_restriction(dbc_ftcursor(dbc)); } static void c_set_check_interrupt_callback(DBC* dbc, bool (*interrupt_callback)(void*), void *extra) { - toku_ft_cursor_set_check_interrupt_cb(dbc_struct_i(dbc)->c, interrupt_callback, extra); + toku_ft_cursor_set_check_interrupt_cb(dbc_ftcursor(dbc), interrupt_callback, extra); } int toku_c_get(DBC* c, DBT* key, DBT* val, uint32_t flag) { - //This function exists for legacy (test compatibility) purposes/parity with bdb. HANDLE_PANICKED_DB(c->dbp); HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c); @@ -829,7 +834,7 @@ toku_c_get(DBC* c, DBT* key, DBT* val, uint32_t flag) { } int -toku_db_cursor_internal(DB * db, DB_TXN * txn, DBC ** c, uint32_t flags, int is_temporary_cursor) { +toku_db_cursor_internal(DB * db, DB_TXN * txn, DBC *c, uint32_t flags, int is_temporary_cursor) { HANDLE_PANICKED_DB(db); HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn); DB_ENV* env = db->dbenv; @@ -842,13 +847,7 @@ toku_db_cursor_internal(DB * db, DB_TXN * txn, DBC ** c, uint32_t flags, int is_ ); } - int r = 0; - - struct __toku_dbc_external *XMALLOC(eresult); // so the internal stuff is stuck on the end - memset(eresult, 0, sizeof(*eresult)); - DBC *result = &eresult->external_part; - -#define SCRS(name) result->name = name +#define SCRS(name) c->name = name SCRS(c_getf_first); SCRS(c_getf_last); SCRS(c_getf_next); @@ -862,59 +861,49 @@ toku_db_cursor_internal(DB * db, DB_TXN * txn, DBC ** c, uint32_t flags, int is_ SCRS(c_set_check_interrupt_callback); #undef SCRS - result->c_get = toku_c_get; - result->c_getf_set = toku_c_getf_set; - result->c_close = toku_c_close; + c->c_get = toku_c_get; + c->c_getf_set = toku_c_getf_set; + c->c_close = toku_c_close; - result->dbp = db; + c->dbp = db; - dbc_struct_i(result)->txn = txn; - dbc_struct_i(result)->skey_s = (struct simple_dbt){0,0}; - dbc_struct_i(result)->sval_s = (struct simple_dbt){0,0}; + dbc_struct_i(c)->txn = txn; + dbc_struct_i(c)->skey_s = (struct simple_dbt){0,0}; + dbc_struct_i(c)->sval_s = (struct simple_dbt){0,0}; if (is_temporary_cursor) { - dbc_struct_i(result)->skey = &db->i->skey; - dbc_struct_i(result)->sval = &db->i->sval; + dbc_struct_i(c)->skey = &db->i->skey; + dbc_struct_i(c)->sval = &db->i->sval; } else { - dbc_struct_i(result)->skey = &dbc_struct_i(result)->skey_s; - dbc_struct_i(result)->sval = &dbc_struct_i(result)->sval_s; + dbc_struct_i(c)->skey = &dbc_struct_i(c)->skey_s; + dbc_struct_i(c)->sval = &dbc_struct_i(c)->sval_s; 
} if (flags & DB_SERIALIZABLE) { - dbc_struct_i(result)->iso = TOKU_ISO_SERIALIZABLE; + dbc_struct_i(c)->iso = TOKU_ISO_SERIALIZABLE; } else { - dbc_struct_i(result)->iso = txn ? db_txn_struct_i(txn)->iso : TOKU_ISO_SERIALIZABLE; + dbc_struct_i(c)->iso = txn ? db_txn_struct_i(txn)->iso : TOKU_ISO_SERIALIZABLE; } - dbc_struct_i(result)->rmw = (flags & DB_RMW) != 0; + dbc_struct_i(c)->rmw = (flags & DB_RMW) != 0; bool is_snapshot_read = false; if (txn) { - is_snapshot_read = (dbc_struct_i(result)->iso == TOKU_ISO_READ_COMMITTED || - dbc_struct_i(result)->iso == TOKU_ISO_SNAPSHOT); + is_snapshot_read = (dbc_struct_i(c)->iso == TOKU_ISO_READ_COMMITTED || + dbc_struct_i(c)->iso == TOKU_ISO_SNAPSHOT); } - r = toku_ft_cursor( + int r = toku_ft_cursor_create( db->i->ft_handle, - &dbc_struct_i(result)->c, + dbc_ftcursor(c), txn ? db_txn_struct_i(txn)->tokutxn : NULL, is_snapshot_read, - ((flags & DBC_DISABLE_PREFETCHING) != 0) + ((flags & DBC_DISABLE_PREFETCHING) != 0), + is_temporary_cursor != 0 ); - assert(r == 0 || r == TOKUDB_MVCC_DICTIONARY_TOO_NEW); - if (r == 0) { - // Set the is_temporary_cursor boolean inside the brt node so - // that a query only needing one cursor will not perform - // unecessary malloc calls. - if (is_temporary_cursor) { - toku_ft_cursor_set_temporary(dbc_struct_i(result)->c); - } - - *c = result; - } - else { - toku_free(result); + if (r != 0) { + invariant(r == TOKUDB_MVCC_DICTIONARY_TOO_NEW); } return r; } static inline int -autotxn_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags) { +autotxn_db_cursor(DB *db, DB_TXN *txn, DBC *c, uint32_t flags) { if (!txn && (db->dbenv->i->open_flags & DB_INIT_TXN)) { return toku_ydb_do_error(db->dbenv, EINVAL, "Cursors in a transaction environment must have transactions.\n"); @@ -923,9 +912,14 @@ autotxn_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags) { } // Create a cursor on a db. -int -toku_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags) { - int r = autotxn_db_cursor(db, txn, c, flags); +int toku_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags) { + DBC *XMALLOC(cursor); + int r = autotxn_db_cursor(db, txn, cursor, flags); + if (r == 0) { + *c = cursor; + } else { + toku_free(cursor); + } return r; } diff --git a/storage/tokudb/ft-index/src/ydb_cursor.h b/storage/tokudb/ft-index/src/ydb_cursor.h index 9666cc4e61ea0..a10e32f30026e 100644 --- a/storage/tokudb/ft-index/src/ydb_cursor.h +++ b/storage/tokudb/ft-index/src/ydb_cursor.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,14 +86,12 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
// This file defines the public interface to the ydb library -#if !defined(TOKU_YDB_CURSOR_H) -#define TOKU_YDB_CURSOR_H - - typedef enum { YDB_C_LAYER_STATUS_NUM_ROWS = 0 /* number of rows in this status array */ } ydb_c_lock_layer_status_entry; @@ -107,10 +105,9 @@ void ydb_c_layer_get_status(YDB_C_LAYER_STATUS statp); int toku_c_get(DBC * c, DBT * key, DBT * data, uint32_t flag); int toku_c_getf_set(DBC *c, uint32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra); -int toku_c_close(DBC * c); -int toku_db_cursor_internal(DB *db, DB_TXN * txn, DBC **c, uint32_t flags, int is_temporary_cursor); -int toku_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags); - +int toku_db_cursor(DB *db, DB_TXN *txn, DBC **c, uint32_t flags); +int toku_db_cursor_internal(DB *db, DB_TXN * txn, DBC *c, uint32_t flags, int is_temporary_cursor); -#endif +int toku_c_close(DBC *c); +int toku_c_close_internal(DBC *c); diff --git a/storage/tokudb/ft-index/src/ydb_db.cc b/storage/tokudb/ft-index/src/ydb_db.cc index 5a06f61eeaba5..2c54a3bd4dc42 100644 --- a/storage/tokudb/ft-index/src/ydb_db.cc +++ b/storage/tokudb/ft-index/src/ydb_db.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,8 +95,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include -#include +#include #include "ydb_cursor.h" #include "ydb_row_lock.h" @@ -115,7 +114,7 @@ static YDB_DB_LAYER_STATUS_S ydb_db_layer_status; #endif #define STATUS_VALUE(x) ydb_db_layer_status.status[x].value.num -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ydb_db_layer_status, k, c, t, l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ydb_db_layer_status, k, c, t, l, inc) static void ydb_db_layer_status_init (void) { @@ -225,13 +224,13 @@ int db_getf_set(DB *db, DB_TXN *txn, uint32_t flags, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) { HANDLE_PANICKED_DB(db); HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn); - DBC *c; + DBC c; uint32_t create_flags = flags & (DB_ISOLATION_FLAGS | DB_RMW); flags &= ~DB_ISOLATION_FLAGS; int r = toku_db_cursor_internal(db, txn, &c, create_flags | DBC_DISABLE_PREFETCHING, 1); if (r==0) { - r = toku_c_getf_set(c, flags, key, f, extra); - int r2 = toku_c_close(c); + r = toku_c_getf_set(&c, flags, key, f, extra); + int r2 = toku_c_close_internal(&c); if (r==0) r = r2; } return r; @@ -258,12 +257,12 @@ toku_db_get (DB * db, DB_TXN * txn, DBT * key, DBT * data, uint32_t flags) { // And DB_GET_BOTH is no longer supported. #2862. if (flags != 0) return EINVAL; - DBC *dbc; + DBC dbc; r = toku_db_cursor_internal(db, txn, &dbc, iso_flags | DBC_DISABLE_PREFETCHING, 1); if (r!=0) return r; uint32_t c_get_flags = DB_SET; - r = toku_c_get(dbc, key, data, c_get_flags | lock_flags); - int r2 = toku_c_close(dbc); + r = toku_c_get(&dbc, key, data, c_get_flags | lock_flags); + int r2 = toku_c_close_internal(&dbc); return r ? 
r : r2; } @@ -390,10 +389,12 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP // locktree's descriptor pointer if necessary static void db_set_descriptors(DB *db, FT_HANDLE ft_handle) { + const toku::comparator &cmp = toku_ft_get_comparator(ft_handle); db->descriptor = toku_ft_get_descriptor(ft_handle); db->cmp_descriptor = toku_ft_get_cmp_descriptor(ft_handle); + invariant(db->cmp_descriptor == cmp.get_descriptor()); if (db->i->lt) { - db->i->lt->set_descriptor(db->cmp_descriptor); + db->i->lt->set_comparator(cmp); } } @@ -430,8 +431,27 @@ void toku_db_lt_on_destroy_callback(toku::locktree *lt) { toku_ft_handle_close(ft_handle); } -int -toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t flags, int mode) { +// Instruct db to use the default (built-in) key comparison function +// by setting the flag bits in the db and ft structs +int toku_db_use_builtin_key_cmp(DB *db) { + HANDLE_PANICKED_DB(db); + int r = 0; + if (db_opened(db)) { + r = toku_ydb_do_error(db->dbenv, EINVAL, "Comparison functions cannot be set after DB open.\n"); + } else if (db->i->key_compare_was_set) { + r = toku_ydb_do_error(db->dbenv, EINVAL, "Key comparison function already set.\n"); + } else { + uint32_t tflags; + toku_ft_get_flags(db->i->ft_handle, &tflags); + + tflags |= TOKU_DB_KEYCMP_BUILTIN; + toku_ft_set_flags(db->i->ft_handle, tflags); + db->i->key_compare_was_set = true; + } + return r; +} + +int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t flags, int mode) { //Set comparison functions if not yet set. HANDLE_READ_ONLY_TXN(txn); if (!db->i->key_compare_was_set && db->dbenv->i->bt_compare) { @@ -474,9 +494,9 @@ toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t fla int r = toku_ft_handle_open(ft_handle, iname_in_env, is_db_create, is_db_excl, db->dbenv->i->cachetable, - txn ? db_txn_struct_i(txn)->tokutxn : NULL_TXN); + txn ? db_txn_struct_i(txn)->tokutxn : nullptr); if (r != 0) { - goto error_cleanup; + goto out; } // if the dictionary was opened as a blackhole, mark the @@ -497,26 +517,27 @@ toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t fla .txn = txn, .ft_handle = db->i->ft_handle, }; - db->i->lt = db->dbenv->i->ltm.get_lt( - db->i->dict_id, - db->cmp_descriptor, - toku_ft_get_bt_compare(db->i->ft_handle), - &on_create_extra); + db->i->lt = db->dbenv->i->ltm.get_lt(db->i->dict_id, + toku_ft_get_comparator(db->i->ft_handle), + &on_create_extra); if (db->i->lt == nullptr) { r = errno; - if (r == 0) + if (r == 0) { r = EINVAL; - goto error_cleanup; + } + goto out; } } - return 0; + r = 0; -error_cleanup: - db->i->dict_id = DICTIONARY_ID_NONE; - db->i->opened = 0; - if (db->i->lt) { - db->dbenv->i->ltm.release_lt(db->i->lt); - db->i->lt = NULL; +out: + if (r != 0) { + db->i->dict_id = DICTIONARY_ID_NONE; + db->i->opened = 0; + if (db->i->lt) { + db->dbenv->i->ltm.release_lt(db->i->lt); + db->i->lt = nullptr; + } } return r; } @@ -565,11 +586,12 @@ toku_db_change_descriptor(DB *db, DB_TXN* txn, const DBT* descriptor, uint32_t f HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn); int r = 0; TOKUTXN ttxn = txn ? 
db_txn_struct_i(txn)->tokutxn : NULL; - DBT old_descriptor; bool is_db_hot_index = ((flags & DB_IS_HOT_INDEX) != 0); bool update_cmp_descriptor = ((flags & DB_UPDATE_CMP_DESCRIPTOR) != 0); - toku_init_dbt(&old_descriptor); + DBT old_descriptor_dbt; + toku_init_dbt(&old_descriptor_dbt); + if (!db_opened(db) || !descriptor || (descriptor->size>0 && !descriptor->data)){ r = EINVAL; goto cleanup; @@ -582,23 +604,12 @@ toku_db_change_descriptor(DB *db, DB_TXN* txn, const DBT* descriptor, uint32_t f if (r != 0) { goto cleanup; } } - // TODO: use toku_clone_dbt(&old-descriptor, db->descriptor); - old_descriptor.size = db->descriptor->dbt.size; - old_descriptor.data = toku_memdup(db->descriptor->dbt.data, db->descriptor->dbt.size); - - toku_ft_change_descriptor( - db->i->ft_handle, - &old_descriptor, - descriptor, - true, - ttxn, - update_cmp_descriptor - ); + toku_clone_dbt(&old_descriptor_dbt, db->descriptor->dbt); + toku_ft_change_descriptor(db->i->ft_handle, &old_descriptor_dbt, descriptor, + true, ttxn, update_cmp_descriptor); cleanup: - if (old_descriptor.data) { - toku_free(old_descriptor.data); - } + toku_destroy_dbt(&old_descriptor_dbt); return r; } @@ -712,6 +723,15 @@ toku_db_get_fanout(DB *db, unsigned int *fanout) { return 0; } +static int +toku_db_set_memcmp_magic(DB *db, uint8_t magic) { + HANDLE_PANICKED_DB(db); + if (db_opened(db)) { + return EINVAL; + } + return toku_ft_handle_set_memcmp_magic(db->i->ft_handle, magic); +} + static int toku_db_get_fractal_tree_info64(DB *db, uint64_t *num_blocks_allocated, uint64_t *num_blocks_in_use, uint64_t *size_allocated, uint64_t *size_in_use) { HANDLE_PANICKED_DB(db); @@ -950,7 +970,7 @@ struct last_key_extra { }; static int -db_get_last_key_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen UU(), bytevec val UU(), void *extra, bool lock_only) { +db_get_last_key_callback(uint32_t keylen, const void *key, uint32_t vallen UU(), const void *val UU(), void *extra, bool lock_only) { if (!lock_only) { DBT keydbt; toku_fill_dbt(&keydbt, key, keylen); @@ -1048,7 +1068,7 @@ toku_db_verify_with_progress(DB *db, int (*progress_callback)(void *extra, float return r; } -int toku_setup_db_internal (DB **dbp, DB_ENV *env, uint32_t flags, FT_HANDLE brt, bool is_open) { +int toku_setup_db_internal (DB **dbp, DB_ENV *env, uint32_t flags, FT_HANDLE ft_handle, bool is_open) { if (flags || env == NULL) return EINVAL; @@ -1067,7 +1087,7 @@ int toku_setup_db_internal (DB **dbp, DB_ENV *env, uint32_t flags, FT_HANDLE brt return ENOMEM; } memset(result->i, 0, sizeof *result->i); - result->i->ft_handle = brt; + result->i->ft_handle = ft_handle; result->i->opened = is_open; *dbp = result; return 0; @@ -1082,10 +1102,10 @@ toku_db_create(DB ** db, DB_ENV * env, uint32_t flags) { return EINVAL; - FT_HANDLE brt; - toku_ft_handle_create(&brt); + FT_HANDLE ft_handle; + toku_ft_handle_create(&ft_handle); - int r = toku_setup_db_internal(db, env, flags, brt, false); + int r = toku_setup_db_internal(db, env, flags, ft_handle, false); if (r != 0) return r; DB *result=*db; @@ -1109,6 +1129,7 @@ toku_db_create(DB ** db, DB_ENV * env, uint32_t flags) { USDB(change_compression_method); USDB(set_fanout); USDB(get_fanout); + USDB(set_memcmp_magic); USDB(change_fanout); USDB(set_flags); USDB(get_flags); @@ -1162,7 +1183,7 @@ toku_db_create(DB ** db, DB_ENV * env, uint32_t flags) { // The new inames are returned to the caller. // It is the caller's responsibility to free them. 
// If "mark_as_loader" is true, then include a mark in the iname -// to indicate that the file is created by the brt loader. +// to indicate that the file is created by the ft loader. // Return 0 on success (could fail if write lock not available). static int load_inames(DB_ENV * env, DB_TXN * txn, int N, DB * dbs[/*N*/], const char * new_inames_in_env[/*N*/], LSN *load_lsn, bool mark_as_loader) { @@ -1207,13 +1228,13 @@ load_inames(DB_ENV * env, DB_TXN * txn, int N, DB * dbs[/*N*/], const char * new int do_fsync = 0; LSN *get_lsn = NULL; for (i = 0; i < N; i++) { - FT_HANDLE brt = dbs[i]->i->ft_handle; + FT_HANDLE ft_handle = dbs[i]->i->ft_handle; //Fsync is necessary for the last one only. if (i==N-1) { do_fsync = 1; //We only need a single fsync of logs. get_lsn = load_lsn; //Set pointer to capture the last lsn. } - toku_ft_load(brt, ttxn, new_inames_in_env[i], do_fsync, get_lsn); + toku_ft_load(ft_handle, ttxn, new_inames_in_env[i], do_fsync, get_lsn); } } return rval; diff --git a/storage/tokudb/ft-index/src/ydb_db.h b/storage/tokudb/ft-index/src/ydb_db.h index 5f87fadc3e3a2..edbc72cb0d309 100644 --- a/storage/tokudb/ft-index/src/ydb_db.h +++ b/storage/tokudb/ft-index/src/ydb_db.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,8 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_YDB_DB_H -#define TOKU_YDB_DB_H +#pragma once #include @@ -128,18 +127,18 @@ static inline int db_opened(DB *db) { return db->i->opened != 0; } -static inline ft_compare_func -toku_db_get_compare_fun(DB* db) { - return toku_ft_get_bt_compare(db->i->ft_handle); +static inline const toku::comparator &toku_db_get_comparator(DB *db) { + return toku_ft_get_comparator(db->i->ft_handle); } +int toku_db_use_builtin_key_cmp(DB *db); int toku_db_pre_acquire_fileops_lock(DB *db, DB_TXN *txn); int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname, uint32_t flags, int mode); int toku_db_pre_acquire_table_lock(DB *db, DB_TXN *txn); int toku_db_get (DB * db, DB_TXN * txn, DBT * key, DBT * data, uint32_t flags); int toku_db_create(DB ** db, DB_ENV * env, uint32_t flags); int toku_db_close(DB * db); -int toku_setup_db_internal (DB **dbp, DB_ENV *env, uint32_t flags, FT_HANDLE brt, bool is_open); +int toku_setup_db_internal (DB **dbp, DB_ENV *env, uint32_t flags, FT_HANDLE ft_handle, bool is_open); int db_getf_set(DB *db, DB_TXN *txn, uint32_t flags, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra); int autotxn_db_get(DB* db, DB_TXN* txn, DBT* key, DBT* data, uint32_t flags); @@ -173,5 +172,3 @@ toku_db_destruct_autotxn(DB_TXN *txn, int r, bool changed) { } return r; } - -#endif /* TOKU_YDB_DB_H */ diff --git a/storage/tokudb/ft-index/src/ydb_env_func.cc b/storage/tokudb/ft-index/src/ydb_env_func.cc index 5247e699a234e..714fad74ec5c8 100644 --- a/storage/tokudb/ft-index/src/ydb_env_func.cc +++ b/storage/tokudb/ft-index/src/ydb_env_func.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. 
+ TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,12 +94,12 @@ PATENT RIGHTS GRANT: #include #include +#include #include #include #include -#include -#include -#include +#include +#include #include "ydb_env_func.h" diff --git a/storage/tokudb/ft-index/src/ydb_env_func.h b/storage/tokudb/ft-index/src/ydb_env_func.h index cf193b642162c..2fb0c202f9094 100644 --- a/storage/tokudb/ft-index/src/ydb_env_func.h +++ b/storage/tokudb/ft-index/src/ydb_env_func.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// This file defines the public interface to the ydb library - -#if !defined(TOKU_YDB_ENV_FUNC_H) -#define TOKU_YDB_ENV_FUNC_H extern void (*checkpoint_callback_f)(void*); extern void * checkpoint_callback_extra; @@ -105,5 +103,3 @@ void setup_dlmalloc(void) __attribute__((__visibility__("default"))); // Test-only function void toku_env_increase_last_xid(DB_ENV *env, uint64_t increment); - -#endif diff --git a/storage/tokudb/ft-index/src/ydb_lib.cc b/storage/tokudb/ft-index/src/ydb_lib.cc index 5c0e539909da4..12742cad5c24e 100644 --- a/storage/tokudb/ft-index/src/ydb_lib.cc +++ b/storage/tokudb/ft-index/src/ydb_lib.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -98,48 +98,13 @@ PATENT RIGHTS GRANT: #if defined(__GNUC__) -static void __attribute__((constructor)) libtokudb_init(void) { - // printf("%s:%s:%d\n", __FILE__, __FUNCTION__, __LINE__); +static void __attribute__((constructor)) libtokuft_init(void) { int r = toku_ydb_init(); assert(r==0); } -static void __attribute__((destructor)) libtokudb_destroy(void) { - // printf("%s:%s:%d\n", __FILE__, __FUNCTION__, __LINE__); +static void __attribute__((destructor)) libtokuft_destroy(void) { toku_ydb_destroy(); } -#endif - -#if TOKU_WINDOWS -#include -#define UNUSED(x) x=x - -bool WINAPI DllMain(HINSTANCE h, DWORD reason, LPVOID reserved) { - UNUSED(h); UNUSED(reserved); - // printf("%s:%lu\n", __FUNCTION__, reason); - int r = 0; - switch(reason) { - case DLL_PROCESS_ATTACH: - r = toku_ydb_init(); - break; - case DLL_PROCESS_DETACH: - toku_ydb_destroy(); - break; - case DLL_THREAD_ATTACH: - //TODO: Any new thread code if necessary, i.e. allocate per-thread - // storage. - break; - case DLL_THREAD_DETACH: - //TODO: Any cleanup thread code if necessary, i.e. free per-thread - // storage. 
- break; - default: - break; - } - assert(r==0); - return true; -} - -#endif - +#endif // __GNUC__ diff --git a/storage/tokudb/ft-index/src/ydb_load.h b/storage/tokudb/ft-index/src/ydb_load.h index f5ffeab528eba..c815969a97cf8 100644 --- a/storage/tokudb/ft-index/src/ydb_load.h +++ b/storage/tokudb/ft-index/src/ydb_load.h @@ -1,8 +1,6 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: #ident "$Id$" -#ifndef YDB_LOAD_H -#define YDB_LOAD_H /* COPYING CONDITIONS NOTICE: @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +87,7 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2010-2013 Tokutek Inc. All rights reserved." +#pragma once /* ydb functions used by loader */ @@ -104,7 +102,7 @@ PATENT RIGHTS GRANT: // The new inames are returned to the caller. // It is the caller's responsibility to free them. // If "mark_as_loader" is true, then include a mark in the iname -// to indicate that the file is created by the brt loader. +// to indicate that the file is created by the ft loader. // Return 0 on success (could fail if write lock not available). int locked_load_inames(DB_ENV * env, DB_TXN * txn, @@ -113,5 +111,3 @@ int locked_load_inames(DB_ENV * env, char * new_inames_in_env[/*N*/], /* out */ LSN *load_lsn, bool mark_as_loader); - -#endif diff --git a/storage/tokudb/ft-index/src/ydb_row_lock.cc b/storage/tokudb/ft-index/src/ydb_row_lock.cc index f7cdcbb563cff..40cafd0e331d9 100644 --- a/storage/tokudb/ft-index/src/ydb_row_lock.cc +++ b/storage/tokudb/ft-index/src/ydb_row_lock.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -137,18 +137,18 @@ static void db_txn_note_row_lock(DB *db, DB_TXN *txn, const DBT *left_key, const map->insert_at(ranges, idx); // let the manager know we're referencing this lt - toku::locktree::manager *ltm = &txn->mgrp->i->ltm; + toku::locktree_manager *ltm = &txn->mgrp->i->ltm; ltm->reference_lt(ranges.lt); } else { invariant_zero(r); } // add a new lock range to this txn's row lock buffer - size_t old_num_bytes = ranges.buffer->get_num_bytes(); + size_t old_mem_size = ranges.buffer->total_memory_size(); ranges.buffer->append(left_key, right_key); - size_t new_num_bytes = ranges.buffer->get_num_bytes(); - invariant(new_num_bytes > old_num_bytes); - lt->get_mem_tracker()->note_mem_used(new_num_bytes - old_num_bytes); + size_t new_mem_size = ranges.buffer->total_memory_size(); + invariant(new_mem_size > old_mem_size); + lt->get_manager()->note_mem_used(new_mem_size - old_mem_size); toku_mutex_unlock(&db_txn_struct_i(txn)->txn_mutex); } @@ -201,17 +201,16 @@ void toku_db_txn_escalate_callback(TXNID txnid, const toku::locktree *lt, const // // We could theoretically steal the memory from the caller instead of copying // it, but it's simpler to have a callback API that doesn't transfer memory ownership. 
- lt->get_mem_tracker()->note_mem_released(ranges.buffer->get_num_bytes()); + lt->get_manager()->note_mem_released(ranges.buffer->total_memory_size()); ranges.buffer->destroy(); ranges.buffer->create(); - toku::range_buffer::iterator iter; + toku::range_buffer::iterator iter(&buffer); toku::range_buffer::iterator::record rec; - iter.create(&buffer); while (iter.current(&rec)) { ranges.buffer->append(rec.get_left_key(), rec.get_right_key()); iter.next(); } - lt->get_mem_tracker()->note_mem_used(ranges.buffer->get_num_bytes()); + lt->get_manager()->note_mem_used(ranges.buffer->total_memory_size()); } else { // In rare cases, we may not find the associated locktree, because we are // racing with the transaction trying to add this locktree to the lt map @@ -315,7 +314,7 @@ void toku_db_release_lt_key_ranges(DB_TXN *txn, txn_lt_key_ranges *ranges) { // release all of the locks this txn has ever successfully // acquired and stored in the range buffer for this locktree lt->release_locks(txnid, ranges->buffer); - lt->get_mem_tracker()->note_mem_released(ranges->buffer->get_num_bytes()); + lt->get_manager()->note_mem_released(ranges->buffer->total_memory_size()); ranges->buffer->destroy(); toku_free(ranges->buffer); @@ -324,6 +323,6 @@ void toku_db_release_lt_key_ranges(DB_TXN *txn, txn_lt_key_ranges *ranges) { toku::lock_request::retry_all_lock_requests(lt); // Release our reference on this locktree - toku::locktree::manager *ltm = &txn->mgrp->i->ltm; + toku::locktree_manager *ltm = &txn->mgrp->i->ltm; ltm->release_lt(lt); } diff --git a/storage/tokudb/ft-index/src/ydb_row_lock.h b/storage/tokudb/ft-index/src/ydb_row_lock.h index 2a1a4ffb5c84e..2c3a10d92d6ca 100644 --- a/storage/tokudb/ft-index/src/ydb_row_lock.h +++ b/storage/tokudb/ft-index/src/ydb_row_lock.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,8 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef TOKU_YDB_ROW_LOCK_H -#define TOKU_YDB_ROW_LOCK_H +#pragma once #include @@ -113,5 +112,3 @@ int toku_db_get_point_write_lock(DB *db, DB_TXN *txn, const DBT *key); void toku_db_grab_write_lock(DB *db, DBT *key, TOKUTXN tokutxn); void toku_db_release_lt_key_ranges(DB_TXN *txn, txn_lt_key_ranges *ranges); - -#endif /* TOKU_YDB_ROW_LOCK_H */ diff --git a/storage/tokudb/ft-index/src/ydb_txn.cc b/storage/tokudb/ft-index/src/ydb_txn.cc index 4ab6669b3b7c4..b6b8e154c6f1b 100644 --- a/storage/tokudb/ft-index/src/ydb_txn.cc +++ b/storage/tokudb/ft-index/src/ydb_txn.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,15 +89,17 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#ident "$Id$" -#include #include -#include -#include -#include +#include #include +#include +#include +#include + + #include "ydb-internal.h" #include "ydb_txn.h" #include "ydb_row_lock.h" diff --git a/storage/tokudb/ft-index/src/ydb_txn.h b/storage/tokudb/ft-index/src/ydb_txn.h index 454b6578e9fce..a2e5a3b09e2de 100644 --- a/storage/tokudb/ft-index/src/ydb_txn.h +++ b/storage/tokudb/ft-index/src/ydb_txn.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,10 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// This file defines the public interface to the ydb library - -#if !defined(TOKU_YDB_TXN_H) -#define TOKU_YDB_TXN_H // begin, commit, and abort use the multi operation lock // internally to synchronize with begin checkpoint. callers @@ -112,5 +110,3 @@ bool toku_is_big_tokutxn(TOKUTXN tokutxn); // Test-only function extern "C" void toku_increase_last_xid(DB_ENV *env, uint64_t increment) __attribute__((__visibility__("default"))); - -#endif diff --git a/storage/tokudb/ft-index/src/ydb_write.cc b/storage/tokudb/ft-index/src/ydb_write.cc index 7968f3f96684a..77daf4e6793a7 100644 --- a/storage/tokudb/ft-index/src/ydb_write.cc +++ b/storage/tokudb/ft-index/src/ydb_write.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: #include "ydb-internal.h" #include "indexer.h" #include -#include +#include #include "ydb_row_lock.h" #include "ydb_write.h" #include "ydb_db.h" @@ -106,7 +106,7 @@ static YDB_WRITE_LAYER_STATUS_S ydb_write_layer_status; #endif #define STATUS_VALUE(x) ydb_write_layer_status.status[x].value.num -#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(ydb_write_layer_status, k, c, t, l, inc) +#define STATUS_INIT(k,c,t,l,inc) TOKUFT_STATUS_INIT(ydb_write_layer_status, k, c, t, l, inc) static void ydb_write_layer_status_init (void) { @@ -253,6 +253,30 @@ toku_db_del(DB *db, DB_TXN *txn, DBT *key, uint32_t flags, bool holds_mo_lock) { return r; } +static int +db_put(DB *db, DB_TXN *txn, DBT *key, DBT *val, int flags, bool do_log) { + int r = 0; + bool unique = false; + enum ft_msg_type type = FT_INSERT; + if (flags == DB_NOOVERWRITE) { + unique = true; + } else if (flags == DB_NOOVERWRITE_NO_ERROR) { + type = FT_INSERT_NO_OVERWRITE; + } else if (flags != 0) { + // All other non-zero flags are unsupported + r = EINVAL; + } + if (r == 0) { + TOKUTXN ttxn = txn ? 
db_txn_struct_i(txn)->tokutxn : nullptr; + if (unique) { + r = toku_ft_insert_unique(db->i->ft_handle, key, val, ttxn, do_log); + } else { + toku_ft_maybe_insert(db->i->ft_handle, key, val, ttxn, false, ZERO_LSN, do_log, type); + } + invariant(r == DB_KEYEXIST || r == 0); + } + return r; +} int toku_db_put(DB *db, DB_TXN *txn, DBT *key, DBT *val, uint32_t flags, bool holds_mo_lock) { @@ -265,25 +289,16 @@ toku_db_put(DB *db, DB_TXN *txn, DBT *key, DBT *val, uint32_t flags, bool holds_ flags &= ~lock_flags; r = db_put_check_size_constraints(db, key, val); - if (r == 0) { - //Do any checking required by the flags. - r = db_put_check_overwrite_constraint(db, txn, key, lock_flags, flags); - } - //Do locking if necessary. Do not grab the lock again if this DB had a unique - //check performed because the lock was already grabbed by its cursor callback. + + //Do locking if necessary. bool do_locking = (bool)(db->i->lt && !(lock_flags&DB_PRELOCKED_WRITE)); - if (r == 0 && do_locking && !(flags & DB_NOOVERWRITE)) { + if (r == 0 && do_locking) { r = toku_db_get_point_write_lock(db, txn, key); } if (r == 0) { - //Insert into the brt. - TOKUTXN ttxn = txn ? db_txn_struct_i(txn)->tokutxn : NULL; - enum ft_msg_type type = FT_INSERT; - if (flags==DB_NOOVERWRITE_NO_ERROR) { - type = FT_INSERT_NO_OVERWRITE; - } + //Insert into the ft. if (!holds_mo_lock) toku_multi_operation_client_lock(); - toku_ft_maybe_insert(db->i->ft_handle, key, val, ttxn, false, ZERO_LSN, true, type); + r = db_put(db, txn, key, val, flags, true); if (!holds_mo_lock) toku_multi_operation_client_unlock(); } @@ -396,9 +411,9 @@ toku_db_update_broadcast(DB *db, DB_TXN *txn, } static void -log_del_single(DB_TXN *txn, FT_HANDLE brt, const DBT *key) { +log_del_single(DB_TXN *txn, FT_HANDLE ft_handle, const DBT *key) { TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn; - toku_ft_log_del(ttxn, brt, key); + toku_ft_log_del(ttxn, ft_handle, key); } static uint32_t @@ -413,7 +428,7 @@ sum_size(uint32_t num_arrays, DBT_ARRAY keys[], uint32_t overhead) { } static void -log_del_multiple(DB_TXN *txn, DB *src_db, const DBT *key, const DBT *val, uint32_t num_dbs, FT_HANDLE brts[], DBT_ARRAY keys[]) { +log_del_multiple(DB_TXN *txn, DB *src_db, const DBT *key, const DBT *val, uint32_t num_dbs, FT_HANDLE fts[], DBT_ARRAY keys[]) { if (num_dbs > 0) { TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn; FT_HANDLE src_ft = src_db ? 
src_db->i->ft_handle : NULL; @@ -422,11 +437,11 @@ log_del_multiple(DB_TXN *txn, DB *src_db, const DBT *key, const DBT *val, uint32 if (del_single_sizes < del_multiple_size) { for (uint32_t i = 0; i < num_dbs; i++) { for (uint32_t j = 0; j < keys[i].size; j++) { - log_del_single(txn, brts[i], &keys[i].dbts[j]); + log_del_single(txn, fts[i], &keys[i].dbts[j]); } } } else { - toku_ft_log_del_multiple(ttxn, src_ft, brts, num_dbs, key, val); + toku_ft_log_del_multiple(ttxn, src_ft, fts, num_dbs, key, val); } } } @@ -539,7 +554,7 @@ env_del_multiple( uint32_t lock_flags[num_dbs]; uint32_t remaining_flags[num_dbs]; - FT_HANDLE brts[num_dbs]; + FT_HANDLE fts[num_dbs]; bool indexer_lock_taken = false; bool src_same = false; bool indexer_shortcut = false; @@ -594,7 +609,7 @@ env_del_multiple( if (r != 0) goto cleanup; } } - brts[which_db] = db->i->ft_handle; + fts[which_db] = db->i->ft_handle; } if (indexer) { @@ -611,7 +626,7 @@ env_del_multiple( } } toku_multi_operation_client_lock(); - log_del_multiple(txn, src_db, src_key, src_val, num_dbs, brts, del_keys); + log_del_multiple(txn, src_db, src_key, src_val, num_dbs, fts, del_keys); r = do_del_multiple(txn, num_dbs, db_array, del_keys, src_db, src_key, indexer_shortcut); toku_multi_operation_client_unlock(); if (indexer_lock_taken) { @@ -627,17 +642,19 @@ env_del_multiple( } static void -log_put_multiple(DB_TXN *txn, DB *src_db, const DBT *src_key, const DBT *src_val, uint32_t num_dbs, FT_HANDLE brts[]) { +log_put_multiple(DB_TXN *txn, DB *src_db, const DBT *src_key, const DBT *src_val, uint32_t num_dbs, FT_HANDLE fts[]) { if (num_dbs > 0) { TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn; FT_HANDLE src_ft = src_db ? src_db->i->ft_handle : NULL; - toku_ft_log_put_multiple(ttxn, src_ft, brts, num_dbs, src_key, src_val); + toku_ft_log_put_multiple(ttxn, src_ft, fts, num_dbs, src_key, src_val); } } +// Requires: If remaining_flags is non-null, this function performs any required uniqueness checks +// Otherwise, the caller is responsible. static int -do_put_multiple(DB_TXN *txn, uint32_t num_dbs, DB *db_array[], DBT_ARRAY keys[], DBT_ARRAY vals[], DB *src_db, const DBT *src_key, bool indexer_shortcut) { - TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn; +do_put_multiple(DB_TXN *txn, uint32_t num_dbs, DB *db_array[], DBT_ARRAY keys[], DBT_ARRAY vals[], uint32_t *remaining_flags, DB *src_db, const DBT *src_key, bool indexer_shortcut) { + int r = 0; for (uint32_t which_db = 0; which_db < num_dbs; which_db++) { DB *db = db_array[which_db]; @@ -666,16 +683,21 @@ do_put_multiple(DB_TXN *txn, uint32_t num_dbs, DB *db_array[], DBT_ARRAY keys[], } if (do_put) { for (uint32_t i = 0; i < keys[which_db].size; i++) { - // if db is being indexed by an indexer, then put into that db if the src key is to the left or equal to the - // indexers cursor. we have to get the src_db from the indexer and find it in the db_array. 
- toku_ft_maybe_insert(db->i->ft_handle, - &keys[which_db].dbts[i], &vals[which_db].dbts[i], - ttxn, false, ZERO_LSN, false, FT_INSERT); + int flags = 0; + if (remaining_flags != nullptr) { + flags = remaining_flags[which_db]; + invariant(!(flags & DB_NOOVERWRITE_NO_ERROR)); + } + r = db_put(db, txn, &keys[which_db].dbts[i], &vals[which_db].dbts[i], flags, false); + if (r != 0) { + goto done; + } } } } } - return 0; +done: + return r; } static int @@ -701,7 +723,7 @@ env_put_multiple_internal( uint32_t lock_flags[num_dbs]; uint32_t remaining_flags[num_dbs]; - FT_HANDLE brts[num_dbs]; + FT_HANDLE fts[num_dbs]; bool indexer_shortcut = false; bool indexer_lock_taken = false; bool src_same = false; @@ -754,26 +776,20 @@ env_put_multiple_internal( r = db_put_check_size_constraints(db, &put_key, &put_val); if (r != 0) goto cleanup; - //Check overwrite constraints - r = db_put_check_overwrite_constraint(db, txn, - &put_key, - lock_flags[which_db], remaining_flags[which_db]); - if (r != 0) goto cleanup; if (remaining_flags[which_db] == DB_NOOVERWRITE_NO_ERROR) { //put_multiple does not support delaying the no error, since we would //have to log the flag in the put_multiple. r = EINVAL; goto cleanup; } - //Do locking if necessary. Do not grab the lock again if this DB had a unique - //check performed because the lock was already grabbed by its cursor callback. - if (db->i->lt && !(lock_flags[which_db] & DB_PRELOCKED_WRITE) && !(remaining_flags[which_db] & DB_NOOVERWRITE)) { + //Do locking if necessary. + if (db->i->lt && !(lock_flags[which_db] & DB_PRELOCKED_WRITE)) { //Needs locking r = toku_db_get_point_write_lock(db, txn, &put_key); if (r != 0) goto cleanup; } } - brts[which_db] = db->i->ft_handle; + fts[which_db] = db->i->ft_handle; } if (indexer) { @@ -790,8 +806,10 @@ env_put_multiple_internal( } } toku_multi_operation_client_lock(); - log_put_multiple(txn, src_db, src_key, src_val, num_dbs, brts); - r = do_put_multiple(txn, num_dbs, db_array, put_keys, put_vals, src_db, src_key, indexer_shortcut); + r = do_put_multiple(txn, num_dbs, db_array, put_keys, put_vals, remaining_flags, src_db, src_key, indexer_shortcut); + if (r == 0) { + log_put_multiple(txn, src_db, src_key, src_val, num_dbs, fts); + } toku_multi_operation_client_unlock(); if (indexer_lock_taken) { toku_indexer_unlock(indexer); @@ -933,8 +951,8 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn, } else if (idx_old == old_keys.size) { cmp = +1; } else { - ft_compare_func cmpfun = toku_db_get_compare_fun(db); - cmp = cmpfun(db, curr_old_key, curr_new_key); + const toku::comparator &cmpfn = toku_db_get_comparator(db); + cmp = cmpfn(curr_old_key, curr_new_key); } bool do_del = false; @@ -1075,7 +1093,7 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn, // recovery so we don't end up losing data. // So unlike env->put_multiple, we ONLY log a 'put_multiple' log entry. 
log_put_multiple(txn, src_db, new_src_key, new_src_data, n_put_dbs, put_fts); - r = do_put_multiple(txn, n_put_dbs, put_dbs, put_key_arrays, put_val_arrays, src_db, new_src_key, indexer_shortcut); + r = do_put_multiple(txn, n_put_dbs, put_dbs, put_key_arrays, put_val_arrays, nullptr, src_db, new_src_key, indexer_shortcut); } toku_multi_operation_client_unlock(); if (indexer_lock_taken) { diff --git a/storage/tokudb/ft-index/src/ydb_write.h b/storage/tokudb/ft-index/src/ydb_write.h index a890089d895f3..00c4ab4da5e77 100644 --- a/storage/tokudb/ft-index/src/ydb_write.h +++ b/storage/tokudb/ft-index/src/ydb_write.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,11 +88,8 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// This file defines the public interface to the ydb library - -#if !defined(TOKU_YDB_WRITE_H) -#define TOKU_YDB_WRITE_H +#pragma once typedef enum { YDB_LAYER_NUM_INSERTS = 0, @@ -119,7 +116,6 @@ typedef struct { void ydb_write_layer_get_status(YDB_WRITE_LAYER_STATUS statp); - int toku_db_del(DB *db, DB_TXN *txn, DBT *key, uint32_t flags, bool holds_mo_lock); int toku_db_put(DB *db, DB_TXN *txn, DBT *key, DBT *val, uint32_t flags, bool holds_mo_lock); int autotxn_db_del(DB* db, DB_TXN* txn, DBT* key, uint32_t flags); @@ -159,8 +155,3 @@ int env_update_multiple( uint32_t num_keys, DBT_ARRAY keys[], uint32_t num_vals, DBT_ARRAY vals[] ); - - - - -#endif diff --git a/storage/tokudb/ft-index/toku_include/CMakeLists.txt b/storage/tokudb/ft-index/toku_include/CMakeLists.txt deleted file mode 100644 index 442910125388d..0000000000000 --- a/storage/tokudb/ft-index/toku_include/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -configure_file(toku_config.h.in toku_config.h) -add_custom_target(generate_config_h DEPENDS - "${CMAKE_CURRENT_BINARY_DIR}/toku_config.h") - -# detect when we are being built as a subproject -if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING) - install( - FILES toku_list.h toku_os.h - DESTINATION include - COMPONENT tokukv_headers - ) - install( - FILES "${CMAKE_CURRENT_BINARY_DIR}/toku_config.h" - DESTINATION include - COMPONENT tokukv_headers - ) -endif () \ No newline at end of file diff --git a/storage/tokudb/ft-index/toku_include/old-db.h b/storage/tokudb/ft-index/toku_include/old-db.h deleted file mode 100644 index 94db13d614151..0000000000000 --- a/storage/tokudb/ft-index/toku_include/old-db.h +++ /dev/null @@ -1,276 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the 
PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." -#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." -#ifndef _YOBI_DB_H -#define _YOBI_DB_H - -#include "ydb-constants.h" - - -#include -#include - -typedef enum { - DB_BTREE=1, - // DB_HASH=2, - // DB_RECNO=3, - // DB_QUEUE=4, - // DB_UNKNOWN=5 /* Figure it out on open. */ -} DBTYPE; - -typedef enum { - DB_NOTICE_LOGFILE_CHANGED -} db_notices; - -enum { - DB_VERB_CHKPOINT = 0x0001, - DB_VERB_DEADLOCK = 0x0002, - DB_VERB_RECOVERY = 0x0004 - -}; - -typedef struct yobi_db DB; -typedef struct yobi_db_btree_stat DB_BTREE_STAT; -typedef struct yobi_db_env DB_ENV; -typedef struct yobi_db_key_range DB_KEY_RANGE; -typedef struct yobi_db_lsn DB_LSN; -typedef struct yobi_db_txn DB_TXN; -typedef struct yobi_db_txn_active DB_TXN_ACTIVE; -typedef struct yobi_db_txn_stat DB_TXN_STAT; -typedef struct yobi_dbc DBC; -typedef struct yobi_dbt DBT; - -struct yobi_db { - void *app_private; - int (*close) (DB *, uint32_t); - int (*cursor) (DB *, DB_TXN *, DBC **, uint32_t); - int (*del) (DB *, DB_TXN *, DBT *, uint32_t); - int (*get) (DB *, DB_TXN *, DBT *, DBT *, uint32_t); - int (*key_range) (DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, uint32_t); - int (*open) (DB *, DB_TXN *, - const char *, const char *, DBTYPE, uint32_t, int); - int (*put) (DB *, DB_TXN *, DBT *, DBT *, uint32_t); - int (*remove) (DB *, const char *, const char *, uint32_t); - int (*rename) (DB *, const char *, const char *, const char *, uint32_t); - int (*set_bt_compare) (DB *, - int (*)(DB *, const DBT *, const DBT *)); - int (*set_flags) (DB *, uint32_t); - int (*stat) (DB *, void *, uint32_t); - - struct ydb_db_internal *i; -}; -enum { - DB_DBT_MALLOC = 0x002, - DB_DBT_REALLOC = 0x010, - DB_DBT_USERMEM = 0x020, - DB_DBT_DUPOK = 0x040 -}; -struct yobi_dbt { - void *app_private; - void *data; - uint32_t flags; - uint32_t size; - uint32_t ulen; -}; -struct yobi_db_txn { - int (*commit) (DB_TXN*, uint32_t); - uint32_t (*id) (DB_TXN *); - // internal stuff - struct yobi_db_txn_internal *i; -}; -struct yobi_dbc { - int (*c_get) (DBC *, DBT *, DBT *, uint32_t); - int (*c_close) (DBC *); - int (*c_del) (DBC *, uint32_t); - struct yobi_dbc_internal *i; -}; -struct yobi_db_env { - // Methods used by MYSQL - void (*err) (const DB_ENV *, int, const char *, ...); - int (*open) (DB_ENV *, const char *, uint32_t, int); - int (*close) (DB_ENV *, uint32_t); - int (*txn_checkpoint) (DB_ENV *, uint32_t, uint32_t, uint32_t); - int (*log_flush) (DB_ENV *, const DB_LSN *); - void (*set_errcall) (DB_ENV *, void (*)(const char *, char *)); - void (*set_errpfx) (DB_ENV *, const char *); - void (*set_noticecall) (DB_ENV *, void (*)(DB_ENV *, db_notices)); - int (*set_flags) (DB_ENV *, uint32_t, int); - int (*set_data_dir) (DB_ENV *, const char *); - int (*set_tmp_dir) (DB_ENV *, const char *); - int (*set_verbose) (DB_ENV *, uint32_t, int); - int (*set_lg_bsize) (DB_ENV *, uint32_t); - int (*set_lg_dir) (DB_ENV *, const char *); - int (*set_lg_max) (DB_ENV *, uint32_t); - int (*set_cachesize) (DB_ENV *, uint32_t, uint32_t, int); - int (*set_lk_detect) (DB_ENV *, uint32_t); - int (*set_lk_max) (DB_ENV *, uint32_t); - int (*log_archive) (DB_ENV *, char **[], uint32_t); - int (*txn_stat) (DB_ENV *, DB_TXN_STAT **, uint32_t); -#ifdef _YDB_WRAP_H -#undef txn_begin -#endif - int (*txn_begin) (DB_ENV *, DB_TXN *, DB_TXN **, uint32_t); -#ifdef _YDB_WRAP_H -#define txn_begin txn_begin_ydb -#endif - // Internal state - struct db_env_ydb_internal *i; -}; -struct yobi_db_key_range { - double less,equal,greater; -}; -struct yobi_db_btree_stat { - 
uint32_t bt_ndata; - uint32_t bt_nkeys; -}; -struct yobi_db_txn_stat { - uint32_t st_nactive; - DB_TXN_ACTIVE *st_txnarray; -}; -struct yobi_db_lsn { - int hello; -}; -struct yobi_db_txn_active { - DB_LSN lsn; - uint32_t txnid; -}; - -#ifndef _YDB_WRAP_H -#define DB_VERSION_STRING "Yobiduck: Fractal DB (November 19, 2006)" -#else -#define DB_VERSION_STRING_ydb "Yobiduck: Fractal DB (November 19, 2006) (wrapped bdb)" -#endif - -enum { - DB_ARCH_ABS = 0x001, - DB_ARCH_LOG = 0x004 -}; - -enum { - DB_CREATE = 0x0000001, - DB_RDONLY = 0x0000010, - DB_RECOVER = 0x0000020, - DB_THREAD = 0x0000040, - DB_TXN_NOSYNC = 0x0000100, - - DB_PRIVATE = 0x0100000 -}; - -enum { - DB_LOCK_DEFAULT = 1, - DB_LOCK_OLDEST = 7, - DB_LOCK_RANDOM = 8 -}; - -enum { - DB_DUP = 0x000002 -}; - -enum { - DB_NOOVERWRITE = 23 -}; - -enum { - DB_INIT_LOCK = 0x001000, - DB_INIT_LOG = 0x002000, - DB_INIT_MPOOL = 0x004000, - DB_INIT_TXN = 0x008000 -}; - -int db_create (DB **, DB_ENV *, uint32_t); -int db_env_create (DB_ENV **, uint32_t); - -int txn_begin (DB_ENV *, DB_TXN *, DB_TXN **, uint32_t); -int txn_commit (DB_TXN *, uint32_t); -int txn_abort (DB_TXN *); - -int log_compare (const DB_LSN *, const DB_LSN *); - -#endif diff --git a/storage/tokudb/ft-index/tools/CMakeLists.txt b/storage/tokudb/ft-index/tools/CMakeLists.txt new file mode 100644 index 0000000000000..4ed0cb4cbdc08 --- /dev/null +++ b/storage/tokudb/ft-index/tools/CMakeLists.txt @@ -0,0 +1,20 @@ +set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE DONT_DEPRECATE_ERRNO) + +set(tools tokudb_dump tokuftdump tdb_logprint tdb-recover ftverify ba_replay) +foreach(tool ${tools}) + add_executable(${tool} ${tool}.cc) + add_dependencies(${tool} install_tdb_h) + target_link_libraries(${tool} ${LIBTOKUDB}_static ft_static z lzma ${LIBTOKUPORTABILITY}_static ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) + + add_space_separated_property(TARGET ${tool} COMPILE_FLAGS -fvisibility=hidden) +endforeach(tool) + +# link in math.h library just for this tool. +target_link_libraries(ftverify m) + +install( + TARGETS tokuftdump + DESTINATION bin + COMPONENT Server + ) + diff --git a/storage/tokudb/ft-index/tools/ba_replay.cc b/storage/tokudb/ft-index/tools/ba_replay.cc new file mode 100644 index 0000000000000..e274ac0a1e8c3 --- /dev/null +++ b/storage/tokudb/ft-index/tools/ba_replay.cc @@ -0,0 +1,679 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. 
+*/ + +// Replay a block allocator trace against different strategies and compare +// the results + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "ft/serialize/block_allocator.h" + +using std::map; +using std::set; +using std::string; +using std::vector; + +static int verbose = false; + +static void ba_replay_assert(bool pred, const char *msg, const char *line, int line_num) { + if (!pred) { + fprintf(stderr, "%s, line (#%d): %s\n", msg, line_num, line); + abort(); + } +} + +static char *trim_whitespace(char *line) { + // skip leading whitespace + while (isspace(*line)) { + line++; + } + return line; +} + +static int64_t parse_number(char **ptr, int line_num, int base) { + *ptr = trim_whitespace(*ptr); + char *line = *ptr; + + char *new_ptr; + int64_t n = strtoll(line, &new_ptr, base); + ba_replay_assert(n >= 0, "malformed trace (bad numeric token)", line, line_num); + ba_replay_assert(new_ptr > *ptr, "malformed trace (missing numeric token)", line, line_num); + *ptr = new_ptr; + return n; +} + +static uint64_t parse_uint64(char **ptr, int line_num) { + int64_t n = parse_number(ptr, line_num, 10); + // we happen to know that the uint64's we deal with will + // take less than 63 bits (they come from pointers) + return static_cast(n); +} + +static string parse_token(char **ptr, int line_num) { + *ptr = trim_whitespace(*ptr); + char *line = *ptr; + + // parse the first token, which represents the traced function + char token[64]; + int r = sscanf(*ptr, "%64s", token); + ba_replay_assert(r == 1, "malformed trace (missing string token)", line, line_num); + *ptr += strlen(token); + return string(token); +} + +static block_allocator::blockpair parse_blockpair(char **ptr, int line_num) { + *ptr = trim_whitespace(*ptr); + char *line = *ptr; + + uint64_t offset, size; + int bytes_read; + int r = sscanf(line, "[%" PRIu64 " %" PRIu64 "]%n", &offset, &size, &bytes_read); + ba_replay_assert(r == 2, "malformed trace (bad offset/size pair)", line, line_num); + *ptr += bytes_read; + return block_allocator::blockpair(offset, size); +} + +static char *strip_newline(char *line, bool *found) { + char *ptr = strchr(line, '\n'); + if (ptr != nullptr) { + if (found != nullptr) { + *found = true; + } + *ptr = '\0'; + } + return line; +} + +static char *read_trace_line(FILE *file) { + const int buf_size = 4096; + char buf[buf_size]; + std::stringstream ss; + while (true) { + if (fgets(buf, buf_size, file) == nullptr) { + break; + } + bool has_newline = false; + ss << strip_newline(buf, &has_newline); + if (has_newline) { + // end of the line, we're done out + break; + } + } + std::string s = ss.str(); + return s.size() ? toku_strdup(s.c_str()) : nullptr; +} + +static vector canonicalize_trace_from(FILE *file) { + // new trace, canonicalized from a raw trace + vector canonicalized_trace; + + // raw allocator id -> canonical allocator id + // + // keeps track of allocators that were created as part of the trace, + // and therefore will be part of the canonicalized trace. 
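+    // Illustrative sketch of the canonicalization (the concrete values below are
+    // hypothetical, not taken from any real trace): a raw line such as
+    //     ba_trace_alloc 0x7f3a28004200 4096 0 1048576
+    // (allocator pointer in hex, then size, heat, offset) is rewritten as
+    //     ba_trace_alloc 0 4096 0 17
+    // where 0 is the allocator's creation sequence number and 17 is the allocation
+    // sequence number that stands in for the raw offset, so traces captured in
+    // different runs can be replayed and compared line for line.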
+ uint64_t allocator_id_seq_num = 0; + map allocator_ids; + + // allocated offset -> allocation seq num + // + uint64_t allocation_seq_num = 0; + static const uint64_t ASN_NONE = (uint64_t) -1; + typedef map offset_seq_map; + + // raw allocator id -> offset_seq_map that tracks its allocations + map offset_to_seq_num_maps; + + int line_num = 0; + char *line; + while ((line = read_trace_line(file)) != nullptr) { + line_num++; + char *ptr = line; + + string fn = parse_token(&ptr, line_num); + int64_t allocator_id = parse_number(&ptr, line_num, 16); + + std::stringstream ss; + if (fn.find("ba_trace_create") != string::npos) { + ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line, line_num); + ba_replay_assert(fn == "ba_trace_create" || fn == "ba_trace_create_from_blockpairs", + "corrupted trace: bad fn", line, line_num); + + // we only convert the allocator_id to an allocator_id_seq_num + // in the canonical trace and leave the rest of the line as-is. + allocator_ids[allocator_id] = allocator_id_seq_num; + ss << fn << ' ' << allocator_id_seq_num << ' ' << trim_whitespace(ptr) << std::endl; + allocator_id_seq_num++; + + // First, read passed the reserve / alignment values. + (void) parse_uint64(&ptr, line_num); + (void) parse_uint64(&ptr, line_num); + if (fn == "ba_trace_create_from_blockpairs") { + // For each blockpair created by this traceline, add its offset to the offset seq map + // with asn ASN_NONE so that later canonicalizations of `free' know whether to write + // down the asn or the raw offset. + offset_seq_map *map = &offset_to_seq_num_maps[allocator_id]; + while (*trim_whitespace(ptr) != '\0') { + const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num); + (*map)[bp.offset] = ASN_NONE; + } + } + } else { + ba_replay_assert(allocator_ids.count(allocator_id) > 0, "corrupted trace: unknown allocator", line, line_num); + uint64_t canonical_allocator_id = allocator_ids[allocator_id]; + + // this is the map that tracks allocations for this allocator + offset_seq_map *map = &offset_to_seq_num_maps[allocator_id]; + + if (fn == "ba_trace_alloc") { + const uint64_t size = parse_uint64(&ptr, line_num); + const uint64_t heat = parse_uint64(&ptr, line_num); + const uint64_t offset = parse_uint64(&ptr, line_num); + ba_replay_assert(map->count(offset) == 0, "corrupted trace: double alloc", line, line_num); + + // remember that an allocation at `offset' has the current alloc seq num + (*map)[offset] = allocation_seq_num; + + // translate `offset = alloc(size)' to `asn = alloc(size)' + ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << heat << ' ' << allocation_seq_num << std::endl; + allocation_seq_num++; + } else if (fn == "ba_trace_free") { + const uint64_t offset = parse_uint64(&ptr, line_num); + ba_replay_assert(map->count(offset) != 0, "corrupted trace: invalid free", line, line_num); + + // get the alloc seq num for an allcation that occurred at `offset' + const uint64_t asn = (*map)[offset]; + map->erase(offset); + + // if there's an asn, then a corresponding ba_trace_alloc occurred and we should + // write `free(asn)'. otherwise, the blockpair was initialized from create_from_blockpairs + // and we write the original offset. 
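+                // For example (values hypothetical): "ba_trace_free 0x7f3a28004200 1048576"
+                // becomes "ba_trace_free_asn 0 17" when offset 1048576 was produced by a
+                // traced alloc, and "ba_trace_free_offset 0 1048576" when the block came
+                // from ba_trace_create_from_blockpairs.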
+ if (asn != ASN_NONE) { + ss << "ba_trace_free_asn" << ' ' << canonical_allocator_id << ' ' << asn << std::endl; + } else { + ss << "ba_trace_free_offset" << ' ' << canonical_allocator_id << ' ' << offset << std::endl; + } + } else if (fn == "ba_trace_destroy") { + // Remove this allocator from both maps + allocator_ids.erase(allocator_id); + offset_to_seq_num_maps.erase(allocator_id); + + // translate `destroy(ptr_id) to destroy(canonical_id)' + ss << fn << ' ' << canonical_allocator_id << ' ' << std::endl; + } else { + ba_replay_assert(false, "corrupted trace: bad fn", line, line_num); + } + } + canonicalized_trace.push_back(ss.str()); + + toku_free(line); + } + + if (allocator_ids.size() != 0) { + fprintf(stderr, "warning: leaked allocators. this might be ok if the tracing process is still running"); + } + + return canonicalized_trace; +} + +struct streaming_variance_calculator { + int64_t n_samples; + int64_t mean; + int64_t variance; + + // math credit: AoCP, Donald Knuth, '62 + void add_sample(int64_t x) { + n_samples++; + if (n_samples == 1) { + mean = x; + variance = 0; + } else { + int64_t old_mean = mean; + mean = old_mean + ((x - old_mean) / n_samples); + variance = (((n_samples - 1) * variance) + + ((x - old_mean) * (x - mean))) / n_samples; + } + } +}; + +struct canonical_trace_stats { + uint64_t n_lines_replayed; + + uint64_t n_create; + uint64_t n_create_from_blockpairs; + uint64_t n_alloc_hot; + uint64_t n_alloc_cold; + uint64_t n_free; + uint64_t n_destroy; + + struct streaming_variance_calculator alloc_hot_bytes; + struct streaming_variance_calculator alloc_cold_bytes; + + canonical_trace_stats() { + memset(this, 0, sizeof(*this)); + } +}; + +struct fragmentation_report { + TOKU_DB_FRAGMENTATION_S beginning; + TOKU_DB_FRAGMENTATION_S end; + fragmentation_report() { + memset(this, 0, sizeof(*this)); + } + void merge(const struct fragmentation_report &src_report) { + for (int i = 0; i < 2; i++) { + TOKU_DB_FRAGMENTATION_S *dst = i == 0 ? &beginning : &end; + const TOKU_DB_FRAGMENTATION_S *src = i == 0 ? 
&src_report.beginning : &src_report.end; + dst->file_size_bytes += src->file_size_bytes; + dst->data_bytes += src->data_bytes; + dst->data_blocks += src->data_blocks; + dst->checkpoint_bytes_additional += src->checkpoint_bytes_additional; + dst->checkpoint_blocks_additional += src->checkpoint_blocks_additional; + dst->unused_bytes += src->unused_bytes; + dst->unused_blocks += src->unused_blocks; + dst->largest_unused_block += src->largest_unused_block; + } + } +}; + +static void replay_canonicalized_trace(const vector &canonicalized_trace, + block_allocator::allocation_strategy strategy, + map *reports, + struct canonical_trace_stats *stats) { + // maps an allocator id to its block allocator + map allocator_map; + + // maps allocation seq num to allocated offset + map seq_num_to_offset; + + for (vector::const_iterator it = canonicalized_trace.begin(); + it != canonicalized_trace.end(); it++) { + const int line_num = stats->n_lines_replayed++; + + char *line = toku_strdup(it->c_str()); + line = strip_newline(line, nullptr); + + char *ptr = trim_whitespace(line); + + // canonical allocator id is in base 10, not 16 + string fn = parse_token(&ptr, line_num); + int64_t allocator_id = parse_number(&ptr, line_num, 10); + + if (fn.find("ba_trace_create") != string::npos) { + const uint64_t reserve_at_beginning = parse_uint64(&ptr, line_num); + const uint64_t alignment = parse_uint64(&ptr, line_num); + ba_replay_assert(allocator_map.count(allocator_id) == 0, + "corrupted canonical trace: double create", line, line_num); + + block_allocator *ba = new block_allocator(); + if (fn == "ba_trace_create") { + ba->create(reserve_at_beginning, alignment); + stats->n_create++; + } else { + ba_replay_assert(fn == "ba_trace_create_from_blockpairs", + "corrupted canonical trace: bad create fn", line, line_num); + vector pairs; + while (*trim_whitespace(ptr) != '\0') { + const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num); + pairs.push_back(bp); + } + ba->create_from_blockpairs(reserve_at_beginning, alignment, &pairs[0], pairs.size()); + stats->n_create_from_blockpairs++; + } + ba->set_strategy(strategy); + + TOKU_DB_FRAGMENTATION_S report; + ba->get_statistics(&report); + (*reports)[allocator_id].beginning = report; + allocator_map[allocator_id] = ba; + } else { + ba_replay_assert(allocator_map.count(allocator_id) > 0, + "corrupted canonical trace: no such allocator", line, line_num); + + block_allocator *ba = allocator_map[allocator_id]; + if (fn == "ba_trace_alloc") { + // replay an `alloc' whose result will be associated with a certain asn + const uint64_t size = parse_uint64(&ptr, line_num); + const uint64_t heat = parse_uint64(&ptr, line_num); + const uint64_t asn = parse_uint64(&ptr, line_num); + ba_replay_assert(seq_num_to_offset.count(asn) == 0, + "corrupted canonical trace: double alloc (asn in use)", line, line_num); + + uint64_t offset; + ba->alloc_block(size, heat, &offset); + seq_num_to_offset[asn] = offset; + heat ? stats->n_alloc_hot++ : stats->n_alloc_cold++; + heat ? 
stats->alloc_hot_bytes.add_sample(size) : stats->alloc_cold_bytes.add_sample(size); + } else if (fn == "ba_trace_free_asn") { + // replay a `free' on a block whose offset is the result of an alloc with an asn + const uint64_t asn = parse_uint64(&ptr, line_num); + ba_replay_assert(seq_num_to_offset.count(asn) == 1, + "corrupted canonical trace: double free (asn unused)", line, line_num); + + const uint64_t offset = seq_num_to_offset[asn]; + ba->free_block(offset); + seq_num_to_offset.erase(asn); + stats->n_free++; + } else if (fn == "ba_trace_free_offset") { + // replay a `free' on a block whose offset was explicitly set during a create_from_blockpairs + const uint64_t offset = parse_uint64(&ptr, line_num); + ba->free_block(offset); + stats->n_free++; + } else if (fn == "ba_trace_destroy") { + TOKU_DB_FRAGMENTATION_S report; + ba->get_statistics(&report); + ba->destroy(); + (*reports)[allocator_id].end = report; + allocator_map.erase(allocator_id); + stats->n_destroy++; + } else { + ba_replay_assert(false, "corrupted canonical trace: bad fn", line, line_num); + } + } + + toku_free(line); + } +} + +static const char *strategy_to_cstring(block_allocator::allocation_strategy strategy) { + switch (strategy) { + case block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT: + return "first-fit"; + case block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT: + return "best-fit"; + case block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE: + return "heat-zone"; + case block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT: + return "padded-fit"; + default: + abort(); + } +} + +static block_allocator::allocation_strategy cstring_to_strategy(const char *str) { + if (strcmp(str, "first-fit") == 0) { + return block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT; + } + if (strcmp(str, "best-fit") == 0) { + return block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT; + } + if (strcmp(str, "heat-zone") == 0) { + return block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE; + } + if (strcmp(str, "padded-fit") != 0) { + fprintf(stderr, "bad strategy string: %s\n", str); + abort(); + } + return block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT; +} + +static void print_result_verbose(uint64_t allocator_id, + block_allocator::allocation_strategy strategy, + const struct fragmentation_report &report) { + if (report.end.data_bytes + report.end.unused_bytes + + report.beginning.data_bytes + report.beginning.unused_bytes + < 32UL * 1024 * 1024) { + printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); + return; + } + + printf(" allocator_id: %20" PRId64 "\n", allocator_id); + printf(" strategy: %20s\n", strategy_to_cstring(strategy)); + + for (int i = 0; i < 2; i++) { + const TOKU_DB_FRAGMENTATION_S *r = i == 0 ? &report.beginning : &report.end; + printf("%s\n", i == 0 ? 
"BEFORE" : "AFTER"); + + uint64_t total_bytes = r->data_bytes + r->unused_bytes; + uint64_t total_blocks = r->data_blocks + r->unused_blocks; + + // byte statistics + printf(" total bytes: %20" PRId64 "\n", total_bytes); + printf(" used bytes: %20" PRId64 " (%.3lf)\n", r->data_bytes, + static_cast(r->data_bytes) / total_bytes); + printf(" unused bytes: %20" PRId64 " (%.3lf)\n", r->unused_bytes, + static_cast(r->unused_bytes) / total_bytes); + + // block statistics + printf(" total blocks: %20" PRId64 "\n", total_blocks); + printf(" used blocks: %20" PRId64 " (%.3lf)\n", r->data_blocks, + static_cast(r->data_blocks) / total_blocks); + printf(" unused blocks: %20" PRId64 " (%.3lf)\n", r->unused_blocks, + static_cast(r->unused_blocks) / total_blocks); + + // misc + printf(" largest unused: %20" PRId64 "\n", r->largest_unused_block); + } +} + +static void print_result(uint64_t allocator_id, + block_allocator::allocation_strategy strategy, + const struct fragmentation_report &report) { + const TOKU_DB_FRAGMENTATION_S *beginning = &report.beginning; + const TOKU_DB_FRAGMENTATION_S *end = &report.end; + + uint64_t total_beginning_bytes = beginning->data_bytes + beginning->unused_bytes; + uint64_t total_end_bytes = end->data_bytes + end->unused_bytes; + if (total_end_bytes + total_beginning_bytes < 32UL * 1024 * 1024) { + if (verbose) { + printf("\n"); + printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); + } + return; + } + printf("\n"); + if (verbose) { + print_result_verbose(allocator_id, strategy, report); + } else { + printf(" %-15s: allocator %" PRId64 ", %.3lf used bytes (%.3lf before)\n", + strategy_to_cstring(strategy), allocator_id, + static_cast(report.end.data_bytes) / total_end_bytes, + static_cast(report.beginning.data_bytes) / total_beginning_bytes); + } +} + +static int only_aggregate_reports; + +static struct option getopt_options[] = { + { "verbose", no_argument, &verbose, 1 }, + { "only-aggregate-reports", no_argument, &only_aggregate_reports, 1 }, + { "include-strategy", required_argument, nullptr, 'i' }, + { "exclude-strategy", required_argument, nullptr, 'x' }, + { nullptr, 0, nullptr, 0 }, +}; + +int main(int argc, char *argv[]) { + int opt; + set candidate_strategies, excluded_strategies; + while ((opt = getopt_long(argc, argv, "", getopt_options, nullptr)) != -1) { + switch (opt) { + case 0: + break; + case 'i': + candidate_strategies.insert(cstring_to_strategy(optarg)); + break; + case 'x': + excluded_strategies.insert(cstring_to_strategy(optarg)); + break; + case '?': + default: + abort(); + }; + } + // Default to everything if nothing was explicitly included. 
+ if (candidate_strategies.empty()) { + candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT); + candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT); + candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT); + candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE); + } + // ..but remove anything that was explicitly excluded + for (set::const_iterator it = excluded_strategies.begin(); + it != excluded_strategies.end(); it++) { + candidate_strategies.erase(*it); + } + + // Run the real trace + // + // First, read the raw trace from stdin + vector canonicalized_trace = canonicalize_trace_from(stdin); + + if (!only_aggregate_reports) { + printf("\n"); + printf("Individual reports, by allocator:\n"); + } + + struct canonical_trace_stats stats; + map reports_by_strategy; + for (set::const_iterator it = candidate_strategies.begin(); + it != candidate_strategies.end(); it++) { + const block_allocator::allocation_strategy strategy(*it); + + // replay the canonicalized trace against the current strategy. + // + // we provided the allocator map so we can gather statistics later + struct canonical_trace_stats dummy_stats; + map reports; + replay_canonicalized_trace(canonicalized_trace, strategy, &reports, + // Only need to gather canonical trace stats once + it == candidate_strategies.begin() ? &stats : &dummy_stats); + + struct fragmentation_report aggregate_report; + memset(&aggregate_report, 0, sizeof(aggregate_report)); + for (map::iterator rp = reports.begin(); + rp != reports.end(); rp++) { + const struct fragmentation_report &report = rp->second; + aggregate_report.merge(report); + if (!only_aggregate_reports) { + print_result(rp->first, strategy, report); + } + } + reports_by_strategy[strategy] = aggregate_report; + } + + printf("\n"); + printf("Aggregate reports, by strategy:\n"); + + for (map::iterator it = reports_by_strategy.begin(); + it != reports_by_strategy.end(); it++) { + print_result(0, it->first, it->second); + } + + printf("\n"); + printf("Overall trace stats:\n"); + printf("\n"); + printf(" n_lines_played: %15" PRIu64 "\n", stats.n_lines_replayed); + printf(" n_create: %15" PRIu64 "\n", stats.n_create); + printf(" n_create_from_blockpairs: %15" PRIu64 "\n", stats.n_create_from_blockpairs); + printf(" n_alloc_hot: %15" PRIu64 "\n", stats.n_alloc_hot); + printf(" n_alloc_cold: %15" PRIu64 "\n", stats.n_alloc_cold); + printf(" n_free: %15" PRIu64 "\n", stats.n_free); + printf(" n_destroy: %15" PRIu64 "\n", stats.n_destroy); + printf("\n"); + printf(" avg_alloc_hot: %15" PRIu64 "\n", stats.alloc_hot_bytes.mean); + printf(" stddev_alloc_hot: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_hot_bytes.variance)); + printf(" avg_alloc_cold: %15" PRIu64 "\n", stats.alloc_cold_bytes.mean); + printf(" stddev_alloc_cold: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_cold_bytes.variance)); + printf("\n"); + + return 0; +} diff --git a/storage/tokudb/ft-index/ft/ftverify.cc b/storage/tokudb/ft-index/tools/ftverify.cc similarity index 93% rename from storage/tokudb/ft-index/ft/ftverify.cc rename to storage/tokudb/ft-index/tools/ftverify.cc index fbac7399dc4ec..120658b2cb19b 100644 --- a/storage/tokudb/ft-index/ft/ftverify.cc +++ b/storage/tokudb/ft-index/tools/ftverify.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. 
Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -94,18 +94,19 @@ PATENT RIGHTS GRANT: // fractal tree file, one block at a time. //////////////////////////////////////////////////////////////////// -#include "fttypes.h" -#include "ft-internal.h" -#include "ft_layout_version.h" -#include "block_table.h" -#include "x1764.h" -#include "rbuf.h" -#include "sub_block.h" +#include "portability/toku_assert.h" +#include "portability/toku_list.h" +#include "portability/toku_portability.h" -#include -#include -#include -#include +#include "ft/serialize/block_allocator.h" +#include "ft/ft-internal.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_layout_version.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/node.h" +#include "ft/serialize/rbuf.h" +#include "ft/serialize/sub_block.h" +#include "util/threadpool.h" #include #include @@ -200,7 +201,7 @@ deserialize_headers(int fd, struct ft **h1p, struct ft **h2p) } } { - toku_off_t header_1_off = BLOCK_ALLOCATOR_HEADER_RESERVE; + toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; r1 = deserialize_ft_from_fd_into_rbuf( fd, header_1_off, @@ -216,7 +217,7 @@ deserialize_headers(int fd, struct ft **h1p, struct ft **h2p) // If either header is too new, the dictionary is unreadable if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) { - fprintf(stderr, "This dictionary was created with too new a version of TokuDB. Aborting.\n"); + fprintf(stderr, "This dictionary was created with a version of TokuFT that is too new. Aborting.\n"); abort(); } if (h0_acceptable) { @@ -411,10 +412,8 @@ check_block(BLOCKNUM blocknum, int64_t UU(blocksize), int64_t UU(address), void // Passes our check_block() function to be called as we iterate over // the block table. This will print any interesting failures and // update us on our progress. -static void -check_block_table(int fd, BLOCK_TABLE bt, struct ft *h) -{ - int64_t num_blocks = toku_block_get_blocks_in_use_unlocked(bt); +static void check_block_table(int fd, block_table *bt, struct ft *h) { + int64_t num_blocks = bt->get_blocks_in_use_unlocked(); printf("Starting verification of checkpoint containing"); printf(" %" PRId64 " blocks.\n", num_blocks); fflush(stdout); @@ -424,13 +423,11 @@ check_block_table(int fd, BLOCK_TABLE bt, struct ft *h) .blocks_failed = 0, .total_blocks = num_blocks, .h = h }; - int r = 0; - r = toku_blocktable_iterate(bt, - TRANSLATION_CURRENT, - check_block, - &extra, - true, - true); + int r = bt->iterate(block_table::TRANSLATION_CURRENT, + check_block, + &extra, + true, + true); if (r != 0) { // We can print more information here if necessary. 
} @@ -492,11 +489,11 @@ main(int argc, char const * const argv[]) // walk over the block table and check blocks if (h1) { printf("Checking dictionary from header 1.\n"); - check_block_table(dictfd, h1->blocktable, h1); + check_block_table(dictfd, &h1->blocktable, h1); } if (h2) { printf("Checking dictionary from header 2.\n"); - check_block_table(dictfd, h2->blocktable, h2); + check_block_table(dictfd, &h2->blocktable, h2); } if (h1 == NULL && h2 == NULL) { printf("Both headers have a corruption and could not be used.\n"); diff --git a/storage/tokudb/ft-index/utils/pmprof b/storage/tokudb/ft-index/tools/pmprof similarity index 100% rename from storage/tokudb/ft-index/utils/pmprof rename to storage/tokudb/ft-index/tools/pmprof diff --git a/storage/tokudb/ft-index/ft/tdb-recover.cc b/storage/tokudb/ft-index/tools/tdb-recover.cc similarity index 92% rename from storage/tokudb/ft-index/ft/tdb-recover.cc rename to storage/tokudb/ft-index/tools/tdb-recover.cc index 0d3fe0c75be34..8f185bedb04d0 100644 --- a/storage/tokudb/ft-index/ft/tdb-recover.cc +++ b/storage/tokudb/ft-index/tools/tdb-recover.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -95,18 +95,15 @@ PATENT RIGHTS GRANT: // cd ../src/tests/tmpdir // ../../../ft/recover ../dir.test_log2.c.tdb -#include "ft-ops.h" -#include "recover.h" +#include "ft/ft-ops.h" +#include "ft/logger/recover.h" static int recovery_main(int argc, const char *const argv[]); -int -main(int argc, const char *const argv[]) { - { - int rr = toku_ft_layer_init(); - assert(rr==0); - } - int r = recovery_main(argc, argv); +int main(int argc, const char *const argv[]) { + int r = toku_ft_layer_init(); + assert(r == 0); + r = recovery_main(argc, argv); toku_ft_layer_destroy(); return r; } @@ -123,11 +120,11 @@ int recovery_main (int argc, const char *const argv[]) { return(1); } - int r = tokudb_recover(NULL, - NULL_prepared_txn_callback, - NULL_keep_cachetable_callback, - NULL_logger, - data_dir, log_dir, NULL, NULL, NULL, NULL, 0); + int r = tokuft_recover(nullptr, + nullptr, + nullptr, + nullptr, + data_dir, log_dir, nullptr, nullptr, nullptr, nullptr, 0); if (r!=0) { fprintf(stderr, "Recovery failed\n"); return(1); diff --git a/storage/tokudb/ft-index/ft/tdb_logprint.cc b/storage/tokudb/ft-index/tools/tdb_logprint.cc similarity index 97% rename from storage/tokudb/ft-index/ft/tdb_logprint.cc rename to storage/tokudb/ft-index/tools/tdb_logprint.cc index c221a88e36c9a..1dd7581b9f563 100644 --- a/storage/tokudb/ft-index/ft/tdb_logprint.cc +++ b/storage/tokudb/ft-index/tools/tdb_logprint.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -91,7 +91,8 @@ PATENT RIGHTS GRANT: /* Dump the log from stdin to stdout. 
*/ -#include +#include "ft/log_header.h" +#include "ft/logger/logger.h" static void newmain (int count) { int i; diff --git a/storage/tokudb/ft-index/utils/tokudb_dump.cc b/storage/tokudb/ft-index/tools/tokudb_dump.cc similarity index 65% rename from storage/tokudb/ft-index/utils/tokudb_dump.cc rename to storage/tokudb/ft-index/tools/tokudb_dump.cc index c560093be31a3..2da50bb793ad3 100644 --- a/storage/tokudb/ft-index/utils/tokudb_dump.cc +++ b/storage/tokudb/ft-index/tools/tokudb_dump.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +88,10 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." +#include + +#include +#include #include #include #include @@ -97,8 +101,8 @@ PATENT RIGHTS GRANT: #include #include #include -#include -#include "tokudb_common.h" +#include +#include typedef struct { bool leadingspace; @@ -120,7 +124,245 @@ typedef struct { } dump_globals; dump_globals g; -#include "tokudb_common_funcs.h" + +#define SET_BITS(bitvector, bits) ((bitvector) |= (bits)) +#define REMOVE_BITS(bitvector, bits) ((bitvector) &= ~(bits)) +#define IS_SET_ANY(bitvector, bits) ((bitvector) & (bits)) +#define IS_SET_ALL(bitvector, bits) (((bitvector) & (bits)) == (bits)) + +#define IS_POWER_OF_2(num) ((num) > 0 && ((num) & ((num) - 1)) == 0) + +//DB_ENV->err disabled since it does not use db_strerror +#define PRINT_ERROR(retval, ...) \ +do { \ +if (0) g.dbenv->err(g.dbenv, retval, __VA_ARGS__); \ +else { \ + fprintf(stderr, "\tIn %s:%d %s()\n", __FILE__, __LINE__, __FUNCTION__); \ + fprintf(stderr, "%s: %s:", g.progname, db_strerror(retval)); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + fflush(stderr); \ +} \ +} while (0) + +//DB_ENV->err disabled since it does not use db_strerror, errx does not exist. +#define PRINT_ERRORX(...) \ +do { \ +if (0) g.dbenv->err(g.dbenv, 0, __VA_ARGS__); \ +else { \ + fprintf(stderr, "\tIn %s:%d %s()\n", __FILE__, __LINE__, __FUNCTION__); \ + fprintf(stderr, "%s: ", g.progname); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + fflush(stderr); \ +} \ +} while (0) + +int strtoint32 (char* str, int32_t* num, int32_t min, int32_t max, int base); +int strtouint32 (char* str, uint32_t* num, uint32_t min, uint32_t max, int base); +int strtoint64 (char* str, int64_t* num, int64_t min, int64_t max, int base); +int strtouint64 (char* str, uint64_t* num, uint64_t min, uint64_t max, int base); + +/* + * Convert a string to an integer of type "type". + * + * + * Sets errno and returns: + * EINVAL: str == NULL, num == NULL, or string not of the form [ \t]*[+-]?[0-9]+ + * ERANGE: value out of range specified. (Range of [min, max]) + * + * *num is unchanged on error. 
+ * Returns: + * + */ +#define DEF_STR_TO(name, type, bigtype, strtofunc, frmt) \ +int name(char* str, type* num, type min, type max, int base) \ +{ \ + char* test; \ + bigtype value; \ + \ + assert(str); \ + assert(num); \ + assert(min <= max); \ + assert(g.dbenv || g.progname); \ + assert(base == 0 || (base >= 2 && base <= 36)); \ + \ + errno = 0; \ + while (isspace(*str)) str++; \ + value = strtofunc(str, &test, base); \ + if ((*test != '\0' && *test != '\n') || test == str) { \ + PRINT_ERRORX("%s: Invalid numeric argument\n", str); \ + errno = EINVAL; \ + goto error; \ + } \ + if (errno != 0) { \ + PRINT_ERROR(errno, "%s\n", str); \ + } \ + if (value < min) { \ + PRINT_ERRORX("%s: Less than minimum value (%" frmt ")\n", str, min); \ + goto error; \ + } \ + if (value > max) { \ + PRINT_ERRORX("%s: Greater than maximum value (%" frmt ")\n", str, max); \ + goto error; \ + } \ + *num = value; \ + return EXIT_SUCCESS; \ +error: \ + return errno; \ +} + +DEF_STR_TO(strtoint32, int32_t, int64_t, strtoll, PRId32) +DEF_STR_TO(strtouint32, uint32_t, uint64_t, strtoull, PRIu32) +DEF_STR_TO(strtoint64, int64_t, int64_t, strtoll, PRId64) +DEF_STR_TO(strtouint64, uint64_t, uint64_t, strtoull, PRIu64) + +static inline void +outputbyte(uint8_t ch) +{ + if (g.plaintext) { + if (ch == '\\') printf("\\\\"); + else if (isprint(ch)) printf("%c", ch); + else printf("\\%02x", ch); + } + else printf("%02x", ch); +} + +static inline void +outputstring(char* str) +{ + char* p; + + for (p = str; *p != '\0'; p++) { + outputbyte((uint8_t)*p); + } +} + +static inline void +outputplaintextstring(char* str) +{ + bool old_plaintext = g.plaintext; + g.plaintext = true; + outputstring(str); + g.plaintext = old_plaintext; +} + +static inline int +hextoint(int ch) +{ + if (ch >= '0' && ch <= '9') { + return ch - '0'; + } + if (ch >= 'a' && ch <= 'z') { + return ch - 'a' + 10; + } + if (ch >= 'A' && ch <= 'Z') { + return ch - 'A' + 10; + } + return EOF; +} + +static inline int +printabletocstring(char* inputstr, char** poutputstr) +{ + char highch; + char lowch; + char nextch; + char* cstring; + + assert(inputstr); + assert(poutputstr); + assert(*poutputstr == NULL); + + cstring = (char*)toku_malloc((strlen(inputstr) + 1) * sizeof(char)); + if (cstring == NULL) { + PRINT_ERROR(errno, "printabletocstring"); + goto error; + } + + for (*poutputstr = cstring; *inputstr != '\0'; inputstr++) { + if (*inputstr == '\\') { + if ((highch = *++inputstr) == '\\') { + *cstring++ = '\\'; + continue; + } + if (highch == '\0' || (lowch = *++inputstr) == '\0') { + PRINT_ERROR(0, "unexpected end of input data or key/data pair"); + goto error; + } + if (!isxdigit(highch)) { + PRINT_ERROR(0, "Unexpected '%c' (non-hex) input.\n", highch); + goto error; + } + if (!isxdigit(lowch)) { + PRINT_ERROR(0, "Unexpected '%c' (non-hex) input.\n", lowch); + goto error; + } + nextch = (char)((hextoint(highch) << 4) | hextoint(lowch)); + if (nextch == '\0') { + /* Database names are c strings, and cannot have extra NULL terminators. */ + PRINT_ERROR(0, "Unexpected '\\00' in input.\n"); + goto error; + } + *cstring++ = nextch; + } + else *cstring++ = *inputstr; + } + /* Terminate the string. 
*/ + *cstring = '\0'; + return EXIT_SUCCESS; + +error: + PRINT_ERROR(0, "Quitting out due to errors.\n"); + return EXIT_FAILURE; +} + +static inline int +verify_library_version(void) +{ + int major; + int minor; + + db_version(&major, &minor, NULL); + if (major != DB_VERSION_MAJOR || minor != DB_VERSION_MINOR) { + PRINT_ERRORX("version %d.%d doesn't match library version %d.%d\n", + DB_VERSION_MAJOR, DB_VERSION_MINOR, major, minor); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + +static int last_caught = 0; + +static void catch_signal(int which_signal) { + last_caught = which_signal; + if (last_caught == 0) last_caught = SIGINT; +} + +static inline void +init_catch_signals(void) { + signal(SIGINT, catch_signal); + signal(SIGTERM, catch_signal); +#ifdef SIGHUP + signal(SIGHUP, catch_signal); +#endif +#ifdef SIGPIPE + signal(SIGPIPE, catch_signal); +#endif +} + +static inline int +caught_any_signals(void) { + return last_caught != 0; +} + +static inline void +resend_signals(void) { + if (last_caught) { + signal(last_caught, SIG_DFL); + raise(last_caught); + } +} static int usage (void); static int create_init_env(void); @@ -131,7 +373,7 @@ static int dump_footer (void); static int dump_header (void); static int close_database (void); -int test_main(int argc, char *const argv[]) { +int main(int argc, char *const argv[]) { int ch; int retval; @@ -368,15 +610,6 @@ int create_init_env() //However, do we need to use DB_INIT_LOG to join a logging environment? //REMOVE_BITS(flags, DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN); SET_BITS(flags, DB_CREATE | DB_PRIVATE); -#if defined(USE_BDB) && USE_BDB==1 - { - int r; - r = dbenv->set_lk_max_objects(dbenv, 100000); - assert(r==0); - r = dbenv->set_lk_max_locks(dbenv, 100000); - assert(r==0); - } -#endif retval = dbenv->open(dbenv, g.homedir, flags, 0); if (retval) { diff --git a/storage/tokudb/ft-index/ft/tokuftdump.cc b/storage/tokudb/ft-index/tools/tokuftdump.cc similarity index 82% rename from storage/tokudb/ft-index/ft/tokuftdump.cc rename to storage/tokudb/ft-index/tools/tokuftdump.cc index 3db319a5b45ab..3aab5401cd39b 100644 --- a/storage/tokudb/ft-index/ft/tokuftdump.cc +++ b/storage/tokudb/ft-index/tools/tokuftdump.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -91,10 +91,6 @@ PATENT RIGHTS GRANT: // Dump a fractal tree file -#include "cachetable.h" -#include "ft.h" -#include "fttypes.h" -#include "ft-internal.h" #include #include #include @@ -102,6 +98,14 @@ PATENT RIGHTS GRANT: #include #include +#include "ft/serialize/block_table.h" +#include "ft/cachetable/cachetable.h" +#include "ft/ft.h" +#include "ft/ft-internal.h" +#include "ft/serialize/ft-serialize.h" +#include "ft/serialize/ft_node-serialize.h" +#include "ft/node.h" + static int do_dump_data = 1; static int do_interactive = 0; static int do_header = 0; @@ -109,6 +113,8 @@ static int do_fragmentation = 0; static int do_garbage = 0; static int do_translation_table = 0; static int do_rootnode = 0; +static int do_node = 0; +static BLOCKNUM do_node_num; static int do_tsv = 0; static const char *arg0; @@ -121,9 +127,9 @@ static void format_time(const uint64_t time_int, char *buf) { buf[24] = 0; } -static void print_item(bytevec val, ITEMLEN len) { +static void print_item(const void *val, uint32_t len) { printf("\""); - ITEMLEN i; + uint32_t i; for (i=0; iblocktable, blocknum, &diskoffset, &disksize); + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &diskoffset, &disksize); printf(" diskoffset =%" PRId64 "\n", diskoffset); printf(" disksize =%" PRId64 "\n", disksize); printf(" serialize_size =%u\n", toku_serialize_ftnode_size(n)); printf(" flags =%u\n", n->flags); - printf(" thisnodename=%" PRId64 "\n", n->thisnodename.b); + printf(" blocknum=%" PRId64 "\n", n->blocknum.b); //printf(" log_lsn =%lld\n", n->log_lsn.lsn); // The log_lsn is a memory-only value. printf(" height =%d\n", n->height); printf(" layout_version=%d\n", n->layout_version); @@ -252,15 +258,15 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT h) { tokutime_to_seconds(bfe.deserialize_time)); printf(" n_children=%d\n", n->n_children); - printf(" total_childkeylens=%u\n", n->totalchildkeylens); + printf(" pivotkeys.total_size()=%u\n", (unsigned) n->pivotkeys.total_size()); printf(" pivots:\n"); for (int i=0; in_children-1; i++) { - const DBT *piv = &n->childkeys[i]; + const DBT piv = n->pivotkeys.get_pivot(i); printf(" pivot %2d:", i); if (n->flags) printf(" flags=%x ", n->flags); - print_item(piv->data, piv->size); + print_item(piv.data, piv.size); printf("\n"); } printf(" children:\n"); @@ -275,44 +281,53 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT h) { printf(" buffer contains %u bytes (%d items)\n", n_bytes, n_entries); } if (do_dump_data) { - FIFO_ITERATE(bnc->buffer, key, keylen, data, datalen, typ, msn, xids, UU(is_fresh), - { - printf(" msn=%" PRIu64 " (0x%" PRIx64 ") ", msn.msn, msn.msn); - printf(" TYPE="); - switch ((enum ft_msg_type)typ) { - case FT_NONE: printf("NONE"); goto ok; - case FT_INSERT: printf("INSERT"); goto ok; - case FT_INSERT_NO_OVERWRITE: printf("INSERT_NO_OVERWRITE"); goto ok; - case FT_DELETE_ANY: printf("DELETE_ANY"); goto ok; - case FT_ABORT_ANY: printf("ABORT_ANY"); goto ok; - case FT_COMMIT_ANY: printf("COMMIT_ANY"); goto ok; - case FT_COMMIT_BROADCAST_ALL: printf("COMMIT_BROADCAST_ALL"); goto ok; - case FT_COMMIT_BROADCAST_TXN: printf("COMMIT_BROADCAST_TXN"); goto ok; - case FT_ABORT_BROADCAST_TXN: printf("ABORT_BROADCAST_TXN"); goto ok; - case FT_OPTIMIZE: printf("OPTIMIZE"); goto ok; - case FT_OPTIMIZE_FOR_UPGRADE: printf("OPTIMIZE_FOR_UPGRADE"); goto ok; - case FT_UPDATE: printf("UPDATE"); goto ok; - case FT_UPDATE_BROADCAST_ALL: printf("UPDATE_BROADCAST_ALL"); goto ok; - } - printf("HUH?"); - ok: - printf(" xid="); - xids_fprintf(stdout, xids); - 
printf(" "); - print_item(key, keylen); - if (datalen>0) { - printf(" "); - print_item(data, datalen); - } - printf("\n"); - } - ); + struct dump_data_fn { + int operator()(const ft_msg &msg, bool UU(is_fresh)) { + enum ft_msg_type type = (enum ft_msg_type) msg.type(); + MSN msn = msg.msn(); + XIDS xids = msg.xids(); + const void *key = msg.kdbt()->data; + const void *data = msg.vdbt()->data; + uint32_t keylen = msg.kdbt()->size; + uint32_t datalen = msg.vdbt()->size; + printf(" msn=%" PRIu64 " (0x%" PRIx64 ") ", msn.msn, msn.msn); + printf(" TYPE="); + switch (type) { + case FT_NONE: printf("NONE"); goto ok; + case FT_INSERT: printf("INSERT"); goto ok; + case FT_INSERT_NO_OVERWRITE: printf("INSERT_NO_OVERWRITE"); goto ok; + case FT_DELETE_ANY: printf("DELETE_ANY"); goto ok; + case FT_ABORT_ANY: printf("ABORT_ANY"); goto ok; + case FT_COMMIT_ANY: printf("COMMIT_ANY"); goto ok; + case FT_COMMIT_BROADCAST_ALL: printf("COMMIT_BROADCAST_ALL"); goto ok; + case FT_COMMIT_BROADCAST_TXN: printf("COMMIT_BROADCAST_TXN"); goto ok; + case FT_ABORT_BROADCAST_TXN: printf("ABORT_BROADCAST_TXN"); goto ok; + case FT_OPTIMIZE: printf("OPTIMIZE"); goto ok; + case FT_OPTIMIZE_FOR_UPGRADE: printf("OPTIMIZE_FOR_UPGRADE"); goto ok; + case FT_UPDATE: printf("UPDATE"); goto ok; + case FT_UPDATE_BROADCAST_ALL: printf("UPDATE_BROADCAST_ALL"); goto ok; + } + printf("HUH?"); +ok: + printf(" xid="); + toku_xids_fprintf(stdout, xids); + printf(" "); + print_item(key, keylen); + if (datalen>0) { + printf(" "); + print_item(data, datalen); + } + printf("\n"); + return 0; + } + } dump_fn; + bnc->msg_buffer.iterate(dump_fn); } } else { printf(" n_bytes_in_buffer= %" PRIu64 "", BLB_DATA(n, i)->get_disk_size()); - printf(" items_in_buffer=%u\n", BLB_DATA(n, i)->omt_size()); + printf(" items_in_buffer=%u\n", BLB_DATA(n, i)->num_klpairs()); if (do_dump_data) { - BLB_DATA(n, i)->omt_iterate(NULL); + BLB_DATA(n, i)->iterate(NULL); } } } @@ -320,14 +335,14 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT h) { toku_free(ndd); } -static void dump_block_translation(FT h, uint64_t offset) { - toku_blocknum_dump_translation(h->blocktable, make_blocknum(offset)); +static void dump_block_translation(FT ft, uint64_t offset) { + ft->blocktable.blocknum_dump_translation(make_blocknum(offset)); } -static void dump_fragmentation(int UU(f), FT h, int tsv) { +static void dump_fragmentation(int UU(f), FT ft, int tsv) { int64_t used_space; int64_t total_space; - toku_blocktable_internal_fragmentation(h->blocktable, &total_space, &used_space); + ft->blocktable.internal_fragmentation(&total_space, &used_space); int64_t fragsizes = total_space - used_space; if (tsv) { @@ -343,7 +358,7 @@ static void dump_fragmentation(int UU(f), FT h, int tsv) { typedef struct { int fd; - FT h; + FT ft; uint64_t blocksizes; uint64_t leafsizes; uint64_t leafblocks; @@ -353,8 +368,8 @@ static int nodesizes_helper(BLOCKNUM b, int64_t size, int64_t UU(address), void frag_help_extra *CAST_FROM_VOIDP(info, extra); FTNODE n; FTNODE_DISK_DATA ndd = NULL; - struct ftnode_fetch_extra bfe; - fill_bfe_for_full_read(&bfe, info->h); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(info->ft); int r = toku_deserialize_ftnode_from(info->fd, b, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); if (r==0) { info->blocksizes += size; @@ -368,13 +383,13 @@ static int nodesizes_helper(BLOCKNUM b, int64_t size, int64_t UU(address), void return 0; } -static void dump_nodesizes(int fd, FT h) { +static void dump_nodesizes(int fd, FT ft) { frag_help_extra info; 
memset(&info, 0, sizeof(info)); info.fd = fd; - info.h = h; - toku_blocktable_iterate(h->blocktable, TRANSLATION_CHECKPOINTED, - nodesizes_helper, &info, true, true); + info.ft = ft; + ft->blocktable.iterate(block_table::TRANSLATION_CHECKPOINTED, + nodesizes_helper, &info, true, true); printf("leafblocks\t%" PRIu64 "\n", info.leafblocks); printf("blocksizes\t%" PRIu64 "\n", info.blocksizes); printf("leafsizes\t%" PRIu64 "\n", info.leafsizes); @@ -391,12 +406,12 @@ static void dump_garbage_stats(int fd, FT ft) { typedef struct __dump_node_extra { int fd; - FT h; + FT ft; } dump_node_extra; static int dump_node_wrapper(BLOCKNUM b, int64_t UU(size), int64_t UU(address), void *extra) { dump_node_extra *CAST_FROM_VOIDP(info, extra); - dump_node(info->fd, b, info->h); + dump_node(info->fd, b, info->ft); return 0; } @@ -432,7 +447,7 @@ static void verify_block(unsigned char *cp, uint64_t file_offset, uint64_t size) printf("header length too big: %u\n", header_length); return; } - uint32_t header_xsum = x1764_memory(cp, header_length); + uint32_t header_xsum = toku_x1764_memory(cp, header_length); uint32_t expected_xsum = toku_dtoh32(get_unaligned_uint32(&cp[header_length])); if (header_xsum != expected_xsum) { printf("header checksum failed: %u %u\n", header_xsum, expected_xsum); @@ -450,7 +465,7 @@ static void verify_block(unsigned char *cp, uint64_t file_offset, uint64_t size) // verify the sub block header uint32_t offset = header_length + 4; for (uint32_t i = 0 ; i < n_sub_blocks; i++) { - uint32_t xsum = x1764_memory(cp + offset, sub_block[i].compressed_size); + uint32_t xsum = toku_x1764_memory(cp + offset, sub_block[i].compressed_size); printf("%u: %u %u %u", i, sub_block[i].compressed_size, sub_block[i].uncompressed_size, sub_block[i].xsum); if (xsum != sub_block[i].xsum) printf(" fail %u offset %" PRIu64, xsum, file_offset + offset); @@ -461,9 +476,9 @@ static void verify_block(unsigned char *cp, uint64_t file_offset, uint64_t size) printf("offset %u expected %" PRIu64 "\n", offset, size); } -static void dump_block(int fd, BLOCKNUM blocknum, FT h) { +static void dump_block(int fd, BLOCKNUM blocknum, FT ft) { DISKOFF offset, size; - toku_translate_blocknum_to_offset_size(h->blocktable, blocknum, &offset, &size); + ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); printf("%" PRId64 " at %" PRId64 " size %" PRId64 "\n", blocknum.b, offset, size); unsigned char *CAST_FROM_VOIDP(vp, toku_malloc(size)); @@ -599,12 +614,13 @@ static int usage(void) { fprintf(stderr, "--dumpdata 0|1 "); fprintf(stderr, "--header "); fprintf(stderr, "--rootnode "); + fprintf(stderr, "--node N "); fprintf(stderr, "--fragmentation "); fprintf(stderr, "--garbage "); fprintf(stderr, "--tsv "); fprintf(stderr, "--translation-table "); fprintf(stderr, "--tsv "); - fprintf(stderr, "ftfilename \n"); + fprintf(stderr, "filename \n"); return 1; } @@ -623,6 +639,10 @@ int main (int argc, const char *const argv[]) { do_header = 1; } else if (strcmp(argv[0], "--rootnode") == 0) { do_rootnode = 1; + } else if (strcmp(argv[0], "--node") == 0 && argc > 1) { + argc--; argv++; + do_node = 1; + do_node_num = make_blocknum(getuint64(argv[0])); } else if (strcmp(argv[0], "--fragmentation") == 0) { do_fragmentation = 1; } else if (strcmp(argv[0], "--garbage") == 0) { @@ -671,26 +691,29 @@ int main (int argc, const char *const argv[]) { if (do_rootnode) { dump_node(fd, ft->h->root_blocknum, ft); } + if (do_node) { + dump_node(fd, do_node_num, ft); + } if (do_fragmentation) { dump_fragmentation(fd, ft, do_tsv); 
} if (do_translation_table) { - toku_dump_translation_table_pretty(stdout, ft->blocktable); + ft->blocktable.dump_translation_table_pretty(stdout); } if (do_garbage) { dump_garbage_stats(fd, ft); } if (!do_header && !do_rootnode && !do_fragmentation && !do_translation_table && !do_garbage) { printf("Block translation:"); - toku_dump_translation_table(stdout, ft->blocktable); + ft->blocktable.dump_translation_table(stdout); dump_header(ft); struct __dump_node_extra info; info.fd = fd; - info.h = ft; - toku_blocktable_iterate(ft->blocktable, TRANSLATION_CHECKPOINTED, - dump_node_wrapper, &info, true, true); + info.ft = ft; + ft->blocktable.iterate(block_table::TRANSLATION_CHECKPOINTED, + dump_node_wrapper, &info, true, true); } } toku_cachefile_close(&cf, false, ZERO_LSN); diff --git a/storage/tokudb/ft-index/util/CMakeLists.txt b/storage/tokudb/ft-index/util/CMakeLists.txt index 08822c4a73c4e..6f6b899e5b7e4 100644 --- a/storage/tokudb/ft-index/util/CMakeLists.txt +++ b/storage/tokudb/ft-index/util/CMakeLists.txt @@ -1,11 +1,16 @@ set(util_srcs context + dbt frwlock kibbutz + memarena mempool + minicron partitioned_counter + queue threadpool scoped_malloc + x1764 ) add_library(util SHARED ${util_srcs}) diff --git a/storage/tokudb/ft-index/utils/tokudb_common.h b/storage/tokudb/ft-index/util/bytestring.h similarity index 83% rename from storage/tokudb/ft-index/utils/tokudb_common.h rename to storage/tokudb/ft-index/util/bytestring.h index aeda0ae5027be..1fea03ecfd199 100644 --- a/storage/tokudb/ft-index/utils/tokudb_common.h +++ b/storage/tokudb/ft-index/util/bytestring.h @@ -1,8 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -#if !defined(TOKUDB_COMMON_H) -#define TOKUDB_COMMON_H /* COPYING CONDITIONS NOTICE: @@ -32,8 +29,8 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2014 Tokutek, Inc. DISCLAIMER: @@ -89,21 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#pragma once -#include -#include -#include -#include -#include -#include -#include +#include "portability/toku_stdint.h" -#define SET_BITS(bitvector, bits) ((bitvector) |= (bits)) -#define REMOVE_BITS(bitvector, bits) ((bitvector) &= ~(bits)) -#define IS_SET_ANY(bitvector, bits) ((bitvector) & (bits)) -#define IS_SET_ALL(bitvector, bits) (((bitvector) & (bits)) == (bits)) - -#define IS_POWER_OF_2(num) ((num) > 0 && ((num) & ((num) - 1)) == 0) - -#endif /* #if !defined(TOKUDB_COMMON_H) */ +struct BYTESTRING { + uint32_t len; + char *data; +}; diff --git a/storage/tokudb/ft-index/util/circular_buffer.cc b/storage/tokudb/ft-index/util/circular_buffer.cc index a453c5b71c763..92d9af521f7cc 100644 --- a/storage/tokudb/ft-index/util/circular_buffer.cc +++ b/storage/tokudb/ft-index/util/circular_buffer.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/circular_buffer.h b/storage/tokudb/ft-index/util/circular_buffer.h index 6f40cf3046f91..904dfed7c493d 100644 --- a/storage/tokudb/ft-index/util/circular_buffer.h +++ b/storage/tokudb/ft-index/util/circular_buffer.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_CIRCULAR_BUFFER_H -#define UTIL_CIRCULAR_BUFFER_H - #include #include #include @@ -210,5 +209,3 @@ class circular_buffer { } #include "circular_buffer.cc" - -#endif // UTIL_CIRCULAR_BUFFER_H diff --git a/storage/tokudb/ft-index/util/constexpr.h b/storage/tokudb/ft-index/util/constexpr.h index cfea0b46924af..ed71daaf3fe88 100644 --- a/storage/tokudb/ft-index/util/constexpr.h +++ b/storage/tokudb/ft-index/util/constexpr.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,11 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#pragma once - constexpr char UU() static_tolower(const char a) { return a >= 'A' && a <= 'Z' ? a - 'A' + 'a' : a; } diff --git a/storage/tokudb/ft-index/util/context.cc b/storage/tokudb/ft-index/util/context.cc index 350cac0796088..6166be4129424 100644 --- a/storage/tokudb/ft-index/util/context.cc +++ b/storage/tokudb/ft-index/util/context.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2014 Tokutek, Inc. DISCLAIMER: @@ -121,7 +121,7 @@ const toku::context *toku_thread_get_context() { // engine status static struct context_status context_status; -#define CONTEXT_STATUS_INIT(key, legend) TOKUDB_STATUS_INIT(context_status, key, nullptr, PARCOUNT, "context: " legend, TOKU_ENGINE_STATUS) +#define CONTEXT_STATUS_INIT(key, legend) TOKUFT_STATUS_INIT(context_status, key, nullptr, PARCOUNT, "context: " legend, TOKU_ENGINE_STATUS) static void context_status_init(void) { diff --git a/storage/tokudb/ft-index/util/context.h b/storage/tokudb/ft-index/util/context.h index 3d424ff597df3..04aef5c5e3b31 100644 --- a/storage/tokudb/ft-index/util/context.h +++ b/storage/tokudb/ft-index/util/context.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2014 Tokutek, Inc. 
DISCLAIMER: @@ -91,7 +91,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2014 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include +#include #include diff --git a/storage/tokudb/ft-index/ft/ybt.cc b/storage/tokudb/ft-index/util/dbt.cc similarity index 89% rename from storage/tokudb/ft-index/ft/ybt.cc rename to storage/tokudb/ft-index/util/dbt.cc index 68fd3c178ed5f..aa26a9b0dd982 100644 --- a/storage/tokudb/ft-index/ft/ybt.cc +++ b/storage/tokudb/ft-index/util/dbt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,27 +90,29 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include -#include #include -#include -#include "ybt.h" +#include "portability/memory.h" -DBT * -toku_init_dbt(DBT *ybt) { - memset(ybt, 0, sizeof(*ybt)); - return ybt; +#include "util/dbt.h" + +DBT *toku_init_dbt(DBT *dbt) { + memset(dbt, 0, sizeof(*dbt)); + return dbt; } -DBT * -toku_init_dbt_flags(DBT *ybt, uint32_t flags) { - toku_init_dbt(ybt); - ybt->flags = flags; - return ybt; +DBT toku_empty_dbt(void) { + static const DBT empty_dbt = { .data = 0, .size = 0, .ulen = 0, .flags = 0 }; + return empty_dbt; +} + +DBT *toku_init_dbt_flags(DBT *dbt, uint32_t flags) { + toku_init_dbt(dbt); + dbt->flags = flags; + return dbt; } -DBT_ARRAY * -toku_dbt_array_init(DBT_ARRAY *dbts, uint32_t size) { +DBT_ARRAY *toku_dbt_array_init(DBT_ARRAY *dbts, uint32_t size) { uint32_t capacity = 1; while (capacity < size) { capacity *= 2; } @@ -123,8 +125,7 @@ toku_dbt_array_init(DBT_ARRAY *dbts, uint32_t size) { return dbts; } -void -toku_dbt_array_resize(DBT_ARRAY *dbts, uint32_t size) { +void toku_dbt_array_resize(DBT_ARRAY *dbts, uint32_t size) { if (size != dbts->size) { if (size > dbts->capacity) { const uint32_t old_capacity = dbts->capacity; @@ -152,14 +153,12 @@ toku_dbt_array_resize(DBT_ARRAY *dbts, uint32_t size) { } } -void -toku_dbt_array_destroy_shallow(DBT_ARRAY *dbts) { +void toku_dbt_array_destroy_shallow(DBT_ARRAY *dbts) { toku_free(dbts->dbts); ZERO_STRUCT(*dbts); } -void -toku_dbt_array_destroy(DBT_ARRAY *dbts) { +void toku_dbt_array_destroy(DBT_ARRAY *dbts) { for (uint32_t i = 0; i < dbts->capacity; i++) { toku_destroy_dbt(&dbts->dbts[i]); } @@ -168,8 +167,7 @@ toku_dbt_array_destroy(DBT_ARRAY *dbts) { -void -toku_destroy_dbt(DBT *dbt) { +void toku_destroy_dbt(DBT *dbt) { switch (dbt->flags) { case DB_DBT_MALLOC: case DB_DBT_REALLOC: @@ -179,8 +177,7 @@ toku_destroy_dbt(DBT *dbt) { } } -DBT * -toku_fill_dbt(DBT *dbt, bytevec k, ITEMLEN len) { +DBT *toku_fill_dbt(DBT *dbt, const void *k, uint32_t len) { toku_init_dbt(dbt); dbt->size=len; dbt->data=(char*)k; @@ -202,14 +199,6 @@ DBT *toku_copyref_dbt(DBT *dst, const DBT src) { return dst; } -DBT *toku_copy_dbt(DBT *dst, const DBT &src) { - dst->flags = src.flags; - dst->ulen = src.ulen; - dst->size = src.size; - 
dst->data = src.data; - return dst; -} - DBT *toku_clone_dbt(DBT *dst, const DBT &src) { return toku_memdup_dbt(dst, src.data, src.size); } @@ -220,8 +209,7 @@ toku_sdbt_cleanup(struct simple_dbt *sdbt) { memset(sdbt, 0, sizeof(*sdbt)); } -static inline int -sdbt_realloc(struct simple_dbt *sdbt) { +static inline int sdbt_realloc(struct simple_dbt *sdbt) { void *new_data = toku_realloc(sdbt->data, sdbt->len); int r; if (new_data == NULL) { @@ -233,8 +221,7 @@ sdbt_realloc(struct simple_dbt *sdbt) { return r; } -static inline int -dbt_realloc(DBT *dbt) { +static inline int dbt_realloc(DBT *dbt) { void *new_data = toku_realloc(dbt->data, dbt->ulen); int r; if (new_data == NULL) { @@ -246,13 +233,13 @@ dbt_realloc(DBT *dbt) { return r; } -int -toku_dbt_set (ITEMLEN len, bytevec val, DBT *d, struct simple_dbt *sdbt) { // sdbt is the static value used when flags==0 // Otherwise malloc or use the user-supplied memory, as according to the flags in d->flags. +int toku_dbt_set(uint32_t len, const void *val, DBT *d, struct simple_dbt *sdbt) { int r; - if (!d) r = 0; - else { + if (d == nullptr) { + r = 0; + } else { switch (d->flags) { case (DB_DBT_USERMEM): d->size = len; @@ -325,6 +312,12 @@ bool toku_dbt_is_infinite(const DBT *dbt) { return dbt == toku_dbt_positive_infinity() || dbt == toku_dbt_negative_infinity(); } +bool toku_dbt_is_empty(const DBT *dbt) { + // can't have a null data field with a non-zero size + paranoid_invariant(dbt->data != nullptr || dbt->size == 0); + return dbt->data == nullptr; +} + int toku_dbt_infinite_compare(const DBT *a, const DBT *b) { if (a == b) { return 0; diff --git a/storage/tokudb/ft-index/ft/ybt.h b/storage/tokudb/ft-index/util/dbt.h similarity index 90% rename from storage/tokudb/ft-index/ft/ybt.h rename to storage/tokudb/ft-index/util/dbt.h index ae19f527493b7..4d78068cb6782 100644 --- a/storage/tokudb/ft-index/ft/ybt.h +++ b/storage/tokudb/ft-index/util/dbt.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_YBT_H -#define TOKU_YBT_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,11 +87,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -// fttypes.h must be first to make 64-bit file mode work right in linux. 
-#include "fttypes.h" #include // TODO: John @@ -102,23 +100,24 @@ PATENT RIGHTS GRANT: DBT *toku_init_dbt(DBT *); +// returns: an initialized but empty dbt (for which toku_dbt_is_empty() is true) +DBT toku_empty_dbt(void); + DBT *toku_init_dbt_flags(DBT *, uint32_t flags); void toku_destroy_dbt(DBT *); -DBT *toku_fill_dbt(DBT *dbt, bytevec k, ITEMLEN len); +DBT *toku_fill_dbt(DBT *dbt, const void *k, uint32_t len); DBT *toku_memdup_dbt(DBT *dbt, const void *k, size_t len); DBT *toku_copyref_dbt(DBT *dst, const DBT src); -DBT *toku_copy_dbt(DBT *dst, const DBT &src); - DBT *toku_clone_dbt(DBT *dst, const DBT &src); -int toku_dbt_set(ITEMLEN len, bytevec val, DBT *d, struct simple_dbt *sdbt); +int toku_dbt_set(uint32_t len, const void *val, DBT *d, struct simple_dbt *sdbt); -int toku_dbt_set_value(DBT *, bytevec *val, ITEMLEN vallen, void **staticptrp, bool ybt1_disposable); +int toku_dbt_set_value(DBT *, const void **val, uint32_t vallen, void **staticptrp, bool dbt1_disposable); void toku_sdbt_cleanup(struct simple_dbt *sdbt); @@ -131,11 +130,12 @@ const DBT *toku_dbt_negative_infinity(void); // returns: true if the given dbt is either positive or negative infinity bool toku_dbt_is_infinite(const DBT *dbt); +// returns: true if the given dbt has no data (ie: dbt->data == nullptr) +bool toku_dbt_is_empty(const DBT *dbt); + // effect: compares two potentially infinity-valued dbts // requires: at least one is infinite (assert otherwise) int toku_dbt_infinite_compare(const DBT *a, const DBT *b); // returns: true if the given dbts have the same data pointer and size bool toku_dbt_equals(const DBT *a, const DBT *b); - -#endif /* TOKU_YBT_H */ diff --git a/storage/tokudb/ft-index/util/dmt.cc b/storage/tokudb/ft-index/util/dmt.cc new file mode 100644 index 0000000000000..3e0b512d7a7ed --- /dev/null +++ b/storage/tokudb/ft-index/util/dmt.cc @@ -0,0 +1,1265 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include +#include + +#include +#include + +namespace toku { + +template +void dmt::create(void) { + toku_mempool_zero(&this->mp); + this->values_same_size = true; + this->value_length = 0; + this->is_array = true; + this->d.a.num_values = 0; + //TODO: maybe allocate enough space for something by default? + // We may be relying on not needing to allocate space the first time (due to limited time spent while a lock is held) +} + +/** + * Note: create_from_sorted_memory_of_fixed_size_elements does not take ownership of 'mem'. + * Owner is still responsible for freeing it. + * While in the OMT a similar function would steal ownership, this doesn't make sense for the DMT because + * we (usually) have to add padding for alignment (mem has all of the elements PACKED). + * Also all current uses (as of Jan 12, 2014) of this function would require mallocing a new array + * in order to allow stealing. 
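+ *
+ * As a rough illustration of the padding overhead (the sizes below are hypothetical, not taken from any caller):
+ * with ALIGNMENT == 4 and fixed_value_length == 10, each value is padded out to align(10) == 12 bytes,
+ * so pad_bytes == 2 and a packed input of mem_length == 10 * numvalues bytes occupies
+ * aligned_memsize == 12 * numvalues bytes in the mempool.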
+ */ +template +void dmt::create_from_sorted_memory_of_fixed_size_elements( + const void *mem, + const uint32_t numvalues, + const uint32_t mem_length, + const uint32_t fixed_value_length) { + this->values_same_size = true; + this->value_length = fixed_value_length; + this->is_array = true; + this->d.a.num_values = numvalues; + const uint8_t pad_bytes = get_fixed_length_alignment_overhead(); + uint32_t aligned_memsize = mem_length + numvalues * pad_bytes; + toku_mempool_construct(&this->mp, aligned_memsize); + if (aligned_memsize > 0) { + paranoid_invariant(numvalues > 0); + void *ptr = toku_mempool_malloc(&this->mp, aligned_memsize); + paranoid_invariant_notnull(ptr); + uint8_t * const CAST_FROM_VOIDP(dest, ptr); + const uint8_t * const CAST_FROM_VOIDP(src, mem); + if (pad_bytes == 0) { + paranoid_invariant(aligned_memsize == mem_length); + memcpy(dest, src, aligned_memsize); + } else { + // TODO(leif): check what vectorizes best: multiplying like this or adding to offsets + const uint32_t fixed_len = this->value_length; + const uint32_t fixed_aligned_len = align(this->value_length); + paranoid_invariant(this->d.a.num_values*fixed_len == mem_length); + for (uint32_t i = 0; i < this->d.a.num_values; i++) { + memcpy(&dest[i*fixed_aligned_len], &src[i*fixed_len], fixed_len); + } + } + } +} + +template +void dmt::clone(const dmt &src) { + *this = src; + toku_mempool_clone(&src.mp, &this->mp); +} + +template +void dmt::clear(void) { + this->is_array = true; + this->d.a.num_values = 0; + this->values_same_size = true; // Reset state + this->value_length = 0; + //TODO(leif): Note that this can mess with our memory_footprint calculation (we may touch past what is marked as 'used' in the mempool) + // One 'fix' is for mempool to also track what was touched, and reset() shouldn't reset that, though realloc() might. + toku_mempool_reset(&this->mp); +} + +template +void dmt::destroy(void) { + this->clear(); + toku_mempool_destroy(&this->mp); +} + +template +uint32_t dmt::size(void) const { + if (this->is_array) { + return this->d.a.num_values; + } else { + return this->nweight(this->d.t.root); + } +} + +template +uint32_t dmt::nweight(const subtree &subtree) const { + if (subtree.is_null()) { + return 0; + } else { + const dmt_node & node = get_node(subtree); + return node.weight; + } +} + +template +template +int dmt::insert(const dmtwriter_t &value, const dmtcmp_t &v, uint32_t *const idx) { + int r; + uint32_t insert_idx; + + r = this->find_zero(v, nullptr, nullptr, &insert_idx); + if (r==0) { + if (idx) *idx = insert_idx; + return DB_KEYEXIST; + } + if (r != DB_NOTFOUND) return r; + + if ((r = this->insert_at(value, insert_idx))) return r; + if (idx) *idx = insert_idx; + + return 0; +} + +template +int dmt::insert_at(const dmtwriter_t &value, const uint32_t idx) { + if (idx > this->size()) { return EINVAL; } + + bool same_size = this->values_same_size && (this->size() == 0 || value.get_size() == this->value_length); + if (this->is_array) { + if (same_size && idx == this->d.a.num_values) { + return this->insert_at_array_end(value); + } + this->convert_from_array_to_tree(); + } + // Is a tree. 
+ paranoid_invariant(!is_array); + if (!same_size) { + this->values_same_size = false; + this->value_length = 0; + } + + this->maybe_resize_tree(&value); + subtree *rebalance_subtree = nullptr; + this->insert_internal(&this->d.t.root, value, idx, &rebalance_subtree); + if (rebalance_subtree != nullptr) { + this->rebalance(rebalance_subtree); + } + return 0; +} + +template +template +int dmt::insert_at_array_end(const dmtwriter_t& value_in) { + paranoid_invariant(this->is_array); + paranoid_invariant(this->values_same_size); + if (this->d.a.num_values == 0) { + this->value_length = value_in.get_size(); + } + paranoid_invariant(this->value_length == value_in.get_size()); + + if (with_resize) { + this->maybe_resize_array_for_insert(); + } + dmtdata_t *dest = this->alloc_array_value_end(); + value_in.write_to(dest); + return 0; +} + +template +dmtdata_t * dmt::alloc_array_value_end(void) { + paranoid_invariant(this->is_array); + paranoid_invariant(this->values_same_size); + this->d.a.num_values++; + + void *ptr = toku_mempool_malloc(&this->mp, align(this->value_length)); + paranoid_invariant_notnull(ptr); + paranoid_invariant(reinterpret_cast(ptr) % ALIGNMENT == 0); + dmtdata_t *CAST_FROM_VOIDP(n, ptr); + paranoid_invariant(n == get_array_value(this->d.a.num_values - 1)); + return n; +} + +template +dmtdata_t * dmt::get_array_value(const uint32_t idx) const { + paranoid_invariant(this->is_array); + paranoid_invariant(this->values_same_size); + + paranoid_invariant(idx < this->d.a.num_values); + return get_array_value_internal(&this->mp, idx); +} + +template +dmtdata_t * dmt::get_array_value_internal(const struct mempool *mempool, const uint32_t idx) const { + void* ptr = toku_mempool_get_pointer_from_base_and_offset(mempool, idx * align(this->value_length)); + dmtdata_t *CAST_FROM_VOIDP(value, ptr); + return value; +} + +//TODO(leif) write microbenchmarks to compare growth factor. Note: growth factor here is actually 2.5 because of mempool_construct +template +void dmt::maybe_resize_array_for_insert(void) { + bool space_available = toku_mempool_get_free_size(&this->mp) >= align(this->value_length); + + if (!space_available) { + const uint32_t n = this->d.a.num_values + 1; + const uint32_t new_n = n <=2 ? 
4 : 2*n; + const uint32_t new_space = align(this->value_length) * new_n; + + struct mempool new_kvspace; + toku_mempool_construct(&new_kvspace, new_space); + size_t copy_bytes = this->d.a.num_values * align(this->value_length); + invariant(copy_bytes + align(this->value_length) <= new_space); + paranoid_invariant(copy_bytes <= toku_mempool_get_used_size(&this->mp)); + // Copy over to new mempool + if (this->d.a.num_values > 0) { + void* dest = toku_mempool_malloc(&new_kvspace, copy_bytes); + invariant(dest!=nullptr); + memcpy(dest, get_array_value(0), copy_bytes); + } + toku_mempool_destroy(&this->mp); + this->mp = new_kvspace; + } +} + +template +uint32_t dmt::align(const uint32_t x) const { + return roundup_to_multiple(ALIGNMENT, x); +} + +template +void dmt::prepare_for_serialize(void) { + if (!this->is_array) { + this->convert_from_tree_to_array(); + } +} + +template +void dmt::convert_from_tree_to_array(void) { + paranoid_invariant(!this->is_array); + paranoid_invariant(this->values_same_size); + + const uint32_t num_values = this->size(); + + node_offset *tmp_array; + bool malloced = false; + tmp_array = alloc_temp_node_offsets(num_values); + if (!tmp_array) { + malloced = true; + XMALLOC_N(num_values, tmp_array); + } + this->fill_array_with_subtree_offsets(tmp_array, this->d.t.root); + + struct mempool new_mp; + const uint32_t fixed_len = this->value_length; + const uint32_t fixed_aligned_len = align(this->value_length); + size_t mem_needed = num_values * fixed_aligned_len; + toku_mempool_construct(&new_mp, mem_needed); + uint8_t* CAST_FROM_VOIDP(dest, toku_mempool_malloc(&new_mp, mem_needed)); + paranoid_invariant_notnull(dest); + for (uint32_t i = 0; i < num_values; i++) { + const dmt_node &n = get_node(tmp_array[i]); + memcpy(&dest[i*fixed_aligned_len], &n.value, fixed_len); + } + toku_mempool_destroy(&this->mp); + this->mp = new_mp; + this->is_array = true; + this->d.a.num_values = num_values; + + if (malloced) toku_free(tmp_array); +} + +template +void dmt::convert_from_array_to_tree(void) { + paranoid_invariant(this->is_array); + paranoid_invariant(this->values_same_size); + + //save array-format information to locals + const uint32_t num_values = this->d.a.num_values; + + node_offset *tmp_array; + bool malloced = false; + tmp_array = alloc_temp_node_offsets(num_values); + if (!tmp_array) { + malloced = true; + XMALLOC_N(num_values, tmp_array); + } + + struct mempool old_mp = this->mp; + size_t mem_needed = num_values * align(this->value_length + __builtin_offsetof(dmt_node, value)); + toku_mempool_construct(&this->mp, mem_needed); + + for (uint32_t i = 0; i < num_values; i++) { + dmtwriter_t writer(this->value_length, get_array_value_internal(&old_mp, i)); + tmp_array[i] = node_malloc_and_set_value(writer); + } + this->is_array = false; + this->rebuild_subtree_from_offsets(&this->d.t.root, tmp_array, num_values); + + if (malloced) toku_free(tmp_array); + toku_mempool_destroy(&old_mp); +} + +template +int dmt::delete_at(const uint32_t idx) { + uint32_t n = this->size(); + if (idx >= n) { return EINVAL; } + + if (n == 1) { + this->clear(); //Emptying out the entire dmt. 
+ return 0; + } + if (this->is_array) { + this->convert_from_array_to_tree(); + } + paranoid_invariant(!is_array); + + subtree *rebalance_subtree = nullptr; + this->delete_internal(&this->d.t.root, idx, nullptr, &rebalance_subtree); + if (rebalance_subtree != nullptr) { + this->rebalance(rebalance_subtree); + } + this->maybe_resize_tree(nullptr); + return 0; +} + +template +template +int dmt::iterate(iterate_extra_t *const iterate_extra) const { + return this->iterate_on_range(0, this->size(), iterate_extra); +} + +template +template +int dmt::iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const iterate_extra) const { + if (right > this->size()) { return EINVAL; } + if (left == right) { return 0; } + if (this->is_array) { + return this->iterate_internal_array(left, right, iterate_extra); + } + return this->iterate_internal(left, right, this->d.t.root, 0, iterate_extra); +} + +template +void dmt::verify(void) const { + uint32_t num_values = this->size(); + invariant(num_values < UINT32_MAX); + size_t pool_used = toku_mempool_get_used_size(&this->mp); + size_t pool_size = toku_mempool_get_size(&this->mp); + size_t pool_frag = toku_mempool_get_frag_size(&this->mp); + invariant(pool_used <= pool_size); + if (this->is_array) { + invariant(this->values_same_size); + invariant(num_values == this->d.a.num_values); + + // We know exactly how much memory should be used. + invariant(pool_used == num_values * align(this->value_length)); + + // Array form must have 0 fragmentation in mempool. + invariant(pool_frag == 0); + } else { + if (this->values_same_size) { + // We know exactly how much memory should be used. + invariant(pool_used == num_values * align(this->value_length + __builtin_offsetof(dmt_node, value))); + } else { + // We can only do a lower bound on memory usage. + invariant(pool_used >= num_values * __builtin_offsetof(dmt_node, value)); + } + std::vector touched(pool_size, false); + verify_internal(this->d.t.root, &touched); + size_t bytes_used = 0; + for (size_t i = 0; i < pool_size; i++) { + if (touched.at(i)) { + ++bytes_used; + } + } + invariant(bytes_used == pool_used); + } +} + +// Verifies all weights are internally consistent. +template +void dmt::verify_internal(const subtree &subtree, std::vector *touched) const { + if (subtree.is_null()) { + return; + } + const dmt_node &node = get_node(subtree); + + if (this->values_same_size) { + invariant(node.value_length == this->value_length); + } + + size_t offset = toku_mempool_get_offset_from_pointer_and_base(&this->mp, &node); + size_t node_size = align(__builtin_offsetof(dmt_node, value) + node.value_length); + invariant(offset <= touched->size()); + invariant(offset+node_size <= touched->size()); + invariant(offset % ALIGNMENT == 0); + // Mark memory as touched and never allocated to multiple nodes. 
+ for (size_t i = offset; i < offset+node_size; ++i) { + invariant(!touched->at(i)); + touched->at(i) = true; + } + + const uint32_t leftweight = this->nweight(node.left); + const uint32_t rightweight = this->nweight(node.right); + + invariant(leftweight + rightweight + 1 == this->nweight(subtree)); + verify_internal(node.left, touched); + verify_internal(node.right, touched); +} + +template +template +void dmt::iterate_ptr(iterate_extra_t *const iterate_extra) { + if (this->is_array) { + this->iterate_ptr_internal_array(0, this->size(), iterate_extra); + } else { + this->iterate_ptr_internal(0, this->size(), this->d.t.root, 0, iterate_extra); + } +} + +template +int dmt::fetch(const uint32_t idx, uint32_t *const value_len, dmtdataout_t *const value) const { + if (idx >= this->size()) { return EINVAL; } + if (this->is_array) { + this->fetch_internal_array(idx, value_len, value); + } else { + this->fetch_internal(this->d.t.root, idx, value_len, value); + } + return 0; +} + +template +template +int dmt::find_zero(const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const { + uint32_t tmp_index; + uint32_t *const child_idxp = (idxp != nullptr) ? idxp : &tmp_index; + int r; + if (this->is_array) { + r = this->find_internal_zero_array(extra, value_len, value, child_idxp); + } + else { + r = this->find_internal_zero(this->d.t.root, extra, value_len, value, child_idxp); + } + return r; +} + +template +template +int dmt::find(const dmtcmp_t &extra, int direction, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const { + uint32_t tmp_index; + uint32_t *const child_idxp = (idxp != nullptr) ? idxp : &tmp_index; + paranoid_invariant(direction != 0); + if (direction < 0) { + if (this->is_array) { + return this->find_internal_minus_array(extra, value_len, value, child_idxp); + } else { + return this->find_internal_minus(this->d.t.root, extra, value_len, value, child_idxp); + } + } else { + if (this->is_array) { + return this->find_internal_plus_array(extra, value_len, value, child_idxp); + } else { + return this->find_internal_plus(this->d.t.root, extra, value_len, value, child_idxp); + } + } +} + +template +size_t dmt::memory_size(void) { + return (sizeof *this) + toku_mempool_get_size(&this->mp); +} + +template +dmt_node_templated & dmt::get_node(const subtree &subtree) const { + paranoid_invariant(!subtree.is_null()); + return get_node(subtree.get_offset()); +} + +template +dmt_node_templated & dmt::get_node(const node_offset offset) const { + void* ptr = toku_mempool_get_pointer_from_base_and_offset(&this->mp, offset); + dmt_node *CAST_FROM_VOIDP(node, ptr); + return *node; +} + +template +void dmt::node_set_value(dmt_node * n, const dmtwriter_t &value) { + n->value_length = value.get_size(); + value.write_to(&n->value); +} + +template +node_offset dmt::node_malloc_and_set_value(const dmtwriter_t &value) { + size_t val_size = value.get_size(); + size_t size_to_alloc = __builtin_offsetof(dmt_node, value) + val_size; + size_to_alloc = align(size_to_alloc); + void* np = toku_mempool_malloc(&this->mp, size_to_alloc); + paranoid_invariant_notnull(np); + dmt_node *CAST_FROM_VOIDP(n, np); + node_set_value(n, value); + + return toku_mempool_get_offset_from_pointer_and_base(&this->mp, np); +} + +template +void dmt::node_free(const subtree &st) { + dmt_node &n = get_node(st); + size_t size_to_free = __builtin_offsetof(dmt_node, value) + n.value_length; + size_to_free = align(size_to_free); + toku_mempool_mfree(&this->mp, &n, 
size_to_free); +} + +template +void dmt::maybe_resize_tree(const dmtwriter_t * value) { + const ssize_t curr_capacity = toku_mempool_get_size(&this->mp); + const ssize_t curr_free = toku_mempool_get_free_size(&this->mp); + const ssize_t curr_used = toku_mempool_get_used_size(&this->mp); + ssize_t add_size = 0; + if (value) { + add_size = __builtin_offsetof(dmt_node, value) + value->get_size(); + add_size = align(add_size); + } + + const ssize_t need_size = curr_used + add_size; + paranoid_invariant(need_size <= UINT32_MAX); + //TODO(leif) consider different growth rates + const ssize_t new_size = 2*need_size; + paranoid_invariant(new_size <= UINT32_MAX); + + if ((curr_capacity / 2 >= new_size) || // Way too much allocated + (curr_free < add_size)) { // No room in mempool + // Copy all memory and reconstruct dmt in new mempool. + if (curr_free < add_size && toku_mempool_get_frag_size(&this->mp) == 0) { + // TODO(yoni) or TODO(leif) consider doing this not just when frag size is zero, but also when it is a small percentage of the total mempool size + // Offsets remain the same in the new mempool so we can just realloc. + toku_mempool_realloc_larger(&this->mp, new_size); + } else if (!this->d.t.root.is_null()) { + struct mempool new_kvspace; + toku_mempool_construct(&new_kvspace, new_size); + + const dmt_node &n = get_node(this->d.t.root); + node_offset *tmp_array; + bool malloced = false; + tmp_array = alloc_temp_node_offsets(n.weight); + if (!tmp_array) { + malloced = true; + XMALLOC_N(n.weight, tmp_array); + } + this->fill_array_with_subtree_offsets(tmp_array, this->d.t.root); + for (node_offset i = 0; i < n.weight; i++) { + dmt_node &node = get_node(tmp_array[i]); + const size_t bytes_to_copy = __builtin_offsetof(dmt_node, value) + node.value_length; + const size_t bytes_to_alloc = align(bytes_to_copy); + void* newdata = toku_mempool_malloc(&new_kvspace, bytes_to_alloc); + memcpy(newdata, &node, bytes_to_copy); + tmp_array[i] = toku_mempool_get_offset_from_pointer_and_base(&new_kvspace, newdata); + } + + struct mempool old_kvspace = this->mp; + this->mp = new_kvspace; + this->rebuild_subtree_from_offsets(&this->d.t.root, tmp_array, n.weight); + if (malloced) toku_free(tmp_array); + toku_mempool_destroy(&old_kvspace); + } else { + toku_mempool_destroy(&this->mp); + toku_mempool_construct(&this->mp, new_size); + } + } +} + +template +bool dmt::will_need_rebalance(const subtree &subtree, const int leftmod, const int rightmod) const { + if (subtree.is_null()) { return false; } + const dmt_node &n = get_node(subtree); + // one of the 1's is for the root. 
+ // the other is to take ceil(n/2) + const uint32_t weight_left = this->nweight(n.left) + leftmod; + const uint32_t weight_right = this->nweight(n.right) + rightmod; + return ((1+weight_left < (1+1+weight_right)/2) + || + (1+weight_right < (1+1+weight_left)/2)); +} + +template +void dmt::insert_internal(subtree *const subtreep, const dmtwriter_t &value, const uint32_t idx, subtree **const rebalance_subtree) { + if (subtreep->is_null()) { + paranoid_invariant_zero(idx); + const node_offset newoffset = this->node_malloc_and_set_value(value); + dmt_node &newnode = get_node(newoffset); + newnode.weight = 1; + newnode.left.set_to_null(); + newnode.right.set_to_null(); + subtreep->set_offset(newoffset); + } else { + dmt_node &n = get_node(*subtreep); + n.weight++; + if (idx <= this->nweight(n.left)) { + if (*rebalance_subtree == nullptr && this->will_need_rebalance(*subtreep, 1, 0)) { + *rebalance_subtree = subtreep; + } + this->insert_internal(&n.left, value, idx, rebalance_subtree); + } else { + if (*rebalance_subtree == nullptr && this->will_need_rebalance(*subtreep, 0, 1)) { + *rebalance_subtree = subtreep; + } + const uint32_t sub_index = idx - this->nweight(n.left) - 1; + this->insert_internal(&n.right, value, sub_index, rebalance_subtree); + } + } +} + +template +void dmt::delete_internal(subtree *const subtreep, const uint32_t idx, subtree *const subtree_replace, subtree **const rebalance_subtree) { + paranoid_invariant_notnull(subtreep); + paranoid_invariant_notnull(rebalance_subtree); + paranoid_invariant(!subtreep->is_null()); + dmt_node &n = get_node(*subtreep); + const uint32_t leftweight = this->nweight(n.left); + if (idx < leftweight) { + n.weight--; + if (*rebalance_subtree == nullptr && this->will_need_rebalance(*subtreep, -1, 0)) { + *rebalance_subtree = subtreep; + } + this->delete_internal(&n.left, idx, subtree_replace, rebalance_subtree); + } else if (idx == leftweight) { + // Found the correct index. + if (n.left.is_null()) { + paranoid_invariant_zero(idx); + // Delete n and let parent point to n.right + subtree ptr_this = *subtreep; + *subtreep = n.right; + subtree to_free; + if (subtree_replace != nullptr) { + // Swap self with the other node. Taking over all responsibility. + to_free = *subtree_replace; + dmt_node &ancestor = get_node(*subtree_replace); + if (*rebalance_subtree == &ancestor.right) { + // Take over rebalance responsibility. + *rebalance_subtree = &n.right; + } + n.weight = ancestor.weight; + n.left = ancestor.left; + n.right = ancestor.right; + *subtree_replace = ptr_this; + } else { + to_free = ptr_this; + } + this->node_free(to_free); + } else if (n.right.is_null()) { + // Delete n and let parent point to n.left + subtree to_free = *subtreep; + *subtreep = n.left; + paranoid_invariant(idx>0); + paranoid_invariant_null(subtree_replace); // To be recursive, we're looking for index 0. n is index > 0 here. 
+ this->node_free(to_free); + } else { + if (*rebalance_subtree == nullptr && this->will_need_rebalance(*subtreep, 0, -1)) { + *rebalance_subtree = subtreep; + } + // don't need to copy up value, it's only used by this + // next call, and when that gets to the bottom there + // won't be any more recursion + n.weight--; + this->delete_internal(&n.right, 0, subtreep, rebalance_subtree); + } + } else { + n.weight--; + if (*rebalance_subtree == nullptr && this->will_need_rebalance(*subtreep, 0, -1)) { + *rebalance_subtree = subtreep; + } + this->delete_internal(&n.right, idx - leftweight - 1, subtree_replace, rebalance_subtree); + } +} + +template +template +int dmt::iterate_internal_array(const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) const { + int r; + for (uint32_t i = left; i < right; ++i) { + r = f(this->value_length, *get_array_value(i), i, iterate_extra); + if (r != 0) { + return r; + } + } + return 0; +} + +template +template +void dmt::iterate_ptr_internal(const uint32_t left, const uint32_t right, + const subtree &subtree, const uint32_t idx, + iterate_extra_t *const iterate_extra) { + if (!subtree.is_null()) { + dmt_node &n = get_node(subtree); + const uint32_t idx_root = idx + this->nweight(n.left); + if (left < idx_root) { + this->iterate_ptr_internal(left, right, n.left, idx, iterate_extra); + } + if (left <= idx_root && idx_root < right) { + int r = f(n.value_length, &n.value, idx_root, iterate_extra); + lazy_assert_zero(r); + } + if (idx_root + 1 < right) { + this->iterate_ptr_internal(left, right, n.right, idx_root + 1, iterate_extra); + } + } +} + +template +template +void dmt::iterate_ptr_internal_array(const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) { + for (uint32_t i = left; i < right; ++i) { + int r = f(this->value_length, get_array_value(i), i, iterate_extra); + lazy_assert_zero(r); + } +} + +template +template +int dmt::iterate_internal(const uint32_t left, const uint32_t right, + const subtree &subtree, const uint32_t idx, + iterate_extra_t *const iterate_extra) const { + if (subtree.is_null()) { return 0; } + int r; + const dmt_node &n = get_node(subtree); + const uint32_t idx_root = idx + this->nweight(n.left); + if (left < idx_root) { + r = this->iterate_internal(left, right, n.left, idx, iterate_extra); + if (r != 0) { return r; } + } + if (left <= idx_root && idx_root < right) { + r = f(n.value_length, n.value, idx_root, iterate_extra); + if (r != 0) { return r; } + } + if (idx_root + 1 < right) { + return this->iterate_internal(left, right, n.right, idx_root + 1, iterate_extra); + } + return 0; +} + +template +void dmt::fetch_internal_array(const uint32_t i, uint32_t *const value_len, dmtdataout_t *const value) const { + copyout(value_len, value, this->value_length, get_array_value(i)); +} + +template +void dmt::fetch_internal(const subtree &subtree, const uint32_t i, uint32_t *const value_len, dmtdataout_t *const value) const { + dmt_node &n = get_node(subtree); + const uint32_t leftweight = this->nweight(n.left); + if (i < leftweight) { + this->fetch_internal(n.left, i, value_len, value); + } else if (i == leftweight) { + copyout(value_len, value, &n); + } else { + this->fetch_internal(n.right, i - leftweight - 1, value_len, value); + } +} + +template +void dmt::fill_array_with_subtree_offsets(node_offset *const array, const subtree &subtree) const { + if (!subtree.is_null()) { + const dmt_node &tree = get_node(subtree); + this->fill_array_with_subtree_offsets(&array[0], tree.left); + 
array[this->nweight(tree.left)] = subtree.get_offset(); + this->fill_array_with_subtree_offsets(&array[this->nweight(tree.left) + 1], tree.right); + } +} + +template +void dmt::rebuild_subtree_from_offsets(subtree *const subtree, const node_offset *const offsets, const uint32_t numvalues) { + if (numvalues==0) { + subtree->set_to_null(); + } else { + uint32_t halfway = numvalues/2; + subtree->set_offset(offsets[halfway]); + dmt_node &newnode = get_node(offsets[halfway]); + newnode.weight = numvalues; + // value is already in there. + this->rebuild_subtree_from_offsets(&newnode.left, &offsets[0], halfway); + this->rebuild_subtree_from_offsets(&newnode.right, &offsets[halfway+1], numvalues-(halfway+1)); + } +} + +//TODO(leif): Note that this can mess with our memory_footprint calculation (we may touch past what is marked as 'used' in the mempool) +template +node_offset* dmt::alloc_temp_node_offsets(uint32_t num_offsets) { + size_t mem_needed = num_offsets * sizeof(node_offset); + size_t mem_free; + mem_free = toku_mempool_get_free_size(&this->mp); + node_offset* CAST_FROM_VOIDP(tmp, toku_mempool_get_next_free_ptr(&this->mp)); + if (mem_free >= mem_needed) { + return tmp; + } + return nullptr; +} + +template +void dmt::rebalance(subtree *const subtree) { + paranoid_invariant(!subtree->is_null()); + + // There is a possible "optimization" here: + // if (this->values_same_size && subtree == &this->d.t.root) { + // this->convert_from_tree_to_array(); + // return; + // } + // but we don't want to do it because it involves actually copying values around + // as opposed to stopping in the middle of rebalancing (like in the OMT) + + node_offset offset = subtree->get_offset(); + const dmt_node &n = get_node(offset); + node_offset *tmp_array; + bool malloced = false; + tmp_array = alloc_temp_node_offsets(n.weight); + if (!tmp_array) { + malloced = true; + XMALLOC_N(n.weight, tmp_array); + } + this->fill_array_with_subtree_offsets(tmp_array, *subtree); + this->rebuild_subtree_from_offsets(subtree, tmp_array, n.weight); + if (malloced) toku_free(tmp_array); +} + +template +void dmt::copyout(uint32_t *const outlen, dmtdata_t *const out, const dmt_node *const n) { + if (outlen) { + *outlen = n->value_length; + } + if (out) { + *out = n->value; + } +} + +template +void dmt::copyout(uint32_t *const outlen, dmtdata_t **const out, dmt_node *const n) { + if (outlen) { + *outlen = n->value_length; + } + if (out) { + *out = &n->value; + } +} + +template +void dmt::copyout(uint32_t *const outlen, dmtdata_t *const out, const uint32_t len, const dmtdata_t *const stored_value_ptr) { + if (outlen) { + *outlen = len; + } + if (out) { + *out = *stored_value_ptr; + } +} + +template +void dmt::copyout(uint32_t *const outlen, dmtdata_t **const out, const uint32_t len, dmtdata_t *const stored_value_ptr) { + if (outlen) { + *outlen = len; + } + if (out) { + *out = stored_value_ptr; + } +} + +template +template +int dmt::find_internal_zero_array(const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + uint32_t min = 0; + uint32_t limit = this->d.a.num_values; + uint32_t best_pos = subtree::NODE_NULL; + uint32_t best_zero = subtree::NODE_NULL; + + while (min!=limit) { + uint32_t mid = (min + limit) / 2; + int hv = h(this->value_length, *get_array_value(mid), extra); + if (hv<0) { + min = mid+1; + } + else if (hv>0) { + best_pos = mid; + limit = mid; + } + else { + best_zero = mid; + limit = mid; + } + } + if (best_zero!=subtree::NODE_NULL) 
{ + //Found a zero + copyout(value_len, value, this->value_length, get_array_value(best_zero)); + *idxp = best_zero; + return 0; + } + if (best_pos!=subtree::NODE_NULL) *idxp = best_pos; + else *idxp = this->d.a.num_values; + return DB_NOTFOUND; +} + +template +template +int dmt::find_internal_zero(const subtree &subtree, const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + if (subtree.is_null()) { + *idxp = 0; + return DB_NOTFOUND; + } + dmt_node &n = get_node(subtree); + int hv = h(n.value_length, n.value, extra); + if (hv<0) { + int r = this->find_internal_zero(n.right, extra, value_len, value, idxp); + *idxp += this->nweight(n.left)+1; + return r; + } else if (hv>0) { + return this->find_internal_zero(n.left, extra, value_len, value, idxp); + } else { + int r = this->find_internal_zero(n.left, extra, value_len, value, idxp); + if (r==DB_NOTFOUND) { + *idxp = this->nweight(n.left); + copyout(value_len, value, &n); + r = 0; + } + return r; + } +} + +template +template +int dmt::find_internal_plus_array(const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + uint32_t min = 0; + uint32_t limit = this->d.a.num_values; + uint32_t best = subtree::NODE_NULL; + + while (min != limit) { + const uint32_t mid = (min + limit) / 2; + const int hv = h(this->value_length, *get_array_value(mid), extra); + if (hv > 0) { + best = mid; + limit = mid; + } else { + min = mid + 1; + } + } + if (best == subtree::NODE_NULL) { return DB_NOTFOUND; } + copyout(value_len, value, this->value_length, get_array_value(best)); + *idxp = best; + return 0; +} + +template +template +int dmt::find_internal_plus(const subtree &subtree, const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + if (subtree.is_null()) { + return DB_NOTFOUND; + } + dmt_node & n = get_node(subtree); + int hv = h(n.value_length, n.value, extra); + int r; + if (hv > 0) { + r = this->find_internal_plus(n.left, extra, value_len, value, idxp); + if (r == DB_NOTFOUND) { + *idxp = this->nweight(n.left); + copyout(value_len, value, &n); + r = 0; + } + } else { + r = this->find_internal_plus(n.right, extra, value_len, value, idxp); + if (r == 0) { + *idxp += this->nweight(n.left) + 1; + } + } + return r; +} + +template +template +int dmt::find_internal_minus_array(const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + uint32_t min = 0; + uint32_t limit = this->d.a.num_values; + uint32_t best = subtree::NODE_NULL; + + while (min != limit) { + const uint32_t mid = (min + limit) / 2; + const int hv = h(this->value_length, *get_array_value(mid), extra); + if (hv < 0) { + best = mid; + min = mid + 1; + } else { + limit = mid; + } + } + if (best == subtree::NODE_NULL) { return DB_NOTFOUND; } + copyout(value_len, value, this->value_length, get_array_value(best)); + *idxp = best; + return 0; +} + +template +template +int dmt::find_internal_minus(const subtree &subtree, const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + if (subtree.is_null()) { + return DB_NOTFOUND; + } + dmt_node & n = get_node(subtree); + int hv = h(n.value_length, n.value, extra); + if (hv < 0) { + int r = this->find_internal_minus(n.right, extra, 
value_len, value, idxp); + if (r == 0) { + *idxp += this->nweight(n.left) + 1; + } else if (r == DB_NOTFOUND) { + *idxp = this->nweight(n.left); + copyout(value_len, value, &n); + r = 0; + } + return r; + } else { + return this->find_internal_minus(n.left, extra, value_len, value, idxp); + } +} + +template +uint32_t dmt::get_fixed_length(void) const { + return this->values_same_size ? this->value_length : 0; +} + +template +uint32_t dmt::get_fixed_length_alignment_overhead(void) const { + return this->values_same_size ? align(this->value_length) - this->value_length : 0; +} + +template +bool dmt::value_length_is_fixed(void) const { + return this->values_same_size; +} + +template +void dmt::serialize_values(uint32_t expected_unpadded_memory, struct wbuf *wb) const { + invariant(this->is_array); + invariant(this->values_same_size); + const uint8_t pad_bytes = get_fixed_length_alignment_overhead(); + const uint32_t fixed_len = this->value_length; + const uint32_t fixed_aligned_len = align(this->value_length); + paranoid_invariant(expected_unpadded_memory == this->d.a.num_values * this->value_length); + paranoid_invariant(toku_mempool_get_used_size(&this->mp) >= + expected_unpadded_memory + pad_bytes * this->d.a.num_values); + if (this->d.a.num_values == 0) { + // Nothing to serialize + } else if (pad_bytes == 0) { + // Basically a memcpy + wbuf_nocrc_literal_bytes(wb, get_array_value(0), expected_unpadded_memory); + } else { + uint8_t* const dest = wbuf_nocrc_reserve_literal_bytes(wb, expected_unpadded_memory); + const uint8_t* const src = reinterpret_cast(get_array_value(0)); + //TODO(leif) maybe look at vectorization here + for (uint32_t i = 0; i < this->d.a.num_values; i++) { + memcpy(&dest[i*fixed_len], &src[i*fixed_aligned_len], fixed_len); + } + } +} + +template +void dmt::builder::create(uint32_t _max_values, uint32_t _max_value_bytes) { + this->max_values = _max_values; + this->max_value_bytes = _max_value_bytes; + this->temp.create(); + paranoid_invariant_null(toku_mempool_get_base(&this->temp.mp)); + this->temp_valid = true; + this->sorted_node_offsets = nullptr; + // Include enough space for alignment padding + size_t initial_space = (ALIGNMENT - 1) * _max_values + _max_value_bytes; + + toku_mempool_construct(&this->temp.mp, initial_space); // Adds 25% +} + +template +void dmt::builder::append(const dmtwriter_t &value) { + paranoid_invariant(this->temp_valid); + //NOTE: Always use d.a.num_values for size because we have not yet created root. 
+ if (this->temp.values_same_size && (this->temp.d.a.num_values == 0 || value.get_size() == this->temp.value_length)) { + temp.insert_at_array_end(value); + return; + } + if (this->temp.is_array) { + // Convert to tree format (without weights and linkage) + XMALLOC_N(this->max_values, this->sorted_node_offsets); + + // Include enough space for alignment padding + size_t mem_needed = (ALIGNMENT - 1 + __builtin_offsetof(dmt_node, value)) * max_values + max_value_bytes; + struct mempool old_mp = this->temp.mp; + + const uint32_t num_values = this->temp.d.a.num_values; + toku_mempool_construct(&this->temp.mp, mem_needed); + + // Copy over and get node_offsets + for (uint32_t i = 0; i < num_values; i++) { + dmtwriter_t writer(this->temp.value_length, this->temp.get_array_value_internal(&old_mp, i)); + this->sorted_node_offsets[i] = this->temp.node_malloc_and_set_value(writer); + } + this->temp.is_array = false; + this->temp.values_same_size = false; + this->temp.value_length = 0; + toku_mempool_destroy(&old_mp); + } + paranoid_invariant(!this->temp.is_array); + this->sorted_node_offsets[this->temp.d.a.num_values++] = this->temp.node_malloc_and_set_value(value); +} + +template +bool dmt::builder::value_length_is_fixed(void) { + paranoid_invariant(this->temp_valid); + return this->temp.values_same_size; +} + +template +void dmt::builder::build(dmt *dest) { + invariant(this->temp_valid); + //NOTE: Always use d.a.num_values for size because we have not yet created root. + invariant(this->temp.d.a.num_values <= this->max_values); + // Memory invariant is taken care of incrementally (during append()) + + if (!this->temp.is_array) { + invariant_notnull(this->sorted_node_offsets); + this->temp.rebuild_subtree_from_offsets(&this->temp.d.t.root, this->sorted_node_offsets, this->temp.d.a.num_values); + toku_free(this->sorted_node_offsets); + this->sorted_node_offsets = nullptr; + } + paranoid_invariant_null(this->sorted_node_offsets); + + const size_t used = toku_mempool_get_used_size(&this->temp.mp); + const size_t allocated = toku_mempool_get_size(&this->temp.mp); + // We want to use no more than (about) the actual used space + 25% overhead for mempool growth. + // When we know the elements are fixed-length, we use the better dmt constructor. + // In practice, as of Jan 2014, we use the builder in two cases: + // - When we know the elements are not fixed-length. + // - During upgrade of a pre version 26 basement node. + // During upgrade, we will probably wildly overallocate because we don't account for the values that aren't stored in the dmt, so here we want to shrink the mempool. + // When we know the elements are not fixed-length, we still know how much memory they occupy in total, modulo alignment, so we want to allow for mempool overhead and worst-case alignment overhead, and not shrink the mempool. 
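+        // As a purely illustrative example of the threshold computed below: with used == 1000 bytes and
+        // size() == 100 values, max_allowed == 1000 + 3*100 == 1300 and
+        // max_allowed_with_mempool_overhead == 1300 + 1300/4 == 1625, so the mempool is only copied into a
+        // smaller one when more than 1625 bytes are currently allocated.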
+ const size_t max_allowed = used + (ALIGNMENT-1) * this->temp.size(); + const size_t max_allowed_with_mempool_overhead = max_allowed + max_allowed / 4; + //TODO(leif): get footprint calculation correct (under jemalloc) and add some form of footprint constraint + if (allocated > max_allowed_with_mempool_overhead) { + // Reallocate smaller mempool to save memory + invariant_zero(toku_mempool_get_frag_size(&this->temp.mp)); + struct mempool new_mp; + toku_mempool_construct(&new_mp, used); + void * newbase = toku_mempool_malloc(&new_mp, used); + invariant_notnull(newbase); + memcpy(newbase, toku_mempool_get_base(&this->temp.mp), used); + toku_mempool_destroy(&this->temp.mp); + this->temp.mp = new_mp; + } + + *dest = this->temp; + this->temp_valid = false; + +} +} // namespace toku diff --git a/storage/tokudb/ft-index/util/dmt.h b/storage/tokudb/ft-index/util/dmt.h new file mode 100644 index 0000000000000..d4b032f5d6f37 --- /dev/null +++ b/storage/tokudb/ft-index/util/dmt.h @@ -0,0 +1,732 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. 
+ + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +#include + +#include "portability/memory.h" +#include "portability/toku_portability.h" +#include "portability/toku_race_tools.h" +#include "portability/toku_stdint.h" + +#include "ft/serialize/wbuf.h" +#include "util/growable_array.h" +#include "util/mempool.h" + +namespace toku { +typedef uint32_t node_offset; + + +/** + * Dynamic Order Maintenance Tree (DMT) + * + * Maintains a collection of totally ordered values, where each value has weight 1. + * A DMT supports variable sized values. + * The DMT is a mutable datatype. + * + * The Abstraction: + * + * An DMT is a vector of values, $V$, where $|V|$ is the length of the vector. + * The vector is numbered from $0$ to $|V|-1$. + * + * We can create a new DMT, which is the empty vector. + * + * We can insert a new element $x$ into slot $i$, changing $V$ into $V'$ where + * $|V'|=1+|V|$ and + * + * V'_j = V_j if $ji$. + * + * We can specify $i$ using a kind of function instead of as an integer. + * Let $b$ be a function mapping from values to nonzero integers, such that + * the signum of $b$ is monotically increasing. + * We can specify $i$ as the minimum integer such that $b(V_i)>0$. + * + * We look up a value using its index, or using a Heaviside function. + * For lookups, we allow $b$ to be zero for some values, and again the signum of $b$ must be monotonically increasing. + * When lookup up values, we can look up + * $V_i$ where $i$ is the minimum integer such that $b(V_i)=0$. (With a special return code if no such value exists.) + * (Rationale: Ordinarily we want $i$ to be unique. But for various reasons we want to allow multiple zeros, and we want the smallest $i$ in that case.) + * $V_i$ where $i$ is the minimum integer such that $b(V_i)>0$. (Or an indication that no such value exists.) 
+ * $V_i$ where $i$ is the maximum integer such that $b(V_i)<0$. (Or an indication that no such value exists.) + * + * When looking up a value using a Heaviside function, we get the value and its index. + * + * Performance: + * Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$ calls to the Heaviside function. + * The memory required is O(|V|). + * + * Usage: + * The dmt is templated by three parameters: + * - dmtdata_t is what will be stored within the dmt. These could be pointers or real data types (ints, structs). + * - dmtdataout_t is what will be returned by find and related functions. By default, it is the same as dmtdata_t, but you can set it to (dmtdata_t *). + * - dmtwriter_t is a class that effectively handles (de)serialization between the value stored in the dmt and outside the dmt. + * To create an dmt which will store "TXNID"s, for example, it is a good idea to typedef the template: + * typedef dmt txnid_dmt_t; + * If you are storing structs (or you want to edit what is stored), you may want to be able to get a pointer to the data actually stored in the dmt (see find_zero). To do this, use the second template parameter: + * typedef dmt foo_dmt_t; + */ + +namespace dmt_internal { + +class subtree { +private: + uint32_t m_index; +public: + // The maximum mempool size for a dmt is 2**32-2 + static const uint32_t NODE_NULL = UINT32_MAX; + inline void set_to_null(void) { + m_index = NODE_NULL; + } + + inline bool is_null(void) const { + return NODE_NULL == this->get_offset(); + } + + inline node_offset get_offset(void) const { + return m_index; + } + + inline void set_offset(node_offset index) { + paranoid_invariant(index != NODE_NULL); + m_index = index; + } +} __attribute__((__packed__,__aligned__(4))); + +template +class dmt_node_templated { +public: + uint32_t weight; + subtree left; + subtree right; + uint32_t value_length; + dmtdata_t value; +} __attribute__((__aligned__(4))); //NOTE: we cannot use attribute packed or dmtdata_t will call copy constructors (dmtdata_t might not be packed by default) + +} + +using namespace toku::dmt_internal; + +// Each data type used in a dmt requires a dmt_writer class (allows you to insert/etc with dynamic sized types). +// A dmt_writer can be thought of a (de)serializer +// There is no default implementation. +// A dmtwriter instance handles reading/writing 'dmtdata_t's to/from the dmt. +// The class must implement the following functions: +// The size required in a dmt for the dmtdata_t represented: +// size_t get_size(void) const; +// Write the dmtdata_t to memory owned by a dmt: +// void write_to(dmtdata_t *const dest) const; +// Constructor (others are allowed, but this one is required) +// dmtwriter(const uint32_t dmtdata_t_len, dmtdata_t *const src) + +template +class dmt { +private: + typedef dmt_node_templated dmt_node; + +public: + static const uint8_t ALIGNMENT = 4; + + class builder { + public: + void append(const dmtwriter_t &value); + + // Create a dmt builder to build a dmt that will have at most n_values values and use + // at most n_value_bytes bytes in the mempool to store values (not counting node or alignment overhead). + void create(uint32_t n_values, uint32_t n_value_bytes); + + bool value_length_is_fixed(void); + + // Constructs a dmt that contains everything that was append()ed to this builder. + // Destroys this builder and frees associated memory. 
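+        //
+        // Illustrative usage (editor's sketch, not part of the original header; the element
+        // type `kv' and writer `kv_writer' are hypothetical):
+        //     dmt<kv, kv, kv_writer> d;
+        //     dmt<kv, kv, kv_writer>::builder b;
+        //     b.create(n_values, n_value_bytes);              // reserve space up front
+        //     for (uint32_t i = 0; i < n_values; i++) {
+        //         // append in the order the values should appear in the dmt;
+        //         // kv_writer takes (length, pointer to source value)
+        //         b.append(kv_writer(lengths[i], &values[i]));
+        //     }
+        //     b.build(&d);                                    // b is consumed; d now owns the values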
+ void build(dmt *dest); + private: + uint32_t max_values; + uint32_t max_value_bytes; + node_offset *sorted_node_offsets; + bool temp_valid; + dmt temp; + }; + + /** + * Effect: Create an empty DMT. + * Performance: constant time. + */ + void create(void); + + /** + * Effect: Create a DMT containing values. The number of values is in numvalues. + * Each value is of a fixed (at runtime) length. + * mem contains the values in packed form (no alignment padding) + * Caller retains ownership of mem. + * Requires: this has not been created yet + * Rationale: Normally to insert N values takes O(N lg N) amortized time. + * If the N values are known in advance, are sorted, and + * the structure is empty, we can batch insert them much faster. + */ + __attribute__((nonnull)) + void create_from_sorted_memory_of_fixed_size_elements( + const void *mem, + const uint32_t numvalues, + const uint32_t mem_length, + const uint32_t fixed_value_length); + + /** + * Effect: Creates a copy of an dmt. + * Creates this as the clone. + * Each element is copied directly. If they are pointers, the underlying data is not duplicated. + * Performance: O(memory) (essentially a memdup) + * The underlying structures are memcpy'd. Only the values themselves are copied (shallow copy) + */ + void clone(const dmt &src); + + /** + * Effect: Set the tree to be empty. + * Note: Will not reallocate or resize any memory. + * Note: If this dmt had variable sized elements, it will start tracking again (until it gets values of two different sizes) + * Performance: time=O(1) + */ + void clear(void); + + /** + * Effect: Destroy an DMT, freeing all its memory. + * If the values being stored are pointers, their underlying data is not freed. + * Those values may be freed before or after calling ::destroy() + * Rationale: Returns no values since free() cannot fail. + * Rationale: Does not free the underlying pointers to reduce complexity/maintain abstraction layer + * Performance: time=O(1) + */ + void destroy(void); + + /** + * Effect: return |this| (number of values stored in this dmt). + * Performance: time=O(1) + */ + uint32_t size(void) const; + + /** + * Effect: Serialize all values contained in this dmt into a packed form (no alignment padding). + * We serialized to wb. expected_unpadded_memory is the size of memory reserved in the wbuf + * for serialization. (We assert that serialization requires exactly the expected amount) + * Requires: + * ::prepare_for_serialize() has been called and no non-const functions have been called since. + * This dmt has fixed-length values and is in array form. + * Performance: + * O(memory) + */ + void serialize_values(uint32_t expected_unpadded_memory, struct wbuf *wb) const; + + /** + * Effect: Insert value into the DMT. + * If there is some i such that $h(V_i, v)=0$ then returns DB_KEYEXIST. + * Otherwise, let i be the minimum value such that $h(V_i, v)>0$. + * If no such i exists, then let i be |V| + * Then this has the same effect as + * insert_at(tree, value, i); + * If idx!=NULL then i is stored in *idx + * Requires: The signum of h must be monotonically increasing. + * Returns: + * 0 success + * DB_KEYEXIST the key is present (h was equal to zero for some value) + * On nonzero return, dmt is unchanged. + * Performance: time=O(\log N) amortized. + * Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now. 
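+     *
+     * Illustrative example (editor's note, not part of the original header): a Heaviside
+     * function for a dmt of ints, assuming h is passed the stored value's length, the stored
+     * value, and the probe value v:
+     *     static int h(const uint32_t size, const int &stored, const int &probe) {
+     *         return (stored < probe) ? -1 : (stored > probe) ? +1 : 0;
+     *     }
+     * With such an h, insert keeps the ints in ascending order and rejects duplicates with
+     * DB_KEYEXIST.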
+ */ + template + int insert(const dmtwriter_t &value, const dmtcmp_t &v, uint32_t *const idx); + + /** + * Effect: Increases indexes of all items at slot >= idx by 1. + * Insert value into the position at idx. + * Returns: + * 0 success + * EINVAL if idx > this->size() + * On error, dmt is unchanged. + * Performance: time=O(\log N) amortized time. + * Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now. + */ + int insert_at(const dmtwriter_t &value, const uint32_t idx); + + /** + * Effect: Delete the item in slot idx. + * Decreases indexes of all items at slot > idx by 1. + * Returns + * 0 success + * EINVAL if idx>=this->size() + * On error, dmt is unchanged. + * Rationale: To delete an item, first find its index using find or find_zero, then delete it. + * Performance: time=O(\log N) amortized. + */ + int delete_at(const uint32_t idx); + + /** + * Effect: Iterate over the values of the dmt, from left to right, calling f on each value. + * The first argument passed to f is a ref-to-const of the value stored in the dmt. + * The second argument passed to f is the index of the value. + * The third argument passed to f is iterate_extra. + * The indices run from 0 (inclusive) to this->size() (exclusive). + * Requires: f != NULL + * Returns: + * If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by iterate. + * If f always returns zero, then iterate returns 0. + * Requires: Don't modify the dmt while running. (E.g., f may not insert or delete values from the dmt.) + * Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in the dmt. + * Rationale: Although the functional iterator requires defining another function (as opposed to C++ style iterator), it is much easier to read. + * Rationale: We may at some point use functors, but for now this is a smaller change from the old DMT. + */ + template + int iterate(iterate_extra_t *const iterate_extra) const; + + /** + * Effect: Iterate over the values of the dmt, from left to right, calling f on each value. + * The first argument passed to f is a ref-to-const of the value stored in the dmt. + * The second argument passed to f is the index of the value. + * The third argument passed to f is iterate_extra. + * The indices run from 0 (inclusive) to this->size() (exclusive). + * We will iterate only over [left,right) + * + * Requires: left <= right + * Requires: f != NULL + * Returns: + * EINVAL if right > this->size() + * If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by iterate_on_range. + * If f always returns zero, then iterate_on_range returns 0. + * Requires: Don't modify the dmt while running. (E.g., f may not insert or delete values from the dmt.) + * Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in the dmt. + * Rational: Although the functional iterator requires defining another function (as opposed to C++ style iterator), it is much easier to read. + */ + template + int iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const iterate_extra) const; + + // Attempt to verify this dmt is well formed. (Crashes/asserts/aborts if not well formed) + void verify(void) const; + + /** + * Effect: Iterate over the values of the dmt, from left to right, calling f on each value. + * The first argument passed to f is a pointer to the value stored in the dmt. 
+ * The second argument passed to f is the index of the value. + * The third argument passed to f is iterate_extra. + * The indices run from 0 (inclusive) to this->size() (exclusive). + * Requires: same as for iterate() + * Returns: same as for iterate() + * Performance: same as for iterate() + * Rationale: In general, most iterators should use iterate() since they should not modify the data stored in the dmt. This function is for iterators which need to modify values (for example, free_items). + * Rationale: We assume if you are transforming the data in place, you want to do it to everything at once, so there is not yet an iterate_on_range_ptr (but there could be). + */ + template + void iterate_ptr(iterate_extra_t *const iterate_extra); + + /** + * Effect: Set *value=V_idx + * Returns + * 0 success + * EINVAL if index>=toku_dmt_size(dmt) + * On nonzero return, *value is unchanged + * Performance: time=O(\log N) + */ + int fetch(const uint32_t idx, uint32_t *const value_size, dmtdataout_t *const value) const; + + /** + * Effect: Find the smallest i such that h(V_i, extra)>=0 + * If there is such an i and h(V_i,extra)==0 then set *idxp=i, set *value = V_i, and return 0. + * If there is such an i and h(V_i,extra)>0 then set *idxp=i and return DB_NOTFOUND. + * If there is no such i then set *idx=this->size() and return DB_NOTFOUND. + * Note: value is of type dmtdataout_t, which may be of type (dmtdata_t) or (dmtdata_t *) but is fixed by the instantiation. + * If it is the value type, then the value is copied out (even if the value type is a pointer to something else) + * If it is the pointer type, then *value is set to a pointer to the data within the dmt. + * This is determined by the type of the dmt as initially declared. + * If the dmt is declared as dmt, then foo_t's will be stored and foo_t's will be returned by find and related functions. + * If the dmt is declared as dmt, then foo_t's will be stored, and pointers to the stored items will be returned by find and related functions. + * Rationale: + * Structs too small for malloc should be stored directly in the dmt. + * These structs may need to be edited as they exist inside the dmt, so we need a way to get a pointer within the dmt. + * Using separate functions for returning pointers and values increases code duplication and reduces type-checking. + * That also reduces the ability of the creator of a data structure to give advice to its future users. + * Slight overloading in this case seemed to provide a better API and better type checking. + */ + template + int find_zero(const dmtcmp_t &extra, uint32_t *const value_size, dmtdataout_t *const value, uint32_t *const idxp) const; + + /** + * Effect: + * If direction >0 then find the smallest i such that h(V_i,extra)>0. + * If direction <0 then find the largest i such that h(V_i,extra)<0. + * (Direction may not be equal to zero.) + * If value!=NULL then store V_i in *value + * If idxp!=NULL then store i in *idxp. + * Requires: The signum of h is monotically increasing. + * Returns + * 0 success + * DB_NOTFOUND no such value is found. + * On nonzero return, *value and *idxp are unchanged + * Performance: time=O(\log N) + * Rationale: + * Here's how to use the find function to find various things + * Cases for find: + * find first value: ( h(v)=+1, direction=+1 ) + * find last value ( h(v)=-1, direction=-1 ) + * find first X ( h(v)=(v< x) ? -1 : 1 direction=+1 ) + * find last X ( h(v)=(v<=x) ? -1 : 1 direction=-1 ) + * find X or successor to X ( same as find first X. 
) + * + * Rationale: To help understand heaviside functions and behavor of find: + * There are 7 kinds of heaviside functions. + * The signus of the h must be monotonically increasing. + * Given a function of the following form, A is the element + * returned for direction>0, B is the element returned + * for direction<0, C is the element returned for + * direction==0 (see find_zero) (with a return of 0), and D is the element + * returned for direction==0 (see find_zero) with a return of DB_NOTFOUND. + * If any of A, B, or C are not found, then asking for the + * associated direction will return DB_NOTFOUND. + * See find_zero for more information. + * + * Let the following represent the signus of the heaviside function. + * + * -...- + * A + * D + * + * +...+ + * B + * D + * + * 0...0 + * C + * + * -...-0...0 + * AC + * + * 0...0+...+ + * C B + * + * -...-+...+ + * AB + * D + * + * -...-0...0+...+ + * AC B + */ + template + int find(const dmtcmp_t &extra, int direction, uint32_t *const value_size, dmtdataout_t *const value, uint32_t *const idxp) const; + + /** + * Effect: Return the size (in bytes) of the dmt, as it resides in main memory. + * If the data stored are pointers, don't include the size of what they all point to. + * //TODO(leif or yoni): (maybe rename and) return memory footprint instead of allocated size + */ + size_t memory_size(void); + + // Returns whether all values in the dmt are known to be the same size. + // Note: + // There are no false positives, but false negatives are allowed. + // A false negative can happen if this dmt had 2 (or more) different size values, + // and then enough were deleted so that all the remaining ones are the same size. + // Once that happens, this dmt will never again return true for this function unless/until + // ::clear() is called + bool value_length_is_fixed(void) const; + + + // If this dmt is empty, return value is undefined. + // else if value_length_is_fixed() then it returns the fixed length. + // else returns 0 + uint32_t get_fixed_length(void) const; + + // Preprocesses the dmt so that serialization can happen quickly. + // After this call, serialize_values() can be called but no other mutator function can be called in between. + void prepare_for_serialize(void); + +private: + // Do a bit of verification that subtree and nodes act like packed c structs and do not introduce unnecessary padding for alignment. + ENSURE_POD(subtree); + static_assert(ALIGNMENT > 0, "ALIGNMENT <= 0"); + static_assert((ALIGNMENT & (ALIGNMENT - 1)) == 0, "ALIGNMENT not a power of 2"); + static_assert(sizeof(dmt_node) - sizeof(dmtdata_t) == __builtin_offsetof(dmt_node, value), "value is not last field in node"); + static_assert(4 * sizeof(uint32_t) == __builtin_offsetof(dmt_node, value), "dmt_node is padded"); + static_assert(__builtin_offsetof(dmt_node, value) % ALIGNMENT == 0, "dmt_node requires padding for alignment"); + ENSURE_POD(dmt_node); + + struct dmt_array { + uint32_t num_values; + }; + + struct dmt_tree { + subtree root; + }; + + /* + Relationship between values_same_size, d.a.num_values, value_length, is_array: + In an empty dmt: + is_array is true + value_same_size is true + value_length is undefined + d.a.num_values is 0 + In a non-empty array dmt: + is_array is true + values_same_size is true + value_length is defined + d.a.num_values > 0 + In a non-empty tree dmt: + is_array = false + value_same_size is true iff all values have been the same size since the last time the dmt turned into a tree. 
+ value_length is defined iff values_same_size is true + d.a.num_values is undefined (the memory is used for the tree) + Note that in tree form, the dmt keeps track of if all values are the same size until the first time they are not. + 'values_same_size' will not become true again (even if we change all values to be the same size) + until/unless the dmt becomes empty, at which point it becomes an array again. + */ + bool values_same_size; + uint32_t value_length; // valid iff values_same_size is true. + struct mempool mp; + bool is_array; + union { + struct dmt_array a; + struct dmt_tree t; + } d; + + // Returns pad bytes per element (for alignment) or 0 if not fixed length. + uint32_t get_fixed_length_alignment_overhead(void) const; + + void verify_internal(const subtree &subtree, std::vector *touched) const; + + // Retrieves the node for a given subtree. + // Requires: !subtree.is_null() + dmt_node & get_node(const subtree &subtree) const; + + // Retrieves the node at a given offset in the mempool. + dmt_node & get_node(const node_offset offset) const; + + // Returns the weight of a subtree rooted at st. + // if st.is_null(), returns 0 + // Perf: O(1) + uint32_t nweight(const subtree &st) const; + + // Allocates space for a node (in the mempool) and uses the dmtwriter to write the value into the node + node_offset node_malloc_and_set_value(const dmtwriter_t &value); + + // Uses the dmtwriter to write a value into node n + void node_set_value(dmt_node *n, const dmtwriter_t &value); + + // (mempool-)free the memory for a node + void node_free(const subtree &st); + + // Effect: Resizes the mempool (holding the array) if necessary to hold one more item of length: this->value_length + // Requires: + // This dmt is in array form (and thus this->values_same_length) + void maybe_resize_array_for_insert(void); + + // Effect: Converts a dmt from array form to tree form. + // Perf: O(n) + // Note: This does not clear the 'this->values_same_size' bit + void convert_to_tree(void); + + // Effect: Resizes the mempool holding a tree if necessary. 
If value==nullptr then it may shrink if overallocated, + // otherwise resize only happens if there is not enough free space for an insert of value + void maybe_resize_tree(const dmtwriter_t * value); + + // Returns true if the tree rooted at st would need rebalance after adding + // leftmod to the left subtree and rightmod to the right subtree + bool will_need_rebalance(const subtree &st, const int leftmod, const int rightmod) const; + + __attribute__((nonnull)) + void insert_internal(subtree *const subtreep, const dmtwriter_t &value, const uint32_t idx, subtree **const rebalance_subtree); + + template + int insert_at_array_end(const dmtwriter_t& value_in); + + dmtdata_t * alloc_array_value_end(void); + + dmtdata_t * get_array_value(const uint32_t idx) const; + + dmtdata_t * get_array_value_internal(const struct mempool *mempool, const uint32_t idx) const; + + void convert_from_array_to_tree(void); + + void convert_from_tree_to_array(void); + + __attribute__((nonnull(2,5))) + void delete_internal(subtree *const subtreep, const uint32_t idx, subtree *const subtree_replace, subtree **const rebalance_subtree); + + template + int iterate_internal_array(const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) const; + + template + void iterate_ptr_internal(const uint32_t left, const uint32_t right, + const subtree &subtree, const uint32_t idx, + iterate_extra_t *const iterate_extra); + + template + void iterate_ptr_internal_array(const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra); + + template + int iterate_internal(const uint32_t left, const uint32_t right, + const subtree &subtree, const uint32_t idx, + iterate_extra_t *const iterate_extra) const; + + void fetch_internal_array(const uint32_t i, uint32_t *const value_len, dmtdataout_t *const value) const; + + void fetch_internal(const subtree &subtree, const uint32_t i, uint32_t *const value_len, dmtdataout_t *const value) const; + + __attribute__((nonnull)) + void fill_array_with_subtree_offsets(node_offset *const array, const subtree &subtree) const; + + __attribute__((nonnull)) + void rebuild_subtree_from_offsets(subtree *const subtree, const node_offset *const offsets, const uint32_t numvalues); + + __attribute__((nonnull)) + void rebalance(subtree *const subtree); + + __attribute__((nonnull)) + static void copyout(uint32_t *const outlen, dmtdata_t *const out, const dmt_node *const n); + + __attribute__((nonnull)) + static void copyout(uint32_t *const outlen, dmtdata_t **const out, dmt_node *const n); + + __attribute__((nonnull)) + static void copyout(uint32_t *const outlen, dmtdata_t *const out, const uint32_t len, const dmtdata_t *const stored_value_ptr); + + __attribute__((nonnull)) + static void copyout(uint32_t *const outlen, dmtdata_t **const out, const uint32_t len, dmtdata_t *const stored_value_ptr); + + template + int find_internal_zero_array(const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const; + + template + int find_internal_zero(const subtree &subtree, const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const; + + template + int find_internal_plus_array(const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const; + + template + int find_internal_plus(const subtree &subtree, const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const; + + template + int find_internal_minus_array(const 
dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const; + + template + int find_internal_minus(const subtree &subtree, const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const; + + // Allocate memory for an array: node_offset[num_idx] from pre-allocated contiguous free space in the mempool. + // If there is not enough space, returns nullptr. + node_offset* alloc_temp_node_offsets(uint32_t num_idxs); + + // Returns the aligned size of x. + // If x % ALIGNMENT == 0, returns x + // o.w. returns x + (ALIGNMENT - (x % ALIGNMENT)) + uint32_t align(const uint32_t x) const; +}; + +} // namespace toku + +// include the implementation here +#include "dmt.cc" + diff --git a/storage/tokudb/ft-index/util/doubly_linked_list.h b/storage/tokudb/ft-index/util/doubly_linked_list.h index 57c290e1e273f..738e2736fa19b 100644 --- a/storage/tokudb/ft-index/util/doubly_linked_list.h +++ b/storage/tokudb/ft-index/util/doubly_linked_list.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_DOUBLY_LINKED_LIST_H -#define UTIL_DOUBLY_LINKED_LIST_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
@@ -117,7 +117,7 @@ PATENT RIGHTS GRANT: //****************************************************************************** #include -#include +#include namespace toku { @@ -225,5 +225,3 @@ int DoublyLinkedList::iterate(int (*fun)(T container, extra_t extra), extra_t } } - -#endif // UTIL_DOUBLY_LINKED_LIST_H diff --git a/storage/tokudb/ft-index/util/fmutex.h b/storage/tokudb/ft-index/util/fmutex.h new file mode 100644 index 0000000000000..224a6972ba7c8 --- /dev/null +++ b/storage/tokudb/ft-index/util/fmutex.h @@ -0,0 +1,106 @@ +#pragma once + +// fair mutex +struct fmutex { + pthread_mutex_t mutex; + int mutex_held; + int num_want_mutex; + struct queue_item *wait_head; + struct queue_item *wait_tail; +}; + +// item on the queue +struct queue_item { + pthread_cond_t *cond; + struct queue_item *next; +}; + +static void enq_item(struct fmutex *fm, struct queue_item *const item) { + assert(item->next == NULL); + if (fm->wait_tail != NULL) { + fm->wait_tail->next = item; + } else { + assert(fm->wait_head == NULL); + fm->wait_head = item; + } + fm->wait_tail = item; +} + +static pthread_cond_t *deq_item(struct fmutex *fm) { + assert(fm->wait_head != NULL); + assert(fm->wait_tail != NULL); + struct queue_item *item = fm->wait_head; + fm->wait_head = fm->wait_head->next; + if (fm->wait_tail == item) { + fm->wait_tail = NULL; + } + return item->cond; +} + +void fmutex_create(struct fmutex *fm) { + pthread_mutex_init(&fm->mutex, NULL); + fm->mutex_held = 0; + fm->num_want_mutex = 0; + fm->wait_head = NULL; + fm->wait_tail = NULL; +} + +void fmutex_destroy(struct fmutex *fm) { + pthread_mutex_destroy(&fm->mutex); +} + +// Prerequisite: Holds m_mutex. +void fmutex_lock(struct fmutex *fm) { + pthread_mutex_lock(&fm->mutex); + + if (fm->mutex_held == 0 || fm->num_want_mutex == 0) { + // No one holds the lock. Grant the write lock. + fm->mutex_held = 1; + return; + } + + pthread_cond_t cond; + pthread_cond_init(&cond, NULL); + struct queue_item item = { .cond = &cond, .next = NULL }; + enq_item(fm, &item); + + // Wait for our turn. + ++fm->num_want_mutex; + pthread_cond_wait(&cond, &fm->mutex); + pthread_cond_destroy(&cond); + + // Now it's our turn. + assert(fm->num_want_mutex > 0); + assert(fm->mutex_held == 0); + + // Not waiting anymore; grab the lock. + --fm->num_want_mutex; + fm->mutex_held = 1; + + pthread_mutex_unlock(); +} + +void fmutex_mutex_unlock(struct fmutex *fm) { + pthread_mutex_lock(); + + fm->mutex_held = 0; + if (fm->wait_head == NULL) { + assert(fm->num_want_mutex == 0); + return; + } + assert(fm->num_want_mutex > 0); + + // Grant lock to the next waiter + pthread_cond_t *cond = deq_item(fm); + pthread_cond_signal(cond); + + pthread_mutex_unlock(); +} + +int fmutex_users(struct fmutex *fm) const { + return fm->mutex_held + fm->num_want_mutex; +} + +int fmutex_blocked_users(struct fmutex *fm) const { + return fm->num_want_mutex; +} diff --git a/storage/tokudb/ft-index/util/frwlock.cc b/storage/tokudb/ft-index/util/frwlock.cc index 7259c776f83fb..fac0c07967b01 100644 --- a/storage/tokudb/ft-index/util/frwlock.cc +++ b/storage/tokudb/ft-index/util/frwlock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/frwlock.h b/storage/tokudb/ft-index/util/frwlock.h index 7811e0d242725..985c92bccb4e1 100644 --- a/storage/tokudb/ft-index/util/frwlock.h +++ b/storage/tokudb/ft-index/util/frwlock.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_FRWLOCK_H -#define UTIL_FRWLOCK_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -176,5 +176,3 @@ ENSURE_POD(frwlock); // include the implementation here // #include "frwlock.cc" - -#endif // UTIL_FRWLOCK_H diff --git a/storage/tokudb/ft-index/util/growable_array.h b/storage/tokudb/ft-index/util/growable_array.h index 763377d0ab0f9..b452f94366c25 100644 --- a/storage/tokudb/ft-index/util/growable_array.h +++ b/storage/tokudb/ft-index/util/growable_array.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_GROWABLE_ARRAY_H -#define UTIL_GROWABLE_ARRAY_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -175,5 +175,3 @@ template class GrowableArray { }; } - -#endif // UTIL_GROWABLE_ARRAY_H diff --git a/storage/tokudb/ft-index/util/kibbutz.cc b/storage/tokudb/ft-index/util/kibbutz.cc index a84a6f4827ff0..ad0c0b3078805 100644 --- a/storage/tokudb/ft-index/util/kibbutz.cc +++ b/storage/tokudb/ft-index/util/kibbutz.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,11 +89,13 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2011-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "kibbutz.h" -#include "toku_config.h" #include + +#include #include +#include "kibbutz.h" + // A Kibbutz is a collection of workers and some work to do. 
struct todo { void (*f)(void *extra); diff --git a/storage/tokudb/ft-index/util/kibbutz.h b/storage/tokudb/ft-index/util/kibbutz.h index 83e981b916caf..2551588770084 100644 --- a/storage/tokudb/ft-index/util/kibbutz.h +++ b/storage/tokudb/ft-index/util/kibbutz.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_KIBBUTZ_H -#define UTIL_KIBBUTZ_H - // // The kibbutz is another threadpool meant to do arbitrary work. // @@ -116,5 +115,3 @@ void toku_kibbutz_enq (KIBBUTZ k, void (*f)(void*), void *extra); // destroys the kibbutz // void toku_kibbutz_destroy (KIBBUTZ k); - -#endif // UTIL_KIBBUTZ_H diff --git a/storage/tokudb/ft-index/ft/memarena.cc b/storage/tokudb/ft-index/util/memarena.cc similarity index 50% rename from storage/tokudb/ft-index/ft/memarena.cc rename to storage/tokudb/ft-index/util/memarena.cc index 0edb51370bc43..d8c0daa0ba059 100644 --- a/storage/tokudb/ft-index/ft/memarena.cc +++ b/storage/tokudb/ft-index/util/memarena.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,161 +89,142 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." +#include #include #include -#include "memarena.h" - -struct memarena { - char *buf; - size_t buf_used, buf_size; - size_t size_of_other_bufs; // the buf_size of all the other bufs. - char **other_bufs; - int n_other_bufs; -}; - -MEMARENA memarena_create_presized (size_t initial_size) { - MEMARENA XMALLOC(result); - result->buf_size = initial_size; - result->buf_used = 0; - result->other_bufs = NULL; - result->size_of_other_bufs = 0; - result->n_other_bufs = 0; - XMALLOC_N(result->buf_size, result->buf); - return result; -} +#include + +void memarena::create(size_t initial_size) { + _current_chunk = arena_chunk(); + _other_chunks = nullptr; + _size_of_other_chunks = 0; + _footprint_of_other_chunks = 0; + _n_other_chunks = 0; -MEMARENA memarena_create (void) { - return memarena_create_presized(1024); + _current_chunk.size = initial_size; + if (_current_chunk.size > 0) { + XMALLOC_N(_current_chunk.size, _current_chunk.buf); + } } -void memarena_clear (MEMARENA ma) { - // Free the other bufs. 
- int i; - for (i=0; in_other_bufs; i++) { - toku_free(ma->other_bufs[i]); - ma->other_bufs[i]=0; +void memarena::destroy(void) { + if (_current_chunk.buf) { + toku_free(_current_chunk.buf); + } + for (int i = 0; i < _n_other_chunks; i++) { + toku_free(_other_chunks[i].buf); + } + if (_other_chunks) { + toku_free(_other_chunks); } - ma->n_other_bufs=0; - // But reuse the main buffer - ma->buf_used = 0; - ma->size_of_other_bufs = 0; + _current_chunk = arena_chunk(); + _other_chunks = nullptr; + _n_other_chunks = 0; } -static size_t -round_to_page (size_t size) { - const size_t _PAGE_SIZE = 4096; - const size_t result = _PAGE_SIZE+((size-1)&~(_PAGE_SIZE-1)); - assert(0==(result&(_PAGE_SIZE-1))); // make sure it's aligned - assert(result>=size); // make sure it's not too small - assert(result= size); // make sure it's not too small + assert(r < size + page_size); // make sure we didn't grow by more than a page. + return r; } -void* malloc_in_memarena (MEMARENA ma, size_t size) { - if (ma->buf_size < ma->buf_used + size) { +static const size_t MEMARENA_MAX_CHUNK_SIZE = 64 * 1024 * 1024; + +void *memarena::malloc_from_arena(size_t size) { + if (_current_chunk.buf == nullptr || _current_chunk.size < _current_chunk.used + size) { // The existing block isn't big enough. // Add the block to the vector of blocks. - if (ma->buf) { - int old_n = ma->n_other_bufs; - REALLOC_N(old_n+1, ma->other_bufs); - assert(ma->other_bufs); - ma->other_bufs[old_n]=ma->buf; - ma->n_other_bufs = old_n+1; - ma->size_of_other_bufs += ma->buf_size; + if (_current_chunk.buf) { + invariant(_current_chunk.size > 0); + int old_n = _n_other_chunks; + XREALLOC_N(old_n + 1, _other_chunks); + _other_chunks[old_n] = _current_chunk; + _n_other_chunks = old_n + 1; + _size_of_other_chunks += _current_chunk.size; + _footprint_of_other_chunks += toku_memory_footprint(_current_chunk.buf, _current_chunk.used); } - // Make a new one - { - size_t new_size = 2*ma->buf_size; - if (new_sizebuf); - ma->buf_used = 0; - ma->buf_size = new_size; + + // Make a new one. Grow the buffer size exponentially until we hit + // the max chunk size, but make it at least `size' bytes so the + // current allocation always fit. + size_t new_size = std::min(MEMARENA_MAX_CHUNK_SIZE, 2 * _current_chunk.size); + if (new_size < size) { + new_size = size; } + new_size = round_to_page(new_size); // at least size, but round to the next page size + XMALLOC_N(new_size, _current_chunk.buf); + _current_chunk.used = 0; + _current_chunk.size = new_size; } + invariant(_current_chunk.buf != nullptr); + // allocate in the existing block. 
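+    // (Editor's note, illustrative: this is a plain bump allocator. On a fresh chunk,
+    // successive calls malloc_from_arena(10), malloc_from_arena(20), malloc_from_arena(30)
+    // return buf+0, buf+10 and buf+30, leaving used == 60. Allocations are never freed
+    // individually; destroy()/move_memory() release whole chunks at once.)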
- char *result=ma->buf+ma->buf_used; - ma->buf_used+=size; - return result; + char *p = _current_chunk.buf + _current_chunk.used; + _current_chunk.used += size; + return p; } -void *memarena_memdup (MEMARENA ma, const void *v, size_t len) { - void *r=malloc_in_memarena(ma, len); - memcpy(r,v,len); - return r; +void memarena::move_memory(memarena *dest) { + // Move memory to dest + XREALLOC_N(dest->_n_other_chunks + _n_other_chunks + 1, dest->_other_chunks); + dest->_size_of_other_chunks += _size_of_other_chunks + _current_chunk.size; + dest->_footprint_of_other_chunks += _footprint_of_other_chunks + toku_memory_footprint(_current_chunk.buf, _current_chunk.used); + for (int i = 0; i < _n_other_chunks; i++) { + dest->_other_chunks[dest->_n_other_chunks++] = _other_chunks[i]; + } + dest->_other_chunks[dest->_n_other_chunks++] = _current_chunk; + + // Clear out this memarena's memory + toku_free(_other_chunks); + _current_chunk = arena_chunk(); + _other_chunks = nullptr; + _size_of_other_chunks = 0; + _footprint_of_other_chunks = 0; + _n_other_chunks = 0; } -void memarena_close(MEMARENA *map) { - MEMARENA ma=*map; - if (ma->buf) { - toku_free(ma->buf); - ma->buf=0; - } - int i; - for (i=0; in_other_bufs; i++) { - toku_free(ma->other_bufs[i]); - } - if (ma->other_bufs) toku_free(ma->other_bufs); - ma->other_bufs=0; - ma->n_other_bufs=0; - toku_free(ma); - *map = 0; +size_t memarena::total_memory_size(void) const { + return sizeof(*this) + + total_size_in_use() + + _n_other_chunks * sizeof(*_other_chunks); } -#if TOKU_WINDOWS_32 -#include -#include -#endif - -void memarena_move_buffers(MEMARENA dest, MEMARENA source) { - int i; - char **other_bufs = dest->other_bufs; - static int move_counter = 0; - move_counter++; - REALLOC_N(dest->n_other_bufs + source->n_other_bufs + 1, other_bufs); -#if TOKU_WINDOWS_32 - if (other_bufs == 0) { - char **new_other_bufs; - printf("_CrtCheckMemory:%d\n", _CrtCheckMemory()); - printf("Z: move_counter:%d dest:%p %p %d source:%p %p %d errno:%d\n", - move_counter, - dest, dest->other_bufs, dest->n_other_bufs, - source, source->other_bufs, source->n_other_bufs, - errno); - new_other_bufs = toku_malloc((dest->n_other_bufs + source->n_other_bufs + 1)*sizeof (char **)); - printf("new_other_bufs=%p errno=%d\n", new_other_bufs, errno); - } -#endif +size_t memarena::total_size_in_use(void) const { + return _size_of_other_chunks + _current_chunk.used; +} - dest ->size_of_other_bufs += source->size_of_other_bufs + source->buf_size; - source->size_of_other_bufs = 0; +size_t memarena::total_footprint(void) const { + return sizeof(*this) + + _footprint_of_other_chunks + + toku_memory_footprint(_current_chunk.buf, _current_chunk.used) + + _n_other_chunks * sizeof(*_other_chunks); +} - assert(other_bufs); - dest->other_bufs = other_bufs; - for (i=0; in_other_bufs; i++) { - dest->other_bufs[dest->n_other_bufs++] = source->other_bufs[i]; - } - dest->other_bufs[dest->n_other_bufs++] = source->buf; - source->n_other_bufs = 0; - toku_free(source->other_bufs); - source->other_bufs = 0; - source->buf = 0; - source->buf_size = 0; - source->buf_used = 0; +//////////////////////////////////////////////////////////////////////////////// +const void *memarena::chunk_iterator::current(size_t *used) const { + if (_chunk_idx < 0) { + *used = _ma->_current_chunk.used; + return _ma->_current_chunk.buf; + } else if (_chunk_idx < _ma->_n_other_chunks) { + *used = _ma->_other_chunks[_chunk_idx].used; + return _ma->_other_chunks[_chunk_idx].buf; + } + *used = 0; + return nullptr; } -size_t 
-memarena_total_memory_size (MEMARENA m) -{ - return (memarena_total_size_in_use(m) + - sizeof(*m) + - m->n_other_bufs * sizeof(*m->other_bufs)); +void memarena::chunk_iterator::next() { + _chunk_idx++; } -size_t -memarena_total_size_in_use (MEMARENA m) -{ - return m->size_of_other_bufs + m->buf_used; -} +bool memarena::chunk_iterator::more() const { + if (_chunk_idx < 0) { + return _ma->_current_chunk.buf != nullptr; + } + return _chunk_idx < _ma->_n_other_chunks; +} diff --git a/storage/tokudb/ft-index/util/memarena.h b/storage/tokudb/ft-index/util/memarena.h new file mode 100644 index 0000000000000..8d1b577222ecb --- /dev/null +++ b/storage/tokudb/ft-index/util/memarena.h @@ -0,0 +1,176 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. 
+ + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#pragma once + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." +#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." + +/* + * A memarena is used to efficiently store a collection of objects that never move + * The pattern is allocate more and more stuff and free all of the items at once. + * The underlying memory will store 1 or more objects per chunk. Each chunk is + * contiguously laid out in memory but chunks are not necessarily contiguous with + * each other. + */ +class memarena { +public: + memarena() : + _current_chunk(arena_chunk()), + _other_chunks(nullptr), + _n_other_chunks(0), + _size_of_other_chunks(0), + _footprint_of_other_chunks(0) { + } + + // Effect: Create a memarena with the specified initial size + void create(size_t initial_size); + + void destroy(void); + + // Effect: Allocate some memory. The returned value remains valid until the memarena is cleared or closed. + // In case of ENOMEM, aborts. + void *malloc_from_arena(size_t size); + + // Effect: Move all the memory from this memarena into DEST. + // When SOURCE is closed the memory won't be freed. + // When DEST is closed, the memory will be freed, unless DEST moves its memory to another memarena... + void move_memory(memarena *dest); + + // Effect: Calculate the amount of memory used by a memory arena. + size_t total_memory_size(void) const; + + // Effect: Calculate the used space of the memory arena (ie: excludes unused space) + size_t total_size_in_use(void) const; + + // Effect: Calculate the amount of memory used, according to toku_memory_footprint(), + // which is a more expensive but more accurate count of memory used. + size_t total_footprint(void) const; + + // iterator over the underlying chunks that store objects in the memarena. + // a chunk is represented by a pointer to const memory and a usable byte count. 
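+    //
+    // Illustrative usage (editor's sketch, not part of the original header):
+    //     memarena::chunk_iterator it(&ma);
+    //     while (it.more()) {
+    //         size_t used;
+    //         const void *buf = it.current(&used);
+    //         // ... read `used' bytes starting at buf ...
+    //         it.next();
+    //     }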
+ class chunk_iterator { + public: + chunk_iterator(const memarena *ma) : + _ma(ma), _chunk_idx(-1) { + } + + // returns: base pointer to the current chunk + // *used set to the number of usable bytes + // if more() is false, returns nullptr and *used = 0 + const void *current(size_t *used) const; + + // requires: more() is true + void next(); + + bool more() const; + + private: + // -1 represents the 'initial' chunk in a memarena, ie: ma->_current_chunk + // >= 0 represents the i'th chunk in the ma->_other_chunks array + const memarena *_ma; + int _chunk_idx; + }; + +private: + struct arena_chunk { + arena_chunk() : buf(nullptr), used(0), size(0) { } + char *buf; + size_t used; + size_t size; + }; + + struct arena_chunk _current_chunk; + struct arena_chunk *_other_chunks; + int _n_other_chunks; + size_t _size_of_other_chunks; // the buf_size of all the other chunks. + size_t _footprint_of_other_chunks; // the footprint of all the other chunks. + + friend class memarena_unit_test; +}; diff --git a/storage/tokudb/ft-index/util/mempool.cc b/storage/tokudb/ft-index/util/mempool.cc index 8a37fd41b4452..23200ee41ad62 100644 --- a/storage/tokudb/ft-index/util/mempool.cc +++ b/storage/tokudb/ft-index/util/mempool.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -130,18 +130,34 @@ void toku_mempool_init(struct mempool *mp, void *base, size_t free_offset, size_ */ void toku_mempool_construct(struct mempool *mp, size_t data_size) { if (data_size) { - size_t mpsize = data_size + (data_size/4); // allow 1/4 room for expansion (would be wasted if read-only) - mp->base = toku_xmalloc(mpsize); // allocate buffer for mempool - mp->size = mpsize; - mp->free_offset = 0; // address of first available memory for new data - mp->frag_size = 0; // all allocated space is now in use + // add 25% slack + size_t mp_size = data_size + (data_size / 4); + mp->base = toku_xmalloc_aligned(64, mp_size); + mp->size = mp_size; + mp->free_offset = 0; + mp->frag_size = 0; } else { toku_mempool_zero(mp); - // fprintf(stderr, "Empty mempool created (base constructor)\n"); } } +void toku_mempool_reset(struct mempool *mp) { + mp->free_offset = 0; + mp->frag_size = 0; +} + +void toku_mempool_realloc_larger(struct mempool *mp, size_t data_size) { + invariant(data_size >= mp->free_offset); + + size_t mpsize = data_size + (data_size/4); // allow 1/4 room for expansion (would be wasted if read-only) + void* newmem = toku_xmalloc_aligned(64, mpsize); // allocate new buffer for mempool + memcpy(newmem, mp->base, mp->free_offset); // Copy old info + toku_free(mp->base); + mp->base = newmem; + mp->size = mpsize; +} + void toku_mempool_destroy(struct mempool *mp) { // printf("mempool_destroy %p %p %lu %lu\n", mp, mp->base, mp->size, mp->frag_size); @@ -150,48 +166,61 @@ void toku_mempool_destroy(struct mempool *mp) { toku_mempool_zero(mp); } -void *toku_mempool_get_base(struct mempool *mp) { +void *toku_mempool_get_base(const struct mempool *mp) { return mp->base; } -size_t toku_mempool_get_size(struct mempool *mp) { +void *toku_mempool_get_pointer_from_base_and_offset(const struct mempool *mp, size_t offset) { + return reinterpret_cast(reinterpret_cast(mp->base) + offset); +} + +size_t toku_mempool_get_offset_from_pointer_and_base(const struct mempool *mp, const void* p) { + paranoid_invariant(p >= mp->base); + return reinterpret_cast(p) - reinterpret_cast(mp->base); +} + 
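+// (Editor's note, illustrative: the two helpers above are inverses of each other; for any
+// offset o into the pool,
+//     toku_mempool_get_offset_from_pointer_and_base(mp,
+//         toku_mempool_get_pointer_from_base_and_offset(mp, o)) == o.)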
+size_t toku_mempool_get_size(const struct mempool *mp) { return mp->size; } -size_t toku_mempool_get_frag_size(struct mempool *mp) { +size_t toku_mempool_get_frag_size(const struct mempool *mp) { return mp->frag_size; } -size_t toku_mempool_get_used_space(struct mempool *mp) { +size_t toku_mempool_get_used_size(const struct mempool *mp) { return mp->free_offset - mp->frag_size; } -size_t toku_mempool_get_free_space(struct mempool *mp) { +void* toku_mempool_get_next_free_ptr(const struct mempool *mp) { + return toku_mempool_get_pointer_from_base_and_offset(mp, mp->free_offset); +} + +size_t toku_mempool_get_offset_limit(const struct mempool *mp) { + return mp->free_offset; +} + +size_t toku_mempool_get_free_size(const struct mempool *mp) { return mp->size - mp->free_offset; } -size_t toku_mempool_get_allocated_space(struct mempool *mp) { +size_t toku_mempool_get_allocated_size(const struct mempool *mp) { return mp->free_offset; } -void *toku_mempool_malloc(struct mempool *mp, size_t size, int alignment) { +void *toku_mempool_malloc(struct mempool *mp, size_t size) { paranoid_invariant(size < (1U<<31)); paranoid_invariant(mp->size < (1U<<31)); paranoid_invariant(mp->free_offset < (1U<<31)); paranoid_invariant(mp->free_offset <= mp->size); void *vp; - size_t offset = (mp->free_offset + (alignment-1)) & ~(alignment-1); - //printf("mempool_malloc size=%ld base=%p free_offset=%ld mp->size=%ld offset=%ld\n", size, mp->base, mp->free_offset, mp->size, offset); - if (offset + size > mp->size) { - vp = 0; + if (mp->free_offset + size > mp->size) { + vp = nullptr; } else { - vp = (char *)mp->base + offset; - mp->free_offset = offset + size; + vp = reinterpret_cast(mp->base) + mp->free_offset; + mp->free_offset += size; } paranoid_invariant(mp->free_offset <= mp->size); - paranoid_invariant(((long)vp & (alignment-1)) == 0); paranoid_invariant(vp == 0 || toku_mempool_inrange(mp, vp, size)); - //printf("mempool returning %p\n", vp); return vp; } @@ -199,7 +228,8 @@ void *toku_mempool_malloc(struct mempool *mp, size_t size, int alignment) { void toku_mempool_mfree(struct mempool *mp, void *vp, size_t size) { if (vp) { paranoid_invariant(toku_mempool_inrange(mp, vp, size)); } mp->frag_size += size; - paranoid_invariant(mp->frag_size <= mp->size); + invariant(mp->frag_size <= mp->free_offset); + invariant(mp->frag_size <= mp->size); } @@ -211,10 +241,10 @@ size_t toku_mempool_footprint(struct mempool *mp) { return rval; } -void toku_mempool_clone(struct mempool* orig_mp, struct mempool* new_mp) { +void toku_mempool_clone(const struct mempool* orig_mp, struct mempool* new_mp) { new_mp->frag_size = orig_mp->frag_size; new_mp->free_offset = orig_mp->free_offset; new_mp->size = orig_mp->free_offset; // only make the cloned mempool store what is needed - new_mp->base = toku_xmalloc(new_mp->size); + new_mp->base = toku_xmalloc_aligned(64, new_mp->size); memcpy(new_mp->base, orig_mp->base, new_mp->size); } diff --git a/storage/tokudb/ft-index/util/mempool.h b/storage/tokudb/ft-index/util/mempool.h index fa59d7a63ac2d..e1a47e6659149 100644 --- a/storage/tokudb/ft-index/util/mempool.h +++ b/storage/tokudb/ft-index/util/mempool.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_MEMPOOL_H -#define UTIL_MEMPOOL_H - /* a memory pool is a contiguous region of memory that supports single allocations from the pool. these allocated regions are never recycled. when the memory pool no longer has free space, the allocated chunks @@ -123,29 +122,49 @@ void toku_mempool_init(struct mempool *mp, void *base, size_t free_offset, size_ */ void toku_mempool_construct(struct mempool *mp, size_t data_size); +/* treat mempool as if it has just been created; ignore any frag and start allocating from beginning again. + */ +void toku_mempool_reset(struct mempool *mp); + +/* reallocate memory for construct mempool + */ +void toku_mempool_realloc_larger(struct mempool *mp, size_t data_size); + /* destroy the memory pool */ void toku_mempool_destroy(struct mempool *mp); /* get the base address of the memory pool */ -void *toku_mempool_get_base(struct mempool *mp); +void *toku_mempool_get_base(const struct mempool *mp); + +/* get the a pointer that is offset bytes in front of base of the memory pool */ +void *toku_mempool_get_pointer_from_base_and_offset(const struct mempool *mp, size_t offset); + +/* get the offset from base of a pointer */ +size_t toku_mempool_get_offset_from_pointer_and_base(const struct mempool *mp, const void* p); + +/* get the a pointer of the first free byte (if any) */ +void* toku_mempool_get_next_free_ptr(const struct mempool *mp); + +/* get the limit of valid offsets. (anything later was not allocated) */ +size_t toku_mempool_get_offset_limit(const struct mempool *mp); /* get the size of the memory pool */ -size_t toku_mempool_get_size(struct mempool *mp); +size_t toku_mempool_get_size(const struct mempool *mp); /* get the amount of fragmented (wasted) space in the memory pool */ -size_t toku_mempool_get_frag_size(struct mempool *mp); +size_t toku_mempool_get_frag_size(const struct mempool *mp); /* get the amount of space that is holding useful data */ -size_t toku_mempool_get_used_space(struct mempool *mp); +size_t toku_mempool_get_used_size(const struct mempool *mp); /* get the amount of space that is available for new data */ -size_t toku_mempool_get_free_space(struct mempool *mp); +size_t toku_mempool_get_free_size(const struct mempool *mp); /* get the amount of space that has been allocated for use (wasted or not) */ -size_t toku_mempool_get_allocated_space(struct mempool *mp); +size_t toku_mempool_get_allocated_size(const struct mempool *mp); -/* allocate a chunk of memory from the memory pool suitably aligned */ -void *toku_mempool_malloc(struct mempool *mp, size_t size, int alignment); +/* allocate a chunk of memory from the memory pool */ +void *toku_mempool_malloc(struct mempool *mp, size_t size); /* free a previously allocated chunk of memory. the free only updates a count of the amount of free space in the memory pool. 
the memory @@ -160,6 +179,4 @@ static inline int toku_mempool_inrange(struct mempool *mp, void *vp, size_t size /* get memory footprint */ size_t toku_mempool_footprint(struct mempool *mp); -void toku_mempool_clone(struct mempool* orig_mp, struct mempool* new_mp); - -#endif // UTIL_MEMPOOL_H +void toku_mempool_clone(const struct mempool* orig_mp, struct mempool* new_mp); diff --git a/storage/tokudb/ft-index/ft/minicron.cc b/storage/tokudb/ft-index/util/minicron.cc similarity index 98% rename from storage/tokudb/ft-index/ft/minicron.cc rename to storage/tokudb/ft-index/util/minicron.cc index 03d4075e1b250..601e1fd40d4bb 100644 --- a/storage/tokudb/ft-index/ft/minicron.cc +++ b/storage/tokudb/ft-index/util/minicron.cc @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,9 +92,8 @@ PATENT RIGHTS GRANT: #include #include -#include "toku_assert.h" -#include "fttypes.h" -#include "minicron.h" +#include "portability/toku_assert.h" +#include "util/minicron.h" static void toku_gettime (toku_timespec_t *a) { diff --git a/storage/tokudb/ft-index/ft/minicron.h b/storage/tokudb/ft-index/util/minicron.h similarity index 97% rename from storage/tokudb/ft-index/ft/minicron.h rename to storage/tokudb/ft-index/util/minicron.h index d6cb0f76c9fa1..b97c56875615e 100644 --- a/storage/tokudb/ft-index/ft/minicron.h +++ b/storage/tokudb/ft-index/util/minicron.h @@ -28,7 +28,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,13 +88,10 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "$Id$" -#ifndef TOKU_MINICRON_H -#define TOKU_MINICRON_H +#pragma once #include #include -#include "fttypes.h" - // Specification: // A minicron is a miniature cron job for executing a job periodically inside a pthread. @@ -127,6 +124,3 @@ uint32_t toku_minicron_get_period_in_seconds_unlocked(struct minicron *p); uint32_t toku_minicron_get_period_in_ms_unlocked(struct minicron *p); int toku_minicron_shutdown(struct minicron *p); bool toku_minicron_has_been_shutdown(struct minicron *p); - - -#endif diff --git a/storage/tokudb/ft-index/util/nb_mutex.h b/storage/tokudb/ft-index/util/nb_mutex.h index f781e9d6dda20..cc350813622d0 100644 --- a/storage/tokudb/ft-index/util/nb_mutex.h +++ b/storage/tokudb/ft-index/util/nb_mutex.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_NB_MUTEX_H -#define UTIL_NB_MUTEX_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
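[Editor's note] mempool.h above now declares toku_mempool_get_pointer_from_base_and_offset and toku_mempool_get_offset_from_pointer_and_base. A small sketch of that base/offset round trip, with illustrative names and only the base field modeled:

#include <cassert>
#include <cstddef>

struct pool { void *base; };

// offset -> pointer: the byte that is offset bytes past the pool's base.
static void *pointer_from_offset(const pool *mp, size_t offset) {
    return static_cast<char *>(mp->base) + offset;
}

// pointer -> offset: the distance of p from the pool's base.
static size_t offset_from_pointer(const pool *mp, const void *p) {
    return static_cast<const char *>(p) - static_cast<const char *>(mp->base);
}

int main() {
    char buf[64];
    pool mp = { buf };
    void *p = pointer_from_offset(&mp, 16);
    assert(offset_from_pointer(&mp, p) == 16);  // the two helpers invert each other
    return 0;
}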
@@ -155,5 +155,3 @@ static inline int nb_mutex_writers(NB_MUTEX nb_mutex) { static inline int nb_mutex_users(NB_MUTEX nb_mutex) { return rwlock_users(&nb_mutex->lock); } - -#endif // UTIL_NB_MUTEX_H diff --git a/storage/tokudb/ft-index/util/omt.cc b/storage/tokudb/ft-index/util/omt.cc index 8bff7ef70999d..bb3fc34c51371 100644 --- a/storage/tokudb/ft-index/util/omt.cc +++ b/storage/tokudb/ft-index/util/omt.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -92,7 +92,7 @@ PATENT RIGHTS GRANT: #include #include -#include +#include namespace toku { @@ -207,6 +207,9 @@ void omt::clone(const omt &src) { src.fill_array_with_subtree_values(&this->d.a.values[0], src.d.t.root); } this->d.a.num_values = src.size(); + if (supports_marks) { + this->convert_to_tree(); + } } template diff --git a/storage/tokudb/ft-index/util/omt.h b/storage/tokudb/ft-index/util/omt.h index 6e963badafa6b..02f3f0d759ab6 100644 --- a/storage/tokudb/ft-index/util/omt.h +++ b/storage/tokudb/ft-index/util/omt.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_OMT_H -#define UTIL_OMT_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,6 +87,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -813,5 +813,3 @@ class omt { // include the implementation here #include "omt.cc" - -#endif // UTIL_OMT_H diff --git a/storage/tokudb/ft-index/util/partitioned_counter.cc b/storage/tokudb/ft-index/util/partitioned_counter.cc index 4ac60cc8e2964..70dff209f3bda 100644 --- a/storage/tokudb/ft-index/util/partitioned_counter.cc +++ b/storage/tokudb/ft-index/util/partitioned_counter.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/partitioned_counter.h b/storage/tokudb/ft-index/util/partitioned_counter.h index b7401080f1147..4da0e084a826b 100644 --- a/storage/tokudb/ft-index/util/partitioned_counter.h +++ b/storage/tokudb/ft-index/util/partitioned_counter.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -86,12 +86,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 
11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_PARTITIONED_COUNTER_H -#define UTIL_PARTITIONED_COUNTER_H - // Overview: A partitioned_counter provides a counter that can be incremented and the running sum can be read at any time. // We assume that increments are frequent, whereas reading is infrequent. // Implementation hint: Use thread-local storage so each thread increments its own data. The increment does not require a lock or atomic operation. @@ -187,5 +186,3 @@ class PARTITIONED_COUNTER { friend void destroy_thread_local_part_of_partitioned_counters (void *); }; #endif - -#endif // UTIL_PARTITIONED_COUNTER_H diff --git a/storage/tokudb/ft-index/ft/queue.cc b/storage/tokudb/ft-index/util/queue.cc similarity index 95% rename from storage/tokudb/ft-index/ft/queue.cc rename to storage/tokudb/ft-index/util/queue.cc index 37c3bc025f833..7a2fefaefec93 100644 --- a/storage/tokudb/ft-index/ft/queue.cc +++ b/storage/tokudb/ft-index/util/queue.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -128,7 +128,7 @@ struct queue { // q->mutex and q->cond are used as condition variables. -int queue_create (QUEUE *q, uint64_t weight_limit) +int toku_queue_create (QUEUE *q, uint64_t weight_limit) { QUEUE CALLOC(result); if (result==NULL) return get_error_errno(); @@ -143,7 +143,7 @@ int queue_create (QUEUE *q, uint64_t weight_limit) return 0; } -int queue_destroy (QUEUE q) +int toku_queue_destroy (QUEUE q) { if (q->head) return EINVAL; assert(q->contents_weight==0); @@ -153,7 +153,7 @@ int queue_destroy (QUEUE q) return 0; } -int queue_enq (QUEUE q, void *item, uint64_t weight, uint64_t *total_weight_after_enq) +int toku_queue_enq (QUEUE q, void *item, uint64_t weight, uint64_t *total_weight_after_enq) { toku_mutex_lock(&q->mutex); assert(!q->eof); @@ -189,7 +189,7 @@ int queue_enq (QUEUE q, void *item, uint64_t weight, uint64_t *total_weight_afte return 0; } -int queue_eof (QUEUE q) +int toku_queue_eof (QUEUE q) { toku_mutex_lock(&q->mutex); assert(!q->eof); @@ -199,7 +199,7 @@ int queue_eof (QUEUE q) return 0; } -int queue_deq (QUEUE q, void **item, uint64_t *weight, uint64_t *total_weight_after_deq) +int toku_queue_deq (QUEUE q, void **item, uint64_t *weight, uint64_t *total_weight_after_deq) { toku_mutex_lock(&q->mutex); int result; diff --git a/storage/tokudb/ft-index/ft/queue.h b/storage/tokudb/ft-index/util/queue.h similarity index 94% rename from storage/tokudb/ft-index/ft/queue.h rename to storage/tokudb/ft-index/util/queue.h index ec12a0193d282..88c7d99c200f8 100644 --- a/storage/tokudb/ft-index/ft/queue.h +++ b/storage/tokudb/ft-index/util/queue.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef TOKU_QUEUE_H -#define TOKU_QUEUE_H #ident "$Id$" /* @@ -32,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,11 +87,11 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
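[Editor's note] partitioned_counter.h above describes frequent increments and rare reads, with each thread incrementing its own storage. The following self-contained sketch shows that partitioned layout; it is illustrative only (the real implementation uses thread-local storage and allows reads while increments continue, whereas this demo sums after the threads join):

#include <cstdint>
#include <cstdio>
#include <thread>
#include <vector>

// One padded slot per thread so increments touch thread-private cache lines.
struct counter_slot { uint64_t value = 0; char pad[64 - sizeof(uint64_t)]; };

int main() {
    const int nthreads = 4;
    std::vector<counter_slot> slots(nthreads);
    std::vector<std::thread> threads;
    for (int t = 0; t < nthreads; t++) {
        threads.emplace_back([&slots, t] {
            for (int i = 0; i < 1000000; i++) slots[t].value++;  // no lock, no atomic
        });
    }
    for (auto &th : threads) th.join();
    uint64_t sum = 0;
    for (const auto &s : slots) sum += s.value;  // a read is the sum of all partitions
    printf("count = %llu\n", (unsigned long long)sum);
    return 0;
}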
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include "fttypes.h" - // The abstraction: // // queue.h implements a queue suitable for a producer-consumer relationship between two pthreads. @@ -110,21 +108,21 @@ PATENT RIGHTS GRANT: typedef struct queue *QUEUE; -int queue_create (QUEUE *q, uint64_t weight_limit); +int toku_queue_create (QUEUE *q, uint64_t weight_limit); // Effect: Create a queue with a given weight limit. The queue is initially empty. -int queue_enq (QUEUE q, void *item, uint64_t weight, uint64_t *total_weight_after_enq); +int toku_queue_enq (QUEUE q, void *item, uint64_t weight, uint64_t *total_weight_after_enq); // Effect: Insert ITEM of weight WEIGHT into queue. If the resulting contents weight too much then block (don't return) until the total weight is low enough. // If total_weight_after_enq!=NULL then return the current weight of the items in the queue (after finishing blocking on overweight, and after enqueueing the item). // If successful return 0. // If an error occurs, return the error number, and the state of the queue is undefined. The item may have been enqueued or not, and in fact the queue may be badly corrupted if the condition variables go awry. If it's just a matter of out-of-memory, then the queue is probably OK. // Requires: There is only a single consumer. (We wake up the consumer using a pthread_cond_signal (which is suitable only for single consumers.) -int queue_eof (QUEUE q); +int toku_queue_eof (QUEUE q); // Effect: Inform the queue that no more values will be inserted. After all the values that have been inserted are dequeued, further dequeue operations will return EOF. // Returns 0 on success. On failure, things are pretty bad (likely to be some sort of mutex failure). -int queue_deq (QUEUE q, void **item, uint64_t *weight, uint64_t *total_weight_after_deq); +int toku_queue_deq (QUEUE q, void **item, uint64_t *weight, uint64_t *total_weight_after_deq); // Effect: Wait until the queue becomes nonempty. Then dequeue and return the oldest item. The item and its weight are returned in *ITEM. // If weight!=NULL then return the item's weight in *weight. // If total_weight_after_deq!=NULL then return the current weight of the items in the queue (after dequeuing the item). @@ -132,9 +130,8 @@ int queue_deq (QUEUE q, void **item, uint64_t *weight, uint64_t *total_weight_af // Return EOF is we no more items will be returned. // Usage note: The queue should be destroyed only after any consumers will no longer look at it (for example, they saw EOF). -int queue_destroy (QUEUE q); +int toku_queue_destroy (QUEUE q); // Effect: Destroy the queue. // Requires: The queue must be empty and no consumer should try to dequeue after this (one way to do this is to make sure the consumer saw EOF). // Returns 0 on success. If the queue is not empty, returns EINVAL. Other errors are likely to be bad (some sort of mutex or condvar failure). 
-#endif diff --git a/storage/tokudb/ft-index/util/rwlock.h b/storage/tokudb/ft-index/util/rwlock.h index cb72e153eb6a6..14b447a450ff9 100644 --- a/storage/tokudb/ft-index/util/rwlock.h +++ b/storage/tokudb/ft-index/util/rwlock.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef UTIL_RWLOCK_H -#define UTIL_RWLOCK_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -99,7 +99,7 @@ PATENT RIGHTS GRANT: * Overview ***************************************** * - * TokuDB employs readers/writers locks for the ephemeral locks (e.g., + * TokuFT employs readers/writers locks for the ephemeral locks (e.g., * on FT nodes) Why not just use the toku_pthread_rwlock API? * * 1) we need multiprocess rwlocks (not just multithreaded) @@ -353,4 +353,3 @@ static inline void rwlock_wait_for_users( toku_cond_destroy(&cond); } -#endif // UTIL_RWLOCK_H diff --git a/storage/tokudb/ft-index/util/scoped_malloc.cc b/storage/tokudb/ft-index/util/scoped_malloc.cc index 7fc847c3af6b7..551bd944bebd6 100644 --- a/storage/tokudb/ft-index/util/scoped_malloc.cc +++ b/storage/tokudb/ft-index/util/scoped_malloc.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,7 +89,7 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#include +#include #include diff --git a/storage/tokudb/ft-index/util/scoped_malloc.h b/storage/tokudb/ft-index/util/scoped_malloc.h index ae8847731f31c..dbd919d155e00 100644 --- a/storage/tokudb/ft-index/util/scoped_malloc.h +++ b/storage/tokudb/ft-index/util/scoped_malloc.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/sort.h b/storage/tokudb/ft-index/util/sort.h index 825909d4e9ffa..2925f7910294d 100644 --- a/storage/tokudb/ft-index/util/sort.h +++ b/storage/tokudb/ft-index/util/sort.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -89,22 +89,11 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." 
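[Editor's note] rwlock.h above explains why TokuFT keeps its own readers/writers lock for the ephemeral locks on FT nodes instead of using toku_pthread_rwlock directly. The following is a self-contained sketch of a counter-based readers/writers lock in that general spirit; it is illustrative only and not TokuFT's API (the real lock cooperates with a caller-supplied mutex and has fairness behavior this demo does not model).

#include <cassert>
#include <condition_variable>
#include <mutex>
#include <thread>

class rwlock_sketch {
    std::mutex m;
    std::condition_variable readers_cv, writers_cv;
    int readers = 0, writers = 0, want_write = 0;
public:
    void read_lock() {
        std::unique_lock<std::mutex> lk(m);
        readers_cv.wait(lk, [&] { return writers == 0 && want_write == 0; });  // writers go first
        readers++;
    }
    void read_unlock() {
        std::lock_guard<std::mutex> lk(m);
        if (--readers == 0) writers_cv.notify_one();
    }
    void write_lock() {
        std::unique_lock<std::mutex> lk(m);
        want_write++;
        writers_cv.wait(lk, [&] { return readers == 0 && writers == 0; });
        want_write--;
        writers = 1;
    }
    void write_unlock() {
        std::lock_guard<std::mutex> lk(m);
        writers = 0;
        writers_cv.notify_one();
        readers_cv.notify_all();
    }
};

int main() {
    rwlock_sketch lock;
    int shared = 0;
    std::thread w([&] { lock.write_lock(); shared = 42; lock.write_unlock(); });
    std::thread r([&] { lock.read_lock(); (void)shared; lock.read_unlock(); });
    w.join();
    r.join();
    assert(shared == 42);
    return 0;
}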
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." -#ifndef UTIL_SORT_H -#define UTIL_SORT_H +#pragma once #include #include -#if defined(HAVE_CILK) -#include -#define cilk_worker_count (__cilkrts_get_nworkers()) -#else -#define cilk_spawn -#define cilk_sync -#define cilk_for for -#define cilk_worker_count 1 -#endif - namespace toku { template @@ -148,9 +137,8 @@ namespace toku { } const int mid = n / 2; sortdata_t *right_as[2] = { &(as[0])[mid], &(as[1])[mid] }; - const int r1 = cilk_spawn mergesort_internal(as, which, mid, extra); + const int r1 = mergesort_internal(as, which, mid, extra); const int r2 = mergesort_internal(right_as, which, n - mid, extra); - cilk_sync; if (r1 != r2) { // move everything to the same place (r2) memcpy(as[r2], as[r1], mid * (sizeof as[r2][0])); @@ -222,9 +210,8 @@ namespace toku { const int a2 = an / 2; const sortdata_t *akey = &a[a2]; const int b2 = binsearch(*akey, b, bn, 0, extra); - cilk_spawn merge(dest, a, a2, b, b2, extra); + merge(dest, a, a2, b, b2, extra); merge(&dest[a2 + b2], akey, an - a2, &b[b2], bn - b2, extra); - cilk_sync; } } @@ -272,5 +259,3 @@ namespace toku { }; }; - -#endif // UTIL_SORT_H diff --git a/storage/tokudb/ft-index/util/status.h b/storage/tokudb/ft-index/util/status.h index 16a709237ddf1..1ab6d35e56075 100644 --- a/storage/tokudb/ft-index/util/status.h +++ b/storage/tokudb/ft-index/util/status.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,10 +90,11 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #pragma once + #include #include -#define TOKUDB_STATUS_INIT(array,k,c,t,l,inc) do { \ +#define TOKUFT_STATUS_INIT(array,k,c,t,l,inc) do { \ array.status[k].keyname = #k; \ array.status[k].columnname = #c; \ array.status[k].type = t; \ @@ -104,7 +105,7 @@ PATENT RIGHTS GRANT: constexpr_static_assert((inc) == TOKU_ENGINE_STATUS \ || strcmp(#c, "nullptr"), "Missing column name."); \ constexpr_static_assert(static_strncasecmp(#c, "TOKU", strlen("TOKU")), \ - "Do not start column names with toku/tokudb. Names get TOKUDB_ prefix automatically."); \ + "Do not start column names with toku."); \ array.status[k].include = static_cast(inc); \ if (t == PARCOUNT) { \ array.status[k].value.parcount = create_partitioned_counter(); \ diff --git a/storage/tokudb/ft-index/util/tests/marked-omt-test.cc b/storage/tokudb/ft-index/util/tests/marked-omt-test.cc index 883a414c566a1..97e4cf72d61b8 100644 --- a/storage/tokudb/ft-index/util/tests/marked-omt-test.cc +++ b/storage/tokudb/ft-index/util/tests/marked-omt-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/tests/memarena-test.cc b/storage/tokudb/ft-index/util/tests/memarena-test.cc new file mode 100644 index 0000000000000..7374539d11a4a --- /dev/null +++ b/storage/tokudb/ft-index/util/tests/memarena-test.cc @@ -0,0 +1,234 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: + +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#include + +#include "portability/toku_assert.h" + +#include "util/memarena.h" + +class memarena_unit_test { +private: + static const int magic = 37; + + template + void iterate_chunks(memarena *ma, F &fn) { + for (memarena::chunk_iterator it(ma); it.more(); it.next()) { + size_t used = 0; + const void *buf = it.current(&used); + fn(buf, used); + } + } + + void test_create(size_t size) { + memarena ma; + ma.create(size); + invariant(ma._current_chunk.size == size); + invariant(ma._current_chunk.used == 0); + if (size == 0) { + invariant_null(ma._current_chunk.buf); + } else { + invariant_notnull(ma._current_chunk.buf); + } + + // make sure memory was allocated ok by + // writing to buf and reading it back + if (size > 0) { + memset(ma._current_chunk.buf, magic, size); + } + for (size_t i = 0; i < size; i++) { + const char *buf = reinterpret_cast(ma._current_chunk.buf); + invariant(buf[i] == magic); + } + ma.destroy(); + } + + void test_malloc(size_t size) { + memarena ma; + ma.create(14); + void *v = ma.malloc_from_arena(size); + invariant_notnull(v); + + // make sure memory was allocated ok by + // writing to buf and reading it back + if (size > 0) { + memset(ma._current_chunk.buf, magic, size); + } + for (size_t i = 0; i < size; i++) { + const char *c = reinterpret_cast(ma._current_chunk.buf); + invariant(c[i] == magic); + } + ma.destroy(); + } + + static void test_iterate_fn(const void *buf, size_t used) { + for (size_t i = 0; i < used; i++) { + const char *c = reinterpret_cast(buf); + invariant(c[i] == (char) ((intptr_t) &c[i])); + } + } + + void test_iterate(size_t size) { + memarena ma; + ma.create(14); + for (size_t k = 0; k < size / 64; k += 64) { + void *v = ma.malloc_from_arena(64); + for (size_t i = 0; i < 64; i++) { + char *c = reinterpret_cast(v); + c[i] = (char) ((intptr_t) &c[i]); + } + } + size_t rest = size % 64; + if (rest != 0) { + void *v = ma.malloc_from_arena(64); + for (size_t i = 0; i < 64; i++) { + char *c = reinterpret_cast(v); + c[i] = (char) ((intptr_t) &c[i]); + } + } + + iterate_chunks(&ma, test_iterate_fn); + ma.destroy(); + } + + void test_move_memory(size_t size) { + memarena ma; + ma.create(14); + for (size_t k = 0; k < size / 64; k += 64) { + void *v = ma.malloc_from_arena(64); + for (size_t i = 0; i < 64; i++) { + char *c = reinterpret_cast(v); + c[i] = (char) ((intptr_t) &c[i]); + } + } + size_t rest = size % 64; + if (rest != 0) { + void *v = ma.malloc_from_arena(64); + for (size_t i = 0; i < 64; i++) { + char *c = reinterpret_cast(v); + c[i] = (char) ((intptr_t) &c[i]); + } + } + + memarena ma2; + ma.move_memory(&ma2); + iterate_chunks(&ma2, test_iterate_fn); + + ma.destroy(); + ma2.destroy(); + } + +public: + void test() { + test_create(0); + test_create(64); + test_create(128 * 1024 * 1024); + test_malloc(0); + test_malloc(63); + test_malloc(64); + test_malloc(64 * 1024 * 1024); + test_malloc((64 * 1024 * 1024) + 1); + test_iterate(0); + 
test_iterate(63); + test_iterate(128 * 1024); + test_iterate(64 * 1024 * 1024); + test_iterate((64 * 1024 * 1024) + 1); + test_move_memory(0); + test_move_memory(1); + test_move_memory(63); + test_move_memory(65); + test_move_memory(65 * 1024 * 1024); + test_move_memory(101 * 1024 * 1024); + } +}; + +int main(void) { + memarena_unit_test test; + test.test(); + return 0; +} diff --git a/storage/tokudb/ft-index/ft/tests/minicron-test.cc b/storage/tokudb/ft-index/util/tests/minicron-test.cc similarity index 95% rename from storage/tokudb/ft-index/ft/tests/minicron-test.cc rename to storage/tokudb/ft-index/util/tests/minicron-test.cc index 4345289f4ec19..7729edbda845a 100644 --- a/storage/tokudb/ft-index/ft/tests/minicron-test.cc +++ b/storage/tokudb/ft-index/util/tests/minicron-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,7 +90,7 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #include #include "test.h" -#include "minicron.h" +#include "util/minicron.h" #include #include @@ -115,8 +115,8 @@ __attribute__((__noreturn__)) never_run (void *a) { assert(a==0); assert(0); -#if TOKU_WINDOWS || defined(GCOV) - return 0; //ICC ignores the noreturn attribute. +#if defined(GCOV) + return 0; #endif } @@ -125,7 +125,7 @@ static void* test1 (void* v) { struct minicron m; - ZERO_STRUCT(m); + memset(&m, 0, sizeof(struct minicron)); int r = toku_minicron_setup(&m, 0, never_run, 0); assert(r==0); sleep(1); r = toku_minicron_shutdown(&m); assert(r==0); @@ -137,7 +137,7 @@ static void* test2 (void* v) { struct minicron m; - ZERO_STRUCT(m); + memset(&m, 0, sizeof(struct minicron)); int r = toku_minicron_setup(&m, 10000, never_run, 0); assert(r==0); sleep(2); r = toku_minicron_shutdown(&m); assert(r==0); @@ -174,7 +174,7 @@ test3 (void* v) struct tenx tx; gettimeofday(&tx.tv, 0); tx.counter=0; - ZERO_STRUCT(m); + memset(&m, 0, sizeof(struct minicron)); int r = toku_minicron_setup(&m, 1000, run_5x, &tx); assert(r==0); sleep(5); r = toku_minicron_shutdown(&m); assert(r==0); @@ -197,7 +197,7 @@ static void* test4 (void *v) { struct minicron m; int counter = 0; - ZERO_STRUCT(m); + memset(&m, 0, sizeof(struct minicron)); int r = toku_minicron_setup(&m, 2000, run_3sec, &counter); assert(r==0); sleep(10); r = toku_minicron_shutdown(&m); assert(r==0); @@ -209,7 +209,7 @@ static void* test5 (void *v) { struct minicron m; int counter = 0; - ZERO_STRUCT(m); + memset(&m, 0, sizeof(struct minicron)); int r = toku_minicron_setup(&m, 10000, run_3sec, &counter); assert(r==0); toku_minicron_change_period(&m, 2000); sleep(10); @@ -221,7 +221,7 @@ test5 (void *v) { static void* test6 (void *v) { struct minicron m; - ZERO_STRUCT(m); + memset(&m, 0, sizeof(struct minicron)); int r = toku_minicron_setup(&m, 5000, never_run, 0); assert(r==0); toku_minicron_change_period(&m, 0); sleep(7); @@ -233,8 +233,8 @@ test6 (void *v) { static void* test7 (void *v) { struct minicron m; + memset(&m, 0, sizeof(struct minicron)); int counter = 0; - ZERO_STRUCT(m); int r = toku_minicron_setup(&m, 5000, run_3sec, &counter); assert(r==0); sleep(17); r = toku_minicron_shutdown(&m); 
assert(r==0); diff --git a/storage/tokudb/ft-index/util/tests/omt-test.cc b/storage/tokudb/ft-index/util/tests/omt-test.cc new file mode 100644 index 0000000000000..28daed809656b --- /dev/null +++ b/storage/tokudb/ft-index/util/tests/omt-test.cc @@ -0,0 +1,950 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/* +COPYING CONDITIONS NOTICE: + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation, and provided that the + following conditions are met: + + * Redistributions of source code must retain this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below). + + * Redistributions in binary form must reproduce this COPYING + CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the + DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the + PATENT MARKING NOTICE (below), and the PATENT RIGHTS + GRANT (below) in the documentation and/or other materials + provided with the distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + +COPYRIGHT NOTICE: + + TokuFT, Tokutek Fractal Tree Indexing Library. + Copyright (C) 2007-2013 Tokutek, Inc. + +DISCLAIMER: + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + +UNIVERSITY PATENT NOTICE: + + The technology is licensed by the Massachusetts Institute of + Technology, Rutgers State University of New Jersey, and the Research + Foundation of State University of New York at Stony Brook under + United States of America Serial No. 11/760379 and to the patents + and/or patent applications resulting from it. + +PATENT MARKING NOTICE: + + This software is covered by US Patent No. 8,185,551. + This software is covered by US Patent No. 8,489,638. + +PATENT RIGHTS GRANT: + + "THIS IMPLEMENTATION" means the copyrightable works distributed by + Tokutek as part of the Fractal Tree project. + + "PATENT CLAIMS" means the claims of patents that are owned or + licensable by Tokutek, both currently or in the future; and that in + the absence of this license would be infringed by THIS + IMPLEMENTATION or by using or running THIS IMPLEMENTATION. + + "PATENT CHALLENGE" shall mean a challenge to the validity, + patentability, enforceability and/or non-infringement of any of the + PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. + + Tokutek hereby grants to you, for the term and geographical scope of + the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, + irrevocable (except as stated in this section) patent license to + make, have made, use, offer to sell, sell, import, transfer, and + otherwise run, modify, and propagate the contents of THIS + IMPLEMENTATION, where such license applies only to the PATENT + CLAIMS. This grant does not include claims that would be infringed + only as a consequence of further modifications of THIS + IMPLEMENTATION. 
If you or your agent or licensee institute or order + or agree to the institution of patent litigation against any entity + (including a cross-claim or counterclaim in a lawsuit) alleging that + THIS IMPLEMENTATION constitutes direct or contributory patent + infringement, or inducement of patent infringement, then any rights + granted to you under this License shall terminate as of the date + such litigation is filed. If you or your agent or exclusive + licensee institute or order or agree to the institution of a PATENT + CHALLENGE, then Tokutek may terminate any rights granted to you + under this License. +*/ + +#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." + +#include "test.h" + +#include + +static void +parse_args (int argc, const char *argv[]) { + const char *argv0=argv[0]; + while (argc>1) { + int resultcode=0; + if (strcmp(argv[1], "-v")==0) { + verbose++; + } else if (strcmp(argv[1], "-q")==0) { + verbose = 0; + } else if (strcmp(argv[1], "-h")==0) { + do_usage: + fprintf(stderr, "Usage:\n%s [-v|-h]\n", argv0); + exit(resultcode); + } else { + resultcode=1; + goto do_usage; + } + argc--; + argv++; + } +} +/* End ".h like" stuff. */ + +struct value { + uint32_t number; +}; +#define V(x) ((struct value *)(x)) + +enum rand_type { + TEST_RANDOM, + TEST_SORTED, + TEST_IDENTITY +}; +enum close_when_done { + CLOSE_WHEN_DONE, + KEEP_WHEN_DONE +}; +enum create_type { + STEAL_ARRAY, + BATCH_INSERT, + INSERT_AT, + INSERT_AT_ALMOST_RANDOM, +}; + +/* Globals */ +typedef void *OMTVALUE; +toku::omt *global_omt; +OMTVALUE* global_values = NULL; +struct value* global_nums = NULL; +uint32_t global_length; + +static void +cleanup_globals (void) { + assert(global_values); + toku_free(global_values); + global_values = NULL; + assert(global_nums); + toku_free(global_nums); + global_nums = NULL; +} + +/* Some test wrappers */ +struct functor { + int (*f)(OMTVALUE, uint32_t, void *); + void *v; +}; +int call_functor(const OMTVALUE &v, uint32_t idx, functor *const ftor); +int call_functor(const OMTVALUE &v, uint32_t idx, functor *const ftor) { + return ftor->f(const_cast(v), idx, ftor->v); +} +static int omt_iterate(toku::omt *omt, int (*f)(OMTVALUE, uint32_t, void*), void*v) { + struct functor ftor = { .f = f, .v = v }; + return omt->iterate(&ftor); +} + +struct heftor { + int (*h)(OMTVALUE, void *v); + void *v; +}; +int call_heftor(const OMTVALUE &v, const heftor &htor); +int call_heftor(const OMTVALUE &v, const heftor &htor) { + return htor.h(const_cast(v), htor.v); +} +static int omt_insert(toku::omt *omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v, uint32_t *index) { + struct heftor htor = { .h = h, .v = v }; + return omt->insert(value, htor, index); +} +static int omt_find_zero(toku::omt *V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, uint32_t *index) { + struct heftor htor = { .h = h, .v = extra }; + return V->find_zero(htor, value, index); +} +static int omt_find(toku::omt *V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, uint32_t *index) { + struct heftor htor = { .h = h, .v = extra }; + return V->find(htor, direction, value, index); +} +static int omt_split_at(toku::omt *omt, toku::omt **newomtp, uint32_t index) { + toku::omt *XMALLOC(newomt); + int r = omt->split_at(newomt, index); + if (r != 0) { + toku_free(newomt); + } else { + *newomtp = newomt; + } + return r; +} +static int omt_merge(toku::omt *leftomt, toku::omt *rightomt, toku::omt **newomtp) { + toku::omt *XMALLOC(newomt); + newomt->merge(leftomt, rightomt); + 
toku_free(leftomt); + toku_free(rightomt); + *newomtp = newomt; + return 0; +} + +const unsigned int random_seed = 0xFEADACBA; + +static void +init_init_values (unsigned int seed, uint32_t num_elements) { + srandom(seed); + + cleanup_globals(); + + XMALLOC_N(num_elements, global_values); + XMALLOC_N(num_elements, global_nums); + global_length = num_elements; +} + +static void +init_identity_values (unsigned int seed, uint32_t num_elements) { + uint32_t i; + + init_init_values(seed, num_elements); + + for (i = 0; i < global_length; i++) { + global_nums[i].number = i; + global_values[i] = (OMTVALUE)&global_nums[i]; + } +} + +static void +init_distinct_sorted_values (unsigned int seed, uint32_t num_elements) { + uint32_t i; + + init_init_values(seed, num_elements); + + uint32_t number = 0; + + for (i = 0; i < global_length; i++) { + number += (uint32_t)(random() % 32) + 1; + global_nums[i].number = number; + global_values[i] = (OMTVALUE)&global_nums[i]; + } +} + +static void +init_distinct_random_values (unsigned int seed, uint32_t num_elements) { + init_distinct_sorted_values(seed, num_elements); + + uint32_t i; + uint32_t choice; + uint32_t choices; + struct value temp; + for (i = 0; i < global_length - 1; i++) { + choices = global_length - i; + choice = random() % choices; + if (choice != i) { + temp = global_nums[i]; + global_nums[i] = global_nums[choice]; + global_nums[choice] = temp; + } + } +} + +static void +init_globals (void) { + XMALLOC_N(1, global_values); + XMALLOC_N(1, global_nums); + global_length = 1; +} + +static void +test_close (enum close_when_done do_close) { + if (do_close == KEEP_WHEN_DONE) { + return; + } + assert(do_close == CLOSE_WHEN_DONE); + global_omt->destroy(); + toku_free(global_omt); +} + +static void +test_create (enum close_when_done do_close) { + XMALLOC(global_omt); + global_omt->create(); + test_close(do_close); +} + +static void +test_create_size (enum close_when_done do_close) { + test_create(KEEP_WHEN_DONE); + assert(global_omt->size() == 0); + test_close(do_close); +} + +static void +test_create_insert_at_almost_random (enum close_when_done do_close) { + uint32_t i; + int r; + uint32_t size = 0; + + test_create(KEEP_WHEN_DONE); + r = global_omt->insert_at(global_values[0], global_omt->size()+1); + CKERR2(r, EINVAL); + r = global_omt->insert_at(global_values[0], global_omt->size()+2); + CKERR2(r, EINVAL); + for (i = 0; i < global_length/2; i++) { + assert(size==global_omt->size()); + r = global_omt->insert_at(global_values[i], i); + CKERR(r); + assert(++size==global_omt->size()); + r = global_omt->insert_at(global_values[global_length-1-i], i+1); + CKERR(r); + assert(++size==global_omt->size()); + } + r = global_omt->insert_at(global_values[0], global_omt->size()+1); + CKERR2(r, EINVAL); + r = global_omt->insert_at(global_values[0], global_omt->size()+2); + CKERR2(r, EINVAL); + assert(size==global_omt->size()); + test_close(do_close); +} + +static void +test_create_insert_at_sequential (enum close_when_done do_close) { + uint32_t i; + int r; + uint32_t size = 0; + + test_create(KEEP_WHEN_DONE); + r = global_omt->insert_at(global_values[0], global_omt->size()+1); + CKERR2(r, EINVAL); + r = global_omt->insert_at(global_values[0], global_omt->size()+2); + CKERR2(r, EINVAL); + for (i = 0; i < global_length; i++) { + assert(size==global_omt->size()); + r = global_omt->insert_at(global_values[i], i); + CKERR(r); + assert(++size==global_omt->size()); + } + r = global_omt->insert_at(global_values[0], global_omt->size()+1); + CKERR2(r, EINVAL); + r = 
global_omt->insert_at(global_values[0], global_omt->size()+2); + CKERR2(r, EINVAL); + assert(size==global_omt->size()); + test_close(do_close); +} + +static void +test_create_from_sorted_array (enum create_type create_choice, enum close_when_done do_close) { + global_omt = NULL; + + if (create_choice == BATCH_INSERT) { + XMALLOC(global_omt); + global_omt->create_from_sorted_array(global_values, global_length); + } + else if (create_choice == STEAL_ARRAY) { + XMALLOC(global_omt); + OMTVALUE* XMALLOC_N(global_length, values_copy); + memcpy(values_copy, global_values, global_length*sizeof(*global_values)); + global_omt->create_steal_sorted_array(&values_copy, global_length, global_length); + assert(values_copy==NULL); + } + else if (create_choice == INSERT_AT) { + test_create_insert_at_sequential(KEEP_WHEN_DONE); + } + else if (create_choice == INSERT_AT_ALMOST_RANDOM) { + test_create_insert_at_almost_random(KEEP_WHEN_DONE); + } + else { + assert(false); + } + + assert(global_omt!=NULL); + test_close(do_close); +} + +static void +test_create_from_sorted_array_size (enum create_type create_choice, enum close_when_done do_close) { + test_create_from_sorted_array(create_choice, KEEP_WHEN_DONE); + assert(global_omt->size()==global_length); + test_close(do_close); +} + +static void +test_fetch_verify (toku::omt *omtree, OMTVALUE* val, uint32_t len ) { + uint32_t i; + int r; + OMTVALUE v = (OMTVALUE)&i; + OMTVALUE oldv = v; + + assert(len == omtree->size()); + for (i = 0; i < len; i++) { + assert(oldv!=val[i]); + v = NULL; + r = omtree->fetch(i, &v); + CKERR(r); + assert(v != NULL); + assert(v != oldv); + assert(v == val[i]); + assert(V(v)->number == V(val[i])->number); + v = oldv; + } + + for (i = len; i < len*2; i++) { + v = oldv; + r = omtree->fetch(i, &v); + CKERR2(r, EINVAL); + assert(v == oldv); + } + +} + +static void +test_create_fetch_verify (enum create_type create_choice, enum close_when_done do_close) { + test_create_from_sorted_array(create_choice, KEEP_WHEN_DONE); + test_fetch_verify(global_omt, global_values, global_length); + test_close(do_close); +} + +static int iterate_helper_error_return = 1; + +static int +iterate_helper (OMTVALUE v, uint32_t idx, void* extra) { + if (extra == NULL) return iterate_helper_error_return; + OMTVALUE* vals = (OMTVALUE *)extra; + assert(v != NULL); + assert(v == vals[idx]); + assert(V(v)->number == V(vals[idx])->number); + return 0; +} + +static void +test_iterate_verify (toku::omt *omtree, OMTVALUE* vals, uint32_t len) { + int r; + iterate_helper_error_return = 0; + r = omt_iterate(omtree, iterate_helper, (void*)vals); + CKERR(r); + iterate_helper_error_return = 0xFEEDABBA; + r = omt_iterate(omtree, iterate_helper, NULL); + if (!len) { + CKERR2(r, 0); + } + else { + CKERR2(r, iterate_helper_error_return); + } +} + +static void +test_create_iterate_verify (enum create_type create_choice, enum close_when_done do_close) { + test_create_from_sorted_array(create_choice, KEEP_WHEN_DONE); + test_iterate_verify(global_omt, global_values, global_length); + test_close(do_close); +} + + +static void +permute_array (uint32_t* arr, uint32_t len) { + // + // create a permutation of 0...size-1 + // + uint32_t i = 0; + for (i = 0; i < len; i++) { + arr[i] = i; + } + for (i = 0; i < len - 1; i++) { + uint32_t choices = len - i; + uint32_t choice = random() % choices; + if (choice != i) { + uint32_t temp = arr[i]; + arr[i] = arr[choice]; + arr[choice] = temp; + } + } +} + +static void +test_create_set_at (enum create_type create_choice, enum close_when_done do_close) 
{ + uint32_t i = 0; + + struct value* old_nums = NULL; + XMALLOC_N(global_length, old_nums); + + uint32_t* perm = NULL; + XMALLOC_N(global_length, perm); + + OMTVALUE* old_values = NULL; + XMALLOC_N(global_length, old_values); + + permute_array(perm, global_length); + + // + // These are going to be the new global_values + // + for (i = 0; i < global_length; i++) { + old_nums[i] = global_nums[i]; + old_values[i] = &old_nums[i]; + global_values[i] = &old_nums[i]; + } + test_create_from_sorted_array(create_choice, KEEP_WHEN_DONE); + int r; + r = global_omt->set_at(global_values[0], global_length); + CKERR2(r,EINVAL); + r = global_omt->set_at(global_values[0], global_length+1); + CKERR2(r,EINVAL); + for (i = 0; i < global_length; i++) { + uint32_t choice = perm[i]; + global_values[choice] = &global_nums[choice]; + global_nums[choice].number = (uint32_t)random(); + r = global_omt->set_at(global_values[choice], choice); + CKERR(r); + test_iterate_verify(global_omt, global_values, global_length); + test_fetch_verify(global_omt, global_values, global_length); + } + r = global_omt->set_at(global_values[0], global_length); + CKERR2(r,EINVAL); + r = global_omt->set_at(global_values[0], global_length+1); + CKERR2(r,EINVAL); + + toku_free(perm); + toku_free(old_values); + toku_free(old_nums); + + test_close(do_close); +} + +static int +insert_helper (OMTVALUE value, void* extra_insert) { + OMTVALUE to_insert = (OMTVALUE)extra_insert; + assert(to_insert); + + if (V(value)->number < V(to_insert)->number) return -1; + if (V(value)->number > V(to_insert)->number) return +1; + return 0; +} + +static void +test_create_insert (enum close_when_done do_close) { + uint32_t i = 0; + + uint32_t* perm = NULL; + XMALLOC_N(global_length, perm); + + permute_array(perm, global_length); + + test_create(KEEP_WHEN_DONE); + int r; + uint32_t size = global_length; + global_length = 0; + while (global_length < size) { + uint32_t choice = perm[global_length]; + OMTVALUE to_insert = &global_nums[choice]; + uint32_t idx = UINT32_MAX; + + assert(global_length==global_omt->size()); + r = omt_insert(global_omt, to_insert, insert_helper, to_insert, &idx); + CKERR(r); + assert(idx <= global_length); + if (idx > 0) { + assert(V(to_insert)->number > V(global_values[idx-1])->number); + } + if (idx < global_length) { + assert(V(to_insert)->number < V(global_values[idx])->number); + } + global_length++; + assert(global_length==global_omt->size()); + /* Make room */ + for (i = global_length-1; i > idx; i--) { + global_values[i] = global_values[i-1]; + } + global_values[idx] = to_insert; + test_fetch_verify(global_omt, global_values, global_length); + test_iterate_verify(global_omt, global_values, global_length); + + idx = UINT32_MAX; + r = omt_insert(global_omt, to_insert, insert_helper, to_insert, &idx); + CKERR2(r, DB_KEYEXIST); + assert(idx < global_length); + assert(V(global_values[idx])->number == V(to_insert)->number); + assert(global_length==global_omt->size()); + + test_iterate_verify(global_omt, global_values, global_length); + test_fetch_verify(global_omt, global_values, global_length); + } + + toku_free(perm); + + test_close(do_close); +} + +static void +test_create_delete_at (enum create_type create_choice, enum close_when_done do_close) { + uint32_t i = 0; + int r = ENOSYS; + test_create_from_sorted_array(create_choice, KEEP_WHEN_DONE); + + assert(global_length == global_omt->size()); + r = global_omt->delete_at(global_length); + CKERR2(r,EINVAL); + assert(global_length == global_omt->size()); + r = 
global_omt->delete_at(global_length+1); + CKERR2(r,EINVAL); + while (global_length > 0) { + assert(global_length == global_omt->size()); + uint32_t index_to_delete = random()%global_length; + r = global_omt->delete_at(index_to_delete); + CKERR(r); + for (i = index_to_delete+1; i < global_length; i++) { + global_values[i-1] = global_values[i]; + } + global_length--; + test_fetch_verify(global_omt, global_values, global_length); + test_iterate_verify(global_omt, global_values, global_length); + } + assert(global_length == 0); + assert(global_length == global_omt->size()); + r = global_omt->delete_at(global_length); + CKERR2(r, EINVAL); + assert(global_length == global_omt->size()); + r = global_omt->delete_at(global_length+1); + CKERR2(r, EINVAL); + test_close(do_close); +} + +static void +test_split_merge (enum create_type create_choice, enum close_when_done do_close) { + int r = ENOSYS; + uint32_t i = 0; + toku::omt *left_split = NULL; + toku::omt *right_split = NULL; + test_create_from_sorted_array(create_choice, KEEP_WHEN_DONE); + + for (i = 0; i <= global_length; i++) { + r = omt_split_at(global_omt, &right_split, global_length+1); + CKERR2(r,EINVAL); + r = omt_split_at(global_omt, &right_split, global_length+2); + CKERR2(r,EINVAL); + + // + // test successful split + // + r = omt_split_at(global_omt, &right_split, i); + CKERR(r); + left_split = global_omt; + global_omt = NULL; + assert(left_split->size() == i); + assert(right_split->size() == global_length - i); + test_fetch_verify(left_split, global_values, i); + test_iterate_verify(left_split, global_values, i); + test_fetch_verify(right_split, &global_values[i], global_length - i); + test_iterate_verify(right_split, &global_values[i], global_length - i); + // + // verify that new global_omt's cannot do bad splits + // + r = omt_split_at(left_split, &global_omt, i+1); + CKERR2(r,EINVAL); + assert(left_split->size() == i); + assert(right_split->size() == global_length - i); + r = omt_split_at(left_split, &global_omt, i+2); + CKERR2(r,EINVAL); + assert(left_split->size() == i); + assert(right_split->size() == global_length - i); + r = omt_split_at(right_split, &global_omt, global_length - i + 1); + CKERR2(r,EINVAL); + assert(left_split->size() == i); + assert(right_split->size() == global_length - i); + r = omt_split_at(right_split, &global_omt, global_length - i + 1); + CKERR2(r,EINVAL); + assert(left_split->size() == i); + assert(right_split->size() == global_length - i); + + // + // test merge + // + r = omt_merge(left_split,right_split,&global_omt); + CKERR(r); + left_split = NULL; + right_split = NULL; + assert(global_omt->size() == global_length); + test_fetch_verify(global_omt, global_values, global_length); + test_iterate_verify(global_omt, global_values, global_length); + } + test_close(do_close); +} + + +static void +init_values (enum rand_type rand_choice) { + const uint32_t test_size = 100; + if (rand_choice == TEST_RANDOM) { + init_distinct_random_values(random_seed, test_size); + } + else if (rand_choice == TEST_SORTED) { + init_distinct_sorted_values(random_seed, test_size); + } + else if (rand_choice == TEST_IDENTITY) { + init_identity_values( random_seed, test_size); + } + else assert(false); +} + +static void +test_create_array (enum create_type create_choice, enum rand_type rand_choice) { + /* ********************************************************************** */ + init_values(rand_choice); + test_create_from_sorted_array( create_choice, CLOSE_WHEN_DONE); + test_create_from_sorted_array_size(create_choice, 
CLOSE_WHEN_DONE); + /* ********************************************************************** */ + init_values(rand_choice); + test_create_fetch_verify( create_choice, CLOSE_WHEN_DONE); + /* ********************************************************************** */ + init_values(rand_choice); + test_create_iterate_verify( create_choice, CLOSE_WHEN_DONE); + /* ********************************************************************** */ + init_values(rand_choice); + test_create_set_at( create_choice, CLOSE_WHEN_DONE); + /* ********************************************************************** */ + init_values(rand_choice); + test_create_delete_at( create_choice, CLOSE_WHEN_DONE); + /* ********************************************************************** */ + init_values(rand_choice); + test_create_insert( CLOSE_WHEN_DONE); + /* ********************************************************************** */ + init_values(rand_choice); + test_split_merge( create_choice, CLOSE_WHEN_DONE); +} + +typedef struct { + uint32_t first_zero; + uint32_t first_pos; +} h_extra; + + +static int +test_heaviside (OMTVALUE v_omt, void* x) { + OMTVALUE v = (OMTVALUE) v_omt; + h_extra* extra = (h_extra*)x; + assert(v && x); + assert(extra->first_zero <= extra->first_pos); + + uint32_t value = V(v)->number; + if (value < extra->first_zero) return -1; + if (value < extra->first_pos) return 0; + return 1; +} + +static void +heavy_extra (h_extra* extra, uint32_t first_zero, uint32_t first_pos) { + extra->first_zero = first_zero; + extra->first_pos = first_pos; +} + +static void +test_find_dir (int dir, void* extra, int (*h)(OMTVALUE, void*), + int r_expect, bool idx_will_change, uint32_t idx_expect, + uint32_t number_expect, bool UU(cursor_valid)) { + uint32_t idx = UINT32_MAX; + uint32_t old_idx = idx; + OMTVALUE omt_val; + int r; + + omt_val = NULL; + + /* Verify we can pass NULL value. */ + omt_val = NULL; + idx = old_idx; + if (dir == 0) { + r = omt_find_zero(global_omt, h, extra, NULL, &idx); + } + else { + r = omt_find( global_omt, h, extra, dir, NULL, &idx); + } + CKERR2(r, r_expect); + if (idx_will_change) { + assert(idx == idx_expect); + } + else { + assert(idx == old_idx); + } + assert(omt_val == NULL); + + /* Verify we can pass NULL idx. */ + omt_val = NULL; + idx = old_idx; + if (dir == 0) { + r = omt_find_zero(global_omt, h, extra, &omt_val, 0); + } + else { + r = omt_find( global_omt, h, extra, dir, &omt_val, 0); + } + CKERR2(r, r_expect); + assert(idx == old_idx); + if (r == DB_NOTFOUND) { + assert(omt_val == NULL); + } + else { + assert(V(omt_val)->number == number_expect); + } + + /* Verify we can pass NULL both. 
*/ + omt_val = NULL; + idx = old_idx; + if (dir == 0) { + r = omt_find_zero(global_omt, h, extra, NULL, 0); + } + else { + r = omt_find( global_omt, h, extra, dir, NULL, 0); + } + CKERR2(r, r_expect); + assert(idx == old_idx); + assert(omt_val == NULL); +} + +static void +test_find (enum create_type create_choice, enum close_when_done do_close) { + h_extra extra; + init_identity_values(random_seed, 100); + test_create_from_sorted_array(create_choice, KEEP_WHEN_DONE); + +/* + -...- + A +*/ + heavy_extra(&extra, global_length, global_length); + test_find_dir(-1, &extra, test_heaviside, 0, true, global_length-1, global_length-1, true); + test_find_dir(+1, &extra, test_heaviside, DB_NOTFOUND, false, 0, 0, false); + test_find_dir(0, &extra, test_heaviside, DB_NOTFOUND, true, global_length, global_length, false); + + +/* + +...+ + B +*/ + heavy_extra(&extra, 0, 0); + test_find_dir(-1, &extra, test_heaviside, DB_NOTFOUND, false, 0, 0, false); + test_find_dir(+1, &extra, test_heaviside, 0, true, 0, 0, true); + test_find_dir(0, &extra, test_heaviside, DB_NOTFOUND, true, 0, 0, false); + +/* + 0...0 + C +*/ + heavy_extra(&extra, 0, global_length); + test_find_dir(-1, &extra, test_heaviside, DB_NOTFOUND, false, 0, 0, false); + test_find_dir(+1, &extra, test_heaviside, DB_NOTFOUND, false, 0, 0, false); + test_find_dir(0, &extra, test_heaviside, 0, true, 0, 0, true); + +/* + -...-0...0 + AC +*/ + heavy_extra(&extra, global_length/2, global_length); + test_find_dir(-1, &extra, test_heaviside, 0, true, global_length/2-1, global_length/2-1, true); + test_find_dir(+1, &extra, test_heaviside, DB_NOTFOUND, false, 0, 0, false); + test_find_dir(0, &extra, test_heaviside, 0, true, global_length/2, global_length/2, true); + +/* + 0...0+...+ + C B +*/ + heavy_extra(&extra, 0, global_length/2); + test_find_dir(-1, &extra, test_heaviside, DB_NOTFOUND, false, 0, 0, false); + test_find_dir(+1, &extra, test_heaviside, 0, true, global_length/2, global_length/2, true); + test_find_dir(0, &extra, test_heaviside, 0, true, 0, 0, true); + +/* + -...-+...+ + AB +*/ + heavy_extra(&extra, global_length/2, global_length/2); + test_find_dir(-1, &extra, test_heaviside, 0, true, global_length/2-1, global_length/2-1, true); + test_find_dir(+1, &extra, test_heaviside, 0, true, global_length/2, global_length/2, true); + test_find_dir(0, &extra, test_heaviside, DB_NOTFOUND, true, global_length/2, global_length/2, false); + +/* + -...-0...0+...+ + AC B +*/ + heavy_extra(&extra, global_length/3, 2*global_length/3); + test_find_dir(-1, &extra, test_heaviside, 0, true, global_length/3-1, global_length/3-1, true); + test_find_dir(+1, &extra, test_heaviside, 0, true, 2*global_length/3, 2*global_length/3, true); + test_find_dir(0, &extra, test_heaviside, 0, true, global_length/3, global_length/3, true); + + /* Cleanup */ + test_close(do_close); +} + +static void +runtests_create_choice (enum create_type create_choice) { + test_create_array(create_choice, TEST_SORTED); + test_create_array(create_choice, TEST_RANDOM); + test_create_array(create_choice, TEST_IDENTITY); + test_find( create_choice, CLOSE_WHEN_DONE); +} + +static void +test_clone(uint32_t nelts) +// Test that each clone operation gives the right data back. If nelts is +// zero, also tests that you still get a valid omt back and that the way +// to deallocate it still works. 
+{ + toku::omt *src = NULL, *dest = NULL; + int r; + + XMALLOC(src); + src->create(); + for (long i = 0; i < nelts; ++i) { + r = src->insert_at((OMTVALUE) i, i); + assert_zero(r); + } + + XMALLOC(dest); + dest->clone(*src); + assert(dest != NULL); + assert(dest->size() == nelts); + for (long i = 0; i < nelts; ++i) { + OMTVALUE v; + long l; + r = dest->fetch(i, &v); + assert_zero(r); + l = (long) v; + assert(l == i); + } + dest->destroy(); + toku_free(dest); + src->destroy(); + toku_free(src); +} + +int +test_main(int argc, const char *argv[]) { + parse_args(argc, argv); + init_globals(); + test_create( CLOSE_WHEN_DONE); + test_create_size( CLOSE_WHEN_DONE); + runtests_create_choice(BATCH_INSERT); + runtests_create_choice(STEAL_ARRAY); + runtests_create_choice(INSERT_AT); + runtests_create_choice(INSERT_AT_ALMOST_RANDOM); + test_clone(0); + test_clone(1); + test_clone(1000); + test_clone(10000); + cleanup_globals(); + return 0; +} + diff --git a/storage/tokudb/ft-index/util/tests/omt-tmpl-test.cc b/storage/tokudb/ft-index/util/tests/omt-tmpl-test.cc index 8a9e13af89d5b..455502d50e906 100644 --- a/storage/tokudb/ft-index/util/tests/omt-tmpl-test.cc +++ b/storage/tokudb/ft-index/util/tests/omt-tmpl-test.cc @@ -30,7 +30,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/queue-test.cc b/storage/tokudb/ft-index/util/tests/queue-test.cc similarity index 94% rename from storage/tokudb/ft-index/ft/tests/queue-test.cc rename to storage/tokudb/ft-index/util/tests/queue-test.cc index edc2c628f9467..d15e9ccab96da 100644 --- a/storage/tokudb/ft-index/ft/tests/queue-test.cc +++ b/storage/tokudb/ft-index/util/tests/queue-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: @@ -94,7 +94,7 @@ PATENT RIGHTS GRANT: #include #include #include -#include "queue.h" +#include "util/queue.h" static int verbose=1; @@ -108,7 +108,7 @@ static void *start_0 (void *arg) { long count = 0; while (1) { uint64_t this_max_weight; - int r=queue_deq(q, &item, &weight, &this_max_weight); + int r=toku_queue_deq(q, &item, &weight, &this_max_weight); if (r==EOF) break; assert(r==0); if (this_max_weight>d_max_weight) d_max_weight=this_max_weight; @@ -123,7 +123,7 @@ static void *start_0 (void *arg) { static void enq (QUEUE q, long v, uint64_t weight) { uint64_t this_max_weight; - int r = queue_enq(q, (void*)v, (weight==0)?0:1, &this_max_weight); + int r = toku_queue_enq(q, (void*)v, (weight==0)?0:1, &this_max_weight); assert(r==0); if (this_max_weight>e_max_weight) e_max_weight=this_max_weight; //printf("E(%ld)=%ld %ld\n", v, this_max_weight, e_max_weight); @@ -138,7 +138,7 @@ static void queue_test_0 (uint64_t weight) d_max_weight = 0; QUEUE q; int r; - r = queue_create(&q, weight); assert(r==0); + r = toku_queue_create(&q, weight); assert(r==0); toku_pthread_t thread; r = toku_pthread_create(&thread, NULL, start_0, q); assert(r==0); enq(q, 0L, weight); @@ -148,12 +148,12 @@ static void queue_test_0 (uint64_t weight) sleep(1); enq(q, 4L, weight); enq(q, 5L, weight); - r = queue_eof(q); assert(r==0); + r = toku_queue_eof(q); assert(r==0); void *result; r = toku_pthread_join(thread, &result); assert(r==0); assert(result==NULL); assert(count_0==6); - r = queue_destroy(q); + r = toku_queue_destroy(q); assert(d_max_weight <= weight); assert(e_max_weight <= weight); } diff --git a/storage/tokudb/ft-index/util/tests/rwlock_condvar.h b/storage/tokudb/ft-index/util/tests/rwlock_condvar.h index db4b759ea52fa..58e7a61ae42ac 100644 --- a/storage/tokudb/ft-index/util/tests/rwlock_condvar.h +++ b/storage/tokudb/ft-index/util/tests/rwlock_condvar.h @@ -33,7 +33,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/tests/sort-tmpl-test.cc b/storage/tokudb/ft-index/util/tests/sort-tmpl-test.cc index a1be929fce091..7597c4fa5a7ee 100644 --- a/storage/tokudb/ft-index/util/tests/sort-tmpl-test.cc +++ b/storage/tokudb/ft-index/util/tests/sort-tmpl-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/tests/test-kibbutz.cc b/storage/tokudb/ft-index/util/tests/test-kibbutz.cc index 1f73037892c26..dd5a7facf641f 100644 --- a/storage/tokudb/ft-index/util/tests/test-kibbutz.cc +++ b/storage/tokudb/ft-index/util/tests/test-kibbutz.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/tests/test-kibbutz2.cc b/storage/tokudb/ft-index/util/tests/test-kibbutz2.cc index ce797c068d8b0..80b97ff69c628 100644 --- a/storage/tokudb/ft-index/util/tests/test-kibbutz2.cc +++ b/storage/tokudb/ft-index/util/tests/test-kibbutz2.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. 
DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/tests/test-rwlock-cheapness.cc b/storage/tokudb/ft-index/util/tests/test-rwlock-cheapness.cc index de54c21efd2be..ac04da16b853c 100644 --- a/storage/tokudb/ft-index/util/tests/test-rwlock-cheapness.cc +++ b/storage/tokudb/ft-index/util/tests/test-rwlock-cheapness.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/tests/test-rwlock.cc b/storage/tokudb/ft-index/util/tests/test-rwlock.cc index 42ceb00ad19eb..c4988aab85df8 100644 --- a/storage/tokudb/ft-index/util/tests/test-rwlock.cc +++ b/storage/tokudb/ft-index/util/tests/test-rwlock.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/tests/test.h b/storage/tokudb/ft-index/util/tests/test.h index 0760b9bf1fbe8..6ca60105d93a8 100644 --- a/storage/tokudb/ft-index/util/tests/test.h +++ b/storage/tokudb/ft-index/util/tests/test.h @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/tests/test_circular_buffer.cc b/storage/tokudb/ft-index/util/tests/test_circular_buffer.cc index 8bc239ac6fc72..8bf0b646e4a1d 100644 --- a/storage/tokudb/ft-index/util/tests/test_circular_buffer.cc +++ b/storage/tokudb/ft-index/util/tests/test_circular_buffer.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/tests/test_doubly_linked_list.cc b/storage/tokudb/ft-index/util/tests/test_doubly_linked_list.cc index 6fad884ed8e4e..94e6b0a34890e 100644 --- a/storage/tokudb/ft-index/util/tests/test_doubly_linked_list.cc +++ b/storage/tokudb/ft-index/util/tests/test_doubly_linked_list.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/tests/test_partitioned_counter.cc b/storage/tokudb/ft-index/util/tests/test_partitioned_counter.cc index 5af214f75acb1..ce09aa042295d 100644 --- a/storage/tokudb/ft-index/util/tests/test_partitioned_counter.cc +++ b/storage/tokudb/ft-index/util/tests/test_partitioned_counter.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -201,9 +201,6 @@ static inline void increment (void) { head->prev = cp; } head = cp; -#ifdef __INTEL_COMPILER - __memory_barrier(); // for some reason I don't understand, ICC needs a memory barrier here. 
-Bradley -#endif cp->counter = 0; cp->inited = true; cp->myid = idcounter++; diff --git a/storage/tokudb/ft-index/util/tests/test_partitioned_counter_5833.cc b/storage/tokudb/ft-index/util/tests/test_partitioned_counter_5833.cc index 419f992576b36..2e42e4d4b4e8a 100644 --- a/storage/tokudb/ft-index/util/tests/test_partitioned_counter_5833.cc +++ b/storage/tokudb/ft-index/util/tests/test_partitioned_counter_5833.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/tests/threadpool-nproc-limit.cc b/storage/tokudb/ft-index/util/tests/threadpool-nproc-limit.cc index f1ba10dad84f8..3395a30238bf6 100644 --- a/storage/tokudb/ft-index/util/tests/threadpool-nproc-limit.cc +++ b/storage/tokudb/ft-index/util/tests/threadpool-nproc-limit.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/tests/threadpool-test.cc b/storage/tokudb/ft-index/util/tests/threadpool-test.cc index 6815cce8f8f3c..b9bebc5db7d1f 100644 --- a/storage/tokudb/ft-index/util/tests/threadpool-test.cc +++ b/storage/tokudb/ft-index/util/tests/threadpool-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/util/tests/threadpool-testrunf.cc b/storage/tokudb/ft-index/util/tests/threadpool-testrunf.cc index f4d875a894193..b7744cbf54c45 100644 --- a/storage/tokudb/ft-index/util/tests/threadpool-testrunf.cc +++ b/storage/tokudb/ft-index/util/tests/threadpool-testrunf.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: diff --git a/storage/tokudb/ft-index/ft/tests/x1764-test.cc b/storage/tokudb/ft-index/util/tests/x1764-test.cc similarity index 88% rename from storage/tokudb/ft-index/ft/tests/x1764-test.cc rename to storage/tokudb/ft-index/util/tests/x1764-test.cc index 721cb0b122283..5f47e007f503c 100644 --- a/storage/tokudb/ft-index/ft/tests/x1764-test.cc +++ b/storage/tokudb/ft-index/util/tests/x1764-test.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,17 +88,17 @@ PATENT RIGHTS GRANT: #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." - - #include "test.h" +#include + static void test0 (void) { - uint32_t c = x1764_memory("", 0); + uint32_t c = toku_x1764_memory("", 0); assert(c==~(0U)); struct x1764 cs; - x1764_init(&cs); - x1764_add(&cs, "", 0); - c = x1764_finish(&cs); + toku_x1764_init(&cs); + toku_x1764_add(&cs, "", 0); + c = toku_x1764_finish(&cs); assert(c==~(0U)); } @@ -110,7 +110,7 @@ test1 (void) { for (i=0; i<=8; i++) { uint64_t expect64 = (i==8) ? 
v : v&((1LL<<(8*i))-1); uint32_t expect = expect64 ^ (expect64>>32); - c = x1764_memory(&v, i); + c = toku_x1764_memory(&v, i); //printf("i=%d c=%08x expect=%08x\n", i, c, expect); assert(c==~expect); } @@ -127,33 +127,33 @@ test2 (void) { int j; for (j=i; j<=N; j++) { // checksum from i (inclusive to j (exclusive) - uint32_t c = x1764_memory(&v[i], j-i); + uint32_t c = toku_x1764_memory(&v[i], j-i); // Now compute the checksum incrementally with various strides. int stride; for (stride=1; stride<=j-i; stride++) { int k; struct x1764 s; - x1764_init(&s); + toku_x1764_init(&s); for (k=i; k+stride<=j; k+=stride) { - x1764_add(&s, &v[k], stride); + toku_x1764_add(&s, &v[k], stride); } - x1764_add(&s, &v[k], j-k); - uint32_t c2 = x1764_finish(&s); + toku_x1764_add(&s, &v[k], j-k); + uint32_t c2 = toku_x1764_finish(&s); assert(c2==c); } // Now use some random strides. { int k=i; struct x1764 s; - x1764_init(&s); + toku_x1764_init(&s); while (1) { stride=random()%16; if (k+stride>j) break; - x1764_add(&s, &v[k], stride); + toku_x1764_add(&s, &v[k], stride); k+=stride; } - x1764_add(&s, &v[k], j-k); - uint32_t c2 = x1764_finish(&s); + toku_x1764_add(&s, &v[k], j-k); + uint32_t c2 = toku_x1764_finish(&s); assert(c2==c); } } @@ -170,8 +170,8 @@ test3 (void) for (int off=0; off<32; off++) { if (verbose) {printf("."); fflush(stdout);} for (int len=0; len+off @@ -137,5 +136,3 @@ int toku_thread_pool_run(struct toku_thread_pool *pool, int dowait, int *nthread // Print the state of the thread pool void toku_thread_pool_print(struct toku_thread_pool *pool, FILE *out); - -#endif // UTIL_THREADPOOL_H diff --git a/storage/tokudb/ft-index/ft/x1764.cc b/storage/tokudb/ft-index/util/x1764.cc similarity index 96% rename from storage/tokudb/ft-index/ft/x1764.cc rename to storage/tokudb/ft-index/util/x1764.cc index c95ff32756013..5fb20daccee09 100644 --- a/storage/tokudb/ft-index/ft/x1764.cc +++ b/storage/tokudb/ft-index/util/x1764.cc @@ -29,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -90,13 +90,13 @@ PATENT RIGHTS GRANT: #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." 
#include -#include +#include #include "x1764.h" #define PRINT 0 -uint32_t x1764_memory_simple (const void *buf, int len) +uint32_t toku_x1764_memory_simple (const void *buf, int len) { const uint64_t *CAST_FROM_VOIDP(lbuf, buf); uint64_t c=0; @@ -118,7 +118,7 @@ uint32_t x1764_memory_simple (const void *buf, int len) return ~((c&0xFFFFFFFF) ^ (c>>32)); } -uint32_t x1764_memory (const void *vbuf, int len) +uint32_t toku_x1764_memory (const void *vbuf, int len) { const uint8_t *CAST_FROM_VOIDP(buf, vbuf); int len_4_words = 4*sizeof(uint64_t); @@ -149,13 +149,13 @@ uint32_t x1764_memory (const void *vbuf, int len) } -void x1764_init(struct x1764 *l) { +void toku_x1764_init(struct x1764 *l) { l->sum=0; l->input=0; l->n_input_bytes=0; } -void x1764_add (struct x1764 *l, const void *vbuf, int len) { +void toku_x1764_add (struct x1764 *l, const void *vbuf, int len) { if (PRINT) printf("%d: n_input_bytes=%d len=%d\n", __LINE__, l->n_input_bytes, len); int n_input_bytes = l->n_input_bytes; const unsigned char *CAST_FROM_VOIDP(cbuf, vbuf); @@ -287,7 +287,7 @@ void x1764_add (struct x1764 *l, const void *vbuf, int len) { l->input = input; if (PRINT) printf("%d: n_input_bytes=%d\n", __LINE__, l->n_input_bytes); } -uint32_t x1764_finish (struct x1764 *l) { +uint32_t toku_x1764_finish (struct x1764 *l) { if (PRINT) printf("%d: n_input_bytes=%d\n", __LINE__, l->n_input_bytes); int len = l->n_input_bytes; if (len>0) { diff --git a/storage/tokudb/ft-index/ft/x1764.h b/storage/tokudb/ft-index/util/x1764.h similarity index 91% rename from storage/tokudb/ft-index/ft/x1764.h rename to storage/tokudb/ft-index/util/x1764.h index 090167c8fc7c9..1d83e5a185359 100644 --- a/storage/tokudb/ft-index/ft/x1764.h +++ b/storage/tokudb/ft-index/util/x1764.h @@ -1,7 +1,5 @@ /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ifndef X1764_H -#define X1764_H #ident "$Id$" /* COPYING CONDITIONS NOTICE: @@ -31,7 +29,7 @@ COPYING CONDITIONS NOTICE: COPYRIGHT NOTICE: - TokuDB, Tokutek Fractal Tree Indexing Library. + TokuFT, Tokutek Fractal Tree Indexing Library. Copyright (C) 2007-2013 Tokutek, Inc. DISCLAIMER: @@ -88,6 +86,8 @@ PATENT RIGHTS GRANT: under this License. */ +#pragma once + #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." @@ -99,11 +99,11 @@ PATENT RIGHTS GRANT: // If any odd bytes numbers are left at the end, they are filled in at the low end. -uint32_t x1764_memory (const void *buf, int len); +uint32_t toku_x1764_memory (const void *buf, int len); // Effect: Compute x1764 on the bytes of buf. Return the 32 bit answer. -uint32_t x1764_memory_simple (const void *buf, int len); -// Effect: Same as x1764_memory, but not highly optimized (more likely to be correct). Useful for testing the optimized version. +uint32_t toku_x1764_memory_simple (const void *buf, int len); +// Effect: Same as toku_x1764_memory, but not highly optimized (more likely to be correct). Useful for testing the optimized version. // For incrementally computing an x1764, use the following interfaces. @@ -113,14 +113,11 @@ struct x1764 { int n_input_bytes; }; -void x1764_init(struct x1764 *l); +void toku_x1764_init(struct x1764 *l); // Effect: Initialize *l. 
-void x1764_add (struct x1764 *l, const void *vbuf, int len); +void toku_x1764_add (struct x1764 *l, const void *vbuf, int len); // Effect: Add more bytes to *l. -uint32_t x1764_finish (struct x1764 *l); +uint32_t toku_x1764_finish (struct x1764 *l); // Effect: Return the final 32-bit result. - - -#endif diff --git a/storage/tokudb/ft-index/utils/CMakeLists.txt b/storage/tokudb/ft-index/utils/CMakeLists.txt deleted file mode 100644 index 06d964f389fe6..0000000000000 --- a/storage/tokudb/ft-index/utils/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE DONT_DEPRECATE_ERRNO) - -set(utils tokudb_gen tokudb_load tokudb_dump) -foreach(util ${utils}) - add_executable(${util} ${util}.cc) - set_target_properties(${util} PROPERTIES - COMPILE_DEFINITIONS "IS_TDB=1;USE_TDB=1;TDB_IS_STATIC=1") - target_link_libraries(${util} ${LIBTOKUDB}_static ft_static z lzma ${LIBTOKUPORTABILITY}_static ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) - - add_space_separated_property(TARGET ${util} COMPILE_FLAGS -fvisibility=hidden) - - if(BDB_FOUND) - add_executable(${util}.bdb ${util}.cc) - set_property(TARGET ${util}.bdb APPEND PROPERTY - COMPILE_DEFINITIONS "IS_TDB=0;USE_BDB=1;TOKU_ALLOW_DEPRECATED") - set_target_properties(${util}.bdb PROPERTIES - INCLUDE_DIRECTORIES "${BDB_INCLUDE_DIR};${CMAKE_CURRENT_BINARY_DIR}/../toku_include;${CMAKE_CURRENT_SOURCE_DIR}/../toku_include;${CMAKE_CURRENT_SOURCE_DIR}/../portability;${CMAKE_CURRENT_SOURCE_DIR}/..") - target_link_libraries(${util}.bdb ${LIBTOKUPORTABILITY} ${BDB_LIBRARIES}) - add_space_separated_property(TARGET ${util}.bdb COMPILE_FLAGS -fvisibility=hidden) - endif() -endforeach(util) diff --git a/storage/tokudb/ft-index/utils/parseTraceFiles.py b/storage/tokudb/ft-index/utils/parseTraceFiles.py deleted file mode 100755 index f53ef62011115..0000000000000 --- a/storage/tokudb/ft-index/utils/parseTraceFiles.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python - -import sys -try: - data = open(sys.argv[1]) -except: - print "Could not open '%s'" % (sys.argv[1][0]) - exit(0) - -ts_factor = 1. -ts_prev = 0. 
- -threadlist = [] - -for line in data: - line = line.rstrip("\n") - vals = line.split() - [n, tid, ts, funcline] = vals[0:4] - # 'note' is all text following funcline - note = '' - for v in vals[4:-1]: - note += v+' ' - note += vals[-1] - - if ( note == 'calibrate done' ): - ts_factor = float(ts) - ts_prev - print "Factor = ", ts_factor, "("+str(ts_factor/1000000000)[0:4]+"GHz)" - - time = (float(ts)-ts_prev)/ts_factor - - # create a list of threads - # - each thread has a list of pairs, where time is the accumulated time for that note - # - search threadlist for thread_id (tid) - # - if found, search corresponding list of pairs for the current note - # - if found, update (+=) the time - # - if not found, create a new pair - # - if not found, create a new thread, entry - found_thread = 0 - for thread in threadlist: - if tid == thread[0]: - found_thread = 1 - notetimelist = thread[1] - found_note = 0 - for notetime in notetimelist: - if note == notetime[0]: - found_note = 1 - notetime[1] += time - break - if found_note == 0: - thread[1].append([note, time]) - break - if found_thread == 0: - notetime = [] - notetime.append([note, time]) - threadlist.append([tid, notetime]) - - ts_prev = float(ts) - -# trim out unneeded -for thread in threadlist: - trimlist = [] - for notetime in thread[1]: - if notetime[0][0:9] == 'calibrate': - trimlist.append(notetime) - for notetime in trimlist: - thread[1].remove(notetime) -print '' - -# sum times to calculate percent (of 100) -total_time = 0 -for thread in threadlist: - for [note, time] in thread[1]: - total_time += time - -print ' thread operation time(sec) percent' -for thread in threadlist: - print 'tid : %5s' % thread[0] - for [note, time] in thread[1]: - print ' %20s %f %5d' % (note, time, 100. * time/total_time) - - - diff --git a/storage/tokudb/ft-index/utils/tokudb_common_funcs.h b/storage/tokudb/ft-index/utils/tokudb_common_funcs.h deleted file mode 100644 index 51f3733b00e6b..0000000000000 --- a/storage/tokudb/ft-index/utils/tokudb_common_funcs.h +++ /dev/null @@ -1,351 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -#if !defined(TOKUDB_COMMON_FUNCS_H) -#define TOKUDB_COMMON_FUNCS_H - -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. 
- -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." - -#include "tokudb_common.h" - -//DB_ENV->err disabled since it does not use db_strerror -#define PRINT_ERROR(retval, ...) \ -do { \ -if (0) g.dbenv->err(g.dbenv, retval, __VA_ARGS__); \ -else { \ - fprintf(stderr, "\tIn %s:%d %s()\n", __FILE__, __LINE__, __FUNCTION__); \ - fprintf(stderr, "%s: %s:", g.progname, db_strerror(retval)); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, "\n"); \ - fflush(stderr); \ -} \ -} while (0) - -//DB_ENV->err disabled since it does not use db_strerror, errx does not exist. -#define PRINT_ERRORX(...) 
\ -do { \ -if (0) g.dbenv->err(g.dbenv, 0, __VA_ARGS__); \ -else { \ - fprintf(stderr, "\tIn %s:%d %s()\n", __FILE__, __LINE__, __FUNCTION__); \ - fprintf(stderr, "%s: ", g.progname); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, "\n"); \ - fflush(stderr); \ -} \ -} while (0) - -int strtoint32 (char* str, int32_t* num, int32_t min, int32_t max, int base); -int strtouint32 (char* str, uint32_t* num, uint32_t min, uint32_t max, int base); -int strtoint64 (char* str, int64_t* num, int64_t min, int64_t max, int base); -int strtouint64 (char* str, uint64_t* num, uint64_t min, uint64_t max, int base); - -/* - * Convert a string to an integer of type "type". - * - * - * Sets errno and returns: - * EINVAL: str == NULL, num == NULL, or string not of the form [ \t]*[+-]?[0-9]+ - * ERANGE: value out of range specified. (Range of [min, max]) - * - * *num is unchanged on error. - * Returns: - * - */ -#define DEF_STR_TO(name, type, bigtype, strtofunc, frmt) \ -int name(char* str, type* num, type min, type max, int base) \ -{ \ - char* test; \ - bigtype value; \ - \ - assert(str); \ - assert(num); \ - assert(min <= max); \ - assert(g.dbenv || g.progname); \ - assert(base == 0 || (base >= 2 && base <= 36)); \ - \ - errno = 0; \ - while (isspace(*str)) str++; \ - value = strtofunc(str, &test, base); \ - if ((*test != '\0' && *test != '\n') || test == str) { \ - PRINT_ERRORX("%s: Invalid numeric argument\n", str); \ - errno = EINVAL; \ - goto error; \ - } \ - if (errno != 0) { \ - PRINT_ERROR(errno, "%s\n", str); \ - } \ - if (value < min) { \ - PRINT_ERRORX("%s: Less than minimum value (%" frmt ")\n", str, min); \ - goto error; \ - } \ - if (value > max) { \ - PRINT_ERRORX("%s: Greater than maximum value (%" frmt ")\n", str, max); \ - goto error; \ - } \ - *num = value; \ - return EXIT_SUCCESS; \ -error: \ - return errno; \ -} - -DEF_STR_TO(strtoint32, int32_t, int64_t, strtoll, PRId32) -DEF_STR_TO(strtouint32, uint32_t, uint64_t, strtoull, PRIu32) -DEF_STR_TO(strtoint64, int64_t, int64_t, strtoll, PRId64) -DEF_STR_TO(strtouint64, uint64_t, uint64_t, strtoull, PRIu64) - -static inline void -outputbyte(uint8_t ch) -{ - if (g.plaintext) { - if (ch == '\\') printf("\\\\"); - else if (isprint(ch)) printf("%c", ch); - else printf("\\%02x", ch); - } - else printf("%02x", ch); -} - -static inline void -outputstring(char* str) -{ - char* p; - - for (p = str; *p != '\0'; p++) { - outputbyte((uint8_t)*p); - } -} - -static inline void -outputplaintextstring(char* str) -{ - bool old_plaintext = g.plaintext; - g.plaintext = true; - outputstring(str); - g.plaintext = old_plaintext; -} - -static inline int -hextoint(int ch) -{ - if (ch >= '0' && ch <= '9') { - return ch - '0'; - } - if (ch >= 'a' && ch <= 'z') { - return ch - 'a' + 10; - } - if (ch >= 'A' && ch <= 'Z') { - return ch - 'A' + 10; - } - return EOF; -} - -static inline int -printabletocstring(char* inputstr, char** poutputstr) -{ - char highch; - char lowch; - char nextch; - char* cstring; - - assert(inputstr); - assert(poutputstr); - assert(*poutputstr == NULL); - - cstring = (char*)toku_malloc((strlen(inputstr) + 1) * sizeof(char)); - if (cstring == NULL) { - PRINT_ERROR(errno, "printabletocstring"); - goto error; - } - - for (*poutputstr = cstring; *inputstr != '\0'; inputstr++) { - if (*inputstr == '\\') { - if ((highch = *++inputstr) == '\\') { - *cstring++ = '\\'; - continue; - } - if (highch == '\0' || (lowch = *++inputstr) == '\0') { - PRINT_ERROR(0, "unexpected end of input data or key/data pair"); - goto error; - } - if (!isxdigit(highch)) 
{ - PRINT_ERROR(0, "Unexpected '%c' (non-hex) input.\n", highch); - goto error; - } - if (!isxdigit(lowch)) { - PRINT_ERROR(0, "Unexpected '%c' (non-hex) input.\n", lowch); - goto error; - } - nextch = (char)((hextoint(highch) << 4) | hextoint(lowch)); - if (nextch == '\0') { - /* Database names are c strings, and cannot have extra NULL terminators. */ - PRINT_ERROR(0, "Unexpected '\\00' in input.\n"); - goto error; - } - *cstring++ = nextch; - } - else *cstring++ = *inputstr; - } - /* Terminate the string. */ - *cstring = '\0'; - return EXIT_SUCCESS; - -error: - PRINT_ERROR(0, "Quitting out due to errors.\n"); - return EXIT_FAILURE; -} - -static inline int -verify_library_version(void) -{ - int major; - int minor; - - db_version(&major, &minor, NULL); - if (major != DB_VERSION_MAJOR || minor != DB_VERSION_MINOR) { - PRINT_ERRORX("version %d.%d doesn't match library version %d.%d\n", - DB_VERSION_MAJOR, DB_VERSION_MINOR, major, minor); - return EXIT_FAILURE; - } - return EXIT_SUCCESS; -} - -static int last_caught = 0; - -static void catch_signal(int which_signal) { - last_caught = which_signal; - if (last_caught == 0) last_caught = SIGINT; -} - -static inline void -init_catch_signals(void) { - signal(SIGINT, catch_signal); - signal(SIGTERM, catch_signal); -#ifdef SIGHUP - signal(SIGHUP, catch_signal); -#endif -#ifdef SIGPIPE - signal(SIGPIPE, catch_signal); -#endif -} - -static inline int -caught_any_signals(void) { - return last_caught != 0; -} - -static inline void -resend_signals(void) { - if (last_caught) { - signal(last_caught, SIG_DFL); - raise(last_caught); - } -} - -#include -#if IS_TDB && TOKU_WINDOWS -#include -#endif -static int test_main (int argc, char *const argv[]); -int -main(int argc, char *const argv[]) { - int r; -#if IS_TDB && TOKU_WINDOWS - toku_ydb_init(); -#endif -#if !IS_TDB && DB_VERSION_MINOR==4 && DB_VERSION_MINOR == 7 - r = db_env_set_func_malloc(toku_malloc); assert(r==0); - r = db_env_set_func_free(toku_free); assert(r==0); - r = db_env_set_func_realloc(toku_realloc); assert(r==0); -#endif - r = test_main(argc, argv); -#if IS_TDB && TOKU_WINDOWS - toku_ydb_destroy(); -#endif - return r; -} - -#endif /* #if !defined(TOKUDB_COMMON_H) */ diff --git a/storage/tokudb/ft-index/utils/tokudb_gen.cc b/storage/tokudb/ft-index/utils/tokudb_gen.cc deleted file mode 100644 index 492c0ac6186ee..0000000000000 --- a/storage/tokudb/ft-index/utils/tokudb_gen.cc +++ /dev/null @@ -1,478 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." 
- -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if IS_TDB -#include -#endif - -#include "tokudb_common.h" - -typedef struct { - DB_ENV* dbenv; - bool plaintext; - char* progname; -} gen_globals; - -gen_globals g; -#include "tokudb_common_funcs.h" - -static int usage(void); -static void generate_keys(void); -static int get_delimiter(char* str); - - - -char dbt_delimiter = '\n'; -char sort_delimiter[3]; -uint32_t lengthmin = 0; -bool set_lengthmin = false; -uint32_t lengthlimit = 0; -bool set_lengthlimit= false; -uint64_t numkeys = 0; -bool set_numkeys = false; -bool header = true; -bool footer = true; -bool justheader = false; -bool justfooter = false; -bool outputkeys = true; -uint32_t seed = 1; -bool set_seed = false; -bool printableonly = false; -bool leadingspace = true; -bool force_unique = true; -bool dupsort = false; - -static int test_main (int argc, char *const argv[]) { - int ch; - - /* Set up the globals. */ - memset(&g, 0, sizeof(g)); - - g.progname = argv[0]; - - if (verify_library_version() != 0) goto error; - - strcpy(sort_delimiter, ""); - - while ((ch = getopt(argc, argv, "PpTo:r:m:M:n:uVhHfFd:s:DS")) != EOF) { - switch (ch) { - case ('P'): { - printableonly = true; - break; - } - case ('p'): { - g.plaintext = true; - leadingspace = true; - break; - } - case ('T'): { - g.plaintext = true; - leadingspace = false; - header = false; - footer = false; - break; - } - case ('o'): { - if (freopen(optarg, "w", stdout) == NULL) { - PRINT_ERROR(errno, "%s: reopen\n", optarg); - goto error; - } - break; - } - case ('r'): { - if (strtouint32(optarg, &seed, 0, UINT32_MAX, 10)) { - PRINT_ERRORX("%s: (-r) Random seed invalid.", optarg); - goto error; - } - set_seed = true; - break; - } - case ('m'): { - if (strtouint32(optarg, &lengthmin, 0, UINT32_MAX, 10)) { - PRINT_ERRORX("%s: (-m) Min length of keys/values invalid.", optarg); - goto error; - } - set_lengthmin = true; - break; - } - case ('M'): { - if (strtouint32(optarg, &lengthlimit, 1, UINT32_MAX, 10)) { - PRINT_ERRORX("%s: (-M) Limit of key/value length invalid.", optarg); - goto error; - } - set_lengthlimit = true; - break; - } - case ('n'): { - if (strtouint64(optarg, &numkeys, 0, UINT64_MAX, 10)) { - PRINT_ERRORX("%s: (-n) Number of keys to generate invalid.", optarg); - goto error; - } - set_numkeys = true; - break; - } - case ('u'): { - force_unique = false; - break; - } - case ('h'): { - header = false; - break; - } - case ('H'): { - justheader = true; - break; - } - case ('f'): { - footer = false; - break; - } - case ('F'): { - justfooter = true; - break; - } - case ('d'): { - int temp = get_delimiter(optarg); - if (temp == EOF) { - PRINT_ERRORX("%s: (-d) Key (or value) delimiter must be one character.", - optarg); - goto error; - } - if (isxdigit(temp)) { - PRINT_ERRORX("%c: (-d) Key (or value) delimiter cannot be a hex digit.", - temp); - goto error; - } - dbt_delimiter = (char)temp; - break; - } - case ('s'): { - int temp = get_delimiter(optarg); - if (temp == EOF) { - PRINT_ERRORX("%s: (-s) Sorting (Between key/value pairs) delimiter must be one character.", - optarg); - goto error; - } - if (isxdigit(temp)) { - PRINT_ERRORX("%c: (-s) Sorting (Between key/value pairs) delimiter cannot be a hex digit.", - temp); - goto error; - } - sort_delimiter[0] = (char)temp; - sort_delimiter[1] = '\0'; -#if TOKU_WINDOWS - if (!strcmp(sort_delimiter, "\n")) { - strcpy(sort_delimiter, "\r\n"); - } -#endif - break; - } - case ('V'): { - printf("%s\n", db_version(NULL, NULL, NULL)); 
- return EXIT_SUCCESS; - } - case 'D': { - fprintf(stderr, "Duplicates no longer supported by tokudb\n"); - return EXIT_FAILURE; - } - case 'S': { - fprintf(stderr, "Dupsort no longer supported by tokudb\n"); - return EXIT_FAILURE; - } - case ('?'): - default: { - return (usage()); - } - } - } - argc -= optind; - argv += optind; - - if (justheader && !header) { - PRINT_ERRORX("The -h and -H options may not both be specified.\n"); - goto error; - } - if (justfooter && !footer) { - PRINT_ERRORX("The -f and -F options may not both be specified.\n"); - goto error; - } - if (justfooter && justheader) { - PRINT_ERRORX("The -H and -F options may not both be specified.\n"); - goto error; - } - if (justfooter && header) { - PRINT_ERRORX("-F implies -h\n"); - header = false; - } - if (justheader && footer) { - PRINT_ERRORX("-H implies -f\n"); - footer = false; - } - if (!leadingspace) { - if (footer) { - PRINT_ERRORX("-p implies -f\n"); - footer = false; - } - if (header) { - PRINT_ERRORX("-p implies -h\n"); - header = false; - } - } - if (justfooter || justheader) outputkeys = false; - else if (!set_numkeys) - { - PRINT_ERRORX("Using default number of keys. (-n 1024).\n"); - numkeys = 1024; - } - if (outputkeys && !set_seed) { - PRINT_ERRORX("Using default seed. (-r 1).\n"); - seed = 1; - } - if (outputkeys && !set_lengthmin) { - PRINT_ERRORX("Using default lengthmin. (-m 0).\n"); - lengthmin = 0; - } - if (outputkeys && !set_lengthlimit) { - PRINT_ERRORX("Using default lengthlimit. (-M 1024).\n"); - lengthlimit = 1024; - } - if (outputkeys && lengthmin >= lengthlimit) { - PRINT_ERRORX("Max key size must be greater than min key size.\n"); - goto error; - } - - if (argc != 0) { - return usage(); - } - if (header) { - printf("VERSION=3\n"); - printf("format=%s\n", g.plaintext ? "print" : "bytevalue"); - printf("type=btree\n"); - // printf("db_pagesize=%d\n", 4096); //Don't write pagesize which would be useless. - if (dupsort) - printf("dupsort=%d\n", dupsort); - printf("HEADER=END\n"); - } - if (outputkeys) generate_keys(); - if (footer) printf("DATA=END\n"); - return EXIT_SUCCESS; - -error: - fprintf(stderr, "Quitting out due to errors.\n"); - return EXIT_FAILURE; -} - -static int usage() -{ - fprintf(stderr, - "usage: %s [-PpTuVhHfFDS] [-o output] [-r seed] [-m minsize] [-M limitsize]\n" - " %*s[-n numpairs] [-d delimiter] [-s delimiter]\n", - g.progname, (int)strlen(g.progname) + 1, ""); - return EXIT_FAILURE; -} - -static uint8_t randbyte(void) -{ - static uint32_t numsavedbits = 0; - static uint64_t savedbits = 0; - uint8_t retval; - - if (numsavedbits < 8) { - savedbits |= ((uint64_t)random()) << numsavedbits; - numsavedbits += 31; /* Random generates 31 random bits. */ - } - retval = savedbits & 0xff; - numsavedbits -= 8; - savedbits >>= 8; - return retval; -} - -/* Almost-uniformly random int from [0,limit) */ -static int32_t random_below(int32_t limit) -{ - assert(limit > 0); - return random() % limit; -} - -static void generate_keys() -{ - bool usedemptykey = false; - uint64_t numgenerated = 0; - uint64_t totalsize = 0; - char identifier[24]; /* 8 bytes * 2 = 16; 16+1=17; 17+null terminator = 18. Extra padding. */ - int length; - int i; - uint8_t ch; - - srandom(seed); - while (numgenerated < numkeys) { - numgenerated++; - - /* Each key is preceded by a space (unless using -T). */ - if (leadingspace) printf(" "); - - /* Generate a key. */ - { - /* Pick a key length. */ - length = random_below(lengthlimit - lengthmin) + lengthmin; - - /* Output 'length' random bytes. 
*/ - for (i = 0; i < length; i++) { - do {ch = randbyte();} - while (printableonly && !isprint(ch)); - outputbyte(ch); - } - totalsize += length; - if (force_unique) { - if (length == 0 && !usedemptykey) usedemptykey = true; - else { - /* Append identifier to ensure uniqueness. */ - sprintf(identifier, "x%" PRIx64, numgenerated); - outputstring(identifier); - totalsize += strlen(identifier); - } - } - } - printf("%c", dbt_delimiter); - - /* Each value is preceded by a space (unless using -T). */ - if (leadingspace) printf(" "); - - /* Generate a value. */ - { - /* Pick a key length. */ - length = random_below(lengthlimit - lengthmin) + lengthmin; - - /* Output 'length' random bytes. */ - for (i = 0; i < length; i++) { - do {ch = randbyte();} - while (printableonly && !isprint(ch)); - outputbyte(ch); - } - totalsize += length; - } - printf("%c", dbt_delimiter); - - printf("%s", sort_delimiter); - } -} - -int get_delimiter(char* str) -{ - if (strlen(str) == 2 && str[0] == '\\') { - switch (str[1]) { - case ('a'): return '\a'; - case ('b'): return '\b'; -#ifndef __ICL - case ('e'): return '\e'; -#endif - case ('f'): return '\f'; - case ('n'): return '\n'; - case ('r'): return '\r'; - case ('t'): return '\t'; - case ('v'): return '\v'; - case ('0'): return '\0'; - case ('\\'): return '\\'; - default: return EOF; - } - } - if (strlen(str) == 1) return str[0]; - return EOF; -} diff --git a/storage/tokudb/ft-index/utils/tokudb_load.cc b/storage/tokudb/ft-index/utils/tokudb_load.cc deleted file mode 100644 index 2072b2f7f8d81..0000000000000 --- a/storage/tokudb/ft-index/utils/tokudb_load.cc +++ /dev/null @@ -1,977 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/* -COPYING CONDITIONS NOTICE: - - This program is free software; you can redistribute it and/or modify - it under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation, and provided that the - following conditions are met: - - * Redistributions of source code must retain this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below). - - * Redistributions in binary form must reproduce this COPYING - CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the - DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the - PATENT MARKING NOTICE (below), and the PATENT RIGHTS - GRANT (below) in the documentation and/or other materials - provided with the distribution. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - -COPYRIGHT NOTICE: - - TokuDB, Tokutek Fractal Tree Indexing Library. - Copyright (C) 2007-2013 Tokutek, Inc. - -DISCLAIMER: - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - -UNIVERSITY PATENT NOTICE: - - The technology is licensed by the Massachusetts Institute of - Technology, Rutgers State University of New Jersey, and the Research - Foundation of State University of New York at Stony Brook under - United States of America Serial No. 
11/760379 and to the patents - and/or patent applications resulting from it. - -PATENT MARKING NOTICE: - - This software is covered by US Patent No. 8,185,551. - This software is covered by US Patent No. 8,489,638. - -PATENT RIGHTS GRANT: - - "THIS IMPLEMENTATION" means the copyrightable works distributed by - Tokutek as part of the Fractal Tree project. - - "PATENT CLAIMS" means the claims of patents that are owned or - licensable by Tokutek, both currently or in the future; and that in - the absence of this license would be infringed by THIS - IMPLEMENTATION or by using or running THIS IMPLEMENTATION. - - "PATENT CHALLENGE" shall mean a challenge to the validity, - patentability, enforceability and/or non-infringement of any of the - PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS. - - Tokutek hereby grants to you, for the term and geographical scope of - the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free, - irrevocable (except as stated in this section) patent license to - make, have made, use, offer to sell, sell, import, transfer, and - otherwise run, modify, and propagate the contents of THIS - IMPLEMENTATION, where such license applies only to the PATENT - CLAIMS. This grant does not include claims that would be infringed - only as a consequence of further modifications of THIS - IMPLEMENTATION. If you or your agent or licensee institute or order - or agree to the institution of patent litigation against any entity - (including a cross-claim or counterclaim in a lawsuit) alleging that - THIS IMPLEMENTATION constitutes direct or contributory patent - infringement, or inducement of patent infringement, then any rights - granted to you under this License shall terminate as of the date - such litigation is filed. If you or your agent or exclusive - licensee institute or order or agree to the institution of a PATENT - CHALLENGE, then Tokutek may terminate any rights granted to you - under this License. -*/ - -#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "tokudb_common.h" - -typedef struct { - bool leadingspace; - bool plaintext; - bool overwritekeys; - bool header; - bool eof; - bool keys; - bool is_private; - char* progname; - char* homedir; - char* database; - char* subdatabase; - char** config_options; - int32_t version; - int exitcode; - uint64_t linenumber; - DBTYPE dbtype; - DB* db; - DB_ENV* dbenv; - struct { - char* data[2]; - } get_dbt; - struct { - char* data; - } read_header; -} load_globals; - -load_globals g; -#include "tokudb_common_funcs.h" - -static int usage (void); -static int load_database (void); -static int create_init_env(void); -static int read_header (void); -static int open_database (void); -static int read_keys (void); -static int apply_commandline_options(void); -static int close_database (void); -static int doublechararray(char** pmem, uint64_t* size); - -int test_main(int argc, char *const argv[]) { - int ch; - int retval; - char** next_config_option; - - /* Set up the globals. 
*/ - memset(&g, 0, sizeof(g)); - g.leadingspace = true; - g.overwritekeys = true; - g.dbtype = DB_UNKNOWN; - //g.dbtype = DB_BTREE; - g.progname = argv[0]; - g.header = true; - - if (verify_library_version() != 0) goto error; - - next_config_option = g.config_options = (char**) calloc(argc, sizeof(char*)); - if (next_config_option == NULL) { - PRINT_ERROR(errno, "main: calloc\n"); - goto error; - } - while ((ch = getopt(argc, argv, "c:f:h:nP:r:Tt:V")) != EOF) { - switch (ch) { - case ('c'): { - *next_config_option++ = optarg; - break; - } - case ('f'): { - if (freopen(optarg, "r", stdin) == NULL) { - fprintf(stderr, - "%s: %s: reopen: %s\n", - g.progname, optarg, strerror(errno)); - goto error; - } - break; - } - case ('h'): { - g.homedir = optarg; - break; - } - case ('n'): { - /* g.overwritekeys = false; */ - PRINT_ERRORX("-%c option not supported.\n", ch); - goto error; - } - case ('P'): { - /* Clear password. */ - memset(optarg, 0, strlen(optarg)); - PRINT_ERRORX("-%c option not supported.\n", ch); - goto error; - } - case ('r'): { - PRINT_ERRORX("-%c option not supported.\n", ch); - goto error; - } - case ('T'): { - g.plaintext = true; - g.leadingspace = false; - g.header = false; - break; - } - case ('t'): { - if (!strcmp(optarg, "btree")) { - g.dbtype = DB_BTREE; - break; - } - if (!strcmp(optarg, "hash") || !strcmp(optarg, "recno") || !strcmp(optarg, "queue")) { - fprintf(stderr, "%s: db type %s not supported.\n", g.progname, optarg); - goto error; - } - fprintf(stderr, "%s: Unrecognized db type %s.\n", g.progname, optarg); - goto error; - } - case ('V'): { - printf("%s\n", db_version(NULL, NULL, NULL)); - goto cleanup; - } - case ('?'): - default: { - g.exitcode = usage(); - goto cleanup; - } - } - } - argc -= optind; - argv += optind; - - if (argc != 1) { - g.exitcode = usage(); - goto cleanup; - } - init_catch_signals(); - - g.database = argv[0]; - if (create_init_env() != 0) goto error; - if (caught_any_signals()) goto cleanup; - while (!g.eof) { - if (load_database() != 0) goto error; - if (caught_any_signals()) goto cleanup; - } - if (false) { -error: - g.exitcode = EXIT_FAILURE; - fprintf(stderr, "%s: Quitting out due to errors.\n", g.progname); - } -cleanup: - if (g.dbenv && (retval = g.dbenv->close(g.dbenv, 0)) != 0) { - g.exitcode = EXIT_FAILURE; - fprintf(stderr, "%s: dbenv->close: %s\n", g.progname, db_strerror(retval)); - } - if (g.config_options) toku_free(g.config_options); - if (g.subdatabase) toku_free(g.subdatabase); - if (g.read_header.data) toku_free(g.read_header.data); - if (g.get_dbt.data[0]) toku_free(g.get_dbt.data[0]); - if (g.get_dbt.data[1]) toku_free(g.get_dbt.data[1]); - resend_signals(); - - return g.exitcode; -} - -int load_database() -{ - int retval; - - /* Create a database handle. 
*/ - retval = db_create(&g.db, g.dbenv, 0); - if (retval != 0) { - PRINT_ERROR(retval, "db_create"); - return EXIT_FAILURE; - } - - if (g.header && read_header() != 0) goto error; - if (g.eof) goto cleanup; - if (caught_any_signals()) goto cleanup; - if (apply_commandline_options() != 0) goto error; - if (g.eof) goto cleanup; - if (caught_any_signals()) goto cleanup; - - /* - TODO: If/when supporting encryption - if (g.password && (retval = db->set_flags(db, DB_ENCRYPT))) { - PRINT_ERROR(ret, "DB->set_flags: DB_ENCRYPT"); - goto error; - } - */ - if (open_database() != 0) goto error; - if (g.eof) goto cleanup; - if (caught_any_signals()) goto cleanup; - if (read_keys() != 0) goto error; - if (g.eof) goto cleanup; - if (caught_any_signals()) goto cleanup; - - if (false) { -error: - g.exitcode = EXIT_FAILURE; - } -cleanup: - - if (close_database() != 0) g.exitcode = EXIT_FAILURE; - - return g.exitcode; -} - -int usage() -{ - fprintf(stderr, - "usage: %s [-TV] [-c name=value] [-f file] [-h home] [-t btree] db_file\n", - g.progname); - return EXIT_FAILURE; -} - -int create_init_env() -{ - int retval; - DB_ENV* dbenv; - int flags; - //TODO: Experiments to determine right cache size for tokudb, or maybe command line argument. - //int cache = 1 << 20; /* 1 megabyte */ - - retval = db_env_create(&dbenv, 0); - if (retval) { - fprintf(stderr, "%s: db_dbenv_create: %s\n", g.progname, db_strerror(retval)); - goto error; - } - ///TODO: UNCOMMENT/IMPLEMENT dbenv->set_errfile(dbenv, stderr); - dbenv->set_errpfx(dbenv, g.progname); - /* - TODO: If/when supporting encryption - if (g.password && (retval = dbenv->set_encrypt(dbenv, g.password, DB_ENCRYPT_AES))) { - PRINT_ERROR(retval, "set_passwd"); - goto error; - } - */ - - /* Open the dbenvironment. */ - g.is_private = false; - flags = DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL|DB_INIT_TXN|DB_INIT_LOG; ///TODO: UNCOMMENT/IMPLEMENT | DB_USE_ENVIRON; - //TODO: Transactions.. SET_BITS(flags, DB_INIT_TXN); - - /* - ///TODO: UNCOMMENT/IMPLEMENT Notes: We require DB_PRIVATE - if (!dbenv->open(dbenv, g.homedir, flags, 0)) goto success; - */ - - /* - ///TODO: UNCOMMENT/IMPLEMENT - retval = dbenv->set_cachesize(dbenv, 0, cache, 1); - if (retval) { - PRINT_ERROR(retval, "DB_ENV->set_cachesize"); - goto error; - } - */ - g.is_private = true; - //TODO: Do we want to support transactions/logging even in single-process mode? - //Maybe if the db already exists. - //If db does not exist.. makes sense not to log or have transactions - //REMOVE_BITS(flags, DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN); - SET_BITS(flags, DB_CREATE | DB_PRIVATE); - - retval = dbenv->open(dbenv, g.homedir ? 
g.homedir : ".", flags, 0); - if (retval) { - PRINT_ERROR(retval, "DB_ENV->open"); - goto error; - } - g.dbenv = dbenv; - return EXIT_SUCCESS; - -error: - return EXIT_FAILURE; -} - -#define PARSE_NUMBER(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 1, INT32_MAX, 10)) goto error; \ - if ((retval = dbfunction(db, num)) != 0) goto printerror; \ - continue; \ -} -#define PARSE_UNSUPPORTEDNUMBER(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 1, INT32_MAX, 10)) goto error; \ - PRINT_ERRORX("%s option not supported.\n", field); \ - goto error; \ -} -#define PARSE_IGNOREDNUMBER(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 1, INT32_MAX, 10)) goto error; \ - PRINT_ERRORX("%s option not supported yet (ignored).\n", field); \ - continue; \ -} - -#define PARSE_FLAG(match, flag) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 0, 1, 10)) { \ - PRINT_ERRORX("%s: boolean name=value pairs require a value of 0 or 1", \ - field); \ - goto error; \ - } \ - if ((retval = db->set_flags(db, flag)) != 0) { \ - PRINT_ERROR(retval, "set_flags: %s", field); \ - goto error; \ - } \ - continue; \ -} - -#define PARSE_UNSUPPORTEDFLAG(match, flag) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 0, 1, 10)) { \ - PRINT_ERRORX("%s: boolean name=value pairs require a value of 0 or 1", \ - field); \ - goto error; \ - } \ - PRINT_ERRORX("%s option not supported.\n", field); \ - goto error; \ -} - -#define PARSE_IGNOREDFLAG(match, flag) \ -if (!strcmp(field, match)) { \ - if (strtoint32(value, &num, 0, 1, 10)) { \ - PRINT_ERRORX("%s: boolean name=value pairs require a value of 0 or 1", \ - field); \ - goto error; \ - } \ - PRINT_ERRORX("%s option not supported yet (ignored).\n", field); \ - continue; \ -} - -#define PARSE_CHAR(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strlen(value) != 1) { \ - PRINT_ERRORX("%s=%s: Expected 1-byte value", \ - field, value); \ - goto error; \ - } \ - if ((retval = dbfunction(db, value[0])) != 0) { \ - goto printerror; \ - } \ - continue; \ -} - -#define PARSE_UNSUPPORTEDCHAR(match, dbfunction) \ -if (!strcmp(field, match)) { \ - if (strlen(value) != 1) { \ - PRINT_ERRORX("%s=%s: Expected 1-byte value", \ - field, value); \ - goto error; \ - } \ - PRINT_ERRORX("%s option not supported.\n", field); \ - goto error; \ -} - -#define PARSE_COMMON_CONFIGURATIONS() \ - PARSE_IGNOREDNUMBER( "bt_minkey", db->set_bt_minkey); \ - PARSE_IGNOREDFLAG( "chksum", DB_CHKSUM); \ - PARSE_IGNOREDNUMBER( "db_lorder", db->set_lorder); \ - PARSE_IGNOREDNUMBER( "db_pagesize", db->set_pagesize); \ - PARSE_UNSUPPORTEDNUMBER("extentsize", db->set_q_extentsize); \ - PARSE_UNSUPPORTEDNUMBER("h_ffactor", db->set_h_ffactor); \ - PARSE_UNSUPPORTEDNUMBER("h_nelem", db->set_h_nelem); \ - PARSE_UNSUPPORTEDNUMBER("re_len", db->set_re_len); \ - PARSE_UNSUPPORTEDCHAR( "re_pad", db->set_re_pad); \ - PARSE_UNSUPPORTEDFLAG( "recnum", DB_RECNUM); \ - PARSE_UNSUPPORTEDFLAG( "renumber", DB_RENUMBER); - - - -int read_header() -{ - static uint64_t datasize = 1 << 10; - uint64_t idx = 0; - char* field; - char* value; - int ch; - int32_t num; - int retval; - int r; - - assert(g.header); - - if (g.read_header.data == NULL && (g.read_header.data = (char*)toku_malloc(datasize * sizeof(char))) == NULL) { - PRINT_ERROR(errno, "read_header: malloc"); - goto error; - } - while (!g.eof) { - if (caught_any_signals()) goto success; - g.linenumber++; - idx = 0; - /* Read a line. 
*/ - while (true) { - if ((ch = getchar()) == EOF) { - g.eof = true; - if (ferror(stdin)) goto formaterror; - break; - } - if (ch == '\n') break; - - g.read_header.data[idx] = (char)ch; - idx++; - - /* Ensure room exists for next character/null terminator. */ - if (idx == datasize && doublechararray(&g.read_header.data, &datasize)) goto error; - } - if (idx == 0 && g.eof) goto success; - g.read_header.data[idx] = '\0'; - - field = g.read_header.data; - if ((value = strchr(g.read_header.data, '=')) == NULL) goto formaterror; - value[0] = '\0'; - value++; - - if (field[0] == '\0' || value[0] == '\0') goto formaterror; - - if (!strcmp(field, "HEADER")) break; - if (!strcmp(field, "VERSION")) { - if (strtoint32(value, &g.version, 1, INT32_MAX, 10)) goto error; - if (g.version != 3) { - PRINT_ERRORX("line %" PRIu64 ": VERSION %d is unsupported", g.linenumber, g.version); - goto error; - } - continue; - } - if (!strcmp(field, "format")) { - if (!strcmp(value, "bytevalue")) { - g.plaintext = false; - continue; - } - if (!strcmp(value, "print")) { - g.plaintext = true; - continue; - } - goto formaterror; - } - if (!strcmp(field, "type")) { - if (!strcmp(value, "btree")) { - g.dbtype = DB_BTREE; - continue; - } - if (!strcmp(value, "hash") || strcmp(value, "recno") || strcmp(value, "queue")) { - PRINT_ERRORX("db type %s not supported.\n", value); - goto error; - } - PRINT_ERRORX("line %" PRIu64 ": unknown type %s", g.linenumber, value); - goto error; - } - if (!strcmp(field, "database") || !strcmp(field, "subdatabase")) { - if (g.subdatabase != NULL) { - toku_free(g.subdatabase); - g.subdatabase = NULL; - } - if ((retval = printabletocstring(value, &g.subdatabase))) { - PRINT_ERROR(retval, "error reading db name"); - goto error; - } - continue; - } - if (!strcmp(field, "keys")) { - int32_t temp; - if (strtoint32(value, &temp, 0, 1, 10)) { - PRINT_ERROR(0, - "%s: boolean name=value pairs require a value of 0 or 1", - field); - goto error; - } - g.keys = (bool)temp; - if (!g.keys) { - PRINT_ERRORX("keys=0 not supported"); - goto error; - } - continue; - } - PARSE_COMMON_CONFIGURATIONS(); - - PRINT_ERRORX("unknown input-file header configuration keyword \"%s\"", field); - goto error; - } -success: - r = 0; - - if (false) { -formaterror: - r = EXIT_FAILURE; - PRINT_ERRORX("line %" PRIu64 ": unexpected format", g.linenumber); - } - if (false) { -error: - r = EXIT_FAILURE; - } - return r; -} - -int apply_commandline_options() -{ - int r = -1; - unsigned idx; - char* field; - char* value = NULL; - int32_t num; - int retval; - - for (idx = 0; g.config_options[idx]; idx++) { - if (value) { - /* Restore the field=value format. 
*/ - value[-1] = '='; - value = NULL; - } - field = g.config_options[idx]; - - if ((value = strchr(field, '=')) == NULL) { - PRINT_ERRORX("command-line configuration uses name=value format"); - goto error; - } - value[0] = '\0'; - value++; - - if (field[0] == '\0' || value[0] == '\0') { - PRINT_ERRORX("command-line configuration uses name=value format"); - goto error; - } - - if (!strcmp(field, "database") || !strcmp(field, "subdatabase")) { - if (g.subdatabase != NULL) { - toku_free(g.subdatabase); - g.subdatabase = NULL; - } - if ((retval = printabletocstring(value, &g.subdatabase))) { - PRINT_ERROR(retval, "error reading db name"); - goto error; - } - continue; - } - if (!strcmp(field, "keys")) { - int32_t temp; - if (strtoint32(value, &temp, 0, 1, 10)) { - PRINT_ERROR(0, - "%s: boolean name=value pairs require a value of 0 or 1", - field); - goto error; - } - g.keys = (bool)temp; - if (!g.keys) { - PRINT_ERRORX("keys=0 not supported"); - goto error; - } - continue; - } - PARSE_COMMON_CONFIGURATIONS(); - - PRINT_ERRORX("unknown input-file header configuration keyword \"%s\"", field); - goto error; - } - if (value) { - /* Restore the field=value format. */ - value[-1] = '='; - value = NULL; - } - r = 0; - -error: - return r; -} - -int open_database() -{ - DB* db = g.db; - int retval; - - int open_flags = 0; - //TODO: Transaction auto commit stuff - //if (TXN_ON(dbenv)) SET_BITS(open_flags, DB_AUTO_COMMIT); - - //Try to see if it exists first. - retval = db->open(db, NULL, g.database, g.subdatabase, g.dbtype, open_flags, 0666); - if (retval == ENOENT) { - //Does not exist and we did not specify a type. - //TODO: Uncomment when DB_UNKNOWN + db->get_type are implemented. - /* - if (g.dbtype == DB_UNKNOWN) { - PRINT_ERRORX("no database type specified"); - goto error; - }*/ - SET_BITS(open_flags, DB_CREATE); - //Try creating it. - retval = db->open(db, NULL, g.database, g.subdatabase, g.dbtype, open_flags, 0666); - } - if (retval != 0) { - PRINT_ERROR(retval, "DB->open: %s", g.database); - goto error; - } - //TODO: Uncomment when DB_UNKNOWN + db->get_type are implemented. - /* - if ((retval = db->get_type(db, &opened_type)) != 0) { - PRINT_ERROR(retval, "DB->get_type"); - goto error; - } - if (opened_type != DB_BTREE) { - PRINT_ERRORX("Unsupported db type %d\n", opened_type); - goto error; - } - if (g.dbtype != DB_UNKNOWN && opened_type != g.dbtype) { - PRINT_ERRORX("DBTYPE %d does not match opened DBTYPE %d.\n", g.dbtype, opened_type); - goto error; - }*/ - return EXIT_SUCCESS; -error: - fprintf(stderr, "Quitting out due to errors.\n"); - return EXIT_FAILURE; -} - -int doublechararray(char** pmem, uint64_t* size) -{ - assert(pmem); - assert(size); - assert(IS_POWER_OF_2(*size)); - - *size <<= 1; - if (*size == 0) { - /* Overflowed uint64_t. */ - PRINT_ERRORX("Line %" PRIu64 ": Line too long.\n", g.linenumber); - goto error; - } - if ((*pmem = (char*)toku_realloc(*pmem, *size)) == NULL) { - PRINT_ERROR(errno, "doublechararray: realloc"); - goto error; - } - return EXIT_SUCCESS; - -error: - return EXIT_FAILURE; -} - -static int get_dbt(DBT* pdbt) -{ - /* Need to store a key and value. */ - static uint64_t datasize[2] = {1 << 10, 1 << 10}; - static int which = 0; - char* datum; - uint64_t idx = 0; - int highch; - int lowch; - - /* *pdbt should have been memset to 0 before being called. 
*/ - which = 1 - which; - if (g.get_dbt.data[which] == NULL && - (g.get_dbt.data[which] = (char*)toku_malloc(datasize[which] * sizeof(char))) == NULL) { - PRINT_ERROR(errno, "get_dbt: malloc"); - goto error; - } - - datum = g.get_dbt.data[which]; - - if (g.plaintext) { - int firstch; - int nextch = EOF; - - for (firstch = getchar(); firstch != EOF; firstch = getchar()) { - switch (firstch) { - case ('\n'): { - /* Done reading this key/value. */ - nextch = EOF; - break; - } - case ('\\'): { - /* Escaped \ or two hex digits. */ - highch = getchar(); - if (highch == '\\') { - nextch = '\\'; - break; - } - else if (highch == EOF) { - g.eof = true; - PRINT_ERRORX("Line %" PRIu64 ": Unexpected end of file (2 hex digits per byte).\n", g.linenumber); - goto error; - } - else if (!isxdigit(highch)) { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected '%c' (non-hex) input.\n", g.linenumber, highch); - goto error; - } - - lowch = getchar(); - if (lowch == EOF) { - g.eof = true; - PRINT_ERRORX("Line %" PRIu64 ": Unexpected end of file (2 hex digits per byte).\n", g.linenumber); - goto error; - } - else if (!isxdigit(lowch)) { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected '%c' (non-hex) input.\n", g.linenumber, lowch); - goto error; - } - - nextch = (hextoint(highch) << 4) | hextoint(lowch); - break; - } - default: { - if (isprint(firstch)) { - nextch = firstch; - break; - } - PRINT_ERRORX("Line %" PRIu64 ": Nonprintable character found.", g.linenumber); - goto error; - } - } - if (nextch == EOF) { - break; - } - if (idx == datasize[which]) { - /* Overflow, double the memory. */ - if (doublechararray(&g.get_dbt.data[which], &datasize[which])) goto error; - datum = g.get_dbt.data[which]; - } - datum[idx] = (char)nextch; - idx++; - } - if (firstch == EOF) g.eof = true; - } - else { - for (highch = getchar(); highch != EOF; highch = getchar()) { - if (highch == '\n') { - /* Done reading this key/value. */ - break; - } - - lowch = getchar(); - if (lowch == EOF) { - g.eof = true; - PRINT_ERRORX("Line %" PRIu64 ": Unexpected end of file (2 hex digits per byte).\n", g.linenumber); - goto error; - } - if (!isxdigit(highch)) { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected '%c' (non-hex) input.\n", g.linenumber, highch); - goto error; - } - if (!isxdigit(lowch)) { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected '%c' (non-hex) input.\n", g.linenumber, lowch); - goto error; - } - if (idx == datasize[which]) { - /* Overflow, double the memory. */ - if (doublechararray(&g.get_dbt.data[which], &datasize[which])) goto error; - datum = g.get_dbt.data[which]; - } - datum[idx] = (char)((hextoint(highch) << 4) | hextoint(lowch)); - idx++; - } - if (highch == EOF) g.eof = true; - } - - /* Done reading. */ - pdbt->size = idx; - pdbt->data = (void*)datum; - return EXIT_SUCCESS; -error: - return EXIT_FAILURE; -} - -static int insert_pair(DBT* key, DBT* data) -{ - DB* db = g.db; - - int retval = db->put(db, NULL, key, data, g.overwritekeys ? 0 : DB_NOOVERWRITE); - if (retval != 0) { - //TODO: Check for transaction failures/etc.. retry if necessary. 
- PRINT_ERROR(retval, "DB->put"); - if (!(retval == DB_KEYEXIST && g.overwritekeys)) goto error; - } - return EXIT_SUCCESS; -error: - return EXIT_FAILURE; -} - -int read_keys() -{ - DBT key; - DBT data; - int spacech; - - char footer[sizeof("ATA=END\n")]; - - memset(&key, 0, sizeof(key)); - memset(&data, 0, sizeof(data)); - - - //TODO: Start transaction/end transaction/abort/retry/etc - - if (!g.leadingspace) { - assert(g.plaintext); - while (!g.eof) { - if (caught_any_signals()) goto success; - g.linenumber++; - if (get_dbt(&key) != 0) goto error; - if (g.eof) { - if (key.size == 0) { - //Last entry had no newline. Done. - break; - } - PRINT_ERRORX("Line %" PRIu64 ": Key exists but value missing.", g.linenumber); - goto error; - } - g.linenumber++; - if (get_dbt(&data) != 0) goto error; - if (insert_pair(&key, &data) != 0) goto error; - } - } - else while (!g.eof) { - if (caught_any_signals()) goto success; - g.linenumber++; - spacech = getchar(); - switch (spacech) { - case (EOF): { - /* Done. */ - g.eof = true; - goto success; - } - case (' '): { - /* Time to read a key. */ - if (get_dbt(&key) != 0) goto error; - break; - } - case ('D'): { - if (fgets(footer, sizeof("ATA=END\n"), stdin) != NULL && - (!strcmp(footer, "ATA=END") || !strcmp(footer, "ATA=END\n"))) - { - goto success; - } - goto unexpectedinput; - } - default: { -unexpectedinput: - PRINT_ERRORX("Line %" PRIu64 ": Unexpected input while reading key.\n", g.linenumber); - goto error; - } - } - - if (g.eof) { - PRINT_ERRORX("Line %" PRIu64 ": Key exists but value missing.", g.linenumber); - goto error; - } - g.linenumber++; - spacech = getchar(); - switch (spacech) { - case (EOF): { - g.eof = true; - PRINT_ERRORX("Line %" PRIu64 ": Unexpected end of file while reading value.\n", g.linenumber); - goto error; - } - case (' '): { - /* Time to read a key. 
*/ - if (get_dbt(&data) != 0) goto error; - break; - } - default: { - PRINT_ERRORX("Line %" PRIu64 ": Unexpected input while reading value.\n", g.linenumber); - goto error; - } - } - if (insert_pair(&key, &data) != 0) goto error; - } -success: - return EXIT_SUCCESS; -error: - return EXIT_FAILURE; -} - -int close_database() -{ - DB* db = g.db; - int retval; - - assert(db); - if ((retval = db->close(db, 0)) != 0) { - PRINT_ERROR(retval, "DB->close"); - goto error; - } - return EXIT_SUCCESS; -error: - return EXIT_FAILURE; -} diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 6ad6d72231b32..d2194a50c5b79 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -136,8 +136,8 @@ static inline uint get_key_parts(const KEY *key); #include "tokudb_buffer.h" #include "tokudb_status.h" #include "tokudb_card.h" -#include "hatoku_hton.h" #include "ha_tokudb.h" +#include "hatoku_hton.h" #include static const char *ha_tokudb_exts[] = { @@ -1249,6 +1249,7 @@ ha_tokudb::ha_tokudb(handlerton * hton, TABLE_SHARE * table_arg):handler(hton, t tokudb_active_index = MAX_KEY; invalidate_icp(); trx_handler_list.data = this; + in_rpl_write_rows = in_rpl_delete_rows = in_rpl_update_rows = false; TOKUDB_HANDLER_DBUG_VOID_RETURN; } @@ -3561,12 +3562,27 @@ int ha_tokudb::is_val_unique(bool* is_unique, uchar* record, KEY* key_info, uint return error; } +static void maybe_do_unique_checks_delay(THD *thd) { + if (thd->slave_thread) { + uint64_t delay_ms = THDVAR(thd, rpl_unique_checks_delay); + if (delay_ms) + usleep(delay_ms * 1000); + } +} + +static bool do_unique_checks(THD *thd, bool do_rpl_event) { + if (do_rpl_event && thd->slave_thread && opt_readonly && !THDVAR(thd, rpl_unique_checks)) + return false; + else + return !thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS); +} + int ha_tokudb::do_uniqueness_checks(uchar* record, DB_TXN* txn, THD* thd) { - int error; + int error = 0; // // first do uniqueness checks // - if (share->has_unique_keys && !thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) { + if (share->has_unique_keys && do_unique_checks(thd, in_rpl_write_rows)) { for (uint keynr = 0; keynr < table_share->keys; keynr++) { bool is_unique_key = (table->key_info[keynr].flags & HA_NOSAME) || (keynr == primary_key); bool is_unique = false; @@ -3579,13 +3595,18 @@ int ha_tokudb::do_uniqueness_checks(uchar* record, DB_TXN* txn, THD* thd) { if (!is_unique_key) { continue; } + + maybe_do_unique_checks_delay(thd); + // // if unique key, check uniqueness constraint // but, we do not need to check it if the key has a null // and we do not need to check it if unique_checks is off // error = is_val_unique(&is_unique, record, &table->key_info[keynr], keynr, txn); - if (error) { goto cleanup; } + if (error) { + goto cleanup; + } if (!is_unique) { error = DB_KEYEXIST; last_dup_key = keynr; @@ -3593,7 +3614,6 @@ int ha_tokudb::do_uniqueness_checks(uchar* record, DB_TXN* txn, THD* thd) { } } } - error = 0; cleanup: return error; } @@ -3696,15 +3716,8 @@ void ha_tokudb::test_row_packing(uchar* record, DBT* pk_key, DBT* pk_val) { tokudb_my_free(tmp_pk_val_data); } -// // set the put flags for the main dictionary -// -void ha_tokudb::set_main_dict_put_flags( - THD* thd, - bool opt_eligible, - uint32_t* put_flags - ) -{ +void ha_tokudb::set_main_dict_put_flags(THD* thd, bool opt_eligible, uint32_t* put_flags) { uint32_t old_prelock_flags = 0; uint curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); bool in_hot_index = share->num_DBs > curr_num_DBs; @@ -3724,8 +3737,7 
@@ void ha_tokudb::set_main_dict_put_flags( { *put_flags = old_prelock_flags; } - else if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS) - && !is_replace_into(thd) && !is_insert_ignore(thd)) + else if (!do_unique_checks(thd, in_rpl_write_rows | in_rpl_update_rows) && !is_replace_into(thd) && !is_insert_ignore(thd)) { *put_flags = old_prelock_flags; } @@ -3747,22 +3759,18 @@ void ha_tokudb::set_main_dict_put_flags( int ha_tokudb::insert_row_to_main_dictionary(uchar* record, DBT* pk_key, DBT* pk_val, DB_TXN* txn) { int error = 0; - uint32_t put_flags = mult_put_flags[primary_key]; - THD *thd = ha_thd(); uint curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); - assert(curr_num_DBs == 1); - + + uint32_t put_flags = mult_put_flags[primary_key]; + THD *thd = ha_thd(); set_main_dict_put_flags(thd, true, &put_flags); - error = share->file->put( - share->file, - txn, - pk_key, - pk_val, - put_flags - ); + // for test, make unique checks have a very long duration + if ((put_flags & DB_OPFLAGS_MASK) == DB_NOOVERWRITE) + maybe_do_unique_checks_delay(thd); + error = share->file->put(share->file, txn, pk_key, pk_val, put_flags); if (error) { last_dup_key = primary_key; goto cleanup; @@ -3776,14 +3784,18 @@ int ha_tokudb::insert_rows_to_dictionaries_mult(DBT* pk_key, DBT* pk_val, DB_TXN int error = 0; uint curr_num_DBs = share->num_DBs; set_main_dict_put_flags(thd, true, &mult_put_flags[primary_key]); - uint32_t i, flags = mult_put_flags[primary_key]; + uint32_t flags = mult_put_flags[primary_key]; + + // for test, make unique checks have a very long duration + if ((flags & DB_OPFLAGS_MASK) == DB_NOOVERWRITE) + maybe_do_unique_checks_delay(thd); // the insert ignore optimization uses DB_NOOVERWRITE_NO_ERROR, // which is not allowed with env->put_multiple. // we have to insert the rows one by one in this case. if (flags & DB_NOOVERWRITE_NO_ERROR) { DB * src_db = share->key_file[primary_key]; - for (i = 0; i < curr_num_DBs; i++) { + for (uint32_t i = 0; i < curr_num_DBs; i++) { DB * db = share->key_file[i]; if (i == primary_key) { // if it's the primary key, insert the rows @@ -3844,7 +3856,7 @@ int ha_tokudb::insert_rows_to_dictionaries_mult(DBT* pk_key, DBT* pk_val, DB_TXN // error otherwise // int ha_tokudb::write_row(uchar * record) { - TOKUDB_HANDLER_DBUG_ENTER(""); + TOKUDB_HANDLER_DBUG_ENTER("%p", record); DBT row, prim_key; int error; @@ -3882,10 +3894,7 @@ int ha_tokudb::write_row(uchar * record) { if (share->has_auto_inc && record == table->record[0]) { tokudb_pthread_mutex_lock(&share->mutex); ulonglong curr_auto_inc = retrieve_auto_increment( - table->field[share->ai_field_index]->key_type(), - field_offset(table->field[share->ai_field_index], table), - record - ); + table->field[share->ai_field_index]->key_type(), field_offset(table->field[share->ai_field_index], table), record); if (curr_auto_inc > share->last_auto_increment) { share->last_auto_increment = curr_auto_inc; if (delay_updating_ai_metadata) { @@ -4053,7 +4062,6 @@ int ha_tokudb::update_row(const uchar * old_row, uchar * new_row) { memset((void *) &prim_row, 0, sizeof(prim_row)); memset((void *) &old_prim_row, 0, sizeof(old_prim_row)); - ha_statistic_increment(&SSV::ha_update_count); #if MYSQL_VERSION_ID < 50600 if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) { @@ -4100,7 +4108,6 @@ int ha_tokudb::update_row(const uchar * old_row, uchar * new_row) { } txn = using_ignore ? 
sub_trans : transaction; - if (hidden_primary_key) { memset((void *) &prim_key, 0, sizeof(prim_key)); prim_key.data = (void *) current_ident; @@ -4112,10 +4119,8 @@ int ha_tokudb::update_row(const uchar * old_row, uchar * new_row) { create_dbt_key_from_table(&old_prim_key, primary_key, primary_key_buff, old_row, &has_null); } - // // do uniqueness checks - // - if (share->has_unique_keys && !thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) { + if (share->has_unique_keys && do_unique_checks(thd, in_rpl_update_rows)) { for (uint keynr = 0; keynr < table_share->keys; keynr++) { bool is_unique_key = (table->key_info[keynr].flags & HA_NOSAME) || (keynr == primary_key); if (keynr == primary_key && !share->pk_has_string) { @@ -4156,6 +4161,10 @@ int ha_tokudb::update_row(const uchar * old_row, uchar * new_row) { set_main_dict_put_flags(thd, false, &mult_put_flags[primary_key]); + // for test, make unique checks have a very long duration + if ((mult_put_flags[primary_key] & DB_OPFLAGS_MASK) == DB_NOOVERWRITE) + maybe_do_unique_checks_delay(thd); + error = db_env->update_multiple( db_env, share->key_file[primary_key], @@ -5630,13 +5639,11 @@ DBT *ha_tokudb::get_pos(DBT * to, uchar * pos) { DBUG_RETURN(to); } -// // Retrieves a row with based on the primary key saved in pos // Returns: // 0 on success // HA_ERR_KEY_NOT_FOUND if not found // error otherwise -// int ha_tokudb::rnd_pos(uchar * buf, uchar * pos) { TOKUDB_HANDLER_DBUG_ENTER(""); DBT db_pos; @@ -5649,12 +5656,20 @@ int ha_tokudb::rnd_pos(uchar * buf, uchar * pos) { ha_statistic_increment(&SSV::ha_read_rnd_count); tokudb_active_index = MAX_KEY; + // test rpl slave by inducing a delay before the point query + THD *thd = ha_thd(); + if (thd->slave_thread && (in_rpl_delete_rows || in_rpl_update_rows)) { + uint64_t delay_ms = THDVAR(thd, rpl_lookup_rows_delay); + if (delay_ms) + usleep(delay_ms * 1000); + } + info.ha = this; info.buf = buf; info.keynr = primary_key; error = share->file->getf_set(share->file, transaction, - get_cursor_isolation_flags(lock.type, ha_thd()), + get_cursor_isolation_flags(lock.type, thd), key, smart_dbt_callback_rowread_ptquery, &info); if (error == DB_NOTFOUND) { @@ -8177,6 +8192,37 @@ void ha_tokudb::remove_from_trx_handler_list() { trx->handlers = list_delete(trx->handlers, &trx_handler_list); } +void ha_tokudb::rpl_before_write_rows() { + in_rpl_write_rows = true; +} + +void ha_tokudb::rpl_after_write_rows() { + in_rpl_write_rows = false; +} + +void ha_tokudb::rpl_before_delete_rows() { + in_rpl_delete_rows = true; +} + +void ha_tokudb::rpl_after_delete_rows() { + in_rpl_delete_rows = false; +} + +void ha_tokudb::rpl_before_update_rows() { + in_rpl_update_rows = true; +} + +void ha_tokudb::rpl_after_update_rows() { + in_rpl_update_rows = false; +} + +bool ha_tokudb::rpl_lookup_rows() { + if (!in_rpl_delete_rows && !in_rpl_update_rows) + return true; + else + return THDVAR(ha_thd(), rpl_lookup_rows); +} + // table admin #include "ha_tokudb_admin.cc" diff --git a/storage/tokudb/ha_tokudb.h b/storage/tokudb/ha_tokudb.h index 8b56b80a7f0c0..5023e62320151 100644 --- a/storage/tokudb/ha_tokudb.h +++ b/storage/tokudb/ha_tokudb.h @@ -797,6 +797,19 @@ class ha_tokudb : public handler { private: int do_optimize(THD *thd); int map_to_handler_error(int error); + +public: + void rpl_before_write_rows(); + void rpl_after_write_rows(); + void rpl_before_delete_rows(); + void rpl_after_delete_rows(); + void rpl_before_update_rows(); + void rpl_after_update_rows(); + bool rpl_lookup_rows(); +private: + bool 
in_rpl_write_rows; + bool in_rpl_delete_rows; + bool in_rpl_update_rows; }; #if TOKU_INCLUDE_OPTION_STRUCTS diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index 5623b6f9e8257..6ae5eaea5f3d2 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -92,6 +92,7 @@ PATENT RIGHTS GRANT: #define MYSQL_SERVER 1 #include "hatoku_defines.h" #include +#include #include "stdint.h" #if defined(_WIN32) @@ -330,9 +331,25 @@ static void handle_ydb_error(int error) { sql_print_error(" "); sql_print_error("************************************************************"); break; + case TOKUDB_UPGRADE_FAILURE: + sql_print_error("%s upgrade failed. A clean shutdown of the previous version is required.", tokudb_hton_name); + break; + default: + sql_print_error("%s unknown error %d", tokudb_hton_name, error); + break; } } +static int tokudb_set_product_name(void) { + size_t n = strlen(tokudb_hton_name); + char tokudb_product_name[n+1]; + memset(tokudb_product_name, 0, sizeof tokudb_product_name); + for (size_t i = 0; i < n; i++) + tokudb_product_name[i] = tolower(tokudb_hton_name[i]); + int r = db_env_set_toku_product_name(tokudb_product_name); + return r; +} + static int tokudb_init_func(void *p) { TOKUDB_DBUG_ENTER("%p", p); int r; @@ -351,6 +368,12 @@ static int tokudb_init_func(void *p) { } #endif + r = tokudb_set_product_name(); + if (r) { + sql_print_error("%s can not set product name error %d", tokudb_hton_name, r); + goto error; + } + tokudb_pthread_mutex_init(&tokudb_mutex, MY_MUTEX_INIT_FAST); (void) my_hash_init(&tokudb_open_tables, table_alias_charset, 32, 0, 0, (my_hash_get_key) tokudb_get_key, 0, 0); @@ -820,6 +843,12 @@ static int tokudb_rollback(handlerton * hton, THD * thd, bool all) { static int tokudb_xa_prepare(handlerton* hton, THD* thd, bool all) { TOKUDB_DBUG_ENTER(""); int r = 0; + + /* if support_xa is disable, just return */ + if (!THDVAR(thd, support_xa)) { + TOKUDB_DBUG_RETURN(r); + } + DBUG_PRINT("trans", ("preparing transaction %s", all ? "all" : "stmt")); tokudb_trx_data *trx = (tokudb_trx_data *) thd_get_ha_data(thd, hton); DB_TXN* txn = all ? 
trx->all : trx->stmt; @@ -1423,9 +1452,35 @@ static struct st_mysql_sys_var *tokudb_system_variables[] = { MYSQL_SYSVAR(check_jemalloc), #endif MYSQL_SYSVAR(bulk_fetch), +#if TOKU_INCLUDE_XA + MYSQL_SYSVAR(support_xa), +#endif + MYSQL_SYSVAR(rpl_unique_checks), + MYSQL_SYSVAR(rpl_unique_checks_delay), + MYSQL_SYSVAR(rpl_lookup_rows), + MYSQL_SYSVAR(rpl_lookup_rows_delay), NULL }; +// Split ./database/table-dictionary into database, table and dictionary strings +static void tokudb_split_dname(const char *dname, String &database_name, String &table_name, String &dictionary_name) { + const char *splitter = strchr(dname, '/'); + if (splitter) { + const char *database_ptr = splitter+1; + const char *table_ptr = strchr(database_ptr, '/'); + if (table_ptr) { + database_name.append(database_ptr, table_ptr - database_ptr); + table_ptr += 1; + const char *dictionary_ptr = strchr(table_ptr, '-'); + if (dictionary_ptr) { + table_name.append(table_ptr, dictionary_ptr - table_ptr); + dictionary_ptr += 1; + dictionary_name.append(dictionary_ptr); + } + } + } +} + struct st_mysql_storage_engine tokudb_storage_engine = { MYSQL_HANDLERTON_INTERFACE_VERSION }; static struct st_mysql_information_schema tokudb_file_map_information_schema = { MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION }; @@ -1471,31 +1526,12 @@ static int tokudb_file_map(TABLE *table, THD *thd) { assert(iname_len == curr_val.size - 1); table->field[1]->store(iname, iname_len, system_charset_info); - // denormalize the dname - const char *database_name = NULL; - size_t database_len = 0; - const char *table_name = NULL; - size_t table_len = 0; - const char *dictionary_name = NULL; - size_t dictionary_len = 0; - database_name = strchr(dname, '/'); - if (database_name) { - database_name += 1; - table_name = strchr(database_name, '/'); - if (table_name) { - database_len = table_name - database_name; - table_name += 1; - dictionary_name = strchr(table_name, '-'); - if (dictionary_name) { - table_len = dictionary_name - table_name; - dictionary_name += 1; - dictionary_len = strlen(dictionary_name); - } - } - } - table->field[2]->store(database_name, database_len, system_charset_info); - table->field[3]->store(table_name, table_len, system_charset_info); - table->field[4]->store(dictionary_name, dictionary_len, system_charset_info); + // split the dname + String database_name, table_name, dictionary_name; + tokudb_split_dname(dname, database_name, table_name, dictionary_name); + table->field[2]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[3]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[4]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); error = schema_table_store_record(thd, table); } @@ -1526,10 +1562,12 @@ static int tokudb_file_map_fill_table(THD *thd, TABLE_LIST *tables, COND *cond) rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { error = tokudb_file_map(table, thd); + if (error) + my_error(error, MYF(0)); } rw_unlock(&tokudb_hton_initialized_lock); @@ -1556,6 +1594,9 @@ static ST_FIELD_INFO tokudb_fractal_tree_info_field_info[] = { {"bt_num_blocks_in_use", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, {"bt_size_allocated", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, {"bt_size_in_use", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, 
SKIP_OPEN_TABLE }, + {"table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -1593,25 +1634,25 @@ static int tokudb_report_fractal_tree_info_for_db(const DBT *dname, const DBT *i // Recalculate and check just to be safe. { size_t dname_len = strlen((const char *)dname->data); - size_t iname_len = strlen((const char *)iname->data); assert(dname_len == dname->size - 1); + table->field[0]->store((char *)dname->data, dname_len, system_charset_info); + size_t iname_len = strlen((const char *)iname->data); assert(iname_len == iname->size - 1); - table->field[0]->store( - (char *)dname->data, - dname_len, - system_charset_info - ); - table->field[1]->store( - (char *)iname->data, - iname_len, - system_charset_info - ); + table->field[1]->store((char *)iname->data, iname_len, system_charset_info); } table->field[2]->store(bt_num_blocks_allocated, false); table->field[3]->store(bt_num_blocks_in_use, false); table->field[4]->store(bt_size_allocated, false); table->field[5]->store(bt_size_in_use, false); + // split the dname + { + String database_name, table_name, dictionary_name; + tokudb_split_dname((const char *)dname->data, database_name, table_name, dictionary_name); + table->field[6]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[7]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[8]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + } error = schema_table_store_record(thd, table); exit: @@ -1635,12 +1676,7 @@ static int tokudb_fractal_tree_info(TABLE *table, THD *thd) { goto cleanup; } while (error == 0) { - error = tmp_cursor->c_get( - tmp_cursor, - &curr_key, - &curr_val, - DB_NEXT - ); + error = tmp_cursor->c_get(tmp_cursor, &curr_key, &curr_val, DB_NEXT); if (!error) { error = tokudb_report_fractal_tree_info_for_db(&curr_key, &curr_val, table, thd); } @@ -1673,10 +1709,12 @@ static int tokudb_fractal_tree_info_fill_table(THD *thd, TABLE_LIST *tables, CON rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { error = tokudb_fractal_tree_info(table, thd); + if (error) + my_error(error, MYF(0)); } //3938: unlock the status flag lock @@ -1704,6 +1742,9 @@ static ST_FIELD_INFO tokudb_fractal_tree_block_map_field_info[] = { {"blocknum", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, {"offset", 0, MYSQL_TYPE_LONGLONG, 0, MY_I_S_MAYBE_NULL, NULL, SKIP_OPEN_TABLE }, {"size", 0, MYSQL_TYPE_LONGLONG, 0, MY_I_S_MAYBE_NULL, NULL, SKIP_OPEN_TABLE }, + {"table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -1776,19 +1817,13 @@ static int tokudb_report_fractal_tree_block_map_for_db(const DBT *dname, const D // See #5789 // Recalculate and check just to be safe. 
size_t dname_len = strlen((const char *)dname->data); - size_t iname_len = strlen((const char *)iname->data); assert(dname_len == dname->size - 1); + table->field[0]->store((char *)dname->data, dname_len, system_charset_info); + + size_t iname_len = strlen((const char *)iname->data); assert(iname_len == iname->size - 1); - table->field[0]->store( - (char *)dname->data, - dname_len, - system_charset_info - ); - table->field[1]->store( - (char *)iname->data, - iname_len, - system_charset_info - ); + table->field[1]->store((char *)iname->data, iname_len, system_charset_info); + table->field[2]->store(e.checkpoint_counts[i], false); table->field[3]->store(e.blocknums[i], false); static const int64_t freelist_null = -1; @@ -1807,6 +1842,13 @@ static int tokudb_report_fractal_tree_block_map_for_db(const DBT *dname, const D table->field[5]->store(e.sizes[i], false); } + // split the dname + String database_name, table_name, dictionary_name; + tokudb_split_dname((const char *)dname->data, database_name, table_name,dictionary_name); + table->field[6]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[7]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[8]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + error = schema_table_store_record(thd, table); } @@ -1847,12 +1889,7 @@ static int tokudb_fractal_tree_block_map(TABLE *table, THD *thd) { goto cleanup; } while (error == 0) { - error = tmp_cursor->c_get( - tmp_cursor, - &curr_key, - &curr_val, - DB_NEXT - ); + error = tmp_cursor->c_get(tmp_cursor, &curr_key, &curr_val, DB_NEXT); if (!error) { error = tokudb_report_fractal_tree_block_map_for_db(&curr_key, &curr_val, table, thd); } @@ -1885,10 +1922,12 @@ static int tokudb_fractal_tree_block_map_fill_table(THD *thd, TABLE_LIST *tables rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { error = tokudb_fractal_tree_block_map(table, thd); + if (error) + my_error(error, MYF(0)); } //3938: unlock the status flag lock @@ -2036,11 +2075,13 @@ static int tokudb_trx_fill_table(THD *thd, TABLE_LIST *tables, COND *cond) { rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { struct tokudb_trx_extra e = { thd, tables->table }; error = db_env->iterate_live_transactions(db_env, tokudb_trx_callback, &e); + if (error) + my_error(error, MYF(0)); } rw_unlock(&tokudb_hton_initialized_lock); @@ -2067,6 +2108,9 @@ static ST_FIELD_INFO tokudb_lock_waits_field_info[] = { {"lock_waits_key_left", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"lock_waits_key_right", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"lock_waits_start_time", 0, MYSQL_TYPE_LONGLONG, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"lock_waits_table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"lock_waits_table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"lock_waits_table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -2092,6 +2136,13 @@ static int tokudb_lock_waits_callback(DB *db, uint64_t requesting_txnid, const D tokudb_pretty_right_key(db, right_key, 
&right_str); table->field[4]->store(right_str.ptr(), right_str.length(), system_charset_info); table->field[5]->store(start_time, false); + + String database_name, table_name, dictionary_name; + tokudb_split_dname(dname, database_name, table_name, dictionary_name); + table->field[6]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[7]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[8]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + int error = schema_table_store_record(thd, table); return error; } @@ -2107,11 +2158,13 @@ static int tokudb_lock_waits_fill_table(THD *thd, TABLE_LIST *tables, COND *cond rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { struct tokudb_lock_waits_extra e = { thd, tables->table }; error = db_env->iterate_pending_lock_requests(db_env, tokudb_lock_waits_callback, &e); + if (error) + my_error(error, MYF(0)); } rw_unlock(&tokudb_hton_initialized_lock); @@ -2137,6 +2190,9 @@ static ST_FIELD_INFO tokudb_locks_field_info[] = { {"locks_dname", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"locks_key_left", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"locks_key_right", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"locks_table_schema", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"locks_table_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, + {"locks_table_dictionary_name", 256, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -2168,6 +2224,12 @@ static int tokudb_locks_callback(uint64_t txn_id, uint64_t client_id, iterate_ro tokudb_pretty_right_key(db, &right_key, &right_str); table->field[4]->store(right_str.ptr(), right_str.length(), system_charset_info); + String database_name, table_name, dictionary_name; + tokudb_split_dname(dname, database_name, table_name, dictionary_name); + table->field[5]->store(database_name.c_ptr(), database_name.length(), system_charset_info); + table->field[6]->store(table_name.c_ptr(), table_name.length(), system_charset_info); + table->field[7]->store(dictionary_name.c_ptr(), dictionary_name.length(), system_charset_info); + error = schema_table_store_record(thd, table); } return error; @@ -2184,11 +2246,13 @@ static int tokudb_locks_fill_table(THD *thd, TABLE_LIST *tables, COND *cond) { rw_rdlock(&tokudb_hton_initialized_lock); if (!tokudb_hton_initialized) { - my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), "TokuDB"); - error = -1; + error = ER_PLUGIN_IS_NOT_LOADED; + my_error(error, MYF(0), tokudb_hton_name); } else { struct tokudb_locks_extra e = { thd, tables->table }; error = db_env->iterate_live_transactions(db_env, tokudb_locks_callback, &e); + if (error) + my_error(error, MYF(0)); } rw_unlock(&tokudb_hton_initialized_lock); diff --git a/storage/tokudb/hatoku_hton.h b/storage/tokudb/hatoku_hton.h index 80ddc7c7abae8..58d34f01af657 100644 --- a/storage/tokudb/hatoku_hton.h +++ b/storage/tokudb/hatoku_hton.h @@ -450,18 +450,41 @@ static TYPELIB tokudb_empty_scan_typelib = { NULL }; -static MYSQL_THDVAR_ENUM(empty_scan, - PLUGIN_VAR_OPCMDARG, +static MYSQL_THDVAR_ENUM(empty_scan, PLUGIN_VAR_OPCMDARG, "TokuDB algorithm to check if the table is empty when opened. 
", NULL, NULL, TOKUDB_EMPTY_SCAN_RL, &tokudb_empty_scan_typelib ); #if TOKUDB_CHECK_JEMALLOC static uint tokudb_check_jemalloc; -static MYSQL_SYSVAR_UINT(check_jemalloc, tokudb_check_jemalloc, 0, "Check if jemalloc is linked", NULL, NULL, 1, 0, 1, 0); +static MYSQL_SYSVAR_UINT(check_jemalloc, tokudb_check_jemalloc, 0, "Check if jemalloc is linked", + NULL, NULL, 1, 0, 1, 0); #endif -static MYSQL_THDVAR_BOOL(bulk_fetch, PLUGIN_VAR_THDLOCAL, "enable bulk fetch", NULL /*check*/, NULL /*update*/, true /*default*/); +static MYSQL_THDVAR_BOOL(bulk_fetch, PLUGIN_VAR_THDLOCAL, "enable bulk fetch", + NULL /*check*/, NULL /*update*/, true /*default*/); + +#if TOKU_INCLUDE_XA +static MYSQL_THDVAR_BOOL(support_xa, + PLUGIN_VAR_OPCMDARG, + "Enable TokuDB support for the XA two-phase commit", + NULL, // check + NULL, // update + true // default +); +#endif + +static MYSQL_THDVAR_BOOL(rpl_unique_checks, PLUGIN_VAR_THDLOCAL, "enable unique checks on replication slave", + NULL /*check*/, NULL /*update*/, true /*default*/); + +static MYSQL_THDVAR_ULONGLONG(rpl_unique_checks_delay, PLUGIN_VAR_THDLOCAL, "time in milliseconds to add to unique checks test on replication slave", + NULL, NULL, 0 /*default*/, 0 /*min*/, ~0ULL /*max*/, 1 /*blocksize*/); + +static MYSQL_THDVAR_BOOL(rpl_lookup_rows, PLUGIN_VAR_THDLOCAL, "lookup a row on rpl slave", + NULL /*check*/, NULL /*update*/, true /*default*/); + +static MYSQL_THDVAR_ULONGLONG(rpl_lookup_rows_delay, PLUGIN_VAR_THDLOCAL, "time in milliseconds to add to lookups on replication slave", + NULL, NULL, 0 /*default*/, 0 /*min*/, ~0ULL /*max*/, 1 /*blocksize*/); extern HASH tokudb_open_tables; extern pthread_mutex_t tokudb_mutex; diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_delete_pk.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_delete_pk.result new file mode 100644 index 0000000000000..cd8608f4387f1 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_delete_pk.result @@ -0,0 +1,17 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, primary key(a)) engine=tokudb; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); +include/diff_tables.inc [master:test.t, slave:test.t] +delete from t where a=2; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +1 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_delete_pk_lookup1.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_delete_pk_lookup1.result new file mode 100644 index 0000000000000..ae2aea84287cb --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_delete_pk_lookup1.result @@ -0,0 +1,17 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, primary key(a)) engine=tokudb; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); +include/diff_tables.inc [master:test.t, slave:test.t] +delete from t where a=2; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart > 5; +@tend-@tstart > 5 +1 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc0_lookup0.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc0_lookup0.result new file mode 100644 index 
0000000000000..fc961fd0c1324 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc0_lookup0.result @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +1 +select * from t; +a b +1 3 +2 2 +3 5 +4 3 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc0_lookup1.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc0_lookup1.result new file mode 100644 index 0000000000000..5325f6c3c6d59 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc0_lookup1.result @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +0 +select * from t; +a b +1 3 +2 2 +3 5 +4 3 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc1_lookup0.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc1_lookup0.result new file mode 100644 index 0000000000000..5325f6c3c6d59 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc1_lookup0.result @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +0 +select * from t; +a b +1 3 +2 2 +3 5 +4 3 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc1_lookup1.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc1_lookup1.result new file mode 100644 index 0000000000000..5325f6c3c6d59 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_pk_uc1_lookup1.result @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a)) engine=tokudb; +insert into t values (1,0); +insert into t values 
(2,0),(3,0); +insert into t values (4,0); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +0 +select * from t; +a b +1 3 +2 2 +3 5 +4 3 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_unique_uc0_lookup0.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_unique_uc0_lookup0.result new file mode 100644 index 0000000000000..0b958b89d0ffa --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_unique_uc0_lookup0.result @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, c bigint not null, primary key(a), unique key(c)) engine=tokudb; +insert into t values (1,0,-1); +insert into t values (2,0,-2),(3,0,-3); +insert into t values (4,0,-4); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +1 +select * from t; +a b c +1 3 -1 +2 2 -2 +3 5 -3 +4 3 -4 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_unique_uc0_lookup1.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_unique_uc0_lookup1.result new file mode 100644 index 0000000000000..83dcdb394dfdb --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_update_unique_uc0_lookup1.result @@ -0,0 +1,27 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, c bigint not null, primary key(a), unique key(c)) engine=tokudb; +insert into t values (1,0,-1); +insert into t values (2,0,-2),(3,0,-3); +insert into t values (4,0,-4); +include/diff_tables.inc [master:test.t, slave:test.t] +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; +@tend-@tstart <= 5 +0 +select * from t; +a b c +1 3 -1 +2 2 -2 +3 5 -3 +4 3 -4 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_pk.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_pk.result new file mode 100644 index 0000000000000..6db2036d933d8 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_pk.result @@ -0,0 +1,14 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, primary key(a)) engine=tokudb; +select unix_timestamp() into @tstart; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); +select unix_timestamp()-@tstart <= 10; +unix_timestamp()-@tstart <= 10 +1 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git 
a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_pk_uc1.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_pk_uc1.result new file mode 100644 index 0000000000000..3bcd3e8ccdd6f --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_pk_uc1.result @@ -0,0 +1,14 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, primary key(a)) engine=tokudb; +select unix_timestamp() into @tstart; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); +select unix_timestamp()-@tstart <= 10; +unix_timestamp()-@tstart <= 10 +0 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_unique.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_unique.result new file mode 100644 index 0000000000000..9eb1f2edf2041 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_unique.result @@ -0,0 +1,14 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a), unique key(b)) engine=tokudb; +select unix_timestamp() into @tstart; +insert into t values (1,2); +insert into t values (2,3),(3,4); +insert into t values (4,5); +select unix_timestamp()-@tstart <= 10; +unix_timestamp()-@tstart <= 10 +1 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_unique_uc1.result b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_unique_uc1.result new file mode 100644 index 0000000000000..3bed6ea282a4e --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/r/rpl_tokudb_write_unique_uc1.result @@ -0,0 +1,14 @@ +include/master-slave.inc +[connection master] +drop table if exists t; +create table t (a bigint not null, b bigint not null, primary key(a), unique key(b)) engine=tokudb; +select unix_timestamp() into @tstart; +insert into t values (1,2); +insert into t values (2,3),(3,4); +insert into t values (4,5); +select unix_timestamp()-@tstart <= 10; +unix_timestamp()-@tstart <= 10 +0 +include/diff_tables.inc [master:test.t, slave:test.t] +drop table if exists t; +include/rpl_end.inc diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk-slave.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk-slave.opt new file mode 100644 index 0000000000000..dc139282dc4c3 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=OFF diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk.test b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk.test new file mode 100644 index 0000000000000..fb42f40bb62d4 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk.test @@ -0,0 +1,63 @@ +# test replicated delete rows log events on a table with a primary key. +# the slave is read only with tokudb rpl row lookups OFF. 
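For reference while reading these replication tests: "tokudb rpl row lookups" refers to the handler state and session variables added in the ha_tokudb.cc and hatoku_hton.h hunks above. Those pieces are spread across several hunks, so here is a condensed sketch of the lookup side only (a simplification, not a verbatim copy of the patch; the caller of rpl_lookup_rows() lies outside the hunks shown here):

    // Flags toggled by rpl_before_*/rpl_after_* around row-based replication events.
    bool in_rpl_write_rows, in_rpl_delete_rows, in_rpl_update_rows;

    // Should the slave read the old row before applying a delete/update rows event?
    // With tokudb_rpl_lookup_rows=OFF on the slave this returns false and the read is skipped.
    bool ha_tokudb::rpl_lookup_rows() {
        if (!in_rpl_delete_rows && !in_rpl_update_rows)
            return true;                               // non-replication paths always look up
        return THDVAR(ha_thd(), rpl_lookup_rows);
    }

    // Inside rnd_pos(): a test-only delay (tokudb_rpl_lookup_rows_delay, in milliseconds)
    // so the tests below can tell from elapsed wall-clock time whether the point query ran.
    if (thd->slave_thread && (in_rpl_delete_rows || in_rpl_update_rows)) {
        uint64_t delay_ms = THDVAR(thd, rpl_lookup_rows_delay);
        if (delay_ms)
            usleep(delay_ms * 1000);
    }
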
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +delete from t where a=2; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk_lookup1-slave.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk_lookup1-slave.opt new file mode 100644 index 0000000000000..4675b07763d07 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk_lookup1-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=0 --tokudb-rpl-unique-checks=ON --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=ON diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk_lookup1.test b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk_lookup1.test new file mode 100644 index 0000000000000..bf5edbd2c1b39 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_delete_pk_lookup1.test @@ -0,0 +1,66 @@ +# test replicated delete rows log events on a table with a primary key. +# the slave is read only with tokudb rpl row lookups ON. +# this will cause SLOW deletes. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +delete from t where a=2; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart > 5; # assert big delay in the delete time + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup0-slave.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup0-slave.opt new file mode 100644 index 0000000000000..dc139282dc4c3 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup0-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=OFF diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup0.test b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup0.test new file mode 100644 index 0000000000000..998987349c7b5 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup0.test @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key. 
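A note on the uc*_lookup* naming used by the update tests that follow: uc0/uc1 means the slave runs with --tokudb-rpl-unique-checks=OFF/ON and lookup0/lookup1 means --tokudb-rpl-lookup-rows=OFF/ON; the matching -slave.opt files also set --read-only=ON and 10000 ms test delays so the elapsed-time checks can distinguish the two code paths. The unique-checks half of that decision, condensed from the ha_tokudb.cc hunks above as a sketch with added comments:

    // A read-only slave applying a rows event skips uniqueness checks when
    // tokudb_rpl_unique_checks=OFF; everyone else keeps the usual unique_checks semantics.
    static bool do_unique_checks(THD *thd, bool do_rpl_event) {
        if (do_rpl_event && thd->slave_thread && opt_readonly && !THDVAR(thd, rpl_unique_checks))
            return false;
        return !thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS);
    }

    // Test-only hook: tokudb_rpl_unique_checks_delay (milliseconds) stretches each check
    // so the unix_timestamp() deltas in these tests reveal whether the check path ran.
    static void maybe_do_unique_checks_delay(THD *thd) {
        if (thd->slave_thread) {
            uint64_t delay_ms = THDVAR(thd, rpl_unique_checks_delay);
            if (delay_ms)
                usleep(delay_ms * 1000);
        }
    }
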
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# delete a row +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the delete to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the delete time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup1-slave.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup1-slave.opt new file mode 100644 index 0000000000000..d546dd00669b1 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup1-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=ON diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup1.test b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup1.test new file mode 100644 index 0000000000000..998987349c7b5 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc0_lookup1.test @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# update some rows +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the updates to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the update time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup0-slave.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup0-slave.opt new file mode 100644 index 0000000000000..5cfe5f83a91eb --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup0-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=ON --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=OFF diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup0.test b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup0.test new file mode 100644 index 0000000000000..998987349c7b5 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup0.test @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key.
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# update some rows +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the updates to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the update time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup1-slave.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup1-slave.opt new file mode 100644 index 0000000000000..7cd575c52bb14 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup1-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=ON --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=ON diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup1.test b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup1.test new file mode 100644 index 0000000000000..998987349c7b5 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_pk_uc1_lookup1.test @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key.
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a)) engine=$engine; +# show create table t; +insert into t values (1,0); +insert into t values (2,0),(3,0); +insert into t values (4,0); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# update some rows +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the updates to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the update time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup0-slave.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup0-slave.opt new file mode 100644 index 0000000000000..dc139282dc4c3 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup0-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=OFF diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup0.test b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup0.test new file mode 100644 index 0000000000000..11401ac0ce024 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup0.test @@ -0,0 +1,70 @@ +# test replicated update rows log events on a table with a primary key and a unique secondary key.
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, c bigint not null, primary key(a), unique key(c)) engine=$engine; +# show create table t; +insert into t values (1,0,-1); +insert into t values (2,0,-2),(3,0,-3); +insert into t values (4,0,-4); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# update some rows +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the updates to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the update time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup1-slave.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup1-slave.opt new file mode 100644 index 0000000000000..d546dd00669b1 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup1-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=OFF --tokudb-rpl-lookup-rows-delay=10000 --tokudb-rpl-lookup-rows=ON diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup1.test b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup1.test new file mode 100644 index 0000000000000..ea77447bc75c0 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_update_unique_uc0_lookup1.test @@ -0,0 +1,69 @@ +# test replicated update rows log events on a table with a primary key and a unique secondary key.
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, c bigint not null, primary key(a), unique key(c)) engine=$engine; +# show create table t; +insert into t values (1,0,-1); +insert into t values (2,0,-2),(3,0,-3); +insert into t values (4,0,-4); + +# wait for the inserts to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# update some rows +connection master; +update t set b=b+1 where a=2; +update t set b=b+2 where a=1; +update t set b=b+3 where a=4; +update t set b=b+4 where a=3; +update t set b=b+1 where 1<=a and a<=3; +select unix_timestamp() into @tstart; + +# wait for the updates to finish on the slave +connection master; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; +connection master; +select unix_timestamp() into @tend; +select @tend-@tstart <= 5; # assert no delay in the update time + +connection slave; +select * from t; + +# diff tables +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk-slave.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk-slave.opt new file mode 100644 index 0000000000000..9baf0d65ecfab --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=5000 --tokudb-rpl-unique-checks=OFF diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk.test b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk.test new file mode 100644 index 0000000000000..c77e4b49605e5 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk.test @@ -0,0 +1,53 @@ +# test replicated write rows log events on a table with a primary key. +# the slave is read only with tokudb unique checks disabled.
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_unique_checks%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, primary key(a)) engine=$engine; +# show create table t; +select unix_timestamp() into @tstart; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp()-@tstart <= 10; + +connection slave; +# insert into t values (5); # test read-only +# show create table t; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk_uc1-slave.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk_uc1-slave.opt new file mode 100644 index 0000000000000..b1df0b6daf017 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk_uc1-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=10000 --tokudb-rpl-unique-checks=ON diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk_uc1.test b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk_uc1.test new file mode 100644 index 0000000000000..c77e4b49605e5 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_pk_uc1.test @@ -0,0 +1,53 @@ +# test replicated write rows log events on a table with a primary key. +# the slave is read only with tokudb unique checks enabled.
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_unique_checks%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, primary key(a)) engine=$engine; +# show create table t; +select unix_timestamp() into @tstart; +insert into t values (1); +insert into t values (2),(3); +insert into t values (4); + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp()-@tstart <= 10; + +connection slave; +# insert into t values (5); # test read-only +# show create table t; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique-slave.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique-slave.opt new file mode 100644 index 0000000000000..9baf0d65ecfab --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=5000 --tokudb-rpl-unique-checks=OFF diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique.test b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique.test new file mode 100644 index 0000000000000..cf6a26b423dae --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique.test @@ -0,0 +1,52 @@ +# test replicated write rows log events on a table with a primary key and a unique secondary key. +# the slave is read only with tokudb unique checks disabled. 
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_unique_checks%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a), unique key(b)) engine=$engine; +# show create table t; +select unix_timestamp() into @tstart; +insert into t values (1,2); +insert into t values (2,3),(3,4); +insert into t values (4,5); + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp()-@tstart <= 10; + +connection slave; +# show create table t; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique_uc1-slave.opt b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique_uc1-slave.opt new file mode 100644 index 0000000000000..0518efd3da5c1 --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique_uc1-slave.opt @@ -0,0 +1 @@ +--read-only=ON --tokudb-rpl-unique-checks-delay=5000 --tokudb-rpl-unique-checks=ON diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique_uc1.test b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique_uc1.test new file mode 100644 index 0000000000000..cf6a26b423dae --- /dev/null +++ b/storage/tokudb/mysql-test/rpl/t/rpl_tokudb_write_unique_uc1.test @@ -0,0 +1,52 @@ +# test replicated write rows log events on a table with a primary key and a unique secondary key. +# the slave is read only with tokudb unique checks enabled.
+ +source include/have_tokudb.inc; +let $engine=tokudb; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +# initialize +connection master; +disable_warnings; +drop table if exists t; +enable_warnings; + +connection slave; +# show variables like 'read_only'; +# show variables like 'tokudb_rpl_unique_checks%'; + +# insert some rows +connection master; +# select @@binlog_format; +# select @@autocommit; +eval create table t (a bigint not null, b bigint not null, primary key(a), unique key(b)) engine=$engine; +# show create table t; +select unix_timestamp() into @tstart; +insert into t values (1,2); +insert into t values (2,3),(3,4); +insert into t values (4,5); + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +connection master; +select unix_timestamp()-@tstart <= 10; + +connection slave; +# show create table t; + +# diff tables +connection master; +--let $diff_tables= master:test.t, slave:test.t +source include/diff_tables.inc; + +# cleanup +connection master; +drop table if exists t; + +sync_slave_with_master; +# source include/sync_slave_sql_with_master.inc; + +source include/rpl_end.inc; + diff --git a/storage/tokudb/mysql-test/tokudb/disabled.def b/storage/tokudb/mysql-test/tokudb/disabled.def index 5acbffc6ec082..215060678954e 100644 --- a/storage/tokudb/mysql-test/tokudb/disabled.def +++ b/storage/tokudb/mysql-test/tokudb/disabled.def @@ -58,3 +58,4 @@ hotindex-del-1: No online ALTER in MariaDB 5.5 mvcc-19: No online ALTER in MariaDB 5.5 mvcc-20: No online ALTER in MariaDB 5.5 mvcc-27: No online OPTIMIZE in MariaDB 5.5 +cluster_key_part: engine options on partitioned tables diff --git a/storage/tokudb/mysql-test/tokudb/r/bf_create_select_nonpart.result b/storage/tokudb/mysql-test/tokudb/r/bf_create_select.result similarity index 100% rename from storage/tokudb/mysql-test/tokudb/r/bf_create_select_nonpart.result rename to storage/tokudb/mysql-test/tokudb/r/bf_create_select.result diff --git a/storage/tokudb/mysql-test/tokudb/r/bf_create_temp_select_nonpart.result b/storage/tokudb/mysql-test/tokudb/r/bf_create_temp_select.result similarity index 100% rename from storage/tokudb/mysql-test/tokudb/r/bf_create_temp_select_nonpart.result rename to storage/tokudb/mysql-test/tokudb/r/bf_create_temp_select.result diff --git a/storage/tokudb/mysql-test/tokudb/r/bf_delete_nonpart.result b/storage/tokudb/mysql-test/tokudb/r/bf_delete.result similarity index 100% rename from storage/tokudb/mysql-test/tokudb/r/bf_delete_nonpart.result rename to storage/tokudb/mysql-test/tokudb/r/bf_delete.result diff --git a/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_nonpart.result b/storage/tokudb/mysql-test/tokudb/r/bf_insert_select.result similarity index 100% rename from storage/tokudb/mysql-test/tokudb/r/bf_insert_select_nonpart.result rename to storage/tokudb/mysql-test/tokudb/r/bf_insert_select.result diff --git a/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_dup_key_nonpart.result b/storage/tokudb/mysql-test/tokudb/r/bf_insert_select_dup_key.result similarity index 100% rename from storage/tokudb/mysql-test/tokudb/r/bf_insert_select_dup_key_nonpart.result rename to storage/tokudb/mysql-test/tokudb/r/bf_insert_select_dup_key.result diff --git a/storage/tokudb/mysql-test/tokudb/r/bf_replace_select_nonpart.result b/storage/tokudb/mysql-test/tokudb/r/bf_replace_select.result similarity index 100% rename from storage/tokudb/mysql-test/tokudb/r/bf_replace_select_nonpart.result rename to 
storage/tokudb/mysql-test/tokudb/r/bf_replace_select.result diff --git a/storage/tokudb/mysql-test/tokudb/r/bf_select_hash_part.result b/storage/tokudb/mysql-test/tokudb/r/bf_select_hash_part.result new file mode 100644 index 0000000000000..2c72c7129db39 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/bf_select_hash_part.result @@ -0,0 +1,278 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +CREATE TABLE `t` ( +`num` int(10) unsigned NOT NULL auto_increment, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) PARTITION BY HASH (num) PARTITIONS 8; +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +1048576 +set tokudb_bulk_fetch=ON; +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +set tokudb_bulk_fetch=OFF; +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +1 +set tokudb_bulk_fetch=ON; +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) 
from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +set tokudb_bulk_fetch=OFF; +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +SELECT count(*) from t where num > 500000; +count(*) +548576 +1 +drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/r/bf_select_part.result b/storage/tokudb/mysql-test/tokudb/r/bf_select_part.result deleted file mode 100644 index f36fc0e1ce3c3..0000000000000 --- a/storage/tokudb/mysql-test/tokudb/r/bf_select_part.result +++ /dev/null @@ -1,543 +0,0 @@ -set default_storage_engine='tokudb'; -drop table if exists t,t1,t2,t3; -CREATE TABLE `t` ( -`num` int(10) unsigned auto_increment NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -); -INSERT INTO t values (null,null); -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val 
FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -SELECT count(*) FROM t; -count(*) -8388608 -CREATE TABLE `t1` ( -`num` int(10) unsigned NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -) as select * from t; -CREATE TABLE `t2` ( -`num` int(10) unsigned NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -) PARTITION BY HASH (num) -PARTITIONS 8 as select * from t; -CREATE TABLE `t3` ( -`num` int(10) unsigned NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -) PARTITION BY RANGE (num) -(PARTITION p0 VALUES LESS THAN (1000000), -PARTITION p1 VALUES LESS THAN (2000000), -PARTITION p2 VALUES LESS THAN (3000000), -PARTITION p3 VALUES LESS THAN (4000000), -PARTITION p4 VALUES LESS THAN (5000000), -PARTITION p5 VALUES LESS THAN (6000000), -PARTITION p6 VALUES LESS THAN (7000000), -PARTITION p7 VALUES LESS THAN MAXVALUE) as select * from t; -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -SELECT count(*) from t2; -count(*) -8388608 -1 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t1; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -SELECT count(*) from t3; -count(*) -8388608 -1 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 
-SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t2 where num > 7000000; -count(*) -1847274 -1 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 
7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t1 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -SELECT count(*) from t3 where num > 7000000; -count(*) -1847274 -1 -drop table t,t1,t2,t3; diff --git 
a/storage/tokudb/mysql-test/tokudb/r/bf_select_range_part.result b/storage/tokudb/mysql-test/tokudb/r/bf_select_range_part.result new file mode 100644 index 0000000000000..c13324aa34e99 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/bf_select_range_part.result @@ -0,0 +1,286 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +CREATE TABLE `t` ( +`num` int(10) unsigned NOT NULL auto_increment, +`val` varchar(32) DEFAULT NULL, +PRIMARY KEY (`num`) +) PARTITION BY RANGE (num) +(PARTITION p0 VALUES LESS THAN (100000), +PARTITION p1 VALUES LESS THAN (200000), +PARTITION p2 VALUES LESS THAN (300000), +PARTITION p3 VALUES LESS THAN (400000), +PARTITION p4 VALUES LESS THAN (500000), +PARTITION p5 VALUES LESS THAN (600000), +PARTITION p6 VALUES LESS THAN (700000), +PARTITION p7 VALUES LESS THAN MAXVALUE); +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; +count(*) +1048576 +set tokudb_bulk_fetch=ON; +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +set tokudb_bulk_fetch=OFF; +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +SELECT count(*) from t; +count(*) +1048576 +1 +set tokudb_bulk_fetch=ON; +SELECT count(*) 
from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +set tokudb_bulk_fetch=OFF; +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +SELECT count(*) from t where num > 700000; +count(*) +348576 +1 +drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/r/cluster_key_part.result b/storage/tokudb/mysql-test/tokudb/r/cluster_key_part.result new file mode 100644 index 0000000000000..cd8fc34031459 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/cluster_key_part.result @@ -0,0 +1,28 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +create table t ( +x int not null, +y int not null, +primary key(x)) +partition by hash(x) partitions 2; +show create table t; +Table Create Table +t CREATE TABLE `t` ( + `x` int(11) NOT NULL, + `y` int(11) NOT NULL, + PRIMARY KEY (`x`) +) ENGINE=TokuDB DEFAULT CHARSET=latin1 +/*!50100 PARTITION BY HASH (x) +PARTITIONS 2 */ +alter table t add clustering key(y); +show create table t; +Table Create Table +t CREATE TABLE `t` ( + `x` int(11) NOT NULL, + `y` int(11) NOT NULL, + PRIMARY KEY (`x`), + CLUSTERING KEY `y` (`y`) +) ENGINE=TokuDB DEFAULT CHARSET=latin1 +/*!50100 PARTITION BY HASH (x) +PARTITIONS 2 */ +drop table t; diff --git 
a/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result b/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result deleted file mode 100644 index 9c45ecea77114..0000000000000 --- a/storage/tokudb/mysql-test/tokudb/r/ext_key_1_innodb.result +++ /dev/null @@ -1,107 +0,0 @@ -drop table if exists t; -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; -@@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on -create table t (id int not null, x int not null, y int not null, primary key(id), key(x)) engine=innodb; -insert into t values (0,0,0),(1,1,1),(2,2,2),(3,2,3),(4,2,4); -explain select x,id from t force index (x) where x=0 and id=0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=0 and id=0; -x id -0 0 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select y,id from t force index (x) where x=0 and id=0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 -flush status; -select y,id from t force index (x) where x=0 and id=0; -y id -0 0 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select x,id from t force index (x) where x=0 and id=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=0 and id=1; -x id -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select y,id from t force index (x)where x=0 and id=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 -flush status; -select y,id from t force index(x) where x=0 and id=1; -y id -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select x,id from t force index (x) where x=2 and id=3; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=2 and id=3; -x id -2 3 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 
-Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select x,id from t force index (x) where x=2 and id=0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=2 and id=0; -x id -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result b/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result deleted file mode 100644 index 749b347ce806e..0000000000000 --- a/storage/tokudb/mysql-test/tokudb/r/ext_key_1_tokudb.result +++ /dev/null @@ -1,107 +0,0 @@ -drop table if exists t; -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; -@@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on -create table t (id int not null, x int not null, y int not null, primary key(id), key(x)) engine=tokudb; -insert into t values (0,0,0),(1,1,1),(2,2,2),(3,2,3),(4,2,4); -explain select x,id from t force index (x) where x=0 and id=0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=0 and id=0; -x id -0 0 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select y,id from t force index (x) where x=0 and id=0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 -flush status; -select y,id from t force index (x) where x=0 and id=0; -y id -0 0 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select x,id from t force index (x) where x=0 and id=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=0 and id=1; -x id -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select y,id from t force index (x)where x=0 and id=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 -flush status; -select y,id from t force index(x) where x=0 and id=1; -y id -show status like 'handler_read%'; -Variable_name Value 
-Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select x,id from t force index (x) where x=2 and id=3; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=2 and id=3; -x id -2 3 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select x,id from t force index (x) where x=2 and id=0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const x x 8 const,const 1 Using index -flush status; -select x,id from t force index (x) where x=2 and id=0; -x id -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result b/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result deleted file mode 100644 index d0c1b064d53e8..0000000000000 --- a/storage/tokudb/mysql-test/tokudb/r/ext_key_2_innodb.result +++ /dev/null @@ -1,42 +0,0 @@ -drop table if exists t; -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; -@@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on -create table t (a int not null, b int not null, c int not null, d int not null, primary key(a,b), key(c,a)) engine=innodb; -insert into t values (0,0,0,0),(0,1,0,1); -explain select c,a,b from t where c=0 and a=0 and b=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const PRIMARY,c PRIMARY 8 const,const 1 -flush status; -select c,a,b from t where c=0 and a=0 and b=1; -c a b -0 0 1 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select c,a,b from t force index (c) where c=0 and a=0 and b=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const c c 12 const,const,const 1 Using index -flush status; -select c,a,b from t force index (c) where c=0 and a=0 and b=1; -c a b -0 0 1 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result b/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result deleted file mode 100644 index 
ec20262e391d9..0000000000000 --- a/storage/tokudb/mysql-test/tokudb/r/ext_key_2_tokudb.result +++ /dev/null @@ -1,42 +0,0 @@ -drop table if exists t; -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; -@@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=on -create table t (a int not null, b int not null, c int not null, d int not null, primary key(a,b), key(c,a)) engine=tokudb; -insert into t values (0,0,0,0),(0,1,0,1); -explain select c,a,b from t where c=0 and a=0 and b=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const PRIMARY,c PRIMARY 8 const,const 1 -flush status; -select c,a,b from t where c=0 and a=0 and b=1; -c a b -0 0 1 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -explain select c,a,b from t force index (c) where c=0 and a=0 and b=1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t const c c 12 const,const,const 1 Using index -flush status; -select c,a,b from t force index (c) where c=0 and a=0 and b=1; -c a b -0 0 1 -show status like 'handler_read%'; -Variable_name Value -Handler_read_first 0 -Handler_read_key 1 -Handler_read_last 0 -Handler_read_next 0 -Handler_read_prev 0 -Handler_read_rnd 0 -Handler_read_rnd_deleted 0 -Handler_read_rnd_next 0 -drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result index db63d23e3823f..018900c7b98f6 100644 --- a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result +++ b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result @@ -5,65 +5,64 @@ create table t (id int primary key); select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name set autocommit=0; set tokudb_prelock_empty=OFF; insert into t values (1); set autocommit=0; insert into t values (1); select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema 
locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time -REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name +REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name ERROR 23000: Duplicate entry '1' for key 'PRIMARY' commit; select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name set autocommit=0; set tokudb_prelock_empty=OFF; replace into t values (1); set autocommit=0; replace into t values (1); select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time -REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name +REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; select * from information_schema.tokudb_locks; 
-locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name commit; select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_timeout.result b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_timeout.result index 10e3830506de0..b9fca50b507a1 100644 --- a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_timeout.result +++ b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_timeout.result @@ -5,35 +5,35 @@ create table t (id int primary key); select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name set autocommit=0; set tokudb_prelock_empty=OFF; insert into t values (1); set autocommit=0; insert into t values (1); select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time -REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name +REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 
0001000000 LOCK_WAITS_START_TIME test t main select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name ERROR HY000: Lock wait timeout exceeded; try restarting transaction commit; select * from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name select * from information_schema.tokudb_lock_waits; -requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time +requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks.result b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks.result index 9fce0695983ca..a07f7ba52fe25 100644 --- a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks.result +++ b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks.result @@ -4,7 +4,7 @@ drop table if exists t; create table t (id int primary key); set autocommit=0; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name insert into t values (1); insert into t values (3); insert into t values (5); @@ -13,16 +13,16 @@ insert into t values (2); insert into t values (4); insert into t values (6); select * from information_schema.tokudb_locks order by locks_trx_id,locks_key_left; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 -TRX_ID MYSQL_ID ./test/t-main 0003000000 0003000000 -TRX_ID MYSQL_ID ./test/t-main 0005000000 0005000000 -TRX_ID MYSQL_ID ./test/t-main 0002000000 0002000000 -TRX_ID MYSQL_ID ./test/t-main 0004000000 0004000000 -TRX_ID MYSQL_ID ./test/t-main 0006000000 0006000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main +TRX_ID MYSQL_ID ./test/t-main 0003000000 0003000000 test t main +TRX_ID MYSQL_ID ./test/t-main 0005000000 0005000000 test t main +TRX_ID MYSQL_ID ./test/t-main 0002000000 0002000000 test t main +TRX_ID MYSQL_ID ./test/t-main 0004000000 0004000000 test t main +TRX_ID MYSQL_ID ./test/t-main 0006000000 0006000000 test t main commit; commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname 
locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name commit; drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks_released.result b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks_released.result index 628ff46ffc4fd..0a5862e9322b5 100644 --- a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks_released.result +++ b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_locks_released.result @@ -4,22 +4,21 @@ drop table if exists t; create table t (id int primary key); set autocommit=0; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name set autocommit=0; set tokudb_prelock_empty=OFF; insert into t values (1); set autocommit=0; insert into t values (1); select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 -TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name +TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main ERROR 23000: Duplicate entry '1' for key 'PRIMARY' commit; select * from information_schema.tokudb_locks; -locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right +locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/r/information-schema-global-status.result b/storage/tokudb/mysql-test/tokudb/r/information-schema-global-status.result index a417483cefb30..369c14fe4fe2c 100644 --- a/storage/tokudb/mysql-test/tokudb/r/information-schema-global-status.result +++ b/storage/tokudb/mysql-test/tokudb/r/information-schema-global-status.result @@ -16,6 +16,8 @@ TOKUDB_BASEMENTS_FETCHED_PRELOCKED_RANGE_SECONDS TOKUDB_BASEMENTS_FETCHED_TARGET_QUERY TOKUDB_BASEMENTS_FETCHED_TARGET_QUERY_BYTES TOKUDB_BASEMENTS_FETCHED_TARGET_QUERY_SECONDS +TOKUDB_BASEMENT_DESERIALIZATION_FIXED_KEY +TOKUDB_BASEMENT_DESERIALIZATION_VARIABLE_KEY TOKUDB_BROADCASE_MESSAGES_INJECTED_AT_ROOT TOKUDB_BUFFERS_DECOMPRESSED_FOR_WRITE TOKUDB_BUFFERS_DECOMPRESSED_PREFETCH @@ -43,6 +45,7 @@ TOKUDB_CACHETABLE_MISS TOKUDB_CACHETABLE_MISS_TIME TOKUDB_CACHETABLE_PREFETCHES TOKUDB_CACHETABLE_SIZE_CACHEPRESSURE +TOKUDB_CACHETABLE_SIZE_CLONED TOKUDB_CACHETABLE_SIZE_CURRENT TOKUDB_CACHETABLE_SIZE_LEAF TOKUDB_CACHETABLE_SIZE_LIMIT diff --git a/storage/tokudb/mysql-test/tokudb/r/tokudb_support_xa.result b/storage/tokudb/mysql-test/tokudb/r/tokudb_support_xa.result new file mode 100644 index 0000000000000..c265f38cdc249 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/tokudb_support_xa.result @@ -0,0 +1,126 @@ 
+'#--------------------begin------------------------#' +SET @session_start_value = @@session.tokudb_support_xa; +SELECT @session_start_value; +@session_start_value +1 +SET @global_start_value = @@global.tokudb_support_xa; +SELECT @global_start_value; +@global_start_value +1 +SET @@session.tokudb_support_xa = 0; +SET @@session.tokudb_support_xa = DEFAULT; +SELECT @@session.tokudb_support_xa; +@@session.tokudb_support_xa +1 +SET @@global.tokudb_support_xa = 0; +SET @@global.tokudb_support_xa = DEFAULT; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +1 +'#--------------------case#1 valid set support_xa------------------------#' +SET @@session.tokudb_support_xa = 0; +SELECT @@session.tokudb_support_xa; +@@session.tokudb_support_xa +0 +SET @@session.tokudb_support_xa = 1; +SELECT @@session.tokudb_support_xa; +@@session.tokudb_support_xa +1 +SET @@global.tokudb_support_xa = 0; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +0 +SET @@global.tokudb_support_xa = 1; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +1 +'#--------------------case#2 invalid set support_xa------------------------#' +SET @@session.tokudb_support_xa = -0.6; +ERROR 42000: Incorrect argument type to variable 'tokudb_support_xa' +SET @@session.tokudb_support_xa = 1.6; +ERROR 42000: Incorrect argument type to variable 'tokudb_support_xa' +SET @@session.tokudb_support_xa = "T"; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of 'T' +SET @@session.tokudb_support_xa = "Y"; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of 'Y' +SET @@session.tokudb_support_xa = OF; +SELECT @@session.tokudb_support_xa; +@@session.tokudb_support_xa +0 +SET @@global.tokudb_support_xa = 2; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of '2' +SET @@global.tokudb_support_xa = "T"; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of 'T' +SET @@global.tokudb_support_xa = "Y"; +ERROR 42000: Variable 'tokudb_support_xa' can't be set to the value of 'Y' +'#--------------------case#3 xa.test port from tokudb_mariadb/xa.test ------------------------#' +'#--------------------xa.test with tokudb_support_xa OFF ------------------------#' +SET @@global.tokudb_support_xa = OFF; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +0 +create table t1 (a int) engine=tokudb; +xa start 'test1'; +insert t1 values (10); +xa end 'test1'; +xa prepare 'test1'; +xa rollback 'test1'; +select * from t1; +a +xa start 'test2'; +xa start 'test-bad'; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the ACTIVE state +insert t1 values (20); +xa prepare 'test2'; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the ACTIVE state +xa end 'test2'; +xa prepare 'test2'; +xa commit 'test2'; +select * from t1; +a +20 +xa start 'testa','testb'; +insert t1 values (30); +commit; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the ACTIVE state +xa end 'testa','testb'; +begin; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the IDLE state +create table t2 (a int); +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the IDLE state +xa start 'testa','testb'; +ERROR XAE08: XAER_DUPID: The XID already exists +xa start 'testa','testb', 123; +ERROR XAE08: XAER_DUPID: The XID already exists +xa start 0x7465737462, 0x2030405060, 0xb; +insert t1 values (40); +xa end 
'testb',' 0@P`',11; +xa prepare 'testb',0x2030405060,11; +start transaction; +ERROR XAE07: XAER_RMFAIL: The command cannot be executed when global transaction is in the PREPARED state +xa recover; +formatID gtrid_length bqual_length data +11 5 5 testb 0@P` +xa prepare 'testa','testb'; +xa recover; +formatID gtrid_length bqual_length data +11 5 5 testb 0@P` +1 5 5 testatestb +xa commit 'testb',0x2030405060,11; +ERROR XAE04: XAER_NOTA: Unknown XID +xa rollback 'testa','testb'; +xa start 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your XYZ server version for the right syntax to use near '' at line 1 +select * from t1; +a +20 +drop table t1; +'#--------------------end------------------------#' +SET @@session.tokudb_support_xa = @session_start_value; +SELECT @@session.tokudb_support_xa; +@@session.tokudb_support_xa +1 +SET @@global.tokudb_support_xa = @global_start_value; +SELECT @@global.tokudb_support_xa; +@@global.tokudb_support_xa +1 diff --git a/storage/tokudb/mysql-test/tokudb/t/bf_create_select_nonpart.test b/storage/tokudb/mysql-test/tokudb/t/bf_create_select.test similarity index 100% rename from storage/tokudb/mysql-test/tokudb/t/bf_create_select_nonpart.test rename to storage/tokudb/mysql-test/tokudb/t/bf_create_select.test diff --git a/storage/tokudb/mysql-test/tokudb/t/bf_create_temp_select_nonpart.test b/storage/tokudb/mysql-test/tokudb/t/bf_create_temp_select.test similarity index 100% rename from storage/tokudb/mysql-test/tokudb/t/bf_create_temp_select_nonpart.test rename to storage/tokudb/mysql-test/tokudb/t/bf_create_temp_select.test diff --git a/storage/tokudb/mysql-test/tokudb/t/bf_delete_nonpart.test b/storage/tokudb/mysql-test/tokudb/t/bf_delete.test similarity index 100% rename from storage/tokudb/mysql-test/tokudb/t/bf_delete_nonpart.test rename to storage/tokudb/mysql-test/tokudb/t/bf_delete.test diff --git a/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_nonpart.test b/storage/tokudb/mysql-test/tokudb/t/bf_insert_select.test similarity index 100% rename from storage/tokudb/mysql-test/tokudb/t/bf_insert_select_nonpart.test rename to storage/tokudb/mysql-test/tokudb/t/bf_insert_select.test diff --git a/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_dup_key_nonpart.test b/storage/tokudb/mysql-test/tokudb/t/bf_insert_select_dup_key.test similarity index 100% rename from storage/tokudb/mysql-test/tokudb/t/bf_insert_select_dup_key_nonpart.test rename to storage/tokudb/mysql-test/tokudb/t/bf_insert_select_dup_key.test diff --git a/storage/tokudb/mysql-test/tokudb/t/bf_replace_select_nonpart.test b/storage/tokudb/mysql-test/tokudb/t/bf_replace_select.test similarity index 100% rename from storage/tokudb/mysql-test/tokudb/t/bf_replace_select_nonpart.test rename to storage/tokudb/mysql-test/tokudb/t/bf_replace_select.test diff --git a/storage/tokudb/mysql-test/tokudb/t/bf_select_hash_part.test b/storage/tokudb/mysql-test/tokudb/t/bf_select_hash_part.test new file mode 100644 index 0000000000000..67fefcb45bdef --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/bf_select_hash_part.test @@ -0,0 +1,100 @@ +# Verify that index and range scans on a hash partitioned tokudb table are not slow on tables +# due to tokudb bulk fetch not being used. 
+ +source include/have_tokudb.inc; +source include/have_partition.inc; +source include/big_test.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t; +enable_warnings; + +let $maxq = 20; +let $debug = 0; + +# create the hash partition table +CREATE TABLE `t` ( + `num` int(10) unsigned NOT NULL auto_increment, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) PARTITION BY HASH (num) PARTITIONS 8; + +# put 1M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +set tokudb_bulk_fetch=ON; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t; + inc $i; +} +let $time_bf_on = `select unix_timestamp() - $s`; + +if ($debug) { echo index scans took $time_bf_on; } + +set tokudb_bulk_fetch=OFF; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t; + inc $i; +} +let $time_bf_off = `select unix_timestamp() - $s`; + +if ($debug) { echo index scans took $time_bf_off.; } + +# check that the scan time with bulk fetch off is at least 1.5 times as long as with bulk fetch on +let $verdict = `select $time_bf_off > $time_bf_on && $time_bf_off >= 1.5 * $time_bf_on`; +echo $verdict; +if (!$verdict) { echo index scan $time_bf_on $time_bf_off; } + +set tokudb_bulk_fetch=ON; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t where num > 500000; + inc $i; +} +let $time_bf_on = `select unix_timestamp() - $s`; + +if ($debug) { echo range scans took $time_bf_on; } + +set tokudb_bulk_fetch=OFF; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t where num > 500000; + inc $i; +} +let $time_bf_off = `select unix_timestamp() - $s`; + +if ($debug) { echo range scans took $time_bf_off.; } + +# check that the scan time with bulk fetch off is at least 1.5 times as long as with bulk fetch on +let $verdict = `select $time_bf_off > $time_bf_on && $time_bf_off >= 1.5 * $time_bf_on`; +echo $verdict; +if (!$verdict) { echo range scan $time_bf_on $time_bf_off; } + +drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/t/bf_select_part.test b/storage/tokudb/mysql-test/tokudb/t/bf_select_part.test deleted file mode 100644 index d9faa4b0bb23c..0000000000000 --- a/storage/tokudb/mysql-test/tokudb/t/bf_select_part.test +++ /dev/null @@ -1,220 +0,0 @@ -# Verify that index and range scans are not slow -# on tables during create select statements -# against hash and range partitioned tables -# due to tokudb bulk fetch not being used - -source include/have_tokudb.inc; -source include/have_partition.inc; -source include/big_test.inc; -set default_storage_engine='tokudb'; -disable_warnings; -drop table if exists t,t1,t2,t3; 
-enable_warnings; - -let $maxq = 10; - -CREATE TABLE `t` ( - `num` int(10) unsigned auto_increment NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -); - -# put 8M rows into t -INSERT INTO t values (null,null); -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -INSERT INTO t SELECT null,val FROM t; -SELECT count(*) FROM t; - -# Create first table from source table t -CREATE TABLE `t1` ( - `num` int(10) unsigned NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -) as select * from t; - -# Create second table from source table t -CREATE TABLE `t2` ( - `num` int(10) unsigned NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -) PARTITION BY HASH (num) -PARTITIONS 8 as select * from t; - -# Create third table from source table t; -CREATE TABLE `t3` ( - `num` int(10) unsigned NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -) PARTITION BY RANGE (num) -(PARTITION p0 VALUES LESS THAN (1000000), - PARTITION p1 VALUES LESS THAN (2000000), - PARTITION p2 VALUES LESS THAN (3000000), - PARTITION p3 VALUES LESS THAN (4000000), - PARTITION p4 VALUES LESS THAN (5000000), - PARTITION p5 VALUES LESS THAN (6000000), - PARTITION p6 VALUES LESS THAN (7000000), - PARTITION p7 VALUES LESS THAN MAXVALUE) as select * from t; - - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - SELECT count(*) from t1; - inc $i; -} -let $time_elapsed_select = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. -# echo Index scans took $time_elapsed_select seconds.; - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - SELECT count(*) from t2; - inc $i; -} - -let $time_elapsed_select_hash = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. -# echo Index scans took $time_elapsed_select_hash seconds.; - -# This check evaluates whether the time elapsed during the select statement -# against a hashed partition table is on par with the select statment -# against a non-partitioned table, which will confirm that bulk fetch is in fact being used. 
-let $verdict = `select abs($time_elapsed_select_hash - $time_elapsed_select) <= $time_elapsed_select`; -echo $verdict; -if (!$verdict) { echo index scan t2 $time_elapsed_select_hash $time_elapsed_select; } - -###################################################################### - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - SELECT count(*) from t1; - inc $i; -} -let $time_elapsed_select = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. -#echo Index scans took $time_elapsed_select seconds.; - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - SELECT count(*) from t3; - inc $i; -} - -let $time_elapsed_select_range = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. -#echo Index scans took $time_elapsed_select_range seconds.; - -# This check evaluates whether the time elapsed during the select statement -# against a range partition table is on par with the select statment -# against a non-partitioned table, which will confirm that bulk fetch is in fact being used. -let $verdict = `select abs($time_elapsed_select_range - $time_elapsed_select) <= $time_elapsed_select`; -echo $verdict; -if (!$verdict) { echo index scan t3 $time_elapsed_select_range $time_elapsed_select; } - -######################################################################### - -let $maxrq = 30; - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxrq) { - SELECT count(*) from t1 where num > 7000000; - inc $i; -} -let $time_elapsed_select = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. -#echo Index scans took $time_elapsed_select seconds.; - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxrq) { - SELECT count(*) from t2 where num > 7000000; - inc $i; -} - -let $time_elapsed_select_hash = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. -#echo Index scans took $time_elapsed_select_hash seconds.; - - -# This check evaluates whether the time elapsed during the select statement -# against a hash partition table is on par with the select statment -# against a non-partitioned table, which will confirm that bulk fetch is in fact being used. -let $verdict = `select abs($time_elapsed_select_hash - $time_elapsed_select) <= $time_elapsed_select`; -echo $verdict; -if (!$verdict) { echo range scan t2 $time_elapsed_select_hash $time_elapsed_select; } - -######################################################################### - -let $maxrq = 30; - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxrq) { - SELECT count(*) from t1 where num > 7000000; - inc $i; -} -let $time_elapsed_select = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. -#echo Index scans took $time_elapsed_select seconds.; - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxrq) { - SELECT count(*) from t3 where num > 7000000; - inc $i; -} - -let $time_elapsed_select_range = `select to_seconds(now()) - $s`; - -# The following line can be used to display the time elapsed data -# which could be useful for debugging. 
-#echo Index scans took $time_elapsed_select_range seconds.; - - -# This check evaluates whether the time elapsed during the select statement -# against a range partition table is on par with the select statment -# against a non-partitioned table, which will confirm that bulk fetch is in fact being used. -let $verdict = `select abs($time_elapsed_select_range - $time_elapsed_select) <= $time_elapsed_select`; -echo $verdict; -if (!$verdict) { echo range scan t3 $time_elapsed_select_range $time_elapsed_select; } - -drop table t,t1,t2,t3; diff --git a/storage/tokudb/mysql-test/tokudb/t/bf_select_range_part.test b/storage/tokudb/mysql-test/tokudb/t/bf_select_range_part.test new file mode 100644 index 0000000000000..0a1d7de374704 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/bf_select_range_part.test @@ -0,0 +1,108 @@ +# Verify that index and range scans on a range partitioned tokudb table are not slow on tables +# due to tokudb bulk fetch not being used. + +source include/have_tokudb.inc; +source include/have_partition.inc; +source include/big_test.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t; +enable_warnings; + +let $maxq = 20; +let $debug = 0; + +# create the range partition table +CREATE TABLE `t` ( + `num` int(10) unsigned NOT NULL auto_increment, + `val` varchar(32) DEFAULT NULL, + PRIMARY KEY (`num`) +) PARTITION BY RANGE (num) +(PARTITION p0 VALUES LESS THAN (100000), + PARTITION p1 VALUES LESS THAN (200000), + PARTITION p2 VALUES LESS THAN (300000), + PARTITION p3 VALUES LESS THAN (400000), + PARTITION p4 VALUES LESS THAN (500000), + PARTITION p5 VALUES LESS THAN (600000), + PARTITION p6 VALUES LESS THAN (700000), + PARTITION p7 VALUES LESS THAN MAXVALUE); + +# put 1M rows into t +INSERT INTO t values (null,null); +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +INSERT INTO t SELECT null,val FROM t; +SELECT count(*) FROM t; + +set tokudb_bulk_fetch=ON; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t; + inc $i; +} +let $time_bf_on = `select unix_timestamp() - $s`; + +if ($debug) { echo index scans took $time_bf_on; } + +set tokudb_bulk_fetch=OFF; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t; + inc $i; +} +let $time_bf_off = `select unix_timestamp() - $s`; + +if ($debug) { echo index scans took $time_bf_off.; } + +# check that the scan time with bulk fetch off is at least 1.5 times as long as with bulk fetch on +let $verdict = `select $time_bf_off > $time_bf_on && $time_bf_off >= 1.5 * $time_bf_on`; +echo $verdict; +if (!$verdict) { echo index scan $time_bf_on $time_bf_off; } + +set tokudb_bulk_fetch=ON; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t where num > 700000; + inc $i; +} +let $time_bf_on = `select 
unix_timestamp() - $s`; + +if ($debug) { echo range scans took $time_bf_on; } + +set tokudb_bulk_fetch=OFF; +let $s = `select unix_timestamp()`; +let $i = 0; +while ($i < $maxq) { + SELECT count(*) from t where num > 700000; + inc $i; +} +let $time_bf_off = `select unix_timestamp() - $s`; + +if ($debug) { echo range scans took $time_bf_off.; } + +# check that the scan time with bulk fetch off is at least 1.5 times as long as with bulk fetch on +let $verdict = `select $time_bf_off > $time_bf_on && $time_bf_off >= 1.5 * $time_bf_on`; +echo $verdict; +if (!$verdict) { echo range scan $time_bf_on $time_bf_off; } + +drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/t/cluster_key_part.test b/storage/tokudb/mysql-test/tokudb/t/cluster_key_part.test new file mode 100644 index 0000000000000..8da7aa1078f0e --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/cluster_key_part.test @@ -0,0 +1,23 @@ +# Test that clustering keys can be created on partitioned tokudb tables + +source include/have_tokudb.inc; +source include/have_partition.inc; +set default_storage_engine='tokudb'; + +disable_warnings; +drop table if exists t; +enable_warnings; + +create table t ( + x int not null, + y int not null, + primary key(x)) +partition by hash(x) partitions 2; + +show create table t; + +alter table t add key(y) clustering=yes; + +show create table t; + +drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/t/ext_key_1_innodb.test b/storage/tokudb/mysql-test/tokudb/t/ext_key_1_innodb.test deleted file mode 100644 index f23e02ddff334..0000000000000 --- a/storage/tokudb/mysql-test/tokudb/t/ext_key_1_innodb.test +++ /dev/null @@ -1,44 +0,0 @@ -source include/have_innodb.inc; - -disable_warnings; -drop table if exists t; -enable_warnings; - -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; - -create table t (id int not null, x int not null, y int not null, primary key(id), key(x)) engine=innodb; - -insert into t values (0,0,0),(1,1,1),(2,2,2),(3,2,3),(4,2,4); - -explain select x,id from t force index (x) where x=0 and id=0; -flush status; -select x,id from t force index (x) where x=0 and id=0; -show status like 'handler_read%'; - -explain select y,id from t force index (x) where x=0 and id=0; -flush status; -select y,id from t force index (x) where x=0 and id=0; -show status like 'handler_read%'; - -explain select x,id from t force index (x) where x=0 and id=1; -flush status; -select x,id from t force index (x) where x=0 and id=1; -show status like 'handler_read%'; - -explain select y,id from t force index (x)where x=0 and id=1; -flush status; -select y,id from t force index(x) where x=0 and id=1; -show status like 'handler_read%'; - -explain select x,id from t force index (x) where x=2 and id=3; -flush status; -select x,id from t force index (x) where x=2 and id=3; -show status like 'handler_read%'; - -explain select x,id from t force index (x) where x=2 and id=0; -flush status; -select x,id from t force index (x) where x=2 and id=0; -show status like 'handler_read%'; - -drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/t/ext_key_1_tokudb.test b/storage/tokudb/mysql-test/tokudb/t/ext_key_1_tokudb.test deleted file mode 100644 index 802385e8e9afa..0000000000000 --- a/storage/tokudb/mysql-test/tokudb/t/ext_key_1_tokudb.test +++ /dev/null @@ -1,44 +0,0 @@ -#source include/have_tokudb.inc; - -disable_warnings; -drop table if exists t; -enable_warnings; - -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; - -create table t (id int not null, 
x int not null, y int not null, primary key(id), key(x)) engine=tokudb; - -insert into t values (0,0,0),(1,1,1),(2,2,2),(3,2,3),(4,2,4); - -explain select x,id from t force index (x) where x=0 and id=0; -flush status; -select x,id from t force index (x) where x=0 and id=0; -show status like 'handler_read%'; - -explain select y,id from t force index (x) where x=0 and id=0; -flush status; -select y,id from t force index (x) where x=0 and id=0; -show status like 'handler_read%'; - -explain select x,id from t force index (x) where x=0 and id=1; -flush status; -select x,id from t force index (x) where x=0 and id=1; -show status like 'handler_read%'; - -explain select y,id from t force index (x)where x=0 and id=1; -flush status; -select y,id from t force index(x) where x=0 and id=1; -show status like 'handler_read%'; - -explain select x,id from t force index (x) where x=2 and id=3; -flush status; -select x,id from t force index (x) where x=2 and id=3; -show status like 'handler_read%'; - -explain select x,id from t force index (x) where x=2 and id=0; -flush status; -select x,id from t force index (x) where x=2 and id=0; -show status like 'handler_read%'; - -drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/t/ext_key_2_innodb.test b/storage/tokudb/mysql-test/tokudb/t/ext_key_2_innodb.test deleted file mode 100644 index 265275c0e5087..0000000000000 --- a/storage/tokudb/mysql-test/tokudb/t/ext_key_2_innodb.test +++ /dev/null @@ -1,24 +0,0 @@ -source include/have_innodb.inc; - -disable_warnings; -drop table if exists t; -enable_warnings; - -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; - -create table t (a int not null, b int not null, c int not null, d int not null, primary key(a,b), key(c,a)) engine=innodb; - -insert into t values (0,0,0,0),(0,1,0,1); - -explain select c,a,b from t where c=0 and a=0 and b=1; -flush status; -select c,a,b from t where c=0 and a=0 and b=1; -show status like 'handler_read%'; - -explain select c,a,b from t force index (c) where c=0 and a=0 and b=1; -flush status; -select c,a,b from t force index (c) where c=0 and a=0 and b=1; -show status like 'handler_read%'; - -drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/t/ext_key_2_tokudb.test b/storage/tokudb/mysql-test/tokudb/t/ext_key_2_tokudb.test deleted file mode 100644 index b12f056f1a6d0..0000000000000 --- a/storage/tokudb/mysql-test/tokudb/t/ext_key_2_tokudb.test +++ /dev/null @@ -1,24 +0,0 @@ -#source include/have_tokudb.inc; - -disable_warnings; -drop table if exists t; -enable_warnings; - -set session optimizer_switch='extended_keys=on'; -select @@optimizer_switch; - -create table t (a int not null, b int not null, c int not null, d int not null, primary key(a,b), key(c,a)) engine=tokudb; - -insert into t values (0,0,0,0),(0,1,0,1); - -explain select c,a,b from t where c=0 and a=0 and b=1; -flush status; -select c,a,b from t where c=0 and a=0 and b=1; -show status like 'handler_read%'; - -explain select c,a,b from t force index (c) where c=0 and a=0 and b=1; -flush status; -select c,a,b from t force index (c) where c=0 and a=0 and b=1; -show status like 'handler_read%'; - -drop table t; diff --git a/storage/tokudb/mysql-test/tokudb/t/hotindex-insert-bigchar.opt b/storage/tokudb/mysql-test/tokudb/t/hotindex-insert-bigchar.opt deleted file mode 100644 index d76fda471ca28..0000000000000 --- a/storage/tokudb/mysql-test/tokudb/t/hotindex-insert-bigchar.opt +++ /dev/null @@ -1 +0,0 @@ ---loose-tokudb-max-lock-memory=320M diff --git 
a/storage/tokudb/mysql-test/tokudb/t/tokudb_support_xa.test b/storage/tokudb/mysql-test/tokudb/t/tokudb_support_xa.test new file mode 100644 index 0000000000000..ba0b1f92a135b --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/tokudb_support_xa.test @@ -0,0 +1,133 @@ +--source include/load_sysvars.inc +let $engine=TokuDB; + +--echo '#--------------------begin------------------------#' +SET @session_start_value = @@session.tokudb_support_xa; +SELECT @session_start_value; + +SET @global_start_value = @@global.tokudb_support_xa; +SELECT @global_start_value; + +SET @@session.tokudb_support_xa = 0; +SET @@session.tokudb_support_xa = DEFAULT; +SELECT @@session.tokudb_support_xa; +SET @@global.tokudb_support_xa = 0; +SET @@global.tokudb_support_xa = DEFAULT; +SELECT @@global.tokudb_support_xa; + +--echo '#--------------------case#1 valid set support_xa------------------------#' +# for session +SET @@session.tokudb_support_xa = 0; +SELECT @@session.tokudb_support_xa; +SET @@session.tokudb_support_xa = 1; +SELECT @@session.tokudb_support_xa; + +# for global +SET @@global.tokudb_support_xa = 0; +SELECT @@global.tokudb_support_xa; +SET @@global.tokudb_support_xa = 1; +SELECT @@global.tokudb_support_xa; + +--echo '#--------------------case#2 invalid set support_xa------------------------#' +# for session +--Error ER_WRONG_TYPE_FOR_VAR +SET @@session.tokudb_support_xa = -0.6; +--Error ER_WRONG_TYPE_FOR_VAR +SET @@session.tokudb_support_xa = 1.6; +--Error ER_WRONG_VALUE_FOR_VAR +SET @@session.tokudb_support_xa = "T"; +--Error ER_WRONG_VALUE_FOR_VAR +SET @@session.tokudb_support_xa = "Y"; +SET @@session.tokudb_support_xa = OF; +SELECT @@session.tokudb_support_xa; + +# for global +--Error ER_WRONG_VALUE_FOR_VAR +SET @@global.tokudb_support_xa = 2; +--Error ER_WRONG_VALUE_FOR_VAR +SET @@global.tokudb_support_xa = "T"; +--Error ER_WRONG_VALUE_FOR_VAR +SET @@global.tokudb_support_xa = "Y"; + + +--echo '#--------------------case#3 xa.test port from tokudb_mariadb/xa.test ------------------------#' +--echo '#--------------------xa.test with tokudb_support_xa OFF ------------------------#' +SET @@global.tokudb_support_xa = OFF; +SELECT @@global.tokudb_support_xa; +create table t1 (a int) engine=tokudb; +xa start 'test1'; +insert t1 values (10); +xa end 'test1'; +xa prepare 'test1'; +xa rollback 'test1'; +select * from t1; + +xa start 'test2'; +--error ER_XAER_RMFAIL +xa start 'test-bad'; +insert t1 values (20); +--error ER_XAER_RMFAIL +xa prepare 'test2'; +xa end 'test2'; +xa prepare 'test2'; +xa commit 'test2'; +select * from t1; + +xa start 'testa','testb'; +insert t1 values (30); + +--error ER_XAER_RMFAIL +commit; + +xa end 'testa','testb'; + +--error ER_XAER_RMFAIL +begin; +--error ER_XAER_RMFAIL +create table t2 (a int); + +connect (con1,localhost,root,,); +connection con1; + +--error ER_XAER_DUPID +xa start 'testa','testb'; +--error ER_XAER_DUPID +xa start 'testa','testb', 123; + +# gtrid [ , bqual [ , formatID ] ] +xa start 0x7465737462, 0x2030405060, 0xb; +insert t1 values (40); +xa end 'testb',' 0@P`',11; +xa prepare 'testb',0x2030405060,11; + +--error ER_XAER_RMFAIL +start transaction; + +xa recover; + +connection default; + +xa prepare 'testa','testb'; + +xa recover; + +--error ER_XAER_NOTA +xa commit 'testb',0x2030405060,11; +xa rollback 'testa','testb'; + +--replace_regex /MariaDB/XYZ/ /MySQL/XYZ/ +--error ER_PARSE_ERROR +xa start 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'; + +select * from t1; + +disconnect con1; +connection default; +drop table t1; + +--echo 
'#--------------------end------------------------#' +SET @@session.tokudb_support_xa = @session_start_value; +SELECT @@session.tokudb_support_xa; + +SET @@global.tokudb_support_xa = @global_start_value; +SELECT @@global.tokudb_support_xa; diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/r/fractional_time_alter_table.result b/storage/tokudb/mysql-test/tokudb_alter_table/r/fractional_time_alter_table.result deleted file mode 100644 index 58e8f0f8b2041..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_alter_table/r/fractional_time_alter_table.result +++ /dev/null @@ -1,174 +0,0 @@ -SET DEFAULT_STORAGE_ENGINE = 'tokudb'; -DROP TABLE IF EXISTS foo; -set tokudb_disable_slow_alter=on; -create table foo ( -a timestamp, -b timestamp(1), -c timestamp(2), -d timestamp(3), -e timestamp(4), -f timestamp(5), -g timestamp(6) -) engine=TokuDB; -alter table foo change a a timestamp(1); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a timestamp(2); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a timestamp(3); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a timestamp(4); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a timestamp(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a timestamp(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b timestamp(2); -show create table foo; -Table Create Table -foo CREATE TABLE `foo` ( - `a` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, - `b` timestamp(2) NOT NULL DEFAULT '0000-00-00 00:00:00.00', - `c` timestamp(2) NOT NULL DEFAULT '0000-00-00 00:00:00.00', - `d` timestamp(3) NOT NULL DEFAULT '0000-00-00 00:00:00.000', - `e` timestamp(4) NOT NULL DEFAULT '0000-00-00 00:00:00.0000', - `f` timestamp(5) NOT NULL DEFAULT '0000-00-00 00:00:00.00000', - `g` timestamp(6) NOT NULL DEFAULT '0000-00-00 00:00:00.000000' -) ENGINE=TokuDB DEFAULT CHARSET=latin1 -alter table foo change b b timestamp(1); -alter table foo change b b timestamp(3); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b timestamp(4); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b timestamp(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b timestamp(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change d d timestamp(2); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change d d timestamp(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change d d timestamp(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change f f timestamp(6); -alter table foo change f f timestamp(5); -alter table foo change f f timestamp(4); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -drop table foo; -create table foo ( -a datetime, -b datetime(1), -c datetime(2), -d datetime(3), -e datetime(4), -f datetime(5), -g datetime(6) -) 
engine=TokuDB; -alter table foo change a a datetime(1); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a datetime(2); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a datetime(3); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a datetime(4); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a datetime(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a datetime(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b datetime(2); -show create table foo; -Table Create Table -foo CREATE TABLE `foo` ( - `a` datetime DEFAULT NULL, - `b` datetime(2) DEFAULT NULL, - `c` datetime(2) DEFAULT NULL, - `d` datetime(3) DEFAULT NULL, - `e` datetime(4) DEFAULT NULL, - `f` datetime(5) DEFAULT NULL, - `g` datetime(6) DEFAULT NULL -) ENGINE=TokuDB DEFAULT CHARSET=latin1 -alter table foo change b b datetime(1); -alter table foo change b b datetime(3); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b datetime(4); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b datetime(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b datetime(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change d d datetime(2); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change d d datetime(5); -alter table foo change d d datetime(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change g g datetime(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -drop table foo; -create table foo ( -a time, -b time(1), -c time(2), -d time(3), -e time(4), -f time(5), -g time(6) -) engine=TokuDB; -alter table foo change a a time(1); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a time(2); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a time(3); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a time(4); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a time(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change a a time(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b time(2); -show create table foo; -Table Create Table -foo CREATE TABLE `foo` ( - `a` time DEFAULT NULL, - `b` time(2) DEFAULT NULL, - `c` time(2) DEFAULT NULL, - `d` time(3) DEFAULT NULL, - `e` time(4) DEFAULT NULL, - `f` time(5) DEFAULT NULL, - `g` time(6) DEFAULT NULL -) ENGINE=TokuDB DEFAULT CHARSET=latin1 -alter table foo change b b time(1); -alter table foo change b b time(3); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b time(4); -ERROR 
42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b time(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change b b time(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change d d time(2); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change d d time(5); -alter table foo change d d time(6); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -alter table foo change g g time(5); -ERROR 42000: Table 'foo' uses an extension that doesn't exist in this MariaDB version -drop table foo; -create table foo (a int, b int) engine=TokuDB; -insert into foo values (1,2); -alter table foo add column tt timestamp(3) NOT NULL Default '1981-11-01 10:11:13.123' after a; -select * from foo; -a tt b -1 1981-11-01 10:11:13.123 2 -alter table foo drop column tt; -alter table foo add column tt datetime(3) NOT NULL Default '1981-11-01 10:11:13.123' after a; -select * from foo; -a tt b -1 1981-11-01 10:11:13.123 2 -alter table foo drop column tt; -alter table foo add column tt time(3) NOT NULL Default '10:11:13.123' after a; -select * from foo; -a tt b -1 10:11:13.123 2 -alter table foo drop column tt; -drop table foo; diff --git a/storage/tokudb/mysql-test/tokudb_alter_table/t/fractional_time_alter_table.test b/storage/tokudb/mysql-test/tokudb_alter_table/t/fractional_time_alter_table.test deleted file mode 100644 index eee661023a8f3..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_alter_table/t/fractional_time_alter_table.test +++ /dev/null @@ -1,164 +0,0 @@ -SET DEFAULT_STORAGE_ENGINE = 'tokudb'; - ---disable_warnings -DROP TABLE IF EXISTS foo; ---enable_warnings -set tokudb_disable_slow_alter=on; - -create table foo ( - a timestamp, - b timestamp(1), - c timestamp(2), - d timestamp(3), - e timestamp(4), - f timestamp(5), - g timestamp(6) -) engine=TokuDB; - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a timestamp(1); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a timestamp(2); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a timestamp(3); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a timestamp(4); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a timestamp(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a timestamp(6); - -alter table foo change b b timestamp(2); -show create table foo; -alter table foo change b b timestamp(1); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b timestamp(3); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b timestamp(4); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b timestamp(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b timestamp(6); - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change d d timestamp(2); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change d d timestamp(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change d d timestamp(6); - -alter table foo change f f timestamp(6); -alter table foo change f f timestamp(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change f f timestamp(4); -drop table foo; - - -create table foo ( - a datetime, - b datetime(1), - c datetime(2), - d datetime(3), - e datetime(4), - f datetime(5), - g datetime(6) -) engine=TokuDB; - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a datetime(1); 
---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a datetime(2); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a datetime(3); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a datetime(4); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a datetime(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a datetime(6); - -alter table foo change b b datetime(2); -show create table foo; -alter table foo change b b datetime(1); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b datetime(3); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b datetime(4); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b datetime(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b datetime(6); - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change d d datetime(2); -alter table foo change d d datetime(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change d d datetime(6); - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change g g datetime(5); -drop table foo; - - -create table foo ( - a time, - b time(1), - c time(2), - d time(3), - e time(4), - f time(5), - g time(6) -) engine=TokuDB; - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a time(1); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a time(2); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a time(3); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a time(4); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a time(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change a a time(6); - -alter table foo change b b time(2); -show create table foo; -alter table foo change b b time(1); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b time(3); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b time(4); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b time(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change b b time(6); - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change d d time(2); -alter table foo change d d time(5); ---error ER_UNSUPPORTED_EXTENSION -alter table foo change d d time(6); - ---error ER_UNSUPPORTED_EXTENSION -alter table foo change g g time(5); -drop table foo; - - -create table foo (a int, b int) engine=TokuDB; -insert into foo values (1,2); -alter table foo add column tt timestamp(3) NOT NULL Default '1981-11-01 10:11:13.123' after a; -select * from foo; -alter table foo drop column tt; - -alter table foo add column tt datetime(3) NOT NULL Default '1981-11-01 10:11:13.123' after a; -select * from foo; -alter table foo drop column tt; - - -alter table foo add column tt time(3) NOT NULL Default '10:11:13.123' after a; -select * from foo; -alter table foo drop column tt; - -drop table foo; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/4676.result b/storage/tokudb/mysql-test/tokudb_bugs/r/4676.result deleted file mode 100644 index 44e4e02b7d4e1..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_bugs/r/4676.result +++ /dev/null @@ -1,13 +0,0 @@ -DROP TABLE IF EXISTS t; -CREATE TABLE t (a INT) ENGINE='tokudb' PARTITION BY KEY (a) (PARTITION part0, PARTITION part1); -SHOW CREATE TABLE t; -Table Create Table -t CREATE TABLE `t` ( - `a` int(11) DEFAULT NULL -) ENGINE=TokuDB DEFAULT CHARSET=latin1 -/*!50100 PARTITION BY KEY (a) -(PARTITION part0 ENGINE = TokuDB, - PARTITION part1 ENGINE = TokuDB) */ -ALTER TABLE t TRUNCATE PARTITION part0; -ALTER TABLE t TRUNCATE PARTITION part1; -DROP TABLE IF EXISTS t; 
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/4677.result b/storage/tokudb/mysql-test/tokudb_bugs/r/4677.result deleted file mode 100644 index 72fa3c1269606..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_bugs/r/4677.result +++ /dev/null @@ -1,11 +0,0 @@ -drop table if exists t; -create table t (a int primary key) engine='tokudb'; -begin; -insert into t values (1); -insert into t values (3); -begin; -insert into t values (2); -insert into t values (4); -commit; -commit; -drop table t; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/fractional_time.result b/storage/tokudb/mysql-test/tokudb_bugs/r/fractional_time.result deleted file mode 100644 index 904ad618427fa..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_bugs/r/fractional_time.result +++ /dev/null @@ -1,62 +0,0 @@ -SET DEFAULT_STORAGE_ENGINE = 'tokudb'; -DROP TABLE IF EXISTS foo; -create table foo (a timestamp(6), b timestamp(4), c timestamp(5), primary key (a))engine=tokudb; -insert into foo values ('2010-12-10 14:12:09.123452', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); -insert into foo values ('2010-12-10 14:12:09.123454', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); -insert into foo values ('2010-12-10 14:12:09.123451', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); -insert into foo values ('2010-12-10 14:12:09.123453', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); -select * from foo; -a b c -2010-12-10 14:12:09.123451 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123452 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123453 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123454 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -explain select * from foo where a > '2010-12-10 14:12:09.123452'; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE foo range PRIMARY PRIMARY 7 NULL 2 Using where -select * from foo where a > '2010-12-10 14:12:09.123452'; -a b c -2010-12-10 14:12:09.123453 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123454 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -alter table foo change a a datetime(6), change b b datetime(4), change c c datetime(5); -show create table foo; -Table Create Table -foo CREATE TABLE `foo` ( - `a` datetime(6) NOT NULL DEFAULT '0000-00-00 00:00:00.000000', - `b` datetime(4) DEFAULT NULL, - `c` datetime(5) DEFAULT NULL, - PRIMARY KEY (`a`) -) ENGINE=TokuDB DEFAULT CHARSET=latin1 -select * from foo; -a b c -2010-12-10 14:12:09.123451 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123452 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123453 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123454 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -explain select * from foo where a > '2010-12-10 14:12:09.123452'; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE foo range PRIMARY PRIMARY 8 NULL 2 Using where -select * from foo where a > '2010-12-10 14:12:09.123452'; -a b c -2010-12-10 14:12:09.123453 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -2010-12-10 14:12:09.123454 2010-12-10 14:12:09.1234 2010-12-10 14:12:09.12345 -drop table foo; -create table foo (a time(6), b time(4), c time(5), primary key (a))engine=TokuDB; -insert into foo values ('14:12:09.123452', '14:12:09.123416', '14:12:09.123451'); -insert into foo values ('14:12:09.123454', '14:12:09.123416', 
'14:12:09.123451'); -insert into foo values ('14:12:09.123451', '14:12:09.123416', '14:12:09.123451'); -insert into foo values ('14:12:09.123453', '14:12:09.123416', '14:12:09.123451'); -select * from foo; -a b c -14:12:09.123451 14:12:09.1234 14:12:09.12345 -14:12:09.123452 14:12:09.1234 14:12:09.12345 -14:12:09.123453 14:12:09.1234 14:12:09.12345 -14:12:09.123454 14:12:09.1234 14:12:09.12345 -explain select * from foo where a > '14:12:09.123452'; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE foo range PRIMARY PRIMARY 6 NULL 2 Using where -select * from foo where a > '14:12:09.123452'; -a b c -14:12:09.123453 14:12:09.1234 14:12:09.12345 -14:12:09.123454 14:12:09.1234 14:12:09.12345 -DROP TABLE foo; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/part_index_scan.result b/storage/tokudb/mysql-test/tokudb_bugs/r/part_index_scan.result deleted file mode 100644 index 0d416c734b78a..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_bugs/r/part_index_scan.result +++ /dev/null @@ -1,248 +0,0 @@ -set default_storage_engine='tokudb'; -drop table if exists t,t1,t2,t3; -CREATE TABLE `t` ( -`num` int(10) unsigned auto_increment NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -); -INSERT INTO t values (null,null); -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -SELECT count(*) FROM t; -count(*) -8388608 -CREATE TABLE `t1` ( -`num` int(10) unsigned NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -); -CREATE TABLE `t2` ( -`num` int(10) unsigned NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -) -PARTITION BY HASH (num) PARTITIONS 10; -CREATE TABLE `t3` ( -`num` int(10) unsigned NOT NULL, -`val` varchar(32) DEFAULT NULL, -PRIMARY KEY (`num`) -) -PARTITION BY RANGE (num) -(PARTITION p0 VALUES LESS THAN (1000000), -PARTITION p1 VALUES LESS THAN (2000000), -PARTITION p2 VALUES LESS THAN (3000000), -PARTITION p3 VALUES LESS THAN (4000000), -PARTITION p4 VALUES LESS THAN (5000000), -PARTITION p5 VALUES LESS THAN (6000000), -PARTITION p6 VALUES LESS THAN (7000000), -PARTITION p7 VALUES LESS THAN (8000000), -PARTITION px VALUES LESS THAN MAXVALUE); -insert into t1 select * from t; -insert into t2 select * from t; -insert into t3 select * from t; -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; -count(*) -8388608 -select count(*) from t1; 
-count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -select count(*) from t2; -count(*) -8388608 -1 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -select count(*) from t3; -count(*) -8388608 -1 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t1 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -select count(*) from t2 where num>7000000; -count(*) -1847274 -1 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -select count(*) from t3 where num>7000000; -count(*) -1847274 -1 -drop table if exists t,t1,t2,t3; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/simple_icp.result b/storage/tokudb/mysql-test/tokudb_bugs/r/simple_icp.result deleted file mode 100644 index 841642132a3d3..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_bugs/r/simple_icp.result +++ /dev/null @@ -1,178 +0,0 @@ -drop table if exists a,b,c,foo; -create table a (a int auto_increment, primary key (a)) engine=TokuDB; -create table b (a int auto_increment, primary key (a)) engine=TokuDB; -create table c (a int auto_increment, primary key (a)) engine=TokuDB; -insert into a values (),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(); -insert into b values (),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(); 
-insert into c values (),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(); -create table foo (a int, b int, c int, d int, e int, key(a,b,c)) engine=TokuDB; -insert into foo (a,b,c) select * from a,b,c; -flush status; -show status like '%Handler_read_next%'; -Variable_name Value -Handler_read_next 0 -explain select * from foo where a between 5 and 6 and c=10; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE foo range a a 5 NULL 800 Using where -select * from foo where a between 5 and 6 and c=10; -a b c d e -5 1 10 NULL NULL -5 2 10 NULL NULL -5 3 10 NULL NULL -5 4 10 NULL NULL -5 5 10 NULL NULL -5 6 10 NULL NULL -5 7 10 NULL NULL -5 8 10 NULL NULL -5 9 10 NULL NULL -5 10 10 NULL NULL -5 11 10 NULL NULL -5 12 10 NULL NULL -5 13 10 NULL NULL -5 14 10 NULL NULL -5 15 10 NULL NULL -5 16 10 NULL NULL -5 17 10 NULL NULL -5 18 10 NULL NULL -5 19 10 NULL NULL -5 20 10 NULL NULL -6 1 10 NULL NULL -6 2 10 NULL NULL -6 3 10 NULL NULL -6 4 10 NULL NULL -6 5 10 NULL NULL -6 6 10 NULL NULL -6 7 10 NULL NULL -6 8 10 NULL NULL -6 9 10 NULL NULL -6 10 10 NULL NULL -6 11 10 NULL NULL -6 12 10 NULL NULL -6 13 10 NULL NULL -6 14 10 NULL NULL -6 15 10 NULL NULL -6 16 10 NULL NULL -6 17 10 NULL NULL -6 18 10 NULL NULL -6 19 10 NULL NULL -6 20 10 NULL NULL -show status like '%Handler_read_next%'; -Variable_name Value -Handler_read_next 41 -flush status; -show status like '%Handler_read_prev%'; -Variable_name Value -Handler_read_prev 0 -explain select * from foo where a between 5 and 6 and c=10; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE foo range a a 5 NULL 800 Using where -select * from foo where a between 5 and 6 and c=10 order by a desc; -a b c d e -6 20 10 NULL NULL -6 19 10 NULL NULL -6 18 10 NULL NULL -6 17 10 NULL NULL -6 16 10 NULL NULL -6 15 10 NULL NULL -6 14 10 NULL NULL -6 13 10 NULL NULL -6 12 10 NULL NULL -6 11 10 NULL NULL -6 10 10 NULL NULL -6 9 10 NULL NULL -6 8 10 NULL NULL -6 7 10 NULL NULL -6 6 10 NULL NULL -6 5 10 NULL NULL -6 4 10 NULL NULL -6 3 10 NULL NULL -6 2 10 NULL NULL -6 1 10 NULL NULL -5 20 10 NULL NULL -5 19 10 NULL NULL -5 18 10 NULL NULL -5 17 10 NULL NULL -5 16 10 NULL NULL -5 15 10 NULL NULL -5 14 10 NULL NULL -5 13 10 NULL NULL -5 12 10 NULL NULL -5 11 10 NULL NULL -5 10 10 NULL NULL -5 9 10 NULL NULL -5 8 10 NULL NULL -5 7 10 NULL NULL -5 6 10 NULL NULL -5 5 10 NULL NULL -5 4 10 NULL NULL -5 3 10 NULL NULL -5 2 10 NULL NULL -5 1 10 NULL NULL -show status like '%Handler_read_prev%'; -Variable_name Value -Handler_read_prev 41 -flush status; -show status like '%Handler_read_prev%'; -Variable_name Value -Handler_read_prev 0 -explain select * from foo where a > 19 and c=10; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE foo range a a 5 NULL 1713 Using where -select * from foo where a > 19 and c=10 order by a desc; -a b c d e -20 20 10 NULL NULL -20 19 10 NULL NULL -20 18 10 NULL NULL -20 17 10 NULL NULL -20 16 10 NULL NULL -20 15 10 NULL NULL -20 14 10 NULL NULL -20 13 10 NULL NULL -20 12 10 NULL NULL -20 11 10 NULL NULL -20 10 10 NULL NULL -20 9 10 NULL NULL -20 8 10 NULL NULL -20 7 10 NULL NULL -20 6 10 NULL NULL -20 5 10 NULL NULL -20 4 10 NULL NULL -20 3 10 NULL NULL -20 2 10 NULL NULL -20 1 10 NULL NULL -show status like '%Handler_read_prev%'; -Variable_name Value -Handler_read_prev 21 -flush status; -show status like '%Handler_read_next%'; -Variable_name Value -Handler_read_next 0 -explain select * from foo where a > 19 and c=10; -id select_type table type possible_keys key 
key_len ref rows Extra -1 SIMPLE foo range a a 5 NULL 1408 Using where -select * from foo where a > 19 and c=10; -a b c d e -20 1 10 NULL NULL -20 2 10 NULL NULL -20 3 10 NULL NULL -20 4 10 NULL NULL -20 5 10 NULL NULL -20 6 10 NULL NULL -20 7 10 NULL NULL -20 8 10 NULL NULL -20 9 10 NULL NULL -20 10 10 NULL NULL -20 11 10 NULL NULL -20 12 10 NULL NULL -20 13 10 NULL NULL -20 14 10 NULL NULL -20 15 10 NULL NULL -20 16 10 NULL NULL -20 17 10 NULL NULL -20 18 10 NULL NULL -20 19 10 NULL NULL -20 20 10 NULL NULL -show status like '%Handler_read_next%'; -Variable_name Value -Handler_read_next 21 -drop table foo,a,b,c; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb718.result b/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb718.result new file mode 100644 index 0000000000000..022a4d56d75c0 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb718.result @@ -0,0 +1,9 @@ +set default_storage_engine='tokudb'; +drop table if exists t; +create table t (id int primary key); +begin; +insert into t values (1),(2); +select * from information_schema.tokudb_fractal_tree_info; +ERROR HY000: Unknown error -30994 +commit; +drop table t; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb_mrr.result b/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb_mrr.result deleted file mode 100644 index 4f8e2ce89f5f6..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb_mrr.result +++ /dev/null @@ -1,851 +0,0 @@ -drop table if exists t1,t2,t3,t4; -set @save_storage_engine= @@storage_engine; -set storage_engine=TokuDB; -set @innodb_mrr_tmp=@@optimizer_switch; -set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on'; -create table t1(a int); -show create table t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL -) ENGINE=TokuDB DEFAULT CHARSET=latin1 -insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -create table t2(a int); -insert into t2 select A.a + 10*(B.a + 10*C.a) from t1 A, t1 B, t1 C; -create table t3 ( -a char(8) not null, b char(8) not null, filler char(200), -key(a) -); -insert into t3 select @a:=concat('c-', 1000+ A.a, '=w'), @a, 'filler' from t2 A; -insert into t3 select concat('c-', 1000+A.a, '=w'), concat('c-', 2000+A.a, '=w'), -'filler-1' from t2 A; -insert into t3 select concat('c-', 1000+A.a, '=w'), concat('c-', 3000+A.a, '=w'), -'filler-2' from t2 A; -select a,filler from t3 where a >= 'c-9011=w'; -a filler -select a,filler from t3 where a >= 'c-1011=w' and a <= 'c-1015=w'; -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -select a,filler from t3 where (a>='c-1011=w' and a <= 'c-1013=w') or -(a>='c-1014=w' and a <= 'c-1015=w'); -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -insert into t3 values ('c-1013=z', 'c-1013=z', 'err'); -insert into t3 values ('a-1014=w', 'a-1014=w', 'err'); -select a,filler from t3 where (a>='c-1011=w' and a <= 'c-1013=w') or -(a>='c-1014=w' and a <= 'c-1015=w'); -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 
-c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -delete from t3 where b in ('c-1013=z', 'a-1014=w'); -select a,filler from t3 where a='c-1011=w' or a='c-1012=w' or a='c-1013=w' or -a='c-1014=w' or a='c-1015=w'; -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -insert into t3 values ('c-1013=w', 'del-me', 'inserted'); -select a,filler from t3 where a='c-1011=w' or a='c-1012=w' or a='c-1013=w' or -a='c-1014=w' or a='c-1015=w'; -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -c-1013=w inserted -delete from t3 where b='del-me'; -alter table t3 add primary key(b); -select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1018=w') or -b IN ('c-1019=w', 'c-1020=w', 'c-1021=w', -'c-1022=w', 'c-1023=w', 'c-1024=w'); -b filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1016=w filler -c-1017=w filler -c-1018=w filler -c-1019=w filler -c-1020=w filler -c-1021=w filler -c-1022=w filler -c-1023=w filler -c-1024=w filler -select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1020=w') or -b IN ('c-1021=w', 'c-1022=w', 'c-1023=w'); -b filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1016=w filler -c-1017=w filler -c-1018=w filler -c-1019=w filler -c-1020=w filler -c-1021=w filler -c-1022=w filler -c-1023=w filler -select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1018=w') or -b IN ('c-1019=w', 'c-1020=w') or -(b>='c-1021=w' and b<= 'c-1023=w'); -b filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1016=w filler -c-1017=w filler -c-1018=w filler -c-1019=w filler -c-1020=w filler -c-1021=w filler -c-1022=w filler -c-1023=w filler -drop table if exists t4; -create table t4 (a varchar(10), b int, c char(10), filler char(200), -key idx1 (a, b, c)); -insert into t4 (filler) select concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'b-1',NULL,'c-1', concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'b-1',NULL,'c-222', concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'bb-1',NULL,'cc-2', concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'zz-1',NULL,'cc-2', 'filler-data' from t2 order by a limit 500; -explain -select * from t4 where a IS NULL and b IS NULL and (c IS NULL or c='no-such-row1' - or c='no-such-row2'); -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t4 range idx1 idx1 29 NULL 16 Using where; Rowid-ordered scan -select * from t4 where a IS NULL and b IS NULL and (c IS NULL or c='no-such-row1' - or c='no-such-row2'); -a b c filler -NULL NULL NULL NULL-15 -NULL NULL NULL NULL-14 -NULL NULL NULL NULL-13 -NULL NULL NULL NULL-12 -NULL NULL NULL NULL-11 -NULL NULL NULL NULL-10 -NULL NULL NULL NULL-9 -NULL NULL NULL NULL-8 -NULL NULL NULL NULL-7 -NULL NULL NULL NULL-6 -NULL NULL NULL NULL-5 -NULL NULL NULL NULL-4 -NULL NULL NULL NULL-3 
-NULL NULL NULL NULL-2 -NULL NULL NULL NULL-1 -explain -select * from t4 where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2'); -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t4 range idx1 idx1 29 NULL 32 Using where; Rowid-ordered scan -select * from t4 where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2'); -a b c filler -b-1 NULL c-1 NULL-15 -b-1 NULL c-1 NULL-14 -b-1 NULL c-1 NULL-13 -b-1 NULL c-1 NULL-12 -b-1 NULL c-1 NULL-11 -b-1 NULL c-1 NULL-10 -b-1 NULL c-1 NULL-9 -b-1 NULL c-1 NULL-8 -b-1 NULL c-1 NULL-7 -b-1 NULL c-1 NULL-6 -b-1 NULL c-1 NULL-5 -b-1 NULL c-1 NULL-4 -b-1 NULL c-1 NULL-3 -b-1 NULL c-1 NULL-2 -b-1 NULL c-1 NULL-1 -bb-1 NULL cc-2 NULL-15 -bb-1 NULL cc-2 NULL-14 -bb-1 NULL cc-2 NULL-13 -bb-1 NULL cc-2 NULL-12 -bb-1 NULL cc-2 NULL-11 -bb-1 NULL cc-2 NULL-10 -bb-1 NULL cc-2 NULL-9 -bb-1 NULL cc-2 NULL-8 -bb-1 NULL cc-2 NULL-7 -bb-1 NULL cc-2 NULL-6 -bb-1 NULL cc-2 NULL-5 -bb-1 NULL cc-2 NULL-4 -bb-1 NULL cc-2 NULL-3 -bb-1 NULL cc-2 NULL-2 -bb-1 NULL cc-2 NULL-1 -select * from t4 ignore index(idx1) where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2'); -a b c filler -b-1 NULL c-1 NULL-15 -b-1 NULL c-1 NULL-14 -b-1 NULL c-1 NULL-13 -b-1 NULL c-1 NULL-12 -b-1 NULL c-1 NULL-11 -b-1 NULL c-1 NULL-10 -b-1 NULL c-1 NULL-9 -b-1 NULL c-1 NULL-8 -b-1 NULL c-1 NULL-7 -b-1 NULL c-1 NULL-6 -b-1 NULL c-1 NULL-5 -b-1 NULL c-1 NULL-4 -b-1 NULL c-1 NULL-3 -b-1 NULL c-1 NULL-2 -b-1 NULL c-1 NULL-1 -bb-1 NULL cc-2 NULL-15 -bb-1 NULL cc-2 NULL-14 -bb-1 NULL cc-2 NULL-13 -bb-1 NULL cc-2 NULL-12 -bb-1 NULL cc-2 NULL-11 -bb-1 NULL cc-2 NULL-10 -bb-1 NULL cc-2 NULL-9 -bb-1 NULL cc-2 NULL-8 -bb-1 NULL cc-2 NULL-7 -bb-1 NULL cc-2 NULL-6 -bb-1 NULL cc-2 NULL-5 -bb-1 NULL cc-2 NULL-4 -bb-1 NULL cc-2 NULL-3 -bb-1 NULL cc-2 NULL-2 -bb-1 NULL cc-2 NULL-1 -drop table t1, t2, t3, t4; -create table t1 (a int, b int not null,unique key (a,b),index(b)); -insert ignore into t1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(null,7),(9,9),(8,8),(7,7),(null,9),(null,9),(6,6); -Warnings: -Warning 1062 Duplicate entry '6-6' for key 'a' -create table t2 like t1; -insert into t2 select * from t1; -alter table t1 modify b blob not null, add c int not null, drop key a, add unique key (a,b(20),c), drop key b, add key (b(10)); -select * from t1 where a is null; -a b c -NULL 7 0 -NULL 9 0 -NULL 9 0 -select * from t1 where (a is null or a > 0 and a < 3) and b > 7 limit 3; -a b c -NULL 9 0 -NULL 9 0 -select * from t1 where a is null and b=9 or a is null and b=7 limit 3; -a b c -NULL 7 0 -NULL 9 0 -NULL 9 0 -drop table t1, t2; -set storage_engine= @save_storage_engine; -set @mrr_buffer_size_save= @@mrr_buffer_size; -set mrr_buffer_size=64; -Warnings: -Warning 1292 Truncated incorrect mrr_buffer_size value: '64' -create table t1(a int); -insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -create table t2(a char(8), b char(8), c char(8), filler char(100), key(a,b,c) ) engine=TokuDB; -insert into t2 select -concat('a-', 1000 + A.a, '-a'), -concat('b-', 1000 + B.a, '-b'), -concat('c-', 1000 + C.a, '-c'), -'filler' -from t1 A, t1 B, t1 C; -explain -select count(length(a) + length(filler)) from t2 where a>='a-1000-a' and a <'a-1001-a'; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range a a 9 NULL 100 Using where; Rowid-ordered scan -select count(length(a) + length(filler)) from t2 where a>='a-1000-a' and a <'a-1001-a'; -count(length(a) + length(filler)) -100 -drop table t2; -create table t2 (a char(100), b 
char(100), c char(100), d int, -filler char(10), key(d), primary key (a,b,c)) engine= tokudb; -insert into t2 select A.a, B.a, B.a, A.a, 'filler' from t1 A, t1 B; -explain select * from t2 force index (d) where d < 10; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range d d 5 NULL # Using where; Rowid-ordered scan -drop table t2; -drop table t1; -set @@mrr_buffer_size= @mrr_buffer_size_save; -create table t1 (f1 int not null, f2 int not null,f3 int not null, f4 char(1), primary key (f1,f2), key ix(f3))Engine=tokuDB; -select * from t1 where (f3>=5 and f3<=10) or (f3>=1 and f3<=4); -f1 f2 f3 f4 -1 1 1 A -2 2 2 A -3 3 3 A -4 4 4 A -5 5 5 A -6 6 6 A -7 7 7 A -8 8 8 A -9 9 9 A -10 10 10 A -drop table t1; - -BUG#37977: Wrong result returned on GROUP BY + OR + innodb - -CREATE TABLE t1 ( -`pk` int(11) NOT NULL AUTO_INCREMENT, -`int_nokey` int(11) NOT NULL, -`int_key` int(11) NOT NULL, -`date_key` date NOT NULL, -`date_nokey` date NOT NULL, -`time_key` time NOT NULL, -`time_nokey` time NOT NULL, -`datetime_key` datetime NOT NULL, -`datetime_nokey` datetime NOT NULL, -`varchar_key` varchar(5) DEFAULT NULL, -`varchar_nokey` varchar(5) DEFAULT NULL, -PRIMARY KEY (`pk`), -KEY `int_key` (`int_key`), -KEY `date_key` (`date_key`), -KEY `time_key` (`time_key`), -KEY `datetime_key` (`datetime_key`), -KEY `varchar_key` (`varchar_key`) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES -(1,5,5,'2009-10-16','2009-10-16','09:28:15','09:28:15','2007-09-14 05:34:08','2007-09-14 05:34:08','qk','qk'), -(2,6,6,'0000-00-00','0000-00-00','23:06:39','23:06:39','0000-00-00 00:00:00','0000-00-00 00:00:00','j','j'), -(3,10,10,'2000-12-18','2000-12-18','22:16:19','22:16:19','2006-11-04 15:42:50','2006-11-04 15:42:50','aew','aew'), -(4,0,0,'2001-09-18','2001-09-18','00:00:00','00:00:00','2004-03-23 13:23:35','2004-03-23 13:23:35',NULL,NULL), -(5,6,6,'2007-08-16','2007-08-16','22:13:38','22:13:38','2004-08-19 11:01:28','2004-08-19 11:01:28','qu','qu'); -select pk from t1 WHERE `varchar_key` > 'kr' group by pk; -pk -1 -5 -select pk from t1 WHERE `int_nokey` IS NULL OR `varchar_key` > 'kr' group by pk; -pk -1 -5 -drop table t1; -# -# BUG#39447: Error with NOT NULL condition and LIMIT 1 -# -CREATE TABLE t1 ( -id int(11) NOT NULL, -parent_id int(11) DEFAULT NULL, -name varchar(10) DEFAULT NULL, -PRIMARY KEY (id), -KEY ind_parent_id (parent_id) -) ENGINE=TokuDB; -insert into t1 (id, parent_id, name) values -(10,NULL,'A'), -(20,10,'B'), -(30,10,'C'), -(40,NULL,'D'), -(50,40,'E'), -(60,40,'F'), -(70,NULL,'J'); -SELECT id FROM t1 WHERE parent_id IS NOT NULL ORDER BY id DESC LIMIT 1; -id -60 -This must show type=index, extra=Using where -explain SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE parent_id IS NOT NULL ORDER BY id DESC LIMIT 1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL PRIMARY 4 NULL 1 Using where -SELECT * FROM t1 WHERE parent_id IS NOT NULL ORDER BY id DESC LIMIT 1; -id parent_id name -60 40 F -drop table t1; -# -# BUG#628785: multi_range_read.cc:430: int DsMrr_impl::dsmrr_init(): Assertion `do_sort_keys || do_rowid_fetch' failed -# -set @save_join_cache_level= @@join_cache_level; -set @save_optimizer_switch= @@optimizer_switch; -SET SESSION join_cache_level=9; -Warnings: -Warning 1292 Truncated incorrect join_cache_level value: '9' -SET SESSION optimizer_switch='mrr_sort_keys=off'; -CREATE TABLE `t1` ( -`pk` int(11) NOT NULL AUTO_INCREMENT, -`col_int_nokey` int(11) DEFAULT NULL, -`col_int_key` int(11) DEFAULT NULL, -`col_varchar_key` varchar(1) 
DEFAULT NULL, -`col_varchar_nokey` varchar(1) DEFAULT NULL, -PRIMARY KEY (`pk`), -KEY `col_varchar_key` (`col_varchar_key`,`col_int_key`) -) ENGINE=TokuDB AUTO_INCREMENT=101 DEFAULT CHARSET=latin1; -INSERT INTO `t1` VALUES (1,6,NULL,'r','r'); -INSERT INTO `t1` VALUES (2,8,0,'c','c'); -INSERT INTO `t1` VALUES (97,7,0,'z','z'); -INSERT INTO `t1` VALUES (98,1,1,'j','j'); -INSERT INTO `t1` VALUES (99,7,8,'c','c'); -INSERT INTO `t1` VALUES (100,2,5,'f','f'); -SELECT table1 .`col_varchar_key` -FROM t1 table1 STRAIGHT_JOIN ( t1 table3 JOIN t1 table4 ON table4 .`pk` = table3 .`col_int_nokey` ) ON table4 .`col_varchar_nokey` ; -col_varchar_key -Warnings: -Warning 1292 Truncated incorrect DOUBLE value: 'r' -Warning 1292 Truncated incorrect DOUBLE value: 'r' -Warning 1292 Truncated incorrect DOUBLE value: 'r' -Warning 1292 Truncated incorrect DOUBLE value: 'r' -Warning 1292 Truncated incorrect DOUBLE value: 'r' -Warning 1292 Truncated incorrect DOUBLE value: 'r' -Warning 1292 Truncated incorrect DOUBLE value: 'c' -Warning 1292 Truncated incorrect DOUBLE value: 'c' -Warning 1292 Truncated incorrect DOUBLE value: 'c' -Warning 1292 Truncated incorrect DOUBLE value: 'c' -Warning 1292 Truncated incorrect DOUBLE value: 'c' -Warning 1292 Truncated incorrect DOUBLE value: 'c' -DROP TABLE t1; -set join_cache_level=@save_join_cache_level; -set optimizer_switch=@save_optimizer_switch; -# -# BUG#623300: Query with join_cache_level = 6 returns extra rows in maria-5.3-dsmrr-cpk -# -CREATE TABLE t1 ( -pk int(11) NOT NULL AUTO_INCREMENT, -col_int_nokey int(11) DEFAULT NULL, -PRIMARY KEY (pk) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES (10,7); -INSERT INTO t1 VALUES (11,1); -INSERT INTO t1 VALUES (12,5); -INSERT INTO t1 VALUES (13,3); -INSERT INTO t1 VALUES (14,6); -INSERT INTO t1 VALUES (15,92); -INSERT INTO t1 VALUES (16,7); -INSERT INTO t1 VALUES (17,NULL); -INSERT INTO t1 VALUES (18,3); -INSERT INTO t1 VALUES (19,5); -INSERT INTO t1 VALUES (20,1); -INSERT INTO t1 VALUES (21,2); -INSERT INTO t1 VALUES (22,NULL); -INSERT INTO t1 VALUES (23,1); -INSERT INTO t1 VALUES (24,0); -INSERT INTO t1 VALUES (25,210); -INSERT INTO t1 VALUES (26,8); -INSERT INTO t1 VALUES (27,7); -INSERT INTO t1 VALUES (28,5); -INSERT INTO t1 VALUES (29,NULL); -CREATE TABLE t2 ( -pk int(11) NOT NULL AUTO_INCREMENT, -col_int_nokey int(11) DEFAULT NULL, -PRIMARY KEY (pk) -) ENGINE=TokuDB; -INSERT INTO t2 VALUES (1,NULL); -INSERT INTO t2 VALUES (2,7); -INSERT INTO t2 VALUES (3,9); -INSERT INTO t2 VALUES (4,7); -INSERT INTO t2 VALUES (5,4); -INSERT INTO t2 VALUES (6,2); -INSERT INTO t2 VALUES (7,6); -INSERT INTO t2 VALUES (8,8); -INSERT INTO t2 VALUES (9,NULL); -INSERT INTO t2 VALUES (10,5); -INSERT INTO t2 VALUES (11,NULL); -INSERT INTO t2 VALUES (12,6); -INSERT INTO t2 VALUES (13,188); -INSERT INTO t2 VALUES (14,2); -INSERT INTO t2 VALUES (15,1); -INSERT INTO t2 VALUES (16,1); -INSERT INTO t2 VALUES (17,0); -INSERT INTO t2 VALUES (18,9); -INSERT INTO t2 VALUES (19,NULL); -INSERT INTO t2 VALUES (20,4); -set @my_save_join_cache_level= @@join_cache_level; -SET join_cache_level = 0; -SELECT table2.col_int_nokey -FROM t1 table1 JOIN t2 table2 ON table2.pk = table1.col_int_nokey -WHERE table1.pk ; -col_int_nokey -2 -4 -4 -4 -6 -6 -6 -7 -8 -9 -9 -NULL -NULL -NULL -SET join_cache_level = 6; -SELECT table2.col_int_nokey -FROM t1 table1 JOIN t2 table2 ON table2.pk = table1.col_int_nokey -WHERE table1.pk ; -col_int_nokey -2 -4 -4 -4 -6 -6 -6 -7 -8 -9 -9 -NULL -NULL -NULL -set join_cache_level= @my_save_join_cache_level; -drop table t1, t2; -# -# BUG#623315: 
Query returns less rows when run with join_cache_level=6 on maria-5.3-dsmrr-cpk -# -CREATE TABLE t1 ( -pk int(11) NOT NULL AUTO_INCREMENT, -col_int_nokey int(11) DEFAULT NULL, -col_int_key int(11) DEFAULT NULL, -col_varchar_key varchar(1) DEFAULT NULL, -PRIMARY KEY (pk), -KEY col_int_key (col_int_key), -KEY col_varchar_key (col_varchar_key,col_int_key) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES (10,7,8,'v'); -INSERT INTO t1 VALUES (11,1,9,'r'); -INSERT INTO t1 VALUES (12,5,9,'a'); -INSERT INTO t1 VALUES (13,3,186,'m'); -INSERT INTO t1 VALUES (14,6,NULL,'y'); -INSERT INTO t1 VALUES (15,92,2,'j'); -INSERT INTO t1 VALUES (16,7,3,'d'); -INSERT INTO t1 VALUES (17,NULL,0,'z'); -INSERT INTO t1 VALUES (18,3,133,'e'); -INSERT INTO t1 VALUES (19,5,1,'h'); -INSERT INTO t1 VALUES (20,1,8,'b'); -INSERT INTO t1 VALUES (21,2,5,'s'); -INSERT INTO t1 VALUES (22,NULL,5,'e'); -INSERT INTO t1 VALUES (23,1,8,'j'); -INSERT INTO t1 VALUES (24,0,6,'e'); -INSERT INTO t1 VALUES (25,210,51,'f'); -INSERT INTO t1 VALUES (26,8,4,'v'); -INSERT INTO t1 VALUES (27,7,7,'x'); -INSERT INTO t1 VALUES (28,5,6,'m'); -INSERT INTO t1 VALUES (29,NULL,4,'c'); -set @my_save_join_cache_level= @@join_cache_level; -SET join_cache_level=6; -select count(*) from -(SELECT table2.pk FROM -t1 LEFT JOIN t1 table2 JOIN t1 table3 ON table3.col_varchar_key = table2.col_varchar_key -ON table3.col_int_nokey) foo; -count(*) -480 -SET join_cache_level=0; -select count(*) from -(SELECT table2.pk FROM -t1 LEFT JOIN t1 table2 JOIN t1 table3 ON table3.col_varchar_key = table2.col_varchar_key -ON table3.col_int_nokey) foo; -count(*) -480 -set join_cache_level= @my_save_join_cache_level; -drop table t1; -# -# BUG#671340: Diverging results in with mrr_sort_keys=ON|OFF and join_cache_level=5 -# -CREATE TABLE t1 ( -pk int(11) NOT NULL AUTO_INCREMENT, -col_int_key int(11) NOT NULL, -col_varchar_key varchar(1) NOT NULL, -col_varchar_nokey varchar(1) NOT NULL, -PRIMARY KEY (pk), -KEY col_int_key (col_int_key), -KEY col_varchar_key (col_varchar_key,col_int_key) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES -(10,8,'v','v'), -(11,8,'f','f'), -(12,5,'v','v'), -(13,8,'s','s'), -(14,8,'a','a'), -(15,6,'p','p'), -(16,7,'z','z'), -(17,2,'a','a'), -(18,5,'h','h'), -(19,7,'h','h'), -(20,2,'v','v'), -(21,9,'v','v'), -(22,142,'b','b'), -(23,3,'y','y'), -(24,0,'v','v'), -(25,3,'m','m'), -(26,5,'z','z'), -(27,9,'n','n'), -(28,1,'d','d'), -(29,107,'a','a'); -CREATE TABLE t2 ( -pk int(11) NOT NULL AUTO_INCREMENT, -col_int_key int(11) NOT NULL, -col_varchar_key varchar(1) NOT NULL, -col_varchar_nokey varchar(1) NOT NULL, -PRIMARY KEY (pk), -KEY col_int_key (col_int_key), -KEY col_varchar_key (col_varchar_key,col_int_key) -) ENGINE=TokuDB; -INSERT INTO t2 VALUES -(1,9,'x','x'), -(2,5,'g','g'), -(3,1,'o','o'), -(4,0,'g','g'), -(5,1,'v','v'), -(6,190,'m','m'), -(7,6,'x','x'), -(8,3,'c','c'), -(9,4,'z','z'), -(10,3,'i','i'), -(11,186,'x','x'), -(12,1,'g','g'), -(13,8,'q','q'), -(14,226,'m','m'), -(15,133,'p','p'), -(16,6,'e','e'), -(17,3,'t','t'), -(18,8,'j','j'), -(19,5,'h','h'), -(20,7,'w','w'); -SELECT count(*), sum(table1.col_int_key*table2.pk) -FROM -t2 AS table1, t1 AS table2, t2 AS table3 -WHERE -table3.col_varchar_nokey = table2.col_varchar_key AND table3.pk > table2.col_varchar_nokey ; -count(*) sum(table1.col_int_key*table2.pk) -240 185955 -Warnings: -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated 
incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'v' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'p' -Warning 1292 Truncated incorrect DOUBLE value: 'z' -Warning 1292 Truncated incorrect DOUBLE value: 'z' -Warning 1292 Truncated incorrect DOUBLE value: 'z' -Warning 1292 Truncated incorrect DOUBLE value: 'z' -set @my_save_join_cache_level= @@join_cache_level; -set @my_save_join_buffer_size= @@join_buffer_size; -set join_cache_level=6; -set join_buffer_size=1536; -SELECT count(*), sum(table1.col_int_key*table2.pk) -FROM -t2 AS table1, t1 AS table2, t2 AS table3 -WHERE -table3.col_varchar_nokey = table2.col_varchar_key AND table3.pk > table2.col_varchar_nokey ; -count(*) sum(table1.col_int_key*table2.pk) -240 185955 -drop table t1,t2; -set 
join_cache_level=@my_save_join_cache_level; -set join_buffer_size=@my_save_join_buffer_size; -# -# BUG#665669: Result differences on query re-execution -# -create table t1 (pk int primary key, b int, c int default 0, index idx(b)) engine=Tokudb; -insert into t1(pk,b) values (3, 30), (2, 20), (9, 90), (7, 70), (4, 40), (5, 50), (10, 100), (12, 120); -set @bug665669_tmp=@@optimizer_switch; -set optimizer_switch='mrr=off'; -explain select * from t1 where b > 1000; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range idx idx 5 NULL 1 Using where -# The following two must produce indentical results: -select * from t1 where pk < 2 or pk between 3 and 4; -pk b c -3 30 0 -4 40 0 -select * from t1 where pk < 2 or pk between 3 and 4; -pk b c -3 30 0 -4 40 0 -drop table t1; -set optimizer_switch = @bug665669_tmp; -# -# Bug#43360 - Server crash with a simple multi-table update -# -CREATE TABLE t1 ( -a CHAR(2) NOT NULL PRIMARY KEY, -b VARCHAR(20) NOT NULL, -KEY (b) -) ENGINE=TokuDB; -CREATE TABLE t2 ( -a CHAR(2) NOT NULL PRIMARY KEY, -b VARCHAR(20) NOT NULL, -KEY (b) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES -('AB','MySQLAB'), -('JA','Sun Microsystems'), -('MS','Microsoft'), -('IB','IBM- Inc.'), -('GO','Google Inc.'); -INSERT INTO t2 VALUES -('AB','Sweden'), -('JA','USA'), -('MS','United States of America'), -('IB','North America'), -('GO','South America'); -Warnings: -Warning 1265 Data truncated for column 'b' at row 3 -UPDATE t1,t2 SET t1.b=UPPER(t1.b) WHERE t1.b LIKE 'United%'; -SELECT * FROM t1; -a b -GO Google Inc. -IB IBM- Inc. -MS Microsoft -AB MySQLAB -JA Sun Microsystems -SELECT * FROM t2; -a b -IB North America -GO South America -AB Sweden -MS United States of Ame -JA USA -DROP TABLE t1,t2; -# -# Testcase backport: Bug#43249 -# (Note: Fixed by patch for BUG#42580) -# -CREATE TABLE t1(c1 TIME NOT NULL, c2 TIME NULL, c3 DATE, PRIMARY KEY(c1), UNIQUE INDEX(c2)) engine=Tokudb; -INSERT INTO t1 VALUES('8:29:45',NULL,'2009-02-01'); -SELECT * FROM t1 WHERE c2 <=> NULL ORDER BY c2 LIMIT 2; -c1 c2 c3 -08:29:45 NULL 2009-02-01 -SELECT * FROM t1 WHERE c2 <=> NULL ORDER BY c2 LIMIT 2; -c1 c2 c3 -08:29:45 NULL 2009-02-01 -drop table `t1`; -# -# BUG#707925: Wrong result with join_cache_level=6 optimizer_use_mrr = -# force (incremental, BKA join) -# -set @_save_join_cache_level= @@join_cache_level; -set join_cache_level = 6; -CREATE TABLE t1 ( -f1 int(11), f2 int(11), f3 varchar(1), f4 varchar(1), -PRIMARY KEY (f1), -KEY (f3), -KEY (f2) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES ('11','8','f','f'),('12','5','v','v'),('13','8','s','s'), -('14','8','a','a'),('15','6','p','p'),('16','7','z','z'),('17','2','a','a'), -('18','5','h','h'),('19','7','h','h'),('20','2','v','v'),('21','9','v','v'), -('22','142','b','b'),('23','3','y','y'),('24','0','v','v'),('25','3','m','m'), -('26','5','z','z'),('27','9','n','n'),('28','1','d','d'),('29','107','a','a'); -select count(*) from ( -SELECT alias1.f2 -FROM -t1 AS alias1 JOIN ( -t1 AS alias2 FORCE KEY (f3) JOIN -t1 AS alias3 FORCE KEY (f2) ON alias3.f2 = alias2.f2 AND alias3.f4 = alias2.f3 -) ON alias3.f1 <= alias2.f1 -) X; -count(*) -361 -set join_cache_level=@_save_join_cache_level; -set optimizer_switch= @innodb_mrr_tmp; -drop table t1; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb_mrr2.result b/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb_mrr2.result deleted file mode 100644 index 73de058d22d3b..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_bugs/r/tokudb_mrr2.result +++ /dev/null @@ -1,441 +0,0 @@ -drop table if 
exists t1,t2,t3,t4; -set @maria_mrr_tmp=@@optimizer_switch; -set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on'; -set @mrr_buffer_size_save= @@mrr_buffer_size; -set @save_storage_engine= @@storage_engine; -set storage_engine=TokuDB; -create table t1(a int); -show create table t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL -) ENGINE=TokuDB DEFAULT CHARSET=latin1 -insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -create table t2(a int); -insert into t2 select A.a + 10*(B.a + 10*C.a) from t1 A, t1 B, t1 C; -create table t3 ( -a char(8) not null, b char(8) not null, filler char(200), -key(a) -); -insert into t3 select @a:=concat('c-', 1000+ A.a, '=w'), @a, 'filler' from t2 A; -insert into t3 select concat('c-', 1000+A.a, '=w'), concat('c-', 2000+A.a, '=w'), -'filler-1' from t2 A; -insert into t3 select concat('c-', 1000+A.a, '=w'), concat('c-', 3000+A.a, '=w'), -'filler-2' from t2 A; -select a,filler from t3 where a >= 'c-9011=w'; -a filler -select a,filler from t3 where a >= 'c-1011=w' and a <= 'c-1015=w'; -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -select a,filler from t3 where (a>='c-1011=w' and a <= 'c-1013=w') or -(a>='c-1014=w' and a <= 'c-1015=w'); -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -insert into t3 values ('c-1013=z', 'c-1013=z', 'err'); -insert into t3 values ('a-1014=w', 'a-1014=w', 'err'); -select a,filler from t3 where (a>='c-1011=w' and a <= 'c-1013=w') or -(a>='c-1014=w' and a <= 'c-1015=w'); -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -delete from t3 where b in ('c-1013=z', 'a-1014=w'); -select a,filler from t3 where a='c-1011=w' or a='c-1012=w' or a='c-1013=w' or -a='c-1014=w' or a='c-1015=w'; -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -insert into t3 values ('c-1013=w', 'del-me', 'inserted'); -select a,filler from t3 where a='c-1011=w' or a='c-1012=w' or a='c-1013=w' or -a='c-1014=w' or a='c-1015=w'; -a filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1011=w filler-1 -c-1012=w filler-1 -c-1013=w filler-1 -c-1014=w filler-1 -c-1015=w filler-1 -c-1011=w filler-2 -c-1012=w filler-2 -c-1013=w filler-2 -c-1014=w filler-2 -c-1015=w filler-2 -c-1013=w inserted -delete from t3 where b='del-me'; -alter table t3 add primary key(b); -select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1018=w') or -b IN ('c-1019=w', 'c-1020=w', 'c-1021=w', -'c-1022=w', 'c-1023=w', 'c-1024=w'); -b filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1016=w filler -c-1017=w filler -c-1018=w filler 
-c-1019=w filler -c-1020=w filler -c-1021=w filler -c-1022=w filler -c-1023=w filler -c-1024=w filler -select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1020=w') or -b IN ('c-1021=w', 'c-1022=w', 'c-1023=w'); -b filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1016=w filler -c-1017=w filler -c-1018=w filler -c-1019=w filler -c-1020=w filler -c-1021=w filler -c-1022=w filler -c-1023=w filler -select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1018=w') or -b IN ('c-1019=w', 'c-1020=w') or -(b>='c-1021=w' and b<= 'c-1023=w'); -b filler -c-1011=w filler -c-1012=w filler -c-1013=w filler -c-1014=w filler -c-1015=w filler -c-1016=w filler -c-1017=w filler -c-1018=w filler -c-1019=w filler -c-1020=w filler -c-1021=w filler -c-1022=w filler -c-1023=w filler -drop table if exists t4; -create table t4 (a varchar(10), b int, c char(10), filler char(200), -key idx1 (a, b, c)); -insert into t4 (filler) select concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'b-1',NULL,'c-1', concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'b-1',NULL,'c-222', concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'bb-1',NULL,'cc-2', concat('NULL-', 15-a) from t2 order by a limit 15; -insert into t4 (a,b,c,filler) -select 'zz-1',NULL,'cc-2', 'filler-data' from t2 order by a limit 500; -explain -select * from t4 where a IS NULL and b IS NULL and (c IS NULL or c='no-such-row1' - or c='no-such-row2'); -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t4 range idx1 idx1 29 NULL 16 Using where; Rowid-ordered scan -select * from t4 where a IS NULL and b IS NULL and (c IS NULL or c='no-such-row1' - or c='no-such-row2'); -a b c filler -NULL NULL NULL NULL-15 -NULL NULL NULL NULL-14 -NULL NULL NULL NULL-13 -NULL NULL NULL NULL-12 -NULL NULL NULL NULL-11 -NULL NULL NULL NULL-10 -NULL NULL NULL NULL-9 -NULL NULL NULL NULL-8 -NULL NULL NULL NULL-7 -NULL NULL NULL NULL-6 -NULL NULL NULL NULL-5 -NULL NULL NULL NULL-4 -NULL NULL NULL NULL-3 -NULL NULL NULL NULL-2 -NULL NULL NULL NULL-1 -explain -select * from t4 where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2'); -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t4 range idx1 idx1 29 NULL 32 Using where; Rowid-ordered scan -select * from t4 where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2'); -a b c filler -b-1 NULL c-1 NULL-15 -b-1 NULL c-1 NULL-14 -b-1 NULL c-1 NULL-13 -b-1 NULL c-1 NULL-12 -b-1 NULL c-1 NULL-11 -b-1 NULL c-1 NULL-10 -b-1 NULL c-1 NULL-9 -b-1 NULL c-1 NULL-8 -b-1 NULL c-1 NULL-7 -b-1 NULL c-1 NULL-6 -b-1 NULL c-1 NULL-5 -b-1 NULL c-1 NULL-4 -b-1 NULL c-1 NULL-3 -b-1 NULL c-1 NULL-2 -b-1 NULL c-1 NULL-1 -bb-1 NULL cc-2 NULL-15 -bb-1 NULL cc-2 NULL-14 -bb-1 NULL cc-2 NULL-13 -bb-1 NULL cc-2 NULL-12 -bb-1 NULL cc-2 NULL-11 -bb-1 NULL cc-2 NULL-10 -bb-1 NULL cc-2 NULL-9 -bb-1 NULL cc-2 NULL-8 -bb-1 NULL cc-2 NULL-7 -bb-1 NULL cc-2 NULL-6 -bb-1 NULL cc-2 NULL-5 -bb-1 NULL cc-2 NULL-4 -bb-1 NULL cc-2 NULL-3 -bb-1 NULL cc-2 NULL-2 -bb-1 NULL cc-2 NULL-1 -select * from t4 ignore index(idx1) where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2'); -a b c filler -b-1 NULL c-1 NULL-15 -b-1 NULL c-1 NULL-14 -b-1 NULL c-1 NULL-13 -b-1 NULL c-1 NULL-12 -b-1 NULL c-1 NULL-11 -b-1 NULL c-1 NULL-10 -b-1 NULL c-1 NULL-9 -b-1 NULL c-1 NULL-8 -b-1 NULL c-1 NULL-7 -b-1 NULL c-1 NULL-6 -b-1 NULL c-1 NULL-5 -b-1 
NULL c-1 NULL-4 -b-1 NULL c-1 NULL-3 -b-1 NULL c-1 NULL-2 -b-1 NULL c-1 NULL-1 -bb-1 NULL cc-2 NULL-15 -bb-1 NULL cc-2 NULL-14 -bb-1 NULL cc-2 NULL-13 -bb-1 NULL cc-2 NULL-12 -bb-1 NULL cc-2 NULL-11 -bb-1 NULL cc-2 NULL-10 -bb-1 NULL cc-2 NULL-9 -bb-1 NULL cc-2 NULL-8 -bb-1 NULL cc-2 NULL-7 -bb-1 NULL cc-2 NULL-6 -bb-1 NULL cc-2 NULL-5 -bb-1 NULL cc-2 NULL-4 -bb-1 NULL cc-2 NULL-3 -bb-1 NULL cc-2 NULL-2 -bb-1 NULL cc-2 NULL-1 -drop table t1, t2, t3, t4; -create table t1 (a int, b int not null,unique key (a,b),index(b)); -insert ignore into t1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(null,7),(9,9),(8,8),(7,7),(null,9),(null,9),(6,6); -Warnings: -Warning 1062 Duplicate entry '6-6' for key 'a' -create table t2 like t1; -insert into t2 select * from t1; -alter table t1 modify b blob not null, add c int not null, drop key a, add unique key (a,b(20),c), drop key b, add key (b(10)); -select * from t1 where a is null; -a b c -NULL 7 0 -NULL 9 0 -NULL 9 0 -select * from t1 where (a is null or a > 0 and a < 3) and b > 7 limit 3; -a b c -NULL 9 0 -NULL 9 0 -select * from t1 where a is null and b=9 or a is null and b=7 limit 3; -a b c -NULL 7 0 -NULL 9 0 -NULL 9 0 -drop table t1, t2; -set storage_engine= @save_storage_engine; -set @@mrr_buffer_size= @mrr_buffer_size_save; -# -# Crash in quick_range_seq_next() in maria-5.3-dsmrr-cpk with join_cache_level = {8,1} -# -set @save_join_cache_level= @@join_cache_level; -SET SESSION join_cache_level = 8; -CREATE TABLE `t1` ( -`col_int_key` int(11) DEFAULT NULL, -`col_datetime_key` datetime DEFAULT NULL, -`col_varchar_key` varchar(1) DEFAULT NULL, -`col_varchar_nokey` varchar(1) DEFAULT NULL, -KEY `col_varchar_key` (`col_varchar_key`,`col_int_key`) -) ENGINE=TokuDB DEFAULT CHARSET=latin1; -INSERT INTO `t1` VALUES (6,'2005-10-07 00:00:00','e','e'); -INSERT INTO `t1` VALUES (51,'2000-07-15 05:00:34','f','f'); -CREATE TABLE `t2` ( -`col_int_key` int(11) DEFAULT NULL, -`col_datetime_key` datetime DEFAULT NULL, -`col_varchar_key` varchar(1) DEFAULT NULL, -`col_varchar_nokey` varchar(1) DEFAULT NULL, -KEY `col_varchar_key` (`col_varchar_key`,`col_int_key`) -) ENGINE=TokuDB DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1; -INSERT INTO `t2` VALUES (2,'2004-10-11 18:13:16','w','w'); -INSERT INTO `t2` VALUES (2,'1900-01-01 00:00:00','d','d'); -SELECT table2 .`col_datetime_key` -FROM t2 JOIN ( t1 table2 JOIN t2 table3 ON table3 .`col_varchar_key` < table2 .`col_varchar_key` ) ON table3 .`col_varchar_nokey` ; -col_datetime_key -Warnings: -Warning 1292 Truncated incorrect DOUBLE value: 'd' -Warning 1292 Truncated incorrect DOUBLE value: 'd' -Warning 1292 Truncated incorrect DOUBLE value: 'd' -Warning 1292 Truncated incorrect DOUBLE value: 'd' -drop table t1, t2; -set join_cache_level=@save_join_cache_level; -CREATE TABLE t1( -pk int NOT NULL, i int NOT NULL, v varchar(1) NOT NULL, -PRIMARY KEY (pk), INDEX idx (v, i) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES -(1,9,'x'), (2,5,'g'), (3,1,'o'), (4,0,'g'), (5,1,'v'), -(6,190,'m'), (7,6,'x'), (8,3,'c'), (9,4,'z'), (10,3,'i'), -(11,186,'x'), (12,1,'g'), (13,8,'q'), (14,226,'m'), (15,133,'p'); -CREATE TABLE t2( -pk int NOT NULL, i int NOT NULL, v varchar(1) NOT NULL, -PRIMARY KEY (pk), INDEX idx (v, i) -) ENGINE=TokuDB; -INSERT INTO t2 SELECT * FROM t1; -INSERT INTO t2 VALUES (77, 333, 'z'); -CREATE TABLE t3( -pk int NOT NULL, i int NOT NULL, v varchar(1) NOT NULL, -PRIMARY KEY (pk), INDEX idx (v, i) -) ENGINE=TokuDB; -INSERT INTO t3 SELECT * FROM t1; -INSERT INTO t3 VALUES -(88, 442, 'y'), (99, 445, 'w'), (87, 442, 'z'), (98, 445, 'v'), 
(86, 442, 'x'), -(97, 445, 't'), (85, 442, 'b'), (96, 445, 'l'), (84, 442, 'a'), (95, 445, 'k'); -set @save_join_cache_level=@@join_cache_level; -set join_cache_level=1; -SELECT COUNT(t1.v) FROM t1, t2 IGNORE INDEX (idx), t3 IGNORE INDEX (idx) -WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; -COUNT(t1.v) -120 -EXPLAIN -SELECT COUNT(t1.v) FROM t1, t2 IGNORE INDEX (idx), t3 IGNORE INDEX (idx) -WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL idx 7 NULL 15 Using index -1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 16 Using where; Using join buffer (flat, BNL join) -1 SIMPLE t3 range PRIMARY PRIMARY 4 NULL 25 Using where; Using join buffer (flat, BNL join) -SELECT COUNT(t1.v) FROM t1, t2, t3 -WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; -COUNT(t1.v) -120 -EXPLAIN -SELECT COUNT(t1.v) FROM t1, t2, t3 -WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL idx 7 NULL 15 Using index -1 SIMPLE t2 index PRIMARY,idx idx 7 NULL 16 Using where; Using index; Using join buffer (flat, BNL join) -1 SIMPLE t3 ref PRIMARY,idx idx 3 test.t2.v 3 Using where; Using index -set join_cache_level=@save_join_cache_level; -DROP TABLE t1,t2,t3; -# -# BUG#671361: virtual int Mrr_ordered_index_reader::refill_buffer(): Assertion `!know_key_tuple_params -# (works only on Maria because we need 1024-byte long key) -# -SET SESSION join_cache_level = 6; -SET SESSION join_buffer_size = 1024; -CREATE TABLE t1 ( -pk int(11) NOT NULL AUTO_INCREMENT, -col_varchar_1024_latin1_key varchar(1024) DEFAULT NULL, -PRIMARY KEY (pk), -KEY col_varchar_1024_latin1_key (col_varchar_1024_latin1_key) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES -(1,'z'), (2,'abcdefjhjkl'), (3,'in'), (4,'abcdefjhjkl'), (6,'abcdefjhjkl'), -(11,'zx'), (12,'abcdefjhjm'), (13,'jn'), (14,'abcdefjhjp'), (16,'abcdefjhjr'); -CREATE TABLE t2 ( -col_varchar_10_latin1 varchar(10) DEFAULT NULL -) ENGINE=TokuDB; -INSERT INTO t2 VALUES ('foo'), ('foo'); -EXPLAIN SELECT count(*) -FROM t1 AS table1, t2 AS table2 -WHERE -table1.col_varchar_1024_latin1_key = table2.col_varchar_10_latin1 AND table1.pk<>0 ; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE table2 ALL NULL NULL NULL NULL 2 Using where -1 SIMPLE table1 ref PRIMARY,col_varchar_1024_latin1_key col_varchar_1024_latin1_key 1027 test.table2.col_varchar_10_latin1 2 Using where; Using index -SELECT count(*) -FROM t1 AS table1, t2 AS table2 -WHERE -table1.col_varchar_1024_latin1_key = table2.col_varchar_10_latin1 AND table1.pk<>0 ; -count(*) -0 -drop table t1, t2; -# -# BUG#693747: Assertion multi_range_read.cc:908: int DsMrr_impl::dsmrr_init( -# -set @_save_join_cache_level= @@join_cache_level; -set @_save_join_buffer_size= @@join_buffer_size; -set join_cache_level=8; -set join_buffer_size=10240; -CREATE TABLE t1 ( -f2 varchar(32) COLLATE latin1_swedish_ci, -f3 int(11), -f4 varchar(1024) COLLATE utf8_bin, -f5 varchar(1024) COLLATE latin1_bin, -KEY (f5) -) ENGINE=TokuDB; -# Fill the table with some data -SELECT alias2.* , alias1.f2 -FROM -t1 AS alias1 -LEFT JOIN t1 AS alias2 ON alias1.f2 = alias2.f5 -WHERE -alias2.f3 < 0; -f2 f3 f4 f5 f2 -set join_cache_level=@_save_join_cache_level; -set join_buffer_size=@_save_join_buffer_size; -set optimizer_switch=@maria_mrr_tmp; -drop table t1; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/4676.test 
b/storage/tokudb/mysql-test/tokudb_bugs/t/4676.test deleted file mode 100644 index ac827ddee513c..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/4676.test +++ /dev/null @@ -1,15 +0,0 @@ ---source include/have_partition.inc - -let $engine='tokudb'; - ---disable_warnings -DROP TABLE IF EXISTS t; ---enable_warnings - -eval CREATE TABLE t (a INT) ENGINE=$engine PARTITION BY KEY (a) (PARTITION part0, PARTITION part1); -SHOW CREATE TABLE t; - -ALTER TABLE t TRUNCATE PARTITION part0; -ALTER TABLE t TRUNCATE PARTITION part1; - -DROP TABLE IF EXISTS t; \ No newline at end of file diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/4677.test b/storage/tokudb/mysql-test/tokudb_bugs/t/4677.test deleted file mode 100644 index 2a05e104a9bc3..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/4677.test +++ /dev/null @@ -1,30 +0,0 @@ -let $engine='tokudb'; - ---disable_warnings -drop table if exists t; ---enable_warnings - -eval create table t (a int primary key) engine=$engine; - -connect (conn1,localhost,root,,); - -connection default; -begin; -insert into t values (1); -insert into t values (3); - -connection conn1; -begin; -insert into t values (2); -insert into t values (4); - -connection default; -commit; - -connection conn1; -commit; - -connection default; -disconnect conn1; - -drop table t; \ No newline at end of file diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/fractional_time.test b/storage/tokudb/mysql-test/tokudb_bugs/t/fractional_time.test deleted file mode 100644 index c31bf8fc66b93..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/fractional_time.test +++ /dev/null @@ -1,36 +0,0 @@ -SET DEFAULT_STORAGE_ENGINE = 'tokudb'; - ---disable_warnings -DROP TABLE IF EXISTS foo; ---enable_warnings -create table foo (a timestamp(6), b timestamp(4), c timestamp(5), primary key (a))engine=tokudb; - -insert into foo values ('2010-12-10 14:12:09.123452', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); -insert into foo values ('2010-12-10 14:12:09.123454', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); -insert into foo values ('2010-12-10 14:12:09.123451', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); -insert into foo values ('2010-12-10 14:12:09.123453', '2010-12-10 14:12:09.123416', '2010-12-10 14:12:09.123451'); - -select * from foo; -explain select * from foo where a > '2010-12-10 14:12:09.123452'; -select * from foo where a > '2010-12-10 14:12:09.123452'; - -alter table foo change a a datetime(6), change b b datetime(4), change c c datetime(5); -show create table foo; -select * from foo; -explain select * from foo where a > '2010-12-10 14:12:09.123452'; -select * from foo where a > '2010-12-10 14:12:09.123452'; -drop table foo; - -create table foo (a time(6), b time(4), c time(5), primary key (a))engine=TokuDB; -insert into foo values ('14:12:09.123452', '14:12:09.123416', '14:12:09.123451'); -insert into foo values ('14:12:09.123454', '14:12:09.123416', '14:12:09.123451'); -insert into foo values ('14:12:09.123451', '14:12:09.123416', '14:12:09.123451'); -insert into foo values ('14:12:09.123453', '14:12:09.123416', '14:12:09.123451'); - -select * from foo; -explain select * from foo where a > '14:12:09.123452'; -select * from foo where a > '14:12:09.123452'; - - -# Final cleanup. 
-DROP TABLE foo; \ No newline at end of file diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/part_index_scan.test b/storage/tokudb/mysql-test/tokudb_bugs/t/part_index_scan.test deleted file mode 100644 index b38a979752b91..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/part_index_scan.test +++ /dev/null @@ -1,157 +0,0 @@ -# verify that index scans on parititions are not slow -# due to tokudb bulk fetch not being used - -source include/have_tokudb.inc; -source include/have_partition.inc; -source include/big_test.inc; -set default_storage_engine='tokudb'; -disable_warnings; -drop table if exists t,t1,t2,t3; -enable_warnings; - -let $debug = 0; -let $maxq = 10; - -CREATE TABLE `t` ( - `num` int(10) unsigned auto_increment NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -); - -# put 8M rows into t -if ($debug) { let $ts = `select now()`; echo "start $ts"; } -INSERT INTO t values (null,null); -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -INSERT INTO t SELECT null,null FROM t; -if ($debug) { let $ts = `select now()`; echo "select $ts"; } -SELECT count(*) FROM t; -if ($debug) { let $ts = `select now()`; echo "select done $ts"; } - -CREATE TABLE `t1` ( - `num` int(10) unsigned NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -); - -CREATE TABLE `t2` ( - `num` int(10) unsigned NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -) -PARTITION BY HASH (num) PARTITIONS 10; - -CREATE TABLE `t3` ( - `num` int(10) unsigned NOT NULL, - `val` varchar(32) DEFAULT NULL, - PRIMARY KEY (`num`) -) -PARTITION BY RANGE (num) -(PARTITION p0 VALUES LESS THAN (1000000), - PARTITION p1 VALUES LESS THAN (2000000), - PARTITION p2 VALUES LESS THAN (3000000), - PARTITION p3 VALUES LESS THAN (4000000), - PARTITION p4 VALUES LESS THAN (5000000), - PARTITION p5 VALUES LESS THAN (6000000), - PARTITION p6 VALUES LESS THAN (7000000), - PARTITION p7 VALUES LESS THAN (8000000), - PARTITION px VALUES LESS THAN MAXVALUE); - -if ($debug) { let $ts = `select now()`; echo "insert t1 $ts"; } -insert into t1 select * from t; -if ($debug) { let $ts = `select now()`; echo "insert t2 $ts"; } -insert into t2 select * from t; -if ($debug) { let $ts = `select now()`; echo "insert t3 $ts"; } -insert into t3 select * from t; -if ($debug) { let $ts = `select now()`; echo "select t1 $ts"; } - -# verify that full index scans on partitioned tables t2 and t3 are comparable to a non-partitioned table t1 -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - select count(*) from t1; - inc $i; -} -let $t1 = `select to_seconds(now()) - $s`; - -if ($debug) { let $ts = `select now()`; echo "select t2 $ts"; } - -let $s = `select to_seconds(now())`; 
-let $i = 0; -while ($i < $maxq) { - select count(*) from t2; - inc $i; -} -let $t2 = `select to_seconds(now()) - $s`; -let $d = `select abs($t2 - $t1) <= $t1`; -echo $d; - -if ($debug) { let $ts = `select now()`; echo "select t3 $ts"; } - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - select count(*) from t3; - inc $i; -} -let $t3 = `select to_seconds(now()) - $s`; -let $d = `select abs($t3 - $t1) <= $t1`; -echo $d; - -if ($debug) { let $ts = `select now()`; echo "select t1 $ts"; } - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - select count(*) from t1 where num>7000000; - inc $i; -} -let $t1 = `select to_seconds(now()) - $s`; - -if ($debug) { let $ts = `select now()`; echo "select t2 $ts"; } - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - select count(*) from t2 where num>7000000; - inc $i; -} -let $t2 = `select to_seconds(now()) - $s`; -let $d = `select abs($t2 - $t1) <= $t1`; -echo $d; - -if ($debug) { let $ts = `select now()`; echo "select t3 $ts"; } - -let $s = `select to_seconds(now())`; -let $i = 0; -while ($i < $maxq) { - select count(*) from t3 where num>7000000; - inc $i; -} -let $t3 = `select to_seconds(now()) - $s`; -let $d = `select abs($t3 - $t1) <= $t1`; -echo $d; - -if ($debug) { let $ts = `select now()`; echo "done $ts"; } - -drop table if exists t,t1,t2,t3; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/simple_icp.test b/storage/tokudb/mysql-test/tokudb_bugs/t/simple_icp.test deleted file mode 100644 index afc98affa85ae..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/simple_icp.test +++ /dev/null @@ -1,43 +0,0 @@ -#-- source include/have_tokudb.inc - ---disable_warnings -drop table if exists a,b,c,foo; ---enable_warnings - -create table a (a int auto_increment, primary key (a)) engine=TokuDB; -create table b (a int auto_increment, primary key (a)) engine=TokuDB; -create table c (a int auto_increment, primary key (a)) engine=TokuDB; - -insert into a values (),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(); -insert into b values (),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(); -insert into c values (),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(),(); - -create table foo (a int, b int, c int, d int, e int, key(a,b,c)) engine=TokuDB; - -insert into foo (a,b,c) select * from a,b,c; - -flush status; -show status like '%Handler_read_next%'; -explain select * from foo where a between 5 and 6 and c=10; -select * from foo where a between 5 and 6 and c=10; -show status like '%Handler_read_next%'; - -flush status; -show status like '%Handler_read_prev%'; -explain select * from foo where a between 5 and 6 and c=10; -select * from foo where a between 5 and 6 and c=10 order by a desc; -show status like '%Handler_read_prev%'; - -flush status; -show status like '%Handler_read_prev%'; -explain select * from foo where a > 19 and c=10; -select * from foo where a > 19 and c=10 order by a desc; -show status like '%Handler_read_prev%'; - -flush status; -show status like '%Handler_read_next%'; -explain select * from foo where a > 19 and c=10; -select * from foo where a > 19 and c=10; -show status like '%Handler_read_next%'; - -drop table foo,a,b,c; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb718.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb718.test new file mode 100644 index 0000000000000..877087776b27d --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb718.test @@ -0,0 +1,13 @@ +# test DB-718, a crash caused by broken error 
handling in tokudb's fractal_tree_info information schema +source include/have_tokudb.inc; +set default_storage_engine='tokudb'; +disable_warnings; +drop table if exists t; +enable_warnings; +create table t (id int primary key); +begin; +insert into t values (1),(2); +--error 34542 +select * from information_schema.tokudb_fractal_tree_info; +commit; +drop table t; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_mrr.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_mrr.test deleted file mode 100644 index a485983896569..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_mrr.test +++ /dev/null @@ -1,462 +0,0 @@ -#-- source include/have_tokudb.inc - ---disable_warnings -drop table if exists t1,t2,t3,t4; ---enable_warnings - -set @save_storage_engine= @@storage_engine; -set storage_engine=TokuDB; - -set @innodb_mrr_tmp=@@optimizer_switch; -set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on'; - ---source include/mrr_tests.inc - -set storage_engine= @save_storage_engine; - -# Try big rowid sizes -set @mrr_buffer_size_save= @@mrr_buffer_size; -set mrr_buffer_size=64; - -# By default InnoDB will fill values only for key parts used by the query, -# which will cause DS-MRR to supply an invalid tuple on scan restoration. -# Verify that DS-MRR's code extra(HA_EXTRA_RETRIEVE_ALL_COLS) call has effect: -create table t1(a int); -insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -create table t2(a char(8), b char(8), c char(8), filler char(100), key(a,b,c) ) engine=TokuDB; - -insert into t2 select - concat('a-', 1000 + A.a, '-a'), - concat('b-', 1000 + B.a, '-b'), - concat('c-', 1000 + C.a, '-c'), - 'filler' -from t1 A, t1 B, t1 C; - -explain -select count(length(a) + length(filler)) from t2 where a>='a-1000-a' and a <'a-1001-a'; -select count(length(a) + length(filler)) from t2 where a>='a-1000-a' and a <'a-1001-a'; -drop table t2; - -# Try a very big rowid -create table t2 (a char(100), b char(100), c char(100), d int, - filler char(10), key(d), primary key (a,b,c)) engine= tokudb; -insert into t2 select A.a, B.a, B.a, A.a, 'filler' from t1 A, t1 B; ---replace_column 9 # -explain select * from t2 force index (d) where d < 10; -drop table t2; - -drop table t1; -set @@mrr_buffer_size= @mrr_buffer_size_save; - -# -# BUG#33033 "MySQL/InnoDB crashes with simple select range query" -# -create table t1 (f1 int not null, f2 int not null,f3 int not null, f4 char(1), primary key (f1,f2), key ix(f3))Engine=tokuDB; - ---disable_query_log -let $1=55; - -while ($1) -{ - eval insert into t1(f1,f2,f3,f4) values ($1,$1,$1,'A'); - dec $1; -} ---enable_query_log - -# The following must not crash: -select * from t1 where (f3>=5 and f3<=10) or (f3>=1 and f3<=4); - -drop table t1; - ---echo ---echo BUG#37977: Wrong result returned on GROUP BY + OR + innodb ---echo -CREATE TABLE t1 ( - `pk` int(11) NOT NULL AUTO_INCREMENT, - `int_nokey` int(11) NOT NULL, - `int_key` int(11) NOT NULL, - `date_key` date NOT NULL, - `date_nokey` date NOT NULL, - `time_key` time NOT NULL, - `time_nokey` time NOT NULL, - `datetime_key` datetime NOT NULL, - `datetime_nokey` datetime NOT NULL, - `varchar_key` varchar(5) DEFAULT NULL, - `varchar_nokey` varchar(5) DEFAULT NULL, - PRIMARY KEY (`pk`), - KEY `int_key` (`int_key`), - KEY `date_key` (`date_key`), - KEY `time_key` (`time_key`), - KEY `datetime_key` (`datetime_key`), - KEY `varchar_key` (`varchar_key`) -) ENGINE=TokuDB; - -INSERT INTO t1 VALUES -(1,5,5,'2009-10-16','2009-10-16','09:28:15','09:28:15','2007-09-14 
05:34:08','2007-09-14 05:34:08','qk','qk'), -(2,6,6,'0000-00-00','0000-00-00','23:06:39','23:06:39','0000-00-00 00:00:00','0000-00-00 00:00:00','j','j'), -(3,10,10,'2000-12-18','2000-12-18','22:16:19','22:16:19','2006-11-04 15:42:50','2006-11-04 15:42:50','aew','aew'), -(4,0,0,'2001-09-18','2001-09-18','00:00:00','00:00:00','2004-03-23 13:23:35','2004-03-23 13:23:35',NULL,NULL), -(5,6,6,'2007-08-16','2007-08-16','22:13:38','22:13:38','2004-08-19 11:01:28','2004-08-19 11:01:28','qu','qu'); -select pk from t1 WHERE `varchar_key` > 'kr' group by pk; -select pk from t1 WHERE `int_nokey` IS NULL OR `varchar_key` > 'kr' group by pk; -drop table t1; - ---echo # ---echo # BUG#39447: Error with NOT NULL condition and LIMIT 1 ---echo # -CREATE TABLE t1 ( - id int(11) NOT NULL, - parent_id int(11) DEFAULT NULL, - name varchar(10) DEFAULT NULL, - PRIMARY KEY (id), - KEY ind_parent_id (parent_id) -) ENGINE=TokuDB; - -insert into t1 (id, parent_id, name) values -(10,NULL,'A'), -(20,10,'B'), -(30,10,'C'), -(40,NULL,'D'), -(50,40,'E'), -(60,40,'F'), -(70,NULL,'J'); - -SELECT id FROM t1 WHERE parent_id IS NOT NULL ORDER BY id DESC LIMIT 1; ---echo This must show type=index, extra=Using where -explain SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE parent_id IS NOT NULL ORDER BY id DESC LIMIT 1; -SELECT * FROM t1 WHERE parent_id IS NOT NULL ORDER BY id DESC LIMIT 1; -drop table t1; - - --- echo # --- echo # BUG#628785: multi_range_read.cc:430: int DsMrr_impl::dsmrr_init(): Assertion `do_sort_keys || do_rowid_fetch' failed --- echo # -set @save_join_cache_level= @@join_cache_level; -set @save_optimizer_switch= @@optimizer_switch; -SET SESSION join_cache_level=9; -SET SESSION optimizer_switch='mrr_sort_keys=off'; - -CREATE TABLE `t1` ( - `pk` int(11) NOT NULL AUTO_INCREMENT, - `col_int_nokey` int(11) DEFAULT NULL, - `col_int_key` int(11) DEFAULT NULL, - `col_varchar_key` varchar(1) DEFAULT NULL, - `col_varchar_nokey` varchar(1) DEFAULT NULL, - PRIMARY KEY (`pk`), - KEY `col_varchar_key` (`col_varchar_key`,`col_int_key`) -) ENGINE=TokuDB AUTO_INCREMENT=101 DEFAULT CHARSET=latin1; -INSERT INTO `t1` VALUES (1,6,NULL,'r','r'); -INSERT INTO `t1` VALUES (2,8,0,'c','c'); -INSERT INTO `t1` VALUES (97,7,0,'z','z'); -INSERT INTO `t1` VALUES (98,1,1,'j','j'); -INSERT INTO `t1` VALUES (99,7,8,'c','c'); -INSERT INTO `t1` VALUES (100,2,5,'f','f'); -SELECT table1 .`col_varchar_key` -FROM t1 table1 STRAIGHT_JOIN ( t1 table3 JOIN t1 table4 ON table4 .`pk` = table3 .`col_int_nokey` ) ON table4 .`col_varchar_nokey` ; -DROP TABLE t1; -set join_cache_level=@save_join_cache_level; -set optimizer_switch=@save_optimizer_switch; - ---echo # ---echo # BUG#623300: Query with join_cache_level = 6 returns extra rows in maria-5.3-dsmrr-cpk ---echo # -CREATE TABLE t1 ( - pk int(11) NOT NULL AUTO_INCREMENT, - col_int_nokey int(11) DEFAULT NULL, - PRIMARY KEY (pk) -) ENGINE=TokuDB; - -INSERT INTO t1 VALUES (10,7); -INSERT INTO t1 VALUES (11,1); -INSERT INTO t1 VALUES (12,5); -INSERT INTO t1 VALUES (13,3); -INSERT INTO t1 VALUES (14,6); -INSERT INTO t1 VALUES (15,92); -INSERT INTO t1 VALUES (16,7); -INSERT INTO t1 VALUES (17,NULL); -INSERT INTO t1 VALUES (18,3); -INSERT INTO t1 VALUES (19,5); -INSERT INTO t1 VALUES (20,1); -INSERT INTO t1 VALUES (21,2); -INSERT INTO t1 VALUES (22,NULL); -INSERT INTO t1 VALUES (23,1); -INSERT INTO t1 VALUES (24,0); -INSERT INTO t1 VALUES (25,210); -INSERT INTO t1 VALUES (26,8); -INSERT INTO t1 VALUES (27,7); -INSERT INTO t1 VALUES (28,5); -INSERT INTO t1 VALUES (29,NULL); - -CREATE TABLE t2 ( - pk int(11) NOT 
NULL AUTO_INCREMENT, - col_int_nokey int(11) DEFAULT NULL, - PRIMARY KEY (pk) -) ENGINE=TokuDB; -INSERT INTO t2 VALUES (1,NULL); -INSERT INTO t2 VALUES (2,7); -INSERT INTO t2 VALUES (3,9); -INSERT INTO t2 VALUES (4,7); -INSERT INTO t2 VALUES (5,4); -INSERT INTO t2 VALUES (6,2); -INSERT INTO t2 VALUES (7,6); -INSERT INTO t2 VALUES (8,8); -INSERT INTO t2 VALUES (9,NULL); -INSERT INTO t2 VALUES (10,5); -INSERT INTO t2 VALUES (11,NULL); -INSERT INTO t2 VALUES (12,6); -INSERT INTO t2 VALUES (13,188); -INSERT INTO t2 VALUES (14,2); -INSERT INTO t2 VALUES (15,1); -INSERT INTO t2 VALUES (16,1); -INSERT INTO t2 VALUES (17,0); -INSERT INTO t2 VALUES (18,9); -INSERT INTO t2 VALUES (19,NULL); -INSERT INTO t2 VALUES (20,4); - -set @my_save_join_cache_level= @@join_cache_level; -SET join_cache_level = 0; - ---sorted_result -SELECT table2.col_int_nokey -FROM t1 table1 JOIN t2 table2 ON table2.pk = table1.col_int_nokey -WHERE table1.pk ; - -SET join_cache_level = 6; - ---sorted_result -SELECT table2.col_int_nokey -FROM t1 table1 JOIN t2 table2 ON table2.pk = table1.col_int_nokey -WHERE table1.pk ; - -set join_cache_level= @my_save_join_cache_level; -drop table t1, t2; - ---echo # ---echo # BUG#623315: Query returns less rows when run with join_cache_level=6 on maria-5.3-dsmrr-cpk ---echo # -CREATE TABLE t1 ( - pk int(11) NOT NULL AUTO_INCREMENT, - col_int_nokey int(11) DEFAULT NULL, - col_int_key int(11) DEFAULT NULL, - col_varchar_key varchar(1) DEFAULT NULL, - PRIMARY KEY (pk), - KEY col_int_key (col_int_key), - KEY col_varchar_key (col_varchar_key,col_int_key) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES (10,7,8,'v'); -INSERT INTO t1 VALUES (11,1,9,'r'); -INSERT INTO t1 VALUES (12,5,9,'a'); -INSERT INTO t1 VALUES (13,3,186,'m'); -INSERT INTO t1 VALUES (14,6,NULL,'y'); -INSERT INTO t1 VALUES (15,92,2,'j'); -INSERT INTO t1 VALUES (16,7,3,'d'); -INSERT INTO t1 VALUES (17,NULL,0,'z'); -INSERT INTO t1 VALUES (18,3,133,'e'); -INSERT INTO t1 VALUES (19,5,1,'h'); -INSERT INTO t1 VALUES (20,1,8,'b'); -INSERT INTO t1 VALUES (21,2,5,'s'); -INSERT INTO t1 VALUES (22,NULL,5,'e'); -INSERT INTO t1 VALUES (23,1,8,'j'); -INSERT INTO t1 VALUES (24,0,6,'e'); -INSERT INTO t1 VALUES (25,210,51,'f'); -INSERT INTO t1 VALUES (26,8,4,'v'); -INSERT INTO t1 VALUES (27,7,7,'x'); -INSERT INTO t1 VALUES (28,5,6,'m'); -INSERT INTO t1 VALUES (29,NULL,4,'c'); - -set @my_save_join_cache_level= @@join_cache_level; -SET join_cache_level=6; -select count(*) from -(SELECT table2.pk FROM - t1 LEFT JOIN t1 table2 JOIN t1 table3 ON table3.col_varchar_key = table2.col_varchar_key - ON table3.col_int_nokey) foo; - -SET join_cache_level=0; -select count(*) from -(SELECT table2.pk FROM - t1 LEFT JOIN t1 table2 JOIN t1 table3 ON table3.col_varchar_key = table2.col_varchar_key - ON table3.col_int_nokey) foo; - -set join_cache_level= @my_save_join_cache_level; -drop table t1; - - ---echo # ---echo # BUG#671340: Diverging results in with mrr_sort_keys=ON|OFF and join_cache_level=5 ---echo # -CREATE TABLE t1 ( - pk int(11) NOT NULL AUTO_INCREMENT, - col_int_key int(11) NOT NULL, - col_varchar_key varchar(1) NOT NULL, - col_varchar_nokey varchar(1) NOT NULL, - PRIMARY KEY (pk), - KEY col_int_key (col_int_key), - KEY col_varchar_key (col_varchar_key,col_int_key) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES - (10,8,'v','v'), - (11,8,'f','f'), - (12,5,'v','v'), - (13,8,'s','s'), - (14,8,'a','a'), - (15,6,'p','p'), - (16,7,'z','z'), - (17,2,'a','a'), - (18,5,'h','h'), - (19,7,'h','h'), - (20,2,'v','v'), - (21,9,'v','v'), - (22,142,'b','b'), - (23,3,'y','y'), - 
(24,0,'v','v'), - (25,3,'m','m'), - (26,5,'z','z'), - (27,9,'n','n'), - (28,1,'d','d'), - (29,107,'a','a'); - -CREATE TABLE t2 ( - pk int(11) NOT NULL AUTO_INCREMENT, - col_int_key int(11) NOT NULL, - col_varchar_key varchar(1) NOT NULL, - col_varchar_nokey varchar(1) NOT NULL, - PRIMARY KEY (pk), - KEY col_int_key (col_int_key), - KEY col_varchar_key (col_varchar_key,col_int_key) -) ENGINE=TokuDB; -INSERT INTO t2 VALUES - (1,9,'x','x'), - (2,5,'g','g'), - (3,1,'o','o'), - (4,0,'g','g'), - (5,1,'v','v'), - (6,190,'m','m'), - (7,6,'x','x'), - (8,3,'c','c'), - (9,4,'z','z'), - (10,3,'i','i'), - (11,186,'x','x'), - (12,1,'g','g'), - (13,8,'q','q'), - (14,226,'m','m'), - (15,133,'p','p'), - (16,6,'e','e'), - (17,3,'t','t'), - (18,8,'j','j'), - (19,5,'h','h'), - (20,7,'w','w'); - -SELECT count(*), sum(table1.col_int_key*table2.pk) -FROM - t2 AS table1, t1 AS table2, t2 AS table3 -WHERE - table3.col_varchar_nokey = table2.col_varchar_key AND table3.pk > table2.col_varchar_nokey ; - -set @my_save_join_cache_level= @@join_cache_level; -set @my_save_join_buffer_size= @@join_buffer_size; -set join_cache_level=6; -set join_buffer_size=1536; ---disable_warnings -SELECT count(*), sum(table1.col_int_key*table2.pk) -FROM - t2 AS table1, t1 AS table2, t2 AS table3 -WHERE - table3.col_varchar_nokey = table2.col_varchar_key AND table3.pk > table2.col_varchar_nokey ; ---enable_warnings -drop table t1,t2; -set join_cache_level=@my_save_join_cache_level; -set join_buffer_size=@my_save_join_buffer_size; - - ---echo # ---echo # BUG#665669: Result differences on query re-execution ---echo # -create table t1 (pk int primary key, b int, c int default 0, index idx(b)) engine=Tokudb; -insert into t1(pk,b) values (3, 30), (2, 20), (9, 90), (7, 70), (4, 40), (5, 50), (10, 100), (12, 120); -set @bug665669_tmp=@@optimizer_switch; -set optimizer_switch='mrr=off'; -explain select * from t1 where b > 1000; ---echo # The following two must produce indentical results: -select * from t1 where pk < 2 or pk between 3 and 4; -select * from t1 where pk < 2 or pk between 3 and 4; -drop table t1; -set optimizer_switch = @bug665669_tmp; ---echo # ---echo # Bug#43360 - Server crash with a simple multi-table update ---echo # -CREATE TABLE t1 ( - a CHAR(2) NOT NULL PRIMARY KEY, - b VARCHAR(20) NOT NULL, - KEY (b) -) ENGINE=TokuDB; - -CREATE TABLE t2 ( - a CHAR(2) NOT NULL PRIMARY KEY, - b VARCHAR(20) NOT NULL, - KEY (b) -) ENGINE=TokuDB; - -INSERT INTO t1 VALUES -('AB','MySQLAB'), -('JA','Sun Microsystems'), -('MS','Microsoft'), -('IB','IBM- Inc.'), -('GO','Google Inc.'); - -INSERT INTO t2 VALUES -('AB','Sweden'), -('JA','USA'), -('MS','United States of America'), -('IB','North America'), -('GO','South America'); - -UPDATE t1,t2 SET t1.b=UPPER(t1.b) WHERE t1.b LIKE 'United%'; - -SELECT * FROM t1; - -SELECT * FROM t2; - -DROP TABLE t1,t2; - ---echo # ---echo # Testcase backport: Bug#43249 ---echo # (Note: Fixed by patch for BUG#42580) ---echo # -CREATE TABLE t1(c1 TIME NOT NULL, c2 TIME NULL, c3 DATE, PRIMARY KEY(c1), UNIQUE INDEX(c2)) engine=Tokudb; -INSERT INTO t1 VALUES('8:29:45',NULL,'2009-02-01'); -# first time, good results: -SELECT * FROM t1 WHERE c2 <=> NULL ORDER BY c2 LIMIT 2; -# second time, bad results: -SELECT * FROM t1 WHERE c2 <=> NULL ORDER BY c2 LIMIT 2; -drop table `t1`; - ---echo # ---echo # BUG#707925: Wrong result with join_cache_level=6 optimizer_use_mrr = ---echo # force (incremental, BKA join) ---echo # -set @_save_join_cache_level= @@join_cache_level; -set join_cache_level = 6; -CREATE TABLE t1 ( - f1 int(11), f2 
int(11), f3 varchar(1), f4 varchar(1), - PRIMARY KEY (f1), - KEY (f3), - KEY (f2) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES ('11','8','f','f'),('12','5','v','v'),('13','8','s','s'), -('14','8','a','a'),('15','6','p','p'),('16','7','z','z'),('17','2','a','a'), -('18','5','h','h'),('19','7','h','h'),('20','2','v','v'),('21','9','v','v'), -('22','142','b','b'),('23','3','y','y'),('24','0','v','v'),('25','3','m','m'), -('26','5','z','z'),('27','9','n','n'),('28','1','d','d'),('29','107','a','a'); - -select count(*) from ( - SELECT alias1.f2 - FROM - t1 AS alias1 JOIN ( - t1 AS alias2 FORCE KEY (f3) JOIN - t1 AS alias3 FORCE KEY (f2) ON alias3.f2 = alias2.f2 AND alias3.f4 = alias2.f3 - ) ON alias3.f1 <= alias2.f1 -) X; - -set join_cache_level=@_save_join_cache_level; -set optimizer_switch= @innodb_mrr_tmp; -drop table t1; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_mrr2.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_mrr2.test deleted file mode 100644 index 3be77674f22eb..0000000000000 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_mrr2.test +++ /dev/null @@ -1,213 +0,0 @@ --- source include/have_maria.inc -# -# MRR/Maria tests. -# - ---disable_warnings -drop table if exists t1,t2,t3,t4; ---enable_warnings - -set @maria_mrr_tmp=@@optimizer_switch; -set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on'; - -set @mrr_buffer_size_save= @@mrr_buffer_size; - -set @save_storage_engine= @@storage_engine; -set storage_engine=TokuDB; - ---source include/mrr_tests.inc -set storage_engine= @save_storage_engine; - -set @@mrr_buffer_size= @mrr_buffer_size_save; - ---echo # ---echo # Crash in quick_range_seq_next() in maria-5.3-dsmrr-cpk with join_cache_level = {8,1} ---echo # -set @save_join_cache_level= @@join_cache_level; -SET SESSION join_cache_level = 8; -CREATE TABLE `t1` ( - `col_int_key` int(11) DEFAULT NULL, - `col_datetime_key` datetime DEFAULT NULL, - `col_varchar_key` varchar(1) DEFAULT NULL, - `col_varchar_nokey` varchar(1) DEFAULT NULL, - KEY `col_varchar_key` (`col_varchar_key`,`col_int_key`) -) ENGINE=TokuDB DEFAULT CHARSET=latin1; -INSERT INTO `t1` VALUES (6,'2005-10-07 00:00:00','e','e'); -INSERT INTO `t1` VALUES (51,'2000-07-15 05:00:34','f','f'); -CREATE TABLE `t2` ( - `col_int_key` int(11) DEFAULT NULL, - `col_datetime_key` datetime DEFAULT NULL, - `col_varchar_key` varchar(1) DEFAULT NULL, - `col_varchar_nokey` varchar(1) DEFAULT NULL, - KEY `col_varchar_key` (`col_varchar_key`,`col_int_key`) -) ENGINE=TokuDB DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1; -INSERT INTO `t2` VALUES (2,'2004-10-11 18:13:16','w','w'); -INSERT INTO `t2` VALUES (2,'1900-01-01 00:00:00','d','d'); -SELECT table2 .`col_datetime_key` -FROM t2 JOIN ( t1 table2 JOIN t2 table3 ON table3 .`col_varchar_key` < table2 .`col_varchar_key` ) ON table3 .`col_varchar_nokey` ; - -drop table t1, t2; -set join_cache_level=@save_join_cache_level; - -# -# Bug #665049: index condition pushdown with Maria -# - -CREATE TABLE t1( - pk int NOT NULL, i int NOT NULL, v varchar(1) NOT NULL, - PRIMARY KEY (pk), INDEX idx (v, i) -) ENGINE=TokuDB; -INSERT INTO t1 VALUES - (1,9,'x'), (2,5,'g'), (3,1,'o'), (4,0,'g'), (5,1,'v'), - (6,190,'m'), (7,6,'x'), (8,3,'c'), (9,4,'z'), (10,3,'i'), - (11,186,'x'), (12,1,'g'), (13,8,'q'), (14,226,'m'), (15,133,'p'); - -CREATE TABLE t2( - pk int NOT NULL, i int NOT NULL, v varchar(1) NOT NULL, - PRIMARY KEY (pk), INDEX idx (v, i) -) ENGINE=TokuDB; -INSERT INTO t2 SELECT * FROM t1; -INSERT INTO t2 VALUES (77, 333, 'z'); - -CREATE TABLE t3( - pk int NOT NULL, i int NOT 
NULL, v varchar(1) NOT NULL, - PRIMARY KEY (pk), INDEX idx (v, i) -) ENGINE=TokuDB; -INSERT INTO t3 SELECT * FROM t1; -INSERT INTO t3 VALUES - (88, 442, 'y'), (99, 445, 'w'), (87, 442, 'z'), (98, 445, 'v'), (86, 442, 'x'), - (97, 445, 't'), (85, 442, 'b'), (96, 445, 'l'), (84, 442, 'a'), (95, 445, 'k'); - -set @save_join_cache_level=@@join_cache_level; -set join_cache_level=1; - -SELECT COUNT(t1.v) FROM t1, t2 IGNORE INDEX (idx), t3 IGNORE INDEX (idx) - WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; -EXPLAIN -SELECT COUNT(t1.v) FROM t1, t2 IGNORE INDEX (idx), t3 IGNORE INDEX (idx) - WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; - -SELECT COUNT(t1.v) FROM t1, t2, t3 - WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; -EXPLAIN - SELECT COUNT(t1.v) FROM t1, t2, t3 - WHERE t3.v = t2.v AND t3.i < t2.i AND t3.pk > 0 AND t2.pk > 0; - -set join_cache_level=@save_join_cache_level; - -DROP TABLE t1,t2,t3; - ---echo # ---echo # BUG#671361: virtual int Mrr_ordered_index_reader::refill_buffer(): Assertion `!know_key_tuple_params ---echo # (works only on Maria because we need 1024-byte long key) ---echo # - -SET SESSION join_cache_level = 6; -SET SESSION join_buffer_size = 1024; -CREATE TABLE t1 ( - pk int(11) NOT NULL AUTO_INCREMENT, - col_varchar_1024_latin1_key varchar(1024) DEFAULT NULL, - PRIMARY KEY (pk), - KEY col_varchar_1024_latin1_key (col_varchar_1024_latin1_key) -) ENGINE=TokuDB; - -INSERT INTO t1 VALUES - (1,'z'), (2,'abcdefjhjkl'), (3,'in'), (4,'abcdefjhjkl'), (6,'abcdefjhjkl'), - (11,'zx'), (12,'abcdefjhjm'), (13,'jn'), (14,'abcdefjhjp'), (16,'abcdefjhjr'); - -CREATE TABLE t2 ( - col_varchar_10_latin1 varchar(10) DEFAULT NULL -) ENGINE=TokuDB; -INSERT INTO t2 VALUES ('foo'), ('foo'); - -EXPLAIN SELECT count(*) -FROM t1 AS table1, t2 AS table2 -WHERE - table1.col_varchar_1024_latin1_key = table2.col_varchar_10_latin1 AND table1.pk<>0 ; - -SELECT count(*) -FROM t1 AS table1, t2 AS table2 -WHERE - table1.col_varchar_1024_latin1_key = table2.col_varchar_10_latin1 AND table1.pk<>0 ; - -drop table t1, t2; - ---echo # ---echo # BUG#693747: Assertion multi_range_read.cc:908: int DsMrr_impl::dsmrr_init( ---echo # -set @_save_join_cache_level= @@join_cache_level; -set @_save_join_buffer_size= @@join_buffer_size; - -set join_cache_level=8; -set join_buffer_size=10240; - -CREATE TABLE t1 ( - f2 varchar(32) COLLATE latin1_swedish_ci, - f3 int(11), - f4 varchar(1024) COLLATE utf8_bin, - f5 varchar(1024) COLLATE latin1_bin, - KEY (f5) -) ENGINE=TokuDB; - ---echo # Fill the table with some data ---disable_query_log -INSERT IGNORE INTO t1 VALUES -('cueikuirqr','0','f4-data','hcueikuirqrzflno'),('her','0','f4-data','ehcueikuirqrzfln'), -('YKAOE','0','f4-data','qieehcueikuirqrz'),('youre','0','f4-data','nkqieehcueikuirq'), -('b','0','f4-data','the'),('MGUDG','0','f4-data','m'), -('UXAGU','0','f4-data','HZXVA'),('bwbgsnkqie','0','f4-data','something'), -('s','0','f4-data','slelfhjawbwbgsnk'),('the','0','f4-data','if'), -('TDLKE','0','f4-data','MGWNJ'),('do','0','f4-data','see'), -('why','0','f4-data','mean'),('THKCG','0','f4-data','YFLDY'), -('x','0','f4-data','e'),('yncitaeysb','0','f4-data','tgyncitaeysbgucs'), -('ZEOXX','0','f4-data','jawbwbgsnkqieehc'),('hjawbwbgsn','0','f4-data','fhjawbwbgsnkqiee'), -('all','0','f4-data','sbgucsgqslelfhja'),('the','0','f4-data','would'), -('mtgyncitae','0','f4-data','ISNQQ'),('KNCUI','0','f4-data','want'), -('is','0','f4-data','i'),('out','0','f4-data','jvcmjlmtgyncitae'), 
-('it','0','f4-data','you'),('LHDIH','0','f4-data','txmtxyjvcmjlmtgy'), -('z','0','f4-data','ntxmtxyjvcmjlmtg'),('vyhnmvgmcn','0','f4-data','AIGQK'), -('ytvyhnmvgm','0','f4-data','z'),('t','0','f4-data','on'), -('xqegbytvyh','0','f4-data','ixqegbytvyhnmvgm'),('WGVRU','0','f4-data','h'), -('b','0','f4-data','z'),('who','0','f4-data','gddixqegbytvy'), -('PMLFL','0','f4-data','vgmcntxmtxyjvcmj'),('back','0','f4-data','n'), -('i','0','f4-data','PZGUB'),('f','0','f4-data','the'), -('PNXVP','0','f4-data','v'),('MAKKL','0','f4-data','CGCWF'), -('RMDAV','0','f4-data','v'),('l','0','f4-data','n'), -('rhnoypgddi','0','f4-data','VIZNE'),('t','0','f4-data','a'), -('like','0','f4-data','JSHPZ'),('pskeywslmk','0','f4-data','q'), -('QZZJJ','0','f4-data','c'),('atlxepskey','0','f4-data','YJRMA'), -('YUVOU','0','f4-data','eywslmkdrhnoypgd'),('some','0','f4-data','r'), -('c','0','f4-data','her'),('o','0','f4-data','EMURT'), -('if','0','f4-data','had'),('when','0','f4-data','CLVWT'), -('blfufrcdjm','0','f4-data','IZCZN'),('vutblfufrc','0','f4-data','how'), -('why','0','f4-data','I'),('IXLYQ','0','f4-data','weuwuvutblfufrcd'), -('here','0','f4-data','m'),('ZOCTJ','0','f4-data','IDSFD'), -('kqsweuwuvu','0','f4-data','oh'),('ykqsweuwuv','0','f4-data','zykqsweuwuvutblf'), -('zezykqsweu','0','f4-data','t'),('q','0','f4-data','o'), -('IBKAU','0','f4-data','oh'),('ivjisuzezy','0','f4-data','XHXKE'), -('xsivjisuze','0','f4-data','plxsivjisuzezykq'),('have','0','f4-data','uvplxsivjisuzezy'), -('on','0','f4-data','me'),('ijkfuvplxs','0','f4-data','OGEHV'), -('u','0','f4-data','okay'),('i','0','f4-data','pajzbbojshnijkfu'), -('of','0','f4-data','g'),('for','0','f4-data','Im'), -('or','0','f4-data','ZOJHX'),('n','0','f4-data','you'), -('that','0','f4-data','just'),('bbojshnijk','0','f4-data','JYGSJ'), -('k','0','f4-data','y'),('k','0','f4-data','y'), -('be','0','f4-data','m'),('fnbmxwicrk','0','f4-data','t'), -('yaffpegvav','0','f4-data','have'),('crkdymahya','0','f4-data','QQWQI'), -('t','0','f4-data','hnijkfuvplxsivji'),('dgxpajzbbo','0','f4-data','vavdgxpajzbbojsh'), -('g','0','f4-data','pegvavdgxpajzbbo'),('Im','0','f4-data','ffpegvavdgxpajzb'); ---enable_query_log - - -SELECT alias2.* , alias1.f2 -FROM - t1 AS alias1 - LEFT JOIN t1 AS alias2 ON alias1.f2 = alias2.f5 -WHERE - alias2.f3 < 0; - -set join_cache_level=@_save_join_cache_level; -set join_buffer_size=@_save_join_buffer_size; -set optimizer_switch=@maria_mrr_tmp; - -drop table t1; diff --git a/storage/tokudb/scripts/make.mysql.bash b/storage/tokudb/scripts/make.mysql.bash index a0e5db48a47c3..1bf258c5c3bb4 100755 --- a/storage/tokudb/scripts/make.mysql.bash +++ b/storage/tokudb/scripts/make.mysql.bash @@ -52,7 +52,7 @@ cmake_build_type=RelWithDebInfo mysql_tree= tokudbengine_tree= ftindex_tree= -jemalloc_version=3.3.0 +jemalloc_version=3.6.0 jemalloc_tree= backup_tree= diff --git a/storage/tokudb/scripts/make.mysql.debug.env.bash b/storage/tokudb/scripts/make.mysql.debug.env.bash index b2bfaef1b71f8..b7c270cfbd7fb 100755 --- a/storage/tokudb/scripts/make.mysql.debug.env.bash +++ b/storage/tokudb/scripts/make.mysql.debug.env.bash @@ -57,7 +57,7 @@ git_tag= mysql=mysql-5.5 mysql_tree=mysql-5.5.35 jemalloc=jemalloc -jemalloc_tree=3.3.1 +jemalloc_tree=3.6.0 tokudbengine=tokudb-engine tokudbengine_tree=master ftindex=ft-index diff --git a/storage/tokudb/scripts/tokustat.py b/storage/tokudb/scripts/tokustat.py index 8e9233e59434a..3ecac68e769f1 100755 --- a/storage/tokudb/scripts/tokustat.py +++ b/storage/tokudb/scripts/tokustat.py @@ -9,10 +9,10 @@ def usage(): 
print "diff the tokudb engine status" print "--host=HOSTNAME (default: localhost)" print "--port=PORT" - print "--sleeptime=SLEEPTIME (default: 10 seconds)" + print "--iterations=MAX_ITERATIONS (default: forever)" + print "--interval=TIME_BETWEEN_SAMPLES (default: 10 seconds)" print "--q='show engine tokudb status'" print "--q='select * from information_schema.global_status'" - return 1 def convert(v): @@ -23,14 +23,11 @@ def convert(v): v = float(v) return v -def printit(stats, rs, sleeptime): - # print rs +def printit(stats, rs, interval): for t in rs: l = len(t) # grab the last 2 fields in t k = t[l-2] v = t[l-1] - # print k, v # debug - # try to convert v try: v = convert(v) except: @@ -41,11 +38,11 @@ def printit(stats, rs, sleeptime): print k, "|", oldv, "|", v, try: d = v - oldv - if sleeptime != 1: - if d >= sleeptime: - e = d / sleeptime + if interval != 1: + if d >= interval: + e = d / interval else: - e = float(d) / sleeptime + e = float(d) / interval print "|", d, "|", e else: print "|", d @@ -59,7 +56,9 @@ def main(): port = None user = None passwd = None - sleeptime = 10 + interval = 10 + iterations = 0 + q = 'show engine tokudb status' for a in sys.argv[1:]: @@ -71,6 +70,9 @@ def main(): continue return usage() + iterations = int(iterations) + interval = int(interval) + connect_parameters = {} if host is not None: if host[0] == '/': @@ -93,7 +95,9 @@ def main(): print "connected" stats = {} - while 1: + i = 0 + while iterations == 0 or i <= iterations: + i += 1 try: c = db.cursor() n = c.execute(q) @@ -105,8 +109,8 @@ def main(): return 2 try: - printit(stats, rs, int(sleeptime)) - time.sleep(int(sleeptime)) + printit(stats, rs, interval) + time.sleep(interval) except: print "printit", sys.exc_info() return 3