Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

os/bluestore: fix deferred writes; improve flush #13888

Merged
merged 44 commits into from
Mar 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
ffd4d2f
vstart.sh: larger wal device
liewegas Mar 9, 2017
5973839
os/bluestore: wal -> deferred
liewegas Mar 8, 2017
bc5bfdd
os/bluestore: update freelist on initial commit
liewegas Mar 8, 2017
bcd2a32
os/bluestore: write padded data into buffer cache
liewegas Mar 9, 2017
6f2f8b3
os/bluestore: pin writing cache buffers until txc is finished
liewegas Mar 6, 2017
83e33a3
os/bluestore: no need to Onode::flush() in _do_read
liewegas Mar 6, 2017
a56cd6b
os/bluestore: no need to Onode::flush() on truncate
liewegas Mar 8, 2017
3238162
os/bluestore: make flush() only wait for kv commit
liewegas Mar 8, 2017
eff1e83
os/bluestore: separate _txc_finish_kv into _txc_{applied,committed}_kv
liewegas Mar 8, 2017
78df9b3
os/bluestore: release deferred throttle on io finish, before cleanup
liewegas Mar 8, 2017
5cb5a90
os/bluestore: revert throttle perfcounters
liewegas Mar 8, 2017
9ee0c84
os/bluestore: add OpSequencer::drain()
liewegas Mar 14, 2017
2d0d375
os/bluestore: make Sequencer::flush() more efficient
liewegas Mar 9, 2017
4aa44d2
os/bluestore: make OnodeSpace onode_map private
liewegas Mar 11, 2017
9b28d61
os/bluestore: keep onode refs for lifetime of obc
liewegas Mar 11, 2017
3cf2b0f
os/bluestore: keep all OpSequencers registered
liewegas Mar 14, 2017
986776d
os/bluestore: reimplement/rename _sync -> _flush_all
liewegas Mar 8, 2017
3dc82d5
os/bluestore: move _osr_reap_done
liewegas Mar 8, 2017
5fafd1f
os/bluestore: fix OpSequencer/Sequencer lifecycle
liewegas Mar 10, 2017
6db031b
os/bluestore: restructure deferred write queue
liewegas Mar 9, 2017
78b9cea
os/bluestore: move many initializations into header
liewegas Mar 9, 2017
d3a425f
os/bluestore: avoid waking up kv thread on deferred write completion
liewegas Mar 9, 2017
44d4983
os/bluestore: only discard deallocated regions of a blob if !shared
liewegas Mar 13, 2017
a4b9012
os/bluestore: batch up to bluestore_deferred_batch_ops before submitting
liewegas Mar 9, 2017
7a3e85f
os/bluestore: flush old/discarded OpSequencers too
liewegas Mar 14, 2017
c1f0108
os/bluestore: debug alloc release
liewegas Mar 14, 2017
1fefeeb
os/bluestore: avoid extra dev flush on single device when all io is deferred
liewegas Mar 9, 2017
ba159de
os/bluestore: drop obsolete comment
liewegas Mar 10, 2017
e46081c
ceph_test_objectstore: fix Synthetic to never modify bufferlists
liewegas Mar 10, 2017
3f9c216
os/bluestore: prevent throttle deadlock due to deferred writes
liewegas Mar 13, 2017
3a3d9ad
os/bluestore: make throttles tunable online
liewegas Mar 14, 2017
f4d4c9c
os/bluestore: remove dead _do_deferred_op code
liewegas Mar 14, 2017
81e8682
os/bluestore: fix perfcounters for deferred io
liewegas Mar 14, 2017
01ef844
os/bluestore: take Collection ref from SharedBlob
liewegas Mar 15, 2017
e83fc55
os/bluestore: flush_cache on umount, fsck finish, etc.
liewegas Mar 16, 2017
3ad789c
os/bluestore: nicer Onode dout prefix
liewegas Mar 16, 2017
ed9f54b
os/bluestore: better debugging around collections
liewegas Mar 16, 2017
4de29d0
os/bluestore/KernelDevice: drop unused flush_lock
liewegas Mar 16, 2017
d93d6d0
unittest_bluestore_types: fix Collection using tests
liewegas Mar 16, 2017
52c93f5
ceph_test_objectstore: set bluestore cache shards to 5
liewegas Mar 17, 2017
e4d547e
os/bluestore: simplify flush() wake-up condition
liewegas Mar 17, 2017
9732b6c
os/bluestore: move cached items around on collection split
liewegas Mar 17, 2017
d8fa788
os/bluestore: clean up flush_all()
liewegas Mar 17, 2017
def1760
os/bluestore: handle zombie OpSequencers
liewegas Mar 18, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/common/Throttle.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ class Throttle {
*/
int64_t get_max() const { return max.read(); }

/**
 * Report whether the throttle is at least half used.
 *
 * @return true when count.read() is greater than or equal to
 *         half of max.read(), false otherwise
 */
bool past_midpoint() const {
  const auto midpoint = max.read() / 2;
  return count.read() >= midpoint;
}

/**
* set the new max number, and wait until the number of taken slots drains
* and drops below this limit.
Expand Down
16 changes: 6 additions & 10 deletions src/common/config_opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -1051,9 +1051,9 @@ OPTION(bluestore_min_alloc_size, OPT_U32, 0)
OPTION(bluestore_min_alloc_size_hdd, OPT_U32, 64*1024)
OPTION(bluestore_min_alloc_size_ssd, OPT_U32, 4*1024)
OPTION(bluestore_max_alloc_size, OPT_U32, 0)
OPTION(bluestore_prefer_wal_size, OPT_U32, 0)
OPTION(bluestore_prefer_wal_size_hdd, OPT_U32, 32768)
OPTION(bluestore_prefer_wal_size_ssd, OPT_U32, 0)
OPTION(bluestore_prefer_deferred_size, OPT_U32, 0)
OPTION(bluestore_prefer_deferred_size_hdd, OPT_U32, 32768)
OPTION(bluestore_prefer_deferred_size_ssd, OPT_U32, 0)
OPTION(bluestore_compression_mode, OPT_STR, "none") // force|aggressive|passive|none
OPTION(bluestore_compression_algorithm, OPT_STR, "snappy")
OPTION(bluestore_compression_min_blob_size, OPT_U32, 128*1024)
Expand Down Expand Up @@ -1104,14 +1104,11 @@ OPTION(bluestore_fsck_on_umount_deep, OPT_BOOL, true)
OPTION(bluestore_fsck_on_mkfs, OPT_BOOL, true)
OPTION(bluestore_fsck_on_mkfs_deep, OPT_BOOL, false)
OPTION(bluestore_sync_submit_transaction, OPT_BOOL, false) // submit kv txn in queueing thread (not kv_sync_thread)
OPTION(bluestore_sync_wal_apply, OPT_BOOL, true) // perform initial wal work synchronously (possibly in combination with aio so we only *queue* ios)
OPTION(bluestore_wal_threads, OPT_INT, 4)
OPTION(bluestore_wal_thread_timeout, OPT_INT, 30)
OPTION(bluestore_wal_thread_suicide_timeout, OPT_INT, 120)
OPTION(bluestore_max_ops, OPT_U64, 512)
OPTION(bluestore_max_bytes, OPT_U64, 64*1024*1024)
OPTION(bluestore_wal_max_ops, OPT_U64, 512)
OPTION(bluestore_wal_max_bytes, OPT_U64, 128*1024*1024)
OPTION(bluestore_deferred_max_ops, OPT_U64, 512)
OPTION(bluestore_deferred_max_bytes, OPT_U64, 128*1024*1024)
OPTION(bluestore_deferred_batch_ops, OPT_U64, 8) // batch up to this many deferred ops before submitting
OPTION(bluestore_nid_prealloc, OPT_INT, 1024)
OPTION(bluestore_blobid_prealloc, OPT_U64, 10240)
OPTION(bluestore_clone_cow, OPT_BOOL, true) // do copy-on-write for clones
Expand All @@ -1126,7 +1123,6 @@ OPTION(bluestore_debug_prefragment_max, OPT_INT, 1048576)
OPTION(bluestore_debug_inject_read_err, OPT_BOOL, false)
OPTION(bluestore_debug_randomize_serial_transaction, OPT_INT, 0)
OPTION(bluestore_debug_omit_block_device_write, OPT_BOOL, false)
OPTION(bluestore_inject_wal_apply_delay, OPT_FLOAT, 0)
OPTION(bluestore_shard_finishers, OPT_BOOL, false)

OPTION(kstore_max_ops, OPT_U64, 512)
Expand Down
12 changes: 11 additions & 1 deletion src/os/ObjectStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,15 @@ class ObjectStore {
*/
struct Sequencer_impl : public RefCountedObject {
CephContext* cct;

// block until any previous transactions are visible. specifically,
// collection_list and collection_empty need to reflect prior operations.
virtual void flush() = 0;

// called when we are done with the impl. the impl may have a different
// (longer) lifecycle than the Sequencer.
virtual void discard() {}

/**
* Async flush_commit
*
Expand Down Expand Up @@ -165,8 +172,11 @@ class ObjectStore {
Sequencer_implRef p;

explicit Sequencer(string n)
: name(n), shard_hint(spg_t()), p(NULL) {}
: name(n), shard_hint(spg_t()), p(NULL) {
}
~Sequencer() {
  // The impl may live longer than this Sequencer, so tell it
  // explicitly that we are done with it (only if one was attached).
  if (p) {
    p->discard();
  }
}

/// return a unique string identifier for this sequencer
Expand Down