Skip to content

Commit

Permalink
os/bluestore/KernelDevice: debug support to detect stalled aios
Browse files Browse the repository at this point in the history
This is additional bookkeeping to track the oldest aio in flight, so we
make it optional, and disabled by default.  It is useful for identifying
buggy kernel aio or bad devices.

Signed-off-by: Sage Weil <sage@redhat.com>
  • Loading branch information
liewegas committed Nov 7, 2016
1 parent 3bbfcf1 commit 3b1b5a9
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/common/config_opts.h
Expand Up @@ -937,6 +937,8 @@ OPTION(bdev_aio, OPT_BOOL, true)
OPTION(bdev_aio_poll_ms, OPT_INT, 250) // milliseconds
OPTION(bdev_aio_max_queue_depth, OPT_INT, 32)
OPTION(bdev_block_size, OPT_INT, 4096)
OPTION(bdev_debug_aio, OPT_BOOL, false)
OPTION(bdev_debug_aio_suicide_timeout, OPT_FLOAT, 60.0)

// if yes, osd will unbind all NVMe devices from kernel driver and bind them
// to the uio_pci_generic driver. The purpose is to prevent the case where
Expand Down
50 changes: 50 additions & 0 deletions src/os/bluestore/KernelDevice.cc
Expand Up @@ -259,6 +259,10 @@ void KernelDevice::_aio_thread()
for (int i = 0; i < r; ++i) {
IOContext *ioc = static_cast<IOContext*>(aio[i]->priv);
_aio_log_finish(ioc, aio[i]->offset, aio[i]->length);
if (aio[i]->queue_item.is_linked()) {
std::lock_guard<std::mutex> l(debug_queue_lock);
debug_aio_unlink(*aio[i]);
}
int left = --ioc->num_running;
int r = aio[i]->get_return_value();
dout(10) << __func__ << " finished aio " << aio[i] << " r " << r
Expand All @@ -275,6 +279,25 @@ void KernelDevice::_aio_thread()
}
}
}
if (g_conf->bdev_debug_aio) {
utime_t now = ceph_clock_now(NULL);
std::lock_guard<std::mutex> l(debug_queue_lock);
if (debug_oldest) {
if (debug_stall_since == utime_t()) {
debug_stall_since = now;
} else {
utime_t cutoff = now;
cutoff -= g_conf->bdev_debug_aio_suicide_timeout;
if (debug_stall_since < cutoff) {
derr << __func__ << " stalled aio " << debug_oldest
<< " since " << debug_stall_since << ", timeout is "
<< g_conf->bdev_debug_aio_suicide_timeout
<< "s, suicide" << dendl;
assert(0 == "stalled aio... buggy kernel or bad device?");
}
}
}
}
reap_ioc();
if (g_conf->bdev_inject_crash) {
++inject_crash_count;
Expand Down Expand Up @@ -310,6 +333,29 @@ void KernelDevice::_aio_log_start(
}
}

void KernelDevice::debug_aio_link(FS::aio_t& aio)
{
if (debug_queue.empty()) {
debug_oldest = &aio;
}
debug_queue.push_back(aio);
}

void KernelDevice::debug_aio_unlink(FS::aio_t& aio)
{
if (aio.queue_item.is_linked()) {
debug_queue.erase(debug_queue.iterator_to(aio));
if (debug_oldest == &aio) {
if (debug_queue.empty()) {
debug_oldest = nullptr;
} else {
debug_oldest = &debug_queue.front();
}
debug_stall_since = utime_t();
}
}
}

void KernelDevice::_aio_log_finish(
IOContext *ioc,
uint64_t offset,
Expand Down Expand Up @@ -365,6 +411,10 @@ void KernelDevice::aio_submit(IOContext *ioc)
// do not dereference txc (or it's contents) after we submit (if
// done == true and we don't loop)
int retries = 0;
if (g_conf->bdev_debug_aio) {
std::lock_guard<std::mutex> l(debug_queue_lock);
debug_aio_link(*cur);
}
int r = aio_queue.submit(*cur, &retries);
if (retries)
derr << __func__ << " retries " << retries << dendl;
Expand Down
8 changes: 8 additions & 0 deletions src/os/bluestore/KernelDevice.h
Expand Up @@ -64,6 +64,14 @@ class KernelDevice : public BlockDevice {

int direct_read_unaligned(uint64_t off, uint64_t len, char *buf);

// stalled aio debugging
FS::aio_list_t debug_queue;
std::mutex debug_queue_lock;
FS::aio_t *debug_oldest;
utime_t debug_stall_since;
void debug_aio_link(FS::aio_t& aio);
void debug_aio_unlink(FS::aio_t& aio);

public:
KernelDevice(aio_callback_t cb, void *cbpriv);

Expand Down
2 changes: 2 additions & 0 deletions src/test/objectstore/store_test.cc
Expand Up @@ -5745,6 +5745,8 @@ int main(int argc, char **argv) {
g_ceph_context->_conf->set_val("bluestore_debug_randomize_serial_transaction",
"10");

g_ceph_context->_conf->set_val("bdev_debug_aio", "true");

// specify device size
g_ceph_context->_conf->set_val("bluestore_block_size", "10240000000");

Expand Down

0 comments on commit 3b1b5a9

Please sign in to comment.