Merge tag 'for-linus-20180727' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "Bigger than usual at this time, mostly due to the O_DIRECT corruption
  issue and the fact that I was on vacation last week. This contains:

   - NVMe pull request with two fixes for the FC code, and two target
     fixes (Christoph)

   - a DIF bio reset iteration fix (Greg Edwards)

   - two nbd reply and requeue fixes (Josef)

   - SCSI timeout fixup (Keith)

   - a small series that fixes an issue with bio_iov_iter_get_pages(),
     which ended up causing corruption for larger sized O_DIRECT writes
     that raced with buffered writes (Martin Wilck)"

* tag 'for-linus-20180727' of git://git.kernel.dk/linux-block:
  block: reset bi_iter.bi_done after splitting bio
  block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs
  blkdev: __blkdev_direct_IO_simple: fix leak in error case
  block: bio_iov_iter_get_pages: fix size of last iovec
  nvmet: only check for filebacking on -ENOTBLK
  nvmet: fixup crash on NULL device path
  scsi: set timed out out mq requests to complete
  blk-mq: export setting request completion state
  nvme: if_ready checks to fail io to deleting controller
  nvmet-fc: fix target sgl list on large transfers
  nbd: handle unexpected replies better
  nbd: don't requeue the same request twice.
torvalds committed Jul 27, 2018
2 parents 864af0d + 5151842 commit eb181a8
Showing 14 changed files with 209 additions and 56 deletions.
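For context on the corruption addressed by the bio_iov_iter_get_pages() series below: the old code could pin pages from only the first non-empty segment of a multi-segment iov_iter while the caller assumed the whole request had been queued, leaving part of a direct write silently unwritten. A minimal user-space sketch of the I/O shape involved (hypothetical file name and sizes, assuming 4096-byte alignment requirements; an illustration, not the actual reproducer):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdlib.h>
	#include <sys/uio.h>
	#include <unistd.h>

	int main(void)
	{
		struct iovec iov[2];
		void *a, *b;
		int fd = open("testfile", O_WRONLY | O_CREAT | O_DIRECT, 0644);

		if (fd < 0)
			return 1;
		/* O_DIRECT wants suitably aligned buffers and lengths */
		if (posix_memalign(&a, 4096, 4096) || posix_memalign(&b, 4096, 4096))
			return 1;
		iov[0] = (struct iovec){ .iov_base = a, .iov_len = 4096 };
		iov[1] = (struct iovec){ .iov_base = b, .iov_len = 4096 };
		/* two segments: the case the old code mishandled */
		if (writev(fd, iov, 2) != 8192)
			return 1;
		return close(fd);
	}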
54 changes: 41 additions & 13 deletions block/bio.c
@@ -903,25 +903,27 @@ int bio_add_page(struct bio *bio, struct page *page,
EXPORT_SYMBOL(bio_add_page);

/**
- * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
* @bio: bio to add pages to
* @iter: iov iterator describing the region to be mapped
*
- * Pins as many pages from *iter and appends them to @bio's bvec array. The
+ * Pins pages from *iter and appends them to @bio's bvec array. The
* pages will have to be released using put_page() when done.
+ * For multi-segment *iter, this function only adds pages from the
+ * next non-empty segment of the iov iterator.
*/
- int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
- unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
+ unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
struct page **pages = (struct page **)bv;
- size_t offset, diff;
+ size_t offset;
ssize_t size;

size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
if (unlikely(size <= 0))
return size ? size : -EFAULT;
- nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
+ idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;

/*
* Deep magic below: We need to walk the pinned pages backwards
@@ -934,21 +936,46 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
bio->bi_iter.bi_size += size;
bio->bi_vcnt += nr_pages;

- diff = (nr_pages * PAGE_SIZE - offset) - size;
- while (nr_pages--) {
- bv[nr_pages].bv_page = pages[nr_pages];
- bv[nr_pages].bv_len = PAGE_SIZE;
- bv[nr_pages].bv_offset = 0;
+ while (idx--) {
+ bv[idx].bv_page = pages[idx];
+ bv[idx].bv_len = PAGE_SIZE;
+ bv[idx].bv_offset = 0;
}

bv[0].bv_offset += offset;
bv[0].bv_len -= offset;
- if (diff)
- bv[bio->bi_vcnt - 1].bv_len -= diff;
+ bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size;

iov_iter_advance(iter, size);
return 0;
}
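The bv[nr_pages - 1] line above is the "fix size of last iovec" change: bv already points bio->bi_vcnt entries into bio->bi_io_vec, and bi_vcnt has just been increased by nr_pages, so indexing the helper array with bio->bi_vcnt - 1 overshoots whenever the bio held vectors before the call. A stand-alone model of the slot arithmetic (made-up counts, not kernel code):

	#include <assert.h>

	int main(void)
	{
		unsigned int orig_vcnt = 2;	/* bvecs already in the bio */
		unsigned int nr_pages = 3;	/* pages pinned by this call */
		unsigned int vcnt = orig_vcnt + nr_pages;	/* bi_vcnt after the bump */

		/* bv == bi_io_vec + orig_vcnt, so bv[i] is bi_io_vec[orig_vcnt + i] */
		assert(orig_vcnt + (nr_pages - 1) == vcnt - 1);	/* new index hits the last new slot */
		assert(orig_vcnt + (vcnt - 1) != vcnt - 1);	/* old index overshoots by orig_vcnt */
		return 0;
	}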

+ /**
+ * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * @bio: bio to add pages to
+ * @iter: iov iterator describing the region to be mapped
+ *
+ * Pins pages from *iter and appends them to @bio's bvec array. The
+ * pages will have to be released using put_page() when done.
+ * The function tries, but does not guarantee, to pin as many pages as
+ * fit into the bio, or are requested in *iter, whichever is smaller.
+ * If MM encounters an error pinning the requested pages, it stops.
+ * Error is returned only if 0 pages could be pinned.
+ */
+ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+ {
+ unsigned short orig_vcnt = bio->bi_vcnt;

+ do {
+ int ret = __bio_iov_iter_get_pages(bio, iter);

+ if (unlikely(ret))
+ return bio->bi_vcnt > orig_vcnt ? 0 : ret;

+ } while (iov_iter_count(iter) && !bio_full(bio));

+ return 0;
+ }
EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);

static void submit_bio_wait_endio(struct bio *bio)
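With the wrapper above, bio_iov_iter_get_pages() keeps pinning until the iter is drained or the bio is full, and reports an error only when nothing at all could be pinned. A caller can then be shaped roughly like this sketch (simplified and loosely modeled on the direct-I/O path, not the literal __blkdev_direct_IO code; device setup, error paths, and completion handling omitted):

	while (iov_iter_count(iter)) {
		struct bio *bio = bio_alloc(GFP_KERNEL, BIO_MAX_PAGES);
		int ret;

		bio->bi_opf = REQ_OP_WRITE;
		ret = bio_iov_iter_get_pages(bio, iter);
		if (ret) {	/* only if zero pages could be pinned */
			bio_put(bio);
			return ret;
		}
		submit_bio(bio);	/* iter has been advanced past the pinned pages */
	}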
@@ -1866,6 +1893,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
bio_integrity_trim(split);

bio_advance(bio, split->bi_iter.bi_size);
+ bio->bi_iter.bi_done = 0;

if (bio_flagged(bio, BIO_TRACE_COMPLETION))
bio_set_flag(split, BIO_TRACE_COMPLETION);
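The one-line bi_done reset above is the DIF fix: bio_advance() consumes bi_size but also grows bi_iter.bi_done, and bio_rewind_iter() on the integrity completion path rewinds by bi_done. After bio_split() has advanced the parent past the split-off front, a stale bi_done would rewind the remainder beyond its own start. A toy model of that bookkeeping (simplified fields, not the kernel structs):

	struct toy_iter {
		unsigned int bi_size;	/* bytes remaining */
		unsigned int bi_done;	/* bytes already advanced past */
	};

	static void toy_advance(struct toy_iter *it, unsigned int bytes)
	{
		it->bi_size -= bytes;
		it->bi_done += bytes;	/* a later rewind steps back by this much */
	}

	int main(void)
	{
		struct toy_iter parent = { .bi_size = 8192, .bi_done = 0 };

		toy_advance(&parent, 4096);	/* as in bio_split(): parent keeps the tail */
		parent.bi_done = 0;		/* the fix: the tail starts with a clean count */
		return 0;
	}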
4 changes: 1 addition & 3 deletions block/blk-mq.c
@@ -558,10 +558,8 @@ static void __blk_mq_complete_request(struct request *rq)
bool shared = false;
int cpu;

- if (cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) !=
- MQ_RQ_IN_FLIGHT)
+ if (!blk_mq_mark_complete(rq))
return;

if (rq->internal_tag != -1)
blk_mq_sched_completed_request(rq);

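blk_mq_mark_complete(), exported by the "blk-mq: export setting request completion state" patch, turns the IN_FLIGHT -> COMPLETE transition into an explicit claim, which lets the SCSI timeout path mark a timed-out request complete while a racing normal completion backs off. The same idiom in stand-alone form (C11 atomics standing in for the kernel's cmpxchg(); states reduced to the essentials):

	#include <assert.h>
	#include <stdatomic.h>
	#include <stdbool.h>

	enum rq_state { RQ_IDLE, RQ_IN_FLIGHT, RQ_COMPLETE };

	/* whoever flips IN_FLIGHT -> COMPLETE first owns completion */
	static bool mark_complete(_Atomic enum rq_state *state)
	{
		enum rq_state expected = RQ_IN_FLIGHT;

		return atomic_compare_exchange_strong(state, &expected, RQ_COMPLETE);
	}

	int main(void)
	{
		_Atomic enum rq_state state = RQ_IN_FLIGHT;

		assert(mark_complete(&state));	/* e.g. the timeout handler wins */
		assert(!mark_complete(&state));	/* the racing completion backs off */
		return 0;
	}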
96 changes: 79 additions & 17 deletions drivers/block/nbd.c
@@ -112,12 +112,16 @@ struct nbd_device {
struct task_struct *task_setup;
};

+ #define NBD_CMD_REQUEUED 1

struct nbd_cmd {
struct nbd_device *nbd;
+ struct mutex lock;
int index;
int cookie;
- struct completion send_complete;
blk_status_t status;
+ unsigned long flags;
+ u32 cmd_cookie;
};

#if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -146,6 +150,35 @@ static inline struct device *nbd_to_dev(struct nbd_device *nbd)
return disk_to_dev(nbd->disk);
}

+ static void nbd_requeue_cmd(struct nbd_cmd *cmd)
+ {
+ struct request *req = blk_mq_rq_from_pdu(cmd);

+ if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags))
+ blk_mq_requeue_request(req, true);
+ }

+ #define NBD_COOKIE_BITS 32

+ static u64 nbd_cmd_handle(struct nbd_cmd *cmd)
+ {
+ struct request *req = blk_mq_rq_from_pdu(cmd);
+ u32 tag = blk_mq_unique_tag(req);
+ u64 cookie = cmd->cmd_cookie;

+ return (cookie << NBD_COOKIE_BITS) | tag;
+ }

+ static u32 nbd_handle_to_tag(u64 handle)
+ {
+ return (u32)handle;
+ }

+ static u32 nbd_handle_to_cookie(u64 handle)
+ {
+ return (u32)(handle >> NBD_COOKIE_BITS);
+ }
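The wire handle is thus 64 bits: a per-command sequence cookie in the upper 32 and the blk-mq unique tag in the lower 32, which lets the reply path reject completions aimed at an earlier incarnation of the same tag. A stand-alone round trip of the packing (test values made up):

	#include <assert.h>
	#include <stdint.h>

	#define NBD_COOKIE_BITS 32

	int main(void)
	{
		uint32_t tag = 0xdeadbeef, cookie = 7;
		uint64_t handle = ((uint64_t)cookie << NBD_COOKIE_BITS) | tag;

		assert((uint32_t)handle == tag);			/* nbd_handle_to_tag() */
		assert((uint32_t)(handle >> NBD_COOKIE_BITS) == cookie);	/* nbd_handle_to_cookie() */
		return 0;
	}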

static const char *nbdcmd_to_ascii(int cmd)
{
switch (cmd) {
@@ -319,6 +352,9 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
}
config = nbd->config;

+ if (!mutex_trylock(&cmd->lock))
+ return BLK_EH_RESET_TIMER;

if (config->num_connections > 1) {
dev_err_ratelimited(nbd_to_dev(nbd),
"Connection timed out, retrying (%d/%d alive)\n",
@@ -343,7 +379,8 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
nbd_mark_nsock_dead(nbd, nsock, 1);
mutex_unlock(&nsock->tx_lock);
}
- blk_mq_requeue_request(req, true);
+ mutex_unlock(&cmd->lock);
+ nbd_requeue_cmd(cmd);
nbd_config_put(nbd);
return BLK_EH_DONE;
}
@@ -353,6 +390,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
}
set_bit(NBD_TIMEDOUT, &config->runtime_flags);
cmd->status = BLK_STS_IOERR;
+ mutex_unlock(&cmd->lock);
sock_shutdown(nbd);
nbd_config_put(nbd);
done:
@@ -430,9 +468,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
struct iov_iter from;
unsigned long size = blk_rq_bytes(req);
struct bio *bio;
+ u64 handle;
u32 type;
u32 nbd_cmd_flags = 0;
- u32 tag = blk_mq_unique_tag(req);
int sent = nsock->sent, skip = 0;

iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
@@ -474,6 +512,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
goto send_pages;
}
iov_iter_advance(&from, sent);
+ } else {
+ cmd->cmd_cookie++;
}
cmd->index = index;
cmd->cookie = nsock->cookie;
@@ -482,7 +522,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
request.len = htonl(size);
}
- memcpy(request.handle, &tag, sizeof(tag));
+ handle = nbd_cmd_handle(cmd);
+ memcpy(request.handle, &handle, sizeof(handle));

dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
req, nbdcmd_to_ascii(type),
@@ -500,6 +541,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
nsock->pending = req;
nsock->sent = sent;
}
+ set_bit(NBD_CMD_REQUEUED, &cmd->flags);
return BLK_STS_RESOURCE;
}
dev_err_ratelimited(disk_to_dev(nbd->disk),
@@ -541,6 +583,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
*/
nsock->pending = req;
nsock->sent = sent;
+ set_bit(NBD_CMD_REQUEUED, &cmd->flags);
return BLK_STS_RESOURCE;
}
dev_err(disk_to_dev(nbd->disk),
@@ -573,10 +616,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
struct nbd_reply reply;
struct nbd_cmd *cmd;
struct request *req = NULL;
+ u64 handle;
u16 hwq;
u32 tag;
struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
struct iov_iter to;
+ int ret = 0;

reply.magic = 0;
iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
@@ -594,8 +639,8 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
return ERR_PTR(-EPROTO);
}

- memcpy(&tag, reply.handle, sizeof(u32));

+ memcpy(&handle, reply.handle, sizeof(handle));
+ tag = nbd_handle_to_tag(handle);
hwq = blk_mq_unique_tag_to_hwq(tag);
if (hwq < nbd->tag_set.nr_hw_queues)
req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
@@ -606,11 +651,25 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
return ERR_PTR(-ENOENT);
}
cmd = blk_mq_rq_to_pdu(req);

+ mutex_lock(&cmd->lock);
+ if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
+ dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
+ req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
+ ret = -ENOENT;
+ goto out;
+ }
+ if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
+ dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
+ req);
+ ret = -ENOENT;
+ goto out;
+ }
if (ntohl(reply.error)) {
dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
ntohl(reply.error));
cmd->status = BLK_STS_IOERR;
- return cmd;
+ goto out;
}

dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
@@ -635,18 +694,18 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
if (nbd_disconnected(config) ||
config->num_connections <= 1) {
cmd->status = BLK_STS_IOERR;
- return cmd;
+ goto out;
}
- return ERR_PTR(-EIO);
+ ret = -EIO;
+ goto out;
}
dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
req, bvec.bv_len);
}
- } else {
- /* See the comment in nbd_queue_rq. */
- wait_for_completion(&cmd->send_complete);
}
- return cmd;
+ out:
+ mutex_unlock(&cmd->lock);
+ return ret ? ERR_PTR(ret) : cmd;
}

static void recv_work(struct work_struct *work)
@@ -805,7 +864,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
*/
blk_mq_start_request(req);
if (unlikely(nsock->pending && nsock->pending != req)) {
- blk_mq_requeue_request(req, true);
+ nbd_requeue_cmd(cmd);
ret = 0;
goto out;
}
@@ -818,7 +877,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Request send failed, requeueing\n");
nbd_mark_nsock_dead(nbd, nsock, 1);
- blk_mq_requeue_request(req, true);
+ nbd_requeue_cmd(cmd);
ret = 0;
}
out:
@@ -842,7 +901,8 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
* that the server is misbehaving (or there was an error) before we're
* done sending everything over the wire.
*/
- init_completion(&cmd->send_complete);
+ mutex_lock(&cmd->lock);
+ clear_bit(NBD_CMD_REQUEUED, &cmd->flags);

/* We can be called directly from the user space process, which means we
* could possibly have signals pending so our sendmsg will fail. In
Expand All @@ -854,7 +914,7 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
ret = BLK_STS_IOERR;
else if (!ret)
ret = BLK_STS_OK;
- complete(&cmd->send_complete);
+ mutex_unlock(&cmd->lock);

return ret;
}
@@ -1460,6 +1520,8 @@ static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
cmd->nbd = set->driver_data;
+ cmd->flags = 0;
+ mutex_init(&cmd->lock);
return 0;
}

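The test_and_set_bit() inside nbd_requeue_cmd() is what implements "don't requeue the same request twice": the first path to set NBD_CMD_REQUEUED requeues, every later attempt is a no-op, and nbd_read_stat() treats a set bit as a reply that raced with a timeout. The idiom in stand-alone form (user-space sketch with C11 atomics, not the driver code):

	#include <assert.h>
	#include <stdatomic.h>
	#include <stdbool.h>

	#define CMD_REQUEUED	1UL

	static atomic_ulong flags;

	static bool requeue_once(void)
	{
		/* like test_and_set_bit(): only the first caller really requeues */
		return !(atomic_fetch_or(&flags, CMD_REQUEUED) & CMD_REQUEUED);
	}

	int main(void)
	{
		assert(requeue_once());		/* the timeout path requeues */
		assert(!requeue_once());	/* a racing path must not requeue again */
		return 0;
	}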
10 changes: 7 additions & 3 deletions drivers/nvme/host/fabrics.c
@@ -539,14 +539,18 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
/*
* For something we're not in a state to send to the device the default action
* is to busy it and retry it after the controller state is recovered. However,
- * anything marked for failfast or nvme multipath is immediately failed.
+ * if the controller is deleting or if anything is marked for failfast or
+ * nvme multipath it is immediately failed.
*
- * Note: commands used to initialize the controller will be marked for failfast.
+ * Note: nvme cli/ioctl commands are marked for failfast.
*/
- blk_status_t nvmf_fail_nonready_command(struct request *rq)
+ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
+ struct request *rq)
{
- if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
+ if (ctrl->state != NVME_CTRL_DELETING &&
+ ctrl->state != NVME_CTRL_DEAD &&
+ !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
nvme_req(rq)->status = NVME_SC_ABORT_REQ;
return BLK_STS_IOERR;
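The net effect of passing the controller in: a command that finds the queue not live is still busied and retried by default, but once the controller is deleting or dead it fails immediately instead of requeueing forever. A stand-alone model of the decision (states and flags reduced to the essentials):

	#include <assert.h>
	#include <stdbool.h>

	enum ctrl_state { CTRL_LIVE, CTRL_DELETING, CTRL_DEAD };
	enum { STS_RESOURCE, STS_IOERR };	/* busy-and-retry vs fail now */

	static int fail_nonready(enum ctrl_state state, bool failfast, bool mpath)
	{
		if (state != CTRL_DELETING && state != CTRL_DEAD &&
		    !failfast && !mpath)
			return STS_RESOURCE;	/* requeue until the ctrl recovers */
		return STS_IOERR;
	}

	int main(void)
	{
		assert(fail_nonready(CTRL_LIVE, false, false) == STS_RESOURCE);
		/* the fix: I/O to a deleting controller no longer retries forever */
		assert(fail_nonready(CTRL_DELETING, false, false) == STS_IOERR);
		return 0;
	}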
3 changes: 2 additions & 1 deletion drivers/nvme/host/fabrics.h
@@ -162,7 +162,8 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts);
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
- blk_status_t nvmf_fail_nonready_command(struct request *rq);
+ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
+ struct request *rq);
bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live);

2 changes: 1 addition & 1 deletion drivers/nvme/host/fc.c
@@ -2272,7 +2272,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,

if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
- return nvmf_fail_nonready_command(rq);
+ return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);

ret = nvme_setup_cmd(ns, rq, sqe);
if (ret)