block: Add copy offload support infrastructure
Introduce blkdev_issue_copy, which takes source and destination bdevs
and an array of (source, destination, copy length) tuples.
Introduce the REQ_COPY copy offload operation flag. Create a read-write
bio pair with a token as payload and submit them to the device in order:
the read request populates the token with source-specific information,
which is then passed along with the write request.
This design is courtesy of Mikulas Patocka <mpatocka@>'s token-based copy.

A larger copy operation may be divided into smaller copies if necessary,
based on the device limits.
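
As a rough, hypothetical sketch (not part of this patch), an in-kernel
caller could drive the new interface as follows. The function name,
device pointers, offsets and lengths are placeholders; ranges are byte
granular, must be aligned to the logical block size (4 KiB assumed here),
and the total length must stay under MAX_COPY_TOTAL_LENGTH to pass
blk_copy_sanity_check:

#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/slab.h>

static int example_issue_copy(struct block_device *src_bdev,
			      struct block_device *dst_bdev)
{
	struct range_entry *rlist;
	int ret;

	/* Two byte-granular ranges, both 4 KiB aligned. */
	rlist = kcalloc(2, sizeof(*rlist), GFP_KERNEL);
	if (!rlist)
		return -ENOMEM;

	rlist[0].src = 0;		/* copy 1 MiB from offset 0 ...    */
	rlist[0].dst = 8 << 20;		/* ... to offset 8 MiB             */
	rlist[0].len = 1 << 20;

	rlist[1].src = 16 << 20;	/* copy 512 KiB from offset 16 MiB */
	rlist[1].dst = 32 << 20;	/* ... to offset 32 MiB            */
	rlist[1].len = 512 << 10;

	/* Fails with -EINVAL unless both queues advertise copy offload. */
	ret = blkdev_issue_copy(src_bdev, 2, rlist, dst_bdev, GFP_KERNEL, 0);

	/* comp_len reports how many bytes of each range completed. */
	pr_info("range 0: %llu of %llu bytes copied\n",
		rlist[0].comp_len, rlist[0].len);

	kfree(rlist);
	return ret;
}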

Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
Signed-off-by: Arnav Dawn <arnav.dawn@samsung.com>
nj-shetty authored and intel-lab-lkp committed Feb 7, 2022
1 parent 992a1fe commit 12a9801
Showing 6 changed files with 257 additions and 0 deletions.
216 changes: 216 additions & 0 deletions block/blk-lib.c
@@ -135,6 +135,222 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL(blkdev_issue_discard);

/*
 * Wait on and process all in-flight BIOs. This must only be called once
 * all bios have been issued so that the refcount can only decrease.
 * This just waits for all bios to make it through bio_copy_end_io. IO
 * errors are propagated through cio->io_err.
 */
static int cio_await_completion(struct cio *cio)
{
	int ret = 0;

	while (atomic_read(&cio->refcount)) {
		cio->waiter = current;
		__set_current_state(TASK_UNINTERRUPTIBLE);
		blk_io_schedule();
		/* wake up sets us TASK_RUNNING */
		cio->waiter = NULL;
		ret = cio->io_err;
	}
	kvfree(cio);

	return ret;
}

static void bio_copy_end_io(struct bio *bio)
{
	struct copy_ctx *ctx = bio->bi_private;
	struct cio *cio = ctx->cio;
	sector_t clen;
	int ri = ctx->range_idx;

	if (bio->bi_status) {
		cio->io_err = bio->bi_status;
		clen = (bio->bi_iter.bi_sector - ctx->start_sec) << SECTOR_SHIFT;
		cio->rlist[ri].comp_len = min_t(sector_t, clen, cio->rlist[ri].comp_len);
	}
	__free_page(bio->bi_io_vec[0].bv_page);
	kfree(ctx);
	bio_put(bio);

	if (atomic_dec_and_test(&cio->refcount) && cio->waiter)
		wake_up_process(cio->waiter);
}

/*
 * blk_copy_offload - Use the device's native copy offload feature.
 * Go through the user-provided payload and prepare a new payload based
 * on the device's copy offload limits.
 */
int blk_copy_offload(struct block_device *src_bdev, int nr_srcs,
		struct range_entry *rlist, struct block_device *dst_bdev, gfp_t gfp_mask)
{
	struct request_queue *sq = bdev_get_queue(src_bdev);
	struct request_queue *dq = bdev_get_queue(dst_bdev);
	struct bio *read_bio, *write_bio;
	struct copy_ctx *ctx;
	struct cio *cio;
	struct page *token;
	sector_t src_blk, copy_len, dst_blk;
	sector_t remaining, max_copy_len = LONG_MAX;
	int ri = 0, ret = 0;

	cio = kzalloc(sizeof(struct cio), GFP_KERNEL);
	if (!cio)
		return -ENOMEM;
	atomic_set(&cio->refcount, 0);
	cio->rlist = rlist;

	max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_sectors,
			(sector_t)dq->limits.max_copy_sectors);
	max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_range_sectors,
			(sector_t)dq->limits.max_copy_range_sectors) << SECTOR_SHIFT;

	for (ri = 0; ri < nr_srcs; ri++) {
		cio->rlist[ri].comp_len = rlist[ri].len;
		for (remaining = rlist[ri].len, src_blk = rlist[ri].src, dst_blk = rlist[ri].dst;
			remaining > 0;
			remaining -= copy_len, src_blk += copy_len, dst_blk += copy_len) {
			copy_len = min(remaining, max_copy_len);

			token = alloc_page(gfp_mask);
			if (unlikely(!token)) {
				ret = -ENOMEM;
				goto err_token;
			}

			read_bio = bio_alloc(src_bdev, 1, REQ_OP_READ | REQ_COPY | REQ_NOMERGE,
					gfp_mask);
			if (!read_bio) {
				ret = -ENOMEM;
				goto err_read_bio;
			}
			read_bio->bi_iter.bi_sector = src_blk >> SECTOR_SHIFT;
			read_bio->bi_iter.bi_size = copy_len;
			__bio_add_page(read_bio, token, PAGE_SIZE, 0);
			ret = submit_bio_wait(read_bio);
			if (ret) {
				bio_put(read_bio);
				goto err_read_bio;
			}
			bio_put(read_bio);
			ctx = kzalloc(sizeof(struct copy_ctx), gfp_mask);
			if (!ctx) {
				ret = -ENOMEM;
				goto err_read_bio;
			}
			ctx->cio = cio;
			ctx->range_idx = ri;
			ctx->start_sec = rlist[ri].src;

			write_bio = bio_alloc(dst_bdev, 1, REQ_OP_WRITE | REQ_COPY | REQ_NOMERGE,
					gfp_mask);
			if (!write_bio) {
				ret = -ENOMEM;
				goto err_read_bio;
			}

			write_bio->bi_iter.bi_sector = dst_blk >> SECTOR_SHIFT;
			write_bio->bi_iter.bi_size = copy_len;
			__bio_add_page(write_bio, token, PAGE_SIZE, 0);
			write_bio->bi_end_io = bio_copy_end_io;
			write_bio->bi_private = ctx;
			atomic_inc(&cio->refcount);
			submit_bio(write_bio);
		}
	}

	/* Wait for completion of all I/Os */
	return cio_await_completion(cio);

err_read_bio:
	__free_page(token);
err_token:
	rlist[ri].comp_len = min_t(sector_t, rlist[ri].comp_len, (rlist[ri].len - remaining));

	cio->io_err = ret;
	return cio_await_completion(cio);
}

static inline int blk_copy_sanity_check(struct block_device *src_bdev,
		struct block_device *dst_bdev, struct range_entry *rlist, int nr)
{
	unsigned int align_mask = max(
			bdev_logical_block_size(dst_bdev), bdev_logical_block_size(src_bdev)) - 1;
	sector_t len = 0;
	int i;

	for (i = 0; i < nr; i++) {
		if (rlist[i].len)
			len += rlist[i].len;
		else
			return -EINVAL;
		if ((rlist[i].dst & align_mask) || (rlist[i].src & align_mask) ||
				(rlist[i].len & align_mask))
			return -EINVAL;
		rlist[i].comp_len = 0;
	}

	if (!len || len >= MAX_COPY_TOTAL_LENGTH)
		return -EINVAL;

	return 0;
}

static inline bool blk_check_copy_offload(struct request_queue *src_q,
		struct request_queue *dest_q)
{
	if (dest_q->limits.copy_offload == BLK_COPY_OFFLOAD &&
			src_q->limits.copy_offload == BLK_COPY_OFFLOAD)
		return true;

	return false;
}

/**
 * blkdev_issue_copy - queue a copy
 * @src_bdev: source block device
 * @nr_srcs: number of source ranges to copy
 * @src_rlist: array of source ranges
 * @dest_bdev: destination block device
 * @gfp_mask: memory allocation flags (for bio_alloc)
 * @flags: BLKDEV_COPY_* flags to control behaviour
 *
 * Description:
 *	Copy source ranges from the source block device to the destination
 *	block device. The length of a source range cannot be zero.
 */
int blkdev_issue_copy(struct block_device *src_bdev, int nr,
		struct range_entry *rlist, struct block_device *dest_bdev,
		gfp_t gfp_mask, int flags)
{
	struct request_queue *src_q = bdev_get_queue(src_bdev);
	struct request_queue *dest_q = bdev_get_queue(dest_bdev);
	int ret = -EINVAL;

	if (!src_q || !dest_q)
		return -ENXIO;

	if (!nr)
		return -EINVAL;

	if (nr >= MAX_COPY_NR_RANGE)
		return -EINVAL;

	if (bdev_read_only(dest_bdev))
		return -EPERM;

	ret = blk_copy_sanity_check(src_bdev, dest_bdev, rlist, nr);
	if (ret)
		return ret;

	if (blk_check_copy_offload(src_q, dest_q))
		ret = blk_copy_offload(src_bdev, nr, rlist, dest_bdev, gfp_mask);

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_copy);

/**
 * __blkdev_issue_write_same - generate number of bios with same page
 * @bdev: target blockdev
2 changes: 2 additions & 0 deletions block/blk-settings.c
@@ -545,6 +545,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
	t->max_segment_size = min_not_zero(t->max_segment_size,
					b->max_segment_size);

	t->max_copy_sectors = min_not_zero(t->max_copy_sectors, b->max_copy_sectors);

	t->misaligned |= b->misaligned;

	alignment = queue_limit_alignment_offset(b, start);
2 changes: 2 additions & 0 deletions block/blk.h
@@ -292,6 +292,8 @@ static inline bool blk_may_split(struct request_queue *q, struct bio *bio)
		break;
	}

	if (unlikely(op_is_copy(bio->bi_opf)))
		return false;
	/*
	 * All drivers must accept single-segments bios that are <= PAGE_SIZE.
	 * This is a quick and dirty check that relies on the fact that
20 changes: 20 additions & 0 deletions include/linux/blk_types.h
@@ -418,6 +418,7 @@ enum req_flag_bits {
	/* for driver use */
	__REQ_DRV,
	__REQ_SWAP,		/* swapping request. */
	__REQ_COPY,		/* copy request */
	__REQ_NR_BITS,		/* stops here */
};

@@ -442,6 +443,7 @@ enum req_flag_bits {

#define REQ_DRV			(1ULL << __REQ_DRV)
#define REQ_SWAP		(1ULL << __REQ_SWAP)
#define REQ_COPY		(1ULL << __REQ_COPY)

#define REQ_FAILFAST_MASK \
	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
@@ -498,6 +500,11 @@ static inline bool op_is_discard(unsigned int op)
	return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
}

static inline bool op_is_copy(unsigned int op)
{
	return (op & REQ_COPY);
}

/*
 * Check if a bio or request operation is a zone management operation, with
 * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case
@@ -532,4 +539,17 @@ struct blk_rq_stat {
	u64 batch;
};

struct cio {
	atomic_t refcount;
	blk_status_t io_err;
	struct range_entry *rlist;
	struct task_struct *waiter;	/* waiting task (NULL if none) */
};

struct copy_ctx {
	int range_idx;
	sector_t start_sec;
	struct cio *cio;
};

#endif /* __LINUX_BLK_TYPES_H */
3 changes: 3 additions & 0 deletions include/linux/blkdev.h
@@ -1120,6 +1120,9 @@ extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		struct bio **biop);
struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
		gfp_t gfp_mask);
int blkdev_issue_copy(struct block_device *src_bdev, int nr_srcs,
		struct range_entry *src_rlist, struct block_device *dest_bdev,
		gfp_t gfp_mask, int flags);

#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */
#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */
14 changes: 14 additions & 0 deletions include/uapi/linux/fs.h
@@ -64,6 +64,20 @@ struct fstrim_range {
	__u64 minlen;
};

/* Maximum number of entries supported */
#define MAX_COPY_NR_RANGE	(1 << 12)

/* Maximum total copy length */
#define MAX_COPY_TOTAL_LENGTH	(1 << 21)

/* Source range entry for copy */
struct range_entry {
	__u64 src;
	__u64 dst;
	__u64 len;
	__u64 comp_len;
};

/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
#define FILE_DEDUPE_RANGE_SAME 0
#define FILE_DEDUPE_RANGE_DIFFERS 1
