Skip to content

Commit

Permalink
block: replace bi_bdev with a gendisk pointer and partition index
Browse files Browse the repository at this point in the history
This way we don't need a block_device structure to submit I/O.  The
block_device has different lifetime rules from the gendisk and
request_queue and is usually only available when the block device node
is open.  Other callers need to explicitly create one (e.g. the lightnvm
passthrough code, or the new nvme multipathing code).

For the actual I/O path all that we need is the gendisk, which exists
once per block device.  But given that the block layer also does
partition remapping we additionally need a partition index, which is
used for said remapping in generic_make_request.

Note that all the block drivers generally want request_queue or
sometimes the gendisk, so this removes a layer of indirection all
over the stack.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
  • Loading branch information
Christoph Hellwig authored and axboe committed Aug 23, 2017
1 parent c2ee070 commit 74d4699
Show file tree
Hide file tree
Showing 99 changed files with 358 additions and 357 deletions.
2 changes: 1 addition & 1 deletion arch/powerpc/sysdev/axonram.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ axon_ram_irq_handler(int irq, void *dev)
static blk_qc_t
axon_ram_make_request(struct request_queue *queue, struct bio *bio)
{
struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data;
struct axon_ram_bank *bank = bio->bi_disk->private_data;
unsigned long phys_mem, phys_end;
void *user_mem;
struct bio_vec vec;
Expand Down
18 changes: 8 additions & 10 deletions block/bio-integrity.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
iv = bip->bip_vec + bip->bip_vcnt;

if (bip->bip_vcnt &&
bvec_gap_to_prev(bdev_get_queue(bio->bi_bdev),
bvec_gap_to_prev(bio->bi_disk->queue,
&bip->bip_vec[bip->bip_vcnt - 1], offset))
return 0;

Expand Down Expand Up @@ -190,7 +190,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
static blk_status_t bio_integrity_process(struct bio *bio,
struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn)
{
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
struct blk_integrity_iter iter;
struct bvec_iter bviter;
struct bio_vec bv;
Expand All @@ -199,7 +199,7 @@ static blk_status_t bio_integrity_process(struct bio *bio,
void *prot_buf = page_address(bip->bip_vec->bv_page) +
bip->bip_vec->bv_offset;

iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
iter.disk_name = bio->bi_disk->disk_name;
iter.interval = 1 << bi->interval_exp;
iter.seed = proc_iter->bi_sector;
iter.prot_buf = prot_buf;
Expand Down Expand Up @@ -236,20 +236,18 @@ static blk_status_t bio_integrity_process(struct bio *bio,
bool bio_integrity_prep(struct bio *bio)
{
struct bio_integrity_payload *bip;
struct blk_integrity *bi;
struct request_queue *q;
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
struct request_queue *q = bio->bi_disk->queue;
void *buf;
unsigned long start, end;
unsigned int len, nr_pages;
unsigned int bytes, offset, i;
unsigned int intervals;
blk_status_t status;

bi = bdev_get_integrity(bio->bi_bdev);
if (!bi)
return true;

q = bdev_get_queue(bio->bi_bdev);
if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
return true;

Expand Down Expand Up @@ -354,7 +352,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
struct bio_integrity_payload *bip =
container_of(work, struct bio_integrity_payload, bip_work);
struct bio *bio = bip->bip_bio;
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
struct bvec_iter iter = bio->bi_iter;

/*
Expand Down Expand Up @@ -411,7 +409,7 @@ bool __bio_integrity_endio(struct bio *bio)
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
{
struct bio_integrity_payload *bip = bio_integrity(bio);
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);

bip->bip_iter.bi_sector += bytes_done >> 9;
Expand All @@ -428,7 +426,7 @@ EXPORT_SYMBOL(bio_integrity_advance);
void bio_integrity_trim(struct bio *bio)
{
struct bio_integrity_payload *bip = bio_integrity(bio);
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);

bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
}
Expand Down
10 changes: 5 additions & 5 deletions block/bio.c
Original file line number Diff line number Diff line change
Expand Up @@ -593,10 +593,10 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio));

/*
* most users will be overriding ->bi_bdev with a new target,
* most users will be overriding ->bi_disk with a new target,
* so we don't set nor calculate new physical/hw segment counts here
*/
bio->bi_bdev = bio_src->bi_bdev;
bio->bi_disk = bio_src->bi_disk;
bio_set_flag(bio, BIO_CLONED);
bio->bi_opf = bio_src->bi_opf;
bio->bi_write_hint = bio_src->bi_write_hint;
Expand Down Expand Up @@ -681,7 +681,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
if (!bio)
return NULL;
bio->bi_bdev = bio_src->bi_bdev;
bio->bi_disk = bio_src->bi_disk;
bio->bi_opf = bio_src->bi_opf;
bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
Expand Down Expand Up @@ -1830,8 +1830,8 @@ void bio_endio(struct bio *bio)
goto again;
}

if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio,
if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
trace_block_bio_complete(bio->bi_disk->queue, bio,
blk_status_to_errno(bio->bi_status));
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
}
Expand Down
100 changes: 49 additions & 51 deletions block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1910,40 +1910,15 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
return BLK_QC_T_NONE;
}

/*
 * If bio->bi_bdev is a partition, remap the location
 *
 * A bio is treated as targeting a partition when its block_device differs
 * from bdev->bd_contains.  In that case the partition's start_sect is added
 * to the bio's sector and bi_bdev is redirected to bd_contains.  Nothing is
 * returned; bios that are not remapped are left untouched.
 */
static inline void blk_partition_remap(struct bio *bio)
{
struct block_device *bdev = bio->bi_bdev;

/*
 * Zone reset does not include bi_size so bio_sectors() is always 0.
 * Include a test for the reset op code and perform the remap if needed.
 */
if (bdev != bdev->bd_contains &&
(bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)) {
struct hd_struct *p = bdev->bd_part;

/* Shift by the partition start and retarget the containing device. */
bio->bi_iter.bi_sector += p->start_sect;
bio->bi_bdev = bdev->bd_contains;

/*
 * The trace event gets the original device number and the original
 * (pre-remap) sector, recovered by subtracting start_sect again.
 */
trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
bdev->bd_dev,
bio->bi_iter.bi_sector - p->start_sect);
}
}

static void handle_bad_sector(struct bio *bio)
{
char b[BDEVNAME_SIZE];

printk(KERN_INFO "attempt to access beyond end of device\n");
printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
bdevname(bio->bi_bdev, b),
bio->bi_opf,
bio_devname(bio, b), bio->bi_opf,
(unsigned long long)bio_end_sector(bio),
(long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
(long long)get_capacity(bio->bi_disk));
}

#ifdef CONFIG_FAIL_MAKE_REQUEST
Expand Down Expand Up @@ -1981,6 +1956,38 @@ static inline bool should_fail_request(struct hd_struct *part,

#endif /* CONFIG_FAIL_MAKE_REQUEST */

/*
* Remap block n of partition p to block n+start(p) of the disk.
*/
static inline int blk_partition_remap(struct bio *bio)
{
struct hd_struct *p;
int ret = 0;

/*
* Zone reset does not include bi_size so bio_sectors() is always 0.
* Include a test for the reset op code and perform the remap if needed.
*/
if (!bio->bi_partno ||
(!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET))
return 0;

rcu_read_lock();
p = __disk_get_part(bio->bi_disk, bio->bi_partno);
if (likely(p && !should_fail_request(p, bio->bi_iter.bi_size))) {
bio->bi_iter.bi_sector += p->start_sect;
bio->bi_partno = 0;
trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
bio->bi_iter.bi_sector - p->start_sect);
} else {
printk("%s: fail for partition %d\n", __func__, bio->bi_partno);
ret = -EIO;
}
rcu_read_unlock();

return ret;
}

/*
* Check whether this bio extends beyond the end of the device.
*/
Expand All @@ -1992,7 +1999,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
return 0;

/* Test device or partition size, when known. */
maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
maxsector = get_capacity(bio->bi_disk);
if (maxsector) {
sector_t sector = bio->bi_iter.bi_sector;

Expand All @@ -2017,20 +2024,18 @@ generic_make_request_checks(struct bio *bio)
int nr_sectors = bio_sectors(bio);
blk_status_t status = BLK_STS_IOERR;
char b[BDEVNAME_SIZE];
struct hd_struct *part;

might_sleep();

if (bio_check_eod(bio, nr_sectors))
goto end_io;

q = bdev_get_queue(bio->bi_bdev);
q = bio->bi_disk->queue;
if (unlikely(!q)) {
printk(KERN_ERR
"generic_make_request: Trying to access "
"nonexistent block-device %s (%Lu)\n",
bdevname(bio->bi_bdev, b),
(long long) bio->bi_iter.bi_sector);
bio_devname(bio, b), (long long)bio->bi_iter.bi_sector);
goto end_io;
}

Expand All @@ -2042,17 +2047,11 @@ generic_make_request_checks(struct bio *bio)
if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
goto not_supported;

part = bio->bi_bdev->bd_part;
if (should_fail_request(part, bio->bi_iter.bi_size) ||
should_fail_request(&part_to_disk(part)->part0,
bio->bi_iter.bi_size))
if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
goto end_io;

/*
* If this device has partitions, remap block n
* of partition p to block n+start(p) of the disk.
*/
blk_partition_remap(bio);
if (blk_partition_remap(bio))
goto end_io;

if (bio_check_eod(bio, nr_sectors))
goto end_io;
Expand Down Expand Up @@ -2081,16 +2080,16 @@ generic_make_request_checks(struct bio *bio)
goto not_supported;
break;
case REQ_OP_WRITE_SAME:
if (!bdev_write_same(bio->bi_bdev))
if (!q->limits.max_write_same_sectors)
goto not_supported;
break;
case REQ_OP_ZONE_REPORT:
case REQ_OP_ZONE_RESET:
if (!bdev_is_zoned(bio->bi_bdev))
if (!blk_queue_is_zoned(q))
goto not_supported;
break;
case REQ_OP_WRITE_ZEROES:
if (!bdev_write_zeroes_sectors(bio->bi_bdev))
if (!q->limits.max_write_zeroes_sectors)
goto not_supported;
break;
default:
Expand Down Expand Up @@ -2197,7 +2196,7 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_init(&bio_list_on_stack[0]);
current->bio_list = bio_list_on_stack;
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
struct request_queue *q = bio->bi_disk->queue;

if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
struct bio_list lower, same;
Expand All @@ -2215,7 +2214,7 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_init(&lower);
bio_list_init(&same);
while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
if (q == bdev_get_queue(bio->bi_bdev))
if (q == bio->bi_disk->queue)
bio_list_add(&same, bio);
else
bio_list_add(&lower, bio);
Expand Down Expand Up @@ -2258,7 +2257,7 @@ blk_qc_t submit_bio(struct bio *bio)
unsigned int count;

if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
count = bdev_logical_block_size(bio->bi_bdev) >> 9;
/* count is in 512-byte sectors like bio_sectors(); the block size is in bytes */
count = queue_logical_block_size(bio->bi_disk->queue) >> 9;
else
count = bio_sectors(bio);

Expand All @@ -2275,8 +2274,7 @@ blk_qc_t submit_bio(struct bio *bio)
current->comm, task_pid_nr(current),
op_is_write(bio_op(bio)) ? "WRITE" : "READ",
(unsigned long long)bio->bi_iter.bi_sector,
bdevname(bio->bi_bdev, b),
count);
bio_devname(bio, b), count);
}
}

Expand Down Expand Up @@ -3049,8 +3047,8 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
rq->__data_len = bio->bi_iter.bi_size;
rq->bio = rq->biotail = bio;

if (bio->bi_bdev)
rq->rq_disk = bio->bi_bdev->bd_disk;
if (bio->bi_disk)
rq->rq_disk = bio->bi_disk;
}

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
Expand Down
2 changes: 1 addition & 1 deletion block/blk-flush.c
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
return -ENXIO;

bio = bio_alloc(gfp_mask, 0);
bio->bi_bdev = bdev;
bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;

ret = submit_bio_wait(bio);
Expand Down
8 changes: 4 additions & 4 deletions block/blk-lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,

bio = next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, op, 0);

bio->bi_iter.bi_size = req_sects << 9;
Expand Down Expand Up @@ -168,7 +168,7 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
while (nr_sects) {
bio = next_bio(bio, 1, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
bio_set_dev(bio, bdev);
bio->bi_vcnt = 1;
bio->bi_io_vec->bv_page = page;
bio->bi_io_vec->bv_offset = 0;
Expand Down Expand Up @@ -241,7 +241,7 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
while (nr_sects) {
bio = next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE_ZEROES;
if (flags & BLKDEV_ZERO_NOUNMAP)
bio->bi_opf |= REQ_NOUNMAP;
Expand Down Expand Up @@ -323,7 +323,7 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
gfp_mask);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

while (nr_sects != 0) {
Expand Down
2 changes: 1 addition & 1 deletion block/blk-merge.c
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
return false;

/* must be same device and not a special request */
if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq))
if (rq->rq_disk != bio->bi_disk || req_no_special_merge(rq))
return false;

/* only merge integrity protected bio into ditto rq */
Expand Down
4 changes: 2 additions & 2 deletions block/blk-zoned.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ int blkdev_report_zones(struct block_device *bdev,
if (!bio)
return -ENOMEM;

bio->bi_bdev = bdev;
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = blk_zone_start(q, sector);
bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);

Expand Down Expand Up @@ -234,7 +234,7 @@ int blkdev_reset_zones(struct block_device *bdev,

bio = bio_alloc(gfp_mask, 0);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);

ret = submit_bio_wait(bio);
Expand Down

0 comments on commit 74d4699

Please sign in to comment.