forked from torvalds/linux
Permalink
Show file tree
Hide file tree
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
block: Add concurrent positioning ranges support
The Concurrent Positioning Ranges VPD page (for SCSI) and Log (for ATA)
contain parameters describing the number of sets of contiguous LBAs that
can be served independently by a single LUN multi-actuator disk. This
patch provides the disk_set_cranges() function allowing a device
driver to signal to the block layer that a disk has multiple actuators,
each one serving a contiguous range of sectors. To describe the set
of sector ranges representing the different actuators of a device, the
data type struct blk_cranges is introduced.
For a device with multiple actuators, a struct blk_cranges is attached
to the device request queue by the disk_set_cranges() function. The
function disk_alloc_cranges() is provided for drivers to allocate this
structure.
The blk_cranges structure contains kobjects (struct kobject) to register
with sysfs the set of sector ranges defined by a device. On initial
device scan, this registration is done from blk_register_queue() using
the block layer internal function disk_register_cranges(). If a driver
calls disk_set_cranges() for a registered queue, e.g. when a device
is revalidated, disk_set_cranges() will execute disk_register_cranges()
to update the queue sysfs attribute files.
The sysfs file structure created starts from the cranges sub-directory
and contains the start sector and number of sectors served by an
actuator, with the information for each actuator grouped in one
directory per actuator. E.g. for a dual actuator drive, we have:
$ tree /sys/block/sdk/queue/cranges/
/sys/block/sdk/queue/cranges/
|-- 0
| |-- nr_sectors
| `-- sector
`-- 1
|-- nr_sectors
`-- sector
For a regular single actuator device, the cranges directory does not
exist.
Device revalidation may lead to changes to this structure and to the
attribute values. When manipulated, the queue sysfs_lock and
sysfs_dir_lock are held for atomicity, similarly to how the blk-mq and
elevator sysfs queue sub-directories are protected.
The code related to the management of cranges is added in the new
file block/blk-cranges.c.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>- Loading branch information
1 parent
a3bb6c0
commit 2055ecb9ea719e82d4db70d303fdac48c33acd47
Showing
5 changed files
with
362 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,310 @@ | ||
| // SPDX-License-Identifier: GPL-2.0 | ||
| /* | ||
| * Block device concurrent positioning ranges. | ||
| * | ||
| * Copyright (C) 2021 Western Digital Corporation or its Affiliates. | ||
| */ | ||
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/sysfs.h>
|
|
||
| #include "blk.h" | ||
|
|
||
| static ssize_t blk_crange_sector_show(struct blk_crange *cr, char *page) | ||
| { | ||
| return sprintf(page, "%llu\n", cr->sector); | ||
| } | ||
|
|
||
| static ssize_t blk_crange_nr_sectors_show(struct blk_crange *cr, char *page) | ||
| { | ||
| return sprintf(page, "%llu\n", cr->nr_sectors); | ||
| } | ||
|
|
||
| struct blk_crange_sysfs_entry { | ||
| struct attribute attr; | ||
| ssize_t (*show)(struct blk_crange *cr, char *page); | ||
| }; | ||
|
|
||
| static struct blk_crange_sysfs_entry blk_crange_sector_entry = { | ||
| .attr = { .name = "sector", .mode = 0444 }, | ||
| .show = blk_crange_sector_show, | ||
| }; | ||
|
|
||
| static struct blk_crange_sysfs_entry blk_crange_nr_sectors_entry = { | ||
| .attr = { .name = "nr_sectors", .mode = 0444 }, | ||
| .show = blk_crange_nr_sectors_show, | ||
| }; | ||
|
|
||
| static struct attribute *blk_crange_attrs[] = { | ||
| &blk_crange_sector_entry.attr, | ||
| &blk_crange_nr_sectors_entry.attr, | ||
| NULL, | ||
| }; | ||
| ATTRIBUTE_GROUPS(blk_crange); | ||
|
|
||
| static ssize_t blk_crange_sysfs_show(struct kobject *kobj, | ||
| struct attribute *attr, char *page) | ||
| { | ||
| struct blk_crange_sysfs_entry *entry = | ||
| container_of(attr, struct blk_crange_sysfs_entry, attr); | ||
| struct blk_crange *cr = container_of(kobj, struct blk_crange, kobj); | ||
| ssize_t ret; | ||
|
|
||
| mutex_lock(&cr->queue->sysfs_lock); | ||
| ret = entry->show(cr, page); | ||
| mutex_unlock(&cr->queue->sysfs_lock); | ||
|
|
||
| return ret; | ||
| } | ||
|
|
||
| static const struct sysfs_ops blk_crange_sysfs_ops = { | ||
| .show = blk_crange_sysfs_show, | ||
| }; | ||
|
|
||
/*
 * Range entries are never freed one by one: they live in the array of
 * range entries of struct blk_cranges and go away together with it. Since
 * kobject_add() takes a reference on the parent struct blk_cranges kobject,
 * that array cannot be freed until kobject_del() has been called for all
 * entries. Nothing needs to be done here, but a release operation must
 * still be provided to avoid complaints from the kobject code.
 */
static void blk_crange_sysfs_nop_release(struct kobject *kobj)
{
	/* Intentionally empty, see the comment above */
}
|
|
||
| static struct kobj_type blk_crange_ktype = { | ||
| .sysfs_ops = &blk_crange_sysfs_ops, | ||
| .default_groups = blk_crange_groups, | ||
| .release = blk_crange_sysfs_nop_release, | ||
| }; | ||
|
|
||
| /* | ||
| * This will be executed only after all range entries are removed | ||
| * with kobject_del(), at which point, it is safe to free everything, | ||
| * including the array of range entries. | ||
| */ | ||
| static void blk_cranges_sysfs_release(struct kobject *kobj) | ||
| { | ||
| struct blk_cranges *cranges = | ||
| container_of(kobj, struct blk_cranges, kobj); | ||
|
|
||
| kfree(cranges); | ||
| } | ||
|
|
||
| static struct kobj_type blk_cranges_ktype = { | ||
| .release = blk_cranges_sysfs_release, | ||
| }; | ||
|
|
||
| /** | ||
| * blk_register_cranges - register with sysfs a set of concurrent ranges | ||
| * @disk: Target disk | ||
| * @new_cranges: New set of concurrent ranges | ||
| * | ||
| * Register with sysfs a set of concurrent ranges for @disk. If @new_cranges | ||
| * is not NULL, this set of concurrent ranges is registered and the | ||
| * old set specified by q->cranges is unregistered. Otherwise, q->cranges | ||
| * is registered if it is not already. | ||
| */ | ||
| int disk_register_cranges(struct gendisk *disk, struct blk_cranges *new_cranges) | ||
| { | ||
| struct request_queue *q = disk->queue; | ||
| struct blk_cranges *cranges; | ||
| int i, ret; | ||
|
|
||
| lockdep_assert_held(&q->sysfs_dir_lock); | ||
| lockdep_assert_held(&q->sysfs_lock); | ||
|
|
||
| /* If a new range set is specified, unregister the old one */ | ||
| if (new_cranges) { | ||
| if (q->cranges) | ||
| disk_unregister_cranges(disk); | ||
| q->cranges = new_cranges; | ||
| } | ||
|
|
||
| cranges = q->cranges; | ||
| if (!cranges) | ||
| return 0; | ||
|
|
||
| /* | ||
| * At this point, cranges is the new set of sector ranges that needs | ||
| * to be registered with sysfs. | ||
| */ | ||
| WARN_ON(cranges->sysfs_registered); | ||
| ret = kobject_init_and_add(&cranges->kobj, &blk_cranges_ktype, | ||
| &q->kobj, "%s", "cranges"); | ||
| if (ret) { | ||
| q->cranges = NULL; | ||
| kfree(cranges); | ||
| return ret; | ||
| } | ||
|
|
||
| for (i = 0; i < cranges->nr_ranges; i++) { | ||
| cranges->ranges[i].queue = q; | ||
| ret = kobject_init_and_add(&cranges->ranges[i].kobj, | ||
| &blk_crange_ktype, &cranges->kobj, | ||
| "%d", i); | ||
| if (ret) { | ||
| while (--i >= 0) | ||
| kobject_del(&cranges->ranges[i].kobj); | ||
| kobject_del(&cranges->kobj); | ||
| kobject_put(&cranges->kobj); | ||
| return ret; | ||
| } | ||
| } | ||
|
|
||
| cranges->sysfs_registered = true; | ||
|
|
||
| return 0; | ||
| } | ||
|
|
||
| void disk_unregister_cranges(struct gendisk *disk) | ||
| { | ||
| struct request_queue *q = disk->queue; | ||
| struct blk_cranges *cranges = q->cranges; | ||
| int i; | ||
|
|
||
| lockdep_assert_held(&q->sysfs_dir_lock); | ||
| lockdep_assert_held(&q->sysfs_lock); | ||
|
|
||
| if (!cranges) | ||
| return; | ||
|
|
||
| if (cranges->sysfs_registered) { | ||
| for (i = 0; i < cranges->nr_ranges; i++) | ||
| kobject_del(&cranges->ranges[i].kobj); | ||
| kobject_del(&cranges->kobj); | ||
| kobject_put(&cranges->kobj); | ||
| } else { | ||
| kfree(cranges); | ||
| } | ||
|
|
||
| q->cranges = NULL; | ||
| } | ||
|
|
||
| static bool disk_check_ranges(struct gendisk *disk, struct blk_cranges *cr) | ||
| { | ||
| sector_t capacity = get_capacity(disk); | ||
| sector_t min_sector = (sector_t)-1; | ||
| sector_t max_sector = 0; | ||
| int i; | ||
|
|
||
| /* | ||
| * Sector ranges may overlap but should overall contain all sectors | ||
| * within the disk capacity. | ||
| */ | ||
| for (i = 0; i < cr->nr_ranges; i++) { | ||
| min_sector = min(min_sector, cr->ranges[i].sector); | ||
| max_sector = max(max_sector, cr->ranges[i].sector + | ||
| cr->ranges[i].nr_sectors); | ||
| } | ||
|
|
||
| if (min_sector != 0 || max_sector < capacity) { | ||
| pr_warn("Invalid concurrent ranges: missing sectors\n"); | ||
| return false; | ||
| } | ||
|
|
||
| if (max_sector > capacity) { | ||
| pr_warn("Invalid concurrent ranges: beyond capacity\n"); | ||
| return false; | ||
| } | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
| static bool disk_cranges_changed(struct gendisk *disk, struct blk_cranges *new) | ||
| { | ||
| struct blk_cranges *old = disk->queue->cranges; | ||
| int i; | ||
|
|
||
| if (!old) | ||
| return true; | ||
|
|
||
| if (old->nr_ranges != new->nr_ranges) | ||
| return true; | ||
|
|
||
| for (i = 0; i < old->nr_ranges; i++) { | ||
| if (new->ranges[i].sector != old->ranges[i].sector || | ||
| new->ranges[i].nr_sectors != old->ranges[i].nr_sectors) | ||
| return true; | ||
| } | ||
|
|
||
| return false; | ||
| } | ||
|
|
||
| /** | ||
| * disk_alloc_cranges - Allocate a concurrent positioning range structure | ||
| * @disk: target disk | ||
| * @nr_ranges: Number of concurrent ranges | ||
| * | ||
| * Allocate a struct blk_cranges structure with @nr_ranges range descriptors. | ||
| */ | ||
| struct blk_cranges *disk_alloc_cranges(struct gendisk *disk, int nr_ranges) | ||
| { | ||
| struct blk_cranges *cr; | ||
|
|
||
| cr = kzalloc_node(struct_size(cr, ranges, nr_ranges), GFP_KERNEL, | ||
| disk->queue->node); | ||
| if (cr) | ||
| cr->nr_ranges = nr_ranges; | ||
| return cr; | ||
| } | ||
| EXPORT_SYMBOL_GPL(disk_alloc_cranges); | ||
|
|
||
| /** | ||
| * disk_set_cranges - Set a disk concurrent positioning ranges | ||
| * @disk: target disk | ||
| * @cr: concurrent ranges structure | ||
| * | ||
| * Set the concurrant positioning ranges information of the request queue | ||
| * of @disk to @cr. If @cr is NULL and the concurrent ranges structure | ||
| * already set, if any, is cleared. If there are no differences between | ||
| * @cr and the concurrent ranges structure already set, @cr is freed. | ||
| */ | ||
| void disk_set_cranges(struct gendisk *disk, struct blk_cranges *cr) | ||
| { | ||
| struct request_queue *q = disk->queue; | ||
|
|
||
| if (WARN_ON_ONCE(cr && !cr->nr_ranges)) { | ||
| kfree(cr); | ||
| cr = NULL; | ||
| } | ||
|
|
||
| mutex_lock(&q->sysfs_dir_lock); | ||
| mutex_lock(&q->sysfs_lock); | ||
|
|
||
| if (cr) { | ||
| if (!disk_check_ranges(disk, cr)) { | ||
| kfree(cr); | ||
| cr = NULL; | ||
| goto reg; | ||
| } | ||
|
|
||
| if (!disk_cranges_changed(disk, cr)) { | ||
| kfree(cr); | ||
| goto unlock; | ||
| } | ||
| } | ||
|
|
||
| /* | ||
| * This may be called for a registered queue. E.g. during a device | ||
| * revalidation. If that is the case, we need to unregister the old | ||
| * set of concurrent ranges and register the new set. If the queue | ||
| * is not registered, the device request queue registration will | ||
| * register the ranges, so only swap in the new set and free the | ||
| * old one. | ||
| */ | ||
| reg: | ||
| if (blk_queue_registered(q)) { | ||
| disk_register_cranges(disk, cr); | ||
| } else { | ||
| swap(q->cranges, cr); | ||
| kfree(cr); | ||
| } | ||
|
|
||
| unlock: | ||
| mutex_unlock(&q->sysfs_lock); | ||
| mutex_unlock(&q->sysfs_dir_lock); | ||
| } | ||
| EXPORT_SYMBOL_GPL(disk_set_cranges); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.