Skip to content

Commit

Permalink
btrfs: add preferred_metadata mode mount option
Browse files Browse the repository at this point in the history
When this mode is enabled, the chunk allocation policy is modified to give
the disks a different precedence depending on the chunk type.
A disk may be marked with the preferred_metadata flag to give it a higher
chance of hosting metadata.

There are 4 modes:
- preferred_metadata=disabled
  The allocator is the standard one.

- preferred_metadata=soft
  Metadata chunks are allocated on the disks marked with the
  "preferred_metadata" flag.
  Data chunks are allocated on the disks not marked with the
  "preferred_metadata" flag.
  If there isn't enough space, then the other kind of disks may be
  used.

- preferred_metadata=hard
  Metadata chunks are allocated on the disks marked with the
  "preferred_metadata" flag.
  Data chunks are allocated on the disks not marked with the
  "preferred_metadata" flag.
  If there isn't enough space, then a "no space left" error is raised. It
  is not possible to use the other kind of disks.

- preferred_metadata=metadata
  Metadata chunks are allocated on the disks marked with the
  "preferred_metadata" flag.
  For metadata, if there isn't enough space, then the other kind of
  disks may be used.
  Data chunks are allocated on the disks not marked with the
  "preferred_metadata" flag.
  For data, if there isn't enough space, then a "no space left" error is raised.
  It is not possible to use the other kind of disks.

To mark a disk as "preferred_metadata", use the command
# btrfs properties set <disk> preferred_metadata 1

To remove the flag "preferred_metadata" from a disk, use the command
# btrfs properties set <disk> preferred_metadata 0

Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
  • Loading branch information
kreijack authored and intel-lab-lkp committed Jan 17, 2021
1 parent 4ebf30b commit 6e3781f
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 2 deletions.
105 changes: 103 additions & 2 deletions fs/btrfs/volumes.c
Expand Up @@ -4825,6 +4825,56 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
return 0;
}

/*
 * sort() comparator used when placing metadata/system chunks.
 *
 * Devices flagged preferred_metadata are ordered ahead of unflagged ones.
 * Devices of the same kind are ordered by descending max_avail and, when
 * that ties as well, by descending total_avail.
 *
 * Returns a negative value when @a must come before @b, a positive value
 * when it must come after, and 0 when the two entries compare equal.
 */
static int btrfs_cmp_device_info_metadata(const void *a, const void *b)
{
	const struct btrfs_device_info *lhs = a;
	const struct btrfs_device_info *rhs = b;
	const int lhs_pref = !!lhs->preferred_metadata;
	const int rhs_pref = !!rhs->preferred_metadata;

	/* Different kinds: the preferred_metadata device goes first */
	if (lhs_pref != rhs_pref)
		return lhs_pref ? -1 : 1;

	/* Same kind: the bigger max_avail goes first */
	if (lhs->max_avail != rhs->max_avail)
		return lhs->max_avail > rhs->max_avail ? -1 : 1;

	/* Still tied: the bigger total_avail goes first */
	if (lhs->total_avail != rhs->total_avail)
		return lhs->total_avail > rhs->total_avail ? -1 : 1;

	return 0;
}

/*
 * sort() comparator used when placing data chunks.
 *
 * Devices NOT flagged preferred_metadata are ordered ahead of flagged
 * ones. Devices of the same kind are ordered by descending max_avail and,
 * when that ties as well, by descending total_avail.
 *
 * Returns a negative value when @a must come before @b, a positive value
 * when it must come after, and 0 when the two entries compare equal.
 */
static int btrfs_cmp_device_info_data(const void *a, const void *b)
{
	const struct btrfs_device_info *lhs = a;
	const struct btrfs_device_info *rhs = b;
	const int lhs_pref = !!lhs->preferred_metadata;
	const int rhs_pref = !!rhs->preferred_metadata;

	/* Different kinds: the preferred_metadata device goes last */
	if (lhs_pref != rhs_pref)
		return lhs_pref ? 1 : -1;

	/* Same kind: the bigger max_avail goes first */
	if (lhs->max_avail != rhs->max_avail)
		return lhs->max_avail > rhs->max_avail ? -1 : 1;

	/* Still tied: the bigger total_avail goes first */
	if (lhs->total_avail != rhs->total_avail)
		return lhs->total_avail > rhs->total_avail ? -1 : 1;

	return 0;
}

static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
{
if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
Expand Down Expand Up @@ -4940,6 +4990,7 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices,
int ndevs = 0;
u64 max_avail;
u64 dev_offset;
int nr_preferred_metadata = 0;

/*
* in the first pass through the devices list, we gather information
Expand Down Expand Up @@ -4992,15 +5043,65 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices,
devices_info[ndevs].max_avail = max_avail;
devices_info[ndevs].total_avail = total_avail;
devices_info[ndevs].dev = device;
devices_info[ndevs].preferred_metadata = !!(device->type &
BTRFS_DEV_PREFERRED_METADATA);
if (devices_info[ndevs].preferred_metadata)
nr_preferred_metadata++;
++ndevs;
}
ctl->ndevs = ndevs;

BUG_ON(nr_preferred_metadata > ndevs);
/*
* now sort the devices by hole size / available space
*/
sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
btrfs_cmp_device_info, NULL);
if (((ctl->type & BTRFS_BLOCK_GROUP_DATA) &&
(ctl->type & BTRFS_BLOCK_GROUP_METADATA)) ||
info->preferred_metadata_mode == BTRFS_PM_DISABLED) {
/* mixed bg or PREFERRED_METADATA not set */
sort(devices_info, ctl->ndevs, sizeof(struct btrfs_device_info),
btrfs_cmp_device_info, NULL);
} else {
/*
* if PREFERRED_METADATA is set, sort the devices considering
* also their kind (preferred_metadata or not). Limit the
* available devices to the ones of the same kind, to prevent
* a striped profile, like raid5, from spreading across all
* kinds of devices.
* It is allowed to use different kinds of devices (if the ones
* of the same kind are not enough alone) in the following
* case:
* - preferred_metadata_mode == BTRFS_PM_SOFT:
* use the device of the same kind until these
* are enough. Otherwise it is allowed to
* use all the devices
* - preferred_metadata_mode == BTRFS_PM_HARD
* use the devices of the same kind; if these are
* not enough, then an error will be raised
* - preferred_metadata_mode == BTRFS_PM_METADATA
* metadata/system -> as BTRFS_PM_SOFT
* data -> as BTRFS_PM_HARD
*/
if (ctl->type & BTRFS_BLOCK_GROUP_DATA) {
int nr_data = ctl->ndevs - nr_preferred_metadata;
sort(devices_info, ctl->ndevs,
sizeof(struct btrfs_device_info),
btrfs_cmp_device_info_data, NULL);
if (info->preferred_metadata_mode == BTRFS_PM_HARD ||
info->preferred_metadata_mode == BTRFS_PM_METADATA)
ctl->ndevs = nr_data;
else if (nr_data >= ctl->devs_min)
ctl->ndevs = nr_data;
} else { /* non data -> metadata and system */
sort(devices_info, ctl->ndevs,
sizeof(struct btrfs_device_info),
btrfs_cmp_device_info_metadata, NULL);
if (info->preferred_metadata_mode == BTRFS_PM_HARD)
ctl->ndevs = nr_preferred_metadata;
else if (nr_preferred_metadata >= ctl->devs_min)
ctl->ndevs = nr_preferred_metadata;
}
}

return 0;
}
Expand Down
1 change: 1 addition & 0 deletions fs/btrfs/volumes.h
Expand Up @@ -364,6 +364,7 @@ struct btrfs_device_info {
u64 dev_offset;
u64 max_avail;
u64 total_avail;
int preferred_metadata:1;
};

struct btrfs_raid_attr {
Expand Down

0 comments on commit 6e3781f

Please sign in to comment.