Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nixos/qemu-vm: use persistent block device names #236656

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions nixos/doc/manual/release-notes/rl-2311.section.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,17 @@
- A new option was added to the virtualisation module that enables specifying explicitly named network interfaces in QEMU VMs. The existing `virtualisation.vlans` is still supported for cases where the name of the network interface is irrelevant.

- `services.nginx` gained a `defaultListen` option at server-level with support for PROXY protocol listeners, also `proxyProtocol` is now exposed in `services.nginx.virtualHosts.<name>.listen` option. It is now possible to run PROXY listeners and non-PROXY listeners at a server-level, see [#213510](https://github.com/NixOS/nixpkgs/pull/213510/) for more details.

## Nixpkgs internals {#sec-release-23.11-nixpkgs-internals}

- The `qemu-vm.nix` module by default now identifies block devices via
persistent names available in `/dev/disk/by-*`. Because the rootDevice is
identified by its filesystem label, it needs to be formatted before the VM is
started. The functionality of automatically formatting the rootDevice in the
initrd is removed from the QEMU module. However, for tests that depend on
this functionality, a test utility for the scripted initrd is added
(`nixos/tests/common/auto-format-root-device.nix`). To use this in a NixOS
test, import the module, e.g. `imports = [
./common/auto-format-root-device.nix ];`. When you use the systemd initrd, you
can automatically format the root device by setting
`virtualisation.fileSystems."/".autoFormat = true;`.
1 change: 1 addition & 0 deletions nixos/lib/make-disk-image.nix
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,7 @@ let format' = format; in let
# In this throwaway resource, we only have /dev/vda, but the actual VM may refer to another disk for bootloader, e.g. /dev/vdb
# Use this option to create a symlink from vda to any arbitrary device you want.
${optionalString (config.boot.loader.grub.device != "/dev/vda") ''
mkdir -p $(dirname ${config.boot.loader.grub.device})
ln -s /dev/vda ${config.boot.loader.grub.device}
''}

Expand Down
163 changes: 65 additions & 98 deletions nixos/modules/virtualisation/qemu-vm.nix
Original file line number Diff line number Diff line change
Expand Up @@ -81,25 +81,6 @@ let

drivesCmdLine = drives: concatStringsSep "\\\n " (imap1 driveCmdline drives);


# Creates a device name from a 1-based numerical index, e.g.
# * `driveDeviceName 1` -> `/dev/vda`
# * `driveDeviceName 2` -> `/dev/vdb`
driveDeviceName = idx:
let letter = elemAt lowerChars (idx - 1);
in if cfg.qemu.diskInterface == "scsi" then
"/dev/sd${letter}"
else
"/dev/vd${letter}";

lookupDriveDeviceName = driveName: driveList:
(findSingle (drive: drive.name == driveName)
(throw "Drive ${driveName} not found")
(throw "Multiple drives named ${driveName}") driveList).device;

addDeviceNames =
imap1 (idx: drive: drive // { device = driveDeviceName idx; });

# Shell script to start the VM.
startVM =
''
Expand All @@ -109,25 +90,41 @@ let

set -e

# Create an empty ext4 filesystem image. A filesystem image does not
# contain a partition table but just a filesystem.
createEmptyFilesystemImage() {
  # Usage: createEmptyFilesystemImage <name> <size>
  #   name: path of the qcow2 image to write
  #   size: image size, passed verbatim to qemu-img create
  local name=$1
  local size=$2
  # Declare and assign separately: a combined "local temp=$(mktemp)" yields
  # the exit status of "local", which would hide a mktemp failure from
  # "set -e".
  local temp
  temp=$(mktemp)
  # Build a raw image, put a labelled ext4 filesystem on it, then convert
  # the result to qcow2 at the requested path.
  ${qemu}/bin/qemu-img create -f raw "$temp" "$size"
  ${pkgs.e2fsprogs}/bin/mkfs.ext4 -L ${rootFilesystemLabel} "$temp"
  ${qemu}/bin/qemu-img convert -f raw -O qcow2 "$temp" "$name"
  rm "$temp"
}

NIX_DISK_IMAGE=$(readlink -f "''${NIX_DISK_IMAGE:-${toString config.virtualisation.diskImage}}") || test -z "$NIX_DISK_IMAGE"

if test -n "$NIX_DISK_IMAGE" && ! test -e "$NIX_DISK_IMAGE"; then
echo "Disk image do not exist, creating the virtualisation disk image..."
# If we are using a bootloader and default filesystems layout.
# We have to reuse the system image layout as a backing image format (CoW)
# So we can write on the top of it.

# If we are not using the default FS layout, potentially, we are interested into
# performing operations in postDeviceCommands or at early boot on the raw device.
# We can still boot through QEMU direct kernel boot feature.

# CoW prevent size to be attributed to an image.
# FIXME: raise this issue to upstream.
${qemu}/bin/qemu-img create \
${concatStringsSep " \\\n" ([ "-f qcow2" ]
++ optional (cfg.useBootLoader && cfg.useDefaultFilesystems) "-F qcow2 -b ${systemImage}/nixos.qcow2"
++ optional (!(cfg.useBootLoader && cfg.useDefaultFilesystems)) "-o size=${toString config.virtualisation.diskSize}M"
++ [ ''"$NIX_DISK_IMAGE"'' ])}

${if (cfg.useBootLoader && cfg.useDefaultFilesystems) then ''
# Create a writable qcow2 image using the systemImage as a backing
# image.

# CoW prevent size to be attributed to an image.
# FIXME: raise this issue to upstream.
${qemu}/bin/qemu-img create \
-f qcow2 \
-b ${systemImage}/nixos.qcow2 \
-F qcow2 \
"$NIX_DISK_IMAGE"
'' else if cfg.useDefaultFilesystems then ''
createEmptyFilesystemImage "$NIX_DISK_IMAGE" "${toString cfg.diskSize}M"
'' else ''
# Create an empty disk image without a filesystem.
${qemu}/bin/qemu-img create -f qcow2 "$NIX_DISK_IMAGE" "${toString cfg.diskSize}M"
''
}
echo "Virtualisation disk image created."
fi

Expand All @@ -148,6 +145,7 @@ let
${pkgs.erofs-utils}/bin/mkfs.erofs \
--force-uid=0 \
--force-gid=0 \
-L ${nixStoreFilesystemLabel} \
-U eb176051-bd15-49b7-9e6b-462e0b467019 \
-T 0 \
--exclude-regex="$(
Expand Down Expand Up @@ -218,13 +216,27 @@ let

regInfo = pkgs.closureInfo { rootPaths = config.virtualisation.additionalPaths; };

# Use well-defined and persistent filesystem labels to identify block devices.
rootFilesystemLabel = "nixos";
espFilesystemLabel = "ESP"; # Hard-coded by make-disk-image.nix
nixStoreFilesystemLabel = "nix-store";

# The root drive is a raw disk which does not necessarily contain a
# filesystem or partition table. It thus cannot be identified via the typical
# persistent naming schemes (e.g. /dev/disk/by-{label, uuid, partlabel,
# partuuid}). Instead, supply a well-defined and persistent serial attribute
# via QEMU. Inside the running system, the disk can then be identified via
# the /dev/disk/by-id scheme.
rootDriveSerialAttr = "root";

# System image is akin to a complete NixOS install with
# a boot partition and root partition.
systemImage = import ../../lib/make-disk-image.nix {
inherit pkgs config lib;
additionalPaths = [ regInfo ];
format = "qcow2";
onlyNixStore = false;
label = rootFilesystemLabel;
partitionTableType = selectPartitionTableLayout { inherit (cfg) useDefaultFilesystems useEFIBoot; };
# Bootloader should be installed on the system image only if we are booting through bootloaders.
# Though, if a user is not using our default filesystems, it is possible to not have any ESP
Expand All @@ -247,6 +259,7 @@ let
additionalPaths = [ regInfo ];
format = "qcow2";
onlyNixStore = true;
label = nixStoreFilesystemLabel;
partitionTableType = "none";
installBootLoader = false;
touchEFIVars = false;
Expand All @@ -255,28 +268,6 @@ let
copyChannel = false;
};

bootConfiguration =
if cfg.useDefaultFilesystems
then
if cfg.useBootLoader
then
if cfg.useEFIBoot then "efi_bootloading_with_default_fs"
else "legacy_bootloading_with_default_fs"
else
if cfg.directBoot.enable then "direct_boot_with_default_fs"
else "custom"
else
"custom";
suggestedRootDevice = {
"efi_bootloading_with_default_fs" = "${cfg.bootLoaderDevice}2";
"legacy_bootloading_with_default_fs" = "${cfg.bootLoaderDevice}1";
"direct_boot_with_default_fs" = cfg.bootLoaderDevice;
# This will enforce a NixOS module type checking error
# to ask explicitly the user to set a rootDevice.
# As it will look like `rootDevice = lib.mkDefault null;` after
# all "computations".
"custom" = null;
}.${bootConfiguration};
in

{
Expand Down Expand Up @@ -343,44 +334,39 @@ in
virtualisation.bootLoaderDevice =
mkOption {
type = types.path;
default = lookupDriveDeviceName "root" cfg.qemu.drives;
defaultText = literalExpression ''lookupDriveDeviceName "root" cfg.qemu.drives'';
example = "/dev/vda";
default = "/dev/disk/by-id/virtio-${rootDriveSerialAttr}";
defaultText = literalExpression ''/dev/disk/by-id/virtio-${rootDriveSerialAttr}'';
example = "/dev/disk/by-id/virtio-boot-loader-device";
description =
lib.mdDoc ''
The disk to be used for the boot filesystem.
By default, it is the same disk as the root filesystem.
The path (inside the VM) to the device to boot from when legacy booting.
'';
};

virtualisation.bootPartition =
mkOption {
type = types.nullOr types.path;
default = if cfg.useEFIBoot then "${cfg.bootLoaderDevice}1" else null;
defaultText = literalExpression ''if cfg.useEFIBoot then "''${cfg.bootLoaderDevice}1" else null'';
example = "/dev/vda1";
default = if cfg.useEFIBoot then "/dev/disk/by-label/${espFilesystemLabel}" else null;
defaultText = literalExpression ''if cfg.useEFIBoot then "/dev/disk/by-label/${espFilesystemLabel}" else null'';
example = "/dev/disk/by-label/esp";
description =
lib.mdDoc ''
The boot partition to be used to mount /boot filesystem.
In legacy boots, this should be null.
By default, in EFI boot, it is the first partition of the boot device.
The path (inside the VM) to the device containing the EFI System Partition (ESP).

If you are *not* booting from a UEFI firmware, this value is, by
default, `null`. The ESP is mounted under `/boot`.
'';
};

virtualisation.rootDevice =
mkOption {
type = types.nullOr types.path;
example = "/dev/vda2";
default = "/dev/disk/by-label/${rootFilesystemLabel}";
defaultText = literalExpression ''/dev/disk/by-label/${rootFilesystemLabel}'';
example = "/dev/disk/by-label/nixos";
description =
lib.mdDoc ''
The disk or partition to be used for the root filesystem.
By default (read the source code for more details):

- under EFI with a bootloader: 2nd partition of the boot disk
- in legacy boot with a bootloader: 1st partition of the boot disk
- in direct boot (i.e. without a bootloader): whole disk

In case you are not using a default boot device or a default filesystem, you have to set explicitly your root device.
The path (inside the VM) to the device containing the root filesystem.
'';
};

Expand Down Expand Up @@ -711,7 +697,6 @@ in
mkOption {
type = types.listOf (types.submodule driveOpts);
description = lib.mdDoc "Drives passed to qemu.";
apply = addDeviceNames;
};

diskInterface =
Expand Down Expand Up @@ -975,29 +960,11 @@ in
# FIXME: make a sense of this mess wrt to multiple ESP present in the system, probably use boot.efiSysMountpoint?
boot.loader.grub.device = mkVMOverride (if cfg.useEFIBoot then "nodev" else cfg.bootLoaderDevice);
boot.loader.grub.gfxmodeBios = with cfg.resolution; "${toString x}x${toString y}";
virtualisation.rootDevice = mkDefault suggestedRootDevice;

boot.initrd.kernelModules = optionals (cfg.useNixStoreImage && !cfg.writableStore) [ "erofs" ];

boot.loader.supportsInitrdSecrets = mkIf (!cfg.useBootLoader) (mkVMOverride false);

boot.initrd.extraUtilsCommands = lib.mkIf (cfg.useDefaultFilesystems && !config.boot.initrd.systemd.enable)
''
# We need mke2fs in the initrd.
copy_bin_and_libs ${pkgs.e2fsprogs}/bin/mke2fs
'';

boot.initrd.postDeviceCommands = lib.mkIf (cfg.useDefaultFilesystems && !config.boot.initrd.systemd.enable)
''
# If the disk image appears to be empty, run mke2fs to
# initialise.
FSTYPE=$(blkid -o value -s TYPE ${cfg.rootDevice} || true)
PARTTYPE=$(blkid -o value -s PTTYPE ${cfg.rootDevice} || true)
if test -z "$FSTYPE" -a -z "$PARTTYPE"; then
mke2fs -t ext4 ${cfg.rootDevice}
fi
'';

boot.initrd.postMountCommands = lib.mkIf (!config.boot.initrd.systemd.enable)
''
# Mark this as a NixOS machine.
Expand Down Expand Up @@ -1112,6 +1079,7 @@ in
driveExtraOpts.cache = "writeback";
driveExtraOpts.werror = "report";
deviceExtraOpts.bootindex = "1";
deviceExtraOpts.serial = rootDriveSerialAttr;
}])
(mkIf cfg.useNixStoreImage [{
name = "nix-store";
Expand Down Expand Up @@ -1154,7 +1122,6 @@ in
} else {
device = cfg.rootDevice;
fsType = "ext4";
autoFormat = true;
});
"/tmp" = lib.mkIf config.boot.tmp.useTmpfs {
device = "tmpfs";
Expand All @@ -1164,7 +1131,7 @@ in
options = [ "mode=1777" "strictatime" "nosuid" "nodev" "size=${toString config.boot.tmp.tmpfsSize}" ];
};
"/nix/${if cfg.writableStore then ".ro-store" else "store"}" = lib.mkIf cfg.useNixStoreImage {
device = "${lookupDriveDeviceName "nix-store" cfg.qemu.drives}";
device = "/dev/disk/by-label/${nixStoreFilesystemLabel}";
neededForBoot = true;
options = [ "ro" ];
};
Expand All @@ -1174,7 +1141,7 @@ in
neededForBoot = true;
};
"/boot" = lib.mkIf (cfg.useBootLoader && cfg.bootPartition != null) {
device = cfg.bootPartition; # 1 for e.g. `vda1`, as created in `systemImage`
device = cfg.bootPartition;
fsType = "vfat";
noCheck = true; # fsck fails on a r/o filesystem
};
Expand Down
29 changes: 29 additions & 0 deletions nixos/tests/common/auto-format-root-device.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# This is a test utility that automatically formats
# `config.virtualisation.rootDevice` in the initrd.
# Note that when you are using
# `boot.initrd.systemd.enable = true`, you can use
# `virtualisation.fileSystems."/".autoFormat = true;`
# instead.

# NixOS module: formats `virtualisation.rootDevice` as ext4 from the initrd
# when blkid finds neither a filesystem nor a partition table on it.
{ config, pkgs, ... }:

let
  inherit (config.virtualisation) rootDevice;
in
{
  boot.initrd = {
    # Copy mke2fs into the initrd so the device commands below can call it.
    extraUtilsCommands = ''
      # We need mke2fs in the initrd.
      copy_bin_and_libs ${pkgs.e2fsprogs}/bin/mke2fs
    '';

    # Probe the device with blkid; format it only when both the TYPE and
    # PTTYPE probes come back empty.
    postDeviceCommands = ''
      # If the disk image appears to be empty, run mke2fs to
      # initialise.
      FSTYPE=$(blkid -o value -s TYPE ${rootDevice} || true)
      PARTTYPE=$(blkid -o value -s PTTYPE ${rootDevice} || true)
      if test -z "$FSTYPE" -a -z "$PARTTYPE"; then
      mke2fs -t ext4 ${rootDevice}
      fi
    '';
  };
}
10 changes: 7 additions & 3 deletions nixos/tests/fsck.nix
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@ import ./make-test-python.nix {
boot.initrd.systemd.enable = systemdStage1;
};

testScript = ''
testScript = { nodes, ...}:
let
rootDevice = nodes.machine.virtualisation.rootDevice;
in
''
machine.wait_for_unit("default.target")

with subtest("root fs is fsckd"):
machine.succeed("journalctl -b | grep '${if systemdStage1
then "fsck.*vda.*clean"
else "fsck.ext4.*/dev/vda"}'")
then "fsck.*${builtins.baseNameOf rootDevice}.*clean"
else "fsck.ext4.*${rootDevice}"}'")

with subtest("mnt fs is fsckd"):
machine.succeed("journalctl -b | grep 'fsck.*vdb.*clean'")
Expand Down
1 change: 1 addition & 0 deletions nixos/tests/hibernate.nix
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ in makeTest {
imports = [
../modules/profiles/installation-device.nix
../modules/profiles/base.nix
./common/auto-format-root-device.nix
];

nix.settings = {
Expand Down
3 changes: 3 additions & 0 deletions nixos/tests/initrd-luks-empty-passphrase.nix
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ in {
name = "initrd-luks-empty-passphrase";

nodes.machine = { pkgs, ... }: {
imports = lib.optionals (!systemdStage1) [ ./common/auto-format-root-device.nix ];

virtualisation = {
emptyDiskImages = [ 512 ];
useBootLoader = true;
Expand All @@ -23,6 +25,7 @@ in {
# the new root device is /dev/vdb
# an empty 512MiB drive, containing no Nix store.
mountHostNixStore = true;
fileSystems."/".autoFormat = lib.mkIf systemdStage1 true;
};

boot.loader.systemd-boot.enable = true;
Expand Down
Loading