diff --git a/Makefile b/Makefile
index 16128e4f2..ce65c09d4 100644
--- a/Makefile
+++ b/Makefile
@@ -379,8 +379,6 @@ copy-files:
 	install -m 644 srv/salt/ceph/migrate/nodes/*.sls $(DESTDIR)/srv/salt/ceph/migrate/nodes/
 	install -d -m 755 $(DESTDIR)/srv/salt/ceph/migrate/policy
 	install -m 644 srv/salt/ceph/migrate/policy/*.sls $(DESTDIR)/srv/salt/ceph/migrate/policy/
-	install -d -m 755 $(DESTDIR)/srv/salt/ceph/migrate/subvolume
-	install -m 644 srv/salt/ceph/migrate/subvolume/*.sls $(DESTDIR)/srv/salt/ceph/migrate/subvolume/
 	# state files - mines
 	install -d -m 755 $(DESTDIR)/srv/salt/ceph/mines
 	install -m 644 srv/salt/ceph/mines/*.sls $(DESTDIR)/srv/salt/ceph/mines/
@@ -858,6 +856,9 @@ copy-files:
 	# state files - sync
 	install -d -m 755 $(DESTDIR)/srv/salt/ceph/sync
 	install -m 644 srv/salt/ceph/sync/*.sls $(DESTDIR)/srv/salt/ceph/sync/
+	# state files - subvolume
+	install -d -m 755 $(DESTDIR)/srv/salt/ceph/subvolume
+	install -m 644 srv/salt/ceph/subvolume/*.sls $(DESTDIR)/srv/salt/ceph/subvolume/
 	install -d -m 755 $(DESTDIR)/srv/salt/ceph/setosdflags
 	install -m 644 srv/salt/ceph/setosdflags/*.sls $(DESTDIR)/srv/salt/ceph/setosdflags
 	install -d -m 755 $(DESTDIR)/srv/salt/ceph/setosdflags/requireosdrelease
diff --git a/deepsea.spec.in b/deepsea.spec.in
index 0060f0a43..f38cef436 100644
--- a/deepsea.spec.in
+++ b/deepsea.spec.in
@@ -183,7 +183,6 @@ systemctl try-restart salt-api > /dev/null 2>&1 || :
 %dir /srv/salt/ceph/migrate/nodes
 %dir /srv/salt/ceph/migrate/osds
 %dir /srv/salt/ceph/migrate/policy
-%dir /srv/salt/ceph/migrate/subvolume
 %dir /srv/salt/ceph/mines
 %dir /srv/salt/ceph/mines/files
 %dir /srv/salt/ceph/mon
@@ -384,6 +383,7 @@ systemctl try-restart salt-api > /dev/null 2>&1 || :
 %dir /srv/salt/ceph/tools/fio
 %dir /srv/salt/ceph/tools/fio/files
 %dir /srv/salt/ceph/sync
+%dir /srv/salt/ceph/subvolume
 %dir /srv/salt/ceph/sysctl
 %dir /srv/salt/ceph/sysctl/files
 %dir /srv/salt/ceph/setosdflags
@@ -540,7 +540,6 @@ systemctl try-restart salt-api > /dev/null 2>&1 || :
 %config /srv/salt/ceph/migrate/nodes/*.sls
 %config /srv/salt/ceph/migrate/osds/*.sls
 %config /srv/salt/ceph/migrate/policy/*.sls
-%config /srv/salt/ceph/migrate/subvolume/*.sls
 %config /srv/salt/ceph/mines/*.sls
 %config /srv/salt/ceph/mines/files/*.conf
 %config /srv/salt/ceph/mon/*.sls
@@ -718,6 +717,7 @@ systemctl try-restart salt-api > /dev/null 2>&1 || :
 %config /srv/salt/ceph/stage/services/*.sls
 %config /srv/salt/ceph/stage/validate/*.sls
 %config /srv/salt/ceph/sync/*.sls
+%config /srv/salt/ceph/subvolume/*.sls
 %config /srv/salt/ceph/sysctl/*.sls
 %config /srv/salt/ceph/sysctl/files/*.conf
 %config /srv/salt/ceph/setosdflags/*.sls
diff --git a/man/deepsea-commands.7 b/man/deepsea-commands.7
index 611e945ce..f6695243d 100644
--- a/man/deepsea-commands.7
+++ b/man/deepsea-commands.7
@@ -101,9 +101,6 @@ salt-run state.orch ceph.migrate.osds
 .RS
 .RE
 salt-run state.orch ceph.migrate.policy
-.RS
-.RE
-salt-run state.orch ceph.migrate.subvolume
 .PP
 salt-run state.orch ceph.purge
 .PP
@@ -188,7 +185,7 @@ Salt States are the essential building blocks for running a series of steps on a
 .BR sls
 file.
 .PP
-The states below can be run indepdently and are useful when debugging. Nearly all states are part of a larger orchestration which may have prerequisites of other states. Running these commands requires understanding what you are doing.
+The states below can be run independently and are useful when debugging. Nearly all states are part of a larger orchestration which may have prerequisites of other states.
Running these commands requires understanding what you are doing. .PP For guidance, the Salt targets are listed as pseudo hostnames. Some are intended to only run on the Salt master. These targets are .BR master_minion. @@ -284,6 +281,8 @@ salt 'rgw*' state.apply ceph.rgw.users .RE salt 'rgw*' state.apply ceph.rgw .PP +salt 'mon*' state.apply ceph.subvolume +.PP salt '*' state.apply ceph.time.ntp .RS .RE diff --git a/srv/modules/runners/fs.py b/srv/modules/runners/fs.py deleted file mode 100644 index 1e580c5f8..000000000 --- a/srv/modules/runners/fs.py +++ /dev/null @@ -1,479 +0,0 @@ -# -*- coding: utf-8 -*- -# pylint: skip-file -# pylint: disable=modernize-parse-error,too-few-public-methods -# -# The salt-api calls functions with keywords that are not needed -# pylint: disable=unused-argument -""" - fs.py - - Runner for performing filesystem operations. - Current task is to create/migrate /var/lib/ceph to btrfs subvolumes if applicable. -""" - -from __future__ import print_function -from __future__ import absolute_import -import os -import logging -import salt.client -import salt.utils.error -# pylint: disable=relative-import -import six - -log = logging.getLogger(__name__) - - -class Mount(object): - """ - Structure representing the mount information for a given path. - """ - def __init__(self, mountpoint, opts): - self.mountpoint = mountpoint - # List of mount opts in the form [ x, y, ... , {k : v}, ... ] - self.opts = opts - - def get_opt(self, opt): - """ - Return 'opt' if found in self.opts. Since self.opts may contain dictionary entries, - this may return the value of such an entry, should 'opt' match a dictionary entry key. - """ - for option in self.opts: - if option == opt: - return option - if isinstance(option, dict) and opt in option: - return option[opt] - - # Didn't find it. - return None - - def __str__(self): - return "{} opts:{}".format(self.mountpoint, self.opts) - - -class Device(object): - """ - Structure representing a disk/partition. - """ - def __init__(self, dev, part_dev, dtype, uuid, fstype): - # ie. 'vda' - self.dev = dev - # ie. 'vda2' - self.part_dev = part_dev - # String representing the device type: 'hd', 'ssd', 'unknown' - self.dtype = dtype - self.uuid = uuid - # String representing the underlying fs: 'btrfs', 'xfs', etc - self.fstype = fstype - - def __str__(self): - return "uuid:{} (/dev/{}), {}, {}".format(self.uuid, self.part_dev, self.dtype, self.fstype) - - -class Path(object): - """ - Structure representing a path on a filesystem. - """ - def __init__(self, path, attrs, exists, ptype, device, mount): - # The full path, normalized - self.path = os.path.normpath(path) - # lsattr type attributes - self.attrs = attrs - self.exists = exists - # ie. dir or file - self.ptype = ptype - # A Device() instance - self.device = device - # A Mount() instance - self.mount = mount - - def __str__(self): - return ("{} {} {}, mounted on: {}, with device info:" - " {}".format("Existent" if self.exists else "Nonexsitent", - self.ptype if self.ptype else '', self.path, - self.mount, self.device)) - -# ------------------------------------------------------------------------------ -# Runner functions. 
-# ------------------------------------------------------------------------------ - -# Some fun output -BOLD = '\033[1m' -ENDC = '\033[0m' -GREEN = '\033[1;32m' -YELLOW = '\033[1;33m' -RED = '\033[1;31m' -CEPH_STATEDIR = "/var/lib/ceph" - - -def _analyze_ceph_statedirs(statedirs): - """ - Based on some elements of statedir, give the admin feedback regarding the - ceph variable statedir. - """ - local = salt.client.LocalClient() - results = {'ok': [], 'to_create': [], 'to_migrate': [], - 'to_correct_cow': [], 'alt_fs': [], 'ceph_down': []} - - # pylint: disable=too-many-nested-blocks - for minion, statedir in six.iteritems(statedirs): - if statedir.device.fstype == 'btrfs': - if statedir.exists: - # OK, path exists, is there a subvolume mounted on it? - # We detect this by checking the mountpoint - if statedir.mount.mountpoint == statedir.path: - # subvol = statedir.mount.get_opt('subvol') - # Already a subvolume! Check if CoW - if 'C' not in statedir.attrs: - results['to_correct_cow'].append(minion) - else: - # Copy on write disabled, all good! - results['ok'].append(minion) - # Also check to see if Ceph is running. - if not local.cmd(minion, 'cephprocesses.check', [], - expre_form='compound')[minion]: - results['ceph_down'].append(minion) - else: - # Path exists, but is not a subovlume - results['to_migrate'].append(minion) - else: - # Path does not yet exist. - results['to_create'].append(minion) - else: - # Not btrfs. Nothing to suggest. - results['alt_fs'].append(minion) - # Also check to see if Ceph is running - if not local.cmd(minion, 'cephprocesses.check', [], expre_form='compound')[minion]: - results['ceph_down'].append(minion) - - return results - - -def create_var(**kwargs): - """ - Create /var/lib/ceph as a btrfs subvolume - """ - local = salt.client.LocalClient() - path = CEPH_STATEDIR - ret = True - - statedirs = _inspect_ceph_statedir(path) - results = _analyze_ceph_statedirs(statedirs) - - if not results['to_create']: - print("{}No nodes marked for subvolume creation.{}".format(BOLD, ENDC)) - return True - - for _minion in results['to_create']: - if ret: - print("{}{}: Beginning creation...{}".format(BOLD, _minion, ENDC)) - for minion, ret in six.iteritems(local.cmd(_minion, 'fs.instantiate_btrfs_subvolume', - ["path={}".format(path), "subvol=@{}".format(path)], - tgt_type='compound')): - if not ret: - print("{}{}: {}Failed to properly create and mount" - "@{} onto {}{}. {}Check the local minion logs for " - "further details.{}".format(BOLD, minion, RED, path, - path, ENDC, BOLD, ENDC)) - else: - print("{}{}: {}Successfully created and mounted @{} onto " - "{}.{}".format(BOLD, minion, GREEN, path, path, ENDC)) - ret = _correct_ceph_statedir_attrs(minion) - - if ret: - print("{}Success.{}".format(GREEN, ENDC)) - else: - print("{}Failure detected, not proceeding with further creations.{}".format(RED, ENDC)) - - return ret - - -def migrate_var(**kwargs): - """ - Drive the migration of /var/lib/ceph to a btrfs subvolume. This needs to - be done one node at a time. 
- """ - local = salt.client.LocalClient() - path = CEPH_STATEDIR - ret = True - - statedirs = _inspect_ceph_statedir(path) - results = _analyze_ceph_statedirs(statedirs) - - if not results['to_migrate']: - print("{}No nodes marked for subvolume migration.{}".format(BOLD, ENDC)) - return True - - for _minion in results['to_migrate']: - if ret: - print("{}{}: Beginning migration...{}".format(BOLD, _minion, ENDC)) - for minion, ret in six.iteritems(local.cmd(_minion, 'fs.migrate_path_to_btrfs_subvolume', - ["path={}".format(path), "subvol=@{}".format(path)], - tgt_type='compound')): - # Human intervention needed. - if ret is None: - print("{}{}: {}Failure detected while migrating {} to " - "btrfs subvolume. This failure is potentially " - "serious and will require manual intervention on " - "the node to determine the cause. Please check " - "/var/log/salt/minion, the status of Ceph daemons " - "and the state of {}. You may also run: " - "{}{}salt-run fs.inspect_var {}{}to check the " - "status.{}".format(BOLD, minion, RED, path, path, - ENDC, BOLD, ENDC, RED, ENDC)) - elif ret is False: - print("{}{}: {}Failure detected while migrating {} to " - "btrfs subvolume. We have failed to properly " - "migrate {}, however, we have hopefully recoveRED " - "to the previous state and Ceph should again be " - "running. Please, however check " - "/var/log/salt/minion, the status of Ceph daemons " - "and the state of {} to confirm. You may also run: " - "{}{}salt-run fs.inspect_var {}{}to check the " - "status.{}".format(BOLD, minion, YELLOW, path, path, - path, ENDC, BOLD, ENDC, YELLOW, - ENDC)) - else: - print("{}{}: {}Successfully migrated.{}".format(BOLD, minion, GREEN, ENDC)) - ret = _correct_ceph_statedir_attrs(_minion) - - if ret: - print("{}Success.{}".format(GREEN, ENDC)) - else: - print("{}Failure detected, not proceeding with further migrations.{}".format(RED, ENDC)) - - return ret - - -def _correct_ceph_statedir_attrs(minion=None): - """ - Helper function to disable the copy-on-write attr on the ceph statedir. - """ - local = salt.client.LocalClient() - path = CEPH_STATEDIR - attrs = "C" - recursive = True - ret = True - - if minion: - # Omit /var/lib/ceph/osd directory, as underneath we may have OSDs mounted. - for minion, rets in six.iteritems(local.cmd(minion, 'fs.add_attrs', - ["path={}".format(path), "attrs={}".format(attrs), - "rec={}".format(recursive), "omit={}/osd".format(path)], - tgt_type='compound')): - for _path, ret in six.iteritems(rets): - if not ret: - print("{}{}: {}Failed to recursively disable " - "copy-on-write for {}.{}".format(BOLD, minion, RED, - _path, ENDC)) - ret = False - - if ret: - print("{}{}: {}Successfully disabled copy-on-write for {} and " - "it's contents.{}".format(BOLD, minion, GREEN, path, ENDC)) - - return ret - - -def correct_var_attrs(**kwargs): - """ - Recursively set No_COW (ie. disable copy-on-write) flag on /var/lib/ceph. - """ - path = CEPH_STATEDIR - ret = True - - all_btrfs_nodes = kwargs['all_btrfs_nodes'] if 'all_btrfs_nodes' in kwargs else False - - statedirs = _inspect_ceph_statedir(path) - results = _analyze_ceph_statedirs(statedirs) - - if not all_btrfs_nodes and not results['to_correct_cow']: - print("{}No nodes marked for copy-on-write correction.{}".format(BOLD, ENDC)) - return True - - # If all_btrfs_nodes == True, correct COW on all nodes regardless whether - # they're in results['to_correct_cow']. - # Only really useful if the admin manually set No_COW on /var/lib/ceph, - # but didn't recursively set all files underneath. 
- for minion, statedir in six.iteritems(statedirs): - if not statedir.exists: - print("{}{}: {} not found.{}".format(BOLD, minion, path, ENDC)) - - if statedir.exists and statedir.device.fstype == 'btrfs': - minion_to_correct = None - if all_btrfs_nodes: - minion_to_correct = minion - else: - minion_to_correct = minion if minion in results['to_correct_cow'] else None - - # Unlike the creation and migration functions, don't abort on first failure. - if not _correct_ceph_statedir_attrs(minion_to_correct): - ret = False - - if ret: - print("{}Success.{}".format(GREEN, ENDC)) - else: - print("{}Failure detected disabling copy-on-write for {}.{}".format(RED, path, ENDC)) - - return ret - - -def _inspect_ceph_statedir(path): - """ - Helper function that collects /var/lib/ceph information from all minions. - - Returns a dictionary of Path objects keyed on minion id. - - """ - search = __utils__['deepsea_minions.show'] - local = salt.client.LocalClient() - - # A container of Path's keyed on minion id. - statedirs = {} - - for minion, path_info in six.iteritems(local.cmd(search, 'fs.inspect_path', - ["path={}".format(path)], - tgt_type='compound')): - statedirs[minion] = Path(path, path_info['attrs'], path_info['exists'], - path_info['type'], - Device(path_info['dev_info']['dev'], - path_info['dev_info']['part_dev'], - path_info['dev_info']['type'], - path_info['dev_info']['uuid'], - path_info['dev_info']['fstype']), - Mount(path_info['mount_info']['mountpoint'], - path_info['mount_info']['opts'])) if path_info['ret'] else None - - return statedirs - - -def inspect_var(**kwargs): - """ - Collect /var/lib/ceph information from all minions. - """ - path = CEPH_STATEDIR - - # Loud by default. - quiet = kwargs['quiet'] if 'quiet' in kwargs else False - - statedirs = _inspect_ceph_statedir(path) - results = _analyze_ceph_statedirs(statedirs) - - if not quiet: - print("{}Inspecting Ceph Statedir ({})...{}".format(BOLD, path, ENDC)) - for minion, statedir in six.iteritems(statedirs): - print("{}{}:{} {}".format(BOLD, minion, ENDC, statedir)) - print("") - - if not quiet: - # Offer some suggestions. - # Migration/Creation/COW adjustment. - - if results['ceph_down']: - print("{}The following nodes have Ceph processes which are " - "currently down:{}".format(RED, ENDC)) - for minion in results['ceph_down']: - print("{}".format(minion)) - print("{}Determine the nature of the failures before " - "proceeding.{}\n".format(RED, ENDC)) - - if results['to_migrate']: - print("{}For the following nodes using btrfs:{}".format(YELLOW, ENDC)) - for minion in results['to_migrate']: - print("{}".format(minion)) - print("{}{} exists, but no btrfs subvolume is mounted. " - "Run: {}{}salt-run fs.migrate_var{}{} to " - "migrate {} to the btrfs subvolume " - "@{}{}".format(YELLOW, path, ENDC, BOLD, ENDC, YELLOW, path, - path, ENDC)) - print("{}You may then run: {}{}salt-run fs.inspect_var {}{}to " - "check the status.{}\n".format(YELLOW, ENDC, BOLD, ENDC, - YELLOW, ENDC)) - else: - # print ("{}No nodes found needing migration of {} to btrfs " - # "subvolume @{}.{}\n".format(GREEN, path, path, ENDC)) - pass - - if results['to_create']: - print("{}For the following nodes using btrfs:{}".format(YELLOW, ENDC)) - for minion in results['to_create']: - print("{}".format(minion)) - print("{}{} does not yet exist. 
" - "Run: {}{}salt-run fs.create_var{}{} to create and mount " - "the btrfs subvolume @{} onto {}.{}".format(YELLOW, path, ENDC, BOLD, - ENDC, YELLOW, path, path, ENDC)) - print("{}You may then run: {}{}salt-run fs.inspect_var {}{}to check the " - "status.{}\n".format(YELLOW, ENDC, BOLD, ENDC, YELLOW, ENDC)) - else: - # Migration also creates subvolumes, so let's not confuse the admin. - if not results['to_migrate']: - # print ("{}No nodes found needing creation of {} as btrfs " - # "subvolume @{}.{}\n".format(GREEN, path, path, ENDC)) - pass - - if results['to_correct_cow']: - print("{}For the following nodes using btrfs:{}".format(YELLOW, ENDC)) - for minion in results['to_correct_cow']: - print("{}".format(minion)) - print("{}A btrfs subvolume is mounted on {}{}. However, " - "copy-on-write is enabled. Run: {}{}salt-run " - "fs.correct_var_attrs{}{} to disable " - "copy-on-write.".format(YELLOW, path, ENDC, BOLD, ENDC, - YELLOW, ENDC)) - print("{}You may then run: {}{}salt-run fs.inspect_var {}{}to " - "check the status.{}\n".format(YELLOW, ENDC, BOLD, ENDC, - YELLOW, ENDC)) - else: - # Migration also sets No_COW, so let's not confuse the admin. - if not results['to_migrate']: - print("{}No nodes found needing adjustment of copy-on-write " - "for {}.{}".format(GREEN, path, ENDC)) - print("{}NOTE: If copy-on-write was disabled manually for " - "{}, you may still want to run {}{}salt-run " - "fs.correct_var_attrs all_btrfs_nodes=True{}{} to " - "correct all relevant files contained within {} on all " - "nodes running btrfs.{}\n".format(YELLOW, path, ENDC, - BOLD, ENDC, YELLOW, - path, ENDC)) - - if results['ok']: - print("{}The following btrfs nodes have @{} correctly mounted on " - "{}, and do not require any subvolume " - "manipulation:{}".format(GREEN, path, path, ENDC)) - for minion in results['ok']: - print("{}".format(minion)) - print("") - - if results['alt_fs']: - print("{}The following nodes are not using btrfs, and hence no " - "action is needed:{}".format(GREEN, ENDC)) - for minion in results['alt_fs']: - print("{}".format(minion)) - print("") - - return True - - -def help_(): - """ - Usage. - """ - usage = ("""salt-run fs.inspect_var - Inspects /var/lib/ceph, provides mountpoint and device - information along with suggestions regarding migration of - /var/lib/ceph to a btrfs subvolume if applicable. - - salt-run fs.create_var - Creates /var/lib/ceph (if not yet present) as a btrfs subvolume. - - salt-run fs.migrate_var - Migrates /var/lib/ceph to a btrfs subvolume (@/var/lib/ceph) if applicable. - - salt-run fs.correct_var_attrs [all_btrfs_nodes=True] - Disables copy-on-write for /var/lib/ceph on btrfs if applicable - """) - print(usage) - return "" - -__func_alias__ = { - 'help_': 'help', - } diff --git a/srv/modules/runners/validate.py b/srv/modules/runners/validate.py index e80752724..a82387248 100644 --- a/srv/modules/runners/validate.py +++ b/srv/modules/runners/validate.py @@ -1013,6 +1013,33 @@ def kernel(self): self.passed['kernel_module'] = 'valid' self._set_pass_status('kernel_module') + def subvolume(self): + """ + Verify that /var/lib/ceph is a subvolume on the monitors when using + btrfs. Skip checks if subvolume state is disabled. 
+        """
+        # The subvolume_init setting comes from the global pillar, so the
+        # first minion's pillar data is sufficient to check.
+        for node in self.data:
+            if 'subvolume_init' in self.data[node] and self.data[node]['subvolume_init'] == "disabled":
+                self.skipped['subvolume'] = "skipping"
+                return
+            break
+
+        targets = "I@roles:mon"
+        results = self.local.cmd(targets, 'subvolume.check', [], tgt_type="compound")
+        for minion in results:
+            rc_, msg = results[minion]
+            if not rc_:
+                if 'subvolume' in self.errors:
+                    self.errors['subvolume'].append("{} on {}".format(msg, minion))
+                else:
+                    self.errors['subvolume'] = ["{} on {}".format(msg, minion)]
+
+        if 'subvolume' in self.errors:
+            self.errors['subvolume'].append("See /srv/salt/ceph/subvolume/README.md")
+        else:
+            self.passed['subvolume'] = "valid"
+            self._set_pass_status('subvolume')
+
     def salt_updates(self):
         """
         Salt Updates available?
@@ -1378,6 +1405,7 @@ def pillar(cluster=None, printer=None, **kwargs):
     valid.cluster_interface()
     valid.check_ipversion()
     valid.monitors()
+    valid.subvolume()
     valid.mgrs()
     valid.storage()
     valid.storage_role()
diff --git a/srv/salt/_modules/fs.py b/srv/salt/_modules/fs.py
deleted file mode 100644
index 42a67a91e..000000000
--- a/srv/salt/_modules/fs.py
+++ /dev/null
@@ -1,1186 +0,0 @@
-# -*- coding: utf-8 -*-
-# pylint: disable=fixme,no-else-return
-
-"""
-------------------------------------------------------------------------------
-fs.py
-
-Module for performing filesystem operations.
-
-------------------------------------------------------------------------------
-"""
-
-from __future__ import absolute_import
-import logging
-import os
-import tempfile
-import shutil
-import uuid
-import time
-# pylint: disable=import-error,3rd-party-module-not-gated
-import psutil
-
-
-log = logging.getLogger(__name__)
-
-
-def _systemctl_cmd_target(cmd, target):
-    """
-    Run a systemctl cmd on a target. Returns True/False.
-    """
-    retries = 5
-    delay = 2
-
-    # TODO: warn that target is null?
-    if not target:
-        return False
-
-    cmd = "systemctl {} {}".format(cmd, target)
-
-    # Try a _few_ times, with a small sleep.
-    _rc, _stdout, _stderr = __salt__['helper.run'](cmd)
-    while retries and _rc != 0:
-        _rc, _stdout, _stderr = __salt__['helper.run'](cmd)
-        retries -= 1
-        time.sleep(delay)
-
-    if _rc != 0:
-        log.error("Failed to {} target {}.".format(cmd, target))
-        return False
-
-    return True
-
-
-def _systemctl_stop_target(target):
-    """
-    systemctl stop target.
-    """
-    return _systemctl_cmd_target('stop', target)
-
-
-def _systemctl_start_target(target):
-    """
-    systemctl start target.
-    """
-    return _systemctl_cmd_target('start', target)
-
-
-def _systemctl_restart_target(target):
-    """
-    systemctl restart target.
-    """
-    return _systemctl_cmd_target('restart', target)
-
-
-def _teardown_ceph():
-    """
-    Kill Ceph and return the status obtained from _ceph_is_down()
-    """
-    # TODO: ganesha and others!?
-    # NOTE: yes, this will trigger an error in the log for nodes that don't run
-    # _all_ of these targets.
-    for target in ['ceph-mon.target', 'ceph-osd.target', 'ceph-mds.target',
-                   'ceph-radosgw.target', 'ceph-mgr.target', 'ceph.target']:
-        _systemctl_stop_target(target)
-
-    return _ceph_is_down()
-
-
-def _startup_ceph():
-    """
-    Start Ceph, check status of Ceph processes, and return True/False
-    """
-    ret = _systemctl_start_target('ceph.target')
-
-    # We 'succeeded'... but that doesn't mean Ceph isn't still running.
-    if ret:
-        # Let things settle.
-        time.sleep(5)
-        return _ceph_is_up()
-    return False
-
-
-def _ceph_is_down():
-    """
-    Queries whether all Ceph processes meant to be running on this node have been shut down.
- - Returns True/False. - """ - retries = 6 - delay = 2 - down = False - # Processes that don't impede migration (ganesha related). - omit_list = ['ganesha.nfsd', 'rpcbind', 'rpc.statd'] - - while retries and not down: - running_procs = list(__salt__['cephprocesses.check'](results=True, quiet=True)['up'].keys()) - if not running_procs: - down = True - else: - # Compute processes which are running, but not in the omit_list. - waiting_for = [proc for proc in running_procs if proc not in omit_list] - if not waiting_for: - down = True - else: - log.warning(("Waiting for the following Ceph processes to stop: " - "{}.".format(waiting_for))) - retries -= 1 - time.sleep(delay) - delay *= 2 - - return down - - -def _ceph_is_up(): - """ - Queries whether all Ceph processes meant to be running on this node are up. - - Returns True/False. - """ - retries = 6 - delay = 2 - - while retries and not __salt__['cephprocesses.check'](): - log.warning("Waiting for Ceph processes to start.") - retries -= 1 - time.sleep(delay) - delay *= 2 - - return __salt__['cephprocesses.check']() - - -def _get_unique_path(path): - """ - Tries to return a unique path, else None on error. - """ - retries = 100 - tmp_path = "{}.{}".format(path, str(uuid.uuid4())) - - while os.path.exists(tmp_path) and retries: - tmp_path = "{}.{}".format(path, str(uuid.uuid4())) - retries -= 1 - - # Couldn't find a unique path. - if not retries and os.path.exists(tmp_path): - return None - - return tmp_path - - -def _get_uid_gid(path): - """ - For a given path, return {'uid': uid, 'gid': gid} or None if path does not exist. - """ - if not os.path.exists(path): - return None - stats = os.stat(path) - return {'uid': stats.st_uid, 'gid': stats.st_gid} - - -def _mv_contents(path, new_path): - """ - Try to move the contents of path to tmp_path. Return True/False. - - NOTE: Invoking `mv` as shutil.move() was not preserving ownership metadata. - """ - for entry in os.listdir(path): - cmd = "mv {}/{} {}".format(path, entry, new_path) - _rc, _stdout, _stderr = __salt__['helper.run'](cmd) - if _rc != 0: - return False - - return True - - -# pylint: disable=too-many-return-statements -def _add_fstab_entry(_uuid, path, fstype, subvol): - """ - Append entry to /etc/fstab if it does not already exist. - - Return True/False. - """ - fstab_entries = None - - if not _uuid or not path or not fstype or not subvol: - log.error("Refusing to modify /etc/fstab: Unable to form proper fstab entry.") - return False - - entry = "UUID={} {} {} subvol={} 0 0".format(_uuid, path, fstype, subvol) - - try: - with open('/etc/fstab', 'r') as _fstab: - fstab_entries = [line.rstrip('\n') for line in _fstab] - except (IOError, FileNotFoundError): - log.error("Failed to read /etc/fstab.", exc_info=True) - return False - - # Process entries. - if entry in fstab_entries: - log.warning("'{}' already exists in /etc/fstab".format(entry)) - return True - if path in fstab_entries: - log.error("Refusing to modify /etc/fstab: existing path entry for '{}' found.".format(path)) - return False - elif subvol in fstab_entries: - log.error(("Refusing to modify /etc/fstab: existing subvol entry for " - "'{}' found.".format(subvol))) - return False - - # Append entry to /etc/fstab. 
- try: - with open('/etc/fstab', 'a') as _fstab: - _fstab.write("{}\n".format(entry)) - except (IOError, FileNotFoundError): - log.error("Failed to append '{}' to /etc/fstab.".format(entry), exc_info=True) - return False - - log.warning("Successfully appended '{}' to /etc/fstab.".format(entry)) - return True - -# ------------------------------------------------------------------------------ -# BTRFS related functions. -# -# Note that there appears to be a python btrfs module, but does not appear to -# exist in OBS/IBS. -# ------------------------------------------------------------------------------ - - -def _btrfs_path_as_subvol(path): - """ - Returns '@' concatinated with path => @/foo/bar. - """ - return "@{}".format(path) - - -# pylint: disable=unused-argument -def btrfs_get_mountpoints_of_subvol(subvol='', **kwargs): - """ - Determine the list of mountpoints for a given subvol (of the form @/foo/bar). - - Returns a list of mountpoint(s), or an empty list. - """ - mountpoints = [] - if not subvol: - return [] - - # Seems the easiest way to do this is to walk the disk partitions, extract the opts - # string and see if subvol is present. Remember the leading '/'. - for part in psutil.disk_partitions(): - if "subvol=/{}".format(subvol) in part.opts: - mountpoints.append(part.mountpoint) - - return mountpoints - - -# pylint: disable=unused-argument -def btrfs_get_default_subvol(path='', **kwargs): - """ - Returns the default subvolume (in the form @/foo/bar) of a given path or None on error. - """ - cmd = "btrfs subvolume get-default {}".format(path) - _rc, _stdout, _stderr = __salt__['helper.run'](cmd) - - if _rc == 0 and _stdout: - # _stdout example: ID 259 gen 35248 top level 258 path @/.snapshots/1/snapshot - # Return only the subvol - return _stdout.split()[-1] - - return None - - -# pylint: disable=unused-argument -def btrfs_subvol_exists(subvol='', **kwargs): - """ - Determine if subvol, of the form @/foo/bar exists as a btrfs subvolume. The - subvolume need not be mounted. - - Returns True/False. Returns False for empty subvolumes. - """ - if not subvol: - return False - - # If the subvol is mounted somewhere, it obviously exists. - if btrfs_get_mountpoints_of_subvol(subvol): - return True - - # If it isn't mounted, we have no idea the mountpoint to use in the below - # list, so just default to / - cmd = "btrfs subvolume list /" - _rc, _stdout, _stderr = __salt__['helper.run'](cmd) - - if _rc == 0 and _stdout: - subvols = _stdout.split('\n') - for volume in subvols: - if volume.endswith("path {}".format(volume)): - return True - - # Haven't found it. - return False - - -def btrfs_create_subvol(subvol='', dev_info=None, **kwargs): - """ - Create a btrfs subvolume for the given subvol. Expected subvol to be of the - form @/foo/bar. dev_info is either passed (when called directly) or queried - by stripping off the '@' from the subvol in order to query the path. - - Return True/False. - """ - ret = True - tmp_dir = None - - if not subvol: - log.error("Unable to create subvolume '{}'.".format(subvol)) - return False - - # Check if subvol already exists. - if btrfs_subvol_exists(subvol): - log.warning("Subvolume '{}' already exists.".format(subvol)) - return True - - # If we didn't get dev_info (because we're being called directly from the command - # line), we _assume_ that the subvol path will ultimately be mounted onto a matching - # path, so _try_ to get the device information by converting subvol to it's corresponding - # path (ie. by stripping the leading '@'). 
- if not dev_info: - dev_info = get_device_info(get_mountpoint(subvol[1:])) - - if not dev_info: - log.error(("Unable to create subvolume '{}': failed to get device " - "information for '{}'".format(subvol, subvol[1:]))) - return False - - if dev_info['fstype'] != 'btrfs': - log.error(("Unable to create subvolume '{}': invalid filesystem type " - "({}).".format(subvol, dev_info['fstype']))) - return False - - # Get the partition of the mountpoint of the path. - part_path = "/dev/{}".format(dev_info['part_dev']) - - # Create a unique tmp directory. - try: - tmp_dir = tempfile.mkdtemp() - except (PermissionError, FileExistsError, OSError): - log.error("Unable to create subvolume '{}': failed to create a temporary directory.".format( - subvol), exc_info=True) - return False - - # Mount tmpdir. - cmd = "mount -t btrfs -o subvolid=0 '{}' '{}'".format(part_path, tmp_dir) - _rc, _stdout, _stderr = __salt__['helper.run'](cmd) - if _rc != 0: - log.error("Failed to mount '{}' with subvolid=0 on '{}'.".format(part_path, tmp_dir)) - ret = False - - if ret: - # Create the subvol. - cmd = "btrfs subvolume create '{}/{}'".format(tmp_dir, subvol) - _rc, _stdout, _stderr = __salt__['helper.run'](cmd) - if _rc != 0: - log.error("Failed to create subvolume '{}' on '{}'.".format(subvol, part_path)) - ret = False - - # Cleanup tmp_dir. Don't touch ret here, just log any errors. - if os.path.exists(tmp_dir): - cmd = "umount '{}'".format(tmp_dir) - _rc, _stdout, _stderr = __salt__['helper.run'](cmd) - if _rc != 0: - log.error("Failed to unmount '{}'.".format(tmp_dir)) - try: - shutil.rmtree(tmp_dir) - except (OSError, FileNotFoundError): - log.error("Failed to remove '{}'.".format(tmp_dir), exc_info=True) - - if not ret: - # We failed somewhere, so take care of removing the subvolume, etc. - # TODO: there is a bug with subvolume deletes - # (https://bugzilla.opensuse.org/show_bug.cgi?id=957198) - # so no more cleanup can be dont at this point. - log.error("Failed to create subvolume '{}'.".format(subvol)) - else: - log.warning("Successfully created subvolume '{}'.".format(subvol)) - - return ret - - -def btrfs_mount_subvol(subvol='', path='', **kwargs): - """ - Given a subvolume in the form "@/path/to/subvol", mount it atop of path. If - path does not exist, log an error and abort. If path is already a mountpoint - for for subvol, skip. If path is a mountpoint for something other than path, - abort. Refuse to mount a subvol with a differing path (ie. refuse to mount - @/var/lib/foo atop of /var/lib/bar). - - CAUTION: No checks are performed whether path contains existing data! - NOTE: Does not touch /etc/fstab, for that, _add_fstab_entry(). - - Returns True/False. - """ - if not subvol or not path: - log.error("Unable to mount subvolume '{}' onto '{}'.".format(subvol, path)) - return False - - # Grab the mount info for path. - mount_info = get_mount_info(path) - if not mount_info: - log.error(("Unable to mount subvolume '{}' onto '{}': no mount " - "information obtained.".format(subvol, path))) - return False - - # Grab device info to confirm this is a btrfs filesystem. - dev_info = get_device_info(mount_info['mountpoint']) - if not dev_info: - log.error(("Unable to mount subvolume '{}' onto '{}': no filesystem " - "information obtained.".format(subvol, path))) - return False - if dev_info['fstype'] != 'btrfs': - log.error("Unable to mount subvolume '{}' onto '{}': invalid filesystem type ({}).".format( - subvol, path, dev_info['fstype'])) - return False - - # Subvol should exist! 
- if not btrfs_subvol_exists(subvol): - log.error(("Unable to mount subvolume '{}' onto '{}': '{}' does not " - "exist.".format(subvol, path, subvol))) - return False - - # Path should exist! - if not os.path.exists(path): - log.error(("Unable to mount subvolume '{}' onto '{}': '{}' does not " - "exist.".format(subvol, path, path))) - return False - - # Begin mounting process. - - # If path == mountpoint, then we already have a subvolume mounted on this path. - if path == mount_info['mountpoint']: - # our path is a mountpoint, run some basic checks - if path in btrfs_get_mountpoints_of_subvol(subvol): - log.warning(("Subvolume '{}' is already mounted onto " - "'{}'.".format(subvol, path))) - return True - else: - # Another subvolume is mounted on path, output which - log.error(("Unable to mount subvolume '{}' onto '{}': a different " - "subvolume ({}) is already " - "mounted.".format(subvol, path, - _get_mount_opt('subvol', - mount_info['opts'])))) - return False - else: - # TODO: Should we prevent the same subvolume being mounted on multiple - # different directories? btrfs is happy to mount the same subvolume - # onto multiple directories, so let's not limit this behaviour. If - # needed, we can always check the current subvol of the path, and if - # it isn't the default subvol (via btrfs_get_default_subvol()), we - # could assume a subvol is already mounted and log an error/return - # False. - pass - - # Finally mount! - cmd = "mount '/dev/{}' '{}' -t btrfs -o subvol={}".format(dev_info['part_dev'], path, subvol) - _rc, _stdout, _stderr = __salt__['helper.run'](cmd) - if _rc != 0: - log.error(("Failed to mount subvolume '{}' onto '{}': stderr: " - "'{}'.".format(subvol, path, _stderr))) - return False - - log.warning(("Successfully mounted subvolume '{}' onto " - "'{}'.".format(subvol, path))) - return True - -# ------------------------------------------------------------------------------ -# General FS related functions. -# ------------------------------------------------------------------------------ - - -def _get_mount_opt(opt, mount_opts): - """ - Search for the opt string argument in mount_opts (ie. mount_info['opts']). - Entries within the mount_info['opts'] list are either strings, or single k:v - dictionaries. - - Returns the opt (or it's value if it's a {k:v}) if found, otherwise None. - """ - if not mount_opts: - return None - - for _opt in mount_opts: - if _opt == opt: - return _opt - if isinstance(_opt, dict) and opt in _opt: - return _opt[opt] - - # Didn't find opt it. - return None - - -def get_attrs(path='', **kwargs): - """ - Obtains the raw output of `lsattr` on a given path. - - Returns the attrs string after having stripped off the path, or None on error - or if path is empty. If path is a directory, it does not recursively follow - all child paths. - - # TODO: Any use in adding a recursive flag and dumping output into a list? - """ - # TODO: Should we warn if the path doesn't exist, or quietly return None? - if not os.path.exists(path): - return None - - cmd = ("lsattr -d {}".format(path) if os.path.isdir(path) - else "lsattr {}".format(path)) - _rc, _stdout, _stderr = __salt__['helper.run'](cmd) - - if _rc == 0 and _stdout: - return _stdout.split()[0] - else: - log.error("Failed to determine attrs for '{}': stderr: '{}'".format(path, _stderr)) - return None - - -# pylint: disable=invalid-name -def _call_chattr(op, path, attrs): - """ - Call chattr and check the return code. - - Returns True on if the return code is 0, otherwise False. 
- """ - cmd = "chattr {} {}{} {}".format('-d' if os.path.isdir(path) else '', - op, attrs, path) - _rc, _stdout, _stderr = __salt__['helper.run'](cmd) - return _rc == 0 - - -# pylint: disable=invalid-name -def _rchattr(op, path, attrs, rec, omit, rets): - """ - Yet another helper for the whole chattr story. Recursively applies op and - attrs to paths which are not present in the omit list. - - Returns a dictionary of { path: True/False, ... } entries representing the - succesful/not successful application of op and attrs. - """ - # Basic non recursive case. Set attrs for a given path, if it's not in the omit list. - if not rec: - if path not in omit: - rets[path] = _call_chattr(op, path, attrs) - else: - log.warning(("Refusing to apply '{}' attrs to '{}' which is also in" - "the omit list {}.".format(attrs, path, omit))) - rets[path] = False - return rets[path] - # The fun case. - else: - # If our path is a directory, compute it's contents in an absolute form. - if os.path.isdir(path): - path_contents = ["{}/{}".format(path, e) for e in os.listdir(path)] - # Leaf directory with no contents, and not to be omitted. - if not path_contents and path not in omit: - rets[path] = _call_chattr(op, path, attrs) - # There are paths present in path_contents, process those. - else: - # For each path that is not in the omit list, recurse. - for _pathname in path_contents: - if _pathname not in omit: - _rchattr(op, _pathname, attrs, rec, omit, rets) - # Now process our non-leaf directory. - # TODO: I have a feeling we should check this after the isdir() - # and not process it or it's children. - if path not in omit: - # Finally add the path - rets[path] = _call_chattr(op, path, attrs) - # Why no return rets[path] ? - return rets[path] - # Path is a file. - else: - if path not in omit: - rets[path] = _call_chattr(op, path, attrs) - # Why no return rets[path] ? - return rets[path] - return None - - -# pylint: disable=invalid-name -def _chattr(op, path, attrs, rec, omit): - """ - {add,remove,set}_attrs helper function. op should be one of '+', '-', or '=' per `man chatter`. - Ultimately invokes the recursive _rchatter and collects results. - """ - supported_ops = {'-': 'remove', '+': 'add', '=': 'set'} - rets = {} - - # Convert omit string to list. - omit = omit.split(',') if omit else [] - - # Verify op. - if op not in list(supported_ops.keys()): - log.error(("Unable to manipulate attrs for '{}': unsuppurted chattr op:" - "{}.".format(path, op))) - rets[path] = False - return rets - - # Hopefully it's obvious why we're unable to proceed. Maybe an error is a bit much. - if not path or not attrs: - log.error("Unable to {} attrs '{}' for path '{}'.".format(supported_ops[op], attrs, path)) - rets[path] = False - return rets - - # Make sure path exists. - if not os.path.exists(path): - log.error("Unable to {} attrs '{}' for '{}': '{}' does not exist.".format( - supported_ops[op], attrs, path, path)) - rets[path] = False - return rets - - _rchattr(op, path, attrs, rec, omit, rets) - return rets - - -def add_attrs(path='', attrs='', rec=False, omit='', **kwargs): - """ - Add attrs to existing attrs for path. If path is a directory, and rec is True, will attempt - to add attrs recursively to path and it's contents. Omits paths found in omit. - - attrs should be a string of attributes to add. For example, "CA" would add attributes - 'C' and 'A' to path. Please refer to `man chattr` for valid attrs. - - Returns a dictionary (see _rchattr). 
- """ - rets = _chattr('+', path, attrs, rec, omit) - return rets - - -def remove_attrs(path='', attrs='', rec=False, omit='', **kwargs): - """ - Remove attrs from existing attrs for path. If path is a directory, and - rec is True, will attempt to remove attrs recursively from path and it's - contents. Omits paths found in omit. - - attrs should be a string of attributes to remove. For example, "CA" would - remove attributes 'C' and 'A' from path. Please refer to `man chattr` for - valid attrs. - - Returns a dictionary (see _rchattr). - """ - rets = _chattr('-', path, attrs, rec, omit) - return rets - - -def set_attrs(path='', attrs='', rec=False, omit='', **kwargs): - """ - Set attrs for path. If path is a directory, and rec is True, will attempt - to set attrs recursively for path and it's contents. Omits paths found in - omit. - - attrs should be a string of attributes to set. For example, "CA" would - set attributes 'C' and 'A' for path. Please refer to `man chattr` for - valid attrs. - - Returns a dictionary (see _rchattr). - """ - rets = _chattr('=', path, attrs, rec, omit) - return rets - - -def get_mountpoint_opts(mountpoint='', **kwargs): - """ - Determine the mount options set for a given mountpoint. - - Returns a list of mount opts or None on error. For opts in the form 'key=val', - convert the opt into dictionary. Thus, our return structure may look - something like: - [ 'rw', 'relatime', ..., { 'subvolid': '259' }, ... ]' - """ - opts = None - - for part in psutil.disk_partitions(): - if part.mountpoint == mountpoint: - opts = part.opts.split(',') - - # Convert foo=bar to dictionary entries if opts is not None or not an empty list. - opts = [o if '=' not in o else {k: v for (k, v) in [tuple(o.split('='))]} - for o in opts] if opts else None - - if not opts: - log.error("Failed to determine mount opts for '{}'.".format(mountpoint)) - - return opts - - -def _get_mountpoint(path): - """ - Check if path is a mount point. If not, split the path until either a mount - point is found, or path is empty. Returns a mount point path or None. - """ - if not path or os.path.ismount(path): - return path - - return _get_mountpoint(os.path.split(path)[0]) - - -def get_mountpoint(path='', **kwargs): - """ - Recursively finds the mount point for a given path. If a path does not exist, - returns the mount point of the path _if_ it were to be created. - - Returns the mount point or an empty path if mount point was not found. - - For cases where, for example, path=="var", we make no special assumptions - about the parent, nor do we take an abspath(). This example would simply - return ''. - """ - mountpoint = _get_mountpoint(path) - if not mountpoint: - log.error("Failed to determine mountpoint of '{}'.".format(path)) - - return mountpoint - - -def get_mount_info(path='', **kwargs): - """ - Determine the mount point and mount options for a given path. - - Returns { 'mountpoint': String, 'opts': [ String | {k:v} ] } or None on error. - - 'opts' may contain a { 'subvol': String } list entry indicating the btrfs - subvolume mounted atop the 'mount_point'. - - TODO: Should a lack of mountpoint opts trigger a None return and error? 
- """ - mount_info = {'mountpoint': '', 'opts': []} - - mountpoint = get_mountpoint(path) - if not mountpoint: - log.error("Failed to obtain mount information for '{}'.".format(path)) - return None - mount_info['mountpoint'] = mountpoint - - opts = get_mountpoint_opts(mountpoint) - if not opts: - log.error("Failed to obtain mount information for '{}'.".format(path)) - return None - mount_info['opts'] = opts - - return mount_info - - -def get_uuid(dev_path='', **kwargs): - """ - Determine the UUID of a given dev_path (ie. /dev/sdb2). - - Returns the UUID of dev, or None on error. - - NOTE: Simplified form of original found in osd.py - """ - pathname = "/dev/disk/by-uuid" - - cmd = "find -L {} -samefile {}".format(pathname, dev_path) - _rc, _stdout, _stderr = __salt__['helper.run'](cmd) - - if _rc == 0 and _stdout: - return os.path.basename(_stdout) - else: - log.error("Failed to determine uuid of '{}'.".format(dev_path)) - return None - - -def get_device_info(mountpoint='', **kwargs): - """ - Determine the device, uuid, type and fs type for a given mountpoint. - - Returns { 'dev': String, 'part_dev': String, 'uuid': String, - 'type': String (ssd|hd|unknown), - 'fstype': String (btrfs|xfs|extX|unknown) } - or None on error. - """ - dev_info = {'dev': None, 'part_dev': None, 'uuid': None, 'type': None, 'fstype': None} - dev_path = None - dev = None - part_dev = None - fstype = None - - if not mountpoint: - log.error("Unable to determine the device of mountponit '{}'.".format(mountpoint)) - return None - - # Grab device path and fs type in one shot. - for part in psutil.disk_partitions(): - if part.mountpoint == mountpoint: - dev_path = part.device - fstype = part.fstype - - if not dev_path: - log.error("Failed to determine the device of mountpoint '{}'.".format(mountpoint)) - return None - - part_dev = os.path.basename(dev_path) - - # From part_dev (ie. sdb2), grab the underlying device - dev = part_dev.rstrip("1234567890") - # For nvme, strip the trailing 'p' as well. - if "nvme" in dev: - dev = dev[:-1] - - dev_info['part_dev'] = part_dev - dev_info['dev'] = dev - - if not fstype: - log.error("Failed to determine the filesystem type of mountpoint '{}'.".format(mountpoint)) - return None - dev_info['fstype'] = fstype - - # Check if we're on an SSD or not. - try: - with open("/sys/block/{}/queue/rotational".format(dev), 'r') as _file: - line = _file.readline().rstrip() - if line == '0': - dev_info['type'] = 'ssd' - elif line == '1': - dev_info['type'] = 'hd' - else: - dev_info['type'] = 'unknown' - except (IOError, FileNotFoundError): - # For some reason, the file doesn't exist or we can't open it. - log.error( - "Failed to determine if '{}' is a solid state device.".format(dev_path), - exc_info=True - ) - return None - - _uuid = get_uuid(dev_path) - if not _uuid: - return None - dev_info['uuid'] = _uuid - - return dev_info - -# ------------------------------------------------------------------------------ -# Driver functions. -# ------------------------------------------------------------------------------ - - -def instantiate_btrfs_subvolume(subvol='', path='', **kwargs): - """ - Drive creation and mounting of btrfs subvolumes. Expects subvol in the form @/foo/bar. - - Returns True/False. - """ - uid_gid = None - - if not path or not subvol: - log.error("Unable to create subvolume '{}' and mount onto '{}'.".format(subvol, path)) - return False - - # Grab device info to confirm this is a btrfs filesystem. 
- dev_info = get_device_info(get_mountpoint(path)) - if not dev_info: - log.error("Unable to create subvolume '{}' without filesystem information.".format(subvol)) - return False - if dev_info['fstype'] != 'btrfs': - log.error("Unable to create subvolume on '{}' filesystem.".format(dev_info['fstype'])) - return False - - # Logs error already. - ret = btrfs_create_subvol(subvol, dev_info) - - # Create the mount path if it does not yet exist. If it does, grab it's - # uid and gid so we can set it back after mount (root:root otherwise). - if ret: - if not os.path.exists(path): - try: - os.mkdir(path) - except (OSError, FileNotFoundError): - log.error("Failed to create '{}' for mounting of '{}'.".format( - path, subvol), exc_info=True) - return False - else: - uid_gid = _get_uid_gid(path) - - # Try to mount the subvolume. - ret = btrfs_mount_subvol(subvol, path) - - if ret and uid_gid: - # Make sure path has correct uid/gid. - try: - os.chown(path, uid_gid['uid'], uid_gid['gid']) - except (OSError, FileNotFoundError): - log.error( - "Failed to set {}:{} ownership of existing '{}' after mounting subvolume '{}'." - .format(uid_gid['uid'], uid_gid['gid'], path, subvol), - exc_info=True - ) - # NOTE: I'd rather proceed with /etc/fstab in spite of this - # failure, not setting ret to False. - - if ret: - # Create an /etc/fstab entry as well, so mount survives reboots. Logs - # it's own errors. - ret = _add_fstab_entry(dev_info['uuid'], path, dev_info['fstype'], subvol) - - return ret - - -def _unmount_osd(osd_mountpoint): - """ - Unmount the OSD defined by osd_mountpoint. Returns True/False. - """ - _rc = 0 - if get_mountpoint(osd_mountpoint) == osd_mountpoint: - cmd = "umount {}".format(osd_mountpoint) - _rc, _stdout, _stderr = __salt__['helper.run'](cmd) - - return _rc == 0 - - -def _mount_osd(osd_dev, osd_mountpoint): - """ - Activate the OSD defined by osd_dev. Returns True/False - """ - _rc = 0 - if get_mountpoint(osd_mountpoint) != osd_mountpoint: - cmd = "mount {} {}".format(osd_dev, osd_mountpoint) - _rc, _stdout, _stderr = __salt__['helper.run'](cmd) - - return _rc == 0 - - -def migrate_path_to_btrfs_subvolume(path='', subvol='', **kwargs): - """ - Migrate an existing path to a btrfs subvolume. This should be done one - node at a time (controlled from the fs runner), as Ceph services need to - be stopped and OSD's unmounted. - - Returns True/False or None. None indicates a servere, unrecoverable error. - """ - ret = True - - if not path or not subvol: - log.error("Unable to migrate path '{}' to subvolume '{}'.".format(path, subvol)) - return False - - # If path doesn't exist, there's nothing to migrate. - if not os.path.exists(path): - log.error(("Unable to migrate '{}' to subvolume '{}': '{}' does not " - "exist.".format(path, subvol, path))) - return False - - # Inspect the path, we could do this piecewise. - # TODO: The ret check is a bit harsh. We could also check the specific - # parts of path_info. - path_info = inspect_path(path) - if not path_info or not path_info['ret']: - log.error(("Unable to migrate '{}' to subvolume '{}': unable to obtain " - "path information.".format(path, subvol))) - return False - - # Let's make sure (paranoia) that fstype is correct. - if path_info['dev_info']['fstype'] != 'btrfs': - log.error("Unable to migrate '{}' to subvolume '{}': invalid filesystem type ({}).".format( - path, subvol, path_info['dev_info']['fstype'])) - return False - - # Check if path is already a mount point. 
For a btrfs path to be a - # mountpoint implies that a subvolume is mounted. - if path_info['mount_info']['mountpoint'] == path: - # Check if subvol matches the mounted subvol... this mainly for - # thoroughness and information. - # Remember to strip off the leading '/' when getting the subvol opt. - subvol_opt = _get_mount_opt('subvol', path_info['mount_info']['opts'])[1:] - if subvol == subvol_opt: - log.warning(("No need to migrate '{}': '{}' is already a mountpoint " - "for subvolume '{}'.".format(path, path, subvol))) - return True - else: - log.error("Unable to migrate '{}' to subvolume '{}': a different subvolume ({}) " - "is mounted.".format(path, subvol, subvol_opt)) - return False - - # At this point, we've determined that path is not a mount point for the - # requested subvol. Check if path is empty. If it is, we don't really - # need to do a full migration. - # TODO: possible race if a rogue issues zypper install ceph... - if not os.listdir(path): - log.warning(("No need to migrate empty '{}'. Creating '{}' to be mounted " - "onto '{}'.".format(path, subvol, path))) - return instantiate_btrfs_subvolume(subvol, path) - - # path is not empty, thus begin with migration... - - # Determine a unique tmp path. - tmp_path = _get_unique_path(path) - if not tmp_path: - log.error(("Unable to migrate '{}' to subvolume '{}': failed to obtain " - "unique temporary path.".format(path, subvol))) - return False - - # Try to create tmp_path. - try: - os.mkdir(tmp_path) - except (OSError, FileNotFoundError): - log.error("Unable to migrate '{}' to subvolume '{}': failed to create '{}'.".format( - path, subvol, tmp_path), exc_info=True) - return False - - # Grab uid/gid of path. - uid_gid = _get_uid_gid(path) - - # From here, some intelligent recovery/cleanup may be needed. - - # Grab osd device pairs needed for unmounting and re-activating. - osd_pairs = __salt__['osd.part_pairs']() - - # Stop all Ceph processes on this node. If unable to stop Ceph, we can't - # proceed and bail out. Note that just because systemctl call succeeded, - # doesn't mean the services have actually been stopped, hence the additional - # check. - if not _teardown_ceph(): - log.error(("Unable to migrate '{}' to subvolume '{}': unable to stop " - "Ceph daemons.".format(path, subvol))) - ret = False - - # Unmount all OSDs on this node. If we fail to do so, we can't proceed - # further. Cleanup will remount OSDs and restart Ceph. - if ret: - for osd_pair in osd_pairs: - if ret: - if not _unmount_osd(osd_pair[1]): - log.error(("Unable to migrate '{}' to subvolume '{}': " - "failed to unmount OSD at '{}'" - ".".format(path, subvol, osd_pair[1]))) - ret = False - - if ret: - # Try to move contents of path to tmp_path. - if not _mv_contents(path, tmp_path): - log.error(("Unable to migrate '{}' to subvolume '{}': failed to " - "move contents of '{}' to '{}'" - ".".format(path, subvol, path, tmp_path))) - ret = False - - if ret: - # Try to create and mount the subvolume (including modifying /etc/fstab). - if not instantiate_btrfs_subvolume(subvol, path): - # Failed to have instiated btrfs subvol, either: - # i. Failed to have created subvol - # ii. Failed to have mounted subvol onto path - # iii. 
Failed to have modified /etc/fstab - if not btrfs_subvol_exists(subvol) or btrfs_get_mountpoints_of_subvol(subvol) != path: - log.error("Unable to migrate '{}' to subvolume '{}': failed " - "to create/mount '{}'.".format(path, subvol, subvol)) - ret = False - else: - # We created/mounted the subvolume, but just couldn't write - # /etc/fstab. This is so close, and while if we reboot this - # node, subvol will no longer be mounted on path, we hope the - # admin will be able to resolve this on seeing the error log. - log.error(("Migration of '{}' to subvolume '{}' succeeded, but " - "/etc/fstab could not be written. " - "Manual intervention needed.".format(path, subvol))) - ret = False - - # Cleanup... - - # Make sure path has correct uid/gid. - try: - os.chown(path, uid_gid['uid'], uid_gid['gid']) - except (OSError, FileNotFoundError): - log.error( - "Failed to set {}:{} ownership of '{}' after migration to subvolume '{}'." - .format(uid_gid['uid'], uid_gid['gid'], path, subvol), - exc_info=True - ) - # Not worth an abrupt failure at this point, but do log it and alert at the runner. - ret = False - - # Move contents of tmp_path back to path, if tmp_path is not empty. If - # it's empty, we failed to move contents of path. - if os.listdir(tmp_path) and not _mv_contents(tmp_path, path): - # Unrecoverable, don't delete either paths as we may lose data. - # Return immediately with None indicating manual intervention. - log.error(("Unable to migrate '{}' to subvolume '{}': failed to " - "move contents of '{}' back to path '{}'. Manual " - "intervention needed!".format(path, subvol, tmp_path, path))) - return None - - # At this point, it's safe to remove tmp_path. - try: - os.rmdir(tmp_path) - except (OSError, FileNotFoundError): - # shutil.move() would have failed above if we failed to move - # everything from tmp_path to path, treating this as a cleanup error. - log.error( - "Failed to cleanup from migration of '{}' to subvolume '{}': failed to remove '{}'" - .format(path, subvol, tmp_path), - exc_info=True - ) - - # Try to remount as many OSDs as possible. If we fail on any one, return - # None at the end. - mount_ret = True - for osd_pair in osd_pairs: - if not _mount_osd(osd_pair[0], osd_pair[1]): - log.error(("Failed to re-mount OSD onto '{}' after migration of " - "'{}' to subvolume '{}'. Manual intervention " - "needed!".format(osd_pair[1], path, subvol))) - mount_ret = False - if not mount_ret: - return None - - # Finally, restart Ceph - if not _startup_ceph(): - log.error("Failed to restart Ceph after migration of '{}' to subvolume '{}'. " - "Manual intervention needed".format(path, subvol)) - return None - - if not ret: - log.error("Failed to successfully migrate '{}' to subvolume '{}'.".format(path, subvol)) - else: - log.warning("Succesfully migrated '{}' to subvolume '{}'.".format(path, subvol)) - - return ret - - -def inspect_path(path='', **kwargs): - """ - Determine some intersting information for a given path. - - Returns { 'ret': Bool, 'exists': exists(path), 'type': String ('directory', 'file'), - 'attrs': get_attrs(path), - 'mount_info': { get_mount_info(path) }, - 'dev_info': { get_device_info(mountpoint) } - or None if path not supplied. On an error return from any of the composite - functions, set 'ret' = False. - """ - # 'mount_info' and 'dev_info' keys not explicitly initialized to None to avoid pylint E1136 - # which appears as a false postive when referencing a variable that was first initialized - # to None, and later assigned a dictionary. 
- # See: https://github.com/PyCQA/pylint/issues/1498 - open ticket as of this commit. - path_info = {'ret': True, 'exists': None, 'type': None, 'attrs': None} - - if not path: - log.error("Unable to inspect '{}'.".format(path)) - return None - - path_info['exists'] = os.path.exists(path) - - # Keeping it simple: 'directory' or 'file'. If it doesn't exist, None. - if path_info['exists']: - path_info['type'] = 'directory' if os.path.isdir(path) else 'file' - - path_info['attrs'] = get_attrs(path) - if path_info['exists'] and not path_info['attrs']: - # Only set a fail flag when collecting attrs for existing paths. - path_info['ret'] = False - - path_info['mount_info'] = get_mount_info(path) - if not path_info['mount_info']: - path_info['ret'] = False - - mountpoint = path_info['mount_info']['mountpoint'] if path_info['mount_info'] else '' - path_info['dev_info'] = get_device_info(mountpoint) - if not path_info['dev_info']: - path_info['ret'] = False - - return path_info diff --git a/srv/salt/_modules/subvolume.py b/srv/salt/_modules/subvolume.py new file mode 100644 index 000000000..ce49dad49 --- /dev/null +++ b/srv/salt/_modules/subvolume.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +""" +Check btrfs subvolume +""" + +from __future__ import absolute_import + + +def check(): + """ + Return bool and message about the subvolume state of /var/lib/ceph + """ + btrfs, mounted = _btrfs() + if mounted: + return True, "/var/lib/ceph subvolume mounted" + + if not btrfs: + return True, "/ is not btrfs" + + created = _subvol() + + if created: + return False, "/var/lib/ceph not mounted" + + return False, "/var/lib/ceph subvolume missing" + + +def _btrfs(): + """ + Scan /proc/mounts. Return whether / is btrfs and /var/lib/ceph is mounted. + """ + btrfs = False + mounted = False + with open('/proc/mounts') as mounts: + for line in mounts: + _device, path, fstype, options = line.split()[:4] + if path == "/" and fstype == "btrfs": + btrfs = True + if path == "/var/lib/ceph" and "subvol" in options: + mounted = True + return btrfs, mounted + + +def _subvol(): + """ + Check if /var/lib/ceph subvolume exists + """ + cmd = "btrfs subvolume list /" + _rc, stdout, _stderr = __salt__['helper.run'](cmd) + + found = False + for line in stdout: + if line.endswith("/var/lib/ceph"): + found = True + return found diff --git a/srv/salt/ceph/migrate/subvolume/default.sls b/srv/salt/ceph/migrate/subvolume/default.sls deleted file mode 100644 index a6431b687..000000000 --- a/srv/salt/ceph/migrate/subvolume/default.sls +++ /dev/null @@ -1,12 +0,0 @@ - -create /var/lib/ceph as subvolume if /var/lib/ceph doesn't exist: - salt.runner: - - name: fs.create_var - -migrate existing /var/lib/ceph to subvolume: - salt.runner: - - name: fs.migrate_var - -disable copy-on-write for /var/lib/ceph: - salt.runner: - - name: fs.correct_var_attrs diff --git a/srv/salt/ceph/migrate/subvolume/init.sls b/srv/salt/ceph/migrate/subvolume/init.sls deleted file mode 100644 index c0c45d39a..000000000 --- a/srv/salt/ceph/migrate/subvolume/init.sls +++ /dev/null @@ -1,3 +0,0 @@ - -include: - - .{{ salt['pillar.get']('migrate_subvolume', 'default') }} diff --git a/srv/salt/ceph/subvolume/README.md b/srv/salt/ceph/subvolume/README.md new file mode 100644 index 000000000..4306cc2c2 --- /dev/null +++ b/srv/salt/ceph/subvolume/README.md @@ -0,0 +1,57 @@ +# Subvolume instructions +Any btrfs snapshots will capture the state of the monitors when /var/lib/ceph is part of the root subvolume. A dedicated subvolume of /var/lib/ceph is recommended. 
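+
+To see whether a minion already has the recommended layout, the checks below mirror what the subvolume execution module does. This is a manual sketch only (the `minionX #` prompt follows the convention used below; device labels and subvolume names may differ on your systems):
+
+```
+minionX # btrfs subvolume list / | grep /var/lib/ceph
+minionX # grep /var/lib/ceph /proc/mounts
+```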
+
+## Disable
+To disable this state and the validation check, set
+
+subvolume_init: disabled
+
+in /srv/pillar/ceph/stack/global.yml and refresh the pillar.
+
+## Before Stage 0
+Prior to Stage 0, sync the Salt modules and apply the following state to each minion intended to be a monitor.
+
+```
+# salt '*' saltutil.sync_all
+# salt 'minion*' state.apply ceph.subvolume
+```
+
+## Stage 3 validation failure
+If Stage 3 fails validation, the /var/lib/ceph directory already exists (the Ceph packages create it on installation). Move the contents aside, apply the state and restore the contents:
+
+```
+minionX # cd /var/lib
+minionX # mv ceph ceph-
+
+# salt 'minionX*' state.apply ceph.subvolume
+
+minionX # cd /var/lib/ceph-
+minionX # rsync -av . ../ceph
+minionX # cd ..
+minionX # rm -rf ./ceph-
+```
+
+## Existing Ceph cluster
+If this is not a fresh deployment and the validation has been circumvented, all Ceph processes on the minion need to be stopped before the migration and started again afterwards. This may include unmounting or deactivating OSDs.
+
+```
+minionX # systemctl stop ceph-mon@minionX
+minionX # systemctl stop <any other ceph services>
+minionX # cd /var/lib
+minionX # mv ceph ceph-
+
+# salt 'minionX*' state.apply ceph.subvolume
+
+minionX # cd /var/lib/ceph-
+minionX # rsync -av . ../ceph
+minionX # cd ..
+minionX # rm -rf ./ceph-
+minionX # systemctl start ceph-mon@minionX
+minionX # systemctl start <any other ceph services>
+```
+
+## Developer notes
+The move/restore steps could be added to the state file, but that would complicate recovery on non-btrfs systems. Considering this is done exactly once per OS installation, doing the move/restore manually is not a tremendous burden.
+
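+## Checking status
+The execution module added alongside this state (srv/salt/_modules/subvolume.py) returns the same status/message pair the validator consumes. As a quick sketch, it can also be queried directly once the modules are synced:
+
+```
+# salt '*' saltutil.sync_modules
+# salt 'mon*' subvolume.check
+```
+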
/etc/fstab" + - unless: "grep -q subvol=@/var/lib/ceph /etc/fstab" + - failhard: True + +mount: + cmd.run: + - name: "mount /var/lib/ceph" + - unless: "mount | grep -q /var/lib/ceph" + diff --git a/srv/salt/ceph/subvolume/disabled.sls b/srv/salt/ceph/subvolume/disabled.sls new file mode 100644 index 000000000..60b92a395 --- /dev/null +++ b/srv/salt/ceph/subvolume/disabled.sls @@ -0,0 +1,3 @@ + +subvolume nop: + test.nop diff --git a/srv/salt/ceph/subvolume/init.sls b/srv/salt/ceph/subvolume/init.sls new file mode 100644 index 000000000..c0fcea3fc --- /dev/null +++ b/srv/salt/ceph/subvolume/init.sls @@ -0,0 +1,4 @@ + +include: + - .{{ salt['pillar.get']('subvolume_init', 'default') }} + diff --git a/tests/unit/_modules/test_subvolume.py b/tests/unit/_modules/test_subvolume.py new file mode 100644 index 000000000..7344fbf37 --- /dev/null +++ b/tests/unit/_modules/test_subvolume.py @@ -0,0 +1,86 @@ +import pytest +import salt.client +import os +import sys +sys.path.insert(0, 'srv/salt/_modules') +from pyfakefs import fake_filesystem, fake_filesystem_glob +from mock import patch, MagicMock, mock +from srv.salt._modules import subvolume + +fs = fake_filesystem.FakeFilesystem() +f_glob = fake_filesystem_glob.FakeGlobModule(fs) +f_os = fake_filesystem.FakeOsModule(fs) +f_open = fake_filesystem.FakeFileOpen(fs) + +class Testsubvolume(): + + @patch('srv.salt._modules.subvolume._btrfs') + def test_check_mounted(self, mockb): + mockb.return_value = [True, True] + state, msg = subvolume.check() + assert state == True + assert msg == "/var/lib/ceph subvolume mounted" + + @patch('srv.salt._modules.subvolume._btrfs') + def test_check_not_btrfs(self, mockb): + mockb.return_value = [False, False] + state, msg = subvolume.check() + assert state == True + assert msg == "/ is not btrfs" + + @patch('srv.salt._modules.subvolume._subvol') + @patch('srv.salt._modules.subvolume._btrfs') + def test_check_fails_with_no_mount(self, mockb, mocks): + mockb.return_value = [True, False] + mocks.return_value = True + state, msg = subvolume.check() + assert state == False + assert msg == "/var/lib/ceph not mounted" + + @patch('srv.salt._modules.subvolume._subvol') + @patch('srv.salt._modules.subvolume._btrfs') + def test_check_fails_missing(self, mockb, mocks): + mockb.return_value = [True, False] + mocks.return_value = False + state, msg = subvolume.check() + assert state == False + assert msg == "/var/lib/ceph subvolume missing" + + @patch('builtins.open', new=f_open) + def test_btrfs_mounted(self): + fs.CreateFile("/proc/mounts", contents="/dev/sda1 / btrfs rw,relatime\n/dev/sda1 /var/lib/ceph btrfs rw,relatime,space_cache,subvolid=286,subvol=/@/var/lib/ceph\n") + btrfs, mounted = subvolume._btrfs() + fs.RemoveFile("/proc/mounts") + assert btrfs == True + assert mounted == True + + @patch('builtins.open', new=f_open) + def test_btrfs_unmounted(self): + fs.CreateFile("/proc/mounts", contents="/dev/sda1 / btrfs rw,relatime\n") + btrfs, mounted = subvolume._btrfs() + fs.RemoveFile("/proc/mounts") + assert btrfs == True + assert mounted == False + + @patch('builtins.open', new=f_open) + def test_btrfs_other_fs(self): + fs.CreateFile("/proc/mounts", contents="/dev/sda1 / xfs rw,relatime\n") + btrfs, mounted = subvolume._btrfs() + fs.RemoveFile("/proc/mounts") + assert btrfs == False + assert mounted == False + + def test_subvol(self): + subvolume.__salt__ = {} + subvolume.__salt__['helper.run'] = mock.Mock() + subvolume.__salt__['helper.run'].return_value = (0, ["ID 286 gen 363 top level 265 path @/var/lib/ceph"], "") + 
+        result = subvolume._subvol()
+        assert result == True
+
+    def test_subvol_fails(self):
+        subvolume.__salt__ = {}
+        subvolume.__salt__['helper.run'] = mock.Mock()
+        subvolume.__salt__['helper.run'].return_value = (0, ["ID 286 gen 363 top level 265 path @/var"], "")
+        result = subvolume._subvol()
+        assert result == False
+
diff --git a/tests/unit/runners/test_validate.py b/tests/unit/runners/test_validate.py
index aa0bab2b1..8eb3f009d 100644
--- a/tests/unit/runners/test_validate.py
+++ b/tests/unit/runners/test_validate.py
@@ -2,6 +2,7 @@
 import salt.client
 import sys
 import types
+import mock
 
 sys.path.insert(0, 'srv/modules/runners')
 sys.path.insert(0, 'srv/modules/runners/utils')
@@ -737,6 +738,61 @@ def set_pillar(self):
         self.data = {'admin.ceph': {'roles': 'admin'},
                      'igw1.ceph': {'roles': 'igw'}}
 
+    def test_subvolume(self):
+        data = {'mon.ceph': {},
+                'mgr.ceph': {}}
+
+        client = mock.Mock()
+        with patch.object(validate.Validate, "__init__", lambda self, n: None):
+            validator = validate.Validate("setup")
+            validator.local = client.return_value
+            validator.local.cmd.return_value = {'mon.ceph': [True, "/var/lib/ceph subvolume mounted"]}
+            validator.errors = {}
+            validator.warnings = {}
+            validator.passed = {}
+            validator.data = data
+            validator.grains = {}
+
+            validator.subvolume()
+            assert validator.passed['subvolume'] == "valid"
+
+    def test_subvolume_fails(self):
+        data = {'mon.ceph': {},
+                'mgr.ceph': {}}
+
+        client = mock.Mock()
+        with patch.object(validate.Validate, "__init__", lambda self, n: None):
+            validator = validate.Validate("setup")
+            validator.local = client.return_value
+            validator.local.cmd.return_value = {'mon.ceph': [False, "/var/lib/ceph subvolume missing"]}
+            validator.errors = {}
+            validator.warnings = {}
+            validator.passed = {}
+            validator.data = data
+            validator.grains = {}
+
+            validator.subvolume()
+            assert "/var/lib/ceph subvolume missing on mon.ceph" in validator.errors['subvolume']
+
+    def test_subvolume_skips(self):
+        data = {'mon.ceph': {'subvolume_init': 'disabled'},
+                'mgr.ceph': {'subvolume_init': 'disabled'}}
+
+        client = mock.Mock()
+        with patch.object(validate.Validate, "__init__", lambda self, n: None):
+            validator = validate.Validate("setup")
+            validator.local = client.return_value
+            validator.local.cmd.return_value = {'mon.ceph': [False, "/var/lib/ceph subvolume missing"]}
+            validator.errors = {}
+            validator.warnings = {}
+            validator.passed = {}
+            validator.skipped = {}
+            validator.data = data
+            validator.grains = {}
+
+            validator.subvolume()
+            assert validator.skipped['subvolume'] == "skipping"
+
 class TestConfigCheck():