From 7a60715b515c11499cc7fc2f6acee792897f926b Mon Sep 17 00:00:00 2001
From: Tim Serong
Date: Thu, 25 May 2017 20:12:58 +1000
Subject: [PATCH] Experimental import of existing cluster

Here's the idea:

- Install DeepSea on some node (your salt master).
- Install salt-minion on every existing ceph node.
- Hook up the minions to the master as usual.
- Run the prep and discovery stages (hoping that prep doesn't do anything
  annoying like install updates and reboot half your cluster).
- Now you've got all possible roles and hardware profiles that DeepSea
  would have come up with when given a clean slate.
- Run `salt-run populate.engulf_existing_cluster`. This will generate a
  policy.cfg with roles matching whatever was discovered running on all
  those ceph nodes. Any node running a mon will have role-mon. Any node
  running rgw will have role-rgw, etc. Any node with the client admin
  keyring will have role-admin.

To be clear, I'm presently just trying to get a feel for the shape of this
thing. The current implementation will *not* assign hardware profiles for
storage nodes. Don't expect this to give you a working cluster. In fact,
if you run it on a working cluster, then proceed with the configure,
deploy and services stages, you should expect to have a broken cluster
immediately thereafter.

There's a huge list of missing things, including but not limited to:

- Verify the cluster is actually healthy and everything is running first.
  Ensure none of the config is inconsistent.
- Get the existing cluster's fsid, network settings and any custom config.
- Generate storage/hardware profiles to match what's deployed.
- Save the existing daemon keys rather than generating new ones.
- Invoke engulf_existing_cluster automatically during discovery if and only
  if there's no policy.cfg already and a cluster is detected (maybe).
- Test on every imaginable cluster to make sure it's not going to do
  anything stupid.
- Check for ALL the errors.

Signed-off-by: Tim Serong
---
 srv/modules/runners/populate.py    | 83 ++++++++++++++++++++++++++++++
 srv/salt/_modules/cephinspector.py | 51 ++++++++++++++++++
 srv/salt/_modules/keyring.py       |  3 ++
 3 files changed, 137 insertions(+)
 create mode 100644 srv/salt/_modules/cephinspector.py

diff --git a/srv/modules/runners/populate.py b/srv/modules/runners/populate.py
index 5d447f334..3d6b27e82 100644
--- a/srv/modules/runners/populate.py
+++ b/srv/modules/runners/populate.py
@@ -852,3 +852,86 @@ def proposals(**kwargs):
     ceph_roles.monitor_members()
     ceph_roles.igw_members()
     return [ True ]
+
+def engulf_existing_cluster(**kwargs):
+    """
+    Assuming proposals() has already been run to collect hardware profiles and
+    all possible role assignments and common configuration, this will generate
+    a policy.cfg with roles and assignments reflecting whatever cluster is
+    currently deployed. It will also suck in all the keyrings so that they're
+    present when the configure stage is run.
+
+    This assumes your cluster is named "ceph". If it's not, things will break.
+    """
+
+    # TODO:
+    # - verify the cluster is actually healthy and everything is running first
+    # - /srv/pillar/ceph/proposals/config/stack/default/ceph/cluster.yml:
+    #   - need to inject fsid from existing cluster
+    #   - likewise cluster_network and public_network need to be set from
+    #     actual cluster (might not be what deepsea thinks from proposals?)
+    #   - public addresses for individual MONs might be similarly wrong
+    # - get any extra custom config from ceph.conf
+    # - generate hardware proposals based on actual deployed OSDs (which again
+    #   might not be what deepsea thinks from the proposals it came up with)
+
+    policy_cfg = []
+
+    local = salt.client.LocalClient()
+
+    # TODO: if local.cmd fails, we'll get back something nasty which isn't handled
+    for minion, info in local.cmd("*", "cephinspector.inspect").items():
+
+        is_admin = len(info["ceph_keys"]["ceph.client.admin"]) > 0
+
+        if not info["running_services"].keys() and not is_admin:
+            # No ceph services running and no admin key, so don't assign it
+            # to the cluster
+            continue
+
+        policy_cfg.append("cluster-ceph/cluster/" + minion + ".sls")
+
+        if is_admin:
+            policy_cfg.append("role-admin/cluster/" + minion + ".sls")
+
+        if "ceph-mon" in info["running_services"].keys():
+            policy_cfg.append("role-mon/cluster/" + minion + ".sls")
+            policy_cfg.append("role-mon/stack/default/ceph/minions/" + minion + ".yml")
+
+        if "ceph-osd" in info["running_services"].keys():
+            # Needs a storage profile assigned (which may be different
+            # than the proposals deepsea has come up with, depending on
+            # how things were deployed)
+            pass
+
+        if "ceph-mds" in info["running_services"].keys():
+            policy_cfg.append("role-mds/cluster/" + minion + ".sls")
+            pass
+
+        if "ceph-radosgw" in info["running_services"].keys():
+            policy_cfg.append("role-rgw/cluster/" + minion + ".sls")
+            pass
+
+    # TODO: somewhere in here, take info["ceph_keys"] and write the keys to:
+    # - /srv/salt/ceph/admin/cache/ceph.client.admin.keyring
+    # - /srv/salt/ceph/mon/cache/mon.keyring
+    # - /srv/salt/ceph/osd/cache/bootstrap.keyring
+    # - /srv/salt/ceph/mds/cache/$name.keyring
+    # - /srv/salt/ceph/rgw/cache/$name.keyring
+
+    # Now policy_cfg reflects the current deployment; make it a bit legible...
+    policy_cfg.sort()
+
+    # ...but inject the unassigned line first so it takes precedence,
+    # along with the global config bits (because they're prettier early)...
+    policy_cfg = [
+        "cluster-unassigned/cluster/*.sls",
+        "config/stack/default/ceph/cluster.yml",
+        "config/stack/default/global.yml" ] + policy_cfg
+
+    # ...and write it out (this will fail with EPERM if someone's already
+    # created a policy.cfg as root, BTW)
+    with open("/srv/pillar/ceph/proposals/policy.cfg", 'w') as policy:
+        policy.write("\n".join(policy_cfg) + "\n")
+
+    return [ True ]
diff --git a/srv/salt/_modules/cephinspector.py b/srv/salt/_modules/cephinspector.py
new file mode 100644
index 000000000..3c69dea66
--- /dev/null
+++ b/srv/salt/_modules/cephinspector.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+# vim: ts=8 et sw=4 sts=4
+
+import os
+
+def _extract_key(filename):
+    # This is pretty similar to keyring.secret()...
+    if os.path.exists(filename):
+        with open(filename, 'r') as keyring:
+            for line in keyring:
+                if "key" in line and " = " in line:
+                    return line.split(" = ")[1].strip()
+    return ""
+
+def inspect(**kwargs):
+    # deliberately only looking for things ceph-deploy can deploy
+    ceph_services = ['ceph-mon', 'ceph-osd', 'ceph-mds', 'ceph-radosgw']
+
+    #
+    # running_services will be something like:
+    #
+    # {
+    #     'ceph-mon': [ 'hostname' ],
+    #     'ceph-osd': [ '0', '1', '2', ... ]
+    # }
+    #
+    running_services = {}
+    for rs in __salt__['service.get_running']():
+        instance = rs.split('@')
+        if len(instance) == 2 and instance[0] in ceph_services:
+            if instance[0] not in running_services:
+                running_services[instance[0]] = []
+            running_services[instance[0]].append(instance[1])
+
+    ceph_keys = {}
+
+    ceph_keys["ceph.client.admin"] = _extract_key("/etc/ceph/ceph.client.admin.keyring")
+    ceph_keys["bootstrap-osd"] = _extract_key("/var/lib/ceph/bootstrap-osd/ceph.keyring")
+
+    if "ceph-mon" in running_services.keys():
+        ceph_keys["mon"] = _extract_key("/var/lib/ceph/mon/ceph-" + running_services["ceph-mon"][0] + "/keyring")
+
+    # TODO: something similar to the above for MDS and RGW keys (but be aware
+    # there might be multiple instances. Hell, there could be multiple instances
+    # for MONs too on one host, if someone has set up something really weird...)
+
+    # note that some keys will be empty strings if not present
+    return {
+        "running_services": running_services,
+        "ceph_keys": ceph_keys
+    }
diff --git a/srv/salt/_modules/keyring.py b/srv/salt/_modules/keyring.py
index 14becded3..ec570a995 100644
--- a/srv/salt/_modules/keyring.py
+++ b/srv/salt/_modules/keyring.py
@@ -9,6 +9,9 @@ def secret(filename):
     """
     Read the filename and return the key value. If it does not exist,
     generate one.
+
+    Note that if used on a file that contains multiple keys, this will
+    always return the first key.
     """
     if os.path.exists(filename):
        with open(filename, 'r') as keyring:
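
As a rough illustration of what engulf_existing_cluster() writes out (using
hypothetical minion names; assume "node1" runs a mon and holds the admin
keyring, and "node2" runs rgw), the generated
/srv/pillar/ceph/proposals/policy.cfg would look something like:

    cluster-unassigned/cluster/*.sls
    config/stack/default/ceph/cluster.yml
    config/stack/default/global.yml
    cluster-ceph/cluster/node1.sls
    cluster-ceph/cluster/node2.sls
    role-admin/cluster/node1.sls
    role-mon/cluster/node1.sls
    role-mon/stack/default/ceph/minions/node1.yml
    role-rgw/cluster/node2.sls

The per-minion entries are sorted, then the unassigned and global config
lines are prepended, so cluster-unassigned/cluster/*.sls always ends up at
the top and takes precedence.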