From 7a60715b515c11499cc7fc2f6acee792897f926b Mon Sep 17 00:00:00 2001
From: Tim Serong
Date: Thu, 25 May 2017 20:12:58 +1000
Subject: [PATCH] Experimental import of existing cluster

Here's the idea:

- Install DeepSea on some node (your salt master).
- Install salt-minion on every existing ceph node.
- Hook up the minions to the master as usual.
- Run the prep and discovery stages (hoping that prep doesn't do anything
  annoying like install updates and reboot half your cluster).
- Now you've got all possible roles and hardware profiles that DeepSea
  would have come up with when given a clean slate.
- Run `salt-run populate.engulf_existing_cluster`. This will generate a
  policy.cfg with roles matching whatever was discovered running on all
  those ceph nodes. Any node running a mon will have role-mon. Any node
  running rgw will have role-rgw, etc. Any node with the client admin
  keyring will have role-admin.

To be clear, I'm presently just trying to get a feel for the shape of this
thing. The current implementation will *not* assign hardware profiles for
storage nodes. Don't expect this to give you a working cluster. In fact,
if you run it on a working cluster, then proceed with the configure,
deploy and services stages, you should expect to have a broken cluster
immediately thereafter.

There's a huge list of missing things, including but not limited to:

- Verify the cluster is actually healthy and everything is running first.
  Ensure none of the config is inconsistent.
- Get the existing cluster's fsid, network settings and any custom config.
- Generate storage/hardware profiles to match what's deployed.
- Save the existing daemon keys rather than generating new ones.
- Invoke engulf_existing_cluster automatically during discovery if and only
  if there's no policy.cfg already and a cluster is detected (maybe).
- Test on every imaginable cluster to make sure it's not going to do
  anything stupid.
- Check for ALL the errors.

Signed-off-by: Tim Serong
---
 srv/modules/runners/populate.py    | 83 ++++++++++++++++++++++++++++++
 srv/salt/_modules/cephinspector.py | 51 ++++++++++++++++++
 srv/salt/_modules/keyring.py       |  3 ++
 3 files changed, 137 insertions(+)
 create mode 100644 srv/salt/_modules/cephinspector.py

diff --git a/srv/modules/runners/populate.py b/srv/modules/runners/populate.py
index 5d447f334..3d6b27e82 100644
--- a/srv/modules/runners/populate.py
+++ b/srv/modules/runners/populate.py
@@ -852,3 +852,86 @@ def proposals(**kwargs):
     ceph_roles.monitor_members()
     ceph_roles.igw_members()
     return [ True ]
+
+def engulf_existing_cluster(**kwargs):
+    """
+    Assuming proposals() has already been run to collect hardware profiles and
+    all possible role assignments and common configuration, this will generate
+    a policy.cfg with roles and assignments reflecting whatever cluster is
+    currently deployed. It will also suck in all the keyrings so that they're
+    present when the configure stage is run.
+
+    This assumes your cluster is named "ceph". If it's not, things will break.
+    """
+
+    # TODO:
+    # - verify the cluster is actually healthy and everything is running first
+    # - /srv/pillar/ceph/proposals/config/stack/default/ceph/cluster.yml:
+    #   - need to inject fsid from existing cluster
+    #   - likewise cluster_network and public_network need to be set from
+    #     actual cluster (might not be what deepsea thinks from proposals?)
+    #   - public addresses for individual MONs might be similarly wrong
+    # - get any extra custom config from ceph.conf
+    # - generate hardware proposals based on actual deployed OSDs (which again
+    #   might not be what deepsea thinks from the proposals it came up with)
+
+    policy_cfg = []
+
+    local = salt.client.LocalClient()
+
+    # TODO: if local.cmd fails, we'll get back something nasty which isn't handled
+    for minion, info in local.cmd("*", "cephinspector.inspect").items():
+
+        is_admin = len(info["ceph_keys"]["ceph.client.admin"]) > 0
+
+        if not info["running_services"].keys() and not is_admin:
+            # No ceph services running and no admin key, so don't assign it
+            # to the cluster
+            continue
+
+        policy_cfg.append("cluster-ceph/cluster/" + minion + ".sls")
+
+        if is_admin:
+            policy_cfg.append("role-admin/cluster/" + minion + ".sls")
+
+        if "ceph-mon" in info["running_services"].keys():
+            policy_cfg.append("role-mon/cluster/" + minion + ".sls")
+            policy_cfg.append("role-mon/stack/default/ceph/minions/" + minion + ".yml")
+
+        if "ceph-osd" in info["running_services"].keys():
+            # Needs a storage profile assigned (which may be different
+            # than the proposals deepsea has come up with, depending on
+            # how things were deployed)
+            pass
+
+        if "ceph-mds" in info["running_services"].keys():
+            policy_cfg.append("role-mds/cluster/" + minion + ".sls")
+            pass
+
+        if "ceph-radosgw" in info["running_services"].keys():
+            policy_cfg.append("role-rgw/cluster/" + minion + ".sls")
+            pass
+
+    # TODO: somewhere in here, take info["ceph_keys"] and write the keys to:
+    # - /srv/salt/ceph/admin/cache/ceph.client.admin.keyring
+    # - /srv/salt/ceph/mon/cache/mon.keyring
+    # - /srv/salt/ceph/osd/cache/bootstrap.keyring
+    # - /srv/salt/ceph/mds/cache/$name.keyring
+    # - /srv/salt/ceph/rgw/cache/$name.keyring
+
+    # Now policy_cfg reflects the current deployment; make it a bit legible...
+    policy_cfg.sort()
+
+    # ...but inject the unassigned line first so it takes precedence,
+    # along with the global config bits (because they're prettier early)...
+    policy_cfg = [
+        "cluster-unassigned/cluster/*.sls",
+        "config/stack/default/ceph/cluster.yml",
+        "config/stack/default/global.yml" ] + policy_cfg
+
+    # ...and write it out (this will fail with EPERM if someone's already
+    # created a policy.cfg as root, BTW)
+    with open("/srv/pillar/ceph/proposals/policy.cfg", 'w') as policy:
+        policy.write("\n".join(policy_cfg) + "\n")
+
+    return [ True ]
diff --git a/srv/salt/_modules/cephinspector.py b/srv/salt/_modules/cephinspector.py
new file mode 100644
index 000000000..3c69dea66
--- /dev/null
+++ b/srv/salt/_modules/cephinspector.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+# vim: ts=8 et sw=4 sts=4
+
+import os
+
+def _extract_key(filename):
+    # This is pretty similar to keyring.secret()...
+    if os.path.exists(filename):
+        with open(filename, 'r') as keyring:
+            for line in keyring:
+                if "key" in line and " = " in line:
+                    return line.split(" = ")[1].strip()
+    return ""
+
+def inspect(**kwargs):
+    # deliberately only looking for things ceph-deploy can deploy
+    ceph_services = ['ceph-mon', 'ceph-osd', 'ceph-mds', 'ceph-radosgw']
+
+    #
+    # running_services will be something like:
+    #
+    # {
+    #     'ceph-mon': [ 'hostname' ],
+    #     'ceph-osd': [ '0', '1', '2', ... ]
+    # }
+    #
+    running_services = {}
+    for rs in __salt__['service.get_running']():
+        instance = rs.split('@')
+        if len(instance) == 2 and instance[0] in ceph_services:
+            if instance[0] not in running_services:
+                running_services[instance[0]] = []
+            running_services[instance[0]].append(instance[1])
+
+    ceph_keys = {}
+
+    ceph_keys["ceph.client.admin"] = _extract_key("/etc/ceph/ceph.client.admin.keyring")
+    ceph_keys["bootstrap-osd"] = _extract_key("/var/lib/ceph/bootstrap-osd/ceph.keyring")
+
+    if "ceph-mon" in running_services.keys():
+        ceph_keys["mon"] = _extract_key("/var/lib/ceph/mon/ceph-" + running_services["ceph-mon"][0] + "/keyring")
+
+    # TODO: something similar to the above for MDS and RGW keys (but be aware
+    # there might be multiple instances. Hell, there could be multiple instances
+    # for MONs too on one host, if someone has set up something really weird...)
+
+    # note that some keys will be empty strings if not present
+    return {
+        "running_services": running_services,
+        "ceph_keys": ceph_keys
+    }
diff --git a/srv/salt/_modules/keyring.py b/srv/salt/_modules/keyring.py
index 14becded3..ec570a995 100644
--- a/srv/salt/_modules/keyring.py
+++ b/srv/salt/_modules/keyring.py
@@ -9,6 +9,9 @@ def secret(filename):
     """
     Read the filename and return the key value. If it does not exist,
     generate one.
+
+    Note that if used on a file that contains multiple keys, this will
+    always return the first key.
     """
     if os.path.exists(filename):
        with open(filename, 'r') as keyring:
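
As a rough illustration of what engulf_existing_cluster() writes out (using
hypothetical minion names; assume "node1" runs a mon and holds the admin
keyring, and "node2" runs rgw), the generated
/srv/pillar/ceph/proposals/policy.cfg would look something like:

    cluster-unassigned/cluster/*.sls
    config/stack/default/ceph/cluster.yml
    config/stack/default/global.yml
    cluster-ceph/cluster/node1.sls
    cluster-ceph/cluster/node2.sls
    role-admin/cluster/node1.sls
    role-mon/cluster/node1.sls
    role-mon/stack/default/ceph/minions/node1.yml
    role-rgw/cluster/node2.sls

The per-minion entries are sorted, then the unassigned and global config
lines are prepended, so cluster-unassigned/cluster/*.sls always ends up at
the top and takes precedence.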