fence_compute: Fence agent for Nova compute machines

Author: Andrew Beekhof
marxsk committed May 14, 2015
1 parent bc15860 commit 855c7f617e6afc840540439c359de970d3dc8cee
@@ -174,6 +174,7 @@ AC_PATH_PROG([SUDO_PATH], [sudo], [/usr/bin/sudo])
AC_PATH_PROG([SNMPWALK_PATH], [snmpwalk], [/usr/bin/snmpwalk])
AC_PATH_PROG([SNMPSET_PATH], [snmpset], [/usr/bin/snmpset])
AC_PATH_PROG([SNMPGET_PATH], [snmpget], [/usr/bin/snmpget])
AC_PATH_PROG([NOVA_PATH], [nova], [/usr/bin/nova])
## do subst
@@ -274,6 +275,7 @@ AC_CONFIG_FILES([Makefile
@@ -0,0 +1,17 @@
TARGET = fence_compute
SRC = $(TARGET).py
man_MANS = $(TARGET).8
FENCE_TEST_ARGS = -l test -p test -n 1
include $(top_srcdir)/make/
include $(top_srcdir)/make/
include $(top_srcdir)/make/
@@ -0,0 +1,218 @@
#!/usr/bin/python -tt
import sys
import time
import atexit
import logging
from fencing import *
from fencing import fail_usage, is_executable, run_command, run_delay
from novaclient import client as nova_client
BUILD_DATE="(built Wed Nov 12 06:33:38 EST 2014)"
REDHAT_COPYRIGHT="Copyright (C) Red Hat, Inc. 2004-2010 All rights reserved."
# Module-level state shared by the fencing callbacks in this file.
# override_status: when non-empty, get_power_status() reports this value
# instead of asking nova.
override_status = ""
# nova: client handle; stays None until main() assigns the novaclient object.
nova = None

def get_power_status(_, options):
    """Report the power state of the compute host named by options["--plug"].

    The first argument (a connection handle in the fencing callback
    signature) is unused. ``options`` must contain "--action" and, when a
    nova client is available, "--plug".

    Returns ``override_status`` verbatim when it is non-empty. Otherwise
    returns "on" or "off" according to the state nova reports for the
    host's nova-compute service, or "unknown" when there is no client, no
    nova-compute service entry, or an unrecognised state.
    """
    status = "unknown"
    logging.debug("get action: " + options["--action"])

    if override_status:
        logging.debug("Pretending we're " + override_status)
        return override_status

    if nova:
        try:
            # NOTE(review): the right-hand side of this assignment is
            # truncated in the reviewed copy (only `["--plug"])` survives);
            # restored as the novaclient per-host service listing - confirm.
            services = nova.services.list(host=options["--plug"])
        except Exception as e:
            # NOTE(review): the handler body is missing in the reviewed
            # copy; fail_usage is imported from fencing and is presumably
            # what was called here - confirm. The return keeps `services`
            # from being used unbound should fail_usage ever come back.
            fail_usage(str(e))
            return status

        for service in services:
            if service.binary == "nova-compute":
                if service.state == "up":
                    status = "on"
                elif service.state == "down":
                    # Must be "off", not "down": set_power_status() polls
                    # this function until it returns "off".
                    status = "off"
                else:
                    logging.debug("Unknown status detected from nova: " + service.state)
    return status
# NOTE(sbauza); We mimic the host-evacuate module since it's only a contrib
# module which is not stable
def _server_evacuate(server, on_shared_storage):
success = True
error_message = ""
nova.servers.evacuate(server=server['uuid'], on_shared_storage=on_shared_storage)
except Exception as e:
success = False
error_message = "Error while evacuating instance: %s" % e
return {
"server_uuid": server['uuid'],
"evacuate_accepted": success,
"error_message": error_message,
def _host_evacuate(host, on_shared_storage):
hypervisors =, servers=True)
response = []
for hyper in hypervisors:
if hasattr(hyper, 'servers'):
for server in hyper.servers:
response.append(_server_evacuate(server, on_shared_storage))
def set_power_status(_, options):
    """Carry out the "on" or "off" fencing action for options["--plug"].

    The first argument (a connection handle in the fencing callback
    signature) is only forwarded to ``get_power_status``. Always clears
    ``override_status`` first and does nothing further without a nova
    client.

    For "on": re-enables the host's nova-compute service if nova already
    reports the host as on; otherwise sets ``override_status`` to "on".
    For any other action: waits until ``get_power_status`` reports "off",
    then evacuates the host's instances.
    """
    global override_status

    override_status = ""
    logging.debug("set action: " + options["--action"])

    if not nova:
        return

    if options["--action"] == "on":
        if get_power_status(_, options) == "on":
            # NOTE(review): the callee is truncated in the reviewed copy
            # (only `["--plug"], 'nova-compute')` survives); restored as
            # the novaclient call that re-enables the service - confirm.
            nova.services.enable(options["--plug"], 'nova-compute')
        else:
            # Pretend we're 'on' so that the fencing library doesn't loop forever waiting for the node to boot
            override_status = "on"
        return

    # need to wait for nova to update its internal status or we
    # cannot call host-evacuate
    while get_power_status(_, options) != "off":
        # Loop forever if need be.
        # Some callers (such as Pacemaker) will have a timer
        # running and kill us if necessary
        logging.debug("Waiting for nova to update it's internal state")
        # Pause between polls so the wait does not hammer the nova API.
        time.sleep(1)

    # If the admin sets --no-shared-storage when they DO have shared
    # storage in use, then they get what they asked for
    on_shared_storage = "--no-shared-storage" not in options

    _host_evacuate(options["--plug"], on_shared_storage)
def get_plugs_list(_, options):
    """List the hypervisor host names known to nova.

    The first argument (a connection handle in the fencing callback
    signature) is unused. ``options`` must contain "--action" and
    "--domain".

    Returns a dict mapping host name to ``("", None)``. For the "list"
    action with a non-empty "--domain", the ".<domain>" part is removed
    from each name; otherwise the name reported by nova is used as is.
    Returns an empty dict when no nova client is available.
    """
    result = {}
    if nova:
        for hypervisor in nova.hypervisors.list():
            longhost = hypervisor.hypervisor_hostname
            if options["--action"] == "list" and options["--domain"] != "":
                # NOTE(review): the second argument of replace() is
                # truncated in the reviewed copy; an empty string (strip
                # the domain) is assumed - confirm.
                shorthost = longhost.replace("." + options["--domain"], "")
                result[shorthost] = ("", None)
            else:
                # NOTE(review): the `else:` line is not present in the
                # reviewed copy; assumed dropped like the other `else:`
                # lines in this file - confirm the long name is not also
                # meant to be listed in the short-name case.
                result[longhost] = ("", None)
    return result
def define_new_opts():
    """Register this agent's extra command-line options in ``all_opt``.

    Adds the entries "tenant-name", "auth-url", "novatool-path", "domain"
    and "no-shared-storage" to the shared ``all_opt`` table (brought in
    by the fencing import). Returns nothing.
    """
    all_opt["tenant-name"] = {
        "getopt" : "t:",
        "longopt" : "tenant-name",
        "help" : "-t, --tenant-name=[tenant] Keystone Admin Tenant",
        "required" : "0",
        "shortdesc" : "Keystone Admin Tenant",
        "default" : "",
        "order": 1,
    }
    # NOTE(review): the help placeholder below reads [tenant] although the
    # option takes a URL. Left unchanged because the metadata XML carries
    # the same text.
    all_opt["auth-url"] = {
        "getopt" : "k:",
        "longopt" : "auth-url",
        "help" : "-k, --auth-url=[tenant] Keystone Admin Auth URL",
        "required" : "0",
        "shortdesc" : "Keystone Admin Auth URL",
        "default" : "",
        "order": 1,
    }
    # "@NOVA_PATH@" is a build-time placeholder substituted with the
    # configured nova binary path.
    all_opt["novatool-path"] = {
        "getopt" : "i:",
        "longopt" : "novatool-path",
        "help" : "-i, --novatool-path=[path] Path to nova binary",
        "required" : "0",
        "shortdesc" : "Path to nova binary",
        "default" : "@NOVA_PATH@",
        "order": 6,
    }
    all_opt["domain"] = {
        "getopt" : "d:",
        "longopt" : "domain",
        "help" : "-d, --domain=[string] DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN",
        "required" : "0",
        "shortdesc" : "DNS domain in which hosts live",
        "default" : "",
        "order": 5,
    }
    # Flag option: it takes no argument, hence the empty getopt letter.
    all_opt["no-shared-storage"] = {
        "getopt" : "",
        "longopt" : "no-shared-storage",
        "help" : "--no-shared-storage Disable functionality for shared storage",
        "required" : "0",
        "shortdesc" : "Disable functionality for dealing with shared storage",
        "default" : "False",
        "order": 5,
    }
def main():
    """Entry point: parse options, connect to nova, run the fencing action.

    Sets the module globals ``nova`` (the client handle) and
    ``override_status`` (so the fencing library always invokes
    ``set_power_status`` for on/off/reboot), then delegates to
    ``fence_action`` and exits with its result.
    """
    global override_status
    global nova

    device_opt = ["login", "passwd", "tenant-name", "auth-url",
        "novatool-path", "no_login", "no_password", "port", "domain", "no-shared-storage"]
    # NOTE(review): no call to define_new_opts() is present in the reviewed
    # copy, yet device_opt names the options it registers; the call is
    # assumed to have been dropped - confirm.
    define_new_opts()
    all_opt["shell_timeout"]["default"] = "180"

    options = check_input(device_opt, process_input(device_opt))

    docs = {}
    docs["shortdesc"] = "Fence agent for nova compute nodes"
    docs["longdesc"] = "fence_nova_host is a Nova fencing notification agent"
    docs["vendorurl"] = ""
    show_docs(options, docs)

    # The first argument is the Nova client version
    # NOTE(review): every argument after the version is truncated in the
    # reviewed copy; the credentials, tenant and auth URL options are
    # assumed, in novaclient's positional order - confirm.
    nova = nova_client.Client('2',
        options["--username"],
        options["--password"],
        options["--tenant-name"],
        options["--auth-url"])

    if options["--action"] in ["off", "reboot"]:
        # Pretend we're 'on' so that the fencing library will always call set_power_status(off)
        override_status = "on"

    if options["--action"] == "on":
        # Pretend we're 'off' so that the fencing library will always call set_power_status(on)
        override_status = "off"

    # Potentially we should make this a pacemaker feature
    if options["--action"] != "list" and options["--domain"] != "" and "--plug" in options:
        options["--plug"] = options["--plug"] + "." + options["--domain"]

    result = fence_action(None, options, set_power_status, get_power_status, get_plugs_list, None)
    # NOTE(review): `result` is otherwise unused in the reviewed copy;
    # propagating it as the process exit status is assumed - confirm.
    sys.exit(result)

if __name__ == "__main__":
    main()
@@ -24,6 +24,7 @@ $(TARGET): $(SRC)
-e 's#@''NOVA_PATH@#${NOVA_PATH}#g' \
> $@
if [ 0 -eq `echo "$(SRC)" | grep fence_ &> /dev/null; echo $$?` ]; then \
@@ -0,0 +1,123 @@
<?xml version="1.0" ?>
<resource-agent name="fence_compute" shortdesc="Fence agent for nova compute nodes" >
<longdesc>fence_nova_host is a Nova fencing notification agent</longdesc>
<parameter name="action" unique="0" required="1">
<getopt mixed="-o, --action=[action]" />
<content type="string" default="reboot" />
<shortdesc lang="en">Fencing action</shortdesc>
<parameter name="auth-url" unique="0" required="0">
<getopt mixed="-k, --auth-url=[tenant]" />
<content type="string" default="" />
<shortdesc lang="en">Keystone Admin Auth URL</shortdesc>
<parameter name="login" unique="0" required="0">
<getopt mixed="-l, --username=[name]" />
<content type="string" />
<shortdesc lang="en">Login name</shortdesc>
<parameter name="passwd" unique="0" required="0">
<getopt mixed="-p, --password=[password]" />
<content type="string" />
<shortdesc lang="en">Login password or passphrase</shortdesc>
<parameter name="passwd_script" unique="0" required="0">
<getopt mixed="-S, --password-script=[script]" />
<content type="string" />
<shortdesc lang="en">Script to run to retrieve password</shortdesc>
<parameter name="port" unique="0" required="1">
<getopt mixed="-n, --plug=[id]" />
<content type="string" />
<shortdesc lang="en">Physical plug number on device, UUID or identification of machine</shortdesc>
<parameter name="tenant-name" unique="0" required="0">
<getopt mixed="-t, --tenant-name=[tenant]" />
<content type="string" default="" />
<shortdesc lang="en">Keystone Admin Tenant</shortdesc>
<parameter name="domain" unique="0" required="0">
<getopt mixed="-d, --domain=[string]" />
<content type="string" default="" />
<shortdesc lang="en">DNS domain in which hosts live</shortdesc>
<parameter name="no-shared-storage" unique="0" required="0">
<getopt mixed="--no-shared-storage" />
<content type="boolean" default="False" />
<shortdesc lang="en">Disable functionality for dealing with shared storage</shortdesc>
<parameter name="novatool-path" unique="0" required="0">
<getopt mixed="-i, --novatool-path=[path]" />
<content type="string" default="/usr/bin/nova" />
<shortdesc lang="en">Path to nova binary</shortdesc>
<parameter name="verbose" unique="0" required="0">
<getopt mixed="-v, --verbose" />
<content type="boolean" />
<shortdesc lang="en">Verbose mode</shortdesc>
<parameter name="debug" unique="0" required="0">
<getopt mixed="-D, --debug-file=[debugfile]" />
<content type="string" />
<shortdesc lang="en">Write debug information to given file</shortdesc>
<parameter name="version" unique="0" required="0">
<getopt mixed="-V, --version" />
<content type="boolean" />
<shortdesc lang="en">Display version information and exit</shortdesc>
<parameter name="help" unique="0" required="0">
<getopt mixed="-h, --help" />
<content type="boolean" />
<shortdesc lang="en">Display help and exit</shortdesc>
<parameter name="separator" unique="0" required="0">
<getopt mixed="-C, --separator=[char]" />
<content type="string" default="," />
<shortdesc lang="en">Separator for CSV created by 'list' operation</shortdesc>
<parameter name="delay" unique="0" required="0">
<getopt mixed="--delay=[seconds]" />
<content type="string" default="0" />
<shortdesc lang="en">Wait X seconds before fencing is started</shortdesc>
<parameter name="login_timeout" unique="0" required="0">
<getopt mixed="--login-timeout=[seconds]" />
<content type="string" default="5" />
<shortdesc lang="en">Wait X seconds for cmd prompt after login</shortdesc>
<parameter name="power_timeout" unique="0" required="0">
<getopt mixed="--power-timeout=[seconds]" />
<content type="string" default="20" />
<shortdesc lang="en">Test X seconds for status change after ON/OFF</shortdesc>
<parameter name="power_wait" unique="0" required="0">
<getopt mixed="--power-wait=[seconds]" />
<content type="string" default="0" />
<shortdesc lang="en">Wait X seconds after issuing ON/OFF</shortdesc>
<parameter name="shell_timeout" unique="0" required="0">
<getopt mixed="--shell-timeout=[seconds]" />
<content type="string" default="180" />
<shortdesc lang="en">Wait X seconds for cmd prompt after issuing command</shortdesc>
<parameter name="retry_on" unique="0" required="0">
<getopt mixed="--retry-on=[attempts]" />
<content type="string" default="1" />
<shortdesc lang="en">Count of attempts to retry power on</shortdesc>
<action name="on" automatic="0"/>
<action name="off" />
<action name="reboot" />
<action name="status" />
<action name="list" />
<action name="list-status" />
<action name="monitor" />
<action name="metadata" />
<action name="validate-all" />

