Skip to content

Commit

Permalink
blueprint agent-db-ha
Browse files Browse the repository at this point in the history
bug 985470
bug 985646

The fixes enable the OVS and linuxbridge agenets to "keep alive" when the host running the server/plugin is down.

Fixes after comments. Better logging

Fixes after comments - added reconnect interval + cleanup

Fixes after comments - simplify code + ovs intervals moved to configuration file

Fixes after comments - move int conversion to configuration

Fixes after comments - if one of the polling interval or reconnect interval are not
defined in the relevant ini files then a default value is used.

Fixes after comments and merges with HACKING.rst fixes

Fixes after port binding comments

Fixes after comments from gongysh

Fixes after comments - align comments in agent ini files

Fixes - revert some code

Change-Id: I9194f142478b130e8ef198b019539357a9916d7f
  • Loading branch information
Gary Kotton committed May 3, 2012
1 parent 0c66ab4 commit be45b70
Show file tree
Hide file tree
Showing 6 changed files with 187 additions and 97 deletions.
6 changes: 4 additions & 2 deletions etc/quantum/plugins/linuxbridge/linuxbridge_conf.ini
Expand Up @@ -16,12 +16,14 @@ host = <hostname_or_IP_address_of_Quantum_server>
port = 3306

[LINUX_BRIDGE]
#this is the interface connected to the switch on your Quantum network
# This is the interface connected to the switch on your Quantum network
physical_interface = eth1

[AGENT]
#agent's polling interval in seconds
# Agent's polling interval in seconds
polling_interval = 2
# Agent's database reconnection interval in seconds - in event connectivity is lost
reconnect_interval = 2
# Change to "sudo quantum-rootwrap" to limit commands that can be run
# as root.
root_helper = sudo
4 changes: 4 additions & 0 deletions etc/quantum/plugins/openvswitch/ovs_quantum_plugin.ini
Expand Up @@ -32,6 +32,10 @@ integration-bridge = br-int
# local-ip = 10.0.0.3

[AGENT]
# Agent's polling interval in seconds
polling_interval = 2
# Agent's database reconnection interval in seconds - in event connectivity is lost
reconnect_interval = 2
# Change to "sudo quantum-rootwrap" to limit commands that can be run
# as root.
root_helper = sudo
Expand Down
137 changes: 85 additions & 52 deletions quantum/plugins/linuxbridge/agent/linuxbridge_quantum_agent.py
Expand Up @@ -28,14 +28,13 @@
import os
import shlex
import signal
import sqlite3
import subprocess
import sys
import time

import MySQLdb

from sqlalchemy.ext.sqlsoup import SqlSoup

logging.basicConfig()
LOG = logging.getLogger(__name__)


Expand All @@ -52,7 +51,9 @@
PORT_BINDINGS = "port_bindings"
OP_STATUS_UP = "UP"
OP_STATUS_DOWN = "DOWN"
DB_CONNECTION = None
# Default inteval values
DEFAULT_POLLING_INTERVAL = 2
DEFAULT_RECONNECT_INTERVAL = 2


class LinuxBridge:
Expand Down Expand Up @@ -288,10 +289,12 @@ def delete_vlan(self, interface):
class LinuxBridgeQuantumAgent:

def __init__(self, br_name_prefix, physical_interface, polling_interval,
root_helper):
self.polling_interval = int(polling_interval)
reconnect_interval, root_helper):
self.polling_interval = polling_interval
self.reconnect_interval = reconnect_interval
self.root_helper = root_helper
self.setup_linux_bridge(br_name_prefix, physical_interface)
self.db_connected = False

def setup_linux_bridge(self, br_name_prefix, physical_interface):
self.linux_br = LinuxBridge(br_name_prefix, physical_interface,
Expand Down Expand Up @@ -350,27 +353,43 @@ def process_deleted_networks(self, vlan_bindings):
if bridge not in current_quantum_bridge_names:
self.linux_br.delete_vlan_bridge(bridge)

def manage_networks_on_host(self, conn, old_vlan_bindings,
def manage_networks_on_host(self, db,
old_vlan_bindings,
old_port_bindings):
if DB_CONNECTION != 'sqlite':
cursor = MySQLdb.cursors.DictCursor(conn)
else:
cursor = conn.cursor()
cursor.execute("SELECT * FROM vlan_bindings")
rows = cursor.fetchall()
cursor.close()
vlan_bindings = {}
try:
vlan_binds = db.vlan_bindings.all()
except Exception as e:
LOG.info("Unable to get vlan bindings! Exception: %s" % e)
self.db_connected = False
return {VLAN_BINDINGS: {},
PORT_BINDINGS: []}

vlans_string = ""
for row in rows:
vlan_bindings[row['network_id']] = row
vlans_string = "%s %s" % (vlans_string, row)
for bind in vlan_binds:
entry = {'network_id': bind.network_id, 'vlan_id': bind.vlan_id}
vlan_bindings[bind.network_id] = entry
vlans_string = "%s %s" % (vlans_string, entry)

port_bindings = []
try:
port_binds = db.ports.all()
except Exception as e:
LOG.info("Unable to get port bindings! Exception: %s" % e)
self.db_connected = False
return {VLAN_BINDINGS: {},
PORT_BINDINGS: []}

all_bindings = {}
for bind in port_binds:
all_bindings[bind.uuid] = bind
entry = {'network_id': bind.network_id, 'state': bind.state,
'op_status': bind.op_status, 'uuid': bind.uuid,
'interface_id': bind.interface_id}
if bind.state == 'ACTIVE':
port_bindings.append(entry)

plugged_interfaces = []
cursor = MySQLdb.cursors.DictCursor(conn)
cursor.execute("SELECT * FROM ports where state = 'ACTIVE'")
port_bindings = cursor.fetchall()
cursor.close()

ports_string = ""
for pb in port_bindings:
ports_string = "%s %s" % (ports_string, pb)
Expand All @@ -380,10 +399,7 @@ def manage_networks_on_host(self, conn, old_vlan_bindings,
pb['network_id'],
pb['interface_id'],
vlan_id):
cursor = MySQLdb.cursors.DictCursor(conn)
sql = PORT_OPSTATUS_UPDATESQL % (pb['uuid'], OP_STATUS_UP)
cursor.execute(sql)
cursor.close()
all_bindings[pb['uuid']].op_status = OP_STATUS_UP
plugged_interfaces.append(pb['interface_id'])

if old_port_bindings != port_bindings:
Expand All @@ -396,16 +412,30 @@ def manage_networks_on_host(self, conn, old_vlan_bindings,

self.process_deleted_networks(vlan_bindings)

conn.commit()
try:
db.commit()
except Exception as e:
LOG.info("Unable to update database! Exception: %s" % e)
db.rollback()
vlan_bindings = {}
port_bindings = []

return {VLAN_BINDINGS: vlan_bindings,
PORT_BINDINGS: port_bindings}

def daemon_loop(self, conn):
def daemon_loop(self, db_connection_url):
old_vlan_bindings = {}
old_port_bindings = {}
old_port_bindings = []
self.db_connected = False

while True:
bindings = self.manage_networks_on_host(conn,
if not self.db_connected:
time.sleep(self.reconnect_interval)
db = SqlSoup(db_connection_url)
self.db_connected = True
LOG.info("Connecting to database \"%s\" on %s" %
(db.engine.url.database, db.engine.url.host))
bindings = self.manage_networks_on_host(db,
old_vlan_bindings,
old_port_bindings)
old_vlan_bindings = bindings[VLAN_BINDINGS]
Expand All @@ -422,54 +452,57 @@ def main():
options, args = parser.parse_args()

if options.verbose:
LOG.basicConfig(level=LOG.DEBUG)
LOG.setLevel(logging.DEBUG)
else:
LOG.basicConfig(level=LOG.WARN)
LOG.setLevel(logging.WARNING)

if len(args) != 1:
parser.print_help()
sys.exit(1)

config_file = args[0]
config = ConfigParser.ConfigParser()
conn = None
try:
fh = open(config_file)
fh.close()
config.read(config_file)
br_name_prefix = BRIDGE_NAME_PREFIX
physical_interface = config.get("LINUX_BRIDGE", "physical_interface")
polling_interval = config.get("AGENT", "polling_interval")
if config.has_option("AGENT", "polling_interval"):
polling_interval = config.getint("AGENT", "polling_interval")
else:
polling_interval = DEFAULT_POLLING_INTERVAL
LOG.info("Polling interval not defined. Using default.")
if config.has_option("AGENT", "reconnect_interval"):
reconnect_interval = config.getint("AGENT", "reconnect_interval")
else:
reconnect_interval = DEFAULT_RECONNECT_INTERVAL
LOG.info("Reconnect interval not defined. Using default.")
root_helper = config.get("AGENT", "root_helper")
'Establish database connection and load models'
global DB_CONNECTION
DB_CONNECTION = config.get("DATABASE", "connection")
if DB_CONNECTION == 'sqlite':
connection = config.get("DATABASE", "connection")
if connection == 'sqlite':
LOG.info("Connecting to sqlite DB")
conn = sqlite3.connect(":memory:")
conn.row_factory = sqlite3.Row
db_connection_url = "sqlite:///:memory:"
else:
db_name = config.get("DATABASE", "name")
db_user = config.get("DATABASE", "user")
db_pass = config.get("DATABASE", "pass")
db_host = config.get("DATABASE", "host")
db_port = int(config.get("DATABASE", "port"))
LOG.info("Connecting to database %s on %s" % (db_name, db_host))
conn = MySQLdb.connect(host=db_host, user=db_user, port=db_port,
passwd=db_pass, db=db_name)
except Exception, e:
LOG.error("Unable to parse config file \"%s\": \nException%s"
% (config_file, str(e)))
db_connection_url = ("%s://%s:%s@%s:%d/%s" %
(connection, db_user, db_pass, db_host, db_port, db_name))
except Exception as e:
LOG.error("Unable to parse config file \"%s\": \nException %s" %
(config_file, str(e)))
sys.exit(1)

try:
plugin = LinuxBridgeQuantumAgent(br_name_prefix, physical_interface,
polling_interval, root_helper)
LOG.info("Agent initialized successfully, now running...")
plugin.daemon_loop(conn)
finally:
if conn:
conn.close()
plugin = LinuxBridgeQuantumAgent(br_name_prefix, physical_interface,
polling_interval, reconnect_interval,
root_helper)
LOG.info("Agent initialized successfully, now running... ")
plugin.daemon_loop(db_connection_url)

sys.exit(0)

Expand Down
Expand Up @@ -34,7 +34,7 @@
from quantum.plugins.linuxbridge.db import l2network_db as cdb


LOG = logger.getLogger(__name__)
LOG = logging.getLogger(__name__)


class LinuxBridgeAgentTest(unittest.TestCase):
Expand Down

0 comments on commit be45b70

Please sign in to comment.