Skip to content

Commit

Permalink
Fixed db conn recovery issue. Fixes bug 954971.
Browse files Browse the repository at this point in the history
This implementation wraps the db method calls and retries the method if an attempt failed due to db connectivity issues. Defines 2 new configuration parameters viz sql_max_retries and sql_retry_interval (values default to 10 and 1 respectively).
This implementation does not have any overhead in the normal case, as opposed to MySQLPingListener implementation, which does a 'select 1' query every time a connection is checkout from the pool.

Change-Id: I24240f22bca445b9ab76a1594631b5eaca393b4d
  • Loading branch information
Unmesh Gurjar authored and bcwaldon committed Mar 27, 2012
1 parent f4a7035 commit 98f5347
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 35 deletions.
1 change: 1 addition & 0 deletions Authors
Expand Up @@ -54,6 +54,7 @@ Stuart McLaren <stuart.mclaren@hp.com>
Taku Fukushima <tfukushima@dcl.info.waseda.ac.jp>
Thierry Carrez <thierry@openstack.org>
Tom Hancock <tom.hancock@hp.com>
Unmesh Gurjar <unmesh.gurjar@vertex.co.in>
Wayne A. Walls <wayne.walls@rackspace.com>
William Wolf <throughnothing@gmail.com>
Vishvananda Ishaya <vishvananda@gmail.com>
Expand Down
91 changes: 56 additions & 35 deletions glance/registry/db/api.py
Expand Up @@ -23,10 +23,11 @@
"""

import logging
import time

import sqlalchemy
from sqlalchemy import asc, create_engine, desc
from sqlalchemy.exc import IntegrityError, DisconnectionError
from sqlalchemy.exc import IntegrityError, OperationalError, DBAPIError
from sqlalchemy.orm import exc
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import sessionmaker
Expand All @@ -39,6 +40,8 @@

_ENGINE = None
_MAKER = None
_MAX_RETRIES = None
_RETRY_INTERVAL = None
BASE = models.BASE
sa_logger = None
logger = logging.getLogger(__name__)
Expand All @@ -62,59 +65,32 @@
db_opts = [
cfg.IntOpt('sql_idle_timeout', default=3600),
cfg.StrOpt('sql_connection', default='sqlite:///glance.sqlite'),
cfg.IntOpt('sql_max_retries', default=10),
cfg.IntOpt('sql_retry_interval', default=1)
]


class MySQLPingListener(object):

"""
Ensures that MySQL connections checked out of the
pool are alive.
Borrowed from:
http://groups.google.com/group/sqlalchemy/msg/a4ce563d802c929f
Error codes caught:
* 2006 MySQL server has gone away
* 2013 Lost connection to MySQL server during query
* 2014 Commands out of sync; you can't run this command now
* 2045 Can't open shared memory; no answer from server (%lu)
* 2055 Lost connection to MySQL server at '%s', system error: %d
from http://dev.mysql.com/doc/refman/5.6/en/error-messages-client.html
"""

def checkout(self, dbapi_con, con_record, con_proxy):
try:
dbapi_con.cursor().execute('select 1')
except dbapi_con.OperationalError, ex:
if ex.args[0] in (2006, 2013, 2014, 2045, 2055):
logging.warn('Got mysql server has gone away: %s', ex)
raise DisconnectionError("Database server went away")
else:
raise


def configure_db(conf):
"""
Establish the database, create an engine if needed, and
register the models.
:param conf: Mapping of configuration options
"""
global _ENGINE, sa_logger, logger
global _ENGINE, sa_logger, logger, _MAX_RETRIES, _RETRY_INTERVAL
if not _ENGINE:
conf.register_opts(db_opts)
sql_connection = conf.sql_connection
_MAX_RETRIES = conf.sql_max_retries
_RETRY_INTERVAL = conf.sql_retry_interval
connection_dict = sqlalchemy.engine.url.make_url(sql_connection)
engine_args = {'pool_recycle': conf.sql_idle_timeout,
'echo': False,
'convert_unicode': True
}
if 'mysql' in connection_dict.drivername:
engine_args['listeners'] = [MySQLPingListener()]
try:
_ENGINE = create_engine(sql_connection, **engine_args)
_ENGINE.create = wrap_db_error(_ENGINE.create)
except Exception, err:
msg = _("Error configuring registry database with supplied "
"sql_connection '%(sql_connection)s'. "
Expand Down Expand Up @@ -151,7 +127,52 @@ def get_session(autocommit=True, expire_on_commit=False):
_MAKER = sessionmaker(bind=_ENGINE,
autocommit=autocommit,
expire_on_commit=expire_on_commit)
return _MAKER()
session = _MAKER()
session.query = wrap_db_error(session.query)
session.flush = wrap_db_error(session.flush)
session.execute = wrap_db_error(session.execute)
session.begin = wrap_db_error(session.begin)
return session


def is_db_connection_error(args):
"""Return True if error in connecting to db."""
conn_err_codes = ('2002', '2006')
for err_code in conn_err_codes:
if args.find(err_code) != -1:
return True
return False


def wrap_db_error(f):
"""Retry DB connection. Copied from nova and modified."""
def _wrap(*args, **kwargs):
try:
return f(*args, **kwargs)
except OperationalError, e:
if not is_db_connection_error(e.args[0]):
raise

global _MAX_RETRIES
global _RETRY_INTERVAL
remaining_attempts = _MAX_RETRIES
while True:
logger.warning(_('SQL connection failed. %d attempts left.'),
remaining_attempts)
remaining_attempts -= 1
time.sleep(_RETRY_INTERVAL)
try:
return f(*args, **kwargs)
except OperationalError, e:
if remaining_attempts == 0 or \
not is_db_connection_error(e.args[0]):
raise
except DBAPIError:
raise
except DBAPIError:
raise
_wrap.func_name = f.func_name
return _wrap


def image_create(context, values):
Expand Down

0 comments on commit 98f5347

Please sign in to comment.