From 80762481c840aaace4be176ad8c81df08d4cd1c2 Mon Sep 17 00:00:00 2001 From: Christian Decker Date: Wed, 18 Dec 2019 20:00:13 +0100 Subject: [PATCH] db: Turn the transaction counter into an optimistic lock The optimistic lock prevents multiple instances of c-lightning making concurrent modifications to the database. That would be unsafe as it messes up the state in the DB. The optimistic lock is implemented by checking whether a gated update on the previous value of the `data_version` actually results in an update. If that's not the case the DB has been changed under our feet. The lock provides linearizability of DB modifications: if a database is changed under the feet of a running process that process will `abort()`, which from a global point of view is as if it had crashed right after the last successful commit. Any process that also changed the DB must've started between the last successful commit and the unsuccessful one since otherwise its counters would not have matched (which would also have aborted that transaction). So this reduces all the possible timelines to an equivalent where the first process died, and the second process recovered from the DB. This is not that interesting for `sqlite3` where we are also protected via the PID file, but when running on multiple hosts against the same DB, e.g., with `postgres`, this protection becomes important. Changelog-Added: DB: Optimistic logging prevents instances from running concurrently against the same database, providing linear consistency to changes. --- tests/test_db.py | 26 ++++++++++++++++++++++++-- wallet/db.c | 19 ++++++++++++++++++- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/tests/test_db.py b/tests/test_db.py index ee942eed44cc..c1e0c4edc4ad 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -1,8 +1,10 @@ from fixtures import * # noqa: F401,F403 -from utils import wait_for, sync_blockheight, COMPAT from fixtures import TEST_NETWORK - +from pyln.client import RpcError +from utils import wait_for, sync_blockheight, COMPAT import os +import pytest +import time import unittest @@ -136,3 +138,23 @@ def test_scid_upgrade(node_factory, bitcoind): assert l1.db_query('SELECT short_channel_id from channels;') == [{'short_channel_id': '103x1x1'}] assert l1.db_query('SELECT failchannel from payments;') == [{'failchannel': '103x1x1'}] + + +def test_optimistic_locking(node_factory, bitcoind): + """Have a node run against a DB, then change it under its feet, crashing it. + + We start a node, wait for it to settle its write so we have a window where + we can interfere, and watch the world burn (safely). + """ + l1 = node_factory.get_node(may_fail=True, allow_broken_log=True) + + sync_blockheight(bitcoind, [l1]) + l1.rpc.getinfo() + time.sleep(1) + l1.db.execute("UPDATE vars SET intval = intval + 1 WHERE name = 'data_version';") + + # Now trigger any DB write and we should be crashing. + with pytest.raises(RpcError, match=r'Connection to RPC server lost.'): + l1.rpc.newaddr() + + assert(l1.daemon.is_in_log(r'Optimistic lock on the database failed')) diff --git a/wallet/db.c b/wallet/db.c index 6d0bee788bc5..edfe3f4475e5 100644 --- a/wallet/db.c +++ b/wallet/db.c @@ -802,13 +802,30 @@ void db_begin_transaction_(struct db *db, const char *location) db->in_transaction = location; } +/* By making the update conditional on the current value we expect we + * are implementing an optimistic lock: if the update results in + * changes on the DB we know that the data_version did not change + * under our feet and no other transaction ran in the meantime. + * + * Notice that this update effectively locks the row, so that other + * operations attempting to change this outside the transaction will + * wait for this transaction to complete. The external change will + * ultimately fail the changes test below, it'll just delay its abort + * until our transaction is committed. + */ static void db_data_version_incr(struct db *db) { struct db_stmt *stmt = db_prepare_v2( db, SQL("UPDATE vars " "SET intval = intval + 1 " - "WHERE name = 'data_version'")); + "WHERE name = 'data_version'" + " AND intval = ?")); + db_bind_int(stmt, 0, db->data_version); db_exec_prepared_v2(stmt); + if (db_count_changes(stmt) != 1) + fatal("Optimistic lock on the database failed. There may be a " + "concurrent access to the database. Aborting since " + "concurrent access is unsafe."); tal_free(stmt); db->data_version++; }