Skip to content

Commit

Permalink
config: validate names and UUIDs before box.cfg
Browse files Browse the repository at this point in the history
Currently only instance_uuid is validated before the recovery process.
All names and replicaset_uuid are checked only when recovery is done,
which can take a long time. This can be frustrating for users who
have waited several hours only to get a name mismatch error.

Let's read a small part of the snapshot file before calling box.cfg
in order to figure out whether the names and UUIDs passed to the
configuration match the ones saved inside the snapshot.

During config reload there's no sense in reading the snapshot file, as
the data is already stored inside spaces, so let's read it from there.
We still check that the names in the config and the names in the spaces
don't contradict each other during config reload.

This commit also introduces methods for getting the names which are
not set in the snap (or in memory); these will be used in subsequent
commits to set names automatically.

Needed for tarantool#8978

NO_DOC=tarantool/doc#3661
  • Loading branch information
Serpentian committed Oct 26, 2023
1 parent d426d14 commit 9c675f6
Show file tree
Hide file tree
Showing 5 changed files with 461 additions and 3 deletions.
5 changes: 5 additions & 0 deletions changelogs/unreleased/config-validate-identifiers.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
## feature/config

* Introduced validation for replicaset_name/uuid and instance_name/uuid
mismatches before the recovery process when Tarantool is configured via
a YAML file or etcd.
180 changes: 180 additions & 0 deletions src/box/lua/config/configdata.lua
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ local urilib = require('uri')
local digest = require('digest')
local instance_config = require('internal.config.instance_config')
local cluster_config = require('internal.config.cluster_config')
local snapshot = require('internal.config.utils.snapshot')

local function choose_iconfig(self, opts)
if opts ~= nil and opts.peer ~= nil then
Expand Down Expand Up @@ -197,10 +198,173 @@ function methods.bootstrap_leader_name(self)
return self._bootstrap_leader_name
end

-- Returns instance_uuid and replicaset_uuid, saved in config for the
-- instance with the given name.
--
-- peers is a table keyed by instance name; each value carries the
-- instance config with defaults applied in the iconfig_def field.
--
-- Returns a single nil if there is no peer with such a name.
local function find_uuids_by_name(peers, instance_name)
    -- Peers are keyed by name, so a direct lookup replaces the scan
    -- over all the peers.
    local peer = peers[instance_name]
    if peer == nil then
        return nil
    end

    local iconfig = peer.iconfig_def
    return instance_config:get(iconfig, 'database.instance_uuid'),
           instance_config:get(iconfig, 'database.replicaset_uuid')
end

-- Returns the name of the peer, whose database.instance_uuid in the
-- config equals the given instance_uuid, or nil if there is no such
-- peer.
--
-- peers is a table keyed by instance name; each value carries the
-- instance config with defaults applied in the iconfig_def field.
local function find_peer_name_by_uuid(peers, instance_uuid)
    -- A peer without instance_uuid in the config yields an empty
    -- value here. It would compare equal to a missing instance_uuid
    -- argument and an arbitrary peer would be reported as a match.
    if instance_uuid == nil then
        return nil
    end

    for name, peer in pairs(peers) do
        local uuid = instance_config:get(peer.iconfig_def,
                                         'database.instance_uuid')
        if uuid == instance_uuid then
            return name
        end
    end
    return nil
end

-- Find the name of the peer of this configdata object, which has
-- the given instance UUID in its config. The lookup is delegated to
-- find_peer_name_by_uuid() over self._peers.
function methods.peer_name_by_uuid(self, instance_uuid)
    local peers = self._peers
    return find_peer_name_by_uuid(peers, instance_uuid)
end

-- Collect the names and UUIDs that are already persisted.
--
-- While box.cfg is still a function (box is not configured yet),
-- the data is taken from the snapshot file located via
-- snapshot.get_path(); nil is returned when there is no snapshot.
-- Otherwise the data is taken from box.info and the _cluster space.
local function find_saved_names(iconfig)
    if type(box.cfg) ~= 'function' then
        -- Box.cfg was already done. No sense in snapshot
        -- reading, we can get all data from memory.
        local name_to_uuid = {}
        local rows = box.space._cluster:select(nil, {limit = 32})
        for _, tuple in ipairs(rows) do
            -- Only the rows with the third field (name) set are
            -- collected, the second field is stored as the value.
            local name = tuple[3]
            if name ~= nil then
                name_to_uuid[name] = tuple[2]
            end
        end

        return {
            replicaset_name = box.info.replicaset.name,
            replicaset_uuid = box.info.replicaset.uuid,
            instance_name = box.info.name,
            instance_uuid = box.info.uuid,
            peers = name_to_uuid,
        }
    end

    local path = snapshot.get_path(iconfig)
    if path == nil then
        -- Bootstrap is going to be done, no names are saved.
        return nil
    end

    -- Read system spaces of snap file.
    return snapshot.get_names(path)
end

-- Return a map that shows which instances don't have a name set.
-- Info about the current replicaset name is also included in the map.
--
-- The result has the following shape:
--
-- {
--     _peers = {[instance_name] = <instance_uuid or 'unknown'>},
--     [replicaset_name] = <replicaset_uuid>,
-- }
--
-- The replicaset entry is present only if the saved data has no
-- replicaset name.
function methods.missing_names(self)
    -- Note, that replicaset_name cannot start with underscore (_peers
    -- name is forbidden), so we won't overwrite it with list of peers.
    local result = {_peers = {}}

    local saved = find_saved_names(self._iconfig_def)
    if saved == nil then
        -- All names will be set during replicaset bootstrap.
        return result
    end

    -- Missing name of the current replicaset.
    if saved.replicaset_name == nil then
        result[self._replicaset_name] = saved.replicaset_uuid
    end

    for peer_name, peer in pairs(self._peers) do
        local peer_iconfig = peer.iconfig_def
        -- We allow anonymous replica without instance_uuid. Anonymous
        -- replica cannot have name set, it's enough to validate
        -- replicaset_name/uuid.
        local is_anon = instance_config:get(peer_iconfig, 'replication.anon')
        if not is_anon then
            -- The UUID may be box.NULL if instance_uuid is not passed
            -- to config.
            local uuid = instance_config:get(peer_iconfig,
                                             'database.instance_uuid')
            if uuid == box.NULL then
                uuid = 'unknown'
            end

            if not saved.peers[peer_name] then
                result._peers[peer_name] = uuid
            end
        end
    end

    return result
end

-- Metatable that resolves field lookups through the methods table.
-- NOTE(review): presumably passed to setmetatable() in new() --
-- confirm against the full file.
local mt = {
__index = methods,
}

-- Check the names and UUIDs from the config against the saved ones
-- (saved_names) and raise an error on the first contradiction.
--
-- saved_names: {replicaset_name, replicaset_uuid, instance_name,
-- instance_uuid}, both UUIDs must be set.
--
-- config_names: {replicaset_name, instance_name, replicaset_uuid,
-- instance_uuid, peers}, both names must be set, peers must contain
-- an entry for instance_name.
--
-- Errors are raised as plain strings without position information.
local function validate_names(saved_names, config_names)
    -- Raise a formatted error message with level 0.
    local function fail(fmt, ...)
        error(string.format(fmt, ...), 0)
    end

    -- Snapshot always has replicaset uuid and
    -- at least one peer in _cluster space.
    assert(saved_names.replicaset_uuid)
    assert(saved_names.instance_uuid)
    -- Config always has names set.
    assert(config_names.replicaset_name ~= nil)
    assert(config_names.instance_name ~= nil)

    local cfg_rs_uuid = config_names.replicaset_uuid
    if cfg_rs_uuid ~= nil and cfg_rs_uuid ~= saved_names.replicaset_uuid then
        fail('Replicaset UUID mismatch. Snapshot: %s, ' ..
             'config: %s.', saved_names.replicaset_uuid, cfg_rs_uuid)
    end

    local saved_rs_name = saved_names.replicaset_name
    if saved_rs_name ~= nil and
       saved_rs_name ~= config_names.replicaset_name then
        fail('Replicaset name mismatch. Snapshot: %s, ' ..
             'config: %s.', saved_rs_name, config_names.replicaset_name)
    end

    local cfg_inst_uuid = config_names.instance_uuid
    if cfg_inst_uuid ~= nil and
       cfg_inst_uuid ~= saved_names.instance_uuid then
        fail('Instance UUID mismatch. Snapshot: %s, ' ..
             'config: %s.', saved_names.instance_uuid, cfg_inst_uuid)
    end

    local saved_inst_name = saved_names.instance_name
    if saved_inst_name ~= nil and
       saved_inst_name ~= config_names.instance_name then
        fail('Instance name mismatch. Snapshot: %s, ' ..
             'config: %s.', saved_inst_name, config_names.instance_name)
    end

    -- The remaining checks fail early, if a UUID is not set in the
    -- config, but no name is found in the saved data. They are
    -- skipped for a replica, configured as anonymous: anon replicas
    -- cannot have names.
    local self_peer = config_names.peers[config_names.instance_name]
    if instance_config:get(self_peer.iconfig_def, 'replication.anon') then
        return
    end

    if saved_names.instance_name == nil and
       config_names.instance_uuid == nil then
        fail('Instance name for %s is not set in snapshot' ..
             ' and UUID is missing in the config. Found ' ..
             '%s in snapshot.', config_names.instance_name,
             saved_names.instance_uuid)
    end

    if saved_names.replicaset_name == nil and
       config_names.replicaset_uuid == nil then
        fail('Replicaset name for %s is not set in ' ..
             'snapshot and UUID is missing in the ' ..
             'config. Found %s in snapshot.',
             config_names.replicaset_name,
             saved_names.replicaset_uuid)
    end
end

local function new(iconfig, cconfig, instance_name)
-- Precalculate configuration with applied defaults.
local iconfig_def = instance_config:apply_default(iconfig)
Expand Down Expand Up @@ -352,6 +516,22 @@ local function new(iconfig, cconfig, instance_name)
bootstrap_leader_name = peer_names[1]
end

-- Names and UUIDs are always validated: during instance start
-- and during config reload.
local saved_names = find_saved_names(iconfig_def)
if saved_names ~= nil then
local config_instance_uuid, config_replicaset_uuid =
find_uuids_by_name(peers, instance_name)
validate_names(saved_names, {
replicaset_name = found.replicaset_name,
instance_name = instance_name,
-- UUIDs from config, generated one should not be used here.
replicaset_uuid = config_replicaset_uuid,
instance_uuid = config_instance_uuid,
peers = peers,
})
end

return setmetatable({
_iconfig = iconfig,
_iconfig_def = iconfig_def,
Expand Down
47 changes: 47 additions & 0 deletions src/box/lua/config/utils/snapshot.lua
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
local fio = require('fio')
local xlog = require('xlog')
local instance_config = require('internal.config.instance_config')

local snapshot_path = nil
Expand Down Expand Up @@ -62,6 +63,52 @@ local function get_snapshot_path(iconfig)
return snapshot_path
end

-- Read the snap file and return a map of saved UUIDs and names:
-- the replicaset name and UUID (from the space with id
-- box.schema.SCHEMA_ID), the name and UUID of the current instance
-- and a name -> UUID map of all named instances (from the space
-- with id box.schema.CLUSTER_ID).
--
-- The UUID of the current instance is taken from the snapshot
-- meta information.
local function get_snapshot_names(snap_path)
    local name_to_uuid = {}
    local self_uuid = xlog.meta(snap_path).instance_uuid
    local self_name, rs_name, rs_uuid

    for _, row in xlog.pairs(snap_path) do
        local body = row.BODY
        local space_id = body.space_id
        -- Rows without space_id are skipped.
        if space_id then
            if space_id > box.schema.CLUSTER_ID then
                -- No sense in scanning after _cluster.
                break
            end

            if space_id == box.schema.SCHEMA_ID then
                local key = body.tuple[1]
                if key == 'replicaset_uuid' or key == 'cluster' then
                    rs_uuid = body.tuple[2]
                elseif key == 'replicaset_name' then
                    rs_name = body.tuple[2]
                end
            elseif space_id == box.schema.CLUSTER_ID then
                -- The row of the current instance is recognized by
                -- its UUID.
                if body.tuple[2] == self_uuid then
                    self_name = body.tuple[3]
                end

                if body.tuple[3] ~= nil then
                    name_to_uuid[body.tuple[3]] = body.tuple[2]
                end
            end
        end
    end

    return {
        replicaset_name = rs_name,
        replicaset_uuid = rs_uuid,
        instance_name = self_name,
        instance_uuid = self_uuid,
        peers = name_to_uuid,
    }
end

-- Module API: get_path exposes get_snapshot_path(), get_names
-- exposes get_snapshot_names().
return {
get_path = get_snapshot_path,
get_names = get_snapshot_names,
}
31 changes: 28 additions & 3 deletions test/config-luatest/config_test.lua
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
local t = require('luatest')
local server = require('test.luatest_helpers.server')
local cluster_config = require('internal.config.cluster_config')
local configdata = require('internal.config.configdata')
local helpers = require('test.config-luatest.helpers')
local treegen = require('test.treegen')
local justrun = require('test.justrun')
Expand All @@ -23,7 +21,20 @@ g.after_each(function()
end
end)

g.test_configdata = function()
local function verify_configdata()
local json = require('json')
local t = require('luatest')
local configdata = require('internal.config.configdata')
local cluster_config = require('internal.config.cluster_config')

local saved_assert_equals = t.assert_equals
t.assert_equals = function(...)
local ok, err = pcall(saved_assert_equals, ...)
if not ok then
error(json.encode(err), 2)
end
end

local cconfig = {
credentials = {
users = {
Expand Down Expand Up @@ -147,6 +158,20 @@ g.test_configdata = function()
t.assert_equals(data:peers(), {'instance-001', 'instance-002'})
end

-- Run verify_configdata() in a child tarantool process: the function
-- is dumped to bytecode, written as main.lua into a fresh directory
-- and executed. The run must exit with code 0 and print nothing to
-- stdout and stderr.
g.test_configdata = function()
    local workdir = treegen.prepare_directory(g, {}, {})
    local bytecode = string.dump(verify_configdata)
    treegen.write_script(workdir, 'main.lua', bytecode)

    local result = justrun.tarantool(workdir, {}, {'main.lua'}, {
        nojson = true,
        stderr = true,
    })
    local expected = {
        exit_code = 0,
        stdout = '',
        stderr = '',
    }
    t.assert_equals(result, expected)
end

g.test_config_general = function()
local dir = treegen.prepare_directory(g, {}, {})
local script = [[
Expand Down

0 comments on commit 9c675f6

Please sign in to comment.