Skip to content

Commit

Permalink
NAS-128399 / 24.10 / More nfs failover fixes (#13598)
Browse files Browse the repository at this point in the history
Changes in support of NFS failover in runtime code:
* Changes to allow NFS clients using locks to survive failover
* Add code to support moving the NFS status directory to the system dataset.

Changes to runtime code to improve NFS resiliency:
* Increase timeout on NFS service call from 5 to 10 seconds.  NFS takes about 5 sec to start.

Changes to CI tests:
* Add check for nfs in the boot_pool.
* Fix flake8 complaint.
* Improve error reporting for service state check.
* Added check for NFS dataset in first boot check in test_001_ssh.py.
* Some simple refactoring in test_300_nfs.py.

---------

Co-authored-by: Caleb St. John <30729806+yocalebo@users.noreply.github.com>
  • Loading branch information
mgrimesix and yocalebo committed Apr 26, 2024
1 parent 6bc1d37 commit 91bc180
Show file tree
Hide file tree
Showing 8 changed files with 193 additions and 13 deletions.
@@ -0,0 +1,3 @@
[Unit]
BindTo=nfs-server.service

13 changes: 13 additions & 0 deletions src/middlewared/middlewared/etc_files/nfs.conf.d/local.conf.mako
@@ -1,4 +1,8 @@
<%
from middlewared.plugins.nfs import NFSServicePathInfo
state_path = NFSServicePathInfo.STATEDIR.path()
cld_storedir = NFSServicePathInfo.CLDDIR.path()
cltrack_storedir = NFSServicePathInfo.CLDTRKDIR.path()
config = render_ctx["nfs.config"]
# Fail-safe setting is two nfsd
Expand Down Expand Up @@ -28,14 +32,23 @@ vers4 = n
host = ${','.join(config['bindip'])}
% endif

[exportd]
state-directory-path = ${state_path}
[nfsdcld]
storagedir = ${cld_storedir}
[nfsdcltrack]
storagedir = ${cltrack_storedir}

[mountd]
state-directory-path = ${state_path}
threads = ${num_mountd}
% if config['mountd_port']:
port = ${config['mountd_port']}
% endif
manage-gids = ${manage_gids}

[statd]
state-directory-path = ${state_path}
% if config['rpcstatd_port']:
port = ${config['rpcstatd_port']}
% endif
Expand Down
98 changes: 97 additions & 1 deletion src/middlewared/middlewared/plugins/nfs.py
Expand Up @@ -2,8 +2,9 @@
import errno
import ipaddress
import itertools
import os.path
import os
import pathlib
import shutil

from middlewared.common.attachment import LockableFSAttachmentDelegate
from middlewared.common.listen import SystemServiceListenMultipleDelegate
Expand All @@ -15,6 +16,30 @@
import middlewared.sqlalchemy as sa
from middlewared.utils.asyncio_ import asyncio_map
from middlewared.plugins.nfs_.utils import get_domain, leftmost_has_wildcards, get_wildcard_domain
from middlewared.plugins.system_dataset.utils import SYSDATASET_PATH


class NFSServicePathInfo(enum.Enum):
# nfs conf sections that use STATEDIR: exportd, mountd, statd
STATEDIR = (os.path.join(SYSDATASET_PATH, 'nfs'), 0o755, True, {'uid': 0, 'gid': 0})
CLDDIR = (os.path.join(SYSDATASET_PATH, 'nfs', 'nfsdcld'), 0o700, True, {'uid': 0, 'gid': 0})
CLDTRKDIR = (os.path.join(SYSDATASET_PATH, 'nfs', 'nfsdcltrack'), 0o700, True, {'uid': 0, 'gid': 0})
# Fix up the uid and gid in setup_directories
SMDIR = (os.path.join(SYSDATASET_PATH, 'nfs', 'sm'), 0o755, True, {'uid': 'statd', 'gid': 'nogroup'})
SMBAKDIR = (os.path.join(SYSDATASET_PATH, 'nfs', 'sm.bak'), 0o755, True, {'uid': 'statd', 'gid': 'nogroup'})
V4RECOVERYDIR = (os.path.join(SYSDATASET_PATH, 'nfs', 'v4recovery'), 0o755, True, {'uid': 0, 'gid': 0})

def path(self):
return self.value[0]

def mode(self):
return self.value[1]

def is_dir(self):
return self.value[2]

def owner(self):
return self.value[3]


class NFSProtocol(str, enum.Enum):
Expand Down Expand Up @@ -77,6 +102,77 @@ class Config:
Bool('managed_nfsd', default=True),
)

@private
def name_to_id_conversion(self, name, name_type='user'):
''' Convert built-in user or group name to associated UID or GID '''
if any((not isinstance(name, str), isinstance(name, int))):
# it's not a string (NoneType, float, w/e) or it's an int
# so there is nothing to do
return name

if name_type == 'user':
method = 'user.get_builtin_user_id'
elif name_type == 'group':
method = 'group.get_builtin_group_id'
else:
self.logger.error('Unexpected name_type (%r)', name_type)
return name
try:
return self.middleware.call_sync(method, name)
except Exception as e:
if hasattr(e, 'errno') and e.errno == errno.ENOENT:
self.logger.error('Failed to resolve builtin %s %r', name_type, name)
else:
self.logger.error('Unexpected error resolving builtin %s %r', name_type, name, exc_info=True)
return name

@private
def setup_directories(self):
'''
We are moving the NFS state directory from /var/lib/nfs to
the system dataset: /var/db/system/nfs.
When setup_directories is called /var/db/system/nfs is expected to exist.
If STATEDIR is empty, then this might be an initialization
and there might be current info in /var/lib/nfs.
We always make sure the expected directories are present
'''

# Initialize the system dataset NFS state directory
state_dir = NFSServicePathInfo.STATEDIR.path()
try:
shutil.copytree('/var/lib/nfs', state_dir)
except FileExistsError:
# destination file/dir already exists so ignore error
pass
except Exception:
self.logger.error('Unexpected error initializing %r', state_dir, exc_info=True)

# Make sure we have the necessary directories
for i in NFSServicePathInfo:
uid = self.name_to_id_conversion(i.owner()['uid'], name_type='user')
gid = self.name_to_id_conversion(i.owner()['gid'], name_type='group')
path = i.path()
if i.is_dir():
os.makedirs(path, exist_ok=True)

try:
os.chmod(path, i.mode())
os.chown(path, uid, gid)
except Exception:
self.logger.error('Unexpected failure initializing %r', path, exc_info=True)

procfs_path = '/proc/fs/nfsd/nfsv4recoverydir'
try:
with open(procfs_path, 'r+') as fp:
fp.write(f'{NFSServicePathInfo.V4RECOVERYDIR.path()}\n')
except FileNotFoundError:
# When this is removed from the kernel we will have a gentle reminder
self.logger.info("%r: Proc file has been removed", procfs_path)
except Exception:
self.logger.error("Unexpected failure updating %r", procfs_path, exc_info=True)

@private
async def nfs_extend(self, nfs):
keytab_has_nfs = await self.middleware.call("kerberos.keytab.has_nfs_principal")
Expand Down
3 changes: 2 additions & 1 deletion src/middlewared/middlewared/plugins/nfs_/status.py
@@ -1,3 +1,4 @@
from middlewared.plugins.nfs import NFSServicePathInfo
from middlewared.schema import accepts, Int, returns, Str, Dict
from middlewared.service import Service, private, filterable, filterable_returns
from middlewared.utils import filter_list
Expand All @@ -18,7 +19,7 @@ def get_rmtab(self):
"""
entries = []
with suppress(FileNotFoundError):
with open("/var/lib/nfs/rmtab", "r") as f:
with open(os.path.join(NFSServicePathInfo.STATEDIR.path(), "rmtab"), "r") as f:
for line in f:
ip, data = line.split(":", 1)
export, refcnt = line.rsplit(":", 1)
Expand Down
Expand Up @@ -6,6 +6,7 @@
class NFSService(SimpleService):
name = "nfs"
reloadable = True
systemd_unit_timeout = 10

etc = ["nfsd"]

Expand Down
19 changes: 19 additions & 0 deletions src/middlewared/middlewared/plugins/system_dataset/hierarchy.py
Expand Up @@ -106,6 +106,25 @@ def get_system_dataset_spec(pool_name: str, uuid: str) -> list:
'mode': 0o775,
},
},
{
'name': os.path.join(pool_name, '.system/nfs'),
'props': {
'mountpoint': 'legacy',
'readonly': 'off',
'snapdir': 'hidden',
},
'chown_config': {
'uid': 0,
'gid': 0,
'mode': 0o755,
},
'post_mount_actions': [
{
'method': 'nfs.setup_directories',
'args': [],
}
]
},
{
'name': os.path.join(pool_name, '.system/samba4'),
'props': {
Expand Down
7 changes: 4 additions & 3 deletions tests/api2/test_001_ssh.py
Expand Up @@ -8,7 +8,7 @@

from functions import if_key_listed, SSH_TEST
from auto_config import ip, sshKey, user, password
from middlewared.test.integration.utils import call, fail
from middlewared.test.integration.utils import fail
from middlewared.test.integration.utils.client import client


Expand All @@ -34,6 +34,7 @@ def test_002_firstboot_checks(ws_client):
expected_ds = [
'boot-pool/.system',
'boot-pool/.system/cores',
'boot-pool/.system/nfs',
'boot-pool/.system/samba4',
'boot-pool/grub'
]
Expand All @@ -50,8 +51,8 @@ def test_002_firstboot_checks(ws_client):
# always start in all circumstances (even if there is an invalid (or empty) config)
ignore = ('smartd',)
for srv in filter(lambda x: x['service'] not in ignore, ws_client.call('service.query')):
assert srv['enable'] is False
assert srv['state'] == 'STOPPED'
assert srv['enable'] is False, f"{srv['service']} service is unexpectedly enabled"
assert srv['state'] == 'STOPPED', f"{srv['service']} service expected STOPPED, but found {srv['state']}"

# verify posix mode, uid and gid for standard users
stat_info = {
Expand Down
62 changes: 54 additions & 8 deletions tests/api2/test_300_nfs.py
Expand Up @@ -38,7 +38,14 @@
conf_file = {
"nfs": {
"pname": "/etc/nfs.conf.d/local.conf",
"sections": {'nfsd': {}, 'mountd': {}, 'statd': {}, 'lockd': {}}
"sections": {
'nfsd': {},
'exportd': {},
'nfsdcld': {},
'nfsdcltrack': {},
'mountd': {},
'statd': {},
'lockd': {}}
},
"idmapd": {
"pname": "/etc/idmapd.conf",
Expand Down Expand Up @@ -134,6 +141,11 @@ def parse_rpcbind_config():
return rv


def get_nfs_service_state():
nfs_service = call('service.query', [['service', '=', 'nfs']], {'get': True})
return nfs_service['state']


def set_nfs_service_state(do_what=None, expect_to_pass=True, fail_check=None):
'''
Start or Stop NFS service
Expand Down Expand Up @@ -321,21 +333,29 @@ def nfs_config(options=None):


# Enable NFS server
def test_01_creating_the_nfs_server():
def test_01_init_the_nfs_config():
# initialize default_nfs_config for later restore
save_nfs_config()

# Confirm NFS is not running
nfs_state = get_nfs_service_state()
assert nfs_state == 'STOPPED', f'Before update, expected NFS to be STOPPED, but found {nfs_state}'

payload = {
"servers": 10,
"mountd_port": 618,
"allow_nonroot": False,
"rpcstatd_port": 871,
"rpclockd_port": 32803,
"protocols": ["NFSV3", "NFSV4"]
}
results = PUT("/nfs/", payload)
assert results.status_code == 200, results.text
# The service is not yet enabled, so we cannot yet confirm the settings
nfs_conf = call("nfs.update", payload)
assert nfs_conf['mountd_port'] == 618
assert nfs_conf['rpcstatd_port'] == 871
assert nfs_conf['rpclockd_port'] == 32803

# Confirm NFS remains not running
nfs_state = get_nfs_service_state()
assert nfs_state == 'STOPPED', f'After update, xpected NFS to be STOPPED, but found {nfs_state}'


@pytest.mark.dependency(name='NFS_DATASET_CREATED')
Expand Down Expand Up @@ -396,8 +416,33 @@ def test_09_checking_to_see_if_nfs_service_is_running(request):
assert results.json()[0]["state"] == "RUNNING", results.text


def test_10_confirm_state_directory(request):
"""
By default, the NFS state directory is at /var/lib/nfs.
To support HA systems, we moved this to the system dataset
at /var/db/system/nfs. In support of this we updated the
NFS conf file settings
"""
depends(request, ["NFS_SERVICE_STARTED"], scope="session")

# Make sure the conf file has the expected settings
nfs_state_dir = '/var/db/system/nfs'
s = parse_server_config()
assert s['exportd']['state-directory-path'] == nfs_state_dir, str(s)
assert s['nfsdcld']['storagedir'] == os.path.join(nfs_state_dir, 'nfsdcld'), str(s)
assert s['nfsdcltrack']['storagedir'] == os.path.join(nfs_state_dir, 'nfsdcltrack'), str(s)
assert s['nfsdcld']['storagedir'] == os.path.join(nfs_state_dir, 'nfsdcld'), str(s)
assert s['mountd']['state-directory-path'] == nfs_state_dir, str(s)
assert s['statd']['state-directory-path'] == nfs_state_dir, str(s)
# Confirm we have the mount point in the system dataset
# ----------------------------------------------------------------------
# NOTE: Update test_001_ssh.py: test_002_first_boot_checks.
# NOTE: Test fresh-install and upgrade.
# ----------------------------------------------------------------------


@pytest.mark.parametrize('vers', [3, 4])
def test_10_perform_basic_nfs_ops(request, vers):
def test_11_perform_basic_nfs_ops(request, vers):
with SSH_NFS(ip, NFS_PATH, vers=vers, user=user, password=password, ip=ip) as n:
n.create('testfile')
n.mkdir('testdir')
Expand All @@ -412,7 +457,7 @@ def test_10_perform_basic_nfs_ops(request, vers):
assert 'testfile' not in contents


def test_11_perform_server_side_copy(request):
def test_12_perform_server_side_copy(request):
with SSH_NFS(ip, NFS_PATH, vers=4, user=user, password=password, ip=ip) as n:
n.server_side_copy('ssc1', 'ssc2')

Expand Down Expand Up @@ -1208,6 +1253,7 @@ def test_43_check_nfsv4_acl_support(request):
4) For NFSv4.1 or NFSv4.2, repeat same process for each of the
supported acl_flags.
"""
depends(request, ["NFS_SERVICE_STARTED"], scope="session")
acl_nfs_path = f'/mnt/{pool_name}/test_nfs4_acl'
test_perms = {
"READ_DATA": True,
Expand Down

0 comments on commit 91bc180

Please sign in to comment.