From ba0b8b57c486aa2559b813f219899a5077e01c01 Mon Sep 17 00:00:00 2001 From: Evan Layton Date: Mon, 5 Mar 2018 20:41:49 -0800 Subject: [PATCH] NEX-15279 support NFS server in zone NEX-15520 online NFS shares cause zoneadm halt to hang in nfs_export_zone_fini Portions contributed by: Dan Kruchinin dan.kruchinin@nexenta.com Portions contributed by: Stepan Zastupov stepan.zastupov@gmail.com Reviewed by: Joyce McIntosh Reviewed by: Rob Gittins Reviewed by: Gordon Ross --- usr/src/cmd/dfs.cmds/sharemgr/commands.c | 9 +- usr/src/cmd/fs.d/nfs/svc/nfs-server | 10 +- usr/src/lib/brand/ipkg/zone/platform.xml | 3 +- usr/src/lib/libshare/common/libshare_zfs.c | 15 +- usr/src/lib/libshare/nfs/libshare_nfs.c | 81 +-- usr/src/lib/libzfs/common/libzfs_dataset.c | 10 +- usr/src/lib/libzfs/common/libzfs_mount.c | 14 +- usr/src/uts/common/fs/nfs/nfs3_srv.c | 95 ++-- usr/src/uts/common/fs/nfs/nfs4_db.c | 8 +- usr/src/uts/common/fs/nfs/nfs4_dispatch.c | 21 +- usr/src/uts/common/fs/nfs/nfs4_srv.c | 521 +++++++++++++----- usr/src/uts/common/fs/nfs/nfs4_srv_attr.c | 8 +- usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c | 93 ++-- usr/src/uts/common/fs/nfs/nfs4_srv_ns.c | 73 +-- usr/src/uts/common/fs/nfs/nfs4_state.c | 234 ++++---- usr/src/uts/common/fs/nfs/nfs_auth.c | 282 ++++++---- usr/src/uts/common/fs/nfs/nfs_client.c | 31 +- usr/src/uts/common/fs/nfs/nfs_cmd.c | 70 ++- usr/src/uts/common/fs/nfs/nfs_export.c | 390 +++++++------ usr/src/uts/common/fs/nfs/nfs_log.c | 51 +- usr/src/uts/common/fs/nfs/nfs_server.c | 464 +++++----------- usr/src/uts/common/fs/nfs/nfs_srv.c | 127 +++-- usr/src/uts/common/fs/nfs/nfs_sys.c | 21 +- .../uts/common/fs/sharefs/sharefs_vfsops.c | 14 +- usr/src/uts/common/fs/sharefs/sharefs_vnops.c | 83 +-- usr/src/uts/common/fs/sharefs/sharetab.c | 278 +++++----- usr/src/uts/common/fs/zfs/zfs_ioctl.c | 8 +- usr/src/uts/common/nfs/auth.h | 6 +- usr/src/uts/common/nfs/export.h | 64 ++- usr/src/uts/common/nfs/nfs.h | 47 +- usr/src/uts/common/nfs/nfs4.h | 95 ++-- usr/src/uts/common/nfs/nfs4_drc.h | 10 +- usr/src/uts/common/nfs/nfs_cmd.h | 7 + usr/src/uts/common/nfs/nfs_log.h | 9 +- usr/src/uts/common/sharefs/sharefs.h | 33 +- usr/src/uts/common/sys/zone.h | 12 +- 36 files changed, 1841 insertions(+), 1456 deletions(-) diff --git a/usr/src/cmd/dfs.cmds/sharemgr/commands.c b/usr/src/cmd/dfs.cmds/sharemgr/commands.c index 15e9cee992ca..92883d264c1e 100644 --- a/usr/src/cmd/dfs.cmds/sharemgr/commands.c +++ b/usr/src/cmd/dfs.cmds/sharemgr/commands.c @@ -22,7 +22,11 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + */ + +/* * Copyright 2012 Milan Jurik. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. 
*/ #include @@ -2166,7 +2170,6 @@ static void show_group(sa_group_t group, int verbose, int properties, char *proto, char *subgroup) { - sa_share_t share; char *groupname; char *zfs = NULL; int iszfs = 0; @@ -2174,6 +2177,8 @@ show_group(sa_group_t group, int verbose, int properties, char *proto, groupname = sa_get_group_attr(group, "name"); if (groupname != NULL) { + sa_share_t share; + if (proto != NULL && !has_protocol(group, proto)) { sa_free_attr_string(groupname); return; @@ -2190,7 +2195,7 @@ show_group(sa_group_t group, int verbose, int properties, char *proto, iszfs = 1; sa_free_attr_string(zfs); } - share = sa_get_share(group, NULL); + if (subgroup == NULL) (void) printf("%s", groupname); else diff --git a/usr/src/cmd/fs.d/nfs/svc/nfs-server b/usr/src/cmd/fs.d/nfs/svc/nfs-server index 997a15cde787..a2001a3b02af 100644 --- a/usr/src/cmd/fs.d/nfs/svc/nfs-server +++ b/usr/src/cmd/fs.d/nfs/svc/nfs-server @@ -23,7 +23,7 @@ # # Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2016 Hans Rosenfeld -# Copyright 2016 Nexenta Systems, Inc. All rights reserved. +# Copyright 2018 Nexenta Systems, Inc. # # Start/stop processes required for server NFS @@ -66,14 +66,6 @@ configure_ipfilter() case "$1" in 'start') - # The NFS server is not supported in a local zone - if smf_is_nonglobalzone; then - /usr/sbin/svcadm disable -t svc:/network/nfs/server - echo "The NFS server is not supported in a local zone" - sleep 5 & - exit $SMF_EXIT_OK - fi - # Share all file systems enabled for sharing. sharemgr understands # regular shares and ZFS shares and will handle both. Technically, # the shares would have been started long before getting here since diff --git a/usr/src/lib/brand/ipkg/zone/platform.xml b/usr/src/lib/brand/ipkg/zone/platform.xml index e9c192742e1c..0425c1a58c24 100644 --- a/usr/src/lib/brand/ipkg/zone/platform.xml +++ b/usr/src/lib/brand/ipkg/zone/platform.xml @@ -20,8 +20,8 @@ CDDL HEADER END - Copyright 2015 Nexenta Systems, Inc. All rights reserved. Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + Copyright 2018 Nexenta Systems, Inc. DO NOT EDIT THIS FILE. --> @@ -41,6 +41,7 @@ + diff --git a/usr/src/lib/libshare/common/libshare_zfs.c b/usr/src/lib/libshare/common/libshare_zfs.c index 488841fcff69..7c8775cd3c7d 100644 --- a/usr/src/lib/libshare/common/libshare_zfs.c +++ b/usr/src/lib/libshare/common/libshare_zfs.c @@ -22,10 +22,11 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. */ + /* - * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, 2016 by Delphix. All rights reserved. * Copyright 2017 RackTop Systems. + * Copyright 2018 Nexenta Systems, Inc. */ #include @@ -33,6 +34,7 @@ #include #include #include +#include #include #include "libshare_impl.h" #include @@ -238,6 +240,7 @@ get_legacy_mountpoint(const char *path, char *dataset, size_t dlen, { FILE *fp; struct mnttab entry; + int rc = 1; if ((fp = fopen(MNTTAB, "r")) == NULL) { return (1); @@ -256,11 +259,12 @@ get_legacy_mountpoint(const char *path, char *dataset, size_t dlen, if (dlen > 0) (void) strlcpy(dataset, entry.mnt_special, dlen); + rc = 0; break; } } (void) fclose(fp); - return (1); + return (rc); } @@ -817,6 +821,13 @@ sa_get_zfs_share_common(sa_handle_t handle, zfs_handle_t *fs_handle, char *path, if (!zfs_is_mounted(fs_handle, NULL)) return (SA_SYSTEM_ERR); + /* + * Ignore "zoned" datasets in global zone. 
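+	 * A dataset with "zoned" set is delegated to a non-global zone,
+	 * which now runs its own NFS server and shares the dataset from
+	 * inside the zone; treat it like an unmounted filesystem so the
+	 * global zone never publishes a zone-owned share.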
+ */ + if (getzoneid() == GLOBAL_ZONEID && + zfs_prop_get_int(fs_handle, ZFS_PROP_ZONED)) + return (SA_SYSTEM_ERR); + nfs = nfs_inherited = B_FALSE; if (zfs_prop_get(fs_handle, ZFS_PROP_SHARENFS, nfsshareopts, diff --git a/usr/src/lib/libshare/nfs/libshare_nfs.c b/usr/src/lib/libshare/nfs/libshare_nfs.c index cdd571356864..60cdc911a4c5 100644 --- a/usr/src/lib/libshare/nfs/libshare_nfs.c +++ b/usr/src/lib/libshare/nfs/libshare_nfs.c @@ -21,19 +21,22 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2018 Nexenta Systems, Inc. + */ + +/* * Copyright (c) 2014, 2016 by Delphix. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. */ /* * NFS specific functions */ + #include #include #include #include #include -#include #include #include #include @@ -1906,12 +1909,7 @@ nfs_enable_share(sa_share_t share) sa_free_attr_string(sectype); } } - /* - * when we get here, we can do the exportfs system call and - * initiate things. We probably want to enable the - * svc:/network/nfs/server service first if it isn't running. - */ - /* check svc:/network/nfs/server status and start if needed */ + /* now add the share to the internal tables */ printarg(path, &export); /* @@ -1921,52 +1919,17 @@ nfs_enable_share(sa_share_t share) if (iszfs) { struct exportfs_args ea; share_t sh; - char *str; - priv_set_t *priv_effective; - int privileged; - - /* - * If we aren't a privileged user - * and NFS server service isn't running - * then print out an error message - * and return EPERM - */ - - priv_effective = priv_allocset(); - (void) getppriv(PRIV_EFFECTIVE, priv_effective); - - privileged = (priv_isfullset(priv_effective) == B_TRUE); - priv_freeset(priv_effective); - - if (!privileged && - (str = smf_get_state(NFS_SERVER_SVC)) != NULL) { - err = 0; - if (strcmp(str, SCF_STATE_STRING_ONLINE) != 0) { - (void) printf(dgettext(TEXT_DOMAIN, - "NFS: Cannot share remote " - "filesystem: %s\n"), path); - (void) printf(dgettext(TEXT_DOMAIN, - "NFS: Service needs to be enabled " - "by a privileged user\n")); - err = SA_SYSTEM_ERR; - errno = EPERM; - } - free(str); - } - if (err == 0) { - ea.dname = path; - ea.uex = &export; + ea.dname = path; + ea.uex = &export; - (void) sa_sharetab_fill_zfs(share, &sh, "nfs"); - err = sa_share_zfs(share, NULL, path, &sh, - &ea, ZFS_SHARE_NFS); - if (err != SA_OK) { - errno = err; - err = -1; - } - sa_emptyshare(&sh); + (void) sa_sharetab_fill_zfs(share, &sh, "nfs"); + err = sa_share_zfs(share, NULL, path, &sh, &ea, ZFS_SHARE_NFS); + if (err != SA_OK) { + errno = err; + err = -1; } + sa_emptyshare(&sh); } else { err = exportfs(path, &export); } @@ -1974,20 +1937,7 @@ nfs_enable_share(sa_share_t share) if (err < 0) { err = SA_SYSTEM_ERR; switch (errno) { - case EREMOTE: - (void) printf(dgettext(TEXT_DOMAIN, - "NFS: Cannot share filesystems " - "in non-global zones: %s\n"), path); - err = SA_NOT_SUPPORTED; - break; case EPERM: - if (getzoneid() != GLOBAL_ZONEID) { - (void) printf(dgettext(TEXT_DOMAIN, - "NFS: Cannot share file systems " - "in non-global zones: %s\n"), path); - err = SA_NOT_SUPPORTED; - break; - } err = SA_NO_PERMISSION; break; case EEXIST: @@ -2099,9 +2049,6 @@ nfs_disable_share(sa_share_t share, char *path) case EPERM: case EACCES: ret = SA_NO_PERMISSION; - if (getzoneid() != GLOBAL_ZONEID) { - ret = SA_NOT_SUPPORTED; - } break; case EINVAL: case ENOENT: diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c index a850e8502030..f8beb742de88 100644 --- 
a/usr/src/lib/libzfs/common/libzfs_dataset.c +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c @@ -21,6 +21,9 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved. @@ -28,7 +31,7 @@ * Copyright (c) 2013 Martin Matuska. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Integros [integros.com] - * Copyright 2016 Nexenta Systems, Inc. + * Copyright 2018 Nexenta Systems, Inc. * Copyright 2016 Igor Kozhukhov * Copyright 2017 RackTop Systems. */ @@ -1265,7 +1268,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, * global zone non-global zone * -------------------------------------------------- * zoned=on mountpoint (no) mountpoint (yes) - * sharenfs (no) sharenfs (no) + * sharenfs (no) sharenfs (yes) * sharesmb (no) sharesmb (no) * * zoned=off mountpoint (yes) N/A @@ -1281,8 +1284,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, (void) zfs_error(hdl, EZFS_ZONED, errbuf); goto error; - } else if (prop == ZFS_PROP_SHARENFS || - prop == ZFS_PROP_SHARESMB) { + } else if (prop == ZFS_PROP_SHARESMB) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set in " "a non-global zone"), propname); diff --git a/usr/src/lib/libzfs/common/libzfs_mount.c b/usr/src/lib/libzfs/common/libzfs_mount.c index 27065d8fc9ec..a2af8865fb73 100644 --- a/usr/src/lib/libzfs/common/libzfs_mount.c +++ b/usr/src/lib/libzfs/common/libzfs_mount.c @@ -20,8 +20,11 @@ */ /* - * Copyright 2018 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * Copyright 2018 Nexenta Systems, Inc. * Copyright (c) 2014, 2016 by Delphix. All rights reserved. * Copyright 2016 Igor Kozhukhov * Copyright 2017 Joyent, Inc. @@ -843,15 +846,6 @@ zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto) return (-1); } - /* - * If the 'zoned' property is set, then zfs_is_mountable() - * will have already bailed out if we are in the global zone. - * But local zones cannot be NFS servers, so we ignore it for - * local zones as well. - */ - if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) - continue; - share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint); if (share == NULL) { /* diff --git a/usr/src/uts/common/fs/nfs/nfs3_srv.c b/usr/src/uts/common/fs/nfs/nfs3_srv.c index 46c1ff3f182f..f0634945ea68 100644 --- a/usr/src/uts/common/fs/nfs/nfs3_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs3_srv.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2017 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. */ @@ -28,6 +28,7 @@ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ + #include #include #include @@ -67,14 +68,19 @@ #include #include +/* + * Zone global variables of NFSv3 server + */ +typedef struct nfs3_srv { + writeverf3 write3verf; +} nfs3_srv_t; + /* * These are the interface routines for the server side of the * Network File System. See the NFS version 3 protocol specification * for a description of this interface. 
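 * The write verifier that used to be a file-global now lives in the
 * per-zone nfs3_srv_t above, managed through the standard
 * zone-specific data (ZSD) hooks. A minimal sketch of that
 * lifecycle, using the names defined in this file:
 *
 *	zone_key_create(&rfs3_zone_key, rfs3_zone_init, NULL,
 *	    rfs3_zone_fini);			(once, from rfs3_srvrinit)
 *	ns = zone_getspecific(rfs3_zone_key, curzone);
 *					(per request, e.g. rfs3_write)
 *
 * rfs3_zone_init allocates and seeds a verifier for each booting
 * zone; rfs3_zone_fini frees it when the zone halts.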
*/ -static writeverf3 write3verf; - static int sattr3_to_vattr(sattr3 *, struct vattr *); static int vattr_to_fattr3(struct vattr *, fattr3 *); static int vattr_to_wcc_attr(struct vattr *, wcc_attr *); @@ -85,6 +91,7 @@ static int rdma_setup_read_data3(READ3args *, READ3resok *); extern int nfs_loaned_buffers; u_longlong_t nfs3_srv_caller_id; +static zone_key_t rfs3_zone_key; /* ARGSUSED */ void @@ -388,7 +395,7 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi, * location of the public filehandle. */ if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { - dvp = rootdir; + dvp = ZONE_ROOTVP(); VN_HOLD(dvp); DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req, @@ -1260,6 +1267,7 @@ void rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) { + nfs3_srv_t *ns; int error; vnode_t *vp; struct vattr *bvap = NULL; @@ -1288,6 +1296,7 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, goto err; } + ns = zone_getspecific(rfs3_zone_key, curzone); if (is_system_labeled()) { bslabel_t *clabel = req->rq_label; @@ -1375,7 +1384,7 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc); resp->resok.count = 0; resp->resok.committed = args->stable; - resp->resok.verf = write3verf; + resp->resok.verf = ns->write3verf; goto out; } @@ -1477,7 +1486,7 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc); resp->resok.count = args->count - uio.uio_resid; resp->resok.committed = args->stable; - resp->resok.verf = write3verf; + resp->resok.verf = ns->write3verf; goto out; err: @@ -2624,7 +2633,7 @@ rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi, goto err1; } - error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0); + error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0); if (name != args->object.name) kmem_free(name, MAXPATHLEN + 1); @@ -2830,10 +2839,10 @@ rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi, } /* - * Check for renaming over a delegated file. Check rfs4_deleg_policy + * Check for renaming over a delegated file. Check nfs4_deleg_policy * first to avoid VOP_LOOKUP if possible. 
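 * Fetching the zone's delegation policy is a cheap in-memory check,
 * while VOP_LOOKUP can go to disk; when this server never grants
 * delegations, the lookup of the rename target is skipped entirely.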
*/ - if (rfs4_deleg_policy != SRV_NEVER_DELEGATE && + if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE && VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr, NULL, NULL, NULL) == 0) { @@ -3371,6 +3380,11 @@ rfs3_readdir_free(READDIR3res *resp) } } +#ifdef nextdp +#undef nextdp +#endif +#define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen)) + /* ARGSUSED */ void rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp, @@ -4026,6 +4040,7 @@ void rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) { + nfs3_srv_t *ns; int error; vnode_t *vp; struct vattr *bvap; @@ -4046,6 +4061,7 @@ rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi, goto out; } + ns = zone_getspecific(rfs3_zone_key, curzone); bva.va_mask = AT_ALL; error = VOP_GETATTR(vp, &bva, 0, cr, NULL); @@ -4098,7 +4114,7 @@ rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi, resp->status = NFS3_OK; vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc); - resp->resok.verf = write3verf; + resp->resok.verf = ns->write3verf; DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req, cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp); @@ -4192,7 +4208,7 @@ sattr3_to_vattr(sattr3 *sap, struct vattr *vap) return (0); } -static ftype3 vt_to_nf3[] = { +static const ftype3 vt_to_nf3[] = { 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0 }; @@ -4274,20 +4290,40 @@ vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap) static void vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp) { - vattr_to_pre_op_attr(bvap, &wccp->before); vattr_to_post_op_attr(avap, &wccp->after); } -void -rfs3_srvrinit(void) +static int +rdma_setup_read_data3(READ3args *args, READ3resok *rok) +{ + struct clist *wcl; + int wlist_len; + count3 count = rok->count; + + wcl = args->wlist; + if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) + return (FALSE); + + wcl = args->wlist; + rok->wlist_len = wlist_len; + rok->wlist = wcl; + return (TRUE); +} + +/* ARGSUSED */ +static void * +rfs3_zone_init(zoneid_t zoneid) { + nfs3_srv_t *ns; struct rfs3_verf_overlay { uint_t id; /* a "unique" identifier */ int ts; /* a unique timestamp */ } *verfp; timestruc_t now; + ns = kmem_zalloc(sizeof (*ns), KM_SLEEP); + /* * The following algorithm attempts to find a unique verifier * to be used as the write verifier returned from the server @@ -4311,37 +4347,34 @@ rfs3_srvrinit(void) * We ASSERT that this constant logic expression is * always true because in the past, it wasn't. 
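 * (The overlay, a uint_t plus an int, must fit inside the 8-byte
 * NFSv3 write verifier; the ASSERT below catches any growth of
 * either field.)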
*/ - ASSERT(sizeof (*verfp) <= sizeof (write3verf)); + ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf)); #endif gethrestime(&now); - verfp = (struct rfs3_verf_overlay *)&write3verf; + verfp = (struct rfs3_verf_overlay *)&ns->write3verf; verfp->ts = (int)now.tv_sec; verfp->id = zone_get_hostid(NULL); if (verfp->id == 0) verfp->id = (uint_t)now.tv_nsec; - nfs3_srv_caller_id = fs_new_caller_id(); - + return (ns); } -static int -rdma_setup_read_data3(READ3args *args, READ3resok *rok) +/* ARGSUSED */ +static void +rfs3_zone_fini(zoneid_t zoneid, void *data) { - struct clist *wcl; - int wlist_len; - count3 count = rok->count; + nfs3_srv_t *ns = data; - wcl = args->wlist; - if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) { - return (FALSE); - } + kmem_free(ns, sizeof (*ns)); +} - wcl = args->wlist; - rok->wlist_len = wlist_len; - rok->wlist = wcl; - return (TRUE); +void +rfs3_srvrinit(void) +{ + nfs3_srv_caller_id = fs_new_caller_id(); + zone_key_create(&rfs3_zone_key, rfs3_zone_init, NULL, rfs3_zone_fini); } void diff --git a/usr/src/uts/common/fs/nfs/nfs4_db.c b/usr/src/uts/common/fs/nfs/nfs4_db.c index fbecb86f6419..bedefc9e60fa 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_db.c +++ b/usr/src/uts/common/fs/nfs/nfs4_db.c @@ -18,10 +18,15 @@ * * CDDL HEADER END */ + /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #include #include #include @@ -879,6 +884,7 @@ reaper_thread(caddr_t *arg) table->dbt_db->db_shutdown_count--; cv_signal(&table->dbt_db->db_shutdown_wait); mutex_exit(table->dbt_db->db_lock); + zthread_exit(); } static void @@ -887,7 +893,7 @@ rfs4_start_reaper(rfs4_table_t *table) if (table->dbt_max_cache_time == 0) return; - (void) thread_create(NULL, 0, reaper_thread, table, 0, &p0, TS_RUN, + (void) zthread_create(NULL, 0, reaper_thread, table, 0, minclsyspri); } diff --git a/usr/src/uts/common/fs/nfs/nfs4_dispatch.c b/usr/src/uts/common/fs/nfs/nfs4_dispatch.c index 4a0669eba645..69c29726f397 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_dispatch.c +++ b/usr/src/uts/common/fs/nfs/nfs4_dispatch.c @@ -20,12 +20,12 @@ */ /* - * Copyright 2016 Nexenta Systems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright 2018 Nexenta Systems, Inc. */ #include @@ -42,11 +42,6 @@ #define NFS4_MAX_MINOR_VERSION 0 -/* - * This is the duplicate request cache for NFSv4 - */ -rfs4_drc_t *nfs4_drc = NULL; - /* * The default size of the duplicate request cache */ @@ -60,6 +55,8 @@ uint32_t nfs4_drc_hash = 541; static void rfs4_resource_err(struct svc_req *req, COMPOUND4args *argsp); +extern zone_key_t rfs4_zone_key; + /* * Initialize a duplicate request cache. */ @@ -98,12 +95,12 @@ rfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size) * Destroy a duplicate request cache. 
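 * With the cache hung off the per-zone nfs4_srv_t there is no longer
 * a global to pass in; the function now finds its own zone's cache
 * via zone_getspecific() before walking and freeing the entries.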
*/ void -rfs4_fini_drc(rfs4_drc_t *drc) +rfs4_fini_drc(void) { + nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + rfs4_drc_t *drc = nsrv4->nfs4_drc; rfs4_dupreq_t *drp, *drp_next; - ASSERT(drc); - /* iterate over the dr_cache and free the enties */ for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) { @@ -391,6 +388,8 @@ rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req, int dr_stat = NFS4_NOT_DUP; rfs4_dupreq_t *drp = NULL; int rv; + nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + rfs4_drc_t *nfs4_drc = nsrv4->nfs4_drc; ASSERT(disp); diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv.c b/usr/src/uts/common/fs/nfs/nfs4_srv.c index babea4ab8648..be9f32a3b987 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv.c @@ -20,9 +20,7 @@ */ /* - * Copyright 2017 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2016 by Delphix. All rights reserved. */ /* @@ -30,6 +28,11 @@ * All Rights Reserved */ +/* + * Copyright 2018 Nexenta Systems, Inc. + * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + */ + #include #include #include @@ -67,10 +70,12 @@ #include #include +#include #include #include #include #include +#include #include #include @@ -148,108 +153,106 @@ static int rdma_setup_read_data4(READ4args *, READ4res *); #define DIRENT64_TO_DIRCOUNT(dp) \ (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen)) -time_t rfs4_start_time; /* Initialized in rfs4_srvrinit */ +zone_key_t rfs4_zone_key; -static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */ +static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */ u_longlong_t nfs4_srv_caller_id; uint_t nfs4_srv_vkey = 0; -verifier4 Write4verf; -verifier4 Readdir4verf; - void rfs4_init_compound_state(struct compound_state *); static void nullfree(caddr_t); static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_create_free(nfs_resop4 *resop); static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *, - struct svc_req *, struct compound_state *); + struct svc_req *, struct compound_state *); static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *, - struct svc_req *, struct compound_state *); + struct svc_req *, struct compound_state *); static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_getattr_free(nfs_resop4 *); static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_getfh_free(nfs_resop4 *); static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void 
rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void lock_denied_free(nfs_resop4 *); static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, - struct svc_req *req, struct compound_state *cs); + struct svc_req *req, struct compound_state *cs); static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *, - struct svc_req *, struct compound_state *); + struct svc_req *, struct compound_state *); static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *, - struct svc_req *, struct compound_state *); + struct svc_req *, struct compound_state *); static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_read_free(nfs_resop4 *); static void rfs4_op_readdir_free(nfs_resop4 *resop); static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_readlink_free(nfs_resop4 *); static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *, - struct svc_req *, struct compound_state *); + struct svc_req *, struct compound_state *); static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *, - struct svc_req *, struct compound_state *); + struct svc_req *, struct compound_state *); static 
void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *, - struct svc_req *req, struct compound_state *); + struct svc_req *req, struct compound_state *); static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_secinfo_free(nfs_resop4 *); -static nfsstat4 check_open_access(uint32_t, - struct compound_state *, struct svc_req *); -nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *); -void rfs4_ss_clid(rfs4_client_t *); +static nfsstat4 check_open_access(uint32_t, struct compound_state *, + struct svc_req *); +nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *); +void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *); + /* * translation table for attrs @@ -263,19 +266,17 @@ struct nfs4_ntov_table { static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp); static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp, - struct nfs4_svgetit_arg *sargp); + struct nfs4_svgetit_arg *sargp); static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs, struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd); +static void hanfsv4_failover(nfs4_srv_t *); + fem_t *deleg_rdops; fem_t *deleg_wrops; -rfs4_servinst_t *rfs4_cur_servinst = NULL; /* current server instance */ -kmutex_t rfs4_servinst_lock; /* protects linked list */ -int rfs4_seen_first_compound; /* set first time we see one */ - /* * NFS4 op dispatch table */ @@ -501,7 +502,7 @@ static char *rfs4_op_string[] = { }; #endif -void rfs4_ss_chkclid(rfs4_client_t *); +void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *); extern size_t strlcpy(char *dst, const char *src, size_t dstsize); @@ -534,13 +535,14 @@ static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = { NULL, NULL }; -int -rfs4_srvrinit(void) +/* ARGSUSED */ +static void * +rfs4_zone_init(zoneid_t zoneid) { + nfs4_srv_t *nsrv4; timespec32_t verf; - int error; - extern void rfs4_attr_init(); - extern krwlock_t rfs4_deleg_policy_lock; + + nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP); /* * The following algorithm attempts to find a unique verifier @@ -571,57 +573,104 @@ rfs4_srvrinit(void) verf.tv_nsec = tverf.tv_nsec; } - Write4verf = *(uint64_t *)&verf; + nsrv4->nfs4_cur_servinst = NULL; + nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE; + nsrv4->write4verf = *(uint64_t *)&verf; + mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL); + + return (nsrv4); +} + +/* ARGSUSED */ +static void +rfs4_zone_fini(zoneid_t zoneid, void *data) +{ + nfs4_srv_t *nsrv4 = data; + + mutex_destroy(&nsrv4->deleg_lock); + mutex_destroy(&nsrv4->servinst_lock); + rw_destroy(&nsrv4->deleg_policy_lock); + + kmem_free(nsrv4, sizeof (*nsrv4)); +} + +void +rfs4_srvrinit(void) +{ + extern void rfs4_attr_init(); + + zone_key_create(&rfs4_zone_key, rfs4_zone_init, NULL, rfs4_zone_fini); rfs4_attr_init(); - mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL); /* Used to manage create/destroy of server state */ mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL); - /* Used to manage access to server instance linked list */ - mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL); - - /* Used to manage access to rfs4_deleg_policy */ - rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL); - - error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops); - if (error != 0) { + if (fem_create("deleg_rdops", 
nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) { rfs4_disable_delegation(); - } else { - error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl, - &deleg_wrops); - if (error != 0) { - rfs4_disable_delegation(); - fem_free(deleg_rdops); - } + } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl, + &deleg_wrops) != 0) { + rfs4_disable_delegation(); + fem_free(deleg_rdops); } nfs4_srv_caller_id = fs_new_caller_id(); - lockt_sysid = lm_alloc_sysidt(); - vsd_create(&nfs4_srv_vkey, NULL); - - return (0); + rfs4_state_g_init(); } void rfs4_srvrfini(void) { - extern krwlock_t rfs4_deleg_policy_lock; - if (lockt_sysid != LM_NOSYSID) { lm_free_sysidt(lockt_sysid); lockt_sysid = LM_NOSYSID; } - mutex_destroy(&rfs4_deleg_lock); + rfs4_state_g_fini(); + mutex_destroy(&rfs4_state_lock); - rw_destroy(&rfs4_deleg_policy_lock); fem_free(deleg_rdops); fem_free(deleg_wrops); + + (void) zone_key_delete(rfs4_zone_key); +} + +void +rfs4_do_server_start(int server_upordown, + int srv_delegation, int cluster_booted) +{ + nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + + /* Is this a warm start? */ + if (server_upordown == NFS_SERVER_QUIESCED) { + cmn_err(CE_NOTE, "nfs4_srv: " + "server was previously quiesced; " + "existing NFSv4 state will be re-used"); + + /* + * HA-NFSv4: this is also the signal + * that a Resource Group failover has + * occurred. + */ + if (cluster_booted) + hanfsv4_failover(nsrv4); + } else { + /* Cold start */ + nsrv4->rfs4_start_time = 0; + nsrv4->cpr_id = 0; + rfs4_state_zone_init(nsrv4); + nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max, + nfs4_drc_hash); + } + + /* Check if delegation is to be enabled */ + if (srv_delegation != FALSE) + rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE); } void @@ -688,34 +737,35 @@ rfs4_clnt_in_grace(rfs4_client_t *cp) * reset all currently active grace periods */ void -rfs4_grace_reset_all(void) +rfs4_grace_reset_all(nfs4_srv_t *nsrv4) { rfs4_servinst_t *sip; - mutex_enter(&rfs4_servinst_lock); - for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) + mutex_enter(&nsrv4->servinst_lock); + for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) if (rfs4_servinst_in_grace(sip)) rfs4_grace_start(sip); - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } /* * start any new instances' grace periods */ void -rfs4_grace_start_new(void) +rfs4_grace_start_new(nfs4_srv_t *nsrv4) { rfs4_servinst_t *sip; - mutex_enter(&rfs4_servinst_lock); - for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) + mutex_enter(&nsrv4->servinst_lock); + for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) if (rfs4_servinst_grace_new(sip)) rfs4_grace_start(sip); - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } static rfs4_dss_path_t * -rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index) +rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip, + char *path, unsigned index) { size_t len; rfs4_dss_path_t *dss_path; @@ -739,15 +789,15 @@ rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index) * Add to list of served paths. * No locking required, as we're only ever called at startup. 
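 * The list head has moved from the file-global rfs4_dss_pathlist to
 * the per-zone nsrv4->dss_pathlist; insque/remque still maintain it
 * as a circular, doubly-linked list.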
*/ - if (rfs4_dss_pathlist == NULL) { + if (nsrv4->dss_pathlist == NULL) { /* this is the first dss_path_t */ /* needed for insque/remque */ dss_path->next = dss_path->prev = dss_path; - rfs4_dss_pathlist = dss_path; + nsrv4->dss_pathlist = dss_path; } else { - insque(dss_path, rfs4_dss_pathlist); + insque(dss_path, nsrv4->dss_pathlist); } return (dss_path); @@ -759,7 +809,8 @@ rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index) * recovery window. */ void -rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths) +rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace, + int dss_npaths, char **dss_paths) { unsigned i; rfs4_servinst_t *sip; @@ -790,21 +841,21 @@ rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths) sizeof (rfs4_dss_path_t *), KM_SLEEP); for (i = 0; i < dss_npaths; i++) { - sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i); + sip->dss_paths[i] = rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i); } - mutex_enter(&rfs4_servinst_lock); - if (rfs4_cur_servinst != NULL) { + mutex_enter(&nsrv4->servinst_lock); + if (nsrv4->nfs4_cur_servinst != NULL) { /* add to linked list */ - sip->prev = rfs4_cur_servinst; - rfs4_cur_servinst->next = sip; + sip->prev = nsrv4->nfs4_cur_servinst; + nsrv4->nfs4_cur_servinst->next = sip; } if (start_grace) rfs4_grace_start(sip); /* make the new instance "current" */ - rfs4_cur_servinst = sip; + nsrv4->nfs4_cur_servinst = sip; - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } /* @@ -812,17 +863,17 @@ rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths) * all instances directly. */ void -rfs4_servinst_destroy_all(void) +rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4) { rfs4_servinst_t *sip, *prev, *current; #ifdef DEBUG int n = 0; #endif - mutex_enter(&rfs4_servinst_lock); - ASSERT(rfs4_cur_servinst != NULL); - current = rfs4_cur_servinst; - rfs4_cur_servinst = NULL; + mutex_enter(&nsrv4->servinst_lock); + ASSERT(nsrv4->nfs4_cur_servinst != NULL); + current = nsrv4->nfs4_cur_servinst; + nsrv4->nfs4_cur_servinst = NULL; for (sip = current; sip != NULL; sip = prev) { prev = sip->prev; rw_destroy(&sip->rwlock); @@ -836,7 +887,7 @@ rfs4_servinst_destroy_all(void) n++; #endif } - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } /* @@ -844,7 +895,8 @@ rfs4_servinst_destroy_all(void) * Should be called with cp->rc_dbe held. */ void -rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip) +rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp, + rfs4_servinst_t *sip) { ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0); @@ -852,9 +904,9 @@ rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip) * The lock ensures that if the current instance is in the process * of changing, we will see the new one. */ - mutex_enter(&rfs4_servinst_lock); + mutex_enter(&nsrv4->servinst_lock); cp->rc_server_instance = sip; - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } rfs4_servinst_t * @@ -915,6 +967,7 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) seconfig_t *si; bool_t did_traverse = FALSE; int dotdot, walk; + nfs_export_t *ne = nfs_get_export(); dvp = cs->vp; dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0'); @@ -936,7 +989,7 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) * If at the system root, then can * go up no further. 
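 * The walk now stops at ZONE_ROOTVP() rather than the system-wide
 * rootdir, so a zone's clients can never traverse above that zone's
 * root. (ZONE_ROOTVP() is presumably a macro over the current
 * zone's root vnode, added to sys/zone.h by this change.)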
*/ - if (VN_CMP(dvp, rootdir)) + if (VN_CMP(dvp, ZONE_ROOTVP())) return (puterrno4(ENOENT)); /* @@ -1053,7 +1106,7 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) * For a real export node, return the flavor that the client * has access with. */ - ASSERT(RW_LOCK_HELD(&exported_lock)); + ASSERT(RW_LOCK_HELD(&ne->exported_lock)); if (PSEUDO(exi)) { count = exi->exi_export.ex_seccnt; /* total sec count */ resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP); @@ -1416,6 +1469,7 @@ rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, vnode_t *vp = cs->vp; cred_t *cr = cs->cr; vattr_t va; + nfs4_srv_t *nsrv4; DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs, COMMIT4args *, args); @@ -1472,8 +1526,9 @@ rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, goto out; } + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); *cs->statusp = resp->status = NFS4_OK; - resp->writeverf = Write4verf; + resp->writeverf = nsrv4->write4verf; out: DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs, COMMIT4res *, resp); @@ -2681,7 +2736,7 @@ do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs) * If at the system root, then can * go up no further. */ - if (VN_CMP(cs->vp, rootdir)) + if (VN_CMP(cs->vp, ZONE_ROOTVP())) return (puterrno4(ENOENT)); /* @@ -3445,6 +3500,7 @@ rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, vnode_t *vp; struct exportinfo *exi, *sav_exi; nfs_fh4_fmt_t *fh_fmtp; + nfs_export_t *ne = nfs_get_export(); DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs); @@ -3458,19 +3514,19 @@ rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, cs->cr = crdup(cs->basecr); - vp = exi_public->exi_vp; + vp = ne->exi_public->exi_vp; if (vp == NULL) { *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; goto out; } - error = makefh4(&cs->fh, vp, exi_public); + error = makefh4(&cs->fh, vp, ne->exi_public); if (error != 0) { *cs->statusp = resp->status = puterrno4(error); goto out; } sav_exi = cs->exi; - if (exi_public == exi_root) { + if (ne->exi_public == ne->exi_root) { /* * No filesystem is actually shared public, so we default * to exi_root. In this case, we must check whether root @@ -3485,12 +3541,12 @@ rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, */ exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen, NULL); - cs->exi = ((exi != NULL) ? exi : exi_public); + cs->exi = ((exi != NULL) ? exi : ne->exi_public); } else { /* * it's a properly shared filesystem */ - cs->exi = exi_public; + cs->exi = ne->exi_public; } if (is_system_labeled()) { @@ -3631,7 +3687,7 @@ rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, */ bzero(&fid, sizeof (fid)); fid.fid_len = MAXFIDSZ; - error = vop_fid_pseudo(rootdir, &fid); + error = vop_fid_pseudo(ZONE_ROOTVP(), &fid); if (error != 0) { *cs->statusp = resp->status = puterrno4(error); goto out; @@ -3645,7 +3701,7 @@ rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, * one or more exports further down in the server's * file tree. 
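 * As elsewhere in this change, the zone's root vnode stands in for
 * rootdir, so the pseudo filehandle PUTROOTFH hands back is always
 * relative to the zone being served.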
*/ - exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL); + exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL); if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) { NFS4_DEBUG(rfs4_debug, (CE_WARN, "rfs4_op_putrootfh: export check failure")); @@ -3657,7 +3713,7 @@ rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, * Now make a filehandle based on the root * export and root vnode. */ - error = makefh4(&cs->fh, rootdir, exi); + error = makefh4(&cs->fh, ZONE_ROOTVP(), exi); if (error != 0) { *cs->statusp = resp->status = puterrno4(error); goto out; @@ -3666,11 +3722,11 @@ rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, sav_exi = cs->exi; cs->exi = exi; - VN_HOLD(rootdir); - cs->vp = rootdir; + VN_HOLD(ZONE_ROOTVP()); + cs->vp = ZONE_ROOTVP(); if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { - VN_RELE(rootdir); + VN_RELE(cs->vp); cs->vp = NULL; cs->exi = sav_exi; goto out; @@ -4281,7 +4337,7 @@ rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to * transmit over the wire. */ - if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr, + if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr, NULL, 0)) == EEXIST) error = ENOTEMPTY; } @@ -5594,6 +5650,7 @@ rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, nfsstat4 stat; int in_crit = 0; caller_context_t ct; + nfs4_srv_t *nsrv4; DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs, WRITE4args *, args); @@ -5664,11 +5721,12 @@ rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, goto out; } + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); if (args->data_len == 0) { *cs->statusp = resp->status = NFS4_OK; resp->count = 0; resp->committed = args->stable; - resp->writeverf = Write4verf; + resp->writeverf = nsrv4->write4verf; goto out; } @@ -5764,7 +5822,7 @@ rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, else resp->committed = FILE_SYNC4; - resp->writeverf = Write4verf; + resp->writeverf = nsrv4->write4verf; out: if (in_crit) @@ -5784,6 +5842,8 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi, { uint_t i; struct compound_state cs; + nfs4_srv_t *nsrv4; + nfs_export_t *ne = nfs_get_export(); if (rv != NULL) *rv = 0; @@ -5841,6 +5901,7 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi, KM_SLEEP); cs.basecr = cr; + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs, COMPOUND4args *, args); @@ -5855,20 +5916,20 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi, * ops along with unexport. This lock will be removed as * part of the NFSv4 phase 2 namespace redesign work. */ - rw_enter(&exported_lock, RW_READER); + rw_enter(&ne->exported_lock, RW_READER); /* * If this is the first compound we've seen, we need to start all * new instances' grace periods. */ - if (rfs4_seen_first_compound == 0) { - rfs4_grace_start_new(); + if (nsrv4->seen_first_compound == 0) { + rfs4_grace_start_new(nsrv4); /* * This must be set after rfs4_grace_start_new(), otherwise * another thread could proceed past here before the former * is finished. 
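 * Since the flag is now per-zone state in nfs4_srv_t, each zone
 * starts the grace periods of its own server instances on the first
 * compound it sees, independently of every other zone.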
*/ - rfs4_seen_first_compound = 1; + nsrv4->seen_first_compound = 1; } for (i = 0; i < args->array_len && cs.cont; i++) { @@ -5988,7 +6049,7 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi, } } - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs, COMPOUND4res *, resp); @@ -6092,6 +6153,7 @@ void rfs4_compound_kstat_res(COMPOUND4res *res) { int i; + nfs_export_t *ne = nfs_get_export(); for (i = 0; i < res->array_len; i++) { uint_t op = (uint_t)res->array[i].resop; @@ -6111,7 +6173,7 @@ rfs4_compound_kstat_res(COMPOUND4res *res) if (exi != NULL) { kstat_t *exi_ksp = NULL; - rw_enter(&exported_lock, RW_READER); + rw_enter(&ne->exported_lock, RW_READER); if (exi->exi_kstats != NULL) /*CSTYLED*/ @@ -6124,7 +6186,7 @@ rfs4_compound_kstat_res(COMPOUND4res *res) mutex_exit(exi_ksp->ks_lock); } - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); } } } @@ -6764,25 +6826,27 @@ rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs, if (trunc) { int in_crit = 0; rfs4_file_t *fp; + nfs4_srv_t *nsrv4; bool_t create = FALSE; /* * We are writing over an existing file. * Check to see if we need to recall a delegation. */ - rfs4_hold_deleg_policy(); + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + rfs4_hold_deleg_policy(nsrv4); if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) { if (rfs4_check_delegated_byfp(FWRITE, fp, (reqsize == 0), FALSE, FALSE, &clientid)) { rfs4_file_rele(fp); - rfs4_rele_deleg_policy(); + rfs4_rele_deleg_policy(nsrv4); VN_RELE(vp); *attrset = 0; return (NFS4ERR_DELAY); } rfs4_file_rele(fp); } - rfs4_rele_deleg_policy(); + rfs4_rele_deleg_policy(nsrv4); if (nbl_need_check(vp)) { in_crit = 1; @@ -8340,11 +8404,13 @@ rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop, SETCLIENTID_CONFIRM4res *res = &resop->nfs_resop4_u.opsetclientid_confirm; rfs4_client_t *cp, *cptoclose = NULL; + nfs4_srv_t *nsrv4; DTRACE_NFSV4_2(op__setclientid__confirm__start, struct compound_state *, cs, SETCLIENTID_CONFIRM4args *, args); + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); *cs->statusp = res->status = NFS4_OK; cp = rfs4_findclient_by_id(args->clientid, TRUE); @@ -8380,14 +8446,14 @@ rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop, * Update the client's associated server instance, if it's changed * since the client was created. */ - if (rfs4_servinst(cp) != rfs4_cur_servinst) - rfs4_servinst_assign(cp, rfs4_cur_servinst); + if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst) + rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst); /* * Record clientid in stable storage. * Must be done after server instance has been assigned. */ - rfs4_ss_clid(cp); + rfs4_ss_clid(nsrv4, cp); rfs4_dbe_unlock(cp->rc_dbe); @@ -8402,7 +8468,7 @@ rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop, /* * Check to see if client can perform reclaims */ - rfs4_ss_chkclid(cp); + rfs4_ss_chkclid(nsrv4, cp); rfs4_client_rele(cp); @@ -10047,3 +10113,166 @@ client_is_downrev(struct svc_req *req) rfs4_dbe_rele(ci->ri_dbe); return (is_downrev); } + +/* + * Do the main work of handling HA-NFSv4 Resource Group failover on + * Sun Cluster. + * We need to detect whether any RG admin paths have been added or removed, + * and adjust resources accordingly. + * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In + * order to scale, the list and array of paths need to be held in more + * suitable data structures. 
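+ * (Keeping the served paths in a structure sorted on path, e.g. an
+ * AVL tree, would reduce the two nested scans below to roughly
+ * O(n log n).)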
+ */ +static void +hanfsv4_failover(nfs4_srv_t *nsrv4) +{ + int i, start_grace, numadded_paths = 0; + char **added_paths = NULL; + rfs4_dss_path_t *dss_path; + + /* + * Note: currently, dss_pathlist cannot be NULL, since + * it will always include an entry for NFS4_DSS_VAR_DIR. If we + * make the latter dynamically specified too, the following will + * need to be adjusted. + */ + + /* + * First, look for removed paths: RGs that have been failed-over + * away from this node. + * Walk the "currently-serving" dss_pathlist and, for each + * path, check if it is on the "passed-in" rfs4_dss_newpaths array + * from nfsd. If not, that RG path has been removed. + * + * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed + * any duplicates. + */ + dss_path = nsrv4->dss_pathlist; + do { + int found = 0; + char *path = dss_path->path; + + /* used only for non-HA so may not be removed */ + if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) { + dss_path = dss_path->next; + continue; + } + + for (i = 0; i < rfs4_dss_numnewpaths; i++) { + int cmpret; + char *newpath = rfs4_dss_newpaths[i]; + + /* + * Since nfsd has sorted rfs4_dss_newpaths for us, + * once the return from strcmp is negative we know + * we've passed the point where "path" should be, + * and can stop searching: "path" has been removed. + */ + cmpret = strcmp(path, newpath); + if (cmpret < 0) + break; + if (cmpret == 0) { + found = 1; + break; + } + } + + if (found == 0) { + unsigned index = dss_path->index; + rfs4_servinst_t *sip = dss_path->sip; + rfs4_dss_path_t *path_next = dss_path->next; + + /* + * This path has been removed. + * We must clear out the servinst reference to + * it, since it's now owned by another + * node: we should not attempt to touch it. + */ + ASSERT(dss_path == sip->dss_paths[index]); + sip->dss_paths[index] = NULL; + + /* remove from "currently-serving" list, and destroy */ + remque(dss_path); + /* allow for NUL */ + kmem_free(dss_path->path, strlen(dss_path->path) + 1); + kmem_free(dss_path, sizeof (rfs4_dss_path_t)); + + dss_path = path_next; + } else { + /* path was found; not removed */ + dss_path = dss_path->next; + } + } while (dss_path != nsrv4->dss_pathlist); + + /* + * Now, look for added paths: RGs that have been failed-over + * to this node. + * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and, + * for each path, check if it is on the "currently-serving" + * dss_pathlist. If not, that RG path has been added. + * + * Note: we don't do duplicate detection here; nfsd does that for us. + * + * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us + * an upper bound for the size needed for added_paths[numadded_paths]. + */ + + /* probably more space than we need, but guaranteed to be enough */ + if (rfs4_dss_numnewpaths > 0) { + size_t sz = rfs4_dss_numnewpaths * sizeof (char *); + added_paths = kmem_zalloc(sz, KM_SLEEP); + } + + /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */ + for (i = 0; i < rfs4_dss_numnewpaths; i++) { + int found = 0; + char *newpath = rfs4_dss_newpaths[i]; + + dss_path = nsrv4->dss_pathlist; + do { + char *path = dss_path->path; + + /* used only for non-HA */ + if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) { + dss_path = dss_path->next; + continue; + } + + if (strncmp(path, newpath, strlen(path)) == 0) { + found = 1; + break; + } + + dss_path = dss_path->next; + } while (dss_path != nsrv4->dss_pathlist); + + if (found == 0) { + added_paths[numadded_paths] = newpath; + numadded_paths++; + } + } + + /* did we find any added paths? 
*/ + if (numadded_paths > 0) { + + /* create a new server instance, and start its grace period */ + start_grace = 1; + rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths); + + /* read in the stable storage state from these paths */ + rfs4_dss_readstate(nsrv4, numadded_paths, added_paths); + + /* + * Multiple failovers during a grace period will cause + * clients of the same resource group to be partitioned + * into different server instances, with different + * grace periods. Since clients of the same resource + * group must be subject to the same grace period, + * we need to reset all currently active grace periods. + */ + rfs4_grace_reset_all(nsrv4); + } + + if (rfs4_dss_numnewpaths > 0) + kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *)); +} diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c b/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c index 7240faa3566e..52f254c98c26 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c @@ -18,12 +18,14 @@ * * CDDL HEADER END */ + /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ + /* - * Copyright 2012 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. */ #include @@ -133,7 +135,7 @@ rfs4_attr_init() struct statvfs64 sb; rfs4_init_compound_state(&cs); - cs.vp = rootvp; + cs.vp = ZONE_ROOTVP(); cs.fh.nfs_fh4_val = NULL; cs.cr = kcred; @@ -1316,7 +1318,7 @@ rfs4_get_mntdfileid(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sarg) * another getattr -- just use the one in sarg. */ if (VN_CMP(vp, stubvp)) { - ASSERT(VN_CMP(vp, rootdir)); + ASSERT(VN_CMP(vp, ZONE_ROOTVP())); vap = sarg->vap; } else { va.va_mask = AT_NODEID; diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c b/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c index bb3f1bdd957d..46fac9a42be8 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c @@ -22,7 +22,10 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + */ + +/* + * Copyright 2018 Nexenta Systems, Inc. 
*/ #include @@ -48,10 +51,7 @@ #define MAX_READ_DELEGATIONS 5 -krwlock_t rfs4_deleg_policy_lock; -srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE; static int rfs4_deleg_wlp = 5; -kmutex_t rfs4_deleg_lock; static int rfs4_deleg_disabled; static int rfs4_max_setup_cb_tries = 5; @@ -138,23 +138,30 @@ uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp) * value of "new_policy" */ void -rfs4_set_deleg_policy(srv_deleg_policy_t new_policy) +rfs4_set_deleg_policy(nfs4_srv_t *nsrv4, srv_deleg_policy_t new_policy) { - rw_enter(&rfs4_deleg_policy_lock, RW_WRITER); - rfs4_deleg_policy = new_policy; - rw_exit(&rfs4_deleg_policy_lock); + rw_enter(&nsrv4->deleg_policy_lock, RW_WRITER); + nsrv4->nfs4_deleg_policy = new_policy; + rw_exit(&nsrv4->deleg_policy_lock); } void -rfs4_hold_deleg_policy(void) +rfs4_hold_deleg_policy(nfs4_srv_t *nsrv4) { - rw_enter(&rfs4_deleg_policy_lock, RW_READER); + rw_enter(&nsrv4->deleg_policy_lock, RW_READER); } void -rfs4_rele_deleg_policy(void) +rfs4_rele_deleg_policy(nfs4_srv_t *nsrv4) +{ + rw_exit(&nsrv4->deleg_policy_lock); +} + +srv_deleg_policy_t +nfs4_get_deleg_policy() { - rw_exit(&rfs4_deleg_policy_lock); + nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + return (nsrv4->nfs4_deleg_policy); } @@ -210,7 +217,7 @@ rfs4_do_cb_null(rfs4_client_t *cp) if (cbp->cb_nullcaller == TRUE) { mutex_exit(cbp->cb_lock); rfs4_client_rele(cp); - return; + zthread_exit(); } /* Mark the cbinfo as having a thread in the NULL callback */ @@ -278,7 +285,7 @@ rfs4_do_cb_null(rfs4_client_t *cp) cbp->cb_nullcaller = FALSE; mutex_exit(cbp->cb_lock); rfs4_client_rele(cp); - return; + zthread_exit(); } /* mark rfs4_client_t as CALLBACK NULL in progress */ @@ -320,8 +327,8 @@ rfs4_do_cb_null(rfs4_client_t *cp) cv_broadcast(cbp->cb_cv); /* start up the other threads */ cbp->cb_nullcaller = FALSE; mutex_exit(cbp->cb_lock); - rfs4_client_rele(cp); + zthread_exit(); } /* @@ -687,7 +694,7 @@ rfs4_deleg_cb_check(rfs4_client_t *cp) rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */ - (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN, + (void) zthread_create(NULL, 0, rfs4_do_cb_null, cp, 0, minclsyspri); } @@ -948,8 +955,8 @@ do_recall(struct recall_arg *arg) mutex_destroy(&cpr_lock); rfs4_deleg_state_rele(dsp); /* release the hold for this thread */ - kmem_free(arg, sizeof (struct recall_arg)); + zthread_exit(); } struct master_recall_args { @@ -977,7 +984,7 @@ do_recall_file(struct master_recall_args *map) rfs4_dbe_rele_nolock(fp->rf_dbe); rfs4_dbe_unlock(fp->rf_dbe); kmem_free(map, sizeof (struct master_recall_args)); - return; + zthread_exit(); } mutex_exit(fp->rf_dinfo.rd_recall_lock); @@ -1010,7 +1017,7 @@ do_recall_file(struct master_recall_args *map) recall_count++; - (void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN, + (void) zthread_create(NULL, 0, do_recall, arg, 0, minclsyspri); } @@ -1035,6 +1042,7 @@ do_recall_file(struct master_recall_args *map) mutex_enter(&cpr_lock); CALLB_CPR_EXIT(&cpr_info); mutex_destroy(&cpr_lock); + zthread_exit(); } static void @@ -1070,7 +1078,7 @@ rfs4_recall_file(rfs4_file_t *fp, args->recall = recall; args->trunc = trunc; - (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN, + (void) zthread_create(NULL, 0, do_recall_file, args, 0, minclsyspri); } @@ -1206,12 +1214,12 @@ rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp) * determine the actual delegation type to return. 
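 * The policy is now read from the caller's per-zone nfs4_srv_t
 * rather than from a global, so each zone tunes its delegation
 * behaviour independently.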
*/ static open_delegation_type4 -rfs4_delegation_policy(open_delegation_type4 dtype, +rfs4_delegation_policy(nfs4_srv_t *nsrv4, open_delegation_type4 dtype, rfs4_dinfo_t *dinfo, clientid4 cid) { time_t elapsed; - if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE) + if (nsrv4->nfs4_deleg_policy != SRV_NORMAL_DELEGATE) return (OPEN_DELEGATE_NONE); /* @@ -1254,6 +1262,7 @@ rfs4_delegation_policy(open_delegation_type4 dtype, rfs4_deleg_state_t * rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall) { + nfs4_srv_t *nsrv4; rfs4_file_t *fp = sp->rs_finfo; open_delegation_type4 dtype; int no_delegation; @@ -1261,14 +1270,16 @@ rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall) ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + /* Is the server even providing delegations? */ - if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE) + if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE) return (NULL); /* Check to see if delegations have been temporarily disabled */ - mutex_enter(&rfs4_deleg_lock); + mutex_enter(&nsrv4->deleg_lock); no_delegation = rfs4_deleg_disabled; - mutex_exit(&rfs4_deleg_lock); + mutex_exit(&nsrv4->deleg_lock); if (no_delegation) return (NULL); @@ -1349,7 +1360,7 @@ rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall) * Based on policy and the history of the file get the * actual delegation. */ - dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo, + dtype = rfs4_delegation_policy(nsrv4, dtype, &fp->rf_dinfo, sp->rs_owner->ro_client->rc_clientid); if (dtype == OPEN_DELEGATE_NONE) @@ -1438,8 +1449,10 @@ rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp, { rfs4_deleg_state_t *dsp; + nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + /* Is delegation enabled? */ - if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) + if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE) return (FALSE); /* do we have a delegation on this file? */ @@ -1504,14 +1517,16 @@ rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp, bool_t rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc) { + nfs4_srv_t *nsrv4; rfs4_file_t *fp; bool_t create = FALSE; bool_t rc = FALSE; - rfs4_hold_deleg_policy(); + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + rfs4_hold_deleg_policy(nsrv4); /* Is delegation enabled? 
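rfs4_check_delegated(), continuing below, shows the reader-side bracket for the per-zone policy: hold the policy lock as reader for the duration of the check. The same bracket, condensed into a hypothetical helper (all callees are from the patch):

static bool_t
deleg_policy_allows(nfs4_srv_t *nsrv4)
{
	bool_t ok;

	rfs4_hold_deleg_policy(nsrv4);	/* RW_READER on deleg_policy_lock */
	ok = (nsrv4->nfs4_deleg_policy != SRV_NEVER_DELEGATE);
	rfs4_rele_deleg_policy(nsrv4);

	return (ok);
}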
*/ - if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) { + if (nsrv4->nfs4_deleg_policy != SRV_NEVER_DELEGATE) { fp = rfs4_findfile(vp, NULL, &create); if (fp != NULL) { if (rfs4_check_delegated_byfp(mode, fp, trunc, @@ -1521,7 +1536,7 @@ rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc) rfs4_file_rele(fp); } } - rfs4_rele_deleg_policy(); + rfs4_rele_deleg_policy(nsrv4); return (rc); } @@ -1533,7 +1548,9 @@ rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc) void rfs4_clear_dont_grant(rfs4_file_t *fp) { - if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) + nfs4_srv_t *nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + + if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE) return; rfs4_dbe_lock(fp->rf_dbe); ASSERT(fp->rf_dinfo.rd_hold_grant > 0); @@ -1869,18 +1886,24 @@ rfs4_is_deleg(rfs4_state_t *sp) void rfs4_disable_delegation(void) { - mutex_enter(&rfs4_deleg_lock); + nfs4_srv_t *nsrv4; + + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + mutex_enter(&nsrv4->deleg_lock); rfs4_deleg_disabled++; - mutex_exit(&rfs4_deleg_lock); + mutex_exit(&nsrv4->deleg_lock); } void rfs4_enable_delegation(void) { - mutex_enter(&rfs4_deleg_lock); + nfs4_srv_t *nsrv4; + + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + mutex_enter(&nsrv4->deleg_lock); ASSERT(rfs4_deleg_disabled > 0); rfs4_deleg_disabled--; - mutex_exit(&rfs4_deleg_lock); + mutex_exit(&nsrv4->deleg_lock); } void diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c b/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c index 69cbfbc2355c..bcc7b43750b1 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c @@ -20,8 +20,11 @@ */ /* - * Copyright 2017 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * Copyright 2018 Nexenta Systems, Inc. * Copyright (c) 2015, Joyent, Inc. */ @@ -143,7 +146,7 @@ nfs4_vget_pseudo(struct exportinfo *exi, vnode_t **vpp, fid_t *fidp) * b) a descendant of the export root is shared */ struct exportinfo * -pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head, +pseudo_exportfs(nfs_export_t *ne, vnode_t *vp, fid_t *fid, struct exp_visible *vis_head, struct exportdata *exdata) { struct exportinfo *exi; @@ -152,7 +155,7 @@ pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head, int vpathlen; int i; - ASSERT(RW_WRITE_HELD(&exported_lock)); + ASSERT(RW_WRITE_HELD(&ne->exported_lock)); fsid = vp->v_vfsp->vfs_fsid; exi = kmem_zalloc(sizeof (*exi), KM_SLEEP); @@ -205,13 +208,15 @@ pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head, /* * Insert the new entry at the front of the export list */ - export_link(exi); + export_link(ne, exi); /* * Initialize exi_id and exi_kstats */ + rw_enter(&nfs_exi_id_lock, RW_WRITER); exi->exi_id = exi_id_get_next(); avl_add(&exi_id_tree, exi); + rw_exit(&nfs_exi_id_lock); exi->exi_kstats = exp_kstats_init(getzoneid(), exi->exi_id, kex->ex_path, vpathlen, TRUE); @@ -289,14 +294,14 @@ tree_prepend_node(treenode_t *n, exp_visible_t *v, exportinfo_t *e) * they should be already freed. 
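rfs4_disable_delegation() and rfs4_enable_delegation() keep their counting semantics; only the counter's mutex moved into the zone's nfs4_srv_t. A hedged usage sketch (the bracketed operation is hypothetical):

rfs4_disable_delegation();	/* bump rfs4_deleg_disabled under deleg_lock */
/* ... work that must not race with new delegation grants ... */
rfs4_enable_delegation();	/* drop the count; ASSERTs it was > 0 */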
*/ static void -tree_remove_node(treenode_t *node) +tree_remove_node(nfs_export_t *ne, treenode_t *node) { treenode_t *parent = node->tree_parent; treenode_t *s; /* s for sibling */ if (parent == NULL) { kmem_free(node, sizeof (*node)); - ns_root = NULL; + ne->ns_root = NULL; return; } /* This node is first child */ @@ -445,6 +450,7 @@ more_visible(struct exportinfo *exi, treenode_t *tree_head) struct exp_visible *vp1, *vp2, *vis_head, *tail, *next; int found; treenode_t *child, *curr, *connect_point; + nfs_export_t *ne = nfs_get_export(); vis_head = tree_head->tree_vis; connect_point = exi->exi_tree; @@ -458,7 +464,7 @@ more_visible(struct exportinfo *exi, treenode_t *tree_head) exi->exi_visible = vis_head; /* Update the change timestamp */ - tree_update_change(connect_point, &vis_head->vis_change); + tree_update_change(ne, connect_point, &vis_head->vis_change); return; } @@ -518,7 +524,7 @@ more_visible(struct exportinfo *exi, treenode_t *tree_head) tree_add_child(connect_point, curr); /* Update the change timestamp */ - tree_update_change(connect_point, + tree_update_change(ne, connect_point, &curr->tree_vis->vis_change); connect_point = NULL; @@ -635,8 +641,9 @@ treeclimb_export(struct exportinfo *exip) struct vattr va; treenode_t *tree_head = NULL; timespec_t now; + nfs_export_t *ne = nfs_get_export(); - ASSERT(RW_WRITE_HELD(&exported_lock)); + ASSERT(RW_WRITE_HELD(&ne->exported_lock)); gethrestime(&now); @@ -685,23 +692,23 @@ treeclimb_export(struct exportinfo *exip) * this as a pseudo export so that an NFS v4 * client can do lookups in it. */ - new_exi = pseudo_exportfs(vp, &fid, vis_head, - NULL); + new_exi = pseudo_exportfs(ne, vp, &fid, + vis_head, NULL); vis_head = NULL; } - if (VN_CMP(vp, rootdir)) { + if (VN_CMP(vp, ZONE_ROOTVP())) { /* at system root */ /* * If sharing "/", new_exi is shared exportinfo * (exip). Otherwise, new_exi is exportinfo * created by pseudo_exportfs() above. */ - ns_root = tree_prepend_node(tree_head, NULL, + ne->ns_root = tree_prepend_node(tree_head, NULL, new_exi); /* Update the change timestamp */ - tree_update_change(ns_root, &now); + tree_update_change(ne, ne->ns_root, &now); break; } @@ -797,8 +804,10 @@ treeclimb_export(struct exportinfo *exip) /* exip will be freed in exportfs() */ if (e && e != exip) { exp_kstats_delete(e->exi_kstats); + rw_enter(&nfs_exi_id_lock, RW_WRITER); avl_remove(&exi_id_tree, e); - export_unlink(e); + rw_exit(&nfs_exi_id_lock); + export_unlink(ne, e); exi_rele(&e); } tree_head = tree_head->tree_child_first; @@ -822,12 +831,12 @@ treeclimb_export(struct exportinfo *exip) * to continue releasing visibles, until we reach VROOT node. 
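pseudo_exportfs() above establishes the lock order used throughout the patch: the zone's exported_lock is held as writer first, and the global nfs_exi_id_lock is taken inside it only around exi_id allocation and the AVL insert. Condensed from the hunk:

ASSERT(RW_WRITE_HELD(&ne->exported_lock));

export_link(ne, exi);			/* zone-local hash chains */

rw_enter(&nfs_exi_id_lock, RW_WRITER);	/* global exi_id space */
exi->exi_id = exi_id_get_next();
avl_add(&exi_id_tree, exi);
rw_exit(&nfs_exi_id_lock);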
*/ void -treeclimb_unexport(struct exportinfo *exip) +treeclimb_unexport(nfs_export_t *ne, struct exportinfo *exip) { treenode_t *tnode, *old_nd; treenode_t *connect_point = NULL; - ASSERT(RW_WRITE_HELD(&exported_lock)); + ASSERT(RW_WRITE_HELD(&ne->exported_lock)); tnode = exip->exi_tree; /* @@ -850,8 +859,10 @@ treeclimb_unexport(struct exportinfo *exip) if (TREE_ROOT(tnode) && !TREE_EXPORTED(tnode) && tnode->tree_child_first == NULL) { exp_kstats_delete(tnode->tree_exi->exi_kstats); + rw_enter(&nfs_exi_id_lock, RW_WRITER); avl_remove(&exi_id_tree, tnode->tree_exi); - export_unlink(tnode->tree_exi); + rw_exit(&nfs_exi_id_lock); + export_unlink(ne, tnode->tree_exi); exi_rele(&tnode->tree_exi); } @@ -866,14 +877,14 @@ treeclimb_unexport(struct exportinfo *exip) /* Remove itself, if this is a leaf and non-exported node */ if (old_nd->tree_child_first == NULL && !TREE_EXPORTED(old_nd)) { - tree_remove_node(old_nd); + tree_remove_node(ne, old_nd); connect_point = tnode; } } /* Update the change timestamp */ if (connect_point != NULL) - tree_update_change(connect_point, NULL); + tree_update_change(ne, connect_point, NULL); } /* @@ -1165,14 +1176,6 @@ nfs_visible_inode(struct exportinfo *exi, ino64_t ino, return (0); } -/* - * The change attribute value of the root of nfs pseudo namespace. - * - * The ns_root_change is protected by exported_lock because all of the treenode - * operations are protected by exported_lock too. - */ -static timespec_t ns_root_change; - /* * Get the change attribute from visible and returns TRUE. * If the change value is not available returns FALSE. @@ -1183,6 +1186,7 @@ nfs_visible_change(struct exportinfo *exi, vnode_t *vp, timespec_t *change) struct exp_visible *visp; fid_t fid; treenode_t *node; + nfs_export_t *ne = nfs_get_export(); /* * First check to see if vp is export root. @@ -1227,14 +1231,13 @@ nfs_visible_change(struct exportinfo *exi, vnode_t *vp, timespec_t *change) exproot: /* The VROOT export have its visible available through treenode */ node = exi->exi_tree; - if (node != ns_root) { + if (node != ne->ns_root) { ASSERT(node->tree_vis != NULL); *change = node->tree_vis->vis_change; } else { ASSERT(node->tree_vis == NULL); - *change = ns_root_change; + *change = ne->ns_root_change; } - return (TRUE); } @@ -1246,15 +1249,15 @@ nfs_visible_change(struct exportinfo *exi, vnode_t *vp, timespec_t *change) * If the change value is not supplied, the current time is used. */ void -tree_update_change(treenode_t *tnode, timespec_t *change) +tree_update_change(nfs_export_t *ne, treenode_t *tnode, timespec_t *change) { timespec_t *vis_change; ASSERT(tnode != NULL); - ASSERT((tnode != ns_root && tnode->tree_vis != NULL) || - (tnode == ns_root && tnode->tree_vis == NULL)); + ASSERT((tnode != ne->ns_root && tnode->tree_vis != NULL) || + (tnode == ne->ns_root && tnode->tree_vis == NULL)); - vis_change = tnode == ns_root ? &ns_root_change + vis_change = tnode == ne->ns_root ? &ne->ns_root_change : &tnode->tree_vis->vis_change; if (change != NULL) diff --git a/usr/src/uts/common/fs/nfs/nfs4_state.c b/usr/src/uts/common/fs/nfs/nfs4_state.c index 47941454bc8f..1d7f47768574 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_state.c +++ b/usr/src/uts/common/fs/nfs/nfs4_state.c @@ -18,9 +18,13 @@ * * CDDL HEADER END */ + /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + */ + +/* + * Copyright 2018 Nexenta Systems, Inc. 
*/ #include @@ -41,7 +45,6 @@ extern u_longlong_t nfs4_srv_caller_id; -extern time_t rfs4_start_time; extern uint_t nfs4_srv_vkey; stateid4 special0 = { @@ -72,7 +75,7 @@ int rfs4_debug; static uint32_t rfs4_database_debug = 0x00; -static void rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf); +static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf); static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf); static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip); static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip); @@ -121,11 +124,6 @@ rfs4_sw_exit(rfs4_state_wait_t *swp) mutex_exit(swp->sw_cv_lock); } -/* - * CPR callback id -- not related to v4 callbacks - */ -static callb_id_t cpr_id = 0; - static void deep_lock_copy(LOCK4res *dres, LOCK4res *sres) { @@ -274,7 +272,7 @@ rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src) #define ADDRHASH(key) ((unsigned long)(key) >> 3) /* Used to serialize create/destroy of rfs4_server_state database */ -kmutex_t rfs4_state_lock; +kmutex_t rfs4_state_lock; static rfs4_database_t *rfs4_server_state = NULL; /* Used to serialize lookups of clientids */ @@ -705,29 +703,29 @@ rfs4_ss_oldstate(rfs4_oldstate_t *oldstate, char *statedir, char *destdir) } static void -rfs4_ss_init(void) +rfs4_ss_init(nfs4_srv_t *nsrv4) { int npaths = 1; char *default_dss_path = NFS4_DSS_VAR_DIR; /* read the default stable storage state */ - rfs4_dss_readstate(npaths, &default_dss_path); + rfs4_dss_readstate(nsrv4, npaths, &default_dss_path); rfs4_ss_enabled = 1; } static void -rfs4_ss_fini(void) +rfs4_ss_fini(nfs4_srv_t *nsrv4) { rfs4_servinst_t *sip; - mutex_enter(&rfs4_servinst_lock); - sip = rfs4_cur_servinst; + mutex_enter(&nsrv4->servinst_lock); + sip = nsrv4->nfs4_cur_servinst; while (sip != NULL) { rfs4_dss_clear_oldstate(sip); sip = sip->next; } - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } /* @@ -771,7 +769,7 @@ rfs4_dss_clear_oldstate(rfs4_servinst_t *sip) * Form the state and oldstate paths, and read in the stable storage files. */ void -rfs4_dss_readstate(int npaths, char **paths) +rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths) { int i; char *state, *oldstate; @@ -795,8 +793,8 @@ rfs4_dss_readstate(int npaths, char **paths) * and move the latter's contents to old state * directory. */ - rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, oldstate, NULL); - rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, state, oldstate); + rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL); + rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate); } kmem_free(state, MAXPATHLEN); @@ -809,7 +807,7 @@ rfs4_dss_readstate(int npaths, char **paths) * granted permission to perform reclaims. */ void -rfs4_ss_chkclid(rfs4_client_t *cp) +rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp) { rfs4_servinst_t *sip; @@ -830,15 +828,15 @@ rfs4_ss_chkclid(rfs4_client_t *cp) * Start at the current instance, and walk the list backwards * to the first. 
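rfs4_ss_fini() above shows the canonical walk of a zone's server-instance list: start at nfs4_cur_servinst and follow the prev links back to the oldest instance, all under servinst_lock. rfs4_ss_chkclid() and rfs4_ss_clid_write() below use the same shape:

mutex_enter(&nsrv4->servinst_lock);
for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
	/* visit instances newest to oldest */
}
mutex_exit(&nsrv4->servinst_lock);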
*/ - mutex_enter(&rfs4_servinst_lock); - for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) { + mutex_enter(&nsrv4->servinst_lock); + for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) { rfs4_ss_chkclid_sip(cp, sip); /* if the above check found this client, we're done */ if (cp->rc_can_reclaim) break; } - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } static void @@ -888,7 +886,7 @@ rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip) * the server-generated short-hand clientid. */ void -rfs4_ss_clid(rfs4_client_t *cp) +rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp) { const char *kinet_ntop6(uchar_t *, char *, size_t); char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN]; @@ -920,7 +918,7 @@ rfs4_ss_clid(rfs4_client_t *cp) (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf, (longlong_t)cp->rc_clientid); - rfs4_ss_clid_write(cp, leaf); + rfs4_ss_clid_write(nsrv4, cp, leaf); } /* @@ -929,7 +927,7 @@ rfs4_ss_clid(rfs4_client_t *cp) * multiple directories. */ static void -rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf) +rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf) { rfs4_servinst_t *sip; @@ -943,8 +941,8 @@ rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf) * to all instances' paths. Start at the current instance, and * walk the list backwards to the first. */ - mutex_enter(&rfs4_servinst_lock); - for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) { + mutex_enter(&nsrv4->servinst_lock); + for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) { int i, npaths = sip->dss_npaths; /* write the leaf file to all DSS paths */ @@ -958,7 +956,7 @@ rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf) rfs4_ss_clid_write_one(cp, dss_path->path, leaf); } } - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } /* @@ -1156,16 +1154,11 @@ rfs4_clear_client_state(struct nfs4clrst_args *clr) /* * Used to initialize the NFSv4 server's state or database. All of - * the tables are created and timers are set. Only called when NFSv4 - * service is provided. + * the tables are created and timers are set. */ void -rfs4_state_init() +rfs4_state_g_init() { - int start_grace; - extern boolean_t rfs4_cpr_callb(void *, int); - char *dss_path = NFS4_DSS_VAR_DIR; - time_t start_time; mutex_enter(&rfs4_state_lock); @@ -1180,40 +1173,6 @@ rfs4_state_init() rw_init(&rfs4_findclient_lock, NULL, RW_DEFAULT, NULL); - /* - * Set the boot time. If the server - * has been restarted quickly and has had the opportunity to - * service clients, then the start_time needs to be bumped - * regardless. A small window but it exists... - */ - start_time = gethrestime_sec(); - if (rfs4_start_time < start_time) - rfs4_start_time = start_time; - else - rfs4_start_time++; - - /* DSS: distributed stable storage: initialise served paths list */ - rfs4_dss_pathlist = NULL; - - /* - * Create the first server instance, or a new one if the server has - * been restarted; see above comments on rfs4_start_time. Don't - * start its grace period; that will be done later, to maximise the - * clients' recovery window. 
- */ - start_grace = 0; - rfs4_servinst_create(start_grace, 1, &dss_path); - - /* reset the "first NFSv4 request" status */ - rfs4_seen_first_compound = 0; - - /* - * Add a CPR callback so that we can update client - * access times to extend the lease after a suspend - * and resume (using the same class as rpcmod/connmgr) - */ - cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4"); - /* set the various cache timers for table creation */ if (rfs4_client_cache_time == 0) rfs4_client_cache_time = CLIENT_CACHE_TIME; @@ -1398,11 +1357,6 @@ rfs4_state_init() deleg_state_compare, deleg_state_mkkey, FALSE); - /* - * Init the stable storage. - */ - rfs4_ss_init(); - rfs4_client_clrst = rfs4_clear_client_state; mutex_exit(&rfs4_state_lock); @@ -1414,7 +1368,7 @@ rfs4_state_init() * and other state. */ void -rfs4_state_fini() +rfs4_state_g_fini() { rfs4_database_t *dbp; @@ -1427,22 +1381,13 @@ rfs4_state_fini() rfs4_client_clrst = NULL; - rfs4_set_deleg_policy(SRV_NEVER_DELEGATE); dbp = rfs4_server_state; rfs4_server_state = NULL; - /* - * Cleanup the CPR callback. - */ - if (cpr_id) - (void) callb_delete(cpr_id); - rw_destroy(&rfs4_findclient_lock); /* First stop all of the reaper threads in the database */ rfs4_database_shutdown(dbp); - /* clean up any dangling stable storage structures */ - rfs4_ss_fini(); /* Now actually destroy/release the database and its tables */ rfs4_database_destroy(dbp); @@ -1457,18 +1402,92 @@ rfs4_state_fini() mutex_exit(&rfs4_state_lock); - /* destroy server instances and current instance ptr */ - rfs4_servinst_destroy_all(); - - /* reset the "first NFSv4 request" status */ - rfs4_seen_first_compound = 0; - /* DSS: distributed stable storage */ nvlist_free(rfs4_dss_oldpaths); nvlist_free(rfs4_dss_paths); rfs4_dss_paths = rfs4_dss_oldpaths = NULL; } +/* + * Used to initialize the per zone NFSv4 server's state + */ +void +rfs4_state_zone_init(nfs4_srv_t *nsrv4) +{ + extern boolean_t rfs4_cpr_callb(void *, int); + time_t start_time; + int start_grace; + char *dss_path = NFS4_DSS_VAR_DIR; + + /* DSS: distributed stable storage: initialise served paths list */ + nsrv4->dss_pathlist = NULL; + + /* + * Set the boot time. If the server + * has been restarted quickly and has had the opportunity to + * service clients, then the start_time needs to be bumped + * regardless. A small window but it exists... + */ + start_time = gethrestime_sec(); + if (nsrv4->rfs4_start_time < start_time) + nsrv4->rfs4_start_time = start_time; + else + nsrv4->rfs4_start_time++; + + /* + * Add a CPR callback so that we can update client + * access times to extend the lease after a suspend + * and resume (using the same class as rpcmod/connmgr) + */ + nsrv4->cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4"); + + /* + * Create the first server instance, or a new one if the server has + * been restarted; see above comments on rfs4_start_time. Don't + * start its grace period; that will be done later, to maximise the + * clients' recovery window. + */ + start_grace = 0; + rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path); + + /* reset the "first NFSv4 request" status */ + nsrv4->seen_first_compound = 0; + + /* + * Init the stable storage. + */ + rfs4_ss_init(nsrv4); +} + +/* + * Used at server shutdown to cleanup all of NFSv4 server's zone structures + * and state. + */ +void +rfs4_state_zone_fini() +{ + nfs4_srv_t *nsrv4; + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + + rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE); + + /* + * Cleanup the CPR callback. 
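The start-time bump in rfs4_state_zone_init() above guarantees a strictly increasing rfs4_start_time even if the zone's server restarts within one second. Clientids and stateids embed this value, so anything minted by a previous incarnation is detectably stale. The same logic, restated as an illustration:

time_t now = gethrestime_sec();

/* monotonic across restarts, even within the same second */
nsrv4->rfs4_start_time = (nsrv4->rfs4_start_time < now) ?
    now : nsrv4->rfs4_start_time + 1;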
+ */ + if (nsrv4->cpr_id) + (void) callb_delete(nsrv4->cpr_id); + + /* destroy server instances and current instance ptr */ + rfs4_servinst_destroy_all(nsrv4); + + /* reset the "first NFSv4 request" status */ + nsrv4->seen_first_compound = 0; + + /* clean up any dangling stable storage structures */ + rfs4_ss_fini(nsrv4); + +} + typedef union { struct { uint32_t start_time; @@ -1581,6 +1600,7 @@ rfs4_client_expiry(rfs4_entry_t u_entry) static void rfs4_dss_remove_cpleaf(rfs4_client_t *cp) { + nfs4_srv_t *nsrv4; rfs4_servinst_t *sip; char *leaf = cp->rc_ss_pn->leaf; @@ -1590,12 +1610,13 @@ rfs4_dss_remove_cpleaf(rfs4_client_t *cp) * from all server instances. */ - mutex_enter(&rfs4_servinst_lock); - for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) { + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + mutex_enter(&nsrv4->servinst_lock); + for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) { /* remove the leaf file associated with this server instance */ rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf); } - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } static void @@ -1663,10 +1684,13 @@ rfs4_client_create(rfs4_entry_t u_entry, void *arg) struct sockaddr *ca; cid *cidp; scid_confirm_verf *scvp; + nfs4_srv_t *nsrv4; + + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); /* Get a clientid to give to the client */ cidp = (cid *)&cp->rc_clientid; - cidp->impl_id.start_time = rfs4_start_time; + cidp->impl_id.start_time = nsrv4->rfs4_start_time; cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe); /* If we are booted as a cluster node, embed our nodeid */ @@ -1724,7 +1748,7 @@ rfs4_client_create(rfs4_entry_t u_entry, void *arg) * rfs4_servinst_assign(). In this case it's not strictly necessary. */ rfs4_dbe_hold(cp->rc_dbe); - rfs4_servinst_assign(cp, rfs4_cur_servinst); + rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst); rfs4_dbe_rele(cp->rc_dbe); return (TRUE); @@ -2702,8 +2726,11 @@ static stateid_t get_stateid(id_t eid) { stateid_t id; + nfs4_srv_t *nsrv4; + + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); - id.bits.boottime = rfs4_start_time; + id.bits.boottime = nsrv4->rfs4_start_time; id.bits.ident = eid; id.bits.chgseq = 0; id.bits.type = 0; @@ -3231,6 +3258,9 @@ nfsstat4 rfs4_check_clientid(clientid4 *cp, int setclid_confirm) { cid *cidp = (cid *) cp; + nfs4_srv_t *nsrv4; + + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); /* * If we are booted as a cluster node, check the embedded nodeid. @@ -3245,7 +3275,7 @@ rfs4_check_clientid(clientid4 *cp, int setclid_confirm) * by the client (via the clientid) and this is NOT a * setclientid_confirm then return EXPIRED. */ - if (!setclid_confirm && cidp->impl_id.start_time == rfs4_start_time) + if (!setclid_confirm && cidp->impl_id.start_time == nsrv4->rfs4_start_time) return (NFS4ERR_EXPIRED); return (NFS4ERR_STALE_CLIENTID); @@ -3259,6 +3289,10 @@ rfs4_check_clientid(clientid4 *cp, int setclid_confirm) static nfsstat4 what_stateid_error(stateid_t *id, stateid_type_t type) { + nfs4_srv_t *nsrv4; + + nsrv4 = zone_getspecific(rfs4_zone_key, curzone); + /* If we are booted as a cluster node, was stateid locally generated? 
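The stateid side of that contract appears in the hunks around here: get_stateid() stamps the per-zone start time into the stateid, and what_stateid_error() (next hunk) rejects anything minted under a different start time. Condensed:

/* mint: in get_stateid() */
id.bits.boottime = nsrv4->rfs4_start_time;

/* check: in what_stateid_error() */
if (id->bits.boottime != nsrv4->rfs4_start_time)
	return (NFS4ERR_STALE_STATEID);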
*/ if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id)) return (NFS4ERR_STALE_STATEID); @@ -3268,7 +3302,7 @@ what_stateid_error(stateid_t *id, stateid_type_t type) return (NFS4ERR_BAD_STATEID); /* From a different server instantiation, return STALE */ - if (id->bits.boottime != rfs4_start_time) + if (id->bits.boottime != nsrv4->rfs4_start_time) return (NFS4ERR_STALE_STATEID); /* @@ -3283,7 +3317,7 @@ what_stateid_error(stateid_t *id, stateid_type_t type) * that has been revoked, the server should return BAD_STATEID * instead of the more common EXPIRED error. */ - if (id->bits.boottime == rfs4_start_time) { + if (id->bits.boottime == nsrv4->rfs4_start_time) { if (type == DELEGID) return (NFS4ERR_BAD_STATEID); else @@ -3785,7 +3819,7 @@ rfs4_close_all_state(rfs4_file_t *fp) #ifdef DEBUG /* only applies when server is handing out delegations */ - if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) + if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE) ASSERT(fp->rf_dinfo.rd_hold_grant > 0); #endif diff --git a/usr/src/uts/common/fs/nfs/nfs_auth.c b/usr/src/uts/common/fs/nfs/nfs_auth.c index b58d4df227e9..5594f8cc0d17 100644 --- a/usr/src/uts/common/fs/nfs/nfs_auth.c +++ b/usr/src/uts/common/fs/nfs/nfs_auth.c @@ -20,8 +20,11 @@ */ /* - * Copyright 2017 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * Copyright 2018 Nexenta Systems, Inc. * Copyright (c) 2015 by Delphix. All rights reserved. */ @@ -53,9 +56,13 @@ static struct kmem_cache *exi_cache_handle; static void exi_cache_reclaim(void *); static void exi_cache_trim(struct exportinfo *exi); +static void *nfsauth_zone_init(zoneid_t); +static void nfsauth_zone_shutdown(zoneid_t zoneid, void *data); +static void nfsauth_zone_fini(zoneid_t, void *); extern pri_t minclsyspri; +/* NFS auth cache statistics */ volatile uint_t nfsauth_cache_hit; volatile uint_t nfsauth_cache_miss; volatile uint_t nfsauth_cache_refresh; @@ -119,9 +126,8 @@ typedef struct refreshq_auth_node { } refreshq_auth_node_t; /* - * Used to manipulate things on the refreshq_queue. - * Note that the refresh thread will effectively - * pop a node off of the queue, at which point it + * Used to manipulate things on the refreshq_queue. Note that the refresh + * thread will effectively pop a node off of the queue, at which point it * will no longer need to hold the mutex. */ static kmutex_t refreshq_lock; @@ -129,102 +135,128 @@ static list_t refreshq_queue; static kcondvar_t refreshq_cv; /* - * If there is ever a problem with loading the - * module, then nfsauth_fini() needs to be called - * to remove state. In that event, since the - * refreshq thread has been started, they need to - * work together to get rid of state. + * If there is ever a problem with loading the module, then nfsauth_fini() + * needs to be called to remove state. In that event, since the refreshq + * thread has been started, they need to work together to get rid of state. */ typedef enum nfsauth_refreshq_thread_state { REFRESHQ_THREAD_RUNNING, REFRESHQ_THREAD_FINI_REQ, - REFRESHQ_THREAD_HALTED + REFRESHQ_THREAD_HALTED, + REFRESHQ_THREAD_NEED_CREATE } nfsauth_refreshq_thread_state_t; -nfsauth_refreshq_thread_state_t -refreshq_thread_state = REFRESHQ_THREAD_HALTED; +typedef struct nfsauth_globals { + kmutex_t mountd_lock; + door_handle_t mountd_dh; + + /* + * Used to manipulate things on the refreshq_queue. 
Note that the + * refresh thread will effectively pop a node off of the queue, + * at which point it will no longer need to hold the mutex. + */ + kmutex_t refreshq_lock; + list_t refreshq_queue; + kcondvar_t refreshq_cv; + + /* + * A list_t would be overkill. These are auth_cache entries which are + * no longer linked to an exi. It should be the case that all of their + * states are NFS_AUTH_INVALID, i.e., the only way to be put on this + * list is iff their state indicated that they had been placed on the + * refreshq_queue. + * + * Note that while there is no link from the exi or back to the exi, + * the exi can not go away until these entries are harvested. + */ + struct auth_cache *refreshq_dead_entries; + nfsauth_refreshq_thread_state_t refreshq_thread_state; + +} nfsauth_globals_t; static void nfsauth_free_node(struct auth_cache *); -static void nfsauth_refresh_thread(void); +static void nfsauth_refresh_thread(nfsauth_globals_t *); static int nfsauth_cache_compar(const void *, const void *); -/* - * mountd is a server-side only daemon. This will need to be - * revisited if the NFS server is ever made zones-aware. - */ -kmutex_t mountd_lock; -door_handle_t mountd_dh; +static zone_key_t nfsauth_zone_key; void mountd_args(uint_t did) { - mutex_enter(&mountd_lock); - if (mountd_dh != NULL) - door_ki_rele(mountd_dh); - mountd_dh = door_ki_lookup(did); - mutex_exit(&mountd_lock); + nfsauth_globals_t *nag; + + nag = zone_getspecific(nfsauth_zone_key, curzone); + mutex_enter(&nag->mountd_lock); + if (nag->mountd_dh != NULL) + door_ki_rele(nag->mountd_dh); + nag->mountd_dh = door_ki_lookup(did); + mutex_exit(&nag->mountd_lock); } void nfsauth_init(void) { - /* - * mountd can be restarted by smf(5). We need to make sure - * the updated door handle will safely make it to mountd_dh - */ - mutex_init(&mountd_lock, NULL, MUTEX_DEFAULT, NULL); + zone_key_create(&nfsauth_zone_key, nfsauth_zone_init, + nfsauth_zone_shutdown, nfsauth_zone_fini); - mutex_init(&refreshq_lock, NULL, MUTEX_DEFAULT, NULL); - list_create(&refreshq_queue, sizeof (refreshq_exi_node_t), - offsetof(refreshq_exi_node_t, ren_node)); - - cv_init(&refreshq_cv, NULL, CV_DEFAULT, NULL); - - /* - * Allocate nfsauth cache handle - */ exi_cache_handle = kmem_cache_create("exi_cache_handle", sizeof (struct auth_cache), 0, NULL, NULL, exi_cache_reclaim, NULL, NULL, 0); - - refreshq_thread_state = REFRESHQ_THREAD_RUNNING; - (void) zthread_create(NULL, 0, nfsauth_refresh_thread, - NULL, 0, minclsyspri); } -/* - * Finalization routine for nfsauth. It is important to call this routine - * before destroying the exported_lock. - */ void nfsauth_fini(void) { - refreshq_exi_node_t *ren; + kmem_cache_destroy(exi_cache_handle); +} + +/*ARGSUSED*/ +static void * +nfsauth_zone_init(zoneid_t zoneid) +{ + nfsauth_globals_t *nag; + + nag = kmem_zalloc(sizeof (*nag), KM_SLEEP); /* - * Prevent the nfsauth_refresh_thread from getting new - * work. + * mountd can be restarted by smf(5). We need to make sure + * the updated door handle will safely make it to mountd_dh. 
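nfsauth now follows the standard zone-key lifecycle: nfsauth_init() registers the key once at module load, and the per-zone callbacks run at zone creation (allocate nfsauth_globals_t), at zone shutdown (while the zone can still run threads), and at zone destruction (tear down locks and free the globals). The registration above, annotated:

zone_key_create(&nfsauth_zone_key,
    nfsauth_zone_init,		/* allocate and initialize nfsauth_globals_t */
    nfsauth_zone_shutdown,	/* halt refresh thread, drain refreshq */
    nfsauth_zone_fini);		/* destroy locks, free the globals */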
*/ - mutex_enter(&refreshq_lock); - if (refreshq_thread_state != REFRESHQ_THREAD_HALTED) { - refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ; - cv_broadcast(&refreshq_cv); + mutex_init(&nag->mountd_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&nag->refreshq_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&nag->refreshq_queue, sizeof (refreshq_exi_node_t), + offsetof(refreshq_exi_node_t, ren_node)); + cv_init(&nag->refreshq_cv, NULL, CV_DEFAULT, NULL); + nag->refreshq_thread_state = REFRESHQ_THREAD_NEED_CREATE; - /* - * Also, wait for nfsauth_refresh_thread() to exit. - */ - while (refreshq_thread_state != REFRESHQ_THREAD_HALTED) { - cv_wait(&refreshq_cv, &refreshq_lock); - } + return (nag); +} + +/*ARGSUSED*/ +static void +nfsauth_zone_shutdown(zoneid_t zoneid, void *data) +{ + refreshq_exi_node_t *ren; + nfsauth_globals_t *nag = data; + + /* Prevent the nfsauth_refresh_thread from getting new work */ + mutex_enter(&nag->refreshq_lock); + if (nag->refreshq_thread_state == REFRESHQ_THREAD_RUNNING) { + nag->refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ; + cv_broadcast(&nag->refreshq_cv); + + /* Wait for nfsauth_refresh_thread() to exit */ + while (nag->refreshq_thread_state != REFRESHQ_THREAD_HALTED) + cv_wait(&nag->refreshq_cv, &nag->refreshq_lock); } - mutex_exit(&refreshq_lock); + mutex_exit(&nag->refreshq_lock); /* * Walk the exi_list and in turn, walk the auth_lists and free all * lists. In addition, free INVALID auth_cache entries. */ - while ((ren = list_remove_head(&refreshq_queue))) { + while ((ren = list_remove_head(&nag->refreshq_queue))) { refreshq_auth_node_t *ran; while ((ran = list_remove_head(&ren->ren_authlist)) != NULL) { @@ -232,24 +264,26 @@ nfsauth_fini(void) if (p->auth_state == NFS_AUTH_INVALID) nfsauth_free_node(p); strfree(ran->ran_netid); - kmem_free(ran, sizeof (refreshq_auth_node_t)); + kmem_free(ran, sizeof (*ran)); } list_destroy(&ren->ren_authlist); exi_rele(&ren->ren_exi); - kmem_free(ren, sizeof (refreshq_exi_node_t)); + kmem_free(ren, sizeof (*ren)); } - list_destroy(&refreshq_queue); - - cv_destroy(&refreshq_cv); - mutex_destroy(&refreshq_lock); +} - mutex_destroy(&mountd_lock); +/*ARGSUSED*/ +static void +nfsauth_zone_fini(zoneid_t zoneid, void *data) +{ + nfsauth_globals_t *nag = data; - /* - * Deallocate nfsauth cache handle - */ - kmem_cache_destroy(exi_cache_handle); + list_destroy(&nag->refreshq_queue); + cv_destroy(&nag->refreshq_cv); + mutex_destroy(&nag->refreshq_lock); + mutex_destroy(&nag->mountd_lock); + kmem_free(nag, sizeof (*nag)); } /* @@ -341,9 +375,10 @@ sys_log(const char *msg) * Callup to the mountd to get access information in the kernel. 
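nfsauth_zone_shutdown() above stops the refresh thread with a request/acknowledge handshake, which must happen at shutdown time, before nfsauth_zone_fini() destroys the locks the thread still holds: flag REFRESHQ_THREAD_FINI_REQ, wake the thread, then wait until it reports REFRESHQ_THREAD_HALTED. The skeleton:

mutex_enter(&nag->refreshq_lock);
if (nag->refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
	nag->refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
	cv_broadcast(&nag->refreshq_cv);
	while (nag->refreshq_thread_state != REFRESHQ_THREAD_HALTED)
		cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
}
mutex_exit(&nag->refreshq_lock);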
*/ static bool_t -nfsauth_retrieve(struct exportinfo *exi, char *req_netid, int flavor, - struct netbuf *addr, int *access, cred_t *clnt_cred, uid_t *srv_uid, - gid_t *srv_gid, uint_t *srv_gids_cnt, gid_t **srv_gids) +nfsauth_retrieve(nfsauth_globals_t *nag, struct exportinfo *exi, + char *req_netid, int flavor, struct netbuf *addr, int *access, + cred_t *clnt_cred, uid_t *srv_uid, gid_t *srv_gid, uint_t *srv_gids_cnt, + gid_t **srv_gids) { varg_t varg = {0}; nfsauth_res_t res = {0}; @@ -416,11 +451,11 @@ nfsauth_retrieve(struct exportinfo *exi, char *req_netid, int flavor, da.rsize = 1; retry: - mutex_enter(&mountd_lock); - dh = mountd_dh; + mutex_enter(&nag->mountd_lock); + dh = nag->mountd_dh; if (dh != NULL) door_ki_hold(dh); - mutex_exit(&mountd_lock); + mutex_exit(&nag->mountd_lock); if (dh == NULL) { /* @@ -490,12 +525,12 @@ nfsauth_retrieve(struct exportinfo *exi, char *req_netid, int flavor, * chance to restart mountd(1m) * and establish a new door handle. */ - mutex_enter(&mountd_lock); - if (dh == mountd_dh) { - door_ki_rele(mountd_dh); - mountd_dh = NULL; + mutex_enter(&nag->mountd_lock); + if (dh == nag->mountd_dh) { + door_ki_rele(nag->mountd_dh); + nag->mountd_dh = NULL; } - mutex_exit(&mountd_lock); + mutex_exit(&nag->mountd_lock); delay(hz); goto retry; } @@ -587,7 +622,7 @@ nfsauth_retrieve(struct exportinfo *exi, char *req_netid, int flavor, } static void -nfsauth_refresh_thread(void) +nfsauth_refresh_thread(nfsauth_globals_t *nag) { refreshq_exi_node_t *ren; refreshq_auth_node_t *ran; @@ -599,25 +634,25 @@ nfsauth_refresh_thread(void) callb_cpr_t cprinfo; - CALLB_CPR_INIT(&cprinfo, &refreshq_lock, callb_generic_cpr, + CALLB_CPR_INIT(&cprinfo, &nag->refreshq_lock, callb_generic_cpr, "nfsauth_refresh"); for (;;) { - mutex_enter(&refreshq_lock); - if (refreshq_thread_state != REFRESHQ_THREAD_RUNNING) { + mutex_enter(&nag->refreshq_lock); + if (nag->refreshq_thread_state != REFRESHQ_THREAD_RUNNING) { /* Keep the hold on the lock! */ break; } - ren = list_remove_head(&refreshq_queue); + ren = list_remove_head(&nag->refreshq_queue); if (ren == NULL) { CALLB_CPR_SAFE_BEGIN(&cprinfo); - cv_wait(&refreshq_cv, &refreshq_lock); - CALLB_CPR_SAFE_END(&cprinfo, &refreshq_lock); - mutex_exit(&refreshq_lock); + cv_wait(&nag->refreshq_cv, &nag->refreshq_lock); + CALLB_CPR_SAFE_END(&cprinfo, &nag->refreshq_lock); + mutex_exit(&nag->refreshq_lock); continue; } - mutex_exit(&refreshq_lock); + mutex_exit(&nag->refreshq_lock); exi = ren->ren_exi; ASSERT(exi != NULL); @@ -664,7 +699,8 @@ nfsauth_refresh_thread(void) * shutdown. */ if (p->auth_state == NFS_AUTH_INVALID || - refreshq_thread_state != REFRESHQ_THREAD_RUNNING) { + nag->refreshq_thread_state != + REFRESHQ_THREAD_RUNNING) { mutex_exit(&p->auth_lock); if (p->auth_state == NFS_AUTH_INVALID) @@ -699,7 +735,7 @@ nfsauth_refresh_thread(void) * of the request which triggered the * refresh attempt. 
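nfsauth_retrieve() keeps the established door discipline against the now per-zone handle: snapshot nag->mountd_dh under mountd_lock with an extra hold, and if the upcall fails because mountd went away, drop the cached handle (only if nobody replaced it yet) and retry once smf(5) restarts the daemon. A condensed skeleton of the retry path (the upcall, error classification, and the release of this thread's own hold are elided):

retry:
	mutex_enter(&nag->mountd_lock);
	if ((dh = nag->mountd_dh) != NULL)
		door_ki_hold(dh);
	mutex_exit(&nag->mountd_lock);

	/* ... door_ki_upcall(); on a revoked or dead door: ... */

	mutex_enter(&nag->mountd_lock);
	if (dh == nag->mountd_dh) {
		door_ki_rele(nag->mountd_dh);
		nag->mountd_dh = NULL;
	}
	mutex_exit(&nag->mountd_lock);
	delay(hz);
	goto retry;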
*/ - retrieval = nfsauth_retrieve(exi, netid, + retrieval = nfsauth_retrieve(nag, exi, netid, p->auth_flavor, &p->auth_clnt->authc_addr, &access, p->auth_clnt_cred, &uid, &gid, &ngids, &gids); @@ -746,9 +782,10 @@ nfsauth_refresh_thread(void) kmem_free(ren, sizeof (refreshq_exi_node_t)); } - refreshq_thread_state = REFRESHQ_THREAD_HALTED; - cv_broadcast(&refreshq_cv); + nag->refreshq_thread_state = REFRESHQ_THREAD_HALTED; + cv_broadcast(&nag->refreshq_cv); CALLB_CPR_EXIT(&cprinfo); + DTRACE_PROBE(nfsauth__nfsauth__refresh__thread__exit); zthread_exit(); } @@ -820,6 +857,7 @@ static int nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor, cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids) { + nfsauth_globals_t *nag; struct netbuf *taddrmask; struct netbuf addr; /* temporary copy of client's address */ const struct netbuf *claddr; @@ -839,6 +877,8 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor, ASSERT(cr != NULL); + nag = zone_getspecific(nfsauth_zone_key, curzone); + /* * Now check whether this client already * has an entry for this flavor in the cache @@ -998,8 +1038,9 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor, atomic_inc_uint(&nfsauth_cache_miss); - res = nfsauth_retrieve(exi, svc_getnetid(req->rq_xprt), flavor, - &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids); + res = nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt), + flavor, &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, + &tmpgids); p->auth_access = access; p->auth_time = p->auth_freshness = gethrestime_sec(); @@ -1080,21 +1121,33 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor, ran->ran_auth = p; ran->ran_netid = strdup(svc_getnetid(req->rq_xprt)); - mutex_enter(&refreshq_lock); + mutex_enter(&nag->refreshq_lock); + + if (nag->refreshq_thread_state == + REFRESHQ_THREAD_NEED_CREATE) { + /* Launch nfsauth refresh thread */ + nag->refreshq_thread_state = + REFRESHQ_THREAD_RUNNING; + (void) zthread_create(NULL, 0, + nfsauth_refresh_thread, nag, 0, + minclsyspri); + } + /* - * We should not add a work queue - * item if the thread is not - * accepting them. + * We should not add a work queue item if the thread + * is not accepting them. */ - if (refreshq_thread_state == REFRESHQ_THREAD_RUNNING) { + if (nag->refreshq_thread_state == + REFRESHQ_THREAD_RUNNING) { refreshq_exi_node_t *ren; /* * Is there an existing exi_list? 
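The refresh thread is no longer started at init time; with per-zone globals it is created lazily by the first cache miss that queues a refresh request, via the new REFRESHQ_THREAD_NEED_CREATE state shown above. Condensed:

mutex_enter(&nag->refreshq_lock);
if (nag->refreshq_thread_state == REFRESHQ_THREAD_NEED_CREATE) {
	nag->refreshq_thread_state = REFRESHQ_THREAD_RUNNING;
	(void) zthread_create(NULL, 0, nfsauth_refresh_thread,
	    nag, 0, minclsyspri);
}
/* ... queue the refreshq_auth_node_t, then cv_broadcast ... */
mutex_exit(&nag->refreshq_lock);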
*/ - for (ren = list_head(&refreshq_queue); + for (ren = list_head(&nag->refreshq_queue); ren != NULL; - ren = list_next(&refreshq_queue, ren)) { + ren = list_next(&nag->refreshq_queue, + ren)) { if (ren->ren_exi == exi) { list_insert_tail( &ren->ren_authlist, ran); @@ -1117,16 +1170,17 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor, list_insert_tail(&ren->ren_authlist, ran); - list_insert_tail(&refreshq_queue, ren); + list_insert_tail(&nag->refreshq_queue, + ren); } - cv_broadcast(&refreshq_cv); + cv_broadcast(&nag->refreshq_cv); } else { strfree(ran->ran_netid); kmem_free(ran, sizeof (refreshq_auth_node_t)); } - mutex_exit(&refreshq_lock); + mutex_exit(&nag->refreshq_lock); } else { mutex_exit(&p->auth_lock); } @@ -1152,8 +1206,8 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor, atomic_inc_uint(&nfsauth_cache_miss); - if (nfsauth_retrieve(exi, svc_getnetid(req->rq_xprt), flavor, &addr, - &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) { + if (nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt), flavor, + &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) { if (uid != NULL) *uid = tmpuid; if (gid != NULL) @@ -1411,16 +1465,16 @@ exi_cache_reclaim(void *cdrarg) { int i; struct exportinfo *exi; + nfs_export_t *ne = nfs_get_export(); - rw_enter(&exported_lock, RW_READER); + rw_enter(&ne->exported_lock, RW_READER); for (i = 0; i < EXPTABLESIZE; i++) { - for (exi = exptable[i]; exi; exi = exi->fid_hash.next) { + for (exi = ne->exptable[i]; exi; exi = exi->fid_hash.next) exi_cache_trim(exi); - } } - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); atomic_inc_uint(&nfsauth_cache_reclaim); } diff --git a/usr/src/uts/common/fs/nfs/nfs_client.c b/usr/src/uts/common/fs/nfs/nfs_client.c index 531a04469556..87c0f43d46bb 100644 --- a/usr/src/uts/common/fs/nfs/nfs_client.c +++ b/usr/src/uts/common/fs/nfs/nfs_client.c @@ -18,14 +18,20 @@ * * CDDL HEADER END */ + /* * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2013 Nexenta Systems, Inc. All rights reserved. - * + */ + +/* * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. * All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. 
+ */ + #include #include #include @@ -61,6 +67,7 @@ #include #include +#include #include #include @@ -1355,8 +1362,8 @@ nfs_async_manager_stop(vfs_t *vfsp) int nfs_async_readahead(vnode_t *vp, u_offset_t blkoff, caddr_t addr, - struct seg *seg, cred_t *cr, void (*readahead)(vnode_t *, - u_offset_t, caddr_t, struct seg *, cred_t *)) + struct seg *seg, cred_t *cr, void (*readahead)(vnode_t *, u_offset_t, + caddr_t, struct seg *, cred_t *)) { rnode_t *rp; mntinfo_t *mi; @@ -1455,8 +1462,8 @@ nfs_async_readahead(vnode_t *vp, u_offset_t blkoff, caddr_t addr, int nfs_async_putapage(vnode_t *vp, page_t *pp, u_offset_t off, size_t len, - int flags, cred_t *cr, int (*putapage)(vnode_t *, page_t *, - u_offset_t, size_t, int, cred_t *)) + int flags, cred_t *cr, int (*putapage)(vnode_t *, page_t *, u_offset_t, + size_t, int, cred_t *)) { rnode_t *rp; mntinfo_t *mi; @@ -1577,8 +1584,8 @@ nfs_async_putapage(vnode_t *vp, page_t *pp, u_offset_t off, size_t len, int nfs_async_pageio(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len, - int flags, cred_t *cr, int (*pageio)(vnode_t *, page_t *, u_offset_t, - size_t, int, cred_t *)) + int flags, cred_t *cr, int (*pageio)(vnode_t *, page_t *, u_offset_t, + size_t, int, cred_t *)) { rnode_t *rp; mntinfo_t *mi; @@ -1710,7 +1717,7 @@ nfs_async_pageio(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len, void nfs_async_readdir(vnode_t *vp, rddir_cache *rdc, cred_t *cr, - int (*readdir)(vnode_t *, rddir_cache *, cred_t *)) + int (*readdir)(vnode_t *, rddir_cache *, cred_t *)) { rnode_t *rp; mntinfo_t *mi; @@ -1807,8 +1814,7 @@ nfs_async_readdir(vnode_t *vp, rddir_cache *rdc, cred_t *cr, void nfs_async_commit(vnode_t *vp, page_t *plist, offset3 offset, count3 count, - cred_t *cr, void (*commit)(vnode_t *, page_t *, offset3, count3, - cred_t *)) + cred_t *cr, void (*commit)(vnode_t *, page_t *, offset3, count3, cred_t *)) { rnode_t *rp; mntinfo_t *mi; @@ -2829,6 +2835,8 @@ nfs_clntinit(void) nfs4_clnt_init(); + nfscmd_init(); + #ifdef DEBUG nfs_clntup = B_TRUE; #endif @@ -2848,6 +2856,7 @@ nfs_clntfini(void) nfs_subrfini(); nfs_vfsfini(); nfs4_clnt_fini(); + nfscmd_fini(); } /* diff --git a/usr/src/uts/common/fs/nfs/nfs_cmd.c b/usr/src/uts/common/fs/nfs/nfs_cmd.c index 9ebc8ced6250..4d154d38de56 100644 --- a/usr/src/uts/common/fs/nfs/nfs_cmd.c +++ b/usr/src/uts/common/fs/nfs/nfs_cmd.c @@ -18,12 +18,14 @@ * * CDDL HEADER END */ + /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ + /* - * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. 
*/ #include @@ -48,32 +50,65 @@ #endif #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen)) -kmutex_t nfscmd_lock; -door_handle_t nfscmd_dh; +typedef struct nfscmd_globals { + kmutex_t nfscmd_lock; + door_handle_t nfscmd_dh; +} nfscmd_globals_t; + +static zone_key_t nfscmd_zone_key; static struct charset_cache *nfscmd_charmap(exportinfo_t *exi, struct sockaddr *sp); - +static void *nfscmd_zone_init(zoneid_t); +static void nfscmd_zone_fini(zoneid_t, void *); void nfscmd_args(uint_t did) { - mutex_enter(&nfscmd_lock); - if (nfscmd_dh) - door_ki_rele(nfscmd_dh); - nfscmd_dh = door_ki_lookup(did); - mutex_exit(&nfscmd_lock); + nfscmd_globals_t *ncg = zone_getspecific(nfscmd_zone_key, curzone); + + mutex_enter(&ncg->nfscmd_lock); + if (ncg->nfscmd_dh != NULL) + door_ki_rele(ncg->nfscmd_dh); + ncg->nfscmd_dh = door_ki_lookup(did); + mutex_exit(&ncg->nfscmd_lock); } void nfscmd_init(void) { - mutex_init(&nfscmd_lock, NULL, MUTEX_DEFAULT, NULL); + zone_key_create(&nfscmd_zone_key, nfscmd_zone_init, + NULL, nfscmd_zone_fini); } void nfscmd_fini(void) { + (void) zone_key_delete(nfscmd_zone_key); +} + +/*ARGSUSED*/ +static void * +nfscmd_zone_init(zoneid_t zoneid) +{ + nfscmd_globals_t *ncg; + + ncg = kmem_zalloc(sizeof (*ncg), KM_SLEEP); + mutex_init(&ncg->nfscmd_lock, NULL, MUTEX_DEFAULT, NULL); + + return (ncg); +} + +/*ARGSUSED*/ +static void +nfscmd_zone_fini(zoneid_t zoneid, void *data) +{ + nfscmd_globals_t *ncg = data; + + mutex_destroy(&ncg->nfscmd_lock); + if (ncg->nfscmd_dh) + door_ki_rele(ncg->nfscmd_dh); + kmem_free(ncg, sizeof (*ncg)); } /* @@ -91,13 +126,14 @@ nfscmd_send(nfscmd_arg_t *arg, nfscmd_res_t *res) door_info_t di; int ntries = 0; int last = 0; + nfscmd_globals_t *ncg = zone_getspecific(nfscmd_zone_key, curzone); retry: - mutex_enter(&nfscmd_lock); - dh = nfscmd_dh; + mutex_enter(&ncg->nfscmd_lock); + dh = ncg->nfscmd_dh; if (dh != NULL) door_ki_hold(dh); - mutex_exit(&nfscmd_lock); + mutex_exit(&ncg->nfscmd_lock); if (dh == NULL) { /* @@ -144,10 +180,10 @@ nfscmd_send(nfscmd_arg_t *arg, nfscmd_res_t *res) * chance to restart mountd(1m) * and establish a new door handle. */ - mutex_enter(&nfscmd_lock); - if (dh == nfscmd_dh) - nfscmd_dh = NULL; - mutex_exit(&nfscmd_lock); + mutex_enter(&ncg->nfscmd_lock); + if (dh == ncg->nfscmd_dh) + ncg->nfscmd_dh = NULL; + mutex_exit(&ncg->nfscmd_lock); door_ki_rele(dh); delay(hz); goto retry; diff --git a/usr/src/uts/common/fs/nfs/nfs_export.c b/usr/src/uts/common/fs/nfs/nfs_export.c index 7f80427458d9..81471998ebaa 100644 --- a/usr/src/uts/common/fs/nfs/nfs_export.c +++ b/usr/src/uts/common/fs/nfs/nfs_export.c @@ -20,7 +20,6 @@ */ /* - * Copyright 2017 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. */ @@ -29,6 +28,9 @@ * All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ #include #include @@ -65,12 +67,8 @@ #include #include #include -#include - -treenode_t *ns_root; -struct exportinfo *exptable_path_hash[PKP_HASH_SIZE]; -struct exportinfo *exptable[EXPTABLESIZE]; +static zone_key_t nfs_export_key; /* * exi_id support @@ -79,15 +77,17 @@ struct exportinfo *exptable[EXPTABLESIZE]; * exi_id_overflow The exi_id_next already overflowed, so we should * thoroughly check for duplicates. * exi_id_tree AVL tree indexed by exi_id. + * nfs_exi_id_lock Lock to protect the export ID list * * All exi_id_next, exi_id_overflow, and exi_id_tree are protected by - * exported_lock. + * nfs_exi_id_lock. 
*/ static int exi_id_next; static bool_t exi_id_overflow; avl_tree_t exi_id_tree; +krwlock_t nfs_exi_id_lock; -static int unexport(exportinfo_t *); +static int unexport(nfs_export_t *, exportinfo_t *); static void exportfree(exportinfo_t *); static int loadindex(exportdata_t *); @@ -95,31 +95,18 @@ extern void nfsauth_cache_free(exportinfo_t *); extern int sec_svc_loadrootnames(int, int, caddr_t **, model_t); extern void sec_svc_freerootnames(int, int, caddr_t *); -static int build_seclist_nodups(exportdata_t *, secinfo_t *, int); -static void srv_secinfo_add(secinfo_t **, int *, secinfo_t *, int, int); -static void srv_secinfo_remove(secinfo_t **, int *, secinfo_t *, int); -static void srv_secinfo_treeclimb(exportinfo_t *, secinfo_t *, int, bool_t); +static int build_seclist_nodups(exportdata_t *, secinfo_t *, int); +static void srv_secinfo_add(secinfo_t **, int *, secinfo_t *, int, int); +static void srv_secinfo_remove(secinfo_t **, int *, secinfo_t *, int); +static void srv_secinfo_treeclimb(nfs_export_t *, exportinfo_t *, + secinfo_t *, int, bool_t); #ifdef VOLATILE_FH_TEST static struct ex_vol_rename *find_volrnm_fh(exportinfo_t *, nfs_fh4 *); static uint32_t find_volrnm_fh_id(exportinfo_t *, nfs_fh4 *); -static void free_volrnm_list(exportinfo_t *); +static void free_volrnm_list(exportinfo_t *); #endif /* VOLATILE_FH_TEST */ -/* - * exported_lock Read/Write lock that protects the exportinfo list. - * This lock must be held when searching or modifiying - * the exportinfo list. - */ -krwlock_t exported_lock; - -/* - * "public" and default (root) location for public filehandle - */ -struct exportinfo *exi_public, *exi_root; - -fid_t exi_rootfid; /* for checking the default public file handle */ - fhandle_t nullfh2; /* for comparing V2 filehandles */ /* @@ -132,6 +119,12 @@ fhandle_t nullfh2; /* for comparing V2 filehandles */ #define exptablehash(fsid, fid) (nfs_fhhash((fsid), (fid)) & (EXPTABLESIZE - 1)) +extern nfs_export_t * +nfs_get_export(void) +{ + return (zone_getspecific(nfs_export_key, curzone)); +} + static uint8_t xor_hash(uint8_t *data, int len) { @@ -718,12 +711,12 @@ vis2exi(treenode_t *tnode) * given exportinfo from its ancestors upto the system root. 
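nfs_get_export() above is the zone-aware replacement for the old exptable/exported_lock globals; code that needs the current zone's export state fetches it once and locks the zone-local rwlock. A hypothetical reader:

nfs_export_t *ne = nfs_get_export();	/* this zone's export state */

rw_enter(&ne->exported_lock, RW_READER);
/* ... walk ne->exptable[] or ne->exptable_path_hash[] ... */
rw_exit(&ne->exported_lock);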
*/ void -srv_secinfo_treeclimb(exportinfo_t *exip, secinfo_t *sec, int seccnt, - bool_t isadd) +srv_secinfo_treeclimb(nfs_export_t *ne, exportinfo_t *exip, secinfo_t *sec, + int seccnt, bool_t isadd) { treenode_t *tnode = exip->exi_tree; - ASSERT(RW_WRITE_HELD(&exported_lock)); + ASSERT(RW_WRITE_HELD(&ne->exported_lock)); ASSERT(tnode != NULL); if (seccnt == 0) @@ -797,16 +790,16 @@ srv_secinfo_treeclimb(exportinfo_t *exip, secinfo_t *sec, int seccnt, *(bucket) = (exi); void -export_link(exportinfo_t *exi) +export_link(nfs_export_t *ne, exportinfo_t *exi) { exportinfo_t **bckt; - ASSERT(RW_WRITE_HELD(&exported_lock)); + ASSERT(RW_WRITE_HELD(&ne->exported_lock)); - bckt = &exptable[exptablehash(&exi->exi_fsid, &exi->exi_fid)]; + bckt = &ne->exptable[exptablehash(&exi->exi_fsid, &exi->exi_fid)]; exp_hash_link(exi, fid_hash, bckt); - bckt = &exptable_path_hash[pkp_tab_hash(exi->exi_export.ex_path, + bckt = &ne->exptable_path_hash[pkp_tab_hash(exi->exi_export.ex_path, strlen(exi->exi_export.ex_path))]; exp_hash_link(exi, path_hash, bckt); } @@ -829,12 +822,12 @@ exi_id_compar(const void *v1, const void *v2) } int -exi_id_get_next(void) +exi_id_get_next() { struct exportinfo e; int ret = exi_id_next; - ASSERT(RW_WRITE_HELD(&exported_lock)); + ASSERT(RW_LOCK_HELD(&nfs_exi_id_lock)); do { exi_id_next++; @@ -853,134 +846,157 @@ exi_id_get_next(void) return (ret); } -/* - * Initialization routine for export routines. Should only be called once. - */ -int -nfs_exportinit(void) +/*ARGSUSED*/ +static void * +nfs_export_zone_init(zoneid_t zoneid) { - int error; int i; + nfs_export_t *ne; - rw_init(&exported_lock, NULL, RW_DEFAULT, NULL); + ne = kmem_zalloc(sizeof (*ne), KM_SLEEP); - /* - * exi_id handling initialization - */ - exi_id_next = 0; - exi_id_overflow = FALSE; - avl_create(&exi_id_tree, exi_id_compar, sizeof (struct exportinfo), - offsetof(struct exportinfo, exi_id_link)); + rw_init(&ne->exported_lock, NULL, RW_DEFAULT, NULL); /* * Allocate the place holder for the public file handle, which * is all zeroes. It is initially set to the root filesystem. 
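export_link() above threads each exportinfo onto two zone-local hash chains, one keyed by (fsid, fid) and one by the share path, which is what lets exportfs() below resolve unshares by path without a table scan. Condensed, with path abbreviating exi->exi_export.ex_path:

bckt = &ne->exptable[exptablehash(&exi->exi_fsid, &exi->exi_fid)];
exp_hash_link(exi, fid_hash, bckt);		/* lookup by filehandle */

bckt = &ne->exptable_path_hash[pkp_tab_hash(path, strlen(path))];
exp_hash_link(exi, path_hash, bckt);		/* lookup by share path */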
*/ - exi_root = kmem_zalloc(sizeof (*exi_root), KM_SLEEP); - exi_public = exi_root; - - exi_root->exi_export.ex_flags = EX_PUBLIC; - exi_root->exi_export.ex_pathlen = 1; /* length of "/" */ - exi_root->exi_export.ex_path = - kmem_alloc(exi_root->exi_export.ex_pathlen + 1, KM_SLEEP); - exi_root->exi_export.ex_path[0] = '/'; - exi_root->exi_export.ex_path[1] = '\0'; - - exi_root->exi_count = 1; - mutex_init(&exi_root->exi_lock, NULL, MUTEX_DEFAULT, NULL); - - exi_root->exi_vp = rootdir; - exi_rootfid.fid_len = MAXFIDSZ; - error = vop_fid_pseudo(exi_root->exi_vp, &exi_rootfid); - if (error) { - mutex_destroy(&exi_root->exi_lock); - kmem_free(exi_root, sizeof (*exi_root)); - return (error); + ne->exi_root = kmem_zalloc(sizeof (*ne->exi_root), KM_SLEEP); + ne->exi_public = ne->exi_root; + + ne->exi_root->exi_export.ex_flags = EX_PUBLIC; + ne->exi_root->exi_export.ex_pathlen = 1; /* length of "/" */ + ne->exi_root->exi_export.ex_path = + kmem_alloc(ne->exi_root->exi_export.ex_pathlen + 1, KM_SLEEP); + ne->exi_root->exi_export.ex_path[0] = '/'; + ne->exi_root->exi_export.ex_path[1] = '\0'; + + ne->exi_root->exi_count = 1; + mutex_init(&ne->exi_root->exi_lock, NULL, MUTEX_DEFAULT, NULL); + + ne->exi_root->exi_vp = ZONE_ROOTVP(); + ne->exi_rootfid.fid_len = MAXFIDSZ; + if (vop_fid_pseudo(ne->exi_root->exi_vp, &ne->exi_rootfid) != 0) { + mutex_destroy(&ne->exi_root->exi_lock); + kmem_free(ne->exi_root->exi_export.ex_path, + ne->exi_root->exi_export.ex_pathlen + 1); + kmem_free(ne->exi_root, sizeof (*ne->exi_root)); + return (NULL); } - /* - * Initialize auth cache and auth cache lock - */ + /* Initialize auth cache and auth cache lock */ for (i = 0; i < AUTH_TABLESIZE; i++) { - exi_root->exi_cache[i] = kmem_alloc(sizeof (avl_tree_t), + ne->exi_root->exi_cache[i] = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); - avl_create(exi_root->exi_cache[i], nfsauth_cache_clnt_compar, - sizeof (struct auth_cache_clnt), + avl_create(ne->exi_root->exi_cache[i], + nfsauth_cache_clnt_compar, sizeof (struct auth_cache_clnt), offsetof(struct auth_cache_clnt, authc_link)); } - rw_init(&exi_root->exi_cache_lock, NULL, RW_DEFAULT, NULL); + rw_init(&ne->exi_root->exi_cache_lock, NULL, RW_DEFAULT, NULL); - /* setup the fhandle template */ - exi_root->exi_fh.fh_fsid = rootdir->v_vfsp->vfs_fsid; - exi_root->exi_fh.fh_xlen = exi_rootfid.fid_len; - bcopy(exi_rootfid.fid_data, exi_root->exi_fh.fh_xdata, - exi_rootfid.fid_len); - exi_root->exi_fh.fh_len = sizeof (exi_root->exi_fh.fh_data); + /* Setup the fhandle template */ + ne->exi_root->exi_fh.fh_fsid = rootdir->v_vfsp->vfs_fsid; + ne->exi_root->exi_fh.fh_xlen = ne->exi_rootfid.fid_len; + bcopy(ne->exi_rootfid.fid_data, ne->exi_root->exi_fh.fh_xdata, + ne->exi_rootfid.fid_len); + ne->exi_root->exi_fh.fh_len = sizeof (ne->exi_root->exi_fh.fh_data); - rw_enter(&exported_lock, RW_WRITER); + rw_enter(&ne->exported_lock, RW_WRITER); - /* - * Publish the exportinfo in the hash table - */ - export_link(exi_root); + /* Publish the exportinfo in the hash table */ + export_link(ne, ne->exi_root); - /* - * Initialize exi_id and exi_kstats - */ - exi_root->exi_id = exi_id_get_next(); - avl_add(&exi_id_tree, exi_root); - exi_root->exi_kstats = exp_kstats_init(getzoneid(), exi_root->exi_id, - exi_root->exi_export.ex_path, exi_root->exi_export.ex_pathlen, - FALSE); + /* Initialize exi_id and exi_kstats */ + rw_enter(&nfs_exi_id_lock, RW_WRITER); + ne->exi_root->exi_id = exi_id_get_next(); + avl_add(&exi_id_tree, ne->exi_root); + rw_exit(&nfs_exi_id_lock); + ne->exi_root->exi_kstats = 
exp_kstats_init(zoneid, + ne->exi_root->exi_id, ne->exi_root->exi_export.ex_path, + ne->exi_root->exi_export.ex_pathlen, FALSE); - rw_exit(&exported_lock); - - nfslog_init(); - ns_root = NULL; + rw_exit(&ne->exported_lock); + ne->ns_root = NULL; - return (0); + return (ne); } -/* - * Finalization routine for export routines. Called to cleanup previously - * initialization work when the NFS server module could not be loaded correctly. - */ -void -nfs_exportfini(void) +/*ARGSUSED*/ +static void +nfs_export_zone_fini(zoneid_t zoneid, void *data) { int i; + nfs_export_t *ne = data; + struct exportinfo *exi; - rw_enter(&exported_lock, RW_WRITER); + rw_enter(&ne->exported_lock, RW_WRITER); + rw_enter(&nfs_exi_id_lock, RW_WRITER); - exp_kstats_delete(exi_root->exi_kstats); - avl_remove(&exi_id_tree, exi_root); - export_unlink(exi_root); + exp_kstats_delete(ne->exi_root->exi_kstats); + avl_remove(&exi_id_tree, ne->exi_root); + export_unlink(ne, ne->exi_root); - rw_exit(&exported_lock); + rw_exit(&nfs_exi_id_lock); + rw_exit(&ne->exported_lock); - /* - * Deallocate the place holder for the public file handle. - */ - srv_secinfo_list_free(exi_root->exi_export.ex_secinfo, - exi_root->exi_export.ex_seccnt); - mutex_destroy(&exi_root->exi_lock); + /* Deallocate the place holder for the public file handle */ + srv_secinfo_list_free(ne->exi_root->exi_export.ex_secinfo, + ne->exi_root->exi_export.ex_seccnt); + mutex_destroy(&ne->exi_root->exi_lock); - rw_destroy(&exi_root->exi_cache_lock); + rw_destroy(&ne->exi_root->exi_cache_lock); for (i = 0; i < AUTH_TABLESIZE; i++) { - avl_destroy(exi_root->exi_cache[i]); - kmem_free(exi_root->exi_cache[i], sizeof (avl_tree_t)); + avl_destroy(ne->exi_root->exi_cache[i]); + kmem_free(ne->exi_root->exi_cache[i], sizeof (avl_tree_t)); } - exp_kstats_fini(exi_root->exi_kstats); + exp_kstats_fini(ne->exi_root->exi_kstats); + kmem_free(ne->exi_root->exi_export.ex_path, + ne->exi_root->exi_export.ex_pathlen + 1); + kmem_free(ne->exi_root, sizeof (*ne->exi_root)); - kmem_free(exi_root, sizeof (*exi_root)); + exi = avl_first(&exi_id_tree); + while (exi != NULL) { + struct exportinfo *nexi = AVL_NEXT(&exi_id_tree, exi); + if (zoneid == exi->exi_zoneid) + (void) unexport(ne, exi); + exi = nexi; + } - /* - * exi_id handling cleanup - */ - avl_destroy(&exi_id_tree); + rw_destroy(&ne->exported_lock); + kmem_free(ne, sizeof (*ne)); +} - rw_destroy(&exported_lock); +/* + * Initialization routine for export routines. + * Should only be called once. + */ +void +nfs_exportinit(void) +{ + rw_init(&nfs_exi_id_lock, NULL, RW_DEFAULT, NULL); + + /* exi_id handling initialization */ + exi_id_next = 0; + exi_id_overflow = FALSE; + avl_create(&exi_id_tree, exi_id_compar, sizeof (struct exportinfo), + offsetof(struct exportinfo, exi_id_link)); + + zone_key_create(&nfs_export_key, nfs_export_zone_init, + NULL, nfs_export_zone_fini); + + nfslog_init(); +} + +/* + * Finalization routine for export routines. + */ +void +nfs_exportfini(void) +{ + (void) zone_key_delete(nfs_export_key); + avl_destroy(&exi_id_tree); + rw_destroy(&nfs_exi_id_lock); } /* @@ -1019,6 +1035,7 @@ rfs_gsscallback(struct svc_req *req, gss_cred_id_t deleg, void *gss_context, int i, j; rpc_gss_rawcred_t *raw_cred; struct exportinfo *exi; + nfs_export_t *ne = nfs_get_export(); /* * We don't deal with delegated credentials. 
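nfs_export_zone_fini() above also unexports anything the dying zone still has shared, so a zone halt does not leave exports behind. The walk caches the AVL successor before calling unexport(), since unexport() frees the node. Condensed:

exi = avl_first(&exi_id_tree);
while (exi != NULL) {
	struct exportinfo *nexi = AVL_NEXT(&exi_id_tree, exi);

	if (zoneid == exi->exi_zoneid)	/* only this zone's entries */
		(void) unexport(ne, exi);
	exi = nexi;
}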
@@ -1029,9 +1046,10 @@ rfs_gsscallback(struct svc_req *req, gss_cred_id_t deleg, void *gss_context, raw_cred = lock->raw_cred; *cookie = NULL; - rw_enter(&exported_lock, RW_READER); + rw_enter(&ne->exported_lock, RW_READER); + for (i = 0; i < EXPTABLESIZE; i++) { - exi = exptable[i]; + exi = ne->exptable[i]; while (exi) { if (exi->exi_export.ex_seccnt > 0) { struct secinfo *secp; @@ -1071,7 +1089,7 @@ rfs_gsscallback(struct svc_req *req, gss_cred_id_t deleg, void *gss_context, } } done: - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); /* * If no nfs pseudo number mapping can be found in the export @@ -1138,6 +1156,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) int oldcnt; int i; struct pathname lookpn; + nfs_export_t *ne = nfs_get_export(); STRUCT_SET_HANDLE(uap, model, args); @@ -1146,25 +1165,25 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) return (error); /* Walk the export list looking for that pathname */ - rw_enter(&exported_lock, RW_READER); + rw_enter(&ne->exported_lock, RW_READER); DTRACE_PROBE(nfss__i__exported_lock1_start); - for (ex1 = exptable_path_hash[pkp_tab_hash(lookpn.pn_path, + for (ex1 = ne->exptable_path_hash[pkp_tab_hash(lookpn.pn_path, strlen(lookpn.pn_path))]; ex1; ex1 = ex1->path_hash.next) { - if (ex1 != exi_root && 0 == + if (ex1 != ne->exi_root && 0 == strcmp(ex1->exi_export.ex_path, lookpn.pn_path)) { exi_hold(ex1); break; } } DTRACE_PROBE(nfss__i__exported_lock1_stop); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); /* Is this an unshare? */ if (STRUCT_FGETP(uap, uex) == NULL) { pn_free(&lookpn); if (ex1 == NULL) return (EINVAL); - error = unexport(ex1); + error = unexport(ne, ex1); exi_rele(&ex1); return (error); } @@ -1260,15 +1279,15 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) * Do not allow re-sharing a shared vnode under a different path * PSEUDO export has ex_path fabricated, e.g. "/tmp (pseudo)", skip it. */ - rw_enter(&exported_lock, RW_READER); + rw_enter(&ne->exported_lock, RW_READER); DTRACE_PROBE(nfss__i__exported_lock2_start); - for (ex2 = exptable[exptablehash(&fsid, &fid)]; ex2; + for (ex2 = ne->exptable[exptablehash(&fsid, &fid)]; ex2; ex2 = ex2->fid_hash.next) { - if (ex2 != exi_root && !PSEUDO(ex2) && + if (ex2 != ne->exi_root && !PSEUDO(ex2) && VN_CMP(ex2->exi_vp, vp) && strcmp(ex2->exi_export.ex_path, lookpn.pn_path) != 0) { DTRACE_PROBE(nfss__i__exported_lock2_stop); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); VN_RELE(vp); if (dvp != NULL) VN_RELE(dvp); @@ -1277,7 +1296,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) } } DTRACE_PROBE(nfss__i__exported_lock2_stop); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); pn_free(&lookpn); exi = kmem_zalloc(sizeof (*exi), KM_SLEEP); @@ -1285,6 +1304,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) exi->exi_fid = fid; exi->exi_vp = vp; exi->exi_count = 1; + exi->exi_zoneid = crgetzoneid(cr); exi->exi_volatile_dev = (vfssw[vp->v_vfsp->vfs_fstype].vsw_flag & VSW_VOLATILEDEV) ? 1 : 0; mutex_init(&exi->exi_lock, NULL, MUTEX_DEFAULT, NULL); @@ -1558,10 +1578,10 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) /* * Insert the new entry at the front of the export list */ - rw_enter(&exported_lock, RW_WRITER); + rw_enter(&ne->exported_lock, RW_WRITER); DTRACE_PROBE(nfss__i__exported_lock3_start); - export_link(exi); + export_link(ne, exi); /* * Check the rest of the list for an old entry for the fs. 
@@ -1569,9 +1589,11 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) * only reference and then free it. */ for (ex = exi->fid_hash.next; ex != NULL; ex = ex->fid_hash.next) { - if (ex != exi_root && VN_CMP(ex->exi_vp, vp)) { + if (ex != ne->exi_root && VN_CMP(ex->exi_vp, vp)) { + rw_enter(&nfs_exi_id_lock, RW_WRITER); avl_remove(&exi_id_tree, ex); - export_unlink(ex); + rw_exit(&nfs_exi_id_lock); + export_unlink(ne, ex); break; } } @@ -1580,8 +1602,8 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) * If the public filehandle is pointing at the * old entry, then point it back at the root. */ - if (ex != NULL && ex == exi_public) - exi_public = exi_root; + if (ex != NULL && ex == ne->exi_public) + ne->exi_public = ne->exi_root; /* * If the public flag is on, make the global exi_public @@ -1589,7 +1611,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) * we can distinguish it from the place holder export. */ if (kex->ex_flags & EX_PUBLIC) { - exi_public = exi; + ne->exi_public = exi; kex->ex_flags &= ~EX_PUBLIC; } @@ -1621,7 +1643,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) exi->exi_tree->tree_exi = exi; /* Update the change timestamp */ - tree_update_change(exi->exi_tree, NULL); + tree_update_change(ne, exi->exi_tree, NULL); } /* @@ -1631,7 +1653,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) */ newcnt = build_seclist_nodups(&exi->exi_export, newsec, FALSE); - srv_secinfo_treeclimb(exi, newsec, newcnt, TRUE); + srv_secinfo_treeclimb(ne, exi, newsec, newcnt, TRUE); /* * If re-sharing an old export entry, update the secinfo data @@ -1656,7 +1678,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) * Remove old flavor refs last. */ srv_secinfo_exp2exp(&exi->exi_export, oldsec, oldcnt); - srv_secinfo_treeclimb(ex, oldsec, oldcnt, FALSE); + srv_secinfo_treeclimb(ne, ex, oldsec, oldcnt, FALSE); } } @@ -1679,16 +1701,20 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) exp_kstats_reset(exi->exi_kstats, kex->ex_path, kex->ex_pathlen, FALSE); } else { + rw_enter(&nfs_exi_id_lock, RW_READER); exi->exi_id = exi_id_get_next(); - exi->exi_kstats = exp_kstats_init(getzoneid(), exi->exi_id, + rw_exit(&nfs_exi_id_lock); + exi->exi_kstats = exp_kstats_init(crgetzoneid(cr), exi->exi_id, kex->ex_path, kex->ex_pathlen, FALSE); } + rw_enter(&nfs_exi_id_lock, RW_WRITER); avl_add(&exi_id_tree, exi); + rw_exit(&nfs_exi_id_lock); DTRACE_PROBE(nfss__i__exported_lock3_stop); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); - if (exi_public == exi || kex->ex_flags & EX_LOG) { + if (ne->exi_public == exi || kex->ex_flags & EX_LOG) { /* * Log share operation to this buffer only. */ @@ -1702,9 +1728,9 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) out7: /* Unlink the new export in exptable. 
*/ - export_unlink(exi); + export_unlink(ne, exi); DTRACE_PROBE(nfss__i__exported_lock3_stop); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); out6: if (kex->ex_flags & EX_INDEX) kmem_free(kex->ex_index, strlen(kex->ex_index) + 1); @@ -1748,9 +1774,9 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) * Remove the exportinfo from the export list */ void -export_unlink(struct exportinfo *exi) +export_unlink(nfs_export_t *ne, struct exportinfo *exi) { - ASSERT(RW_WRITE_HELD(&exported_lock)); + ASSERT(RW_WRITE_HELD(&ne->exported_lock)); exp_hash_unlink(exi, fid_hash); exp_hash_unlink(exi, path_hash); @@ -1760,30 +1786,31 @@ export_unlink(struct exportinfo *exi) * Unexport an exported filesystem */ static int -unexport(struct exportinfo *exi) +unexport(nfs_export_t *ne, struct exportinfo *exi) { struct secinfo cursec[MAX_FLAVORS]; int curcnt; - rw_enter(&exported_lock, RW_WRITER); + rw_enter(&ne->exported_lock, RW_WRITER); /* Check if exi is still linked in the export table */ if (!EXP_LINKED(exi) || PSEUDO(exi)) { - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); return (EINVAL); } exp_kstats_delete(exi->exi_kstats); + rw_enter(&nfs_exi_id_lock, RW_WRITER); avl_remove(&exi_id_tree, exi); - export_unlink(exi); + rw_exit(&nfs_exi_id_lock); + export_unlink(ne, exi); /* * Remove security flavors before treeclimb_unexport() is called * because srv_secinfo_treeclimb needs the namespace tree */ curcnt = build_seclist_nodups(&exi->exi_export, cursec, TRUE); - - srv_secinfo_treeclimb(exi, cursec, curcnt, FALSE); + srv_secinfo_treeclimb(ne, exi, cursec, curcnt, FALSE); /* * If there's a visible list, then need to leave @@ -1793,7 +1820,7 @@ unexport(struct exportinfo *exi) if (exi->exi_visible != NULL) { struct exportinfo *newexi; - newexi = pseudo_exportfs(exi->exi_vp, &exi->exi_fid, + newexi = pseudo_exportfs(ne, exi->exi_vp, &exi->exi_fid, exi->exi_visible, &exi->exi_export); exi->exi_visible = NULL; @@ -1802,12 +1829,12 @@ unexport(struct exportinfo *exi) newexi->exi_tree->tree_exi = newexi; /* Update the change timestamp */ - tree_update_change(exi->exi_tree, NULL); + tree_update_change(ne, exi->exi_tree, NULL); } else { - treeclimb_unexport(exi); + treeclimb_unexport(ne, exi); } - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); /* * Need to call into the NFSv4 server and release all data @@ -1827,15 +1854,14 @@ unexport(struct exportinfo *exi) * If this was a public export, restore * the public filehandle to the root. */ - if (exi == exi_public) { - exi_public = exi_root; + if (exi == ne->exi_public) { + ne->exi_public = ne->exi_root; - nfslog_share_record(exi_public, CRED()); + nfslog_share_record(ne->exi_public, CRED()); } - if (exi->exi_export.ex_flags & EX_LOG) { + if (exi->exi_export.ex_flags & EX_LOG) nfslog_unshare_record(exi, CRED()); - } exi_rele(&exi); return (0); @@ -2562,9 +2588,10 @@ struct exportinfo * checkexport(fsid_t *fsid, fid_t *fid) { struct exportinfo *exi; + nfs_export_t *ne = nfs_get_export(); - rw_enter(&exported_lock, RW_READER); - for (exi = exptable[exptablehash(fsid, fid)]; + rw_enter(&ne->exported_lock, RW_READER); + for (exi = ne->exptable[exptablehash(fsid, fid)]; exi != NULL; exi = exi->fid_hash.next) { if (exportmatch(exi, fsid, fid)) { @@ -2575,15 +2602,15 @@ checkexport(fsid_t *fsid, fid_t *fid) * handle. 
*/ if (exi->exi_export.ex_flags & EX_PUBLIC) { - exi = exi_public; + exi = ne->exi_public; } exi_hold(exi); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); return (exi); } } - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); return (NULL); } @@ -2599,10 +2626,11 @@ struct exportinfo * checkexport4(fsid_t *fsid, fid_t *fid, vnode_t *vp) { struct exportinfo *exi; + nfs_export_t *ne = nfs_get_export(); - ASSERT(RW_LOCK_HELD(&exported_lock)); + ASSERT(RW_LOCK_HELD(&ne->exported_lock)); - for (exi = exptable[exptablehash(fsid, fid)]; + for (exi = ne->exptable[exptablehash(fsid, fid)]; exi != NULL; exi = exi->fid_hash.next) { if (exportmatch(exi, fsid, fid)) { @@ -2613,7 +2641,7 @@ checkexport4(fsid_t *fsid, fid_t *fid, vnode_t *vp) * handle. */ if (exi->exi_export.ex_flags & EX_PUBLIC) { - exi = exi_public; + exi = ne->exi_public; } /* diff --git a/usr/src/uts/common/fs/nfs/nfs_log.c b/usr/src/uts/common/fs/nfs/nfs_log.c index 7cf0fe24e979..a314f4319abb 100644 --- a/usr/src/uts/common/fs/nfs/nfs_log.c +++ b/usr/src/uts/common/fs/nfs/nfs_log.c @@ -18,10 +18,15 @@ * * CDDL HEADER END */ + /* * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #include #include #include @@ -43,8 +48,6 @@ #define NUM_RECORDS_TO_WRITE 256 #define NUM_BYTES_TO_WRITE 65536 -extern krwlock_t exported_lock; - static int nfslog_num_records_to_write = NUM_RECORDS_TO_WRITE; static int nfslog_num_bytes_to_write = NUM_BYTES_TO_WRITE; @@ -595,11 +598,8 @@ log_file_rele(struct log_file *lfp) */ /* ARGSUSED */ void * -nfslog_record_alloc( - struct exportinfo *exi, - int alloc_indx, - void **cookie, - int flags) +nfslog_record_alloc(struct exportinfo *exi, int alloc_indx, void **cookie, + int flags) { struct lr_alloc *lrp; @@ -652,7 +652,7 @@ nfslog_record_alloc( */ void nfslog_record_put(void *cookie, size_t size, bool_t sync, - unsigned int which_buffers) + unsigned int which_buffers) { struct lr_alloc *lrp = (struct lr_alloc *)cookie; struct log_buffer *lbp = lrp->lb; @@ -768,8 +768,8 @@ nfslog_records_flush_to_disk_nolock(struct log_buffer *lbp) * them to the end of the log file. 
*/ static int -nfslog_write_logrecords(struct log_file *lfp, - struct lr_alloc *lrp_writers, int num_recs) +nfslog_write_logrecords(struct log_file *lfp, struct lr_alloc *lrp_writers, + int num_recs) { struct uio uio; struct iovec *iovp; @@ -1161,8 +1161,8 @@ nfsl_flush(struct nfsl_flush_args *args, model_t model) /* * Do the work asynchronously */ - (void) thread_create(NULL, 0, nfslog_do_flush, - tparams, 0, &p0, TS_RUN, minclsyspri); + (void) zthread_create(NULL, 0, nfslog_do_flush, + tparams, 0, minclsyspri); } return (error); @@ -1249,8 +1249,7 @@ nfslog_do_flush(struct flush_thread_params *tparams) */ kmem_free(args->buff, args->buff_len); kmem_free(tparams, sizeof (*tparams)); - thread_exit(); - /* NOTREACHED */ + zthread_exit(); } tparams->tp_error = error; @@ -1529,6 +1528,7 @@ static int nfslog_dispatch_table_arglen = sizeof (nfslog_dispatch_table) / */ struct exportinfo * nfslog_get_exi( + nfs_export_t *ne, struct exportinfo *exi, struct svc_req *req, caddr_t res, @@ -1560,7 +1560,7 @@ nfslog_get_exi( return (exi); } - if (exi != exi_public) + if (exi != ne->exi_public) return (NULL); /* @@ -1625,8 +1625,8 @@ static long long rfslog_records_ignored = 0; */ void nfslog_write_record(struct exportinfo *exi, struct svc_req *req, - caddr_t args, caddr_t res, cred_t *cr, struct netbuf *pnb, - unsigned int record_id, unsigned int which_buffers) + caddr_t args, caddr_t res, cred_t *cr, struct netbuf *pnb, + unsigned int record_id, unsigned int which_buffers) { struct nfslog_prog_disp *progtable; /* prog struct */ struct nfslog_vers_disp *verstable; /* version struct */ @@ -1764,17 +1764,17 @@ nfslog_write_record(struct exportinfo *exi, struct svc_req *req, static char * get_publicfh_path(int *alloc_length) { - extern struct exportinfo *exi_public; char *pubpath; + nfs_export_t *ne = nfs_get_export(); - rw_enter(&exported_lock, RW_READER); + rw_enter(&ne->exported_lock, RW_READER); - *alloc_length = exi_public->exi_export.ex_pathlen + 1; + *alloc_length = ne->exi_public->exi_export.ex_pathlen + 1; pubpath = kmem_alloc(*alloc_length, KM_SLEEP); - (void) strcpy(pubpath, exi_public->exi_export.ex_path); + (void) strcpy(pubpath, ne->exi_public->exi_export.ex_path); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); return (pubpath); } @@ -1870,11 +1870,8 @@ nfslog_unshare_record(struct exportinfo *exi, cred_t *cr) void -nfslog_getfh(struct exportinfo *exi, - fhandle *fh, - char *fname, - enum uio_seg seg, - cred_t *cr) +nfslog_getfh(struct exportinfo *exi, fhandle *fh, char *fname, enum uio_seg seg, + cred_t *cr) { struct svc_req req; int res = 0; diff --git a/usr/src/uts/common/fs/nfs/nfs_server.c b/usr/src/uts/common/fs/nfs/nfs_server.c index d6f40dab0a6b..b2c28de43b11 100644 --- a/usr/src/uts/common/fs/nfs/nfs_server.c +++ b/usr/src/uts/common/fs/nfs/nfs_server.c @@ -21,10 +21,6 @@ /* * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 Bayard G. Bell. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. - * Copyright 2016 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2017 Joyent Inc */ /* @@ -33,6 +29,13 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2011 Bayard G. Bell. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. 
+ * Copyright (c) 2017 Joyent Inc + */ + #include #include #include @@ -84,7 +87,6 @@ #include #include #include -#include #include #include #include @@ -110,6 +112,7 @@ static struct modlinkage modlinkage = { MODREV_1, (void *)&modlmisc, NULL }; +zone_key_t nfssrv_zone_key; kmem_cache_t *nfs_xuio_cache; int nfs_loaned_buffers = 0; @@ -118,10 +121,7 @@ _init(void) { int status; - if ((status = nfs_srvinit()) != 0) { - cmn_err(CE_WARN, "_init: nfs_srvinit failed"); - return (status); - } + nfs_srvinit(); status = mod_install((struct modlinkage *)&modlinkage); if (status != 0) { @@ -178,27 +178,28 @@ _info(struct modinfo *modinfop) * supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported * public (i.e., not the placeholder). */ -#define PUBLICFH_CHECK(disp, exi, fsid, xfid) \ +#define PUBLICFH_CHECK(ne, disp, exi, fsid, xfid) \ ((disp->dis_flags & RPC_PUBLICFH_OK) && \ ((exi->exi_export.ex_flags & EX_PUBLIC) || \ - (exi == exi_public && exportmatch(exi_root, \ + (exi == ne->exi_public && exportmatch(ne->exi_root, \ fsid, xfid)))) static void nfs_srv_shutdown_all(int); -static void rfs4_server_start(int); +static void rfs4_server_start(nfs_globals_t *, int); static void nullfree(void); static void rfs_dispatch(struct svc_req *, SVCXPRT *); static void acl_dispatch(struct svc_req *, SVCXPRT *); static void common_dispatch(struct svc_req *, SVCXPRT *, rpcvers_t, rpcvers_t, char *, struct rpc_disptable *); -static void hanfsv4_failover(void); static int checkauth(struct exportinfo *, struct svc_req *, cred_t *, int, bool_t, bool_t *); static char *client_name(struct svc_req *req); static char *client_addr(struct svc_req *req, char *buf); extern int sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *); extern bool_t sec_svc_inrootlist(int, caddr_t, int, caddr_t *); +static void *nfs_srv_zone_init(zoneid_t); +static void nfs_srv_zone_fini(zoneid_t, void *); #define NFSLOG_COPY_NETBUF(exi, xprt, nb) { \ (nb)->maxlen = (xprt)->xp_rtaddr.maxlen; \ @@ -249,24 +250,6 @@ static SVC_CALLOUT __nfs_sc_rdma[] = { static SVC_CALLOUT_TABLE nfs_sct_rdma = { sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma }; -rpcvers_t nfs_versmin = NFS_VERSMIN_DEFAULT; -rpcvers_t nfs_versmax = NFS_VERSMAX_DEFAULT; - -/* - * Used to track the state of the server so that initialization - * can be done properly. - */ -typedef enum { - NFS_SERVER_STOPPED, /* server state destroyed */ - NFS_SERVER_STOPPING, /* server state being destroyed */ - NFS_SERVER_RUNNING, - NFS_SERVER_QUIESCED, /* server state preserved */ - NFS_SERVER_OFFLINE /* server pool offline */ -} nfs_server_running_t; - -static nfs_server_running_t nfs_server_upordown; -static kmutex_t nfs_server_upordown_lock; -static kcondvar_t nfs_server_upordown_cv; /* * DSS: distributed stable storage @@ -278,12 +261,6 @@ int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *, size_t *); bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *); -/* - * RDMA wait variables. - */ -static kcondvar_t rdma_wait_cv; -static kmutex_t rdma_wait_mutex; - /* * Will be called at the point the server pool is being unregistered * from the pool list. 
From that point onwards, the pool is waiting @@ -293,11 +270,15 @@ static kmutex_t rdma_wait_mutex; void nfs_srv_offline(void) { - mutex_enter(&nfs_server_upordown_lock); - if (nfs_server_upordown == NFS_SERVER_RUNNING) { - nfs_server_upordown = NFS_SERVER_OFFLINE; + nfs_globals_t *ng; + + ng = zone_getspecific(nfssrv_zone_key, curzone); + + mutex_enter(&ng->nfs_server_upordown_lock); + if (ng->nfs_server_upordown == NFS_SERVER_RUNNING) { + ng->nfs_server_upordown = NFS_SERVER_OFFLINE; } - mutex_exit(&nfs_server_upordown_lock); + mutex_exit(&ng->nfs_server_upordown_lock); } /* @@ -326,13 +307,16 @@ nfs_srv_quiesce_all(void) } static void -nfs_srv_shutdown_all(int quiesce) { - mutex_enter(&nfs_server_upordown_lock); +nfs_srv_shutdown_all(int quiesce) +{ + nfs_globals_t *ng = zone_getspecific(nfssrv_zone_key, curzone); + + mutex_enter(&ng->nfs_server_upordown_lock); if (quiesce) { - if (nfs_server_upordown == NFS_SERVER_RUNNING || - nfs_server_upordown == NFS_SERVER_OFFLINE) { - nfs_server_upordown = NFS_SERVER_QUIESCED; - cv_signal(&nfs_server_upordown_cv); + if (ng->nfs_server_upordown == NFS_SERVER_RUNNING || + ng->nfs_server_upordown == NFS_SERVER_OFFLINE) { + ng->nfs_server_upordown = NFS_SERVER_QUIESCED; + cv_signal(&ng->nfs_server_upordown_cv); /* reset DSS state, for subsequent warm restart */ rfs4_dss_numnewpaths = 0; @@ -342,22 +326,22 @@ nfs_srv_shutdown_all(int quiesce) { "NFSv4 state has been preserved"); } } else { - if (nfs_server_upordown == NFS_SERVER_OFFLINE) { - nfs_server_upordown = NFS_SERVER_STOPPING; - mutex_exit(&nfs_server_upordown_lock); - rfs4_state_fini(); - rfs4_fini_drc(nfs4_drc); - mutex_enter(&nfs_server_upordown_lock); - nfs_server_upordown = NFS_SERVER_STOPPED; - cv_signal(&nfs_server_upordown_cv); + if (ng->nfs_server_upordown == NFS_SERVER_OFFLINE) { + ng->nfs_server_upordown = NFS_SERVER_STOPPING; + mutex_exit(&ng->nfs_server_upordown_lock); + rfs4_state_zone_fini(); + rfs4_fini_drc(); + mutex_enter(&ng->nfs_server_upordown_lock); + ng->nfs_server_upordown = NFS_SERVER_STOPPED; + cv_signal(&ng->nfs_server_upordown_cv); } } - mutex_exit(&nfs_server_upordown_lock); + mutex_exit(&ng->nfs_server_upordown_lock); } static int nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp, - rpcvers_t versmin, rpcvers_t versmax) + rpcvers_t versmin, rpcvers_t versmax) { struct strioctl strioc; struct T_info_ack tinfo; @@ -420,6 +404,7 @@ nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp, int nfs_svc(struct nfs_svc_args *arg, model_t model) { + nfs_globals_t *ng; file_t *fp; SVCMASTERXPRT *xprt; int error; @@ -434,6 +419,7 @@ nfs_svc(struct nfs_svc_args *arg, model_t model) model = model; /* STRUCT macros don't always refer to it */ #endif + ng = zone_getspecific(nfssrv_zone_key, curzone); STRUCT_SET_HANDLE(uap, model, arg); /* Check privileges in nfssys() */ @@ -467,27 +453,27 @@ nfs_svc(struct nfs_svc_args *arg, model_t model) return (error); } - nfs_versmin = STRUCT_FGET(uap, versmin); - nfs_versmax = STRUCT_FGET(uap, versmax); + ng->nfs_versmin = STRUCT_FGET(uap, versmin); + ng->nfs_versmax = STRUCT_FGET(uap, versmax); /* Double check the vers min/max ranges */ - if ((nfs_versmin > nfs_versmax) || - (nfs_versmin < NFS_VERSMIN) || - (nfs_versmax > NFS_VERSMAX)) { - nfs_versmin = NFS_VERSMIN_DEFAULT; - nfs_versmax = NFS_VERSMAX_DEFAULT; + if ((ng->nfs_versmin > ng->nfs_versmax) || + (ng->nfs_versmin < NFS_VERSMIN) || + (ng->nfs_versmax > NFS_VERSMAX)) { + ng->nfs_versmin = NFS_VERSMIN_DEFAULT; + ng->nfs_versmax = NFS_VERSMAX_DEFAULT; } - if 
(error = - nfs_srv_set_sc_versions(fp, &sctp, nfs_versmin, nfs_versmax)) { + if (error = nfs_srv_set_sc_versions(fp, &sctp, ng->nfs_versmin, + ng->nfs_versmax)) { releasef(STRUCT_FGET(uap, fd)); kmem_free(addrmask.buf, addrmask.maxlen); return (error); } /* Initialize nfsv4 server */ - if (nfs_versmax == (rpcvers_t)NFS_V4) - rfs4_server_start(STRUCT_FGET(uap, delegation)); + if (ng->nfs_versmax == (rpcvers_t)NFS_V4) + rfs4_server_start(ng, STRUCT_FGET(uap, delegation)); /* Create a transport handle. */ error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt, @@ -506,59 +492,36 @@ nfs_svc(struct nfs_svc_args *arg, model_t model) } static void -rfs4_server_start(int nfs4_srv_delegation) +rfs4_server_start(nfs_globals_t *ng, int nfs4_srv_delegation) { /* * Determine if the server has previously been "started" and * if not, do the per instance initialization */ - mutex_enter(&nfs_server_upordown_lock); + mutex_enter(&ng->nfs_server_upordown_lock); - if (nfs_server_upordown != NFS_SERVER_RUNNING) { + if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) { /* Do we need to stop and wait on the previous server? */ - while (nfs_server_upordown == NFS_SERVER_STOPPING || - nfs_server_upordown == NFS_SERVER_OFFLINE) - cv_wait(&nfs_server_upordown_cv, - &nfs_server_upordown_lock); + while (ng->nfs_server_upordown == NFS_SERVER_STOPPING || + ng->nfs_server_upordown == NFS_SERVER_OFFLINE) + cv_wait(&ng->nfs_server_upordown_cv, + &ng->nfs_server_upordown_lock); - if (nfs_server_upordown != NFS_SERVER_RUNNING) { + if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) { (void) svc_pool_control(NFS_SVCPOOL_ID, SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline); (void) svc_pool_control(NFS_SVCPOOL_ID, SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all); - /* is this an nfsd warm start? */ - if (nfs_server_upordown == NFS_SERVER_QUIESCED) { - cmn_err(CE_NOTE, "nfs_server: " - "server was previously quiesced; " - "existing NFSv4 state will be re-used"); + rfs4_do_server_start(ng->nfs_server_upordown, + nfs4_srv_delegation, + cluster_bootflags & CLUSTER_BOOTED); - /* - * HA-NFSv4: this is also the signal - * that a Resource Group failover has - * occurred. 
- */
- if (cluster_bootflags & CLUSTER_BOOTED)
- hanfsv4_failover();
- } else {
- /* cold start */
- rfs4_state_init();
- nfs4_drc = rfs4_init_drc(nfs4_drc_max,
- nfs4_drc_hash);
- }
-
- /*
- * Check to see if delegation is to be
- * enabled at the server
- */
- if (nfs4_srv_delegation != FALSE)
- rfs4_set_deleg_policy(SRV_NORMAL_DELEGATE);
-
- nfs_server_upordown = NFS_SERVER_RUNNING;
+ ng->nfs_server_upordown = NFS_SERVER_RUNNING;
 }
- cv_signal(&nfs_server_upordown_cv);
+ cv_signal(&ng->nfs_server_upordown_cv);
 }
- mutex_exit(&nfs_server_upordown_lock);
+ mutex_exit(&ng->nfs_server_upordown_lock);
 }

 /*
@@ -568,6 +531,7 @@ rfs4_server_start(int nfs4_srv_delegation)
 int
 rdma_start(struct rdma_svc_args *rsa)
 {
+ nfs_globals_t *ng;
 int error;
 rdma_xprt_group_t started_rdma_xprts;
 rdma_stat stat;
@@ -580,8 +544,10 @@ rdma_start(struct rdma_svc_args *rsa)
 rsa->nfs_versmin = NFS_VERSMIN_DEFAULT;
 rsa->nfs_versmax = NFS_VERSMAX_DEFAULT;
 }
- nfs_versmin = rsa->nfs_versmin;
- nfs_versmax = rsa->nfs_versmax;
+
+ ng = zone_getspecific(nfssrv_zone_key, curzone);
+ ng->nfs_versmin = rsa->nfs_versmin;
+ ng->nfs_versmax = rsa->nfs_versmax;

 /* Set the versions in the callout table */
 __nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin;
@@ -595,7 +561,7 @@ rdma_start(struct rdma_svc_args *rsa)

 /* Initialize nfsv4 server */
 if (rsa->nfs_versmax == (rpcvers_t)NFS_V4)
- rfs4_server_start(rsa->delegation);
+ rfs4_server_start(ng, rsa->delegation);

 started_rdma_xprts.rtg_count = 0;
 started_rdma_xprts.rtg_listhead = NULL;
@@ -612,7 +578,7 @@ rdma_start(struct rdma_svc_args *rsa)
 /*
 * wait till either interrupted by a signal on
 * nfs service stop/restart or signalled by a
- * rdma plugin attach/detatch.
+ * rdma attach/detach.
 */

 stat = rdma_kwait();
@@ -1369,7 +1335,6 @@ static int cred_hits = 0;
 static int cred_misses = 0;
 #endif

-
 #ifdef DEBUG
 /*
 * Debug code to allow disabling of rfs_dispatch() use of
@@ -1476,8 +1441,7 @@ auth_tooweak(struct svc_req *req, char *res)

 static void
 common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
- rpcvers_t max_vers, char *pgmname,
- struct rpc_disptable *disptable)
+ rpcvers_t max_vers, char *pgmname, struct rpc_disptable *disptable)
 {
 int which;
 rpcvers_t vers;
@@ -1515,6 +1479,7 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
 size_t pos; /* request size */
 size_t rlen; /* reply size */
 bool_t rsent = FALSE; /* reply was sent successfully */
+ nfs_export_t *ne = nfs_get_export();

 vers = req->rq_vers;

@@ -1649,13 +1614,15 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
 cr = xprt->xp_cred;
 ASSERT(cr != NULL);
 #ifdef DEBUG
- if (crgetref(cr) != 1) {
- crfree(cr);
- cr = crget();
- xprt->xp_cred = cr;
- cred_misses++;
- } else
- cred_hits++;
+ {
+ if (crgetref(cr) != 1) {
+ crfree(cr);
+ cr = crget();
+ xprt->xp_cred = cr;
+ cred_misses++;
+ } else
+ cred_hits++;
+ }
 #else
 if (crgetref(cr) != 1) {
 crfree(cr);
@@ -1667,20 +1634,24 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
 exi = checkexport(fsid, xfid);

 if (exi != NULL) {
- rw_enter(&exported_lock, RW_READER);
+ rw_enter(&ne->exported_lock, RW_READER);

 exi_ksp = NULL;
 if (exi->exi_kstats != NULL) {
 switch (req->rq_vers) {
 case NFS_VERSION:
 exi_ksp = (disptable == rfs_disptable) ?
- exi->exi_kstats->rfsprocio_v2_ptr[which] :
- exi->exi_kstats->aclprocio_v2_ptr[which];
+ exi->exi_kstats->
+ rfsprocio_v2_ptr[which] :
+ exi->exi_kstats->
+ aclprocio_v2_ptr[which];
 break;
 case NFS_V3:
 exi_ksp = (disptable == rfs_disptable) ?
- exi->exi_kstats->rfsprocio_v3_ptr[which] : - exi->exi_kstats->aclprocio_v3_ptr[which]; + exi->exi_kstats-> + rfsprocio_v3_ptr[which] : + exi->exi_kstats-> + aclprocio_v3_ptr[which]; break; default: ASSERT(0); @@ -1693,11 +1664,10 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers, kstat_runq_enter(KSTAT_IO_PTR(exi_ksp)); mutex_exit(exi_ksp->ks_lock); } else { - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); } - publicfh_ok = PUBLICFH_CHECK(disp, exi, fsid, xfid); - + publicfh_ok = PUBLICFH_CHECK(ne, disp, exi, fsid, xfid); /* * Don't allow non-V4 clients access * to pseudo exports @@ -1809,7 +1779,7 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers, * file system. */ if (nfslog_buffer_list != NULL) { - nfslog_exi = nfslog_get_exi(exi, req, res, &nfslog_rec_id); + nfslog_exi = nfslog_get_exi(ne, exi, req, res, &nfslog_rec_id); /* * Is logging enabled? */ @@ -1910,7 +1880,7 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers, kstat_runq_exit(KSTAT_IO_PTR(exi_ksp)); mutex_exit(exi_ksp->ks_lock); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); } if (exi != NULL) @@ -2650,31 +2620,18 @@ client_addr(struct svc_req *req, char *buf) * - Initialize all locks * - initialize the version 3 write verifier */ -int +void nfs_srvinit(void) { - int error; + /* NFS server zone-specific global variables */ + zone_key_create(&nfssrv_zone_key, nfs_srv_zone_init, + NULL, nfs_srv_zone_fini); - error = nfs_exportinit(); - if (error != 0) - return (error); - error = rfs4_srvrinit(); - if (error != 0) { - nfs_exportfini(); - return (error); - } + nfs_exportinit(); rfs_srvrinit(); rfs3_srvrinit(); + rfs4_srvrinit(); nfsauth_init(); - - /* Init the stuff to control start/stop */ - nfs_server_upordown = NFS_SERVER_STOPPED; - mutex_init(&nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL); - mutex_init(&rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL); - cv_init(&rdma_wait_cv, NULL, CV_DEFAULT, NULL); - - return (0); } /* @@ -2686,20 +2643,53 @@ void nfs_srvfini(void) { nfsauth_fini(); + rfs4_srvrfini(); rfs3_srvrfini(); rfs_srvrfini(); nfs_exportfini(); - mutex_destroy(&nfs_server_upordown_lock); - cv_destroy(&nfs_server_upordown_cv); - mutex_destroy(&rdma_wait_mutex); - cv_destroy(&rdma_wait_cv); + (void) zone_key_delete(nfssrv_zone_key); +} + +/* ARGSUSED */ +static void * +nfs_srv_zone_init(zoneid_t zoneid) +{ + nfs_globals_t *ng; + + ng = kmem_zalloc(sizeof (*ng), KM_SLEEP); + + ng->nfs_versmin = NFS_VERSMIN_DEFAULT; + ng->nfs_versmax = NFS_VERSMAX_DEFAULT; + + /* Init the stuff to control start/stop */ + ng->nfs_server_upordown = NFS_SERVER_STOPPED; + mutex_init(&ng->nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ng->nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&ng->rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ng->rdma_wait_cv, NULL, CV_DEFAULT, NULL); + + return (ng); +} + +/* ARGSUSED */ +static void +nfs_srv_zone_fini(zoneid_t zoneid, void *data) +{ + nfs_globals_t *ng; + + ng = (nfs_globals_t *)data; + mutex_destroy(&ng->nfs_server_upordown_lock); + cv_destroy(&ng->nfs_server_upordown_cv); + mutex_destroy(&ng->rdma_wait_mutex); + cv_destroy(&ng->rdma_wait_cv); + + kmem_free(ng, sizeof (*ng)); } /* * Set up an iovec array of up to cnt pointers. 
*/ - void mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp) { @@ -2975,7 +2965,7 @@ rfs_pathname( while (*path == '/') path++; - startdvp = rootdir; + startdvp = ZONE_ROOTVP(); } error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf)); @@ -2998,7 +2988,7 @@ rfs_pathname( } VN_HOLD(startdvp); error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp, - rootdir, startdvp, cr); + ZONE_ROOTVP(), startdvp, cr); } if (error == ENAMETOOLONG) { /* @@ -3015,7 +3005,7 @@ rfs_pathname( } VN_HOLD(startdvp); error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp, - rootdir, startdvp, cr); + ZONE_ROOTVP(), startdvp, cr); pn_free(&pn); } @@ -3119,168 +3109,6 @@ nfs_check_vpexi(vnode_t *mc_dvp, vnode_t *vp, cred_t *cr, return (error); } -/* - * Do the main work of handling HA-NFSv4 Resource Group failover on - * Sun Cluster. - * We need to detect whether any RG admin paths have been added or removed, - * and adjust resources accordingly. - * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In - * order to scale, the list and array of paths need to be held in more - * suitable data structures. - */ -static void -hanfsv4_failover(void) -{ - int i, start_grace, numadded_paths = 0; - char **added_paths = NULL; - rfs4_dss_path_t *dss_path; - - /* - * Note: currently, rfs4_dss_pathlist cannot be NULL, since - * it will always include an entry for NFS4_DSS_VAR_DIR. If we - * make the latter dynamically specified too, the following will - * need to be adjusted. - */ - - /* - * First, look for removed paths: RGs that have been failed-over - * away from this node. - * Walk the "currently-serving" rfs4_dss_pathlist and, for each - * path, check if it is on the "passed-in" rfs4_dss_newpaths array - * from nfsd. If not, that RG path has been removed. - * - * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed - * any duplicates. - */ - dss_path = rfs4_dss_pathlist; - do { - int found = 0; - char *path = dss_path->path; - - /* used only for non-HA so may not be removed */ - if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) { - dss_path = dss_path->next; - continue; - } - - for (i = 0; i < rfs4_dss_numnewpaths; i++) { - int cmpret; - char *newpath = rfs4_dss_newpaths[i]; - - /* - * Since nfsd has sorted rfs4_dss_newpaths for us, - * once the return from strcmp is negative we know - * we've passed the point where "path" should be, - * and can stop searching: "path" has been removed. - */ - cmpret = strcmp(path, newpath); - if (cmpret < 0) - break; - if (cmpret == 0) { - found = 1; - break; - } - } - - if (found == 0) { - unsigned index = dss_path->index; - rfs4_servinst_t *sip = dss_path->sip; - rfs4_dss_path_t *path_next = dss_path->next; - - /* - * This path has been removed. - * We must clear out the servinst reference to - * it, since it's now owned by another - * node: we should not attempt to touch it. - */ - ASSERT(dss_path == sip->dss_paths[index]); - sip->dss_paths[index] = NULL; - - /* remove from "currently-serving" list, and destroy */ - remque(dss_path); - /* allow for NUL */ - kmem_free(dss_path->path, strlen(dss_path->path) + 1); - kmem_free(dss_path, sizeof (rfs4_dss_path_t)); - - dss_path = path_next; - } else { - /* path was found; not removed */ - dss_path = dss_path->next; - } - } while (dss_path != rfs4_dss_pathlist); - - /* - * Now, look for added paths: RGs that have been failed-over - * to this node. - * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and, - * for each path, check if it is on the "currently-serving" - * rfs4_dss_pathlist. 
If not, that RG path has been added. - * - * Note: we don't do duplicate detection here; nfsd does that for us. - * - * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us - * an upper bound for the size needed for added_paths[numadded_paths]. - */ - - /* probably more space than we need, but guaranteed to be enough */ - if (rfs4_dss_numnewpaths > 0) { - size_t sz = rfs4_dss_numnewpaths * sizeof (char *); - added_paths = kmem_zalloc(sz, KM_SLEEP); - } - - /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */ - for (i = 0; i < rfs4_dss_numnewpaths; i++) { - int found = 0; - char *newpath = rfs4_dss_newpaths[i]; - - dss_path = rfs4_dss_pathlist; - do { - char *path = dss_path->path; - - /* used only for non-HA */ - if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) { - dss_path = dss_path->next; - continue; - } - - if (strncmp(path, newpath, strlen(path)) == 0) { - found = 1; - break; - } - - dss_path = dss_path->next; - } while (dss_path != rfs4_dss_pathlist); - - if (found == 0) { - added_paths[numadded_paths] = newpath; - numadded_paths++; - } - } - - /* did we find any added paths? */ - if (numadded_paths > 0) { - /* create a new server instance, and start its grace period */ - start_grace = 1; - rfs4_servinst_create(start_grace, numadded_paths, added_paths); - - /* read in the stable storage state from these paths */ - rfs4_dss_readstate(numadded_paths, added_paths); - - /* - * Multiple failovers during a grace period will cause - * clients of the same resource group to be partitioned - * into different server instances, with different - * grace periods. Since clients of the same resource - * group must be subject to the same grace period, - * we need to reset all currently active grace periods. - */ - rfs4_grace_reset_all(); - } - - if (rfs4_dss_numnewpaths > 0) - kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *)); -} - /* * Used by NFSv3 and NFSv4 server to query label of * a pathname component during lookup/access ops. diff --git a/usr/src/uts/common/fs/nfs/nfs_srv.c b/usr/src/uts/common/fs/nfs/nfs_srv.c index 86d78eee5d55..b8df7299acf4 100644 --- a/usr/src/uts/common/fs/nfs/nfs_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs_srv.c @@ -21,8 +21,6 @@ /* * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2017 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. */ /* @@ -30,6 +28,11 @@ * All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + * Copyright (c) 2016 by Delphix. All rights reserved. + */ + #include #include #include @@ -70,6 +73,21 @@ #include +struct rfs_async_write_list; + +/* + * Zone globals of NFSv2 server + */ +typedef struct nfs_srv { + kmutex_t async_write_lock; + struct rfs_async_write_list *async_write_head; + + /* + * enables write clustering if == 1 + */ + int write_async; +} nfs_srv_t; + /* * These are the interface routines for the server side of the * Network File System. See the NFS version 2 protocol specification @@ -79,6 +97,9 @@ static int sattr_to_vattr(struct nfssattr *, struct vattr *); static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, cred_t *); +static void *rfs_zone_init(zoneid_t zoneid); +static void rfs_zone_fini(zoneid_t zoneid, void *data); + /* * Some "over the wire" UNIX file types. 
These are encoded @@ -90,6 +111,7 @@ static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, #define IFSOCK 0140000 /* socket */ u_longlong_t nfs2_srv_caller_id; +static zone_key_t rfs_zone_key; /* * Get file attributes. @@ -446,7 +468,7 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, * location of the public filehandle. */ if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { - dvp = rootdir; + dvp = ZONE_ROOTVP(); VN_HOLD(dvp); } else { dvp = nfs_fhtovp(fhp, exi); @@ -1143,6 +1165,7 @@ rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); curthread->t_cred = savecred; } else { + iovcnt = 0; for (m = wa->wa_mblk; m != NULL; m = m->b_cont) iovcnt++; @@ -1286,8 +1309,10 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, cred_t *savecred; int in_crit = 0; caller_context_t ct; + nfs_srv_t *nsrv; - if (!rfs_write_async) { + nsrv = zone_getspecific(rfs_zone_key, curzone); + if (!nsrv->write_async) { rfs_write_sync(wa, ns, exi, req, cr, ro); return; } @@ -1312,8 +1337,8 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, * Look to see if there is already a cluster started * for this file. */ - mutex_enter(&rfs_async_write_lock); - for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) { + mutex_enter(&nsrv->async_write_lock); + for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) { if (bcmp(&wa->wa_fhandle, lp->fhp, sizeof (fhandle_t)) == 0) break; @@ -1339,8 +1364,8 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, else trp->list = nrp; while (nrp->ns->ns_status == RFSWRITE_INITVAL) - cv_wait(&lp->cv, &rfs_async_write_lock); - mutex_exit(&rfs_async_write_lock); + cv_wait(&lp->cv, &nsrv->async_write_lock); + mutex_exit(&nsrv->async_write_lock); return; } @@ -1357,15 +1382,15 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, nlp->list = nrp; nlp->next = NULL; - if (rfs_async_write_head == NULL) { - rfs_async_write_head = nlp; + if (nsrv->async_write_head == NULL) { + nsrv->async_write_head = nlp; } else { - lp = rfs_async_write_head; + lp = nsrv->async_write_head; while (lp->next != NULL) lp = lp->next; lp->next = nlp; } - mutex_exit(&rfs_async_write_lock); + mutex_exit(&nsrv->async_write_lock); /* * Convert the file handle common to all of the requests @@ -1373,11 +1398,11 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, */ vp = nfs_fhtovp(&wa->wa_fhandle, exi); if (vp == NULL) { - mutex_enter(&rfs_async_write_lock); - if (rfs_async_write_head == nlp) - rfs_async_write_head = nlp->next; + mutex_enter(&nsrv->async_write_lock); + if (nsrv->async_write_head == nlp) + nsrv->async_write_head = nlp->next; else { - lp = rfs_async_write_head; + lp = nsrv->async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; @@ -1388,7 +1413,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, rp->thread->t_flag |= t_flag; } cv_broadcast(&nlp->cv); - mutex_exit(&rfs_async_write_lock); + mutex_exit(&nsrv->async_write_lock); return; } @@ -1399,11 +1424,11 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, */ if (vp->v_type != VREG) { VN_RELE(vp); - mutex_enter(&rfs_async_write_lock); - if (rfs_async_write_head == nlp) - rfs_async_write_head = nlp->next; + mutex_enter(&nsrv->async_write_lock); + if (nsrv->async_write_head == nlp) + nsrv->async_write_head = nlp->next; else { - lp = rfs_async_write_head; + lp = nsrv->async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; @@ -1414,7 +1439,7 @@ 
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, rp->thread->t_flag |= t_flag; } cv_broadcast(&nlp->cv); - mutex_exit(&rfs_async_write_lock); + mutex_exit(&nsrv->async_write_lock); return; } @@ -1446,11 +1471,11 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, VN_RELE(vp); /* mark as wouldblock so response is dropped */ curthread->t_flag |= T_WOULDBLOCK; - mutex_enter(&rfs_async_write_lock); - if (rfs_async_write_head == nlp) - rfs_async_write_head = nlp->next; + mutex_enter(&nsrv->async_write_lock); + if (nsrv->async_write_head == nlp) + nsrv->async_write_head = nlp->next; else { - lp = rfs_async_write_head; + lp = nsrv->async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; @@ -1462,7 +1487,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, } } cv_broadcast(&nlp->cv); - mutex_exit(&rfs_async_write_lock); + mutex_exit(&nsrv->async_write_lock); return; } @@ -1484,16 +1509,16 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, * will allow more requests to be clustered in this * second cluster. */ - mutex_enter(&rfs_async_write_lock); - if (rfs_async_write_head == nlp) - rfs_async_write_head = nlp->next; + mutex_enter(&nsrv->async_write_lock); + if (nsrv->async_write_head == nlp) + nsrv->async_write_head = nlp->next; else { - lp = rfs_async_write_head; + lp = nsrv->async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; } - mutex_exit(&rfs_async_write_lock); + mutex_exit(&nsrv->async_write_lock); /* * Step through the list of requests in this cluster. @@ -1738,7 +1763,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, VN_RELE(vp); t_flag = curthread->t_flag & T_WOULDBLOCK; - mutex_enter(&rfs_async_write_lock); + mutex_enter(&nsrv->async_write_lock); for (rp = nlp->list; rp != NULL; rp = rp->list) { if (rp->ns->ns_status == RFSWRITE_INITVAL) { rp->ns->ns_status = puterrno(error); @@ -1746,7 +1771,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, } } cv_broadcast(&nlp->cv); - mutex_exit(&rfs_async_write_lock); + mutex_exit(&nsrv->async_write_lock); } @@ -2224,7 +2249,7 @@ rfs_rename(struct nfsrnmargs *args, enum nfsstat *status, /* Check for delegation on the file being renamed over, if it exists */ - if (rfs4_deleg_policy != SRV_NEVER_DELEGATE && + if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE && VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr, NULL, NULL, NULL) == 0) { @@ -2610,7 +2635,7 @@ rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status, * supplying a vnode known to exist and illegal to * remove. */ - error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0); + error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0); /* * Force modified data and metadata out to stable storage. 
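The per-zone conversions in this patch all follow the standard illumos zone-specific data (ZSD) pattern: allocate zone-private state from a create callback, tear it down in a destroy callback, and look it up through a zone key. The following minimal sketch shows the shape of that pattern; the names my_globals_t, my_zone_key, my_zone_init, my_zone_fini, my_module_init and my_get_globals are illustrative only and do not exist in this change:

	#include <sys/zone.h>
	#include <sys/kmem.h>

	typedef struct my_globals {
		kmutex_t	mg_lock;	/* protects this zone's state */
		int		mg_state;
	} my_globals_t;

	static zone_key_t my_zone_key;

	/* Create callback: runs for every zone, including the global zone. */
	static void *
	my_zone_init(zoneid_t zoneid)
	{
		my_globals_t *mg = kmem_zalloc(sizeof (*mg), KM_SLEEP);

		mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL);
		return (mg);
	}

	/* Destroy callback: runs at zone teardown; must undo my_zone_init(). */
	static void
	my_zone_fini(zoneid_t zoneid, void *data)
	{
		my_globals_t *mg = data;

		mutex_destroy(&mg->mg_lock);
		kmem_free(mg, sizeof (*mg));
	}

	void
	my_module_init(void)
	{
		/* NULL shutdown callback, as the callers in this patch do. */
		zone_key_create(&my_zone_key, my_zone_init, NULL, my_zone_fini);
	}

	my_globals_t *
	my_get_globals(void)
	{
		/* curzone is the zone of the current thread's process. */
		return (zone_getspecific(my_zone_key, curzone));
	}

This is the shape of nfs_srv_zone_init()/nfs_srv_zone_fini() above and of rfs_zone_init()/rfs_zone_fini() below; the shutdown callback slot is passed as NULL throughout this patch, so all cleanup happens in the destroy callback.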
@@ -2975,7 +3000,7 @@ sattr_to_vattr(struct nfssattr *sa, struct vattr *vap) return (0); } -static enum nfsftype vt_to_nf[] = { +static const enum nfsftype vt_to_nf[] = { 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0 }; @@ -3194,14 +3219,38 @@ acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr) void rfs_srvrinit(void) { - mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL); nfs2_srv_caller_id = fs_new_caller_id(); + zone_key_create(&rfs_zone_key, rfs_zone_init, NULL, rfs_zone_fini); } void rfs_srvrfini(void) { - mutex_destroy(&rfs_async_write_lock); +} + +/* ARGSUSED */ +static void * +rfs_zone_init(zoneid_t zoneid) +{ + nfs_srv_t *ns; + + ns = kmem_zalloc(sizeof (*ns), KM_SLEEP); + + mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL); + ns->write_async = 1; + + return (ns); +} + +/* ARGSUSED */ +static void +rfs_zone_fini(zoneid_t zoneid, void *data) +{ + nfs_srv_t *ns; + + ns = (nfs_srv_t *)data; + mutex_destroy(&ns->async_write_lock); + kmem_free(ns, sizeof (*ns)); } static int diff --git a/usr/src/uts/common/fs/nfs/nfs_sys.c b/usr/src/uts/common/fs/nfs/nfs_sys.c index e6ff4a2e0b11..434c9a2a3e4a 100644 --- a/usr/src/uts/common/fs/nfs/nfs_sys.c +++ b/usr/src/uts/common/fs/nfs/nfs_sys.c @@ -18,14 +18,21 @@ * * CDDL HEADER END */ + /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * + */ + +/* * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. * All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #include #include #include @@ -80,8 +87,6 @@ nfs_export(void *arg) { STRUCT_DECL(exportfs_args, ea); - if (!INGLOBALZONE(curproc)) - return (set_errno(EPERM)); STRUCT_INIT(ea, get_udatamodel()); if (copyin(arg, STRUCT_BUF(ea), STRUCT_SIZE(ea))) return (set_errno(EFAULT)); @@ -111,9 +116,6 @@ nfssys(enum nfssys_op opcode, void *arg) break; } - if (!INGLOBALZONE(curproc)) - return (set_errno(EPERM)); - STRUCT_INIT(u_clr, get_udatamodel()); if (copyin(arg, STRUCT_BUF(u_clr), STRUCT_SIZE(u_clr))) @@ -164,8 +166,6 @@ nfssys(enum nfssys_op opcode, void *arg) struct rdma_svc_args rsa; char netstore[20] = "tcp"; - if (!INGLOBALZONE(curproc)) - return (set_errno(EPERM)); if (get_udatamodel() != DATAMODEL_NATIVE) { STRUCT_DECL(rdma_svc_args, ursa); @@ -189,9 +189,6 @@ nfssys(enum nfssys_op opcode, void *arg) case NFS_SVC: { /* NFS server daemon */ STRUCT_DECL(nfs_svc_args, nsa); - - if (!INGLOBALZONE(curproc)) - return (set_errno(EPERM)); STRUCT_INIT(nsa, get_udatamodel()); if (copyin(arg, STRUCT_BUF(nsa), STRUCT_SIZE(nsa))) @@ -209,8 +206,6 @@ nfssys(enum nfssys_op opcode, void *arg) case NFS_GETFH: { /* get a file handle */ STRUCT_DECL(nfs_getfh_args, nga); - if (!INGLOBALZONE(curproc)) - return (set_errno(EPERM)); STRUCT_INIT(nga, get_udatamodel()); if (copyin(arg, STRUCT_BUF(nga), STRUCT_SIZE(nga))) return (set_errno(EFAULT)); diff --git a/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c b/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c index 1fa1617ec82b..1065d86719d0 100644 --- a/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c +++ b/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c @@ -23,6 +23,10 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #include #include #include @@ -242,16 +246,6 @@ sharefs_unmount(vfs_t *vfsp, int flag, struct cred *cr) if (data->sharefs_vfs_root->v_count > 1) return (EBUSY); - /* - * Only allow an unmount iff there are no entries in memory. 
- */ - rw_enter(&sharetab_lock, RW_READER); - if (sharetab_size != 0) { - rw_exit(&sharetab_lock); - return (EBUSY); - } - rw_exit(&sharetab_lock); - /* * Release the last hold on the root vnode */ diff --git a/usr/src/uts/common/fs/sharefs/sharefs_vnops.c b/usr/src/uts/common/fs/sharefs/sharefs_vnops.c index 2ca3f293a518..8e5a9a2cc78f 100644 --- a/usr/src/uts/common/fs/sharefs/sharefs_vnops.c +++ b/usr/src/uts/common/fs/sharefs/sharefs_vnops.c @@ -24,6 +24,10 @@ * Use is subject to license terms. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #include #include @@ -45,7 +49,7 @@ * the shares enumerated. */ static int -sharefs_snap_create(shnode_t *sft) +sharefs_snap_create(sharetab_globals_t *sg, shnode_t *sft) { sharetab_t *sht; share_t *sh; @@ -53,16 +57,16 @@ sharefs_snap_create(shnode_t *sft) int iCount = 0; char *buf; - rw_enter(&sharefs_lock, RW_WRITER); - rw_enter(&sharetab_lock, RW_READER); + rw_enter(&sg->sharefs_lock, RW_WRITER); + rw_enter(&sg->sharetab_lock, RW_READER); if (sft->sharefs_snap) { /* * Nothing has changed, so no need to grab a new copy! */ - if (sft->sharefs_generation == sharetab_generation) { - rw_exit(&sharetab_lock); - rw_exit(&sharefs_lock); + if (sft->sharefs_generation == sg->sharetab_generation) { + rw_exit(&sg->sharetab_lock); + rw_exit(&sg->sharefs_lock); return (0); } @@ -71,12 +75,12 @@ sharefs_snap_create(shnode_t *sft) sft->sharefs_snap = NULL; } - sft->sharefs_size = sharetab_size; - sft->sharefs_count = sharetab_count; + sft->sharefs_size = sg->sharetab_size; + sft->sharefs_count = sg->sharetab_count; if (sft->sharefs_size == 0) { - rw_exit(&sharetab_lock); - rw_exit(&sharefs_lock); + rw_exit(&sg->sharetab_lock); + rw_exit(&sg->sharefs_lock); return (0); } @@ -87,7 +91,7 @@ sharefs_snap_create(shnode_t *sft) /* * Walk the Sharetab, dumping each entry. */ - for (sht = sharefs_sharetab; sht != NULL; sht = sht->s_next) { + for (sht = sg->sharefs_sharetab; sht != NULL; sht = sht->s_next) { int i; for (i = 0; i < SHARETAB_HASHES; i++) { @@ -132,14 +136,14 @@ sharefs_snap_create(shnode_t *sft) * We want to record the generation number and * mtime inside this snapshot. */ - gethrestime(&sharetab_snap_time); - sft->sharefs_snap_time = sharetab_snap_time; - sft->sharefs_generation = sharetab_generation; + gethrestime(&sg->sharetab_snap_time); + sft->sharefs_snap_time = sg->sharetab_snap_time; + sft->sharefs_generation = sg->sharetab_generation; ASSERT(iCount == sft->sharefs_count); - rw_exit(&sharetab_lock); - rw_exit(&sharefs_lock); + rw_exit(&sg->sharetab_lock); + rw_exit(&sg->sharefs_lock); return (0); error_fault: @@ -148,8 +152,8 @@ sharefs_snap_create(shnode_t *sft) sft->sharefs_size = 0; sft->sharefs_count = 0; sft->sharefs_snap = NULL; - rw_exit(&sharetab_lock); - rw_exit(&sharefs_lock); + rw_exit(&sg->sharetab_lock); + rw_exit(&sg->sharefs_lock); return (EFAULT); } @@ -161,13 +165,14 @@ sharefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, { timestruc_t now; shnode_t *sft = VTOSH(vp); + sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone); vap->va_type = VREG; vap->va_mode = S_IRUSR | S_IRGRP | S_IROTH; vap->va_nodeid = SHAREFS_INO_FILE; vap->va_nlink = 1; - rw_enter(&sharefs_lock, RW_READER); + rw_enter(&sg->sharefs_lock, RW_READER); /* * If we get asked about a snapped vnode, then @@ -177,15 +182,15 @@ sharefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, * sharetab. 
*/ if (sft->sharefs_real_vp) { - rw_enter(&sharetab_lock, RW_READER); - vap->va_size = sharetab_size; - vap->va_mtime = sharetab_mtime; - rw_exit(&sharetab_lock); + rw_enter(&sg->sharetab_lock, RW_READER); + vap->va_size = sg->sharetab_size; + vap->va_mtime = sg->sharetab_mtime; + rw_exit(&sg->sharetab_lock); } else { vap->va_size = sft->sharefs_size; vap->va_mtime = sft->sharefs_snap_time; } - rw_exit(&sharefs_lock); + rw_exit(&sg->sharefs_lock); gethrestime(&now); vap->va_atime = vap->va_ctime = now; @@ -259,7 +264,8 @@ sharefs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) * are dumping an extremely huge sharetab, we make a copy * of it here and use it to dump instead. */ - error = sharefs_snap_create(sft); + error = sharefs_snap_create(sharetab_get_globals(vp->v_vfsp->vfs_zone), + sft); return (error); } @@ -270,11 +276,12 @@ sharefs_close(vnode_t *vp, int flag, int count, offset_t off, cred_t *cr, caller_context_t *ct) { shnode_t *sft = VTOSH(vp); + sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone); if (count > 1) return (0); - rw_enter(&sharefs_lock, RW_WRITER); + rw_enter(&sg->sharefs_lock, RW_WRITER); if (vp->v_count == 1) { if (sft->sharefs_snap != NULL) { kmem_free(sft->sharefs_snap, sft->sharefs_size + 1); @@ -284,7 +291,7 @@ sharefs_close(vnode_t *vp, int flag, int count, } } atomic_dec_32(&sft->sharefs_refs); - rw_exit(&sharefs_lock); + rw_exit(&sg->sharefs_lock); return (0); } @@ -292,30 +299,31 @@ sharefs_close(vnode_t *vp, int flag, int count, /* ARGSUSED */ static int sharefs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, - caller_context_t *ct) + caller_context_t *ct) { shnode_t *sft = VTOSH(vp); off_t off = uio->uio_offset; size_t len = uio->uio_resid; int error = 0; + sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone); - rw_enter(&sharefs_lock, RW_READER); + rw_enter(&sg->sharefs_lock, RW_READER); /* * First check to see if we need to grab a new snapshot. 
*/ if (off == (off_t)0) { - rw_exit(&sharefs_lock); - error = sharefs_snap_create(sft); + rw_exit(&sg->sharefs_lock); + error = sharefs_snap_create(sg, sft); if (error) { return (EFAULT); } - rw_enter(&sharefs_lock, RW_READER); + rw_enter(&sg->sharefs_lock, RW_READER); } /* LINTED */ if (len <= 0 || off >= sft->sharefs_size) { - rw_exit(&sharefs_lock); + rw_exit(&sg->sharefs_lock); return (error); } @@ -323,7 +331,7 @@ sharefs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, len = sft->sharefs_size - off; if (off < 0 || len > sft->sharefs_size) { - rw_exit(&sharefs_lock); + rw_exit(&sg->sharefs_lock); return (EFAULT); } @@ -332,7 +340,7 @@ sharefs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, len, UIO_READ, uio); } - rw_exit(&sharefs_lock); + rw_exit(&sg->sharefs_lock); return (error); } @@ -342,16 +350,17 @@ sharefs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *tx) { gfs_file_t *fp = vp->v_data; shnode_t *sft; + sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone); sft = (shnode_t *)gfs_file_inactive(vp); if (sft) { - rw_enter(&sharefs_lock, RW_WRITER); + rw_enter(&sg->sharefs_lock, RW_WRITER); if (sft->sharefs_snap != NULL) { kmem_free(sft->sharefs_snap, sft->sharefs_size + 1); } kmem_free(sft, fp->gfs_size); - rw_exit(&sharefs_lock); + rw_exit(&sg->sharefs_lock); } } diff --git a/usr/src/uts/common/fs/sharefs/sharetab.c b/usr/src/uts/common/fs/sharefs/sharetab.c index 0f8543641cc1..7b8055e94b14 100644 --- a/usr/src/uts/common/fs/sharefs/sharetab.c +++ b/usr/src/uts/common/fs/sharefs/sharetab.c @@ -23,6 +23,10 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #include #include #include @@ -47,15 +51,13 @@ */ #define SHARETAB_COPYIN(field) \ if (copyinstr(STRUCT_FGETP(u_sh, sh_##field), \ - buf, \ - bufsz + 1, /* Add one for extra NUL */ \ - &len)) { \ + buf, \ + bufsz + 1, /* Add one for extra NUL */ \ + &len)) { \ error = EFAULT; \ goto cleanup; \ } \ - /* \ - * Need to remove 1 because copyinstr() counts the NUL. \ - */ \ + /* Need to remove 1 because copyinstr() counts the NUL */ \ len--; \ sh->sh_##field = kmem_alloc(len + 1, KM_SLEEP); \ bcopy(buf, sh->sh_##field, len); \ @@ -64,24 +66,13 @@ sh->sh_size += shl.shl_##field; /* Debug counting */ #define SHARETAB_DELETE_FIELD(field) \ - if (sh->sh_##field) { \ + if (sh->sh_##field != NULL) { \ kmem_free(sh->sh_##field, \ - shl ? shl->shl_##field + 1 : \ - strlen(sh->sh_##field) + 1); \ + shl ? shl->shl_##field + 1 : \ + strlen(sh->sh_##field) + 1); \ } -sharetab_t *sharefs_sharetab = NULL; /* The incore sharetab. */ -size_t sharetab_size; -uint_t sharetab_count; - -krwlock_t sharetab_lock; /* lock to protect the cached sharetab */ - -krwlock_t sharefs_lock; /* lock to protect the vnode ops */ - -timestruc_t sharetab_mtime; -timestruc_t sharetab_snap_time; - -uint_t sharetab_generation; /* Only increments and wraps! */ +static zone_key_t sharetab_zone_key; /* * Take care of cleaning up a share. @@ -91,7 +82,7 @@ uint_t sharetab_generation; /* Only increments and wraps! */ static void sharefree(share_t *sh, sharefs_lens_t *shl) { - if (!sh) + if (sh == NULL) return; SHARETAB_DELETE_FIELD(path); @@ -100,7 +91,7 @@ sharefree(share_t *sh, sharefs_lens_t *shl) SHARETAB_DELETE_FIELD(opts); SHARETAB_DELETE_FIELD(descr); - kmem_free(sh, sizeof (share_t)); + kmem_free(sh, sizeof (*sh)); } /* @@ -108,7 +99,7 @@ sharefree(share_t *sh, sharefs_lens_t *shl) * cleaning up the memory associated with the share argument. 
*/ static int -sharefs_remove(share_t *sh, sharefs_lens_t *shl) +sharefs_remove(sharetab_globals_t *sg, share_t *sh, sharefs_lens_t *shl) { int iHash; sharetab_t *sht; @@ -118,23 +109,22 @@ sharefs_remove(share_t *sh, sharefs_lens_t *shl) if (!sh) return (ENOENT); - rw_enter(&sharetab_lock, RW_WRITER); - for (sht = sharefs_sharetab; sht != NULL; sht = sht->s_next) { - if (strcmp(sh->sh_fstype, sht->s_fstype) == 0) { + rw_enter(&sg->sharetab_lock, RW_WRITER); + for (sht = sg->sharefs_sharetab; sht != NULL; sht = sht->s_next) { + if (strcmp(sh->sh_fstype, sht->s_fstype) == 0) break; - } } /* * There does not exist a fstype in memory which * matches the share passed in. */ - if (!sht) { - rw_exit(&sharetab_lock); + if (sht == NULL) { + rw_exit(&sg->sharetab_lock); return (ENOENT); } - iPath = shl ? shl->shl_path : strlen(sh->sh_path); + iPath = shl != NULL ? shl->shl_path : strlen(sh->sh_path); iHash = pkp_tab_hash(sh->sh_path, strlen(sh->sh_path)); /* @@ -147,22 +137,21 @@ sharefs_remove(share_t *sh, sharefs_lens_t *shl) */ if (strcmp(sh->sh_path, s->sh_path) == 0 && strlen(s->sh_path) == iPath) { - if (p) { + if (p != NULL) p->sh_next = s->sh_next; - } else { + else sht->s_buckets[iHash].ssh_sh = s->sh_next; - } ASSERT(sht->s_buckets[iHash].ssh_count != 0); atomic_dec_32(&sht->s_buckets[iHash].ssh_count); atomic_dec_32(&sht->s_count); - atomic_dec_32(&sharetab_count); + atomic_dec_32(&sg->sharetab_count); - ASSERT(sharetab_size >= s->sh_size); - sharetab_size -= s->sh_size; + ASSERT(sg->sharetab_size >= s->sh_size); + sg->sharetab_size -= s->sh_size; - gethrestime(&sharetab_mtime); - atomic_inc_32(&sharetab_generation); + gethrestime(&sg->sharetab_mtime); + atomic_inc_32(&sg->sharetab_generation); break; } @@ -170,18 +159,15 @@ sharefs_remove(share_t *sh, sharefs_lens_t *shl) p = s; } - rw_exit(&sharetab_lock); + rw_exit(&sg->sharetab_lock); - if (!s) { + if (s == NULL) return (ENOENT); - } s->sh_next = NULL; sharefree(s, NULL); - /* - * We need to free the share for the caller. - */ + /* We need to free the share for the caller */ sharefree(sh, shl); return (0); @@ -191,7 +177,7 @@ sharefs_remove(share_t *sh, sharefs_lens_t *shl) * The caller must have allocated memory for us to use. */ static int -sharefs_add(share_t *sh, sharefs_lens_t *shl) +sharefs_add(sharetab_globals_t *sg, share_t *sh, sharefs_lens_t *shl) { int iHash; sharetab_t *sht; @@ -199,41 +185,31 @@ sharefs_add(share_t *sh, sharefs_lens_t *shl) int iPath; int n; - if (!sh) { + if (sh == NULL) return (ENOENT); - } - /* - * We need to find the hash buckets for the fstype. - */ - rw_enter(&sharetab_lock, RW_WRITER); - for (sht = sharefs_sharetab; sht != NULL; sht = sht->s_next) { - if (strcmp(sh->sh_fstype, sht->s_fstype) == 0) { + /* We need to find the hash buckets for the fstype */ + rw_enter(&sg->sharetab_lock, RW_WRITER); + for (sht = sg->sharefs_sharetab; sht != NULL; sht = sht->s_next) { + if (strcmp(sh->sh_fstype, sht->s_fstype) == 0) break; - } } - /* - * Did not exist, so allocate one and add it to the - * sharetab. - */ - if (!sht) { + /* Did not exist, so allocate one and add it to the sharetab */ + if (sht == NULL) { sht = kmem_zalloc(sizeof (*sht), KM_SLEEP); n = strlen(sh->sh_fstype); sht->s_fstype = kmem_zalloc(n + 1, KM_SLEEP); (void) strncpy(sht->s_fstype, sh->sh_fstype, n); - sht->s_next = sharefs_sharetab; - sharefs_sharetab = sht; + sht->s_next = sg->sharefs_sharetab; + sg->sharefs_sharetab = sht; } - /* - * Now we need to find where we have to add the entry. 
-	 */
+	/* Now we need to find where we have to add the entry */
+	iPath = shl != NULL ? shl->shl_path : strlen(sh->sh_path);
 	iHash = pkp_tab_hash(sh->sh_path, strlen(sh->sh_path));
-	iPath = shl ? shl->shl_path : strlen(sh->sh_path);
-
 	if (shl) {
 		sh->sh_size = shl->shl_path + shl->shl_res +
 		    shl->shl_fstype + shl->shl_opts + shl->shl_descr;
@@ -243,15 +219,10 @@ sharefs_add(share_t *sh, sharefs_lens_t *shl)
 		    strlen(sh->sh_opts) + strlen(sh->sh_descr);
 	}
 
-	/*
-	 * We need to account for field seperators and
-	 * the EOL.
-	 */
+	/* We need to account for field separators and the EOL */
 	sh->sh_size += 5;
 
-	/*
-	 * Now walk down the hash table and add the new entry!
-	 */
+	/* Now walk down the hash table and add the new entry */
 	for (p = NULL, s = sht->s_buckets[iHash].ssh_sh;
 	    s != NULL; s = s->sh_next) {
 		/*
@@ -263,28 +234,25 @@ sharefs_add(share_t *sh, sharefs_lens_t *shl)
 		 */
 		if (strcmp(sh->sh_path, s->sh_path) == 0 &&
 		    strlen(s->sh_path) == iPath) {
-			if (p) {
+			if (p != NULL)
 				p->sh_next = sh;
-			} else {
+			else
 				sht->s_buckets[iHash].ssh_sh = sh;
-			}
 
 			sh->sh_next = s->sh_next;
 
-			ASSERT(sharetab_size >= s->sh_size);
-			sharetab_size -= s->sh_size;
-			sharetab_size += sh->sh_size;
+			ASSERT(sg->sharetab_size >= s->sh_size);
+			sg->sharetab_size -= s->sh_size;
+			sg->sharetab_size += sh->sh_size;
 
-			/*
-			 * Get rid of the old node.
-			 */
+			/* Get rid of the old node */
 			sharefree(s, NULL);
 
-			gethrestime(&sharetab_mtime);
-			atomic_inc_32(&sharetab_generation);
+			gethrestime(&sg->sharetab_mtime);
+			atomic_inc_32(&sg->sharetab_generation);
 
 			ASSERT(sht->s_buckets[iHash].ssh_count != 0);
-			rw_exit(&sharetab_lock);
+			rw_exit(&sg->sharetab_lock);
 
 			return (0);
 		}
@@ -300,29 +268,61 @@ sharefs_add(share_t *sh, sharefs_lens_t *shl)
 	sht->s_buckets[iHash].ssh_sh = sh;
 	atomic_inc_32(&sht->s_buckets[iHash].ssh_count);
 	atomic_inc_32(&sht->s_count);
-	atomic_inc_32(&sharetab_count);
-	sharetab_size += sh->sh_size;
+	atomic_inc_32(&sg->sharetab_count);
+	sg->sharetab_size += sh->sh_size;
 
-	gethrestime(&sharetab_mtime);
-	atomic_inc_32(&sharetab_generation);
+	gethrestime(&sg->sharetab_mtime);
+	atomic_inc_32(&sg->sharetab_generation);
 
-	rw_exit(&sharetab_lock);
+	rw_exit(&sg->sharetab_lock);
 
 	return (0);
 }
 
+/* ARGSUSED */
+static void *
+sharetab_zone_init(zoneid_t zoneid)
+{
+	sharetab_globals_t *sg;
+
+	sg = kmem_zalloc(sizeof (*sg), KM_SLEEP);
+
+	rw_init(&sg->sharetab_lock, NULL, RW_DEFAULT, NULL);
+	rw_init(&sg->sharefs_lock, NULL, RW_DEFAULT, NULL);
+
+	sg->sharetab_size = 0;
+	sg->sharetab_count = 0;
+	sg->sharetab_generation = 1;
+
+	gethrestime(&sg->sharetab_mtime);
+	gethrestime(&sg->sharetab_snap_time);
+
+	return (sg);
+}
+
+/* ARGSUSED */
+static void
+sharetab_zone_fini(zoneid_t zoneid, void *data)
+{
+	sharetab_globals_t *sg = data;
+
+	rw_destroy(&sg->sharefs_lock);
+	rw_destroy(&sg->sharetab_lock);
+
+	kmem_free(sg, sizeof (*sg));
+}
+
 void
 sharefs_sharetab_init(void)
 {
-	rw_init(&sharetab_lock, NULL, RW_DEFAULT, NULL);
-	rw_init(&sharefs_lock, NULL, RW_DEFAULT, NULL);
-
-	sharetab_size = 0;
-	sharetab_count = 0;
-	sharetab_generation = 1;
+	zone_key_create(&sharetab_zone_key, sharetab_zone_init,
+	    NULL, sharetab_zone_fini);
+}
 
-	gethrestime(&sharetab_mtime);
-	gethrestime(&sharetab_snap_time);
+sharetab_globals_t *
+sharetab_get_globals(zone_t *zone)
+{
+	return (zone_getspecific(sharetab_zone_key, zone));
 }
 
 int
@@ -332,12 +332,10 @@ sharefs_impl(enum sharefs_sys_op opcode, share_t *sh_in, uint32_t iMaxLen)
 	size_t len;
 	size_t bufsz;
 	share_t *sh;
-	sharefs_lens_t shl;
-	model_t model;
-	char *buf = NULL;
+	sharetab_globals_t *sg = sharetab_get_globals(curzone);
 
 	STRUCT_DECL(share, u_sh);
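For context, sharetab_zone_init() and sharetab_zone_fini() above plug into the standard illumos zone-specific data (ZSD) machinery: a module registers a key once, the kernel then runs the create callback for the global zone and for every zone that boots, and runs the destroy callback as each zone halts. The following is a minimal sketch of that pattern, not part of the patch; the mymod_* names are hypothetical, while zone_key_create() and zone_getspecific() are the real ZSD entry points.

#include <sys/zone.h>
#include <sys/kmem.h>
#include <sys/mutex.h>

static zone_key_t mymod_zone_key;	/* hypothetical module key */

typedef struct mymod_globals {
	kmutex_t	mg_lock;	/* protects mg_count */
	uint_t		mg_count;
} mymod_globals_t;

/* Called once per zone (including the global zone) as it boots. */
static void *
mymod_zone_init(zoneid_t zoneid)
{
	mymod_globals_t *mg = kmem_zalloc(sizeof (*mg), KM_SLEEP);

	mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL);
	return (mg);	/* becomes the zone's value for mymod_zone_key */
}

/* Called as each zone halts; undoes mymod_zone_init(). */
static void
mymod_zone_fini(zoneid_t zoneid, void *data)
{
	mymod_globals_t *mg = data;

	mutex_destroy(&mg->mg_lock);
	kmem_free(mg, sizeof (*mg));
}

void
mymod_init(void)
{
	/* Args 2-4 are the create, shutdown and destroy callbacks. */
	zone_key_create(&mymod_zone_key, mymod_zone_init,
	    NULL, mymod_zone_fini);
}

mymod_globals_t *
mymod_get_globals(zone_t *zone)
{
	return (zone_getspecific(mymod_zone_key, zone));
}

sharefs_sharetab_init() above is exactly this registration step for the sharetab globals.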
@@ -347,20 +345,20 @@
 	 * Before we do anything, lets make sure we have
 	 * a sharetab in memory if we need one.
 	 */
-	rw_enter(&sharetab_lock, RW_READER);
+	rw_enter(&sg->sharetab_lock, RW_READER);
 	switch (opcode) {
-	case (SHAREFS_REMOVE) :
-	case (SHAREFS_REPLACE) :
-		if (!sharefs_sharetab) {
-			rw_exit(&sharetab_lock);
+	case SHAREFS_REMOVE:
+	case SHAREFS_REPLACE:
+		if (!sg->sharefs_sharetab) {
+			rw_exit(&sg->sharetab_lock);
 			return (set_errno(ENOENT));
 		}
 		break;
-	case (SHAREFS_ADD) :
-	default :
+	case SHAREFS_ADD:
+	default:
 		break;
 	}
-	rw_exit(&sharetab_lock);
+	rw_exit(&sg->sharetab_lock);
 
 	model = get_udatamodel();
 
@@ -368,49 +366,37 @@
 	 * Initialize the data pointers.
 	 */
 	STRUCT_INIT(u_sh, model);
-	if (copyin(sh_in, STRUCT_BUF(u_sh), STRUCT_SIZE(u_sh))) {
+	if (copyin(sh_in, STRUCT_BUF(u_sh), STRUCT_SIZE(u_sh)))
 		return (set_errno(EFAULT));
-	}
 
-	/*
-	 * Get the share.
-	 */
+	/* Get the share */
 	sh = kmem_zalloc(sizeof (share_t), KM_SLEEP);
 
-	/*
-	 * Get some storage for copying in the strings.
-	 */
+	/* Get some storage for copying in the strings */
 	buf = kmem_zalloc(bufsz + 1, KM_SLEEP);
 	bzero(&shl, sizeof (sharefs_lens_t));
 
-	/*
-	 * Only grab these two until we know what we want.
-	 */
+	/* Only grab these two until we know what we want */
 	SHARETAB_COPYIN(path);
 	SHARETAB_COPYIN(fstype);
 
 	switch (opcode) {
-	case (SHAREFS_ADD) :
-	case (SHAREFS_REPLACE) :
+	case SHAREFS_ADD:
+	case SHAREFS_REPLACE:
 		SHARETAB_COPYIN(res);
 		SHARETAB_COPYIN(opts);
 		SHARETAB_COPYIN(descr);
-
-		error = sharefs_add(sh, &shl);
+		error = sharefs_add(sg, sh, &shl);
 		break;
-
-	case (SHAREFS_REMOVE) :
-
-		error = sharefs_remove(sh, &shl);
+	case SHAREFS_REMOVE:
+		error = sharefs_remove(sg, sh, &shl);
 		break;
-
 	default:
 		error = EINVAL;
 		break;
 	}
 
 cleanup:
-
 	/*
 	 * If there is no error, then we have stashed the structure
 	 * away in the sharetab hash table or have deleted it.
@@ -418,22 +404,38 @@
 	 * Either way, the only reason to blow away the data is if
 	 * there was an error.
 	 */
-	if (error != 0) {
+	if (error != 0)
 		sharefree(sh, &shl);
-	}
 
-	if (buf) {
+	if (buf != NULL)
 		kmem_free(buf, bufsz + 1);
-	}
 
-	return ((error != 0) ? set_errno(error) : 0);
+	return (error != 0 ? set_errno(error) : 0);
 }
 
 int
 sharefs(enum sharefs_sys_op opcode, share_t *sh_in, uint32_t iMaxLen)
 {
-	if (secpolicy_sys_config(CRED(), B_FALSE) != 0)
-		return (set_errno(EPERM));
+	/*
+	 * If we're in the global zone, PRIV_SYS_CONFIG gives us the
+	 * privileges needed to act on sharetab. However, if we're in
+	 * a non-global zone, PRIV_SYS_CONFIG is not allowed. To work
+	 * around this issue, PRIV_SYS_NFS is used in this case.
+	 *
+	 * TODO: This basically overloads the definition/use of
+	 * PRIV_SYS_NFS to work around the limitation of PRIV_SYS_CONFIG
+	 * in a zone. Solaris 11 solved this by implementing PRIV_SYS_SHARE;
+	 * we should do the same and replace the use of PRIV_SYS_NFS here and
+	 * in zfs_secpolicy_share.
+	 */
+	if (INGLOBALZONE(curproc)) {
+		if (secpolicy_sys_config(CRED(), B_FALSE) != 0)
+			return (set_errno(EPERM));
+	} else {
+		/* behave like zfs_secpolicy_share() */
+		if (secpolicy_nfs(CRED()) != 0)
+			return (set_errno(EPERM));
+	}
 
 	return (sharefs_impl(opcode, sh_in, iMaxLen));
 }
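The same privilege split shows up in zfs_ioctl.c below. Factored out, the check reduces to the following sketch; share_policy_check() is a hypothetical name introduced here for illustration, while secpolicy_sys_config() and secpolicy_nfs() are the existing policy routines the patch actually calls.

#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/proc.h>
#include <sys/zone.h>

/*
 * Hypothetical refactoring of the check in sharefs(): global-zone
 * callers need PRIV_SYS_CONFIG, in-zone callers need PRIV_SYS_NFS,
 * matching what zfs_secpolicy_share() accepts after this patch.
 */
static int
share_policy_check(void)
{
	if (INGLOBALZONE(curproc))
		return (secpolicy_sys_config(CRED(), B_FALSE));

	return (secpolicy_nfs(CRED()));
}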
diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
index 20efe9b29ed0..a61386265611 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
@@ -21,6 +21,9 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
  * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
  * Portions Copyright 2011 Martin Matuska
  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
@@ -29,7 +32,7 @@
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
- * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright 2018 Nexenta Systems, Inc.
 * Copyright 2016 Toomas Soome
 * Copyright 2017 RackTop Systems.
 * Copyright (c) 2017 Datto Inc.
@@ -812,9 +815,6 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 int
 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
-	if (!INGLOBALZONE(curproc))
-		return (SET_ERROR(EPERM));
-
 	if (secpolicy_nfs(cr) == 0) {
 		return (0);
 	} else {
diff --git a/usr/src/uts/common/nfs/auth.h b/usr/src/uts/common/nfs/auth.h
index 5293e3fdd1de..a35ad9109b19 100644
--- a/usr/src/uts/common/nfs/auth.h
+++ b/usr/src/uts/common/nfs/auth.h
@@ -20,12 +20,12 @@
 */
 
 /*
- * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
 */
 
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright 2018 Nexenta Systems, Inc.
 */
 
 #ifndef _AUTH_H
diff --git a/usr/src/uts/common/nfs/export.h b/usr/src/uts/common/nfs/export.h
index 070f6bc8de0c..13f7b665d3b3 100644
--- a/usr/src/uts/common/nfs/export.h
+++ b/usr/src/uts/common/nfs/export.h
@@ -20,7 +20,7 @@
 */
 
 /*
- * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc.
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2016 Jason King.
 */
@@ -38,6 +38,10 @@
 #include
 #include
 
+#ifdef _KERNEL
+#include <sys/pkp_hash.h>	/* for PKP_HASH_SIZE */
+#endif /* _KERNEL */
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -477,9 +481,6 @@ typedef struct treenode {
 #define	TREE_EXPORTED(t)	\
 	((t)->tree_exi && !PSEUDO((t)->tree_exi))
 
-/* Root of nfs pseudo namespace */
-extern treenode_t *ns_root;
-
 #define	EXPTABLESIZE	256
 
 struct exp_hash {
@@ -533,6 +534,7 @@ struct exportinfo {
 	int		exi_id;
 	avl_node_t	exi_id_link;
 	struct exp_kstats *exi_kstats;
+	zoneid_t	exi_zoneid;
 };
 
 typedef struct exportinfo exportinfo_t;
@@ -611,7 +613,7 @@ extern int	nfsauth4_secinfo_access(struct exportinfo *,
 	struct svc_req *, int, int, cred_t *);
 extern int	nfsauth_cache_clnt_compar(const void *, const void *);
 extern int	nfs_fhbcmp(char *, char *, int);
-extern int	nfs_exportinit(void);
+extern void	nfs_exportinit(void);
 extern void	nfs_exportfini(void);
 extern int	chk_clnt_sec(struct exportinfo *, struct svc_req *);
 extern int	makefh(fhandle_t *, struct vnode *, struct exportinfo *);
@@ -628,32 +630,57 @@ extern struct exportinfo *nfs_vptoexi(vnode_t *, vnode_t *, cred_t *, int *,
 	int *, bool_t);
 extern int	nfs_check_vpexi(vnode_t *, vnode_t *, cred_t *,
 	struct exportinfo **);
-extern void	export_link(struct exportinfo *);
-extern void	export_unlink(struct exportinfo *);
 extern vnode_t *untraverse(vnode_t *);
 extern int	vn_is_nfs_reparse(vnode_t *, cred_t *);
 extern int	client_is_downrev(struct svc_req *);
 extern char	*build_symlink(vnode_t *, cred_t *, size_t *);
 
+extern fhandle_t nullfh2;	/* for comparing V2 filehandles */
+
+typedef struct nfs_export {
+	/* Root of nfs pseudo namespace */
+	treenode_t *ns_root;
+
+	struct exportinfo *exptable_path_hash[PKP_HASH_SIZE];
+	struct exportinfo *exptable[EXPTABLESIZE];
+
+	/*
+	 * Read/Write lock that protects the exportinfo list. This lock
+	 * must be held when searching or modifying the exportinfo list.
+ */ + krwlock_t exported_lock; + + /* "public" and default (root) location for public filehandle */ + struct exportinfo *exi_public, *exi_root; + /* For checking default public file handle */ + fid_t exi_rootfid; + /* For comparing V2 filehandles */ + fhandle_t nullfh2; + + /* The change attribute value of the root of nfs pseudo namespace */ + timespec_t ns_root_change; +} nfs_export_t; + /* * Functions that handle the NFSv4 server namespace */ extern exportinfo_t *vis2exi(treenode_t *); extern int treeclimb_export(struct exportinfo *); -extern void treeclimb_unexport(struct exportinfo *); +extern void treeclimb_unexport(nfs_export_t *, struct exportinfo *); extern int nfs_visible(struct exportinfo *, vnode_t *, int *); extern int nfs_visible_inode(struct exportinfo *, ino64_t, - struct exp_visible **); + struct exp_visible **); extern int has_visible(struct exportinfo *, vnode_t *); extern void free_visible(struct exp_visible *); extern int nfs_exported(struct exportinfo *, vnode_t *); -extern struct exportinfo *pseudo_exportfs(vnode_t *, fid_t *, - struct exp_visible *, struct exportdata *); +extern struct exportinfo *pseudo_exportfs(nfs_export_t *, vnode_t *, fid_t *, + struct exp_visible *, struct exportdata *); extern int vop_fid_pseudo(vnode_t *, fid_t *); extern int nfs4_vget_pseudo(struct exportinfo *, vnode_t **, fid_t *); extern bool_t nfs_visible_change(struct exportinfo *, vnode_t *, - timespec_t *); -extern void tree_update_change(treenode_t *, timespec_t *); + timespec_t *); +extern void tree_update_change(nfs_export_t *, treenode_t *, timespec_t *); + /* * Functions that handle the NFSv4 server namespace security flavors * information. @@ -661,17 +688,14 @@ extern void tree_update_change(treenode_t *, timespec_t *); extern void srv_secinfo_exp2pseu(struct exportdata *, struct exportdata *); extern void srv_secinfo_list_free(struct secinfo *, int); -/* - * "public" and default (root) location for public filehandle - */ -extern struct exportinfo *exi_public, *exi_root; -extern fhandle_t nullfh2; /* for comparing V2 filehandles */ -extern krwlock_t exported_lock; -extern struct exportinfo *exptable[]; +extern nfs_export_t *nfs_get_export(); +extern void export_link(nfs_export_t *, struct exportinfo *); +extern void export_unlink(nfs_export_t *, struct exportinfo *); /* * exi_id support */ +extern krwlock_t nfs_exi_id_lock; extern avl_tree_t exi_id_tree; extern int exi_id_get_next(void); diff --git a/usr/src/uts/common/nfs/nfs.h b/usr/src/uts/common/nfs/nfs.h index 3018a4175929..cacc44e8f8f3 100644 --- a/usr/src/uts/common/nfs/nfs.h +++ b/usr/src/uts/common/nfs/nfs.h @@ -20,19 +20,20 @@ */ /* - * Copyright 2016 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ +/* + * Copyright 2018 Nexenta Systems, Inc. + * Copyright (c) 2013 by Delphix. All rights reserved. + */ + #ifndef _NFS_NFS_H #define _NFS_NFS_H -/* nfs.h 2.38 88/08/19 SMI */ - #include #include #include @@ -72,8 +73,34 @@ extern "C" { #define NFS_VERSMIN_DEFAULT ((rpcvers_t)2) #define NFS_VERSMAX_DEFAULT ((rpcvers_t)4) -extern rpcvers_t nfs_versmin; -extern rpcvers_t nfs_versmax; +/* + * Used to track the state of the server so that initialization + * can be done properly. 
+ */ +typedef enum { + NFS_SERVER_STOPPED, /* server state destroyed */ + NFS_SERVER_STOPPING, /* server state being destroyed */ + NFS_SERVER_RUNNING, + NFS_SERVER_QUIESCED, /* server state preserved */ + NFS_SERVER_OFFLINE /* server pool offline */ +} nfs_server_running_t; + +/* + * Zone globals variables of NFS server + */ +typedef struct nfs_globals { + rpcvers_t nfs_versmin; + rpcvers_t nfs_versmax; + + /* NFS server locks and state */ + nfs_server_running_t nfs_server_upordown; + kmutex_t nfs_server_upordown_lock; + kcondvar_t nfs_server_upordown_cv; + + /* RDMA wait variables */ + kcondvar_t rdma_wait_cv; + kmutex_t rdma_wait_mutex; +} nfs_globals_t; /* * Default delegation setting for the server ==> "on" @@ -917,7 +944,7 @@ extern int nfs_async_stop_sig(struct vfs *); extern int nfs_clntinit(void); extern void nfs_clntfini(void); extern int nfstsize(void); -extern int nfs_srvinit(void); +extern void nfs_srvinit(void); extern void nfs_srvfini(void); extern int vattr_to_sattr(struct vattr *, struct nfssattr *); extern void setdiropargs(struct nfsdiropargs *, char *, vnode_t *); @@ -970,6 +997,8 @@ extern nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths; extern kstat_named_t *global_svstat_ptr[]; +extern zone_key_t rfs4_zone_key; +extern zone_key_t nfssrv_zone_key; extern krwlock_t rroklock; extern vtype_t nf_to_vt[]; extern kstat_named_t *rfsproccnt_v2_ptr; @@ -2340,12 +2369,10 @@ extern bool_t rfs4_check_delegated(int mode, vnode_t *, bool_t trunc); * if no delegation is present. */ extern int rfs4_delegated_getattr(vnode_t *, vattr_t *, int, cred_t *); -extern void rfs4_hold_deleg_policy(void); -extern void rfs4_rele_deleg_policy(void); extern int do_xattr_exists_check(vnode_t *, ulong_t *, cred_t *); -extern int protect_zfs_mntpt(vnode_t *); +extern int protect_zfs_mntpt(vnode_t *); extern ts_label_t *nfs_getflabel(vnode_t *, struct exportinfo *); extern boolean_t do_rfs_label_check(bslabel_t *, vnode_t *, int, diff --git a/usr/src/uts/common/nfs/nfs4.h b/usr/src/uts/common/nfs/nfs4.h index ca5bafa9ad96..b6dd3af0d8a2 100644 --- a/usr/src/uts/common/nfs/nfs4.h +++ b/usr/src/uts/common/nfs/nfs4.h @@ -20,12 +20,12 @@ */ /* - * Copyright 2016 Nexenta Systems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright 2018 Nexenta Systems, Inc. */ #ifndef _NFS4_H @@ -39,6 +39,7 @@ #ifdef _KERNEL #include +#include #include #else #include @@ -369,12 +370,6 @@ typedef struct rfs4_dss_path { char **rfs4_dss_newpaths; uint_t rfs4_dss_numnewpaths; -/* - * Circular doubly-linked list of paths for currently-served RGs. - * No locking required: only changed on warmstart. Managed with insque/remque. 
@@ -740,26 +735,8 @@ typedef struct rfs4_file {
 	krwlock_t	rf_file_rwlock;
 } rfs4_file_t;
 
-extern int rfs4_seen_first_compound;	/* set first time we see one */
-
-extern rfs4_servinst_t *rfs4_cur_servinst;	/* current server instance */
-extern kmutex_t	rfs4_servinst_lock;	/* protects linked list */
-extern void rfs4_servinst_create(int, int, char **);
-extern void rfs4_servinst_destroy_all(void);
-extern void rfs4_servinst_assign(rfs4_client_t *,
-	rfs4_servinst_t *);
-extern rfs4_servinst_t *rfs4_servinst(rfs4_client_t *);
-extern int rfs4_clnt_in_grace(rfs4_client_t *);
-extern int rfs4_servinst_in_grace(rfs4_servinst_t *);
-extern int rfs4_servinst_grace_new(rfs4_servinst_t *);
-extern void rfs4_grace_start(rfs4_servinst_t *);
-extern void rfs4_grace_start_new(void);
-extern void rfs4_grace_reset_all(void);
-extern void rfs4_ss_oldstate(rfs4_oldstate_t *, char *, char *);
-extern void rfs4_dss_readstate(int, char **);
-
 /*
- * rfs4_deleg_policy is used to signify the server's global delegation
+ * nfs4_deleg_policy is used to signify the server's global delegation
  * policy.  The default is to NEVER delegate files and the
  * administrator must configure the server to enable delegations.
  *
@@ -771,8 +748,6 @@ typedef enum {
 	SRV_NORMAL_DELEGATE = 1
 } srv_deleg_policy_t;
 
-extern srv_deleg_policy_t rfs4_deleg_policy;
-extern kmutex_t rfs4_deleg_lock;
 extern void rfs4_disable_delegation(void), rfs4_enable_delegation(void);
 
 /*
@@ -789,6 +764,52 @@ typedef enum {
 
 #define	NFS4_DELEG4TYPE2REQTYPE(x) (delegreq_t)(x)
 
+/*
+ * Zone global variables of NFSv4 server
+ */
+typedef struct nfs4_srv {
+	/* Unique write verifier */
+	verifier4	write4verf;
+	/* Delegation lock */
+	kmutex_t	deleg_lock;
+	/* Used to manage access to server instance linked list */
+	kmutex_t	servinst_lock;
+	rfs4_servinst_t *nfs4_cur_servinst;
+	/* Used to manage access to nfs4_deleg_policy */
+	krwlock_t	deleg_policy_lock;
+	srv_deleg_policy_t nfs4_deleg_policy;
+	/* Set first time we see one */
+	int		seen_first_compound;
+	/*
+	 * Circular doubly-linked list of paths for currently-served RGs.
+	 * No locking required -- only changed on warmstart.
+	 * Managed with insque/remque.
+ */ + rfs4_dss_path_t *dss_pathlist; + /* Duplicate request cache */ + rfs4_drc_t *nfs4_drc; + /* nfsv4 server start time */ + time_t rfs4_start_time; + /* CPR callback id -- not related to v4 callbacks */ + callb_id_t cpr_id; +} nfs4_srv_t; + +extern srv_deleg_policy_t nfs4_get_deleg_policy(); + +extern void rfs4_servinst_create(nfs4_srv_t *, int, int, char **); +extern void rfs4_servinst_destroy_all(nfs4_srv_t *); +extern void rfs4_servinst_assign(nfs4_srv_t *, rfs4_client_t *, + rfs4_servinst_t *); +extern rfs4_servinst_t *rfs4_servinst(rfs4_client_t *); +extern int rfs4_clnt_in_grace(rfs4_client_t *); +extern int rfs4_servinst_in_grace(rfs4_servinst_t *); +extern int rfs4_servinst_grace_new(rfs4_servinst_t *); +extern void rfs4_grace_start(rfs4_servinst_t *); +extern void rfs4_grace_start_new(nfs4_srv_t *); +extern void rfs4_grace_reset_all(nfs4_srv_t *); +extern void rfs4_ss_oldstate(rfs4_oldstate_t *, char *, char *); +extern void rfs4_dss_readstate(nfs4_srv_t *, int, char **); + /* * Various interfaces to manipulate the state structures introduced * above @@ -946,7 +967,10 @@ extern fem_t *deleg_wrops; extern int rfs4_share(rfs4_state_t *, uint32_t, uint32_t); extern int rfs4_unshare(rfs4_state_t *); -extern void rfs4_set_deleg_policy(srv_deleg_policy_t); +extern void rfs4_set_deleg_policy(nfs4_srv_t *, srv_deleg_policy_t); +extern void rfs4_hold_deleg_policy(nfs4_srv_t *); +extern void rfs4_rele_deleg_policy(nfs4_srv_t *); + #ifdef DEBUG #define NFS4_DEBUG(var, args) if (var) cmn_err args @@ -1377,6 +1401,7 @@ extern stateid4 clnt_special1; * The NFS Version 4 service procedures. */ +extern void rfs4_do_server_start(int, int, int); extern void rfs4_compound(COMPOUND4args *, COMPOUND4res *, struct exportinfo *, struct svc_req *, cred_t *, int *); extern void rfs4_compound_free(COMPOUND4res *); @@ -1384,10 +1409,12 @@ extern void rfs4_compound_flagproc(COMPOUND4args *, int *); extern void rfs4_compound_kstat_args(COMPOUND4args *); extern void rfs4_compound_kstat_res(COMPOUND4res *); -extern int rfs4_srvrinit(void); +extern void rfs4_srvrinit(void); extern void rfs4_srvrfini(void); -extern void rfs4_state_init(void); -extern void rfs4_state_fini(void); +extern void rfs4_state_g_init(void); +extern void rfs4_state_zone_init(nfs4_srv_t *); +extern void rfs4_state_g_fini(void); +extern void rfs4_state_zone_fini(void); #endif #ifdef __cplusplus diff --git a/usr/src/uts/common/nfs/nfs4_drc.h b/usr/src/uts/common/nfs/nfs4_drc.h index a77fb60818e4..554e0f77cab1 100644 --- a/usr/src/uts/common/nfs/nfs4_drc.h +++ b/usr/src/uts/common/nfs/nfs4_drc.h @@ -18,16 +18,19 @@ * * CDDL HEADER END */ + /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2018 Nexenta Systems, Inc. 
+ */ + #ifndef _NFS4_DRC_H #define _NFS4_DRC_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -70,12 +73,11 @@ typedef struct rfs4_dupreq { #define NFS4_DUP_REPLAY 4 #define NFS4_DUP_INUSE 5 -extern rfs4_drc_t *nfs4_drc; extern uint32_t nfs4_drc_max; extern uint32_t nfs4_drc_hash; rfs4_drc_t *rfs4_init_drc(uint32_t, uint32_t); -void rfs4_fini_drc(rfs4_drc_t *); +void rfs4_fini_drc(void); void rfs4_dr_chstate(rfs4_dupreq_t *, int); rfs4_dupreq_t *rfs4_alloc_dr(rfs4_drc_t *); int rfs4_find_dr(struct svc_req *, rfs4_drc_t *, rfs4_dupreq_t **); diff --git a/usr/src/uts/common/nfs/nfs_cmd.h b/usr/src/uts/common/nfs/nfs_cmd.h index b72979b95e56..1d7bcb4eeaca 100644 --- a/usr/src/uts/common/nfs/nfs_cmd.h +++ b/usr/src/uts/common/nfs/nfs_cmd.h @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -26,6 +27,10 @@ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #ifndef _NFS_CMD_H #define _NFS_CMD_H @@ -80,6 +85,8 @@ extern struct charset_cache *nfscmd_findmap(struct exportinfo *, struct sockaddr *); extern char *nfscmd_convname(struct sockaddr *, struct exportinfo *, char *, int, size_t); +extern void nfscmd_init(void); +extern void nfscmd_fini(void); #endif diff --git a/usr/src/uts/common/nfs/nfs_log.h b/usr/src/uts/common/nfs/nfs_log.h index ff0f38a455b1..2bb90b37afaa 100644 --- a/usr/src/uts/common/nfs/nfs_log.h +++ b/usr/src/uts/common/nfs/nfs_log.h @@ -19,16 +19,19 @@ * * CDDL HEADER END */ + /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #ifndef _NFS_LOG_H #define _NFS_LOG_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -654,7 +657,7 @@ extern void nfslog_dprint(const int, const char *fmt, ...) extern void *nfslog_record_alloc(struct exportinfo *, int, void **, int); extern void nfslog_record_free(void *, void *, size_t); -extern struct exportinfo *nfslog_get_exi(struct exportinfo *, +extern struct exportinfo *nfslog_get_exi(nfs_export_t *, struct exportinfo *, struct svc_req *, caddr_t, unsigned int *); extern void nfslog_write_record(struct exportinfo *, struct svc_req *, caddr_t, caddr_t, cred_t *, struct netbuf *, unsigned int, diff --git a/usr/src/uts/common/sharefs/sharefs.h b/usr/src/uts/common/sharefs/sharefs.h index 3587504c5e5c..d222227cb8ba 100644 --- a/usr/src/uts/common/sharefs/sharefs.h +++ b/usr/src/uts/common/sharefs/sharefs.h @@ -24,11 +24,13 @@ * Use is subject to license terms. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #ifndef _SHAREFS_SHAREFS_H #define _SHAREFS_SHAREFS_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * This header provides service for the sharefs module. */ @@ -67,24 +69,27 @@ typedef struct sharefs_vfs { #define SHAREFS_NAME_MAX MAXNAMELEN -/* - * The lock ordering whenever sharefs_lock and sharetab_lock both - * need to be held is: sharefs_lock and then sharetab_lock. - */ -extern krwlock_t sharefs_lock; /* lock for the vnode ops */ -extern sharetab_t *sharefs_sharetab; /* The sharetab. */ +typedef struct sharetab_globals { + /* + * The lock ordering whenever sharefs_lock and sharetab_lock both + * need to be held is: sharefs_lock and then sharetab_lock. + */ + krwlock_t sharefs_lock; /* lock for the vnode ops */ + sharetab_t *sharefs_sharetab; /* The sharetab. 
*/
 
-extern uint_t		sharetab_count;		/* How many shares? */
-extern krwlock_t	sharetab_lock;		/* lock for the cached sharetab */
-extern size_t		sharetab_size;		/* How big is the sharetab file? */
+	uint_t		sharetab_count;		/* How many shares? */
+	krwlock_t	sharetab_lock;		/* lock for the cached sharetab */
+	size_t		sharetab_size;		/* How big is the sharetab file? */
 
-extern timestruc_t	sharetab_mtime;		/* Last mod to sharetab */
-extern timestruc_t	sharetab_snap_time;	/* Last snap */
-extern uint_t		sharetab_generation;	/* Which copy is it? */
+	timestruc_t	sharetab_mtime;		/* Last mod to sharetab */
+	timestruc_t	sharetab_snap_time;	/* Last snap */
+	uint_t		sharetab_generation;	/* Which copy is it? */
+} sharetab_globals_t;
 
 #define	SHAREFS_INO_FILE	0x80
 
 extern vnode_t *sharefs_create_root_file(vfs_t *);
+extern sharetab_globals_t *sharetab_get_globals(zone_t *zone);
 
 /*
  * Sharetab file
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 27f52c57e29d..7b4306c14a8f 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -18,10 +18,14 @@
 *
 * CDDL HEADER END
 */
+
 /*
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
  * Copyright 2015 Joyent, Inc. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc.
 * Copyright 2014 Igor Kozhukhov .
 */
@@ -637,7 +641,6 @@ typedef struct zone {
 */
 #define	ZONE_PS_INVAL	PS_MYID
 
-
 extern zone_t zone0;
 extern zone_t *global_zone;
 extern uint_t maxzones;
@@ -778,6 +781,11 @@ struct zsd_entry {
 */
 #define	ZONE_SPECIALPID(x)	 ((x) == 0 || (x) == 1)
 
+/*
+ * The root vnode of the current zone.
+ */
+#define	ZONE_ROOTVP()	(curproc->p_zone->zone_rootvp)
+
 /*
  * Zone-safe version of thread_create() to be used when the caller wants to
  * create a kernel thread to run within the current zone's context.
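The zone.h hunk ends at the block comment for illumos's zthread_create(), the zone-safe counterpart to thread_create(): a thread started this way is accounted to its zone and reaped when the zone halts, which is what a per-zone NFS service needs for its workers. A sketch under that assumption, with a hypothetical worker function:

#include <sys/zone.h>
#include <sys/thread.h>
#include <sys/disp.h>

/* Hypothetical per-zone worker; must exit via zthread_exit(). */
static void
nfs_zone_worker(void *arg)
{
	/* ... serve requests for curzone here ... */
	zthread_exit();
}

static void
nfs_zone_start_worker(void)
{
	(void) zthread_create(NULL, 0, nfs_zone_worker, NULL, 0,
	    minclsyspri);
}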