Skip to content

Commit

Permalink
kernfs: Reduce contention around global per-fs kernfs_rwsem.
Browse files Browse the repository at this point in the history
Right now a global per file system based rwsem (kernfs_rwsem)
synchronizes multiple kernfs operations. On a large system with
few hundred CPUs and few hundred applications simultaenously trying
to access sysfs, this results in multiple sys_open(s) contending on
kernfs_rwsem via kernfs_iop_permission and kernfs_dop_revalidate.

-   21.42%    21.34%  showgids   [kernel.kallsyms]     [k] up_read
     21.34% __libc_start_main
        __GI___libc_open
        entry_SYSCALL_64_after_hwframe
        do_syscall_64
        sys_open
        do_sys_open
        do_filp_open
      - path_openat
         - 20.05% link_path_walk
            - 9.76% walk_component
                 lookup_fast
               - d_revalidate.part.24
                  - 9.75% kernfs_dop_revalidate
                       up_read
            - 9.46% inode_permission
               - __inode_permission
                  - 9.46% kernfs_iop_permission
                       up_read
            - 0.83% kernfs_iop_get_link
                 up_read
         - 0.80% lookup_fast
              d_revalidate.part.24
              kernfs_dop_revalidate
              up_read

-   21.31%    21.21%  showgids   [kernel.kallsyms]    [k] down_read
     21.21% __libc_start_main
        __GI___libc_open
        entry_SYSCALL_64_after_hwframe
        do_syscall_64
        sys_open
        do_sys_open
        do_filp_open
      - path_openat
         - 19.78% link_path_walk
            - 10.62% inode_permission
               - __inode_permission
                  - 10.62% kernfs_iop_permission
                       down_read
            - 8.45% walk_component
                 lookup_fast
               - d_revalidate.part.24
                  - 8.45% kernfs_dop_revalidate
                       down_read
            - 0.71% kernfs_iop_get_link
                 down_read
         - 0.72% lookup_fast
            - d_revalidate.part.24
               - 0.72% kernfs_dop_revalidate
                    down_read
         - 0.71% may_open
              inode_permission
              __inode_permission
              kernfs_iop_permission
              down_read

Since permission is specific to a kernfs_node we can use a per
kernfs_node based lock to access/modify permission. Also use kernfs
reference counting to ensure we are accessing/modifying permissions
for an existing kernfs_node object.

Using this change brings down the above mentioned down_read/up_read
numbers to ~8%, thus indicating that contention around kernfs_rwsem
has reduced to about 1/3rd of earlier value.

Signed-off-by: Imran Khan <imran.f.khan@oracle.com>
  • Loading branch information
imran-kn authored and intel-lab-lkp committed Jan 3, 2022
1 parent 0fc4d2a commit 3dd2a5f
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 8 deletions.
4 changes: 4 additions & 0 deletions fs/kernfs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -750,11 +750,13 @@ int kernfs_add_one(struct kernfs_node *kn)
goto out_unlock;

/* Update timestamps on the parent */
down_write(&parent->kernfs_iop_rwsem);
ps_iattr = parent->iattr;
if (ps_iattr) {
ktime_get_real_ts64(&ps_iattr->ia_ctime);
ps_iattr->ia_mtime = ps_iattr->ia_ctime;
}
up_write(&parent->kernfs_iop_rwsem);

up_write(&root->kernfs_rwsem);

Expand Down Expand Up @@ -1380,8 +1382,10 @@ static void __kernfs_remove(struct kernfs_node *kn)

/* update timestamps on the parent */
if (ps_iattr) {
down_write(&pos->parent->kernfs_iop_rwsem);
ktime_get_real_ts64(&ps_iattr->ia_ctime);
ps_iattr->ia_mtime = ps_iattr->ia_ctime;
up_write(&pos->parent->kernfs_iop_rwsem);
}

kernfs_put(pos);
Expand Down
25 changes: 17 additions & 8 deletions fs/kernfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,11 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
int ret;
struct kernfs_root *root = kernfs_root(kn);

down_write(&root->kernfs_rwsem);
kernfs_get(kn);
down_write(&kn->kernfs_iop_rwsem);
ret = __kernfs_setattr(kn, iattr);
up_write(&root->kernfs_rwsem);
up_write(&kn->kernfs_iop_rwsem);
kernfs_put(kn);
return ret;
}

Expand All @@ -119,7 +121,8 @@ int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return -EINVAL;

root = kernfs_root(kn);
down_write(&root->kernfs_rwsem);
kernfs_get(kn);
down_write(&kn->kernfs_iop_rwsem);
error = setattr_prepare(&init_user_ns, dentry, iattr);
if (error)
goto out;
Expand All @@ -132,7 +135,8 @@ int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
setattr_copy(&init_user_ns, inode, iattr);

out:
up_write(&root->kernfs_rwsem);
up_write(&kn->kernfs_iop_rwsem);
kernfs_put(kn);
return error;
}

Expand Down Expand Up @@ -189,12 +193,14 @@ int kernfs_iop_getattr(struct user_namespace *mnt_userns,
struct kernfs_node *kn = inode->i_private;
struct kernfs_root *root = kernfs_root(kn);

down_read(&root->kernfs_rwsem);
kernfs_get(kn);
down_read(&kn->kernfs_iop_rwsem);
spin_lock(&inode->i_lock);
kernfs_refresh_inode(kn, inode);
generic_fillattr(&init_user_ns, inode, stat);
spin_unlock(&inode->i_lock);
up_read(&root->kernfs_rwsem);
up_read(&kn->kernfs_iop_rwsem);
kernfs_put(kn);

return 0;
}
Expand All @@ -207,6 +213,7 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode)
inode->i_op = &kernfs_iops;
inode->i_generation = kernfs_gen(kn);

init_rwsem(&kn->kernfs_iop_rwsem);
set_default_inode_attr(inode, kn->mode);
kernfs_refresh_inode(kn, inode);

Expand Down Expand Up @@ -287,12 +294,14 @@ int kernfs_iop_permission(struct user_namespace *mnt_userns,
kn = inode->i_private;
root = kernfs_root(kn);

down_read(&root->kernfs_rwsem);
kernfs_get(kn);
down_read(&kn->kernfs_iop_rwsem);
spin_lock(&inode->i_lock);
kernfs_refresh_inode(kn, inode);
ret = generic_permission(&init_user_ns, inode, mask);
spin_unlock(&inode->i_lock);
up_read(&root->kernfs_rwsem);
up_read(&kn->kernfs_iop_rwsem);
kernfs_put(kn);

return ret;
}
Expand Down
1 change: 1 addition & 0 deletions include/linux/kernfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ struct kernfs_node {
struct kernfs_iattrs *iattr;
spinlock_t kernfs_open_node_lock;
struct mutex kernfs_open_file_mutex;
struct rw_semaphore kernfs_iop_rwsem;
};

/*
Expand Down

0 comments on commit 3dd2a5f

Please sign in to comment.