Skip to content

Commit

Permalink
kernfs: use hashed mutex and spinlock in place of global ones.
Browse files Browse the repository at this point in the history
Right now a global mutex (kernfs_open_file_mutex) protects the list of
kernfs_open_file instances corresponding to a sysfs attribute. So even
if different tasks are opening or closing different sysfs files, they
can contend on the osq_lock of this mutex. The contention is more apparent
in large-scale systems with a few hundred CPUs, where most of the CPUs have
running tasks that are opening, accessing or closing sysfs files at any
point in time. Since each list of kernfs_open_file instances belongs to a
kernfs_open_node instance which in turn corresponds to one kernfs_node,
moving the global kernfs_open_file_mutex into kernfs_node might seem to
fix this contention, but it has the unwanted side effect of bloating up
the kernfs_node size and hence kobject memory usage.

Also, since kernfs_node->attr.open points to the kernfs_open_node instance
corresponding to the kernfs_node, we could use a kernfs_node-specific
spinlock in place of the current global spinlock, i.e. kernfs_open_node_lock.
But this approach would increase kobject memory usage as well.

Use hashed locks in place of the above-mentioned global locks to reduce
kernfs access contention without increasing kobject memory usage.

Signed-off-by: Imran Khan <imran.f.khan@oracle.com>
  • Loading branch information
imran-kn authored and intel-lab-lkp committed Jan 13, 2022
1 parent a70bf4a commit ee30c7a
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 34 deletions.
65 changes: 31 additions & 34 deletions fs/kernfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,6 @@

#include "kernfs-internal.h"

/*
* There's one kernfs_open_file for each open file and one kernfs_open_node
* for each kernfs_node with one or more open files.
*
* kernfs_node->attr.open points to kernfs_open_node. attr.open is
* protected by kernfs_open_node_lock.
*
* filp->private_data points to seq_file whose ->private points to
* kernfs_open_file. kernfs_open_files are chained at
* kernfs_open_node->files, which is protected by kernfs_open_file_mutex.
*/
static DEFINE_SPINLOCK(kernfs_open_node_lock);
static DEFINE_MUTEX(kernfs_open_file_mutex);

struct kernfs_open_node {
atomic_t refcnt;
atomic_t event;
Expand Down Expand Up @@ -524,10 +510,13 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
struct kernfs_open_file *of)
{
struct kernfs_open_node *on, *new_on = NULL;

struct mutex *mutex = NULL;
spinlock_t *lock = NULL;
retry:
mutex_lock(&kernfs_open_file_mutex);
spin_lock_irq(&kernfs_open_node_lock);
mutex = open_file_mutex_ptr(kn);
lock = open_node_lock_ptr(kn);
mutex_lock(mutex);
spin_lock_irq(lock);

if (!kn->attr.open && new_on) {
kn->attr.open = new_on;
Expand All @@ -540,8 +529,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
list_add_tail(&of->list, &on->files);
}

spin_unlock_irq(&kernfs_open_node_lock);
mutex_unlock(&kernfs_open_file_mutex);
spin_unlock_irq(lock);
mutex_unlock(mutex);

if (on) {
kfree(new_on);
Expand Down Expand Up @@ -575,10 +564,15 @@ static void kernfs_put_open_node(struct kernfs_node *kn,
struct kernfs_open_file *of)
{
struct kernfs_open_node *on = kn->attr.open;
struct mutex *mutex = NULL;
spinlock_t *lock = NULL;
unsigned long flags;

mutex_lock(&kernfs_open_file_mutex);
spin_lock_irqsave(&kernfs_open_node_lock, flags);
mutex = open_file_mutex_ptr(kn);
lock = open_node_lock_ptr(kn);

mutex_lock(mutex);
spin_lock_irqsave(lock, flags);

if (of)
list_del(&of->list);
Expand All @@ -588,8 +582,8 @@ static void kernfs_put_open_node(struct kernfs_node *kn,
else
on = NULL;

spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
mutex_unlock(&kernfs_open_file_mutex);
spin_unlock_irqrestore(lock, flags);
mutex_unlock(mutex);

kfree(on);
}
Expand Down Expand Up @@ -729,11 +723,11 @@ static void kernfs_release_file(struct kernfs_node *kn,
/*
* @of is guaranteed to have no other file operations in flight and
* we just want to synchronize release and drain paths.
* @kernfs_open_file_mutex is enough. @of->mutex can't be used
* @open_file_mutex is enough. @of->mutex can't be used
* here because drain path may be called from places which can
* cause circular dependency.
*/
lockdep_assert_held(&kernfs_open_file_mutex);
lockdep_assert_held(open_file_mutex_ptr(kn));

if (!of->released) {
/*
Expand All @@ -752,9 +746,9 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
struct kernfs_open_file *of = kernfs_of(filp);

if (kn->flags & KERNFS_HAS_RELEASE) {
mutex_lock(&kernfs_open_file_mutex);
mutex_lock(open_file_mutex_ptr(kn));
kernfs_release_file(kn, of);
mutex_unlock(&kernfs_open_file_mutex);
mutex_unlock(open_file_mutex_ptr(kn));
}

kernfs_put_open_node(kn, of);
Expand All @@ -769,19 +763,23 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
{
struct kernfs_open_node *on;
struct kernfs_open_file *of;
struct mutex *mutex = NULL;
spinlock_t *lock = NULL;

if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
return;

spin_lock_irq(&kernfs_open_node_lock);
lock = open_node_lock_ptr(kn);
spin_lock_irq(lock);
on = kn->attr.open;
if (on)
atomic_inc(&on->refcnt);
spin_unlock_irq(&kernfs_open_node_lock);
spin_unlock_irq(lock);
if (!on)
return;

mutex_lock(&kernfs_open_file_mutex);
mutex = open_file_mutex_ptr(kn);
mutex_lock(mutex);

list_for_each_entry(of, &on->files, list) {
struct inode *inode = file_inode(of->file);
Expand All @@ -793,8 +791,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
kernfs_release_file(kn, of);
}

mutex_unlock(&kernfs_open_file_mutex);

mutex_unlock(mutex);
kernfs_put_open_node(kn, NULL);
}

Expand Down Expand Up @@ -922,13 +919,13 @@ void kernfs_notify(struct kernfs_node *kn)
return;

/* kick poll immediately */
spin_lock_irqsave(&kernfs_open_node_lock, flags);
spin_lock_irqsave(open_node_lock_ptr(kn), flags);
on = kn->attr.open;
if (on) {
atomic_inc(&on->event);
wake_up_interruptible(&on->poll);
}
spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
spin_unlock_irqrestore(open_node_lock_ptr(kn), flags);

/* schedule work to kick fsnotify */
spin_lock_irqsave(&kernfs_notify_lock, flags);
Expand Down
57 changes: 57 additions & 0 deletions fs/kernfs/kernfs-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,63 @@
#include <linux/kernfs.h>
#include <linux/fs_context.h>

#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/cache.h>
#include <linux/hash.h>
#include <linux/log2.h>

#ifdef CONFIG_SMP
/*
 * Size the hashed-lock tables from the CPU count, capped at 32 CPUs:
 * 2 * ilog2(min(NR_CPUS, 32)) bits, i.e. at most 10 bits / 1024 locks.
 * NOTE(review): with NR_CPUS == 1 this evaluates to 0 bits, i.e. a
 * single lock — presumably acceptable for such configs, but confirm.
 */
#define NR_KERNFS_LOCK_BITS (2 * (ilog2(NR_CPUS < 32 ? NR_CPUS : 32)))
#else
#define NR_KERNFS_LOCK_BITS 1
#endif

/* Number of hash buckets (locks) in each table below. */
#define NR_KERNFS_LOCKS (1 << NR_KERNFS_LOCK_BITS)

/*
 * Each lock gets its own cacheline-aligned wrapper struct so that two
 * adjacent hashed locks never share a cache line (avoids false sharing
 * between CPUs contending on different buckets).
 */
struct kernfs_open_node_lock {
	spinlock_t lock;
} ____cacheline_aligned_in_smp;

struct kernfs_open_file_mutex {
	struct mutex lock;
} ____cacheline_aligned_in_smp;

/*
* There's one kernfs_open_file for each open file and one kernfs_open_node
* for each kernfs_node with one or more open files.
*
* kernfs_node->attr.open points to kernfs_open_node. attr.open is
* protected by open_node_locks[i].
*
* filp->private_data points to seq_file whose ->private points to
* kernfs_open_file. kernfs_open_files are chained at
* kernfs_open_node->files, which is protected by open_file_mutex[i].
*
* To reduce possible contention in sysfs access, arising due to single
* locks, use an array of locks and use kernfs_node object address as
* hash keys to get the index of these locks.
*/

/*
 * Hashed lock tables protecting kernfs_node->attr.open (spinlocks) and
 * kernfs_open_node->files lists (mutexes).  Indexed by a hash of the
 * kernfs_node pointer; see open_node_lock_ptr()/open_file_mutex_ptr().
 */
struct kernfs_global_locks {
	struct kernfs_open_node_lock open_node_locks[NR_KERNFS_LOCKS];
	struct kernfs_open_file_mutex open_file_mutex[NR_KERNFS_LOCKS];
};

/*
 * NOTE(review): defining this object 'static' in a header gives every
 * translation unit that includes kernfs-internal.h (e.g. both file.c
 * and mount.c) its own private copy of the lock tables.
 * kernfs_lock_init() in mount.c then initializes only mount.c's copy,
 * while file.c takes locks from a separate, uninitialized copy.  This
 * should be declared 'extern' here with a single definition in exactly
 * one .c file.
 */
static struct kernfs_global_locks kernfs_global_locks;

/*
 * Return the hashed spinlock guarding @kn->attr.open.  The kernfs_node
 * address is hashed down to NR_KERNFS_LOCK_BITS bits to select one of
 * the NR_KERNFS_LOCKS cacheline-aligned spinlocks.
 */
static inline spinlock_t *open_node_lock_ptr(struct kernfs_node *kn)
{
	return &kernfs_global_locks.open_node_locks[hash_ptr(kn, NR_KERNFS_LOCK_BITS)].lock;
}

/*
 * Return the hashed mutex guarding the kernfs_open_node->files list of
 * @kn.  Uses the same pointer hash as open_node_lock_ptr(), so a given
 * node always maps to the same mutex/spinlock pair of buckets.
 */
static inline struct mutex *open_file_mutex_ptr(struct kernfs_node *kn)
{
	return &kernfs_global_locks.open_file_mutex[hash_ptr(kn, NR_KERNFS_LOCK_BITS)].lock;
}

struct kernfs_iattrs {
kuid_t ia_uid;
kgid_t ia_gid;
Expand Down
11 changes: 11 additions & 0 deletions fs/kernfs/mount.c
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,16 @@ void kernfs_kill_sb(struct super_block *sb)
kfree(info);
}

/*
 * Initialize every bucket of the hashed kernfs lock tables.  Called
 * once at boot from kernfs_init(), before any kernfs file can be
 * opened.
 */
void __init kernfs_lock_init(void)
{
	int i;

	for (i = 0; i < NR_KERNFS_LOCKS; i++) {
		mutex_init(&kernfs_global_locks.open_file_mutex[i].lock);
		spin_lock_init(&kernfs_global_locks.open_node_locks[i].lock);
	}
}

void __init kernfs_init(void)
{
kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
Expand All @@ -397,4 +407,5 @@ void __init kernfs_init(void)
kernfs_iattrs_cache = kmem_cache_create("kernfs_iattrs_cache",
sizeof(struct kernfs_iattrs),
0, SLAB_PANIC, NULL);
kernfs_lock_init();
}

0 comments on commit ee30c7a

Please sign in to comment.