Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

rwsem: Implement writer lock-stealing for better scalability

Commit 5a50508 ("mm/rmap: Convert the struct anon_vma::mutex
to an rwsem") changed struct anon_vma::mutex to an rwsem, which
caused aim7 fork_test performance to drop by 50%.

Yuanhan Liu did the following excellent analysis:

    https://lkml.org/lkml/2013/1/29/84

and found that the regression is caused by strict, serialized,
FIFO sequential write-ownership of rwsems. Ingo suggested
implementing opportunistic lock-stealing for the front writer
task in the waitqueue.

Yuanhan Liu implemented lock-stealing for spinlock-rwsems,
which indeed recovered much of the regression - confirming
the analysis that the main factor in the regression was the
FIFO writer-fairness of rwsems.

In this patch we allow lock-stealing to happen when the first
waiter is also a writer. With that change in place the
aim7 fork_test performance is fully recovered on my
Intel NHM EP, NHM EX, SNB EP 2S and 4S test-machines.

Reported-by: lkp@linux.intel.com
Reported-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: Alex Shi <alex.shi@intel.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Anton Blanchard <anton@samba.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: paul.gortmaker@windriver.com
Link: https://lkml.org/lkml/2013/1/29/84
Link: http://lkml.kernel.org/r/1360069915-31619-1-git-send-email-alex.shi@intel.com
[ Small stylistic fixes, updated changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information...
commit ce6711f3d196f09ca0ed29a24dfad42d83912b20 1 parent 5cd3f5a
authored February 05, 2013 Ingo Molnar committed February 19, 2013

Showing 1 changed file with 46 additions and 29 deletions. Show diff stats Hide diff stats

  1. 75  lib/rwsem.c
75  lib/rwsem.c
@@ -2,6 +2,8 @@
2 2
  *
3 3
  * Written by David Howells (dhowells@redhat.com).
4 4
  * Derived from arch/i386/kernel/semaphore.c
  5
+ *
  6
+ * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
5 7
  */
6 8
 #include <linux/rwsem.h>
7 9
 #include <linux/sched.h>
@@ -60,7 +62,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
60 62
 	struct rwsem_waiter *waiter;
61 63
 	struct task_struct *tsk;
62 64
 	struct list_head *next;
63  
-	signed long oldcount, woken, loop, adjustment;
  65
+	signed long woken, loop, adjustment;
64 66
 
65 67
 	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
66 68
 	if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
@@ -72,30 +74,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
72 74
 		 */
73 75
 		goto out;
74 76
 
75  
-	/* There's a writer at the front of the queue - try to grant it the
76  
-	 * write lock.  However, we only wake this writer if we can transition
77  
-	 * the active part of the count from 0 -> 1
78  
-	 */
79  
-	adjustment = RWSEM_ACTIVE_WRITE_BIAS;
80  
-	if (waiter->list.next == &sem->wait_list)
81  
-		adjustment -= RWSEM_WAITING_BIAS;
82  
-
83  
- try_again_write:
84  
-	oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
85  
-	if (oldcount & RWSEM_ACTIVE_MASK)
86  
-		/* Someone grabbed the sem already */
87  
-		goto undo_write;
88  
-
89  
-	/* We must be careful not to touch 'waiter' after we set ->task = NULL.
90  
-	 * It is an allocated on the waiter's stack and may become invalid at
91  
-	 * any time after that point (due to a wakeup from another source).
92  
-	 */
93  
-	list_del(&waiter->list);
94  
-	tsk = waiter->task;
95  
-	smp_mb();
96  
-	waiter->task = NULL;
97  
-	wake_up_process(tsk);
98  
-	put_task_struct(tsk);
  77
+	/* Wake up the writing waiter and let the task grab the sem: */
  78
+	wake_up_process(waiter->task);
99 79
 	goto out;
100 80
 
101 81
  readers_only:
@@ -157,12 +137,40 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
157 137
 
158 138
  out:
159 139
 	return sem;
  140
+}
  141
+
  142
+/* Try to get write sem, caller holds sem->wait_lock: */
  143
+static int try_get_writer_sem(struct rw_semaphore *sem,
  144
+					struct rwsem_waiter *waiter)
  145
+{
  146
+	struct rwsem_waiter *fwaiter;
  147
+	long oldcount, adjustment;
160 148
 
161  
-	/* undo the change to the active count, but check for a transition
162  
-	 * 1->0 */
163  
- undo_write:
  149
+	/* only steal when first waiter is writing */
  150
+	fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
  151
+	if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE))
  152
+		return 0;
  153
+
  154
+	adjustment = RWSEM_ACTIVE_WRITE_BIAS;
  155
+	/* Only one waiter in the queue: */
  156
+	if (fwaiter == waiter && waiter->list.next == &sem->wait_list)
  157
+		adjustment -= RWSEM_WAITING_BIAS;
  158
+
  159
+try_again_write:
  160
+	oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
  161
+	if (!(oldcount & RWSEM_ACTIVE_MASK)) {
  162
+		/* No active lock: */
  163
+		struct task_struct *tsk = waiter->task;
  164
+
  165
+		list_del(&waiter->list);
  166
+		smp_mb();
  167
+		put_task_struct(tsk);
  168
+		tsk->state = TASK_RUNNING;
  169
+		return 1;
  170
+	}
  171
+	/* some one grabbed the sem already */
164 172
 	if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
165  
-		goto out;
  173
+		return 0;
166 174
 	goto try_again_write;
167 175
 }
168 176
 
@@ -210,6 +218,15 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
210 218
 	for (;;) {
211 219
 		if (!waiter.task)
212 220
 			break;
  221
+
  222
+		raw_spin_lock_irq(&sem->wait_lock);
  223
+		/* Try to get the writer sem, may steal from the head writer: */
  224
+		if (flags == RWSEM_WAITING_FOR_WRITE)
  225
+			if (try_get_writer_sem(sem, &waiter)) {
  226
+				raw_spin_unlock_irq(&sem->wait_lock);
  227
+				return sem;
  228
+			}
  229
+		raw_spin_unlock_irq(&sem->wait_lock);
213 230
 		schedule();
214 231
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
215 232
 	}

0 notes on commit ce6711f

Please sign in to comment.
Something went wrong with that request. Please try again.