@@ -587,55 +587,6 @@ void exit_pi_state_list(struct task_struct *curr)
raw_spin_unlock_irq (&curr->pi_lock );
}
/*
* We need to check the following states:
*
* Waiter | pi_state | pi->owner | uTID | uODIED | ?
*
* [1] NULL | --- | --- | 0 | 0/1 | Valid
* [2] NULL | --- | --- | >0 | 0/1 | Valid
*
* [3] Found | NULL | -- | Any | 0/1 | Invalid
*
* [4] Found | Found | NULL | 0 | 1 | Valid
* [5] Found | Found | NULL | >0 | 1 | Invalid
*
* [6] Found | Found | task | 0 | 1 | Valid
*
* [7] Found | Found | NULL | Any | 0 | Invalid
*
* [8] Found | Found | task | ==taskTID | 0/1 | Valid
* [9] Found | Found | task | 0 | 0 | Invalid
* [10] Found | Found | task | !=taskTID | 0/1 | Invalid
*
* [1] Indicates that the kernel can acquire the futex atomically. We
* came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
*
* [2] Valid, if TID does not belong to a kernel thread. If no matching
* thread is found then it indicates that the owner TID has died.
*
* [3] Invalid. The waiter is queued on a non PI futex
*
* [4] Valid state after exit_robust_list(), which sets the user space
* value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
*
* [5] The user space value got manipulated between exit_robust_list()
* and exit_pi_state_list()
*
* [6] Valid state after exit_pi_state_list() which sets the new owner in
* the pi_state but cannot access the user space value.
*
* [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
*
* [8] Owner and user space value match
*
* [9] There is no transient state which sets the user space TID to 0
* except exit_robust_list(), but this is indicated by the
* FUTEX_OWNER_DIED bit. See [4]
*
* [10] There is no transient state which leaves owner and user space
* TID out of sync.
*/
static int
lookup_pi_state (u32 uval, struct futex_hash_bucket *hb,
union futex_key *key, struct futex_pi_state **ps)
@@ -651,84 +602,47 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
plist_for_each_entry_safe (this, next, head, list) {
if (match_futex (&this->key , key)) {
/*
* Sanity check the waiter before increasing
* the refcount and attaching to it.
* Another waiter already exists - bump up
* the refcount and return its pi_state:
*/
pi_state = this->pi_state ;
/*
* Userspace might have messed up non-PI and
* PI futexes [3]
* Userspace might have messed up non-PI and PI futexes
*/
if (unlikely (!pi_state))
return -EINVAL;
WARN_ON (!atomic_read (&pi_state->refcount ));
/*
* Handle the owner died case:
* When pi_state->owner is NULL then the owner died
* and another waiter is on the fly. pi_state->owner
* is fixed up by the task which acquires
* pi_state->rt_mutex.
*
* We do not check for pid == 0 which can happen when
* the owner died and robust_list_exit() cleared the
* TID.
*/
if (uval & FUTEX_OWNER_DIED ) {
if (pid && pi_state-> owner ) {
/*
* exit_pi_state_list sets owner to NULL and
* wakes the topmost waiter. The task which
* acquires the pi_state->rt_mutex will fixup
* owner.
* Bail out if user space manipulated the
* futex value.
*/
if (!pi_state->owner ) {
/*
* No pi state owner, but the user
* space TID is not 0. Inconsistent
* state. [5]
*/
if (pid)
return -EINVAL;
/*
* Take a ref on the state and
* return. [4]
*/
goto out_state;
}
/*
* If TID is 0, then either the dying owner
* has not yet executed exit_pi_state_list()
* or some waiter acquired the rtmutex in the
* pi state, but did not yet fixup the TID in
* user space.
*
* Take a ref on the state and return. [6]
*/
if (!pid)
goto out_state;
} else {
/*
* If the owner died bit is not set,
* then the pi_state must have an
* owner. [7]
*/
if (!pi_state->owner )
if (pid != task_pid_vnr (pi_state->owner ))
return -EINVAL;
}
/*
* Bail out if user space manipulated the
* futex value. If pi state exists then the
* owner TID must be the same as the user
* space TID. [9/10]
*/
if (pid != task_pid_vnr (pi_state->owner ))
return -EINVAL;
out_state:
atomic_inc (&pi_state->refcount );
*ps = pi_state;
return 0 ;
}
}
/*
* We are the first waiter - try to look up the real owner and attach
* the new pi_state to it, but bail out when TID = 0 [1]
* the new pi_state to it, but bail out when TID = 0
*/
if (!pid)
return -ESRCH;
@@ -756,9 +670,6 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
return ret;
}
/*
* No existing pi state. First waiter. [2]
*/
pi_state = alloc_pi_state ();
/*
@@ -830,18 +741,10 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
return -EDEADLK;
/*
* Surprise - we got the lock, but we do not trust user space at all.
* Surprise - we got the lock. Just return to userspace:
*/
if (unlikely (!curval)) {
/*
* We verify whether there is kernel state for this
* futex. If not, we can safely assume, that the 0 ->
* TID transition is correct. If state exists, we do
* not bother to fixup the user space state as it was
* corrupted already.
*/
return futex_top_waiter (hb, key) ? -EINVAL : 1 ;
}
if (unlikely (!curval))
return 1 ;
uval = curval;
@@ -965,7 +868,6 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
struct task_struct *new_owner;
struct futex_pi_state *pi_state = this->pi_state ;
u32 uninitialized_var (curval), newval;
int ret = 0 ;
if (!pi_state)
return -EINVAL;
@@ -989,19 +891,23 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
new_owner = this->task ;
/*
* We pass it to the next owner. The WAITERS bit is always
* kept enabled while there is PI state around. We cleanup the
* owner died bit, because we are the owner.
* We pass it to the next owner. ( The WAITERS bit is always
* kept enabled while there is PI state around. We must also
* preserve the owner died bit.)
*/
newval = FUTEX_WAITERS | task_pid_vnr (new_owner);
if (!(uval & FUTEX_OWNER_DIED)) {
int ret = 0 ;
if (cmpxchg_futex_value_locked (&curval, uaddr, uval, newval))
ret = -EFAULT;
else if (curval != uval)
ret = -EINVAL;
if (ret) {
raw_spin_unlock (&pi_state->pi_mutex .wait_lock );
return ret;
newval = FUTEX_WAITERS | task_pid_vnr (new_owner);
if (cmpxchg_futex_value_locked (&curval, uaddr, uval, newval))
ret = -EFAULT;
else if (curval != uval)
ret = -EINVAL;
if (ret) {
raw_spin_unlock (&pi_state->pi_mutex .wait_lock );
return ret;
}
}
raw_spin_lock_irq (&pi_state->owner ->pi_lock );
@@ -1349,13 +1255,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
u32 curval2;
if (requeue_pi) {
/*
* Requeue PI only works on two distinct uaddrs. This
* check is only valid for private futexes. See below.
*/
if (uaddr1 == uaddr2)
return -EINVAL;
/*
* requeue_pi requires a pi_state, try to allocate it now
* without any locks in case it fails.
@@ -1394,15 +1293,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
if (unlikely (ret != 0 ))
goto out_put_key1;
/*
* The check above which compares uaddrs is not sufficient for
* shared futexes. We need to compare the keys:
*/
if (requeue_pi && match_futex (&key1, &key2)) {
ret = -EINVAL;
goto out_put_keys;
}
hb1 = hash_futex (&key1);
hb2 = hash_futex (&key2);
@@ -2223,10 +2113,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
/*
* To avoid races, try to do the TID -> 0 atomic transition
* again. If it succeeds then we can return without waking
* anyone else up. We only try this if neither the waiters nor
* the owner died bit are set.
* anyone else up:
*/
if (!(uval & ~FUTEX_TID_MASK ) &&
if (!(uval & FUTEX_OWNER_DIED ) &&
cmpxchg_futex_value_locked (&uval, uaddr, vpid, 0 ))
goto pi_faulted;
/*
@@ -2258,9 +2147,11 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
/*
* No waiters - kernel unlocks the futex:
*/
ret = unlock_futex_pi (uaddr, uval);
if (ret == -EFAULT)
goto pi_faulted;
if (!(uval & FUTEX_OWNER_DIED)) {
ret = unlock_futex_pi (uaddr, uval);
if (ret == -EFAULT)
goto pi_faulted;
}
out_unlock:
spin_unlock (&hb->lock );
@@ -2417,15 +2308,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (ret)
goto out_key2;
/*
* The check above which compares uaddrs is not sufficient for
* shared futexes. We need to compare the keys:
*/
if (match_futex (&q.key , &key2)) {
ret = -EINVAL;
goto out_put_keys;
}
/* Queue the futex_q, drop the hb lock, wait for wakeup. */
futex_wait_queue_me (hb, &q, to);