Skip to content

Commit

Permalink
tcp: fix race condition when creating child sockets from syncookies
Browse files Browse the repository at this point in the history
When the TCP stack is in SYN flood mode, the server child socket is
created from the SYN cookie received in a TCP packet with the ACK flag
set.

The child socket is created when the server receives the first TCP
packet with a valid SYN cookie from the client. Usually, this packet
corresponds to the final step of the TCP 3-way handshake, the ACK
packet. But is also possible to receive a valid SYN cookie from the
first TCP data packet sent by the client, and thus create a child socket
from that SYN cookie.

Since a client socket is ready to send data as soon as it receives the
SYN+ACK packet from the server, the client can send the ACK packet (sent
by the TCP stack code), and the first data packet (sent by the userspace
program) almost at the same time, and thus the server will equally
receive the two TCP packets with valid SYN cookies almost at the same
instant.

When such event happens, the TCP stack code has a race condition that
occurs between the momement a lookup is done to the established
connections hashtable to check for the existence of a connection for the
same client, and the moment that the child socket is added to the
established connections hashtable. As a consequence, this race condition
can lead to a situation where we add two child sockets to the
established connections hashtable and deliver two sockets to the
userspace program to the same client.

This patch fixes the race condition by checking if an existing child
socket exists for the same client when we are adding the second child
socket to the established connections socket. If an existing child
socket exists, we return that socket and use it to process the TCP
packet received, and discard the second child socket to the same client.

Signed-off-by: Ricardo Dias <rdias@memsql.com>
  • Loading branch information
rjfdias authored and intel-lab-lkp committed Oct 23, 2020
1 parent 105faa8 commit 35d7202
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 2 deletions.
1 change: 1 addition & 0 deletions include/net/inet_hashtables.h
Expand Up @@ -248,6 +248,7 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
int inet_hashinfo2_init_mod(struct inet_hashinfo *h);

bool inet_ehash_insert(struct sock *sk, struct sock *osk);
struct sock *inet_ehash_insert_chk_dup(struct sock *sk);
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
int __inet_hash(struct sock *sk, struct sock *osk);
int inet_hash(struct sock *sk);
Expand Down
59 changes: 59 additions & 0 deletions net/ipv4/inet_hashtables.c
Expand Up @@ -537,6 +537,65 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk)
return ret;
}

/* Inserts a socket into ehash if no existing socket exists for the same
* quadruple (saddr, sport, daddr, dport).
* If there is an existing socket, returns that socket, otherwise returns NULL.
*/
struct sock *inet_ehash_insert_chk_dup(struct sock *sk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_nulls_head *list;
struct inet_ehash_bucket *head;
const struct hlist_nulls_node *node;
struct sock *esk;
spinlock_t *lock; /* protects hashinfo socket entry */
struct net *net = sock_net(sk);
const int dif, sdif = sk->sk_bound_dev_if;

INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr);
const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num);

WARN_ON_ONCE(!sk_unhashed(sk));

sk->sk_hash = sk_ehashfn(sk);
head = inet_ehash_bucket(hashinfo, sk->sk_hash);
list = &head->chain;
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);

spin_lock(lock);
begin:
sk_nulls_for_each_rcu(esk, node, list) {
if (esk->sk_hash != sk->sk_hash)
continue;
if (likely(INET_MATCH(esk, net, acookie,
sk->sk_daddr, sk->sk_rcv_saddr, ports,
dif, sdif))) {
if (unlikely(!refcount_inc_not_zero(&esk->sk_refcnt)))
goto out;
if (unlikely(!INET_MATCH(esk, net, acookie,
sk->sk_daddr,
sk->sk_rcv_saddr, ports,
dif, sdif))) {
sock_gen_put(esk);
goto begin;
}
goto found;
}
}
out:
esk = NULL;
__sk_nulls_add_node_rcu(sk, list);
found:
spin_unlock(lock);
if (esk) {
percpu_counter_inc(sk->sk_prot->orphan_count);
inet_sk_set_state(sk, TCP_CLOSE);
sock_set_flag(sk, SOCK_DEAD);
inet_csk_destroy_sock(sk);
}
return esk;
}

bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
{
bool ok = inet_ehash_insert(sk, osk);
Expand Down
5 changes: 4 additions & 1 deletion net/ipv4/syncookies.c
Expand Up @@ -208,7 +208,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,

child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
NULL, &own_req);
if (child) {
if (child && own_req) {
refcount_set(&req->rsk_refcnt, 1);
tcp_sk(child)->tsoffset = tsoff;
sock_rps_save_rxhash(child, skb);
Expand All @@ -223,6 +223,9 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,

bh_unlock_sock(child);
sock_put(child);
} else if (child && !own_req) {
__reqsk_free(req);
return child;
}
__reqsk_free(req);

Expand Down
16 changes: 15 additions & 1 deletion net/ipv4/tcp_ipv4.c
Expand Up @@ -1507,6 +1507,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
int l3index;
#endif
struct ip_options_rcu *inet_opt;
struct sock *esk = NULL;
bool syncookie = false;

if (sk_acceptq_is_full(sk))
goto exit_overflow;
Expand Down Expand Up @@ -1545,6 +1547,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
goto put_and_exit;
} else {
/* syncookie case : see end of cookie_v4_check() */
syncookie = true;
}
sk_setup_caps(newsk, dst);

Expand Down Expand Up @@ -1575,7 +1578,18 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,

if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
if (!syncookie) {
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
} else {
esk = inet_ehash_insert_chk_dup(newsk);
/* We're going to notify tcp_get_cookie_sock through own_req
* that an existing socket is going to be returned instead,
* since own_req is not used by the syncookies code.
*/
*own_req = !esk;
if (esk)
newsk = esk;
}
if (likely(*own_req)) {
tcp_move_syn(newtp, req);
ireq->ireq_opt = NULL;
Expand Down

0 comments on commit 35d7202

Please sign in to comment.