net: mptcp, Fast Open Mechanism
This set of patches brings "Fast Open" option support to MPTCP.
The aim of the Fast Open mechanism is to eliminate one round-trip
time from a TCP conversation by allowing data to be included as
part of the SYN segment that initiates the connection.

IETF RFC 8684: Appendix B.  TCP Fast Open and MPTCP.

[PATCH v2] includes "client-server" partial support for:
1. MPTCP cookie request from client.
2. MPTCP cookie offering from server.
3. MPTCP SYN+DATA+COOKIE from client.
4. Subsequent write + read on the opened socket.

This patch is a work in progress and an early draft, shared due to
community request.
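
For illustration, a minimal userspace client exercising the client side
of this flow (steps 1, 3 and 4 above) might look like the sketch below.
It is not part of the patch: it assumes the existing TCP Fast Open
client API (sendto() with MSG_FASTOPEN) carries over unchanged to
IPPROTO_MPTCP sockets, and the address, port, and payload are
placeholders.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262	/* value from include/uapi/linux/in.h */
#endif

int main(void)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(8080),	/* placeholder port */
	};
	const char req[] = "hello";
	char buf[128];
	int fd;

	inet_pton(AF_INET, "192.0.2.1", &addr.sin_addr);	/* placeholder */

	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/*
	 * MSG_FASTOPEN carries the payload in the SYN: on first contact
	 * the kernel requests a cookie (step 1); once one is cached the
	 * data goes out as SYN+DATA+COOKIE (step 3).
	 */
	if (sendto(fd, req, strlen(req), MSG_FASTOPEN,
		   (struct sockaddr *)&addr, sizeof(addr)) < 0)
		perror("sendto");

	/* Step 4: subsequent read/write on the opened socket. */
	if (read(fd, buf, sizeof(buf)) < 0)
		perror("read");

	close(fd);
	return 0;
}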

Signed-off-by: Dmytro SHYTYI <dmytro@shytyi.net>
Dmytro SHYTYI authored and intel-lab-lkp committed Jan 16, 2022
1 parent df0cc57 commit 52c7bf8
Showing 7 changed files with 162 additions and 19 deletions.
7 changes: 7 additions & 0 deletions include/linux/tcp.h
@@ -54,7 +54,14 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb)
/* TCP Fast Open */
#define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */
#define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */

#if IS_ENABLED(CONFIG_MPTCP)
#define TCP_FASTOPEN_COOKIE_SIZE 4 /* the size employed by MPTCP impl. */
#else
#define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */
#endif



/* TCP Fast Open Cookie as stored in memory */
struct tcp_fastopen_cookie {
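For context, the diff above truncates right at the declaration of
struct tcp_fastopen_cookie. As a reference point (reconstructed from
upstream kernels of this era, not taken from the patch), the structure
looks roughly like this:

struct tcp_fastopen_cookie {
	__le64	val[DIV_ROUND_UP(TCP_FASTOPEN_COOKIE_MAX, sizeof(u64))];
	s8	len;	/* -1 if cookie generation is disabled */
	bool	exp;	/* cookie in RFC 6994 experimental option format */
};

A 4-byte MPTCP cookie therefore occupies only the low bytes of val[0],
with len recording which size is in use.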
3 changes: 2 additions & 1 deletion net/ipv4/inet_connection_sock.c
@@ -501,7 +501,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
req = reqsk_queue_remove(queue, sk);
newsk = req->sk;

-if (sk->sk_protocol == IPPROTO_TCP &&
+if ((sk->sk_protocol == IPPROTO_TCP ||
+    sk->sk_protocol == IPPROTO_MPTCP) &&
tcp_rsk(req)->tfo_listener) {
spin_lock_bh(&queue->fastopenq.lock);
if (tcp_rsk(req)->tfo_listener) {
42 changes: 35 additions & 7 deletions net/ipv4/tcp_fastopen.c
@@ -119,15 +119,26 @@ static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
const siphash_key_t *key,
struct tcp_fastopen_cookie *foc)
{
#if IS_ENABLED(CONFIG_MPTCP)
BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u32));
#else
BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64));
#endif

if (req->rsk_ops->family == AF_INET) {
const struct iphdr *iph = ip_hdr(syn);

#if IS_ENABLED(CONFIG_MPTCP)
foc->val[0] = cpu_to_le32(siphash(&iph->saddr,
sizeof(iph->saddr) +
sizeof(iph->daddr),
key));
#else
foc->val[0] = cpu_to_le64(siphash(&iph->saddr,
sizeof(iph->saddr) +
sizeof(iph->daddr),
key));
#endif
foc->len = TCP_FASTOPEN_COOKIE_SIZE;
return true;
}
@@ -149,6 +160,7 @@ static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
/* Generate the fastopen cookie by applying SipHash to both the source and
* destination addresses.
*/
/*
static void tcp_fastopen_cookie_gen(struct sock *sk,
struct request_sock *req,
struct sk_buff *syn,
Expand All @@ -162,6 +174,7 @@ static void tcp_fastopen_cookie_gen(struct sock *sk,
__tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[0], foc);
rcu_read_unlock();
}
*/

/* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
* queue this additional data / FIN.
@@ -291,12 +304,12 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
*/
return child;
}

/*
static bool tcp_fastopen_queue_check(struct sock *sk)
{
struct fastopen_queue *fastopenq;
-/* Make sure the listener has enabled fastopen, and we don't
+ * Make sure the listener has enabled fastopen, and we don't
* exceed the max # of pending TFO requests allowed before trying
* to validating the cookie in order to avoid burning CPU cycles
* unnecessarily.
@@ -305,7 +318,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
* processing a cookie request is that clients can't differentiate
* between qlen overflow causing Fast Open to be disabled
* temporarily vs a server not supporting Fast Open at all.
- */
+ *
fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
if (fastopenq->max_qlen == 0)
return false;
@@ -327,7 +340,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
}
return true;
}

*/
static bool tcp_fastopen_no_cookie(const struct sock *sk,
const struct dst_entry *dst,
int flag)
@@ -346,28 +359,43 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct tcp_fastopen_cookie *foc,
const struct dst_entry *dst)
{
/*
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
*/
struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
int ret = 0;

if (foc->len == 0) /* Client requests a cookie */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);

/*
if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
(syn_data || foc->len >= 0) &&
tcp_fastopen_queue_check(sk))) {
foc->len = -1;
return NULL;
}

*/
if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
goto fastopen;

if (foc->len == 0) {
/* Client requests a cookie. */
-tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
+//tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);

struct tcp_fastopen_context *ctx;
struct iphdr *iph = ip_hdr(skb);

tcp_fastopen_init_key_once(sock_net(sk));
ctx = tcp_fastopen_get_ctx(sk);

valid_foc.val[0] = cpu_to_le32(siphash(&iph->saddr,
sizeof(iph->saddr) +
sizeof(iph->daddr),
&ctx->key[0]));
valid_foc.len = TCP_FASTOPEN_COOKIE_SIZE;

} else if (foc->len > 0) {
ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc,
&valid_foc);
16 changes: 9 additions & 7 deletions net/ipv4/tcp_input.c
@@ -5908,7 +5908,6 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
} else {
tcp_update_wl(tp, TCP_SKB_CB(skb)->seq);
}

__tcp_ack_snd_check(sk, 0);
no_ack:
if (eaten)
@@ -6229,9 +6228,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
}
if (fastopen_fail)
return -1;
-if (sk->sk_write_pending ||
-    icsk->icsk_accept_queue.rskq_defer_accept ||
-    inet_csk_in_pingpong_mode(sk)) {
+if ((sk->sk_write_pending ||
+     icsk->icsk_accept_queue.rskq_defer_accept ||
+     inet_csk_in_pingpong_mode(sk)) && !th->syn) {
/* Save one ACK. Data will be ready after
* several ticks, if write_pending is set.
*
@@ -6243,9 +6243,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
TCP_DELACK_MAX, TCP_RTO_MAX);

discard:
tcp_drop(sk, skb);
tcp_send_ack(sk);

return 0;
} else {
tcp_send_ack(sk);
@@ -6425,6 +6426,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tcp_urg(sk, skb, th);
__kfree_skb(skb);
tcp_data_snd_check(sk);

return 0;
}

@@ -6901,7 +6903,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
*/
pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
rsk_ops->family);
-goto drop_and_release;
+//goto drop_and_release;
}

isn = af_ops->init_seq(skb);
@@ -6954,7 +6956,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
reqsk_put(req);
return 0;

-drop_and_release:
+//drop_and_release:
dst_release(dst);
drop_and_free:
__reqsk_free(req);
59 changes: 55 additions & 4 deletions net/mptcp/protocol.c
@@ -52,6 +52,8 @@ static struct percpu_counter mptcp_sockets_allocated;

static void __mptcp_destroy_sock(struct sock *sk);
static void __mptcp_check_send_data_fin(struct sock *sk);
static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags);

DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
static struct net_device mptcp_napi_dev;
@@ -1631,6 +1633,53 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
}
}

static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
size_t len, struct mptcp_sock *msk, size_t copied)
{
const struct iphdr *iph;
struct ubuf_info *uarg;
struct sockaddr *uaddr;
struct sk_buff *skb;
struct tcp_sock *tp;
struct socket *ssk;
int ret;

ssk = __mptcp_nmpc_socket(msk);
if (unlikely(!ssk))
goto out_EFAULT;
skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true);
if (unlikely(!skb))
goto out_EFAULT;
iph = ip_hdr(skb);
if (unlikely(!iph))
goto out_EFAULT;
uarg = msg_zerocopy_realloc(sk, len, skb_zcopy(skb));
if (unlikely(!uarg))
goto out_EFAULT;
uaddr = msg->msg_name;

tp = tcp_sk(ssk->sk);
if (unlikely(!tp))
goto out_EFAULT;
if (!tp->fastopen_req)
tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), ssk->sk->sk_allocation);

if (unlikely(!tp->fastopen_req))
goto out_EFAULT;
tp->fastopen_req->data = msg;
tp->fastopen_req->size = len;
tp->fastopen_req->uarg = uarg;

/* requests a cookie */
ret = mptcp_stream_connect(sk->sk_socket, uaddr,
msg->msg_namelen, msg->msg_flags);

return ret;
out_EFAULT:
ret = -EFAULT;
return ret;
}

static void mptcp_set_nospace(struct sock *sk)
{
/* enable autotune */
@@ -1648,9 +1697,9 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int ret = 0;
long timeo;

-/* we don't support FASTOPEN yet */
+/* we don't fully support FASTOPEN yet */
if (msg->msg_flags & MSG_FASTOPEN)
-return -EOPNOTSUPP;
+ret = mptcp_sendmsg_fastopen(sk, msg, len, msk, copied);

/* silently ignore everything else */
msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
@@ -2436,10 +2485,10 @@ static void mptcp_worker(struct work_struct *work)

if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(msk);

/*
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
__mptcp_retrans(sk);

*/
unlock:
release_sock(sk);
sock_put(sk);
@@ -2543,6 +2592,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
case TCP_SYN_SENT:
tcp_disconnect(ssk, O_NONBLOCK);
break;
case TCP_ESTABLISHED:
break;
default:
if (__mptcp_check_fallback(mptcp_sk(sk))) {
pr_debug("Fallback");
40 changes: 40 additions & 0 deletions net/mptcp/sockopt.c
@@ -538,6 +538,7 @@ static bool mptcp_supported_sockopt(int level, int optname)
case TCP_TIMESTAMP:
case TCP_NOTSENT_LOWAT:
case TCP_TX_DELAY:
case TCP_FASTOPEN:
return true;
}

@@ -597,6 +598,43 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
return ret;
}

static int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval,
unsigned int optlen)
{
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
struct net *net = sock_net(sk);
int val;
int ret;

ret = 0;

if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;

lock_sock(sk);

mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

lock_sock(ssk);

if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
TCPF_LISTEN))) {
tcp_fastopen_init_key_once(net);
fastopen_queue_tune(sk, val);
} else {
ret = -EINVAL;
}

release_sock(ssk);
}

release_sock(sk);

return ret;
}

static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -605,6 +643,8 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return -EOPNOTSUPP;
case TCP_CONGESTION:
return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
case TCP_FASTOPEN:
return mptcp_setsockopt_sol_tcp_fastopen(msk, optval, optlen);
}

return -EOPNOTSUPP;
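To complete the picture, the listening side would opt in through the
TCP_FASTOPEN handling added above. A minimal server sketch, again not
part of the patch (placeholder port; the queue length is the value
handed to fastopen_queue_tune()):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262	/* value from include/uapi/linux/in.h */
#endif

int main(void)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(8080),	/* placeholder port */
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	int qlen = 5;	/* max pending TFO requests before cookie checks */
	int fd;

	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	if (fd < 0)
		return 1;

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	/*
	 * Per the state check in mptcp_setsockopt_sol_tcp_fastopen(),
	 * this must be done while the socket is CLOSE or LISTEN.
	 */
	setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));

	listen(fd, 16);
	/* accept() and read()/write() as usual from here on. */
	close(fd);
	return 0;
}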
