net: mptcp, Fast Open Mechanism
This set of patches brings "Fast Open" option support to MPTCP.
The aim of the Fast Open mechanism is to eliminate one round-trip
time from a TCP conversation by allowing data to be carried in the
SYN segment that initiates the connection.

IETF RFC 8684, Appendix B: TCP Fast Open and MPTCP.

[PATCH v2] includes partial "client-server" support for:
1. MPTCP cookie request from client.
2. MPTCP cookie offering from server.
3. MPTCP SYN+DATA+COOKIE from client.
4. Subsequent write + read on the opened socket.

This patch is a work in progress, shared as an early draft at the
community's request.
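
As an illustration of the intended client-side flow (a sketch, not part
of this patch; the address, port, and payload are made up, and the
fallback defines only cover older userspace headers): the first
sendto() with MSG_FASTOPEN makes the kernel request a cookie in the
SYN, and once a cookie is cached the same call produces
SYN+DATA+COOKIE.

/* Client sketch (illustrative only). */
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif
#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN 0x20000000
#endif

int main(void)
{
        struct sockaddr_in srv = {
                .sin_family = AF_INET,
                .sin_port = htons(8080), /* example port */
                .sin_addr = { htonl(INADDR_LOOPBACK) },
        };
        const char data[] = "hello";
        int fd;

        fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
        if (fd < 0)
                return 1;

        /* connect() + send() collapsed into one call; the payload may
         * ride in the SYN when a cookie is available.
         */
        if (sendto(fd, data, sizeof(data) - 1, MSG_FASTOPEN,
                   (struct sockaddr *)&srv, sizeof(srv)) < 0) {
                close(fd);
                return 1;
        }

        close(fd);
        return 0;
}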

Signed-off-by: Dmytro SHYTYI <dmytro@shytyi.net>
Dmytro SHYTYI authored and intel-lab-lkp committed Jan 16, 2022
1 parent df0cc57 commit 52c7bf82e2e91eb10c89ef6169fe02e0b63a6772
Showing 7 changed files with 162 additions and 19 deletions.
include/linux/tcp.h
@@ -54,7 +54,14 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb)
 /* TCP Fast Open */
 #define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */
 #define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */
+
+#if IS_ENABLED(CONFIG_MPTCP)
+#define TCP_FASTOPEN_COOKIE_SIZE 4 /* the size employed by MPTCP impl. */
+#else
 #define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */
+#endif
+
+
 
 /* TCP Fast Open Cookie as stored in memory */
 struct tcp_fastopen_cookie {
net/ipv4/inet_connection_sock.c
@@ -501,7 +501,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
 	req = reqsk_queue_remove(queue, sk);
 	newsk = req->sk;
 
-	if (sk->sk_protocol == IPPROTO_TCP &&
+	if ((sk->sk_protocol == IPPROTO_TCP ||
+	     sk->sk_protocol == IPPROTO_MPTCP) &&
 	    tcp_rsk(req)->tfo_listener) {
 		spin_lock_bh(&queue->fastopenq.lock);
 		if (tcp_rsk(req)->tfo_listener) {
net/ipv4/tcp_fastopen.c
@@ -119,15 +119,26 @@ static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
 					     const siphash_key_t *key,
 					     struct tcp_fastopen_cookie *foc)
 {
+#if IS_ENABLED(CONFIG_MPTCP)
+	BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u32));
+#else
 	BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64));
+#endif
 
 	if (req->rsk_ops->family == AF_INET) {
 		const struct iphdr *iph = ip_hdr(syn);
 
+#if IS_ENABLED(CONFIG_MPTCP)
+		foc->val[0] = cpu_to_le32(siphash(&iph->saddr,
+						  sizeof(iph->saddr) +
+						  sizeof(iph->daddr),
+						  key));
+#else
 		foc->val[0] = cpu_to_le64(siphash(&iph->saddr,
 						  sizeof(iph->saddr) +
 						  sizeof(iph->daddr),
 						  key));
+#endif
 		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
 		return true;
 	}
@@ -149,6 +160,7 @@ static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
 /* Generate the fastopen cookie by applying SipHash to both the source and
  * destination addresses.
  */
+/*
 static void tcp_fastopen_cookie_gen(struct sock *sk,
 				    struct request_sock *req,
 				    struct sk_buff *syn,
@@ -162,6 +174,7 @@ static void tcp_fastopen_cookie_gen(struct sock *sk,
 		__tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[0], foc);
 	rcu_read_unlock();
 }
+*/
 
 /* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
  * queue this additional data / FIN.
@@ -291,12 +304,12 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 	 */
 	return child;
 }
 
+/*
 static bool tcp_fastopen_queue_check(struct sock *sk)
 {
 	struct fastopen_queue *fastopenq;
-	/* Make sure the listener has enabled fastopen, and we don't
+	 * Make sure the listener has enabled fastopen, and we don't
 	 * exceed the max # of pending TFO requests allowed before trying
 	 * to validating the cookie in order to avoid burning CPU cycles
 	 * unnecessarily.
@@ -305,7 +318,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
 	 * processing a cookie request is that clients can't differentiate
 	 * between qlen overflow causing Fast Open to be disabled
 	 * temporarily vs a server not supporting Fast Open at all.
-	 */
+	 *
 	fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
 	if (fastopenq->max_qlen == 0)
 		return false;
@@ -327,7 +340,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
 	}
 	return true;
 }
-
+*/
 static bool tcp_fastopen_no_cookie(const struct sock *sk,
 				   const struct dst_entry *dst,
 				   int flag)
@@ -346,28 +359,43 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 			      struct tcp_fastopen_cookie *foc,
 			      const struct dst_entry *dst)
 {
+/*
 	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
 	int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+*/
 	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	struct sock *child;
 	int ret = 0;
 
 	if (foc->len == 0) /* Client requests a cookie */
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
 
+/*
 	if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
 	      (syn_data || foc->len >= 0) &&
 	      tcp_fastopen_queue_check(sk))) {
 		foc->len = -1;
 		return NULL;
 	}
 
+*/
 	if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
 		goto fastopen;
 
 	if (foc->len == 0) {
 		/* Client requests a cookie. */
-		tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
+		//tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
+
+		struct tcp_fastopen_context *ctx;
+		struct iphdr *iph = ip_hdr(skb);
+
+		tcp_fastopen_init_key_once(sock_net(sk));
+		ctx = tcp_fastopen_get_ctx(sk);
+
+		valid_foc.val[0] = cpu_to_le32(siphash(&iph->saddr,
+						       sizeof(iph->saddr) +
+						       sizeof(iph->daddr),
+						       &ctx->key[0]));
+		valid_foc.len = TCP_FASTOPEN_COOKIE_SIZE;
+
 	} else if (foc->len > 0) {
 		ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc,
 						    &valid_foc);
net/ipv4/tcp_input.c
@@ -5908,7 +5908,6 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 			} else {
 				tcp_update_wl(tp, TCP_SKB_CB(skb)->seq);
 			}
-
 			__tcp_ack_snd_check(sk, 0);
 no_ack:
 			if (eaten)
@@ -6229,9 +6228,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 	}
 	if (fastopen_fail)
 		return -1;
-	if (sk->sk_write_pending ||
-	    icsk->icsk_accept_queue.rskq_defer_accept ||
-	    inet_csk_in_pingpong_mode(sk)) {
+
+	if ((sk->sk_write_pending ||
+	     icsk->icsk_accept_queue.rskq_defer_accept ||
+	     inet_csk_in_pingpong_mode(sk)) && !th->syn) {
 		/* Save one ACK. Data will be ready after
 		 * several ticks, if write_pending is set.
 		 *
@@ -6243,9 +6243,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
 					  TCP_DELACK_MAX, TCP_RTO_MAX);
-
 discard:
 		tcp_drop(sk, skb);
+		tcp_send_ack(sk);
+
 		return 0;
 	} else {
 		tcp_send_ack(sk);
@@ -6425,6 +6426,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		tcp_urg(sk, skb, th);
 		__kfree_skb(skb);
 		tcp_data_snd_check(sk);
+
 		return 0;
 	}
 
@@ -6901,7 +6903,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		 */
 		pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
 			    rsk_ops->family);
-		goto drop_and_release;
+		//goto drop_and_release;
 	}
 
 	isn = af_ops->init_seq(skb);
@@ -6954,7 +6956,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	reqsk_put(req);
 	return 0;
 
-drop_and_release:
+//drop_and_release:
 	dst_release(dst);
 drop_and_free:
 	__reqsk_free(req);
net/mptcp/protocol.c
@@ -52,6 +52,8 @@ static struct percpu_counter mptcp_sockets_allocated;
 
 static void __mptcp_destroy_sock(struct sock *sk);
 static void __mptcp_check_send_data_fin(struct sock *sk);
+static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+				int addr_len, int flags);
 
 DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
 static struct net_device mptcp_napi_dev;
@@ -1631,6 +1633,53 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
}
}

static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
size_t len, struct mptcp_sock *msk, size_t copied)
{
const struct iphdr *iph;
struct ubuf_info *uarg;
struct sockaddr *uaddr;
struct sk_buff *skb;
struct tcp_sock *tp;
struct socket *ssk;
int ret;

ssk = __mptcp_nmpc_socket(msk);
if (unlikely(!ssk))
goto out_EFAULT;
skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true);
if (unlikely(!skb))
goto out_EFAULT;
iph = ip_hdr(skb);
if (unlikely(!iph))
goto out_EFAULT;
uarg = msg_zerocopy_realloc(sk, len, skb_zcopy(skb));
if (unlikely(!uarg))
goto out_EFAULT;
uaddr = msg->msg_name;

tp = tcp_sk(ssk->sk);
if (unlikely(!tp))
goto out_EFAULT;
if (!tp->fastopen_req)
tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), ssk->sk->sk_allocation);

if (unlikely(!tp->fastopen_req))
goto out_EFAULT;
tp->fastopen_req->data = msg;
tp->fastopen_req->size = len;
tp->fastopen_req->uarg = uarg;

/* requests a cookie */
ret = mptcp_stream_connect(sk->sk_socket, uaddr,
msg->msg_namelen, msg->msg_flags);

return ret;
out_EFAULT:
ret = -EFAULT;
return ret;
}

static void mptcp_set_nospace(struct sock *sk)
{
/* enable autotune */
@@ -1648,9 +1697,9 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	int ret = 0;
 	long timeo;
 
-	/* we don't support FASTOPEN yet */
+	/* we don't fully support FASTOPEN yet */
 	if (msg->msg_flags & MSG_FASTOPEN)
-		return -EOPNOTSUPP;
+		ret = mptcp_sendmsg_fastopen(sk, msg, len, msk, copied);
 
 	/* silently ignore everything else */
 	msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
@@ -2436,10 +2485,10 @@ static void mptcp_worker(struct work_struct *work)
 
 	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
 		__mptcp_close_subflow(msk);
-
+/*
 	if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
 		__mptcp_retrans(sk);
-
+*/
 unlock:
 	release_sock(sk);
 	sock_put(sk);
@@ -2543,6 +2592,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
 	case TCP_SYN_SENT:
 		tcp_disconnect(ssk, O_NONBLOCK);
 		break;
+	case TCP_ESTABLISHED:
+		break;
 	default:
 		if (__mptcp_check_fallback(mptcp_sk(sk))) {
 			pr_debug("Fallback");
net/mptcp/sockopt.c
@@ -538,6 +538,7 @@ static bool mptcp_supported_sockopt(int level, int optname)
 	case TCP_TIMESTAMP:
 	case TCP_NOTSENT_LOWAT:
 	case TCP_TX_DELAY:
+	case TCP_FASTOPEN:
 		return true;
 	}
 
@@ -597,6 +598,43 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
 	return ret;
 }
 
+static int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval,
+					     unsigned int optlen)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	struct net *net = sock_net(sk);
+	int val;
+	int ret;
+
+	ret = 0;
+
+	if (copy_from_sockptr(&val, optval, sizeof(val)))
+		return -EFAULT;
+
+	lock_sock(sk);
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+		lock_sock(ssk);
+
+		if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
+							TCPF_LISTEN))) {
+			tcp_fastopen_init_key_once(net);
+			fastopen_queue_tune(sk, val);
+		} else {
+			ret = -EINVAL;
+		}
+
+		release_sock(ssk);
+	}
+
+	release_sock(sk);
+
+	return ret;
+}
+
 static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 				    sockptr_t optval, unsigned int optlen)
 {
@@ -605,6 +643,8 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 		return -EOPNOTSUPP;
 	case TCP_CONGESTION:
 		return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
+	case TCP_FASTOPEN:
+		return mptcp_setsockopt_sol_tcp_fastopen(msk, optval, optlen);
 	}
 
 	return -EOPNOTSUPP;

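For reference, the server-side counterpart that the TCP_FASTOPEN case
above enables could be exercised like this minimal sketch (hypothetical
port and queue length; not part of this patch). The option value sizes
the queue of pending Fast Open requests before listen(), which is what
fastopen_queue_tune() does in the hook above.

/* Server sketch (illustrative only). */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif

int main(void)
{
        struct sockaddr_in addr = {
                .sin_family = AF_INET,
                .sin_port = htons(8080), /* example port */
                .sin_addr = { htonl(INADDR_ANY) },
        };
        int qlen = 5; /* max pending TFO requests */
        int fd;

        fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
        if (fd < 0)
                return 1;

        /* Enable Fast Open on the MPTCP listener before listen(). */
        if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
            setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0 ||
            listen(fd, 16) < 0) {
                close(fd);
                return 1;
        }

        /* accept() loop elided */
        close(fd);
        return 0;
}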