Skip to content

Commit

Permalink
net: namespaceify sysctl_tcp_rmem and sysctl_tcp_wmem
Browse files Browse the repository at this point in the history
upstream commit: 356d1833b638bd465672aefeb71def3ab93fc17d

Note that when a new netns is created, it inherits its
sysctl_tcp_rmem and sysctl_tcp_wmem from initial netns.

This change is needed so that we can refine TCP rcvbuf autotuning,
to take RTT into consideration.

Signed-off-by: Hongbo Li <herberthbli@tencent.com>
  • Loading branch information
herberthbli authored and gxm-newton committed Jan 2, 2020
1 parent c87ad92 commit 9632996
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 41 deletions.
2 changes: 2 additions & 0 deletions include/net/netns/ipv4.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ struct netns_ipv4 {
int sysctl_tcp_no_delay_ack;
int sysctl_tcp_max_orphans;
int sysctl_tcp_workaround_signed_windows;
int sysctl_tcp_wmem[3];
int sysctl_tcp_rmem[3];

struct fib_notifier_ops *notifier_ops;
unsigned int fib_seq; /* protected by rtnl_mutex */
Expand Down
2 changes: 0 additions & 2 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,6 @@ extern int sysctl_tcp_reordering;
extern int sysctl_tcp_max_reordering;
extern int sysctl_tcp_dsack;
extern long sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_frto;
Expand Down
32 changes: 16 additions & 16 deletions net/ipv4/sysctl_net_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -501,22 +501,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
{
.procname = "tcp_wmem",
.data = &sysctl_tcp_wmem,
.maxlen = sizeof(sysctl_tcp_wmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
{
.procname = "tcp_rmem",
.data = &sysctl_tcp_rmem,
.maxlen = sizeof(sysctl_tcp_rmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
{
.procname = "tcp_app_win",
.data = &sysctl_tcp_app_win,
Expand Down Expand Up @@ -1183,6 +1167,22 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_wmem",
.data = &init_net.ipv4.sysctl_tcp_wmem,
.maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
{
.procname = "tcp_rmem",
.data = &init_net.ipv4.sysctl_tcp_rmem,
.maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
{ }
};

Expand Down
20 changes: 8 additions & 12 deletions net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -290,12 +290,8 @@ struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);

long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;

EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);

atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
Expand Down Expand Up @@ -449,8 +445,8 @@ void tcp_init_sock(struct sock *sk)

icsk->icsk_sync_mss = tcp_sync_mss;

sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];

sk_sockets_allocated_inc(sk);
}
Expand Down Expand Up @@ -3539,13 +3535,13 @@ void __init tcp_init(void)
max_wshare = min(4UL*1024*1024, limit);
max_rshare = min(6UL*1024*1024, limit);

sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
sysctl_tcp_wmem[1] = 16*1024;
sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);

sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
sysctl_tcp_rmem[1] = 87380;
sysctl_tcp_rmem[2] = max(87380, max_rshare);
init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
init_net.ipv4.sysctl_tcp_rmem[1] = 87380;
init_net.ipv4.sysctl_tcp_rmem[2] = max(87380, max_rshare);

pr_info("Hash tables configured (established %u bind %u)\n",
tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
Expand Down
17 changes: 11 additions & 6 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,8 @@ static void tcp_sndbuf_expand(struct sock *sk)
sndmem *= nr_segs * per_mss;

if (sk->sk_sndbuf < sndmem)
sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
sk->sk_sndbuf = min(sndmem,
sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
}

/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
Expand Down Expand Up @@ -376,7 +377,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
int truesize = tcp_win_from_space(skb->truesize) >> 1;
int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
int window = tcp_win_from_space(
sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;

while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
Expand Down Expand Up @@ -431,7 +433,8 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
rcvmem <<= 2;

if (sk->sk_rcvbuf < rcvmem)
sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
sk->sk_rcvbuf = min(rcvmem,
sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
}

/* 4. Try to fixup all. It is made immediately after connection enters
Expand Down Expand Up @@ -478,15 +481,16 @@ static void tcp_clamp_window(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);

icsk->icsk_ack.quick = 0;

if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
sysctl_tcp_rmem[2]);
net->ipv4.sysctl_tcp_rmem[2]);
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
Expand Down Expand Up @@ -649,7 +653,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
rcvmem += 128;

do_div(rcvwin, tp->advmss);
rcvbuf = min_t(u64, rcvwin * rcvmem, sysctl_tcp_rmem[2]);
rcvbuf = min_t(u64, rcvwin * rcvmem,
sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
if (rcvbuf > sk->sk_rcvbuf) {
sk->sk_rcvbuf = rcvbuf;

Expand Down
12 changes: 10 additions & 2 deletions net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -2429,8 +2429,8 @@ struct proto tcp_prot = {
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
Expand Down Expand Up @@ -2511,6 +2511,14 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_window_scaling = 1;
net->ipv4.sysctl_tcp_timestamps = 1;
net->ipv4.sysctl_tcp_max_orphans = NR_FILE;
if (net != &init_net) {
memcpy(net->ipv4.sysctl_tcp_rmem,
init_net.ipv4.sysctl_tcp_rmem,
sizeof(init_net.ipv4.sysctl_tcp_rmem));
memcpy(net->ipv4.sysctl_tcp_wmem,
init_net.ipv4.sysctl_tcp_wmem,
sizeof(init_net.ipv4.sysctl_tcp_wmem));
}

return 0;
fail:
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/tcp_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
(*rcv_wscale) = 0;
if (wscale_ok) {
/* Set window scaling on max possible window */
space = max_t(u32, space, sysctl_tcp_rmem[2]);
space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
Expand Down
4 changes: 2 additions & 2 deletions net/ipv6/tcp_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -1942,8 +1942,8 @@ struct proto tcpv6_prot = {
.memory_pressure = &tcp_memory_pressure,
.orphan_count = &tcp_orphan_count,
.sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
Expand Down

0 comments on commit 9632996

Please sign in to comment.