generated from oracle/template-repo
-
Notifications
You must be signed in to change notification settings - Fork 85
/
Copy pathtcp_conn_tuner.h
133 lines (118 loc) · 4.14 KB
/
tcp_conn_tuner.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright (c) 2023, Oracle and/or its affiliates.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <bpftune/bpftune.h>
/*
 * Identifiers for the tunables this tuner works with.  Values are
 * consecutive starting at 0.
 * NOTE(review): presumably these index the tuner's tunable descriptor
 * table - confirm against the userspace tuner.
 */
enum tcp_cong_tunables {
	TCP_CONG			= 0,
	TCP_ALLOWED_CONG		= 1,
	TCP_AVAILABLE_CONG		= 2,
	TCP_CONG_DEFAULT		= 3,
	TCP_THIN_LINEAR_TIMEOUTS	= 4,
};
/* Scenario identifiers reported by this tuner; only one is defined. */
enum tcp_cong_scenarios {
	TCP_CONG_SET = 0,
};
/* NOTE(review): presumably the buffer size for a congestion control
 * algorithm name, including the terminating NUL - confirm against users.
 */
#define CONG_MAXNAME 16
/*
 * One entry per congestion control algorithm the tuner chooses between.
 * The congs[] name table is laid out in this same order, and
 * NUM_TCP_CONG_ALGS is the number of algorithms.
 */
enum tcp_states {
	TCP_STATE_CONG_CUBIC	= 0,
	TCP_STATE_CONG_BBR	= 1,
	TCP_STATE_CONG_HTCP	= 2,
	TCP_STATE_CONG_DCTCP	= 3,
	NUM_TCP_CONG_ALGS	= 4,
};
/*
 * Congestion control algorithm names; rows must match the order of
 * enum tcp_states.  Each row is 6 bytes: the name, its terminating NUL,
 * and zero padding for the shorter names.
 */
const char congs[NUM_TCP_CONG_ALGS][6] = {
	"cubic",
	"bbr",
	"htcp",
	"dctcp",
};
/* Metric record; struct remote_host keeps an array of NUM_TCP_CONN_METRICS
 * of these (one slot per congestion control algorithm).
 * Field order and types define the layout; do not reorder.
 */
struct tcp_conn_metric {
__u64 state_flags; /* logical OR of states */
__u64 greedy_count; /* amount of times greedy option was taken */
__u64 min_rtt; /* NOTE(review): presumably lowest RTT seen for this slot - confirm */
__u64 max_rate_delivered; /* NOTE(review): presumably highest delivery rate seen for this slot - confirm */
__u64 metric_count; /* NOTE(review): presumably number of metric updates folded in - confirm */
__u64 metric_value; /* NOTE(review): presumably current cost estimate (lower is better) - confirm */
};
/* Size of the per-host metrics[] array: one entry per congestion control
 * algorithm in enum tcp_states.
 */
#define NUM_TCP_CONN_METRICS NUM_TCP_CONG_ALGS
/* Per-connection sample.
 * NOTE(review): presumably the payload of the event passed from the BPF
 * program to the userspace tuner - confirm against the sender.
 * Field order and types define the layout; do not reorder.
 */
struct tcp_conn_event_data {
struct in6_addr raddr; /* NOTE(review): presumably the remote peer address - confirm how IPv4 peers are encoded */
__u64 state_flags; /* NOTE(review): presumably a logical OR of states, as in struct tcp_conn_metric - confirm */
__u64 rate_delivered; /* delivery rate observed for the connection */
__u64 min_rtt; /* minimum RTT observed for the connection */
__u64 metric; /* NOTE(review): presumably the cost computed by tcp_metric_calc() - confirm */
};
/* State kept per remote host.  min_rtt and max_rate_delivered are the
 * baselines that tcp_metric_calc() compares each connection sample against
 * and updates in place.
 */
struct remote_host {
__u64 min_rtt; /* lowest min_rtt recorded so far; 0 is treated as "not yet set" */
__u64 max_rate_delivered; /* highest rate_delivered recorded so far; 0 is treated as "not yet set" */
__u64 instances; /* NOTE(review): presumably the count of connections seen, compared with REMOTE_HOST_MIN_INSTANCES - confirm */
struct tcp_conn_metric metrics[NUM_TCP_CONN_METRICS]; /* one metric record per congestion control algorithm */
};
/* collect per-conn data once we see > REMOTE_HOST_MIN_INSTANCES */
#define REMOTE_HOST_MIN_INSTANCES 4
/* if total retrans/segs_out > 1/(2^DROP_SHIFT) (1/64 by default)
* apply BBR congestion control.
*/
#define DROP_SHIFT 6
/* Multipliers applied by tcp_metric_calc() to the relative RTT and
* delivery-rate deviations before the integer division, so that the
* fractional result is not truncated to 0.
*/
#define RTT_SCALE 1000000
#define DELIVERY_SCALE 1000000
/* The metric we calculate compares current connection min_rtt and rate_delivered to
 * the min rtt and max rate delivered we have observed for the remote host.
 * The idea is that we want to reward congestion control algorithms that minimize
 * RTT and maximize delivery rate, as these are operating at the bottleneck
 * bandwidth, which is the optimal operating mode. This does not unduly favour
 * a particular algorithm in practice it seems, and choices can fluctuate over
 * time. One concern is that the delivery rate is rather low and does not
 * fluctuate much - we see 1 most often for delivery rate. Our cost function
 * rates rtt deviation and delivery rate deviation equally however; this may
 * need to be tweaked.
 *
 * Cost function is
 *
 *   (conn_min_rtt - min_rtt)     (max_delivery_rate - delivery_rate)
 *   ------------------------  +  -----------------------------------
 *       overall min rtt               overall_max_delivery_rate
 *
 *
 * Both of these are scaled by RTT_SCALE, DELIVERY_SCALE to ensure we get integer
 * values. Note we do not need to square values because both are asymmetric;
 * a connection min_rtt > overall_min_rtt is bad, while a delivery_rate < overall
 * max delivery rate is bad. As a result a higher cost here is a problem, and
 * we pick action (congestion algorithm) with minimum cost.
 *
 * Metrics are updated using standard reinforcement learning update;
 *
 * new_estimate = old_estimate + learning_rate * (reward - old_estimate)
 */
/*
 * tcp_metric_calc - cost of one connection sample relative to the best
 * values recorded for remote host @r.
 *
 * @r:              per-remote-host state; r->min_rtt and
 *                  r->max_rate_delivered are updated in place when this
 *                  sample improves on them.
 * @min_rtt:        minimum RTT of the connection; 0 is treated as
 *                  "no sample" and contributes nothing.
 * @rate_delivered: delivery rate of the connection.
 *
 * Returns the sum of
 *   (min_rtt - r->min_rtt) * RTT_SCALE / r->min_rtt
 * and
 *   (r->max_rate_delivered - rate_delivered) * DELIVERY_SCALE /
 *	r->max_rate_delivered
 * so lower is better and 0 means the sample matches the best seen.
 */
static __always_inline __u64 tcp_metric_calc(struct remote_host *r,
					     __u64 min_rtt,
					     __u64 rate_delivered)
{
	__u64 metric = 0;

	/*
	 * r->min_rtt == 0 means "not yet set", so a zero sample must not be
	 * stored: it would wipe the minimum already learned for this host
	 * and the next sample would be accepted as the new baseline.
	 */
	if (min_rtt && (!r->min_rtt || min_rtt < r->min_rtt))
		r->min_rtt = min_rtt;
	if (!r->max_rate_delivered || rate_delivered > r->max_rate_delivered)
		r->max_rate_delivered = rate_delivered;

	/*
	 * The updates above guarantee r->min_rtt <= min_rtt (for a nonzero
	 * sample) and r->max_rate_delivered >= rate_delivered, so neither
	 * unsigned subtraction can wrap.  A zero sample skips the RTT term,
	 * and the nonzero checks keep both divisions safe.
	 */
	if (min_rtt && r->min_rtt)
		metric += ((min_rtt - r->min_rtt) * RTT_SCALE) / r->min_rtt;
	if (r->max_rate_delivered)
		metric += ((r->max_rate_delivered - rate_delivered) * DELIVERY_SCALE) /
			  r->max_rate_delivered;

	return metric;
}