Skip to content

Commit

Permalink
net: DSCP in IPv4 routing
Browse files Browse the repository at this point in the history
[PATCH 2/2] DSCP in IPv4 routing

TOS handling in ipv4 routing does not use all the bits in a DSCP
value.  This change introduces a sysctl "route_tos_as_dscp" control
that, when enabled, widens the masks to use all 6 DSCP bits in routing.

This commit converts macros
RT_TOS -> rt_tos
IPTOS_RT_MASK -> iptos_rt_mask

Signed-off-by: Russell Strong <russell@strong.id.au>
  • Loading branch information
Russell Strong authored and intel-lab-lkp committed Nov 14, 2020
1 parent 9e6cad5 commit f5e2676
Show file tree
Hide file tree
Showing 26 changed files with 65 additions and 66 deletions.
3 changes: 2 additions & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
Expand Up @@ -354,6 +354,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
const struct ip_tunnel_key *tun_key = &e->tun_info->key;
struct net_device *out_dev, *route_dev;
struct net *net = dev_net(mirred_dev);
struct flowi6 fl6 = {};
struct ipv6hdr *ip6h;
struct neighbour *n = NULL;
Expand All @@ -364,7 +365,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,

ttl = tun_key->ttl;

fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
fl6.flowlabel = ip6_make_flowinfo(rt_tos(net, tun_key->tos), tun_key->label);
fl6.daddr = tun_key->u.ipv6.dst;
fl6.saddr = tun_key->u.ipv6.src;

Expand Down
4 changes: 2 additions & 2 deletions drivers/net/geneve.c
Expand Up @@ -797,7 +797,7 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
use_cache = false;
}
fl4->flowi4_tos = RT_TOS(tos);
fl4->flowi4_tos = rt_tos(geneve->net, tos);

dst_cache = (struct dst_cache *)&info->dst_cache;
if (use_cache) {
Expand Down Expand Up @@ -851,7 +851,7 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
use_cache = false;
}

fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
fl6->flowlabel = ip6_make_flowinfo(rt_tos(geneve->net, prio),
info->key.label);
dst_cache = (struct dst_cache *)&info->dst_cache;
if (use_cache) {
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ipvlan/ipvlan_core.c
Expand Up @@ -421,7 +421,7 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb)
int err, ret = NET_XMIT_DROP;
struct flowi4 fl4 = {
.flowi4_oif = dev->ifindex,
.flowi4_tos = RT_TOS(ip4h->tos),
.flowi4_tos = rt_tos(net, ip4h->tos),
.flowi4_flags = FLOWI_FLAG_ANYSRC,
.flowi4_mark = skb->mark,
.daddr = ip4h->daddr,
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ppp/pptp.c
Expand Up @@ -155,7 +155,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
opt->dst_addr.sin_addr.s_addr,
opt->src_addr.sin_addr.s_addr,
0, 0, IPPROTO_GRE,
RT_TOS(0), sk->sk_bound_dev_if);
rt_tos(net, 0), sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto tx_error;

Expand Down
2 changes: 1 addition & 1 deletion drivers/net/vrf.c
Expand Up @@ -534,7 +534,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
/* needed to match OIF rule */
fl4.flowi4_oif = vrf_dev->ifindex;
fl4.flowi4_iif = LOOPBACK_IFINDEX;
fl4.flowi4_tos = RT_TOS(ip4h->tos);
fl4.flowi4_tos = rt_tos(net, ip4h->tos);
fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF;
fl4.flowi4_proto = ip4h->protocol;
fl4.daddr = ip4h->daddr;
Expand Down
4 changes: 2 additions & 2 deletions drivers/net/vxlan.c
Expand Up @@ -2412,7 +2412,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device

memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_oif = oif;
fl4.flowi4_tos = RT_TOS(tos);
fl4.flowi4_tos = rt_tos(vxlan->net, tos);
fl4.flowi4_mark = skb->mark;
fl4.flowi4_proto = IPPROTO_UDP;
fl4.daddr = daddr;
Expand Down Expand Up @@ -2469,7 +2469,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
fl6.flowi6_oif = oif;
fl6.daddr = *daddr;
fl6.saddr = *saddr;
fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
fl6.flowlabel = ip6_make_flowinfo(rt_tos(vxlan->net, tos), label);
fl6.flowi6_mark = skb->mark;
fl6.flowi6_proto = IPPROTO_UDP;
fl6.fl6_dport = dport;
Expand Down
3 changes: 2 additions & 1 deletion include/net/ip.h
Expand Up @@ -241,7 +241,8 @@ static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4)

static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet)
{
return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos);
struct net *net = sock_net(&inet->sk);
return (ipc->tos != -1) ? rt_tos(net, ipc->tos) : rt_tos(net, inet->tos);
}

static inline __u8 get_rtconn_flags(struct ipcm_cookie* ipc, struct sock* sk)
Expand Down
6 changes: 2 additions & 4 deletions include/net/route.h
Expand Up @@ -40,8 +40,8 @@

#define RTO_ONLINK 0x01

#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
#define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE))
#define RT_CONN_FLAGS(sk) (rt_tos(sock_net(sk), inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
#define RT_CONN_FLAGS_TOS(sk,tos) (rt_tos(sock_net(sk), tos) | sock_flag(sk, SOCK_LOCALROUTE))

struct fib_nh;
struct fib_info;
Expand Down Expand Up @@ -255,8 +255,6 @@ static inline void ip_rt_put(struct rtable *rt)
dst_release(&rt->dst);
}

#define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3)

extern const __u8 ip_tos2prio[16];

static inline char rt_tos2priority(u8 tos)
Expand Down
2 changes: 1 addition & 1 deletion net/bridge/br_netfilter_hooks.c
Expand Up @@ -379,7 +379,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
goto free_skb;

rt = ip_route_output(net, iph->daddr, 0,
RT_TOS(iph->tos), 0);
rt_tos(net, iph->tos), 0);
if (!IS_ERR(rt)) {
/* - Bridged-and-DNAT'ed traffic doesn't
* require ip_forwarding. */
Expand Down
4 changes: 2 additions & 2 deletions net/core/filter.c
Expand Up @@ -2345,7 +2345,7 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
struct flowi4 fl4 = {
.flowi4_flags = FLOWI_FLAG_ANYSRC,
.flowi4_mark = skb->mark,
.flowi4_tos = RT_TOS(ip4h->tos),
.flowi4_tos = rt_tos(net, ip4h->tos),
.flowi4_oif = dev->ifindex,
.flowi4_proto = ip4h->protocol,
.daddr = ip4h->daddr,
Expand Down Expand Up @@ -5309,7 +5309,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
fl4.flowi4_iif = params->ifindex;
fl4.flowi4_oif = 0;
}
fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
fl4.flowi4_tos = params->tos & iptos_rt_mask(net);
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = 0;

Expand Down
2 changes: 1 addition & 1 deletion net/core/lwt_bpf.c
Expand Up @@ -206,7 +206,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
fl4.flowi4_oif = oif;
fl4.flowi4_mark = skb->mark;
fl4.flowi4_uid = sock_net_uid(net, sk);
fl4.flowi4_tos = RT_TOS(iph->tos);
fl4.flowi4_tos = rt_tos(net, iph->tos);
fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
fl4.flowi4_proto = iph->protocol;
fl4.daddr = iph->daddr;
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/fib_frontend.c
Expand Up @@ -292,7 +292,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
.daddr = ip_hdr(skb)->saddr,
.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
.flowi4_tos = rt_tos(net, ip_hdr(skb)->tos),
.flowi4_scope = scope,
.flowi4_mark = vmark ? skb->mark : 0,
};
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/fib_rules.c
Expand Up @@ -229,7 +229,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
int err = -EINVAL;
struct fib4_rule *rule4 = (struct fib4_rule *) rule;

if (frh->tos & ~IPTOS_TOS_MASK) {
if (frh->tos & ~iptos_rt_mask(net)) {
NL_SET_ERR_MSG(extack, "Invalid tos");
goto errout;
}
Expand Down
8 changes: 4 additions & 4 deletions net/ipv4/icmp.c
Expand Up @@ -444,7 +444,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.saddr = saddr;
fl4.flowi4_mark = mark;
fl4.flowi4_uid = sock_net_uid(net, NULL);
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_tos = rt_tos(net, ip_hdr(skb)->tos);
fl4.flowi4_proto = IPPROTO_ICMP;
fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
Expand Down Expand Up @@ -496,7 +496,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->saddr = saddr;
fl4->flowi4_mark = mark;
fl4->flowi4_uid = sock_net_uid(net, NULL);
fl4->flowi4_tos = RT_TOS(tos);
fl4->flowi4_tos = rt_tos(net, tos);
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
Expand Down Expand Up @@ -544,7 +544,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
orefdst = skb_in->_skb_refdst; /* save old refdst */
skb_dst_set(skb_in, NULL);
err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
RT_TOS(tos), rt2->dst.dev);
rt_tos(net, tos), rt2->dst.dev);

dst_release(&rt2->dst);
rt2 = skb_rtable(skb_in);
Expand Down Expand Up @@ -712,7 +712,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
rcu_read_unlock();
}

tos = icmp_pointers[type].error ? (RT_TOS(iph->tos) |
tos = icmp_pointers[type].error ? (rt_tos(net, iph->tos) |
IPTOS_PREC_INTERNETCONTROL) :
iph->tos;
mark = IP4_REPLY_MARK(net, skb_in->mark);
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/ip_gre.c
Expand Up @@ -882,7 +882,7 @@ static int ipgre_open(struct net_device *dev)
t->parms.iph.daddr,
t->parms.iph.saddr,
t->parms.o_key,
RT_TOS(t->parms.iph.tos),
rt_tos(t->net, t->parms.iph.tos),
t->parms.link);
if (IS_ERR(rt))
return -EADDRNOTAVAIL;
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/ip_output.c
Expand Up @@ -1694,7 +1694,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,

flowi4_init_output(&fl4, oif,
IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark,
RT_TOS(arg->tos),
rt_tos(net, arg->tos),
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
ip_reply_arg_flowi_flags(arg),
daddr, saddr,
Expand Down
6 changes: 3 additions & 3 deletions net/ipv4/ip_tunnel.c
Expand Up @@ -294,7 +294,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)

ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
iph->saddr, tunnel->parms.o_key,
RT_TOS(iph->tos), tunnel->parms.link,
rt_tos(tunnel->net, iph->tos), tunnel->parms.link,
tunnel->fwmark, 0);
rt = ip_route_output_key(tunnel->net, &fl4);

Expand Down Expand Up @@ -565,7 +565,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
tunnel_id_to_key32(key->tun_id), rt_tos(tunnel->net, tos),
0, skb->mark, skb_get_hash(skb));
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
Expand Down Expand Up @@ -722,7 +722,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}

ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
tunnel->parms.o_key, rt_tos(tunnel->net, tos), tunnel->parms.link,
tunnel->fwmark, skb_get_hash(skb));

if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
Expand Down
6 changes: 3 additions & 3 deletions net/ipv4/ipmr.c
Expand Up @@ -1840,15 +1840,15 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
vif->remote, vif->local,
0, 0,
IPPROTO_IPIP,
RT_TOS(iph->tos), vif->link);
rt_tos(net, iph->tos), vif->link);
if (IS_ERR(rt))
goto out_free;
encap = sizeof(struct iphdr);
} else {
rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
0, 0,
IPPROTO_IPIP,
RT_TOS(iph->tos), vif->link);
rt_tos(net, iph->tos), vif->link);
if (IS_ERR(rt))
goto out_free;
}
Expand Down Expand Up @@ -2048,7 +2048,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
.flowi4_tos = RT_TOS(iph->tos),
.flowi4_tos = rt_tos(net, iph->tos),
.flowi4_oif = (rt_is_output_route(rt) ?
skb->dev->ifindex : 0),
.flowi4_iif = (rt_is_output_route(rt) ?
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/netfilter.c
Expand Up @@ -42,7 +42,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
*/
fl4.daddr = iph->daddr;
fl4.saddr = saddr;
fl4.flowi4_tos = RT_TOS(iph->tos);
fl4.flowi4_tos = rt_tos(net, iph->tos);
fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
if (!fl4.flowi4_oif)
fl4.flowi4_oif = l3mdev_master_ifindex(dev);
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/netfilter/ipt_rpfilter.c
Expand Up @@ -76,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
flow.flowi4_tos = RT_TOS(iph->tos);
flow.flowi4_tos = rt_tos(xt_net(par), iph->tos);
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par));

Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/netfilter/nf_dup_ipv4.c
Expand Up @@ -32,7 +32,7 @@ static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb,
fl4.flowi4_oif = oif;

fl4.daddr = gw->s_addr;
fl4.flowi4_tos = RT_TOS(iph->tos);
fl4.flowi4_tos = rt_tos(net, iph->tos);
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
rt = ip_route_output_key(net, &fl4);
Expand Down

0 comments on commit f5e2676

Please sign in to comment.