Skip to content
Permalink
Browse files
netfilter: add bpf base hook program generator
Add a kernel bpf program generator for netfilter base hooks.

Currently netfilter hooks are invoked by nf_hook_slow:

for i in hooks; do
  verdict = hooks[i]->indirect_func(hooks[i]->hook_arg, skb, state);

  switch (verdict) { ....

The autogenerator unrolls the loop, so we get:

state->priv = hooks[0].hook_arg;
v = first_hook_function(state);
if (v != ACCEPT) goto done;
state->priv = hooks[1].hook_arg;
v = second_hook_function(state); ...

Indirections are replaced by direct calls. Invocation of the
autogenerated programs is done via bpf dispatcher from nf_hook().

The autogenerated program has the same return value scheme as
nf_hook_slow(). NF_HOOK() points are converted to call the
autogenerated bpf program instead of nf_hook_slow().

Purpose of this is to eventually add a 'netfilter prog type' to bpf and
permit attachment of (userspace generated) bpf programs to the netfilter
machinery, e.g.  'attach bpf prog id 1234 to ipv6 PREROUTING at prio -300'.

This will require exposing the context structure (the program argument,
'__nf_hook_state'), with accesses rewritten to match the nf_hook_state layout.

TODO:
1. Test !x86_64.
2. Test bridge family.

Future work:
Add support for NAT hooks; they still use indirect calls, but those
are less of a problem because they are invoked only once per
connection.

Could annotate the ops struct with the kinds of verdicts the
C function can return.  This would allow eliding the retval
check when a hook can only return NF_ACCEPT.

Could add extra support for INGRESS hook to move more code from
inline functions to the autogenerated program.

Signed-off-by: Florian Westphal <fw@strlen.de>
  • Loading branch information
Florian Westphal authored and intel-lab-lkp committed Oct 14, 2021
1 parent bbdbb15 commit 2d7f7c3f2aa0c1933f06cfb066511f4cdefd6ff7
Show file tree
Hide file tree
Showing 6 changed files with 577 additions and 3 deletions.
@@ -2,6 +2,7 @@
#ifndef __LINUX_NETFILTER_H
#define __LINUX_NETFILTER_H

#include <linux/filter.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/net.h>
@@ -106,6 +107,9 @@ struct nf_hook_entries_rcu_head {
};

struct nf_hook_entries {
#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
struct bpf_prog *hook_prog;
#endif
u16 num_hook_entries;
/* padding */
struct nf_hook_entry hooks[];
@@ -205,6 +209,17 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,

void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
const struct nf_hook_entries *e);

#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
DECLARE_BPF_DISPATCHER(nf_hook_base);

/* Invoke the auto-generated base hook program @prog through the
 * nf_hook_base bpf dispatcher, with @state as the program context.
 * Returns the nf_hook_slow()-style verdict produced by the program.
 * Caller must hold off migration (see bpf_prog_run* conventions).
 */
static __always_inline int bpf_prog_run_nf(const struct bpf_prog *prog,
					   struct nf_hook_state *state)
{
	int verdict;

	/* Dispatcher trampoline turns this into a direct call once the
	 * program has been attached via nf_hook_bpf_change_prog().
	 */
	verdict = __bpf_prog_run(prog, state, BPF_DISPATCHER_FUNC(nf_hook_base));

	return verdict;
}
#endif

/**
* nf_hook - call a netfilter hook
*
@@ -259,11 +274,24 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,

if (hook_head) {
struct nf_hook_state state;
#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
const struct bpf_prog *p = READ_ONCE(hook_head->hook_prog);

nf_hook_state_init(&state, hook, pf, indev, outdev,
sk, net, okfn);

state.priv = (void *)hook_head;
state.skb = skb;

migrate_disable();
ret = bpf_prog_run_nf(p, &state);
migrate_enable();
#else
nf_hook_state_init(&state, hook, pf, indev, outdev,
sk, net, okfn);

ret = nf_hook_slow(skb, &state, hook_head);
#endif
}
rcu_read_unlock();

@@ -341,10 +369,38 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,

if (hook_head) {
struct nf_hook_state state;
#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
const struct bpf_prog *p = hook_head->hook_prog;
struct sk_buff *skb, *next;
struct list_head sublist;
int ret;

nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn);

INIT_LIST_HEAD(&sublist);

migrate_disable();

list_for_each_entry_safe(skb, next, head, list) {
skb_list_del_init(skb);

state.priv = (void *)hook_head;
state.skb = skb;

ret = bpf_prog_run_nf(p, &state);
if (ret == 1)
list_add_tail(&skb->list, &sublist);
}

migrate_enable();

/* Put passed packets back on main list */
list_splice(&sublist, head);
#else
nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn);

nf_hook_slow_list(head, &state, hook_head);
#endif
}
rcu_read_unlock();
}
@@ -0,0 +1,14 @@
struct bpf_dispatcher;
struct bpf_prog;

struct bpf_prog *nf_hook_bpf_create(const struct nf_hook_entries *n);
struct bpf_prog *nf_hook_bpf_create_fb(void);

#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
void nf_hook_bpf_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to);
#else
/* Stub for kernels built without CONFIG_NF_HOOK_BPF: no hook programs
 * are ever generated, so there is nothing to swap on the dispatcher.
 * All arguments are ignored.
 */
static inline void
nf_hook_bpf_change_prog(struct bpf_dispatcher *d, struct bpf_prog *f, struct bpf_prog *t)
{
}
#endif
@@ -19,6 +19,16 @@ config NETFILTER_FAMILY_BRIDGE
config NETFILTER_FAMILY_ARP
bool

config HAVE_NF_HOOK_BPF
bool

config NF_HOOK_BPF
bool "netfilter base hook bpf translator"
depends on BPF_JIT
help
This partially unrolls the nf_hook_slow interpreter loop with
auto-generated BPF programs.

config NETFILTER_NETLINK_HOOK
tristate "Netfilter base hook dump support"
depends on NETFILTER_ADVANCED
@@ -16,6 +16,7 @@ nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o

obj-$(CONFIG_NETFILTER) = netfilter.o
obj-$(CONFIG_NF_HOOK_BPF) += nf_hook_bpf.o

obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
@@ -24,6 +24,7 @@
#include <linux/rcupdate.h>
#include <net/net_namespace.h>
#include <net/netfilter/nf_queue.h>
#include <net/netfilter/nf_hook_bpf.h>
#include <net/sock.h>

#include "nf_internals.h"
@@ -47,6 +48,12 @@ static DEFINE_MUTEX(nf_hook_mutex);
#define nf_entry_dereference(e) \
rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))

#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
DEFINE_BPF_DISPATCHER(nf_hook_base);

static struct bpf_prog *fallback_nf_hook_slow;
#endif

static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
{
struct nf_hook_entries *e;
@@ -58,9 +65,25 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
if (num == 0)
return NULL;

#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
if (!fallback_nf_hook_slow) {
/* never free'd */
fallback_nf_hook_slow = nf_hook_bpf_create_fb();

if (!fallback_nf_hook_slow)
return NULL;
}
#endif

e = kvzalloc(alloc, GFP_KERNEL);
if (e)
e->num_hook_entries = num;
if (!e)
return NULL;

e->num_hook_entries = num;
#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
e->hook_prog = fallback_nf_hook_slow;
#endif

return e;
}

@@ -104,6 +127,7 @@ nf_hook_entries_grow(const struct nf_hook_entries *old,
{
unsigned int i, alloc_entries, nhooks, old_entries;
struct nf_hook_ops **orig_ops = NULL;
struct bpf_prog *hook_bpf_prog;
struct nf_hook_ops **new_ops;
struct nf_hook_entries *new;
bool inserted = false;
@@ -156,6 +180,27 @@ nf_hook_entries_grow(const struct nf_hook_entries *old,
new->hooks[nhooks].priv = reg->priv;
}

hook_bpf_prog = nf_hook_bpf_create(new);

/* XXX: jit failure handling?
* We could refuse hook registration.
*
* For now, allocate_hook_entries_size() sets
* ->hook_prog to a small fallback program that
* calls nf_hook_slow().
*/
if (hook_bpf_prog) {
struct bpf_prog *old_prog = NULL;

new->hook_prog = hook_bpf_prog;

if (old)
old_prog = old->hook_prog;

nf_hook_bpf_change_prog(BPF_DISPATCHER_PTR(nf_hook_base),
old_prog, hook_bpf_prog);
}

return new;
}

@@ -221,6 +266,7 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
struct nf_hook_entries __rcu **pp)
{
unsigned int i, j, skip = 0, hook_entries;
struct bpf_prog *hook_bpf_prog = NULL;
struct nf_hook_entries *new = NULL;
struct nf_hook_ops **orig_ops;
struct nf_hook_ops **new_ops;
@@ -244,8 +290,15 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,

hook_entries -= skip;
new = allocate_hook_entries_size(hook_entries);
if (!new)
if (!new) {
#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
struct bpf_prog *old_prog = old->hook_prog;

WRITE_ONCE(old->hook_prog, fallback_nf_hook_slow);
nf_hook_bpf_change_prog(BPF_DISPATCHER_PTR(nf_hook_base), old_prog, NULL);
#endif
return NULL;
}

new_ops = nf_hook_entries_get_hook_ops(new);
for (i = 0, j = 0; i < old->num_hook_entries; i++) {
@@ -256,7 +309,16 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
j++;
}
hooks_validate(new);

#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
/* if this fails fallback prog calls nf_hook_slow. */
hook_bpf_prog = nf_hook_bpf_create(new);
if (hook_bpf_prog)
new->hook_prog = hook_bpf_prog;
#endif
out_assign:
nf_hook_bpf_change_prog(BPF_DISPATCHER_PTR(nf_hook_base),
old ? old->hook_prog : NULL, hook_bpf_prog);
rcu_assign_pointer(*pp, new);
return old;
}
@@ -584,6 +646,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
int ret;

state->skb = skb;

for (; s < e->num_hook_entries; s++) {
verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
switch (verdict & NF_VERDICT_MASK) {
@@ -764,6 +827,11 @@ int __init netfilter_init(void)
if (ret < 0)
goto err_pernet;

#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
fallback_nf_hook_slow = nf_hook_bpf_create_fb();
WARN_ON_ONCE(!fallback_nf_hook_slow);
#endif

return 0;
err_pernet:
unregister_pernet_subsys(&netfilter_net_ops);

0 comments on commit 2d7f7c3

Please sign in to comment.