selftests/bpf: Add tests for bpf_for_each
In this patch:

1) Add a new prog, "for_each_helper", which tests the basic functionality of
the bpf_for_each helper. (A minimal usage sketch of the helper follows below.)

2) Add pyperf600_foreach and strobemeta_foreach to test the performance of
using bpf_for_each instead of an unrolled for loop.
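
A minimal sketch of how a program might call the helper, assuming the
signature introduced earlier in this series, long bpf_for_each(u32
nr_iterations, void *callback_fn, void *callback_ctx, u64 flags), where the
helper returns the number of iterations performed and the callback returns 0
to continue or 1 to stop early. All names below are illustrative only and are
not part of this patch:

/* Hypothetical sketch, not part of this patch; assumes the bpf_for_each
 * helper declaration generated from this series' UAPI changes.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

struct sum_ctx {
	int sum;
};

static int sum_callback_fn(__u32 index, void *data)
{
	struct sum_ctx *ctx = data;

	ctx->sum += index;
	return 0; /* 0 = keep iterating, 1 = stop early */
}

SEC("tc")
int sum_prog(struct __sk_buff *skb)
{
	struct sum_ctx ctx = {};

	/* invoke sum_callback_fn 10 times; flags must be 0 */
	bpf_for_each(10, sum_callback_fn, &ctx, 0);
	return 0;
}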

The results of pyperf600 and strobemeta are as follows:

~strobemeta~

Baseline
    verification time 6808200 usec
    stack depth 496
    processed 592132 insns (limit 1000000) max_states_per_insn 14
    total_states 16018 peak_states 13684 mark_read 3132
    #188 verif_scale_strobemeta:OK (unrolled loop)

Using bpf_for_each
    verification time 31589 usec
    stack depth 96+408
    processed 1630 insns (limit 1000000) max_states_per_insn 4
    total_states 107 peak_states 107 mark_read 60
    #189 verif_scale_strobemeta_foreach:OK

~pyperf600~

Baseline
    verification time 29702486 usec
    stack depth 368
    processed 626838 insns (limit 1000000) max_states_per_insn 7
    total_states 30368 peak_states 30279 mark_read 748
    #182 verif_scale_pyperf600:OK (unrolled loop)

Using bpf_for_each
    verification time 148488 usec
    stack depth 320+40
    processed 10518 insns (limit 1000000) max_states_per_insn 10
    total_states 705 peak_states 517 mark_read 38
    #183 verif_scale_pyperf600_foreach:OK

Using the bpf_for_each helper reduced both the verification time and the
number of processed instructions by roughly two orders of magnitude.
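
For reference, the ratios computed from the numbers above are roughly:

    strobemeta: verification time 31589 / 6808200 usec ≈ 0.5% of baseline,
                processed insns 1630 / 592132 ≈ 0.3% of baseline
    pyperf600:  verification time 148488 / 29702486 usec ≈ 0.5% of baseline,
                processed insns 10518 / 626838 ≈ 1.7% of baseline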

Signed-off-by: Joanne Koong <joannekoong@fb.com>
jkoong-fb authored and intel-lab-lkp committed Nov 18, 2021
1 parent 39b67c7 commit 1b5f8eb
Showing 7 changed files with 295 additions and 4 deletions.
12 changes: 12 additions & 0 deletions tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -115,6 +115,12 @@ void test_verif_scale_pyperf600()
scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
}

void test_verif_scale_pyperf600_foreach(void)
{
/* use the bpf_for_each helper */
scale_test("pyperf600_foreach.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
}

void test_verif_scale_pyperf600_nounroll()
{
/* no unroll at all.
@@ -165,6 +171,12 @@ void test_verif_scale_strobemeta()
scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
}

void test_verif_scale_strobemeta_foreach(void)
{
/* use the bpf_for_each helper */
scale_test("strobemeta_foreach.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
}

void test_verif_scale_strobemeta_nounroll1()
{
/* no unroll, tiny loops */
61 changes: 61 additions & 0 deletions tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -4,6 +4,7 @@
#include <network_helpers.h>
#include "for_each_hash_map_elem.skel.h"
#include "for_each_array_map_elem.skel.h"
#include "for_each_helper.skel.h"

static unsigned int duration;

@@ -121,10 +122,70 @@ static void test_array_map(void)
for_each_array_map_elem__destroy(skel);
}

static void test_for_each_helper(void)
{
struct for_each_helper *skel;
__u32 retval;
int err;

skel = for_each_helper__open_and_load();
if (!ASSERT_OK_PTR(skel, "for_each_helper__open_and_load"))
return;

skel->bss->nr_iterations = 100;
err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_prog),
1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
&retval, &duration);
if (CHECK(err || retval, "bpf_for_each helper test_prog",
"err %d errno %d retval %d\n", err, errno, retval))
goto out;
ASSERT_EQ(skel->bss->nr_iterations_completed, skel->bss->nr_iterations,
"nr_iterations mismatch");
ASSERT_EQ(skel->bss->g_output, (100 * 99) / 2, "wrong output");

/* test callback_fn returning 1 to stop iteration */
skel->bss->nr_iterations = 400;
skel->data->stop_index = 50;
err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_prog),
1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
&retval, &duration);
if (CHECK(err || retval, "bpf_for_each helper test_prog",
"err %d errno %d retval %d\n", err, errno, retval))
goto out;
ASSERT_EQ(skel->bss->nr_iterations_completed, skel->data->stop_index + 1,
"stop_index not followed");
ASSERT_EQ(skel->bss->g_output, (50 * 49) / 2, "wrong output");

/* test passing in a null ctx */
skel->bss->nr_iterations = 10;
err = bpf_prog_test_run(bpf_program__fd(skel->progs.prog_null_ctx),
1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
&retval, &duration);
if (CHECK(err || retval, "bpf_for_each helper prog_null_ctx",
"err %d errno %d retval %d\n", err, errno, retval))
goto out;
ASSERT_EQ(skel->bss->nr_iterations_completed, skel->bss->nr_iterations,
"nr_iterations mismatch");

/* test invalid flags */
err = bpf_prog_test_run(bpf_program__fd(skel->progs.prog_invalid_flags),
1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
&retval, &duration);
if (CHECK(err || retval, "bpf_for_each helper prog_invalid_flags",
"err %d errno %d retval %d\n", err, errno, retval))
goto out;
ASSERT_EQ(skel->bss->err, -EINVAL, "invalid_flags");

out:
for_each_helper__destroy(skel);
}

void test_for_each(void)
{
if (test__start_subtest("hash_map"))
test_hash_map();
if (test__start_subtest("array_map"))
test_array_map();
if (test__start_subtest("for_each_helper"))
test_for_each_helper();
}
69 changes: 69 additions & 0 deletions tools/testing/selftests/bpf/progs/for_each_helper.c
@@ -0,0 +1,69 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

struct callback_ctx {
int output;
};

/* This should be set by the user program */
u32 nr_iterations;
u32 stop_index = -1;

/* Making these global variables so that the userspace program
* can verify the output through the skeleton
*/
int nr_iterations_completed;
int g_output;
int err;

static int callback_fn(__u32 index, void *data)
{
struct callback_ctx *ctx = data;

if (index >= stop_index)
return 1;

ctx->output += index;

return 0;
}

static int empty_callback_fn(__u32 index, void *data)
{
return 0;
}

SEC("tc")
int test_prog(struct __sk_buff *skb)
{
struct callback_ctx data = {};

nr_iterations_completed = bpf_for_each(nr_iterations, callback_fn, &data, 0);

g_output = data.output;

return 0;
}

SEC("tc")
int prog_null_ctx(struct __sk_buff *skb)
{
nr_iterations_completed = bpf_for_each(nr_iterations, empty_callback_fn, NULL, 0);

return 0;
}

SEC("tc")
int prog_invalid_flags(struct __sk_buff *skb)
{
struct callback_ctx data = {};

err = bpf_for_each(nr_iterations, callback_fn, &data, 1);

return 0;
}
70 changes: 69 additions & 1 deletion tools/testing/selftests/bpf/progs/pyperf.h
@@ -159,6 +159,57 @@ struct {
__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");

struct process_frame_ctx {
int cur_cpu;
int32_t *symbol_counter;
void *frame_ptr;
FrameData *frame;
PidData *pidData;
Symbol *sym;
Event *event;
bool done;
};

#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))

static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
{
int zero = 0;
void *frame_ptr = ctx->frame_ptr;
PidData *pidData = ctx->pidData;
FrameData *frame = ctx->frame;
int32_t *symbol_counter = ctx->symbol_counter;
int cur_cpu = ctx->cur_cpu;
Event *event = ctx->event;
Symbol *sym = ctx->sym;

if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);

if (!symbol_id) {
bpf_map_update_elem(&symbolmap, sym, &zero, 0);
symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
if (!symbol_id) {
ctx->done = true;
return 1;
}
}
if (*symbol_id == new_symbol_id)
(*symbol_counter)++;

barrier_var(i);
if (i >= STACK_MAX_LEN)
return 1;

event->stack[i] = *symbol_id;

event->stack_len = i + 1;
frame_ptr = frame->f_back;
}
return 0;
}

#ifdef GLOBAL_FUNC
__noinline
#elif defined(SUBPROGS)
@@ -228,11 +279,27 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
if (symbol_counter == NULL)
return 0;
#ifdef USE_FOREACH
struct process_frame_ctx ctx;

ctx.cur_cpu = cur_cpu;
ctx.symbol_counter = symbol_counter;
ctx.frame_ptr = frame_ptr;
ctx.frame = &frame;
ctx.pidData = pidData;
ctx.sym = &sym;
ctx.event = event;
ctx.done = false;

bpf_for_each(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
if (ctx.done)
return 0;
#else
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
#pragma clang loop unroll(full)
#endif
#endif /* NO_UNROLL */
/* Unwind python stack */
for (int i = 0; i < STACK_MAX_LEN; ++i) {
if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
@@ -251,6 +318,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
frame_ptr = frame.f_back;
}
}
#endif /* USE_FOREACH */
event->stack_complete = frame_ptr == NULL;
} else {
event->stack_complete = 1;
5 changes: 5 additions & 0 deletions tools/testing/selftests/bpf/progs/pyperf600_foreach.c
@@ -0,0 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2021 Facebook
#define STACK_MAX_LEN 600
#define USE_FOREACH
#include "pyperf.h"
73 changes: 70 additions & 3 deletions tools/testing/selftests/bpf/progs/strobemeta.h
@@ -445,6 +445,46 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
return payload;
}

enum read_type {
READ_INT_VAR,
READ_MAP_VAR,
READ_STR_VAR,
};

struct read_var_ctx {
struct strobemeta_payload *data;
void *tls_base;
struct strobemeta_cfg *cfg;
void *payload;
/* value gets mutated */
struct strobe_value_generic *value;
enum read_type type;
};

static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
{
switch (ctx->type) {
case READ_INT_VAR:
if (index >= STROBE_MAX_INTS)
return 1;
read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data);
break;
case READ_MAP_VAR:
if (index >= STROBE_MAX_MAPS)
return 1;
ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
ctx->value, ctx->data, ctx->payload);
break;
case READ_STR_VAR:
if (index >= STROBE_MAX_STRS)
return 1;
ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
ctx->value, ctx->data, ctx->payload);
break;
}
return 0;
}

/*
* read_strobe_meta returns NULL, if no metadata was read; otherwise returns
* pointer to *right after* payload ends
@@ -475,30 +515,57 @@ static void *read_strobe_meta(struct task_struct *task,
*/
tls_base = (void *)task;

#ifdef USE_FOREACH
struct read_var_ctx ctx;
int err;

ctx.cfg = cfg;
ctx.tls_base = tls_base;
ctx.value = &value;
ctx.data = data;
ctx.payload = payload;
ctx.type = READ_INT_VAR;

err = bpf_for_each(STROBE_MAX_INTS, read_var_callback, &ctx, 0);
if (err != STROBE_MAX_INTS)
return NULL;

ctx.type = READ_STR_VAR;
err = bpf_for_each(STROBE_MAX_STRS, read_var_callback, &ctx, 0);
if (err != STROBE_MAX_STRS)
return NULL;

ctx.type = READ_MAP_VAR;
err = bpf_for_each(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
if (err != STROBE_MAX_MAPS)
return NULL;
#else
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
#pragma unroll
#endif
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_INTS; ++i) {
read_int_var(cfg, i, tls_base, &value, data);
}
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
#pragma unroll
#endif
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_STRS; ++i) {
payload += read_str_var(cfg, i, tls_base, &value, data, payload);
}
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
#pragma unroll
#endif
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
payload = read_map_var(cfg, i, tls_base, &value, data, payload);
}
#endif /* USE_FOREACH */

/*
* return pointer right after end of payload, so it's possible to
* calculate exact amount of useful data that needs to be sent
9 changes: 9 additions & 0 deletions tools/testing/selftests/bpf/progs/strobemeta_foreach.c
@@ -0,0 +1,9 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
// Copyright (c) 2021 Facebook

#define STROBE_MAX_INTS 2
#define STROBE_MAX_STRS 25
#define STROBE_MAX_MAPS 100
#define STROBE_MAX_MAP_ENTRIES 20
#define USE_FOREACH
#include "strobemeta.h"
