Skip to content

Commit

Permalink
bpf: Allow storing unreferenced kptr in map
Browse files Browse the repository at this point in the history
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value and holds a PTR_TO_BTF_ID stored by a BPF program during
its invocation. Storing to such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.

Such kptrs are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow
the user to still access such an invalid pointer, but passing such
pointers into BPF helpers and kfuncs should not be permitted. A future
patch in this series will close this gap.

The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user ensures that the kernel
object remains valid, the data the program reads from it is valid.

The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.

An example of this specification is shown below:

	#define __kptr __attribute__((btf_type_tag("kptr")))

	struct map_value {
		...
		struct task_struct __kptr *task;
		...
	};

Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.

Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.

Only BPF_LDX, BPF_STX, and BPF_ST with insn->imm = 0 (to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.

For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.

It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptr, similar to the bpf_timer case.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
  • Loading branch information
kkdwivedi authored and intel-lab-lkp committed Mar 20, 2022
1 parent 9e097fe commit 580de82
Show file tree
Hide file tree
Showing 6 changed files with 401 additions and 28 deletions.
29 changes: 28 additions & 1 deletion include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,22 @@ struct bpf_map_ops {
const struct bpf_iter_seq_info *iter_seq_info;
};

enum {
	/* Support at most 8 pointers in a BPF map value */
	BPF_MAP_VALUE_OFF_MAX = 8,
};

/* Describes one kptr field inside a map value: where it lives and which
 * in-kernel BTF type a stored PTR_TO_BTF_ID must match (the name/kind
 * from the map value BTF is resolved against kernel BTF at map creation).
 */
struct bpf_map_value_off_desc {
	u32 offset;        /* byte offset of the kptr within the map value */
	u32 btf_id;        /* BTF ID of the pointed-to struct type */
	struct btf *btf;   /* BTF object that btf_id belongs to (refcounted) */
};

/* Table of all kptr fields found in a map value (map->kptr_off_tab).
 * Allocated by btf_find_kptr(); off[] is a flexible array of nr_off
 * entries.
 */
struct bpf_map_value_off {
	u32 nr_off;                           /* number of entries in off[] */
	struct bpf_map_value_off_desc off[];  /* one descriptor per kptr field */
};

struct bpf_map {
/* The first two cachelines with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
Expand All @@ -171,6 +187,7 @@ struct bpf_map {
u64 map_extra; /* any per-map-type extra fields */
u32 map_flags;
int spin_lock_off; /* >=0 valid offset, <0 error */
struct bpf_map_value_off *kptr_off_tab;
int timer_off; /* >=0 valid offset, <0 error */
u32 id;
int numa_node;
Expand All @@ -184,7 +201,7 @@ struct bpf_map {
char name[BPF_OBJ_NAME_LEN];
bool bypass_spec_v1;
bool frozen; /* write-once; write-protected by freeze_mutex */
/* 14 bytes hole */
/* 6 bytes hole */

/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
Expand Down Expand Up @@ -217,6 +234,11 @@ static inline bool map_value_has_timer(const struct bpf_map *map)
return map->timer_off >= 0;
}

static inline bool map_value_has_kptr(const struct bpf_map *map)
{
return !IS_ERR_OR_NULL(map->kptr_off_tab);
}

static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
{
if (unlikely(map_value_has_spin_lock(map)))
Expand Down Expand Up @@ -1497,6 +1519,11 @@ void bpf_prog_put(struct bpf_prog *prog);
void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);

struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset);
void bpf_map_free_kptr_off_tab(struct bpf_map *map);
struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map);
bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b);

struct bpf_map *bpf_map_get(u32 ufd);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
Expand Down
2 changes: 2 additions & 0 deletions include/linux/btf.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
int btf_find_timer(const struct btf *btf, const struct btf_type *t);
struct bpf_map_value_off *btf_find_kptr(const struct btf *btf,
const struct btf_type *t);
bool btf_type_is_void(const struct btf_type *t);
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
Expand Down
161 changes: 138 additions & 23 deletions kernel/bpf/btf.c
Original file line number Diff line number Diff line change
Expand Up @@ -3164,33 +3164,65 @@ static void btf_struct_log(struct btf_verifier_env *env,
enum {
BTF_FIELD_SPIN_LOCK,
BTF_FIELD_TIMER,
BTF_FIELD_KPTR,
};

enum {
BTF_FIELD_IGNORE = 0,
BTF_FIELD_FOUND = 1,
};

struct btf_field_info {
const struct btf_type *type;
u32 off;
};

static int btf_find_field_struct(const struct btf *btf, const struct btf_type *t,
u32 off, int sz, struct btf_field_info *info)
{
if (!__btf_type_is_struct(t))
return 0;
return BTF_FIELD_IGNORE;
if (t->size != sz)
return 0;
if (info->off != -ENOENT)
/* only one such field is allowed */
return -E2BIG;
return BTF_FIELD_IGNORE;
info->off = off;
return 0;
return BTF_FIELD_FOUND;
}

/* Check whether a candidate member is a kptr field: a pointer whose
 * pointed-to type carries exactly one "kptr" BTF type tag and, after
 * skipping modifiers, resolves to a struct.
 *
 * Returns BTF_FIELD_FOUND and fills *info on success, BTF_FIELD_IGNORE
 * when the member is not a kptr at all, and -EINVAL for a malformed
 * kptr annotation.
 */
static int btf_find_field_kptr(const struct btf *btf, const struct btf_type *t,
			       u32 off, int sz, struct btf_field_info *info)
{
	const struct btf_type *tag;

	/* For PTR, sz is always == 8 */
	if (!btf_type_is_ptr(t))
		return BTF_FIELD_IGNORE;

	tag = btf_type_by_id(btf, t->type);
	if (!btf_type_is_type_tag(tag))
		return BTF_FIELD_IGNORE;
	/* Reject extra tags */
	if (btf_type_is_type_tag(btf_type_by_id(btf, tag->type)))
		return -EINVAL;
	if (strcmp("kptr", __btf_name_by_offset(btf, tag->name_off)))
		return -EINVAL;

	/* Get the base type */
	t = tag;
	if (btf_type_is_modifier(t))
		t = btf_type_skip_modifiers(btf, t->type, NULL);
	/* Only pointer to struct is allowed */
	if (!__btf_type_is_struct(t))
		return -EINVAL;

	info->type = t;
	info->off = off;
	return BTF_FIELD_FOUND;
}

static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t,
const char *name, int sz, int align, int field_type,
struct btf_field_info *info)
struct btf_field_info *info, int info_cnt)
{
const struct btf_member *member;
int ret, idx = 0;
u32 i, off;
int ret;

for_each_member(i, t, member) {
const struct btf_type *member_type = btf_type_by_id(btf,
Expand All @@ -3210,24 +3242,35 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t
switch (field_type) {
case BTF_FIELD_SPIN_LOCK:
case BTF_FIELD_TIMER:
ret = btf_find_field_struct(btf, member_type, off, sz, info);
ret = btf_find_field_struct(btf, member_type, off, sz, &info[idx]);
if (ret < 0)
return ret;
break;
case BTF_FIELD_KPTR:
ret = btf_find_field_kptr(btf, member_type, off, sz, &info[idx]);
if (ret < 0)
return ret;
break;
default:
return -EFAULT;
}

if (ret == BTF_FIELD_FOUND && idx >= info_cnt)
return -E2BIG;
else if (ret == BTF_FIELD_IGNORE)
continue;
++idx;
}
return 0;
return idx;
}

static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
const char *name, int sz, int align, int field_type,
struct btf_field_info *info)
struct btf_field_info *info, int info_cnt)
{
const struct btf_var_secinfo *vsi;
int ret, idx = 0;
u32 i, off;
int ret;

for_each_vsi(i, t, vsi) {
const struct btf_type *var = btf_type_by_id(btf, vsi->type);
Expand All @@ -3245,19 +3288,30 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
switch (field_type) {
case BTF_FIELD_SPIN_LOCK:
case BTF_FIELD_TIMER:
ret = btf_find_field_struct(btf, var_type, off, sz, info);
ret = btf_find_field_struct(btf, var_type, off, sz, &info[idx]);
if (ret < 0)
return ret;
break;
case BTF_FIELD_KPTR:
ret = btf_find_field_kptr(btf, var_type, off, sz, &info[idx]);
if (ret < 0)
return ret;
break;
default:
return -EFAULT;
}

if (ret == BTF_FIELD_FOUND && idx >= info_cnt)
return -E2BIG;
if (ret == BTF_FIELD_IGNORE)
continue;
++idx;
}
return 0;
return idx;
}

static int btf_find_field(const struct btf *btf, const struct btf_type *t,
int field_type, struct btf_field_info *info)
int field_type, struct btf_field_info *info, int info_cnt)
{
const char *name;
int sz, align;
Expand All @@ -3273,14 +3327,20 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
sz = sizeof(struct bpf_timer);
align = __alignof__(struct bpf_timer);
break;
case BTF_FIELD_KPTR:
name = NULL;
sz = sizeof(u64);
align = __alignof__(u64);
break;
default:
return -EFAULT;
}

/* The maximum allowed fields of a certain type will be info_cnt - 1 */
if (__btf_type_is_struct(t))
return btf_find_struct_field(btf, t, name, sz, align, field_type, info);
return btf_find_struct_field(btf, t, name, sz, align, field_type, info, info_cnt - 1);
else if (btf_type_is_datasec(t))
return btf_find_datasec_var(btf, t, name, sz, align, field_type, info);
return btf_find_datasec_var(btf, t, name, sz, align, field_type, info, info_cnt - 1);
return -EINVAL;
}

Expand All @@ -3290,24 +3350,79 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
*/
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
{
struct btf_field_info info = { .off = -ENOENT };
/* btf_find_field requires array of size max + 1 */
struct btf_field_info info_arr[2];
int ret;

ret = btf_find_field(btf, t, BTF_FIELD_SPIN_LOCK, &info);
ret = btf_find_field(btf, t, BTF_FIELD_SPIN_LOCK, info_arr, ARRAY_SIZE(info_arr));
if (ret < 0)
return ret;
return info.off;
if (!ret)
return -ENOENT;
return info_arr[0].off;
}

int btf_find_timer(const struct btf *btf, const struct btf_type *t)
{
struct btf_field_info info = { .off = -ENOENT };
/* btf_find_field requires array of size max + 1 */
struct btf_field_info info_arr[2];
int ret;

ret = btf_find_field(btf, t, BTF_FIELD_TIMER, &info);
ret = btf_find_field(btf, t, BTF_FIELD_TIMER, info_arr, ARRAY_SIZE(info_arr));
if (ret < 0)
return ret;
return info.off;
if (!ret)
return -ENOENT;
return info_arr[0].off;
}

/* Scan a map value's BTF (struct or datasec) for kptr-tagged pointer
 * fields and build the kptr offset table.
 *
 * Returns NULL when the value contains no kptr fields, an ERR_PTR on
 * failure, and otherwise an allocated table the caller owns (released
 * with bpf_map_free_kptr_off_tab()). Each entry records the field's
 * byte offset plus the in-kernel BTF and BTF ID the pointed-to struct
 * resolved to.
 *
 * Fixes vs. previous version: the loop-local BTF type pointer no
 * longer shadows the 't' parameter (-Wshadow), and the redundant
 * 'tab->nr_off = 0' after kzalloc (which already zero-fills) is gone.
 */
struct bpf_map_value_off *btf_find_kptr(const struct btf *btf,
					const struct btf_type *t)
{
	/* btf_find_field requires array of size max + 1 */
	struct btf_field_info info_arr[BPF_MAP_VALUE_OFF_MAX + 1];
	struct bpf_map_value_off *tab;
	int ret, i, nr_off;

	/* Revisit stack usage when bumping BPF_MAP_VALUE_OFF_MAX */
	BUILD_BUG_ON(BPF_MAP_VALUE_OFF_MAX != 8);

	ret = btf_find_field(btf, t, BTF_FIELD_KPTR, info_arr, ARRAY_SIZE(info_arr));
	if (ret < 0)
		return ERR_PTR(ret);
	if (!ret)
		return NULL;

	nr_off = ret;
	/* kzalloc zeroes tab->nr_off, so the partial-failure cleanup at
	 * 'end' is safe even if the first iteration fails.
	 */
	tab = kzalloc(offsetof(struct bpf_map_value_off, off[nr_off]), GFP_KERNEL | __GFP_NOWARN);
	if (!tab)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < nr_off; i++) {
		const struct btf_type *ft;
		struct btf *off_btf;
		s32 id;

		/* Resolve the field's struct type against in-kernel BTF by
		 * name and kind; on success a reference is held on off_btf
		 * (dropped with btf_put() in the error path).
		 */
		ft = info_arr[i].type;
		id = bpf_find_btf_id(__btf_name_by_offset(btf, ft->name_off), BTF_INFO_KIND(ft->info),
				     &off_btf);
		if (id < 0) {
			ret = id;
			goto end;
		}

		tab->off[i].offset = info_arr[i].off;
		tab->off[i].btf_id = id;
		tab->off[i].btf = off_btf;
		/* Track how many entries hold a btf reference for cleanup */
		tab->nr_off = i + 1;
	}
	return tab;
end:
	while (tab->nr_off--)
		btf_put(tab->off[tab->nr_off].btf);
	kfree(tab);
	return ERR_PTR(ret);
}

static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
Expand Down
5 changes: 4 additions & 1 deletion kernel/bpf/map_in_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
inner_map_meta->max_entries = inner_map->max_entries;
inner_map_meta->spin_lock_off = inner_map->spin_lock_off;
inner_map_meta->timer_off = inner_map->timer_off;
inner_map_meta->kptr_off_tab = bpf_map_copy_kptr_off_tab(inner_map);
if (inner_map->btf) {
btf_get(inner_map->btf);
inner_map_meta->btf = inner_map->btf;
Expand All @@ -71,6 +72,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)

void bpf_map_meta_free(struct bpf_map *map_meta)
{
bpf_map_free_kptr_off_tab(map_meta);
btf_put(map_meta->btf);
kfree(map_meta);
}
Expand All @@ -83,7 +85,8 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
meta0->key_size == meta1->key_size &&
meta0->value_size == meta1->value_size &&
meta0->timer_off == meta1->timer_off &&
meta0->map_flags == meta1->map_flags;
meta0->map_flags == meta1->map_flags &&
bpf_map_equal_kptr_off_tab(meta0, meta1);
}

void *bpf_map_fd_get_ptr(struct bpf_map *map,
Expand Down

0 comments on commit 580de82

Please sign in to comment.