Skip to content

Commit

Permalink
bpf: enable task local storage for tracing programs
Browse files Browse the repository at this point in the history
To access per-task data, BPF programs usually creates a hash table with
pid as the key. This is not ideal because:
 1. The user need to estimate the proper size of the hash table, which may
    be inaccurate;
 2. Big hash tables are slow;
 3. To clean up the data properly during task terminations, the user need
    to write extra logic.

Task local storage overcomes these issues and offers a better option for
these per-task data. Task local storage is only available to BPF_LSM. Now
enable it for tracing programs.

Unlike LSM programs, tracing programs can be called in IRQ contexts.
Helpers that access task local storage are updated to use
raw_spin_lock_irqsave() instead of raw_spin_lock_bh().

Tracing programs can attach to functions on the task free path, e.g.
exit_creds(). To avoid allocating task local storage after
bpf_task_storage_free(). bpf_task_storage_get() is updated to not allocate
new storage when the task is not refcounted (task->usage == 0).

Acked-by: KP Singh <kpsingh@kernel.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
  • Loading branch information
liu-song-6 authored and intel-lab-lkp committed Feb 23, 2021
1 parent 7b1e385 commit a1afc38
Show file tree
Hide file tree
Showing 10 changed files with 49 additions and 65 deletions.
7 changes: 7 additions & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -1499,6 +1499,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
struct bpf_link *bpf_link_by_id(u32 id);

const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
void bpf_task_storage_free(struct task_struct *task);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
Expand Down Expand Up @@ -1684,6 +1685,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
{
return NULL;
}

static inline void bpf_task_storage_free(struct task_struct *task)
{
}
#endif /* CONFIG_BPF_SYSCALL */

void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
Expand Down Expand Up @@ -1886,6 +1891,8 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
extern const struct bpf_func_proto bpf_sock_from_file_proto;
extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
extern const struct bpf_func_proto bpf_task_storage_get_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;

const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
Expand Down
22 changes: 0 additions & 22 deletions include/linux/bpf_lsm.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode(
return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
}

static inline struct bpf_storage_blob *bpf_task(
const struct task_struct *task)
{
if (unlikely(!task->security))
return NULL;

return task->security + bpf_lsm_blob_sizes.lbs_task;
}

extern const struct bpf_func_proto bpf_inode_storage_get_proto;
extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
extern const struct bpf_func_proto bpf_task_storage_get_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
void bpf_inode_storage_free(struct inode *inode);
void bpf_task_storage_free(struct task_struct *task);

#else /* !CONFIG_BPF_LSM */

Expand All @@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode(
return NULL;
}

static inline struct bpf_storage_blob *bpf_task(
const struct task_struct *task)
{
return NULL;
}

static inline void bpf_inode_storage_free(struct inode *inode)
{
}

static inline void bpf_task_storage_free(struct task_struct *task)
{
}

#endif /* CONFIG_BPF_LSM */

#endif /* _LINUX_BPF_LSM_H */
2 changes: 1 addition & 1 deletion include/linux/bpf_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
#endif
#ifdef CONFIG_BPF_LSM
BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#if defined(CONFIG_XDP_SOCKETS)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
Expand Down
5 changes: 5 additions & 0 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ struct audit_context;
struct backing_dev_info;
struct bio_list;
struct blk_plug;
struct bpf_local_storage;
struct capture_control;
struct cfs_rq;
struct fs_struct;
Expand Down Expand Up @@ -1348,6 +1349,10 @@ struct task_struct {
/* Used by LSM modules for access restriction: */
void *security;
#endif
#ifdef CONFIG_BPF_SYSCALL
/* Used by BPF task local storage */
struct bpf_local_storage __rcu *bpf_storage;
#endif

#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
unsigned long lowest_stack;
Expand Down
3 changes: 1 addition & 2 deletions kernel/bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)

obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_task_storage.o
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
obj-${CONFIG_BPF_LSM} += bpf_task_storage.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
Expand Down
28 changes: 17 additions & 11 deletions kernel/bpf/bpf_local_storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage *local_storage;
bool free_local_storage = false;
unsigned long flags;

if (unlikely(!selem_linked_to_storage(selem)))
/* selem has already been unlinked from sk */
return;

local_storage = rcu_dereference(selem->local_storage);
raw_spin_lock_bh(&local_storage->lock);
raw_spin_lock_irqsave(&local_storage->lock, flags);
if (likely(selem_linked_to_storage(selem)))
free_local_storage = bpf_selem_unlink_storage_nolock(
local_storage, selem, true);
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);

if (free_local_storage)
kfree_rcu(local_storage, rcu);
Expand All @@ -167,28 +168,30 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage_map *smap;
struct bpf_local_storage_map_bucket *b;
unsigned long flags;

if (unlikely(!selem_linked_to_map(selem)))
/* selem has already be unlinked from smap */
return;

smap = rcu_dereference(SDATA(selem)->smap);
b = select_bucket(smap, selem);
raw_spin_lock_bh(&b->lock);
raw_spin_lock_irqsave(&b->lock, flags);
if (likely(selem_linked_to_map(selem)))
hlist_del_init_rcu(&selem->map_node);
raw_spin_unlock_bh(&b->lock);
raw_spin_unlock_irqrestore(&b->lock, flags);
}

void bpf_selem_link_map(struct bpf_local_storage_map *smap,
struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
unsigned long flags;

raw_spin_lock_bh(&b->lock);
raw_spin_lock_irqsave(&b->lock, flags);
RCU_INIT_POINTER(SDATA(selem)->smap, smap);
hlist_add_head_rcu(&selem->map_node, &b->list);
raw_spin_unlock_bh(&b->lock);
raw_spin_unlock_irqrestore(&b->lock, flags);
}

void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
Expand Down Expand Up @@ -224,16 +227,18 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,

sdata = SDATA(selem);
if (cacheit_lockit) {
unsigned long flags;

/* spinlock is needed to avoid racing with the
* parallel delete. Otherwise, publishing an already
* deleted sdata to the cache will become a use-after-free
* problem in the next bpf_local_storage_lookup().
*/
raw_spin_lock_bh(&local_storage->lock);
raw_spin_lock_irqsave(&local_storage->lock, flags);
if (selem_linked_to_storage(selem))
rcu_assign_pointer(local_storage->cache[smap->cache_idx],
sdata);
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
}

return sdata;
Expand Down Expand Up @@ -327,6 +332,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
struct bpf_local_storage_data *old_sdata = NULL;
struct bpf_local_storage_elem *selem;
struct bpf_local_storage *local_storage;
unsigned long flags;
int err;

/* BPF_EXIST and BPF_NOEXIST cannot be both set */
Expand Down Expand Up @@ -374,7 +380,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
}
}

raw_spin_lock_bh(&local_storage->lock);
raw_spin_lock_irqsave(&local_storage->lock, flags);

/* Recheck local_storage->list under local_storage->lock */
if (unlikely(hlist_empty(&local_storage->list))) {
Expand Down Expand Up @@ -428,11 +434,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
}

unlock:
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
return SDATA(selem);

unlock_err:
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
return ERR_PTR(err);
}

Expand Down
4 changes: 0 additions & 4 deletions kernel/bpf/bpf_lsm.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,6 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_spin_lock_proto;
case BPF_FUNC_spin_unlock:
return &bpf_spin_unlock_proto;
case BPF_FUNC_task_storage_get:
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
return &bpf_task_storage_delete_proto;
case BPF_FUNC_bprm_opts_set:
return &bpf_bprm_opts_set_proto;
case BPF_FUNC_ima_inode_hash:
Expand Down
34 changes: 9 additions & 25 deletions kernel/bpf/bpf_task_storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#include <linux/bpf_local_storage.h>
#include <linux/filter.h>
#include <uapi/linux/btf.h>
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>
#include <linux/fdtable.h>

Expand All @@ -24,12 +23,8 @@ DEFINE_BPF_STORAGE_CACHE(task_cache);
static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
{
struct task_struct *task = owner;
struct bpf_storage_blob *bsb;

bsb = bpf_task(task);
if (!bsb)
return NULL;
return &bsb->storage;
return &task->bpf_storage;
}

static struct bpf_local_storage_data *
Expand All @@ -38,13 +33,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
{
struct bpf_local_storage *task_storage;
struct bpf_local_storage_map *smap;
struct bpf_storage_blob *bsb;

bsb = bpf_task(task);
if (!bsb)
return NULL;

task_storage = rcu_dereference(bsb->storage);
task_storage = rcu_dereference(task->bpf_storage);
if (!task_storage)
return NULL;

Expand All @@ -57,16 +47,12 @@ void bpf_task_storage_free(struct task_struct *task)
struct bpf_local_storage_elem *selem;
struct bpf_local_storage *local_storage;
bool free_task_storage = false;
struct bpf_storage_blob *bsb;
struct hlist_node *n;

bsb = bpf_task(task);
if (!bsb)
return;
unsigned long flags;

rcu_read_lock();

local_storage = rcu_dereference(bsb->storage);
local_storage = rcu_dereference(task->bpf_storage);
if (!local_storage) {
rcu_read_unlock();
return;
Expand All @@ -81,7 +67,7 @@ void bpf_task_storage_free(struct task_struct *task)
* when unlinking elem from the local_storage->list and
* the map's bucket->list.
*/
raw_spin_lock_bh(&local_storage->lock);
raw_spin_lock_irqsave(&local_storage->lock, flags);
hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
/* Always unlink from map before unlinking from
* local_storage.
Expand All @@ -90,7 +76,7 @@ void bpf_task_storage_free(struct task_struct *task)
free_task_storage = bpf_selem_unlink_storage_nolock(
local_storage, selem, false);
}
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
rcu_read_unlock();

/* free_task_storage should always be true as long as
Expand Down Expand Up @@ -225,11 +211,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
if (sdata)
return (unsigned long)sdata->data;

/* This helper must only be called from places where the lifetime of the task
* is guaranteed. Either by being refcounted or by being protected
* by an RCU read-side critical section.
*/
if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
/* only allocate new storage, when the task is refcounted */
if (refcount_read(&task->usage) &&
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) {
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value,
BPF_NOEXIST);
Expand Down
5 changes: 5 additions & 0 deletions kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
#include <linux/kasan.h>
#include <linux/scs.h>
#include <linux/io_uring.h>
#include <linux/bpf.h>

#include <asm/pgalloc.h>
#include <linux/uaccess.h>
Expand Down Expand Up @@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk)
cgroup_free(tsk);
task_numa_free(tsk, true);
security_task_free(tsk);
bpf_task_storage_free(tsk);
exit_creds(tsk);
delayacct_tsk_free(tsk);
put_signal_struct(tsk->signal);
Expand Down Expand Up @@ -2062,6 +2064,9 @@ static __latent_entropy struct task_struct *copy_process(
p->sequential_io = 0;
p->sequential_io_avg = 0;
#endif
#ifdef CONFIG_BPF_SYSCALL
RCU_INIT_POINTER(p->bpf_storage, NULL);
#endif

/* Perform scheduler related setup. Assign this task to a CPU. */
retval = sched_fork(clone_flags, p);
Expand Down
4 changes: 4 additions & 0 deletions kernel/trace/bpf_trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -1367,6 +1367,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_per_cpu_ptr_proto;
case BPF_FUNC_this_cpu_ptr:
return &bpf_this_cpu_ptr_proto;
case BPF_FUNC_task_storage_get:
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
return &bpf_task_storage_delete_proto;
default:
return NULL;
}
Expand Down

0 comments on commit a1afc38

Please sign in to comment.