Skip to content
Permalink
Browse files
mm/shmem: support deterministic charging of tmpfs
Add memcg= option to shmem mount.

Users can specify this option at mount time, and all data page charges
will then be charged to the supplied memcg. Processes are only allowed to
direct tmpfs charges to a cgroup that they themselves can enter and
allocate memory in.

Signed-off-by: Mina Almasry <almasrymina@google.com>

Cc: Michal Hocko <mhocko@suse.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Greg Thelen <gthelen@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: riel@surriel.com
Cc: linux-mm@kvack.org
Cc: linux-fsdevel@vger.kernel.org
Cc: cgroups@vger.kernel.org
  • Loading branch information
Mina Almasry authored and intel-lab-lkp committed Nov 12, 2021
1 parent b828014 commit 51d8c281f9e96cc0269902e4597a972c7e2510f0
Show file tree
Hide file tree
Showing 5 changed files with 271 additions and 2 deletions.
@@ -24,6 +24,7 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/memcontrol.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/writeback.h> /* for the emergency remount stuff */
@@ -180,6 +181,9 @@ static void destroy_unused_super(struct super_block *s)
up_write(&s->s_umount);
list_lru_destroy(&s->s_dentry_lru);
list_lru_destroy(&s->s_inode_lru);
#if CONFIG_MEMCG
mem_cgroup_set_charge_target(&s->s_memcg_to_charge, NULL);
#endif
security_sb_free(s);
put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
@@ -292,6 +296,9 @@ static void __put_super(struct super_block *s)
WARN_ON(s->s_dentry_lru.node);
WARN_ON(s->s_inode_lru.node);
WARN_ON(!list_empty(&s->s_mounts));
#if CONFIG_MEMCG
mem_cgroup_set_charge_target(&s->s_memcg_to_charge, NULL);
#endif
security_sb_free(s);
fscrypt_sb_free(s);
put_user_ns(s->s_user_ns);
@@ -1567,6 +1567,11 @@ struct super_block {
struct workqueue_struct *s_dio_done_wq;
struct hlist_head s_pins;

#ifdef CONFIG_MEMCG
/* memcg to charge for pages allocated to this filesystem */
struct mem_cgroup *s_memcg_to_charge;
#endif

/*
* Owning user namespace and default context in which to
* interpret filesystem uids, gids, quotas, device nodes,
@@ -27,6 +27,7 @@ struct obj_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;
struct super_block;

/* Cgroup-specific page state, on top of universal node page state */
enum memcg_stat_item {
@@ -713,6 +714,9 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
return __mem_cgroup_charge(folio, mm, gfp);
}

/*
 * Charge @folio to the explicitly supplied @memcg, using @gfp for the charge.
 * The out-of-line definition returns 0 without charging when
 * mem_cgroup_disabled() is true.
 */
int mem_cgroup_charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
gfp_t gfp);

int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
gfp_t gfp, swp_entry_t entry);
void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
@@ -923,6 +927,24 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
return !!(memcg->css.flags & CSS_ONLINE);
}

/*
 * Look up the charge-target memcg recorded on the superblock of @mapping's
 * host inode. A non-NULL result carries a css reference; NULL means no target
 * is set (or @mapping is NULL, or the memcg is no longer online).
 */
struct mem_cgroup *
mem_cgroup_mapping_get_charge_target(struct address_space *mapping);

/* Drop the css reference held on @memcg; a NULL @memcg is silently ignored. */
static inline void mem_cgroup_put_memcg(struct mem_cgroup *memcg)
{
	if (!memcg)
		return;

	css_put(&memcg->css);
}

/*
 * Store @memcg in *@target (or clear it when @memcg is NULL). The definition
 * takes a css reference on the new value and drops the one held on the value
 * it replaces.
 */
void mem_cgroup_set_charge_target(struct mem_cgroup **target,
struct mem_cgroup *memcg);
/*
 * Resolve the cgroup directory at @path to its memcg. The CONFIG_MEMCG
 * definition reports failures as ERR_PTR() values.
 */
struct mem_cgroup *mem_cgroup_get_from_path(const char *path);
/*
 * The caller is responsible for providing a buffer @buf of length @len and
 * for freeing it.
 */
int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len);

void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
int zid, int nr_pages);

@@ -1223,6 +1245,42 @@ static inline int mem_cgroup_charge(struct folio *folio,
return 0;
}

/* Stub variant: performs no charging and always reports success (0). */
static inline int mem_cgroup_charge_memcg(struct folio *folio,
struct mem_cgroup *memcg,
gfp_t gfp_mask)
{
return 0;
}

/* Stub variant: there is never a charge target, so always returns NULL. */
static inline struct mem_cgroup *
mem_cgroup_mapping_get_charge_target(struct address_space *mapping)
{
return NULL;
}

/* Stub variant: no reference is ever taken, so there is nothing to drop. */
static inline void mem_cgroup_put_memcg(struct mem_cgroup *memcg)
{
}

/* Stub variant: does nothing; *@target is left untouched. */
static inline void mem_cgroup_set_charge_target(struct mem_cgroup **target,
struct mem_cgroup *memcg)
{
}

/*
 * Stub variant: @path is not examined and NULL is always returned.
 * NOTE(review): the out-of-line implementation reports failures via
 * ERR_PTR(), so NULL passes an IS_ERR() check -- confirm callers treat a NULL
 * result as "no charge target" rather than as a valid memcg.
 */
static inline struct mem_cgroup *mem_cgroup_get_from_path(const char *path)
{
return NULL;
}

/*
 * Stub variant: no charge target can exist, so @buf is set to the empty
 * string. Returns -EINVAL when @len leaves no room for the terminating NUL,
 * 0 otherwise. @sb is not examined.
 */
static inline int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf,
					       size_t len)
{
	if (len == 0)
		return -EINVAL;

	*buf = '\0';
	return 0;
}

static inline int mem_cgroup_swapin_charge_page(struct page *page,
struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
{
@@ -62,6 +62,7 @@
#include <linux/tracehook.h>
#include <linux/psi.h>
#include <linux/seq_buf.h>
#include <linux/string.h>
#include "internal.h"
#include <net/sock.h>
#include <net/ip.h>
@@ -2580,6 +2581,126 @@ void mem_cgroup_handle_over_high(void)
css_put(&memcg->css);
}

/*
 * Resolve the cgroup directory at @path to its memory cgroup.
 *
 * Returns the memcg with a css reference held, which must eventually be
 * released with css_put(), or an ERR_PTR() value on failure (-ENOMEM, an
 * error from filp_open(), or an error from css_tryget_online_from_dir()).
 */
struct mem_cgroup *mem_cgroup_get_from_path(const char *path)
{
	static const char procs_filename[] = "/cgroup.procs";
	struct cgroup_subsys_state *css;
	struct file *dir, *procs;
	char *procs_path;

	/* sizeof() already accounts for the suffix's terminating NUL. */
	procs_path = kmalloc(strlen(path) + sizeof(procs_filename), GFP_KERNEL);
	if (!procs_path)
		return ERR_PTR(-ENOMEM);

	strcpy(procs_path, path);
	strcat(procs_path, procs_filename);

	/*
	 * Restrict the capability for tasks to mount with memcg charging to the
	 * cgroup they could not join. For example, disallow:
	 *
	 *   mount -t tmpfs -o memcg=root-cgroup nodev <MOUNT_DIR>
	 *
	 * if it is a non-root task. The check is done by opening the cgroup's
	 * cgroup.procs file for writing; the file itself is not used further.
	 */
	procs = filp_open(procs_path, O_WRONLY, 0);
	kfree(procs_path);
	if (IS_ERR(procs))
		return (struct mem_cgroup *)procs;
	fput(procs);

	dir = filp_open(path, O_DIRECTORY | O_RDONLY, 0);
	if (IS_ERR(dir))
		return (struct mem_cgroup *)dir;

	css = css_tryget_online_from_dir(dir->f_path.dentry,
					 &memory_cgrp_subsys);
	/* The directory is only needed for the lookup above. */
	fput(dir);

	if (IS_ERR(css))
		return (struct mem_cgroup *)css;

	return container_of(css, struct mem_cgroup, css);
}

/*
 * Write the cgroup path of the optional charge-target memcg associated with
 * @sb into @buf, with space, tab, newline and backslash escaped by
 * mangle_path().
 *
 * @buf: caller-provided output buffer
 * @len: size of @buf in bytes
 *
 * The upper half of @buf is used as scratch space for the raw path and the
 * escaped result is produced in the lower half, so the usable output length
 * is roughly @len / 2. If the path does not fit, or cannot be escaped within
 * the available space, @buf is set to "?" (or to the empty string when @len
 * is too small to hold even that). If no online charge target is set, @buf is
 * set to the empty string.
 *
 * Returns 0 on success (including the "?" and empty-string cases), -EINVAL if
 * @len is 0, or the negative error returned by cgroup_path().
 */
int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len)
{
	struct mem_cgroup *memcg;
	size_t half = len / 2;
	char *end = NULL;
	int ret = 0;

	/* Same contract as the !CONFIG_MEMCG stub: need room for the NUL. */
	if (len < 1)
		return -EINVAL;

	buf[0] = '\0';

	rcu_read_lock();
	memcg = rcu_dereference(sb->s_memcg_to_charge);
	if (memcg && !css_tryget_online(&memcg->css))
		memcg = NULL;
	rcu_read_unlock();

	if (!memcg)
		return 0;

	/* With len == 1 there is no scratch space at all; skip the lookup. */
	if (half) {
		ret = cgroup_path(memcg->css.cgroup, buf + half, half);
		/*
		 * Check the sign explicitly before comparing against the
		 * unsigned size so errors are not handled by accident of
		 * integer conversion.
		 */
		if (ret >= 0 && (size_t)ret < half)
			end = mangle_path(buf, buf + half, " \t\n\\");
	}

	css_put(&memcg->css);

	if (end) {
		*end = '\0';
	} else if (len >= 2) {
		/* Path was truncated, failed, or could not be escaped. */
		buf[0] = '?';
		buf[1] = '\0';
	} else {
		buf[0] = '\0';
	}

	return ret < 0 ? ret : 0;
}

/*
 * Install @memcg as the charge target stored in *@target, or clear the
 * association when @memcg is NULL. If @memcg != NULL, the caller must hold a
 * css reference so that the cgroup is not deleted during this operation.
 *
 * A css reference is taken for the stored pointer, and the reference that
 * belonged to the previously stored memcg (if any) is dropped.
 */
void mem_cgroup_set_charge_target(struct mem_cgroup **target,
				  struct mem_cgroup *memcg)
{
	struct mem_cgroup *old;

	if (memcg)
		css_get(&memcg->css);

	/* Swap in the new target and retrieve the one it replaces. */
	old = xchg(target, memcg);

	if (old)
		css_put(&old->css);
}

/*
 * Returns the memcg that pages of @mapping's inode should be charged to, as
 * recorded on the inode's superblock. A non-NULL result holds a css reference
 * that the caller must drop with css_put(). NULL means there is no memcg to
 * charge (no @mapping, no target set, or the target is no longer online), so
 * the default process based policy should be used.
 */
struct mem_cgroup *
mem_cgroup_mapping_get_charge_target(struct address_space *mapping)
{
	struct mem_cgroup *target;
	struct mem_cgroup *result = NULL;

	if (mapping) {
		rcu_read_lock();
		target = rcu_dereference(mapping->host->i_sb->s_memcg_to_charge);
		/* Only hand out the target if a reference could be taken. */
		if (target && css_tryget_online(&target->css))
			result = target;
		rcu_read_unlock();
	}

	return result;
}

static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
unsigned int nr_pages)
{
@@ -6678,6 +6799,15 @@ static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
return ret;
}

/*
 * Charge @folio to the caller-selected @memcg via charge_memcg().
 * Returns 0 without charging when the memory controller is disabled,
 * otherwise the result of charge_memcg().
 */
int mem_cgroup_charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
			    gfp_t gfp)
{
	return mem_cgroup_disabled() ? 0 : charge_memcg(folio, memcg, gfp);
}

int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
{
struct mem_cgroup *memcg;

0 comments on commit 51d8c28

Please sign in to comment.