mm: Free per cpu pages async to shorten program exit time
Program exit takes a long time when the process has allocated a large
amount of memory, and the most time-consuming part is freeing that
memory, which accounts for 99.9% of the total exit time. By freeing the
per-CPU pages asynchronously (deferring the drain of the per-CPU page
lists to a per-CPU kthread) we can save 25% of the exit time.
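
As a rough illustration of the workload described above, a minimal
userspace reproducer (hypothetical, not part of this patch; the file
name and allocation size are arbitrary) could fork a child that faults
in a large buffer and then time only the child's exit path:

/* exit_time.c - hypothetical reproducer, not part of the patch */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#define ALLOC_SIZE	(8UL << 30)	/* 8 GiB; adjust to the machine */

int main(void)
{
	struct timespec start, end;
	int pipefd[2];
	char c;
	pid_t pid;

	if (pipe(pipefd))
		return 1;

	pid = fork();
	if (pid == 0) {
		char *buf = malloc(ALLOC_SIZE);

		if (!buf)
			_exit(1);
		memset(buf, 0x5a, ALLOC_SIZE);	/* fault in every page */
		write(pipefd[1], "x", 1);	/* signal: about to exit */
		_exit(0);			/* all freeing happens in the exit path */
	}

	/* Parent: time from "child about to exit" until it has been reaped. */
	read(pipefd[0], &c, 1);
	clock_gettime(CLOCK_MONOTONIC, &start);
	waitpid(pid, NULL, 0);
	clock_gettime(CLOCK_MONOTONIC, &end);

	printf("child exit took %.3f s\n",
	       (end.tv_sec - start.tv_sec) +
	       (end.tv_nsec - start.tv_nsec) / 1e9);
	return 0;
}

Most of the interval measured here is spent freeing the child's pages
during exit_mmap(), which is the work this patch partially defers to the
kfreepcp threads.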

Signed-off-by: chen xiaoguang <xiaoggchen@tencent.com>
Signed-off-by: zeng jingxiang <linuszeng@tencent.com>
Signed-off-by: lu yihui <yihuilu@tencent.com>
txxxggg authored and intel-lab-lkp committed Oct 8, 2021
1 parent 5816b3e commit 3e55b36
Showing 3 changed files with 87 additions and 5 deletions.
1 change: 1 addition & 0 deletions include/linux/mm.h
@@ -908,6 +908,7 @@ void put_pages_list(struct list_head *pages);

void split_page(struct page *page, unsigned int order);
void copy_huge_page(struct page *dst, struct page *src);
void kfreepcp_set_run(unsigned int cpu);

/*
 * Compound pages have a destructor function. Provide a
2 changes: 2 additions & 0 deletions kernel/exit.c
@@ -167,10 +167,12 @@ static void __exit_signal(struct task_struct *tsk)
static void delayed_put_task_struct(struct rcu_head *rhp)
{
	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
	unsigned int cpu = tsk->cpu;

	perf_event_delayed_put(tsk);
	trace_sched_process_free(tsk);
	put_task_struct(tsk);
	kfreepcp_set_run(cpu);
}

void put_task_struct_rcu_user(struct task_struct *task)
89 changes: 84 additions & 5 deletions mm/page_alloc.c
@@ -72,6 +72,7 @@
#include <linux/padata.h>
#include <linux/khugepaged.h>
#include <linux/buffer_head.h>
#include <linux/smpboot.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -147,6 +148,12 @@ DEFINE_PER_CPU(int, _numa_mem_); /* Kernel "local memory" node */
EXPORT_PER_CPU_SYMBOL(_numa_mem_);
#endif

struct freepcp_stat {
	struct task_struct *thread;
	bool should_run;
};
DEFINE_PER_CPU(struct freepcp_stat, kfreepcp);

/* work_structs for global per-cpu drains */
struct pcpu_drain {
	struct zone *zone;
@@ -3361,6 +3368,81 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone)
	return min(READ_ONCE(pcp->batch) << 2, high);
}

void kfreepcp_set_run(unsigned int cpu)
{
	struct task_struct *tsk;
	struct freepcp_stat *stat = this_cpu_ptr(&kfreepcp);

	tsk = stat->thread;
	per_cpu(kfreepcp.should_run, cpu) = true;

	if (tsk && !task_is_running(tsk))
		wake_up_process(tsk);
}
EXPORT_SYMBOL_GPL(kfreepcp_set_run);

static int kfreepcp_should_run(unsigned int cpu)
{
	struct freepcp_stat *stat = this_cpu_ptr(&kfreepcp);

	return stat->should_run;
}

static void run_kfreepcp(unsigned int cpu)
{
	struct zone *zone;
	struct per_cpu_pages *pcp;
	unsigned long flags;
	struct freepcp_stat *stat = this_cpu_ptr(&kfreepcp);
	bool need_free_more = false;

again:
	need_free_more = false;
	for_each_populated_zone(zone) {
		pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
		if (pcp->count && pcp->high && pcp->count > pcp->high) {
			unsigned long batch = READ_ONCE(pcp->batch);
			int high;

			high = nr_pcp_high(pcp, zone);
			local_irq_save(flags);
			free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch),
					   pcp);
			local_irq_restore(flags);
			if (pcp->count > pcp->high)
				need_free_more = true;
		}

		cond_resched();
	}
	if (need_free_more)
		goto again;

	stat->should_run = false;
}

static struct smp_hotplug_thread freepcp_threads = {
	.store = &kfreepcp.thread,
	.thread_should_run = kfreepcp_should_run,
	.thread_fn = run_kfreepcp,
	.thread_comm = "kfreepcp/%u",
};

static int __init freepcp_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		per_cpu(kfreepcp.should_run, cpu) = false;

	BUG_ON(smpboot_register_percpu_thread(&freepcp_threads));

	return 0;
}
late_initcall(freepcp_init);

static void free_unref_page_commit(struct page *page, unsigned long pfn,
				   int migratetype, unsigned int order)
{
@@ -3375,11 +3457,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn,
	list_add(&page->lru, &pcp->lists[pindex]);
	pcp->count += 1 << order;
	high = nr_pcp_high(pcp, zone);
	if (pcp->count >= high) {
		int batch = READ_ONCE(pcp->batch);

		free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch), pcp);
	}
	if (pcp->count >= high)
		this_cpu_ptr(&kfreepcp)->should_run = false;
}

/*
