Skip to content

Commit

Permalink
kswapd: no need to reclaim cma pages triggered by unmovable allocation
Browse files Browse the repository at this point in the history
For the purpose of better migration, cma pages are allocated only
after regular movable allocations fail, and are normally used for
file pages or anonymous pages.

In the reclaim path, many cma pages (if configured) are reclaimed
from the lru lists, mainly by kswapd or by direct reclaim, triggered
by unmovable or reclaimable allocations. But these reclaimed cma
pages cannot be used by the original unmovable or reclaimable
allocations, so the reclaim is unnecessary.

So unmovable or reclaimable allocations should not trigger
reclaim of cma pages. The patch adds migratetype as a third factor
that kswapd needs to consider, just like zone index and order.
The modification follows the code that handles the zone index.
The approach is straightforward: skip reclaiming cma pages in a
reclaim pass that is triggered only by unmovable or reclaimable
allocations.

This optimization avoids ~3% unnecessary isolations from cma
(cma isolated / total isolated) with a configuration of 100MB of
cma pages in total.

Signed-off-by: zhou xianrong <xianrong.zhou@transsion.com>
Signed-off-by: feng ruxian <ruxian.feng@transsion.com>
  • Loading branch information
zhou xianrong authored and intel-lab-lkp committed Mar 13, 2021
1 parent 8bcfdd7 commit f40216b
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 21 deletions.
6 changes: 4 additions & 2 deletions include/linux/mmzone.h
Expand Up @@ -301,6 +301,8 @@ struct lruvec {
#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4)
/* Isolate unevictable pages */
#define ISOLATE_UNEVICTABLE ((__force isolate_mode_t)0x8)
/* Isolate non-cma pages only */
#define ISOLATE_NONCMA ((__force isolate_mode_t)0x10)

/* LRU Isolation modes. */
typedef unsigned __bitwise isolate_mode_t;
Expand Down Expand Up @@ -756,7 +758,7 @@ typedef struct pglist_data {
wait_queue_head_t pfmemalloc_wait;
struct task_struct *kswapd; /* Protected by
mem_hotplug_begin/end() */
int kswapd_order;
int kswapd_order, kswapd_migratetype;
enum zone_type kswapd_highest_zoneidx;

int kswapd_failures; /* Number of 'reclaimed == 0' runs */
Expand Down Expand Up @@ -840,7 +842,7 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat)

void build_all_zonelists(pg_data_t *pgdat);
void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order,
enum zone_type highest_zoneidx);
int migratetype, enum zone_type highest_zoneidx);
bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
int highest_zoneidx, unsigned int alloc_flags,
long free_pages);
Expand Down
20 changes: 13 additions & 7 deletions include/trace/events/vmscan.h
Expand Up @@ -51,50 +51,56 @@ TRACE_EVENT(mm_vmscan_kswapd_sleep,

TRACE_EVENT(mm_vmscan_kswapd_wake,

TP_PROTO(int nid, int zid, int order),
TP_PROTO(int nid, int zid, int order, int mt),

TP_ARGS(nid, zid, order),
TP_ARGS(nid, zid, order, mt),

TP_STRUCT__entry(
__field( int, nid )
__field( int, zid )
__field( int, order )
__field( int, mt )
),

TP_fast_assign(
__entry->nid = nid;
__entry->zid = zid;
__entry->order = order;
__entry->mt = mt;
),

TP_printk("nid=%d order=%d",
TP_printk("nid=%d order=%d migratetype=%d",
__entry->nid,
__entry->order)
__entry->order,
__entry->mt)
);

TRACE_EVENT(mm_vmscan_wakeup_kswapd,

TP_PROTO(int nid, int zid, int order, gfp_t gfp_flags),
TP_PROTO(int nid, int zid, int order, int mt, gfp_t gfp_flags),

TP_ARGS(nid, zid, order, gfp_flags),
TP_ARGS(nid, zid, order, mt, gfp_flags),

TP_STRUCT__entry(
__field( int, nid )
__field( int, zid )
__field( int, order )
__field( int, mt )
__field( gfp_t, gfp_flags )
),

TP_fast_assign(
__entry->nid = nid;
__entry->zid = zid;
__entry->order = order;
__entry->mt = mt;
__entry->gfp_flags = gfp_flags;
),

TP_printk("nid=%d order=%d gfp_flags=%s",
TP_printk("nid=%d order=%d migratetype=%d gfp_flags=%s",
__entry->nid,
__entry->order,
__entry->mt,
show_gfp_flags(__entry->gfp_flags))
);

Expand Down
5 changes: 3 additions & 2 deletions mm/page_alloc.c
Expand Up @@ -3514,7 +3514,7 @@ struct page *rmqueue(struct zone *preferred_zone,
/* Separate test+clear to avoid unnecessary atomics */
if (test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags)) {
clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
wakeup_kswapd(zone, 0, 0, zone_idx(zone));
wakeup_kswapd(zone, 0, 0, migratetype, zone_idx(zone));
}

VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
Expand Down Expand Up @@ -4423,11 +4423,12 @@ static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask,
struct zone *zone;
pg_data_t *last_pgdat = NULL;
enum zone_type highest_zoneidx = ac->highest_zoneidx;
int migratetype = ac->migratetype;

for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, highest_zoneidx,
ac->nodemask) {
if (last_pgdat != zone->zone_pgdat)
wakeup_kswapd(zone, gfp_mask, order, highest_zoneidx);
wakeup_kswapd(zone, gfp_mask, order, migratetype, highest_zoneidx);
last_pgdat = zone->zone_pgdat;
}
}
Expand Down
63 changes: 53 additions & 10 deletions mm/vmscan.c
Expand Up @@ -99,6 +99,9 @@ struct scan_control {
/* Can pages be swapped as part of reclaim? */
unsigned int may_swap:1;

/* Can cma pages be reclaimed? */
unsigned int may_cma:1;

/*
* Cgroups are not reclaimed below their configured memory.low,
* unless we threaten to OOM. If any cgroups are skipped due to
Expand Down Expand Up @@ -286,6 +289,11 @@ static bool writeback_throttling_sane(struct scan_control *sc)
}
#endif

/*
 * Report whether the allocation described by @gfp_mask maps to a movable
 * migratetype. The reclaim entry points in this file use the result to
 * initialise scan_control.may_cma.
 */
static bool movable_reclaim(gfp_t gfp_mask)
{
	int mt = gfp_migratetype(gfp_mask);

	return is_migrate_movable(mt);
}

/*
* This misses isolated pages which are not accounted for to save counters.
* As the data only determines if reclaim or compaction continues, it is
Expand Down Expand Up @@ -1497,6 +1505,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
.gfp_mask = GFP_KERNEL,
.priority = DEF_PRIORITY,
.may_unmap = 1,
.may_cma = 1,
};
struct reclaim_stat stat;
unsigned int nr_reclaimed;
Expand Down Expand Up @@ -1591,6 +1600,9 @@ int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
return ret;

if ((mode & ISOLATE_NONCMA) && is_migrate_cma(get_pageblock_migratetype(page)))
return ret;

return 0;
}

Expand Down Expand Up @@ -1645,7 +1657,10 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
unsigned long skipped = 0;
unsigned long scan, total_scan, nr_pages;
LIST_HEAD(pages_skipped);
isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
isolate_mode_t mode;

mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
mode |= (sc->may_cma ? 0 : ISOLATE_NONCMA);

total_scan = 0;
scan = 0;
Expand Down Expand Up @@ -2123,6 +2138,7 @@ unsigned long reclaim_pages(struct list_head *page_list)
.may_writepage = 1,
.may_unmap = 1,
.may_swap = 1,
.may_cma = 1,
};

while (!list_empty(page_list)) {
Expand Down Expand Up @@ -3251,6 +3267,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
.may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = 1,
.may_cma = movable_reclaim(gfp_mask),
};

/*
Expand Down Expand Up @@ -3296,6 +3313,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
.may_unmap = 1,
.reclaim_idx = MAX_NR_ZONES - 1,
.may_swap = !noswap,
.may_cma = 1,
};

WARN_ON_ONCE(!current->reclaim_state);
Expand Down Expand Up @@ -3339,6 +3357,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
.may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = may_swap,
.may_cma = 1,
};
/*
* Traverse the ZONELIST_FALLBACK zonelist of the current node to put
Expand Down Expand Up @@ -3546,7 +3565,7 @@ static bool kswapd_shrink_node(pg_data_t *pgdat,
* or lower is eligible for reclaim until at least one usable zone is
* balanced.
*/
static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
static int balance_pgdat(pg_data_t *pgdat, int order, int migratetype, int highest_zoneidx)
{
int i;
unsigned long nr_soft_reclaimed;
Expand Down Expand Up @@ -3648,6 +3667,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
*/
sc.may_writepage = !laptop_mode && !nr_boost_reclaim;
sc.may_swap = !nr_boost_reclaim;
sc.may_cma = is_migrate_movable(migratetype);

/*
* Do some background aging of the anon list, to give
Expand Down Expand Up @@ -3769,8 +3789,15 @@ static enum zone_type kswapd_highest_zoneidx(pg_data_t *pgdat,
return curr_idx == MAX_NR_ZONES ? prev_highest_zoneidx : curr_idx;
}

/*
 * Select the migratetype kswapd should act on.
 *
 * pgdat->kswapd_migratetype holds MIGRATE_TYPES while no request has been
 * recorded (kswapd() stores that value whenever it resets the field). In
 * that case fall back to @prev_migratetype; otherwise return the value
 * currently recorded in the pgdat.
 */
static int kswapd_migratetype(pg_data_t *pgdat, int prev_migratetype)
{
	int requested = READ_ONCE(pgdat->kswapd_migratetype);

	if (requested != MIGRATE_TYPES)
		return requested;

	return prev_migratetype;
}

static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
unsigned int highest_zoneidx)
int migratetype, unsigned int highest_zoneidx)
{
long remaining = 0;
DEFINE_WAIT(wait);
Expand Down Expand Up @@ -3805,8 +3832,8 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
remaining = schedule_timeout(HZ/10);

/*
* If woken prematurely then reset kswapd_highest_zoneidx and
* order. The values will either be from a wakeup request or
* If woken prematurely then reset kswapd_highest_zoneidx, order
* and migratetype. The values will either be from a wakeup request or
* the previous request that slept prematurely.
*/
if (remaining) {
Expand All @@ -3816,6 +3843,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o

if (READ_ONCE(pgdat->kswapd_order) < reclaim_order)
WRITE_ONCE(pgdat->kswapd_order, reclaim_order);

if (!is_migrate_movable(READ_ONCE(pgdat->kswapd_migratetype)))
WRITE_ONCE(pgdat->kswapd_migratetype,
kswapd_migratetype(pgdat, migratetype));
}

finish_wait(&pgdat->kswapd_wait, &wait);
Expand Down Expand Up @@ -3868,6 +3899,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
*/
static int kswapd(void *p)
{
int migratetype = 0;
unsigned int alloc_order, reclaim_order;
unsigned int highest_zoneidx = MAX_NR_ZONES - 1;
pg_data_t *pgdat = (pg_data_t*)p;
Expand All @@ -3893,23 +3925,27 @@ static int kswapd(void *p)
set_freezable();

WRITE_ONCE(pgdat->kswapd_order, 0);
WRITE_ONCE(pgdat->kswapd_migratetype, MIGRATE_TYPES);
WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES);
for ( ; ; ) {
bool ret;

alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order);
migratetype = kswapd_migratetype(pgdat, migratetype);
highest_zoneidx = kswapd_highest_zoneidx(pgdat,
highest_zoneidx);

kswapd_try_sleep:
kswapd_try_to_sleep(pgdat, alloc_order, reclaim_order,
highest_zoneidx);
migratetype, highest_zoneidx);

/* Read the new order, migratetype and highest_zoneidx */
alloc_order = READ_ONCE(pgdat->kswapd_order);
migratetype = kswapd_migratetype(pgdat, migratetype);
highest_zoneidx = kswapd_highest_zoneidx(pgdat,
highest_zoneidx);
WRITE_ONCE(pgdat->kswapd_order, 0);
WRITE_ONCE(pgdat->kswapd_migratetype, MIGRATE_TYPES);
WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES);

ret = try_to_freeze();
Expand All @@ -3932,8 +3968,8 @@ static int kswapd(void *p)
* request (alloc_order).
*/
trace_mm_vmscan_kswapd_wake(pgdat->node_id, highest_zoneidx,
alloc_order);
reclaim_order = balance_pgdat(pgdat, alloc_order,
alloc_order, migratetype);
reclaim_order = balance_pgdat(pgdat, alloc_order, migratetype,
highest_zoneidx);
if (reclaim_order < alloc_order)
goto kswapd_try_sleep;
Expand All @@ -3951,11 +3987,12 @@ static int kswapd(void *p)
* has failed or is not needed, still wake up kcompactd if only compaction is
* needed.
*/
void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order, int migratetype,
enum zone_type highest_zoneidx)
{
pg_data_t *pgdat;
enum zone_type curr_idx;
int curr_migratetype;

if (!managed_zone(zone))
return;
Expand All @@ -3965,13 +4002,17 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,

pgdat = zone->zone_pgdat;
curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx);
curr_migratetype = READ_ONCE(pgdat->kswapd_migratetype);

if (curr_idx == MAX_NR_ZONES || curr_idx < highest_zoneidx)
WRITE_ONCE(pgdat->kswapd_highest_zoneidx, highest_zoneidx);

if (READ_ONCE(pgdat->kswapd_order) < order)
WRITE_ONCE(pgdat->kswapd_order, order);

if (curr_migratetype == MIGRATE_TYPES || is_migrate_movable(migratetype))
WRITE_ONCE(pgdat->kswapd_migratetype, migratetype);

if (!waitqueue_active(&pgdat->kswapd_wait))
return;

Expand All @@ -3992,7 +4033,7 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
}

trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, highest_zoneidx, order,
gfp_flags);
migratetype, gfp_flags);
wake_up_interruptible(&pgdat->kswapd_wait);
}

Expand All @@ -4015,6 +4056,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
.may_writepage = 1,
.may_unmap = 1,
.may_swap = 1,
.may_cma = 1,
.hibernation_mode = 1,
};
struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
Expand Down Expand Up @@ -4174,6 +4216,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
.may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE),
.may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP),
.may_swap = 1,
.may_cma = movable_reclaim(gfp_mask),
.reclaim_idx = gfp_zone(gfp_mask),
};

Expand Down

0 comments on commit f40216b

Please sign in to comment.