Skip to content
Permalink
Browse files
mm: make alloc_contig_range work at pageblock granularity
alloc_contig_range() worked at MAX_ORDER-1 granularity to avoid merging
pageblocks with different migratetypes. It might unnecessarily convert
extra pageblocks at the beginning and at the end of the range. Change
alloc_contig_range() to work at pageblock granularity.

Special handling is needed for free pages and in-use pages that cross the
boundaries of the range passed to alloc_contig_range(), because such
partially isolated pages cause free page accounting issues. The free
pages will be split and freed into separate migratetype lists; the
in-use pages will be migrated, then the freed pages will be handled in the same way.

Signed-off-by: Zi Yan <ziy@nvidia.com>
  • Loading branch information
x-y-z authored and intel-lab-lkp committed Feb 11, 2022
1 parent 9d9460f commit 5aacb9dfc8abb1a0610b70226606408a96d0e997
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 65 deletions.
@@ -54,7 +54,7 @@ int move_freepages_block(struct zone *zone, struct page *page,
*/
int
start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
unsigned migratetype, int flags);
unsigned migratetype, int flags, gfp_t gfp_flags);

/*
* Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
@@ -319,6 +319,9 @@ isolate_freepages_range(struct compact_control *cc,
int
isolate_migratepages_range(struct compact_control *cc,
unsigned long low_pfn, unsigned long end_pfn);

int
isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int isolate_before_boundary);
#endif
int find_suitable_fallback(struct free_area *area, unsigned int order,
int migratetype, bool only_stealable, bool *can_steal);
@@ -1863,7 +1863,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
/* set above range as isolated */
ret = start_isolate_page_range(start_pfn, end_pfn,
MIGRATE_MOVABLE,
MEMORY_OFFLINE | REPORT_FAILURE);
MEMORY_OFFLINE | REPORT_FAILURE,
GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL);
if (ret) {
reason = "failure to isolate range";
goto failed_removal_pcplists_disabled;
@@ -8985,7 +8985,7 @@ static inline void alloc_contig_dump_pages(struct list_head *page_list)
#endif

/* [start, end) must belong to a single zone. */
static int __alloc_contig_migrate_range(struct compact_control *cc,
int __alloc_contig_migrate_range(struct compact_control *cc,
unsigned long start, unsigned long end)
{
/* This function is based on compact_zone() from compaction.c. */
@@ -9043,6 +9043,167 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
return 0;
}

/**
 * split_free_page() -- split a free page at split_pfn_offset
 * @free_page: the original free page
 * @order: the order of the page
 * @split_pfn_offset: split offset within the page, in pages; must satisfy
 *		      0 < split_pfn_offset < (1UL << order)
 *
 * It is used when the free page crosses two pageblocks with different
 * migratetypes at split_pfn_offset within the page. The page is removed from
 * its free list and handed back to __free_one_page() in smaller chunks, each
 * freed with the migratetype of the pageblock it starts in, so the two parts
 * can end up on separate migratetype lists. If both parts have the same
 * migratetype the function achieves nothing useful.
 *
 * No chunk ever spans the split point: every chunk is the largest power-of-two
 * sized, naturally aligned run that fits into what is left of the current
 * part. When split_pfn_offset does not lie strictly inside the page there is
 * nothing to split and the page is left untouched.
 *
 * Takes zone->lock internally.
 */
static inline void split_free_page(struct page *free_page,
				int order, unsigned long split_pfn_offset)
{
	struct zone *zone = page_zone(free_page);
	unsigned long free_page_pfn = page_to_pfn(free_page);
	unsigned long end_pfn = free_page_pfn + (1UL << order);
	unsigned long pfn;
	unsigned long flags;
	int free_page_order;

	/* the split point must fall strictly inside the page */
	if (split_pfn_offset == 0 || split_pfn_offset >= (1UL << order))
		return;

	spin_lock_irqsave(&zone->lock, flags);
	del_page_from_free_list(free_page, zone, order);
	for (pfn = free_page_pfn; pfn < end_pfn;) {
		int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);

		/*
		 * order_base_2() rounds up; step back one order when the
		 * remaining length is not a power of two so the chunk never
		 * extends past the end of the current part.
		 */
		free_page_order = order_base_2(split_pfn_offset);
		if ((1UL << free_page_order) > split_pfn_offset)
			free_page_order--;
		/*
		 * A chunk of a given order must start at a pfn aligned to
		 * that order; shrink it until pfn is naturally aligned.
		 * Terminates at order 0, where the mask is empty.
		 */
		while (pfn & ((1UL << free_page_order) - 1))
			free_page_order--;

		__free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
				mt, FPI_NONE);
		pfn += 1UL << free_page_order;
		split_pfn_offset -= (1UL << free_page_order);
		/* we have done the first part, now switch to second part */
		if (split_pfn_offset == 0)
			split_pfn_offset = end_pfn - pfn;
	}
	spin_unlock_irqrestore(&zone->lock, flags);
}

/**
 * isolate_single_pageblock() -- tries to isolate a pageblock that might be
 *				 within a free or in-use page.
 * @boundary_pfn: pageblock-aligned pfn that a page might cross
 * @gfp_flags: GFP flags used for migrating pages
 * @isolate_before_boundary: isolate the pageblock before (1) or after (0)
 *			     the boundary_pfn
 *
 * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
 * pageblock. When not all pageblocks within a page are isolated at the same
 * time, free page accounting can go wrong. For example, in the case of
 * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pageblocks.
 * [        MAX_ORDER-1          ]
 * [  pageblock0  |  pageblock1  ]
 * When either pageblock is isolated, if it is a free page, the page is not
 * split into separate migratetype lists, which is supposed to; if it is an
 * in-use page and freed later, __free_one_page() does not split the free page
 * either. The function handles this by splitting the free page or migrating
 * the in-use page then splitting the free page.
 *
 * Return: 0 on success. -EBUSY if a compound page crossing @boundary_pfn could
 * not be migrated, or if the free page it should have turned into cannot be
 * found afterwards; in both cases the pageblock's original migratetype is
 * restored before returning.
 */
int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
			int isolate_before_boundary)
{
	unsigned char saved_mt;
	/*
	 * scan at max(MAX_ORDER_NR_PAGES, pageblock_nr_pages) aligned range to
	 * avoid isolate pageblocks belonging to a bigger free or in-use page
	 */
	unsigned long start_pfn = pfn_max_align_down(boundary_pfn);
	unsigned long isolated_pageblock_pfn;
	unsigned long pfn;

	VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));

	if (isolate_before_boundary)
		isolated_pageblock_pfn = boundary_pfn - pageblock_nr_pages;
	else
		isolated_pageblock_pfn = boundary_pfn;

	/* remember the old type so it can be put back on failure */
	saved_mt = get_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn));
	set_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn), MIGRATE_ISOLATE);

	for (pfn = start_pfn; pfn < boundary_pfn;) {
		struct page *page = pfn_to_page(pfn);

		/*
		 * start_pfn is max(MAX_ORDER_NR_PAGES, pageblock_nr_pages)
		 * aligned, if there is any free pages in [start_pfn, boundary_pfn),
		 * its head page will always be in the range.
		 */
		if (PageBuddy(page)) {
			int order = buddy_order(page);

			if (pfn + (1UL << order) > boundary_pfn)
				split_free_page(page, order, boundary_pfn - pfn);
			pfn += (1UL << order);
			continue;
		}
		/*
		 * migrate compound pages then let the free page handling code
		 * above do the rest
		 */
		if (PageHuge(page) || PageTransCompound(page)) {
			/*
			 * pfn may point at a tail page; size and extent must
			 * be taken from the head page.
			 */
			struct page *head = compound_head(page);
			unsigned long head_pfn = page_to_pfn(head);
			unsigned long nr_pages = compound_nr(head);

			/* only pages that really cross the boundary matter */
			if (head_pfn + nr_pages > boundary_pfn) {
				int ret;
				int order = 0;
				unsigned long outer_pfn = pfn;
				struct compact_control cc = {
					.nr_migratepages = 0,
					.order = -1,
					.zone = page_zone(pfn_to_page(head_pfn)),
					.mode = MIGRATE_SYNC,
					.ignore_skip_hint = true,
					.no_set_skip_hint = true,
					.gfp_mask = current_gfp_context(gfp_flags),
					.alloc_contig = true,
				};

				INIT_LIST_HEAD(&cc.migratepages);

				ret = __alloc_contig_migrate_range(&cc, head_pfn,
							head_pfn + nr_pages);

				if (ret)
					goto failed;
				/*
				 * reset pfn, let the free page handling code
				 * above split the free page to the right
				 * migratetype list.
				 *
				 * head_pfn is not used here as a hugetlb page
				 * order can be bigger than MAX_ORDER-1, but
				 * after it is freed, the free page order is not.
				 * Use pfn within the range to find the head of
				 * the free page, trying every alignment from
				 * order 1 up to MAX_ORDER-1.
				 */
				while (!PageBuddy(pfn_to_page(outer_pfn))) {
					/* stop if we cannot find the free page */
					if (++order >= MAX_ORDER)
						goto failed;
					outer_pfn &= ~0UL << order;
				}
				pfn = outer_pfn;
				continue;
			}
			/* skip the whole compound page, not just nr_pages from a tail */
			pfn = head_pfn + nr_pages;
			continue;
		}

		pfn++;
	}
	return 0;

failed:
	/* restore the original migratetype */
	set_pageblock_migratetype(pfn_to_page(isolated_pageblock_pfn), saved_mt);
	return -EBUSY;
}


/**
* alloc_contig_range() -- tries to allocate given range of pages
* @start: start PFN to allocate
@@ -9067,8 +9228,9 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
int alloc_contig_range(unsigned long start, unsigned long end,
unsigned migratetype, gfp_t gfp_mask)
{
unsigned long outer_start, outer_end;
unsigned int order;
unsigned long outer_end;
unsigned long alloc_start = ALIGN_DOWN(start, pageblock_nr_pages);
unsigned long alloc_end = ALIGN(end, pageblock_nr_pages);
int ret = 0;

struct compact_control cc = {
@@ -9087,14 +9249,11 @@ int alloc_contig_range(unsigned long start, unsigned long end,
* What we do here is we mark all pageblocks in range as
* MIGRATE_ISOLATE. Because pageblock and max order pages may
* have different sizes, and due to the way page allocator
* work, we align the range to biggest of the two pages so
* that page allocator won't try to merge buddies from
* different pageblocks and change MIGRATE_ISOLATE to some
* other migration type.
* work, start_isolate_page_range() has special handlings for this.
*
* Once the pageblocks are marked as MIGRATE_ISOLATE, we
* migrate the pages from an unaligned range (ie. pages that
* we are interested in). This will put all the pages in
* we are interested in). This will put all the pages in
* range back to page allocator as MIGRATE_ISOLATE.
*
* When this is done, we take the pages in range from page
@@ -9107,9 +9266,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
* put back to page allocator so that buddy can use them.
*/

ret = start_isolate_page_range(start, end, migratetype, 0);
ret = start_isolate_page_range(start, end, migratetype, 0, gfp_mask);
if (ret)
return ret;
goto done;

drain_all_pages(cc.zone);

@@ -9128,68 +9287,28 @@ int alloc_contig_range(unsigned long start, unsigned long end,
goto done;
ret = 0;

/*
* Pages from [start, end) are within a MAX_ORDER_NR_PAGES
* aligned blocks that are marked as MIGRATE_ISOLATE. What's
* more, all pages in [start, end) are free in page allocator.
* What we are going to do is to allocate all pages from
* [start, end) (that is remove them from page allocator).
*
* The only problem is that pages at the beginning and at the
* end of interesting range may be not aligned with pages that
* page allocator holds, ie. they can be part of higher order
* pages. Because of this, we reserve the bigger range and
* once this is done free the pages we are not interested in.
*
* We don't have to hold zone->lock here because the pages are
* isolated thus they won't get removed from buddy.
*/

order = 0;
outer_start = start;
while (!PageBuddy(pfn_to_page(outer_start))) {
if (++order >= MAX_ORDER) {
outer_start = start;
break;
}
outer_start &= ~0UL << order;
}

if (outer_start != start) {
order = buddy_order(pfn_to_page(outer_start));

/*
* outer_start page could be small order buddy page and
* it doesn't include start page. Adjust outer_start
* in this case to report failed page properly
* on tracepoint in test_pages_isolated()
*/
if (outer_start + (1UL << order) <= start)
outer_start = start;
}

/* Make sure the range is really isolated. */
if (test_pages_isolated(outer_start, end, 0)) {
if (test_pages_isolated(alloc_start, alloc_end, 0)) {
ret = -EBUSY;
goto done;
}

/* Grab isolated pages from freelists. */
outer_end = isolate_freepages_range(&cc, outer_start, end);
outer_end = isolate_freepages_range(&cc, alloc_start, alloc_end);
if (!outer_end) {
ret = -EBUSY;
goto done;
}

/* Free head and tail (if any) */
if (start != outer_start)
free_contig_range(outer_start, start - outer_start);
if (end != outer_end)
free_contig_range(end, outer_end - end);
if (start != alloc_start)
free_contig_range(alloc_start, start - alloc_start);
if (end != alloc_end)
free_contig_range(end, alloc_end - end);

done:
undo_isolate_page_range(pfn_max_align_down(start),
pfn_max_align_up(end), migratetype);
undo_isolate_page_range(alloc_start,
alloc_end, migratetype);
return ret;
}
EXPORT_SYMBOL(alloc_contig_range);
@@ -285,6 +285,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* and PageOffline() pages.
* REPORT_FAILURE - report details about the failure to
* isolate the range
* @gfp_flags: GFP flags used for migrating pages that sit across the
* range boundaries.
*
* Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
* the range will never be allocated. Any free pages and pages freed in the
@@ -293,6 +295,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* pages in the range finally, the caller have to free all pages in the range.
* test_page_isolated() can be used for test it.
*
* The function first tries to isolate the pageblocks at the beginning and end
* of the range, since there might be pages across the range boundaries.
* Afterwards, it isolates the rest of the range.
*
* There is no high level synchronization mechanism that prevents two threads
* from trying to isolate overlapping ranges. If this happens, one thread
* will notice pageblocks in the overlapping range already set to isolate.
@@ -313,21 +319,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* Return: 0 on success and -EBUSY if any part of range cannot be isolated.
*/
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
unsigned migratetype, int flags)
unsigned migratetype, int flags, gfp_t gfp_flags)
{
unsigned long pfn;
struct page *page;
/* isolation is done at page block granularity */
unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
int ret;

unsigned long isolate_start = pfn_max_align_down(start_pfn);
unsigned long isolate_end = pfn_max_align_up(end_pfn);
/* isolate [isolate_start, isolate_start + pageblock_nr_pages] pageblock */
ret = isolate_single_pageblock(isolate_start, gfp_flags, 0);
if (ret)
return ret;

/* isolate [isolate_end - pageblock_nr_pages, isolate_end] pageblock */
ret = isolate_single_pageblock(isolate_end, gfp_flags, 1);
if (ret) {
unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
return ret;
}

for (pfn = isolate_start;
pfn < isolate_end;
/* skip isolated pageblocks at the beginning and end */
for (pfn = isolate_start + pageblock_nr_pages;
pfn < isolate_end - pageblock_nr_pages;
pfn += pageblock_nr_pages) {
page = __first_valid_page(pfn, pageblock_nr_pages);
if (page && set_migratetype_isolate(page, migratetype, flags,
start_pfn, end_pfn)) {
undo_isolate_page_range(isolate_start, pfn, migratetype);
unset_migratetype_isolate(
pfn_to_page(isolate_end - pageblock_nr_pages),
migratetype);
return -EBUSY;
}
}

0 comments on commit 5aacb9d

Please sign in to comment.