Skip to content

Commit

Permalink
mm: make pXd_addr_end() functions page-table entry aware
Browse files Browse the repository at this point in the history
Unlike all other page-table abstractions, the pXd_addr_end() functions do
not take into account the particular table entry in whose context they
are called. On architectures with dynamic page-table folding that
might lead to a lack of necessary information that is difficult to
obtain other than from the table entry itself. That already led to
a subtle memory corruption issue on s390.

By letting pXd_addr_end() functions know about the page-table entry
we allow archs not only to make extra checks, but also to apply optimizations.

As a result of this change the pXd_addr_end_folded() functions used
in gup_fast traversal code become unnecessary and get replaced with
universal pXd_addr_end() variants.

The arch-specific updates not only add dereferencing of page-table
entry pointers, but also small changes to the code flow to make those
dereferences possible, at least for x86 and powerpc. Also for arm64,
but in a way that should not have any impact.

So, even though the dereferenced page-table entries are not used on
archs other than s390, and are optimized out by the compiler, there
is a small change in kernel size and this is what bloat-o-meter reports:

x86:
add/remove: 0/0 grow/shrink: 2/0 up/down: 10/0 (10)
Function                                     old     new   delta
vmemmap_populate                             587     592      +5
munlock_vma_pages_range                      556     561      +5
Total: Before=15534694, After=15534704, chg +0.00%

powerpc:
add/remove: 0/0 grow/shrink: 1/0 up/down: 4/0 (4)
Function                                     old     new   delta
.remove_pagetable                           1648    1652      +4
Total: Before=21478240, After=21478244, chg +0.00%

arm64:
add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0 (0)
Function                                     old     new   delta
Total: Before=20240851, After=20240851, chg +0.00%

sparc:
add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0 (0)
Function                                     old     new   delta
Total: Before=4907262, After=4907262, chg +0.00%

Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
  • Loading branch information
Alexander Gordeev authored and intel-lab-lkp committed Sep 7, 2020
1 parent 6c6e9d6 commit faf6094
Show file tree
Hide file tree
Showing 31 changed files with 165 additions and 173 deletions.
2 changes: 1 addition & 1 deletion arch/arm/include/asm/pgtable-2level.h
Expand Up @@ -209,7 +209,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
} while (0)

/* we don't need complex calculations here as the pmd is folded into the pgd */
#define pmd_addr_end(addr,end) (end)
#define pmd_addr_end(pmd,addr,end) (end)

#define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext)

Expand Down
6 changes: 3 additions & 3 deletions arch/arm/mm/idmap.c
Expand Up @@ -46,7 +46,7 @@ static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
pmd = pmd_offset(pud, addr);

do {
next = pmd_addr_end(addr, end);
next = pmd_addr_end(*pmd, addr, end);
*pmd = __pmd((addr & PMD_MASK) | prot);
flush_pmd_entry(pmd);
} while (pmd++, addr = next, addr != end);
Expand All @@ -73,7 +73,7 @@ static void idmap_add_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
unsigned long next;

do {
next = pud_addr_end(addr, end);
next = pud_addr_end(*pud, addr, end);
idmap_add_pmd(pud, addr, next, prot);
} while (pud++, addr = next, addr != end);
}
Expand All @@ -95,7 +95,7 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start,

pgd += pgd_index(addr);
do {
next = pgd_addr_end(addr, end);
next = pgd_addr_end(*pgd, addr, end);
idmap_add_pud(pgd, addr, next, prot);
} while (pgd++, addr = next, addr != end);
}
Expand Down
8 changes: 4 additions & 4 deletions arch/arm/mm/mmu.c
Expand Up @@ -777,7 +777,7 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
* With LPAE, we must loop over to map
* all the pmds for the given range.
*/
next = pmd_addr_end(addr, end);
next = pmd_addr_end(*pmd, addr, end);

/*
* Try a section mapping - addr, next and phys must all be
Expand Down Expand Up @@ -805,7 +805,7 @@ static void __init alloc_init_pud(p4d_t *p4d, unsigned long addr,
unsigned long next;

do {
next = pud_addr_end(addr, end);
next = pud_addr_end(*pud, addr, end);
alloc_init_pmd(pud, addr, next, phys, type, alloc, ng);
phys += next - addr;
} while (pud++, addr = next, addr != end);
Expand All @@ -820,7 +820,7 @@ static void __init alloc_init_p4d(pgd_t *pgd, unsigned long addr,
unsigned long next;

do {
next = p4d_addr_end(addr, end);
next = p4d_addr_end(*p4d, addr, end);
alloc_init_pud(p4d, addr, next, phys, type, alloc, ng);
phys += next - addr;
} while (p4d++, addr = next, addr != end);
Expand Down Expand Up @@ -923,7 +923,7 @@ static void __init __create_mapping(struct mm_struct *mm, struct map_desc *md,
pgd = pgd_offset(mm, addr);
end = addr + length;
do {
unsigned long next = pgd_addr_end(addr, end);
unsigned long next = pgd_addr_end(*pgd, addr, end);

alloc_init_p4d(pgd, addr, next, phys, type, alloc, ng);

Expand Down
16 changes: 10 additions & 6 deletions arch/arm64/kernel/hibernate.c
Expand Up @@ -412,7 +412,7 @@ static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
do {
pmd_t pmd = READ_ONCE(*src_pmdp);

next = pmd_addr_end(addr, end);
next = pmd_addr_end(pmd, addr, end);
if (pmd_none(pmd))
continue;
if (pmd_table(pmd)) {
Expand Down Expand Up @@ -447,7 +447,7 @@ static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start,
do {
pud_t pud = READ_ONCE(*src_pudp);

next = pud_addr_end(addr, end);
next = pud_addr_end(pud, addr, end);
if (pud_none(pud))
continue;
if (pud_table(pud)) {
Expand All @@ -473,8 +473,10 @@ static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
dst_p4dp = p4d_offset(dst_pgdp, start);
src_p4dp = p4d_offset(src_pgdp, start);
do {
next = p4d_addr_end(addr, end);
if (p4d_none(READ_ONCE(*src_p4dp)))
p4d_t p4d = READ_ONCE(*src_p4dp);

next = p4d_addr_end(p4d, addr, end);
if (p4d_none(p4d))
continue;
if (copy_pud(dst_p4dp, src_p4dp, addr, next))
return -ENOMEM;
Expand All @@ -492,8 +494,10 @@ static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,

dst_pgdp = pgd_offset_pgd(dst_pgdp, start);
do {
next = pgd_addr_end(addr, end);
if (pgd_none(READ_ONCE(*src_pgdp)))
pgd_t pgd = READ_ONCE(*src_pgdp);

next = pgd_addr_end(pgd, addr, end);
if (pgd_none(pgd))
continue;
if (copy_p4d(dst_pgdp, src_pgdp, addr, next))
return -ENOMEM;
Expand Down
16 changes: 8 additions & 8 deletions arch/arm64/kvm/mmu.c
Expand Up @@ -547,7 +547,7 @@ static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)

start_pmd = pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
next = pmd_addr_end(*pmd, addr, end);
/* Hyp doesn't use huge pmds */
if (!pmd_none(*pmd))
unmap_hyp_ptes(pmd, addr, next);
Expand All @@ -564,7 +564,7 @@ static void unmap_hyp_puds(p4d_t *p4d, phys_addr_t addr, phys_addr_t end)

start_pud = pud = pud_offset(p4d, addr);
do {
next = pud_addr_end(addr, end);
next = pud_addr_end(*pud, addr, end);
/* Hyp doesn't use huge puds */
if (!pud_none(*pud))
unmap_hyp_pmds(pud, addr, next);
Expand All @@ -581,7 +581,7 @@ static void unmap_hyp_p4ds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)

start_p4d = p4d = p4d_offset(pgd, addr);
do {
next = p4d_addr_end(addr, end);
next = p4d_addr_end(*p4d, addr, end);
/* Hyp doesn't use huge p4ds */
if (!p4d_none(*p4d))
unmap_hyp_puds(p4d, addr, next);
Expand Down Expand Up @@ -609,7 +609,7 @@ static void __unmap_hyp_range(pgd_t *pgdp, unsigned long ptrs_per_pgd,
*/
pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);
do {
next = pgd_addr_end(addr, end);
next = pgd_addr_end(*pgd, addr, end);
if (!pgd_none(*pgd))
unmap_hyp_p4ds(pgd, addr, next);
} while (pgd++, addr = next, addr != end);
Expand Down Expand Up @@ -712,7 +712,7 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
get_page(virt_to_page(pmd));
}

next = pmd_addr_end(addr, end);
next = pmd_addr_end(*pmd, addr, end);

create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
pfn += (next - addr) >> PAGE_SHIFT;
Expand Down Expand Up @@ -744,7 +744,7 @@ static int create_hyp_pud_mappings(p4d_t *p4d, unsigned long start,
get_page(virt_to_page(pud));
}

next = pud_addr_end(addr, end);
next = pud_addr_end(*pud, addr, end);
ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
if (ret)
return ret;
Expand Down Expand Up @@ -777,7 +777,7 @@ static int create_hyp_p4d_mappings(pgd_t *pgd, unsigned long start,
get_page(virt_to_page(p4d));
}

next = p4d_addr_end(addr, end);
next = p4d_addr_end(*p4d, addr, end);
ret = create_hyp_pud_mappings(p4d, addr, next, pfn, prot);
if (ret)
return ret;
Expand Down Expand Up @@ -813,7 +813,7 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
get_page(virt_to_page(pgd));
}

next = pgd_addr_end(addr, end);
next = pgd_addr_end(*pgd, addr, end);
err = create_hyp_p4d_mappings(pgd, addr, next, pfn, prot);
if (err)
goto out;
Expand Down
8 changes: 4 additions & 4 deletions arch/arm64/mm/kasan_init.c
Expand Up @@ -120,7 +120,7 @@ static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);

do {
next = pmd_addr_end(addr, end);
next = pmd_addr_end(*pmdp, addr, end);
kasan_pte_populate(pmdp, addr, next, node, early);
} while (pmdp++, addr = next, addr != end && pmd_none(READ_ONCE(*pmdp)));
}
Expand All @@ -132,7 +132,7 @@ static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr,
pud_t *pudp = kasan_pud_offset(p4dp, addr, node, early);

do {
next = pud_addr_end(addr, end);
next = pud_addr_end(*pudp, addr, end);
kasan_pmd_populate(pudp, addr, next, node, early);
} while (pudp++, addr = next, addr != end && pud_none(READ_ONCE(*pudp)));
}
Expand All @@ -144,7 +144,7 @@ static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
p4d_t *p4dp = p4d_offset(pgdp, addr);

do {
next = p4d_addr_end(addr, end);
next = p4d_addr_end(*p4dp, addr, end);
kasan_pud_populate(p4dp, addr, next, node, early);
} while (p4dp++, addr = next, addr != end);
}
Expand All @@ -157,7 +157,7 @@ static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,

pgdp = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
next = pgd_addr_end(*pgdp, addr, end);
kasan_p4d_populate(pgdp, addr, next, node, early);
} while (pgdp++, addr = next, addr != end);
}
Expand Down
25 changes: 12 additions & 13 deletions arch/arm64/mm/mmu.c
Expand Up @@ -209,7 +209,7 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
do {
pmd_t old_pmd = READ_ONCE(*pmdp);

next = pmd_addr_end(addr, end);
next = pmd_addr_end(old_pmd, addr, end);

/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
Expand Down Expand Up @@ -307,7 +307,7 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
do {
pud_t old_pud = READ_ONCE(*pudp);

next = pud_addr_end(addr, end);
next = pud_addr_end(old_pud, addr, end);

/*
* For 4K granule only, attempt to put down a 1GB block
Expand Down Expand Up @@ -356,7 +356,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
end = PAGE_ALIGN(virt + size);

do {
next = pgd_addr_end(addr, end);
next = pgd_addr_end(*pgdp, addr, end);
alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
flags);
phys += next - addr;
Expand Down Expand Up @@ -825,9 +825,9 @@ static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
pmd_t *pmdp, pmd;

do {
next = pmd_addr_end(addr, end);
pmdp = pmd_offset(pudp, addr);
pmd = READ_ONCE(*pmdp);
next = pmd_addr_end(pmd, addr, end);
if (pmd_none(pmd))
continue;

Expand Down Expand Up @@ -858,9 +858,9 @@ static void unmap_hotplug_pud_range(p4d_t *p4dp, unsigned long addr,
pud_t *pudp, pud;

do {
next = pud_addr_end(addr, end);
pudp = pud_offset(p4dp, addr);
pud = READ_ONCE(*pudp);
next = pud_addr_end(pud, addr, end);
if (pud_none(pud))
continue;

Expand Down Expand Up @@ -891,9 +891,9 @@ static void unmap_hotplug_p4d_range(pgd_t *pgdp, unsigned long addr,
p4d_t *p4dp, p4d;

do {
next = p4d_addr_end(addr, end);
p4dp = p4d_offset(pgdp, addr);
p4d = READ_ONCE(*p4dp);
next = p4d_addr_end(p4d, addr, end);
if (p4d_none(p4d))
continue;

Expand All @@ -917,9 +917,9 @@ static void unmap_hotplug_range(unsigned long addr, unsigned long end,
WARN_ON(!free_mapped && altmap);

do {
next = pgd_addr_end(addr, end);
pgdp = pgd_offset_k(addr);
pgd = READ_ONCE(*pgdp);
next = pgd_addr_end(pgd, addr, end);
if (pgd_none(pgd))
continue;

Expand Down Expand Up @@ -973,9 +973,9 @@ static void free_empty_pmd_table(pud_t *pudp, unsigned long addr,
unsigned long i, next, start = addr;

do {
next = pmd_addr_end(addr, end);
pmdp = pmd_offset(pudp, addr);
pmd = READ_ONCE(*pmdp);
next = pmd_addr_end(pmd, addr, end);
if (pmd_none(pmd))
continue;

Expand Down Expand Up @@ -1013,9 +1013,9 @@ static void free_empty_pud_table(p4d_t *p4dp, unsigned long addr,
unsigned long i, next, start = addr;

do {
next = pud_addr_end(addr, end);
pudp = pud_offset(p4dp, addr);
pud = READ_ONCE(*pudp);
next = pud_addr_end(pud, addr, end);
if (pud_none(pud))
continue;

Expand Down Expand Up @@ -1053,9 +1053,9 @@ static void free_empty_p4d_table(pgd_t *pgdp, unsigned long addr,
p4d_t *p4dp, p4d;

do {
next = p4d_addr_end(addr, end);
p4dp = p4d_offset(pgdp, addr);
p4d = READ_ONCE(*p4dp);
next = p4d_addr_end(p4d, addr, end);
if (p4d_none(p4d))
continue;

Expand All @@ -1071,9 +1071,9 @@ static void free_empty_tables(unsigned long addr, unsigned long end,
pgd_t *pgdp, pgd;

do {
next = pgd_addr_end(addr, end);
pgdp = pgd_offset_k(addr);
pgd = READ_ONCE(*pgdp);
next = pgd_addr_end(pgd, addr, end);
if (pgd_none(pgd))
continue;

Expand Down Expand Up @@ -1102,8 +1102,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
pmd_t *pmdp;

do {
next = pmd_addr_end(addr, end);

pgdp = vmemmap_pgd_populate(addr, node);
if (!pgdp)
return -ENOMEM;
Expand All @@ -1117,6 +1115,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
return -ENOMEM;

pmdp = pmd_offset(pudp, addr);
next = pmd_addr_end(*pmdp, addr, end);
if (pmd_none(READ_ONCE(*pmdp))) {
void *p = NULL;

Expand Down
7 changes: 3 additions & 4 deletions arch/powerpc/mm/book3s64/radix_pgtable.c
Expand Up @@ -832,7 +832,7 @@ static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,

pmd = pmd_start + pmd_index(addr);
for (; addr < end; addr = next, pmd++) {
next = pmd_addr_end(addr, end);
next = pmd_addr_end(*pmd, addr, end);

if (!pmd_present(*pmd))
continue;
Expand Down Expand Up @@ -862,7 +862,7 @@ static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,

pud = pud_start + pud_index(addr);
for (; addr < end; addr = next, pud++) {
next = pud_addr_end(addr, end);
next = pud_addr_end(*pud, addr, end);

if (!pud_present(*pud))
continue;
Expand Down Expand Up @@ -893,10 +893,9 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
spin_lock(&init_mm.page_table_lock);

for (addr = start; addr < end; addr = next) {
next = pgd_addr_end(addr, end);

pgd = pgd_offset_k(addr);
p4d = p4d_offset(pgd, addr);
next = pgd_addr_end(*pgd, addr, end);
if (!p4d_present(*p4d))
continue;

Expand Down

0 comments on commit faf6094

Please sign in to comment.