Skip to content
Permalink
Browse files
device-dax: compound devmap support
Use the newly added compound devmap facility which maps the assigned dax
ranges as compound pages at a page size of @align.

dax devices are created with a fixed @align (huge page size), which is
also enforced at mmap() of the device. Consequently, faults also happen
at the @align specified at creation, and that alignment does not change
throughout the dax device lifetime. MCEs unmap a whole dax huge page,
and splits likewise occur at the configured page size.

Performance measured by gup_test improves considerably for
unpin_user_pages() and altmap with NVDIMMs:

$ gup_test -f /dev/dax1.0 -m 16384 -r 10 -S -a -n 512 -w
(pin_user_pages_fast 2M pages) put:~71 ms -> put:~22 ms
[altmap]
(pin_user_pages_fast 2M pages) get:~524ms put:~525 ms -> get: ~127ms put:~71ms

 $ gup_test -f /dev/dax1.0 -m 129022 -r 10 -S -a -n 512 -w
(pin_user_pages_fast 2M pages) put:~513 ms -> put:~188 ms
[altmap with -m 127004]
(pin_user_pages_fast 2M pages) get:~4.1 secs put:~4.12 secs -> get:~1sec put:~563ms

.. as well as unpin_user_page_range_dirty_lock() being just as effective
as THP/hugetlb[0] pages.

[0] https://lore.kernel.org/linux-mm/20210212130843.13865-5-joao.m.martins@oracle.com/

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
  • Loading branch information
jpemartins authored and intel-lab-lkp committed Nov 12, 2021
1 parent 38152a0 commit c9e6e4bec537287b0e2cf8544276f77d02f1d00c
Showing 1 changed file with 44 additions and 13 deletions.
@@ -192,6 +192,42 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
}
#endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */

/*
 * set_page_mapping - record the file mapping on every base page of a fault
 * @vmf: fault descriptor; supplies the VMA and the faulting address
 * @pfn: page frame number of the first page to update (expected to back the
 *       start of the fault_size-aligned range; confirm against the caller)
 * @fault_size: size in bytes of the range covered by the fault
 * @f_mapping: address_space to store in each page
 *
 * Walks the fault_size / PAGE_SIZE pages starting at @pfn. Every page that
 * has no mapping yet gets page->mapping set to @f_mapping and page->index set
 * to its file offset. Pages that already carry a mapping are left untouched.
 */
static void set_page_mapping(struct vm_fault *vmf, pfn_t pfn,
			     unsigned long fault_size,
			     struct address_space *f_mapping)
{
	unsigned long nr_pages = fault_size / PAGE_SIZE;
	unsigned long i;
	pgoff_t pgoff;

	/*
	 * The faulting address is not guaranteed to be fault_size aligned.
	 * Round it DOWN to the start of the range being mapped: ALIGN() would
	 * round an unaligned address up to the next boundary and leave every
	 * page->index off by one whole fault_size.
	 */
	pgoff = linear_page_index(vmf->vma,
				  ALIGN_DOWN(vmf->address, fault_size));

	for (i = 0; i < nr_pages; i++) {
		struct page *page;

		page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
		/* An earlier fault already set this page up; keep it as is. */
		if (page->mapping)
			continue;
		page->mapping = f_mapping;
		page->index = pgoff + i;
	}
}

/*
 * set_compound_mapping - record the file mapping on a compound page's head
 * @vmf: fault descriptor; supplies the VMA and the faulting address
 * @pfn: page frame number of a page inside the compound page
 * @fault_size: size in bytes of the range covered by the fault
 * @f_mapping: address_space to store in the head page
 *
 * Resolves @pfn to its compound head and, if the head has no mapping yet,
 * sets head->mapping and head->index. Only the head page is written; tail
 * pages are not touched here.
 */
static void set_compound_mapping(struct vm_fault *vmf, pfn_t pfn,
				 unsigned long fault_size,
				 struct address_space *f_mapping)
{
	struct page *head;

	head = compound_head(pfn_to_page(pfn_t_to_pfn(pfn)));
	/* An earlier fault already set the head up; nothing to do. */
	if (head->mapping)
		return;

	head->mapping = f_mapping;
	/*
	 * The faulting address is not guaranteed to be fault_size aligned.
	 * Round it DOWN to the start of the range: ALIGN() would round an
	 * unaligned address up to the next boundary and store an index that
	 * is one whole fault_size too high.
	 */
	head->index = linear_page_index(vmf->vma,
					ALIGN_DOWN(vmf->address, fault_size));
}

static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
enum page_entry_size pe_size)
{
@@ -225,26 +261,18 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
}

if (rc == VM_FAULT_NOPAGE) {
unsigned long i;
pgoff_t pgoff;
struct dev_pagemap *pgmap = dev_dax->pgmap;

/*
* In the device-dax case the only possibility for a
* VM_FAULT_NOPAGE result is when device-dax capacity is
* mapped. No need to consider the zero page, or racing
* conflicting mappings.
*/
pgoff = linear_page_index(vmf->vma,
ALIGN(vmf->address, fault_size));
for (i = 0; i < fault_size / PAGE_SIZE; i++) {
struct page *page;

page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
if (page->mapping)
continue;
page->mapping = filp->f_mapping;
page->index = pgoff + i;
}
if (pgmap->vmemmap_shift)
set_compound_mapping(vmf, pfn, fault_size, filp->f_mapping);
else
set_page_mapping(vmf, pfn, fault_size, filp->f_mapping);
}
dax_read_unlock(id);

@@ -439,6 +467,9 @@ int dev_dax_probe(struct dev_dax *dev_dax)
}

pgmap->type = MEMORY_DEVICE_GENERIC;
if (dev_dax->align > PAGE_SIZE)
pgmap->vmemmap_shift =
order_base_2(dev_dax->align >> PAGE_SHIFT);
addr = devm_memremap_pages(dev, pgmap);
if (IS_ERR(addr))
return PTR_ERR(addr);

0 comments on commit c9e6e4b

Please sign in to comment.