powerpc/vfio/iommu/kvm: Do not pin device memory
This new memory does not have page structs as it is not plugged into
the host, so gup() will fail anyway.

This adds 2 helpers:
- mm_iommu_newdev() to preregister the "memory device" memory so
the rest of the API can still be used;
- mm_iommu_is_devmem() to know if a physical address is in one of these
new regions, which we must not unpin (a hedged usage sketch follows).
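
The in-tree caller of these helpers (presumably the VFIO SPAPR TCE driver; its
file is not among the hunks shown below) is out of view, so the following is a
hedged, hypothetical usage sketch based only on the prototypes this commit adds
to arch/powerpc/include/asm/mmu_context.h. The example_* functions and their
calling context are invented for illustration.

#include <linux/mm.h>
#include <asm/mmu_context.h>

/* Hypothetical caller: preregister a device-memory region without pinning. */
static long example_register_devmem(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa)
{
	struct mm_iommu_table_group_mem_t *mem;

	/* No gup()/pinning for this range; it is not backed by struct pages. */
	return mm_iommu_newdev(mm, ua, entries, dev_hpa, &mem);
}

/* Hypothetical caller: only touch a struct page when the HPA is not devmem. */
static void example_mark_dirty(struct mm_struct *mm, unsigned long hpa,
		unsigned int pageshift)
{
	unsigned long size = 0;

	if (!mm_iommu_is_devmem(mm, hpa, pageshift, &size))
		SetPageDirty(pfn_to_page(hpa >> PAGE_SHIFT));
}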

This adds @mm to tce_page_is_contained() and iommu_tce_xchg() so they
can test whether the memory is device memory and avoid pfn_to_page() on it.

This adds a check for device memory in mm_iommu_ua_mark_dirty_rm(), which
does delayed dirtying of pages.
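
Condensed recap (hedged, simplified from the mm/mmu_context_iommu.c hunks
below, not verbatim): a devmem region stores dev_hpa and allocates no hpas[]
array, so userspace-address translation degenerates to plain offset arithmetic
and the real-mode dirty tracking has nothing to mark. The struct layout is
private to mmu_context_iommu.c, so this snippet only illustrates the
arithmetic:

/* Simplified ua->hpa path for a device region (no hpas[], no struct pages). */
static long devmem_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned long *hpa)
{
	if (((ua - mem->ua) >> PAGE_SHIFT) >= mem->entries)
		return -EFAULT;

	/* Plain offset into the device memory; no pfn_to_page() involved. */
	*hpa = mem->dev_hpa + (ua - mem->ua);
	return 0;
}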

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
aik authored and mpe committed Dec 21, 2018
1 parent e0bf78b commit c10c21e
Showing 6 changed files with 135 additions and 32 deletions.
5 changes: 3 additions & 2 deletions arch/powerpc/include/asm/iommu.h
@@ -218,8 +218,9 @@ extern void iommu_register_group(struct iommu_table_group *table_group,
extern int iommu_add_device(struct device *dev);
extern void iommu_del_device(struct device *dev);
extern int __init tce_iommu_bus_notifier_init(void);
extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
unsigned long *hpa, enum dma_data_direction *direction);
extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction);
#else
static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
11 changes: 11 additions & 0 deletions arch/powerpc/include/asm/mmu_context.h
@@ -24,6 +24,9 @@ extern bool mm_iommu_preregistered(struct mm_struct *mm);
extern long mm_iommu_new(struct mm_struct *mm,
unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem);
extern long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
unsigned long entries, unsigned long dev_hpa,
struct mm_iommu_table_group_mem_t **pmem);
extern long mm_iommu_put(struct mm_struct *mm,
struct mm_iommu_table_group_mem_t *mem);
extern void mm_iommu_init(struct mm_struct *mm);
@@ -39,8 +42,16 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa);
extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
extern bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
unsigned int pageshift, unsigned long *size);
extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
#else
static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
unsigned int pageshift, unsigned long *size)
{
return false;
}
#endif
extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
extern void set_context(unsigned long id, pgd_t *pgd);
11 changes: 8 additions & 3 deletions arch/powerpc/kernel/iommu.c
@@ -47,6 +47,7 @@
#include <asm/fadump.h>
#include <asm/vio.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

#define DBG(...)

@@ -993,15 +994,19 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);

long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
unsigned long *hpa, enum dma_data_direction *direction)
long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
{
long ret;
unsigned long size = 0;

ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);

if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL)))
(*direction == DMA_BIDIRECTIONAL)) &&
!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
&size))
SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));

/* if (unlikely(ret))
18 changes: 10 additions & 8 deletions arch/powerpc/kvm/book3s_64_vio.c
@@ -397,12 +397,13 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS;
}

static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry)
{
unsigned long hpa = 0;
enum dma_data_direction dir = DMA_NONE;

iommu_tce_xchg(tbl, entry, &hpa, &dir);
iommu_tce_xchg(mm, tbl, entry, &hpa, &dir);
}

static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -433,15 +434,15 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
unsigned long hpa = 0;
long ret;

if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
if (WARN_ON_ONCE(iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir)))
return H_TOO_HARD;

if (dir == DMA_NONE)
return H_SUCCESS;

ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
if (ret != H_SUCCESS)
iommu_tce_xchg(tbl, entry, &hpa, &dir);
iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);

return ret;
}
@@ -487,7 +488,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (mm_iommu_mapped_inc(mem))
return H_TOO_HARD;

ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
ret = iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
if (WARN_ON_ONCE(ret)) {
mm_iommu_mapped_dec(mem);
return H_TOO_HARD;
@@ -566,7 +567,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
entry, ua, dir);

if (ret != H_SUCCESS) {
kvmppc_clear_tce(stit->tbl, entry);
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
goto unlock_exit;
}
}
@@ -655,7 +656,8 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
iommu_tce_direction(tce));

if (ret != H_SUCCESS) {
kvmppc_clear_tce(stit->tbl, entry);
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
entry);
goto unlock_exit;
}
}
@@ -704,7 +706,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
return ret;

WARN_ON_ONCE(1);
kvmppc_clear_tce(stit->tbl, entry);
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
}
}

93 changes: 84 additions & 9 deletions arch/powerpc/mm/mmu_context_iommu.c
@@ -36,6 +36,8 @@ struct mm_iommu_table_group_mem_t {
u64 ua; /* userspace address */
u64 entries; /* number of entries in hpas[] */
u64 *hpas; /* vmalloc'ed */
#define MM_IOMMU_TABLE_INVALID_HPA ((uint64_t)-1)
u64 dev_hpa; /* Device memory base address */
};

static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
@@ -126,7 +128,8 @@ static int mm_iommu_move_page_from_cma(struct page *page)
return 0;
}

long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
unsigned long entries, unsigned long dev_hpa,
struct mm_iommu_table_group_mem_t **pmem)
{
struct mm_iommu_table_group_mem_t *mem;
@@ -150,18 +153,27 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,

}

ret = mm_iommu_adjust_locked_vm(mm, entries, true);
if (ret)
goto unlock_exit;
if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
ret = mm_iommu_adjust_locked_vm(mm, entries, true);
if (ret)
goto unlock_exit;

locked_entries = entries;
locked_entries = entries;
}

mem = kzalloc(sizeof(*mem), GFP_KERNEL);
if (!mem) {
ret = -ENOMEM;
goto unlock_exit;
}

if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
mem->dev_hpa = dev_hpa;
goto good_exit;
}
mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;

/*
* For a starting point for a maximum page size calculation
* we use @ua and @entries natural alignment to allow IOMMU pages
@@ -230,6 +242,7 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
}

good_exit:
atomic64_set(&mem->mapped, 1);
mem->used = 1;
mem->ua = ua;
@@ -246,13 +259,31 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,

return ret;
}

long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem)
{
return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_new);

long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
unsigned long entries, unsigned long dev_hpa,
struct mm_iommu_table_group_mem_t **pmem)
{
return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_newdev);

static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
long i;
struct page *page = NULL;

if (!mem->hpas)
return;

for (i = 0; i < mem->entries; ++i) {
if (!mem->hpas[i])
continue;
@@ -294,6 +325,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
long ret = 0;
unsigned long entries, dev_hpa;

mutex_lock(&mem_list_mutex);

@@ -315,9 +347,12 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
}

/* @mapped became 0 so now mappings are disabled, release the region */
entries = mem->entries;
dev_hpa = mem->dev_hpa;
mm_iommu_release(mem);

mm_iommu_adjust_locked_vm(mm, mem->entries, false);
if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
mm_iommu_adjust_locked_vm(mm, entries, false);

unlock_exit:
mutex_unlock(&mem_list_mutex);
@@ -387,14 +422,20 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
u64 *va = &mem->hpas[entry];
u64 *va;

if (entry >= mem->entries)
return -EFAULT;

if (pageshift > mem->pageshift)
return -EFAULT;

if (!mem->hpas) {
*hpa = mem->dev_hpa + (ua - mem->ua);
return 0;
}

va = &mem->hpas[entry];
*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

return 0;
@@ -405,7 +446,6 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
void *va = &mem->hpas[entry];
unsigned long *pa;

if (entry >= mem->entries)
@@ -414,7 +454,12 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
if (pageshift > mem->pageshift)
return -EFAULT;

pa = (void *) vmalloc_to_phys(va);
if (!mem->hpas) {
*hpa = mem->dev_hpa + (ua - mem->ua);
return 0;
}

pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
if (!pa)
return -EFAULT;

@@ -434,6 +479,9 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
if (!mem)
return;

if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
return;

entry = (ua - mem->ua) >> PAGE_SHIFT;
va = &mem->hpas[entry];

@@ -444,6 +492,33 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}

bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
unsigned int pageshift, unsigned long *size)
{
struct mm_iommu_table_group_mem_t *mem;
unsigned long end;

list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
continue;

end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
if ((mem->dev_hpa <= hpa) && (hpa < end)) {
/*
* Since the IOMMU page size might be bigger than
* PAGE_SIZE, the amount of preregistered memory
* starting from @hpa might be smaller than 1<<pageshift
* and the caller needs to distinguish this situation.
*/
*size = min(1UL << pageshift, end - hpa);
return true;
}
}

return false;
}
EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);

long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
if (atomic64_inc_not_zero(&mem->mapped))