KVM: Enable and expose KVM_MEM_PRIVATE
Register/unregister private memslots with the fd-based memory backing store
restrictedmem and implement the restrictedmem_notifier callbacks:
  - invalidate_start()/invalidate_end() to zap the existing memory
    mappings in the KVM page table.
  - error() to request KVM_REQ_MEMORY_MCE and later exit to userspace
    with KVM_EXIT_SHUTDOWN.

Expose KVM_MEM_PRIVATE for memslots and KVM_MEMORY_ATTRIBUTE_PRIVATE for
KVM_GET_SUPPORTED_MEMORY_ATTRIBUTES to userspace; both are gated by
kvm_arch_has_private_mem(), which architecture code must override to enable
private memory.
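
As a point of reference, here is a minimal userspace sketch of how a VMM might
register such a private memslot. It assumes the memfd_restricted() syscall and
the KVM_SET_USER_MEMORY_REGION2 ioctl introduced earlier in this series, so the
names and numbers below are illustrative rather than part of this commit:

/*
 * Illustrative only: memfd_restricted() and KVM_SET_USER_MEMORY_REGION2 are
 * assumed from earlier patches in this series, not added by this commit.
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static int register_private_memslot(int vm_fd, __u32 slot, __u64 gpa,
				    __u64 size, void *shared_hva)
{
	struct kvm_userspace_memory_region2 region;
	int restricted_fd;

	/* fd-based backing store for the private pages of this slot. */
	restricted_fd = syscall(__NR_memfd_restricted, 0);
	if (restricted_fd < 0)
		return -1;

	memset(&region, 0, sizeof(region));
	region.slot = slot;
	region.flags = KVM_MEM_PRIVATE;
	region.guest_phys_addr = gpa;
	region.memory_size = size;
	region.userspace_addr = (__u64)(unsigned long)shared_hva;
	region.restrictedmem_fd = restricted_fd;
	region.restrictedmem_offset = 0;	/* must be page aligned */

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &region);
}

The slot thus carries two backings: userspace_addr for shared access and the
restrictedmem fd for private pages, with conversions between the two driven by
the memory attributes exposed above (see the sketch after the diff).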

Co-developed-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Cc: Fuad Tabba <tabba@google.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
chao-p authored and sean-jc committed Jan 23, 2023
1 parent 6a607e2 commit 5c86db7
Showing 4 changed files with 264 additions and 4 deletions.
71 changes: 71 additions & 0 deletions include/linux/kvm_host.h
@@ -34,6 +34,7 @@
#include <linux/instrumentation.h>
#include <linux/interval_tree.h>
#include <linux/rbtree.h>
#include <linux/restrictedmem.h>
#include <linux/xarray.h>
#include <asm/signal.h>

@@ -575,6 +576,7 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
* individually added or deleted.
*/
struct kvm_memory_slot {
struct kvm *kvm;
struct hlist_node id_node[2];
struct interval_tree_node hva_node[2];
struct rb_node gfn_node[2];
@@ -586,8 +588,21 @@ struct kvm_memory_slot {
u32 flags;
short id;
u16 as_id;

#ifdef CONFIG_KVM_PRIVATE_MEM
struct {
struct file *file;
pgoff_t index;
struct restrictedmem_notifier notifier;
} restrictedmem;
#endif
};

static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot)
{
return slot && (slot->flags & KVM_MEM_PRIVATE);
}

static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot)
{
return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
@@ -682,6 +697,17 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
}
#endif

/*
* Arch code must define kvm_arch_has_private_mem if support for private memory
* is enabled.
*/
#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
{
return false;
}
#endif

struct kvm_memslots {
u64 generation;
atomic_long_t last_used_slot;
@@ -2289,4 +2315,49 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr)
/* Max number of entries allowed for each kvm dirty ring */
#define KVM_DIRTY_RING_MAX_ENTRIES 65536

#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn)
{
return xa_to_value(xa_load(&kvm->mem_attr_array, gfn));
}

static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
{
return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) &&
kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
}
#else
static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
{
return false;
}
#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */

#ifdef CONFIG_KVM_PRIVATE_MEM
static inline int kvm_restrictedmem_get_pfn(struct kvm_memory_slot *slot,
gfn_t gfn, kvm_pfn_t *pfn,
int *order)
{
pgoff_t index = gfn - slot->base_gfn + slot->restrictedmem.index;
struct page *page;
int ret;

ret = restrictedmem_get_page(slot->restrictedmem.file, index, &page, order);
if (ret)
return ret;

*pfn = page_to_pfn(page);
return 0;
}
#else
static inline int kvm_restrictedmem_get_pfn(struct kvm_memory_slot *slot,
gfn_t gfn, kvm_pfn_t *pfn,
int *order)
{
KVM_BUG_ON(1, slot->kvm);
return -EIO;
}
#endif /* CONFIG_KVM_PRIVATE_MEM */

#endif
6 changes: 5 additions & 1 deletion include/uapi/linux/kvm.h
@@ -102,7 +102,10 @@ struct kvm_userspace_memory_region2 {
__u64 guest_phys_addr;
__u64 memory_size;
__u64 userspace_addr;
__u64 pad[16];
__u64 restrictedmem_offset;
__u32 restrictedmem_fd;
__u32 pad1;
__u64 pad2[14];
};

/*
@@ -112,6 +115,7 @@ struct kvm_userspace_memory_region2 {
*/
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)
#define KVM_MEM_PRIVATE (1UL << 2)

/* for KVM_IRQ_LINE */
struct kvm_irq_level {
4 changes: 4 additions & 0 deletions virt/kvm/Kconfig
@@ -103,3 +103,7 @@ config KVM_GENERIC_MMU_NOTIFIER
config KVM_GENERIC_MEMORY_ATTRIBUTES
select KVM_GENERIC_MMU_NOTIFIER
bool

config KVM_PRIVATE_MEM
select RESTRICTEDMEM
bool
187 changes: 184 additions & 3 deletions virt/kvm/kvm_main.c
@@ -928,6 +928,151 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)

#endif /* CONFIG_KVM_GENERIC_MMU_NOTIFIER */

#ifdef CONFIG_KVM_PRIVATE_MEM
static int restrictedmem_get_gfn_range(struct kvm_memory_slot *slot,
pgoff_t start, pgoff_t end,
struct kvm_gfn_range *range)
{
start = max(start, slot->restrictedmem.index);
end = min(end, slot->restrictedmem.index + slot->npages);

if (WARN_ON_ONCE(start >= end))
return -EINVAL;

range->start = slot->base_gfn + start;
range->end = slot->base_gfn + end;
range->slot = slot;
range->pte = __pte(0);
range->may_block = true;
return 0;
}

static void kvm_restrictedmem_invalidate_begin(struct restrictedmem_notifier *notifier,
pgoff_t start, pgoff_t end)
{
struct kvm_memory_slot *slot = container_of(notifier,
struct kvm_memory_slot,
restrictedmem.notifier);
struct kvm_gfn_range gfn_range;
struct kvm *kvm = slot->kvm;
int idx;

if (restrictedmem_get_gfn_range(slot, start, end, &gfn_range))
return;

idx = srcu_read_lock(&kvm->srcu);
KVM_MMU_LOCK(kvm);

kvm_mmu_invalidate_begin(kvm);
kvm_mmu_invalidate_range_add(kvm, gfn_range.start, gfn_range.end);
if (kvm_unmap_gfn_range(kvm, &gfn_range))
kvm_flush_remote_tlbs(kvm);

KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
}

static void kvm_restrictedmem_invalidate_end(struct restrictedmem_notifier *notifier,
pgoff_t start, pgoff_t end)
{
struct kvm_memory_slot *slot = container_of(notifier,
struct kvm_memory_slot,
restrictedmem.notifier);
struct kvm_gfn_range gfn_range;
struct kvm *kvm = slot->kvm;

if (restrictedmem_get_gfn_range(slot, start, end, &gfn_range))
return;

KVM_MMU_LOCK(kvm);
kvm_mmu_invalidate_end(kvm);
KVM_MMU_UNLOCK(kvm);
}

static void kvm_restrictedmem_error(struct restrictedmem_notifier *notifier,
pgoff_t start, pgoff_t end)
{
struct kvm_memory_slot *slot = container_of(notifier,
struct kvm_memory_slot,
restrictedmem.notifier);
gfn_t gfn = 0; /* needs to be calculated. */
unsigned long hva = gfn_to_hva_memslot(slot, gfn);

send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, PAGE_SHIFT, current);
}

static struct restrictedmem_notifier_ops kvm_restrictedmem_notifier_ops = {
.invalidate_start = kvm_restrictedmem_invalidate_begin,
.invalidate_end = kvm_restrictedmem_invalidate_end,
.error = kvm_restrictedmem_error,
};

static int kvm_restrictedmem_bind(struct kvm_memory_slot *slot,
unsigned int fd, loff_t offset)
{
int r;

BUILD_BUG_ON(sizeof(gfn_t) != sizeof(slot->restrictedmem.index));

if (offset < 0)
return -EINVAL;

slot->restrictedmem.file = fget(fd);
if (!slot->restrictedmem.file)
return -EINVAL;

if (!file_is_restrictedmem(slot->restrictedmem.file)) {
r = -EINVAL;
goto err;
}

slot->restrictedmem.index = offset >> PAGE_SHIFT;
slot->restrictedmem.notifier.ops = &kvm_restrictedmem_notifier_ops;

r = restrictedmem_bind(slot->restrictedmem.file,
slot->restrictedmem.index,
slot->restrictedmem.index + slot->npages,
&slot->restrictedmem.notifier, true);
if (r)
goto err;

return 0;
err:
fput(slot->restrictedmem.file);
slot->restrictedmem.file = NULL;
return r;
}

static void kvm_restrictedmem_unbind(struct kvm_memory_slot *slot)
{
if (WARN_ON_ONCE(!slot->restrictedmem.file))
return;

restrictedmem_unbind(slot->restrictedmem.file,
slot->restrictedmem.index,
slot->restrictedmem.index + slot->npages,
&slot->restrictedmem.notifier);

fput(slot->restrictedmem.file);
slot->restrictedmem.file = NULL;
}

#else /* !CONFIG_KVM_PRIVATE_MEM */

static int kvm_restrictedmem_bind(struct kvm_memory_slot *slot,
unsigned int fd, loff_t offset)
{
KVM_BUG_ON(1, slot->kvm);
return -EIO;
}

static void kvm_restrictedmem_unbind(struct kvm_memory_slot *slot)
{
KVM_BUG_ON(1, slot->kvm);
}

#endif /* CONFIG_KVM_PRIVATE_MEM */

#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
static int kvm_pm_notifier_call(struct notifier_block *bl,
unsigned long state,
@@ -972,6 +1117,9 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
/* This does not remove the slot from struct kvm_memslots data structures */
static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
if (slot->flags & KVM_MEM_PRIVATE)
kvm_restrictedmem_unbind(slot);

kvm_destroy_dirty_bitmap(slot);

kvm_arch_free_memslot(kvm, slot);
@@ -1538,10 +1686,18 @@ static void kvm_replace_memslot(struct kvm *kvm,
}
}

static int check_memory_region_flags(const struct kvm_userspace_memory_region2 *mem)
static int check_memory_region_flags(struct kvm *kvm,
const struct kvm_userspace_memory_region2 *mem)
{
u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;

if (kvm_arch_has_private_mem(kvm))
valid_flags |= KVM_MEM_PRIVATE;

/* Dirty logging private memory is not currently supported. */
if (mem->flags & KVM_MEM_PRIVATE)
valid_flags &= ~KVM_MEM_LOG_DIRTY_PAGES;

#ifdef __KVM_HAVE_READONLY_MEM
valid_flags |= KVM_MEM_READONLY;
#endif
@@ -1950,7 +2106,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
int as_id, id;
int r;

r = check_memory_region_flags(mem);
r = check_memory_region_flags(kvm, mem);
if (r)
return r;

@@ -1969,6 +2125,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
!access_ok((void __user *)(unsigned long)mem->userspace_addr,
mem->memory_size))
return -EINVAL;
if (mem->flags & KVM_MEM_PRIVATE &&
(mem->restrictedmem_offset & (PAGE_SIZE - 1) ||
mem->restrictedmem_offset + mem->memory_size < mem->restrictedmem_offset ||
0 /* TODO: require gfn be aligned with restricted offset */))
return -EINVAL;
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
return -EINVAL;
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
@@ -2007,6 +2168,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages)
return -EINVAL;
} else { /* Modify an existing slot. */
/* Private memslots are immutable; they can only be deleted. */
if (mem->flags & KVM_MEM_PRIVATE)
return -EINVAL;
if ((mem->userspace_addr != old->userspace_addr) ||
(npages != old->npages) ||
((mem->flags ^ old->flags) & KVM_MEM_READONLY))
@@ -2029,16 +2193,31 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (!new)
return -ENOMEM;

new->kvm = kvm;
new->as_id = as_id;
new->id = id;
new->base_gfn = base_gfn;
new->npages = npages;
new->flags = mem->flags;
new->userspace_addr = mem->userspace_addr;
if (mem->flags & KVM_MEM_PRIVATE) {
r = kvm_restrictedmem_bind(new, mem->restrictedmem_fd,
mem->restrictedmem_offset);
if (r)
goto out;
}

r = kvm_set_memslot(kvm, old, new, change);
if (r)
kfree(new);
goto out_restricted;

return 0;

out_restricted:
if (mem->flags & KVM_MEM_PRIVATE)
kvm_restrictedmem_unbind(new);
out:
kfree(new);
return r;
}
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
@@ -2338,6 +2517,8 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm,
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
static u64 kvm_supported_mem_attributes(struct kvm *kvm)
{
if (kvm_arch_has_private_mem(kvm))
return KVM_MEMORY_ATTRIBUTE_PRIVATE;
return 0;
}

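The last hunk above advertises KVM_MEMORY_ATTRIBUTE_PRIVATE; for completeness,
a hedged sketch of the userspace side of that attribute flip, assuming the
KVM_SET_MEMORY_ATTRIBUTES ioctl and struct kvm_memory_attributes introduced
earlier in this series:

/*
 * Illustrative only: KVM_SET_MEMORY_ATTRIBUTES and struct
 * kvm_memory_attributes are assumed from earlier patches in this series.
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int set_range_private(int vm_fd, __u64 gpa, __u64 size)
{
	struct kvm_memory_attributes attrs;

	memset(&attrs, 0, sizeof(attrs));
	attrs.address = gpa;
	attrs.size = size;
	attrs.attributes = KVM_MEMORY_ATTRIBUTE_PRIVATE;

	/* Clearing the attribute would convert the range back to shared. */
	return ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
}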