Skip to content

Commit

Permalink
KVM: x86/mmu: Protect TDP MMU page table memory with RCU
Browse files Browse the repository at this point in the history
In order to enable concurrent modifications to the paging structures in
the TDP MMU, threads must be able to safely remove pages of page table
memory while other threads are traversing the same memory. To ensure
threads do not access PT memory after it is freed, protect PT memory
with RCU.

Protecting concurrent accesses to page table memory from use-after-free
bugs could also have been accomplished using
walk_shadow_page_lockless_begin/end() and READING_SHADOW_PAGE_TABLES,
coupled with the barriers in a TLB flush. The use of RCU for this case
has several distinct advantages over that approach.
1. Disabling interrupts for long running operations is not desirable.
   Future commits will allow operations besides page faults to operate
   without the exclusive protection of the MMU lock and those operations
   are too long to disable interrupts for their duration.
2. The use of RCU here avoids long blocking / spinning operations in
   performance critical paths. By freeing memory with an asynchronous
   RCU API we avoid the longer wait times TLB flushes experience when
   overlapping with a thread in walk_shadow_page_lockless_begin/end().
3. RCU provides a separation of concerns when removing memory from the
   paging structure. Because the RCU callback to free memory can be
   scheduled immediately after a TLB flush, there's no need for the
   thread to manually free a queue of pages later, as commit_zap_pages
   does.

Fixes: 95fb5b0 ("kvm: x86/mmu: Support MMIO in the TDP MMU")
Reviewed-by: Peter Feiner <pfeiner@google.com>
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Ben Gardon <bgardon@google.com>
  • Loading branch information
Ben Gardon authored and intel-lab-lkp committed Feb 2, 2021
1 parent effeaac commit d47b360
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 21 deletions.
3 changes: 3 additions & 0 deletions arch/x86/kvm/mmu/mmu_internal.h
Expand Up @@ -57,6 +57,9 @@ struct kvm_mmu_page {
atomic_t write_flooding_count;

bool tdp_mmu_page;

/* Used for freeing the page asynchronously if it is a TDP MMU page. */
struct rcu_head rcu_head;
};

extern struct kmem_cache *mmu_page_header_cache;
Expand Down
16 changes: 8 additions & 8 deletions arch/x86/kvm/mmu/tdp_iter.c
Expand Up @@ -12,7 +12,7 @@ static void tdp_iter_refresh_sptep(struct tdp_iter *iter)
{
iter->sptep = iter->pt_path[iter->level - 1] +
SHADOW_PT_INDEX(iter->gfn << PAGE_SHIFT, iter->level);
iter->old_spte = READ_ONCE(*iter->sptep);
iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep));
}

static gfn_t round_gfn_for_level(gfn_t gfn, int level)
Expand All @@ -35,7 +35,7 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
iter->root_level = root_level;
iter->min_level = min_level;
iter->level = root_level;
iter->pt_path[iter->level - 1] = root_pt;
iter->pt_path[iter->level - 1] = (tdp_ptep_t)root_pt;

iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
tdp_iter_refresh_sptep(iter);
Expand All @@ -48,7 +48,7 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
* address of the child page table referenced by the SPTE. Returns null if
* there is no such entry.
*/
u64 *spte_to_child_pt(u64 spte, int level)
tdp_ptep_t spte_to_child_pt(u64 spte, int level)
{
/*
* There's no child entry if this entry isn't present or is a
Expand All @@ -57,7 +57,7 @@ u64 *spte_to_child_pt(u64 spte, int level)
if (!is_shadow_present_pte(spte) || is_last_spte(spte, level))
return NULL;

return __va(spte_to_pfn(spte) << PAGE_SHIFT);
return (tdp_ptep_t)__va(spte_to_pfn(spte) << PAGE_SHIFT);
}

/*
Expand All @@ -66,7 +66,7 @@ u64 *spte_to_child_pt(u64 spte, int level)
*/
static bool try_step_down(struct tdp_iter *iter)
{
u64 *child_pt;
tdp_ptep_t child_pt;

if (iter->level == iter->min_level)
return false;
Expand All @@ -75,7 +75,7 @@ static bool try_step_down(struct tdp_iter *iter)
* Reread the SPTE before stepping down to avoid traversing into page
* tables that are no longer linked from this entry.
*/
iter->old_spte = READ_ONCE(*iter->sptep);
iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep));

child_pt = spte_to_child_pt(iter->old_spte, iter->level);
if (!child_pt)
Expand Down Expand Up @@ -109,7 +109,7 @@ static bool try_step_side(struct tdp_iter *iter)
iter->gfn += KVM_PAGES_PER_HPAGE(iter->level);
iter->next_last_level_gfn = iter->gfn;
iter->sptep++;
iter->old_spte = READ_ONCE(*iter->sptep);
iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep));

return true;
}
Expand Down Expand Up @@ -159,7 +159,7 @@ void tdp_iter_next(struct tdp_iter *iter)
iter->valid = false;
}

u64 *tdp_iter_root_pt(struct tdp_iter *iter)
tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter)
{
return iter->pt_path[iter->root_level - 1];
}
Expand Down
10 changes: 6 additions & 4 deletions arch/x86/kvm/mmu/tdp_iter.h
Expand Up @@ -7,6 +7,8 @@

#include "mmu.h"

typedef u64 __rcu *tdp_ptep_t;

/*
* A TDP iterator performs a pre-order walk over a TDP paging structure.
*/
Expand All @@ -23,9 +25,9 @@ struct tdp_iter {
*/
gfn_t yielded_gfn;
/* Pointers to the page tables traversed to reach the current SPTE */
u64 *pt_path[PT64_ROOT_MAX_LEVEL];
tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL];
/* A pointer to the current SPTE */
u64 *sptep;
tdp_ptep_t sptep;
/* The lowest GFN mapped by the current SPTE */
gfn_t gfn;
/* The level of the root page given to the iterator */
Expand Down Expand Up @@ -55,11 +57,11 @@ struct tdp_iter {
#define for_each_tdp_pte(iter, root, root_level, start, end) \
for_each_tdp_pte_min_level(iter, root, root_level, PG_LEVEL_4K, start, end)

u64 *spte_to_child_pt(u64 pte, int level);
tdp_ptep_t spte_to_child_pt(u64 pte, int level);

void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
int min_level, gfn_t next_last_level_gfn);
void tdp_iter_next(struct tdp_iter *iter);
u64 *tdp_iter_root_pt(struct tdp_iter *iter);
tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter);

#endif /* __KVM_X86_MMU_TDP_ITER_H */

0 comments on commit d47b360

Please sign in to comment.