KVM: x86/mmu: Use an rwlock for the x86 MMU
Add a read/write lock to be used in place of the MMU spinlock on x86.
The rwlock will enable the TDP MMU to handle page faults and other
operations in parallel in future commits.

Reviewed-by: Peter Feiner <pfeiner@google.com>
Signed-off-by: Ben Gardon <bgardon@google.com>
Ben Gardon authored and intel-lab-lkp committed Feb 2, 2021
1 parent d47b360 commit 74f5a39
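
The conversions in the hunks that follow are mechanical: spin_lock()/spin_unlock() on mmu_lock become write_lock()/write_unlock(), spin_needbreak() becomes rwlock_needbreak(), and cond_resched_lock() becomes cond_resched_rwlock_write(). For orientation, here is a minimal sketch of the resulting write-lock plus lock-break pattern; it assumes mmu_lock is already an rwlock_t, and the example_* helpers are hypothetical placeholders, not KVM APIs.

#include <linux/kvm_host.h>
#include <linux/sched.h>

/* Hypothetical placeholders standing in for real zap/sync work. */
static bool example_more_work(struct kvm *kvm) { return false; }
static void example_do_unit(struct kvm *kvm) { }
static void example_flush(struct kvm *kvm) { }

/* Sketch of the write-lock + lock-break pattern used throughout mmu.c. */
static void example_walk(struct kvm *kvm)
{
	write_lock(&kvm->mmu_lock);

	while (example_more_work(kvm)) {
		example_do_unit(kvm);

		/*
		 * Drop the write lock briefly if a reschedule is due or
		 * another CPU is contending for mmu_lock, flushing any
		 * batched work first so nothing is left half done.
		 */
		if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
			example_flush(kvm);
			cond_resched_rwlock_write(&kvm->mmu_lock);
		}
	}

	write_unlock(&kvm->mmu_lock);
}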
Showing 9 changed files with 112 additions and 81 deletions.
2 changes: 2 additions & 0 deletions arch/x86/include/asm/kvm_host.h
@@ -337,6 +337,8 @@ struct kvm_mmu_root_info {

#define KVM_MMU_NUM_PREV_ROOTS 3

#define KVM_HAVE_MMU_RWLOCK

struct kvm_mmu_page;

/*
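The KVM_HAVE_MMU_RWLOCK define added above lets architecture-independent KVM code know that this architecture's mmu_lock is an rwlock_t rather than a spinlock_t, so the common code can select matching init/acquire/release primitives. That common-code side is not part of the excerpt shown here; the sketch below only illustrates the idea, and the EXAMPLE_* macro names are assumptions, not the series' actual identifiers.

/*
 * Illustrative only: selecting mmu_lock primitives based on whether the
 * architecture defines KVM_HAVE_MMU_RWLOCK.  Macro names are invented
 * for this example.
 */
#ifdef KVM_HAVE_MMU_RWLOCK
#define EXAMPLE_MMU_LOCK_INIT(kvm)	rwlock_init(&(kvm)->mmu_lock)
#define EXAMPLE_MMU_LOCK(kvm)		write_lock(&(kvm)->mmu_lock)
#define EXAMPLE_MMU_UNLOCK(kvm)		write_unlock(&(kvm)->mmu_lock)
#else
#define EXAMPLE_MMU_LOCK_INIT(kvm)	spin_lock_init(&(kvm)->mmu_lock)
#define EXAMPLE_MMU_LOCK(kvm)		spin_lock(&(kvm)->mmu_lock)
#define EXAMPLE_MMU_UNLOCK(kvm)		spin_unlock(&(kvm)->mmu_lock)
#endif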
90 changes: 45 additions & 45 deletions arch/x86/kvm/mmu/mmu.c
@@ -2016,9 +2016,9 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
flush |= kvm_sync_page(vcpu, sp, &invalid_list);
mmu_pages_clear_parents(&parents);
}
if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)) {
if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) {
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
cond_resched_lock(&vcpu->kvm->mmu_lock);
cond_resched_rwlock_write(&vcpu->kvm->mmu_lock);
flush = false;
}
}
@@ -2470,7 +2470,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
*/
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
{
spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);

if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
kvm_mmu_zap_oldest_mmu_pages(kvm, kvm->arch.n_used_mmu_pages -
@@ -2481,7 +2481,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)

kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;

spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
}

int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
@@ -2492,15 +2492,15 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)

pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
r = 0;
spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);

return r;
}
@@ -3192,7 +3192,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
return;
}

spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);

for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
@@ -3215,7 +3215,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
}

kvm_mmu_commit_zap_page(kvm, &invalid_list);
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
}
EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);

@@ -3236,16 +3236,16 @@ static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva,
{
struct kvm_mmu_page *sp;

spin_lock(&vcpu->kvm->mmu_lock);
write_lock(&vcpu->kvm->mmu_lock);

if (make_mmu_pages_available(vcpu)) {
spin_unlock(&vcpu->kvm->mmu_lock);
write_unlock(&vcpu->kvm->mmu_lock);
return INVALID_PAGE;
}
sp = kvm_mmu_get_page(vcpu, gfn, gva, level, direct, ACC_ALL);
++sp->root_count;

spin_unlock(&vcpu->kvm->mmu_lock);
write_unlock(&vcpu->kvm->mmu_lock);
return __pa(sp->spt);
}

@@ -3416,17 +3416,17 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
!smp_load_acquire(&sp->unsync_children))
return;

spin_lock(&vcpu->kvm->mmu_lock);
write_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);

mmu_sync_children(vcpu, sp);

kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
spin_unlock(&vcpu->kvm->mmu_lock);
write_unlock(&vcpu->kvm->mmu_lock);
return;
}

spin_lock(&vcpu->kvm->mmu_lock);
write_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);

for (i = 0; i < 4; ++i) {
@@ -3440,7 +3440,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
}

kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
spin_unlock(&vcpu->kvm->mmu_lock);
write_unlock(&vcpu->kvm->mmu_lock);
}
EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);

@@ -3724,7 +3724,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
return r;

r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
write_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
r = make_mmu_pages_available(vcpu);
@@ -3739,7 +3739,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
prefault, is_tdp);

out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
write_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return r;
}
@@ -4999,7 +4999,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
*/
mmu_topup_memory_caches(vcpu, true);

spin_lock(&vcpu->kvm->mmu_lock);
write_lock(&vcpu->kvm->mmu_lock);

gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);

@@ -5035,7 +5035,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
}
kvm_mmu_flush_or_zap(vcpu, &invalid_list, remote_flush, local_flush);
kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
spin_unlock(&vcpu->kvm->mmu_lock);
write_unlock(&vcpu->kvm->mmu_lock);
}

int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
@@ -5233,14 +5233,14 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
if (iterator.rmap)
flush |= fn(kvm, iterator.rmap);

if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
if (flush && lock_flush_tlb) {
kvm_flush_remote_tlbs_with_address(kvm,
start_gfn,
iterator.gfn - start_gfn + 1);
flush = false;
}
cond_resched_lock(&kvm->mmu_lock);
cond_resched_rwlock_write(&kvm->mmu_lock);
}
}

@@ -5390,7 +5390,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
* be in active use by the guest.
*/
if (batch >= BATCH_ZAP_PAGES &&
cond_resched_lock(&kvm->mmu_lock)) {
cond_resched_rwlock_write(&kvm->mmu_lock)) {
batch = 0;
goto restart;
}
@@ -5423,7 +5423,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
{
lockdep_assert_held(&kvm->slots_lock);

spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
trace_kvm_mmu_zap_all_fast(kvm);

/*
@@ -5450,7 +5450,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
if (kvm->arch.tdp_mmu_enabled)
kvm_tdp_mmu_zap_all(kvm);

spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
}

static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
@@ -5492,7 +5492,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
int i;
bool flush;

spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
slots = __kvm_memslots(kvm, i);
kvm_for_each_memslot(memslot, slots) {
@@ -5516,7 +5516,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
kvm_flush_remote_tlbs(kvm);
}

spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
}

static bool slot_rmap_write_protect(struct kvm *kvm,
@@ -5531,12 +5531,12 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
{
bool flush;

spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
if (kvm->arch.tdp_mmu_enabled)
flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_4K);
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);

/*
* We can flush all the TLBs out of the mmu lock without TLB
@@ -5596,13 +5596,13 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{
/* FIXME: const-ify all uses of struct kvm_memory_slot. */
spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
kvm_mmu_zap_collapsible_spte, true);

if (kvm->arch.tdp_mmu_enabled)
kvm_tdp_mmu_zap_collapsible_sptes(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
}

void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
Expand All @@ -5625,11 +5625,11 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
{
bool flush;

spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
if (kvm->arch.tdp_mmu_enabled)
flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);

/*
* It's also safe to flush TLBs out of mmu lock here as currently this
Expand All @@ -5647,12 +5647,12 @@ void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
{
bool flush;

spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect,
false);
if (kvm->arch.tdp_mmu_enabled)
flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_2M);
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);

if (flush)
kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
Expand All @@ -5664,11 +5664,11 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
{
bool flush;

spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
if (kvm->arch.tdp_mmu_enabled)
flush |= kvm_tdp_mmu_slot_set_dirty(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);

if (flush)
kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
Expand All @@ -5681,14 +5681,14 @@ void kvm_mmu_zap_all(struct kvm *kvm)
LIST_HEAD(invalid_list);
int ign;

spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
restart:
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
if (WARN_ON(sp->role.invalid))
continue;
if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
goto restart;
if (cond_resched_lock(&kvm->mmu_lock))
if (cond_resched_rwlock_write(&kvm->mmu_lock))
goto restart;
}

Expand All @@ -5697,7 +5697,7 @@ void kvm_mmu_zap_all(struct kvm *kvm)
if (kvm->arch.tdp_mmu_enabled)
kvm_tdp_mmu_zap_all(kvm);

spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
}

void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
Expand Down Expand Up @@ -5757,7 +5757,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
continue;

idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);

if (kvm_has_zapped_obsolete_pages(kvm)) {
kvm_mmu_commit_zap_page(kvm,
Expand All @@ -5768,7 +5768,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan);

unlock:
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);

/*
Expand Down Expand Up @@ -5988,7 +5988,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
ulong to_zap;

rcu_idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);

ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0;
Expand All @@ -6013,14 +6013,14 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
WARN_ON_ONCE(sp->lpage_disallowed);
}

if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
kvm_mmu_commit_zap_page(kvm, &invalid_list);
cond_resched_lock(&kvm->mmu_lock);
cond_resched_rwlock_write(&kvm->mmu_lock);
}
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);

spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, rcu_idx);
}

8 changes: 4 additions & 4 deletions arch/x86/kvm/mmu/page_track.c
@@ -184,9 +184,9 @@ kvm_page_track_register_notifier(struct kvm *kvm,

head = &kvm->arch.track_notifier_head;

spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
hlist_add_head_rcu(&n->node, &head->track_notifier_list);
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
}
EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);

@@ -202,9 +202,9 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,

head = &kvm->arch.track_notifier_head;

spin_lock(&kvm->mmu_lock);
write_lock(&kvm->mmu_lock);
hlist_del_rcu(&n->node);
spin_unlock(&kvm->mmu_lock);
write_unlock(&kvm->mmu_lock);
synchronize_srcu(&head->track_srcu);
}
EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);