Skip to content
Permalink
Browse files
KVM: x86: add kvm per-vCPU exits disable capability
Introduce a new bit, KVM_X86_DISABLE_EXITS_PER_VCPU, and use the second
argument of the KVM_CAP_X86_DISABLE_EXITS capability as a vCPU mask for
disabling exits, enabling finer-grained control of VM exits on a per-vCPU
basis instead of for the whole guest. The exits_disable_vcpu_mask defaults
to 0, i.e. exits are disabled on all vCPUs; if it is 0x5, exits remain
enabled on vCPU0 and vCPU2 and are disabled on all other vCPUs. This patch
only enables this per-vCPU disabling for HLT VM-exits.

In use cases like a Windows guest running heavy CPU-bound workloads,
disabling HLT VM-exits can mitigate host scheduler context-switch
overhead. Simply disabling HLT exits on all vCPUs can bring performance
benefits, but if no pCPUs are reserved for host threads, forced preemption
can occur because the host does not know when to schedule other host
threads that want to run. With this patch, we can disable HLT exits on
only a subset of a guest's vCPUs; this retains the performance benefits
while also showing resiliency to a host stressing workload running at the
same time.

In a host stressing workload experiment with a Windows guest running heavy
CPU-bound workloads, this shows good resiliency along with a ~3%
performance improvement.

Signed-off-by: Kechen Lu <kechenl@nvidia.com>
  • Loading branch information
Kechen Lu authored and intel-lab-lkp committed Dec 14, 2021
1 parent 1c10f4b commit 08f20df47a09e327aee3e73a8d7c77b1ffc01bc0
Show file tree
Hide file tree
Showing 8 changed files with 22 additions and 9 deletions.
@@ -6580,6 +6580,9 @@ branch to guests' 0x200 interrupt vector.

:Architectures: x86
:Parameters: args[0] defines which exits are disabled
args[1] defines a vCPU bitmask indexed by vCPU ID; setting the
bit corresponding to a vCPU ID keeps exits enabled
on that vCPU
:Returns: 0 on success, -EINVAL when args[0] contains invalid exits

Valid bits in args[0] are::
@@ -6588,13 +6591,16 @@ Valid bits in args[0] are::
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
#define KVM_X86_DISABLE_EXITS_PER_VCPU (1UL << 63)

Enabling this capability on a VM provides userspace with a way to no
longer intercept some instructions for improved latency in some
workloads, and is suggested when vCPUs are associated to dedicated
physical CPUs. More bits can be added in the future; userspace can
just pass the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP to disable
all such vmexits.
all such vmexits. Setting KVM_X86_DISABLE_EXITS_PER_VCPU enables per-vCPU
disabling of exits based on the vCPU bitmask in args[1]; currently this is
only implemented for HLT exits.

Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.

@@ -1090,6 +1090,7 @@ struct kvm_arch {
bool hlt_in_guest;
bool pause_in_guest;
bool cstate_in_guest;
u64 exits_disable_vcpu_mask;

unsigned long irq_sources_bitmap;
s64 kvmclock_offset;
@@ -177,7 +177,7 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);

best = kvm_find_kvm_cpuid_features(vcpu);
if (kvm_hlt_in_guest(vcpu->kvm) && best &&
if (kvm_hlt_in_guest(vcpu) && best &&
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);

@@ -1052,7 +1052,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm_set_intercept(svm, INTERCEPT_MWAIT);
}

if (!kvm_hlt_in_guest(vcpu->kvm))
if (!kvm_hlt_in_guest(vcpu))
svm_set_intercept(svm, INTERCEPT_HLT);

control->iopm_base_pa = __sme_set(iopm_base);
@@ -1594,7 +1594,7 @@ static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
* then the instruction is already executing and RIP has already been
* advanced.
*/
if (kvm_hlt_in_guest(vcpu->kvm) &&
if (kvm_hlt_in_guest(vcpu) &&
vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
}
@@ -4166,7 +4166,7 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
if (kvm_mwait_in_guest(vmx->vcpu.kvm))
exec_control &= ~(CPU_BASED_MWAIT_EXITING |
CPU_BASED_MONITOR_EXITING);
if (kvm_hlt_in_guest(vmx->vcpu.kvm))
if (kvm_hlt_in_guest(&vmx->vcpu))
exec_control &= ~CPU_BASED_HLT_EXITING;
return exec_control;
}
@@ -5784,6 +5784,9 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
kvm->arch.pause_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
kvm->arch.cstate_in_guest = true;
if ((cap->args[0] & KVM_X86_DISABLE_EXITS_PER_VCPU) &&
cap->args[1])
kvm->arch.exits_disable_vcpu_mask = cap->args[1];
r = 0;
break;
case KVM_CAP_MSR_PLATFORM_INFO:
@@ -12147,7 +12150,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
vcpu->arch.exception.pending))
return false;

if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
if (kvm_hlt_in_guest(vcpu) && !kvm_can_deliver_async_pf(vcpu))
return false;

/*
@@ -377,9 +377,10 @@ static inline bool kvm_mwait_in_guest(struct kvm *kvm)
return kvm->arch.mwait_in_guest;
}

static inline bool kvm_hlt_in_guest(struct kvm *kvm)
static inline bool kvm_hlt_in_guest(struct kvm_vcpu *vcpu)
{
return kvm->arch.hlt_in_guest;
return vcpu->kvm->arch.hlt_in_guest && (rol64(1UL, vcpu->vcpu_id) &
~vcpu->kvm->arch.exits_disable_vcpu_mask);
}

static inline bool kvm_pause_in_guest(struct kvm *kvm)
@@ -798,10 +798,12 @@ struct kvm_ioeventfd {
/* Bits accepted in args[0] of KVM_CAP_X86_DISABLE_EXITS. */
#define KVM_X86_DISABLE_EXITS_HLT            (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE         (1 << 3)
/*
 * Flag: args[1] is a per-vCPU bitmask; a set bit keeps exits enabled on
 * that vCPU.  Use 1ULL, not 1UL: this is a uapi header and userspace may
 * build with a 32-bit long, where (1UL << 63) is undefined behavior.
 */
#define KVM_X86_DISABLE_EXITS_PER_VCPU       (1ULL << 63)
#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT | \
                                              KVM_X86_DISABLE_EXITS_HLT | \
                                              KVM_X86_DISABLE_EXITS_PAUSE | \
                                              KVM_X86_DISABLE_EXITS_CSTATE | \
                                              KVM_X86_DISABLE_EXITS_PER_VCPU)

/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {

0 comments on commit 08f20df

Please sign in to comment.