Skip to content
Permalink
Browse files
KVM: x86: Report host tsc and realtime values in KVM_GET_CLOCK
Handling the migration of TSCs correctly is difficult, in part because
Linux does not provide userspace with the ability to retrieve a (TSC,
realtime) clock pair for a single instant in time. In lieu of a more
convenient facility, KVM can report similar information in the kvm_clock
structure.

Provide userspace with a host TSC & realtime pair iff the realtime clock
is based on the TSC. If userspace provides KVM_SET_CLOCK with a valid
realtime value, advance the KVM clock by the amount of elapsed time. Do
not step the KVM clock backwards, though, as it is a monotonic
oscillator.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Oliver Upton <oupton@google.com>
  • Loading branch information
oupton authored and intel-lab-lkp committed Jul 18, 2021
1 parent 85b724a commit 72b6d584e6ac692038a7d70e8782f0dfa179e5fb
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 64 deletions.
@@ -993,20 +993,34 @@ such as migration.
When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the
set of bits that KVM can return in struct kvm_clock_data's flag member.

The only flag defined now is KVM_CLOCK_TSC_STABLE. If set, the returned
value is the exact kvmclock value seen by all VCPUs at the instant
when KVM_GET_CLOCK was called. If clear, the returned value is simply
CLOCK_MONOTONIC plus a constant offset; the offset can be modified
with KVM_SET_CLOCK. KVM will try to make all VCPUs follow this clock,
but the exact value read by each VCPU could differ, because the host
TSC is not stable.
FLAGS:

KVM_CLOCK_TSC_STABLE. If set, the returned value is the exact kvmclock
value seen by all VCPUs at the instant when KVM_GET_CLOCK was called.
If clear, the returned value is simply CLOCK_MONOTONIC plus a constant
offset; the offset can be modified with KVM_SET_CLOCK. KVM will try
to make all VCPUs follow this clock, but the exact value read by each
VCPU could differ, because the host TSC is not stable.

KVM_CLOCK_REAL_TIME. If set, the `realtime` field in the kvm_clock_data
structure is populated with the value of the host's real time
clocksource at the instant when KVM_GET_CLOCK was called. If clear,
the `realtime` field does not contain a value.

KVM_CLOCK_HOST_TSC. If set, the `host_tsc` field in the kvm_clock_data
structure is populated with the value of the host's timestamp counter (TSC)
at the instant when KVM_GET_CLOCK was called. If clear, the `host_tsc` field
does not contain a value.

::

struct kvm_clock_data {
__u64 clock; /* kvmclock current value */
__u32 flags;
__u32 pad[9];
__u32 pad0;
__u64 realtime;
__u64 host_tsc;
__u32 pad[4];
};


@@ -1023,12 +1037,22 @@ Sets the current timestamp of kvmclock to the value specified in its parameter.
In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
such as migration.

FLAGS:

KVM_CLOCK_REAL_TIME. If set, KVM will compare the value of the `realtime` field
with the value of the host's real time clocksource at the instant when
KVM_SET_CLOCK was called. The difference in elapsed time is added to the final
kvmclock value that will be provided to guests.

::

struct kvm_clock_data {
__u64 clock; /* kvmclock current value */
__u32 flags;
__u32 pad[9];
__u32 pad0;
__u64 realtime;
__u64 host_tsc;
__u32 pad[4];
};


@@ -1943,4 +1943,7 @@ int kvm_cpu_dirty_log_size(void);

int alloc_all_memslots_rmaps(struct kvm *kvm);

#define KVM_CLOCK_VALID_FLAGS \
(KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REAL_TIME | KVM_CLOCK_HOST_TSC)

#endif /* _ASM_X86_KVM_HOST_H */
@@ -2780,17 +2780,24 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
#endif
}

u64 get_kvmclock_ns(struct kvm *kvm)
/**
* Returns true if realtime and TSC values were written back to the caller.
* Returns false if a clock triplet cannot be obtained, such as if the host's
* realtime clock is not based on the TSC.
*/
static bool get_kvmclock_and_realtime(struct kvm *kvm, u64 *kvmclock_ns,
u64 *realtime_ns, u64 *tsc)
{
struct kvm_arch *ka = &kvm->arch;
struct pvclock_vcpu_time_info hv_clock;
unsigned long flags;
u64 ret;
bool ret = false;

spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
if (!ka->use_master_clock) {
spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
return get_kvmclock_base_ns() + ka->kvmclock_offset;
*kvmclock_ns = get_kvmclock_base_ns() + ka->kvmclock_offset;
return false;
}

hv_clock.tsc_timestamp = ka->master_cycle_now;
@@ -2801,18 +2808,36 @@ u64 get_kvmclock_ns(struct kvm *kvm)
get_cpu();

if (__this_cpu_read(cpu_tsc_khz)) {
struct timespec64 ts;
u64 tsc_val;

kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
&hv_clock.tsc_shift,
&hv_clock.tsc_to_system_mul);
ret = __pvclock_read_cycles(&hv_clock, rdtsc());

if (kvm_get_walltime_and_clockread(&ts, &tsc_val)) {
*realtime_ns = ts.tv_nsec + NSEC_PER_SEC * ts.tv_sec;
*tsc = tsc_val;
ret = true;
}

*kvmclock_ns = __pvclock_read_cycles(&hv_clock, tsc_val);
} else
ret = get_kvmclock_base_ns() + ka->kvmclock_offset;
*kvmclock_ns = get_kvmclock_base_ns() + ka->kvmclock_offset;

put_cpu();

return ret;
}

/*
 * Return the current kvmclock value, in nanoseconds, for @kvm.
 *
 * Thin wrapper around get_kvmclock_and_realtime() that discards the
 * realtime/TSC outputs; only the kvmclock reading is wanted here.
 */
u64 get_kvmclock_ns(struct kvm *kvm)
{
u64 kvmclock_ns, realtime_ns, tsc;

/*
 * The helper's boolean result is deliberately ignored: when it returns
 * false, realtime_ns and tsc may be left unwritten, but kvmclock_ns is
 * always populated and is the only value used.
 */
get_kvmclock_and_realtime(kvm, &kvmclock_ns, &realtime_ns, &tsc);
return kvmclock_ns;
}

static void kvm_setup_pvclock_page(struct kvm_vcpu *v,
struct gfn_to_hva_cache *cache,
unsigned int offset)
@@ -4031,7 +4056,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_SYNC_X86_VALID_FIELDS;
break;
case KVM_CAP_ADJUST_CLOCK:
r = KVM_CLOCK_TSC_STABLE;
r = KVM_CLOCK_VALID_FLAGS;
break;
case KVM_CAP_X86_DISABLE_EXITS:
r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
@@ -5809,6 +5834,68 @@ int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state)
}
#endif /* CONFIG_HAVE_KVM_PM_NOTIFIER */

/*
 * KVM_GET_CLOCK ioctl handler: report the current kvmclock value and,
 * when the host realtime clock is TSC-based, a matching (realtime, TSC)
 * pair captured at the same instant, flagged via KVM_CLOCK_REAL_TIME and
 * KVM_CLOCK_HOST_TSC.
 *
 * Returns 0 on success, -EFAULT if the result cannot be copied to
 * userspace.
 */
static int kvm_vm_ioctl_get_clock(struct kvm *kvm,
void __user *argp)
{
struct kvm_clock_data data;

/* Zero flags and padding so unreported fields read as 0 in userspace. */
memset(&data, 0, sizeof(data));

/*
 * Only advertise the realtime/host_tsc fields when the helper actually
 * obtained a consistent clock triplet (host realtime based on the TSC).
 */
if (get_kvmclock_and_realtime(kvm, &data.clock, &data.realtime,
&data.host_tsc))
data.flags |= KVM_CLOCK_REAL_TIME | KVM_CLOCK_HOST_TSC;

/*
 * NOTE(review): use_master_clock is read here without
 * pvclock_gtod_sync_lock, after the helper dropped it — presumably a
 * benign race, matching the pre-patch KVM_GET_CLOCK behavior; confirm.
 */
if (kvm->arch.use_master_clock)
data.flags |= KVM_CLOCK_TSC_STABLE;

if (copy_to_user(argp, &data, sizeof(data)))
return -EFAULT;

return 0;
}

/*
 * KVM_SET_CLOCK ioctl handler: set the kvmclock value for @kvm.
 *
 * If userspace passes KVM_CLOCK_REAL_TIME, the supplied clock value is
 * advanced by the real time elapsed since userspace sampled `realtime`
 * (typically across a migration). The clock is never stepped backwards,
 * since kvmclock is a monotonic oscillator.
 *
 * Returns 0 on success, -EFAULT on a bad userspace pointer, -EINVAL if
 * any flag other than KVM_CLOCK_REAL_TIME is set.
 */
static int kvm_vm_ioctl_set_clock(struct kvm *kvm,
void __user *argp)
{
struct kvm_arch *ka = &kvm->arch;
struct kvm_clock_data data;
u64 now_raw_ns;

if (copy_from_user(&data, argp, sizeof(data)))
return -EFAULT;

/* KVM_CLOCK_REAL_TIME is the only flag accepted on the set path. */
if (data.flags & ~KVM_CLOCK_REAL_TIME)
return -EINVAL;

/*
 * TODO: userspace has to take care of races with VCPU_RUN, so
 * kvm_gen_update_masterclock() can be cut down to locked
 * pvclock_update_vm_gtod_copy().
 */
kvm_gen_update_masterclock(kvm);

spin_lock_irq(&ka->pvclock_gtod_sync_lock);
if (data.flags & KVM_CLOCK_REAL_TIME) {
u64 now_real_ns = ktime_get_real_ns();

/*
 * Avoid stepping the kvmclock backwards: only add the elapsed
 * delta when real time has actually advanced past the value
 * userspace captured.
 */
if (now_real_ns > data.realtime)
data.clock += now_real_ns - data.realtime;
}

/*
 * Use master_kernel_ns (not get_kvmclock_ns(), which runs slightly
 * ahead) when the masterclock is active, so that the unsigned
 * 'system_time' in kvm_guest_time_update() cannot go negative for
 * very small data.clock values.
 */
if (ka->use_master_clock)
now_raw_ns = ka->master_kernel_ns;
else
now_raw_ns = get_kvmclock_base_ns();
ka->kvmclock_offset = data.clock - now_raw_ns;
spin_unlock_irq(&ka->pvclock_gtod_sync_lock);

/* Propagate the new offset to every vCPU's pvclock page. */
kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
return 0;
}

long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -6053,57 +6140,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
#endif
case KVM_SET_CLOCK: {
struct kvm_arch *ka = &kvm->arch;
struct kvm_clock_data user_ns;
u64 now_ns;

r = -EFAULT;
if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
goto out;

r = -EINVAL;
if (user_ns.flags)
goto out;

r = 0;
/*
* TODO: userspace has to take care of races with VCPU_RUN, so
* kvm_gen_update_masterclock() can be cut down to locked
* pvclock_update_vm_gtod_copy().
*/
kvm_gen_update_masterclock(kvm);

/*
* This pairs with kvm_guest_time_update(): when masterclock is
* in use, we use master_kernel_ns + kvmclock_offset to set
* unsigned 'system_time' so if we use get_kvmclock_ns() (which
* is slightly ahead) here we risk going negative on unsigned
* 'system_time' when 'user_ns.clock' is very small.
*/
spin_lock_irq(&ka->pvclock_gtod_sync_lock);
if (kvm->arch.use_master_clock)
now_ns = ka->master_kernel_ns;
else
now_ns = get_kvmclock_base_ns();
ka->kvmclock_offset = user_ns.clock - now_ns;
spin_unlock_irq(&ka->pvclock_gtod_sync_lock);

kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
r = kvm_vm_ioctl_set_clock(kvm, argp);
break;
}
case KVM_GET_CLOCK: {
struct kvm_clock_data user_ns;
u64 now_ns;

now_ns = get_kvmclock_ns(kvm);
user_ns.clock = now_ns;
user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
memset(&user_ns.pad, 0, sizeof(user_ns.pad));

r = -EFAULT;
if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
goto out;
r = 0;
r = kvm_vm_ioctl_get_clock(kvm, argp);
break;
}
case KVM_MEMORY_ENCRYPT_OP: {
@@ -1223,11 +1223,16 @@ struct kvm_irqfd {

/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */
#define KVM_CLOCK_TSC_STABLE 2
#define KVM_CLOCK_REAL_TIME (1 << 2)
#define KVM_CLOCK_HOST_TSC (1 << 3)

struct kvm_clock_data {
__u64 clock;
__u32 flags;
__u32 pad[9];
__u32 pad0;
__u64 realtime;
__u64 host_tsc;
__u32 pad[4];
};

/* For KVM_CAP_SW_TLB */

0 comments on commit 72b6d58

Please sign in to comment.