-
Notifications
You must be signed in to change notification settings - Fork 262
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: added pvclock (kvm_clock) support
- Loading branch information
Showing
3 changed files
with
357 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,235 @@ | ||
/* $OpenBSD: pvclock.c,v 1.9 2023/02/04 19:19:37 cheloha Exp $ */ | ||
|
||
/* | ||
* Copyright (c) 2018 Reyk Floeter <reyk@openbsd.org> | ||
* | ||
* Permission to use, copy, modify, and distribute this software for any | ||
* purpose with or without fee is hereby granted, provided that the above | ||
* copyright notice and this permission notice appear in all copies. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
*/ | ||
|
||
#if !defined(__i386__) && !defined(__amd64__) | ||
#error pvclock(4) is only supported on i386 and amd64 | ||
#endif | ||
|
||
#include <sys/cdefs.h> | ||
|
||
#include <sys/param.h> | ||
#include <sys/device.h> | ||
#include <sys/kernel.h> | ||
#include <sys/module.h> | ||
#include <sys/systm.h> | ||
#include <sys/timetc.h> | ||
|
||
#include <machine/cpu.h> | ||
#include <machine/atomic.h> | ||
#include <uvm/uvm_extern.h> | ||
|
||
#include <x86/x86/pvreg.h> | ||
|
||
/*
 * Last counter value stored by pvclock_get_timecount() on the path taken
 * when PVCLOCK_FLAG_TSC_STABLE is not set; reset to 0 by pvclock_attach().
 */
uint pvclock_lastcount;
|
||
struct pvclock_softc { | ||
device_t sc_dev; | ||
void *sc_time; | ||
paddr_t sc_paddr; | ||
struct timecounter *sc_tc; | ||
}; | ||
|
||
static int pvclock_match(device_t, cfdata_t, void *); | ||
static void pvclock_attach(device_t, device_t, void *); | ||
static int pvclock_activate(device_t, devact_t); | ||
|
||
/* | ||
void pvclock_read_time_info(struct pvclock_softc *, | ||
struct pvclock_time_info *); | ||
*/ | ||
static inline uint32_t | ||
pvclock_read_begin(const struct pvclock_time_info *); | ||
static inline int | ||
pvclock_read_done(const struct pvclock_time_info *, uint32_t); | ||
static uint | ||
pvclock_get_timecount(struct timecounter *); | ||
|
||
struct timecounter pvclock_timecounter = { | ||
.tc_get_timecount = pvclock_get_timecount, | ||
.tc_counter_mask = ~0u, | ||
.tc_frequency = 0, | ||
.tc_name = NULL, | ||
.tc_quality = -2000, | ||
.tc_priv = NULL, | ||
}; | ||
|
||
/* Attachment glue: match/attach/activate are provided, detach is not. */
CFATTACH_DECL_NEW(pvclock, sizeof(struct pvclock_softc),
    pvclock_match, pvclock_attach, NULL /* detach */, pvclock_activate);
|
||
|
||
static int | ||
pvclock_match(device_t parent, cfdata_t cf, void *aux) | ||
{ | ||
u_int regs[6]; | ||
/* | ||
* pvclock is provided by different hypervisors, we currently | ||
* only support the "kvmclock". | ||
*/ | ||
x86_cpuid(0x40000000 + CPUID_OFFSET_KVM_FEATURES, regs); | ||
/* | ||
* We only implement support for the 2nd version of pvclock. | ||
* The first version is basically the same but with different | ||
* non-standard MSRs and it is deprecated. | ||
*/ | ||
if ((regs[0] & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0) | ||
return (0); | ||
|
||
/* | ||
* Only the "stable" clock with a sync'ed TSC is supported. | ||
* In this case the host guarantees that the TSC is constant | ||
* and invariant, either by the underlying TSC or by passing | ||
* on a synchronized value. | ||
*/ | ||
if ((regs[0] & | ||
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0) | ||
return (0); | ||
|
||
return (1); | ||
} | ||
|
||
static inline uint32_t | ||
pvclock_read_begin(const struct pvclock_time_info *ti) | ||
{ | ||
uint32_t ti_version = ti->ti_version & ~0x1; | ||
virtio_membar_sync(); | ||
return (ti_version); | ||
} | ||
|
||
static inline int | ||
pvclock_read_done(const struct pvclock_time_info *ti, | ||
uint32_t ti_version) | ||
{ | ||
virtio_membar_sync(); | ||
return (ti->ti_version == ti_version); | ||
} | ||
|
||
static uint | ||
pvclock_get_timecount(struct timecounter *tc) | ||
{ | ||
struct pvclock_softc *sc = tc->tc_priv; | ||
struct pvclock_time_info *ti; | ||
uint64_t tsc_timestamp, system_time, delta, ctr; | ||
uint32_t ti_version, mul_frac; | ||
int8_t shift; | ||
uint8_t flags; | ||
|
||
ti = sc->sc_time; | ||
do { | ||
ti_version = pvclock_read_begin(ti); | ||
system_time = ti->ti_system_time; | ||
tsc_timestamp = ti->ti_tsc_timestamp; | ||
mul_frac = ti->ti_tsc_to_system_mul; | ||
shift = ti->ti_tsc_shift; | ||
flags = ti->ti_flags; | ||
} while (!pvclock_read_done(ti, ti_version)); | ||
|
||
/* | ||
* The algorithm is described in | ||
* linux/Documentation/virtual/kvm/msr.txt | ||
*/ | ||
delta = rdtsc() - tsc_timestamp; | ||
if (shift < 0) | ||
delta >>= -shift; | ||
else | ||
delta <<= shift; | ||
ctr = ((delta * mul_frac) >> 32) + system_time; | ||
|
||
if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0) | ||
return (ctr); | ||
|
||
if (ctr < pvclock_lastcount) | ||
return (pvclock_lastcount); | ||
|
||
atomic_swap_uint(&pvclock_lastcount, ctr); | ||
|
||
return (ctr); | ||
} | ||
|
||
static void | ||
pvclock_attach(device_t parent, device_t self, void *aux) | ||
{ | ||
struct pvclock_softc *sc = device_private(self); | ||
struct pvclock_time_info *ti; | ||
paddr_t pa; | ||
uint32_t ti_version; | ||
uint8_t flags; | ||
|
||
if ((sc->sc_time = (void *)uvm_km_alloc(kernel_map, | ||
PAGE_SIZE, PAGE_SIZE, | ||
UVM_KMF_WIRED | UVM_KMF_ZERO)) == NULL) { | ||
aprint_error("time page allocation failed\n"); | ||
return; | ||
} | ||
|
||
if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) { | ||
aprint_error("time page PA extraction failed\n"); | ||
uvm_km_free(kernel_map, (vaddr_t)sc->sc_time, | ||
PAGE_SIZE, UVM_KMF_WIRED); | ||
sc->sc_time = NULL; | ||
return; | ||
} | ||
|
||
wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE); | ||
sc->sc_paddr = pa; | ||
|
||
sc->sc_dev = self; | ||
|
||
ti = sc->sc_time; | ||
do { | ||
ti_version = pvclock_read_begin(ti); | ||
flags = ti->ti_flags; | ||
} while (!pvclock_read_done(ti, ti_version)); | ||
|
||
sc->sc_tc = &pvclock_timecounter; | ||
sc->sc_tc->tc_name = device_xname(sc->sc_dev); | ||
|
||
sc->sc_tc->tc_frequency = 1000000000ULL; | ||
sc->sc_tc->tc_priv = sc; | ||
|
||
pvclock_lastcount = 0; | ||
|
||
/* Better than HPET but below TSC */ | ||
sc->sc_tc->tc_quality = 1500; | ||
|
||
if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) { | ||
/* if tsc is not stable, set a lower priority */ | ||
/* Better than i8254 but below HPET */ | ||
sc->sc_tc->tc_quality = 500; | ||
} | ||
|
||
tc_init(sc->sc_tc); | ||
} | ||
|
||
int | ||
pvclock_activate(device_t self, devact_t act) | ||
{ | ||
struct pvclock_softc *sc = (struct pvclock_softc *)self; | ||
int rv = 0; | ||
paddr_t pa = sc->sc_paddr; | ||
|
||
switch (act) { | ||
case DVACT_DEACTIVATE: | ||
wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE); | ||
break; | ||
} | ||
|
||
return (rv); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
/* $OpenBSD: pvreg.h,v 1.6 2019/05/13 15:40:34 pd Exp $ */ | ||
|
||
/* | ||
* Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> | ||
* Copyright (c) 2015 Stefan Fritsch <sf@sfritsch.de> | ||
* | ||
* Permission to use, copy, modify, and distribute this software for any | ||
* purpose with or without fee is hereby granted, provided that the above | ||
* copyright notice and this permission notice appear in all copies. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
*/ | ||
|
||
#ifndef _DEV_PV_PVBUS_H_ | ||
#define _DEV_PV_PVBUS_H_ | ||
|
||
/*
 * CPUID leaf range for hypervisor signatures: first leaf, end of the
 * range, distance between candidate leaves, and the signature length in
 * bytes.
 */
#define	CPUID_HV_SIGNATURE_START	0x40000000
#define	CPUID_HV_SIGNATURE_END		0x40010000
#define	CPUID_HV_SIGNATURE_STEP		0x100
#define	CPUID_HV_SIGNATURE_STRLEN	12
|
||
/* | ||
* KVM | ||
*/ | ||
/* Offset of the feature leaf relative to the hypervisor signature leaf. */
#define	CPUID_OFFSET_KVM_FEATURES		0x1

/* Feature bit numbers; test with (1 << n) against the feature leaf. */
#define	KVM_FEATURE_CLOCKSOURCE			0	/* deprecated */
#define	KVM_FEATURE_NOP_IO_DELAY		1
#define	KVM_FEATURE_MMU_OP			2	/* deprecated */
#define	KVM_FEATURE_CLOCKSOURCE2		3
#define	KVM_FEATURE_ASYNC_PF			4
#define	KVM_FEATURE_STEAL_TIME			5
#define	KVM_FEATURE_PV_EOI			6
#define	KVM_FEATURE_PV_UNHALT			7
#define	KVM_FEATURE_CLOCKSOURCE_STABLE_BIT	24

/* Paravirtual EOI */
#define	KVM_MSR_EOI_EN				0x4b564d04
#define	KVM_PV_EOI_BIT				0

/* Paravirtual clock MSRs */
#define	KVM_MSR_WALL_CLOCK			0x4b564d00
#define	KVM_MSR_SYSTEM_TIME			0x4b564d01
|
||
/*
 * Wall-clock record: three consecutive 32-bit words.
 * NOTE(review): presumably the layout used with KVM_MSR_WALL_CLOCK, with
 * wc_sec/wc_nsec holding seconds and nanoseconds -- confirm against the
 * KVM documentation.
 */
struct pvclock_wall_clock {
	uint32_t	wc_version;
	uint32_t	wc_sec;
	uint32_t	wc_nsec;
} __packed;
|
||
/*
 * Time-info record stored in the page whose address is written to
 * KVM_MSR_SYSTEM_TIME.  A reading is derived from it as
 *   (((tsc - ti_tsc_timestamp) shifted by ti_tsc_shift)
 *       * ti_tsc_to_system_mul >> 32) + ti_system_time
 */
struct pvclock_time_info {
	/* version word checked before and after reading the fields below */
	uint32_t	ti_version;
	uint32_t	ti_pad0;
	/* TSC value the other fields refer to */
	uint64_t	ti_tsc_timestamp;
	/* base value added to the scaled TSC delta */
	uint64_t	ti_system_time;
	/* multiplier; the product is shifted right by 32 */
	uint32_t	ti_tsc_to_system_mul;
	/* shift applied to the TSC delta; negative means shift right */
	int8_t		ti_tsc_shift;
	/* PVCLOCK_FLAG_* bits */
	uint8_t		ti_flags;
	uint8_t		ti_pad[2];
} __packed;
|
||
/* Bit in pvclock_time_info.ti_flags. */
#define	PVCLOCK_FLAG_TSC_STABLE		0x01
/* Bit OR-ed into the page address written to KVM_MSR_SYSTEM_TIME. */
#define	PVCLOCK_SYSTEM_TIME_ENABLE	0x01
|
||
/* | ||
* Hyper-V | ||
*/ | ||
/* Leaf offsets relative to the hypervisor signature leaf. */
#define	CPUID_OFFSET_HYPERV_INTERFACE		0x1
#define	CPUID_OFFSET_HYPERV_VERSION		0x2
#define	CPUID_OFFSET_HYPERV_FEATURES		0x3
#define	CPUID_OFFSET_HYPERV_ENLIGHTENMENT_INFO	0x4
#define	CPUID_OFFSET_HYPERV_IMPL_LIMITS		0x5

/* Version leaf: per-register masks (_M) and shifts (_S). */
#define	HYPERV_VERSION_EAX_BUILD_NUMBER		0
#define	HYPERV_VERSION_EBX_MAJOR_M		0xffff0000
#define	HYPERV_VERSION_EBX_MAJOR_S		16
#define	HYPERV_VERSION_EBX_MINOR_M		0x0000ffff
#define	HYPERV_VERSION_EBX_MINOR_S		0
#define	HYPERV_VERSION_ECX_SERVICE_PACK		0
#define	HYPERV_VERSION_EDX_SERVICE_BRANCH_M	0xff000000
#define	HYPERV_VERSION_EDX_SERVICE_BRANCH_S	24
#define	HYPERV_VERSION_EDX_SERVICE_NUMBER_M	0x00ffffff
#define	HYPERV_VERSION_EDX_SERVICE_NUMBER_S	0

/* Known version values. */
#define	HYPERV_VERSION_WS2008			0x00060000
#define	HYPERV_VERSION_WIN7			0x00060001
#define	HYPERV_VERSION_WIN8			0x00060002
#define	HYPERV_VERSION_WIN8_1			0x00060003
#define	HYPERV_VERSION_WIN10			0x00100000

/* Feature leaf, EAX bit numbers. */
#define	HYPERV_FEATURE_EAX_VP_RUNTIME		0
#define	HYPERV_FEATURE_EAX_TIME_REF_COUNT	1
#define	HYPERV_FEATURE_EAX_SYNIC		2
#define	HYPERV_FEATURE_EAX_STIMER		3
#define	HYPERV_FEATURE_EAX_APIC			4
#define	HYPERV_FEATURE_EAX_HYPERCALL		5
#define	HYPERV_FEATURE_EAX_VP_INDEX		6
#define	HYPERV_FEATURE_EAX_MSR_RESET		7
#define	HYPERV_FEATURE_EAX_STATS_PAGES		8
#define	HYPERV_FEATURE_EAX_REF_TSC		9
#define	HYPERV_FEATURE_EAX_GUEST_IDLE		10
#define	HYPERV_FEATURE_EAX_TIMER_FREQ		11
#define	HYPERV_FEATURE_EAX_DEBUG		12
|
||
/* | ||
* Xen | ||
*/ | ||
/* Leaf offsets relative to the hypervisor signature leaf. */
#define	CPUID_OFFSET_XEN_VERSION	0x1
#define	CPUID_OFFSET_XEN_HYPERCALL	0x2

/* Version word: shift for the major part, mask for the minor part. */
#define	XEN_VERSION_MAJOR_S		16
#define	XEN_VERSION_MINOR_M		0xffff
|
||
#endif /* _DEV_PV_PVBUS_H_ */ |