clocksource: arm_arch_timer: Use pvtime LPT
Enable paravirtualized time to be used in a KVM guest if the host
supports it. This allows the guest to derive a counter which is clocked
at a persistent rate even when the guest is migrated.

If we discover that the system supports SMCCC v1.1 then we probe to
determine whether the hypervisor supports paravirtualized features and
finally whether it supports "Live Physical Time" reporting. If so, a
shared structure is made available to the guest containing coefficients
to calculate the derived clock.

The guest kernel uses the coefficients to present a clock to user space
that is always clocked at the same rate whenever the guest is running
('live'), even if the physical clock changes (due to the guest being
migrated).

The existing workaround framework for CNTVCT is used to trap user space
accesses to the timer registers so we can present the derived clock.

Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Keqian Zhu authored and intel-lab-lkp committed Aug 17, 2020
1 parent 7cdd1e7 commit 47f3527
Showing 2 changed files with 204 additions and 34 deletions.
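
The SMCCC discovery sequence described in the commit message lives in the
clocksource driver half of the patch, which is not expanded on this page.
Below is a minimal sketch of its likely shape, assuming the conduit helpers
from <linux/arm-smccc.h>. ARM_SMCCC_HV_PV_TIME_FEATURES is the mainline
paravirtualized-time discovery ID; ARM_SMCCC_HV_PV_TIME_LPT and the probe
function name are placeholders taken from this series and may differ from
the real patch:

	#include <linux/arm-smccc.h>
	#include <linux/io.h>
	#include <linux/jump_label.h>

	/* Sketch only: the LPT function ID and function name are assumptions */
	static int __init pvclock_lpt_probe(void)
	{
		struct arm_smccc_res res;

		/* SMCCC v1.1 is a prerequisite for the feature-discovery calls */
		if (arm_smccc_1_1_get_conduit() == SMCCC_CONDUIT_NONE)
			return -EOPNOTSUPP;

		/* Does the hypervisor implement paravirtualized time at all? */
		arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
				     ARM_SMCCC_HV_PV_TIME_FEATURES, &res);
		if ((long)res.a0 < 0)
			return -EOPNOTSUPP;

		/* ...and Live Physical Time reporting in particular? */
		arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_FEATURES,
				     ARM_SMCCC_HV_PV_TIME_LPT, &res);
		if ((long)res.a0 < 0)
			return -EOPNOTSUPP;

		/* The call returns the address of the shared coefficient structure */
		arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_LPT, &res);
		lpt_info = memremap(res.a0, sizeof(*lpt_info), MEMREMAP_WB);
		if (!lpt_info)
			return -ENOMEM;

		static_branch_enable(&pvclock_lpt_key_enabled);
		return 0;
	}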
179 changes: 168 additions & 11 deletions arch/arm64/include/asm/arch_timer.h
@@ -10,6 +10,7 @@

#include <asm/barrier.h>
#include <asm/hwcap.h>
#include <asm/pvclock-abi.h>
#include <asm/sysreg.h>

#include <linux/bug.h>
@@ -64,25 +65,178 @@ struct arch_timer_erratum_workaround {
DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *,
		timer_unstable_counter_workaround);


extern struct pvclock_vm_lpt_time *lpt_info;
DECLARE_STATIC_KEY_FALSE(pvclock_lpt_key_enabled);

/* LPT read/write base layer */
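/*
 * The hypervisor owns the shared lpt_info structure and bumps
 * sequence_number around every update of the coefficients (e.g. across
 * a migration).  The readers and writers below sample the sequence
 * number on both sides of the access, seqlock style, and retry if it
 * changed, so a consistent set of coefficients is always used.
 */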

#define lpt_read_base(target, trans, read) ({ \
	__le64 _seq_begin, _seq_end; \
	u64 _nval, _pval; \
	\
	do { \
		_seq_begin = READ_ONCE(lpt_info->sequence_number); \
		/* The LPT structure can be treated as a read-only device */ \
		rmb(); \
		\
		_nval = read(target); \
		_pval = trans(_nval); \
		\
		rmb(); \
		_seq_end = READ_ONCE(lpt_info->sequence_number); \
	} while (unlikely(_seq_begin != _seq_end)); \
	\
	_pval; \
})

#define lpt_write_base(val, target, trans, write) ({ \
	__le64 _seq_begin, _seq_end; \
	u64 _pval = val; \
	u64 _nval; \
	\
	do { \
		_seq_begin = READ_ONCE(lpt_info->sequence_number); \
		/* The LPT structure can be treated as a read-only device */ \
		rmb(); \
		\
		_nval = trans(_pval); \
		write(_nval, target); \
		\
		rmb(); \
		_seq_end = READ_ONCE(lpt_info->sequence_number); \
	} while (unlikely(_seq_begin != _seq_end)); \
})

#define lpt_read(target, trans, read) ({ \
	u64 _val; \
	\
	if (static_branch_unlikely(&pvclock_lpt_key_enabled)) { \
		_val = lpt_read_base(target, trans, read); \
	} else { \
		_val = read(target); \
	} \
	\
	_val; \
})

#define lpt_write(val, target, trans, write) ({ \
	if (static_branch_unlikely(&pvclock_lpt_key_enabled)) { \
		lpt_write_base(val, target, trans, write); \
	} else { \
		write(val, target); \
	} \
})
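
/*
 * pvclock_lpt_key_enabled is a static key: when LPT has not been
 * negotiated, the mediation compiles down to a direct register access
 * plus a patched-out branch, so the non-LPT fast path is unaffected.
 */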

/* LPT read/write layer for timer and count */

static inline u64 native_to_pv_cycles(u64 cnt)
{
	u64 scale_mult = le64_to_cpu(lpt_info->scale_mult);
	u32 fracbits = le32_to_cpu(lpt_info->fracbits);

	return mul_u64_u64_shr(scale_mult, cnt, fracbits);
}

static inline u64 pv_to_native_cycles(u64 cnt)
{
	u64 rscale_mult = le64_to_cpu(lpt_info->rscale_mult);
	u32 rfracbits = le32_to_cpu(lpt_info->rfracbits);

	return mul_u64_u64_shr(rscale_mult, cnt, rfracbits);
}
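
/*
 * Both helpers are fixed-point multiplies: scale_mult / 2^fracbits
 * approximates pv_freq / native_freq, and rscale_mult / 2^rfracbits its
 * reciprocal, so cycle values are rescaled in either direction without
 * a division on the hot path.
 */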

#define arch_timer_read_mediated(reg) ({ \
	lpt_read(reg, native_to_pv_cycles, read_sysreg); \
})

#define arch_timer_write_mediated(val, reg) ({ \
	u64 _val = val; \
	lpt_write(_val, reg, pv_to_native_cycles, write_sysreg); \
})

#define mem_timer_read_mediated(addr) ({ \
	lpt_read(addr, native_to_pv_cycles, readl_relaxed); \
})

#define mem_timer_write_mediated(val, addr) ({ \
	u64 _val = val; \
	lpt_write(_val, addr, pv_to_native_cycles, writel_relaxed); \
})

/* LPT read/write layer for cntkctl_el1 */

static inline int cntkctl_evnti_shift(void)
{
	u32 native_freq = le32_to_cpu(lpt_info->native_freq);
	u32 pv_freq = le32_to_cpu(lpt_info->pv_freq);
	int div, shift;

	if (pv_freq >= native_freq)
		div = pv_freq / native_freq;
	else
		div = native_freq / pv_freq;

	/* Find the closest power of two to the divisor */
	shift = fls(div);
	if ((shift == 1) || (shift > 1 && !(div & (1 << (shift - 2)))))
		shift--;

	return pv_freq >= native_freq ? shift : -shift;
}
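
/*
 * Example: div = 5 gives fls(5) = 3 with bit 1 of div clear, so the
 * divisor rounds down to 2^2 = 4; div = 7 has that bit set and rounds
 * up to 2^3 = 8.  The sign of the return value selects the direction
 * in which the EVNTI event-stream divider field is shifted.
 */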

static inline u64 parse_cntkctl(u64 val, bool native_to_pv)
{
	int evnti = (val >> ARCH_TIMER_EVT_TRIGGER_SHIFT) & 0xF;

	if (native_to_pv)
		evnti = evnti + cntkctl_evnti_shift();
	else
		evnti = evnti - cntkctl_evnti_shift();

	evnti = min(15, max(0, evnti));
	val &= ~ARCH_TIMER_EVT_TRIGGER_MASK;
	val |= evnti << ARCH_TIMER_EVT_TRIGGER_SHIFT;

	return val;
}

#define TRANS_CNTKCTL_N(nval) ({ \
	parse_cntkctl(nval, true); \
})

#define TRANS_CNTKCTL_P(pval) ({ \
	parse_cntkctl(pval, false); \
})

#define arch_timer_read_cntkctl_mediated() ({ \
	lpt_read(cntkctl_el1, TRANS_CNTKCTL_N, read_sysreg); \
})

#define arch_timer_write_cntkctl_mediated(val) ({ \
	u64 _val = val; \
	lpt_write(_val, cntkctl_el1, TRANS_CNTKCTL_P, write_sysreg); \
})

/* inline sysreg accessors that make erratum_handler() work */
static inline notrace u32 arch_timer_read_cntp_tval_el0(void)
{
-	return read_sysreg(cntp_tval_el0);
+	return arch_timer_read_mediated(cntp_tval_el0);
}

static inline notrace u32 arch_timer_read_cntv_tval_el0(void)
{
-	return read_sysreg(cntv_tval_el0);
+	return arch_timer_read_mediated(cntv_tval_el0);
}

static inline notrace u64 arch_timer_read_cntpct_el0(void)
{
-	return read_sysreg(cntpct_el0);
+	return arch_timer_read_mediated(cntpct_el0);
}

static inline notrace u64 arch_timer_read_cntvct_el0(void)
{
-	return read_sysreg(cntvct_el0);
+	return arch_timer_read_mediated(cntvct_el0);
}

#define arch_timer_reg_read_stable(reg) \
@@ -110,7 +264,7 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
			write_sysreg(val, cntp_ctl_el0);
			break;
		case ARCH_TIMER_REG_TVAL:
-			write_sysreg(val, cntp_tval_el0);
+			arch_timer_write_mediated(val, cntp_tval_el0);
			break;
		}
	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
@@ -119,7 +273,7 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
			write_sysreg(val, cntv_ctl_el0);
			break;
		case ARCH_TIMER_REG_TVAL:
-			write_sysreg(val, cntv_tval_el0);
+			arch_timer_write_mediated(val, cntv_tval_el0);
			break;
		}
	}
@@ -151,17 +305,20 @@ u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)

static inline u32 arch_timer_get_cntfrq(void)
{
-	return read_sysreg(cntfrq_el0);
+	if (static_branch_unlikely(&pvclock_lpt_key_enabled))
+		return le32_to_cpu(lpt_info->pv_freq);
+	else
+		return read_sysreg(cntfrq_el0);
}

static inline u32 arch_timer_get_cntkctl(void)
{
-	return read_sysreg(cntkctl_el1);
+	return arch_timer_read_cntkctl_mediated();
}

static inline void arch_timer_set_cntkctl(u32 cntkctl)
{
-	write_sysreg(cntkctl, cntkctl_el1);
+	arch_timer_write_cntkctl_mediated(cntkctl);
	isb();
}

@@ -199,7 +356,7 @@ static __always_inline u64 __arch_counter_get_cntpct(void)
	u64 cnt;

	isb();
-	cnt = read_sysreg(cntpct_el0);
+	cnt = arch_timer_read_mediated(cntpct_el0);
	arch_counter_enforce_ordering(cnt);
	return cnt;
}
@@ -219,7 +376,7 @@ static __always_inline u64 __arch_counter_get_cntvct(void)
	u64 cnt;

	isb();
-	cnt = read_sysreg(cntvct_el0);
+	cnt = arch_timer_read_mediated(cntvct_el0);
	arch_counter_enforce_ordering(cnt);
	return cnt;
}
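
For readers who want to check the arithmetic, here is a small user-space
sketch of the scaling performed by native_to_pv_cycles(); the coefficient
values are illustrative assumptions, not values mandated by the LPT
specification:

	#include <stdint.h>
	#include <stdio.h>

	/* User-space stand-in for the kernel's mul_u64_u64_shr() */
	static uint64_t mul_u64_u64_shr(uint64_t a, uint64_t mul, unsigned int shift)
	{
		return (uint64_t)(((unsigned __int128)a * mul) >> shift);
	}

	int main(void)
	{
		/*
		 * Illustrative: scale a 50 MHz native counter to a 100 MHz
		 * derived clock, i.e. scale_mult / 2^fracbits == 2.0 in
		 * fixed point.
		 */
		uint64_t scale_mult = 2ULL << 32;
		uint32_t fracbits = 32;
		uint64_t native_cnt = 50000000;	/* one second of native ticks */

		/* Prints 100000000: one second of derived-clock ticks */
		printf("%llu\n", (unsigned long long)
		       mul_u64_u64_shr(scale_mult, native_cnt, fracbits));
		return 0;
	}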
