Skip to content

Commit

Permalink
i#5365: Add AArch64 SVE vector length support (part 1)
Browse files Browse the repository at this point in the history
This patch adds Arm's Scalable Vector Extension vector length support.
The vector length is determined at runtime on startup in
get_processor_specific_info() and available using
proc_get_vector_length().

Cleancall, machine and signal context code have been updated to handle
SVE registers, as have API functions like reg_get_size(), which will
return the hardware's vector size rather than OPSZ_SCALABLE.

The SVE specification allows for a maximum vector length of 2048 bits.
We currently support 512 bits maximum due to DR's stack size limitation.
There is currently no stock SVE hardware with vector lengths greater
than 512 bits.

There will be follow-on patches to add:
- Predicate registers.
- Handling of First Fault Register (FFR).
- Targeted SVE tests.

Issue: #5365, #3044
  • Loading branch information
AssadHashmi committed Jan 24, 2023
1 parent 4e7437f commit 10a0907
Show file tree
Hide file tree
Showing 23 changed files with 257 additions and 123 deletions.
14 changes: 5 additions & 9 deletions core/arch/aarch64/aarch64.asm
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,7 @@ START_FILE
#endif

/* sizeof(priv_mcontext_t) rounded up to a multiple of 16 */
#define PRIV_MCONTEXT_SIZE 800

/* offset of priv_mcontext_t in dr_mcontext_t */
#define PRIV_MCONTEXT_OFFSET 16

#if PRIV_MCONTEXT_OFFSET < 16 || PRIV_MCONTEXT_OFFSET % 16 != 0
# error PRIV_MCONTEXT_OFFSET
#endif
#define PRIV_MCONTEXT_SIZE 2336

/* offsetof(spill_state_t, r0) */
#define spill_state_r0_OFFSET 0
Expand All @@ -76,7 +69,7 @@ START_FILE
/* offsetof(priv_mcontext_t, simd) */
#define simd_OFFSET (16 * ARG_SZ*2 + 32)
/* offsetof(dcontext_t, dstack) */
#define dstack_OFFSET 0x368
#define dstack_OFFSET 0x968
/* offsetof(dcontext_t, is_exiting) */
#define is_exiting_OFFSET (dstack_OFFSET+1*ARG_SZ)
/* offsetof(struct tlsdesc_t, arg) */
Expand Down Expand Up @@ -246,6 +239,9 @@ save_priv_mcontext_helper:
st1 {v20.2d-v23.2d}, [x4], #64
st1 {v24.2d-v27.2d}, [x4], #64
st1 {v28.2d-v31.2d}, [x4], #64
/* TODO i#3044: Save Z/P regs as well? Will require runtime check of
* ID_AA64PFR0_EL1 for FEAT_SVE.
*/
ret

DECLARE_EXPORTED_FUNC(dr_app_start)
Expand Down
4 changes: 2 additions & 2 deletions core/arch/aarch64/clean_call_opt.c
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ insert_inline_reg_save(dcontext_t *dcontext, clean_call_info_t *cci, instrlist_t
insert_get_mcontext_base(dcontext, ilist, where, ci->spill_reg);

insert_save_inline_registers(dcontext, ilist, where, cci->reg_skip, DR_REG_START_GPR,
true, (void *)ci);
GPR_REG_TYPE, (void *)ci);

/* Save nzcv */
if (!cci->skip_save_flags && ci->write_flags) {
Expand Down Expand Up @@ -512,7 +512,7 @@ insert_inline_reg_restore(dcontext_t *dcontext, clean_call_info_t *cci,
}

insert_restore_inline_registers(dcontext, ilist, where, cci->reg_skip, DR_REG_X0,
true, (void *)ci);
GPR_REG_TYPE, (void *)ci);

/* Restore reg used for unprotected_context_t pointer. */
PRE(ilist, where,
Expand Down
44 changes: 28 additions & 16 deletions core/arch/aarch64/proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,33 @@ get_processor_specific_info(void)
cpu_info.features.flags_aa64mmfr1 = isa_features[AA64MMFR1];
cpu_info.features.flags_aa64dfr0 = isa_features[AA64DFR0];

# if !defined(DR_HOST_NOT_TARGET) && defined(SVE)
/* TODO i#3044: Vector length will be set by reading value from h/w. */
CLIENT_ASSERT(false, "TODO i#3044: SVE requires initialisation of vector length!");
# elif !defined(STANDALONE_DECODER) || defined(DR_HOST_NOT_TARGET)
/* Set SVE vector length for unit tests. */
/* The SVE vector length is set to:
* - A value read from the host hardware.
* or:
* - 32 bytes, 256 bits.
* Which of the above depends on:
* - SVE or non-SVE AArch64 or x86 host h/w.
* and:
* - Release or development test build.
*/
# if !defined(DR_HOST_NOT_TARGET)
if (proc_has_feature(FEATURE_SVE)) {
# if !defined(BUILD_TESTS)
uint64 vl;
/* TODO i#3044: Ideally this should be generated by INSTR_CREATE_rdvl()
* and executed at startup time with other initialisation code.
*/
asm(".inst 0x04bf5020\n" /* rdvl x0, #1 */
"mov %0, x0" : "=r"(vl) : : "x0");
cpu_info.sve_vector_length_bytes = vl;
# else
cpu_info.sve_vector_length_bytes = 32;
# endif
}
else
cpu_info.sve_vector_length_bytes = 32;
# else
/* Set SVE vector length for unit testing the off-line decoder. */
dr_set_sve_vl(256);
# endif
}
Expand All @@ -109,6 +131,7 @@ proc_init_arch(void)
{
num_simd_saved = MCXT_NUM_SIMD_SLOTS;
num_simd_registers = MCXT_NUM_SIMD_SLOTS;
// TODO i#3044: Machine context slots for 16 predicate registers.
num_opmask_registers = MCXT_NUM_OPMASK_SLOTS;

/* When DR_HOST_NOT_TARGET, get_cache_line_size returns false and does
Expand Down Expand Up @@ -177,17 +200,6 @@ bool
proc_has_feature(feature_bit_t f)
{
#ifndef DR_HOST_NOT_TARGET
/* Pretend features are supported for codec tests run on h/w which does not
* support all features.
*/
# if defined(BUILD_TESTS)
if (f == FEATURE_LSE || f == FEATURE_RDM || f == FEATURE_FP16 ||
f == FEATURE_DotProd || f == FEATURE_SVE || f == FEATURE_LOR ||
f == FEATURE_FHM || f == FEATURE_SM3 || f == FEATURE_SM4 || f == FEATURE_SHA512 ||
f == FEATURE_SHA3 || f == FEATURE_RAS || f == FEATURE_SPE || f == FEATURE_PAUTH ||
f == FEATURE_LRCPC)
return true;
# endif
ushort feat_nibble, feat_val, freg_nibble, feat_nsflag;
uint64 freg_val = 0;

Expand Down
Loading

0 comments on commit 10a0907

Please sign in to comment.