i#5365 AArch64 SVE core, part 2: add signals support (#6725)
This patch adds SVE support for signals in the core. It is the follow-on
patch to the SVE core work part 1 in PR #5835 (f646a63), and it includes
vector address computation for SVE scatter/gather, enabling first-fault
load handling.

Issue: #5365, #5036

Co-authored-by: Jack Gallagher <jack.gallagher@arm.com>
AssadHashmi and jackgallagher-arm committed Apr 3, 2024
1 parent 0838ea7 commit 34b7435
Showing 22 changed files with 1,213 additions and 118 deletions.
3 changes: 3 additions & 0 deletions api/docs/release.dox
@@ -212,12 +212,15 @@ Further non-compatibility-affecting changes include:
#dynamorio::drmemtrace::analysis_tool_t to allow the tool to make holistic
adjustments to the interval snapshots after all have been generated, and before
they are used for merging across shards (potentially), and printing the results.
- Added opnd_is_vector_base_disp() to test if an opnd_t is a base+disp memory operand
that uses a vector register for the base or index register.
- Added -abort_on_invariant_error flag that instructs the invariant checker drmemtrace
analysis tool to abort trace analysis when a trace invariant error is found. This
is set to true by default to match the existing behavior of the invariant checker.
- Added a new instr API instr_is_xrstor() that tells whether an instruction is any
variant of the x86 xrstor opcode.


**************************************************
<hr>

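A minimal sketch of how a client might use the new opnd_is_vector_base_disp() query; the helper below is illustrative and not part of this patch (it assumes an instr_t from a client instrumentation event and the usual dr_api.h declarations):

/* Count the memory operands of an instruction that use a vector register
 * as the base or index register (e.g. SVE gather/scatter).
 */
static int
count_vector_memops(instr_t *instr)
{
    int count = 0;
    for (int i = 0; i < instr_num_srcs(instr); i++) {
        opnd_t op = instr_get_src(instr, i);
        if (opnd_is_memory_reference(op) && opnd_is_vector_base_disp(op))
            count++;
    }
    for (int i = 0; i < instr_num_dsts(instr); i++) {
        opnd_t op = instr_get_dst(instr, i);
        if (opnd_is_memory_reference(op) && opnd_is_vector_base_disp(op))
            count++;
    }
    return count;
}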
3 changes: 3 additions & 0 deletions core/arch/arch.h
@@ -156,6 +156,9 @@ mixed_mode_enabled(void)
# define SCRATCH_REG4_OFFS R4_OFFSET
# define SCRATCH_REG5_OFFS R5_OFFSET
# define REG_OFFSET(reg) (R0_OFFSET + ((reg)-DR_REG_R0) * sizeof(reg_t))
# define Z_REG_OFFSET(reg) \
((MC_OFFS) + \
(offsetof(priv_mcontext_t, simd) + ((reg)-DR_REG_Z0) * sizeof(dr_simd_t)))
# define CALL_SCRATCH_REG DR_REG_R11
# define MC_IBL_REG r2
# define MC_RETVAL_REG r0
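The new Z_REG_OFFSET macro mirrors REG_OFFSET above, locating a Z register's slot inside the saved mcontext. A worked expansion of the arithmetic only (MC_OFFS, priv_mcontext_t and dr_simd_t are defined elsewhere in the core):

/* Z_REG_OFFSET(DR_REG_Z5)
 *   = MC_OFFS + offsetof(priv_mcontext_t, simd)
 *     + (DR_REG_Z5 - DR_REG_Z0) * sizeof(dr_simd_t)
 *   = MC_OFFS + offsetof(priv_mcontext_t, simd) + 5 * sizeof(dr_simd_t)
 * i.e. the sixth dr_simd_t slot of the mcontext's simd array.
 */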
14 changes: 9 additions & 5 deletions core/ir/aarch64/codec.c
@@ -1032,7 +1032,7 @@ get_elements_in_sve_vector(aarch64_reg_offset element_size)
{
const uint element_length =
opnd_size_in_bits(get_opnd_size_from_offset(element_size));
-    return opnd_size_in_bits(OPSZ_SVE_VL_BYTES) / element_length;
+    return opnd_size_in_bits(OPSZ_SVE_VECLEN_BYTES) / element_length;
}

/*******************************************************************************
@@ -5195,7 +5195,8 @@ decode_opnd_svemem_gpr_simm6_vl(uint enc, int opcode, byte *pc, OUT opnd_t *opnd
const int offset = extract_int(enc, 16, 6);
IF_RETURN_FALSE(offset < -32 || offset > 31)
const reg_id_t rn = decode_reg(extract_uint(enc, 5, 5), true, true);
-    const opnd_size_t mem_transfer = op_is_prefetch(opcode) ? OPSZ_0 : OPSZ_SVE_VL_BYTES;
+    const opnd_size_t mem_transfer =
+        op_is_prefetch(opcode) ? OPSZ_0 : OPSZ_SVE_VECLEN_BYTES;

/* As specified in the AArch64 SVE reference manual for contiguous prefetch
* instructions, the immediate index value is a vector index into memory, NOT
@@ -5214,7 +5215,8 @@ static inline bool
encode_opnd_svemem_gpr_simm6_vl(uint enc, int opcode, byte *pc, opnd_t opnd,
OUT uint *enc_out)
{
-    const opnd_size_t mem_transfer = op_is_prefetch(opcode) ? OPSZ_0 : OPSZ_SVE_VL_BYTES;
+    const opnd_size_t mem_transfer =
+        op_is_prefetch(opcode) ? OPSZ_0 : OPSZ_SVE_VECLEN_BYTES;
if (!opnd_is_base_disp(opnd) || opnd_get_index(opnd) != DR_REG_NULL ||
opnd_get_size(opnd) != mem_transfer)
return false;
@@ -5344,7 +5346,8 @@ decode_opnd_svemem_gpr_simm9_vl(uint enc, int opcode, byte *pc, OUT opnd_t *opnd
bool is_vector = TEST(1u << 14, enc);

/* Transfer size depends on whether we are transferring a Z or a P register. */
-    opnd_size_t memory_transfer_size = is_vector ? OPSZ_SVE_VL_BYTES : OPSZ_SVE_PL_BYTES;
+    opnd_size_t memory_transfer_size =
+        is_vector ? OPSZ_SVE_VECLEN_BYTES : OPSZ_SVE_PREDLEN_BYTES;

/* As specified in the AArch64 SVE reference manual for unpredicated vector
* register load LDR and store STR instructions, the immediate index value is a
@@ -5374,7 +5377,8 @@ encode_opnd_svemem_gpr_simm9_vl(uint enc, int opcode, byte *pc, opnd_t opnd,
bool is_vector = TEST(1u << 14, enc);

/* Transfer size depends on whether we are transferring a Z or a P register. */
-    opnd_size_t memory_transfer_size = is_vector ? OPSZ_SVE_VL_BYTES : OPSZ_SVE_PL_BYTES;
+    opnd_size_t memory_transfer_size =
+        is_vector ? OPSZ_SVE_VECLEN_BYTES : OPSZ_SVE_PREDLEN_BYTES;

if (!opnd_is_base_disp(opnd) || opnd_get_size(opnd) != memory_transfer_size)
return false;
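These operands model SVE's "mul vl" addressing, where the signed immediate counts whole vector (or predicate) registers rather than bytes. A minimal sketch of the architectural address computation, with illustrative names that are not code from this patch:

#include <stddef.h>

/* Byte address of an SVE LDR/STR/PRF with a VL-scaled immediate:
 * simm counts whole transfers of transfer_bytes each, not bytes.
 */
static unsigned char *
sve_vl_scaled_addr(unsigned char *base, int simm, size_t transfer_bytes)
{
    return base + (ptrdiff_t)simm * (ptrdiff_t)transfer_bytes;
}

For example, with a 256-bit vector length (32 bytes), an immediate of 5 on the LDR (vector) form addresses base + 5 * 32 = base + 160.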
15 changes: 0 additions & 15 deletions core/ir/aarch64/codec.h
@@ -57,21 +57,6 @@ encode_common(byte *pc, instr_t *i, decode_info_t *di);
#define BITS(_enc, bitmax, bitmin) \
((((uint32)(_enc)) >> (bitmin)) & (uint32)MASK((bitmax) - (bitmin) + 1))

#if !defined(DR_HOST_NOT_TARGET) && !defined(STANDALONE_DECODER) && !defined(BUILD_TESTS)
# define OPSZ_SVE_VL_BYTES opnd_size_from_bytes(proc_get_vector_length_bytes())
# define OPSZ_SVE_PL_BYTES opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)
#else
/* SVE vector length for off-line decoder set using -vl option with drdisas,
* e.g.
* $ drdisas -vl 256 e58057a1 85865e6b
* e58057a1 str %z1 -> +0x05(%x29)[32byte]
* 85865e6b ldr +0x37(%x19)[32byte] -> %z11
* $
*/
# define OPSZ_SVE_VL_BYTES opnd_size_from_bytes(dr_get_sve_vector_length() / 8)
# define OPSZ_SVE_PL_BYTES opnd_size_from_bytes((dr_get_sve_vector_length() / 8) / 8)
#endif

#define RETURN_FALSE \
do { \
CLIENT_ASSERT(false, "Unexpected state in AArch64 codec"); \
125 changes: 123 additions & 2 deletions core/ir/aarch64/instr.c
@@ -1,6 +1,6 @@
/* **********************************************************
* Copyright (c) 2017-2023 Google, Inc. All rights reserved.
- * Copyright (c) 2016 ARM Limited. All rights reserved.
+ * Copyright (c) 2016-2024 ARM Limited. All rights reserved.
* **********************************************************/

/*
@@ -37,6 +37,8 @@
#include "encode_api.h"
#include "opcode_names.h"

#include <stddef.h>

/* XXX i#6690: currently only A64 is supported for instruction encoding.
* We want to add support for A64 decoding and synthetic ISA encoding as well.
* XXX i#1684: move this function to core/ir/instr_shared.c once we can support
@@ -447,7 +449,7 @@ reg_is_gpr(reg_id_t reg)
bool
reg_is_simd(reg_id_t reg)
{
-    return (DR_REG_Q0 <= reg && reg <= DR_REG_B31);
+    return reg_is_z(reg) || (DR_REG_Q0 <= reg && reg <= DR_REG_B31);
}

bool
@@ -737,3 +739,122 @@ instr_invert_predicate(dr_pred_type_t pred)
default: CLIENT_ASSERT(false, "Incorrect predicate value"); return DR_PRED_NONE;
}
}

ptr_int_t
d_r_compute_scaled_index_aarch64(opnd_t opnd, reg_t index_val)
{
bool scaled = false;
uint amount = 0;
dr_extend_type_t type = opnd_get_index_extend(opnd, &scaled, &amount);
reg_t extended = 0;
uint msb = 0;
switch (type) {
default: CLIENT_ASSERT(false, "Unsupported extend type"); return 0;
case DR_EXTEND_UXTW: extended = index_val & 0x00000000ffffffffULL; break;
case DR_EXTEND_SXTW:
extended = index_val & 0x00000000ffffffffULL;
msb = extended >> 31u;
if (msb == 1) {
extended = ((~0ull) << 32u) | extended;
}
break;
case DR_EXTEND_UXTX:
case DR_EXTEND_SXTX: extended = index_val; break;
}
if (scaled) {
return extended << amount;
} else {
return extended;
}
}
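/* Worked example of the SXTW path above (illustrative values, not from the
 * patch): a W-sized index holding -2, with scaled = true and amount = 3:
 *   index_val = 0x00000000fffffffe
 *   extended  = index_val & 0xffffffff = 0xfffffffe, msb = 1
 *   extended  = ((~0ull) << 32) | 0xfffffffe = 0xfffffffffffffffe  (-2)
 *   result    = -2 << 3 = -16
 * so this index contributes -16 bytes to the effective address.
 */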

static bool
is_active_in_mask(size_t element, uint64 mask, size_t element_size_bytes)
{
const uint64 element_flag = 1ull << (element_size_bytes * element);
return TESTALL(element_flag, mask);
}
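/* The predicate layout assumed here: an SVE predicate register holds one bit
 * per vector byte, and only the lowest bit of each element's byte group flags
 * the element as active. For 4-byte elements (element_size_bytes = 4):
 *   element 0 -> bit 0  (1ull << (4 * 0))
 *   element 1 -> bit 4  (1ull << (4 * 1))
 *   element 2 -> bit 8  (1ull << (4 * 2))
 * e.g. is_active_in_mask(2, 0x100, 4) returns true because bit 8 is set.
 */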

bool
instr_compute_vector_address(instr_t *instr, priv_mcontext_t *mc, size_t mc_size,
dr_mcontext_flags_t mc_flags, opnd_t curop, uint addr_index,
DR_PARAM_OUT bool *have_addr, DR_PARAM_OUT app_pc *addr,
DR_PARAM_OUT bool *write)
{
CLIENT_ASSERT(have_addr != NULL && addr != NULL && mc != NULL && write != NULL,
"SVE address computation: invalid args");
CLIENT_ASSERT(TEST(DR_MC_MULTIMEDIA, mc_flags),
"dr_mcontext_t.flags must include DR_MC_MULTIMEDIA");
CLIENT_ASSERT(mc_size >= offsetof(dr_mcontext_t, svep) + sizeof(mc->svep),
"Incompatible client, invalid dr_mcontext_t.size.");

*write = instr_is_scatter(instr);
ASSERT(*write || instr_is_gather(instr));

const size_t vl_bytes = opnd_size_in_bytes(OPSZ_SVE_VECLEN_BYTES);
/* DynamoRIO currently supports up to 512-bit vector registers, so a predicate
 * register value should fit in 64 bits.
 * If DynamoRIO is extended in the future to support larger vector lengths, this
 * function will need to be updated to cope with larger predicate mask values.
 */
ASSERT(vl_bytes / 8 <= sizeof(uint64));

const reg_t governing_pred = opnd_get_reg(instr_get_src(instr, 1));
ASSERT(governing_pred >= DR_REG_START_P && governing_pred <= DR_REG_STOP_P);
uint64 mask = mc->svep[governing_pred - DR_REG_START_P].d;

if (mask == 0) {
return false;
}

const size_t element_size_bytes =
opnd_size_in_bytes(opnd_get_vector_element_size(curop));
const size_t num_elements = vl_bytes / element_size_bytes;

size_t active_elements_found = 0;
for (size_t element = 0; element < num_elements; element++) {
if (is_active_in_mask(element, mask, element_size_bytes)) {
active_elements_found++;
if (active_elements_found - 1 == addr_index) {
const reg_t base_reg = opnd_get_base(curop);
if (reg_is_z(base_reg)) {
/* Vector base: extract the current element. */
size_t base_reg_num = base_reg - DR_REG_START_Z;
if (element_size_bytes == 4) {
*addr = (app_pc)(reg_t)mc->simd[base_reg_num].u32[element];
} else {
ASSERT(element_size_bytes == 8);
*addr = (app_pc)mc->simd[base_reg_num].u64[element];
}
} else {
/* Scalar base. */
*addr = (app_pc)reg_get_value_priv(base_reg, mc);
}

const reg_t index_reg = opnd_get_index(curop);
reg_t unscaled_index_val = 0;
if (reg_is_z(index_reg)) {
/* Vector index: extract the current element. */
size_t index_reg_num = index_reg - DR_REG_START_Z;
if (element_size_bytes == 4) {
unscaled_index_val = mc->simd[index_reg_num].u32[element];
} else {
ASSERT(element_size_bytes == 8);
unscaled_index_val = mc->simd[index_reg_num].u64[element];
}
} else {
/* Scalar index or no index. */
unscaled_index_val = reg_get_value_priv(index_reg, mc);
}

*have_addr = true;
*addr += d_r_compute_scaled_index_aarch64(curop, unscaled_index_val);
*addr += opnd_get_disp(curop);

return addr_index < num_elements;
}
}
}

return false;
}
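To make the loop above concrete, a worked example (all register values are illustrative, not taken from the patch or its tests):

/* Gather with scalar base x2 and 32-bit vector index z3, SXTW extend
 * scaled by 2, element_size_bytes = 4:
 *   governing predicate mask = 0x11  -> elements 0 and 1 active
 *   x2 = 0x1000,  z3.u32 = { 1, 2, ... }
 * addr_index 0: *addr = 0x1000 + (1 << 2) = 0x1004, returns true
 * addr_index 1: *addr = 0x1000 + (2 << 2) = 0x1008, returns true
 * addr_index 2: no third active element, so the function returns false
 */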
2 changes: 2 additions & 0 deletions core/ir/aarchxx/opnd.c
@@ -63,6 +63,8 @@ opnd_get_reg_dcontext_offs(reg_id_t reg)
return R0_OFFSET + (R1_OFFSET - R0_OFFSET) * (reg - DR_REG_W0);
if (reg == DR_REG_XSP || reg == DR_REG_WSP)
return XSP_OFFSET;
if (DR_REG_Z0 <= reg && reg <= DR_REG_Z31)
return Z_REG_OFFSET(reg);
CLIENT_ASSERT(false, "opnd_get_reg_dcontext_offs: invalid reg");
return -1;
#else
14 changes: 12 additions & 2 deletions core/ir/arm/instr.c
@@ -909,14 +909,24 @@ DR_API
bool
instr_is_scatter(instr_t *instr)
{
-    /* XXX i#3837: no scatter-store on ARM? */
+    /* No scatter-store on AArch32. */
return false;
}

DR_API
bool
instr_is_gather(instr_t *instr)
{
-    /* XXX i#3837: no gather-load on ARM? */
+    /* No gather-load on AArch32. */
return false;
}

bool
instr_compute_vector_address(instr_t *instr, priv_mcontext_t *mc, size_t mc_size,
dr_mcontext_flags_t mc_flags, opnd_t curop, uint addr_index,
DR_PARAM_OUT bool *have_addr, DR_PARAM_OUT app_pc *addr,
DR_PARAM_OUT bool *write)
{
CLIENT_ASSERT(false, "There are no AArch32 instructions that use vector addressing");
return false;
}
16 changes: 12 additions & 4 deletions core/ir/instr.h
@@ -676,11 +676,19 @@ int
instr_length_arch(dcontext_t *dcontext, instr_t *instr);
bool
opc_is_not_a_real_memory_load(int opc);

+/* Compute the index-th address for a memory operand that uses a vector register for the
+ * base or index register.
+ * The return value has the same semantics as instr_compute_address_ex(). It returns:
+ *   true if index is in bounds and an address was calculated and returned,
+ *   false if index >= the number of addresses this instruction accesses.
+ */
 bool
-instr_compute_address_VSIB(instr_t *instr, priv_mcontext_t *mc, size_t mc_size,
-                           dr_mcontext_flags_t mc_flags, opnd_t curop, uint index,
-                           DR_PARAM_OUT bool *have_addr, DR_PARAM_OUT app_pc *addr,
-                           DR_PARAM_OUT bool *write);
+instr_compute_vector_address(instr_t *instr, priv_mcontext_t *mc, size_t mc_size,
+                             dr_mcontext_flags_t mc_flags, opnd_t curop, uint index,
+                             DR_PARAM_OUT bool *have_addr, DR_PARAM_OUT app_pc *addr,
+                             DR_PARAM_OUT bool *write);

uint
instr_branch_type(instr_t *cti_instr);
#ifdef AARCH64
3 changes: 2 additions & 1 deletion core/ir/instr_api.h
@@ -1648,7 +1648,8 @@ DR_API
* write is returned in \p is_write. Either or both OUT variables can
* be NULL.
* \p mc->flags must include DR_MC_CONTROL and DR_MC_INTEGER.
- * For instructions that use vector addressing (VSIB, introduced in AVX2),
+ * For instructions that use vector addressing (x86 VSIB, introduced in AVX2, or
+ * AArch64 scatter/gather instructions introduced in SVE/SVE2),
* mc->flags must additionally include DR_MC_MULTIMEDIA.
*
* Like instr_reads_memory(), this routine does not consider
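A sketch of the client-side flow this documentation implies; the event wiring and helper name are illustrative, not part of the patch, and the fragment assumes the standard dr_api.h declarations:

/* Walk every address a scatter/gather instruction will touch.
 * DR_MC_ALL includes DR_MC_MULTIMEDIA, which fills in the Z and P
 * register state needed for the vector address computation.
 */
static void
print_vector_addrs(void *drcontext, instr_t *instr)
{
    dr_mcontext_t mc = { sizeof(mc), DR_MC_ALL };
    if (!dr_get_mcontext(drcontext, &mc))
        return;
    app_pc addr;
    bool is_write;
    for (uint i = 0; instr_compute_address_ex(instr, &mc, i, &addr, &is_write); i++)
        dr_printf("access %u: %p (%s)\n", i, (void *)addr, is_write ? "write" : "read");
}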
26 changes: 9 additions & 17 deletions core/ir/instr_shared.c
@@ -2655,20 +2655,16 @@ instr_compute_address_helper(instr_t *instr, priv_mcontext_t *mc, size_t mc_size
for (i = 0; i < instr_num_dsts(instr); i++) {
curop = instr_get_dst(instr, i);
if (opnd_is_memory_reference(curop)) {
-            if (opnd_is_vsib(curop)) {
-#ifdef X86
-                if (instr_compute_address_VSIB(instr, mc, mc_size, mc_flags, curop, index,
-                                               &have_addr, addr, &write)) {
-                    CLIENT_ASSERT(
-                        write,
-                        "VSIB found in destination but instruction is not a scatter");
+            if (opnd_is_vector_base_disp(curop)) {
+                if (instr_compute_vector_address(instr, mc, mc_size, mc_flags, curop,
+                                                 index, &have_addr, addr, &write)) {
+                    CLIENT_ASSERT(write,
+                                  "Vector address found in destination but instruction "
+                                  "is not a scatter");
                     break;
                 } else {
                     return false;
                 }
-#else
-                CLIENT_ASSERT(false, "VSIB should be x86-only");
-#endif
}
memcount++;
if (memcount == (int)index) {
@@ -2683,16 +2679,12 @@
for (i = 0; i < instr_num_srcs(instr); i++) {
curop = instr_get_src(instr, i);
if (opnd_is_memory_reference(curop)) {
-            if (opnd_is_vsib(curop)) {
-#ifdef X86
-                if (instr_compute_address_VSIB(instr, mc, mc_size, mc_flags, curop,
-                                               index, &have_addr, addr, &write))
+            if (opnd_is_vector_base_disp(curop)) {
+                if (instr_compute_vector_address(instr, mc, mc_size, mc_flags, curop,
+                                                 index, &have_addr, addr, &write))
                     break;
                 else
                     return false;
-#else
-                CLIENT_ASSERT(false, "VSIB should be x86-only");
-#endif
}
memcount++;
if (memcount == (int)index)
(Diffs for the remaining 12 of the 22 changed files are not shown.)