Skip to content

Commit

Permalink
i#5365: Add AArch64 SVE support to the core (part 1) (#5835)
Browse files Browse the repository at this point in the history
This patch adds Arm AArch64 Scalable Vector Extension (SVE) support to
the core including related changes to the codec, IR and relevant
clients.

SVE and SVE2 are major extensions to Arm's 64-bit architecture.
Developers and users should refer to the relevant documentation at
developer.arm.com (currently
https://developer.arm.com/Architectures/Scalable%20Vector%20Extensions).

The architecture allows hardware implementations to support vector
lengths from 128 to 2048 bits. This patch supports up to 512 bits due
to DynamoRIO's stack size limitation. There is currently no stock SVE
hardware with vector lengths greater than 512 bits. The vector length
is determined by get_processor_specific_info() at runtime on startup
and is available by calling proc_get_vector_length_bytes(). For Z registers,
reg_get_size() will return the vector size implemented by the hardware
rather than OPSZ_SCALABLE.

There will be follow-up patches for:
- SVE scatter/gather emulation
- Full SVE signal context support
- Complete SVE support in sample clients and the drcachesim tracer.

Issues: #5365, #3044

---------

Co-authored-by: Cam Mannett <camden.mannett@arm.com>
  • Loading branch information
AssadHashmi and cmannett85-arm committed Aug 14, 2023
1 parent 2b55f8b commit f646a63
Show file tree
Hide file tree
Showing 47 changed files with 1,202 additions and 445 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ jobs:
# We only use a non-zero build # when making multiple manual builds in one day.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=9.93.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=9.94.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/ci-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ jobs:
# We only use a non-zero build # when making multiple manual builds in one day.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=9.93.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=9.94.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -194,7 +194,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=9.93.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=9.94.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -282,7 +282,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=9.93.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=9.94.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -370,7 +370,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=9.93.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=9.94.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -450,7 +450,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=9.93.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=9.94.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -535,7 +535,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER="9.93.$((`git log -n 1 --format=%ct` / (60*60*24)))"
export VERSION_NUMBER="9.94.$((`git log -n 1 --format=%ct` / (60*60*24)))"
export PREFIX="cronbuild-"
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
Expand Down
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ endif (EXISTS "${PROJECT_SOURCE_DIR}/.svn")

# N.B.: When updating this, update all the default versions in ci-package.yml
# and ci-docs.yml. We should find a way to share (xref i#1565).
set(VERSION_NUMBER_DEFAULT "9.93.${VERSION_NUMBER_PATCHLEVEL}")
set(VERSION_NUMBER_DEFAULT "9.94.${VERSION_NUMBER_PATCHLEVEL}")
# do not store the default VERSION_NUMBER in the cache to prevent a stale one
# from preventing future version updates in a pre-existing build dir
set(VERSION_NUMBER "" CACHE STRING "Version number: leave empty for default")
Expand Down Expand Up @@ -1381,7 +1381,7 @@ math(EXPR VERSION_NUMBER_INTEGER
# 5.0 broke backcompat in drsyms and xmm opnd sizes
# 4.1 broke backcompat in drsyms + 64-bit core (opcodes + reachability)
# 4.0 broke backcompat in drmgr, drsyms, drinjectlib, and dr_get_milliseconds()
set(OLDEST_COMPATIBLE_VERSION_DEFAULT "990")
set(OLDEST_COMPATIBLE_VERSION_DEFAULT "994")
set(OLDEST_COMPATIBLE_VERSION "" CACHE STRING
"Oldest compatible version: leave empty for default")
if ("${OLDEST_COMPATIBLE_VERSION}" STREQUAL "")
Expand Down
8 changes: 8 additions & 0 deletions api/docs/release.dox
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,11 @@ changes:
their precise counterparts int64_t and uint64_t.
- The #dynamorio::drmemtrace::memref_t structure has a new field appended for
holding the actual target of each indirect branch.
- Increased the size of dr_simd_t to accommodate AArch64's Scalable Vector
Extension (SVE) and added two new dr_simd_t instances to
#dr_mcontext_t: the SVE predicate registers svep[] and the SVE first-fault
register, ffr. This is a significant binary compatibility change and will
require re-building clients built before SVE was added.

Further non-compatibility-affecting changes include:
- Added new drmemtrace option -L0_filter_until_instrs which enables filtering
Expand Down Expand Up @@ -279,6 +284,9 @@ Further non-compatibility-affecting changes include:
- Added a new drmemtrace analysis tool: syscall_mix, to count frequency of system
calls in a trace. This tool works in both the online and offline modes of
drmemtrace.
- Added proc_get_vector_length_bytes() for AArch64. This returns the current
vector length on all ARMv8 hardware including hardware which supports the
Scalable Vector Extension (SVE).

**************************************************
<hr>
Expand Down
46 changes: 42 additions & 4 deletions api/samples/memtrace_simple.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ static int tls_idx;

#define MINSERT instrlist_meta_preinsert

#ifdef AARCH64
static bool reported_sg_warning = false;
#endif

static void
memtrace(void *drcontext)
{
Expand Down Expand Up @@ -314,13 +318,47 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *wher
DR_ASSERT(instr_is_app(instr_operands));

for (i = 0; i < instr_num_srcs(instr_operands); i++) {
if (opnd_is_memory_reference(instr_get_src(instr_operands, i)))
instrument_mem(drcontext, bb, where, instr_get_src(instr_operands, i), false);
const opnd_t src = instr_get_src(instr_operands, i);
if (opnd_is_memory_reference(src)) {
#ifdef AARCH64
/* TODO i#5844: Memory references involving SVE registers are not
* supported yet. To be implemented as part of scatter/gather work.
*/
if (opnd_is_base_disp(src) &&
(reg_is_z(opnd_get_base(src)) || reg_is_z(opnd_get_index(src)))) {
if (!reported_sg_warning) {
dr_fprintf(STDERR,
"WARNING: Scatter/gather is not supported, results will "
"be inaccurate\n");
reported_sg_warning = true;
}
continue;
}
#endif
instrument_mem(drcontext, bb, where, src, false);
}
}

for (i = 0; i < instr_num_dsts(instr_operands); i++) {
if (opnd_is_memory_reference(instr_get_dst(instr_operands, i)))
instrument_mem(drcontext, bb, where, instr_get_dst(instr_operands, i), true);
const opnd_t dst = instr_get_dst(instr_operands, i);
if (opnd_is_memory_reference(dst)) {
#ifdef AARCH64
/* TODO i#5844: Memory references involving SVE registers are not
* supported yet. To be implemented as part of scatter/gather work.
*/
if (opnd_is_base_disp(dst) &&
(reg_is_z(opnd_get_base(dst)) || reg_is_z(opnd_get_index(dst)))) {
if (!reported_sg_warning) {
dr_fprintf(STDERR,
"WARNING: Scatter/gather is not supported, results will "
"be inaccurate\n");
reported_sg_warning = true;
}
continue;
}
#endif
instrument_mem(drcontext, bb, where, dst, true);
}
}

/* insert code to call clean_call for processing the buffer */
Expand Down
50 changes: 43 additions & 7 deletions api/samples/memval_simple.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ static int tls_idx;
static drx_buf_t *write_buffer;
static drx_buf_t *trace_buffer;

#ifdef AARCH64
static bool reported_sg_warning = false;
#endif

/* Requires that hex_buf be at least as long as 2*memref->size + 1. */
static char *
write_hexdump(char *hex_buf, byte *write_base, mem_ref_t *mem_ref)
Expand Down Expand Up @@ -322,14 +326,31 @@ handle_post_write(void *drcontext, instrlist_t *ilist, instr_t *where, reg_id_t
* this.
*/
for (i = 0; i < instr_num_dsts(prev_instr); ++i) {
if (opnd_is_memory_reference(instr_get_dst(prev_instr, i))) {
const opnd_t dst = instr_get_dst(prev_instr, i);
if (opnd_is_memory_reference(dst)) {
if (seen_memref) {
DR_ASSERT_MSG(false, "Found inst with multiple memory destinations");
break;
}

#ifdef AARCH64
/* TODO i#5844: Memory references involving SVE registers are not
* supported yet. To be implemented as part of scatter/gather work.
*/
if (opnd_is_base_disp(dst) &&
(reg_is_z(opnd_get_base(dst)) || reg_is_z(opnd_get_index(dst)))) {
if (!reported_sg_warning) {
dr_fprintf(STDERR,
"WARNING: Scatter/gather is not supported, results "
"will be inaccurate\n");
reported_sg_warning = true;
}
continue;
}
#endif

seen_memref = true;
instrument_post_write(drcontext, ilist, where, instr_get_dst(prev_instr, i),
prev_instr, reg_addr);
instrument_post_write(drcontext, ilist, where, dst, prev_instr, reg_addr);
}
}
}
Expand Down Expand Up @@ -377,14 +398,29 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *wher
* we assume no instruction has multiple distinct memory destination operands.
*/
for (i = 0; i < instr_num_dsts(instr_operands); ++i) {
if (opnd_is_memory_reference(instr_get_dst(instr_operands, i))) {
const opnd_t dst = instr_get_dst(instr_operands, i);
if (opnd_is_memory_reference(dst)) {
if (seen_memref) {
DR_ASSERT_MSG(false, "Found inst with multiple memory destinations");
break;
}
data->reg_addr = instrument_pre_write(drcontext, bb, where,
data->last_opcode, instr_operands,
instr_get_dst(instr_operands, i));
#ifdef AARCH64
/* TODO i#5844: Memory references involving SVE registers are not
* supported yet. To be implemented as part of scatter/gather work.
*/
if (opnd_is_base_disp(dst) &&
(reg_is_z(opnd_get_base(dst)) || reg_is_z(opnd_get_index(dst)))) {
if (!reported_sg_warning) {
dr_fprintf(STDERR,
"WARNING: Scatter/gather is not supported, results "
"will be inaccurate\n");
reported_sg_warning = true;
}
continue;
}
#endif
data->reg_addr = instrument_pre_write(
drcontext, bb, where, data->last_opcode, instr_operands, dst);
seen_memref = true;
}
}
Expand Down
2 changes: 2 additions & 0 deletions clients/drcachesim/tests/burst_gencode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ class code_generator_t {
#ifdef X86
replace = INSTR_CREATE_lahf(dc);
#elif defined(AARCH64)
// OP_psb requires SPE feature.
proc_set_feature(FEATURE_SPE, true);
replace = INSTR_CREATE_psb_csync(dc);
#elif defined(ARM)
replace = INSTR_CREATE_yield(dc);
Expand Down
52 changes: 44 additions & 8 deletions clients/drcachesim/tracer/tracer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,10 @@ static void *trace_thread_cb_user_data;
static bool thread_filtering_enabled;
bool attached_midway;

#ifdef AARCH64
static bool reported_sg_warning = false;
#endif

static bool
bbdup_instr_counting_enabled()
{
Expand Down Expand Up @@ -1304,18 +1308,50 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst

/* insert code to add an entry for each memory reference opnd */
for (i = 0; i < instr_num_srcs(instr_operands); i++) {
if (opnd_is_memory_reference(instr_get_src(instr_operands, i))) {
adjust = instrument_memref(
drcontext, ud, bb, where, reg_ptr, adjust, instr_operands,
instr_get_src(instr_operands, i), i, false, pred, mode);
const opnd_t src = instr_get_src(instr_operands, i);
if (opnd_is_memory_reference(src)) {
#ifdef AARCH64
/* TODO i#5844: Memory references involving SVE registers are not
* supported yet. To be implemented as part of scatter/gather work.
*/
if (opnd_is_base_disp(src) &&
(reg_is_z(opnd_get_base(src)) || reg_is_z(opnd_get_index(src)))) {
if (!reported_sg_warning) {
NOTIFY(
0,
"WARNING: Scatter/gather is not supported, results will be "
"inaccurate\n");
reported_sg_warning = true;
}
continue;
}
#endif
adjust = instrument_memref(drcontext, ud, bb, where, reg_ptr, adjust,
instr_operands, src, i, false, pred, mode);
}
}

for (i = 0; i < instr_num_dsts(instr_operands); i++) {
if (opnd_is_memory_reference(instr_get_dst(instr_operands, i))) {
adjust = instrument_memref(
drcontext, ud, bb, where, reg_ptr, adjust, instr_operands,
instr_get_dst(instr_operands, i), i, true, pred, mode);
const opnd_t dst = instr_get_dst(instr_operands, i);
if (opnd_is_memory_reference(dst)) {
#ifdef AARCH64
/* TODO i#5844: Memory references involving SVE registers are not
* supported yet. To be implemented as part of scatter/gather work.
*/
if (opnd_is_base_disp(dst) &&
(reg_is_z(opnd_get_base(dst)) || reg_is_z(opnd_get_index(dst)))) {
if (!reported_sg_warning) {
NOTIFY(
0,
"WARNING: Scatter/gather is not supported, results will be "
"inaccurate\n");
reported_sg_warning = true;
}
continue;
}
#endif
adjust = instrument_memref(drcontext, ud, bb, where, reg_ptr, adjust,
instr_operands, dst, i, true, pred, mode);
}
}
if (adjust != 0)
Expand Down
2 changes: 1 addition & 1 deletion clients/drdisas/drdisas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ main(int argc, const char *argv[])
#endif

#ifdef AARCH64
dr_set_sve_vl(op_sve_vl.get_value());
dr_set_sve_vector_length(op_sve_vl.get_value());
#endif

// XXX i#4021: arm not yet supported.
Expand Down
14 changes: 5 additions & 9 deletions core/arch/aarch64/aarch64.asm
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,7 @@ START_FILE
#endif

/* sizeof(priv_mcontext_t) rounded up to a multiple of 16 */
#define PRIV_MCONTEXT_SIZE 800

/* offset of priv_mcontext_t in dr_mcontext_t */
#define PRIV_MCONTEXT_OFFSET 16

#if PRIV_MCONTEXT_OFFSET < 16 || PRIV_MCONTEXT_OFFSET % 16 != 0
# error PRIV_MCONTEXT_OFFSET
#endif
#define PRIV_MCONTEXT_SIZE 3424

/* offsetof(spill_state_t, r0) */
#define spill_state_r0_OFFSET 0
Expand All @@ -76,7 +69,7 @@ START_FILE
/* offsetof(priv_mcontext_t, simd) */
#define simd_OFFSET (16 * ARG_SZ*2 + 32)
/* offsetof(dcontext_t, dstack) */
#define dstack_OFFSET 0x368
#define dstack_OFFSET 0xda8
/* offsetof(dcontext_t, is_exiting) */
#define is_exiting_OFFSET (dstack_OFFSET+1*ARG_SZ)
/* offsetof(struct tlsdesc_t, arg) */
Expand Down Expand Up @@ -252,6 +245,9 @@ save_priv_mcontext_helper:
st1 {v20.2d-v23.2d}, [x4], #64
st1 {v24.2d-v27.2d}, [x4], #64
st1 {v28.2d-v31.2d}, [x4], #64
/* TODO i#5365: Save Z/P regs as well? Will require runtime check of
* ID_AA64PFR0_EL1 for FEAT_SVE.
*/
ret

DECLARE_EXPORTED_FUNC(dr_app_start)
Expand Down
Loading

0 comments on commit f646a63

Please sign in to comment.