Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#6760 AArch64: Use smaller data types for SVE P and FFR registers #6774

Merged
merged 9 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ jobs:
# We only use a non-zero build # when making multiple manual builds in one day.
jackgallagher-arm marked this conversation as resolved.
Show resolved Hide resolved
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=10.0.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/ci-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ jobs:
# We only use a non-zero build # when making multiple manual builds in one day.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=10.0.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -195,7 +195,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=10.0.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -283,7 +283,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=10.0.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -371,7 +371,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=10.0.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -451,7 +451,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=10.0.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -536,7 +536,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER="10.0.$((`git log -n 1 --format=%ct` / (60*60*24)))"
export VERSION_NUMBER="10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))"
export PREFIX="cronbuild-"
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,7 @@ endif (EXISTS "${PROJECT_SOURCE_DIR}/.svn")

# N.B.: When updating this, update all the default versions in ci-package.yml
# and ci-docs.yml. We should find a way to share (xref i#1565).
set(VERSION_NUMBER_DEFAULT "10.0.${VERSION_NUMBER_PATCHLEVEL}")
set(VERSION_NUMBER_DEFAULT "10.90.${VERSION_NUMBER_PATCHLEVEL}")
# do not store the default VERSION_NUMBER in the cache to prevent a stale one
# from preventing future version updates in a pre-existing build dir
set(VERSION_NUMBER "" CACHE STRING "Version number: leave empty for default")
Expand Down
3 changes: 3 additions & 0 deletions api/docs/release.dox
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ changes:
users sub-classing analyzer_tmpl_t).
- Converted #dynamorio::drmemtrace::analysis_tool_tmpl_t::interval_state_snapshot_t
into a class with all its data members marked private with public accessor functions.
- Changed the type of the AArch64 #dr_mcontext_t members svep and ffr to #dr_svep_t.
This breaks binary compatibility with clients that were built against versions of
DynamoRIO before this change.

Further non-compatibility-affecting changes include:
- Added DWARF-5 support to the drsyms library by linking in 4 static libraries
Expand Down
4 changes: 2 additions & 2 deletions core/arch/aarch64/aarch64.asm
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ START_FILE
#endif

/* sizeof(priv_mcontext_t) rounded up to a multiple of 16 */
#define PRIV_MCONTEXT_SIZE 3424
#define PRIV_MCONTEXT_SIZE 2480

/* offsetof(spill_state_t, r0) */
#define spill_state_r0_OFFSET 0
Expand All @@ -69,7 +69,7 @@ START_FILE
/* offsetof(priv_mcontext_t, simd) */
#define simd_OFFSET (16 * ARG_SZ*2 + 32)
/* offsetof(dcontext_t, dstack) */
#define dstack_OFFSET 0xda8
#define dstack_OFFSET 0x9f8
/* offsetof(dcontext_t, is_exiting) */
#define is_exiting_OFFSET (dstack_OFFSET+1*ARG_SZ)
/* offsetof(struct tlsdesc_t, arg) */
Expand Down
96 changes: 43 additions & 53 deletions core/arch/aarch64/emit_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -576,19 +576,18 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute)
}
if (proc_has_feature(FEATURE_SVE)) {
for (i = 0; i < 32; i++) {
/* ldr z(i), [x1, #(i mul vl)]
/* ldr z(i), [x1, #(i * sizeof(dr_simd_t))]
* From the SVE manual:
* "Load a vector register from a memory address generated by a
* 64-bit scalar base, plus an immediate offset in the range -256
* to 255 which is multiplied by the current vector register size
* in bytes."
*/
APP(ilist,
INSTR_CREATE_ldr(
dcontext, opnd_create_reg(DR_REG_Z0 + i),
opnd_create_base_disp(
DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t),
opnd_size_from_bytes(proc_get_vector_length_bytes()))));
INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_Z0 + i),
opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0,
i * sizeof(dr_simd_t),
OPSZ_SVE_VECLEN_BYTES)));
}
/* add x1, x(dcxt), #(offset svep) */
APP(ilist,
Expand All @@ -599,40 +598,36 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute)
* register for FFR load below, then restored from svep afterwards.
*/
for (i = 0; i < 15; i++) {
/* ldr p(i), [x1, #(i mul vl)] */
/* ldr p(i), [x1, #(i * sizeof(dr_svep_t))] */
APP(ilist,
INSTR_CREATE_ldr(
dcontext, opnd_create_reg(DR_REG_P0 + i),
opnd_create_base_disp(
DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t),
opnd_size_from_bytes(proc_get_vector_length_bytes() / 8))));
INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_P0 + i),
opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0,
i * sizeof(dr_svep_t),
OPSZ_SVE_PREDLEN_BYTES)));
}
/* There is no load instruction for the first-fault register (FFR). Use
* a temporary predicate register to load:
* add x2, x(dcxt), #(offset ffr)
* ldr p15, [x2, #(ffr)]
* ldr p15, [x2, #0]
* wrffr p15.b
* ldr p15, [x1, #(15 mul vl)]
* ldr p15, [x1, #(15 * sizeof(dr_svep_t)]
*/
APP(ilist,
XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X2),
opnd_create_reg(REG_DCXT),
OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, ffr))));
APP(ilist,
INSTR_CREATE_ldr(
dcontext, opnd_create_reg(DR_REG_P15),
opnd_create_base_disp(
DR_REG_X2, DR_REG_NULL, 0, 0,
opnd_size_from_bytes(proc_get_vector_length_bytes() / 8))));
INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_P15),
opnd_create_base_disp(DR_REG_X2, DR_REG_NULL, 0, 0,
OPSZ_SVE_PREDLEN_BYTES)));
APP(ilist,
INSTR_CREATE_wrffr_sve(dcontext,
opnd_create_reg_element_vector(DR_REG_P15, OPSZ_1)));
APP(ilist,
INSTR_CREATE_ldr(
dcontext, opnd_create_reg(DR_REG_P15),
opnd_create_base_disp(
DR_REG_X1, DR_REG_NULL, 0, 15 * sizeof(dr_simd_t),
opnd_size_from_bytes(proc_get_vector_length_bytes() / 8))));
INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_P15),
opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0,
15 * sizeof(dr_svep_t),
OPSZ_SVE_PREDLEN_BYTES)));
}
}

Expand Down Expand Up @@ -791,41 +786,39 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute)
}
if (proc_has_feature(FEATURE_SVE)) {
for (i = 0; i < 32; i++) {
/* str z(i), [x1, #(i mul vl)]
/* str z(i), [x1, #(i * sizeof(dr_simd_t))]
* "Store a vector register to a memory address generated by a
* 64-bit scalar base, plus an immediate offset in the range -256
* to 255 which is multiplied by the current vector register size
* in bytes."
*/
APP(ilist,
INSTR_CREATE_str(
dcontext,
opnd_create_base_disp(
DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t),
opnd_size_from_bytes(proc_get_vector_length_bytes())),
opnd_create_reg(DR_REG_Z0 + i)));
INSTR_CREATE_str(dcontext,
opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0,
i * sizeof(dr_simd_t),
OPSZ_SVE_VECLEN_BYTES),
opnd_create_reg(DR_REG_Z0 + i)));
}
/* add x1, x(dcxt), #(off) */
APP(ilist,
XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X1),
opnd_create_reg(REG_DCXT),
OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, svep))));
for (i = 0; i < 16; i++) {
/* str p(i), [x1, #(i mul vl)] */
/* str p(i), [x1, #(i * sizeof(dr_svep_t))] */
APP(ilist,
INSTR_CREATE_str(
dcontext,
opnd_create_base_disp(
DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t),
opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)),
opnd_create_reg(DR_REG_P0 + i)));
INSTR_CREATE_str(dcontext,
opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0,
i * sizeof(dr_svep_t),
OPSZ_SVE_PREDLEN_BYTES),
opnd_create_reg(DR_REG_P0 + i)));
}
/* There is no store instruction for the first-fault register (FFR). Use
* a temporary predicate register to store:
* rdffr p15.b
* add x2, x(dcxt), #(offset ffr)
* str p15, [x2, #(ffr)]
* ldr p15, [x1, #(15 mul vl)]
* rdffr p15.b ; Read FFR to P15
* add x2, x(dcxt), #(offset ffr) ; Calculate FFR dcxt offset
* str p15, [x2, #0] ; Save FFR to dcxt
* ldr p15, [x1, #(15 * sizeof(dr_svep_t))] ; Restore app P15 value from dcxt
*/
APP(ilist,
INSTR_CREATE_rdffr_sve(dcontext,
Expand All @@ -835,18 +828,15 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute)
opnd_create_reg(REG_DCXT),
OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, ffr))));
APP(ilist,
INSTR_CREATE_str(
dcontext,
opnd_create_base_disp(
DR_REG_X2, DR_REG_NULL, 0, 0,
opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)),
opnd_create_reg(DR_REG_P15)));
INSTR_CREATE_str(dcontext,
opnd_create_base_disp(DR_REG_X2, DR_REG_NULL, 0, 0,
OPSZ_SVE_PREDLEN_BYTES),
opnd_create_reg(DR_REG_P15)));
APP(ilist,
INSTR_CREATE_ldr(
dcontext, opnd_create_reg(DR_REG_P15),
opnd_create_base_disp(
DR_REG_X1, DR_REG_NULL, 0, 15 * sizeof(dr_simd_t),
opnd_size_from_bytes(proc_get_vector_length_bytes() / 8))));
INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_P15),
opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0,
15 * sizeof(dr_svep_t),
OPSZ_SVE_PREDLEN_BYTES)));
}
}

Expand Down
17 changes: 13 additions & 4 deletions core/arch/aarchxx/mangle.c
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,9 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci,
DR_REG_Q0, SIMD_REG_TYPE);
}

dstack_offs += MCXT_NUM_SIMD_SLOTS * sizeof(dr_simd_t);
dstack_offs += (MCXT_NUM_SIMD_SVE_SLOTS * sizeof(dr_simd_t)) +
(MCXT_NUM_SVEP_SLOTS * sizeof(dr_svep_t)) +
(MCXT_NUM_FFR_SLOTS * sizeof(dr_ffr_t));

/* Restore the registers we used. */
/* ldp x0, x1, [sp] */
Expand All @@ -577,6 +579,10 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci,
INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_X2),
opnd_create_base_disp(DR_REG_SP, DR_REG_NULL, 0,
REG_OFFSET(DR_REG_X2), OPSZ_8)));

/* Make dstack_offs 16-byte aligned. */
dstack_offs = ALIGN_FORWARD(dstack_offs, get_ABI_stack_alignment());

#else
/* vstmdb always does writeback */
PRE(ilist, instr,
Expand Down Expand Up @@ -655,9 +661,9 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci,

/* Make dstack_offs 8-byte algined, as we only accounted for 17 4-byte slots. */
dstack_offs += XSP_SZ;
#endif
ASSERT(cci->skip_save_flags || cci->num_simd_skip != 0 || cci->num_regs_skip != 0 ||
dstack_offs == (uint)get_clean_call_switch_stack_size());
#endif
return dstack_offs;
}

Expand All @@ -678,8 +684,11 @@ insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist
XINST_CREATE_move(dcontext, opnd_create_reg(DR_REG_X0),
opnd_create_reg(DR_REG_SP)));

current_offs =
get_clean_call_switch_stack_size() - (MCXT_NUM_SIMD_SLOTS * sizeof(dr_simd_t));
current_offs = ALIGN_BACKWARD(get_clean_call_switch_stack_size() -
(MCXT_NUM_SIMD_SVE_SLOTS * sizeof(dr_simd_t)) -
(MCXT_NUM_SVEP_SLOTS * sizeof(dr_svep_t)) -
(MCXT_NUM_FFR_SLOTS * sizeof(dr_ffr_t)),
16);

/* add x0, x0, current_offs */
PRE(ilist, instr,
Expand Down
45 changes: 37 additions & 8 deletions core/arch/arch.c
Original file line number Diff line number Diff line change
Expand Up @@ -3825,21 +3825,50 @@ dump_mcontext(priv_mcontext_t *context, file_t f, bool dump_xml)
}
#elif defined(AARCHXX)
{
int i, j;
# ifdef AARCH64
int words = proc_has_feature(FEATURE_SVE) ? 16 : 4;
const uint vector_length_bytes =
(proc_has_feature(FEATURE_SVE) ? proc_get_vector_length_bytes()
: opnd_size_in_bytes(reg_get_size(DR_REG_Q0)));
const uint u32_count = vector_length_bytes / sizeof(uint);
/* XXX: should be proc_num_simd_saved(). */
const uint num_simd_regs = MCXT_NUM_SIMD_SVE_SLOTS;
const char *reg_prefix = proc_has_feature(FEATURE_SVE) ? "z" : "q";
# else
int words = 4;
# endif
const uint u32_count =
sizeof(context->simd[0].u32) / sizeof(context->simd[0].u32[0]);
/* XXX: should be proc_num_simd_saved(). */
for (i = 0; i < proc_num_simd_registers(); i++) {
print_file(f, dump_xml ? "\t\tqd= \"0x" : "\tq%-3d= 0x", i);
for (j = 0; j < words; j++) {
const uint num_simd_regs = proc_num_simd_registers();
const char *reg_prefix = "q";
# endif
ASSERT(u32_count <=
sizeof(context->simd[0].u32) / sizeof(context->simd[0].u32[0]));
for (uint i = 0; i < num_simd_regs; i++) {
print_file(f, dump_xml ? "\t\t%s%d= \"0x" : "\t%s%-3d= 0x", reg_prefix, i);
for (uint j = 0; j < u32_count; j++) {
print_file(f, "%08x ", context->simd[i].u32[j]);
}
print_file(f, dump_xml ? "\"\n" : "\n");
}
/* TODO i#5365: SVE predicate registers and FFR dump. */
# ifdef AARCH64
if (proc_has_feature(FEATURE_SVE)) {
/* Dump SVE P registers */
const uint pred_u16_count =
(proc_get_vector_length_bytes() / 8) / sizeof(ushort);
for (uint i = 0; i < MCXT_NUM_SVEP_SLOTS; i++) {
print_file(f, dump_xml ? "\t\tp%d= \"0x" : "\tp%-3d= 0x", i);
for (size_t j = 0; j < pred_u16_count; j++) {
print_file(f, "%04x ", context->svep[i].u16[j]);
}
print_file(f, dump_xml ? "\"\n" : "\n");
}
/* Dump SVE FFR register */
print_file(f, dump_xml ? "\t\tffr= \"0x" : "\tffr = 0x");
for (size_t j = 0; j < pred_u16_count; j++) {
print_file(f, "%04x ", context->ffr.u16[j]);
}
print_file(f, dump_xml ? "\"\n" : "\n");
}
# endif
}
#endif

Expand Down
9 changes: 7 additions & 2 deletions core/arch/arm/arm.asm
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,22 @@ DECL_EXTERN(initstack_mutex)

#ifdef X64
# define MCXT_NUM_SIMD_SLOTS 32
# define SIMD_REG_SIZE 16
# define SIMD_REG_SIZE 64
# define MCXT_NUM_PRED_SLOTS 17 /* P regs + FFR */
# define PRED_REG_SIZE 8
# define NUM_GPR_SLOTS 33 /* incl flags */
# define GPR_REG_SIZE 8
#else
# define MCXT_NUM_SIMD_SLOTS 16
# define SIMD_REG_SIZE 16
# define MCXT_NUM_PRED_SLOTS 0
# define PRED_REG_SIZE 0
# define NUM_GPR_SLOTS 17 /* incl flags */
# define GPR_REG_SIZE 4
#endif
#define PRE_SIMD_PADDING 0
#define PRIV_MCXT_SIMD_SIZE (PRE_SIMD_PADDING + MCXT_NUM_SIMD_SLOTS*SIMD_REG_SIZE)
#define PRIV_MCXT_SIMD_SIZE (PRE_SIMD_PADDING + MCXT_NUM_SIMD_SLOTS*SIMD_REG_SIZE \
+ MCXT_NUM_PRED_SLOTS*PRED_REG_SIZE)
#define PRIV_MCXT_SIZE (NUM_GPR_SLOTS*GPR_REG_SIZE + PRIV_MCXT_SIMD_SIZE)
#define PRIV_MCXT_SP_FROM_SIMD (-(4*GPR_REG_SIZE)) /* flags, pc, lr, then sp */
#define PRIV_MCXT_PC_FROM_SIMD (-(2*GPR_REG_SIZE)) /* flags, then pc */
Expand Down
Loading
Loading