Skip to content

Commit

Permalink
i#3044 AArch64 SVE codec: Add ADR instructions (#5866)
Browse files Browse the repository at this point in the history
This patch adds the appropriate macros, tests and codec entries to
encode the following variants:

ADR     <Zd>.D, [<Zn>.D, <Zm>.D, SXTW <amount>]
ADR     <Zd>.D, [<Zn>.D, <Zm>.D, UXTW <amount>]
ADR     <Zd>.<Ts>, [<Zn>.<Ts>, <Zm>.<Ts>, <extend> <amount>]

and the required changes to support the use of vector registers in
base+disp address operands.

This required two main changes:

1) Adding element size to base disp operand

 ADR uses Z vector registers for the base and index register so we
 need to be able to specify the element size in the operand.

 This adds an element size field to base+disp operands (AArch64
 only). The following sizes are supported:
     OPSZ_4: Single
     OPSZ_8: Double

 For example, the memory operand for:
     adr z0.d, [z1.d, z2.d, lsl 2]

  could be created with the call:
  opnd_create_vector_base_disp_aarch64(DR_REG_Z1, DR_REG_Z2,
                                       OPSZ_8, // Element size
                                       DR_EXTEND_UXTX, // LSL
                                       true, 0, 0,
                                       OPSZ_0, // Transfers 0 bytes
                                       2); // Shift amount

 This will also be needed for SVE scatter/gather instructions.

2) Move DR_REG_Z* < 256

 opnd_t only stores the first 8 bits of the reg_id_t values for the base
 and index, so in order to use a Z register in an address operand we need
 to make sure the DR_REG_Z* constants are < 256.

 While I was there I also added the Z and P registers and one system
 register to the dr_reg_fixer array as they were previously missing.
 The B, H, S, D, Q registers have been changed to map to the Z registers
 because they overlap with the lower 128 bits of the Z registers.

Issues: #3044
  • Loading branch information
jackgallagher-arm committed Feb 23, 2023
1 parent 218327a commit de22aaf
Show file tree
Hide file tree
Showing 16 changed files with 850 additions and 87 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-docs.yml
Expand Up @@ -90,7 +90,7 @@ jobs:
# We only use a non-zero build # when making multiple manual builds in one day.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=9.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=9.91.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/ci-package.yml
Expand Up @@ -102,7 +102,7 @@ jobs:
# We only use a non-zero build # when making multiple manual builds in one day.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=9.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=9.91.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -194,7 +194,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=9.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=9.91.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -282,7 +282,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=9.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=9.91.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -370,7 +370,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=9.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=9.91.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -450,7 +450,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER=9.90.$((`git log -n 1 --format=%ct` / (60*60*24)))
export VERSION_NUMBER=9.91.$((`git log -n 1 --format=%ct` / (60*60*24)))
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
fi
Expand Down Expand Up @@ -535,7 +535,7 @@ jobs:
# XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt.
run: |
if test -z "${{ github.event.inputs.version }}"; then
export VERSION_NUMBER="9.90.$((`git log -n 1 --format=%ct` / (60*60*24)))"
export VERSION_NUMBER="9.91.$((`git log -n 1 --format=%ct` / (60*60*24)))"
export PREFIX="cronbuild-"
else
export VERSION_NUMBER=${{ github.event.inputs.version }}
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Expand Up @@ -567,7 +567,7 @@ endif (EXISTS "${PROJECT_SOURCE_DIR}/.svn")

# N.B.: When updating this, update all the default versions in ci-package.yml
# and ci-docs.yml. We should find a way to share (xref i#1565).
set(VERSION_NUMBER_DEFAULT "9.90.${VERSION_NUMBER_PATCHLEVEL}")
set(VERSION_NUMBER_DEFAULT "9.91.${VERSION_NUMBER_PATCHLEVEL}")
# do not store the default VERSION_NUMBER in the cache to prevent a stale one
# from preventing future version updates in a pre-existing build dir
set(VERSION_NUMBER "" CACHE STRING "Version number: leave empty for default")
Expand Down
7 changes: 7 additions & 0 deletions api/docs/release.dox
Expand Up @@ -142,6 +142,10 @@ changes:
- Reduced the value of #DR_NOTE_FIRST_RESERVED. This is not expected to cause
problems unless clients are directly choosing high note values without using
drmgr_reserve_note_range().
- Changed the values of the AArch64 DR_REG_Z* constants so that Z registers can be
used in base+disp operands in SVE scatter/gather instructions. This breaks binary
compatibility for clients built against an older version of opnd_api.h, but source
code compatibility is unchanged.

Further non-compatibility-affecting changes include:
- Added AArchXX support for attaching to a running process.
Expand Down Expand Up @@ -203,6 +207,9 @@ Further non-compatibility-affecting changes include:
- Added opnd_create_increment_reg() to create a register from an existing
register whose register number is incremented by some amount, wrapping
at the max register number for that register.
- Added opnd_create_vector_base_disp_aarch64() and reg_is_z() for creating
memory address operands that use SVE Z registers with a specified element
size.

**************************************************
<hr>
Expand Down
4 changes: 4 additions & 0 deletions core/arch/arch.c
Expand Up @@ -801,6 +801,10 @@ d_r_arch_init(void)
}
#endif
}

/* Ensure addressing registers fit into base+disp operand base and index fields. */
IF_AARCHXX(ASSERT_BITFIELD_TRUNCATE(REG_SPECIFIER_BITS, DR_REG_MAX_ADDRESSING_REG));

mangle_init();
}

Expand Down
108 changes: 108 additions & 0 deletions core/ir/aarch64/codec.c
Expand Up @@ -5252,6 +5252,35 @@ encode_opnd_hs_fsz(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_ou
return false;
}

/* z_sz_sd # sve vector reg, element size depending on sz. */

static inline bool
encode_opnd_z_sz_sd(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    /* Only plain register operands can be encoded here. */
    IF_RETURN_FALSE(!opnd_is_reg(opnd))

    /* Index of the register relative to Z0; anything outside Z0-Z31 is rejected. */
    const uint z_index = (uint)(opnd_get_reg(opnd) - DR_REG_Z0);
    IF_RETURN_FALSE(z_index >= 32)

    /* Map the operand's element size onto the sz bit: single -> 0, double -> 1. */
    const opnd_size_t elsz = opnd_get_vector_element_size(opnd);
    uint sz_bit;
    if (elsz == OPSZ_4) {
        sz_bit = 0;
    } else if (elsz == OPSZ_8) {
        sz_bit = 1;
    } else {
        RETURN_FALSE;
    }

    /* sz lives in bit 22, the register number in the low five bits. */
    *enc_out |= (sz_bit << 22) | z_index;

    return true;
}

static inline bool
decode_opnd_z_sz_sd(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    /* Bit 22 (sz) selects the element size: clear -> single, set -> double. */
    aarch64_reg_offset elsz = SINGLE_REG;
    if (TEST(1u << 22, enc))
        elsz = DOUBLE_REG;

    return decode_single_sized(DR_REG_Z0, DR_REG_Z31, 0, 5, elsz, 0, enc, opnd);
}

/* dq5_sz: D/Q register at bit position 5; bit 22 selects Q reg */

static inline bool
Expand Down Expand Up @@ -5627,6 +5656,85 @@ encode_opnd_wx_size_16_zr(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint
return encode_wx_size_reg(false, 16, opnd, enc_out);
}

/* svemem_vec_vec_idx: SVE memory address [<Zn>.<T>, <Zm>.<T>{, <mod> <amount>}] */

/* Translates the 2-bit opc field of an SVE vector+vector address into the element
 * size and index extend type it selects:
 *   0b00 -> OPSZ_8, SXTW
 *   0b01 -> OPSZ_8, UXTW
 *   0b10 -> OPSZ_4, UXTX
 *   0b11 -> OPSZ_8, UXTX
 * Returns false, leaving both outputs untouched, for any other value.
 */
static inline bool
decode_svemem_vec_vec_opc(uint opc, OUT opnd_size_t *element_size,
                          OUT dr_extend_type_t *extend_type)
{
    if (opc > 0b11)
        return false;

    /* 0b10 is the only form operating on single (32-bit) elements. */
    *element_size = (opc == 0b10) ? OPSZ_4 : OPSZ_8;

    if (opc == 0b00) {
        *extend_type = DR_EXTEND_SXTW;
    } else if (opc == 0b01) {
        *extend_type = DR_EXTEND_UXTW;
    } else {
        /* DR_EXTEND_UXTX is an alias for LSL. LSL preferred in disassembly. */
        *extend_type = DR_EXTEND_UXTX;
    }
    return true;
}

static inline bool
decode_opnd_svemem_vec_vec_idx(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    /* Bits 23:22 pick the element size and the extend applied to the index. */
    opnd_size_t elsz;
    dr_extend_type_t extend;
    const bool opc_ok = decode_svemem_vec_vec_opc(BITS(enc, 23, 22), &elsz, &extend);
    if (!opc_ok)
        return false;

    /* Shift amount is the 2-bit field at bit 10; Zn is at bit 5 and Zm at bit 16. */
    const uint shift = extract_uint(enc, 10, 2);
    const reg_id_t base_reg = DR_REG_Z0 + extract_uint(enc, 5, 5);
    const reg_id_t index_reg = DR_REG_Z0 + extract_uint(enc, 16, 5);

    /* The only user of this operand is SVE ADR, which computes addresses without
     * accessing memory, hence the zero transfer size. Should another instruction
     * start using this operand, a real transfer size must be computed here.
     */
    ASSERT(opcode == OP_adr);
    const opnd_size_t transfer_size = OPSZ_0;

    /* The index is reported as scaled only when the shift amount is non-zero. */
    const bool is_scaled = shift != 0;

    *opnd = opnd_create_vector_base_disp_aarch64(base_reg, index_reg, elsz, extend,
                                                 is_scaled,
                                                 /*disp=*/0,
                                                 /*flags=*/0, transfer_size, shift);
    return true;
}

/* Encodes an SVE vector+vector address operand [<Zn>.<T>, <Zm>.<T>{, <mod> <amount>}].
 *
 * enc:     the encoding template; its bits 23:22 (opc) determine which element size
 *          and extend type the operand is required to have.
 * opnd:    the base+disp operand to encode; base and index must both be Z0-Z31.
 * enc_out: receives Zm at bit 16, the shift amount at bit 10 and Zn at bit 5.
 *
 * Returns false if the operand is not a base+disp operand, uses registers outside
 * Z0-Z31, does not match the element size / extend type implied by opc, or carries
 * a shift amount that does not fit the 2-bit field.
 */
static inline bool
encode_opnd_svemem_vec_vec_idx(uint enc, int opcode, byte *pc, opnd_t opnd,
                               OUT uint *enc_out)
{
    if (!opnd_is_base_disp(opnd))
        return false;

    /* Unsigned subtraction makes registers below Z0 wrap to large values, so one
     * upper-bound comparison rejects everything outside Z0-Z31.
     */
    const uint zn = (uint)(opnd_get_base(opnd) - DR_REG_Z0);
    const uint zm = (uint)(opnd_get_index(opnd) - DR_REG_Z0);
    if (zn >= 32 || zm >= 32)
        return false;

    /* The opc bits are fixed by the template: the operand has to agree with them. */
    opnd_size_t element_size;
    dr_extend_type_t extend_type;
    if (!decode_svemem_vec_vec_opc(BITS(enc, 23, 22), &element_size, &extend_type) ||
        element_size != opnd_get_vector_element_size(opnd))
        return false;

    uint msz = 0;
    if (extend_type != opnd_get_index_extend(opnd, NULL, &msz))
        return false;

    /* The shift amount is a 2-bit field at bits 11:10 (the decoder reads it with
     * extract_uint(enc, 10, 2)). A larger value would spill into bit 12 and corrupt
     * the fixed opcode bits, so refuse to encode it.
     */
    if (msz > 3)
        return false;

    *enc_out |= (zm << 16) | (msz << 10) | (zn << 5);

    return true;
}

/* fpimm13: floating-point immediate for scalar fmov */

static inline bool
Expand Down
3 changes: 3 additions & 0 deletions core/ir/aarch64/codec_sve.txt
Expand Up @@ -46,6 +46,9 @@
00100101xx10000011xxxxxxxxxxxxxx n 9 SVE add z_size_bhsd_0 : z_size_bhsd_0 imm8_5 lsl shift1
00000100011xxxxx01010xxxxxxxxxxx n 934 SVE addpl x0sp : x16sp simm6_5
00000100001xxxxx01010xxxxxxxxxxx n 935 SVE addvl x0sp : x16sp simm6_5
00000100001xxxxx1010xxxxxxxxxxxx n 15 SVE adr z_d_0 : svemem_vec_vec_idx
00000100011xxxxx1010xxxxxxxxxxxx n 15 SVE adr z_d_0 : svemem_vec_vec_idx
000001001x1xxxxx1010xxxxxxxxxxxx n 15 SVE adr z_sz_sd : svemem_vec_vec_idx
00000100xx011010000xxxxxxxxxxxxx n 21 SVE and z0 : p10_lo z0 z5 bhsd_sz
00000101100000xxxxxxxxxxxxxxxxxx n 21 SVE and z_imm13_bhsd_0 : z_imm13_bhsd_0 imm13_const
001001010000xxxx01xxxx0xxxx0xxxx n 21 SVE and p_b_0 : p10_zer p_b_5 p_b_16
Expand Down
43 changes: 25 additions & 18 deletions core/ir/aarch64/encode.c
Expand Up @@ -57,6 +57,11 @@ const char *const reg_names[] = {
"w20", "w21", "w22", "w23", "w24", "w25", "w26", "w27", "w28", "w29",
"w30", "wsp", "wzr",

"z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z8", "z9",
"z10", "z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19",
"z20", "z21", "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29",
"z30", "z31",

"q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
"q10", "q11", "q12", "q13", "q14", "q15", "q16", "q17", "q18", "q19",
"q20", "q21", "q22", "q23", "q24", "q25", "q26", "q27", "q28", "q29",
Expand Down Expand Up @@ -109,11 +114,6 @@ const char *const reg_names[] = {
"pmevtyper28_el0", "pmevtyper29_el0", "pmevtyper30_el0", "pmccfiltr_el0",
"spsr_irq", "spsr_abt", "spsr_und", "spsr_fiq", "tpidr_el0", "tpidrro_el0",

"z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z8", "z9",
"z10", "z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19",
"z20", "z21", "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29",
"z30", "z31",

"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9",
"p10", "p11", "p12", "p13", "p14", "p15",

Expand All @@ -137,18 +137,19 @@ const reg_id_t dr_reg_fixer[] = { REG_NULL,
XREGS /* W0-WSP */
#undef XREGS

#define QREGS \
DR_REG_Q0, DR_REG_Q1, DR_REG_Q2, DR_REG_Q3, DR_REG_Q4, DR_REG_Q5, DR_REG_Q6, \
DR_REG_Q7, DR_REG_Q8, DR_REG_Q9, DR_REG_Q10, DR_REG_Q11, DR_REG_Q12, DR_REG_Q13, \
DR_REG_Q14, DR_REG_Q15, DR_REG_Q16, DR_REG_Q17, DR_REG_Q18, DR_REG_Q19, \
DR_REG_Q20, DR_REG_Q21, DR_REG_Q22, DR_REG_Q23, DR_REG_Q24, DR_REG_Q25, \
DR_REG_Q26, DR_REG_Q27, DR_REG_Q28, DR_REG_Q29, DR_REG_Q30, DR_REG_Q31,
QREGS /* Q0-Q31*/
QREGS /* D0-D31 */
QREGS /* S0-S31 */
QREGS /* H0-H31 */
QREGS /* B0-B31 */
#undef QREGS
#define ZREGS \
DR_REG_Z0, DR_REG_Z1, DR_REG_Z2, DR_REG_Z3, DR_REG_Z4, DR_REG_Z5, DR_REG_Z6, \
DR_REG_Z7, DR_REG_Z8, DR_REG_Z9, DR_REG_Z10, DR_REG_Z11, DR_REG_Z12, DR_REG_Z13, \
DR_REG_Z14, DR_REG_Z15, DR_REG_Z16, DR_REG_Z17, DR_REG_Z18, DR_REG_Z19, \
DR_REG_Z20, DR_REG_Z21, DR_REG_Z22, DR_REG_Z23, DR_REG_Z24, DR_REG_Z25, \
DR_REG_Z26, DR_REG_Z27, DR_REG_Z28, DR_REG_Z29, DR_REG_Z30, DR_REG_Z31,
ZREGS /* Z0-Z31 */
ZREGS /* Q0-Q31*/
ZREGS /* D0-D31 */
ZREGS /* S0-S31 */
ZREGS /* H0-H31 */
ZREGS /* B0-B31 */
#undef ZREGS

DR_REG_NZCV, DR_REG_FPCR, DR_REG_FPSR,
DR_REG_MDCCSR_EL0, DR_REG_DBGDTR_EL0, DR_REG_DBGDTRRX_EL0, DR_REG_SP_EL0,
Expand Down Expand Up @@ -185,7 +186,13 @@ const reg_id_t dr_reg_fixer[] = { REG_NULL,
DR_REG_PMEVTYPER26_EL0, DR_REG_PMEVTYPER27_EL0, DR_REG_PMEVTYPER28_EL0,
DR_REG_PMEVTYPER29_EL0, DR_REG_PMEVTYPER30_EL0, DR_REG_PMCCFILTR_EL0,
DR_REG_SPSR_IRQ, DR_REG_SPSR_ABT, DR_REG_SPSR_UND, DR_REG_SPSR_FIQ,
DR_REG_TPIDR_EL0, DR_REG_TPIDRRO_EL0
DR_REG_TPIDR_EL0, DR_REG_TPIDRRO_EL0,

DR_REG_P0, DR_REG_P1, DR_REG_P2, DR_REG_P3, DR_REG_P4, DR_REG_P5,
DR_REG_P6, DR_REG_P7, DR_REG_P8, DR_REG_P9, DR_REG_P10, DR_REG_P11,
DR_REG_P12, DR_REG_P13, DR_REG_P14, DR_REG_P15,

DR_REG_CNTVCT_EL0,
};
/* clang-format on */

Expand Down
6 changes: 6 additions & 0 deletions core/ir/aarch64/instr.c
Expand Up @@ -441,6 +441,12 @@ reg_is_fp(reg_id_t reg)
return false;
}

/* Returns true iff reg is one of the SVE Z registers, DR_REG_Z0 through DR_REG_Z31. */
bool
reg_is_z(reg_id_t reg)
{
    if (reg < DR_REG_Z0)
        return false;
    return reg <= DR_REG_Z31;
}

bool
instr_is_nop(instr_t *instr)
{
Expand Down
22 changes: 22 additions & 0 deletions core/ir/aarch64/instr_create_api.h
Expand Up @@ -11217,6 +11217,28 @@
#define INSTR_CREATE_prfw_sve_pred(dc, prfop, Pg, Rn) \
instr_create_0dst_3src(dc, OP_prfw, prfop, Pg, Rn)

/*
 * Creates an ADR instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    ADR     <Zd>.D, [<Zn>.D, <Zm>.D, SXTW <amount>]
 *    ADR     <Zd>.D, [<Zn>.D, <Zm>.D, UXTW <amount>]
 *    ADR     <Zd>.<Ts>, [<Zn>.<Ts>, <Zm>.<Ts>, <extend> <amount>]
 * \endverbatim
 *
 * Expands to instr_create_1dst_1src() with opcode OP_adr: \p Zd is the single
 * destination and \p Zn is the single source.
 *
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Zd   The destination vector register, Z (Scalable).
 * \param Zn   The first source vector base register with a register offset,
 *             constructed with one of:
 *             opnd_create_vector_base_disp_aarch64(Zn, Zm, OPSZ_8, DR_EXTEND_SXTW,
 *                                                  0, 0, 0, OPSZ_0, shift_amount)
 *             opnd_create_vector_base_disp_aarch64(Zn, Zm, OPSZ_8, DR_EXTEND_UXTW,
 *                                                  0, 0, 0, OPSZ_0, shift_amount)
 *             opnd_create_vector_base_disp_aarch64(Zn, Zm, elsz, DR_EXTEND_UXTX,
 *                                                  0, 0, 0, OPSZ_0, shift_amount)
 *             Note that this is the complete address operand (base Zn, index Zm,
 *             element size, extend and shift), not a bare register. The three
 *             consecutive 0 arguments are, in order, scaled, disp and flags.
 *             NOTE(review): the examples pass 0 for scaled even with a non-zero
 *             shift_amount -- confirm whether scaled should be true in that case.
 */
#define INSTR_CREATE_adr_sve(dc, Zd, Zn) instr_create_1dst_1src(dc, OP_adr, Zd, Zn)

/*
* Creates a LD2B instruction.
*
Expand Down
2 changes: 2 additions & 0 deletions core/ir/aarch64/opnd_defs.txt
Expand Up @@ -242,6 +242,7 @@
# elements, depending on bit 22 (sz)
---------x---------------------- sd_sz # element width of FP vector reg for single or double
---------x---------------------- hs_fsz # element width of FP vector reg for half or single
---------x-----------------xxxxx z_sz_sd # SVE vector reg, elsz depending on sz
---------x------------xxxxx----- dq5_sz # as dqx, but depending on the sz bit rather than the Q bit
---------x------------xxxxx----- wx_sz_5 # W/X register (or WZR/XZR) with size indicated in bit 22
---------x-xx------------------- i3_index_19 # Index value from 22, 20:19
Expand All @@ -259,6 +260,7 @@
--------??-??--------------xxxxx z_tszl19_bhsd_0 # z element register mediated by the tszl and tszh fields
--------??-??---------xxxxx----- z_tszl19_bhsd_5 # z element register mediated by the tszl and tszh fields
--------??-xxxxx---------------- wx_size_16_zr # GPR scalar register, register size, W or X depending on size bits
--------??-xxxxx----xxxxxxx----- svemem_vec_vec_idx # SVE memory address [<Zn>.<T>, <Zm>.<T>{, <mod> <amount>}]
--------??-xxxxxxxx------------- fpimm8_13 # floating-point immediate for scalar fmov
--------xx---------------------- b_sz # element width of a vector (8<<b_sz)
--------xx---------------------- hs_sz # element width of a vector (8<<hs_sz)
Expand Down

0 comments on commit de22aaf

Please sign in to comment.