Skip to content

Commit

Permalink
i#5365 AArch64: Change scatter/gather instructions to per-element size
Browse files Browse the repository at this point in the history
This makes the IR consistent with x86 which already uses the per-element
transfer size for the scatter/gather memory operand size.

Issues: #5365, #5036, #6561
  • Loading branch information
jackgallagher-arm committed Jan 23, 2024
1 parent e4da1a7 commit 3c83b78
Show file tree
Hide file tree
Showing 4 changed files with 477 additions and 510 deletions.
6 changes: 2 additions & 4 deletions core/ir/aarch64/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -7991,8 +7991,7 @@ decode_svemem_vec_sd_gpr16(uint size_bit, uint enc, int opcode, byte *pc,
const aarch64_reg_offset element_size =
BITS(enc, size_bit, size_bit) == single_bit_value ? SINGLE_REG : DOUBLE_REG;

const opnd_size_t mem_transfer =
opnd_size_from_bytes(scale * get_elements_in_sve_vector(element_size));
const opnd_size_t mem_transfer = opnd_size_from_bytes(scale);

const reg_id_t zn = decode_vreg(Z_REG, extract_uint(enc, 5, 5));
ASSERT(reg_is_z(zn));
Expand Down Expand Up @@ -8038,8 +8037,7 @@ encode_svemem_vec_sd_gpr16(uint size_bit, uint enc, int opcode, byte *pc, opnd_t
const aarch64_reg_offset msz = BITS(enc, 24, 23);
const uint scale = 1 << msz;

const opnd_size_t mem_transfer =
opnd_size_from_bytes(scale * get_elements_in_sve_vector(element_size));
const opnd_size_t mem_transfer = opnd_size_from_bytes(scale);
IF_RETURN_FALSE(opnd_get_size(opnd) != mem_transfer)

uint xreg_number;
Expand Down
96 changes: 38 additions & 58 deletions core/ir/aarch64/instr_create_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -11609,8 +11609,10 @@
* dr_get_sve_vector_length() / 8))
* For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant: opnd_create_base_disp(Rn,
* DR_REG_NULL, 0, imm, opnd_size_from_bytes(dr_get_sve_vector_length() / 8))
* For the vector+scalar variant: opnd_create_base_disp_aarch64(Zn, Rm,
* DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1)
* For the [\<Zn\>.D{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1, 0)
* For the [\<Zn\>.S{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1, 0)
*/
#define INSTR_CREATE_ldnt1b_sve_pred(dc, Zt, Pg, Rn) \
INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnt1b, Zt, Rn, Pg), DR_PRED_MASKED)
Expand Down Expand Up @@ -11681,16 +11683,10 @@
* For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant:
* opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm,
* opnd_size_from_bytes(dr_get_sve_vector_length() / 8))
* For the [\<Zn\>.D{, \<Xm\>}] variant:
* opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_8,
* DR_EXTEND_UXTX, 0, 0, 0,
* opnd_size_from_bytes(proc_get_vector_length_bytes() / 8),
* 0)
* For the [\<Zn\>.S{, \<Xm\>}] variant:
* opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_4,
* DR_EXTEND_UXTX, 0, 0, 0,
* opnd_size_from_bytes(proc_get_vector_length_bytes() / 4),
* 0)
* For the [\<Zn\>.D{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1, 0)
* For the [\<Zn\>.S{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1, 0)
*/
#define INSTR_CREATE_stnt1b_sve_pred(dc, Zt, Pg, Rn) \
INSTR_PRED(instr_create_1dst_2src(dc, OP_stnt1b, Rn, Zt, Pg), DR_PRED_MASKED)
Expand Down Expand Up @@ -12949,10 +12945,8 @@
* For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant:
* opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm,
* opnd_size_from_bytes(dr_get_sve_vector_length() / 8))
* For the [\<Zn\>.D{, \<Xm\>}] variant:
* opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_8,
* DR_EXTEND_UXTX, 0, 0, 0,
* opnd_size_from_bytes(proc_get_vector_length_bytes()), 0)
* For the [\<Zn\>.D{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_8, 0)
*/
#define INSTR_CREATE_ldnt1d_sve_pred(dc, Zt, Pg, Rn) \
INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnt1d, Zt, Rn, Pg), DR_PRED_MASKED)
Expand All @@ -12979,15 +12973,10 @@
* For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant:
* opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm,
* opnd_size_from_bytes(dr_get_sve_vector_length() / 8))
* For the [\<Zn\>.D{, \<Xm\>}] variant:
* opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_8,
* DR_EXTEND_UXTX, 0, 0, 0,
* opnd_size_from_bytes(proc_get_vector_length_bytes() / 4),
* 0)
* For the [\<Zn\>.S{, \<Xm\>}] variant:
* opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_4,
* DR_EXTEND_UXTX, 0, 0, 0,
* opnd_size_from_bytes(proc_get_vector_length_bytes() / 2), 0)
* For the [\<Zn\>.D{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_2, 0)
* For the [\<Zn\>.S{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_2, 0)
*/
#define INSTR_CREATE_ldnt1h_sve_pred(dc, Zt, Pg, Rn) \
INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnt1h, Zt, Rn, Pg), DR_PRED_MASKED)
Expand All @@ -13014,13 +13003,10 @@
* For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant:
* opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm,
* opnd_size_from_bytes(dr_get_sve_vector_length() / 8))
* For the [\<Zn\>.D{, \<Xm\>}] variant:
* opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_8,
* DR_EXTEND_UXTX, 0, 0, 0,
* opnd_size_from_bytes(proc_get_vector_length_bytes() / 2), 0)
* For the [\<Zn\>.S{, \<Xm\>}] variant:
* opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0,
* 0, opnd_size_from_bytes(proc_get_vector_length_bytes()), 0)
* For the [\<Zn\>.D{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0)
* For the [\<Zn\>.S{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0)
*/
#define INSTR_CREATE_ldnt1w_sve_pred(dc, Zt, Pg, Rn) \
INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnt1w, Zt, Rn, Pg), DR_PRED_MASKED)
Expand Down Expand Up @@ -13299,10 +13285,8 @@
* For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant:
* opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm,
* opnd_size_from_bytes(dr_get_sve_vector_length() / 8))
* For the [\<Zn\>.D{, \<Xm\>}] variant:
* opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_8,
* DR_EXTEND_UXTX, 0, 0, 0,
* opnd_size_from_bytes(proc_get_vector_length_bytes()), 0)
* For the [\<Zn\>.D{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_8, 0)
*/
#define INSTR_CREATE_stnt1d_sve_pred(dc, Zt, Pg, Rn) \
INSTR_PRED(instr_create_1dst_2src(dc, OP_stnt1d, Rn, Zt, Pg), DR_PRED_MASKED)
Expand All @@ -13329,14 +13313,10 @@
* For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant:
* opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm,
* opnd_size_from_bytes(dr_get_sve_vector_length() / 8))
* For the [\<Zn\>.D{, \<Xm\>}] variant:
* opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_8,
* DR_EXTEND_UXTX, 0, 0, 0,
* opnd_size_from_bytes(proc_get_vector_length_bytes() / 4), 0)
* For the [\<Zn\>.S{, \<Xm\>}] variant:
* opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_4,
* DR_EXTEND_UXTX, 0, 0, 0,
* opnd_size_from_bytes(proc_get_vector_length_bytes() / 2), 0)
* For the [\<Zn\>.D{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_2, 0)
* For the [\<Zn\>.S{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_2, 0)
*/
#define INSTR_CREATE_stnt1h_sve_pred(dc, Zt, Pg, Rn) \
INSTR_PRED(instr_create_1dst_2src(dc, OP_stnt1h, Rn, Zt, Pg), DR_PRED_MASKED)
Expand All @@ -13363,14 +13343,10 @@
* For the [\<Xn|SP\>{, #\<imm\>, MUL VL}] variant:
* opnd_create_base_disp(Rn, DR_REG_NULL, 0, imm,
* opnd_size_from_bytes(dr_get_sve_vector_length() / 8))
* For the [\<Zn\>.D{, \<Xm\>}] variant:
* opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_8,
* DR_EXTEND_UXTX, 0, 0, 0,
* opnd_size_from_bytes(proc_get_vector_length_bytes() / 2), 0)
* For the [\<Zn\>.S{, \<Xm\>}] variant:
* opnd_create_vector_base_disp_aarch64(Zn, Xm, OPSZ_4,
* DR_EXTEND_UXTX, 0, 0, 0,
* opnd_size_from_bytes(proc_get_vector_length_bytes()), 0)
* For the [\<Zn\>.D{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0)
* For the [\<Zn\>.S{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0)
*/
#define INSTR_CREATE_stnt1w_sve_pred(dc, Zt, Pg, Rn) \
INSTR_PRED(instr_create_1dst_2src(dc, OP_stnt1w, Rn, Zt, Pg), DR_PRED_MASKED)
Expand Down Expand Up @@ -17601,8 +17577,10 @@
* \param Pg The governing predicate register, P (Predicate).
* \param Zn The first source vector base register with a register offset,
* constructed with the function:
* opnd_create_vector_base_disp_aarch64(Zn, Rm,
* OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0)
* For the [\<Zn\>.D{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1, 0)
* For the [\<Zn\>.S{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1, 0)
*/
#define INSTR_CREATE_ldnt1sb_sve_pred(dc, Zt, Pg, Zn) \
INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnt1sb, Zt, Zn, Pg), DR_PRED_MASKED)
Expand All @@ -17620,8 +17598,10 @@
* \param Pg The governing predicate register, P (Predicate).
* \param Zn The first source vector base register with a register offset,
* constructed with the function:
* opnd_create_vector_base_disp_aarch64(Zn, Rm,
* OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_8, 0)
* For the [\<Zn\>.D{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_2, 0)
* For the [\<Zn\>.S{, \<Xm\>}] variant: opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_2, 0)
*/
#define INSTR_CREATE_ldnt1sh_sve_pred(dc, Zt, Pg, Zn) \
INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnt1sh, Zt, Zn, Pg), DR_PRED_MASKED)
Expand All @@ -17638,8 +17618,8 @@
* \param Pg The governing predicate register, P (Predicate).
* \param Zn The first source vector base register with a register offset,
* constructed with the function:
* opnd_create_vector_base_disp_aarch64(Zn, Rm,
* OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_16, 0)
* opnd_create_vector_base_disp_aarch64(
* Zn, Xm, OPSZ_8, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0)
*/
#define INSTR_CREATE_ldnt1sw_sve_pred(dc, Zt, Pg, Zn) \
INSTR_PRED(instr_create_1dst_2src(dc, OP_ldnt1sw, Zt, Zn, Pg), DR_PRED_MASKED)
Expand Down
Loading

0 comments on commit 3c83b78

Please sign in to comment.