Skip to content

Commit

Permalink
i#4550: Support EVEX encoded vsqrt instructions. (#4739)
Browse files Browse the repository at this point in the history
Adds support for the EVEX encodings of vsqrtps, vsqrtpd, vsqrtss, and vsqrtsd. Also adds API tests and uncomments the relevant sections of the binutils decoder tests, with updated expectation files.

Fixes #4550
  • Loading branch information
khuey committed Feb 17, 2021
1 parent 64515f4 commit df18aa8
Show file tree
Hide file tree
Showing 8 changed files with 862 additions and 77 deletions.
34 changes: 26 additions & 8 deletions core/ir/x86/decode_table.c
Expand Up @@ -3380,14 +3380,14 @@ const instr_info_t prefix_extensions[][12] = {
{OP_sqrtss, 0xf30f5110, "sqrtss", Vss, xx, Wss, xx, xx, mrm, x, END_LIST},
{OP_sqrtpd, 0x660f5110, "sqrtpd", Vpd, xx, Wpd, xx, xx, mrm, x, END_LIST},
{OP_sqrtsd, 0xf20f5110, "sqrtsd", Vsd, xx, Wsd, xx, xx, mrm, x, END_LIST},
{OP_vsqrtps, 0x0f5110, "vsqrtps", Vvs, xx, Wvs, xx, xx, mrm|vex, x, END_LIST},
{OP_vsqrtss, 0xf30f5110, "vsqrtss", Vdq, xx, H12_dq, Wss, xx, mrm|vex, x, END_LIST},
{OP_vsqrtpd, 0x660f5110, "vsqrtpd", Vvd, xx, Wvd, xx, xx, mrm|vex, x, END_LIST},
{OP_vsqrtsd, 0xf20f5110, "vsqrtsd", Vdq, xx, Hsd, Wsd, xx, mrm|vex, x, END_LIST},
{INVALID, 0x0f5110, "(bad)", xx, xx, xx, xx, xx, no, x, NA},
{INVALID, 0xf30f5110, "(bad)", xx, xx, xx, xx, xx, no, x, NA},
{INVALID, 0x660f5110, "(bad)", xx, xx, xx, xx, xx, no, x, NA},
{INVALID, 0xf20f5110, "(bad)", xx, xx, xx, xx, xx, no, x, NA},
{OP_vsqrtps, 0x0f5110, "vsqrtps", Vvs, xx, Wvs, xx, xx, mrm|vex, x, tevexwb[265][0]},
{OP_vsqrtss, 0xf30f5110, "vsqrtss", Vdq, xx, H12_dq, Wss, xx, mrm|vex, x, tevexwb[266][0]},
{OP_vsqrtpd, 0x660f5110, "vsqrtpd", Vvd, xx, Wvd, xx, xx, mrm|vex, x, tevexwb[265][2]},
{OP_vsqrtsd, 0xf20f5110, "vsqrtsd", Vdq, xx, Hsd, Wsd, xx, mrm|vex, x, tevexwb[266][2]},
{EVEX_Wb_EXT, 0x0f5110, "(evex_Wb ext 265)", xx, xx, xx, xx, xx, mrm|evex, x, 265},
{EVEX_Wb_EXT, 0xf30f5110, "(evex_Wb ext 266)", xx, xx, xx, xx, xx, mrm|evex, x, 266},
{EVEX_Wb_EXT, 0x660f5110, "(evex_Wb ext 265)", xx, xx, xx, xx, xx, mrm|evex, x, 265},
{EVEX_Wb_EXT, 0xf20f5110, "(evex_Wb ext 266)", xx, xx, xx, xx, xx, mrm|evex, x, 266},
}, /* prefix extension 18 */
{
{OP_rsqrtps, 0x0f5210, "rsqrtps", Vps, xx, Wps, xx, xx, mrm, x, END_LIST},
Expand Down Expand Up @@ -6905,6 +6905,14 @@ const instr_info_t mod_extensions[][2] = {
{OP_vcvttps2dq, 0xf30f5b10, "vcvttps2dq", Ve, xx, KEw, Md, xx, mrm|evex|ttfv, x, modx[117][1]},
{OP_vcvttps2dq, 0xf30f5b10, "vcvttps2dq", Voq, xx, KEw, Uoq, xx, mrm|evex|sae|ttfv, x, END_LIST},
},
{ /* mod extension 118 */
/* EVEX vsqrtps entries, reached through the "(mod ext 118)" row of
 * evex_W_ext 265.  Row 0 lists a memory source (Md) and chains to row 1;
 * row 1 lists a register source (Uoq), adds the `er` flag and ends the list.
 * Both rows must carry the same opcode constant as the redirecting entry
 * (0x0f5110): opcode constants starting with 0x66 belong to the vsqrtpd
 * entries (mod extension 119), so using one here would give the register
 * form of vsqrtps the wrong opcode.
 * NOTE(review): presumably row 0 is chosen for memory modrm forms and row 1
 * for register forms -- confirm against the decoder.
 */
{OP_vsqrtps,0x0f5110, "vsqrtps", Ves, xx, KEw, Md, xx, mrm|evex|ttfv, x, modx[118][1]},
{OP_vsqrtps,0x0f5110, "vsqrtps", Voq, xx, KEw, Uoq, xx, mrm|evex|er|ttfv, x, END_LIST},
},
{ /* mod extension 119 */
/* EVEX vsqrtpd entries, reached through the "(mod ext 119)" row of
 * evex_W_ext 265.  Row 0 lists a memory source (Mq) and chains to row 1 via
 * modx[119][1]; row 1 lists a register source (Uoq), adds the `er` flag and
 * ends the list.  Both rows share the opcode constant 0x660f5150.
 * NOTE(review): presumably row 0 is chosen for memory modrm forms and row 1
 * for register forms -- confirm against the decoder.
 */
{OP_vsqrtpd,0x660f5150, "vsqrtpd", Ved, xx, KEb, Mq, xx, mrm|evex|ttfv, x, modx[119][1]},
{OP_vsqrtpd,0x660f5150, "vsqrtpd", Voq, xx, KEb, Uoq, xx, mrm|evex|er|ttfv, x, END_LIST},
},
};

/* Naturally all of these have modrm bytes even if they have no explicit operands */
Expand Down Expand Up @@ -9206,6 +9214,16 @@ const instr_info_t evex_Wb_extensions[][4] = {
{OP_vcvtps2ph, 0x663a1d18, "vcvtps2ph", Uqq, xx, KEw, Voq, Ib, mrm|evex|sae|reqp|tthvm, x, END_LIST},
{INVALID, 0, "(bad)", xx, xx, xx, xx, xx, no, x, NA},
{INVALID, 0, "(bad)", xx, xx, xx, xx, xx, no, x, NA},
}, { /* evex_W_ext 265 */
/* Packed square root: rows 0-1 are vsqrtps (opcode constants 0x0f51..),
 * rows 2-3 are vsqrtpd (opcode constants 0x660f51..).
 * Rows 0 and 2 are direct entries that chain to modx[118][0] / modx[119][0];
 * rows 1 and 3 redirect to mod extensions 118 / 119.
 * NOTE(review): the row index presumably encodes EVEX.W and EVEX.b (the array
 * is evex_Wb_extensions[][4]) -- confirm against the decoder.
 */
{OP_vsqrtps, 0x0f5100, "vsqrtps", Ves, xx, KEw, Wes, xx, mrm|evex|ttfv, x, modx[118][0]},
{MOD_EXT, 0x0f5110, "(mod ext 118)", xx, xx, xx, xx, xx, mrm|evex, x, 118},
{OP_vsqrtpd, 0x660f5140, "vsqrtpd", Ved, xx, KEb, Wed, xx, mrm|evex|ttfv, x, modx[119][0]},
{MOD_EXT, 0x660f5150, "(mod ext 119)", xx, xx, xx, xx, xx, mrm|evex, x, 119},
}, { /* evex_W_ext 266 */
/* Scalar square root: rows 0-1 are vsqrtss (opcode constants 0xf30f51..),
 * rows 2-3 are vsqrtsd (opcode constants 0xf20f51..).
 * Rows 0 and 2 list a Wss / Wsd source and chain to the row that follows;
 * rows 1 and 3 list a Uss / Usd source, add the `er` flag and end the list.
 * NOTE(review): the row index presumably encodes EVEX.W and EVEX.b (the array
 * is evex_Wb_extensions[][4]) -- confirm against the decoder.
 */
{OP_vsqrtss, 0xf30f5100, "vsqrtss", Vdq, xx, KE1b, Hdq, Wss, mrm|evex|ttt1s, x, tevexwb[266][1]},
{OP_vsqrtss, 0xf30f5110, "vsqrtss", Vdq, xx, KE1b, Hdq, Uss, mrm|evex|er|ttt1s, x, END_LIST},
{OP_vsqrtsd, 0xf20f5140, "vsqrtsd", Vdq, xx, KE1b, Hdq, Wsd, mrm|evex|ttt1s, x, tevexwb[266][3]},
{OP_vsqrtsd, 0xf20f5150, "vsqrtsd", Vdq, xx, KE1b, Hdq, Usd, mrm|evex|er|ttt1s, x, END_LIST},
},
};

Expand Down
9 changes: 9 additions & 0 deletions core/ir/x86/instr_create.h
Expand Up @@ -2644,6 +2644,11 @@
instr_create_1dst_2src((dc), OP_vplzcntd, (d), (k), (s))
#define INSTR_CREATE_vplzcntq_mask(dc, d, k, s) \
instr_create_1dst_2src((dc), OP_vplzcntq, (d), (k), (s))
/* Masked packed vsqrtps / vsqrtpd creation macros.  Each forwards to
 * instr_create_1dst_2src() with d as the single destination and k, s as the
 * two sources, in that order; the API tests added with these macros pass a
 * K mask register for k.
 */
#define INSTR_CREATE_vsqrtps_mask(dc, d, k, s) \
instr_create_1dst_2src((dc), OP_vsqrtps, (d), (k), (s))
#define INSTR_CREATE_vsqrtpd_mask(dc, d, k, s) \
instr_create_1dst_2src((dc), OP_vsqrtpd, (d), (k), (s))

/* @} */ /* end doxygen group */

/* 1 destination, 2 sources: 1 explicit, 1 implicit */
Expand Down Expand Up @@ -3740,6 +3745,10 @@
instr_create_1dst_3src((dc), OP_vpmadd52huq, (d), (k), (s1), (s2))
#define INSTR_CREATE_vpmadd52luq_mask(dc, d, k, s1, s2) \
instr_create_1dst_3src((dc), OP_vpmadd52luq, (d), (k), (s1), (s2))
/* Masked scalar vsqrtss / vsqrtsd creation macros.  Each forwards to
 * instr_create_1dst_3src() with d as the single destination and k, s1, s2 as
 * the three sources, in that order; the API tests added with these macros
 * pass a K mask register for k.
 */
#define INSTR_CREATE_vsqrtss_mask(dc, d, k, s1, s2) \
instr_create_1dst_3src((dc), OP_vsqrtss, (d), (k), (s1), (s2))
#define INSTR_CREATE_vsqrtsd_mask(dc, d, k, s1, s2) \
instr_create_1dst_3src((dc), OP_vsqrtsd, (d), (k), (s1), (s2))
/* @} */ /* end doxygen group */

/** @name 1 destination, 3 sources including one immediate */
Expand Down
66 changes: 66 additions & 0 deletions suite/tests/api/ir_x86_3args_avx512_evex_mask.h
Expand Up @@ -3245,3 +3245,69 @@ OPCODE(vscatterqpd_vsibzlok1, vscatterqpd, vscatterqpd_mask, 0, VSIBZ6(OPSZ_8),
REGARG(K1), REGARG(ZMM0))
OPCODE(vscatterqpd_vsibzhik7, vscatterqpd, vscatterqpd_mask, X64_ONLY, VSIBZ31(OPSZ_8),
REGARG(K7), REGARG(ZMM16))
/* vsqrtps_mask cases.  For each of XMM, YMM and ZMM there are three source
 * forms: a register, a full-width memory operand (OPSZ_16/32/64) and a 4-byte
 * memory operand (the "bcst" cases -- presumably the broadcast form).  Each
 * form is listed once with registers 0/1 and mask K0, and once under X64_ONLY
 * with registers 16/17 and mask K7.
 */
OPCODE(vsqrtps_xlok0xlo, vsqrtps, vsqrtps_mask, 0, REGARG(XMM0), REGARG(K0), REGARG(XMM1))
OPCODE(vsqrtps_xlok0mem, vsqrtps, vsqrtps_mask, 0, REGARG(XMM0), REGARG(K0),
MEMARG(OPSZ_16))
OPCODE(vsqrtps_xlok0bcst, vsqrtps, vsqrtps_mask, 0, REGARG(XMM0), REGARG(K0),
MEMARG(OPSZ_4))
OPCODE(vsqrtps_xhik7xhi, vsqrtps, vsqrtps_mask, X64_ONLY, REGARG(XMM16), REGARG(K7),
REGARG(XMM17))
OPCODE(vsqrtps_xhik7mem, vsqrtps, vsqrtps_mask, X64_ONLY, REGARG(XMM16), REGARG(K7),
MEMARG(OPSZ_16))
OPCODE(vsqrtps_xhik7bcst, vsqrtps, vsqrtps_mask, X64_ONLY, REGARG(XMM16), REGARG(K7),
MEMARG(OPSZ_4))
OPCODE(vsqrtps_ylok0ylo, vsqrtps, vsqrtps_mask, 0, REGARG(YMM0), REGARG(K0), REGARG(YMM1))
OPCODE(vsqrtps_ylok0mem, vsqrtps, vsqrtps_mask, 0, REGARG(YMM0), REGARG(K0),
MEMARG(OPSZ_32))
OPCODE(vsqrtps_ylok0bcst, vsqrtps, vsqrtps_mask, 0, REGARG(YMM0), REGARG(K0),
MEMARG(OPSZ_4))
OPCODE(vsqrtps_yhik7yhi, vsqrtps, vsqrtps_mask, X64_ONLY, REGARG(YMM16), REGARG(K7),
REGARG(YMM17))
OPCODE(vsqrtps_yhik7mem, vsqrtps, vsqrtps_mask, X64_ONLY, REGARG(YMM16), REGARG(K7),
MEMARG(OPSZ_32))
OPCODE(vsqrtps_yhik7bcst, vsqrtps, vsqrtps_mask, X64_ONLY, REGARG(YMM16), REGARG(K7),
MEMARG(OPSZ_4))
OPCODE(vsqrtps_zlok0zlo, vsqrtps, vsqrtps_mask, 0, REGARG(ZMM0), REGARG(K0), REGARG(ZMM1))
OPCODE(vsqrtps_zlok0mem, vsqrtps, vsqrtps_mask, 0, REGARG(ZMM0), REGARG(K0),
MEMARG(OPSZ_64))
OPCODE(vsqrtps_zlok0bcst, vsqrtps, vsqrtps_mask, 0, REGARG(ZMM0), REGARG(K0),
MEMARG(OPSZ_4))
OPCODE(vsqrtps_zhik7zhi, vsqrtps, vsqrtps_mask, X64_ONLY, REGARG(ZMM16), REGARG(K7),
REGARG(ZMM17))
OPCODE(vsqrtps_zhik7mem, vsqrtps, vsqrtps_mask, X64_ONLY, REGARG(ZMM16), REGARG(K7),
MEMARG(OPSZ_64))
OPCODE(vsqrtps_zhik7bcst, vsqrtps, vsqrtps_mask, X64_ONLY, REGARG(ZMM16), REGARG(K7),
MEMARG(OPSZ_4))
/* vsqrtpd_mask cases.  For each of XMM, YMM and ZMM there are three source
 * forms: a register, a full-width memory operand (OPSZ_16/32/64) and an
 * 8-byte memory operand (the "bcst" cases -- presumably the broadcast form).
 * Each form is listed once with registers 0/1 and mask K0, and once under
 * X64_ONLY with registers 16/17 and mask K7.
 */
OPCODE(vsqrtpd_xlok0xlo, vsqrtpd, vsqrtpd_mask, 0, REGARG(XMM0), REGARG(K0), REGARG(XMM1))
OPCODE(vsqrtpd_xlok0mem, vsqrtpd, vsqrtpd_mask, 0, REGARG(XMM0), REGARG(K0),
MEMARG(OPSZ_16))
OPCODE(vsqrtpd_xlok0bcst, vsqrtpd, vsqrtpd_mask, 0, REGARG(XMM0), REGARG(K0),
MEMARG(OPSZ_8))
OPCODE(vsqrtpd_xhik7xhi, vsqrtpd, vsqrtpd_mask, X64_ONLY, REGARG(XMM16), REGARG(K7),
REGARG(XMM17))
OPCODE(vsqrtpd_xhik7mem, vsqrtpd, vsqrtpd_mask, X64_ONLY, REGARG(XMM16), REGARG(K7),
MEMARG(OPSZ_16))
OPCODE(vsqrtpd_xhik7bcst, vsqrtpd, vsqrtpd_mask, X64_ONLY, REGARG(XMM16), REGARG(K7),
MEMARG(OPSZ_8))
OPCODE(vsqrtpd_ylok0ylo, vsqrtpd, vsqrtpd_mask, 0, REGARG(YMM0), REGARG(K0), REGARG(YMM1))
OPCODE(vsqrtpd_ylok0mem, vsqrtpd, vsqrtpd_mask, 0, REGARG(YMM0), REGARG(K0),
MEMARG(OPSZ_32))
OPCODE(vsqrtpd_ylok0bcst, vsqrtpd, vsqrtpd_mask, 0, REGARG(YMM0), REGARG(K0),
MEMARG(OPSZ_8))
OPCODE(vsqrtpd_yhik7yhi, vsqrtpd, vsqrtpd_mask, X64_ONLY, REGARG(YMM16), REGARG(K7),
REGARG(YMM17))
OPCODE(vsqrtpd_yhik7mem, vsqrtpd, vsqrtpd_mask, X64_ONLY, REGARG(YMM16), REGARG(K7),
MEMARG(OPSZ_32))
OPCODE(vsqrtpd_yhik7bcst, vsqrtpd, vsqrtpd_mask, X64_ONLY, REGARG(YMM16), REGARG(K7),
MEMARG(OPSZ_8))
OPCODE(vsqrtpd_zlok0zlo, vsqrtpd, vsqrtpd_mask, 0, REGARG(ZMM0), REGARG(K0), REGARG(ZMM1))
OPCODE(vsqrtpd_zlok0mem, vsqrtpd, vsqrtpd_mask, 0, REGARG(ZMM0), REGARG(K0),
MEMARG(OPSZ_64))
OPCODE(vsqrtpd_zlok0bcst, vsqrtpd, vsqrtpd_mask, 0, REGARG(ZMM0), REGARG(K0),
MEMARG(OPSZ_8))
OPCODE(vsqrtpd_zhik7zhi, vsqrtpd, vsqrtpd_mask, X64_ONLY, REGARG(ZMM16), REGARG(K7),
REGARG(ZMM17))
OPCODE(vsqrtpd_zhik7mem, vsqrtpd, vsqrtpd_mask, X64_ONLY, REGARG(ZMM16), REGARG(K7),
MEMARG(OPSZ_64))
OPCODE(vsqrtpd_zhik7bcst, vsqrtpd, vsqrtpd_mask, X64_ONLY, REGARG(ZMM16), REGARG(K7),
MEMARG(OPSZ_8))
16 changes: 16 additions & 0 deletions suite/tests/api/ir_x86_4args_avx512_evex_mask_C.h
Expand Up @@ -1943,3 +1943,19 @@ OPCODE(vpmadd52luq_zhik7zhild, vpmadd52luq, vpmadd52luq_mask, X64_ONLY, REGARG(Z
REGARG(K7), REGARG(ZMM17), MEMARG(OPSZ_64))
OPCODE(vpmadd52luq_zhik7zhibcst, vpmadd52luq, vpmadd52luq_mask, X64_ONLY, REGARG(ZMM16),
REGARG(K7), REGARG(ZMM17), MEMARG(OPSZ_8))
/* vsqrtss_mask cases: destination, mask, a full XMM first source, and a
 * 4-byte second source given either as a partial register
 * (REGARG_PARTIAL(..., OPSZ_4)) or as memory (MEMARG(OPSZ_4)).  Listed once
 * with XMM0-2 and K0, and once under X64_ONLY with XMM16-18 and K7.
 */
OPCODE(vsqrtss_xlok0xloxlo, vsqrtss, vsqrtss_mask, 0, REGARG(XMM0), REGARG(K0),
REGARG(XMM1), REGARG_PARTIAL(XMM2, OPSZ_4))
OPCODE(vsqrtss_xlok0xlomem, vsqrtss, vsqrtss_mask, 0, REGARG(XMM0), REGARG(K0),
REGARG(XMM1), MEMARG(OPSZ_4))
OPCODE(vsqrtss_xhik7xhixhi, vsqrtss, vsqrtss_mask, X64_ONLY, REGARG(XMM16), REGARG(K7),
REGARG(XMM17), REGARG_PARTIAL(XMM18, OPSZ_4))
OPCODE(vsqrtss_xhik7xhimem, vsqrtss, vsqrtss_mask, X64_ONLY, REGARG(XMM16), REGARG(K7),
REGARG(XMM17), MEMARG(OPSZ_4))
/* vsqrtsd_mask cases: destination, mask, a full XMM first source, and an
 * 8-byte second source given either as a partial register
 * (REGARG_PARTIAL(..., OPSZ_8)) or as memory (MEMARG(OPSZ_8)).  Listed once
 * with XMM0-2 and K0, and once under X64_ONLY with XMM16-18 and K7.
 */
OPCODE(vsqrtsd_xlok0xloxlo, vsqrtsd, vsqrtsd_mask, 0, REGARG(XMM0), REGARG(K0),
REGARG(XMM1), REGARG_PARTIAL(XMM2, OPSZ_8))
OPCODE(vsqrtsd_xlok0xlomem, vsqrtsd, vsqrtsd_mask, 0, REGARG(XMM0), REGARG(K0),
REGARG(XMM1), MEMARG(OPSZ_8))
OPCODE(vsqrtsd_xhik7xhixhi, vsqrtsd, vsqrtsd_mask, X64_ONLY, REGARG(XMM16), REGARG(K7),
REGARG(XMM17), REGARG_PARTIAL(XMM18, OPSZ_8))
OPCODE(vsqrtsd_xhik7xhimem, vsqrtsd, vsqrtsd_mask, X64_ONLY, REGARG(XMM16), REGARG(K7),
REGARG(XMM17), MEMARG(OPSZ_8))

0 comments on commit df18aa8

Please sign in to comment.