Skip to content

Commit

Permalink
more SIMD support
Browse files Browse the repository at this point in the history
  • Loading branch information
WalterBright committed Nov 3, 2012
1 parent 8552c45 commit e4e6744
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 151 deletions.
1 change: 1 addition & 0 deletions src/backend/oper.h
Expand Up @@ -228,6 +228,7 @@ enum OPER
OPframeptr, // load pointer to base of frame
OPgot, // load pointer to global offset table
OPvector, // SIMD vector operations
OPvecsto = OPvector, // SIMD vector store operations

// Jupiter operators
OParray, // access Jupiter array, left is handle, right is index
Expand Down
306 changes: 155 additions & 151 deletions src/backend/xmm.h
Expand Up @@ -2,139 +2,140 @@

enum
{
ADDSS = 0xF30F58,
ADDSD = 0xF20F58,
ADDPS = 0x000F58,
ADDPD = 0x660F58,
PADDB = 0x660FFC,
PADDW = 0x660FFD,
PADDD = 0x660FFE,
PADDQ = 0x660FD4,
ADDSS = 0xF30F58, // ADDSS xmm1, xmm2/mem32 F3 0F 58 /r
ADDSD = 0xF20F58, // ADDSD xmm1, xmm2/mem64 F2 0F 58 /r
ADDPS = 0x000F58, // ADDPS xmm1, xmm2/mem128 0F 58 /r
ADDPD = 0x660F58, // ADDPD xmm1, xmm2/mem128 66 0F 58 /r
PADDB = 0x660FFC, // PADDB xmm1, xmm2/mem128 66 0F FC /r
PADDW = 0x660FFD, // PADDW xmm1, xmm2/mem128 66 0F FD /r
PADDD = 0x660FFE, // PADDD xmm1, xmm2/mem128 66 0F FE /r
PADDQ = 0x660FD4, // PADDQ xmm1, xmm2/mem128 66 0F D4 /r

SUBSS = 0xF30F5C,
SUBSD = 0xF20F5C,
SUBPS = 0x000F5C,
SUBPD = 0x660F5C,
PSUBB = 0x660FF8,
PSUBW = 0x660FF9,
PSUBD = 0x660FFA,
PSUBQ = 0x660FFB,
SUBSS = 0xF30F5C, // SUBSS xmm1, xmm2/mem32 F3 0F 5C /r
SUBSD = 0xF20F5C, // SUBSD xmm1, xmm2/mem64 F2 0F 5C /r
SUBPS = 0x000F5C, // SUBPS xmm1, xmm2/mem128 0F 5C /r
SUBPD = 0x660F5C, // SUBPD xmm1, xmm2/mem128 66 0F 5C /r
PSUBB = 0x660FF8, // PSUBB xmm1, xmm2/mem128 66 0F F8 /r
PSUBW = 0x660FF9, // PSUBW xmm1, xmm2/mem128 66 0F F9 /r
PSUBD = 0x660FFA, // PSUBD xmm1, xmm2/mem128 66 0F FA /r
PSUBQ = 0x660FFB, // PSUBQ xmm1, xmm2/mem128 66 0F FB /r

MULSS = 0xF30F59,
MULSD = 0xF20F59,
MULPS = 0x000F59,
MULPD = 0x660F59,
PMULLW = 0x660FD5,
MULSS = 0xF30F59, // MULSS xmm1, xmm2/mem32 F3 0F 59 /r
MULSD = 0xF20F59, // MULSD xmm1, xmm2/mem64 F2 0F 59 /r
MULPS = 0x000F59, // MULPS xmm1, xmm2/mem128 0F 59 /r
MULPD = 0x660F59, // MULPD xmm1, xmm2/mem128 66 0F 59 /r
PMULLW = 0x660FD5, // PMULLW xmm1, xmm2/mem128 66 0F D5 /r

DIVSS = 0xF30F5E,
DIVSD = 0xF20F5E,
DIVPS = 0x000F5E,
DIVPD = 0x660F5E,
DIVSS = 0xF30F5E, // DIVSS xmm1, xmm2/mem32 F3 0F 5E /r
DIVSD = 0xF20F5E, // DIVSD xmm1, xmm2/mem64 F2 0F 5E /r
DIVPS = 0x000F5E, // DIVPS xmm1, xmm2mem/128 0F 5E /r
DIVPD = 0x660F5E, // DIVPD xmm1, xmm2/mem128 66 0F 5E /r

PAND = 0x660FDB,
POR = 0x660FEB,
PAND = 0x660FDB, // PAND xmm1, xmm2/mem128 66 0F DB /r
POR = 0x660FEB, // POR xmm1, xmm2/mem128 66 0F EB /r

UCOMISS = 0x000F2E,
UCOMISD = 0x660F2E,
UCOMISS = 0x000F2E, // UCOMISS xmm1, xmm2/mem32 0F 2E /r
UCOMISD = 0x660F2E, // UCOMISD xmm1, xmm2/mem64 66 0F 2E /r

XORPS = 0x000F57,
XORPD = 0x660F57,
XORPS = 0x000F57, // XORPS xmm1, xmm2/mem128 0F 57 /r
XORPD = 0x660F57, // XORPD xmm1, xmm2/mem128 66 0F 57 /r

// Use STO and LOD instead of MOV to distinguish the direction
STOSS = 0xF30F11, // MOVSS
STOSD = 0xF20F11,
STOAPS = 0x000F29,
STOSS = 0xF30F11, // MOVSS xmm1/mem32, xmm2 F3 0F 11 /r
STOSD = 0xF20F11, // MOVSD xmm1/mem64, xmm2 F2 0F 11 /r
STOAPS = 0x000F29, // MOVAPS xmm1/mem128, xmm2 0F 29 /r
STOAPD = 0x660F29, // MOVAPD xmm1/mem128, xmm2 66 0F 29 /r
STODQA = 0x660F7F,
STOD = 0x660F7E, // MOVD reg/mem64, xmm 66 0F 7E /r
STOQ = 0x660FD6,
STODQA = 0x660F7F, // MOVDQA xmm1/mem128, xmm2 66 0F 7F /r
STOD = 0x660F7E, // MOVD reg/mem64, xmm 66 0F 7E /r
STOQ = 0x660FD6, // MOVQ xmm1/mem64, xmm2 66 0F D6 /

LODSS = 0xF30F10, // MOVSS
LODSD = 0xF20F10,
LODAPS = 0x000F28,
LODSS = 0xF30F10, // MOVSS xmm1, xmm2/mem32 F3 0F 10 /r
LODSD = 0xF20F10, // MOVSD xmm1, xmm2/mem64 F2 0F 10 /r
LODAPS = 0x000F28, // MOVAPS xmm1, xmm2/mem128 0F 28 /r
LODAPD = 0x660F28, // MOVAPD xmm1, xmm2/mem128 66 0F 28 /r
LODDQA = 0x660F6F,
LODD = 0x660F6E, // MOVD xmm, reg/mem64 66 0F 6E /r
LODQ = 0xF30F7E,
LODDQA = 0x660F6F, // MOVDQA xmm1, xmm2/mem128 66 0F 6F /r
LODD = 0x660F6E, // MOVD xmm, reg/mem64 66 0F 6E /r
LODQ = 0xF30F7E, // MOVQ xmm1, xmm2/mem64 F3 0F 7E /r

LODDQU = 0xF30F6F, // MOVDQU xmm1, xmm2/mem128 F3 0F 6F /r
STODQU = 0xF30F7F, // MOVDQU xmm1/mem128, xmm2 F3 0F 7F /r
MOVDQ2Q = 0xF20FD6, // MOVDQ2Q mmx, xmm F2 0F D6 /r
MOVHLPS = 0x0F12, // MOVHLPS xmm1, xmm2 0F 12 /r
LODHPD = 0x660F16,
LODHPD = 0x660F16, // MOVHPD xmm, mem64 66 0F 16 /r
STOHPD = 0x660F17, // MOVHPD mem64, xmm 66 0F 17 /r
LODHPS = 0x0F16, // MOVHPD xmm, mem64 66 0F 16 /r
STOHPS = 0x0F17,
MOVLHPS = 0x0F16,
LODLPD = 0x660F12,
STOLPD = 0x660F13,
LODLPS = 0x0F12,
STOLPS = 0x0F13,
MOVMSKPD = 0x660F50,
MOVMSKPS = 0x0F50,
MOVNTDQ = 0x660FE7,
MOVNTI = 0x0FC3,
MOVNTPD = 0x660F2B,
MOVNTPS = 0x0F2B,
MOVNTQ = 0x0FE7,
MOVQ2DQ = 0xF30FD6,
LODUPD = 0x660F10,
STOUPD = 0x660F11,
LODUPS = 0x0F10,
STOUPS = 0x0F11,
LODHPS = 0x0F16, // MOVHPS xmm, mem64 0F 16 /r
STOHPS = 0x0F17, // MOVHPS mem64, xmm 0F 17 /r
MOVLHPS = 0x0F16, // MOVLHPS xmm1, xmm2 0F 16 /r
LODLPD = 0x660F12, // MOVLPD xmm, mem64 66 0F 12 /r
STOLPD = 0x660F13, // MOVLPD mem64, xmm 66 0F 13 /r
LODLPS = 0x0F12, // MOVLPS xmm, mem64 0F 12 /r
STOLPS = 0x0F13, // MOVLPS mem64, xmm 0F 13 /r
MOVMSKPD = 0x660F50, // MOVMSKPD reg32, xmm 66 0F 50 /r
MOVMSKPS = 0x0F50, // MOVMSKPS reg32, xmm 0F 50 /r
MOVNTDQ = 0x660FE7, // MOVNTDQ mem128, xmm 66 0F E7 /r
MOVNTI = 0x0FC3, // MOVNTI m32,r32 0F C3 /r
// MOVNTI m64,r64 0F C3 /r
MOVNTPD = 0x660F2B, // MOVNTPD mem128, xmm 66 0F 2B /r
MOVNTPS = 0x0F2B, // MOVNTPS mem128, xmm 0F 2B /r
MOVNTQ = 0x0FE7, // MOVNTQ m64, mmx 0F E7 /r
MOVQ2DQ = 0xF30FD6, // MOVQ2DQ xmm, mmx F3 0F D6 /r
LODUPD = 0x660F10, // MOVUPD xmm1, xmm2/mem128 66 0F 10 /r
STOUPD = 0x660F11, // MOVUPD xmm1/mem128, xmm2 66 0F 11 /r
LODUPS = 0x0F10, // MOVUPS xmm1, xmm2/mem128 0F 10 /r
STOUPS = 0x0F11, // MOVUPS xmm1/mem128, xmm2 0F 11 /r

PACKSSDW = 0x660F6B,
PACKSSWB = 0x660F63,
PACKUSWB = 0x660F67,
PADDSB = 0x660FEC,
PADDSW = 0x660FED,
PADDUSB = 0x660FDC,
PADDUSW = 0x660FDD,
PANDN = 0x660FDF,
PCMPEQB = 0x660F74,
PCMPEQD = 0x660F76,
PCMPEQW = 0x660F75,
PCMPGTB = 0x660F64,
PCMPGTD = 0x660F66,
PCMPGTW = 0x660F65,
PMADDWD = 0x660FF5,
PSLLW = 0x660FF1, // PSLLW xmm1, xmm2/mem128 66 0F F1 /r
// PSLLW xmm, imm8 66 0F 71 /6 ib
PSLLD = 0x660FF2, // PSLLD xmm1, xmm2/mem128 66 0F F2 /r
// PSLLD xmm, imm8 66 0F 72 /6 ib
PSLLQ = 0x660FF3, // PSLLQ xmm1, xmm2/mem128 66 0F F3 /r
// PSLLQ xmm, imm8 66 0F 73 /6 ib
PSRAW = 0x660FE1, // PSRAW xmm1, xmm2/mem128 66 0F E1 /r
// PSRAW xmm, imm8 66 0F 71 /4 ib
PSRAD = 0x660FE2, // PSRAD xmm1, xmm2/mem128 66 0F E2 /r
// PSRAD xmm, imm8 66 0F 72 /4 ib
PSRLW = 0x660FD1, // PSRLW xmm1, xmm2/mem128 66 0F D1 /r
// PSRLW xmm, imm8 66 0F 71 /2 ib
PSRLD = 0x660FD2, // PSRLD xmm1, xmm2/mem128 66 0F D2 /r
// PSRLD xmm, imm8 66 0F 72 /2 ib
PSRLQ = 0x660FD3, // PSRLQ xmm1, xmm2/mem128 66 0F D3 /r
// PSRLQ xmm, imm8 66 0F 73 /2 ib
PSUBSB = 0x660FE8,
PSUBSW = 0x660FE9,
PSUBUSB = 0x660FD8,
PSUBUSW = 0x660FD9,
PUNPCKHBW = 0x660F68,
PUNPCKHDQ = 0x660F6A,
PUNPCKHWD = 0x660F69,
PUNPCKLBW = 0x660F60,
PUNPCKLDQ = 0x660F62,
PUNPCKLWD = 0x660F61,
PXOR = 0x660FEF,
ANDPD = 0x660F54,
ANDPS = 0x0F54,
ANDNPD = 0x660F55,
ANDNPS = 0x0F55,
CMPPS = 0x0FC2,
CMPPD = 0x660FC2,
CMPSD = 0xF20FC2,
CMPSS = 0xF30FC2,
COMISD = 0x660F2F,
COMISS = 0x0F2F,
PACKSSDW = 0x660F6B, // PACKSSDW xmm1, xmm2/mem128 66 0F 6B /r
PACKSSWB = 0x660F63, // PACKSSWB xmm1, xmm2/mem128 66 0F 63 /r
PACKUSWB = 0x660F67, // PACKUSWB xmm1, xmm2/mem128 66 0F 67 /r
PADDSB = 0x660FEC, // PADDSB xmm1, xmm2/mem128 66 0F EC /r
PADDSW = 0x660FED, // PADDSW xmm1, xmm2/mem128 66 0F ED /r
PADDUSB = 0x660FDC, // PADDUSB xmm1, xmm2/mem128 66 0F DC /r
PADDUSW = 0x660FDD, // PADDUSW xmm1, xmm2/mem128 66 0F DD /r
PANDN = 0x660FDF, // PANDN xmm1, xmm2/mem128 66 0F DF /r
PCMPEQB = 0x660F74, // PCMPEQB xmm1, xmm2/mem128 66 0F 74 /r
PCMPEQD = 0x660F76, // PCMPEQD xmm1, xmm2/mem128 66 0F 76 /r
PCMPEQW = 0x660F75, // PCMPEQW xmm1, xmm2/mem128 66 0F 75 /r
PCMPGTB = 0x660F64, // PCMPGTB xmm1, xmm2/mem128 66 0F 64 /r
PCMPGTD = 0x660F66, // PCMPGTD xmm1, xmm2/mem128 66 0F 66 /r
PCMPGTW = 0x660F65, // PCMPGTW xmm1, xmm2/mem128 66 0F 65 /r
PMADDWD = 0x660FF5, // PMADDWD xmm1, xmm2/mem128 66 0F F5 /r
PSLLW = 0x660FF1, // PSLLW xmm1, xmm2/mem128 66 0F F1 /r
// PSLLW xmm, imm8 66 0F 71 /6 ib
PSLLD = 0x660FF2, // PSLLD xmm1, xmm2/mem128 66 0F F2 /r
// PSLLD xmm, imm8 66 0F 72 /6 ib
PSLLQ = 0x660FF3, // PSLLQ xmm1, xmm2/mem128 66 0F F3 /r
// PSLLQ xmm, imm8 66 0F 73 /6 ib
PSRAW = 0x660FE1, // PSRAW xmm1, xmm2/mem128 66 0F E1 /r
// PSRAW xmm, imm8 66 0F 71 /4 ib
PSRAD = 0x660FE2, // PSRAD xmm1, xmm2/mem128 66 0F E2 /r
// PSRAD xmm, imm8 66 0F 72 /4 ib
PSRLW = 0x660FD1, // PSRLW xmm1, xmm2/mem128 66 0F D1 /r
// PSRLW xmm, imm8 66 0F 71 /2 ib
PSRLD = 0x660FD2, // PSRLD xmm1, xmm2/mem128 66 0F D2 /r
// PSRLD xmm, imm8 66 0F 72 /2 ib
PSRLQ = 0x660FD3, // PSRLQ xmm1, xmm2/mem128 66 0F D3 /r
// PSRLQ xmm, imm8 66 0F 73 /2 ib
PSUBSB = 0x660FE8, // PSUBSB xmm1, xmm2/mem128 66 0F E8 /r
PSUBSW = 0x660FE9, // PSUBSW xmm1, xmm2/mem128 66 0F E9 /r
PSUBUSB = 0x660FD8, // PSUBUSB xmm1, xmm2/mem128 66 0F D8 /r
PSUBUSW = 0x660FD9, // PSUBUSW xmm1, xmm2/mem128 66 0F D9 /r
PUNPCKHBW = 0x660F68, // PUNPCKHBW xmm1, xmm2/mem128 66 0F 68 /r
PUNPCKHDQ = 0x660F6A, // PUNPCKHDQ xmm1, xmm2/mem128 66 0F 6A /r
PUNPCKHWD = 0x660F69, // PUNPCKHWD xmm1, xmm2/mem128 66 0F 69 /r
PUNPCKLBW = 0x660F60, // PUNPCKLBW xmm1, xmm2/mem128 66 0F 60 /r
PUNPCKLDQ = 0x660F62, // PUNPCKLDQ xmm1, xmm2/mem128 66 0F 62 /r
PUNPCKLWD = 0x660F61, // PUNPCKLWD xmm1, xmm2/mem128 66 0F 61 /r
PXOR = 0x660FEF, // PXOR xmm1, xmm2/mem128 66 0F EF /r
ANDPD = 0x660F54, // ANDPD xmm1, xmm2/mem128 66 0F 54 /r
ANDPS = 0x0F54, // ANDPS xmm1, xmm2/mem128 0F 54 /r
ANDNPD = 0x660F55, // ANDNPD xmm1, xmm2/mem128 66 0F 55 /r
ANDNPS = 0x0F55, // ANDNPS xmm1, xmm2/mem128 0F 55 /r
CMPPS = 0x0FC2, // CMPPS xmm1, xmm2/mem128, imm8 0F C2 /r ib
CMPPD = 0x660FC2, // CMPPD xmm1, xmm2/mem128, imm8 66 0F C2 /r ib
CMPSD = 0xF20FC2, // CMPSD xmm1, xmm2/mem64, imm8 F2 0F C2 /r ib
CMPSS = 0xF30FC2, // CMPSS xmm1, xmm2/mem32, imm8 F3 0F C2 /r ib
COMISD = 0x660F2F, // COMISD xmm1, xmm2/mem64 66 0F 2F /r
COMISS = 0x0F2F, // COMISS xmm1, xmm2/mem32 0F 2F /r
CVTDQ2PD = 0xF30FE6, // CVTDQ2PD xmm1, xmm2/mem64 F3 0F E6 /r
CVTDQ2PS = 0x0F5B, // CVTDQ2PS xmm1, xmm2/mem128 0F 5B /r
CVTPD2DQ = 0xF20FE6, // CVTPD2DQ xmm1, xmm2/mem128 F2 0F E6 /r
Expand Down Expand Up @@ -168,51 +169,54 @@ enum
MAXPD = 0x660F5F, // MAXPD xmm1, xmm2/mem128 66 0F 5F /r
MAXPS = 0x0F5F, // MAXPS xmm1, xmm2/mem128 0F 5F /r
MAXSD = 0xF20F5F, // MAXSD xmm1, xmm2/mem64 F2 0F 5F /r
MAXSS = 0xF30F5F,
MINPD = 0x660F5D,
MINPS = 0x0F5D,
MINSD = 0xF20F5D,
MAXSS = 0xF30F5F, // MAXSS xmm1, xmm2/mem32 F3 0F 5F /r
MINPD = 0x660F5D, // MINPD xmm1, xmm2/mem128 66 0F 5D /r
MINPS = 0x0F5D, // MINPS xmm1, xmm2/mem128 0F 5D /r
MINSD = 0xF20F5D, // MINSD xmm1, xmm2/mem64 F2 0F 5D /r
MINSS = 0xF30F5D, // MINSS xmm1, xmm2/mem32 F3 0F 5D /r
ORPD = 0x660F56,
ORPS = 0x0F56,
PAVGB = 0x660FE0,
PAVGW = 0x660FE3,
PMAXSW = 0x660FEE,
ORPD = 0x660F56, // ORPD xmm1, xmm2/mem128 66 0F 56 /r
ORPS = 0x0F56, // ORPS xmm1, xmm2/mem128 0F 56 /r
PAVGB = 0x660FE0, // PAVGB xmm1, xmm2/mem128 66 0F E0 /r
PAVGW = 0x660FE3, // PAVGW xmm1, xmm2/mem128 66 0F E3 /r
PMAXSW = 0x660FEE, // PMAXSW xmm1, xmm2/mem128 66 0F EE /
PINSRW = 0x660FC4, // PINSRW xmm, reg32/mem16, imm8 66 0F C4 /r ib
PMAXUB = 0x660FDE,
PMINSW = 0x660FEA,
PMINUB = 0x660FDA,
PMAXUB = 0x660FDE, // PMAXUB xmm1, xmm2/mem128 66 0F DE /r
PMINSW = 0x660FEA, // PMINSW xmm1, xmm2/mem128 66 0F EA /r
PMINUB = 0x660FDA, // PMINUB xmm1, xmm2/mem128 66 0F DA /r
PMOVMSKB = 0x660FD7, // PMOVMSKB reg32, xmm 66 0F D7 /r
PMULHUW = 0x660FE4,
PMULHW = 0x660FE5,
PMULUDQ = 0x660FF4,
PSADBW = 0x660FF6,
PUNPCKHQDQ = 0x660F6D,
PUNPCKLQDQ = 0x660F6C,
RCPPS = 0x0F53,
RCPSS = 0xF30F53,
RSQRTPS = 0x0F52,
RSQRTSS = 0xF30F52,
SQRTPD = 0x660F51,
SHUFPD = 0x660FC6,
SHUFPS = 0x0FC6,
SQRTPS = 0x0F51,
SQRTSD = 0xF20F51,
SQRTSS = 0xF30F51,
UNPCKHPD = 0x660F15,
UNPCKHPS = 0x0F15,
PMULHUW = 0x660FE4, // PMULHUW xmm1, xmm2/mem128 66 0F E4 /r
PMULHW = 0x660FE5, // PMULHW xmm1, xmm2/mem128 66 0F E5 /
PMULUDQ = 0x660FF4, // PMULUDQ xmm1, xmm2/mem128 66 0F F4 /r
PSADBW = 0x660FF6, // PSADBW xmm1, xmm2/mem128 66 0F F6 /r
PUNPCKHQDQ = 0x660F6D, // PUNPCKHQDQ xmm1, xmm2/mem128 66 0F 6D /r
PUNPCKLQDQ = 0x660F6C, // PUNPCKLQDQ xmm1, xmm2/mem128 66 0F 6C /r
RCPPS = 0x0F53, // RCPPS xmm1, xmm2/mem128 0F 53 /r
RCPSS = 0xF30F53, // RCPSS xmm1, xmm2/mem32 F3 0F 53 /r
RSQRTPS = 0x0F52, // RSQRTPS xmm1, xmm2/mem128 0F 52 /r
RSQRTSS = 0xF30F52, // RSQRTSS xmm1, xmm2/mem32 F3 0F 52 /r
SQRTPD = 0x660F51, // SQRTPD xmm1, xmm2/mem128 66 0F 51 /r
SHUFPD = 0x660FC6, // SHUFPD xmm1, xmm2/mem128, imm8 66 0F C6 /r ib
SHUFPS = 0x0FC6, // SHUFPS xmm1, xmm2/mem128, imm8 0F C6 /r ib
SQRTPS = 0x0F51, // SQRTPS xmm1, xmm2/mem128 0F 51 /r
SQRTSD = 0xF20F51, // SQRTSD xmm1, xmm2/mem64 F2 0F 51 /r
SQRTSS = 0xF30F51, // SQRTSS xmm1, xmm2/mem32 F3 0F 51 /r
UNPCKHPD = 0x660F15, // UNPCKHPD xmm1, xmm2/mem12866 0F 15 /r
UNPCKHPS = 0x0F15, // UNPCKHPS xmm1, xmm2/mem1280F 15 /r
UNPCKLPD = 0x660F14, // UNPCKLPD xmm1, xmm2/mem128 66 0F 14 /r
UNPCKLPS = 0x0F14,
UNPCKLPS = 0x0F14, // UNPCKLPS xmm1, xmm2/mem1280F 14 /r

PSHUFD = 0x660F70,
PSHUFHW = 0xF30F70,
PSHUFD = 0x660F70, // PSHUFD xmm1, xmm2/mem128, imm8 66 0F 70 /r ib
PSHUFHW = 0xF30F70, // PSHUFHW xmm1, xmm2/mem128, imm8 F3 0F 70 /r ib
PSHUFLW = 0xF20F70, // PSHUFLW xmm1, xmm2/mem128, imm8 F2 0F 70 /r ib
PSHUFW = 0x0F70,
PSLLDQ = 0x07660F73, // PSLLDQ xmm, imm8 66 0F 73 /7 ib
PSRLDQ = 0x03660F73, // PSRLDQ xmm, imm8 66 0F 73 /3 ib
PSHUFW = 0x0F70, // PSHUFW mm1, mm2/mem64, imm8 0F 70 /r ib
PSLLDQ = 0x07660F73, // PSLLDQ xmm, imm8 66 0F 73 /7 ib
PSRLDQ = 0x03660F73, // PSRLDQ xmm, imm8 66 0F 73 /3 ib

PREFETCH = 0x0F18,

PEXTRW = 0x660FC5, // PEXTRW reg32, xmm, imm8 66 0F C5 /r ib
STMXCSR = 0x0FAE, // STMXCSR mem32 0F AE /3

// SSE3 Pentium 4 (Prescott)

ADDSUBPD = 0x660FD0,
Expand Down

0 comments on commit e4e6744

Please sign in to comment.