Skip to content

Commit

Permalink
Merge pull request #624 from dawgfoto/SSEConvert
Browse files Browse the repository at this point in the history
use SSE2 CVT instructions
  • Loading branch information
WalterBright committed Jan 15, 2012
2 parents 203eb26 + c88e3ac commit bd91001
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 41 deletions.
123 changes: 123 additions & 0 deletions src/backend/cgxmm.c
Expand Up @@ -231,6 +231,129 @@ code *xmmeq(elem *e,regm_t *pretregs)
return c;
}

/********************************
 * Generate code for conversion using SSE2 instructions.
 *
 *      OPs32_d
 *      OPs64_d (64-bit only)
 *      OPu32_d (64-bit only)
 *      OPd_f
 *      OPf_d
 *      OPd_s32
 *      OPd_s64 (64-bit only)
 *
 * Params:
 *      e        = conversion elem; e->Eoper selects the conversion and
 *                 e->E1 is the operand
 *      pretregs = in: mask of acceptable result registers,
 *                 out: adjusted through fixresult() when the result was
 *                 produced in a different register set
 * Returns:
 *      generated code list
 *
 * The caller is responsible for only passing the "64-bit only" operators
 * when I64 is set; this is enforced by the assert on the REX prefix below.
 */

code *xmmcnvt(elem *e,regm_t *pretregs)
{
    code *c;
    unsigned op=0, regs;
    tym_t ty;
    unsigned char rex = 0;
    bool zx = false; // zero extend uint

    /* There are no ops for integer <-> float/real conversions
     * but there are instructions for them. In order to use these
     * try to fuse chained conversions. Be careful not to lose
     * precision for real to long.
     */
    elem *e1 = e->E1;
    switch (e->Eoper)
    {
        case OPd_f:
            ty = TYfloat;
            /* int -> double -> float can be done with a single CVTSI2SS.
             * The 64-bit and unsigned 32-bit sources need a REX.W prefix,
             * which only exists in 64-bit mode, so fuse those only if I64.
             * Otherwise let the inner conversion be generated on its own
             * and narrow the resulting double with CVTSD2SS.
             */
            if (e1->Eoper == OPs32_d ||
                (I64 && (e1->Eoper == OPs64_d || e1->Eoper == OPu32_d)))
            {
                if (e1->Eoper != OPs32_d)
                    rex = REX_W;
                zx = (e1->Eoper == OPu32_d);
                // directly use si2ss
                regs = ALLREGS;
                e1 = e1->E1;            // skip the intermediate conversion
                op = CVTSI2SS;
            }
            else
            {
                regs = XMMREGS;
                op = CVTSD2SS;
            }
            break;

        case OPs32_d:              goto Litod;
        case OPs64_d: rex = REX_W; goto Litod;
        case OPu32_d: rex = REX_W; zx = true; goto Litod;
        Litod:
            regs = ALLREGS;
            op = CVTSI2SD;
            ty = TYdouble;
            break;

        case OPd_s32: ty = TYint;  goto Ldtoi;
        case OPd_s64: ty = TYlong; rex = REX_W; goto Ldtoi;
        Ldtoi:
            regs = XMMREGS;
            switch (e1->Eoper)
            {
                case OPf_d:
                    // float -> double -> int: truncate the float directly
                    e1 = e1->E1;
                    op = CVTTSS2SI;
                    break;
                case OPld_d:
                    if (e->Eoper == OPd_s64)
                        return cnvt87(e,pretregs); // precision
                    /* FALL-THROUGH */
                default:
                    op = CVTTSD2SI;
                    break;
            }
            break;

        case OPf_d:
            regs = XMMREGS;
            op = CVTSS2SD;
            ty = TYdouble;
            break;

        default:
            // unsupported operator: op stays 0 and is rejected just below
            break;
    }
    assert(op);

    // Evaluate the operand into a general purpose or XMM register
    c = codelem(e1, &regs, FALSE);
    unsigned reg = findreg(regs);
    if (reg >= XMM0)
        reg -= XMM0;                    // encode XMM registers as 0..n
    else if (zx)
    {   assert(I64);
        c = cat(c,getregs(regs));
        c = genregs(c,0x89,reg,reg);    // MOV reg,reg to zero upper 32-bit
        code_orflag(c,CFvolatile);
    }

    // Pick the register set for the result
    unsigned retregs = *pretregs;
    if (tyxmmreg(ty)) // target is XMM
    {   if (!(*pretregs & XMMREGS))
            retregs = XMMREGS;
    }
    else              // source is XMM
    {   assert(regs & XMMREGS);
        if (!(retregs & ALLREGS))
            retregs = ALLREGS;
    }

    unsigned rreg;
    c = cat(c,allocreg(&retregs,&rreg,ty));
    if (rreg >= XMM0)
        rreg -= XMM0;

    // Emit the conversion instruction: op rreg,reg
    c = gen2(c, op, modregxrmx(3,rreg,reg));
    assert(I64 || !rex);
    if (rex)
        code_orrex(c, rex);

    // Move the result to where the caller wanted it, if necessary
    if (*pretregs != retregs)
        c = cat(c,fixresult(e,retregs,pretregs));
    return c;
}

/********************************
* Generate code for op=
*/
Expand Down
62 changes: 21 additions & 41 deletions src/backend/cod4.c
Expand Up @@ -2524,16 +2524,8 @@ code *cdcnvt(elem *e, regm_t *pretregs)
case OPf_d:
case OPd_f:
if (config.fpxmmregs && *pretregs & XMMREGS)
{
c1 = codelem(e->E1,pretregs,FALSE);
unsigned reg = findreg(*pretregs) - XMM0;
if (e->Eoper == OPf_d)
c1 = gen2(c1, 0xF30F5A, modregxrmx(3,reg,reg));
else
// CVTSD2SS XMMreg,XMMreg
c1 = gen2(c1, 0xF20F5A, modregxrmx(3,reg,reg));
return c1;
}
return xmmcnvt(e, pretregs);

/* it won't do us much good to transfer back and */
/* forth between 8088 registers and 8087 registers */
if (OTcall(e->E1->Eoper) && !(*pretregs & allregs))
Expand All @@ -2551,42 +2543,23 @@ code *cdcnvt(elem *e, regm_t *pretregs)
}
if (tycomplex(e->E1->Ety))
goto Lcomplex;
/* FALL-THROUGH */
goto Lload87;

case OPs64_d:
if (!I64)
goto Lload87;
/* FALL-THROUGH */
case OPs32_d:
if (I64 && *pretregs & XMMREGS)
{
LXMMint2double:
unsigned retregs = ALLREGS;

c1 = codelem(e->E1, &retregs, FALSE);
unsigned reg = findreg(retregs);

if (e->Eoper == OPu32_d)
{ // MOV reg,reg to zero upper 32 bits
c1 = genregs(c1,0x89,reg,reg);
}

unsigned xreg;
retregs = XMMREGS & *pretregs;
c1 = cat(c1,allocreg(&retregs,&xreg,TYdouble));
xreg = findreg(retregs);

// CVTSI2SD xreg,reg
c2 = gen2(NULL, 0xF20F2A, modregxrmx(3,xreg-XMM0,reg));
if (e->Eoper == OPs64_d || e->Eoper == OPu32_d)
c2->Irex |= REX_W;
*pretregs = mask[xreg];
return cat(c1, c2);
}
if (config.fpxmmregs && *pretregs & XMMREGS)
return xmmcnvt(e, pretregs);
/* FALL-THROUGH */
case OPs16_d:
case OPu16_d:
Lload87:
return load87(e,0,pretregs,NULL,-1);
case OPu32_d:
// load as 64-bit signed value
if (I64 && *pretregs & XMMREGS)
goto LXMMint2double;
if (I64 && config.fpxmmregs && *pretregs & XMMREGS)
return xmmcnvt(e,pretregs);
else if (!I16)
{
unsigned retregs = ALLREGS;
Expand All @@ -2604,10 +2577,17 @@ code *cdcnvt(elem *e, regm_t *pretregs)
return cat(c1, c2);
}
break;
case OPd_s16:
case OPd_s64:
if (!I64)
goto Lcnvt87;
/* FALL-THROUGH */
case OPd_s32:
if (config.fpxmmregs)
return xmmcnvt(e,pretregs);
/* FALL-THROUGH */
case OPd_s16:
case OPd_u16:
case OPd_s64:
Lcnvt87:
return cnvt87(e,pretregs);
case OPd_u32: // use subroutine, not 8087
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
Expand Down
1 change: 1 addition & 0 deletions src/backend/code.h
Expand Up @@ -811,6 +811,7 @@ void cod5_noprol();
code *movxmmconst(unsigned reg, unsigned sz, targ_size_t value, regm_t flags);
code *orthxmm(elem *e, regm_t *pretregs);
code *xmmeq(elem *e, regm_t *pretregs);
// Generate code for a conversion using SSE2 CVT instructions (cgxmm.c)
code *xmmcnvt(elem *e,regm_t *pretregs);
code *xmmopass(elem *e, regm_t *pretregs);
code *xmmneg(elem *e, regm_t *pretregs);
unsigned xmmload(tym_t tym);
Expand Down

0 comments on commit bd91001

Please sign in to comment.