Skip to content

Commit

Permalink
more vector support
Browse files Browse the repository at this point in the history
  • Loading branch information
WalterBright committed Jan 9, 2012
1 parent 1f14a6e commit 2553e0c
Show file tree
Hide file tree
Showing 7 changed files with 196 additions and 47 deletions.
2 changes: 1 addition & 1 deletion src/backend/cgreg.c
Expand Up @@ -611,7 +611,7 @@ void cgreg_spillreg_epilog(block *b,Symbol *s,code **pcstore,code **pcload)

void cgreg_map(Symbol *s, unsigned regmsw, unsigned reglsw)
{
assert(I64 || reglsw < 8);
//assert(I64 || reglsw < 8);

if (vec_disjoint(s->Srange,regrange[reglsw]) &&
(regmsw == NOREG || vec_disjoint(s->Srange,regrange[regmsw]))
Expand Down
157 changes: 137 additions & 20 deletions src/backend/cgxmm.c
@@ -1,4 +1,4 @@
// Copyright (C) 2011-2011 by Digital Mars
// Copyright (C) 2011-2012 by Digital Mars
// All Rights Reserved
// http://www.digitalmars.com
// Written by Walter Bright
Expand Down Expand Up @@ -77,27 +77,127 @@ code *orthxmm(elem *e, regm_t *pretregs)
case TYifloat: op = 0xF30F58; break; // ADDSS
case TYdouble:
case TYidouble: op = 0xF20F58; break; // ADDSD

// SIMD vector types
case TYfloat4: op = 0x000F58; break; // ADDPS
case TYdouble2: op = 0x660F58; break; // ADDPD
case TYschar16:
case TYuchar16: op = 0x660FFC; break; // PADDB
case TYshort8:
case TYushort8: op = 0x660FFD; break; // PADDW
case TYlong4:
case TYulong4: op = 0x660FFE; break; // PADDD
case TYllong2:
case TYullong2: op = 0x660FD4; break; // PADDQ

default: assert(0);
}
break;

case OPmin:
op = 0xF20F5C; // SUBSD
if (sz1 == 4) // float
op = 0xF30F5C; // SUBSS
switch (ty1)
{
case TYfloat:
case TYifloat: op = 0xF30F5C; break; // SUBSS
case TYdouble:
case TYidouble: op = 0xF20F5C; break; // SUBSD

// SIMD vector types
case TYfloat4: op = 0x000F5C; break; // SUBPS
case TYdouble2: op = 0x660F5C; break; // SUBPD
case TYschar16:
case TYuchar16: op = 0x660FF8; break; // PSUBB
case TYshort8:
case TYushort8: op = 0x660FF9; break; // PSUBW
case TYlong4:
case TYulong4: op = 0x660FFA; break; // PSUBD
case TYllong2:
case TYullong2: op = 0x660FFB; break; // PSUBQ

default: assert(0);
}
break;

case OPmul:
op = 0xF20F59; // MULSD
if (sz1 == 4) // float
op = 0xF30F59; // MULSS
switch (ty1)
{
case TYfloat:
case TYifloat: op = 0xF30F59; break; // MULSS
case TYdouble:
case TYidouble: op = 0xF20F59; break; // MULSD

// SIMD vector types
case TYfloat4: op = 0x000F59; break; // MULPS
case TYdouble2: op = 0x660F59; break; // MULPD
case TYschar16:
case TYuchar16: assert(0); break; // PMULB
case TYshort8:
case TYushort8: op = 0x660FD5; break; // PMULLW
case TYlong4:
case TYulong4: assert(0); break; // PMULD
case TYllong2:
case TYullong2: assert(0); break; // PMULQ

default: assert(0);
}
break;

case OPdiv:
op = 0xF20F5E; // DIVSD
if (sz1 == 4) // float
op = 0xF30F5E; // DIVSS
switch (ty1)
{
case TYfloat:
case TYifloat: op = 0xF30F5E; break; // DIVSS
case TYdouble:
case TYidouble: op = 0xF20F5E; break; // DIVSD

// SIMD vector types
case TYfloat4: op = 0x000F5E; break; // DIVPS
case TYdouble2: op = 0x660F5E; break; // DIVPD
case TYschar16:
case TYuchar16: assert(0); break; // PDIVB
case TYshort8:
case TYushort8: assert(0); break; // PDIVW
case TYlong4:
case TYulong4: assert(0); break; // PDIVD
case TYllong2:
case TYullong2: assert(0); break; // PDIVQ

default: assert(0);
}
break;

case OPor:
switch (ty1)
{
// SIMD vector types
case TYschar16:
case TYuchar16:
case TYshort8:
case TYushort8:
case TYlong4:
case TYulong4:
case TYllong2:
case TYullong2: op = 0x660FEB; break; // POR

default: assert(0);
}
break;

case OPand:
switch (ty1)
{
// SIMD vector types
case TYschar16:
case TYuchar16:
case TYshort8:
case TYushort8:
case TYlong4:
case TYulong4:
case TYllong2:
case TYullong2: op = 0x660FDB; break; // PAND

default: assert(0);
}
break;

case OPlt:
Expand Down Expand Up @@ -127,9 +227,12 @@ code *orthxmm(elem *e, regm_t *pretregs)
case OPnug:
case OPnue:
{ retregs = mPSW;
op = 0x660F2E; // UCOMISD
if (sz1 == 4) // float
op = 0x0F2E; // UCOMISS
else
{ assert(sz1 == 8);
op = 0x660F2E; // UCOMISD
}
code *cc = gen2(CNIL,op,modregxrmx(3,rreg-XMM0,reg-XMM0));
return cat4(c,cr,cg,cc);
}
Expand Down Expand Up @@ -169,9 +272,6 @@ code *xmmeq(elem *e,regm_t *pretregs)
tym_t tyml = tybasic(e1->Ety); /* type of lvalue */
regm_t retregs = *pretregs;

unsigned sz = tysize[tyml]; // # of bytes to transfer
assert(sz == 4 || sz == 8); // float or double size

if (!(retregs & XMMREGS))
retregs = XMMREGS; // pick any XMM reg

Expand Down Expand Up @@ -280,7 +380,6 @@ code *xmmopass(elem *e,regm_t *pretregs)
elem *e2 = e->E2;
tym_t ty1 = tybasic(e1->Ety);
unsigned sz1 = tysize[ty1];
assert(sz1 == 4 || sz1 == 8); // float or double
regm_t rretregs = XMMREGS & ~*pretregs;
if (!rretregs)
rretregs = XMMREGS;
Expand Down Expand Up @@ -318,9 +417,7 @@ code *xmmopass(elem *e,regm_t *pretregs)
if (!retregs)
retregs = XMMREGS & ~rretregs;
cg = allocreg(&retregs,&reg,ty1);
cs.Iop = 0xF20F10; // MOVSD xmm,xmm_m64
if (sz1 == 4)
cs.Iop = 0xF30F10; // MOVSS xmm,xmm_m32
cs.Iop = xmmload(ty1); // MOVSS/MOVSD/MOVAPS/MOVAPD/MOVDQA xmm,xmm_mem, chosen by type
code_newreg(&cs,reg - XMM0);
cg = gen(cg,&cs);
}
Expand Down Expand Up @@ -430,10 +527,20 @@ unsigned xmmload(tym_t tym)
case TYifloat: op = 0xF30F10; break; // MOVSS
case TYdouble:
case TYidouble: op = 0xF20F10; break; // MOVSD

case TYfloat4: op = 0x0F28; break; // MOVAPS
case TYdouble2: op = 0x660F28; break; // MOVAPD
case TYschar16:
case TYuchar16:
case TYshort8:
case TYushort8:
case TYlong4:
case TYulong4:
case TYllong2:
case TYullong2: op = 0x660F6F; break; // MOVDQA

default:
printf("tym = x%x\n", tym);
*(char*)0=0;
assert(0);
}
return op;
Expand All @@ -452,10 +559,20 @@ unsigned xmmstore(tym_t tym)
case TYdouble:
case TYidouble:
case TYcfloat: op = 0xF20F11; break; // MOVSD

case TYfloat4: op = 0x0F29; break; // MOVAPS
case TYdouble2: op = 0x660F29; break; // MOVAPD
case TYschar16:
case TYuchar16:
case TYshort8:
case TYushort8:
case TYlong4:
case TYulong4:
case TYllong2:
case TYullong2: op = 0x660F7F; break; // MOVDQA

default:
printf("tym = x%x\n", tym);
*(char*)0=0;
assert(0);
}
return op;
Expand Down
2 changes: 1 addition & 1 deletion src/backend/cod2.c
Expand Up @@ -4140,7 +4140,7 @@ code *cdneg(elem *e,regm_t *pretregs)
if (tyfloating(tyml))
{ if (tycomplex(tyml))
return neg_complex87(e, pretregs);
if (config.fpxmmregs && tyxmmreg(tyml) && e->Eoper == OPneg && *pretregs & XMMREGS)
if (tyxmmreg(tyml) && e->Eoper == OPneg && *pretregs & XMMREGS)
return xmmneg(e,pretregs);
if (config.inline8087 &&
((*pretregs & (ALLREGS | mBP)) == 0 || e->Eoper == OPsqrt || I64))
Expand Down
28 changes: 15 additions & 13 deletions src/backend/cod4.c
Expand Up @@ -335,11 +335,11 @@ code *cdeq(elem *e,regm_t *pretregs)
tym_t tyml = tybasic(e1->Ety); /* type of lvalue */
regm_t retregs = *pretregs;

if (tyfloating(tyml) && config.inline8087)
{
if (tyxmmreg(tyml) && config.fpxmmregs)
return xmmeq(e, pretregs);
if (tyxmmreg(tyml) && config.fpxmmregs)
return xmmeq(e, pretregs);

if (tyfloating(tyml) && config.inline8087)
{
if (tycomplex(tyml))
return complex_eq87(e, pretregs);

Expand All @@ -353,7 +353,7 @@ code *cdeq(elem *e,regm_t *pretregs)
return eq87(e,pretregs);
if (tyml == TYldouble || tyml == TYildouble)
return eq87(e,pretregs);
}
}

unsigned sz = tysize[tyml]; // # of bytes to transfer
assert((int)sz > 0);
Expand Down Expand Up @@ -782,11 +782,13 @@ code *cdaddass(elem *e,regm_t *pretregs)
tyml = tybasic(e1->Ety); // type of lvalue
sz = tysize[tyml];
byte = (sz == 1); // 1 for byte operation, else 0
if (tyfloating(tyml))
{
// See if evaluate in XMM registers
if (config.fpxmmregs && tyxmmreg(tyml) && op != OPnegass && !(*pretregs & mST0))
return xmmopass(e,pretregs);

// See if evaluate in XMM registers
if (config.fpxmmregs && tyxmmreg(tyml) && op != OPnegass && !(*pretregs & mST0))
return xmmopass(e,pretregs);

if (tyfloating(tyml))
{
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
if (op == OPnegass)
c = cdnegass87(e,pretregs);
Expand Down Expand Up @@ -1246,12 +1248,12 @@ code *cdmulass(elem *e,regm_t *pretregs)
unsigned rex = (I64 && sz == 8) ? REX_W : 0;
unsigned grex = rex << 16; // 64 bit operands

// See if evaluate in XMM registers
if (config.fpxmmregs && tyxmmreg(tyml) && op != OPmodass && !(*pretregs & mST0))
return xmmopass(e,pretregs);

if (tyfloating(tyml))
{
// See if evaluate in XMM registers
if (config.fpxmmregs && tyxmmreg(tyml) && op != OPmodass && !(*pretregs & mST0))
return xmmopass(e,pretregs);
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
return opass87(e,pretregs);
#else
Expand Down
19 changes: 18 additions & 1 deletion src/glue.c
Expand Up @@ -1058,8 +1058,25 @@ unsigned Type::totym()
break;

case Tvector:
t = TYfloat4;
{ TypeVector *tv = (TypeVector *)this;
TypeBasic *tb = tv->elementType();
switch (tb->ty)
{ case Tint8: t = TYschar16; break;
case Tuns8: t = TYuchar16; break;
case Tint16: t = TYshort8; break;
case Tuns16: t = TYushort8; break;
case Tint32: t = TYlong4; break;
case Tuns32: t = TYulong4; break;
case Tint64: t = TYllong2; break;
case Tuns64: t = TYullong2; break;
case Tfloat32: t = TYfloat4; break;
case Tfloat64: t = TYdouble2; break;
default:
assert(0);
break;
}
break;
}

default:
#ifdef DEBUG
Expand Down

3 comments on commit 2553e0c

@ibuclaw
Copy link
Member

@ibuclaw ibuclaw commented on 2553e0c Jan 9, 2012

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you also give TypeVector its own toCtype? This would make the implementation a little easier/cleaner for gdc - if not, I can always derive.

@WalterBright
Copy link
Member Author

@WalterBright WalterBright commented on 2553e0c Jan 9, 2012 via email

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ibuclaw
Copy link
Member

@ibuclaw ibuclaw commented on 2553e0c Jan 9, 2012

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It shouldn't be necessary, as Type::toCtype() should do it right.

OK, that's fine. I've put it in ifdef blocks on my end. Support is looking good. :)

Please sign in to comment.