Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

252 lines (203 sloc) 9.608 kB
Ddoc
$(SPEC_S Vector Extensions,
$(P Modern CPUs often support specialized vector types and vector
operations, sometimes called "media instructions".
Vector types are fixed-size arrays of floating point or integer types, and
vector operations operate simultaneously on them, thus achieving
great speedups.)
$(P When the compiler takes advantage of these instructions with standard D code
to speed up loops over arithmetic data, this is called auto-vectorization.
Auto-vectorization, however, has had only limited success and has not
been able to really take advantage of the richness (and often quirkiness)
of the native vector instructions.
)
$(P D has the array operation notation, such as:
)
---
int[] a,b;
...
a[] += b[];
---
$(P which can be vectorized by the compiler, but again success is limited
for the same reason auto-vectorization is.
)
$(P The difficulties with trying to use vector instructions on regular arrays
are:)
$(OL
$(LI The vector types have stringent alignment requirements that are not
and cannot be met by conventional arrays.)
$(LI C ABIs often have vector extensions and have special name mangling
for them, call/return conventions, and symbolic debug support.)
$(LI The only way to get at the full vector instruction set would be to use
inline assembler - but the compiler cannot do register allocation across
inline assembler blocks (or other optimizations), leading to poor code
performance.)
$(LI Interleaving conventional array code with vector operations on the same
data can unwittingly lead to extremely poor runtime performance.)
)
$(P These issues are cleared up by using special vector types.
)
$(H2 $(D core.simd))
$(P Vector types and operations are introduced to D code by importing
$(CORE_SIMD):)
---
import core.simd;
---
$(P These types and operations will be the ones defined for the architecture
the compiler is targeting. If a particular CPU family has varying
support for vector types, an additional runtime check may be necessary.
The compiler does not emit runtime checks; those must be done by the programmer.
)
$(P The types defined will all follow the naming convention:)
$(GRAMMAR
$(I typeNN)
)
$(P where $(I type) is the vector element type and $(I NN) is the number
of those elements in the vector type.
The type names will not be keywords.
)
$(H3 Properties)
$(P Vector types have the property:)
$(TABLE2 Vector Type Properties,
$(THEAD Property, Description)
$(TROW .array, Returns static array representation)
)
$(P All the properties of the static array representation also work.)
$(H3 Conversions)
$(P Vector types of the same size can be implicitly converted among
each other. Vector types can be cast to the static array representation.)
$(P Integers and floating point values can be implicitly converted
to their vector equivalents:)
---
int4 v = 7;
v = 3 * v; // multiply each element in v by 3
---
$(H3 Accessing Individual Vector Elements)
$(P Individual vector elements cannot be accessed directly, but they can be
accessed once the vector is converted to an array type:)
----
int4 v;
(cast(int*)&v)[3] = 2; // set 4th element of the 4 int vector
(cast(int[4])v)[3] = 2; // set 4th element of the 4 int vector
v.array[3] = 2; // set 4th element of the 4 int vector
v.ptr[3] = 2; // set 4th element of the 4 int vector
----
$(H3 Conditional Compilation)
$(P If vector extensions are implemented, the
$(DDSUBLINK version, PredefinedVersions, version identifier)
$(D D_SIMD) is set.)
$(P Whether a type exists or not can be tested at compile time with
an $(DDSUBLINK expression, IsExpression, $(I IsExpression)):
)
---
static if (is(typeNN))
... yes, it is supported ...
else
... nope, use workaround ...
---
$(P Whether a particular operation on a type is supported can be tested
at compile time with:
)
---
float4 a,b;
static if (__traits(compiles, a+b))
... yes, it is supported ...
else
... nope, use workaround ...
---
$(P For runtime testing to see if certain vector instructions are
available, see the functions in $(LINK2 phobos/core_cpuid.html, core.cpuid).
)
$(P A typical workaround would be to use array vector operations instead:)
---
float4 a,b,c;
static if (__traits(compiles, a/b))
c = a / b;
else
c[] = a[] / b[];
---
$(H2 X86 And X86$(UNDERSCORE)64 Vector Extension Implementation)
$(P The rest of this document describes the specific implementation of the
vector types for the X86 and X86$(UNDERSCORE)64 architectures.
)
$(P The vector extensions are currently implemented for the OS X 32
bit target, and all 64 bit targets.)
$(P $(CORE_SIMD) defines the following types: )
$(TABLE2 Vector Types,
$(THEAD Type Name, Description, gcc Equivalent)
$(TROW void16, 16 bytes of untyped data, $(I no equivalent))
$(TROW byte16, 16 $(D byte)'s, $(D signed char __attribute__((vector_size(16)))))
$(TROW ubyte16, 16 $(D ubyte)'s, $(D unsigned char __attribute__((vector_size(16)))))
$(TROW short8, 8 $(D short)'s, $(D short __attribute__((vector_size(16)))))
$(TROW ushort8, 8 $(D ushort)'s, $(D unsigned short __attribute__((vector_size(16)))))
$(TROW int4, 4 $(D int)'s, $(D int __attribute__((vector_size(16)))))
$(TROW uint4, 4 $(D uint)'s, $(D unsigned __attribute__((vector_size(16)))))
$(TROW long2, 2 $(D long)'s, $(D long __attribute__((vector_size(16)))))
$(TROW ulong2, 2 $(D ulong)'s, $(D unsigned long __attribute__((vector_size(16)))))
$(TROW float4, 4 $(D float)'s, $(D float __attribute__((vector_size(16)))))
$(TROW double2, 2 $(D double)'s, $(D double __attribute__((vector_size(16)))))
$(TROW void32, 32 bytes of untyped data, $(I no equivalent))
$(TROW byte32, 32 $(D byte)'s, $(D signed char __attribute__((vector_size(32)))))
$(TROW ubyte32, 32 $(D ubyte)'s, $(D unsigned char __attribute__((vector_size(32)))))
$(TROW short16, 16 $(D short)'s, $(D short __attribute__((vector_size(32)))))
$(TROW ushort16, 16 $(D ushort)'s, $(D unsigned short __attribute__((vector_size(32)))))
$(TROW int8, 8 $(D int)'s, $(D int __attribute__((vector_size(32)))))
$(TROW uint8, 8 $(D uint)'s, $(D unsigned __attribute__((vector_size(32)))))
$(TROW long4, 4 $(D long)'s, $(D long __attribute__((vector_size(32)))))
$(TROW ulong4, 4 $(D ulong)'s, $(D unsigned long __attribute__((vector_size(32)))))
$(TROW float8, 8 $(D float)'s, $(D float __attribute__((vector_size(32)))))
$(TROW double4, 4 $(D double)'s, $(D double __attribute__((vector_size(32)))))
)
$(P Note: for 32 bit gcc, it's $(D long long) instead of $(D long).)
$(TABLE2 Supported 128-bit Vector Operators,
$(THEAD Operator,void16,byte16,ubyte16,short8,ushort8,int4,uint4,long2,ulong2,float4,double2)
$(TROW =,$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
$(TROW +,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
$(TROW -,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
$(TROW *,$(N),$(N),$(N),$(Y),$(Y),$(N),$(N),$(N),$(N),$(Y),$(Y))
$(TROW /,$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(Y),$(Y))
$(TROW $(CODE_AMP),$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
$(TROW |,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
$(TROW $(D ^),$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
$(TROW +=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
$(TROW -=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
$(TROW *=,$(N),$(N),$(N),$(Y),$(Y),$(N),$(N),$(N),$(N),$(Y),$(Y))
$(TROW /=,$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(Y),$(Y))
$(TROW $(CODE_AMP)=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
$(TROW |=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
$(TROW $(D ^=),$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
$(TROW $(I unary)$(D ~),$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
$(TROW $(I unary)+,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
$(TROW $(I unary)-,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
)
$(TABLE2 Supported 256-bit Vector Operators,
$(THEAD Operator,void32,byte32,ubyte32,short16,ushort16,int8,uint8,long4,ulong4,float8,double4)
$(TROW =,$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
$(TROW +,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
$(TROW -,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
$(TROW *,$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(Y),$(Y))
$(TROW /,$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(Y),$(Y))
$(TROW $(CODE_AMP),$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
$(TROW |,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
$(TROW $(D ^),$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
$(TROW +=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
$(TROW -=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
$(TROW *=,$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(Y),$(Y))
$(TROW /=,$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(N),$(Y),$(Y))
$(TROW $(CODE_AMP)=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
$(TROW |=,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
$(TROW $(D ^=),$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
$(TROW $(I unary)$(D ~),$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(N),$(N))
$(TROW $(I unary)+,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
$(TROW $(I unary)-,$(N),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y),$(Y))
)
$(P Operators not listed are not supported at all.)
$(H3 Vector Operation Intrinsics)
$(P See $(CORE_SIMD) for the supported intrinsics.)
)
Macros:
TITLE=Vector Extensions
WIKI=Float
Y=$(TIMES)
N=$(NDASH)
CORE_SIMD=$(LINK2 phobos/core_simd.html, $(D core.simd))
Jump to Line
Something went wrong with that request. Please try again.