Skip to content

Commit

Permalink
Enable promotion of SIMD fields of structs
Browse files Browse the repository at this point in the history
Only look for SIMD fields if a SIMD type has been found.
Also, since more cases of local struct values are no longer marked GTF_GLOB_REF, adjust the heuristics for allocating a temporary for a struct arrRef.

Fix #7508
  • Loading branch information
CarolEidt committed Jan 10, 2017
1 parent 01f8e7d commit 990d018
Show file tree
Hide file tree
Showing 9 changed files with 328 additions and 100 deletions.
3 changes: 2 additions & 1 deletion src/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3021,7 +3021,8 @@ void Compiler::compInitOptions(JitFlags* jitFlags)

#ifdef FEATURE_SIMD
// Minimum bar for availing SIMD benefits is SSE2 on AMD64/x86.
featureSIMD = jitFlags->IsSet(JitFlags::JIT_FLAG_FEATURE_SIMD);
featureSIMD = jitFlags->IsSet(JitFlags::JIT_FLAG_FEATURE_SIMD);
foundSIMDType = false;
#endif // FEATURE_SIMD

if (compIsForInlining() || compIsForImportOnly())
Expand Down
7 changes: 4 additions & 3 deletions src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ class LclVarDsc
// type of an arg node is TYP_BYREF and a local node is TYP_SIMD*.
unsigned char lvSIMDType : 1; // This is a SIMD struct
unsigned char lvUsedInSIMDIntrinsic : 1; // This tells lclvar is used for simd intrinsic
var_types lvBaseType : 5; // Note: this only packs because var_types is a typedef of unsigned char
#endif // FEATURE_SIMD
unsigned char lvRegStruct : 1; // This is a reg-sized non-field-addressed struct.

Expand All @@ -330,9 +331,6 @@ class LclVarDsc
// local.
unsigned lvParentLcl; // The index of the local var representing the parent (i.e. the promoted struct local).
// Valid on promoted struct local fields.
#ifdef FEATURE_SIMD
var_types lvBaseType; // The base type of a SIMD local var. Valid on TYP_SIMD locals.
#endif // FEATURE_SIMD
};

unsigned char lvFieldCnt; // Number of fields in the promoted VarDsc.
Expand Down Expand Up @@ -6931,6 +6929,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// Should we support SIMD intrinsics?
bool featureSIMD;

// Have we identified any SIMD types?
bool foundSIMDType;

// This is a temp lclVar allocated on the stack as TYP_SIMD. It is used to implement intrinsics
// that require indexed access to the individual fields of the vector, which is not well supported
// by the hardware. It is allocated when/if such situations are encountered during Lowering.
Expand Down
13 changes: 12 additions & 1 deletion src/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1137,7 +1137,6 @@ inline GenTreePtr Compiler::gtNewFieldRef(
tree->gtField.gtFldObj = obj;
tree->gtField.gtFldHnd = fldHnd;
tree->gtField.gtFldOffset = offset;
tree->gtFlags |= GTF_GLOB_REF;

#ifdef FEATURE_READYTORUN_COMPILER
tree->gtField.gtFieldLookup.addr = nullptr;
Expand All @@ -1154,6 +1153,18 @@ inline GenTreePtr Compiler::gtNewFieldRef(
{
unsigned lclNum = obj->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
lvaTable[lclNum].lvFieldAccessed = 1;
#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
// These structs are passed by reference; we should probably be able to treat these
// as non-global refs, but downstream logic expects these to be marked this way.
if (lvaTable[lclNum].lvIsParam)
{
tree->gtFlags |= GTF_GLOB_REF;
}
#endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
}
else
{
tree->gtFlags |= GTF_GLOB_REF;
}

return tree;
Expand Down
4 changes: 4 additions & 0 deletions src/jit/flowgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21970,6 +21970,10 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
compNeedsGSSecurityCookie |= InlineeCompiler->compNeedsGSSecurityCookie;
compGSReorderStackLayout |= InlineeCompiler->compGSReorderStackLayout;

#ifdef FEATURE_SIMD
foundSIMDType |= InlineeCompiler->foundSIMDType;
#endif // FEATURE_SIMD

// Update unmanaged call count
info.compCallUnmanaged += InlineeCompiler->info.compCallUnmanaged;

Expand Down
38 changes: 35 additions & 3 deletions src/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1507,13 +1507,31 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
CorInfoType corType = info.compCompHnd->getFieldType(pFieldInfo->fldHnd, &pFieldInfo->fldTypeHnd);
var_types varType = JITtype2varType(corType);
pFieldInfo->fldType = varType;
pFieldInfo->fldSize = genTypeSize(varType);
unsigned size = genTypeSize(varType);
pFieldInfo->fldSize = size;

if (varTypeIsGC(varType))
{
containsGCpointers = true;
}

#ifdef FEATURE_SIMD
// Check to see if this is a SIMD type.
// We will only check this if we have already found a SIMD type, which will be true if
// we have encountered any SIMD intrinsics.
if (foundSIMDType && (pFieldInfo->fldSize == 0) && isSIMDClass(pFieldInfo->fldTypeHnd))
{
unsigned simdSize;
var_types simdBaseType = getBaseTypeAndSizeOfSIMDType(pFieldInfo->fldTypeHnd, &simdSize);
if (simdBaseType != TYP_UNKNOWN)
{
varType = getSIMDTypeForSize(simdSize);
pFieldInfo->fldType = varType;
pFieldInfo->fldSize = simdSize;
}
}
#endif // FEATURE_SIMD

if (pFieldInfo->fldSize == 0)
{
// Non-primitive struct field. Don't promote.
Expand Down Expand Up @@ -1683,7 +1701,7 @@ void Compiler::lvaPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* Stru
{
lvaStructFieldInfo* pFieldInfo = &StructPromotionInfo->fields[index];

if (varTypeIsFloating(pFieldInfo->fldType))
if (varTypeIsFloating(pFieldInfo->fldType) || varTypeIsSIMD(pFieldInfo->fldType))
{
lvaTable[lclNum].lvContainsFloatingFields = 1;
// Whenever we promote a struct that contains a floating point field
Expand Down Expand Up @@ -1727,12 +1745,27 @@ void Compiler::lvaPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* Stru
fieldVarDsc->lvIsRegArg = true;
fieldVarDsc->lvArgReg = varDsc->lvArgReg;
fieldVarDsc->setPrefReg(varDsc->lvArgReg, this); // Set the preferred register
#if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD)
if (varTypeIsSIMD(fieldVarDsc))
{
fieldVarDsc->lvOtherArgReg = varDsc->lvOtherArgReg;
}
#endif // FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD)

lvaMarkRefsWeight = BB_UNITY_WEIGHT; // incRefCnts can use this compiler global variable
fieldVarDsc->incRefCnts(BB_UNITY_WEIGHT, this); // increment the ref count for prolog initialization
}
#endif

#ifdef FEATURE_SIMD
if (varTypeIsSIMD(pFieldInfo->fldType))
{
// Set size to zero so that lvaSetStruct will appropriately set the SIMD-relevant fields.
fieldVarDsc->lvExactSize = 0;
lvaSetStruct(varNum, pFieldInfo->fldTypeHnd, false, true);
}
#endif // FEATURE_SIMD

#ifdef DEBUG
// This temporary should not be converted to a double in stress mode,
// because we introduce assigns to it after the stress conversion
Expand Down Expand Up @@ -2029,7 +2062,6 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool
}
else
{
assert(varDsc->lvExactSize != 0);
#if FEATURE_SIMD
assert(!varTypeIsSIMD(varDsc) || (varDsc->lvBaseType != TYP_UNKNOWN));
#endif // FEATURE_SIMD
Expand Down
178 changes: 87 additions & 91 deletions src/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5630,7 +5630,8 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
// dereference.
// Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
//
if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY))
if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD))
{
unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
Expand All @@ -5649,7 +5650,8 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
// dereference.
// Also we allocate the temporary when the index is sufficiently complex/expensive.
//
if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY))
if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) ||
(arrRef->OperGet() == GT_FIELD))
{
unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
indexDefn = gtNewTempAssign(indexTmpNum, index);
Expand Down Expand Up @@ -6051,14 +6053,15 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
{
assert(tree->gtOper == GT_FIELD);

noway_assert(tree->gtFlags & GTF_GLOB_REF);

CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
unsigned fldOffset = tree->gtField.gtFldOffset;
GenTreePtr objRef = tree->gtField.gtFldObj;
bool fieldMayOverlap = false;
bool objIsLocal = false;

noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) ||
((tree->gtFlags & GTF_GLOB_REF) != 0));

if (tree->gtField.gtFldMayOverlap)
{
fieldMayOverlap = true;
Expand Down Expand Up @@ -17203,109 +17206,102 @@ void Compiler::fgPromoteStructs()
Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
{
noway_assert(tree->OperGet() == GT_FIELD);
noway_assert(tree->gtFlags & GTF_GLOB_REF);

GenTreePtr objRef = tree->gtField.gtFldObj;
GenTreePtr obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr;
noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)));

/* Is this an instance data member? */

if (objRef)
if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))
{
if (objRef->gtOper == GT_ADDR)
{
GenTreePtr obj = objRef->gtOp.gtOp1;
unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
LclVarDsc* varDsc = &lvaTable[lclNum];

if (obj->gtOper == GT_LCL_VAR)
if (varTypeIsStruct(obj))
{
if (varDsc->lvPromoted)
{
unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
LclVarDsc* varDsc = &lvaTable[lclNum];
// Promoted struct
unsigned fldOffset = tree->gtField.gtFldOffset;
unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
noway_assert(fieldLclIndex != BAD_VAR_NUM);

tree->SetOper(GT_LCL_VAR);
tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
tree->gtType = lvaTable[fieldLclIndex].TypeGet();
tree->gtFlags &= GTF_NODE_MASK;
tree->gtFlags &= ~GTF_GLOB_REF;

if (varTypeIsStruct(obj))
GenTreePtr parent = fgWalkPre->parentStack->Index(1);
if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
{
if (varDsc->lvPromoted)
{
// Promoted struct
unsigned fldOffset = tree->gtField.gtFldOffset;
unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
noway_assert(fieldLclIndex != BAD_VAR_NUM);

tree->SetOper(GT_LCL_VAR);
tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
tree->gtType = lvaTable[fieldLclIndex].TypeGet();
tree->gtFlags &= GTF_NODE_MASK;
tree->gtFlags &= ~GTF_GLOB_REF;

GenTreePtr parent = fgWalkPre->parentStack->Index(1);
if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
{
tree->gtFlags |= GTF_VAR_DEF;
tree->gtFlags |= GTF_DONT_CSE;
}
#ifdef DEBUG
if (verbose)
{
printf("Replacing the field in promoted struct with a local var:\n");
fgWalkPre->printModified = true;
}
#endif // DEBUG
return WALK_SKIP_SUBTREES;
}
tree->gtFlags |= GTF_VAR_DEF;
tree->gtFlags |= GTF_DONT_CSE;
}
else
#ifdef DEBUG
if (verbose)
{
// Normed struct
// A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
// the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
// bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
// there is one extremely rare case where that won't be true. An enum type is a special value type
// that contains exactly one element of a primitive integer type (that, for CLS programs is named
// "value__"). The VM tells us that a local var of that enum type is the primitive type of the
// enum's single field. It turns out that it is legal for IL to access this field using ldflda or
// ldfld. For example:
//
// .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
// {
// .field public specialname rtspecialname int16 value__
// .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
// }
// .method public hidebysig static void Main() cil managed
// {
// .locals init (valuetype mynamespace.e_t V_0)
// ...
// ldloca.s V_0
// ldflda int16 mynamespace.e_t::value__
// ...
// }
//
// Normally, compilers will not generate the ldflda, since it is superfluous.
//
// In the example, the lclVar is short, but the JIT promotes all trees using this local to the
// "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
// mismatch like this, don't do this morphing. The local var may end up getting marked as
// address taken, and the appropriate SHORT load will be done from memory in that case.
printf("Replacing the field in promoted struct with a local var:\n");
fgWalkPre->printModified = true;
}
#endif // DEBUG
return WALK_SKIP_SUBTREES;
}
}
else
{
// Normed struct
// A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
// the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
// bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
// there is one extremely rare case where that won't be true. An enum type is a special value type
// that contains exactly one element of a primitive integer type (that, for CLS programs is named
// "value__"). The VM tells us that a local var of that enum type is the primitive type of the
// enum's single field. It turns out that it is legal for IL to access this field using ldflda or
// ldfld. For example:
//
// .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
// {
// .field public specialname rtspecialname int16 value__
// .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
// }
// .method public hidebysig static void Main() cil managed
// {
// .locals init (valuetype mynamespace.e_t V_0)
// ...
// ldloca.s V_0
// ldflda int16 mynamespace.e_t::value__
// ...
// }
//
// Normally, compilers will not generate the ldflda, since it is superfluous.
//
// In the example, the lclVar is short, but the JIT promotes all trees using this local to the
// "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
// mismatch like this, don't do this morphing. The local var may end up getting marked as
// address taken, and the appropriate SHORT load will be done from memory in that case.

if (tree->TypeGet() == obj->TypeGet())
{
tree->ChangeOper(GT_LCL_VAR);
tree->gtLclVarCommon.SetLclNum(lclNum);
tree->gtFlags &= GTF_NODE_MASK;
if (tree->TypeGet() == obj->TypeGet())
{
tree->ChangeOper(GT_LCL_VAR);
tree->gtLclVarCommon.SetLclNum(lclNum);
tree->gtFlags &= GTF_NODE_MASK;

GenTreePtr parent = fgWalkPre->parentStack->Index(1);
if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
{
tree->gtFlags |= GTF_VAR_DEF;
tree->gtFlags |= GTF_DONT_CSE;
}
GenTreePtr parent = fgWalkPre->parentStack->Index(1);
if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
{
tree->gtFlags |= GTF_VAR_DEF;
tree->gtFlags |= GTF_DONT_CSE;
}
#ifdef DEBUG
if (verbose)
{
printf("Replacing the field in normed struct with the local var:\n");
fgWalkPre->printModified = true;
}
#endif // DEBUG
return WALK_SKIP_SUBTREES;
}
if (verbose)
{
printf("Replacing the field in normed struct with the local var:\n");
fgWalkPre->printModified = true;
}
#endif // DEBUG
return WALK_SKIP_SUBTREES;
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/jit/simd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,8 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u
size = getSIMDVectorRegisterByteLength();
}

*sizeBytes = size;
*sizeBytes = size;
foundSIMDType = true;
}

return simdBaseType;
Expand Down

0 comments on commit 990d018

Please sign in to comment.