Skip to content

Commit ec351aa

Browse files
authored
[RISC-V] Optimize comparisons (#115039)
* 12-bit immediates for sltiu * Sign-extend for sltu * Use sub for calculating difference for GT_EQ and GT_NE * Special-case for -1, 0, 1 * Sign-extend operands for branch comparisons * Fix assert * Implement load imm * Fix sltiu rd, rs, 0
1 parent b04d40e commit ec351aa

File tree

10 files changed

+373
-203
lines changed

10 files changed

+373
-203
lines changed

src/coreclr/jit/codegenriscv64.cpp

Lines changed: 85 additions & 181 deletions
Original file line numberDiff line numberDiff line change
@@ -3123,6 +3123,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree)
31233123
assert(!op2->isUsedFromMemory());
31243124

31253125
emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type));
3126+
assert(cmpSize == EA_4BYTE || cmpSize == EA_8BYTE);
31263127

31273128
assert(genTypeSize(op1Type) == genTypeSize(op2Type));
31283129

@@ -3205,192 +3206,126 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree)
32053206
{
32063207
ssize_t imm = op2->AsIntCon()->gtIconVal;
32073208

3208-
switch (cmpSize)
3209+
bool useAddSub = !(!tree->OperIs(GT_EQ, GT_NE) || (imm == -2048));
3210+
bool useShiftRight =
3211+
!isUnsigned && ((tree->OperIs(GT_LT) && (imm == 0)) || (tree->OperIs(GT_LE) && (imm == -1)));
3212+
bool useLoadImm = isUnsigned && ((tree->OperIs(GT_LT, GT_GE) && (imm == 0)) ||
3213+
(tree->OperIs(GT_LE, GT_GT) && (imm == -1)));
3214+
3215+
if (cmpSize == EA_4BYTE)
32093216
{
3210-
case EA_4BYTE:
3217+
if (!useAddSub && !useShiftRight && !useLoadImm)
32113218
{
32123219
regNumber tmpRegOp1 = internalRegisters.GetSingle(tree);
32133220
assert(regOp1 != tmpRegOp1);
3214-
if (isUnsigned)
3215-
{
3216-
imm = static_cast<uint32_t>(imm);
3217-
emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32);
3218-
emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32);
3219-
}
3220-
else
3221-
{
3222-
imm = static_cast<int32_t>(imm);
3223-
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
3224-
}
3221+
imm = static_cast<int32_t>(imm);
3222+
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
32253223
regOp1 = tmpRegOp1;
3226-
break;
32273224
}
3228-
case EA_8BYTE:
3229-
break;
3230-
default:
3231-
unreached();
32323225
}
32333226

3234-
if (tree->OperIs(GT_LT))
3235-
{
3236-
if (!isUnsigned && emitter::isValidSimm12(imm))
3237-
{
3238-
emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm);
3239-
}
3240-
else if (isUnsigned && emitter::isValidUimm11(imm))
3241-
{
3242-
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm);
3243-
}
3244-
else
3245-
{
3246-
emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm);
3247-
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA);
3248-
}
3249-
}
3250-
else if (tree->OperIs(GT_LE))
3251-
{
3252-
if (!isUnsigned && emitter::isValidSimm12(imm + 1))
3253-
{
3254-
emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1);
3255-
}
3256-
else if (isUnsigned && emitter::isValidUimm11(imm + 1) && (imm != (~0)))
3257-
{
3258-
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm + 1);
3259-
}
3260-
else
3261-
{
3262-
emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm + 1);
3263-
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA);
3264-
}
3265-
}
3266-
else if (tree->OperIs(GT_GT))
3227+
if (tree->OperIs(GT_EQ, GT_NE))
32673228
{
3268-
if (!isUnsigned && emitter::isValidSimm12(imm + 1))
3229+
if ((imm != 0) || (cmpSize == EA_4BYTE))
32693230
{
3270-
emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1);
3271-
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1);
3231+
instruction diff = INS_xori;
3232+
if (imm != -2048)
3233+
{
3234+
assert(useAddSub);
3235+
diff = (cmpSize == EA_4BYTE) ? INS_addiw : INS_addi;
3236+
imm = -imm;
3237+
}
3238+
emit->emitIns_R_R_I(diff, cmpSize, targetReg, regOp1, imm);
3239+
regOp1 = targetReg;
32723240
}
3273-
else if (isUnsigned && emitter::isValidUimm11(imm + 1) && (imm != (~0)))
3241+
assert(emitter::isValidSimm12(imm));
3242+
3243+
if (tree->OperIs(GT_EQ))
32743244
{
3275-
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm + 1);
3276-
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1);
3245+
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, 1);
32773246
}
32783247
else
32793248
{
3280-
emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm);
3281-
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, REG_RA, regOp1);
3249+
assert(tree->OperIs(GT_NE));
3250+
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_ZERO, regOp1);
32823251
}
32833252
}
3284-
else if (tree->OperIs(GT_GE))
3253+
else
32853254
{
3286-
if (!isUnsigned && emitter::isValidSimm12(imm))
3287-
{
3288-
emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm);
3289-
}
3290-
else if (isUnsigned && emitter::isValidUimm11(imm))
3291-
{
3292-
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm);
3293-
}
3294-
else
3255+
assert(tree->OperIs(GT_LT, GT_LE, GT_GT, GT_GE));
3256+
if (useLoadImm)
32953257
{
3296-
emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm);
3297-
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA);
3258+
// unsigned (a <= ~0), (a >= 0) / (a > ~0), (a < 0) is always true / false
3259+
imm = tree->OperIs(GT_GE, GT_LE) ? 1 : 0;
3260+
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, targetReg, REG_ZERO, imm);
32983261
}
3299-
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1);
3300-
}
3301-
else if (tree->OperIs(GT_NE))
3302-
{
3303-
if (!imm)
3262+
else if (useShiftRight)
33043263
{
3305-
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, regOp1);
3264+
// signed (a < 0) or (a <= -1) is just the sign bit
3265+
instruction srli = (cmpSize == EA_4BYTE) ? INS_srliw : INS_srli;
3266+
emit->emitIns_R_R_I(srli, cmpSize, targetReg, regOp1, cmpSize * 8 - 1);
33063267
}
3307-
else if (emitter::isValidUimm12(imm))
3268+
else if ((tree->OperIs(GT_GT) && (imm == 0)) || (tree->OperIs(GT_GE) && (imm == 1)))
33083269
{
3309-
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm);
3310-
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg);
3270+
instruction slt = isUnsigned ? INS_sltu : INS_slt;
3271+
emit->emitIns_R_R_R(slt, EA_PTRSIZE, targetReg, REG_ZERO, regOp1);
33113272
}
33123273
else
33133274
{
3314-
emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm);
3315-
emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA);
3316-
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg);
3317-
}
3318-
}
3319-
else if (tree->OperIs(GT_EQ))
3320-
{
3321-
if (!imm)
3322-
{
3323-
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, 1);
3324-
}
3325-
else if (emitter::isValidUimm12(imm))
3326-
{
3327-
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm);
3328-
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1);
3329-
}
3330-
else
3331-
{
3332-
emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm);
3333-
emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA);
3334-
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1);
3275+
instruction slti = isUnsigned ? INS_sltiu : INS_slti;
3276+
if (tree->OperIs(GT_LE, GT_GT))
3277+
imm += 1;
3278+
assert(emitter::isValidSimm12(imm));
3279+
assert(!isUnsigned || (imm != 0)); // should be handled in useLoadImm
3280+
3281+
emit->emitIns_R_R_I(slti, EA_PTRSIZE, targetReg, regOp1, imm);
3282+
3283+
if (tree->OperIs(GT_GT, GT_GE))
3284+
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1);
33353285
}
33363286
}
33373287
}
33383288
else
33393289
{
33403290
regNumber regOp2 = op2->GetRegNum();
33413291

3342-
if (cmpSize == EA_4BYTE)
3292+
if (tree->OperIs(GT_EQ, GT_NE))
33433293
{
3344-
regNumber tmpRegOp1 = REG_RA;
3345-
regNumber tmpRegOp2 = internalRegisters.GetSingle(tree);
3346-
assert(regOp1 != tmpRegOp2);
3347-
assert(regOp2 != tmpRegOp2);
3348-
3349-
if (isUnsigned)
3294+
instruction sub = (cmpSize == EA_4BYTE) ? INS_subw : INS_sub;
3295+
emit->emitIns_R_R_R(sub, EA_PTRSIZE, targetReg, regOp1, regOp2);
3296+
if (tree->OperIs(GT_EQ))
33503297
{
3351-
emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32);
3352-
emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32);
3353-
3354-
emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp2, regOp2, 32);
3355-
emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp2, tmpRegOp2, 32);
3298+
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1);
33563299
}
33573300
else
33583301
{
3359-
emit->emitIns_R_R_I(INS_slliw, EA_8BYTE, tmpRegOp1, regOp1, 0);
3360-
emit->emitIns_R_R_I(INS_slliw, EA_8BYTE, tmpRegOp2, regOp2, 0);
3302+
assert(tree->OperIs(GT_NE));
3303+
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_ZERO, targetReg);
33613304
}
3362-
3363-
regOp1 = tmpRegOp1;
3364-
regOp2 = tmpRegOp2;
3365-
}
3366-
3367-
if (tree->OperIs(GT_LT))
3368-
{
3369-
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2);
3370-
}
3371-
else if (tree->OperIs(GT_LE))
3372-
{
3373-
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1);
3374-
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1);
3375-
}
3376-
else if (tree->OperIs(GT_GT))
3377-
{
3378-
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1);
33793305
}
3380-
else if (tree->OperIs(GT_GE))
3381-
{
3382-
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2);
3383-
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1);
3384-
}
3385-
else if (tree->OperIs(GT_NE))
3386-
{
3387-
emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2);
3388-
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg);
3389-
}
3390-
else if (tree->OperIs(GT_EQ))
3306+
else
33913307
{
3392-
emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2);
3393-
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1);
3308+
assert(tree->OperIs(GT_LT, GT_LE, GT_GT, GT_GE));
3309+
if (cmpSize == EA_4BYTE)
3310+
{
3311+
regNumber tmpRegOp1 = REG_RA;
3312+
regNumber tmpRegOp2 = internalRegisters.GetSingle(tree);
3313+
assert(regOp1 != tmpRegOp2);
3314+
assert(regOp2 != tmpRegOp2);
3315+
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
3316+
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp2, regOp2);
3317+
regOp1 = tmpRegOp1;
3318+
regOp2 = tmpRegOp2;
3319+
}
3320+
3321+
instruction slt = isUnsigned ? INS_sltu : INS_slt;
3322+
if (tree->OperIs(GT_LE, GT_GT))
3323+
std::swap(regOp1, regOp2);
3324+
3325+
emit->emitIns_R_R_R(slt, EA_8BYTE, targetReg, regOp1, regOp2);
3326+
3327+
if (tree->OperIs(GT_LE, GT_GE))
3328+
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1);
33943329
}
33953330
}
33963331
}
@@ -3452,19 +3387,8 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree)
34523387
{
34533388
regNumber tmpRegOp1 = rsGetRsvdReg();
34543389
assert(regOp1 != tmpRegOp1);
3455-
if (cond.IsUnsigned())
3456-
{
3457-
imm = static_cast<uint32_t>(imm);
3458-
3459-
assert(regOp1 != tmpRegOp1);
3460-
emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32);
3461-
emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32);
3462-
}
3463-
else
3464-
{
3465-
imm = static_cast<int32_t>(imm);
3466-
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
3467-
}
3390+
imm = static_cast<int32_t>(imm);
3391+
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
34683392
regOp1 = tmpRegOp1;
34693393
break;
34703394
}
@@ -3500,15 +3424,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree)
35003424
{
35013425
regNumber tmpRegOp1 = rsGetRsvdReg();
35023426
assert(regOp1 != tmpRegOp1);
3503-
if (cond.IsUnsigned())
3504-
{
3505-
emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32);
3506-
emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32);
3507-
}
3508-
else
3509-
{
3510-
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
3511-
}
3427+
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
35123428
regOp1 = tmpRegOp1;
35133429
}
35143430
}
@@ -3557,20 +3473,8 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree)
35573473
regNumber tmpRegOp2 = rsGetRsvdReg();
35583474
assert(regOp1 != tmpRegOp2);
35593475
assert(regOp2 != tmpRegOp2);
3560-
3561-
if (cond.IsUnsigned())
3562-
{
3563-
emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32);
3564-
emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32);
3565-
emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp2, regOp2, 32);
3566-
emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp2, tmpRegOp2, 32);
3567-
}
3568-
else
3569-
{
3570-
emit->emitIns_R_R_I(INS_slliw, EA_8BYTE, tmpRegOp1, regOp1, 0);
3571-
emit->emitIns_R_R_I(INS_slliw, EA_8BYTE, tmpRegOp2, regOp2, 0);
3572-
}
3573-
3476+
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
3477+
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp2, regOp2);
35743478
regOp1 = tmpRegOp1;
35753479
regOp2 = tmpRegOp2;
35763480
}

src/coreclr/jit/emitriscv64.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5492,7 +5492,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
54925492

54935493
// TODO-RISCV64-CQ: here sign-extend dst when deal with 32bit data is too conservative.
54945494
if (EA_SIZE(attr) == EA_4BYTE)
5495-
emitIns_R_R_I(INS_slliw, attr, dstReg, dstReg, 0);
5495+
emitIns_R_R(INS_sext_w, attr, dstReg, dstReg);
54965496
}
54975497
break;
54985498

src/coreclr/jit/lowerriscv64.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,17 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const
6464

6565
switch (parentNode->OperGet())
6666
{
67-
case GT_ADD:
6867
case GT_EQ:
6968
case GT_NE:
69+
return emitter::isValidSimm12(-immVal) || (immVal == -2048);
70+
71+
case GT_LE: // a <= N -> a < N+1
72+
case GT_GT: // a > N -> !(a <= N) -> !(a < N+1)
73+
immVal += 1;
74+
FALLTHROUGH;
7075
case GT_LT:
71-
case GT_LE:
7276
case GT_GE:
73-
case GT_GT:
77+
case GT_ADD:
7478
case GT_AND:
7579
case GT_OR:
7680
case GT_XOR:
@@ -85,9 +89,7 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const
8589
case GT_XCHG:
8690
case GT_STORE_LCL_FLD:
8791
case GT_STORE_LCL_VAR:
88-
if (immVal == 0)
89-
return true;
90-
break;
92+
return (immVal == 0);
9193

9294
default:
9395
break;

src/coreclr/jit/lsrabuild.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4611,6 +4611,8 @@ int LinearScan::BuildCmp(GenTree* tree)
46114611
assert(tree->OperIsCompare() || tree->OperIs(GT_CMP, GT_TEST, GT_BT));
46124612
#elif defined(TARGET_ARM64)
46134613
assert(tree->OperIsCompare() || tree->OperIs(GT_CMP, GT_TEST, GT_JCMP, GT_JTEST, GT_CCMP));
4614+
#elif defined(TARGET_RISCV64)
4615+
assert(tree->OperIsCmpCompare() || tree->OperIs(GT_JCMP));
46144616
#else
46154617
assert(tree->OperIsCompare() || tree->OperIs(GT_CMP, GT_TEST, GT_JCMP));
46164618
#endif

0 commit comments

Comments
 (0)