diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 9d055c814f..d2579349c4 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -1659,8 +1659,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) asm_intmin_max(as, ir, cc); } -#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) -#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) +#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL) +#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE) /* -- Comparisons --------------------------------------------------------- */ @@ -1852,7 +1852,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { as->curins--; /* Always skip the loword min/max. */ if (uselo || usehi) - asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); + asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE); return; #elif LJ_HASFFI } else if ((ir-1)->o == IR_CONV) { diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 624cc2da17..f640b91b28 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -1598,7 +1598,7 @@ static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc) Reg dest = (ra_dest(as, ir, RSET_FPR) & 31); Reg right, left = ra_alloc2(as, ir, RSET_FPR); right = ((left >> 8) & 31); left &= 31; - emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right); + emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, right, left); emit_nm(as, A64I_FCMPd, left, right); } @@ -1610,8 +1610,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc) asm_intmin_max(as, ir, cc); } -#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) -#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) +#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_PL) +#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_LE) /* -- Comparisons --------------------------------------------------------- */ diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 9309b78194..a242904e0e 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -2121,12 +2121,12 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) right = (left >> 8); left &= 255; #if !LJ_TARGET_MIPSR6 if (dest == left) { - emit_fg(as, MIPSI_MOVT_D, dest, right); + emit_fg(as, MIPSI_MOVF_D, dest, right); } else { - emit_fg(as, MIPSI_MOVF_D, dest, left); + emit_fg(as, MIPSI_MOVT_D, dest, left); if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); } - emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); + emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? right : left, ismax ? left : right); #else emit_fgh(as, ismax ? MIPSI_MAX_D : MIPSI_MIN_D, dest, left, right); #endif diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 8fa8c8ef65..afcd6b7a0a 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -1724,9 +1724,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) if (tmp == left || tmp == right) tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, dest), left), right)); - emit_facb(as, PPCI_FSEL, dest, tmp, - ismax ? left : right, ismax ? right : left); - emit_fab(as, PPCI_FSUB, tmp, left, right); + emit_facb(as, PPCI_FSEL, dest, tmp, left, right); + emit_fab(as, PPCI_FSUB, tmp, ismax ? left : right, ismax ? right : left); } else { Reg dest = ra_dest(as, ir, RSET_GPR); Reg tmp1 = RID_TMP, tmp2 = dest; diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index b4d05a2630..cefd69c8f9 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -1774,8 +1774,6 @@ LJFOLDF(reassoc_intarith_k64) #endif } -LJFOLD(MIN MIN any) -LJFOLD(MAX MAX any) LJFOLD(BAND BAND any) LJFOLD(BOR BOR any) LJFOLDF(reassoc_dup) @@ -1785,6 +1783,15 @@ LJFOLDF(reassoc_dup) return NEXTFOLD; } +LJFOLD(MIN MIN any) +LJFOLD(MAX MAX any) +LJFOLDF(reassoc_dup_minmax) +{ + if (fins->op2 == fleft->op2) + return LEFTFOLD; /* (a o b) o b ==> a o b */ + return NEXTFOLD; +} + LJFOLD(BXOR BXOR any) LJFOLDF(reassoc_bxor) { @@ -1823,23 +1830,12 @@ LJFOLDF(reassoc_shift) return NEXTFOLD; } -LJFOLD(MIN MIN KNUM) -LJFOLD(MAX MAX KNUM) LJFOLD(MIN MIN KINT) LJFOLD(MAX MAX KINT) LJFOLDF(reassoc_minmax_k) { IRIns *irk = IR(fleft->op2); - if (irk->o == IR_KNUM) { - lua_Number a = ir_knum(irk)->n; - lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD); - if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ - return LEFTFOLD; - PHIBARRIER(fleft); - fins->op1 = fleft->op1; - fins->op2 = (IRRef1)lj_ir_knum(J, y); - return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */ - } else if (irk->o == IR_KINT) { + if (irk->o == IR_KINT) { int32_t a = irk->i; int32_t y = kfold_intop(a, fright->i, fins->o); if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ @@ -1852,24 +1848,6 @@ LJFOLDF(reassoc_minmax_k) return NEXTFOLD; } -LJFOLD(MIN MAX any) -LJFOLD(MAX MIN any) -LJFOLDF(reassoc_minmax_left) -{ - if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2) - return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */ - return NEXTFOLD; -} - -LJFOLD(MIN any MAX) -LJFOLD(MAX any MIN) -LJFOLDF(reassoc_minmax_right) -{ - if (fins->op1 == fright->op1 || fins->op1 == fright->op2) - return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */ - return NEXTFOLD; -} - /* -- Array bounds check elimination -------------------------------------- */ /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. @@ -1995,8 +1973,6 @@ LJFOLDF(comm_comp) LJFOLD(BAND any any) LJFOLD(BOR any any) -LJFOLD(MIN any any) -LJFOLD(MAX any any) LJFOLDF(comm_dup) { if (fins->op1 == fins->op2) /* x o x ==> x */ @@ -2004,6 +1980,15 @@ LJFOLDF(comm_dup) return fold_comm_swap(J); } +LJFOLD(MIN any any) +LJFOLD(MAX any any) +LJFOLDF(comm_dup_minmax) +{ + if (fins->op1 == fins->op2) /* x o x ==> x */ + return LEFTFOLD; + return NEXTFOLD; +} + LJFOLD(BXOR any any) LJFOLDF(comm_bxor) { diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 2a41bcaae6..e89405d7de 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -50,8 +50,8 @@ double lj_vm_foldarith(double x, double y, int op) #if LJ_HASJIT case IR_ATAN2 - IR_ADD: return atan2(x, y); break; case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; - case IR_MIN - IR_ADD: return x > y ? y : x; break; - case IR_MAX - IR_ADD: return x < y ? y : x; break; + case IR_MIN - IR_ADD: return x < y ? x : y; break; + case IR_MAX - IR_ADD: return x > y ? x : y; break; #endif default: return x; } diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 5bbdbbff77..013688fbee 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -1716,8 +1716,8 @@ static void build_subroutines(BuildCtx *ctx) |.endif |.endmacro | - | math_minmax math_min, gt, hi - | math_minmax math_max, lt, lo + | math_minmax math_min, gt, pl + | math_minmax math_max, lt, le | |//-- String library ----------------------------------------------------- | diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 62946373f6..c157696ca5 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -1489,8 +1489,8 @@ static void build_subroutines(BuildCtx *ctx) | b <6 |.endmacro | - | math_minmax math_min, gt, hi - | math_minmax math_max, lt, lo + | math_minmax math_min, gt, pl + | math_minmax math_max, lt, le | |//-- String library ----------------------------------------------------- | diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 37506139b9..0c84c13b6b 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -1768,7 +1768,7 @@ static void build_subroutines(BuildCtx *ctx) | b ->fff_res |. li RD, (2+1)*8 | - |.macro math_minmax, name, intins, fpins + |.macro math_minmax, name, intins, ismax | .ffunc_1 name | addu TMP3, BASE, NARGS8:RC | bne SFARG1HI, TISNUM, >5 @@ -1822,13 +1822,21 @@ static void build_subroutines(BuildCtx *ctx) |.endif |7: |.if FPU + |.if ismax + | c.olt.d FARG1, FRET1 + |.else | c.olt.d FRET1, FARG1 - | fpins FRET1, FARG1 + |.endif + | movf.d FRET1, FARG1 + |.else + |.if ismax + | bal ->vm_sfcmpogt |.else | bal ->vm_sfcmpolt + |.endif |. nop - | intins SFARG1LO, SFARG2LO, CRET1 - | intins SFARG1HI, SFARG2HI, CRET1 + | movz SFARG1LO, SFARG2LO, CRET1 + | movz SFARG1HI, SFARG2HI, CRET1 |.endif | b <6 |. addiu TMP2, TMP2, 8 @@ -1849,8 +1857,8 @@ static void build_subroutines(BuildCtx *ctx) | |.endmacro | - | math_minmax math_min, movz, movf.d - | math_minmax math_max, movn, movt.d + | math_minmax math_min, movz, 0 + | math_minmax math_max, movn, 1 | |//-- String library ----------------------------------------------------- | @@ -2692,6 +2700,43 @@ static void build_subroutines(BuildCtx *ctx) |. move CRET1, CRET2 |.endif | + |->vm_sfcmpogt: + |.if not FPU + | sll AT, SFARG2HI, 1 + | sll TMP0, SFARG1HI, 1 + | or CRET1, SFARG2LO, SFARG1LO + | or TMP1, AT, TMP0 + | or TMP1, TMP1, CRET1 + | beqz TMP1, >8 // Both args +-0: return 0. + |. sltu CRET1, r0, SFARG2LO + | lui TMP1, 0xffe0 + | addu AT, AT, CRET1 + | sltu CRET1, r0, SFARG1LO + | sltu AT, TMP1, AT + | addu TMP0, TMP0, CRET1 + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; + |. and AT, SFARG2HI, SFARG1HI + | bltz AT, >5 // Both args negative? + |. nop + | beq SFARG2HI, SFARG1HI, >8 + |. sltu CRET1, SFARG2LO, SFARG1LO + | jr ra + |. slt CRET1, SFARG2HI, SFARG1HI + |5: // Swap conditions if both operands are negative. + | beq SFARG2HI, SFARG1HI, >8 + |. sltu CRET1, SFARG1LO, SFARG2LO + | jr ra + |. slt CRET1, SFARG1HI, SFARG2HI + |8: + | jr ra + |. nop + |9: + | jr ra + |. li CRET1, 0 + |.endif + | |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. |->vm_sfcmpolex: @@ -2734,24 +2779,24 @@ static void build_subroutines(BuildCtx *ctx) |. li CRET1, 0 |.endif | - |.macro sfmin_max, name, intins + |.macro sfmin_max, name, fpcall |->vm_sf .. name: |.if JIT and not FPU | move TMP2, ra - | bal ->vm_sfcmpolt + | bal ->fpcall |. nop | move TMP0, CRET1 | move SFRETHI, SFARG1HI | move SFRETLO, SFARG1LO | move ra, TMP2 - | intins SFRETHI, SFARG2HI, TMP0 + | movz SFRETHI, SFARG2HI, TMP0 | jr ra - |. intins SFRETLO, SFARG2LO, TMP0 + |. movz SFRETLO, SFARG2LO, TMP0 |.endif |.endmacro | - | sfmin_max min, movz - | sfmin_max max, movn + | sfmin_max min, vm_sfcmpolt + | sfmin_max max, vm_sfcmpogt | |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index 4ae19b7d9a..dac143a439 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc @@ -1852,18 +1852,26 @@ static void build_subroutines(BuildCtx *ctx) |.if MIPSR6 | fpins FRET1, FRET1, FARG1 |.else + |.if fpins // ismax + | c.olt.d FARG1, FRET1 + |.else | c.olt.d FRET1, FARG1 - | fpins FRET1, FARG1 |.endif + | movf.d FRET1, FARG1 + |.endif + |.else + |.if fpins // ismax + | bal ->vm_sfcmpogt |.else | bal ->vm_sfcmpolt + |.endif |. nop |.if MIPSR6 - | intins AT, CARG2, CRET1 - | intinsc CARG1, CARG1, CRET1 + | seleqz AT, CARG2, CRET1 + | selnez CARG1, CARG1, CRET1 | or CARG1, CARG1, AT |.else - | intins CARG1, CARG2, CRET1 + | movz CARG1, CARG2, CRET1 |.endif |.endif | b <6 @@ -1889,8 +1897,8 @@ static void build_subroutines(BuildCtx *ctx) | math_minmax math_min, seleqz, selnez, min.d | math_minmax math_max, selnez, seleqz, max.d |.else - | math_minmax math_min, movz, _, movf.d - | math_minmax math_max, movn, _, movt.d + | math_minmax math_min, movz, _, 0 + | math_minmax math_max, movn, _, 1 |.endif | |//-- String library ----------------------------------------------------- @@ -2108,7 +2116,6 @@ static void build_subroutines(BuildCtx *ctx) | dsllv CRET2, CRET2, TMP0 // Integer check. | sextw AT, CRET1 | xor AT, CRET1, AT // Range check. - | jr ra |.if MIPSR6 | seleqz AT, AT, CRET2 | selnez CRET2, CRET2, CRET2 @@ -2809,6 +2816,34 @@ static void build_subroutines(BuildCtx *ctx) |. move CRET1, CRET2 |.endif | + |->vm_sfcmpogt: + |.if not FPU + | dsll AT, CARG2, 1 + | dsll TMP0, CARG1, 1 + | or TMP1, AT, TMP0 + | beqz TMP1, >8 // Both args +-0: return 0. + |. lui TMP1, 0xffe0 + | dsll TMP1, TMP1, 32 + | sltu AT, TMP1, AT + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; + |. and AT, CARG2, CARG1 + | bltz AT, >5 // Both args negative? + |. nop + | jr ra + |. slt CRET1, CARG2, CARG1 + |5: // Swap conditions if both operands are negative. + | jr ra + |. slt CRET1, CARG1, CARG2 + |8: + | jr ra + |. li CRET1, 0 + |9: + | jr ra + |. li CRET1, 0 + |.endif + | |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. |->vm_sfcmpolex: @@ -2840,34 +2875,29 @@ static void build_subroutines(BuildCtx *ctx) |. li CRET1, 0 |.endif | - |.macro sfmin_max, name, intins, intinsc + |.macro sfmin_max, name, fpcall |->vm_sf .. name: |.if JIT and not FPU | move TMP2, ra - | bal ->vm_sfcmpolt + | bal ->fpcall |. nop | move ra, TMP2 | move TMP0, CRET1 | move CRET1, CARG1 |.if MIPSR6 - | intins CRET1, CRET1, TMP0 - | intinsc TMP0, CARG2, TMP0 + | selnez CRET1, CRET1, TMP0 + | seleqz TMP0, CARG2, TMP0 | jr ra |. or CRET1, CRET1, TMP0 |.else | jr ra - |. intins CRET1, CARG2, TMP0 + |. movz CRET1, CARG2, TMP0 |.endif |.endif |.endmacro | - |.if MIPSR6 - | sfmin_max min, selnez, seleqz - | sfmin_max max, seleqz, selnez - |.else - | sfmin_max min, movz, _ - | sfmin_max max, movn, _ - |.endif + | sfmin_max min, vm_sfcmpolt + | sfmin_max max, vm_sfcmpogt | |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index a66e30b5e4..7a2d321ed0 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -2309,12 +2309,12 @@ static void build_subroutines(BuildCtx *ctx) |6: | addi SAVE0, SAVE0, 8 |.if FPU - | fsub f0, FARG1, FARG2 |.if ismax - | fsel FARG1, f0, FARG1, FARG2 + | fsub f0, FARG1, FARG2 |.else - | fsel FARG1, f0, FARG2, FARG1 + | fsub f0, FARG2, FARG1 |.endif + | fsel FARG1, f0, FARG1, FARG2 |.else | stw CARG1, SFSAVE_1 | stw CARG2, SFSAVE_2 @@ -2354,13 +2354,13 @@ static void build_subroutines(BuildCtx *ctx) | checknum CARG2 | bge cr1, ->fff_resn | bge ->fff_fallback - | fsub f0, FARG1, FARG2 - | addi TMP1, TMP1, 8 |.if ismax - | fsel FARG1, f0, FARG1, FARG2 + | fsub f0, FARG1, FARG2 |.else - | fsel FARG1, f0, FARG2, FARG1 + | fsub f0, FARG2, FARG1 |.endif + | addi TMP1, TMP1, 8 + | fsel FARG1, f0, FARG1, FARG2 | b <1 |.endif |.endmacro diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index a5749b17a9..c714f4c7bf 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -1840,7 +1840,7 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->fff_res | |.macro math_minmax, name, cmovop, sseop - | .ffunc name + | .ffunc_1 name | mov RAd, 2 |.if DUALNUM | mov RB, [BASE] diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 1965b06ba8..c3999e7c7a 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -2233,7 +2233,7 @@ static void build_subroutines(BuildCtx *ctx) | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. | |.macro math_minmax, name, cmovop, sseop - | .ffunc name + | .ffunc_1 name | mov RA, 2 | cmp dword [BASE+4], LJ_TISNUM |.if DUALNUM