Fix 8 bit remainder instruction. Fixes #32
MrSmith33 committed Dec 11, 2021
1 parent a5fdd03 commit 0df3902
Showing 2 changed files with 115 additions and 15 deletions.
61 changes: 47 additions & 14 deletions source/be/ir_to_lir_amd64.d
@@ -563,31 +563,61 @@ void processFunc(CompilationContext* context, IrBuilder* builder, ModuleDeclNode
// zx/sx dx:ax
// ax = div ax, dx, v3
// mov v1, ax
// since 8-bit division doesn't follow the dx:ax pattern and uses ax instead, we lower it to a wider division

bool isSigned = instrHeader.op == IrOpcode.sdiv || instrHeader.op == IrOpcode.srem;
bool isDivision = instrHeader.op == IrOpcode.udiv || instrHeader.op == IrOpcode.sdiv;

IrIndex dividendBottom;

IrArgSize argSize = instrHeader.argSize;
switch(argSize) with(IrArgSize)
{
case size8:
// we transform 8 bit div/rem into 16 bit, so we don't need to deal with ah register
argSize = size16;
dividendBottom = IrIndex(amd64_reg.ax, argSize);
ExtraInstrArgs extra = { addUsers : false, result : dividendBottom };
if (isSigned) builder.emitInstr!(Amd64Opcode.movsx_btod)(lirBlockIndex, extra, getFixedIndex(instrHeader.arg(ir, 0)));
else builder.emitInstr!(Amd64Opcode.movzx_btod)(lirBlockIndex, extra, getFixedIndex(instrHeader.arg(ir, 0)));
break;
case size16:
case size32:
case size64:
// copy bottom half of dividend
dividendBottom = IrIndex(amd64_reg.ax, argSize);
makeMov(dividendBottom, getFixedIndex(instrHeader.arg(ir, 0)), argSize, lirBlockIndex);
break;
default: context.internal_error("%s:%s: Invalid target size %s", funName, instrIndex, argSize);
}


// divisor must be in register
IrIndex divisor = instrHeader.arg(ir, 1);
if (instrHeader.arg(ir, 1).isConstant) {
if (divisor.isSimpleConstant) {
auto con = context.constants.get(divisor);
if (instrHeader.argSize == IrArgSize.size8) {
divisor = context.constants.add(makeIrType(IrBasicType.i16), con.i16);
}
ExtraInstrArgs extra = { addUsers : false, type : getValueType(divisor, ir, context) };
divisor = builder.emitInstr!(Amd64Opcode.mov)(lirBlockIndex, extra, divisor).result;
}
else fixIndex(divisor);

IrIndex dividendTop = IrIndex(amd64_reg.dx, instrHeader.argSize);
else
{
fixIndex(divisor);
if (instrHeader.argSize == IrArgSize.size8) {
ExtraInstrArgs extra = { addUsers : false, type : makeIrType(IrBasicType.i16) };
if (isSigned) divisor = builder.emitInstr!(Amd64Opcode.movsx_btod)(lirBlockIndex, extra, divisor).result;
else divisor = builder.emitInstr!(Amd64Opcode.movzx_btod)(lirBlockIndex, extra, divisor).result;
}
}

// copy bottom half of dividend
IrIndex dividendBottom = IrIndex(amd64_reg.ax, instrHeader.argSize);
makeMov(dividendBottom, getFixedIndex(instrHeader.arg(ir, 0)), instrHeader.argSize, lirBlockIndex);
IrIndex dividendTop = IrIndex(amd64_reg.dx, argSize);

if (isSigned) {
// if dividend is 8bit we use movsx and only change ax
// if it is bigger we use cwd/cdq/cqo that affects dx too
// TODO: for now always say that we modify dx even if we don't (8-bit arg doesn't touch dx, only ax)
IrIndex divsxResult = IrIndex(amd64_reg.dx, instrHeader.argSize);
IrIndex divsxResult = IrIndex(amd64_reg.dx, argSize);
// sign-extend top half of dividend
ExtraInstrArgs extra2 = { argSize : instrHeader.argSize, result : divsxResult };
ExtraInstrArgs extra2 = { argSize : argSize, result : divsxResult };
builder.emitInstr!(Amd64Opcode.divsx)(lirBlockIndex, extra2);
} else {
// zero top half of dividend
@@ -601,14 +631,17 @@ void processFunc(CompilationContext* context, IrBuilder* builder, ModuleDeclNode
}

// divide
ExtraInstrArgs extra3 = { addUsers : false, argSize : instrHeader.argSize, result : resultReg };
ExtraInstrArgs extra3 = { addUsers : false, argSize : argSize, result : resultReg };
InstrWithResult res;
if (isSigned)
res = builder.emitInstr!(Amd64Opcode.idiv)(lirBlockIndex, extra3, dividendBottom, dividendTop, divisor);
else
res = builder.emitInstr!(Amd64Opcode.div)(lirBlockIndex, extra3, dividendBottom, dividendTop, divisor);

// copy result (quotient)
// fix size for size8
resultReg.physRegSize = instrHeader.argSize;

// copy result (quotient) with truncation in case of 8/16 bits
ExtraInstrArgs extra4 = { addUsers : false, argSize : instrHeader.argSize, type : ir.getVirtReg(instrHeader.result(ir)).type };
InstrWithResult movResult = builder.emitInstr!(Amd64Opcode.mov)(lirBlockIndex, extra4, resultReg);
recordIndex(instrHeader.result(ir), movResult.result);
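
Why the lowering above widens 8-bit operations: on x86-64 the 8-bit div/idiv forms take their dividend in ax and return the quotient in al and the remainder in ah, while the 16/32/64-bit forms use dx:ax and put the remainder in dx. A size-agnostic result copy that reads an 8-bit remainder from dl therefore picks up garbage, which appears to be the bug behind #32. The sketch below is a standalone D illustration of the widening idea (plain host code with made-up names, not part of the compiler): sign-extend the 8-bit operands to 16 bits, compute at 16 bits, and truncate the result back to 8 bits.

import std.stdio;

// Illustrative only: emulates what the lowering does for a signed 8-bit
// remainder — sign-extend to 16 bits (movsx_btod in the diff), operate at
// 16 bits so the result lives in a "normal" register, then truncate back.
byte rem8_widened(byte a, byte b)
{
    short wa = a;                    // sign-extend dividend
    short wb = b;                    // sign-extend divisor
    short r  = cast(short)(wa % wb); // 16-bit remainder
    return cast(byte)r;              // truncating copy to the 8-bit result
}

void main()
{
    assert(rem8_widened(-32, 10) == -2); // matches the rem_i8__* tests below
    writeln("ok");
}
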
69 changes: 68 additions & 1 deletion source/tests/passing.d
@@ -5282,7 +5282,7 @@ void tester241(ref TestContext ctx) {

@TestInfo(&tester242)
immutable test242 = q{--- test242
// Compile-time float op float
// Compile-time binary float ops
bool f32_gt() { return cast(f32)100.0 > cast(f32)50.0; }
bool f32_ge() { return cast(f32)100.0 >= cast(f32)50.0; }
bool f32_lt() { return cast(f32)100.0 < cast(f32)50.0; }
@@ -5324,3 +5324,70 @@ void tester242(ref TestContext ctx) {
assert(ctx.getFunctionPtr!(double)("f64_mul")() == 100 * 100);
assert(ctx.getFunctionPtr!(double)("f64_div")() == 100 / 100);
}


@TestInfo(&tester243)
immutable test243 = q{--- test243
// remainder
i8 rem_i8__fold() { return cast(i8) -32 % 10; }
i16 rem_i16_fold() { return cast(i16)-32 % 10; }
i32 rem_i32_fold() { return cast(i32)-32 % 10; }
i64 rem_i64_fold() { return cast(i64)-32 % 10; }

u8 rem_u8__fold() { return cast(u8) 32 % 10; }
u16 rem_u16_fold() { return cast(u16)32 % 10; }
u32 rem_u32_fold() { return cast(u32)32 % 10; }
u64 rem_u64_fold() { return cast(u64)32 % 10; }

i8 rem_i8__con(i8 a) { return a % 10; }
i16 rem_i16_con(i16 a) { return a % 10; }
i32 rem_i32_con(i32 a) { return a % 10; }
i64 rem_i64_con(i64 a) { return a % 10; }

u8 rem_u8__con(u8 a) { return a % 10; }
u16 rem_u16_con(u16 a) { return a % 10; }
u32 rem_u32_con(u32 a) { return a % 10; }
u64 rem_u64_con(u64 a) { return a % 10; }

i8 rem_i8__var(i8 a, i8 b) { return a % b; }
i16 rem_i16_var(i16 a, i16 b) { return a % b; }
i32 rem_i32_var(i32 a, i32 b) { return a % b; }
i64 rem_i64_var(i64 a, i64 b) { return a % b; }

u8 rem_u8__var(u8 a, u8 b) { return a % b; }
u16 rem_u16_var(u16 a, u16 b) { return a % b; }
u32 rem_u32_var(u32 a, u32 b) { return a % b; }
u64 rem_u64_var(u64 a, u64 b) { return a % b; }
};
void tester243(ref TestContext ctx) {
assert(ctx.getFunctionPtr!( byte)("rem_i8__fold")() == -32 % 10);
assert(ctx.getFunctionPtr!( short)("rem_i16_fold")() == -32 % 10);
assert(ctx.getFunctionPtr!( int)("rem_i32_fold")() == -32 % 10);
assert(ctx.getFunctionPtr!( long)("rem_i64_fold")() == -32 % 10);

assert(ctx.getFunctionPtr!( ubyte)("rem_u8__fold")() == 32 % 10);
assert(ctx.getFunctionPtr!(ushort)("rem_u16_fold")() == 32 % 10);
assert(ctx.getFunctionPtr!( uint)("rem_u32_fold")() == 32 % 10);
assert(ctx.getFunctionPtr!( ulong)("rem_u64_fold")() == 32 % 10);

assert(ctx.getFunctionPtr!( byte, byte)("rem_i8__con")(-32) == -32 % 10);
assert(ctx.getFunctionPtr!( short, short)("rem_i16_con")(-32) == -32 % 10);
assert(ctx.getFunctionPtr!( int, int)("rem_i32_con")(-32) == -32 % 10);
assert(ctx.getFunctionPtr!( long, long)("rem_i64_con")(-32) == -32 % 10);

assert(ctx.getFunctionPtr!( ubyte, ubyte)("rem_u8__con")(32) == 32 % 10);
assert(ctx.getFunctionPtr!(ushort,ushort)("rem_u16_con")(32) == 32 % 10);
assert(ctx.getFunctionPtr!( uint, uint)("rem_u32_con")(32) == 32 % 10);
assert(ctx.getFunctionPtr!( ulong, ulong)("rem_u64_con")(32) == 32 % 10);

assert(ctx.getFunctionPtr!( byte, byte, byte)("rem_i8__var")(-32, 10) == -32 % 10);
assert(ctx.getFunctionPtr!( short, short, short)("rem_i16_var")(-32, 10) == -32 % 10);
assert(ctx.getFunctionPtr!( int, int, int)("rem_i32_var")(-32, 10) == -32 % 10);
assert(ctx.getFunctionPtr!( long, long, long)("rem_i64_var")(-32, 10) == -32 % 10);

assert(ctx.getFunctionPtr!( ubyte, ubyte, ubyte)("rem_u8__var")(32, 10) == 32 % 10);
assert(ctx.getFunctionPtr!(ushort,ushort,ushort)("rem_u16_var")(32, 10) == 32 % 10);
assert(ctx.getFunctionPtr!( uint, uint, uint)("rem_u32_var")(32, 10) == 32 % 10);
assert(ctx.getFunctionPtr!( ulong, ulong, ulong)("rem_u64_var")(32, 10) == 32 % 10);
}
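
Note on the expected values: the tests use D's own % as the oracle, and D's % (like C's) truncates toward zero, so the remainder takes the sign of the dividend and -32 % 10 is -2. A minimal standalone check of the oracle values used above:

import std.stdio;

// Sanity-check the host-side oracle values from tester243: the remainder
// truncates toward zero and survives narrowing to the 8-bit return types.
void main()
{
    assert(-32 % 10 == -2);
    assert( 32 % 10 ==  2);
    assert(cast(byte) (-32 % 10) == -2);
    assert(cast(ubyte)( 32 % 10) ==  2);
    writeln("oracle values ok");
}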
