From e1c51a1ffbdb2ba263bc1ca11391717a837b04a5 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Fri, 7 Aug 2020 02:06:11 -0700 Subject: [PATCH] GBA BIOS: Make HLE BIOS calls interruptable (fixes #1711, fixes #1823) --- CHANGES | 1 + include/mgba/internal/gba/gba.h | 2 + include/mgba/internal/gba/serialize.h | 6 ++- src/gba/bios.c | 55 +++++++++++++++++++-------- src/gba/gba.c | 3 +- src/gba/hle-bios.c | 29 +++++++------- src/gba/hle-bios.s | 36 +++++++++++------- src/gba/serialize.c | 2 + 8 files changed, 87 insertions(+), 47 deletions(-) diff --git a/CHANGES b/CHANGES index 98dffa506cd..1a96d9d1f47 100644 --- a/CHANGES +++ b/CHANGES @@ -15,6 +15,7 @@ Emulation fixes: - GBA BIOS: Implement dummy sound driver calls - GBA BIOS: Improve HLE BIOS timing - GBA BIOS: Fix reloading video registers after reset (fixes mgba.io/i/1808) + - GBA BIOS: Make HLE BIOS calls interruptable (fixes mgba.io/i/1711 and mgba.io/i/1823) - GBA DMA: Linger last DMA on bus (fixes mgba.io/i/301 and mgba.io/i/1320) - GBA Memory: Improve gamepak prefetch timing - GBA Memory: Stall on VRAM access in mode 2 (fixes mgba.io/i/190) diff --git a/include/mgba/internal/gba/gba.h b/include/mgba/internal/gba/gba.h index 931ae5b6491..909a5174fc1 100644 --- a/include/mgba/internal/gba/gba.h +++ b/include/mgba/internal/gba/gba.h @@ -109,6 +109,8 @@ struct GBA { bool cpuBlocked; bool earlyExit; uint32_t dmaPC; + uint32_t biosStall; + int idleDetectionStep; int idleDetectionFailures; int32_t cachedRegisters[16]; diff --git a/include/mgba/internal/gba/serialize.h b/include/mgba/internal/gba/serialize.h index a8ea138e4dc..e9ef92cee4a 100644 --- a/include/mgba/internal/gba/serialize.h +++ b/include/mgba/internal/gba/serialize.h @@ -201,7 +201,8 @@ mLOG_DECLARE_CATEGORY(GBA_STATE); * | bit 1: POSTFLG * | bit 2: Is IRQ pending? 
* 0x00320 - 0x00323: Next IRQ event - * 0x00324 - 0x003FF: Reserved (leave zero) + * 0x00324 - 0x00327: Interruptable BIOS stall cycles + * 0x00328 - 0x003FF: Reserved (leave zero) * 0x00400 - 0x007FF: I/O memory * 0x00800 - 0x00BFF: Palette * 0x00C00 - 0x00FFF: OAM @@ -334,8 +335,9 @@ struct GBASerializedState { uint32_t lastPrefetchedPc; GBASerializedMiscFlags miscFlags; uint32_t nextIrq; + int32_t biosStall; - uint32_t reserved[55]; + uint32_t reserved[54]; uint16_t io[SIZE_IO >> 1]; uint16_t pram[SIZE_PALETTE_RAM >> 1]; diff --git a/src/gba/bios.c b/src/gba/bios.c index 911fd77bc27..57e57ea0f53 100644 --- a/src/gba/bios.c +++ b/src/gba/bios.c @@ -298,10 +298,10 @@ static void _Div(struct GBA* gba, int32_t num, int32_t denom) { if (loops < 1) { loops = 1; } - cpu->cycles += 4 /* prologue */ + 13 * loops + 7 /* epilogue */; + gba->biosStall = 4 /* prologue */ + 13 * loops + 7 /* epilogue */; } -static int16_t _ArcTan(int32_t i, int32_t* r1, int32_t* r3, int32_t* cycles) { +static int16_t _ArcTan(int32_t i, int32_t* r1, int32_t* r3, uint32_t* cycles) { int currentCycles = 37; currentCycles += _mulWait(i * i); int32_t a = -((i * i) >> 14); @@ -325,11 +325,11 @@ static int16_t _ArcTan(int32_t i, int32_t* r1, int32_t* r3, int32_t* cycles) { if (r3) { *r3 = b; } - *cycles += currentCycles; + *cycles = currentCycles; return (i * b) >> 16; } -static int16_t _ArcTan2(int32_t x, int32_t y, int32_t* r1, int32_t* cycles) { +static int16_t _ArcTan2(int32_t x, int32_t y, int32_t* r1, uint32_t* cycles) { if (!y) { if (x >= 0) { return 0; @@ -363,9 +363,9 @@ static int16_t _ArcTan2(int32_t x, int32_t y, int32_t* r1, int32_t* cycles) { } } -static int32_t _Sqrt(uint32_t x, int32_t* cycles) { +static int32_t _Sqrt(uint32_t x, uint32_t* cycles) { if (!x) { - *cycles += 53; + *cycles = 53; return 0; } int32_t currentCycles = 15; @@ -412,7 +412,7 @@ static int32_t _Sqrt(uint32_t x, int32_t* cycles) { break; } } - *cycles += currentCycles; + *cycles = currentCycles; return bound; } 
@@ -422,6 +422,9 @@ void GBASwi16(struct ARMCore* cpu, int immediate) { immediate, cpu->gprs[0], cpu->gprs[1], cpu->gprs[2], cpu->gprs[3]); switch (immediate) { + case 0xF0: // Used for internal stall counting + cpu->gprs[4] = gba->biosStall; + return; case 0xFA: GBAPrintFlush(gba); return; @@ -431,6 +434,8 @@ void GBASwi16(struct ARMCore* cpu, int immediate) { ARMRaiseSWI(cpu); return; } + + bool useStall = false; switch (immediate) { case GBA_SWI_SOFT_RESET: _SoftReset(gba); @@ -452,19 +457,24 @@ void GBASwi16(struct ARMCore* cpu, int immediate) { ARMRaiseSWI(cpu); return; case GBA_SWI_DIV: + useStall = true; _Div(gba, cpu->gprs[0], cpu->gprs[1]); break; case GBA_SWI_DIV_ARM: + useStall = true; _Div(gba, cpu->gprs[1], cpu->gprs[0]); break; case GBA_SWI_SQRT: - cpu->gprs[0] = _Sqrt(cpu->gprs[0], &cpu->cycles); + useStall = true; + cpu->gprs[0] = _Sqrt(cpu->gprs[0], &gba->biosStall); break; case GBA_SWI_ARCTAN: - cpu->gprs[0] = _ArcTan(cpu->gprs[0], &cpu->gprs[1], &cpu->gprs[3], &cpu->cycles); + useStall = true; + cpu->gprs[0] = _ArcTan(cpu->gprs[0], &cpu->gprs[1], &cpu->gprs[3], &gba->biosStall); break; case GBA_SWI_ARCTAN2: - cpu->gprs[0] = (uint16_t) _ArcTan2(cpu->gprs[0], cpu->gprs[1], &cpu->gprs[1], &cpu->cycles); + useStall = true; + cpu->gprs[0] = (uint16_t) _ArcTan2(cpu->gprs[0], cpu->gprs[1], &cpu->gprs[1], &gba->biosStall); cpu->gprs[3] = 0x170; break; case GBA_SWI_CPU_SET: @@ -589,12 +599,25 @@ void GBASwi16(struct ARMCore* cpu, int immediate) { default: mLOG(GBA_BIOS, STUB, "Stub software interrupt: %02X", immediate); } - gba->cpu->cycles += 45 + cpu->memory.activeNonseqCycles16 /* 8 bit load for SWI # */; - // Return cycles - if (gba->cpu->executionMode == MODE_ARM) { - gba->cpu->cycles += cpu->memory.activeNonseqCycles32 + cpu->memory.activeSeqCycles32; - } else { - gba->cpu->cycles += cpu->memory.activeNonseqCycles16 + cpu->memory.activeSeqCycles16; + if (useStall) { + if (gba->biosStall >= 18) { + gba->biosStall -= 18; + gba->cpu->cycles += 
gba->biosStall & 3; + gba->biosStall &= ~3; + ARMRaiseSWI(cpu); + } else { + gba->cpu->cycles += gba->biosStall; + useStall = false; + } + } + if (!useStall) { + gba->cpu->cycles += 45 + cpu->memory.activeNonseqCycles16 /* 8 bit load for SWI # */; + // Return cycles + if (gba->cpu->executionMode == MODE_ARM) { + gba->cpu->cycles += cpu->memory.activeNonseqCycles32 + cpu->memory.activeSeqCycles32; + } else { + gba->cpu->cycles += cpu->memory.activeNonseqCycles16 + cpu->memory.activeSeqCycles16; + } } gba->memory.biosPrefetch = 0xE3A02004; } diff --git a/src/gba/gba.c b/src/gba/gba.c index c1f87d2e5d5..18ae17222a8 100644 --- a/src/gba/gba.c +++ b/src/gba/gba.c @@ -201,6 +201,7 @@ void GBAReset(struct ARMCore* cpu) { gba->cpuBlocked = false; gba->earlyExit = false; gba->dmaPC = 0; + gba->biosStall = 0; if (gba->yankedRomSize) { gba->memory.romSize = gba->yankedRomSize; gba->memory.romMask = toPow2(gba->memory.romSize) - 1; @@ -928,4 +929,4 @@ static bool _setSoftwareBreakpoint(struct ARMDebugger* debugger, uint32_t addres static void _clearSoftwareBreakpoint(struct ARMDebugger* debugger, const struct ARMDebugBreakpoint* breakpoint) { GBAClearBreakpoint((struct GBA*) debugger->cpu->master, breakpoint->d.address, breakpoint->sw.mode, breakpoint->sw.opcode); } -#endif \ No newline at end of file +#endif diff --git a/src/gba/hle-bios.c b/src/gba/hle-bios.c index 8422f629def..4da22d005af 100644 --- a/src/gba/hle-bios.c +++ b/src/gba/hle-bios.c @@ -3,28 +3,28 @@ #include const uint8_t hleBios[SIZE_BIOS] = { - 0x06, 0x00, 0x00, 0xea, 0x66, 0x00, 0x00, 0xea, 0x0b, 0x00, 0x00, 0xea, + 0x06, 0x00, 0x00, 0xea, 0x66, 0x00, 0x00, 0xea, 0x0c, 0x00, 0x00, 0xea, 0xfe, 0xff, 0xff, 0xea, 0xfe, 0xff, 0xff, 0xea, 0x00, 0x00, 0xa0, 0xe1, 0x59, 0x00, 0x00, 0xea, 0xfe, 0xff, 0xff, 0xea, 0x02, 0x03, 0xa0, 0xe3, 0x03, 0x10, 0xd0, 0xe5, 0xea, 0x00, 0x51, 0xe3, 0x4c, 0x01, 0x9f, 0x15, 0x10, 0xff, 0x2f, 0xe1, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x29, 0xe1, - 0x00, 0x00, 0x5d, 0xe3, 0x01, 0xd3, 
0xa0, 0x03, 0x20, 0xd0, 0x4d, 0x02, - 0x00, 0x58, 0x2d, 0xe9, 0x02, 0xb0, 0x5e, 0xe5, 0xd4, 0xc0, 0xa0, 0xe3, - 0x0b, 0xb1, 0x9c, 0xe7, 0x00, 0x00, 0x5b, 0xe3, 0x00, 0xc0, 0x4f, 0xe1, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5d, 0xe3, 0x01, 0xd3, 0xa0, 0x03, + 0x20, 0xd0, 0x4d, 0x02, 0x10, 0x58, 0x2d, 0xe9, 0x02, 0xb0, 0x5e, 0xe5, + 0xd4, 0xc0, 0xa0, 0xe3, 0x0b, 0xb1, 0x9c, 0xe7, 0xd2, 0xcf, 0xa0, 0xe3, + 0x0b, 0x00, 0x5c, 0xe1, 0x00, 0x00, 0xf0, 0x0f, 0x00, 0xc0, 0x4f, 0xe1, 0x00, 0x10, 0x2d, 0xe9, 0x80, 0xc0, 0x0c, 0xe2, 0x1f, 0xc0, 0x8c, 0xe3, - 0x0c, 0xf0, 0x29, 0xe1, 0x00, 0x40, 0x2d, 0xe9, 0x00, 0x00, 0xa0, 0xe1, - 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, + 0x0c, 0xf0, 0x21, 0xe1, 0x00, 0x40, 0x2d, 0xe9, 0x00, 0x00, 0x5b, 0xe3, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x0f, 0xe0, 0xa0, 0xe1, 0x1b, 0xff, 0x2f, 0x11, 0x00, 0x00, 0xa0, 0xe1, - 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, - 0x00, 0x40, 0xbd, 0xe8, 0x93, 0xf0, 0x29, 0xe3, 0x00, 0x10, 0xbd, 0xe8, - 0x0c, 0xf0, 0x69, 0xe1, 0x00, 0x58, 0xbd, 0xe8, 0x0e, 0xf0, 0xb0, 0xe1, - 0x00, 0x00, 0x00, 0x00, 0x04, 0x20, 0xa0, 0xe3, 0xb0, 0x01, 0x00, 0x00, + 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x40, 0xbd, 0xe8, + 0x93, 0xf0, 0x29, 0xe3, 0x00, 0x10, 0xbd, 0xe8, 0x0c, 0xf0, 0x69, 0xe1, + 0x10, 0x58, 0xbd, 0xe8, 0x0e, 0xf0, 0xb0, 0xe1, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x20, 0xa0, 0xe3, 0x00, 0x00, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb4, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, - 0xcc, 0x01, 0x00, 0x00, 0xc4, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, - 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, - 0xb0, 0x01, 0x00, 0x00, 0x14, 0x02, 0x00, 0x00, 0xa8, 0x02, 0x00, 0x00, + 0xcc, 0x01, 0x00, 0x00, 0xc4, 0x01, 0x00, 0x00, 0x48, 0x03, 0x00, 0x00, + 0x48, 0x03, 0x00, 0x00, 0x48, 0x03, 0x00, 0x00, 
0x48, 0x03, 0x00, 0x00, + 0x48, 0x03, 0x00, 0x00, 0x14, 0x02, 0x00, 0x00, 0xa8, 0x02, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, @@ -72,5 +72,6 @@ const uint8_t hleBios[SIZE_BIOS] = { 0x01, 0x70, 0xa0, 0xe1, 0x01, 0x80, 0xa0, 0xe1, 0x01, 0x90, 0xa0, 0xe1, 0x01, 0xa0, 0xa0, 0xe1, 0xfa, 0x07, 0xa0, 0xe8, 0xfa, 0x07, 0xa0, 0xe8, 0xfa, 0x07, 0xa0, 0xe8, 0xfa, 0x07, 0xa0, 0xe8, 0x00, 0x10, 0xa0, 0xe3, - 0xf0, 0x07, 0xbd, 0xe8, 0x1e, 0xff, 0x2f, 0xe1, 0xb0, 0x01, 0x00, 0x00 + 0xf0, 0x07, 0xbd, 0xe8, 0x1e, 0xff, 0x2f, 0xe1, 0xb0, 0x01, 0x00, 0x00, + 0x04, 0x40, 0x54, 0xe2, 0xfd, 0xff, 0xff, 0x8a, 0x1e, 0xff, 0x2f, 0xe1 }; diff --git a/src/gba/hle-bios.s b/src/gba/hle-bios.s index e14e2d4c4ba..c27e220b556 100644 --- a/src/gba/hle-bios.s +++ b/src/gba/hle-bios.s @@ -25,25 +25,26 @@ bx r0 .word 0 .word 0xE129F000 +.word 0 @ Padding for back-compat + swiBase: cmp sp, #0 moveq sp, #0x04000000 subeq sp, #0x20 -stmfd sp!, {r11-r12, lr} +stmfd sp!, {r4, r11-r12, lr} ldrb r11, [lr, #-2] mov r12, #swiTable ldr r11, [r12, r11, lsl #2] -cmp r11, #0 +mov r12, #StallCall +cmp r12, r11 +swieq 0xF00000 @ Special mGBA-internal call to load the stall count into r4 mrs r12, spsr stmfd sp!, {r12} and r12, #0x80 orr r12, #0x1F -msr cpsr, r12 +msr cpsr_c, r12 stmfd sp!, {lr} -nop -nop -nop -nop +cmp r11, #0 nop nop nop @@ -55,16 +56,17 @@ bxne r11 nop nop nop -nop ldmfd sp!, {lr} msr cpsr, #0x93 ldmfd sp!, {r12} msr spsr, r12 -ldmfd sp!, {r11-r12, lr} +ldmfd sp!, {r4, r11-r12, lr} movs pc, lr .word 0 .word 0xE3A02004 +.word 0 @ Padding for back-compat + swiTable: .word SoftReset @ 0x00 .word RegisterRamReset @ 0x01 @@ -131,11 +133,6 @@ subs pc, lr, #4 SoftReset: RegisterRamReset: Stop: -Div: -DivArm: -Sqrt: -ArcTan: -ArcTan2: GetBiosChecksum: BgAffineSet: ObjAffineSet: @@ -305,3 +302,14 @@ ldmfd sp!, {r4-r10} bx 
lr .ltorg + +Div: +DivArm: +Sqrt: +ArcTan: +ArcTan2: + +StallCall: +subs r4, #4 +bhi StallCall +bx lr diff --git a/src/gba/serialize.c b/src/gba/serialize.c index e8f1b215a30..97345ad0571 100644 --- a/src/gba/serialize.c +++ b/src/gba/serialize.c @@ -69,6 +69,7 @@ void GBASerialize(struct GBA* gba, struct GBASerializedState* state) { } miscFlags = GBASerializedMiscFlagsSetBlocked(miscFlags, gba->cpuBlocked); STORE_32(miscFlags, 0, &state->miscFlags); + STORE_32(gba->biosStall, 0, &state->biosStall); /* dedicated slot at 0x00324; nextIrq (0x00320) holds the next IRQ event */ GBAMemorySerialize(&gba->memory, state); GBAIOSerialize(gba, state); @@ -187,6 +188,7 @@ bool GBADeserialize(struct GBA* gba, const struct GBASerializedState* state) { mTimingSchedule(&gba->timing, &gba->irqEvent, when); } gba->cpuBlocked = GBASerializedMiscFlagsGetBlocked(miscFlags); + LOAD_32(gba->biosStall, 0, &state->biosStall); /* read back from the same slot the serializer writes */ GBAVideoDeserialize(&gba->video, state); GBAMemoryDeserialize(&gba->memory, state);