From 192d97f7201a7508d78027da36052df604eeb08e Mon Sep 17 00:00:00 2001 From: unknown <71151164+ZERICO2005@users.noreply.github.com> Date: Tue, 8 Jul 2025 15:44:23 -0600 Subject: [PATCH 1/2] implemented lltod/ulltod in assembly and optimized ltod/ultod --- src/crt/lltod.src | 14 --- src/crt/ltod.src | 267 ++++++++++++++++++++++++++++++++------------- src/crt/ulltod.src | 14 --- 3 files changed, 194 insertions(+), 101 deletions(-) delete mode 100644 src/crt/lltod.src delete mode 100644 src/crt/ulltod.src diff --git a/src/crt/lltod.src b/src/crt/lltod.src deleted file mode 100644 index 1736c25d7..000000000 --- a/src/crt/lltod.src +++ /dev/null @@ -1,14 +0,0 @@ - assume adl=1 - - section .text - - public __lltod - -__lltod: - ; i64_ret_f64 - push af, iy, bc, de, hl - call ___i64_to_f64 - pop af, af, af, iy, af - ret - - extern ___i64_to_f64 diff --git a/src/crt/ltod.src b/src/crt/ltod.src index 0cebc535e..911ce83e4 100644 --- a/src/crt/ltod.src +++ b/src/crt/ltod.src @@ -1,5 +1,123 @@ assume adl=1 +;------------------------------------------------------------------------------- + + section .text + + public __ulltod +; (long double)unsigned long long +__ulltod: + cp a, a ; set Z flag + push af + jq __lltod_common + +;------------------------------------------------------------------------------- + + section .text + + public __lltod +; (long double)long long +__lltod: + bit 7, b + push af + call nz, __llneg ; abs(BC:UDE:UHL) + + require __lltod_common + +;------------------------------------------------------------------------------- + + section .text + + private __lltod_common +__lltod_common: + call __llctlz + sub a, 63 ; normalize clz_result + ; filter out exponent of $000 (zero) and $3FF (one) + jr nc, __int_to_f64_zero_or_one + ; A is [-63, -1] + add a, 52 + ; A is [-11, 51] + jr c, __int_to_f64_shl +; __int_to_f64_shr: + ; exponent = (1023 or $3FF or f64_bias) + base2_logarithm + ; Minimum exponent: $434 (2^53) + ; Maximum exponent: $43E (2^63) + ; It is assumed that A is [-11, -1] here, or [-63, -53] before adding 52 + cpl + inc a + ; A is [1, 11] + push hl + push bc + ld b, a + ld c, 1 +.shift_loop: + jr nc, .no_carry + inc c +.no_carry: + srl h + rr l + djnz .shift_loop + ; test round bit + jr nc, .no_round + ; test sticky bits + dec c + jr nz, .round_up + ; test guard bit + bit 0, l + jr nc, .no_round +.round_up: + inc b ; round up after shifting +.no_round: + ld h, b + pop bc + + ld l, a + ex (sp), hl ; (SP) = shift + call __llshru + ex (sp), hl ; (SP) = shifted HL, H = rounding, L = shift + add a, 51 + + dec h + push af + ; exponent = ($400 + (base2_logarithm - 1)) << 4 + ; BC = $4EEM + ld l, a + ld h, $04 + ; clear the implicit mantissa bit + res 4, c ; 52 % 8 == 4 + add hl, hl + add hl, hl + add hl, hl + add hl, hl + ld a, l + or a, c + ld c, a + ld b, h + pop af + pop hl ; restore shifted HL + call z, __lladd_1 ; round up to even + jr __int_to_f64_shl.finish + +;------------------------------------------------------------------------------- + + section .text + + private __int_to_f64_zero_or_one +__int_to_f64_zero_or_one: + ; carry is cleared here + ; UHL is either one or zero + ld b, h + ld c, h + jr nz, .ret_zero + ld bc, $3FF0 + dec hl ; ld hl, 0 +.ret_zero: + ex de, hl + sbc hl, hl + jr __int_to_f64_shl.finish + +;------------------------------------------------------------------------------- + section .text public __itod @@ -7,11 +125,12 @@ __itod: push hl add hl, hl ; extract signbit + sbc hl, hl ; set Z flag + ld e, l ; sign extend UHL to E:UHL pop hl - push af - ld e, 0 - call c, __ineg ; abs(UHL) - jr __ltod.hijack + jq __ltod + +;------------------------------------------------------------------------------- section .text @@ -22,102 +141,104 @@ __utod: require __ultod +;------------------------------------------------------------------------------- + section .text public __ultod ; (long double)unsigned long __ultod: - or a, a - push af - jr __ltod.hijack + cp a, a ; set Z flag + push af + jq __ltod_common + +;------------------------------------------------------------------------------- section .text public __ltod ; (long double)long __ltod: - rlc e + bit 7, e + + require __ltod.hijack_itod + +;------------------------------------------------------------------------------- + + section .text + + private __ltod.hijack_itod +__ltod.hijack_itod: + push af - rrc e - call c, __lneg ; abs(E:UHL) + call nz, __lneg ; abs(E:UHL) - require __ltod.hijack + require __ltod_common + +;------------------------------------------------------------------------------- section .text - private __ltod.hijack -__ltod.hijack: + private __ltod_common +__ltod_common: call __lctlz - inc.s bc ; clear UBC - ld b, a ; <<= 8 - xor a, $20 ; turns 32 into zero and clears carry flag - jr z, .zero - ; clears the MSB since the float will be normalized - ; x <<= clz_result + 1; /* shift by 32 is UB */ -if 0 - ; calculate the exponent - push hl - ; 1023 + 31 = 1054 = 0x41E - ld hl, $041E00 - ld c, l ; ld c, 0 - sbc hl, bc - ld l, e ; (expon16 << (16 + 24)) | (mant48) - ex de, hl - pop hl + sub a, 31 ; normalize clz_result -; ld b, a - inc b - ld a, e -.loop32: ; shift by 32 is not UB here! - add hl, hl - rla - djnz .loop32 - ld e, a -else - ; calculate the exponent - push hl - ; 1023 + 31 = 1054 = 0x41E - ld hl, $041E00 - ld c, l ; ld c, 0 - sbc hl, bc - ld l, e ; (expon16 << (16 + 24)) | (mant48) - ex de, hl + ; filter out exponent of $000 (zero) and $3FF (one) + jr nc, __int_to_f64_zero_or_one + ; A is [-31, -1] + add a, 52 + ; A is [21, 51] - ld l, b - pop bc - ld a, e - call __lshl - push bc - pop hl - ; shift by 32 is UB - add hl, hl - rla - ld e, a -end if + require __int_to_f64_shl - ; UDE:D has expon, E:UHL has mant - ; Float64_mant_bits - uint48_bits = 4 - ld c, 16 + 4 - push bc +;------------------------------------------------------------------------------- + + section .text + + private __int_to_f64_shl +__int_to_f64_shl: + ; exponent = (1023 or $3FF or f64_bias) + base2_logarithm + ; Minimum exponent: $400 (2^1) + ; Maximum exponent: $434 (2^52) + ; It is assumed that A is [0, 51] here, or [-52, -1] before adding 52 + push hl + ld l, a + ex (sp), hl ; (SP) = shift call __llshl - pop af ; reset SP + ex (sp), hl ; (SP) = shifted HL, L = shift + + ld a, 51 + sub a, l + + ; exponent = ($400 + (base2_logarithm - 1)) << 4 + ; BC = $4EEM + ld l, a + ld h, $04 + ; clear the implicit mantissa bit + res 4, c ; 52 % 8 == 4 + add hl, hl + add hl, hl + add hl, hl + add hl, hl + ld a, l + or a, c + ld c, a + ld b, h + pop hl ; restore shifted HL .finish: pop af - ret nc ; positive + ret z set 7, b - ret ; negative - -.zero: - ; E:UHL and A are zero - ex de, hl - sbc hl, hl - ld b, e - ld c, e - pop af ret +;------------------------------------------------------------------------------- + extern __ineg extern __lneg extern __lctlz - extern __lshl + extern __llctlz extern __llshl + extern __llshru + extern __llneg + extern __lladd_1 diff --git a/src/crt/ulltod.src b/src/crt/ulltod.src deleted file mode 100644 index 93abda157..000000000 --- a/src/crt/ulltod.src +++ /dev/null @@ -1,14 +0,0 @@ - assume adl=1 - - section .text - - public __ulltod - -__ulltod: - ; u64_ret_f64 - push af, iy, bc, de, hl - call ___ui64_to_f64 - pop af, af, af, iy, af - ret - - extern ___ui64_to_f64 From 7249418cc0dea78d899a5d7e9587c84aab8a8829 Mon Sep 17 00:00:00 2001 From: ZERICO2005 <71151164+ZERICO2005@users.noreply.github.com> Date: Sat, 19 Jul 2025 16:20:02 -0600 Subject: [PATCH 2/2] (u)lltod can now raise FE_INEXACT (disabled by default). Removed (u)itod for now --- src/crt/ltod.src | 116 +++++++-------- .../float64_from_integer/src/crt_wrap.asm | 32 ++++- .../src/f64_from_integer_LUT.h | 22 +-- .../float64_from_integer/src/main.c | 134 +++++++++++++----- 4 files changed, 194 insertions(+), 110 deletions(-) diff --git a/src/crt/ltod.src b/src/crt/ltod.src index 911ce83e4..bf3796996 100644 --- a/src/crt/ltod.src +++ b/src/crt/ltod.src @@ -1,5 +1,7 @@ assume adl=1 + __lltod_signal_FE_INEXACT := 0 + ;------------------------------------------------------------------------------- section .text @@ -49,54 +51,74 @@ __lltod_common: push hl push bc ld b, a - ld c, 1 + ld c, a + xor a, a .shift_loop: - jr nc, .no_carry - inc c -.no_carry: + adc a, 0 srl h rr l djnz .shift_loop - ; test round bit + ; round upwards to even if (round && (guard || sticky)) jr nc, .no_round - ; test sticky bits - dec c + ; we must ensure that FE_INEXACT is raised since rounding has occured + or a, a ; test sticky bits jr nz, .round_up - ; test guard bit - bit 0, l - jr nc, .no_round + inc a ; ld a, 1 + and a, l ; test guard bit + jr z, .no_round_inexact .round_up: inc b ; round up after shifting .no_round: +if __lltod_signal_FE_INEXACT + adc a, a ; test sticky and round bits + jr z, .result_is_exact +.no_round_inexact: + ld hl, ___fe_cur_env + set 5, (hl) ; FE_INEXACT +.result_is_exact: +else +.no_round_inexact: +end if ld h, b + ld a, c + ld l, c pop bc - ld l, a ex (sp), hl ; (SP) = shift call __llshru ex (sp), hl ; (SP) = shifted HL, H = rounding, L = shift add a, 51 dec h - push af - ; exponent = ($400 + (base2_logarithm - 1)) << 4 - ; BC = $4EEM - ld l, a - ld h, $04 - ; clear the implicit mantissa bit - res 4, c ; 52 % 8 == 4 - add hl, hl - add hl, hl - add hl, hl - add hl, hl - ld a, l - or a, c + jr nz, __int_to_f64_shl.no_rounding + + dec a ; compensate for the implicit mantissa bit + ; BC/exponent = [$434*, $43E*] + add a, a + add a, a + add a, a + add a, a + add a, c ld c, a - ld b, h - pop af pop hl ; restore shifted HL - call z, __lladd_1 ; round up to even + ld b, $43 +if 0 + ; inlined __lladd_1 + inc hl + add hl, de + or a, a + sbc hl, de + jr nz, __int_to_f64_shl.finish + inc de + sbc hl, de + add hl, de + jr nz, __int_to_f64_shl.finish + inc bc jr __int_to_f64_shl.finish +else + call __lladd_1 ; round up to even + jr __int_to_f64_shl.finish +end if ;------------------------------------------------------------------------------- @@ -116,31 +138,6 @@ __int_to_f64_zero_or_one: sbc hl, hl jr __int_to_f64_shl.finish -;------------------------------------------------------------------------------- - - section .text - - public __itod -; (long double)int -__itod: - push hl - add hl, hl ; extract signbit - sbc hl, hl ; set Z flag - ld e, l ; sign extend UHL to E:UHL - pop hl - jq __ltod - -;------------------------------------------------------------------------------- - - section .text - - public __utod -; (long double)unsigned int -__utod: - ld e, 0 - - require __ultod - ;------------------------------------------------------------------------------- section .text @@ -160,16 +157,6 @@ __ultod: ; (long double)long __ltod: bit 7, e - - require __ltod.hijack_itod - -;------------------------------------------------------------------------------- - - section .text - - private __ltod.hijack_itod -__ltod.hijack_itod: - push af call nz, __lneg ; abs(E:UHL) @@ -211,17 +198,18 @@ __int_to_f64_shl: ld a, 51 sub a, l +.no_rounding: ; exponent = ($400 + (base2_logarithm - 1)) << 4 ; BC = $4EEM ld l, a ld h, $04 ; clear the implicit mantissa bit - res 4, c ; 52 % 8 == 4 add hl, hl add hl, hl add hl, hl add hl, hl ld a, l + res 4, c ; 52 % 8 == 4 or a, c ld c, a ld b, h @@ -234,7 +222,6 @@ __int_to_f64_shl: ;------------------------------------------------------------------------------- - extern __ineg extern __lneg extern __lctlz extern __llctlz @@ -242,3 +229,4 @@ __int_to_f64_shl: extern __llshru extern __llneg extern __lladd_1 + extern ___fe_cur_env diff --git a/test/floating_point/float64_from_integer/src/crt_wrap.asm b/test/floating_point/float64_from_integer/src/crt_wrap.asm index 9093d1d3b..e7eedda0c 100644 --- a/test/floating_point/float64_from_integer/src/crt_wrap.asm +++ b/test/floating_point/float64_from_integer/src/crt_wrap.asm @@ -1,14 +1,32 @@ assume adl=1 +;------------------------------------------------------------------------------- + + section .text + + public _clear_fe_cur_env +_clear_fe_cur_env: + ld a, (___fe_cur_env) + and a, -125 ; feclearexcept(FE_ALL_EXCEPT) + ld (___fe_cur_env), a + ret + + public _get_fe_cur_env +_get_fe_cur_env: + ld a, (___fe_cur_env) + ret + +;------------------------------------------------------------------------------- + section .text - public _CRT_utod, _CRT_itod + public _CRT_uitod, _CRT_itod -_CRT_utod: +_CRT_uitod: ld hl, 3 add hl, sp ld hl, (hl) - jp __utod + jp __uitod _CRT_itod: ld hl, 3 @@ -16,5 +34,11 @@ _CRT_itod: ld hl, (hl) jp __itod - extern __utod +;------------------------------------------------------------------------------- + + extern __ultod + extern __ltod + extern ___fe_cur_env + + extern __uitod extern __itod diff --git a/test/floating_point/float64_from_integer/src/f64_from_integer_LUT.h b/test/floating_point/float64_from_integer/src/f64_from_integer_LUT.h index c0dca86ca..daa4ccdb0 100644 --- a/test/floating_point/float64_from_integer/src/f64_from_integer_LUT.h +++ b/test/floating_point/float64_from_integer/src/f64_from_integer_LUT.h @@ -9,15 +9,15 @@ typedef struct { uint32_t u32; uint64_t u64; } input_type; typedef struct { uint64_t fu32; uint64_t fi32; uint64_t fu64; uint64_t fi64; } output_type; -static const input_type f64_from_integer_LUT_input[256] = { +static const input_type f64_from_integer_LUT_input[259] = { /* 0 */ {UINT32_C(0x00000000), UINT64_C(0x0000000000000000)}, /* 1 */ {UINT32_C(0x00000001), UINT64_C(0x0000000000000001)}, /* 2 */ {UINT32_C(0xFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF)}, /* 3 */ {UINT32_C(0x7FFFFFFF), UINT64_C(0x7FFFFFFFFFFFFFFF)}, /* 4 */ {UINT32_C(0x80000000), UINT64_C(0x8000000000000000)}, -/* 5 */ {UINT32_C(0xCFA72379), UINT64_C(0x9022BDBCE12368EA)}, -/* 6 */ {UINT32_C(0xBCFC9E4C), UINT64_C(0xC53B5C41E4F559D2)}, -/* 7 */ {UINT32_C(0x83930797), UINT64_C(0x2F954ADDBC9A079B)}, +/* 5 */ {UINT32_C(0x80000001), UINT64_C(0x8000000000000001)}, +/* 6 */ {UINT32_C(0x00000002), UINT64_C(0x0000000000000002)}, +/* 7 */ {UINT32_C(0xFFFFFFFE), UINT64_C(0xFFFFFFFFFFFFFFFE)}, /* 8 */ {UINT32_C(0xC66AAAFC), UINT64_C(0x8B8B8D6D3691C649)}, /* 9 */ {UINT32_C(0xB3FE2104), UINT64_C(0xA32AC22CB1C97A60)}, /* 10 */ {UINT32_C(0xE02F635F), UINT64_C(0xB36FE887C58B1EC0)}, @@ -266,17 +266,20 @@ static const input_type f64_from_integer_LUT_input[256] = { /* 253 */ {UINT32_C(0x89FE6A31), UINT64_C(0x0B23A5C0041A0FEA)}, /* 254 */ {UINT32_C(0x1469770E), UINT64_C(0xCDB4EDD42210BA66)}, /* 255 */ {UINT32_C(0xD8B6EA42), UINT64_C(0x34931BF01A51A099)}, +/* 256 */ {UINT32_C(0xCFA72379), UINT64_C(0x9022BDBCE12368EA)}, +/* 257 */ {UINT32_C(0xBCFC9E4C), UINT64_C(0xC53B5C41E4F559D2)}, +/* 258 */ {UINT32_C(0x83930797), UINT64_C(0x2F954ADDBC9A079B)}, }; -const output_type f64_from_integer_LUT_output[256] = { +const output_type f64_from_integer_LUT_output[259] = { /* 0 */ {UINT64_C(0x0000000000000000), UINT64_C(0x0000000000000000), UINT64_C(0x0000000000000000), UINT64_C(0x0000000000000000)}, /* 1 */ {UINT64_C(0x3FF0000000000000), UINT64_C(0x3FF0000000000000), UINT64_C(0x3FF0000000000000), UINT64_C(0x3FF0000000000000)}, /* 2 */ {UINT64_C(0x41EFFFFFFFE00000), UINT64_C(0xBFF0000000000000), UINT64_C(0x43F0000000000000), UINT64_C(0xBFF0000000000000)}, /* 3 */ {UINT64_C(0x41DFFFFFFFC00000), UINT64_C(0x41DFFFFFFFC00000), UINT64_C(0x43E0000000000000), UINT64_C(0x43E0000000000000)}, /* 4 */ {UINT64_C(0x41E0000000000000), UINT64_C(0xC1E0000000000000), UINT64_C(0x43E0000000000000), UINT64_C(0xC3E0000000000000)}, -/* 5 */ {UINT64_C(0x41E9F4E46F200000), UINT64_C(0xC1C82C6E43800000), UINT64_C(0x43E20457B79C246D), UINT64_C(0xC3DBF75090C7B726)}, -/* 6 */ {UINT64_C(0x41E79F93C9800000), UINT64_C(0xC1D0C0D86D000000), UINT64_C(0x43E8A76B883C9EAB), UINT64_C(0xC3CD6251DF0D8553)}, -/* 7 */ {UINT64_C(0x41E07260F2E00000), UINT64_C(0xC1DF1B3E1A400000), UINT64_C(0x43C7CAA56EDE4D04), UINT64_C(0x43C7CAA56EDE4D04)}, +/* 5 */ {UINT64_C(0x41E0000000200000), UINT64_C(0xC1DFFFFFFFC00000), UINT64_C(0x43E0000000000000), UINT64_C(0xC3E0000000000000)}, +/* 6 */ {UINT64_C(0x4000000000000000), UINT64_C(0x4000000000000000), UINT64_C(0x4000000000000000), UINT64_C(0x4000000000000000)}, +/* 7 */ {UINT64_C(0x41EFFFFFFFC00000), UINT64_C(0xC000000000000000), UINT64_C(0x43F0000000000000), UINT64_C(0xC000000000000000)}, /* 8 */ {UINT64_C(0x41E8CD555F800000), UINT64_C(0xC1CCCAAA82000000), UINT64_C(0x43E17171ADA6D239), UINT64_C(0xC3DD1D1CA4B25B8E)}, /* 9 */ {UINT64_C(0x41E67FC420800000), UINT64_C(0xC1D30077BF000000), UINT64_C(0x43E465584596392F), UINT64_C(0xC3D7354F74D38DA1)}, /* 10 */ {UINT64_C(0x41EC05EC6BE00000), UINT64_C(0xC1BFD09CA1000000), UINT64_C(0x43E66DFD10F8B164), UINT64_C(0xC3D32405DE0E9D38)}, @@ -525,6 +528,9 @@ const output_type f64_from_integer_LUT_output[256] = { /* 253 */ {UINT64_C(0x41E13FCD46200000), UINT64_C(0xC1DD806573C00000), UINT64_C(0x43A6474B80083420), UINT64_C(0x43A6474B80083420)}, /* 254 */ {UINT64_C(0x41B469770E000000), UINT64_C(0x41B469770E000000), UINT64_C(0x43E9B69DBA844217), UINT64_C(0xC3C9258915EEF7A3)}, /* 255 */ {UINT64_C(0x41EB16DD48400000), UINT64_C(0xC1C3A48ADF000000), UINT64_C(0x43CA498DF80D28D0), UINT64_C(0x43CA498DF80D28D0)}, +/* 256 */ {UINT64_C(0x41E9F4E46F200000), UINT64_C(0xC1C82C6E43800000), UINT64_C(0x43E20457B79C246D), UINT64_C(0xC3DBF75090C7B726)}, +/* 257 */ {UINT64_C(0x41E79F93C9800000), UINT64_C(0xC1D0C0D86D000000), UINT64_C(0x43E8A76B883C9EAB), UINT64_C(0xC3CD6251DF0D8553)}, +/* 258 */ {UINT64_C(0x41E07260F2E00000), UINT64_C(0xC1DF1B3E1A400000), UINT64_C(0x43C7CAA56EDE4D04), UINT64_C(0x43C7CAA56EDE4D04)}, }; #endif /* F64_FROM_INTEGER_LUT_H */ diff --git a/test/floating_point/float64_from_integer/src/main.c b/test/floating_point/float64_from_integer/src/main.c index 017229f2d..141514523 100644 --- a/test/floating_point/float64_from_integer/src/main.c +++ b/test/floating_point/float64_from_integer/src/main.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -12,19 +13,41 @@ #include "f64_from_integer_LUT.h" -#define ARRAY_LENGTH(x) (sizeof(x) / sizeof(x[0])) - -typedef union F64_pun { - long double flt; - uint64_t bin; -} F64_pun; +//------------------------------------------------------------------------------ +// Config +//------------------------------------------------------------------------------ #define AUTOTEST_DEBUG 0 +/* (u)int24_t variants of (u)ltod */ +#define TEST_UITOD 0 + +/* tests FE_INEXACT in (u)lltod */ +#define TEST_FENV 0 + +//------------------------------------------------------------------------------ +// Tests +//------------------------------------------------------------------------------ + #ifndef AUTOTEST_DEBUG -#define AUTOTEST_DEBUG 0 +#error "AUTOTEST_DEBUG must be defined to 0 or 1" +#endif + +#ifndef TEST_UITOD +#error "TEST_UITOD must be defined to 0 or 1" +#endif + +#ifndef TEST_FENV +#error "TEST_FENV must be defined to 0 or 1" #endif +#define ARRAY_LENGTH(x) (sizeof(x) / sizeof((x)[0])) + +typedef union F64_pun { + long double flt; + uint64_t bin; +} F64_pun; + #if AUTOTEST_DEBUG void print_failed(uint64_t input, uint64_t guess, uint64_t truth) { printf( @@ -32,13 +55,18 @@ void print_failed(uint64_t input, uint64_t guess, uint64_t truth) { input, guess, truth ); } +#define test_printf printf #else #define print_failed(...) +#define test_printf(...) #endif -long double CRT_utod(unsigned int); +long double CRT_uitod(unsigned int); long double CRT_itod(signed int); +void clear_fe_cur_env(void); +unsigned char get_fe_cur_env(void); + size_t run_test(const char** failed_func) { typedef struct { uint32_t u32; uint64_t u64; } input_t; typedef struct { F64_pun fu32; F64_pun fi32; F64_pun fu64; F64_pun fi64; } output_t; @@ -49,7 +77,7 @@ size_t run_test(const char** failed_func) { for (size_t i = 0; i < length; i++) { F64_pun result; - + result.flt = (long double)((uint32_t)input[i].u32); if (result.bin != output[i].fu32.bin) { print_failed((uint64_t)input[i].u32, result.bin, output[i].fu32.bin); @@ -64,36 +92,74 @@ size_t run_test(const char** failed_func) { return i; } - if ((uint32_t)input[i].u32 <= UINT24_MAX) { - result.flt = CRT_utod((uint24_t)input[i].u32); - if (result.bin != output[i].fu32.bin) { - print_failed((uint64_t)input[i].u32, result.bin, output[i].fu32.bin); - *failed_func = "utod"; - return i; + #if TEST_UITOD + if ((uint32_t)input[i].u32 <= UINT24_MAX) { + result.flt = CRT_uitod((uint24_t)input[i].u32); + if (result.bin != output[i].fu32.bin) { + print_failed((uint64_t)input[i].u32, result.bin, output[i].fu32.bin); + *failed_func = "uitod"; + return i; + } } - } - - if ((int32_t)input[i].u32 >= INT24_MIN && (int32_t)input[i].u32 <= INT24_MAX) { - result.flt = CRT_itod((int24_t)input[i].u32); - if (result.bin != output[i].fi32.bin) { - print_failed((uint64_t)input[i].u32, result.bin, output[i].fi32.bin); - *failed_func = "itod"; + + if ((int32_t)input[i].u32 >= INT24_MIN && (int32_t)input[i].u32 <= INT24_MAX) { + result.flt = CRT_itod((int24_t)input[i].u32); + if (result.bin != output[i].fi32.bin) { + print_failed((uint64_t)input[i].u32, result.bin, output[i].fi32.bin); + *failed_func = "itod"; + return i; + } + } + #endif /* TEST_UITOD */ + + { + clear_fe_cur_env(); + result.flt = (long double)((uint64_t)input[i].u64); + if (result.bin != output[i].fu64.bin) { + print_failed((uint64_t)input[i].u64, result.bin, output[i].fu64.bin); + *failed_func = "ulltod"; return i; } + #if TEST_FENV + unsigned char fe_env = get_fe_cur_env(); + bool rounding_occured = ((uint64_t)output[i].fu64.flt != (uint64_t)input[i].u64); + bool inexact_raised = (fe_env & FE_INEXACT); + if (rounding_occured != inexact_raised) { + test_printf( + "%zu: FE: %02X\nI: %016llX\nO: %016llX\n", + i, fe_env, + input[i].u64, output[i].fu64.bin + ); + *failed_func = "ulltod"; + fputs("fenv\n", stdout); + return i; + } + #endif /* TEST_FENV */ } - result.flt = (long double)((uint64_t)input[i].u64); - if (result.bin != output[i].fu64.bin) { - print_failed((uint64_t)input[i].u64, result.bin, output[i].fu64.bin); - *failed_func = "ulltod"; - return i; - } - - result.flt = (long double)((int64_t)input[i].u64); - if (result.bin != output[i].fi64.bin) { - print_failed((uint64_t)input[i].u64, result.bin, output[i].fi64.bin); - *failed_func = "lltod"; - return i; + { + clear_fe_cur_env(); + result.flt = (long double)((int64_t)input[i].u64); + if (result.bin != output[i].fi64.bin) { + print_failed((uint64_t)input[i].u64, result.bin, output[i].fi64.bin); + *failed_func = "lltod"; + return i; + } + #if TEST_FENV + unsigned char fe_env = get_fe_cur_env(); + bool rounding_occured = ((int64_t)output[i].fi64.flt != (int64_t)input[i].u64); + bool inexact_raised = (fe_env & FE_INEXACT); + if (rounding_occured != inexact_raised) { + test_printf( + "%zu: FE: %02X\nI: %016llX\nO: %016llX\n", + i, fe_env, + input[i].u64, output[i].fu64.bin + ); + *failed_func = "lltod"; + fputs("fenv\n", stdout); + return i; + } + #endif /* TEST_FENV */ } }