CE-Programming · mateoconlechuga · May 8, 2025 · May 7, 2025 · May 8, 2025
diff --git a/src/crt/dtol.src b/src/crt/dtol.src
@@ -3,24 +3,19 @@
 	section	.text
 
 	public	__dtol
+	public	__dtoul
 
+; correctly handles all non-UB cases for both
+; (long)long double and (unsigned long)long double
 __dtol:
-	; f64_ret_i32
-	push	af, iy
-	ld	a, b
-	push	bc, de, hl
-	ld	hl, 7
-	add	hl, sp
-	res	7, (hl)	; fabsl(x)
-	inc	hl
-	rlca
-	ld	(hl), a	; store the sign of x in the padding byte
-	call	__dtol_c
-	pop	af
-	ld	a, e
+__dtoul:
+	push	bc	; saved so the caller's BC can be restored after the call
+	push	de	; saved so everything in DE except E can be restored
+	call	__dtoll	; same as __dtoull
+	ld	c, e	; carry E as left by __dtoll across the pop below
 	pop	de
+	ld	e, c	; E = value from __dtoll; the rest of DE is the caller's
+	pop	bc	; restore the caller's BC
 	ret
 
-	extern	__dtol_c
+	extern	__dtoll
diff --git a/src/crt/dtoll.src b/src/crt/dtoll.src
@@ -3,19 +3,79 @@
 	section	.text
 
 	public	__dtoll
+	public	__dtoull
 
+; correctly handles all non-UB cases for both
+; (long long)long double and (unsigned long long)long double
 __dtoll:
-	; f64_ret_i64
-	push	af, iy
-	ld	a, b
-	res	7, b	; fabsl(x)
-	push	bc, de, hl
-	ld	hl, 8
-	add	hl, sp
-	rlca
-	ld	(hl), a	; store the sign of x in the padding byte
-	call	__dtoll_c
-	pop	af, af, af, iy, af
+__dtoull:
+	bit	6, b	; set if |x| >= 2.0L
+	jr	z, .zero_or_one	; |x| < 2.0L truncates to 0 or +/-1
+	bit	7, b	; NZ if the sign bit of x is set
+	push	af	; keep that sign test for the final negate decision
+	res	7, b	; clear the sign bit: work on |x|
+	push	hl	; save HL; this stack slot later receives the shift count
+	; -(1023 << 4): removes the exponent bias (presumably Float64_bias) from BC
+	ld	hl, $FFC010 ; -16368 ; -$3FF0
+
+	; clears the exponent field without touching the mantissa
+	; sets bit 4 (the exponent's LSB position) as the implicit leading one
+	ld	a, c
+	or	a, l	; or a, $10
+	and	a, $1F	; keep the 4 mantissa bits held in C plus the implicit one
+
+	add	hl, bc	; low 16 bits of HL = BC - $3FF0
+	; HL <<= 4
+	add	hl, hl
+	add	hl, hl
+	add	hl, hl
+	add	hl, hl
+
+	ld	c, a	; C = top mantissa bits with the implicit one
+	ld	b, 0	; sign and exponent bits removed from B
+	ld	a, h	; A = unbiased exponent (low 8 bits)
+	sub	a, 52 + 1	; 52 float64 mantissa bits + 1; carry if expon <= 52
+	jr	c, .shift_right
+	; shift_left
+	; expon >= 53 here; only [53, 63] gets shifted
+	; A is one less than it should be here to allow for the CPL trick in shift_right
+	; A is [0, 10] when expon is [53, 63]
+	cp	a, 11	; carry (so __llshl is called) only if expon <= 63
+	inc	a	; A = expon - 52, the left shift amount [1, 11]; the call below relies on carry from cp surviving inc
+	ld	l, a	; L = shift amount
+	ex	(sp), hl	; HL = value saved at entry; stack slot = shift amount (presumably where __llshl reads it - confirm)
+	call	c, __llshl
+	jr	.finish
+.shift_right:
+	; expon is [1, 52], so the shift amount below is [0, 51]
+	cpl	; A = ~(expon - 53) = 52 - expon
+	ld	l, a	; L = shift amount
+	ex	(sp), hl	; HL = value saved at entry; stack slot = shift amount (presumably where __llshru reads it - confirm)
+	call	__llshru
+.finish:
+	pop	af	; reset SP (drops the shift-amount slot)
+	pop	af	; recover the sign test saved at entry
+.finish_zero_or_one:
+	jp	nz, __llneg	; NZ: negative input, finish in __llneg
 	ret
 
-	extern	__dtoll_c
+.zero_or_one:
+	ld	hl, 16	; one unit in the exponent field of BC; H = 0
+	ld	d, h	; D = 0
+	ld	e, h	; E = 0
+	add	hl, bc	; adds one to the exponent
+	bit	6, h	; if |x| was [1, 2)
+	jr	z, .zero	; Z: |x| < 1, result is 0 and Z also skips the negate
+	inc	de	; low 16 bits of DE = 1
+	bit	7, b	; sets NZ if the result should be -1
+.zero:
+	ld	c, d	; D is 0 here, so the next four loads zero C, B, H and L
+	ld	b, d
+	ld	h, d
+	ld	l, d
+	ex.s	de, hl	; HL = 0 or 1, DE = 0; NOTE(review): confirm what the .s form does to the upper bytes
+	jr	.finish_zero_or_one
+
+	extern	__llneg
+	extern	__llshl
+	extern	__llshru
diff --git a/src/crt/dtoul.src b/src/crt/dtoul.src
diff --git a/src/crt/dtoull.src b/src/crt/dtoull.src
diff --git a/src/crt/float64_to_int.c b/src/crt/float64_to_int.c
@@ -64,16 +64,6 @@ uint64_t _dtoull_c(long double x) {
 }
 #endif
 
-/**
- * @brief set to 0 or 1
- * If set to 1, values that truncate to `INT32_MIN`/`INT64_MIN` will be
- * handled correctly.
- * If set to 0, it can save a little bit of space by removing a comparison from
- * `_dtol_c` and `_dtoll_c`. However this will cause values that would truncate
- * to `INT32_MIN`/`INT64_MIN` to have an undefined result.
- */
-#define HANDLE_INT_MIN 1
-
 typedef struct f64_sign {
     long double flt;
     bool sign;
@@ -106,41 +96,20 @@ static uint64_t f64_to_unsigned(F64_pun val) {
     return val.bin;
 }
 
-uint64_t _dtoull_c(long double x) {
-    F64_pun val;
-    val.flt = x;
-    /* overflow || signbit(x) || isinf(x) || isnan(x) */
-    if (val.reg.BC >= ((Float64_bias + Float64_u64_max_exp) << Float64_exp_BC_shift)) {
-        /* undefined return value for negative/overflow/inf/NaN of x */
-        return 0;
-    }
-    return f64_to_unsigned(val);
-}
 
-uint32_t _dtoul_c(long double x) {
-    F64_pun val;
-    val.flt = x;
-    /* overflow || signbit(x) || isinf(x) || isnan(x) */
-    if (val.reg.BC >= ((Float64_bias + Float64_u32_max_exp) << Float64_exp_BC_shift)) {
-        /* undefined return value for negative/overflow/inf/NaN values of x */
-        return 0;
-    }
-    return (uint32_t)f64_to_unsigned(val);
-}
 
+/**
+ * @brief The exact same routine is used for (long long)long double and
+ * (unsigned long long)long double. If the input long double is out of range,
+ * the conversion is undefined behavior (UB) anyway.
+ */
 int64_t _dtoll_c(f64_sign arg) {
     F64_pun val;
     bool x_sign = arg.sign;
     val.flt = arg.flt;
 
     /* overflow || isinf(x) || isnan(x) */
-    if (val.reg.BC >= ((Float64_bias + Float64_i64_max_exp) << Float64_exp_BC_shift)) {
-        #if HANDLE_INT_MIN != 0
-            /* if the value truncates to INT64_MIN */
-            if (x_sign && val.bin == UINT64_C(0x43E0000000000000)) {
-                return INT64_MIN;
-            }
-        #endif
+    if (val.reg.BC >= ((Float64_bias + Float64_u64_max_exp) << Float64_exp_BC_shift)) {
         /* undefined return value for underflow/overflow/inf/NaN values of x */
         return 0;
     }
@@ -150,19 +119,18 @@ int64_t _dtoll_c(f64_sign arg) {
     return ret;
 }
 
+/**
+ * @brief The exact same routine is used for (long)long double and
+ * (unsigned long)long double. If the input long double is out of range,
+ * the conversion is undefined behavior (UB) anyway.
+ */
 int32_t _dtol_c(f64_sign arg) {
     F64_pun val;
     bool x_sign = arg.sign;
     val.flt = arg.flt;
 
     /* overflow || isinf(x) || isnan(x) */
-    if (val.reg.BC >= ((Float64_bias + Float64_i32_max_exp) << Float64_exp_BC_shift)) {
-        #if HANDLE_INT_MIN != 0
-            /* if the value truncates to INT32_MIN */
-            if (x_sign && val.bin <= UINT64_C(0x41E00000001FFFFF)) {
-                return INT32_MIN;
-            }
-        #endif
+    if (val.reg.BC >= ((Float64_bias + Float64_u32_max_exp) << Float64_exp_BC_shift)) {
         /* undefined return value for underflow/overflow/inf/NaN values of x */
         return 0;
     }

diff --git a/src/crt/ftoll.c b/src/crt/ftoll.c
@@ -2,6 +2,11 @@
 #include <math.h>
 #include <stdint.h>
 
+/**
+ * @brief The exact same routine is used for (long long)float and
+ * (unsigned long long)float. If the input float is out of range,
+ * the conversion is undefined behavior (UB) anyway.
+ */
 long long _ftoll_c(float x)
 {
     const union { float f; uint32_t u; struct { uint32_t mantissa: FLT_MANT_DIG - 1, exponent: 8, sign: 1; }; } parts = { .f = x };

diff --git a/src/crt/ftoll.src b/src/crt/ftoll.src
@@ -3,7 +3,12 @@
 	section	.text
 
 	public	__ftoll
+	public	__ftoull
+
+; __ftoll_c correctly handles all non-UB cases for both
+; (long long)float and (unsigned long long)float
 __ftoll:
+__ftoull:
 	ld	d, a
 	push	iy, de, hl
 	call	__ftoll_c

diff --git a/src/crt/ftoull.c b/src/crt/ftoull.c
diff --git a/src/crt/ftoull.src b/src/crt/ftoull.src