Merge pull request #216 from PJK/half_id

Half-float encoding improvements; float encoding test improvements
PJK · Mar 19, 2022 · a1d3a2e
2 parents 90a9367 + 36e0eb6
commit a1d3a2e
Show file tree

Hide file tree

Showing 3 changed files with 103 additions and 4 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,9 @@
 Next
 ---------------------
-- Make the buffer_size optional in `cbor_serialize_alloc` [[#205]](https://github.com/PJK/libcbor/pull/205) (by [hughsie@](https://github.com/hughsie))
+- Make the buffer_size optional in `cbor_serialize_alloc` [[#205]](https://github.com/PJK/libcbor/pull/205) (by [hughsie](https://github.com/hughsie))
+- BREAKING: Improved half-float encoding for denormalized numbers. [[#208]](https://github.com/PJK/libcbor/pull/208) (by [ranvis](https://github.com/ranvis))
+  - Denormalized half-floats will now preserve data in the mantissa
+  - Note: Half-float NaNs still lose data [[#215]](https://github.com/PJK/libcbor/issues/215)
 
 0.9.0 (2021-11-14)
 ---------------------

diff --git a/src/cbor/encoding.c b/src/cbor/encoding.c
@@ -135,8 +135,11 @@ size_t cbor_encode_half(float value, unsigned char *buffer,
       val & 0x7FFFFFu; /* 0b0000_0000_0111_1111_1111_1111_1111_1111 */
   if (exp == 0xFF) {   /* Infinity or NaNs */
     if (value != value) {
-      res = (uint16_t)0x007e00; /* Not IEEE semantics - required by CBOR
-                                   [s. 3.9] */
+      // TODO: We currently discard information bits in half-float NaNs. This is
+      // not required for the core CBOR protocol (it is only a suggestion in
+      // Section 3.9).
+      // See https://github.com/PJK/libcbor/issues/215
+      res = (uint16_t)0x007e00;
     } else {
       res = (uint16_t)((val & 0x80000000u) >> 16u | 0x7C00u |
                        (mant ? 1u : 0u) << 15u);
@@ -158,7 +161,9 @@ size_t cbor_encode_half(float value, unsigned char *buffer,
          value is lost. This is an implementation decision that works around the
          absence of standard half-float in the language. */
       res = (uint16_t)((val & 0x80000000u) >> 16u) |  // Extract sign bit
-            (uint16_t)(1u << (24u + logical_exp));
+            (uint16_t)(1u << (24u + logical_exp)) +
+                (uint16_t)(((mant >> (-logical_exp - 2)) + 1) >>
+                           1);  // Round half away from zero for simplicity
     } else {
       res = (uint16_t)((val & 0x80000000u) >> 16u |
                        ((((uint8_t)logical_exp) + 15u) << 10u) |

diff --git a/test/float_ctrl_encoders_test.c b/test/float_ctrl_encoders_test.c
@@ -38,57 +38,124 @@ static void test_break(void **_CBOR_UNUSED(_state)) {
   assert_memory_equal(buffer, ((unsigned char[]){0xFF}), 1);
 }
 
+/* Check that encode(decode(buffer)) == buffer for a valid half-float in the
+ * buffer. */
+static void assert_half_float_codec_identity() {
+  unsigned char secondary_buffer[3];
+  struct cbor_load_result res;
+  // Load and check data in buffer
+  cbor_item_t *half_float = cbor_load(buffer, 3, &res);
+  assert_int_equal(res.error.code, CBOR_ERR_NONE);
+  assert_true(cbor_isa_float_ctrl(half_float));
+  assert_true(cbor_is_float(half_float));
+  assert_int_equal(cbor_float_get_width(half_float), CBOR_FLOAT_16);
+  // Encode again and check equality
+  assert_int_equal(3, cbor_encode_half(cbor_float_get_float2(half_float),
+                                       secondary_buffer, 3));
+  assert_memory_equal(buffer, secondary_buffer, 3);
+  cbor_decref(&half_float);
+}
+
 static void test_half(void **_CBOR_UNUSED(_state)) {
   assert_int_equal(3, cbor_encode_half(1.5f, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x3E, 0x00}), 3);
+  assert_half_float_codec_identity();
 
   assert_int_equal(3, cbor_encode_half(-0.0f, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x80, 0x00}), 3);
+  assert_half_float_codec_identity();
 
   assert_int_equal(3, cbor_encode_half(0.0f, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x00, 0x00}), 3);
+  assert_half_float_codec_identity();
 
   assert_int_equal(3, cbor_encode_half(65504.0f, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7B, 0xFF}), 3);
+  assert_half_float_codec_identity();
 
   assert_int_equal(3, cbor_encode_half(0.00006103515625f, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x04, 0x00}), 3);
+  assert_half_float_codec_identity();
 
   assert_int_equal(3, cbor_encode_half(-4.0f, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0xC4, 0x00}), 3);
+  assert_half_float_codec_identity();
 
   /* Smallest representable value */
   assert_int_equal(3, cbor_encode_half(5.960464477539063e-8f, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x00, 0x01}), 3);
+  assert_half_float_codec_identity();
 
   /* Smaller than the smallest, approximate magnitude representation */
   assert_int_equal(3, cbor_encode_half(5.960464477539062e-8f, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x00, 0x01}), 3);
+  assert_half_float_codec_identity();
+
+  assert_int_equal(3, cbor_encode_half(4.172325134277344e-7f, buffer, 512));
+  assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x00, 0x07}), 3);
+  assert_half_float_codec_identity();
+
+  assert_int_equal(3, cbor_encode_half(6.097555160522461e-5f, buffer, 512));
+  assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x03, 0xff}), 3);
+  assert_half_float_codec_identity();
+
+  assert_int_equal(3, cbor_encode_half(6.100535392761231e-5f, buffer, 512));
+  assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x04, 0x00}), 3);
+  assert_half_float_codec_identity();
 
   /* Smaller than the smallest and even the magnitude cannot be represented,
      round off to zero */
   assert_int_equal(3, cbor_encode_half(1e-25f, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x00, 0x00}), 3);
+  assert_half_float_codec_identity();
 
   assert_int_equal(3, cbor_encode_half(1.1920928955078125e-7, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x00, 0x02}), 3);
+  assert_half_float_codec_identity();
 
   assert_int_equal(3, cbor_encode_half(-1.1920928955078124e-7, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x80, 0x02}), 3);
+  assert_half_float_codec_identity();
 
   assert_int_equal(3, cbor_encode_half(INFINITY, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7C, 0x00}), 3);
+  assert_half_float_codec_identity();
 }
 
 static void test_half_special(void **_CBOR_UNUSED(_state)) {
   assert_int_equal(3, cbor_encode_half(NAN, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3);
+  assert_half_float_codec_identity();
+
+  // TODO: We currently discard all information bits in half-float NaNs. This is
+  // not required for the core CBOR protocol (it is only a suggestion in
+  // Section 3.9).
+  // See https://github.com/PJK/libcbor/issues/215
+  assert_int_equal(3, cbor_encode_half(nanf("2"), buffer, 512));
+  assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3);
+  assert_half_float_codec_identity();
 }
 
 static void test_float(void **_CBOR_UNUSED(_state)) {
   assert_int_equal(5, cbor_encode_single(3.4028234663852886e+38, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xFA, 0x7F, 0x7F, 0xFF, 0xFF}),
                       5);
+
+  assert_int_equal(5, cbor_encode_single(NAN, buffer, 512));
+  assert_memory_equal(buffer, ((unsigned char[]){0xFA, 0x7F, 0xC0, 0x00, 0x00}),
+                      5);
+
+  assert_int_equal(5, cbor_encode_single(nanf("3"), buffer, 512));
+  assert_memory_equal(buffer, ((unsigned char[]){0xFA, 0x7F, 0xC0, 0x00, 0x03}),
+                      5);
+
+  assert_int_equal(5, cbor_encode_single(strtof("Inf", NULL), buffer, 512));
+  assert_memory_equal(buffer, ((unsigned char[]){0xFA, 0x7F, 0x80, 0x00, 0x00}),
+                      5);
+
+  assert_int_equal(5, cbor_encode_single(strtof("-Inf", NULL), buffer, 512));
+  assert_memory_equal(buffer, ((unsigned char[]){0xFA, 0xFF, 0x80, 0x00, 0x00}),
+                      5);
 }
 
 static void test_double(void **_CBOR_UNUSED(_state)) {
@@ -97,6 +164,30 @@ static void test_double(void **_CBOR_UNUSED(_state)) {
       buffer,
       ((unsigned char[]){0xFB, 0x7E, 0x37, 0xE4, 0x3C, 0x88, 0x00, 0x75, 0x9C}),
       9);
+
+  assert_int_equal(9, cbor_encode_double(nan(""), buffer, 512));
+  assert_memory_equal(
+      buffer,
+      ((unsigned char[]){0xFB, 0x7F, 0xF8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}),
+      9);
+
+  assert_int_equal(9, cbor_encode_double(nan("3"), buffer, 512));
+  assert_memory_equal(
+      buffer,
+      ((unsigned char[]){0xFB, 0x7F, 0xF8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}),
+      9);
+
+  assert_int_equal(9, cbor_encode_double(strtod("Inf", NULL), buffer, 512));
+  assert_memory_equal(
+      buffer,
+      ((unsigned char[]){0xFB, 0x7F, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}),
+      9);
+
+  assert_int_equal(9, cbor_encode_double(strtod("-Inf", NULL), buffer, 512));
+  assert_memory_equal(
+      buffer,
+      ((unsigned char[]){0xFB, 0xFF, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}),
+      9);
 }
 
 int main(void) {