From 7d7f40cec01927eb6b226e562af1ab79aeb87206 Mon Sep 17 00:00:00 2001
From: James Price <jrprice@google.com>
Date: Mon, 23 Dec 2019 16:01:35 -0500
Subject: [PATCH 1/9] Add cl_half.h header

---
 CL/cl_half.h | 454 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 454 insertions(+)
 create mode 100644 CL/cl_half.h

diff --git a/CL/cl_half.h b/CL/cl_half.h
new file mode 100644
index 00000000..0004b501
--- /dev/null
+++ b/CL/cl_half.h
@@ -0,0 +1,454 @@
+/*******************************************************************************
+ * Copyright (c) 2019 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+ * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+ * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+ *    https://www.khronos.org/registry/
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+/**
+ * This is a header-only utility library that provides OpenCL host code with
+ * routines for converting to/from cl_half values.
+ *
+ * Example usage:
+ *
+ *    #include <CL/cl_half.h>
+ *    ...
+ *    cl_half h = cl_float_to_half(0.5f, CL_HALF_RTE);
+ *    cl_float f = cl_half_to_float(h);
+ */
+
+#ifndef __CL_HALF_H
+#define __CL_HALF_H
+
+#include <CL/cl_platform.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * Rounding mode used when converting to cl_half.
+ */
+typedef enum
+{
+  CL_HALF_RTE, // round to nearest even
+  CL_HALF_RTZ, // round towards zero
+  CL_HALF_RTP, // round towards positive infinity
+  CL_HALF_RTN, // round towards negative infinity
+} cl_half_rounding_mode;
+
+/**
+ * Convert a cl_float to a cl_half.
+ */
+static inline cl_half cl_float_to_half(cl_float f, cl_half_rounding_mode rounding_mode)
+{
+  // Type-punning to get direct access to underlying bits
+  union
+  {
+    cl_float f;
+    uint32_t i;
+  } f32;
+  f32.f = f;
+
+  // Extract sign bit
+  uint16_t sign = f32.i >> 31;
+
+  // Extract FP32 exponent and mantissa
+  uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;
+  uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);
+
+  // Remove FP32 exponent bias
+  int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;
+
+  // Add FP16 exponent bias
+  uint16_t h_exp = exp + CL_HALF_MAX_EXP - 1;
+
+  // Check for NaN / infinity
+  if (f_exp == 0xFF)
+  {
+    if (f_mant)
+    {
+      // NaN -> propagate mantissa and silence it
+      uint16_t h_mant = f_mant >> (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
+      h_mant |= 0x200;
+      return (sign << 15) | 0x7C00 | h_mant;
+    }
+    else
+    {
+      // Infinity -> zero mantissa
+      return (sign << 15) | 0x7C00;
+    }
+  }
+
+  // Check for overflow
+  if (exp >= CL_HALF_MAX_EXP)
+  {
+    if (rounding_mode == CL_HALF_RTZ)
+    {
+      // Round overflow towards zero -> largest positive value
+      return (sign << 15) | 0x7BFF;
+    }
+    else if (rounding_mode == CL_HALF_RTP && sign)
+    {
+      // Round overflow towards positive infinity -> largest positive value
+      return (sign << 15) | 0x7BFF;
+    }
+    else if (rounding_mode == CL_HALF_RTN && !sign)
+    {
+      // Round overflow towards negative infinity -> smallest negative value
+      return (sign << 15) | 0x7BFF;
+    }
+    else
+    {
+      // Overflow to infinity
+      return (sign << 15) | 0x7C00;
+    }
+  }
+
+  // Check for underflow
+  if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
+  {
+    if (rounding_mode == CL_HALF_RTP && !sign && (f_exp || f_mant))
+    {
+      // Round underflow towards positive infinity -> smallest positive value
+      return (sign << 15) | 1;
+    }
+    else if (rounding_mode == CL_HALF_RTN && sign && (f_exp || f_mant))
+    {
+      // Round underflow towards negative infinity -> largest negative value
+      return (sign << 15) | 1;
+    }
+    else
+    {
+      // Flush to zero
+      return (sign << 15);
+    }
+  }
+
+  // Position of the bit that will become the FP16 mantissa LSB
+  uint32_t lsb_pos;
+
+  if (exp < -14)
+  {
+    // Denormal -> include the implicit 1 from the FP32 mantissa
+    h_exp = 0;
+    f_mant |= 1 << (CL_FLT_MANT_DIG - 1);
+
+    // Mantissa shift amount depends on exponent
+    lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
+  }
+  else
+  {
+    // Normal -> just truncate mantissa
+    lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
+  }
+
+  // Generate FP16 mantissa by shifting FP32 mantissa
+  uint16_t h_mant = f_mant >> lsb_pos;
+
+  // Check whether we need to round
+  uint32_t halfway = 1 << (lsb_pos - 1);
+  uint32_t mask = (halfway << 1) - 1;
+  switch (rounding_mode)
+  {
+    case CL_HALF_RTE:
+      if ((f_mant & mask) > halfway)
+      {
+        // More than halfway -> round up
+        h_mant += 1;
+      }
+      else if ((f_mant & mask) == halfway)
+      {
+        // Exactly halfway -> round to nearest even
+        if (h_mant & 0x1)
+          h_mant += 1;
+      }
+      break;
+    case CL_HALF_RTZ:
+      // Mantissa has already been truncated -> do nothing
+      break;
+    case CL_HALF_RTP:
+      if ((f_mant & mask) && !sign)
+      {
+        // Round positive numbers up
+        h_mant += 1;
+      }
+      break;
+    case CL_HALF_RTN:
+      if ((f_mant & mask) && sign)
+      {
+        // Round negative numbers down
+        h_mant += 1;
+      }
+      break;
+  }
+
+  // Check for mantissa overflow
+  if (h_mant & 0x400)
+  {
+    h_exp += 1;
+    h_mant = 0;
+  }
+
+  return (sign << 15) | (h_exp << 10) | h_mant;
+}
+
+
+/**
+ * Convert a cl_double to a cl_half.
+ */
+static inline cl_half cl_double_to_half(cl_double d, cl_half_rounding_mode rounding_mode)
+{
+  // Type-punning to get direct access to underlying bits
+  union
+  {
+    cl_double d;
+    uint64_t i;
+  } f64;
+  f64.d = d;
+
+  // Extract sign bit
+  uint16_t sign = f64.i >> 63;
+
+  // Extract FP64 exponent and mantissa
+  uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
+  uint64_t d_mant = f64.i & ((1ul << (CL_DBL_MANT_DIG - 1)) - 1);
+
+  // Remove FP64 exponent bias
+  int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
+
+  // Add FP16 exponent bias
+  uint16_t h_exp = exp + CL_HALF_MAX_EXP - 1;
+
+  // Check for NaN / infinity
+  if (d_exp == 0x7FF)
+  {
+    if (d_mant)
+    {
+      // NaN -> propagate mantissa and silence it
+      uint16_t h_mant = d_mant >> (CL_DBL_MANT_DIG - CL_HALF_MANT_DIG);
+      h_mant |= 0x200;
+      return (sign << 15) | 0x7C00 | h_mant;
+    }
+    else
+    {
+      // Infinity -> zero mantissa
+      return (sign << 15) | 0x7C00;
+    }
+  }
+
+  // Check for overflow
+  if (exp >= CL_HALF_MAX_EXP)
+  {
+    if (rounding_mode == CL_HALF_RTZ)
+    {
+      // Round overflow towards zero -> largest positive value
+      return (sign << 15) | 0x7BFF;
+    }
+    else if (rounding_mode == CL_HALF_RTP && sign)
+    {
+      // Round overflow towards positive infinity -> largest positive value
+      return (sign << 15) | 0x7BFF;
+    }
+    else if (rounding_mode == CL_HALF_RTN && !sign)
+    {
+      // Round overflow towards negative infinity -> smallest negative value
+      return (sign << 15) | 0x7BFF;
+    }
+    else
+    {
+      // Overflow to infinity
+      return (sign << 15) | 0x7C00;
+    }
+  }
+
+  // Check for underflow
+  if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
+  {
+    if (rounding_mode == CL_HALF_RTP && !sign && (d_exp || d_mant))
+    {
+      // Round underflow towards positive infinity -> smallest positive value
+      return (sign << 15) | 1;
+    }
+    else if (rounding_mode == CL_HALF_RTN && sign && (d_exp || d_mant))
+    {
+      // Round underflow towards negative infinity -> largest negative value
+      return (sign << 15) | 1;
+    }
+    else
+    {
+      // Flush to zero
+      return (sign << 15);
+    }
+  }
+
+  // Position of the bit that will become the FP16 mantissa LSB
+  uint32_t lsb_pos;
+
+  if (exp < -14)
+  {
+    // Denormal -> include the implicit 1 from the FP64 mantissa
+    h_exp = 0;
+    d_mant |= 1ul << (CL_DBL_MANT_DIG - 1);
+
+    // Mantissa shift amount depends on exponent
+    lsb_pos = -exp + (CL_DBL_MANT_DIG - 25);
+  }
+  else
+  {
+    // Normal -> just truncate mantissa
+    lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
+  }
+
+  // Generate FP16 mantissa by shifting FP64 mantissa
+  uint16_t h_mant = d_mant >> lsb_pos;
+
+  // Check whether we need to round
+  uint64_t halfway = 1ul << (lsb_pos - 1);
+  uint64_t mask = (halfway << 1) - 1;
+  switch (rounding_mode)
+  {
+    case CL_HALF_RTE:
+      if ((d_mant & mask) > halfway)
+      {
+        // More than halfway -> round up
+        h_mant += 1;
+      }
+      else if ((d_mant & mask) == halfway)
+      {
+        // Exactly halfway -> round to nearest even
+        if (h_mant & 0x1)
+          h_mant += 1;
+      }
+      break;
+    case CL_HALF_RTZ:
+      // Mantissa has already been truncated -> do nothing
+      break;
+    case CL_HALF_RTP:
+      if ((d_mant & mask) && !sign)
+      {
+        // Round positive numbers up
+        h_mant += 1;
+      }
+      break;
+    case CL_HALF_RTN:
+      if ((d_mant & mask) && sign)
+      {
+        // Round negative numbers down
+        h_mant += 1;
+      }
+      break;
+  }
+
+  // Check for mantissa overflow
+  if (h_mant & 0x400)
+  {
+    h_exp += 1;
+    h_mant = 0;
+  }
+
+  return (sign << 15) | (h_exp << 10) | h_mant;
+}
+
+
+/**
+ * Convert a cl_half to a cl_float (exact; NaN payloads are kept and quieted).
+ */
+static inline cl_float cl_half_to_float(cl_half h)
+{
+  // Type-punning to get direct access to underlying bits
+  union
+  {
+    cl_float f;
+    uint32_t i;
+  } f32;
+
+  // Extract sign bit; 32-bit unsigned so that (sign << 31) cannot overflow int
+  uint32_t sign = (uint32_t)h >> 15;
+
+  // Extract FP16 exponent and mantissa
+  uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
+  uint16_t h_mant = h & 0x3FF;
+
+  // Remove FP16 exponent bias
+  int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;
+
+  // Add FP32 exponent bias
+  uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;
+
+  // Check for NaN / infinity
+  if (h_exp == 0x1F)
+  {
+    if (h_mant)
+    {
+      // NaN -> propagate mantissa and silence it
+      uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
+      f_mant |= 0x400000;
+      f32.i = (sign << 31) | 0x7F800000 | f_mant;
+      return f32.f;
+    }
+    else
+    {
+      // Infinity -> zero mantissa
+      f32.i = (sign << 31) | 0x7F800000;
+      return f32.f;
+    }
+  }
+
+  // Check for zero / denormal
+  if (h_exp == 0)
+  {
+    if (h_mant == 0)
+    {
+      // Zero -> zero exponent
+      f_exp = 0;
+    }
+    else
+    {
+      // Denormal -> normalize it
+      // - Shift mantissa to make most-significant 1 implicit
+      // - Adjust exponent accordingly
+      uint32_t shift = 0;
+      while ((h_mant & 0x400) == 0)
+      {
+        h_mant <<= 1;
+        shift++;
+      }
+      h_mant &= 0x3FF;
+      f_exp -= shift - 1;
+    }
+  }
+
+  f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);
+  return f32.f;
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif  /* __CL_HALF_H */

From e9cb80421a5fe3f5f23a7af370b6594c1040d415 Mon Sep 17 00:00:00 2001
From: James Price <jrprice@google.com>
Date: Thu, 9 Jan 2020 12:31:16 -0500
Subject: [PATCH 2/9] Make shared utility for handling {over,under}flow

---
 CL/cl_half.h | 152 ++++++++++++++++++++++++++-------------------------
 1 file changed, 78 insertions(+), 74 deletions(-)

diff --git a/CL/cl_half.h b/CL/cl_half.h
index 0004b501..bddc20fd 100644
--- a/CL/cl_half.h
+++ b/CL/cl_half.h
@@ -59,6 +59,60 @@ typedef enum
   CL_HALF_RTN, // round towards negative infinity
 } cl_half_rounding_mode;
 
+
+/* Private utility macros. */
+#define CL_HALF_EXP_MASK 0x7C00
+#define CL_HALF_MAX_FINITE_MAG 0x7BFF
+
+
+/*
+ * Select the half result for a value too large to represent: signed infinity,
+ * or the signed largest finite number when the rounding mode forbids infinity.
+ */
+static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,
+                                              uint16_t sign)
+{
+  switch (rounding_mode)
+  {
+    case CL_HALF_RTZ:
+      // Towards zero never reaches infinity -> largest finite magnitude, sign kept
+      return (sign << 15) | CL_HALF_MAX_FINITE_MAG;
+    case CL_HALF_RTP:
+      // Towards +inf only clamps negative overflow -> most negative finite number
+      if (sign) return (1 << 15) | CL_HALF_MAX_FINITE_MAG;
+      break;
+    case CL_HALF_RTN:
+      // Towards -inf only clamps positive overflow -> largest finite number
+      if (!sign) return CL_HALF_MAX_FINITE_MAG;
+      break;
+    default:
+      break;
+  }
+  return (sign << 15) | CL_HALF_EXP_MASK;  // everything else overflows to infinity
+}
+
+/*
+ * Select the half result for a value too small to represent: signed zero, or
+ * the signed smallest denormal when the rounding mode points away from zero.
+ */
+static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,
+                                               uint16_t sign)
+{
+  // Signed zero is the starting point for every rounding mode
+  cl_half result = (sign << 15);
+
+  // RTP pushes positive values up and RTN pushes negative values down; in
+  // both cases the result moves away from zero to the smallest denormal.
+  int away_from_zero = sign ? (rounding_mode == CL_HALF_RTN)
+                            : (rounding_mode == CL_HALF_RTP);
+  if (away_from_zero)
+  {
+    result |= 1;
+  }
+  return result;
+}
+
+
 /**
  * Convert a cl_float to a cl_half.
  */
@@ -93,58 +147,31 @@ static inline cl_half cl_float_to_half(cl_float f, cl_half_rounding_mode roundin
       // NaN -> propagate mantissa and silence it
       uint16_t h_mant = f_mant >> (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
       h_mant |= 0x200;
-      return (sign << 15) | 0x7C00 | h_mant;
+      return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
     }
     else
     {
       // Infinity -> zero mantissa
-      return (sign << 15) | 0x7C00;
+      return (sign << 15) | CL_HALF_EXP_MASK;
     }
   }
 
+  // Check for zero
+  if (!f_exp && !f_mant)
+  {
+    return (sign << 15);
+  }
+
   // Check for overflow
   if (exp >= CL_HALF_MAX_EXP)
   {
-    if (rounding_mode == CL_HALF_RTZ)
-    {
-      // Round overflow towards zero -> largest positive value
-      return (sign << 15) | 0x7BFF;
-    }
-    else if (rounding_mode == CL_HALF_RTP && sign)
-    {
-      // Round overflow towards positive infinity -> largest positive value
-      return (sign << 15) | 0x7BFF;
-    }
-    else if (rounding_mode == CL_HALF_RTN && !sign)
-    {
-      // Round overflow towards negative infinity -> smallest negative value
-      return (sign << 15) | 0x7BFF;
-    }
-    else
-    {
-      // Overflow to infinity
-      return (sign << 15) | 0x7C00;
-    }
+    return cl_half_handle_overflow(rounding_mode, sign);
   }
 
   // Check for underflow
   if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
   {
-    if (rounding_mode == CL_HALF_RTP && !sign && (f_exp || f_mant))
-    {
-      // Round underflow towards positive infinity -> smallest positive value
-      return (sign << 15) | 1;
-    }
-    else if (rounding_mode == CL_HALF_RTN && sign && (f_exp || f_mant))
-    {
-      // Round underflow towards negative infinity -> largest negative value
-      return (sign << 15) | 1;
-    }
-    else
-    {
-      // Flush to zero
-      return (sign << 15);
-    }
+    return cl_half_handle_underflow(rounding_mode, sign);
   }
 
   // Position of the bit that will become the FP16 mantissa LSB
@@ -250,58 +277,31 @@ static inline cl_half cl_double_to_half(cl_double d, cl_half_rounding_mode round
       // NaN -> propagate mantissa and silence it
       uint16_t h_mant = d_mant >> (CL_DBL_MANT_DIG - CL_HALF_MANT_DIG);
       h_mant |= 0x200;
-      return (sign << 15) | 0x7C00 | h_mant;
+      return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
     }
     else
     {
       // Infinity -> zero mantissa
-      return (sign << 15) | 0x7C00;
+      return (sign << 15) | CL_HALF_EXP_MASK;
     }
   }
 
+  // Check for zero
+  if (!d_exp && !d_mant)
+  {
+    return (sign << 15);
+  }
+
   // Check for overflow
   if (exp >= CL_HALF_MAX_EXP)
   {
-    if (rounding_mode == CL_HALF_RTZ)
-    {
-      // Round overflow towards zero -> largest positive value
-      return (sign << 15) | 0x7BFF;
-    }
-    else if (rounding_mode == CL_HALF_RTP && sign)
-    {
-      // Round overflow towards positive infinity -> largest positive value
-      return (sign << 15) | 0x7BFF;
-    }
-    else if (rounding_mode == CL_HALF_RTN && !sign)
-    {
-      // Round overflow towards negative infinity -> smallest negative value
-      return (sign << 15) | 0x7BFF;
-    }
-    else
-    {
-      // Overflow to infinity
-      return (sign << 15) | 0x7C00;
-    }
+    return cl_half_handle_overflow(rounding_mode, sign);
   }
 
   // Check for underflow
   if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
   {
-    if (rounding_mode == CL_HALF_RTP && !sign && (d_exp || d_mant))
-    {
-      // Round underflow towards positive infinity -> smallest positive value
-      return (sign << 15) | 1;
-    }
-    else if (rounding_mode == CL_HALF_RTN && sign && (d_exp || d_mant))
-    {
-      // Round underflow towards negative infinity -> largest negative value
-      return (sign << 15) | 1;
-    }
-    else
-    {
-      // Flush to zero
-      return (sign << 15);
-    }
+    return cl_half_handle_underflow(rounding_mode, sign);
   }
 
   // Position of the bit that will become the FP16 mantissa LSB
@@ -446,6 +446,10 @@ static inline cl_float cl_half_to_float(cl_half h)
 }
 
 
+#undef CL_HALF_EXP_MASK
+#undef CL_HALF_MAX_FINITE_MAG
+
+
 #ifdef __cplusplus
 }
 #endif

From 5bc63a4601ae81a185a179c36660781129222928 Mon Sep 17 00:00:00 2001
From: James Price <jrprice@google.com>
Date: Thu, 9 Jan 2020 17:35:12 -0500
Subject: [PATCH 3/9] Include stdint.h

---
 CL/cl_half.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CL/cl_half.h b/CL/cl_half.h
index bddc20fd..3ced295d 100644
--- a/CL/cl_half.h
+++ b/CL/cl_half.h
@@ -43,6 +43,8 @@
 
 #include <CL/cl_platform.h>
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif

From 943493d6feddea0674b6d5641fb1c7cf2b2741b8 Mon Sep 17 00:00:00 2001
From: James Price <jrprice@google.com>
Date: Mon, 13 Jan 2020 16:25:36 -0500
Subject: [PATCH 4/9] Prefix all new functions with cl_half_

---
 CL/cl_half.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CL/cl_half.h b/CL/cl_half.h
index 3ced295d..5bb57742 100644
--- a/CL/cl_half.h
+++ b/CL/cl_half.h
@@ -34,7 +34,7 @@
  *
  *    #include <CL/cl_half.h>
  *    ...
- *    cl_half h = cl_float_to_half(0.5f, CL_HALF_RTE);
+ *    cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);
  *    cl_float f = cl_half_to_float(h);
  */
 
@@ -118,7 +118,7 @@ static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mo
 /**
  * Convert a cl_float to a cl_half.
  */
-static inline cl_half cl_float_to_half(cl_float f, cl_half_rounding_mode rounding_mode)
+static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)
 {
   // Type-punning to get direct access to underlying bits
   union
@@ -248,7 +248,7 @@ static inline cl_half cl_float_to_half(cl_float f, cl_half_rounding_mode roundin
 /**
  * Convert a cl_double to a cl_half.
  */
-static inline cl_half cl_double_to_half(cl_double d, cl_half_rounding_mode rounding_mode)
+static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)
 {
   // Type-punning to get direct access to underlying bits
   union

From 4dff5990264d138fed3554a674b75caba95777de Mon Sep 17 00:00:00 2001
From: James Price <jrprice@google.com>
Date: Mon, 13 Jan 2020 16:39:08 -0500
Subject: [PATCH 5/9] Address Windows warnings

---
 CL/cl_half.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/CL/cl_half.h b/CL/cl_half.h
index 5bb57742..fa418146 100644
--- a/CL/cl_half.h
+++ b/CL/cl_half.h
@@ -263,13 +263,13 @@ static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rou
 
   // Extract FP64 exponent and mantissa
   uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
-  uint64_t d_mant = f64.i & ((1ul << (CL_DBL_MANT_DIG - 1)) - 1);
+  uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);
 
   // Remove FP64 exponent bias
   int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
 
   // Add FP16 exponent bias
-  uint16_t h_exp = exp + CL_HALF_MAX_EXP - 1;
+  uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
 
   // Check for NaN / infinity
   if (d_exp == 0x7FF)
@@ -277,7 +277,7 @@ static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rou
     if (d_mant)
     {
       // NaN -> propagate mantissa and silence it
-      uint16_t h_mant = d_mant >> (CL_DBL_MANT_DIG - CL_HALF_MANT_DIG);
+      uint16_t h_mant = (uint16_t)(d_mant >> (CL_DBL_MANT_DIG - CL_HALF_MANT_DIG));
       h_mant |= 0x200;
       return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
     }
@@ -313,10 +313,10 @@ static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rou
   {
     // Denormal -> include the implicit 1 from the FP64 mantissa
     h_exp = 0;
-    d_mant |= 1ul << (CL_DBL_MANT_DIG - 1);
+    d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
 
     // Mantissa shift amount depends on exponent
-    lsb_pos = -exp + (CL_DBL_MANT_DIG - 25);
+    lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
   }
   else
   {
@@ -325,10 +325,10 @@ static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rou
   }
 
   // Generate FP16 mantissa by shifting FP64 mantissa
-  uint16_t h_mant = d_mant >> lsb_pos;
+  uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
 
   // Check whether we need to round
-  uint64_t halfway = 1ul << (lsb_pos - 1);
+  uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);
   uint64_t mask = (halfway << 1) - 1;
   switch (rounding_mode)
   {

From f901f830bb57810917aaf8214d40510df5ba7037 Mon Sep 17 00:00:00 2001
From: James Price <jrprice@google.com>
Date: Mon, 13 Jan 2020 16:44:07 -0500
Subject: [PATCH 6/9] Refactor lsb_pos calculation

---
 CL/cl_half.h | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/CL/cl_half.h b/CL/cl_half.h
index fa418146..c6d4f919 100644
--- a/CL/cl_half.h
+++ b/CL/cl_half.h
@@ -141,13 +141,16 @@ static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode round
   // Add FP16 exponent bias
   uint16_t h_exp = exp + CL_HALF_MAX_EXP - 1;
 
+  // Position of the bit that will become the FP16 mantissa LSB
+  uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
+
   // Check for NaN / infinity
   if (f_exp == 0xFF)
   {
     if (f_mant)
     {
       // NaN -> propagate mantissa and silence it
-      uint16_t h_mant = f_mant >> (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
+      uint16_t h_mant = f_mant >> lsb_pos;
       h_mant |= 0x200;
       return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
     }
@@ -176,9 +179,7 @@ static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode round
     return cl_half_handle_underflow(rounding_mode, sign);
   }
 
-  // Position of the bit that will become the FP16 mantissa LSB
-  uint32_t lsb_pos;
-
+  // Check for value that will become denormal
   if (exp < -14)
   {
     // Denormal -> include the implicit 1 from the FP32 mantissa
@@ -188,11 +189,6 @@ static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode round
     // Mantissa shift amount depends on exponent
     lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
   }
-  else
-  {
-    // Normal -> just truncate mantissa
-    lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
-  }
 
   // Generate FP16 mantissa by shifting FP32 mantissa
   uint16_t h_mant = f_mant >> lsb_pos;
@@ -271,13 +267,16 @@ static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rou
   // Add FP16 exponent bias
   uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
 
+  // Position of the bit that will become the FP16 mantissa LSB
+  uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
+
   // Check for NaN / infinity
   if (d_exp == 0x7FF)
   {
     if (d_mant)
     {
       // NaN -> propagate mantissa and silence it
-      uint16_t h_mant = (uint16_t)(d_mant >> (CL_DBL_MANT_DIG - CL_HALF_MANT_DIG));
+      uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
       h_mant |= 0x200;
       return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
     }
@@ -306,23 +305,16 @@ static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rou
     return cl_half_handle_underflow(rounding_mode, sign);
   }
 
-  // Position of the bit that will become the FP16 mantissa LSB
-  uint32_t lsb_pos;
-
+  // Check for value that will become denormal
   if (exp < -14)
   {
-    // Denormal -> include the implicit 1 from the FP64 mantissa
+    // Include the implicit 1 from the FP64 mantissa
     h_exp = 0;
     d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
 
     // Mantissa shift amount depends on exponent
     lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
   }
-  else
-  {
-    // Normal -> just truncate mantissa
-    lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
-  }
 
   // Generate FP16 mantissa by shifting FP64 mantissa
   uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);

From fcf33631bc6c5365ff72233ce17ea78260526da4 Mon Sep 17 00:00:00 2001
From: James Price <jrprice@google.com>
Date: Mon, 13 Jan 2020 16:58:20 -0500
Subject: [PATCH 7/9] Avoid leading double-underscore in header guard

---
 CL/cl_half.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CL/cl_half.h b/CL/cl_half.h
index c6d4f919..419d5c50 100644
--- a/CL/cl_half.h
+++ b/CL/cl_half.h
@@ -38,8 +38,8 @@
  *    cl_float f = cl_half_to_float(h);
  */
 
-#ifndef __CL_HALF_H
-#define __CL_HALF_H
+#ifndef OPENCL_CL_HALF_H
+#define OPENCL_CL_HALF_H
 
 #include <CL/cl_platform.h>
 
@@ -449,4 +449,4 @@ static inline cl_float cl_half_to_float(cl_half h)
 #endif
 
 
-#endif  /* __CL_HALF_H */
+#endif  /* OPENCL_CL_HALF_H */

From 461e4e76fcac201642441b2f9972a33a744f3a27 Mon Sep 17 00:00:00 2001
From: James Price <jrprice@google.com>
Date: Wed, 29 Apr 2020 17:41:33 -0400
Subject: [PATCH 8/9] Update license

---
 CL/cl_half.h | 32 ++++++++++----------------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/CL/cl_half.h b/CL/cl_half.h
index 419d5c50..f748d9ed 100644
--- a/CL/cl_half.h
+++ b/CL/cl_half.h
@@ -1,29 +1,17 @@
 /*******************************************************************************
- * Copyright (c) 2019 The Khronos Group Inc.
+ * Copyright (c) 2019-2020 The Khronos Group Inc.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and/or associated documentation files (the
- * "Materials"), to deal in the Materials without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Materials, and to
- * permit persons to whom the Materials are furnished to do so, subject to
- * the following conditions:
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Materials.
+ *    http://www.apache.org/licenses/LICENSE-2.0
  *
- * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
- * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
- * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
- *    https://www.khronos.org/registry/
- *
- * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  ******************************************************************************/
 
 /**

From e6c705dbde7f39a3684fca444c3d8fe5c85a8e3f Mon Sep 17 00:00:00 2001
From: James Price <jrprice@google.com>
Date: Wed, 29 Apr 2020 17:41:46 -0400
Subject: [PATCH 9/9] Add tests for cl_half.h

---
 tests/CMakeLists.txt   |   1 +
 tests/test_cl_half.h.c | 114 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+)
 create mode 100644 tests/test_cl_half.h.c

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 9e5b0bfa..e1c306d3 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -28,6 +28,7 @@ add_header_test(cl_ext_h test_cl_ext.h.c)
 add_header_test(cl_ext_intel_h test_cl_ext_intel.h.c)
 add_header_test(cl_gl_h test_cl_gl.h.c)
 add_header_test(cl_gl_ext_h test_cl_gl_ext.h.c)
+add_header_test(cl_half_h test_cl_half.h.c)
 add_header_test(cl_icd_h test_cl_icd.h.c)
 add_header_test(cl_platform_h test_cl_platform.h.c)
 add_header_test(cl_opencl_h test_opencl.h.c)
diff --git a/tests/test_cl_half.h.c b/tests/test_cl_half.h.c
new file mode 100644
index 00000000..cd82d571
--- /dev/null
+++ b/tests/test_cl_half.h.c
@@ -0,0 +1,114 @@
+//
+// Copyright (c) 2020 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <math.h>
+#include <stdio.h>
+
+#include "CL/cl_half.h"
+
+union FI {     // lets the tests print a float's raw bit pattern
+  float f;     // the value as a float
+  uint32_t i;  // the same storage read back as a 32-bit integer
+};
+
+int test_half_to_float(cl_half h, cl_float ref)
+{
+  // Bit patterns of the actual and expected floats, used only for reporting
+  union FI actual, expected;
+  actual.f = cl_half_to_float(h);
+  expected.f = ref;
+  if (actual.f == expected.f)
+    return 1;
+
+  printf("\nERROR: converting 0x%04x to float: expected 0x%08x, got 0x%08x\n",
+         h, expected.i, actual.i);
+  return 0;
+}
+
+int test_half_from_float(cl_float f, cl_half ref,
+                         cl_half_rounding_mode mode, const char *mode_str)
+{
+  const cl_half actual = cl_half_from_float(f, mode);
+  if (actual == ref)
+    return 1;
+  // Mismatch: report the input's raw bits alongside both half encodings
+  union FI input;
+  input.f = f;
+  printf(
+    "\nERROR: converting 0x%08x to half (%s): expected 0x%04x, got 0x%04x\n",
+    input.i, mode_str, ref, actual);
+  return 0;
+}
+
+int main(void)
+{
+  printf("\nChecking conversion routines in cl_half.h\n");
+
+#define CHECK_TO_FLOAT(h, ref)                     \
+  if (!test_half_to_float(h, ref)) {               \
+    printf("Test failed on line %d.\n", __LINE__); \
+    return 1;                                      \
+  }
+
+  // Check half -> float on zero, +/-1 and +/-infinity
+  CHECK_TO_FLOAT(0x0000, 0.f);
+  CHECK_TO_FLOAT(0x3c00, 1.f);
+  CHECK_TO_FLOAT(0xbc00, -1.f);
+  CHECK_TO_FLOAT(0x7c00, INFINITY);
+  CHECK_TO_FLOAT(0xfc00, -INFINITY);
+
+
+#define CHECK_FROM_FLOAT(f, ref, mode)                         \
+  if (!test_half_from_float(f, ref, CL_HALF_##mode, #mode)) {  \
+    printf("Test failed on line %d.\n", __LINE__);             \
+    return 1;                                                  \
+  }
+
+  // Check exact conversions: zero, +/-1, largest finite and smallest normal
+  CHECK_FROM_FLOAT(0.f, 0x0000, RTE);
+  CHECK_FROM_FLOAT(1.f, 0x3c00, RTE);
+  CHECK_FROM_FLOAT(-1.f, 0xbc00, RTE);
+  CHECK_FROM_FLOAT(CL_HALF_MAX, 0x7bff, RTE);
+  CHECK_FROM_FLOAT(CL_HALF_MIN, 0x0400, RTE);
+
+  // Positive overflow: +infinity for RTE/RTP, largest finite for RTN/RTZ
+  CHECK_FROM_FLOAT(CL_HALF_MAX + 1000.f, 0x7c00, RTE);
+  CHECK_FROM_FLOAT(CL_HALF_MAX + 1000.f, 0x7c00, RTP);
+  CHECK_FROM_FLOAT(CL_HALF_MAX + 1000.f, 0x7bff, RTN);
+  CHECK_FROM_FLOAT(CL_HALF_MAX + 1000.f, 0x7bff, RTZ);
+
+  // Negative overflow: -infinity for RTE/RTN, most negative finite for RTP/RTZ
+  CHECK_FROM_FLOAT(-(CL_HALF_MAX + 1000.f), 0xfc00, RTE);
+  CHECK_FROM_FLOAT(-(CL_HALF_MAX + 1000.f), 0xfbff, RTP);
+  CHECK_FROM_FLOAT(-(CL_HALF_MAX + 1000.f), 0xfc00, RTN);
+  CHECK_FROM_FLOAT(-(CL_HALF_MAX + 1000.f), 0xfbff, RTZ);
+
+  // 2^-25 is halfway between 0 and the smallest denormal: only RTP rounds up
+  CHECK_FROM_FLOAT(0x1.000000p-25, 0x0000, RTE);
+  CHECK_FROM_FLOAT(0x1.000000p-25, 0x0001, RTP);
+  CHECK_FROM_FLOAT(0x1.000000p-25, 0x0000, RTN);
+  CHECK_FROM_FLOAT(0x1.000000p-25, 0x0000, RTZ);
+
+  // -2^-25 is halfway between -0 and -(smallest denormal): only RTN rounds down
+  CHECK_FROM_FLOAT(-0x1.000000p-25, 0x8000, RTE);
+  CHECK_FROM_FLOAT(-0x1.000000p-25, 0x8000, RTP);
+  CHECK_FROM_FLOAT(-0x1.000000p-25, 0x8001, RTN);
+  CHECK_FROM_FLOAT(-0x1.000000p-25, 0x8000, RTZ);
+
+  printf("\nAll tests passed!\n");
+
+  return 0;
+}