From 7d7f40cec01927eb6b226e562af1ab79aeb87206 Mon Sep 17 00:00:00 2001 From: James Price Date: Mon, 23 Dec 2019 16:01:35 -0500 Subject: [PATCH 1/9] Add cl_half.h header --- CL/cl_half.h | 454 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 454 insertions(+) create mode 100644 CL/cl_half.h diff --git a/CL/cl_half.h b/CL/cl_half.h new file mode 100644 index 00000000..0004b501 --- /dev/null +++ b/CL/cl_half.h @@ -0,0 +1,454 @@ +/******************************************************************************* + * Copyright (c) 2019 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+ ******************************************************************************/ + +/** + * This is a header-only utility library that provides OpenCL host code with + * routines for converting to/from cl_half values. + * + * Example usage: + * + * #include <CL/cl_half.h> + * ... + * cl_half h = cl_float_to_half(0.5f, CL_HALF_RTE); + * cl_float f = cl_half_to_float(h); + */ + +#ifndef __CL_HALF_H +#define __CL_HALF_H + +#include <CL/cl_platform.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Rounding mode used when converting to cl_half. + */ +typedef enum +{ + CL_HALF_RTE, // round to nearest even + CL_HALF_RTZ, // round towards zero + CL_HALF_RTP, // round towards positive infinity + CL_HALF_RTN, // round towards negative infinity +} cl_half_rounding_mode; + +/** + * Convert a cl_float to a cl_half. + */ +static inline cl_half cl_float_to_half(cl_float f, cl_half_rounding_mode rounding_mode) +{ + // Type-punning to get direct access to underlying bits + union + { + cl_float f; + uint32_t i; + } f32; + f32.f = f; + + // Extract sign bit + uint16_t sign = f32.i >> 31; + + // Extract FP32 exponent and mantissa + uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF; + uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1); + + // Remove FP32 exponent bias + int32_t exp = f_exp - CL_FLT_MAX_EXP + 1; + + // Add FP16 exponent bias + uint16_t h_exp = exp + CL_HALF_MAX_EXP - 1; + + // Check for NaN / infinity + if (f_exp == 0xFF) + { + if (f_mant) + { + // NaN -> propagate mantissa and silence it + uint16_t h_mant = f_mant >> (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG); + h_mant |= 0x200; + return (sign << 15) | 0x7C00 | h_mant; + } + else + { + // Infinity -> zero mantissa + return (sign << 15) | 0x7C00; + } + } + + // Check for overflow + if (exp >= CL_HALF_MAX_EXP) + { + if (rounding_mode == CL_HALF_RTZ) + { + // Round overflow towards zero -> largest positive value + return (sign << 15) | 0x7BFF; + } + else if (rounding_mode == CL_HALF_RTP && sign) + { + // Round overflow towards 
positive infinity -> largest positive value + return (sign << 15) | 0x7BFF; + } + else if (rounding_mode == CL_HALF_RTN && !sign) + { + // Round overflow towards negative infinity -> smallest negative value + return (sign << 15) | 0x7BFF; + } + else + { + // Overflow to infinity + return (sign << 15) | 0x7C00; + } + } + + // Check for underflow + if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1)) + { + if (rounding_mode == CL_HALF_RTP && !sign && (f_exp || f_mant)) + { + // Round underflow towards positive infinity -> smallest positive value + return (sign << 15) | 1; + } + else if (rounding_mode == CL_HALF_RTN && sign && (f_exp || f_mant)) + { + // Round underflow towards negative infinity -> largest negative value + return (sign << 15) | 1; + } + else + { + // Flush to zero + return (sign << 15); + } + } + + // Position of the bit that will become the FP16 mantissa LSB + uint32_t lsb_pos; + + if (exp < -14) + { + // Denormal -> include the implicit 1 from the FP32 mantissa + h_exp = 0; + f_mant |= 1 << (CL_FLT_MANT_DIG - 1); + + // Mantissa shift amount depends on exponent + lsb_pos = -exp + (CL_FLT_MANT_DIG - 25); + } + else + { + // Normal -> just truncate mantissa + lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG; + } + + // Generate FP16 mantissa by shifting FP32 mantissa + uint16_t h_mant = f_mant >> lsb_pos; + + // Check whether we need to round + uint32_t halfway = 1 << (lsb_pos - 1); + uint32_t mask = (halfway << 1) - 1; + switch (rounding_mode) + { + case CL_HALF_RTE: + if ((f_mant & mask) > halfway) + { + // More than halfway -> round up + h_mant += 1; + } + else if ((f_mant & mask) == halfway) + { + // Exactly halfway -> round to nearest even + if (h_mant & 0x1) + h_mant += 1; + } + break; + case CL_HALF_RTZ: + // Mantissa has already been truncated -> do nothing + break; + case CL_HALF_RTP: + if ((f_mant & mask) && !sign) + { + // Round positive numbers up + h_mant += 1; + } + break; + case CL_HALF_RTN: + if ((f_mant & mask) && sign) + { + // Round 
negative numbers down + h_mant += 1; + } + break; + } + + // Check for mantissa overflow + if (h_mant & 0x400) + { + h_exp += 1; + h_mant = 0; + } + + return (sign << 15) | (h_exp << 10) | h_mant; +} + + +/** + * Convert a cl_double to a cl_half. + */ +static inline cl_half cl_double_to_half(cl_double d, cl_half_rounding_mode rounding_mode) +{ + // Type-punning to get direct access to underlying bits + union + { + cl_double d; + uint64_t i; + } f64; + f64.d = d; + + // Extract sign bit + uint16_t sign = f64.i >> 63; + + // Extract FP64 exponent and mantissa + uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF; + uint64_t d_mant = f64.i & ((1ul << (CL_DBL_MANT_DIG - 1)) - 1); + + // Remove FP64 exponent bias + int64_t exp = d_exp - CL_DBL_MAX_EXP + 1; + + // Add FP16 exponent bias + uint16_t h_exp = exp + CL_HALF_MAX_EXP - 1; + + // Check for NaN / infinity + if (d_exp == 0x7FF) + { + if (d_mant) + { + // NaN -> propagate mantissa and silence it + uint16_t h_mant = d_mant >> (CL_DBL_MANT_DIG - CL_HALF_MANT_DIG); + h_mant |= 0x200; + return (sign << 15) | 0x7C00 | h_mant; + } + else + { + // Infinity -> zero mantissa + return (sign << 15) | 0x7C00; + } + } + + // Check for overflow + if (exp >= CL_HALF_MAX_EXP) + { + if (rounding_mode == CL_HALF_RTZ) + { + // Round overflow towards zero -> largest positive value + return (sign << 15) | 0x7BFF; + } + else if (rounding_mode == CL_HALF_RTP && sign) + { + // Round overflow towards positive infinity -> largest positive value + return (sign << 15) | 0x7BFF; + } + else if (rounding_mode == CL_HALF_RTN && !sign) + { + // Round overflow towards negative infinity -> smallest negative value + return (sign << 15) | 0x7BFF; + } + else + { + // Overflow to infinity + return (sign << 15) | 0x7C00; + } + } + + // Check for underflow + if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1)) + { + if (rounding_mode == CL_HALF_RTP && !sign && (d_exp || d_mant)) + { + // Round underflow towards positive infinity -> smallest positive 
value + return (sign << 15) | 1; + } + else if (rounding_mode == CL_HALF_RTN && sign && (d_exp || d_mant)) + { + // Round underflow towards negative infinity -> largest negative value + return (sign << 15) | 1; + } + else + { + // Flush to zero + return (sign << 15); + } + } + + // Position of the bit that will become the FP16 mantissa LSB + uint32_t lsb_pos; + + if (exp < -14) + { + // Denormal -> include the implicit 1 from the FP64 mantissa + h_exp = 0; + d_mant |= 1ul << (CL_DBL_MANT_DIG - 1); + + // Mantissa shift amount depends on exponent + lsb_pos = -exp + (CL_DBL_MANT_DIG - 25); + } + else + { + // Normal -> just truncate mantissa + lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG; + } + + // Generate FP16 mantissa by shifting FP64 mantissa + uint16_t h_mant = d_mant >> lsb_pos; + + // Check whether we need to round + uint64_t halfway = 1ul << (lsb_pos - 1); + uint64_t mask = (halfway << 1) - 1; + switch (rounding_mode) + { + case CL_HALF_RTE: + if ((d_mant & mask) > halfway) + { + // More than halfway -> round up + h_mant += 1; + } + else if ((d_mant & mask) == halfway) + { + // Exactly halfway -> round to nearest even + if (h_mant & 0x1) + h_mant += 1; + } + break; + case CL_HALF_RTZ: + // Mantissa has already been truncated -> do nothing + break; + case CL_HALF_RTP: + if ((d_mant & mask) && !sign) + { + // Round positive numbers up + h_mant += 1; + } + break; + case CL_HALF_RTN: + if ((d_mant & mask) && sign) + { + // Round negative numbers down + h_mant += 1; + } + break; + } + + // Check for mantissa overflow + if (h_mant & 0x400) + { + h_exp += 1; + h_mant = 0; + } + + return (sign << 15) | (h_exp << 10) | h_mant; +} + + +/** + * Convert a cl_half to a cl_float. 
+ */ +static inline cl_float cl_half_to_float(cl_half h) +{ + // Type-punning to get direct access to underlying bits + union + { + cl_float f; + uint32_t i; + } f32; + + // Extract sign bit + uint16_t sign = h >> 15; + + // Extract FP16 exponent and mantissa + uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = h & 0x3FF; + + // Remove FP16 exponent bias + int32_t exp = h_exp - CL_HALF_MAX_EXP + 1; + + // Add FP32 exponent bias + uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1; + + // Check for NaN / infinity + if (h_exp == 0x1F) + { + if (h_mant) + { + // NaN -> propagate mantissa and silence it + uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG); + f_mant |= 0x400000; + f32.i = (sign << 31) | 0x7F800000 | f_mant; + return f32.f; + } + else + { + // Infinity -> zero mantissa + f32.i = (sign << 31) | 0x7F800000; + return f32.f; + } + } + + // Check for zero / denormal + if (h_exp == 0) + { + if (h_mant == 0) + { + // Zero -> zero exponent + f_exp = 0; + } + else + { + // Denormal -> normalize it + // - Shift mantissa to make most-significant 1 implicit + // - Adjust exponent accordingly + uint32_t shift = 0; + while ((h_mant & 0x400) == 0) + { + h_mant <<= 1; + shift++; + } + h_mant &= 0x3FF; + f_exp -= shift - 1; + } + } + + f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13); + return f32.f; +} + + +#ifdef __cplusplus +} +#endif + + +#endif /* __CL_HALF_H */ From e9cb80421a5fe3f5f23a7af370b6594c1040d415 Mon Sep 17 00:00:00 2001 From: James Price Date: Thu, 9 Jan 2020 12:31:16 -0500 Subject: [PATCH 2/9] Make shared utility for handling {over,under}flow --- CL/cl_half.h | 152 ++++++++++++++++++++++++++------------------------- 1 file changed, 78 insertions(+), 74 deletions(-) diff --git a/CL/cl_half.h b/CL/cl_half.h index 0004b501..bddc20fd 100644 --- a/CL/cl_half.h +++ b/CL/cl_half.h @@ -59,6 +59,60 @@ typedef enum CL_HALF_RTN, // round towards negative infinity } cl_half_rounding_mode; + +/* Private utility macros. 
*/ +#define CL_HALF_EXP_MASK 0x7C00 +#define CL_HALF_MAX_FINITE_MAG 0x7BFF + + +/* + * Utility to deal with values that overflow when converting to half precision. + */ +static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode, + uint16_t sign) +{ + if (rounding_mode == CL_HALF_RTZ) + { + // Round overflow towards zero -> largest finite number (preserving sign) + return (sign << 15) | CL_HALF_MAX_FINITE_MAG; + } + else if (rounding_mode == CL_HALF_RTP && sign) + { + // Round negative overflow towards positive infinity -> most negative finite number + return (1 << 15) | CL_HALF_MAX_FINITE_MAG; + } + else if (rounding_mode == CL_HALF_RTN && !sign) + { + // Round positive overflow towards negative infinity -> largest finite number + return CL_HALF_MAX_FINITE_MAG; + } + + // Overflow to infinity + return (sign << 15) | CL_HALF_EXP_MASK; +} + +/* + * Utility to deal with values that underflow when converting to half precision. + */ +static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode, + uint16_t sign) +{ + if (rounding_mode == CL_HALF_RTP && !sign) + { + // Round underflow towards positive infinity -> smallest positive value + return (sign << 15) | 1; + } + else if (rounding_mode == CL_HALF_RTN && sign) + { + // Round underflow towards negative infinity -> largest negative value + return (sign << 15) | 1; + } + + // Flush to zero + return (sign << 15); +} + + /** * Convert a cl_float to a cl_half. 
*/ @@ -93,58 +147,31 @@ static inline cl_half cl_float_to_half(cl_float f, cl_half_rounding_mode roundin // NaN -> propagate mantissa and silence it uint16_t h_mant = f_mant >> (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG); h_mant |= 0x200; - return (sign << 15) | 0x7C00 | h_mant; + return (sign << 15) | CL_HALF_EXP_MASK | h_mant; } else { // Infinity -> zero mantissa - return (sign << 15) | 0x7C00; + return (sign << 15) | CL_HALF_EXP_MASK; } } + // Check for zero + if (!f_exp && !f_mant) + { + return (sign << 15); + } + // Check for overflow if (exp >= CL_HALF_MAX_EXP) { - if (rounding_mode == CL_HALF_RTZ) - { - // Round overflow towards zero -> largest positive value - return (sign << 15) | 0x7BFF; - } - else if (rounding_mode == CL_HALF_RTP && sign) - { - // Round overflow towards positive infinity -> largest positive value - return (sign << 15) | 0x7BFF; - } - else if (rounding_mode == CL_HALF_RTN && !sign) - { - // Round overflow towards negative infinity -> smallest negative value - return (sign << 15) | 0x7BFF; - } - else - { - // Overflow to infinity - return (sign << 15) | 0x7C00; - } + return cl_half_handle_overflow(rounding_mode, sign); } // Check for underflow if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1)) { - if (rounding_mode == CL_HALF_RTP && !sign && (f_exp || f_mant)) - { - // Round underflow towards positive infinity -> smallest positive value - return (sign << 15) | 1; - } - else if (rounding_mode == CL_HALF_RTN && sign && (f_exp || f_mant)) - { - // Round underflow towards negative infinity -> largest negative value - return (sign << 15) | 1; - } - else - { - // Flush to zero - return (sign << 15); - } + return cl_half_handle_underflow(rounding_mode, sign); } // Position of the bit that will become the FP16 mantissa LSB @@ -250,58 +277,31 @@ static inline cl_half cl_double_to_half(cl_double d, cl_half_rounding_mode round // NaN -> propagate mantissa and silence it uint16_t h_mant = d_mant >> (CL_DBL_MANT_DIG - CL_HALF_MANT_DIG); h_mant |= 0x200; - 
return (sign << 15) | 0x7C00 | h_mant; + return (sign << 15) | CL_HALF_EXP_MASK | h_mant; } else { // Infinity -> zero mantissa - return (sign << 15) | 0x7C00; + return (sign << 15) | CL_HALF_EXP_MASK; } } + // Check for zero + if (!d_exp && !d_mant) + { + return (sign << 15); + } + // Check for overflow if (exp >= CL_HALF_MAX_EXP) { - if (rounding_mode == CL_HALF_RTZ) - { - // Round overflow towards zero -> largest positive value - return (sign << 15) | 0x7BFF; - } - else if (rounding_mode == CL_HALF_RTP && sign) - { - // Round overflow towards positive infinity -> largest positive value - return (sign << 15) | 0x7BFF; - } - else if (rounding_mode == CL_HALF_RTN && !sign) - { - // Round overflow towards negative infinity -> smallest negative value - return (sign << 15) | 0x7BFF; - } - else - { - // Overflow to infinity - return (sign << 15) | 0x7C00; - } + return cl_half_handle_overflow(rounding_mode, sign); } // Check for underflow if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1)) { - if (rounding_mode == CL_HALF_RTP && !sign && (d_exp || d_mant)) - { - // Round underflow towards positive infinity -> smallest positive value - return (sign << 15) | 1; - } - else if (rounding_mode == CL_HALF_RTN && sign && (d_exp || d_mant)) - { - // Round underflow towards negative infinity -> largest negative value - return (sign << 15) | 1; - } - else - { - // Flush to zero - return (sign << 15); - } + return cl_half_handle_underflow(rounding_mode, sign); } // Position of the bit that will become the FP16 mantissa LSB @@ -446,6 +446,10 @@ static inline cl_float cl_half_to_float(cl_half h) } +#undef CL_HALF_EXP_MASK +#undef CL_HALF_MAX_FINITE_MAG + + #ifdef __cplusplus } #endif From 5bc63a4601ae81a185a179c36660781129222928 Mon Sep 17 00:00:00 2001 From: James Price Date: Thu, 9 Jan 2020 17:35:12 -0500 Subject: [PATCH 3/9] Include stdint.h --- CL/cl_half.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CL/cl_half.h b/CL/cl_half.h index bddc20fd..3ced295d 100644 --- 
a/CL/cl_half.h +++ b/CL/cl_half.h @@ -43,6 +43,8 @@ #include <CL/cl_platform.h> +#include <stdint.h> + #ifdef __cplusplus extern "C" { #endif From 943493d6feddea0674b6d5641fb1c7cf2b2741b8 Mon Sep 17 00:00:00 2001 From: James Price Date: Mon, 13 Jan 2020 16:25:36 -0500 Subject: [PATCH 4/9] Prefix all new functions with cl_half_ --- CL/cl_half.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CL/cl_half.h b/CL/cl_half.h index 3ced295d..5bb57742 100644 --- a/CL/cl_half.h +++ b/CL/cl_half.h @@ -34,7 +34,7 @@ * * #include <CL/cl_half.h> * ... - * cl_half h = cl_float_to_half(0.5f, CL_HALF_RTE); + * cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE); * cl_float f = cl_half_to_float(h); */ @@ -118,7 +118,7 @@ static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mo /** * Convert a cl_float to a cl_half. 
*/ -static inline cl_half cl_double_to_half(cl_double d, cl_half_rounding_mode rounding_mode) +static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode) { // Type-punning to get direct access to underlying bits union From 4dff5990264d138fed3554a674b75caba95777de Mon Sep 17 00:00:00 2001 From: James Price Date: Mon, 13 Jan 2020 16:39:08 -0500 Subject: [PATCH 5/9] Address Windows warnings --- CL/cl_half.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/CL/cl_half.h b/CL/cl_half.h index 5bb57742..fa418146 100644 --- a/CL/cl_half.h +++ b/CL/cl_half.h @@ -263,13 +263,13 @@ static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rou // Extract FP64 exponent and mantissa uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF; - uint64_t d_mant = f64.i & ((1ul << (CL_DBL_MANT_DIG - 1)) - 1); + uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1); // Remove FP64 exponent bias int64_t exp = d_exp - CL_DBL_MAX_EXP + 1; // Add FP16 exponent bias - uint16_t h_exp = exp + CL_HALF_MAX_EXP - 1; + uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1); // Check for NaN / infinity if (d_exp == 0x7FF) @@ -277,7 +277,7 @@ static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rou if (d_mant) { // NaN -> propagate mantissa and silence it - uint16_t h_mant = d_mant >> (CL_DBL_MANT_DIG - CL_HALF_MANT_DIG); + uint16_t h_mant = (uint16_t)(d_mant >> (CL_DBL_MANT_DIG - CL_HALF_MANT_DIG)); h_mant |= 0x200; return (sign << 15) | CL_HALF_EXP_MASK | h_mant; } @@ -313,10 +313,10 @@ static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rou { // Denormal -> include the implicit 1 from the FP64 mantissa h_exp = 0; - d_mant |= 1ul << (CL_DBL_MANT_DIG - 1); + d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1); // Mantissa shift amount depends on exponent - lsb_pos = -exp + (CL_DBL_MANT_DIG - 25); + lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25)); } 
else { @@ -325,10 +325,10 @@ static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rou } // Generate FP16 mantissa by shifting FP64 mantissa - uint16_t h_mant = d_mant >> lsb_pos; + uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos); // Check whether we need to round - uint64_t halfway = 1ul << (lsb_pos - 1); + uint64_t halfway = (uint64_t)1 << (lsb_pos - 1); uint64_t mask = (halfway << 1) - 1; switch (rounding_mode) { From f901f830bb57810917aaf8214d40510df5ba7037 Mon Sep 17 00:00:00 2001 From: James Price Date: Mon, 13 Jan 2020 16:44:07 -0500 Subject: [PATCH 6/9] Refactor lsb_pos calculation --- CL/cl_half.h | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/CL/cl_half.h b/CL/cl_half.h index fa418146..c6d4f919 100644 --- a/CL/cl_half.h +++ b/CL/cl_half.h @@ -141,13 +141,16 @@ static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode round // Add FP16 exponent bias uint16_t h_exp = exp + CL_HALF_MAX_EXP - 1; + // Position of the bit that will become the FP16 mantissa LSB + uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG; + // Check for NaN / infinity if (f_exp == 0xFF) { if (f_mant) { // NaN -> propagate mantissa and silence it - uint16_t h_mant = f_mant >> (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG); + uint16_t h_mant = f_mant >> lsb_pos; h_mant |= 0x200; return (sign << 15) | CL_HALF_EXP_MASK | h_mant; } @@ -176,9 +179,7 @@ static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode round return cl_half_handle_underflow(rounding_mode, sign); } - // Position of the bit that will become the FP16 mantissa LSB - uint32_t lsb_pos; - + // Check for value that will become denormal if (exp < -14) { // Denormal -> include the implicit 1 from the FP32 mantissa @@ -188,11 +189,6 @@ static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode round // Mantissa shift amount depends on exponent lsb_pos = -exp + (CL_FLT_MANT_DIG - 25); } - else - { - // Normal -> 
just truncate mantissa - lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG; - } // Generate FP16 mantissa by shifting FP32 mantissa uint16_t h_mant = f_mant >> lsb_pos; @@ -271,13 +267,16 @@ static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rou // Add FP16 exponent bias uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1); + // Position of the bit that will become the FP16 mantissa LSB + uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG; + // Check for NaN / infinity if (d_exp == 0x7FF) { if (d_mant) { // NaN -> propagate mantissa and silence it - uint16_t h_mant = (uint16_t)(d_mant >> (CL_DBL_MANT_DIG - CL_HALF_MANT_DIG)); + uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos); h_mant |= 0x200; return (sign << 15) | CL_HALF_EXP_MASK | h_mant; } @@ -306,23 +305,16 @@ static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rou return cl_half_handle_underflow(rounding_mode, sign); } - // Position of the bit that will become the FP16 mantissa LSB - uint32_t lsb_pos; - + // Check for value that will become denormal if (exp < -14) { - // Denormal -> include the implicit 1 from the FP64 mantissa + // Include the implicit 1 from the FP64 mantissa h_exp = 0; d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1); // Mantissa shift amount depends on exponent lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25)); } - else - { - // Normal -> just truncate mantissa - lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG; - } // Generate FP16 mantissa by shifting FP64 mantissa uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos); From fcf33631bc6c5365ff72233ce17ea78260526da4 Mon Sep 17 00:00:00 2001 From: James Price Date: Mon, 13 Jan 2020 16:58:20 -0500 Subject: [PATCH 7/9] Avoid leading double-underscore in header guard --- CL/cl_half.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CL/cl_half.h b/CL/cl_half.h index c6d4f919..419d5c50 100644 --- a/CL/cl_half.h +++ b/CL/cl_half.h @@ -38,8 +38,8 @@ * cl_float f = cl_half_to_float(h); 
*/ -#ifndef __CL_HALF_H -#define __CL_HALF_H +#ifndef OPENCL_CL_HALF_H +#define OPENCL_CL_HALF_H #include <CL/cl_platform.h> @@ -449,4 +449,4 @@ static inline cl_float cl_half_to_float(cl_half h) #endif -#endif /* __CL_HALF_H */ +#endif /* OPENCL_CL_HALF_H */ From 461e4e76fcac201642441b2f9972a33a744f3a27 Mon Sep 17 00:00:00 2001 From: James Price Date: Wed, 29 Apr 2020 17:41:33 -0400 Subject: [PATCH 8/9] Update license --- CL/cl_half.h | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/CL/cl_half.h b/CL/cl_half.h index 419d5c50..f748d9ed 100644 --- a/CL/cl_half.h +++ b/CL/cl_half.h @@ -1,29 +1,17 @@ /******************************************************************************* - * Copyright (c) 2019 The Khronos Group Inc. + * Copyright (c) 2019-2020 The Khronos Group Inc. * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. + * http://www.apache.org/licenses/LICENSE-2.0 * - * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS - * KHRONOS STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS - * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT - * https://www.khronos.org/registry/ - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. ******************************************************************************/ /** From e6c705dbde7f39a3684fca444c3d8fe5c85a8e3f Mon Sep 17 00:00:00 2001 From: James Price Date: Wed, 29 Apr 2020 17:41:46 -0400 Subject: [PATCH 9/9] Add tests for cl_half.h --- tests/CMakeLists.txt | 1 + tests/test_cl_half.h.c | 114 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+) create mode 100644 tests/test_cl_half.h.c diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9e5b0bfa..e1c306d3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -28,6 +28,7 @@ add_header_test(cl_ext_h test_cl_ext.h.c) add_header_test(cl_ext_intel_h test_cl_ext_intel.h.c) add_header_test(cl_gl_h test_cl_gl.h.c) add_header_test(cl_gl_ext_h test_cl_gl_ext.h.c) +add_header_test(cl_half_h test_cl_half.h.c) add_header_test(cl_icd_h test_cl_icd.h.c) add_header_test(cl_platform_h test_cl_platform.h.c) add_header_test(cl_opencl_h test_opencl.h.c) diff --git a/tests/test_cl_half.h.c b/tests/test_cl_half.h.c new file 
mode 100644 index 00000000..cd82d571 --- /dev/null +++ b/tests/test_cl_half.h.c @@ -0,0 +1,114 @@ +// +// Copyright (c) 2020 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include <math.h> +#include <stdio.h> + +#include "CL/cl_half.h" + +union FI { + float f; + uint32_t i; +}; + +int test_half_to_float(cl_half h, cl_float ref) +{ + cl_float f = cl_half_to_float(h); + if (f != ref) { + union FI f_i, ref_i; + f_i.f = f; + ref_i.f = ref; + printf("\nERROR: converting 0x%04x to float: expected 0x%08x, got 0x%08x\n", + h, ref_i.i, f_i.i); + return 0; + } + return 1; +} + +int test_half_from_float(cl_float f, cl_half ref, + cl_half_rounding_mode mode, const char *mode_str) +{ + cl_half h = cl_half_from_float(f, mode); + if (h != ref) { + union FI f_i; + f_i.f = f; + printf( + "\nERROR: converting 0x%08x to half (%s): expected 0x%04x, got 0x%04x\n", + f_i.i, mode_str, ref, h); + return 0; + } + return 1; +} + +int main(void) +{ + printf("\nChecking conversion routines in cl_half.h\n"); + +#define CHECK_TO_FLOAT(h, ref) \ + if (!test_half_to_float(h, ref)) { \ + printf("Test failed on line %d.\n", __LINE__); \ + return 1; \ + } + + // Check a handful of values + CHECK_TO_FLOAT(0x0000, 0.f); + CHECK_TO_FLOAT(0x3c00, 1.f); + CHECK_TO_FLOAT(0xbc00, -1.f); + CHECK_TO_FLOAT(0x7c00, INFINITY); + CHECK_TO_FLOAT(0xfc00, -INFINITY); + + +#define CHECK_FROM_FLOAT(f, ref, mode) \ + if (!test_half_from_float(f, ref, CL_HALF_##mode, #mode)) { \ + printf("Test 
failed on line %d.\n", __LINE__); \ + return 1; \ + } + + // Check a handful of normal values + CHECK_FROM_FLOAT(0.f, 0x0000, RTE); + CHECK_FROM_FLOAT(1.f, 0x3c00, RTE); + CHECK_FROM_FLOAT(-1.f, 0xbc00, RTE); + CHECK_FROM_FLOAT(CL_HALF_MAX, 0x7bff, RTE); + CHECK_FROM_FLOAT(CL_HALF_MIN, 0x0400, RTE); + + // Check huge positive (non-inf) values round properly + CHECK_FROM_FLOAT(CL_HALF_MAX + 1000.f, 0x7c00, RTE); + CHECK_FROM_FLOAT(CL_HALF_MAX + 1000.f, 0x7c00, RTP); + CHECK_FROM_FLOAT(CL_HALF_MAX + 1000.f, 0x7bff, RTN); + CHECK_FROM_FLOAT(CL_HALF_MAX + 1000.f, 0x7bff, RTZ); + + // Check huge negative (non-inf) values round properly + CHECK_FROM_FLOAT(-(CL_HALF_MAX + 1000.f), 0xfc00, RTE); + CHECK_FROM_FLOAT(-(CL_HALF_MAX + 1000.f), 0xfbff, RTP); + CHECK_FROM_FLOAT(-(CL_HALF_MAX + 1000.f), 0xfc00, RTN); + CHECK_FROM_FLOAT(-(CL_HALF_MAX + 1000.f), 0xfbff, RTZ); + + // Check tiny positive values round properly + CHECK_FROM_FLOAT(0x1.000000p-25, 0x0000, RTE); + CHECK_FROM_FLOAT(0x1.000000p-25, 0x0001, RTP); + CHECK_FROM_FLOAT(0x1.000000p-25, 0x0000, RTN); + CHECK_FROM_FLOAT(0x1.000000p-25, 0x0000, RTZ); + + // Check tiny negative values round properly + CHECK_FROM_FLOAT(-0x1.000000p-25, 0x8000, RTE); + CHECK_FROM_FLOAT(-0x1.000000p-25, 0x8000, RTP); + CHECK_FROM_FLOAT(-0x1.000000p-25, 0x8001, RTN); + CHECK_FROM_FLOAT(-0x1.000000p-25, 0x8000, RTZ); + + printf("\nAll tests passed!\n"); + + return 0; +}