From 6c743e675f3374ca421d86dbc37d50531a4d1147 Mon Sep 17 00:00:00 2001 From: "Carlos F. Borges" Date: Fri, 3 May 2019 14:08:19 -0700 Subject: [PATCH 01/12] Replacement for hypot(a,b) Provides a fast and accurate implementation of hypot() that leverages the fused multiply add where available. The approach is explained and tested in detail in the paper: An Improved Algorithm for hypot(a,b) by Carlos F. Borges The article is available online at ArXiv at the link https://arxiv.org/abs/1904.09481 --- base/math.jl | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/base/math.jl b/base/math.jl index 0a9fac443c602..f84c59ea77df2 100644 --- a/base/math.jl +++ b/base/math.jl @@ -523,6 +523,12 @@ sqrt(x::Real) = sqrt(float(x)) Compute the hypotenuse ``\\sqrt{x^2+y^2}`` avoiding overflow and underflow. +This code is an implementation of the algorithm described in: +An Improved Algorithm for `hypot(a,b)` +by Carlos F. Borges +The article is available online at ArXiv at the link + https://arxiv.org/abs/1904.09481 + # Examples ```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*" julia> a = 10^10; @@ -538,6 +544,37 @@ Stacktrace: ``` """ hypot(x::Number, y::Number) = hypot(promote(x, y)...) +hypot(x::Integer, y::Integer) = hypot(promote(float(x), float(y))...) +function hypot(x::T,y::T) where T<:AbstractFloat + #Return Inf if either or both imputs is Inf (Compliance with IEEE754) + if isinf(x) || isinf(y) + return convert(T,Inf) + end + + ax,ay = abs(x), abs(y) + if ay > ax + ax,ay = ay,ax + end + + if ay <= ax*sqrt(eps(T)/2) #Note: This also gets ay == 0 + return ax + end + + if (ax > sqrt(floatmax(T)/2)) || (ay < sqrt(floatmin(T))) + rescale = eps(sqrt(floatmin(T))) + if ax > sqrt(floatmax(T)/2) + ax = ax*rescale + ay = ay*rescale + return sqrt(muladd(ax,ax,ay*ay))/rescale + else + ax = ax/rescale + ay = ay/rescale + return sqrt(muladd(ax,ax,ay*ay))*rescale + end + else + return sqrt(muladd(ax,ax,ay*ay)) + end +end function hypot(x::T, y::T) where T<:Number ax = abs(x) ay = abs(y) From a59490b93027a2560c96d18a7406a63d3f61a61a Mon Sep 17 00:00:00 2001 From: "Carlos F. Borges" Date: Tue, 7 May 2019 09:04:37 -0700 Subject: [PATCH 02/12] Adds functionality for complex inputs. --- base/math.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/base/math.jl b/base/math.jl index f84c59ea77df2..e6a4983dbd014 100644 --- a/base/math.jl +++ b/base/math.jl @@ -544,6 +544,7 @@ Stacktrace: ``` """ hypot(x::Number, y::Number) = hypot(promote(x, y)...) +hypot(x::Complex, y::Complex) = hypot(promote(abs(x),abs(y))...) hypot(x::Integer, y::Integer) = hypot(promote(float(x), float(y))...) function hypot(x::T,y::T) where T<:AbstractFloat #Return Inf if either or both imputs is Inf (Compliance with IEEE754) From d71afbf18b725dd88af822352bb054180f1bcca9 Mon Sep 17 00:00:00 2001 From: "Carlos F. Borges" Date: Tue, 21 May 2019 13:08:18 -0700 Subject: [PATCH 03/12] Adds some testing. --- test/math.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/math.jl b/test/math.jl index 9887f7c7a1289..6c3fdee528f91 100644 --- a/test/math.jl +++ b/test/math.jl @@ -191,6 +191,8 @@ end @test isequal(expm1(T(0)), T(0)) @test expm1(T(1)) ≈ T(ℯ)-1 atol=10*eps(T) @test isequal(hypot(T(3),T(4)), T(5)) + @test isequal(hypot(floatmax(T),T(1)),floatmax(T)) + @test isequal(hypot(floatmin(T)*sqrt(eps(T)),T(0)),floatmin(T)*sqrt(eps(T))) @test isequal(log(T(1)), T(0)) @test isequal(log(ℯ,T(1)), T(0)) @test log(T(ℯ)) ≈ T(1) atol=eps(T) From aef05c8c2119bfd3453627d549686293c38c493c Mon Sep 17 00:00:00 2001 From: "Carlos F. Borges" Date: Tue, 21 May 2019 14:46:21 -0700 Subject: [PATCH 04/12] Cleaned up the logic a bit by eliminating a nested if structure. And cleaned up the comments. --- base/math.jl | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/base/math.jl b/base/math.jl index e6a4983dbd014..56b6820e895f0 100644 --- a/base/math.jl +++ b/base/math.jl @@ -552,29 +552,33 @@ function hypot(x::T,y::T) where T<:AbstractFloat return convert(T,Inf) end + # Order the operands ax,ay = abs(x), abs(y) if ay > ax ax,ay = ay,ax end - if ay <= ax*sqrt(eps(T)/2) #Note: This also gets ay == 0 + # Widely varying operands + if ay <= ax*sqrt(eps(T)/2.0) #Note: This also gets ay == 0 return ax end - if (ax > sqrt(floatmax(T)/2)) || (ay < sqrt(floatmin(T))) - rescale = eps(sqrt(floatmin(T))) - if ax > sqrt(floatmax(T)/2) - ax = ax*rescale - ay = ay*rescale - return sqrt(muladd(ax,ax,ay*ay))/rescale - else - ax = ax/rescale - ay = ay/rescale - return sqrt(muladd(ax,ax,ay*ay))*rescale - end - else - return sqrt(muladd(ax,ax,ay*ay)) + # Operands do not vary widely + rescale = eps(sqrt(floatmin(T))) #Rescaling constant + + if ax > sqrt(floatmax(T)/2.0) + ax = ax*rescale + ay = ay*rescale + return sqrt(muladd(ax,ax,ay*ay))/rescale end + + if ay < sqrt(floatmin(T)) + ax = ax/rescale + ay = ay/rescale + return sqrt(muladd(ax,ax,ay*ay))*rescale + end + + sqrt(muladd(ax,ax,ay*ay)) end function hypot(x::T, y::T) where T<:Number ax = abs(x) From 59d9156884b786a649233f9aa8e4f6d6ff23a757 Mon Sep 17 00:00:00 2001 From: "Carlos F. Borges" Date: Thu, 23 May 2019 11:01:05 -0700 Subject: [PATCH 05/12] Restructured to avoid a compiler issue where muladds are not consistently promoted to fused multiply-adds. --- base/math.jl | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/base/math.jl b/base/math.jl index 56b6820e895f0..e9ab82ba7453b 100644 --- a/base/math.jl +++ b/base/math.jl @@ -564,21 +564,18 @@ function hypot(x::T,y::T) where T<:AbstractFloat end # Operands do not vary widely - rescale = eps(sqrt(floatmin(T))) #Rescaling constant - + scale = eps(sqrt(floatmin(T))) #Rescaling constant if ax > sqrt(floatmax(T)/2.0) - ax = ax*rescale - ay = ay*rescale - return sqrt(muladd(ax,ax,ay*ay))/rescale - end - - if ay < sqrt(floatmin(T)) - ax = ax/rescale - ay = ay/rescale - return sqrt(muladd(ax,ax,ay*ay))*rescale + ax = ax*scale + ay = ay*scale + scale = 1.0/scale + elseif ay < sqrt(floatmin(T)) + ax = ax/scale + ay = ay/scale + else + scale = 1.0 end - - sqrt(muladd(ax,ax,ay*ay)) + sqrt(muladd(ax,ax,ay*ay))*scale end function hypot(x::T, y::T) where T<:Number ax = abs(x) From 8556142fdae494709e645ffbab3abda50970afb0 Mon Sep 17 00:00:00 2001 From: "Carlos F. Borges" Date: Thu, 23 May 2019 13:49:47 -0700 Subject: [PATCH 06/12] This is a test of the rescaling in the hypot code. It will fail if the rescaled version doesn't give the same answer as the one that requires no rescaling. The earlier version of my hypot code fails this test because of the issue with not all muladd calls being promoted to fma calls. However, the updated version passes. It ain't easy to construct arguments that test this. --- test/math.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/math.jl b/test/math.jl index 6c3fdee528f91..cc5ed20412984 100644 --- a/test/math.jl +++ b/test/math.jl @@ -193,6 +193,7 @@ end @test isequal(hypot(T(3),T(4)), T(5)) @test isequal(hypot(floatmax(T),T(1)),floatmax(T)) @test isequal(hypot(floatmin(T)*sqrt(eps(T)),T(0)),floatmin(T)*sqrt(eps(T))) + @test isequal(floatmin(T)*hypot(1.368423059742933,1.3510496552495361),hypot(floatmin(T)*1.368423059742933,floatmin(T)*1.3510496552495361)) @test isequal(log(T(1)), T(0)) @test isequal(log(ℯ,T(1)), T(0)) @test log(T(ℯ)) ≈ T(1) atol=eps(T) From 10d12e263565e252d6860436af86f9efe78d7353 Mon Sep 17 00:00:00 2001 From: cfborges <50250495+cfborges@users.noreply.github.com> Date: Wed, 29 May 2019 16:09:40 -0700 Subject: [PATCH 07/12] Update base/math.jl Co-Authored-By: Simon Byrne --- base/math.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/math.jl b/base/math.jl index e9ab82ba7453b..f259716969e5f 100644 --- a/base/math.jl +++ b/base/math.jl @@ -565,7 +565,7 @@ function hypot(x::T,y::T) where T<:AbstractFloat # Operands do not vary widely scale = eps(sqrt(floatmin(T))) #Rescaling constant - if ax > sqrt(floatmax(T)/2.0) + if ax > sqrt(floatmax(T)/2) ax = ax*scale ay = ay*scale scale = 1.0/scale From 15d6f3c00032f10dc89dd06b71373ad3db048eaa Mon Sep 17 00:00:00 2001 From: cfborges <50250495+cfborges@users.noreply.github.com> Date: Wed, 29 May 2019 16:10:02 -0700 Subject: [PATCH 08/12] Update base/math.jl Co-Authored-By: Simon Byrne --- base/math.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/math.jl b/base/math.jl index f259716969e5f..3a26c0e338e57 100644 --- a/base/math.jl +++ b/base/math.jl @@ -559,7 +559,7 @@ function hypot(x::T,y::T) where T<:AbstractFloat end # Widely varying operands - if ay <= ax*sqrt(eps(T)/2.0) #Note: This also gets ay == 0 + if ay <= ax*sqrt(eps(T)/2) #Note: This also gets ay == 0 return ax end From 5a69e85f19f5ff0107cd3f1417497f8baa17616c Mon Sep 17 00:00:00 2001 From: cfborges <50250495+cfborges@users.noreply.github.com> Date: Wed, 29 May 2019 16:11:40 -0700 Subject: [PATCH 09/12] Update base/math.jl Co-Authored-By: Simon Byrne --- base/math.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/math.jl b/base/math.jl index 3a26c0e338e57..300a2a540d083 100644 --- a/base/math.jl +++ b/base/math.jl @@ -568,7 +568,7 @@ function hypot(x::T,y::T) where T<:AbstractFloat if ax > sqrt(floatmax(T)/2) ax = ax*scale ay = ay*scale - scale = 1.0/scale + scale = inv(scale) elseif ay < sqrt(floatmin(T)) ax = ax/scale ay = ay/scale From 4ded5fbbd6f902b419d18bf6da24c6b14dc958b0 Mon Sep 17 00:00:00 2001 From: cfborges <50250495+cfborges@users.noreply.github.com> Date: Wed, 29 May 2019 16:12:01 -0700 Subject: [PATCH 10/12] Update base/math.jl Co-Authored-By: Simon Byrne --- base/math.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/math.jl b/base/math.jl index 300a2a540d083..844c5706e45c5 100644 --- a/base/math.jl +++ b/base/math.jl @@ -573,7 +573,7 @@ function hypot(x::T,y::T) where T<:AbstractFloat ax = ax/scale ay = ay/scale else - scale = 1.0 + scale = one(scale) end sqrt(muladd(ax,ax,ay*ay))*scale end From 74315233cfad2889c07aa1f5cf6e68eae28b8d5d Mon Sep 17 00:00:00 2001 From: "Carlos F. Borges" Date: Wed, 5 Jun 2019 09:10:25 -0700 Subject: [PATCH 11/12] I added a differential correction to the code that MASSIVELY improves the accuracy. It is now far better than the clib hypot code even without a fused multiply add. If you look at the ArXiv paper there is a plot that shows the stunning performance difference. This comes at the cost of a two more multiplies and one more divide (and some adds). Still way cheaper than the clib code --- base/math.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/base/math.jl b/base/math.jl index 844c5706e45c5..c9930154eed39 100644 --- a/base/math.jl +++ b/base/math.jl @@ -575,7 +575,9 @@ function hypot(x::T,y::T) where T<:AbstractFloat else scale = one(scale) end - sqrt(muladd(ax,ax,ay*ay))*scale + h = sqrt(muladd(ax,ax,ay*ay)) + delta = h-ax + (h - muladd(ax,delta,(delta+ay)*(delta-ay)/2)/h)*scale end function hypot(x::T, y::T) where T<:Number ax = abs(x) From 34a18b1762b4d5e15d3aa526e33809dd99015950 Mon Sep 17 00:00:00 2001 From: "Carlos F. Borges" Date: Wed, 12 Jun 2019 10:53:12 -0700 Subject: [PATCH 12/12] This version gives correctly rounded results more than 99% of the time. It is 10 times better than the C math library hypot function in this respect. --- base/math.jl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/base/math.jl b/base/math.jl index c9930154eed39..064c3e6058f37 100644 --- a/base/math.jl +++ b/base/math.jl @@ -576,8 +576,14 @@ function hypot(x::T,y::T) where T<:AbstractFloat scale = one(scale) end h = sqrt(muladd(ax,ax,ay*ay)) - delta = h-ax - (h - muladd(ax,delta,(delta+ay)*(delta-ay)/2)/h)*scale + if h <= 2*ay + delta = h-ay + h -= muladd(delta,delta-2*(ax-ay),ax*(2*delta - ax))/(2*h) + else + delta = h-ax + h -= muladd(delta,delta,muladd(ay,(4*delta-ay),2*delta*(ax-2*ay)))/(2*h) + end + h*scale end function hypot(x::T, y::T) where T<:Number ax = abs(x)