From 6c743e675f3374ca421d86dbc37d50531a4d1147 Mon Sep 17 00:00:00 2001
From: "Carlos F. Borges" <borges@nps.edu>
Date: Fri, 3 May 2019 14:08:19 -0700
Subject: [PATCH 01/12] Replacement for hypot(a,b)

Provides a fast and accurate implementation of hypot() that leverages
the fused multiply add where available. The approach is explained
and tested in detail in the paper:
 An Improved Algorithm for hypot(a,b) by Carlos F. Borges
The article is available online at ArXiv at the link
https://arxiv.org/abs/1904.09481
---
 base/math.jl | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/base/math.jl b/base/math.jl
index 0a9fac443c602..f84c59ea77df2 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -523,6 +523,12 @@ sqrt(x::Real) = sqrt(float(x))
 
 Compute the hypotenuse ``\\sqrt{x^2+y^2}`` avoiding overflow and underflow.
 
+This code is an implementation of the algorithm described in:
+An Improved Algorithm for `hypot(a,b)`
+by Carlos F. Borges
+The article is available online at ArXiv at the link
+  https://arxiv.org/abs/1904.09481
+
 # Examples
 ```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*"
 julia> a = 10^10;
@@ -538,6 +544,37 @@ Stacktrace:
 ```
 """
 hypot(x::Number, y::Number) = hypot(promote(x, y)...)
+hypot(x::Integer, y::Integer) = hypot(promote(float(x), float(y))...)
+function hypot(x::T,y::T) where T<:AbstractFloat
+    #Return Inf if either or both imputs is Inf (Compliance with IEEE754)
+    if isinf(x) || isinf(y)
+        return convert(T,Inf)
+    end
+
+    ax,ay = abs(x), abs(y)
+    if ay > ax
+        ax,ay = ay,ax
+    end
+
+    if ay <= ax*sqrt(eps(T)/2)  #Note: This also gets ay == 0
+        return ax
+    end
+
+    if (ax > sqrt(floatmax(T)/2)) || (ay < sqrt(floatmin(T)))
+        rescale = eps(sqrt(floatmin(T)))
+        if ax > sqrt(floatmax(T)/2)
+            ax = ax*rescale
+            ay = ay*rescale
+            return sqrt(muladd(ax,ax,ay*ay))/rescale
+        else
+            ax = ax/rescale
+            ay = ay/rescale
+            return sqrt(muladd(ax,ax,ay*ay))*rescale
+        end
+    else
+        return sqrt(muladd(ax,ax,ay*ay))
+    end
+end
 function hypot(x::T, y::T) where T<:Number
     ax = abs(x)
     ay = abs(y)

From a59490b93027a2560c96d18a7406a63d3f61a61a Mon Sep 17 00:00:00 2001
From: "Carlos F. Borges" <borges@nps.edu>
Date: Tue, 7 May 2019 09:04:37 -0700
Subject: [PATCH 02/12] Adds functionality for complex inputs.

---
 base/math.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/base/math.jl b/base/math.jl
index f84c59ea77df2..e6a4983dbd014 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -544,6 +544,7 @@ Stacktrace:
 ```
 """
 hypot(x::Number, y::Number) = hypot(promote(x, y)...)
+hypot(x::Complex, y::Complex) = hypot(promote(abs(x),abs(y))...)
 hypot(x::Integer, y::Integer) = hypot(promote(float(x), float(y))...)
 function hypot(x::T,y::T) where T<:AbstractFloat
     #Return Inf if either or both imputs is Inf (Compliance with IEEE754)

From d71afbf18b725dd88af822352bb054180f1bcca9 Mon Sep 17 00:00:00 2001
From: "Carlos F. Borges" <borges@nps.edu>
Date: Tue, 21 May 2019 13:08:18 -0700
Subject: [PATCH 03/12] Adds some testing.

---
 test/math.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/math.jl b/test/math.jl
index 9887f7c7a1289..6c3fdee528f91 100644
--- a/test/math.jl
+++ b/test/math.jl
@@ -191,6 +191,8 @@ end
             @test isequal(expm1(T(0)), T(0))
             @test expm1(T(1)) ≈ T(ℯ)-1 atol=10*eps(T)
             @test isequal(hypot(T(3),T(4)), T(5))
+            @test isequal(hypot(floatmax(T),T(1)),floatmax(T))
+            @test isequal(hypot(floatmin(T)*sqrt(eps(T)),T(0)),floatmin(T)*sqrt(eps(T)))
             @test isequal(log(T(1)), T(0))
             @test isequal(log(ℯ,T(1)), T(0))
             @test log(T(ℯ)) ≈ T(1) atol=eps(T)

From aef05c8c2119bfd3453627d549686293c38c493c Mon Sep 17 00:00:00 2001
From: "Carlos F. Borges" <borges@nps.edu>
Date: Tue, 21 May 2019 14:46:21 -0700
Subject: [PATCH 04/12] Cleaned up the logic a bit by eliminating a nested if
 structure. And cleaned up the comments.

---
 base/math.jl | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/base/math.jl b/base/math.jl
index e6a4983dbd014..56b6820e895f0 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -552,29 +552,33 @@ function hypot(x::T,y::T) where T<:AbstractFloat
         return convert(T,Inf)
     end
 
+    # Order the operands
     ax,ay = abs(x), abs(y)
     if ay > ax
         ax,ay = ay,ax
     end
 
-    if ay <= ax*sqrt(eps(T)/2)  #Note: This also gets ay == 0
+    # Widely varying operands
+    if ay <= ax*sqrt(eps(T)/2.0)  #Note: This also gets ay == 0
         return ax
     end
 
-    if (ax > sqrt(floatmax(T)/2)) || (ay < sqrt(floatmin(T)))
-        rescale = eps(sqrt(floatmin(T)))
-        if ax > sqrt(floatmax(T)/2)
-            ax = ax*rescale
-            ay = ay*rescale
-            return sqrt(muladd(ax,ax,ay*ay))/rescale
-        else
-            ax = ax/rescale
-            ay = ay/rescale
-            return sqrt(muladd(ax,ax,ay*ay))*rescale
-        end
-    else
-        return sqrt(muladd(ax,ax,ay*ay))
+    # Operands do not vary widely
+    rescale = eps(sqrt(floatmin(T)))  #Rescaling constant
+
+    if ax > sqrt(floatmax(T)/2.0)
+        ax = ax*rescale
+        ay = ay*rescale
+        return sqrt(muladd(ax,ax,ay*ay))/rescale
     end
+
+    if ay < sqrt(floatmin(T))
+        ax = ax/rescale
+        ay = ay/rescale
+        return sqrt(muladd(ax,ax,ay*ay))*rescale
+    end
+
+    sqrt(muladd(ax,ax,ay*ay))
 end
 function hypot(x::T, y::T) where T<:Number
     ax = abs(x)

From 59d9156884b786a649233f9aa8e4f6d6ff23a757 Mon Sep 17 00:00:00 2001
From: "Carlos F. Borges" <borges@nps.edu>
Date: Thu, 23 May 2019 11:01:05 -0700
Subject: [PATCH 05/12] Restructured to avoid a compiler issue where muladds
 are not consistently promoted to fused multiply-adds.

---
 base/math.jl | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/base/math.jl b/base/math.jl
index 56b6820e895f0..e9ab82ba7453b 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -564,21 +564,18 @@ function hypot(x::T,y::T) where T<:AbstractFloat
     end
 
     # Operands do not vary widely
-    rescale = eps(sqrt(floatmin(T)))  #Rescaling constant
-
+    scale = eps(sqrt(floatmin(T)))  #Rescaling constant
     if ax > sqrt(floatmax(T)/2.0)
-        ax = ax*rescale
-        ay = ay*rescale
-        return sqrt(muladd(ax,ax,ay*ay))/rescale
-    end
-
-    if ay < sqrt(floatmin(T))
-        ax = ax/rescale
-        ay = ay/rescale
-        return sqrt(muladd(ax,ax,ay*ay))*rescale
+        ax = ax*scale
+        ay = ay*scale
+        scale = 1.0/scale
+    elseif ay < sqrt(floatmin(T))
+        ax = ax/scale
+        ay = ay/scale
+    else
+        scale = 1.0
     end
-
-    sqrt(muladd(ax,ax,ay*ay))
+    sqrt(muladd(ax,ax,ay*ay))*scale
 end
 function hypot(x::T, y::T) where T<:Number
     ax = abs(x)

From 8556142fdae494709e645ffbab3abda50970afb0 Mon Sep 17 00:00:00 2001
From: "Carlos F. Borges" <borges@nps.edu>
Date: Thu, 23 May 2019 13:49:47 -0700
Subject: [PATCH 06/12] This is a test of the rescaling in the hypot code. It
 will fail if the rescaled version doesn't give the same answer as the one
 that requires no rescaling. The earlier version of my hypot code fails this
 test because of the issue with not all muladd calls being promoted to fma
 calls. However, the updated version passes. It ain't easy to construct
 arguments that test this.

---
 test/math.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/math.jl b/test/math.jl
index 6c3fdee528f91..cc5ed20412984 100644
--- a/test/math.jl
+++ b/test/math.jl
@@ -193,6 +193,7 @@ end
             @test isequal(hypot(T(3),T(4)), T(5))
             @test isequal(hypot(floatmax(T),T(1)),floatmax(T))
             @test isequal(hypot(floatmin(T)*sqrt(eps(T)),T(0)),floatmin(T)*sqrt(eps(T)))
+            @test isequal(floatmin(T)*hypot(1.368423059742933,1.3510496552495361),hypot(floatmin(T)*1.368423059742933,floatmin(T)*1.3510496552495361))
             @test isequal(log(T(1)), T(0))
             @test isequal(log(ℯ,T(1)), T(0))
             @test log(T(ℯ)) ≈ T(1) atol=eps(T)

From 10d12e263565e252d6860436af86f9efe78d7353 Mon Sep 17 00:00:00 2001
From: cfborges <50250495+cfborges@users.noreply.github.com>
Date: Wed, 29 May 2019 16:09:40 -0700
Subject: [PATCH 07/12] Update base/math.jl

Co-Authored-By: Simon Byrne <simonbyrne@gmail.com>
---
 base/math.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/math.jl b/base/math.jl
index e9ab82ba7453b..f259716969e5f 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -565,7 +565,7 @@ function hypot(x::T,y::T) where T<:AbstractFloat
 
     # Operands do not vary widely
     scale = eps(sqrt(floatmin(T)))  #Rescaling constant
-    if ax > sqrt(floatmax(T)/2.0)
+    if ax > sqrt(floatmax(T)/2)
         ax = ax*scale
         ay = ay*scale
         scale = 1.0/scale

From 15d6f3c00032f10dc89dd06b71373ad3db048eaa Mon Sep 17 00:00:00 2001
From: cfborges <50250495+cfborges@users.noreply.github.com>
Date: Wed, 29 May 2019 16:10:02 -0700
Subject: [PATCH 08/12] Update base/math.jl

Co-Authored-By: Simon Byrne <simonbyrne@gmail.com>
---
 base/math.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/math.jl b/base/math.jl
index f259716969e5f..3a26c0e338e57 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -559,7 +559,7 @@ function hypot(x::T,y::T) where T<:AbstractFloat
     end
 
     # Widely varying operands
-    if ay <= ax*sqrt(eps(T)/2.0)  #Note: This also gets ay == 0
+    if ay <= ax*sqrt(eps(T)/2)  #Note: This also gets ay == 0
         return ax
     end
 

From 5a69e85f19f5ff0107cd3f1417497f8baa17616c Mon Sep 17 00:00:00 2001
From: cfborges <50250495+cfborges@users.noreply.github.com>
Date: Wed, 29 May 2019 16:11:40 -0700
Subject: [PATCH 09/12] Update base/math.jl

Co-Authored-By: Simon Byrne <simonbyrne@gmail.com>
---
 base/math.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/math.jl b/base/math.jl
index 3a26c0e338e57..300a2a540d083 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -568,7 +568,7 @@ function hypot(x::T,y::T) where T<:AbstractFloat
     if ax > sqrt(floatmax(T)/2)
         ax = ax*scale
         ay = ay*scale
-        scale = 1.0/scale
+        scale = inv(scale)
     elseif ay < sqrt(floatmin(T))
         ax = ax/scale
         ay = ay/scale

From 4ded5fbbd6f902b419d18bf6da24c6b14dc958b0 Mon Sep 17 00:00:00 2001
From: cfborges <50250495+cfborges@users.noreply.github.com>
Date: Wed, 29 May 2019 16:12:01 -0700
Subject: [PATCH 10/12] Update base/math.jl

Co-Authored-By: Simon Byrne <simonbyrne@gmail.com>
---
 base/math.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/math.jl b/base/math.jl
index 300a2a540d083..844c5706e45c5 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -573,7 +573,7 @@ function hypot(x::T,y::T) where T<:AbstractFloat
         ax = ax/scale
         ay = ay/scale
     else
-        scale = 1.0
+        scale = one(scale)
     end
     sqrt(muladd(ax,ax,ay*ay))*scale
 end

From 74315233cfad2889c07aa1f5cf6e68eae28b8d5d Mon Sep 17 00:00:00 2001
From: "Carlos F. Borges" <borges@nps.edu>
Date: Wed, 5 Jun 2019 09:10:25 -0700
Subject: [PATCH 11/12] I added a differential correction to the code that
 MASSIVELY improves the accuracy. It is now far better than the clib hypot
 code even without a fused multiply add. If you look at the ArXiv paper there
 is a plot that shows the stunning performance difference. This comes at the
 cost of a two more multiplies and one more divide (and some adds). Still way
 cheaper than the clib code

---
 base/math.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/base/math.jl b/base/math.jl
index 844c5706e45c5..c9930154eed39 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -575,7 +575,9 @@ function hypot(x::T,y::T) where T<:AbstractFloat
     else
         scale = one(scale)
     end
-    sqrt(muladd(ax,ax,ay*ay))*scale
+    h = sqrt(muladd(ax,ax,ay*ay))
+    delta = h-ax
+    (h - muladd(ax,delta,(delta+ay)*(delta-ay)/2)/h)*scale
 end
 function hypot(x::T, y::T) where T<:Number
     ax = abs(x)

From 34a18b1762b4d5e15d3aa526e33809dd99015950 Mon Sep 17 00:00:00 2001
From: "Carlos F. Borges" <borges@nps.edu>
Date: Wed, 12 Jun 2019 10:53:12 -0700
Subject: [PATCH 12/12] This version gives correctly rounded results more than
 99% of the time. It is 10 times better than the C math library hypot function
 in this respect.

---
 base/math.jl | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/base/math.jl b/base/math.jl
index c9930154eed39..064c3e6058f37 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -576,8 +576,14 @@ function hypot(x::T,y::T) where T<:AbstractFloat
         scale = one(scale)
     end
     h = sqrt(muladd(ax,ax,ay*ay))
-    delta = h-ax
-    (h - muladd(ax,delta,(delta+ay)*(delta-ay)/2)/h)*scale
+    if h <= 2*ay
+        delta = h-ay
+        h -= muladd(delta,delta-2*(ax-ay),ax*(2*delta - ax))/(2*h)
+    else
+        delta = h-ax
+        h -= muladd(delta,delta,muladd(ay,(4*delta-ay),2*delta*(ax-2*ay)))/(2*h)
+    end
+    h*scale
 end
 function hypot(x::T, y::T) where T<:Number
     ax = abs(x)