From 8a2b7389f50a50a4e26ec98101d47fb1fc1c1bcd Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@gmail.com>
Date: Sat, 5 Sep 2020 17:10:36 -0700
Subject: [PATCH] Changes libstdc++ hashtable function to use double instead of
 long double

Quite a few years ago, Oracle was changing the bucket calculation away
from using float to calculate the primes.
I presume they were generating excessively large maps and needed larger
primes.
Originally they intended to just move up to double with the
implementation, but then a question was raised on the mailing list about
just using long double instead.

They decided at that point just to move to using long double as an
overkill solution.

The problem with this solution is that for anyone not running x86, this
can fall down a software floating point implementation path.
AArch64 is a good example, where it will fall down a 128-bit soft-float
implementation for calculating this.

Instead of accepting terrible performance for our bucket calculations,
move the implementation down to double.
If some HPC class application ends up getting inefficient bucket
calculation at this point, maybe they should instead invest in a hashmap
implementation specifically optimized for huge data sets.
---
 libstdc++-v3/include/bits/hashtable_policy.h | 12 ++++++------
 libstdc++-v3/src/c++11/hashtable_c++0x.cc    | 10 +++++-----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index ef120134914df..fe53a2100ec3c 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -460,7 +460,7 @@ namespace __detail
     // Return a bucket count appropriate for n elements
     std::size_t
     _M_bkt_for_elements(std::size_t __n) const
-    { return __builtin_ceill(__n / (long double)_M_max_load_factor); }
+    { return __builtin_ceil(__n / (double)_M_max_load_factor); }
 
     // __n_bkt is current bucket count, __n_elt is current element count,
     // and __n_ins is number of elements to be inserted.  Do we need to
@@ -560,7 +560,7 @@ namespace __detail
 	_M_next_resize = numeric_limits<size_t>::max();
       else
 	_M_next_resize
-	  = __builtin_floorl(__res * (long double)_M_max_load_factor);
+	  = __builtin_floor(__res * (double)_M_max_load_factor);
 
       return __res;
     }
@@ -568,7 +568,7 @@ namespace __detail
     // Return a bucket count appropriate for n elements
     std::size_t
     _M_bkt_for_elements(std::size_t __n) const noexcept
-    { return __builtin_ceill(__n / (long double)_M_max_load_factor); }
+    { return __builtin_ceil(__n / (double)_M_max_load_factor); }
 
     // __n_bkt is current bucket count, __n_elt is current element count,
     // and __n_ins is number of elements to be inserted.  Do we need to
@@ -583,16 +583,16 @@ namespace __detail
 	  // If _M_next_resize is 0 it means that we have nothing allocated so
 	  // far and that we start inserting elements. In this case we start
 	  // with an initial bucket size of 11.
-	  long double __min_bkts
+	  double __min_bkts
 	    = std::max<std::size_t>(__n_elt + __n_ins, _M_next_resize ? 0 : 11)
-	      / (long double)_M_max_load_factor;
+	      / (double)_M_max_load_factor;
 	  if (__min_bkts >= __n_bkt)
 	    return { true,
-	      _M_next_bkt(std::max<std::size_t>(__builtin_floorl(__min_bkts) + 1,
+	      _M_next_bkt(std::max<std::size_t>(__builtin_floor(__min_bkts) + 1,
 						__n_bkt * _S_growth_factor)) };
 
 	  _M_next_resize
-	    = __builtin_floorl(__n_bkt * (long double)_M_max_load_factor);
+	    = __builtin_floor(__n_bkt * (double)_M_max_load_factor);
 	  return { false, 0 };
 	}
       else
diff --git a/libstdc++-v3/src/c++11/hashtable_c++0x.cc b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
index de8e2c7cb915b..6d6a5c01187e1 100644
--- a/libstdc++-v3/src/c++11/hashtable_c++0x.cc
+++ b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
@@ -58,7 +58,7 @@ namespace __detail
 	  return 1;
 
 	_M_next_resize =
-	  __builtin_floorl(__fast_bkt[__n] * (long double)_M_max_load_factor);
+	  __builtin_floor(__fast_bkt[__n] * (double)_M_max_load_factor);
 	return __fast_bkt[__n];
       }
 
@@ -81,7 +81,7 @@ namespace __detail
       _M_next_resize = numeric_limits<size_t>::max();
     else
       _M_next_resize =
-	__builtin_floorl(*__next_bkt * (long double)_M_max_load_factor);
+	__builtin_floor(*__next_bkt * (double)_M_max_load_factor);
 
     return *__next_bkt;
   }
@@ -105,16 +105,16 @@ namespace __detail
 	// If _M_next_resize is 0 it means that we have nothing allocated so
 	// far and that we start inserting elements. In this case we start
 	// with an initial bucket size of 11.
-	long double __min_bkts
+	double __min_bkts
 	  = std::max<std::size_t>(__n_elt + __n_ins, _M_next_resize ? 0 : 11)
-	  / (long double)_M_max_load_factor;
+	  / (double)_M_max_load_factor;
 	if (__min_bkts >= __n_bkt)
 	  return { true,
-	    _M_next_bkt(std::max<std::size_t>(__builtin_floorl(__min_bkts) + 1,
+	    _M_next_bkt(std::max<std::size_t>(__builtin_floor(__min_bkts) + 1,
 					      __n_bkt * _S_growth_factor)) };
 
 	_M_next_resize
-	  = __builtin_floorl(__n_bkt * (long double)_M_max_load_factor);
+	  = __builtin_floor(__n_bkt * (double)_M_max_load_factor);
 	return { false, 0 };
       }
     else