Make use of double-conversion private.

Bit-Quill · Nov 14, 2019 · 3b89b19 · 3b89b19
1 parent 9e1da51
commit 3b89b19
Show file tree

Hide file tree

Showing 25 changed files with 203 additions and 124 deletions.
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
@@ -136,6 +136,7 @@ set(ARROW_SRCS
     util/logging.cc
     util/key_value_metadata.cc
     util/memory.cc
+    util/parsing.cc
     util/string.cc
     util/string_builder.cc
     util/task_group.cc

diff --git a/cpp/src/arrow/util/double_conversion.h b/cpp/src/arrow/util/double_conversion.h
@@ -23,9 +23,9 @@ namespace arrow {
 namespace util {
 namespace double_conversion {
 
-using arrow_vendored_double_conversion::DoubleToStringConverter;
-using arrow_vendored_double_conversion::StringBuilder;
-using arrow_vendored_double_conversion::StringToDoubleConverter;
+using ::double_conversion::DoubleToStringConverter;
+using ::double_conversion::StringBuilder;
+using ::double_conversion::StringToDoubleConverter;
 
 }  // namespace double_conversion
 }  // namespace util

diff --git a/cpp/src/arrow/util/formatting.cc b/cpp/src/arrow/util/formatting.cc
@@ -16,8 +16,16 @@
 // under the License.
 
 #include "arrow/util/formatting.h"
+#include "arrow/util/config.h"
+#include "arrow/util/double_conversion.h"
+#include "arrow/util/logging.h"
 
 namespace arrow {
+
+using util::double_conversion::DoubleToStringConverter;
+
+static constexpr int kMinBufferSize = DoubleToStringConverter::kBase10MaximalLength + 1;
+
 namespace internal {
 namespace detail {
 
@@ -28,6 +36,38 @@ const char digit_pairs[] =
     "6061626364656667686970717273747576777879"
     "8081828384858687888990919293949596979899";
 
+}  // namespace detail
+
+struct FloatToStringFormatter::Impl {
+  Impl()
+      : converter_(DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan",
+                   'e', -6, 10, 6, 0) {}
+
+  DoubleToStringConverter converter_;
+};
+
+FloatToStringFormatter::FloatToStringFormatter() : impl_(new Impl()) {}
+
+FloatToStringFormatter::~FloatToStringFormatter() {}
+
+int FloatToStringFormatter::FormatFloat(float v, char* out_buffer, int out_size) {
+  DCHECK_GE(out_size, kMinBufferSize);
+  // StringBuilder checks bounds in debug mode for us
+  util::double_conversion::StringBuilder builder(out_buffer, out_size);
+  bool result = impl_->converter_.ToShortestSingle(v, &builder);
+  DCHECK(result);
+  ARROW_UNUSED(result);
+  return builder.position();
 }
+
+int FloatToStringFormatter::FormatFloat(double v, char* out_buffer, int out_size) {
+  DCHECK_GE(out_size, kMinBufferSize);
+  util::double_conversion::StringBuilder builder(out_buffer, out_size);
+  bool result = impl_->converter_.ToShortest(v, &builder);
+  DCHECK(result);
+  ARROW_UNUSED(result);
+  return builder.position();
+}
+
 }  // namespace internal
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/formatting.h b/cpp/src/arrow/util/formatting.h
@@ -27,10 +27,8 @@
 #include "arrow/status.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/config.h"
-#include "arrow/util/double_conversion.h"
 #include "arrow/util/string_view.h"
+#include "arrow/util/visibility.h"
 
 namespace arrow {
 namespace internal {
@@ -182,55 +180,47 @@ class StringFormatter<UInt64Type> : public IntToStringFormatterMixin<UInt64Type>
 /////////////////////////////////////////////////////////////////////////
 // Floating-point formatting
 
-template <typename ARROW_TYPE, typename Derived>
-class FloatToStringFormatterMixin {
+class ARROW_EXPORT FloatToStringFormatter {
+ public:
+  FloatToStringFormatter();
+  ~FloatToStringFormatter();
+
+  // Returns the number of characters written
+  int FormatFloat(float v, char* out_buffer, int out_size);
+  int FormatFloat(double v, char* out_buffer, int out_size);
+
+ protected:
+  struct Impl;
+  std::unique_ptr<Impl> impl_;
+};
+
+template <typename ARROW_TYPE>
+class FloatToStringFormatterMixin : public FloatToStringFormatter {
  public:
-  using DoubleToStringConverter = util::double_conversion::DoubleToStringConverter;
   using value_type = typename ARROW_TYPE::c_type;
 
-  static const int buffer_size = DoubleToStringConverter::kBase10MaximalLength + 1;
+  static constexpr int buffer_size = 50;
 
-  explicit FloatToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR)
-      : converter_(DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan",
-                   'e', -6, 10, 6, 0) {}
+  explicit FloatToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}
 
   template <typename Appender>
   Status operator()(value_type value, Appender&& append) {
     char buffer[buffer_size];
-    // StringBuilder checks bounds in debug mode for us
-    util::double_conversion::StringBuilder builder(buffer, buffer_size);
-    static_cast<Derived*>(this)->Format(value, &builder);
-    return append(util::string_view(buffer, builder.position()));
+    int size = FormatFloat(value, buffer, buffer_size);
+    return append(util::string_view(buffer, size));
   }
-
- protected:
-  DoubleToStringConverter converter_;
 };
 
 template <>
-class StringFormatter<FloatType>
-    : public FloatToStringFormatterMixin<FloatType, StringFormatter<FloatType>> {
+class StringFormatter<FloatType> : public FloatToStringFormatterMixin<FloatType> {
  public:
   using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
-
-  void Format(value_type value, util::double_conversion::StringBuilder* builder) {
-    bool result = converter_.ToShortestSingle(value, builder);
-    assert(result);
-    ARROW_UNUSED(result);
-  }
 };
 
 template <>
-class StringFormatter<DoubleType>
-    : public FloatToStringFormatterMixin<DoubleType, StringFormatter<DoubleType>> {
+class StringFormatter<DoubleType> : public FloatToStringFormatterMixin<DoubleType> {
  public:
   using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
-
-  void Format(value_type value, util::double_conversion::StringBuilder* builder) {
-    bool result = converter_.ToShortest(value, builder);
-    assert(result);
-    ARROW_UNUSED(result);
-  }
 };
 
 }  // namespace internal

diff --git a/cpp/src/arrow/util/parsing.cc b/cpp/src/arrow/util/parsing.cc
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/parsing.h"
+#include "arrow/util/double_conversion.h"
+
+namespace arrow {
+namespace internal {
+
+struct StringToFloatConverter::Impl {
+  Impl()
+      : main_converter_(flags_, main_junk_value_, main_junk_value_, "inf", "nan"),
+        fallback_converter_(flags_, fallback_junk_value_, fallback_junk_value_, "inf",
+                            "nan") {}
+
+  // This is only supported in double-conversion 3.1+
+#ifdef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY
+  static constexpr int flags_ =
+      util::double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY;
+#else
+  static constexpr int flags_ =
+      util::double_conversion::StringToDoubleConverter::NO_FLAGS;
+#endif
+
+  // Two unlikely values to signal a parsing error
+  static constexpr double main_junk_value_ = 0.7066424364107089;
+  static constexpr double fallback_junk_value_ = 0.40088499148279166;
+
+  util::double_conversion::StringToDoubleConverter main_converter_;
+  util::double_conversion::StringToDoubleConverter fallback_converter_;
+};
+
+StringToFloatConverter::StringToFloatConverter() : impl_(new Impl()) {}
+
+StringToFloatConverter::~StringToFloatConverter() {}
+
+bool StringToFloatConverter::StringToFloat(const char* s, size_t length, float* out) {
+  int processed_length;
+  float v;
+  v = impl_->main_converter_.StringToFloat(s, static_cast<int>(length),
+                                           &processed_length);
+  if (ARROW_PREDICT_FALSE(v == static_cast<float>(impl_->main_junk_value_))) {
+    v = impl_->fallback_converter_.StringToFloat(s, static_cast<int>(length),
+                                                 &processed_length);
+    if (ARROW_PREDICT_FALSE(v == static_cast<float>(impl_->fallback_junk_value_))) {
+      return false;
+    }
+  }
+  *out = v;
+  return true;
+}
+
+bool StringToFloatConverter::StringToFloat(const char* s, size_t length, double* out) {
+  int processed_length;
+  double v;
+  v = impl_->main_converter_.StringToDouble(s, static_cast<int>(length),
+                                            &processed_length);
+  if (ARROW_PREDICT_FALSE(v == impl_->main_junk_value_)) {
+    v = impl_->fallback_converter_.StringToDouble(s, static_cast<int>(length),
+                                                  &processed_length);
+    if (ARROW_PREDICT_FALSE(v == impl_->fallback_junk_value_)) {
+      return false;
+    }
+  }
+  *out = v;
+  return true;
+}
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/parsing.h b/cpp/src/arrow/util/parsing.h
@@ -31,7 +31,6 @@
 #include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/config.h"
-#include "arrow/util/double_conversion.h"
 #include "arrow/vendored/datetime.h"
 
 namespace arrow {
@@ -91,58 +90,28 @@ class StringConverter<BooleanType> {
 // - https://github.com/google/double-conversion [used here]
 // - https://github.com/achan001/dtoa-fast
 
+class ARROW_EXPORT StringToFloatConverter {
+ public:
+  StringToFloatConverter();
+  ~StringToFloatConverter();
+
+  bool StringToFloat(const char* s, size_t length, float* out);
+  bool StringToFloat(const char* s, size_t length, double* out);
+
+ protected:
+  struct Impl;
+  std::unique_ptr<Impl> impl_;
+};
+
 template <class ARROW_TYPE>
-class StringToFloatConverterMixin {
+class StringToFloatConverterMixin : public StringToFloatConverter {
  public:
   using value_type = typename ARROW_TYPE::c_type;
 
-  explicit StringToFloatConverterMixin(const std::shared_ptr<DataType>& = NULLPTR)
-      : main_converter_(flags_, main_junk_value_, main_junk_value_, "inf", "nan"),
-        fallback_converter_(flags_, fallback_junk_value_, fallback_junk_value_, "inf",
-                            "nan") {}
+  explicit StringToFloatConverterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}
 
   bool operator()(const char* s, size_t length, value_type* out) {
-    value_type v;
-    // double-conversion doesn't give us an error flag but signals parse
-    // errors with sentinel values.  Since a sentinel value can appear as
-    // legitimate input, we fallback on a second converter with a different
-    // sentinel to eliminate false errors.
-    TryConvert(main_converter_, s, length, &v);
-    if (ARROW_PREDICT_FALSE(v == static_cast<value_type>(main_junk_value_))) {
-      TryConvert(fallback_converter_, s, length, &v);
-      if (ARROW_PREDICT_FALSE(v == static_cast<value_type>(fallback_junk_value_))) {
-        return false;
-      }
-    }
-    *out = v;
-    return true;
-  }
-
- protected:
-// This is only support in double-conversion 3.1+
-#ifdef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY
-  static const int flags_ =
-      util::double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY;
-#else
-  static const int flags_ = util::double_conversion::StringToDoubleConverter::NO_FLAGS;
-#endif
-  // Two unlikely values to signal a parsing error
-  static constexpr double main_junk_value_ = 0.7066424364107089;
-  static constexpr double fallback_junk_value_ = 0.40088499148279166;
-
-  util::double_conversion::StringToDoubleConverter main_converter_;
-  util::double_conversion::StringToDoubleConverter fallback_converter_;
-
-  inline void TryConvert(util::double_conversion::StringToDoubleConverter& converter,
-                         const char* s, size_t length, float* out) {
-    int processed_length;
-    *out = converter.StringToFloat(s, static_cast<int>(length), &processed_length);
-  }
-
-  inline void TryConvert(util::double_conversion::StringToDoubleConverter& converter,
-                         const char* s, size_t length, double* out) {
-    int processed_length;
-    *out = converter.StringToDouble(s, static_cast<int>(length), &processed_length);
+    return ARROW_PREDICT_TRUE(StringToFloat(s, length, out));
   }
 };
 

diff --git a/cpp/src/arrow/vendored/double-conversion/README.md b/cpp/src/arrow/vendored/double-conversion/README.md
@@ -18,8 +18,3 @@
 -->
 
 The files in this directory are vendored from double-conversion git tag v3.1.5.
-
-Namespace fixup (from "double_conversion" to "arrow_vendored_double_conversion") using:
-```
-sed -i "s/\bnamespace double_conversion\b/namespace arrow_vendored_double_conversion/g" `find src/arrow/vendored/double-conversion -type f`
-```
diff --git a/cpp/src/arrow/vendored/double-conversion/bignum-dtoa.cc b/cpp/src/arrow/vendored/double-conversion/bignum-dtoa.cc
@@ -32,7 +32,7 @@
 #include "bignum.h"
 #include "ieee.h"
 
-namespace arrow_vendored_double_conversion {
+namespace double_conversion {
 
 static int NormalizedExponent(uint64_t significand, int exponent) {
   ASSERT(significand != 0);
@@ -638,4 +638,4 @@ static void FixupMultiply10(int estimated_power, bool is_even,
   }
 }
 
-}  // namespace arrow_vendored_double_conversion
+}  // namespace double_conversion
diff --git a/cpp/src/arrow/vendored/double-conversion/bignum-dtoa.h b/cpp/src/arrow/vendored/double-conversion/bignum-dtoa.h
@@ -30,7 +30,7 @@
 
 #include "utils.h"
 
-namespace arrow_vendored_double_conversion {
+namespace double_conversion {
 
 enum BignumDtoaMode {
   // Return the shortest correct representation.
@@ -79,6 +79,6 @@ enum BignumDtoaMode {
 void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits,
                 Vector<char> buffer, int* length, int* point);
 
-}  // namespace arrow_vendored_double_conversion
+}  // namespace double_conversion
 
 #endif  // DOUBLE_CONVERSION_BIGNUM_DTOA_H_
diff --git a/cpp/src/arrow/vendored/double-conversion/bignum.cc b/cpp/src/arrow/vendored/double-conversion/bignum.cc
@@ -28,7 +28,7 @@
 #include "bignum.h"
 #include "utils.h"
 
-namespace arrow_vendored_double_conversion {
+namespace double_conversion {
 
 Bignum::Bignum()
     : bigits_buffer_(), bigits_(bigits_buffer_, kBigitCapacity), used_digits_(0), exponent_(0) {
@@ -764,4 +764,4 @@ void Bignum::SubtractTimes(const Bignum& other, int factor) {
 }
 
 
-}  // namespace arrow_vendored_double_conversion
+}  // namespace double_conversion