Skip to content

Commit

Permalink
Make use of double-conversion private.
Browse files Browse the repository at this point in the history
  • Loading branch information
pitrou committed Nov 14, 2019
1 parent 9e1da51 commit 3b89b19
Show file tree
Hide file tree
Showing 25 changed files with 203 additions and 124 deletions.
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ set(ARROW_SRCS
util/logging.cc
util/key_value_metadata.cc
util/memory.cc
util/parsing.cc
util/string.cc
util/string_builder.cc
util/task_group.cc
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/util/double_conversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ namespace arrow {
namespace util {
namespace double_conversion {

using arrow_vendored_double_conversion::DoubleToStringConverter;
using arrow_vendored_double_conversion::StringBuilder;
using arrow_vendored_double_conversion::StringToDoubleConverter;
using ::double_conversion::DoubleToStringConverter;
using ::double_conversion::StringBuilder;
using ::double_conversion::StringToDoubleConverter;

} // namespace double_conversion
} // namespace util
Expand Down
40 changes: 40 additions & 0 deletions cpp/src/arrow/util/formatting.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,16 @@
// under the License.

#include "arrow/util/formatting.h"
#include "arrow/util/config.h"
#include "arrow/util/double_conversion.h"
#include "arrow/util/logging.h"

namespace arrow {

using util::double_conversion::DoubleToStringConverter;

static constexpr int kMinBufferSize = DoubleToStringConverter::kBase10MaximalLength + 1;

namespace internal {
namespace detail {

Expand All @@ -28,6 +36,38 @@ const char digit_pairs[] =
"6061626364656667686970717273747576777879"
"8081828384858687888990919293949596979899";

} // namespace detail

struct FloatToStringFormatter::Impl {
Impl()
: converter_(DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan",
'e', -6, 10, 6, 0) {}

DoubleToStringConverter converter_;
};

FloatToStringFormatter::FloatToStringFormatter() : impl_(new Impl()) {}

FloatToStringFormatter::~FloatToStringFormatter() {}

int FloatToStringFormatter::FormatFloat(float v, char* out_buffer, int out_size) {
DCHECK_GE(out_size, kMinBufferSize);
// StringBuilder checks bounds in debug mode for us
util::double_conversion::StringBuilder builder(out_buffer, out_size);
bool result = impl_->converter_.ToShortestSingle(v, &builder);
DCHECK(result);
ARROW_UNUSED(result);
return builder.position();
}

int FloatToStringFormatter::FormatFloat(double v, char* out_buffer, int out_size) {
DCHECK_GE(out_size, kMinBufferSize);
util::double_conversion::StringBuilder builder(out_buffer, out_size);
bool result = impl_->converter_.ToShortest(v, &builder);
DCHECK(result);
ARROW_UNUSED(result);
return builder.position();
}

} // namespace internal
} // namespace arrow
56 changes: 23 additions & 33 deletions cpp/src/arrow/util/formatting.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,8 @@
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/config.h"
#include "arrow/util/double_conversion.h"
#include "arrow/util/string_view.h"
#include "arrow/util/visibility.h"

namespace arrow {
namespace internal {
Expand Down Expand Up @@ -182,55 +180,47 @@ class StringFormatter<UInt64Type> : public IntToStringFormatterMixin<UInt64Type>
/////////////////////////////////////////////////////////////////////////
// Floating-point formatting

template <typename ARROW_TYPE, typename Derived>
class FloatToStringFormatterMixin {
class ARROW_EXPORT FloatToStringFormatter {
public:
FloatToStringFormatter();
~FloatToStringFormatter();

// Returns the number of characters written
int FormatFloat(float v, char* out_buffer, int out_size);
int FormatFloat(double v, char* out_buffer, int out_size);

protected:
struct Impl;
std::unique_ptr<Impl> impl_;
};

template <typename ARROW_TYPE>
class FloatToStringFormatterMixin : public FloatToStringFormatter {
public:
using DoubleToStringConverter = util::double_conversion::DoubleToStringConverter;
using value_type = typename ARROW_TYPE::c_type;

static const int buffer_size = DoubleToStringConverter::kBase10MaximalLength + 1;
static constexpr int buffer_size = 50;

explicit FloatToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR)
: converter_(DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan",
'e', -6, 10, 6, 0) {}
explicit FloatToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}

template <typename Appender>
Status operator()(value_type value, Appender&& append) {
char buffer[buffer_size];
// StringBuilder checks bounds in debug mode for us
util::double_conversion::StringBuilder builder(buffer, buffer_size);
static_cast<Derived*>(this)->Format(value, &builder);
return append(util::string_view(buffer, builder.position()));
int size = FormatFloat(value, buffer, buffer_size);
return append(util::string_view(buffer, size));
}

protected:
DoubleToStringConverter converter_;
};

template <>
class StringFormatter<FloatType>
: public FloatToStringFormatterMixin<FloatType, StringFormatter<FloatType>> {
class StringFormatter<FloatType> : public FloatToStringFormatterMixin<FloatType> {
public:
using FloatToStringFormatterMixin::FloatToStringFormatterMixin;

void Format(value_type value, util::double_conversion::StringBuilder* builder) {
bool result = converter_.ToShortestSingle(value, builder);
assert(result);
ARROW_UNUSED(result);
}
};

template <>
class StringFormatter<DoubleType>
: public FloatToStringFormatterMixin<DoubleType, StringFormatter<DoubleType>> {
class StringFormatter<DoubleType> : public FloatToStringFormatterMixin<DoubleType> {
public:
using FloatToStringFormatterMixin::FloatToStringFormatterMixin;

void Format(value_type value, util::double_conversion::StringBuilder* builder) {
bool result = converter_.ToShortest(value, builder);
assert(result);
ARROW_UNUSED(result);
}
};

} // namespace internal
Expand Down
84 changes: 84 additions & 0 deletions cpp/src/arrow/util/parsing.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/util/parsing.h"
#include "arrow/util/double_conversion.h"

namespace arrow {
namespace internal {

struct StringToFloatConverter::Impl {
Impl()
: main_converter_(flags_, main_junk_value_, main_junk_value_, "inf", "nan"),
fallback_converter_(flags_, fallback_junk_value_, fallback_junk_value_, "inf",
"nan") {}

// This is only supported in double-conversion 3.1+
#ifdef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY
static constexpr int flags_ =
util::double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY;
#else
static constexpr int flags_ =
util::double_conversion::StringToDoubleConverter::NO_FLAGS;
#endif

// Two unlikely values to signal a parsing error
static constexpr double main_junk_value_ = 0.7066424364107089;
static constexpr double fallback_junk_value_ = 0.40088499148279166;

util::double_conversion::StringToDoubleConverter main_converter_;
util::double_conversion::StringToDoubleConverter fallback_converter_;
};

StringToFloatConverter::StringToFloatConverter() : impl_(new Impl()) {}

StringToFloatConverter::~StringToFloatConverter() {}

bool StringToFloatConverter::StringToFloat(const char* s, size_t length, float* out) {
int processed_length;
float v;
v = impl_->main_converter_.StringToFloat(s, static_cast<int>(length),
&processed_length);
if (ARROW_PREDICT_FALSE(v == static_cast<float>(impl_->main_junk_value_))) {
v = impl_->fallback_converter_.StringToFloat(s, static_cast<int>(length),
&processed_length);
if (ARROW_PREDICT_FALSE(v == static_cast<float>(impl_->fallback_junk_value_))) {
return false;
}
}
*out = v;
return true;
}

bool StringToFloatConverter::StringToFloat(const char* s, size_t length, double* out) {
int processed_length;
double v;
v = impl_->main_converter_.StringToDouble(s, static_cast<int>(length),
&processed_length);
if (ARROW_PREDICT_FALSE(v == impl_->main_junk_value_)) {
v = impl_->fallback_converter_.StringToDouble(s, static_cast<int>(length),
&processed_length);
if (ARROW_PREDICT_FALSE(v == impl_->fallback_junk_value_)) {
return false;
}
}
*out = v;
return true;
}

} // namespace internal
} // namespace arrow
63 changes: 16 additions & 47 deletions cpp/src/arrow/util/parsing.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/config.h"
#include "arrow/util/double_conversion.h"
#include "arrow/vendored/datetime.h"

namespace arrow {
Expand Down Expand Up @@ -91,58 +90,28 @@ class StringConverter<BooleanType> {
// - https://github.com/google/double-conversion [used here]
// - https://github.com/achan001/dtoa-fast

class ARROW_EXPORT StringToFloatConverter {
public:
StringToFloatConverter();
~StringToFloatConverter();

bool StringToFloat(const char* s, size_t length, float* out);
bool StringToFloat(const char* s, size_t length, double* out);

protected:
struct Impl;
std::unique_ptr<Impl> impl_;
};

template <class ARROW_TYPE>
class StringToFloatConverterMixin {
class StringToFloatConverterMixin : public StringToFloatConverter {
public:
using value_type = typename ARROW_TYPE::c_type;

explicit StringToFloatConverterMixin(const std::shared_ptr<DataType>& = NULLPTR)
: main_converter_(flags_, main_junk_value_, main_junk_value_, "inf", "nan"),
fallback_converter_(flags_, fallback_junk_value_, fallback_junk_value_, "inf",
"nan") {}
explicit StringToFloatConverterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}

bool operator()(const char* s, size_t length, value_type* out) {
value_type v;
// double-conversion doesn't give us an error flag but signals parse
// errors with sentinel values. Since a sentinel value can appear as
// legitimate input, we fallback on a second converter with a different
// sentinel to eliminate false errors.
TryConvert(main_converter_, s, length, &v);
if (ARROW_PREDICT_FALSE(v == static_cast<value_type>(main_junk_value_))) {
TryConvert(fallback_converter_, s, length, &v);
if (ARROW_PREDICT_FALSE(v == static_cast<value_type>(fallback_junk_value_))) {
return false;
}
}
*out = v;
return true;
}

protected:
// This is only support in double-conversion 3.1+
#ifdef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY
static const int flags_ =
util::double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY;
#else
static const int flags_ = util::double_conversion::StringToDoubleConverter::NO_FLAGS;
#endif
// Two unlikely values to signal a parsing error
static constexpr double main_junk_value_ = 0.7066424364107089;
static constexpr double fallback_junk_value_ = 0.40088499148279166;

util::double_conversion::StringToDoubleConverter main_converter_;
util::double_conversion::StringToDoubleConverter fallback_converter_;

inline void TryConvert(util::double_conversion::StringToDoubleConverter& converter,
const char* s, size_t length, float* out) {
int processed_length;
*out = converter.StringToFloat(s, static_cast<int>(length), &processed_length);
}

inline void TryConvert(util::double_conversion::StringToDoubleConverter& converter,
const char* s, size_t length, double* out) {
int processed_length;
*out = converter.StringToDouble(s, static_cast<int>(length), &processed_length);
return ARROW_PREDICT_TRUE(StringToFloat(s, length, out));
}
};

Expand Down
5 changes: 0 additions & 5 deletions cpp/src/arrow/vendored/double-conversion/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,3 @@
-->

The files in this directory are vendored from double-conversion git tag v3.1.5.

Namespace fixup (from "double_conversion" to "arrow_vendored_double_conversion") using:
```
sed -i "s/\bnamespace double_conversion\b/namespace arrow_vendored_double_conversion/g" `find src/arrow/vendored/double-conversion -type f`
```
4 changes: 2 additions & 2 deletions cpp/src/arrow/vendored/double-conversion/bignum-dtoa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#include "bignum.h"
#include "ieee.h"

namespace arrow_vendored_double_conversion {
namespace double_conversion {

static int NormalizedExponent(uint64_t significand, int exponent) {
ASSERT(significand != 0);
Expand Down Expand Up @@ -638,4 +638,4 @@ static void FixupMultiply10(int estimated_power, bool is_even,
}
}

} // namespace arrow_vendored_double_conversion
} // namespace double_conversion
4 changes: 2 additions & 2 deletions cpp/src/arrow/vendored/double-conversion/bignum-dtoa.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

#include "utils.h"

namespace arrow_vendored_double_conversion {
namespace double_conversion {

enum BignumDtoaMode {
// Return the shortest correct representation.
Expand Down Expand Up @@ -79,6 +79,6 @@ enum BignumDtoaMode {
void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits,
Vector<char> buffer, int* length, int* point);

} // namespace arrow_vendored_double_conversion
} // namespace double_conversion

#endif // DOUBLE_CONVERSION_BIGNUM_DTOA_H_
4 changes: 2 additions & 2 deletions cpp/src/arrow/vendored/double-conversion/bignum.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#include "bignum.h"
#include "utils.h"

namespace arrow_vendored_double_conversion {
namespace double_conversion {

Bignum::Bignum()
: bigits_buffer_(), bigits_(bigits_buffer_, kBigitCapacity), used_digits_(0), exponent_(0) {
Expand Down Expand Up @@ -764,4 +764,4 @@ void Bignum::SubtractTimes(const Bignum& other, int factor) {
}


} // namespace arrow_vendored_double_conversion
} // namespace double_conversion

0 comments on commit 3b89b19

Please sign in to comment.