Skip to content

Commit

Permalink
Include explict example inputs for denoise
Browse files Browse the repository at this point in the history
Some branches of the code were not being hit - this should ensure good
coverage. In the process, some errors were found and corrected related
to parsing numbers with Python types.
  • Loading branch information
SethMMorton committed Nov 29, 2023
1 parent c6db7ff commit fabe113
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 44 deletions.
18 changes: 18 additions & 0 deletions include/fastnumbers/buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,24 @@ class Buffer {
}
}

/// Mark the first '.', 'e', or 'E' as '\0'.
void mark_integer_end() noexcept
{
// Technically, we set the exponent character to '\0'
// and then adjust the length. We are not actually removing data.
// Look for '.' or 'e'/'E'. Instead of searching simulataneously
// use memchr because it is so darn fast.
char* exp_loc = nullptr;
for (const char c : { '.', 'e', 'E' }) {
exp_loc = static_cast<char*>(std::memchr(m_buffer, c, m_len));
if (exp_loc != nullptr) {
*exp_loc = '\0';
m_len = m_size = exp_loc - m_buffer;
return;
}
}
}

/// The largest amount of data the buffer can contain
std::size_t max_size() const noexcept
{
Expand Down
38 changes: 28 additions & 10 deletions include/fastnumbers/c_str_parsing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,10 @@ class StringChecker {
const char* integer_end() const { return m_decimal_start; }

/// The length of the integer component of the contained number.
uint32_t integer_length() const { return integer_end() - integer_start(); }
uint32_t integer_length() const
{
return std::max(static_cast<uint32_t>(integer_end() - integer_start()), 0U);
}

/// The number of zeros that trail the integer part of the contained number.
uint32_t integer_trailing_zeros() const { return m_int_trailing_zeros; }
Expand All @@ -125,7 +128,10 @@ class StringChecker {
const char* decimal_end() const { return m_decimal_end; }

/// The length of the decimal component of the contained number.
uint32_t decimal_length() const { return decimal_end() - decimal_start(); }
uint32_t decimal_length() const
{
return std::max(static_cast<uint32_t>(decimal_end() - decimal_start()), 0U);
}

/// The number of zeros that trail the decimal part of the contained number.
uint32_t decimal_trailing_zeros() const { return m_dec_trailing_zeros; }
Expand All @@ -140,28 +146,34 @@ class StringChecker {
bool is_exponent_negative() const { return m_exp_negative; }

/// The total length of the integer plus decimal components.
uint32_t total_length() const { return integer_length() + decimal_length(); }
uint32_t digit_length() const { return integer_length() + decimal_length(); }

/// The decimal length after removing trailing zeros.
uint32_t truncated_decimal_length() const
{
return decimal_length() - decimal_trailing_zeros();
return std::max(decimal_length() - decimal_trailing_zeros(), 0U);
}

/// The exponent after taking into account the decimal digits.
uint32_t adjusted_exponent_value() const
{
if (is_adjusted_exponent_negative()) {
return truncated_decimal_length() - exponent_value();
if (is_exponent_negative()) {
return exponent_value();
} else {
return exponent_value() - truncated_decimal_length();
return std::max(exponent_value() - truncated_decimal_length(), 0U);
}
}

/// Is the adjusted exponent negative?
bool is_adjusted_exponent_negative() const
/// The total length of the entire number.
uint32_t total_length() const
{
return truncated_decimal_length() > exponent_value();
return std::max(static_cast<uint32_t>(m_total_end - integer_start()), 0U);
}

/// The length of the start of the decimal component to the end of the number.
uint32_t decimal_and_exponent_length() const
{
return std::max(static_cast<uint32_t>(m_total_end - decimal_start()), 0U);
}

private:
Expand All @@ -177,6 +189,9 @@ class StringChecker {
/// Set the decimal end.
void set_decimal_end(const char* val) { m_decimal_end = val; }

/// Set the total end.
void set_total_end(const char* val) { m_total_end = val; }

/// Set the exponent value.
void set_exponent(const uint32_t val) { m_expon = val; }

Expand All @@ -198,6 +213,9 @@ class StringChecker {
/// The end of the decimal component of the contained number.
const char* m_decimal_end;

/// The end of the contained number.
const char* m_total_end;

/// The value of the exponent of the nubmer.
uint32_t m_expon;

Expand Down
4 changes: 4 additions & 0 deletions src/cpp/c_str_parsing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ StringChecker::StringChecker(const char* str, const char* end, int base) noexcep
: m_integer_start(nullptr)
, m_decimal_start(nullptr)
, m_decimal_end(nullptr)
, m_total_end(nullptr)
, m_expon(0U)
, m_exp_negative(false)
, m_int_trailing_zeros(0U)
Expand All @@ -38,6 +39,7 @@ StringChecker::StringChecker(const char* str, const char* end, int base) noexcep
set_integer_start(str);
set_decimal_start(str);
set_decimal_end(str);
set_total_end(str);
return;
}

Expand All @@ -55,6 +57,7 @@ StringChecker::StringChecker(const char* str, const char* end, int base) noexcep
}
set_decimal_start(str);
set_decimal_end(str);
set_total_end(str);
set_type(
(str == end && str != integer_start()) ? StringType::INTEGER
: StringType::INVALID
Expand Down Expand Up @@ -119,6 +122,7 @@ StringChecker::StringChecker(const char* str, const char* end, int base) noexcep
set_type(StringType::FLOAT);
}
}
set_total_end(str);

// If the parsing was not valid or we are not at the end of the string
// then the string is invalid.
Expand Down
97 changes: 63 additions & 34 deletions src/cpp/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,17 @@
// C++ version of https://docs.python.org/3/library/math.html#math.ulp
static double ulp(const double x)
{
if (x > 0)
return std::nexttoward(x, std::numeric_limits<double>::infinity()) - x;
else
return x - std::nexttoward(x, -std::numeric_limits<double>::infinity());
// Assumes positive input
return std::nexttoward(x, std::numeric_limits<double>::infinity()) - x;
}

// Parse a sequence of characters as a Python long
static PyObject*
parse_long_helper(const char* start, const char* end, const std::size_t length)
static PyObject* parse_long_helper(
const char* start,
const char* end,
const std::size_t length,
const std::size_t length_to_end
)
{
// We know by construction that this correctly stores a number, so we skip
// the error checking.
Expand All @@ -38,11 +40,41 @@ parse_long_helper(const char* start, const char* end, const std::size_t length)
length ? parse_int<uint64_t>(start, end, 10, error, overflow) : 0ULL
);
} else {
char* this_end = const_cast<char*>(end);
return length ? PyLong_FromString(start, &this_end, 10) : pyobject_from_int(0);
// This Python parse won't let us specify the end of the string, and it errors
// out on non-numeric characters, so we have to copy our data to a buffer
// and set the exponent character to '\0' so that Python can parse it.
Buffer buffer(start, length_to_end);
buffer.mark_integer_end();
return PyLong_FromString(buffer.start(), nullptr, 10);
}
}

// Convert a value into a power of ten Python long in the most efficient method
// depending on the value of the exponent.
static PyObject* exponent_creation_helper(const uint32_t exp_val)
{
if (exp_val < overflow_cutoff<uint64_t>()) {
return pyobject_from_int(ipow::ipow(10ULL, exp_val));
} else {
PyObject* py_expon = pyobject_from_int(10);
PyObject* py_exp_component = pyobject_from_int(exp_val);
in_place_pow(py_expon, py_exp_component);
Py_DECREF(py_exp_component);
return py_expon;
}
}

// Convert a PyObject to a negative number if needed.
static PyObject* do_negative(PyObject* obj, const bool negative)
{
if (negative) {
PyObject* temp = obj;
obj = PyNumber_Negative(temp);
Py_DECREF(temp);
}
return obj;
}

PyObject* Parser::float_as_int_without_noise(PyObject* obj) noexcept
{
const double val = PyFloat_AsDouble(obj);
Expand Down Expand Up @@ -82,6 +114,8 @@ PyObject* Parser::float_as_int_without_noise(PyObject* obj) noexcept

// If the number of digits is less than 1 (e.g. the float is already noiselessly
// converted to an integer) just return the integer directly.
// Because of the floor check above, it is unlikely this will ever be true, but
// include it for completeness' sake.
if (digits < 1) {
return val_int;
}
Expand All @@ -103,8 +137,8 @@ PyObject* Parser::float_as_int_without_noise(

// As a special case, if the number of digits in the integer and decimal
// parts of the float are small enough that we know it could fit into a unsigned
// 65-bit integer, parse using C++ methods in the name of efficiency.
if (checker.total_length() < overflow_cutoff<uint64_t>()) {
// 64-bit integer, parse using C++ methods in the name of efficiency.
if (checker.digit_length() < overflow_cutoff<uint64_t>()) {
bool error = false;
bool overflow = false;
uint64_t integer = 0;
Expand Down Expand Up @@ -144,7 +178,10 @@ PyObject* Parser::float_as_int_without_noise(
else {
// First parse the integer components of the floating point number.
py_integer = parse_long_helper(
checker.integer_start(), checker.integer_end(), checker.integer_length()
checker.integer_start(),
checker.integer_end(),
checker.integer_length(),
checker.total_length()
);
if (py_integer == nullptr) {
return py_integer;
Expand All @@ -156,17 +193,19 @@ PyObject* Parser::float_as_int_without_noise(
if (checker.truncated_decimal_length()) {
// Parse the decimal component...
PyObject* py_decimal = parse_long_helper(
checker.decimal_start(), checker.decimal_end(), checker.decimal_length()
checker.decimal_start(),
checker.decimal_end(),
checker.decimal_length(),
checker.decimal_and_exponent_length()
);
if (py_decimal == nullptr) {
return py_decimal;

Check warning on line 202 in src/cpp/parser.cpp

View check run for this annotation

Codecov / codecov/patch

src/cpp/parser.cpp#L202

Added line #L202 was not covered by tests
}

// ... and then "remove" trailing zeros if they exist...
if (checker.decimal_trailing_zeros()) {
PyObject* divisor = pyobject_from_int(
ipow::ipow(10ULL, checker.decimal_trailing_zeros())
);
PyObject* divisor
= exponent_creation_helper(checker.decimal_trailing_zeros());
if (divisor == nullptr) {
return divisor;
}
Expand All @@ -181,9 +220,8 @@ PyObject* Parser::float_as_int_without_noise(
// ... and then "shift" the integer py powers of ten so we can
// add in the decimal component...
{
PyObject* offset = pyobject_from_int(
ipow::ipow(10ULL, checker.truncated_decimal_length())
);
PyObject* offset
= exponent_creation_helper(checker.truncated_decimal_length());
if (offset == nullptr) {
Py_DECREF(py_integer);
return offset;

Check warning on line 227 in src/cpp/parser.cpp

View check run for this annotation

Codecov / codecov/patch

src/cpp/parser.cpp#L226-L227

Added lines #L226 - L227 were not covered by tests
Expand Down Expand Up @@ -212,19 +250,15 @@ PyObject* Parser::float_as_int_without_noise(
// of ten. Because this value could be *huge*, we cannot do power nor the
// mutliplication in the C++ space - it must be all done with Python's types.
if (checker.adjusted_exponent_value() > 0) {
PyObject* py_expon = pyobject_from_int(10);
PyObject* py_exp_component
= pyobject_from_int(checker.adjusted_exponent_value());
in_place_pow(py_expon, py_exp_component);
Py_DECREF(py_exp_component);
PyObject* py_expon = exponent_creation_helper(checker.adjusted_exponent_value());
if (py_expon == nullptr) {
Py_DECREF(py_integer);
return py_expon;

Check warning on line 256 in src/cpp/parser.cpp

View check run for this annotation

Codecov / codecov/patch

src/cpp/parser.cpp#L255-L256

Added lines #L255 - L256 were not covered by tests
}

// We are carrying around the exponent as an unsigned integer,
// so we handle negatives by dividing, and positives by multiplying.
if (checker.is_adjusted_exponent_negative()) {
if (checker.is_exponent_negative()) {
in_place_divide(py_integer, py_expon);
} else {
in_place_multiply(py_integer, py_expon);
Expand All @@ -233,12 +267,7 @@ PyObject* Parser::float_as_int_without_noise(
}

// Finally, we negate the result if it is supposed to be negative.
if (is_negative) {
PyObject* temp = py_integer;
py_integer = PyNumber_Negative(temp);
Py_DECREF(temp);
}
return py_integer;
return do_negative(py_integer, is_negative);
}

/**
Expand Down Expand Up @@ -336,10 +365,10 @@ RawPayload<PyObject*> CharacterParser::as_pyint() const noexcept(false)
return pyobject_from_int(result);
}

// Parse and record the location where parsing ended (including trailing whitespace)
// No need to do input validation with the second argument because we already know
// the input is valid from above.
// Return the value without checking python's error state.
// Parse and record the location where parsing ended (including trailing
// whitespace) No need to do input validation with the second argument because we
// already know the input is valid from above. Return the value without checking
// python's error state.
return PyLong_FromString(m_start_orig, nullptr, options().get_base());
}

Expand Down
16 changes: 16 additions & 0 deletions tests/test_fastnumbers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,14 @@ def test_given_float_returns_int_matching_decimal_object_with_denoise(
assert_integers_close(result, expected)

@given(floats(allow_nan=False).filter(an_integer).map(repr))
@example("1234.56E56")
@example("12345.60000E56")
@example("1234560000E-3")
@example("1234.56789012345678901234567890123456789e56")
@example("1234.5678901234567890123456789012345678900000e56")
@example("1234567890123456789012345678901234567890000000000000.")
@example("123456789012345678901234567890123456789.0000000000000000000000000000")
@example("1234567890123456789012345678901234567890000000000000e-5")
def test_given_float_str_returns_int_matching_decimal_object_with_denoise(
self, x: str
) -> None:
Expand Down Expand Up @@ -1268,6 +1276,14 @@ def test_given_float_returns_int_matching_decimal_object_with_denoise(
assert_integers_close(result, expected)

@given(floats(allow_nan=False, allow_infinity=False).map(repr))
@example("1234.56E56")
@example("12345.60000E56")
@example("1234560000E-3")
@example("1234.56789012345678901234567890123456789e56")
@example("1234.5678901234567890123456789012345678900000e56")
@example("1234567890123456789012345678901234567890000000000000.")
@example("123456789012345678901234567890123456789.0000000000000000000000000000")
@example("1234567890123456789012345678901234567890000000000000e-5")
def test_given_float_str_returns_int_matching_decimal_object_with_denoise(
self, x: str
) -> None:
Expand Down

0 comments on commit fabe113

Please sign in to comment.