Skip to content

Commit

Permalink
Merge 02230fe into d87b698
Browse files Browse the repository at this point in the history
  • Loading branch information
sylveon committed Jul 25, 2019
2 parents d87b698 + 02230fe commit 679c2f2
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 42 deletions.
72 changes: 72 additions & 0 deletions include/rapidjson/internal/clzll.h
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1,72 @@
// Tencent is pleased to support the open source community by making RapidJSON available.
//
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
//
// Licensed under the MIT License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://opensource.org/licenses/MIT
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef RAPIDJSON_CLZLL_H_
#define RAPIDJSON_CLZLL_H_

#include "../rapidjson.h"

#if defined(_MSC_VER)
#include <intrin.h>
#if defined(_WIN64)
#pragma intrinsic(_BitScanReverse64)
#else
#pragma intrinsic(_BitScanReverse)
#endif
#endif

RAPIDJSON_NAMESPACE_BEGIN
namespace internal {

#if (defined(__GNUC__) && __GNUC__ >= 4) || RAPIDJSON_HAS_BUILTIN(__builtin_clzll)
#define RAPIDJSON_CLZLL __builtin_clzll
#else

// Counts the number of leading zero bits in a 64-bit value.
//
// Fallback for compilers that do not provide __builtin_clzll. With MSVC the
// bit-scan intrinsics are used; everywhere else a portable shift loop is used.
//
// x must be non-zero (asserted below). The result is the number of zero bits
// above the most significant set bit, i.e. a value in the range [0, 63].
inline uint32_t clzll(uint64_t x) {
    // Passing 0 to __builtin_clzll is UB in GCC and results in an
    // infinite loop in the software implementation.
    RAPIDJSON_ASSERT(x != 0);

#if defined(_MSC_VER)
    // _BitScanReverse* store the index of the highest set bit in r.
    unsigned long r = 0;
#if defined(_WIN64)
    _BitScanReverse64(&r, x);
#else
    // Scan the high 32 bits.
    if (_BitScanReverse(&r, static_cast<uint32_t>(x >> 32)))
        return static_cast<uint32_t>(63 - (r + 32));

    // Scan the low 32 bits.
    _BitScanReverse(&r, static_cast<uint32_t>(x & 0xFFFFFFFF));
#endif // _WIN64

    // Convert "index of highest set bit" into "count of leading zeros".
    return static_cast<uint32_t>(63 - r);
#else
    // The counter must start at zero: it is incremented and returned, so
    // leaving it uninitialized would read an indeterminate value.
    uint32_t r = 0;
    while (!(x & (static_cast<uint64_t>(1) << 63))) {
        x <<= 1;
        ++r;
    }

    return r;
#endif // _MSC_VER
}

#define RAPIDJSON_CLZLL RAPIDJSON_NAMESPACE::internal::clzll
#endif // (defined(__GNUC__) && __GNUC__ >= 4) || RAPIDJSON_HAS_BUILTIN(__builtin_clzll)

} // namespace internal
RAPIDJSON_NAMESPACE_END

#endif // RAPIDJSON_CLZLL_H_
18 changes: 2 additions & 16 deletions include/rapidjson/internal/diyfp.h
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
#define RAPIDJSON_DIYFP_H_ #define RAPIDJSON_DIYFP_H_


#include "../rapidjson.h" #include "../rapidjson.h"
#include "clzll.h"
#include <limits> #include <limits>


#if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER) #if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
#include <intrin.h> #include <intrin.h>
#pragma intrinsic(_BitScanReverse64)
#pragma intrinsic(_umul128) #pragma intrinsic(_umul128)
#endif #endif


Expand Down Expand Up @@ -100,22 +100,8 @@ struct DiyFp {
} }


DiyFp Normalize() const { DiyFp Normalize() const {
RAPIDJSON_ASSERT(f != 0); // https://stackoverflow.com/a/26809183/291737 int s = static_cast<int>(RAPIDJSON_CLZLL(f));
#if defined(_MSC_VER) && defined(_M_AMD64)
unsigned long index;
_BitScanReverse64(&index, f);
return DiyFp(f << (63 - index), e - (63 - index));
#elif defined(__GNUC__) && __GNUC__ >= 4
int s = __builtin_clzll(f);
return DiyFp(f << s, e - s); return DiyFp(f << s, e - s);
#else
DiyFp res = *this;
while (!(res.f & (static_cast<uint64_t>(1) << 63))) {
res.f <<= 1;
res.e--;
}
return res;
#endif
} }


DiyFp NormalizeBoundary() const { DiyFp NormalizeBoundary() const {
Expand Down
6 changes: 6 additions & 0 deletions include/rapidjson/rapidjson.h
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -490,6 +490,12 @@ RAPIDJSON_NAMESPACE_END
#define RAPIDJSON_VERSION_CODE(x,y,z) \ #define RAPIDJSON_VERSION_CODE(x,y,z) \
(((x)*100000) + ((y)*100) + (z)) (((x)*100000) + ((y)*100) + (z))


#if defined(__has_builtin)
#define RAPIDJSON_HAS_BUILTIN(x) __has_builtin(x)
#else
#define RAPIDJSON_HAS_BUILTIN(x) 0
#endif

/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// RAPIDJSON_DIAG_PUSH/POP, RAPIDJSON_DIAG_OFF // RAPIDJSON_DIAG_PUSH/POP, RAPIDJSON_DIAG_OFF


Expand Down
43 changes: 22 additions & 21 deletions include/rapidjson/reader.h
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "allocators.h" #include "allocators.h"
#include "stream.h" #include "stream.h"
#include "encodedstream.h" #include "encodedstream.h"
#include "internal/clzll.h"
#include "internal/meta.h" #include "internal/meta.h"
#include "internal/stack.h" #include "internal/stack.h"
#include "internal/strtod.h" #include "internal/strtod.h"
Expand Down Expand Up @@ -443,16 +444,16 @@ inline const char *SkipWhitespace_SIMD(const char* p) {


x = vmvnq_u8(x); // Negate x = vmvnq_u8(x); // Negate
x = vrev64q_u8(x); // Rev in 64 x = vrev64q_u8(x); // Rev in 64
uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract


if (low == 0) { if (low == 0) {
if (high != 0) { if (high != 0) {
int lz =__builtin_clzll(high);; uint32_t lz = RAPIDJSON_CLZLL(high);
return p + 8 + (lz >> 3); return p + 8 + (lz >> 3);
} }
} else { } else {
int lz = __builtin_clzll(low);; uint32_t lz = RAPIDJSON_CLZLL(low);
return p + (lz >> 3); return p + (lz >> 3);
} }
} }
Expand All @@ -479,16 +480,16 @@ inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {


x = vmvnq_u8(x); // Negate x = vmvnq_u8(x); // Negate
x = vrev64q_u8(x); // Rev in 64 x = vrev64q_u8(x); // Rev in 64
uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract


if (low == 0) { if (low == 0) {
if (high != 0) { if (high != 0) {
int lz = __builtin_clzll(high); uint32_t lz = RAPIDJSON_CLZLL(high);
return p + 8 + (lz >> 3); return p + 8 + (lz >> 3);
} }
} else { } else {
int lz = __builtin_clzll(low); uint32_t lz = RAPIDJSON_CLZLL(low);
return p + (lz >> 3); return p + (lz >> 3);
} }
} }
Expand Down Expand Up @@ -1244,19 +1245,19 @@ class GenericReader {
x = vorrq_u8(x, vcltq_u8(s, s3)); x = vorrq_u8(x, vcltq_u8(s, s3));


x = vrev64q_u8(x); // Rev in 64 x = vrev64q_u8(x); // Rev in 64
uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract


SizeType length = 0; SizeType length = 0;
bool escaped = false; bool escaped = false;
if (low == 0) { if (low == 0) {
if (high != 0) { if (high != 0) {
unsigned lz = (unsigned)__builtin_clzll(high);; uint32_t lz = RAPIDJSON_CLZLL(high);
length = 8 + (lz >> 3); length = 8 + (lz >> 3);
escaped = true; escaped = true;
} }
} else { } else {
unsigned lz = (unsigned)__builtin_clzll(low);; uint32_t lz = RAPIDJSON_CLZLL(low);
length = lz >> 3; length = lz >> 3;
escaped = true; escaped = true;
} }
Expand Down Expand Up @@ -1314,19 +1315,19 @@ class GenericReader {
x = vorrq_u8(x, vcltq_u8(s, s3)); x = vorrq_u8(x, vcltq_u8(s, s3));


x = vrev64q_u8(x); // Rev in 64 x = vrev64q_u8(x); // Rev in 64
uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract


SizeType length = 0; SizeType length = 0;
bool escaped = false; bool escaped = false;
if (low == 0) { if (low == 0) {
if (high != 0) { if (high != 0) {
unsigned lz = (unsigned)__builtin_clzll(high); uint32_t lz = RAPIDJSON_CLZLL(high);
length = 8 + (lz >> 3); length = 8 + (lz >> 3);
escaped = true; escaped = true;
} }
} else { } else {
unsigned lz = (unsigned)__builtin_clzll(low); uint32_t lz = RAPIDJSON_CLZLL(low);
length = lz >> 3; length = lz >> 3;
escaped = true; escaped = true;
} }
Expand Down Expand Up @@ -1370,17 +1371,17 @@ class GenericReader {
x = vorrq_u8(x, vcltq_u8(s, s3)); x = vorrq_u8(x, vcltq_u8(s, s3));


x = vrev64q_u8(x); // Rev in 64 x = vrev64q_u8(x); // Rev in 64
uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract


if (low == 0) { if (low == 0) {
if (high != 0) { if (high != 0) {
int lz = __builtin_clzll(high); uint32_t lz = RAPIDJSON_CLZLL(high);
p += 8 + (lz >> 3); p += 8 + (lz >> 3);
break; break;
} }
} else { } else {
int lz = __builtin_clzll(low); uint32_t lz = RAPIDJSON_CLZLL(low);
p += lz >> 3; p += lz >> 3;
break; break;
} }
Expand All @@ -1403,7 +1404,7 @@ class GenericReader {
RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); } RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); } RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); } RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
RAPIDJSON_FORCEINLINE void Push(char) {} RAPIDJSON_FORCEINLINE void Push(char) {}


size_t Tell() { return is.Tell(); } size_t Tell() { return is.Tell(); }
size_t Length() { return 0; } size_t Length() { return 0; }
Expand Down
11 changes: 6 additions & 5 deletions include/rapidjson/writer.h
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define RAPIDJSON_WRITER_H_ #define RAPIDJSON_WRITER_H_


#include "stream.h" #include "stream.h"
#include "internal/clzll.h"
#include "internal/meta.h" #include "internal/meta.h"
#include "internal/stack.h" #include "internal/stack.h"
#include "internal/strfunc.h" #include "internal/strfunc.h"
Expand Down Expand Up @@ -226,7 +227,7 @@ class Writer {
return Key(str.data(), SizeType(str.size())); return Key(str.data(), SizeType(str.size()));
} }
#endif #endif

bool EndObject(SizeType memberCount = 0) { bool EndObject(SizeType memberCount = 0) {
(void)memberCount; (void)memberCount;
RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); // not inside an Object RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); // not inside an Object
Expand Down Expand Up @@ -668,19 +669,19 @@ inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, siz
x = vorrq_u8(x, vcltq_u8(s, s3)); x = vorrq_u8(x, vcltq_u8(s, s3));


x = vrev64q_u8(x); // Rev in 64 x = vrev64q_u8(x); // Rev in 64
uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract


SizeType len = 0; SizeType len = 0;
bool escaped = false; bool escaped = false;
if (low == 0) { if (low == 0) {
if (high != 0) { if (high != 0) {
unsigned lz = (unsigned)__builtin_clzll(high); uint32_t lz = RAPIDJSON_CLZLL(high);
len = 8 + (lz >> 3); len = 8 + (lz >> 3);
escaped = true; escaped = true;
} }
} else { } else {
unsigned lz = (unsigned)__builtin_clzll(low); uint32_t lz = RAPIDJSON_CLZLL(low);
len = lz >> 3; len = lz >> 3;
escaped = true; escaped = true;
} }
Expand Down

0 comments on commit 679c2f2

Please sign in to comment.