Skip to content
This repository was archived by the owner on Apr 22, 2023. It is now read-only.

Commit 642ea2b

Browse files
committed
string_bytes: use external for large strings
When large strings are used they cause v8's GC to spend a lot more time cleaning up. In these cases it's much faster to use external string resources. UTF8 strings do not use external string resources because only one-byte and two-byte external strings are supported. EXTERN_APEX is the string length at or above which v8's GC consumes most of the execution time, so external string resources become the faster option. The following table lists, for each encoding and buffer size, a rough estimate of the percentage performance gain from this patch (UTF8 is missing because UTF8 strings cannot be externalized).

encoding    128KB     1MB     5MB
---------------------------------
ASCII         58%    208%    250%
HEX           15%     74%     86%
BASE64        11%     74%     71%
UCS2           2%    225%    398%
BINARY      2234%   1728%   2305%

BINARY is so much faster across the board because it uses the new v8 WriteOneByte API.
1 parent 87624ab commit 642ea2b

File tree

1 file changed

+113
-29
lines changed

1 file changed

+113
-29
lines changed

src/string_bytes.cc

Lines changed: 113 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@
2929
#include "node_buffer.h"
3030
#include "v8.h"
3131

32+
// When creating strings >= this length v8's gc spins up and consumes
33+
// most of the execution time. For these cases it's more performant to
34+
// use external string resources.
35+
// 0xFBEE9 is 1,031,913 in decimal. StringBytes::Encode compares this value
// against the length of the buffer being turned into a string (buflen or
// dlen) to choose between a normal v8 string and an external one.
#define EXTERN_APEX 0xFBEE9
36+
3237
namespace node {
3338

3439
using v8::Local;
@@ -39,6 +44,64 @@ using v8::String;
3944
using v8::Value;
4045

4146

47+
template <typename ResourceType, typename TypeName>
48+
class ExternString: public ResourceType {
49+
public:
50+
~ExternString() {
51+
delete[] data_;
52+
node_isolate->AdjustAmountOfExternalAllocatedMemory(-length_);
53+
}
54+
55+
const TypeName* data() const {
56+
return data_;
57+
}
58+
59+
size_t length() const {
60+
return length_;
61+
}
62+
63+
static Local<String> NewFromCopy(const TypeName* data, size_t length) {
64+
HandleScope scope(node_isolate);
65+
66+
if (length == 0)
67+
return scope.Close(String::Empty(node_isolate));
68+
69+
TypeName* new_data = new TypeName[length];
70+
memcpy(new_data, data, length * sizeof(*new_data));
71+
72+
return scope.Close(ExternString<ResourceType, TypeName>::New(new_data,
73+
length));
74+
}
75+
76+
// uses "data" for external resource, and will be free'd on gc
77+
static Local<String> New(const TypeName* data, size_t length) {
78+
HandleScope scope(node_isolate);
79+
80+
if (length == 0)
81+
return scope.Close(String::Empty(node_isolate));
82+
83+
ExternString* h_str = new ExternString<ResourceType, TypeName>(data,
84+
length);
85+
Local<String> str = String::NewExternal(h_str);
86+
node_isolate->AdjustAmountOfExternalAllocatedMemory(length);
87+
88+
return scope.Close(str);
89+
}
90+
91+
private:
92+
ExternString(const TypeName* data, size_t length)
93+
: data_(data), length_(length) { }
94+
const TypeName* data_;
95+
size_t length_;
96+
};
97+
98+
99+
// External string whose backing store is an array of char, built on v8's
// String::ExternalAsciiStringResource.
typedef ExternString<String::ExternalAsciiStringResource,
100+
char> ExternOneByteString;
101+
// External string whose backing store is an array of uint16_t, built on
// v8's String::ExternalStringResource.
typedef ExternString<String::ExternalStringResource,
102+
uint16_t> ExternTwoByteString;
103+
104+
42105
//// Base 64 ////
43106

44107
#define base64_encoded_size(size) ((size + 2 - ((size + 2) % 3)) / 3 * 4)
@@ -556,16 +619,23 @@ Local<Value> StringBytes::Encode(const char* buf,
556619
if (contains_non_ascii(buf, buflen)) {
557620
char* out = new char[buflen];
558621
force_ascii(buf, out, buflen);
559-
val = String::NewFromOneByte(node_isolate,
560-
reinterpret_cast<const uint8_t*>(out),
561-
String::kNormalString,
562-
buflen);
563-
delete[] out;
622+
if (buflen < EXTERN_APEX) {
623+
val = String::NewFromOneByte(node_isolate,
624+
reinterpret_cast<const uint8_t*>(out),
625+
String::kNormalString,
626+
buflen);
627+
delete[] out;
628+
} else {
629+
val = ExternOneByteString::New(out, buflen);
630+
}
564631
} else {
565-
val = String::NewFromOneByte(node_isolate,
566-
reinterpret_cast<const uint8_t*>(buf),
567-
String::kNormalString,
568-
buflen);
632+
if (buflen < EXTERN_APEX)
633+
val = String::NewFromOneByte(node_isolate,
634+
reinterpret_cast<const uint8_t*>(buf),
635+
String::kNormalString,
636+
buflen);
637+
else
638+
val = ExternOneByteString::NewFromCopy(buf, buflen);
569639
}
570640
break;
571641

@@ -576,13 +646,15 @@ Local<Value> StringBytes::Encode(const char* buf,
576646
buflen);
577647
break;
578648

579-
case BINARY: {
580-
val = String::NewFromOneByte(node_isolate,
581-
reinterpret_cast<const uint8_t*>(buf),
582-
String::kNormalString,
583-
buflen);
649+
case BINARY:
650+
if (buflen < EXTERN_APEX)
651+
val = String::NewFromOneByte(node_isolate,
652+
reinterpret_cast<const uint8_t*>(buf),
653+
String::kNormalString,
654+
buflen);
655+
else
656+
val = ExternOneByteString::NewFromCopy(buf, buflen);
584657
break;
585-
}
586658

587659
case BASE64: {
588660
size_t dlen = base64_encoded_size(buflen);
@@ -591,19 +663,27 @@ Local<Value> StringBytes::Encode(const char* buf,
591663
size_t written = base64_encode(buf, buflen, dst, dlen);
592664
assert(written == dlen);
593665

594-
val = String::NewFromOneByte(node_isolate,
595-
reinterpret_cast<const uint8_t*>(dst),
596-
String::kNormalString,
597-
dlen);
598-
delete[] dst;
666+
if (dlen < EXTERN_APEX) {
667+
val = String::NewFromOneByte(node_isolate,
668+
reinterpret_cast<const uint8_t*>(dst),
669+
String::kNormalString,
670+
dlen);
671+
delete[] dst;
672+
} else {
673+
val = ExternOneByteString::New(dst, dlen);
674+
}
599675
break;
600676
}
601677

602678
case UCS2: {
603-
val = String::NewFromTwoByte(node_isolate,
604-
reinterpret_cast<const uint16_t*>(buf),
605-
String::kNormalString,
606-
buflen / 2);
679+
const uint16_t* out = reinterpret_cast<const uint16_t*>(buf);
680+
if (buflen < EXTERN_APEX)
681+
val = String::NewFromTwoByte(node_isolate,
682+
out,
683+
String::kNormalString,
684+
buflen / 2);
685+
else
686+
val = ExternTwoByteString::NewFromCopy(out, buflen / 2);
607687
break;
608688
}
609689

@@ -613,11 +693,15 @@ Local<Value> StringBytes::Encode(const char* buf,
613693
size_t written = hex_encode(buf, buflen, dst, dlen);
614694
assert(written == dlen);
615695

616-
val = String::NewFromOneByte(node_isolate,
617-
reinterpret_cast<uint8_t*>(dst),
618-
String::kNormalString,
619-
dlen);
620-
delete[] dst;
696+
if (dlen < EXTERN_APEX) {
697+
val = String::NewFromOneByte(node_isolate,
698+
reinterpret_cast<const uint8_t*>(dst),
699+
String::kNormalString,
700+
dlen);
701+
delete[] dst;
702+
} else {
703+
val = ExternOneByteString::New(dst, dlen);
704+
}
621705
break;
622706
}
623707

0 commit comments

Comments
 (0)