-
Notifications
You must be signed in to change notification settings - Fork 101
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
perf: Do not allocate when computing metadata size
Copy in code from Guava to compute the encoded length of a UTF-8 string, and use it to compute the size of the metadata for the multi-valued message without allocating.
- Loading branch information
Showing
2 changed files
with
96 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
/* | ||
* Copyright (C) 2013 The Guava Authors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except | ||
* in compliance with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software distributed under the License | ||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
* or implied. See the License for the specific language governing permissions and limitations under | ||
* the License. | ||
*/ | ||
|
||
package com.timgroup.statsd; | ||
|
||
import static java.lang.Character.MAX_SURROGATE; | ||
import static java.lang.Character.MIN_SURROGATE; | ||
|
||
/**
 * This class is a partial copy of the {@code com.google.common.base.Utf8}
 * <a href="https://github.com/google/guava/blob/v33.0.0/guava/src/com/google/common/base/Utf8.java">class</a>
 * from the Guava library.
 * It is copied here to avoid a dependency on Guava.
 */
final class Utf8 {

    /** Static utility holder; not instantiable. */
    private Utf8() {
    }

    /**
     * Returns the number of bytes in the UTF-8-encoded form of {@code sequence}. For a string, this
     * method is equivalent to {@code string.getBytes(UTF_8).length}, but is more efficient in both
     * time and space.
     *
     * @param sequence the UTF-16 character sequence to measure
     * @return the number of bytes needed to encode {@code sequence} as UTF-8
     * @throws IllegalArgumentException if {@code sequence} contains ill-formed UTF-16 (unpaired
     *     surrogates), or if the encoded length does not fit in an {@code int}
     * @throws NullPointerException if {@code sequence} is {@code null}
     */
    public static int encodedLength(CharSequence sequence) {
        // Warning to maintainers: this implementation is highly optimized.
        int utf16Length = sequence.length();
        // Start from one byte per UTF-16 char; the loops below only add the extra bytes.
        int utf8Length = utf16Length;
        int index = 0;

        // This loop optimizes for pure ASCII: such chars need no extra bytes.
        while (index < utf16Length && sequence.charAt(index) < 0x80) {
            index++;
        }

        // This loop optimizes for chars less than 0x800.
        for (; index < utf16Length; index++) {
            char character = sequence.charAt(index);
            if (character < 0x800) {
                // (0x7f - character) is negative exactly when character > 0x7f, so the unsigned
                // shift yields 1 extra byte for two-byte chars and 0 for ASCII.
                utf8Length += ((0x7f - character) >>> 31); // branch free!
            } else {
                // The general loop accounts for the rest of the sequence, so stop here.
                utf8Length += encodedLengthGeneral(sequence, index);
                break;
            }
        }

        if (utf8Length < utf16Length) {
            // Necessary and sufficient condition for overflow because of maximum 3x expansion.
            // Adding 2^32 as a long recovers the real length from the wrapped int for the message.
            throw new IllegalArgumentException(
                    "UTF-8 length does not fit in int: " + (utf8Length + (1L << 32)));
        }
        return utf8Length;
    }

    /**
     * Counts the bytes that the UTF-8 encoding needs beyond one byte per UTF-16 char, for the part
     * of {@code sequence} starting at {@code start}.
     *
     * @param sequence the UTF-16 character sequence being measured
     * @param start index of the first char to account for
     * @return the number of extra bytes, i.e. UTF-8 bytes minus UTF-16 chars, from {@code start} on
     * @throws IllegalArgumentException if an unpaired surrogate is found
     */
    private static int encodedLengthGeneral(CharSequence sequence, int start) {
        int utf16Length = sequence.length();
        int utf8Length = 0;
        for (int index = start; index < utf16Length; index++) {
            char character = sequence.charAt(index);
            if (character < 0x800) {
                // 1 extra byte for chars above 0x7f, 0 for ASCII.
                utf8Length += (0x7f - character) >>> 31; // branch free!
            } else {
                // Three-byte char: 2 bytes beyond the one already counted by the caller.
                utf8Length += 2;
                // jdk7+: if (Character.isSurrogate(character)) {
                if (MIN_SURROGATE <= character && character <= MAX_SURROGATE) {
                    // Check that we have a well-formed surrogate pair: codePointAt returns the
                    // char itself when it is not a high surrogate followed by a low surrogate.
                    if (Character.codePointAt(sequence, index) == character) {
                        throw new IllegalArgumentException(unpairedSurrogateMsg(index));
                    }
                    // Skip the low surrogate. The pair encodes to 4 bytes: 2 chars already
                    // counted by the caller plus the 2 extra bytes added above.
                    index++;
                }
            }
        }
        return utf8Length;
    }

    /** Builds the exception message for an unpaired surrogate found at {@code index}. */
    private static String unpairedSurrogateMsg(int index) {
        return "Unpaired surrogate at index " + index;
    }
}