Skip to content

Commit

Permalink
Extract UTF-8 decoding logic
Browse files Browse the repository at this point in the history
  • Loading branch information
slandelle committed Aug 26, 2016
1 parent 9566dfb commit 6f125c7
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 65 deletions.
Expand Up @@ -18,83 +18,20 @@

import java.nio.charset.CharacterCodingException;

public class Utf8ByteBufDecoder {
public class Utf8ByteBufDecoder extends Utf8Decoder {

private static final FastThreadLocal<Utf8ByteBufDecoder> DECODERS = new FastThreadLocal<Utf8ByteBufDecoder>() {
protected Utf8ByteBufDecoder initialValue() {
return new Utf8ByteBufDecoder();
};
};

public static Utf8ByteBufDecoder getCachedDecoder() {
Utf8ByteBufDecoder cached = DECODERS.get();
cached.reset();
return cached;
}

private static final byte[] TYPES = new byte[] {//
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,/**/
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,/**/
8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,/**/
10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 /**/
};

private static final byte[] STATES = new byte[] {//
0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,/**/
12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12,/**/
12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12,/**/
12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12,/**/
12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 //
};

private static final int UTF8_ACCEPT = 0;
private static final int UTF8_REJECT = 12;

private StringBuilder sb = new StringBuilder();
private int state = UTF8_ACCEPT;
private int codePoint = 0;

private void write(byte b) throws CharacterCodingException {
int t = TYPES[b & 0xFF];

codePoint = state != UTF8_ACCEPT ? (b & 0x3f) | (codePoint << 6) : (0xff >> t) & b;
state = STATES[state + t];

if (state == UTF8_ACCEPT) {
if (codePoint < Character.MIN_HIGH_SURROGATE) {
sb.append((char) codePoint);
} else {
appendCodePointChars();
}
} else if (state == UTF8_REJECT) {
throw new CharacterCodingException();
}
}

private void appendCodePointChars() {
if (Character.isBmpCodePoint(codePoint)) {
sb.append((char) codePoint);

} else if (Character.isValidCodePoint(codePoint)) {
char charIndexPlus1 = Character.lowSurrogate(codePoint);
char charIndex = Character.highSurrogate(codePoint);
sb.append(charIndex).append(charIndexPlus1);

} else {
throw new IllegalArgumentException();
}
}

public void reset() {
sb.setLength(0);
state = UTF8_ACCEPT;
codePoint = 0;
}

public String decode(Iterable<ByteBuf> bufs) throws CharacterCodingException {

for (ByteBuf buf : bufs) {
Expand Down
82 changes: 82 additions & 0 deletions client/src/main/java/org/asynchttpclient/util/Utf8Decoder.java
@@ -0,0 +1,82 @@
/*
* Copyright (c) 2016 AsyncHttpClient Project. All rights reserved.
*
* This program is licensed to you under the Apache License Version 2.0,
* and you may not use this file except in compliance with the Apache License Version 2.0.
* You may obtain a copy of the Apache License Version 2.0 at
* http://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the Apache License Version 2.0 is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
*/
package org.asynchttpclient.util;

import java.nio.charset.CharacterCodingException;

/**
 * Base class for incremental, table-driven UTF-8 decoding.
 *
 * <p>Bytes are fed one at a time through {@link #write(byte)}; every completed code point is
 * appended to {@link #sb}. The decoder is stateful: {@link #state} and the partially assembled
 * code point survive between calls, so a multi-byte sequence may be split across several calls.
 * Call {@link #reset()} before reusing an instance.
 *
 * <p>NOTE(review): the two lookup tables look like the ones from Bjoern Hoehrmann's
 * "Flexible and Economical UTF-8 Decoder" - confirm before relying on that attribution.
 */
public abstract class Utf8Decoder {

    /** Maps each unsigned byte value (0-255) to a byte class used to index {@link #STATES}. */
    private static final byte[] TYPES = new byte[] {//
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,/**/
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,/**/
            8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,/**/
            10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 /**/
    };

    /** Transition table: the next state is {@code STATES[currentState + byteClass]}. */
    private static final byte[] STATES = new byte[] {//
            0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,/**/
            12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12,/**/
            12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12,/**/
            12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12,/**/
            12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 //
    };

    /** State in which no code point is pending; a complete code point has just been consumed. */
    protected static final int UTF8_ACCEPT = 0;

    /** State entered when the byte just consumed cannot continue a well-formed sequence. */
    protected static final int UTF8_REJECT = 12;

    /** Accumulates the decoded characters. */
    protected StringBuilder sb = new StringBuilder();

    /** Current state of the decoding automaton; starts at {@link #UTF8_ACCEPT}. */
    protected int state = UTF8_ACCEPT;

    /** Code point being assembled from the bytes seen since the last accepted sequence. */
    private int codePoint = 0;

    /**
     * Feeds a single byte to the decoder.
     *
     * <p>When the byte completes a code point, its char(s) are appended to {@link #sb};
     * otherwise the byte is only folded into the pending code point.
     *
     * @param b the next byte of the UTF-8 stream
     * @throws CharacterCodingException if the byte drives the automaton into {@link #UTF8_REJECT}
     * @throws IllegalArgumentException if an accepted code point is not a valid Unicode code point
     */
    protected void write(byte b) throws CharacterCodingException {
        final int byteClass = TYPES[b & 0xFF];

        if (state == UTF8_ACCEPT) {
            // First byte of a sequence: keep only the payload bits of the lead byte.
            codePoint = (0xff >> byteClass) & b;
        } else {
            // Continuation byte: shift in its six payload bits.
            codePoint = (b & 0x3f) | (codePoint << 6);
        }
        state = STATES[state + byteClass];

        if (state == UTF8_REJECT) {
            throw new CharacterCodingException();
        }
        if (state != UTF8_ACCEPT) {
            // Sequence still incomplete, nothing to emit yet.
            return;
        }

        if (codePoint < Character.MIN_HIGH_SURROGATE) {
            // Fast path: the code point fits in a single char.
            sb.append((char) codePoint);
        } else {
            // Remaining BMP values are appended as one char, supplementary ones as a
            // high/low surrogate pair; invalid values raise IllegalArgumentException.
            sb.appendCodePoint(codePoint);
        }
    }

    /**
     * Clears the output buffer and puts the automaton back in its initial state so the
     * instance can decode a new stream.
     */
    public void reset() {
        codePoint = 0;
        state = UTF8_ACCEPT;
        sb.setLength(0);
    }
}

0 comments on commit 6f125c7

Please sign in to comment.