Skip to content

Commit

Permalink
Count JSON tokens (#1296)
Browse files Browse the repository at this point in the history
  • Loading branch information
pjfanning committed Jun 18, 2024
1 parent c65b70b commit 8bc5dba
Show file tree
Hide file tree
Showing 7 changed files with 332 additions and 9 deletions.
3 changes: 3 additions & 0 deletions release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ a pure JSON library.
#1277: Add back Java 22 optimisation in FastDoubleParser
#1305: Make helper methods of `WriterBasedJsonGenerator` non-final to allow overriding
(contributed by @zhangOranges)
#1310: Add new `StreamReadConstraints` (`maxTokenCount`) to limit maximum number
of Tokens allowed per document
(implemented by @pjfanning)

2.17.2 (not yet released)

Expand Down
12 changes: 12 additions & 0 deletions src/main/java/com/fasterxml/jackson/core/JsonParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,18 @@ public JsonLocation currentTokenLocation() {
return getTokenLocation();
}

/**
* Get an approximate count of the number of tokens that have been read.
* This count is likely to be only updated if {@link StreamReadConstraints.Builder#maxTokenCount(long)}
* has been used to set a limit on the number of tokens that can be read.
*<p>
* This base implementation does no counting and always returns {@code -1};
* parser implementations that track the count override this method.
*
* @return the number of tokens that have been read (-1 if the count is not available)
* @since 2.18
*/
public long currentTokenCount() {
return -1L;
}

/**
* Deprecated alias for {@link #currentLocation()} (removed from Jackson 3.0).
*
Expand Down
112 changes: 107 additions & 5 deletions src/main/java/com/fasterxml/jackson/core/StreamReadConstraints.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ public class StreamReadConstraints
*/
public static final long DEFAULT_MAX_DOC_LEN = -1L;

/**
* Default setting for maximum token count:
* see {@link Builder#maxTokenCount} for details.
*/
public static final long DEFAULT_MAX_TOKEN_COUNT = -1L;

/**
* @since 2.16
*/
Expand Down Expand Up @@ -74,6 +80,7 @@ public class StreamReadConstraints

protected final int _maxNestingDepth;
protected final long _maxDocLen;
protected final long _maxTokenCount;

protected final int _maxNumLen;
protected final int _maxStringLen;
Expand Down Expand Up @@ -112,6 +119,7 @@ public static void overrideDefaultStreamReadConstraints(final StreamReadConstrai

public static final class Builder {
private long maxDocLen;
private long maxTokenCount;
private int maxNestingDepth;
private int maxNumLen;
private int maxStringLen;
Expand Down Expand Up @@ -156,6 +164,31 @@ public Builder maxDocumentLength(long maxDocLen) {
return this;
}

/**
 * Configures the upper bound on how many tokens a single document may contain.
 *
 * <p>
 * A token is one unit of input: for example a number, a string, or the
 * start or end marker of an object or an array.
 * </p>
 *
 * @param maxTokenCount limit to enforce when strictly positive; {@code 0} or any
 *   negative value disables the limit ("unlimited")
 *
 * @return this builder
 *
 * @since 2.18
 */
public Builder maxTokenCount(long maxTokenCount) {
    // Every non-positive input is normalized to the single "unlimited" marker, -1L
    this.maxTokenCount = (maxTokenCount > 0L) ? maxTokenCount : -1L;
    return this;
}

/**
* Sets the maximum number length (in chars or bytes, depending on input context).
* The default is 1000.
Expand Down Expand Up @@ -220,14 +253,15 @@ public Builder maxNameLength(final int maxNameLen) {
}

Builder() {
this(DEFAULT_MAX_DEPTH, DEFAULT_MAX_DOC_LEN,
this(DEFAULT_MAX_DEPTH, DEFAULT_MAX_DOC_LEN, DEFAULT_MAX_TOKEN_COUNT,
DEFAULT_MAX_NUM_LEN, DEFAULT_MAX_STRING_LEN, DEFAULT_MAX_NAME_LEN);
}

Builder(final int maxNestingDepth, final long maxDocLen,
Builder(final int maxNestingDepth, final long maxDocLen, final long maxTokenCount,
final int maxNumLen, final int maxStringLen, final int maxNameLen) {
this.maxNestingDepth = maxNestingDepth;
this.maxDocLen = maxDocLen;
this.maxTokenCount = maxTokenCount;
this.maxNumLen = maxNumLen;
this.maxStringLen = maxStringLen;
this.maxNameLen = maxNameLen;
Expand All @@ -236,14 +270,15 @@ public Builder maxNameLength(final int maxNameLen) {
// Copy constructor: seeds this builder with every limit held by an existing constraints instance
Builder(StreamReadConstraints src) {
    // document-wide limits
    this.maxDocLen = src._maxDocLen;
    this.maxTokenCount = src._maxTokenCount;
    this.maxNestingDepth = src._maxNestingDepth;
    // per-token length limits
    this.maxNumLen = src._maxNumLen;
    this.maxStringLen = src._maxStringLen;
    this.maxNameLen = src._maxNameLen;
}

public StreamReadConstraints build() {
return new StreamReadConstraints(maxNestingDepth, maxDocLen,
maxNumLen, maxStringLen, maxNameLen);
maxNumLen, maxStringLen, maxNameLen, maxTokenCount);
}
}

Expand All @@ -257,7 +292,7 @@ public StreamReadConstraints build() {
protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
final int maxNumLen, final int maxStringLen) {
this(maxNestingDepth, maxDocLen,
maxNumLen, maxStringLen, DEFAULT_MAX_NAME_LEN);
maxNumLen, maxStringLen, DEFAULT_MAX_NAME_LEN, DEFAULT_MAX_TOKEN_COUNT);
}

/**
Expand All @@ -269,13 +304,30 @@ protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
*
* @since 2.16
*/
@Deprecated // since 2.18
protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
final int maxNumLen, final int maxStringLen, final int maxNameLen) {
this(maxNestingDepth, maxDocLen, maxNumLen, maxStringLen, maxNameLen, DEFAULT_MAX_TOKEN_COUNT);
}

/**
* @param maxNestingDepth Maximum input document nesting to allow
* @param maxDocLen Maximum input document length to allow
* @param maxNumLen Maximum number representation length to allow
* @param maxStringLen Maximum String value length to allow
* @param maxNameLen Maximum Object property name length to allow
* @param maxTokenCount Maximum number of tokens to allow
*
* @since 2.18
*/
protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
final int maxNumLen, final int maxStringLen, final int maxNameLen) {
final int maxNumLen, final int maxStringLen, final int maxNameLen, final long maxTokenCount) {
_maxNestingDepth = maxNestingDepth;
_maxDocLen = maxDocLen;
_maxNumLen = maxNumLen;
_maxStringLen = maxStringLen;
_maxNameLen = maxNameLen;
_maxTokenCount = maxTokenCount;
}

public static Builder builder() {
Expand Down Expand Up @@ -337,6 +389,31 @@ public boolean hasMaxDocumentLength() {
return _maxDocLen > 0L;
}

/**
* Accessor for maximum token count.
* See {@link Builder#maxTokenCount(long)} for details.
*
* @return Maximum allowed token count; a value of {@code 0} or less means that
*   no limit is enforced (see {@link #hasMaxTokenCount()})
* @since 2.18
*/
public long getMaxTokenCount() {
return _maxTokenCount;
}

/**
* Convenience method, basically same as:
*<pre>
* getMaxTokenCount() &gt; 0L
*</pre>
*<p>
* Intended to be checked before calling {@link #validateTokenCount(long)}, so that
* token counting can be skipped entirely when no limit has been configured.
*
* @return {@code True} if this constraints instance has a limit for maximum
* token count to enforce; {@code false} otherwise.
* @since 2.18
*/
public boolean hasMaxTokenCount() {
return _maxTokenCount > 0L;
}

/**
* Accessor for maximum length of numbers to decode.
* see {@link Builder#maxNumberLength(int)} for details.
Expand Down Expand Up @@ -419,6 +496,31 @@ public void validateDocumentLength(long len) throws StreamConstraintsException
}
}

/**
 * Convenience method that can be used to verify that the
 * token count does not exceed the maximum specified by this
 * constraints object (if any): if it does, a
 * {@link StreamConstraintsException}
 * is thrown.
 *<p>
 * If no maximum is configured (that is, {@link #hasMaxTokenCount()} returns
 * {@code false}) this method does nothing. Callers on a hot path may still check
 * {@code hasMaxTokenCount()} once up front to avoid counting tokens at all.
 *
 * @param count Current token count for processed document content
 *
 * @throws StreamConstraintsException If token count exceeds maximum
 *
 * @since 2.18
 */
public void validateTokenCount(long count) throws StreamConstraintsException
{
    // The limit comparison comes first: when a limit is set and not exceeded it is
    // the only comparison evaluated. The second check keeps the "unlimited" marker
    // (-1L) from being treated as a limit that every count exceeds.
    if ((count > _maxTokenCount) && (_maxTokenCount > 0L)) {
        throw _constructException(
                "Token count (%d) exceeds the maximum allowed (%d, from %s)",
                count, _maxTokenCount,
                _constrainRef("getMaxTokenCount"));
    }
}

/*
/**********************************************************************
/* Convenience methods for validation, token lengths
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,20 @@ public abstract class ParserMinimalBase extends JsonParser
*/
protected JsonToken _currToken;

/**
* Current count of tokens, if tracked (see {@link #_trackMaxTokenCount})
*
* @since 2.18
*/
protected long _tokenCount;

/**
* Whether or not to track the token count due to a {@link StreamReadConstraints} maxTokenCount > 0.
*
* @since 2.18
*/
protected final boolean _trackMaxTokenCount;

/**
* Last cleared token, if any: that is, value that was in
* effect when {@link #clearCurrentToken} was called.
Expand All @@ -175,6 +189,7 @@ public abstract class ParserMinimalBase extends JsonParser
protected ParserMinimalBase() {
super();
_streamReadConstraints = StreamReadConstraints.defaults();
_trackMaxTokenCount = _streamReadConstraints.hasMaxTokenCount();
}

@Deprecated // since 2.18
Expand All @@ -186,12 +201,14 @@ protected ParserMinimalBase(int features) {
protected ParserMinimalBase(StreamReadConstraints src) {
super();
_streamReadConstraints = (src == null) ? StreamReadConstraints.defaults() : src;
_trackMaxTokenCount = _streamReadConstraints.hasMaxTokenCount();
}

// @since 2.18
protected ParserMinimalBase(int features, StreamReadConstraints src) {
super(features);
_streamReadConstraints = (src == null) ? StreamReadConstraints.defaults() : src;
_trackMaxTokenCount = _streamReadConstraints.hasMaxTokenCount();
}

// NOTE: had base impl in 2.3 and before; but shouldn't
Expand Down Expand Up @@ -311,9 +328,6 @@ public JsonParser skipChildren() throws IOException
*/
protected abstract void _handleEOF() throws JsonParseException;

//public JsonToken getCurrentToken()
//public boolean hasCurrentToken()

@Deprecated // since 2.17 -- still need to implement
@Override
public abstract String getCurrentName() throws IOException;
Expand All @@ -327,6 +341,11 @@ public JsonParser skipChildren() throws IOException

// public abstract JsonLocation getCurrentLocation();

// Returns the running token counter. In the code visible here the counter is only
// advanced by _updateToken(), and only while _trackMaxTokenCount is true.
@Override // since 2.18
public long currentTokenCount() {
return _tokenCount;
}

/*
/**********************************************************
/* Public API, token state overrides
Expand Down Expand Up @@ -827,9 +846,11 @@ protected final void _wrapError(String msg, Throwable t) throws JsonParseExcepti

/**
 * Records the given token as the current one and, when token counting is
 * enabled, advances the counter and checks it against the configured limit.
 *
 * @param token Token to make current
 *
 * @return The token passed in
 *
 * @throws StreamConstraintsException If the updated token count fails validation
 */
protected final JsonToken _updateToken(final JsonToken token) throws StreamConstraintsException {
    _currToken = token;
    if (!_trackMaxTokenCount) {
        return token;
    }
    // Count first, then validate the new total
    _tokenCount += 1;
    _streamReadConstraints.validateTokenCount(_tokenCount);
    return token;
}

// Clears the current token; always returns null. The token counter is not touched.
protected final JsonToken _updateTokenToNull() {
    _currToken = null;
    return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ public boolean requiresCustomCodec() {
@Override public JsonLocation currentLocation() { return delegate.currentLocation(); }
@Override public JsonLocation currentTokenLocation() { return delegate.currentTokenLocation(); }

// Token count is whatever the wrapped parser reports
@Override // since 2.18
public long currentTokenCount() { return delegate.currentTokenCount(); }

@Override
@Deprecated
public JsonToken getCurrentToken() { return delegate.getCurrentToken(); }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import com.fasterxml.jackson.core.exc.StreamConstraintsException;
import com.fasterxml.jackson.core.testsupport.AsyncReaderWrapper;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;

// [core#1047]: Add max-name-length constraints
Expand All @@ -20,6 +21,10 @@ class LargeDocReadTest extends AsyncTestBase
.streamReadConstraints(StreamReadConstraints.builder().maxDocumentLength(10_000L).build())
.build();

private final JsonFactory JSON_F_MAX_TOKENS_1K = JsonFactory.builder()
.streamReadConstraints(StreamReadConstraints.builder().maxTokenCount(1_000L).build())
.build();

// Test name that is below default max name
@Test
void largeNameBytes() throws Exception {
Expand Down Expand Up @@ -83,6 +88,18 @@ void largeNameWithSmallLimitAsync() throws Exception
}
}

// Verifies that reading a document with more tokens than the configured maximum
// (1000, via JSON_F_MAX_TOKENS_1K) fails with a StreamConstraintsException.
@Test
void tokenLimitBytes() throws Exception {
final String doc = generateJSON(StreamReadConstraints.defaults().getMaxNameLength() - 100);
try (JsonParser p = createParserUsingStream(JSON_F_MAX_TOKENS_1K, doc, "UTF-8")) {
consumeTokens(p);
fail("expected StreamConstraintsException");
} catch (StreamConstraintsException e) {
// The failure is expected on the first token past the limit, hence 1001 in the message
assertEquals("Token count (1001) exceeds the maximum allowed (1000, from `StreamReadConstraints.getMaxTokenCount()`)",
e.getMessage());
}
}

private void consumeTokens(JsonParser p) throws IOException {
while (p.nextToken() != null) {
;
Expand Down
Loading

0 comments on commit 8bc5dba

Please sign in to comment.