Skip to content

Commit

Permalink
IO Streams prototype
Browse files Browse the repository at this point in the history
  • Loading branch information
shanshin committed Apr 22, 2022
1 parent 216ea9c commit 0c7f518
Show file tree
Hide file tree
Showing 28 changed files with 849 additions and 244 deletions.
1 change: 1 addition & 0 deletions docs/formats.md
Expand Up @@ -17,6 +17,7 @@ stable, these are currently experimental features of Kotlin Serialization.
* [Field numbers](#field-numbers)
* [Integer types](#integer-types)
* [Lists as repeated fields](#lists-as-repeated-fields)
* [Packed fields](#packed-fields)
* [Properties (experimental)](#properties-experimental)
* [Custom formats (experimental)](#custom-formats-experimental)
* [Basic encoder](#basic-encoder)
Expand Down
1 change: 1 addition & 0 deletions docs/serialization-guide.md
Expand Up @@ -140,6 +140,7 @@ Once the project is set up, we can start serializing some classes.
* <a name='field-numbers'></a>[Field numbers](formats.md#field-numbers)
* <a name='integer-types'></a>[Integer types](formats.md#integer-types)
* <a name='lists-as-repeated-fields'></a>[Lists as repeated fields](formats.md#lists-as-repeated-fields)
* <a name='packed-fields'></a>[Packed fields](formats.md#packed-fields)
* <a name='properties-experimental'></a>[Properties (experimental)](formats.md#properties-experimental)
* <a name='custom-formats-experimental'></a>[Custom formats (experimental)](formats.md#custom-formats-experimental)
* <a name='basic-encoder'></a>[Basic encoder](formats.md#basic-encoder)
Expand Down
7 changes: 7 additions & 0 deletions formats/json-okio/api/kotlinx-serialization-json-okio.api
@@ -0,0 +1,7 @@
public final class kotlinx/serialization/json/OkioStreamsKt {
public static final fun decodeFromOkio (Lkotlinx/serialization/json/Json;Lkotlinx/serialization/DeserializationStrategy;Lokio/BufferedSource;)Ljava/lang/Object;
public static final fun decodeOkioToSequence (Lkotlinx/serialization/json/Json;Lokio/BufferedSource;Lkotlinx/serialization/DeserializationStrategy;Lkotlinx/serialization/json/DecodeSequenceMode;)Lkotlin/sequences/Sequence;
public static synthetic fun decodeOkioToSequence$default (Lkotlinx/serialization/json/Json;Lokio/BufferedSource;Lkotlinx/serialization/DeserializationStrategy;Lkotlinx/serialization/json/DecodeSequenceMode;ILjava/lang/Object;)Lkotlin/sequences/Sequence;
public static final fun encodeToOkio (Lkotlinx/serialization/json/Json;Lkotlinx/serialization/SerializationStrategy;Ljava/lang/Object;Lokio/BufferedSink;)V
}

31 changes: 31 additions & 0 deletions formats/json-okio/build.gradle.kts
@@ -0,0 +1,31 @@
/*
* Copyright 2017-2022 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
*/
import Java9Modularity.configureJava9ModuleInfo

plugins {
    kotlin("multiplatform")
    kotlin("plugin.serialization")
}

// Shared project conventions: native target set-up and source-set layout.
apply(from = rootProject.file("gradle/native-targets.gradle"))
apply(from = rootProject.file("gradle/configure-source-sets.gradle"))

// Okio artifact used by both the main and the test source set; declared once so the two usages cannot drift apart.
val okioArtifact = "com.squareup.okio:okio-multiplatform:3.0.0-alpha.9"

kotlin {
    sourceSets {
        val commonMain by getting {
            dependencies {
                api(project(":kotlinx-serialization-core"))
                api(project(":kotlinx-serialization-json"))
                // compileOnly: Okio is required to compile this module but is not added to its runtime dependencies.
                compileOnly(okioArtifact)
            }
        }
        val commonTest by getting {
            dependencies {
                // Tests actually run the Okio-based code paths, so they need the real artifact.
                implementation(okioArtifact)
            }
        }
    }
}

project.configureJava9ModuleInfo()
@@ -0,0 +1,120 @@
package kotlinx.serialization.json

import kotlinx.serialization.*
import kotlinx.serialization.json.internal.*
import kotlinx.serialization.json.internal.JsonToOkioStreamWriter
import kotlinx.serialization.json.internal.decodeToSequence
import okio.BufferedSink
import okio.BufferedSource

/**
 * Serializes the [value] with [serializer] into the [target] sink using JSON format and UTF-8 encoding.
 *
 * The [target] is flushed once encoding finishes (successfully or not); it is not closed.
 *
 * @throws [SerializationException] if the given value cannot be serialized to JSON.
 * @throws [IOException] If an I/O error occurs and the sink can't be written to.
 */
@ExperimentalSerializationApi
public fun <T> Json.encodeToOkio(
    serializer: SerializationStrategy<T>,
    value: T,
    target: BufferedSink
) {
    val sinkWriter = JsonToOkioStreamWriter(target)
    try {
        encodeByWriter(sinkWriter, serializer, value)
    } finally {
        // release() flushes the sink, so already-written output is pushed out even if encoding failed midway.
        sinkWriter.release()
    }
}

/**
 * Serializes the given [value] into the [target] sink using UTF-8 encoding and a serializer
 * retrieved from the reified type parameter.
 *
 * @throws [SerializationException] if the given value cannot be serialized to JSON.
 * @throws [IOException] If an I/O error occurs and the sink can't be written to.
 */
@ExperimentalSerializationApi
public inline fun <reified T> Json.encodeToOkio(value: T, target: BufferedSink) {
    // Delegates to the overload that takes an explicit serializer.
    encodeToOkio(serializer = serializersModule.serializer<T>(), value = value, target = target)
}



/**
 * Deserializes JSON from the [source] using UTF-8 encoding to a value of type [T] using [deserializer].
 *
 * Note that this function expects exactly one object to be present in the source
 * and throws an exception if there are any dangling bytes after that object.
 *
 * @throws [SerializationException] if the given JSON input cannot be deserialized to the value of type [T].
 * @throws [IOException] If an I/O error occurs and the source can't be read from.
 */
@ExperimentalSerializationApi
public fun <T> Json.decodeFromOkio(
    deserializer: DeserializationStrategy<T>,
    source: BufferedSource
): T = decodeByReader(deserializer, OkioSerialReader(source))

/**
 * Deserializes the contents of the given [source] to a value of type [T] using UTF-8 encoding and
 * a deserializer retrieved from the reified type parameter.
 *
 * Note that this function expects exactly one object to be present in the source
 * and throws an exception if there are any dangling bytes after that object.
 *
 * @throws [SerializationException] if the given JSON input cannot be deserialized to the value of type [T].
 * @throws [IOException] If an I/O error occurs and the source can't be read from.
 */
@ExperimentalSerializationApi
public inline fun <reified T> Json.decodeFromOkio(source: BufferedSource): T {
    // Delegates to the overload that takes an explicit deserializer.
    return decodeFromOkio(deserializer = serializersModule.serializer<T>(), source = source)
}


/**
 * Transforms the given [source] into a lazily deserialized sequence of elements of type [T]
 * using UTF-8 encoding and [deserializer].
 * Unlike [decodeFromOkio], the [source] is allowed to contain more than one element, separated as [format] declares.
 *
 * All elements must be of type [T].
 * Elements are parsed lazily, as the resulting [Sequence] is evaluated.
 * The resulting sequence is tied to the [source] and can be evaluated only once.
 *
 * **Resource caution:** this method neither closes the [source] when parsing is finished nor provides a way to close it manually.
 * It is the caller's responsibility to hold a reference to the source and close it. Moreover, because the source is parsed lazily,
 * closing it before the returned sequence is fully evaluated will result in an [IOException] from the decoder.
 *
 * @throws [SerializationException] if the given JSON input cannot be deserialized to the value of type [T].
 * @throws [IOException] If an I/O error occurs and the source can't be read from.
 */
@ExperimentalSerializationApi
public fun <T> Json.decodeOkioToSequence(
    source: BufferedSource,
    deserializer: DeserializationStrategy<T>,
    format: DecodeSequenceMode = DecodeSequenceMode.AUTO_DETECT
): Sequence<T> = decodeToSequence(OkioSerialReader(source), deserializer, format)

/**
 * Transforms the given [source] into a lazily deserialized sequence of elements of type [T]
 * using UTF-8 encoding and a deserializer retrieved from the reified type parameter.
 * Unlike [decodeFromOkio], the [source] is allowed to contain more than one element, separated as [format] declares.
 *
 * All elements must be of type [T].
 * Elements are parsed lazily, as the resulting [Sequence] is evaluated.
 * The resulting sequence is tied to the [source] and can be evaluated only once.
 *
 * **Resource caution:** this method neither closes the [source] when parsing is finished nor provides a way to close it manually.
 * It is the caller's responsibility to hold a reference to the source and close it. Moreover, because the source is parsed lazily,
 * closing it before the returned sequence is fully evaluated will result in an [IOException] from the decoder.
 *
 * @throws [SerializationException] if the given JSON input cannot be deserialized to the value of type [T].
 * @throws [IOException] If an I/O error occurs and the source can't be read from.
 */
@ExperimentalSerializationApi
public inline fun <reified T> Json.decodeOkioToSequence(
    source: BufferedSource,
    format: DecodeSequenceMode = DecodeSequenceMode.AUTO_DETECT
): Sequence<T> {
    // Delegates to the overload that takes an explicit deserializer.
    return decodeOkioToSequence(source = source, deserializer = serializersModule.serializer<T>(), format = format)
}
@@ -0,0 +1,50 @@
package kotlinx.serialization.json.internal

import okio.*

/**
 * [JsonWriter] implementation that emits JSON text as UTF-8 directly into an Okio [BufferedSink].
 */
internal class JsonToOkioStreamWriter(private val target: BufferedSink) : JsonWriter {
    /** Writes the decimal text form of [value]. */
    override fun writeLong(value: Long) {
        write(value.toString())
    }

    /** Writes a single [char] as a UTF-8 encoded code point. */
    override fun writeChar(char: Char) {
        target.writeUtf8CodePoint(char.code)
    }

    /** Writes [text] verbatim, without quoting or escaping. */
    override fun write(text: String) {
        target.writeUtf8(text)
    }

    /**
     * Writes [text] surrounded by double quotes; every character that has an entry in
     * `ESCAPE_STRINGS` is replaced by that entry.
     */
    override fun writeQuoted(text: String) {
        val quote = '"'.code
        target.writeUtf8CodePoint(quote)

        // Start of the run of characters that has been scanned but not written yet.
        var pendingStart = 0
        for (index in text.indices) {
            // Char codes are never negative, so getOrNull only guards the upper bound of the table.
            val replacement = ESCAPE_STRINGS.getOrNull(text[index].code) ?: continue
            target.writeUtf8(text, pendingStart, index) // write the unescaped run preceding this character
            target.writeUtf8(replacement)
            pendingStart = index + 1
        }

        // Remaining tail; this is the whole string when nothing needed escaping.
        target.writeUtf8(text, pendingStart, text.length)
        target.writeUtf8CodePoint(quote)
    }

    /** Flushes the underlying sink; the sink itself is not closed. */
    override fun release() {
        target.flush()
    }
}

/**
 * [SerialReader] that decodes UTF-8 from an Okio [BufferedSource] into UTF-16 chars.
 *
 * Code points above U+FFFF are emitted as a surrogate pair. When such a pair does not fit into the
 * requested range, the low surrogate is kept in [pendingLowSurrogate] and returned first by the next [read] call.
 */
internal class OkioSerialReader(private val source: BufferedSource) : SerialReader {
    // Low surrogate of a pair whose high surrogate was the last char that fit into the previous read.
    private var pendingLowSurrogate: Char? = null

    /**
     * Reads up to [count] chars into [buffer], starting at index [bufferOffset].
     *
     * @return the number of chars written, or `-1` if nothing was written
     * (the source is exhausted, or [count] is not positive).
     */
    override fun read(buffer: CharArray, bufferOffset: Int, count: Int): Int {
        var written = 0

        // Emit the char left over from the previous call before touching the source again.
        val pending = pendingLowSurrogate
        if (pending != null && count > 0) {
            buffer[bufferOffset] = pending
            pendingLowSurrogate = null
            written = 1
        }

        while (written < count && !source.exhausted()) {
            val codePoint = source.readUtf8CodePoint()
            if (codePoint <= 0xFFFF) {
                // Fits into a single UTF-16 char.
                buffer[bufferOffset + written] = codePoint.toChar()
                written++
            } else {
                // Supplementary code point: split into high and low surrogates.
                val offset = codePoint - 0x10000
                val high = (Char.MIN_HIGH_SURROGATE.code + (offset ushr 10)).toChar()
                val low = (Char.MIN_LOW_SURROGATE.code + (offset and 0x3FF)).toChar()

                buffer[bufferOffset + written] = high
                written++

                if (written < count) {
                    buffer[bufferOffset + written] = low
                    written++
                } else {
                    // Requested range is full: hand the low surrogate out on the next call.
                    pendingLowSurrogate = low
                }
            }
        }
        return if (written > 0) written else -1
    }
}

@@ -0,0 +1,54 @@
package kotlinx.serialization.json

import kotlinx.serialization.KSerializer
import kotlinx.serialization.Serializable
import okio.*
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertFalse
import kotlin.test.assertTrue


/**
 * Round-trip tests for the Okio-based JSON encoding and decoding entry points.
 */
class TestTest {
    // Length of the payload string used by every test below.
    private val strLen = 1024 * 2 + 42

    @Serializable
    data class StringHolder(val data: String)

    /** Encoding through Okio produces exactly the expected JSON text. */
    @Test
    fun testParsesStringsLongerThanBuffer() {
        val str = "a".repeat(strLen)
        val input = """{"data":"$str"}"""
        assertEquals(input, Json.encodeViaOkio(StringHolder.serializer(), StringHolder(str)))
    }

    /** A value encoded into a buffer is decoded back to an equal value. */
    @Test
    fun test2() {
        val str = "a".repeat(strLen)
        val buffer = Buffer()

        Json.encodeToOkio(StringHolder.serializer(), StringHolder(str), buffer)
        val result = Json.decodeFromOkio(StringHolder.serializer(), buffer)

        assertEquals(StringHolder(str), result)
    }

    /** A single encoded value is decoded back as a one-element sequence. */
    @Test
    fun test4() {
        val str = "a".repeat(strLen)
        val buffer = Buffer()

        Json.encodeToOkio(StringHolder(str), buffer)
        // The sequence is lazy: it has to be materialized, otherwise nothing is decoded or checked.
        val decoded = Json.decodeOkioToSequence(buffer, StringHolder.serializer()).toList()

        assertEquals(listOf(StringHolder(str)), decoded)
    }
}

/**
 * Test helper: encodes [value] with [serializer] into an in-memory Okio [Buffer]
 * and returns the written bytes decoded as a UTF-8 string.
 */
fun <T> Json.encodeViaOkio(serializer: KSerializer<T>, value: T): String =
    Buffer().also { sink -> encodeToOkio(serializer, value, sink) }.readUtf8()
24 changes: 24 additions & 0 deletions formats/json/api/kotlinx-serialization-json.api
Expand Up @@ -355,3 +355,27 @@ public final class kotlinx/serialization/json/JvmStreamsKt {
public static final fun encodeToStream (Lkotlinx/serialization/json/Json;Lkotlinx/serialization/SerializationStrategy;Ljava/lang/Object;Ljava/io/OutputStream;)V
}

public final class kotlinx/serialization/json/internal/JsonStreamsKt {
public static final fun decodeByReader (Lkotlinx/serialization/json/Json;Lkotlinx/serialization/DeserializationStrategy;Lkotlinx/serialization/json/internal/SerialReader;)Ljava/lang/Object;
public static final fun decodeToSequence (Lkotlinx/serialization/json/Json;Lkotlinx/serialization/json/internal/SerialReader;Lkotlinx/serialization/DeserializationStrategy;Lkotlinx/serialization/json/DecodeSequenceMode;)Lkotlin/sequences/Sequence;
public static synthetic fun decodeToSequence$default (Lkotlinx/serialization/json/Json;Lkotlinx/serialization/json/internal/SerialReader;Lkotlinx/serialization/DeserializationStrategy;Lkotlinx/serialization/json/DecodeSequenceMode;ILjava/lang/Object;)Lkotlin/sequences/Sequence;
public static final fun encodeByWriter (Lkotlinx/serialization/json/Json;Lkotlinx/serialization/json/internal/JsonWriter;Lkotlinx/serialization/SerializationStrategy;Ljava/lang/Object;)V
}

public abstract interface class kotlinx/serialization/json/internal/JsonWriter {
public abstract fun release ()V
public abstract fun write (Ljava/lang/String;)V
public abstract fun writeChar (C)V
public abstract fun writeLong (J)V
public abstract fun writeQuoted (Ljava/lang/String;)V
}

public abstract interface class kotlinx/serialization/json/internal/SerialReader {
public abstract fun read ([CII)I
}

public final class kotlinx/serialization/json/internal/StringOpsKt {
public static final fun getESCAPE_MARKERS ()[B
public static final fun getESCAPE_STRINGS ()[Ljava/lang/String;
}

63 changes: 56 additions & 7 deletions formats/json/commonMain/src/kotlinx/serialization/json/Json.kt
Expand Up @@ -75,14 +75,9 @@ public sealed class Json(
* @throws [SerializationException] if the given value cannot be serialized to JSON.
*/
public final override fun <T> encodeToString(serializer: SerializationStrategy<T>, value: T): String {
val result = JsonStringBuilder()
val result = JsonToStringWriter()
try {
val encoder = StreamingJsonEncoder(
result, this,
WriteMode.OBJ,
arrayOfNulls(WriteMode.values().size)
)
encoder.encodeSerializableValue(serializer, value)
encodeByWriter(result, serializer, value)
return result.toString()
} finally {
result.release()
Expand Down Expand Up @@ -129,6 +124,60 @@ public sealed class Json(
}
}

/**
* Description of the JSON input shape expected by [decodeToSequence].
*
* The sequence represents a stream of objects parsed one by one;
* [DecodeSequenceMode] defines the separator between these objects.
* Typically, these objects are either not separated by meaningful characters ([WHITESPACE_SEPARATED]),
* or the whole stream is a large array of objects separated with commas ([ARRAY_WRAPPED]).
*/
@ExperimentalSerializationApi
public enum class DecodeSequenceMode {
/**
* Declares that objects in the input stream are separated by whitespace characters.
*
* The stream is read as multiple JSON objects separated by any number of whitespace characters.
* Leading and trailing whitespace characters are also permitted.
* Each individual object is parsed lazily, when it is requested from the resulting sequence.
*
* A whitespace character is one of ' ', '\n', '\r' or '\t'.
*
* Example of `WHITESPACE_SEPARATED` stream content:
* ```
* """{"key": "value"}{"key": "value2"} {"key2": "value2"}"""
* ```
*/
WHITESPACE_SEPARATED,

/**
* Declares that objects in the input stream are wrapped in a JSON array.
* Each individual object in the array is parsed lazily, when it is requested from the resulting sequence.
*
* The stream is read as multiple JSON objects wrapped into a JSON array.
* The stream must start with the array start character `[` and end with the array end character `]`;
* otherwise, [JsonDecodingException] is thrown.
*
* Example of `ARRAY_WRAPPED` stream content:
* ```
* """[{"key": "value"}, {"key": "value2"},{"key2": "value2"}]"""
* ```
*/
ARRAY_WRAPPED,

/**
* Declares that the parser itself should select between the [WHITESPACE_SEPARATED] and [ARRAY_WRAPPED] modes.
* The selection is performed by looking at the first meaningful character of the stream.
*
* In most cases, auto-detection is sufficient to parse the input correctly.
* If the input is a _whitespace-separated stream of arrays_, the parser could select an incorrect mode;
* in that case, the [DecodeSequenceMode] must be specified explicitly.
*
* Example of such an exceptional case:
* `[1, 2, 3] [4, 5, 6]\n[7, 8, 9]`
*/
AUTO_DETECT;
}

/**
* Creates an instance of [Json] configured from the optionally given [Json instance][from] and adjusted with [builderAction].
*/
Expand Down

0 comments on commit 0c7f518

Please sign in to comment.