Skip to content

Commit

Permalink
Merge pull request #25 from SunDoge/dom-api
Browse files Browse the repository at this point in the history
Dom api
  • Loading branch information
SunDoge committed Aug 26, 2023
2 parents 746ec45 + 61c4c6c commit 31517f2
Show file tree
Hide file tree
Showing 20 changed files with 1,003 additions and 24 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ resolver = "2"
members = ["simdjson-sys"]

[workspace.package]
version = "0.3.0-alpha.1"
version = "0.3.0-alpha.2"

[workspace.dependencies]
simdjson-sys = { path = "simdjson-sys", version = "0.1.0-alpha.1" }
simdjson-sys = { path = "simdjson-sys", version = "0.1.0-alpha.2" }


[dependencies]
Expand Down
62 changes: 55 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,70 @@ Add this to your `Cargo.toml`

```toml
# In the `[dependencies]` section
simdjson-rust = {git = "https://github.com/SunDoge/simdjson-rust"}
simdjson-rust = "0.3.0"
```

Then, get started.

```rust
use simdjson_rust::{ondemand::Parser, prelude::*};
use simdjson_rust::prelude::*;
use simdjson_rust::{dom, ondemand};

fn main() -> simdjson_rust::Result<()> {
let mut parser = Parser::default();
let ps = make_padded_string("[0,1,2,3]");
let mut doc = parser.iterate(&ps)?;
let mut array = doc.get_array()?;
for (index, value) in array.iter()?.enumerate() {
assert_eq!(index as u64, value?.get_uint64()?);

// ondemand api.
{
let mut parser = ondemand::Parser::default();
let mut doc = parser.iterate(&ps)?;
let mut array = doc.get_array()?;
for (index, value) in array.iter()?.enumerate() {
assert_eq!(index as u64, value?.get_uint64()?);
}
}

// dom api.
{
let mut parser = dom::Parser::default();
let elem = parser.parse(&ps)?;
let arr = elem.get_array()?;
for (index, value) in arr.iter().enumerate() {
assert_eq!(index as u64, value.get_uint64()?);
}
}

Ok(())
}
```

### `dom` and `ondemand`

`simdjson` now offers two kinds of API, `dom` and `ondemand`.
`dom` parses the whole string, while `ondemand` only parses what you request.
Due to `ffi`, the overhead of `ondemand` API is relatively high. I have tested `lto` but it only improves a little :(

Thus it is suggested that you

- use `ondemand` if you only want to access a specific part of a large json,
- use `dom` if you want to parse the whole json.


### `padded_string`

`simdjson` requires the input string to be padded. We must provide a string with `capacity = len + SIMDJSON_PADDING`.
We provide utils to do so.

```rust
use simdjson_rust::prelude::*;

// Shows several ways to obtain a padded string. Each `let ps` below is an
// alternative; later bindings shadow the earlier ones.
fn main() -> simdjson_rust::Result<()> {
// build one from a string slice with the free function.
let ps = make_padded_string("[0,1,2,3]");
// or call the conversion method on the string slice.
let ps = "[0,1,2,3]".to_padded_string();
// or reuse a buffer.
let unpadded = String::from("[1,2,3,4]");
let ps = unpadded.into_padded_string();
// or load from file.
let ps = load_padded_string("test.json")?;
Ok(())
}
```
27 changes: 21 additions & 6 deletions examples/simple.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
use simdjson_rust::prelude::*;
use simdjson_rust::{dom, ondemand, prelude::*};

fn main() -> simdjson_rust::Result<()> {
let mut parser = ondemand::Parser::default();
let ps = make_padded_string("[0,1,2,3]");
let mut doc = parser.iterate(&ps)?;
let mut array = doc.get_array()?;
for (index, value) in array.iter()?.enumerate() {
assert_eq!(index as u64, value?.get_uint64()?);

// ondemand api.
{
let mut parser = ondemand::Parser::default();
let mut doc = parser.iterate(&ps)?;
let mut array = doc.get_array()?;
for (index, value) in array.iter()?.enumerate() {
assert_eq!(index as u64, value?.get_uint64()?);
}
}

// dom api.
{
let mut parser = dom::Parser::default();
let elem = parser.parse(&ps)?;
let arr = elem.get_array()?;
for (index, value) in arr.iter().enumerate() {
assert_eq!(index as u64, value.get_uint64()?);
}
}

Ok(())
}
2 changes: 1 addition & 1 deletion simdjson-sys/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "simdjson-sys"
version = "0.1.0-alpha.1"
version = "0.1.0-alpha.2"
edition = "2021"
authors = ["SunDoge <384813529@qq.com>"]
license = "Apache-2.0"
Expand Down
3 changes: 2 additions & 1 deletion simdjson-sys/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ fn main() {
cc::Build::new()
.cpp(true)
.flag_if_supported("-std=c++17")
.flag_if_supported("/std:c++17")
.flag_if_supported("/std:c++20") // error C7555: use of designated initializers requires at least '/std:c++20'
.flag_if_supported("-pthread")
.flag_if_supported("-O3")
.flag("-DNDEBUG")
.include("simdjson/singleheader")
.file("src/simdjson_c_api.cpp")
.file("simdjson/singleheader/simdjson.cpp")
Expand Down
1 change: 1 addition & 0 deletions simdjson-sys/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ include!(concat!(env!("OUT_DIR"), "/bindings.rs"));

/// Number of extra bytes of capacity an input buffer must have beyond its
/// length before it is handed to simdjson.
pub const SIMDJSON_PADDING: usize = 64;
/// NOTE(review): presumably mirrors simdjson's `SIMDJSON_MAXSIZE_BYTES`, the
/// largest document size in bytes the library accepts — confirm against the
/// bundled header.
pub const SIMDJSON_MAXSIZE_BYTES: usize = 0xFFFF_FFFF;
/// NOTE(review): presumably the default `batch_size` to pass when parsing a
/// stream of documents, mirroring simdjson's own default — confirm against the
/// bundled header.
pub const DEFAULT_BATCH_SIZE: usize = 1_000_000;
226 changes: 225 additions & 1 deletion simdjson-sys/src/simdjson_c_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ template <typename U, typename T> inline U object_to_pointer(T &&t) {
return reinterpret_cast<U>(new T(std::move(t)));
}

// template <typename U, typename T>
// auto simdjson_result_to_struct(simdjson_result<T> &&sr) {
// T value;
// const error_code error = std::move(sr).get(value);
// return {static_cast<int>(error),
// reinterpret_cast<U>(new T(std::move(value)))};
// }

// template <typename T>
// inline int enum_result_to_number_result(simdjson_result<T>&& enum_result) {
// T inner;
Expand Down Expand Up @@ -287,4 +295,220 @@ IMPL_GET_PRIMITIVE(SJ_OD_number, ondemand::number, double, get_double)
// Returns the result of get_number_type() for the ondemand::number behind
// `self`, converted to an int.
int SJ_OD_number_get_number_type(SJ_OD_number *self) {
return static_cast<int>(
reinterpret_cast<ondemand::number *>(self)->get_number_type());
}
}

// New macros for dom
// IMPL_HANDLE(name, type) generates the helpers for an opaque C handle `name`
// that wraps the C++ type `type`:
//   - `name##_free`: deletes the wrapped object behind the handle;
//   - a `cast_to_type` overload: reinterprets the handle as a `type *`;
//   - a `move_to_handle` overload: moves a `type` into a new heap object (via
//     object_to_pointer) and returns it as a `name *`. Because the object is
//     created with `new`, it has to be released with `name##_free`.
#define IMPL_HANDLE(name, type) \
void name##_free(name *r) { delete reinterpret_cast<type *>(r); } \
inline type *cast_to_type(name *r) { return reinterpret_cast<type *>(r); } \
inline name *move_to_handle(type &&r) { \
return object_to_pointer<name *>(std::move(r)); \
}

// Instantiate the free / cast_to_type / move_to_handle helpers for every DOM
// type exposed through the C API.
IMPL_HANDLE(SJ_DOM_parser, dom::parser)
IMPL_HANDLE(SJ_DOM_array, dom::array)
IMPL_HANDLE(SJ_DOM_element, dom::element)
IMPL_HANDLE(SJ_DOM_object, dom::object)
IMPL_HANDLE(SJ_DOM_array_iterator, dom::array::iterator)
IMPL_HANDLE(SJ_DOM_object_iterator, dom::object::iterator)
IMPL_HANDLE(SJ_DOM_document, dom::document)
IMPL_HANDLE(SJ_DOM_document_stream, dom::document_stream)
IMPL_HANDLE(SJ_DOM_document_stream_iterator, dom::document_stream::iterator)

// dom::parser
// Constructs a dom::parser with `max_capacity`, moves it to the heap and
// returns it as an opaque handle. Release it with SJ_DOM_parser_free.
SJ_DOM_parser *SJ_DOM_parser_new(size_t max_capacity) {
  dom::parser fresh(max_capacity);
  return move_to_handle(std::move(fresh));
}

// Parses the `len` bytes at `json` with `parser` and returns the root element.
//
// The first member of the result is the simdjson error code. On success the
// second member is a newly allocated element handle that the caller releases
// with SJ_DOM_element_free. On failure it is a null handle, so nothing is
// leaked when the caller only looks at the error code (freeing a null handle
// is a no-op).
SJ_DOM_element_result SJ_DOM_parser_parse(SJ_DOM_parser *parser,
                                          const char *json, size_t len) {
  dom::element value;
  // The string is padded by the caller, so realloc_if_needed is false.
  const error_code error =
      cast_to_type(parser)->parse(json, len, false).get(value);
  if (error) {
    // Do not heap-allocate a handle for the default-constructed placeholder.
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(value))};
}
// Parses the `len` bytes at `json` with `parser` into the caller-provided
// document `doc` and returns the root element.
//
// The first member of the result is the simdjson error code. On success the
// second member is a newly allocated element handle (release it with
// SJ_DOM_element_free). On failure it is a null handle, so no allocation is
// leaked on the error path.
SJ_DOM_element_result SJ_DOM_parser_parse_into_document(SJ_DOM_parser *parser,
                                                        SJ_DOM_document *doc,
                                                        const char *json,
                                                        size_t len) {
  dom::element value;
  const error_code error =
      cast_to_type(parser)
          ->parse_into_document(*cast_to_type(doc), json, len)
          .get(value);
  if (error) {
    // Do not heap-allocate a handle for the default-constructed placeholder.
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(value))};
}
// Starts parsing a sequence of JSON documents stored in the `len` bytes at
// `json`, using `batch_size` as the batch size, and returns the document
// stream.
//
// The first member of the result is the simdjson error code. On success the
// second member is a newly allocated stream handle (release it with
// SJ_DOM_document_stream_free). On failure it is a null handle, so no
// allocation is leaked on the error path.
SJ_DOM_document_stream_result SJ_DOM_parser_parse_many(SJ_DOM_parser *parser,
                                                       const char *json,
                                                       size_t len,
                                                       size_t batch_size) {
  dom::document_stream value;
  const error_code error =
      cast_to_type(parser)->parse_many(json, len, batch_size).get(value);
  if (error) {
    // Do not heap-allocate a handle for the default-constructed placeholder.
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(value))};
}

// dom::element
// Returns the result of type() for the element behind `self`, converted to an
// int.
int SJ_DOM_element_type(SJ_DOM_element *self) {
  const auto kind = cast_to_type(self)->type();
  return static_cast<int>(kind);
}

// Returns the array held by the element `self`.
//
// The first member of the result is the simdjson error code from get_array().
// On success the second member is a newly allocated array handle (release it
// with SJ_DOM_array_free). On failure it is a null handle, so no allocation is
// leaked on the error path.
SJ_DOM_array_result SJ_DOM_element_get_array(SJ_DOM_element *self) {
  dom::array res;
  const error_code error = cast_to_type(self)->get_array().get(res);
  if (error) {
    // Do not heap-allocate a handle for the default-constructed placeholder.
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}
// Returns the object held by the element `self`.
//
// The first member of the result is the simdjson error code from get_object().
// On success the second member is a newly allocated object handle (release it
// with SJ_DOM_object_free). On failure it is a null handle, so no allocation
// is leaked on the error path.
SJ_DOM_object_result SJ_DOM_element_get_object(SJ_DOM_element *self) {
  dom::object res;
  const error_code error = cast_to_type(self)->get_object().get(res);
  if (error) {
    // Do not heap-allocate a handle for the default-constructed placeholder.
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}

// Reads the element `self` as a string. The result carries the simdjson error
// code from get_string() followed by the data pointer and length of the
// string view; no copy of the characters is made here.
SJ_string_view_result SJ_DOM_element_get_string(SJ_DOM_element *self) {
  std::string_view text;
  const int status =
      static_cast<int>(cast_to_type(self)->get_string().get(text));
  return {status, {.data = text.data(), .len = text.size()}};
}

// Reads the element `self` as an unsigned 64-bit integer. The result carries
// the simdjson error code from get_uint64() and the value, which stays 0 if
// get() does not assign it.
SJ_uint64_t_result SJ_DOM_element_get_uint64(SJ_DOM_element *self) {
  uint64_t number = 0;
  const int status =
      static_cast<int>(cast_to_type(self)->get_uint64().get(number));
  return {status, number};
}
// Reads the element `self` as a signed 64-bit integer. The result carries the
// simdjson error code from get_int64() and the value, which stays 0 if get()
// does not assign it.
SJ_int64_t_result SJ_DOM_element_get_int64(SJ_DOM_element *self) {
  int64_t number = 0;
  const int status =
      static_cast<int>(cast_to_type(self)->get_int64().get(number));
  return {status, number};
}
// Reads the element `self` as a double. The result carries the simdjson error
// code from get_double() and the value, which stays 0.0 if get() does not
// assign it.
SJ_double_result SJ_DOM_element_get_double(SJ_DOM_element *self) {
  double number = 0.0;
  const int status =
      static_cast<int>(cast_to_type(self)->get_double().get(number));
  return {status, number};
}
// Looks up the element addressed by the pointer string (`json`, `len`) inside
// the element `self` via at_pointer().
//
// The first member of the result is the simdjson error code. On success the
// second member is a newly allocated element handle (release it with
// SJ_DOM_element_free). On failure it is a null handle, so no allocation is
// leaked on the error path.
SJ_DOM_element_result SJ_DOM_element_at_pointer(SJ_DOM_element *self,
                                                const char *json, size_t len) {
  dom::element res;
  const error_code error =
      cast_to_type(self)->at_pointer(std::string_view(json, len)).get(res);
  if (error) {
    // Do not heap-allocate a handle for the default-constructed placeholder.
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}

// dom::array
// Returns a newly allocated handle to the begin() iterator of the array
// `self`. Release it with SJ_DOM_array_iterator_free.
SJ_DOM_array_iterator *SJ_DOM_array_begin(SJ_DOM_array *self) {
  auto first = cast_to_type(self)->begin();
  return move_to_handle(std::move(first));
}
// Returns a newly allocated handle to the end() iterator of the array `self`.
// Release it with SJ_DOM_array_iterator_free.
SJ_DOM_array_iterator *SJ_DOM_array_end(SJ_DOM_array *self) {
  auto last = cast_to_type(self)->end();
  return move_to_handle(std::move(last));
}
// Returns size() of the array behind `self`.
size_t SJ_DOM_array_size(SJ_DOM_array *self) {
  const size_t count = cast_to_type(self)->size();
  return count;
}
// Returns number_of_slots() of the array behind `self`.
size_t SJ_DOM_array_number_of_slots(SJ_DOM_array *self) {
  const size_t slots = cast_to_type(self)->number_of_slots();
  return slots;
}
// Returns the element at position `index` of the array `self` via at().
//
// The first member of the result is the simdjson error code. On success the
// second member is a newly allocated element handle (release it with
// SJ_DOM_element_free). On failure it is a null handle, so no allocation is
// leaked on the error path.
SJ_DOM_element_result SJ_DOM_array_at(SJ_DOM_array *self, size_t index) {
  dom::element res;
  const error_code error = cast_to_type(self)->at(index).get(res);
  if (error) {
    // Do not heap-allocate a handle for the default-constructed placeholder.
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}
// Looks up the element addressed by the pointer string (`json`, `len`) inside
// the array `self` via at_pointer().
//
// The first member of the result is the simdjson error code. On success the
// second member is a newly allocated element handle (release it with
// SJ_DOM_element_free). On failure it is a null handle, so no allocation is
// leaked on the error path.
SJ_DOM_element_result SJ_DOM_array_at_pointer(SJ_DOM_array *self,
                                              const char *json, size_t len) {
  dom::element res;
  const error_code error =
      cast_to_type(self)->at_pointer(std::string_view(json, len)).get(res);
  if (error) {
    // Do not heap-allocate a handle for the default-constructed placeholder.
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}

// dom::array::iterator
// Dereferences the array iterator `self` and returns the element it yields as
// a newly allocated handle. Release it with SJ_DOM_element_free.
SJ_DOM_element *SJ_DOM_array_iterator_get(SJ_DOM_array_iterator *self) {
  auto &cursor = *cast_to_type(self);
  dom::element current = *cursor;
  return move_to_handle(std::move(current));
}
// Compares two array iterators with operator!= and returns the result.
bool SJ_DOM_array_iterator_not_equal(SJ_DOM_array_iterator *lhs,
                                     SJ_DOM_array_iterator *rhs) {
  auto &left = *cast_to_type(lhs);
  auto &right = *cast_to_type(rhs);
  return left != right;
}
// Advances the array iterator `self` in place with pre-increment.
void SJ_DOM_array_iterator_step(SJ_DOM_array_iterator *self) {
  auto &cursor = *cast_to_type(self);
  ++cursor;
}

// dom::object
// Returns a newly allocated handle to the begin() iterator of the object
// `self`. Release it with SJ_DOM_object_iterator_free.
SJ_DOM_object_iterator *SJ_DOM_object_begin(SJ_DOM_object *self) {
  auto first = cast_to_type(self)->begin();
  return move_to_handle(std::move(first));
}
// Returns a newly allocated handle to the end() iterator of the object `self`.
// Release it with SJ_DOM_object_iterator_free.
SJ_DOM_object_iterator *SJ_DOM_object_end(SJ_DOM_object *self) {
  auto last = cast_to_type(self)->end();
  return move_to_handle(std::move(last));
}
// Returns size() of the object behind `self`.
size_t SJ_DOM_object_size(SJ_DOM_object *self) {
  const size_t count = cast_to_type(self)->size();
  return count;
}
// Looks up the element addressed by the pointer string (`json`, `len`) inside
// the object `self` via at_pointer().
//
// The first member of the result is the simdjson error code. On success the
// second member is a newly allocated element handle (release it with
// SJ_DOM_element_free). On failure it is a null handle, so no allocation is
// leaked on the error path.
SJ_DOM_element_result SJ_DOM_object_at_pointer(SJ_DOM_object *self,
                                               const char *json, size_t len) {
  dom::element res;
  const error_code error =
      cast_to_type(self)->at_pointer(std::string_view(json, len)).get(res);
  if (error) {
    // Do not heap-allocate a handle for the default-constructed placeholder.
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}
// Looks up the value stored under the key (`json`, `len`) in the object `self`
// via at_key().
//
// The first member of the result is the simdjson error code. On success the
// second member is a newly allocated element handle (release it with
// SJ_DOM_element_free). On failure it is a null handle, so no allocation is
// leaked on the error path.
SJ_DOM_element_result SJ_DOM_object_at_key(SJ_DOM_object *self,
                                           const char *json, size_t len) {
  dom::element res;
  const error_code error =
      cast_to_type(self)->at_key(std::string_view(json, len)).get(res);
  if (error) {
    // Do not heap-allocate a handle for the default-constructed placeholder.
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}
// Looks up the value stored under the key (`json`, `len`) in the object `self`
// via at_key_case_insensitive().
//
// The first member of the result is the simdjson error code. On success the
// second member is a newly allocated element handle (release it with
// SJ_DOM_element_free). On failure it is a null handle, so no allocation is
// leaked on the error path.
SJ_DOM_element_result SJ_DOM_object_at_key_case_insensitive(SJ_DOM_object *self,
                                                            const char *json,
                                                            size_t len) {
  dom::element res;
  const error_code error =
      cast_to_type(self)
          ->at_key_case_insensitive(std::string_view(json, len))
          .get(res);
  if (error) {
    // Do not heap-allocate a handle for the default-constructed placeholder.
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}

// dom::object::iterator
// Dereferences the object iterator `self` and returns the current entry: the
// key as a data pointer plus length (not copied here) and the value as a newly
// allocated element handle. Release the value with SJ_DOM_element_free.
SJ_DOM_key_value_pair SJ_DOM_object_iterator_get(SJ_DOM_object_iterator *self) {
  auto &cursor = *cast_to_type(self);
  dom::key_value_pair entry = *cursor;
  return {.key = {.data = entry.key.data(), .len = entry.key.size()},
          .value = move_to_handle(std::move(entry.value))};
}
// Compares two object iterators with operator!= and returns the result.
bool SJ_DOM_object_iterator_not_equal(SJ_DOM_object_iterator *lhs,
                                      SJ_DOM_object_iterator *rhs) {
  auto &left = *cast_to_type(lhs);
  auto &right = *cast_to_type(rhs);
  return left != right;
}
// Advances the object iterator `self` in place with pre-increment.
void SJ_DOM_object_iterator_step(SJ_DOM_object_iterator *self) {
  auto &cursor = *cast_to_type(self);
  ++cursor;
}

// dom::document
// Default-constructs a dom::document, moves it to the heap and returns it as
// an opaque handle. Release it with SJ_DOM_document_free.
SJ_DOM_document *SJ_DOM_document_new() {
  dom::document fresh;
  return move_to_handle(std::move(fresh));
}

// Returns the element produced by root() of the document `self` as a newly
// allocated handle. Release it with SJ_DOM_element_free.
SJ_DOM_element *SJ_DOM_document_root(SJ_DOM_document *self) {
  dom::element root = cast_to_type(self)->root();
  return move_to_handle(std::move(root));
}
// Returns a newly allocated handle to the begin() iterator of the document
// stream `self`. Release it with SJ_DOM_document_stream_iterator_free.
SJ_DOM_document_stream_iterator *
SJ_DOM_document_stream_begin(SJ_DOM_document_stream *self) {
  auto first = cast_to_type(self)->begin();
  return move_to_handle(std::move(first));
}
// Returns a newly allocated handle to the end() iterator of the document
// stream `self`. Release it with SJ_DOM_document_stream_iterator_free.
SJ_DOM_document_stream_iterator *
SJ_DOM_document_stream_end(SJ_DOM_document_stream *self) {
  auto last = cast_to_type(self)->end();
  return move_to_handle(std::move(last));
}
// Dereferences the document-stream iterator `self` and returns the root
// element of the current document.
//
// The first member of the result is the simdjson error code. On success the
// second member is a newly allocated element handle (release it with
// SJ_DOM_element_free). On failure it is a null handle, so no allocation is
// leaked on the error path.
SJ_DOM_element_result
SJ_DOM_document_stream_iterator_get(SJ_DOM_document_stream_iterator *self) {
  dom::element res;
  const error_code error = (**cast_to_type(self)).get(res);
  if (error) {
    // Do not heap-allocate a handle for the default-constructed placeholder.
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}
// Advances the document-stream iterator `self` in place with pre-increment.
void SJ_DOM_document_stream_iterator_step(
    SJ_DOM_document_stream_iterator *self) {
  auto &cursor = *cast_to_type(self);
  ++cursor;
}
// Compares two document-stream iterators with operator!= and returns the
// result.
bool SJ_DOM_document_stream_iterator_not_equal(
    SJ_DOM_document_stream_iterator *lhs,
    SJ_DOM_document_stream_iterator *rhs) {
  auto &left = *cast_to_type(lhs);
  auto &right = *cast_to_type(rhs);
  return left != right;
}

0 comments on commit 31517f2

Please sign in to comment.