Skip to content

Commit

Permalink
Merge pull request #23 from Syndelis/master
Browse files Browse the repository at this point in the history
Add regex matching feature for Json overrides
  • Loading branch information
AlecTroemel committed Feb 19, 2024
2 parents 46d5044 + a9d05bb commit 77613f5
Show file tree
Hide file tree
Showing 5 changed files with 228 additions and 39 deletions.
22 changes: 0 additions & 22 deletions .github/workflows/rust.yml

This file was deleted.

46 changes: 46 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# CI workflow: builds the crate and runs its test suite.
name: Test

# Triggered by pushes to master and by pull requests that target master.
on:
  push:
    branches: [ master ]

  pull_request:
    branches: [ master ]

env:
  CARGO_TERM_COLOR: always

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3

      # The hash of Cargo.toml is part of the cache key below, so the key
      # changes whenever the manifest changes.
      - name: Generate Dependencies Hash
        id: cargo_toml_hash
        uses: KEINOS/gh-action-hash-for-cache@e0515fd0280f1ef616e13cef3b2b9566938da2c4
        with:
          path: |
            ./Cargo.toml

      - name: Retrieve Cargo's Index - Try Cache
        id: cargo_index_cache
        uses: actions/cache/restore@v3
        with:
          path: ~/.cargo
          key: ${{ runner.os }}-cargo-index-${{ steps.cargo_toml_hash.outputs.hash }}

      - name: Build
        run: cargo build --verbose

      # Skipped when the restore step above reported a cache hit for the same key.
      - name: Retrieve Cargo's Index - Save to Cache
        if: steps.cargo_index_cache.outputs.cache-hit != 'true'
        uses: actions/cache/save@v3
        with:
          path: ~/.cargo
          key: ${{ runner.os }}-cargo-index-${{ steps.cargo_toml_hash.outputs.hash }}

      - name: Run tests
        run: cargo test --verbose
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ serde = "1.0"
serde_json = "1.0"
serde_derive = "1.0"
minidom = "0.12"
regex = "1.8.3"

[features]
json_types = [] # Enable to enforce fixed JSON data types for certain XML nodes
regex_path = ["json_types"] # Enable Regex matching for JSON types
126 changes: 109 additions & 17 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#![allow(clippy::items_after_test_module)]
#![allow(clippy::single_match)]
#![allow(clippy::single_char_pattern)]
#![allow(clippy::needless_borrow)]
#![allow(clippy::ptr_arg)]
//! # quickxml_to_serde
//! Fast and flexible conversion from XML to JSON using [quick-xml](https://github.com/tafia/quick-xml)
//! and [serde](https://github.com/serde-rs/json). Inspired by [node2object](https://github.com/vorot93/node2object).
Expand Down Expand Up @@ -53,12 +58,18 @@
extern crate minidom;
extern crate serde_json;

#[cfg(feature = "regex_path")]
extern crate regex;

use minidom::{Element, Error};
use serde_json::{Map, Number, Value};
#[cfg(feature = "json_types")]
use std::collections::HashMap;
use std::str::FromStr;

#[cfg(feature = "regex_path")]
use regex::Regex;

#[cfg(test)]
mod tests;

Expand Down Expand Up @@ -89,6 +100,41 @@ pub enum JsonArray {
Infer(JsonType),
}

/// Used as a parameter for `Config.add_json_type_override`. Defines how the XML path should be
/// matched in order to apply the JSON type overriding rules. This enum exists so that the same
/// function can accept several different kinds of path matching rules.
#[derive(Debug)]
pub enum PathMatcher {
    /// An absolute path starting with a leading slash (`/`). E.g. `/a/b/c/@d`.
    /// It's implicitly converted from `&str`, `String` and `&String`; the conversion adds the
    /// leading slash when it is missing.
    Absolute(String),
    /// A regex that will be checked against the XML path. E.g. `(\w/)*c$`.
    /// It's implicitly converted from `regex::Regex`.
    #[cfg(feature = "regex_path")]
    Regex(Regex),
}

// For retro-compatibility and for syntax's sake, a string may be coerced into an absolute path.
impl From<&str> for PathMatcher {
    /// Builds an absolute path matcher, prepending `/` when `value` does not start with one.
    fn from(value: &str) -> Self {
        let path_with_leading_slash = if value.starts_with('/') {
            value.to_owned()
        } else {
            ["/", value].concat()
        };

        PathMatcher::Absolute(path_with_leading_slash)
    }
}

// An owned string is converted in place, so no second allocation is made when the leading slash
// is already there.
impl From<String> for PathMatcher {
    /// Builds an absolute path matcher, prepending `/` when `value` does not start with one.
    fn from(mut value: String) -> Self {
        if !value.starts_with('/') {
            value.insert(0, '/');
        }

        PathMatcher::Absolute(value)
    }
}

// Call sites that used to pass `&String` where `&str` was expected relied on deref coercion,
// which does not apply to a generic `Into<PathMatcher>` bound. This impl keeps them compiling.
impl From<&String> for PathMatcher {
    /// Same conversion as the `&str` one, applied to the borrowed string's contents.
    fn from(value: &String) -> Self {
        PathMatcher::from(value.as_str())
    }
}

// A compiled `Regex`, on the other hand, is coerced into a regex path matcher.
#[cfg(feature = "regex_path")]
impl From<Regex> for PathMatcher {
    /// Wraps the given regex in the `Regex` variant without modifying it.
    fn from(pattern: Regex) -> Self {
        Self::Regex(pattern)
    }
}

/// Defines which data type to apply in JSON format for consistency of output.
/// E.g., the range of XML values for the same node type may be `1234`, `001234`, `AB1234`.
/// It is impossible to guess with 100% consistency which data type to apply without seeing
Expand Down Expand Up @@ -130,7 +176,7 @@ pub struct Config {
pub xml_text_node_prop_name: String,
/// Defines how empty elements like `<x />` should be handled.
pub empty_element_handling: NullValue,
/// A list of XML paths with their JsonType overrides. They take precedence over the document-wide `json_type`
/// A map of XML paths with their JsonArray overrides. They take precedence over the document-wide `json_type`
/// property. The path syntax is based on xPath: literal element names and attribute names prefixed with `@`.
/// The path must start with a leading `/`. It is a bit of an inconvenience to remember about it, but it saves
/// an extra `if`-check in the code to improve the performance.
Expand All @@ -140,6 +186,10 @@ pub struct Config {
/// - path for `b` text node (007): `/a/b`
#[cfg(feature = "json_types")]
pub json_type_overrides: HashMap<String, JsonArray>,
/// A list of pairs of regex and JsonArray overrides. They take precedence over both the document-wide `json_type`
/// property and the `json_type_overrides` property. The path syntax is based on xPath just like `json_type_overrides`.
#[cfg(feature = "regex_path")]
pub json_regex_type_overrides: Vec<(Regex, JsonArray)>,
}

impl Config {
Expand All @@ -154,6 +204,8 @@ impl Config {
empty_element_handling: NullValue::EmptyObject,
#[cfg(feature = "json_types")]
json_type_overrides: HashMap::new(),
#[cfg(feature = "regex_path")]
json_regex_type_overrides: Vec::new(),
}
}

Expand All @@ -171,6 +223,8 @@ impl Config {
empty_element_handling,
#[cfg(feature = "json_types")]
json_type_overrides: HashMap::new(),
#[cfg(feature = "regex_path")]
json_regex_type_overrides: Vec::new(),
}
}

Expand All @@ -179,16 +233,27 @@ impl Config {
/// - **XML**: `<a><b c="123">007</b></a>`
/// - path for `c`: `/a/b/@c`
/// - path for `b` text node (007): `/a/b`
/// This function will add the leading `/` if it's missing.
/// - regex path for any `element` node: `(\w/)*element$` [requires `regex_path` feature]
#[cfg(feature = "json_types")]
pub fn add_json_type_override(self, path: &str, json_type: JsonArray) -> Self {
pub fn add_json_type_override<P>(self, path: P, json_type: JsonArray) -> Self
where
P: Into<PathMatcher>
{
let mut conf = self;
let path = if path.starts_with("/") {
path.to_owned()
} else {
["/", path].concat()
};
conf.json_type_overrides.insert(path, json_type);

match path.into() {
PathMatcher::Absolute(path) => {
conf.json_type_overrides.insert(path, json_type);
}
#[cfg(feature = "regex_path")]
PathMatcher::Regex(regex) => {
conf.json_regex_type_overrides.push((
regex,
json_type
));
}
}

conf
}
}
Expand Down Expand Up @@ -386,20 +451,47 @@ pub fn xml_string_to_json(xml: String, config: &Config) -> Result<Value, Error>
/// in the list of paths with custom config.
#[cfg(feature = "json_types")]
#[inline]
fn get_json_type(config: &Config, path: &String) -> (bool, JsonType) {
fn get_json_type_with_absolute_path<'conf>(config: &'conf Config, path: &String) -> (bool, &'conf JsonType) {
match config
.json_type_overrides
.get(path)
.unwrap_or(&JsonArray::Infer(JsonType::Infer))
.json_type_overrides
.get(path)
.unwrap_or(&JsonArray::Infer(JsonType::Infer))
{
JsonArray::Infer(v) => (false, v.clone()),
JsonArray::Always(v) => (true, v.clone()),
JsonArray::Infer(v) => (false, v),
JsonArray::Always(v) => (true, v),
}
}

/// Variant of `get_json_type` compiled when `json_types` is enabled and `regex_path` is disabled.
/// With no regex overrides to consult, it forwards `config` and `path` unchanged to
/// `get_json_type_with_absolute_path` and returns that function's result as is.
#[cfg(feature = "json_types")]
#[cfg(not(feature = "regex_path"))]
#[inline]
fn get_json_type<'conf>(config: &'conf Config, path: &String) -> (bool, &'conf JsonType) {
get_json_type_with_absolute_path(config, path)
}

/// Resolves the array and value enforcement for the node at `path`.
///
/// Regex overrides are consulted first, in the order they are stored in
/// `config.json_regex_type_overrides`; the first regex that matches `path` decides the result.
/// When no regex matches, the lookup is delegated to `get_json_type_with_absolute_path`.
///
/// The returned `bool` is `true` for a `JsonArray::Always` override and `false` for a
/// `JsonArray::Infer` one; the reference points at the `JsonType` stored in that override.
#[cfg(feature = "json_types")]
#[cfg(feature = "regex_path")]
#[inline]
fn get_json_type<'conf>(config: &'conf Config, path: &String) -> (bool, &'conf JsonType) {
    let first_regex_override = config
        .json_regex_type_overrides
        .iter()
        .find(|(pattern, _)| pattern.is_match(path))
        .map(|(_, enforcement)| enforcement);

    match first_regex_override {
        Some(JsonArray::Infer(json_type)) => (false, json_type),
        Some(JsonArray::Always(json_type)) => (true, json_type),
        // No regex matched: fall back to the exact-path lookup.
        None => get_json_type_with_absolute_path(config, path),
    }
}

/// Always returns `false` paired with `JsonType::Infer` if the `json_types` feature is not enabled.
#[cfg(not(feature = "json_types"))]
#[inline]
fn get_json_type(_config: &Config, _path: &String) -> (bool, JsonType) {
(false, JsonType::Infer)
fn get_json_type<'conf>(_config: &'conf Config, _path: &String) -> (bool, &'conf JsonType) {
(false, &JsonType::Infer)
}
71 changes: 71 additions & 0 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -391,3 +391,74 @@ fn test_xml_str_to_json() {

assert_eq!(expected, result.unwrap());
}

#[cfg(feature = "regex_path")]
#[test]
fn test_regex_json_type_overrides() {
    use regex::Regex;

    // Both cases use the default config plus one regex override that enforces arrays.
    let always_array_for = |pattern: &str| {
        Config::new_with_defaults().add_json_type_override(
            Regex::new(pattern).unwrap(),
            JsonArray::Always(JsonType::Infer),
        )
    };

    // Case 1: a lone `b` element is expected to be emitted as a one-item array of objects.
    let single_child_xml = r#"<a attr1="att1"><b c="att">1</b></a>"#;
    let single_child_expected = json!({
        "a": {
            "@attr1": "att1",
            "b": [{ "@c": "att", "#text": 1 }]
        }
    });

    let single_child_config = always_array_for(r"\w/b");
    let single_child_result =
        xml_string_to_json(String::from(single_child_xml), &single_child_config);
    assert_eq!(single_child_expected, single_child_result.unwrap());

    // Case 2: multiple elements with the same tag, nested at different depths, are all
    // expected to be emitted as arrays.
    let nested_xml = r#"
<a attr1="att1">
<element name="el1" />
<element name="el2" />
<b attr2="att2">
<element name="el3" />
<c attr3="att3">
<element name="el4" />
</c>
</b>
</a>
"#;

    let nested_expected = json!({
        "a": {
            "@attr1": "att1",
            "element": [
                { "@name": "el1" },
                { "@name": "el2" }
            ],
            "b": {
                "@attr2": "att2",
                "element": [
                    { "@name": "el3" }
                ],
                "c": {
                    "@attr3": "att3",
                    "element": [
                        { "@name": "el4" }
                    ]
                }
            }
        }
    });

    let nested_config = always_array_for(r"element");
    let nested_result = xml_string_to_json(String::from(nested_xml), &nested_config);
    assert_eq!(nested_expected, nested_result.unwrap());
}

0 comments on commit 77613f5

Please sign in to comment.