Skip to content

Commit

Permalink
feat(python): Custom format checkers
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Dygalo <dmitry@dygalo.dev>
  • Loading branch information
Stranger6667 committed May 1, 2024
1 parent 0a00839 commit e564888
Show file tree
Hide file tree
Showing 8 changed files with 199 additions and 52 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

### Added

- Custom keywords support. [#379](https://github.com/Stranger6667/jsonschema-rs/issues/429)
- Custom keywords support. [#379](https://github.com/Stranger6667/jsonschema-rs/issues/379)
- Expose `JsonPointerNode` that can be converted into `JSONPointer`.
This is needed for the upcoming custom validators support.

Expand All @@ -24,6 +24,8 @@
- Bump `regex` to `1.10`.
- Bump `url` to `2.5`.
- Build CLI only if the `cli` feature is enabled.
- **BREAKING**: Extend `CompilationOptions` to support more ways to define custom format checkers (for example in Python bindings).
In turn it changes `ValidationErrorKind::Format` to contain a `String` instead of a `&'static str`.

## [0.17.1] - 2023-07-05

Expand Down
4 changes: 4 additions & 0 deletions bindings/python/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

### Added

- Defining custom format checkers. [#245](https://github.com/Stranger6667/jsonschema-rs/issues/245)

### Changed

- Update `pyo3` to `0.21`.
Expand Down
18 changes: 18 additions & 0 deletions bindings/python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,24 @@ validator = jsonschema_rs.JSONSchema.from_str('{"minimum": 42}')
...
```

You can define custom format checkers:

```python
import jsonschema_rs

def is_currency(value):
# The input value is always a string
return len(value) == 3 and value.isascii()


validator = jsonschema_rs.JSONSchema(
{"type": "string", "format": "currency"},
formats={"currency": is_currency}
)
validator.is_valid("USD") # True
validator.is_valid("invalid") # False
```

## Performance

According to our benchmarks, `jsonschema-rs` is usually faster than
Expand Down
111 changes: 88 additions & 23 deletions bindings/python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,18 @@
)]
#![allow(clippy::upper_case_acronyms)]

use std::{
any::Any,
cell::RefCell,
panic::{self, AssertUnwindSafe},
};

use jsonschema::{paths::JSONPointer, Draft};
use pyo3::{
exceptions::{self, PyValueError},
ffi::PyUnicode_AsUTF8AndSize,
prelude::*,
types::{PyAny, PyList, PyType},
types::{PyAny, PyDict, PyList, PyString, PyType},
wrap_pyfunction,
};
#[macro_use]
Expand Down Expand Up @@ -128,9 +134,14 @@ fn get_draft(draft: u8) -> PyResult<Draft> {
}
}

thread_local! {
static LAST_FORMAT_ERROR: RefCell<Option<PyErr>> = const { RefCell::new(None) };
}

fn make_options(
draft: Option<u8>,
with_meta_schemas: Option<bool>,
formats: Option<&Bound<'_, PyDict>>,
) -> PyResult<jsonschema::CompilationOptions> {
let mut options = jsonschema::JSONSchema::options();
if let Some(raw_draft_version) = draft {
Expand All @@ -139,6 +150,37 @@ fn make_options(
if with_meta_schemas == Some(true) {
options.with_meta_schemas();
}
if let Some(formats) = formats {
for (name, callback) in formats.iter() {
if !callback.is_callable() {
return Err(exceptions::PyValueError::new_err(format!(
"Format checker for '{}' must be a callable",
name
)));
}
let callback: Py<PyAny> = callback.clone().unbind();
let call_py_callback = move |value: &str| {
Python::with_gil(|py| {
let value = PyString::new_bound(py, value);
callback.call_bound(py, (value,), None)?.is_truthy(py)
})
};
options.with_format(
name.to_string(),
move |value: &str| match call_py_callback(value) {
Ok(r) => r,
Err(e) => {
LAST_FORMAT_ERROR.with(|last| {
*last.borrow_mut() = Some(e);
});
std::panic::set_hook(Box::new(|_| {}));
// Should be caught
panic!("Format checker failed")
}
},
);
}
}
Ok(options)
}

Expand All @@ -150,11 +192,15 @@ fn iter_on_error(
let instance = ser::to_value(instance)?;
let mut pyerrors = vec![];

if let Err(errors) = compiled.validate(&instance) {
for error in errors {
pyerrors.push(into_py_err(py, error)?);
}
};
panic::catch_unwind(AssertUnwindSafe(|| {
if let Err(errors) = compiled.validate(&instance) {
for error in errors {
pyerrors.push(into_py_err(py, error)?);
}
};
PyResult::Ok(())
}))
.map_err(handle_format_checked_panic)??;
Ok(ValidationErrorIter {
iter: pyerrors.into_iter(),
})
Expand All @@ -166,7 +212,8 @@ fn raise_on_error(
instance: &Bound<'_, PyAny>,
) -> PyResult<()> {
let instance = ser::to_value(instance)?;
let result = compiled.validate(&instance);
let result = panic::catch_unwind(AssertUnwindSafe(|| compiled.validate(&instance)))
.map_err(handle_format_checked_panic)?;
let error = result
.err()
.map(|mut errors| errors.next().expect("Iterator should not be empty"));
Expand Down Expand Up @@ -227,7 +274,7 @@ fn to_error_message(error: &jsonschema::ValidationError<'_>) -> String {
message
}

/// is_valid(schema, instance, draft=None, with_meta_schemas=False)
/// is_valid(schema, instance, draft=None, with_meta_schemas=False, formats=None)
///
/// A shortcut for validating the input instance against the schema.
///
Expand All @@ -237,26 +284,28 @@ fn to_error_message(error: &jsonschema::ValidationError<'_>) -> String {
/// If your workflow implies validating against the same schema, consider using `JSONSchema.is_valid`
/// instead.
#[pyfunction]
#[pyo3(text_signature = "(schema, instance, draft=None, with_meta_schemas=False)")]
#[pyo3(text_signature = "(schema, instance, draft=None, with_meta_schemas=False, formats=None)")]
fn is_valid(
py: Python<'_>,
schema: &Bound<'_, PyAny>,
instance: &Bound<'_, PyAny>,
draft: Option<u8>,
with_meta_schemas: Option<bool>,
formats: Option<&Bound<'_, PyDict>>,
) -> PyResult<bool> {
let options = make_options(draft, with_meta_schemas)?;
let options = make_options(draft, with_meta_schemas, formats)?;
let schema = ser::to_value(schema)?;
match options.compile(&schema) {
Ok(compiled) => {
let instance = ser::to_value(instance)?;
Ok(compiled.is_valid(&instance))
panic::catch_unwind(AssertUnwindSafe(|| Ok(compiled.is_valid(&instance))))
.map_err(handle_format_checked_panic)?
}
Err(error) => Err(into_py_err(py, error)?),
}
}

/// validate(schema, instance, draft=None, with_meta_schemas=False)
/// validate(schema, instance, draft=None, with_meta_schemas=False, formats=None)
///
/// Validate the input instance and raise `ValidationError` in the error case
///
Expand All @@ -268,23 +317,24 @@ fn is_valid(
/// If your workflow implies validating against the same schema, consider using `JSONSchema.validate`
/// instead.
#[pyfunction]
#[pyo3(text_signature = "(schema, instance, draft=None, with_meta_schemas=False)")]
#[pyo3(text_signature = "(schema, instance, draft=None, with_meta_schemas=False, formats=None)")]
fn validate(
py: Python<'_>,
schema: &Bound<'_, PyAny>,
instance: &Bound<'_, PyAny>,
draft: Option<u8>,
with_meta_schemas: Option<bool>,
formats: Option<&Bound<'_, PyDict>>,
) -> PyResult<()> {
let options = make_options(draft, with_meta_schemas)?;
let options = make_options(draft, with_meta_schemas, formats)?;
let schema = ser::to_value(schema)?;
match options.compile(&schema) {
Ok(compiled) => raise_on_error(py, &compiled, instance),
Err(error) => Err(into_py_err(py, error)?),
}
}

/// iter_errors(schema, instance, draft=None, with_meta_schemas=False)
/// iter_errors(schema, instance, draft=None, with_meta_schemas=False, formats=None)
///
/// Iterate the validation errors of the input instance
///
Expand All @@ -295,15 +345,16 @@ fn validate(
/// If your workflow implies validating against the same schema, consider using `JSONSchema.iter_errors`
/// instead.
#[pyfunction]
#[pyo3(text_signature = "(schema, instance, draft=None, with_meta_schemas=False)")]
#[pyo3(text_signature = "(schema, instance, draft=None, with_meta_schemas=False, formats=None)")]
fn iter_errors(
py: Python<'_>,
schema: &Bound<'_, PyAny>,
instance: &Bound<'_, PyAny>,
draft: Option<u8>,
with_meta_schemas: Option<bool>,
formats: Option<&Bound<'_, PyDict>>,
) -> PyResult<ValidationErrorIter> {
let options = make_options(draft, with_meta_schemas)?;
let options = make_options(draft, with_meta_schemas, formats)?;
let schema = ser::to_value(schema)?;
match options.compile(&schema) {
Ok(compiled) => iter_on_error(py, &compiled, instance),
Expand Down Expand Up @@ -338,17 +389,29 @@ fn get_schema_repr(schema: &serde_json::Value) -> String {
repr
}

fn handle_format_checked_panic(err: Box<dyn Any + Send>) -> PyErr {
LAST_FORMAT_ERROR.with(|last| {
if let Some(err) = last.borrow_mut().take() {
let _ = panic::take_hook();
err
} else {
exceptions::PyRuntimeError::new_err(format!("Validation panicked: {:?}", err))
}
})
}

#[pymethods]
impl JSONSchema {
#[new]
#[pyo3(text_signature = "(schema, draft=None, with_meta_schemas=False)")]
#[pyo3(text_signature = "(schema, draft=None, with_meta_schemas=False, formats=None)")]
fn new(
py: Python<'_>,
pyschema: &Bound<'_, PyAny>,
draft: Option<u8>,
with_meta_schemas: Option<bool>,
formats: Option<&Bound<'_, PyDict>>,
) -> PyResult<Self> {
let options = make_options(draft, with_meta_schemas)?;
let options = make_options(draft, with_meta_schemas, formats)?;
let raw_schema = ser::to_value(pyschema)?;
match options.compile(&raw_schema) {
Ok(schema) => Ok(JSONSchema {
Expand All @@ -358,21 +421,22 @@ impl JSONSchema {
Err(error) => Err(into_py_err(py, error)?),
}
}
/// from_str(string, draft=None, with_meta_schemas=False)
/// from_str(string, draft=None, with_meta_schemas=False, formats=None)
///
/// Create `JSONSchema` from a serialized JSON string.
///
/// >>> compiled = JSONSchema.from_str('{"minimum": 5}')
///
/// Use it if you have your schema as a string and want to utilize Rust JSON parsing.
#[classmethod]
#[pyo3(text_signature = "(string, draft=None, with_meta_schemas=False)")]
#[pyo3(text_signature = "(string, draft=None, with_meta_schemas=False, formats=None)")]
fn from_str(
_: &Bound<'_, PyType>,
py: Python<'_>,
pyschema: &Bound<'_, PyAny>,
draft: Option<u8>,
with_meta_schemas: Option<bool>,
formats: Option<&Bound<'_, PyDict>>,
) -> PyResult<Self> {
let obj_ptr = pyschema.as_ptr();
let object_type = unsafe { pyo3::ffi::Py_TYPE(obj_ptr) };
Expand All @@ -389,7 +453,7 @@ impl JSONSchema {
let slice = unsafe { std::slice::from_raw_parts(ptr.cast::<u8>(), str_size as usize) };
let raw_schema = serde_json::from_slice(slice)
.map_err(|error| PyValueError::new_err(format!("Invalid string: {}", error)))?;
let options = make_options(draft, with_meta_schemas)?;
let options = make_options(draft, with_meta_schemas, formats)?;
match options.compile(&raw_schema) {
Ok(schema) => Ok(JSONSchema {
schema,
Expand All @@ -412,7 +476,8 @@ impl JSONSchema {
#[pyo3(text_signature = "(instance)")]
fn is_valid(&self, instance: &Bound<'_, PyAny>) -> PyResult<bool> {
let instance = ser::to_value(instance)?;
Ok(self.schema.is_valid(&instance))
panic::catch_unwind(AssertUnwindSafe(|| Ok(self.schema.is_valid(&instance))))
.map_err(handle_format_checked_panic)?
}

/// validate(instance)
Expand Down
38 changes: 38 additions & 0 deletions bindings/python/tests-py/test_jsonschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,3 +275,41 @@ def test_dict_subclasses(type_, value, expected):
schema = {"type": "object", "properties": {"foo": {"type": "integer"}}}
document = type_({"foo": value})
assert is_valid(schema, document) is expected


def test_custom_format():
def is_currency(value):
return len(value) == 3 and value.isascii()

validator = JSONSchema({"type": "string", "format": "currency"}, formats={"currency": is_currency})
assert validator.is_valid("USD")
assert not validator.is_valid(42)
assert not validator.is_valid("invalid")


def test_custom_format_invalid_callback():
with pytest.raises(ValueError, match="Format checker for 'currency' must be a callable"):
JSONSchema({"type": "string", "format": "currency"}, formats={"currency": 42})


def test_custom_format_with_exception():
def is_currency(_):
raise ValueError("Invalid currency")

schema = {"type": "string", "format": "currency"}
formats = {"currency": is_currency}
validator = JSONSchema(schema, formats=formats)
with pytest.raises(ValueError, match="Invalid currency"):
validator.is_valid("USD")
with pytest.raises(ValueError, match="Invalid currency"):
validator.validate("USD")
with pytest.raises(ValueError, match="Invalid currency"):
for _ in validator.iter_errors("USD"):
pass
with pytest.raises(ValueError, match="Invalid currency"):
is_valid(schema, "USD", formats=formats)
with pytest.raises(ValueError, match="Invalid currency"):
validate(schema, "USD", formats=formats)
with pytest.raises(ValueError, match="Invalid currency"):
for _ in iter_errors(schema, "USD", formats=formats):
pass

0 comments on commit e564888

Please sign in to comment.