diff --git a/guide/src/migration.md b/guide/src/migration.md index e2f730e2b77..9bb01b1be44 100644 --- a/guide/src/migration.md +++ b/guide/src/migration.md @@ -3,6 +3,39 @@ This guide can help you upgrade code through breaking changes from one PyO3 version to the next. For a detailed list of all changes, see the [CHANGELOG](changelog.md). +## from 0.28.* to 0.29 + +### Removed implementations of `From`, `From`, and `From` for `PyErr` + +Previously the implementations of `From`, `From`, `From`, `From`, and `From` failed to construct the correct Python exception class, as reported in . +The implementations for `string::FromUtf8Error` and `ffi::IntoStringError` were fixed in this release. + +For `str::Utf8Error`, the Rust error does not contain the source bytes required to construct the Python exception. +Instead, `PyUnicodeDecodeError::new_err_from_utf8` can be used to convert the error to a `PyErr`. + +Before: + +```rust,ignore +fn bytes_to_str(bytes: &[u8]) -> PyResult<&str> { + Ok(std::str::from_utf8(bytes)?) +} +``` + +After: + +```rust +# use pyo3::prelude::*; +use pyo3::exceptions::PyUnicodeDecodeError; + +# #[expect(dead_code)] +fn bytes_to_str<'a>(py: Python<'_>, bytes: &'a [u8]) -> PyResult<&'a str> { + std::str::from_utf8(bytes).map_err(|e| PyUnicodeDecodeError::new_err_from_utf8(py, bytes, e)) +} +``` + +For `string::FromUtf16Error` and `char::DecodeUtf16Error` the Rust error types do not contain any of the information required to construct a `UnicodeDecodeError`. +To raise a Python `UnicodeDecodeError` a new error should be manually constructed by calling `PyUnicodeDecodeError::new_err(...)`. + ## from 0.27.* to 0.28 ### Default to supporting free-threaded Python diff --git a/newsfragments/5668.added.md b/newsfragments/5668.added.md new file mode 100644 index 00000000000..e25dcab2314 --- /dev/null +++ b/newsfragments/5668.added.md @@ -0,0 +1 @@ +Added new `PyUnicodeDecodeError::new_err_from_utf8` API to create a `PyErr` from a `str::Utf8Error` diff --git a/newsfragments/5668.fixed.md b/newsfragments/5668.fixed.md new file mode 100644 index 00000000000..1b89b4fb10b --- /dev/null +++ b/newsfragments/5668.fixed.md @@ -0,0 +1 @@ +Fixed the implementations of `From` and `From` for `PyErr`. diff --git a/newsfragments/5668.removed.md b/newsfragments/5668.removed.md new file mode 100644 index 00000000000..990aa7baf4c --- /dev/null +++ b/newsfragments/5668.removed.md @@ -0,0 +1 @@ +Removed the failing implementations of `From`, `From`, and `From` for `PyErr`. diff --git a/src/conversions/std/cstring.rs b/src/conversions/std/cstring.rs index 2dd05265639..43f5aaefcc1 100644 --- a/src/conversions/std/cstring.rs +++ b/src/conversions/std/cstring.rs @@ -1,3 +1,4 @@ +use crate::exceptions::PyUnicodeDecodeError; #[cfg(feature = "experimental-inspect")] use crate::inspect::PyStaticExpr; #[cfg(feature = "experimental-inspect")] @@ -6,7 +7,6 @@ use crate::types::PyString; use crate::{Borrowed, Bound, FromPyObject, IntoPyObject, PyAny, PyErr, Python}; use std::borrow::Cow; use std::ffi::{CStr, CString}; -use std::str::Utf8Error; #[cfg(any(Py_3_10, not(Py_LIMITED_API)))] use { crate::{exceptions::PyValueError, ffi}, @@ -16,21 +16,24 @@ use { impl<'py> IntoPyObject<'py> for &CStr { type Target = PyString; type Output = Bound<'py, Self::Target>; - type Error = Utf8Error; + type Error = PyErr; #[cfg(feature = "experimental-inspect")] const OUTPUT_TYPE: PyStaticExpr = <&str>::OUTPUT_TYPE; #[inline] fn into_pyobject(self, py: Python<'py>) -> Result { - self.to_str()?.into_pyobject(py).map_err(|err| match err {}) + self.to_str() + .map_err(|e| PyUnicodeDecodeError::new_err_from_utf8(py, self.to_bytes(), e))? + .into_pyobject(py) + .map_err(|err| match err {}) } } impl<'py> IntoPyObject<'py> for CString { type Target = PyString; type Output = Bound<'py, Self::Target>; - type Error = Utf8Error; + type Error = PyErr; #[cfg(feature = "experimental-inspect")] const OUTPUT_TYPE: PyStaticExpr = <&CStr>::OUTPUT_TYPE; @@ -44,7 +47,7 @@ impl<'py> IntoPyObject<'py> for CString { impl<'py> IntoPyObject<'py> for &CString { type Target = PyString; type Output = Bound<'py, Self::Target>; - type Error = Utf8Error; + type Error = PyErr; #[cfg(feature = "experimental-inspect")] const OUTPUT_TYPE: PyStaticExpr = <&CStr>::OUTPUT_TYPE; @@ -58,7 +61,7 @@ impl<'py> IntoPyObject<'py> for &CString { impl<'py> IntoPyObject<'py> for Cow<'_, CStr> { type Target = PyString; type Output = Bound<'py, Self::Target>; - type Error = Utf8Error; + type Error = PyErr; #[cfg(feature = "experimental-inspect")] const OUTPUT_TYPE: PyStaticExpr = <&CStr>::OUTPUT_TYPE; @@ -71,7 +74,7 @@ impl<'py> IntoPyObject<'py> for Cow<'_, CStr> { impl<'py> IntoPyObject<'py> for &Cow<'_, CStr> { type Target = PyString; type Output = Bound<'py, Self::Target>; - type Error = Utf8Error; + type Error = PyErr; #[cfg(feature = "experimental-inspect")] const OUTPUT_TYPE: PyStaticExpr = <&CStr>::OUTPUT_TYPE; diff --git a/src/err/impls.rs b/src/err/impls.rs index c49c7a56f82..86e36a4c136 100644 --- a/src/err/impls.rs +++ b/src/err/impls.rs @@ -1,4 +1,4 @@ -use crate::{err::PyErrArguments, exceptions, PyErr, Python}; +use crate::{err::PyErrArguments, exceptions, types, PyErr, Python}; use crate::{IntoPyObject, Py, PyAny}; use std::io; @@ -118,28 +118,78 @@ macro_rules! impl_to_pyerr { }; } +struct Utf8ErrorWithBytes { + err: std::str::Utf8Error, + bytes: Vec, +} + +impl PyErrArguments for Utf8ErrorWithBytes { + fn arguments(self, py: Python<'_>) -> Py { + let Self { err, bytes } = self; + let start = err.valid_up_to(); + let end = err.error_len().map_or(bytes.len(), |l| start + l); + + let encoding = types::PyString::new(py, "utf-8").into_any(); + let bytes = types::PyBytes::new(py, &bytes).into_any(); + let start = types::PyInt::new(py, start).into_any(); + let end = types::PyInt::new(py, end).into_any(); + let reason = types::PyString::new(py, "invalid utf-8").into_any(); + + // FIXME(icxolu) remove unwrap + types::PyTuple::new(py, &[encoding, bytes, start, end, reason]) + .unwrap() + .into_any() + .unbind() + } +} + +impl PyErrArguments for std::string::FromUtf8Error { + fn arguments(self, py: Python<'_>) -> Py { + Utf8ErrorWithBytes { + err: self.utf8_error(), + bytes: self.into_bytes(), + } + .arguments(py) + } +} + +impl std::convert::From for PyErr { + fn from(err: std::string::FromUtf8Error) -> PyErr { + exceptions::PyUnicodeDecodeError::new_err(err) + } +} + +impl PyErrArguments for std::ffi::IntoStringError { + fn arguments(self, py: Python<'_>) -> Py { + Utf8ErrorWithBytes { + err: self.utf8_error(), + bytes: self.into_cstring().into_bytes(), + } + .arguments(py) + } +} + +impl std::convert::From for PyErr { + fn from(err: std::ffi::IntoStringError) -> PyErr { + exceptions::PyUnicodeDecodeError::new_err(err) + } +} + impl_to_pyerr!(std::array::TryFromSliceError, exceptions::PyValueError); impl_to_pyerr!(std::num::ParseIntError, exceptions::PyValueError); impl_to_pyerr!(std::num::ParseFloatError, exceptions::PyValueError); impl_to_pyerr!(std::num::TryFromIntError, exceptions::PyValueError); impl_to_pyerr!(std::str::ParseBoolError, exceptions::PyValueError); -impl_to_pyerr!(std::ffi::IntoStringError, exceptions::PyUnicodeDecodeError); impl_to_pyerr!(std::ffi::NulError, exceptions::PyValueError); -impl_to_pyerr!(std::str::Utf8Error, exceptions::PyUnicodeDecodeError); -impl_to_pyerr!(std::string::FromUtf8Error, exceptions::PyUnicodeDecodeError); -impl_to_pyerr!( - std::string::FromUtf16Error, - exceptions::PyUnicodeDecodeError -); -impl_to_pyerr!( - std::char::DecodeUtf16Error, - exceptions::PyUnicodeDecodeError -); impl_to_pyerr!(std::net::AddrParseError, exceptions::PyValueError); #[cfg(test)] mod tests { - use crate::{PyErr, Python}; + use super::*; + + use crate::exceptions::PyUnicodeDecodeError; + use crate::types::PyAnyMethods; + use crate::{IntoPyObjectExt as _, PyErr, Python}; use std::io; #[test] @@ -179,4 +229,68 @@ mod tests { check_err(io::ErrorKind::IsADirectory, "IsADirectoryError"); check_err(io::ErrorKind::NotADirectory, "NotADirectoryError"); } + + #[test] + #[allow(invalid_from_utf8)] + fn utf8_errors() { + let bytes = b"abc\xffdef".to_vec(); + + let check_err = |py_err: PyErr| { + Python::attach(|py| { + let py_err = py_err.into_bound_py_any(py).unwrap(); + + assert!(py_err.is_instance_of::()); + assert_eq!( + py_err + .getattr("encoding") + .unwrap() + .extract::() + .unwrap(), + "utf-8" + ); + assert_eq!( + py_err + .getattr("object") + .unwrap() + .extract::>() + .unwrap(), + &*bytes + ); + assert_eq!( + py_err.getattr("start").unwrap().extract::().unwrap(), + 3 + ); + assert_eq!( + py_err.getattr("end").unwrap().extract::().unwrap(), + 4 + ); + assert_eq!( + py_err + .getattr("reason") + .unwrap() + .extract::() + .unwrap(), + "invalid utf-8" + ); + }); + }; + + let utf8_err_with_bytes = PyUnicodeDecodeError::new_err(Utf8ErrorWithBytes { + err: std::str::from_utf8(&bytes).expect_err("\\xff is invalid utf-8"), + bytes: bytes.clone(), + }); + check_err(utf8_err_with_bytes); + + let from_utf8_err = String::from_utf8(bytes.clone()) + .expect_err("\\xff is invalid utf-8") + .into(); + check_err(from_utf8_err); + + let from_utf8_err = std::ffi::CString::new(bytes.clone()) + .unwrap() + .into_string() + .expect_err("\\xff is invalid utf-8") + .into(); + check_err(from_utf8_err); + } } diff --git a/src/exceptions.rs b/src/exceptions.rs index 5ad34227a94..f47d6f1ab50 100644 --- a/src/exceptions.rs +++ b/src/exceptions.rs @@ -776,8 +776,38 @@ impl PyUnicodeDecodeError { input: &[u8], err: std::str::Utf8Error, ) -> PyResult> { - let pos = err.valid_up_to(); - PyUnicodeDecodeError::new(py, c"utf-8", input, pos..(pos + 1), c"invalid utf-8") + let start = err.valid_up_to(); + let end = err.error_len().map_or(input.len(), |l| start + l); + PyUnicodeDecodeError::new(py, c"utf-8", input, start..end, c"invalid utf-8") + } + + /// Create a new [`PyErr`](crate::PyErr) of this type from a Rust UTF-8 decoding error. + /// + /// This is equivalent to [`PyUnicodeDecodeError::new_utf8`], but returning a + /// [`PyErr`](crate::PyErr) instead of an exception object. + /// + /// # Example + /// + /// ``` + /// use pyo3::prelude::*; + /// use pyo3::exceptions::PyUnicodeDecodeError; + /// + /// Python::attach(|py| { + /// let invalid_utf8 = b"fo\xd8o"; + /// # #[expect(invalid_from_utf8)] + /// let err = std::str::from_utf8(invalid_utf8).expect_err("should be invalid utf8"); + /// let py_err = PyUnicodeDecodeError::new_err_from_utf8(py, invalid_utf8, err); + /// }) + /// ``` + pub fn new_err_from_utf8( + py: Python<'_>, + bytes: &[u8], + err: std::str::Utf8Error, + ) -> crate::PyErr { + match Self::new_utf8(py, bytes, err) { + Ok(e) => crate::PyErr::from_value(e.into_any()), + Err(e) => e, + } } } @@ -925,7 +955,7 @@ mod tests { use super::*; use crate::types::any::PyAnyMethods; use crate::types::{IntoPyDict, PyDict}; - use crate::PyErr; + use crate::{IntoPyObjectExt as _, PyErr}; import_exception!(socket, gaierror); import_exception!(email.errors, MessageError); @@ -1222,4 +1252,57 @@ mod tests { test_exception!(PyBytesWarning); #[cfg(Py_3_10)] test_exception!(PyEncodingWarning); + + #[test] + #[allow(invalid_from_utf8)] + fn unicode_decode_error_from_utf8() { + Python::attach(|py| { + let bytes = b"abc\xffdef".to_vec(); + + let check_err = |py_err: PyErr| { + let py_err = py_err.into_bound_py_any(py).unwrap(); + + assert!(py_err.is_instance_of::()); + assert_eq!( + py_err + .getattr("encoding") + .unwrap() + .extract::() + .unwrap(), + "utf-8" + ); + assert_eq!( + py_err + .getattr("object") + .unwrap() + .extract::>() + .unwrap(), + &*bytes + ); + assert_eq!( + py_err.getattr("start").unwrap().extract::().unwrap(), + 3 + ); + assert_eq!( + py_err.getattr("end").unwrap().extract::().unwrap(), + 4 + ); + assert_eq!( + py_err + .getattr("reason") + .unwrap() + .extract::() + .unwrap(), + "invalid utf-8" + ); + }; + + let utf8_err_with_bytes = PyUnicodeDecodeError::new_err_from_utf8( + py, + &bytes, + std::str::from_utf8(&bytes).expect_err("\\xff is invalid utf-8"), + ); + check_err(utf8_err_with_bytes); + }) + } } diff --git a/src/impl_/pymodule.rs b/src/impl_/pymodule.rs index 1b2201b852e..0bdf0e350e2 100644 --- a/src/impl_/pymodule.rs +++ b/src/impl_/pymodule.rs @@ -170,7 +170,17 @@ impl ModuleDef { { let ffi_def = self.ffi_def.get(); - let name = unsafe { CStr::from_ptr((*ffi_def).m_name).to_str()? }.to_string(); + let m_name = unsafe { CStr::from_ptr((*ffi_def).m_name) }; + let name = m_name + .to_str() + .map_err(|e| { + crate::exceptions::PyUnicodeDecodeError::new_err_from_utf8( + py, + m_name.to_bytes(), + e, + ) + })? + .to_string(); let kwargs = PyDict::new(py); kwargs.set_item("name", name)?; let spec = simple_ns.call((), Some(&kwargs))?; diff --git a/tests/ui/invalid_result_conversion.stderr b/tests/ui/invalid_result_conversion.stderr index ef31f073689..4adc47dee38 100644 --- a/tests/ui/invalid_result_conversion.stderr +++ b/tests/ui/invalid_result_conversion.stderr @@ -8,10 +8,10 @@ error[E0277]: the trait bound `PyErr: From` is not satisfied `PyErr` implements `From` `PyErr` implements `From>` `PyErr` implements `From>` - `PyErr` implements `From` `PyErr` implements `From>` `PyErr` implements `From>` - `PyErr` implements `From` `PyErr` implements `From` + `PyErr` implements `From` + `PyErr` implements `From>` and $N others = note: required for `MyError` to implement `Into`