Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions guide/src/migration.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,39 @@
This guide can help you upgrade code through breaking changes from one PyO3 version to the next.
For a detailed list of all changes, see the [CHANGELOG](changelog.md).

## from 0.28.* to 0.29

### Removed implementations of `From<str::Utf8Error>`, `From<string::FromUtf16Error>`, and `From<char::DecodeUtf16Error>` for `PyErr`

Previously the implementations of `From<string::FromUtf8Error>`, `From<ffi::IntoStringError>`, `From<str::Utf8Error>`, `From<string::FromUtf16Error>`, and `From<char::DecodeUtf16Error>` failed to construct the correct Python exception class, as reported in <https://github.com/PyO3/pyo3/issues/5651>.
The implementations for `string::FromUtf8Error` and `ffi::IntoStringError` were fixed in this release.

For `str::Utf8Error`, the Rust error does not contain the source bytes required to construct the Python exception.
Instead, `PyUnicodeDecodeError::new_err_from_utf8` can be used to convert the error to a `PyErr`.

Before:

```rust,ignore
fn bytes_to_str(bytes: &[u8]) -> PyResult<&str> {
Ok(std::str::from_utf8(bytes)?)
}
```

After:

```rust
# use pyo3::prelude::*;
use pyo3::exceptions::PyUnicodeDecodeError;

# #[expect(dead_code)]
fn bytes_to_str<'a>(py: Python<'_>, bytes: &'a [u8]) -> PyResult<&'a str> {
std::str::from_utf8(bytes).map_err(|e| PyUnicodeDecodeError::new_err_from_utf8(py, bytes, e))
}
```

For `string::FromUtf16Error` and `char::DecodeUtf16Error` the Rust error types do not contain any of the information required to construct a `UnicodeDecodeError`.
To raise a Python `UnicodeDecodeError` a new error should be manually constructed by calling `PyUnicodeDecodeError::new_err(...)`.

## from 0.27.* to 0.28

### Default to supporting free-threaded Python
Expand Down
1 change: 1 addition & 0 deletions newsfragments/5668.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added new `PyUnicodeDecodeError::new_err_from_utf8` API to create a `PyErr` from a `str::Utf8Error`
1 change: 1 addition & 0 deletions newsfragments/5668.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed the implementations of `From<string::FromUtf8Error>` and `From<ffi::IntoStringError>` for `PyErr`.
1 change: 1 addition & 0 deletions newsfragments/5668.removed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Removed the failing implementations of `From<str::Utf8Error>`, `From<string::FromUtf16Error>`, and `From<char::DecodeUtf16Error>` for `PyErr`.
17 changes: 10 additions & 7 deletions src/conversions/std/cstring.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::exceptions::PyUnicodeDecodeError;
#[cfg(feature = "experimental-inspect")]
use crate::inspect::PyStaticExpr;
#[cfg(feature = "experimental-inspect")]
Expand All @@ -6,7 +7,6 @@ use crate::types::PyString;
use crate::{Borrowed, Bound, FromPyObject, IntoPyObject, PyAny, PyErr, Python};
use std::borrow::Cow;
use std::ffi::{CStr, CString};
use std::str::Utf8Error;
#[cfg(any(Py_3_10, not(Py_LIMITED_API)))]
use {
crate::{exceptions::PyValueError, ffi},
Expand All @@ -16,21 +16,24 @@ use {
impl<'py> IntoPyObject<'py> for &CStr {
type Target = PyString;
type Output = Bound<'py, Self::Target>;
type Error = Utf8Error;
type Error = PyErr;

#[cfg(feature = "experimental-inspect")]
const OUTPUT_TYPE: PyStaticExpr = <&str>::OUTPUT_TYPE;

#[inline]
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
self.to_str()?.into_pyobject(py).map_err(|err| match err {})
self.to_str()
.map_err(|e| PyUnicodeDecodeError::new_err_from_utf8(py, self.to_bytes(), e))?
.into_pyobject(py)
.map_err(|err| match err {})
}
}

impl<'py> IntoPyObject<'py> for CString {
type Target = PyString;
type Output = Bound<'py, Self::Target>;
type Error = Utf8Error;
type Error = PyErr;

#[cfg(feature = "experimental-inspect")]
const OUTPUT_TYPE: PyStaticExpr = <&CStr>::OUTPUT_TYPE;
Expand All @@ -44,7 +47,7 @@ impl<'py> IntoPyObject<'py> for CString {
impl<'py> IntoPyObject<'py> for &CString {
type Target = PyString;
type Output = Bound<'py, Self::Target>;
type Error = Utf8Error;
type Error = PyErr;

#[cfg(feature = "experimental-inspect")]
const OUTPUT_TYPE: PyStaticExpr = <&CStr>::OUTPUT_TYPE;
Expand All @@ -58,7 +61,7 @@ impl<'py> IntoPyObject<'py> for &CString {
impl<'py> IntoPyObject<'py> for Cow<'_, CStr> {
type Target = PyString;
type Output = Bound<'py, Self::Target>;
type Error = Utf8Error;
type Error = PyErr;

#[cfg(feature = "experimental-inspect")]
const OUTPUT_TYPE: PyStaticExpr = <&CStr>::OUTPUT_TYPE;
Expand All @@ -71,7 +74,7 @@ impl<'py> IntoPyObject<'py> for Cow<'_, CStr> {
impl<'py> IntoPyObject<'py> for &Cow<'_, CStr> {
type Target = PyString;
type Output = Bound<'py, Self::Target>;
type Error = Utf8Error;
type Error = PyErr;

#[cfg(feature = "experimental-inspect")]
const OUTPUT_TYPE: PyStaticExpr = <&CStr>::OUTPUT_TYPE;
Expand Down
140 changes: 127 additions & 13 deletions src/err/impls.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{err::PyErrArguments, exceptions, PyErr, Python};
use crate::{err::PyErrArguments, exceptions, types, PyErr, Python};
use crate::{IntoPyObject, Py, PyAny};
use std::io;

Expand Down Expand Up @@ -118,28 +118,78 @@ macro_rules! impl_to_pyerr {
};
}

struct Utf8ErrorWithBytes {
err: std::str::Utf8Error,
bytes: Vec<u8>,
}

impl PyErrArguments for Utf8ErrorWithBytes {
fn arguments(self, py: Python<'_>) -> Py<PyAny> {
let Self { err, bytes } = self;
let start = err.valid_up_to();
let end = err.error_len().map_or(bytes.len(), |l| start + l);

let encoding = types::PyString::new(py, "utf-8").into_any();
let bytes = types::PyBytes::new(py, &bytes).into_any();
let start = types::PyInt::new(py, start).into_any();
let end = types::PyInt::new(py, end).into_any();
let reason = types::PyString::new(py, "invalid utf-8").into_any();
Comment thread
davidhewitt marked this conversation as resolved.

// FIXME(icxolu) remove unwrap
types::PyTuple::new(py, &[encoding, bytes, start, end, reason])
.unwrap()
.into_any()
.unbind()
}
}

impl PyErrArguments for std::string::FromUtf8Error {
fn arguments(self, py: Python<'_>) -> Py<PyAny> {
Utf8ErrorWithBytes {
err: self.utf8_error(),
bytes: self.into_bytes(),
}
.arguments(py)
}
}

impl std::convert::From<std::string::FromUtf8Error> for PyErr {
fn from(err: std::string::FromUtf8Error) -> PyErr {
exceptions::PyUnicodeDecodeError::new_err(err)
}
}

impl PyErrArguments for std::ffi::IntoStringError {
fn arguments(self, py: Python<'_>) -> Py<PyAny> {
Utf8ErrorWithBytes {
err: self.utf8_error(),
bytes: self.into_cstring().into_bytes(),
}
.arguments(py)
}
}

impl std::convert::From<std::ffi::IntoStringError> for PyErr {
fn from(err: std::ffi::IntoStringError) -> PyErr {
exceptions::PyUnicodeDecodeError::new_err(err)
}
}

impl_to_pyerr!(std::array::TryFromSliceError, exceptions::PyValueError);
impl_to_pyerr!(std::num::ParseIntError, exceptions::PyValueError);
impl_to_pyerr!(std::num::ParseFloatError, exceptions::PyValueError);
impl_to_pyerr!(std::num::TryFromIntError, exceptions::PyValueError);
impl_to_pyerr!(std::str::ParseBoolError, exceptions::PyValueError);
impl_to_pyerr!(std::ffi::IntoStringError, exceptions::PyUnicodeDecodeError);
impl_to_pyerr!(std::ffi::NulError, exceptions::PyValueError);
impl_to_pyerr!(std::str::Utf8Error, exceptions::PyUnicodeDecodeError);
impl_to_pyerr!(std::string::FromUtf8Error, exceptions::PyUnicodeDecodeError);
impl_to_pyerr!(
std::string::FromUtf16Error,
exceptions::PyUnicodeDecodeError
);
impl_to_pyerr!(
std::char::DecodeUtf16Error,
exceptions::PyUnicodeDecodeError
);
impl_to_pyerr!(std::net::AddrParseError, exceptions::PyValueError);

#[cfg(test)]
mod tests {
use crate::{PyErr, Python};
use super::*;

use crate::exceptions::PyUnicodeDecodeError;
use crate::types::PyAnyMethods;
use crate::{IntoPyObjectExt as _, PyErr, Python};
use std::io;

#[test]
Expand Down Expand Up @@ -179,4 +229,68 @@ mod tests {
check_err(io::ErrorKind::IsADirectory, "IsADirectoryError");
check_err(io::ErrorKind::NotADirectory, "NotADirectoryError");
}

#[test]
#[allow(invalid_from_utf8)]
fn utf8_errors() {
let bytes = b"abc\xffdef".to_vec();

let check_err = |py_err: PyErr| {
Python::attach(|py| {
let py_err = py_err.into_bound_py_any(py).unwrap();

assert!(py_err.is_instance_of::<exceptions::PyUnicodeDecodeError>());
assert_eq!(
py_err
.getattr("encoding")
.unwrap()
.extract::<String>()
.unwrap(),
"utf-8"
);
assert_eq!(
py_err
.getattr("object")
.unwrap()
.extract::<Vec<u8>>()
.unwrap(),
&*bytes
);
assert_eq!(
py_err.getattr("start").unwrap().extract::<usize>().unwrap(),
3
);
assert_eq!(
py_err.getattr("end").unwrap().extract::<usize>().unwrap(),
4
);
assert_eq!(
py_err
.getattr("reason")
.unwrap()
.extract::<String>()
.unwrap(),
"invalid utf-8"
);
});
};

let utf8_err_with_bytes = PyUnicodeDecodeError::new_err(Utf8ErrorWithBytes {
err: std::str::from_utf8(&bytes).expect_err("\\xff is invalid utf-8"),
bytes: bytes.clone(),
});
check_err(utf8_err_with_bytes);

let from_utf8_err = String::from_utf8(bytes.clone())
.expect_err("\\xff is invalid utf-8")
.into();
check_err(from_utf8_err);

let from_utf8_err = std::ffi::CString::new(bytes.clone())
.unwrap()
.into_string()
.expect_err("\\xff is invalid utf-8")
.into();
check_err(from_utf8_err);
}
}
89 changes: 86 additions & 3 deletions src/exceptions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -776,8 +776,38 @@ impl PyUnicodeDecodeError {
input: &[u8],
err: std::str::Utf8Error,
) -> PyResult<Bound<'py, PyUnicodeDecodeError>> {
let pos = err.valid_up_to();
PyUnicodeDecodeError::new(py, c"utf-8", input, pos..(pos + 1), c"invalid utf-8")
let start = err.valid_up_to();
let end = err.error_len().map_or(input.len(), |l| start + l);
PyUnicodeDecodeError::new(py, c"utf-8", input, start..end, c"invalid utf-8")
}

/// Create a new [`PyErr`](crate::PyErr) of this type from a Rust UTF-8 decoding error.
///
/// This is equivalent to [`PyUnicodeDecodeError::new_utf8`], but returning a
/// [`PyErr`](crate::PyErr) instead of an exception object.
///
/// # Example
///
/// ```
/// use pyo3::prelude::*;
/// use pyo3::exceptions::PyUnicodeDecodeError;
///
/// Python::attach(|py| {
/// let invalid_utf8 = b"fo\xd8o";
/// # #[expect(invalid_from_utf8)]
/// let err = std::str::from_utf8(invalid_utf8).expect_err("should be invalid utf8");
/// let py_err = PyUnicodeDecodeError::new_err_from_utf8(py, invalid_utf8, err);
/// })
/// ```
pub fn new_err_from_utf8(
py: Python<'_>,
bytes: &[u8],
err: std::str::Utf8Error,
) -> crate::PyErr {
match Self::new_utf8(py, bytes, err) {
Ok(e) => crate::PyErr::from_value(e.into_any()),
Err(e) => e,
}
}
}

Expand Down Expand Up @@ -925,7 +955,7 @@ mod tests {
use super::*;
use crate::types::any::PyAnyMethods;
use crate::types::{IntoPyDict, PyDict};
use crate::PyErr;
use crate::{IntoPyObjectExt as _, PyErr};

import_exception!(socket, gaierror);
import_exception!(email.errors, MessageError);
Expand Down Expand Up @@ -1222,4 +1252,57 @@ mod tests {
test_exception!(PyBytesWarning);
#[cfg(Py_3_10)]
test_exception!(PyEncodingWarning);

#[test]
#[allow(invalid_from_utf8)]
fn unicode_decode_error_from_utf8() {
Python::attach(|py| {
let bytes = b"abc\xffdef".to_vec();

let check_err = |py_err: PyErr| {
let py_err = py_err.into_bound_py_any(py).unwrap();

assert!(py_err.is_instance_of::<PyUnicodeDecodeError>());
assert_eq!(
py_err
.getattr("encoding")
.unwrap()
.extract::<String>()
.unwrap(),
"utf-8"
);
assert_eq!(
py_err
.getattr("object")
.unwrap()
.extract::<Vec<u8>>()
.unwrap(),
&*bytes
);
assert_eq!(
py_err.getattr("start").unwrap().extract::<usize>().unwrap(),
3
);
assert_eq!(
py_err.getattr("end").unwrap().extract::<usize>().unwrap(),
4
);
assert_eq!(
py_err
.getattr("reason")
.unwrap()
.extract::<String>()
.unwrap(),
"invalid utf-8"
);
};

let utf8_err_with_bytes = PyUnicodeDecodeError::new_err_from_utf8(
py,
&bytes,
std::str::from_utf8(&bytes).expect_err("\\xff is invalid utf-8"),
);
check_err(utf8_err_with_bytes);
})
}
}
Loading
Loading