From 59a8a569dd52f93907048df26fa7227d117605f2 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Wed, 30 Jul 2025 01:03:12 +0900 Subject: [PATCH 1/2] Wtf8-compatible fixes (#5985) * deprecate more payload_* functions * loose trait bount for PyInterned * Fix levenstein * Fix genericalias * Fix PyBoundMethod::repr * fix repr * Fix fromhex --- common/src/str.rs | 4 ++-- vm/src/builtins/bytearray.rs | 2 +- vm/src/builtins/bytes.rs | 2 +- vm/src/builtins/function.rs | 9 ++++++--- vm/src/builtins/genericalias.rs | 8 ++++---- vm/src/builtins/union.rs | 8 ++++---- vm/src/bytes_inner.rs | 8 ++++---- vm/src/intern.rs | 11 ++++------- vm/src/object/core.rs | 10 +++++++--- vm/src/suggestion.rs | 4 ++-- 10 files changed, 35 insertions(+), 31 deletions(-) diff --git a/common/src/str.rs b/common/src/str.rs index af30ed6dec..10d0296619 100644 --- a/common/src/str.rs +++ b/common/src/str.rs @@ -487,7 +487,7 @@ pub mod levenshtein { if a == b { CASE_COST } else { MOVE_COST } } - pub fn levenshtein_distance(a: &str, b: &str, max_cost: usize) -> usize { + pub fn levenshtein_distance(a: &[u8], b: &[u8], max_cost: usize) -> usize { thread_local! { #[allow(clippy::declare_interior_mutable_const)] static BUFFER: RefCell<[usize; MAX_STRING_SIZE]> = const { @@ -499,7 +499,7 @@ pub mod levenshtein { return 0; } - let (mut a_bytes, mut b_bytes) = (a.as_bytes(), b.as_bytes()); + let (mut a_bytes, mut b_bytes) = (a, b); let (mut a_begin, mut a_end) = (0usize, a.len()); let (mut b_begin, mut b_end) = (0usize, b.len()); diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index ce48b2bd7c..58406e3a20 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -328,7 +328,7 @@ impl PyByteArray { #[pyclassmethod] fn fromhex(cls: PyTypeRef, string: PyStrRef, vm: &VirtualMachine) -> PyResult { - let bytes = PyBytesInner::fromhex(string.as_str(), vm)?; + let bytes = PyBytesInner::fromhex(string.as_bytes(), vm)?; let bytes = vm.ctx.new_bytes(bytes); let args = vec![bytes.into()].into(); PyType::call(&cls, args, vm) diff --git a/vm/src/builtins/bytes.rs b/vm/src/builtins/bytes.rs index 22c93ee929..038592e543 100644 --- a/vm/src/builtins/bytes.rs +++ b/vm/src/builtins/bytes.rs @@ -264,7 +264,7 @@ impl PyBytes { #[pyclassmethod] fn fromhex(cls: PyTypeRef, string: PyStrRef, vm: &VirtualMachine) -> PyResult { - let bytes = PyBytesInner::fromhex(string.as_str(), vm)?; + let bytes = PyBytesInner::fromhex(string.as_bytes(), vm)?; let bytes = vm.ctx.new_bytes(bytes).into(); PyType::call(&cls, vec![bytes].into(), vm) } diff --git a/vm/src/builtins/function.rs b/vm/src/builtins/function.rs index 06a91ff36c..c93b50010e 100644 --- a/vm/src/builtins/function.rs +++ b/vm/src/builtins/function.rs @@ -859,10 +859,13 @@ impl Representable for PyBoundMethod { vm.get_attribute_opt(zelf.function.clone(), "__name__")? }; let func_name: Option = func_name.and_then(|o| o.downcast().ok()); + let formatted_func_name = match func_name { + Some(name) => name.to_string(), + None => "?".to_string(), + }; + let object_repr = zelf.object.repr(vm)?; Ok(format!( - "", - func_name.as_ref().map_or("?", |s| s.as_str()), - &zelf.object.repr(vm)?.as_str(), + "", )) } } diff --git a/vm/src/builtins/genericalias.rs b/vm/src/builtins/genericalias.rs index 00bd65583d..667a3c1d39 100644 --- a/vm/src/builtins/genericalias.rs +++ b/vm/src/builtins/genericalias.rs @@ -122,16 +122,16 @@ impl PyGenericAlias { .get_attribute_opt(obj.clone(), identifier!(vm, __args__))? .is_some() { - return Ok(obj.repr(vm)?.as_str().to_string()); + return Ok(obj.repr(vm)?.to_string()); } match ( vm.get_attribute_opt(obj.clone(), identifier!(vm, __qualname__))? - .and_then(|o| o.downcast_ref::().map(|n| n.as_str().to_string())), + .and_then(|o| o.downcast_ref::().map(|n| n.to_string())), vm.get_attribute_opt(obj.clone(), identifier!(vm, __module__))? - .and_then(|o| o.downcast_ref::().map(|m| m.as_str().to_string())), + .and_then(|o| o.downcast_ref::().map(|m| m.to_string())), ) { - (None, _) | (_, None) => Ok(obj.repr(vm)?.as_str().to_string()), + (None, _) | (_, None) => Ok(obj.repr(vm)?.to_string()), (Some(qualname), Some(module)) => Ok(if module == "builtins" { qualname } else { diff --git a/vm/src/builtins/union.rs b/vm/src/builtins/union.rs index 962f3b5eb2..16d2b7831c 100644 --- a/vm/src/builtins/union.rs +++ b/vm/src/builtins/union.rs @@ -59,16 +59,16 @@ impl PyUnion { .get_attribute_opt(obj.clone(), identifier!(vm, __args__))? .is_some() { - return Ok(obj.repr(vm)?.as_str().to_string()); + return Ok(obj.repr(vm)?.to_string()); } match ( vm.get_attribute_opt(obj.clone(), identifier!(vm, __qualname__))? - .and_then(|o| o.downcast_ref::().map(|n| n.as_str().to_string())), + .and_then(|o| o.downcast_ref::().map(|n| n.to_string())), vm.get_attribute_opt(obj.clone(), identifier!(vm, __module__))? - .and_then(|o| o.downcast_ref::().map(|m| m.as_str().to_string())), + .and_then(|o| o.downcast_ref::().map(|m| m.to_string())), ) { - (None, _) | (_, None) => Ok(obj.repr(vm)?.as_str().to_string()), + (None, _) | (_, None) => Ok(obj.repr(vm)?.to_string()), (Some(qualname), Some(module)) => Ok(if module == "builtins" { qualname } else { diff --git a/vm/src/bytes_inner.rs b/vm/src/bytes_inner.rs index db1e843091..58d272e1cf 100644 --- a/vm/src/bytes_inner.rs +++ b/vm/src/bytes_inner.rs @@ -459,11 +459,11 @@ impl PyBytesInner { bytes_to_hex(self.elements.as_slice(), sep, bytes_per_sep, vm) } - pub fn fromhex(string: &str, vm: &VirtualMachine) -> PyResult> { - let mut iter = string.bytes().enumerate(); - let mut bytes: Vec = Vec::with_capacity(string.len() / 2); + pub fn fromhex(bytes: &[u8], vm: &VirtualMachine) -> PyResult> { + let mut iter = bytes.iter().enumerate(); + let mut bytes: Vec = Vec::with_capacity(bytes.len() / 2); let i = loop { - let (i, b) = match iter.next() { + let (i, &b) = match iter.next() { Some(val) => val, None => { return Ok(bytes); diff --git a/vm/src/intern.rs b/vm/src/intern.rs index 8463e3a1c1..a5b2a798d5 100644 --- a/vm/src/intern.rs +++ b/vm/src/intern.rs @@ -128,10 +128,7 @@ impl CachedPyStrRef { } } -pub struct PyInterned -where - T: PyPayload, -{ +pub struct PyInterned { inner: Py, } @@ -173,14 +170,14 @@ impl std::hash::Hash for PyInterned { } } -impl AsRef> for PyInterned { +impl AsRef> for PyInterned { #[inline(always)] fn as_ref(&self) -> &Py { &self.inner } } -impl Deref for PyInterned { +impl Deref for PyInterned { type Target = Py; #[inline(always)] fn deref(&self) -> &Self::Target { @@ -197,7 +194,7 @@ impl PartialEq for PyInterned { impl Eq for PyInterned {} -impl std::fmt::Debug for PyInterned { +impl std::fmt::Debug for PyInterned { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Debug::fmt(&**self, f)?; write!(f, "@{:p}", self.as_ptr()) diff --git a/vm/src/object/core.rs b/vm/src/object/core.rs index 1e95c69ea2..54a6657a9f 100644 --- a/vm/src/object/core.rs +++ b/vm/src/object/core.rs @@ -633,6 +633,7 @@ impl PyObject { self.weak_ref_list().map(|wrl| wrl.get_weak_references()) } + #[deprecated(note = "use downcastable instead")] #[inline(always)] pub fn payload_is(&self) -> bool { self.0.typeid == T::payload_type_id() @@ -642,6 +643,7 @@ impl PyObject { /// /// # Safety /// The actual payload type must be T. + #[deprecated(note = "use downcast_unchecked_ref instead")] #[inline(always)] pub const unsafe fn payload_unchecked(&self) -> &T { // we cast to a PyInner first because we don't know T's exact offset because of @@ -653,7 +655,9 @@ impl PyObject { #[deprecated(note = "use downcast_ref instead")] #[inline(always)] pub fn payload(&self) -> Option<&T> { + #[allow(deprecated)] if self.payload_is::() { + #[allow(deprecated)] Some(unsafe { self.payload_unchecked() }) } else { None @@ -719,7 +723,7 @@ impl PyObject { /// Check if this object can be downcast to T. #[inline(always)] pub fn downcastable(&self) -> bool { - self.payload_is::() + self.0.typeid == T::payload_type_id() } /// Attempt to downcast this reference to a subclass. @@ -899,9 +903,9 @@ impl Py { }) } + #[inline] pub fn payload(&self) -> &T { - // SAFETY: we know the payload is T because of the type parameter - unsafe { self.as_object().payload_unchecked() } + &self.0.payload } } diff --git a/vm/src/suggestion.rs b/vm/src/suggestion.rs index 32d4b623b4..2cc935873c 100644 --- a/vm/src/suggestion.rs +++ b/vm/src/suggestion.rs @@ -26,7 +26,7 @@ pub fn calculate_suggestions<'a>( for item in dir_iter { let item_name = item.downcast_ref::()?; - if name.as_str() == item_name.as_str() { + if name.as_bytes() == item_name.as_bytes() { continue; } // No more than 1/3 of the characters should need changed @@ -35,7 +35,7 @@ pub fn calculate_suggestions<'a>( suggestion_distance - 1, ); let current_distance = - levenshtein_distance(name.as_str(), item_name.as_str(), max_distance); + levenshtein_distance(name.as_bytes(), item_name.as_bytes(), max_distance); if current_distance > max_distance { continue; } From f402deef6d447d39f442443fa8a1968f972288d1 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Wed, 30 Jul 2025 01:09:21 +0900 Subject: [PATCH 2/2] Deprecate ::new_ref (#6046) --- vm/src/builtins/bytearray.rs | 3 ++- vm/src/builtins/bytes.rs | 3 ++- vm/src/builtins/classmethod.rs | 9 ++------ vm/src/builtins/complex.rs | 39 +++++++++++++++------------------ vm/src/builtins/dict.rs | 3 ++- vm/src/builtins/function.rs | 7 ++---- vm/src/builtins/list.rs | 3 ++- vm/src/builtins/namespace.rs | 10 --------- vm/src/builtins/set.rs | 5 ++--- vm/src/builtins/staticmethod.rs | 14 ++++++------ vm/src/builtins/str.rs | 3 ++- vm/src/builtins/tuple.rs | 1 + vm/src/frame.rs | 4 ++-- vm/src/macros.rs | 2 +- vm/src/stdlib/marshal.rs | 4 ++-- 15 files changed, 47 insertions(+), 63 deletions(-) diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index 58406e3a20..2d3bd1ebfc 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -73,8 +73,9 @@ pub(crate) fn init(context: &Context) { } impl PyByteArray { + #[deprecated(note = "use PyByteArray::from(...).into_ref() instead")] pub fn new_ref(data: Vec, ctx: &Context) -> PyRef { - PyRef::new_ref(Self::from(data), ctx.types.bytearray_type.to_owned(), None) + Self::from(data).into_ref(ctx) } const fn from_inner(inner: PyBytesInner) -> Self { diff --git a/vm/src/builtins/bytes.rs b/vm/src/builtins/bytes.rs index 038592e543..6d77a3734a 100644 --- a/vm/src/builtins/bytes.rs +++ b/vm/src/builtins/bytes.rs @@ -99,8 +99,9 @@ impl Constructor for PyBytes { } impl PyBytes { + #[deprecated(note = "use PyBytes::from(...).into_ref() instead")] pub fn new_ref(data: Vec, ctx: &Context) -> PyRef { - PyRef::new_ref(Self::from(data), ctx.types.bytes_type.to_owned(), None) + Self::from(data).into_ref(ctx) } fn _getitem(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult { diff --git a/vm/src/builtins/classmethod.rs b/vm/src/builtins/classmethod.rs index 03bdeb171d..f88f14819d 100644 --- a/vm/src/builtins/classmethod.rs +++ b/vm/src/builtins/classmethod.rs @@ -111,14 +111,9 @@ impl Initializer for PyClassMethod { } impl PyClassMethod { + #[deprecated(note = "use PyClassMethod::from(...).into_ref() instead")] pub fn new_ref(callable: PyObjectRef, ctx: &Context) -> PyRef { - PyRef::new_ref( - Self { - callable: PyMutex::new(callable), - }, - ctx.types.classmethod_type.to_owned(), - None, - ) + Self::from(callable).into_ref(ctx) } } diff --git a/vm/src/builtins/complex.rs b/vm/src/builtins/complex.rs index a7a4049de8..7cf0802a68 100644 --- a/vm/src/builtins/complex.rs +++ b/vm/src/builtins/complex.rs @@ -29,12 +29,6 @@ pub struct PyComplex { value: Complex64, } -impl PyComplex { - pub const fn to_complex64(self) -> Complex64 { - self.value - } -} - impl PyPayload for PyComplex { #[inline] fn class(ctx: &Context) -> &'static Py { @@ -234,13 +228,30 @@ impl Constructor for PyComplex { } impl PyComplex { + #[deprecated(note = "use PyComplex::from(...).into_ref() instead")] pub fn new_ref(value: Complex64, ctx: &Context) -> PyRef { - PyRef::new_ref(Self::from(value), ctx.types.complex_type.to_owned(), None) + Self::from(value).into_ref(ctx) + } + + pub const fn to_complex64(self) -> Complex64 { + self.value } pub const fn to_complex(&self) -> Complex64 { self.value } + + fn number_op(a: &PyObject, b: &PyObject, op: F, vm: &VirtualMachine) -> PyResult + where + F: FnOnce(Complex64, Complex64, &VirtualMachine) -> R, + R: ToPyResult, + { + if let (Some(a), Some(b)) = (to_op_complex(a, vm)?, to_op_complex(b, vm)?) { + op(a, b, vm).to_pyresult(vm) + } else { + Ok(vm.ctx.not_implemented()) + } + } } #[pyclass( @@ -503,20 +514,6 @@ impl Representable for PyComplex { } } -impl PyComplex { - fn number_op(a: &PyObject, b: &PyObject, op: F, vm: &VirtualMachine) -> PyResult - where - F: FnOnce(Complex64, Complex64, &VirtualMachine) -> R, - R: ToPyResult, - { - if let (Some(a), Some(b)) = (to_op_complex(a, vm)?, to_op_complex(b, vm)?) { - op(a, b, vm).to_pyresult(vm) - } else { - Ok(vm.ctx.not_implemented()) - } - } -} - #[derive(FromArgs)] pub struct ComplexArgs { #[pyarg(any, optional)] diff --git a/vm/src/builtins/dict.rs b/vm/src/builtins/dict.rs index e59aa5bcf7..ce3d37322e 100644 --- a/vm/src/builtins/dict.rs +++ b/vm/src/builtins/dict.rs @@ -51,8 +51,9 @@ impl PyPayload for PyDict { } impl PyDict { + #[deprecated(note = "use PyDict::default().into_ref() instead")] pub fn new_ref(ctx: &Context) -> PyRef { - PyRef::new_ref(Self::default(), ctx.types.dict_type.to_owned(), None) + Self::default().into_ref(ctx) } /// escape hatch to access the underlying data structure directly. prefer adding a method on diff --git a/vm/src/builtins/function.rs b/vm/src/builtins/function.rs index c93b50010e..c73db94edf 100644 --- a/vm/src/builtins/function.rs +++ b/vm/src/builtins/function.rs @@ -773,12 +773,9 @@ impl PyBoundMethod { Self { object, function } } + #[deprecated(note = "Use `Self::new(object, function).into_ref(ctx)` instead")] pub fn new_ref(object: PyObjectRef, function: PyObjectRef, ctx: &Context) -> PyRef { - PyRef::new_ref( - Self::new(object, function), - ctx.types.bound_method_type.to_owned(), - None, - ) + Self::new(object, function).into_ref(ctx) } } diff --git a/vm/src/builtins/list.rs b/vm/src/builtins/list.rs index e1faff465c..9a7b589418 100644 --- a/vm/src/builtins/list.rs +++ b/vm/src/builtins/list.rs @@ -63,8 +63,9 @@ impl ToPyObject for Vec { } impl PyList { + #[deprecated(note = "use PyList::from(...).into_ref() instead")] pub fn new_ref(elements: Vec, ctx: &Context) -> PyRef { - PyRef::new_ref(Self::from(elements), ctx.types.list_type.to_owned(), None) + Self::from(elements).into_ref(ctx) } pub fn borrow_vec(&self) -> PyMappedRwLockReadGuard<'_, [PyObjectRef]> { diff --git a/vm/src/builtins/namespace.rs b/vm/src/builtins/namespace.rs index 2c6b8e79d8..ea430225a8 100644 --- a/vm/src/builtins/namespace.rs +++ b/vm/src/builtins/namespace.rs @@ -26,16 +26,6 @@ impl PyPayload for PyNamespace { impl DefaultConstructor for PyNamespace {} -impl PyNamespace { - pub fn new_ref(ctx: &Context) -> PyRef { - PyRef::new_ref( - Self {}, - ctx.types.namespace_type.to_owned(), - Some(ctx.new_dict()), - ) - } -} - #[pyclass( flags(BASETYPE, HAS_DICT), with(Constructor, Initializer, Comparable, Representable) diff --git a/vm/src/builtins/set.rs b/vm/src/builtins/set.rs index 7cf20a17f7..b68a51cb33 100644 --- a/vm/src/builtins/set.rs +++ b/vm/src/builtins/set.rs @@ -39,10 +39,9 @@ pub struct PySet { } impl PySet { + #[deprecated(note = "Use `PySet::default().into_ref(ctx)` instead")] pub fn new_ref(ctx: &Context) -> PyRef { - // Initialized empty, as calling __hash__ is required for adding each object to the set - // which requires a VM context - this is done in the set code itself. - PyRef::new_ref(Self::default(), ctx.types.set_type.to_owned(), None) + Self::default().into_ref(ctx) } pub fn elements(&self) -> Vec { diff --git a/vm/src/builtins/staticmethod.rs b/vm/src/builtins/staticmethod.rs index c357516abb..36aef728a3 100644 --- a/vm/src/builtins/staticmethod.rs +++ b/vm/src/builtins/staticmethod.rs @@ -61,14 +61,14 @@ impl Constructor for PyStaticMethod { } impl PyStaticMethod { + pub fn new(callable: PyObjectRef) -> Self { + Self { + callable: PyMutex::new(callable), + } + } + #[deprecated(note = "use PyStaticMethod::new(...).into_ref() instead")] pub fn new_ref(callable: PyObjectRef, ctx: &Context) -> PyRef { - PyRef::new_ref( - Self { - callable: PyMutex::new(callable), - }, - ctx.types.staticmethod_type.to_owned(), - None, - ) + Self::new(callable).into_ref(ctx) } } diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index 1c38314afe..2e4678d557 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -418,9 +418,10 @@ impl PyStr { unsafe { AsciiString::from_ascii_unchecked(bytes) }.into() } + #[deprecated(note = "use PyStr::from(...).into_ref() instead")] pub fn new_ref(zelf: impl Into, ctx: &Context) -> PyRef { let zelf = zelf.into(); - PyRef::new_ref(zelf, ctx.types.str_type.to_owned(), None) + zelf.into_ref(ctx) } fn new_substr(&self, s: Wtf8Buf) -> Self { diff --git a/vm/src/builtins/tuple.rs b/vm/src/builtins/tuple.rs index 2c3255b249..a97e63c304 100644 --- a/vm/src/builtins/tuple.rs +++ b/vm/src/builtins/tuple.rs @@ -193,6 +193,7 @@ impl PyTuple { } impl PyTuple { + // Do not deprecate this. empty_tuple must be checked. pub fn new_ref(elements: Vec, ctx: &Context) -> PyRef { if elements.is_empty() { ctx.empty_tuple.clone() diff --git a/vm/src/frame.rs b/vm/src/frame.rs index b3954b6f44..3f70b695c4 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -756,7 +756,7 @@ impl ExecutingFrame<'_> { Ok(None) } bytecode::Instruction::BuildSet { size } => { - let set = PySet::new_ref(&vm.ctx); + let set = PySet::default().into_ref(&vm.ctx); for element in self.pop_multiple(size.get(arg) as usize) { set.add(element, vm)?; } @@ -764,7 +764,7 @@ impl ExecutingFrame<'_> { Ok(None) } bytecode::Instruction::BuildSetFromTuples { size } => { - let set = PySet::new_ref(&vm.ctx); + let set = PySet::default().into_ref(&vm.ctx); for element in self.pop_multiple(size.get(arg) as usize) { // SAFETY: trust compiler let tup = unsafe { element.downcast_unchecked::() }; diff --git a/vm/src/macros.rs b/vm/src/macros.rs index 171558b9a9..ff9b28cf88 100644 --- a/vm/src/macros.rs +++ b/vm/src/macros.rs @@ -46,7 +46,7 @@ macro_rules! extend_class { macro_rules! py_namespace { ( $vm:expr, { $($name:expr => $value:expr),* $(,)* }) => { { - let namespace = $crate::builtins::PyNamespace::new_ref(&$vm.ctx); + let namespace = $crate::object::PyPayload::into_ref($crate::builtins::PyNamespace {}, &$vm.ctx); let obj = $crate::object::AsObject::as_object(&namespace); $( obj.generic_setattr($vm.ctx.intern_str($name), $crate::function::PySetterValue::Assign($value.into()), $vm).unwrap(); diff --git a/vm/src/stdlib/marshal.rs b/vm/src/stdlib/marshal.rs index b99f4bc53e..aced9e4877 100644 --- a/vm/src/stdlib/marshal.rs +++ b/vm/src/stdlib/marshal.rs @@ -14,7 +14,7 @@ mod decl { common::wtf8::Wtf8, convert::ToPyObject, function::{ArgBytesLike, OptionalArg}, - object::AsObject, + object::{AsObject, PyPayload}, protocol::PyBuffer, }; use malachite_bigint::BigInt; @@ -186,7 +186,7 @@ mod decl { it: impl Iterator, ) -> Result { let vm = self.0; - let set = PySet::new_ref(&vm.ctx); + let set = PySet::default().into_ref(&vm.ctx); for elem in it { set.add(elem, vm).unwrap() }