diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt index 8c733e343d1..270bd8e2e45 100644 --- a/.cspell.dict/cpython.txt +++ b/.cspell.dict/cpython.txt @@ -12,6 +12,7 @@ cellvar cellvars cmpop denom +deopt dictoffset elts excepthandler diff --git a/.gitattributes b/.gitattributes index f54bcd3b725..326c99e5ec5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,6 +2,7 @@ Lib/** linguist-vendored Cargo.lock linguist-generated *.snap linguist-generated -merge vm/src/stdlib/ast/gen.rs linguist-generated -merge +compiler/core/src/opcodes.rs linguist-generated=true Lib/*.py text working-tree-encoding=UTF-8 eol=LF **/*.rs text working-tree-encoding=UTF-8 eol=LF *.pck binary diff --git a/Cargo.lock b/Cargo.lock index cef898addd7..73274ea0ca4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2306,6 +2306,8 @@ dependencies = [ "lz4_flex", "malachite-bigint", "num-complex", + "num-traits", + "num_enum", "ruff_source_file", "rustpython-wtf8", ] diff --git a/Lib/dis.py b/Lib/dis.py index 53c85555bc9..fc7ad7e875d 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -1,5 +1,73 @@ +import types # XXX: From CPython 3.13.7 from _dis import * +# XXX: From CPython 3.13.7 +from opcode import * +# XXX: From CPython 3.13.7 +from opcode import ( + __all__ as _opcodes_all, + _cache_format, + _inline_cache_entries, + _nb_ops, + _intrinsic_1_descs, + _intrinsic_2_descs, + _specializations, + _specialized_opmap, +) + +# XXX: From CPython 3.13.7 +from _opcode import get_executor + +# XXX: From CPython 3.13.7 +__all__ = ["code_info", "dis", "disassemble", "distb", "disco", + "findlinestarts", "findlabels", "show_code", + "get_instructions", "Instruction", "Bytecode"] + _opcodes_all +del _opcodes_all + +# XXX: From CPython 3.13.7 +_have_code = (types.MethodType, types.FunctionType, types.CodeType, + classmethod, staticmethod, type) + +# XXX: From CPython 3.13.7 +CONVERT_VALUE = opmap['CONVERT_VALUE'] + +# XXX: From CPython 3.13.7 +SET_FUNCTION_ATTRIBUTE = opmap['SET_FUNCTION_ATTRIBUTE'] +FUNCTION_ATTR_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure') + +# XXX: From CPython 3.13.7 +ENTER_EXECUTOR = opmap['ENTER_EXECUTOR'] +LOAD_CONST = opmap['LOAD_CONST'] +RETURN_CONST = opmap['RETURN_CONST'] +LOAD_GLOBAL = opmap['LOAD_GLOBAL'] +BINARY_OP = opmap['BINARY_OP'] +JUMP_BACKWARD = opmap['JUMP_BACKWARD'] +FOR_ITER = opmap['FOR_ITER'] +SEND = opmap['SEND'] +LOAD_ATTR = opmap['LOAD_ATTR'] +LOAD_SUPER_ATTR = opmap['LOAD_SUPER_ATTR'] +CALL_INTRINSIC_1 = opmap['CALL_INTRINSIC_1'] +CALL_INTRINSIC_2 = opmap['CALL_INTRINSIC_2'] +LOAD_FAST_LOAD_FAST = opmap['LOAD_FAST_LOAD_FAST'] +STORE_FAST_LOAD_FAST = opmap['STORE_FAST_LOAD_FAST'] +STORE_FAST_STORE_FAST = opmap['STORE_FAST_STORE_FAST'] + +# XXX: From CPython 3.13.7 +CACHE = opmap["CACHE"] + +# XXX: From CPython 3.13.7 +_all_opname = list(opname) +_all_opmap = dict(opmap) +for name, op in _specialized_opmap.items(): + # fill opname and opmap + assert op < len(_all_opname) + _all_opname[op] = name + _all_opmap[name] = op + +# XXX: From CPython 3.13.7 +deoptmap = { + specialized: base for base, family in _specializations.items() for specialized in family +} # Disassembling a file by following cpython Lib/dis.py def _test(): diff --git a/Lib/test/test__opcode.py b/Lib/test/test__opcode.py index b1e38b43dc8..10f04b64dda 100644 --- a/Lib/test/test__opcode.py +++ b/Lib/test/test__opcode.py @@ -27,7 +27,6 @@ def test_invalid_opcodes(self): self.check_bool_function_result(_opcode.has_local, invalid, False) self.check_bool_function_result(_opcode.has_exc, invalid, False) - @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'dis' has no attribute 'opmap' def test_is_valid(self): names = [ 'CACHE', @@ -39,7 +38,6 @@ def test_is_valid(self): opcodes = [dis.opmap[opname] for opname in names] self.check_bool_function_result(_opcode.is_valid, opcodes, True) - @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'dis' has no attribute 'hasarg' def test_oplists(self): def check_function(self, func, expected): for op in [-10, 520]: @@ -58,7 +56,6 @@ def check_function(self, func, expected): class StackEffectTests(unittest.TestCase): - @unittest.expectedFailure # TODO: RUSTPYTHON def test_stack_effect(self): self.assertEqual(stack_effect(dis.opmap['POP_TOP']), -1) self.assertEqual(stack_effect(dis.opmap['BUILD_SLICE'], 0), -1) @@ -79,7 +76,6 @@ def test_stack_effect(self): self.assertRaises(ValueError, stack_effect, code) self.assertRaises(ValueError, stack_effect, code, 0) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_stack_effect_jump(self): FOR_ITER = dis.opmap['FOR_ITER'] self.assertEqual(stack_effect(FOR_ITER, 0), 1) diff --git a/compiler/core/Cargo.toml b/compiler/core/Cargo.toml index e49c73eb14a..3cb77958c97 100644 --- a/compiler/core/Cargo.toml +++ b/compiler/core/Cargo.toml @@ -17,6 +17,8 @@ bitflags = { workspace = true } itertools = { workspace = true } malachite-bigint = { workspace = true } num-complex = { workspace = true } +num_enum = { workspace = true } +num-traits = { workspace = true } lz4_flex = "0.11" diff --git a/compiler/core/src/lib.rs b/compiler/core/src/lib.rs index 0ce4a9defb1..d6d17ac826e 100644 --- a/compiler/core/src/lib.rs +++ b/compiler/core/src/lib.rs @@ -5,7 +5,10 @@ pub mod bytecode; pub mod frozen; pub mod marshal; mod mode; +pub mod opcode; +mod opcodes; pub use mode::Mode; +pub use opcode::{Opcode, PseudoOpcode, RealOpcode}; pub use ruff_source_file::{LineIndex, OneIndexed, SourceFile, SourceFileBuilder, SourceLocation}; diff --git a/compiler/core/src/opcode.rs b/compiler/core/src/opcode.rs new file mode 100644 index 00000000000..5b6e931b734 --- /dev/null +++ b/compiler/core/src/opcode.rs @@ -0,0 +1,48 @@ +use crate::marshal::MarshalError; +pub use crate::opcodes::{PseudoOpcode, RealOpcode}; + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Opcode { + Real(RealOpcode), + Pseudo(PseudoOpcode), +} + +impl TryFrom for Opcode { + type Error = MarshalError; + + fn try_from(raw: u16) -> Result { + // Try first pseudo opcode. If not, fallback to real opcode. + PseudoOpcode::try_from(raw) + .map(Opcode::Pseudo) + .or_else(|_| { + Self::try_from(u8::try_from(raw).map_err(|_| Self::Error::InvalidBytecode)?) + }) + } +} + +impl TryFrom for Opcode { + type Error = MarshalError; + + fn try_from(raw: u8) -> Result { + // u8 can never be a pseudo. + RealOpcode::try_from(raw).map(Opcode::Real) + } +} + +macro_rules! impl_try_from { + ($struct_name:ident, $($t:ty),+ $(,)?) => { + $( + impl TryFrom<$t> for $struct_name { + type Error = MarshalError; + + fn try_from(raw: $t) -> Result { + Self::try_from(u16::try_from(raw).map_err(|_| Self::Error::InvalidBytecode)?) + } + } + )+ + }; +} + +impl_try_from!( + Opcode, i8, i16, i32, i64, i128, isize, u32, u64, u128, usize +); diff --git a/compiler/core/src/opcodes.rs b/compiler/core/src/opcodes.rs new file mode 100644 index 00000000000..cddd5684e5c --- /dev/null +++ b/compiler/core/src/opcodes.rs @@ -0,0 +1,1075 @@ +//! Python opcode implementation. Currently aligned with cpython 3.13.7 + +// This file is generated by scripts/gen_opcodes.py +// Do not edit! + +use crate::marshal::MarshalError; +use num_enum::TryFromPrimitive; + +#[derive(Clone, Copy, Debug, Eq, PartialEq, TryFromPrimitive)] +#[num_enum(error_type(name = MarshalError, constructor = new_invalid_bytecode))] +#[repr(u8)] +pub enum RealOpcode { + BeforeAsyncWith = 1, + BeforeWith = 2, + BinaryOp = 45, + BinaryOpAddFloat = 150, + BinaryOpAddInt = 151, + BinaryOpAddUnicode = 152, + BinaryOpInplaceAddUnicode = 3, + BinaryOpMultiplyFloat = 153, + BinaryOpMultiplyInt = 154, + BinaryOpSubtractFloat = 155, + BinaryOpSubtractInt = 156, + BinarySlice = 4, + BinarySubscr = 5, + BinarySubscrDict = 157, + BinarySubscrGetitem = 158, + BinarySubscrListInt = 159, + BinarySubscrStrInt = 160, + BinarySubscrTupleInt = 161, + BuildConstKeyMap = 46, + BuildList = 47, + BuildMap = 48, + BuildSet = 49, + BuildSlice = 50, + BuildString = 51, + BuildTuple = 52, + Cache = 0, + Call = 53, + CallAllocAndEnterInit = 162, + CallBoundMethodExactArgs = 163, + CallBoundMethodGeneral = 164, + CallBuiltinClass = 165, + CallBuiltinFast = 166, + CallBuiltinFastWithKeywords = 167, + CallBuiltinO = 168, + CallFunctionEx = 54, + CallIntrinsic1 = 55, + CallIntrinsic2 = 56, + CallIsinstance = 169, + CallKw = 57, + CallLen = 170, + CallListAppend = 171, + CallMethodDescriptorFast = 172, + CallMethodDescriptorFastWithKeywords = 173, + CallMethodDescriptorNoargs = 174, + CallMethodDescriptorO = 175, + CallNonPyGeneral = 176, + CallPyExactArgs = 177, + CallPyGeneral = 178, + CallStr1 = 179, + CallTuple1 = 180, + CallType1 = 181, + CheckEgMatch = 6, + CheckExcMatch = 7, + CleanupThrow = 8, + CompareOp = 58, + CompareOpFloat = 182, + CompareOpInt = 183, + CompareOpStr = 184, + ContainsOp = 59, + ContainsOpDict = 185, + ContainsOpSet = 186, + ConvertValue = 60, + Copy = 61, + CopyFreeVars = 62, + DeleteAttr = 63, + DeleteDeref = 64, + DeleteFast = 65, + DeleteGlobal = 66, + DeleteName = 67, + DeleteSubscr = 9, + DictMerge = 68, + DictUpdate = 69, + EndAsyncFor = 10, + EndFor = 11, + EndSend = 12, + EnterExecutor = 70, + ExitInitCheck = 13, + ExtendedArg = 71, + FormatSimple = 14, + FormatWithSpec = 15, + ForIter = 72, + ForIterGen = 187, + ForIterList = 188, + ForIterRange = 189, + ForIterTuple = 190, + GetAiter = 16, + GetAnext = 18, + GetAwaitable = 73, + GetIter = 19, + GetLen = 20, + GetYieldFromIter = 21, + ImportFrom = 74, + ImportName = 75, + InstrumentedCall = 244, + InstrumentedCallFunctionEx = 246, + InstrumentedCallKw = 245, + InstrumentedEndFor = 237, + InstrumentedEndSend = 238, + InstrumentedForIter = 243, + InstrumentedInstruction = 247, + InstrumentedJumpBackward = 249, + InstrumentedJumpForward = 248, + InstrumentedLine = 254, + InstrumentedLoadSuperAttr = 242, + InstrumentedPopJumpIfFalse = 251, + InstrumentedPopJumpIfNone = 252, + InstrumentedPopJumpIfNotNone = 253, + InstrumentedPopJumpIfTrue = 250, + InstrumentedResume = 236, + InstrumentedReturnConst = 240, + InstrumentedReturnValue = 239, + InstrumentedYieldValue = 241, + InterpreterExit = 22, + IsOp = 76, + JumpBackward = 77, + JumpBackwardNoInterrupt = 78, + JumpForward = 79, + ListAppend = 80, + ListExtend = 81, + LoadAssertionError = 23, + LoadAttr = 82, + LoadAttrClass = 191, + LoadAttrGetattributeOverridden = 192, + LoadAttrInstanceValue = 193, + LoadAttrMethodLazyDict = 194, + LoadAttrMethodNoDict = 195, + LoadAttrMethodWithValues = 196, + LoadAttrModule = 197, + LoadAttrNondescriptorNoDict = 198, + LoadAttrNondescriptorWithValues = 199, + LoadAttrProperty = 200, + LoadAttrSlot = 201, + LoadAttrWithHint = 202, + LoadBuildClass = 24, + LoadConst = 83, + LoadDeref = 84, + LoadFast = 85, + LoadFastAndClear = 86, + LoadFastCheck = 87, + LoadFastLoadFast = 88, + LoadFromDictOrDeref = 89, + LoadFromDictOrGlobals = 90, + LoadGlobal = 91, + LoadGlobalBuiltin = 203, + LoadGlobalModule = 204, + LoadLocals = 25, + LoadName = 92, + LoadSuperAttr = 93, + LoadSuperAttrAttr = 205, + LoadSuperAttrMethod = 206, + MakeCell = 94, + MakeFunction = 26, + MapAdd = 95, + MatchClass = 96, + MatchKeys = 27, + MatchMapping = 28, + MatchSequence = 29, + Nop = 30, + PopExcept = 31, + PopJumpIfFalse = 97, + PopJumpIfNone = 98, + PopJumpIfNotNone = 99, + PopJumpIfTrue = 100, + PopTop = 32, + PushExcInfo = 33, + PushNull = 34, + RaiseVarargs = 101, + Reraise = 102, + Reserved = 17, + Resume = 149, + ResumeCheck = 207, + ReturnConst = 103, + ReturnGenerator = 35, + ReturnValue = 36, + Send = 104, + SendGen = 208, + SetupAnnotations = 37, + SetAdd = 105, + SetFunctionAttribute = 106, + SetUpdate = 107, + StoreAttr = 108, + StoreAttrInstanceValue = 209, + StoreAttrSlot = 210, + StoreAttrWithHint = 211, + StoreDeref = 109, + StoreFast = 110, + StoreFastLoadFast = 111, + StoreFastStoreFast = 112, + StoreGlobal = 113, + StoreName = 114, + StoreSlice = 38, + StoreSubscr = 39, + StoreSubscrDict = 212, + StoreSubscrListInt = 213, + Swap = 115, + ToBool = 40, + ToBoolAlwaysTrue = 214, + ToBoolBool = 215, + ToBoolInt = 216, + ToBoolList = 217, + ToBoolNone = 218, + ToBoolStr = 219, + UnaryInvert = 41, + UnaryNegative = 42, + UnaryNot = 43, + UnpackEx = 116, + UnpackSequence = 117, + UnpackSequenceList = 220, + UnpackSequenceTuple = 221, + UnpackSequenceTwoTuple = 222, + WithExceptStart = 44, + YieldValue = 118, +} + +impl RealOpcode { + pub const fn deopt(&self) -> Option { + Some(match *self { + Self::BinaryOpAddFloat + | Self::BinaryOpAddInt + | Self::BinaryOpAddUnicode + | Self::BinaryOpInplaceAddUnicode + | Self::BinaryOpMultiplyFloat + | Self::BinaryOpMultiplyInt + | Self::BinaryOpSubtractFloat + | Self::BinaryOpSubtractInt => Self::BinaryOp, + Self::BinarySubscrDict + | Self::BinarySubscrGetitem + | Self::BinarySubscrListInt + | Self::BinarySubscrStrInt + | Self::BinarySubscrTupleInt => Self::BinarySubscr, + Self::CallAllocAndEnterInit + | Self::CallBoundMethodExactArgs + | Self::CallBoundMethodGeneral + | Self::CallBuiltinClass + | Self::CallBuiltinFast + | Self::CallBuiltinFastWithKeywords + | Self::CallBuiltinO + | Self::CallIsinstance + | Self::CallLen + | Self::CallListAppend + | Self::CallMethodDescriptorFast + | Self::CallMethodDescriptorFastWithKeywords + | Self::CallMethodDescriptorNoargs + | Self::CallMethodDescriptorO + | Self::CallNonPyGeneral + | Self::CallPyExactArgs + | Self::CallPyGeneral + | Self::CallStr1 + | Self::CallTuple1 + | Self::CallType1 => Self::Call, + Self::CompareOpFloat | Self::CompareOpInt | Self::CompareOpStr => Self::CompareOp, + Self::ContainsOpDict | Self::ContainsOpSet => Self::ContainsOp, + Self::ForIterGen | Self::ForIterList | Self::ForIterRange | Self::ForIterTuple => { + Self::ForIter + } + Self::LoadAttrClass + | Self::LoadAttrGetattributeOverridden + | Self::LoadAttrInstanceValue + | Self::LoadAttrMethodLazyDict + | Self::LoadAttrMethodNoDict + | Self::LoadAttrMethodWithValues + | Self::LoadAttrModule + | Self::LoadAttrNondescriptorNoDict + | Self::LoadAttrNondescriptorWithValues + | Self::LoadAttrProperty + | Self::LoadAttrSlot + | Self::LoadAttrWithHint => Self::LoadAttr, + Self::LoadGlobalBuiltin | Self::LoadGlobalModule => Self::LoadGlobal, + Self::LoadSuperAttrAttr | Self::LoadSuperAttrMethod => Self::LoadSuperAttr, + Self::ResumeCheck => Self::Resume, + Self::SendGen => Self::Send, + Self::StoreAttrInstanceValue | Self::StoreAttrSlot | Self::StoreAttrWithHint => { + Self::StoreAttr + } + Self::StoreSubscrDict | Self::StoreSubscrListInt => Self::StoreSubscr, + Self::ToBoolAlwaysTrue + | Self::ToBoolBool + | Self::ToBoolInt + | Self::ToBoolList + | Self::ToBoolNone + | Self::ToBoolStr => Self::ToBool, + Self::UnpackSequenceList | Self::UnpackSequenceTuple | Self::UnpackSequenceTwoTuple => { + Self::UnpackSequence + } + _ => return None, + }) + } + + /// Whether opcode ID have 'HAS_ARG_FLAG' set. + #[must_use] + pub const fn has_arg(&self) -> bool { + matches!( + *self, + Self::BinaryOp + | Self::BuildConstKeyMap + | Self::BuildList + | Self::BuildMap + | Self::BuildSet + | Self::BuildSlice + | Self::BuildString + | Self::BuildTuple + | Self::Call + | Self::CallAllocAndEnterInit + | Self::CallBoundMethodExactArgs + | Self::CallBoundMethodGeneral + | Self::CallBuiltinClass + | Self::CallBuiltinFast + | Self::CallBuiltinFastWithKeywords + | Self::CallBuiltinO + | Self::CallFunctionEx + | Self::CallIntrinsic1 + | Self::CallIntrinsic2 + | Self::CallIsinstance + | Self::CallKw + | Self::CallLen + | Self::CallListAppend + | Self::CallMethodDescriptorFast + | Self::CallMethodDescriptorFastWithKeywords + | Self::CallMethodDescriptorNoargs + | Self::CallMethodDescriptorO + | Self::CallNonPyGeneral + | Self::CallPyExactArgs + | Self::CallPyGeneral + | Self::CallStr1 + | Self::CallTuple1 + | Self::CallType1 + | Self::CompareOp + | Self::CompareOpFloat + | Self::CompareOpInt + | Self::CompareOpStr + | Self::ContainsOp + | Self::ContainsOpDict + | Self::ContainsOpSet + | Self::ConvertValue + | Self::Copy + | Self::CopyFreeVars + | Self::DeleteAttr + | Self::DeleteDeref + | Self::DeleteFast + | Self::DeleteGlobal + | Self::DeleteName + | Self::DictMerge + | Self::DictUpdate + | Self::EnterExecutor + | Self::ExtendedArg + | Self::ForIter + | Self::ForIterGen + | Self::ForIterList + | Self::ForIterRange + | Self::ForIterTuple + | Self::GetAwaitable + | Self::ImportFrom + | Self::ImportName + | Self::InstrumentedCall + | Self::InstrumentedCallKw + | Self::InstrumentedForIter + | Self::InstrumentedJumpBackward + | Self::InstrumentedJumpForward + | Self::InstrumentedLoadSuperAttr + | Self::InstrumentedPopJumpIfFalse + | Self::InstrumentedPopJumpIfNone + | Self::InstrumentedPopJumpIfNotNone + | Self::InstrumentedPopJumpIfTrue + | Self::InstrumentedResume + | Self::InstrumentedReturnConst + | Self::InstrumentedYieldValue + | Self::IsOp + | Self::JumpBackward + | Self::JumpBackwardNoInterrupt + | Self::JumpForward + | Self::ListAppend + | Self::ListExtend + | Self::LoadAttr + | Self::LoadAttrClass + | Self::LoadAttrGetattributeOverridden + | Self::LoadAttrInstanceValue + | Self::LoadAttrMethodLazyDict + | Self::LoadAttrMethodNoDict + | Self::LoadAttrMethodWithValues + | Self::LoadAttrModule + | Self::LoadAttrNondescriptorNoDict + | Self::LoadAttrNondescriptorWithValues + | Self::LoadAttrProperty + | Self::LoadAttrSlot + | Self::LoadAttrWithHint + | Self::LoadConst + | Self::LoadDeref + | Self::LoadFast + | Self::LoadFastAndClear + | Self::LoadFastCheck + | Self::LoadFastLoadFast + | Self::LoadFromDictOrDeref + | Self::LoadFromDictOrGlobals + | Self::LoadGlobal + | Self::LoadGlobalBuiltin + | Self::LoadGlobalModule + | Self::LoadName + | Self::LoadSuperAttr + | Self::LoadSuperAttrAttr + | Self::LoadSuperAttrMethod + | Self::MakeCell + | Self::MapAdd + | Self::MatchClass + | Self::PopJumpIfFalse + | Self::PopJumpIfNone + | Self::PopJumpIfNotNone + | Self::PopJumpIfTrue + | Self::RaiseVarargs + | Self::Reraise + | Self::Resume + | Self::ReturnConst + | Self::Send + | Self::SendGen + | Self::SetAdd + | Self::SetFunctionAttribute + | Self::SetUpdate + | Self::StoreAttr + | Self::StoreAttrWithHint + | Self::StoreDeref + | Self::StoreFast + | Self::StoreFastLoadFast + | Self::StoreFastStoreFast + | Self::StoreGlobal + | Self::StoreName + | Self::Swap + | Self::UnpackEx + | Self::UnpackSequence + | Self::UnpackSequenceList + | Self::UnpackSequenceTuple + | Self::UnpackSequenceTwoTuple + | Self::YieldValue + ) + } + + /// Whether opcode ID have 'HAS_CONST_FLAG' set. + #[must_use] + pub const fn has_const(&self) -> bool { + matches!( + *self, + Self::InstrumentedReturnConst | Self::LoadConst | Self::ReturnConst + ) + } + + /// Whether opcode ID have 'HAS_PURE_FLAG' set. + #[must_use] + pub const fn has_exc(&self) -> bool { + matches!( + *self, + Self::Copy + | Self::EndFor + | Self::EndSend + | Self::InstrumentedLine + | Self::LoadConst + | Self::LoadFast + | Self::Nop + | Self::PopTop + | Self::PushNull + | Self::Swap + | Self::UnaryNot + ) + } + + /// Whether opcode ID have 'HAS_FREE_FLAG' set. + #[must_use] + pub const fn has_free(&self) -> bool { + matches!( + *self, + Self::DeleteDeref + | Self::LoadDeref + | Self::LoadFromDictOrDeref + | Self::MakeCell + | Self::StoreDeref + ) + } + + /// Whether opcode ID have 'HAS_JUMP_FLAG' set. + #[must_use] + pub const fn has_jump(&self) -> bool { + matches!( + *self, + Self::ForIter + | Self::ForIterList + | Self::ForIterRange + | Self::ForIterTuple + | Self::JumpBackward + | Self::JumpBackwardNoInterrupt + | Self::JumpForward + | Self::PopJumpIfFalse + | Self::PopJumpIfNone + | Self::PopJumpIfNotNone + | Self::PopJumpIfTrue + | Self::Send + ) + } + + /// Whether opcode ID have 'HAS_LOCAL_FLAG' set. + #[must_use] + pub const fn has_local(&self) -> bool { + matches!( + *self, + Self::BinaryOpInplaceAddUnicode + | Self::DeleteFast + | Self::LoadFast + | Self::LoadFastAndClear + | Self::LoadFastCheck + | Self::LoadFastLoadFast + | Self::StoreFast + | Self::StoreFastLoadFast + | Self::StoreFastStoreFast + ) + } + + /// Whether opcode ID have 'HAS_NAME_FLAG' set. + #[must_use] + pub const fn has_name(&self) -> bool { + matches!( + *self, + Self::DeleteAttr + | Self::DeleteGlobal + | Self::DeleteName + | Self::ImportFrom + | Self::ImportName + | Self::LoadAttr + | Self::LoadAttrGetattributeOverridden + | Self::LoadAttrWithHint + | Self::LoadFromDictOrGlobals + | Self::LoadGlobal + | Self::LoadName + | Self::LoadSuperAttr + | Self::LoadSuperAttrAttr + | Self::LoadSuperAttrMethod + | Self::StoreAttr + | Self::StoreAttrWithHint + | Self::StoreGlobal + | Self::StoreName + ) + } + + /// How many items should be popped from the stack. + pub const fn num_popped(&self, oparg: i32) -> i32 { + match *self { + Self::BeforeAsyncWith => 1, + Self::BeforeWith => 1, + Self::BinaryOp => 2, + Self::BinaryOpAddFloat => 2, + Self::BinaryOpAddInt => 2, + Self::BinaryOpAddUnicode => 2, + Self::BinaryOpInplaceAddUnicode => 2, + Self::BinaryOpMultiplyFloat => 2, + Self::BinaryOpMultiplyInt => 2, + Self::BinaryOpSubtractFloat => 2, + Self::BinaryOpSubtractInt => 2, + Self::BinarySlice => 3, + Self::BinarySubscr => 2, + Self::BinarySubscrDict => 2, + Self::BinarySubscrGetitem => 2, + Self::BinarySubscrListInt => 2, + Self::BinarySubscrStrInt => 2, + Self::BinarySubscrTupleInt => 2, + Self::BuildConstKeyMap => 1 + oparg, + Self::BuildList => oparg, + Self::BuildMap => oparg * 2, + Self::BuildSet => oparg, + Self::BuildSlice => 2 + (if oparg == 3 { 1 } else { 0 }), + Self::BuildString => oparg, + Self::BuildTuple => oparg, + Self::Cache => 0, + Self::Call => 2 + oparg, + Self::CallAllocAndEnterInit => 2 + oparg, + Self::CallBoundMethodExactArgs => 2 + oparg, + Self::CallBoundMethodGeneral => 2 + oparg, + Self::CallBuiltinClass => 2 + oparg, + Self::CallBuiltinFast => 2 + oparg, + Self::CallBuiltinFastWithKeywords => 2 + oparg, + Self::CallBuiltinO => 2 + oparg, + Self::CallFunctionEx => 3 + (oparg & 1), + Self::CallIntrinsic1 => 1, + Self::CallIntrinsic2 => 2, + Self::CallIsinstance => 2 + oparg, + Self::CallKw => 3 + oparg, + Self::CallLen => 2 + oparg, + Self::CallListAppend => 3, + Self::CallMethodDescriptorFast => 2 + oparg, + Self::CallMethodDescriptorFastWithKeywords => 2 + oparg, + Self::CallMethodDescriptorNoargs => 2 + oparg, + Self::CallMethodDescriptorO => 2 + oparg, + Self::CallNonPyGeneral => 2 + oparg, + Self::CallPyExactArgs => 2 + oparg, + Self::CallPyGeneral => 2 + oparg, + Self::CallStr1 => 3, + Self::CallTuple1 => 3, + Self::CallType1 => 3, + Self::CheckEgMatch => 2, + Self::CheckExcMatch => 2, + Self::CleanupThrow => 3, + Self::CompareOp => 2, + Self::CompareOpFloat => 2, + Self::CompareOpInt => 2, + Self::CompareOpStr => 2, + Self::ContainsOp => 2, + Self::ContainsOpDict => 2, + Self::ContainsOpSet => 2, + Self::ConvertValue => 1, + Self::Copy => 1 + (oparg - 1), + Self::CopyFreeVars => 0, + Self::DeleteAttr => 1, + Self::DeleteDeref => 0, + Self::DeleteFast => 0, + Self::DeleteGlobal => 0, + Self::DeleteName => 0, + Self::DeleteSubscr => 2, + Self::DictMerge => 5 + (oparg - 1), + Self::DictUpdate => 2 + (oparg - 1), + Self::EndAsyncFor => 2, + Self::EndFor => 1, + Self::EndSend => 2, + Self::EnterExecutor => 0, + Self::ExitInitCheck => 1, + Self::ExtendedArg => 0, + Self::FormatSimple => 1, + Self::FormatWithSpec => 2, + Self::ForIter => 1, + Self::ForIterGen => 1, + Self::ForIterList => 1, + Self::ForIterRange => 1, + Self::ForIterTuple => 1, + Self::GetAiter => 1, + Self::GetAnext => 1, + Self::GetAwaitable => 1, + Self::GetIter => 1, + Self::GetLen => 1, + Self::GetYieldFromIter => 1, + Self::ImportFrom => 1, + Self::ImportName => 2, + Self::InstrumentedCall => 0, + Self::InstrumentedCallFunctionEx => 0, + Self::InstrumentedCallKw => 0, + Self::InstrumentedEndFor => 2, + Self::InstrumentedEndSend => 2, + Self::InstrumentedForIter => 0, + Self::InstrumentedInstruction => 0, + Self::InstrumentedJumpBackward => 0, + Self::InstrumentedJumpForward => 0, + Self::InstrumentedLine => 0, + Self::InstrumentedLoadSuperAttr => 3, + Self::InstrumentedPopJumpIfFalse => 0, + Self::InstrumentedPopJumpIfNone => 0, + Self::InstrumentedPopJumpIfNotNone => 0, + Self::InstrumentedPopJumpIfTrue => 0, + Self::InstrumentedResume => 0, + Self::InstrumentedReturnConst => 0, + Self::InstrumentedReturnValue => 1, + Self::InstrumentedYieldValue => 1, + Self::InterpreterExit => 1, + Self::IsOp => 2, + Self::JumpBackward => 0, + Self::JumpBackwardNoInterrupt => 0, + Self::JumpForward => 0, + Self::ListAppend => 2 + (oparg - 1), + Self::ListExtend => 2 + (oparg - 1), + Self::LoadAssertionError => 0, + Self::LoadAttr => 1, + Self::LoadAttrClass => 1, + Self::LoadAttrGetattributeOverridden => 1, + Self::LoadAttrInstanceValue => 1, + Self::LoadAttrMethodLazyDict => 1, + Self::LoadAttrMethodNoDict => 1, + Self::LoadAttrMethodWithValues => 1, + Self::LoadAttrModule => 1, + Self::LoadAttrNondescriptorNoDict => 1, + Self::LoadAttrNondescriptorWithValues => 1, + Self::LoadAttrProperty => 1, + Self::LoadAttrSlot => 1, + Self::LoadAttrWithHint => 1, + Self::LoadBuildClass => 0, + Self::LoadConst => 0, + Self::LoadDeref => 0, + Self::LoadFast => 0, + Self::LoadFastAndClear => 0, + Self::LoadFastCheck => 0, + Self::LoadFastLoadFast => 0, + Self::LoadFromDictOrDeref => 1, + Self::LoadFromDictOrGlobals => 1, + Self::LoadGlobal => 0, + Self::LoadGlobalBuiltin => 0, + Self::LoadGlobalModule => 0, + Self::LoadLocals => 0, + Self::LoadName => 0, + Self::LoadSuperAttr => 3, + Self::LoadSuperAttrAttr => 3, + Self::LoadSuperAttrMethod => 3, + Self::MakeCell => 0, + Self::MakeFunction => 1, + Self::MapAdd => 3 + (oparg - 1), + Self::MatchClass => 3, + Self::MatchKeys => 2, + Self::MatchMapping => 1, + Self::MatchSequence => 1, + Self::Nop => 0, + Self::PopExcept => 1, + Self::PopJumpIfFalse => 1, + Self::PopJumpIfNone => 1, + Self::PopJumpIfNotNone => 1, + Self::PopJumpIfTrue => 1, + Self::PopTop => 1, + Self::PushExcInfo => 1, + Self::PushNull => 0, + Self::RaiseVarargs => oparg, + Self::Reraise => 1 + oparg, + Self::Reserved => 0, + Self::Resume => 0, + Self::ResumeCheck => 0, + Self::ReturnConst => 0, + Self::ReturnGenerator => 0, + Self::ReturnValue => 1, + Self::Send => 2, + Self::SendGen => 2, + Self::SetupAnnotations => 0, + Self::SetAdd => 2 + (oparg - 1), + Self::SetFunctionAttribute => 2, + Self::SetUpdate => 2 + (oparg - 1), + Self::StoreAttr => 2, + Self::StoreAttrInstanceValue => 2, + Self::StoreAttrSlot => 2, + Self::StoreAttrWithHint => 2, + Self::StoreDeref => 1, + Self::StoreFast => 1, + Self::StoreFastLoadFast => 1, + Self::StoreFastStoreFast => 2, + Self::StoreGlobal => 1, + Self::StoreName => 1, + Self::StoreSlice => 4, + Self::StoreSubscr => 3, + Self::StoreSubscrDict => 3, + Self::StoreSubscrListInt => 3, + Self::Swap => 2 + (oparg - 2), + Self::ToBool => 1, + Self::ToBoolAlwaysTrue => 1, + Self::ToBoolBool => 1, + Self::ToBoolInt => 1, + Self::ToBoolList => 1, + Self::ToBoolNone => 1, + Self::ToBoolStr => 1, + Self::UnaryInvert => 1, + Self::UnaryNegative => 1, + Self::UnaryNot => 1, + Self::UnpackEx => 1, + Self::UnpackSequence => 1, + Self::UnpackSequenceList => 1, + Self::UnpackSequenceTuple => 1, + Self::UnpackSequenceTwoTuple => 1, + Self::WithExceptStart => 4, + Self::YieldValue => 1, + } + } + + /// How many items should be pushed on the stack. + pub const fn num_pushed(&self, oparg: i32) -> i32 { + match *self { + Self::BeforeAsyncWith => 2, + Self::BeforeWith => 2, + Self::BinaryOp => 1, + Self::BinaryOpAddFloat => 1, + Self::BinaryOpAddInt => 1, + Self::BinaryOpAddUnicode => 1, + Self::BinaryOpInplaceAddUnicode => 0, + Self::BinaryOpMultiplyFloat => 1, + Self::BinaryOpMultiplyInt => 1, + Self::BinaryOpSubtractFloat => 1, + Self::BinaryOpSubtractInt => 1, + Self::BinarySlice => 1, + Self::BinarySubscr => 1, + Self::BinarySubscrDict => 1, + Self::BinarySubscrGetitem => 1, + Self::BinarySubscrListInt => 1, + Self::BinarySubscrStrInt => 1, + Self::BinarySubscrTupleInt => 1, + Self::BuildConstKeyMap => 1, + Self::BuildList => 1, + Self::BuildMap => 1, + Self::BuildSet => 1, + Self::BuildSlice => 1, + Self::BuildString => 1, + Self::BuildTuple => 1, + Self::Cache => 0, + Self::Call => 1, + Self::CallAllocAndEnterInit => 1, + Self::CallBoundMethodExactArgs => 0, + Self::CallBoundMethodGeneral => 0, + Self::CallBuiltinClass => 1, + Self::CallBuiltinFast => 1, + Self::CallBuiltinFastWithKeywords => 1, + Self::CallBuiltinO => 1, + Self::CallFunctionEx => 1, + Self::CallIntrinsic1 => 1, + Self::CallIntrinsic2 => 1, + Self::CallIsinstance => 1, + Self::CallKw => 1, + Self::CallLen => 1, + Self::CallListAppend => 1, + Self::CallMethodDescriptorFast => 1, + Self::CallMethodDescriptorFastWithKeywords => 1, + Self::CallMethodDescriptorNoargs => 1, + Self::CallMethodDescriptorO => 1, + Self::CallNonPyGeneral => 1, + Self::CallPyExactArgs => 0, + Self::CallPyGeneral => 0, + Self::CallStr1 => 1, + Self::CallTuple1 => 1, + Self::CallType1 => 1, + Self::CheckEgMatch => 2, + Self::CheckExcMatch => 2, + Self::CleanupThrow => 2, + Self::CompareOp => 1, + Self::CompareOpFloat => 1, + Self::CompareOpInt => 1, + Self::CompareOpStr => 1, + Self::ContainsOp => 1, + Self::ContainsOpDict => 1, + Self::ContainsOpSet => 1, + Self::ConvertValue => 1, + Self::Copy => 2 + (oparg - 1), + Self::CopyFreeVars => 0, + Self::DeleteAttr => 0, + Self::DeleteDeref => 0, + Self::DeleteFast => 0, + Self::DeleteGlobal => 0, + Self::DeleteName => 0, + Self::DeleteSubscr => 0, + Self::DictMerge => 4 + (oparg - 1), + Self::DictUpdate => 1 + (oparg - 1), + Self::EndAsyncFor => 0, + Self::EndFor => 0, + Self::EndSend => 1, + Self::EnterExecutor => 0, + Self::ExitInitCheck => 0, + Self::ExtendedArg => 0, + Self::FormatSimple => 1, + Self::FormatWithSpec => 1, + Self::ForIter => 2, + Self::ForIterGen => 1, + Self::ForIterList => 2, + Self::ForIterRange => 2, + Self::ForIterTuple => 2, + Self::GetAiter => 1, + Self::GetAnext => 2, + Self::GetAwaitable => 1, + Self::GetIter => 1, + Self::GetLen => 2, + Self::GetYieldFromIter => 1, + Self::ImportFrom => 2, + Self::ImportName => 1, + Self::InstrumentedCall => 0, + Self::InstrumentedCallFunctionEx => 0, + Self::InstrumentedCallKw => 0, + Self::InstrumentedEndFor => 1, + Self::InstrumentedEndSend => 1, + Self::InstrumentedForIter => 0, + Self::InstrumentedInstruction => 0, + Self::InstrumentedJumpBackward => 0, + Self::InstrumentedJumpForward => 0, + Self::InstrumentedLine => 0, + Self::InstrumentedLoadSuperAttr => 1 + (oparg & 1), + Self::InstrumentedPopJumpIfFalse => 0, + Self::InstrumentedPopJumpIfNone => 0, + Self::InstrumentedPopJumpIfNotNone => 0, + Self::InstrumentedPopJumpIfTrue => 0, + Self::InstrumentedResume => 0, + Self::InstrumentedReturnConst => 0, + Self::InstrumentedReturnValue => 0, + Self::InstrumentedYieldValue => 1, + Self::InterpreterExit => 0, + Self::IsOp => 1, + Self::JumpBackward => 0, + Self::JumpBackwardNoInterrupt => 0, + Self::JumpForward => 0, + Self::ListAppend => 1 + (oparg - 1), + Self::ListExtend => 1 + (oparg - 1), + Self::LoadAssertionError => 1, + Self::LoadAttr => 1 + (oparg & 1), + Self::LoadAttrClass => 1 + (oparg & 1), + Self::LoadAttrGetattributeOverridden => 1, + Self::LoadAttrInstanceValue => 1 + (oparg & 1), + Self::LoadAttrMethodLazyDict => 2, + Self::LoadAttrMethodNoDict => 2, + Self::LoadAttrMethodWithValues => 2, + Self::LoadAttrModule => 1 + (oparg & 1), + Self::LoadAttrNondescriptorNoDict => 1, + Self::LoadAttrNondescriptorWithValues => 1, + Self::LoadAttrProperty => 1, + Self::LoadAttrSlot => 1 + (oparg & 1), + Self::LoadAttrWithHint => 1 + (oparg & 1), + Self::LoadBuildClass => 1, + Self::LoadConst => 1, + Self::LoadDeref => 1, + Self::LoadFast => 1, + Self::LoadFastAndClear => 1, + Self::LoadFastCheck => 1, + Self::LoadFastLoadFast => 2, + Self::LoadFromDictOrDeref => 1, + Self::LoadFromDictOrGlobals => 1, + Self::LoadGlobal => 1 + (oparg & 1), + Self::LoadGlobalBuiltin => 1 + (oparg & 1), + Self::LoadGlobalModule => 1 + (oparg & 1), + Self::LoadLocals => 1, + Self::LoadName => 1, + Self::LoadSuperAttr => 1 + (oparg & 1), + Self::LoadSuperAttrAttr => 1, + Self::LoadSuperAttrMethod => 2, + Self::MakeCell => 0, + Self::MakeFunction => 1, + Self::MapAdd => 1 + (oparg - 1), + Self::MatchClass => 1, + Self::MatchKeys => 3, + Self::MatchMapping => 2, + Self::MatchSequence => 2, + Self::Nop => 0, + Self::PopExcept => 0, + Self::PopJumpIfFalse => 0, + Self::PopJumpIfNone => 0, + Self::PopJumpIfNotNone => 0, + Self::PopJumpIfTrue => 0, + Self::PopTop => 0, + Self::PushExcInfo => 2, + Self::PushNull => 1, + Self::RaiseVarargs => 0, + Self::Reraise => oparg, + Self::Reserved => 0, + Self::Resume => 0, + Self::ResumeCheck => 0, + Self::ReturnConst => 0, + Self::ReturnGenerator => 1, + Self::ReturnValue => 0, + Self::Send => 2, + Self::SendGen => 2, + Self::SetupAnnotations => 0, + Self::SetAdd => 1 + (oparg - 1), + Self::SetFunctionAttribute => 1, + Self::SetUpdate => 1 + (oparg - 1), + Self::StoreAttr => 0, + Self::StoreAttrInstanceValue => 0, + Self::StoreAttrSlot => 0, + Self::StoreAttrWithHint => 0, + Self::StoreDeref => 0, + Self::StoreFast => 0, + Self::StoreFastLoadFast => 1, + Self::StoreFastStoreFast => 0, + Self::StoreGlobal => 0, + Self::StoreName => 0, + Self::StoreSlice => 0, + Self::StoreSubscr => 0, + Self::StoreSubscrDict => 0, + Self::StoreSubscrListInt => 0, + Self::Swap => 2 + (oparg - 2), + Self::ToBool => 1, + Self::ToBoolAlwaysTrue => 1, + Self::ToBoolBool => 1, + Self::ToBoolInt => 1, + Self::ToBoolList => 1, + Self::ToBoolNone => 1, + Self::ToBoolStr => 1, + Self::UnaryInvert => 1, + Self::UnaryNegative => 1, + Self::UnaryNot => 1, + Self::UnpackEx => 1 + (oparg >> 8) + (oparg & 0xFF), + Self::UnpackSequence => oparg, + Self::UnpackSequenceList => oparg, + Self::UnpackSequenceTuple => oparg, + Self::UnpackSequenceTwoTuple => 2, + Self::WithExceptStart => 5, + Self::YieldValue => 1, + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, TryFromPrimitive)] +#[num_enum(error_type(name = MarshalError, constructor = new_invalid_bytecode))] +#[repr(u16)] +pub enum PseudoOpcode { + Jump = 256, + JumpNoInterrupt = 257, + LoadClosure = 258, + LoadMethod = 259, + LoadSuperMethod = 260, + LoadZeroSuperAttr = 261, + LoadZeroSuperMethod = 262, + PopBlock = 263, + SetupCleanup = 264, + SetupFinally = 265, + SetupWith = 266, + StoreFastMaybeNull = 267, +} + +impl PseudoOpcode { + /// Whether opcode ID have 'HAS_ARG_FLAG' set. + #[must_use] + pub const fn has_arg(&self) -> bool { + matches!( + *self, + Self::Jump + | Self::JumpNoInterrupt + | Self::LoadClosure + | Self::LoadMethod + | Self::LoadSuperMethod + | Self::LoadZeroSuperAttr + | Self::LoadZeroSuperMethod + | Self::StoreFastMaybeNull + ) + } + + /// Whether opcode ID have 'HAS_CONST_FLAG' set. + #[must_use] + pub const fn has_const(&self) -> bool { + false + } + + /// Whether opcode ID have 'HAS_PURE_FLAG' set. + #[must_use] + pub const fn has_exc(&self) -> bool { + matches!( + *self, + Self::LoadClosure + | Self::PopBlock + | Self::SetupCleanup + | Self::SetupFinally + | Self::SetupWith + ) + } + + /// Whether opcode ID have 'HAS_FREE_FLAG' set. + #[must_use] + pub const fn has_free(&self) -> bool { + false + } + + /// Whether opcode ID have 'HAS_JUMP_FLAG' set. + #[must_use] + pub const fn has_jump(&self) -> bool { + matches!(*self, Self::Jump | Self::JumpNoInterrupt) + } + + /// Whether opcode ID have 'HAS_LOCAL_FLAG' set. + #[must_use] + pub const fn has_local(&self) -> bool { + matches!(*self, Self::LoadClosure | Self::StoreFastMaybeNull) + } + + /// Whether opcode ID have 'HAS_NAME_FLAG' set. + #[must_use] + pub const fn has_name(&self) -> bool { + matches!( + *self, + Self::LoadMethod + | Self::LoadSuperMethod + | Self::LoadZeroSuperAttr + | Self::LoadZeroSuperMethod + ) + } +} + +const fn new_invalid_bytecode(_: T) -> MarshalError { + MarshalError::InvalidBytecode +} diff --git a/scripts/gen_opcodes.py b/scripts/gen_opcodes.py new file mode 100755 index 00000000000..fc77303181c --- /dev/null +++ b/scripts/gen_opcodes.py @@ -0,0 +1,263 @@ +#!/usr/bin/env python +import abc +import collections +import itertools +import pathlib +import re +import subprocess # for `cargo fmt` +import sys +import typing + +if typing.TYPE_CHECKING: + from collections.abc import Iterable, Iterator + +CPYTHON_PATH = ( + pathlib.Path(__file__).parents[2] / "cpython" # Local filesystem path of cpython +) + +_cases_generator_path = CPYTHON_PATH / "Tools" / "cases_generator" +sys.path.append(str(_cases_generator_path)) + + +import analyzer +from generators_common import DEFAULT_INPUT +from stack import StackOffset, get_stack_effect + +ROOT = pathlib.Path(__file__).parents[1] +OUT_PATH = ROOT / "compiler" / "core" / "src" / "opcodes.rs" + +DERIVE = "#[derive(Clone, Copy, Debug, Eq, PartialEq, TryFromPrimitive)]" + + +def _var_size(var): + """ + Adapted from https://github.com/python/cpython/blob/bcee1c322115c581da27600f2ae55e5439c027eb/Tools/cases_generator/stack.py#L24-L36 + """ + if var.condition: + if var.condition == "0": + return "0" + elif var.condition == "1": + return var.size + elif var.condition == "oparg & 1" and var.size == "1": + return f"({var.condition})" + else: + return f"(if {var.condition} {{ {var.size} }} else {{ 0 }})" + else: + return var.size + + +StackOffset.pop = lambda self, item: self.popped.append(_var_size(item)) +StackOffset.push = lambda self, item: self.pushed.append(_var_size(item)) + + +def enum_variant_name(name: str) -> str: + return name.title().replace("_", "") + + +class InstructionsMeta(metaclass=abc.ABCMeta): + def __init__(self, analysis: analyzer.Analysis) -> None: + self._analysis = analysis + + @abc.abstractmethod + def __iter__( + self, + ) -> "Iterator[analyzer.Instruction | analyzer.PseudoInstruction]": ... + + @property + @abc.abstractmethod + def typ(self) -> str: + """ + Opcode ID type (u8/u16/u32/etc) + """ + ... + + @property + @abc.abstractmethod + def enum_name(self) -> str: ... + + @property + def rust_code(self) -> str: + enum_variant_defs = ",\n".join( + f"{inst.name} = {self._analysis.opmap[inst.name]}" for inst in self + ) + funcs = "\n\n".join( + getattr(self, attr).strip() + for attr in sorted(dir(self)) + if attr.startswith("fn_") + ) + + return f""" +{DERIVE} +#[num_enum(error_type(name = MarshalError, constructor = new_invalid_bytecode))] +#[repr({self.typ})] +pub enum {self.enum_name} {{ +{enum_variant_defs} +}} + +impl {self.enum_name} {{ +{funcs} +}} + """.strip() + + def build_has_attr_fn(self, fn_attr: str, prop_attr: str, doc_flag: str): + matches = "|".join( + f"Self::{inst.name}" for inst in self if getattr(inst.properties, prop_attr) + ) + if matches: + inner = f"matches!(*self, {matches})" + else: + inner = "false" + + return f""" +/// Whether opcode ID have '{doc_flag}' set. +#[must_use] +pub const fn has_{fn_attr}(&self) -> bool {{ +{inner} +}} + """ + + fn_has_arg = property( + lambda self: self.build_has_attr_fn("arg", "oparg", "HAS_ARG_FLAG") + ) + fn_has_const = property( + lambda self: self.build_has_attr_fn("const", "uses_co_consts", "HAS_CONST_FLAG") + ) + fn_has_name = property( + lambda self: self.build_has_attr_fn("name", "uses_co_names", "HAS_NAME_FLAG") + ) + fn_has_jump = property( + lambda self: self.build_has_attr_fn("jump", "jumps", "HAS_JUMP_FLAG") + ) + fn_has_free = property( + lambda self: self.build_has_attr_fn("free", "has_free", "HAS_FREE_FLAG") + ) + fn_has_local = property( + lambda self: self.build_has_attr_fn("local", "uses_locals", "HAS_LOCAL_FLAG") + ) + fn_has_exc = property( + lambda self: self.build_has_attr_fn("exc", "pure", "HAS_PURE_FLAG") + ) + + +class RealInstructions(InstructionsMeta): + enum_name = "RealOpcode" + typ = "u8" + + def __iter__(self) -> "Iterator[analyzer.Instruction | analyzer.PseudoInstruction]": + yield from sorted( + itertools.chain( + self._analysis.instructions.values(), + [analyzer.Instruction("INSTRUMENTED_LINE", [], None)], + ), + key=lambda inst: inst.name, + ) + + def _generate_stack_effect(self, direction: str) -> str: + """ + Adapted from https://github.com/python/cpython/blob/bcee1c322115c581da27600f2ae55e5439c027eb/Tools/cases_generator/stack.py#L89-L111 + """ + + lines = [] + for inst in self: + stack = get_stack_effect(inst) + if direction == "popped": + val = -stack.base_offset + elif direction == "pushed": + val = stack.top_offset - stack.base_offset + + expr = val.to_c() + line = f"Self::{inst.name} => {expr}" + lines.append(line) + + branches = ",\n".join(lines) + doc = "from" if direction == "popped" else "on" + return f""" +/// How many items should be {direction} {doc} the stack. +pub const fn num_{direction}(&self, oparg: i32) -> i32 {{ + match *self {{ +{branches} + }} +}} +""" + + @property + def fn_num_popped(self) -> str: + return self._generate_stack_effect("popped") + + @property + def fn_num_pushed(self) -> str: + return self._generate_stack_effect("pushed") + + @property + def fn_deopt(self) -> str: + def format_deopt_variants(lst: list[str]) -> str: + return "|".join(f"Self::{v}" for v in lst) + + deopts = collections.defaultdict(list) + for inst in self: + deopt = inst.name + + if inst.family is not None: + deopt = inst.family.name + + if inst.name == deopt: + continue + deopts[deopt].append(inst.name) + + branches = ",\n".join( + f"{format_deopt_variants(deopt)} => Self::{name}" + for name, deopt in sorted(deopts.items()) + ) + return f""" +pub const fn deopt(&self) -> Option {{ + Some(match *self {{ +{branches}, +_ => return None, + }}) +}} +""".strip() + + +class PseudoInstructions(InstructionsMeta): + enum_name = "PseudoOpcode" + typ = "u16" + + def __iter__(self) -> "Iterator[analyzer.PseudoInstruction]": + yield from sorted(self._analysis.pseudos.values(), key=lambda inst: inst.name) + + +def main(): + analysis = analyzer.analyze_files([DEFAULT_INPUT]) + real_instructions = RealInstructions(analysis) + pseudo_instructions = PseudoInstructions(analysis) + + script_path = pathlib.Path(__file__).absolute().relative_to(ROOT).as_posix() + out = f""" +//! Python opcode implementation. Currently aligned with cpython 3.13.7 + +// This file is generated by {script_path} +// Do not edit! + +use crate::marshal::MarshalError; +use num_enum::TryFromPrimitive; + +{real_instructions.rust_code} + +{pseudo_instructions.rust_code} + +const fn new_invalid_bytecode(_: T) -> MarshalError {{ + MarshalError::InvalidBytecode +}} + """.strip() + + replacements = {name: enum_variant_name(name) for name in analysis.opmap} + inner_pattern = "|".join(replacements) + pattern = re.compile(rf"\b({inner_pattern})\b") + out = pattern.sub(lambda m: replacements[m.group(0)], out) + OUT_PATH.write_text(out) + print("Running `cargo fmt`") + subprocess.run(["cargo", "fmt"], cwd=ROOT) + + +if __name__ == "__main__": + main() diff --git a/stdlib/src/opcode.rs b/stdlib/src/opcode.rs index c355b59df91..c07040be9c7 100644 --- a/stdlib/src/opcode.rs +++ b/stdlib/src/opcode.rs @@ -4,110 +4,11 @@ pub(crate) use opcode::make_module; mod opcode { use crate::vm::{ AsObject, PyObjectRef, PyResult, VirtualMachine, - builtins::{PyBool, PyInt, PyIntRef, PyNone}, - bytecode::Instruction, - match_class, + builtins::{PyInt, PyIntRef}, + opcode::{Opcode, PseudoOpcode, RealOpcode}, }; - use std::ops::Deref; - - struct Opcode(Instruction); - - impl Deref for Opcode { - type Target = Instruction; - - fn deref(&self) -> &Self::Target { - &self.0 - } - } - - impl Opcode { - // https://github.com/python/cpython/blob/bcee1c322115c581da27600f2ae55e5439c027eb/Include/opcode_ids.h#L238 - const HAVE_ARGUMENT: i32 = 44; - - pub fn try_from_pyint(raw: PyIntRef, vm: &VirtualMachine) -> PyResult { - let instruction = raw - .try_to_primitive::(vm) - .and_then(|v| { - Instruction::try_from(v).map_err(|_| { - vm.new_exception_empty(vm.ctx.exceptions.value_error.to_owned()) - }) - }) - .map_err(|_| vm.new_value_error("invalid opcode or oparg"))?; - - Ok(Self(instruction)) - } - - /// https://github.com/python/cpython/blob/bcee1c322115c581da27600f2ae55e5439c027eb/Include/internal/pycore_opcode_metadata.h#L914-L916 - #[must_use] - pub const fn is_valid(opcode: i32) -> bool { - opcode >= 0 && opcode < 268 && opcode != 255 - } - - // All `has_*` methods below mimics - // https://github.com/python/cpython/blob/bcee1c322115c581da27600f2ae55e5439c027eb/Include/internal/pycore_opcode_metadata.h#L966-L1190 - - #[must_use] - pub const fn has_arg(opcode: i32) -> bool { - Self::is_valid(opcode) && opcode > Self::HAVE_ARGUMENT - } - - #[must_use] - pub const fn has_const(opcode: i32) -> bool { - Self::is_valid(opcode) && matches!(opcode, 83 | 103 | 240) - } - - #[must_use] - pub const fn has_name(opcode: i32) -> bool { - Self::is_valid(opcode) - && matches!( - opcode, - 63 | 66 - | 67 - | 74 - | 75 - | 82 - | 90 - | 91 - | 92 - | 93 - | 108 - | 113 - | 114 - | 259 - | 260 - | 261 - | 262 - ) - } - - #[must_use] - pub const fn has_jump(opcode: i32) -> bool { - Self::is_valid(opcode) - && matches!( - opcode, - 72 | 77 | 78 | 79 | 97 | 98 | 99 | 100 | 104 | 256 | 257 - ) - } - - #[must_use] - pub const fn has_free(opcode: i32) -> bool { - Self::is_valid(opcode) && matches!(opcode, 64 | 84 | 89 | 94 | 109) - } - - #[must_use] - pub const fn has_local(opcode: i32) -> bool { - Self::is_valid(opcode) - && matches!(opcode, 65 | 85 | 86 | 87 | 88 | 110 | 111 | 112 | 258 | 267) - } - - #[must_use] - pub const fn has_exc(opcode: i32) -> bool { - Self::is_valid(opcode) && matches!(opcode, 264..=266) - } - } - #[pyattr] - const ENABLE_SPECIALIZATION: i8 = 1; + const ENABLE_SPECIALIZATION: u8 = 1; #[derive(FromArgs)] struct StackEffectArgs { @@ -119,8 +20,11 @@ mod opcode { jump: Option, } + // https://github.com/python/cpython/blob/bcee1c322115c581da27600f2ae55e5439c027eb/Python/compile.c#L704-L767 #[pyfunction] fn stack_effect(args: StackEffectArgs, vm: &VirtualMachine) -> PyResult { + let invalid_opcode = || vm.new_value_error("invalid opcode or oparg"); + let oparg = args .oparg .map(|v| { @@ -131,67 +35,136 @@ mod opcode { ))); } v.downcast_ref::() - .ok_or_else(|| vm.new_type_error(""))? - .try_to_primitive::(vm) + .ok_or_else(|| { + vm.new_type_error(format!( + "'{}' object cannot be interpreted as an integer", + v.class().name() + )) + })? + .try_to_primitive::(vm) }) .unwrap_or(Ok(0))?; let jump = args .jump .map(|v| { - match_class!(match v { - b @ PyBool => Ok(b.is(&vm.ctx.true_value)), - _n @ PyNone => Ok(false), - _ => { - Err(vm.new_value_error("stack_effect: jump must be False, True or None")) - } + v.try_to_bool(vm).map_err(|_| { + vm.new_value_error("stack_effect: jump must be False, True or None") }) }) .unwrap_or(Ok(false))?; - let opcode = Opcode::try_from_pyint(args.opcode, vm)?; + let raw_opcode = args.opcode.try_to_primitive::(vm)?; + let opcode = Opcode::try_from(raw_opcode).map_err(|_| invalid_opcode())?; + + Ok(match opcode { + Opcode::Real(r_op) => { + // ExitInitCheck at CPython is under the pseudos match? + // https://github.com/python/cpython/blob/bcee1c322115c581da27600f2ae55e5439c027eb/Python/compile.c#L736-L737 + if matches!(r_op, RealOpcode::ExitInitCheck) { + return Ok(-1); + } + + if r_op.deopt().is_some() { + // Specialized instructions are not supported. + return Err(invalid_opcode()); + } + + let popped = r_op.num_popped(oparg); + let pushed = r_op.num_pushed(oparg); + + if popped < 0 || pushed < 0 { + return Err(invalid_opcode()); + } + pushed - popped + } + Opcode::Pseudo(p_op) => { + match p_op { + PseudoOpcode::PopBlock | PseudoOpcode::Jump | PseudoOpcode::JumpNoInterrupt => { + 0 + } + // Exception handling pseudo-instructions + PseudoOpcode::SetupFinally => { + if jump { + 1 + } else { + 0 + } + } + PseudoOpcode::SetupCleanup => { + if jump { + 2 + } else { + 0 + } + } + PseudoOpcode::SetupWith => { + if jump { + 1 + } else { + 0 + } + } + PseudoOpcode::StoreFastMaybeNull => -1, + PseudoOpcode::LoadClosure => 1, + PseudoOpcode::LoadMethod => 1, + PseudoOpcode::LoadSuperMethod + | PseudoOpcode::LoadZeroSuperMethod + | PseudoOpcode::LoadZeroSuperAttr => -1, + } + } + }) + } + + macro_rules! real_opcode_check { + ($opcode:expr, $method:ident) => {{ + let opcode_u8 = match u8::try_from($opcode) { + Ok(v) => v, + Err(_) => return false, + }; - Ok(opcode.stack_effect(oparg.into(), jump)) + RealOpcode::try_from(opcode_u8).map_or(false, |op| op.$method()) + }}; } #[pyfunction] fn is_valid(opcode: i32) -> bool { - Opcode::is_valid(opcode) + Opcode::try_from(opcode).is_ok() } #[pyfunction] fn has_arg(opcode: i32) -> bool { - Opcode::has_arg(opcode) + real_opcode_check!(opcode, has_arg) } #[pyfunction] fn has_const(opcode: i32) -> bool { - Opcode::has_const(opcode) + real_opcode_check!(opcode, has_const) } #[pyfunction] fn has_name(opcode: i32) -> bool { - Opcode::has_name(opcode) + real_opcode_check!(opcode, has_name) } #[pyfunction] fn has_jump(opcode: i32) -> bool { - Opcode::has_jump(opcode) + real_opcode_check!(opcode, has_jump) } #[pyfunction] fn has_free(opcode: i32) -> bool { - Opcode::has_free(opcode) + real_opcode_check!(opcode, has_free) } #[pyfunction] fn has_local(opcode: i32) -> bool { - Opcode::has_local(opcode) + real_opcode_check!(opcode, has_local) } #[pyfunction] fn has_exc(opcode: i32) -> bool { - Opcode::has_exc(opcode) + real_opcode_check!(opcode, has_exc) } #[pyfunction] diff --git a/vm/src/lib.rs b/vm/src/lib.rs index 94147345a6b..59691deee2a 100644 --- a/vm/src/lib.rs +++ b/vm/src/lib.rs @@ -99,7 +99,7 @@ pub use self::object::{ pub use self::vm::{Context, Interpreter, Settings, VirtualMachine}; pub use rustpython_common as common; -pub use rustpython_compiler_core::{bytecode, frozen}; +pub use rustpython_compiler_core::{bytecode, frozen, opcode}; pub use rustpython_literal as literal; #[doc(hidden)]