diff --git a/README.md b/README.md index 95aaac8..2a836ba 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Python Cloud Debugger Agent Google [Cloud Debugger](https://cloud.google.com/debugger/) for -Python 3.6, Python 3.7, Python 3.8 and Python 3.9. +Python 3.6, Python 3.7, Python 3.8, Python 3.9, and Python 3.10. ## Overview @@ -28,7 +28,7 @@ tested on Debian Linux, but it should work on other distributions as well. Cloud Debugger consists of 3 primary components: 1. The Python debugger agent (this repo implements one for CPython 3.6, - 3.7, 3.8 and 3.9). + 3.7, 3.8, 3.9, and 3.10). 2. Cloud Debugger service storing and managing snapshots/logpoints. Explore the APIs using [APIs Explorer](https://cloud.google.com/debugger/api/reference/rest/). diff --git a/src/build-wheels.sh b/src/build-wheels.sh index 9fec334..1e4a0c6 100755 --- a/src/build-wheels.sh +++ b/src/build-wheels.sh @@ -3,7 +3,7 @@ GFLAGS_URL=https://github.com/gflags/gflags/archive/v2.2.2.tar.gz GLOG_URL=https://github.com/google/glog/archive/v0.4.0.tar.gz -SUPPORTED_VERSIONS=(cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39) +SUPPORTED_VERSIONS=(cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39 cp310-cp310) ROOT=$(cd $(dirname "${BASH_SOURCE[0]}") >/dev/null; /bin/pwd -P) diff --git a/src/googleclouddebugger/BUILD b/src/googleclouddebugger/BUILD index dedf1aa..c0d6ae7 100644 --- a/src/googleclouddebugger/BUILD +++ b/src/googleclouddebugger/BUILD @@ -24,6 +24,7 @@ cc_library( deps = [ ":common", ":nullable", + "//src/third_party:pylinetable", ], ) diff --git a/src/googleclouddebugger/bytecode_breakpoint.cc b/src/googleclouddebugger/bytecode_breakpoint.cc index 8b782d7..dd1af6e 100644 --- a/src/googleclouddebugger/bytecode_breakpoint.cc +++ b/src/googleclouddebugger/bytecode_breakpoint.cc @@ -82,7 +82,7 @@ int BytecodeBreakpoint::CreateBreakpoint( // table in case "code_object" is already patched with another breakpoint. CodeObjectLinesEnumerator lines_enumerator( code_object->co_firstlineno, - code_object_breakpoints->original_lnotab.get()); + code_object_breakpoints->original_linedata.get()); while (lines_enumerator.line_number() != line) { if (!lines_enumerator.Next()) { LOG(ERROR) << "Line " << line << " not found in " @@ -237,8 +237,14 @@ BytecodeBreakpoint::PreparePatchCodeObject( return nullptr; // Probably a built-in method or uninitialized code object. } - data->original_lnotab = + // Store the original (unmodified) line data. +#if PY_VERSION_HEX < 0x030A0000 + data->original_linedata = ScopedPyObject::NewReference(code_object.get()->co_lnotab); +#else + data->original_linedata = + ScopedPyObject::NewReference(code_object.get()->co_linetable); +#endif patches_[code_object] = data.get(); return data.release(); @@ -262,29 +268,38 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { << " from patched " << code->zombie_refs.back().get(); Py_INCREF(code_object->co_code); + // Restore the original line data to the code object. +#if PY_VERSION_HEX < 0x030A0000 if (code_object->co_lnotab != nullptr) { code->zombie_refs.push_back(ScopedPyObject(code_object->co_lnotab)); } - code_object->co_lnotab = code->original_lnotab.get(); + code_object->co_lnotab = code->original_linedata.get(); Py_INCREF(code_object->co_lnotab); +#else + if (code_object->co_linetable != nullptr) { + code->zombie_refs.push_back(ScopedPyObject(code_object->co_linetable)); + } + code_object->co_linetable = code->original_linedata.get(); + Py_INCREF(code_object->co_linetable); +#endif return; } std::vector bytecode = PyBytesToByteArray(code->original_code.get()); - bool has_lnotab = false; - std::vector lnotab; - if (!code->original_lnotab.is_null() && - PyBytes_CheckExact(code->original_lnotab.get())) { - has_lnotab = true; - lnotab = PyBytesToByteArray(code->original_lnotab.get()); + bool has_linedata = false; + std::vector linedata; + if (!code->original_linedata.is_null() && + PyBytes_CheckExact(code->original_linedata.get())) { + has_linedata = true; + linedata = PyBytesToByteArray(code->original_linedata.get()); } BytecodeManipulator bytecode_manipulator( std::move(bytecode), - has_lnotab, - std::move(lnotab)); + has_linedata, + std::move(linedata)); // Add callbacks to code object constants and patch the bytecode. std::vector callbacks; @@ -306,17 +321,16 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { callbacks.push_back(breakpoint.hit_callable.get()); -#if PY_MAJOR_VERSION >= 3 // In Python 3, since we allow upgrading of instructions to use // EXTENDED_ARG, the offsets for lines originally calculated might not be // accurate, so we need to recalculate them each insertion. offset_found = false; - if (bytecode_manipulator.has_lnotab()) { - ScopedPyObject lnotab(PyBytes_FromStringAndSize( - reinterpret_cast(bytecode_manipulator.lnotab().data()), - bytecode_manipulator.lnotab().size())); + if (bytecode_manipulator.has_linedata()) { + ScopedPyObject linedata(PyBytes_FromStringAndSize( + reinterpret_cast(bytecode_manipulator.linedata().data()), + bytecode_manipulator.linedata().size())); CodeObjectLinesEnumerator lines_enumerator(code_object->co_firstlineno, - lnotab.release()); + linedata.release()); while (lines_enumerator.line_number() != breakpoint.line) { if (!lines_enumerator.Next()) { break; @@ -325,7 +339,6 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { } offset_found = lines_enumerator.line_number() == breakpoint.line; } -#endif if (!offset_found || !bytecode_manipulator.InjectMethodCall(offset, const_index)) { @@ -355,14 +368,26 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) { << " reassigned to " << code_object->co_code << ", original was " << code->original_code.get(); - if (has_lnotab) { + // Update the line data in the code object. +#if PY_VERSION_HEX < 0x030A0000 + if (has_linedata) { code->zombie_refs.push_back(ScopedPyObject(code_object->co_lnotab)); ScopedPyObject lnotab_string(PyBytes_FromStringAndSize( - reinterpret_cast(bytecode_manipulator.lnotab().data()), - bytecode_manipulator.lnotab().size())); + reinterpret_cast(bytecode_manipulator.linedata().data()), + bytecode_manipulator.linedata().size())); DCHECK(!lnotab_string.is_null()); code_object->co_lnotab = lnotab_string.release(); } +#else + if (has_linedata) { + code->zombie_refs.push_back(ScopedPyObject(code_object->co_linetable)); + ScopedPyObject linetable_string(PyBytes_FromStringAndSize( + reinterpret_cast(bytecode_manipulator.linedata().data()), + bytecode_manipulator.linedata().size())); + DCHECK(!linetable_string.is_null()); + code_object->co_linetable = linetable_string.release(); + } +#endif // Invoke error callback after everything else is done. The callback may // decide to remove the breakpoint, which will change "code". diff --git a/src/googleclouddebugger/bytecode_breakpoint.h b/src/googleclouddebugger/bytecode_breakpoint.h index 057766f..5eaa893 100644 --- a/src/googleclouddebugger/bytecode_breakpoint.h +++ b/src/googleclouddebugger/bytecode_breakpoint.h @@ -162,9 +162,10 @@ class BytecodeBreakpoint { // Original value of PyCodeObject::co_code before patching. ScopedPyObject original_code; - // Original value of PythonCode::co_lnotab before patching. - // "lnotab" stands for "line numbers table" in CPython lingo. - ScopedPyObject original_lnotab; + // Original value of PythonCode::co_lnotab or PythonCode::co_linetable + // before patching. This is the line numbers table in CPython <= 3.9 and + // CPython >= 3.10 respectively + ScopedPyObject original_linedata; }; // Loads code object into "patches_" if not there yet. Returns nullptr if diff --git a/src/googleclouddebugger/bytecode_manipulator.cc b/src/googleclouddebugger/bytecode_manipulator.cc index 9ee7e27..3c95edd 100644 --- a/src/googleclouddebugger/bytecode_manipulator.cc +++ b/src/googleclouddebugger/bytecode_manipulator.cc @@ -228,11 +228,11 @@ static std::vector BuildMethodCall(int const_index) { } BytecodeManipulator::BytecodeManipulator(std::vector bytecode, - const bool has_lnotab, - std::vector lnotab) - : has_lnotab_(has_lnotab) { + const bool has_linedata, + std::vector linedata) + : has_linedata_(has_linedata) { data_.bytecode = std::move(bytecode); - data_.lnotab = std::move(lnotab); + data_.linedata = std::move(linedata); strategy_ = STRATEGY_INSERT; // Default strategy. for (auto it = data_.bytecode.begin(); it < data_.bytecode.end(); ) { @@ -296,21 +296,13 @@ struct Insertion { // InsertAndUpdateBranchInstructions. static const int kMaxInsertionIterations = 10; - +#if PY_VERSION_HEX < 0x030A0000 // Updates the line number table for an insertion in the bytecode. -// This is different than what the Python 2 version of InsertMethodCall() does. -// It should be more accurate, but is confined to Python 3 only for safety. -// This handles the case of adding insertion for EXTENDED_ARG better. // Example for inserting 2 bytes at offset 2: -// lnotab: [{2, 1}, {4, 1}] // {offset_delta, line_delta} -// Old algorithm: [{2, 0}, {2, 1}, {4, 1}] -// New algorithm: [{2, 1}, {6, 1}] -// In the old version, trying to get the offset to insert a breakpoint right -// before line 1 would result in an offset of 2, which is inaccurate as the -// instruction before is an EXTENDED_ARG which will now be applied to the first -// instruction inserted instead of its original target. -static void InsertAndUpdateLnotab(int offset, int size, - std::vector* lnotab) { +// lnotab: [{2, 1}, {4, 1}] // {offset_delta, line_delta} +// updated: [{2, 1}, {6, 1}] +static void InsertAndUpdateLineData(int offset, int size, + std::vector* lnotab) { int current_offset = 0; for (auto it = lnotab->begin(); it != lnotab->end(); it += 2) { current_offset += it[0]; @@ -330,6 +322,36 @@ static void InsertAndUpdateLnotab(int offset, int size, } } } +#else +// Updates the line number table for an insertion in the bytecode. +// Example for inserting 2 bytes at offset 2: +// linetable: [{2, 1}, {4, 1}] // {address_end_delta, line_delta} +// updated: [{2, 1}, {6, 1}] +// +// For more information on the linetable format in Python 3.10, see: +// https://github.com/python/cpython/blob/main/Objects/lnotab_notes.txt +static void InsertAndUpdateLineData(int offset, int size, + std::vector* linetable) { + int current_offset = 0; + for (auto it = linetable->begin(); it != linetable->end(); it += 2) { + current_offset += it[0]; + + if (current_offset > offset) { + int remaining_size = it[0] + size; + int remaining_lines = it[1]; + it = linetable->erase(it, it + 2); + while (remaining_size > 0xFE) { // Max address delta is listed as 254. + it = linetable->insert(it, 0xFE) + 1; + it = linetable->insert(it, 0) + 1; + remaining_size -= 0xFE; + } + it = linetable->insert(it, remaining_size) + 1; + it = linetable->insert(it, remaining_lines) + 1; + return; + } + } +} +#endif // Reserves space for instructions to be inserted into the bytecode, and // calculates the new offsets and arguments of branch instructions. @@ -426,8 +448,16 @@ static bool InsertAndUpdateBranchInstructions( } if (need_to_update) { +#if PY_VERSION_HEX < 0x030A0000 + int delta = insertion.size; +#else + // Changed in version 3.10: The argument of jump, exception handling + // and loop instructions is now the instruction offset rather than the + // byte offset. + int delta = insertion.size / 2; +#endif PythonInstruction new_instruction = - PythonInstructionArg(instruction.opcode, arg + insertion.size); + PythonInstructionArg(instruction.opcode, arg + delta); int size_diff = new_instruction.size - instruction.size; if (size_diff > 0) { insertions.push_back(Insertion { size_diff, it->current_offset }); @@ -490,8 +520,8 @@ bool BytecodeManipulator::InsertMethodCall( // Insert the method call. data->bytecode.insert(data->bytecode.begin() + offset, method_call_size, NOP); WriteInstructions(data->bytecode.begin() + offset, method_call_instructions); - if (has_lnotab_) { - InsertAndUpdateLnotab(offset, method_call_size, &data->lnotab); + if (has_linedata_) { + InsertAndUpdateLineData(offset, method_call_size, &data->linedata); } // Write new branch instructions. @@ -503,8 +533,8 @@ bool BytecodeManipulator::InsertMethodCall( int offset = it->current_offset; if (size_diff > 0) { data->bytecode.insert(data->bytecode.begin() + offset, size_diff, NOP); - if (has_lnotab_) { - InsertAndUpdateLnotab(it->current_offset, size_diff, &data->lnotab); + if (has_linedata_) { + InsertAndUpdateLineData(it->current_offset, size_diff, &data->linedata); } } else if (size_diff < 0) { // The Python compiler sometimes prematurely adds EXTENDED_ARG with an diff --git a/src/googleclouddebugger/bytecode_manipulator.h b/src/googleclouddebugger/bytecode_manipulator.h index d3a7de4..31a5e46 100644 --- a/src/googleclouddebugger/bytecode_manipulator.h +++ b/src/googleclouddebugger/bytecode_manipulator.h @@ -71,17 +71,17 @@ namespace cdbg { // 19 JUMP_ABSOLUTE 3 class BytecodeManipulator { public: - BytecodeManipulator(std::vector bytecode, const bool has_lnotab, - std::vector lnotab); + BytecodeManipulator(std::vector bytecode, const bool has_linedata, + std::vector linedata); // Gets the transformed method bytecode. const std::vector& bytecode() const { return data_.bytecode; } // Returns true if this class was initialized with line numbers table. - bool has_lnotab() const { return has_lnotab_; } + bool has_linedata() const { return has_linedata_; } // Gets the method line numbers table or empty vector if not available. - const std::vector& lnotab() const { return data_.lnotab; } + const std::vector& linedata() const { return data_.linedata; } // Rewrites the method bytecode to invoke callable at the specified offset. // Return false if the method call could not be inserted. The bytecode @@ -109,8 +109,8 @@ class BytecodeManipulator { // Bytecode of a transformed method. std::vector bytecode; - // Method line numbers table or empty vector if "has_lnotab_" is false. - std::vector lnotab; + // Method line numbers table or empty vector if "has_linedata_" is false. + std::vector linedata; }; // Insert space into the bytecode. This space is later used to add new @@ -130,7 +130,7 @@ class BytecodeManipulator { Data data_; // True if the method has line number table. - const bool has_lnotab_; + const bool has_linedata_; // Algorithm to insert breakpoint callback into method bytecode. Strategy strategy_; diff --git a/src/googleclouddebugger/immutability_tracer.cc b/src/googleclouddebugger/immutability_tracer.cc index d5f102a..c05d407 100644 --- a/src/googleclouddebugger/immutability_tracer.cc +++ b/src/googleclouddebugger/immutability_tracer.cc @@ -400,6 +400,16 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) { #if PY_VERSION_HEX >= 0x03080000 // Added back in Python 3.8 (was in 2.7 as well) case ROT_FOUR: +#endif +#if PY_VERSION_HEX >= 0x030A0000 + // Added in Python 3.10 + case COPY_DICT_WITHOUT_KEYS: + case GET_LEN: + case MATCH_MAPPING: + case MATCH_SEQUENCE: + case MATCH_KEYS: + case MATCH_CLASS: + case ROT_N: #endif return OPCODE_NOT_MUTABLE; @@ -468,6 +478,10 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) { case RERAISE: case WITH_EXCEPT_START: case LOAD_ASSERTION_ERROR: +#endif +#if PY_VERSION_HEX >= 0x030A0000 + // Added in Python 3.10 + case GEN_START: #endif return OPCODE_MUTABLE; diff --git a/src/googleclouddebugger/module_explorer.py b/src/googleclouddebugger/module_explorer.py index acecea9..99829df 100644 --- a/src/googleclouddebugger/module_explorer.py +++ b/src/googleclouddebugger/module_explorer.py @@ -78,15 +78,24 @@ def _GetLineNumbers(code_object): Yields: The next line number in the code object. """ - # Get the line number deltas, which are the odd number entries, from the - # lnotab. See - # https://svn.python.org/projects/python/branches/pep-0384/Objects/lnotab_notes.txt - # In Python 3, this is just a byte array. - line_incrs = code_object.co_lnotab[1::2] - current_line = code_object.co_firstlineno - for line_incr in line_incrs: - current_line += line_incr - yield current_line + + if sys.version_info.minor < 10: + # Get the line number deltas, which are the odd number entries, from the + # lnotab. See + # https://svn.python.org/projects/python/branches/pep-0384/Objects/lnotab_notes.txt + # In Python 3, prior to 3.10, this is just a byte array. + line_incrs = code_object.co_lnotab[1::2] + current_line = code_object.co_firstlineno + for line_incr in line_incrs: + current_line += line_incr + yield current_line + else: + # Get the line numbers directly, which are the third entry in the tuples. + # https://peps.python.org/pep-0626/#the-new-co-lines-method-of-code-objects + line_numbers = [entry[2] for entry in code_object.co_lines()] + for line_number in line_numbers: + if line_number is not None: + yield line_number def _GetModuleCodeObjects(module): diff --git a/src/googleclouddebugger/python_util.cc b/src/googleclouddebugger/python_util.cc index 90b67ce..e28a142 100644 --- a/src/googleclouddebugger/python_util.cc +++ b/src/googleclouddebugger/python_util.cc @@ -23,6 +23,11 @@ #include +#if PY_VERSION_HEX >= 0x030A0000 +#include "../third_party/pylinetable.h" +#endif // PY_VERSION_HEX >= 0x030A0000 + + namespace devtools { namespace cdbg { @@ -32,17 +37,22 @@ static PyObject* g_debuglet_module = nullptr; CodeObjectLinesEnumerator::CodeObjectLinesEnumerator( PyCodeObject* code_object) { +#if PY_VERSION_HEX < 0x030A0000 Initialize(code_object->co_firstlineno, code_object->co_lnotab); +#else + Initialize(code_object->co_firstlineno, code_object->co_linetable); +#endif // PY_VERSION_HEX < 0x030A0000 } CodeObjectLinesEnumerator::CodeObjectLinesEnumerator( int firstlineno, - PyObject* lnotab) { - Initialize(firstlineno, lnotab); + PyObject* linedata) { + Initialize(firstlineno, linedata); } +#if PY_VERSION_HEX < 0x030A0000 void CodeObjectLinesEnumerator::Initialize( int firstlineno, PyObject* lnotab) { @@ -86,7 +96,26 @@ bool CodeObjectLinesEnumerator::Next() { } } } +#else + +void CodeObjectLinesEnumerator::Initialize( + int firstlineno, + PyObject* linetable) { + Py_ssize_t length = PyBytes_Size(linetable); + _PyLineTable_InitAddressRange(PyBytes_AsString(linetable), length, firstlineno, &range_); +} +bool CodeObjectLinesEnumerator::Next() { + while (_PyLineTable_NextAddressRange(&range_)) { + if (range_.ar_line >= 0) { + line_number_ = range_.ar_line; + offset_ = range_.ar_start; + return true; + } + } + return false; +} +#endif // PY_VERSION_HEX < 0x030A0000 PyObject* GetDebugletModule() { DCHECK(g_debuglet_module != nullptr); diff --git a/src/googleclouddebugger/python_util.h b/src/googleclouddebugger/python_util.h index 57b5425..10116be 100644 --- a/src/googleclouddebugger/python_util.h +++ b/src/googleclouddebugger/python_util.h @@ -178,7 +178,7 @@ class CodeObjectLinesEnumerator { explicit CodeObjectLinesEnumerator(PyCodeObject* code_object); // Uses explicitly provided line table. - CodeObjectLinesEnumerator(int firstlineno, PyObject* lnotab); + CodeObjectLinesEnumerator(int firstlineno, PyObject* linedata); // Moves over to the next entry in code object line table. bool Next(); @@ -190,24 +190,31 @@ class CodeObjectLinesEnumerator { int32_t line_number() const { return line_number_; } private: - void Initialize(int firstlineno, PyObject* lnotab); + void Initialize(int firstlineno, PyObject* linedata); private: + // Bytecode offset of the current line. + int32_t offset_; + + // Current source code line number + int32_t line_number_; + +#if PY_VERSION_HEX < 0x030A0000 // Number of remaining entries in line table. int remaining_entries_; // Pointer to the next entry of line table. const uint8_t* next_entry_; - // Bytecode offset of the current line. - int32_t offset_; - - // Current source code line number - int32_t line_number_; +#else + // Current address range in the linetable data. + PyCodeAddressRange range_; +#endif DISALLOW_COPY_AND_ASSIGN(CodeObjectLinesEnumerator); }; + template bool operator== (TPointer* ref1, const ScopedPyObjectT& ref2) { return ref2 == ref1; diff --git a/src/setup.py b/src/setup.py index 0c24bad..6b380d5 100644 --- a/src/setup.py +++ b/src/setup.py @@ -117,6 +117,7 @@ def ReadConfig(section, value, default): 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', ]) diff --git a/src/third_party/BUILD b/src/third_party/BUILD new file mode 100644 index 0000000..bcce1e2 --- /dev/null +++ b/src/third_party/BUILD @@ -0,0 +1,7 @@ +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "pylinetable", + hdrs = ["pylinetable.h"], +) + diff --git a/src/third_party/pylinetable.h b/src/third_party/pylinetable.h new file mode 100644 index 0000000..ea44c64 --- /dev/null +++ b/src/third_party/pylinetable.h @@ -0,0 +1,210 @@ +/** + * Copyright (c) 2001-2023 Python Software Foundation; All Rights Reserved + * + * You may obtain a copy of the PSF License at + * + * https://docs.python.org/3/license.html + */ + +#ifndef DEVTOOLS_CDBG_DEBUGLETS_PYTHON_PYLINETABLE_H_ +#define DEVTOOLS_CDBG_DEBUGLETS_PYTHON_PYLINETABLE_H_ + +/* Python Linetable helper methods. + * They are not part of the cpython api. + * This code has been extracted from: + * https://github.com/python/cpython/blob/main/Objects/codeobject.c + * + * See https://peps.python.org/pep-0626/#out-of-process-debuggers-and-profilers + * for more information about this code and its usage. + */ + +#if PY_VERSION_HEX >= 0x030B0000 +// Things are different in 3.11 than 3.10. +// See https://github.com/python/cpython/blob/main/Objects/locations.md + +typedef enum _PyCodeLocationInfoKind { + /* short forms are 0 to 9 */ + PY_CODE_LOCATION_INFO_SHORT0 = 0, + /* one lineforms are 10 to 12 */ + PY_CODE_LOCATION_INFO_ONE_LINE0 = 10, + PY_CODE_LOCATION_INFO_ONE_LINE1 = 11, + PY_CODE_LOCATION_INFO_ONE_LINE2 = 12, + + PY_CODE_LOCATION_INFO_NO_COLUMNS = 13, + PY_CODE_LOCATION_INFO_LONG = 14, + PY_CODE_LOCATION_INFO_NONE = 15 +} _PyCodeLocationInfoKind; + +/** Out of process API for initializing the location table. */ +extern void _PyLineTable_InitAddressRange( + const char *linetable, + Py_ssize_t length, + int firstlineno, + PyCodeAddressRange *range); + +/** API for traversing the line number table. */ +extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range); + + +void _PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range) { + range->opaque.lo_next = linetable; + range->opaque.limit = range->opaque.lo_next + length; + range->ar_start = -1; + range->ar_end = 0; + range->opaque.computed_line = firstlineno; + range->ar_line = -1; +} + +static int +scan_varint(const uint8_t *ptr) +{ + unsigned int read = *ptr++; + unsigned int val = read & 63; + unsigned int shift = 0; + while (read & 64) { + read = *ptr++; + shift += 6; + val |= (read & 63) << shift; + } + return val; +} + +static int +scan_signed_varint(const uint8_t *ptr) +{ + unsigned int uval = scan_varint(ptr); + if (uval & 1) { + return -(int)(uval >> 1); + } + else { + return uval >> 1; + } +} + +static int +get_line_delta(const uint8_t *ptr) +{ + int code = ((*ptr) >> 3) & 15; + switch (code) { + case PY_CODE_LOCATION_INFO_NONE: + return 0; + case PY_CODE_LOCATION_INFO_NO_COLUMNS: + case PY_CODE_LOCATION_INFO_LONG: + return scan_signed_varint(ptr+1); + case PY_CODE_LOCATION_INFO_ONE_LINE0: + return 0; + case PY_CODE_LOCATION_INFO_ONE_LINE1: + return 1; + case PY_CODE_LOCATION_INFO_ONE_LINE2: + return 2; + default: + /* Same line */ + return 0; + } +} + +static int +is_no_line_marker(uint8_t b) +{ + return (b >> 3) == 0x1f; +} + + +#define ASSERT_VALID_BOUNDS(bounds) \ + assert(bounds->opaque.lo_next <= bounds->opaque.limit && \ + (bounds->ar_line == -1 || bounds->ar_line == bounds->opaque.computed_line) && \ + (bounds->opaque.lo_next == bounds->opaque.limit || \ + (*bounds->opaque.lo_next) & 128)) + +static int +next_code_delta(PyCodeAddressRange *bounds) +{ + assert((*bounds->opaque.lo_next) & 128); + return (((*bounds->opaque.lo_next) & 7) + 1) * sizeof(_Py_CODEUNIT); +} + +static void +advance(PyCodeAddressRange *bounds) +{ + ASSERT_VALID_BOUNDS(bounds); + bounds->opaque.computed_line += get_line_delta(reinterpret_cast(bounds->opaque.lo_next)); + if (is_no_line_marker(*bounds->opaque.lo_next)) { + bounds->ar_line = -1; + } + else { + bounds->ar_line = bounds->opaque.computed_line; + } + bounds->ar_start = bounds->ar_end; + bounds->ar_end += next_code_delta(bounds); + do { + bounds->opaque.lo_next++; + } while (bounds->opaque.lo_next < bounds->opaque.limit && + ((*bounds->opaque.lo_next) & 128) == 0); + ASSERT_VALID_BOUNDS(bounds); +} + +static inline int +at_end(PyCodeAddressRange *bounds) { + return bounds->opaque.lo_next >= bounds->opaque.limit; +} + +int +_PyLineTable_NextAddressRange(PyCodeAddressRange *range) +{ + if (at_end(range)) { + return 0; + } + advance(range); + assert(range->ar_end > range->ar_start); + return 1; +} +#elif PY_VERSION_HEX >= 0x030A0000 +void +_PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range) +{ + range->opaque.lo_next = linetable; + range->opaque.limit = range->opaque.lo_next + length; + range->ar_start = -1; + range->ar_end = 0; + range->opaque.computed_line = firstlineno; + range->ar_line = -1; +} + +static void +advance(PyCodeAddressRange *bounds) +{ + bounds->ar_start = bounds->ar_end; + int delta = ((unsigned char *)bounds->opaque.lo_next)[0]; + bounds->ar_end += delta; + int ldelta = ((signed char *)bounds->opaque.lo_next)[1]; + bounds->opaque.lo_next += 2; + if (ldelta == -128) { + bounds->ar_line = -1; + } + else { + bounds->opaque.computed_line += ldelta; + bounds->ar_line = bounds->opaque.computed_line; + } +} + +static inline int +at_end(PyCodeAddressRange *bounds) { + return bounds->opaque.lo_next >= bounds->opaque.limit; +} + +int +_PyLineTable_NextAddressRange(PyCodeAddressRange *range) +{ + if (at_end(range)) { + return 0; + } + advance(range); + while (range->ar_start == range->ar_end) { + assert(!at_end(range)); + advance(range); + } + return 1; +} +#endif + +#endif // DEVTOOLS_CDBG_DEBUGLETS_PYTHON_PYLINETABLE_H_ diff --git a/tests/cpp/bytecode_manipulator_test.cc b/tests/cpp/bytecode_manipulator_test.cc index 6f21ae1..934dfef 100644 --- a/tests/cpp/bytecode_manipulator_test.cc +++ b/tests/cpp/bytecode_manipulator_test.cc @@ -116,7 +116,6 @@ static std::string FormatOpcode(uint8_t opcode) { case SETUP_LOOP: return "SETUP_LOOP"; case SETUP_EXCEPT: return "SETUP_EXCEPT"; #endif -#if PY_MAJOR_VERSION >= 3 case DUP_TOP_TWO: return "DUP_TOP_TWO"; case BINARY_MATRIX_MULTIPLY: return "BINARY_MATRIX_MULTIPLY"; case INPLACE_MATRIX_MULTIPLY: return "INPLACE_MATRIX_MULTIPLY"; @@ -182,38 +181,7 @@ static std::string FormatOpcode(uint8_t opcode) { case DICT_MERGE: return "DICT_MERGE"; case DICT_UPDATE: return "DICT_UPDATE"; #endif -#else - case STOP_CODE: return "STOP_CODE"; - case ROT_FOUR: return "ROT_FOUR"; - case UNARY_CONVERT: return "UNARY_CONVERT"; - case BINARY_DIVIDE: return "BINARY_DIVIDE"; - case SLICE: return "SLICE"; - case SLICE_1: return "SLICE_1"; - case SLICE_2: return "SLICE_2"; - case SLICE_3: return "SLICE_3"; - case STORE_SLICE: return "STORE_SLICE"; - case STORE_SLICE_1: return "STORE_SLICE_1"; - case STORE_SLICE_2: return "STORE_SLICE_2"; - case STORE_SLICE_3: return "STORE_SLICE_3"; - case DELETE_SLICE: return "DELETE_SLICE"; - case DELETE_SLICE_1: return "DELETE_SLICE_1"; - case DELETE_SLICE_2: return "DELETE_SLICE_2"; - case DELETE_SLICE_3: return "DELETE_SLICE_3"; - case STORE_MAP: return "STORE_MAP"; - case INPLACE_DIVIDE: return "INPLACE_DIVIDE"; - case PRINT_NEWLINE: return "PRINT_NEWLINE"; - case PRINT_ITEM: return "PRINT_ITEM"; - case PRINT_ITEM_TO: return "PRINT_ITEM_TO"; - case PRINT_NEWLINE_TO: return "PRINT_NEWLINE_TO"; - case LOAD_LOCALS: return "LOAD_LOCALS"; - case EXEC_STMT: return "EXEC_STMT"; - case BUILD_CLASS: return "BUILD_CLASS"; - case DUP_TOPX: return "DUP_TOPX"; - case MAKE_CLOSURE: return "MAKE_CLOSURE"; - case CALL_FUNCTION_VAR: return "CALL_FUNCTION_VAR"; - case CALL_FUNCTION_VAR_KW: return "CALL_FUNCTION_VAR_KW"; - case WITH_CLEANUP: return "WITH_CLEANUP"; -#endif + default: return std::to_string(static_cast(opcode)); } } @@ -263,12 +231,12 @@ static void VerifyBytecode(const BytecodeManipulator& bytecode_manipulator, static void VerifyLineNumbersTable( const BytecodeManipulator& bytecode_manipulator, - std::vector expected_lnotab) { + std::vector expected_linedata) { // Convert to integers to better logging by EXPECT_EQ. - std::vector expected(expected_lnotab.begin(), expected_lnotab.end()); + std::vector expected(expected_linedata.begin(), expected_linedata.end()); std::vector actual( - bytecode_manipulator.lnotab().begin(), - bytecode_manipulator.lnotab().end()); + bytecode_manipulator.linedata().begin(), + bytecode_manipulator.linedata().end()); EXPECT_EQ(expected, actual); } @@ -281,10 +249,10 @@ TEST(BytecodeManipulatorTest, EmptyBytecode) { TEST(BytecodeManipulatorTest, HasLineNumbersTable) { BytecodeManipulator instance1({}, false, {}); - EXPECT_FALSE(instance1.has_lnotab()); + EXPECT_FALSE(instance1.has_linedata()); BytecodeManipulator instance2({}, true, {}); - EXPECT_TRUE(instance2.has_lnotab()); + EXPECT_TRUE(instance2.has_linedata()); } @@ -373,6 +341,25 @@ TEST(BytecodeManipulatorTest, InsertionOffsetUpdates) { {}); ASSERT_TRUE(instance.InjectMethodCall(2, 47)); +#if PY_VERSION_HEX >= 0x030A0000 + // Jump offsets are instruction offsets, not byte offsets. + VerifyBytecode( + instance, + { + JUMP_FORWARD, // offset 0. + 12 + 3, // offset 1. + LOAD_CONST, // offset 2. + 47, // offset 3. + CALL_FUNCTION, // offset 4. + 0, // offset 5. + POP_TOP, // offset 6. + 0, // offset 7. + NOP, // offset 8. + 0, // offset 9. + JUMP_ABSOLUTE, // offset 10. + 34 + 3 // offset 11. + }); +#else VerifyBytecode( instance, { @@ -389,6 +376,7 @@ TEST(BytecodeManipulatorTest, InsertionOffsetUpdates) { JUMP_ABSOLUTE, // offset 10. 34 + 6 // offset 11. }); +#endif } @@ -418,6 +406,37 @@ TEST(BytecodeManipulatorTest, InsertionExtendedOffsetUpdates) { {}); ASSERT_TRUE(instance.InjectMethodCall(8, 11)); +#if PY_VERSION_HEX >= 0x030A0000 + // Jump offsets are instruction offsets, not byte offsets. + VerifyBytecode( + instance, + { + EXTENDED_ARG, // offset 0. + 12, // offset 1. + EXTENDED_ARG, // offset 2. + 34, // offset 3. + EXTENDED_ARG, // offset 4. + 56, // offset 5. + JUMP_FORWARD, // offset 6. + 78 + 3, // offset 7. + LOAD_CONST, // offset 8. + 11, // offset 9. + CALL_FUNCTION, // offset 10. + 0, // offset 11. + POP_TOP, // offset 12. + 0, // offset 13. + NOP, // offset 14. + 0, // offset 15. + EXTENDED_ARG, // offset 16. + 98, // offset 17. + EXTENDED_ARG, // offset 18. + 76, // offset 19. + EXTENDED_ARG, // offset 20. + 54, // offset 21. + JUMP_ABSOLUTE, // offset 22. + 32 + 3 // offset 23. + }); +#else VerifyBytecode( instance, { @@ -446,6 +465,7 @@ TEST(BytecodeManipulatorTest, InsertionExtendedOffsetUpdates) { JUMP_ABSOLUTE, // offset 22. 32 + 6 // offset 23. }); +#endif } @@ -521,13 +541,15 @@ TEST(BytecodeManipulatorTest, InsertionOffsetUneededExtended) { {}); ASSERT_TRUE(instance.InjectMethodCall(4, 11)); +#if PY_VERSION_HEX >= 0x030A0000 + // Jump offsets are instruction offsets, not byte offsets. VerifyBytecode( instance, { EXTENDED_ARG, // offset 0. 0, // offset 1. JUMP_FORWARD, // offset 2. - 8, // offset 3. + 2 + 3, // offset 3. LOAD_CONST, // offset 4. 11, // offset 5. CALL_FUNCTION, // offset 6. @@ -537,13 +559,33 @@ TEST(BytecodeManipulatorTest, InsertionOffsetUneededExtended) { NOP, // offset 10. 0 // offset 11. }); +#else + VerifyBytecode( + instance, + { + EXTENDED_ARG, // offset 0. + 0, // offset 1. + JUMP_FORWARD, // offset 2. + 2 + 6, // offset 3. + LOAD_CONST, // offset 4. + 11, // offset 5. + CALL_FUNCTION, // offset 6. + 0, // offset 7. + POP_TOP, // offset 8. + 0, // offset 9. + NOP, // offset 10. + 0 // offset 11. + }); +#endif } TEST(BytecodeManipulatorTest, InsertionOffsetUpgradeExtended) { - BytecodeManipulator instance({ JUMP_ABSOLUTE, 250 , NOP, 0 }, false, {}); + BytecodeManipulator instance({ JUMP_ABSOLUTE, 254 , NOP, 0 }, false, {}); ASSERT_TRUE(instance.InjectMethodCall(2, 11)); +#if PY_VERSION_HEX >= 0x030A0000 + // Jump offsets are instruction offsets, not byte offsets. VerifyBytecode( instance, { @@ -560,27 +602,68 @@ TEST(BytecodeManipulatorTest, InsertionOffsetUpgradeExtended) { NOP, // offset 10. 0 // offset 11. }); +#else + VerifyBytecode( + instance, + { + EXTENDED_ARG, // offset 0. + 1, // offset 1. + JUMP_ABSOLUTE, // offset 2. + 6, // offset 3. + LOAD_CONST, // offset 4. + 11, // offset 5. + CALL_FUNCTION, // offset 6. + 0, // offset 7. + POP_TOP, // offset 8. + 0, // offset 9. + NOP, // offset 10. + 0 // offset 11. + }); +#endif } TEST(BytecodeManipulatorTest, InsertionOffsetUpgradeExtendedTwice) { BytecodeManipulator instance( - { JUMP_ABSOLUTE, 248, JUMP_ABSOLUTE, 250, NOP, 0 }, + { JUMP_ABSOLUTE, 252, JUMP_ABSOLUTE, 254, NOP, 0 }, false, {}); ASSERT_TRUE(instance.InjectMethodCall(4, 12)); +#if PY_VERSION_HEX >= 0x030A0000 + // Jump offsets are instruction offsets, not byte offsets. VerifyBytecode( instance, { EXTENDED_ARG, // offset 0. 1, // offset 1. JUMP_ABSOLUTE, // offset 2. - 2, // offset 3. + 1, // offset 3. + EXTENDED_ARG, // offset 4. + 1, // offset 5. + JUMP_ABSOLUTE, // offset 6. + 3, // offset 7. + LOAD_CONST, // offset 8. + 12, // offset 9. + CALL_FUNCTION, // offset 10. + 0, // offset 11. + POP_TOP, // offset 12. + 0, // offset 13. + NOP, // offset 14. + 0 // offset 15. + }); +#else + VerifyBytecode( + instance, + { + EXTENDED_ARG, // offset 0. + 1, // offset 1. + JUMP_ABSOLUTE, // offset 2. + 6, // offset 3. EXTENDED_ARG, // offset 4. 1, // offset 5. JUMP_ABSOLUTE, // offset 6. - 4, // offset 7. + 8, // offset 7. LOAD_CONST, // offset 8. 12, // offset 9. CALL_FUNCTION, // offset 10. @@ -590,6 +673,7 @@ TEST(BytecodeManipulatorTest, InsertionOffsetUpgradeExtendedTwice) { NOP, // offset 14. 0 // offset 15. }); +#endif } @@ -629,16 +713,16 @@ TEST(BytecodeManipulatorTest, InsertionMidInstruction) { TEST(BytecodeManipulatorTest, InsertionTooManyUpgrades) { BytecodeManipulator instance( { - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, - JUMP_ABSOLUTE, 250, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, + JUMP_ABSOLUTE, 254, NOP, 0 }, false, @@ -707,7 +791,7 @@ TEST(BytecodeManipulatorTest, LineNumbersTablePastEnd) { TEST(BytecodeManipulatorTest, LineNumbersTableUpgradeExtended) { BytecodeManipulator instance( - { JUMP_ABSOLUTE, 250, RETURN_VALUE, 0 }, + { JUMP_ABSOLUTE, 254, RETURN_VALUE, 0 }, true, { 2, 1, 2, 1 }); ASSERT_TRUE(instance.InjectMethodCall(2, 99)); @@ -724,7 +808,11 @@ TEST(BytecodeManipulatorTest, LineNumbersTableOverflow) { { 254, 1 }); ASSERT_TRUE(instance.InjectMethodCall(2, 99)); +#if PY_VERSION_HEX >= 0x030A0000 + VerifyLineNumbersTable(instance, { 254, 0, 6, 1 }); +#else VerifyLineNumbersTable(instance, { 255, 0, 5, 1 }); +#endif } diff --git a/tests/py/collector_test.py b/tests/py/collector_test.py index fe936ad..abc39b2 100644 --- a/tests/py/collector_test.py +++ b/tests/py/collector_test.py @@ -5,6 +5,7 @@ import inspect import logging import os +import sys import time from unittest import mock @@ -1428,7 +1429,8 @@ def testLogBytesQuota(self): def testMissingLogLevel(self): # Missing is equivalent to INFO. - log_collector = LogCollectorWithDefaultLocation({'logMessageFormat': 'hello'}) + log_collector = LogCollectorWithDefaultLocation( + {'logMessageFormat': 'hello'}) self.assertIsNone(log_collector.Log(inspect.currentframe())) self.assertTrue(self._verifier.GotMessage('LOGPOINT: hello')) @@ -1487,11 +1489,17 @@ def testBadExpression(self): 'expressions': ['-', '+'] }) self.assertIsNone(log_collector.Log(inspect.currentframe())) - self.assertTrue( - self._verifier.GotMessage( - 'LOGPOINT: a=, b=')) + if sys.version_info.minor < 10: + self.assertTrue( + self._verifier.GotMessage( + 'LOGPOINT: a=, b=')) + else: + self.assertTrue( + self._verifier.GotMessage( + 'LOGPOINT: a=, ' + 'b=')) def testDollarEscape(self): unused_integer = 12345 diff --git a/tests/py/module_search_test.py b/tests/py/module_search_test.py index 70c67b4..3a12c57 100644 --- a/tests/py/module_search_test.py +++ b/tests/py/module_search_test.py @@ -83,8 +83,7 @@ def testSearchSymLinkInSysPath(self): # Returned result should have a successful file match and symbolic # links should be kept. - self.assertEndsWith( - module_search.Search('b/first.py'), 'link/b/first.py') + self.assertEndsWith(module_search.Search('b/first.py'), 'link/b/first.py') finally: sys.path.remove(os.path.join(self._test_package_dir, 'link'))