Skip to content
This repository has been archived by the owner on Jan 23, 2024. It is now read-only.

feat: Add support for Python 3.10 #70

Merged
merged 15 commits into from
Feb 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Python Cloud Debugger Agent

Google [Cloud Debugger](https://cloud.google.com/debugger/) for
Python 3.6, Python 3.7, Python 3.8 and Python 3.9.
Python 3.6, Python 3.7, Python 3.8, Python 3.9, and Python 3.10.

## Overview

Expand All @@ -28,7 +28,7 @@ tested on Debian Linux, but it should work on other distributions as well.
Cloud Debugger consists of 3 primary components:

1. The Python debugger agent (this repo implements one for CPython 3.6,
3.7, 3.8 and 3.9).
3.7, 3.8, 3.9, and 3.10).
2. Cloud Debugger service storing and managing snapshots/logpoints. Explore the
APIs using
[APIs Explorer](https://cloud.google.com/debugger/api/reference/rest/).
Expand Down
2 changes: 1 addition & 1 deletion src/build-wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
GFLAGS_URL=https://github.com/gflags/gflags/archive/v2.2.2.tar.gz
GLOG_URL=https://github.com/google/glog/archive/v0.4.0.tar.gz

SUPPORTED_VERSIONS=(cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39)
SUPPORTED_VERSIONS=(cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39 cp310-cp310)

ROOT=$(cd $(dirname "${BASH_SOURCE[0]}") >/dev/null; /bin/pwd -P)

Expand Down
1 change: 1 addition & 0 deletions src/googleclouddebugger/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ cc_library(
deps = [
":common",
":nullable",
"//src/third_party:pylinetable",
],
)

Expand Down
67 changes: 46 additions & 21 deletions src/googleclouddebugger/bytecode_breakpoint.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ int BytecodeBreakpoint::CreateBreakpoint(
// table in case "code_object" is already patched with another breakpoint.
CodeObjectLinesEnumerator lines_enumerator(
code_object->co_firstlineno,
code_object_breakpoints->original_lnotab.get());
code_object_breakpoints->original_linedata.get());
while (lines_enumerator.line_number() != line) {
if (!lines_enumerator.Next()) {
LOG(ERROR) << "Line " << line << " not found in "
Expand Down Expand Up @@ -237,8 +237,14 @@ BytecodeBreakpoint::PreparePatchCodeObject(
return nullptr; // Probably a built-in method or uninitialized code object.
}

data->original_lnotab =
// Store the original (unmodified) line data.
#if PY_VERSION_HEX < 0x030A0000
data->original_linedata =
ScopedPyObject::NewReference(code_object.get()->co_lnotab);
#else
data->original_linedata =
ScopedPyObject::NewReference(code_object.get()->co_linetable);
#endif

patches_[code_object] = data.get();
return data.release();
Expand All @@ -262,29 +268,38 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) {
<< " from patched " << code->zombie_refs.back().get();
Py_INCREF(code_object->co_code);

// Restore the original line data to the code object.
#if PY_VERSION_HEX < 0x030A0000
if (code_object->co_lnotab != nullptr) {
code->zombie_refs.push_back(ScopedPyObject(code_object->co_lnotab));
}
code_object->co_lnotab = code->original_lnotab.get();
code_object->co_lnotab = code->original_linedata.get();
Py_INCREF(code_object->co_lnotab);
#else
if (code_object->co_linetable != nullptr) {
code->zombie_refs.push_back(ScopedPyObject(code_object->co_linetable));
}
code_object->co_linetable = code->original_linedata.get();
Py_INCREF(code_object->co_linetable);
#endif

return;
}

std::vector<uint8_t> bytecode = PyBytesToByteArray(code->original_code.get());

bool has_lnotab = false;
std::vector<uint8_t> lnotab;
if (!code->original_lnotab.is_null() &&
PyBytes_CheckExact(code->original_lnotab.get())) {
has_lnotab = true;
lnotab = PyBytesToByteArray(code->original_lnotab.get());
bool has_linedata = false;
std::vector<uint8_t> linedata;
if (!code->original_linedata.is_null() &&
PyBytes_CheckExact(code->original_linedata.get())) {
has_linedata = true;
linedata = PyBytesToByteArray(code->original_linedata.get());
}

BytecodeManipulator bytecode_manipulator(
std::move(bytecode),
has_lnotab,
std::move(lnotab));
has_linedata,
std::move(linedata));

// Add callbacks to code object constants and patch the bytecode.
std::vector<PyObject*> callbacks;
Expand All @@ -306,17 +321,16 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) {

callbacks.push_back(breakpoint.hit_callable.get());

#if PY_MAJOR_VERSION >= 3
// In Python 3, since we allow upgrading of instructions to use
// EXTENDED_ARG, the offsets originally calculated for lines might no longer
// be accurate, so we need to recalculate them on each insertion.
offset_found = false;
if (bytecode_manipulator.has_lnotab()) {
ScopedPyObject lnotab(PyBytes_FromStringAndSize(
reinterpret_cast<const char*>(bytecode_manipulator.lnotab().data()),
bytecode_manipulator.lnotab().size()));
if (bytecode_manipulator.has_linedata()) {
ScopedPyObject linedata(PyBytes_FromStringAndSize(
reinterpret_cast<const char*>(bytecode_manipulator.linedata().data()),
bytecode_manipulator.linedata().size()));
CodeObjectLinesEnumerator lines_enumerator(code_object->co_firstlineno,
lnotab.release());
linedata.release());
while (lines_enumerator.line_number() != breakpoint.line) {
if (!lines_enumerator.Next()) {
break;
Expand All @@ -325,7 +339,6 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) {
}
offset_found = lines_enumerator.line_number() == breakpoint.line;
}
#endif

if (!offset_found ||
!bytecode_manipulator.InjectMethodCall(offset, const_index)) {
Expand Down Expand Up @@ -355,14 +368,26 @@ void BytecodeBreakpoint::PatchCodeObject(CodeObjectBreakpoints* code) {
<< " reassigned to " << code_object->co_code
<< ", original was " << code->original_code.get();

if (has_lnotab) {
// Update the line data in the code object.
#if PY_VERSION_HEX < 0x030A0000
if (has_linedata) {
code->zombie_refs.push_back(ScopedPyObject(code_object->co_lnotab));
ScopedPyObject lnotab_string(PyBytes_FromStringAndSize(
reinterpret_cast<const char*>(bytecode_manipulator.lnotab().data()),
bytecode_manipulator.lnotab().size()));
reinterpret_cast<const char*>(bytecode_manipulator.linedata().data()),
bytecode_manipulator.linedata().size()));
DCHECK(!lnotab_string.is_null());
code_object->co_lnotab = lnotab_string.release();
}
#else
if (has_linedata) {
code->zombie_refs.push_back(ScopedPyObject(code_object->co_linetable));
ScopedPyObject linetable_string(PyBytes_FromStringAndSize(
reinterpret_cast<const char*>(bytecode_manipulator.linedata().data()),
bytecode_manipulator.linedata().size()));
DCHECK(!linetable_string.is_null());
code_object->co_linetable = linetable_string.release();
}
#endif

// Invoke error callback after everything else is done. The callback may
// decide to remove the breakpoint, which will change "code".
Expand Down
7 changes: 4 additions & 3 deletions src/googleclouddebugger/bytecode_breakpoint.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,10 @@ class BytecodeBreakpoint {
// Original value of PyCodeObject::co_code before patching.
ScopedPyObject original_code;

// Original value of PythonCode::co_lnotab before patching.
// "lnotab" stands for "line numbers table" in CPython lingo.
ScopedPyObject original_lnotab;
// Original value of PyCodeObject::co_lnotab (CPython <= 3.9) or
// PyCodeObject::co_linetable (CPython >= 3.10) before patching. Either way
// this is the line numbers table of the code object.
ScopedPyObject original_linedata;
};

// Loads code object into "patches_" if not there yet. Returns nullptr if
Expand Down
74 changes: 52 additions & 22 deletions src/googleclouddebugger/bytecode_manipulator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,11 @@ static std::vector<PythonInstruction> BuildMethodCall(int const_index) {
}

BytecodeManipulator::BytecodeManipulator(std::vector<uint8_t> bytecode,
const bool has_lnotab,
std::vector<uint8_t> lnotab)
: has_lnotab_(has_lnotab) {
const bool has_linedata,
std::vector<uint8_t> linedata)
: has_linedata_(has_linedata) {
data_.bytecode = std::move(bytecode);
data_.lnotab = std::move(lnotab);
data_.linedata = std::move(linedata);

strategy_ = STRATEGY_INSERT; // Default strategy.
for (auto it = data_.bytecode.begin(); it < data_.bytecode.end(); ) {
Expand Down Expand Up @@ -296,21 +296,13 @@ struct Insertion {
// InsertAndUpdateBranchInstructions.
static const int kMaxInsertionIterations = 10;


#if PY_VERSION_HEX < 0x030A0000
// Updates the line number table for an insertion in the bytecode.
// This is different than what the Python 2 version of InsertMethodCall() does.
// It should be more accurate, but is confined to Python 3 only for safety.
// This handles the case of adding insertion for EXTENDED_ARG better.
// Example for inserting 2 bytes at offset 2:
// lnotab: [{2, 1}, {4, 1}] // {offset_delta, line_delta}
// Old algorithm: [{2, 0}, {2, 1}, {4, 1}]
// New algorithm: [{2, 1}, {6, 1}]
// In the old version, trying to get the offset to insert a breakpoint right
// before line 1 would result in an offset of 2, which is inaccurate as the
// instruction before is an EXTENDED_ARG which will now be applied to the first
// instruction inserted instead of its original target.
static void InsertAndUpdateLnotab(int offset, int size,
std::vector<uint8_t>* lnotab) {
// lnotab: [{2, 1}, {4, 1}] // {offset_delta, line_delta}
// updated: [{2, 1}, {6, 1}]
static void InsertAndUpdateLineData(int offset, int size,
std::vector<uint8_t>* lnotab) {
int current_offset = 0;
for (auto it = lnotab->begin(); it != lnotab->end(); it += 2) {
current_offset += it[0];
Expand All @@ -330,6 +322,36 @@ static void InsertAndUpdateLnotab(int offset, int size,
}
}
}
#else
// Updates the Python 3.10 line number table (co_linetable) after "size" bytes
// have been inserted into the bytecode at "offset".
//
// The table is a flat sequence of {address_end_delta, line_delta} byte pairs.
// The pair whose address range contains "offset" is grown by "size" bytes; all
// other pairs are left untouched. If no pair covers "offset" the table is not
// modified.
//
// Example for inserting 2 bytes at offset 2:
//   linetable: [{2, 1}, {4, 1}]  // {address_end_delta, line_delta}
//   updated:   [{2, 1}, {6, 1}]
//
// An address delta cannot exceed 254, so a grown range may have to be split
// into several pairs. The line delta of a pair applies to that pair's own
// address range, hence the original line delta is emitted on the FIRST pair of
// the split and the continuation pairs carry 0 ("same line"), or -128 when the
// range has no line number at all. This mirrors how CPython 3.10 itself emits
// long ranges.
//
// For more information on the linetable format in Python 3.10, see:
// https://github.com/python/cpython/blob/3.10/Objects/lnotab_notes.txt
static void InsertAndUpdateLineData(int offset, int size,
                                    std::vector<uint8_t>* linetable) {
  const int kMaxAddressDelta = 0xFE;   // Max address delta is listed as 254.
  const uint8_t kNoLineNumber = 0x80;  // -128 as a signed byte: "no line".

  int current_offset = 0;
  // Only walk complete pairs; a stray trailing byte in a malformed table is
  // ignored instead of stepping the iterator past the end.
  for (auto it = linetable->begin(); linetable->end() - it >= 2; it += 2) {
    current_offset += it[0];
    if (current_offset <= offset) {
      continue;  // This range ends at or before the insertion point.
    }

    int remaining_size = it[0] + size;
    const uint8_t line_delta = it[1];
    const uint8_t continuation_delta =
        (line_delta == kNoLineNumber) ? kNoLineNumber : 0;

    // Build the replacement pairs up front so the table is edited only once.
    std::vector<uint8_t> replacement;
    uint8_t next_line_delta = line_delta;
    while (remaining_size > kMaxAddressDelta) {
      replacement.push_back(static_cast<uint8_t>(kMaxAddressDelta));
      replacement.push_back(next_line_delta);
      next_line_delta = continuation_delta;
      remaining_size -= kMaxAddressDelta;
    }
    replacement.push_back(static_cast<uint8_t>(remaining_size));
    replacement.push_back(next_line_delta);

    it = linetable->erase(it, it + 2);
    linetable->insert(it, replacement.begin(), replacement.end());
    return;
  }
}
#endif

// Reserves space for instructions to be inserted into the bytecode, and
// calculates the new offsets and arguments of branch instructions.
Expand Down Expand Up @@ -426,8 +448,16 @@ static bool InsertAndUpdateBranchInstructions(
}

if (need_to_update) {
#if PY_VERSION_HEX < 0x030A0000
int delta = insertion.size;
#else
// Changed in version 3.10: The argument of jump, exception handling
// and loop instructions is now the instruction offset rather than the
// byte offset.
int delta = insertion.size / 2;
#endif
PythonInstruction new_instruction =
PythonInstructionArg(instruction.opcode, arg + insertion.size);
PythonInstructionArg(instruction.opcode, arg + delta);
int size_diff = new_instruction.size - instruction.size;
if (size_diff > 0) {
insertions.push_back(Insertion { size_diff, it->current_offset });
Expand Down Expand Up @@ -490,8 +520,8 @@ bool BytecodeManipulator::InsertMethodCall(
// Insert the method call.
data->bytecode.insert(data->bytecode.begin() + offset, method_call_size, NOP);
WriteInstructions(data->bytecode.begin() + offset, method_call_instructions);
if (has_lnotab_) {
InsertAndUpdateLnotab(offset, method_call_size, &data->lnotab);
if (has_linedata_) {
InsertAndUpdateLineData(offset, method_call_size, &data->linedata);
}

// Write new branch instructions.
Expand All @@ -503,8 +533,8 @@ bool BytecodeManipulator::InsertMethodCall(
int offset = it->current_offset;
if (size_diff > 0) {
data->bytecode.insert(data->bytecode.begin() + offset, size_diff, NOP);
if (has_lnotab_) {
InsertAndUpdateLnotab(it->current_offset, size_diff, &data->lnotab);
if (has_linedata_) {
InsertAndUpdateLineData(it->current_offset, size_diff, &data->linedata);
}
} else if (size_diff < 0) {
// The Python compiler sometimes prematurely adds EXTENDED_ARG with an
Expand Down
14 changes: 7 additions & 7 deletions src/googleclouddebugger/bytecode_manipulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,17 @@ namespace cdbg {
// 19 JUMP_ABSOLUTE 3
class BytecodeManipulator {
public:
BytecodeManipulator(std::vector<uint8_t> bytecode, const bool has_lnotab,
std::vector<uint8_t> lnotab);
BytecodeManipulator(std::vector<uint8_t> bytecode, const bool has_linedata,
std::vector<uint8_t> linedata);

// Gets the transformed method bytecode.
const std::vector<uint8_t>& bytecode() const { return data_.bytecode; }

// Returns true if this class was initialized with line numbers table.
bool has_lnotab() const { return has_lnotab_; }
bool has_linedata() const { return has_linedata_; }

// Gets the method line numbers table or empty vector if not available.
const std::vector<uint8_t>& lnotab() const { return data_.lnotab; }
const std::vector<uint8_t>& linedata() const { return data_.linedata; }

// Rewrites the method bytecode to invoke callable at the specified offset.
// Return false if the method call could not be inserted. The bytecode
Expand Down Expand Up @@ -109,8 +109,8 @@ class BytecodeManipulator {
// Bytecode of a transformed method.
std::vector<uint8_t> bytecode;

// Method line numbers table or empty vector if "has_lnotab_" is false.
std::vector<uint8_t> lnotab;
// Method line numbers table or empty vector if "has_linedata_" is false.
std::vector<uint8_t> linedata;
};

// Insert space into the bytecode. This space is later used to add new
Expand All @@ -130,7 +130,7 @@ class BytecodeManipulator {
Data data_;

// True if the method has line number table.
const bool has_lnotab_;
const bool has_linedata_;

// Algorithm to insert breakpoint callback into method bytecode.
Strategy strategy_;
Expand Down
14 changes: 14 additions & 0 deletions src/googleclouddebugger/immutability_tracer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,16 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) {
#if PY_VERSION_HEX >= 0x03080000
// Added back in Python 3.8 (was in 2.7 as well)
case ROT_FOUR:
#endif
#if PY_VERSION_HEX >= 0x030A0000
// Added in Python 3.10
case COPY_DICT_WITHOUT_KEYS:
case GET_LEN:
case MATCH_MAPPING:
case MATCH_SEQUENCE:
case MATCH_KEYS:
case MATCH_CLASS:
case ROT_N:
#endif
return OPCODE_NOT_MUTABLE;

Expand Down Expand Up @@ -468,6 +478,10 @@ static OpcodeMutableStatus IsOpcodeMutable(const uint8_t opcode) {
case RERAISE:
case WITH_EXCEPT_START:
case LOAD_ASSERTION_ERROR:
#endif
#if PY_VERSION_HEX >= 0x030A0000
// Added in Python 3.10
case GEN_START:
#endif
return OPCODE_MUTABLE;

Expand Down
Loading