diff --git a/Makefile b/Makefile index a15c4035..5eece9f1 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ else ifeq ($(BUILD),Debug) PYTHON_BUILD_ENV += BUILD_TYPE=Debug else ifeq ($(BUILD),DRelease) PYTHON_BUILD_ENV += BUILD_TYPE=DRelease -else ifeq($(BUILD), None) +else ifeq ($(BUILD), None) PYTHON_BUILD_ENV += BUILD_TYPE=None else # Release build PYTHON_BUILD_ENV += BUILD_TYPE=Release diff --git a/include/JSStringProxy.hh b/include/JSStringProxy.hh index 8c82374c..955f2830 100644 --- a/include/JSStringProxy.hh +++ b/include/JSStringProxy.hh @@ -21,9 +21,23 @@ */ typedef struct { PyUnicodeObject str; - JS::PersistentRootedValue jsString; + JS::PersistentRootedValue *jsString; } JSStringProxy; +/** + * @brief This struct is a bundle of methods used by the JSStringProxy type + * + */ +struct JSStringProxyMethodDefinitions { +public: + /** + * @brief Deallocation method (.tp_dealloc), removes the reference to the underlying JSString before freeing the JSStringProxy + * + * @param self - The JSStringProxy to be free'd + */ + static void JSStringProxy_dealloc(JSStringProxy *self); +}; + /** * @brief Struct for the JSStringProxyType, used by all JSStringProxy objects */ diff --git a/include/StrType.hh b/include/StrType.hh index 7d516485..927d0195 100644 --- a/include/StrType.hh +++ b/include/StrType.hh @@ -34,9 +34,9 @@ public: * * @returns PyObject* pointer to the resulting PyObject */ - static PyObject *getPyObject(JSContext *cx, JSString *str); + static PyObject *getPyObject(JSContext *cx, JS::HandleValue str); - static const char *getValue(JSContext *cx, JSString *str); + static const char *getValue(JSContext *cx, JS::HandleValue str); }; #endif \ No newline at end of file diff --git a/include/jsTypeFactory.hh b/include/jsTypeFactory.hh index f791a257..e500908e 100644 --- a/include/jsTypeFactory.hh +++ b/include/jsTypeFactory.hh @@ -16,7 +16,26 @@ #include -struct PythonExternalString; +struct PythonExternalString : public JSExternalStringCallbacks { +public: + /** + * @brief Get the PyObject using the given char buffer + * + * @param chars - the char buffer of the PyObject + * @return PyObject* - the PyObject string + */ + static PyObject *getPyString(const char16_t *chars); + + /** + * @brief decrefs the underlying PyObject string when the JSString is finalized + * + * @param chars - The char buffer of the string + */ + void finalize(char16_t *chars) const override; + + size_t sizeOfBuffer(const char16_t *chars, mozilla::MallocSizeOf mallocSizeOf) const override; +}; +extern PythonExternalString PythonExternalStringCallbacks; /** * @brief Function that makes a UTF16-encoded copy of a UCS4 string diff --git a/src/ExceptionType.cc b/src/ExceptionType.cc index 5e797e1f..3b4cb629 100644 --- a/src/ExceptionType.cc +++ b/src/ExceptionType.cc @@ -107,7 +107,8 @@ JSObject *ExceptionType::toJsError(JSContext *cx, PyObject *exceptionValue, PyOb if (stackObj.get()) { JS::RootedString stackStr(cx); JS::BuildStackString(cx, nullptr, stackObj, &stackStr, 2, js::StackFormat::SpiderMonkey); - stackStream << "\nJS Stack Trace:\n" << StrType::getValue(cx, stackStr); + JS::RootedValue stackStrVal(cx, JS::StringValue(stackStr)); + stackStream << "\nJS Stack Trace:\n" << StrType::getValue(cx, stackStrVal); } diff --git a/src/JSObjectIterProxy.cc b/src/JSObjectIterProxy.cc index eb7c9fc6..0163e713 100644 --- a/src/JSObjectIterProxy.cc +++ b/src/JSObjectIterProxy.cc @@ -78,11 +78,11 @@ PyObject *JSObjectIterProxyMethodDefinitions::JSObjectIterProxy_nextkey(JSObject ret = key; } + Py_INCREF(ret); if (self->it.kind != KIND_KEYS) { Py_DECREF(value); } - Py_INCREF(ret); return ret; } } else { @@ -108,11 +108,11 @@ PyObject *JSObjectIterProxyMethodDefinitions::JSObjectIterProxy_nextkey(JSObject ret = key; } + Py_INCREF(ret); if (self->it.kind != KIND_KEYS) { Py_DECREF(value); } - Py_INCREF(ret); return ret; } } diff --git a/src/JSStringProxy.cc b/src/JSStringProxy.cc new file mode 100644 index 00000000..01c1ad7d --- /dev/null +++ b/src/JSStringProxy.cc @@ -0,0 +1,16 @@ +/** + * @file JSStringProxy.cc + * @author Caleb Aikens (caleb@distributive.network) + * @brief JSStringProxy is a custom C-implemented python type that derives from str. It acts as a proxy for JSStrings from Spidermonkey, and behaves like a str would. + * @date 2024-05-15 + * + * @copyright Copyright (c) 2024 Distributive Corp. + * + */ + +#include "include/JSStringProxy.hh" + +void JSStringProxyMethodDefinitions::JSStringProxy_dealloc(JSStringProxy *self) +{ + delete self->jsString; +} \ No newline at end of file diff --git a/src/StrType.cc b/src/StrType.cc index 546e6979..0e4abf63 100644 --- a/src/StrType.cc +++ b/src/StrType.cc @@ -10,6 +10,7 @@ #include "include/StrType.hh" #include "include/JSStringProxy.hh" +#include "include/jsTypeFactory.hh" #include #include @@ -58,15 +59,15 @@ static bool containsSurrogatePair(const char16_t *chars, size_t length) { * @return PyObject* - the UCS4-encoding of the pyObject string * */ -static PyObject *asUCS4(PyObject *pyObject) { - if (PyUnicode_KIND(pyObject) != PyUnicode_2BYTE_KIND) { +static PyObject *asUCS4(PyObject *pyString) { + if (PyUnicode_KIND(pyString) != PyUnicode_2BYTE_KIND) { // return a new reference to match the behaviour of `PyUnicode_FromKindAndData` - Py_INCREF(pyObject); - return pyObject; + Py_INCREF(pyString); + return pyString; } - uint16_t *chars = PY_UNICODE_OBJECT_DATA_UCS2(pyObject); - size_t length = PY_UNICODE_OBJECT_LENGTH(pyObject); + uint16_t *chars = PY_UNICODE_OBJECT_DATA_UCS2(pyString); + size_t length = PY_UNICODE_OBJECT_LENGTH(pyString); uint32_t *ucs4String = new uint32_t[length]; size_t ucs4Length = 0; @@ -94,45 +95,51 @@ static PyObject *asUCS4(PyObject *pyObject) { return ret; } -static PyObject *processString(JSContext *cx, JSString *str) { +static PyObject *processString(JSContext *cx, JS::HandleValue strVal) { + JS::RootedString str(cx, strVal.toString()); JSLinearString *lstr = JS_EnsureLinearString(cx, str); JS::AutoCheckCannotGC nogc; PyObject *p; size_t length = JS::GetLinearStringLength(lstr); - PyObject *pyObject = (PyObject *)PyObject_New(JSStringProxy, &JSStringProxyType); // new reference - Py_INCREF(pyObject); + JSStringProxy *pyString = PyObject_New(JSStringProxy, &JSStringProxyType); // new reference - ((JSStringProxy *)pyObject)->jsString.setString((JSString *)lstr); + if (pyString == NULL) { + return NULL; + } + + JS::RootedObject obj(cx); + pyString->jsString = new JS::PersistentRootedValue(cx); + pyString->jsString->setString((JSString *)lstr); // Initialize as legacy string (https://github.com/python/cpython/blob/v3.12.0b1/Include/cpython/unicodeobject.h#L78-L93) // see https://github.com/python/cpython/blob/v3.11.3/Objects/unicodeobject.c#L1230-L1245 - PY_UNICODE_OBJECT_HASH(pyObject) = -1; - PY_UNICODE_OBJECT_STATE(pyObject).interned = 0; - PY_UNICODE_OBJECT_STATE(pyObject).compact = 0; - PY_UNICODE_OBJECT_STATE(pyObject).ascii = 0; - PY_UNICODE_OBJECT_UTF8(pyObject) = NULL; - PY_UNICODE_OBJECT_UTF8_LENGTH(pyObject) = 0; + PY_UNICODE_OBJECT_HASH(pyString) = -1; + PY_UNICODE_OBJECT_STATE(pyString).interned = 0; + PY_UNICODE_OBJECT_STATE(pyString).compact = 0; + PY_UNICODE_OBJECT_STATE(pyString).ascii = 0; + PY_UNICODE_OBJECT_UTF8(pyString) = NULL; + PY_UNICODE_OBJECT_UTF8_LENGTH(pyString) = 0; if (JS::LinearStringHasLatin1Chars(lstr)) { // latin1 spidermonkey, latin1 python const JS::Latin1Char *chars = JS::GetLatin1LinearStringChars(nogc, lstr); - PY_UNICODE_OBJECT_DATA_ANY(pyObject) = (void *)chars; - PY_UNICODE_OBJECT_KIND(pyObject) = PyUnicode_1BYTE_KIND; - PY_UNICODE_OBJECT_LENGTH(pyObject) = length; + PY_UNICODE_OBJECT_DATA_ANY(pyString) = (void *)chars; + PY_UNICODE_OBJECT_KIND(pyString) = PyUnicode_1BYTE_KIND; + PY_UNICODE_OBJECT_LENGTH(pyString) = length; #if PY_UNICODE_HAS_WSTR - PY_UNICODE_OBJECT_WSTR(pyObject) = NULL; - PY_UNICODE_OBJECT_WSTR_LENGTH(pyObject) = 0; - PY_UNICODE_OBJECT_READY(pyObject) = 1; + PY_UNICODE_OBJECT_WSTR(pyString) = NULL; + PY_UNICODE_OBJECT_WSTR_LENGTH(pyString) = 0; + PY_UNICODE_OBJECT_READY(pyString) = 1; #endif } else { // utf16 spidermonkey, ucs2 python const char16_t *chars = JS::GetTwoByteLinearStringChars(nogc, lstr); - PY_UNICODE_OBJECT_DATA_ANY(pyObject) = (void *)chars; - PY_UNICODE_OBJECT_KIND(pyObject) = PyUnicode_2BYTE_KIND; - PY_UNICODE_OBJECT_LENGTH(pyObject) = length; + PY_UNICODE_OBJECT_DATA_ANY(pyString) = (void *)chars; + PY_UNICODE_OBJECT_KIND(pyString) = PyUnicode_2BYTE_KIND; + PY_UNICODE_OBJECT_LENGTH(pyString) = length; #if PY_UNICODE_HAS_WSTR // python unicode objects take advantage of a possible performance gain on systems where @@ -141,38 +148,49 @@ static PyObject *processString(JSContext *cx, JSString *str) { // On systems where sizeof(wchar_t) == 4, i.e. Unixy systems, a similar performance gain happens if the // string is using UCS4 encoding [this is automatically handled by asUCS4()] if (sizeof(wchar_t) == 2) { - PY_UNICODE_OBJECT_WSTR(pyObject) = (wchar_t *)chars; - PY_UNICODE_OBJECT_WSTR_LENGTH(pyObject) = length; + PY_UNICODE_OBJECT_WSTR(pyString) = (wchar_t *)chars; + PY_UNICODE_OBJECT_WSTR_LENGTH(pyString) = length; } else { - PY_UNICODE_OBJECT_WSTR(pyObject) = NULL; - PY_UNICODE_OBJECT_WSTR_LENGTH(pyObject) = 0; + PY_UNICODE_OBJECT_WSTR(pyString) = NULL; + PY_UNICODE_OBJECT_WSTR_LENGTH(pyString) = 0; } - PY_UNICODE_OBJECT_READY(pyObject) = 1; + PY_UNICODE_OBJECT_READY(pyString) = 1; #endif if (containsSurrogatePair(chars, length)) { // We must convert to UCS4 here because Python does not support decoding string containing surrogate pairs to bytes - PyObject *ucs4Obj = asUCS4(pyObject); // convert to a new PyUnicodeObject with UCS4 data + PyObject *ucs4Obj = asUCS4((PyObject *)pyString); // convert to a new PyUnicodeObject with UCS4 data if (!ucs4Obj) { - // conversion fails, keep the original `pyObject` - return pyObject; + // conversion fails, keep the original `pyString` + return (PyObject *)pyString; } - Py_DECREF(pyObject); + Py_DECREF(pyString); return ucs4Obj; } } - return pyObject; + return (PyObject *)pyString; } -PyObject *StrType::getPyObject(JSContext *cx, JSString *str) { +PyObject *StrType::getPyObject(JSContext *cx, JS::HandleValue str) { + const PythonExternalString *callbacks; + const char16_t *chars; + + if (JS::IsExternalString(str.toString(), (const JSExternalStringCallbacks **)&callbacks, &chars)) { + if (callbacks == &PythonExternalStringCallbacks) { + PyObject *pyString = callbacks->getPyString(chars); + Py_INCREF(pyString); + return pyString; + } + } + return processString(cx, str); } -const char *StrType::getValue(JSContext *cx, JSString *str) { - PyObject *pyObject = processString(cx, str); - const char *value = PyUnicode_AsUTF8(pyObject); - Py_DECREF(pyObject); +const char *StrType::getValue(JSContext *cx, JS::HandleValue str) { + PyObject *pyString = processString(cx, str); + const char *value = PyUnicode_AsUTF8(pyString); + Py_DECREF(pyString); return value; } \ No newline at end of file diff --git a/src/jsTypeFactory.cc b/src/jsTypeFactory.cc index d00860d0..b0998160 100644 --- a/src/jsTypeFactory.cc +++ b/src/jsTypeFactory.cc @@ -49,24 +49,30 @@ static PyObjectProxyHandler pyObjectProxyHandler; static PyListProxyHandler pyListProxyHandler; static PyIterableProxyHandler pyIterableProxyHandler; -std::unordered_map charToPyObjectMap; // a map of char16_t buffers to their corresponding PyObjects, used when finalizing JSExternalStrings - -struct PythonExternalString : public JSExternalStringCallbacks { - void finalize(char16_t *chars) const override { - // We cannot call Py_DECREF here when shutting down as the thread state is gone. - // Then, when shutting down, there is only on reference left, and we don't need - // to free the object since the entire process memory is being released. - PyObject *object = charToPyObjectMap[chars]; - if (Py_REFCNT(object) > 1) { - Py_DECREF(object); - } - } - size_t sizeOfBuffer(const char16_t *chars, mozilla::MallocSizeOf mallocSizeOf) const override { - return 0; +std::unordered_map charToPyObjectMap; // a map of char16_t buffers to their corresponding PyObjects, used when finalizing JSExternalStrings + +PyObject *PythonExternalString::getPyString(const char16_t *chars) +{ + return charToPyObjectMap[chars]; +} + +void PythonExternalString::finalize(char16_t *chars) const +{ + // We cannot call Py_DECREF here when shutting down as the thread state is gone. + // Then, when shutting down, there is only on reference left, and we don't need + // to free the object since the entire process memory is being released. + PyObject *object = charToPyObjectMap[chars]; + if (Py_REFCNT(object) > 1) { + Py_DECREF(object); } -}; +} + +size_t PythonExternalString::sizeOfBuffer(const char16_t *chars, mozilla::MallocSizeOf mallocSizeOf) const +{ + return PyUnicode_GetLength(charToPyObjectMap[chars]); +} -static constexpr PythonExternalString PythonExternalStringCallbacks; +PythonExternalString PythonExternalStringCallbacks = {}; size_t UCS4ToUTF16(const uint32_t *chars, size_t length, uint16_t **outStr) { uint16_t *utf16String = (uint16_t *)malloc(sizeof(uint16_t) * length*2); @@ -112,7 +118,7 @@ JS::Value jsTypeFactory(JSContext *cx, PyObject *object) { returnType.setNumber(PyFloat_AsDouble(object)); } else if (PyObject_TypeCheck(object, &JSStringProxyType)) { - returnType.setString(((JSStringProxy *)object)->jsString.toString()); + returnType.setString(((JSStringProxy *)object)->jsString->toString()); } else if (PyUnicode_Check(object)) { switch (PyUnicode_KIND(object)) { diff --git a/src/modules/pythonmonkey/pythonmonkey.cc b/src/modules/pythonmonkey/pythonmonkey.cc index 60b1bc8d..9ee013e7 100644 --- a/src/modules/pythonmonkey/pythonmonkey.cc +++ b/src/modules/pythonmonkey/pythonmonkey.cc @@ -128,6 +128,7 @@ PyTypeObject JSObjectProxyType = { PyTypeObject JSStringProxyType = { .tp_name = PyUnicode_Type.tp_name, .tp_basicsize = sizeof(JSStringProxy), + .tp_dealloc = (destructor)JSStringProxyMethodDefinitions::JSStringProxy_dealloc, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_UNICODE_SUBCLASS, .tp_doc = PyDoc_STR("Javascript String value"), .tp_base = &PyUnicode_Type @@ -419,7 +420,7 @@ static PyObject *eval(PyObject *self, PyObject *args) { // compile the code to execute JS::RootedScript script(GLOBAL_CX); - JS::Rooted *rval = new JS::Rooted(GLOBAL_CX); + JS::Rooted rval(GLOBAL_CX); if (code) { JS::SourceText source; const char *codeChars = PyUnicode_AsUTF8(code); @@ -440,27 +441,17 @@ static PyObject *eval(PyObject *self, PyObject *args) { } // execute the compiled code; last expr goes to rval - if (!JS_ExecuteScript(GLOBAL_CX, script, rval)) { + if (!JS_ExecuteScript(GLOBAL_CX, script, &rval)) { setSpiderMonkeyException(GLOBAL_CX); return NULL; } // translate to the proper python type - PyObject *returnValue = pyTypeFactory(GLOBAL_CX, *rval); + PyObject *returnValue = pyTypeFactory(GLOBAL_CX, rval); if (PyErr_Occurred()) { return NULL; } - // TODO: Find a way to root strings for the lifetime of a proxying python string - js::ESClass cls = js::ESClass::Other; // placeholder if `rval` is not a JSObject - if (rval->isObject()) { - JS::GetBuiltinClass(GLOBAL_CX, JS::RootedObject(GLOBAL_CX, &rval->toObject()), &cls); - } - - if (!(rval->isString() || cls == js::ESClass::String)) { // rval may be a string which must be kept alive. - delete rval; - } - if (returnValue) { return returnValue; } diff --git a/src/pyTypeFactory.cc b/src/pyTypeFactory.cc index fbe42b11..63a0a34f 100644 --- a/src/pyTypeFactory.cc +++ b/src/pyTypeFactory.cc @@ -50,7 +50,7 @@ PyObject *pyTypeFactory(JSContext *cx, JS::HandleValue rval) { return FloatType::getPyObject(rval.toNumber()); } else if (rval.isString()) { - return StrType::getPyObject(cx, rval.toString()); + return StrType::getPyObject(cx, rval); } else if (rval.isSymbol()) { printf("symbol type is not handled by PythonMonkey yet"); diff --git a/src/setSpiderMonkeyException.cc b/src/setSpiderMonkeyException.cc index 7b0e2bab..ac08cb8c 100644 --- a/src/setSpiderMonkeyException.cc +++ b/src/setSpiderMonkeyException.cc @@ -62,7 +62,8 @@ PyObject *getExceptionString(JSContext *cx, const JS::ExceptionStack &exceptionS if (stackObj.get()) { JS::RootedString stackStr(cx); BuildStackString(cx, nullptr, stackObj, &stackStr, 2, js::StackFormat::SpiderMonkey); - outStrStream << "Stack Trace:\n" << StrType::getValue(cx, stackStr); + JS::RootedValue stackStrVal(cx, JS::StringValue(stackStr)); + outStrStream << "Stack Trace:\n" << StrType::getValue(cx, stackStrVal); } } diff --git a/tests/python/test_strings.py b/tests/python/test_strings.py index 694de79f..cb3d37ae 100644 --- a/tests/python/test_strings.py +++ b/tests/python/test_strings.py @@ -3,6 +3,12 @@ import random +def test_identity(): + py_string = "abc" + js_string = pm.eval("(str) => str")(py_string) + assert py_string is js_string + + def test_eval_ascii_string_matches_evaluated_string(): py_ascii_string = "abc" js_ascii_string = pm.eval(repr(py_ascii_string))