Skip to content
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ else ifeq ($(BUILD),Debug)
PYTHON_BUILD_ENV += BUILD_TYPE=Debug
else ifeq ($(BUILD),DRelease)
PYTHON_BUILD_ENV += BUILD_TYPE=DRelease
else ifeq($(BUILD), None)
else ifeq ($(BUILD), None)
PYTHON_BUILD_ENV += BUILD_TYPE=None
else # Release build
PYTHON_BUILD_ENV += BUILD_TYPE=Release
Expand Down
16 changes: 15 additions & 1 deletion include/JSStringProxy.hh
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,23 @@
*/
typedef struct {
PyUnicodeObject str;
JS::PersistentRootedValue jsString;
JS::PersistentRootedValue *jsString;
} JSStringProxy;

/**
* @brief This struct is a bundle of methods used by the JSStringProxy type
*
*/
struct JSStringProxyMethodDefinitions {
public:
/**
* @brief Deallocation method (.tp_dealloc), removes the reference to the underlying JSString before freeing the JSStringProxy
*
* @param self - The JSStringProxy to be free'd
*/
static void JSStringProxy_dealloc(JSStringProxy *self);
};

/**
* @brief Struct for the JSStringProxyType, used by all JSStringProxy objects
*/
Expand Down
4 changes: 2 additions & 2 deletions include/StrType.hh
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ public:
*
* @returns PyObject* pointer to the resulting PyObject
*/
static PyObject *getPyObject(JSContext *cx, JSString *str);
static PyObject *getPyObject(JSContext *cx, JS::HandleValue str);

static const char *getValue(JSContext *cx, JSString *str);
static const char *getValue(JSContext *cx, JS::HandleValue str);
};

#endif
21 changes: 20 additions & 1 deletion include/jsTypeFactory.hh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,26 @@
#include <Python.h>


struct PythonExternalString;
struct PythonExternalString : public JSExternalStringCallbacks {
public:
/**
* @brief Get the PyObject using the given char buffer
*
* @param chars - the char buffer of the PyObject
* @return PyObject* - the PyObject string
*/
static PyObject *getPyString(const char16_t *chars);

/**
* @brief decrefs the underlying PyObject string when the JSString is finalized
*
* @param chars - The char buffer of the string
*/
void finalize(char16_t *chars) const override;

size_t sizeOfBuffer(const char16_t *chars, mozilla::MallocSizeOf mallocSizeOf) const override;
};
extern PythonExternalString PythonExternalStringCallbacks;

/**
* @brief Function that makes a UTF16-encoded copy of a UCS4 string
Expand Down
3 changes: 2 additions & 1 deletion src/ExceptionType.cc
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ JSObject *ExceptionType::toJsError(JSContext *cx, PyObject *exceptionValue, PyOb
if (stackObj.get()) {
JS::RootedString stackStr(cx);
JS::BuildStackString(cx, nullptr, stackObj, &stackStr, 2, js::StackFormat::SpiderMonkey);
stackStream << "\nJS Stack Trace:\n" << StrType::getValue(cx, stackStr);
JS::RootedValue stackStrVal(cx, JS::StringValue(stackStr));
stackStream << "\nJS Stack Trace:\n" << StrType::getValue(cx, stackStrVal);
}


Expand Down
4 changes: 2 additions & 2 deletions src/JSObjectIterProxy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,11 @@ PyObject *JSObjectIterProxyMethodDefinitions::JSObjectIterProxy_nextkey(JSObject
ret = key;
}

Py_INCREF(ret);
if (self->it.kind != KIND_KEYS) {
Py_DECREF(value);
}

Py_INCREF(ret);
return ret;
}
} else {
Expand All @@ -108,11 +108,11 @@ PyObject *JSObjectIterProxyMethodDefinitions::JSObjectIterProxy_nextkey(JSObject
ret = key;
}

Py_INCREF(ret);
if (self->it.kind != KIND_KEYS) {
Py_DECREF(value);
}

Py_INCREF(ret);
return ret;
}
}
Expand Down
16 changes: 16 additions & 0 deletions src/JSStringProxy.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/**
* @file JSStringProxy.cc
* @author Caleb Aikens (caleb@distributive.network)
* @brief JSStringProxy is a custom C-implemented python type that derives from str. It acts as a proxy for JSStrings from Spidermonkey, and behaves like a str would.
* @date 2024-05-15
*
* @copyright Copyright (c) 2024 Distributive Corp.
*
*/

#include "include/JSStringProxy.hh"

void JSStringProxyMethodDefinitions::JSStringProxy_dealloc(JSStringProxy *self)
{
delete self->jsString;
}
98 changes: 58 additions & 40 deletions src/StrType.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "include/StrType.hh"
#include "include/JSStringProxy.hh"
#include "include/jsTypeFactory.hh"

#include <jsapi.h>
#include <js/String.h>
Expand Down Expand Up @@ -58,15 +59,15 @@ static bool containsSurrogatePair(const char16_t *chars, size_t length) {
* @return PyObject* - the UCS4-encoding of the pyObject string
*
*/
static PyObject *asUCS4(PyObject *pyObject) {
if (PyUnicode_KIND(pyObject) != PyUnicode_2BYTE_KIND) {
static PyObject *asUCS4(PyObject *pyString) {
if (PyUnicode_KIND(pyString) != PyUnicode_2BYTE_KIND) {
// return a new reference to match the behaviour of `PyUnicode_FromKindAndData`
Py_INCREF(pyObject);
return pyObject;
Py_INCREF(pyString);
return pyString;
}

uint16_t *chars = PY_UNICODE_OBJECT_DATA_UCS2(pyObject);
size_t length = PY_UNICODE_OBJECT_LENGTH(pyObject);
uint16_t *chars = PY_UNICODE_OBJECT_DATA_UCS2(pyString);
size_t length = PY_UNICODE_OBJECT_LENGTH(pyString);

uint32_t *ucs4String = new uint32_t[length];
size_t ucs4Length = 0;
Expand Down Expand Up @@ -94,45 +95,51 @@ static PyObject *asUCS4(PyObject *pyObject) {
return ret;
}

static PyObject *processString(JSContext *cx, JSString *str) {
static PyObject *processString(JSContext *cx, JS::HandleValue strVal) {
JS::RootedString str(cx, strVal.toString());
JSLinearString *lstr = JS_EnsureLinearString(cx, str);
JS::AutoCheckCannotGC nogc;
PyObject *p;

size_t length = JS::GetLinearStringLength(lstr);

PyObject *pyObject = (PyObject *)PyObject_New(JSStringProxy, &JSStringProxyType); // new reference
Py_INCREF(pyObject);
JSStringProxy *pyString = PyObject_New(JSStringProxy, &JSStringProxyType); // new reference

((JSStringProxy *)pyObject)->jsString.setString((JSString *)lstr);
if (pyString == NULL) {
return NULL;
}

JS::RootedObject obj(cx);
pyString->jsString = new JS::PersistentRootedValue(cx);
pyString->jsString->setString((JSString *)lstr);

// Initialize as legacy string (https://github.com/python/cpython/blob/v3.12.0b1/Include/cpython/unicodeobject.h#L78-L93)
// see https://github.com/python/cpython/blob/v3.11.3/Objects/unicodeobject.c#L1230-L1245
PY_UNICODE_OBJECT_HASH(pyObject) = -1;
PY_UNICODE_OBJECT_STATE(pyObject).interned = 0;
PY_UNICODE_OBJECT_STATE(pyObject).compact = 0;
PY_UNICODE_OBJECT_STATE(pyObject).ascii = 0;
PY_UNICODE_OBJECT_UTF8(pyObject) = NULL;
PY_UNICODE_OBJECT_UTF8_LENGTH(pyObject) = 0;
PY_UNICODE_OBJECT_HASH(pyString) = -1;
PY_UNICODE_OBJECT_STATE(pyString).interned = 0;
PY_UNICODE_OBJECT_STATE(pyString).compact = 0;
PY_UNICODE_OBJECT_STATE(pyString).ascii = 0;
PY_UNICODE_OBJECT_UTF8(pyString) = NULL;
PY_UNICODE_OBJECT_UTF8_LENGTH(pyString) = 0;

if (JS::LinearStringHasLatin1Chars(lstr)) { // latin1 spidermonkey, latin1 python
const JS::Latin1Char *chars = JS::GetLatin1LinearStringChars(nogc, lstr);

PY_UNICODE_OBJECT_DATA_ANY(pyObject) = (void *)chars;
PY_UNICODE_OBJECT_KIND(pyObject) = PyUnicode_1BYTE_KIND;
PY_UNICODE_OBJECT_LENGTH(pyObject) = length;
PY_UNICODE_OBJECT_DATA_ANY(pyString) = (void *)chars;
PY_UNICODE_OBJECT_KIND(pyString) = PyUnicode_1BYTE_KIND;
PY_UNICODE_OBJECT_LENGTH(pyString) = length;
#if PY_UNICODE_HAS_WSTR
PY_UNICODE_OBJECT_WSTR(pyObject) = NULL;
PY_UNICODE_OBJECT_WSTR_LENGTH(pyObject) = 0;
PY_UNICODE_OBJECT_READY(pyObject) = 1;
PY_UNICODE_OBJECT_WSTR(pyString) = NULL;
PY_UNICODE_OBJECT_WSTR_LENGTH(pyString) = 0;
PY_UNICODE_OBJECT_READY(pyString) = 1;
#endif
}
else { // utf16 spidermonkey, ucs2 python
const char16_t *chars = JS::GetTwoByteLinearStringChars(nogc, lstr);

PY_UNICODE_OBJECT_DATA_ANY(pyObject) = (void *)chars;
PY_UNICODE_OBJECT_KIND(pyObject) = PyUnicode_2BYTE_KIND;
PY_UNICODE_OBJECT_LENGTH(pyObject) = length;
PY_UNICODE_OBJECT_DATA_ANY(pyString) = (void *)chars;
PY_UNICODE_OBJECT_KIND(pyString) = PyUnicode_2BYTE_KIND;
PY_UNICODE_OBJECT_LENGTH(pyString) = length;

#if PY_UNICODE_HAS_WSTR
// python unicode objects take advantage of a possible performance gain on systems where
Expand All @@ -141,38 +148,49 @@ static PyObject *processString(JSContext *cx, JSString *str) {
// On systems where sizeof(wchar_t) == 4, i.e. Unixy systems, a similar performance gain happens if the
// string is using UCS4 encoding [this is automatically handled by asUCS4()]
if (sizeof(wchar_t) == 2) {
PY_UNICODE_OBJECT_WSTR(pyObject) = (wchar_t *)chars;
PY_UNICODE_OBJECT_WSTR_LENGTH(pyObject) = length;
PY_UNICODE_OBJECT_WSTR(pyString) = (wchar_t *)chars;
PY_UNICODE_OBJECT_WSTR_LENGTH(pyString) = length;
}
else {
PY_UNICODE_OBJECT_WSTR(pyObject) = NULL;
PY_UNICODE_OBJECT_WSTR_LENGTH(pyObject) = 0;
PY_UNICODE_OBJECT_WSTR(pyString) = NULL;
PY_UNICODE_OBJECT_WSTR_LENGTH(pyString) = 0;
}
PY_UNICODE_OBJECT_READY(pyObject) = 1;
PY_UNICODE_OBJECT_READY(pyString) = 1;
#endif

if (containsSurrogatePair(chars, length)) {
// We must convert to UCS4 here because Python does not support decoding string containing surrogate pairs to bytes
PyObject *ucs4Obj = asUCS4(pyObject); // convert to a new PyUnicodeObject with UCS4 data
PyObject *ucs4Obj = asUCS4((PyObject *)pyString); // convert to a new PyUnicodeObject with UCS4 data
if (!ucs4Obj) {
// conversion fails, keep the original `pyObject`
return pyObject;
// conversion fails, keep the original `pyString`
return (PyObject *)pyString;
}
Py_DECREF(pyObject);
Py_DECREF(pyString);
return ucs4Obj;
}
}

return pyObject;
return (PyObject *)pyString;
}

PyObject *StrType::getPyObject(JSContext *cx, JSString *str) {
PyObject *StrType::getPyObject(JSContext *cx, JS::HandleValue str) {
const PythonExternalString *callbacks;
const char16_t *chars;

if (JS::IsExternalString(str.toString(), (const JSExternalStringCallbacks **)&callbacks, &chars)) {
if (callbacks == &PythonExternalStringCallbacks) {
PyObject *pyString = callbacks->getPyString(chars);
Py_INCREF(pyString);
return pyString;
}
}

return processString(cx, str);
}

const char *StrType::getValue(JSContext *cx, JSString *str) {
PyObject *pyObject = processString(cx, str);
const char *value = PyUnicode_AsUTF8(pyObject);
Py_DECREF(pyObject);
const char *StrType::getValue(JSContext *cx, JS::HandleValue str) {
PyObject *pyString = processString(cx, str);
const char *value = PyUnicode_AsUTF8(pyString);
Py_DECREF(pyString);
return value;
}
40 changes: 23 additions & 17 deletions src/jsTypeFactory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,24 +49,30 @@ static PyObjectProxyHandler pyObjectProxyHandler;
static PyListProxyHandler pyListProxyHandler;
static PyIterableProxyHandler pyIterableProxyHandler;

std::unordered_map<char16_t *, PyObject *> charToPyObjectMap; // a map of char16_t buffers to their corresponding PyObjects, used when finalizing JSExternalStrings

struct PythonExternalString : public JSExternalStringCallbacks {
void finalize(char16_t *chars) const override {
// We cannot call Py_DECREF here when shutting down as the thread state is gone.
// Then, when shutting down, there is only on reference left, and we don't need
// to free the object since the entire process memory is being released.
PyObject *object = charToPyObjectMap[chars];
if (Py_REFCNT(object) > 1) {
Py_DECREF(object);
}
}
size_t sizeOfBuffer(const char16_t *chars, mozilla::MallocSizeOf mallocSizeOf) const override {
return 0;
std::unordered_map<const char16_t *, PyObject *> charToPyObjectMap; // a map of char16_t buffers to their corresponding PyObjects, used when finalizing JSExternalStrings

PyObject *PythonExternalString::getPyString(const char16_t *chars)
{
return charToPyObjectMap[chars];
}

void PythonExternalString::finalize(char16_t *chars) const
{
// We cannot call Py_DECREF here when shutting down as the thread state is gone.
// Then, when shutting down, there is only on reference left, and we don't need
// to free the object since the entire process memory is being released.
PyObject *object = charToPyObjectMap[chars];
if (Py_REFCNT(object) > 1) {
Py_DECREF(object);
}
};
}

size_t PythonExternalString::sizeOfBuffer(const char16_t *chars, mozilla::MallocSizeOf mallocSizeOf) const
{
return PyUnicode_GetLength(charToPyObjectMap[chars]);
}

static constexpr PythonExternalString PythonExternalStringCallbacks;
PythonExternalString PythonExternalStringCallbacks = {};

size_t UCS4ToUTF16(const uint32_t *chars, size_t length, uint16_t **outStr) {
uint16_t *utf16String = (uint16_t *)malloc(sizeof(uint16_t) * length*2);
Expand Down Expand Up @@ -112,7 +118,7 @@ JS::Value jsTypeFactory(JSContext *cx, PyObject *object) {
returnType.setNumber(PyFloat_AsDouble(object));
}
else if (PyObject_TypeCheck(object, &JSStringProxyType)) {
returnType.setString(((JSStringProxy *)object)->jsString.toString());
returnType.setString(((JSStringProxy *)object)->jsString->toString());
}
else if (PyUnicode_Check(object)) {
switch (PyUnicode_KIND(object)) {
Expand Down
17 changes: 4 additions & 13 deletions src/modules/pythonmonkey/pythonmonkey.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ PyTypeObject JSObjectProxyType = {
PyTypeObject JSStringProxyType = {
.tp_name = PyUnicode_Type.tp_name,
.tp_basicsize = sizeof(JSStringProxy),
.tp_dealloc = (destructor)JSStringProxyMethodDefinitions::JSStringProxy_dealloc,
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_UNICODE_SUBCLASS,
.tp_doc = PyDoc_STR("Javascript String value"),
.tp_base = &PyUnicode_Type
Expand Down Expand Up @@ -419,7 +420,7 @@ static PyObject *eval(PyObject *self, PyObject *args) {

// compile the code to execute
JS::RootedScript script(GLOBAL_CX);
JS::Rooted<JS::Value> *rval = new JS::Rooted<JS::Value>(GLOBAL_CX);
JS::Rooted<JS::Value> rval(GLOBAL_CX);
if (code) {
JS::SourceText<mozilla::Utf8Unit> source;
const char *codeChars = PyUnicode_AsUTF8(code);
Expand All @@ -440,27 +441,17 @@ static PyObject *eval(PyObject *self, PyObject *args) {
}

// execute the compiled code; last expr goes to rval
if (!JS_ExecuteScript(GLOBAL_CX, script, rval)) {
if (!JS_ExecuteScript(GLOBAL_CX, script, &rval)) {
setSpiderMonkeyException(GLOBAL_CX);
return NULL;
}

// translate to the proper python type
PyObject *returnValue = pyTypeFactory(GLOBAL_CX, *rval);
PyObject *returnValue = pyTypeFactory(GLOBAL_CX, rval);
if (PyErr_Occurred()) {
return NULL;
}

// TODO: Find a way to root strings for the lifetime of a proxying python string
js::ESClass cls = js::ESClass::Other; // placeholder if `rval` is not a JSObject
if (rval->isObject()) {
JS::GetBuiltinClass(GLOBAL_CX, JS::RootedObject(GLOBAL_CX, &rval->toObject()), &cls);
}

if (!(rval->isString() || cls == js::ESClass::String)) { // rval may be a string which must be kept alive.
delete rval;
}

if (returnValue) {
return returnValue;
}
Expand Down
Loading