From 5701b4485105798a46be7e4c220cf31eb83e998b Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Thu, 16 Mar 2023 21:03:54 +0000 Subject: [PATCH 01/36] chore: remove unused `explore` module --- include/modules/explore/explore.hh | 40 ----------- src/CMakeLists.txt | 23 +------ src/modules/explore/explore.cc | 103 ----------------------------- tests/python/test_explore.py | 29 -------- 4 files changed, 1 insertion(+), 194 deletions(-) delete mode 100644 include/modules/explore/explore.hh delete mode 100644 src/modules/explore/explore.cc delete mode 100644 tests/python/test_explore.py diff --git a/include/modules/explore/explore.hh b/include/modules/explore/explore.hh deleted file mode 100644 index 66aecd60..00000000 --- a/include/modules/explore/explore.hh +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef PythonMonkey_Explore_ -#define PythonMonkey_Explore_ - -#include "include/IntType.hh" -#include "include/ListType.hh" - -#include - -/** @brief Function that takes an arbitrary number of arguments from python and outputs their C/C++ values. - - @author Giovanni Tedesco & Caleb Aikens - @date July 2022 - - @param self - Pointer to the python environment - @param args - The PyTuple of arguments that are passed into the function - */ -static PyObject *output(PyObject *self, PyObject *args); -static PyObject *factor(PyObject *self, PyObject *args); - -/** - * @brief - * - * @param self - Pointer to the python environment - * @param args - The PyTuple of arguments that are passed into the function - * @return PyObject* - */ -static PyObject *run(PyObject *self, PyObject *args); - -/** - * @brief Function that factors an integer in python - * - * @param self - Pointer to python environment - * @param args - The PyTuple of arugments that are passed into the function - * @return PyObject* - */ -static PyObject *pfactor(PyObject *self, PyObject *args); - -ListType *factor_int(IntType *x); - -#endif \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2e3d8357..fb9e7eca 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,12 +1,7 @@ include_directories(${CMAKE_CURRENT_LIST_DIR}) -#list(APPEND EXPLORE_SOURCE_FILES ${SOURCE_FILES} "${CMAKE_SOURCE_DIR}/src/modules/explore/explore.cc") list(APPEND PYTHONMONKEY_SOURCE_FILES ${SOURCE_FILES} "${CMAKE_SOURCE_DIR}/src/modules/pythonmonkey/pythonmonkey.cc") -# add_library(explore SHARED -# ${EXPLORE_SOURCE_FILES} -# ) - add_library(pythonmonkey SHARED ${PYTHONMONKEY_SOURCE_FILES} ) @@ -16,18 +11,9 @@ execute_process( OUTPUT_VARIABLE pyloc ) -# target_include_directories(explore PUBLIC ..) target_include_directories(pythonmonkey PUBLIC ..) if(WIN32) - # set_target_properties( - # explore - # PROPERTIES - # PREFIX "" - # SUFFIX ".pyd" - # OUTPUT_NAME "explore" - # CXX_STANDARD 17 - # ) set_target_properties( pythonmonkey PROPERTIES @@ -37,12 +23,6 @@ if(WIN32) CXX_STANDARD 17 ) elseif(UNIX) - # set_target_properties( - # explore - # PROPERTIES - # PREFIX "" - # SUFFIX ".so" - # ) set_target_properties( pythonmonkey PROPERTIES @@ -50,10 +30,9 @@ elseif(UNIX) SUFFIX ".so" ) endif() -# target_link_libraries(explore ${PYTHON_LIBRARIES}) + target_link_libraries(pythonmonkey ${PYTHON_LIBRARIES}) target_link_libraries(pythonmonkey ${SPIDERMONKEY_LIBRARIES}) -# target_include_directories(explore PRIVATE ${PYTHON_INCLUDE_DIR}) target_include_directories(pythonmonkey PRIVATE ${PYTHON_INCLUDE_DIR}) target_include_directories(pythonmonkey PRIVATE ${SPIDERMONKEY_INCLUDE_DIR}) \ No newline at end of file diff --git a/src/modules/explore/explore.cc b/src/modules/explore/explore.cc deleted file mode 100644 index b36a74e5..00000000 --- a/src/modules/explore/explore.cc +++ /dev/null @@ -1,103 +0,0 @@ -#include "include/modules/explore/explore.hh" - -#include "include/FuncType.hh" -#include "include/IntType.hh" -#include "include/ListType.hh" -#include "include/PyEvaluator.hh" -#include "include/pyTypeFactory.hh" -#include "include/StrType.hh" -#include "include/TupleType.hh" -#include "include/utilities.hh" - -#include - -#include - -#include - -/** - * @brief Factors an IntType - * - * @param x The IntType representation of the integer you want to factor - * @return PyObject* a list which is not referenced by the python garbage collector - */ -ListType *factor_int(IntType *x) { - ListType *list = new ListType(); - int n = x->getValue(); - - for (int i = 1; i < sqrt(n); i++) { - if (n % i == 0) { - IntType *a = new IntType(i); - IntType *b = new IntType(n/i); - - list->append(a); - list->append(b); - } - } - - list->sort(); - - return list; -} - -static PyObject *output(PyObject *self, PyObject *args) { - const int size = PyTuple_Size(args); - for (int i = 0; i < size; i++) { - PyType *item = pyTypeFactory(PyTuple_GET_ITEM(args, i)); - - std::cout << *item << std::endl; - } - Py_RETURN_NONE; -} - -static PyObject *factor(PyObject *self, PyObject *args) { - IntType *input = new IntType(PyTuple_GetItem(args, 0)); - - return factor_int(input)->getPyObject(); -} - -static PyObject *pfactor(PyObject *self, PyObject *args) { - PyEvaluator p = PyEvaluator(); - TupleType *arguments = new TupleType(args); - - PyType *result = p.eval("import math\ndef f(n):\n\treturn [x for x in range(1, n + 1) if n % x == 0]\n", "pfactor", arguments); - - if (result) { - return result->getPyObject(); - } - else { - return NULL; - } -} - -static PyObject *run(PyObject *self, PyObject *args) { - PyEvaluator p = PyEvaluator(); - - StrType *input = new StrType(PyTuple_GetItem(args, 0)); - - p.eval(input->getValue()); - - Py_RETURN_NONE; -} - -static PyMethodDef ExploreMethods[] = { - {"output", output, METH_VARARGS, "Multivariatic function outputs"}, - {"factor", factor, METH_VARARGS, "Factor a python integer in C++"}, - {"pfactor", pfactor, METH_VARARGS, "Factor a python integer in C++ using python"}, - {"run", run, METH_VARARGS, "Run an arbirtrary python command in c++"}, - {NULL, NULL, 0, NULL} -}; - -static struct PyModuleDef explore = -{ - PyModuleDef_HEAD_INIT, - "explore", /* name of module */ - "", /* module documentation, may be NULL */ - -1, /* size of per-interpreter state of the module, or -1 if the module keeps state in global variables. */ - ExploreMethods -}; - -PyMODINIT_FUNC PyInit_explore(void) -{ - return PyModule_Create(&explore); -} \ No newline at end of file diff --git a/tests/python/test_explore.py b/tests/python/test_explore.py deleted file mode 100644 index a950edae..00000000 --- a/tests/python/test_explore.py +++ /dev/null @@ -1,29 +0,0 @@ -# import explore - - -# def test_passes(): -# assert True - - -# def test_output_outputs_correct_integers(capfd): -# explore.output(1, 65, 72) - -# out, err = capfd.readouterr() -# assert out == "16572" - - -# def test_output_outputs_correct_strings(capfd): - -# explore.output("abcd", "newline\n", "tabs\t") - -# out, err = capfd.readouterr() -# assert out == "abcdnewline\ntabs\t" - - -# def test_output_outputs_correct_mixture(capfd): - -# explore.output("abcd", 13, "some string\n", 65) - -# out, err = capfd.readouterr() - -# assert out == "abcd13some string\n65" From b2717c8f2e27ae2b95c5b45a28a80d8de05098a0 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Thu, 16 Mar 2023 21:05:36 +0000 Subject: [PATCH 02/36] chore: add `Testing/Temporary` to `.gitignore` --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 441bbb6b..69526892 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,5 @@ firefox-102.2.0/* firefox-*/* firefox-*/ tests/__pycache__/* -tests/python/__pycache__/* \ No newline at end of file +tests/python/__pycache__/* +Testing/Temporary \ No newline at end of file From b0dd745fc851ec32b569f7d1ec909456d870b07f Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Sat, 18 Mar 2023 19:31:44 +0000 Subject: [PATCH 03/36] feat(bigint): initial implementation of BigInt --- include/IntType.hh | 22 ++++++++++++++++------ src/IntType.cc | 44 +++++++++++++++++++++++++++++++++++++++++--- src/pyTypeFactory.cc | 2 +- 3 files changed, 58 insertions(+), 10 deletions(-) diff --git a/include/IntType.hh b/include/IntType.hh index ded6bf7e..8b2206f9 100644 --- a/include/IntType.hh +++ b/include/IntType.hh @@ -1,11 +1,11 @@ /** * @file IntType.hh - * @author Caleb Aikens (caleb@distributive.network) & Giovanni Tedesco (giovanni@distributive.network) + * @author Caleb Aikens (caleb@distributive.network) & Giovanni Tedesco (giovanni@distributive.network) & Tom Tang (xmader@distributive.network) * @brief Struct for representing python ints - * @version 0.1 - * @date 2022-07-27 + * @version 0.2 + * @date 2023-03-16 * - * @copyright Copyright (c) 2022 + * @copyright Copyright (c) 2023 * */ @@ -15,19 +15,29 @@ #include "PyType.hh" #include "TypeEnum.hh" +#include + #include #include /** - * @brief This struct represents the 'int' type in Python, which is represented as a 'long' in C++. It inherits from the PyType struct + * @brief This struct represents the 'int' type (arbitrary-precision) in Python. It inherits from the PyType struct */ struct IntType : public PyType { public: IntType(PyObject *object); IntType(long n); + + /** + * @brief Construct a new IntType object from a JS::BigInt. + * + * @param cx - javascript context pointer + * @param str - JS::BigInt pointer + */ + IntType(JSContext *cx, JS::BigInt *bigint); + const TYPE returnType = TYPE::INT; - long getValue() const; protected: virtual void print(std::ostream &os) const override; diff --git a/src/IntType.cc b/src/IntType.cc index bbe42503..6198a31b 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -3,19 +3,57 @@ #include "include/PyType.hh" #include "include/TypeEnum.hh" +#include +#include + #include #include +#include + +#define SIGN_BIT_MASK 0b1000 // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l40 +#define CELL_HEADER_LENGTH 8 // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/gc/Cell.h#l602 +#define JS_DIGIT_BIT JS_BITS_PER_WORD +#define PY_DIGIT_BIT PYLONG_BITS_IN_DIGIT +#define JS_DIGIT_BYTE (sizeof(uintptr_t)/sizeof(uint8_t)) IntType::IntType(PyObject *object) : PyType(object) {} IntType::IntType(long n) : PyType(Py_BuildValue("i", n)) {} -long IntType::getValue() const { - return PyLong_AS_LONG(pyObject); +IntType::IntType(JSContext *cx, JS::BigInt *bigint) { + // Get the sign bit + uint32_t flagsField = ((uint32_t *)bigint)[0]; + // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l91 + bool isNegative = flagsField & SIGN_BIT_MASK; + + // Read the digits count in the JS BigInt + // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l48 + // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/gc/Cell.h#l623 + uint32_t jsDigitCount = ((uint32_t *)bigint)[1]; + + // Get all the 64-bit (assuming we compile on 64-bit OS) "digits" from JS BigInt + uintptr_t *jsDigits = (uintptr_t *)(((char *)bigint) + CELL_HEADER_LENGTH); + // + // The digit storage starts with the least significant digit (little-endian digit order). + // Byte order within a digit is native-endian. + + if constexpr (std::endian::native == std::endian::big) { // C++20 + // TODO: use C++23 std::byteswap? + printf("big-endian cpu is not supported by PythonMonkey yet"); + return; + } + // If the native endianness is also little-endian, + // we now have uniform bytes of 8-bit "digits" in little-endian order + auto bytes = const_cast((uint8_t *)jsDigits); + pyObject = _PyLong_FromByteArray(bytes, jsDigitCount * JS_DIGIT_BYTE, true, false); + // FIXME: sign } void IntType::print(std::ostream &os) const { - os << this->getValue(); + // Making sure the value does not overflow even if the int has millions of bits of precision + // FIXME double still overflows at 1.7976931348623157E+308 + // TODO (Tom Tang) use Python's `str` conversion and then use `PyUnicode_AsUTF8` to print with whole precisions + os << PyLong_AsDouble(pyObject); } \ No newline at end of file diff --git a/src/pyTypeFactory.cc b/src/pyTypeFactory.cc index 5fa7d97f..9eae0da7 100644 --- a/src/pyTypeFactory.cc +++ b/src/pyTypeFactory.cc @@ -75,7 +75,7 @@ PyType *pyTypeFactory(JSContext *cx, JS::Rooted *global, JS::Rooted< printf("symbol type is not handled by PythonMonkey yet"); } else if (rval->isBigInt()) { - printf("bigint type is not handled by PythonMonkey yet"); + returnValue = new IntType(cx, rval->toBigInt()); } else if (rval->isObject()) { JS::Rooted obj(cx); From facb4aaa0b30c86bd76bbf0708925b5917ac3841 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Sat, 18 Mar 2023 19:40:28 +0000 Subject: [PATCH 04/36] refactor(bigint): use public SpiderMonkey API to read the sign bit --- src/IntType.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/IntType.cc b/src/IntType.cc index 6198a31b..71c4cf63 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -11,7 +11,6 @@ #include #include -#define SIGN_BIT_MASK 0b1000 // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l40 #define CELL_HEADER_LENGTH 8 // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/gc/Cell.h#l602 #define JS_DIGIT_BIT JS_BITS_PER_WORD @@ -24,9 +23,7 @@ IntType::IntType(long n) : PyType(Py_BuildValue("i", n)) {} IntType::IntType(JSContext *cx, JS::BigInt *bigint) { // Get the sign bit - uint32_t flagsField = ((uint32_t *)bigint)[0]; - // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l91 - bool isNegative = flagsField & SIGN_BIT_MASK; + bool isNegative = BigIntIsNegative(bigint); // Read the digits count in the JS BigInt // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l48 From 2ed0c808cff970227621027b362c7c8c6a916d7e Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Sat, 18 Mar 2023 20:19:33 +0000 Subject: [PATCH 05/36] fix(bigint): properly get the digit storage if the number cannot fit in one uint64_t --- src/IntType.cc | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/IntType.cc b/src/IntType.cc index 71c4cf63..6829f69f 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -15,7 +15,11 @@ #define JS_DIGIT_BIT JS_BITS_PER_WORD #define PY_DIGIT_BIT PYLONG_BITS_IN_DIGIT -#define JS_DIGIT_BYTE (sizeof(uintptr_t)/sizeof(uint8_t)) + +#define js_digit_t uintptr_t // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l36 +#define JS_DIGIT_BYTE (sizeof(js_digit_t)/sizeof(uint8_t)) + +#define JS_INLINE_DIGIT_MAX_LEN 1 // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l43 IntType::IntType(PyObject *object) : PyType(object) {} @@ -25,13 +29,18 @@ IntType::IntType(JSContext *cx, JS::BigInt *bigint) { // Get the sign bit bool isNegative = BigIntIsNegative(bigint); - // Read the digits count in the JS BigInt - // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l48 - // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/gc/Cell.h#l623 + // Read the digits count in this JS BigInt + // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l48 + // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/gc/Cell.h#l623 uint32_t jsDigitCount = ((uint32_t *)bigint)[1]; // Get all the 64-bit (assuming we compile on 64-bit OS) "digits" from JS BigInt - uintptr_t *jsDigits = (uintptr_t *)(((char *)bigint) + CELL_HEADER_LENGTH); + js_digit_t *jsDigits = (js_digit_t *)(((char *)bigint) + CELL_HEADER_LENGTH); + if (jsDigitCount > JS_INLINE_DIGIT_MAX_LEN) { // hasHeapDigits + // We actually have a pointer to the digit storage if the number cannot fit in one uint64_t + // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l54 + jsDigits = *((js_digit_t **)jsDigits); + } // // The digit storage starts with the least significant digit (little-endian digit order). // Byte order within a digit is native-endian. From 3fcc3d09d51a3901c567547ff57d39b900ab12a1 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Sat, 18 Mar 2023 20:29:10 +0000 Subject: [PATCH 06/36] feat(bigint): signed BigInt --- src/IntType.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/IntType.cc b/src/IntType.cc index 6829f69f..9aa263a2 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -54,7 +54,11 @@ IntType::IntType(JSContext *cx, JS::BigInt *bigint) { // we now have uniform bytes of 8-bit "digits" in little-endian order auto bytes = const_cast((uint8_t *)jsDigits); pyObject = _PyLong_FromByteArray(bytes, jsDigitCount * JS_DIGIT_BYTE, true, false); - // FIXME: sign + + // Set the sign bit + // https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L956 + auto pyDigitCount = Py_SIZE(pyObject); + Py_SET_SIZE(pyObject, isNegative ? -pyDigitCount : pyDigitCount); } void IntType::print(std::ostream &os) const { From 0f04c5270362b437ab6d1e337140f58fe5d5e45e Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Sat, 18 Mar 2023 22:16:06 +0000 Subject: [PATCH 07/36] perf(bigint): skip setting the sign bit if the number is positive --- src/IntType.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/IntType.cc b/src/IntType.cc index 9aa263a2..6223c18a 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -30,15 +30,15 @@ IntType::IntType(JSContext *cx, JS::BigInt *bigint) { bool isNegative = BigIntIsNegative(bigint); // Read the digits count in this JS BigInt - // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l48 - // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/gc/Cell.h#l623 + // see https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l48 + // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/gc/Cell.h#l623 uint32_t jsDigitCount = ((uint32_t *)bigint)[1]; // Get all the 64-bit (assuming we compile on 64-bit OS) "digits" from JS BigInt js_digit_t *jsDigits = (js_digit_t *)(((char *)bigint) + CELL_HEADER_LENGTH); if (jsDigitCount > JS_INLINE_DIGIT_MAX_LEN) { // hasHeapDigits // We actually have a pointer to the digit storage if the number cannot fit in one uint64_t - // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l54 + // see https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l54 jsDigits = *((js_digit_t **)jsDigits); } // @@ -56,9 +56,11 @@ IntType::IntType(JSContext *cx, JS::BigInt *bigint) { pyObject = _PyLong_FromByteArray(bytes, jsDigitCount * JS_DIGIT_BYTE, true, false); // Set the sign bit - // https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L956 - auto pyDigitCount = Py_SIZE(pyObject); - Py_SET_SIZE(pyObject, isNegative ? -pyDigitCount : pyDigitCount); + // see https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L956 + if (isNegative) { + auto pyDigitCount = Py_SIZE(pyObject); + Py_SET_SIZE(pyObject, -pyDigitCount); + } } void IntType::print(std::ostream &os) const { From 1188540ba4a506b2270dd7661428bc91c5cf7d0d Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Sat, 18 Mar 2023 23:05:08 +0000 Subject: [PATCH 08/36] feat(bigint): handle boxed `BigInt` as in `Object(1n)` --- src/pyTypeFactory.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/pyTypeFactory.cc b/src/pyTypeFactory.cc index 9eae0da7..742612ae 100644 --- a/src/pyTypeFactory.cc +++ b/src/pyTypeFactory.cc @@ -84,6 +84,8 @@ PyType *pyTypeFactory(JSContext *cx, JS::Rooted *global, JS::Rooted< JS::GetBuiltinClass(cx, obj, &cls); switch (cls) { case js::ESClass::Boolean: { + // TODO: refactor out all `js::Unbox` calls + // TODO: refactor using recursive call to `pyTypeFactory` JS::RootedValue unboxed(cx); js::Unbox(cx, obj, &unboxed); returnValue = new BoolType(unboxed.toBoolean()); @@ -108,6 +110,12 @@ PyType *pyTypeFactory(JSContext *cx, JS::Rooted *global, JS::Rooted< returnValue = new FloatType(unboxed.toNumber()); break; } + case js::ESClass::BigInt: { + JS::RootedValue unboxed(cx); + js::Unbox(cx, obj, &unboxed); + returnValue = new IntType(cx, unboxed.toBigInt()); + break; + } case js::ESClass::String: { JS::RootedValue unboxed(cx); js::Unbox(cx, obj, &unboxed); From 8eb93cc28b95275866b7d63f200079ec3a1c1fe2 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Sun, 19 Mar 2023 01:15:17 +0000 Subject: [PATCH 09/36] test(bigint): write tests for BigInt --- tests/python/test_pythonmonkey_eval.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/python/test_pythonmonkey_eval.py b/tests/python/test_pythonmonkey_eval.py index 051084b1..8471d6da 100644 --- a/tests/python/test_pythonmonkey_eval.py +++ b/tests/python/test_pythonmonkey_eval.py @@ -146,6 +146,27 @@ def test_eval_numbers_integers(): js_number = pm.eval(repr(py_number)) assert py_number == js_number +def test_eval_numbers_bigints(): + def test_bigint(py_number: int): + js_number = pm.eval(f'{repr(py_number)}n') + assert py_number == js_number + + test_bigint(0) + test_bigint(1) + test_bigint(-1) + + test_bigint(18014398509481984) # 2**54 + test_bigint(-18014398509481984) # -2**54 + test_bigint(18446744073709551615) # 2**64-1 + test_bigint(18446744073709551616) # 2**64 + test_bigint(-18446744073709551617) # -2**64-1 + + limit = 2037035976334486086268445688409378161051468393665936250636140449354381299763336706183397376 + # = 2**300 + for i in range(10): + py_number = random.randint(-limit, limit) + test_bigint(py_number) + def test_eval_booleans(): py_bool = True js_bool = pm.eval('true') From 0db2a4f2e47e78d4fd216bfe62d7a92351dbafd8 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Sun, 19 Mar 2023 01:30:33 +0000 Subject: [PATCH 10/36] test(bigint): write tests for boxed BigInt --- tests/python/test_pythonmonkey_eval.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/python/test_pythonmonkey_eval.py b/tests/python/test_pythonmonkey_eval.py index 8471d6da..689f869c 100644 --- a/tests/python/test_pythonmonkey_eval.py +++ b/tests/python/test_pythonmonkey_eval.py @@ -207,6 +207,23 @@ def test_eval_boxed_numbers_integers(): js_number = pm.eval(f'new Number({repr(py_number)})') assert py_number == js_number +def test_eval_boxed_numbers_bigints(): + def test_boxed_bigint(py_number: int): + # `BigInt()` can only be called without `new` + # https://tc39.es/ecma262/#sec-bigint-constructor + js_number = pm.eval(f'new Object({repr(py_number)}n)') + assert py_number == js_number + + test_boxed_bigint(0) + test_boxed_bigint(1) + test_boxed_bigint(-1) + + limit = 2037035976334486086268445688409378161051468393665936250636140449354381299763336706183397376 + # = 2**300 + for i in range(10): + py_number = random.randint(-limit, limit) + test_boxed_bigint(py_number) + def test_eval_boxed_ascii_string_matches_evaluated_string(): py_ascii_string = "abc" js_ascii_string = pm.eval(f'new String({repr(py_ascii_string)})') From e32f33c8125f5193cfe7c15298d7e4a59c4f896d Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Mon, 20 Mar 2023 14:55:16 +0000 Subject: [PATCH 11/36] feat(bigint): check if integer exceeds Number.MAX_SAFE_INTEGER --- src/jsTypeFactory.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/jsTypeFactory.cc b/src/jsTypeFactory.cc index 9013177d..1cb5447b 100644 --- a/src/jsTypeFactory.cc +++ b/src/jsTypeFactory.cc @@ -22,7 +22,12 @@ JS::Value jsTypeFactory(PyObject *object) { returnType.setBoolean(PyLong_AsLong(object)); } else if (PyLong_Check(object)) { - returnType.setNumber(PyLong_AsLong(object)); + long num = PyLong_AsLong(object); // FIXME: long is 32-bit on Win64 or 32bit *nix + if (JS::Value::isNumberRepresentable(num)) { + returnType.setNumber(num); + } else { + PyErr_SetString(PyExc_TypeError, "Integer exceeds Number.MAX_SAFE_INTEGER. Use pythonmonkey.bigint instead."); + } } else if (PyFloat_Check(object)) { returnType.setNumber(PyFloat_AsDouble(object)); From f54a0d417d62ccf56731f8fa178f7c1db8adb93a Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Mon, 20 Mar 2023 19:41:10 +0000 Subject: [PATCH 12/36] refactor(pm.null): use designated initializers (C++20 feature) --- src/modules/pythonmonkey/pythonmonkey.cc | 45 +----------------------- 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/src/modules/pythonmonkey/pythonmonkey.cc b/src/modules/pythonmonkey/pythonmonkey.cc index 7b77f4c5..0707c109 100644 --- a/src/modules/pythonmonkey/pythonmonkey.cc +++ b/src/modules/pythonmonkey/pythonmonkey.cc @@ -30,57 +30,14 @@ typedef struct { std::unordered_map *>> PyTypeToGCThing; /**< data structure to hold memoized PyObject & GCThing data for handling GC*/ -// @TODO (Caleb Aikens) figure out how to use C99-style designated initializers with a modern C++ compiler static PyTypeObject NullType = { .ob_base = PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "pythonmonkey.Null", + .tp_name = "pythonmonkey.null", .tp_basicsize = sizeof(NullObject), .tp_itemsize = 0, - .tp_dealloc = NULL, - .tp_vectorcall_offset = NULL, - .tp_getattr = NULL, - .tp_setattr = NULL, - .tp_as_async = NULL, - .tp_repr = NULL, - .tp_as_number = NULL, - .tp_as_sequence = NULL, - .tp_as_mapping = NULL, - .tp_hash = NULL, - .tp_call = NULL, - .tp_str = NULL, - .tp_getattro = NULL, - .tp_setattro = NULL, - .tp_as_buffer = NULL, .tp_flags = Py_TPFLAGS_DEFAULT, .tp_doc = PyDoc_STR("Javascript null object"), - .tp_traverse = NULL, - .tp_clear = NULL, - .tp_richcompare = NULL, - .tp_weaklistoffset = NULL, - .tp_iter = NULL, - .tp_iternext = NULL, - .tp_methods = NULL, - .tp_members = NULL, - .tp_getset = NULL, - .tp_base = NULL, - .tp_dict = NULL, - .tp_descr_get = NULL, - .tp_descr_set = NULL, - .tp_dictoffset = NULL, - .tp_init = NULL, - .tp_alloc = NULL, .tp_new = PyType_GenericNew, - .tp_free = NULL, - .tp_is_gc = NULL, - .tp_bases = NULL, - .tp_mro = NULL, - .tp_cache = NULL, - .tp_subclasses = NULL, - .tp_weaklist = NULL, - .tp_del = NULL, - .tp_version_tag = NULL, - .tp_finalize = NULL, - .tp_vectorcall = NULL, }; static void cleanup() { From 421133e8fef9a3ba66d1c75803b737ecd311fc46 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Mon, 20 Mar 2023 21:01:20 +0000 Subject: [PATCH 13/36] feat(bigint): create a custom class on PythonMonkey that maps to JS BigInt --- src/modules/pythonmonkey/pythonmonkey.cc | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/modules/pythonmonkey/pythonmonkey.cc b/src/modules/pythonmonkey/pythonmonkey.cc index 0707c109..7f142109 100644 --- a/src/modules/pythonmonkey/pythonmonkey.cc +++ b/src/modules/pythonmonkey/pythonmonkey.cc @@ -40,6 +40,15 @@ static PyTypeObject NullType = { .tp_new = PyType_GenericNew, }; +static PyTypeObject BigIntType = { + .tp_name = "pythonmonkey.bigint", + .tp_flags = Py_TPFLAGS_DEFAULT + | Py_TPFLAGS_LONG_SUBCLASS // https://docs.python.org/3/c-api/typeobj.html#Py_TPFLAGS_LONG_SUBCLASS + | Py_TPFLAGS_BASETYPE, // can be subclassed + .tp_doc = PyDoc_STR("Javascript BigInt object"), + .tp_base = &PyLong_Type, // extending the builtin int type +}; + static void cleanup() { JS_DestroyContext(cx); JS_ShutDown(); @@ -167,7 +176,6 @@ PyMODINIT_FUNC PyInit_pythonmonkey(void) PyErr_SetString(SpiderMonkeyError, "Spidermonkey could not be initialized."); return NULL; } - cx = JS_NewContext(JS::DefaultHeapMaxBytes); if (!cx) { @@ -194,6 +202,8 @@ PyMODINIT_FUNC PyInit_pythonmonkey(void) PyObject *pyModule; if (PyType_Ready(&NullType) < 0) return NULL; + if (PyType_Ready(&BigIntType) < 0) + return NULL; pyModule = PyModule_Create(&pythonmonkey); if (pyModule == NULL) @@ -205,6 +215,12 @@ PyMODINIT_FUNC PyInit_pythonmonkey(void) Py_DECREF(pyModule); return NULL; } + Py_INCREF(&BigIntType); + if (PyModule_AddObject(pyModule, "bigint", (PyObject *)&BigIntType) < 0) { + Py_DECREF(&BigIntType); + Py_DECREF(pyModule); + return NULL; + } SpiderMonkeyError = PyErr_NewException("pythonmonkey.SpiderMonkeyError", NULL, NULL); if (PyModule_AddObject(pyModule, "SpiderMonkeyError", SpiderMonkeyError)) { From f3b2ee3d18c938ef24113ba27a7880b99e607e07 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Mon, 20 Mar 2023 21:03:14 +0000 Subject: [PATCH 14/36] fix(pm.null): `pythonmonkey.null` shouldn't be instantiated --- src/modules/pythonmonkey/pythonmonkey.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/modules/pythonmonkey/pythonmonkey.cc b/src/modules/pythonmonkey/pythonmonkey.cc index 7f142109..8e63f0ff 100644 --- a/src/modules/pythonmonkey/pythonmonkey.cc +++ b/src/modules/pythonmonkey/pythonmonkey.cc @@ -34,10 +34,8 @@ static PyTypeObject NullType = { .ob_base = PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "pythonmonkey.null", .tp_basicsize = sizeof(NullObject), - .tp_itemsize = 0, - .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, // https://docs.python.org/3/c-api/typeobj.html#Py_TPFLAGS_DISALLOW_INSTANTIATION .tp_doc = PyDoc_STR("Javascript null object"), - .tp_new = PyType_GenericNew, }; static PyTypeObject BigIntType = { From 3074fc423ddb4d53f4ca2f0793a13c73aed8502b Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Mon, 20 Mar 2023 21:06:06 +0000 Subject: [PATCH 15/36] revert(pm.null): `Py_TPFLAGS_DISALLOW_INSTANTIATION` is new from Python 3.10 --- src/modules/pythonmonkey/pythonmonkey.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/pythonmonkey/pythonmonkey.cc b/src/modules/pythonmonkey/pythonmonkey.cc index 8e63f0ff..2576aa12 100644 --- a/src/modules/pythonmonkey/pythonmonkey.cc +++ b/src/modules/pythonmonkey/pythonmonkey.cc @@ -34,7 +34,7 @@ static PyTypeObject NullType = { .ob_base = PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "pythonmonkey.null", .tp_basicsize = sizeof(NullObject), - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, // https://docs.python.org/3/c-api/typeobj.html#Py_TPFLAGS_DISALLOW_INSTANTIATION + .tp_flags = Py_TPFLAGS_DEFAULT, .tp_doc = PyDoc_STR("Javascript null object"), }; From bccfeff6ccdb3cd1a63897f801cea9fdecd36cc4 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Tue, 21 Mar 2023 13:20:08 +0000 Subject: [PATCH 16/36] feat(bigint): convert Js BigInt to pm.bigint --- include/modules/pythonmonkey/pythonmonkey.hh | 3 ++- src/IntType.cc | 8 +++++++- src/jsTypeFactory.cc | 12 ++++++++---- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/include/modules/pythonmonkey/pythonmonkey.hh b/include/modules/pythonmonkey/pythonmonkey.hh index 7e927512..049087ee 100644 --- a/include/modules/pythonmonkey/pythonmonkey.hh +++ b/include/modules/pythonmonkey/pythonmonkey.hh @@ -19,7 +19,8 @@ #include -#define PythonMonkey_Null PyObject_GetAttrString(PyState_FindModule(&pythonmonkey), "null") /**< macro for python null object*/ +#define PythonMonkey_Null PyObject_GetAttrString(PyState_FindModule(&pythonmonkey), "null") /**< macro for pythonmonkey.null object*/ +#define PythonMonkey_BigInt PyObject_GetAttrString(PyState_FindModule(&pythonmonkey), "bigint") /**< macro for pythonmonkey.bigint class object */ static JSContext *cx; /**< pointer to PythonMonkey's JSContext */ static JS::Rooted *global; /**< pointer to the global object of PythonMonkey's JSContext */ diff --git a/src/IntType.cc b/src/IntType.cc index 6223c18a..3d24870c 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -1,3 +1,5 @@ +#include "include/modules/pythonmonkey/pythonmonkey.hh" + #include "include/IntType.hh" #include "include/PyType.hh" @@ -51,7 +53,7 @@ IntType::IntType(JSContext *cx, JS::BigInt *bigint) { return; } // If the native endianness is also little-endian, - // we now have uniform bytes of 8-bit "digits" in little-endian order + // we now have consecutive bytes of 8-bit "digits" in little-endian order auto bytes = const_cast((uint8_t *)jsDigits); pyObject = _PyLong_FromByteArray(bytes, jsDigitCount * JS_DIGIT_BYTE, true, false); @@ -61,6 +63,10 @@ IntType::IntType(JSContext *cx, JS::BigInt *bigint) { auto pyDigitCount = Py_SIZE(pyObject); Py_SET_SIZE(pyObject, -pyDigitCount); } + + // Cast to a pythonmonkey.bigint to differentiate it from a normal Python int, + // allowing Py<->JS two-way BigInt conversion + Py_SET_TYPE(pyObject, (PyTypeObject *)(PythonMonkey_BigInt)); } void IntType::print(std::ostream &os) const { diff --git a/src/jsTypeFactory.cc b/src/jsTypeFactory.cc index 1cb5447b..0958ee1f 100644 --- a/src/jsTypeFactory.cc +++ b/src/jsTypeFactory.cc @@ -22,11 +22,15 @@ JS::Value jsTypeFactory(PyObject *object) { returnType.setBoolean(PyLong_AsLong(object)); } else if (PyLong_Check(object)) { - long num = PyLong_AsLong(object); // FIXME: long is 32-bit on Win64 or 32bit *nix - if (JS::Value::isNumberRepresentable(num)) { - returnType.setNumber(num); + if (PyObject_IsInstance(object, PythonMonkey_BigInt)) { // pm.bigint is a subclass of the builtin int type + returnType.setBigInt(num); } else { - PyErr_SetString(PyExc_TypeError, "Integer exceeds Number.MAX_SAFE_INTEGER. Use pythonmonkey.bigint instead."); + long num = PyLong_AsLong(object); // FIXME: long is 32-bit on Win64 or 32bit *nix + if (JS::Value::isNumberRepresentable(num)) { + returnType.setNumber(num); + } else { + PyErr_SetString(PyExc_TypeError, "Integer exceeds Number.MAX_SAFE_INTEGER. Use pythonmonkey.bigint instead."); + } } } else if (PyFloat_Check(object)) { From 032bfa85862adfc6db04d71c49e1f0699deec55f Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Tue, 21 Mar 2023 13:21:30 +0000 Subject: [PATCH 17/36] fix(callJSFunc): fail-fast on exception --- src/pyTypeFactory.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pyTypeFactory.cc b/src/pyTypeFactory.cc index 742612ae..d5518002 100644 --- a/src/pyTypeFactory.cc +++ b/src/pyTypeFactory.cc @@ -145,6 +145,9 @@ static PyObject *callJSFunc(PyObject *JSCxGlobalFuncTuple, PyObject *args) { for (size_t i = 0; i < PyTuple_Size(args); i++) { // TODO (Caleb Aikens) write an overload for jsTypeFactory to handle PyObjects directly JS::Value jsValue = jsTypeFactory(PyTuple_GetItem(args, i)); + if (PyErr_Occurred()) { // Check if an exception has already been set in the flow of control + return NULL; // Fail-fast + } JSargsVector.append(jsValue); } From 71cf24806a1e1d457e13d7989158107e4efa4d03 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Tue, 21 Mar 2023 13:31:42 +0000 Subject: [PATCH 18/36] fix(bigint): long is 32-bit on Win64 or 32bit *nix --- src/jsTypeFactory.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jsTypeFactory.cc b/src/jsTypeFactory.cc index 0958ee1f..311fbbf9 100644 --- a/src/jsTypeFactory.cc +++ b/src/jsTypeFactory.cc @@ -25,8 +25,8 @@ JS::Value jsTypeFactory(PyObject *object) { if (PyObject_IsInstance(object, PythonMonkey_BigInt)) { // pm.bigint is a subclass of the builtin int type returnType.setBigInt(num); } else { - long num = PyLong_AsLong(object); // FIXME: long is 32-bit on Win64 or 32bit *nix - if (JS::Value::isNumberRepresentable(num)) { + long long num = PyLong_AsLongLong(object); + if (JS::Value::isNumberRepresentable(num)) { // TODO: refactor using _PyLong_NumBits ? returnType.setNumber(num); } else { PyErr_SetString(PyExc_TypeError, "Integer exceeds Number.MAX_SAFE_INTEGER. Use pythonmonkey.bigint instead."); From 68e172fa38ddb4c76f1c861fbd82b807e0f0225e Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Tue, 21 Mar 2023 13:45:45 +0000 Subject: [PATCH 19/36] fix(bigint): print out the `IntType` as a string --- src/IntType.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/IntType.cc b/src/IntType.cc index 3d24870c..5f436a64 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -71,7 +71,8 @@ IntType::IntType(JSContext *cx, JS::BigInt *bigint) { void IntType::print(std::ostream &os) const { // Making sure the value does not overflow even if the int has millions of bits of precision - // FIXME double still overflows at 1.7976931348623157E+308 - // TODO (Tom Tang) use Python's `str` conversion and then use `PyUnicode_AsUTF8` to print with whole precisions - os << PyLong_AsDouble(pyObject); + auto str = PyObject_Str(pyObject); + os << PyUnicode_AsUTF8(str); + // https://pythonextensionpatterns.readthedocs.io/en/latest/refcount.html#new-references + Py_DECREF(str); // free } \ No newline at end of file From 9954fc25a1a54e611088d50a07163b594f0ab063 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Tue, 21 Mar 2023 19:29:37 +0000 Subject: [PATCH 20/36] feat(bigint): convert Python int/pm.bigint to JS BigInt --- include/IntType.hh | 7 +++++++ src/IntType.cc | 38 ++++++++++++++++++++++++++++++++++++++ src/jsTypeFactory.cc | 4 +++- 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/include/IntType.hh b/include/IntType.hh index 8b2206f9..36cf5c60 100644 --- a/include/IntType.hh +++ b/include/IntType.hh @@ -39,6 +39,13 @@ public: const TYPE returnType = TYPE::INT; + /** + * @brief Convert the IntType object to a JS::BigInt + * + * @param cx - javascript context pointer + */ + JS::BigInt *toJsBigInt(JSContext *cx); + protected: virtual void print(std::ostream &os) const override; }; diff --git a/src/IntType.cc b/src/IntType.cc index 5f436a64..1fc04309 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -23,6 +23,8 @@ #define JS_INLINE_DIGIT_MAX_LEN 1 // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l43 +static const char HEX_CHAR_LOOKUP_TABLE[] = "0123456789ABCDEF"; + IntType::IntType(PyObject *object) : PyType(object) {} IntType::IntType(long n) : PyType(Py_BuildValue("i", n)) {} @@ -69,6 +71,42 @@ IntType::IntType(JSContext *cx, JS::BigInt *bigint) { Py_SET_TYPE(pyObject, (PyTypeObject *)(PythonMonkey_BigInt)); } +JS::BigInt *IntType::toJsBigInt(JSContext *cx) { + // Figure out how many 64-bit "digits" we have + // see https://github.com/python/cpython/blob/3.9/Modules/_randommodule.c#L306 + auto bitCount = _PyLong_NumBits(pyObject); + if (bitCount == (size_t)-1 && PyErr_Occurred()) + return nullptr; + uint32_t jsDigitCount = bitCount == 0 ? 1 : (bitCount - 1) / JS_DIGIT_BIT + 1; + size_t byteCount = (size_t)JS_DIGIT_BYTE * jsDigitCount; + + // Convert to bytes of 8-bit "digits" in **big-endian** order + auto bytes = (uint8_t *)PyMem_Malloc(byteCount); + if (bytes == NULL) { + PyErr_NoMemory(); + return nullptr; + } + _PyLong_AsByteArray((PyLongObject *)pyObject, bytes, byteCount, /*is_little_endian*/ false, false); + + // Convert pm.bigint to JS::BigInt through hex strings (no public API to convert directly through bytes) + // TODO: We could manually allocate the memory, https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.cpp#l162, but still no public API + // TODO: Could we fill in an object with similar memory alignment (maybe by NewArrayBufferWithContents), and coerce it to BigInt? + + // Calculate the number of chars required to represent the bigint in hex string + auto charCount = byteCount * 2; + // Convert bytes to hex string (big-endian) + auto chars = std::vector(charCount+1); + for (size_t i = 0, j = 0; i < charCount; i += 2, j++) { + chars[i] = HEX_CHAR_LOOKUP_TABLE[(bytes[j] >> 4)&0xf]; // high nibble + chars[i+1] = HEX_CHAR_LOOKUP_TABLE[bytes[j]&0xf]; // low nibble + } + PyMem_Free(bytes); + + // Convert hex string to JS::BigInt + auto strSpan = mozilla::Span(chars); + return JS::SimpleStringToBigInt(cx, strSpan, 16); +} + void IntType::print(std::ostream &os) const { // Making sure the value does not overflow even if the int has millions of bits of precision auto str = PyObject_Str(pyObject); diff --git a/src/jsTypeFactory.cc b/src/jsTypeFactory.cc index 311fbbf9..8002600f 100644 --- a/src/jsTypeFactory.cc +++ b/src/jsTypeFactory.cc @@ -12,6 +12,7 @@ #include "include/jsTypeFactory.hh" #include "include/modules/pythonmonkey/pythonmonkey.hh" +#include "include/IntType.hh" #include @@ -23,7 +24,8 @@ JS::Value jsTypeFactory(PyObject *object) { } else if (PyLong_Check(object)) { if (PyObject_IsInstance(object, PythonMonkey_BigInt)) { // pm.bigint is a subclass of the builtin int type - returnType.setBigInt(num); + auto bigint = IntType(object).toJsBigInt(cx); + returnType.setBigInt(bigint); } else { long long num = PyLong_AsLongLong(object); if (JS::Value::isNumberRepresentable(num)) { // TODO: refactor using _PyLong_NumBits ? From 924c6221700a15f68e03784998f4176231f76bdd Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Tue, 21 Mar 2023 19:45:55 +0000 Subject: [PATCH 21/36] fix(bigint): use the JScontext from `callJSFunc` --- include/jsTypeFactory.hh | 3 ++- src/jsTypeFactory.cc | 2 +- src/pyTypeFactory.cc | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/jsTypeFactory.hh b/include/jsTypeFactory.hh index b041815e..3789a403 100644 --- a/include/jsTypeFactory.hh +++ b/include/jsTypeFactory.hh @@ -19,9 +19,10 @@ /** * @brief Function that takes a PyType and returns a corresponding JS::Value, doing shared memory management when necessary * + * @param cx - javascript context pointer * @param object - Pointer to the PyObject who's type and value we wish to encapsulate * @return JS::Value - A JS::Value corresponding to the PyType */ -JS::Value jsTypeFactory(PyObject *object); +JS::Value jsTypeFactory(JSContext *cx, PyObject *object); #endif \ No newline at end of file diff --git a/src/jsTypeFactory.cc b/src/jsTypeFactory.cc index 8002600f..b1ef6339 100644 --- a/src/jsTypeFactory.cc +++ b/src/jsTypeFactory.cc @@ -16,7 +16,7 @@ #include -JS::Value jsTypeFactory(PyObject *object) { +JS::Value jsTypeFactory(JSContext *cx, PyObject *object) { JS::Value returnType; if (PyBool_Check(object)) { diff --git a/src/pyTypeFactory.cc b/src/pyTypeFactory.cc index d5518002..576c7950 100644 --- a/src/pyTypeFactory.cc +++ b/src/pyTypeFactory.cc @@ -144,7 +144,7 @@ static PyObject *callJSFunc(PyObject *JSCxGlobalFuncTuple, PyObject *args) { JS::RootedVector JSargsVector(JScontext); for (size_t i = 0; i < PyTuple_Size(args); i++) { // TODO (Caleb Aikens) write an overload for jsTypeFactory to handle PyObjects directly - JS::Value jsValue = jsTypeFactory(PyTuple_GetItem(args, i)); + JS::Value jsValue = jsTypeFactory(JScontext, PyTuple_GetItem(args, i)); if (PyErr_Occurred()) { // Check if an exception has already been set in the flow of control return NULL; // Fail-fast } From 6c613206182f8ae978e63061e7c8ea3ddc604ba0 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Tue, 21 Mar 2023 21:01:29 +0000 Subject: [PATCH 22/36] fix(callJSFunc): fail-fast on exception --- src/pyTypeFactory.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pyTypeFactory.cc b/src/pyTypeFactory.cc index 27bd72a7..a585cec4 100644 --- a/src/pyTypeFactory.cc +++ b/src/pyTypeFactory.cc @@ -144,6 +144,9 @@ static PyObject *callJSFunc(PyObject *JSCxGlobalFuncTuple, PyObject *args) { JS::RootedVector JSargsVector(JScontext); for (size_t i = 0; i < PyTuple_Size(args); i++) { JS::Value jsValue = jsTypeFactory(JScontext, PyTuple_GetItem(args, i)); + if (PyErr_Occurred()) { // Check if an exception has already been set in the flow of control + return NULL; // Fail-fast + } JSargsVector.append(jsValue); } From 08d02ebf0a6c62019c39dd7c822f954be3400762 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Wed, 22 Mar 2023 00:10:29 +0000 Subject: [PATCH 23/36] fix(bigint): segfault (core dumped) when converting from hex string --- src/IntType.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IntType.cc b/src/IntType.cc index 1fc04309..18a2b8dd 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -103,7 +103,7 @@ JS::BigInt *IntType::toJsBigInt(JSContext *cx) { PyMem_Free(bytes); // Convert hex string to JS::BigInt - auto strSpan = mozilla::Span(chars); + auto strSpan = mozilla::MakeStringSpan(chars.data()); return JS::SimpleStringToBigInt(cx, strSpan, 16); } From 72c9eb465e6477bad11db21d0b03fd0fed9139ea Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Wed, 22 Mar 2023 17:33:36 +0000 Subject: [PATCH 24/36] fix(bigint): create a new object instead of reusing the object for int 0 every int 0 literal would become a pm.bigint after the conversion --- src/IntType.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/IntType.cc b/src/IntType.cc index 18a2b8dd..fc7ef18c 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -57,7 +57,14 @@ IntType::IntType(JSContext *cx, JS::BigInt *bigint) { // If the native endianness is also little-endian, // we now have consecutive bytes of 8-bit "digits" in little-endian order auto bytes = const_cast((uint8_t *)jsDigits); - pyObject = _PyLong_FromByteArray(bytes, jsDigitCount * JS_DIGIT_BYTE, true, false); + if (jsDigitCount == 0) { + // Create a new object instead of reusing the object for int 0 + // see https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L862 + // https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L310 + pyObject = (PyObject *)_PyLong_New(0); + } else { + pyObject = _PyLong_FromByteArray(bytes, jsDigitCount * JS_DIGIT_BYTE, true, false); + } // Set the sign bit // see https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L956 From fe7db7f5f2df7352e8127492e53ed7b4ed9eac50 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Wed, 22 Mar 2023 17:53:24 +0000 Subject: [PATCH 25/36] perf(bigint): use `mozilla::Span` constructor directly --- src/IntType.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/IntType.cc b/src/IntType.cc index fc7ef18c..059f3efe 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -102,7 +102,7 @@ JS::BigInt *IntType::toJsBigInt(JSContext *cx) { // Calculate the number of chars required to represent the bigint in hex string auto charCount = byteCount * 2; // Convert bytes to hex string (big-endian) - auto chars = std::vector(charCount+1); + auto chars = std::vector(charCount); // can't be null-terminated, otherwise SimpleStringToBigInt would read the extra \0 character and then segfault for (size_t i = 0, j = 0; i < charCount; i += 2, j++) { chars[i] = HEX_CHAR_LOOKUP_TABLE[(bytes[j] >> 4)&0xf]; // high nibble chars[i+1] = HEX_CHAR_LOOKUP_TABLE[bytes[j]&0xf]; // low nibble @@ -110,7 +110,7 @@ JS::BigInt *IntType::toJsBigInt(JSContext *cx) { PyMem_Free(bytes); // Convert hex string to JS::BigInt - auto strSpan = mozilla::MakeStringSpan(chars.data()); + auto strSpan = mozilla::Span(chars); // storing only a pointer to the underlying array and length return JS::SimpleStringToBigInt(cx, strSpan, 16); } From cf1fe80bd1f6c6ca4273ca8f0d0a1327cd11b199 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Wed, 22 Mar 2023 18:47:47 +0000 Subject: [PATCH 26/36] feat(bigint): convert negative pm.bigint to JS BigInt --- src/IntType.cc | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/IntType.cc b/src/IntType.cc index 059f3efe..b2149c7f 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -13,6 +13,7 @@ #include #include +#define SIGN_BIT_MASK 0b1000 // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.h#l40 #define CELL_HEADER_LENGTH 8 // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/gc/Cell.h#l602 #define JS_DIGIT_BIT JS_BITS_PER_WORD @@ -79,7 +80,7 @@ IntType::IntType(JSContext *cx, JS::BigInt *bigint) { } JS::BigInt *IntType::toJsBigInt(JSContext *cx) { - // Figure out how many 64-bit "digits" we have + // Figure out how many 64-bit "digits" we would have for JS BigInt // see https://github.com/python/cpython/blob/3.9/Modules/_randommodule.c#L306 auto bitCount = _PyLong_NumBits(pyObject); if (bitCount == (size_t)-1 && PyErr_Occurred()) @@ -87,6 +88,16 @@ JS::BigInt *IntType::toJsBigInt(JSContext *cx) { uint32_t jsDigitCount = bitCount == 0 ? 1 : (bitCount - 1) / JS_DIGIT_BIT + 1; size_t byteCount = (size_t)JS_DIGIT_BYTE * jsDigitCount; + // Get the sign bit + // see https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L977 + auto pyDigitCount = Py_SIZE(pyObject); // negative on negative numbers + bool isNegative = pyDigitCount < 0; + // Force to make the number positive otherwise _PyLong_AsByteArray would complain + // see https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L980 + if (isNegative) { + Py_SET_SIZE(pyObject, /*abs()*/ -pyDigitCount); + } + // Convert to bytes of 8-bit "digits" in **big-endian** order auto bytes = (uint8_t *)PyMem_Malloc(byteCount); if (bytes == NULL) { @@ -94,6 +105,11 @@ JS::BigInt *IntType::toJsBigInt(JSContext *cx) { return nullptr; } _PyLong_AsByteArray((PyLongObject *)pyObject, bytes, byteCount, /*is_little_endian*/ false, false); + // Make negative number back negative + // TODO: use _PyLong_Copy to create a new object. Not thread-safe here + if (isNegative) { + Py_SET_SIZE(pyObject, pyDigitCount); + } // Convert pm.bigint to JS::BigInt through hex strings (no public API to convert directly through bytes) // TODO: We could manually allocate the memory, https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.cpp#l162, but still no public API @@ -111,7 +127,15 @@ JS::BigInt *IntType::toJsBigInt(JSContext *cx) { // Convert hex string to JS::BigInt auto strSpan = mozilla::Span(chars); // storing only a pointer to the underlying array and length - return JS::SimpleStringToBigInt(cx, strSpan, 16); + auto bigint = JS::SimpleStringToBigInt(cx, strSpan, 16); + + // Set the sign bit + if (isNegative) { + // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.cpp#l1801 + /* flagsField */ ((uint32_t *)bigint)[0] |= SIGN_BIT_MASK; + } + + return bigint; } void IntType::print(std::ostream &os) const { From 185e363bb2cd8122e5fd4583243d2608df526353 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Thu, 23 Mar 2023 16:34:39 +0000 Subject: [PATCH 27/36] test(bigint): write tests for bigints as function arguments --- tests/python/test_pythonmonkey_eval.py | 49 ++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/python/test_pythonmonkey_eval.py b/tests/python/test_pythonmonkey_eval.py index a8c92073..ad07644d 100644 --- a/tests/python/test_pythonmonkey_eval.py +++ b/tests/python/test_pythonmonkey_eval.py @@ -1,3 +1,4 @@ +import pytest import pythonmonkey as pm import gc import random @@ -155,6 +156,23 @@ def test_bigint(py_number: int): test_bigint(1) test_bigint(-1) + # CPython would reuse the objects for small ints in range [-5, 256] + # Making sure we don't do any changes on them + def test_cached_int_object(py_number): + # type is still int + assert type(py_number) == int + assert type(py_number) != pm.bigint + test_bigint(py_number) + assert type(py_number) == int + assert type(py_number) != pm.bigint + # the value doesn't change + # TODO: Find a way to create a NEW int object with the same value, because int literals also reuse the cached int objects + for _ in range(2): + test_cached_int_object(0) # _PyLong_FromByteArray reuses the int 0 object, + # see https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L862 + for i in range(10): + test_cached_int_object(random.randint(-5, 256)) + test_bigint(18014398509481984) # 2**54 test_bigint(-18014398509481984) # -2**54 test_bigint(18446744073709551615) # 2**64-1 @@ -167,6 +185,11 @@ def test_bigint(py_number: int): py_number = random.randint(-limit, limit) test_bigint(py_number) + # TODO: test -0 (negative zero) + # There's no -0 in both Python int and JS BigInt, + # but this could be possible in JS BigInt's internal representation as it uses a sign bit flag. + # On the other hand, Python int uses `ob_size` 0 for 0, >0 for positive values, <0 for negative values + def test_eval_booleans(): py_bool = True js_bool = pm.eval('true') @@ -397,6 +420,32 @@ def test_eval_functions_latin1_string_args(): assert concatenate(string1, string2) == (string1 + string2) +def test_eval_functions_bigints(): + ident = pm.eval("(a) => { return a }") + add = pm.eval("(a, b) => { return a + b }") + + int1 = random.randint(-1000000,1000000) + bigint1 = pm.bigint(int1) + assert int1 == bigint1 + + # should return pm.bigint + assert type(ident(bigint1)) == pm.bigint + assert ident(bigint1) is not bigint1 + # should return float (because JS number is float64) + assert type(ident(int1)) == float + assert ident(int1) == ident(bigint1) + + # should raise exception on ints > 2^53, or < -2^53 + ident(9007199254740991) # 2**53-1, 0x433_FFFFFFFFFFFFF in float64 + ident(9007199254740992) # 2**53, 0x434_0000000000000 in float64 + with pytest.raises(TypeError, match="Use pythonmonkey.bigint instead"): + ident(9007199254740993) # 2**53+1, NOT 0x434_0000000000001 (2**53+2) + # ident(9007199254740994) # FIXME: Should 2**53+2 and other large integers that can be exactly represented by a float64 raise exception? + ident(-9007199254740991) # -(2**53-1) + ident(-9007199254740992) # -(2**53) + with pytest.raises(TypeError, match="Use pythonmonkey.bigint instead"): + ident(-9007199254740993) # -(2**53+1) + def test_eval_functions_ucs2_string_args(): concatenate = pm.eval("(a, b) => { return a + b}") n = 10 From 6604046d97216cbb473a5a852d23baeecc704119 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Thu, 23 Mar 2023 16:38:48 +0000 Subject: [PATCH 28/36] fix(bigint): OverflowError instead of TypeError should be raised on ints > 2^53, or < -2^53 --- src/jsTypeFactory.cc | 2 +- tests/python/test_pythonmonkey_eval.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jsTypeFactory.cc b/src/jsTypeFactory.cc index 49043854..09f966d1 100644 --- a/src/jsTypeFactory.cc +++ b/src/jsTypeFactory.cc @@ -74,7 +74,7 @@ JS::Value jsTypeFactory(JSContext *cx, PyObject *object) { if (JS::Value::isNumberRepresentable(num)) { // TODO: refactor using _PyLong_NumBits ? returnType.setNumber(num); } else { - PyErr_SetString(PyExc_TypeError, "Integer exceeds Number.MAX_SAFE_INTEGER. Use pythonmonkey.bigint instead."); + PyErr_SetString(PyExc_OverflowError, "Absolute value of the integer exceeds JS Number.MAX_SAFE_INTEGER. Use pythonmonkey.bigint instead."); } } } diff --git a/tests/python/test_pythonmonkey_eval.py b/tests/python/test_pythonmonkey_eval.py index ad07644d..65a7f71d 100644 --- a/tests/python/test_pythonmonkey_eval.py +++ b/tests/python/test_pythonmonkey_eval.py @@ -438,12 +438,12 @@ def test_eval_functions_bigints(): # should raise exception on ints > 2^53, or < -2^53 ident(9007199254740991) # 2**53-1, 0x433_FFFFFFFFFFFFF in float64 ident(9007199254740992) # 2**53, 0x434_0000000000000 in float64 - with pytest.raises(TypeError, match="Use pythonmonkey.bigint instead"): + with pytest.raises(OverflowError, match="Use pythonmonkey.bigint instead"): ident(9007199254740993) # 2**53+1, NOT 0x434_0000000000001 (2**53+2) # ident(9007199254740994) # FIXME: Should 2**53+2 and other large integers that can be exactly represented by a float64 raise exception? ident(-9007199254740991) # -(2**53-1) ident(-9007199254740992) # -(2**53) - with pytest.raises(TypeError, match="Use pythonmonkey.bigint instead"): + with pytest.raises(OverflowError, match="Use pythonmonkey.bigint instead"): ident(-9007199254740993) # -(2**53+1) def test_eval_functions_ucs2_string_args(): From b5069a1f5fca02c94b30be86843d27e3faf1eead Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Thu, 23 Mar 2023 17:56:57 +0000 Subject: [PATCH 29/36] fix(bigint): ints larger than 64bits should also ask user to use pm.bigint, instead of throwing `PyLong_AsLongLong`'s "OverflowError: int too big to convert" --- src/jsTypeFactory.cc | 10 ++++----- tests/python/test_pythonmonkey_eval.py | 29 +++++++++++++++++--------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/jsTypeFactory.cc b/src/jsTypeFactory.cc index 09f966d1..f8d5efc9 100644 --- a/src/jsTypeFactory.cc +++ b/src/jsTypeFactory.cc @@ -69,13 +69,11 @@ JS::Value jsTypeFactory(JSContext *cx, PyObject *object) { if (PyObject_IsInstance(object, PythonMonkey_BigInt)) { // pm.bigint is a subclass of the builtin int type auto bigint = IntType(object).toJsBigInt(cx); returnType.setBigInt(bigint); - } else { + } else if (_PyLong_NumBits(object) <= 53) { // num <= JS Number.MAX_SAFE_INTEGER, the mantissa of a float64 is 53 bits (with 52 explicitly stored and the highest bit always being 1) long long num = PyLong_AsLongLong(object); - if (JS::Value::isNumberRepresentable(num)) { // TODO: refactor using _PyLong_NumBits ? - returnType.setNumber(num); - } else { - PyErr_SetString(PyExc_OverflowError, "Absolute value of the integer exceeds JS Number.MAX_SAFE_INTEGER. Use pythonmonkey.bigint instead."); - } + returnType.setNumber(num); + } else { + PyErr_SetString(PyExc_OverflowError, "Absolute value of the integer exceeds JS Number.MAX_SAFE_INTEGER. Use pythonmonkey.bigint instead."); } } else if (PyFloat_Check(object)) { diff --git a/tests/python/test_pythonmonkey_eval.py b/tests/python/test_pythonmonkey_eval.py index 65a7f71d..c9950a33 100644 --- a/tests/python/test_pythonmonkey_eval.py +++ b/tests/python/test_pythonmonkey_eval.py @@ -435,16 +435,25 @@ def test_eval_functions_bigints(): assert type(ident(int1)) == float assert ident(int1) == ident(bigint1) - # should raise exception on ints > 2^53, or < -2^53 - ident(9007199254740991) # 2**53-1, 0x433_FFFFFFFFFFFFF in float64 - ident(9007199254740992) # 2**53, 0x434_0000000000000 in float64 - with pytest.raises(OverflowError, match="Use pythonmonkey.bigint instead"): - ident(9007199254740993) # 2**53+1, NOT 0x434_0000000000001 (2**53+2) - # ident(9007199254740994) # FIXME: Should 2**53+2 and other large integers that can be exactly represented by a float64 raise exception? - ident(-9007199254740991) # -(2**53-1) - ident(-9007199254740992) # -(2**53) - with pytest.raises(OverflowError, match="Use pythonmonkey.bigint instead"): - ident(-9007199254740993) # -(2**53+1) + # should raise exception on ints > (2^53-1), or < -(2^53-1) + def not_raise(num): + ident(num) + def should_raise(num): + with pytest.raises(OverflowError, match="Use pythonmonkey.bigint instead"): + ident(num) + not_raise(9007199254740991) # 2**53-1, 0x433_FFFFFFFFFFFFF in float64 + should_raise(9007199254740992) # 2**53, 0x434_0000000000000 in float64 + should_raise(9007199254740993) # 2**53+1, NOT 0x434_0000000000001 (2**53+2) + # ident(9007199254740994) # FIXME: Should raise exception on 2**53+2 and other large integers that can be exactly represented by a float64? + not_raise(-9007199254740991) # -(2**53-1) + should_raise(-9007199254740992) # -(2**53) + should_raise(-9007199254740993) # -(2**53+1) + + # Should raise "Use pythonmonkey.bigint" instead of `PyLong_AsLongLong`'s "OverflowError: int too big to convert" on ints larger than 64bits + should_raise(2**65) + should_raise(-2**65) + not_raise(pm.bigint(2**65)) + not_raise(pm.bigint(-2**65)) def test_eval_functions_ucs2_string_args(): concatenate = pm.eval("(a, b) => { return a + b}") From e22979a7531f12dd5600b56380cbc34709891fa2 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Thu, 23 Mar 2023 19:17:02 +0000 Subject: [PATCH 30/36] test(bigint): write tests for adding bigints in JS function --- tests/python/test_pythonmonkey_eval.py | 31 ++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/tests/python/test_pythonmonkey_eval.py b/tests/python/test_pythonmonkey_eval.py index c9950a33..5c4691cd 100644 --- a/tests/python/test_pythonmonkey_eval.py +++ b/tests/python/test_pythonmonkey_eval.py @@ -444,17 +444,44 @@ def should_raise(num): not_raise(9007199254740991) # 2**53-1, 0x433_FFFFFFFFFFFFF in float64 should_raise(9007199254740992) # 2**53, 0x434_0000000000000 in float64 should_raise(9007199254740993) # 2**53+1, NOT 0x434_0000000000001 (2**53+2) - # ident(9007199254740994) # FIXME: Should raise exception on 2**53+2 and other large integers that can be exactly represented by a float64? not_raise(-9007199254740991) # -(2**53-1) should_raise(-9007199254740992) # -(2**53) should_raise(-9007199254740993) # -(2**53+1) - # Should raise "Use pythonmonkey.bigint" instead of `PyLong_AsLongLong`'s "OverflowError: int too big to convert" on ints larger than 64bits + # should also raise exception on large integers (>=2**53) that can be exactly represented by a float64 + # in our current implementation + should_raise(9007199254740994) # 2**53+2, 0x434_0000000000001 in float64 + should_raise(2**61+2**9) # 0x43C_0000000000001 in float64 + + # should raise "Use pythonmonkey.bigint" instead of `PyLong_AsLongLong`'s "OverflowError: int too big to convert" on ints larger than 64bits should_raise(2**65) should_raise(-2**65) not_raise(pm.bigint(2**65)) not_raise(pm.bigint(-2**65)) + # should raise JS error when mixing a BigInt with a number in arithmetic operations + def should_js_error(a, b): + with pytest.raises(pm.SpiderMonkeyError, match="can't convert BigInt to number"): + add(a, b) + should_js_error(pm.bigint(0), 0) + should_js_error(pm.bigint(1), 2) + should_js_error(3, pm.bigint(4)) + should_js_error(-5, pm.bigint(6)) + + assert add(pm.bigint(0), pm.bigint(0)) == 0 + assert add(pm.bigint(1), pm.bigint(0)) == 1 + assert add(pm.bigint(1), pm.bigint(2)) == 3 + assert add(pm.bigint(-1), pm.bigint(1)) == 0 + assert add(pm.bigint(2**60), pm.bigint(0)) == 1152921504606846976 + assert add(pm.bigint(2**65), pm.bigint(-2**65-1)) == -1 + + # fuzztest + limit = 2037035976334486086268445688409378161051468393665936250636140449354381299763336706183397376 # 2**300 + for i in range(10): + num1 = random.randint(-limit, limit) + num2 = random.randint(-limit, limit) + assert add(pm.bigint(num1), pm.bigint(num2)) == num1+num2 + def test_eval_functions_ucs2_string_args(): concatenate = pm.eval("(a, b) => { return a + b}") n = 10 From 1fc5167c6b71861ed11f5e6085d69483e1196881 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Thu, 23 Mar 2023 19:30:51 +0000 Subject: [PATCH 31/36] test(bigint): test with real-world example of calculating the factorial --- tests/python/test_pythonmonkey_eval.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/python/test_pythonmonkey_eval.py b/tests/python/test_pythonmonkey_eval.py index 5c4691cd..c869f4b5 100644 --- a/tests/python/test_pythonmonkey_eval.py +++ b/tests/python/test_pythonmonkey_eval.py @@ -482,6 +482,14 @@ def should_js_error(a, b): num2 = random.randint(-limit, limit) assert add(pm.bigint(num1), pm.bigint(num2)) == num1+num2 +def test_eval_functions_bigint_factorial(): + factorial = pm.eval("(num) => {let r = 1n; for(let i = 0n; i Number.MAX_SAFE_INTEGER + assert factorial(pm.bigint(21)) == 51090942171709440000 # > 64 bit int + assert factorial(pm.bigint(35)) == 10333147966386144929666651337523200000000 # > 128 bit + def test_eval_functions_ucs2_string_args(): concatenate = pm.eval("(a, b) => { return a + b}") n = 10 From e6453bf96469c35b2d20130daaa15edbb43ed9a8 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Thu, 23 Mar 2023 20:34:26 +0000 Subject: [PATCH 32/36] test(bigint): test with real-world use case of calculating the crc32 table --- tests/python/test_pythonmonkey_eval.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/python/test_pythonmonkey_eval.py b/tests/python/test_pythonmonkey_eval.py index c869f4b5..75b43e62 100644 --- a/tests/python/test_pythonmonkey_eval.py +++ b/tests/python/test_pythonmonkey_eval.py @@ -490,6 +490,29 @@ def test_eval_functions_bigint_factorial(): assert factorial(pm.bigint(21)) == 51090942171709440000 # > 64 bit int assert factorial(pm.bigint(35)) == 10333147966386144929666651337523200000000 # > 128 bit +def test_eval_functions_bigint_crc32(): + crc_table_at = pm.eval(""" + // translated from https://rosettacode.org/wiki/CRC-32#Python + const crc_table = (function create_table() { + const a = [] + for (let i = 0n; i < 256n; i++) { + let k = i + for (let j = 0n; j < 8n; j++) { + // must use bigint here as js number is trimmed to int32 in bitwise operations + if (k & 1n) k ^= 0x1db710640n + k >>= 1n + } + a.push(k) + } + return a + })(); + (n) => crc_table[n] + """) + assert type(crc_table_at(1)) == pm.bigint + assert crc_table_at(0) == 0 + assert crc_table_at(1) == 1996959894 + assert crc_table_at(255) == 755167117 # last item + def test_eval_functions_ucs2_string_args(): concatenate = pm.eval("(a, b) => { return a + b}") n = 10 From 53db0a1d80f177a015a81a3dbc925f27734239b8 Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Thu, 23 Mar 2023 20:49:22 +0000 Subject: [PATCH 33/36] fix(bigint): include pm.bigint in jsTypeFactory TypeError message --- src/jsTypeFactory.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jsTypeFactory.cc b/src/jsTypeFactory.cc index f8d5efc9..736437ef 100644 --- a/src/jsTypeFactory.cc +++ b/src/jsTypeFactory.cc @@ -136,7 +136,7 @@ JS::Value jsTypeFactory(JSContext *cx, PyObject *object) { returnType.setNull(); } else { - PyErr_SetString(PyExc_TypeError, "Python types other than bool, int, float, str, None, and our custom Null type are not supported by pythonmonkey yet."); + PyErr_SetString(PyExc_TypeError, "Python types other than bool, int, pythonmonkey.bigint, float, str, None, and our custom Null type are not supported by pythonmonkey yet."); } return returnType; From 8e0e46e34a7a9e5ac4f2b52a58050a42cf1d015d Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Fri, 24 Mar 2023 20:26:52 +0000 Subject: [PATCH 34/36] feat(bigint): fast path for ints that can fit in one uint64 --- src/IntType.cc | 66 ++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/src/IntType.cc b/src/IntType.cc index b2149c7f..59b33b97 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -86,8 +86,6 @@ JS::BigInt *IntType::toJsBigInt(JSContext *cx) { if (bitCount == (size_t)-1 && PyErr_Occurred()) return nullptr; uint32_t jsDigitCount = bitCount == 0 ? 1 : (bitCount - 1) / JS_DIGIT_BIT + 1; - size_t byteCount = (size_t)JS_DIGIT_BYTE * jsDigitCount; - // Get the sign bit // see https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L977 auto pyDigitCount = Py_SIZE(pyObject); // negative on negative numbers @@ -98,39 +96,45 @@ JS::BigInt *IntType::toJsBigInt(JSContext *cx) { Py_SET_SIZE(pyObject, /*abs()*/ -pyDigitCount); } - // Convert to bytes of 8-bit "digits" in **big-endian** order - auto bytes = (uint8_t *)PyMem_Malloc(byteCount); - if (bytes == NULL) { - PyErr_NoMemory(); - return nullptr; + JS::BigInt *bigint = nullptr; + if (jsDigitCount <= 1) { + // Fast path for int fits in one js_digit_t (uint64 on 64-bit OS) + bigint = JS::detail::BigIntFromUint64(cx, PyLong_AsUnsignedLongLong(pyObject)); + } else { + // Convert to bytes of 8-bit "digits" in **big-endian** order + size_t byteCount = (size_t)JS_DIGIT_BYTE * jsDigitCount; + auto bytes = (uint8_t *)PyMem_Malloc(byteCount); + if (bytes == NULL) { + PyErr_NoMemory(); + return nullptr; + } + _PyLong_AsByteArray((PyLongObject *)pyObject, bytes, byteCount, /*is_little_endian*/ false, false); + + // Convert pm.bigint to JS::BigInt through hex strings (no public API to convert directly through bytes) + // TODO: We could manually allocate the memory, https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.cpp#l162, but still no public API + // TODO: Could we fill in an object with similar memory alignment (maybe by NewArrayBufferWithContents), and coerce it to BigInt? + + // Calculate the number of chars required to represent the bigint in hex string + auto charCount = byteCount * 2; + // Convert bytes to hex string (big-endian) + auto chars = std::vector(charCount); // can't be null-terminated, otherwise SimpleStringToBigInt would read the extra \0 character and then segfault + for (size_t i = 0, j = 0; i < charCount; i += 2, j++) { + chars[i] = HEX_CHAR_LOOKUP_TABLE[(bytes[j] >> 4)&0xf]; // high nibble + chars[i+1] = HEX_CHAR_LOOKUP_TABLE[bytes[j]&0xf]; // low nibble + } + PyMem_Free(bytes); + + // Convert hex string to JS::BigInt + auto strSpan = mozilla::Span(chars); // storing only a pointer to the underlying array and length + bigint = JS::SimpleStringToBigInt(cx, strSpan, 16); } - _PyLong_AsByteArray((PyLongObject *)pyObject, bytes, byteCount, /*is_little_endian*/ false, false); - // Make negative number back negative - // TODO: use _PyLong_Copy to create a new object. Not thread-safe here + if (isNegative) { + // Make negative number back negative + // TODO: use _PyLong_Copy to create a new object. Not thread-safe here Py_SET_SIZE(pyObject, pyDigitCount); - } - // Convert pm.bigint to JS::BigInt through hex strings (no public API to convert directly through bytes) - // TODO: We could manually allocate the memory, https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.cpp#l162, but still no public API - // TODO: Could we fill in an object with similar memory alignment (maybe by NewArrayBufferWithContents), and coerce it to BigInt? - - // Calculate the number of chars required to represent the bigint in hex string - auto charCount = byteCount * 2; - // Convert bytes to hex string (big-endian) - auto chars = std::vector(charCount); // can't be null-terminated, otherwise SimpleStringToBigInt would read the extra \0 character and then segfault - for (size_t i = 0, j = 0; i < charCount; i += 2, j++) { - chars[i] = HEX_CHAR_LOOKUP_TABLE[(bytes[j] >> 4)&0xf]; // high nibble - chars[i+1] = HEX_CHAR_LOOKUP_TABLE[bytes[j]&0xf]; // low nibble - } - PyMem_Free(bytes); - - // Convert hex string to JS::BigInt - auto strSpan = mozilla::Span(chars); // storing only a pointer to the underlying array and length - auto bigint = JS::SimpleStringToBigInt(cx, strSpan, 16); - - // Set the sign bit - if (isNegative) { + // Set the sign bit // https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.cpp#l1801 /* flagsField */ ((uint32_t *)bigint)[0] |= SIGN_BIT_MASK; } From 8a925874c68f19efb4cd71ae6af6a07c5497e04a Mon Sep 17 00:00:00 2001 From: Tom Tang Date: Fri, 24 Mar 2023 20:32:20 +0000 Subject: [PATCH 35/36] fix(bigint): pm.bigint object malformed on NoMemory error Early return would cause a negative pm.bigint not resetting back to negative --- src/IntType.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/IntType.cc b/src/IntType.cc index 59b33b97..0a6d626f 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -104,10 +104,6 @@ JS::BigInt *IntType::toJsBigInt(JSContext *cx) { // Convert to bytes of 8-bit "digits" in **big-endian** order size_t byteCount = (size_t)JS_DIGIT_BYTE * jsDigitCount; auto bytes = (uint8_t *)PyMem_Malloc(byteCount); - if (bytes == NULL) { - PyErr_NoMemory(); - return nullptr; - } _PyLong_AsByteArray((PyLongObject *)pyObject, bytes, byteCount, /*is_little_endian*/ false, false); // Convert pm.bigint to JS::BigInt through hex strings (no public API to convert directly through bytes) From 8dcbcf507027fcd006befcc4f323f4ec67620587 Mon Sep 17 00:00:00 2001 From: Caleb Aikens Date: Mon, 27 Mar 2023 11:00:04 -0400 Subject: [PATCH 36/36] remove auto, add names to TODOs --- src/IntType.cc | 26 +++++++++++++------------- src/jsTypeFactory.cc | 4 ++-- src/pyTypeFactory.cc | 4 ++-- tests/python/test_pythonmonkey_eval.py | 4 ++-- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/IntType.cc b/src/IntType.cc index 0a6d626f..cb8cdb18 100644 --- a/src/IntType.cc +++ b/src/IntType.cc @@ -51,13 +51,13 @@ IntType::IntType(JSContext *cx, JS::BigInt *bigint) { // Byte order within a digit is native-endian. if constexpr (std::endian::native == std::endian::big) { // C++20 - // TODO: use C++23 std::byteswap? + // @TODO (Tom Tang): use C++23 std::byteswap? printf("big-endian cpu is not supported by PythonMonkey yet"); return; } // If the native endianness is also little-endian, // we now have consecutive bytes of 8-bit "digits" in little-endian order - auto bytes = const_cast((uint8_t *)jsDigits); + const uint8_t *bytes = const_cast((uint8_t *)jsDigits); if (jsDigitCount == 0) { // Create a new object instead of reusing the object for int 0 // see https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L862 @@ -70,7 +70,7 @@ IntType::IntType(JSContext *cx, JS::BigInt *bigint) { // Set the sign bit // see https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L956 if (isNegative) { - auto pyDigitCount = Py_SIZE(pyObject); + ssize_t pyDigitCount = Py_SIZE(pyObject); Py_SET_SIZE(pyObject, -pyDigitCount); } @@ -82,13 +82,13 @@ IntType::IntType(JSContext *cx, JS::BigInt *bigint) { JS::BigInt *IntType::toJsBigInt(JSContext *cx) { // Figure out how many 64-bit "digits" we would have for JS BigInt // see https://github.com/python/cpython/blob/3.9/Modules/_randommodule.c#L306 - auto bitCount = _PyLong_NumBits(pyObject); + size_t bitCount = _PyLong_NumBits(pyObject); if (bitCount == (size_t)-1 && PyErr_Occurred()) return nullptr; uint32_t jsDigitCount = bitCount == 0 ? 1 : (bitCount - 1) / JS_DIGIT_BIT + 1; // Get the sign bit // see https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L977 - auto pyDigitCount = Py_SIZE(pyObject); // negative on negative numbers + ssize_t pyDigitCount = Py_SIZE(pyObject); // negative on negative numbers bool isNegative = pyDigitCount < 0; // Force to make the number positive otherwise _PyLong_AsByteArray would complain // see https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L980 @@ -103,17 +103,17 @@ JS::BigInt *IntType::toJsBigInt(JSContext *cx) { } else { // Convert to bytes of 8-bit "digits" in **big-endian** order size_t byteCount = (size_t)JS_DIGIT_BYTE * jsDigitCount; - auto bytes = (uint8_t *)PyMem_Malloc(byteCount); + uint8_t *bytes = (uint8_t *)PyMem_Malloc(byteCount); _PyLong_AsByteArray((PyLongObject *)pyObject, bytes, byteCount, /*is_little_endian*/ false, false); // Convert pm.bigint to JS::BigInt through hex strings (no public API to convert directly through bytes) - // TODO: We could manually allocate the memory, https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.cpp#l162, but still no public API - // TODO: Could we fill in an object with similar memory alignment (maybe by NewArrayBufferWithContents), and coerce it to BigInt? + // TODO (Tom Tang): We could manually allocate the memory, https://hg.mozilla.org/releases/mozilla-esr102/file/tip/js/src/vm/BigIntType.cpp#l162, but still no public API + // TODO (Tom Tang): Could we fill in an object with similar memory alignment (maybe by NewArrayBufferWithContents), and coerce it to BigInt? // Calculate the number of chars required to represent the bigint in hex string - auto charCount = byteCount * 2; + size_t charCount = byteCount * 2; // Convert bytes to hex string (big-endian) - auto chars = std::vector(charCount); // can't be null-terminated, otherwise SimpleStringToBigInt would read the extra \0 character and then segfault + std::vector chars = std::vector(charCount); // can't be null-terminated, otherwise SimpleStringToBigInt would read the extra \0 character and then segfault for (size_t i = 0, j = 0; i < charCount; i += 2, j++) { chars[i] = HEX_CHAR_LOOKUP_TABLE[(bytes[j] >> 4)&0xf]; // high nibble chars[i+1] = HEX_CHAR_LOOKUP_TABLE[bytes[j]&0xf]; // low nibble @@ -121,13 +121,13 @@ JS::BigInt *IntType::toJsBigInt(JSContext *cx) { PyMem_Free(bytes); // Convert hex string to JS::BigInt - auto strSpan = mozilla::Span(chars); // storing only a pointer to the underlying array and length + mozilla::Span strSpan = mozilla::Span(chars); // storing only a pointer to the underlying array and length bigint = JS::SimpleStringToBigInt(cx, strSpan, 16); } if (isNegative) { // Make negative number back negative - // TODO: use _PyLong_Copy to create a new object. Not thread-safe here + // TODO (Tom Tang): use _PyLong_Copy to create a new object. Not thread-safe here Py_SET_SIZE(pyObject, pyDigitCount); // Set the sign bit @@ -140,7 +140,7 @@ JS::BigInt *IntType::toJsBigInt(JSContext *cx) { void IntType::print(std::ostream &os) const { // Making sure the value does not overflow even if the int has millions of bits of precision - auto str = PyObject_Str(pyObject); + PyObject *str = PyObject_Str(pyObject); os << PyUnicode_AsUTF8(str); // https://pythonextensionpatterns.readthedocs.io/en/latest/refcount.html#new-references Py_DECREF(str); // free diff --git a/src/jsTypeFactory.cc b/src/jsTypeFactory.cc index 736437ef..e0d1012d 100644 --- a/src/jsTypeFactory.cc +++ b/src/jsTypeFactory.cc @@ -67,10 +67,10 @@ JS::Value jsTypeFactory(JSContext *cx, PyObject *object) { } else if (PyLong_Check(object)) { if (PyObject_IsInstance(object, PythonMonkey_BigInt)) { // pm.bigint is a subclass of the builtin int type - auto bigint = IntType(object).toJsBigInt(cx); + JS::BigInt *bigint = IntType(object).toJsBigInt(cx); returnType.setBigInt(bigint); } else if (_PyLong_NumBits(object) <= 53) { // num <= JS Number.MAX_SAFE_INTEGER, the mantissa of a float64 is 53 bits (with 52 explicitly stored and the highest bit always being 1) - long long num = PyLong_AsLongLong(object); + uint64_t num = PyLong_AsLongLong(object); returnType.setNumber(num); } else { PyErr_SetString(PyExc_OverflowError, "Absolute value of the integer exceeds JS Number.MAX_SAFE_INTEGER. Use pythonmonkey.bigint instead."); diff --git a/src/pyTypeFactory.cc b/src/pyTypeFactory.cc index a585cec4..d80e653a 100644 --- a/src/pyTypeFactory.cc +++ b/src/pyTypeFactory.cc @@ -84,8 +84,8 @@ PyType *pyTypeFactory(JSContext *cx, JS::Rooted *global, JS::Rooted< JS::GetBuiltinClass(cx, obj, &cls); switch (cls) { case js::ESClass::Boolean: { - // TODO: refactor out all `js::Unbox` calls - // TODO: refactor using recursive call to `pyTypeFactory` + // TODO (Caleb Aikens): refactor out all `js::Unbox` calls + // TODO (Caleb Aikens): refactor using recursive call to `pyTypeFactory` JS::RootedValue unboxed(cx); js::Unbox(cx, obj, &unboxed); returnValue = new BoolType(unboxed.toBoolean()); diff --git a/tests/python/test_pythonmonkey_eval.py b/tests/python/test_pythonmonkey_eval.py index 75b43e62..3a774f50 100644 --- a/tests/python/test_pythonmonkey_eval.py +++ b/tests/python/test_pythonmonkey_eval.py @@ -166,7 +166,7 @@ def test_cached_int_object(py_number): assert type(py_number) == int assert type(py_number) != pm.bigint # the value doesn't change - # TODO: Find a way to create a NEW int object with the same value, because int literals also reuse the cached int objects + # TODO (Tom Tang): Find a way to create a NEW int object with the same value, because int literals also reuse the cached int objects for _ in range(2): test_cached_int_object(0) # _PyLong_FromByteArray reuses the int 0 object, # see https://github.com/python/cpython/blob/3.9/Objects/longobject.c#L862 @@ -185,7 +185,7 @@ def test_cached_int_object(py_number): py_number = random.randint(-limit, limit) test_bigint(py_number) - # TODO: test -0 (negative zero) + # TODO (Tom Tang): test -0 (negative zero) # There's no -0 in both Python int and JS BigInt, # but this could be possible in JS BigInt's internal representation as it uses a sign bit flag. # On the other hand, Python int uses `ob_size` 0 for 0, >0 for positive values, <0 for negative values