From 39094aff40aec67fa4f95fd8df90e0ec1ee07e88 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 17:40:28 -0600 Subject: [PATCH] gh-117953: Work Relative to Specific Extension Kinds in the Import Machinery (gh-118205) This change will make some later changes simpler. --- Include/internal/pycore_importdl.h | 32 ++++- Python/import.c | 151 +++++++++++++++------ Python/importdl.c | 207 +++++++++++++++++++++++++---- 3 files changed, 324 insertions(+), 66 deletions(-) diff --git a/Include/internal/pycore_importdl.h b/Include/internal/pycore_importdl.h index adb89167d124eb4..b0af28da34e0871 100644 --- a/Include/internal/pycore_importdl.h +++ b/Include/internal/pycore_importdl.h @@ -15,8 +15,15 @@ extern "C" { extern const char *_PyImport_DynLoadFiletab[]; -typedef PyObject *(*PyModInitFunction)(void); +typedef enum ext_module_kind { + _Py_ext_module_kind_UNKNOWN = 0, + _Py_ext_module_kind_SINGLEPHASE = 1, + _Py_ext_module_kind_MULTIPHASE = 2, + _Py_ext_module_kind_INVALID = 3, +} _Py_ext_module_kind; + +/* Input for loading an extension module. */ struct _Py_ext_module_loader_info { PyObject *filename; #ifndef MS_WINDOWS @@ -43,10 +50,33 @@ extern int _Py_ext_module_loader_info_init_from_spec( struct _Py_ext_module_loader_info *info, PyObject *spec); +/* The result from running an extension module's init function. */ struct _Py_ext_module_loader_result { PyModuleDef *def; PyObject *module; + _Py_ext_module_kind kind; + struct _Py_ext_module_loader_result_error *err; + struct _Py_ext_module_loader_result_error { + enum _Py_ext_module_loader_result_error_kind { + _Py_ext_module_loader_result_EXCEPTION = 0, + _Py_ext_module_loader_result_ERR_MISSING = 1, + _Py_ext_module_loader_result_ERR_UNREPORTED_EXC = 2, + _Py_ext_module_loader_result_ERR_UNINITIALIZED = 3, + _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE = 4, + _Py_ext_module_loader_result_ERR_NOT_MODULE = 5, + _Py_ext_module_loader_result_ERR_MISSING_DEF = 6, + } kind; + PyObject *exc; + } _err; }; +extern void _Py_ext_module_loader_result_clear( + struct _Py_ext_module_loader_result *res); +extern void _Py_ext_module_loader_result_apply_error( + struct _Py_ext_module_loader_result *res, + const char *name); + +/* The module init function. */ +typedef PyObject *(*PyModInitFunction)(void); extern PyModInitFunction _PyImport_GetModInitFunc( struct _Py_ext_module_loader_info *info, FILE *fp); diff --git a/Python/import.c b/Python/import.c index 447114ab115bc7d..0c51ffc6285a9c6 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1193,26 +1193,63 @@ is_core_module(PyInterpreterState *interp, PyObject *name, PyObject *path) } #ifndef NDEBUG -static bool -is_singlephase(PyModuleDef *def) +static _Py_ext_module_kind +_get_extension_kind(PyModuleDef *def, bool check_size) { + _Py_ext_module_kind kind; if (def == NULL) { /* It must be a module created by reload_singlephase_extension() * from m_copy. Ideally we'd do away with this case. */ - return true; + kind = _Py_ext_module_kind_SINGLEPHASE; } - else if (def->m_slots == NULL) { - return true; + else if (def->m_slots != NULL) { + kind = _Py_ext_module_kind_MULTIPHASE; } - else { - return false; + else if (check_size && def->m_size == -1) { + kind = _Py_ext_module_kind_SINGLEPHASE; } -} + else if (def->m_base.m_init != NULL) { + kind = _Py_ext_module_kind_SINGLEPHASE; + } + else { + // This is probably single-phase init, but a multi-phase + // module *can* have NULL m_slots. + kind = _Py_ext_module_kind_UNKNOWN; + } + return kind; +} + +/* The module might not be fully initialized yet + * and PyModule_FromDefAndSpec() checks m_size + * so we skip m_size. */ +#define assert_multiphase_def(def) \ + do { \ + _Py_ext_module_kind kind = _get_extension_kind(def, false); \ + assert(kind == _Py_ext_module_kind_MULTIPHASE \ + /* m_slots can be NULL. */ \ + || kind == _Py_ext_module_kind_UNKNOWN); \ + } while (0) + +#define assert_singlephase_def(def) \ + do { \ + _Py_ext_module_kind kind = _get_extension_kind(def, true); \ + assert(kind == _Py_ext_module_kind_SINGLEPHASE \ + || kind == _Py_ext_module_kind_UNKNOWN); \ + } while (0) + +#define assert_singlephase(def) \ + assert_singlephase_def(def) + +#else /* defined(NDEBUG) */ +#define assert_multiphase_def(def) +#define assert_singlephase_def(def) +#define assert_singlephase(def) #endif struct singlephase_global_update { PyObject *m_dict; + PyModInitFunction m_init; }; static int @@ -1226,10 +1263,24 @@ update_global_state_for_extension(PyThreadState *tstate, assert(def->m_base.m_copy == NULL); } else { - assert(def->m_base.m_init != NULL - || is_core_module(tstate->interp, name, path)); - if (singlephase->m_dict == NULL) { + if (singlephase->m_init != NULL) { + assert(singlephase->m_dict == NULL); + assert(def->m_base.m_copy == NULL); + assert(def->m_size >= 0); + /* Remember pointer to module init function. */ + // XXX If two modules share a def then def->m_base will + // reflect the last one added (here) to the global cache. + // We should prevent this somehow. The simplest solution + // is probably to store m_copy/m_init in the cache along + // with the def, rather than within the def. + def->m_base.m_init = singlephase->m_init; + } + else if (singlephase->m_dict == NULL) { + /* It must be a core builtin module. */ + assert(is_core_module(tstate->interp, name, path)); + assert(def->m_size == -1); assert(def->m_base.m_copy == NULL); + assert(def->m_base.m_init == NULL); } else { assert(PyDict_Check(singlephase->m_dict)); @@ -1316,7 +1367,7 @@ import_find_extension(PyThreadState *tstate, if (def == NULL) { return NULL; } - assert(is_singlephase(def)); + assert_singlephase(def); /* It may have been successfully imported previously in an interpreter that allows legacy modules @@ -1369,11 +1420,26 @@ import_find_extension(PyThreadState *tstate, } struct _Py_ext_module_loader_result res; if (_PyImport_RunModInitFunc(def->m_base.m_init, info, &res) < 0) { + _Py_ext_module_loader_result_apply_error(&res, name_buf); return NULL; } assert(!PyErr_Occurred()); + assert(res.err == NULL); + assert(res.kind == _Py_ext_module_kind_SINGLEPHASE); mod = res.module; - // XXX __file__ doesn't get set! + /* Tchnically, the init function could return a different module def. + * Then we would probably need to update the global cache. + * However, we don't expect anyone to change the def. */ + assert(res.def == def); + _Py_ext_module_loader_result_clear(&res); + + /* Remember the filename as the __file__ attribute */ + if (info->filename != NULL) { + if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) { + PyErr_Clear(); /* Not important enough to report */ + } + } + if (PyObject_SetItem(modules, info->name, mod) == -1) { Py_DECREF(mod); return NULL; @@ -1398,78 +1464,89 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, struct _Py_ext_module_loader_info *info, PyObject *spec, PyObject *modules) { + /* Core modules go through _PyImport_FixupBuiltin(). */ + assert(!is_core_module(tstate->interp, info->name, info->path)); + PyObject *mod = NULL; PyModuleDef *def = NULL; + const char *name_buf = PyBytes_AS_STRING(info->name_encoded); struct _Py_ext_module_loader_result res; if (_PyImport_RunModInitFunc(p0, info, &res) < 0) { /* We discard res.def. */ assert(res.module == NULL); - assert(PyErr_Occurred()); - goto finally; + _Py_ext_module_loader_result_apply_error(&res, name_buf); + return NULL; } assert(!PyErr_Occurred()); + assert(res.err == NULL); mod = res.module; res.module = NULL; def = res.def; assert(def != NULL); - if (mod == NULL) { - //assert(!is_singlephase(def)); + if (res.kind == _Py_ext_module_kind_MULTIPHASE) { + assert_multiphase_def(def); assert(mod == NULL); mod = PyModule_FromDefAndSpec(def, spec); if (mod == NULL) { - goto finally; + goto error; } } else { - assert(is_singlephase(def)); + assert(res.kind == _Py_ext_module_kind_SINGLEPHASE); + assert_singlephase_def(def); assert(PyModule_Check(mod)); - const char *name_buf = PyBytes_AS_STRING(info->name_encoded); if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) { - Py_CLEAR(mod); - goto finally; + goto error; } - /* Remember pointer to module init function. */ - def->m_base.m_init = p0; - + /* Remember the filename as the __file__ attribute */ if (info->filename != NULL) { - /* Remember the filename as the __file__ attribute */ if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) { PyErr_Clear(); /* Not important enough to report */ } } + /* Update global import state. */ struct singlephase_global_update singlephase = {0}; // gh-88216: Extensions and def->m_base.m_copy can be updated // when the extension module doesn't support sub-interpreters. - if (def->m_size == -1 - && !is_core_module(tstate->interp, info->name, info->path)) - { + if (def->m_size == -1) { + /* We will reload from m_copy. */ + assert(def->m_base.m_init == NULL); singlephase.m_dict = PyModule_GetDict(mod); assert(singlephase.m_dict != NULL); } + else { + /* We will reload via the init function. */ + assert(def->m_size >= 0); + singlephase.m_init = p0; + } if (update_global_state_for_extension( tstate, info->path, info->name, def, &singlephase) < 0) { - Py_CLEAR(mod); - goto finally; + goto error; } + /* Update per-interpreter import state. */ PyObject *modules = get_modules_dict(tstate, true); if (finish_singlephase_extension( tstate, mod, def, info->name, modules) < 0) { - Py_CLEAR(mod); - goto finally; + goto error; } } -finally: + _Py_ext_module_loader_result_clear(&res); return mod; + +error: + Py_XDECREF(mod); + _Py_ext_module_loader_result_clear(&res); + return NULL; } @@ -1532,7 +1609,7 @@ _PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name, * module state, but we also don't populate def->m_base.m_copy * for them. */ assert(is_core_module(tstate->interp, nameobj, nameobj)); - assert(is_singlephase(def)); + assert_singlephase_def(def); assert(def->m_size == -1); assert(def->m_base.m_copy == NULL); @@ -1586,7 +1663,7 @@ create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec) PyObject *mod = import_find_extension(tstate, &info); if (mod != NULL) { assert(!_PyErr_Occurred(tstate)); - assert(is_singlephase(_PyModule_GetDef(mod))); + assert_singlephase(_PyModule_GetDef(mod)); goto finally; } else if (_PyErr_Occurred(tstate)) { @@ -3940,7 +4017,7 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file) mod = import_find_extension(tstate, &info); if (mod != NULL) { assert(!_PyErr_Occurred(tstate)); - assert(is_singlephase(_PyModule_GetDef(mod))); + assert_singlephase(_PyModule_GetDef(mod)); goto finally; } else if (_PyErr_Occurred(tstate)) { diff --git a/Python/importdl.c b/Python/importdl.c index 3e4ea175c0cfbc9..a3091946bc94bfe 100644 --- a/Python/importdl.c +++ b/Python/importdl.c @@ -28,6 +28,11 @@ extern dl_funcptr _PyImport_FindSharedFuncptr(const char *prefix, const char *pathname, FILE *fp); #endif + +/***********************************/ +/* module info to use when loading */ +/***********************************/ + static const char * const ascii_only_prefix = "PyInit"; static const char * const nonascii_prefix = "PyInitU"; @@ -205,6 +210,150 @@ _Py_ext_module_loader_info_init_from_spec( } +/********************************/ +/* module init function results */ +/********************************/ + +void +_Py_ext_module_loader_result_clear(struct _Py_ext_module_loader_result *res) +{ + /* Instead, the caller should have called + * _Py_ext_module_loader_result_apply_error(). */ + assert(res->err == NULL); + *res = (struct _Py_ext_module_loader_result){0}; +} + +static void +_Py_ext_module_loader_result_set_error( + struct _Py_ext_module_loader_result *res, + enum _Py_ext_module_loader_result_error_kind kind) +{ +#ifndef NDEBUG + switch (kind) { + case _Py_ext_module_loader_result_EXCEPTION: /* fall through */ + case _Py_ext_module_loader_result_ERR_UNREPORTED_EXC: + assert(PyErr_Occurred()); + break; + case _Py_ext_module_loader_result_ERR_MISSING: /* fall through */ + case _Py_ext_module_loader_result_ERR_UNINITIALIZED: /* fall through */ + case _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE: /* fall through */ + case _Py_ext_module_loader_result_ERR_NOT_MODULE: /* fall through */ + case _Py_ext_module_loader_result_ERR_MISSING_DEF: + assert(!PyErr_Occurred()); + break; + default: + /* We added a new error kind but forgot to add it to this switch. */ + assert(0); + } +#endif + + assert(res->err == NULL && res->_err.exc == NULL); + res->err = &res->_err; + *res->err = (struct _Py_ext_module_loader_result_error){ + .kind=kind, + .exc=PyErr_GetRaisedException(), + }; + + /* For some kinds, we also set/check res->kind. */ + switch (kind) { + case _Py_ext_module_loader_result_ERR_UNINITIALIZED: + assert(res->kind == _Py_ext_module_kind_UNKNOWN); + res->kind = _Py_ext_module_kind_INVALID; + break; + /* None of the rest affect the result kind. */ + case _Py_ext_module_loader_result_EXCEPTION: /* fall through */ + case _Py_ext_module_loader_result_ERR_MISSING: /* fall through */ + case _Py_ext_module_loader_result_ERR_UNREPORTED_EXC: /* fall through */ + case _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE: /* fall through */ + case _Py_ext_module_loader_result_ERR_NOT_MODULE: /* fall through */ + case _Py_ext_module_loader_result_ERR_MISSING_DEF: + break; + default: + /* We added a new error kind but forgot to add it to this switch. */ + assert(0); + } +} + +void +_Py_ext_module_loader_result_apply_error( + struct _Py_ext_module_loader_result *res, + const char *name) +{ + assert(!PyErr_Occurred()); + assert(res->err != NULL && res->err == &res->_err); + struct _Py_ext_module_loader_result_error err = *res->err; + res->err = NULL; + + /* We're otherwise done with the result at this point. */ + _Py_ext_module_loader_result_clear(res); + +#ifndef NDEBUG + switch (err.kind) { + case _Py_ext_module_loader_result_EXCEPTION: /* fall through */ + case _Py_ext_module_loader_result_ERR_UNREPORTED_EXC: + assert(err.exc != NULL); + break; + case _Py_ext_module_loader_result_ERR_MISSING: /* fall through */ + case _Py_ext_module_loader_result_ERR_UNINITIALIZED: /* fall through */ + case _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE: /* fall through */ + case _Py_ext_module_loader_result_ERR_NOT_MODULE: /* fall through */ + case _Py_ext_module_loader_result_ERR_MISSING_DEF: + assert(err.exc == NULL); + break; + default: + /* We added a new error kind but forgot to add it to this switch. */ + assert(0); + } +#endif + + const char *msg = NULL; + switch (err.kind) { + case _Py_ext_module_loader_result_EXCEPTION: + break; + case _Py_ext_module_loader_result_ERR_MISSING: + msg = "initialization of %s failed without raising an exception"; + break; + case _Py_ext_module_loader_result_ERR_UNREPORTED_EXC: + msg = "initialization of %s raised unreported exception"; + break; + case _Py_ext_module_loader_result_ERR_UNINITIALIZED: + msg = "init function of %s returned uninitialized object"; + break; + case _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE: + msg = "initialization of %s did not return PyModuleDef"; + break; + case _Py_ext_module_loader_result_ERR_NOT_MODULE: + msg = "initialization of %s did not return an extension module"; + break; + case _Py_ext_module_loader_result_ERR_MISSING_DEF: + msg = "initialization of %s did not return a valid extension module"; + break; + default: + /* We added a new error kind but forgot to add it to this switch. */ + assert(0); + PyErr_Format(PyExc_SystemError, + "loading %s failed due to init function", name); + return; + } + + if (err.exc != NULL) { + PyErr_SetRaisedException(err.exc); + err.exc = NULL; /* PyErr_SetRaisedException() stole our reference. */ + if (msg != NULL) { + _PyErr_FormatFromCause(PyExc_SystemError, msg, name); + } + } + else { + assert(msg != NULL); + PyErr_Format(PyExc_SystemError, msg, name); + } +} + + +/********************************************/ +/* getting/running the module init function */ +/********************************************/ + PyModInitFunction _PyImport_GetModInitFunc(struct _Py_ext_module_loader_info *info, FILE *fp) @@ -245,8 +394,9 @@ _PyImport_RunModInitFunc(PyModInitFunction p0, struct _Py_ext_module_loader_info *info, struct _Py_ext_module_loader_result *p_res) { - struct _Py_ext_module_loader_result res = {0}; - const char *name_buf = PyBytes_AS_STRING(info->name_encoded); + struct _Py_ext_module_loader_result res = { + .kind=_Py_ext_module_kind_UNKNOWN, + }; /* Call the module init function. */ @@ -258,18 +408,18 @@ _PyImport_RunModInitFunc(PyModInitFunction p0, /* Validate the result (and populate "res". */ if (m == NULL) { - if (!PyErr_Occurred()) { - PyErr_Format( - PyExc_SystemError, - "initialization of %s failed without raising an exception", - name_buf); + if (PyErr_Occurred()) { + _Py_ext_module_loader_result_set_error( + &res, _Py_ext_module_loader_result_EXCEPTION); + } + else { + _Py_ext_module_loader_result_set_error( + &res, _Py_ext_module_loader_result_ERR_MISSING); } goto error; } else if (PyErr_Occurred()) { - _PyErr_FormatFromCause( - PyExc_SystemError, - "initialization of %s raised unreported exception", - name_buf); + _Py_ext_module_loader_result_set_error( + &res, _Py_ext_module_loader_result_ERR_UNREPORTED_EXC); /* We would probably be correct to decref m here, * but we weren't doing so before, * so we stick with doing nothing. */ @@ -281,9 +431,8 @@ _PyImport_RunModInitFunc(PyModInitFunction p0, /* This can happen when a PyModuleDef is returned without calling * PyModuleDef_Init on it */ - PyErr_Format(PyExc_SystemError, - "init function of %s returned uninitialized object", - name_buf); + _Py_ext_module_loader_result_set_error( + &res, _Py_ext_module_loader_result_ERR_UNINITIALIZED); /* Likewise, decref'ing here makes sense. However, the original * code has a note about "prevent segfault in DECREF", * so we play it safe and leave it alone. */ @@ -293,48 +442,50 @@ _PyImport_RunModInitFunc(PyModInitFunction p0, if (PyObject_TypeCheck(m, &PyModuleDef_Type)) { /* multi-phase init */ + res.kind = _Py_ext_module_kind_MULTIPHASE; res.def = (PyModuleDef *)m; /* Run PyModule_FromDefAndSpec() to finish loading the module. */ } else if (info->hook_prefix == nonascii_prefix) { - /* It should have been multi-phase init? */ + /* Non-ASCII is only supported for multi-phase init. */ + res.kind = _Py_ext_module_kind_MULTIPHASE; /* Don't allow legacy init for non-ASCII module names. */ - PyErr_Format( - PyExc_SystemError, - "initialization of %s did not return PyModuleDef", - name_buf); - Py_DECREF(m); - return -1; + _Py_ext_module_loader_result_set_error( + &res, _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE); + goto error; } else { /* single-phase init (legacy) */ + res.kind = _Py_ext_module_kind_SINGLEPHASE; res.module = m; if (!PyModule_Check(m)) { - PyErr_Format(PyExc_SystemError, - "initialization of %s did not return an extension " - "module", name_buf); + _Py_ext_module_loader_result_set_error( + &res, _Py_ext_module_loader_result_ERR_NOT_MODULE); goto error; } res.def = _PyModule_GetDef(m); if (res.def == NULL) { - PyErr_Format(PyExc_SystemError, - "initialization of %s did not return a valid extension " - "module", name_buf); + PyErr_Clear(); + _Py_ext_module_loader_result_set_error( + &res, _Py_ext_module_loader_result_ERR_MISSING_DEF); goto error; } } assert(!PyErr_Occurred()); + assert(res.err == NULL); *p_res = res; return 0; error: - assert(PyErr_Occurred()); + assert(!PyErr_Occurred()); + assert(res.err != NULL); Py_CLEAR(res.module); res.def = NULL; *p_res = res; + p_res->err = &p_res->_err; return -1; }