diff --git a/crates/perry-codegen/src/codegen.rs b/crates/perry-codegen/src/codegen.rs index 32bf524d..8d22fa15 100644 --- a/crates/perry-codegen/src/codegen.rs +++ b/crates/perry-codegen/src/codegen.rs @@ -226,6 +226,27 @@ pub struct CompileOptions { /// value (single-path) or to chain string-compare dispatches /// (multi-path). Empty if this module performs no dynamic imports. pub dynamic_import_path_to_prefix: std::collections::HashMap, + + /// Issue #753: sanitized prefixes of modules whose init must NOT + /// run as part of the entry module's eager init chain. Reachable + /// from the entry only through dynamic `import()` edges, so their + /// `__init` fires lazily from the dispatch site. The entry + /// module's `main` filters this set out of `non_entry_module_prefixes` + /// when emitting the eager init call sequence. Empty when no module + /// in the program is deferred. + pub deferred_module_prefixes: std::collections::HashSet, + + /// Issue #753: sanitized prefixes of THIS module's static-import + + /// re-export source modules (non-entry only — the entry has no + /// `__init` to call). The wrapper `__init` calls each + /// dep's `__init` (idempotently) before invoking the body. + /// Required so that a Deferred module firing lazily transitively + /// initializes any Deferred deps reached only through its own + /// re-export chain — otherwise the namespace populator at the + /// tail of `__init_body` reads zero-initialized cross- + /// module globals. For Eager modules the redundant calls + /// short-circuit on the guard's first-write check. + pub module_init_deps: Vec, } /// Issue #100: one entry in a module's namespace-population list. @@ -483,6 +504,17 @@ pub(crate) struct CrossModuleCtx { /// dispatch site in `expr.rs::Expr::DynamicImport` to find the /// `@__perry_ns_` global to load. pub dynamic_import_path_to_prefix: std::collections::HashMap, + /// Issue #753: sanitized prefixes of modules reached only through + /// dynamic `import()` edges. 
Their `__init` is excluded + /// from the entry-main eager init call sequence and fires lazily + /// from each `Expr::DynamicImport` dispatch site. + pub deferred_module_prefixes: std::collections::HashSet, + /// Issue #753: this module's static-import + re-export source + /// prefixes (non-entry only). Consumed by `compile_module_entry` + /// when emitting the wrapper for `__init` so dep init + /// fires before the body — transitively pulls in any Deferred dep + /// chain reached only through this module's re-exports. + pub module_init_deps: Vec, } /// Compile a Perry HIR module to an object file via LLVM IR. @@ -1325,6 +1357,8 @@ pub fn compile_module(hir: &HirModule, opts: CompileOptions) -> Result> .collect(), namespace_entries: opts.namespace_entries.clone(), dynamic_import_path_to_prefix: opts.dynamic_import_path_to_prefix.clone(), + deferred_module_prefixes: opts.deferred_module_prefixes.clone(), + module_init_deps: opts.module_init_deps.clone(), }; // Module-level globals registry. Pre-walk: @@ -2715,6 +2749,13 @@ pub fn compile_module(hir: &HirModule, opts: CompileOptions) -> Result> for prefix in foreign_prefixes { let ns_name = format!("__perry_ns_{}", prefix); llmod.add_external_global(&ns_name, DOUBLE); + // Issue #753: declare each dynamic-import target's `__init` + // so the dispatch site in `Expr::DynamicImport` can call it + // before loading the namespace. The wrapper-side init is + // idempotent — calling it for an already-initialized + // target costs a load + cmp + cond_br. For Deferred + // targets it's the only thing that triggers their init. + llmod.declare_function(&format!("{}__init", prefix), VOID, &[]); } } @@ -3947,6 +3988,21 @@ fn compile_module_entry( for prefix in non_entry_module_prefixes { llmod.declare_function(&format!("{}__init", prefix), VOID, &[]); } + // Issue #753: emit a no-op `__init` stub so the + // dispatch site in some other module that does `await + // import("./entry.ts")` resolves at link time. 
The entry + // module's actual body runs in `main`, not in a separate + // `__init` — the stub exists purely to satisfy the dispatch's + // unconditional init call. The namespace populator at the + // tail of `main` (when `cross_module.namespace_entries` is + // non-empty) is what makes the entry observable through the + // dynamic-import namespace; the stub does no work. + { + let stub_name = format!("{}__init", module_prefix); + let stub = llmod.define_function(&stub_name, VOID, vec![]); + let _ = stub.create_block("entry"); + stub.block_mut(0).unwrap().ret_void(); + } // For dylib output, emit `void perry_module_init()` instead of // `int main()`. The host process calls this once after dlopen to @@ -4018,7 +4074,19 @@ fn compile_module_entry( // Then every non-entry module's init in order. Each // non-entry module's `__init` runs its own string // pool init internally before its top-level statements. + // + // Issue #753: skip Deferred modules — those reached only + // through dynamic `import()` edges. Their `__init` + // fires lazily from each `Expr::DynamicImport` dispatch + // site, idempotently guarded by `@__perry_init_done_` + // so a program that never reaches the dispatch never pays + // the startup cost. The extern declaration at line ~3947 + // still emits for every non-entry prefix so the dispatch + // site can resolve the symbol at link time. for prefix in non_entry_module_prefixes { + if cross_module.deferred_module_prefixes.contains(prefix) { + continue; + } blk.call_void(&format!("{}__init", prefix), &[]); } } @@ -4282,7 +4350,79 @@ fn compile_module_entry( llmod.add_raw_global(raw.clone()); } } else { + // Issue #753: idempotent init guard. Every non-entry module gets + // a one-byte `@__perry_init_done_` flag and a thin + // wrapper `__init` that returns immediately when the + // flag is set or stores 1 + dispatches to `__init_body` + // when it isn't. 
The wrapper is what the entry main calls + // eagerly (for Eager modules) and what every + // `Expr::DynamicImport` dispatch site calls (for any module + // that's a dynamic-import target — possibly multiple sites in + // the same program). The 2-state guard matches ESM's + // partial-cycle semantics: re-entry during init returns without + // re-running the body, leaving the namespace populator's work + // partially observable. The wrapper sets `done = 1` BEFORE + // calling the body so the re-entry path returns immediately. + let done_global = format!("__perry_init_done_{}", module_prefix); + llmod.add_internal_global(&done_global, I8, "0"); let init_name = format!("{}__init", module_prefix); + let init_body_name = format!("{}__init_body", module_prefix); + { + let wrap_fn = llmod.define_function(&init_name, VOID, vec![]); + let _ = wrap_fn.create_block("entry"); + let _ = wrap_fn.create_block("guard.ret"); + let _ = wrap_fn.create_block("guard.do"); + let ret_label = wrap_fn.block_mut(1).unwrap().label.clone(); + let do_label = wrap_fn.block_mut(2).unwrap().label.clone(); + { + let blk = wrap_fn.block_mut(0).unwrap(); + let done = blk.load(I8, &format!("@{}", done_global)); + let already = blk.icmp_ne(I8, &done, "0"); + blk.cond_br(&already, &ret_label, &do_label); + } + { + let blk = wrap_fn.block_mut(1).unwrap(); + blk.ret_void(); + } + { + let blk = wrap_fn.block_mut(2).unwrap(); + blk.store(I8, "1", &format!("@{}", done_global)); + // Trigger init of static-dep + re-export source modules + // before the body runs. Each `__init` is itself + // wrapped by the same guard pattern, so this short- + // circuits when the dep was already initialized + // (Eager-via-main path) and fires the body when the + // dep is Deferred and this is the first reach. The + // entry module has no `__init` so the driver excludes + // it from `module_init_deps`. 
+ for dep_prefix in &cross_module.module_init_deps { + if dep_prefix == module_prefix { + continue; + } + blk.call_void(&format!("{}__init", dep_prefix), &[]); + } + blk.call_void(&init_body_name, &[]); + blk.ret_void(); + } + } + // Declare every dep's `__init` symbol so the wrapper's calls + // resolve at link time. Most overlap with `non_entry_module_prefixes` + // (whose declarations live in the entry module's compilation), + // but a non-entry module compiled standalone has no entry-side + // declaration list — emit them here too. `declare_function` + // dedupes by name. + for dep_prefix in &cross_module.module_init_deps { + if dep_prefix == module_prefix { + continue; + } + llmod.declare_function(&format!("{}__init", dep_prefix), VOID, &[]); + } + // The body retains every existing semantic of `__init` + // (strings init, globals/GC registration, top-level statements, + // namespace populator at the tail). It's `internal` linkage: + // only the wrapper above ever calls it, both within this module + // and across modules via the wrapper's external symbol. 
+ let init_name = init_body_name; // Debug: emit puts("INIT: ") at the top of each module init let debug_init_const = if std::env::var("PERRY_DEBUG_INIT").is_ok() { let debug_msg = format!("INIT: {}\0", module_prefix); @@ -4295,6 +4435,7 @@ fn compile_module_entry( let ic_base = llmod.ic_counter; let buffer_alias_base = llmod.buffer_alias_counter; let init_fn = llmod.define_function(&init_name, VOID, vec![]); + init_fn.linkage = "internal".to_string(); let _ = init_fn.create_block("entry"); { let blk = init_fn.block_mut(0).unwrap(); diff --git a/crates/perry-codegen/src/expr.rs b/crates/perry-codegen/src/expr.rs index d37ace84..c24bd119 100644 --- a/crates/perry-codegen/src/expr.rs +++ b/crates/perry-codegen/src/expr.rs @@ -10819,7 +10819,15 @@ pub(crate) fn lower_expr(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { let target_prefix = ctx.dynamic_import_path_to_prefix.get(path).cloned(); let blk = ctx.block(); let ns_val = match target_prefix { - Some(prefix) => blk.load(DOUBLE, &format!("@__perry_ns_{}", prefix)), + Some(prefix) => { + // Issue #753: trigger the target's init before + // loading its namespace. For Eager targets the + // guard short-circuits; for Deferred targets + // this is the only invocation that populates + // `@__perry_ns_`. + blk.call_void(&format!("{}__init", prefix), &[]); + blk.load(DOUBLE, &format!("@__perry_ns_{}", prefix)) + } None => { // Driver didn't resolve this path to a target // module — surface a rejected promise. @@ -10890,11 +10898,16 @@ pub(crate) fn lower_expr(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { let next_label_str = ctx.block_label(next_label); ctx.block().cond_br(&cond, &match_label, &next_label_str); - // Match arm — load namespace, wrap in promise, store - // into result_slot, branch to join. + // Match arm — call target's __init (idempotent), load + // namespace, wrap in promise, store into result_slot, + // branch to join. 
Issue #753: the init call is the + // only thing that triggers a Deferred target's body + // and namespace populator; for Eager targets the + // guard short-circuits. ctx.current_block = match_block_idx; let join_label = ctx.block_label(join_block_idx); let blk = ctx.block(); + blk.call_void(&format!("{}__init", target_prefix), &[]); let ns_val = blk.load(DOUBLE, &format!("@__perry_ns_{}", target_prefix)); let promise = blk.call(I64, "js_promise_resolved", &[(DOUBLE, &ns_val)]); let boxed = nanbox_pointer_inline(blk, &promise); diff --git a/crates/perry/src/commands/compile.rs b/crates/perry/src/commands/compile.rs index 87445dc3..e2837f7f 100644 --- a/crates/perry/src/commands/compile.rs +++ b/crates/perry/src/commands/compile.rs @@ -1677,6 +1677,84 @@ pub fn run_with_parse_cache( .canonicalize() .unwrap_or_else(|_| args.input.clone()); + // Issue #753: reachability classification for eager vs deferred init. + // Modules reachable from the entry through any static-import or + // re-export edge init at program start (Eager). Modules reachable + // ONLY through dynamic `import()` edges init lazily on first + // dispatch (Deferred). Run a fixed-point pass starting from the + // entry and propagating Eager across static / re-export edges; what + // remains unmarked is Deferred. Re-export sources must propagate + // because an Eager module's namespace populator reads the source's + // getter at init time — if the source is Deferred, the getter + // returns a zero-initialized global rather than the real binding. 
+ { + let mut eager: HashSet = HashSet::new(); + eager.insert(entry_path.clone()); + loop { + let mut changed = false; + let paths: Vec = ctx.native_modules.keys().cloned().collect(); + for path in &paths { + if !eager.contains(path) { + continue; + } + let module = match ctx.native_modules.get(path) { + Some(m) => m, + None => continue, + }; + let static_targets: Vec = module + .imports + .iter() + .filter(|i| !i.is_dynamic && !i.type_only) + .filter_map(|i| i.resolved_path.as_ref().map(PathBuf::from)) + .collect(); + let reexport_sources: Vec = module + .exports + .iter() + .filter_map(|e| match e { + perry_hir::Export::ExportAll { source } => Some(source.clone()), + perry_hir::Export::ReExport { source, .. } => Some(source.clone()), + perry_hir::Export::NamespaceReExport { source, .. } => Some(source.clone()), + perry_hir::Export::Named { .. } => None, + }) + .collect(); + for resolved_path in static_targets { + if ctx.native_modules.contains_key(&resolved_path) + && !eager.contains(&resolved_path) + { + eager.insert(resolved_path); + changed = true; + } + } + for src in reexport_sources { + if let Some((resolved_path, _)) = resolve_import( + &src, + path, + &ctx.project_root, + &ctx.compile_packages, + &ctx.compile_package_dirs, + ) { + if ctx.native_modules.contains_key(&resolved_path) + && !eager.contains(&resolved_path) + { + eager.insert(resolved_path); + changed = true; + } + } + } + } + if !changed { + break; + } + } + for (path, module) in ctx.native_modules.iter_mut() { + module.init_kind = if eager.contains(path) { + perry_hir::ModuleInitKind::Eager + } else { + perry_hir::ModuleInitKind::Deferred + }; + } + } + // Collect non-entry module names for init function calls // Topologically sort by import dependencies so that if module A imports from module B, // module B is initialized first. 
This ensures module-level variables (e.g., Maps) are @@ -3112,6 +3190,82 @@ pub fn run_with_parse_cache( } else { Vec::new() }; + // Issue #753: every module receives the program-wide set of + // Deferred module prefixes. The entry main filters these + // out of its eager init call sequence; non-entry modules + // ignore it. Empty when no module in the program is + // Deferred (i.e. no dynamic `import()` sites). + let deferred_module_prefixes: std::collections::HashSet = ctx + .native_modules + .iter() + .filter(|(_, m)| m.init_kind == perry_hir::ModuleInitKind::Deferred) + .map(|(_, m)| sanitize_name(&m.name)) + .collect(); + // Issue #753: prefixes of this module's static-import + + // re-export source modules (non-entry only — the entry's + // body is in `main`, not a `__init`). The wrapper at + // `__init` calls each dep's `__init` before + // dispatching to `__init_body`; this transitively + // initializes any Deferred dep reached only through this + // module's re-export chain. For Eager modules the calls + // short-circuit on the idempotent guard's first-write + // check (one load + cmp + cond_br each). 
+ let module_init_deps: Vec = if is_entry { + Vec::new() + } else { + let mut deps: Vec = Vec::new(); + let mut seen: std::collections::HashSet = + std::collections::HashSet::new(); + let entry_prefix = ctx + .native_modules + .get(&entry_path) + .map(|m| sanitize_name(&m.name)); + let push_dep = |deps: &mut Vec, + seen: &mut std::collections::HashSet, + prefix: String| { + if Some(&prefix) == entry_prefix.as_ref() { + return; + } + if seen.insert(prefix.clone()) { + deps.push(prefix); + } + }; + for import in &hir_module.imports { + if import.is_dynamic || import.type_only { + continue; + } + if let Some(resolved) = &import.resolved_path { + let resolved_path = PathBuf::from(resolved); + if let Some(src_mod) = ctx.native_modules.get(&resolved_path) { + push_dep(&mut deps, &mut seen, sanitize_name(&src_mod.name)); + } + } + } + for export in &hir_module.exports { + let src = match export { + perry_hir::Export::ExportAll { source } => Some(source.clone()), + perry_hir::Export::ReExport { source, .. } => Some(source.clone()), + perry_hir::Export::NamespaceReExport { source, .. } => { + Some(source.clone()) + } + perry_hir::Export::Named { .. } => None, + }; + if let Some(src) = src { + if let Some((resolved_path, _)) = resolve_import( + &src, + path, + &ctx.project_root, + &ctx.compile_packages, + &ctx.compile_package_dirs, + ) { + if let Some(src_mod) = ctx.native_modules.get(&resolved_path) { + push_dep(&mut deps, &mut seen, sanitize_name(&src_mod.name)); + } + } + } + } + deps + }; // Build import → source-prefix table for cross-module // ExternFuncRef calls. For each Named import in this // module, look up the source module's HIR by resolved @@ -4254,6 +4408,8 @@ pub fn run_with_parse_cache( .get(path) .cloned() .unwrap_or_default(), + deferred_module_prefixes, + module_init_deps, }; // V2.2 + #686 object cache lookup. 
The key hashes every // codegen-affecting field of `opts` together with this diff --git a/crates/perry/src/commands/compile/object_cache.rs b/crates/perry/src/commands/compile/object_cache.rs index 316facf4..4562201d 100644 --- a/crates/perry/src/commands/compile/object_cache.rs +++ b/crates/perry/src/commands/compile/object_cache.rs @@ -183,6 +183,21 @@ pub fn compute_object_cache_key( &opts.non_entry_module_prefixes.join("|"), ); h.field("mod_init_names", &opts.native_module_init_names.join("|")); + // Issue #753: eager/deferred split. The set membership controls + // which `__init` calls main emits eagerly, so the entry + // module's `.o` bytes change when a target module moves between + // Eager and Deferred classifications (e.g. a new dynamic + // `import()` site appears that's the ONLY path to a previously- + // statically-reached module). + { + let mut v: Vec<&String> = opts.deferred_module_prefixes.iter().collect(); + v.sort(); + h.field( + "deferred_prefixes", + &v.iter().map(|s| s.as_str()).collect::>().join("|"), + ); + } + h.field("init_deps", &opts.module_init_deps.join("|")); h.field("js_specs", &opts.js_module_specifiers.join("|")); { let mut buf = String::new(); @@ -565,6 +580,8 @@ mod object_cache_tests { app_metadata: perry_codegen::AppMetadata::default(), namespace_entries: Vec::new(), dynamic_import_path_to_prefix: std::collections::HashMap::new(), + deferred_module_prefixes: std::collections::HashSet::new(), + module_init_deps: Vec::new(), } } diff --git a/test-files/dynamic_import_deferred_marker.ts b/test-files/dynamic_import_deferred_marker.ts new file mode 100644 index 00000000..a03527ea --- /dev/null +++ b/test-files/dynamic_import_deferred_marker.ts @@ -0,0 +1,6 @@ +// Helper for test_gap_dynamic_import_deferred.ts. Top-level +// `console.log` runs as a side effect of this module's `__init` — +// observable evidence of whether init fired. 
+console.log("deferred-init-ran"); + +export const x: number = 42; diff --git a/test-files/test_gap_dynamic_import_deferred.ts b/test-files/test_gap_dynamic_import_deferred.ts new file mode 100644 index 00000000..a4993c7d --- /dev/null +++ b/test-files/test_gap_dynamic_import_deferred.ts @@ -0,0 +1,16 @@ +// Issue #753 — a module reached only through dynamic `import()` must +// have its top-level side effects suppressed until the dispatch fires. +// `dynamic_import_deferred_marker.ts` logs "deferred-init-ran" at the +// top level; with the eager/deferred split, that line must appear AFTER +// "before" and BEFORE the import's resolved value is consumed. Pre-#753 +// the marker fired at program start (ahead of "before"), proving +// the heavy import path was paid eagerly. + +console.log("before"); +const branch: number = Number(process.argv[2] ?? "0"); +if (branch === 1) { + const m = await import("./dynamic_import_deferred_marker.ts"); + console.log("after:" + m.x); +} else { + console.log("skipped"); +}