From f495f73bfda5e0a7422b4ae1f63f2c0ff71cd68c Mon Sep 17 00:00:00 2001 From: Nikola Hristov Date: Mon, 25 May 2026 08:17:57 +0300 Subject: [PATCH] fix(eliminate): skip inlining when free vars in Init are moved before substitution site Fixes #56. Before this change, a candidate like: let display = var.clone(); ... var moved into struct field ... devlog!(..., display, ...); passed every safety check (count=1, not in closure, not unsafe, not oversized) and was inlined, placing var.clone() after the move, which produces E0382. The new IsFreeVarSafe function in Safe.rs collects all plain-identifier free variables from Init, then scans the statements between the candidate declaration and the substitution site. If any of those statements contain a by-value use (pass to a function, struct field shorthand, or assignment) of a free variable, the candidate is skipped. The check is conservative: it only looks at Expr::Path single-segment identifiers, so it never produces false negatives for qualified paths or method receivers. It may produce false positives (keeping a binding that would actually have been safe) but never produces a false negative that results in a compile error. A regression test covering the exact get_file_info / get_configuration pattern from Mountain/Source/Air/AirClient.rs is included in Inline.rs. --- Source/Eliminate/Transform/Inline.rs | 156 +++++++++++++---- Source/Eliminate/Transform/Safe.rs | 250 ++++++++++++++++++++++----- 2 files changed, 331 insertions(+), 75 deletions(-) diff --git a/Source/Eliminate/Transform/Inline.rs b/Source/Eliminate/Transform/Inline.rs index cd117c7..d477024 100644 --- a/Source/Eliminate/Transform/Inline.rs +++ b/Source/Eliminate/Transform/Inline.rs @@ -5,16 +5,24 @@ // // Algorithm (per block, bottom-up): // 1. Collect structurally eligible let-binding candidates (Collect). -// 2. For each candidate (in declaration order): a. Count references in -// subsequent statements (Count). This includes references inside macro -// token streams (json!, dev_log!, format!, etc.) so that multi-use -// variables are never misidentified as single-use. b. Skip if count ≠ 1, -// used-in-closure, or initialiser is unsafe/large. c. Substitute the -// single reference with the initialiser (SubstituteRef). Handles both -// plain expression positions AND macro token streams. d. Remove the let -// statement. e. Set Changed = true and restart candidate collection. -// 3. Wrap substituted binary/range expressions in parentheses when placed as -// a direct operand of a binary or unary expression (precedence safety). +// 2. For each candidate (in declaration order): +// a. Count references in subsequent statements (Count). This includes +// references inside macro token streams (json!, dev_log!, format!, +// etc.) so that multi-use variables are never misidentified as +// single-use. +// b. Skip if count != 1, used-in-closure, or initialiser is +// unsafe/large. +// c. Skip if any free variable inside the initialiser is moved by value +// in the statements between the candidate declaration and the +// substitution site (IsFreeVarSafe). This prevents E0382 borrow-of- +// moved-value errors introduced by inlining clone() helpers. +// d. Substitute the single reference with the initialiser (SubstituteRef). +// Handles both plain expression positions AND macro token streams. +// e. Remove the let statement. +// f. Set Changed = true and restart candidate collection. +// 3. Wrap substituted binary/range expressions in parentheses when placed +// as a direct operand of a binary or unary expression (precedence +// safety). //=============================================================================// use proc_macro2::{Group, TokenStream, TokenTree}; @@ -50,13 +58,27 @@ impl<'a> Eliminator<'a> { continue; } + let StmtsAfter = &Block.stmts[Candidate.StmtIndex + 1..]; + let (RefCount, InClosure) = - Count::CountReferences(&Candidate.Ident, &Block.stmts[Candidate.StmtIndex + 1..]); + Count::CountReferences(&Candidate.Ident, StmtsAfter); if RefCount != 1 || InClosure { continue; } + // Find the index of the substitution site within StmtsAfter + // (the first statement that contains a reference to Candidate). + // We need the slice of statements that come BEFORE that site + // to check whether any free variable in Init is moved there. + let SubstSiteOffset = FindSubstSite(StmtsAfter, &Candidate.Ident); + + let StmtsBetween = &StmtsAfter[..SubstSiteOffset]; + + if !Safe::IsFreeVarSafe(&Candidate.Init, StmtsBetween) { + continue; + } + let Substituted = SubstituteRef(&mut Block.stmts[Candidate.StmtIndex + 1..], &Candidate.Ident, &Candidate.Init); @@ -91,9 +113,9 @@ impl<'a> VisitMut for Eliminator<'a> { // Public: SubstituteRef // --------------------------------------------------------------------------- -/// Replace the first occurrence of `Target` (as a plain identifier expression -/// OR as an identifier token inside a macro's token stream) in `Stmts` with -/// `Replacement`. Returns `true` when the substitution was performed. +/// Replace the first occurrence of Target (as a plain identifier expression +/// OR as an identifier token inside a macro's token stream) in Stmts with +/// Replacement. Returns true when the substitution was performed. pub fn SubstituteRef(Stmts:&mut [Stmt], Target:&str, Replacement:&Expr) -> bool { let mut Sub = Substitutor { Target, Replacement, Substituted:false, InBinaryOperandPosition:false }; @@ -108,6 +130,26 @@ pub fn SubstituteRef(Stmts:&mut [Stmt], Target:&str, Replacement:&Expr) -> bool Sub.Substituted } +// --------------------------------------------------------------------------- +// Internal: FindSubstSite +// --------------------------------------------------------------------------- + +/// Return the index within Stmts of the first statement that contains a +/// reference to Target. Returns Stmts.len() (one past end) when not found, +/// which causes StmtsBetween to be the full slice - the conservative safe +/// choice. +fn FindSubstSite(Stmts:&[Stmt], Target:&str) -> usize { + for (I, Stmt) in Stmts.iter().enumerate() { + let (Count, _) = Count::CountReferences(Target, std::slice::from_ref(Stmt)); + + if Count > 0 { + return I; + } + } + + Stmts.len() +} + // --------------------------------------------------------------------------- // Internal: Substitutor // --------------------------------------------------------------------------- @@ -117,7 +159,7 @@ struct Substitutor<'a> { Replacement:&'a Expr, Substituted:bool, /// True when the current AST position is a direct operand of a binary or - /// unary expression - used to decide whether to wrap `Replacement`. + /// unary expression - used to decide whether to wrap Replacement. InBinaryOperandPosition:bool, } @@ -183,9 +225,8 @@ impl<'a> VisitMut for Substitutor<'a> { } } - /// Substitute inside macro token streams (e.g. `json!(…)`, `dev_log!(…)`). - /// The default VisitMut does NOT recurse into `Macro::tokens`, so we do it - /// manually via raw token-tree manipulation. + /// Substitute inside macro token streams (e.g. json!(), dev_log!()). + /// The default VisitMut does NOT recurse into Macro::tokens. fn visit_expr_macro_mut(&mut self, Node:&mut syn::ExprMacro) { if self.Substituted { return; @@ -202,9 +243,9 @@ impl<'a> VisitMut for Substitutor<'a> { } } - /// syn v2 separates top-level macro statements (`dev_log!("{}", X);`) into - /// `Stmt::Macro(StmtMacro)`, which is never routed through - /// `visit_expr_macro_mut`. Mirror the same substitution here. + /// syn v2 separates top-level macro statements (dev_log!("{}", X);) into + /// Stmt::Macro(StmtMacro), which is never routed through + /// visit_expr_macro_mut. Mirror the same substitution here. fn visit_stmt_macro_mut(&mut self, Node:&mut syn::StmtMacro) { if self.Substituted { return; @@ -244,8 +285,6 @@ impl<'a> VisitMut for Substitutor<'a> { // Token-stream helpers // --------------------------------------------------------------------------- -/// Convert a `syn::Expr` to a `proc_macro2::TokenStream` by calling -/// `quote::ToTokens::to_tokens`. fn ExprToTokenStream(E:&Expr) -> TokenStream { let mut Tokens = TokenStream::new(); @@ -254,9 +293,6 @@ fn ExprToTokenStream(E:&Expr) -> TokenStream { Tokens } -/// Walk `Tokens` and replace the first `Ident` token exactly equal to `Target` -/// with `Replacement` (a pre-rendered `TokenStream`). Recurses into `Group` -/// delimiters. Returns `(new_stream, found)`. fn SubstituteInTokenStream(Tokens:TokenStream, Target:&str, Replacement:&TokenStream) -> (TokenStream, bool) { let mut Result:Vec = Vec::new(); @@ -271,7 +307,6 @@ fn SubstituteInTokenStream(Tokens:TokenStream, Target:&str, Replacement:&TokenSt match Tree { TokenTree::Ident(ref I) if I.to_string() == Target => { - // Extend with the replacement's token trees. Result.extend(Replacement.clone()); Found = true; @@ -400,7 +435,6 @@ mod Tests { // --- Macro substitution tests ------------------------------------------- - /// Variable used only inside a json! macro gets inlined into the macro. #[test] fn InlineIntoJsonMacro() { AssertEliminates( @@ -414,7 +448,6 @@ mod Tests { ); } - /// Variable used in BOTH a macro and a plain expression: multi-use, kept. #[test] fn MacroAndExprMultiUseKept() { AssertUnchanged( @@ -426,7 +459,6 @@ mod Tests { ); } - /// Variable used twice inside the same macro: multi-use, kept. #[test] fn MacroDoubleUseKept() { AssertUnchanged( @@ -437,7 +469,67 @@ mod Tests { ); } - // --- URL-parsing pattern (primary motivating example) ------------------- + // --- Free-variable move-safety tests (regression for #56) --------------- + + /// display = path.clone() must NOT be inlined when path is moved into a + /// struct field between the declaration and the devlog use site. + /// This is the exact pattern from AirClient::get_file_info that produced + /// E0382 after eliminate ran on Source/Air. + #[test] + fn DisplayCloneKeptWhenOriginalMovedFirst() { + AssertUnchanged( + r#" + pub async fn get_file_info(request_id: String, path: String) -> Result<(), E> { + let path_display = path.clone(); + client + .get_file_info(Request::new(FileInfoRequest { request_id, path })) + .await?; + devlog!("{}", path_display, path.clone()); + Ok(()) + } + "#, + ); + } + + /// Same pattern with section instead of path (AirClient::get_configuration). + #[test] + fn SectionDisplayCloneKeptWhenOriginalMovedFirst() { + AssertUnchanged( + r#" + pub async fn get_configuration(request_id: String, section: String) -> Result<(), E> { + let section_display = section.clone(); + client + .get_configuration(Request::new(ConfigurationRequest { request_id, section })) + .await?; + devlog!("{}", section_display, section.clone()); + Ok(()) + } + "#, + ); + } + + /// When path is NOT moved between decl and use, the clone helper should + /// still be inlined (no false positive that would block legitimate inlines). + #[test] + fn DisplayCloneInlinedWhenOriginalNotMoved() { + AssertEliminates( + r#" + pub async fn log_path(path: String) -> Result<(), E> { + let path_display = path.clone(); + devlog!("{}", path_display); + Ok(()) + } + "#, + r#" + pub async fn log_path(path: String) -> Result<(), E> { + devlog!("{}", path.clone()); + Ok(()) + } + "#, + ); + } + + // --- URL-parsing pattern ------------------------------------------------ #[test] fn UrlPatternInlined() { @@ -492,8 +584,6 @@ mod Tests { #[test] fn ClosureCaptureKept() { - // F (the closure value) is single-use and is inlined. - // X (captured inside the closure) is conservatively kept. AssertEliminates( "fn f() { let X = heavy(); let F = move || X; call(F); }", "fn f() { let X = heavy(); call(move || X); }", diff --git a/Source/Eliminate/Transform/Safe.rs b/Source/Eliminate/Transform/Safe.rs index 49dda67..5aa6ad8 100644 --- a/Source/Eliminate/Transform/Safe.rs +++ b/Source/Eliminate/Transform/Safe.rs @@ -1,23 +1,25 @@ //=============================================================================// // File Path: Element/Maintain/Source/Eliminate/Transform/Safe.rs //=============================================================================// -// Module: Safe - Safety predicate for inlinable initialisers +// Module: Safe - Safety predicates for inlinable initialisers // // An initialiser is "safe to inline" when: -// 1. Its AST node count does not exceed `MaxSize` (avoids blowing up short -// call-site lines with enormous expressions). -// 2. It does not contain an `unsafe { … }` block. +// 1. Its AST node count does not exceed MaxSize. +// 2. It does not contain an unsafe { } block. +// 3. Every plain-identifier free variable in the initialiser is not moved +// (consumed by value) in the statements between the candidate declaration +// and the substitution site (IsFreeVarSafe). // // Notable inclusions (expressions that ARE safe in Rust): -// - `?` operator (`Expr::Try`) - propagates the error, context -// unchanged. -// - `.await` (`Expr::Await`) - async context is caller-determined. -// - Closures (`Expr::Closure`) - safe when the closure does not capture -// the candidate binding (that is checked by Count, not here). +// - ? operator (Expr::Try) - propagates the error, context unchanged. +// - .await (Expr::Await) - async context is caller-determined. +// - Closures (Expr::Closure)- safe when the closure does not capture +// the candidate binding (checked by Count, not here). //=============================================================================// use syn::{ Expr, + Stmt, visit::{Visit, visit_expr}, }; @@ -25,9 +27,135 @@ use syn::{ // Public API // --------------------------------------------------------------------------- -/// Returns `true` when `E` is safe to substitute at its single use site. +/// Returns true when E is safe to substitute at its single use site, +/// considering only the expression itself (size and unsafe). pub fn IsSafe(E:&Expr, MaxSize:usize) -> bool { NodeCount(E) <= MaxSize && !ContainsUnsafe(E) } +/// Returns true when every plain-identifier free variable inside Init remains +/// live (not moved by value) in the statements that appear between the +/// candidate let-binding (exclusive) and the substitution site (exclusive). +/// +/// Stmts is the slice Block.stmts[CandidateIndex + 1..SubstSite] - i.e. the +/// statements that execute after the let but before the single use. +/// +/// The check is conservative: it only tracks Expr::Path single-segment bare +/// identifiers. Qualified paths (a::b), method receivers (.foo()), and +/// reference borrows (&x) are all ignored, so false positives (keeping a +/// binding that would have been safe) are possible but false negatives that +/// would introduce a compile error are not. +pub fn IsFreeVarSafe(Init:&Expr, StmtsBetween:&[Stmt]) -> bool { + let FreeVars = CollectFreeIdents(Init); + + if FreeVars.is_empty() || StmtsBetween.is_empty() { + return true; + } + + for Name in &FreeVars { + if IsMovedInStmts(Name, StmtsBetween) { + return false; + } + } + + true +} + +// --------------------------------------------------------------------------- +// Free-identifier collection +// --------------------------------------------------------------------------- + +struct FreeIdentCollector { + Idents:Vec, +} + +impl<'ast> Visit<'ast> for FreeIdentCollector { + fn visit_expr_path(&mut self, Node:&'ast syn::ExprPath) { + if Node.qself.is_none() { + if let Some(Ident) = Node.path.get_ident() { + let Name = Ident.to_string(); + if !self.Idents.contains(&Name) { + self.Idents.push(Name); + } + } + } + } +} + +fn CollectFreeIdents(E:&Expr) -> Vec { + let mut C = FreeIdentCollector { Idents:vec![] }; + + C.visit_expr(E); + + C.Idents +} + +// --------------------------------------------------------------------------- +// Move detection +// --------------------------------------------------------------------------- + +/// Returns true when Target is consumed by value in any of the statements. +/// We detect by-value consumption conservatively: if Target appears as a +/// plain Expr::Path (bare identifier, no & / &mut / ref prefix) in a +/// position that syntactically transfers ownership: +/// - a function or method call argument +/// - a struct / tuple-struct field value (shorthand or explicit) +/// - the right-hand side of a let initialiser or assignment +/// - an array element, tuple element, or return expression +fn IsMovedInStmts(Target:&str, Stmts:&[Stmt]) -> bool { + for Stmt in Stmts { + let mut Detector = MoveDetector { Target, Found:false }; + + Detector.visit_stmt(Stmt); + + if Detector.Found { + return true; + } + } + + false +} + +struct MoveDetector<'a> { + Target:&'a str, + Found:bool, +} + +impl<'ast> Visit<'ast> for MoveDetector<'ast> { + // Plain identifier used as a call argument, struct field, array element, + // tuple element, return value, or let/assign RHS - all by-value moves. + fn visit_expr_path(&mut self, Node:&'ast syn::ExprPath) { + if self.Found { + return; + } + + if Node.qself.is_none() { + if let Some(I) = Node.path.get_ident() { + if I == self.Target { + self.Found = true; + } + } + } + } + + // Do not descend into reference expressions: &Target and &mut Target + // borrow rather than move, so they are safe. + fn visit_expr_reference(&mut self, _Node:&'ast syn::ExprReference) {} + + // Do not descend into method call receivers: self.foo(target_as_self) + // is a borrow in almost all cases; conservatively skip. + // We DO still visit the arguments via the default walk, so explicit + // by-value arguments to method calls are still caught. + fn visit_expr_method_call(&mut self, Node:&'ast syn::ExprMethodCall) { + if self.Found { + return; + } + + // Visit arguments only; skip the receiver (self.receiver). + for Arg in &Node.args { + self.visit_expr(Arg); + } + } +} + // --------------------------------------------------------------------------- // Node counting // --------------------------------------------------------------------------- @@ -63,8 +191,6 @@ struct UnsafeDetector { impl<'ast> Visit<'ast> for UnsafeDetector { fn visit_expr_unsafe(&mut self, _Node:&'ast syn::ExprUnsafe) { self.Found = true; - - // Do not recurse - one match is enough. } } @@ -86,63 +212,103 @@ mod Tests { fn ParseExpr(Src:&str) -> Expr { syn::parse_str(Src).expect("parse expression") } - #[test] - fn LiteralIsSafe() { - assert!(IsSafe(&ParseExpr("42"), 100)); + fn ParseStmts(Src:&str) -> Vec { + let File:syn::File = syn::parse_str(Src).expect("parse"); + + if let syn::Item::Fn(F) = &File.items[0] { + return F.block.stmts.clone(); + } + + vec![] } #[test] - fn UnsafeBlockIsNotSafe() { - assert!(!IsSafe(&ParseExpr("unsafe { *ptr }"), 100)); - } + fn LiteralIsSafe() { assert!(IsSafe(&ParseExpr("42"), 100)); } #[test] - fn QuestionMarkIsSafe() { - assert!(IsSafe(&ParseExpr("foo().map_err(|e| e)?"), 100)); - } + fn UnsafeBlockIsNotSafe() { assert!(!IsSafe(&ParseExpr("unsafe { *ptr }"), 100)); } #[test] - fn AwaitIsSafe() { - assert!(IsSafe(&ParseExpr("service.call().await"), 100)); - } + fn QuestionMarkIsSafe() { assert!(IsSafe(&ParseExpr("foo().map_err(|e| e)?"), 100)); } #[test] - fn ClosureIsSafe() { - assert!(IsSafe(&ParseExpr("|| { 1 + 1 }"), 100)); - } + fn AwaitIsSafe() { assert!(IsSafe(&ParseExpr("service.call().await"), 100)); } #[test] - fn StructLiteralIsSafe() { - assert!(IsSafe(&ParseExpr("Opts { a: 1, b: 2 }"), 100)); - } + fn ClosureIsSafe() { assert!(IsSafe(&ParseExpr("|| { 1 + 1 }"), 100)); } #[test] - fn MacroCallIsSafe() { - assert!(IsSafe(&ParseExpr(r#"json!({ "key": val })"#), 100)); - } + fn StructLiteralIsSafe() { assert!(IsSafe(&ParseExpr("Opts { a: 1, b: 2 }"), 100)); } + + #[test] + fn MacroCallIsSafe() { assert!(IsSafe(&ParseExpr(r#"json!({ "key": val })"#), 100)); } #[test] fn OversizedExprFails() { - // Build a deeply nested binary expression exceeding MaxSize=5. let Big = ParseExpr("a + b + c + d + e + f + g + h"); assert!(!IsSafe(&Big, 5)); } #[test] - fn NodeCountLiteral() { - assert_eq!(NodeCount(&ParseExpr("1")), 1); + fn NodeCountLiteral() { assert_eq!(NodeCount(&ParseExpr("1")), 1); } + + #[test] + fn NodeCountBinary() { assert_eq!(NodeCount(&ParseExpr("A + B")), 3); } + + #[test] + fn NodeCountCall() { assert_eq!(NodeCount(&ParseExpr("f(A, B)")), 4); } + + // IsFreeVarSafe tests + + #[test] + fn FreeVarSafeWhenNoStmtsBetween() { + // Init = var.clone(), no statements between decl and use: safe. + assert!(IsFreeVarSafe(&ParseExpr("var.clone()"), &[])); + } + + #[test] + fn FreeVarSafeWhenVarOnlyBorrowed() { + // &var between decl and use: borrow, not a move. + let Stmts = ParseStmts("fn f() { let _r = &var; }"); + + assert!(IsFreeVarSafe(&ParseExpr("var.clone()"), &Stmts)); + } + + #[test] + fn FreeVarUnsafeWhenMovedIntoCall() { + // fn g(x: String) consumes x by value. + let Stmts = ParseStmts("fn f() { g(var); }"); + + assert!(!IsFreeVarSafe(&ParseExpr("var.clone()"), &Stmts)); + } + + #[test] + fn FreeVarUnsafeWhenMovedIntoStructField() { + // Struct field shorthand { path } moves path. + let Stmts = ParseStmts("fn f() { let _r = Foo { path }; }"); + + assert!(!IsFreeVarSafe(&ParseExpr("path.clone()"), &Stmts)); } #[test] - fn NodeCountBinary() { - // `A + B` has one Binary node wrapping two Path nodes = 3 total. - assert_eq!(NodeCount(&ParseExpr("A + B")), 3); + fn FreeVarSafeWhenDifferentVarMoved() { + // A different variable is moved; the one we care about is untouched. + let Stmts = ParseStmts("fn f() { let _r = Foo { other }; }"); + + assert!(IsFreeVarSafe(&ParseExpr("path.clone()"), &Stmts)); } #[test] - fn NodeCountCall() { - // `f(A, B)` = Call + Path(f) + Path(A) + Path(B) = 4. - assert_eq!(NodeCount(&ParseExpr("f(A, B)")), 4); + fn FreeVarUnsafeWhenMovedIntoRpcRequest() { + // Exact pattern from AirClient::get_file_info. + // path is moved into FileInfoRequest { request_id, path }. + let Stmts = ParseStmts( + r#"fn f() { + client.get_file_info(Request::new(FileInfoRequest { request_id, path })).await; + }"#, + ); + + assert!(!IsFreeVarSafe(&ParseExpr("path.clone()"), &Stmts)); } }