diff --git a/src/formatter.rs b/src/formatter.rs index 7ec1aa4..bae3218 100644 --- a/src/formatter.rs +++ b/src/formatter.rs @@ -18,7 +18,12 @@ use regex::{Regex, RegexBuilder, Replacer}; use topiary_core::{Language, Operation, TopiaryQuery, formatter_tree}; use tree_sitter::{Parser, Point, Query, QueryCursor, StreamingIterator, Tree}; -use crate::FormatterConfig; +use crate::{ + FormatterConfig, + reorder::{ + GDScriptTokenKind, GDScriptTokensWithComments, MethodType, collect_top_level_tokens, + }, +}; static QUERY: &str = include_str!("../queries/gdscript.scm"); @@ -32,7 +37,12 @@ pub fn format_gdscript_with_config( ) -> Result> { let mut formatter = Formatter::new(content.to_owned(), config.clone()); - formatter.preprocess().format()?.postprocess().reorder(); + formatter + .preprocess() + .format()? + .postprocess() + .validate_formatting()? + .reorder()?; formatter.finish() } @@ -42,6 +52,7 @@ struct Formatter { parser: Parser, input_tree: GdTree, tree: Tree, + original_source: Option, } impl Formatter { @@ -53,8 +64,17 @@ impl Formatter { .unwrap(); let tree = parser.parse(&content, None).unwrap(); let input_tree = GdTree::from_ts_tree(&tree, content.as_bytes()); + let original_source = if config.safe && config.reorder_code { + // When both safe mode and reordering are enabled we keep an + // untouched copy of the original source code so we can later verify + // that the top-level declarations all survive the formatting pass. + Some(content.clone()) + } else { + None + }; Self { + original_source, content, config, tree, @@ -102,9 +122,9 @@ impl Formatter { } #[inline(always)] - fn reorder(&mut self) -> &mut Self { + fn reorder(&mut self) -> Result<&mut Self, Box> { if !self.config.reorder_code { - return self; + return Ok(self); } self.tree = self.parser.parse(&self.content, Some(&self.tree)).unwrap(); @@ -116,9 +136,15 @@ impl Formatter { eprintln!( "Warning: Code reordering failed: {e}. Returning formatted code without reordering." ); + return Ok(self); } }; - self + + if self.config.safe { + self.ensure_safe_reorder()?; + } + + Ok(self) } /// This function runs over the content before going through topiary. @@ -141,19 +167,41 @@ impl Formatter { .postprocess_tree_sitter() } - /// Finishes formatting and returns the resulting file content. #[inline(always)] - fn finish(mut self) -> Result> { - if self.config.safe { - self.input_tree.postprocess(); - self.tree = self.parser.parse(&self.content, None).unwrap(); + fn validate_formatting(&mut self) -> Result<&mut Self, Box> { + if !self.config.safe { + return Ok(self); + } - let output_tree = GdTree::from_ts_tree(&self.tree, self.content.as_bytes()); - if self.input_tree != output_tree { - return Err("Code structure has changed after formatting".into()); - } + self.input_tree.postprocess(); + self.tree = self.parser.parse(&self.content, None).unwrap(); + + let formatted_tree = GdTree::from_ts_tree(&self.tree, self.content.as_bytes()); + if self.input_tree != formatted_tree { + return Err("Code structure has changed after formatting".into()); } + Ok(self) + } + + #[inline(always)] + fn ensure_safe_reorder(&mut self) -> Result<(), Box> { + let original_source = self + .original_source + .as_deref() + .ok_or_else(|| { + "Safe mode requires the original source to verify reordered code".to_string() + })?; + + self.tree = self.parser.parse(&self.content, None).unwrap(); + ensure_top_level_tokens_match(original_source, &self.tree, &self.content)?; + + Ok(()) + } + + /// Finishes formatting and returns the resulting file content. + #[inline(always)] + fn finish(self) -> Result> { Ok(self.content) } @@ -456,6 +504,167 @@ impl Formatter { } } +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +struct TopLevelTokenSignature { + kind: String, + attached_comments: Vec, + trailing_comments: Vec, +} + +/// Ensures that the top-level tokens (child nodes of (source) in the +/// tree-sitter AST) in the original source match those in the current tree +/// after formatting and reordering. We compare their structural “signatures” +/// (kind, relevant identifiers, and attached comments). This checks that we did +/// not lose any top-level declaration. +fn ensure_top_level_tokens_match( + original_source: &str, + current_tree: &Tree, + current_source: &str, +) -> Result<(), Box> { + // Safe mode only cares that we did not lose or duplicate any top-level declaration. + // We accumulate signed counts per signature; a non-zero delta means something changed. + let mut diff = std::collections::HashMap::::new(); + + for signature in parse_top_level_token_signatures(original_source)? { + *diff.entry(signature).or_insert(0) += 1; + } + + for signature in top_level_token_signatures_from_tree(current_tree, current_source)? { + *diff.entry(signature).or_insert(0) -= 1; + } + + let mismatched: Vec<_> = diff.iter().filter(|(_, count)| **count != 0).collect(); + + if !mismatched.is_empty() { + eprintln!("Safe mode mismatch detected at top level:"); + for (signature, count) in mismatched { + eprintln!(" {:?}: delta={}", signature, count); + } + return Err("Safe mode detected mismatched top-level declarations after reordering".into()); + } + + Ok(()) +} + +fn parse_top_level_token_signatures( + source: &str, +) -> Result, Box> { + // We re-parse the original content with tree-sitter instead of reusing `input_tree` + // because the reorder module already knows how to classify the raw syntax tree into + // the token structures we want to compare. I'm not 100% sure it's needed + // but it's not very costly. + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_gdscript::LANGUAGE.into()) + .unwrap(); + let tree = parser + .parse(source, None) + .ok_or_else(|| "Failed to parse GDScript source in safe mode")?; + + top_level_token_signatures_from_tree(&tree, source) +} + +fn top_level_token_signatures_from_tree( + tree: &Tree, + content: &str, +) -> Result, Box> { + let tokens = collect_top_level_tokens(tree, content)?; + let mut signatures = Vec::with_capacity(tokens.len()); + + for token in tokens { + let GDScriptTokensWithComments { + token_kind, + attached_comments, + trailing_comments, + start_byte: _, + end_byte: _, + original_text, + } = token; + + signatures.push(TopLevelTokenSignature { + kind: token_kind_key(&token_kind), + attached_comments, + trailing_comments, + }); + + if let Some(extends_key) = inline_extends_signature(&token_kind, original_text.as_str()) { + signatures.push(TopLevelTokenSignature { + kind: extends_key, + attached_comments: Vec::new(), + trailing_comments: Vec::new(), + }); + } + } + + Ok(signatures) +} + +fn token_kind_key(kind: &GDScriptTokenKind) -> String { + match kind { + GDScriptTokenKind::ClassAnnotation(text) => format!("ClassAnnotation::{text}"), + GDScriptTokenKind::ClassName(text) => format!("ClassName::{text}"), + GDScriptTokenKind::Extends(text) => format!("Extends::{text}"), + GDScriptTokenKind::Docstring(text) => format!("Docstring::{text}"), + GDScriptTokenKind::Signal(name, is_private) => { + format!("Signal::{name}::{is_private}") + } + GDScriptTokenKind::Enum(name, is_private) => format!("Enum::{name}::{is_private}"), + GDScriptTokenKind::Constant(name, is_private) => { + format!("Constant::{name}::{is_private}") + } + GDScriptTokenKind::StaticVariable(name, is_private) => { + format!("StaticVariable::{name}::{is_private}") + } + GDScriptTokenKind::ExportVariable(name, is_private) => { + format!("ExportVariable::{name}::{is_private}") + } + GDScriptTokenKind::RegularVariable(name, is_private) => { + format!("RegularVariable::{name}::{is_private}") + } + GDScriptTokenKind::OnReadyVariable(name, is_private) => { + format!("OnReadyVariable::{name}::{is_private}") + } + GDScriptTokenKind::Method(name, method_type, is_private) => format!( + "Method::{name}::{}::{is_private}", + method_type_key(method_type) + ), + GDScriptTokenKind::InnerClass(name, is_private) => { + format!("InnerClass::{name}::{is_private}") + } + GDScriptTokenKind::Unknown(text) => format!("Unknown::{text}"), + } +} + +fn method_type_key(method_type: &MethodType) -> String { + match method_type { + MethodType::StaticInit => "StaticInit".to_string(), + MethodType::StaticFunction => "StaticFunction".to_string(), + MethodType::BuiltinVirtual(priority) => format!("BuiltinVirtual({priority})"), + MethodType::Custom => "Custom".to_string(), + } +} + +fn inline_extends_signature(token_kind: &GDScriptTokenKind, original_text: &str) -> Option { + match token_kind { + GDScriptTokenKind::ClassName(_) => { + let extends_part = extract_inline_extends(original_text)?; + Some(format!("Extends::{extends_part}")) + } + _ => None, + } +} + +fn extract_inline_extends(original_text: &str) -> Option { + let extends_index = original_text.find("extends")?; + let extends_slice = &original_text[extends_index..]; + let trimmed = extends_slice.trim(); + if trimmed.is_empty() { + None + } else { + Some(trimmed.to_string()) + } +} + /// A syntax tree of the source code. struct GdTree { nodes: Vec, diff --git a/src/lib.rs b/src/lib.rs index 7945da0..a0cf24a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ pub mod formatter; -pub mod reorder; pub mod linter; +pub mod reorder; #[derive(Clone)] pub struct FormatterConfig { diff --git a/src/main.rs b/src/main.rs index f0eeeff..09fbe85 100644 --- a/src/main.rs +++ b/src/main.rs @@ -99,11 +99,10 @@ struct Args { /// /// This offers a good amount protection against the formatter failing /// on new syntax at the cost of a small little extra running time. - /// Currently incompatible with --reorder-code. /// /// WARNING: this is not a perfect solution. Some rare edge cases may still /// lead to syntax changes. - #[arg(short, long, conflicts_with = "reorder_code")] + #[arg(short, long)] safe: bool, } diff --git a/src/reorder.rs b/src/reorder.rs index e78d4f9..7ed1322 100644 --- a/src/reorder.rs +++ b/src/reorder.rs @@ -15,13 +15,21 @@ pub fn reorder_gdscript_elements( tree: &Tree, content: &str, ) -> Result> { - let tokens = extract_tokens_to_reorder(tree, content)?; + let tokens = collect_top_level_tokens(tree, content)?; let ordered_elements = sort_gdscript_tokens(tokens); let reordered_content = build_reordered_code(ordered_elements, content); Ok(reordered_content) } +/// Collects all top-level tokens (direct children of the `source` node) without reordering them. +pub fn collect_top_level_tokens( + tree: &Tree, + content: &str, +) -> Result, Box> { + extract_tokens_to_reorder(tree, content) +} + /// This struct is used to hold an element along with its associated comments /// and original text so we can precisely reconstruct it, and also when we move /// functions etc. their docstrings and comments come along. @@ -35,7 +43,7 @@ pub struct GDScriptTokensWithComments { pub end_byte: usize, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum GDScriptTokenKind { ClassAnnotation(String), // Annotations that go at the top of the file like @tool and @icon ClassName(String), // This is the class_name declaration @@ -226,7 +234,7 @@ fn extract_tokens_to_reorder( content: &str, ) -> Result, Box> { let root = tree.root_node(); - let mut elements = Vec::new(); + let mut elements: Vec = Vec::new(); // This query covers all top-level elements (direct children of source) // We need to capture everything so nothing gets lost @@ -345,7 +353,33 @@ fn extract_tokens_to_reorder( // This may look inefficient but in practice it should not have much impact if class_docstring_comments_rows.contains(&node.start_position().row) { continue; - } else { + } + + // Here we look for inline comments after declarations, and if + // so, we attach them as inline to the declaration. For + // example: + // + // var test = 1 # inline comment + // + // Without this code, the comment would wrap to the next line. + let mut handled_inline = false; + if let Some(last_element) = elements.last_mut() { + let last_end = last_element.end_byte; + let comment_start = node.start_byte(); + if last_end <= comment_start && comment_start <= content.len() { + if let Some(spacing) = content.get(last_end..comment_start) { + let has_newline = spacing.contains('\n') || spacing.contains('\r'); + if !has_newline { + last_element.original_text.push_str(spacing); + last_element.original_text.push_str(&text); + last_element.end_byte = node.end_byte(); + handled_inline = true; + } + } + } + } + + if !handled_inline { pending_comments.push(PendingAttachment { start_byte: node.start_byte(), text: text.clone(), diff --git a/src/scripts/benchmark.rs b/src/scripts/benchmark.rs index 2f11b35..22cc2d1 100644 --- a/src/scripts/benchmark.rs +++ b/src/scripts/benchmark.rs @@ -13,7 +13,7 @@ //! cargo run --bin benchmark --release >> benchmark_results.txt //! git checkout - //! ``` -use gdscript_formatter::{formatter::format_gdscript_with_config, FormatterConfig}; +use gdscript_formatter::{FormatterConfig, formatter::format_gdscript_with_config}; use std::{fs, time::Instant}; const ITERATIONS: u16 = 40; diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 18158d8..39be9b3 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -51,6 +51,7 @@ fn test_reorder_file(file_path: &Path) { file_path, &FormatterConfig { reorder_code: true, + safe: true, ..Default::default() }, true, diff --git a/tests/reorder_code/expected/reorder_inline_comment.gd b/tests/reorder_code/expected/reorder_inline_comment.gd new file mode 100644 index 0000000..3bd134c --- /dev/null +++ b/tests/reorder_code/expected/reorder_inline_comment.gd @@ -0,0 +1,7 @@ +extends Node + +var test = 1 # Inline comment + + +func _ready(): + pass diff --git a/tests/reorder_code/input/reorder_inline_comment.gd b/tests/reorder_code/input/reorder_inline_comment.gd new file mode 100644 index 0000000..a3bcafc --- /dev/null +++ b/tests/reorder_code/input/reorder_inline_comment.gd @@ -0,0 +1,7 @@ +func _ready(): + pass + +extends Node + +var test = 1 # Inline comment +