From 91364824440821a48a8c9386bab0c4d868c3a71f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20Kr=C3=BCger?= Date: Mon, 11 May 2026 09:44:49 +0200 Subject: [PATCH 1/5] Add syntax highlighting for JSON command output Adds Theme helpers for JSON strings, numbers, booleans, and null, and a JsonOutputHighlighter that renders a JsonElement as Spectre.Console markup matching the indented Utf8JsonWriter layout. PrintState now applies the highlighter when JSON output is written to the terminal (file redirection and non-JSON formats are unchanged). Closes #79. --- .../UtilTest/JsonOutputHighlighterTests.cs | 69 +++++++++ .../JsonOutputHighlighter.cs | 139 ++++++++++++++++++ .../ShellInterpreter.cs | 15 ++ .../Azure.Data.Cosmos.Shell.Core/Theme.cs | 20 +++ 4 files changed, 243 insertions(+) create mode 100644 CosmosDBShell.Tests/UtilTest/JsonOutputHighlighterTests.cs create mode 100644 CosmosDBShell/Azure.Data.Cosmos.Shell.Core/JsonOutputHighlighter.cs diff --git a/CosmosDBShell.Tests/UtilTest/JsonOutputHighlighterTests.cs b/CosmosDBShell.Tests/UtilTest/JsonOutputHighlighterTests.cs new file mode 100644 index 0000000..fb7dc56 --- /dev/null +++ b/CosmosDBShell.Tests/UtilTest/JsonOutputHighlighterTests.cs @@ -0,0 +1,69 @@ +// ------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +// ------------------------------------------------------------ + +using System.Text.Json; + +using Azure.Data.Cosmos.Shell.Core; + +namespace CosmosShell.Tests.UtilTest; + +public class JsonOutputHighlighterTests +{ + [Fact] + public void Primitives_AreColoredByType() + { + var element = JsonSerializer.Deserialize("{ \"name\": \"alice\", \"age\": 42, \"active\": true, \"nick\": null }"); + + var markup = JsonOutputHighlighter.BuildMarkup(element); + + // Property name uses the JSON property color (cyan). + Assert.Contains("[cyan]\"name\"[/]", markup); + + // Each value type uses its dedicated helper from Theme. 
+ Assert.Contains("[violet]\"alice\"[/]", markup); + Assert.Contains("[violet]42[/]", markup); + Assert.Contains("[violet]true[/]", markup); + Assert.Contains("[violet]null[/]", markup); + + // Brackets and separators use the bracket color (yellow). + Assert.Contains("[yellow]{[/]", markup); + Assert.Contains("[yellow]}[/]", markup); + Assert.Contains("[yellow]:[/]", markup); + Assert.Contains("[yellow],[/]", markup); + } + + [Fact] + public void NestedObjectsAndArrays_AreIndented() + { + var element = JsonSerializer.Deserialize("{ \"items\": [1, 2] }"); + + var markup = JsonOutputHighlighter.BuildMarkup(element); + + // Two-space indentation matching Utf8JsonWriter(Indented=true). + Assert.Contains("\n [cyan]\"items\"[/]", markup); + Assert.Contains("\n [violet]1[/]", markup); + Assert.Contains("\n [violet]2[/]", markup); + } + + [Fact] + public void EmptyObjectAndArray_RenderInline() + { + var emptyObject = JsonSerializer.Deserialize("{}"); + var emptyArray = JsonSerializer.Deserialize("[]"); + + Assert.Equal("[yellow]{[/][yellow]}[/]", JsonOutputHighlighter.BuildMarkup(emptyObject)); + Assert.Equal("[yellow][[[/][yellow]]][/]", JsonOutputHighlighter.BuildMarkup(emptyArray)); + } + + [Fact] + public void StringValues_AreJsonAndMarkupEscaped() + { + var element = JsonSerializer.Deserialize("{ \"q\": \"a\\\"b\" }"); + + var markup = JsonOutputHighlighter.BuildMarkup(element); + + // The embedded quote stays JSON-escaped inside the markup token. + Assert.Contains("[violet]\"a\\u0022b\"[/]", markup); + } +} diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/JsonOutputHighlighter.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/JsonOutputHighlighter.cs new file mode 100644 index 0000000..6fb39a8 --- /dev/null +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/JsonOutputHighlighter.cs @@ -0,0 +1,139 @@ +// ------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// ------------------------------------------------------------ +namespace Azure.Data.Cosmos.Shell.Core; + +using System.Text; +using System.Text.Json; +using Spectre.Console; + +/// <summary> +/// Produces a Spectre.Console markup string for a <see cref="JsonElement"/>, applying the +/// JSON colors defined in <see cref="Theme"/>. The resulting layout matches the indented +/// output produced by <see cref="Utf8JsonWriter"/> with Indented = true. +/// </summary> +internal static class JsonOutputHighlighter +{ + private const int IndentSize = 2; + + public static string BuildMarkup(JsonElement element) + { + var sb = new StringBuilder(); + WriteValue(sb, element, indent: 0); + return sb.ToString(); + } + + private static void WriteValue(StringBuilder sb, JsonElement element, int indent) + { + switch (element.ValueKind) + { + case JsonValueKind.Object: + WriteObject(sb, element, indent); + break; + case JsonValueKind.Array: + WriteArray(sb, element, indent); + break; + case JsonValueKind.String: + sb.Append(Theme.FormatJsonString(EncodeJsonString(element.GetString() ?? string.Empty))); + break; + case JsonValueKind.Number: + sb.Append(Theme.FormatJsonNumber(element.GetRawText())); + break; + case JsonValueKind.True: + case JsonValueKind.False: + sb.Append(Theme.FormatJsonBoolean(element.GetRawText())); + break; + case JsonValueKind.Null: + sb.Append(Theme.FormatJsonNull("null")); + break; + default: + sb.Append(Markup.Escape(element.GetRawText())); + break; + } + } + + private static void WriteObject(StringBuilder sb, JsonElement element, int indent) + { + var enumerator = element.EnumerateObject(); + if (!enumerator.MoveNext()) + { + sb.Append(Theme.FormatJsonBracket("{")); + sb.Append(Theme.FormatJsonBracket("}")); + return; + } + + sb.Append(Theme.FormatJsonBracket("{")); + sb.Append('\n'); + + var first = true; + do + { + if (!first) + { + sb.Append(Theme.FormatJsonBracket(",")); + sb.Append('\n'); + } + + first = false; + + AppendIndent(sb, indent + 1); + sb.Append(Theme.FormatJsonProperty(EncodeJsonString(enumerator.Current.Name)));
sb.Append(Theme.FormatJsonBracket(":")); + sb.Append(' '); + WriteValue(sb, enumerator.Current.Value, indent + 1); + } + while (enumerator.MoveNext()); + + sb.Append('\n'); + AppendIndent(sb, indent); + sb.Append(Theme.FormatJsonBracket("}")); + } + + private static void WriteArray(StringBuilder sb, JsonElement element, int indent) + { + var enumerator = element.EnumerateArray(); + if (!enumerator.MoveNext()) + { + sb.Append(Theme.FormatJsonBracket("[")); + sb.Append(Theme.FormatJsonBracket("]")); + return; + } + + sb.Append(Theme.FormatJsonBracket("[")); + sb.Append('\n'); + + var first = true; + do + { + if (!first) + { + sb.Append(Theme.FormatJsonBracket(",")); + sb.Append('\n'); + } + + first = false; + + AppendIndent(sb, indent + 1); + WriteValue(sb, enumerator.Current, indent + 1); + } + while (enumerator.MoveNext()); + + sb.Append('\n'); + AppendIndent(sb, indent); + sb.Append(Theme.FormatJsonBracket("]")); + } + + private static void AppendIndent(StringBuilder sb, int level) + { + sb.Append(' ', level * IndentSize); + } + + /// + /// Serializes the value as a JSON string literal (with surrounding quotes and JSON escapes) + /// so that embedded quotes, backslashes, and control characters render correctly. + /// + private static string EncodeJsonString(string value) + { + return JsonSerializer.Serialize(value); + } +} diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.cs index 7460313..594ecb5 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.cs @@ -951,6 +951,21 @@ internal CommandState PrintState(CommandState state) if (state.Result?.DataType == Parser.DataType.Json) { + // When writing JSON to the terminal (not redirected to a file), apply + // syntax highlighting using the configured Spectre.Console theme. 
File + // redirection still receives plain text so downstream tooling and tests + // are unaffected. + if (state.OutputFormat == OutputFormat.JSon && string.IsNullOrEmpty(this.StdOutRedirect)) + { + var element = (JsonElement?)state.Result.ConvertShellObject(Parser.DataType.Json); + if (element.HasValue) + { + AnsiConsole.MarkupLine(JsonOutputHighlighter.BuildMarkup(element.Value)); + state.Result = null; + return state; + } + } + output = state.GenerateOutputText(); } else diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/Theme.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/Theme.cs index 19ae0a2..6fe0e9a 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/Theme.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/Theme.cs @@ -72,6 +72,26 @@ public static string FormatJsonBracket(string text) return $"[yellow]{Markup.Escape(text)}[/]"; } + public static string FormatJsonString(string text) + { + return $"[violet]{Markup.Escape(text)}[/]"; + } + + public static string FormatJsonNumber(string text) + { + return $"[violet]{Markup.Escape(text)}[/]"; + } + + public static string FormatJsonBoolean(string text) + { + return $"[violet]{Markup.Escape(text)}[/]"; + } + + public static string FormatJsonNull(string text) + { + return $"[violet]{Markup.Escape(text)}[/]"; + } + internal static string FormatStringLiteral(string text) { return $"[violet]{Markup.Escape(text)}[/]"; From 03624c8cb805bab13f54ad2d6c3b6a57071d4d10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20Kr=C3=BCger?= Date: Mon, 11 May 2026 10:09:32 +0200 Subject: [PATCH 2/5] Add depth-cycled bracket coloring (rainbow brackets) Adds Theme.GetBracketColor(depth) and Theme.FormatBracket(text, depth) using a 3-color cycle (gold1/orchid/deepskyblue1). The JSON output highlighter and the input HighlightingVisitor now color matched braces, brackets, and parentheses by their shared nesting depth, so {[(...)]} pairs are visually distinguishable. Comma and colon continue to use the existing punctuation color. 
--- .../UtilTest/JsonOutputHighlighterTests.cs | 31 +++++++++++--- .../JsonOutputHighlighter.cs | 16 +++---- .../ShellInterpreter.Highlighter.cs | 42 +++++++++++++++++-- .../Azure.Data.Cosmos.Shell.Core/Theme.cs | 37 ++++++++++++++++ 4 files changed, 109 insertions(+), 17 deletions(-) diff --git a/CosmosDBShell.Tests/UtilTest/JsonOutputHighlighterTests.cs b/CosmosDBShell.Tests/UtilTest/JsonOutputHighlighterTests.cs index fb7dc56..d377f93 100644 --- a/CosmosDBShell.Tests/UtilTest/JsonOutputHighlighterTests.cs +++ b/CosmosDBShell.Tests/UtilTest/JsonOutputHighlighterTests.cs @@ -26,9 +26,11 @@ public void Primitives_AreColoredByType() Assert.Contains("[violet]true[/]", markup); Assert.Contains("[violet]null[/]", markup); - // Brackets and separators use the bracket color (yellow). - Assert.Contains("[yellow]{[/]", markup); - Assert.Contains("[yellow]}[/]", markup); + // Outer braces use the depth-0 bracket color; comma and colon use the + // shared punctuation color. + var depth0 = Theme.GetBracketColor(0); + Assert.Contains($"[{depth0}]{{[/]", markup); + Assert.Contains($"[{depth0}]}}[/]", markup); Assert.Contains("[yellow]:[/]", markup); Assert.Contains("[yellow],[/]", markup); } @@ -52,8 +54,9 @@ public void EmptyObjectAndArray_RenderInline() var emptyObject = JsonSerializer.Deserialize("{}"); var emptyArray = JsonSerializer.Deserialize("[]"); - Assert.Equal("[yellow]{[/][yellow]}[/]", JsonOutputHighlighter.BuildMarkup(emptyObject)); - Assert.Equal("[yellow][[[/][yellow]]][/]", JsonOutputHighlighter.BuildMarkup(emptyArray)); + var depth0 = Theme.GetBracketColor(0); + Assert.Equal($"[{depth0}]{{[/][{depth0}]}}[/]", JsonOutputHighlighter.BuildMarkup(emptyObject)); + Assert.Equal($"[{depth0}][[[/][{depth0}]]][/]", JsonOutputHighlighter.BuildMarkup(emptyArray)); } [Fact] @@ -66,4 +69,22 @@ public void StringValues_AreJsonAndMarkupEscaped() // The embedded quote stays JSON-escaped inside the markup token. 
Assert.Contains("[violet]\"a\\u0022b\"[/]", markup); } + + [Fact] + public void NestedBrackets_CycleColorsByDepth() + { + // Depth 0 -> '{', depth 1 -> '[', depth 2 -> '{' (next nested object). + var element = JsonSerializer.Deserialize("{ \"a\": [ { \"b\": 1 } ] }"); + + var markup = JsonOutputHighlighter.BuildMarkup(element); + + Assert.Contains($"[{Theme.GetBracketColor(0)}]{{[/]", markup); + Assert.Contains($"[{Theme.GetBracketColor(1)}][[[/]", markup); + Assert.Contains($"[{Theme.GetBracketColor(2)}]{{[/]", markup); + + // Closing brackets should use the same color as their matching opener. + Assert.Contains($"[{Theme.GetBracketColor(2)}]}}[/]", markup); + Assert.Contains($"[{Theme.GetBracketColor(1)}]]][/]", markup); + Assert.Contains($"[{Theme.GetBracketColor(0)}]}}[/]", markup); + } } diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/JsonOutputHighlighter.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/JsonOutputHighlighter.cs index 6fb39a8..5f2bf9c 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/JsonOutputHighlighter.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/JsonOutputHighlighter.cs @@ -57,12 +57,12 @@ private static void WriteObject(StringBuilder sb, JsonElement element, int inden var enumerator = element.EnumerateObject(); if (!enumerator.MoveNext()) { - sb.Append(Theme.FormatJsonBracket("{")); - sb.Append(Theme.FormatJsonBracket("}")); + sb.Append(Theme.FormatBracket("{", indent)); + sb.Append(Theme.FormatBracket("}", indent)); return; } - sb.Append(Theme.FormatJsonBracket("{")); + sb.Append(Theme.FormatBracket("{", indent)); sb.Append('\n'); var first = true; @@ -86,7 +86,7 @@ private static void WriteObject(StringBuilder sb, JsonElement element, int inden sb.Append('\n'); AppendIndent(sb, indent); - sb.Append(Theme.FormatJsonBracket("}")); + sb.Append(Theme.FormatBracket("}", indent)); } private static void WriteArray(StringBuilder sb, JsonElement element, int indent) @@ -94,12 +94,12 @@ private static void 
WriteArray(StringBuilder sb, JsonElement element, int indent var enumerator = element.EnumerateArray(); if (!enumerator.MoveNext()) { - sb.Append(Theme.FormatJsonBracket("[")); - sb.Append(Theme.FormatJsonBracket("]")); + sb.Append(Theme.FormatBracket("[", indent)); + sb.Append(Theme.FormatBracket("]", indent)); return; } - sb.Append(Theme.FormatJsonBracket("[")); + sb.Append(Theme.FormatBracket("[", indent)); sb.Append('\n'); var first = true; @@ -120,7 +120,7 @@ private static void WriteArray(StringBuilder sb, JsonElement element, int indent sb.Append('\n'); AppendIndent(sb, indent); - sb.Append(Theme.FormatJsonBracket("]")); + sb.Append(Theme.FormatBracket("]", indent)); } private static void AppendIndent(StringBuilder sb, int level) diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs index de6db4b..bc8a45a 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs @@ -104,6 +104,12 @@ internal class HighlightingVisitor : IAstVisitor private int currentPosition; private string? currentCommand; + // Tracks the current paired-bracket nesting level so that '{', '[', '(' and their + // closing counterparts can be colored using <see cref="Theme.FormatBracket"/>. + // The counter is shared across bracket types, mirroring the rainbow bracket + // behavior found in modern editors. + private int bracketDepth; + public HighlightingVisitor(string text, ShellInterpreter interpreter) { this.text = text; @@ -294,7 +300,17 @@ public void Visit(BinaryOperatorExpression binaryOperatorExpression) public void Visit(ParensExpression parensExpression) { + // Color the parentheses using the current bracket depth so they participate + // in the same rainbow cycle as JSON braces and brackets.
+ var parenDepth = this.bracketDepth; + + this.AppendToken(parensExpression.LParToken, Theme.FormatBracket(parensExpression.LParToken.Value, parenDepth)); + + this.bracketDepth = parenDepth + 1; parensExpression.InnerExpression.Accept(this); + this.bracketDepth = parenDepth; + + this.AppendToken(parensExpression.RParToken, Theme.FormatBracket(parensExpression.RParToken.Value, parenDepth)); } public void Visit(JsonExpression jsonExpression) @@ -303,15 +319,21 @@ public void Visit(JsonExpression jsonExpression) var startPos = jsonExpression.Start; this.AppendUpTo(startPos); + // Color the opening brace using the current bracket depth, then increment + // so nested braces/brackets/parens use the next color in the cycle. + var braceDepth = this.bracketDepth; + // Find and highlight the opening brace var openBracePos = this.text.IndexOf('{', this.currentPosition); if (openBracePos >= 0 && openBracePos < jsonExpression.Start + jsonExpression.Length) { this.AppendUpTo(openBracePos); - this.result.Append(Theme.FormatJsonBracket("{")); + this.result.Append(Theme.FormatBracket("{", braceDepth)); this.currentPosition = openBracePos + 1; } + this.bracketDepth = braceDepth + 1; + // Process the properties foreach (var property in jsonExpression.Properties) { @@ -389,12 +411,15 @@ public void Visit(JsonExpression jsonExpression) } } + // Restore depth so the closing brace matches its opener color. 
+ this.bracketDepth = braceDepth; + // Find and highlight the closing brace var endBracePos = this.text.LastIndexOf('}', jsonExpression.Start + jsonExpression.Length - 1); if (endBracePos >= 0 && endBracePos >= this.currentPosition) { this.AppendUpTo(endBracePos); - this.result.Append(Theme.FormatJsonBracket("}")); + this.result.Append(Theme.FormatBracket("}", braceDepth)); this.currentPosition = endBracePos + 1; } } @@ -405,15 +430,21 @@ public void Visit(JsonArrayExpression jsonArrayExpression) var startPos = jsonArrayExpression.Start; this.AppendUpTo(startPos); + // Color the opening bracket using the current bracket depth, then increment + // so nested braces/brackets/parens use the next color in the cycle. + var bracketDepthForPair = this.bracketDepth; + // Find and highlight the opening bracket var openBracketPos = this.text.IndexOf('[', this.currentPosition); if (openBracketPos >= 0 && openBracketPos < jsonArrayExpression.Start + jsonArrayExpression.Length) { this.AppendUpTo(openBracketPos); - this.result.Append(Theme.FormatJsonBracket("[")); + this.result.Append(Theme.FormatBracket("[", bracketDepthForPair)); this.currentPosition = openBracketPos + 1; } + this.bracketDepth = bracketDepthForPair + 1; + // Process each element in the array for (int i = 0; i < jsonArrayExpression.Expressions.Count; i++) { @@ -435,13 +466,16 @@ public void Visit(JsonArrayExpression jsonArrayExpression) } } + // Restore depth so the closing bracket matches its opener color. 
+ this.bracketDepth = bracketDepthForPair; + // Find and highlight the closing bracket var closeBracketPos = this.text.IndexOf(']', this.currentPosition); if (closeBracketPos >= 0 && closeBracketPos < jsonArrayExpression.Start + jsonArrayExpression.Length) { // AppendUpTo preserves any whitespace before the closing bracket this.AppendUpTo(closeBracketPos); - this.result.Append(Theme.FormatJsonBracket("]")); + this.result.Append(Theme.FormatBracket("]", bracketDepthForPair)); this.currentPosition = closeBracketPos + 1; } diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/Theme.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/Theme.cs index 6fe0e9a..bea2164 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/Theme.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/Theme.cs @@ -12,6 +12,19 @@ internal static class Theme { public const string CommandColor = "[lightyellow3]"; + /// + /// Colors used for paired brackets ({}, [], ()) cycled by nesting depth, similar to + /// the "bracket pair colorization" feature in modern editors. The cycle is shared + /// across bracket types so that a single visual depth counter spans every kind of + /// pair. + /// + private static readonly string[] BracketDepthColors = + { + "gold1", + "orchid", + "deepskyblue1", + }; + public static string FormatUnknownCommand(string command) { return $"[bold red]{Markup.Escape(command)}[/]"; @@ -72,6 +85,30 @@ public static string FormatJsonBracket(string text) return $"[yellow]{Markup.Escape(text)}[/]"; } + /// + /// Returns the Spectre.Console color name for a bracket at the given (zero-based) + /// nesting depth. Colors cycle when the depth exceeds the palette length. + /// + public static string GetBracketColor(int depth) + { + if (depth < 0) + { + depth = 0; + } + + return BracketDepthColors[depth % BracketDepthColors.Length]; + } + + /// + /// Formats a single bracket character ('{', '}', '[', ']', '(', ')') with the + /// depth-cycled color. 
Comma and colon should continue to use + /// <see cref="FormatJsonBracket"/> instead. + /// + public static string FormatBracket(string text, int depth) + { + return $"[{GetBracketColor(depth)}]{Markup.Escape(text)}[/]"; + } + public static string FormatJsonString(string text) { return $"[violet]{Markup.Escape(text)}[/]"; From be7aa19250a3af993120dea65ab28b4efe205695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20Kr=C3=BCger?= Date: Mon, 11 May 2026 10:17:23 +0200 Subject: [PATCH 3/5] Fix duplicated text when highlighting interpolated strings Sub-expressions inside an interpolated string interpolation are produced by a separate Lexer over the interior content, so their Start/Length positions are relative to that inner buffer, not the outer line. Recursing into them from HighlightingVisitor.Visit(InterpolatedStringExpression) caused AppendUpTo and Substring to index into the wrong buffer and smear characters from the start of the line into the rendered output (the reported case typed two interpolated strings in a row and the highlighter duplicated them). Render the whole interpolated string as a single string-literal token instead, and add a regression test that asserts the highlighter round-trips the exact input text. --- CosmosDBShell.Tests/Shell/HighlighterTests.cs | 17 +++++++++++++++++ .../ShellInterpreter.Highlighter.cs | 19 +++++++++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/CosmosDBShell.Tests/Shell/HighlighterTests.cs b/CosmosDBShell.Tests/Shell/HighlighterTests.cs index 1c8b8e1..8f6da2a 100644 --- a/CosmosDBShell.Tests/Shell/HighlighterTests.cs +++ b/CosmosDBShell.Tests/Shell/HighlighterTests.cs @@ -116,6 +116,23 @@ public void TestInterpolatedStringHighlight() Assert.Equal("echo", segs[0].Text.Trim()); } + [Fact] + public void TestInterpolatedExpressionDoesNotDuplicateText() + { + // Regression: nested expressions inside $(...)
carry positions from a separate + // sub-Lexer, so recursing into them while indexing this.text used to smear + // characters from the start of the line into the rendered output. The full + // visible text must round-trip exactly through the highlighter. + var input = "echo \"$(3+5)\"\"$(3+5)\""; + var highlighter = (IHighlighter)ShellInterpreter.Instance; + + var res = highlighter.BuildHighlightedText(input) as Markup; + Assert.NotNull(res); + var rendered = string.Concat(res.GetSegments(AnsiConsole.Console).Select(s => s.Text)); + + Assert.Equal(input, rendered); + } + [Fact] public void TestExpressionHighlight() { diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs index bc8a45a..ae3912f 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs @@ -529,12 +529,19 @@ public void Visit(CommandExpression commandExpression) public void Visit(InterpolatedStringExpression interpolatedStringExpression) { - foreach (var expr in interpolatedStringExpression.Expressions) - { - expr.Accept(this); - } - - this.AppendUpTo(interpolatedStringExpression.Start + interpolatedStringExpression.Length); + // Interpolation sub-expressions inside "$(...)" are produced by a separate + // Lexer over the interior content (see ExpressionParser.ParseInterpolatedStringExpression), + // so their Start/Length positions live in that inner sub-buffer rather than + // in this.text. Recursing into them here would cause AppendUpTo/Substring to + // index into the wrong buffer and smear characters from the beginning of the + // outer line into the rendered output. Render the entire interpolated string + // as a single string literal token instead. 
+ this.AppendUpTo(interpolatedStringExpression.Start); + var content = this.text.Substring( + interpolatedStringExpression.Start, + Math.Min(interpolatedStringExpression.Length, this.text.Length - interpolatedStringExpression.Start)); + this.result.Append(Theme.FormatStringLiteral(content)); + this.currentPosition = interpolatedStringExpression.Start + interpolatedStringExpression.Length; } // Statement visitors From 9efaffd99c141e4a00a6d546695296d17eeb933f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20Kr=C3=BCger?= Date: Mon, 11 May 2026 10:32:34 +0200 Subject: [PATCH 4/5] Lex interpolated string interiors with accurate outer-source positions Sub-expressions inside an interpolated string interpolation were previously parsed by a fresh Lexer over the cooked content with no awareness of the outer source buffer, so the resulting AST nodes carried Start/Length values relative to that sub-buffer. Downstream consumers (most visibly the syntax highlighter) either had to fall back to rendering the entire string as one literal or risk indexing into the wrong buffer. Add a position-offset constructor to Lexer that shifts every emitted token by a fixed amount, and record a per-character source-position map for every TokenType.InterpolatedString token (covering both the explicit "$"..."" form and the implicit interpolation found inside a regular double-quoted string). ExpressionParser.ParseInterpolatedStringExpression then constructs the inner sub-Lexer with the appropriate offset and uses the source map to build synthetic identifier tokens for variable references whose ranges align with the outer text. The highlighter visits these sub-expressions in place and falls back to the literal coloring for raw text chunks. Tests cover the round-trip case, expression coloring inside an interpolated expression, and a separately colored variable reference inside a quoted string. 
--- CosmosDBShell.Tests/Shell/HighlighterTests.cs | 40 ++++++ .../ShellInterpreter.Highlighter.cs | 62 +++++++-- .../ExpressionParser.cs | 54 +++++++- .../Azure.Data.Cosmos.Shell.Parser/Lexer.cs | 130 +++++++++++++----- 4 files changed, 235 insertions(+), 51 deletions(-) diff --git a/CosmosDBShell.Tests/Shell/HighlighterTests.cs b/CosmosDBShell.Tests/Shell/HighlighterTests.cs index 8f6da2a..c438c0a 100644 --- a/CosmosDBShell.Tests/Shell/HighlighterTests.cs +++ b/CosmosDBShell.Tests/Shell/HighlighterTests.cs @@ -133,6 +133,46 @@ public void TestInterpolatedExpressionDoesNotDuplicateText() Assert.Equal(input, rendered); } + [Fact] + public void TestInterpolatedExpressionContentsAreColoredAsExpression() + { + // The '+' inside $( ... ) should be rendered with the operator color rather + // than being merged into the surrounding string-literal coloring. Spectre + // collapses adjacent segments that share a style, so any character whose + // color does not match the surrounding literal color must end up on its own + // segment — that is exactly what we want to verify. + var highlighter = (IHighlighter)ShellInterpreter.Instance; + + var res = highlighter.BuildHighlightedText("echo \"$(3+5)\"") as Markup; + Assert.NotNull(res); + var segs = res.GetSegments(AnsiConsole.Console).ToList(); + + var plusSeg = segs.FirstOrDefault(s => s.Text == "+"); + Assert.NotNull(plusSeg); + + var quotedSeg = segs.FirstOrDefault(s => s.Text.Contains("\"")); + Assert.NotNull(quotedSeg); + Assert.NotEqual(quotedSeg.Style.Foreground, plusSeg.Style.Foreground); + } + + [Fact] + public void TestInterpolatedVariableIsColoredSeparately() + { + // $name inside an interpolated string should be rendered as a variable + // reference, not lumped together with the quoted text. 
+ var highlighter = (IHighlighter)ShellInterpreter.Instance; + + var input = "echo \"Hello $name!\""; + var res = highlighter.BuildHighlightedText(input) as Markup; + Assert.NotNull(res); + + var rendered = string.Concat(res.GetSegments(AnsiConsole.Console).Select(s => s.Text)); + Assert.Equal(input, rendered); + + var segs = res.GetSegments(AnsiConsole.Console).ToList(); + Assert.Contains(segs, s => s.Text == "$name"); + } + [Fact] public void TestExpressionHighlight() { diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs index ae3912f..35f1ec3 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.Highlighter.cs @@ -529,19 +529,57 @@ public void Visit(CommandExpression commandExpression) public void Visit(InterpolatedStringExpression interpolatedStringExpression) { - // Interpolation sub-expressions inside "$(...)" are produced by a separate - // Lexer over the interior content (see ExpressionParser.ParseInterpolatedStringExpression), - // so their Start/Length positions live in that inner sub-buffer rather than - // in this.text. Recursing into them here would cause AppendUpTo/Substring to - // index into the wrong buffer and smear characters from the beginning of the - // outer line into the rendered output. Render the entire interpolated string - // as a single string literal token instead. + // The interpolated string is rendered as a string-literal background; any + // sub-expressions that carry accurate outer-source positions (variable + // references and "$(...)" interpolations whose tokens have been produced with + // the appropriate Lexer position offset) are visited in place so they pick up + // their dedicated colors. 
Sub-expressions whose positions don't lie within the + // interpolated string's outer span are treated as literal text — this covers + // the ConstantExpression placeholders the parser emits for raw text chunks + // between interpolations, which still carry the surrounding string token's + // position. this.AppendUpTo(interpolatedStringExpression.Start); - var content = this.text.Substring( - interpolatedStringExpression.Start, - Math.Min(interpolatedStringExpression.Length, this.text.Length - interpolatedStringExpression.Start)); - this.result.Append(Theme.FormatStringLiteral(content)); - this.currentPosition = interpolatedStringExpression.Start + interpolatedStringExpression.Length; + + var endPos = Math.Min( + interpolatedStringExpression.Start + interpolatedStringExpression.Length, + this.text.Length); + + var interpolations = new List(); + foreach (var expr in interpolatedStringExpression.Expressions) + { + if (expr is ConstantExpression) + { + continue; + } + + if (expr.Start <= interpolatedStringExpression.Start || expr.Start >= endPos) + { + continue; + } + + interpolations.Add(expr); + } + + interpolations.Sort((a, b) => a.Start.CompareTo(b.Start)); + + foreach (var expr in interpolations) + { + if (expr.Start > this.currentPosition) + { + var chunk = this.text.Substring(this.currentPosition, expr.Start - this.currentPosition); + this.result.Append(Theme.FormatStringLiteral(chunk)); + this.currentPosition = expr.Start; + } + + expr.Accept(this); + } + + if (this.currentPosition < endPos) + { + var chunk = this.text.Substring(this.currentPosition, endPos - this.currentPosition); + this.result.Append(Theme.FormatStringLiteral(chunk)); + this.currentPosition = endPos; + } } // Statement visitors diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/ExpressionParser.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/ExpressionParser.cs index 50a8d95..4a0a7f7 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/ExpressionParser.cs +++ 
b/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/ExpressionParser.cs @@ -671,6 +671,41 @@ private InterpolatedStringExpression ParseInterpolatedStringExpression(Token tok var content = token.Value; // The content without the quotes var position = 0; + // Mapping from each character index in the cooked content to its absolute + // position in the original outer source buffer (accounting for escape sequences). + // Available for tokens produced by the same lexer that owns this parser; falls back + // to null for synthetic interpolated string tokens produced elsewhere. + var sourceMap = this.lexer.GetInterpolatedStringSourceMap(token); + + // Returns the absolute outer source position for the given content index. When no + // mapping is available (defensive fallback) callers degrade to the surrounding + // interpolated string token's position. + int OuterPos(int contentIndex) + { + if (sourceMap != null && contentIndex >= 0 && contentIndex < sourceMap.Count) + { + return sourceMap[contentIndex]; + } + + return token.Start; + } + + // Builds a synthetic identifier token spanning a slice of the outer source text + // for sub-expressions extracted from the interpolated string (variable references + // and the like). Using accurate positions allows the syntax highlighter and other + // tooling to operate on these nodes without consulting the cooked content. 
+ Token MakeOuterToken(string value, int contentStart, int contentEnd) + { + if (sourceMap == null || contentEnd <= contentStart) + { + return token; + } + + var startOuter = OuterPos(contentStart); + var endOuter = OuterPos(contentEnd - 1) + 1; + return new Token(TokenType.Identifier, value, startOuter, Math.Max(0, endOuter - startOuter)); + } + while (position < content.Length) { // Find the next interpolation @@ -764,7 +799,12 @@ private InterpolatedStringExpression ParseInterpolatedStringExpression(Token tok // Parse the expression if (!string.IsNullOrWhiteSpace(exprContent)) { - var exprLexer = new Lexer(exprContent); + // Pass the absolute outer position of the first char inside the + // parentheses as the lexer's positionOffset, so tokens produced + // for the nested expression carry positions relative to the outer + // source buffer (required for syntax highlighting). + var innerOffset = OuterPos(startExprPos); + var exprLexer = new Lexer(exprContent, innerOffset); var exprParser = new ExpressionParser(exprLexer); var expr = exprParser.ParseExpression(); @@ -797,14 +837,19 @@ private InterpolatedStringExpression ParseInterpolatedStringExpression(Token tok var varName = content.Substring(startVarPos, position - startVarPos); if (!string.IsNullOrEmpty(varName)) { + // Span the synthetic token from the leading '$' through the last + // character of the variable name so the AST node carries an accurate + // outer-source range. 
+ var varToken = MakeOuterToken(varName, dollarIndex, position); + // Check if it contains property access or array access if (varName.Contains('.') || varName.Contains('[')) { - expressions.Add(new JSonPathExpression(token, varName)); + expressions.Add(new JSonPathExpression(varToken, varName)); } else { - expressions.Add(new VariableExpression(token, varName)); + expressions.Add(new VariableExpression(varToken, varName)); } } } @@ -822,7 +867,8 @@ private InterpolatedStringExpression ParseInterpolatedStringExpression(Token tok var varName = content.Substring(startVarPos, position - startVarPos); if (!string.IsNullOrEmpty(varName)) { - expressions.Add(new VariableExpression(token, varName)); + var varToken = MakeOuterToken(varName, dollarIndex, position); + expressions.Add(new VariableExpression(varToken, varName)); } } else diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/Lexer.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/Lexer.cs index c28b170..14ad59f 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/Lexer.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/Lexer.cs @@ -216,13 +216,27 @@ internal class Lexer { private readonly string input; private readonly Stack putBackTokens; + private readonly int positionOffset; + private readonly Dictionary interpolatedStringSourceMaps = new(); private int position; private Token? lastToken; public Lexer(string input) + : this(input, 0) + { + } + + /// <summary> + /// Creates a lexer that reports token positions shifted by <paramref name="positionOffset"/>. + /// Used when lexing a substring of a larger source buffer (for example, the contents of + /// a $(...) interpolation inside an interpolated string) so that the produced + /// tokens carry positions relative to the outer buffer. + /// </summary> + public Lexer(string input, int positionOffset) { this.input = input ??
string.Empty; this.position = 0; + this.positionOffset = positionOffset; this.putBackTokens = new Stack(); this.lastToken = null; } @@ -231,6 +245,23 @@ public Lexer(string input) public ErrorList Errors { get; } = new ErrorList(); + /// <summary> + /// Returns the per-character source-position mapping recorded for a previously produced + /// interpolated string token, or null if the token did not originate from this + /// lexer or contained no mapping. Each entry is the absolute source position of the + /// corresponding character in the cooked token value, taking <see cref="positionOffset"/> + /// into account. + /// </summary> + internal IReadOnlyList? GetInterpolatedStringSourceMap(Token token) + { + return this.interpolatedStringSourceMaps.TryGetValue(token, out var map) ? map : null; + } + + private Token MakeToken(TokenType type, string value, int rawStart, int length) + { + return new Token(type, value, rawStart + this.positionOffset, length); + } + public IEnumerable Tokenize() { Token? token; @@ -316,51 +347,51 @@ private static bool IsVariableIdentifierPart(char ch) { case '|': this.Advance(); - return new Token(TokenType.Pipe, "|", startPosition, 1); + return this.MakeToken(TokenType.Pipe, "|", startPosition, 1); case '(': this.Advance(); - return new Token(TokenType.OpenParenthesis, "(", startPosition, 1); + return this.MakeToken(TokenType.OpenParenthesis, "(", startPosition, 1); case ')': this.Advance(); - return new Token(TokenType.CloseParenthesis, ")", startPosition, 1); + return this.MakeToken(TokenType.CloseParenthesis, ")", startPosition, 1); case '[': this.Advance(); - return new Token(TokenType.OpenBracket, "[", startPosition, 1); + return this.MakeToken(TokenType.OpenBracket, "[", startPosition, 1); case ']': this.Advance(); - return new Token(TokenType.CloseBracket, "]", startPosition, 1); + return this.MakeToken(TokenType.CloseBracket, "]", startPosition, 1); case '{': this.Advance(); - return new Token(TokenType.OpenBrace, "{", startPosition, 1); + return
this.MakeToken(TokenType.OpenBrace, "{", startPosition, 1); case '}': this.Advance(); - return new Token(TokenType.CloseBrace, "}", startPosition, 1); + return this.MakeToken(TokenType.CloseBrace, "}", startPosition, 1); case ':': this.Advance(); - return new Token(TokenType.Colon, ":", startPosition, 1); + return this.MakeToken(TokenType.Colon, ":", startPosition, 1); case ';': this.Advance(); - return new Token(TokenType.Semicolon, ";", startPosition, 1); + return this.MakeToken(TokenType.Semicolon, ";", startPosition, 1); case ',': this.Advance(); - return new Token(TokenType.Comma, ",", startPosition, 1); + return this.MakeToken(TokenType.Comma, ",", startPosition, 1); case '+': this.Advance(); - return new Token(TokenType.Plus, "+", startPosition, 1); + return this.MakeToken(TokenType.Plus, "+", startPosition, 1); case '-': this.Advance(); - return new Token(TokenType.Minus, "-", startPosition, 1); + return this.MakeToken(TokenType.Minus, "-", startPosition, 1); case '/': // Check if this might be the start of a partition key identifier (e.g., /partitionKey) @@ -373,25 +404,25 @@ private static bool IsVariableIdentifierPart(char ch) // Otherwise treat as division operator this.Advance(); - return new Token(TokenType.Divide, "/", startPosition, 1); + return this.MakeToken(TokenType.Divide, "/", startPosition, 1); case '%': this.Advance(); - return new Token(TokenType.Mod, "%", startPosition, 1); + return this.MakeToken(TokenType.Mod, "%", startPosition, 1); case '^': this.Advance(); - return new Token(TokenType.Xor, "^", startPosition, 1); + return this.MakeToken(TokenType.Xor, "^", startPosition, 1); case '!': this.Advance(); - return new Token(TokenType.Not, "!", startPosition, 1); + return this.MakeToken(TokenType.Not, "!", startPosition, 1); case '\n': case '\r': var eolStartPos = this.position; this.SkipNewline(); - return new Token(TokenType.Eol, Environment.NewLine, startPosition, this.position - eolStartPos); + return this.MakeToken(TokenType.Eol, 
Environment.NewLine, startPosition, this.position - eolStartPos); case '#': return this.ReadComment(startPosition); @@ -411,7 +442,7 @@ private static bool IsVariableIdentifierPart(char ch) { // Unknown character, treat as single character identifier this.Advance(); - return new Token(TokenType.Identifier, ch.ToString(), startPosition, 1); + return this.MakeToken(TokenType.Identifier, ch.ToString(), startPosition, 1); } } } @@ -424,7 +455,7 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) if (this.LookAhead("&&")) { this.Advance(2); - token = new Token(TokenType.And, "&&", startPosition, 2); + token = this.MakeToken(TokenType.And, "&&", startPosition, 2); return true; } @@ -432,7 +463,7 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) if (this.LookAhead("||")) { this.Advance(2); - token = new Token(TokenType.Or, "||", startPosition, 2); + token = this.MakeToken(TokenType.Or, "||", startPosition, 2); return true; } @@ -440,7 +471,7 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) if (this.LookAhead("**")) { this.Advance(2); - token = new Token(TokenType.Pow, "**", startPosition, 2); + token = this.MakeToken(TokenType.Pow, "**", startPosition, 2); return true; } @@ -448,7 +479,7 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) if (this.LookAhead("==")) { this.Advance(2); - token = new Token(TokenType.Equal, "==", startPosition, 2); + token = this.MakeToken(TokenType.Equal, "==", startPosition, 2); return true; } @@ -456,7 +487,7 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) if (this.LookAhead("!=")) { this.Advance(2); - token = new Token(TokenType.NotEqual, "!=", startPosition, 2); + token = this.MakeToken(TokenType.NotEqual, "!=", startPosition, 2); return true; } @@ -464,7 +495,7 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? 
token) if (this.LookAhead("<=")) { this.Advance(2); - token = new Token(TokenType.LessThanOrEqual, "<=", startPosition, 2); + token = this.MakeToken(TokenType.LessThanOrEqual, "<=", startPosition, 2); return true; } @@ -472,7 +503,7 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) if (this.LookAhead(">=")) { this.Advance(2); - token = new Token(TokenType.GreaterThanOrEqual, ">=", startPosition, 2); + token = this.MakeToken(TokenType.GreaterThanOrEqual, ">=", startPosition, 2); return true; } @@ -482,22 +513,22 @@ private bool TryReadMultiCharacterToken(int startPosition, out Token? token) { case '*': this.Advance(); - token = new Token(TokenType.Multiply, "*", startPosition, 1); + token = this.MakeToken(TokenType.Multiply, "*", startPosition, 1); return true; case '=': this.Advance(); - token = new Token(TokenType.Assignment, "=", startPosition, 1); + token = this.MakeToken(TokenType.Assignment, "=", startPosition, 1); return true; case '<': this.Advance(); - token = new Token(TokenType.LessThan, "<", startPosition, 1); + token = this.MakeToken(TokenType.LessThan, "<", startPosition, 1); return true; case '>': this.Advance(); - token = new Token(TokenType.GreaterThan, ">", startPosition, 1); + token = this.MakeToken(TokenType.GreaterThan, ">", startPosition, 1); return true; } @@ -519,7 +550,7 @@ private Token ReadComment(int startPosition) this.Advance(); } - var commentToken = new Token(TokenType.Comment, sb.ToString(), startPosition, this.position - startPosition); + var commentToken = this.MakeToken(TokenType.Comment, sb.ToString(), startPosition, this.position - startPosition); this.Comments.Add(commentToken); return commentToken; } @@ -562,7 +593,7 @@ private Token ReadIdentifier(int startPosition) } } - return new Token(TokenType.Identifier, sb.ToString(), startPosition, this.position - startPosition); + return this.MakeToken(TokenType.Identifier, sb.ToString(), startPosition, this.position - startPosition); } private Token 
ReadDoubleQuotedString(int startPosition) @@ -570,11 +601,17 @@ private Token ReadDoubleQuotedString(int startPosition) var sb = new StringBuilder(); bool hasInterpolation = false; + // Mirrors the source-position tracking in ReadInterpolatedString so callers can + // map cooked content indices back to absolute outer-source positions when the + // string contains "$..." interpolations. + var sourcePositions = new List(); + // Skip opening quote this.Advance(); while (this.position < this.input.Length) { + var sourcePos = this.position + this.positionOffset; var ch = this.input[this.position]; if (ch == '"') @@ -598,6 +635,7 @@ private Token ReadDoubleQuotedString(int startPosition) default: sb.Append(ch); break; } + sourcePositions.Add(sourcePos); this.Advance(); } else @@ -610,12 +648,19 @@ private Token ReadDoubleQuotedString(int startPosition) } sb.Append(ch); + sourcePositions.Add(sourcePos); this.Advance(); } } var tokenType = hasInterpolation ? TokenType.InterpolatedString : TokenType.String; - return new Token(tokenType, sb.ToString(), startPosition, this.position - startPosition); + var token = this.MakeToken(tokenType, sb.ToString(), startPosition, this.position - startPosition); + if (hasInterpolation) + { + this.interpolatedStringSourceMaps[token] = sourcePositions.ToArray(); + } + + return token; } private Token ReadSingleQuotedString(int startPosition) @@ -654,7 +699,7 @@ private Token ReadSingleQuotedString(int startPosition) } } - return new Token(TokenType.String, sb.ToString(), startPosition, this.position - startPosition); + return this.MakeToken(TokenType.String, sb.ToString(), startPosition, this.position - startPosition); } private Token ReadNumber(int startPosition) @@ -728,7 +773,7 @@ private Token ReadNumber(int startPosition) // Determine token type based on what we found var tokenType = (hasDecimalPoint || hasExponent) ? 
TokenType.Decimal : TokenType.Number; - return new Token(tokenType, sb.ToString(), startPosition, this.position - startPosition); + return this.MakeToken(tokenType, sb.ToString(), startPosition, this.position - startPosition); } private void SkipWhitespace() @@ -787,12 +832,21 @@ private Token ReadInterpolatedString(int startPosition) { var sb = new StringBuilder(); + // Records the absolute outer-source position of the source character that + // produced each cooked character appended to sb. Used by callers + // (notably ExpressionParser.ParseInterpolatedStringExpression) + // to map indices in the cooked content back to positions in the original + // input, which is required for syntax highlighting of nested + // $(...) interpolations and $VAR references. + var sourcePositions = new List(); + // Skip the '$' and opening quote this.Advance(); // skip $ this.Advance(); // skip " while (this.position < this.input.Length) { + var sourcePos = this.position + this.positionOffset; var ch = this.input[this.position]; if (ch == '"') @@ -818,12 +872,14 @@ private Token ReadInterpolatedString(int startPosition) default: sb.Append(ch); break; } + sourcePositions.Add(sourcePos); this.Advance(); } else if (ch == '{' && this.position + 1 < this.input.Length && this.input[this.position + 1] == '{') { // Handle escaped opening brace {{ sb.Append('{'); + sourcePositions.Add(sourcePos); this.Advance(); // skip first { this.Advance(); // skip second { } @@ -831,6 +887,7 @@ private Token ReadInterpolatedString(int startPosition) { // Handle escaped closing brace }} sb.Append('}'); + sourcePositions.Add(sourcePos); this.Advance(); // skip first } this.Advance(); // skip second } } @@ -838,10 +895,13 @@ private Token ReadInterpolatedString(int startPosition) { // Regular character (including interpolation expressions) sb.Append(ch); + sourcePositions.Add(sourcePos); this.Advance(); } } - return new Token(TokenType.InterpolatedString, sb.ToString(), startPosition, this.position - startPosition); + var token =
this.MakeToken(TokenType.InterpolatedString, sb.ToString(), startPosition, this.position - startPosition); + this.interpolatedStringSourceMaps[token] = sourcePositions.ToArray(); + return token; } } \ No newline at end of file From 872d3aa7d3837f610a0b4c0342ae122944445bf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20Kr=C3=BCger?= Date: Mon, 11 May 2026 11:24:07 +0200 Subject: [PATCH 5/5] Address Copilot PR review feedback - Use ReferenceEqualityComparer for the interpolated string source-map so equivalent (record-equal) Token instances cannot collide with the actual lexer-produced tokens in the lookup map. - Lex the contents of an interpolated string interpolation from the raw outer-source slice (via new Lexer.RawInput / Lexer.PositionOffset accessors) instead of the cooked token content. The cooked content collapses escape sequences (e.g. backslash-quote becomes quote) and would otherwise drift token positions inside interpolations that contain string literals with escapes such as $( "a\nb" ). The cooked-content path is retained as a fallback for synthetic tokens that have no source map. - Add a regression test that round-trips an interpolated string containing an escape inside an interpolation. --- CosmosDBShell.Tests/Shell/HighlighterTests.cs | 18 ++++++++ .../ExpressionParser.cs | 45 ++++++++++++++++--- .../Azure.Data.Cosmos.Shell.Parser/Lexer.cs | 21 ++++++++- 3 files changed, 76 insertions(+), 8 deletions(-) diff --git a/CosmosDBShell.Tests/Shell/HighlighterTests.cs b/CosmosDBShell.Tests/Shell/HighlighterTests.cs index c438c0a..31223ec 100644 --- a/CosmosDBShell.Tests/Shell/HighlighterTests.cs +++ b/CosmosDBShell.Tests/Shell/HighlighterTests.cs @@ -173,6 +173,24 @@ public void TestInterpolatedVariableIsColoredSeparately() Assert.Contains(segs, s => s.Text == "$name"); } + [Fact] + public void TestInterpolatedExpressionWithEscapesRoundTrips() + { + // The cooked content of an interpolated string collapses escape sequences + // (e.g. 
\" -> "), so an inner Lexer that walks the cooked text would emit + // token positions that drift relative to the outer source. Verify that an + // interpolation containing an inner string literal with a backslash escape + // still renders the visible characters in their original positions. + var highlighter = (IHighlighter)ShellInterpreter.Instance; + + var input = "echo \"$( \\\"a\\nb\\\" )\""; + var res = highlighter.BuildHighlightedText(input) as Markup; + Assert.NotNull(res); + + var rendered = string.Concat(res.GetSegments(AnsiConsole.Console).Select(s => s.Text)); + Assert.Equal(input, rendered); + } + [Fact] public void TestExpressionHighlight() { diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/ExpressionParser.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/ExpressionParser.cs index 4a0a7f7..bc2b154 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/ExpressionParser.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/ExpressionParser.cs @@ -799,22 +799,53 @@ Token MakeOuterToken(string value, int contentStart, int contentEnd) // Parse the expression if (!string.IsNullOrWhiteSpace(exprContent)) { - // Pass the absolute outer position of the first char inside the - // parentheses as the lexer's positionOffset, so tokens produced - // for the nested expression carry positions relative to the outer - // source buffer (required for syntax highlighting). + // The cooked exprContent can drift from the outer source whenever + // the interpolated string contains escape sequences (for example a + // string literal inside the interpolation: $( "a\nb" )). To keep token + // positions from the inner lexer correct in those cases, lex the raw + // outer-source slice that produced this content instead of the cooked + // text. The slice runs from the absolute position of the first inner + // character to one past the absolute position of the last inner + // character (the position immediately before the closing ')'). 
+ if (sourceMap != null && this.lexer.RawInput.Length > 0) + { + var rawStartOuter = OuterPos(startExprPos); + var rawEndOuter = OuterPos(position - 2) + 1; + var lexerOriginStart = this.lexer.PositionOffset; + var rawStart = rawStartOuter - lexerOriginStart; + var rawEnd = rawEndOuter - lexerOriginStart; + if (rawStart >= 0 && rawEnd >= rawStart && rawEnd <= this.lexer.RawInput.Length) + { + var rawSlice = this.lexer.RawInput.Substring(rawStart, rawEnd - rawStart); + var rawLexer = new Lexer(rawSlice, rawStartOuter); + var rawParser = new ExpressionParser(rawLexer); + var expr = rawParser.ParseExpression(); + + if (rawLexer.Errors.Count > 0) + { + this.lexer.Errors.AddRange(rawLexer.Errors); + } + + expressions.Add(expr); + continue; + } + } + + // Fallback path: no source map (synthetic token) or the raw slice could + // not be located. Lex the cooked content with a single fixed offset. + // Token positions may drift through escape sequences but stay correct + // for escape-free interpolations, which covers the common case. 
var innerOffset = OuterPos(startExprPos); var exprLexer = new Lexer(exprContent, innerOffset); var exprParser = new ExpressionParser(exprLexer); - var expr = exprParser.ParseExpression(); + var expr2 = exprParser.ParseExpression(); - // Merge nested errors if (exprLexer.Errors.Count > 0) { this.lexer.Errors.AddRange(exprLexer.Errors); } - expressions.Add(expr); + expressions.Add(expr2); } } else if (char.IsLetter(content[position]) || content[position] == '_') diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/Lexer.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/Lexer.cs index 14ad59f..fe10198 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/Lexer.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Parser/Lexer.cs @@ -217,7 +217,14 @@ internal class Lexer private readonly string input; private readonly Stack putBackTokens; private readonly int positionOffset; - private readonly Dictionary interpolatedStringSourceMaps = new(); + + // Use reference equality on the Token key. Token is a record (value equality), + // so two distinct tokens with identical Type/Value/Start/Length would otherwise + // collide as map keys. Reference equality guarantees that only the exact token + // instance produced by ReadInterpolatedString / ReadDoubleQuotedString in this + // lexer can retrieve its source map. + private readonly Dictionary interpolatedStringSourceMaps = new(ReferenceEqualityComparer.Instance); + private int position; private Token? lastToken; @@ -245,6 +252,18 @@ public Lexer(string input, int positionOffset) public ErrorList Errors { get; } = new ErrorList(); + /// <summary> + /// Gets the raw input string this lexer is reading. Combined with + /// <see cref="PositionOffset"/>, callers can recover the original outer-source + /// substring underlying any token position produced by this lexer. + /// </summary> + internal string RawInput => this.input; + + /// <summary> + /// Gets the position offset added to every produced token's Start value.
+ /// </summary> + internal int PositionOffset => this.positionOffset; + /// /// Returns the per-character source-position mapping recorded for a previously produced /// interpolated string token, or null if the token did not originate from this