Skip to content

Commit

Permalink
Merge pull request #1043 from z2oh/jeremy/5.10/split-windows-command-…
Browse files Browse the repository at this point in the history
…line

[5.10] Add logic to split command line arguments on Windows
  • Loading branch information
ahoppen committed Feb 2, 2024
2 parents 78c59db + 73266b5 commit 7e2d80c
Show file tree
Hide file tree
Showing 2 changed files with 320 additions and 0 deletions.
217 changes: 217 additions & 0 deletions Sources/SKCore/CompilationDatabase.swift
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,11 @@ extension CompilationDatabase.Command: Codable {
if let arguments = try container.decodeIfPresent([String].self, forKey: .arguments) {
self.commandLine = arguments
} else if let command = try container.decodeIfPresent(String.self, forKey: .command) {
#if os(Windows)
self.commandLine = splitWindowsCommandLine(command, initialCommandName: true)
#else
self.commandLine = splitShellEscapedCommand(command)
#endif
} else {
throw CompilationDatabaseDecodingError.missingCommandOrArguments
}
Expand Down Expand Up @@ -355,3 +359,216 @@ public func splitShellEscapedCommand(_ cmd: String) -> [String] {
var parser = Parser(cmd[...])
return parser.parse()
}

// MARK: - Windows

fileprivate extension Character {
  /// Whether the character is a space or a tab, the only characters treated as argument-separating whitespace by
  /// the Windows command line splitter.
  var isWhitespace: Bool {
    switch self {
    case " ", "\t":
      return true
    default:
      return false
    }
  }

  /// Whether the character is a space, a tab or a NUL character.
  var isWhitespaceOrNull: Bool {
    return self.isWhitespace || self == "\0"
  }

  /// Whether the character needs special handling while splitting a Windows command line, as opposed to being copied
  /// verbatim into the current argument.
  ///
  /// Whitespace, NUL and `"` are always special. `\` is only special outside of the command name because it does not
  /// escape quotes while the command name is being scanned.
  ///
  /// - Parameter inCommandName: Whether the character belongs to the initial command name of the command line.
  func isWindowsSpecialChar(inCommandName: Bool) -> Bool {
    // NUL has to be special as well: the argument parser checks `isWhitespaceOrNull` to decide where an argument
    // ends, and that check is only reached for characters that are reported as special here.
    if isWhitespaceOrNull {
      return true
    }
    if self == #"""# {
      return true
    }
    if !inCommandName && self == #"\"# {
      return true
    }
    return false
  }
}

/// Splits a Windows command line into its arguments, unfolding quoting and backslash escapes along the way.
fileprivate struct WindowsCommandParser {
  /// The complete command line that is being split.
  private let content: String

  /// `true` while the initial command name is being parsed. In that mode `\` is an ordinary character and does not
  /// escape a following quote.
  private var parsingCommandName: Bool

  /// Position in `content` of the character that is looked at next.
  private var position: String.Index

  /// The arguments that have been parsed so far.
  private var arguments: [String] = []

  /// The character at `position`, or `nil` once the end of `content` has been reached.
  private var currentCharacter: Character? {
    return position < content.endIndex ? content[position] : nil
  }

  /// The character following `currentCharacter`, or `nil` if `currentCharacter` is the last character of `content`.
  ///
  /// This advances from `position` unconditionally, so it is only meant to be read while `currentCharacter` is
  /// non-nil.
  private var peek: Character? {
    let following = content.index(after: position)
    guard following < content.endIndex else {
      return nil
    }
    return content[following]
  }

  /// Creates a parser for `string`.
  ///
  /// - Parameters:
  ///   - string: The command line to split.
  ///   - initialCommandName: Whether the first argument of `string` is a command name, in which `\` does not escape
  ///     quotes.
  init(_ string: String, initialCommandName: Bool) {
    self.content = string
    self.parsingCommandName = initialCommandName
    self.position = self.content.startIndex
  }

  /// Designated entry point: splits the entire command line and returns its arguments.
  mutating func parse() -> [String] {
    while let character = currentCharacter {
      guard !character.isWhitespaceOrNull else {
        // Separators between arguments carry no information, skip them.
        _ = consume()
        continue
      }
      arguments.append(parseSingleArgument())
    }
    return arguments
  }

  /// Returns the current character and advances to the next one.
  ///
  /// Must not be called once the end of `content` has been reached.
  private mutating func consume() -> Character {
    guard position < content.endIndex else {
      preconditionFailure("Nothing to consume")
    }
    let consumed = content[position]
    position = content.index(after: position)
    return consumed
  }

  /// Advances past the current character, which is asserted to be `expectedCharacter`.
  private mutating func consume(expect expectedCharacter: Character) {
    assert(currentCharacter == expectedCharacter)
    _ = consume()
  }

  /// Parses one argument starting at the current character and returns it with all escaping unfolded
  /// (e.g. `\"` is returned as `"`).
  ///
  /// When this returns, the parser is positioned at the character that follows the argument.
  mutating func parseSingleArgument() -> String {
    var argument = ""
    while let character = currentCharacter {
      guard character.isWindowsSpecialChar(inCommandName: parsingCommandName) else {
        // Ordinary characters are copied verbatim.
        argument.append(consume())
        continue
      }
      switch character {
      case _ where character.isWhitespaceOrNull:
        // A separator ends the argument. Whatever follows can no longer be part of the command name.
        parsingCommandName = false
        return argument
      case "\"":
        argument += parseQuoted()
      case "\\":
        assert(!parsingCommandName, "else we'd have treated it as a normal char")
        argument += parseBackslash()
      default:
        preconditionFailure("unexpected special character")
      }
    }
    return argument
  }

  /// Parses a quoted string, assuming that the parser is positioned at its opening `"`, and returns the contents
  /// without the surrounding quotes.
  ///
  /// A quoted string that is not terminated extends to the end of the command line.
  mutating func parseQuoted() -> String {
    // The opening quote is not part of the unescaped text.
    consume(expect: "\"")

    var unquoted = ""
    while let character = currentCharacter {
      if character == "\"" {
        guard peek == "\"" else {
          // This is the closing quote. Discard it, the quoted string is complete.
          consume(expect: "\"")
          return unquoted
        }
        // Two adjacent quotes inside a quoted string stand for a single literal quote. For example
        //   `" a "" b "`
        // represents the string
        //   ` a " b `
        consume(expect: "\"")
        consume(expect: "\"")
        unquoted += "\""
      } else if character == "\\" && !parsingCommandName {
        unquoted += parseBackslash()
      } else {
        unquoted.append(consume())
      }
    }
    return unquoted
  }

  /// Consumes a run of backslashes, plus the double quote following it if that quote is escaped, and returns the text
  /// the run stands for.
  ///
  /// Backslashes need this special treatment on Windows because they both separate path components and escape double
  /// quotes:
  ///
  /// * An even number of backslashes followed by a double quote yields one backslash per pair. The double quote is
  ///   left unconsumed so that the caller interprets it as the start or end of a quoted string.
  ///
  /// * An odd number of backslashes followed by a double quote yields one backslash per pair plus a literal double
  ///   quote for the remaining backslash. The double quote is consumed.
  ///
  /// * Backslashes that are not followed by a double quote are taken literally.
  mutating func parseBackslash() -> String {
    let remainder = content[position...]
    let runEnd = remainder.firstIndex(where: { $0 != "\\" }) ?? content.endIndex
    let backslashCount = content.distance(from: position, to: runEnd)

    let isFollowedByQuote = runEnd != content.endIndex && content[runEnd] == "\""
    guard isFollowedByQuote else {
      // No quote follows the run, so every backslash stands for itself.
      position = runEnd
      return String(repeating: "\\", count: backslashCount)
    }

    var unescaped = String(repeating: "\\", count: backslashCount / 2)
    if backslashCount.isMultiple(of: 2) {
      // All backslashes are paired up. Stop at the quote and let the caller parse it.
      position = runEnd
    } else {
      // The unpaired backslash escapes the quote, which therefore becomes part of the text.
      unescaped += "\""
      position = content.index(after: runEnd)
    }
    return unescaped
  }
}

/// Splits `cmd` into its arguments according to the Windows command line quoting rules.
///
/// Sometimes `cmd` is a full command line that starts with an executable pathname. That initial pathname needs
/// different handling from the arguments following it: when CreateProcess or cmd.exe scans the pathname, `\` is not
/// treated as escaping the quote character, whereas it is when libc scans the rest of the command line.
///
/// - Parameters:
///   - cmd: The command line to split.
///   - initialCommandName: Whether `cmd` starts with such an executable pathname.
/// - Returns: The arguments of `cmd` with quoting and escaping unfolded.
public func splitWindowsCommandLine(_ cmd: String, initialCommandName: Bool) -> [String] {
  var commandParser = WindowsCommandParser(cmd, initialCommandName: initialCommandName)
  let arguments = commandParser.parse()
  return arguments
}
103 changes: 103 additions & 0 deletions Tests/SKCoreTests/CompilationDatabaseTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,80 @@ final class CompilationDatabaseTests: XCTestCase {
check("\"a\"bcd\"ef\"\"\"\"g\"", ["abcdefg"])
check("a'\\b \"c\"'", ["a\\b \"c\""])
}

/// Checks commands without any quoting or escaping: empty and whitespace-only input, single arguments (including
/// non-ASCII characters) and arguments separated by spaces. Both splitters are expected to agree on all of them.
func testSplitShellEscapedCommandBasic() {
  assertEscapedCommand("", [])
  assertEscapedCommand(" ", [])
  assertEscapedCommand("a", ["a"])
  assertEscapedCommand("abc", ["abc"])
  assertEscapedCommand("a😀c", ["a😀c"])
  assertEscapedCommand("😀c", ["😀c"])
  assertEscapedCommand("abc def", ["abc", "def"])
  // A run of several spaces separates arguments just like a single space and must not produce empty arguments.
  assertEscapedCommand("abc    def", ["abc", "def"])
}

/// Checks how double-quoted regions are split: unterminated quotes, quoted text adjacent to unquoted text, and `""`
/// inside a quoted region, which the Windows splitter unfolds to a literal `"`.
func testSplitShellEscapedCommandDoubleQuotes() {
// A quote that is never closed still produces an argument.
assertEscapedCommand("\"", [""])
assertEscapedCommand(#""a"#, ["a"])
assertEscapedCommand("\"\"", [""])
assertEscapedCommand(#""a""#, ["a"])
// `\"` inside a quoted region is an escaped quote for both splitters.
assertEscapedCommand(#""a\"""#, [#"a""#])
// Whitespace inside quotes does not separate arguments.
assertEscapedCommand(#""a b c ""#, ["a b c "])
assertEscapedCommand(#""a " "#, ["a "])
assertEscapedCommand(#""a " b"#, ["a ", "b"])
// Text directly following a closing quote belongs to the same argument.
assertEscapedCommand(#""a "b"#, ["a b"])
// The splitters disagree on `""`: the Windows expectation contains a literal `"`, the Unix expectation does not.
assertEscapedCommand(#"a"x ""b"#, ["ax b"], windows: [#"ax "b"#])

assertEscapedCommand(#""a"bcd"ef""""g""#, ["abcdefg"], windows: [#"abcdef""g"#])
}

/// Checks inputs containing single quotes. The Unix expectations treat `'` as a quoting character, while the Windows
/// expectations keep every `'` as an ordinary character and split on the spaces between them.
func testSplitShellEscapedCommandSingleQuotes() {
assertEscapedCommand("'", [""], windows: ["'"])
assertEscapedCommand("'a", ["a"], windows: ["'a"])
assertEscapedCommand("''", [""], windows: ["''"])
assertEscapedCommand("'a'", ["a"], windows: ["'a'"])
// The Unix expectation keeps `\"` verbatim inside single quotes; the Windows expectation unfolds it to `"`.
assertEscapedCommand(#"'a\"'"#, [#"a\""#], windows: [#"'a"'"#])
assertEscapedCommand(#"'a b c '"#, ["a b c "], windows: ["'a", "b", "c", "'"])
assertEscapedCommand(#"'a ' "#, ["a "], windows: ["'a", "'"])
assertEscapedCommand(#"'a ' b"#, ["a ", "b"], windows: ["'a", "'", "b"])
assertEscapedCommand(#"'a 'b"#, ["a b"], windows: ["'a", "'b"])
assertEscapedCommand(#"a'x ''b"#, ["ax b"], windows: ["a'x", "''b"])
}

/// Checks backslash handling. In the Windows expectations backslashes stay literal unless the run of backslashes is
/// directly followed by `"`; in the Unix expectations a backslash escapes the character after it.
func testSplitShellEscapedCommandBackslash() {
assertEscapedCommand(#"a\\"#, [#"a\"#], windows: [#"a\\"#])
assertEscapedCommand(#"a'\b "c"'"#, ["a\\b \"c\""], windows: [#"a'\b"#, #"c'"#])

// Runs of backslashes directly followed by a quote: both splitters agree on the result.
assertEscapedCommand(#"\""#, ["\""])
assertEscapedCommand(#"\\""#, [#"\"#])
assertEscapedCommand(#"\\\""#, [#"\""#])
// Runs of backslashes followed by a space: the Windows expectation keeps every backslash.
assertEscapedCommand(#"\\ "#, [#"\"#], windows: [#"\\"#])
assertEscapedCommand(#"\\\ "#, [#"\ "#], windows: [#"\\\"#])
}

/// Checks the effect of `initialCommandName` on the Windows splitter: while the command name is being parsed, `\`
/// does not escape a following `"`.
func testSplitShellEscapedCommandWindowsCommand() {
assertEscapedCommand(#"C:\swift.exe"#, [#"C:swift.exe"#], windows: [#"C:\swift.exe"#], initialCommandName: true)
// A backslash followed by a space is split identically on Windows with and without `initialCommandName`.
assertEscapedCommand(
#"C:\ swift.exe"#,
[#"C: swift.exe"#],
windows: [#"C:\"#, #"swift.exe"#],
initialCommandName: true
)
assertEscapedCommand(
#"C:\ swift.exe"#,
[#"C: swift.exe"#],
windows: [#"C:\"#, #"swift.exe"#],
initialCommandName: false
)
// `\"` only differs between the two modes: in the command name the backslash is kept and the quote starts a quoted
// region, otherwise the backslash escapes the quote.
assertEscapedCommand(#"C:\"swift.exe""#, [#"C:"swift.exe"#], windows: [#"C:\swift.exe"#], initialCommandName: true)
assertEscapedCommand(#"C:\"swift.exe""#, [#"C:"swift.exe"#], windows: [#"C:"swift.exe"#], initialCommandName: false)
}

/// Checks that `""` inside a quoted region is a literal `"` for the Windows splitter.
func testSplitShellEscapedCommandWindowsTwoDoubleQuotes() {
  // With Unix rules the input consists of the two adjacent quoted strings `" test with "` and `" quote"`, so their
  // concatenation contains two spaces between `with` and `quote`. With Windows rules the `""` in the middle is an
  // escaped quote inside a single quoted string.
  assertEscapedCommand(#"" test with "" quote""#, [" test with  quote"], windows: [#" test with " quote"#])
}

func testEncodeCompDBCommand() throws {
// Requires JSONEncoder.OutputFormatting.sortedKeys
func check(_ cmd: CompilationDatabase.Command, _ expected: String, file: StaticString = #filePath, line: UInt = #line) throws {
Expand Down Expand Up @@ -332,3 +405,33 @@ private func checkCompilationDatabaseBuildSystem(_ compdb: ByteString, file: Sta
let buildSystem = CompilationDatabaseBuildSystem(projectRoot: try AbsolutePath(validating: "/a"), fileSystem: fs)
try block(buildSystem)
}

/// Assert that `str` is split into the expected command line arguments by both the Unix and the Windows splitter.
///
/// - Parameters:
///   - str: The command line to split.
///   - expected: The arguments expected when splitting with Unix rules. Also the expectation for Windows rules
///     unless `windows` is specified.
///   - windows: If non-nil, the arguments expected when splitting with Windows rules.
///   - initialCommandName: Passed through to the Windows split function.
///   - file: The file that assertion failures are attributed to.
///   - line: The line that assertion failures are attributed to.
private func assertEscapedCommand(
  _ str: String,
  _ expected: [String],
  windows: [String]? = nil,
  initialCommandName: Bool = false,
  file: StaticString = #filePath,
  line: UInt = #line
) {
  let unixArguments = splitShellEscapedCommand(str)
  XCTAssertEqual(unixArguments, expected, "Splitting Unix command line arguments", file: file, line: line)

  let windowsArguments = splitWindowsCommandLine(str, initialCommandName: initialCommandName)
  let expectedWindowsArguments = windows ?? expected
  XCTAssertEqual(
    windowsArguments,
    expectedWindowsArguments,
    "Splitting Windows command line arguments",
    file: file,
    line: line
  )
}

0 comments on commit 7e2d80c

Please sign in to comment.