diff --git a/Sources/GraphQL/Language/Lexer.swift b/Sources/GraphQL/Language/Lexer.swift index 1dae8d75..477efb24 100644 --- a/Sources/GraphQL/Language/Lexer.swift +++ b/Sources/GraphQL/Language/Lexer.swift @@ -540,27 +540,84 @@ func readDigits(source: Source, start: Int, firstCode: UInt8) throws -> Int { } /** - * Reads a string token from the source file. + * Reads a `.string` token from the source file. * * "([^"\\\u000A\u000D]|(\\(u[0-9a-fA-F]{4}|["\\/bfnrt])))*" + * + * augmented to support blockstrings """ """ and return `.blockString` token if found. */ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) throws -> Token { + let (token, isBlockString) = try readRawString(source: source, start: start, line: line, col: col, prev: prev) + + if isBlockString, + let rawString = token.value { + let valueString = blockStringValue(rawValue: rawString) + return Token(kind: token.kind, + start: token.start, + end: token.end, + line: token.line, + column: token.column, + value: valueString, + prev: token.prev, + next: token.next) + } + return token +} + +/** Reads a raw string token from the source. + * + * Doesn't do any clean up of leading indentations or trailing whitespace for blockstring lines; + * so if `token.kind` == `.blockString`, call `blockStringValue` with `token.value` for that. + * + * returns: tuple of Token of kind `.string and Bool of true if it was a block string or not + */ +func readRawString(source: Source, start: Int, line: Int, col: Int, prev: Token) throws -> (token: Token, isBlockString: Bool) { let body = source.body var positionIndex = body.utf8.index(body.utf8.startIndex, offsetBy: start + 1) var chunkStartIndex = positionIndex var currentCode: UInt8? = 0 var value = "" - + var blockString = false + + // if we have minimum 5 more quotes worth of characters left after eating the first quote, check for block quote + // body.utf8.index(positionIndex, offsetBy: 5) < body.utf8.endIndex + if body.utf8.distance(from: positionIndex, to: body.utf8.endIndex) >= 5 { + if body.charCode(at: positionIndex) == 34, + body.charCode(at: body.utf8.index(after: positionIndex)) == 34 { + blockString = true + positionIndex = body.utf8.index(positionIndex, offsetBy: 2) + chunkStartIndex = positionIndex + } + } + while positionIndex < body.utf8.endIndex { currentCode = body.charCode(at: positionIndex) - // not LineTerminator not Quote (") - guard let code = currentCode, code != 0x000A && code != 0x000D && code != 34 else { + // not in a block quote not LineTerminator not Quote (") + guard let code = currentCode, + blockString || (code != 0x000A && code != 0x000D && code != 34) else { + break + } + + // Exit if: + // - we are parsing a block quote + // - the current code is a Quote (") + // - we have at least two more characters in the input + // - and both remaining characters are Quotes (") + if blockString, + let code = currentCode, + code == 34, + body.utf8.index(positionIndex, offsetBy: 2) < body.utf8.endIndex, + let codeNext = body.charCode(at: body.utf8.index(after: positionIndex)), + codeNext == 34, + let codeNextNext = body.charCode(at: body.utf8.index(after: body.utf8.index(after: positionIndex))), + codeNextNext == 34 { + positionIndex = body.utf8.index(after: body.utf8.index(after: positionIndex)) // position after quotes break } // SourceCharacter - if code < 0x0020 && code != 0x0009 { + if code < 0x0020 && code != 0x0009 && !(blockString && (code == 0x000A || code == 0x000D)) { throw syntaxError( source: source, position: body.offset(of: positionIndex), @@ -633,17 +690,121 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th ) } - value += String(body.utf8[chunkStartIndex.. String { + var lines = rawValue.utf8.split(omittingEmptySubsequences: false) { (code) -> Bool in + return code == 0x000A || code == 0x000D + } + + var commonIndent: Int = 0 + + for idx in lines.indices { + let line = lines[idx] + if idx == lines.startIndex { continue } + if let indentIndex = line.firstIndex(where: { $0 != 0x0009 && $0 != 0x0020 }) { + let indent = line.distance(from: line.startIndex, to: indentIndex) + if commonIndent == 0 || indent < commonIndent { + commonIndent = indent + } + } + } + + var newLines: [String.UTF8View.SubSequence] = [] + if commonIndent != 0 { + for idx in lines.indices { + let line = lines[idx] + if idx == lines.startIndex { + newLines.append(line) + continue + } + newLines.append(line.dropFirst(commonIndent)) + } + lines = newLines + newLines.removeAll() + } + + for idx in lines.indices { + let line = lines[idx] + if newLines.count == 0, + line.firstIndex(where: { $0 != 0x0009 && $0 != 0x0020 }) == nil { + continue + } + newLines.append(line) + } + lines = newLines + + newLines.removeAll() + for idx in lines.indices.reversed() { + let line = lines[idx] + if newLines.count == 0, + line.firstIndex(where: { $0 != 0x0009 && $0 != 0x0020 }) == nil { + continue + } + newLines.insert(line, at: newLines.startIndex) + } + lines = newLines + + var result: Substring = Substring() + for idx in lines.indices { + if idx == lines.startIndex { + result.append(contentsOf: Substring(lines[idx])) + } else { + result.append(contentsOf: Substring("\u{000A}")) + result.append(contentsOf: Substring(lines[idx])) + } + } + + return String(result) } /** diff --git a/Tests/GraphQLTests/LanguageTests/LexerTests.swift b/Tests/GraphQLTests/LanguageTests/LexerTests.swift index d2954c54..9dac8efd 100644 --- a/Tests/GraphQLTests/LanguageTests/LexerTests.swift +++ b/Tests/GraphQLTests/LanguageTests/LexerTests.swift @@ -741,4 +741,201 @@ class LexerTests : XCTestCase { XCTAssertEqual(tokens.map({ $0.kind }), expectedKinds) } + + // + // Tests for Blockstring support + // + + // Tests for sub-routines of `readString` for Blockstring handling + + func testReadRawString() throws { + let sourceStr = #""" + """ + TopLevel { + indented + alsoIndented + } + """ + """# + + let expected = Token(kind: .string, + start: 0, + end: 66, + line: 1, + column: 1, + value: "\n TopLevel {\n indented\n alsoIndented\n }\n", + prev: nil, next: nil) + + let source = Source(body: sourceStr, name: "TestSource") + let (token, isBlockString) = try readRawString(source: source, + start: 0, + line: 1, + col: 1, + prev: Token(kind: .sof, start: 0, end: 0, line: 1, column: 1)) + XCTAssert(isBlockString) + XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") + } + + func testBlockStringIndentationAndBlankLine() throws { + let rawString = "\n\n\n TopLevel {\n indented\n alsoIndented\n }\n\n\n\t\t\n" + let cleanedString = blockStringValue(rawValue: rawString) + + XCTAssertEqual(cleanedString, "TopLevel {\n indented\n alsoIndented\n}") + } + + func testBlockStringDoubleIndentationAndBlankLine() throws { + let rawString = "\n\n\n TopLevel {\n indented: {\n foo: String\n }\n alsoIndented\n }\n\n\n\t\t\n" + let cleanedString = blockStringValue(rawValue: rawString) + + XCTAssertEqual(cleanedString, "TopLevel {\n indented: {\n foo: String\n }\n alsoIndented\n}") + } + + func testBlockStringIndentationAndBlankLineFirstLineNotIndentedWeird() throws { + let rawString = "\n\n\nTopLevel {\n indented\n alsoIndented\n}\n\n\n\t\t\n" + let cleanedString = blockStringValue(rawValue: rawString) + + XCTAssertEqual(cleanedString, "TopLevel {\n indented\n alsoIndented\n}") + } + + func testBlockStringIndentationMultilineAndBlankLineFirstLineNotIndentedWeird() throws { + let rawString = """ + + + TopLevel { + indented + alsoIndented + } + + + \t + """ + let cleanedString = blockStringValue(rawValue: rawString) + + XCTAssertEqual(cleanedString, "TopLevel {\n indented\n alsoIndented\n}") + } + + + // Lexer tests for multi-line string token parsing + + func testMultiLineStrings() throws { + let token = try lexOne(#" """ Multi-line string\n With Inner "foo" \nshould be Valid """ "#) + let expected = Token( + kind: .string, + start: 1, + end: 63, + line: 1, + column: 2, + value: " Multi-line string\n With Inner \"foo\" \nshould be Valid " + ) + + XCTAssertEqual(token, expected, "\nexpected: \n \(dump(expected))\n\ngot: \n\(dump(token))\n") + } + + func testMultiLineStringsSingleSpaceIndent() throws { + let token = try lexOne(#" """ Multi-line string\n With Inner "foo" \n should be Valid """ "#) + let expected = Token( + kind: .string, + start: 1, + end: 64, + line: 1, + column: 2, + value: " Multi-line string\nWith Inner \"foo\" \nshould be Valid " + ) + + XCTAssertEqual(token, expected, "\nexpected: \n \(dump(expected))\n\ngot: \n\(dump(token))\n") + } + + func testMultiLineStringsUnescapedReturns() throws { + let token = try lexOne(#""" + """ + Multi-line string + with Inner "foo" + should be valid + """ + """#) + + let expected = Token( + kind: .string, + start: 0, + end: 59, + line: 1, + column: 1, + value: " Multi-line string\nwith Inner \"foo\"\nshould be valid" + ) + + XCTAssertEqual(token, expected, "expected: \n \(dump(expected))\ngot: \n\(dump(token))\n") + } + + func testMultiLineStringsUnescapedReturnsIndentationTest() throws { + let token = try lexOne(#""" + """ + Multi-line string { + with Inner "foo" + should be valid indented + } + """ + """#) + + let expected = Token( + kind: .string, + start: 0, + end: 79, + line: 1, + column: 1, + value: "Multi-line string {\n with Inner \"foo\"\n should be valid indented\n}" + ) + + XCTAssertEqual(token, expected, "expected: \n \(dump(expected))\ngot: \n\(dump(token))\n") + } + + func testMultilineStrings_stringIndentedInStream() throws { + let sourceStr = + #""" + """ + Multi-line string { + with Inner "foo" + should be valid indented + } + """ + """# + + let token = try lexOne(sourceStr) + + let expected = Token( + kind: .string, + start: 4, + end: 103, + line: 1, + column: 5, + value: "Multi-line string {\n with Inner \"foo\"\n should be valid indented\n}" + ) + + print(sourceStr) + + XCTAssertEqual(token, expected, "expected: \n \(dump(expected))\ngot: \n\(dump(token))\n") + } + + + // Test empty strings & multi-line string lexer token parsing + + func testEmptyQuote() throws { + let token = try lexOne(#" "" "#) + let expected = Token(kind: .string, start: 1, end: 3, line: 1, column: 2, value: "") + XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") + } + + func testEmptySimpleMultilineBlockQuote() throws { + let token = try lexOne(#" """""" "#) + let expected = Token(kind: .string, start: 1, end: 7, line: 1, column: 2, value: "") + XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") + } + + func testEmptyTrimmedCharactersMultilineBlockQuote() throws { + let token = try lexOne(#""" + """ + """ + """#) + let expected = Token(kind: .string, start: 0, end: 7, line: 1, column: 1, value: "") + XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") + } }