From 610bf0dae6045b78e1cfb0ac807defcb0443828d Mon Sep 17 00:00:00 2001 From: Tyler Morrison Date: Tue, 25 Aug 2020 03:15:35 -0700 Subject: [PATCH 1/6] WIP on block string support. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First pass at block string support. Not sufficient: this doesn’t trim leading indentation (or trailing whitespace) from the lines inside the block string, which is expected; the specific requirements are detailed in the GraphQL spec. Added the algorithm/notes from the spec and an empty blockStringValue() function to implement this in. Saving this old work because it works other than that. --- Sources/GraphQL/Language/Lexer.swift | 91 ++++++++++++++++++- .../LanguageTests/LexerTests.swift | 69 ++++++++++++++ 2 files changed, 155 insertions(+), 5 deletions(-) diff --git a/Sources/GraphQL/Language/Lexer.swift b/Sources/GraphQL/Language/Lexer.swift index 1dae8d75..491720f8 100644 --- a/Sources/GraphQL/Language/Lexer.swift +++ b/Sources/GraphQL/Language/Lexer.swift @@ -550,17 +550,60 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th var chunkStartIndex = positionIndex var currentCode: UInt8? 
= 0 var value = "" - + var blockString = false + var chunkEndTrim = 0 + + // if we have minimum 5 more quotes worth of characters left after eating the first quote, check for block quote + // body.utf8.index(positionIndex, offsetBy: 5) < body.utf8.endIndex + if body.utf8.distance(from: positionIndex, to: body.utf8.endIndex) >= 5 { + if body.charCode(at: positionIndex) == 34, + body.charCode(at: body.utf8.index(after: positionIndex)) == 34 { + blockString = true + positionIndex = body.utf8.index(positionIndex, offsetBy: 2) + + // if the first character after the """ is a newline, then it is not included in the value + if let code = body.charCode(at: positionIndex), + (code == 0x000A || code == 0x000D) { + positionIndex = body.utf8.index(after: positionIndex) + } + + chunkStartIndex = positionIndex + } + } + while positionIndex < body.utf8.endIndex { currentCode = body.charCode(at: positionIndex) - // not LineTerminator not Quote (") - guard let code = currentCode, code != 0x000A && code != 0x000D && code != 34 else { + // not in a block quote not LineTerminator not Quote (") + guard let code = currentCode, + blockString || (code != 0x000A && code != 0x000D && code != 34) else { + break + } + + // Exit if: + // - we are parsing a block quote + // - the current code is a Quote (") + // - we have at least two more characters in the input + // - and both remaining characters are Quotes (") + if blockString, + let code = currentCode, + code == 34, + body.utf8.index(positionIndex, offsetBy: 2) < body.utf8.endIndex, + let codeNext = body.charCode(at: body.utf8.index(after: positionIndex)), + codeNext == 34, + let codeNextNext = body.charCode(at: body.utf8.index(after: body.utf8.index(after: positionIndex))), + codeNextNext == 34 { + // if closing """ is on a line by itself then we set chunkEndTrim to 1 to trim the last return before it + if let code = body.charCode(at: body.utf8.index(before: positionIndex)), + (code == 0x000A || code == 0x000D) { + chunkEndTrim = 1 // 
flag the need to trim the last return + } + positionIndex = body.utf8.index(after: body.utf8.index(after: positionIndex)) // so we clean up on exit break } // SourceCharacter - if code < 0x0020 && code != 0x0009 { + if code < 0x0020 && code != 0x0009 && !(blockString && (code == 0x000A || code == 0x000D)) { throw syntaxError( source: source, position: body.offset(of: positionIndex), @@ -633,7 +676,8 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th ) } - value += String(body.utf8[chunkStartIndex.. String { + assert(false, "implement this!") + return "" +} + /** * Converts four hexidecimal chars to the integer that the * string represents. For example, uniCharCode('0','0','0','f') diff --git a/Tests/GraphQLTests/LanguageTests/LexerTests.swift b/Tests/GraphQLTests/LanguageTests/LexerTests.swift index d2954c54..fb0ac44a 100644 --- a/Tests/GraphQLTests/LanguageTests/LexerTests.swift +++ b/Tests/GraphQLTests/LanguageTests/LexerTests.swift @@ -208,6 +208,75 @@ class LexerTests : XCTestCase { } } + func testMultiLineStrings() throws { + let token = try lexOne(#" """ Multi-line string\n With Inner "foo" \n should be Valid """ "#) + XCTAssertEqual(token.start, 1) + XCTAssert(token.kind == .string) + + let expected = Token( + kind: .string, + start: 1, + end: 64, + line: 1, + column: 2, + value: " Multi-line string\n With Inner \"foo\" \n should be Valid " + ) + + XCTAssertEqual(token, expected, "\nexpected: \n \(dump(expected))\n\ngot: \n\(dump(token))\n") + } + + func testMultiLineStringsUnescapedReturns() throws { + let token = try lexOne(#""" + """ + Multi-line string + with Inner "foo" + should be valid + """ + """#) + + let expected = Token( + kind: .string, + start: 0, + end: 59, + line: 1, + column: 1, + value: " Multi-line string\nwith Inner \"foo\"\nshould be valid" + ) + + XCTAssertEqual(token, expected, "expected: \n \(dump(expected))\ngot: \n\(dump(token))\n") + } + + func 
fails_testMultiLineStringsUnescapedReturnsIndentationTest() throws { + let token = try lexOne(#""" + """ + Multi-line string { + with Inner "foo" + should be valid indented + } + """ + """#) + + let expected = Token( + kind: .string, + start: 0, + end: 71, + line: 1, + column: 1, + value: "Multi-line string {\nwith Inner \"foo\"\nshould be valid indented\n}" + ) + + XCTAssertEqual(token, expected, "expected: \n \(dump(expected))\ngot: \n\(dump(token))\n") + } + + + func fails_testEmptyQuote() throws { + XCTFail("Implement This!") + } + + func fails_testEmptyBlockQuote() throws { + XCTFail("Implement This!") + } + func testStringErrors() throws { XCTAssertThrowsError(try lexOne("\"")) // "Syntax Error GraphQL (1:2) Unterminated string" From ac824d119ed34965ddc53f36aa0d72d47dd2f68c Mon Sep 17 00:00:00 2001 From: Tyler Morrison Date: Tue, 25 Aug 2020 15:44:56 -0700 Subject: [PATCH 2/6] refactor readString to prep for blockStringValue impl and additional tests. add tests for empty string and blockstring. take out trimming of string since that is moving to blockStringValue(rawString:) next. 
update comment for readString --- Sources/GraphQL/Language/AST.swift | 1 + Sources/GraphQL/Language/Lexer.swift | 69 +++++++++++++------ .../LanguageTests/LexerTests.swift | 33 ++++++--- 3 files changed, 70 insertions(+), 33 deletions(-) diff --git a/Sources/GraphQL/Language/AST.swift b/Sources/GraphQL/Language/AST.swift index 1c618752..50634298 100644 --- a/Sources/GraphQL/Language/AST.swift +++ b/Sources/GraphQL/Language/AST.swift @@ -55,6 +55,7 @@ final public class Token { case int = "Int" case float = "Float" case string = "String" + case blockString = "BlockString" case comment = "Comment" public var description: String { diff --git a/Sources/GraphQL/Language/Lexer.swift b/Sources/GraphQL/Language/Lexer.swift index 491720f8..47433a12 100644 --- a/Sources/GraphQL/Language/Lexer.swift +++ b/Sources/GraphQL/Language/Lexer.swift @@ -540,18 +540,44 @@ func readDigits(source: Source, start: Int, firstCode: UInt8) throws -> Int { } /** - * Reads a string token from the source file. + * Reads a `.string` token from the source file. * * "([^"\\\u000A\u000D]|(\\(u[0-9a-fA-F]{4}|["\\/bfnrt])))*" + * + * augmented to support blockstrings """ """ and return `.blockString` token if found. */ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) throws -> Token { + let token = try readRawString(source: source, start: start, line: line, col: col, prev: prev) + + if token.kind == .blockString, + let rawString = token.value { + let valueString = blockStringValue(rawValue: rawString) + return Token(kind: token.kind, + start: token.start, + end: token.end, + line: token.line, + column: token.column, + value: valueString, + prev: token.prev, + next: token.next) + } + return token +} + +/** Reads a raw string token from the source. + * + * Doesn't do any clean up of leading indentations or trailing whitespace for blockstring lines; + * so if `token.kind` == `.blockString`, call `blockStringValue` with `token.value` for that. 
+ * + * returns: Token of kind `.string` or `.blockString` + */ +func readRawString(source: Source, start: Int, line: Int, col: Int, prev: Token) throws -> Token { let body = source.body var positionIndex = body.utf8.index(body.utf8.startIndex, offsetBy: start + 1) var chunkStartIndex = positionIndex var currentCode: UInt8? = 0 var value = "" var blockString = false - var chunkEndTrim = 0 // if we have minimum 5 more quotes worth of characters left after eating the first quote, check for block quote // body.utf8.index(positionIndex, offsetBy: 5) < body.utf8.endIndex @@ -593,12 +619,7 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th codeNext == 34, let codeNextNext = body.charCode(at: body.utf8.index(after: body.utf8.index(after: positionIndex))), codeNextNext == 34 { - // if closing """ is on a line by itself then we set chunkEndTrim to 1 to trim the last return before it - if let code = body.charCode(at: body.utf8.index(before: positionIndex)), - (code == 0x000A || code == 0x000D) { - chunkEndTrim = 1 // flag the need to trim the last return - } - positionIndex = body.utf8.index(after: body.utf8.index(after: positionIndex)) // so we clean up on exit + positionIndex = body.utf8.index(after: body.utf8.index(after: positionIndex)) // so we clean up quotes on exit break } @@ -676,22 +697,26 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th ) } - let valueRangeEnd = body.utf8.index(positionIndex, offsetBy: (blockString ? -2 - chunkEndTrim : 0)) - value += String(body.utf8[chunkStartIndex ..< valueRangeEnd])! + if blockString { + let valueRangeEnd = body.utf8.index(positionIndex, offsetBy: -2) + if chunkStartIndex < valueRangeEnd { // empty string? + value += String(body.utf8[chunkStartIndex ..< valueRangeEnd])! + } + } else { + value += String(body.utf8[chunkStartIndex ..< positionIndex])! 
+ } - return Token( - kind: .string, - start: start, - end: body.offset(of: positionIndex) + 1, - line: line, - column: col, - value: value, - prev: prev - ) + return Token(kind: blockString ? .blockString : .string, + start: start, + end: body.offset(of: positionIndex) + 1, + line: line, + column: col, + value: value, + prev: prev) } /** - * BlockStringValue(rawValue: String) + * blockStringValue(rawValue: String) * * Transcription of the algorithm specified in the [spec](http://spec.graphql.org/draft/#BlockStringValue()) * @@ -723,8 +748,8 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th */ func blockStringValue(rawValue: String) -> String { - assert(false, "implement this!") - return "" + print("\n\n **** blockStringValue Not Yet Implemented **** \n\n") + return rawValue } /** diff --git a/Tests/GraphQLTests/LanguageTests/LexerTests.swift b/Tests/GraphQLTests/LanguageTests/LexerTests.swift index fb0ac44a..016f7d18 100644 --- a/Tests/GraphQLTests/LanguageTests/LexerTests.swift +++ b/Tests/GraphQLTests/LanguageTests/LexerTests.swift @@ -209,12 +209,9 @@ class LexerTests : XCTestCase { } func testMultiLineStrings() throws { - let token = try lexOne(#" """ Multi-line string\n With Inner "foo" \n should be Valid """ "#) - XCTAssertEqual(token.start, 1) - XCTAssert(token.kind == .string) - + let token = try lexOne(#" """ Multi-line string\n With Inner "foo" \n should be Valid """ "#) let expected = Token( - kind: .string, + kind: .blockString, start: 1, end: 64, line: 1, @@ -235,12 +232,12 @@ class LexerTests : XCTestCase { """#) let expected = Token( - kind: .string, + kind: .blockString, start: 0, end: 59, line: 1, column: 1, - value: " Multi-line string\nwith Inner \"foo\"\nshould be valid" + value: " Multi-line string\nwith Inner \"foo\"\nshould be valid\n" ) XCTAssertEqual(token, expected, "expected: \n \(dump(expected))\ngot: \n\(dump(token))\n") @@ -269,13 +266,27 @@ class LexerTests : XCTestCase { } - func 
fails_testEmptyQuote() throws { - XCTFail("Implement This!") + func testEmptyQuote() throws { + let token = try lexOne(#" "" "#) + let expected = Token(kind: .string, start: 1, end: 3, line: 1, column: 2, value: "") + XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") } - func fails_testEmptyBlockQuote() throws { - XCTFail("Implement This!") + func testEmptySimpleMultilineBlockQuote() throws { + let token = try lexOne(#" """""" "#) + let expected = Token(kind: .blockString, start: 1, end: 7, line: 1, column: 2, value: "") + XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") } + + func testEmptyTrimmedCharactersMultilineBlockQuote() throws { + let token = try lexOne(#""" + """ + """ + """#) + let expected = Token(kind: .blockString, start: 0, end: 7, line: 1, column: 1, value: "") + XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") + } + func testStringErrors() throws { XCTAssertThrowsError(try lexOne("\"")) From bb00a76130dd2dc75165f3d083a8d358c0596302 Mon Sep 17 00:00:00 2001 From: Tyler Morrison Date: Tue, 25 Aug 2020 20:36:11 -0700 Subject: [PATCH 3/6] finish block string support implementation with many tests this commit has the debugging code still in it. next one will remove that. 
--- Sources/GraphQL/Language/Lexer.swift | 92 ++++++- .../LanguageTests/LexerTests.swift | 232 ++++++++++++------ 2 files changed, 242 insertions(+), 82 deletions(-) diff --git a/Sources/GraphQL/Language/Lexer.swift b/Sources/GraphQL/Language/Lexer.swift index 47433a12..ce9f6167 100644 --- a/Sources/GraphQL/Language/Lexer.swift +++ b/Sources/GraphQL/Language/Lexer.swift @@ -748,8 +748,96 @@ func readRawString(source: Source, start: Int, line: Int, col: Int, prev: Token) */ func blockStringValue(rawValue: String) -> String { - print("\n\n **** blockStringValue Not Yet Implemented **** \n\n") - return rawValue + + print("inputString: \n>>>\(rawValue)<<<\n") // debug + + var commonIndent: Int = 0 + var lines = rawValue.utf8.split { (code) -> Bool in + return code == 0x000A || code == 0x000D + } + + for line in lines { print(String(line)) } // debug + + for idx in lines.indices { + let line = lines[idx] + // we already drop this before we get here.. + if idx == lines.startIndex { continue } + if let indentIndex = line.firstIndex(where: { $0 != 0x0009 && $0 != 0x0020 }) { + let indent = line.distance(from: line.startIndex, to: indentIndex) + if commonIndent == 0 || indent < commonIndent { + commonIndent = indent + } + } + } + + print("\ncommonIndent: \(commonIndent)\n") // debug + + var newLines: [String.UTF8View.SubSequence] = [] + if commonIndent != 0 { + for idx in lines.indices { + let line = lines[idx] + // pretty sure they are dropping thinking about """\n which we already drop + if idx == lines.startIndex { + newLines.append(line) + continue + } + newLines.append(line.dropFirst(commonIndent)) + } + + for line in lines { print(String(line)) } // debug + print() + for line in newLines { print(String(line)) } // debug + + lines = newLines + newLines.removeAll() + } + + for idx in lines.indices { + let line = lines[idx] + if newLines.count == 0, + line.firstIndex(where: { $0 != 0x0009 && $0 != 0x0020 }) == nil { + continue + } + newLines.append(line) + } + + for line 
in newLines { print(String(line)) } // debug + + lines = newLines + newLines.removeAll() + print() + for line in lines { print(String(line)) } // debug + + for idx in lines.indices.reversed() { + let line = lines[idx] + if newLines.count == 0, + line.firstIndex(where: { $0 != 0x0009 && $0 != 0x0020 }) == nil { + continue + } + newLines.insert(line, at: newLines.startIndex) + } + + for line in newLines { print(String(line)) } // debug + + lines = newLines + newLines.removeAll() + print() + for line in lines { print(String(line)) } // debug + + var result: Substring = Substring() + + for idx in lines.indices { + if idx == lines.startIndex { + result.append(contentsOf: Substring(lines[idx])) + } else { + result.append(contentsOf: Substring("\u{000A}")) + result.append(contentsOf: Substring(lines[idx])) + } + } + + print( "\n>>>\(result)<<<\n" ) // debug + + return String(result) } /** diff --git a/Tests/GraphQLTests/LanguageTests/LexerTests.swift b/Tests/GraphQLTests/LanguageTests/LexerTests.swift index 016f7d18..3351d85b 100644 --- a/Tests/GraphQLTests/LanguageTests/LexerTests.swift +++ b/Tests/GraphQLTests/LanguageTests/LexerTests.swift @@ -208,86 +208,6 @@ class LexerTests : XCTestCase { } } - func testMultiLineStrings() throws { - let token = try lexOne(#" """ Multi-line string\n With Inner "foo" \n should be Valid """ "#) - let expected = Token( - kind: .blockString, - start: 1, - end: 64, - line: 1, - column: 2, - value: " Multi-line string\n With Inner \"foo\" \n should be Valid " - ) - - XCTAssertEqual(token, expected, "\nexpected: \n \(dump(expected))\n\ngot: \n\(dump(token))\n") - } - - func testMultiLineStringsUnescapedReturns() throws { - let token = try lexOne(#""" - """ - Multi-line string - with Inner "foo" - should be valid - """ - """#) - - let expected = Token( - kind: .blockString, - start: 0, - end: 59, - line: 1, - column: 1, - value: " Multi-line string\nwith Inner \"foo\"\nshould be valid\n" - ) - - XCTAssertEqual(token, expected, "expected: \n 
\(dump(expected))\ngot: \n\(dump(token))\n") - } - - func fails_testMultiLineStringsUnescapedReturnsIndentationTest() throws { - let token = try lexOne(#""" - """ - Multi-line string { - with Inner "foo" - should be valid indented - } - """ - """#) - - let expected = Token( - kind: .string, - start: 0, - end: 71, - line: 1, - column: 1, - value: "Multi-line string {\nwith Inner \"foo\"\nshould be valid indented\n}" - ) - - XCTAssertEqual(token, expected, "expected: \n \(dump(expected))\ngot: \n\(dump(token))\n") - } - - - func testEmptyQuote() throws { - let token = try lexOne(#" "" "#) - let expected = Token(kind: .string, start: 1, end: 3, line: 1, column: 2, value: "") - XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") - } - - func testEmptySimpleMultilineBlockQuote() throws { - let token = try lexOne(#" """""" "#) - let expected = Token(kind: .blockString, start: 1, end: 7, line: 1, column: 2, value: "") - XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") - } - - func testEmptyTrimmedCharactersMultilineBlockQuote() throws { - let token = try lexOne(#""" - """ - """ - """#) - let expected = Token(kind: .blockString, start: 0, end: 7, line: 1, column: 1, value: "") - XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") - } - - func testStringErrors() throws { XCTAssertThrowsError(try lexOne("\"")) // "Syntax Error GraphQL (1:2) Unterminated string" @@ -821,4 +741,156 @@ class LexerTests : XCTestCase { XCTAssertEqual(tokens.map({ $0.kind }), expectedKinds) } + + // + // Tests for Blockstring support + // + + // Tests for sub-routines of `readString` for Blockstring handling + + func testReadRawString() throws { + let sourceStr = #""" + """ + TopLevel { + indented + alsoIndented + } + """ + """# + + let expected = Token(kind: .blockString, + start: 0, + end: 66, + line: 1, + column: 1, + value: " TopLevel {\n indented\n alsoIndented\n }\n", + prev: nil, next: nil) + + let source = Source(body: 
sourceStr, name: "TestSource") + let token = try readRawString(source: source, + start: 0, + line: 1, + col: 1, + prev: Token(kind: .sof, start: 0, end: 0, line: 1, column: 1)) + + XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") + print(String(describing: token.value)) + } + + func testBlockStringIndentationAndBlankLine() throws { + let rawString = "\n\n\n TopLevel {\n indented\n alsoIndented\n }\n\n\n\t\t\n" // from testReadRawString() above + let cleanedString = blockStringValue(rawValue: rawString) + + XCTAssertEqual(cleanedString, " TopLevel {\n indented\n alsoIndented\n}") + } + + func testBlockStringDoubleIndentationAndBlankLine() throws { + let rawString = "\n\n\n TopLevel {\n indented: {\n foo: String\n }\n alsoIndented\n }\n\n\n\t\t\n" // from testReadRawString() above + let cleanedString = blockStringValue(rawValue: rawString) + + XCTAssertEqual(cleanedString, " TopLevel {\n indented: {\n foo: String\n }\n alsoIndented\n}") + } + + func testBlockStringIndentationAndBlankLineFirstLineNotIndentedWeird() throws { + let rawString = "\n\n\nTopLevel {\n indented\n alsoIndented\n }\n\n\n\t\t\n" // from testReadRawString() above + let cleanedString = blockStringValue(rawValue: rawString) + + XCTAssertEqual(cleanedString, "TopLevel {\n indented\n alsoIndented\n}") + } + + // Lexer tests for multi-line string token parsing + + func testMultiLineStrings() throws { + let token = try lexOne(#" """ Multi-line string\n With Inner "foo" \nshould be Valid """ "#) + let expected = Token( + kind: .blockString, + start: 1, + end: 63, + line: 1, + column: 2, + value: " Multi-line string\n With Inner \"foo\" \nshould be Valid " + ) + + XCTAssertEqual(token, expected, "\nexpected: \n \(dump(expected))\n\ngot: \n\(dump(token))\n") + } + + func testMultiLineStringsSingleSpaceIndent() throws { + let token = try lexOne(#" """ Multi-line string\n With Inner "foo" \n should be Valid """ "#) + let expected = Token( + kind: .blockString, + start: 1, + end: 64, 
+ line: 1, + column: 2, + value: " Multi-line string\nWith Inner \"foo\" \nshould be Valid " + ) + + XCTAssertEqual(token, expected, "\nexpected: \n \(dump(expected))\n\ngot: \n\(dump(token))\n") + } + + func testMultiLineStringsUnescapedReturns() throws { + let token = try lexOne(#""" + """ + Multi-line string + with Inner "foo" + should be valid + """ + """#) + + let expected = Token( + kind: .blockString, + start: 0, + end: 59, + line: 1, + column: 1, + value: " Multi-line string\nwith Inner \"foo\"\nshould be valid" + ) + + XCTAssertEqual(token, expected, "expected: \n \(dump(expected))\ngot: \n\(dump(token))\n") + } + + func testMultiLineStringsUnescapedReturnsIndentationTest() throws { + let token = try lexOne(#""" + """ + Multi-line string { + with Inner "foo" + should be valid indented + } + """ + """#) + + let expected = Token( + kind: .blockString, + start: 0, + end: 79, + line: 1, + column: 1, + value: "Multi-line string {\n with Inner \"foo\"\n should be valid indented\n}" + ) + + XCTAssertEqual(token, expected, "expected: \n \(dump(expected))\ngot: \n\(dump(token))\n") + } + + // Test empty strings & multi-line string lexer token parsing + + func testEmptyQuote() throws { + let token = try lexOne(#" "" "#) + let expected = Token(kind: .string, start: 1, end: 3, line: 1, column: 2, value: "") + XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") + } + + func testEmptySimpleMultilineBlockQuote() throws { + let token = try lexOne(#" """""" "#) + let expected = Token(kind: .blockString, start: 1, end: 7, line: 1, column: 2, value: "") + XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") + } + + func testEmptyTrimmedCharactersMultilineBlockQuote() throws { + let token = try lexOne(#""" + """ + """ + """#) + let expected = Token(kind: .blockString, start: 0, end: 7, line: 1, column: 1, value: "") + XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") + } } From 
6fb9d521b98b62c6d8ecab8e09fe7748cc2508d4 Mon Sep 17 00:00:00 2001 From: Tyler Morrison Date: Tue, 25 Aug 2020 20:38:37 -0700 Subject: [PATCH 4/6] remove debugging diagnostics and clean up a couple of old comments. --- Sources/GraphQL/Language/Lexer.swift | 30 +--------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/Sources/GraphQL/Language/Lexer.swift b/Sources/GraphQL/Language/Lexer.swift index ce9f6167..e9ba823a 100644 --- a/Sources/GraphQL/Language/Lexer.swift +++ b/Sources/GraphQL/Language/Lexer.swift @@ -748,19 +748,13 @@ func readRawString(source: Source, start: Int, line: Int, col: Int, prev: Token) */ func blockStringValue(rawValue: String) -> String { - - print("inputString: \n>>>\(rawValue)<<<\n") // debug - var commonIndent: Int = 0 var lines = rawValue.utf8.split { (code) -> Bool in return code == 0x000A || code == 0x000D } - for line in lines { print(String(line)) } // debug - for idx in lines.indices { let line = lines[idx] - // we already drop this before we get here.. 
if idx == lines.startIndex { continue } if let indentIndex = line.firstIndex(where: { $0 != 0x0009 && $0 != 0x0020 }) { let indent = line.distance(from: line.startIndex, to: indentIndex) @@ -769,25 +763,17 @@ func blockStringValue(rawValue: String) -> String { } } } - - print("\ncommonIndent: \(commonIndent)\n") // debug var newLines: [String.UTF8View.SubSequence] = [] if commonIndent != 0 { for idx in lines.indices { let line = lines[idx] - // pretty sure they are dropping thinking about """\n which we already drop if idx == lines.startIndex { newLines.append(line) continue } newLines.append(line.dropFirst(commonIndent)) } - - for line in lines { print(String(line)) } // debug - print() - for line in newLines { print(String(line)) } // debug - lines = newLines newLines.removeAll() } @@ -800,14 +786,9 @@ func blockStringValue(rawValue: String) -> String { } newLines.append(line) } - - for line in newLines { print(String(line)) } // debug - lines = newLines + newLines.removeAll() - print() - for line in lines { print(String(line)) } // debug - for idx in lines.indices.reversed() { let line = lines[idx] if newLines.count == 0, @@ -816,16 +797,9 @@ func blockStringValue(rawValue: String) -> String { } newLines.insert(line, at: newLines.startIndex) } - - for line in newLines { print(String(line)) } // debug - lines = newLines - newLines.removeAll() - print() - for line in lines { print(String(line)) } // debug var result: Substring = Substring() - for idx in lines.indices { if idx == lines.startIndex { result.append(contentsOf: Substring(lines[idx])) @@ -835,8 +809,6 @@ func blockStringValue(rawValue: String) -> String { } } - print( "\n>>>\(result)<<<\n" ) // debug - return String(result) } From 0f3614aa08f9ef44bbce3c222e2c54e4a8c6d685 Mon Sep 17 00:00:00 2001 From: Tyler Morrison Date: Tue, 25 Aug 2020 21:35:17 -0700 Subject: [PATCH 5/6] back out use of .blockString as a new lexer token type. 
Just a .string --- Sources/GraphQL/Language/AST.swift | 1 - Sources/GraphQL/Language/Lexer.swift | 27 ++++++++++--------- .../LanguageTests/LexerTests.swift | 27 +++++++++---------- 3 files changed, 27 insertions(+), 28 deletions(-) diff --git a/Sources/GraphQL/Language/AST.swift b/Sources/GraphQL/Language/AST.swift index 50634298..1c618752 100644 --- a/Sources/GraphQL/Language/AST.swift +++ b/Sources/GraphQL/Language/AST.swift @@ -55,7 +55,6 @@ final public class Token { case int = "Int" case float = "Float" case string = "String" - case blockString = "BlockString" case comment = "Comment" public var description: String { diff --git a/Sources/GraphQL/Language/Lexer.swift b/Sources/GraphQL/Language/Lexer.swift index e9ba823a..083c49ce 100644 --- a/Sources/GraphQL/Language/Lexer.swift +++ b/Sources/GraphQL/Language/Lexer.swift @@ -547,9 +547,9 @@ func readDigits(source: Source, start: Int, firstCode: UInt8) throws -> Int { * augmented to support blockstrings """ """ and return `.blockString` token if found. */ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) throws -> Token { - let token = try readRawString(source: source, start: start, line: line, col: col, prev: prev) + let (token, isBlockString) = try readRawString(source: source, start: start, line: line, col: col, prev: prev) - if token.kind == .blockString, + if isBlockString, let rawString = token.value { let valueString = blockStringValue(rawValue: rawString) return Token(kind: token.kind, @@ -569,9 +569,9 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th * Doesn't do any clean up of leading indentations or trailing whitespace for blockstring lines; * so if `token.kind` == `.blockString`, call `blockStringValue` with `token.value` for that. 
* - * returns: Token of kind `.string` or `.blockString` + * returns: tuple of a Token of kind `.string` and a Bool that is true if the token was a block string */ -func readRawString(source: Source, start: Int, line: Int, col: Int, prev: Token) throws -> Token { +func readRawString(source: Source, start: Int, line: Int, col: Int, prev: Token) throws -> (token: Token, isBlockString: Bool) { let body = source.body var positionIndex = body.utf8.index(body.utf8.startIndex, offsetBy: start + 1) var chunkStartIndex = positionIndex @@ -586,7 +586,7 @@ func readRawString(source: Source, start: Int, line: Int, col: Int, prev: Token) body.charCode(at: body.utf8.index(after: positionIndex)) == 34 { blockString = true positionIndex = body.utf8.index(positionIndex, offsetBy: 2) - + // if the first character after the """ is a newline, then it is not included in the value if let code = body.charCode(at: positionIndex), (code == 0x000A || code == 0x000D) { @@ -705,14 +705,15 @@ func readRawString(source: Source, start: Int, line: Int, col: Int, prev: Token) } else { value += String(body.utf8[chunkStartIndex ..< positionIndex])! } - - return Token(kind: blockString ? 
.blockString : .string, - start: start, - end: body.offset(of: positionIndex) + 1, - line: line, - column: col, - value: value, - prev: prev) + + return (token: Token(kind: .string, + start: start, + end: body.offset(of: positionIndex) + 1, + line: line, + column: col, + value: value, + prev: prev), + isBlockString: blockString) } /** diff --git a/Tests/GraphQLTests/LanguageTests/LexerTests.swift b/Tests/GraphQLTests/LanguageTests/LexerTests.swift index 3351d85b..7472c481 100644 --- a/Tests/GraphQLTests/LanguageTests/LexerTests.swift +++ b/Tests/GraphQLTests/LanguageTests/LexerTests.swift @@ -758,7 +758,7 @@ class LexerTests : XCTestCase { """ """# - let expected = Token(kind: .blockString, + let expected = Token(kind: .string, start: 0, end: 66, line: 1, @@ -767,14 +767,13 @@ class LexerTests : XCTestCase { prev: nil, next: nil) let source = Source(body: sourceStr, name: "TestSource") - let token = try readRawString(source: source, - start: 0, - line: 1, - col: 1, - prev: Token(kind: .sof, start: 0, end: 0, line: 1, column: 1)) - + let (token, isBlockString) = try readRawString(source: source, + start: 0, + line: 1, + col: 1, + prev: Token(kind: .sof, start: 0, end: 0, line: 1, column: 1)) + XCTAssert(isBlockString) XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") - print(String(describing: token.value)) } func testBlockStringIndentationAndBlankLine() throws { @@ -803,7 +802,7 @@ class LexerTests : XCTestCase { func testMultiLineStrings() throws { let token = try lexOne(#" """ Multi-line string\n With Inner "foo" \nshould be Valid """ "#) let expected = Token( - kind: .blockString, + kind: .string, start: 1, end: 63, line: 1, @@ -817,7 +816,7 @@ class LexerTests : XCTestCase { func testMultiLineStringsSingleSpaceIndent() throws { let token = try lexOne(#" """ Multi-line string\n With Inner "foo" \n should be Valid """ "#) let expected = Token( - kind: .blockString, + kind: .string, start: 1, end: 64, line: 1, @@ -838,7 +837,7 @@ class 
LexerTests : XCTestCase { """#) let expected = Token( - kind: .blockString, + kind: .string, start: 0, end: 59, line: 1, @@ -860,7 +859,7 @@ class LexerTests : XCTestCase { """#) let expected = Token( - kind: .blockString, + kind: .string, start: 0, end: 79, line: 1, @@ -881,7 +880,7 @@ class LexerTests : XCTestCase { func testEmptySimpleMultilineBlockQuote() throws { let token = try lexOne(#" """""" "#) - let expected = Token(kind: .blockString, start: 1, end: 7, line: 1, column: 2, value: "") + let expected = Token(kind: .string, start: 1, end: 7, line: 1, column: 2, value: "") XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") } @@ -890,7 +889,7 @@ class LexerTests : XCTestCase { """ """ """#) - let expected = Token(kind: .blockString, start: 0, end: 7, line: 1, column: 1, value: "") + let expected = Token(kind: .string, start: 0, end: 7, line: 1, column: 1, value: "") XCTAssertEqual(token, expected, "\n\(dump(expected))\n\(dump(token))\n") } } From 5db9a9312fb27d066901808df425c197c0ade1af Mon Sep 17 00:00:00 2001 From: Tyler Morrison Date: Tue, 25 Aug 2020 23:58:13 -0700 Subject: [PATCH 6/6] =?UTF-8?q?Figure=20out=20the=20issue=20with=20indenta?= =?UTF-8?q?tion=20of=20the=20=E2=80=9Cfirst=20line=E2=80=9D=20in=20blockst?= =?UTF-8?q?rings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit subtle: swift .split omits blank lines by default. But other issues combined with that. 
--- Sources/GraphQL/Language/Lexer.swift | 20 +++---- .../LanguageTests/LexerTests.swift | 58 +++++++++++++++++-- 2 files changed, 59 insertions(+), 19 deletions(-) diff --git a/Sources/GraphQL/Language/Lexer.swift b/Sources/GraphQL/Language/Lexer.swift index 083c49ce..477efb24 100644 --- a/Sources/GraphQL/Language/Lexer.swift +++ b/Sources/GraphQL/Language/Lexer.swift @@ -586,14 +586,7 @@ func readRawString(source: Source, start: Int, line: Int, col: Int, prev: Token) body.charCode(at: body.utf8.index(after: positionIndex)) == 34 { blockString = true positionIndex = body.utf8.index(positionIndex, offsetBy: 2) - - // if the first character after the """ is a newline, then it is not included in the value - if let code = body.charCode(at: positionIndex), - (code == 0x000A || code == 0x000D) { - positionIndex = body.utf8.index(after: positionIndex) - } - - chunkStartIndex = positionIndex + chunkStartIndex = positionIndex } } @@ -619,7 +612,7 @@ func readRawString(source: Source, start: Int, line: Int, col: Int, prev: Token) codeNext == 34, let codeNextNext = body.charCode(at: body.utf8.index(after: body.utf8.index(after: positionIndex))), codeNextNext == 34 { - positionIndex = body.utf8.index(after: body.utf8.index(after: positionIndex)) // so we clean up quotes on exit + positionIndex = body.utf8.index(after: body.utf8.index(after: positionIndex)) // position after quotes break } @@ -749,11 +742,12 @@ func readRawString(source: Source, start: Int, line: Int, col: Int, prev: Token) */ func blockStringValue(rawValue: String) -> String { - var commonIndent: Int = 0 - var lines = rawValue.utf8.split { (code) -> Bool in + var lines = rawValue.utf8.split(omittingEmptySubsequences: false) { (code) -> Bool in return code == 0x000A || code == 0x000D } - + + var commonIndent: Int = 0 + for idx in lines.indices { let line = lines[idx] if idx == lines.startIndex { continue } @@ -809,7 +803,7 @@ func blockStringValue(rawValue: String) -> String { result.append(contentsOf: 
Substring(lines[idx])) } } - + return String(result) } diff --git a/Tests/GraphQLTests/LanguageTests/LexerTests.swift b/Tests/GraphQLTests/LanguageTests/LexerTests.swift index 7472c481..9dac8efd 100644 --- a/Tests/GraphQLTests/LanguageTests/LexerTests.swift +++ b/Tests/GraphQLTests/LanguageTests/LexerTests.swift @@ -763,7 +763,7 @@ class LexerTests : XCTestCase { end: 66, line: 1, column: 1, - value: " TopLevel {\n indented\n alsoIndented\n }\n", + value: "\n TopLevel {\n indented\n alsoIndented\n }\n", prev: nil, next: nil) let source = Source(body: sourceStr, name: "TestSource") @@ -777,25 +777,43 @@ class LexerTests : XCTestCase { } func testBlockStringIndentationAndBlankLine() throws { - let rawString = "\n\n\n TopLevel {\n indented\n alsoIndented\n }\n\n\n\t\t\n" // from testReadRawString() above + let rawString = "\n\n\n TopLevel {\n indented\n alsoIndented\n }\n\n\n\t\t\n" let cleanedString = blockStringValue(rawValue: rawString) - XCTAssertEqual(cleanedString, " TopLevel {\n indented\n alsoIndented\n}") + XCTAssertEqual(cleanedString, "TopLevel {\n indented\n alsoIndented\n}") } func testBlockStringDoubleIndentationAndBlankLine() throws { - let rawString = "\n\n\n TopLevel {\n indented: {\n foo: String\n }\n alsoIndented\n }\n\n\n\t\t\n" // from testReadRawString() above + let rawString = "\n\n\n TopLevel {\n indented: {\n foo: String\n }\n alsoIndented\n }\n\n\n\t\t\n" let cleanedString = blockStringValue(rawValue: rawString) - XCTAssertEqual(cleanedString, " TopLevel {\n indented: {\n foo: String\n }\n alsoIndented\n}") + XCTAssertEqual(cleanedString, "TopLevel {\n indented: {\n foo: String\n }\n alsoIndented\n}") } func testBlockStringIndentationAndBlankLineFirstLineNotIndentedWeird() throws { - let rawString = "\n\n\nTopLevel {\n indented\n alsoIndented\n }\n\n\n\t\t\n" // from testReadRawString() above + let rawString = "\n\n\nTopLevel {\n indented\n alsoIndented\n}\n\n\n\t\t\n" + let cleanedString = blockStringValue(rawValue: rawString) + + 
XCTAssertEqual(cleanedString, "TopLevel {\n indented\n alsoIndented\n}") + } + + func testBlockStringIndentationMultilineAndBlankLineFirstLineNotIndentedWeird() throws { + let rawString = """ + + + TopLevel { + indented + alsoIndented + } + + + \t + """ let cleanedString = blockStringValue(rawValue: rawString) XCTAssertEqual(cleanedString, "TopLevel {\n indented\n alsoIndented\n}") } + // Lexer tests for multi-line string token parsing @@ -869,6 +887,34 @@ class LexerTests : XCTestCase { XCTAssertEqual(token, expected, "expected: \n \(dump(expected))\ngot: \n\(dump(token))\n") } + + func testMultilineStrings_stringIndentedInStream() throws { + let sourceStr = + #""" + """ + Multi-line string { + with Inner "foo" + should be valid indented + } + """ + """# + + let token = try lexOne(sourceStr) + + let expected = Token( + kind: .string, + start: 4, + end: 103, + line: 1, + column: 5, + value: "Multi-line string {\n with Inner \"foo\"\n should be valid indented\n}" + ) + + print(sourceStr) + + XCTAssertEqual(token, expected, "expected: \n \(dump(expected))\ngot: \n\(dump(token))\n") + } + // Test empty strings & multi-line string lexer token parsing