diff --git a/README.md b/README.md index 7ef4dbcd..ecce2b3a 100644 --- a/README.md +++ b/README.md @@ -42,23 +42,24 @@ A talk from JuliaCon 2022 covered some aspects of this package. # Examples Here's what parsing of a small piece of code currently looks like in various -forms. We'll use the `parseall` convenience function to demonstrate, but -there's also a more flexible parsing interface with `JuliaSyntax.parse()`. +forms. We'll use the `JuliaSyntax.parse` function to demonstrate; there's also +`JuliaSyntax.parse!` offering more fine-grained control. First, a source-ordered AST with `SyntaxNode` (`call-i` in the dump here means the `call` has the infix `-i` flag): ```julia -julia> parseall(SyntaxNode, "(x + y)*z", filename="foo.jl") +julia> using JuliaSyntax: JuliaSyntax, SyntaxNode, GreenNode + +julia> JuliaSyntax.parse(SyntaxNode, "(x + y)*z", filename="foo.jl") line:col│ byte_range │ tree │ file_name - 1:1 │ 1:9 │[toplevel] │foo.jl - 1:1 │ 1:9 │ [call-i] - 1:2 │ 2:6 │ [call-i] - 1:2 │ 2:2 │ x - 1:4 │ 4:4 │ + - 1:6 │ 6:6 │ y - 1:8 │ 8:8 │ * - 1:9 │ 9:9 │ z + 1:1 │ 1:9 │[call-i] │foo.jl + 1:2 │ 2:6 │ [call-i] + 1:2 │ 2:2 │ x + 1:4 │ 4:4 │ + + 1:6 │ 6:6 │ y + 1:8 │ 8:8 │ * + 1:9 │ 9:9 │ z ``` Internally this has a full representation of all syntax trivia (whitespace and @@ -69,19 +70,18 @@ despite being important for parsing. 
```julia julia> text = "(x + y)*z" - greentree = parseall(GreenNode, text) - 1:9 │[toplevel] - 1:9 │ [call] - 1:1 │ ( - 2:6 │ [call] - 2:2 │ Identifier ✔ - 3:3 │ Whitespace - 4:4 │ + ✔ - 5:5 │ Whitespace - 6:6 │ Identifier ✔ - 7:7 │ ) - 8:8 │ * ✔ - 9:9 │ Identifier ✔ + greentree = JuliaSyntax.parse(GreenNode, text) + 1:9 │[call] + 1:1 │ ( + 2:6 │ [call] + 2:2 │ Identifier ✔ + 3:3 │ Whitespace + 4:4 │ + ✔ + 5:5 │ Whitespace + 6:6 │ Identifier ✔ + 7:7 │ ) + 8:8 │ * ✔ + 9:9 │ Identifier ✔ ``` `GreenNode` stores only byte ranges, but the token strings can be shown by @@ -89,25 +89,24 @@ supplying the source text string: ```julia julia> show(stdout, MIME"text/plain"(), greentree, text) - 1:9 │[toplevel] - 1:9 │ [call] - 1:1 │ ( "(" - 2:6 │ [call] - 2:2 │ Identifier ✔ "x" - 3:3 │ Whitespace " " - 4:4 │ + ✔ "+" - 5:5 │ Whitespace " " - 6:6 │ Identifier ✔ "y" - 7:7 │ ) ")" - 8:8 │ * ✔ "*" - 9:9 │ Identifier ✔ "z" + 1:9 │[call] + 1:1 │ ( "(" + 2:6 │ [call] + 2:2 │ Identifier ✔ "x" + 3:3 │ Whitespace " " + 4:4 │ + ✔ "+" + 5:5 │ Whitespace " " + 6:6 │ Identifier ✔ "y" + 7:7 │ ) ")" + 8:8 │ * ✔ "*" + 9:9 │ Identifier ✔ "z" ``` Julia `Expr` can also be produced: ```julia -julia> parseall(Expr, "(x + y)*z") -:($(Expr(:toplevel, :((x + y) * z)))) +julia> JuliaSyntax.parse(Expr, "(x + y)*z") +:((x + y) * z) ``` # Using JuliaSyntax as the default parser diff --git a/src/hooks.jl b/src/hooks.jl index 8011f602..9d65983c 100644 --- a/src/hooks.jl +++ b/src/hooks.jl @@ -157,7 +157,7 @@ function _core_parser_hook(code, filename, lineno, offset, options) return Core.svec(nothing, last_byte(stream)) end end - parse(stream; rule=rule) + parse!(stream; rule=rule) if rule === :statement bump_trivia(stream) end diff --git a/src/parse_stream.jl b/src/parse_stream.jl index 6665c3fa..c62e64b2 100644 --- a/src/parse_stream.jl +++ b/src/parse_stream.jl @@ -155,10 +155,28 @@ const NO_POSITION = ParseStreamPosition(0, 0) 
#------------------------------------------------------------------------------- """ -ParseStream provides an IO interface for the parser. It -- Wraps the lexer with a lookahead buffer -- Removes insignificant whitespace and comment tokens, shifting them into the - output implicitly (newlines may be significant depending on `skip_newlines`) + ParseStream(text::AbstractString, index::Integer=1; version=VERSION) + ParseStream(text::IO; version=VERSION) + ParseStream(text::Vector{UInt8}, index::Integer=1; version=VERSION) + ParseStream(ptr::Ptr{UInt8}, len::Integer, index::Integer=1; version=VERSION) + +Construct a `ParseStream` from input which may come in various forms: +* A string (zero copy for `String` and `SubString`) +* An `IO` object (zero copy for `IOBuffer`). The `IO` object must be seekable. +* A buffer of bytes (zero copy). The caller is responsible for preserving + buffers passed as `(ptr,len)`. + +A byte `index` may be provided as the position to start parsing. + +ParseStream provides an IO interface for the parser which provides lexing of +the source text input into tokens, manages insignificant whitespace tokens on +behalf of the parser, and stores output tokens and tree nodes in a pair of +output arrays. + +`version` (default `VERSION`) may be used to set the syntax version to +any Julia version `>= v"1.0"`. We aim to parse all Julia syntax which has been +added after v"1.0", emitting an error if it's not compatible with the requested +`version`. """ mutable struct ParseStream # `textbuf` is a buffer of UTF-8 encoded text of the source code. This is a diff --git a/src/parser_api.jl b/src/parser_api.jl index c05122be..9904e84a 100644 --- a/src/parser_api.jl +++ b/src/parser_api.jl @@ -3,43 +3,6 @@ # This is defined separately from parser.jl so that: # * parser.jl doesn't need to refer to any tree data structures # * It's clear which parts are the public API -# -# What should the general parsing API look like? 
Some points to consider: -# -# * After parsing atoms or statements or most other internal rules, it's -# usual to start in the middle of the input text and end somewhere else in -# the middle of the input text. So we should taken an index for the start of -# parsing and supply an index back to the caller after parsing. -# -# * `parseall` is a special case where we expect to consume all the input. -# Perhaps this is the API which throws an error if we don't consume it all, -# and doesn't accept an index as input? -# -# * The ParseStream is the fundamental interface which wraps the code string -# and index up together for input and contains the output events, diagnostics -# and current stream position after parsing. The user should potentially be -# able to use this directly. It does, however assume a Julia-compatible token -# stream. -# -# * It could be useful to support an IO-based interface so that users can parse -# Julia code intermixed with other DSLs. Documenter.jl and string macros come -# to mind as examples which could use this. A tricky part is deciding where -# the input ends: For string macros this is done by the parser, but for -# Documenter it's probably just done beforehand according to the Markdown -# code block rules. -# -# * The API should have an interface where a simple string is passed in. How -# does SourceFile relate to this? -# -# * It's neat for `parse` to be overloadable to produce various output data -# structures; GreenNode, SyntaxNode, Expr, (etc?) in the same way that -# Base.parse can be used for non-Julia code. (Heh... though -# `Base.parse(Expr, "...")` would also make a certain amount of sense.) -# -# * What's the no-copy API look like? A String can be put into an IOBuffer via -# unsafe_wrap(Vector{UInt8}, str) ... A SubString likewise. 
Also there's the -# `codeunits` function to hold a GC-safe view of string data as an array (but -# we can't use a Vector{UInt8}) struct ParseError <: Exception source::SourceFile @@ -65,39 +28,19 @@ Base.display_error(io::IO, err::ParseError, bt) = Base.showerror(io, err, bt) """ - # Input and output: - stream = parse(stream::ParseStream; kws...) - (tree, diagnostics) = parse(TreeType, io::IOBuffer; kws...) - (tree, diagnostics, index) = parse(TreeType, str::AbstractString, [index::Integer]; kws...) - # Keywords - parse(...; rule=:toplevel, version=VERSION, ignore_trivia=true) - -Parse Julia source code from `input`, returning the output in a format -compatible with `input`: - -* When `input` is a `ParseStream`, the stream itself is returned and the - `ParseStream` interface can be used to process the output. -* When `input` is a seekable `IO` subtype, the output is `(tree, diagnostics)`. - The buffer `position` will be set to the next byte of input. -* When `input` is an `AbstractString, Integer`, or `Vector{UInt8}, Integer` the - output is `(tree, diagnostics, index)`, where `index` (default 1) is the next - byte of input. + parse!(stream::ParseStream; rule=:toplevel) + +Parse Julia source code from a [`ParseStream`](@ref) object. Output tree data +structures may be extracted from `stream` with the [`build_tree`](@ref) function. `rule` may be any of -* `toplevel` (default) — parse a whole "file" of top level statements. In this +* `:toplevel` (default) — parse a whole "file" of top level statements. In this mode, the parser expects to fully consume the input. -* `statement` — parse a single statement, or statements separated by semicolons. -* `atom` — parse a single syntax "atom": a literal, identifier, or +* `:statement` — parse a single statement, or statements separated by semicolons. +* `:atom` — parse a single syntax "atom": a literal, identifier, or parenthesized expression. 
- -`version` (default `VERSION`) may be used to set the syntax version to -any Julia version `>= v"1.0"`. We aim to parse all Julia syntax which has been -added after v"1.0", emitting an error if it's not compatible with the requested -`version`. - -See also [`parseall`](@ref) for a simpler but less powerful interface. """ -function parse(stream::ParseStream; rule::Symbol=:toplevel) +function parse!(stream::ParseStream; rule::Symbol=:toplevel) ps = ParseState(stream) if rule === :toplevel parse_toplevel(ps) @@ -111,56 +54,37 @@ function parse(stream::ParseStream; rule::Symbol=:toplevel) stream end -function parse(::Type{T}, io::IO; - rule::Symbol=:toplevel, version=VERSION, kws...) where {T} +""" + parse!(TreeType, io::IO; rule=:toplevel, version=VERSION) + +Parse Julia source code from a seekable `IO` object. The output is a tuple +`(tree, diagnostics)`. When `parse!` returns, the stream `io` is positioned +directly after the last byte which was consumed during parsing. +""" +function parse!(::Type{TreeType}, io::IO; + rule::Symbol=:toplevel, version=VERSION, kws...) where {TreeType} stream = ParseStream(io; version=version) - parse(stream; rule=rule) - tree = build_tree(T, stream; kws...) + parse!(stream; rule=rule) + tree = build_tree(TreeType, stream; kws...) seek(io, last_byte(stream)) tree, stream.diagnostics end -# Generic version of parse for all other cases where an index must be passed -# back - ie strings and buffers -function parse(::Type{T}, input...; - rule::Symbol=:toplevel, version=VERSION, kws...) where {T} - stream = ParseStream(input...; version=version) - parse(stream; rule=rule) - tree = build_tree(T, stream; kws...) - tree, stream.diagnostics, last_byte(stream) + 1 -end - - -""" - parseall(TreeType, input...; - rule=:toplevel, - version=VERSION, - ignore_trivia=true) - -Experimental convenience interface to parse `input` as Julia code, emitting an -error if the entire input is not consumed. 
`input` can be a string or any other -valid input to the `ParseStream` constructor. By default `parseall` will ignore -whitespace and comments before and after valid code but you can turn this off -by setting `ignore_trivia=false`. - -A `ParseError` will be thrown if any errors occurred during parsing. - -See [`parse`](@ref) for a more complete and powerful interface to the parser, -as well as a description of the `version` and `rule` keywords. -""" -function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION, - ignore_trivia=true, filename=nothing) where {T} - stream = ParseStream(input...; version=version) +function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version=VERSION, + ignore_trivia=true, filename=nothing, ignore_warnings=false) where {T} + stream = ParseStream(text, index; version=version) if ignore_trivia && rule != :toplevel bump_trivia(stream, skip_newlines=true) empty!(stream) end - parse(stream; rule=rule) - if (ignore_trivia && peek(stream, skip_newlines=true) != K"EndMarker") || - (!ignore_trivia && (peek(stream, skip_newlines=false, skip_whitespace=false) != K"EndMarker")) - emit_diagnostic(stream, error="unexpected text after parsing $rule") + parse!(stream; rule=rule) + if need_eof + if (ignore_trivia && peek(stream, skip_newlines=true) != K"EndMarker") || + (!ignore_trivia && (peek(stream, skip_newlines=false, skip_whitespace=false) != K"EndMarker")) + emit_diagnostic(stream, error="unexpected text after parsing $rule") + end end - if any_error(stream.diagnostics) + if any_error(stream.diagnostics) || (!ignore_warnings && !isempty(stream.diagnostics)) throw(ParseError(stream, filename=filename)) end # TODO: Figure out a more satisfying solution to the wrap_toplevel_as_kind @@ -169,13 +93,51 @@ function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION, # not absolute positions. # * Dropping it would be ok for SyntaxNode and Expr... 
tree = build_tree(T, stream; wrap_toplevel_as_kind=K"toplevel", filename=filename) - if !isempty(stream.diagnostics) - # Crudely format any warnings to the current logger. - buf = IOBuffer() - show_diagnostics(IOContext(buf, stdout), stream, - SourceFile(sourcetext(stream, steal_textbuf=true), filename=filename)) - @warn Text(String(take!(buf))) - end - tree + tree, last_byte(stream) + 1 end +""" + parse(TreeType, text, [index]; + version=VERSION, + ignore_trivia=true, + filename=nothing, + ignore_warnings=false) + + # Or, with the same arguments + parseall(...) + parseatom(...) + +Parse Julia source code string `text` into a data structure of type `TreeType`. +`parse` parses a single Julia statement, `parseall` parses top level statements +at file scope and `parseatom` parses a single Julia identifier or other "syntax +atom". + +If `text` is passed without `index`, all the input text must be consumed and a +tree data structure is returned. When an integer byte `index` is passed, a +tuple `(tree, next_index)` will be returned containing the next index in `text` +to resume parsing. By default whitespace and comments before and after valid +code are ignored but you can turn this off by setting `ignore_trivia=false`. + +`version` (default `VERSION`) may be used to set the syntax version to +any Julia version `>= v"1.0"`. We aim to parse all Julia syntax which has been +added after v"1.0", emitting an error if it's not compatible with the requested +`version`. + +Pass `filename` to set any file name information embedded within the output +tree, if applicable. This will also annotate errors and warnings with the +source file name. + +A `ParseError` will be thrown if any errors or warnings occurred during +parsing. To avoid exceptions due to warnings, use `ignore_warnings=true`. +""" +parse(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:statement, true, T, text; kws...)[1] +parseall(::Type{T}, text::AbstractString; kws...) 
where {T} = _parse(:toplevel, true, T, text; kws...)[1] +parseatom(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:atom, true, T, text; kws...)[1] + +@eval @doc $(@doc parse) parseall +@eval @doc $(@doc parse) parseatom + +parse(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:statement, false, T, text, index; kws...) +parseall(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:toplevel, false, T, text, index; kws...) +parseatom(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:atom, false, T, text, index; kws...) + diff --git a/test/expr.jl b/test/expr.jl index 35d84de5..850853f5 100644 --- a/test/expr.jl +++ b/test/expr.jl @@ -1,20 +1,16 @@ -function parse_Expr(str) - parseall(Expr, str, rule=:statement) -end - @testset "Expr conversion" begin @testset "Quote nodes" begin - @test parseall(Expr, ":(a)", rule=:atom) == QuoteNode(:a) - @test parseall(Expr, ":(:a)", rule=:atom) == Expr(:quote, QuoteNode(:a)) - @test parseall(Expr, ":(1+2)", rule=:atom) == Expr(:quote, Expr(:call, :+, 1, 2)) + @test parseatom(Expr, ":(a)") == QuoteNode(:a) + @test parseatom(Expr, ":(:a)") == Expr(:quote, QuoteNode(:a)) + @test parseatom(Expr, ":(1+2)") == Expr(:quote, Expr(:call, :+, 1, 2)) # Compatibility hack for VERSION >= v"1.4" # https://github.com/JuliaLang/julia/pull/34077 - @test parseall(Expr, ":true", rule=:atom) == Expr(:quote, true) + @test parseatom(Expr, ":true") == Expr(:quote, true) end @testset "Line numbers" begin @testset "Blocks" begin - @test parse_Expr("begin a\nb\n\nc\nend") == + @test parse(Expr, "begin a\nb\n\nc\nend") == Expr(:block, LineNumberNode(1), :a, @@ -23,7 +19,7 @@ end LineNumberNode(4), :c, ) - @test parse_Expr("begin end") == + @test parse(Expr, "begin end") == Expr(:block, LineNumberNode(1) ) @@ -36,7 +32,7 @@ end :b, ) - @test parse_Expr("module A\n\nbody\nend") == + @test parse(Expr, "module A\n\nbody\nend") == Expr(:module, true, :A, @@ -49,7 +45,7 @@ 
end end @testset "Function definition lines" begin - @test parse_Expr("function f()\na\n\nb\nend") == + @test parse(Expr, "function f()\na\n\nb\nend") == Expr(:function, Expr(:call, :f), Expr(:block, @@ -60,7 +56,7 @@ end :b, ) ) - @test parse_Expr("f() = 1") == + @test parse(Expr, "f() = 1") == Expr(:(=), Expr(:call, :f), Expr(:block, @@ -70,14 +66,14 @@ end ) # function/macro without methods - @test parse_Expr("function f end") == + @test parse(Expr, "function f end") == Expr(:function, :f) - @test parse_Expr("macro f end") == + @test parse(Expr, "macro f end") == Expr(:macro, :f) end @testset "elseif" begin - @test parse_Expr("if a\nb\nelseif c\n d\nend") == + @test parse(Expr, "if a\nb\nelseif c\n d\nend") == Expr(:if, :a, Expr(:block, @@ -95,7 +91,7 @@ end end @testset "No line numbers in for/let bindings" begin - @test parse_Expr("for i=is, j=js\nbody\nend") == + @test parse(Expr, "for i=is, j=js\nbody\nend") == Expr(:for, Expr(:block, Expr(:(=), :i, :is), @@ -106,7 +102,7 @@ end :body ) ) - @test parse_Expr("let i=is, j=js\nbody\nend") == + @test parse(Expr, "let i=is, j=js\nbody\nend") == Expr(:let, Expr(:block, Expr(:(=), :i, :is), @@ -122,7 +118,7 @@ end @testset "Short form function line numbers" begin # A block is added to hold the line number node - @test parse_Expr("f() = xs") == + @test parse(Expr, "f() = xs") == Expr(:(=), Expr(:call, :f), Expr(:block, @@ -130,7 +126,7 @@ end :xs)) # flisp parser quirk: In a for loop the block is not added, despite # this defining a short-form function. 
- @test parse_Expr("for f() = xs\nend") == + @test parse(Expr, "for f() = xs\nend") == Expr(:for, Expr(:(=), Expr(:call, :f), :xs), Expr(:block, @@ -139,7 +135,7 @@ end end @testset "Long form anonymous functions" begin - @test parse_Expr("function (xs...)\nbody end") == + @test parse(Expr, "function (xs...)\nbody end") == Expr(:function, Expr(:..., :xs), Expr(:block, @@ -150,19 +146,19 @@ end @testset "String conversions" begin # String unwrapping / wrapping - @test parse_Expr("\"str\"") == "str" - @test parse_Expr("\"\$(\"str\")\"") == + @test parse(Expr, "\"str\"") == "str" + @test parse(Expr, "\"\$(\"str\")\"") == Expr(:string, Expr(:string, "str")) # Concatenation of string chunks in triple quoted cases - @test parse_Expr("```\n a\n b```") == + @test parse(Expr, "```\n a\n b```") == Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), "a\nb") - @test parse_Expr("\"\"\"\n a\n \$x\n b\n c\"\"\"") == + @test parse(Expr, "\"\"\"\n a\n \$x\n b\n c\"\"\"") == Expr(:string, "a\n", :x, "\nb\nc") end @testset "do block conversion" begin - @test parse_Expr("f(x) do y\n body end") == + @test parse(Expr, "f(x) do y\n body end") == Expr(:do, Expr(:call, :f, :x), Expr(:->, Expr(:tuple, :y), Expr(:block, @@ -172,29 +168,29 @@ end @testset "= to Expr(:kw) conversion" begin # Call - @test parse_Expr("f(a=1)") == + @test parse(Expr, "f(a=1)") == Expr(:call, :f, Expr(:kw, :a, 1)) - @test parse_Expr("f(; b=2)") == + @test parse(Expr, "f(; b=2)") == Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2))) - @test parse_Expr("f(a=1; b=2)") == + @test parse(Expr, "f(a=1; b=2)") == Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) # Infix call = is not :kw - @test parse_Expr("(x=1) != 2") == + @test parse(Expr, "(x=1) != 2") == Expr(:call, :!=, Expr(:(=), :x, 1), 2) # Dotcall - @test parse_Expr("f.(a=1; b=2)") == + @test parse(Expr, "f.(a=1; b=2)") == Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) # Named tuples - 
@test parse_Expr("(a=1,)") == + @test parse(Expr, "(a=1,)") == Expr(:tuple, Expr(:(=), :a, 1)) - @test parse_Expr("(a=1,; b=2)") == + @test parse(Expr, "(a=1,; b=2)") == Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:(=), :a, 1)) - @test parse_Expr("(a=1,; b=2; c=3)") == + @test parse(Expr, "(a=1,; b=2; c=3)") == Expr(:tuple, Expr(:parameters, Expr(:parameters, Expr(:kw, :c, 3)), @@ -202,21 +198,21 @@ end Expr(:(=), :a, 1)) # ref - @test parse_Expr("x[i=j]") == + @test parse(Expr, "x[i=j]") == Expr(:ref, :x, Expr(:kw, :i, :j)) # vect/braces - @test parse_Expr("[a=1,; b=2]") == + @test parse(Expr, "[a=1,; b=2]") == Expr(:vect, Expr(:parameters, Expr(:(=), :b, 2)), Expr(:(=), :a, 1)) - @test parse_Expr("{a=1,; b=2}") == + @test parse(Expr, "{a=1,; b=2}") == Expr(:braces, Expr(:parameters, Expr(:(=), :b, 2)), Expr(:(=), :a, 1)) # dotted = is not :kw - @test parse_Expr("f(a .= 1)") == + @test parse(Expr, "f(a .= 1)") == Expr(:call, :f, Expr(:.=, :a, 1)) end end diff --git a/test/parse_stream.jl b/test/parse_stream.jl index 10c021f7..315b59c8 100644 --- a/test/parse_stream.jl +++ b/test/parse_stream.jl @@ -71,16 +71,34 @@ st = ParseStream(code) @test peek(st) == K"NewlineWs" bump(st, TRIVIA_FLAG) emit(st, p1, K"toplevel") -end - -@test JuliaSyntax.build_tree(GreenNode, st) isa JuliaSyntax.GreenNode -# ## Input code -#= -println("-----------------------") -print(code) -println() + @test build_tree(GreenNode, st) isa JuliaSyntax.GreenNode +end -# ## Output tree -show(stdout, MIME"text/plain"(), t, code, show_trivia=true) -=# +@testset "ParseStream constructors" begin + @testset "Byte buffer inputs" begin + # Vector{UInt8} + let + st = ParseStream(Vector{UInt8}("x+y")) + bump(st) + @test build_tree(Expr, st) == :x + @test JuliaSyntax.last_byte(st) == 1 + end + let + st = ParseStream(Vector{UInt8}("x+y"), 3) + bump(st) + @test build_tree(Expr, st) == :y + @test JuliaSyntax.last_byte(st) == 3 + end + # Ptr{UInt8}, len + code = "x+y" + GC.@preserve code begin + let + 
st = ParseStream(pointer(code), 3) + bump(st) + @test build_tree(Expr, st) == :x + @test JuliaSyntax.last_byte(st) == 1 + end + end + end +end diff --git a/test/parser_api.jl b/test/parser_api.jl index 0a3c49ec..1af46fba 100644 --- a/test/parser_api.jl +++ b/test/parser_api.jl @@ -1,52 +1,56 @@ @testset "parser API" begin - @testset "String and buffer input" begin - # String - let - ex,diag,pos = parse(Expr, "x+y\nz") - @test JuliaSyntax.remove_linenums!(ex) == Expr(:toplevel, :(x+y), :z) - @test diag == [] - @test pos == 6 - end - @test parse(Expr, "x+y\nz", rule=:statement) == (:(x+y), [], 4) - @test parse(Expr, "x+y\nz", rule=:atom) == (:x, [], 2) - @test parse(Expr, "x+y\nz", 5, rule=:atom) == (:z, [], 6) - - # Vector{UInt8} - @test parse(Expr, Vector{UInt8}("x+y"), rule=:statement) == (:(x+y), [], 4) - @test parse(Expr, Vector{UInt8}("x+y"), 3, rule=:statement) == (:y, [], 4) - # Ptr{UInt8}, len - code = "x+y" - GC.@preserve code begin - stream = ParseStream(pointer(code), 3) - parse(stream, rule=:statement) - @test JuliaSyntax.build_tree(Expr, stream) == :(x+y) - @test JuliaSyntax.last_byte(stream) == 3 - end + @testset "parse with String input" begin + @test parse(Expr, " x ") == :x + @test JuliaSyntax.remove_linenums!(parseall(Expr, " x ")) == Expr(:toplevel, :x) + @test parseatom(Expr, " x ") == :x + # TODO: Fix this situation with trivia here; the brackets are trivia, but + # must be parsed to discover the atom inside. But in GreenTree we only + # place trivia as siblings of the leaf node with identifier `x`, not as + # children. 
+ @test_broken parseatom(Expr, "(x)") == :x # SubString - @test parse(Expr, SubString("x+y"), rule=:statement) == (:(x+y), [], 4) - @test parse(Expr, SubString("x+y"), 1, rule=:atom) == (:x, [], 2) - @test parse(Expr, SubString("x+y"), 3, rule=:atom) == (:y, [], 4) - @test parse(Expr, SubString("x+y",3,3), 1, rule=:atom) == (:y, [], 2) - @test parse(Expr, SubString("α+x"), rule=:statement) == (:(α+x), [], 5) + @test parse(Expr, SubString("x+y")) == :(x+y) + @test parse(Expr, SubString("α+x")) == :(α+x) + @test parseatom(Expr, SubString("x+y",3,3)) == :y + + # Exceptions due to extra trailing syntax + @test_throws JuliaSyntax.ParseError parseatom(Expr, "x+y") + @test_throws JuliaSyntax.ParseError parse(Expr, "x+y\nz") + + # ignore_warnings flag + @test_throws JuliaSyntax.ParseError parse(Expr, "import . .A") + @test parse(Expr, "import . .A", ignore_warnings=true) == :(import ..A) + + # version selection + @test_throws JuliaSyntax.ParseError parse(Expr, "[a ;; b]", version=v"1.6") + @test parse(Expr, "[a ;; b]", version=v"1.7") == Expr(:ncat, 2, :a, :b) + + # filename + @test JuliaSyntax.parse(Expr, "begin\na\nend", filename="foo.jl") == + Expr(:block, LineNumberNode(2, Symbol("foo.jl")), :a) + + # ignore_trivia + @test parseatom(Expr, " x ", ignore_trivia=true) == :x + @test_throws JuliaSyntax.ParseError parseatom(Expr, " x ", ignore_trivia=false) end @testset "IO input" begin # IOBuffer io = IOBuffer("x+y") - @test parse(Expr, io, rule=:statement) == (:(x+y), []) + @test parse!(Expr, io, rule=:statement) == (:(x+y), []) @test position(io) == 3 io = IOBuffer("x+y") seek(io, 2) - @test parse(Expr, io, rule=:atom) == (:y, []) + @test parse!(Expr, io, rule=:atom) == (:y, []) @test position(io) == 3 # A GenericIOBuffer, not actually IOBuffer io = IOBuffer(SubString("x+y")) - @test parse(Expr, io, rule=:statement) == (:(x+y), []) + @test parse!(Expr, io, rule=:statement) == (:(x+y), []) @test position(io) == 3 # Another type of GenericIOBuffer io = 
IOBuffer(codeunits("x+y")) - @test parse(Expr, io, rule=:statement) == (:(x+y), []) + @test parse!(Expr, io, rule=:statement) == (:(x+y), []) @test position(io) == 3 # IOStream mktemp() do path, io @@ -54,23 +58,26 @@ close(io) open(path, "r") do io - @test parse(Expr, io, rule=:statement) == (:(x+y), []) + @test parse!(Expr, io, rule=:statement) == (:(x+y), []) @test position(io) == 3 end end end - @testset "parseall" begin - @test JuliaSyntax.remove_linenums!(parseall(Expr, " x ")) == Expr(:toplevel, :x) - @test parseall(Expr, " x ", rule=:statement) == :x - @test parseall(Expr, " x ", rule=:atom) == :x - # TODO: Fix this situation with trivia here; the brackets are trivia, but - # must be parsed to discover the atom inside. But in GreenTree we only - # place trivia as siblings of the leaf node with identifier `x`, not as - # children. - @test_broken parseall(Expr, "(x)", rule=:atom) == :x + @testset "parse with String and index input" begin + # String + let + ex,pos = parseall(Expr, "x+y\nz", 1) + @test JuliaSyntax.remove_linenums!(ex) == Expr(:toplevel, :(x+y), :z) + @test pos == 6 + end + @test parse(Expr, "x+y\nz", 1) == (:(x+y), 4) + @test parseatom(Expr, "x+y\nz", 1) == (:x, 2) + @test parseatom(Expr, "x+y\nz", 5) == (:z, 6) - @test_throws JuliaSyntax.ParseError parseall(Expr, "x+y", rule=:atom) - @test_throws JuliaSyntax.ParseError parseall(Expr, "x+y\nz", rule=:statement) + # SubString + @test parse(Expr, SubString("α+x\ny"), 1) == (:(α+x), 5) + @test parseatom(Expr, SubString("x+y"), 1) == (:x, 2) + @test parseatom(Expr, SubString("x+y"), 3) == (:y, 4) end end diff --git a/test/test_utils.jl b/test/test_utils.jl index ced5912c..d8b52436 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -7,8 +7,11 @@ using JuliaSyntax: # Parsing ParseStream, SourceFile, + parse!, parse, parseall, + parseatom, + build_tree, @K_str, # Nodes GreenNode, @@ -72,11 +75,13 @@ function parsers_agree_on_file(filename; show_diff=false) return true end try - ex, 
diagnostics, _ = parse(Expr, text, filename=filename) + stream = ParseStream(text) + parse!(stream) + ex = build_tree(Expr, stream, filename=filename) if show_diff && ex != fl_ex show_expr_text_diff(show, ex, fl_ex) end - return !JuliaSyntax.any_error(diagnostics) && + return !JuliaSyntax.any_error(stream) && JuliaSyntax.remove_linenums!(ex) == JuliaSyntax.remove_linenums!(fl_ex) catch exc @@ -111,7 +116,7 @@ function equals_flisp_parse(tree) node_text = sourcetext(tree) # Reparse with JuliaSyntax. This is a crude way to ensure we're not missing # some context from the parent node. - ex,_,_ = parse(Expr, node_text) + ex = parseall(Expr, node_text) fl_ex = fl_parseall(node_text) if Meta.isexpr(fl_ex, :error) return true # Something went wrong in reduction; ignore these cases 😬 @@ -156,7 +161,7 @@ function reduce_test(tree::SyntaxNode) end function reduce_test(text::AbstractString) - tree, _, _ = parse(SyntaxNode, text) + tree = parseall(SyntaxNode, text) reduce_test(tree) end