Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 27 additions & 24 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,16 @@ First, a source-ordered AST with `SyntaxNode` (`call-i` in the dump here means
the `call` has the infix `-i` flag):

```julia
julia> using JuliaSyntax: JuliaSyntax, SyntaxNode, GreenNode
julia> using JuliaSyntax

julia> JuliaSyntax.parse(SyntaxNode, "(x + y)*z", filename="foo.jl")
julia> parsestmt(SyntaxNode, "(x + y)*z", filename="foo.jl")
line:col│ tree │ file_name
1:1 │[call-i] │foo.jl
1:2 │ [call-i]
1:2 │ x
1:4 │ +
1:6 │ y
1:1 │ [parens]
1:2 │ [call-i]
1:2 │ x
1:4 │ +
1:6 │ y
1:8 │ *
1:9 │ z
```
Expand All @@ -71,16 +72,17 @@ representation, despite being important for parsing.

```julia
julia> text = "(x + y)*z"
greentree = JuliaSyntax.parse(GreenNode, text)
greentree = parsestmt(JuliaSyntax.GreenNode, text)
1:9 │[call]
1:1 │ (
2:6 │ [call]
2:2 │ Identifier ✔
3:3 │ Whitespace
4:4 │ + ✔
5:5 │ Whitespace
6:6 │ Identifier ✔
7:7 │ )
1:7 │ [parens]
1:1 │ (
2:6 │ [call]
2:2 │ Identifier ✔
3:3 │ Whitespace
4:4 │ + ✔
5:5 │ Whitespace
6:6 │ Identifier ✔
7:7 │ )
8:8 │ * ✔
9:9 │ Identifier ✔
```
Expand All @@ -91,22 +93,23 @@ supplying the source text string:
```julia
julia> show(stdout, MIME"text/plain"(), greentree, text)
1:9 │[call]
1:1 │ ( "("
2:6 │ [call]
2:2 │ Identifier ✔ "x"
3:3 │ Whitespace " "
4:4 │ + ✔ "+"
5:5 │ Whitespace " "
6:6 │ Identifier ✔ "y"
7:7 │ ) ")"
1:7 │ [parens]
1:1 │ ( "("
2:6 │ [call]
2:2 │ Identifier ✔ "x"
3:3 │ Whitespace " "
4:4 │ + ✔ "+"
5:5 │ Whitespace " "
6:6 │ Identifier ✔ "y"
7:7 │ ) ")"
8:8 │ * ✔ "*"
9:9 │ Identifier ✔ "z"
```

Julia `Expr` can also be produced:

```julia
julia> JuliaSyntax.parse(Expr, "(x + y)*z")
julia> JuliaSyntax.parsestmt(Expr, "(x + y)*z")
:((x + y) * z)
```

Expand Down
19 changes: 19 additions & 0 deletions src/JuliaSyntax.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,23 @@
module JuliaSyntax

# Conservative list of exports - only export the most common/useful things
# here.

# Parsing. See also
# parse!(), ParseStream
export parsestmt, parseall, parseatom
# Tokenization
export tokenize, Token, untokenize
# Source file handling. See also
# highlight() sourcetext() source_line() source_location()
export SourceFile
# Expression heads/kinds. See also
# flags() and related predicates.
export @K_str, kind, head
# Syntax tree types. See also
# GreenNode
export SyntaxNode

# Helper utilities
include("utils.jl")

Expand All @@ -26,4 +44,5 @@ include("expr.jl")
# Hooks to integrate the parser with Base
include("hooks.jl")
include("precompile.jl")

end
11 changes: 5 additions & 6 deletions src/hooks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ end
# Debug log file for dumping parsed code
const _debug_log = Ref{Union{Nothing,IO}}(nothing)

function _core_parser_hook(code, filename, lineno, offset, options)
function _core_parser_hook(code, filename::String, lineno::Int, offset::Int, options::Symbol)
try
# TODO: Check that we do all this input wrangling without copying the
# code buffer
Expand All @@ -144,8 +144,7 @@ function _core_parser_hook(code, filename, lineno, offset, options)
seek(io, offset)

stream = ParseStream(io)
rule = options === :all ? :toplevel : options
if rule === :statement || rule === :atom
if options === :statement || options === :atom
# To copy the flisp parser driver:
# * Parsing atoms consumes leading trivia
# * Parsing statements consumes leading+trailing trivia
Expand All @@ -157,8 +156,8 @@ function _core_parser_hook(code, filename, lineno, offset, options)
return Core.svec(nothing, last_byte(stream))
end
end
parse!(stream; rule=rule)
if rule === :statement
parse!(stream; rule=options)
if options === :statement
bump_trivia(stream)
end

Expand Down Expand Up @@ -342,7 +341,7 @@ function _fl_parse_string(text::AbstractString, filename::AbstractString,
ex, offset+1
end

# Convenience functions to mirror `JuliaSyntax.parse(Expr, ...)` in simple cases.
# Convenience functions to mirror `JuliaSyntax.parsestmt(Expr, ...)` in simple cases.
# Method taking a leading `Expr` type argument: the type is dropped and every
# remaining positional and keyword argument is forwarded to `fl_parse` unchanged.
function fl_parse(::Type{Expr}, args...; kws...)
    return fl_parse(args...; kws...)
end
# Method taking a leading `Expr` type argument: the type is dropped and every
# remaining positional and keyword argument is forwarded to `fl_parseall` unchanged.
function fl_parseall(::Type{Expr}, args...; kws...)
    return fl_parseall(args...; kws...)
end

60 changes: 35 additions & 25 deletions src/parser_api.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,25 @@ Base.display_error(io::IO, err::ParseError, bt) = Base.showerror(io, err, bt)


"""
parse!(stream::ParseStream; rule=:toplevel)
parse!(stream::ParseStream; rule=:all)

Parse Julia source code from a [`ParseStream`](@ref) object. Output tree data
structures may be extracted from `stream` with the [`build_tree`](@ref) function.

`rule` may be any of
* `:toplevel` (default) — parse a whole "file" of top level statements. In this
* `:all` (default) — parse a whole "file" of top level statements. In this
mode, the parser expects to fully consume the input.
* `:statement` — parse a single statement, or statements separated by semicolons.
* `:atom` — parse a single syntax "atom": a literal, identifier, or
parenthesized expression.
"""
function parse!(stream::ParseStream; rule::Symbol=:toplevel)
function parse!(stream::ParseStream; rule::Symbol=:all)
if rule == :toplevel
Base.depwarn("Use of rule == :toplevel in parse!() is deprecated. use `rule=:all` instead.", :parse!)
rule = :all
end
ps = ParseState(stream)
if rule === :toplevel
if rule === :all
parse_toplevel(ps)
elseif rule === :statement
parse_stmts(ps)
Expand All @@ -56,14 +60,14 @@ function parse!(stream::ParseStream; rule::Symbol=:toplevel)
end

"""
parse!(TreeType, io::IO; rule=:toplevel, version=VERSION)
parse!(TreeType, io::IO; rule=:all, version=VERSION)

Parse Julia source code from a seekable `IO` object. The output is a tuple
`(tree, diagnostics)`. When `parse!` returns, the stream `io` is positioned
directly after the last byte which was consumed during parsing.
"""
function parse!(::Type{TreeType}, io::IO;
rule::Symbol=:toplevel, version=VERSION, kws...) where {TreeType}
rule::Symbol=:all, version=VERSION, kws...) where {TreeType}
stream = ParseStream(io; version=version)
parse!(stream; rule=rule)
tree = build_tree(TreeType, stream; kws...)
Expand All @@ -75,7 +79,7 @@ function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version=
ignore_trivia=true, filename=nothing, first_line=1, ignore_errors=false,
ignore_warnings=ignore_errors) where {T}
stream = ParseStream(text, index; version=version)
if ignore_trivia && rule != :toplevel
if ignore_trivia && rule != :all
bump_trivia(stream, skip_newlines=true)
empty!(stream)
end
Expand All @@ -100,19 +104,22 @@ function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version=
end

_parse_docs = """
parse(TreeType, text, [index];
version=VERSION,
ignore_trivia=true,
filename=nothing,
ignore_errors=false,
ignore_warnings=ignore_errors)

# Or, with the same arguments
# Parse a single expression/statement
parsestmt(TreeType, text, [index];
version=VERSION,
ignore_trivia=true,
filename=nothing,
ignore_errors=false,
ignore_warnings=ignore_errors)

# Parse all statements at top level (file scope)
parseall(...)

# Parse a single syntax atom
parseatom(...)

Parse Julia source code string `text` into a data structure of type `TreeType`.
`parse` parses a single Julia statement, `parseall` parses top level statements
`parsestmt` parses a single Julia statement, `parseall` parses top level statements
at file scope and `parseatom` parses a single Julia identifier or other "syntax
atom".

Expand All @@ -136,16 +143,17 @@ parsing. To avoid exceptions due to warnings, use `ignore_warnings=true`. To
also avoid exceptions due to errors, use `ignore_errors=true`.
"""

parse(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:statement, true, T, text; kws...)[1]
parseall(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:toplevel, true, T, text; kws...)[1]
parseatom(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:atom, true, T, text; kws...)[1]
"$_parse_docs"
function parsestmt(::Type{T}, text::AbstractString; kws...) where {T}
    # Whole-string form: run `_parse` with the `:statement` rule and
    # `need_eof=true`, then keep only the first element of what it returns.
    result = _parse(:statement, true, T, text; kws...)
    return result[1]
end

@eval @doc $_parse_docs parse
@eval @doc $_parse_docs parseall
@eval @doc $_parse_docs parseatom
"$_parse_docs"
function parseall(::Type{T}, text::AbstractString; kws...) where {T}
    # Whole-string form: run `_parse` with the `:all` rule and
    # `need_eof=true`, then keep only the first element of what it returns.
    result = _parse(:all, true, T, text; kws...)
    return result[1]
end

parse(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:statement, false, T, text, index; kws...)
parseall(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:toplevel, false, T, text, index; kws...)
"$_parse_docs"
function parseatom(::Type{T}, text::AbstractString; kws...) where {T}
    # Whole-string form: run `_parse` with the `:atom` rule and
    # `need_eof=true`, then keep only the first element of what it returns.
    result = _parse(:atom, true, T, text; kws...)
    return result[1]
end

# Indexed form: start at `index`, pass `need_eof=false`, and return the result
# of `_parse` as-is (it is not unpacked, unlike the whole-string method).
function parsestmt(::Type{T}, text::AbstractString, index::Integer; kws...) where {T}
    return _parse(:statement, false, T, text, index; kws...)
end
# Indexed form: start at `index`, pass `need_eof=false`, and return the result
# of `_parse` as-is (it is not unpacked, unlike the whole-string method).
function parseall(::Type{T}, text::AbstractString, index::Integer; kws...) where {T}
    return _parse(:all, false, T, text, index; kws...)
end
# Indexed form: start at `index`, pass `need_eof=false`, and return the result
# of `_parse` as-is (it is not unpacked, unlike the whole-string method).
function parseatom(::Type{T}, text::AbstractString, index::Integer; kws...) where {T}
    return _parse(:atom, false, T, text, index; kws...)
end

#-------------------------------------------------------------------------------
Expand Down Expand Up @@ -178,7 +186,7 @@ This interface works on UTF-8 encoded string or buffer data only.
"""
function tokenize(text)
ps = ParseStream(text)
parse!(ps, rule=:toplevel)
parse!(ps, rule=:all)
ts = ps.tokens
output_tokens = Token[]
for i = 2:length(ts)
Expand All @@ -198,3 +206,5 @@ end
# Byte-buffer method: return the elements of `text` selected by `token.range`.
untokenize(token::Token, text::Vector{UInt8}) = text[token.range]

@deprecate parse parsestmt
8 changes: 4 additions & 4 deletions src/source_files.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,18 @@ function SourceFile(; filename, kwargs...)
end

# Get line number of the given byte within the code
function source_line_index(source::SourceFile, byte_index)
function _source_line_index(source::SourceFile, byte_index)
lineidx = searchsortedlast(source.line_starts, byte_index)
return (lineidx < lastindex(source.line_starts)) ? lineidx : lineidx-1
end
# Convert a line index into a line number by offsetting with the file's first
# line: `lineidx == 1` maps to `source.first_line`.
function _source_line(source::SourceFile, lineidx)
    return lineidx + source.first_line - 1
end
source_line(source::SourceFile, byte_index) = _source_line(source, source_line_index(source, byte_index))
# Line number for `byte_index`: look up the line index first, then shift it by
# the file's first line via `_source_line`.
function source_line(source::SourceFile, byte_index)
    lineidx = _source_line_index(source, byte_index)
    return _source_line(source, lineidx)
end

"""
Get line number and character within the line at the given byte index.
"""
function source_location(source::SourceFile, byte_index)
lineidx = source_line_index(source, byte_index)
lineidx = _source_line_index(source, byte_index)
i = source.line_starts[lineidx]
column = 1
while i < byte_index
Expand All @@ -63,7 +63,7 @@ Get byte range of the source line at byte_index, buffered by
"""
function source_line_range(source::SourceFile, byte_index;
context_lines_before=0, context_lines_after=0)
lineidx = source_line_index(source, byte_index)
lineidx = _source_line_index(source, byte_index)
fbyte = source.line_starts[max(lineidx-context_lines_before, 1)]
lbyte = source.line_starts[min(lineidx+1+context_lines_after, end)] - 1
fbyte,lbyte
Expand Down
4 changes: 2 additions & 2 deletions test/benchmark.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ end

all_base_code = concat_base()

b_ParseStream = @benchmark JuliaSyntax.parse!(JuliaSyntax.ParseStream(all_base_code), rule=:toplevel)
b_ParseStream = @benchmark JuliaSyntax.parse!(JuliaSyntax.ParseStream(all_base_code), rule=:all)
b_GreenNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.GreenNode, all_base_code)
b_SyntaxNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, all_base_code)
b_Expr = @benchmark JuliaSyntax.parseall(Expr, all_base_code)
Expand All @@ -30,5 +30,5 @@ b_Expr = @benchmark JuliaSyntax.parseall(Expr, all_base_code)
# Allocs.clear()
# stream = JuliaSyntax.ParseStream(text);
# JuliaSyntax.peek(stream);
# Allocs.@profile sample_rate=1 JuliaSyntax.parse(stream)
# Allocs.@profile sample_rate=1 JuliaSyntax.parse!(stream)
# PProf.Allocs.pprof()
Loading