Skip to content

Commit

Permalink
Cleanup/document source code access functions + add filename() (#470)
Browse files Browse the repository at this point in the history
* Move all source code access functions which refer to source locations
  and strings into the top of source_files.jl, and add some
  documentation for these.
* Add `filename()` function to determine source file name of a syntax
  object
* Also add a minor generalization to SyntaxNode->Expr conversion code to
  make Expr conversion general enough to allow it to also be used for
  JuliaLowering.SyntaxTree. (internal/experimental interface, for now)
  • Loading branch information
c42f committed Jul 21, 2024
1 parent a41f5e1 commit 3dbcd32
Show file tree
Hide file tree
Showing 9 changed files with 182 additions and 89 deletions.
37 changes: 28 additions & 9 deletions docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,36 @@ JuliaSyntax.untokenize
JuliaSyntax.Token
```

## Source file handling
## Source code handling

This section describes the generic functions for accessing source text,
computing source locations, and formatting source code.

Contiguous syntax objects like nodes in the syntax tree should implement the
following where possible:

```@docs
JuliaSyntax.SourceFile
JuliaSyntax.highlight
JuliaSyntax.sourcetext
JuliaSyntax.sourcefile
JuliaSyntax.byte_range
```

This will provide implementations of the following which include range
information, line numbers, and fancy highlighting of source ranges:

```@docs
JuliaSyntax.first_byte
JuliaSyntax.last_byte
JuliaSyntax.filename
JuliaSyntax.source_line
JuliaSyntax.source_location
JuliaSyntax.sourcetext
JuliaSyntax.highlight
```

`SourceFile`-specific functions:

```@docs
JuliaSyntax.SourceFile
JuliaSyntax.source_line_range
```

Expand All @@ -64,8 +86,5 @@ JuliaSyntax.GreenNode
```

Functions applicable to syntax trees include everything in the sections on
heads/kinds, and source file handling.

```@docs
JuliaSyntax.byte_range
```
heads/kinds as well as the accessor functions in the source code handling
section.
13 changes: 6 additions & 7 deletions src/diagnostics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ function Diagnostic(first_byte, last_byte; error=nothing, warning=nothing)
Diagnostic(first_byte, last_byte, level, message)
end

first_byte(d::Diagnostic) = d.first_byte
last_byte(d::Diagnostic) = d.last_byte
byte_range(d::Diagnostic) = d.first_byte:d.last_byte
is_error(d::Diagnostic) = d.level === :error

# Make relative path into a file URL
Expand Down Expand Up @@ -72,12 +71,12 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile)
(:normal, "Info")
line, col = source_location(source, first_byte(diagnostic))
linecol = "$line:$col"
filename = source.filename
fname = filename(source)
file_href = nothing
if !isnothing(filename)
locstr = "$filename:$linecol"
if !startswith(filename, "REPL[") && get(io, :color, false)
url = _file_url(filename)
if !isempty(fname)
locstr = "$fname:$linecol"
if !startswith(fname, "REPL[") && get(io, :color, false)
url = _file_url(fname)
if !isnothing(url)
file_href = url*"#$linecol"
end
Expand Down
24 changes: 13 additions & 11 deletions src/expr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ function _strip_parens(ex)
end
end

# Julia value of the leaf `node`, in the form it would take inside an `Expr`.
# `_leaf_to_Expr` obtains leaf values through this function rather than reading
# `node.val` directly.
_expr_leaf_val(node::SyntaxNode) = node.val

function _leaf_to_Expr(source, txtbuf, head, srcrange, node)
k = kind(head)
if k == K"core_@cmd"
Expand All @@ -79,7 +84,7 @@ function _leaf_to_Expr(source, txtbuf, head, srcrange, node)
Expr(:error) :
Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`")
else
val = isnothing(node) ? parse_julia_literal(txtbuf, head, srcrange) : node.val
val = isnothing(node) ? parse_julia_literal(txtbuf, head, srcrange) : _expr_leaf_val(node)
if val isa Union{Int128,UInt128,BigInt}
# Ignore the values of large integers and convert them back to
# symbolic/textual form for compatibility with the Expr
Expand Down Expand Up @@ -519,14 +524,7 @@ function build_tree(::Type{Expr}, stream::ParseStream;
only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[entry.ex]))
end

"""
Get the source file for a given syntax object
"""
function sourcefile(node::SyntaxNode)
node.source
end

function _to_expr(node::SyntaxNode)
function _to_expr(node)
file = sourcefile(node)
if !haschildren(node)
offset, txtbuf = _unsafe_wrap_substring(sourcetext(file))
Expand All @@ -537,9 +535,13 @@ function _to_expr(node::SyntaxNode)
_internal_node_to_Expr(file, byte_range(node), head(node), byte_range.(cs), head.(cs), args)
end

function Base.Expr(node::SyntaxNode)
function to_expr(node)
ex = _to_expr(node)
loc = source_location(LineNumberNode, sourcefile(node), first_byte(node))
loc = source_location(LineNumberNode, node)
only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[ex]))
end

function Base.Expr(node::SyntaxNode)
to_expr(node)
end

3 changes: 1 addition & 2 deletions src/parse_stream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -514,8 +514,7 @@ struct FullToken
end

head(t::FullToken) = t.head
first_byte(t::FullToken) = t.first_byte
last_byte(t::FullToken) = t.last_byte
byte_range(t::FullToken) = t.first_byte:t.last_byte
span(t::FullToken) = 1 + last_byte(t) - first_byte(t)

function peek_full_token(stream::ParseStream, n::Integer=1;
Expand Down
2 changes: 2 additions & 0 deletions src/parser_api.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ function Base.showerror(io::IO, err::ParseError)
show_diagnostics(io, err.diagnostics[1:i], err.source)
end

# Source file of a `ParseError`: the `source` field stored in the error, which
# is also what `showerror` passes to `show_diagnostics`.
function sourcefile(err::ParseError)
    return err.source
end

"""
parse!(stream::ParseStream; rule=:all)
Expand Down
159 changes: 124 additions & 35 deletions src/source_files.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,110 @@
#-------------------------------------------------------------------------------
# Generic functions for source text, source location computation and formatting
# functions

"""
    sourcefile(x)

Return the source file object (usually a `SourceFile`) which the syntax object
`x` belongs to. Combined with a byte range, the source file may be used to
compute `source_line()`, `source_location()`, `filename()`, etc.
"""
function sourcefile end

"""
    byte_range(x)

Return the range of bytes in the source text which is covered by `x`.
"""
function byte_range end

"""
    first_byte(x)

Return the first byte of `x` in the source text, that is, the first element of
`byte_range(x)`.
"""
function first_byte(x)
    return first(byte_range(x))
end

"""
    last_byte(x)

Return the last byte of `x` in the source text, that is, the last element of
`byte_range(x)`.
"""
last_byte(x) = last(byte_range(x))

"""
    filename(x)

Return the file name associated with `x`, or an empty string if there is none.

For objects `x` such as syntax trees this defers to `filename(sourcefile(x))`
by default. If `sourcefile(x)` is `nothing`, an empty string is returned.
"""
function filename(x)
    src = sourcefile(x)
    if isnothing(src)
        return ""
    end
    return filename(src)
end

"""
    source_line(x)
    source_line(source::SourceFile, byte_index::Integer)

Return the line number of the first line on which the object `x` appears. The
second form returns the line number at the given `byte_index` within `source`.
"""
function source_line(x)
    return source_line(sourcefile(x), first_byte(x))
end

"""
    source_location(x)
    source_location(source::SourceFile, byte_index::Integer)

    source_location(LineNumberNode, x)
    source_location(LineNumberNode, source, byte_index)

Get `(line,column)` of the first byte where object `x` appears in the source.
The second form allows one to be more precise with the `byte_index`, given the
source file.

Providing `LineNumberNode` as the first argument will return the line and file
name in a line number node object.
"""
source_location(x) = source_location(sourcefile(x), first_byte(x))

"""
    sourcetext(x)

Return the full source text of the syntax object `x`: a view of `sourcefile(x)`
over `byte_range(x)`.
"""
sourcetext(x) = view(sourcefile(x), byte_range(x))

"""
    highlight(io, x; color, note, notecolor,
              context_lines_before, context_lines_inner, context_lines_after)

    highlight(io::IO, source::SourceFile, range::UnitRange; kws...)

Print the lines of source code surrounding `x`, with `x` itself highlighted
using the background `color` and underlined with markers in the text. An
annotation `note`, printed in `notecolor`, may be supplied. By default, `x`
should be an object which implements `sourcefile(x)` and `byte_range(x)`.

The context arguments `context_lines_before`, etc, give the number of lines of
code printed as context before and after the highlighted region;
`context_lines_inner` is the number of context lines shown inside a multiline
region.

The second form accepts the same keywords as the first, but takes the source
file and byte range explicitly.
"""
function highlight(io::IO, x; kws...)
    src = sourcefile(x)
    brange = byte_range(x)
    return highlight(io, src, brange; kws...)
end


#-------------------------------------------------------------------------------
"""
SourceFile(code [; filename=nothing, first_line=1, first_index=1])
Expand Down Expand Up @@ -53,16 +160,19 @@ function _source_line_index(source::SourceFile, byte_index)
end
_source_line(source::SourceFile, lineidx) = lineidx + source.first_line - 1

"""
Get the line number at the given byte index.
"""
source_line(source::SourceFile, byte_index) =
# `LineNumberNode` form for a generic syntax object `x`: forwards to the
# `(LineNumberNode, source, byte_index)` method using the source file and first
# byte of `x`.
source_location(::Type{LineNumberNode}, x) =
    source_location(LineNumberNode, sourcefile(x), first_byte(x))

source_line(source::SourceFile, byte_index::Integer) =
_source_line(source, _source_line_index(source, byte_index))

"""
Get line number and character within the line at the given byte index.
"""
function source_location(source::SourceFile, byte_index)
# File name stored in `source`. A `filename` field of `nothing` is reported as
# an empty string.
function filename(source::SourceFile)
    name = source.filename
    return isnothing(name) ? "" : name
end

function source_location(source::SourceFile, byte_index::Integer)
lineidx = _source_line_index(source, byte_index)
i = source.line_starts[lineidx]
column = 1
Expand All @@ -77,7 +187,7 @@ end
Get byte range of the source line at byte_index, buffered by
`context_lines_before` and `context_lines_after` before and after.
"""
function source_line_range(source::SourceFile, byte_index;
function source_line_range(source::SourceFile, byte_index::Integer;
context_lines_before=0, context_lines_after=0)
lineidx = _source_line_index(source, byte_index)
fbyte = source.line_starts[max(lineidx-context_lines_before, 1)]
Expand All @@ -86,14 +196,14 @@ function source_line_range(source::SourceFile, byte_index;
lbyte + source.byte_offset)
end

function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index)
LineNumberNode(source_line(source, byte_index),
isnothing(source.filename) ? nothing : Symbol(source.filename))
function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index::Integer)
fn = filename(source)
LineNumberNode(source_line(source, byte_index), isempty(fn) ? nothing : Symbol(fn))
end

function Base.show(io::IO, ::MIME"text/plain", source::SourceFile)
fn = isnothing(source.filename) ? "" : " $(source.filename)"
header = "## SourceFile$fn ##"
fn = filename(source)
header = "## SourceFile$(isempty(fn) ? "" : " ")$fn ##"
print(io, header, "\n")
heightlim = displaysize(io)[1] ÷ 2
if !get(io, :limit, false) || length(source.line_starts) <= heightlim
Expand Down Expand Up @@ -193,27 +303,6 @@ function _print_marker_line(io, prefix_str, str, underline, singleline, color,
end
end

function highlight(io::IO, x; kws...)
highlight(io, sourcefile(x), byte_range(x); kws...)
end

"""
highlight(io::IO, source::SourceFile, range::UnitRange;
color, note, notecolor,
context_lines_before, context_lines_inner, context_lines_after,
highlight(io, x; kws...)
Print the lines of source code `source` surrounding the given byte `range`
which is highlighted with background `color` and underlined with markers in the
text. A `note` in `notecolor` may be provided as annotation.
In the second form, `x` is an object with `sourcefile(x)` and `byte_range(x)`
implemented.
The context arguments `context_lines_before`, etc, refer to the number of
lines of code which will be printed as context before and after, with `inner`
referring to context lines inside a multiline region.
"""
function highlight(io::IO, source::SourceFile, range::UnitRange;
color=(120,70,70), context_lines_before=2,
context_lines_inner=1, context_lines_after=2,
Expand Down
26 changes: 4 additions & 22 deletions src/syntax_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -121,31 +121,13 @@ head(node::AbstractSyntaxNode) = head(node.raw)

span(node::AbstractSyntaxNode) = span(node.raw)

first_byte(node::AbstractSyntaxNode) = node.position
last_byte(node::AbstractSyntaxNode) = node.position + span(node) - 1
byte_range(node::AbstractSyntaxNode) = node.position:(node.position + span(node) - 1)

"""
byte_range(ex)
Return the range of bytes which `ex` covers in the source text.
"""
byte_range(ex) = first_byte(ex):last_byte(ex)

"""
sourcetext(node)
Get the full source text of a node.
"""
function sourcetext(node::AbstractSyntaxNode)
view(sourcefile(node), byte_range(node))
end

source_line(node::AbstractSyntaxNode) = source_line(sourcefile(node), node.position)
source_location(node::AbstractSyntaxNode) = source_location(sourcefile(node), node.position)
sourcefile(node::AbstractSyntaxNode) = node.source

function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode,
indent, show_byte_offsets)
fname = sourcefile(node).filename
fname = filename(node)
line, col = source_location(node)
posstr = "$(lpad(line, 4)):$(rpad(col,3))"
if show_byte_offsets
Expand Down Expand Up @@ -192,7 +174,7 @@ end

function Base.show(io::IO, ::MIME"text/plain", node::AbstractSyntaxNode; show_byte_offsets=false)
println(io, "line:col│$(show_byte_offsets ? " byte_range │" : "") tree │ file_name")
_show_syntax_node(io, Ref{Union{Nothing,String}}(nothing), node, "", show_byte_offsets)
_show_syntax_node(io, Ref(""), node, "", show_byte_offsets)
end

function Base.show(io::IO, ::MIME"text/x.sexpression", node::AbstractSyntaxNode)
Expand Down
Loading

0 comments on commit 3dbcd32

Please sign in to comment.