Skip to content

Commit

Permalink
Add parsing options (#143)
Browse files Browse the repository at this point in the history
* Add parsing options

* more
  • Loading branch information
bicycle1885 committed Nov 9, 2023
1 parent 245242e commit cd93058
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 35 deletions.
109 changes: 75 additions & 34 deletions src/document.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,49 +73,81 @@ function prettyprint(io::IO, doc::Document)
prettyprint(io, doc.node)
end

"""
parsexml(xmlstring)
# enum xmlParserOption flags (incomplete)
const PARSE_NOERROR = Cint(32)
const PARSE_NOWARNING = Cint(64)
const PARSE_PEDANTIC = Cint(128)
const PARSE_NOBLANKS = Cint(256)
const PARSE_NODICT = Cint(4096)

function parse_options(;
noerror::Bool = false,
nowarning::Bool = false,
pedantic::Bool = false,
noblanks::Bool = false,
)
options = PARSE_NODICT # do not reuse the context dictionary
noerror && (options |= PARSE_NOERROR)
nowarning && (options |= PARSE_NOWARNING)
pedantic && (options |= PARSE_PEDANTIC)
noblanks && (options |= PARSE_NOBLANKS)
return options, (noerror = noerror, nowarning = nowarning)
end

"""
parsexml(xmlstring; options...)
Parse `xmlstring` and create an XML document.
## Parsing Options
- `noerror = false`: suppress (recoverable) error reports
- `nowarning = false`: suppress warning reports
- `pedantic = false`: pedantic error reporting
- `noblanks = false`: remove blank nodes
"""
function parsexml(xmlstring::AbstractString)
function parsexml(xmlstring::AbstractString; options...)
if isempty(xmlstring)
throw(ArgumentError("empty XML string"))
end
opts, args = parse_options(; options...)
doc_ptr = @check ccall(
(:xmlParseMemory, libxml2),
(:xmlReadMemory, libxml2),
Ptr{_Node},
(Cstring, Cint),
xmlstring, sizeof(xmlstring)) != C_NULL
(Cstring, Cint, Cstring, Cstring, Cint),
xmlstring, sizeof(xmlstring), C_NULL, C_NULL, opts) != C_NULL
show_warnings(; args...)
return Document(doc_ptr)
end

function parsexml(xmldata::Vector{UInt8})
return parsexml(String(xmldata))
function parsexml(xmldata::Vector{UInt8}; options...)
return parsexml(String(xmldata); options...)
end

function parsexml(xmldata::Base.CodeUnits{UInt8,String})
return parsexml(String(xmldata))
function parsexml(xmldata::Base.CodeUnits{UInt8,String}; options...)
return parsexml(String(xmldata); options...)
end

"""
parsehtml(htmlstring)
parsehtml(htmlstring; options...)
Parse `htmlstring` and create an HTML document.
See [`parsexml`](@ref) for parsing options.
"""
function parsehtml(htmlstring::AbstractString)
function parsehtml(htmlstring::AbstractString; options...)
if isempty(htmlstring)
throw(ArgumentError("empty HTML string"))
end
url = C_NULL
encoding = C_NULL
options = 1
opts, args = parse_options(; options...)
doc_ptr = @check ccall(
(:htmlReadMemory, libxml2),
Ptr{_Node},
(Cstring, Cint, Cstring, Cstring, Cint),
htmlstring, sizeof(htmlstring), url, encoding, options) != C_NULL
show_warnings()
htmlstring, sizeof(htmlstring), url, encoding, opts) != C_NULL
show_warnings(; args...)
return Document(doc_ptr)
end

Expand All @@ -128,77 +160,86 @@ function parsehtml(htmldata::Base.CodeUnits{UInt8,String})
end

"""
readxml(filename)
readxml(filename; options...)
Read `filename` and create an XML document.
See [`parsexml`](@ref) for parsing options.
"""
function readxml(filename::AbstractString)
function readxml(filename::AbstractString; options...)
encoding = C_NULL
# Do not reuse the context dictionary.
options = 1 << 12
opts, args = parse_options(; options...)
doc_ptr = @check ccall(
(:xmlReadFile, libxml2),
Ptr{_Node},
(Cstring, Ptr{UInt8}, Cint),
filename, encoding, options) != C_NULL
filename, encoding, opts) != C_NULL
show_warnings(; args...)
return Document(doc_ptr)
end

"""
readxml(input::IO)
readxml(input::IO; options...)
Read `input` and create an XML document.
See [`parsexml`](@ref) for parsing options.
"""
function readxml(input::IO)
function readxml(input::IO; options...)
readcb = make_read_callback()
closecb = C_NULL
context = pointer_from_objref(input)
uri = C_NULL
encoding = C_NULL
options = 0
opts, args = parse_options(; options...)
doc_ptr = @check ccall(
(:xmlReadIO, libxml2),
Ptr{_Node},
(Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Cstring, Cint),
readcb, closecb, context, uri, encoding, options) != C_NULL
readcb, closecb, context, uri, encoding, opts) != C_NULL
show_warnings(; args...)
return Document(doc_ptr)
end

"""
readhtml(filename)
readhtml(filename; options...)
Read `filename` and create an HTML document.
See [`parsexml`](@ref) for parsing options.
"""
function readhtml(filename::AbstractString)
function readhtml(filename::AbstractString; options...)
encoding = C_NULL
options = 0
opts, args = parse_options(; options...)
doc_ptr = @check ccall(
(:htmlReadFile, libxml2),
Ptr{_Node},
(Cstring, Cstring, Cint),
filename, encoding, options) != C_NULL
show_warnings()
filename, encoding, opts) != C_NULL
show_warnings(; args...)
return Document(doc_ptr)
end

"""
readhtml(input::IO)
readhtml(input::IO; options...)
Read `input` and create an HTML document.
See [`parsexml`](@ref) for parsing options.
"""
function readhtml(input::IO)
function readhtml(input::IO; options...)
readcb = make_read_callback()
closecb = C_NULL
context = pointer_from_objref(input)
uri = C_NULL
encoding = C_NULL
options = 0
opts, args = parse_options(; options...)
doc_ptr = @check ccall(
(:htmlReadIO, libxml2),
Ptr{_Node},
(Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Cstring, Cint),
readcb, closecb, context, uri, encoding, options) != C_NULL
show_warnings()
readcb, closecb, context, uri, encoding, opts) != C_NULL
show_warnings(; args...)
return Document(doc_ptr)
end

Expand Down
4 changes: 3 additions & 1 deletion src/error.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,10 @@ function throw_xml_error()
end

# Show warning massages if any.
function show_warnings()
function show_warnings(; noerror = false, nowarning = true)
for err in XML_GLOBAL_ERROR_STACK
noerror && err.level == XML_ERR_ERROR && continue
nowarning && err.level == XML_ERR_WARNING && continue
buf = IOBuffer()
showerror(buf, err)
@warn(String(take!(buf)))
Expand Down

0 comments on commit cd93058

Please sign in to comment.