/
iri.ex
134 lines (98 loc) · 5.54 KB
/
iri.ex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
defmodule Exonerate.Formats.Iri do
  @moduledoc """
  Module which provides a macro that generates special code for an iri
  filter.  This is an absolute uri with internationalization support.

  If you require a relative uri, use `Exonerate.Formats.IriReference`.

  The format is governed by appendix A of RFC 3986, as modified by
  section 2.2 of RFC 3987:

  - https://www.rfc-editor.org/rfc/rfc3986.txt
  - https://www.rfc-editor.org/rfc/rfc3987.txt
  """

  alias Exonerate.Cache

  @doc """
  Creates a `NimbleParsec` parser `~iri/1`.

  This function returns `{:ok, ...}` if the passed string is a valid iri,
  or `{:error, reason, ...}` if it is not.  See `NimbleParsec` for
  more information on the return tuples.

  The function will only be created once per module, and it is safe to call
  the macro more than once: the parser code is emitted only when
  `Exonerate.Cache.register_context/2` returns a truthy value for the calling
  module and the chosen name; otherwise the macro expands to `nil`.

  ## Options:

  - `:name` (atom): the name of the function to create.  Defaults to
    `:"~iri"`
  """
  defmacro filter(opts \\ []) do
    name = Keyword.get(opts, :name, :"~iri")

    if Cache.register_context(__CALLER__.module, name) do
      quote do
        require Pegasus
        import NimbleParsec

        # Notes on how the PEG below departs from a literal transcription of
        # the RFC 3986 / RFC 3987 ABNF.  PEG choice is ordered and commits to
        # the first alternative that succeeds, so alternative order matters.
        #
        # * IRI_ihost does not list IRI_IPv4address.  IRI_ireg_name already
        #   accepts every dotted-quad string, and committing to a dotted-quad
        #   prefix would reject hosts such as "1.2.3.4.example.com".
        # * IRI_Snum is the RFC 3986 `dec-octet` (0-255, no leading zeros),
        #   with the longest alternatives first.
        # * In IRI_IPv6address the forms ending in an IPv4 address are tried
        #   first; otherwise IRI_IPv6_comp succeeds on the prefix in front of
        #   the first "." and the closing "]" can never match.
        # * The groups after "::" in IRI_IPv6v4_comp are written as
        #   `(hex ":")?` so that a hex-looking first IPv4 octet is given back
        #   when it is not followed by ":".
        # * The number of 16-bit groups on either side of "::" is bounded per
        #   side only; the two sides are not cross-checked against each other.
        Pegasus.parser_from_string(~S"""
        IRI                 <- IRI_scheme ":" IRI_ihier_part ("?" IRI_iquery)? ("#" IRI_ifragment)?

        IRI_ihier_part      <- "//" IRI_iauthority IRI_ipath_abempty
                             / IRI_ipath_absolute
                             / IRI_ipath_rootless
                             / IRI_ipath_empty

        IRI_scheme          <- IRI_ALPHA ( IRI_ALPHA / IRI_DIGIT / "+" / "-" / "." )*

        IRI_iauthority      <- (IRI_iuserinfo "@")? IRI_ihost (":" IRI_port)?
        IRI_iuserinfo       <- ( IRI_iunreserved / IRI_pct_encoded / IRI_sub_delims / ":" )*
        IRI_ihost           <- IRI_IP_literal / IRI_ireg_name
        IRI_port            <- IRI_DIGIT*

        IRI_IP_literal      <- "[" ( IRI_IPv6address / IRI_IPvFuture ) "]"

        IRI_IPvFuture       <- "v" (IRI_HEXDIG)+ "." ( IRI_unreserved / IRI_sub_delims / ":" )+

        IRI_DIGIT           <- [0-9]
        IRI_HEXDIG          <- [0-9A-Fa-f]
        IRI_ALPHA           <- [A-Za-z]

        IRI_Snum            <- "25" [0-5]
                             / "2" [0-4] IRI_DIGIT
                             / "1" IRI_DIGIT IRI_DIGIT
                             / [1-9] IRI_DIGIT
                             / IRI_DIGIT
        IRI_IPv4address     <- IRI_Snum "." IRI_Snum "." IRI_Snum "." IRI_Snum

        IRI_IPv6address     <- IRI_IPv6v4_full / IRI_IPv6v4_comp / IRI_IPv6_full / IRI_IPv6_comp
        IRI_IPv6_hex        <- IRI_HEXDIG IRI_HEXDIG? IRI_HEXDIG? IRI_HEXDIG?
        IRI_IPv6_full       <- IRI_IPv6_hex ":" IRI_IPv6_hex ":" IRI_IPv6_hex ":" IRI_IPv6_hex ":" IRI_IPv6_hex ":" IRI_IPv6_hex ":" IRI_IPv6_hex ":" IRI_IPv6_hex
        IRI_IPv6_comp       <- (IRI_IPv6_hex (":" IRI_IPv6_hex)? (":" IRI_IPv6_hex)? (":" IRI_IPv6_hex)? (":" IRI_IPv6_hex)? (":" IRI_IPv6_hex)?)? "::"
                               (IRI_IPv6_hex (":" IRI_IPv6_hex)? (":" IRI_IPv6_hex)? (":" IRI_IPv6_hex)? (":" IRI_IPv6_hex)? (":" IRI_IPv6_hex)?)?
        IRI_IPv6v4_full     <- IRI_IPv6_hex ":" IRI_IPv6_hex ":" IRI_IPv6_hex ":" IRI_IPv6_hex ":" IRI_IPv6_hex ":" IRI_IPv6_hex ":" IRI_IPv4address
        IRI_IPv6v4_comp     <- (IRI_IPv6_hex (":" IRI_IPv6_hex)? (":" IRI_IPv6_hex)? (":" IRI_IPv6_hex)?)? "::"
                               (IRI_IPv6_hex ":")? (IRI_IPv6_hex ":")? (IRI_IPv6_hex ":")? (IRI_IPv6_hex ":")?
                               IRI_IPv4address

        IRI_ireg_name       <- ( IRI_iunreserved / IRI_pct_encoded / IRI_sub_delims )*

        IRI_ipath           <- IRI_ipath_abempty    # begins with "/" or is empty
                             / IRI_ipath_absolute   # begins with "/" but not "//"
                             / IRI_ipath_no_scheme  # begins with a non-colon IRI_segment
                             / IRI_ipath_rootless   # begins with a IRI_segment
                             / IRI_ipath_empty      # zero characters

        IRI_ipath_abempty   <- ( "/" IRI_isegment )*
        IRI_ipath_absolute  <- "/" ( IRI_isegment_nz ( "/" IRI_isegment )*)?
        IRI_ipath_no_scheme <- IRI_isegment_nz_nc ( "/" IRI_isegment )*
        IRI_ipath_rootless  <- IRI_isegment_nz ( "/" IRI_isegment )*
        IRI_ipath_empty     <- ""

        IRI_isegment        <- IRI_ipchar*
        IRI_isegment_nz     <- IRI_ipchar+
        IRI_isegment_nz_nc  <- ( IRI_iunreserved / IRI_pct_encoded / IRI_sub_delims / "@" )+
        # non-zero-length IRI_segment without any colon ":"

        IRI_ipchar          <- IRI_iunreserved / IRI_pct_encoded / IRI_sub_delims / ":" / "@"

        IRI_iquery          <- ( IRI_ipchar / IRI_iprivate / "/" / "?" )*

        IRI_ifragment       <- ( IRI_ipchar / "/" / "?" )*

        IRI_pct_encoded     <- "%" IRI_HEXDIG IRI_HEXDIG

        IRI_unreserved      <- IRI_ALPHA / IRI_DIGIT / "-" / "." / "_" / "~"  # needed for ipVfuture
        IRI_iunreserved     <- IRI_ALPHA / IRI_DIGIT / "-" / "." / "_" / "~" / IRI_ucschar
        IRI_reserved        <- IRI_gen_delims / IRI_sub_delims
        IRI_gen_delims      <- ":" / "/" / "?" / "#" / "[" / "]" / "@"
        IRI_sub_delims      <- "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
        """)

        # `ucschar` from RFC 3987 section 2.2, spelled out range by range so
        # that C1 controls (U+0080..U+009F), noncharacters and the
        # U+E0000..U+E0FFF block are rejected rather than accepted by default.
        defcombinatorp(
          :IRI_ucschar,
          utf8_char([
            0xA0..0xD7FF,
            0xF900..0xFDCF,
            0xFDF0..0xFFEF,
            0x10000..0x1FFFD,
            0x20000..0x2FFFD,
            0x30000..0x3FFFD,
            0x40000..0x4FFFD,
            0x50000..0x5FFFD,
            0x60000..0x6FFFD,
            0x70000..0x7FFFD,
            0x80000..0x8FFFD,
            0x90000..0x9FFFD,
            0xA0000..0xAFFFD,
            0xB0000..0xBFFFD,
            0xC0000..0xCFFFD,
            0xD0000..0xDFFFD,
            0xE1000..0xEFFFD
          ])
        )

        # `iprivate` from RFC 3987 section 2.2; the grammar above only
        # references it from IRI_iquery.
        defcombinatorp(
          :IRI_iprivate,
          utf8_char([0xE000..0xF8FF, 0xF0000..0xFFFFD, 0x100000..0x10FFFD])
        )

        # `eos` makes the parser fail unless the whole input was consumed.
        defparsec(unquote(name), parsec(:IRI) |> eos())
      end
    end
  end
end