-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathcheatsheet.R
67 lines (66 loc) · 5.53 KB
/
cheatsheet.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Regular-expression cheat sheet data.
#
# One row per cheat sheet entry, with four columns:
#   category - top-level section ("character classes", "anchors", ...)
#   group    - optional sub-section within a category; NA when the category
#              has no sub-sections
#   regexp   - the pattern(s) being described, written as an HTML snippet
#   text     - short description of the pattern; may also contain HTML
#
# Backslashes are doubled because these are R string literals: "\\w" holds
# the two characters \w.
#
# After editing, re-save the table as internal package data with:
# usethis::use_data(cheatsheet, overwrite = TRUE, internal = TRUE)
cheatsheet <- tibble::tribble(
~category, ~group, ~regexp, ~text,
# character classes ----
"character classes", "regular", "<code>.</code>", "any character except newline",
"character classes", "regular", "<code>\\w</code> <code>\\d</code> <code>\\s</code>", "word, digit, whitespace",
"character classes", "regular", "<code>\\W</code> <code>\\D</code> <code>\\S</code>", "not word, not digit, not whitespace",
"character classes", "regular", "<code>\\p{property name}</code>", "matches character with unicode property, like <code>\\p{Uppercase}</code>, see <a href=\"http://www.unicode.org/reports/tr44/#Property_Index\">unicode property list</a> (not supported by <em>regexplain</em>).",
"character classes", "regular", "<code>[abc]</code>", "any of a, b or c",
"character classes", "regular", "<code>[^abc]</code>", "not a, b, or c",
"character classes", "regular", "<code>[a-g]</code> <code>[1-3]</code>", "character between a & g or 1 & 3",
"character classes", "regular", "<code>[\\^\\-]</code> or <code>[-^]</code>", "matches <code>^</code> or <code>-</code>",
"character classes", "pre-built", "Used inside <code>[]</code>", "Example <code>[[:digit:]AX]</code> matches all digits and A and X",
"character classes", "pre-built", "<code>[:punct:]</code>", "punctuation",
"character classes", "pre-built", "<code>[:alpha:]</code>", "letters",
"character classes", "pre-built", "<code>[:lower:]</code>", "lowercase letters",
"character classes", "pre-built", "<code>[:upper:]</code>", "uppercase letters",
"character classes", "pre-built", "<code>[:digit:]</code>", "digits",
"character classes", "pre-built", "<code>[:xdigit:]</code>", "hex digits",
"character classes", "pre-built", "<code>[:alnum:]</code>", "letters and numbers",
"character classes", "pre-built", "<code>[:cntrl:]</code>", "control characters",
"character classes", "pre-built", "<code>[:graph:]</code>", "letters, numbers, and punctuation",
"character classes", "pre-built", "<code>[:print:]</code>", "letters, numbers, punctuation, and whitespace",
"character classes", "pre-built", "<code>[:space:]</code>", "space characters (basically equivalent to <code>\\s</code>)",
"character classes", "pre-built", "<code>[:blank:]</code>", "space and tab",
# anchors ----
"anchors", NA, "<code>^</code>", "start of string",
"anchors", NA, "<code>$</code>", "end of string",
"anchors", NA, "<code>\\b</code>", "word boundary",
"anchors", NA, "<code>\\B</code>", "not-word boundary",
"anchors", NA, "<code>\\A</code>", "stringr multiline: match start of the input",
"anchors", NA, "<code>\\z</code>", "stringr multiline: match end of the input",
"anchors", NA, "<code>\\Z</code>", "stringr multiline: match end of the input, but before final line terminator (if it exists)",
# escaped characters ----
"escaped characters", "general", "<code>\\.</code>", "dot",
"escaped characters", "general", "<code>\\*</code>", "asterisk",
"escaped characters", "general", "<code>\\\\</code>", "backslash",
"escaped characters", "general", "<code>\\t</code>", "tab",
"escaped characters", "general", "<code>\\n</code>", "linefeed",
"escaped characters", "general", "<code>\\r</code>", "carriage return",
"escaped characters", "hex", "<code>\\xhh</code>", "2 hex digits",
"escaped characters", "hex", "<code>\\x{hhhh}</code>", "1-6 hex digits",
"escaped characters", "hex", "<code>\\uhhhh</code>", "4 hex digits",
"escaped characters", "hex", "<code>\\Uhhhhhhhh</code>", "8 hex digits",
"escaped characters", "hex", "<code>\\N{name}</code>", "Name of unicode character, e.g. <code>\\N{grinning face}</code> (not supported by <em>regexplain</em>)",
"escaped characters", "hex", "<code>\\0ooo</code>", "octal character where \"ooo\" is 1-3 octal digits",
"escaped characters", "control characters", "<code>\\a</code>", "bell",
"escaped characters", "control characters", "<code>\\cX</code>", "match a control-X character",
"escaped characters", "control characters", "<code>\\e</code>", "escape (<code>\\u001B</code>)",
"escaped characters", "control characters", "<code>\\f</code>", "form feed (<code>\\u000C</code>)",
# groups ----
"groups", NA, "<code>(abc)</code>", "capture group",
"groups", NA, "<code>\\1</code>, <code>\\2</code>, <code>\\3</code> ...", "backreference to group 1, group 2, etc.",
"groups", NA, "<code>(?:abc)</code>", "non-capturing group, e.g. <code>\"gr(?:e|a)y\"</code>",
"groups", NA, "<code>(?=abc)</code>", "positive lookahead; matches are followed by <code>abc</code> (non-capturing)",
"groups", NA, "<code>(?!abc)</code>", "negative lookahead; matches are not followed by <code>abc</code> (non-capturing)",
"groups", NA, "<code>(?<=abc)</code>", "positive lookbehind; matches are preceded by <code>abc</code> (non-capturing)",
"groups", NA, "<code>(?<!abc)</code>", "negative lookbehind; matches are not preceded by <code>abc</code> (non-capturing)",
"groups", NA, "<code>(?>abc)</code>", "atomic-match; no back-tracking if later matches fail",
# quantifiers ----
"quantifiers", NA, "<code>a*</code>", "0 or more",
"quantifiers", NA, "<code>a+</code>", "1 or more",
"quantifiers", NA, "<code>a?</code>", "0 or 1",
"quantifiers", NA, "<code>a{n}</code>", "exactly n times",
"quantifiers", NA, "<code>a{n,}</code>", "n or more times",
"quantifiers", NA, "<code>a{n,m}</code>", "n-m times",
"quantifiers", NA, "add <code>+</code>", "makes match possessive",
"quantifiers", NA, "add <code>?</code>", "makes match non-greedy",
"quantifiers", NA, "<code>ab|cd</code>", "match ab or cd",
"quantifiers", NA, "<code>w(?:o|a)ke</code>", "use non-capturing group for precedence"
)