/
range_builder.h
163 lines (148 loc) · 3.98 KB
/
range_builder.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#ifndef IV_AERO_RANGE_BUILDER_H_
#define IV_AERO_RANGE_BUILDER_H_
#include <vector>
#include <utility>
#include <algorithm>
#include <iv/noncopyable.h>
#include <iv/character.h>
#include <iv/aero/range.h>
#include <iv/aero/escape.h>
namespace iv {
namespace aero {
// Accumulates inclusive character ranges [start, last] and normalizes them.
//
// Characters, ranges and escape classes are appended to an internal list;
// Finish() then sorts that list and merges overlapping or adjacent entries.
class RangeBuilder : private core::Noncopyable<RangeBuilder> {
 public:
  // ignore_case: builder-wide switch. An addition is expanded to its case
  // variants only when this switch AND the per-call ignore_case flag are set.
  explicit RangeBuilder(bool ignore_case)
    : ignore_case_(ignore_case), ranges_() { }

  // range value is [start, last]
  typedef std::vector<Range> Ranges;

  // Drops every range accumulated so far.
  void Clear() {
    ranges_.clear();
  }

  // Returns the builder-wide ignore-case switch given at construction.
  bool IsIgnoreCase() const { return ignore_case_; }

  // Appends the inclusive range [start, last].
  //
  // When case-insensitivity applies, every character of the range is expanded
  // individually through AddCharacterIgnoreCase(). Otherwise the pair is stored
  // as given; it is not validated here (see IsValidRange()).
  void AddRange(uint16_t start, uint16_t last, bool ignore_case) {
    if (start == last) {
      Add(start, ignore_case);
      return;
    }
    if (IsIgnoreCase() && ignore_case) {
      // TODO(Constellation): would building a character map be faster?
      // The counter is 32 bits wide so ++ch cannot wrap when last is 0xFFFF.
      for (uint32_t ch = start; ch <= last; ++ch) {
        AddCharacterIgnoreCase(static_cast<uint16_t>(ch));
      }
    } else {
      ranges_.push_back(std::make_pair(start, last));
    }
  }

  // Appends the literal character ch when escaped is 0; otherwise appends the
  // escape class identified by escaped and ch is not used.
  void AddOrEscaped(uint16_t escaped, uint16_t ch) {
    if (escaped == 0) {
      Add(ch, true);
    } else {
      AddEscape(escaped);
    }
  }

  // Appends the single character ch, together with its case variants when
  // case-insensitivity applies.
  void Add(uint16_t ch, bool ignore_case) {
    if (IsIgnoreCase() && ignore_case) {
      AddCharacterIgnoreCase(ch);
    } else {
      ranges_.push_back(std::make_pair(ch, ch));
    }
  }

  // Discards the current contents and returns the normalized ranges of the
  // escape class ch. The result refers to this builder's internal storage.
  const Ranges& GetEscapedRange(uint16_t ch) {
    Clear();
    AddEscape(ch);
    return Finish();
  }

  // Sorts the accumulated ranges and merges overlapping or adjacent ones.
  //
  // Returns a reference to the builder's own storage, so the result changes
  // when the builder is modified afterwards.
  const Ranges& Finish() {
    if (ranges_.empty()) {
      return ranges_;
    }
    Ranges result;
    std::sort(ranges_.begin(), ranges_.end());
    Ranges::const_iterator it = ranges_.begin();
    const Ranges::const_iterator last = ranges_.end();
    Range current = *it;
    ++it;
    for (; it != last; ++it) {
      // Widen explicitly before adding 1 so that an upper bound of 0xFFFF
      // cannot wrap, independent of implicit integer promotion.
      const uint32_t reach = static_cast<uint32_t>(current.second) + 1;
      if (reach >= static_cast<uint32_t>(it->first)) {
        // Overlapping or directly adjacent: extend the pending range.
        current.second = std::max(current.second, it->second);
      } else {
        result.push_back(current);
        current = *it;
      }
    }
    result.push_back(current);
    ranges_.swap(result);
    return ranges_;
  }

  // Returns true when [start, last] is a well-formed range (start <= last).
  static bool IsValidRange(uint16_t start, uint16_t last) {
    return start <= last;
  }

 private:
  // Appends the ranges of one escape class. Upper-case letters and '.' append
  // the complement of the corresponding table. Any other value appends
  // nothing.
  void AddEscape(uint16_t escaped) {
    switch (escaped) {
      case 'd': {
        AddRanges(kDigitRanges.begin(), kDigitRanges.end());
        break;
      }
      case 'D': {
        AddInvertedRanges(kDigitRanges.begin(), kDigitRanges.end());
        break;
      }
      case 's': {
        AddRanges(kSpaceRanges.begin(), kSpaceRanges.end());
        break;
      }
      case 'S': {
        AddInvertedRanges(kSpaceRanges.begin(), kSpaceRanges.end());
        break;
      }
      case 'w': {
        AddRanges(kWordRanges.begin(), kWordRanges.end());
        break;
      }
      case 'W': {
        AddInvertedRanges(kWordRanges.begin(), kWordRanges.end());
        break;
      }
      case 'n': {
        AddRanges(kLineTerminatorRanges.begin(), kLineTerminatorRanges.end());
        break;
      }
      case '.': {
        AddInvertedRanges(kLineTerminatorRanges.begin(),
                          kLineTerminatorRanges.end());
        break;
      }
    }
  }

  // Appends ch together with its lower-case and upper-case mappings. When both
  // mappings equal ch, a single entry is stored.
  void AddCharacterIgnoreCase(uint16_t ch) {
    const uint16_t lu = core::character::ToLowerCase(ch);
    const uint16_t uu = core::character::ToUpperCase(ch);
    if (lu == uu && lu == ch) {
      ranges_.push_back(std::make_pair(ch, ch));
    } else {
      // Duplicates among the three entries are merged later by Finish().
      ranges_.push_back(std::make_pair(lu, lu));
      ranges_.push_back(std::make_pair(uu, uu));
      ranges_.push_back(std::make_pair(ch, ch));
    }
  }

  // Appends the complement, within [0x0000, 0xFFFF], of the ranges in
  // [it, last).
  //
  // NOTE(review): assumes the input is sorted in ascending order, as the
  // original loop did -- confirm against the range tables.
  //
  // The cursor is kept in 32 bits so that a range starting at 0x0000 or ending
  // at 0xFFFF does not wrap around and add the whole code unit space. Gaps of
  // zero width (adjacent input ranges) are skipped instead of being stored as
  // an inverted pair.
  template<typename Iter>
  void AddInvertedRanges(Iter it, Iter last) {
    uint32_t cursor = 0x0000;  // first code unit not yet covered by the input
    for (; it != last; ++it) {
      const uint32_t first = static_cast<uint32_t>(it->first);
      if (cursor < first) {
        AddRange(static_cast<uint16_t>(cursor),
                 static_cast<uint16_t>(first - 1), false);
      }
      cursor = std::max<uint32_t>(cursor,
                                  static_cast<uint32_t>(it->second) + 1);
    }
    if (cursor <= 0xFFFF) {
      AddRange(static_cast<uint16_t>(cursor), 0xFFFF, false);
    }
  }

  // Appends every range in [it, last) verbatim, without case expansion.
  template<typename Iter>
  void AddRanges(Iter it, Iter last) {
    for (; it != last; ++it) {
      AddRange(it->first, it->second, false);
    }
  }

  bool ignore_case_;  // builder-wide ignore-case switch
  Ranges ranges_;     // accumulated ranges; normalized in place by Finish()
};
} } // namespace iv::aero
#endif // IV_AERO_RANGE_BUILDER_H_