Skip to content

Commit

Permalink
Implement "match" statement in properties files as a simpler alternat…
Browse files Browse the repository at this point in the history
…ive to "if".
  • Loading branch information
nyamatongwe committed Mar 22, 2022
1 parent 43671da commit 2c9e004
Show file tree
Hide file tree
Showing 4 changed files with 284 additions and 2 deletions.
4 changes: 3 additions & 1 deletion test/README
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,11 @@ Other settings are treated as lexer or folder properties and forwarded to the le

It is often necessary to set 'fold' in SciTE.properties to cause folding.

Properties can be set for a particular file with an "if $(=" expression like so:
Properties can be set for a particular file with an "if $(=" or "match" expression like so:
if $(= $(FileNameExt);HeaderEOLFill_1.md)
lexer.markdown.header.eolfill=1
match Header*1.md
lexer.markdown.header.eolfill=1

More complex tests with additional configurations of keywords or properties can be performed
by creating another subdirectory with the different settings in a new SciTE.properties.
Expand Down
50 changes: 50 additions & 0 deletions test/TestDocument.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,56 @@ namespace {
return (ch >= 0x80) && (ch < 0xc0);
}

// Extract the value carried by a UTF-8 trail byte.
// A trail byte has the form 0b10xxxxxx: the top 2 bits mark it as a trail
// byte and are discarded, the remaining 6 bits are returned.
constexpr unsigned char TrailByteValue(unsigned char c) {
	constexpr unsigned char payloadMask = 0x3F;
	return c & payloadMask;
}
}

// Decode UTF-8 text into a UTF-32 string.
// The number of bytes in each sequence is looked up in UTF8BytesOfLead using
// the first byte of the sequence. Lengths 1, 2 and 3 are decoded as such and
// any other length is decoded as a 4 byte sequence.
// If a sequence would run past the end of the input, its lead byte is
// appended unchanged and decoding stops.
std::u32string UTF32FromUTF8(std::string_view svu8) {
	std::u32string decoded;
	size_t pos = 0;

	// Read the byte at pos as a trail byte, returning its 6 value bits
	// and stepping past it.
	const auto nextTrail = [&svu8, &pos]() -> unsigned int {
		const unsigned char trail = svu8.at(pos++);
		return TrailByteValue(trail);
	};

	while (pos < svu8.length()) {
		const unsigned char lead = svu8.at(pos);
		const unsigned int sequenceLength = UTF8BytesOfLead[lead];

		if (pos + sequenceLength > svu8.length()) {
			// Truncated sequence at end of input
			decoded.push_back(lead);
			break;
		}

		pos++;
		unsigned int codePoint = 0;
		switch (sequenceLength) {
		case 1:
			codePoint = lead;
			break;
		case 2:
			// 5 bits from the lead byte, 6 from one trail byte
			codePoint = (lead & 0x1F) << 6;
			codePoint += nextTrail();
			break;
		case 3:
			// 4 bits from the lead byte, 6 from each of two trail bytes
			codePoint = (lead & 0xF) << 12;
			codePoint += nextTrail() << 6;
			codePoint += nextTrail();
			break;
		default:
			// 3 bits from the lead byte, 6 from each of three trail bytes
			codePoint = (lead & 0x7) << 18;
			codePoint += nextTrail() << 12;
			codePoint += nextTrail() << 6;
			codePoint += nextTrail();
			break;
		}
		decoded.push_back(codePoint);
	}
	return decoded;
}

void TestDocument::Set(std::string_view sv) {
Expand Down
2 changes: 2 additions & 0 deletions test/TestDocument.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#ifndef TESTDOCUMENT_H
#define TESTDOCUMENT_H

// Convert UTF-8 encoded text to a UTF-32 string.
// Defined in TestDocument.cxx.
std::u32string UTF32FromUTF8(std::string_view svu8);

class TestDocument : public Scintilla::IDocument {
std::string text;
std::string textStyles;
Expand Down
230 changes: 229 additions & 1 deletion test/TestLexers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,225 @@

namespace {

// Map an ASCII upper case letter ('A'..'Z') to its lower case equivalent.
// Every other character is returned unchanged.
constexpr char MakeLowerCase(char c) noexcept {
	const bool isUpperAZ = (c >= 'A') && (c <= 'Z');
	return isUpperAZ ? static_cast<char>(c - 'A' + 'a') : c;
}

// Lower case the string in place, one char at a time, using MakeLowerCase.
void LowerCaseAZ(std::string &s) {
	for (char &ch : s) {
		ch = MakeLowerCase(ch);
	}
}

// Convert a decimal integer, optionally preceded by '-', to an int.
// Conversion stops at the first character that is not an ASCII digit so
// trailing text can not corrupt the value.
// Returns 0 for an empty string or when there are no leading digits.
// Values outside the range of int are not detected.
int IntFromString(std::u32string_view s) noexcept {
	if (s.empty()) {
		return 0;
	}
	const bool negate = s.front() == '-';
	if (negate) {
		s.remove_prefix(1);
	}
	int value = 0;
	for (const char32_t ch : s) {
		if ((ch < U'0') || (ch > U'9')) {
			break;
		}
		value = value * 10 + static_cast<int>(ch - U'0');
	}
	return negate ? -value : value;
}

// Determine whether the whole of text matches a glob-style pattern.
// Pattern syntax:
//   \x       the character x taken literally
//   *        any run of characters not containing '/'
//   **       any run of characters including '/'
//   ?        any single character except '/'
//   [abc]    one character from the set; ranges such as a-z are allowed,
//            a leading '!' negates the set and ']' may appear first
//   {a,b,cd} any one of the comma separated alternatives
//   {10..20} a decimal integer, optionally negative, within the inclusive range
// Any other character matches itself.
// Malformed patterns (dangling '\', unterminated '{', empty range bound) do not match.
bool PatternMatch(std::u32string_view pattern, std::u32string_view text) noexcept {
	if (pattern == text) {
		return true;
	} else if (pattern.empty()) {
		return false;
	} else if (pattern.front() == '\\') {
		pattern.remove_prefix(1);
		if (pattern.empty()) {
			// Escape with nothing being escaped
			return false;
		}
		if (text.empty()) {
			return false;
		}
		if (pattern.front() == text.front()) {
			pattern.remove_prefix(1);
			text.remove_prefix(1);
			return PatternMatch(pattern, text);
		}
		return false;
	} else if (pattern.front() == '*') {
		pattern.remove_prefix(1);
		if (!pattern.empty() && pattern.front() == '*') {
			pattern.remove_prefix(1);
			// "**" matches anything including "/"
			while (!text.empty()) {
				if (PatternMatch(pattern, text)) {
					return true;
				}
				text.remove_prefix(1);
			}
		} else {
			// Try the rest of the pattern at each position the "*" could end
			while (!text.empty()) {
				if (PatternMatch(pattern, text)) {
					return true;
				}
				if (text.front() == '/') {
					// "/" not matched by single "*"
					return false;
				}
				text.remove_prefix(1);
			}
		}
		assert(text.empty());
		// Consumed whole text with wildcard so match if pattern consumed
		return pattern.empty();
	} else if (text.empty()) {
		// Every remaining pattern element needs at least one character
		return false;
	} else if (pattern.front() == '?') {
		if (text.front() == '/') {
			return false;
		}
		pattern.remove_prefix(1);
		text.remove_prefix(1);
		return PatternMatch(pattern, text);
	} else if (pattern.front() == '[') {
		pattern.remove_prefix(1);
		if (pattern.empty()) {
			return false;
		}
		const bool positive = pattern.front() != '!';
		if (!positive) {
			pattern.remove_prefix(1);
			if (pattern.empty()) {
				return false;
			}
		}
		bool inSet = false;
		if (!pattern.empty() && pattern.front() == ']') {
			// First is allowed to be ']'
			if (pattern.front() == text.front()) {
				inSet = true;
			}
			pattern.remove_prefix(1);
		}
		// start remembers the previous set member so that "a-z" can be
		// evaluated as a range when the '-' is reached
		char32_t start = 0;
		while (!pattern.empty() && pattern.front() != ']') {
			if (pattern.front() == '-') {
				pattern.remove_prefix(1);
				if (!pattern.empty()) {
					const char32_t end = pattern.front();
					if ((text.front() >= start) && (text.front() <= end)) {
						inSet = true;
					}
				}
			} else if (pattern.front() == text.front()) {
				inSet = true;
			}
			if (!pattern.empty()) {
				start = pattern.front();
				pattern.remove_prefix(1);
			}
		}
		if (!pattern.empty()) {
			// Skip the closing ']'
			pattern.remove_prefix(1);
		}
		if (inSet != positive) {
			return false;
		}
		text.remove_prefix(1);
		return PatternMatch(pattern, text);
	} else if (pattern.front() == '{') {
		if (pattern.length() < 2) {
			return false;
		}
		const size_t endParen = pattern.find('}');
		if (endParen == std::u32string_view::npos) {
			// Malformed {x} pattern
			return false;
		}
		std::u32string_view parenExpression = pattern.substr(1, endParen - 1);
		const std::u32string_view afterParen = pattern.substr(endParen + 1);
		const size_t dotdot = parenExpression.find(U"..");
		if (dotdot != std::u32string_view::npos) {
			// Numeric range: {10..20}
			const std::u32string_view firstRange = parenExpression.substr(0, dotdot);
			const std::u32string_view lastRange = parenExpression.substr(dotdot+2);
			if (firstRange.empty() || lastRange.empty()) {
				// Malformed {s..e} range pattern
				return false;
			}
			// Only the integer at the start of text is read: an optional '-'
			// followed by digits. Digits later in the text, such as the '3'
			// in ".mp3", belong to the remainder of the match.
			const size_t digitsStart = (text.front() == '-') ? 1 : 0;
			size_t intLength = digitsStart;
			while ((intLength < text.length()) &&
				(text[intLength] >= U'0') && (text[intLength] <= U'9')) {
				intLength++;
			}
			if (intLength == digitsStart) {
				// No integer at this position in text
				return false;
			}
			const std::u32string_view intPart = text.substr(0, intLength);
			const int first = IntFromString(firstRange);
			const int last = IntFromString(lastRange);
			const int value = IntFromString(intPart);
			if ((value < first) || (value > last)) {
				return false;
			}
			text.remove_prefix(intLength);
			return PatternMatch(afterParen, text);
		}
		// Alternates: {a,b,cd}
		// Each alternative that prefixes the text is tried in turn against the
		// rest of the pattern so that {a,ab}c can match "abc".
		for (;;) {
			const size_t comma = parenExpression.find(',');
			const bool finalAlt = comma == std::u32string_view::npos;
			const std::u32string_view oneAlt = finalAlt ? parenExpression :
				parenExpression.substr(0, comma);
			if (oneAlt == text.substr(0, oneAlt.length())) {
				if (PatternMatch(afterParen, text.substr(oneAlt.length()))) {
					return true;
				}
			}
			if (finalAlt) {
				return false;
			}
			parenExpression.remove_prefix(comma + 1);
		}
	} else if (pattern.front() == text.front()) {
		// Ordinary character matches itself
		pattern.remove_prefix(1);
		text.remove_prefix(1);
		return PatternMatch(pattern, text);
	}
	return false;
}

// Test a relative path against a pattern.
// The pattern is first tried against the complete path and, if that fails
// and the path contains a '/', against the part after the last '/'.
// On Windows '\' separators in the path are converted to '/'.
// On Windows and macOS both arguments are lower cased (ASCII only) before matching.
bool PathMatch(std::string pattern, std::string relPath) {
#if defined(_WIN32)
	// Convert Windows path separators to Unix
	for (char &ch : relPath) {
		if (ch == '\\') {
			ch = '/';
		}
	}
#endif
#if defined(_WIN32) || defined(__APPLE__)
	// Case-insensitive, only does ASCII but fine for test example files
	LowerCaseAZ(pattern);
	LowerCaseAZ(relPath);
#endif
	const std::u32string wantedU32 = UTF32FromUTF8(pattern);
	const std::u32string pathU32 = UTF32FromUTF8(relPath);
	if (PatternMatch(wantedU32, pathU32)) {
		return true;
	}
	const size_t slashPos = pathU32.rfind('/');
	if (slashPos != std::u32string::npos) {
		// Match against just filename
		const std::u32string_view nameOnly = std::u32string_view(pathU32).substr(slashPos + 1);
		return PatternMatch(wantedU32, nameOnly);
	}
	return false;
}

constexpr std::string_view suffixStyled = ".styled";
constexpr std::string_view suffixFolded = ".folded";

Expand Down Expand Up @@ -177,7 +396,16 @@ class PropertyMap {
}
} else if (text.starts_with(prefixMatch)) {
std::optional<std::string> fileNameExt = GetProperty("FileNameExt");
ifIsTrue = fileNameExt == text.substr(prefixMatch.length());
if (fileNameExt) {
std::string pattern(text.substr(prefixMatch.length()));
// Remove trailing white space
while (!pattern.empty() && IsSpaceOrTab(pattern.back())) {
pattern.pop_back();
}
ifIsTrue = PathMatch(pattern, *fileNameExt);
} else {
ifIsTrue = false;
}
} else {
while (!text.empty() && IsSpaceOrTab(text.at(0))) {
text.remove_prefix(1);
Expand Down

0 comments on commit 2c9e004

Please sign in to comment.