diff --git a/Pluto.vcxproj b/Pluto.vcxproj
index 26a3ab54c..34f2550ac 100644
--- a/Pluto.vcxproj
+++ b/Pluto.vcxproj
@@ -662,6 +662,7 @@
+
@@ -762,6 +763,8 @@
+
+
@@ -835,6 +838,7 @@
+
@@ -920,11 +924,42 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Pluto.vcxproj.filters b/Pluto.vcxproj.filters
index 6467541fe..0ee1edbcf 100644
--- a/Pluto.vcxproj.filters
+++ b/Pluto.vcxproj.filters
@@ -298,6 +298,13 @@
vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
@@ -846,6 +853,99 @@
vendor\Soup\soup
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
+
+ vendor\Soup\soup
+
@@ -861,4 +961,4 @@
{f0adda43-f311-40e5-b4ec-284f248bad46}
-
\ No newline at end of file
+
diff --git a/src/Makefile b/src/Makefile
index 870a292b3..10462c07f 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -40,7 +40,7 @@ PLATS= guess aix bsd freebsd generic linux linux-readline macosx posix solaris
LUA_A= libplutostatic.a
LUA_SO= libpluto.so
CORE_O= lapi.o lcode.o lctype.o ldebug.o ldo.o ldump.o lfunc.o lgc.o llex.o lmem.o lobject.o lopcodes.o lparser.o lstate.o lstring.o ltable.o ltm.o lundump.o lvm.o lzio.o
-LIB_O= lauxlib.o lbaselib.o lcorolib.o ldblib.o liolib.o lmathlib.o loadlib.o loslib.o lstrlib.o lcryptolib.o ltablib.o lutf8lib.o lassertlib.o lvector3lib.o lbase32.o lbase64.o ljson.o lurllib.o linit.o lstarlib.o lcatlib.o lhttplib.o lschedulerlib.o lsocketlib.o lbigint.o lxml.o
+LIB_O= lauxlib.o lbaselib.o lcorolib.o ldblib.o liolib.o lmathlib.o loadlib.o loslib.o lstrlib.o lcryptolib.o ltablib.o lutf8lib.o lassertlib.o lvector3lib.o lbase32.o lbase64.o ljson.o lurllib.o linit.o lstarlib.o lcatlib.o lhttplib.o lschedulerlib.o lsocketlib.o lbigint.o lxml.o lregex.o
BASE_O= $(CORE_O) $(LIB_O) $(MYOBJS)
LUA_T= pluto
diff --git a/src/lregex.cpp b/src/lregex.cpp
new file mode 100644
index 000000000..07c931565
--- /dev/null
+++ b/src/lregex.cpp
@@ -0,0 +1,56 @@
+#define LUA_LIB
+#include "lualib.h"
+
+#include "vendor/Soup/soup/Regex.hpp"
+
+/* Returns argument `i` as a soup::Regex*, as checked by luaL_checkudata against the "pluto:regex" metatable name. */
+static soup::Regex* checkregex (lua_State *L, int i) {
+  void *const ud = luaL_checkudata(L, i, "pluto:regex");
+  return static_cast<soup::Regex*>(ud);
+}
+
+/*
+** regex.new(pattern)
+** Argument 1 is a "full" regex string (delimiter, pattern, delimiter, flags)
+** handed to soup::Regex::fromFullString. Returns one value: a userdata that
+** holds the constructed soup::Regex and carries the "pluto:regex" metatable.
+**
+** The metatable is prepared BEFORE the Regex is constructed. Previously the
+** object was constructed first, so any error raised while building the
+** metatable left a live Regex inside a userdata without '__gc', i.e. its
+** destructor never ran.
+*/
+static int regex_new (lua_State *L) {
+  if (luaL_newmetatable(L, "pluto:regex")) {
+    /* First use: install the finalizer before anything else that may raise. */
+    lua_pushcfunction(L, [](lua_State *L) {
+      std::destroy_at<>(checkregex(L, 1));
+      return 0;
+    });
+    lua_setfield(L, -2, "__gc");
+    /* Methods are looked up in whatever require"pluto:regex" returns. */
+    luaL_loadbuffer(L, "return require\"pluto:regex\"", 27, 0);
+    lua_call(L, 0, 1);
+    lua_setfield(L, -2, "__index");
+  }
+  /* stack: ..., metatable */
+  new (lua_newuserdata(L, sizeof(soup::Regex))) soup::Regex{ soup::Regex::fromFullString(pluto_checkstring(L, 1)) };
+  /* stack: ..., metatable, userdata -- attach the metatable right away. */
+  lua_pushvalue(L, -2);
+  lua_setmetatable(L, -2);
+  return 1;  /* the userdata on top of the stack */
+}
+
+/*
+** regex:match(subject)
+** Matches argument 2 (a string, embedded zeros allowed) against the regex in
+** argument 1. On success pushes a table with one entry per captured group:
+** keyed by the group's name when it has one, otherwise by its index in the
+** result's group list; the value is the captured text. On failure pushes
+** the fail value. Always returns one value.
+*/
+static int regex_match (lua_State *L) {
+  size_t subject_len;
+  const char *subject = luaL_checklstring(L, 2, &subject_len);
+  auto result = checkregex(L, 1)->match(subject, subject + subject_len);
+  if (!result.isSuccess()) {
+    luaL_pushfail(L);
+    return 1;
+  }
+  lua_newtable(L);
+  for (size_t idx = 0; idx != result.groups.size(); ++idx) {
+    auto& slot = result.groups[idx];
+    if (!slot.has_value()) {
+      continue;  /* group did not participate in the match */
+    }
+    /* key */
+    if (slot->name.empty()) {
+      lua_pushinteger(L, idx);
+    }
+    else {
+      pluto_pushstring(L, slot->name);
+    }
+    /* value */
+    lua_pushlstring(L, slot->begin, slot->length());
+    lua_settable(L, -3);
+  }
+  return 1;
+}
+
+/* Functions exported by the regex library; the list is terminated by a {nullptr, nullptr} sentinel. */
+static const luaL_Reg funcs_regex[] = {
+ {"new", regex_new},
+ {"match", regex_match},
+ {nullptr, nullptr}
+};
+/* NOTE(review): presumably defines the `preloaded_regex` / `luaopen_regex` symbols declared in lualib.h from `funcs_regex` -- confirm against the PLUTO_NEWLIB macro definition. */
+PLUTO_NEWLIB(regex);
diff --git a/src/lualib.h b/src/lualib.h
index b95c31a7b..60d28f988 100644
--- a/src/lualib.h
+++ b/src/lualib.h
@@ -59,6 +59,7 @@ namespace Pluto {
#endif
extern const PreloadedLibrary preloaded_bigint;
extern const PreloadedLibrary preloaded_xml;
+ extern const PreloadedLibrary preloaded_regex;
inline const PreloadedLibrary* const all_preloaded[] = {
&preloaded_crypto,
@@ -77,6 +78,7 @@ namespace Pluto {
#endif
&preloaded_bigint,
&preloaded_xml,
+ &preloaded_regex,
};
}
@@ -96,6 +98,7 @@ LUAMOD_API int (luaopen_socket) (lua_State *L);
#endif
LUAMOD_API int (luaopen_bigint) (lua_State *L);
LUAMOD_API int (luaopen_xml) (lua_State *L);
+LUAMOD_API int (luaopen_regex) (lua_State *L);
/* open all previous libraries */
LUALIB_API void (luaL_openlibs) (lua_State *L);
diff --git a/src/vendor/Soup/soup/BigBitset.hpp b/src/vendor/Soup/soup/BigBitset.hpp
new file mode 100644
index 000000000..08f48583c
--- /dev/null
+++ b/src/vendor/Soup/soup/BigBitset.hpp
@@ -0,0 +1,69 @@
+#pragma once
+
+#include
+#include // memcpy
+
+#include "base.hpp"
+
+NAMESPACE_SOUP
+{
+#pragma pack(push, 1)
+ // Fixed-size bitset stored as a plain byte array (`data`), zero-initialised by default.
+ // Bit `i` lives in byte `i / 8` at bit position `i % 8` (least significant bit first).
+ // None of the accessors bounds-check `i`.
+ template
+ struct BigBitset
+ {
+ uint8_t data[Bytes]{};
+
+ BigBitset() = default;
+
+ // Copies all bytes from `b`.
+ BigBitset(const BigBitset& b)
+ {
+ memcpy(data, b.data, sizeof(data));
+ }
+
+ // Reinterprets the memory at `dp` as a BigBitset; nothing is constructed or copied.
+ [[nodiscard]] static BigBitset* at(void* dp) noexcept
+ {
+ return reinterpret_cast*>(dp);
+ }
+
+ // Const overload of the above.
+ [[nodiscard]] static const BigBitset* at(const void* dp) noexcept
+ {
+ return reinterpret_cast*>(dp);
+ }
+
+ // Returns the value of bit `i`.
+ [[nodiscard]] constexpr bool get(const size_t i) const noexcept
+ {
+ const auto j = (i / 8);
+ const auto k = (i % 8);
+
+ return (data[j] >> k) & 1;
+ }
+
+ // Sets bit `i` to `v`.
+ constexpr void set(const size_t i, const bool v) noexcept
+ {
+ const auto j = (i / 8);
+ const auto k = (i % 8);
+
+ const uint8_t mask = (1 << k);
+
+ // Clear the bit, then OR it back in only when `v` is true (mask * v is 0 or mask).
+ data[j] &= ~mask;
+ data[j] |= (mask * v);
+ }
+
+ // Sets bit `i` to 1.
+ constexpr void enable(const size_t i) noexcept
+ {
+ const auto j = (i / 8);
+ const auto k = (i % 8);
+
+ data[j] |= (1 << k);
+ }
+
+ // Sets bit `i` to 0.
+ constexpr void disable(const size_t i) noexcept
+ {
+ const auto j = (i / 8);
+ const auto k = (i % 8);
+
+ data[j] &= ~(1 << k);
+ }
+ };
+#pragma pack(pop)
+}
diff --git a/src/vendor/Soup/soup/Makefile b/src/vendor/Soup/soup/Makefile
index 8b81d7c83..c946a52a3 100644
--- a/src/vendor/Soup/soup/Makefile
+++ b/src/vendor/Soup/soup/Makefile
@@ -3,7 +3,7 @@ CFLAGS=-c -Wall -DSOUP_USE_INTRIN
LIBNAME=libsoup.a
# echo $(ls *.cpp | sed 's/.cpp/.o/g')
-OBJS=adler32.o aes.o alloc.o Asn1Identifier.o Asn1Sequence.o base32.o base64.o base.o Bigint.o Capture.o cat.o CpuInfo.o crc32.o Curve25519.o deflate.o DetachedScheduler.o dnsHttpResolver.o dnsName.o dnsRawResolver.o dnsSmartResolver.o dnsUdpResolver.o dns_records.o dnsResolver.o ecc.o filesystem.o HttpRequest.o HttpRequestTask.o IpAddr.o joaat.o JsonArray.o JsonBool.o json.o JsonFloat.o JsonInt.o JsonNode.o JsonNull.o JsonObject.o JsonString.o log.o MimeMessage.o netConfig.o netConnectTask.o netStatus.o Oid.o pem.o Promise.o rand.o rsa.o Scheduler.o SelfDeletingThread.o sha1.o sha256.o sha384.o sha512.o Socket.o SocketTlsEncrypter.o SocketTlsHandshaker.o spaceship.o string.o Task.o Thread.o time.o TrustStore.o unicode.o Uri.o urlenc.o version_compare.o Worker.o X509Certchain.o X509Certificate.o X509RelativeDistinguishedName.o xml.o Reader.o Writer.o DefaultRngInterface.o HardwareRng.o Server.o os.o
+OBJS=adler32.o aes.o alloc.o Asn1Identifier.o Asn1Sequence.o base32.o base64.o base.o Bigint.o Capture.o cat.o CpuInfo.o crc32.o Curve25519.o deflate.o DetachedScheduler.o dnsHttpResolver.o dnsName.o dnsRawResolver.o dnsSmartResolver.o dnsUdpResolver.o dns_records.o dnsResolver.o ecc.o filesystem.o HttpRequest.o HttpRequestTask.o IpAddr.o joaat.o JsonArray.o JsonBool.o json.o JsonFloat.o JsonInt.o JsonNode.o JsonNull.o JsonObject.o JsonString.o log.o MimeMessage.o netConfig.o netConnectTask.o netStatus.o Oid.o pem.o Promise.o rand.o rsa.o Scheduler.o SelfDeletingThread.o sha1.o sha256.o sha384.o sha512.o Socket.o SocketTlsEncrypter.o SocketTlsHandshaker.o spaceship.o string.o Task.o Thread.o time.o TrustStore.o unicode.o Uri.o urlenc.o version_compare.o Worker.o X509Certchain.o X509Certificate.o X509RelativeDistinguishedName.o xml.o Reader.o Writer.o DefaultRngInterface.o HardwareRng.o Server.o os.o Regex.o RegexGroup.o
all: $(LIBNAME)
diff --git a/src/vendor/Soup/soup/PointerAndBool.hpp b/src/vendor/Soup/soup/PointerAndBool.hpp
new file mode 100644
index 000000000..7a370455e
--- /dev/null
+++ b/src/vendor/Soup/soup/PointerAndBool.hpp
@@ -0,0 +1,76 @@
+#pragma once
+
+#include "base.hpp"
+#include "type_traits.hpp"
+
+NAMESPACE_SOUP
+{
+ // Packs a pointer and a bool into a single uintptr_t: bit 0 holds the bool,
+ // the remaining bits hold the pointer. This only round-trips when the
+ // pointer's own bit 0 is clear; the checks for that are commented out below.
+ template )>
+ class PointerAndBool
+ {
+ private:
+ uintptr_t data;
+
+ public:
+ // Stores `ptr`; the bool reads back as whatever bit 0 of `ptr` is.
+ PointerAndBool(T ptr)
+ : data(reinterpret_cast(ptr))
+ {
+ //SOUP_ASSERT((data & 1) == 0);
+ }
+
+ // Stores `ptr` and ORs `b` into bit 0.
+ PointerAndBool(T ptr, bool b)
+ : data(reinterpret_cast(ptr))
+ {
+ //SOUP_ASSERT((data & 1) == 0);
+ data |= (uintptr_t)b;
+ }
+
+ // Returns the stored value with bit 0 masked off.
+ [[nodiscard]] T getPointer() const noexcept
+ {
+ return reinterpret_cast(data & ~(uintptr_t)1);
+ }
+
+ // Returns bit 0.
+ [[nodiscard]] bool getBool() const noexcept
+ {
+ return data & 1;
+ }
+
+ // Replaces bit 0 with `b`, leaving the pointer bits untouched.
+ void setBool(bool b) noexcept
+ {
+ data &= ~static_cast(1);
+ data |= static_cast(b);
+ }
+
+ // Replaces both the pointer and the bool.
+ void set(T ptr, bool b)
+ {
+ data = reinterpret_cast(ptr);
+ //SOUP_ASSERT((data & 1) == 0);
+ data |= static_cast(b);
+ }
+
+ // Implicit conversion to the plain pointer (bool bit stripped).
+ operator T() const noexcept
+ {
+ return getPointer();
+ }
+
+ [[nodiscard]] std::remove_pointer_t& operator*() const noexcept
+ {
+ return *getPointer();
+ }
+
+ [[nodiscard]] T operator->() const noexcept
+ {
+ return getPointer();
+ }
+
+ // Comparisons look at the pointer only; the bool is ignored.
+ [[nodiscard]] bool operator==(T b) const noexcept
+ {
+ return getPointer() == b;
+ }
+
+ [[nodiscard]] bool operator!=(T b) const noexcept
+ {
+ return !operator==(b);
+ }
+ };
+}
diff --git a/src/vendor/Soup/soup/Regex.cpp b/src/vendor/Soup/soup/Regex.cpp
new file mode 100644
index 000000000..05dd4aadc
--- /dev/null
+++ b/src/vendor/Soup/soup/Regex.cpp
@@ -0,0 +1,387 @@
+#include "Regex.hpp"
+
+#include
+#include
+
+#include "base.hpp"
+#include "RegexConstraint.hpp"
+#include "RegexMatcher.hpp"
+#include "string.hpp"
+
+#define REGEX_DEBUG_MATCH false
+
+#if REGEX_DEBUG_MATCH
+#include
+#endif
+
+NAMESPACE_SOUP
+{
+ // Parses a delimited regex string: the first character is the delimiter, the
+ // pattern runs up to the LAST occurrence of that delimiter, and whatever
+ // follows is passed to parseFlags. Returns a default-constructed Regex when
+ // the string is shorter than two characters or has no second delimiter.
+ Regex Regex::fromFullString(const std::string& str)
+ {
+     if (str.length() < 2)
+     {
+         return {};
+     }
+     const char delimiter = str.at(0);
+     const auto closing = str.find_last_of(delimiter);
+     if (closing == 0)
+     {
+         // Only the opening delimiter exists.
+         return {};
+     }
+     const char* const base = str.c_str();
+     return Regex(base + 1, base + closing, parseFlags(base + closing + 1));
+ }
+
+ // True if match() succeeds on the whole of `str`.
+ bool Regex::matches(const std::string& str) const noexcept
+ {
+     const char* const first = str.data();
+     return matches(first, first + str.size());
+ }
+
+ // True if match() succeeds on [it, end).
+ bool Regex::matches(const char* it, const char* end) const noexcept
+ {
+     auto result = match(it, end);
+     return result.isSuccess();
+ }
+
+ // True if match() succeeds on `str` and group 0 ends exactly at the end of `str`.
+ bool Regex::matchesFully(const std::string& str) const noexcept
+ {
+     const char* const first = str.data();
+     return matchesFully(first, first + str.size());
+ }
+
+ // True if match() succeeds on [it, end) and group 0 ends exactly at `end`.
+ bool Regex::matchesFully(const char* it, const char* end) const noexcept
+ {
+     auto result = match(it, end);
+     return result.isSuccess() && result.groups.at(0)->end == end;
+ }
+
+ // Attempts a match starting at the first byte of `str`.
+ RegexMatchResult Regex::match(const std::string& str) const noexcept
+ {
+     const char* const first = str.data();
+     return match(first, first + str.size());
+ }
+
+ // Attempts a match starting at `it`; `it` is also used as the beginning of the subject.
+ RegexMatchResult Regex::match(const char* it, const char* end) const noexcept
+ {
+     const char* const begin = it;
+     return match(it, begin, end);
+ }
+
+ // Attempts a match starting at `it` within the subject [begin, end), using a fresh matcher.
+ RegexMatchResult Regex::match(const char* it, const char* begin, const char* end) const noexcept
+ {
+     RegexMatcher matcher(*this, begin, end);
+     return match(matcher, it);
+ }
+
+ // Core matching loop. Starting at `it`, repeatedly evaluates the matcher's
+ // current constraint `m.c` and follows its success transition; on failure it
+ // falls back to the most recent saved rollback point. Returns an empty
+ // (unsuccessful) result when a constraint fails and no rollback point is
+ // left; otherwise returns `m.result`, moved out of the matcher.
+ RegexMatchResult Regex::match(RegexMatcher& m, const char* it) const noexcept
+ {
+ const auto match_begin = it;
+ m.it = it;
+ SOUP_IF_UNLIKELY (m.shouldSaveCheckpoint())
+ {
+ m.saveCheckpoint();
+ }
+ SOUP_ASSERT(!m.shouldResetCapture());
+ // Set after a transition; consumed at the top of the next iteration.
+ bool reset_capture = false;
+ while (m.c != nullptr)
+ {
+#if REGEX_DEBUG_MATCH
+ std::cout << m.c->toString();
+ if (m.c->group)
+ {
+ std::cout << " (g " << m.c->group->index << ")";
+ }
+ std::cout << ": ";
+#endif
+
+ m.insertMissingCapturingGroups(m.c->group);
+
+ // Remember the alternative path before trying this constraint.
+ if (m.c->rollback_transition)
+ {
+#if REGEX_DEBUG_MATCH
+ std::cout << "saved rollback; ";
+#endif
+ m.saveRollback(m.c->rollback_transition);
+ }
+
+ // Restart the capture of the enclosing group at the current cursor.
+ if (reset_capture)
+ {
+ reset_capture = false;
+#if REGEX_DEBUG_MATCH
+ std::cout << "reset capture for group " << m.c->getGroupCaturedWithin()->index << "; ";
+#endif
+ m.result.groups.at(m.c->getGroupCaturedWithin()->index)->begin = m.it;
+ }
+
+ // Matches?
+ if (m.c->matches(m))
+ {
+ // Update 'end' of applicable capturing groups
+ // (walks outwards through the parents and stops at a lookahead/lookbehind group)
+ for (auto g = m.c->group; g; g = g->parent)
+ {
+ if (g->lookahead_or_lookbehind)
+ {
+ break;
+ }
+ if (g->isNonCapturing())
+ {
+ continue;
+ }
+ m.result.groups.at(g->index)->end = m.it;
+ }
+
+ m.c = m.c->success_transition;
+ if (m.shouldSaveCheckpoint())
+ {
+#if REGEX_DEBUG_MATCH
+ std::cout << "saved checkpoint; ";
+#endif
+ m.saveCheckpoint();
+ }
+ reset_capture = m.shouldResetCapture();
+ if (m.c != RegexConstraint::SUCCESS_TO_FAIL)
+ {
+#if REGEX_DEBUG_MATCH
+ std::cout << "matched\n";
+#endif
+ continue;
+ }
+ // The success transition was the SUCCESS_TO_FAIL sentinel: treat this as a
+ // failure. The newest rollback point is discarded first, then control
+ // falls through to the rollback handling below.
+#if REGEX_DEBUG_MATCH
+ std::cout << "matched into a snafu";
+#endif
+ if (!m.rollback_points.empty())
+ {
+ m.rollback_points.pop_back();
+ }
+ }
+#if REGEX_DEBUG_MATCH
+ else
+ {
+ std::cout << "did not match";
+ }
+#endif
+
+ // Rollback?
+ if (!m.rollback_points.empty())
+ {
+#if REGEX_DEBUG_MATCH
+ std::cout << "; rolling back\n";
+#endif
+ m.restoreRollback();
+ SOUP_ASSERT(!m.shouldSaveCheckpoint());
+ reset_capture = m.shouldResetCapture();
+ // A rollback onto the ROLLBACK_TO_SUCCESS sentinel ends the loop as a successful match.
+ if (m.c == RegexConstraint::ROLLBACK_TO_SUCCESS)
+ {
+#if REGEX_DEBUG_MATCH
+ std::cout << "rollback says we should succeed now\n";
+#endif
+ break;
+ }
+ continue;
+ }
+
+ // Oh well
+ // (a constraint failed and there is nothing left to roll back to)
+#if REGEX_DEBUG_MATCH
+ std::cout << "\n";
+#endif
+ return {};
+ }
+
+ // Handle match of regex without capturing groups
+ // (synthesise an unnamed group spanning match_begin .. current cursor)
+ SOUP_IF_UNLIKELY (!m.result.isSuccess())
+ {
+ m.result.groups.emplace_back(RegexMatchedGroup{ {}, match_begin, m.it });
+ }
+
+ SOUP_ASSERT(m.checkpoints.empty()); // if we made a checkpoint for a lookahead group, it should have been restored.
+
+ SOUP_MOVE_RETURN(m.result);
+ }
+
+ // Shared scanning loop for search() and replaceAll().
+ // Attempts a match at `it` and then at every following byte offset, up to AND
+ // INCLUDING `end`, within the subject [begin, end). The end position has to be
+ // tried as well, otherwise patterns that can match there ('$', or anything
+ // that may match the empty string) are never found.
+ // Requires begin <= it <= end. Returns an unsuccessful result if no offset matches.
+ [[nodiscard]] static RegexMatchResult regex_search_from(const Regex& re, const char* it, const char* begin, const char* end) noexcept
+ {
+     RegexMatcher m(re, begin, end);
+     for (;; ++it)
+     {
+#if REGEX_DEBUG_MATCH
+         std::cout << "--- Attempting match with " << std::distance(m.begin, it) << " byte offset ---\r\n";
+#endif
+         auto res = re.match(m, it);
+         if (res.isSuccess())
+         {
+             return res;
+         }
+         if (it == end)
+         {
+             break;
+         }
+         // Failed attempt: put the matcher back into its initial state before retrying.
+         m.reset(re);
+     }
+     return {};
+ }
+
+ // Finds the first match anywhere in `str`.
+ RegexMatchResult Regex::search(const std::string& str) const noexcept
+ {
+     return search(str.data(), &str.data()[str.size()]);
+ }
+
+ // Finds the first match that starts at any offset in [it, end], end included.
+ // `it` is treated as the beginning of the subject.
+ RegexMatchResult Regex::search(const char* it, const char* end) const noexcept
+ {
+     return regex_search_from(*this, it, it, end);
+ }
+
+ // Replaces every non-overlapping match in `str` with `replacement`, scanning
+ // left to right in a single pass.
+ // The scan resumes AFTER the inserted replacement, so the call terminates even
+ // when `replacement` itself matches the pattern. The old implementation
+ // searched from the start of the string after every replacement and looped
+ // forever in that case, and on zero-length matches.
+ void Regex::replaceAll(std::string& str, const std::string& replacement) const
+ {
+     size_t pos = 0;
+     while (pos <= str.size())
+     {
+         // Pointers must be re-derived on every pass: the previous replace() may have reallocated.
+         const char* const begin = str.data();
+         auto match = regex_search_from(*this, begin + pos, begin, begin + str.size());
+         if (!match.isSuccess())
+         {
+             break;
+         }
+         const size_t offset = static_cast<size_t>(match.groups.at(0).value().begin - begin);
+         const size_t length = match.length();
+         str.replace(offset, length, replacement);
+         pos = offset + replacement.size();
+         if (length == 0)
+         {
+             // Zero-length match: step over one subject byte so the same spot is not matched again.
+             ++pos;
+         }
+     }
+ }
+
+ // Converts a flag bitmask back into its letter form. Letters are emitted in
+ // the fixed order m, s, i, x, u, U, D, n; unknown bits are ignored.
+ std::string Regex::unparseFlags(uint16_t flags)
+ {
+     struct FlagLetter
+     {
+         uint16_t bit;
+         char letter;
+     };
+     static constexpr FlagLetter letters[] = {
+         { RE_MULTILINE, 'm' },
+         { RE_DOTALL, 's' },
+         { RE_INSENSITIVE, 'i' },
+         { RE_EXTENDED, 'x' },
+         { RE_UNICODE, 'u' },
+         { RE_UNGREEDY, 'U' },
+         { RE_DOLLAR_ENDONLY, 'D' },
+         { RE_EXPLICIT_CAPTURE, 'n' },
+     };
+
+     std::string out{};
+     for (const auto& entry : letters)
+     {
+         if ((flags & entry.bit) != 0)
+         {
+             out.push_back(entry.letter);
+         }
+     }
+     return out;
+ }
+
+ // Builds the Graphviz node identifier for `node`: its toString() text (or
+ // "dummy" when that is empty) followed by the node's address in parentheses,
+ // passed through string::escape.
+ [[nodiscard]] static std::string node_to_graphviz_dot_string(const RegexConstraint* node)
+ {
+     std::string label = node->toString();
+     if (label.empty())
+     {
+         label = "dummy";
+     }
+
+     std::stringstream ss;
+     ss << label << " (" << (void*)node << ')';
+
+     return string::escape(ss.str());
+ }
+
+ // Emits the shared "success" node once. The pointer value 1 is stored in
+ // `mapped_nodes` as the marker that it has already been written.
+ static void add_success_node(std::stringstream& ss, std::unordered_set& mapped_nodes)
+ {
+ if (mapped_nodes.count(reinterpret_cast(1)) == 0)
+ {
+ mapped_nodes.emplace(reinterpret_cast(1));
+ ss << R"("success" [shape="diamond"];)" << '\n';
+ }
+ }
+
+ // Emits the shared "fail" node once. The pointer value 2 is stored in
+ // `mapped_nodes` as the marker that it has already been written.
+ static void add_fail_node(std::stringstream& ss, std::unordered_set& mapped_nodes)
+ {
+ if (mapped_nodes.count(reinterpret_cast(2)) == 0)
+ {
+ mapped_nodes.emplace(reinterpret_cast(2));
+ ss << R"("fail" [shape="diamond"];)" << '\n';
+ }
+ }
+
+ // Recursively writes `node`, every node reachable from it, and the connecting
+ // edges to `ss` in Graphviz dot syntax. `mapped_nodes` records what has been
+ // written already, so each node is emitted once and cycles terminate.
+ static void node_to_graphviz_dot(std::stringstream& ss, std::unordered_set& mapped_nodes, const RegexConstraint* node)
+ {
+ if (mapped_nodes.count(node) != 0)
+ {
+ return;
+ }
+ mapped_nodes.emplace(node);
+
+ ss << node_to_graphviz_dot_string(node);
+ ss << R"( [shape="rect"];)";
+ ss << '\n';
+
+ // Success edge: a null transition means the overall match succeeded.
+ if (node->getSuccessTransition() == nullptr)
+ {
+ add_success_node(ss, mapped_nodes);
+
+ ss << node_to_graphviz_dot_string(node);
+ ss << " -> ";
+ ss << R"("success")";
+ ss << R"( [label="success"];)";
+ ss << '\n';
+ }
+ else if (node->getSuccessTransition() == RegexConstraint::SUCCESS_TO_FAIL)
+ {
+ add_fail_node(ss, mapped_nodes);
+
+ ss << node_to_graphviz_dot_string(node);
+ ss << " -> ";
+ ss << R"("fail")";
+ ss << R"( [label="success"];)";
+ ss << '\n';
+ }
+ else
+ {
+ node_to_graphviz_dot(ss, mapped_nodes, node->getSuccessTransition());
+
+ ss << node_to_graphviz_dot_string(node);
+ ss << " -> ";
+ ss << node_to_graphviz_dot_string(node->getSuccessTransition());
+ ss << R"( [label="success"];)";
+ ss << '\n';
+ }
+
+ // Rollback edge, only present when the node has a rollback transition.
+ if (node->getRollbackTransition() != nullptr)
+ {
+ if (node->getRollbackTransition() == RegexConstraint::ROLLBACK_TO_SUCCESS)
+ {
+ add_success_node(ss, mapped_nodes);
+
+ ss << node_to_graphviz_dot_string(node);
+ ss << " -> ";
+ ss << R"("success")";
+ ss << R"( [label="rollback"];)";
+ ss << '\n';
+ }
+ else
+ {
+ node_to_graphviz_dot(ss, mapped_nodes, node->getRollbackTransition());
+
+ ss << node_to_graphviz_dot_string(node);
+ ss << " -> ";
+ ss << node_to_graphviz_dot_string(node->getRollbackTransition());
+ ss << R"( [label="rollback"];)";
+ ss << '\n';
+ }
+ }
+ }
+
+ // Renders the regex's transition graph as a Graphviz "digraph", labelled with
+ // the regex's full string form. Traversal starts at `group.initial` with its
+ // lowest bit masked off.
+ std::string Regex::toGraphvizDot() const SOUP_EXCAL
+ {
+ std::stringstream ss;
+ std::unordered_set mapped_nodes{};
+
+ ss << "digraph {\n";
+ ss << "label=" << string::escape(toFullString()) << ";\n";
+ node_to_graphviz_dot(ss, mapped_nodes, reinterpret_cast(reinterpret_cast(group.initial) & ~1));
+ ss << '}';
+
+ return ss.str();
+ }
+}
diff --git a/src/vendor/Soup/soup/Regex.hpp b/src/vendor/Soup/soup/Regex.hpp
new file mode 100644
index 000000000..137ad373f
--- /dev/null
+++ b/src/vendor/Soup/soup/Regex.hpp
@@ -0,0 +1,132 @@
+#pragma once
+
+#include "RegexFlags.hpp"
+#include "RegexGroup.hpp"
+#include "RegexMatchResult.hpp"
+
+NAMESPACE_SOUP
+{
+ // A parsed regular expression. All state lives in the root `group`.
+ struct Regex
+ {
+     RegexGroup group;
+
+     Regex() = default;
+     Regex(Regex&&) = default;
+
+     // Parses the pattern in [it, end) with the given RE_* flag bitmask.
+     Regex(const char* it, const char* end, uint16_t flags)
+         : group(it, end, flags)
+     {
+     }
+
+     // Parses `pattern` with the given RE_* flag bitmask.
+     Regex(const std::string& pattern, uint16_t flags = 0)
+         : Regex(pattern.data(), pattern.data() + pattern.size(), flags)
+     {
+     }
+
+     // Parses `pattern`; `flags` is a NUL-terminated string of flag letters (see parseFlags).
+     Regex(const std::string& pattern, const char* flags)
+         : Regex(pattern.data(), pattern.data() + pattern.size(), parseFlags(flags))
+     {
+     }
+
+     // Copies by converting `b` back to its pattern string and parsing that again.
+     Regex(const Regex& b)
+         : Regex(b.toString(), b.getFlags())
+     {
+     }
+
+     // Parses a delimited string: delimiter, pattern, delimiter, flag letters.
+     [[nodiscard]] static Regex fromFullString(const std::string& str);
+
+     [[nodiscard]] bool matches(const std::string& str) const noexcept;
+     [[nodiscard]] bool matches(const char* it, const char* end) const noexcept;
+
+     [[nodiscard]] bool matchesFully(const std::string& str) const noexcept;
+     [[nodiscard]] bool matchesFully(const char* it, const char* end) const noexcept;
+
+     [[nodiscard]] RegexMatchResult match(const std::string& str) const noexcept;
+     [[nodiscard]] RegexMatchResult match(const char* it, const char* end) const noexcept;
+     [[nodiscard]] RegexMatchResult match(const char* it, const char* begin, const char* end) const noexcept;
+     [[nodiscard]] RegexMatchResult match(RegexMatcher& m, const char* it) const noexcept;
+
+     [[nodiscard]] RegexMatchResult search(const std::string& str) const noexcept;
+     [[nodiscard]] RegexMatchResult search(const char* it, const char* end) const noexcept;
+
+     void replaceAll(std::string& str, const std::string& replacement) const;
+
+     // The pattern text, without delimiters or flags.
+     [[nodiscard]] std::string toString() const SOUP_EXCAL
+     {
+         return group.toString();
+     }
+
+     // "/" + pattern + "/" + flag letters.
+     [[nodiscard]] std::string toFullString() const SOUP_EXCAL
+     {
+         std::string full = "/";
+         full += toString();
+         full += '/';
+         full += getFlagsString();
+         return full;
+     }
+
+     [[nodiscard]] uint16_t getFlags() const noexcept
+     {
+         return group.getFlags();
+     }
+
+     [[nodiscard]] std::string getFlagsString() const noexcept
+     {
+         return unparseFlags(group.getFlags());
+     }
+
+     // Converts a NUL-terminated string of flag letters into an RE_* bitmask.
+     // Characters that are not a known flag letter are skipped.
+     [[nodiscard]] static constexpr uint16_t parseFlags(const char* flags)
+     {
+         uint16_t res = 0;
+         while (*flags != '\0')
+         {
+             switch (*flags)
+             {
+             case 'm': res |= RE_MULTILINE; break;
+             case 's': res |= RE_DOTALL; break;
+             case 'i': res |= RE_INSENSITIVE; break;
+             case 'x': res |= RE_EXTENDED; break;
+             case 'u': res |= RE_UNICODE; break;
+             case 'U': res |= RE_UNGREEDY; break;
+             case 'D': res |= RE_DOLLAR_ENDONLY; break;
+             case 'n': res |= RE_EXPLICIT_CAPTURE; break;
+             default: break;
+             }
+             ++flags;
+         }
+         return res;
+     }
+
+     // Inverse of parseFlags.
+     [[nodiscard]] static std::string unparseFlags(uint16_t flags);
+
+     // Result can be used with 'dot' via CLI to produce an image, or an online viewer such as https://dreampuf.github.io/GraphvizOnline/
+     [[nodiscard]] std::string toGraphvizDot() const SOUP_EXCAL;
+ };
+
+ namespace literals
+ {
+     // User-defined literal: "pattern"_r builds a Regex from the literal's text with no flags.
+     inline Regex operator ""_r(const char* str, size_t len)
+     {
+         const std::string pattern(str, len);
+         return Regex(pattern);
+     }
+ }
+}
diff --git a/src/vendor/Soup/soup/RegexAlternative.hpp b/src/vendor/Soup/soup/RegexAlternative.hpp
new file mode 100644
index 000000000..c3d13d8a5
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexAlternative.hpp
@@ -0,0 +1,14 @@
+#pragma once
+
+#include
+
+#include "UniquePtr.hpp"
+#include "RegexConstraint.hpp"
+
+NAMESPACE_SOUP
+{
+ // One alternative of a group: the list of constraints it owns.
+ // NOTE(review): presumably one branch of a '|' alternation, evaluated in order -- confirm in RegexGroup.
+ struct RegexAlternative
+ {
+ std::vector> constraints{};
+ };
+}
diff --git a/src/vendor/Soup/soup/RegexAnyCharConstraint.hpp b/src/vendor/Soup/soup/RegexAnyCharConstraint.hpp
new file mode 100644
index 000000000..67e32f79a
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexAnyCharConstraint.hpp
@@ -0,0 +1,74 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include "RegexMatcher.hpp"
+
+NAMESPACE_SOUP
+{
+ // The '.' constraint. The `dotall` and `unicode` compile-time switches select
+ // whether '\n' is accepted and whether the cursor advances by one UTF-8
+ // sequence or by one byte.
+ template
+ struct RegexAnyCharConstraint : public RegexConstraint
+ {
+ // Fails at end of input and, unless `dotall`, on '\n'; otherwise consumes one character.
+ [[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+ {
+ if (m.it == m.end)
+ {
+ return false;
+ }
+ if constexpr (!dotall)
+ {
+ if (*m.it == '\n')
+ {
+ return false;
+ }
+ }
+ if constexpr (unicode)
+ {
+ unicode::utf8_add(m.it, m.end);
+ }
+ else
+ {
+ ++m.it;
+ }
+ return true;
+ }
+
+ [[nodiscard]] std::string toString() const noexcept final
+ {
+ return ".";
+ }
+
+ // Reports which of RE_DOTALL / RE_UNICODE this instantiation was built with (`set`) or without (`unset`).
+ void getFlags(uint16_t& set, uint16_t& unset) const noexcept final
+ {
+ if constexpr (dotall)
+ {
+ set |= RE_DOTALL;
+ }
+ else
+ {
+ unset |= RE_DOTALL;
+ }
+ if constexpr (unicode)
+ {
+ set |= RE_UNICODE;
+ }
+ else
+ {
+ unset |= RE_UNICODE;
+ }
+ }
+
+ [[nodiscard]] size_t getCursorAdvancement() const final
+ {
+ return 1;
+ }
+
+ // Creates a fresh copy, points all pending `success_transitions` at it, and
+ // registers the copy's own success transition as the next pending one.
+ [[nodiscard]] UniquePtr clone(RegexTransitionsVector& success_transitions) const final
+ {
+ auto cc = soup::make_unique();
+ success_transitions.setTransitionTo(cc->getEntrypoint());
+ success_transitions.emplace(&cc->success_transition);
+ return cc;
+ }
+ };
+}
diff --git a/src/vendor/Soup/soup/RegexCharConstraint.hpp b/src/vendor/Soup/soup/RegexCharConstraint.hpp
new file mode 100644
index 000000000..99ca57ec1
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexCharConstraint.hpp
@@ -0,0 +1,66 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include "RegexMatcher.hpp"
+
+NAMESPACE_SOUP
+{
+ // Matches exactly one byte equal to `c`.
+ struct RegexCharConstraint : public RegexConstraint
+ {
+ char c;
+
+ RegexCharConstraint(char c)
+ : c(c)
+ {
+ }
+
+ // Consumes one byte if it equals `c`; fails at end of input or on any other byte.
+ [[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+ {
+ if (m.it == m.end)
+ {
+ return false;
+ }
+ if (*m.it != c)
+ {
+ return false;
+ }
+ ++m.it;
+ return true;
+ }
+
+ // Returns `c` as pattern text, backslash-escaped when it is one of the metacharacters listed below.
+ // NOTE(review): '[', ']', '{', '}' and '/' are not escaped here. Regex's copy constructor re-parses
+ // toString() output, so confirm the parser round-trips such literals correctly.
+ [[nodiscard]] std::string toString() const noexcept final
+ {
+ std::string str(1, c);
+ switch (c)
+ {
+ case '\\':
+ case '|':
+ case '(':
+ case ')':
+ case '?':
+ case '+':
+ case '*':
+ case '.':
+ case '^':
+ case '$':
+ str.insert(0, 1, '\\');
+ break;
+ }
+ return str;
+ }
+
+ [[nodiscard]] size_t getCursorAdvancement() const final
+ {
+ return 1;
+ }
+
+ // Creates a copy for the same character, points all pending `success_transitions` at it,
+ // and registers the copy's own success transition as the next pending one.
+ [[nodiscard]] UniquePtr clone(RegexTransitionsVector& success_transitions) const final
+ {
+ auto cc = soup::make_unique(c);
+ success_transitions.setTransitionTo(cc->getEntrypoint());
+ success_transitions.emplace(&cc->success_transition);
+ return cc;
+ }
+ };
+}
diff --git a/src/vendor/Soup/soup/RegexCodepointConstraint.hpp b/src/vendor/Soup/soup/RegexCodepointConstraint.hpp
new file mode 100644
index 000000000..f6fc469fd
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexCodepointConstraint.hpp
@@ -0,0 +1,58 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include "RegexMatcher.hpp"
+
+NAMESPACE_SOUP
+{
+ // Matches the exact byte sequence stored in `c`.
+ // NOTE(review): presumably `c` is the UTF-8 encoding of a single code point -- confirm at the construction site.
+ struct RegexCodepointConstraint : public RegexConstraint
+ {
+ std::string c;
+
+ RegexCodepointConstraint(std::string c)
+ : c(std::move(c))
+ {
+ }
+
+ // Succeeds, consuming c.size() bytes, only if the input at the cursor starts with `c`.
+ [[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+ {
+ // Not enough input left to hold `c`.
+ if (static_cast(std::distance(m.it, m.end)) < c.size())
+ {
+ return false;
+ }
+ for (size_t i = 0; i != c.size(); ++i)
+ {
+ if (m.it[i] != c[i])
+ {
+ return false;
+ }
+ }
+ m.it += c.size();
+ return true;
+ }
+
+ [[nodiscard]] std::string toString() const noexcept final
+ {
+ return c;
+ }
+
+ // This constraint always reports RE_UNICODE as set.
+ void getFlags(uint16_t& set, uint16_t& unset) const noexcept final
+ {
+ set |= RE_UNICODE;
+ }
+
+ // Returns 1 even though matches() consumes c.size() bytes.
+ // NOTE(review): presumably the unit is code points in unicode mode -- confirm against the lookbehind window computation.
+ [[nodiscard]] size_t getCursorAdvancement() const final
+ {
+ return 1;
+ }
+
+ // Creates a copy with the same bytes, points all pending `success_transitions` at it,
+ // and registers the copy's own success transition as the next pending one.
+ [[nodiscard]] UniquePtr clone(RegexTransitionsVector& success_transitions) const final
+ {
+ auto cc = soup::make_unique(c);
+ success_transitions.setTransitionTo(cc->getEntrypoint());
+ success_transitions.emplace(&cc->success_transition);
+ return cc;
+ }
+ };
+}
diff --git a/src/vendor/Soup/soup/RegexConstraint.hpp b/src/vendor/Soup/soup/RegexConstraint.hpp
new file mode 100644
index 000000000..06246f0fd
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexConstraint.hpp
@@ -0,0 +1,72 @@
+#pragma once
+
+#include
+
+#include "fwd.hpp"
+
+#include "Exception.hpp"
+
+NAMESPACE_SOUP
+{
+ // Abstract base of all regex constraints (the nodes of the matching graph).
+ // Each node has a transition to follow when it matches and, optionally, one
+ // to save as a rollback alternative.
+ struct RegexConstraint
+ {
+ // Sentinel transition values. Both are 0b100; SUCCESS_TO_FAIL is compared
+ // against success transitions and ROLLBACK_TO_SUCCESS against rollback
+ // transitions, so they are told apart by where they appear.
+ inline static RegexConstraint* SUCCESS_TO_FAIL = reinterpret_cast(0b100);
+ inline static RegexConstraint* ROLLBACK_TO_SUCCESS = reinterpret_cast(0b100);
+ // Low pointer bits stripped by getSuccessTransition / getRollbackTransition.
+ inline static uintptr_t MASK = 0b11;
+
+ RegexConstraint* success_transition = nullptr;
+ RegexConstraint* rollback_transition = nullptr;
+ const RegexGroup* group = nullptr;
+
+ RegexConstraint() = default;
+
+ RegexConstraint(const RegexConstraint& b)
+ {
+ // We want the pointers to be nullptr so transitions are not copied by `clone`.
+ }
+
+ virtual ~RegexConstraint() = default;
+
+ // success_transition with the MASK bits cleared.
+ [[nodiscard]] RegexConstraint* getSuccessTransition() const noexcept
+ {
+ return reinterpret_cast(reinterpret_cast(success_transition) & ~MASK);
+ }
+
+ // rollback_transition with the MASK bits cleared.
+ [[nodiscard]] RegexConstraint* getRollbackTransition() const noexcept
+ {
+ return reinterpret_cast(reinterpret_cast(rollback_transition) & ~MASK);
+ }
+
+ [[nodiscard]] virtual bool shouldResetCapture() const noexcept
+ {
+ return false;
+ }
+
+ // May only modify `m.it` and only if the constraint matches.
+ [[nodiscard]] virtual bool matches(RegexMatcher& m) const noexcept = 0;
+
+ // The node at which evaluation of this constraint begins; `this` unless overridden.
+ [[nodiscard]] virtual RegexConstraint* getEntrypoint() noexcept
+ {
+ return this;
+ }
+
+ // The group whose capture this constraint belongs to; `group` unless overridden.
+ [[nodiscard]] virtual const RegexGroup* getGroupCaturedWithin() const noexcept
+ {
+ return group;
+ }
+
+ // How far a match moves the cursor. The default raises an Exception; fixed-width constraints override it.
+ [[nodiscard]] virtual size_t getCursorAdvancement() const
+ {
+ SOUP_THROW(Exception("Constraint is not fixed-width"));
+ }
+
+ // Produces a copy wired into `success_transitions`. The default raises an Exception; clonable constraints override it.
+ [[nodiscard]] virtual UniquePtr clone(RegexTransitionsVector& success_transitions) const
+ {
+ SOUP_THROW(Exception("Constraint is not clonable"));
+ }
+
+ // Pattern text for this constraint.
+ [[nodiscard]] virtual std::string toString() const noexcept = 0;
+
+ // Lets a constraint report the RE_* flags it was built with (`set`) or without (`unset`); no-op by default.
+ virtual void getFlags(uint16_t& set, uint16_t& unset) const noexcept {}
+ };
+}
diff --git a/src/vendor/Soup/soup/RegexConstraintLookbehind.hpp b/src/vendor/Soup/soup/RegexConstraintLookbehind.hpp
new file mode 100644
index 000000000..88c8f49f7
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexConstraintLookbehind.hpp
@@ -0,0 +1,70 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+NAMESPACE_SOUP
+{
+ // Common base of the lookbehind constraints: owns the group that forms the
+ // lookbehind body plus the distance (`window`) to step the cursor back before
+ // that body is evaluated.
+ struct RegexConstraintLookbehind : public RegexConstraint
+ {
+     // The lookbehind body. Note that this member hides RegexConstraint::group (a pointer).
+     RegexGroup group;
+     // Distance to rewind. Defaults to 0 so it never holds an indeterminate
+     // value; the constructor below does not set it, so whoever creates the
+     // constraint is expected to assign it afterwards.
+     size_t window = 0;
+
+     // Parses the lookbehind body from the constructor state `s`.
+     RegexConstraintLookbehind(const RegexGroup::ConstructorState& s)
+         : group(s, true)
+     {
+     }
+
+     // Evaluation of this constraint starts at the first node of the body.
+     [[nodiscard]] RegexConstraint* getEntrypoint() noexcept final
+     {
+         return group.initial;
+     }
+
+     // A lookbehind does not advance the cursor.
+     [[nodiscard]] size_t getCursorAdvancement() const final
+     {
+         return 0;
+     }
+ };
+
+ // Concrete lookbehind. The `unicode` compile-time switch selects whether
+ // `window` is stepped back in UTF-8 sequences or in bytes.
+ template
+ struct RegexConstraintLookbehindImpl : public RegexConstraintLookbehind
+ {
+ using RegexConstraintLookbehind::RegexConstraintLookbehind;
+
+ // Moves the cursor back by `window`; fails if that would go before `m.begin`.
+ // NOTE(review): in the unicode branch `m.it` may already have been moved when
+ // `false` is returned, and `*m.it` is read even when `m.it == m.end` -- confirm
+ // this is acceptable given the "only modify m.it on a match" rule on RegexConstraint::matches.
+ [[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+ {
+ if constexpr (unicode)
+ {
+ for (size_t i = 0; i != window; ++i)
+ {
+ if (m.begin == m.it)
+ {
+ return false;
+ }
+ if (UTF8_IS_CONTINUATION(*m.it))
+ {
+ return false;
+ }
+ unicode::utf8_sub(m.it, m.begin);
+ }
+ }
+ else
+ {
+ if (static_cast(std::distance(m.begin, m.it)) < window)
+ {
+ return false;
+ }
+ m.it -= window;
+ }
+ return true;
+ }
+
+ // Reports the body group's flags and, for the unicode variant, RE_UNICODE as set.
+ void getFlags(uint16_t& set, uint16_t& unset) const noexcept final
+ {
+ group.getFlags(set, unset);
+ if constexpr (unicode)
+ {
+ set |= RE_UNICODE;
+ }
+ }
+ };
+}
diff --git a/src/vendor/Soup/soup/RegexDummyConstraint.hpp b/src/vendor/Soup/soup/RegexDummyConstraint.hpp
new file mode 100644
index 000000000..216ff3d12
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexDummyConstraint.hpp
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+NAMESPACE_SOUP
+{
+ // A constraint that always matches, consumes no input, and has no pattern text.
+ struct RegexDummyConstraint : public RegexConstraint
+ {
+     using RegexConstraint::RegexConstraint;
+
+     // Always succeeds; the cursor is left untouched.
+     [[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+     {
+         return true;
+     }
+
+     // Empty: contributes nothing to the pattern string.
+     [[nodiscard]] std::string toString() const noexcept final
+     {
+         return std::string();
+     }
+
+     // Zero-width.
+     [[nodiscard]] size_t getCursorAdvancement() const final
+     {
+         return 0;
+     }
+ };
+}
diff --git a/src/vendor/Soup/soup/RegexEndConstraint.hpp b/src/vendor/Soup/soup/RegexEndConstraint.hpp
new file mode 100644
index 000000000..26a4eef02
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexEndConstraint.hpp
@@ -0,0 +1,73 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+NAMESPACE_SOUP
+{
+ // End-of-input anchor ('$', '\Z' or '\z'). Compile-time switches used below:
+ // `escape_sequence` (written as \Z / \z instead of '$'), `multi_line` and `end_only`.
+ template
+ struct RegexEndConstraint : public RegexConstraint
+ {
+ // Zero-width test; never moves the cursor.
+ [[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+ {
+ if (m.it == m.end)
+ {
+ return true;
+ }
+ if constexpr (multi_line)
+ {
+ // Multi-line: also matches right before any '\n'.
+ if (*m.it == '\n')
+ {
+ return true;
+ }
+ }
+ else if constexpr (!end_only)
+ {
+ // Otherwise, unless `end_only`: also matches right before a final trailing '\n'.
+ if ((m.it + 1) == m.end
+ && *m.it == '\n'
+ )
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ [[nodiscard]] std::string toString() const noexcept final
+ {
+ if constexpr (escape_sequence)
+ {
+ static_assert(multi_line == false);
+ return end_only ? "\\z" : "\\Z";
+ }
+ return "$";
+ }
+
+ // Only the '$' form reports flags: RE_MULTILINE and RE_DOLLAR_ENDONLY as set or unset.
+ void getFlags(uint16_t& set, uint16_t& unset) const noexcept final
+ {
+ if constexpr (!escape_sequence)
+ {
+ if constexpr (multi_line)
+ {
+ set |= RE_MULTILINE;
+ }
+ else
+ {
+ unset |= RE_MULTILINE;
+ }
+ if constexpr (end_only)
+ {
+ set |= RE_DOLLAR_ENDONLY;
+ }
+ else
+ {
+ unset |= RE_DOLLAR_ENDONLY;
+ }
+ }
+ }
+
+ [[nodiscard]] size_t getCursorAdvancement() const final
+ {
+ return 0;
+ }
+ };
+}
diff --git a/src/vendor/Soup/soup/RegexExactQuantifierConstraint.hpp b/src/vendor/Soup/soup/RegexExactQuantifierConstraint.hpp
new file mode 100644
index 000000000..7a65f30f6
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexExactQuantifierConstraint.hpp
@@ -0,0 +1,39 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include
+
+#include "UniquePtr.hpp"
+
+NAMESPACE_SOUP
+{
+ struct RegexExactQuantifierConstraint : public RegexConstraint // Container for the repetitions of a constraint quantified with an exact count ("{N}").
+ {
+ std::vector> constraints; // One entry per repetition; toString() prints its size as the count.
+ // Always succeeds; this constraint itself checks nothing.
+ [[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+ {
+ return true;
+ }
+ // Delegates to the entrypoint of the first repetition.
+ [[nodiscard]] RegexConstraint* getEntrypoint() noexcept final
+ {
+ return constraints.at(0)->getEntrypoint(); // NOTE(review): at(0) throws if `constraints` is empty, inside a noexcept function - callers presumably always add one entry; confirm
+ }
+ // Prints the first repetition followed by "{<number of repetitions>}".
+ [[nodiscard]] std::string toString() const noexcept final
+ {
+ std::string str = constraints.at(0)->toString();
+ str.push_back('{');
+ str.append(std::to_string(constraints.size()));
+ str.push_back('}');
+ return str;
+ }
+ // Advancement of the first repetition multiplied by the number of entries in `constraints`.
+ [[nodiscard]] size_t getCursorAdvancement() const final
+ {
+ return constraints.at(0)->getCursorAdvancement() * constraints.size();
+ }
+ };
+}
diff --git a/src/vendor/Soup/soup/RegexFlags.hpp b/src/vendor/Soup/soup/RegexFlags.hpp
new file mode 100644
index 000000000..2a2c283ea
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexFlags.hpp
@@ -0,0 +1,20 @@
+#pragma once
+
+#include
+
+#include "base.hpp"
+
+NAMESPACE_SOUP
+{
+ enum RegexFlags : uint16_t // Regex option bits; each value is a distinct bit so flags can be combined with bitwise OR.
+ {
+ RE_MULTILINE = (1 << 0), // 'm' - '^' and '$' also match start and end of lines, respectively
+ RE_DOTALL = (1 << 1), // 's' - '.' also matches '\n'
+ RE_INSENSITIVE = (1 << 2), // 'i' - case insensitive match
+ RE_EXTENDED = (1 << 3), // 'x' - Ignore bare space characters in pattern. '#' signifies begin of line comment.
+ RE_UNICODE = (1 << 4), // 'u' - Treat pattern and strings-to-match as UTF-8 instead of binary data
+ RE_UNGREEDY = (1 << 5), // 'U' - Quantifiers become lazy by default and are instead made greedy by a trailing '?'
+ RE_DOLLAR_ENDONLY = (1 << 6), // 'D' - '$' only matches at the very end of the input, not before a trailing '\n' - ignored if the multi-line flag is set
+ RE_EXPLICIT_CAPTURE = (1 << 7), // 'n' - only capture named groups (non-standard flag from .NET/C#)
+ };
+}
diff --git a/src/vendor/Soup/soup/RegexGroup.cpp b/src/vendor/Soup/soup/RegexGroup.cpp
new file mode 100644
index 000000000..b77b970a1
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexGroup.cpp
@@ -0,0 +1,956 @@
+#include "RegexGroup.hpp"
+
+#include "RegexFlags.hpp"
+#include "RegexTransitionsVector.hpp"
+#include "string.hpp"
+#include "unicode.hpp"
+
+#include "RegexAnyCharConstraint.hpp"
+#include "RegexCharConstraint.hpp"
+#include "RegexCodepointConstraint.hpp"
+#include "RegexDummyConstraint.hpp"
+#include "RegexEndConstraint.hpp"
+#include "RegexExactQuantifierConstraint.hpp"
+#include "RegexGroupConstraint.hpp"
+#include "RegexNegativeLookaheadConstraint.hpp"
+#include "RegexNegativeLookbehindConstraint.hpp"
+#include "RegexOpenEndedRangeQuantifierConstraint.hpp"
+#include "RegexPositiveLookaheadConstraint.hpp"
+#include "RegexPositiveLookbehindConstraint.hpp"
+#include "RegexOptConstraint.hpp"
+#include "RegexRangeQuantifierConstraint.hpp"
+#include "RegexRangeConstraint.hpp"
+#include "RegexRecallConstraint.hpp"
+#include "RegexRepeatConstraint.hpp"
+#include "RegexStartConstraint.hpp"
+#include "RegexWordBoundaryConstraint.hpp"
+#include "RegexWordCharConstraint.hpp"
+
+NAMESPACE_SOUP
+{
+ static void discharge_alternative(RegexGroup& g, RegexTransitionsVector& success_transitions, RegexAlternative& a) // Moves the finished alternative `a` into `g.alternatives` and leaves `a` empty for reuse.
+ {
+ // Ensure all alternatives have at least one constraint so we can set up transitions
+ if (a.constraints.empty())
+ {
+ auto upC = soup::make_unique(); // filler constraint for the empty alternative - presumably a RegexDummyConstraint (template argument not visible here)
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ }
+ // Hand the alternative over to the group, then reset the moved-from object explicitly.
+ g.alternatives.emplace_back(std::move(a));
+ a.constraints.clear();
+ }
+
+ RegexGroup::RegexGroup(const ConstructorState& s, bool non_capturing) // Parses the pattern from s.it until s.end or a closing ')' into this group's alternatives and wires up their transitions.
+ : index(non_capturing ? -1 : s.next_index++) // non-capturing groups get index -1 (see isNonCapturing()); capturing groups take the next free index
+ {
+ RegexTransitionsVector success_transitions;
+ success_transitions.data = { &initial };
+ // Constraints of the alternative currently being parsed.
+ RegexAlternative a{};
+ // Pending success transitions of all finished alternatives; handed to the caller through s.alternatives_transitions at the end.
+ std::vector alternatives_transitions{};
+ // True when the previous character was a backslash that has not been consumed yet.
+ bool escape = false;
+ for (; s.it != s.end; ++s.it)
+ {
+ if (escape)
+ {
+ escape = false;
+ if (*s.it == 'b') // \b
+ {
+ auto upC = soup::make_unique>();
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ continue;
+ }
+ else if (*s.it == 'B') // \B
+ {
+ auto upC = soup::make_unique>();
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ continue;
+ }
+ else if (*s.it == 'w') // \w
+ {
+ auto upC = soup::make_unique>();
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ continue;
+ }
+ else if (*s.it == 'W') // \W
+ {
+ auto upC = soup::make_unique>();
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ continue;
+ }
+ else if (*s.it == 'A') // \A
+ {
+ auto upC = soup::make_unique>();
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ continue;
+ }
+ else if (*s.it == 'Z') // \Z
+ {
+ auto upC = soup::make_unique>();
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ continue;
+ }
+ else if (*s.it == 'z') // \z
+ {
+ auto upC = soup::make_unique>();
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ continue;
+ }
+ else if (*s.it == 'd') // \d
+ {
+ auto upC = soup::make_unique(RegexRangeConstraint::digits);
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ continue;
+ }
+ else if (*s.it == 's') // \s
+ {
+ auto upC = soup::make_unique(RegexRangeConstraint::whitespace);
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ continue;
+ }
+ else if (*s.it == 'k') // \k<name> or \k'name': recall of a named group
+ {
+ if (++s.it != s.end)
+ {
+ std::string name;
+ if (*s.it == '<')
+ {
+ while (++s.it != s.end && *s.it != '>')
+ {
+ name.push_back(*s.it);
+ }
+ }
+ else
+ {
+ while (++s.it != s.end && *s.it != '\'')
+ {
+ name.push_back(*s.it);
+ }
+ }
+ // Build the recall constraint from the collected name.
+ auto upC = soup::make_unique(std::move(name));
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ }
+ continue; // NOTE(review): if the pattern ends inside \k..., s.it == s.end here and the loop's ++s.it would step past the end - confirm
+ }
+ }
+ else
+ {
+ if (*s.it == '\\')
+ {
+ if (++s.it != s.end && string::isNumberChar(*s.it)) // backslash followed by digits: numeric recall such as \1
+ {
+ size_t i = ((*s.it) - '0');
+ while (++s.it != s.end && string::isNumberChar(*s.it))
+ {
+ i *= 10;
+ i += ((*s.it) - '0');
+ }
+ // Build the recall constraint from the parsed number.
+ auto upC = soup::make_unique(i);
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ }
+ else
+ {
+ escape = true;
+ }
+ --s.it; // step back one so the loop's ++s.it lands on the character looked at above
+ continue;
+ }
+ else if (*s.it == '|') // end of the current alternative
+ {
+ discharge_alternative(*this, success_transitions, a);
+ success_transitions.discharge(alternatives_transitions);
+ continue;
+ }
+ else if (*s.it == '(') // group, lookaround or inline-modifier group
+ {
+ bool non_capturing = false;
+ bool positive_lookahead = false;
+ bool negative_lookahead = false;
+ bool positive_lookbehind = false;
+ bool negative_lookbehind = false;
+ std::string name{};
+ std::string inline_modifiers{};
+ if (++s.it != s.end && *s.it == '?')
+ {
+ while (++s.it != s.end && (*s.it == '-' || string::isLetter(*s.it))) // collect inline modifiers, e.g. the "i-s" in "(?i-s:"
+ {
+ inline_modifiers.push_back(*s.it);
+ }
+ if (s.it != s.end)
+ {
+ if (*s.it == ':') // (?:...)
+ {
+ ++s.it;
+ non_capturing = true;
+ }
+ else if (*s.it == '\'') // (?'name'...)
+ {
+ while (++s.it != s.end && *s.it != '\'')
+ {
+ name.push_back(*s.it);
+ }
+ if (s.it != s.end)
+ {
+ ++s.it;
+ }
+ }
+ else if (*s.it == '=') // (?=...)
+ {
+ positive_lookahead = true;
+ ++s.it;
+ }
+ else if (*s.it == '!') // (?!...)
+ {
+ negative_lookahead = true;
+ ++s.it;
+ }
+ else if (*s.it == '<') // (?<=...), (?<!...) or (?<name>...)
+ {
+ if (++s.it != s.end)
+ {
+ if (*s.it == '=')
+ {
+ positive_lookbehind = true;
+ ++s.it;
+ }
+ else if (*s.it == '!')
+ {
+ negative_lookbehind = true;
+ ++s.it;
+ }
+ else
+ {
+ do
+ {
+ name.push_back(*s.it);
+ } while (++s.it != s.end && *s.it != '>');
+ if (s.it != s.end)
+ {
+ ++s.it;
+ }
+ }
+ }
+ }
+ }
+ }
+ uint16_t restore_flags = s.flags;
+ if (!inline_modifiers.empty())
+ {
+ std::string negative_inline_modifiers{};
+ auto sep = inline_modifiers.find('-');
+ if (sep != std::string::npos)
+ {
+ negative_inline_modifiers = inline_modifiers.substr(sep + 1);
+ inline_modifiers.erase(sep); // keep only the part before '-' as the modifiers to enable (previously the negative part was kept here too)
+ }
+ s.flags |= Regex::parseFlags(inline_modifiers.c_str());
+ s.flags &= ~Regex::parseFlags(negative_inline_modifiers.c_str());
+ // Decide whether the flag change is scoped to this group or persists after it.
+ // If non_capturing is true, these are supposed to be localised inline modifiers.
+ if (!non_capturing)
+ {
+ // Otherwise, we want to keep them beyond the scope of this group.
+ restore_flags = s.flags;
+ // (restore_flags is written back to s.flags after the group has been parsed.)
+ // However, this kind of group should always be non-capturing.
+ non_capturing = true;
+ }
+ }
+ if (positive_lookahead)
+ {
+ auto upGC = soup::make_unique(s);
+ upGC->group.parent = this;
+ upGC->group.lookahead_or_lookbehind = true;
+ // An empty lookahead body leaves group.initial null; then no transitions are set up.
+ if (upGC->group.initial)
+ {
+ // last-constraint --[success]-> first-lookahead-constraint + save checkpoint
+ success_transitions.setTransitionTo(upGC->group.initial, true);
+ success_transitions.data = std::move(s.alternatives_transitions);
+
+ // last-lookahead-constraint --[success]-> group (to restore checkpoint)
+ success_transitions.setTransitionTo(upGC.get());
+
+ // group --> next-constraint
+ success_transitions.emplace(&upGC->success_transition);
+ }
+
+ a.constraints.emplace_back(std::move(upGC));
+ }
+ else if (negative_lookahead)
+ {
+ auto upGC = soup::make_unique(s);
+ upGC->group.parent = this;
+ upGC->group.lookahead_or_lookbehind = true;
+
+ if (upGC->group.initial)
+ {
+ // last-constraint --[success]-> first-lookahead-constraint
+ success_transitions.setTransitionTo(upGC->group.initial);
+ success_transitions.data = std::move(s.alternatives_transitions);
+ }
+
+ // last-lookahead-constraint --[success]-> fail
+ success_transitions.setTransitionTo(RegexConstraint::SUCCESS_TO_FAIL);
+
+ if (upGC->group.initial)
+ {
+ // first-lookahead-constraint --[rollback]-> next-constraint
+ success_transitions.emplaceRollback(&upGC->group.initial->rollback_transition);
+ }
+
+ a.constraints.emplace_back(std::move(upGC));
+ }
+ else if (positive_lookbehind)
+ {
+ UniquePtr upGC;
+ if (s.hasFlag(RE_UNICODE)) // the concrete constraint type depends on whether the unicode flag is set
+ {
+ upGC = soup::make_unique>(s);
+ }
+ else
+ {
+ upGC = soup::make_unique>(s);
+ }
+ upGC->group.parent = this;
+ upGC->group.lookahead_or_lookbehind = true;
+ upGC->window = upGC->group.getCursorAdvancement();
+
+ // last-constraint --[success]-> group (to move cursor)
+ success_transitions.setTransitionTo(upGC.get());
+
+ // group --> first-lookbehind-constraint
+ success_transitions.emplace(&upGC->success_transition);
+ success_transitions.setTransitionTo(upGC->group.initial);
+
+ // last-lookbehind-constraint --[success]-> next-constraint
+ success_transitions.data = std::move(s.alternatives_transitions);
+
+ a.constraints.emplace_back(std::move(upGC));
+ }
+ else if (negative_lookbehind)
+ {
+ UniquePtr upGC;
+ if (s.hasFlag(RE_UNICODE))
+ {
+ upGC = soup::make_unique>(s);
+ }
+ else
+ {
+ upGC = soup::make_unique>(s);
+ }
+ upGC->group.parent = this;
+ upGC->group.lookahead_or_lookbehind = true;
+ upGC->window = upGC->group.getCursorAdvancement();
+
+ // last-constraint --[success]-> group (to move cursor)
+ success_transitions.setTransitionTo(upGC.get());
+
+ // group --> first-lookbehind-constraint
+ success_transitions.emplace(&upGC->success_transition);
+ success_transitions.setTransitionTo(upGC->group.initial);
+
+ // last-lookbehind-constraint --[success]-> fail
+ success_transitions.data = std::move(s.alternatives_transitions);
+ success_transitions.setTransitionTo(RegexConstraint::SUCCESS_TO_FAIL);
+
+ // group --[rollback]--> next-constraint
+ success_transitions.emplaceRollback(&upGC->rollback_transition);
+
+ a.constraints.emplace_back(std::move(upGC));
+ }
+ else
+ {
+ if (s.hasFlag(RE_EXPLICIT_CAPTURE) && name.empty()) // with explicit-capture, unnamed groups do not capture
+ {
+ non_capturing = true;
+ }
+ if (*s.it == ')' // No contents? NOTE(review): s.it may equal s.end here when the pattern ends right after '(' - confirm
+ && non_capturing // Not a capturing group?
+ )
+ {
+ // Don't have to generate anything for this group.
+ }
+ else
+ {
+ auto upGC = soup::make_unique(s, non_capturing);
+ upGC->data.parent = this;
+ upGC->data.name = std::move(name);
+ success_transitions.setTransitionTo(upGC.get());
+ success_transitions.emplace(&upGC->success_transition);
+ success_transitions.setTransitionTo(upGC->data.initial);
+ success_transitions.data = std::move(s.alternatives_transitions);
+ a.constraints.emplace_back(std::move(upGC));
+ }
+ s.flags = restore_flags;
+ }
+ if (s.it == s.end)
+ {
+ break;
+ }
+ continue;
+ }
+ else if (*s.it == ')') // end of this group; the caller continues after it
+ {
+ break;
+ }
+ else if (*s.it == '+') // one-or-more quantifier on the previous constraint
+ {
+ bool greedy = true;
+ if (s.it + 1 != s.end
+ && *(s.it + 1) == '?'
+ )
+ {
+ greedy = false;
+ ++s.it;
+ }
+ greedy ^= s.hasFlag(RE_UNGREEDY); // the ungreedy flag inverts the meaning of the trailing '?'
+
+ SOUP_ASSERT(!a.constraints.empty(), "Invalid modifier");
+ RegexConstraint* pModifiedConstraint;
+ UniquePtr upQuantifierConstraint;
+ if (greedy)
+ {
+ UniquePtr upModifiedConstraint = std::move(a.constraints.back());
+ pModifiedConstraint = upModifiedConstraint.get();
+ upQuantifierConstraint = soup::make_unique>(std::move(upModifiedConstraint));
+ static_cast*>(upQuantifierConstraint.get())->setupTransitionsAtLeastOne(success_transitions);
+ }
+ else
+ {
+ UniquePtr upModifiedConstraint = std::move(a.constraints.back());
+ pModifiedConstraint = upModifiedConstraint.get();
+ upQuantifierConstraint = soup::make_unique>(std::move(upModifiedConstraint));
+ static_cast*>(upQuantifierConstraint.get())->setupTransitionsAtLeastOne(success_transitions);
+ }
+ // The wrapped constraint is no longer a direct member of `a`, so the final group-pointer pass will not reach it.
+ pModifiedConstraint->group = this;
+ // Replace the (moved-from) slot with the quantifier that now owns the constraint.
+ a.constraints.back() = std::move(upQuantifierConstraint);
+ continue;
+ }
+ else if (*s.it == '*') // zero-or-more quantifier on the previous constraint
+ {
+ bool greedy = true;
+ if (s.it + 1 != s.end
+ && *(s.it + 1) == '?'
+ )
+ {
+ greedy = false;
+ ++s.it;
+ }
+ greedy ^= s.hasFlag(RE_UNGREEDY);
+
+ SOUP_ASSERT(!a.constraints.empty(), "Invalid modifier");
+ RegexConstraint* pModifiedConstraint;
+ UniquePtr upQuantifierConstraint;
+ if (greedy)
+ {
+ UniquePtr upModifiedConstraint = std::move(a.constraints.back());
+ pModifiedConstraint = upModifiedConstraint.get();
+ upQuantifierConstraint = soup::make_unique>(std::move(upModifiedConstraint));
+ }
+ else
+ {
+ UniquePtr upModifiedConstraint = std::move(a.constraints.back());
+ pModifiedConstraint = upModifiedConstraint.get();
+ upQuantifierConstraint = soup::make_unique>(std::move(upModifiedConstraint));
+ }
+
+ pModifiedConstraint->group = this;
+
+ if (greedy)
+ {
+ // constraint --[success]-> constraint
+ success_transitions.setTransitionTo(pModifiedConstraint->getEntrypoint());
+
+ // constraint --[rollback]-> next-constraint
+ success_transitions.emplaceRollback(&pModifiedConstraint->rollback_transition);
+ }
+ else
+ {
+ // prev-constraint --[success]-> quantifier
+ success_transitions.setPreviousTransitionTo(upQuantifierConstraint.get());
+
+ // constraint --[success]-> quantifier
+ success_transitions.setTransitionTo(upQuantifierConstraint.get());
+
+ // quantifier --[success]-> next-constraint
+ success_transitions.emplace(&upQuantifierConstraint->success_transition);
+
+ // quantifier --[rollback]-> constraint
+ upQuantifierConstraint->rollback_transition = pModifiedConstraint->getEntrypoint();
+ }
+
+ a.constraints.back() = std::move(upQuantifierConstraint);
+ continue;
+ }
+ else if (*s.it == '?') // optional quantifier on the previous constraint
+ {
+ SOUP_ASSERT(!a.constraints.empty(), "Invalid modifier");
+ UniquePtr upModifiedConstraint = std::move(a.constraints.back());
+ auto pModifiedConstraint = upModifiedConstraint.get();
+ auto upOptConstraint = soup::make_unique(std::move(upModifiedConstraint));
+
+ pModifiedConstraint->group = this;
+
+ // constraint --[rollback]-> next-constraint
+ success_transitions.emplaceRollback(&pModifiedConstraint->getEntrypoint()->rollback_transition);
+
+ a.constraints.back() = std::move(upOptConstraint);
+ continue;
+ }
+ else if (*s.it == '.') // any-character; the concrete type depends on the dotall and unicode flags
+ {
+ UniquePtr upC;
+ if (s.hasFlag(RE_DOTALL))
+ {
+ if (s.hasFlag(RE_UNICODE))
+ {
+ upC = soup::make_unique>();
+ }
+ else
+ {
+ upC = soup::make_unique>();
+ }
+ }
+ else
+ {
+ if (s.hasFlag(RE_UNICODE))
+ {
+ upC = soup::make_unique>();
+ }
+ else
+ {
+ upC = soup::make_unique>();
+ }
+ }
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ continue;
+ }
+ else if (*s.it == '[') // character class; its constructor receives s.it and s.end and does the parsing
+ {
+ auto upC = soup::make_unique(s.it, s.end, s.hasFlag(RE_INSENSITIVE));
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ if (s.it == s.end)
+ {
+ break;
+ }
+ continue;
+ }
+ else if (*s.it == '^') // start anchor; variant chosen by the multi-line flag
+ {
+ UniquePtr upC;
+ if (s.flags & RE_MULTILINE)
+ {
+ upC = soup::make_unique>();
+ }
+ else
+ {
+ upC = soup::make_unique>();
+ }
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ continue;
+ }
+ else if (*s.it == '$') // end anchor; multi-line takes precedence over dollar-endonly
+ {
+ UniquePtr upC;
+ if (s.flags & RE_MULTILINE)
+ {
+ upC = soup::make_unique>();
+ }
+ else if (s.flags & RE_DOLLAR_ENDONLY)
+ {
+ upC = soup::make_unique>();
+ }
+ else
+ {
+ upC = soup::make_unique>();
+ }
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ continue;
+ }
+ else if (*s.it == '{') // counted quantifier: {X}, {X,} or {X,Y}
+ {
+ size_t min_reps = 0;
+ while (++s.it != s.end && string::isNumberChar(*s.it))
+ {
+ min_reps *= 10;
+ min_reps += ((*s.it) - '0');
+ }
+ if (s.it == s.end)
+ {
+ break;
+ }
+ // max_reps stays 0 when no upper bound is written ("{X,}").
+ bool exact = true;
+ size_t max_reps = 0;
+ if (*s.it == ',')
+ {
+ exact = false;
+ while (++s.it != s.end && string::isNumberChar(*s.it))
+ {
+ max_reps *= 10;
+ max_reps += ((*s.it) - '0');
+ }
+ if (s.it == s.end)
+ {
+ break;
+ }
+ }
+
+ bool greedy = true;
+ if (s.it + 1 != s.end
+ && *(s.it + 1) == '?'
+ )
+ {
+ greedy = false;
+ ++s.it;
+ }
+ greedy ^= s.hasFlag(RE_UNGREEDY);
+
+ SOUP_ASSERT(!a.constraints.empty(), "Invalid modifier");
+ UniquePtr upModifiedConstraint = std::move(a.constraints.back());
+ auto pModifiedConstraint = upModifiedConstraint.get();
+ if (min_reps == 0) // a minimum of 0 drops the constraint entirely, whatever the maximum is
+ {
+ success_transitions.rollback();
+ a.constraints.pop_back();
+ }
+ else if (exact || min_reps == max_reps) // {X} or {X,X}
+ {
+ // greedy or not doesn't make a difference here
+
+ auto upRepConstraint = soup::make_unique();
+ upRepConstraint->constraints.emplace_back(std::move(upModifiedConstraint));
+
+ pModifiedConstraint->group = this;
+ // The original counts as the first repetition; add min_reps - 1 clones.
+ while (--min_reps != 0)
+ {
+ if (pModifiedConstraint->shouldResetCapture())
+ {
+ success_transitions.setResetCapture();
+ }
+ auto upClone = pModifiedConstraint->clone(success_transitions);
+ upClone->group = this;
+
+ upRepConstraint->constraints.emplace_back(std::move(upClone));
+ }
+ a.constraints.back() = std::move(upRepConstraint);
+ }
+ else if (max_reps == 0) // {X,}
+ {
+ UniquePtr upRepConstraint;
+ if (greedy)
+ {
+ upRepConstraint = soup::make_unique>();
+ }
+ else
+ {
+ upRepConstraint = soup::make_unique>();
+ }
+ upRepConstraint->constraints.emplace_back(std::move(upModifiedConstraint));
+
+ pModifiedConstraint->group = this;
+
+ while (--min_reps != 0)
+ {
+ if (pModifiedConstraint->shouldResetCapture())
+ {
+ success_transitions.setResetCapture();
+ }
+ auto upClone = pModifiedConstraint->clone(success_transitions);
+ upClone->group = this;
+
+ upRepConstraint->constraints.emplace_back(std::move(upClone));
+ }
+
+ // last-clone --[success]-> quantifier
+ success_transitions.setTransitionTo(upRepConstraint.get());
+
+ if (greedy)
+ {
+ // quantifier --[success]-> last-clone
+ success_transitions.emplace(&upRepConstraint->success_transition);
+ if (pModifiedConstraint->shouldResetCapture())
+ {
+ success_transitions.setResetCapture();
+ }
+ success_transitions.setTransitionTo(upRepConstraint->constraints.back()->getEntrypoint());
+
+ // quantifier --[rollback]-> next-constraint
+ success_transitions.emplaceRollback(&upRepConstraint->rollback_transition);
+ }
+ else
+ {
+ // quantifier --[rollback]-> last-clone
+ success_transitions.emplaceRollback(&upRepConstraint->rollback_transition);
+ if (pModifiedConstraint->shouldResetCapture())
+ {
+ success_transitions.setResetCapture();
+ }
+ success_transitions.setTransitionTo(upRepConstraint->constraints.back()->getEntrypoint());
+
+ // quantifier --[success]-> next-constraint
+ success_transitions.emplace(&upRepConstraint->success_transition);
+ }
+
+ a.constraints.back() = std::move(upRepConstraint);
+ }
+ else if (min_reps < max_reps) // {X,Y}
+ {
+ if (greedy)
+ {
+ auto upRepConstraint = soup::make_unique();
+ upRepConstraint->constraints.emplace_back(std::move(upModifiedConstraint));
+ upRepConstraint->min_reps = min_reps;
+
+ pModifiedConstraint->group = this;
+ // Required part: the original plus min_reps - 1 clones.
+ size_t required_reps = min_reps;
+ while (--required_reps != 0)
+ {
+ if (pModifiedConstraint->shouldResetCapture())
+ {
+ success_transitions.setResetCapture();
+ }
+ auto upClone = pModifiedConstraint->clone(success_transitions);
+ upClone->group = this;
+
+ upRepConstraint->constraints.emplace_back(std::move(upClone));
+ }
+ RegexTransitionsVector rep_transitions;
+ success_transitions.discharge(rep_transitions.data);
+ for (size_t optional_reps = (max_reps - min_reps); optional_reps != 0; --optional_reps) // optional part: max_reps - min_reps further clones
+ {
+ if (pModifiedConstraint->shouldResetCapture())
+ {
+ rep_transitions.setResetCapture();
+ }
+ auto upClone = pModifiedConstraint->clone(rep_transitions);
+ upClone->group = this;
+
+ // clone --[rollback]-> next-constraint
+ success_transitions.emplaceRollback(&upClone->getEntrypoint()->rollback_transition);
+
+ upRepConstraint->constraints.emplace_back(std::move(upClone));
+ }
+
+ // last-clone --[success]-> next-constraint
+ rep_transitions.discharge(success_transitions.data);
+
+ a.constraints.back() = std::move(upRepConstraint);
+ }
+ else
+ {
+ auto upRepConstraint = soup::make_unique();
+ upRepConstraint->constraints.emplace_back(std::move(upModifiedConstraint));
+ upRepConstraint->min_reps = min_reps;
+
+ pModifiedConstraint->group = this;
+
+ size_t required_reps = min_reps;
+ while (--required_reps != 0)
+ {
+ if (pModifiedConstraint->shouldResetCapture())
+ {
+ success_transitions.setResetCapture();
+ }
+ auto upClone = pModifiedConstraint->clone(success_transitions);
+ upClone->group = this;
+
+ upRepConstraint->constraints.emplace_back(std::move(upClone));
+ }
+
+ RegexTransitionsVector rep_transitions;
+ success_transitions.discharge(rep_transitions.data);
+ for (size_t optional_reps = (max_reps - min_reps); optional_reps != 0; --optional_reps)
+ {
+ auto upDummy = soup::make_unique();
+
+ // last-constraint --[success]-> dummy
+ rep_transitions.setTransitionTo(upDummy->getEntrypoint());
+
+ // dummy --[success]-> next-constraint
+ success_transitions.emplace(&upDummy->success_transition);
+
+ // clone --[success]-> next-dummy
+ auto upClone = pModifiedConstraint->clone(rep_transitions);
+ upClone->group = this;
+
+ // dummy --[rollback]-> clone
+ upDummy->rollback_transition = upClone->getEntrypoint();
+
+ upRepConstraint->constraints.emplace_back(std::move(upClone));
+ upRepConstraint->constraints.emplace_back(std::move(upDummy));
+ }
+
+ // last-clone --[success]-> next-constraint
+ rep_transitions.discharge(success_transitions.data);
+
+ a.constraints.back() = std::move(upRepConstraint);
+ }
+ }
+ else
+ {
+ // We may be here if (!exact && min_reps > max_reps)
+ // Which is invalid, so we just yeet the constraint as if {0} was written.
+ success_transitions.rollback();
+ a.constraints.pop_back();
+ }
+ continue;
+ }
+ // Extended mode: skip bare whitespace and '#' comments that run to the end of the line.
+ if (s.hasFlag(RE_EXTENDED))
+ {
+ if (string::isSpace(*s.it))
+ {
+ continue;
+ }
+ if (*s.it == '#')
+ {
+ do
+ {
+ ++s.it;
+ } while (s.it != s.end && *s.it != '\n');
+ continue; // NOTE(review): if the comment runs to the end of the pattern, s.it == s.end here and the loop's ++s.it would step past it - confirm
+ }
+ }
+ }
+ // Anything not handled above (including unrecognised escapes) is matched literally.
+ UniquePtr upC;
+ if (UTF8_HAS_CONTINUATION(*s.it) && s.hasFlag(RE_UNICODE)) // unicode mode: gather the lead byte and its continuation bytes into one constraint
+ {
+ std::string c;
+ do
+ {
+ c.push_back(*s.it);
+ } while (s.it + 1 != s.end && UTF8_IS_CONTINUATION(*++s.it));
+ upC = soup::make_unique(std::move(c));
+ }
+ else if (s.hasFlag(RE_INSENSITIVE) && string::lower_char(*s.it) != string::upper_char(*s.it)) // case-insensitive letter: accept both cases
+ {
+ const char arr[] = { string::lower_char(*s.it), string::upper_char(*s.it) };
+ upC = soup::make_unique(arr);
+ }
+ else
+ {
+ upC = soup::make_unique(*s.it);
+ }
+ success_transitions.setTransitionTo(upC.get());
+ success_transitions.emplace(&upC->success_transition);
+ a.constraints.emplace_back(std::move(upC));
+ }
+ discharge_alternative(*this, success_transitions, a); // flush the last (possibly empty) alternative
+ success_transitions.discharge(alternatives_transitions);
+
+ if (alternatives.size() > 1)
+ {
+ // Set up rollback transitions for the first constraint in each alternative to jump to next alternative
+ for (size_t i = 0; i + 1 != alternatives.size(); ++i)
+ {
+ alternatives.at(i).constraints.at(0)->rollback_transition = alternatives.at(i + 1).constraints.at(0)->getEntrypoint();
+ }
+ }
+
+ // Set up group pointers
+ for (const auto& a : alternatives)
+ {
+ for (const auto& c : a.constraints)
+ {
+ c->group = this;
+ }
+ }
+ // Hand the still-open success transitions to whoever constructed this group.
+ s.alternatives_transitions = std::move(alternatives_transitions);
+ }
+
+ std::string RegexGroup::toString() const SOUP_EXCAL // Concatenates the string form of every constraint, joining alternatives with '|'.
+ {
+ std::string str{};
+ for (const auto& a : alternatives)
+ {
+ for (const auto& c : a.constraints)
+ {
+ str.append(c->toString());
+ }
+ str.push_back('|');
+ }
+ if (!str.empty())
+ {
+ str.pop_back(); // drop the '|' appended after the last alternative
+ }
+ return str;
+ }
+
+ uint16_t RegexGroup::getFlags() const // Returns the flags the group's constraints report as set; asserts that none is also reported as unset.
+ {
+ uint16_t set = 0;
+ uint16_t unset = 0;
+ getFlags(set, unset);
+ SOUP_ASSERT((set & unset) == 0, "RegexGroup has contradicting flags");
+ return set;
+ }
+
+ void RegexGroup::getFlags(uint16_t& set, uint16_t& unset) const noexcept // Lets every constraint of every alternative add its flags to `set` / `unset`.
+ {
+ for (const auto& a : alternatives)
+ {
+ for (const auto& c : a.constraints)
+ {
+ c->getFlags(set, unset);
+ }
+ }
+ }
+
+ size_t RegexGroup::getCursorAdvancement() const // Sums getCursorAdvancement() over all constraints of all alternatives.
+ {
+ size_t accum = 0;
+ for (const auto& a : alternatives) // NOTE(review): advancements of different alternatives are added together, not compared - confirm this is intended for groups containing '|'
+ {
+ for (const auto& c : a.constraints)
+ {
+ accum += c->getCursorAdvancement();
+ }
+ }
+ return accum;
+ }
+}
diff --git a/src/vendor/Soup/soup/RegexGroup.hpp b/src/vendor/Soup/soup/RegexGroup.hpp
new file mode 100644
index 000000000..473fe6053
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexGroup.hpp
@@ -0,0 +1,64 @@
+#pragma once
+
+#include
+#include
+
+#include "RegexAlternative.hpp"
+
+NAMESPACE_SOUP
+{
+ struct RegexGroup // A parsed regex group: a list of alternatives plus capture metadata (index, name, parent).
+ {
+ struct ConstructorState // Parser state passed to the RegexGroup constructors; the mutable members are updated through a const reference while parsing.
+ {
+ mutable const char* it; // current position in the pattern
+ const char* end; // end of the pattern
+ mutable uint16_t flags; // currently active flag bits
+ mutable size_t next_index = 0; // index handed to the next capturing group
+ mutable std::vector alternatives_transitions{}; // written by the group constructor just before it returns
+ // Starts parsing at `it` with the given flags.
+ ConstructorState(const char* it, const char* end, uint16_t flags)
+ : it(it), end(end), flags(flags)
+ {
+ }
+ // True if any bit of `flag` is set in `flags`.
+ [[nodiscard]] bool hasFlag(uint16_t flag) const noexcept
+ {
+ return (flags & flag) != 0;
+ }
+ };
+
+ const size_t index = 0; // capture index; the value -1 (converted to size_t) marks a non-capturing group
+ const RegexGroup* parent = nullptr;
+ RegexConstraint* initial = nullptr;
+ std::vector alternatives{};
+ std::string name{};
+ bool lookahead_or_lookbehind = false;
+
+ RegexGroup() = default;
+ // Creates an empty group with the given capture index.
+ RegexGroup(size_t index)
+ : index(index)
+ {
+ }
+ // Parses the pattern in [it, end) using a fresh ConstructorState.
+ RegexGroup(const char* it, const char* end, uint16_t flags)
+ : RegexGroup(ConstructorState(it, end, flags))
+ {
+ }
+ // Parses a group starting at s.it; defined out of line.
+ RegexGroup(const ConstructorState& s, bool non_capturing = false);
+ // True if this group was given the index -1.
+ [[nodiscard]] bool isNonCapturing() const noexcept
+ {
+ return index == -1;
+ }
+
+ [[nodiscard]] std::string toString() const SOUP_EXCAL;
+
+ [[nodiscard]] uint16_t getFlags() const;
+ void getFlags(uint16_t& set, uint16_t& unset) const noexcept;
+
+ [[nodiscard]] size_t getCursorAdvancement() const;
+ };
+}
diff --git a/src/vendor/Soup/soup/RegexGroupConstraint.hpp b/src/vendor/Soup/soup/RegexGroupConstraint.hpp
new file mode 100644
index 000000000..c79d074d4
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexGroupConstraint.hpp
@@ -0,0 +1,113 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include "RegexGroup.hpp"
+
+#include
+
+NAMESPACE_SOUP
+{
+ struct RegexGroupConstraint : public RegexConstraint // Constraint wrapping a nested RegexGroup (capturing, named or non-capturing).
+ {
+ RegexGroup data;
+ // Creates a constraint around an empty group with the given index (used by clone()).
+ RegexGroupConstraint(size_t index)
+ : data(index)
+ {
+ }
+ // Parses the nested group from the constructor state.
+ RegexGroupConstraint(const RegexGroup::ConstructorState& s, bool non_capturing)
+ : data(s, non_capturing)
+ {
+ }
+ // True for capturing groups.
+ [[nodiscard]] bool shouldResetCapture() const noexcept final
+ {
+ return !data.isNonCapturing();
+ }
+ // Always succeeds; this constraint itself checks nothing.
+ [[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+ {
+ return true;
+ }
+ // Returns the wrapped group.
+ [[nodiscard]] const RegexGroup* getGroupCaturedWithin() const noexcept final
+ {
+ return &data;
+ }
+ // Prints "(?:...)", "(?'name'...)", "(?<name>...)" or "(...)" depending on the group kind.
+ [[nodiscard]] std::string toString() const noexcept final
+ {
+ auto str = data.toString();
+ if (data.isNonCapturing())
+ {
+ str.insert(0, "?:");
+ }
+ else if (!data.name.empty())
+ {
+ if (data.name.find('\'') != std::string::npos) // the name contains a quote, so use the <name> syntax
+ {
+ str.insert(0, 1, '>');
+ str.insert(0, data.name);
+ str.insert(0, 1, '<');
+ }
+ else
+ {
+ str.insert(0, 1, '\'');
+ str.insert(0, data.name);
+ str.insert(0, 1, '\'');
+ }
+ str.insert(0, 1, '?');
+ }
+ str.insert(0, 1, '(');
+ str.push_back(')');
+ return str;
+ }
+ // Forwards to the wrapped group.
+ void getFlags(uint16_t& set, uint16_t& unset) const noexcept final
+ {
+ data.getFlags(set, unset);
+ }
+ // Forwards to the wrapped group.
+ [[nodiscard]] size_t getCursorAdvancement() const final
+ {
+ return data.getCursorAdvancement();
+ }
+ // Clones this constraint and every constraint of the wrapped group, wiring transitions through `success_transitions`.
+ [[nodiscard]] UniquePtr clone(RegexTransitionsVector& success_transitions) const final
+ {
+ auto upClone = soup::make_unique(data.index);
+ success_transitions.setTransitionTo(upClone.get());
+ success_transitions.emplace(&upClone->success_transition);
+ for (const auto& a : data.alternatives)
+ {
+ RegexAlternative& ac = upClone->data.alternatives.emplace_back();
+ for (const auto& c : a.constraints)
+ {
+ auto pConstraintClone = ac.constraints.emplace_back(c->clone(success_transitions)).get();
+ pConstraintClone->group = &upClone->data;
+ if (!upClone->data.initial) // locate the clone that corresponds to the original group's initial constraint
+ {
+ if (data.initial == c.get())
+ {
+ upClone->data.initial = pConstraintClone;
+ }
+ else if (data.initial == c->getEntrypoint())
+ {
+ upClone->data.initial = pConstraintClone->getEntrypoint();
+ }
+ }
+ }
+ }
+ // Copy the remaining group metadata.
+ upClone->data.parent = data.parent;
+ upClone->data.name = data.name;
+ upClone->data.lookahead_or_lookbehind = data.lookahead_or_lookbehind;
+
+ SOUP_ASSERT(upClone->data.initial, "Failed to find initial constraint for cloned group");
+
+ return upClone;
+ }
+ };
+}
diff --git a/src/vendor/Soup/soup/RegexMatchResult.hpp b/src/vendor/Soup/soup/RegexMatchResult.hpp
new file mode 100644
index 000000000..1a2109c9a
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexMatchResult.hpp
@@ -0,0 +1,75 @@
+#pragma once
+
+#include
+#include
+
+#include "RegexMatchedGroup.hpp"
+
+NAMESPACE_SOUP
+{
+ struct RegexMatchResult // Result of a match attempt: matched groups indexed by group number.
+ {
+ std::vector> groups{}; // entries may be empty (checked with has_value() below)
+ // A result is successful when it has at least one group entry.
+ [[nodiscard]] bool isSuccess() const noexcept
+ {
+ return !groups.empty();
+ }
+ // Length of group 0; throws if there is no group 0 or it holds no value.
+ [[nodiscard]] size_t length() const
+ {
+ return groups.at(0).value().length();
+ }
+ // Returns the group at index `i`, or nullptr if `i` is out of range or that group has no value.
+ [[nodiscard]] const RegexMatchedGroup* findGroupByIndex(size_t i) const noexcept
+ {
+ if (i < groups.size()
+ && groups.at(i).has_value()
+ )
+ {
+ return &groups.at(i).value();
+ }
+ return nullptr;
+ }
+ // Returns the first group with a value whose name equals `name`, or nullptr if there is none.
+ [[nodiscard]] const RegexMatchedGroup* findGroupByName(const std::string& name) const noexcept
+ {
+ for (size_t i = 0; i != groups.size(); ++i)
+ {
+ if (groups.at(i).has_value()
+ && groups.at(i)->name == name
+ )
+ {
+ return &groups.at(i).value();
+ }
+ }
+ return nullptr;
+ }
+ // Formats the groups that have a value as `index{name}="text"` entries separated by ", " (the `{name}` part only for named groups).
+ [[nodiscard]] std::string toString() const noexcept
+ {
+ std::string str{};
+ for (size_t i = 0; i != groups.size(); ++i)
+ {
+ if (groups.at(i).has_value())
+ {
+ str.append(std::to_string(i));
+ if (!groups.at(i)->name.empty())
+ {
+ str.push_back('{');
+ str.append(groups.at(i)->name);
+ str.push_back('}');
+ }
+ str.append("=\"");
+ str.append(groups.at(i)->toString());
+ str.append("\", ");
+ }
+ }
+ if (!str.empty())
+ {
+ str.erase(str.size() - 2, 2); // remove the trailing ", "
+ }
+ return str;
+ }
+ };
+}
diff --git a/src/vendor/Soup/soup/RegexMatchedGroup.hpp b/src/vendor/Soup/soup/RegexMatchedGroup.hpp
new file mode 100644
index 000000000..e8b1c2961
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexMatchedGroup.hpp
@@ -0,0 +1,23 @@
+#pragma once
+
+#include
+
+NAMESPACE_SOUP
+{
+	/// A captured piece of the subject: the group's name plus the character range [begin, end).
+	struct RegexMatchedGroup
+	{
+		std::string name;
+		const char* begin;
+		const char* end;
+
+		/// Number of characters between `begin` and `end`.
+		[[nodiscard]] size_t length() const
+		{
+			const auto span = std::distance(begin, end);
+			return span;
+		}
+
+		/// Copies the characters of [begin, end) into a new string.
+		[[nodiscard]] std::string toString() const noexcept
+		{
+			std::string text;
+			text.assign(begin, end);
+			return text;
+		}
+	};
+}
diff --git a/src/vendor/Soup/soup/RegexMatcher.hpp b/src/vendor/Soup/soup/RegexMatcher.hpp
new file mode 100644
index 000000000..fc855ed3b
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexMatcher.hpp
@@ -0,0 +1,111 @@
+#pragma once
+
+#include
+#include
+#include
+
+#include "fwd.hpp"
+#include "Regex.hpp"
+#include "RegexMatchResult.hpp"
+
+NAMESPACE_SOUP
+{
+	/// Mutable state of one regex match attempt over the character range [begin, end).
+	struct RegexMatcher
+	{
+		/// Snapshot stored by saveRollback(): constraint to resume at, cursor, and captures so far.
+		struct RollbackPoint
+		{
+			const RegexConstraint* c;
+			const char* it;
+			RegexMatchResult result{};
+		};
+
+		// Current constraint. Bit 0b1 and bit 0b10 of the pointer value are used as flags,
+		// consumed by shouldSaveCheckpoint() and shouldResetCapture() respectively.
+		const RegexConstraint* c;
+		const char* it; // cursor into the subject
+		const char* const begin;
+		const char* const end;
+		std::vector<RollbackPoint> rollback_points{};
+		std::vector<const char*> checkpoints{};
+		RegexMatchResult result{};
+
+		/// Starts at the regex's initial constraint with the cursor at `begin`.
+		/// (Previously `it` was left uninitialised until a caller assigned it.)
+		RegexMatcher(const Regex& r, const char* begin, const char* end)
+			: c(r.group.initial), it(begin), begin(begin), end(end)
+		{
+		}
+
+		/// Returns to the regex's initial constraint and drops rollback points, checkpoints and captures.
+		/// The cursor `it` is not modified.
+		void reset(const Regex& r) noexcept
+		{
+			c = r.group.initial;
+			rollback_points.clear();
+			checkpoints.clear();
+			result.groups.clear();
+		}
+
+		/// Records the current cursor and captures together with the constraint to continue at on rollback.
+		void saveRollback(const RegexConstraint* rollback_transition)
+		{
+			rollback_points.emplace_back(RollbackPoint{ rollback_transition, it, result });
+		}
+
+		/// Pops the most recent rollback point into the current state. Requires a non-empty stack.
+		void restoreRollback()
+		{
+			auto& point = rollback_points.back();
+			c = point.c;
+			it = point.it;
+			result = std::move(point.result);
+			rollback_points.pop_back();
+		}
+
+		/// If bit 0b1 is set on `c`, clears it and returns true.
+		bool shouldSaveCheckpoint() noexcept
+		{
+			const auto bits = reinterpret_cast<uintptr_t>(c);
+			if (bits & 0b1)
+			{
+				c = reinterpret_cast<const RegexConstraint*>(bits & ~static_cast<uintptr_t>(0b1));
+				SOUP_ASSERT(c != nullptr);
+				return true;
+			}
+			return false;
+		}
+
+		/// If bit 0b10 is set on `c`, clears it and returns true.
+		bool shouldResetCapture() noexcept
+		{
+			const auto bits = reinterpret_cast<uintptr_t>(c);
+			if (bits & 0b10)
+			{
+				c = reinterpret_cast<const RegexConstraint*>(bits & ~static_cast<uintptr_t>(0b10));
+				return true;
+			}
+			return false;
+		}
+
+		/// Pushes the current cursor onto the checkpoint stack.
+		void saveCheckpoint()
+		{
+			checkpoints.emplace_back(it);
+		}
+
+		/// Pops the most recent checkpoint into the cursor. Requires a non-empty stack.
+		void restoreCheckpoint()
+		{
+			it = checkpoints.back();
+			checkpoints.pop_back();
+		}
+
+		/// Walks from `g` up through its parents, stopping at the first lookahead/lookbehind group.
+		/// Each capturing group on the way that has no recorded match yet gets an empty one at the cursor.
+		void insertMissingCapturingGroups(const RegexGroup* g)
+		{
+			for (; g; g = g->parent)
+			{
+				if (g->lookahead_or_lookbehind)
+				{
+					break;
+				}
+				if (g->isNonCapturing())
+				{
+					continue;
+				}
+				// Grow the slot list so that g->index is addressable.
+				while (g->index >= this->result.groups.size())
+				{
+					this->result.groups.emplace_back(std::nullopt);
+				}
+				auto& slot = this->result.groups.at(g->index);
+				if (!slot.has_value())
+				{
+					slot = RegexMatchedGroup{ g->name, this->it, this->it };
+				}
+			}
+		}
+	};
+}
diff --git a/src/vendor/Soup/soup/RegexNegativeLookaheadConstraint.hpp b/src/vendor/Soup/soup/RegexNegativeLookaheadConstraint.hpp
new file mode 100644
index 000000000..aa803eff3
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexNegativeLookaheadConstraint.hpp
@@ -0,0 +1,48 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include "RegexGroup.hpp"
+#include "RegexMatcher.hpp"
+
+NAMESPACE_SOUP
+{
+	/// Constraint wrapping a group that is printed as a negative lookahead, `(?!...)`.
+	struct RegexNegativeLookaheadConstraint : public RegexConstraint
+	{
+		RegexGroup group;
+
+		RegexNegativeLookaheadConstraint(const RegexGroup::ConstructorState& s)
+			: group(s, true)
+		{
+		}
+
+		/// Evaluation begins at the wrapped group's initial constraint.
+		[[nodiscard]] RegexConstraint* getEntrypoint() noexcept final
+		{
+			return group.initial;
+		}
+
+		/// Pops the latest checkpoint back into the cursor and reports success.
+		[[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+		{
+			m.restoreCheckpoint();
+			return true;
+		}
+
+		[[nodiscard]] std::string toString() const noexcept final
+		{
+			std::string str = "(?!";
+			str.append(group.toString());
+			str.push_back(')');
+			return str;
+		}
+
+		/// Flags are those reported by the wrapped group.
+		void getFlags(uint16_t& set, uint16_t& unset) const noexcept final
+		{
+			group.getFlags(set, unset);
+		}
+
+		/// Reports zero cursor advancement.
+		[[nodiscard]] size_t getCursorAdvancement() const final
+		{
+			return 0;
+		}
+	};
+}
diff --git a/src/vendor/Soup/soup/RegexNegativeLookbehindConstraint.hpp b/src/vendor/Soup/soup/RegexNegativeLookbehindConstraint.hpp
new file mode 100644
index 000000000..0d78e3e41
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexNegativeLookbehindConstraint.hpp
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "RegexConstraintLookbehind.hpp"
+
+#include "RegexMatcher.hpp"
+
+NAMESPACE_SOUP
+{
+ template
+ struct RegexNegativeLookbehindConstraint : public RegexConstraintLookbehindImpl
+ {
+ using Base = RegexConstraintLookbehindImpl;
+
+ using Base::Base;
+
+ [[nodiscard]] std::string toString() const noexcept final
+ {
+ auto str = Base::group.toString();
+ str.insert(0, "(?
+
+#include "UniquePtr.hpp"
+
+NAMESPACE_SOUP
+{
+	/// Common part of the `{n,}` quantifier: owns the repeated copies of the quantified constraint.
+	struct RegexOpenEndedRangeQuantifierConstraintBase : public RegexConstraint
+	{
+		// toString() of the derived type prints constraints.size() as the minimum count.
+		std::vector<UniquePtr<RegexConstraint>> constraints;
+
+		/// Entry is the entrypoint of the first stored constraint (at() throws if there is none).
+		[[nodiscard]] RegexConstraint* getEntrypoint() noexcept final
+		{
+			return constraints.at(0)->getEntrypoint();
+		}
+
+		[[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+		{
+			// Meta-constraint. Transitions will be set up to correctly handle matching of this.
+			return true;
+		}
+
+		/// Advancement of the first constraint multiplied by the number of stored constraints.
+		[[nodiscard]] size_t getCursorAdvancement() const final
+		{
+			return constraints.at(0)->getCursorAdvancement() * constraints.size();
+		}
+	};
+
+	/// `{n,}` quantifier; `greedy == false` is printed with a trailing '?'.
+	template <bool greedy>
+	struct RegexOpenEndedRangeQuantifierConstraint : public RegexOpenEndedRangeQuantifierConstraintBase
+	{
+		/// Prints the first stored constraint followed by `{<count>,}` and, when lazy, `?`.
+		[[nodiscard]] std::string toString() const noexcept final
+		{
+			std::string str = constraints.at(0)->toString();
+			str.push_back('{');
+			str.append(std::to_string(constraints.size()));
+			str.append(",}");
+			if constexpr (!greedy)
+			{
+				str.push_back('?');
+			}
+			return str;
+		}
+	};
+}
diff --git a/src/vendor/Soup/soup/RegexOptConstraint.hpp b/src/vendor/Soup/soup/RegexOptConstraint.hpp
new file mode 100644
index 000000000..5663925a6
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexOptConstraint.hpp
@@ -0,0 +1,31 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include "UniquePtr.hpp"
+
+NAMESPACE_SOUP
+{
+	/// The `?` quantifier: owns the constraint it makes optional.
+	struct RegexOptConstraint : public RegexConstraint
+	{
+		UniquePtr<RegexConstraint> constraint;
+
+		RegexOptConstraint(UniquePtr<RegexConstraint>&& constraint)
+			: constraint(std::move(constraint))
+		{
+		}
+
+		[[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+		{
+			// Meta-constraint. Transitions will be set up to correctly handle matching of this.
+			return true;
+		}
+
+		/// Prints the wrapped constraint followed by '?'.
+		[[nodiscard]] std::string toString() const noexcept final
+		{
+			std::string str = constraint->toString();
+			str.push_back('?');
+			return str;
+		}
+	};
+}
diff --git a/src/vendor/Soup/soup/RegexPositiveLookaheadConstraint.hpp b/src/vendor/Soup/soup/RegexPositiveLookaheadConstraint.hpp
new file mode 100644
index 000000000..efc46c43d
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexPositiveLookaheadConstraint.hpp
@@ -0,0 +1,48 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include "RegexGroup.hpp"
+#include "RegexMatcher.hpp"
+
+NAMESPACE_SOUP
+{
+	/// Constraint wrapping a group that is printed as a positive lookahead, `(?=...)`.
+	struct RegexPositiveLookaheadConstraint : public RegexConstraint
+	{
+		RegexGroup group;
+
+		RegexPositiveLookaheadConstraint(const RegexGroup::ConstructorState& s)
+			: group(s, true)
+		{
+		}
+
+		/// Evaluation begins at the wrapped group's initial constraint.
+		[[nodiscard]] RegexConstraint* getEntrypoint() noexcept final
+		{
+			return group.initial;
+		}
+
+		/// Pops the latest checkpoint back into the cursor and reports success.
+		[[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+		{
+			m.restoreCheckpoint();
+			return true;
+		}
+
+		[[nodiscard]] std::string toString() const noexcept final
+		{
+			std::string str = "(?=";
+			str.append(group.toString());
+			str.push_back(')');
+			return str;
+		}
+
+		/// Flags are those reported by the wrapped group.
+		void getFlags(uint16_t& set, uint16_t& unset) const noexcept final
+		{
+			group.getFlags(set, unset);
+		}
+
+		/// Reports zero cursor advancement.
+		[[nodiscard]] size_t getCursorAdvancement() const final
+		{
+			return 0;
+		}
+	};
+}
diff --git a/src/vendor/Soup/soup/RegexPositiveLookbehindConstraint.hpp b/src/vendor/Soup/soup/RegexPositiveLookbehindConstraint.hpp
new file mode 100644
index 000000000..84b0eb5e9
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexPositiveLookbehindConstraint.hpp
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include "RegexMatcher.hpp"
+
+NAMESPACE_SOUP
+{
+	/// Positive lookbehind, printed as `(?<=...)`; everything else comes from RegexConstraintLookbehindImpl.
+	// NOTE(review): the template parameter list and the base's template argument were lost in this
+	// copy of the patch; `bool unicode` is a reconstruction - confirm against RegexConstraintLookbehind.hpp.
+	template <bool unicode>
+	struct RegexPositiveLookbehindConstraint : public RegexConstraintLookbehindImpl<unicode>
+	{
+		using Base = RegexConstraintLookbehindImpl<unicode>;
+
+		using Base::Base;
+
+		[[nodiscard]] std::string toString() const noexcept final
+		{
+			auto str = Base::group.toString();
+			str.insert(0, "(?<=");
+			str.push_back(')');
+			return str;
+		}
+	};
+}
diff --git a/src/vendor/Soup/soup/RegexRangeConstraint.hpp b/src/vendor/Soup/soup/RegexRangeConstraint.hpp
new file mode 100644
index 000000000..3ac02ca6a
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexRangeConstraint.hpp
@@ -0,0 +1,389 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include "base.hpp"
+#include "BigBitset.hpp"
+#include "RegexMatcher.hpp"
+
+NAMESPACE_SOUP
+{
+ struct RegexRangeConstraint : public RegexConstraint
+ {
+ BigBitset<0x100 / 8> mask{};
+ bool inverted = false;
+
+ inline static const char digits[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
+ inline static const char whitespace[] = { '\r', '\n', '\t', '\f', '\v', ' ' };
+
+		/// Helper: enables every byte value in the inclusive range [first, last].
+		void enableInclusive(uint8_t first, uint8_t last)
+		{
+			for (unsigned int c = first; c <= last; ++c)
+			{
+				mask.enable(static_cast<uint8_t>(c));
+			}
+		}
+
+		/// Helper: true when the characters right after `it` spell `name`. Never reads at or past `end`.
+		[[nodiscard]] static bool isFollowedBy(const char* it, const char* end, const char* name) noexcept
+		{
+			for (; *name != '\0'; ++name)
+			{
+				if (++it == end || *it != *name)
+				{
+					return false;
+				}
+			}
+			return true;
+		}
+
+		/// Helper: with `it` on the ':' that opens a `[:name:]` class, enables the named class and
+		/// moves `it` onto the last letter of the name. Returns false (leaving `it` alone) for unknown names.
+		bool enableNamedClass(const char*& it, const char* end)
+		{
+			if (isFollowedBy(it, end, "alnum"))
+			{
+				it += 5;
+				enableInclusive('0', '9');
+				enableInclusive('A', 'Z');
+				enableInclusive('a', 'z');
+			}
+			else if (isFollowedBy(it, end, "alpha"))
+			{
+				it += 5;
+				enableInclusive('A', 'Z');
+				enableInclusive('a', 'z');
+			}
+			else if (isFollowedBy(it, end, "ascii"))
+			{
+				it += 5;
+				enableInclusive(0x00, 0x7F);
+			}
+			else if (isFollowedBy(it, end, "blank"))
+			{
+				it += 5;
+				mask.enable(' ');
+				mask.enable('\t');
+			}
+			else if (isFollowedBy(it, end, "cntrl"))
+			{
+				it += 5;
+				enableInclusive(0x00, 0x1F);
+				mask.enable(0x7F);
+			}
+			else if (isFollowedBy(it, end, "digit"))
+			{
+				it += 5;
+				enableInclusive('0', '9');
+			}
+			else if (isFollowedBy(it, end, "graph"))
+			{
+				it += 5;
+				enableInclusive(0x21, 0x7E);
+			}
+			else if (isFollowedBy(it, end, "lower"))
+			{
+				it += 5;
+				enableInclusive('a', 'z');
+			}
+			else if (isFollowedBy(it, end, "upper"))
+			{
+				it += 5;
+				enableInclusive('A', 'Z');
+			}
+			else if (isFollowedBy(it, end, "word"))
+			{
+				it += 4;
+				enableInclusive('0', '9');
+				enableInclusive('A', 'Z');
+				enableInclusive('a', 'z');
+				mask.enable('_');
+			}
+			else if (isFollowedBy(it, end, "xdigit"))
+			{
+				it += 6;
+				enableInclusive('0', '9');
+				enableInclusive('A', 'F');
+				enableInclusive('a', 'f');
+			}
+			else
+			{
+				return false;
+			}
+			return true;
+		}
+
+		/// Parses a bracket expression. On entry `it` is one position before the first character of
+		/// the expression body (the constructor pre-increments); on exit `it` is on the closing ']'
+		/// or equal to `end`.
+		/// Supported members: literal characters, `a-b` ranges, `\d`, `\s`, other `\x` escapes (taken
+		/// as the literal `x`), and `[:class:]` names. A leading '^' inverts the set.
+		/// With `insensitive`, every enabled ASCII letter also enables its other case.
+		/// An unknown `[:class:]` name is reported through SOUP_THROW.
+		RegexRangeConstraint(const char*& it, const char* end, bool insensitive)
+		{
+			if (++it == end)
+			{
+				return;
+			}
+			if (*it == '^')
+			{
+				inverted = true;
+			}
+			else
+			{
+				// Not a negation marker; step back so the loop's pre-increment lands on it.
+				--it;
+			}
+			char range_begin = 0;
+			while (++it != end && *it != ']')
+			{
+				if (*it == '-')
+				{
+					SOUP_IF_UNLIKELY (++it == end)
+					{
+						break;
+					}
+					if (*it == ']')
+					{
+						// A '-' right before the closing bracket is a literal. Previously the ']' was
+						// taken as the range end and parsing ran on past the bracket expression.
+						mask.enable('-');
+						break;
+					}
+					if (range_begin <= *it)
+					{
+						// The range end itself is enabled by the common tail below.
+						for (char c = range_begin; c != *it; ++c)
+						{
+							mask.enable(c);
+						}
+					}
+				}
+				else if (*it == '\\')
+				{
+					SOUP_IF_UNLIKELY (++it == end)
+					{
+						break;
+					}
+					if (*it == 'd')
+					{
+						for (auto& c : digits)
+						{
+							mask.enable(c);
+						}
+						continue;
+					}
+					if (*it == 's')
+					{
+						for (auto& c : whitespace)
+						{
+							mask.enable(c);
+						}
+						continue;
+					}
+				}
+				else if (*it == '['
+					&& (it + 1) != end && *(it + 1) == ':'
+					)
+				{
+					// Peek first, then advance: the old `*++it == ':'` moved the cursor even when the
+					// test failed, which silently dropped a literal '[' from the set.
+					++it;
+					if (!enableNamedClass(it, end))
+					{
+						SOUP_THROW(Exception("Unrecognised class in [[:class:]]"));
+					}
+					if ((it + 1) != end) { ++it; } // :
+					if ((it + 1) != end) { ++it; } // ]
+					continue;
+				}
+				mask.enable(*it);
+				range_begin = (*it) + 1;
+			}
+			if (insensitive)
+			{
+				for (uint8_t c = 'a'; c != 'z' + 1; ++c)
+				{
+					if (mask.get(c))
+					{
+						mask.enable(c - 'a' + 'A');
+					}
+				}
+				for (uint8_t c = 'A'; c != 'Z' + 1; ++c)
+				{
+					if (mask.get(c))
+					{
+						mask.enable(c - 'A' + 'a');
+					}
+				}
+			}
+		}
+
+		/// Builds a set that contains every element of the character array `arr`.
+		template <size_t S>
+		RegexRangeConstraint(const char(&arr)[S])
+		{
+			for (const auto& c : arr)
+			{
+				mask.enable(c);
+			}
+		}
+
+		/// Consumes one character if it is in the set (or, when `inverted`, if it is not).
+		/// Fails without moving the cursor at the end of the subject or on a mismatch.
+		[[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+		{
+			if (m.it == m.end)
+			{
+				return false;
+			}
+			// Index the mask by the unsigned byte value.
+			const bool in_set = mask.get(static_cast<uint8_t>(*m.it));
+			if (in_set == inverted)
+			{
+				return false;
+			}
+			++m.it;
+			return true;
+		}
+
+		/// Appends `c` to `str`, spelling \r, \n, \t, \f and \v as their two-character escapes.
+		static void appendPresentably(std::string& str, char c) noexcept
+		{
+			const char* escaped = nullptr;
+			switch (c)
+			{
+			case '\r': escaped = "\\r"; break;
+			case '\n': escaped = "\\n"; break;
+			case '\t': escaped = "\\t"; break;
+			case '\f': escaped = "\\f"; break;
+			case '\v': escaped = "\\v"; break;
+			}
+			if (escaped != nullptr)
+			{
+				str.append(escaped);
+			}
+			else
+			{
+				str.push_back(c);
+			}
+		}
+
+		/// Prints the set as a bracket expression. Runs of more than three consecutive byte values are
+		/// written as `first-last`, shorter runs character by character.
+		[[nodiscard]] std::string toString() const noexcept final
+		{
+			std::string str(1, '[');
+			if (inverted)
+			{
+				str.push_back('^');
+			}
+
+			// Emits the run [first, past_last). The length is computed in int, so a run covering all
+			// 256 values is no longer truncated to 0 as it was with the former uint8_t length.
+			const auto appendRun = [&str](uint16_t first, uint16_t past_last)
+			{
+				if (past_last - first > 3)
+				{
+					appendPresentably(str, static_cast<char>(first));
+					str.push_back('-');
+					appendPresentably(str, static_cast<char>(past_last - 1));
+				}
+				else
+				{
+					for (uint16_t j = first; j != past_last; ++j)
+					{
+						appendPresentably(str, static_cast<char>(j));
+					}
+				}
+			};
+
+			constexpr uint16_t no_run = 0x100; // sentinel: not currently inside a run
+			uint16_t run_begin = no_run;
+			for (uint16_t i = 0; i != 0x100; ++i)
+			{
+				if (mask.get(i))
+				{
+					if (run_begin == no_run)
+					{
+						run_begin = i;
+					}
+				}
+				else if (run_begin != no_run)
+				{
+					appendRun(run_begin, i);
+					run_begin = no_run;
+				}
+			}
+			if (run_begin != no_run)
+			{
+				// A run that reaches the last byte value is still open here.
+				appendRun(run_begin, 0x100);
+			}
+			str.push_back(']');
+			return str;
+		}
+
+ // Always reports an advancement of 1; matches() moves the cursor by one on success.
+ [[nodiscard]] size_t getCursorAdvancement() const final
+ {
+ return 1;
+ }
+
+		/// Copy-constructs this constraint, points the pending transitions at the copy and queues the
+		/// copy's own success transition as the next pending one.
+		[[nodiscard]] UniquePtr<RegexConstraint> clone(RegexTransitionsVector& success_transitions) const final
+		{
+			auto cc = soup::make_unique<RegexRangeConstraint>(*this);
+			success_transitions.setTransitionTo(cc->getEntrypoint());
+			success_transitions.emplace(&cc->success_transition);
+			return cc;
+		}
+ };
+}
diff --git a/src/vendor/Soup/soup/RegexRangeQuantifierConstraint.hpp b/src/vendor/Soup/soup/RegexRangeQuantifierConstraint.hpp
new file mode 100644
index 000000000..174e73314
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexRangeQuantifierConstraint.hpp
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include
+
+#include "UniquePtr.hpp"
+
+NAMESPACE_SOUP
+{
+	/// Common part of the `{min,max}` quantifier: owns the repeated constraints and remembers the minimum.
+	struct RegexRangeQuantifierConstraintBase : public RegexConstraint
+	{
+		std::vector<UniquePtr<RegexConstraint>> constraints;
+		size_t min_reps;
+
+		/// Always succeeds and does not touch the matcher.
+		[[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+		{
+			return true;
+		}
+
+		/// Entry is the entrypoint of the first stored constraint (at() throws if there is none).
+		[[nodiscard]] RegexConstraint* getEntrypoint() noexcept final
+		{
+			return constraints.at(0)->getEntrypoint();
+		}
+
+		/// Advancement of the first constraint multiplied by the number of stored constraints.
+		[[nodiscard]] size_t getCursorAdvancement() const final
+		{
+			return constraints.at(0)->getCursorAdvancement() * constraints.size();
+		}
+	};
+
+	/// Greedy `{min,max}` quantifier; the number of stored constraints is printed as the maximum.
+	struct RegexRangeQuantifierConstraintGreedy : public RegexRangeQuantifierConstraintBase
+	{
+		[[nodiscard]] std::string toString() const noexcept final
+		{
+			std::string str = constraints.at(0)->toString();
+			str += '{';
+			str += std::to_string(min_reps);
+			str += ',';
+			str += std::to_string(constraints.size());
+			str += '}';
+			return str;
+		}
+	};
+
+	/// Lazy `{min,max}?` quantifier.
+	struct RegexRangeQuantifierConstraintLazy : public RegexRangeQuantifierConstraintBase
+	{
+		/// The printed maximum is `min_reps` plus half of the stored constraints beyond `min_reps`.
+		[[nodiscard]] std::string toString() const noexcept final
+		{
+			const size_t beyond_min = constraints.size() - min_reps;
+			const size_t max_reps = min_reps + (beyond_min / 2);
+
+			std::string str = constraints.at(0)->toString();
+			str += '{';
+			str += std::to_string(min_reps);
+			str += ',';
+			str += std::to_string(max_reps);
+			str += '}';
+			str += '?';
+			return str;
+		}
+	};
+}
diff --git a/src/vendor/Soup/soup/RegexRecallConstraint.hpp b/src/vendor/Soup/soup/RegexRecallConstraint.hpp
new file mode 100644
index 000000000..8dfe124af
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexRecallConstraint.hpp
@@ -0,0 +1,85 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+NAMESPACE_SOUP
+{
+	/// Shared logic of back-references: the subject at the cursor must repeat an earlier capture.
+	struct RegexRecallConstraint : public RegexConstraint
+	{
+		/// Compares the subject at `m.it` with the text of `group`. On a full match the cursor is moved
+		/// past the compared text; otherwise (or when `group` is null) it is left untouched.
+		[[nodiscard]] bool matchesImpl(RegexMatcher& m, const RegexMatchedGroup* group) const noexcept
+		{
+			if (group == nullptr)
+			{
+				return false;
+			}
+			const char* cursor = m.it;
+			const char* expected = group->begin;
+			while (expected != group->end)
+			{
+				if (cursor == m.end || *cursor != *expected)
+				{
+					return false;
+				}
+				++cursor;
+				++expected;
+			}
+			m.it = cursor;
+			return true;
+		}
+	};
+
+	/// Back-reference to a group by index, printed as a backslash followed by the index.
+	struct RegexRecallIndexConstraint : public RegexRecallConstraint
+	{
+		const size_t i;
+
+		RegexRecallIndexConstraint(size_t i)
+			: i(i)
+		{
+		}
+
+		/// Looks the group up in the matcher's current result and delegates to matchesImpl().
+		[[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+		{
+			const RegexMatchedGroup* const group = m.result.findGroupByIndex(i);
+			return matchesImpl(m, group);
+		}
+
+		[[nodiscard]] std::string toString() const noexcept final
+		{
+			std::string str = "\\";
+			str += std::to_string(i);
+			return str;
+		}
+	};
+
+	/// Back-reference to a group by name, printed as `\k'name'` or `\k<name>`.
+	struct RegexRecallNameConstraint : public RegexRecallConstraint
+	{
+		const std::string name;
+
+		RegexRecallNameConstraint(std::string&& name)
+			: name(std::move(name))
+		{
+		}
+
+		/// Looks the group up in the matcher's current result and delegates to matchesImpl().
+		[[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+		{
+			const RegexMatchedGroup* const group = m.result.findGroupByName(name);
+			return matchesImpl(m, group);
+		}
+
+		/// Uses angle brackets only when the name itself contains an apostrophe.
+		[[nodiscard]] std::string toString() const noexcept final
+		{
+			const bool has_apostrophe = (name.find('\'') != std::string::npos);
+			const char opening = has_apostrophe ? '<' : '\'';
+			const char closing = has_apostrophe ? '>' : '\'';
+
+			std::string str = "\\k";
+			str.push_back(opening);
+			str.append(name);
+			str.push_back(closing);
+			return str;
+		}
+	};
+}
diff --git a/src/vendor/Soup/soup/RegexRepeatConstraint.hpp b/src/vendor/Soup/soup/RegexRepeatConstraint.hpp
new file mode 100644
index 000000000..7f7f9a847
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexRepeatConstraint.hpp
@@ -0,0 +1,90 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include "UniquePtr.hpp"
+
+NAMESPACE_SOUP
+{
+	/// The `+` / `*` quantifiers, each in a greedy and a lazy (`?`-suffixed) flavour.
+	// NOTE(review): the template parameter list was lost in this copy of the patch. The names come
+	// from the body; the order (at_least_one, greedy) is a reconstruction - confirm against the
+	// instantiation sites.
+	template <bool at_least_one, bool greedy>
+	struct RegexRepeatConstraint : public RegexConstraint
+	{
+		UniquePtr<RegexConstraint> constraint;
+
+		RegexRepeatConstraint(UniquePtr<RegexConstraint>&& constraint)
+			: constraint(std::move(constraint))
+		{
+		}
+
+		[[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+		{
+			// Meta-constraint. Transitions will be set up to correctly handle matching of this.
+			return true;
+		}
+
+		/// Entry is the entrypoint of the repeated constraint.
+		[[nodiscard]] virtual RegexConstraint* getEntrypoint() noexcept final
+		{
+			return constraint->getEntrypoint();
+		}
+
+		/// Wires the pending transitions to this quantifier, then sets up its own success and
+		/// rollback transitions depending on `greedy`.
+		void setupTransitionsAtLeastOne(RegexTransitionsVector& success_transitions)
+		{
+			success_transitions.setTransitionTo(this);
+			if (greedy)
+			{
+				// quantifier --[success]-> constraint
+				success_transitions.emplace(&success_transition);
+				if (constraint->shouldResetCapture())
+				{
+					success_transitions.setResetCapture();
+				}
+				success_transitions.setTransitionTo(constraint->getEntrypoint());
+
+				// quantifier --[rollback]-> next-constraint
+				success_transitions.emplaceRollback(&rollback_transition);
+			}
+			else
+			{
+				// quantifier --[success]-> next-constraint
+				success_transitions.emplace(&success_transition);
+				if (constraint->shouldResetCapture())
+				{
+					success_transitions.setResetCapture();
+				}
+
+				// quantifier --[rollback]-> constraint
+				rollback_transition = constraint->getEntrypoint();
+			}
+		}
+
+		/// For the at-least-one form: clones the repeated constraint, copies its `group` over and
+		/// re-creates the transitions. Otherwise defers to RegexConstraint::clone().
+		[[nodiscard]] UniquePtr<RegexConstraint> clone(RegexTransitionsVector& success_transitions) const final
+		{
+			if (at_least_one)
+			{
+				auto cc = soup::make_unique<RegexRepeatConstraint>(constraint->clone(success_transitions));
+				cc->constraint->group = constraint->group;
+				cc->setupTransitionsAtLeastOne(success_transitions);
+				return cc;
+			}
+			return RegexConstraint::clone(success_transitions);
+		}
+
+		/// Prints the repeated constraint followed by '+' or '*', plus '?' when lazy.
+		[[nodiscard]] std::string toString() const noexcept final
+		{
+			std::string str = constraint->toString();
+			str.push_back(at_least_one ? '+' : '*');
+			if (!greedy)
+			{
+				str.push_back('?');
+			}
+			return str;
+		}
+	};
+}
diff --git a/src/vendor/Soup/soup/RegexStartConstraint.hpp b/src/vendor/Soup/soup/RegexStartConstraint.hpp
new file mode 100644
index 000000000..600a35cbd
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexStartConstraint.hpp
@@ -0,0 +1,53 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include "RegexMatcher.hpp"
+
+NAMESPACE_SOUP
+{
+	/// Start anchor: `^` or, for the escape-sequence form, `\A`.
+	// NOTE(review): the template parameter list was lost in this copy of the patch. The names come
+	// from the body; the order (escape_sequence, multi_line) is a reconstruction - confirm against
+	// the instantiation sites.
+	template <bool escape_sequence, bool multi_line>
+	struct RegexStartConstraint : public RegexConstraint
+	{
+		/// Succeeds at the beginning of the subject and, when `multi_line`, right after a '\n'.
+		/// Never moves the cursor.
+		[[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+		{
+			if (m.it == m.begin)
+			{
+				return true;
+			}
+			if constexpr (multi_line)
+			{
+				// m.it != m.begin here, so looking one character back is in range.
+				if (*(m.it - 1) == '\n')
+				{
+					return true;
+				}
+			}
+			return false;
+		}
+
+		[[nodiscard]] std::string toString() const noexcept final
+		{
+			return escape_sequence ? "\\A" : "^";
+		}
+
+		/// The `^` form reports RE_MULTILINE as set or unset according to `multi_line`;
+		/// the escape-sequence form reports nothing.
+		void getFlags(uint16_t& set, uint16_t& unset) const noexcept final
+		{
+			if constexpr (!escape_sequence)
+			{
+				if constexpr (multi_line)
+				{
+					set |= RE_MULTILINE;
+				}
+				else
+				{
+					unset |= RE_MULTILINE;
+				}
+			}
+		}
+
+		/// Reports zero cursor advancement.
+		[[nodiscard]] size_t getCursorAdvancement() const final
+		{
+			return 0;
+		}
+	};
+}
diff --git a/src/vendor/Soup/soup/RegexTransitionsVector.hpp b/src/vendor/Soup/soup/RegexTransitionsVector.hpp
new file mode 100644
index 000000000..d4f7e917f
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexTransitionsVector.hpp
@@ -0,0 +1,81 @@
+#pragma once
+
+#include
+
+NAMESPACE_SOUP
+{
+	/// Collects pointers to transition slots whose target is not known yet and fills them in once
+	/// it is. Bit 0b10 of a slot's value is preserved when the slot is retargeted; bit 0b1 is set on
+	/// the target when a checkpoint is requested.
+	struct RegexTransitionsVector
+	{
+		std::vector<RegexConstraint**> data; // slots still waiting for a target
+		std::vector<RegexConstraint**> prev_data; // slots filled by the latest setTransitionTo()
+
+		/// Helper: `target_bits` combined with bit 0b10 of the slot's current value.
+		/// Works on integer copies instead of writing through a reinterpret_cast'ed alias of the slot.
+		[[nodiscard]] static RegexConstraint* retargeted(uintptr_t target_bits, const RegexConstraint* current) noexcept
+		{
+			return reinterpret_cast<RegexConstraint*>(target_bits | (reinterpret_cast<uintptr_t>(current) & 0b10));
+		}
+
+		/// Queues a slot to be filled by the next setTransitionTo().
+		void emplace(RegexConstraint** p)
+		{
+			data.emplace_back(p);
+		}
+
+		/// Queues a rollback slot and pre-fills it with ROLLBACK_TO_SUCCESS.
+		void emplaceRollback(RegexConstraint** p)
+		{
+			data.emplace_back(p);
+
+			// If we don't have a next constraint, rollback is match success.
+			*p = RegexConstraint::ROLLBACK_TO_SUCCESS;
+		}
+
+		/// Retargets the slots filled by the latest setTransitionTo() to `c`.
+		void setPreviousTransitionTo(RegexConstraint* c) noexcept
+		{
+			SOUP_ASSERT((reinterpret_cast<uintptr_t>(c) & RegexConstraint::MASK) == 0);
+
+			const auto target_bits = reinterpret_cast<uintptr_t>(c);
+			for (const auto& p : prev_data)
+			{
+				*p = retargeted(target_bits, *p);
+			}
+		}
+
+		/// Overwrites every pending slot with just the 0b10 bit.
+		void setResetCapture() noexcept
+		{
+			for (const auto& p : data)
+			{
+				*p = reinterpret_cast<RegexConstraint*>(static_cast<uintptr_t>(0b10));
+			}
+		}
+
+		/// Points every pending slot at `c` (tagged with bit 0b1 when `save_checkpoint`), then moves
+		/// the pending slots to `prev_data`.
+		void setTransitionTo(RegexConstraint* c, bool save_checkpoint = false) noexcept
+		{
+			SOUP_ASSERT((reinterpret_cast<uintptr_t>(c) & RegexConstraint::MASK) == 0);
+
+			auto target_bits = reinterpret_cast<uintptr_t>(c);
+			if (save_checkpoint)
+			{
+				target_bits |= 0b1;
+			}
+
+			for (const auto& p : data)
+			{
+				*p = retargeted(target_bits, *p);
+			}
+
+			prev_data = std::move(data);
+			data.clear();
+		}
+
+		/// Hands all pending slots over to `outTransitions` and forgets them.
+		void discharge(std::vector<RegexConstraint**>& outTransitions) noexcept
+		{
+			for (const auto& p : data)
+			{
+				outTransitions.emplace_back(p);
+			}
+			data.clear();
+		}
+
+		/// Undoes the latest setTransitionTo(): its slots become pending again and keep only bit 0b10.
+		void rollback() noexcept
+		{
+			data = std::move(prev_data);
+			prev_data.clear();
+
+			for (const auto& p : data)
+			{
+				*p = retargeted(0, *p);
+			}
+		}
+	};
+}
diff --git a/src/vendor/Soup/soup/RegexWordBoundaryConstraint.hpp b/src/vendor/Soup/soup/RegexWordBoundaryConstraint.hpp
new file mode 100644
index 000000000..d63337fe6
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexWordBoundaryConstraint.hpp
@@ -0,0 +1,41 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include "RegexMatcher.hpp"
+#include "string.hpp"
+
+NAMESPACE_SOUP
+{
+	/// `\b` (or `\B` when `inverted`): zero-width test for a word boundary at the cursor.
+	template <bool inverted>
+	struct RegexWordBoundaryConstraint : public RegexConstraint
+	{
+		/// A boundary exists where exactly one of the characters before and after the cursor is a
+		/// word character; positions outside the subject count as non-word. Previously the start
+		/// and end of the subject were reported as boundaries regardless of the adjacent character.
+		[[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+		{
+			const bool word_before = (m.it != m.begin) && string::isWordChar(*(m.it - 1));
+			const bool word_after = (m.it != m.end) && string::isWordChar(*m.it);
+			return (word_before != word_after) ^ inverted;
+		}
+
+		[[nodiscard]] std::string toString() const noexcept final
+		{
+			return inverted ? "\\B" : "\\b";
+		}
+
+		/// Reports zero cursor advancement.
+		[[nodiscard]] size_t getCursorAdvancement() const final
+		{
+			return 0;
+		}
+	};
+}
diff --git a/src/vendor/Soup/soup/RegexWordCharConstraint.hpp b/src/vendor/Soup/soup/RegexWordCharConstraint.hpp
new file mode 100644
index 000000000..9cf4b172c
--- /dev/null
+++ b/src/vendor/Soup/soup/RegexWordCharConstraint.hpp
@@ -0,0 +1,36 @@
+#pragma once
+
+#include "RegexConstraint.hpp"
+
+#include "RegexMatcher.hpp"
+#include "string.hpp"
+
+NAMESPACE_SOUP
+{
+	/// `\w` (or `\W` when `inverted`): consumes one character and tests whether it is a word character.
+	template <bool inverted>
+	struct RegexWordCharConstraint : public RegexConstraint
+	{
+		/// Fails at the end of the subject instead of reading past it. Otherwise the cursor moves
+		/// one character forward whether or not the test succeeds.
+		[[nodiscard]] bool matches(RegexMatcher& m) const noexcept final
+		{
+			if (m.it == m.end)
+			{
+				return false;
+			}
+			return string::isWordChar(*m.it++) ^ inverted;
+		}
+
+		[[nodiscard]] std::string toString() const noexcept final
+		{
+			return inverted ? "\\W" : "\\w";
+		}
+
+		/// Reports a cursor advancement of one.
+		[[nodiscard]] size_t getCursorAdvancement() const final
+		{
+			return 1;
+		}
+
+		/// Creates a fresh constraint of the same kind, points the pending transitions at it and
+		/// queues its own success transition as the next pending one.
+		[[nodiscard]] UniquePtr<RegexConstraint> clone(RegexTransitionsVector& success_transitions) const final
+		{
+			auto cc = soup::make_unique<RegexWordCharConstraint>();
+			success_transitions.setTransitionTo(cc->getEntrypoint());
+			success_transitions.emplace(&cc->success_transition);
+			return cc;
+		}
+	};
+}
diff --git a/testes/pluto/basic.pluto b/testes/pluto/basic.pluto
index 55633e1d8..9256cb732 100644
--- a/testes/pluto/basic.pluto
+++ b/testes/pluto/basic.pluto
@@ -2076,6 +2076,20 @@ do
end)
sched:run()
end
+do
+ -- Smoke test for the "pluto:regex" library.
+ local regex = require "pluto:regex"
+
+ -- Anchored pattern with an optional group and the /i modifier, tried against upper-case input.
+ local pattern = new regex [[/^the (only )?one$/i]]
+ assert(pattern:match("THE ONE"))
+ assert(not pattern:match("NOT THE ONE"))
+
+ -- Captures: index 0 holds the whole match, 1 and 2 the two (\d+) groups.
+ pattern = new regex [[/anywhere from (\d+) to (\d+)/]]
+ local match = pattern:match("anywhere from 3 to 5")
+ assert(match)
+ assert(match[0] == "anywhere from 3 to 5")
+ assert(match[1] == "3")
+ assert(match[2] == "5")
+end
print "Testing default table metatable."
do