Skip to content

Commit

Permalink
Implement split by regex
Browse files Browse the repository at this point in the history
  • Loading branch information
BrainStone committed Oct 4, 2023
1 parent 43ce4fa commit 4875877
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 7 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON" FORCE)

add_library(stomfoolery INTERFACE)
target_include_directories(stomfoolery INTERFACE ./src)
target_include_directories(stomfoolery INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/src")

###############################################################################
## testing ####################################################################
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ using namespace std::string_literals;

std::vector<std::string>{"Hello World!"s, "Hello Bjarne!"s} * " "s; // -> "Hello World! Hello Bjarne!"s
"Hello World! Hello Bjarne!"s / " "s; // -> std::vector<std::string>{"Hello World!"s, "Hello Bjarne!"s}
"Hello World! \tHello Bjarne!"s / std::regex{R"(\s+)"}; // -> std::vector<std::string>{"Hello World!"s, "Hello Bjarne!"s}
```
This is really just playing around, and honestly you shouldn't be using this in production code. The idea behind is more
Expand All @@ -29,8 +30,8 @@ set the preprocessor flag `STOMFOOLERY_DISABLE_OPERATORS`, which does in fact di
## Using this lib
This is a CMake library and you can use it as you would any other CMake library. See Instructions further down on how to
use it without CMake too.
This is a CMake library, and you can use it as you would any other CMake library. See Instructions further down on how
to use it without CMake too.
### CMake
Expand Down
49 changes: 47 additions & 2 deletions src/stomfoolery.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <iterator>
#include <ranges>
#include <regex>
#include <string>
#include <string_view>
#include <vector>
Expand Down Expand Up @@ -247,6 +248,37 @@ inline C split(const S1& str, const S2& separator) {
std::ranges::begin(separator), std::ranges::end(separator));
}

// #### split by regex ####
// Actual function

/**
 * @brief Splits an iterator based string into multiple substrings based on a specified regex separator.
 *
 * Every part of the input that is not matched by @p separator becomes one element of the result.
 * An empty input range yields an empty container.
 *
 * @tparam T Char type of the string
 * @tparam C Container type to store the resulting substrings
 * @tparam I Iterator type representing the string
 * @param begin Iterator pointing to the start of the string
 * @param end Iterator pointing to the end of the string
 * @param separator Regex based separator
 * @return A container of substrings
 */
template <typename T, typename C = std::vector<std::basic_string<T>>, return_type_iterator<T> I>
C split(I begin, I end, const std::basic_regex<T>& separator);

// Helpers

/**
* @brief Splits a string into multiple substrings based on a specified regex separator.
* @tparam S String like type
* @tparam C Container type to store the resulting substrings
* @param str The string to split
* @param separator The separator to split the string by
* @return A container of substrings
*/
template <string_like S, typename C = std::vector<std::basic_string<string_like_char_t<S>>>>
inline C split(const S& str, const std::basic_regex<string_like_char_t<S>>& separator) {
return split<string_like_char_t<S>, C>(std::ranges::begin(str), std::ranges::end(str), separator);
}

} // namespace stomfoolery

// You can disable the operators if you really want to!
Expand Down Expand Up @@ -308,8 +340,21 @@ template <stomfoolery::string_like S1, stomfoolery::string_like S2,
typename C = std::vector<std::basic_string<stomfoolery::string_like_char_t<S1>>>>
requires stomfoolery::same_char_type<S1, S2>
inline C operator/(const S1& str, const S2& separator) {
return stomfoolery::split<stomfoolery::string_like_char_t<S1>, C>(
std::ranges::begin(str), std::ranges::end(str), std::ranges::begin(separator), std::ranges::end(separator));
return stomfoolery::split<S1, S2, C>(str, separator);
}

// #### split by regex ####
/**
 * @brief Splits a string into multiple substrings based on a specified regex separator using the `/` operator.
 *
 * Operator spelling of `stomfoolery::split(str, separator)`; it forwards both arguments unchanged and returns
 * whatever that call returns.
 *
 * @tparam S String like type
 * @tparam C Container type to store the resulting substrings
 * @param str The string to split
 * @param separator The regex to split the string by
 * @return A container of substrings
 */
template <stomfoolery::string_like S, typename C = std::vector<std::basic_string<stomfoolery::string_like_char_t<S>>>>
inline C operator/(const S& str, const std::basic_regex<stomfoolery::string_like_char_t<S>>& separator) {
return stomfoolery::split<S, C>(str, separator);
}

#endif // STOMFOOLERY_DISABLE_OPERATORS
Expand Down
20 changes: 18 additions & 2 deletions src/stomfoolery.inc
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,6 @@ C split(I1 str_begin, I1 str_end, I2 separator_begin, I2 separator_end) {

return result;
}
// No special handling for an empty separator, the following code handles that just fine and the performance to be
// gained from it is minimal

I1 temp_str_begin = str_begin, temp_str_end;

Expand All @@ -128,6 +126,24 @@ C split(I1 str_begin, I1 str_end, I2 separator_begin, I2 separator_end) {
return result;
}

/**
 * @brief Splits the character range [begin, end) at every match of a regex.
 *
 * Each part of the input that is not matched by @p separator is appended to the result as its own element.
 *
 * @tparam T Char type of the string
 * @tparam C Container type to store the resulting substrings
 * @tparam I Iterator type representing the string
 * @param begin Iterator pointing to the start of the string
 * @param end Iterator pointing to the end of the string
 * @param separator Regex whose matches separate the substrings
 * @return A container of substrings; empty if the input range is empty
 */
template <typename T, typename C, return_type_iterator<T> I>
C split(I begin, I end, const std::basic_regex<T>& separator) {
    C result;

    // An empty input yields no substrings at all. Compare the iterators directly instead of computing
    // `end - begin`: std::regex_token_iterator only needs bidirectional iterators, so the emptiness check should
    // not be the one thing that demands random access.
    if (begin == end) {
        return result;
    }

    // submatch = -1 means all unmatched parts
    for (std::regex_token_iterator<I> it{begin, end, separator, -1}, regex_end{}; it != regex_end; ++it) {
        result.emplace_back(it->first, it->second);
    }

    result.shrink_to_fit();
    return result;
}

} // namespace stomfoolery

#endif // STOMFOOLERY_STOMFOOLERY_INC
34 changes: 34 additions & 0 deletions test/test_split_by_regex.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include <string>
#include <string_view>

#include "gtest/gtest.h"
#include "stomfoolery.hpp"

using namespace std::string_literals;

// Expected result for the empty regex: a leading empty element followed by one element per character
const std::vector<std::string> simple_strs_empty_start{""s, "a"s, "b"s, "c"s, "d"s, "e"s};
// Expected result for the whitespace regex: one element per letter
const std::vector<std::string> simple_strs{"a"s, "b"s, "c"s, "d"s, "e"s};
// Input for the empty regex tests
const std::string simple_str_empty_separator = "abcde"s;
// Input for the whitespace tests; the letters are separated by single and repeated whitespace characters
const std::string simple_str_underscore = "a b c\td\n \t\n\re"s;

// Separator patterns under test
const std::regex empty_regex{""};
const std::regex whitespaces{R"(\s+)"};

// Normal cases
// Free function with an empty regex: expects a leading empty element followed by one element per character.
TEST(TestSplitByRegex, SimpleStringsEmptySeparator) {
EXPECT_EQ(stomfoolery::split(simple_str_empty_separator, empty_regex), simple_strs_empty_start);
}

// `/` operator with an empty regex: expects a leading empty element followed by one element per character.
TEST(TestSplitByRegex, SimpleStringsEmptySeparatorOperator) {
EXPECT_EQ(simple_str_empty_separator / empty_regex, simple_strs_empty_start);
}

// Free function with `\s+`: every run of whitespace acts as one separator, leaving only the letters.
TEST(TestSplitByRegex, SimpleStringsWhitespaces) {
EXPECT_EQ(stomfoolery::split(simple_str_underscore, whitespaces), simple_strs);
}

// `/` operator with `\s+`: every run of whitespace acts as one separator, leaving only the letters.
TEST(TestSplitByRegex, SimpleStringsWhitespacesOperator) {
EXPECT_EQ(simple_str_underscore / whitespaces, simple_strs);
}

// Edge cases

0 comments on commit 4875877

Please sign in to comment.