diff --git a/.gitignore b/.gitignore index cdcb6f6..4feb172 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ bld/ [Bb]in/ [Oo]bj/ [Ll]og/ +cmake-build-debug/ # Visual Studio 2015 cache/options directory .vs/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d6397b..9caecce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,25 +1,25 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) # ---- Project ---- - # Note: update this to your new project's name and version project( - Glob - VERSION 1.0 - LANGUAGES CXX + Glob + VERSION 1.0 + LANGUAGES CXX ) # ---- Options ---- option(GLOB_USE_GHC_FILESYSTEM "Use ghc::filesystem instead of std::filesystem" OFF) +option(GLOB_TESTS "Run glob gtests" ON) # ---- Include guards ---- -if(PROJECT_SOURCE_DIR STREQUAL PROJECT_BINARY_DIR) - message( - FATAL_ERROR - "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there." - ) -endif() +if (PROJECT_SOURCE_DIR STREQUAL PROJECT_BINARY_DIR) + message( + FATAL_ERROR + "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there." + ) +endif () # ---- Add dependencies via CPM ---- # see https://github.com/TheLartians/CPM.cmake for more info @@ -28,11 +28,21 @@ include(cmake/CPM.cmake) # PackageProject.cmake will be used to make our target installable CPMAddPackage( - NAME PackageProject.cmake - GITHUB_REPOSITORY TheLartians/PackageProject.cmake - VERSION 1.3 + NAME PackageProject.cmake + GITHUB_REPOSITORY TheLartians/PackageProject.cmake + VERSION 1.3 ) +CPMAddPackage( + NAME googletest + GITHUB_REPOSITORY google/googletest + GIT_TAG v1.16.0 + VERSION 1.16.0 + OPTIONS "INSTALL_GTEST OFF" "gtest_force_shared_crt" +) +# For Windows: Prevent overriding the parent project's compiler/linker settings +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + # ---- Add source files ---- # Note: globbing sources is considered bad practice as CMake's generators may not detect new files @@ -47,14 +57,14 @@ file(GLOB_RECURSE sources CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/source/ # INTERFACE_COMPILE_FEATURES cxx_std_17) add_library(Glob ${headers} ${sources}) -SET_TARGET_PROPERTIES(Glob PROPERTIES OUTPUT_NAME glob) +set_target_properties(Glob PROPERTIES OUTPUT_NAME glob) set_target_properties(Glob PROPERTIES CXX_STANDARD 17) -if ( GLOB_USE_GHC_FILESYSTEM ) - # Switch to ghc::filesystem. - target_link_libraries(Glob PRIVATE ghcFilesystem::ghc_filesystem) - target_compile_definitions(Glob PUBLIC GLOB_USE_GHC_FILESYSTEM) -endif() +if (GLOB_USE_GHC_FILESYSTEM) + # Switch to ghc::filesystem. + target_link_libraries(Glob PRIVATE ghcFilesystem::ghc_filesystem) + target_compile_definitions(Glob PUBLIC GLOB_USE_GHC_FILESYSTEM) +endif () # being a cross-platform target, we enforce standards conformance on MSVC target_compile_options(Glob PUBLIC "$<$:/permissive->") @@ -62,8 +72,8 @@ target_compile_options(Glob PUBLIC "$<$:/permissive->") # Link dependencies (if required) target_link_libraries(Glob PUBLIC cxxopts) target_include_directories( - Glob PUBLIC $ - $ + Glob PUBLIC $ + $ ) # ---- Create an installable target ---- @@ -74,11 +84,28 @@ target_include_directories( string(TOLOWER ${PROJECT_NAME}/version.h VERSION_HEADER_LOCATION) packageProject( - NAME ${PROJECT_NAME} - VERSION ${PROJECT_VERSION} - BINARY_DIR ${PROJECT_BINARY_DIR} - INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include - INCLUDE_DESTINATION include/${PROJECT_NAME}-${PROJECT_VERSION} - VERSION_HEADER "${VERSION_HEADER_LOCATION}" - DEPENDENCIES "" + NAME ${PROJECT_NAME} + VERSION ${PROJECT_VERSION} + BINARY_DIR ${PROJECT_BINARY_DIR} + INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include + INCLUDE_DESTINATION include/${PROJECT_NAME}-${PROJECT_VERSION} + VERSION_HEADER "${VERSION_HEADER_LOCATION}" + DEPENDENCIES "" ) + +# --- setup tests --- +if (GLOB_TESTS) + enable_testing() + + add_executable(glob_tests test/rglob_test.cpp test/compile_pattern_test.cpp) + set_property(TARGET glob_tests PROPERTY CXX_STANDARD 17) + target_link_libraries(glob_tests PRIVATE gtest_main ${PROJECT_NAME}) + add_test(NAME glob_tests COMMAND glob_tests) + + add_executable(glob_tests_single test/rglob_test.cpp test/compile_pattern_test.cpp) + set_property(TARGET glob_tests_single PROPERTY CXX_STANDARD 17) + target_compile_definitions(glob_tests_single PRIVATE USE_SINGLE_HEADER=1) + target_link_libraries(glob_tests_single PRIVATE gtest_main) + target_include_directories(glob_tests_single PRIVATE single_include) + add_test(NAME glob_tests_single COMMAND glob_tests_single) +endif () diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake index ed8c0bc..9ede4a5 100644 --- a/cmake/CPM.cmake +++ b/cmake/CPM.cmake @@ -1,4 +1,4 @@ -set(CPM_DOWNLOAD_VERSION 0.35.1) +set(CPM_DOWNLOAD_VERSION 0.40.8) if(CPM_SOURCE_CACHE) set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") diff --git a/include/glob/glob.h b/include/glob/glob.h index f2b36ed..efc05a9 100644 --- a/include/glob/glob.h +++ b/include/glob/glob.h @@ -2,6 +2,7 @@ #pragma once #include #include +#include #ifdef GLOB_USE_GHC_FILESYSTEM #include @@ -45,4 +46,5 @@ std::vector glob(const std::initializer_list &pathnames); /// Initializer list overload for convenience std::vector rglob(const std::initializer_list &pathnames); +std::regex compile_pattern_to_regex(std::string_view pattern); } // namespace glob diff --git a/single_include/glob/glob.hpp b/single_include/glob/glob.hpp index d5da089..5ffc25b 100644 --- a/single_include/glob/glob.hpp +++ b/single_include/glob/glob.hpp @@ -138,13 +138,13 @@ std::string translate(const std::string &pattern) { } static inline -std::regex compile_pattern(const std::string &pattern) { +std::regex compile_pattern_to_regex(const std::string &pattern) { return std::regex(translate(pattern), std::regex::ECMAScript); } static inline bool fnmatch(const fs::path &name, const std::string &pattern) { - return std::regex_match(name.string(), compile_pattern(pattern)); + return std::regex_match(name.string(), compile_pattern_to_regex(pattern)); } static inline @@ -252,6 +252,10 @@ std::vector glob2(const fs::path &dirname, [[maybe_unused]] const std: bool dironly) { // std::cout << "In glob2\n"; std::vector result; + // look into the base directory as well, but only if it exists + if (fs::exists(dirname)) { + result.push_back("."); + } assert(is_recursive(pattern)); for (auto &dir : rlistdir(dirname, dironly)) { result.push_back(dir); @@ -364,7 +368,7 @@ std::vector glob(const std::string &pathname, bool recursive = false, if (name.parent_path().empty()) { subresult = d / name; } - result.push_back(subresult); + result.push_back(subresult.lexically_normal()); } } diff --git a/source/glob.cpp b/source/glob.cpp index b74547a..c7ac2cc 100644 --- a/source/glob.cpp +++ b/source/glob.cpp @@ -23,109 +23,140 @@ bool string_replace(std::string &str, std::string_view from, std::string_view to return true; } +inline void handle_previous_stars(std::string &result_string, const int &n_stars) { + if (n_stars == 1) { + // single star cannot escape "/" + result_string += "[^/]*"; // .* without / + } else if (n_stars == 2) { + // double star matches anything + result_string += ".*"; + } +} + std::string translate(std::string_view pattern) { std::size_t i = 0, n = pattern.size(); std::string result_string; + int n_stars = 0; + while (i < n) { auto c = pattern[i]; i += 1; + if (c == '*') { - result_string += ".*"; - } else if (c == '?') { - result_string += "."; - } else if (c == '[') { - auto j = i; - if (j < n && pattern[j] == '!') { - j += 1; + n_stars++; + // last character of the pattern is a star + if (i == n) { + handle_previous_stars(result_string, n_stars); } - if (j < n && pattern[j] == ']') { - j += 1; - } - while (j < n && pattern[j] != ']') { - j += 1; + } else { + if (n_stars == 1) { + // single star cannot escape "/" + result_string += "[^/]*"; // .* without / } - if (j >= n) { - result_string += "\\["; - } else { - auto stuff = std::string(pattern.begin() + i, pattern.begin() + j); - if (stuff.find("--") == std::string::npos) { - string_replace(stuff, std::string_view{"\\"}, std::string_view{R"(\\)"}); + if (c == '/') { + if (n_stars == 2) { + // handle **/ + result_string += "(.*?/)?"; } else { - std::vector chunks; - std::size_t k = 0; - if (pattern[i] == '!') { - k = i + 2; - } else { - k = i + 1; + result_string += "/"; + } + } else { // not / + handle_previous_stars(result_string, n_stars); + if (c == '?') { + result_string += "."; + } else if (c == '[') { + auto j = i; + if (j < n && pattern[j] == '!') { + j += 1; } + if (j < n && pattern[j] == ']') { + j += 1; + } + while (j < n && pattern[j] != ']') { + j += 1; + } + if (j >= n) { + result_string += "\\["; + } else { + auto stuff = std::string(pattern.begin() + i, pattern.begin() + j); + if (stuff.find("--") == std::string::npos) { + string_replace(stuff, std::string_view{"\\"}, std::string_view{R"(\\)"}); + } else { + std::vector chunks; + std::size_t k = 0; + if (pattern[i] == '!') { + k = i + 2; + } else { + k = i + 1; + } + + while (true) { + k = pattern.find("-", k, j); + if (k == std::string_view::npos) { + break; + } + chunks.push_back(std::string(pattern.begin() + i, pattern.begin() + k)); + i = k + 1; + k = k + 3; + } + + chunks.push_back(std::string(pattern.begin() + i, pattern.begin() + j)); + // Escape backslashes and hyphens for set difference (--). + // Hyphens that create ranges shouldn't be escaped. + bool first = true; + for (auto &chunk : chunks) { + string_replace(chunk, std::string_view{"\\"}, std::string_view{R"(\\)"}); + string_replace(chunk, std::string_view{"-"}, std::string_view{R"(\-)"}); + if (first) { + stuff += chunk; + first = false; + } else { + stuff += "-" + chunk; + } + } + } - while (true) { - k = pattern.find("-", k, j); - if (k == std::string_view::npos) { - break; + // Escape set operations (&&, ~~ and ||). + std::string result{}; + std::regex_replace(std::back_inserter(result), // result + stuff.begin(), stuff.end(), // string + ESCAPE_SET_OPER, // pattern + ESCAPE_REPL_STR); // repl + stuff = result; + i = j + 1; + if (stuff[0] == '!') { + stuff = "^" + std::string(stuff.begin() + 1, stuff.end()); + } else if (stuff[0] == '^' || stuff[0] == '[') { + stuff = "\\\\" + stuff; } - chunks.push_back(std::string(pattern.begin() + i, pattern.begin() + k)); - i = k + 1; - k = k + 3; + result_string = result_string + "[" + stuff + "]"; } - - chunks.push_back(std::string(pattern.begin() + i, pattern.begin() + j)); - // Escape backslashes and hyphens for set difference (--). - // Hyphens that create ranges shouldn't be escaped. - bool first = true; - for (auto &chunk : chunks) { - string_replace(chunk, std::string_view{"\\"}, std::string_view{R"(\\)"}); - string_replace(chunk, std::string_view{"-"}, std::string_view{R"(\-)"}); - if (first) { - stuff += chunk; - first = false; - } else { - stuff += "-" + chunk; + } else { + // SPECIAL_CHARS + // closing ')', '}' and ']' + // '-' (a range in character set) + // '&', '~', (extended character set operations) + // '#' (comment) and WHITESPACE (ignored) in verbose mode + static std::map special_characters_map; + if (special_characters_map.empty()) { + for (auto &&sc : SPECIAL_CHARACTERS) { + special_characters_map.emplace(static_cast(sc), std::string{"\\"} + std::string(1, sc)); } } - } - // Escape set operations (&&, ~~ and ||). - std::string result{}; - std::regex_replace(std::back_inserter(result), // result - stuff.begin(), stuff.end(), // string - ESCAPE_SET_OPER, // pattern - ESCAPE_REPL_STR); // repl - stuff = result; - i = j + 1; - if (stuff[0] == '!') { - stuff = "^" + std::string(stuff.begin() + 1, stuff.end()); - } else if (stuff[0] == '^' || stuff[0] == '[') { - stuff = "\\\\" + stuff; - } - result_string = result_string + "[" + stuff + "]"; - } - } else { - // SPECIAL_CHARS - // closing ')', '}' and ']' - // '-' (a range in character set) - // '&', '~', (extended character set operations) - // '#' (comment) and WHITESPACE (ignored) in verbose mode - static std::map special_characters_map; - if (special_characters_map.empty()) { - for (auto &&sc : SPECIAL_CHARACTERS) { - special_characters_map.emplace(static_cast(sc), std::string{"\\"} + std::string(1, sc)); + if (SPECIAL_CHARACTERS.find(c) != std::string_view::npos) { + result_string += special_characters_map[static_cast(c)]; + } else { + result_string += c; + } } - } - - if (SPECIAL_CHARACTERS.find(c) != std::string_view::npos) { - result_string += special_characters_map[static_cast(c)]; - } else { - result_string += c; - } - } + } // not / + n_stars = 0; + } // not * } - return std::string{"(("} + result_string + std::string{R"()|[\r\n])$)"}; -} -std::regex compile_pattern(std::string_view pattern) { - return std::regex(translate(pattern), std::regex::ECMAScript); + return std::string{"(("} + result_string + std::string{R"()|[\r\n])$)"}; } bool fnmatch(std::string&& name, const std::regex& pattern) { @@ -135,7 +166,7 @@ bool fnmatch(std::string&& name, const std::regex& pattern) { std::vector filter(const std::vector &names, std::string_view pattern) { // std::cout << "Pattern: " << pattern << "\n"; - const auto pattern_re = compile_pattern(pattern); + const auto pattern_re = compile_pattern_to_regex(pattern); std::vector result; std::copy_if(std::make_move_iterator(names.begin()), std::make_move_iterator(names.end()), std::back_inserter(result), @@ -223,7 +254,11 @@ std::vector rlistdir(const fs::path &dirname, bool dironly) { std::vector glob2(const fs::path &dirname, [[maybe_unused]] const fs::path &pattern, bool dironly) { // std::cout << "In glob2\n"; - std::vector result{"."}; + std::vector result; + // look into the base directory as well, but only if it exists + if (fs::exists(dirname)) { + result.push_back("."); + } assert(is_recursive(pattern.string())); auto matched_dirs = rlistdir(dirname, dironly); std::copy(std::make_move_iterator(matched_dirs.begin()), std::make_move_iterator(matched_dirs.end()), std::back_inserter(result)); @@ -362,4 +397,7 @@ rglob(const std::initializer_list &pathnames) { return rglob(std::vector(pathnames)); } +std::regex compile_pattern_to_regex(std::string_view pattern) { + return std::regex(translate(pattern), std::regex::ECMAScript); +} } // namespace glob diff --git a/standalone/CMakeLists.txt b/standalone/CMakeLists.txt index 4c55bef..fc3dd64 100644 --- a/standalone/CMakeLists.txt +++ b/standalone/CMakeLists.txt @@ -13,7 +13,7 @@ include(../cmake/CPM.cmake) CPMAddPackage( NAME cxxopts GITHUB_REPOSITORY jarro2783/cxxopts - VERSION 2.2.0 + VERSION 3.2.0 OPTIONS "CXXOPTS_BUILD_EXAMPLES Off" "CXXOPTS_BUILD_TESTS Off" ) diff --git a/test/compile_pattern_test.cpp b/test/compile_pattern_test.cpp new file mode 100644 index 0000000..a1a1bf6 --- /dev/null +++ b/test/compile_pattern_test.cpp @@ -0,0 +1,52 @@ +#include +#include + +#ifdef USE_SINGLE_HEADER +#include "glob/glob.hpp" +#else +#include "glob/glob.h" +#endif + +TEST(globTest, simpleFileGlob) { + auto pattern = glob::compile_pattern_to_regex("foo/*.txt"); + ASSERT_TRUE(std::regex_match("foo/bar.txt", pattern)); + ASSERT_TRUE(std::regex_match("foo/blub.bar.txt", pattern)); + ASSERT_FALSE(std::regex_match("blub.txt", pattern)); + ASSERT_FALSE(std::regex_match("foo/blub.json", pattern)); +} + +TEST(globTest, doubleStarGlob) { + auto pattern = glob::compile_pattern_to_regex("foo/**/*.txt"); + ASSERT_TRUE(std::regex_match("foo/baz/bar.txt", pattern)); + ASSERT_TRUE(std::regex_match("foo/baz/blub/bar.txt", pattern)); + ASSERT_FALSE(std::regex_match("blub.txt", pattern)); + ASSERT_FALSE(std::regex_match("foo/blub.json", pattern)); + ASSERT_FALSE(std::regex_match("/home/user/foo/bar.txt", pattern)); + ASSERT_TRUE(std::regex_match("foo/bar.txt", pattern)); +} + +TEST(globTest, doubleStar_Star_FixedEnd) { + auto pattern = glob::compile_pattern_to_regex("/home/*/**/test"); + ASSERT_TRUE(std::regex_match("/home/user/test", pattern)); + ASSERT_TRUE(std::regex_match("/home/user2/test", pattern)); + ASSERT_TRUE(std::regex_match("/home/test/a/b/c/test", pattern)); + ASSERT_FALSE(std::regex_match("/home/test/a/b/c/mytest", pattern)); +} + +TEST(globTest, doubleStar_Star_StarFixedEnd) { + auto pattern = glob::compile_pattern_to_regex("/home/*/**/*stream"); + ASSERT_TRUE(std::regex_match("/home/user/stream", pattern)); + ASSERT_TRUE(std::regex_match("/home/user2/stream", pattern)); + ASSERT_TRUE(std::regex_match("/home/user/a/b/c/stream", pattern)); + ASSERT_TRUE(std::regex_match("/home/user/a/b/c/stream", pattern)); + ASSERT_TRUE(std::regex_match("/home/user/a/b/c/istream", pattern)); + ASSERT_TRUE(std::regex_match("/home/user/a/b/c/my-stream", pattern)); + ASSERT_TRUE(std::regex_match("/home/user/a/b/c/youdontstream", pattern)); + ASSERT_FALSE(std::regex_match("/home/user/a/b/c/youdontstreamc", pattern)); +} + +TEST(globTest, exclude_directories_but_not_files) { + auto pattern = glob::compile_pattern_to_regex("test*/**"); + ASSERT_TRUE(std::regex_match("test/whatever.c", pattern)); + ASSERT_FALSE(std::regex_match("test.c", pattern)); +} diff --git a/test/rglob_test.cpp b/test/rglob_test.cpp new file mode 100644 index 0000000..55640d4 --- /dev/null +++ b/test/rglob_test.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include + +#ifdef USE_SINGLE_HEADER +#include "glob/glob.hpp" +#else +#include "glob/glob.h" +#endif + +namespace fs = std::filesystem; + +fs::path mkdir_temp() { + std::srand(std::time(nullptr)); + fs::path temp_dir = fs::temp_directory_path() / ("rglob_test_" + std::to_string(std::rand())); + + fs::create_directories(temp_dir); + return temp_dir; +} + +// regression test to avoid matching an non existing file +TEST(rglobTest, MatchNonExistent) { + auto matches = glob::rglob("non-existent/**"); + EXPECT_EQ(matches.size(), 0); +} + +// see https://github.com/p-ranav/glob/issues/3 +TEST(rglobTest, Issue3) { + auto temp_dir = mkdir_temp(); + std::cout << "Temporary directory: " << temp_dir << std::endl; + + fs::path sub1 = temp_dir / "sub"; + fs::path sub2 = sub1 / "sub"; + EXPECT_TRUE(fs::create_directory(sub1)); + EXPECT_TRUE(fs::create_directory(sub2)); + + std::ofstream(temp_dir / "file.txt").close(); + std::ofstream(sub1 / "file.txt").close(); + std::ofstream(sub2 / "file.txt").close(); + + auto pattern = temp_dir.string() + "/**/*.txt"; + std::cout << "Pattern: " << pattern << std::endl; + + auto matches = glob::rglob(pattern); + EXPECT_EQ(matches.size(), 3); + EXPECT_EQ(matches[0].string(), (temp_dir / "file.txt").string()); + EXPECT_EQ(matches[1].string(), (sub1 / "file.txt").string()); + EXPECT_EQ(matches[2].string(), (sub2 / "file.txt").string()); +}