Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions io/include/pcl/io/split.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* SPDX-License-Identifier: BSD-3-Clause
*
* Point Cloud Library (PCL) - www.pointclouds.org
* Copyright (c) 2014-, Open Perception Inc.
*
* All rights reserved
*/

#pragma once
#include <string>

namespace pcl {

/** \brief Lightweight string tokenizer, usable as a boost::split substitute.
 *
 * Splits \a in into the tokens separated by any of the characters in
 * \a delimiters. Consecutive delimiters are compressed (no empty tokens are
 * produced) and leading/trailing delimiters are ignored, matching
 * boost::split with boost::token_compress_on applied to a trimmed string.
 * When benchmarked against boost, this function performs far fewer
 * allocations and is therefore much better suited for quick line
 * tokenization.
 *
 * Works with SequenceSequenceT = std::vector<std::string> as well as
 * std::vector<std::string_view>, because each token is emplaced from a
 * (char pointer, length) pair.
 *
 * \param[out] result container receiving the tokens; cleared first
 * \param[in] in string to tokenize
 * \param[in] delimiters null-terminated set of delimiter characters
 */
template <typename SequenceSequenceT>
void
split(SequenceSequenceT& result, std::string const& in, const char* const delimiters)
{
  result.clear();

  // Position of the first character of the next token; skip any leading
  // delimiters up front.
  auto token_begin = in.find_first_not_of(delimiters);
  while (token_begin != std::string::npos) {
    // One past the last character of the current token.
    const auto token_end = in.find_first_of(delimiters, token_begin);
    if (token_end == std::string::npos) {
      // Final token runs to the end of the input.
      result.emplace_back(in.data() + token_begin, in.length() - token_begin);
      return;
    }
    result.emplace_back(in.data() + token_begin, token_end - token_begin);
    // Jump over the delimiter run that terminated this token.
    token_begin = in.find_first_not_of(delimiters, token_end);
  }
}
} // namespace pcl
33 changes: 10 additions & 23 deletions io/src/obj_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@
#include <fstream>
#include <pcl/common/io.h>
#include <pcl/console/time.h>
#include <pcl/io/split.h>

#include <boost/lexical_cast.hpp> // for lexical_cast
#include <boost/filesystem.hpp> // for exists
#include <boost/algorithm/string.hpp> // for split
#include <boost/algorithm/string.hpp> // for trim

pcl::MTLReader::MTLReader ()
{
Expand Down Expand Up @@ -197,11 +199,7 @@ pcl::MTLReader::read (const std::string& mtl_file_path)
continue;

// Tokenize the line
std::stringstream sstream (line);
sstream.imbue (std::locale::classic ());
line = sstream.str ();
Comment thread
mvieth marked this conversation as resolved.
boost::trim (line);
boost::split (st, line, boost::is_any_of ("\t\r "), boost::token_compress_on);
pcl::split (st, line, "\t\r ");
// Ignore comments
if (st[0] == "#")
continue;
Expand Down Expand Up @@ -382,6 +380,7 @@ pcl::OBJReader::readHeader (const std::string &file_name, pcl::PCLPointCloud2 &c
continue;

// Trim the line
//TODO: we can easily do this without boost
Comment thread
djanekovic marked this conversation as resolved.
boost::trim (line);

// Ignore comments
Expand Down Expand Up @@ -416,7 +415,7 @@ pcl::OBJReader::readHeader (const std::string &file_name, pcl::PCLPointCloud2 &c
if (line.substr (0, 6) == "mtllib")
{
std::vector<std::string> st;
boost::split(st, line, boost::is_any_of("\t\r "), boost::token_compress_on);
pcl::split(st, line, "\t\r ");
material_files.push_back (st.at (1));
continue;
}
Expand Down Expand Up @@ -536,25 +535,21 @@ pcl::OBJReader::read (const std::string &file_name, pcl::PCLPointCloud2 &cloud,
// rgba_field = i;

std::vector<std::string> st;
std::string line;
try
{
uindex_t point_idx = 0;
uindex_t normal_idx = 0;

while (!fs.eof ())
{
std::string line;
getline (fs, line);
// Ignore empty lines
if (line.empty())
continue;

// Tokenize the line
std::stringstream sstream (line);
sstream.imbue (std::locale::classic ());
line = sstream.str ();
boost::trim (line);
boost::split (st, line, boost::is_any_of ("\t\r "), boost::token_compress_on);
pcl::split (st, line, "\t\r ");

// Ignore comments
if (st[0] == "#")
Expand Down Expand Up @@ -693,11 +688,7 @@ pcl::OBJReader::read (const std::string &file_name, pcl::TextureMesh &mesh,
continue;

// Tokenize the line
std::stringstream sstream (line);
sstream.imbue (std::locale::classic ());
line = sstream.str ();
boost::trim (line);
boost::split (st, line, boost::is_any_of ("\t\r "), boost::token_compress_on);
pcl::split (st, line, "\t\r ");

// Ignore comments
if (st[0] == "#")
Expand Down Expand Up @@ -891,11 +882,7 @@ pcl::OBJReader::read (const std::string &file_name, pcl::PolygonMesh &mesh,
continue;

// Tokenize the line
std::stringstream sstream (line);
sstream.imbue (std::locale::classic ());
line = sstream.str ();
boost::trim (line);
boost::split (st, line, boost::is_any_of ("\t\r "), boost::token_compress_on);
pcl::split (st, line, "\t\r ");

// Ignore comments
if (st[0] == "#")
Expand Down
10 changes: 5 additions & 5 deletions io/src/pcd_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@
#include <pcl/io/low_level_io.h>
#include <pcl/io/lzf.h>
#include <pcl/io/pcd_io.h>
#include <pcl/io/split.h>
#include <pcl/console/time.h>

#include <cstring>
#include <cerrno>
#include <boost/filesystem.hpp> // for permissions
#include <boost/algorithm/string.hpp> // for split

///////////////////////////////////////////////////////////////////////////////////////////
void
Expand Down Expand Up @@ -143,8 +143,7 @@ pcl::PCDReader::readHeader (std::istream &fs, pcl::PCLPointCloud2 &cloud,
continue;

// Tokenize the line
boost::trim (line);
boost::split (st, line, boost::is_any_of ("\t\r "), boost::token_compress_on);
pcl::split (st, line, "\t\r ");

std::stringstream sstream (line);
sstream.imbue (std::locale::classic ());
Expand Down Expand Up @@ -439,6 +438,8 @@ pcl::PCDReader::readBodyASCII (std::istream &fs, pcl::PCLPointCloud2 &cloud, int
std::istringstream is;
is.imbue (std::locale::classic ());

st.reserve(elems_per_line);

try
{
while (idx < nr_points && !fs.eof ())
Expand All @@ -449,8 +450,7 @@ pcl::PCDReader::readBodyASCII (std::istream &fs, pcl::PCLPointCloud2 &cloud, int
continue;

// Tokenize the line
boost::trim (line);
boost::split (st, line, boost::is_any_of ("\t\r "), boost::token_compress_on);
pcl::split(st, line, "\r\t ");

if (st.size () != elems_per_line) // If this is not checked, an exception might occur while accessing st
{
Expand Down
4 changes: 4 additions & 0 deletions test/io/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ PCL_ADD_TEST(io_io test_io
FILES test_io.cpp
LINK_WITH pcl_gtest pcl_io)

PCL_ADD_TEST(io_split test_split
FILES test_split.cpp
LINK_WITH pcl_gtest pcl_io)

PCL_ADD_TEST(io_iterators test_iterators
FILES test_iterators.cpp
LINK_WITH pcl_gtest pcl_io)
Expand Down
87 changes: 87 additions & 0 deletions test/io/test_split.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* SPDX-License-Identifier: BSD-3-Clause
*
* Point Cloud Library (PCL) - www.pointclouds.org
* Copyright (c) 2014-, Open Perception Inc.
*
* All rights reserved
*/

#include <pcl/io/split.h>
#include <pcl/test/gtest.h>

#include <vector>

TEST(PCL, TestIdentitySplit)
{
  // Splitting an empty string must produce an empty token list.
  std::vector<std::string> result;
  pcl::split(result, "", " \r\t");
  EXPECT_TRUE(result.empty());
}

TEST(PCL, TestNonEmptyDelimitersSplit)
{
  // An input consisting solely of delimiter characters yields no tokens.
  std::vector<std::string> result;
  pcl::split(result, "\r\t ", " \r\t");
  EXPECT_TRUE(result.empty());
}

TEST(PCL, TestTokenWithoutDelimitersSplit)
{
  // With no delimiter present, the whole input is a single token.
  const std::vector<std::string> expected{"abcd"};
  std::vector<std::string> result;
  pcl::split(result, "abcd", " \r\t");
  EXPECT_EQ(result, expected);
}

TEST(PCL, TestSimpleSplit1)
{
  // Each single delimiter (and each delimiter pair) separates two tokens.
  const std::vector<std::string> expected{
      "aabb", "ccdd", "eeff", "gghh", "iijj", "kkll", "mmnn", "oopp"};

  std::vector<std::string> result;
  pcl::split(result, "aabb ccdd\reeff\tgghh \riijj \tkkll\r\tmmnn \r\toopp", " \r\t");
  EXPECT_EQ(result, expected);
}

TEST(PCL, TestSimpleSplit2)
{
  // Leading and trailing delimiters must not introduce empty tokens.
  const std::vector<std::string> expected{
      "aabb", "ccdd", "eeff", "gghh", "iijj", "kkll", "mmnn", "oopp"};
  const std::string input =
      " aabb ccdd\reeff\tgghh \riijj \tkkll\r\tmmnn \r\toopp ";

  std::vector<std::string> result;
  pcl::split(result, input, " \r\t");
  EXPECT_EQ(result, expected);
}

TEST(PCL, TestSimpleSplit3)
{
  // Runs of consecutive delimiters are compressed into single separators.
  const std::vector<std::string> expected{
      "aabb", "ccdd", "eeff", "gghh", "iijj", "kkll", "mmnn", "oopp"};
  const std::string input =
      " aabb ccdd\r\reeff\t\tgghh \r\r\riijj \t\tkkll\r\t\tmmnn \r\r\toopp ";

  std::vector<std::string> result;
  pcl::split(result, input, " \r\t");
  EXPECT_EQ(result, expected);
}

/* ---[ */
// gtest entry point: parse gtest command-line flags, then run every TEST
// registered in this translation unit.
int
main(int argc, char** argv)
{
  testing::InitGoogleTest(&argc, argv);
  return (RUN_ALL_TESTS());
}
/* ]--- */