Merge pull request dealii#13745 from drwells/muparser-tls
Avoid doing extra TLS lookups in FunctionParser.
masterleinad committed May 19, 2022
2 parents 33c6797 + fc885a1 commit 5ab60ee
Showing 5 changed files with 122 additions and 63 deletions.
5 changes: 5 additions & 0 deletions doc/news/changes/minor/20220516DavidWells
@@ -0,0 +1,5 @@
Improved: The thread-local storage in FunctionParser has been reworked to avoid
extra locking and unlocking calls. This makes evaluations of simple expressions
about three times faster.
<br>
(David Wells, 2022/05/16)
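
The consolidation this entry describes can be illustrated outside of deal.II. Below is a minimal sketch of the pattern, assuming a hypothetical Evaluator class and a placeholder computation in place of a real parser call; only Threads::ThreadLocalStorage is taken from the library.

```cpp
#include <deal.II/base/thread_local_storage.h>

#include <vector>

// All mutable per-thread state lives in one struct, so an evaluation needs a
// single thread-local lookup instead of one lookup per member.
struct PerThreadState
{
  PerThreadState()                       = default;
  PerThreadState(const PerThreadState &) = delete;

  std::vector<double> vars; // scratch buffer for the independent variables
};

// Hypothetical user of the pattern (not a deal.II class).
class Evaluator
{
public:
  double evaluate(const double x) const
  {
    PerThreadState &state = data.get(); // the only TLS access in this call
    if (state.vars.empty())
      state.vars.resize(1);
    state.vars[0] = x;
    return state.vars[0] * state.vars[0]; // stands in for parser->Eval()
  }

private:
  mutable dealii::Threads::ThreadLocalStorage<PerThreadState> data;
};

int main()
{
  Evaluator e;
  return e.evaluate(3.0) == 9.0 ? 0 : 1;
}
```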
42 changes: 35 additions & 7 deletions include/deal.II/base/function_parser.h
@@ -396,19 +396,48 @@ class FunctionParser : public AutoDerivativeFunction<dim>
//@}

private:
#ifdef DEAL_II_WITH_MUPARSER
/**
* Place for the variables for each thread
* Class containing the mutable state required by muParser.
*
* @note For performance reasons it is best to put all mutable state in a
* single object so that, for each function call, we only need to get
* thread-local data exactly once.
*/
mutable Threads::ThreadLocalStorage<std::vector<double>> vars;
struct ParserData
{
/**
* Default constructor. Threads::ThreadLocalStorage requires that objects be
* either default- or copy-constructible: make sure we satisfy the first
* case by declaring it here.
*/
ParserData() = default;

/**
* std::is_copy_constructible gives the wrong answer for containers with
* non-copy constructible types (e.g., std::vector<std::unique_ptr<int>>) -
* for more information, see the documentation of
* Threads::ThreadLocalStorage. Hence, to avoid compilation failures, just
* delete the copy constructor completely.
*/
ParserData(const ParserData &) = delete;

/**
* Scratch array used to set independent variables (i.e., x, y, and t)
* before each muParser call.
*/
std::vector<double> vars;

/**
* The actual muParser parser objects (hidden with PIMPL).
*/
std::vector<std::unique_ptr<internal::muParserBase>> parsers;
};

/**
* The muParser objects (hidden with the PIMPL idiom) for each thread (and one
* for each component).
*/
mutable Threads::ThreadLocalStorage<
std::vector<std::unique_ptr<internal::muParserBase>>>
fp;
mutable Threads::ThreadLocalStorage<ParserData> parser_data;

/**
* An array to keep track of all the constants, required to initialize fp in
@@ -430,7 +459,6 @@ class FunctionParser : public AutoDerivativeFunction<dim>
*/
void
init_muparser() const;
#endif

/**
* An array of function expressions (one per component), required to
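
The comment in ParserData about std::is_copy_constructible refers to a standard-library quirk that is easy to demonstrate in isolation; the snippet below is a stand-alone illustration, not part of the patch.

```cpp
#include <memory>
#include <type_traits>
#include <vector>

int main()
{
  // The trait reports "copy constructible" because std::vector declares its
  // copy constructor unconditionally, regardless of the element type...
  static_assert(
    std::is_copy_constructible<std::vector<std::unique_ptr<int>>>::value,
    "the trait claims the vector is copyable");

  std::vector<std::unique_ptr<int>> v;
  // ...yet actually copying it fails to compile, because unique_ptr is
  // move-only. Uncommenting the next line produces a hard error:
  // auto w = v;

  return 0;
}
```

Deleting ParserData's copy constructor sidesteps the question entirely: the class is unambiguously non-copyable, and Threads::ThreadLocalStorage falls back to default construction.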
42 changes: 35 additions & 7 deletions include/deal.II/base/tensor_function_parser.h
@@ -262,19 +262,48 @@ class TensorFunctionParser : public TensorFunction<rank, dim, Number>
//@}

private:
#ifdef DEAL_II_WITH_MUPARSER
/**
* Place for the variables for each thread
* Class containing the mutable state required by muParser.
*
* @note For performance reasons it is best to put all mutable state in a
* single object so that, for each function call, we only need to get
* thread-local data exactly once.
*/
mutable Threads::ThreadLocalStorage<std::vector<double>> vars;
struct ParserData
{
/**
* Default constructor. Threads::ThreadLocalStorage requires that objects be
* either default- or copy-constructible: make sure we satisfy the first
* case by declaring it here.
*/
ParserData() = default;

/**
* std::is_copy_constructible gives the wrong answer for containers with
* non-copy constructible types (e.g., std::vector<std::unique_ptr<int>>) -
* for more information, see the documentation of
* Threads::ThreadLocalStorage. Hence, to avoid compilation failures, just
* delete the copy constructor completely.
*/
ParserData(const ParserData &) = delete;

/**
* Scratch array used to set independent variables (i.e., x, y, and t)
* before each muParser call.
*/
std::vector<double> vars;

/**
* The actual muParser parser objects (hidden with PIMPL).
*/
std::vector<std::unique_ptr<internal::muParserBase>> parsers;
};

/**
* The muParser objects (hidden with the PIMPL idiom) for each thread (and one
* for each component).
*/
mutable Threads::ThreadLocalStorage<
std::vector<std::unique_ptr<internal::muParserBase>>>
tfp;
mutable Threads::ThreadLocalStorage<ParserData> parser_data;

/**
* An array to keep track of all the constants, required to initialize tfp in
@@ -296,7 +325,6 @@ class TensorFunctionParser : public TensorFunction<rank, dim, Number>
*/
void
init_muparser() const;
#endif

/**
* An array of function expressions (one per component), required to
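
Both headers describe the parser members as hidden with PIMPL: the installed headers only see the opaque internal::muParserBase, while the concrete class derived from mu::Parser lives in the source files. A generic single-file sketch of that idiom follows; the names are illustrative, not the actual deal.II internals.

```cpp
#include <iostream>
#include <memory>

namespace internal
{
  // Opaque base class: the only parser-related type a header needs to expose.
  struct ParserBase
  {
    virtual ~ParserBase() = default;
  };
} // namespace internal

namespace
{
  // The concrete type stays in the .cc file, next to the third-party include.
  struct ConcreteParser : internal::ParserBase
  {
    double eval() const
    {
      return 42.0; // stands in for mu::Parser::Eval()
    }
  };
} // namespace

class Widget
{
public:
  Widget()
    : impl(std::make_unique<ConcreteParser>())
  {}

  double evaluate() const
  {
    // The implementation knows the concrete type and can cast back to it,
    // mirroring the dynamic_cast/static_cast pattern used in the diff.
    return static_cast<const ConcreteParser &>(*impl).eval();
  }

private:
  std::unique_ptr<internal::ParserBase> impl;
};

int main()
{
  std::cout << Widget().evaluate() << std::endl; // prints 42
}
```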
46 changes: 23 additions & 23 deletions source/base/function_parser.cc
@@ -87,7 +87,7 @@ FunctionParser<dim>::initialize(const std::string & variables,
const std::map<std::string, double> &constants,
const bool time_dependent)
{
this->fp.clear(); // this will reset all thread-local objects
this->parser_data.clear(); // this will reset all thread-local objects

this->constants = constants;
this->var_names = Utilities::split_string_list(variables, ',');
@@ -158,27 +158,25 @@ FunctionParser<dim>::init_muparser() const
// check that we have not already initialized the parser on the
// current thread, i.e., that the current function is only called
// once per thread
Assert(fp.get().size() == 0, ExcInternalError());
ParserData &data = parser_data.get();
Assert(data.parsers.size() == 0 && data.vars.size() == 0, ExcInternalError());

// initialize the objects for the current thread (fp.get() and
// vars.get())
fp.get().reserve(this->n_components);
vars.get().resize(var_names.size());
// initialize the objects for the current thread
data.parsers.reserve(this->n_components);
data.vars.resize(var_names.size());
for (unsigned int component = 0; component < this->n_components; ++component)
{
fp.get().emplace_back(
std::make_unique<internal::FunctionParserImplementation::Parser>());
data.parsers.emplace_back(
new internal::FunctionParserImplementation::Parser());
mu::Parser &parser =
dynamic_cast<internal::FunctionParserImplementation::Parser &>(
*fp.get().back());
*data.parsers.back());

for (const auto &constant : constants)
{
parser.DefineConst(constant.first, constant.second);
}
parser.DefineConst(constant.first, constant.second);

for (unsigned int iv = 0; iv < var_names.size(); ++iv)
parser.DefineVar(var_names[iv], &vars.get()[iv]);
parser.DefineVar(var_names[iv], &data.vars[iv]);

// define some compatibility functions:
parser.DefineFun("if", internal::FunctionParser::mu_if, true);
@@ -282,25 +280,26 @@ FunctionParser<dim>::value(const Point<dim> & p,
AssertIndexRange(component, this->n_components);

// initialize the parser if that hasn't happened yet on the current thread
if (fp.get().size() == 0)
ParserData &data = parser_data.get();
if (data.vars.size() == 0)
init_muparser();

for (unsigned int i = 0; i < dim; ++i)
vars.get()[i] = p(i);
data.vars[i] = p(i);
if (dim != n_vars)
vars.get()[dim] = this->get_time();
data.vars[dim] = this->get_time();

try
{
Assert(dynamic_cast<internal::FunctionParserImplementation::Parser *>(
fp.get()[component].get()),
data.parsers[component].get()),
ExcInternalError());
// using dynamic_cast in the next line is about 6% slower than
// static_cast, so use the assertion above for debugging and disable
// clang-tidy:
mu::Parser &parser =
static_cast<internal::FunctionParserImplementation::Parser &>( // NOLINT
*fp.get()[component]);
*data.parsers[component]);
return parser.Eval();
}
catch (mu::ParserError &e)
@@ -328,23 +327,24 @@ FunctionParser<dim>::vector_value(const Point<dim> &p,


// initialize the parser if that hasn't happened yet on the current thread
if (fp.get().size() == 0)
ParserData &data = parser_data.get();
if (data.vars.size() == 0)
init_muparser();

for (unsigned int i = 0; i < dim; ++i)
vars.get()[i] = p(i);
data.vars[i] = p(i);
if (dim != n_vars)
vars.get()[dim] = this->get_time();
data.vars[dim] = this->get_time();

for (unsigned int component = 0; component < this->n_components; ++component)
{
// Same comment in value() applies here too:
Assert(dynamic_cast<internal::FunctionParserImplementation::Parser *>(
fp.get()[component].get()),
data.parsers[component].get()),
ExcInternalError());
mu::Parser &parser =
static_cast<internal::FunctionParserImplementation::Parser &>( // NOLINT
*fp.get()[component]);
*data.parsers[component]);

values(component) = parser.Eval();
}
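
A note on why data.vars has to live alongside data.parsers: muParser binds variables by address when DefineVar is called, so each Eval() reads whatever values currently sit in that buffer. The stand-alone sketch below shows that usage directly against the library, assuming muParser is installed and available as <muParser.h>.

```cpp
#include <muParser.h>

#include <iostream>
#include <vector>

int main()
{
  // The buffer must stay alive (and at a fixed address) for as long as the
  // parser may be evaluated.
  std::vector<double> vars(2, 0.0); // [x, t]

  mu::Parser parser;
  parser.DefineConst("pi", 3.141592653589793);
  parser.DefineVar("x", &vars[0]); // bound by pointer, not by value
  parser.DefineVar("t", &vars[1]);
  parser.SetExpr("sin(pi*x) + t");

  // Update the buffer, then evaluate; no re-parsing is needed:
  vars[0] = 0.5;
  vars[1] = 2.0;
  std::cout << parser.Eval() << std::endl; // prints 3

  return 0;
}
```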
50 changes: 24 additions & 26 deletions source/base/tensor_function_parser.cc
@@ -14,6 +14,7 @@
// ---------------------------------------------------------------------


#include <deal.II/base/array_view.h>
#include <deal.II/base/mu_parser_internal.h>
#include <deal.II/base/patterns.h>
#include <deal.II/base/tensor.h>
@@ -87,7 +88,7 @@ TensorFunctionParser<rank, dim, Number>::initialize(
const std::map<std::string, double> &constants,
const bool time_dependent)
{
this->tfp.clear(); // this will reset all thread-local objects
this->parser_data.clear(); // this will reset all thread-local objects

this->constants = constants;
this->var_names = Utilities::split_string_list(variables, ',');
@@ -172,28 +173,26 @@ TensorFunctionParser<rank, dim, Number>::init_muparser() const
// check that we have not already initialized the parser on the
// current thread, i.e., that the current function is only called
// once per thread
Assert(tfp.get().size() == 0, ExcInternalError());
ParserData &data = parser_data.get();
Assert(data.parsers.size() == 0 && data.vars.size() == 0, ExcInternalError());

// initialize the objects for the current thread (tfp.get() and
// vars.get())
tfp.get().reserve(this->n_components);
vars.get().resize(var_names.size());
// initialize the objects for the current thread
data.parsers.reserve(this->n_components);
data.vars.resize(var_names.size());
for (unsigned int component = 0; component < this->n_components; ++component)
{
tfp.get().emplace_back(
data.parsers.emplace_back(
std::make_unique<
internal::TensorFunctionParserImplementation::Parser>());
mu::Parser &parser =
dynamic_cast<internal::TensorFunctionParserImplementation::Parser &>(
*tfp.get().back());
*data.parsers.back());

for (const auto &constant : constants)
{
parser.DefineConst(constant.first, constant.second);
}
parser.DefineConst(constant.first, constant.second);

for (unsigned int iv = 0; iv < var_names.size(); ++iv)
parser.DefineVar(var_names[iv], &vars.get()[iv]);
parser.DefineVar(var_names[iv], &data.vars[iv]);

// define some compatibility functions:
parser.DefineFun("if", internal::FunctionParser::mu_if, true);
@@ -219,7 +218,7 @@ TensorFunctionParser<rank, dim, Number>::init_muparser() const
// space between the name of the function and the opening
// parenthesis. this is awkward because it is not backward
// compatible to the library we used to use before muparser
// (the tfparser library) but also makes no real sense.
// (the fparser library) but also makes no real sense.
// consequently, in the expressions we set, remove any space
// we may find after function names
std::string transformed_expression = expressions[component];
@@ -295,32 +294,30 @@ TensorFunctionParser<rank, dim, Number>::value(const Point<dim> &p) const
Assert(initialized == true, ExcNotInitialized());

// initialize the parser if that hasn't happened yet on the current thread
if (tfp.get().size() == 0)
ParserData &data = parser_data.get();
if (data.vars.size() == 0)
init_muparser();

for (unsigned int i = 0; i < dim; ++i)
vars.get()[i] = p(i);
data.vars[i] = p(i);
if (dim != n_vars)
vars.get()[dim] = this->get_time();
data.vars[dim] = this->get_time();

// initialize tensor with zeros
Tensor<rank, dim, Number> value;
std::array<Number, Tensor<rank, dim, Number>::n_independent_components>
values;

try
{
unsigned int component = 0;
for (Number *value_ptr = value.begin_raw(); value_ptr != value.end_raw();
++value_ptr)
for (unsigned int component = 0; component < values.size(); ++component)
{
Assert(dynamic_cast<
internal::TensorFunctionParserImplementation::Parser *>(
tfp.get()[component].get()),
data.parsers[component].get()),
ExcInternalError());
mu::Parser &parser = static_cast< // NOLINT
internal::TensorFunctionParserImplementation::Parser &>(
*tfp.get()[component]);
*value_ptr = parser.Eval();
++component;
*data.parsers[component]);
values[component] = parser.Eval();
} // for
} // try
catch (mu::ParserError &e)
@@ -335,7 +332,8 @@ TensorFunctionParser<rank, dim, Number>::value(const Point<dim> &p) const
return Tensor<rank, dim, Number>();
} // catch

return value;
return Tensor<rank, dim, Number>(
make_array_view(values.begin(), values.end()));
}


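
The new return statement in value() fills a flat std::array with one entry per independent tensor component and constructs the Tensor from it via make_array_view(). A minimal stand-alone sketch of that construction follows (rank 2, dim 2, arbitrary values); it assumes the same ArrayView-based Tensor constructor that the patch itself relies on.

```cpp
#include <deal.II/base/array_view.h>
#include <deal.II/base/tensor.h>

#include <array>
#include <iostream>

int main()
{
  using namespace dealii;

  // One entry per independent component, in unrolled order.
  std::array<double, Tensor<2, 2>::n_independent_components> values = {
    {1.0, 2.0, 3.0, 4.0}};

  const Tensor<2, 2> T(make_array_view(values.begin(), values.end()));

  std::cout << T[0][0] << std::endl; // prints 1
  return 0;
}
```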
