From 20b24c46394262aff290b610afd6157c90f406a0 Mon Sep 17 00:00:00 2001 From: Mahder Gebremedhin Date: Thu, 29 Oct 2020 12:49:09 +0100 Subject: [PATCH] Specify number of threads during pm model creation - This used to be set during compilation. That was find for testing but for general usage it should be settable during simulation time. - It is still recommended to just let the system pick the number of threads unless you have specific reasons to do otherwise, e.g. comparing performance for research reasons. --- .../ParModelica/auto/Makefile.common | 7 --- .../ParModelica/auto/om_pm_interface.cpp | 55 ++----------------- .../ParModelica/auto/om_pm_model.cpp | 2 +- .../ParModelica/auto/om_pm_model.hpp | 1 + .../auto/pm_cluster_dynamic_scheduler.hpp | 8 +-- .../auto/pm_cluster_level_scheduler.hpp | 30 +++++----- .../ParModelica/auto/pm_clustering.hpp | 14 ++--- .../ParModelica/auto/pm_utility.hpp | 2 - 8 files changed, 29 insertions(+), 90 deletions(-) diff --git a/OMCompiler/SimulationRuntime/ParModelica/auto/Makefile.common b/OMCompiler/SimulationRuntime/ParModelica/auto/Makefile.common index e6ed83f4872..725625645f9 100644 --- a/OMCompiler/SimulationRuntime/ParModelica/auto/Makefile.common +++ b/OMCompiler/SimulationRuntime/ParModelica/auto/Makefile.common @@ -28,13 +28,6 @@ else CPPFLAGS += -DUSE_FLOW_SCHEDULER endif -ifeq ($(NUM_THREADS), ) -# $(info ************ NUM THREADS NOT SET. SETTIN TO $(DEFAULT_NUM_THREADS) ************) -CPPFLAGS += -DNUM_THREADS=$(DEFAULT_NUM_THREADS) -else -CPPFLAGS += -DNUM_THREADS=$(NUM_THREADS) -endif - libParModelicaAuto.a: $(OBJS) @rm -f $@ diff --git a/OMCompiler/SimulationRuntime/ParModelica/auto/om_pm_interface.cpp b/OMCompiler/SimulationRuntime/ParModelica/auto/om_pm_interface.cpp index 23d906cbc5c..e2044d9ae92 100644 --- a/OMCompiler/SimulationRuntime/ParModelica/auto/om_pm_interface.cpp +++ b/OMCompiler/SimulationRuntime/ParModelica/auto/om_pm_interface.cpp @@ -49,6 +49,9 @@ void* PM_Model_create(const char* model_name, DATA* data, threadData_t* threadDa OMModel* pm_om_model = new OMModel(model_name); pm_om_model->data = data; pm_om_model->threadData = threadData; + + pm_om_model->max_num_threads = tbb::this_task_arena::max_concurrency(); + return pm_om_model; } @@ -59,29 +62,6 @@ void PM_Model_load_ODE_system(void* v_model, FunctionType* ode_system_funcs) { model.load_ODE_system(); } -// void PM_functionInitialEquations(int size, FunctionType* functionInitialEquations_systems) { - -// // pm_om_model.ini_system_funcs = functionInitialEquations_systems; -// // pm_om_model.INI_scheduler.execute(); -// pm_om_model.INI_scheduler.execution_timer.start_timer(); -// for(int i = 0; i < size; ++i) -// functionInitialEquations_systems[i](data, threadData); -// pm_om_model.INI_scheduler.execution_timer.stop_timer(); - -// } - -// void PM_functionDAE(int size, FunctionType* functionDAE_systems) { - -// // pm_om_model.dae_system_funcs = functionDAE_systems; -// // pm_om_model.DAE_scheduler.execute(); - -// pm_om_model.DAE_scheduler.execution_timer.start_timer(); -// for(int i = 0; i < size; ++i) -// functionDAE_systems[i](data, threadData); -// pm_om_model.DAE_scheduler.execution_timer.stop_timer(); - -// } - void PM_evaluate_ODE_system(void* v_model) { OMModel& model = *(static_cast(v_model)); @@ -93,17 +73,6 @@ void PM_evaluate_ODE_system(void* v_model) { // pm_om_model.ODE_scheduler.execution_timer.stop_timer(); } -// void PM_functionAlg(int size, DATA* data, threadData_t* threadData, FunctionType* functionAlg_systems) { - -// pm_om_model.ALG_scheduler.execution_timer.start_timer(); - -// for(int i = 0; i < size; ++i) -// functionAlg_systems[i](data, threadData); - -// pm_om_model.ALG_scheduler.execution_timer.start_timer(); - -// } - void seq_ode_timer_start() { seq_ode_timer.start_timer(); } @@ -117,7 +86,6 @@ void seq_ode_timer_reset() { } void seq_ode_timer_get_elapsed_time2() { - // return seq_ode_timer.get_elapsed_time(); std::cerr << seq_ode_timer.get_elapsed_time(); } @@ -128,21 +96,6 @@ double seq_ode_timer_get_elapsed_time() { void dump_times(void* v_model) { OMModel& model = *(static_cast(v_model)); - // utility::log("") << "Total INI: " << model.INI_scheduler.execution_timer.get_elapsed_time() << std::endl; - // utility::log("") << "Total DAE: " << model.DAE_scheduler.execution_timer.get_elapsed_time() << std::endl; - // utility::log("") << "Total ALG: " << model.ALG_scheduler.execution_timer.get_elapsed_time() << std::endl; - - // double total = 0; - // for(unsigned int i = 0; i < model.ODE_scheduler.parallel_eval_costs.size(); ++i) { - // double c = model.ODE_scheduler.parallel_eval_costs[i]; - // total += c; - // utility::log("") << i+1 << ": " << c << " : " << total/(i+1) << std::endl; - // } - - // utility::log("") << "Total ODE added: " << total << std::endl; - // std::cout << model.ODE_scheduler.execution_timer.get_elapsed_time() << std::endl; - // std::cout << model.ODE_scheduler.total_evaluations << " : " << model.ODE_scheduler.total_parallel_cost << - // std::endl; #ifdef USE_LEVEL_SCHEDULER utility::log("") << "Using level scheduler" << std::endl; #else @@ -152,7 +105,7 @@ void dump_times(void* v_model) { #error "please specify scheduler. See makefile" #endif #endif - utility::log("") << "Nr.of threads " << NUM_THREADS << std::endl; + utility::log("") << "Nr.of threads " << model.max_num_threads << std::endl; utility::log("") << "Nr.of ODE evaluations: " << model.ODE_scheduler.total_evaluations << std::endl; utility::log("") << "Nr.of profiling ODE Evaluations: " << model.ODE_scheduler.sequential_evaluations << std::endl; // utility::log("") << "Total ODE evaluation time : " << model.ODE_scheduler.total_parallel_cost << std::endl; diff --git a/OMCompiler/SimulationRuntime/ParModelica/auto/om_pm_model.cpp b/OMCompiler/SimulationRuntime/ParModelica/auto/om_pm_model.cpp index 379a6964a1f..3dc51591a7f 100644 --- a/OMCompiler/SimulationRuntime/ParModelica/auto/om_pm_model.cpp +++ b/OMCompiler/SimulationRuntime/ParModelica/auto/om_pm_model.cpp @@ -297,7 +297,7 @@ void OMModel::load_from_json(TaskSystemT& task_system, const std::string& eq_to_ task_system.add_node(current_node); } - std::cout << "Number of tasks = " << node_count << newl; + std::cout << "Number of tasks = " << node_count << std::endl; } }} // namespace openmodelica::parmodelica diff --git a/OMCompiler/SimulationRuntime/ParModelica/auto/om_pm_model.hpp b/OMCompiler/SimulationRuntime/ParModelica/auto/om_pm_model.hpp index e2dc5d875f7..9ffb71128b6 100644 --- a/OMCompiler/SimulationRuntime/ParModelica/auto/om_pm_model.hpp +++ b/OMCompiler/SimulationRuntime/ParModelica/auto/om_pm_model.hpp @@ -110,6 +110,7 @@ class OMModel bool intialized; DATA* data; threadData_t* threadData; + size_t max_num_threads; public: OMModel(const std::string&); diff --git a/OMCompiler/SimulationRuntime/ParModelica/auto/pm_cluster_dynamic_scheduler.hpp b/OMCompiler/SimulationRuntime/ParModelica/auto/pm_cluster_dynamic_scheduler.hpp index 0655b4840f5..4865e1da150 100644 --- a/OMCompiler/SimulationRuntime/ParModelica/auto/pm_cluster_dynamic_scheduler.hpp +++ b/OMCompiler/SimulationRuntime/ParModelica/auto/pm_cluster_dynamic_scheduler.hpp @@ -38,8 +38,6 @@ */ #include -#include - #include "pm_clustering.hpp" namespace openmodelica { namespace parmodelica { @@ -70,7 +68,6 @@ class ClusterDynamicScheduler { typedef typename TaskType::FunctionType FunctionType; private: - tbb::task_scheduler_init tbb_system; tbb::flow::graph dynamic_graph; tbb::flow::broadcast_node flow_root; @@ -84,13 +81,14 @@ class ClusterDynamicScheduler { PMTimer clustering_timer; TaskSystemType& task_system; + size_t max_num_threads; + int sequential_evaluations; int total_evaluations; int parallel_evaluations; ClusterDynamicScheduler(TaskSystemType& task_system) - : tbb_system(NUM_THREADS) - , flow_root(dynamic_graph) + : flow_root(dynamic_graph) , flow_graph_created(false) , task_system(task_system) { sequential_evaluations = 0; diff --git a/OMCompiler/SimulationRuntime/ParModelica/auto/pm_cluster_level_scheduler.hpp b/OMCompiler/SimulationRuntime/ParModelica/auto/pm_cluster_level_scheduler.hpp index 7921f3121ae..421cec52d33 100644 --- a/OMCompiler/SimulationRuntime/ParModelica/auto/pm_cluster_level_scheduler.hpp +++ b/OMCompiler/SimulationRuntime/ParModelica/auto/pm_cluster_level_scheduler.hpp @@ -40,7 +40,6 @@ #include "gc.h" #include -#include #include // #include @@ -65,10 +64,9 @@ struct TBBConcurrentStepExecutor { private: GraphType& sys_graph; - std::set& knownthreads; public: - TBBConcurrentStepExecutor(GraphType& g, std::set& k) : sys_graph(g), knownthreads(k) {} + TBBConcurrentStepExecutor(GraphType& g, std::set& k) : sys_graph(g) {} void operator()(tbb::blocked_range& range) const { @@ -101,9 +99,11 @@ struct TBBConcurrentStepExecutor { } }; -template class StepLevels : boost::noncopyable { public: @@ -118,15 +118,13 @@ class StepLevels : boost::noncopyable { public: const TaskSystemType& task_system_org; TaskSystemType task_system; + TBBConcurrentStepExecutor step_executor; + + size_t max_num_threads; bool profiled; bool schedule_available; - tbb::task_scheduler_init tbb_system; - TBBConcurrentStepExecutor step_executor; - - std::set knownthreads; - int total_evaluations; int parallel_evaluations; int sequential_evaluations; @@ -148,9 +146,8 @@ class StepLevels : boost::noncopyable { StepLevels(TaskSystemType& ts) : task_system_org(ts) , task_system("invalid") // implement a constrctor with no parameters and remove this - , tbb_system(NUM_THREADS) - , step_executor(task_system.sys_graph, knownthreads) { - GC_allow_register_threads(); + , step_executor(task_system.sys_graph) { + // GC_allow_register_threads(); // GC_use_threads_discovery(); profiled = false; @@ -209,11 +206,11 @@ class StepLevels : boost::noncopyable { if (task_system.levels_valid == false) task_system.update_node_levels(); - // clustetring1::apply(task_system); - // clustetring1::dump_graph(task_system, std::to_string(this->total_evaluations)); + clustetring1::apply(task_system); + clustetring1::dump_graph(task_system); clustetring2::apply(task_system); - // clustetring2::dump_graph(task_system, std::to_string(this->total_evaluations)); + clustetring2::dump_graph(task_system); clustetring3::apply(task_system); clustetring3::dump_graph(task_system); @@ -231,7 +228,6 @@ class StepLevels : boost::noncopyable { estimate_speedup(); clustering_timer.stop_timer(); - // task_system_org.dump_graphml("original"); } void execute() { diff --git a/OMCompiler/SimulationRuntime/ParModelica/auto/pm_clustering.hpp b/OMCompiler/SimulationRuntime/ParModelica/auto/pm_clustering.hpp index 09eb00fe746..307032ec8ba 100644 --- a/OMCompiler/SimulationRuntime/ParModelica/auto/pm_clustering.hpp +++ b/OMCompiler/SimulationRuntime/ParModelica/auto/pm_clustering.hpp @@ -254,7 +254,7 @@ struct cluster_merge_level_for_cost { static std::string name() { return "cluster_merge_level_for_cost"; } template - static void dump_graph(TaskSystemType& task_system, std::string suffix = "") { + static void dump_graph(TaskSystemType& task_system, std::string suffix = "cluster_merge_level_for_cost") { task_system.dump_graphml(cluster_merge_level_for_cost::name() + "_" + suffix); } @@ -348,7 +348,7 @@ struct cluster_merge_level_for_bins { static std::string name() { return "cluster_merge_level_for_bins"; } template - static void dump_graph(TaskSystemType& task_system, std::string suffix = "") { + static void dump_graph(TaskSystemType& task_system, std::string suffix = "cluster_merge_level_for_bins") { task_system.dump_graphml(cluster_merge_level_for_bins::name() + "_" + suffix); } @@ -365,7 +365,7 @@ struct cluster_merge_level_for_bins { ClusterLevels& clusters_by_level = task_system.clusters_by_level; GraphType& sys_graph = task_system.sys_graph; - unsigned nr_of_clusters = NUM_THREADS * 2; + unsigned nr_of_clusters = task_system.max_num_threads * 2; if (task_system.levels_valid == false) task_system.update_node_levels(); @@ -427,7 +427,7 @@ struct cluster_merge_common { static std::string name() { return "cluster_merge_common"; } template - static void dump_graph(TaskSystemType& task_system, std::string suffix = "") { + static void dump_graph(TaskSystemType& task_system, std::string suffix = "cluster_merge_common") { task_system.dump_graphml(cluster_merge_common::name() + "_" + suffix); } @@ -543,7 +543,7 @@ struct cluster_merge_single_parent { static std::string name() { return "cluster_merge_single_parent"; } template - static void dump_graph(TaskSystemType& task_system, std::string suffix = "") { + static void dump_graph(TaskSystemType& task_system, std::string suffix = "cluster_merge_single_parent") { task_system.dump_graphml(cluster_merge_single_parent::name() + "_" + suffix); } @@ -598,7 +598,7 @@ struct cluster_merge_level_parents { static std::string name() { return "cluster_merge_level_parents"; } template - static void dump_graph(TaskSystemType& task_system, std::string suffix = "") { + static void dump_graph(TaskSystemType& task_system, std::string suffix = "cluster_merge_level_parents") { task_system.dump_graphml(cluster_merge_level_parents::name() + "_" + suffix); } @@ -672,7 +672,7 @@ struct cluster_merge_connected_for_cost { static std::string name() { return "cluster_merge_connected_for_cost"; } template - static void dump_graph(TaskSystemType& task_system, std::string suffix = "") { + static void dump_graph(TaskSystemType& task_system, std::string suffix = "cluster_merge_connected_for_cost") { task_system.dump_graphml(cluster_merge_connected_for_cost::name() + "_" + suffix); } diff --git a/OMCompiler/SimulationRuntime/ParModelica/auto/pm_utility.hpp b/OMCompiler/SimulationRuntime/ParModelica/auto/pm_utility.hpp index 2d05378499a..0bae71da23d 100644 --- a/OMCompiler/SimulationRuntime/ParModelica/auto/pm_utility.hpp +++ b/OMCompiler/SimulationRuntime/ParModelica/auto/pm_utility.hpp @@ -47,8 +47,6 @@ #define NOMINMAX #endif -#define newl "\n" - namespace openmodelica { namespace parmodelica { namespace utility { extern std::ostringstream log_stream;