Skip to content

Commit

Permalink
Specify number of threads during pm model creation
Browse files Browse the repository at this point in the history
  - This used to be set during compilation. That was find for testing but
    for general usage it should be settable during simulation time.

  - It is still recommended to just let the system pick the number of
    threads unless you have specific reasons to do otherwise, e.g. comparing
    performance for research reasons.
  • Loading branch information
mahge committed Oct 30, 2020
1 parent d9b9a66 commit 20b24c4
Show file tree
Hide file tree
Showing 8 changed files with 29 additions and 90 deletions.
7 changes: 0 additions & 7 deletions OMCompiler/SimulationRuntime/ParModelica/auto/Makefile.common
Expand Up @@ -28,13 +28,6 @@ else
CPPFLAGS += -DUSE_FLOW_SCHEDULER
endif

ifeq ($(NUM_THREADS), )
# $(info ************ NUM THREADS NOT SET. SETTIN TO $(DEFAULT_NUM_THREADS) ************)
CPPFLAGS += -DNUM_THREADS=$(DEFAULT_NUM_THREADS)
else
CPPFLAGS += -DNUM_THREADS=$(NUM_THREADS)
endif


libParModelicaAuto.a: $(OBJS)
@rm -f $@
Expand Down
55 changes: 4 additions & 51 deletions OMCompiler/SimulationRuntime/ParModelica/auto/om_pm_interface.cpp
Expand Up @@ -49,6 +49,9 @@ void* PM_Model_create(const char* model_name, DATA* data, threadData_t* threadDa
OMModel* pm_om_model = new OMModel(model_name);
pm_om_model->data = data;
pm_om_model->threadData = threadData;

pm_om_model->max_num_threads = tbb::this_task_arena::max_concurrency();

return pm_om_model;
}

Expand All @@ -59,29 +62,6 @@ void PM_Model_load_ODE_system(void* v_model, FunctionType* ode_system_funcs) {
model.load_ODE_system();
}

// void PM_functionInitialEquations(int size, FunctionType* functionInitialEquations_systems) {

// // pm_om_model.ini_system_funcs = functionInitialEquations_systems;
// // pm_om_model.INI_scheduler.execute();
// pm_om_model.INI_scheduler.execution_timer.start_timer();
// for(int i = 0; i < size; ++i)
// functionInitialEquations_systems[i](data, threadData);
// pm_om_model.INI_scheduler.execution_timer.stop_timer();

// }

// void PM_functionDAE(int size, FunctionType* functionDAE_systems) {

// // pm_om_model.dae_system_funcs = functionDAE_systems;
// // pm_om_model.DAE_scheduler.execute();

// pm_om_model.DAE_scheduler.execution_timer.start_timer();
// for(int i = 0; i < size; ++i)
// functionDAE_systems[i](data, threadData);
// pm_om_model.DAE_scheduler.execution_timer.stop_timer();

// }

void PM_evaluate_ODE_system(void* v_model) {

OMModel& model = *(static_cast<OMModel*>(v_model));
Expand All @@ -93,17 +73,6 @@ void PM_evaluate_ODE_system(void* v_model) {
// pm_om_model.ODE_scheduler.execution_timer.stop_timer();
}

// void PM_functionAlg(int size, DATA* data, threadData_t* threadData, FunctionType* functionAlg_systems) {

// pm_om_model.ALG_scheduler.execution_timer.start_timer();

// for(int i = 0; i < size; ++i)
// functionAlg_systems[i](data, threadData);

// pm_om_model.ALG_scheduler.execution_timer.start_timer();

// }

void seq_ode_timer_start() {
seq_ode_timer.start_timer();
}
Expand All @@ -117,7 +86,6 @@ void seq_ode_timer_reset() {
}

void seq_ode_timer_get_elapsed_time2() {
// return seq_ode_timer.get_elapsed_time();
std::cerr << seq_ode_timer.get_elapsed_time();
}

Expand All @@ -128,21 +96,6 @@ double seq_ode_timer_get_elapsed_time() {
void dump_times(void* v_model) {
OMModel& model = *(static_cast<OMModel*>(v_model));

// utility::log("") << "Total INI: " << model.INI_scheduler.execution_timer.get_elapsed_time() << std::endl;
// utility::log("") << "Total DAE: " << model.DAE_scheduler.execution_timer.get_elapsed_time() << std::endl;
// utility::log("") << "Total ALG: " << model.ALG_scheduler.execution_timer.get_elapsed_time() << std::endl;

// double total = 0;
// for(unsigned int i = 0; i < model.ODE_scheduler.parallel_eval_costs.size(); ++i) {
// double c = model.ODE_scheduler.parallel_eval_costs[i];
// total += c;
// utility::log("") << i+1 << ": " << c << " : " << total/(i+1) << std::endl;
// }

// utility::log("") << "Total ODE added: " << total << std::endl;
// std::cout << model.ODE_scheduler.execution_timer.get_elapsed_time() << std::endl;
// std::cout << model.ODE_scheduler.total_evaluations << " : " << model.ODE_scheduler.total_parallel_cost <<
// std::endl;
#ifdef USE_LEVEL_SCHEDULER
utility::log("") << "Using level scheduler" << std::endl;
#else
Expand All @@ -152,7 +105,7 @@ void dump_times(void* v_model) {
#error "please specify scheduler. See makefile"
#endif
#endif
utility::log("") << "Nr.of threads " << NUM_THREADS << std::endl;
utility::log("") << "Nr.of threads " << model.max_num_threads << std::endl;
utility::log("") << "Nr.of ODE evaluations: " << model.ODE_scheduler.total_evaluations << std::endl;
utility::log("") << "Nr.of profiling ODE Evaluations: " << model.ODE_scheduler.sequential_evaluations << std::endl;
// utility::log("") << "Total ODE evaluation time : " << model.ODE_scheduler.total_parallel_cost << std::endl;
Expand Down
Expand Up @@ -297,7 +297,7 @@ void OMModel::load_from_json(TaskSystemT& task_system, const std::string& eq_to_
task_system.add_node(current_node);
}

std::cout << "Number of tasks = " << node_count << newl;
std::cout << "Number of tasks = " << node_count << std::endl;
}

}} // namespace openmodelica::parmodelica
Expand Up @@ -110,6 +110,7 @@ class OMModel
bool intialized;
DATA* data;
threadData_t* threadData;
size_t max_num_threads;

public:
OMModel(const std::string&);
Expand Down
Expand Up @@ -38,8 +38,6 @@
*/

#include <tbb/flow_graph.h>
#include <tbb/task_scheduler_init.h>

#include "pm_clustering.hpp"

namespace openmodelica { namespace parmodelica {
Expand Down Expand Up @@ -70,7 +68,6 @@ class ClusterDynamicScheduler {
typedef typename TaskType::FunctionType FunctionType;

private:
tbb::task_scheduler_init tbb_system;

tbb::flow::graph dynamic_graph;
tbb::flow::broadcast_node<tbb::flow::continue_msg> flow_root;
Expand All @@ -84,13 +81,14 @@ class ClusterDynamicScheduler {
PMTimer clustering_timer;
TaskSystemType& task_system;

size_t max_num_threads;

int sequential_evaluations;
int total_evaluations;
int parallel_evaluations;

ClusterDynamicScheduler(TaskSystemType& task_system)
: tbb_system(NUM_THREADS)
, flow_root(dynamic_graph)
: flow_root(dynamic_graph)
, flow_graph_created(false)
, task_system(task_system) {
sequential_evaluations = 0;
Expand Down
Expand Up @@ -40,7 +40,6 @@
#include "gc.h"

#include <tbb/parallel_for.h>
#include <tbb/task_scheduler_init.h>
#include <tbb/tick_count.h>

// #include <sys/types.h>
Expand All @@ -65,10 +64,9 @@ struct TBBConcurrentStepExecutor {

private:
GraphType& sys_graph;
std::set<pid_t>& knownthreads;

public:
TBBConcurrentStepExecutor(GraphType& g, std::set<pid_t>& k) : sys_graph(g), knownthreads(k) {}
TBBConcurrentStepExecutor(GraphType& g, std::set<pid_t>& k) : sys_graph(g) {}

void operator()(tbb::blocked_range<ClusteIdIter>& range) const {

Expand Down Expand Up @@ -101,9 +99,11 @@ struct TBBConcurrentStepExecutor {
}
};

template <typename TaskType, typename clustetring1 = cluster_merge_common, /* for now default here*/
template <typename TaskType,
typename clustetring1 = cluster_merge_common, /* for now default here*/
typename clustetring2 = cluster_merge_level_for_bins, /* for now default here*/
typename clustetring3 = cluster_none, typename clustetring4 = cluster_none,
typename clustetring3 = cluster_none,
typename clustetring4 = cluster_none,
typename clustetring5 = cluster_none>
class StepLevels : boost::noncopyable {
public:
Expand All @@ -118,15 +118,13 @@ class StepLevels : boost::noncopyable {
public:
const TaskSystemType& task_system_org;
TaskSystemType task_system;
TBBConcurrentStepExecutor<TaskType> step_executor;

size_t max_num_threads;

bool profiled;
bool schedule_available;

tbb::task_scheduler_init tbb_system;
TBBConcurrentStepExecutor<TaskType> step_executor;

std::set<pid_t> knownthreads;

int total_evaluations;
int parallel_evaluations;
int sequential_evaluations;
Expand All @@ -148,9 +146,8 @@ class StepLevels : boost::noncopyable {
StepLevels(TaskSystemType& ts)
: task_system_org(ts)
, task_system("invalid") // implement a constrctor with no parameters and remove this
, tbb_system(NUM_THREADS)
, step_executor(task_system.sys_graph, knownthreads) {
GC_allow_register_threads();
, step_executor(task_system.sys_graph) {
// GC_allow_register_threads();
// GC_use_threads_discovery();

profiled = false;
Expand Down Expand Up @@ -209,11 +206,11 @@ class StepLevels : boost::noncopyable {
if (task_system.levels_valid == false)
task_system.update_node_levels();

// clustetring1::apply(task_system);
// clustetring1::dump_graph(task_system, std::to_string(this->total_evaluations));
clustetring1::apply(task_system);
clustetring1::dump_graph(task_system);

clustetring2::apply(task_system);
// clustetring2::dump_graph(task_system, std::to_string(this->total_evaluations));
clustetring2::dump_graph(task_system);

clustetring3::apply(task_system);
clustetring3::dump_graph(task_system);
Expand All @@ -231,7 +228,6 @@ class StepLevels : boost::noncopyable {
estimate_speedup();
clustering_timer.stop_timer();

// task_system_org.dump_graphml("original");
}

void execute() {
Expand Down
14 changes: 7 additions & 7 deletions OMCompiler/SimulationRuntime/ParModelica/auto/pm_clustering.hpp
Expand Up @@ -254,7 +254,7 @@ struct cluster_merge_level_for_cost {
static std::string name() { return "cluster_merge_level_for_cost"; }

template <typename TaskSystemType>
static void dump_graph(TaskSystemType& task_system, std::string suffix = "") {
static void dump_graph(TaskSystemType& task_system, std::string suffix = "cluster_merge_level_for_cost") {
task_system.dump_graphml(cluster_merge_level_for_cost::name() + "_" + suffix);
}

Expand Down Expand Up @@ -348,7 +348,7 @@ struct cluster_merge_level_for_bins {
static std::string name() { return "cluster_merge_level_for_bins"; }

template <typename TaskSystemType>
static void dump_graph(TaskSystemType& task_system, std::string suffix = "") {
static void dump_graph(TaskSystemType& task_system, std::string suffix = "cluster_merge_level_for_bins") {
task_system.dump_graphml(cluster_merge_level_for_bins::name() + "_" + suffix);
}

Expand All @@ -365,7 +365,7 @@ struct cluster_merge_level_for_bins {
ClusterLevels& clusters_by_level = task_system.clusters_by_level;
GraphType& sys_graph = task_system.sys_graph;

unsigned nr_of_clusters = NUM_THREADS * 2;
unsigned nr_of_clusters = task_system.max_num_threads * 2;

if (task_system.levels_valid == false)
task_system.update_node_levels();
Expand Down Expand Up @@ -427,7 +427,7 @@ struct cluster_merge_common {
static std::string name() { return "cluster_merge_common"; }

template <typename TaskSystemType>
static void dump_graph(TaskSystemType& task_system, std::string suffix = "") {
static void dump_graph(TaskSystemType& task_system, std::string suffix = "cluster_merge_common") {
task_system.dump_graphml(cluster_merge_common::name() + "_" + suffix);
}

Expand Down Expand Up @@ -543,7 +543,7 @@ struct cluster_merge_single_parent {
static std::string name() { return "cluster_merge_single_parent"; }

template <typename TaskSystemType>
static void dump_graph(TaskSystemType& task_system, std::string suffix = "") {
static void dump_graph(TaskSystemType& task_system, std::string suffix = "cluster_merge_single_parent") {
task_system.dump_graphml(cluster_merge_single_parent::name() + "_" + suffix);
}

Expand Down Expand Up @@ -598,7 +598,7 @@ struct cluster_merge_level_parents {
static std::string name() { return "cluster_merge_level_parents"; }

template <typename TaskSystemType>
static void dump_graph(TaskSystemType& task_system, std::string suffix = "") {
static void dump_graph(TaskSystemType& task_system, std::string suffix = "cluster_merge_level_parents") {
task_system.dump_graphml(cluster_merge_level_parents::name() + "_" + suffix);
}

Expand Down Expand Up @@ -672,7 +672,7 @@ struct cluster_merge_connected_for_cost {
static std::string name() { return "cluster_merge_connected_for_cost"; }

template <typename TaskSystemType>
static void dump_graph(TaskSystemType& task_system, std::string suffix = "") {
static void dump_graph(TaskSystemType& task_system, std::string suffix = "cluster_merge_connected_for_cost") {
task_system.dump_graphml(cluster_merge_connected_for_cost::name() + "_" + suffix);
}

Expand Down
2 changes: 0 additions & 2 deletions OMCompiler/SimulationRuntime/ParModelica/auto/pm_utility.hpp
Expand Up @@ -47,8 +47,6 @@
#define NOMINMAX
#endif

#define newl "\n"

namespace openmodelica { namespace parmodelica { namespace utility {

extern std::ostringstream log_stream;
Expand Down

0 comments on commit 20b24c4

Please sign in to comment.