diff --git a/dali/common.h b/dali/common.h index ada763cb6a..479d0f3764 100644 --- a/dali/common.h +++ b/dali/common.h @@ -55,6 +55,29 @@ using uint32 = uint32_t; // Basic data type for our indices and dimension sizes typedef int64_t Index; +enum class OpType { + GPU = 0, + CPU = 1, + MIXED = 2, + SUPPORT = 3, + COUNT = 4 +}; + +static std::string to_string(OpType op_type) { + switch (op_type) { + case OpType::CPU: + return "cpu"; + case OpType::GPU: + return "gpu"; + case OpType::MIXED: + return "mixed"; + case OpType::SUPPORT: + return "support"; + default: + return ""; + } +} + struct DALISize { int width; int height; diff --git a/dali/pipeline/executor/executor.cc b/dali/pipeline/executor/executor.cc index 66cf326e5d..f40d82e2c6 100644 --- a/dali/pipeline/executor/executor.cc +++ b/dali/pipeline/executor/executor.cc @@ -218,10 +218,10 @@ void Executor::RunGPU() { // Record events for each output requested by the user cudaEvent_t event = gpu_output_events_[i].GetEvent(queue_idx); - if (graph_->NodeType(src_id) == DALI_MIXED) { + if (graph_->NodeType(src_id) == OpType::MIXED) { auto &ws = wsb.mixed_op_data[src_idx]; CUDA_CALL(cudaEventRecord(event, ws.stream())); - } else if (graph_->NodeType(src_id) == DALI_GPU) { + } else if (graph_->NodeType(src_id) == OpType::GPU) { auto &ws = wsb.gpu_op_data[src_idx]; CUDA_CALL(cudaEventRecord(event, ws.stream())); } else { @@ -401,8 +401,8 @@ void Executor::SetupDataForGraph(WorkspaceBlob *wsb) { // Get each regular input and add them to this op's workspace. NodeID parent_node_id = graph_->TensorSourceID(node.spec.Input(j)); - DALIOpType parent_op_type = graph_->NodeType(parent_node_id); - DALI_ENFORCE(parent_op_type == DALI_SUPPORT, + OpType parent_op_type = graph_->NodeType(parent_node_id); + DALI_ENFORCE(parent_op_type == OpType::SUPPORT, "Executor encountered support op with non-support input."); int parent_idx = graph_->NodeIdx(parent_node_id); int input_src_idx = graph_->TensorIdxInSource(node.spec.Input(j)); @@ -427,8 +427,8 @@ void Executor::SetupDataForGraph(WorkspaceBlob *wsb) { for (int j = 0; j < node.spec.NumRegularInput(); ++j) { // Get each regular input and add them to this op's workspace. NodeID parent_node_id = graph_->TensorSourceID(node.spec.Input(j)); - DALIOpType parent_op_type = graph_->NodeType(parent_node_id); - DALI_ENFORCE(parent_op_type == DALI_CPU, + OpType parent_op_type = graph_->NodeType(parent_node_id); + DALI_ENFORCE(parent_op_type == OpType::CPU, "Executor encountered cpu op with non-cpu input."); int parent_idx = graph_->NodeIdx(parent_node_id); int input_src_idx = graph_->TensorIdxInSource(node.spec.Input(j)); @@ -442,8 +442,8 @@ void Executor::SetupDataForGraph(WorkspaceBlob *wsb) { for (const auto &arg_pair : node.spec.ArgumentInputs()) { // Get each argument input and add them to this op's workspace. NodeID parent_node_id = graph_->TensorSourceID(node.spec.Input(arg_pair.second)); - DALIOpType parent_op_type = graph_->NodeType(parent_node_id); - DALI_ENFORCE(parent_op_type == DALI_SUPPORT, + OpType parent_op_type = graph_->NodeType(parent_node_id); + DALI_ENFORCE(parent_op_type == OpType::SUPPORT, "Executor encountered argument input produced by non-cpu op."); int parent_idx = graph_->NodeIdx(parent_node_id); int input_src_idx = graph_->TensorIdxInSource(node.spec.Input(arg_pair.second)); @@ -478,8 +478,8 @@ void Executor::SetupDataForGraph(WorkspaceBlob *wsb) { // Go get each set of input Tensors and add // them to this mixed ops workspace. NodeID parent_node_id = graph_->TensorSourceID(node.spec.Input(j)); - DALIOpType parent_op_type = graph_->NodeType(parent_node_id); - DALI_ENFORCE(parent_op_type == DALI_CPU, + OpType parent_op_type = graph_->NodeType(parent_node_id); + DALI_ENFORCE(parent_op_type == OpType::CPU, "Executor encountered mixed op with non-cpu input."); int parent_idx = graph_->NodeIdx(parent_node_id); int input_src_idx = graph_->TensorIdxInSource(node.spec.Input(j)); @@ -516,11 +516,11 @@ void Executor::SetupDataForGraph(WorkspaceBlob *wsb) { for (int j = 0; j < node.spec.NumRegularInput(); ++j) { // Get each input and add them to this GPU op's workspace. NodeID parent_node_id = graph_->TensorSourceID(node.spec.Input(j)); - DALIOpType parent_op_type = graph_->NodeType(parent_node_id); + OpType parent_op_type = graph_->NodeType(parent_node_id); int parent_idx = graph_->NodeIdx(parent_node_id); int input_src_idx = graph_->TensorIdxInSource(node.spec.Input(j)); - if (parent_op_type == DALI_MIXED) { + if (parent_op_type == OpType::MIXED) { MixedWorkspace &src_ws = wsb->mixed_op_data[parent_idx]; if (node.spec.InputDevice(j) == "cpu") { const auto input = src_ws.SharedCPUOutput(input_src_idx); @@ -531,7 +531,7 @@ void Executor::SetupDataForGraph(WorkspaceBlob *wsb) { } else { DALI_FAIL("Executor encountered gpu op with non-cpu/gpu input."); } - } else if (parent_op_type == DALI_GPU) { + } else if (parent_op_type == OpType::GPU) { DeviceWorkspace &src_ws = wsb->gpu_op_data[parent_idx]; if (node.spec.InputDevice(j) == "cpu") { // Note: This path should currently never occur, as we @@ -553,8 +553,8 @@ void Executor::SetupDataForGraph(WorkspaceBlob *wsb) { for (const auto &arg_pair : node.spec.ArgumentInputs()) { // Get each argument input and add them to this op's workspace. NodeID parent_node_id = graph_->TensorSourceID(node.spec.Input(arg_pair.second)); - DALIOpType parent_op_type = graph_->NodeType(parent_node_id); - DALI_ENFORCE(parent_op_type == DALI_SUPPORT, + OpType parent_op_type = graph_->NodeType(parent_node_id); + DALI_ENFORCE(parent_op_type == OpType::SUPPORT, "Executor encountered argument input produced by non-cpu op."); int parent_idx = graph_->NodeIdx(parent_node_id); int input_src_idx = graph_->TensorIdxInSource(node.spec.Input(arg_pair.second)); @@ -679,7 +679,7 @@ void Executor::SetupStreamsForGraph(WorkspaceBlob *wsb) { ws.set_stream(gpu_op_stream); const OpNode& node = graph_->gpu_node(i); for (const auto& p : node.parents) { - if (graph_->NodeType(p) == DALI_MIXED) { + if (graph_->NodeType(p) == OpType::MIXED) { // We need to block on this op's event to // make sure that we respect the dependency int parent_op_idx = graph_->NodeIdx(p); @@ -745,14 +745,14 @@ void Executor::SetOutputBuffersForIter(int queue_idx, WorkspaceBlob *wsb) { NodeID node_id = info.prod_and_idx.first; int output_idx = info.prod_and_idx.second; // Contiguous CPU outputs come from mixed or GPU ops - DALI_ENFORCE(graph_->NodeType(node_id) == DALI_MIXED || - graph_->NodeType(node_id) == DALI_GPU); + DALI_ENFORCE(graph_->NodeType(node_id) == OpType::MIXED || + graph_->NodeType(node_id) == OpType::GPU); - if (graph_->NodeType(node_id) == DALI_MIXED) { + if (graph_->NodeType(node_id) == OpType::MIXED) { int mixed_op_id = graph_->NodeIdx(node_id); wsb->mixed_op_data[mixed_op_id].SetOutput( output_idx, cpu_outputs_[i].Get(queue_idx)); - } else { // DALI_GPU + } else { // OpType::GPU int gpu_op_id = graph_->NodeIdx(node_id); wsb->gpu_op_data[gpu_op_id].SetOutput(output_idx, cpu_outputs_[i].Get(queue_idx)); @@ -761,7 +761,7 @@ void Executor::SetOutputBuffersForIter(int queue_idx, WorkspaceBlob *wsb) { for (size_t j = 0; j < info.con_and_idx.size(); ++j) { node_id = info.con_and_idx[j].first; int input_idx = info.con_and_idx[j].second; - DALI_ENFORCE(graph_->NodeType(node_id) == DALI_GPU); + DALI_ENFORCE(graph_->NodeType(node_id) == OpType::GPU); int gpu_op_id = graph_->NodeIdx(node_id); wsb->gpu_op_data[gpu_op_id].SetInput( @@ -774,11 +774,11 @@ void Executor::SetOutputBuffersForIter(int queue_idx, WorkspaceBlob *wsb) { NodeID node_id = info.prod_and_idx.first; int output_idx = info.prod_and_idx.second; - if (graph_->NodeType(node_id) == DALI_MIXED) { + if (graph_->NodeType(node_id) == OpType::MIXED) { int mixed_op_id = graph_->NodeIdx(node_id); wsb->mixed_op_data[mixed_op_id].SetOutput(output_idx, gpu_outputs_[i].Get(queue_idx)); - } else if (graph_->NodeType(node_id) == DALI_GPU) { + } else if (graph_->NodeType(node_id) == OpType::GPU) { int gpu_op_id = graph_->NodeIdx(node_id); wsb->gpu_op_data[gpu_op_id].SetOutput(output_idx, gpu_outputs_[i].Get(queue_idx)); @@ -790,7 +790,7 @@ void Executor::SetOutputBuffersForIter(int queue_idx, WorkspaceBlob *wsb) { for (size_t j = 0; j < info.con_and_idx.size(); ++j) { node_id = info.con_and_idx[j].first; int input_idx = info.con_and_idx[j].second; - DALI_ENFORCE(graph_->NodeType(node_id) == DALI_GPU); + DALI_ENFORCE(graph_->NodeType(node_id) == OpType::GPU); int gpu_op_id = graph_->NodeIdx(node_id); wsb->gpu_op_data[gpu_op_id].SetInput(input_idx, diff --git a/dali/pipeline/executor/executor_test.cc b/dali/pipeline/executor/executor_test.cc index 184216690b..f8da556213 100644 --- a/dali/pipeline/executor/executor_test.cc +++ b/dali/pipeline/executor/executor_test.cc @@ -374,7 +374,7 @@ TEST_F(ExecutorTest, TestRunBasicGraph) { exe.Build(&graph, outputs); // Set the data for the external source - auto *src_op = dynamic_cast*>(&graph.cpu_op(0)); + auto *src_op = dynamic_cast*>(graph.cpu_node(0).op.get()); ASSERT_NE(src_op, nullptr); TensorList tl; this->MakeJPEGBatch(&tl, this->batch_size_); @@ -421,7 +421,7 @@ TEST_F(ExecutorTest, TestRunBasicGraphWithCB) { exe.Build(&graph, outputs); // Set the data for the external source - auto *src_op = dynamic_cast*>(&graph.cpu_op(0)); + auto *src_op = dynamic_cast*>(graph.cpu_node(0).op.get()); ASSERT_NE(src_op, nullptr); TensorList tl; this->MakeJPEGBatch(&tl, this->batch_size_); @@ -479,7 +479,7 @@ TEST_F(ExecutorTest, TestPrefetchedExecution) { exe.Build(&graph, outputs); // Set the data for the external source - auto *src_op = dynamic_cast*>(&graph.cpu_op(0)); + auto *src_op = dynamic_cast*>(graph.cpu_node(0).op.get()); ASSERT_NE(src_op, nullptr); TensorList tl; this->MakeJPEGBatch(&tl, this->batch_size_*2); diff --git a/dali/pipeline/executor/pipelined_executor.cc b/dali/pipeline/executor/pipelined_executor.cc index 805daa215a..9b7b5d1d6f 100644 --- a/dali/pipeline/executor/pipelined_executor.cc +++ b/dali/pipeline/executor/pipelined_executor.cc @@ -67,8 +67,8 @@ void PipelinedExecutor::SetupStageOutputsForGraph() { bool found_stage_boundary = false; for (auto &meta : consumer_meta) { const auto& node_type = graph_->NodeType(meta.node); - if (node_type != DALI_SUPPORT && - node_type != DALI_CPU) { + if (node_type != OpType::SUPPORT && + node_type != OpType::CPU) { // We've located a tensor that is an output of // the stage. found_stage_boundary = true; @@ -109,13 +109,13 @@ void PipelinedExecutor::SetupStageOutputsForGraph() { bool has_gpu_consumer = false; for (auto &meta : consumer_meta) { auto type = graph_->NodeType(meta.node); - if (type != DALI_CPU) { + if (type != OpType::CPU) { // We've located a tensor that is an output of // the stage. auto &consumer = graph_->node(meta.node); has_gpu_consumer = has_gpu_consumer || - type == DALI_GPU || + type == OpType::GPU || (consumer.spec.name() == "MakeContiguous" && consumer.spec.OutputDevice(0) == "gpu"); found_stage_boundary = true; @@ -155,12 +155,12 @@ void PipelinedExecutor::SetupStageOutputsForGraph() { if (graph_->TensorIsType(tensor_name)) { for (auto &meta : consumer_meta) { - if (graph_->NodeType(meta.node) != DALI_MIXED) { + if (graph_->NodeType(meta.node) != OpType::MIXED) { if (!has_info_object) { OutputInfo info; info.prod_and_idx = std::make_pair(node.id, j); - bool pinned = graph_->NodeType(meta.node) == DALI_GPU; + bool pinned = graph_->NodeType(meta.node) == OpType::GPU; mixed_stage_cpu_output_info_.push_back(info); mixed_stage_cpu_outputs_.push_back( @@ -176,7 +176,7 @@ void PipelinedExecutor::SetupStageOutputsForGraph() { } } else { for (auto &meta : consumer_meta) { - if (graph_->NodeType(meta.node) != DALI_MIXED) { + if (graph_->NodeType(meta.node) != OpType::MIXED) { if (!has_info_object) { OutputInfo info; info.prod_and_idx = std::make_pair(node.id, j); @@ -204,7 +204,7 @@ void PipelinedExecutor::SetStageOutputsForIter( auto &tvp = support_stage_outputs_[i]; auto &info = support_stage_output_info_[i]; NodeID node_id = info.prod_and_idx.first; - DALI_ENFORCE(graph_->NodeType(node_id) == DALI_SUPPORT); + DALI_ENFORCE(graph_->NodeType(node_id) == OpType::SUPPORT); int support_op_id = graph_->NodeIdx(node_id); int output_idx = info.prod_and_idx.second; @@ -218,10 +218,10 @@ void PipelinedExecutor::SetStageOutputsForIter( int input_idx = info.con_and_idx[j].second; const OpSpec& spec = op_node.spec; std::string arg_name = spec.ArgumentInputName(input_idx); - if (graph_->NodeType(node_id) == DALI_MIXED) { + if (graph_->NodeType(node_id) == OpType::MIXED) { wsb->mixed_op_data[child_op_id].SetArgumentInput( tvp.Get(queue_idx), arg_name); - } else if (graph_->NodeType(node_id) == DALI_GPU) { + } else if (graph_->NodeType(node_id) == OpType::GPU) { wsb->gpu_op_data[child_op_id].SetArgumentInput( tvp.Get(queue_idx), arg_name); } else { @@ -235,7 +235,7 @@ void PipelinedExecutor::SetStageOutputsForIter( auto &tvp = cpu_stage_outputs_[i]; auto &info = cpu_stage_output_info_[i]; NodeID node_id = info.prod_and_idx.first; - DALI_ENFORCE(graph_->NodeType(node_id) == DALI_CPU); + DALI_ENFORCE(graph_->NodeType(node_id) == OpType::CPU); int cpu_op_id = graph_->NodeIdx(node_id); int output_idx = info.prod_and_idx.second; @@ -244,7 +244,7 @@ void PipelinedExecutor::SetStageOutputsForIter( for (size_t j = 0; j < info.con_and_idx.size(); ++j) { node_id = info.con_and_idx[j].first; - if (graph_->NodeType(node_id) == DALI_MIXED) { + if (graph_->NodeType(node_id) == OpType::MIXED) { int mixed_op_id = graph_->NodeIdx(node_id); int input_idx = info.con_and_idx[j].second; wsb->mixed_op_data[mixed_op_id].SetInput( @@ -264,7 +264,7 @@ void PipelinedExecutor::SetStageOutputsForIter( } } } - } else if (graph_->NodeType(node_id) == DALI_CPU) { + } else if (graph_->NodeType(node_id) == OpType::CPU) { int cpu_op_id = graph_->NodeIdx(node_id); int input_idx = info.con_and_idx[j].second; wsb->cpu_op_data[cpu_op_id].SetInput( @@ -279,7 +279,7 @@ void PipelinedExecutor::SetStageOutputsForIter( auto &tlp = mixed_stage_cpu_outputs_[i]; auto &info = mixed_stage_cpu_output_info_[i]; NodeID node_id = info.prod_and_idx.first; - DALI_ENFORCE(graph_->NodeType(node_id) == DALI_MIXED); + DALI_ENFORCE(graph_->NodeType(node_id) == OpType::MIXED); int mixed_op_id = graph_->NodeIdx(node_id); int output_idx = info.prod_and_idx.second; @@ -288,7 +288,7 @@ void PipelinedExecutor::SetStageOutputsForIter( for (size_t j = 0; j < info.con_and_idx.size(); ++j) { node_id = info.con_and_idx[j].first; - DALI_ENFORCE(graph_->NodeType(node_id) == DALI_GPU); + DALI_ENFORCE(graph_->NodeType(node_id) == OpType::GPU); int gpu_op_id = graph_->NodeIdx(node_id); int input_idx = info.con_and_idx[j].second; @@ -302,7 +302,7 @@ void PipelinedExecutor::SetStageOutputsForIter( auto &tlp = mixed_stage_gpu_outputs_[i]; auto &info = mixed_stage_gpu_output_info_[i]; NodeID node_id = info.prod_and_idx.first; - DALI_ENFORCE(graph_->NodeType(node_id) == DALI_MIXED); + DALI_ENFORCE(graph_->NodeType(node_id) == OpType::MIXED); int mixed_op_id = graph_->NodeIdx(node_id); int output_idx = info.prod_and_idx.second; @@ -311,7 +311,7 @@ void PipelinedExecutor::SetStageOutputsForIter( for (size_t j = 0; j < info.con_and_idx.size(); ++j) { node_id = info.con_and_idx[j].first; - DALI_ENFORCE(graph_->NodeType(node_id) == DALI_GPU); + DALI_ENFORCE(graph_->NodeType(node_id) == OpType::GPU); int gpu_op_id = graph_->NodeIdx(node_id); int input_idx = info.con_and_idx[j].second; diff --git a/dali/pipeline/graph_descr.cc b/dali/pipeline/graph_descr.cc index b4316377b2..3008b82965 100644 --- a/dali/pipeline/graph_descr.cc +++ b/dali/pipeline/graph_descr.cc @@ -71,6 +71,19 @@ void CheckOpConstraints(const OpSpec &spec) { + " outputs, but was passed " + std::to_string(spec.NumOutput()/num_input_sets) + "."); } +OpType ParseOpType(const std::string &device) { + if (device == "gpu") { + return OpType::GPU; + } else if (device == "cpu") { + return OpType::CPU; + } else if (device == "mixed") { + return OpType::MIXED; + } else if (device == "support") { + return OpType::SUPPORT; + } + DALI_FAIL("Unsupported device type: " + device + "."); +} + } // namespace void OpGraph::AddOp(const OpSpec &spec, const std::string& name) { @@ -78,44 +91,55 @@ void OpGraph::AddOp(const OpSpec &spec, const std::string& name) { CheckOpConstraints(spec); string device = spec.GetArgument("device"); + auto op_type = ParseOpType(device); OpNode *new_node; - if (device == "cpu") { - // Enforce graph constraints - DALI_ENFORCE(AllInputsCPU(spec), "CPU ops cannot receive GPU input data."); - DALI_ENFORCE(AllOutputsCPU(spec), "CPU ops can only produce CPU output data."); - - cpu_nodes_.resize(cpu_nodes_.size()+1); - OpNode &cpu_node = cpu_nodes_.back(); - id_to_node_map_.push_back({DALI_CPU, cpu_nodes_.size()-1}); - - new_node = &cpu_node; - } else if (device == "gpu") { - gpu_nodes_.resize(gpu_nodes_.size()+1); - OpNode &gpu_node = gpu_nodes_.back(); - id_to_node_map_.push_back({DALI_GPU, gpu_nodes_.size()-1}); - - new_node = &gpu_node; - } else if (device == "mixed") { - // Enforce graph constraints - DALI_ENFORCE(AllInputsCPU(spec), "Mixed ops cannot receive GPU input data."); + switch (op_type) { + case OpType::CPU: { + // Enforce graph constraints + DALI_ENFORCE(AllInputsCPU(spec), "CPU ops cannot receive GPU input data."); + DALI_ENFORCE(AllOutputsCPU(spec), "CPU ops can only produce CPU output data."); + + cpu_nodes_.resize(cpu_nodes_.size()+1); + OpNode &cpu_node = cpu_nodes_.back(); + id_to_node_map_.push_back({OpType::CPU, cpu_nodes_.size()-1}); + + new_node = &cpu_node; + break; + } + case OpType::GPU: { + gpu_nodes_.resize(gpu_nodes_.size()+1); + OpNode &gpu_node = gpu_nodes_.back(); + id_to_node_map_.push_back({OpType::GPU, gpu_nodes_.size()-1}); - mixed_nodes_.resize(mixed_nodes_.size()+1); - OpNode &mixed_node = mixed_nodes_.back(); - id_to_node_map_.push_back({DALI_MIXED, mixed_nodes_.size()-1}); + new_node = &gpu_node; + break; + } + case OpType::MIXED: { + // Enforce graph constraints + DALI_ENFORCE(AllInputsCPU(spec), "Mixed ops cannot receive GPU input data."); - new_node = &mixed_node; - } else if (device == "support") { - // Enforce graph constraints - DALI_ENFORCE(AllInputsCPU(spec), "Support ops cannot receive GPU input data."); + mixed_nodes_.resize(mixed_nodes_.size()+1); + OpNode &mixed_node = mixed_nodes_.back(); + id_to_node_map_.push_back({OpType::MIXED, mixed_nodes_.size()-1}); + + new_node = &mixed_node; + break; + } + case OpType::SUPPORT: { + // Enforce graph constraints + DALI_ENFORCE(AllInputsCPU(spec), "Support ops cannot receive GPU input data."); - support_nodes_.resize(support_nodes_.size()+1); - OpNode &support_node = support_nodes_.back(); - id_to_node_map_.push_back({DALI_SUPPORT, support_nodes_.size() - 1}); + support_nodes_.resize(support_nodes_.size()+1); + OpNode &support_node = support_nodes_.back(); + id_to_node_map_.push_back({OpType::SUPPORT, support_nodes_.size() - 1}); - new_node = &support_node; - } else { - DALI_FAIL("Invalid device argument \"" + device + - "\". Valid options are \"cpu\", \"gpu\" or \"mixed\""); + new_node = &support_node; + break; + } + default: + DALI_FAIL("Invalid device argument \"" + device + + "\". Valid options are \"cpu\", \"gpu\" or \"mixed\""); + break; } // Add node meta-data and add to the list of nodes @@ -323,7 +347,7 @@ void OpGraph::RemoveOp(NodeID id) { // to fill the gap. // auto type_and_idx = id_to_node_map_[id]; - DALIOpType type = type_and_idx.first; + OpType type = type_and_idx.first; int idx = type_and_idx.second; id_to_node_map_.erase(id_to_node_map_.begin() + id); @@ -331,7 +355,7 @@ void OpGraph::RemoveOp(NodeID id) { // We will then need to update the id map entry for // all nodes of this type that follow the deleted node switch (type) { - case DALI_CPU: + case OpType::CPU: cpu_nodes_.erase(cpu_nodes_.begin() + idx); for (size_t i = idx; i < cpu_nodes_.size(); ++i) { @@ -339,7 +363,7 @@ void OpGraph::RemoveOp(NodeID id) { id_to_node_map_[cpu_node.id].second = i; } break; - case DALI_GPU: + case OpType::GPU: gpu_nodes_.erase(gpu_nodes_.begin() + idx); for (size_t i = idx; i < gpu_nodes_.size(); ++i) { @@ -347,7 +371,7 @@ void OpGraph::RemoveOp(NodeID id) { id_to_node_map_[gpu_node.id].second = i; } break; - case DALI_MIXED: + case OpType::MIXED: mixed_nodes_.erase(mixed_nodes_.begin() + idx); for (size_t i = idx; i < mixed_nodes_.size(); ++i) { @@ -355,13 +379,16 @@ void OpGraph::RemoveOp(NodeID id) { id_to_node_map_[mixed_node.id].second = i; } break; - case DALI_SUPPORT: + case OpType::SUPPORT: support_nodes_.erase(support_nodes_.begin() + idx); for (size_t i = idx; i < support_nodes_.size(); ++i) { OpNode &support_node = this->support_node(i); id_to_node_map_[support_node.id].second = i; } + break; + default: + DALI_FAIL("Invalid OpType"); } } @@ -370,16 +397,16 @@ OpNode& OpGraph::node(NodeID id) { auto idx_pair = id_to_node_map_[id]; switch (idx_pair.first) { - case DALI_CPU: + case OpType::CPU: return cpu_nodes_[idx_pair.second]; break; - case DALI_GPU: + case OpType::GPU: return gpu_nodes_[idx_pair.second]; break; - case DALI_MIXED: + case OpType::MIXED: return mixed_nodes_[idx_pair.second]; break; - case DALI_SUPPORT: + case OpType::SUPPORT: return support_nodes_[idx_pair.second]; break; default: diff --git a/dali/pipeline/op_graph.h b/dali/pipeline/op_graph.h index 170bd1c6e7..f360d25088 100644 --- a/dali/pipeline/op_graph.h +++ b/dali/pipeline/op_graph.h @@ -30,7 +30,7 @@ namespace dali { -typedef int64 NodeID; +using NodeID = int64_t; struct OpNode { inline OpNode() {} @@ -116,16 +116,6 @@ class DLL_PUBLIC OpGraph { */ DLL_PUBLIC inline Index NumSupportOp() const { return support_nodes_.size(); } - /** - * @brief Returns a reference to the `idx`-th cpu op that was - * added to the graph. - */ - DLL_PUBLIC inline OperatorBase& cpu_op(Index idx) { - DALI_ENFORCE_VALID_INDEX(idx, (Index)cpu_nodes_.size()); - DALI_ENFORCE(cpu_nodes_[idx].op != nullptr, "Operator instance is empty"); - return *cpu_nodes_[idx].op; - } - /** * @brief Returns the node object for the `idx`-th cpu op that * was added to the graph. @@ -135,16 +125,6 @@ class DLL_PUBLIC OpGraph { return cpu_nodes_[idx]; } - /** - * @brief Returns a reference to the `idx`-th gpu op that - * was added to the graph. - */ - DLL_PUBLIC inline OperatorBase& gpu_op(Index idx) { - DALI_ENFORCE_VALID_INDEX(idx, (Index)gpu_nodes_.size()); - DALI_ENFORCE(gpu_nodes_[idx].op != nullptr, "Operator instance is empty"); - return *gpu_nodes_[idx].op; - } - /** * @brief Returns the node object for the `idx`-th gpu op that * was added to the graph. @@ -154,16 +134,6 @@ class DLL_PUBLIC OpGraph { return gpu_nodes_[idx]; } - /** - * @brief Returns a reference to the `idx`-th mixed op - * that was added to the graph. - */ - DLL_PUBLIC inline OperatorBase& mixed_op(Index idx) { - DALI_ENFORCE_VALID_INDEX(idx, (Index)mixed_nodes_.size()); - DALI_ENFORCE(mixed_nodes_[idx].op != nullptr, "Operator instance is empty"); - return *mixed_nodes_[idx].op; - } - /** * @brief Returns the node object for the `idx`-th mixed op that * was added to the graph. @@ -173,16 +143,6 @@ class DLL_PUBLIC OpGraph { return mixed_nodes_[idx]; } - /** - * @brief Returns a reference to the `idx`-th support op - * that was added to the graph. - */ - DLL_PUBLIC inline OperatorBase& support_op(Index idx) { - DALI_ENFORCE_VALID_INDEX(idx, (Index)support_nodes_.size()); - DALI_ENFORCE(support_nodes_[idx].op != nullptr, "Operator instance is empty"); - return *support_nodes_[idx].op; - } - /** * @brief Returns the node object for the `idx`-th support op that * was added to the graph. @@ -209,7 +169,7 @@ class DLL_PUBLIC OpGraph { * @brief Returns the type (cpu, gpu, mixed) of the node * at the given index. */ - DLL_PUBLIC inline DALIOpType NodeType(NodeID id) const { + DLL_PUBLIC inline OpType NodeType(NodeID id) const { DALI_ENFORCE_VALID_INDEX(id, (Index)id_to_node_map_.size()); return id_to_node_map_[id].first; } @@ -285,20 +245,21 @@ class DLL_PUBLIC OpGraph { * map. */ DLL_PUBLIC const OpNode& GetNodeForIdx(int idx) const { - DALIOpType type = id_to_node_map_[idx].first; + OpType type = id_to_node_map_[idx].first; Index index = id_to_node_map_[idx].second; switch (type) { - case DALI_CPU: + case OpType::CPU: return cpu_nodes_[index]; - case DALI_GPU: + case OpType::GPU: return gpu_nodes_[index]; - case DALI_MIXED: + case OpType::MIXED: return mixed_nodes_[index]; - case DALI_SUPPORT: + case OpType::SUPPORT: return support_nodes_[index]; + default: + string str_error = "No Node for index " + to_string(idx); + DALI_FAIL(str_error); } - string str_error = "No Node for index " + to_string(idx); - DALI_FAIL(str_error); } @@ -350,7 +311,7 @@ class DLL_PUBLIC OpGraph { // Stores a mapping from NodeIDs to a pair where the first // element indicates what type of node it is, and the second // is the index of the op within the specified vector. - vector> id_to_node_map_; + vector> id_to_node_map_; std::map tensor_producers_; std::map> tensor_consumers_; diff --git a/dali/pipeline/operators/fused/crop_mirror_normalize_test.cc b/dali/pipeline/operators/fused/crop_mirror_normalize_test.cc index 61bf07ad56..0ce47ea1b3 100644 --- a/dali/pipeline/operators/fused/crop_mirror_normalize_test.cc +++ b/dali/pipeline/operators/fused/crop_mirror_normalize_test.cc @@ -33,7 +33,7 @@ class CropMirrorNormalizePermuteTest : public GenericMatchingTest { this->SetExternalInputs({{"jpegs", &data}}); string device(deviceName); - this->setOpType(device == "gpu" ? DALI_GPU : DALI_CPU); + this->SetOpType(device == "gpu" ? OpType::GPU : OpType::CPU); OpSpec spec = OpSpec(opName) .AddArg("device", device) .AddInput("images", device) diff --git a/dali/pipeline/operators/operator.h b/dali/pipeline/operators/operator.h index 473fddbdd2..73f040cac3 100644 --- a/dali/pipeline/operators/operator.h +++ b/dali/pipeline/operators/operator.h @@ -33,13 +33,6 @@ namespace dali { -enum DALIOpType { - DALI_GPU = 0, - DALI_CPU = 1, - DALI_MIXED = 2, - DALI_SUPPORT = 3 -}; - template inline void CheckInputLayout(const InputType& input, const OpSpec& spec) { auto &schema = SchemaRegistry::GetSchema(spec.name()); diff --git a/dali/pipeline/pipeline.h b/dali/pipeline/pipeline.h index cc10ea86cf..ef199464b6 100644 --- a/dali/pipeline/pipeline.h +++ b/dali/pipeline/pipeline.h @@ -137,7 +137,7 @@ class DLL_PUBLIC Pipeline { return; } NodeID node_id = graph_.TensorSourceID(name + "_cpu"); - DALI_ENFORCE(graph_.NodeType(node_id) == DALI_CPU, + DALI_ENFORCE(graph_.NodeType(node_id) == OpType::CPU, "Internal error setting external input data."); auto &node = graph_.node(node_id); diff --git a/dali/pipeline/pipeline_test.cc b/dali/pipeline/pipeline_test.cc index f3deb52e61..85fae8abd5 100644 --- a/dali/pipeline/pipeline_test.cc +++ b/dali/pipeline/pipeline_test.cc @@ -169,7 +169,7 @@ class PipelineTest : public DALITest { ASSERT_EQ(graph.NumMixedOp(), 1); ASSERT_EQ(graph.NumGPUOp(), 1); - ASSERT_EQ(graph.mixed_op(0).name(), "MakeContiguous"); + ASSERT_EQ(graph.mixed_node(0).op->name(), "MakeContiguous"); // Validate the source op auto &node = graph.node(0); diff --git a/dali/test/dali_test_matching.h b/dali/test/dali_test_matching.h index b373998684..5d764049f0 100644 --- a/dali/test/dali_test_matching.h +++ b/dali/test/dali_test_matching.h @@ -2,11 +2,13 @@ #ifndef DALI_TEST_DALI_TEST_MATCHING_H_ #define DALI_TEST_DALI_TEST_MATCHING_H_ -#include "dali/test/dali_test_single_op.h" +#include +#include #include #include -#include -#include + +#include "dali/common.h" +#include "dali/test/dali_test_single_op.h" namespace dali { @@ -43,7 +45,7 @@ class GenericMatchingTest : public DALISingleOpTest { vector*> Reference(const vector*> &inputs, DeviceWorkspace *ws) override { - if (OpType() == DALI_GPU) + if (GetOpType() == OpType::GPU) return this->CopyToHost(ws->Output(1)); else return this->CopyToHost(ws->Output(1)); @@ -70,11 +72,11 @@ class GenericMatchingTest : public DALISingleOpTest { } } - inline DALIOpType OpType() const { return m_nOpType; } - inline void setOpType(DALIOpType opType) { m_nOpType = opType; } + inline OpType GetOpType() const { return op_type_; } + inline void SetOpType(OpType opType) { op_type_ = opType; } - DALIOpType m_nOpType = DALI_GPU; + OpType op_type_ = OpType::GPU; }; } // namespace dali