diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 53a86623087..fd62edd4acb 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -5162,6 +5162,45 @@ tf_cuda_cc_test(
     ] + if_cuda([":cuda"]),
 )
 
+tf_cuda_cc_test(
+    name = "common_runtime_direct_session_group_test",
+    size = "small",
+    srcs = ["common_runtime/direct_session_group_test.cc"],
+    args = [] + if_cuda(["--heap_check=local"]),  # The GPU tracer leaks memory.
+    linkstatic = tf_kernel_tests_linkstatic(),
+    deps = [
+        ":core_cpu",
+        ":core_cpu_internal",
+        ":direct_session_internal",
+        ":framework",
+        ":framework_internal",
+        ":lib",
+        ":lib_internal",
+        ":ops",
+        ":protos_all_cc",
+        ":test",
+        ":test_main",
+        ":testlib",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+        "//third_party/eigen3",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/core/kernels:collective_ops",
+        "//tensorflow/core/kernels:control_flow_ops",
+        "//tensorflow/core/kernels:cwise_op",
+        "//tensorflow/core/kernels:dense_update_ops",
+        "//tensorflow/core/kernels:fifo_queue_op",
+        "//tensorflow/core/kernels:function_ops",
+        "//tensorflow/core/kernels:identity_n_op",
+        "//tensorflow/core/kernels:identity_op",
+        "//tensorflow/core/kernels:matmul_op",
+        "//tensorflow/core/kernels:ops_util",
+        "//tensorflow/core/kernels:queue_ops",
+        "//tensorflow/core/kernels:session_ops",
+        "//tensorflow/core/kernels:variable_ops",
+    ] + if_cuda([":cuda"]),
+)
+
 tf_cuda_cc_test(
     name = "common_runtime_gpu_cuda_graph_mode_session_test",
     size = "small",
diff --git a/tensorflow/core/common_runtime/direct_session_group_test.cc b/tensorflow/core/common_runtime/direct_session_group_test.cc
new file mode 100644
index 00000000000..69bccbd2bc5
--- /dev/null
+++ b/tensorflow/core/common_runtime/direct_session_group_test.cc
@@ -0,0 +1,192 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/direct_session.h" + +#include +#include +#include +#include // NOLINT +#include +#include +#include + +#include "absl/memory/memory.h" +#include "absl/strings/match.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/function_testlib.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/graph/costmodel.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/graph/testlib.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/protobuf/rewriter_config.pb.h" +#include 
"tensorflow/core/public/session.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/util/device_name_utils.h" + +#if GOOGLE_CUDA +#include "third_party/gpus/cuda/include/cuda.h" +#include "third_party/gpus/cuda/include/cuda_runtime_api.h" +#elif TENSORFLOW_USE_ROCM +#include "rocm/include/hip/hip_runtime.h" +#endif // GOOGLE_CUDA + +namespace tensorflow { +namespace { + +CallableOptions MakeCallableOptions(gtl::ArraySlice feeds, + gtl::ArraySlice fetches, + gtl::ArraySlice targets) { + CallableOptions ret; + for (const string& feed : feeds) { + ret.add_feed(feed); + } + for (const string& fetch : fetches) { + ret.add_fetch(fetch); + } + for (const string& target : targets) { + ret.add_target(target); + } + return ret; +} + +SessionOptions DefaultSessionOptions() { + SessionOptions options; + (*options.config.mutable_device_count())["CPU"] = 2; + return options; +} + +std::unique_ptr CreateSessionGroup() { + SessionGroup* sg = nullptr; + SessionGroupMetadata metadata; + NewSessionGroup(DefaultSessionOptions(), &sg, metadata); + return std::unique_ptr(sg); +} + +std::unique_ptr CreateSession() { + return std::unique_ptr(NewSession(DefaultSessionOptions())); +} + +class DirectSessionMinusAXTest : public ::testing::Test { + public: + void Initialize(std::initializer_list a_values) { + Graph graph(OpRegistry::Global()); + + Tensor a_tensor(DT_FLOAT, TensorShape({2, 2})); + test::FillValues(&a_tensor, a_values); + Node* a = test::graph::Constant(&graph, a_tensor); + a->set_assigned_device_name("/job:localhost/replica:0/task:0/cpu:0"); + a_ = a->name(); + + Tensor x_tensor(DT_FLOAT, TensorShape({2, 1})); + test::FillValues(&x_tensor, {1, 1}); + Node* x = test::graph::Constant(&graph, x_tensor); + x->set_assigned_device_name("/job:localhost/replica:0/task:0/cpu:1"); + x_ = x->name(); + + // y = A * x + Node* y = test::graph::Matmul(&graph, a, x, false, false); + y->set_assigned_device_name("/job:localhost/replica:0/task:0/cpu:0"); + y_ = 
y->name(); + + Node* y_neg = test::graph::Unary(&graph, "Neg", y); + y_neg_ = y_neg->name(); + y_neg->set_assigned_device_name("/job:localhost/replica:0/task:0/cpu:1"); + + Node* z = test::graph::Unary(&graph, "Identity", y_neg); + z_ = z->name(); + z->set_assigned_device_name("/job:localhost/replica:0/task:0/cpu:1"); + + graph.ToGraphDef(&def_); + } + + string a_; + string x_; + string y_; + string y_neg_; + string z_; + GraphDef def_; +}; + +TEST_F(DirectSessionMinusAXTest, TestDirectSessionGroup) { + for (int i = 0; i < 1000; ++i) { + Initialize({3, 2, -1, 0}); + auto sg = CreateSessionGroup(); + ASSERT_TRUE(sg != nullptr); + TF_ASSERT_OK(sg->Create(def_)); + std::vector> inputs; + + // Request two targets: one fetch output and one non-fetched output. + std::vector output_names = {y_ + ":0"}; + std::vector target_nodes = {y_neg_}; + std::vector outputs; + Status s = sg->Run(inputs, output_names, target_nodes, &outputs); + TF_ASSERT_OK(s); + + ASSERT_EQ(1, outputs.size()); + // The first output should be initialized and have the correct + // output. + auto mat = outputs[0].matrix(); + ASSERT_TRUE(outputs[0].IsInitialized()); + EXPECT_FLOAT_EQ(5.0, mat(0, 0)); + + //usleep(10000); + } +} + +TEST_F(DirectSessionMinusAXTest, TestDirectSession) { + for (int i = 0; i < 1000; ++i) { + Initialize({3, 2, -1, 0}); + auto sg = CreateSession(); + ASSERT_TRUE(sg != nullptr); + TF_ASSERT_OK(sg->Create(def_)); + std::vector> inputs; + + // Request two targets: one fetch output and one non-fetched output. + std::vector output_names = {y_ + ":0"}; + std::vector target_nodes = {y_neg_}; + std::vector outputs; + Status s = sg->Run(inputs, output_names, target_nodes, &outputs); + TF_ASSERT_OK(s); + + ASSERT_EQ(1, outputs.size()); + // The first output should be initialized and have the correct + // output. + auto mat = outputs[0].matrix(); + ASSERT_TRUE(outputs[0].IsInitialized()); + EXPECT_FLOAT_EQ(5.0, mat(0, 0)); + + //usleep(10000); + } +} + +} +} // namespace tensorflow