Support XLA with CPU-only #5260

Merged · 28 commits · Jun 22, 2021
18 changes: 14 additions & 4 deletions .github/workflows/test.yml
@@ -123,7 +123,7 @@ jobs:
strategy:
max-parallel: 5
matrix:
test_suite: ["cuda", "cpu", "xla"]
test_suite: ["cuda", "cpu", "xla", "xla_cpu"]
include:
- test_suite: cuda
cuda_version: 10.2
@@ -140,6 +140,11 @@
extra_flags: --extra_oneflow_cmake_args=-DCUDA_NVCC_GENCODES=arch=compute_61,code=sm_61 --extra_oneflow_cmake_args=-DRPC_BACKEND=GRPC,LOCAL --xla --extra_oneflow_cmake_args=-DPIP_INDEX_MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple
os: [self-hosted, linux, build]
allow_fail: true
- test_suite: xla_cpu
cuda_version: 10.1
extra_flags: --extra_oneflow_cmake_args=-DRPC_BACKEND=GRPC,LOCAL --xla --cpu --extra_oneflow_cmake_args=-DPIP_INDEX_MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple
os: [self-hosted, linux, build]
allow_fail: true
steps:
- name: Set environments from secrets
if: contains(${{ github.repository }}, "Oneflow-Inc")
@@ -278,6 +283,7 @@ jobs:
"xla",
"cuda_op_eager",
"cpu_op_eager",
"xla_cpu"
]
include:
- test_suite: "cuda"
@@ -308,6 +314,10 @@
os: [self-hosted, linux, gpu]
allow_fail: true
build_env: build.xla.env
- test_suite: "xla_cpu"
os: [self-hosted, linux, cpu]
allow_fail: true
build_env: build.xla_cpu.env
steps:
- name: Fix permissions
run: |
@@ -350,9 +360,9 @@ jobs:
if [ "$test_suite" == "cuda" ] || [ "$test_suite" == "cpu" ]; then
echo "bin_dir=${PWD}/bin_tmp" >> $GITHUB_ENV
fi
if [ "$test_suite" == "cpu" ] || [ "$test_suite" == "cpu_op_eager" ]; then
if [ "$test_suite" == "cpu" ] || [ "$test_suite" == "cpu_op_eager" ] || [ "$test_suite" == "xla_cpu" ]; then
extra_docker_args+=" --env ONEFLOW_TEST_CPU_ONLY=1"
extra_docker_args+=" --env CUDA_VISIBLE_DEVICES=''"
extra_docker_args+=" --env CUDA_VISIBLE_DEVICES=-1"
fi
if [ "$test_suite" == "cuda_op_eager" ] || [ "$test_suite" == "cpu_op_eager" ] || [ "$test_suite" == "cuda_new_interface" ]; then
extra_docker_args+=" --env ONEFLOW_TEST_ENABLE_EAGER=1"
@@ -487,7 +497,7 @@ jobs:
${image_name} \
bash -c "bash ci/test/try_install.sh && bash ci/test/1node_benchmark_test_fp16.sh"
- name: XLA Test
if: matrix.test_suite == 'xla'
if: contains(fromJson('["xla", "xla_cpu"]'), matrix.test_suite) && env.is_built != '1'
run: |
set -x
docker run $extra_docker_args \
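
One detail worth calling out in the workflow change above: the CPU-only suites now export `CUDA_VISIBLE_DEVICES=-1` instead of an empty string. Since `-1` is never a valid device ordinal, CUDA-aware libraries reliably enumerate zero GPUs. A minimal sketch of the effect (TensorFlow stands in here for any CUDA-aware library; it is not part of this PR):

```python
# Hide all GPUs from a CUDA-aware process, as the CI does for the
# cpu, cpu_op_eager, and xla_cpu suites. The variable must be set before
# the first CUDA initialization, i.e. before importing the library.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # -1 never matches a real device

import tensorflow as tf
print(tf.config.list_physical_devices("GPU"))  # expected: []
```
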
7 changes: 5 additions & 2 deletions cmake/third_party/eigen.cmake
@@ -7,13 +7,16 @@ if(WITH_XLA)
#set(EIGEN_URL "https://storage.googleapis.com/mirror.tensorflow.org/gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
set(EIGEN_URL "https://gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
set(EIGEN_MD5 67b12e85555e0ac97b4cf8bae7fd65ad)
else()
else()
set(EIGEN_URL https://github.com/Oneflow-Inc/eigen-git-mirror/archive/e9e95489a.tar.gz)
set(EIGEN_MD5 a23cb70e12d1bf9b09cb28af51bc26ae)
endif()
use_mirror(VARIABLE EIGEN_URL URL ${EIGEN_URL})

add_definitions(-DEIGEN_NO_AUTOMATIC_RESIZING -DEIGEN_USE_GPU)
add_definitions(-DEIGEN_NO_AUTOMATIC_RESIZING)
if(BUILD_CUDA)
add_definitions(-DEIGEN_USE_GPU)
endif()
if (NOT WITH_XLA)
add_definitions(-DEIGEN_NO_MALLOC)
endif()
23 changes: 18 additions & 5 deletions cmake/third_party/tensorflow.cmake
@@ -1,3 +1,4 @@
cmake_minimum_required(VERSION 3.17.0)
include (ExternalProject)

if (WITH_XLA)
@@ -11,7 +12,12 @@ else()
set(TENSORFLOW_GENFILE_DIR k8-opt)
endif()

set(TF_WITH_CUDA ON)
list(APPEND TENSORFLOW_BUILD_CMD --config=noaws)
list(APPEND TENSORFLOW_BUILD_CMD --config=nogcp)
list(APPEND TENSORFLOW_BUILD_CMD --config=nohdfs)
list(APPEND TENSORFLOW_BUILD_CMD --config=nonccl)

set(TF_WITH_CUDA ${BUILD_CUDA})
if (TF_WITH_CUDA)
set(CUDA_COMPUTE_CAPABILITIES "6.0,6.1")
if (NOT CUDA_VERSION VERSION_LESS "10.0")
@@ -22,7 +28,7 @@ if (TF_WITH_CUDA)
list(APPEND TENSORFLOW_BUILD_CMD --action_env TF_CUDA_COMPUTE_CAPABILITIES=${CUDA_COMPUTE_CAPABILITIES})
endif()

message(STATUS ${TENSORFLOW_BUILD_CMD})
message(STATUS "TENSORFLOW_BUILD_CMD: ${TENSORFLOW_BUILD_CMD}")

set(TENSORFLOW_PROJECT tensorflow)
set(TENSORFLOW_SOURCES_DIR ${CMAKE_CURRENT_BINARY_DIR}/tensorflow)
@@ -65,17 +71,24 @@ list(APPEND TENSORFLOW_XLA_LIBRARIES libtensorflow_framework.so.1)
list(APPEND TENSORFLOW_XLA_LIBRARIES libxla_core.so)
link_directories(${TENSORFLOW_INSTALL_DIR}/lib)

if(NOT XRT_TF_URL)
set(XRT_TF_URL https://github.com/Oneflow-Inc/tensorflow/archive/fc42cf2a17e4af9f494278ddee66b6d17e1e9eaf.zip)
set(XRT_TF_DOWNLOAD_NO_EXTRACT OFF)
set(XRT_TF_URL "https://github.com/Oneflow-Inc/tensorflow/archive/7016a22292a607edc4175d07dae263faad31cd04.zip" CACHE STRING "")
message(STATUS "XRT_TF_URL: ${XRT_TF_URL}")

if(IS_DIRECTORY ${XRT_TF_URL})
set(XRT_TF_DOWNLOAD_NO_EXTRACT ON)
else()
use_mirror(VARIABLE XRT_TF_URL URL ${XRT_TF_URL})
endif()

if (THIRD_PARTY)
ExternalProject_Add(${TENSORFLOW_PROJECT}
PREFIX ${TENSORFLOW_SOURCES_DIR}
URL ${XRT_TF_URL}
DOWNLOAD_NO_EXTRACT ${XRT_TF_DOWNLOAD_NO_EXTRACT}
CONFIGURE_COMMAND ""
BUILD_COMMAND cd ${TENSORFLOW_SRCS_DIR} &&
bazel build ${TENSORFLOW_BUILD_CMD} -j HOST_CPUS //tensorflow/compiler/jit/xla_lib:libxla_core.so
${BAZEL_ENV_ARGS} bazel build ${TENSORFLOW_BUILD_CMD} -j HOST_CPUS //tensorflow/compiler/jit/xla_lib:libxla_core.so
INSTALL_COMMAND ""
)

7 changes: 5 additions & 2 deletions docker/package/manylinux/build_wheel.py
@@ -366,8 +366,6 @@ def is_img_existing(tag):
extra_oneflow_cmake_args += " -DWITH_XLA=ON"
else:
extra_oneflow_cmake_args += " -DWITH_XLA=Off"
if args.xla == True and args.cpu == True:
raise ValueError("flag xla can't coexist with flag cpu")
for cuda_version in cuda_versions:

cache_dir = None
@@ -386,6 +384,11 @@ def build():
"CUDNN_STATIC" not in extra_oneflow_cmake_args
), "CUDNN_STATIC will be set to OFF if cuda_version > 11"
enforced_oneflow_cmake_args += " -DCUDNN_STATIC=OFF"
if args.xla and args.cpu:
# https://github.com/tensorflow/tensorflow/issues/35867#issuecomment-578998683
enforced_oneflow_cmake_args += (
' -DBAZEL_ENV_ARGS="BAZEL_LINKLIBS=-l%:libstdc++.a"'
)
user_img_tag = f"{img_prefix}:{user}"
extra_docker_args = args.extra_docker_args
if "--name" not in extra_docker_args:
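
The build_wheel.py change above drops the old hard error for `--xla --cpu` and instead injects a `BAZEL_ENV_ARGS` definition so bazel links libstdc++ statically for the CPU-only XLA build (the workaround referenced in tensorflow/tensorflow#35867). A simplified sketch of that flag interaction, using a hypothetical helper rather than the script's real structure:

```python
# Hypothetical helper illustrating the --xla/--cpu combination after this PR;
# the real script appends these strings to its cmake argument lists instead.
def xla_cmake_args(xla: bool, cpu: bool) -> str:
    args = " -DWITH_XLA=ON" if xla else " -DWITH_XLA=Off"
    if xla and cpu:
        # Statically link libstdc++ into the bazel-built libxla_core.so;
        # see tensorflow/tensorflow#35867 for the underlying issue.
        args += ' -DBAZEL_ENV_ARGS="BAZEL_LINKLIBS=-l%:libstdc++.a"'
    return args

print(xla_cmake_args(xla=True, cpu=True))
```
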
4 changes: 4 additions & 0 deletions oneflow/xrt/launch_kernel.cpp
@@ -178,9 +178,13 @@ void XrtLaunchKernel<device_type>::ForwardDataContent(
run_options.return_params = return_params;
bool block_until_done = true;
if (device_type == DeviceType::kGPU) {
#ifdef WITH_CUDA
run_options.stream = ctx.device_ctx->cuda_stream();
run_options.device_memory_limit = FLAGS_max_workspace_bytes;
block_until_done = false;
#else
UNIMPLEMENTED() << "wasn't compiled with CUDA";
#endif // WITH_CUDA
}
if (executable->engine() == xrt::XrtEngine::TENSORRT) {
CHECK_EQ(device_type, DeviceType::kGPU);
4 changes: 4 additions & 0 deletions oneflow/xrt/xla/xla_executable_scope.h
@@ -53,19 +53,23 @@ XlaExecutableRunScope::XlaExecutableRunScope(xla::LocalExecutable* executable,
// launch kernel on the specified cuda stream of the context. Note that it
// should do nothing for single stream device such as CPU.
launch_stream_ = run_context_.run_options().stream;
#ifdef WITH_CUDA
if (SupportMultiStream(run_context_.device())) {
xla::SwapGpuStreamHandle(run_context_.stream(), &launch_stream_);
}
#endif // WITH_CUDA

size_t workspace_size = xla::CalcWorkspaceByteSize(executable);
run_context_.ReserveWorkspace(workspace_size);
run_context_.LockWorkspace();
}

XlaExecutableRunScope::~XlaExecutableRunScope() {
#ifdef WITH_CUDA
if (SupportMultiStream(run_context_.device())) {
xla::SwapGpuStreamHandle(run_context_.stream(), &launch_stream_);
}
#endif // WITH_CUDA
run_context_.UnlockWorkspace();
}

7 changes: 2 additions & 5 deletions tools/generate_pip_version.py
@@ -9,9 +9,6 @@
parser.add_argument("--src", type=str, required=False)
args = parser.parse_args()

if args.xla:
assert args.cuda

local_label = ""
version = f"0.5.0"

@@ -33,10 +30,10 @@
compute_platform = "".join(args.cuda.split("."))
assert len(compute_platform) == 3, compute_platform
compute_platform = "cu" + compute_platform
if args.xla:
compute_platform += ".xla"
else:
compute_platform = "cpu"
if args.xla:
compute_platform += ".xla"
assert compute_platform
version += f"+{compute_platform}"

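Finally, a quick worked example of the generate_pip_version.py change: the `assert args.cuda` guard is gone and `.xla` is now appended to whichever platform tag applies, so a CPU-only XLA wheel gets a `cpu.xla` local-version suffix. The sketch below assumes the `0.5.0` base version shown in the diff and ignores the script's other inputs:

```python
# Sketch of the local-version suffix logic after this PR (simplified; the real
# script also handles local labels and extra build metadata).
def platform_tag(cuda=None, xla=False):
    if cuda:
        tag = "cu" + "".join(cuda.split("."))  # "10.2" -> "cu102"
    else:
        tag = "cpu"
    if xla:
        tag += ".xla"
    return tag

print("0.5.0+" + platform_tag(cuda="10.2", xla=True))  # 0.5.0+cu102.xla
print("0.5.0+" + platform_tag(xla=True))               # 0.5.0+cpu.xla (newly possible)
```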