diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index fcc4ea728f4..58d2ee96c26 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -123,7 +123,7 @@ jobs:
     strategy:
       max-parallel: 5
       matrix:
-        test_suite: ["cuda", "cpu", "xla"]
+        test_suite: ["cuda", "cpu", "xla", "xla_cpu"]
         include:
           - test_suite: cuda
             cuda_version: 10.2
@@ -140,6 +140,11 @@ jobs:
             extra_flags: --extra_oneflow_cmake_args=-DCUDA_NVCC_GENCODES=arch=compute_61,code=sm_61 --extra_oneflow_cmake_args=-DRPC_BACKEND=GRPC,LOCAL --xla --extra_oneflow_cmake_args=-DPIP_INDEX_MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple
             os: [self-hosted, linux, build]
             allow_fail: true
+          - test_suite: xla_cpu
+            cuda_version: 10.1
+            extra_flags: --extra_oneflow_cmake_args=-DRPC_BACKEND=GRPC,LOCAL --xla --cpu --extra_oneflow_cmake_args=-DPIP_INDEX_MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple
+            os: [self-hosted, linux, build]
+            allow_fail: true
     steps:
       - name: Set environments from secrets
         if: contains(${{ github.repository }}, "Oneflow-Inc")
@@ -278,6 +283,7 @@ jobs:
           "xla",
           "cuda_op_eager",
           "cpu_op_eager",
+          "xla_cpu"
         ]
         include:
           - test_suite: "cuda"
@@ -308,6 +314,10 @@ jobs:
             os: [self-hosted, linux, gpu]
             allow_fail: true
             build_env: build.xla.env
+          - test_suite: "xla_cpu"
+            os: [self-hosted, linux, cpu]
+            allow_fail: true
+            build_env: build.xla_cpu.env
     steps:
       - name: Fix permissions
         run: |
@@ -350,9 +360,9 @@ jobs:
           if [ "$test_suite" == "cuda" ] || [ "$test_suite" == "cpu" ]; then
             echo "bin_dir=${PWD}/bin_tmp" >> $GITHUB_ENV
           fi
-          if [ "$test_suite" == "cpu" ] || [ "$test_suite" == "cpu_op_eager" ]; then
+          if [ "$test_suite" == "cpu" ] || [ "$test_suite" == "cpu_op_eager" ] || [ "$test_suite" == "xla_cpu" ]; then
            extra_docker_args+=" --env ONEFLOW_TEST_CPU_ONLY=1"
-            extra_docker_args+=" --env CUDA_VISIBLE_DEVICES=''"
+            extra_docker_args+=" --env CUDA_VISIBLE_DEVICES=-1"
           fi
           if [ "$test_suite" == "cuda_op_eager" ] || [ "$test_suite" == "cpu_op_eager" ] || [ "$test_suite" == "cuda_new_interface" ]; then
             extra_docker_args+=" --env ONEFLOW_TEST_ENABLE_EAGER=1"
@@ -487,7 +497,7 @@ jobs:
             ${image_name} \
             bash -c "bash ci/test/try_install.sh && bash ci/test/1node_benchmark_test_fp16.sh"
       - name: XLA Test
-        if: matrix.test_suite == 'xla'
+        if: contains(fromJson('["xla", "xla_cpu"]'), matrix.test_suite) && env.is_built != '1'
         run: |
           set -x
           docker run $extra_docker_args \
diff --git a/cmake/third_party/eigen.cmake b/cmake/third_party/eigen.cmake
index 216ad612b24..792d60b5569 100644
--- a/cmake/third_party/eigen.cmake
+++ b/cmake/third_party/eigen.cmake
@@ -7,13 +7,16 @@ if(WITH_XLA)
   #set(EIGEN_URL "https://storage.googleapis.com/mirror.tensorflow.org/gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
   set(EIGEN_URL "https://gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
   set(EIGEN_MD5 67b12e85555e0ac97b4cf8bae7fd65ad)
-  else()
+else()
   set(EIGEN_URL https://github.com/Oneflow-Inc/eigen-git-mirror/archive/e9e95489a.tar.gz)
   set(EIGEN_MD5 a23cb70e12d1bf9b09cb28af51bc26ae)
 endif()
 
 use_mirror(VARIABLE EIGEN_URL URL ${EIGEN_URL})
-add_definitions(-DEIGEN_NO_AUTOMATIC_RESIZING -DEIGEN_USE_GPU)
+add_definitions(-DEIGEN_NO_AUTOMATIC_RESIZING)
+if(BUILD_CUDA)
+  add_definitions(-DEIGEN_USE_GPU)
+endif()
 if (NOT WITH_XLA)
   add_definitions(-DEIGEN_NO_MALLOC)
 endif()
diff --git a/cmake/third_party/tensorflow.cmake b/cmake/third_party/tensorflow.cmake
index 1adfc3dc32f..29b466b854a 100644
--- a/cmake/third_party/tensorflow.cmake
+++ b/cmake/third_party/tensorflow.cmake
@@ -1,3 +1,4 @@
+cmake_minimum_required(VERSION 3.17.0)
 include (ExternalProject)
 
 if (WITH_XLA)
@@ -11,7 +12,12 @@ else()
   set(TENSORFLOW_GENFILE_DIR k8-opt)
 endif()
 
-set(TF_WITH_CUDA ON)
+list(APPEND TENSORFLOW_BUILD_CMD --config=noaws)
+list(APPEND TENSORFLOW_BUILD_CMD --config=nogcp)
+list(APPEND TENSORFLOW_BUILD_CMD --config=nohdfs)
+list(APPEND TENSORFLOW_BUILD_CMD --config=nonccl)
+
+set(TF_WITH_CUDA ${BUILD_CUDA})
 if (TF_WITH_CUDA)
   set(CUDA_COMPUTE_CAPABILITIES "6.0,6.1")
   if (NOT CUDA_VERSION VERSION_LESS "10.0")
@@ -22,7 +28,7 @@ if (TF_WITH_CUDA)
   list(APPEND TENSORFLOW_BUILD_CMD --action_env TF_CUDA_COMPUTE_CAPABILITIES=${CUDA_COMPUTE_CAPABILITIES})
 endif()
 
-message(STATUS ${TENSORFLOW_BUILD_CMD})
+message(STATUS "TENSORFLOW_BUILD_CMD: ${TENSORFLOW_BUILD_CMD}")
 
 set(TENSORFLOW_PROJECT tensorflow)
 set(TENSORFLOW_SOURCES_DIR ${CMAKE_CURRENT_BINARY_DIR}/tensorflow)
@@ -65,17 +71,24 @@ list(APPEND TENSORFLOW_XLA_LIBRARIES libtensorflow_framework.so.1)
 list(APPEND TENSORFLOW_XLA_LIBRARIES libxla_core.so)
 link_directories(${TENSORFLOW_INSTALL_DIR}/lib)
 
-if(NOT XRT_TF_URL)
-  set(XRT_TF_URL https://github.com/Oneflow-Inc/tensorflow/archive/fc42cf2a17e4af9f494278ddee66b6d17e1e9eaf.zip)
+set(XRT_TF_DOWNLOAD_NO_EXTRACT OFF)
+set(XRT_TF_URL "https://github.com/Oneflow-Inc/tensorflow/archive/7016a22292a607edc4175d07dae263faad31cd04.zip" CACHE STRING "")
+message(STATUS "XRT_TF_URL: ${XRT_TF_URL}")
+
+if(IS_DIRECTORY ${XRT_TF_URL})
+  set(XRT_TF_DOWNLOAD_NO_EXTRACT ON)
+else()
   use_mirror(VARIABLE XRT_TF_URL URL ${XRT_TF_URL})
 endif()
+
 
 if (THIRD_PARTY)
 ExternalProject_Add(${TENSORFLOW_PROJECT}
     PREFIX ${TENSORFLOW_SOURCES_DIR}
     URL ${XRT_TF_URL}
+    DOWNLOAD_NO_EXTRACT ${XRT_TF_DOWNLOAD_NO_EXTRACT}
     CONFIGURE_COMMAND ""
     BUILD_COMMAND cd ${TENSORFLOW_SRCS_DIR} &&
-                  bazel build ${TENSORFLOW_BUILD_CMD} -j HOST_CPUS //tensorflow/compiler/jit/xla_lib:libxla_core.so
+                  ${BAZEL_ENV_ARGS} bazel build ${TENSORFLOW_BUILD_CMD} -j HOST_CPUS //tensorflow/compiler/jit/xla_lib:libxla_core.so
     INSTALL_COMMAND ""
 )
diff --git a/docker/package/manylinux/build_wheel.py b/docker/package/manylinux/build_wheel.py
index 7326286d411..96bc68bed74 100644
--- a/docker/package/manylinux/build_wheel.py
+++ b/docker/package/manylinux/build_wheel.py
@@ -366,8 +366,6 @@ def is_img_existing(tag):
         extra_oneflow_cmake_args += " -DWITH_XLA=ON"
     else:
         extra_oneflow_cmake_args += " -DWITH_XLA=Off"
-    if args.xla == True and args.cpu == True:
-        raise ValueError("flag xla can't coexist with flag cpu")
 
     for cuda_version in cuda_versions:
         cache_dir = None
@@ -386,6 +384,11 @@ def build():
                 "CUDNN_STATIC" not in extra_oneflow_cmake_args
             ), "CUDNN_STATIC will be set to OFF if cuda_version > 11"
             enforced_oneflow_cmake_args += " -DCUDNN_STATIC=OFF"
+        if args.xla and args.cpu:
+            # https://github.com/tensorflow/tensorflow/issues/35867#issuecomment-578998683
+            enforced_oneflow_cmake_args += (
+                ' -DBAZEL_ENV_ARGS="BAZEL_LINKLIBS=-l%:libstdc++.a"'
+            )
         user_img_tag = f"{img_prefix}:{user}"
         extra_docker_args = args.extra_docker_args
         if "--name" not in extra_docker_args:
diff --git a/oneflow/xrt/launch_kernel.cpp b/oneflow/xrt/launch_kernel.cpp
index f57a5f19bdc..d19a4412cdd 100644
--- a/oneflow/xrt/launch_kernel.cpp
+++ b/oneflow/xrt/launch_kernel.cpp
@@ -178,9 +178,13 @@ void XrtLaunchKernel::ForwardDataContent(
   run_options.return_params = return_params;
   bool block_until_done = true;
   if (device_type == DeviceType::kGPU) {
+#ifdef WITH_CUDA
     run_options.stream = ctx.device_ctx->cuda_stream();
     run_options.device_memory_limit = FLAGS_max_workspace_bytes;
     block_until_done = false;
+#else
+    UNIMPLEMENTED() << "wasn't compiled with CUDA";
+#endif  // WITH_CUDA
   }
   if (executable->engine() == xrt::XrtEngine::TENSORRT) {
     CHECK_EQ(device_type, DeviceType::kGPU);
diff --git a/oneflow/xrt/xla/xla_executable_scope.h b/oneflow/xrt/xla/xla_executable_scope.h
index bd7dfb1455a..f3727d4cf7a 100644
--- a/oneflow/xrt/xla/xla_executable_scope.h
+++ b/oneflow/xrt/xla/xla_executable_scope.h
@@ -53,9 +53,11 @@ XlaExecutableRunScope::XlaExecutableRunScope(xla::LocalExecutable* executable,
   // launch kernel on the specified cuda stream of the context. Note that it
   // should do nothing for single stream device such as CPU.
   launch_stream_ = run_context_.run_options().stream;
+#ifdef WITH_CUDA
   if (SupportMultiStream(run_context_.device())) {
     xla::SwapGpuStreamHandle(run_context_.stream(), &launch_stream_);
   }
+#endif  // WITH_CUDA
 
   size_t workspace_size = xla::CalcWorkspaceByteSize(executable);
   run_context_.ReserveWorkspace(workspace_size);
@@ -63,9 +65,11 @@ XlaExecutableRunScope::XlaExecutableRunScope(xla::LocalExecutable* executable,
 }
 
 XlaExecutableRunScope::~XlaExecutableRunScope() {
+#ifdef WITH_CUDA
   if (SupportMultiStream(run_context_.device())) {
     xla::SwapGpuStreamHandle(run_context_.stream(), &launch_stream_);
   }
+#endif  // WITH_CUDA
   run_context_.UnlockWorkspace();
 }
 
diff --git a/tools/generate_pip_version.py b/tools/generate_pip_version.py
index 15bd466ad5f..fa700786a24 100644
--- a/tools/generate_pip_version.py
+++ b/tools/generate_pip_version.py
@@ -9,9 +9,6 @@
 parser.add_argument("--src", type=str, required=False)
 args = parser.parse_args()
 
-if args.xla:
-    assert args.cuda
-
 local_label = ""
 
 version = f"0.5.0"
@@ -33,10 +30,10 @@
     compute_platform = "".join(args.cuda.split("."))
     assert len(compute_platform) == 3, compute_platform
     compute_platform = "cu" + compute_platform
-    if args.xla:
-        compute_platform += ".xla"
 else:
     compute_platform = "cpu"
+if args.xla:
+    compute_platform += ".xla"
 
 assert compute_platform
 version += f"+{compute_platform}"