Support XLA with CPU-only #5260

Merged · 28 commits · Jun 22, 2021
18 changes: 14 additions & 4 deletions .github/workflows/test.yml
@@ -123,7 +123,7 @@ jobs:
strategy:
max-parallel: 5
matrix:
test_suite: ["cuda", "cpu", "xla"]
test_suite: ["cuda", "cpu", "xla", "xla_cpu"]
include:
- test_suite: cuda
cuda_version: 10.2
@@ -140,6 +140,11 @@
extra_flags: --extra_oneflow_cmake_args=-DCUDA_NVCC_GENCODES=arch=compute_61,code=sm_61 --extra_oneflow_cmake_args=-DRPC_BACKEND=GRPC,LOCAL --xla --extra_oneflow_cmake_args=-DPIP_INDEX_MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple
os: [self-hosted, linux, build]
allow_fail: true
- test_suite: xla_cpu
cuda_version: 10.1
extra_flags: --extra_oneflow_cmake_args=-DRPC_BACKEND=GRPC,LOCAL --xla --cpu --extra_oneflow_cmake_args=-DPIP_INDEX_MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple
os: [self-hosted, linux, build]
allow_fail: true
steps:
- name: Set environments from secrets
if: contains(${{ github.repository }}, "Oneflow-Inc")
@@ -278,6 +283,7 @@ jobs:
"xla",
"cuda_op_eager",
"cpu_op_eager",
"xla_cpu"
]
include:
- test_suite: "cuda"
@@ -308,6 +314,10 @@
os: [self-hosted, linux, gpu]
allow_fail: true
build_env: build.xla.env
- test_suite: "xla_cpu"
os: [self-hosted, linux, cpu]
allow_fail: true
build_env: build.xla_cpu.env
steps:
- name: Fix permissions
run: |
@@ -350,9 +360,9 @@ jobs:
if [ "$test_suite" == "cuda" ] || [ "$test_suite" == "cpu" ]; then
echo "bin_dir=${PWD}/bin_tmp" >> $GITHUB_ENV
fi
if [ "$test_suite" == "cpu" ] || [ "$test_suite" == "cpu_op_eager" ]; then
if [ "$test_suite" == "cpu" ] || [ "$test_suite" == "cpu_op_eager" ] || [ "$test_suite" == "xla_cpu" ]; then
extra_docker_args+=" --env ONEFLOW_TEST_CPU_ONLY=1"
extra_docker_args+=" --env CUDA_VISIBLE_DEVICES=''"
extra_docker_args+=" --env CUDA_VISIBLE_DEVICES=-1"
fi
if [ "$test_suite" == "cuda_op_eager" ] || [ "$test_suite" == "cpu_op_eager" ] || [ "$test_suite" == "cuda_new_interface" ]; then
extra_docker_args+=" --env ONEFLOW_TEST_ENABLE_EAGER=1"
@@ -487,7 +497,7 @@ jobs:
${image_name} \
bash -c "bash ci/test/try_install.sh && bash ci/test/1node_benchmark_test_fp16.sh"
- name: XLA Test
if: matrix.test_suite == 'xla'
if: contains(fromJson('["xla", "xla_cpu"]'), matrix.test_suite) && env.is_built != '1'
run: |
set -x
docker run $extra_docker_args \
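
One detail worth calling out in the workflow change above: the CPU-only suites now export `CUDA_VISIBLE_DEVICES=-1` instead of an empty string. Since `-1` is never a valid device ordinal, CUDA-aware libraries reliably enumerate zero GPUs. A minimal sketch of the effect (TensorFlow stands in here for any CUDA-aware library; it is not part of this PR):

```python
# Hide all GPUs from a CUDA-aware process, as the CI does for the
# cpu, cpu_op_eager, and xla_cpu suites. The variable must be set before
# the first CUDA initialization, i.e. before importing the library.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # -1 never matches a real device

import tensorflow as tf
print(tf.config.list_physical_devices("GPU"))  # expected: []
```
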
7 changes: 5 additions & 2 deletions cmake/third_party/eigen.cmake
@@ -7,13 +7,16 @@ if(WITH_XLA)
#set(EIGEN_URL "https://storage.googleapis.com/mirror.tensorflow.org/gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
set(EIGEN_URL "https://gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
set(EIGEN_MD5 67b12e85555e0ac97b4cf8bae7fd65ad)
else()
else()
set(EIGEN_URL https://github.com/Oneflow-Inc/eigen-git-mirror/archive/e9e95489a.tar.gz)
set(EIGEN_MD5 a23cb70e12d1bf9b09cb28af51bc26ae)
endif()
use_mirror(VARIABLE EIGEN_URL URL ${EIGEN_URL})

add_definitions(-DEIGEN_NO_AUTOMATIC_RESIZING -DEIGEN_USE_GPU)
add_definitions(-DEIGEN_NO_AUTOMATIC_RESIZING)
if(BUILD_CUDA)
add_definitions(-DEIGEN_USE_GPU)
endif()
if (NOT WITH_XLA)
add_definitions(-DEIGEN_NO_MALLOC)
endif()
23 changes: 18 additions & 5 deletions cmake/third_party/tensorflow.cmake
@@ -1,3 +1,4 @@
cmake_minimum_required(VERSION 3.17.0)
include (ExternalProject)

if (WITH_XLA)
@@ -11,7 +12,12 @@ else()
set(TENSORFLOW_GENFILE_DIR k8-opt)
endif()

set(TF_WITH_CUDA ON)
list(APPEND TENSORFLOW_BUILD_CMD --config=noaws)
list(APPEND TENSORFLOW_BUILD_CMD --config=nogcp)
list(APPEND TENSORFLOW_BUILD_CMD --config=nohdfs)
list(APPEND TENSORFLOW_BUILD_CMD --config=nonccl)

set(TF_WITH_CUDA ${BUILD_CUDA})
if (TF_WITH_CUDA)
set(CUDA_COMPUTE_CAPABILITIES "6.0,6.1")
if (NOT CUDA_VERSION VERSION_LESS "10.0")
@@ -22,7 +28,7 @@ if (TF_WITH_CUDA)
list(APPEND TENSORFLOW_BUILD_CMD --action_env TF_CUDA_COMPUTE_CAPABILITIES=${CUDA_COMPUTE_CAPABILITIES})
endif()

message(STATUS ${TENSORFLOW_BUILD_CMD})
message(STATUS "TENSORFLOW_BUILD_CMD: ${TENSORFLOW_BUILD_CMD}")

set(TENSORFLOW_PROJECT tensorflow)
set(TENSORFLOW_SOURCES_DIR ${CMAKE_CURRENT_BINARY_DIR}/tensorflow)
@@ -65,17 +71,24 @@ list(APPEND TENSORFLOW_XLA_LIBRARIES libtensorflow_framework.so.1)
list(APPEND TENSORFLOW_XLA_LIBRARIES libxla_core.so)
link_directories(${TENSORFLOW_INSTALL_DIR}/lib)

if(NOT XRT_TF_URL)
set(XRT_TF_URL https://github.com/Oneflow-Inc/tensorflow/archive/fc42cf2a17e4af9f494278ddee66b6d17e1e9eaf.zip)
set(XRT_TF_DOWNLOAD_NO_EXTRACT OFF)
set(XRT_TF_URL "https://github.com/Oneflow-Inc/tensorflow/archive/7016a22292a607edc4175d07dae263faad31cd04.zip" CACHE STRING "")
message(STATUS "XRT_TF_URL: ${XRT_TF_URL}")

if(IS_DIRECTORY ${XRT_TF_URL})
set(XRT_TF_DOWNLOAD_NO_EXTRACT ON)
else()
use_mirror(VARIABLE XRT_TF_URL URL ${XRT_TF_URL})
endif()

if (THIRD_PARTY)
ExternalProject_Add(${TENSORFLOW_PROJECT}
PREFIX ${TENSORFLOW_SOURCES_DIR}
URL ${XRT_TF_URL}
DOWNLOAD_NO_EXTRACT ${XRT_TF_DOWNLOAD_NO_EXTRACT}
CONFIGURE_COMMAND ""
BUILD_COMMAND cd ${TENSORFLOW_SRCS_DIR} &&
bazel build ${TENSORFLOW_BUILD_CMD} -j HOST_CPUS //tensorflow/compiler/jit/xla_lib:libxla_core.so
${BAZEL_ENV_ARGS} bazel build ${TENSORFLOW_BUILD_CMD} -j HOST_CPUS //tensorflow/compiler/jit/xla_lib:libxla_core.so
INSTALL_COMMAND ""
)

7 changes: 5 additions & 2 deletions docker/package/manylinux/build_wheel.py
@@ -366,8 +366,6 @@ def is_img_existing(tag):
extra_oneflow_cmake_args += " -DWITH_XLA=ON"
else:
extra_oneflow_cmake_args += " -DWITH_XLA=Off"
if args.xla == True and args.cpu == True:
raise ValueError("flag xla can't coexist with flag cpu")
for cuda_version in cuda_versions:

cache_dir = None
@@ -386,6 +384,11 @@ def build():
"CUDNN_STATIC" not in extra_oneflow_cmake_args
), "CUDNN_STATIC will be set to OFF if cuda_version > 11"
enforced_oneflow_cmake_args += " -DCUDNN_STATIC=OFF"
if args.xla and args.cpu:
# https://github.com/tensorflow/tensorflow/issues/35867#issuecomment-578998683
enforced_oneflow_cmake_args += (
' -DBAZEL_ENV_ARGS="BAZEL_LINKLIBS=-l%:libstdc++.a"'
)
user_img_tag = f"{img_prefix}:{user}"
extra_docker_args = args.extra_docker_args
if "--name" not in extra_docker_args:
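
The build_wheel.py change above drops the old hard error for `--xla --cpu` and instead injects a `BAZEL_ENV_ARGS` definition so bazel links libstdc++ statically for the CPU-only XLA build (the workaround referenced in tensorflow/tensorflow#35867). A simplified sketch of that flag interaction, using a hypothetical helper rather than the script's real structure:

```python
# Hypothetical helper illustrating the --xla/--cpu combination after this PR;
# the real script appends these strings to its cmake argument lists instead.
def xla_cmake_args(xla: bool, cpu: bool) -> str:
    args = " -DWITH_XLA=ON" if xla else " -DWITH_XLA=Off"
    if xla and cpu:
        # Statically link libstdc++ into the bazel-built libxla_core.so;
        # see tensorflow/tensorflow#35867 for the underlying issue.
        args += ' -DBAZEL_ENV_ARGS="BAZEL_LINKLIBS=-l%:libstdc++.a"'
    return args

print(xla_cmake_args(xla=True, cpu=True))
```
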
4 changes: 4 additions & 0 deletions oneflow/xrt/launch_kernel.cpp
@@ -178,9 +178,13 @@ void XrtLaunchKernel<device_type>::ForwardDataContent(
run_options.return_params = return_params;
bool block_until_done = true;
if (device_type == DeviceType::kGPU) {
#ifdef WITH_CUDA
run_options.stream = ctx.device_ctx->cuda_stream();
run_options.device_memory_limit = FLAGS_max_workspace_bytes;
block_until_done = false;
#else
UNIMPLEMENTED() << "wasn't compiled with CUDA";
#endif // WITH_CUDA
}
if (executable->engine() == xrt::XrtEngine::TENSORRT) {
CHECK_EQ(device_type, DeviceType::kGPU);
4 changes: 4 additions & 0 deletions oneflow/xrt/xla/xla_executable_scope.h
@@ -53,19 +53,23 @@ XlaExecutableRunScope::XlaExecutableRunScope(xla::LocalExecutable* executable,
// launch kernel on the specified cuda stream of the context. Note that it
// should do nothing for single stream device such as CPU.
launch_stream_ = run_context_.run_options().stream;
#ifdef WITH_CUDA
if (SupportMultiStream(run_context_.device())) {
xla::SwapGpuStreamHandle(run_context_.stream(), &launch_stream_);
}
#endif // WITH_CUDA

size_t workspace_size = xla::CalcWorkspaceByteSize(executable);
run_context_.ReserveWorkspace(workspace_size);
run_context_.LockWorkspace();
}

XlaExecutableRunScope::~XlaExecutableRunScope() {
#ifdef WITH_CUDA
if (SupportMultiStream(run_context_.device())) {
xla::SwapGpuStreamHandle(run_context_.stream(), &launch_stream_);
}
#endif // WITH_CUDA
run_context_.UnlockWorkspace();
}

7 changes: 2 additions & 5 deletions tools/generate_pip_version.py
@@ -9,9 +9,6 @@
parser.add_argument("--src", type=str, required=False)
args = parser.parse_args()

if args.xla:
assert args.cuda

local_label = ""
version = f"0.5.0"

@@ -33,10 +30,10 @@
compute_platform = "".join(args.cuda.split("."))
assert len(compute_platform) == 3, compute_platform
compute_platform = "cu" + compute_platform
if args.xla:
compute_platform += ".xla"
else:
compute_platform = "cpu"
if args.xla:
compute_platform += ".xla"
assert compute_platform
version += f"+{compute_platform}"

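Finally, a quick worked example of the generate_pip_version.py change: the `assert args.cuda` guard is gone and `.xla` is now appended to whichever platform tag applies, so a CPU-only XLA wheel gets a `cpu.xla` local-version suffix. The sketch below assumes the `0.5.0` base version shown in the diff and ignores the script's other inputs:

```python
# Sketch of the local-version suffix logic after this PR (simplified; the real
# script also handles local labels and extra build metadata).
def platform_tag(cuda=None, xla=False):
    if cuda:
        tag = "cu" + "".join(cuda.split("."))  # "10.2" -> "cu102"
    else:
        tag = "cpu"
    if xla:
        tag += ".xla"
    return tag

print("0.5.0+" + platform_tag(cuda="10.2", xla=True))  # 0.5.0+cu102.xla
print("0.5.0+" + platform_tag(xla=True))               # 0.5.0+cpu.xla (newly possible)
```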