
fix build break on windows #14

Open
wants to merge 9 commits into base: r1.15.4+nv20.12
4 changes: 3 additions & 1 deletion tensorflow/compiler/xla/service/gpu/BUILD
@@ -14,6 +14,7 @@ load(
"tf_cc_test",
"tf_copts",
"tf_cuda_library",
"if_not_windows",
)
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load(
@@ -605,8 +606,9 @@ cc_library(
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/types:optional",
"@com_google_absl//absl/types:span",
] + if_not_windows([
"@nvtx_archive//:nvtx",
] + if_cuda_is_configured([
]) + if_cuda_is_configured([
"//tensorflow/stream_executor/cuda:cuda_stream",
"//tensorflow/core/platform/default/build_config:cublas_plugin",
"//tensorflow/core/platform/default/build_config:cudnn_plugin",
5 changes: 3 additions & 2 deletions tensorflow/core/BUILD
@@ -3287,9 +3287,10 @@ tf_cuda_library(
"//third_party/eigen3",
"//tensorflow/core/grappler/utils:functions",
"//tensorflow/core/profiler/lib:traceme",
"@nvtx_archive//:nvtx",
"//tensorflow/core/profiler/internal:traceme_recorder",
] + mkl_deps(),
] + if_not_windows([
"@nvtx_archive//:nvtx",
]) + mkl_deps(),
alwayslink = 1,
)

6 changes: 4 additions & 2 deletions tensorflow/core/common_runtime/eager/BUILD
@@ -3,6 +3,7 @@ load(
"tf_cc_test",
"tf_copts",
"tf_cuda_library",
"if_not_windows",
)
load(
"//third_party/mkl:build_defs.bzl",
@@ -203,9 +204,10 @@ tf_cuda_library(
"//tensorflow/core:protos_all_cc",
"//tensorflow/core/profiler/lib:traceme",
"//tensorflow/core/grappler/optimizers:meta_optimizer",
"@nvtx_archive//:nvtx",
],
}),
}) + if_not_windows([
"@nvtx_archive//:nvtx",
]),
)

tf_cc_test(
4 changes: 2 additions & 2 deletions tensorflow/core/framework/common_shape_fns.cc
@@ -1086,7 +1086,7 @@ Status FusedBatchNormShape(shape_inference::InferenceContext* c) {
data_format_str);
}
const int rank =
(data_format_str == "NDHWC" or data_format_str == "NCDHW") ? 5 : 4;
(data_format_str == "NDHWC" || data_format_str == "NCDHW") ? 5 : 4;
ShapeHandle x;
TF_RETURN_IF_ERROR(c->WithRank(c->input(0), rank, &x));

@@ -1155,7 +1155,7 @@ Status FusedBatchNormGradShape(shape_inference::InferenceContext* c) {
data_format_str);
}
const int rank =
(data_format_str == "NDHWC" or data_format_str == "NCDHW") ? 5 : 4;
(data_format_str == "NDHWC" || data_format_str == "NCDHW") ? 5 : 4;
ShapeHandle y_backprop;
TF_RETURN_IF_ERROR(c->WithRank(c->input(0), rank, &y_backprop));
ShapeHandle x;
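
The `or` to `||` swaps in this file (and the matching `and`/`not` changes in the grappler and kernel hunks below) address an MSVC default: the ISO C++ alternative operator tokens are only accepted by MSVC with /permissive- or after including <ciso646>, so the default Windows build rejects them. A minimal sketch of the difference, assuming default MSVC flags; RankIsSupported is a made-up helper, not part of this PR:

// Sketch only. With MSVC's default flags (no /permissive-, no <ciso646>),
// "and"/"or"/"not" are treated as ordinary identifiers, so the commented-out
// line fails to compile there while GCC and Clang accept both spellings.
bool RankIsSupported(int rank) {
  // return rank == 4 or rank == 5;  // breaks the Windows build
  return rank == 4 || rank == 5;     // portable spelling used throughout this PR
}
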
@@ -81,7 +81,7 @@ inline bool NumConvOnDeviceWithDataTypeOverThreshold(

for (const auto& node : context.graph_view->GetNodes()) {
const auto* node_def = node.node();
if (!IsConv2D(*node_def) and !IsConv3D(*node_def)) {
if (!IsConv2D(*node_def) && !IsConv3D(*node_def)) {
continue;
}
const string& device_name =
@@ -401,7 +401,7 @@ Status PrintDebugLogs(string suffix, GraphDef* graph_) {
TF_RETURN_IF_ERROR(ReadBoolFromEnvVar(
"TF_ENABLE_LAYOUT_OPTIMIZE_GRAPH_REWRITE_LOG", /*default_value=*/false,
&allow_print));
if (not allow_print) return Status::OK();
if (!allow_print) return Status::OK();

string prepend_path = "/tmp/logs/";
if (prepend_path.empty()) return Status::OK();
@@ -292,7 +292,7 @@ Status Transposer::CreateConstPermNode(TransposeContext* context,
node.mutable_attr()->insert({"dtype", attr_data_type});

AttrValue attr_tensor;
Tensor tensor(DT_INT32, TensorShape({permutation.size()}));
Tensor tensor(DT_INT32, TensorShape({(long long)permutation.size()}));
for (int i = 0; i < permutation.size(); i++) {
tensor.flat<int>()(i) = permutation[i];
}
@@ -728,7 +728,7 @@ Status DefaultLayoutSensitiveOpTransposer::TransposeNode(
TransposeContext* context, utils::MutableNodeView* node) {
DCHECK(IsDefaultLayoutSensitiveOp(*node->node()));
const int rank = GetFanoutPortRank(*node, 0);
if (rank != 4 and rank != 5) {
if (rank != 4 && rank != 5) {
return Status::OK();
}
ScopedDataFormatUpgrader data_format_upgrader(context, rank);
@@ -748,7 +748,7 @@ Status BiasAddTransposer::TransposeNode(
TransposeContext* context, utils::MutableNodeView* node) {
DCHECK(IsBiasAdd(*node->node()));
const int rank = GetFanoutPortRank(*node, 0);
if (rank != 4 and rank != 5) {
if (rank != 4 && rank != 5) {
return Status::OK();
}
if (!ShouldProcess(*context, *node)) {
@@ -789,7 +789,7 @@ Status BiasAddGradTransposer::TransposeNode(TransposeContext* context,
utils::MutableNodeView* node) {
DCHECK(IsBiasAddGrad(*node->node()));
const int rank = GetFaninPortRank(*node, 0);
if (rank != 4 and rank != 5) {
if (rank != 4 && rank != 5) {
return Status::OK();
}
if (!ShouldProcess(*context, *node)) {
@@ -962,7 +962,7 @@ Status FusedBatchNormGradTransposer::TransposeNode(
TransposeContext* context, utils::MutableNodeView* node) {
DCHECK(IsFusedBatchNormGrad(*node->node()));
const int rank = GetFanoutPortRank(*node, 0);
if (rank != 4 and rank != 5) {
if (rank != 4 && rank != 5) {
return Status::OK();
}
ScopedDataFormatUpgrader data_format_upgrader(context, rank);
@@ -1335,7 +1335,7 @@ Status ConcatOpTransposer::TransposeNode(TransposeContext* context,
utils::MutableNodeView* node) {
DCHECK(IsConcat(*node->node()));
const int rank = GetFanoutPortRank(*node, 0);
if (rank != 4 and rank != 5) {
if (rank != 4 && rank != 5) {
return Status::OK();
}
ScopedDataFormatUpgrader data_format_upgrader(context, rank);
@@ -1518,7 +1518,7 @@ Status ReduceTransposer::TransposeNode(TransposeContext* context,
utils::MutableNodeView* node) {
DCHECK(IsReduceOp(*node->node()));
const int rank = GetFaninPortRank(*node, 0);
if (rank != 4 and rank != 5) {
if (rank != 4 && rank != 5) {
return Status::OK();
}
ScopedDataFormatUpgrader data_format_upgrader(context, rank);
@@ -1591,7 +1591,7 @@ Status ShapeTransposer::TransposeNode(TransposeContext* context,
utils::MutableNodeView* node) {
DCHECK(IsShape(*node->node()));
const int rank = GetFaninPortRank(*node, 0);
if (rank != 4 and rank != 5) {
if (rank != 4 && rank != 5) {
return Status::OK();
}
ScopedDataFormatUpgrader data_format_upgrader(context, rank);
@@ -1636,7 +1636,7 @@ Status SliceTransposer::TransposeNode(TransposeContext* context,
utils::MutableNodeView* node) {
DCHECK(IsSlice(*node->node()));
const int rank = GetFanoutPortRank(*node, 0);
if (rank != 4 and rank != 5) {
if (rank != 4 && rank != 5) {
return Status::OK();
}
ScopedDataFormatUpgrader data_format_upgrader(context, rank);
@@ -1907,7 +1907,7 @@ Status UnaryGradTransposer::TransposeNode(TransposeContext* context,
utils::MutableNodeView* node) {
DCHECK(IsUnaryGrad(*node->node()));
const int rank = GetFanoutPortRank(*node, 0);
if (rank != 4 and rank != 5) {
if (rank != 4 && rank != 5) {
return Status::OK();
}
ScopedDataFormatUpgrader data_format_upgrader(context, rank);
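
The (long long) cast added in the CreateConstPermNode hunk above matters because the braced dimension list is built from signed 64-bit values, and on 64-bit Windows the conversion from size_t (the type of permutation.size()) to a signed 64-bit integer inside a braced initializer list is a narrowing conversion, which MSVC rejects while GCC typically only warns. A standalone sketch of the same pattern, with ShapeLike standing in for TensorShape (not real TensorFlow code):

// Sketch; ShapeLike is a stand-in for TensorShape.
#include <cstddef>
#include <initializer_list>
#include <vector>

struct ShapeLike {
  ShapeLike(std::initializer_list<long long> dims) {}  // dimensions as int64
};

void BuildShape(const std::vector<int>& permutation) {
  // ShapeLike bad({permutation.size()});            // size_t -> long long narrows; MSVC rejects this
  ShapeLike ok({(long long)permutation.size()});     // explicit cast, as in the PR
}
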
2 changes: 1 addition & 1 deletion tensorflow/core/grappler/optimizers/remapper.cc
@@ -1284,7 +1284,7 @@ Status AddBatchNormNodes(RemapperContext* ctx, const FusedBatchNorm& matched) {
Status status;

string x_format = fused_node.attr().at(kDataFormat).s();
if (x_format == "NCHW" or x_format == "NCDHW") {
if (x_format == "NCHW" || x_format == "NCDHW") {
// Need to reshape the last 4 inputs
NodeDef new_shape;
const string new_shape_name =
6 changes: 3 additions & 3 deletions tensorflow/core/kernels/fused_batch_norm_op.cc
@@ -1035,7 +1035,7 @@ class FusedBatchNormOpBase : public OpKernel {
const Tensor& side_input =
has_side_input_ ? context->input(5) : empty_side_input_;

OP_REQUIRES(context, x.dims() == 4 or x.dims() == 5,
OP_REQUIRES(context, x.dims() == 4 || x.dims() == 5,
errors::InvalidArgument("input must be 4 or 5-dimensional",
x.shape().DebugString()));
OP_REQUIRES(context, scale.dims() == 1,
@@ -1209,10 +1209,10 @@ class FusedBatchNormGradOpBase : public OpKernel {
// saves inverted variance.
const Tensor& saved_maybe_inv_var_or_pop_var = context->input(4);

OP_REQUIRES(context, y_backprop.dims() == 4 or y_backprop.dims() == 5,
OP_REQUIRES(context, y_backprop.dims() == 4 || y_backprop.dims() == 5,
errors::InvalidArgument("input must be 4 or 5-dimensional",
y_backprop.shape().DebugString()));
OP_REQUIRES(context, x.dims() == 4 or x.dims() == 5,
OP_REQUIRES(context, x.dims() == 4 || x.dims() == 5,
errors::InvalidArgument("input must be 4 or 5-dimensional",
x.shape().DebugString()));
OP_REQUIRES(context, scale.dims() == 1,
21 changes: 0 additions & 21 deletions tensorflow/core/kernels/non_max_suppression_op.cu.cc
@@ -149,27 +149,6 @@ __device__ EIGEN_STRONG_INLINE void ClearBit(T* bit_mask, int bit) {
atomicAnd(bit_mask + bin, ~(T(1) << (bit & kRemainderMask)));
}

__global__ void FlipBoxes(Box* boxes, const int* num_batch_boxes,
const int* box_strides, const int batch_size) {
// for (int b = 0; b < batch_size; ++b) {
// int box_offset = box_strides[b];
for (const int y : CudaGridRangeY(batch_size)) {
int box_offset = box_strides[y];
Box* curr_boxes = boxes + box_offset;
// if (threadIdx.x == 0) {
// printf(" FBx batch=%d, box_offset=%d, num_batch_boxes=%d boxes@ %p \n",
// y,
// box_offset, num_batch_boxes[y],curr_boxes);
// }

for (int i : GpuGridRangeX(num_batch_boxes[y])) {
Flipped<true>(curr_boxes[i]);
}
}
// }
}


// Produce a global bitmask (result_mask) of selected boxes from bitmask
// generated by NMSKernel Abort early if max_boxes boxes are selected.
// Bitmask is num_boxes*bit_mask_len bits indicating whether to keep or
2 changes: 1 addition & 1 deletion tensorflow/core/lib/core/errors.h
@@ -44,7 +44,7 @@ namespace internal {
// Eventually absl::strings will have native support for this and we will be
// able to completely remove PrepareForStrCat().
template <typename T>
typename std::enable_if<!std::is_constructible<strings::AlphaNum, T>::value,
typename std::enable_if<!std::is_convertible<T, strings::AlphaNum>::value,
string>::type
PrepareForStrCat(const T& t) {
std::stringstream ss;
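
Switching the trait from std::is_constructible to std::is_convertible changes which PrepareForStrCat overload gets selected: is_convertible only admits implicit conversions, which is what passing an argument to StrCat actually requires, whereas is_constructible also matches explicit constructors, and judging from the PR title the original trait tripped up the MSVC build. The sketch below shows the dispatch pattern with a stand-in AlphaNumLike type; the second overload and the stand-in type are assumptions for illustration, not lifted from this PR:

// Sketch of the enable_if dispatch; AlphaNumLike is a stand-in type.
#include <sstream>
#include <string>
#include <type_traits>

struct AlphaNumLike {
  AlphaNumLike(int) {}                 // implicit conversions, like strings::AlphaNum
  AlphaNumLike(const std::string&) {}
};

// Types that do NOT convert implicitly are stringified through a stringstream.
template <typename T>
typename std::enable_if<!std::is_convertible<T, AlphaNumLike>::value,
                        std::string>::type
PrepareForStrCat(const T& t) {
  std::stringstream ss;
  ss << t;
  return ss.str();
}

// Types that convert implicitly are passed through untouched.
template <typename T>
typename std::enable_if<std::is_convertible<T, AlphaNumLike>::value,
                        const T&>::type
PrepareForStrCat(const T& t) {
  return t;
}
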
19 changes: 8 additions & 11 deletions tensorflow/core/lib/io/path.cc
@@ -35,6 +35,8 @@ namespace tensorflow {
namespace io {
namespace internal {

const char kPathSep[] = "/";

string JoinPathImpl(std::initializer_list<StringPiece> paths) {
string result;

@@ -46,18 +48,12 @@ string JoinPathImpl(std::initializer_list<StringPiece> paths) {
continue;
}

if (result[result.size() - 1] == '/') {
if (IsAbsolutePath(path)) {
strings::StrAppend(&result, path.substr(1));
} else {
strings::StrAppend(&result, path);
}
if (IsAbsolutePath(path)) path = path.substr(1);

if (result[result.size() - 1] == kPathSep[0]) {
strings::StrAppend(&result, path);
} else {
if (IsAbsolutePath(path)) {
strings::StrAppend(&result, path);
} else {
strings::StrAppend(&result, "/", path);
}
strings::StrAppend(&result, kPathSep, path);
}
}

@@ -126,6 +122,7 @@ bool FixBazelEnvPath(const char* path, string* out) {

return true;
}

} // namespace internal

bool IsAbsolutePath(StringPiece path) {
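
Because the old and new bodies of JoinPathImpl are interleaved in the hunk above, here is the new control flow restated as a self-contained sketch. It uses std::string in place of StringPiece and strings::StrAppend, and it is my reading of the hunk rather than code copied from the PR:

// Condensed restatement of the new JoinPathImpl logic (sketch, not the real code).
#include <initializer_list>
#include <string>

namespace {
const char kPathSep[] = "/";

bool IsAbsolute(const std::string& p) { return !p.empty() && p[0] == '/'; }

std::string JoinPath(std::initializer_list<std::string> paths) {
  std::string result;
  for (std::string path : paths) {
    if (path.empty()) continue;
    if (result.empty()) {                         // first non-empty component is taken as-is
      result = path;
      continue;
    }
    if (IsAbsolute(path)) path = path.substr(1);  // drop the component's leading separator
    if (result.back() == kPathSep[0]) {
      result += path;                             // result already ends in a separator
    } else {
      result += kPathSep;
      result += path;                             // insert exactly one separator
    }
  }
  return result;
}
}  // namespace
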
6 changes: 0 additions & 6 deletions tensorflow/core/platform/env.cc
@@ -570,10 +570,4 @@ Status ReadTextOrBinaryProto(Env* env, const string& fname,
return ReadBinaryProto(env, fname, proto);
}

int setenv(const char* name, const char* value, int overwrite) {
return ::setenv(name, value, overwrite);
}

int unsetenv(const char* name) { return ::unsetenv(name); }

} // namespace tensorflow
4 changes: 4 additions & 0 deletions tensorflow/core/platform/nvtx.h
@@ -16,7 +16,11 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_PLATFORM_NVTX_H_
#define TENSORFLOW_CORE_PLATFORM_NVTX_H_

#ifdef _WIN32
#include "cuda/include/nvtx3/nvToolsExt.h"
#else
#include "third_party/nvtx3/nvToolsExt.h"
#endif

#include "tensorflow/core/framework/attr_value.pb.h"
#include "tensorflow/core/framework/attr_value_util.h"
7 changes: 7 additions & 0 deletions tensorflow/core/platform/posix/env.cc
@@ -18,6 +18,7 @@ limitations under the License.
#include <fcntl.h>
#include <fnmatch.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
@@ -258,4 +259,10 @@ void PosixEnv::GetLocalTempDirectories(std::vector<string>* list) {
}
}

int setenv(const char* name, const char* value, int overwrite) {
return ::setenv(name, value, overwrite);
}

int unsetenv(const char* name) { return ::unsetenv(name); }

} // namespace tensorflow
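
Moving the setenv/unsetenv wrappers out of the generic env.cc and into posix/env.cc keeps the calls to ::setenv/::unsetenv out of the Windows build, since the MSVC C runtime does not provide those functions. For illustration only, a Windows-side twin (not part of this PR, and the file name is hypothetical) would have to go through _putenv_s instead, roughly like this:

// Hypothetical windows/env.cc counterpart; NOT part of this PR.
#include <stdlib.h>  // getenv and _putenv_s in the MSVC CRT

namespace tensorflow {

int setenv(const char* name, const char* value, int overwrite) {
  if (!overwrite && getenv(name) != nullptr) return 0;  // keep the existing value
  return _putenv_s(name, value);
}

int unsetenv(const char* name) {
  return _putenv_s(name, "");  // an empty value removes the variable on Windows
}

}  // namespace tensorflow
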
8 changes: 7 additions & 1 deletion tensorflow/python/lib/core/bfloat16.cc
@@ -490,7 +490,7 @@ bool RegisterBfloat16Cast(int numpy_type, bool cast_is_safe) {
}

template <typename InType, typename OutType, typename Functor>
void BinaryUFunc(char** args, npy_intp* dimensions, npy_intp* steps,
void BinaryUFunc(char** args, const npy_intp* dimensions, const npy_intp* steps,
void* data) {
const char* i0 = args[0];
const char* i1 = args[1];
@@ -505,11 +505,17 @@ void BinaryUFunc(char** args, npy_intp* dimensions, npy_intp* steps,
}
}

// Numpy changed const-ness of PyUFuncGenericFunction, provide overload.
template <typename Functor>
void CompareUFunc(char** args, npy_intp* dimensions, npy_intp* steps,
void* data) {
BinaryUFunc<bfloat16, npy_bool, Functor>(args, dimensions, steps, data);
}
template <typename Functor>
void CompareUFunc(char** args, const npy_intp* dimensions,
const npy_intp* steps, void* data) {
BinaryUFunc<bfloat16, npy_bool, Functor>(args, dimensions, steps, data);
}

struct Bfloat16EqFunctor {
npy_bool operator()(bfloat16 a, bfloat16 b) { return a == b; }
11 changes: 10 additions & 1 deletion tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -1278,9 +1278,17 @@ port::Status CheckAndFetchProjectionWeights(
cudnnDataType_t data_type;
#if CUDNN_VERSION >= 8000
RETURN_IF_CUDNN_ERROR(cudnnGetRNNDescriptor_v6(
/*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc,
/*hiddenSize=*/&hidden_size_v,
/*numLayers=*/&num_layers_v,
/*dropoutDesc=*/&dropout_desc,
/*inputMode=*/&input_mode,
/*direction=*/&direction,
/*mode=*/&mode,
/*algo=*/&algo,
/*mathPrec=*/&data_type));
#else
RETURN_IF_CUDNN_ERROR(cudnnGetRNNDescriptor(
#endif
/*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc,
/*hiddenSize=*/&hidden_size_v,
/*numLayers=*/&num_layers_v,
@@ -1290,6 +1298,7 @@ port::Status CheckAndFetchProjectionWeights(
/*mode=*/&mode,
/*algo=*/&algo,
/*dataType=*/&data_type));
#endif
int rec_proj_size_v;
int out_proj_size_v;
RETURN_IF_CUDNN_ERROR(cudnnGetRNNProjectionLayers(
3 changes: 2 additions & 1 deletion tensorflow/stream_executor/cuda/cudnn_stub.cc
@@ -53,7 +53,8 @@ cudnnStatus_t GetSymbolNotFoundError() { return CUDNN_STATUS_INTERNAL_ERROR; }
#include "tensorflow/stream_executor/cuda/cudnn_6_0.inc"
#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 1
#include "tensorflow/stream_executor/cuda/cudnn_7_0.inc"
#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 3
// 2 instead of 3: see https://github.com/tensorflow/tensorflow/issues/32350
#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 2
#include "tensorflow/stream_executor/cuda/cudnn_7_1.inc"
#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 4
#include "tensorflow/stream_executor/cuda/cudnn_7_3.inc"