Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,10 @@ LogicalResult Verify(OpTy op) {
}

//===----------------------------------------------------------------------===//
// AllocRawOp
// TFAllocOp
//===----------------------------------------------------------------------===//
template <>
LogicalResult Verify<AllocRawOp>(AllocRawOp op) {
LogicalResult Verify<TFAllocOp>(TFAllocOp op) {
// Check that the total number of operands matches the number of dynamic
// dimensions specified in the memref type.
unsigned result_dyn_dims = op.getType().getNumDynamicDims();
Expand Down
27 changes: 17 additions & 10 deletions tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.td
Original file line number Diff line number Diff line change
Expand Up @@ -49,21 +49,28 @@ class TFFramework_Op<string mnemonic, list<OpTrait> traits = []> :
}

//===----------------------------------------------------------------------===//
// AllocRawOp
// TFAllocOp
//===----------------------------------------------------------------------===//
def TFFramework_AllocRawOp : TFFramework_Op<"alloc_raw",
def TFFramework_TFAllocOp : TFFramework_Op<"alloc",
[MemoryEffects<[MemAlloc<DefaultResource>]>]> {
let summary = "allocation of tensors that uses TF Framework";
let description = [{
Allocation of tensors during kernel execution in the Compute method.

This should be used to allocate any temporary or output memref.
Corresponds to `Allocator::AllocateRaw` in
tensorflow/core/framework/allocator.h.
This should be used to allocate any temporary or output memref. If
`output_index` and `input_indices` are given, attempts to forward one of
the input tensors to the output by calling `OpKernelContext::forward_input`.

If the attributes are missing or the forwarding fails, calls
`Allocator::AllocateRaw` in tensorflow/core/framework/allocator.h.
}];

let arguments = (ins TFFramework_OpKernelContextType:$ctx,
Variadic<Index>:$dyn_sizes);
let arguments = (ins
TFFramework_OpKernelContextType:$ctx,
Variadic<Index>:$dyn_sizes,
OptionalAttr<I32ArrayAttr>:$input_indices,
OptionalAttr<I32Attr>:$output_index
);
let results = (outs Res<AnyMemRef, "", [MemAlloc<DefaultResource>]>:$result);

let builders = [
Expand Down Expand Up @@ -92,16 +99,16 @@ def TFFramework_AllocRawOp : TFFramework_Op<"alloc_raw",
}

//===----------------------------------------------------------------------===//
// DeallocRawOp
// TFDeallocOp
//===----------------------------------------------------------------------===//
def TFFramework_DeallocRawOp : TFFramework_Op<"dealloc_raw",
def TFFramework_TFDeallocOp : TFFramework_Op<"dealloc",
[MemoryEffects<[MemFree]>]> {
let summary = "deallocation of tensors that uses TF Framework";
let description = [{
Deallocation of tensors during kernel execution in the Compute method.

This should be used to deallocate any temporary memref that was allocated
with `tf_framework.alloc_raw`.
with `tf_framework.alloc`.
Corresponds to `Allocator::DeallocateRaw` in
tensorflow/core/framework/allocator.h.
}];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ func @tf_entry(%size_0 : index , %size_2 : index) -> index
dealloc %buf : memref<?x10x?xf32>
std.return %size_0 : index
}
// CHECK-NEXT: [[VAL_3:%.*]] = tf_framework.alloc_raw
// CHECK-NEXT: [[VAL_3:%.*]] = tf_framework.alloc
// CHECK-SAME: ([[CTX]], [[SIZE_0]], [[SIZE_2]]) : memref<?x10x?xf32>
// CHECK-NEXT: tf_framework.dealloc_raw([[CTX]], [[VAL_3]]) : memref<?x10x?xf32>
// CHECK-NEXT: tf_framework.dealloc([[CTX]], [[VAL_3]]) : memref<?x10x?xf32>
// CHECK-NEXT: return [[SIZE_0]] : index

// -----
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

// Verifier test: one dynamic size operand for a memref with two dynamic dims
// must be rejected by the TFAllocOp verifier.
func @alloc_raw(%ctx: !tf_framework.op_kernel_context, %size : index) {
  // expected-error @+1 {{`dyn_sizes` count 1 does not match dynamic dimensions}}
  %buf = tf_framework.alloc(%ctx, %size) : memref<?x10x?xi8>
  return
}
25 changes: 18 additions & 7 deletions tensorflow/compiler/mlir/tools/kernel_gen/tests/ops.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,28 @@
// Verify the generic form can be parsed.
// RUN: kernel-gen-opt -mlir-print-op-generic %s | kernel-gen-opt | FileCheck %s

// CHECK-LABEL: func @alloc_raw
func @alloc_raw(%ctx: !tf_framework.op_kernel_context,
// CHECK-LABEL: func @alloc
func @alloc(%ctx: !tf_framework.op_kernel_context,
%size_0 : index , %size_2 : index) {
%buf_0 = tf_framework.alloc_raw(%ctx) : memref<10xi8>
%buf_1 = tf_framework.alloc_raw(%ctx, %size_0, %size_2) : memref<?x10x?xi8>
%buf_0 = tf_framework.alloc(%ctx) : memref<10xi8>
%buf_1 = tf_framework.alloc(%ctx, %size_0, %size_2) : memref<?x10x?xi8>
return
}

// CHECK-LABEL: func @dealloc_raw
func @dealloc_raw(%ctx: !tf_framework.op_kernel_context, %memref : memref<?x10xf32>) {
tf_framework.dealloc_raw(%ctx, %memref) : memref<?x10xf32>
// CHECK-LABEL: func @forwarding_alloc
func @forwarding_alloc(%ctx: !tf_framework.op_kernel_context,
%size_0 : index , %size_2 : index) {
%buf = tf_framework.alloc(%ctx, %size_0, %size_2) {
input_indices = [0 : i32, 1 : i32],
output_index = 0 : i32
} : memref<?x10x?xi8>
return
}

// CHECK-LABEL: func @dealloc
func @dealloc(%ctx: !tf_framework.op_kernel_context,
%memref : memref<?x10xf32>) {
tf_framework.dealloc(%ctx, %memref) : memref<?x10xf32>
return
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
// RUN: kernel-gen-opt %s -tf-kernel-to-llvm -split-input-file | FileCheck %s
// RUN: kernel-gen-opt %s -tf-kernel-to-llvm -split-input-file --print-ir-after-all | FileCheck %s

// CHECK: llvm.func @_mlir_ciface_tf_alloc_raw
// CHECK-SAME: (!llvm.ptr<i8>, !llvm.i64) -> !llvm.ptr<i8>
// CHECK: llvm.func @_mlir_ciface_tf_alloc
// CHECK-SAME: (!llvm.ptr<i8>, !llvm.i64, !llvm.i32, !llvm.i32, !llvm.ptr<i32>) -> !llvm.ptr<i8>

// CHECK-LABEL: llvm.func @alloc_raw(
// CHECK-LABEL: llvm.func @alloc(
// CHECK-SAME: [[TF_CTX:%.*]]: !llvm.ptr<i8>,
// CHECK-SAME: [[SIZE_0:%.*]]: !llvm.i64,
// CHECK-SAME: [[SIZE_2:%.*]]: !llvm.i64) -> [[DESC_TY:!.*]] {
func @alloc_raw(%ctx: !tf_framework.op_kernel_context,
func @alloc(%ctx: !tf_framework.op_kernel_context,
%size_0 : index , %size_2 : index) -> memref<?x10x?xf32> {
%buf = tf_framework.alloc_raw(%ctx, %size_0, %size_2) : memref<?x10x?xf32>
%buf = tf_framework.alloc(%ctx, %size_0, %size_2) : memref<?x10x?xf32>
std.return %buf : memref<?x10x?xf32>
}
// Compute number of elements.
Expand All @@ -25,10 +25,19 @@ func @alloc_raw(%ctx: !tf_framework.op_kernel_context,
// CHECK: [[SIZE_OF_FLOAT:%.*]] = llvm.ptrtoint [[GEP]]
// CHECK-SAME: !llvm.ptr<float> to !llvm.i64

// Allocate memory.
// Compute total size in bytes.
// CHECK: [[NUM_BYTES:%.*]] = llvm.mul [[NUM_ELEM_1]], [[SIZE_OF_FLOAT]]
// CHECK: [[BYTES_PTR:%.*]] = llvm.call @{{.*}}([[TF_CTX]], [[NUM_BYTES]])
// CHECK-SAME: (!llvm.ptr<i8>, !llvm.i64) -> !llvm.ptr<i8>

// Compute output index (-1) and candidate indices (0, NULL).
// CHECK: [[OUTPUT_INDEX:%.*]] = llvm.mlir.constant(-1 : i32) : !llvm.i32
// CHECK-NEXT: [[NUM_CANDIDATES:%.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32
// CHECK-NEXT: [[CANDIDATES_PTR:%.*]] = llvm.mlir.null : !llvm.ptr<i32>

// Allocate memory.
// CHECK: [[BYTES_PTR:%.*]] = llvm.call @{{.*}}([[TF_CTX]], [[NUM_BYTES]],
// CHECK-SAME: [[OUTPUT_INDEX]], [[NUM_CANDIDATES]], [[CANDIDATES_PTR]])
// CHECK-SAME: (!llvm.ptr<i8>, !llvm.i64, !llvm.i32, !llvm.i32, !llvm.ptr<i32>
// CHECK-SAME: ) -> !llvm.ptr<i8>

// Build memref descriptor.
// CHECK: [[DESC_0:%.*]] = llvm.mlir.undef : [[DESC_TY]]
Expand All @@ -55,13 +64,13 @@ func @alloc_raw(%ctx: !tf_framework.op_kernel_context,

// -----

// CHECK: llvm.func @_mlir_ciface_tf_dealloc_raw(!llvm.ptr<i8>, !llvm.ptr<i8>)
// CHECK: llvm.func @_mlir_ciface_tf_dealloc(!llvm.ptr<i8>, !llvm.ptr<i8>)

// CHECK-LABEL: llvm.func @dealloc_raw(
// CHECK-LABEL: llvm.func @dealloc(
// CHECK-SAME: [[TF_CTX:%.*]]: !llvm.ptr<i8>,
func @dealloc_raw(%ctx: !tf_framework.op_kernel_context,
func @dealloc(%ctx: !tf_framework.op_kernel_context,
%memref : memref<?x10xf32>) {
tf_framework.dealloc_raw(%ctx, %memref) : memref<?x10xf32>
tf_framework.dealloc(%ctx, %memref) : memref<?x10xf32>
return
}
// Extract allocated ptr from the memref descriptor.
Expand All @@ -71,5 +80,5 @@ func @dealloc_raw(%ctx: !tf_framework.op_kernel_context,
// CHECK-SAME: !llvm.ptr<float> to !llvm.ptr<i8>

// Deallocate.
// CHECK: llvm.call @_mlir_ciface_tf_dealloc_raw(
// CHECK: llvm.call @_mlir_ciface_tf_dealloc(
// CHECK-SAME: [[TF_CTX]], [[VOID_PTR]]) : (!llvm.ptr<i8>, !llvm.ptr<i8>) -> ()
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,49 @@ namespace tf_framework {
namespace {

using tensorflow::Allocator;
using tensorflow::AllocatorAttributes;

// Returns the TF allocator attached to the given OpKernelContext.
// `op_kernel_ctx` is a type-erased tensorflow::OpKernelContext*.
Allocator* GetAllocator(void* op_kernel_ctx) {
  auto* ctx = static_cast<tensorflow::OpKernelContext*>(op_kernel_ctx);
  // TODO(pifon): Figure out how to set AllocatorAttributes correctly.
  AllocatorAttributes attrs;
  return ctx->get_allocator(attrs);
}

} // namespace

extern "C" void* _mlir_ciface_tf_alloc_raw(void* op_kernel_ctx,
size_t num_bytes) {
// Allocates `num_bytes` of memory for a kernel output/temporary buffer.
//
// If `output_index` != -1, first attempts to forward one of the inputs listed
// in `candidate_input_indices` (of length `num_candidates`) to that output via
// OpKernelContext::forward_input; on success the forwarded tensor's buffer is
// returned. Otherwise falls back to a raw allocation through the context's
// allocator (Allocator::AllocateRaw).
extern "C" void* _mlir_ciface_tf_alloc(void* op_kernel_ctx, size_t num_bytes,
                                       int32_t output_index,
                                       int32_t num_candidates,
                                       int32_t* candidate_input_indices) {
  auto* ctx = static_cast<tensorflow::OpKernelContext*>(op_kernel_ctx);

  if (output_index != -1) {
    auto output_dtype = ctx->expected_output_dtype(output_index);
    // Bug fix: the DataType enum value was previously used directly as the
    // element size, so `num_bytes / element_size` divided by the enum's
    // numeric value. Use the actual per-element size in bytes instead.
    size_t element_size = tensorflow::DataTypeSize(output_dtype);
    // Guard against variable-size dtypes (DataTypeSize returns 0 for them)
    // to avoid a division by zero; forwarding is skipped in that case.
    if (element_size > 0) {
      // Create a 1D shape, because the shapes don't have to match exactly for
      // input forwarding. Only the number of elements must be the same.
      tensorflow::TensorShape output_shape;
      output_shape.AddDim(num_bytes / element_size);

      // Iterate over indices of all inputs that can potentially be used for
      // forwarding.
      for (int i = 0; i < num_candidates; ++i) {
        // TODO(pifon): Expose fetching AllocatorAttributes with the
        // output_index.
        AllocatorAttributes output_attr;
        auto tensor = ctx->forward_input(
            candidate_input_indices[i], output_index, output_dtype,
            output_shape, ctx->output_memory_type(output_index), output_attr);
        if (tensor != nullptr) {
          return tensor->data();
        }
      }
    }
  }
  // If no forwarding happened, allocate a chunk of memory.
  return GetAllocator(op_kernel_ctx)
      ->AllocateRaw(Allocator::kAllocatorAlignment, num_bytes);
}

extern "C" void _mlir_ciface_tf_dealloc_raw(void* op_kernel_ctx, void* ptr) {
extern "C" void _mlir_ciface_tf_dealloc(void* op_kernel_ctx, void* ptr) {
GetAllocator(op_kernel_ctx)->DeallocateRaw(ptr);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ namespace mlir {
namespace kernel_gen {
namespace tf_framework {

extern "C" MLIR_RUNNERUTILS_EXPORT void* _mlir_ciface_tf_alloc_raw(
void* op_kernel_ctx, size_t num_bytes);
extern "C" MLIR_RUNNERUTILS_EXPORT void* _mlir_ciface_tf_alloc(
void* op_kernel_ctx, size_t num_bytes, int32_t output_index,
int32_t num_candidates, int32_t* candidate_input_indices);

extern "C" MLIR_RUNNERUTILS_EXPORT void _mlir_ciface_tf_dealloc_raw(
extern "C" MLIR_RUNNERUTILS_EXPORT void _mlir_ciface_tf_dealloc(
void* op_kernel_ctx, void* ptr);

} // namespace tf_framework
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,19 +75,19 @@ class AllocOpConverter : public OpConversionPattern<AllocOp> {
return failure();
}
// Symbolic operands that bind to the symbols of the memref's layout map are
// not supported by AllocRawOp.
// not supported by TFAllocOp.
if (alloc.getNumSymbolicOperands() != 0) {
return failure();
}
rewriter.replaceOpWithNewOp<AllocRawOp>(alloc, alloc.getType(), ctx,
operands);
rewriter.replaceOpWithNewOp<TFAllocOp>(alloc, alloc.getType(), ctx,
operands);
return success();
}
};

// Converts std.dealloc to tf_framework.dealloc_raw using OpKernelContextType
// arg of the parent function.
class DeallocOpConverter : public OpConversionPattern<DeallocOp> {
class TFDeallocOpConverter : public OpConversionPattern<DeallocOp> {
public:
using OpConversionPattern<DeallocOp>::OpConversionPattern;

Expand All @@ -108,8 +108,8 @@ class DeallocOpConverter : public OpConversionPattern<DeallocOp> {
return failure();
}
DeallocOp::Adaptor transformed(operands);
rewriter.replaceOpWithNewOp<DeallocRawOp>(dealloc, ctx,
transformed.memref());
rewriter.replaceOpWithNewOp<TFDeallocOp>(dealloc, ctx,
transformed.memref());
return success();
}
};
Expand All @@ -118,7 +118,7 @@ class DeallocOpConverter : public OpConversionPattern<DeallocOp> {

// Registers the std-to-tf_framework conversion patterns (alloc, dealloc and
// function signature rewriting) into `patterns`.
void PopulateEmbedTFFrameworkConversionPatterns(
    MLIRContext *context, OwningRewritePatternList *patterns) {
  patterns->insert<AllocOpConverter, TFDeallocOpConverter, FuncOpConverter>(
      context);
}

Expand Down
Loading