-
Notifications
You must be signed in to change notification settings - Fork 14.2k
[MLIR][Dialect] Add XeVM dialect #144811
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
silee2
wants to merge
1
commit into
llvm:main
Choose a base branch
from
silee2:addXeVMDialect
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
[MLIR][Dialect] Add XeVM dialect #144811
+1,114
−3
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-gpu Author: Sang Ik Lee (silee2) ChangesPatch is 48.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144811.diff 9 Files Affected:
diff --git a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
index 9c5bbae1022f7..cfad07e57021f 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt
@@ -87,3 +87,13 @@ mlir_tablegen(VCIXConversions.inc -gen-llvmir-conversions)
mlir_tablegen(VCIXOpsAttributes.h.inc -gen-attrdef-decls -attrdefs-dialect=vcix)
mlir_tablegen(VCIXOpsAttributes.cpp.inc -gen-attrdef-defs -attrdefs-dialect=vcix)
add_public_tablegen_target(MLIRVCIXConversionsIncGen)
+
+add_mlir_dialect(XeVMOps xevm)
+add_mlir_doc(XeVMOps XeVMDialect Dialects/ -gen-dialect-doc -dialect=xevm)
+set(LLVM_TARGET_DEFINITIONS XeVMOps.td)
+mlir_tablegen(XeVMConversions.inc -gen-llvmir-conversions)
+mlir_tablegen(XeVMOpsEnums.h.inc -gen-enum-decls)
+mlir_tablegen(XeVMOpsEnums.cpp.inc -gen-enum-defs)
+mlir_tablegen(XeVMOpsAttributes.h.inc -gen-attrdef-decls -attrdefs-dialect=xevm)
+mlir_tablegen(XeVMOpsAttributes.cpp.inc -gen-attrdef-defs -attrdefs-dialect=xevm)
+add_public_tablegen_target(MLIRXeVMConversionsIncGen)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/XeVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/XeVMDialect.h
new file mode 100644
index 0000000000000..a83d4248c862c
--- /dev/null
+++ b/mlir/include/mlir/Dialect/LLVMIR/XeVMDialect.h
@@ -0,0 +1,28 @@
+//===-- XeVMDialect.h - MLIR XeVM target definitions ------------*- C++ -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_LLVMIR_XEVMDIALECT_H_
+#define MLIR_DIALECT_LLVMIR_XEVMDIALECT_H_
+
+#include "mlir/Bytecode/BytecodeOpInterface.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/Dialect.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/Target/LLVMIR/ModuleTranslation.h"
+
+#include <mlir/Dialect/LLVMIR/XeVMOpsEnums.h.inc>
+
+#define GET_ATTRDEF_CLASSES
+#include <mlir/Dialect/LLVMIR/XeVMOpsAttributes.h.inc>
+
+#define GET_OP_CLASSES
+#include <mlir/Dialect/LLVMIR/XeVMOps.h.inc>
+
+#include <mlir/Dialect/LLVMIR/XeVMOpsDialect.h.inc>
+
+#endif /* MLIR_DIALECT_LLVMIR_XEVMDIALECT_H_ */
diff --git a/mlir/include/mlir/Dialect/LLVMIR/XeVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/XeVMOps.td
new file mode 100644
index 0000000000000..9525c4a731efa
--- /dev/null
+++ b/mlir/include/mlir/Dialect/LLVMIR/XeVMOps.td
@@ -0,0 +1,550 @@
+//===-- XeVMOps.td - XeVM dialect definition ---------------*- tablegen -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef XEVMIR_OPS
+#define XEVMIR_OPS
+
+include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
+include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+
+include "mlir/IR/OpBase.td"
+include "mlir/IR/EnumAttr.td"
+
+def XeVM_Dialect : Dialect {
+ let name = "xevm";
+ let cppNamespace = "::mlir::xevm";
+ let dependentDialects = ["LLVM::LLVMDialect"];
+
+ let extraClassDeclaration = [{
+ /// Get the name for the attribute used to specify cache control
+ /// decorations.
+ static constexpr ::llvm::StringRef getCacheControlsAttrName() {
+ return ::llvm::StringLiteral("xevm.DecorationCacheControl");
+ }
+ }];
+
+ let useDefaultAttributePrinterParser = 1;
+}
+
+class XeVM_Attr<string attrName, string attrMnemonic, list<Trait> traits = []>
+ : AttrDef<XeVM_Dialect, attrName, traits> {
+ let mnemonic = attrMnemonic;
+}
+
+class XeVM_Op<string mnemonic, list<Trait> traits = []>
+ : Op<XeVM_Dialect, mnemonic, traits> {
+
+ code extraBaseClassDeclaration = [{
+ void printProperties(::mlir::MLIRContext *ctx,
+ ::mlir::OpAsmPrinter &p, const Properties &prop,
+ ::mlir::ArrayRef<::llvm::StringRef> elidedProps) {
+ Attribute propAttr = getPropertiesAsAttr(ctx, prop);
+ if (propAttr)
+ p << "<" << propAttr << ">";
+ }
+
+ static ::mlir::ParseResult parseProperties(::mlir::OpAsmParser &parser,
+ ::mlir::OperationState &result) {
+ if (mlir::succeeded(parser.parseOptionalLess())) {
+ if (parser.parseAttribute(result.propertiesAttr) || parser.parseGreater())
+ return failure();
+ }
+ return success();
+ }
+
+ }];
+}
+
+def XeVM_ElemType : AnyTypeOf<[AnyI8, AnyI16, AnyI32, F32, TF32, F16, BF16]>;
+
+def LoadCacheControlDefault : I32EnumAttrCase<"DEFAULT", 0, "Default">;
+def LoadCacheControl_L1uc_L2uc_L3uc
+ : I32EnumAttrCase<"L1UC_L2UC_L3UC", 1, "L1uc_L2uc_L3uc">;
+def LoadCacheControl_L1uc_L2uc_L3c
+ : I32EnumAttrCase<"L1UC_L2UC_L3C", 2, "L1uc_L2uc_L3c">;
+def LoadCacheControl_L1uc_L2c_L3uc
+ : I32EnumAttrCase<"L1UC_L2C_L3UC", 3, "L1uc_L2c_L3uc">;
+def LoadCacheControl_L1uc_L2c_L3c
+ : I32EnumAttrCase<"L1UC_L2C_L3C", 4, "L1uc_L2c_L3c">;
+def LoadCacheControl_L1c_L2uc_L3uc
+ : I32EnumAttrCase<"L1C_L2UC_L3UC", 5, "L1c_L2uc_L3uc">;
+def LoadCacheControl_L1c_L2uc_L3c
+ : I32EnumAttrCase<"L1C_L2UC_L3C", 6, "L1c_L2uc_L3c">;
+def LoadCacheControl_L1c_L2c_L3uc
+ : I32EnumAttrCase<"L1C_L2C_L3UC", 7, "L1c_L2c_L3uc">;
+def LoadCacheControl_L1c_L2c_L3c
+ : I32EnumAttrCase<"L1C_L2C_L3C", 8, "L1c_L2c_L3c">;
+def LoadCacheControl_L1s_L2uc_L3uc
+ : I32EnumAttrCase<"L1S_L2UC_L3UC", 9, "L1s_L2uc_L3uc">;
+def LoadCacheControl_L1s_L2uc_L3c
+ : I32EnumAttrCase<"L1S_L2UC_L3C", 10, "L1s_L2uc_L3c">;
+def LoadCacheControl_L1s_L2c_L3uc
+ : I32EnumAttrCase<"L1S_L2C_L3UC", 11, "L1s_L2c_L3uc">;
+def LoadCacheControl_L1s_L2c_L3c
+ : I32EnumAttrCase<"L1S_L2C_L3C", 12, "L1s_L2c_L3c">;
+def LoadCacheControlInvalidateRead
+ : I32EnumAttrCase<"INVALIDATE_READ", 13, "ir">;
+
+def XeVM_LoadCacheControl
+ : I32EnumAttr<
+ "LoadCacheControl", "XeVM load ops cache control",
+ [LoadCacheControlDefault, LoadCacheControl_L1uc_L2uc_L3uc,
+ LoadCacheControl_L1uc_L2uc_L3c, LoadCacheControl_L1uc_L2c_L3uc,
+ LoadCacheControl_L1uc_L2c_L3c, LoadCacheControl_L1c_L2uc_L3uc,
+ LoadCacheControl_L1c_L2uc_L3c, LoadCacheControl_L1c_L2c_L3uc,
+ LoadCacheControl_L1c_L2c_L3c, LoadCacheControl_L1s_L2uc_L3uc,
+ LoadCacheControl_L1s_L2uc_L3c, LoadCacheControl_L1s_L2c_L3uc,
+ LoadCacheControl_L1s_L2c_L3c, LoadCacheControlInvalidateRead]> {
+ let cppNamespace = "::mlir::xevm";
+ let genSpecializedAttr = 0;
+}
+
+def XeVM_LoadCacheControlAttr
+ : EnumAttr<XeVM_Dialect, XeVM_LoadCacheControl, "load_cache_control"> {
+ let summary = [{Describe the cache settings for load operators}];
+ let assemblyFormat = "`<` $value `>`";
+}
+
+def StoreCacheControlDefault : I32EnumAttrCase<"DEFAULT", 0, "Default">;
+def StoreCacheControl_L1uc_L2uc_L3uc
+ : I32EnumAttrCase<"L1UC_L2UC_L3UC", 1, "L1uc_L2uc_L3uc">;
+def StoreCacheControl_L1uc_L2uc_L3wb
+ : I32EnumAttrCase<"L1UC_L2UC_L3WB", 2, "L1uc_L2uc_L3wb">;
+def StoreCacheControl_L1uc_L2wb_L3uc
+ : I32EnumAttrCase<"L1UC_L2WB_L3UC", 3, "L1uc_L2wb_L3uc">;
+def StoreCacheControl_L1uc_L2wb_L3wb
+ : I32EnumAttrCase<"L1UC_L2WB_L3WB", 4, "L1uc_L2wb_L3wb">;
+def StoreCacheControl_L1wt_L2uc_L3uc
+ : I32EnumAttrCase<"L1WT_L2UC_L3UC", 5, "L1wt_L2uc_L3uc">;
+def StoreCacheControl_L1wt_L2uc_L3wb
+ : I32EnumAttrCase<"L1WT_L2UC_L3WB", 6, "L1wt_L2uc_L3wb">;
+def StoreCacheControl_L1wt_L2wb_L3uc
+ : I32EnumAttrCase<"L1WT_L2WB_L3UC", 7, "L1wt_L2wb_L3uc">;
+def StoreCacheControl_L1wt_L2wb_L3wb
+ : I32EnumAttrCase<"L1WT_L2WB_L3WB", 8, "L1wt_L2wb_L3wb">;
+def StoreCacheControl_L1s_L2uc_L3uc
+ : I32EnumAttrCase<"L1S_L2UC_L3UC", 9, "L1s_L2uc_L3uc">;
+def StoreCacheControl_L1s_L2uc_L3wb
+ : I32EnumAttrCase<"L1S_L2UC_L3WB", 10, "L1s_L2uc_L3wb">;
+def StoreCacheControl_L1s_L2wb_L3uc
+ : I32EnumAttrCase<"L1S_L2WB_L3UC", 11, "L1s_L2wb_L3uc">;
+def StoreCacheControl_L1s_L2wb_L3wb
+ : I32EnumAttrCase<"L1S_L2WB_L3WB", 12, "L1s_L2wb_L3wb">;
+def StoreCacheControl_L1wb_L2uc_L3uc
+ : I32EnumAttrCase<"L1WB_L2UC_L3UC", 13, "L1wb_L2uc_L3uc">;
+def StoreCacheControl_L1wb_L2wb_L3uc
+ : I32EnumAttrCase<"L1WB_L2WB_L3UC", 14, "L1wb_L2wb_L3uc">;
+def StoreCacheControl_L1wb_L2uc_L3wb
+ : I32EnumAttrCase<"L1WB_L2UC_L3WB", 15, "L1wb_L2uc_L3wb">;
+
+def XeVM_StoreCacheControl
+ : I32EnumAttr<
+ "StoreCacheControl", "XeVM store ops cache control",
+ [StoreCacheControlDefault, StoreCacheControl_L1uc_L2uc_L3uc,
+ StoreCacheControl_L1uc_L2uc_L3wb, StoreCacheControl_L1uc_L2wb_L3uc,
+ StoreCacheControl_L1uc_L2wb_L3wb, StoreCacheControl_L1wt_L2uc_L3uc,
+ StoreCacheControl_L1wt_L2uc_L3wb, StoreCacheControl_L1wt_L2wb_L3uc,
+ StoreCacheControl_L1wt_L2wb_L3wb, StoreCacheControl_L1s_L2uc_L3uc,
+ StoreCacheControl_L1s_L2uc_L3wb, StoreCacheControl_L1s_L2wb_L3uc,
+ StoreCacheControl_L1s_L2wb_L3wb, StoreCacheControl_L1wb_L2uc_L3uc,
+ StoreCacheControl_L1wb_L2wb_L3uc,
+ StoreCacheControl_L1wb_L2uc_L3wb]> {
+ let cppNamespace = "::mlir::xevm";
+ let genSpecializedAttr = 0;
+}
+
+def XeVM_StoreCacheControlAttr
+ : EnumAttr<XeVM_Dialect, XeVM_StoreCacheControl, "store_cache_control"> {
+ let summary = [{Describe the cache settings for store operators}];
+ let assemblyFormat = "`<` $value `>`";
+}
+
+def XeVM_BlockLoad2dOp
+ : XeVM_Op<"blockload2d">,
+ Results<(outs FixedVectorOfRankAndType<[1], [XeVM_ElemType]>:$res)>,
+ Arguments<(ins Arg<LLVM_AnyPointer, "", [MemRead]>:$ptr, I32:$base_width,
+ I32:$base_height, I32:$base_pitch, I32:$x, I32:$y,
+ I32Attr:$elem_size_in_bits, I32Attr:$tile_width, I32Attr:$tile_height,
+ I32Attr:$v_blocks, I1Attr:$transpose, I1Attr:$pack_register,
+ OptionalAttr<XeVM_LoadCacheControlAttr>:$cache_control)> {
+
+ let summary = "2D block load";
+
+ let description = [{
+ The `xevm.blockload2d` operation loads a two dimensional matrix tile
+ from a base matrix residing in memory. The parameters are:
+ $ptr - the base address of the base matrix containing the tile to load
+ $base_width, $base_height, $base_pitch - the shape of the base matrix.
+ pitch is the physical stride between the first columns of the current row
+ and the subsequent row. All units are in bytes.
+ $x, $y, $tile_width, $tile_height - the starting offsets and shape of
+ the tile to load in number of elements.
+ $elem_size_in_bits - the size in bits of the matrix element type
+ - 32 for f32, tf32
+ - 16 for f16, int16, bf16
+ - 8 for int8
+ $v_blocks - number of consecutive tiles in innermost dimension direction to load
+ $transpose - transpose the tile in registers (useful for 32 bit element type)
+ $pack_register - pack element types narrower than register bit width. [M, N] => [M/factor, N, factor] where factor is register_size_in_bits / elem_size_in_bits
+ $cache_control - an enumerator that sets the cache behaviour
+
+ Notes:
+ - the $transpose and $pack_register parameters are mutual exclusive
+ - transposing the tile loaded is used for A matrix in backward path or used for the B matrix operand
+ (D = C + A * B), where A has row-major layout and B should have column-major layout in memory.
+ - if the tile loaded contains out of bound elements of the matrix, they are filled with 0.
+
+ Example:
+ ```mlir
+ %base_width_a = arith.constant 32 : i32
+ %base_height_a = arith.constant 8 : i32
+ %base_pitch_a = arith.constant 32 : i32
+ %x = arith.constant 0 : i32
+ %y = arith.constant 0 : i32
+ %loaded_a = xevm.blockload2d %src, %base_width_a, %base_height_a, %base_pitch_a, %x, %y <{elem_size_in_bits=16 : i32, tile_width=16 : i32, tile_height=8 : i32, v_blocks=1 : i32, transpose=false : i32, pack_register=false, cache_control=#xevm.load_cache_control<Default>}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi16>
+ ```
+ }];
+
+ let assemblyFormat = [{
+ operands prop-dict attr-dict `:` functional-type(operands, results)
+ }];
+
+ let extraClassDeclaration = extraBaseClassDeclaration#[{
+ /// Get cache control or return default if not set.
+ ::mlir::xevm::LoadCacheControl getCacheControlOrDefault() {
+ if(getCacheControl())
+ return *getCacheControl();
+ return ::mlir::xevm::LoadCacheControl::DEFAULT;
+ }
+ }];
+
+ let hasVerifier = 1;
+}
+
+def XeVM_BlockStore2dOp
+ : XeVM_Op<"blockstore2d">,
+ Arguments<(ins Arg<LLVM_AnyPointer, "", [MemWrite]>:$ptr, I32:$base_width,
+ I32:$base_height, I32:$base_pitch, I32:$x, I32:$y,
+ I32Attr:$elem_size_in_bits, I32Attr:$tile_width, I32Attr:$tile_height,
+ FixedVectorOfRankAndType<[1], [XeVM_ElemType]>:$stored_val,
+ OptionalAttr<XeVM_StoreCacheControlAttr>:$cache_control)> {
+
+ let summary = "2D block store";
+
+ let description = [{
+ The `xevm.blockstore2d` operation stores a two dimensional tile into a
+ larger matrix residing in memory. The parameters are:
+ $ptr - the base address of the target matrix where to store the tile
+ $base_width, $base_height, $base_pitch - the shape of the target matrix. pitch is the
+ physical stride between the first columns of the current row and the subsequent row.
+ All units are in bytes.
+ $x, $y, $tile_width, $tile_height - the starting offsets and shape of the tile to store
+ in number of elements.
+ $elem_size_in_bits - the size in bits of the matrix element
+ - 32 for f32, tf32
+ - 16 for f16, int16, bf16
+ - 8 for int8
+ $cache_control - an enumerator that sets the cache behaviour
+ $stored_val - the tile to store
+
+ Example:
+ ```mlir
+ %base_width_c = arith.constant 64 : i32
+ %base_height_c = arith.constant 8 : i32
+ %base_pitch_c = arith.constant 64 : i32
+ %x = arith.constant 0 : i32
+ %y = arith.constant 0 : i32
+ xevm.blockstore2d %dst, %base_width_c, %base_height_c, %base_pitch_c, %x, %y, %src <{elem_size_in_bits=32 : i32, tile_width=16 : i32, tile_height=8 : i32, cache_control=#xevm.load_cache_control<Default>}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32, vector<8xi32>)
+ ```
+ }];
+
+ let assemblyFormat = [{
+ operands prop-dict attr-dict `:` `(` type(operands) `)`
+ }];
+
+ let extraClassDeclaration = extraBaseClassDeclaration#[{
+ /// Get cache control or return default if not set.
+ ::mlir::xevm::StoreCacheControl getCacheControlOrDefault() {
+ if(getCacheControl())
+ return *getCacheControl();
+ return ::mlir::xevm::StoreCacheControl::DEFAULT;
+ }
+
+ /// Default value for v_blocks is 1.
+ constexpr uint32_t getVBlocks() {
+ return 1;
+ }
+ }];
+
+ let hasVerifier = 1;
+}
+
+def MemScopeLane : I32EnumAttrCase<"LANE", 0, "lane">;
+def MemScopeSg : I32EnumAttrCase<"SUBGROUP", 1, "subgroup">;
+def MemScopeWg : I32EnumAttrCase<"WORKGROUP", 2, "workgroup">;
+def MemScopeCluster : I32EnumAttrCase<"CLUSTER", 3, "cluster">;
+def MemScopeDevice : I32EnumAttrCase<"DEVICE", 4, "device">;
+def MemScopeSystem : I32EnumAttrCase<"SYSTEM", 5, "system">;
+
+def XeVM_MemScope
+ : I32EnumAttr<"MemScope", "XeVM memory scope",
+ [MemScopeLane, MemScopeSg, MemScopeWg, MemScopeCluster,
+ MemScopeDevice, MemScopeSystem]> {
+ let genSpecializedAttr = 0;
+ let cppNamespace = "::mlir::xevm";
+}
+def XeVM_MemScopeAttr : EnumAttr<XeVM_Dialect, XeVM_MemScope, "mem_scope"> {
+ let summary = [{Describe memory scopes}];
+ let assemblyFormat = "`<` $value `>`";
+}
+
+def AddrSpacePrivate : I32EnumAttrCase<"PRIVATE", 0, "private">;
+def AddrSpaceGlobal : I32EnumAttrCase<"GLOBAL", 1, "global">;
+def AddrSpaceConstant : I32EnumAttrCase<"CONSTANT", 2, "constant">;
+def AddrSpaceShared : I32EnumAttrCase<"SHARED", 3, "shared">;
+def AddrSpaceGeneric : I32EnumAttrCase<"GENERIC", 4, "generic">;
+
+def XeVM_AddrSpace
+ : I32EnumAttr<"AddrSpace", "Address spaces",
+ [AddrSpacePrivate, AddrSpaceGlobal, AddrSpaceConstant,
+ AddrSpaceShared, AddrSpaceGeneric]> {
+ let genSpecializedAttr = 0;
+ let cppNamespace = "mlir::xevm";
+}
+def XeVM_AddrSpaceAttr : EnumAttr<XeVM_Dialect, XeVM_AddrSpace, "addr_space"> {
+ let summary = [{Describe address spaces}];
+ let assemblyFormat = "`<` $value `>`";
+}
+
+def XeVM_MemfenceOp
+ : XeVM_Op<"memfence">,
+ Arguments<(ins XeVM_MemScopeAttr:$scope,
+ DefaultValuedAttr<XeVM_AddrSpaceAttr,
+ "mlir::xevm::AddrSpace::GENERIC">:$addrspace)> {
+ let summary = "Work-item's memory fence.";
+ let description = [{
+ This operation ensures that all prior memory accesses of this
+ work-item to `addrspace` are visible to all other work-items in `scope`.
+ Parameters description:
+ $scope - specify the memory scope at which all other work-items should observe
+ memory operations prior to the fence.
+ $addrspace - specify the address space of work-item's memory accesses
+ to be affected by the fence.
+ }];
+ let assemblyFormat = [{prop-dict attr-dict}];
+
+ let extraClassDeclaration = extraBaseClassDeclaration#[{
+ }];
+}
+
+def XeVM_PrefetchOp
+ : XeVM_Op<"prefetch">,
+ Arguments<(ins Arg<LLVM_AnyPointer, "", [MemRead]>:$ptr,
+ XeVM_AddrSpaceAttr:$addrspace,
+ OptionalAttr<XeVM_LoadCacheControlAttr>:$cache_control)> {
+ let summary = "Prefetch data into a cache subsystem.";
+ let description = [{
+ Work-item issues a prefetch from global memory to cache:
+ $ptr - memory pointer.
+ $addrspace - address space of a pointer, must be generic or global.
+ $cache_control - specify caching options
+ }];
+ let assemblyFormat = [{
+ operands prop-dict attr-dict `:` `(` type(operands) `)`
+ }];
+
+ let extraClassDeclaration = extraBaseClassDeclaration#[{
+ /// Get cache control or return default if not set.
+ ::mlir::xevm::LoadCacheControl getCacheControlOrDefault() {
+ if(getCacheControl())
+ return *getCacheControl();
+ return ::mlir::xevm::LoadCacheControl::DEFAULT;
+ }
+ }];
+
+ let hasVerifier = 1;
+}
+
+def XeVM_BlockPrefetch2dOp
+ : XeVM_Op<"blockprefetch2d">,
+ Arguments<(ins Arg<LLVM_AnyPointer, "", [MemRead]>:$ptr, I32:$base_width,
+ I32:$base_height, I32:$base_pitch, I32:$x, I32:$y,
+ I32Attr:$elem_size_in_bits, I32Attr:$tile_width, I32Attr:$tile_height,
+ I32Attr:$v_blocks,
+ OptionalAttr<XeVM_LoadCacheControlAttr>:$cache_control)> {
+
+ let summary = "2D block prefetch";
+
+ let description = [{
+ The `xevm.blockprefetch2d` operation prefetches a two dimensional tile
+ from a larger base matrix residing in memory. The parameters are:
+ $ptr - the base address of the base matrix containing the tile to prefetch
+ $base_width, $base_height, $base_pitch - the shape of the base matrix.
+ pitch is the physical stride between the first columns of the current row
+ and the subsequent row. All units are in bytes.
+ $x, $y, $tile_width, $tile_height - the starting offsets and shape of tile
+ to prefetch in number of elements.
+ $elem_size_in_bits - the size in bits of the matrix element
+ - 32 for f32, bf32
+ - 16 for f16, int16, bf16
+ - 8 for int8, int4, int2
+ $v_blocks - number of tiles in innermost dimension direction to prefetch
+ $cache_control - an enumerator that sets the cache behaviour
+
+ Example:
+ ```mlir
+ xevm.blockprefetch2d %ptr, %base_width, %base_height, %base_pitch, %x, %y <{elem_size_in_bits=8 : i32, tile_width=32 : i32, tile_height=8 : i32, v_blocks=1 : i32, cache_control=#xevm.load_cache_control<L1uc_L2uc_L3uc>}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32)
+ ```
+ }];
+
+ let assemblyFormat = [{
+ operands prop-dict attr-dict `:` `(` type(operands) `)`
+ }];
+
+ let extraClassDeclaration = extraBaseClassDeclaration#[{
+ /// Get cache control or return default if not set.
+ ::mlir::xevm::LoadCacheControl getCacheControlOrDefault() {
+ ...
[truncated]
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
XeVM is a new dialect that is designed to exposes Intel GPU hardware features in a future proof way.
RFC is here: https://discourse.llvm.org/t/mlir-rfc-dialect-xevm-proposal-for-new-xevm-dialect/86955
In short, XeVM is the nvvm or rocdl for Intel GPU.
The RFC includes background and challenges that XeVM is designed to solve.
And also lists plan for upstreaming at the end.
This PR is the first of a series and it covers dialect definition and op tests only.