Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ba837fc
add igemm bwd v4r1 xdlops kernel
Apr 20, 2020
13e5227
add -abseil-string-find-startswith in CMakeList
Apr 21, 2020
fe5a8b2
modify caller of ComputeLDSRequiredSize
Apr 22, 2020
1facbc2
merge develop conflicts
shaojiewang Apr 24, 2020
71f0abf
merge conflicts
shaojiewang Apr 29, 2020
c6b52e5
Merge branch 'develop' into igemm_bwd_xdlops_v4r1
shaojiewang Apr 30, 2020
5609daf
Merge branch 'develop' into igemm_bwd_xdlops_v4r1
May 1, 2020
3205ab7
merge develop branch
shaojiewang May 6, 2020
c2abbb7
merge develop branch 1
shaojiewang May 6, 2020
0ba8e84
merge branch
shaojiewang May 6, 2020
de35602
merge develop branch
shaojiewang May 6, 2020
d20b9c8
add invoker for v4r1 xdlops bwd igemm path
shaojiewang May 6, 2020
0bec9d4
merge sqlite hpp 1
shaojiewang May 6, 2020
4c997ea
merge sqlite hpp 2
shaojiewang May 6, 2020
ab73c1c
merge sqlitedb.hpp
shaojiewang May 6, 2020
fcf342c
add v4r1 bwd xdlops kernel in invoker
shaojiewang May 6, 2020
f148d26
delete dead code in igemm bwd xdlops solver
shaojiewang May 7, 2020
9edd884
update license information for igemm bwd xdlops solver
shaojiewang May 7, 2020
c7b28ba
rename vars: use GemmA/B instead of In/WeiBlock
shaojiewang May 7, 2020
39f7cc0
remove threadwise gemm's inline macro
shaojiewang May 7, 2020
4a1e511
remove cluster lengths from tunable params
shaojiewang May 8, 2020
e5c563b
merge solver.cpp
shaojiewang May 8, 2020
5151050
clang format for src/solver.cpp
shaojiewang May 8, 2020
d068e9a
remove non-used code
shaojiewang May 9, 2020
abf7fd5
remove workaround issues for v4r1 xdlops kernel
shaojiewang May 13, 2020
ac672e0
Merge branch 'develop' into igemm_bwd_xdlops_v4r1
May 17, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ include(ClangTidy)
enable_clang_tidy(
CHECKS
*
-abseil-string-find-startswith
-android-cloexec-fopen
# Yea we shouldn't be using rand()
-cert-msc30-c
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ set( MIOpen_Source
solver/conv_hip_implicit_gemm_bwd_data_v1r1.cpp
solver/conv_hip_implicit_gemm_bwd_data_v4r1.cpp
solver/conv_hip_implicit_gemm_bwd_data_v1r1_xdlops.cpp
solver/conv_hip_implicit_gemm_bwd_data_v4r1_xdlops.cpp
solver/conv_hip_implicit_gemm_v4r4_gen_xdlops_fwd_fp32.cpp
)

Expand Down
1 change: 1 addition & 0 deletions src/conv/invokers/impl_gemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ InvokerFactory MakeImplGemmDataInvokerFactory(const ConvolutionContext& ctx)
// clang-format off
else if(
kernel.GetName() == "gridwise_convolution_backward_data_implicit_gemm_v4r1_nchw_kcyx_nkhw" ||
kernel.GetName() == "gridwise_convolution_backward_data_implicit_gemm_v4r1_xdlops_nchw_kcyx_nkhw" ||
kernel.GetName() == "gridwise_convolution_backward_data_implicit_gemm_v4r1_ncdhw_kczyx_nkdhw")
// clang-format on
{
Expand Down
Empty file modified src/include/miopen/execution_context.hpp
100644 → 100755
Empty file.
74 changes: 74 additions & 0 deletions src/include/miopen/solver.hpp
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,57 @@ struct PerformanceImplicitGemmBwdDataV4R1 : Serializable<PerformanceImplicitGemm
std::string ToString() const;
};

// Performance-configuration record for the implicit-GEMM backward-data v4r1 xdlops solver.
// The struct passes itself as the template argument to Serializable<> and enumerates
// its integer fields through Visit().
// NOTE(review): the member functions are only declared here; their definitions are not
// visible in this view, so remarks about their meaning below are assumptions to confirm.
struct PerformanceImplicitGemmBwdDataV4R1Xdlops
: Serializable<PerformanceImplicitGemmBwdDataV4R1Xdlops>
{
// Block-level GEMM sizes (the trailing notes are the original author's range hints).
int GemmNPerBlock; // 2^n[8..16]
int GemmMPerBlock; // 2^n[32..128]
int GemmKPerBlock; // 2^n[4..16]

// Wave-level GEMM sizes.
int GemmMPerWave;
int GemmNPerWave;

// Not passed to the visitor in Visit() below.
// NOTE(review): presumably selects an alternative ("spare") search set -- confirm.
bool use_spare_set;

// Six-argument constructor; the two delegating constructors below forward to it.
// NOTE(review): parameters are unnamed here; presumably five ints in field order
// followed by use_spare_set -- confirm against the definition.
PerformanceImplicitGemmBwdDataV4R1Xdlops(int, int, int, int, int, bool);

// Default constructor: delegates with -1 for every int argument and false for the bool.
PerformanceImplicitGemmBwdDataV4R1Xdlops()
: PerformanceImplicitGemmBwdDataV4R1Xdlops(-1, -1, -1, -1, -1, false)
{
}

// Five-int constructor: delegates with false as the final bool argument.
PerformanceImplicitGemmBwdDataV4R1Xdlops(int a, int b, int c, int d, int e)
: PerformanceImplicitGemmBwdDataV4R1Xdlops(a, b, c, d, e, false)
{
}

// Single-bool constructor (defined elsewhere). It is not marked explicit, so a bool
// converts implicitly to this type.
PerformanceImplicitGemmBwdDataV4R1Xdlops(bool spare);

bool operator==(const PerformanceImplicitGemmBwdDataV4R1Xdlops& other) const;

// Invokes f(field, "field name") once for each of the five int fields, in declaration
// order. use_spare_set is intentionally or accidentally absent -- it is never visited.
template <class Self, class F>
static void Visit(Self&& self, F f)
{
f(self.GemmNPerBlock, "GemmNPerBlock");
f(self.GemmMPerBlock, "GemmMPerBlock");
f(self.GemmKPerBlock, "GemmKPerBlock");
f(self.GemmMPerWave, "GemmMPerWave");
f(self.GemmNPerWave, "GemmNPerWave");
}

// Derived-quantity helpers; each returns a tuple whose last element is a bool.
// NOTE(review): the trailing bool is presumably a "result is valid" flag -- confirm.
std::tuple<int, bool> CalculateGridSize(const ConvolutionContext& ctx) const;
std::tuple<std::size_t, bool> CalculateLdsNumberOfByte(const ConvolutionContext& ctx) const;
std::tuple<int, int, int, int, bool>
CalculateGemmABlockCopyPerformanceParameters(const ConvolutionContext& ctx) const;
std::tuple<int, int, int, int, bool>
CalculateGemmBBlockCopyPerformanceParameters(const ConvolutionContext& ctx) const;
// Validity checks: one context-free, one taking the convolution context.
bool IsValidValue() const;
bool IsValid(const ConvolutionContext& ctx) const;
// NOTE(review): "Euristic" spelling is kept as declared; renaming would change the
// interface, and other code may call it by this name.
void EuristicInit(const ConvolutionContext& ctx);
// NOTE(review): presumably advances to the next candidate configuration and reports
// whether one exists -- confirm against the definition.
bool SetNextValue();
std::string ToString() const;
};

struct ConvHipImplicitGemmV4R1Fwd : SolverBase<ConvolutionContext>
{
PerformanceImplicitGemmV4R1 GetPerformanceConfig(const ConvolutionContext& ctx) const;
Expand Down Expand Up @@ -1133,6 +1184,29 @@ struct ConvHipImplicitGemmBwdDataV4R1 : SolverBase<ConvolutionContext>
bool disableConfigOverrideFromEnv = false) const;
};

// Solver declaration for the implicit-GEMM backward-data v4r1 xdlops path.
// Derives from SolverBase<ConvolutionContext>; every member is declared here and
// defined elsewhere.
// NOTE(review): semantics of the members are not visible in this view; the remarks
// below describe the signatures only, with assumptions marked as such.
struct ConvHipImplicitGemmBwdDataV4R1Xdlops : SolverBase<ConvolutionContext>
{
    // Static helpers that depend only on the convolution context.
    static int CalculateNumberOfGemm(const ConvolutionContext& ctx);

    // Returns three ints for the GEMM selected by gemm_id.
    // NOTE(review): presumably the GEMM dimensions; order to be confirmed.
    static std::tuple<int, int, int> CalculateGemmSize(const ConvolutionContext& ctx, int gemm_id);

    PerformanceImplicitGemmBwdDataV4R1Xdlops
    GetPerformanceConfig(const ConvolutionContext& ctx) const;

    bool IsValidPerformanceConfig(const ConvolutionContext& ctx,
                                  const PerformanceImplicitGemmBwdDataV4R1Xdlops& c) const;

    bool IsApplicable(const ConvolutionContext& ctx) const;

    // disableConfigOverrideFromEnv defaults to false.
    ConvSolution GetSolution(const ConvolutionContext& ctx,
                             const PerformanceImplicitGemmBwdDataV4R1Xdlops& config,
                             bool disableConfigOverrideFromEnv = false) const;

    PerformanceImplicitGemmBwdDataV4R1Xdlops Search(const ConvolutionContext&) const;

    // Returns an int and writes to elapsed_time through a non-const reference.
    // NOTE(review): the int is presumably a status code -- confirm.
    // Review record: a comment thread attached to this declaration was marked as
    // resolved by asroy.
    int RunAndMeasureSolution(miopen::Handle& profile_h,
                              ConstData_t bot_buf,
                              Data_t top_buf,
                              ConstData_t wei_buf,
                              ConstData_t bias_buf,
                              const ConvolutionContext& ctx,
                              const ConvSolution& solution,
                              float& elapsed_time) const;
};

struct ConvHipImplicitGemmBwdDataV1R1Xdlops : SolverBase<ConvolutionContext>
{
PerformanceImplicitGemmXdlops GetPerformanceConfig(const ConvolutionContext& ctx) const;
Expand Down
Empty file modified src/include/miopen/sqlite_db.hpp
100644 → 100755
Empty file.
Loading