diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a19014897..01d19de732 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -285,7 +285,7 @@ add_subdirectory(src) add_subdirectory(examples) if(BUILD_TEST) - add_subdirectory(tests) + add_subdirectory(tests/csrc) endif() # # Mesaure the compile time diff --git a/src/turbomind/kernels/logprob_kernels.cu b/src/turbomind/kernels/logprob_kernels.cu index 20474a7ab2..c94c4f45be 100644 --- a/src/turbomind/kernels/logprob_kernels.cu +++ b/src/turbomind/kernels/logprob_kernels.cu @@ -182,29 +182,29 @@ void invokeLogProbFromLogits(float* cum_log_probs, cum_log_probs, log_probs, input_lengths, max_input_length, batch_size, batch_first); } -// template void invokeLogProbFromLogits(float* cum_log_probs, -// const float* logits, -// const int* input_ids, -// const int* input_lengths, -// const size_t max_input_length, -// const size_t batch_size, -// const size_t vocab_size, -// const size_t vocab_size_padded, -// void* workspace, -// const size_t workspace_size, -// cudaStream_t stream, -// const bool batch_first); - -// template void invokeLogProbFromLogits(float* cum_log_probs, -// const half* logits, -// const int* input_ids, -// const int* input_lengths, -// const size_t max_input_length, -// const size_t batch_size, -// const size_t vocab_size, -// const size_t vocab_size_padded, -// void* workspace, -// const size_t workspace_size, -// cudaStream_t stream, -// const bool batch_first); +template void invokeLogProbFromLogits(float* cum_log_probs, + const float* logits, + const int* input_ids, + const int* input_lengths, + const size_t max_input_length, + const size_t batch_size, + const size_t vocab_size, + const size_t vocab_size_padded, + void* workspace, + const size_t workspace_size, + cudaStream_t stream, + const bool batch_first); + +template void invokeLogProbFromLogits(float* cum_log_probs, + const half* logits, + const int* input_ids, + const int* input_lengths, + const size_t max_input_length, + const size_t batch_size, + const size_t vocab_size, + const size_t vocab_size_padded, + void* workspace, + const size_t workspace_size, + cudaStream_t stream, + const bool batch_first); } // end of namespace turbomind diff --git a/tests/CMakeLists.txt b/tests/csrc/CMakeLists.txt similarity index 96% rename from tests/CMakeLists.txt rename to tests/csrc/CMakeLists.txt index 37d971daed..61a9b7383d 100644 --- a/tests/CMakeLists.txt +++ b/tests/csrc/CMakeLists.txt @@ -15,6 +15,5 @@ add_subdirectory(unittests) if(BUILD_PYT) add_subdirectory(gemm_dequantize) - add_subdirectory(moe) add_subdirectory(int8_gemm) endif() diff --git a/tests/gemm_dequantize/CMakeLists.txt b/tests/csrc/gemm_dequantize/CMakeLists.txt similarity index 100% rename from tests/gemm_dequantize/CMakeLists.txt rename to tests/csrc/gemm_dequantize/CMakeLists.txt diff --git a/tests/gemm_dequantize/th_gemm_dequantize.cc b/tests/csrc/gemm_dequantize/th_gemm_dequantize.cc similarity index 100% rename from tests/gemm_dequantize/th_gemm_dequantize.cc rename to tests/csrc/gemm_dequantize/th_gemm_dequantize.cc diff --git a/tests/gemm_dequantize/th_gemm_dequantize.py b/tests/csrc/gemm_dequantize/th_gemm_dequantize.py similarity index 100% rename from tests/gemm_dequantize/th_gemm_dequantize.py rename to tests/csrc/gemm_dequantize/th_gemm_dequantize.py diff --git a/tests/int8_gemm/CMakeLists.txt b/tests/csrc/int8_gemm/CMakeLists.txt similarity index 100% rename from tests/int8_gemm/CMakeLists.txt rename to tests/csrc/int8_gemm/CMakeLists.txt diff --git a/tests/int8_gemm/int8_gemm_test.cu b/tests/csrc/int8_gemm/int8_gemm_test.cu similarity index 100% rename from tests/int8_gemm/int8_gemm_test.cu rename to tests/csrc/int8_gemm/int8_gemm_test.cu diff --git a/tests/unittests/CMakeLists.txt b/tests/csrc/unittests/CMakeLists.txt similarity index 87% rename from tests/unittests/CMakeLists.txt rename to tests/csrc/unittests/CMakeLists.txt index 5531ffca6a..101bea0da2 100644 --- a/tests/unittests/CMakeLists.txt +++ b/tests/csrc/unittests/CMakeLists.txt @@ -42,7 +42,7 @@ target_compile_features(unittest PRIVATE cxx_std_14) target_link_libraries( # Libs for test_attention_kernels unittest PUBLIC -lcudart -lcurand - gen_relative_pos_bias gpt_kernels gtest memory_utils tensor unfused_attention_kernels cuda_utils logger) + gpt_kernels gtest memory_utils tensor unfused_attention_kernels cuda_utils logger) target_link_libraries( # Libs for test_logprob_kernels unittest PUBLIC -lcudart @@ -50,7 +50,7 @@ target_link_libraries( # Libs for test_logprob_kernels target_link_libraries( # Libs for test_penalty_kernels unittest PUBLIC -lcublas -lcublasLt -lcudart - sampling_penalty_kernels beam_search_penalty_kernels memory_utils cuda_utils logger) + sampling_penalty_kernels memory_utils cuda_utils logger) target_link_libraries( # Libs for test_sampling_kernel unittest PUBLIC -lcudart @@ -71,11 +71,6 @@ add_executable(test_gpt_kernels test_gpt_kernels.cu) target_link_libraries(test_gpt_kernels PUBLIC gpt_kernels memory_utils tensor cuda_utils logger) -add_executable(test_activation test_activation.cu) -target_link_libraries(test_activation PUBLIC - -lcublas -lcublasLt -lcudart - activation_kernels memory_utils cuda_utils logger) - add_executable(test_context_attention_layer test_context_attention_layer.cu) target_link_libraries(test_context_attention_layer PUBLIC Llama -lcublas -lcublasLt -lcudart diff --git a/tests/unittests/gtest_utils.h b/tests/csrc/unittests/gtest_utils.h similarity index 100% rename from tests/unittests/gtest_utils.h rename to tests/csrc/unittests/gtest_utils.h diff --git a/tests/unittests/test_attention_kernels.cu b/tests/csrc/unittests/test_attention_kernels.cu similarity index 66% rename from tests/unittests/test_attention_kernels.cu rename to tests/csrc/unittests/test_attention_kernels.cu index c36de79667..f2a869381e 100644 --- a/tests/unittests/test_attention_kernels.cu +++ b/tests/csrc/unittests/test_attention_kernels.cu @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "src/turbomind/kernels/gen_relative_pos_bias.h" + #include "src/turbomind/kernels/gpt_kernels.h" #include "src/turbomind/kernels/unfused_attention_kernels.h" #include "src/turbomind/utils/Tensor.h" #include "src/turbomind/utils/memory_utils.h" #include "src/turbomind/utils/nccl_utils.h" -#include "tests/unittests/gtest_utils.h" +#include "gtest_utils.h" #include #include @@ -333,121 +333,6 @@ public: EXPECT_TRUE(passed); } } - - void runTestAlibiMaskedSoftmax(AttentionKernelTestParam param, bool is_benchmark = false) - { - DataType dtype = getTensorType(); - - std::vector qk_shape{param.batch_size, param.head_num, param.q_length, param.k_length}; - - bool use_fp32_qk = param.use_fp32_qk_buf && dtype != TYPE_FP32; - - Tensor qk = createTensor(MEMORY_GPU, dtype, qk_shape); - Tensor qk_fp32 = use_fp32_qk ? createTensor(MEMORY_GPU, TYPE_FP32, qk_shape) : Tensor(); - Tensor attn_mask = randomAttentionMask({param.batch_size, 1, param.q_length, param.k_length}); - Tensor alibi_slopes = createTensor(MEMORY_GPU, dtype, {param.head_num}); - - // Input random initialization - if (param.use_fp32_qk_buf && dtype != TYPE_FP32) { - utils::normal(curng, qk_fp32); - } - else { - utils::normal(curng, qk); - } - invokeBuildAlibiSlopes(alibi_slopes.getPtr(), param.head_num, stream); - sync_check_cuda_error(); - - Tensor h_alibi_slopes = createTensor(MEMORY_CPU, dtype, {param.head_num}); - Tensor h_alibi_bias = - is_benchmark ? Tensor() : createTensor(MEMORY_CPU, dtype, {param.head_num, param.q_length, param.k_length}); - // The nearest power of 2 equal to / smaller than num_heads followed by HF's implementation. - T* alibi_slope_ptr = h_alibi_slopes.getPtr(); - int num_heads_pow2 = utils::pow2_rounddown(param.head_num); - for (size_t h = 0; h < param.head_num; ++h) { - // The slope of linear bias of the attention head - if (h < num_heads_pow2) { - alibi_slope_ptr[h] = static_cast(powf(powf(0.5f, powf(0.5f, log2f(num_heads_pow2) - 3.f)), h + 1)); - } - else { - alibi_slope_ptr[h] = static_cast( - powf(powf(0.5f, powf(0.5f, log2f(num_heads_pow2 << 1) - 3.f)), (h - num_heads_pow2) * 2 + 1)); - } - if (h_alibi_bias.size() > 0) { - T* alibi_bias_ptr = h_alibi_bias.getPtr(); - for (size_t qi = 0; qi < param.q_length; ++qi) { - for (size_t ki = 0; ki < param.k_length; ++ki) { - size_t hqk_idx = (h * param.q_length + qi) * param.k_length + ki; - alibi_bias_ptr[hqk_idx] = ::math::mul(alibi_slope_ptr[h], T(0.0f + ki - qi)); - } - } - } - } - EXPECT_TRUE( - checkResult("CheckAlibiSlopes", alibi_slopes.getPtr(), h_alibi_slopes.getPtr(), param.head_num)); - - // Clone to host for reference computation if needed. - Tensor h_qk = is_benchmark ? Tensor() : toHost(qk); - Tensor h_attn_mask = is_benchmark ? Tensor() : toHost(attn_mask); - Tensor h_qk_fp32 = is_benchmark ? Tensor() : toHost(qk_fp32); - - T scale = static_cast(1 / sqrtf(param.size_per_head * 1.0f)); - - if (param.use_fp32_qk_buf && dtype != TYPE_FP32) { - MaskedSoftmaxParam softmax_param; - softmax_param.attention_score = qk.getPtr(); - softmax_param.qk = qk_fp32.getPtr(); - softmax_param.attention_mask = attn_mask.getPtr(); - softmax_param.linear_bias_slopes = alibi_slopes.getPtr(); - softmax_param.batch_size = param.batch_size; - softmax_param.num_heads = param.head_num; - softmax_param.q_length = param.q_length; - softmax_param.k_length = param.k_length; - softmax_param.qk_scale = scale; - invokeMaskedSoftmax(softmax_param, stream); - sync_check_cuda_error(); - } - else { - MaskedSoftmaxParam softmax_param; - softmax_param.attention_score = qk.getPtr(); - softmax_param.qk = qk.getPtr(); - softmax_param.attention_mask = attn_mask.getPtr(); - softmax_param.linear_bias_slopes = alibi_slopes.getPtr(); - softmax_param.batch_size = param.batch_size; - softmax_param.num_heads = param.head_num; - softmax_param.q_length = param.q_length; - softmax_param.k_length = param.k_length; - softmax_param.qk_scale = scale; - invokeMaskedSoftmax(softmax_param, stream); - sync_check_cuda_error(); - } - - if (!is_benchmark) { - if (use_fp32_qk) { - computeQkSoftmax(h_qk.getPtr(), - h_qk_fp32.getPtr(), - h_attn_mask.getPtr(), - h_alibi_bias.getPtr(), - param.batch_size, - param.head_num, - param.q_length, - param.k_length, - scale); - } - else { - computeQkSoftmax(h_qk.getPtr(), - h_qk.getPtr(), - h_attn_mask.getPtr(), - h_alibi_bias.getPtr(), - param.batch_size, - param.head_num, - param.q_length, - param.k_length, - scale); - } - bool passed = checkResult("AlibiMaskedSoftmax", qk.getPtr(), h_qk.getPtr(), qk.size()); - EXPECT_TRUE(passed); - } - } }; TYPED_TEST_SUITE(AttentionKernelTest, SupportTypes); @@ -511,48 +396,4 @@ TYPED_TEST(AttentionKernelTest, Benchmark_MaskedSoftmax_LongSequence4096) this->runTestMaskedSoftmax({8, 4096, 4096, 14, 128, false, 0, false, true}, true); } -TYPED_TEST(AttentionKernelTest, AlibiMaskedSoftmax_ShortSequence1) -{ - this->runTestAlibiMaskedSoftmax({1, 12, 12, 4, 32, false, 0, false}); -} - -TYPED_TEST(AttentionKernelTest, AlibiMaskedSoftmax_ShortSequence2) -{ - // q_length is not multiple of 4. - this->runTestAlibiMaskedSoftmax({1, 11, 11, 4, 32, false, 0, false}); -} - -TYPED_TEST(AttentionKernelTest, AlibiMaskedSoftmax_ShortSequence_HasPrompt1) -{ - this->runTestAlibiMaskedSoftmax({1, 12, 20, 4, 32, false, 0, false}); -} - -TYPED_TEST(AttentionKernelTest, AlibiMaskedSoftmax_ShortSequence_HasPrompt2) -{ - // q_length is not multiple of 4. - this->runTestAlibiMaskedSoftmax({1, 11, 20, 4, 32, false, 0, false}); -} - -// Tests for long sentence generation. Assume the bloom 176B model with 8 TP. - -TYPED_TEST(AttentionKernelTest, Benchmark_AlibiMaskedSoftmax_LongSequence1024) -{ - this->runTestAlibiMaskedSoftmax({8, 1024, 1024, 14, 128, false, 0, false, true}, true); -} - -TYPED_TEST(AttentionKernelTest, Benchmark_AlibiMaskedSoftmax_LongSequence2048) -{ - this->runTestAlibiMaskedSoftmax({8, 2048, 2048, 14, 128, false, 0, false, true}, true); -} - -TYPED_TEST(AttentionKernelTest, Benchmark_AlibiMaskedSoftmax_LongSequence3072) -{ - this->runTestAlibiMaskedSoftmax({8, 3072, 3072, 14, 128, false, 0, false, true}, true); -} - -TYPED_TEST(AttentionKernelTest, Benchmark_AlibiMaskedSoftmax_LongSequence4096) -{ - this->runTestAlibiMaskedSoftmax({4, 4096, 4096, 14, 128, false, 0, false, true}, true); -} - } // end of namespace diff --git a/tests/unittests/test_context_attention_layer.cu b/tests/csrc/unittests/test_context_attention_layer.cu similarity index 99% rename from tests/unittests/test_context_attention_layer.cu rename to tests/csrc/unittests/test_context_attention_layer.cu index cf4e30ea33..66ca250019 100644 --- a/tests/unittests/test_context_attention_layer.cu +++ b/tests/csrc/unittests/test_context_attention_layer.cu @@ -360,6 +360,7 @@ int main(int argc, const char* argv[]) .out_accum = accum_buf_ptr, .cu_seqlens_q = cu_seqlens_ptr, .cu_seqlens_k = nullptr, + .group_size = 1, .layout_q = layout_q, .layout_k = layout_k, .layout_v = layout_v, diff --git a/tests/unittests/test_gemm.cu b/tests/csrc/unittests/test_gemm.cu similarity index 100% rename from tests/unittests/test_gemm.cu rename to tests/csrc/unittests/test_gemm.cu diff --git a/tests/unittests/test_gpt_kernels.cu b/tests/csrc/unittests/test_gpt_kernels.cu similarity index 100% rename from tests/unittests/test_gpt_kernels.cu rename to tests/csrc/unittests/test_gpt_kernels.cu diff --git a/tests/unittests/test_int8.cu b/tests/csrc/unittests/test_int8.cu similarity index 98% rename from tests/unittests/test_int8.cu rename to tests/csrc/unittests/test_int8.cu index 8813b01be6..96f0daeade 100644 --- a/tests/unittests/test_int8.cu +++ b/tests/csrc/unittests/test_int8.cu @@ -14,7 +14,7 @@ #include #include -#include "tests/unittests/gtest_utils.h" +#include "gtest_utils.h" using namespace turbomind; diff --git a/tests/unittests/test_logprob_kernels.cu b/tests/csrc/unittests/test_logprob_kernels.cu similarity index 99% rename from tests/unittests/test_logprob_kernels.cu rename to tests/csrc/unittests/test_logprob_kernels.cu index aee090c2ef..242b31ccf8 100644 --- a/tests/unittests/test_logprob_kernels.cu +++ b/tests/csrc/unittests/test_logprob_kernels.cu @@ -12,7 +12,7 @@ #include "src/turbomind/utils/logger.h" #include "src/turbomind/utils/memory_utils.h" -#include "tests/unittests/gtest_utils.h" +#include "gtest_utils.h" using namespace turbomind; diff --git a/tests/unittests/test_penalty_kernels.cu b/tests/csrc/unittests/test_penalty_kernels.cu similarity index 99% rename from tests/unittests/test_penalty_kernels.cu rename to tests/csrc/unittests/test_penalty_kernels.cu index 5a075d6028..3ddb1ab9f3 100644 --- a/tests/unittests/test_penalty_kernels.cu +++ b/tests/csrc/unittests/test_penalty_kernels.cu @@ -27,14 +27,11 @@ #include #include -#include "src/turbomind/kernels/beam_search_penalty_kernels.h" #include "src/turbomind/kernels/penalty_types.h" #include "src/turbomind/kernels/sampling_penalty_kernels.h" #include "src/turbomind/utils/cuda_utils.h" #include "src/turbomind/utils/memory_utils.h" - -// #include "tests/unittests/unittest_utils.h" -#include "tests/unittests/gtest_utils.h" +#include "gtest_utils.h" using namespace turbomind; diff --git a/tests/unittests/test_sampling.cu b/tests/csrc/unittests/test_sampling.cu similarity index 100% rename from tests/unittests/test_sampling.cu rename to tests/csrc/unittests/test_sampling.cu diff --git a/tests/unittests/test_sampling_kernels.cu b/tests/csrc/unittests/test_sampling_kernels.cu similarity index 99% rename from tests/unittests/test_sampling_kernels.cu rename to tests/csrc/unittests/test_sampling_kernels.cu index 370bbcfdf1..041b70d72d 100644 --- a/tests/unittests/test_sampling_kernels.cu +++ b/tests/csrc/unittests/test_sampling_kernels.cu @@ -19,7 +19,7 @@ #include "src/turbomind/utils/cuda_utils.h" #include "src/turbomind/utils/memory_utils.h" -#include "tests/unittests/gtest_utils.h" +#include "gtest_utils.h" using namespace turbomind; diff --git a/tests/unittests/test_sampling_layer.cu b/tests/csrc/unittests/test_sampling_layer.cu similarity index 99% rename from tests/unittests/test_sampling_layer.cu rename to tests/csrc/unittests/test_sampling_layer.cu index b95fcb186e..6b8bcce7c5 100644 --- a/tests/unittests/test_sampling_layer.cu +++ b/tests/csrc/unittests/test_sampling_layer.cu @@ -17,8 +17,7 @@ #include "src/turbomind/utils/memory_utils.h" #include "src/turbomind/utils/Tensor.h" -// #include "tests/unittests/unittest_utils.h" -#include "tests/unittests/gtest_utils.h" +#include "gtest_utils.h" using namespace turbomind; diff --git a/tests/unittests/test_tensor.cu b/tests/csrc/unittests/test_tensor.cu similarity index 100% rename from tests/unittests/test_tensor.cu rename to tests/csrc/unittests/test_tensor.cu diff --git a/tests/unittests/unittest_utils.h b/tests/csrc/unittests/unittest_utils.h similarity index 100% rename from tests/unittests/unittest_utils.h rename to tests/csrc/unittests/unittest_utils.h diff --git a/tests/unittests/test_activation.cu b/tests/unittests/test_activation.cu deleted file mode 100644 index 767cc1a3b9..0000000000 --- a/tests/unittests/test_activation.cu +++ /dev/null @@ -1,154 +0,0 @@ -#include // snprintf -#include // std::string -#include // std::vector - -#include "src/turbomind/kernels/activation_kernels.h" -#include "src/turbomind/utils/cuda_utils.h" -#include "src/turbomind/utils/memory_utils.h" -#include "src/turbomind/utils/logger.h" - -#include "unittest_utils.h" - -using namespace turbomind; - -#define PRINT_LIMIT 16 -#define EPSILON (1e-20) -#define EPSILON_FP16 (1e-10) - -struct TestCase { - std::string name; - size_t m; - size_t n; - size_t ite; - - std::string toString() - { - char buf[100]; - snprintf(buf, sizeof(buf), "TestCase[name=%s, m=%ld, n=%ld]", name.c_str(), m, n); - return buf; - } - - void print() - { - TM_LOG_INFO(toString()); - } -}; - -template -void testActivationKernel(TestCase tc) -{ - const int m = tc.m; - const int n = tc.n; - cudaStream_t stream; - cudaStreamCreate(&stream); - - T *output_baseline, *output_opt1, *bias; - deviceMalloc(&output_baseline, m * n); - deviceMalloc(&output_opt1, m * n); - deviceMalloc(&bias, n); - cudaD2Dcpy(output_opt1, output_baseline, m * n); - invokeGenericActivation(output_baseline, - (const T*) bias, - (const T*) nullptr, - (const T*) nullptr, - (const int*) nullptr, - (const T*) nullptr, - m, - n, - 0, - (const float*) nullptr, - (const float*) nullptr, - stream); - invokeAddBiasGeluV2(output_opt1, bias, (const int*) nullptr, (const T*) nullptr, m, n, stream); - bool passed = checkResult(tc.name, output_baseline, output_opt1, m * n, true, true); - FT_CHECK(passed); - - const int ite = tc.ite; - CudaTimer cuda_timer_baseline(stream); - // warmup - for (int i = 0; i < ite; i++) { - invokeGenericActivation(output_baseline, - (const T*) bias, - (const T*) nullptr, - (const T*) nullptr, - (const int*) nullptr, - (const T*) nullptr, - m, - n, - 0, - (const float*) nullptr, - (const float*) nullptr, - stream); - } - cuda_timer_baseline.start(); - for (int i = 0; i < ite; i++) { - invokeGenericActivation(output_baseline, - (const T*) bias, - (const T*) nullptr, - (const T*) nullptr, - (const int*) nullptr, - (const T*) nullptr, - m, - n, - 0, - (const float*) nullptr, - (const float*) nullptr, - stream); - } - float total_time_baseline = cuda_timer_baseline.stop(); - - CudaTimer cuda_timer_opt(stream); - // warmup - for (int i = 0; i < ite; i++) { - invokeAddBiasGeluV2(output_baseline, bias, (const int*) nullptr, (const T*) nullptr, m, n, stream); - } - cuda_timer_opt.start(); - for (int i = 0; i < ite; i++) { - invokeAddBiasGeluV2(output_baseline, bias, (const int*) nullptr, (const T*) nullptr, m, n, stream); - } - float total_time_opt = cuda_timer_opt.stop(); - TM_LOG_INFO("%s baseline_time: %f us, opt_time: %f us, speedup: %f (ite: %d)", - tc.toString().c_str(), - total_time_baseline / ite * 1000.f, - total_time_opt / ite * 1000.f, - total_time_baseline / total_time_opt, - ite); - - deviceFree(output_baseline); - deviceFree(output_opt1); - deviceFree(bias); -} - -int main() -{ - printf("[INFO] Device: %s \n", getDeviceName().c_str()); - std::vector test_cases{ - // TC: name / m / n - TestCase{"addBiasGelu", 32, 1024, 1000}, - TestCase{"addBiasGelu", 128, 1024, 1000}, - TestCase{"addBiasGelu", 2048, 1024, 1000}, - TestCase{"addBiasGelu", 32, 3072, 1000}, - TestCase{"addBiasGelu", 128, 3072, 1000}, - TestCase{"addBiasGelu", 2048, 3072, 1000}, - TestCase{"addBiasGelu", 32, 4096, 1000}, - TestCase{"addBiasGelu", 128, 4096, 1000}, - TestCase{"addBiasGelu", 2048, 4096, 1000}, - TestCase{"addBiasGelu", 32, 8192, 1000}, - TestCase{"addBiasGelu", 128, 8192, 1000}, - TestCase{"addBiasGelu", 2048, 8192, 1000}, - TestCase{"addBiasGelu", 32, 49152, 1000}, - TestCase{"addBiasGelu", 128, 49152, 1000}, - TestCase{"addBiasGelu", 2048, 49152, 1000}, - TestCase{"addBiasGelu", 32, 81920, 1000}, - TestCase{"addBiasGelu", 128, 81920, 1000}, - TestCase{"addBiasGelu", 2048, 81920, 1000}, - }; - - for (auto& tc : test_cases) { - // testActivationKernel(tc); - testActivationKernel(tc); - } - TM_LOG_INFO("testActivationKernel done"); - - return 0; -}