From ceb7fe2e3105aa934478be80797d4b59aec64422 Mon Sep 17 00:00:00 2001 From: amcamd Date: Thu, 22 Jun 2023 10:06:43 -0500 Subject: [PATCH] TRMM add back sample, correct documentation --- clients/samples/CMakeLists.txt | 3 +- clients/samples/example_strmm.cpp | 259 ++++++++++++++++++++++++++++++ docs/deprecation.rst | 3 + library/include/hipblas.h | 6 +- 4 files changed, 267 insertions(+), 4 deletions(-) create mode 100644 clients/samples/example_strmm.cpp diff --git a/clients/samples/CMakeLists.txt b/clients/samples/CMakeLists.txt index 664c969d4..5e7e03ecc 100644 --- a/clients/samples/CMakeLists.txt +++ b/clients/samples/CMakeLists.txt @@ -23,6 +23,7 @@ set( hipblas_samples_common ../common/utility.cpp ) add_executable( hipblas-example-sscal example_sscal.cpp ${hipblas_samples_common} ) add_executable( hipblas-example-sgemm example_sgemm.cpp ${hipblas_samples_common} ) +add_executable( hipblas-example-strmm example_strmm.cpp ${hipblas_samples_common} ) add_executable( hipblas-example-sgemm-strided-batched example_sgemm_strided_batched.cpp ${hipblas_samples_common} ) add_executable( hipblas-example-c example_c.c ${hipblas_samples_common} ) add_executable( hipblas-example-hip-complex-her2 example_hip_complex_her2.cpp ${hipblas_samples_common} ) @@ -50,7 +51,7 @@ if( NOT TARGET hipblas ) endif( ) endif( ) -list (APPEND hipblas-example-executables hipblas-example-sscal hipblas-example-sgemm hipblas-example-sgemm-strided-batched hipblas-example-c hipblas-example-hip-complex-her2 hipblas-example-hgemm-half) +list (APPEND hipblas-example-executables hipblas-example-sscal hipblas-example-strmm hipblas-example-sgemm hipblas-example-sgemm-strided-batched hipblas-example-c hipblas-example-hip-complex-her2 hipblas-example-hgemm-half) if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) list (APPEND hipblas-example-executables hipblas-example-hgemm) endif( ) diff --git a/clients/samples/example_strmm.cpp b/clients/samples/example_strmm.cpp new file mode 100644 index 000000000..7e76b56db --- /dev/null +++ b/clients/samples/example_strmm.cpp @@ -0,0 +1,259 @@ +/* ************************************************************************ + * Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * ************************************************************************ */ + +#include +#include +#include +#include // isnan +#include +#include +#include + +#ifndef CHECK_HIP_ERROR +#define CHECK_HIP_ERROR(error) \ + if(error != hipSuccess) \ + { \ + fprintf(stderr, \ + "Hip error: '%s'(%d) at %s:%d\n", \ + hipGetErrorString(error), \ + error, \ + __FILE__, \ + __LINE__); \ + exit(EXIT_FAILURE); \ + } +#endif + +#ifndef CHECK_HIPBLAS_ERROR +#define CHECK_HIPBLAS_ERROR(error) \ + if(error != HIPBLAS_STATUS_SUCCESS) \ + { \ + fprintf(stderr, "hipBLAS error: "); \ + if(error == HIPBLAS_STATUS_NOT_INITIALIZED) \ + fprintf(stderr, "HIPBLAS_STATUS_NOT_INITIALIZED"); \ + if(error == HIPBLAS_STATUS_ALLOC_FAILED) \ + fprintf(stderr, "HIPBLAS_STATUS_ALLOC_FAILED"); \ + if(error == HIPBLAS_STATUS_INVALID_VALUE) \ + fprintf(stderr, "HIPBLAS_STATUS_INVALID_VALUE"); \ + if(error == HIPBLAS_STATUS_MAPPING_ERROR) \ + fprintf(stderr, "HIPBLAS_STATUS_MAPPING_ERROR"); \ + if(error == HIPBLAS_STATUS_EXECUTION_FAILED) \ + fprintf(stderr, "HIPBLAS_STATUS_EXECUTION_FAILED"); \ + if(error == HIPBLAS_STATUS_INTERNAL_ERROR) \ + fprintf(stderr, "HIPBLAS_STATUS_INTERNAL_ERROR"); \ + if(error == HIPBLAS_STATUS_NOT_SUPPORTED) \ + fprintf(stderr, "HIPBLAS_STATUS_NOT_SUPPORTED"); \ + if(error == HIPBLAS_STATUS_INVALID_ENUM) \ + fprintf(stderr, "HIPBLAS_STATUS_INVALID_ENUM"); \ + if(error == HIPBLAS_STATUS_UNKNOWN) \ + fprintf(stderr, "HIPBLAS_STATUS_UNKNOWN"); \ + fprintf(stderr, "\n"); \ + exit(EXIT_FAILURE); \ + } +#endif + +#define DIM1 4 +#define DIM2 4 + +// reference code for trmm (triangle matrix matrix multiplication) +template +void trmm_reference(hipblasSideMode_t side, + hipblasFillMode_t uplo, + hipblasOperation_t trans, + hipblasDiagType_t diag, + int M, + int N, + T alpha, + const T* A, + int lda, + const T* B, + int ldb, + T* C, + int ldc) +{ + int As1 = HIPBLAS_OP_N == trans ? 1 : lda; + int As2 = HIPBLAS_OP_N == trans ? lda : 1; + + // this is 3 loop gemm algorithm with non-relevant triangle part masked + if(HIPBLAS_SIDE_LEFT == side) + { + for(int i1 = 0; i1 < M; i1++) + { + for(int i2 = 0; i2 < N; i2++) + { + T t = 0.0; + for(int i3 = 0; i3 < M; i3++) + { + if((i1 == i3) && (HIPBLAS_DIAG_UNIT == diag)) + { + t += B[i3 + i2 * ldb]; + } + else if(((i3 > i1) && (HIPBLAS_FILL_MODE_UPPER == uplo)) + || ((i1 > i3) && (HIPBLAS_FILL_MODE_LOWER == uplo)) + || ((i1 == i3) && (HIPBLAS_DIAG_NON_UNIT == diag))) + { + t += A[i1 * As1 + i3 * As2] * B[i3 + i2 * ldb]; + } + } + C[i1 + i2 * ldc] = alpha * t; + } + } + } + else if(HIPBLAS_SIDE_RIGHT == side) + { + for(int i1 = 0; i1 < M; i1++) + { + for(int i2 = 0; i2 < N; i2++) + { + T t = 0.0; + for(int i3 = 0; i3 < N; i3++) + { + if((i3 == i2) && (HIPBLAS_DIAG_UNIT == diag)) + { + t += B[i1 + i3 * ldb]; + } + else if(((i2 > i3) && (HIPBLAS_FILL_MODE_UPPER == uplo)) + || ((i3 > i2) && (HIPBLAS_FILL_MODE_LOWER == uplo)) + || ((i3 == i2) && (HIPBLAS_DIAG_NON_UNIT == diag))) + { + t += B[i1 + i3 * ldb] * A[i3 * As1 + i2 * As2]; + } + } + C[i1 + i2 * ldc] = alpha * t; + } + } + } +} + +int main() +{ + hipblasSideMode_t side = HIPBLAS_SIDE_LEFT; + hipblasFillMode_t uplo = HIPBLAS_FILL_MODE_UPPER; + hipblasOperation_t transa = HIPBLAS_OP_N; + hipblasDiagType_t diag = HIPBLAS_DIAG_NON_UNIT; + float alpha = 1.0; + + int m = DIM1, n = DIM2; + int lda, ldb, ldc, size_a, size_b, size_c; + std::cout << "strmm V3 example" << std::endl; + + if(HIPBLAS_SIDE_LEFT == side) + { + lda = m; + size_a = m * lda; + std::cout << "left"; + } + else if(HIPBLAS_SIDE_RIGHT == side) + { + lda = n; + size_a = n * lda; + std::cout << "right"; + } + HIPBLAS_FILL_MODE_UPPER == uplo ? std::cout << ",upper" : std::cout << ",lower"; + HIPBLAS_OP_N == transa ? std::cout << ",N" : std::cout << ",T"; + HIPBLAS_DIAG_NON_UNIT == diag ? std::cout << ",non_unit_diag:" : std::cout << ",unit_diag:"; + + ldb = m; + size_b = n * ldb; + + ldc = m; + size_c = n * ldc; + + // Naming: da is in GPU (device) memory. ha is in CPU (host) memory + std::vector ha(size_a); + std::vector hb(size_b); + std::vector hc(size_c); + std::vector hc_gold(size_c); + + // initial data on host + srand(1); + for(int i = 0; i < size_a; ++i) + { + // ha[i] = 1.0; + ha[i] = rand() % 17; + } + for(int i = 0; i < size_b; ++i) + { + // hb[i] = 1.0; + hb[i] = rand() % 17; + } + for(int i = 0; i < size_c; ++i) + { + // hc[i] = 1.0; + hc[i] = rand() % 17; + } + hc_gold = hc; + + // allocate memory on device + float *da, *db, *dc; + CHECK_HIP_ERROR(hipMalloc(&da, size_a * sizeof(float))); + CHECK_HIP_ERROR(hipMalloc(&db, size_b * sizeof(float))); + CHECK_HIP_ERROR(hipMalloc(&dc, size_c * sizeof(float))); + + // copy matrices from host to device + CHECK_HIP_ERROR(hipMemcpy(da, ha.data(), sizeof(float) * size_a, hipMemcpyHostToDevice)); + CHECK_HIP_ERROR(hipMemcpy(db, hb.data(), sizeof(float) * size_b, hipMemcpyHostToDevice)); + CHECK_HIP_ERROR(hipMemcpy(dc, hc.data(), sizeof(float) * size_c, hipMemcpyHostToDevice)); + + hipblasHandle_t handle; + CHECK_HIPBLAS_ERROR(hipblasCreate(&handle)); + + CHECK_HIPBLAS_ERROR( + hipblasStrmm(handle, side, uplo, transa, diag, m, n, &alpha, da, lda, db, ldb, dc, ldc)); + + // copy output from device to CPU + CHECK_HIP_ERROR(hipMemcpy(hc.data(), dc, sizeof(float) * size_c, hipMemcpyDeviceToHost)); + + std::cout << "m, n, lda, ldb, ldc = " << m << ", " << n << ", " << lda << ", " << ldb << ", " + << ldc << std::endl; + + // calculate golden or correct result + trmm_reference( + side, uplo, transa, diag, m, n, alpha, ha.data(), lda, hb.data(), ldb, hc_gold.data(), ldc); + + float max_relative_error = 0; + for(int i = 0; i < size_c; i++) + { + std::cout << "i, hc_gold[i], hc[i] = " << i << ", " << hc_gold[i] << ", " << hc[i] + << std::endl; + float relative_error = hc_gold[i] != 0 ? (hc_gold[i] - hc[i]) / hc_gold[i] : 0; + relative_error = relative_error > 0 ? relative_error : -relative_error; + max_relative_error + = relative_error < max_relative_error ? max_relative_error : relative_error; + } + float eps = std::numeric_limits::epsilon(); + float tolerance = 10; + if(isnan(max_relative_error) || max_relative_error > eps * tolerance) + { + std::cout << "FAIL: max_relative_error = " << max_relative_error << std::endl; + } + else + { + std::cout << "PASS: max_relative_error = " << max_relative_error << std::endl; + } + + CHECK_HIP_ERROR(hipFree(da)); + CHECK_HIP_ERROR(hipFree(db)); + CHECK_HIP_ERROR(hipFree(dc)); + CHECK_HIPBLAS_ERROR(hipblasDestroy(handle)); + return EXIT_SUCCESS; +} diff --git a/docs/deprecation.rst b/docs/deprecation.rst index 0ad48bb25..c5453eecc 100644 --- a/docs/deprecation.rst +++ b/docs/deprecation.rst @@ -91,6 +91,9 @@ Packed int8x4 was removed as support for arbitrary dimensioned int8_t data is a * function hipblasSetInt8Datatype was removed * function hipblasGetInt8Datatype was removed +Removed in hipBLAS 2.0 +^^^^^^^^^^^^^^^^^^^^^^ + Legacy BLAS in-place trmm ''''''''''''''''''''''''' The legacay BLAS in-place hipblasXtrmm that calculates B <- alpha * op(A) * B is removed and replaced with the diff --git a/library/include/hipblas.h b/library/include/hipblas.h index f1732d399..99ac2d1d9 100644 --- a/library/include/hipblas.h +++ b/library/include/hipblas.h @@ -14571,7 +14571,7 @@ HIPBLAS_EXPORT hipblasStatus_t hipblasZhemmStridedBatched(hipblasHandle_t op( A ) = A or op( A ) = A^T or op( A ) = A^H. Note that trmm can provide in-place functionality by passing in the same address for both - matrices B and C. + matrices B and C and by setting ldb equal to ldc. - Supported precisions in rocBLAS : s,d,c,z - Supported precisions in cuBLAS : s,d,c,z @@ -14737,7 +14737,7 @@ HIPBLAS_EXPORT hipblasStatus_t hipblasZtrmm(hipblasHandle_t handle, op( A_i ) = A_i or op( A_i ) = A_i^T or op( A_i ) = A_i^H. Note that trmmBatched can provide in-place functionality by passing in the same address for both - matrices B and C. + matrices B and C and by setting ldb equal to ldc. - Supported precisions in rocBLAS : s,d,c,z - Supported precisions in cuBLAS : No support @@ -14911,7 +14911,7 @@ HIPBLAS_EXPORT hipblasStatus_t hipblasZtrmmBatched(hipblasHandle_t op( A_i ) = A_i or op( A_i ) = A_i^T or op( A_i ) = A_i^H. Note that trmmStridedBatched can provide in-place functionality by passing - in the same address for both matrices B and C. + in the same address for both matrices B and C and by setting ldb equal to ldc. - Supported precisions in rocBLAS : s,d,c,z - Supported precisions in cuBLAS : No support