Skip to content

Commit

Permalink
Merge pull request #2563 from ye-luo/cleanup-cublas-cusolver
Browse files Browse the repository at this point in the history
Update cublas cusolver wrapper
  • Loading branch information
ye-luo committed Jun 30, 2020
2 parents cd8c309 + 45ce3f5 commit 108879d
Show file tree
Hide file tree
Showing 5 changed files with 279 additions and 181 deletions.
62 changes: 62 additions & 0 deletions config/build_olcf_summit_Clang.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/bash

echo "----------------------- WARNING ------------------------------------"
echo "This is **not** production ready and intended for development only!!"
echo "Use config/build_olcf_summit.sh for production on Summit."
echo "----------------------- WARNING ------------------------------------"

echo "Purging current module set"
module purge
echo "Loading QMCPACK dependency modules for summit"
module load gcc/8.1.1
module load spectrum-mpi
module load cmake
module load git
module load cuda
module load essl
module load netlib-lapack
module load hdf5
module load python/3.6.6-anaconda3-5.3.0
# private module until OLCF provides a new llvm build
module load llvm/master-latest

#the XL built fftw is buggy, use the gcc version
#module load fftw
export FFTW_HOME=/autofs/nccs-svm1_sw/summit/.swci/1-compute/opt/spack/20180914/linux-rhel7-ppc64le/gcc-6.4.0/fftw-3.3.8-5gcj2ic4el7acu3rqnfnh735jz2ez7j5
export BOOST_ROOT=/autofs/nccs-svm1_sw/summit/.swci/1-compute/opt/spack/20180914/linux-rhel7-ppc64le/gcc-6.4.0/boost-1.66.0-l3sghp3ggjzwi4vtvyb5yzsjm36npgrk

TYPE=Release
Compiler=Clang

for name in offload_real_MP offload_real offload_cplx offload_cplx_MP
do

CMAKE_FLAGS="-D CMAKE_BUILD_TYPE=$TYPE -D ENABLE_CUDA=1 -D CUDA_ARCH=sm_70 -D ENABLE_MASS=1 -D MASS_ROOT=/sw/summit/xl/16.1.1-5/xlmass/9.1.1 -D MPIEXEC_EXECUTABLE=`which jsrun` -D MPIEXEC_NUMPROC_FLAG='-n' -D MPIEXEC_PREFLAGS='-c;16;-g;1;-b;packed:16'"
if [[ $name == *"cplx"* ]]; then
CMAKE_FLAGS="$CMAKE_FLAGS -D QMC_COMPLEX=1"
fi

if [[ $name == *"_MP"* ]]; then
CMAKE_FLAGS="$CMAKE_FLAGS -D QMC_MIXED_PRECISION=1"
fi

if [[ $name == *"offload"* ]]; then
CMAKE_FLAGS="$CMAKE_FLAGS -D ENABLE_OFFLOAD=ON -D CUDA_HOST_COMPILER=`which gcc` -D USE_OBJECT_TARGET=ON"
fi

folder=build_summit_${Compiler}_${name}
echo "**********************************"
echo "$folder"
echo "$CMAKE_FLAGS"
echo "**********************************"
mkdir $folder
cd $folder
if [ ! -f CMakeCache.txt ] ; then
cmake $CMAKE_FLAGS -D CMAKE_C_COMPILER=mpicc -D CMAKE_CXX_COMPILER=mpicxx -D ENABLE_TIMERS=1 ..
cmake ..
fi
make -j24
cd ..

echo
done
4 changes: 1 addition & 3 deletions config/build_olcf_summit_XL.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,10 @@ export BOOST_ROOT=/autofs/nccs-svm1_sw/summit/.swci/1-compute/opt/spack/20180914
TYPE=Release
Compiler=XL

CURRENT_FOLDER=`pwd`

for name in offload_real_MP offload_real # offload_cplx offload_cplx_MP
do

CMAKE_FLAGS="-D CMAKE_BUILD_TYPE=$TYPE -D ENABLE_CUDA=1 -D CUDA_ARCH=sm_70 -D ENABLE_MASS=1 -D MASS_ROOT=/sw/summit/xl/16.1.1-5/xlmass/9.1.1 -D MPIEXEC_EXECUTABLE=/bin/sh -D MPIEXEC_NUMPROC_FLAG=$CURRENT_FOLDER/tests/scripts/jsrunhelper.sh"
CMAKE_FLAGS="-D CMAKE_BUILD_TYPE=$TYPE -D ENABLE_CUDA=1 -D CUDA_ARCH=sm_70 -D ENABLE_MASS=1 -D MASS_ROOT=/sw/summit/xl/16.1.1-5/xlmass/9.1.1 -D MPIEXEC_EXECUTABLE=`which jsrun` -D MPIEXEC_NUMPROC_FLAG='-n' -D MPIEXEC_PREFLAGS='-c;16;-g;1;-b;packed:16'"
if [[ $name == *"cplx"* ]]; then
CMAKE_FLAGS="$CMAKE_FLAGS -D QMC_COMPLEX=1"
fi
Expand Down
137 changes: 69 additions & 68 deletions src/Platforms/CUDA/cuBLAS.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,18 @@ inline void cublasAssert(cublasStatus_t code, const std::string& cause, const ch
case CUBLAS_STATUS_INTERNAL_ERROR:
cublas_error = "CUBLAS_STATUS_INTERNAL_ERROR";
break;
case CUBLAS_STATUS_NOT_SUPPORTED:
cublas_error = "CUBLAS_STATUS_NOT_SUPPORTED";
break;
case CUBLAS_STATUS_LICENSE_ERROR:
cublas_error = "CUBLAS_STATUS_LICENSE_ERROR";
break;
default:
cublas_error = "<unknown>";
}

std::ostringstream err;
err << "cublasAssert: " << cublas_error << ", file " << file << ", line " << line << std::endl
err << "cublasAssert: " << cublas_error << ", file " << file << " , line " << line << std::endl
<< cause << std::endl;
std::cerr << err.str();
//if (abort) exit(code);
Expand All @@ -70,83 +76,78 @@ namespace qmcplusplus
*/
namespace cuBLAS
{
inline void gemm(cublasHandle_t& handle,
const cublasOperation_t& transa,
const cublasOperation_t& transb,
int m,
int n,
int k,
const float* alpha,
const float* A,
int lda,
const float* B,
int ldb,
const float* beta,
float* C,
int ldc)
inline cublasStatus_t gemm(cublasHandle_t& handle,
const cublasOperation_t& transa,
const cublasOperation_t& transb,
int m,
int n,
int k,
const float* alpha,
const float* A,
int lda,
const float* B,
int ldb,
const float* beta,
float* C,
int ldc)
{
cublasErrorCheck(cublasSgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc),
"cublasSgemm failed!");
return cublasSgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
}

inline void gemm(cublasHandle_t& handle,
const cublasOperation_t& transa,
const cublasOperation_t& transb,
int m,
int n,
int k,
const std::complex<float>* alpha,
const std::complex<float>* A,
int lda,
const std::complex<float>* B,
int ldb,
const std::complex<float>* beta,
std::complex<float>* C,
int ldc)
inline cublasStatus_t gemm(cublasHandle_t& handle,
const cublasOperation_t& transa,
const cublasOperation_t& transb,
int m,
int n,
int k,
const std::complex<float>* alpha,
const std::complex<float>* A,
int lda,
const std::complex<float>* B,
int ldb,
const std::complex<float>* beta,
std::complex<float>* C,
int ldc)
{
cublasErrorCheck(cublasCgemm(handle, transa, transb, m, n, k, (const cuComplex*)alpha, (const cuComplex*)A, lda,
(const cuComplex*)B, ldb, (const cuComplex*)beta, (cuComplex*)C, ldc),
"cublasCgemm failed!");
return cublasCgemm(handle, transa, transb, m, n, k, (const cuComplex*)alpha, (const cuComplex*)A, lda,
(const cuComplex*)B, ldb, (const cuComplex*)beta, (cuComplex*)C, ldc);
}

inline void gemm(cublasHandle_t& handle,
const cublasOperation_t& transa,
const cublasOperation_t& transb,
int m,
int n,
int k,
const double* alpha,
const double* A,
int lda,
const double* B,
int ldb,
const double* beta,
double* C,
int ldc)
inline cublasStatus_t gemm(cublasHandle_t& handle,
const cublasOperation_t& transa,
const cublasOperation_t& transb,
int m,
int n,
int k,
const double* alpha,
const double* A,
int lda,
const double* B,
int ldb,
const double* beta,
double* C,
int ldc)
{
cublasErrorCheck(cublasDgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc),
"cublasDgemm failed!");
return cublasDgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
}

inline void gemm(cublasHandle_t& handle,
const cublasOperation_t& transa,
const cublasOperation_t& transb,
int m,
int n,
int k,
const std::complex<double>* alpha,
const std::complex<double>* A,
int lda,
const std::complex<double>* B,
int ldb,
const std::complex<double>* beta,
std::complex<double>* C,
int ldc)
inline cublasStatus_t gemm(cublasHandle_t& handle,
const cublasOperation_t& transa,
const cublasOperation_t& transb,
int m,
int n,
int k,
const std::complex<double>* alpha,
const std::complex<double>* A,
int lda,
const std::complex<double>* B,
int ldb,
const std::complex<double>* beta,
std::complex<double>* C,
int ldc)
{
cublasErrorCheck(cublasZgemm(handle, transa, transb, m, n, k, (const cuDoubleComplex*)alpha,
(const cuDoubleComplex*)A, lda, (const cuDoubleComplex*)B, ldb,
(const cuDoubleComplex*)beta, (cuDoubleComplex*)C, ldc),
"cublasZgemm failed!");
return cublasZgemm(handle, transa, transb, m, n, k, (const cuDoubleComplex*)alpha, (const cuDoubleComplex*)A, lda,
(const cuDoubleComplex*)B, ldb, (const cuDoubleComplex*)beta, (cuDoubleComplex*)C, ldc);
}

inline cublasStatus_t gemm_batched(cublasHandle_t& handle,
Expand Down
132 changes: 64 additions & 68 deletions src/Platforms/CUDA/cusolver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ inline void cusolverAssert(cusolverStatus_t code,
}

std::ostringstream err;
err << "cusolverAssert: " << cusolver_error << ", file " << file << ", line " << line << std::endl
err << "cusolverAssert: " << cusolver_error << ", file " << file << " , line " << line << std::endl
<< cause << std::endl;
std::cerr << err.str();
//if (abort) exit(code);
Expand All @@ -72,79 +72,75 @@ namespace qmcplusplus
{
/** interface to cusolver calls for different data types S/C/D/Z
*/
struct cusolver
namespace cusolver
{
static inline void getrf_bufferSize(cusolverDnHandle_t& handle, int m, int n, double* A, int lda, int* lwork)
{
cusolverErrorCheck(cusolverDnDgetrf_bufferSize(handle, m, n, A, lda, lwork), "cusolverDnDgetrf_bufferSize failed!");
}
inline cusolverStatus_t getrf_bufferSize(cusolverDnHandle_t& handle, int m, int n, double* A, int lda, int* lwork)
{
return cusolverDnDgetrf_bufferSize(handle, m, n, A, lda, lwork);
}

static inline void getrf_bufferSize(cusolverDnHandle_t& handle,
int m,
int n,
std::complex<double>* A,
int lda,
int* lwork)
{
cusolverErrorCheck(cusolverDnZgetrf_bufferSize(handle, m, n, (cuDoubleComplex*)A, lda, lwork),
"cusolverDnZgetrf_bufferSize failed!");
}
inline cusolverStatus_t getrf_bufferSize(cusolverDnHandle_t& handle,
int m,
int n,
std::complex<double>* A,
int lda,
int* lwork)
{
return cusolverDnZgetrf_bufferSize(handle, m, n, (cuDoubleComplex*)A, lda, lwork);
}

static inline void getrf(cusolverDnHandle_t& handle,
int m,
int n,
double* A,
int lda,
double* work,
int* ipiv,
int* info)
{
cusolverErrorCheck(cusolverDnDgetrf(handle, m, n, A, lda, work, ipiv, info), "cusolverDnDgetrf failed!");
}
inline cusolverStatus_t getrf(cusolverDnHandle_t& handle,
int m,
int n,
double* A,
int lda,
double* work,
int* ipiv,
int* info)
{
return cusolverDnDgetrf(handle, m, n, A, lda, work, ipiv, info);
}

static inline void getrf(cusolverDnHandle_t& handle,
int m,
int n,
std::complex<double>* A,
int lda,
std::complex<double>* work,
int* ipiv,
int* info)
{
cusolverErrorCheck(cusolverDnZgetrf(handle, m, n, (cuDoubleComplex*)A, lda, (cuDoubleComplex*)work, ipiv, info),
"cusolverDnZgetrf failed!");
}
inline cusolverStatus_t getrf(cusolverDnHandle_t& handle,
int m,
int n,
std::complex<double>* A,
int lda,
std::complex<double>* work,
int* ipiv,
int* info)
{
return cusolverDnZgetrf(handle, m, n, (cuDoubleComplex*)A, lda, (cuDoubleComplex*)work, ipiv, info);
}

static inline void getrs(cusolverDnHandle_t& handle,
const cublasOperation_t& transa,
int m,
int n,
const double* A,
int lda,
int* ipiv,
double* B,
int ldb,
int* info)
{
cusolverErrorCheck(cusolverDnDgetrs(handle, transa, m, n, A, lda, ipiv, B, ldb, info), "cusolverDnDgetrs failed!");
}
inline cusolverStatus_t getrs(cusolverDnHandle_t& handle,
const cublasOperation_t& transa,
int m,
int n,
const double* A,
int lda,
int* ipiv,
double* B,
int ldb,
int* info)
{
return cusolverDnDgetrs(handle, transa, m, n, A, lda, ipiv, B, ldb, info);
}

static inline void getrs(cusolverDnHandle_t& handle,
const cublasOperation_t& transa,
int m,
int n,
const std::complex<double>* A,
int lda,
int* ipiv,
std::complex<double>* B,
int ldb,
int* info)
{
cusolverErrorCheck(cusolverDnZgetrs(handle, transa, m, n, (const cuDoubleComplex*)A, lda, ipiv, (cuDoubleComplex*)B,
ldb, info),
"cusolverDnZgetrs failed!");
}
};
inline cusolverStatus_t getrs(cusolverDnHandle_t& handle,
const cublasOperation_t& transa,
int m,
int n,
const std::complex<double>* A,
int lda,
int* ipiv,
std::complex<double>* B,
int ldb,
int* info)
{
return cusolverDnZgetrs(handle, transa, m, n, (const cuDoubleComplex*)A, lda, ipiv, (cuDoubleComplex*)B, ldb, info);
}
} // namespace cusolver

} // namespace qmcplusplus
#endif // QMCPLUSPLUS_CUSOLVER_H

0 comments on commit 108879d

Please sign in to comment.