Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gpu #411

Draft
wants to merge 69 commits into
base: v3
Choose a base branch
from
Draft

Gpu #411

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
69 commits
Select commit Hold shift + click to select a range
73d6047
added platform cuda
Apr 7, 2022
3ee56cc
added wrapper to all blas routines
Apr 8, 2022
e421a33
added cuda in options.mk
Apr 19, 2022
40994fb
fixed typos and compilation options
Apr 19, 2022
8d2b745
fixed type cast
Apr 19, 2022
3596a6d
fixed types
Apr 19, 2022
9753d7e
fixed types
Apr 19, 2022
84011c5
added fortran functions
Apr 19, 2022
c9535cd
trying to fix already defined type error
Apr 28, 2022
c94c67d
changed lapack in cuda platform
Apr 28, 2022
16de1b7
reverted lapack in cuda platform
Apr 28, 2022
33f23b3
set cuda default in options
Apr 28, 2022
3fcc47e
added nrm2 debug print
Apr 29, 2022
89b729e
fixed typo in nrm2
Apr 29, 2022
a4e089a
fixed typo in std::endl
Apr 29, 2022
8a395da
fixed vector printing
Apr 29, 2022
065a0d9
added debug in gemv
Apr 29, 2022
e295407
added zgemv debug
Apr 29, 2022
394be3a
added debug dgemm
Apr 29, 2022
40aabd5
dont actually use gemm on gpu
Apr 29, 2022
5de66a0
dont actually use gemm on gpu - fixed typo
Apr 29, 2022
39d5b67
dont actually use gemm on gpu - fixed typo 2
Apr 29, 2022
fa88178
dont actually use gemm on gpu - dont print
Apr 29, 2022
83c8d2b
output A B C
May 2, 2022
3d19cdd
output A B C and transa b
May 2, 2022
ec7e841
fixed malloc
May 2, 2022
162e26e
fixed malloc
May 2, 2022
2c40d44
fixed malloc
May 2, 2022
801a04d
trying cudamemcpy
May 2, 2022
eb6bfca
trying cudamemcpy instead of setmatrix
May 2, 2022
67f3b51
fixed gemm?
May 2, 2022
c9f0bda
changed every set get with memcpy
May 3, 2022
ad19a9c
trying static cublas handle
May 3, 2022
ff6ff23
reverting static handle
May 3, 2022
3ffaf1d
comment to the option.mk
May 5, 2022
6a688e7
comment in cuda section is removed
May 5, 2022
61bf459
comments showing how to replace storage type
May 5, 2022
9e12546
Merge branch 'gpu' of github.com:darcangelomauro/ITensor into gpu
May 5, 2022
0a0dc51
thrust allocation
May 17, 2022
ba60801
safe pointer to thrust
May 17, 2022
7281ae3
safe pointer reinterpret cast
May 17, 2022
7b32260
fixed typo
May 17, 2022
bb3a482
redefined complex if cuda
May 17, 2022
7d38230
used thrust complex instead of cuda complex
May 17, 2022
36dcbbf
reverted to cuda complex
May 17, 2022
f77896e
reverted to simple complex
May 17, 2022
6ef6185
try cucomplex
May 18, 2022
83e89a4
use thrust complex
May 18, 2022
1274526
trying to fix thrust complex
May 18, 2022
cf7a556
fixed typo in reinterpret cast
May 18, 2022
f15ac29
revert back
May 18, 2022
04db28b
changed safe pointer behaviour
May 18, 2022
14d7b64
bypass gemm emulator
May 18, 2022
fc754b9
overrode zgemm for cuda
May 18, 2022
24020ab
forgot to include thrust
May 18, 2022
986557b
changed complex to lapack_complex
May 18, 2022
e10825c
use raw pointers
May 18, 2022
0915352
extract raw pointer from thrust::device_ptr
May 18, 2022
4fb774c
deleted cuda stuff from normal zgemm
May 18, 2022
a7130af
fixed return type of zgemm
May 18, 2022
f3349d9
not using get()
May 20, 2022
d547310
not using get()
May 20, 2022
5be2e74
dont compile gemm_impl
May 20, 2022
69ab83b
gemm_impl
May 20, 2022
304e7c7
implemented gemm
May 20, 2022
0bb4e7e
restrict gemm to complex
May 20, 2022
c53944b
restrict gemm to complex or double
May 20, 2022
e1128a4
double requires some more overloading so we just stick to gemm complex
May 20, 2022
503df37
restrict contract to complex
May 20, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
260 changes: 132 additions & 128 deletions itensor/tensor/contract.cc

Large diffs are not rendered by default.

74 changes: 62 additions & 12 deletions itensor/tensor/gemm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ struct dgemmTask
Real a,
Real b)
: Apart(Ap),Bpart(Bp),Cpart(Cp),alpha(a),beta(b)
{
{
if(b != 0.0) copyFromC = true;
}

Expand All @@ -51,6 +51,54 @@ struct dgemmTask
{ }
};

#ifdef ITENSOR_USE_CUDA
// Forward a complex matrix product C = alpha*A*B + beta*C to the
// BLAS-style backend via gemm_wrapper. Views are assumed to satisfy
// the wrapper's layout expectations; transposition is communicated
// through the two leading flags rather than by touching the data.
void
gemm_impl(MatRefc<Cplx> A,
          MatRefc<Cplx> B,
          MatRef<Cplx> C,
          Real alpha,
          Real beta)
    {
    // Standard GEMM dimensions: C is m x n, the shared dimension is k.
    auto const m = nrows(A);
    auto const n = ncols(B);
    auto const k = ncols(A);
    gemm_wrapper(isTransposed(A),
                 isTransposed(B),
                 m,
                 n,
                 k,
                 alpha,
                 A.data(),
                 B.data(),
                 beta,
                 C.data());
    }

// C = alpha*A*B + beta*C
// Front end for matrix multiplication on this (CUDA) build path.
// A, B: read-only operand views; C: destination view, updated in place.
// alpha, beta: the usual GEMM scaling factors.
// If C is stored transposed, we compute C^t = B^t * A^t instead,
// because C.data() always addresses the underlying storage of C,
// not of C.t(), regardless of the transpose flag.
template<typename VA, typename VB>
void
gemm(MatRefc<VA> A,
     MatRefc<VB> B,
     MatRef<common_type<VA,VB>> C,
     Real alpha,
     Real beta)
    {
#ifdef DEBUG
    // Mirror the non-CUDA gemm: gemm_impl/gemm_wrapper require
    // contiguous storage, so reject strided views early.
    if(!(isContiguous(A) && isContiguous(B) && isContiguous(C)))
        throw std::runtime_error("multiplication of non-contiguous MatrixRefs not currently supported");
#endif
    if(isTransposed(C))
        {
        //Do C = Bt*At instead of Ct=A*B
        //Recall that C.data() points to elements of C, not C.t()
        //regardless of whether C.transpose()==true or false
        gemm_impl(transpose(B),transpose(A),transpose(C),alpha,beta);
        }
    else
        {
        gemm_impl(A,B,C,alpha,beta);
        }
    }
//template void gemm(MatRefc<Real>, MatRefc<Real>, MatRef<Real>,Real,Real);
//template void gemm(MatRefc<Real>, MatRefc<Cplx>, MatRef<Cplx>,Real,Real);
//template void gemm(MatRefc<Cplx>, MatRefc<Real>, MatRef<Cplx>,Real,Real);
template void gemm(MatRefc<Cplx>, MatRefc<Cplx>, MatRef<Cplx>,Real,Real);
#else


void
cplxToRealBuf(SAFE_PTR_OF(const Real) C,
size_t imagPart,
Expand Down Expand Up @@ -169,7 +217,7 @@ gemm_impl(MatRefc<Cplx> A,
Real alpha,
Real beta)
{
#ifdef ITENSOR_USE_ZGEMM
#if defined ITENSOR_USE_ZGEMM
gemm_wrapper(isTransposed(A),
isTransposed(B),
nrows(A),
Expand All @@ -181,8 +229,8 @@ gemm_impl(MatRefc<Cplx> A,
beta,
C.data());
#else //emulate zgemm by calling dgemm four times
std::array<const dgemmTask,6>
tasks =
std::array<const dgemmTask,6>
tasks =
{{dgemmTask(0,0,0,+alpha,beta),
dgemmTask(1,1,0,-alpha),
dgemmTask(0),
Expand All @@ -191,7 +239,7 @@ gemm_impl(MatRefc<Cplx> A,
dgemmTask(1)
}};
gemm_emulator(A,B,C,alpha,beta,tasks);
#endif
#endif //ITENSOR_USE_ZGEMM
}


Expand All @@ -202,8 +250,8 @@ gemm_impl(MatRefc<Real> A,
Real alpha,
Real beta)
{
std::array<const dgemmTask,4>
tasks =
std::array<const dgemmTask,4>
tasks =
{{dgemmTask(0,0,0,+alpha,beta),
dgemmTask(0),
dgemmTask(0,1,1,+alpha,beta),
Expand All @@ -219,8 +267,8 @@ gemm_impl(MatRefc<Cplx> A,
Real alpha,
Real beta)
{
std::array<const dgemmTask,4>
tasks =
std::array<const dgemmTask,4>
tasks =
{{dgemmTask(0,0,0,+alpha,beta),
dgemmTask(0),
dgemmTask(1,0,1,+alpha,beta),
Expand Down Expand Up @@ -252,14 +300,14 @@ gemm_impl(MatRefc<Real> A,
// C = alpha*A*B + beta*C
template<typename VA, typename VB>
void
gemm(MatRefc<VA> A,
MatRefc<VB> B,
gemm(MatRefc<VA> A,
MatRefc<VB> B,
MatRef<common_type<VA,VB>> C,
Real alpha,
Real beta)
{
#ifdef DEBUG
if(!(isContiguous(A) && isContiguous(B) && isContiguous(C)))
if(!(isContiguous(A) && isContiguous(B) && isContiguous(C)))
throw std::runtime_error("multiplication of non-contiguous MatrixRefs not currently supported");
#endif

Expand Down Expand Up @@ -291,5 +339,7 @@ template void gemm(MatRefc<Real>, MatRefc<Cplx>, MatRef<Cplx>,Real,Real);
template void gemm(MatRefc<Cplx>, MatRefc<Real>, MatRef<Cplx>,Real,Real);
template void gemm(MatRefc<Cplx>, MatRefc<Cplx>, MatRef<Cplx>,Real,Real);

#endif //ITENSOR_USE_CUDA


} //namespace itensor
Loading