Skip to content

Commit

Permalink
Merge branch 'develop' into nvtx-annotations
Browse files Browse the repository at this point in the history
  • Loading branch information
nkoukpaizan committed Nov 18, 2022
2 parents f9f6145 + 8025c3a commit 629a716
Show file tree
Hide file tree
Showing 28 changed files with 439 additions and 576 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: tests

on: [push, pull_request]
on: [push]

jobs:
test:
Expand Down
3 changes: 1 addition & 2 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,7 @@ build_on_marianas:
MY_CLUSTER: "marianas"
TIMELIMIT: '1:30:00'
SLURM_ARGS: --gres=gpu:1 --exclusive
# Through the steps, the argument to -E is automatically surrounded by quotes
CTEST_CMD: 'ctest -VV -E NlpSparse1_6|NlpSparse2_5'
CTEST_CMD: 'ctest -VV'
<<: *pnnl_tags_definition
<<: *pnnl_script_definition
rules:
Expand Down
1 change: 1 addition & 0 deletions cmake/FindHiopCudaLibraries.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ if(HIOP_BUILD_STATIC)
CUDA::cublasLt_static
CUDA::curand_static
CUDA::nvToolsExt
CUDA::culibos
)
endif()

Expand Down
2 changes: 1 addition & 1 deletion doc/src/sections/solver_options.tex
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ \subsubsection{Regularization}
\noindent \textbf{regularization\_method}: whether randomized method is used to compute regularizations.
\begin{itemize}
\item ``standard'' (default) - no randomized method is used. Regularization is computed as a scala times an identiy matrix, i.e., $\delta I$.
\item ``standard'' (default) - no randomized method is used. Regularization is computed as a scalar times an identity matrix, i.e., $\delta I$.
\item ``randomized'' - use randomized regularizations.
\end{itemize}
\medskip
Expand Down
4 changes: 2 additions & 2 deletions scripts/gcc-cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ set(HIOP_SPARSE ON CACHE BOOL "")
message(STATUS "Enabling HiOp's deepchecking")
set(HIOP_DEEPCHECKS ON CACHE BOOL "")

message(STATUS "Setting default cuda architecture to 60")
set(CMAKE_CUDA_ARCHITECTURES 60 CACHE STRING "")
set(CMAKE_CUDA_ARCHITECTURES 60 70 75 80 CACHE STRING "")
message(STATUS "Setting default cuda architecture to ${CMAKE_CUDA_ARCHITECTURES}")

message(STATUS "Done preloading CMake cache with values for continuous integration")
115 changes: 63 additions & 52 deletions scripts/marianasVariables.sh
Original file line number Diff line number Diff line change
@@ -1,68 +1,80 @@
# NOTE: The following is required when running from Gitlab CI via slurm job
source /etc/profile.d/modules.sh
module use -a /qfs/projects/exasgd/src/cameron/spack/share/spack/modules/linux-centos7-zen2
module use -a /qfs/projects/exasgd/src/ci-deception/ci-modules/linux-centos7-zen2

# Load spack-built modules
# autoconf@2.69%gcc@10.2.0 patches=35c4492,7793209,a49dd5b arch=linux-centos7-zen2
module load autoconf-2.69-gcc-10.2.0-r677m42
# autoconf-archive@2022.02.11%gcc@10.2.0 patches=130cd48 arch=linux-centos7-zen2
module load autoconf-archive-2022.02.11-gcc-10.2.0-pbrbzut
# automake@1.16.5%gcc@10.2.0 arch=linux-centos7-zen2
module load automake-1.16.5-gcc-10.2.0-j4bwm4o
# blt@0.4.1%gcc@10.2.0 arch=linux-centos7-zen2
module load blt-0.4.1-gcc-10.2.0-tanugdw
# pkgconf@1.8.0%gcc@10.2.0 arch=linux-centos7-zen2
module load pkgconf-1.8.0-gcc-10.2.0-fuflwbl
# ncurses@6.3%gcc@10.2.0~symlinks+termlib abi=none arch=linux-centos7-zen2
module load ncurses-6.3-gcc-10.2.0-4wlnxto
# ca-certificates-mozilla@2022-07-19%gcc@10.2.0 arch=linux-centos7-zen2
module load ca-certificates-mozilla-2022-07-19-gcc-10.2.0-h2opehw
# camp@0.2.3%gcc@10.2.0+cuda~ipo~rocm~tests build_type=RelWithDebInfo cuda_arch=60 arch=linux-centos7-zen2
module load camp-0.2.3-gcc-10.2.0-vpkkybx
# cmake@3.23.2%gcc@10.2.0~doc+ncurses+ownlibs~qt build_type=Release arch=linux-centos7-zen2
module load cmake-3.23.2-gcc-10.2.0-i24avzq
# coinhsl@2019.05.21%gcc@10.2.0+blas arch=linux-centos7-zen2
module load coinhsl-2019.05.21-gcc-10.2.0-j7hsujd
# cub@1.16.0%gcc@10.2.0 arch=linux-centos7-zen2
module load cub-1.16.0-gcc-10.2.0-ovgrtom
# diffutils@3.8%gcc@10.2.0 arch=linux-centos7-zen2
module load diffutils-3.8-gcc-10.2.0-mjfwces
# ginkgo@glu_experimental%gcc@10.2.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=60 arch=linux-centos7-zen2
module load ginkgo-glu_experimental-gcc-10.2.0-dbmokiq
# gmp@6.2.1%gcc@10.2.0 libs=shared,static arch=linux-centos7-zen2
module load gmp-6.2.1-gcc-10.2.0-ac4z3oa
# berkeley-db@18.1.40%gcc@10.2.0+cxx~docs+stl patches=b231fcc arch=linux-centos7-zen2
module load berkeley-db-18.1.40-gcc-10.2.0-hltd4j3
# libiconv@1.16%gcc@10.2.0 libs=shared,static arch=linux-centos7-zen2
module load libiconv-1.16-gcc-10.2.0-gbg7l5p
# diffutils@3.8%gcc@10.2.0 arch=linux-centos7-zen2
module load diffutils-3.8-gcc-10.2.0-mjfwces
# bzip2@1.0.8%gcc@10.2.0~debug~pic+shared arch=linux-centos7-zen2
module load bzip2-1.0.8-gcc-10.2.0-bxh46iv
# readline@8.1.2%gcc@10.2.0 arch=linux-centos7-zen2
module load readline-8.1.2-gcc-10.2.0-vtya5ay
# gdbm@1.19%gcc@10.2.0 arch=linux-centos7-zen2
module load gdbm-1.19-gcc-10.2.0-efj5agg
# zlib@1.2.12%gcc@10.2.0+optimize+pic+shared patches=0d38234 arch=linux-centos7-zen2
module load zlib-1.2.12-gcc-10.2.0-gnkqokp
# perl@5.34.1%gcc@10.2.0+cpanm+shared+threads arch=linux-centos7-zen2
module load perl-5.34.1-gcc-10.2.0-xp4fpdr
# openssl@1.1.1q%gcc@10.2.0~docs~shared certs=mozilla patches=3fdcf2d arch=linux-centos7-zen2
## module load openssl-1.1.1q-gcc-10.2.0-xhxspos
# cmake@3.23.3%gcc@10.2.0~doc+ncurses+ownlibs~qt build_type=Release arch=linux-centos7-zen2
module load cmake-3.23.3-gcc-10.2.0-ggyj7bs
# blt@0.4.1%gcc@10.2.0 arch=linux-centos7-zen2
module load blt-0.4.1-gcc-10.2.0-oabae2w
# cub@1.16.0%gcc@10.2.0 arch=linux-centos7-zen2
module load cub-1.16.0-gcc-10.2.0-ovgrtom
# cuda@11.4%gcc@10.2.0~allow-unsupported-compilers~dev arch=linux-centos7-zen2
module load cuda-11.4-gcc-10.2.0-ewurpsv
# camp@0.2.3%gcc@10.2.0+cuda~ipo+openmp~rocm~tests build_type=RelWithDebInfo cuda_arch=60,70,75,80 arch=linux-centos7-zen2
module load camp-0.2.3-gcc-10.2.0-36lcy72
# openblas@0.3.20%gcc@10.2.0~bignuma~consistent_fpcsr~ilp64+locking+pic+shared patches=9f12903 symbol_suffix=none threads=none arch=linux-centos7-zen2
module load openblas-0.3.20-gcc-10.2.0-x6v3mwm
# coinhsl@2019.05.21%gcc@10.2.0+blas arch=linux-centos7-zen2
module load coinhsl-2019.05.21-gcc-10.2.0-gkzkws6
# ginkgo@1.5.0.glu_experimental%gcc@10.2.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=60,70,75,80 arch=linux-centos7-zen2
module load ginkgo-1.5.0.glu_experimental-gcc-10.2.0-x73b7k3
# magma@2.6.2%gcc@10.2.0+cuda+fortran~ipo~rocm+shared build_type=RelWithDebInfo cuda_arch=60,70,75,80 arch=linux-centos7-zen2
module load magma-2.6.2-gcc-10.2.0-caockkq
# metis@5.1.0%gcc@10.2.0~gdb~int64~real64+shared build_type=Release patches=4991da9,b1225da arch=linux-centos7-zen2
module load metis-5.1.0-gcc-10.2.0-k4z4v6l
# openmpi@4.1.0mlx5.0%gcc@10.2.0~atomics~cuda~cxx~cxx_exceptions~gpfs~internal-hwloc~java~legacylaunchers~lustre~memchecker+romio+rsh~singularity+static+vt+wrapper-rpath fabrics=none patches=60ce20b schedulers=none arch=linux-centos7-zen2
module load openmpi-4.1.0mlx5.0-gcc-10.2.0-ytj7jxb
# raja@0.14.0%gcc@10.2.0+cuda~examples~exercises~ipo+openmp~rocm+shared~tests build_type=RelWithDebInfo cuda_arch=60,70,75,80 arch=linux-centos7-zen2
module load raja-0.14.0-gcc-10.2.0-tyzamiy
# libsigsegv@2.13%gcc@10.2.0 arch=linux-centos7-zen2
module load libsigsegv-2.13-gcc-10.2.0-aj5goyi
# libtool@2.4.7%gcc@10.2.0 arch=linux-centos7-zen2
module load libtool-2.4.7-gcc-10.2.0-mzc2mvw
# m4@1.4.19%gcc@10.2.0+sigsegv patches=9dc5fbd,bfdffa7 arch=linux-centos7-zen2
module load m4-1.4.19-gcc-10.2.0-k5kkyx6
# magma@2.6.2%gcc@10.2.0+cuda+fortran~ipo~rocm+shared build_type=RelWithDebInfo cuda_arch=60 arch=linux-centos7-zen2
module load magma-2.6.2-gcc-10.2.0-o7gg2nj
# metis@5.1.0%gcc@10.2.0~gdb~int64~real64+shared build_type=Release patches=4991da9,b1225da arch=linux-centos7-zen2
module load metis-5.1.0-gcc-10.2.0-h2r63pj
# autoconf@2.69%gcc@10.2.0 patches=35c4492,7793209,a49dd5b arch=linux-centos7-zen2
module load autoconf-2.69-gcc-10.2.0-jnh4mbw
# automake@1.16.5%gcc@10.2.0 arch=linux-centos7-zen2
module load automake-1.16.5-gcc-10.2.0-pgpzgqq
# libtool@2.4.7%gcc@10.2.0 arch=linux-centos7-zen2
module load libtool-2.4.7-gcc-10.2.0-mzc2mvw
# gmp@6.2.1%gcc@10.2.0 libs=shared,static arch=linux-centos7-zen2
module load gmp-6.2.1-gcc-10.2.0-tpo7i4x
# autoconf-archive@2022.02.11%gcc@10.2.0 patches=139214f arch=linux-centos7-zen2
module load autoconf-archive-2022.02.11-gcc-10.2.0-tirhdzr
# texinfo@6.5%gcc@10.2.0 patches=12f6edb,1732115 arch=linux-centos7-zen2
module load texinfo-6.5-gcc-10.2.0-mcrbwnj
# mpfr@4.1.0%gcc@10.2.0 libs=shared,static arch=linux-centos7-zen2
module load mpfr-4.1.0-gcc-10.2.0-ixeo4lu
# ncurses@6.2%gcc@10.2.0~symlinks+termlib abi=none arch=linux-centos7-zen2
module load ncurses-6.2-gcc-10.2.0-3b2uqgl
# openblas@0.3.20%gcc@10.2.0~bignuma~consistent_fpcsr~ilp64+locking+pic+shared symbol_suffix=none threads=none arch=linux-centos7-zen2
module load openblas-0.3.20-gcc-10.2.0-qhcutll
# openmpi@4.1.0%gcc@10.2.0~atomics~cuda~cxx~cxx_exceptions~gpfs~internal-hwloc~java~legacylaunchers~lustre~memchecker+romio+rsh~singularity+static+vt+wrapper-rpath fabrics=none patches=60ce20b schedulers=none arch=linux-centos7-zen2
module load openmpi-4.1.0-gcc-10.2.0-wnndpcg
# openssl@1.1.1q%gcc@10.2.0~docs~shared certs=mozilla patches=3fdcf2d arch=linux-centos7-zen2
module load openssl-1.1.1q-gcc-10.2.0-t5hsb3s
# perl@5.26.0%gcc@10.2.0+cpanm+shared+threads patches=0eac10e,8cf4302 arch=linux-centos7-zen2
module load perl-5.26.0-gcc-10.2.0-l2yiybo
# pkgconf@1.8.0%gcc@10.2.0 arch=linux-centos7-zen2
module load pkgconf-1.8.0-gcc-10.2.0-fuflwbl
# raja@0.14.0%gcc@10.2.0+cuda~examples~exercises~ipo+openmp~rocm+shared~tests build_type=RelWithDebInfo cuda_arch=60 arch=linux-centos7-zen2
module load raja-0.14.0-gcc-10.2.0-pc2ckhw
module load mpfr-4.1.0-gcc-10.2.0-3yutkz3
# suite-sparse@5.10.1%gcc@10.2.0~cuda~graphblas~openmp+pic~tbb arch=linux-centos7-zen2
module load suite-sparse-5.10.1-gcc-10.2.0-jkighdn
# texinfo@6.5%gcc@10.2.0 patches=12f6edb,1732115 arch=linux-centos7-zen2
module load texinfo-6.5-gcc-10.2.0-mzqgqla
# umpire@6.0.0%gcc@10.2.0+c+cuda~device_alloc~deviceconst~examples~fortran~ipo~numa~openmp~rocm~shared build_type=RelWithDebInfo cuda_arch=60 tests=none arch=linux-centos7-zen2
module load umpire-6.0.0-gcc-10.2.0-eunwzka
# zlib@1.2.12%gcc@10.2.0+optimize+pic+shared patches=0d38234 arch=linux-centos7-zen2
module load zlib-1.2.12-gcc-10.2.0-gnkqokp
module load suite-sparse-5.10.1-gcc-10.2.0-add65sb
# umpire@6.0.0%gcc@10.2.0+c+cuda~device_alloc~deviceconst~examples~fortran~ipo~numa~openmp~rocm~shared build_type=RelWithDebInfo cuda_arch=60,70,75,80 tests=none arch=linux-centos7-zen2
module load umpire-6.0.0-gcc-10.2.0-lrjkuun
# hiop@develop%gcc@10.2.0+cuda~cusolver+deepchecking~full_optimizations+ginkgo~ipo~jsrun+kron+mpi+raja~rocm~shared+sparse build_type=RelWithDebInfo cuda_arch=60,70,75,80 arch=linux-centos7-zen2
## module load hiop-develop-gcc-10.2.0-bgzxttu

# Load system modules
module load gcc/10.2.0
Expand All @@ -79,5 +91,4 @@ EOD
export NVBLAS_CONFIG_FILE=$PWD/nvblas.conf
echo "Generated $PWD/nvblas.conf"

export EXTRA_CMAKE_ARGS="$EXTRA_CMAKE_ARGS -DCMAKE_CUDA_ARCHITECTURES=60"
export CMAKE_CACHE_SCRIPT=gcc-cuda.cmake
8 changes: 7 additions & 1 deletion scripts/quartzVariables.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
module load gcc/8.3.1 cmake/3.18 mkl/2019.0 mvapich2/2.3
module load gcc/9.3.1
module load mkl/2020.0
module load mvapich2/2.3
module load cmake/3.22.4

EXTRA_CMAKE_ARGS="$EXTRA_CMAKE_ARGS -DHIOP_COINHSL_DIR:STRING=/usr/workspace/chiang7/software/quartz/COIN-OR/build"
EXTRA_CMAKE_ARGS="$EXTRA_CMAKE_ARGS -DHIOP_METIS_DIR:STRING=/usr/workspace/chiang7/software/quartz/COIN-OR/build"
EXTRA_CMAKE_ARGS="$EXTRA_CMAKE_ARGS -DHIOP_USE_RAJA:STRING=ON -DRAJA_DIR:STRING=/g/g92/chiang7/workspaces/chiang7/software/quartz/LLNL/RAJA/build_opt/_install -Dumpire_DIR:STRING=/g/g92/chiang7/workspaces/chiang7/software/quartz/LLNL/Umpire/build_opt/_install"


export CMAKE_CACHE_SCRIPT=gcc-cpu.cmake


2 changes: 1 addition & 1 deletion src/Drivers/Sparse/NlpSparseEx3Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ static void usage(const char* exeName)
{
printf("hiOp driver %s that solves a synthetic convex problem of variable size.\n", exeName);
printf("Usage: \n");
printf(" '$ %s problem_size scala -selfcheck'\n", exeName);
printf(" '$ %s problem_size scalar -selfcheck'\n", exeName);
printf("Arguments:\n");
printf(" 'problem_size': number of decision variables [optional, default is 50]\n");
printf(" 'scala_a': small pertubation added to the inequality bounds [optional, default is 1e-6]\n");
Expand Down
117 changes: 19 additions & 98 deletions src/LinAlg/hiopLinSolverSparseGinkgo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,12 +182,13 @@ std::shared_ptr<gko::matrix::Csr<double, int>> transferTripletToCSR(std::shared_

void update_matrix(hiopMatrixSparse* M_,
std::shared_ptr<gko::matrix::Csr<double, int>> mtx,
std::shared_ptr<gko::matrix::Csr<double, int>> host_mtx,
int* index_covert_CSR2Triplet_,
int* index_covert_extra_Diag2CSR_)
{
int n_ = mtx->get_size()[0];
int nnz_= mtx->get_num_stored_elements();
auto values = mtx->get_values();
auto values = host_mtx->get_values();
int rowID_tmp{0};
for(int k=0; k<nnz_; k++) {
values[k] = M_->M()[index_covert_CSR2Triplet_[k]];
Expand All @@ -197,6 +198,10 @@ void update_matrix(hiopMatrixSparse* M_,
values[index_covert_extra_Diag2CSR_[i]] += M_->M()[M_->numberOfNonzeros() - n_ + i];
}
}
auto exec = mtx->get_executor();
if (exec != exec->get_master()) {
mtx->copy_from(host_mtx.get());
}
}


Expand Down Expand Up @@ -283,9 +288,10 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
assert(n_==M_->n() && M_->n()==M_->m());
assert(n_>0);

exec_ = create_exec(nlp_->options->GetString("ginkgo_exec"));//gko::HipExecutor::create(0, gko::ReferenceExecutor::create());
exec_ = create_exec(nlp_->options->GetString("ginkgo_exec"));

mtx_ = transferTripletToCSR(exec_, n_, M_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_);
host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_);
mtx_ = exec_ == (exec_->get_master()) ? host_mtx_ : gko::clone(exec_, host_mtx_);
nnz_ = mtx_->get_num_stored_elements();

reusable_factory_ = setup_solver_factory(exec_, mtx_);
Expand All @@ -302,7 +308,7 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
if( !mtx_ ) {
this->firstCall();
} else {
update_matrix(M_, mtx_, index_covert_CSR2Triplet_, index_covert_extra_Diag2CSR_);
update_matrix(M_, mtx_, host_mtx_, index_covert_CSR2Triplet_, index_covert_extra_Diag2CSR_);
}

gko_solver_ = gko::share(reusable_factory_->generate(mtx_));
Expand All @@ -319,6 +325,9 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
bool hiopLinSolverSymSparseGinkgo::solve ( hiopVector& x_ )
{
RANGE_PUSH(__FUNCTION__);
using vec = gko::matrix::Dense<double>;
using arr = gko::array<double>;
auto host = exec_->get_master();
assert(n_==M_->n() && M_->n()==M_->m());
assert(n_>0);
assert(x_.get_size()==M_->n());
Expand All @@ -330,12 +339,13 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
hiopVectorPar* rhs = dynamic_cast<hiopVectorPar*>(x->new_copy());
double* dx = x->local_data();
double* drhs = rhs->local_data();
auto x_array = gko::Array<double>::view(exec_->get_master(), n_, dx);
auto b_array = gko::Array<double>::view(exec_, n_, drhs);
auto dense_x_host = gko::matrix::Dense<double>::create(exec_->get_master(), gko::dim<2>{n_, 1}, gko::Array<double>::view(exec_->get_master(), n_, dx), 1);
auto dense_x= gko::matrix::Dense<double>::create(exec_, gko::dim<2>{n_, 1});
const auto size = gko::dim<2>{n_, 1};
auto dense_x_host = vec::create(host, size, arr::view(host, n_, dx), 1);
auto dense_x = vec::create(exec_, size);
dense_x->copy_from(dense_x_host.get());
auto dense_b = gko::matrix::Dense<double>::create(exec_, gko::dim<2>{n_, 1}, b_array, 1);
auto dense_b_host = vec::create(host, size, arr::view(host, n_, drhs), 1);
auto dense_b = vec::create(exec_, size);
dense_b->copy_from(dense_b_host.get());

gko_solver_->apply(dense_b.get(), dense_x.get());
nlp_->runStats.linsolv.tmTriuSolves.stop();
Expand All @@ -346,93 +356,4 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
return 1;
}


// Constructs the non-symmetric Ginkgo sparse linear solver wrapper.
//
// Forwards `n`, `nnz` and `nlp` to the hiopLinSolverNonSymSparse base class,
// records the system size in n_, and leaves both index-map pointers null and
// nnz_ at zero until the solver is actually set up.
hiopLinSolverNonSymSparseGinkgo::hiopLinSolverNonSymSparseGinkgo(const int& n,
                                                                 const int& nnz,
                                                                 hiopNlpFormulation* nlp)
  : hiopLinSolverNonSymSparse(n, nnz, nlp),
    index_covert_CSR2Triplet_(nullptr),
    index_covert_extra_Diag2CSR_(nullptr),
    n_(n),
    nnz_(0)
{
  // Nothing else to do here.
}

// Releases the two index-map arrays held by this solver.
hiopLinSolverNonSymSparseGinkgo::~hiopLinSolverNonSymSparseGinkgo()
{
  // delete[] on a null pointer is a no-op, so explicit null checks are unnecessary.
  delete [] index_covert_CSR2Triplet_;
  delete [] index_covert_extra_Diag2CSR_;
}

// One-time setup of the Ginkgo objects used by this solver:
//  1. create the executor selected by the "ginkgo_exec" option,
//  2. convert the triplet matrix M_ into a Ginkgo CSR matrix (the two index-map
//     pointers are passed by address so the conversion can populate them),
//  3. cache the number of stored elements and build the reusable solver factory.
void hiopLinSolverNonSymSparseGinkgo::firstCall()
{
  nlp_->log->printf(hovSummary, "Setting up Ginkgo solver ... \n");

  // The system matrix must be square, non-empty and match the size given at construction.
  assert(n_==M_->n() && M_->n()==M_->m());
  assert(n_>0);

  exec_ = create_exec(nlp_->options->GetString("ginkgo_exec"));

  mtx_ = transferTripletToCSR(exec_,
                              n_,
                              M_,
                              &index_covert_CSR2Triplet_,
                              &index_covert_extra_Diag2CSR_);
  nnz_ = mtx_->get_num_stored_elements();

  reusable_factory_ = setup_solver_factory(exec_, mtx_);
}

// Refreshes the Ginkgo matrix from M_ and regenerates (refactorizes) the solver.
//
// On the first invocation (mtx_ not yet created) the full setup in firstCall()
// is performed; afterwards only the matrix values are updated in place.
//
// Returns 0 when the factorization reports success and -1 otherwise.
int hiopLinSolverNonSymSparseGinkgo::matrixChanged()
{
  RANGE_PUSH(__FUNCTION__);
  assert(n_==M_->n() && M_->n()==M_->m());
  assert(n_>0);

  nlp_->runStats.linsolv.tmFactTime.start();

  if(mtx_) {
    update_matrix(M_, mtx_, index_covert_CSR2Triplet_, index_covert_extra_Diag2CSR_);
  } else {
    this->firstCall();
  }

  gko_solver_ = gko::share(reusable_factory_->generate(mtx_));

  // Temporary solution for the ginkgo GLU integration: dig the direct solver out of
  // the ScaledReordered -> Gmres -> preconditioner chain to read its factorization status.
  auto sol = gko::as<gko::solver::Gmres<>>(gko::as<gko::solver::ScaledReordered<>>(gko_solver_)->get_solver());
  auto precond = gko::as<gko::experimental::solver::Direct<double, int>>(sol->get_preconditioner());
  const auto status = precond->get_factorization_status();

  // Stop the factorization timer started above so the elapsed time is recorded;
  // this mirrors the start()/stop() pairing used for tmTriuSolves in solve().
  nlp_->runStats.linsolv.tmFactTime.stop();

  RANGE_POP();
  return status == gko::experimental::factorization::status::success ? 0 : -1;
}

// Solves the linear system with the solver generated in matrixChanged().
//
// On entry `x_` holds the right-hand side; it is copied into a temporary vector
// and `x_`'s storage is then handed to Ginkgo as the solution buffer, so on
// return `x_` holds whatever the Ginkgo solver wrote as the solution.
//
// `x_` must be a hiopVectorPar of size n_. Always returns true.
bool hiopLinSolverNonSymSparseGinkgo::solve(hiopVector& x_)
{
  RANGE_PUSH(__FUNCTION__);
  assert(n_==M_->n() && M_->n()==M_->m());
  assert(n_>0);
  assert(x_.get_size()==M_->n());

  nlp_->runStats.linsolv.tmTriuSolves.start();

  hiopVectorPar* x = dynamic_cast<hiopVectorPar*>(&x_);
  assert(x != nullptr);

  // The right-hand-side copy is owned by a unique_ptr so it is released even if
  // the Ginkgo apply() below throws.
  std::unique_ptr<hiopVectorPar> rhs{dynamic_cast<hiopVectorPar*>(x->new_copy())};
  double* dx = x->local_data();
  double* drhs = rhs->local_data();

  // Wrap the existing buffers as non-owning Ginkgo views (no data is copied).
  // NOTE(review): the buffers come from hiopVectorPar::local_data() but are tagged
  // with exec_; presumably this assumes exec_ can access them directly — confirm
  // for device executors.
  auto x_array = gko::Array<double>::view(exec_, n_, dx);
  auto b_array = gko::Array<double>::view(exec_, n_, drhs);
  auto dense_x = gko::matrix::Dense<double>::create(exec_, gko::dim<2>{n_, 1}, x_array, 1);
  auto dense_b = gko::matrix::Dense<double>::create(exec_, gko::dim<2>{n_, 1}, b_array, 1);

  gko_solver_->apply(dense_b.get(), dense_x.get());

  nlp_->runStats.linsolv.tmTriuSolves.stop();

  RANGE_POP();
  return true;
}


} //end namespace hiop
Loading

0 comments on commit 629a716

Please sign in to comment.