From 7a3b64b0f8d6ccfe8ad9ef2c0fe6db2e37415b7d Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Thu, 10 Aug 2023 08:01:14 -0400 Subject: [PATCH 01/51] Create minimal cmake impl The goal here is to be able to use both CMake and Makefile system interchangeably. Major change is create a system which "picks" the correct lapack wrap bodies. --- CMakeLists.txt | 347 +++++++ cmake/itensor-config.cmake.in | 40 + cmake/modules/AppendFlags.cmake | 7 + cmake/modules/SanitizeCUDA.cmake | 12 + external/cuda.cmake | 45 + external/linalgpp.cmake | 15 + itensor/CMakeLists.txt | 141 +++ itensor/tensor/lapack/cmake_lapack_wrap.cc | 845 +++++++++++++++++ itensor/tensor/lapack/cmake_lapack_wrap.h | 795 ++++++++++++++++ itensor/tensor/lapack/makefile_lapack_wrap.cc | 844 +++++++++++++++++ itensor/tensor/lapack/makefile_lapack_wrap.h | 789 ++++++++++++++++ itensor/tensor/lapack_wrap.cc | 849 +----------------- itensor/tensor/lapack_wrap.h | 792 +--------------- itensors.pc.in | 6 + unittest/CMakeLists.txt | 71 ++ 15 files changed, 3967 insertions(+), 1631 deletions(-) create mode 100644 CMakeLists.txt create mode 100644 cmake/itensor-config.cmake.in create mode 100644 cmake/modules/AppendFlags.cmake create mode 100644 cmake/modules/SanitizeCUDA.cmake create mode 100644 external/cuda.cmake create mode 100644 external/linalgpp.cmake create mode 100644 itensor/CMakeLists.txt create mode 100644 itensor/tensor/lapack/cmake_lapack_wrap.cc create mode 100644 itensor/tensor/lapack/cmake_lapack_wrap.h create mode 100644 itensor/tensor/lapack/makefile_lapack_wrap.cc create mode 100644 itensor/tensor/lapack/makefile_lapack_wrap.h create mode 100644 itensors.pc.in create mode 100644 unittest/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000..9e5b5f954 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,347 @@ +#; -*-CMake-*- + +# Copyright 2018 The Simons Foundation, Inc. - All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 3.17.0) # decouples C++ and CUDA standards, see https://gitlab.kitware.com/cmake/cmake/issues/19123 +set(ITENSORS_REQUIRED_CMAKE_VERSION 3.17.0 CACHE INTERNAL "") + +############################################################################### +# CMake defaults to address key pain points +############################################################################### + +# safety net for dev workflow: accidental install will not affect FindOrFetch* +if (NOT DEFINED CACHE{CMAKE_FIND_NO_INSTALL_PREFIX}) + set(CMAKE_FIND_NO_INSTALL_PREFIX ON CACHE BOOL "Whether find_* commands will search CMAKE_INSTALL_PREFIX and CMAKE_STAGING_PREFIX; see https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_NO_INSTALL_PREFIX.html#variable:CMAKE_FIND_NO_INSTALL_PREFIX") +endif() + +############################################################################### +# Bring ValeevGroup cmake toolkit to autofind linalg +############################################################################### +include(FetchContent) +if (DEFINED PROJECT_BINARY_DIR) + set(VG_CMAKE_KIT_PREFIX_DIR PROJECT_BINARY_DIR) +else () + set(VG_CMAKE_KIT_PREFIX_DIR CMAKE_CURRENT_BINARY_DIR) +endif() +FetchContent_Declare( + vg_cmake_kit + QUIET + GIT_REPOSITORY https://github.com/ValeevGroup/kit-cmake.git + SOURCE_DIR ${${VG_CMAKE_KIT_PREFIX_DIR}}/cmake/vg + BINARY_DIR ${${VG_CMAKE_KIT_PREFIX_DIR}}/cmake/vg-build + SUBBUILD_DIR ${${VG_CMAKE_KIT_PREFIX_DIR}}/cmake/vg-subbuild +) +FetchContent_MakeAvailable(vg_cmake_kit) +list(APPEND CMAKE_MODULE_PATH "${vg_cmake_kit_SOURCE_DIR}/modules") + +############################################################################### +# Announce ourselves +############################################################################### +# see https://semver.org/ +set (ITENSORS_MAJOR_VERSION 4) +set (ITENSORS_MINOR_VERSION 0) +set (ITENSORS_PATCH_VERSION 0) +set (ITENSORS_PRERELEASE_ID beta.1) +set (ITENSORS_BUILD_ID ) + +set(ITENSORS_VERSION "${ITENSORS_MAJOR_VERSION}.${ITENSORS_MINOR_VERSION}.${ITENSORS_PATCH_VERSION}") +if (ITENSORS_PRERELEASE_ID) + set(ITENSORS_EXT_VERSION "${ITENSORS_VERSION}-${ITENSORS_PRERELEASE_ID}") +else(ITENSORS_PRERELEASE_ID) + set(ITENSORS_EXT_VERSION "${ITENSORS_VERSION}") +endif(ITENSORS_PRERELEASE_ID) +if (ITENSORS_BUILD_ID) + set(ITENSORS_EXT_VERSION "${ITENSORS_EXT_VERSION}+${ITENSORS_BUILD_ID}") +endif(ITENSORS_BUILD_ID) + +# extra cmake files are shipped with ITENSORS +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules") + +include(AppendFlags) + +# Extract the git revision tag information +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.git) + find_package(Git REQUIRED) + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-parse -q HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE ITENSORS_REVISION ) + string(REGEX MATCH "[0-9a-f]*" + ITENSORS_REVISION "${ITENSORS_REVISION}") +else() + set(ITENSORS_REVISION "unknown") +endif() + +project(ITENSORS + VERSION ${ITENSORS_VERSION} + DESCRIPTION "ITensor: High-Performance Tensor Software Inspired By Tensor Diagrams" + LANGUAGES CXX + HOMEPAGE_URL "https://itensor.org/") +enable_language(C) # C needed even for basic platform introspection + +############################################################################### +# Preconditions +############################################################################### +set(TARGET_ARCH "${CMAKE_SYSTEM}-${CMAKE_SYSTEM_PROCESSOR}") + +########################## +# Standard build variables +########################## +set(ITENSORS_INSTALL_BINDIR "bin" + CACHE PATH "ITENSORS BIN install directory") +set(ITENSORS_INSTALL_INCLUDEDIR "include" + CACHE PATH "ITENSORS INCLUDE install directory") +set(ITENSORS_INSTALL_LIBDIR "lib" + CACHE PATH "ITENSORS LIB install directory") +set(ITENSORS_INSTALL_SHAREDIR "share/ITENSORS/${ITENSORS_MAJOR_VERSION}.${ITENSORS_MINOR_VERSION}.${ITENSORS_MICRO_VERSION}" + CACHE PATH "ITENSORS SHARE install directory") +set(ITENSORS_INSTALL_DATADIR "${ITENSORS_INSTALL_SHAREDIR}/data" + CACHE PATH "ITENSORS DATA install directory") +set(ITENSORS_INSTALL_DOCDIR "${ITENSORS_INSTALL_SHAREDIR}/doc" + CACHE PATH "ITENSORS DOC install directory") +set(ITENSORS_INSTALL_CMAKEDIR "lib/cmake/ITENSORS" + CACHE PATH "ITENSORS CMAKE install directory") + +# Get standard build variables from the environment if they have not already been set +if(NOT CMAKE_C_FLAGS OR NOT DEFINED CMAKE_C_FLAGS) + set(CMAKE_C_FLAGS "$ENV{CPPFLAGS}") + append_flags(CMAKE_C_FLAGS "$ENV{CFLAGS}") +endif() +if(NOT CMAKE_CXX_FLAGS OR NOT DEFINED CMAKE_CXX_FLAGS) + set(CMAKE_CXX_FLAGS "$ENV{CPPFLAGS}") + append_flags(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS}") +endif() +if(NOT CMAKE_EXE_LINKER_FLAGS OR NOT DEFINED CMAKE_EXE_LINKER_FLAGS) + set(CMAKE_EXE_LINKER_FLAGS "$ENV{LDFLAGS}") +endif() +if (NOT CMAKE_CXX_COMPILER) + message(FATAL_ERROR "C++ compiler not found") +endif() + +set(CMAKE_SKIP_RPATH FALSE) + +########################## +# We use C++17 features +########################## +# but insist on strict standard +set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ ISO Standard version") +if (NOT(CMAKE_CXX_STANDARD EQUAL 17 OR CMAKE_CXX_STANDARD EQUAL 20)) + message(FATAL_ERROR "C++ 2017 ISO Standard or higher is required to compile ITENSORS") +endif() +# C++20 is only configurable via compile features with cmake 3.12 and older +if (CMAKE_CXX_STANDARD EQUAL 20 AND CMAKE_VERSION VERSION_LESS 3.12.0) + cmake_minimum_required (VERSION 3.12.0) +endif() +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Whether to use extensions of C++ ISO Standard version") + +########################## +# Check type support +include(CheckTypeSize) +########################## + +########################## +# Load extra CMake features +########################## + +include(GNUInstallDirs) +include(CMakeDependentOption) +include(CMakePackageConfigHelpers) +include(FeatureSummary) + +set(MPI_CXX_SKIP_MPICXX TRUE CACHE BOOL "MPI_CXX_SKIP_MPICXX") + +########################## +# Configure options +########################## +option(ENABLE_MPI "Enable MPI" ON) +add_feature_info(MPI ENABLE_MPI "Message-Passing Interface supports distributed-memory parallel programs") + +option(ENABLE_CUDA "Enable use of CUDA with ITensor" OFF) +add_feature_info(CUDA ENABLE_CUDA "NVIDIA CUDA support for GPU") + +option(ENABLE_BLAS_LAPACK "Enable BLAS and LAPACK Libraries" ON) +add_feature_info(BLAS_LAPACK ENABLE_BLAS_LAPACK "Fast standardized linear algebra definitions") + +option(ENABLE_HDF5 "Compile ITensor with HDF5 option" OFF) +add_feature_info(HDF5 ENABLE_HDF5 "Storage of data via the HDF5 file protocol") + +option(ENABLE_OMP "Compile ITensor with OpenMP option" OFF) +add_feature_info(OMP ENABLE_OMP "Message passing interface which supports unified-memory threadwise-parallel programs") + +option(ENABLE_WFN91_LINALG_DISCOVERY_KIT "Use linear algebra discovery kit from github.com/wavefunction91 [recommended]" ON) +add_feature_info(WFN91LinearAlgebraDiscoveryKit ENABLE_WFN91_LINALG_DISCOVERY_KIT "Linear algebra discovery kit from github.com/wavefunction91 supports many more corner cases than the default CMake modules and/or ICL's BLAS++/LAPACK++ modules") + +set(TARGET_ARCH "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") +########################## +# miscellaneous cmake platform-neutral and platform-specific configuration +########################## +set(CMAKE_FIND_NO_INSTALL_PREFIX TRUE) # do not search in CMAKE_INSTALL_PREFIX +set(CMAKE_SKIP_RPATH FALSE) +set(CMAKE_SKIP_BUILD_RPATH FALSE) +set(CMAKE_SKIP_INSTALL_RPATH FALSE) +set(CMAKE_NO_SYSTEM_FROM_IMPORTED TRUE) # do not use -isystem by default to avoid include dir ordering issues as well as other issues like https://gcc.gnu.org/onlinedocs/cpp/System-Headers.html + +if(CMAKE_SYSTEM_NAME MATCHES "Darwin") + # look for frameworks and appbundles last + set(CMAKE_FIND_FRAMEWORK LAST) + set(CMAKE_FIND_APPBUNDLE LAST) +endif() + +########################## +# Include source directories +########################## +include_directories(${PROJECT_SOURCE_DIR}/itensor ${PROJECT_BINARY_DIR}/itensor) + +########################## +# external dependencies +########################## +add_library(ITENSORS INTERFACE) +# required deps: +# 1. CUDA first since others may depend on it +if(ENABLE_CUDA) + include(external/cuda.cmake) +endif(ENABLE_CUDA) + +########################## +# discover linear algebra +########################## + +include(external/linalgpp.cmake) +if(ENABLE_OMP) + MESSAGE(STATUS "Looking for OMP") + FIND_PACKAGE(OpenMP REQUIRED) + IF(OPENMP_FOUND) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + endif(OPENMP_FOUND) + target_link_libraries(ITENSORS INTERFACE ${OpenMP_C_LIBRARIES}) +endif(ENABLE_OMP) + +if(ENABLE_MPI) + MESSAGE(STATUS "Looking for MPI") + FIND_PACKAGE(MPI REQUIRED) + IF(MPI_FOUND) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MPI_C_FLAGS}") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MPI_CXX_FLAGS}") + endif(MPI_FOUND) + + target_link_libraries(ITENSORS INTERFACE ${MPI_C_LIBRARIES}) +endif(ENABLE_MPI) + +if(ENABLE_HDF5) + MESSAGE(STATUS "Looking for HDF5") + FIND_PACKAGE(HDF5 REQUIRED CXX) + target_link_libraries(ITENSORS INTERFACE ${HDF5_LIBRARIES}) +endif(ENABLE_HDF5) +# if(ENABLE_BLAS_LAPACK) +# find_package(BLAS) +# find_package(LAPACK) +# if(LAPACK_FOUND AND BLAS_FOUND) +# set(lapackblas_libraries ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) + +# SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${BLAS_LINKER_FLAGS}") +# SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${LAPACK_LINKER_FLAGS}") +# endif() + +# target_link_libraries(ITENSORS INTERFACE ${lapackblas_libraries}) +# endif(ENABLE_BLAS_LAPACK) + + +########################## +# sources +########################## + +set(CMAKE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +add_subdirectory(itensor) +add_subdirectory(unittest) + +target_compile_definitions(itensor PRIVATE ITENSOR_USE_CMAKE=1) +if(${CMAKE_BUILD_TYPE} MATCHES Debug) + target_compile_definitions(itensor PRIVATE DEBUG=1) +endif(${CMAKE_BUILD_TYPE} MATCHES Debug) +########################## +# pkg-config variables +########################## +foreach(_inc ${ITENSORS_CONFIG_INCLUDE_DIRS}) + append_flags(ITENSORS_PC_CFLAGS "-I${_inc}") +endforeach() +foreach(_lib ${ITENSORS_CONFIG_LIBRARIES}) + append_flags(ITENSORS_PC_LIBS "${_lib}") +endforeach() + +########################## +# wrap up +########################## + +# Force cache refresh for compile flags +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "C compile flags" FORCE) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "C++ compile flags" FORCE) +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}" CACHE STRING "Compiler linker flags" FORCE) + +CONFIGURE_FILE( + ${PROJECT_SOURCE_DIR}/itensors.pc.in + ${PROJECT_BINARY_DIR}/itensors.pc +) + +# install config files +install(FILES ${PROJECT_BINARY_DIR}/itensor.pc + DESTINATION lib/pkgconfig) + +# Create the version file +write_basic_package_version_file(itensor-config-version.cmake + VERSION ${ITENSORS_VERSION} COMPATIBILITY AnyNewerVersion) + +# Create the targets file +export(EXPORT itensor + FILE "${PROJECT_BINARY_DIR}/itensor-targets.cmake") + +# Create the configure file +configure_package_config_file(cmake/itensor-config.cmake.in + "${PROJECT_BINARY_DIR}/itensor-config.cmake" + INSTALL_DESTINATION "${ITENSORS_INSTALL_CMAKEDIR}" + PATH_VARS CMAKE_INSTALL_PREFIX ITENSORS_INSTALL_BINDIR + ITENSORS_INSTALL_INCLUDEDIR ITENSORS_INSTALL_LIBDIR + ITENSORS_INSTALL_DOCDIR ITENSORS_INSTALL_CMAKEDIR) + +# Install config, version, and target files +install(EXPORT itensor + FILE "itensor-targets.cmake" + DESTINATION "${ITENSORS_INSTALL_CMAKEDIR}" + COMPONENT ITENSORS) + +install(FILES + "${PROJECT_BINARY_DIR}/itensor-config.cmake" + "${PROJECT_BINARY_DIR}/itensor-config-version.cmake" + DESTINATION "${ITENSORS_INSTALL_CMAKEDIR}" + COMPONENT ITENSORS) + + +# Add target to allow on-the-fly switching of build type + +ADD_CUSTOM_TARGET(debug + COMMAND ${CMAKE_COMMAND} -DCMAKE_BUILD_TYPE=Debug ${CMAKE_CURRENT_SOURCE_DIR} + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_CURRENT_BINARY_DIR} --target all + COMMENT "Switch CMAKE_BUILD_TYPE to Debug" + ) + +ADD_CUSTOM_TARGET(release + COMMAND ${CMAKE_COMMAND} -DCMAKE_BUILD_TYPE=Release ${CMAKE_CURRENT_SOURCE_DIR} + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_CURRENT_BINARY_DIR} --target all + COMMENT "Switch CMAKE_BUILD_TYPE to Release" + ) + +feature_summary(WHAT ALL + DESCRIPTION "=== ITensor Package/Feature Info ===") \ No newline at end of file diff --git a/cmake/itensor-config.cmake.in b/cmake/itensor-config.cmake.in new file mode 100644 index 000000000..093aec683 --- /dev/null +++ b/cmake/itensor-config.cmake.in @@ -0,0 +1,40 @@ +# - CMAKE Config file for the ITensors package +# The following variables are defined: +# ITENSORS_FOUND - System has the ITensors package +# ITENSORS_INCLUDE_DIRS - The ITensors include directory +# ITENSORS_LIBRARIES - The ITensors libraries and their dependencies +# ITENSORS_VERSION - The ITensors (core) version; see semver.org +# ITENSORS_EXT_VERSION - The ITensors version, includes prerelease id; see semver.org + +# Set package version +set(ITENSORS_VERSION "@ITENSORS_VERSION@") +set(ITENSORS_EXT_VERSION "@ITENSORS_EXT_VERSION@") + +@PACKAGE_INIT@ + +# Include library IMPORT targets +if(NOT TARGET itensor) + include("${CMAKE_CURRENT_LIST_DIR}/itensor-targets.cmake") +endif() + +# Set the itensor compiled library target +set(ITENSORS_LIBRARIES itensor) + +set(ITENSORS_SOURCE_DIR "@ITENSORS_SOURCE_DIR@") +set(ITENSORS_BINARY_DIR "@ITENSORS_BINARY_DIR@") + +set(ITENSORS_BUILD_INCLUDE_DIRS "${ITENSORS_SOURCE_DIR}/src" "${ITENSORS_BINARY_DIR}/src") +set(ITENSORS_INSTALL_INCLUDE_DIRS "@PACKAGE_ITENSORS_INSTALL_INCLUDEDIR@" + "@PACKAGE_ITENSORS_INSTALL_INCLUDEDIR@/itensors/external") + +# define ITENSORS_INCLUDE_DIRS according to where we are compiling: ITensor build tree or outside +# external packages should use ITENSORS_BUILD_INCLUDE_DIRS and ITENSORS_INSTALL_INCLUDE_DIRS directly +if(CMAKE_CURRENT_LIST_DIR EQUAL ITENSORS_BINARY_DIR) + set(ITENSORS_INCLUDE_DIRS "${ITENSORS_BUILD_INCLUDE_DIRS}") +else() + set(ITENSORS_INCLUDE_DIRS "${ITENSORS_INSTALL_INCLUDE_DIRS}") +endif() + +set(ITENSORS_CMAKE_TOOLCHAIN_FILE "@CMAKE_TOOLCHAIN_FILE@") + +set(ITENSORS_FOUND TRUE) \ No newline at end of file diff --git a/cmake/modules/AppendFlags.cmake b/cmake/modules/AppendFlags.cmake new file mode 100644 index 000000000..e52b8ec7d --- /dev/null +++ b/cmake/modules/AppendFlags.cmake @@ -0,0 +1,7 @@ +macro(append_flags _flags _append_flag) + + string(STRIP "${_append_flag}" _append_flag ) + set(${_flags} "${${_flags}} ${_append_flag}") + string(STRIP "${${_flags}}" ${_flags}) + +endmacro() \ No newline at end of file diff --git a/cmake/modules/SanitizeCUDA.cmake b/cmake/modules/SanitizeCUDA.cmake new file mode 100644 index 000000000..77bcc8dd4 --- /dev/null +++ b/cmake/modules/SanitizeCUDA.cmake @@ -0,0 +1,12 @@ +macro(sanitize_cuda_implicit_directories) + foreach (_type INCLUDE LINK) + set(_var CMAKE_CUDA_IMPLICIT_${_type}_DIRECTORIES) + set(_sanitized_var ) + foreach (_component ${${_var}}) + if (NOT ${_component} MATCHES "/gcc/(.*/|)[0-9]\.[0-9]\.[0-9]") + list(APPEND _sanitized_var ${_component}) + endif() + endforeach() + set(${_var} ${_sanitized_var}) + endforeach() +endmacro() \ No newline at end of file diff --git a/external/cuda.cmake b/external/cuda.cmake new file mode 100644 index 000000000..cbadadf4d --- /dev/null +++ b/external/cuda.cmake @@ -0,0 +1,45 @@ +# cmake 3.17 decouples C++ and CUDA standards, see https://gitlab.kitware.com/cmake/cmake/issues/19123 +# cmake 3.18 knows that CUDA 11 provides cuda_std_17 +cmake_minimum_required(VERSION 3.18.0) +set(CMAKE_CUDA_STANDARD 17) +set(CMAKE_CUDA_EXTENSIONS OFF) +set(CMAKE_CUDA_STANDARD_REQUIRED ON) +set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) +# N.B. need relaxed constexpr for std::complex +# see https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#constexpr-functions%5B/url%5D: +if (DEFINED CMAKE_CUDA_FLAGS) + set(CMAKE_CUDA_FLAGS "--expt-relaxed-constexpr ${CMAKE_CUDA_FLAGS}") +else() + set(CMAKE_CUDA_FLAGS "--expt-relaxed-constexpr") +endif() +enable_language(CUDA) + +set(CUDA_FOUND TRUE) +set(ITENSORS_HAS_CUDA 1 CACHE BOOL "Whether ITensor has CUDA support") + +if(ENABLE_CUDA_ERROR_CHECK) + set (ITENSORS_CHECK_CUDA_ERROR 1) +endif(ENABLE_CUDA_ERROR_CHECK) + +# find CUDA toolkit +# NB CUDAToolkit does NOT have COMPONENTS +find_package(CUDAToolkit REQUIRED) + +foreach (library cublas;nvToolsExt) + if (NOT TARGET CUDA::${library}) + message(FATAL_ERROR "CUDA::${library} not found") + endif() +endforeach() + +if (NOT DEFINED CUDAToolkit_ROOT) + get_filename_component(CUDAToolkit_ROOT "${CUDAToolkit_INCLUDE_DIR}/../" ABSOLUTE CACHE) +endif(NOT DEFINED CUDAToolkit_ROOT) + +# sanitize implicit dirs if CUDA host compiler != C++ compiler +message(STATUS "CMAKE Implicit Include Directories: ${CMAKE_CUDA_IMPLICIT_INCLUDE_DIRECTORIES}") +message(STATUS "CMAKE Implicit Link Directories: ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") +include(SanitizeCUDA) +sanitize_cuda_implicit_directories() +message(STATUS "CMAKE Implicit Include Directories: ${CMAKE_CUDA_IMPLICIT_INCLUDE_DIRECTORIES}") +message(STATUS "CMAKE Implicit Link Directories: ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") + diff --git a/external/linalgpp.cmake b/external/linalgpp.cmake new file mode 100644 index 000000000..89f8a1843 --- /dev/null +++ b/external/linalgpp.cmake @@ -0,0 +1,15 @@ +# import BLAS++ / LAPACK++ +if (ENABLE_WFN91_LINALG_DISCOVERY_KIT) + include(${vg_cmake_kit_SOURCE_DIR}/modules/FetchWfn91LinAlgModules.cmake) + include(${vg_cmake_kit_SOURCE_DIR}/modules/versions.cmake) + include(${vg_cmake_kit_SOURCE_DIR}/modules/FindLinalg.cmake) +endif(ENABLE_WFN91_LINALG_DISCOVERY_KIT) +include(${vg_cmake_kit_SOURCE_DIR}/modules/FindOrFetchLinalgPP.cmake) +target_link_libraries(ITENSORS INTERFACE blaspp lapackpp) +if (TARGET blaspp_headers) + target_link_libraries(ITENSORS INTERFACE blaspp_headers) +endif () +target_compile_definitions(ITENSORS INTERFACE -DITENSORS_HAS_BLAS_LAPACK=1 -DLAPACK_COMPLEX_CPP=1) +if (BLAS_IS_MKL) + target_compile_definitions(ITENSORS INTERFACE -DITENSORS_HAS_INTEL_MKL=1) +endif () \ No newline at end of file diff --git a/itensor/CMakeLists.txt b/itensor/CMakeLists.txt new file mode 100644 index 000000000..6de90a48b --- /dev/null +++ b/itensor/CMakeLists.txt @@ -0,0 +1,141 @@ +# +# This file is part of the ITensor library +# +# Copyright 2018 The Simons Foundation, Inc. - All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#ITensor headers +set(ITENSOR_HEADER_FILES + all.h + all_basic.h + all_mps.h + arrow.h + core.h + decomp.h + decomp_impl.h + detail/algs.h + detail/call_rewrite.h + detail/gcounter.h + detail/skip_iterator.h + global.h + index.h + index_impl.h + indexset.h + indexset_impl.h + itdata/applyfunc.h + itdata/combiner.h + itdata/dense.h + itdata/diag.h + itdata/dotask.h + itdata/itdata.h + itdata/itlazy.h + itdata/qcombiner.h + itdata/qdense.h + itensor.h + itensor_impl.h + iterativesolvers.h + qn.h + real.h + smallstring.h + spectrum.h + tagset.h + types.h + ) + +set(ITENSOR_SOURCE_FILES + util/args.cc + util/input.cc + util/cputime.cc + tensor/lapack_wrap.cc + tensor/vec.cc + tensor/mat.cc + tensor/gemm.cc + tensor/algs.cc + tensor/contract.cc + itdata/dense.cc + itdata/combiner.cc + itdata/diag.cc + itdata/qdense.cc + itdata/qcombiner.cc + itdata/qdiag.cc + itdata/scalar.cc + qn.cc + tagset.cc + index.cc + indexset.cc + itensor.cc + spectrum.cc + decomp.cc + hermitian.cc + svd.cc + global.cc + mps/mps.cc + mps/mpsalgs.cc + mps/mpo.cc + mps/mpoalgs.cc + mps/autompo.cc) + +if(ENABLE_HDF5) + set(H5_SOURCE_FILES + util/h5/array_interface.cc + util/h5/file.cc + util/h5/group.cc + util/h5/h5object.cc + util/h5/stl/string.cc + util/h5/stl/vector.cc) + list(APPEND ${ITENSOR_SOURCE_FILES} ${H5_SOURCE_FILES}) +endif(ENABLE_HDF5) + +# Create the ITensor library +add_library(itensor ${ITENSOR_SOURCE_FILES} ${ITENSOR_HEADER_FILES}) + +set(targetname itensor) + +target_include_directories(${targetname} INTERFACE + $ + $ + $ + ) + +set(ITENSORS_LINK_LIBRARIES ${CMAKE_CXX_FLAGS} blaspp lapackpp ${blaspp_headers} CACHE STRING "List of libraries which ITensor is dependent on") +target_link_libraries(${targetname} PUBLIC ${ITENSOR_LINK_LIBRARIES} blaspp lapackpp) + +# append current CMAKE_CXX_FLAGS +string(REPLACE " " ";" CMAKE_CXX_FLAG_LIST "${CMAKE_CXX_FLAGS}") +target_compile_options(${targetname} PUBLIC ${CMAKE_CXX_FLAG_LIST}) +target_compile_features(${targetname} PUBLIC "cxx_std_${CMAKE_CXX_STANDARD}") + +if (LAPACK_INCLUDE_DIRS) + target_include_directories(${targetname} PUBLIC ${LAPACK_INCLUDE_DIRS}) +endif (LAPACK_INCLUDE_DIRS) +if (LAPACK_COMPILE_OPTIONS) + target_compile_options(${targetname} PUBLIC ${LAPACK_COMPILE_OPTIONS}) +endif (LAPACK_COMPILE_OPTIONS) +if (LAPACK_COMPILE_DEFINITIONS) + target_compile_definitions(${targetname} PUBLIC ${LAPACK_COMPILE_DEFINITIONS}) +endif (LAPACK_COMPILE_DEFINITIONS) + +# Add library to the list of installed components +install(TARGETS itensor EXPORT itensor COMPONENT itensor + LIBRARY DESTINATION "${ITENSOR_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${ITENSOR_INSTALL_LIBDIR}") +# Install header files +install( + DIRECTORY + ${PROJECT_SOURCE_DIR} + ${PROJECT_BINARY_DIR} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + FILES_MATCHING PATTERN "*.h" + PATTERN "CMakeFiles" EXCLUDE +) diff --git a/itensor/tensor/lapack/cmake_lapack_wrap.cc b/itensor/tensor/lapack/cmake_lapack_wrap.cc new file mode 100644 index 000000000..6d8ec7c56 --- /dev/null +++ b/itensor/tensor/lapack/cmake_lapack_wrap.cc @@ -0,0 +1,845 @@ +// +// Copyright 2018 The Simons Foundation, Inc. - All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "itensor/tensor/lapack/cmake_lapack_wrap.h" +//#include "itensor/tensor/permutecplx.h" + +namespace itensor { + +// +// daxpy +// Y += alpha*X +// +void +daxpy_wrapper(LAPACK_INT n, //number of elements of X,Y + LAPACK_REAL alpha, //scale factor + const LAPACK_REAL* X, //pointer to head of vector X + LAPACK_INT incx, //increment with which to step through X + LAPACK_REAL* Y, //pointer to head of vector Y + LAPACK_INT incy) //increment with which to step through Y + { +//#ifdef ITENSOR_USE_CBLAS +// cblas_daxpy(n,alpha,X,incx,Y,incy); +//#else +// auto Xnc = const_cast(X); +// F77NAME(daxpy)(&n,&alpha,Xnc,&incx,Y,&incy); +//#endif + } + +// +// dnrm2 +// +LAPACK_REAL +dnrm2_wrapper(LAPACK_INT N, + const LAPACK_REAL* X, + LAPACK_INT incx) + { +//#ifdef ITENSOR_USE_CBLAS +// return cblas_dnrm2(N,X,incx); +//#else +// auto *Xnc = const_cast(X); +// return F77NAME(dnrm2)(&N,Xnc,&incx); +//#endif + return -1; + } + +// +// ddot +// +LAPACK_REAL +ddot_wrapper(LAPACK_INT N, + const LAPACK_REAL* X, + LAPACK_INT incx, + const LAPACK_REAL* Y, + LAPACK_INT incy) + { +//#ifdef ITENSOR_USE_CBLAS +// return cblas_ddot(N,X,incx,Y,incy); +//#else +// auto *Xnc = const_cast(X); +// auto *Ync = const_cast(Y); +// return F77NAME(ddot)(&N,Xnc,&incx,Ync,&incy); +//#endif + return -1; + } + +// +// zdotc +// +Cplx +zdotc_wrapper(LAPACK_INT N, + Cplx const* X, + LAPACK_INT incx, + Cplx const* Y, + LAPACK_INT incy) + { +//#ifdef ITENSOR_USE_CBLAS +// Cplx res; +//#if defined PLATFORM_openblas +// auto pX = reinterpret_cast(X); +// auto pY = reinterpret_cast(Y); +// auto pres = reinterpret_cast(&res); +//#else +// auto pX = reinterpret_cast(X); +// auto pY = reinterpret_cast(Y); +// auto pres = reinterpret_cast(&res); +//#endif +// cblas_zdotc_sub(N,pX,incx,pY,incy,pres); +// return res; +//#else +// auto ncX = const_cast(X); +// auto ncY = const_cast(Y); +// auto pX = reinterpret_cast(ncX); +// auto pY = reinterpret_cast(ncY); +// auto res = F77NAME(zdotc)(&N,pX,&incx,pY,&incy); +// auto cplx_res = reinterpret_cast(&res); +// return *cplx_res; +//#endif + return Cplx{}; + } + +// +// dgemm +// +void +gemm_wrapper(bool transa, + bool transb, + LAPACK_INT m, + LAPACK_INT n, + LAPACK_INT k, + LAPACK_REAL alpha, + LAPACK_REAL const* A, + LAPACK_REAL const* B, + LAPACK_REAL beta, + LAPACK_REAL * C) + { + LAPACK_INT lda = m, + ldb = k; +//#ifdef ITENSOR_USE_CBLAS +// auto at = CblasNoTrans, +// bt = CblasNoTrans; +// if(transa) +// { +// at = CblasTrans; +// lda = k; +// } +// if(transb) +// { +// bt = CblasTrans; +// ldb = n; +// } +// cblas_dgemm(CblasColMajor,at,bt,m,n,k,alpha,A,lda,B,ldb,beta,C,m); +//#else +// auto *pA = const_cast(A); +// auto *pB = const_cast(B); +// char at = 'N'; +// char bt = 'N'; +// if(transa) +// { +// at = 'T'; +// lda = k; +// } +// if(transb) +// { +// bt = 'T'; +// ldb = n; +// } +// F77NAME(dgemm)(&at,&bt,&m,&n,&k,&alpha,pA,&lda,pB,&ldb,&beta,C,&m); +//#endif + } + +// +// zgemm +// +void +gemm_wrapper(bool transa, + bool transb, + LAPACK_INT m, + LAPACK_INT n, + LAPACK_INT k, + Cplx alpha, + const Cplx* A, + const Cplx* B, + Cplx beta, + Cplx* C) + { + LAPACK_INT lda = m, + ldb = k; +//#ifdef PLATFORM_openblas +// auto at = CblasNoTrans, +// bt = CblasNoTrans; +// if(transa) +// { +// at = CblasTrans; +// lda = k; +// } +// if(transb) +// { +// bt = CblasTrans; +// ldb = n; +// } +// //auto ralpha = realRef(alpha); +// //auto ialpha = imagRef(alpha); +// //auto rbeta = realRef(beta); +// //auto ibeta = imagRef(beta); +// //if(ialpha != 0.0 || ibeta != 0.0) +// // { +// // throw std::runtime_error("Complex alpha, beta not supported in zgemm for PLATFORM=openblas"); +// // } +// auto* palpha = reinterpret_cast(&alpha); +// auto* pbeta = reinterpret_cast(&beta); +// auto* pA = reinterpret_cast(A); +// auto* pB = reinterpret_cast(B); +// auto* pC = reinterpret_cast(C); +// cblas_zgemm(CblasColMajor,at,bt,m,n,k,palpha,pA,lda,pB,ldb,pbeta,pC,m); +//#else //platform not openblas +//#ifdef ITENSOR_USE_CBLAS +// auto at = CblasNoTrans, +// bt = CblasNoTrans; +// if(transa) +// { +// at = CblasTrans; +// lda = k; +// } +// if(transb) +// { +// bt = CblasTrans; +// ldb = n; +// } +// auto palpha = (void*)(&alpha); +// auto pbeta = (void*)(&beta); +// cblas_zgemm(CblasColMajor,at,bt,m,n,k,palpha,(void*)A,lda,(void*)B,ldb,pbeta,(void*)C,m); +//#else //use Fortran zgemm +// auto *ncA = const_cast(A); +// auto *ncB = const_cast(B); +// auto *pA = reinterpret_cast(ncA); +// auto *pB = reinterpret_cast(ncB); +// auto *pC = reinterpret_cast(C); +// auto *palpha = reinterpret_cast(&alpha); +// auto *pbeta = reinterpret_cast(&beta); +// char at = 'N'; +// char bt = 'N'; +// if(transa) +// { +// at = 'T'; +// lda = k; +// } +// if(transb) +// { +// bt = 'T'; +// ldb = n; +// } +// F77NAME(zgemm)(&at,&bt,&m,&n,&k,palpha,pA,&lda,pB,&ldb,pbeta,pC,&m); +//#endif +//#endif + } + +void +gemv_wrapper(bool trans, + LAPACK_REAL alpha, + LAPACK_REAL beta, + LAPACK_INT m, + LAPACK_INT n, + const LAPACK_REAL* A, + const LAPACK_REAL* x, + LAPACK_INT incx, + LAPACK_REAL* y, + LAPACK_INT incy) + { +//#ifdef ITENSOR_USE_CBLAS +// auto Tr = trans ? CblasTrans : CblasNoTrans; +// cblas_dgemv(CblasColMajor,Tr,m,n,alpha,A,m,x,incx,beta,y,incy); +//#else +// char Tr = trans ? 'T' : 'N'; +// F77NAME(dgemv)(&Tr,&m,&n,&alpha,const_cast(A),&m,const_cast(x),&incx,&beta,y,&incy); +//#endif + } + +void +gemv_wrapper(bool trans, + Cplx alpha, + Cplx beta, + LAPACK_INT m, + LAPACK_INT n, + Cplx const* A, + Cplx const* x, + LAPACK_INT incx, + Cplx* y, + LAPACK_INT incy) + { +//#ifdef PLATFORM_openblas +// auto Tr = trans ? CblasTrans : CblasNoTrans; +// //auto ralpha = realRef(alpha); +// //auto ialpha = imagRef(alpha); +// //auto rbeta = realRef(beta); +// //auto ibeta = imagRef(beta); +// //if(ialpha != 0.0 || ibeta != 0.0) +// // { +// // throw std::runtime_error("Complex alpha, beta not supported in zgemm for PLATFORM=openblas"); +// // } +// auto* palpha = reinterpret_cast(&alpha); +// auto* pbeta = reinterpret_cast(&beta); +// auto* pA = reinterpret_cast(A); +// auto* px = reinterpret_cast(x); +// auto* py = reinterpret_cast(y); +// cblas_zgemv(CblasColMajor,Tr,m,n,palpha,pA,m,px,incx,pbeta,py,incy); +//#else //platform other than openblas +//#ifdef ITENSOR_USE_CBLAS +// auto Tr = trans ? CblasTrans : CblasNoTrans; +// auto palpha = reinterpret_cast(&alpha); +// auto pbeta = reinterpret_cast(&beta); +// cblas_zgemv(CblasColMajor,Tr,m,n,palpha,(void*)A,m,(void*)x,incx,pbeta,(void*)y,incy); +//#else +// char Tr = trans ? 'T' : 'N'; +// auto ncA = const_cast(A); +// auto ncx = const_cast(x); +// auto pA = reinterpret_cast(ncA); +// auto px = reinterpret_cast(ncx); +// auto py = reinterpret_cast(y); +// auto palpha = reinterpret_cast(&alpha); +// auto pbeta = reinterpret_cast(&beta); +// F77NAME(zgemv)(&Tr,&m,&n,palpha,pA,&m,px,&incx,pbeta,py,&incy); +//#endif +//#endif + } + + +// +// dsyev +// +void +dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs + char uplo, //if uplo=='U', read from upper triangle of A + LAPACK_INT n, //number of cols of A + LAPACK_REAL* A, //symmetric matrix A + LAPACK_REAL* eigs, //eigenvalues on return + LAPACK_INT& info) //error info + { + std::vector work; + LAPACK_INT lda = n; + +//#ifdef PLATFORM_acml +// static const LAPACK_INT one = 1; +// LAPACK_INT lwork = std::max(one,3*n-1); +// work.resize(lwork+2); +// F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,work.data(),&lwork,&info,1,1); +//#else +// //Compute optimal workspace size (will be written to wkopt) +// LAPACK_INT lwork = -1; //tell dsyev to compute optimal size +// LAPACK_REAL wkopt = 0; +// F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,&wkopt,&lwork,&info); +// lwork = LAPACK_INT(wkopt); +// work.resize(lwork+2); +// F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,work.data(),&lwork,&info); +//#endif + } + +// +// dscal +// +void +dscal_wrapper(LAPACK_INT N, + LAPACK_REAL alpha, + LAPACK_REAL* data, + LAPACK_INT inc) + { +//#ifdef ITENSOR_USE_CBLAS +// cblas_dscal(N,alpha,data,inc); +//#else +// F77NAME(dscal)(&N,&alpha,data,&inc); +//#endif + } + +void +zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + Cplx *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + Cplx *u, //on return, unitary matrix U + Cplx *vt, //on return, unitary matrix V transpose + LAPACK_INT *info) + { + std::vector work; + std::vector rwork; + std::vector iwork; + auto pA = reinterpret_cast(A); + auto pU = reinterpret_cast(u); + auto pVt = reinterpret_cast(vt); + LAPACK_INT l = std::min(*m,*n), + g = std::max(*m,*n); + LAPACK_INT lwork = l*l+2*l+g+100; + work.resize(lwork); + rwork.resize(5*l*(1+l)); + iwork.resize(8*l); +//#ifdef PLATFORM_acml +// LAPACK_INT jobz_len = 1; +// F77NAME(zgesdd)(jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),iwork.data(),info,jobz_len); +//#else +// F77NAME(zgesdd)(jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),iwork.data(),info); +//#endif + } + + + + void +dgesdd_wrapper(char* jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + LAPACK_REAL *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + LAPACK_REAL *u, //on return, unitary matrix U + LAPACK_REAL *vt, //on return, unitary matrix V transpose + LAPACK_INT *info) + { + std::vector work; + std::vector iwork; + LAPACK_INT l = std::min(*m,*n), + g = std::max(*m,*n); + LAPACK_INT lwork = l*(6 + 4*l) + g; + work.resize(lwork); + iwork.resize(8*l); +//#ifdef PLATFORM_acml +// LAPACK_INT jobz_len = 1; +// F77NAME(dgesdd)(jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork,iwork.data(),info,jobz_len); +//#else +// F77NAME(dgesdd)(jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork,iwork.data(),info); +//#endif + } + + + + void +zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + Cplx *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + Cplx *u, //on return, unitary matrix U + Cplx *vt, //on return, unitary matrix V transpose + LAPACK_INT *info) + { + std::vector work; + std::vector rwork; + std::vector iwork; + auto pA = reinterpret_cast(A); + auto pU = reinterpret_cast(u); + auto pVt = reinterpret_cast(vt); + LAPACK_INT l = std::min(*m,*n), + g = std::max(*m,*n); + LAPACK_INT lwork = l*l+2*l+g+100; + work.resize(lwork); + rwork.resize(5*l*(1+l)); + iwork.resize(8*l); +//#ifdef PLATFORM_acml +// LAPACK_INT jobz_len = 1; +// F77NAME(zgesvd)(jobz,jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),info,jobz_len); +//#else +// F77NAME(zgesvd)(jobz,jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),info); +//#endif + } + + + + void +dgesvd_wrapper(char* jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + LAPACK_REAL *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + LAPACK_REAL *u, //on return, unitary matrix U + LAPACK_REAL *vt, //on return, unitary matrix V transpose + LAPACK_INT *info) + { + std::vector work; + // std::vector superb; + + std::vector iwork; + LAPACK_INT l = std::min(*m,*n), + g = std::max(*m,*n); + LAPACK_INT lwork = l*(6 + 4*l) + g; + work.resize(lwork); + iwork.resize(8*l); + //superb.resize(l -1); +//#ifdef PLATFORM_acml +// LAPACK_INT jobz_len = 1; +// F77NAME(dgesvd)(jobz,jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork, info, jobz_len); +//#else +// F77NAME(dgesvd)(jobz,jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork, info); +//#endif + } + +// +// dgeqrf +// +// QR factorization of a real matrix A +// +void +dgeqrf_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_REAL* A, //matrix A + //on return upper triangle contains R + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_REAL* tau, //scalar factors of elementary reflectors + //length should be min(m,n) + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + LAPACK_INT lwork = std::max(one,4*std::max(*n,*m)); + work.resize(lwork+2); +// F77NAME(dgeqrf)(m,n,A,lda,tau,work.data(),&lwork,info); + } + +// +// dorgqr +// +// Generates Q from output of QR factorization routine dgeqrf (see above) +// +void +dorgqr_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) + LAPACK_REAL* A, //matrix A, as returned from "A" argument of dgeqrf + //on return contains Q + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_REAL* tau, //scalar factors as returned by dgeqrf + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + auto lwork = std::max(one,4*std::max(*n,*m)); + work.resize(lwork+2); +// F77NAME(dorgqr)(m,n,k,A,lda,tau,work.data(),&lwork,info); + } + + + // +// dgeqrf +// +// QR factorization of a complex matrix A +// +void +zgeqrf_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + Cplx* A, //matrix A + //on return upper triangle contains R + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_COMPLEX* tau, //scalar factors of elementary reflectors + //length should be min(m,n) + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + LAPACK_INT lwork = std::max(one,4*std::max(*n,*m)); + work.resize(lwork+2); + static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); + auto pA = reinterpret_cast(A); +// F77NAME(zgeqrf)(m,n,pA,lda,tau,work.data(),&lwork,info); + } + +// +// dorgqr +// +// Generates Q from output of QR factorization routine zgeqrf (see above) +// +void +zungqr_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) + Cplx* A, //matrix A, as returned from "A" argument of dgeqrf + //on return contains Q + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_COMPLEX* tau, //scalar factors as returned by dgeqrf + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + auto lwork = std::max(one,4*std::max(*n,*m)); + work.resize(lwork+2); + static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); + auto pA = reinterpret_cast(A); +// #ifdef PLATFORM_lapacke +// LAPACKE_zungqr(LAPACK_COL_MAJOR,jobz,uplo,N,A,N,w.data()); +// #else +// F77NAME(zungqr)(m,n,k,pA,lda,tau,work.data(),&lwork,info); +// #endif + } + +// +// dgesv +// +LAPACK_INT +dgesv_wrapper(LAPACK_INT n, + LAPACK_INT nrhs, + LAPACK_REAL* a, + LAPACK_REAL* b) + { + LAPACK_INT lda = n; + std::vector ipiv(n); + LAPACK_INT ldb = n; + LAPACK_INT info = 0; +// F77NAME(dgesv)(&n,&nrhs,a,&lda,ipiv.data(),b,&ldb,&info); + return info; + } + +// +// zgesv +// +LAPACK_INT +zgesv_wrapper(LAPACK_INT n, + LAPACK_INT nrhs, + Cplx* a, + Cplx* b) + { + auto pa = reinterpret_cast(a); + auto pb = reinterpret_cast(b); + LAPACK_INT lda = n; + std::vector ipiv(n); + LAPACK_INT ldb = n; + LAPACK_INT info = 0; +// F77NAME(zgesv)(&n,&nrhs,pa,&lda,ipiv.data(),pb,&ldb,&info); + return info; + } + +// +// dlange +// +double +dlange_wrapper(char norm, + LAPACK_INT m, + LAPACK_INT n, + double* a) + { + double norma; +//#ifdef PLATFORM_lapacke +// norma = LAPACKE_dlange(LAPACK_COL_MAJOR,norm,m,n,a,m); +//#else +// std::vector work; +// if(norm == 'I' || norm == 'i') work.resize(m); +//#ifdef PLATFORM_acml +// LAPACK_INT norm_len = 1; +// norma = F77NAME(dlange)(&norm,&m,&n,a,&m,work.data(),norm_len); +//#else +// norma = F77NAME(dlange)(&norm,&m,&n,a,&m,work.data()); +//#endif +//#endif + return norma; + } + +// +// zlange +// +LAPACK_REAL +zlange_wrapper(char norm, + LAPACK_INT m, + LAPACK_INT n, + Cplx* a) + { + LAPACK_REAL norma; +//#ifdef PLATFORM_lapacke +// auto pA = reinterpret_cast(a); +// norma = LAPACKE_zlange(LAPACK_COL_MAJOR,norm,m,n,pa,m); +//#else +// std::vector work; +// if(norm == 'I' || norm == 'i') work.resize(m); +// auto pA = reinterpret_cast(a); +//#ifdef PLATFORM_acml +// LAPACK_INT norm_len = 1; +// norma = F77NAME(zlange)(&norm,&m,&n,pA,&m,work.data(),norm_len); +//#else +// norma = F77NAME(zlange)(&norm,&m,&n,pA,&m,work.data()); +//#endif +//#endif + return norma; + } + +// +// zheev +// +// Eigenvalues and eigenvectors of complex Hermitian matrix A +// +LAPACK_INT +zheev_wrapper(LAPACK_INT N, //number of cols of A + Cplx * A, //matrix A, on return contains eigenvectors + LAPACK_REAL * d) //eigenvalues on return + { + static const LAPACK_INT one = 1; + char jobz = 'V'; + char uplo = 'U'; + lapack_int info = 0; +//#ifdef PLATFORM_lapacke +// std::vector work(N); +// LAPACKE_zheev(LAPACK_COL_MAJOR,jobz,uplo,N,A,N,w.data()); +//#else +// LAPACK_INT lwork = std::max(one,3*N-1);//max(1, 1+6*N+2*N*N); +// std::vector work(lwork); +// std::vector rwork(lwork); +// LAPACK_INT info = 0; +// static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); +// auto pA = reinterpret_cast(A); +//#ifdef PLATFORM_acml +// LAPACK_INT jobz_len = 1; +// LAPACK_INT uplo_len = 1; +// F77NAME(zheev)(&jobz,&uplo,&N,pA,&N,d,work.data(),&lwork,rwork.data(),&info,jobz_len,uplo_len); +//#else +// F77NAME(zheev)(&jobz,&uplo,&N,pA,&N,d,work.data(),&lwork,rwork.data(),&info); +//#endif +// +//#endif //PLATFORM_lapacke + return info; + } + +// +// dsygv +// +// Eigenvalues and eigenvectors of generalized eigenvalue problem +// A*x = lambda*B*x +// A and B must be symmetric +// B must be positive definite +// +void +dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs + //if 'N', only eigenvalues + char* uplo, //if 'U', use upper triangle of A + LAPACK_INT* n, //number of cols of A + LAPACK_REAL* A, //matrix A, on return contains eigenvectors + LAPACK_REAL* B, //matrix B + LAPACK_REAL* d, //eigenvalues on return + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + LAPACK_INT itype = 1; + LAPACK_INT lwork = std::max(one,3*(*n)-1);//std::max(1, 1+6*N+2*N*N); + work.resize(lwork); +//#ifdef PLATFORM_acml +// LAPACK_INT jobz_len = 1; +// LAPACK_INT uplo_len = 1; +// F77NAME(dsygv)(&itype,jobz,uplo,n,A,n,B,n,d,work.data(),&lwork,info,jobz_len,uplo_len); +//#else +// F77NAME(dsygv)(&itype,jobz,uplo,n,A,n,B,n,d,work.data(),&lwork,info); +//#endif + } + +// +// dgeev +// +// Eigenvalues and eigenvectors of real, square matrix A +// A can be a general real matrix, not assumed symmetric +// +LAPACK_INT +dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' + char jobvr, //if 'V', compute right eigenvectors, else 'N' + LAPACK_INT n, //number of rows/cols of A + LAPACK_REAL const* A, //matrix A + LAPACK_REAL* dr, //real parts of eigenvalues + LAPACK_REAL* di, //imaginary parts of eigenvalues + LAPACK_REAL* vl, //left eigenvectors on return + LAPACK_REAL* vr) //right eigenvectors on return + { + std::vector work; + std::vector cpA; + + cpA.resize(n*n); + std::copy(A,A+n*n,cpA.data()); + + LAPACK_INT nevecl = (jobvl == 'V' ? n : 1); + LAPACK_INT nevecr = (jobvr == 'V' ? n : 1); + LAPACK_INT info = 0; +//#ifdef PLATFORM_acml +// LAPACK_INT lwork = -1; +// LAPACK_REAL wquery = 0; +// F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,&wquery,&lwork,&info,1,1); +// +// lwork = static_cast(wquery); +// work.resize(lwork); +// F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,work.data(),&lwork,&info,1,1); +//#else +// LAPACK_INT lwork = -1; +// LAPACK_REAL wquery = 0; +// F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,&wquery,&lwork,&info); +// +// lwork = static_cast(wquery); +// work.resize(lwork); +// F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,work.data(),&lwork,&info); +//#endif + //println("jobvl = ",jobvl); + //println("nevecl = ",nevecl); + //println("vl data = "); + //for(auto j = 0; j < n*n; ++j) + // { + // println(*vl); + // ++vl; + // } + //println("vr data = "); + //for(auto j = 0; j < n*n; ++j) + // { + // println(*vr); + // ++vr; + // } + return info; + } + +// +// zgeev +// +// Eigenvalues and eigenvectors of complex, square matrix A +// A can be a general complex matrix, not assumed symmetric +// +LAPACK_INT +zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' + char jobvr, //if 'V', compute right eigenvectors, else 'N' + LAPACK_INT n, //number of rows/cols of A + Cplx const* A, //matrix A + Cplx * d, //eigenvalues + Cplx * vl, //left eigenvectors on return + Cplx * vr) //right eigenvectors on return + { + static const LAPACK_INT one = 1; + std::vector cpA; + std::vector work; + std::vector rwork; + LAPACK_INT nevecl = (jobvl == 'V' ? n : 1); + LAPACK_INT nevecr = (jobvr == 'V' ? n : 1); + LAPACK_INT lwork = std::max(one,4*n); + work.resize(lwork); + LAPACK_INT lrwork = std::max(one,2*n); + rwork.resize(lrwork); + + //Copy A data into cpA + cpA.resize(n*n); + auto pA = reinterpret_cast(A); + std::copy(pA,pA+n*n,cpA.data()); + + auto pd = reinterpret_cast(d); + auto pvl = reinterpret_cast(vl); + auto pvr = reinterpret_cast(vr); + + LAPACK_INT info = 0; +//#ifdef PLATFORM_acml +// F77NAME(zgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,pd,pvl,&nevecl,pvr,&nevecr,work.data(),&lwork,rwork.data(),&info,1,1); +//#else +// F77NAME(zgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,pd,pvl,&nevecl,pvr,&nevecr,work.data(),&lwork,rwork.data(),&info); +//#endif + return info; + } + +} //namespace itensor + diff --git a/itensor/tensor/lapack/cmake_lapack_wrap.h b/itensor/tensor/lapack/cmake_lapack_wrap.h new file mode 100644 index 000000000..a1d3bca7f --- /dev/null +++ b/itensor/tensor/lapack/cmake_lapack_wrap.h @@ -0,0 +1,795 @@ +// +// Copyright 2018 The Simons Foundation, Inc. - All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef __ITENSOR_LAPACK_WRAP_h +#define __ITENSOR_LAPACK_WRAP_h + +#include +//#include "config.h" +#include "itensor/types.h" +#include "itensor/util/timers.h" + +#include // BLASPP +#include // LAPACKPP + +#define LAPACK_INT lapack_int +#define LAPACK_REAL double +#define LAPACK_COMPLEX lapack_complex_double +#define BLAS_LAYOUT blas::Layout::ColMajor +// +// Headers and typedefs +// + +//// +//// +//// Generic Linux LAPACK +//// +//// +//#ifdef PLATFORM_lapack +// +//#define LAPACK_REQUIRE_EXTERN +// +//namespace itensor { +// using LAPACK_INT = int; +// using LAPACK_REAL = double; +// typedef struct +// { +// LAPACK_REAL real, imag; +// } LAPACK_COMPLEX; +//} +//#elif defined PLATFORM_openblas +// +//#define ITENSOR_USE_CBLAS +// +//#include "cblas.h" +//#include "lapacke.h" +//#undef I //lapacke.h includes complex.h which defined an `I` macro +// //that can cause problems, so best to undefine it +// +//namespace itensor { +//using LAPACK_INT = lapack_int; +//using LAPACK_REAL = double; +//using LAPACK_COMPLEX = lapack_complex_double; +// +//inline LAPACK_REAL& +//realRef(LAPACK_COMPLEX & z) +// { +// auto* p = reinterpret_cast(&z); +// return p[0]; +// } +// +//inline LAPACK_REAL& +//imagRef(LAPACK_COMPLEX & z) +// { +// auto* p = reinterpret_cast(&z); +// return p[1]; +// } +//} +// +//// +//// +//// Apple Accelerate/vecLib +//// +//// +//#elif defined PLATFORM_macos +// +//#define ITENSOR_USE_CBLAS +////#define ITENSOR_USE_ZGEMM +// +//#include +// namespace itensor { +// using LAPACK_INT = __CLPK_integer; +// using LAPACK_REAL = __CLPK_doublereal; +// using LAPACK_COMPLEX = __CLPK_doublecomplex; +// +// inline LAPACK_REAL& +// realRef(LAPACK_COMPLEX & z) { return z.r; } +// +// inline LAPACK_REAL& +// imagRef(LAPACK_COMPLEX & z) { return z.i; } +// } + +//// +//// +//// Intel MKL +//// +//// +//#elif defined PLATFORM_mkl +// +//#define ITENSOR_USE_CBLAS +//#define ITENSOR_USE_ZGEMM +// +//#include "mkl_cblas.h" +//#include "mkl_lapack.h" +// namespace itensor { +// using LAPACK_INT = MKL_INT; +// using LAPACK_REAL = double; +// using LAPACK_COMPLEX = MKL_Complex16; +// +// inline LAPACK_REAL& +// realRef(LAPACK_COMPLEX & z) { return z.real; } +// +// inline LAPACK_REAL& +// imagRef(LAPACK_COMPLEX & z) { return z.imag; } +// } + +//// +//// +//// AMD ACML +//// +//// +//#elif defined PLATFORM_acml +// +//#define LAPACK_REQUIRE_EXTERN +////#include "acml.h" +// namespace itensor { +// using LAPACK_INT = int; +// using LAPACK_REAL = double; +// typedef struct +// { +// LAPACK_REAL real, imag; +// } LAPACK_COMPLEX; +// +// inline LAPACK_REAL& +// realRef(LAPACK_COMPLEX & z) { return z.real; } +// +// inline LAPACK_REAL& +// imagRef(LAPACK_COMPLEX & z) { return z.imag; } +// } +// +//#endif // different PLATFORM types +// +// +// +//#ifdef FORTRAN_NO_TRAILING_UNDERSCORE +//#define F77NAME(x) x +//#else +//#if defined(LAPACK_GLOBAL) || defined(LAPACK_NAME) +//#define F77NAME(x) LAPACK_##x +//#else +//#define F77NAME(x) x##_ +//#endif +//#endif + + +namespace itensor { + +// +// +// Forward declarations of fortran lapack routines +// +// +#ifdef LAPACK_REQUIRE_EXTERN +extern "C" { + +//dnrm2 declaration +#ifdef ITENSOR_USE_CBLAS +LAPACK_REAL cblas_dnrm2(LAPACK_INT N, LAPACK_REAL *X, LAPACK_INT incX); +#else +LAPACK_REAL F77NAME(dnrm2)(LAPACK_INT* N, LAPACK_REAL* X, LAPACK_INT* incx); +#endif + + +//daxpy declaration +#ifdef ITENSOR_USE_CBLAS +void cblas_daxpy(const int n, const double alpha, const double *X, const int incX, double *Y, const int incY); +#else +void F77NAME(daxpy)(LAPACK_INT* n, LAPACK_REAL* alpha, + LAPACK_REAL* X, LAPACK_INT* incx, + LAPACK_REAL* Y, LAPACK_INT* incy); +#endif + +//ddot declaration +#ifdef ITENSOR_USE_CBLAS +LAPACK_REAL +cblas_ddot(const LAPACK_INT N, const LAPACK_REAL *X, const LAPACK_INT incx, const LAPACK_REAL *Y, const LAPACK_INT incy); +#else +LAPACK_REAL F77NAME(ddot)(LAPACK_INT* N, LAPACK_REAL* X, LAPACK_INT* incx, LAPACK_REAL* Y, LAPACK_INT* incy); +#endif + +//zdotc declaration +#ifdef ITENSOR_USE_CBLAS +LAPACK_REAL +cblas_zdotc_sub(const LAPACK_INT N, const void *X, const LAPACK_INT incx, const void *Y, const LAPACK_INT incy, void *res); +#else +LAPACK_COMPLEX F77NAME(zdotc)(LAPACK_INT* N, LAPACK_COMPLEX* X, LAPACK_INT* incx, LAPACK_COMPLEX* Y, LAPACK_INT* incy); +#endif + +//dgemm declaration +#ifdef ITENSOR_USE_CBLAS +void cblas_dgemm(const enum CBLAS_ORDER __Order, + const enum CBLAS_TRANSPOSE __TransA, + const enum CBLAS_TRANSPOSE __TransB, const int __M, const int __N, + const int __K, const double __alpha, const double *__A, + const int __lda, const double *__B, const int __ldb, + const double __beta, double *__C, const int __ldc); +#else +void F77NAME(dgemm)(char*,char*,LAPACK_INT*,LAPACK_INT*,LAPACK_INT*, + LAPACK_REAL*,LAPACK_REAL*,LAPACK_INT*,LAPACK_REAL*, + LAPACK_INT*,LAPACK_REAL*,LAPACK_REAL*,LAPACK_INT*); +#endif + +//zgemm declaration +#ifdef PLATFORM_openblas +void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, + OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, + OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, + OPENBLAS_CONST blasint M, + OPENBLAS_CONST blasint N, + OPENBLAS_CONST blasint K, + OPENBLAS_CONST double *alpha, + OPENBLAS_CONST double *A, + OPENBLAS_CONST blasint lda, + OPENBLAS_CONST double *B, + OPENBLAS_CONST blasint ldb, + OPENBLAS_CONST double *beta, + double *C, + OPENBLAS_CONST blasint ldc); +#else //platform not openblas + +#ifdef ITENSOR_USE_CBLAS +void cblas_zgemm(const enum CBLAS_ORDER __Order, + const enum CBLAS_TRANSPOSE __TransA, + const enum CBLAS_TRANSPOSE __TransB, const int __M, const int __N, + const int __K, const void *__alpha, const void *__A, const int __lda, + const void *__B, const int __ldb, const void *__beta, void *__C, + const int __ldc); +#else +void F77NAME(zgemm)(char* transa,char* transb,LAPACK_INT* m,LAPACK_INT* n,LAPACK_INT* k, + LAPACK_COMPLEX* alpha,LAPACK_COMPLEX* A,LAPACK_INT* LDA,LAPACK_COMPLEX* B, + LAPACK_INT* LDB,LAPACK_COMPLEX* beta,LAPACK_COMPLEX* C,LAPACK_INT* LDC); +#endif + +#endif //zgemm declaration + +//dgemv declaration +#ifdef ITENSOR_USE_CBLAS +void cblas_dgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const LAPACK_INT M, const LAPACK_INT N, + const LAPACK_REAL alpha, const LAPACK_REAL *A, const LAPACK_INT lda, + const LAPACK_REAL *X, const LAPACK_INT incX, const LAPACK_REAL beta, LAPACK_REAL *Y, + const LAPACK_INT incY); +#else +void F77NAME(dgemv)(char* transa,LAPACK_INT* M,LAPACK_INT* N,LAPACK_REAL* alpha, LAPACK_REAL* A, + LAPACK_INT* LDA, LAPACK_REAL* X, LAPACK_INT* incx, LAPACK_REAL* beta, + LAPACK_REAL* Y, LAPACK_INT* incy); +#endif + +//zgemv declaration +#ifdef PLATFORM_openblas +void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, + OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, + OPENBLAS_CONST blasint m, + OPENBLAS_CONST blasint n, + OPENBLAS_CONST double *alpha, + OPENBLAS_CONST double *a, + OPENBLAS_CONST blasint lda, + OPENBLAS_CONST double *x, + OPENBLAS_CONST blasint incx, + OPENBLAS_CONST double *beta, + double *y, + OPENBLAS_CONST blasint incy); +#else +#ifdef ITENSOR_USE_CBLAS +void cblas_zgemv(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE trans, const LAPACK_INT m, + const LAPACK_INT n, const void *alpha, const void *a, const LAPACK_INT lda, + const void *x, const LAPACK_INT incx, const void *beta, void *y, const LAPACK_INT incy); +#else +void F77NAME(zgemv)(char* transa,LAPACK_INT* M,LAPACK_INT* N,LAPACK_COMPLEX* alpha, LAPACK_COMPLEX* A, + LAPACK_INT* LDA, LAPACK_COMPLEX* X, LAPACK_INT* incx, LAPACK_COMPLEX* beta, + LAPACK_COMPLEX* Y, LAPACK_INT* incy); +#endif +#endif //zgemv declaration + +#ifdef PLATFORM_acml +void F77NAME(dsyev)(char *jobz, char *uplo, int *n, double *a, int *lda, + double *w, double *work, int *lwork, int *info, + int jobz_len, int uplo_len); +#else +void F77NAME(dsyev)(const char* jobz, const char* uplo, const LAPACK_INT* n, double* a, + const LAPACK_INT* lda, double* w, double* work, const LAPACK_INT* lwork, + LAPACK_INT* info ); +#endif + +#ifdef ITENSOR_USE_CBLAS +void cblas_dscal(const LAPACK_INT N, const LAPACK_REAL alpha, LAPACK_REAL* X,const LAPACK_INT incX); +#else +void F77NAME(dscal)(LAPACK_INT* N, LAPACK_REAL* alpha, LAPACK_REAL* X,LAPACK_INT* incX); +#endif + + +#ifdef PLATFORM_acml +void F77NAME(dgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, + double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, + double *work, LAPACK_INT *lwork, LAPACK_INT *iwork, LAPACK_INT *info, int jobz_len); +#else +void F77NAME(dgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, + double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, + double *work, LAPACK_INT *lwork, LAPACK_INT *iwork, LAPACK_INT *info); +#endif + + +#ifdef PLATFORM_acml + void F77NAME(dgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, + double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, + double *work, LAPACK_INT *lwork, LAPACK_INT *info, int jobz_len); +#else + void F77NAME(dgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, + double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, + double *work, LAPACK_INT *lwork, LAPACK_INT *info); +#endif + + + #ifdef PLATFORM_acml + void F77NAME(zgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, LAPACK_REAL *s, + LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_REAL * rwork, LAPACK_INT *info, int jobz_len); +#else + void F77NAME(zgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, LAPACK_REAL *s, + LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_REAL * rwork, LAPACK_INT *info); +#endif + +#ifdef PLATFORM_acml +void F77NAME(zgesdd)(char *jobz, int *m, int *n, LAPACK_COMPLEX *a, int *lda, double *s, + LAPACK_COMPLEX *u, int *ldu, LAPACK_COMPLEX *vt, int *ldvt, + LAPACK_COMPLEX *work, int *lwork, double *rwork, int *iwork, int *info, + int jobz_len); +#else +void F77NAME(zgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, double *s, + LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, LAPACK_INT *iwork, LAPACK_INT *info); +#endif + +void F77NAME(dgeqrf)(LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, + double *tau, double *work, LAPACK_INT *lwork, LAPACK_INT *info); + +void F77NAME(dorgqr)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, double *a, + LAPACK_INT *lda, double *tau, double *work, LAPACK_INT *lwork, + LAPACK_INT *info); + + +void F77NAME(zgeqrf)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, + LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_INT *info); + +#ifdef PLATFORM_lapacke +void LAPACKE_zungqr(int matrix_layout, LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, LAPACK_COMPLEX *a, + LAPACK_INT *lda, LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, + LAPACK_INT *info); +#else +void F77NAME(zungqr)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, LAPACK_COMPLEX *a, + LAPACK_INT *lda, LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, + LAPACK_INT *info); +#endif + +void F77NAME(dgesv)(LAPACK_INT *n, LAPACK_INT *nrhs, LAPACK_REAL *a, LAPACK_INT *lda, + LAPACK_INT *ipiv, LAPACK_REAL *b, LAPACK_INT *ldb, LAPACK_INT *info); + +void F77NAME(zgesv)(LAPACK_INT *n, LAPACK_INT *nrhs, LAPACK_COMPLEX *a, LAPACK_INT * lda, + LAPACK_INT *ipiv, LAPACK_COMPLEX *b, LAPACK_INT *ldb, LAPACK_INT *info); + +#ifdef PLATFORM_lapacke +double LAPACKE_dlange(int matrix_layout, char norm, lapack_int m, lapack_int n, const double* a, lapack_int lda); +#elif defined PLATFORM_acml +double F77NAME(dlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, double* a, LAPACK_INT* lda, double* work, LAPACK_INT norm_len); +#else +double F77NAME(dlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, double* a, LAPACK_INT* lda, double* work); +#endif + +#ifdef PLATFORM_lapacke +lapack_real LAPACKE_zlange(int matrix_layout, char norm, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda); +#elif defined PLATFORM_acml +LAPACK_REAL F77NAME(zlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, LAPACK_COMPLEX* a, LAPACK_INT* lda, double* work, LAPACK_INT norm_len); +#else +LAPACK_REAL F77NAME(zlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, LAPACK_COMPLEX* a, LAPACK_INT* lda, double* work); +#endif + +#ifdef PLATFORM_lapacke +lapack_int LAPACKE_zheev(int matrix_order, char jobz, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, double* w); +#elif defined PLATFORM_acml +void F77NAME(zheev)(char *jobz, char *uplo, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, + double *w, LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, + LAPACK_INT *info, LAPACK_INT jobz_len, LAPACK_INT uplo_len); +#else +void F77NAME(zheev)(char *jobz, char *uplo, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, + double *w, LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, + LAPACK_INT *info); +#endif + + +#ifdef PLATFORM_acml +void F77NAME(dsygv)(LAPACK_INT *itype, char *jobz, char *uplo, LAPACK_INT *n, double *a, + LAPACK_INT *lda, double *b, LAPACK_INT *ldb, double *w, double *work, + LAPACK_INT *lwork, LAPACK_INT *info, LAPACK_INT jobz_len, LAPACK_INT uplo_len); +#else +void F77NAME(dsygv)(LAPACK_INT *itype, char *jobz, char *uplo, LAPACK_INT *n, double *a, + LAPACK_INT *lda, double *b, LAPACK_INT *ldb, double *w, double *work, + LAPACK_INT *lwork, LAPACK_INT *info); +#endif + + +#ifdef PLATFORM_acml +void F77NAME(dgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, double *a, + LAPACK_INT *lda, double *wr, double *wi, double *vl, LAPACK_INT *ldvl, + double *vr, LAPACK_INT *ldvr, double *work, LAPACK_INT *lwork, + LAPACK_INT *info, LAPACK_INT jobvl_len, LAPACK_INT jobvr_len); +#else +void F77NAME(dgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, double *a, + LAPACK_INT *lda, double *wr, double *wi, double *vl, LAPACK_INT *ldvl, + double *vr, LAPACK_INT *ldvr, double *work, LAPACK_INT *lwork, + LAPACK_INT *info); +#endif + + +#ifdef PLATFORM_acml +void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, + LAPACK_INT *lda, LAPACK_COMPLEX *w, LAPACK_COMPLEX *vl, + LAPACK_INT *ldvl, LAPACK_COMPLEX *vr, LAPACK_INT *ldvr, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, + LAPACK_INT *info, LAPACK_INT jobvl_len, LAPACK_INT jobvr_len); +#else +void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, + LAPACK_INT *lda, LAPACK_COMPLEX *w, LAPACK_COMPLEX *vl, + LAPACK_INT *ldvl, LAPACK_COMPLEX *vr, LAPACK_INT *ldvr, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, + LAPACK_INT *info); +#endif + +} //extern "C" +#endif + +// +// daxpy +// Y += alpha*X +// +void +daxpy_wrapper(lapack_int n, //number of elements of X,Y + Real alpha, //scale factor + const LAPACK_REAL* X, //pointer to head of vector X + lapack_int incx, //increment with which to step through X + LAPACK_REAL* Y, //pointer to head of vector Y + lapack_int incy); //increment with which to step through Y + +// +// dnrm2 +// +LAPACK_REAL +dnrm2_wrapper(lapack_int N, + const LAPACK_REAL* X, + lapack_int incx = 1); + +// +// ddot +// +LAPACK_REAL +ddot_wrapper(lapack_int N, + const LAPACK_REAL* X, + lapack_int incx, + const LAPACK_REAL* Y, + lapack_int incy); + +// +// zdotc +// +Cplx +zdotc_wrapper(lapack_int N, + Cplx const* X, + lapack_int incx, + Cplx const* Y, + lapack_int incy); + +// +// dgemm +// +void +gemm_wrapper(bool transa, + bool transb, + lapack_int m, + lapack_int n, + lapack_int k, + LAPACK_REAL alpha, + LAPACK_REAL const* A, + LAPACK_REAL const* B, + LAPACK_REAL beta, + LAPACK_REAL * C); + +// +// zgemm +// +void +gemm_wrapper(bool transa, + bool transb, + lapack_int m, + lapack_int n, + lapack_int k, + Cplx alpha, + Cplx const* A, + Cplx const* B, + Cplx beta, + Cplx * C); + +// +// dgemv - matrix*vector multiply +// +void +gemv_wrapper(bool trans, + LAPACK_REAL alpha, + LAPACK_REAL beta, + lapack_int m, + lapack_int n, + const LAPACK_REAL* A, + const LAPACK_REAL* x, + lapack_int incx, + LAPACK_REAL* y, + lapack_int incy); + +// +// zgemv - matrix*vector multiply +// +void +gemv_wrapper(bool trans, + Cplx alpha, + Cplx beta, + lapack_int m, + lapack_int n, + Cplx const* A, + Cplx const* x, + lapack_int incx, + Cplx* y, + lapack_int incy); + + +// +// dsyev +// +void +dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs + char uplo, //if uplo=='U', read from upper triangle of A + lapack_int n, //number of cols of A + LAPACK_REAL* A, //symmetric matrix A + LAPACK_REAL* eigs, //eigenvalues on return + lapack_int& info); //error info + +// +// dscal +// +void +dscal_wrapper(lapack_int N, + LAPACK_REAL alpha, + LAPACK_REAL* data, + lapack_int inc = 1); + + +void +dgesdd_wrapper(char * jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + lapack_int* m, //number of rows of input matrix *A + lapack_int* n, //number of cols of input matrix *A + LAPACK_REAL *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + LAPACK_REAL *u, //on return, unitary matrix U + LAPACK_REAL *vt, //on return, unitary matrix V transpose + lapack_int *info); + +void +zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + lapack_int *m, //number of rows of input matrix *A + lapack_int *n, //number of cols of input matrix *A + Cplx *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + Cplx *u, //on return, unitary matrix U + Cplx *vt, //on return, unitary matrix V transpose + lapack_int *info); + + + void +dgesvd_wrapper(char * jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + lapack_int* m, //number of rows of input matrix *A + lapack_int* n, //number of cols of input matrix *A + LAPACK_REAL *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + LAPACK_REAL *u, //on return, unitary matrix U + LAPACK_REAL *vt, //on return, unitary matrix V transpose + lapack_int *info); + +void +zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + lapack_int *m, //number of rows of input matrix *A + lapack_int *n, //number of cols of input matrix *A + Cplx *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + Cplx *u, //on return, unitary matrix U + Cplx *vt, //on return, unitary matrix V transpose + lapack_int *info); + + +// +// dgeqrf +// +// QR factorization of a real matrix A +// +void +dgeqrf_wrapper(lapack_int* m, //number of rows of A + lapack_int* n, //number of cols of A + LAPACK_REAL* A, //matrix A + //on return upper triangle contains R + lapack_int* lda, //size of A (usually same as n) + LAPACK_REAL* tau, //scalar factors of elementary reflectors + //length should be min(m,n) + lapack_int* info); //error info + +// +// dorgqr +// +// Generates Q from output of QR factorization routine dgeqrf (see above) +// +void +dorgqr_wrapper(lapack_int* m, //number of rows of A + lapack_int* n, //number of cols of A + lapack_int* k, //number of elementary reflectors, typically min(m,n) + LAPACK_REAL* A, //matrix A, as returned from "A" argument of dgeqrf + //on return contains Q + lapack_int* lda, //size of A (usually same as n) + LAPACK_REAL* tau, //scalar factors as returned by dgeqrf + lapack_int* info); //error info + + + // +// dgeqrf +// +// QR factorization of a complex matrix A +// +void +zgeqrf_wrapper(lapack_int* m, //number of rows of A + lapack_int* n, //number of cols of A + Cplx* A, //matrix A + //on return upper triangle contains R + lapack_int* lda, //size of A (usually same as n) + LAPACK_COMPLEX* tau, //scalar factors of elementary reflectors + //length should be min(m,n) + lapack_int* info); //error info + +// +// dorgqr +// +// Generates Q from output of QR factorization routine zgeqrf (see above) +// +void +zungqr_wrapper(lapack_int* m, //number of rows of A + lapack_int* n, //number of cols of A + lapack_int* k, //number of elementary reflectors, typically min(m,n) + Cplx* A, //matrix A, as returned from "A" argument of dgeqrf + //on return contains Q + lapack_int* lda, //size of A (usually same as n) + LAPACK_COMPLEX* tau, //scalar factors as returned by zgeqrf + lapack_int* info); //error info + +// dgesv +// +// computes the solution to system of linear equations A*X = B +// where A is a general real matrix +// +lapack_int +dgesv_wrapper(lapack_int n, + lapack_int nrhs, + LAPACK_REAL* a, + LAPACK_REAL* b); + +// +// zgesv +// +// computes the solution to system of linear euqations A*X =B +// where A is a general complex matrix +// +lapack_int +zgesv_wrapper(lapack_int n, + lapack_int nrhs, + Cplx* a, + Cplx* b); + +// +// dlange +// +// returns the value of the 1-norm, Frobenius norm, infinity-norm, +// or the largest absolute value of any element of a general rectangular matrix. +// +double +dlange_wrapper(char norm, + lapack_int m, + lapack_int n, + double* a); + +// +// zlange +// +// returns the value of the 1-norm, Frobenius norm, infinity-norm, +// or the largest absolute value of any element of a general rectangular matrix. +// +LAPACK_REAL +zlange_wrapper(char norm, + lapack_int m, + lapack_int n, + Cplx* a); + +// +// zheev +// +// Eigenvalues and eigenvectors of complex Hermitian matrix A +// +lapack_int +zheev_wrapper(lapack_int N, //number of cols of A + Cplx * A, //matrix A, on return contains eigenvectors + LAPACK_REAL * d); //eigenvalues on return + +// +// dsygv +// +// Eigenvalues and eigenvectors of generalized eigenvalue problem +// A*x = lambda*B*x +// A and B must be symmetric +// B must be positive definite +// +void +dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs + //if 'N', only eigenvalues + char* uplo, //if 'U', use upper triangle of A + lapack_int* n, //number of cols of A + LAPACK_REAL* A, //matrix A, on return contains eigenvectors + LAPACK_REAL* B, //matrix B + LAPACK_REAL* d, //eigenvalues on return + lapack_int* info); //error info + +// +// dgeev +// +// Eigenvalues and eigenvectors of real, square matrix A +// A can be a general real matrix, not assumed symmetric +// +// Returns "info" integer +// +lapack_int +dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' + char jobvr, //if 'V', compute right eigenvectors, else 'N' + lapack_int n, //number of rows/cols of A + LAPACK_REAL const* A, //matrix A + LAPACK_REAL* dr, //real parts of eigenvalues + LAPACK_REAL* di, //imaginary parts of eigenvalues + LAPACK_REAL* vl, //left eigenvectors on return + LAPACK_REAL* vr); //right eigenvectors on return + +// +// zgeev +// +// Eigenvalues and eigenvectors of complex, square matrix A +// A can be a general complex matrix, not assumed symmetric +// +// Returns "info" integer +// +lapack_int +zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' + char jobvr, //if 'V', compute right eigenvectors, else 'N' + lapack_int n, //number of rows/cols of A + Cplx const* A, //matrix A + Cplx * d, //eigenvalues + Cplx * vl, //left eigenvectors on return + Cplx * vr); //right eigenvectors on return + +} //namespace itensor + +#endif diff --git a/itensor/tensor/lapack/makefile_lapack_wrap.cc b/itensor/tensor/lapack/makefile_lapack_wrap.cc new file mode 100644 index 000000000..69e82c642 --- /dev/null +++ b/itensor/tensor/lapack/makefile_lapack_wrap.cc @@ -0,0 +1,844 @@ +// +// Copyright 2018 The Simons Foundation, Inc. - All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "itensor/tensor/lapack/makefile_lapack_wrap.h" +//#include "itensor/tensor/permutecplx.h" + +namespace itensor { + +// +// daxpy +// Y += alpha*X +// +void +daxpy_wrapper(LAPACK_INT n, //number of elements of X,Y + LAPACK_REAL alpha, //scale factor + const LAPACK_REAL* X, //pointer to head of vector X + LAPACK_INT incx, //increment with which to step through X + LAPACK_REAL* Y, //pointer to head of vector Y + LAPACK_INT incy) //increment with which to step through Y + { +#ifdef ITENSOR_USE_CBLAS + cblas_daxpy(n,alpha,X,incx,Y,incy); +#else + auto Xnc = const_cast(X); + F77NAME(daxpy)(&n,&alpha,Xnc,&incx,Y,&incy); +#endif + } + +// +// dnrm2 +// +LAPACK_REAL +dnrm2_wrapper(LAPACK_INT N, + const LAPACK_REAL* X, + LAPACK_INT incx) + { +#ifdef ITENSOR_USE_CBLAS + return cblas_dnrm2(N,X,incx); +#else + auto *Xnc = const_cast(X); + return F77NAME(dnrm2)(&N,Xnc,&incx); +#endif + return -1; + } + +// +// ddot +// +LAPACK_REAL +ddot_wrapper(LAPACK_INT N, + const LAPACK_REAL* X, + LAPACK_INT incx, + const LAPACK_REAL* Y, + LAPACK_INT incy) + { +#ifdef ITENSOR_USE_CBLAS + return cblas_ddot(N,X,incx,Y,incy); +#else + auto *Xnc = const_cast(X); + auto *Ync = const_cast(Y); + return F77NAME(ddot)(&N,Xnc,&incx,Ync,&incy); +#endif + return -1; + } + +// +// zdotc +// +Cplx +zdotc_wrapper(LAPACK_INT N, + Cplx const* X, + LAPACK_INT incx, + Cplx const* Y, + LAPACK_INT incy) + { +#ifdef ITENSOR_USE_CBLAS + Cplx res; +#if defined PLATFORM_openblas + auto pX = reinterpret_cast(X); + auto pY = reinterpret_cast(Y); + auto pres = reinterpret_cast(&res); +#else + auto pX = reinterpret_cast(X); + auto pY = reinterpret_cast(Y); + auto pres = reinterpret_cast(&res); +#endif + cblas_zdotc_sub(N,pX,incx,pY,incy,pres); + return res; +#else + auto ncX = const_cast(X); + auto ncY = const_cast(Y); + auto pX = reinterpret_cast(ncX); + auto pY = reinterpret_cast(ncY); + auto res = F77NAME(zdotc)(&N,pX,&incx,pY,&incy); + auto cplx_res = reinterpret_cast(&res); + return *cplx_res; +#endif + return Cplx{}; + } + +// +// dgemm +// +void +gemm_wrapper(bool transa, + bool transb, + LAPACK_INT m, + LAPACK_INT n, + LAPACK_INT k, + LAPACK_REAL alpha, + LAPACK_REAL const* A, + LAPACK_REAL const* B, + LAPACK_REAL beta, + LAPACK_REAL * C) + { + LAPACK_INT lda = m, + ldb = k; +#ifdef ITENSOR_USE_CBLAS + auto at = CblasNoTrans, + bt = CblasNoTrans; + if(transa) + { + at = CblasTrans; + lda = k; + } + if(transb) + { + bt = CblasTrans; + ldb = n; + } + cblas_dgemm(CblasColMajor,at,bt,m,n,k,alpha,A,lda,B,ldb,beta,C,m); +#else + auto *pA = const_cast(A); + auto *pB = const_cast(B); + char at = 'N'; + char bt = 'N'; + if(transa) + { + at = 'T'; + lda = k; + } + if(transb) + { + bt = 'T'; + ldb = n; + } + F77NAME(dgemm)(&at,&bt,&m,&n,&k,&alpha,pA,&lda,pB,&ldb,&beta,C,&m); +#endif + } + +// +// zgemm +// +void +gemm_wrapper(bool transa, + bool transb, + LAPACK_INT m, + LAPACK_INT n, + LAPACK_INT k, + Cplx alpha, + const Cplx* A, + const Cplx* B, + Cplx beta, + Cplx* C) + { + LAPACK_INT lda = m, + ldb = k; +#ifdef PLATFORM_openblas + auto at = CblasNoTrans, + bt = CblasNoTrans; + if(transa) + { + at = CblasTrans; + lda = k; + } + if(transb) + { + bt = CblasTrans; + ldb = n; + } + //auto ralpha = realRef(alpha); + //auto ialpha = imagRef(alpha); + //auto rbeta = realRef(beta); + //auto ibeta = imagRef(beta); + //if(ialpha != 0.0 || ibeta != 0.0) + // { + // throw std::runtime_error("Complex alpha, beta not supported in zgemm for PLATFORM=openblas"); + // } + auto* palpha = reinterpret_cast(&alpha); + auto* pbeta = reinterpret_cast(&beta); + auto* pA = reinterpret_cast(A); + auto* pB = reinterpret_cast(B); + auto* pC = reinterpret_cast(C); + cblas_zgemm(CblasColMajor,at,bt,m,n,k,palpha,pA,lda,pB,ldb,pbeta,pC,m); +#else //platform not openblas +#ifdef ITENSOR_USE_CBLAS + auto at = CblasNoTrans, + bt = CblasNoTrans; + if(transa) + { + at = CblasTrans; + lda = k; + } + if(transb) + { + bt = CblasTrans; + ldb = n; + } + auto palpha = (void*)(&alpha); + auto pbeta = (void*)(&beta); + cblas_zgemm(CblasColMajor,at,bt,m,n,k,palpha,(void*)A,lda,(void*)B,ldb,pbeta,(void*)C,m); +#else //use Fortran zgemm + auto *ncA = const_cast(A); + auto *ncB = const_cast(B); + auto *pA = reinterpret_cast(ncA); + auto *pB = reinterpret_cast(ncB); + auto *pC = reinterpret_cast(C); + auto *palpha = reinterpret_cast(&alpha); + auto *pbeta = reinterpret_cast(&beta); + char at = 'N'; + char bt = 'N'; + if(transa) + { + at = 'T'; + lda = k; + } + if(transb) + { + bt = 'T'; + ldb = n; + } + F77NAME(zgemm)(&at,&bt,&m,&n,&k,palpha,pA,&lda,pB,&ldb,pbeta,pC,&m); +#endif +#endif + } + +void +gemv_wrapper(bool trans, + LAPACK_REAL alpha, + LAPACK_REAL beta, + LAPACK_INT m, + LAPACK_INT n, + const LAPACK_REAL* A, + const LAPACK_REAL* x, + LAPACK_INT incx, + LAPACK_REAL* y, + LAPACK_INT incy) + { +#ifdef ITENSOR_USE_CBLAS + auto Tr = trans ? CblasTrans : CblasNoTrans; + cblas_dgemv(CblasColMajor,Tr,m,n,alpha,A,m,x,incx,beta,y,incy); +#else + char Tr = trans ? 'T' : 'N'; + F77NAME(dgemv)(&Tr,&m,&n,&alpha,const_cast(A),&m,const_cast(x),&incx,&beta,y,&incy); +#endif + } + +void +gemv_wrapper(bool trans, + Cplx alpha, + Cplx beta, + LAPACK_INT m, + LAPACK_INT n, + Cplx const* A, + Cplx const* x, + LAPACK_INT incx, + Cplx* y, + LAPACK_INT incy) + { +#ifdef PLATFORM_openblas + auto Tr = trans ? CblasTrans : CblasNoTrans; + //auto ralpha = realRef(alpha); + //auto ialpha = imagRef(alpha); + //auto rbeta = realRef(beta); + //auto ibeta = imagRef(beta); + //if(ialpha != 0.0 || ibeta != 0.0) + // { + // throw std::runtime_error("Complex alpha, beta not supported in zgemm for PLATFORM=openblas"); + // } + auto* palpha = reinterpret_cast(&alpha); + auto* pbeta = reinterpret_cast(&beta); + auto* pA = reinterpret_cast(A); + auto* px = reinterpret_cast(x); + auto* py = reinterpret_cast(y); + cblas_zgemv(CblasColMajor,Tr,m,n,palpha,pA,m,px,incx,pbeta,py,incy); +#else //platform other than openblas +#ifdef ITENSOR_USE_CBLAS + auto Tr = trans ? CblasTrans : CblasNoTrans; + auto palpha = reinterpret_cast(&alpha); + auto pbeta = reinterpret_cast(&beta); + cblas_zgemv(CblasColMajor,Tr,m,n,palpha,(void*)A,m,(void*)x,incx,pbeta,(void*)y,incy); +#else + char Tr = trans ? 'T' : 'N'; + auto ncA = const_cast(A); + auto ncx = const_cast(x); + auto pA = reinterpret_cast(ncA); + auto px = reinterpret_cast(ncx); + auto py = reinterpret_cast(y); + auto palpha = reinterpret_cast(&alpha); + auto pbeta = reinterpret_cast(&beta); + F77NAME(zgemv)(&Tr,&m,&n,palpha,pA,&m,px,&incx,pbeta,py,&incy); +#endif +#endif + } + + +// +// dsyev +// +void +dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs + char uplo, //if uplo=='U', read from upper triangle of A + LAPACK_INT n, //number of cols of A + LAPACK_REAL* A, //symmetric matrix A + LAPACK_REAL* eigs, //eigenvalues on return + LAPACK_INT& info) //error info + { + std::vector work; + LAPACK_INT lda = n; + +#ifdef PLATFORM_acml + static const LAPACK_INT one = 1; + LAPACK_INT lwork = std::max(one,3*n-1); + work.resize(lwork+2); + F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,work.data(),&lwork,&info,1,1); +#else + //Compute optimal workspace size (will be written to wkopt) + LAPACK_INT lwork = -1; //tell dsyev to compute optimal size + LAPACK_REAL wkopt = 0; + F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,&wkopt,&lwork,&info); + lwork = LAPACK_INT(wkopt); + work.resize(lwork+2); + F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,work.data(),&lwork,&info); +#endif + } + +// +// dscal +// +void +dscal_wrapper(LAPACK_INT N, + LAPACK_REAL alpha, + LAPACK_REAL* data, + LAPACK_INT inc) + { +#ifdef ITENSOR_USE_CBLAS + cblas_dscal(N,alpha,data,inc); +#else + F77NAME(dscal)(&N,&alpha,data,&inc); +#endif + } + +void +zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + Cplx *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + Cplx *u, //on return, unitary matrix U + Cplx *vt, //on return, unitary matrix V transpose + LAPACK_INT *info) + { + std::vector work; + std::vector rwork; + std::vector iwork; + auto pA = reinterpret_cast(A); + auto pU = reinterpret_cast(u); + auto pVt = reinterpret_cast(vt); + LAPACK_INT l = std::min(*m,*n), + g = std::max(*m,*n); + LAPACK_INT lwork = l*l+2*l+g+100; + work.resize(lwork); + rwork.resize(5*l*(1+l)); + iwork.resize(8*l); +#ifdef PLATFORM_acml + LAPACK_INT jobz_len = 1; + F77NAME(zgesdd)(jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),iwork.data(),info,jobz_len); +#else + F77NAME(zgesdd)(jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),iwork.data(),info); +#endif + } + + + + void +dgesdd_wrapper(char* jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + LAPACK_REAL *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + LAPACK_REAL *u, //on return, unitary matrix U + LAPACK_REAL *vt, //on return, unitary matrix V transpose + LAPACK_INT *info) + { + std::vector work; + std::vector iwork; + LAPACK_INT l = std::min(*m,*n), + g = std::max(*m,*n); + LAPACK_INT lwork = l*(6 + 4*l) + g; + work.resize(lwork); + iwork.resize(8*l); +#ifdef PLATFORM_acml + LAPACK_INT jobz_len = 1; + F77NAME(dgesdd)(jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork,iwork.data(),info,jobz_len); +#else + F77NAME(dgesdd)(jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork,iwork.data(),info); +#endif + } + + + + void +zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + Cplx *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + Cplx *u, //on return, unitary matrix U + Cplx *vt, //on return, unitary matrix V transpose + LAPACK_INT *info) + { + std::vector work; + std::vector rwork; + std::vector iwork; + auto pA = reinterpret_cast(A); + auto pU = reinterpret_cast(u); + auto pVt = reinterpret_cast(vt); + LAPACK_INT l = std::min(*m,*n), + g = std::max(*m,*n); + LAPACK_INT lwork = l*l+2*l+g+100; + work.resize(lwork); + rwork.resize(5*l*(1+l)); + iwork.resize(8*l); +#ifdef PLATFORM_acml + LAPACK_INT jobz_len = 1; + F77NAME(zgesvd)(jobz,jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),info,jobz_len); +#else + F77NAME(zgesvd)(jobz,jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),info); +#endif + } + + + + void +dgesvd_wrapper(char* jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + LAPACK_REAL *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + LAPACK_REAL *u, //on return, unitary matrix U + LAPACK_REAL *vt, //on return, unitary matrix V transpose + LAPACK_INT *info) + { + std::vector work; + // std::vector superb; + + std::vector iwork; + LAPACK_INT l = std::min(*m,*n), + g = std::max(*m,*n); + LAPACK_INT lwork = l*(6 + 4*l) + g; + work.resize(lwork); + iwork.resize(8*l); + //superb.resize(l -1); +#ifdef PLATFORM_acml + LAPACK_INT jobz_len = 1; + F77NAME(dgesvd)(jobz,jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork, info, jobz_len); +#else + F77NAME(dgesvd)(jobz,jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork, info); +#endif + } + +// +// dgeqrf +// +// QR factorization of a real matrix A +// +void +dgeqrf_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_REAL* A, //matrix A + //on return upper triangle contains R + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_REAL* tau, //scalar factors of elementary reflectors + //length should be min(m,n) + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + LAPACK_INT lwork = std::max(one,4*std::max(*n,*m)); + work.resize(lwork+2); + F77NAME(dgeqrf)(m,n,A,lda,tau,work.data(),&lwork,info); + } + +// +// dorgqr +// +// Generates Q from output of QR factorization routine dgeqrf (see above) +// +void +dorgqr_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) + LAPACK_REAL* A, //matrix A, as returned from "A" argument of dgeqrf + //on return contains Q + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_REAL* tau, //scalar factors as returned by dgeqrf + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + auto lwork = std::max(one,4*std::max(*n,*m)); + work.resize(lwork+2); + F77NAME(dorgqr)(m,n,k,A,lda,tau,work.data(),&lwork,info); + } + + + // +// dgeqrf +// +// QR factorization of a complex matrix A +// +void +zgeqrf_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + Cplx* A, //matrix A + //on return upper triangle contains R + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_COMPLEX* tau, //scalar factors of elementary reflectors + //length should be min(m,n) + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + LAPACK_INT lwork = std::max(one,4*std::max(*n,*m)); + work.resize(lwork+2); + static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); + auto pA = reinterpret_cast(A); + F77NAME(zgeqrf)(m,n,pA,lda,tau,work.data(),&lwork,info); + } + +// +// dorgqr +// +// Generates Q from output of QR factorization routine zgeqrf (see above) +// +void +zungqr_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) + Cplx* A, //matrix A, as returned from "A" argument of dgeqrf + //on return contains Q + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_COMPLEX* tau, //scalar factors as returned by dgeqrf + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + auto lwork = std::max(one,4*std::max(*n,*m)); + work.resize(lwork+2); + static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); + auto pA = reinterpret_cast(A); + #ifdef PLATFORM_lapacke + LAPACKE_zungqr(LAPACK_COL_MAJOR,jobz,uplo,N,A,N,w.data()); + #else + F77NAME(zungqr)(m,n,k,pA,lda,tau,work.data(),&lwork,info); + #endif + } + +// +// dgesv +// +LAPACK_INT +dgesv_wrapper(LAPACK_INT n, + LAPACK_INT nrhs, + LAPACK_REAL* a, + LAPACK_REAL* b) + { + LAPACK_INT lda = n; + std::vector ipiv(n); + LAPACK_INT ldb = n; + LAPACK_INT info = 0; + F77NAME(dgesv)(&n,&nrhs,a,&lda,ipiv.data(),b,&ldb,&info); + return info; + } + +// +// zgesv +// +LAPACK_INT +zgesv_wrapper(LAPACK_INT n, + LAPACK_INT nrhs, + Cplx* a, + Cplx* b) + { + auto pa = reinterpret_cast(a); + auto pb = reinterpret_cast(b); + LAPACK_INT lda = n; + std::vector ipiv(n); + LAPACK_INT ldb = n; + LAPACK_INT info = 0; + F77NAME(zgesv)(&n,&nrhs,pa,&lda,ipiv.data(),pb,&ldb,&info); + return info; + } + +// +// dlange +// +double +dlange_wrapper(char norm, + LAPACK_INT m, + LAPACK_INT n, + double* a) + { + double norma; +#ifdef PLATFORM_lapacke + norma = LAPACKE_dlange(LAPACK_COL_MAJOR,norm,m,n,a,m); +#else + std::vector work; + if(norm == 'I' || norm == 'i') work.resize(m); +#ifdef PLATFORM_acml + LAPACK_INT norm_len = 1; + norma = F77NAME(dlange)(&norm,&m,&n,a,&m,work.data(),norm_len); +#else + norma = F77NAME(dlange)(&norm,&m,&n,a,&m,work.data()); +#endif +#endif + return norma; + } + +// +// zlange +// +LAPACK_REAL +zlange_wrapper(char norm, + LAPACK_INT m, + LAPACK_INT n, + Cplx* a) + { + LAPACK_REAL norma; +#ifdef PLATFORM_lapacke + auto pA = reinterpret_cast(a); + norma = LAPACKE_zlange(LAPACK_COL_MAJOR,norm,m,n,pa,m); +#else + std::vector work; + if(norm == 'I' || norm == 'i') work.resize(m); + auto pA = reinterpret_cast(a); +#ifdef PLATFORM_acml + LAPACK_INT norm_len = 1; + norma = F77NAME(zlange)(&norm,&m,&n,pA,&m,work.data(),norm_len); +#else + norma = F77NAME(zlange)(&norm,&m,&n,pA,&m,work.data()); +#endif +#endif + return norma; + } + +// +// zheev +// +// Eigenvalues and eigenvectors of complex Hermitian matrix A +// +LAPACK_INT +zheev_wrapper(LAPACK_INT N, //number of cols of A + Cplx * A, //matrix A, on return contains eigenvectors + LAPACK_REAL * d) //eigenvalues on return + { + static const LAPACK_INT one = 1; + char jobz = 'V'; + char uplo = 'U'; +#ifdef PLATFORM_lapacke + std::vector work(N); + LAPACKE_zheev(LAPACK_COL_MAJOR,jobz,uplo,N,A,N,w.data()); +#else + LAPACK_INT lwork = std::max(one,3*N-1);//max(1, 1+6*N+2*N*N); + std::vector work(lwork); + std::vector rwork(lwork); + LAPACK_INT info = 0; + static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); + auto pA = reinterpret_cast(A); +#ifdef PLATFORM_acml + LAPACK_INT jobz_len = 1; + LAPACK_INT uplo_len = 1; + F77NAME(zheev)(&jobz,&uplo,&N,pA,&N,d,work.data(),&lwork,rwork.data(),&info,jobz_len,uplo_len); +#else + F77NAME(zheev)(&jobz,&uplo,&N,pA,&N,d,work.data(),&lwork,rwork.data(),&info); +#endif + +#endif //PLATFORM_lapacke + return info; + } + +// +// dsygv +// +// Eigenvalues and eigenvectors of generalized eigenvalue problem +// A*x = lambda*B*x +// A and B must be symmetric +// B must be positive definite +// +void +dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs + //if 'N', only eigenvalues + char* uplo, //if 'U', use upper triangle of A + LAPACK_INT* n, //number of cols of A + LAPACK_REAL* A, //matrix A, on return contains eigenvectors + LAPACK_REAL* B, //matrix B + LAPACK_REAL* d, //eigenvalues on return + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + LAPACK_INT itype = 1; + LAPACK_INT lwork = std::max(one,3*(*n)-1);//std::max(1, 1+6*N+2*N*N); + work.resize(lwork); +#ifdef PLATFORM_acml + LAPACK_INT jobz_len = 1; + LAPACK_INT uplo_len = 1; + F77NAME(dsygv)(&itype,jobz,uplo,n,A,n,B,n,d,work.data(),&lwork,info,jobz_len,uplo_len); +#else + F77NAME(dsygv)(&itype,jobz,uplo,n,A,n,B,n,d,work.data(),&lwork,info); +#endif + } + +// +// dgeev +// +// Eigenvalues and eigenvectors of real, square matrix A +// A can be a general real matrix, not assumed symmetric +// +LAPACK_INT +dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' + char jobvr, //if 'V', compute right eigenvectors, else 'N' + LAPACK_INT n, //number of rows/cols of A + LAPACK_REAL const* A, //matrix A + LAPACK_REAL* dr, //real parts of eigenvalues + LAPACK_REAL* di, //imaginary parts of eigenvalues + LAPACK_REAL* vl, //left eigenvectors on return + LAPACK_REAL* vr) //right eigenvectors on return + { + std::vector work; + std::vector cpA; + + cpA.resize(n*n); + std::copy(A,A+n*n,cpA.data()); + + LAPACK_INT nevecl = (jobvl == 'V' ? n : 1); + LAPACK_INT nevecr = (jobvr == 'V' ? n : 1); + LAPACK_INT info = 0; +#ifdef PLATFORM_acml + LAPACK_INT lwork = -1; + LAPACK_REAL wquery = 0; + F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,&wquery,&lwork,&info,1,1); + + lwork = static_cast(wquery); + work.resize(lwork); + F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,work.data(),&lwork,&info,1,1); +#else + LAPACK_INT lwork = -1; + LAPACK_REAL wquery = 0; + F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,&wquery,&lwork,&info); + + lwork = static_cast(wquery); + work.resize(lwork); + F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,work.data(),&lwork,&info); +#endif + //println("jobvl = ",jobvl); + //println("nevecl = ",nevecl); + //println("vl data = "); + //for(auto j = 0; j < n*n; ++j) + // { + // println(*vl); + // ++vl; + // } + //println("vr data = "); + //for(auto j = 0; j < n*n; ++j) + // { + // println(*vr); + // ++vr; + // } + return info; + } + +// +// zgeev +// +// Eigenvalues and eigenvectors of complex, square matrix A +// A can be a general complex matrix, not assumed symmetric +// +LAPACK_INT +zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' + char jobvr, //if 'V', compute right eigenvectors, else 'N' + LAPACK_INT n, //number of rows/cols of A + Cplx const* A, //matrix A + Cplx * d, //eigenvalues + Cplx * vl, //left eigenvectors on return + Cplx * vr) //right eigenvectors on return + { + static const LAPACK_INT one = 1; + std::vector cpA; + std::vector work; + std::vector rwork; + LAPACK_INT nevecl = (jobvl == 'V' ? n : 1); + LAPACK_INT nevecr = (jobvr == 'V' ? n : 1); + LAPACK_INT lwork = std::max(one,4*n); + work.resize(lwork); + LAPACK_INT lrwork = std::max(one,2*n); + rwork.resize(lrwork); + + //Copy A data into cpA + cpA.resize(n*n); + auto pA = reinterpret_cast(A); + std::copy(pA,pA+n*n,cpA.data()); + + auto pd = reinterpret_cast(d); + auto pvl = reinterpret_cast(vl); + auto pvr = reinterpret_cast(vr); + + LAPACK_INT info = 0; +#ifdef PLATFORM_acml + F77NAME(zgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,pd,pvl,&nevecl,pvr,&nevecr,work.data(),&lwork,rwork.data(),&info,1,1); +#else + F77NAME(zgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,pd,pvl,&nevecl,pvr,&nevecr,work.data(),&lwork,rwork.data(),&info); +#endif + return info; + } + +} //namespace itensor + diff --git a/itensor/tensor/lapack/makefile_lapack_wrap.h b/itensor/tensor/lapack/makefile_lapack_wrap.h new file mode 100644 index 000000000..217c019fc --- /dev/null +++ b/itensor/tensor/lapack/makefile_lapack_wrap.h @@ -0,0 +1,789 @@ +// +// Copyright 2018 The Simons Foundation, Inc. - All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef __ITENSOR_LAPACK_WRAP_h +#define __ITENSOR_LAPACK_WRAP_h + +#include +#ifndef ITENSOR_USE_CMAKE +#include "itensor/config.h" +#endif // ITENSOR_USE_CMAKE +#include "itensor/types.h" +#include "itensor/util/timers.h" + +// +// Headers and typedefs +// + +// +// +// Generic Linux LAPACK +// +// +#ifdef PLATFORM_lapack + +#define LAPACK_REQUIRE_EXTERN + +namespace itensor { + using LAPACK_INT = int; + using LAPACK_REAL = double; + typedef struct + { + LAPACK_REAL real, imag; + } LAPACK_COMPLEX; +} +#elif defined PLATFORM_openblas + +#define ITENSOR_USE_CBLAS + +#include "cblas.h" +#include "lapacke.h" +#undef I //lapacke.h includes complex.h which defined an `I` macro + //that can cause problems, so best to undefine it + +namespace itensor { +using LAPACK_INT = lapack_int; +using LAPACK_REAL = double; +using LAPACK_COMPLEX = lapack_complex_double; + +inline LAPACK_REAL& +realRef(LAPACK_COMPLEX & z) + { + auto* p = reinterpret_cast(&z); + return p[0]; + } + +inline LAPACK_REAL& +imagRef(LAPACK_COMPLEX & z) + { + auto* p = reinterpret_cast(&z); + return p[1]; + } +} + +// +// +// Apple Accelerate/vecLib +// +// +#elif defined PLATFORM_macos + +#define ITENSOR_USE_CBLAS +//#define ITENSOR_USE_ZGEMM + +#include + namespace itensor { + using LAPACK_INT = __CLPK_integer; + using LAPACK_REAL = __CLPK_doublereal; + using LAPACK_COMPLEX = __CLPK_doublecomplex; + + inline LAPACK_REAL& + realRef(LAPACK_COMPLEX & z) { return z.r; } + + inline LAPACK_REAL& + imagRef(LAPACK_COMPLEX & z) { return z.i; } + } + +// +// +// Intel MKL +// +// +#elif defined PLATFORM_mkl + +#define ITENSOR_USE_CBLAS +#define ITENSOR_USE_ZGEMM + +#include "mkl_cblas.h" +#include "mkl_lapack.h" + namespace itensor { + using LAPACK_INT = MKL_INT; + using LAPACK_REAL = double; + using LAPACK_COMPLEX = MKL_Complex16; + + inline LAPACK_REAL& + realRef(LAPACK_COMPLEX & z) { return z.real; } + + inline LAPACK_REAL& + imagRef(LAPACK_COMPLEX & z) { return z.imag; } + } + +// +// +// AMD ACML +// +// +#elif defined PLATFORM_acml + +#define LAPACK_REQUIRE_EXTERN +//#include "acml.h" + namespace itensor { + using LAPACK_INT = int; + using LAPACK_REAL = double; + typedef struct + { + LAPACK_REAL real, imag; + } LAPACK_COMPLEX; + + inline LAPACK_REAL& + realRef(LAPACK_COMPLEX & z) { return z.real; } + + inline LAPACK_REAL& + imagRef(LAPACK_COMPLEX & z) { return z.imag; } + } + +#endif // different PLATFORM types + + + +#ifdef FORTRAN_NO_TRAILING_UNDERSCORE +#define F77NAME(x) x +#else +#if defined(LAPACK_GLOBAL) || defined(LAPACK_NAME) +#define F77NAME(x) LAPACK_##x +#else +#define F77NAME(x) x##_ +#endif +#endif + +namespace itensor { + +// +// +// Forward declarations of fortran lapack routines +// +// +#ifdef LAPACK_REQUIRE_EXTERN +extern "C" { + +//dnrm2 declaration +#ifdef ITENSOR_USE_CBLAS +LAPACK_REAL cblas_dnrm2(LAPACK_INT N, LAPACK_REAL *X, LAPACK_INT incX); +#else +LAPACK_REAL F77NAME(dnrm2)(LAPACK_INT* N, LAPACK_REAL* X, LAPACK_INT* incx); +#endif + + +//daxpy declaration +#ifdef ITENSOR_USE_CBLAS +void cblas_daxpy(const int n, const double alpha, const double *X, const int incX, double *Y, const int incY); +#else +void F77NAME(daxpy)(LAPACK_INT* n, LAPACK_REAL* alpha, + LAPACK_REAL* X, LAPACK_INT* incx, + LAPACK_REAL* Y, LAPACK_INT* incy); +#endif + +//ddot declaration +#ifdef ITENSOR_USE_CBLAS +LAPACK_REAL +cblas_ddot(const LAPACK_INT N, const LAPACK_REAL *X, const LAPACK_INT incx, const LAPACK_REAL *Y, const LAPACK_INT incy); +#else +LAPACK_REAL F77NAME(ddot)(LAPACK_INT* N, LAPACK_REAL* X, LAPACK_INT* incx, LAPACK_REAL* Y, LAPACK_INT* incy); +#endif + +//zdotc declaration +#ifdef ITENSOR_USE_CBLAS +LAPACK_REAL +cblas_zdotc_sub(const LAPACK_INT N, const void *X, const LAPACK_INT incx, const void *Y, const LAPACK_INT incy, void *res); +#else +LAPACK_COMPLEX F77NAME(zdotc)(LAPACK_INT* N, LAPACK_COMPLEX* X, LAPACK_INT* incx, LAPACK_COMPLEX* Y, LAPACK_INT* incy); +#endif + +//dgemm declaration +#ifdef ITENSOR_USE_CBLAS +void cblas_dgemm(const enum CBLAS_ORDER __Order, + const enum CBLAS_TRANSPOSE __TransA, + const enum CBLAS_TRANSPOSE __TransB, const int __M, const int __N, + const int __K, const double __alpha, const double *__A, + const int __lda, const double *__B, const int __ldb, + const double __beta, double *__C, const int __ldc); +#else +void F77NAME(dgemm)(char*,char*,LAPACK_INT*,LAPACK_INT*,LAPACK_INT*, + LAPACK_REAL*,LAPACK_REAL*,LAPACK_INT*,LAPACK_REAL*, + LAPACK_INT*,LAPACK_REAL*,LAPACK_REAL*,LAPACK_INT*); +#endif + +//zgemm declaration +#ifdef PLATFORM_openblas +void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, + OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, + OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, + OPENBLAS_CONST blasint M, + OPENBLAS_CONST blasint N, + OPENBLAS_CONST blasint K, + OPENBLAS_CONST double *alpha, + OPENBLAS_CONST double *A, + OPENBLAS_CONST blasint lda, + OPENBLAS_CONST double *B, + OPENBLAS_CONST blasint ldb, + OPENBLAS_CONST double *beta, + double *C, + OPENBLAS_CONST blasint ldc); +#else //platform not openblas + +#ifdef ITENSOR_USE_CBLAS +void cblas_zgemm(const enum CBLAS_ORDER __Order, + const enum CBLAS_TRANSPOSE __TransA, + const enum CBLAS_TRANSPOSE __TransB, const int __M, const int __N, + const int __K, const void *__alpha, const void *__A, const int __lda, + const void *__B, const int __ldb, const void *__beta, void *__C, + const int __ldc); +#else +void F77NAME(zgemm)(char* transa,char* transb,LAPACK_INT* m,LAPACK_INT* n,LAPACK_INT* k, + LAPACK_COMPLEX* alpha,LAPACK_COMPLEX* A,LAPACK_INT* LDA,LAPACK_COMPLEX* B, + LAPACK_INT* LDB,LAPACK_COMPLEX* beta,LAPACK_COMPLEX* C,LAPACK_INT* LDC); +#endif + +#endif //zgemm declaration + +//dgemv declaration +#ifdef ITENSOR_USE_CBLAS +void cblas_dgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const LAPACK_INT M, const LAPACK_INT N, + const LAPACK_REAL alpha, const LAPACK_REAL *A, const LAPACK_INT lda, + const LAPACK_REAL *X, const LAPACK_INT incX, const LAPACK_REAL beta, LAPACK_REAL *Y, + const LAPACK_INT incY); +#else +void F77NAME(dgemv)(char* transa,LAPACK_INT* M,LAPACK_INT* N,LAPACK_REAL* alpha, LAPACK_REAL* A, + LAPACK_INT* LDA, LAPACK_REAL* X, LAPACK_INT* incx, LAPACK_REAL* beta, + LAPACK_REAL* Y, LAPACK_INT* incy); +#endif + +//zgemv declaration +#ifdef PLATFORM_openblas +void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, + OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, + OPENBLAS_CONST blasint m, + OPENBLAS_CONST blasint n, + OPENBLAS_CONST double *alpha, + OPENBLAS_CONST double *a, + OPENBLAS_CONST blasint lda, + OPENBLAS_CONST double *x, + OPENBLAS_CONST blasint incx, + OPENBLAS_CONST double *beta, + double *y, + OPENBLAS_CONST blasint incy); +#else +#ifdef ITENSOR_USE_CBLAS +void cblas_zgemv(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE trans, const LAPACK_INT m, + const LAPACK_INT n, const void *alpha, const void *a, const LAPACK_INT lda, + const void *x, const LAPACK_INT incx, const void *beta, void *y, const LAPACK_INT incy); +#else +void F77NAME(zgemv)(char* transa,LAPACK_INT* M,LAPACK_INT* N,LAPACK_COMPLEX* alpha, LAPACK_COMPLEX* A, + LAPACK_INT* LDA, LAPACK_COMPLEX* X, LAPACK_INT* incx, LAPACK_COMPLEX* beta, + LAPACK_COMPLEX* Y, LAPACK_INT* incy); +#endif +#endif //zgemv declaration + +#ifdef PLATFORM_acml +void F77NAME(dsyev)(char *jobz, char *uplo, int *n, double *a, int *lda, + double *w, double *work, int *lwork, int *info, + int jobz_len, int uplo_len); +#else +void F77NAME(dsyev)(const char* jobz, const char* uplo, const LAPACK_INT* n, double* a, + const LAPACK_INT* lda, double* w, double* work, const LAPACK_INT* lwork, + LAPACK_INT* info ); +#endif + +#ifdef ITENSOR_USE_CBLAS +void cblas_dscal(const LAPACK_INT N, const LAPACK_REAL alpha, LAPACK_REAL* X,const LAPACK_INT incX); +#else +void F77NAME(dscal)(LAPACK_INT* N, LAPACK_REAL* alpha, LAPACK_REAL* X,LAPACK_INT* incX); +#endif + + +#ifdef PLATFORM_acml +void F77NAME(dgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, + double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, + double *work, LAPACK_INT *lwork, LAPACK_INT *iwork, LAPACK_INT *info, int jobz_len); +#else +void F77NAME(dgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, + double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, + double *work, LAPACK_INT *lwork, LAPACK_INT *iwork, LAPACK_INT *info); +#endif + + +#ifdef PLATFORM_acml + void F77NAME(dgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, + double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, + double *work, LAPACK_INT *lwork, LAPACK_INT *info, int jobz_len); +#else + void F77NAME(dgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, + double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, + double *work, LAPACK_INT *lwork, LAPACK_INT *info); +#endif + + + #ifdef PLATFORM_acml + void F77NAME(zgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, LAPACK_REAL *s, + LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_REAL * rwork, LAPACK_INT *info, int jobz_len); +#else + void F77NAME(zgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, LAPACK_REAL *s, + LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_REAL * rwork, LAPACK_INT *info); +#endif + +#ifdef PLATFORM_acml +void F77NAME(zgesdd)(char *jobz, int *m, int *n, LAPACK_COMPLEX *a, int *lda, double *s, + LAPACK_COMPLEX *u, int *ldu, LAPACK_COMPLEX *vt, int *ldvt, + LAPACK_COMPLEX *work, int *lwork, double *rwork, int *iwork, int *info, + int jobz_len); +#else +void F77NAME(zgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, double *s, + LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, LAPACK_INT *iwork, LAPACK_INT *info); +#endif + +void F77NAME(dgeqrf)(LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, + double *tau, double *work, LAPACK_INT *lwork, LAPACK_INT *info); + +void F77NAME(dorgqr)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, double *a, + LAPACK_INT *lda, double *tau, double *work, LAPACK_INT *lwork, + LAPACK_INT *info); + + +void F77NAME(zgeqrf)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, + LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_INT *info); + +#ifdef PLATFORM_lapacke +void LAPACKE_zungqr(int matrix_layout, LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, LAPACK_COMPLEX *a, + LAPACK_INT *lda, LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, + LAPACK_INT *info); +#else +void F77NAME(zungqr)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, LAPACK_COMPLEX *a, + LAPACK_INT *lda, LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, + LAPACK_INT *info); +#endif + +void F77NAME(dgesv)(LAPACK_INT *n, LAPACK_INT *nrhs, LAPACK_REAL *a, LAPACK_INT *lda, + LAPACK_INT *ipiv, LAPACK_REAL *b, LAPACK_INT *ldb, LAPACK_INT *info); + +void F77NAME(zgesv)(LAPACK_INT *n, LAPACK_INT *nrhs, LAPACK_COMPLEX *a, LAPACK_INT * lda, + LAPACK_INT *ipiv, LAPACK_COMPLEX *b, LAPACK_INT *ldb, LAPACK_INT *info); + +#ifdef PLATFORM_lapacke +double LAPACKE_dlange(int matrix_layout, char norm, lapack_int m, lapack_int n, const double* a, lapack_int lda); +#elif defined PLATFORM_acml +double F77NAME(dlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, double* a, LAPACK_INT* lda, double* work, LAPACK_INT norm_len); +#else +double F77NAME(dlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, double* a, LAPACK_INT* lda, double* work); +#endif + +#ifdef PLATFORM_lapacke +lapack_real LAPACKE_zlange(int matrix_layout, char norm, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda); +#elif defined PLATFORM_acml +LAPACK_REAL F77NAME(zlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, LAPACK_COMPLEX* a, LAPACK_INT* lda, double* work, LAPACK_INT norm_len); +#else +LAPACK_REAL F77NAME(zlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, LAPACK_COMPLEX* a, LAPACK_INT* lda, double* work); +#endif + +#ifdef PLATFORM_lapacke +lapack_int LAPACKE_zheev(int matrix_order, char jobz, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, double* w); +#elif defined PLATFORM_acml +void F77NAME(zheev)(char *jobz, char *uplo, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, + double *w, LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, + LAPACK_INT *info, LAPACK_INT jobz_len, LAPACK_INT uplo_len); +#else +void F77NAME(zheev)(char *jobz, char *uplo, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, + double *w, LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, + LAPACK_INT *info); +#endif + + +#ifdef PLATFORM_acml +void F77NAME(dsygv)(LAPACK_INT *itype, char *jobz, char *uplo, LAPACK_INT *n, double *a, + LAPACK_INT *lda, double *b, LAPACK_INT *ldb, double *w, double *work, + LAPACK_INT *lwork, LAPACK_INT *info, LAPACK_INT jobz_len, LAPACK_INT uplo_len); +#else +void F77NAME(dsygv)(LAPACK_INT *itype, char *jobz, char *uplo, LAPACK_INT *n, double *a, + LAPACK_INT *lda, double *b, LAPACK_INT *ldb, double *w, double *work, + LAPACK_INT *lwork, LAPACK_INT *info); +#endif + + +#ifdef PLATFORM_acml +void F77NAME(dgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, double *a, + LAPACK_INT *lda, double *wr, double *wi, double *vl, LAPACK_INT *ldvl, + double *vr, LAPACK_INT *ldvr, double *work, LAPACK_INT *lwork, + LAPACK_INT *info, LAPACK_INT jobvl_len, LAPACK_INT jobvr_len); +#else +void F77NAME(dgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, double *a, + LAPACK_INT *lda, double *wr, double *wi, double *vl, LAPACK_INT *ldvl, + double *vr, LAPACK_INT *ldvr, double *work, LAPACK_INT *lwork, + LAPACK_INT *info); +#endif + + +#ifdef PLATFORM_acml +void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, + LAPACK_INT *lda, LAPACK_COMPLEX *w, LAPACK_COMPLEX *vl, + LAPACK_INT *ldvl, LAPACK_COMPLEX *vr, LAPACK_INT *ldvr, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, + LAPACK_INT *info, LAPACK_INT jobvl_len, LAPACK_INT jobvr_len); +#else +void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, + LAPACK_INT *lda, LAPACK_COMPLEX *w, LAPACK_COMPLEX *vl, + LAPACK_INT *ldvl, LAPACK_COMPLEX *vr, LAPACK_INT *ldvr, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, + LAPACK_INT *info); +#endif + +} //extern "C" +#endif + +// +// daxpy +// Y += alpha*X +// +void +daxpy_wrapper(LAPACK_INT n, //number of elements of X,Y + LAPACK_REAL alpha, //scale factor + const LAPACK_REAL* X, //pointer to head of vector X + LAPACK_INT incx, //increment with which to step through X + LAPACK_REAL* Y, //pointer to head of vector Y + LAPACK_INT incy); //increment with which to step through Y + +// +// dnrm2 +// +LAPACK_REAL +dnrm2_wrapper(LAPACK_INT N, + const LAPACK_REAL* X, + LAPACK_INT incx = 1); + +// +// ddot +// +LAPACK_REAL +ddot_wrapper(LAPACK_INT N, + const LAPACK_REAL* X, + LAPACK_INT incx, + const LAPACK_REAL* Y, + LAPACK_INT incy); + +// +// zdotc +// +Cplx +zdotc_wrapper(LAPACK_INT N, + Cplx const* X, + LAPACK_INT incx, + Cplx const* Y, + LAPACK_INT incy); + +// +// dgemm +// +void +gemm_wrapper(bool transa, + bool transb, + LAPACK_INT m, + LAPACK_INT n, + LAPACK_INT k, + LAPACK_REAL alpha, + LAPACK_REAL const* A, + LAPACK_REAL const* B, + LAPACK_REAL beta, + LAPACK_REAL * C); + +// +// zgemm +// +void +gemm_wrapper(bool transa, + bool transb, + LAPACK_INT m, + LAPACK_INT n, + LAPACK_INT k, + Cplx alpha, + Cplx const* A, + Cplx const* B, + Cplx beta, + Cplx * C); + +// +// dgemv - matrix*vector multiply +// +void +gemv_wrapper(bool trans, + LAPACK_REAL alpha, + LAPACK_REAL beta, + LAPACK_INT m, + LAPACK_INT n, + const LAPACK_REAL* A, + const LAPACK_REAL* x, + LAPACK_INT incx, + LAPACK_REAL* y, + LAPACK_INT incy); + +// +// zgemv - matrix*vector multiply +// +void +gemv_wrapper(bool trans, + Cplx alpha, + Cplx beta, + LAPACK_INT m, + LAPACK_INT n, + Cplx const* A, + Cplx const* x, + LAPACK_INT incx, + Cplx* y, + LAPACK_INT incy); + + +// +// dsyev +// +void +dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs + char uplo, //if uplo=='U', read from upper triangle of A + LAPACK_INT n, //number of cols of A + LAPACK_REAL* A, //symmetric matrix A + LAPACK_REAL* eigs, //eigenvalues on return + LAPACK_INT& info); //error info + +// +// dscal +// +void +dscal_wrapper(LAPACK_INT N, + LAPACK_REAL alpha, + LAPACK_REAL* data, + LAPACK_INT inc = 1); + + +void +dgesdd_wrapper(char * jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT* m, //number of rows of input matrix *A + LAPACK_INT* n, //number of cols of input matrix *A + LAPACK_REAL *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + LAPACK_REAL *u, //on return, unitary matrix U + LAPACK_REAL *vt, //on return, unitary matrix V transpose + LAPACK_INT *info); + +void +zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + Cplx *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + Cplx *u, //on return, unitary matrix U + Cplx *vt, //on return, unitary matrix V transpose + LAPACK_INT *info); + + + void +dgesvd_wrapper(char * jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT* m, //number of rows of input matrix *A + LAPACK_INT* n, //number of cols of input matrix *A + LAPACK_REAL *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + LAPACK_REAL *u, //on return, unitary matrix U + LAPACK_REAL *vt, //on return, unitary matrix V transpose + LAPACK_INT *info); + +void +zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + Cplx *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + Cplx *u, //on return, unitary matrix U + Cplx *vt, //on return, unitary matrix V transpose + LAPACK_INT *info); + + +// +// dgeqrf +// +// QR factorization of a real matrix A +// +void +dgeqrf_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_REAL* A, //matrix A + //on return upper triangle contains R + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_REAL* tau, //scalar factors of elementary reflectors + //length should be min(m,n) + LAPACK_INT* info); //error info + +// +// dorgqr +// +// Generates Q from output of QR factorization routine dgeqrf (see above) +// +void +dorgqr_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) + LAPACK_REAL* A, //matrix A, as returned from "A" argument of dgeqrf + //on return contains Q + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_REAL* tau, //scalar factors as returned by dgeqrf + LAPACK_INT* info); //error info + + + // +// dgeqrf +// +// QR factorization of a complex matrix A +// +void +zgeqrf_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + Cplx* A, //matrix A + //on return upper triangle contains R + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_COMPLEX* tau, //scalar factors of elementary reflectors + //length should be min(m,n) + LAPACK_INT* info); //error info + +// +// dorgqr +// +// Generates Q from output of QR factorization routine zgeqrf (see above) +// +void +zungqr_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) + Cplx* A, //matrix A, as returned from "A" argument of dgeqrf + //on return contains Q + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_COMPLEX* tau, //scalar factors as returned by zgeqrf + LAPACK_INT* info); //error info + +// dgesv +// +// computes the solution to system of linear equations A*X = B +// where A is a general real matrix +// +LAPACK_INT +dgesv_wrapper(LAPACK_INT n, + LAPACK_INT nrhs, + LAPACK_REAL* a, + LAPACK_REAL* b); + +// +// zgesv +// +// computes the solution to system of linear euqations A*X =B +// where A is a general complex matrix +// +LAPACK_INT +zgesv_wrapper(LAPACK_INT n, + LAPACK_INT nrhs, + Cplx* a, + Cplx* b); + +// +// dlange +// +// returns the value of the 1-norm, Frobenius norm, infinity-norm, +// or the largest absolute value of any element of a general rectangular matrix. +// +double +dlange_wrapper(char norm, + LAPACK_INT m, + LAPACK_INT n, + double* a); + +// +// zlange +// +// returns the value of the 1-norm, Frobenius norm, infinity-norm, +// or the largest absolute value of any element of a general rectangular matrix. +// +LAPACK_REAL +zlange_wrapper(char norm, + LAPACK_INT m, + LAPACK_INT n, + Cplx* a); + +// +// zheev +// +// Eigenvalues and eigenvectors of complex Hermitian matrix A +// +LAPACK_INT +zheev_wrapper(LAPACK_INT N, //number of cols of A + Cplx * A, //matrix A, on return contains eigenvectors + LAPACK_REAL * d); //eigenvalues on return + +// +// dsygv +// +// Eigenvalues and eigenvectors of generalized eigenvalue problem +// A*x = lambda*B*x +// A and B must be symmetric +// B must be positive definite +// +void +dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs + //if 'N', only eigenvalues + char* uplo, //if 'U', use upper triangle of A + LAPACK_INT* n, //number of cols of A + LAPACK_REAL* A, //matrix A, on return contains eigenvectors + LAPACK_REAL* B, //matrix B + LAPACK_REAL* d, //eigenvalues on return + LAPACK_INT* info); //error info + +// +// dgeev +// +// Eigenvalues and eigenvectors of real, square matrix A +// A can be a general real matrix, not assumed symmetric +// +// Returns "info" integer +// +LAPACK_INT +dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' + char jobvr, //if 'V', compute right eigenvectors, else 'N' + LAPACK_INT n, //number of rows/cols of A + LAPACK_REAL const* A, //matrix A + LAPACK_REAL* dr, //real parts of eigenvalues + LAPACK_REAL* di, //imaginary parts of eigenvalues + LAPACK_REAL* vl, //left eigenvectors on return + LAPACK_REAL* vr); //right eigenvectors on return + +// +// zgeev +// +// Eigenvalues and eigenvectors of complex, square matrix A +// A can be a general complex matrix, not assumed symmetric +// +// Returns "info" integer +// +LAPACK_INT +zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' + char jobvr, //if 'V', compute right eigenvectors, else 'N' + LAPACK_INT n, //number of rows/cols of A + Cplx const* A, //matrix A + Cplx * d, //eigenvalues + Cplx * vl, //left eigenvectors on return + Cplx * vr); //right eigenvectors on return + +} //namespace itensor + +#endif diff --git a/itensor/tensor/lapack_wrap.cc b/itensor/tensor/lapack_wrap.cc index 41d54825a..b0419dd05 100644 --- a/itensor/tensor/lapack_wrap.cc +++ b/itensor/tensor/lapack_wrap.cc @@ -1,844 +1,5 @@ -// -// Copyright 2018 The Simons Foundation, Inc. - All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "itensor/tensor/lapack_wrap.h" -//#include "itensor/tensor/permutecplx.h" - -namespace itensor { - -// -// daxpy -// Y += alpha*X -// -void -daxpy_wrapper(LAPACK_INT n, //number of elements of X,Y - LAPACK_REAL alpha, //scale factor - const LAPACK_REAL* X, //pointer to head of vector X - LAPACK_INT incx, //increment with which to step through X - LAPACK_REAL* Y, //pointer to head of vector Y - LAPACK_INT incy) //increment with which to step through Y - { -#ifdef ITENSOR_USE_CBLAS - cblas_daxpy(n,alpha,X,incx,Y,incy); -#else - auto Xnc = const_cast(X); - F77NAME(daxpy)(&n,&alpha,Xnc,&incx,Y,&incy); -#endif - } - -// -// dnrm2 -// -LAPACK_REAL -dnrm2_wrapper(LAPACK_INT N, - const LAPACK_REAL* X, - LAPACK_INT incx) - { -#ifdef ITENSOR_USE_CBLAS - return cblas_dnrm2(N,X,incx); -#else - auto *Xnc = const_cast(X); - return F77NAME(dnrm2)(&N,Xnc,&incx); -#endif - return -1; - } - -// -// ddot -// -LAPACK_REAL -ddot_wrapper(LAPACK_INT N, - const LAPACK_REAL* X, - LAPACK_INT incx, - const LAPACK_REAL* Y, - LAPACK_INT incy) - { -#ifdef ITENSOR_USE_CBLAS - return cblas_ddot(N,X,incx,Y,incy); -#else - auto *Xnc = const_cast(X); - auto *Ync = const_cast(Y); - return F77NAME(ddot)(&N,Xnc,&incx,Ync,&incy); -#endif - return -1; - } - -// -// zdotc -// -Cplx -zdotc_wrapper(LAPACK_INT N, - Cplx const* X, - LAPACK_INT incx, - Cplx const* Y, - LAPACK_INT incy) - { -#ifdef ITENSOR_USE_CBLAS - Cplx res; -#if defined PLATFORM_openblas - auto pX = reinterpret_cast(X); - auto pY = reinterpret_cast(Y); - auto pres = reinterpret_cast(&res); -#else - auto pX = reinterpret_cast(X); - auto pY = reinterpret_cast(Y); - auto pres = reinterpret_cast(&res); -#endif - cblas_zdotc_sub(N,pX,incx,pY,incy,pres); - return res; -#else - auto ncX = const_cast(X); - auto ncY = const_cast(Y); - auto pX = reinterpret_cast(ncX); - auto pY = reinterpret_cast(ncY); - auto res = F77NAME(zdotc)(&N,pX,&incx,pY,&incy); - auto cplx_res = reinterpret_cast(&res); - return *cplx_res; -#endif - return Cplx{}; - } - -// -// dgemm -// -void -gemm_wrapper(bool transa, - bool transb, - LAPACK_INT m, - LAPACK_INT n, - LAPACK_INT k, - LAPACK_REAL alpha, - LAPACK_REAL const* A, - LAPACK_REAL const* B, - LAPACK_REAL beta, - LAPACK_REAL * C) - { - LAPACK_INT lda = m, - ldb = k; -#ifdef ITENSOR_USE_CBLAS - auto at = CblasNoTrans, - bt = CblasNoTrans; - if(transa) - { - at = CblasTrans; - lda = k; - } - if(transb) - { - bt = CblasTrans; - ldb = n; - } - cblas_dgemm(CblasColMajor,at,bt,m,n,k,alpha,A,lda,B,ldb,beta,C,m); -#else - auto *pA = const_cast(A); - auto *pB = const_cast(B); - char at = 'N'; - char bt = 'N'; - if(transa) - { - at = 'T'; - lda = k; - } - if(transb) - { - bt = 'T'; - ldb = n; - } - F77NAME(dgemm)(&at,&bt,&m,&n,&k,&alpha,pA,&lda,pB,&ldb,&beta,C,&m); -#endif - } - -// -// zgemm -// -void -gemm_wrapper(bool transa, - bool transb, - LAPACK_INT m, - LAPACK_INT n, - LAPACK_INT k, - Cplx alpha, - const Cplx* A, - const Cplx* B, - Cplx beta, - Cplx* C) - { - LAPACK_INT lda = m, - ldb = k; -#ifdef PLATFORM_openblas - auto at = CblasNoTrans, - bt = CblasNoTrans; - if(transa) - { - at = CblasTrans; - lda = k; - } - if(transb) - { - bt = CblasTrans; - ldb = n; - } - //auto ralpha = realRef(alpha); - //auto ialpha = imagRef(alpha); - //auto rbeta = realRef(beta); - //auto ibeta = imagRef(beta); - //if(ialpha != 0.0 || ibeta != 0.0) - // { - // throw std::runtime_error("Complex alpha, beta not supported in zgemm for PLATFORM=openblas"); - // } - auto* palpha = reinterpret_cast(&alpha); - auto* pbeta = reinterpret_cast(&beta); - auto* pA = reinterpret_cast(A); - auto* pB = reinterpret_cast(B); - auto* pC = reinterpret_cast(C); - cblas_zgemm(CblasColMajor,at,bt,m,n,k,palpha,pA,lda,pB,ldb,pbeta,pC,m); -#else //platform not openblas -#ifdef ITENSOR_USE_CBLAS - auto at = CblasNoTrans, - bt = CblasNoTrans; - if(transa) - { - at = CblasTrans; - lda = k; - } - if(transb) - { - bt = CblasTrans; - ldb = n; - } - auto palpha = (void*)(&alpha); - auto pbeta = (void*)(&beta); - cblas_zgemm(CblasColMajor,at,bt,m,n,k,palpha,(void*)A,lda,(void*)B,ldb,pbeta,(void*)C,m); -#else //use Fortran zgemm - auto *ncA = const_cast(A); - auto *ncB = const_cast(B); - auto *pA = reinterpret_cast(ncA); - auto *pB = reinterpret_cast(ncB); - auto *pC = reinterpret_cast(C); - auto *palpha = reinterpret_cast(&alpha); - auto *pbeta = reinterpret_cast(&beta); - char at = 'N'; - char bt = 'N'; - if(transa) - { - at = 'T'; - lda = k; - } - if(transb) - { - bt = 'T'; - ldb = n; - } - F77NAME(zgemm)(&at,&bt,&m,&n,&k,palpha,pA,&lda,pB,&ldb,pbeta,pC,&m); -#endif -#endif - } - -void -gemv_wrapper(bool trans, - LAPACK_REAL alpha, - LAPACK_REAL beta, - LAPACK_INT m, - LAPACK_INT n, - const LAPACK_REAL* A, - const LAPACK_REAL* x, - LAPACK_INT incx, - LAPACK_REAL* y, - LAPACK_INT incy) - { -#ifdef ITENSOR_USE_CBLAS - auto Tr = trans ? CblasTrans : CblasNoTrans; - cblas_dgemv(CblasColMajor,Tr,m,n,alpha,A,m,x,incx,beta,y,incy); -#else - char Tr = trans ? 'T' : 'N'; - F77NAME(dgemv)(&Tr,&m,&n,&alpha,const_cast(A),&m,const_cast(x),&incx,&beta,y,&incy); -#endif - } - -void -gemv_wrapper(bool trans, - Cplx alpha, - Cplx beta, - LAPACK_INT m, - LAPACK_INT n, - Cplx const* A, - Cplx const* x, - LAPACK_INT incx, - Cplx* y, - LAPACK_INT incy) - { -#ifdef PLATFORM_openblas - auto Tr = trans ? CblasTrans : CblasNoTrans; - //auto ralpha = realRef(alpha); - //auto ialpha = imagRef(alpha); - //auto rbeta = realRef(beta); - //auto ibeta = imagRef(beta); - //if(ialpha != 0.0 || ibeta != 0.0) - // { - // throw std::runtime_error("Complex alpha, beta not supported in zgemm for PLATFORM=openblas"); - // } - auto* palpha = reinterpret_cast(&alpha); - auto* pbeta = reinterpret_cast(&beta); - auto* pA = reinterpret_cast(A); - auto* px = reinterpret_cast(x); - auto* py = reinterpret_cast(y); - cblas_zgemv(CblasColMajor,Tr,m,n,palpha,pA,m,px,incx,pbeta,py,incy); -#else //platform other than openblas -#ifdef ITENSOR_USE_CBLAS - auto Tr = trans ? CblasTrans : CblasNoTrans; - auto palpha = reinterpret_cast(&alpha); - auto pbeta = reinterpret_cast(&beta); - cblas_zgemv(CblasColMajor,Tr,m,n,palpha,(void*)A,m,(void*)x,incx,pbeta,(void*)y,incy); -#else - char Tr = trans ? 'T' : 'N'; - auto ncA = const_cast(A); - auto ncx = const_cast(x); - auto pA = reinterpret_cast(ncA); - auto px = reinterpret_cast(ncx); - auto py = reinterpret_cast(y); - auto palpha = reinterpret_cast(&alpha); - auto pbeta = reinterpret_cast(&beta); - F77NAME(zgemv)(&Tr,&m,&n,palpha,pA,&m,px,&incx,pbeta,py,&incy); -#endif -#endif - } - - -// -// dsyev -// -void -dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs - char uplo, //if uplo=='U', read from upper triangle of A - LAPACK_INT n, //number of cols of A - LAPACK_REAL* A, //symmetric matrix A - LAPACK_REAL* eigs, //eigenvalues on return - LAPACK_INT& info) //error info - { - std::vector work; - LAPACK_INT lda = n; - -#ifdef PLATFORM_acml - static const LAPACK_INT one = 1; - LAPACK_INT lwork = std::max(one,3*n-1); - work.resize(lwork+2); - F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,work.data(),&lwork,&info,1,1); -#else - //Compute optimal workspace size (will be written to wkopt) - LAPACK_INT lwork = -1; //tell dsyev to compute optimal size - LAPACK_REAL wkopt = 0; - F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,&wkopt,&lwork,&info); - lwork = LAPACK_INT(wkopt); - work.resize(lwork+2); - F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,work.data(),&lwork,&info); -#endif - } - -// -// dscal -// -void -dscal_wrapper(LAPACK_INT N, - LAPACK_REAL alpha, - LAPACK_REAL* data, - LAPACK_INT inc) - { -#ifdef ITENSOR_USE_CBLAS - cblas_dscal(N,alpha,data,inc); -#else - F77NAME(dscal)(&N,&alpha,data,&inc); -#endif - } - -void -zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT *m, //number of rows of input matrix *A - LAPACK_INT *n, //number of cols of input matrix *A - Cplx *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - Cplx *u, //on return, unitary matrix U - Cplx *vt, //on return, unitary matrix V transpose - LAPACK_INT *info) - { - std::vector work; - std::vector rwork; - std::vector iwork; - auto pA = reinterpret_cast(A); - auto pU = reinterpret_cast(u); - auto pVt = reinterpret_cast(vt); - LAPACK_INT l = std::min(*m,*n), - g = std::max(*m,*n); - LAPACK_INT lwork = l*l+2*l+g+100; - work.resize(lwork); - rwork.resize(5*l*(1+l)); - iwork.resize(8*l); -#ifdef PLATFORM_acml - LAPACK_INT jobz_len = 1; - F77NAME(zgesdd)(jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),iwork.data(),info,jobz_len); -#else - F77NAME(zgesdd)(jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),iwork.data(),info); -#endif - } - - - - void -dgesdd_wrapper(char* jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT *m, //number of rows of input matrix *A - LAPACK_INT *n, //number of cols of input matrix *A - LAPACK_REAL *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - LAPACK_REAL *u, //on return, unitary matrix U - LAPACK_REAL *vt, //on return, unitary matrix V transpose - LAPACK_INT *info) - { - std::vector work; - std::vector iwork; - LAPACK_INT l = std::min(*m,*n), - g = std::max(*m,*n); - LAPACK_INT lwork = l*(6 + 4*l) + g; - work.resize(lwork); - iwork.resize(8*l); -#ifdef PLATFORM_acml - LAPACK_INT jobz_len = 1; - F77NAME(dgesdd)(jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork,iwork.data(),info,jobz_len); -#else - F77NAME(dgesdd)(jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork,iwork.data(),info); -#endif - } - - - - void -zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT *m, //number of rows of input matrix *A - LAPACK_INT *n, //number of cols of input matrix *A - Cplx *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - Cplx *u, //on return, unitary matrix U - Cplx *vt, //on return, unitary matrix V transpose - LAPACK_INT *info) - { - std::vector work; - std::vector rwork; - std::vector iwork; - auto pA = reinterpret_cast(A); - auto pU = reinterpret_cast(u); - auto pVt = reinterpret_cast(vt); - LAPACK_INT l = std::min(*m,*n), - g = std::max(*m,*n); - LAPACK_INT lwork = l*l+2*l+g+100; - work.resize(lwork); - rwork.resize(5*l*(1+l)); - iwork.resize(8*l); -#ifdef PLATFORM_acml - LAPACK_INT jobz_len = 1; - F77NAME(zgesvd)(jobz,jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),info,jobz_len); -#else - F77NAME(zgesvd)(jobz,jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),info); -#endif - } - - - - void -dgesvd_wrapper(char* jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT *m, //number of rows of input matrix *A - LAPACK_INT *n, //number of cols of input matrix *A - LAPACK_REAL *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - LAPACK_REAL *u, //on return, unitary matrix U - LAPACK_REAL *vt, //on return, unitary matrix V transpose - LAPACK_INT *info) - { - std::vector work; - // std::vector superb; - - std::vector iwork; - LAPACK_INT l = std::min(*m,*n), - g = std::max(*m,*n); - LAPACK_INT lwork = l*(6 + 4*l) + g; - work.resize(lwork); - iwork.resize(8*l); - //superb.resize(l -1); -#ifdef PLATFORM_acml - LAPACK_INT jobz_len = 1; - F77NAME(dgesvd)(jobz,jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork, info, jobz_len); -#else - F77NAME(dgesvd)(jobz,jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork, info); -#endif - } - -// -// dgeqrf -// -// QR factorization of a real matrix A -// -void -dgeqrf_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_REAL* A, //matrix A - //on return upper triangle contains R - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_REAL* tau, //scalar factors of elementary reflectors - //length should be min(m,n) - LAPACK_INT* info) //error info - { - static const LAPACK_INT one = 1; - std::vector work; - LAPACK_INT lwork = std::max(one,4*std::max(*n,*m)); - work.resize(lwork+2); - F77NAME(dgeqrf)(m,n,A,lda,tau,work.data(),&lwork,info); - } - -// -// dorgqr -// -// Generates Q from output of QR factorization routine dgeqrf (see above) -// -void -dorgqr_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) - LAPACK_REAL* A, //matrix A, as returned from "A" argument of dgeqrf - //on return contains Q - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_REAL* tau, //scalar factors as returned by dgeqrf - LAPACK_INT* info) //error info - { - static const LAPACK_INT one = 1; - std::vector work; - auto lwork = std::max(one,4*std::max(*n,*m)); - work.resize(lwork+2); - F77NAME(dorgqr)(m,n,k,A,lda,tau,work.data(),&lwork,info); - } - - - // -// dgeqrf -// -// QR factorization of a complex matrix A -// -void -zgeqrf_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - Cplx* A, //matrix A - //on return upper triangle contains R - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_COMPLEX* tau, //scalar factors of elementary reflectors - //length should be min(m,n) - LAPACK_INT* info) //error info - { - static const LAPACK_INT one = 1; - std::vector work; - LAPACK_INT lwork = std::max(one,4*std::max(*n,*m)); - work.resize(lwork+2); - static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); - auto pA = reinterpret_cast(A); - F77NAME(zgeqrf)(m,n,pA,lda,tau,work.data(),&lwork,info); - } - -// -// dorgqr -// -// Generates Q from output of QR factorization routine zgeqrf (see above) -// -void -zungqr_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) - Cplx* A, //matrix A, as returned from "A" argument of dgeqrf - //on return contains Q - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_COMPLEX* tau, //scalar factors as returned by dgeqrf - LAPACK_INT* info) //error info - { - static const LAPACK_INT one = 1; - std::vector work; - auto lwork = std::max(one,4*std::max(*n,*m)); - work.resize(lwork+2); - static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); - auto pA = reinterpret_cast(A); - #ifdef PLATFORM_lapacke - LAPACKE_zungqr(LAPACK_COL_MAJOR,jobz,uplo,N,A,N,w.data()); - #else - F77NAME(zungqr)(m,n,k,pA,lda,tau,work.data(),&lwork,info); - #endif - } - -// -// dgesv -// -LAPACK_INT -dgesv_wrapper(LAPACK_INT n, - LAPACK_INT nrhs, - LAPACK_REAL* a, - LAPACK_REAL* b) - { - LAPACK_INT lda = n; - std::vector ipiv(n); - LAPACK_INT ldb = n; - LAPACK_INT info = 0; - F77NAME(dgesv)(&n,&nrhs,a,&lda,ipiv.data(),b,&ldb,&info); - return info; - } - -// -// zgesv -// -LAPACK_INT -zgesv_wrapper(LAPACK_INT n, - LAPACK_INT nrhs, - Cplx* a, - Cplx* b) - { - auto pa = reinterpret_cast(a); - auto pb = reinterpret_cast(b); - LAPACK_INT lda = n; - std::vector ipiv(n); - LAPACK_INT ldb = n; - LAPACK_INT info = 0; - F77NAME(zgesv)(&n,&nrhs,pa,&lda,ipiv.data(),pb,&ldb,&info); - return info; - } - -// -// dlange -// -double -dlange_wrapper(char norm, - LAPACK_INT m, - LAPACK_INT n, - double* a) - { - double norma; -#ifdef PLATFORM_lapacke - norma = LAPACKE_dlange(LAPACK_COL_MAJOR,norm,m,n,a,m); -#else - std::vector work; - if(norm == 'I' || norm == 'i') work.resize(m); -#ifdef PLATFORM_acml - LAPACK_INT norm_len = 1; - norma = F77NAME(dlange)(&norm,&m,&n,a,&m,work.data(),norm_len); -#else - norma = F77NAME(dlange)(&norm,&m,&n,a,&m,work.data()); -#endif -#endif - return norma; - } - -// -// zlange -// -LAPACK_REAL -zlange_wrapper(char norm, - LAPACK_INT m, - LAPACK_INT n, - Cplx* a) - { - LAPACK_REAL norma; -#ifdef PLATFORM_lapacke - auto pA = reinterpret_cast(a); - norma = LAPACKE_zlange(LAPACK_COL_MAJOR,norm,m,n,pa,m); -#else - std::vector work; - if(norm == 'I' || norm == 'i') work.resize(m); - auto pA = reinterpret_cast(a); -#ifdef PLATFORM_acml - LAPACK_INT norm_len = 1; - norma = F77NAME(zlange)(&norm,&m,&n,pA,&m,work.data(),norm_len); -#else - norma = F77NAME(zlange)(&norm,&m,&n,pA,&m,work.data()); -#endif -#endif - return norma; - } - -// -// zheev -// -// Eigenvalues and eigenvectors of complex Hermitian matrix A -// -LAPACK_INT -zheev_wrapper(LAPACK_INT N, //number of cols of A - Cplx * A, //matrix A, on return contains eigenvectors - LAPACK_REAL * d) //eigenvalues on return - { - static const LAPACK_INT one = 1; - char jobz = 'V'; - char uplo = 'U'; -#ifdef PLATFORM_lapacke - std::vector work(N); - LAPACKE_zheev(LAPACK_COL_MAJOR,jobz,uplo,N,A,N,w.data()); -#else - LAPACK_INT lwork = std::max(one,3*N-1);//max(1, 1+6*N+2*N*N); - std::vector work(lwork); - std::vector rwork(lwork); - LAPACK_INT info = 0; - static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); - auto pA = reinterpret_cast(A); -#ifdef PLATFORM_acml - LAPACK_INT jobz_len = 1; - LAPACK_INT uplo_len = 1; - F77NAME(zheev)(&jobz,&uplo,&N,pA,&N,d,work.data(),&lwork,rwork.data(),&info,jobz_len,uplo_len); -#else - F77NAME(zheev)(&jobz,&uplo,&N,pA,&N,d,work.data(),&lwork,rwork.data(),&info); -#endif - -#endif //PLATFORM_lapacke - return info; - } - -// -// dsygv -// -// Eigenvalues and eigenvectors of generalized eigenvalue problem -// A*x = lambda*B*x -// A and B must be symmetric -// B must be positive definite -// -void -dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs - //if 'N', only eigenvalues - char* uplo, //if 'U', use upper triangle of A - LAPACK_INT* n, //number of cols of A - LAPACK_REAL* A, //matrix A, on return contains eigenvectors - LAPACK_REAL* B, //matrix B - LAPACK_REAL* d, //eigenvalues on return - LAPACK_INT* info) //error info - { - static const LAPACK_INT one = 1; - std::vector work; - LAPACK_INT itype = 1; - LAPACK_INT lwork = std::max(one,3*(*n)-1);//std::max(1, 1+6*N+2*N*N); - work.resize(lwork); -#ifdef PLATFORM_acml - LAPACK_INT jobz_len = 1; - LAPACK_INT uplo_len = 1; - F77NAME(dsygv)(&itype,jobz,uplo,n,A,n,B,n,d,work.data(),&lwork,info,jobz_len,uplo_len); -#else - F77NAME(dsygv)(&itype,jobz,uplo,n,A,n,B,n,d,work.data(),&lwork,info); -#endif - } - -// -// dgeev -// -// Eigenvalues and eigenvectors of real, square matrix A -// A can be a general real matrix, not assumed symmetric -// -LAPACK_INT -dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' - char jobvr, //if 'V', compute right eigenvectors, else 'N' - LAPACK_INT n, //number of rows/cols of A - LAPACK_REAL const* A, //matrix A - LAPACK_REAL* dr, //real parts of eigenvalues - LAPACK_REAL* di, //imaginary parts of eigenvalues - LAPACK_REAL* vl, //left eigenvectors on return - LAPACK_REAL* vr) //right eigenvectors on return - { - std::vector work; - std::vector cpA; - - cpA.resize(n*n); - std::copy(A,A+n*n,cpA.data()); - - LAPACK_INT nevecl = (jobvl == 'V' ? n : 1); - LAPACK_INT nevecr = (jobvr == 'V' ? n : 1); - LAPACK_INT info = 0; -#ifdef PLATFORM_acml - LAPACK_INT lwork = -1; - LAPACK_REAL wquery = 0; - F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,&wquery,&lwork,&info,1,1); - - lwork = static_cast(wquery); - work.resize(lwork); - F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,work.data(),&lwork,&info,1,1); -#else - LAPACK_INT lwork = -1; - LAPACK_REAL wquery = 0; - F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,&wquery,&lwork,&info); - - lwork = static_cast(wquery); - work.resize(lwork); - F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,work.data(),&lwork,&info); -#endif - //println("jobvl = ",jobvl); - //println("nevecl = ",nevecl); - //println("vl data = "); - //for(auto j = 0; j < n*n; ++j) - // { - // println(*vl); - // ++vl; - // } - //println("vr data = "); - //for(auto j = 0; j < n*n; ++j) - // { - // println(*vr); - // ++vr; - // } - return info; - } - -// -// zgeev -// -// Eigenvalues and eigenvectors of complex, square matrix A -// A can be a general complex matrix, not assumed symmetric -// -LAPACK_INT -zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' - char jobvr, //if 'V', compute right eigenvectors, else 'N' - LAPACK_INT n, //number of rows/cols of A - Cplx const* A, //matrix A - Cplx * d, //eigenvalues - Cplx * vl, //left eigenvectors on return - Cplx * vr) //right eigenvectors on return - { - static const LAPACK_INT one = 1; - std::vector cpA; - std::vector work; - std::vector rwork; - LAPACK_INT nevecl = (jobvl == 'V' ? n : 1); - LAPACK_INT nevecr = (jobvr == 'V' ? n : 1); - LAPACK_INT lwork = std::max(one,4*n); - work.resize(lwork); - LAPACK_INT lrwork = std::max(one,2*n); - rwork.resize(lrwork); - - //Copy A data into cpA - cpA.resize(n*n); - auto pA = reinterpret_cast(A); - std::copy(pA,pA+n*n,cpA.data()); - - auto pd = reinterpret_cast(d); - auto pvl = reinterpret_cast(vl); - auto pvr = reinterpret_cast(vr); - - LAPACK_INT info = 0; -#ifdef PLATFORM_acml - F77NAME(zgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,pd,pvl,&nevecl,pvr,&nevecr,work.data(),&lwork,rwork.data(),&info,1,1); -#else - F77NAME(zgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,pd,pvl,&nevecl,pvr,&nevecr,work.data(),&lwork,rwork.data(),&info); -#endif - return info; - } - -} //namespace itensor - +#ifdef ITENSOR_USE_CMAKE +#include +#else //ITENSOR_USE_CMAKE +#include +#endif //ITENSOR_USE_CMAKE \ No newline at end of file diff --git a/itensor/tensor/lapack_wrap.h b/itensor/tensor/lapack_wrap.h index 6cd53baf8..83b06cce4 100644 --- a/itensor/tensor/lapack_wrap.h +++ b/itensor/tensor/lapack_wrap.h @@ -1,787 +1,5 @@ -// -// Copyright 2018 The Simons Foundation, Inc. - All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef __ITENSOR_LAPACK_WRAP_h -#define __ITENSOR_LAPACK_WRAP_h - -#include -#include "itensor/config.h" -#include "itensor/types.h" -#include "itensor/util/timers.h" - -// -// Headers and typedefs -// - -// -// -// Generic Linux LAPACK -// -// -#ifdef PLATFORM_lapack - -#define LAPACK_REQUIRE_EXTERN - -namespace itensor { - using LAPACK_INT = int; - using LAPACK_REAL = double; - typedef struct - { - LAPACK_REAL real, imag; - } LAPACK_COMPLEX; -} -#elif defined PLATFORM_openblas - -#define ITENSOR_USE_CBLAS - -#include "cblas.h" -#include "lapacke.h" -#undef I //lapacke.h includes complex.h which defined an `I` macro - //that can cause problems, so best to undefine it - -namespace itensor { -using LAPACK_INT = lapack_int; -using LAPACK_REAL = double; -using LAPACK_COMPLEX = lapack_complex_double; - -inline LAPACK_REAL& -realRef(LAPACK_COMPLEX & z) - { - auto* p = reinterpret_cast(&z); - return p[0]; - } - -inline LAPACK_REAL& -imagRef(LAPACK_COMPLEX & z) - { - auto* p = reinterpret_cast(&z); - return p[1]; - } -} - -// -// -// Apple Accelerate/vecLib -// -// -#elif defined PLATFORM_macos - -#define ITENSOR_USE_CBLAS -//#define ITENSOR_USE_ZGEMM - -#include - namespace itensor { - using LAPACK_INT = __CLPK_integer; - using LAPACK_REAL = __CLPK_doublereal; - using LAPACK_COMPLEX = __CLPK_doublecomplex; - - inline LAPACK_REAL& - realRef(LAPACK_COMPLEX & z) { return z.r; } - - inline LAPACK_REAL& - imagRef(LAPACK_COMPLEX & z) { return z.i; } - } - -// -// -// Intel MKL -// -// -#elif defined PLATFORM_mkl - -#define ITENSOR_USE_CBLAS -#define ITENSOR_USE_ZGEMM - -#include "mkl_cblas.h" -#include "mkl_lapack.h" - namespace itensor { - using LAPACK_INT = MKL_INT; - using LAPACK_REAL = double; - using LAPACK_COMPLEX = MKL_Complex16; - - inline LAPACK_REAL& - realRef(LAPACK_COMPLEX & z) { return z.real; } - - inline LAPACK_REAL& - imagRef(LAPACK_COMPLEX & z) { return z.imag; } - } - -// -// -// AMD ACML -// -// -#elif defined PLATFORM_acml - -#define LAPACK_REQUIRE_EXTERN -//#include "acml.h" - namespace itensor { - using LAPACK_INT = int; - using LAPACK_REAL = double; - typedef struct - { - LAPACK_REAL real, imag; - } LAPACK_COMPLEX; - - inline LAPACK_REAL& - realRef(LAPACK_COMPLEX & z) { return z.real; } - - inline LAPACK_REAL& - imagRef(LAPACK_COMPLEX & z) { return z.imag; } - } - -#endif // different PLATFORM types - - - -#ifdef FORTRAN_NO_TRAILING_UNDERSCORE -#define F77NAME(x) x -#else -#if defined(LAPACK_GLOBAL) || defined(LAPACK_NAME) -#define F77NAME(x) LAPACK_##x -#else -#define F77NAME(x) x##_ -#endif -#endif - -namespace itensor { - -// -// -// Forward declarations of fortran lapack routines -// -// -#ifdef LAPACK_REQUIRE_EXTERN -extern "C" { - -//dnrm2 declaration -#ifdef ITENSOR_USE_CBLAS -LAPACK_REAL cblas_dnrm2(LAPACK_INT N, LAPACK_REAL *X, LAPACK_INT incX); -#else -LAPACK_REAL F77NAME(dnrm2)(LAPACK_INT* N, LAPACK_REAL* X, LAPACK_INT* incx); -#endif - - -//daxpy declaration -#ifdef ITENSOR_USE_CBLAS -void cblas_daxpy(const int n, const double alpha, const double *X, const int incX, double *Y, const int incY); -#else -void F77NAME(daxpy)(LAPACK_INT* n, LAPACK_REAL* alpha, - LAPACK_REAL* X, LAPACK_INT* incx, - LAPACK_REAL* Y, LAPACK_INT* incy); -#endif - -//ddot declaration -#ifdef ITENSOR_USE_CBLAS -LAPACK_REAL -cblas_ddot(const LAPACK_INT N, const LAPACK_REAL *X, const LAPACK_INT incx, const LAPACK_REAL *Y, const LAPACK_INT incy); -#else -LAPACK_REAL F77NAME(ddot)(LAPACK_INT* N, LAPACK_REAL* X, LAPACK_INT* incx, LAPACK_REAL* Y, LAPACK_INT* incy); -#endif - -//zdotc declaration -#ifdef ITENSOR_USE_CBLAS -LAPACK_REAL -cblas_zdotc_sub(const LAPACK_INT N, const void *X, const LAPACK_INT incx, const void *Y, const LAPACK_INT incy, void *res); -#else -LAPACK_COMPLEX F77NAME(zdotc)(LAPACK_INT* N, LAPACK_COMPLEX* X, LAPACK_INT* incx, LAPACK_COMPLEX* Y, LAPACK_INT* incy); -#endif - -//dgemm declaration -#ifdef ITENSOR_USE_CBLAS -void cblas_dgemm(const enum CBLAS_ORDER __Order, - const enum CBLAS_TRANSPOSE __TransA, - const enum CBLAS_TRANSPOSE __TransB, const int __M, const int __N, - const int __K, const double __alpha, const double *__A, - const int __lda, const double *__B, const int __ldb, - const double __beta, double *__C, const int __ldc); -#else -void F77NAME(dgemm)(char*,char*,LAPACK_INT*,LAPACK_INT*,LAPACK_INT*, - LAPACK_REAL*,LAPACK_REAL*,LAPACK_INT*,LAPACK_REAL*, - LAPACK_INT*,LAPACK_REAL*,LAPACK_REAL*,LAPACK_INT*); -#endif - -//zgemm declaration -#ifdef PLATFORM_openblas -void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, - OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, - OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, - OPENBLAS_CONST blasint M, - OPENBLAS_CONST blasint N, - OPENBLAS_CONST blasint K, - OPENBLAS_CONST double *alpha, - OPENBLAS_CONST double *A, - OPENBLAS_CONST blasint lda, - OPENBLAS_CONST double *B, - OPENBLAS_CONST blasint ldb, - OPENBLAS_CONST double *beta, - double *C, - OPENBLAS_CONST blasint ldc); -#else //platform not openblas - -#ifdef ITENSOR_USE_CBLAS -void cblas_zgemm(const enum CBLAS_ORDER __Order, - const enum CBLAS_TRANSPOSE __TransA, - const enum CBLAS_TRANSPOSE __TransB, const int __M, const int __N, - const int __K, const void *__alpha, const void *__A, const int __lda, - const void *__B, const int __ldb, const void *__beta, void *__C, - const int __ldc); -#else -void F77NAME(zgemm)(char* transa,char* transb,LAPACK_INT* m,LAPACK_INT* n,LAPACK_INT* k, - LAPACK_COMPLEX* alpha,LAPACK_COMPLEX* A,LAPACK_INT* LDA,LAPACK_COMPLEX* B, - LAPACK_INT* LDB,LAPACK_COMPLEX* beta,LAPACK_COMPLEX* C,LAPACK_INT* LDC); -#endif - -#endif //zgemm declaration - -//dgemv declaration -#ifdef ITENSOR_USE_CBLAS -void cblas_dgemv(const enum CBLAS_ORDER Order, - const enum CBLAS_TRANSPOSE TransA, const LAPACK_INT M, const LAPACK_INT N, - const LAPACK_REAL alpha, const LAPACK_REAL *A, const LAPACK_INT lda, - const LAPACK_REAL *X, const LAPACK_INT incX, const LAPACK_REAL beta, LAPACK_REAL *Y, - const LAPACK_INT incY); -#else -void F77NAME(dgemv)(char* transa,LAPACK_INT* M,LAPACK_INT* N,LAPACK_REAL* alpha, LAPACK_REAL* A, - LAPACK_INT* LDA, LAPACK_REAL* X, LAPACK_INT* incx, LAPACK_REAL* beta, - LAPACK_REAL* Y, LAPACK_INT* incy); -#endif - -//zgemv declaration -#ifdef PLATFORM_openblas -void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, - OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, - OPENBLAS_CONST blasint m, - OPENBLAS_CONST blasint n, - OPENBLAS_CONST double *alpha, - OPENBLAS_CONST double *a, - OPENBLAS_CONST blasint lda, - OPENBLAS_CONST double *x, - OPENBLAS_CONST blasint incx, - OPENBLAS_CONST double *beta, - double *y, - OPENBLAS_CONST blasint incy); -#else -#ifdef ITENSOR_USE_CBLAS -void cblas_zgemv(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE trans, const LAPACK_INT m, - const LAPACK_INT n, const void *alpha, const void *a, const LAPACK_INT lda, - const void *x, const LAPACK_INT incx, const void *beta, void *y, const LAPACK_INT incy); -#else -void F77NAME(zgemv)(char* transa,LAPACK_INT* M,LAPACK_INT* N,LAPACK_COMPLEX* alpha, LAPACK_COMPLEX* A, - LAPACK_INT* LDA, LAPACK_COMPLEX* X, LAPACK_INT* incx, LAPACK_COMPLEX* beta, - LAPACK_COMPLEX* Y, LAPACK_INT* incy); -#endif -#endif //zgemv declaration - -#ifdef PLATFORM_acml -void F77NAME(dsyev)(char *jobz, char *uplo, int *n, double *a, int *lda, - double *w, double *work, int *lwork, int *info, - int jobz_len, int uplo_len); -#else -void F77NAME(dsyev)(const char* jobz, const char* uplo, const LAPACK_INT* n, double* a, - const LAPACK_INT* lda, double* w, double* work, const LAPACK_INT* lwork, - LAPACK_INT* info ); -#endif - -#ifdef ITENSOR_USE_CBLAS -void cblas_dscal(const LAPACK_INT N, const LAPACK_REAL alpha, LAPACK_REAL* X,const LAPACK_INT incX); -#else -void F77NAME(dscal)(LAPACK_INT* N, LAPACK_REAL* alpha, LAPACK_REAL* X,LAPACK_INT* incX); -#endif - - -#ifdef PLATFORM_acml -void F77NAME(dgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, - double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, - double *work, LAPACK_INT *lwork, LAPACK_INT *iwork, LAPACK_INT *info, int jobz_len); -#else -void F77NAME(dgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, - double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, - double *work, LAPACK_INT *lwork, LAPACK_INT *iwork, LAPACK_INT *info); -#endif - - -#ifdef PLATFORM_acml - void F77NAME(dgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, - double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, - double *work, LAPACK_INT *lwork, LAPACK_INT *info, int jobz_len); -#else - void F77NAME(dgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, - double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, - double *work, LAPACK_INT *lwork, LAPACK_INT *info); -#endif - - - #ifdef PLATFORM_acml - void F77NAME(zgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, LAPACK_REAL *s, - LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_REAL * rwork, LAPACK_INT *info, int jobz_len); -#else - void F77NAME(zgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, LAPACK_REAL *s, - LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_REAL * rwork, LAPACK_INT *info); -#endif - -#ifdef PLATFORM_acml -void F77NAME(zgesdd)(char *jobz, int *m, int *n, LAPACK_COMPLEX *a, int *lda, double *s, - LAPACK_COMPLEX *u, int *ldu, LAPACK_COMPLEX *vt, int *ldvt, - LAPACK_COMPLEX *work, int *lwork, double *rwork, int *iwork, int *info, - int jobz_len); -#else -void F77NAME(zgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, double *s, - LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, LAPACK_INT *iwork, LAPACK_INT *info); -#endif - -void F77NAME(dgeqrf)(LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, - double *tau, double *work, LAPACK_INT *lwork, LAPACK_INT *info); - -void F77NAME(dorgqr)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, double *a, - LAPACK_INT *lda, double *tau, double *work, LAPACK_INT *lwork, - LAPACK_INT *info); - - -void F77NAME(zgeqrf)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, - LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_INT *info); - -#ifdef PLATFORM_lapacke -void LAPACKE_zungqr(int matrix_layout, LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, LAPACK_COMPLEX *a, - LAPACK_INT *lda, LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, - LAPACK_INT *info); -#else -void F77NAME(zungqr)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, LAPACK_COMPLEX *a, - LAPACK_INT *lda, LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, - LAPACK_INT *info); -#endif - -void F77NAME(dgesv)(LAPACK_INT *n, LAPACK_INT *nrhs, LAPACK_REAL *a, LAPACK_INT *lda, - LAPACK_INT *ipiv, LAPACK_REAL *b, LAPACK_INT *ldb, LAPACK_INT *info); - -void F77NAME(zgesv)(LAPACK_INT *n, LAPACK_INT *nrhs, LAPACK_COMPLEX *a, LAPACK_INT * lda, - LAPACK_INT *ipiv, LAPACK_COMPLEX *b, LAPACK_INT *ldb, LAPACK_INT *info); - -#ifdef PLATFORM_lapacke -double LAPACKE_dlange(int matrix_layout, char norm, lapack_int m, lapack_int n, const double* a, lapack_int lda); -#elif defined PLATFORM_acml -double F77NAME(dlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, double* a, LAPACK_INT* lda, double* work, LAPACK_INT norm_len); -#else -double F77NAME(dlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, double* a, LAPACK_INT* lda, double* work); -#endif - -#ifdef PLATFORM_lapacke -lapack_real LAPACKE_zlange(int matrix_layout, char norm, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda); -#elif defined PLATFORM_acml -LAPACK_REAL F77NAME(zlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, LAPACK_COMPLEX* a, LAPACK_INT* lda, double* work, LAPACK_INT norm_len); -#else -LAPACK_REAL F77NAME(zlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, LAPACK_COMPLEX* a, LAPACK_INT* lda, double* work); -#endif - -#ifdef PLATFORM_lapacke -lapack_int LAPACKE_zheev(int matrix_order, char jobz, char uplo, lapack_int n, - lapack_complex_double* a, lapack_int lda, double* w); -#elif defined PLATFORM_acml -void F77NAME(zheev)(char *jobz, char *uplo, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, - double *w, LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, - LAPACK_INT *info, LAPACK_INT jobz_len, LAPACK_INT uplo_len); -#else -void F77NAME(zheev)(char *jobz, char *uplo, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, - double *w, LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, - LAPACK_INT *info); -#endif - - -#ifdef PLATFORM_acml -void F77NAME(dsygv)(LAPACK_INT *itype, char *jobz, char *uplo, LAPACK_INT *n, double *a, - LAPACK_INT *lda, double *b, LAPACK_INT *ldb, double *w, double *work, - LAPACK_INT *lwork, LAPACK_INT *info, LAPACK_INT jobz_len, LAPACK_INT uplo_len); -#else -void F77NAME(dsygv)(LAPACK_INT *itype, char *jobz, char *uplo, LAPACK_INT *n, double *a, - LAPACK_INT *lda, double *b, LAPACK_INT *ldb, double *w, double *work, - LAPACK_INT *lwork, LAPACK_INT *info); -#endif - - -#ifdef PLATFORM_acml -void F77NAME(dgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, double *a, - LAPACK_INT *lda, double *wr, double *wi, double *vl, LAPACK_INT *ldvl, - double *vr, LAPACK_INT *ldvr, double *work, LAPACK_INT *lwork, - LAPACK_INT *info, LAPACK_INT jobvl_len, LAPACK_INT jobvr_len); -#else -void F77NAME(dgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, double *a, - LAPACK_INT *lda, double *wr, double *wi, double *vl, LAPACK_INT *ldvl, - double *vr, LAPACK_INT *ldvr, double *work, LAPACK_INT *lwork, - LAPACK_INT *info); -#endif - - -#ifdef PLATFORM_acml -void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, - LAPACK_INT *lda, LAPACK_COMPLEX *w, LAPACK_COMPLEX *vl, - LAPACK_INT *ldvl, LAPACK_COMPLEX *vr, LAPACK_INT *ldvr, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, - LAPACK_INT *info, LAPACK_INT jobvl_len, LAPACK_INT jobvr_len); -#else -void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, - LAPACK_INT *lda, LAPACK_COMPLEX *w, LAPACK_COMPLEX *vl, - LAPACK_INT *ldvl, LAPACK_COMPLEX *vr, LAPACK_INT *ldvr, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, - LAPACK_INT *info); -#endif - -} //extern "C" -#endif - -// -// daxpy -// Y += alpha*X -// -void -daxpy_wrapper(LAPACK_INT n, //number of elements of X,Y - LAPACK_REAL alpha, //scale factor - const LAPACK_REAL* X, //pointer to head of vector X - LAPACK_INT incx, //increment with which to step through X - LAPACK_REAL* Y, //pointer to head of vector Y - LAPACK_INT incy); //increment with which to step through Y - -// -// dnrm2 -// -LAPACK_REAL -dnrm2_wrapper(LAPACK_INT N, - const LAPACK_REAL* X, - LAPACK_INT incx = 1); - -// -// ddot -// -LAPACK_REAL -ddot_wrapper(LAPACK_INT N, - const LAPACK_REAL* X, - LAPACK_INT incx, - const LAPACK_REAL* Y, - LAPACK_INT incy); - -// -// zdotc -// -Cplx -zdotc_wrapper(LAPACK_INT N, - Cplx const* X, - LAPACK_INT incx, - Cplx const* Y, - LAPACK_INT incy); - -// -// dgemm -// -void -gemm_wrapper(bool transa, - bool transb, - LAPACK_INT m, - LAPACK_INT n, - LAPACK_INT k, - LAPACK_REAL alpha, - LAPACK_REAL const* A, - LAPACK_REAL const* B, - LAPACK_REAL beta, - LAPACK_REAL * C); - -// -// zgemm -// -void -gemm_wrapper(bool transa, - bool transb, - LAPACK_INT m, - LAPACK_INT n, - LAPACK_INT k, - Cplx alpha, - Cplx const* A, - Cplx const* B, - Cplx beta, - Cplx * C); - -// -// dgemv - matrix*vector multiply -// -void -gemv_wrapper(bool trans, - LAPACK_REAL alpha, - LAPACK_REAL beta, - LAPACK_INT m, - LAPACK_INT n, - const LAPACK_REAL* A, - const LAPACK_REAL* x, - LAPACK_INT incx, - LAPACK_REAL* y, - LAPACK_INT incy); - -// -// zgemv - matrix*vector multiply -// -void -gemv_wrapper(bool trans, - Cplx alpha, - Cplx beta, - LAPACK_INT m, - LAPACK_INT n, - Cplx const* A, - Cplx const* x, - LAPACK_INT incx, - Cplx* y, - LAPACK_INT incy); - - -// -// dsyev -// -void -dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs - char uplo, //if uplo=='U', read from upper triangle of A - LAPACK_INT n, //number of cols of A - LAPACK_REAL* A, //symmetric matrix A - LAPACK_REAL* eigs, //eigenvalues on return - LAPACK_INT& info); //error info - -// -// dscal -// -void -dscal_wrapper(LAPACK_INT N, - LAPACK_REAL alpha, - LAPACK_REAL* data, - LAPACK_INT inc = 1); - - -void -dgesdd_wrapper(char * jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT* m, //number of rows of input matrix *A - LAPACK_INT* n, //number of cols of input matrix *A - LAPACK_REAL *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - LAPACK_REAL *u, //on return, unitary matrix U - LAPACK_REAL *vt, //on return, unitary matrix V transpose - LAPACK_INT *info); - -void -zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT *m, //number of rows of input matrix *A - LAPACK_INT *n, //number of cols of input matrix *A - Cplx *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - Cplx *u, //on return, unitary matrix U - Cplx *vt, //on return, unitary matrix V transpose - LAPACK_INT *info); - - - void -dgesvd_wrapper(char * jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT* m, //number of rows of input matrix *A - LAPACK_INT* n, //number of cols of input matrix *A - LAPACK_REAL *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - LAPACK_REAL *u, //on return, unitary matrix U - LAPACK_REAL *vt, //on return, unitary matrix V transpose - LAPACK_INT *info); - -void -zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT *m, //number of rows of input matrix *A - LAPACK_INT *n, //number of cols of input matrix *A - Cplx *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - Cplx *u, //on return, unitary matrix U - Cplx *vt, //on return, unitary matrix V transpose - LAPACK_INT *info); - - -// -// dgeqrf -// -// QR factorization of a real matrix A -// -void -dgeqrf_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_REAL* A, //matrix A - //on return upper triangle contains R - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_REAL* tau, //scalar factors of elementary reflectors - //length should be min(m,n) - LAPACK_INT* info); //error info - -// -// dorgqr -// -// Generates Q from output of QR factorization routine dgeqrf (see above) -// -void -dorgqr_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) - LAPACK_REAL* A, //matrix A, as returned from "A" argument of dgeqrf - //on return contains Q - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_REAL* tau, //scalar factors as returned by dgeqrf - LAPACK_INT* info); //error info - - - // -// dgeqrf -// -// QR factorization of a complex matrix A -// -void -zgeqrf_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - Cplx* A, //matrix A - //on return upper triangle contains R - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_COMPLEX* tau, //scalar factors of elementary reflectors - //length should be min(m,n) - LAPACK_INT* info); //error info - -// -// dorgqr -// -// Generates Q from output of QR factorization routine zgeqrf (see above) -// -void -zungqr_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) - Cplx* A, //matrix A, as returned from "A" argument of dgeqrf - //on return contains Q - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_COMPLEX* tau, //scalar factors as returned by zgeqrf - LAPACK_INT* info); //error info - -// dgesv -// -// computes the solution to system of linear equations A*X = B -// where A is a general real matrix -// -LAPACK_INT -dgesv_wrapper(LAPACK_INT n, - LAPACK_INT nrhs, - LAPACK_REAL* a, - LAPACK_REAL* b); - -// -// zgesv -// -// computes the solution to system of linear euqations A*X =B -// where A is a general complex matrix -// -LAPACK_INT -zgesv_wrapper(LAPACK_INT n, - LAPACK_INT nrhs, - Cplx* a, - Cplx* b); - -// -// dlange -// -// returns the value of the 1-norm, Frobenius norm, infinity-norm, -// or the largest absolute value of any element of a general rectangular matrix. -// -double -dlange_wrapper(char norm, - LAPACK_INT m, - LAPACK_INT n, - double* a); - -// -// zlange -// -// returns the value of the 1-norm, Frobenius norm, infinity-norm, -// or the largest absolute value of any element of a general rectangular matrix. -// -LAPACK_REAL -zlange_wrapper(char norm, - LAPACK_INT m, - LAPACK_INT n, - Cplx* a); - -// -// zheev -// -// Eigenvalues and eigenvectors of complex Hermitian matrix A -// -LAPACK_INT -zheev_wrapper(LAPACK_INT N, //number of cols of A - Cplx * A, //matrix A, on return contains eigenvectors - LAPACK_REAL * d); //eigenvalues on return - -// -// dsygv -// -// Eigenvalues and eigenvectors of generalized eigenvalue problem -// A*x = lambda*B*x -// A and B must be symmetric -// B must be positive definite -// -void -dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs - //if 'N', only eigenvalues - char* uplo, //if 'U', use upper triangle of A - LAPACK_INT* n, //number of cols of A - LAPACK_REAL* A, //matrix A, on return contains eigenvectors - LAPACK_REAL* B, //matrix B - LAPACK_REAL* d, //eigenvalues on return - LAPACK_INT* info); //error info - -// -// dgeev -// -// Eigenvalues and eigenvectors of real, square matrix A -// A can be a general real matrix, not assumed symmetric -// -// Returns "info" integer -// -LAPACK_INT -dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' - char jobvr, //if 'V', compute right eigenvectors, else 'N' - LAPACK_INT n, //number of rows/cols of A - LAPACK_REAL const* A, //matrix A - LAPACK_REAL* dr, //real parts of eigenvalues - LAPACK_REAL* di, //imaginary parts of eigenvalues - LAPACK_REAL* vl, //left eigenvectors on return - LAPACK_REAL* vr); //right eigenvectors on return - -// -// zgeev -// -// Eigenvalues and eigenvectors of complex, square matrix A -// A can be a general complex matrix, not assumed symmetric -// -// Returns "info" integer -// -LAPACK_INT -zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' - char jobvr, //if 'V', compute right eigenvectors, else 'N' - LAPACK_INT n, //number of rows/cols of A - Cplx const* A, //matrix A - Cplx * d, //eigenvalues - Cplx * vl, //left eigenvectors on return - Cplx * vr); //right eigenvectors on return - -} //namespace itensor - -#endif +#ifndef ITENSOR_USE_CMAKE +#include +#else //ITENSOR_USE_CMAKE +#include +#endif //ITENSOR_USE_CMAKE \ No newline at end of file diff --git a/itensors.pc.in b/itensors.pc.in new file mode 100644 index 000000000..1f0a3ef72 --- /dev/null +++ b/itensors.pc.in @@ -0,0 +1,6 @@ +Name: itensor +Description: A high-performance tensor software inspired by tensor diagrams +Version: @ITENSOR_VERSION@ +URL: https://itensor.org/ +Cflags: -I@CMAKE_INSTALL_PREFIX@/include @CMAKE_CXX_FLAGS@ @ITENSOR_PC_CFLAGS@ +Libs: -L@CMAKE_INSTALL_PREFIX@/lib @CMAKE_EXE_LINKER_FLAGS@ @ITENSOR_PC_LIBS@ \ No newline at end of file diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt new file mode 100644 index 000000000..6897c8b14 --- /dev/null +++ b/unittest/CMakeLists.txt @@ -0,0 +1,71 @@ +# +# This file is part of the ITensor library +# +# Copyright 2018 The Simons Foundation, Inc. - All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set(executable itensor_check) +#ifdef test +#SOURCES+= $(test)_test.cc +#else +set(itensor_test_src_files + util_test.cc + algorithm_test.cc + real_test.cc + args_test.cc + matrix_test.cc + tensor_test.cc + contract_test.cc + sparse_contract_test.cc + index_test.cc + indexset_test.cc + itensor_test.cc + qn_test.cc + decomp_test.cc + mps_test.cc + mpo_test.cc + autompo_test.cc + iterativesolvers_test.cc + regression_test.cc + localop_test.cc + siteset_test.cc + test.cc + ) + +if(ITENSOR_USE_HDF5) + set(h5_test_source_files + hdf5_test.cc) + list(APPEND ${itensor_test_src_files} ${h5_test_source_files}) +endif(ITENSOR_USE_HDF5) + +add_executable(${executable} EXCLUDE_FROM_ALL ${itensor_test_src_files}) +# Add include directories and compiler flags for ta_test +target_include_directories(${executable} PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR} + ) +target_link_libraries(${executable} itensor) + +# Add test suite +add_test(itensor/unit/build "${CMAKE_COMMAND}" --build "${CMAKE_BINARY_DIR}" --target ${executable}) +set_tests_properties(itensor/unit/build PROPERTIES FIXTURES_SETUP ITENSOR_TEST_EXEC) +add_test(NAME itensor/unit/run + COMMAND ${executable}) +set_tests_properties(itensor/unit/run + PROPERTIES + FIXTURES_REQUIRED ITENSOR_TEST_EXEC + WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}/unittest" + ) +target_compile_options(${executable} PUBLIC ${CMAKE_CXX_FLAG_LIST}) +target_compile_features(${executable} PUBLIC "cxx_std_${CMAKE_CXX_STANDARD}") From 15e0e89857ff60f11a466e51ac7e9dad8c11b1d6 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 10 Aug 2023 10:42:46 -0400 Subject: [PATCH 02/51] Make include_directories at base not ./itensor --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e5b5f954..ab32f3ccd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -203,7 +203,7 @@ endif() ########################## # Include source directories ########################## -include_directories(${PROJECT_SOURCE_DIR}/itensor ${PROJECT_BINARY_DIR}/itensor) +include_directories(${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR}) ########################## # external dependencies From 8581eb31fd1e0843f0e6722732436610814c6357 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 10 Aug 2023 10:52:12 -0400 Subject: [PATCH 03/51] ifndef -> ifdef --- itensor/tensor/lapack_wrap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/itensor/tensor/lapack_wrap.h b/itensor/tensor/lapack_wrap.h index 83b06cce4..01e8831da 100644 --- a/itensor/tensor/lapack_wrap.h +++ b/itensor/tensor/lapack_wrap.h @@ -1,4 +1,4 @@ -#ifndef ITENSOR_USE_CMAKE +#ifdef ITENSOR_USE_CMAKE #include #else //ITENSOR_USE_CMAKE #include From 98010f4facd5cef7b4d12cc9c77b195da84803f5 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 10 Aug 2023 10:57:45 -0400 Subject: [PATCH 04/51] Remove uneccessary macro --- itensor/tensor/lapack/makefile_lapack_wrap.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/itensor/tensor/lapack/makefile_lapack_wrap.h b/itensor/tensor/lapack/makefile_lapack_wrap.h index 217c019fc..6cd53baf8 100644 --- a/itensor/tensor/lapack/makefile_lapack_wrap.h +++ b/itensor/tensor/lapack/makefile_lapack_wrap.h @@ -17,9 +17,7 @@ #define __ITENSOR_LAPACK_WRAP_h #include -#ifndef ITENSOR_USE_CMAKE #include "itensor/config.h" -#endif // ITENSOR_USE_CMAKE #include "itensor/types.h" #include "itensor/util/timers.h" From f19901775e28b95e25e7e33233172a6b1cfb6b39 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 10 Aug 2023 11:44:10 -0400 Subject: [PATCH 05/51] Add definitions to itensor_check --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index ab32f3ccd..fbd87446b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -269,8 +269,10 @@ add_subdirectory(itensor) add_subdirectory(unittest) target_compile_definitions(itensor PRIVATE ITENSOR_USE_CMAKE=1) +target_compile_definitions(itensor_check PRIVATE ITENSOR_USE_CMAKE=1) if(${CMAKE_BUILD_TYPE} MATCHES Debug) target_compile_definitions(itensor PRIVATE DEBUG=1) + target_compile_definitions(itensor_check PRIVATE DEBUG=1) endif(${CMAKE_BUILD_TYPE} MATCHES Debug) ########################## # pkg-config variables From 39e4aefc6125f5909c4274bf2a16e2d955b6756e Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 10 Aug 2023 11:44:56 -0400 Subject: [PATCH 06/51] Get Error working if using CMAKE/BLAS++/LAPACK++ --- itensor/itdata/task_types.h | 3 +-- itensor/util/error.h | 9 ++++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/itensor/itdata/task_types.h b/itensor/itdata/task_types.h index 538e40c35..7fec4ffec 100644 --- a/itensor/itdata/task_types.h +++ b/itensor/itdata/task_types.h @@ -166,8 +166,7 @@ struct ApplyIT void applyITImpl(stdx::choice<2>,T1, T2 &) { - auto msg = tinyformat::format("Apply: function doesn't map %s->%s",typeName(),typeName()); - Error(msg); + Error( tinyformat::format("Apply: function doesn't map %s->%s",typeName(),typeName())); } template auto diff --git a/itensor/util/error.h b/itensor/util/error.h index df61206ad..b0d3345ce 100644 --- a/itensor/util/error.h +++ b/itensor/util/error.h @@ -19,11 +19,18 @@ #include #include #include - +#ifdef ITENSOR_USE_CMAKE +#include +#include +#endif // ITENSOR_USE_CMAKE namespace itensor{ void error(const std::string& s); void error(const std::string& s, int line,const char* file); +#ifndef ITENSOR_USE_CMAKE #define Error(exp) error(exp, __LINE__, __FILE__) +#else +#define Error lapack::Error +#endif // ITENSOR_USE_CMAKE struct ITError : std::runtime_error { From b7bc32736ff3f7b2d77291c9fe2be0dd133a048a Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 10 Aug 2023 12:46:03 -0400 Subject: [PATCH 07/51] Working on linear algebra --- itensor/tensor/lapack/cmake_lapack_wrap.cc | 457 +++------------------ 1 file changed, 60 insertions(+), 397 deletions(-) diff --git a/itensor/tensor/lapack/cmake_lapack_wrap.cc b/itensor/tensor/lapack/cmake_lapack_wrap.cc index 6d8ec7c56..f48c79840 100644 --- a/itensor/tensor/lapack/cmake_lapack_wrap.cc +++ b/itensor/tensor/lapack/cmake_lapack_wrap.cc @@ -30,12 +30,7 @@ daxpy_wrapper(LAPACK_INT n, //number of elements of X,Y LAPACK_REAL* Y, //pointer to head of vector Y LAPACK_INT incy) //increment with which to step through Y { -//#ifdef ITENSOR_USE_CBLAS -// cblas_daxpy(n,alpha,X,incx,Y,incy); -//#else -// auto Xnc = const_cast(X); -// F77NAME(daxpy)(&n,&alpha,Xnc,&incx,Y,&incy); -//#endif + blas::axpy(n, alpha, X, incx, Y, incy); } // @@ -46,13 +41,7 @@ dnrm2_wrapper(LAPACK_INT N, const LAPACK_REAL* X, LAPACK_INT incx) { -//#ifdef ITENSOR_USE_CBLAS -// return cblas_dnrm2(N,X,incx); -//#else -// auto *Xnc = const_cast(X); -// return F77NAME(dnrm2)(&N,Xnc,&incx); -//#endif - return -1; + return blas::nrm2(N, X, incx); } // @@ -65,14 +54,7 @@ ddot_wrapper(LAPACK_INT N, const LAPACK_REAL* Y, LAPACK_INT incy) { -//#ifdef ITENSOR_USE_CBLAS -// return cblas_ddot(N,X,incx,Y,incy); -//#else -// auto *Xnc = const_cast(X); -// auto *Ync = const_cast(Y); -// return F77NAME(ddot)(&N,Xnc,&incx,Ync,&incy); -//#endif - return -1; + return blas::dot(N, X, incx, Y, incy); } // @@ -85,29 +67,7 @@ zdotc_wrapper(LAPACK_INT N, Cplx const* Y, LAPACK_INT incy) { -//#ifdef ITENSOR_USE_CBLAS -// Cplx res; -//#if defined PLATFORM_openblas -// auto pX = reinterpret_cast(X); -// auto pY = reinterpret_cast(Y); -// auto pres = reinterpret_cast(&res); -//#else -// auto pX = reinterpret_cast(X); -// auto pY = reinterpret_cast(Y); -// auto pres = reinterpret_cast(&res); -//#endif -// cblas_zdotc_sub(N,pX,incx,pY,incy,pres); -// return res; -//#else -// auto ncX = const_cast(X); -// auto ncY = const_cast(Y); -// auto pX = reinterpret_cast(ncX); -// auto pY = reinterpret_cast(ncY); -// auto res = F77NAME(zdotc)(&N,pX,&incx,pY,&incy); -// auto cplx_res = reinterpret_cast(&res); -// return *cplx_res; -//#endif - return Cplx{}; + return blas::dot(N, X, incx, Y, incy); } // @@ -127,37 +87,16 @@ gemm_wrapper(bool transa, { LAPACK_INT lda = m, ldb = k; -//#ifdef ITENSOR_USE_CBLAS -// auto at = CblasNoTrans, -// bt = CblasNoTrans; -// if(transa) -// { -// at = CblasTrans; -// lda = k; -// } -// if(transb) -// { -// bt = CblasTrans; -// ldb = n; -// } -// cblas_dgemm(CblasColMajor,at,bt,m,n,k,alpha,A,lda,B,ldb,beta,C,m); -//#else -// auto *pA = const_cast(A); -// auto *pB = const_cast(B); -// char at = 'N'; -// char bt = 'N'; -// if(transa) -// { -// at = 'T'; -// lda = k; -// } -// if(transb) -// { -// bt = 'T'; -// ldb = n; -// } -// F77NAME(dgemm)(&at,&bt,&m,&n,&k,&alpha,pA,&lda,pB,&ldb,&beta,C,&m); -//#endif + auto at = blas::Op::NoTrans, + bt = blas::Op::NoTrans; + if(transa) { + at = blas::Op::Trans; + lda = k; + } if(transb){ + bt = blas::Op::Trans; + ldb = n; + } + blas::gemm(blas::Layout::ColMajor, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, m); } // @@ -177,73 +116,16 @@ gemm_wrapper(bool transa, { LAPACK_INT lda = m, ldb = k; -//#ifdef PLATFORM_openblas -// auto at = CblasNoTrans, -// bt = CblasNoTrans; -// if(transa) -// { -// at = CblasTrans; -// lda = k; -// } -// if(transb) -// { -// bt = CblasTrans; -// ldb = n; -// } -// //auto ralpha = realRef(alpha); -// //auto ialpha = imagRef(alpha); -// //auto rbeta = realRef(beta); -// //auto ibeta = imagRef(beta); -// //if(ialpha != 0.0 || ibeta != 0.0) -// // { -// // throw std::runtime_error("Complex alpha, beta not supported in zgemm for PLATFORM=openblas"); -// // } -// auto* palpha = reinterpret_cast(&alpha); -// auto* pbeta = reinterpret_cast(&beta); -// auto* pA = reinterpret_cast(A); -// auto* pB = reinterpret_cast(B); -// auto* pC = reinterpret_cast(C); -// cblas_zgemm(CblasColMajor,at,bt,m,n,k,palpha,pA,lda,pB,ldb,pbeta,pC,m); -//#else //platform not openblas -//#ifdef ITENSOR_USE_CBLAS -// auto at = CblasNoTrans, -// bt = CblasNoTrans; -// if(transa) -// { -// at = CblasTrans; -// lda = k; -// } -// if(transb) -// { -// bt = CblasTrans; -// ldb = n; -// } -// auto palpha = (void*)(&alpha); -// auto pbeta = (void*)(&beta); -// cblas_zgemm(CblasColMajor,at,bt,m,n,k,palpha,(void*)A,lda,(void*)B,ldb,pbeta,(void*)C,m); -//#else //use Fortran zgemm -// auto *ncA = const_cast(A); -// auto *ncB = const_cast(B); -// auto *pA = reinterpret_cast(ncA); -// auto *pB = reinterpret_cast(ncB); -// auto *pC = reinterpret_cast(C); -// auto *palpha = reinterpret_cast(&alpha); -// auto *pbeta = reinterpret_cast(&beta); -// char at = 'N'; -// char bt = 'N'; -// if(transa) -// { -// at = 'T'; -// lda = k; -// } -// if(transb) -// { -// bt = 'T'; -// ldb = n; -// } -// F77NAME(zgemm)(&at,&bt,&m,&n,&k,palpha,pA,&lda,pB,&ldb,pbeta,pC,&m); -//#endif -//#endif + auto at = blas::Op::NoTrans, + bt = blas::Op::NoTrans; + if(transa) { + at = blas::Op::Trans; + lda = k; + } if(transb){ + bt = blas::Op::Trans; + ldb = n; + } + blas::gemm(blas::Layout::ColMajor, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, m); } void @@ -258,13 +140,8 @@ gemv_wrapper(bool trans, LAPACK_REAL* y, LAPACK_INT incy) { -//#ifdef ITENSOR_USE_CBLAS -// auto Tr = trans ? CblasTrans : CblasNoTrans; -// cblas_dgemv(CblasColMajor,Tr,m,n,alpha,A,m,x,incx,beta,y,incy); -//#else -// char Tr = trans ? 'T' : 'N'; -// F77NAME(dgemv)(&Tr,&m,&n,&alpha,const_cast(A),&m,const_cast(x),&incx,&beta,y,&incy); -//#endif + auto Tr = trans ? blas::Op::Trans : blas::Op::NoTrans; + blas::gemv(blas::Layout::ColMajor, Tr, m, n, alpha, A, m, x, incx, beta, y, incy); } void @@ -279,40 +156,8 @@ gemv_wrapper(bool trans, Cplx* y, LAPACK_INT incy) { -//#ifdef PLATFORM_openblas -// auto Tr = trans ? CblasTrans : CblasNoTrans; -// //auto ralpha = realRef(alpha); -// //auto ialpha = imagRef(alpha); -// //auto rbeta = realRef(beta); -// //auto ibeta = imagRef(beta); -// //if(ialpha != 0.0 || ibeta != 0.0) -// // { -// // throw std::runtime_error("Complex alpha, beta not supported in zgemm for PLATFORM=openblas"); -// // } -// auto* palpha = reinterpret_cast(&alpha); -// auto* pbeta = reinterpret_cast(&beta); -// auto* pA = reinterpret_cast(A); -// auto* px = reinterpret_cast(x); -// auto* py = reinterpret_cast(y); -// cblas_zgemv(CblasColMajor,Tr,m,n,palpha,pA,m,px,incx,pbeta,py,incy); -//#else //platform other than openblas -//#ifdef ITENSOR_USE_CBLAS -// auto Tr = trans ? CblasTrans : CblasNoTrans; -// auto palpha = reinterpret_cast(&alpha); -// auto pbeta = reinterpret_cast(&beta); -// cblas_zgemv(CblasColMajor,Tr,m,n,palpha,(void*)A,m,(void*)x,incx,pbeta,(void*)y,incy); -//#else -// char Tr = trans ? 'T' : 'N'; -// auto ncA = const_cast(A); -// auto ncx = const_cast(x); -// auto pA = reinterpret_cast(ncA); -// auto px = reinterpret_cast(ncx); -// auto py = reinterpret_cast(y); -// auto palpha = reinterpret_cast(&alpha); -// auto pbeta = reinterpret_cast(&beta); -// F77NAME(zgemv)(&Tr,&m,&n,palpha,pA,&m,px,&incx,pbeta,py,&incy); -//#endif -//#endif + auto Tr = trans ? blas::Op::Trans : blas::Op::NoTrans; + blas::gemv(blas::Layout::ColMajor, Tr, m, n, alpha, A, m, x, incx, beta, y, incy); } @@ -327,23 +172,8 @@ dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs LAPACK_REAL* eigs, //eigenvalues on return LAPACK_INT& info) //error info { - std::vector work; LAPACK_INT lda = n; - -//#ifdef PLATFORM_acml -// static const LAPACK_INT one = 1; -// LAPACK_INT lwork = std::max(one,3*n-1); -// work.resize(lwork+2); -// F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,work.data(),&lwork,&info,1,1); -//#else -// //Compute optimal workspace size (will be written to wkopt) -// LAPACK_INT lwork = -1; //tell dsyev to compute optimal size -// LAPACK_REAL wkopt = 0; -// F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,&wkopt,&lwork,&info); -// lwork = LAPACK_INT(wkopt); -// work.resize(lwork+2); -// F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,work.data(),&lwork,&info); -//#endif + lapack::syev(lapack::char2job(jobz), blas::char2uplo(uplo), n, A, lda, eigs); } // @@ -355,11 +185,7 @@ dscal_wrapper(LAPACK_INT N, LAPACK_REAL* data, LAPACK_INT inc) { -//#ifdef ITENSOR_USE_CBLAS -// cblas_dscal(N,alpha,data,inc); -//#else -// F77NAME(dscal)(&N,&alpha,data,&inc); -//#endif + blas::scal(N, alpha, data, inc); } void @@ -373,24 +199,7 @@ zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to comp Cplx *vt, //on return, unitary matrix V transpose LAPACK_INT *info) { - std::vector work; - std::vector rwork; - std::vector iwork; - auto pA = reinterpret_cast(A); - auto pU = reinterpret_cast(u); - auto pVt = reinterpret_cast(vt); - LAPACK_INT l = std::min(*m,*n), - g = std::max(*m,*n); - LAPACK_INT lwork = l*l+2*l+g+100; - work.resize(lwork); - rwork.resize(5*l*(1+l)); - iwork.resize(8*l); -//#ifdef PLATFORM_acml -// LAPACK_INT jobz_len = 1; -// F77NAME(zgesdd)(jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),iwork.data(),info,jobz_len); -//#else -// F77NAME(zgesdd)(jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),iwork.data(),info); -//#endif + lapack::gesdd(lapack::char2job(*jobz), *m, *n, A, *m, s, u, *m, vt, *n); } @@ -406,19 +215,7 @@ dgesdd_wrapper(char* jobz, //char* specifying how much of U, V to comp LAPACK_REAL *vt, //on return, unitary matrix V transpose LAPACK_INT *info) { - std::vector work; - std::vector iwork; - LAPACK_INT l = std::min(*m,*n), - g = std::max(*m,*n); - LAPACK_INT lwork = l*(6 + 4*l) + g; - work.resize(lwork); - iwork.resize(8*l); -//#ifdef PLATFORM_acml -// LAPACK_INT jobz_len = 1; -// F77NAME(dgesdd)(jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork,iwork.data(),info,jobz_len); -//#else -// F77NAME(dgesdd)(jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork,iwork.data(),info); -//#endif + lapack::gesdd(lapack::char2job(*jobz), *m, *n, A, *m, s, u, *m, vt, *n); } @@ -434,24 +231,7 @@ zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to comp Cplx *vt, //on return, unitary matrix V transpose LAPACK_INT *info) { - std::vector work; - std::vector rwork; - std::vector iwork; - auto pA = reinterpret_cast(A); - auto pU = reinterpret_cast(u); - auto pVt = reinterpret_cast(vt); - LAPACK_INT l = std::min(*m,*n), - g = std::max(*m,*n); - LAPACK_INT lwork = l*l+2*l+g+100; - work.resize(lwork); - rwork.resize(5*l*(1+l)); - iwork.resize(8*l); -//#ifdef PLATFORM_acml -// LAPACK_INT jobz_len = 1; -// F77NAME(zgesvd)(jobz,jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),info,jobz_len); -//#else -// F77NAME(zgesvd)(jobz,jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),info); -//#endif + lapack::gesvd(lapack::char2job(*jobz), lapack::char2job(*jobz), *m, *n, A, *m, s, u, *m, vt, *n); } @@ -467,22 +247,7 @@ dgesvd_wrapper(char* jobz, //char* specifying how much of U, V to comp LAPACK_REAL *vt, //on return, unitary matrix V transpose LAPACK_INT *info) { - std::vector work; - // std::vector superb; - - std::vector iwork; - LAPACK_INT l = std::min(*m,*n), - g = std::max(*m,*n); - LAPACK_INT lwork = l*(6 + 4*l) + g; - work.resize(lwork); - iwork.resize(8*l); - //superb.resize(l -1); -//#ifdef PLATFORM_acml -// LAPACK_INT jobz_len = 1; -// F77NAME(dgesvd)(jobz,jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork, info, jobz_len); -//#else -// F77NAME(dgesvd)(jobz,jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork, info); -//#endif + lapack::gesvd(lapack::char2job(*jobz), lapack::char2job(*jobz), *m, *n, A, *m, s, u, *m, vt, *n); } // @@ -500,11 +265,7 @@ dgeqrf_wrapper(LAPACK_INT* m, //number of rows of A //length should be min(m,n) LAPACK_INT* info) //error info { - static const LAPACK_INT one = 1; - std::vector work; - LAPACK_INT lwork = std::max(one,4*std::max(*n,*m)); - work.resize(lwork+2); -// F77NAME(dgeqrf)(m,n,A,lda,tau,work.data(),&lwork,info); + lapack::geqrf(*m, *n, A, *lda, tau); } // @@ -522,11 +283,7 @@ dorgqr_wrapper(LAPACK_INT* m, //number of rows of A LAPACK_REAL* tau, //scalar factors as returned by dgeqrf LAPACK_INT* info) //error info { - static const LAPACK_INT one = 1; - std::vector work; - auto lwork = std::max(one,4*std::max(*n,*m)); - work.resize(lwork+2); -// F77NAME(dorgqr)(m,n,k,A,lda,tau,work.data(),&lwork,info); + lapack::orgqr(*m, *n, *k, A, *lda, tau); } @@ -545,13 +302,9 @@ zgeqrf_wrapper(LAPACK_INT* m, //number of rows of A //length should be min(m,n) LAPACK_INT* info) //error info { - static const LAPACK_INT one = 1; - std::vector work; - LAPACK_INT lwork = std::max(one,4*std::max(*n,*m)); - work.resize(lwork+2); static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); - auto pA = reinterpret_cast(A); -// F77NAME(zgeqrf)(m,n,pA,lda,tau,work.data(),&lwork,info); + auto ptau = reinterpret_cast(tau); + lapack::geqrf(*m, *n, A, *lda, ptau); } // @@ -569,17 +322,9 @@ zungqr_wrapper(LAPACK_INT* m, //number of rows of A LAPACK_COMPLEX* tau, //scalar factors as returned by dgeqrf LAPACK_INT* info) //error info { - static const LAPACK_INT one = 1; - std::vector work; - auto lwork = std::max(one,4*std::max(*n,*m)); - work.resize(lwork+2); static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); - auto pA = reinterpret_cast(A); -// #ifdef PLATFORM_lapacke -// LAPACKE_zungqr(LAPACK_COL_MAJOR,jobz,uplo,N,A,N,w.data()); -// #else -// F77NAME(zungqr)(m,n,k,pA,lda,tau,work.data(),&lwork,info); -// #endif + auto ptau = reinterpret_cast(tau); + lapack::ungqr(*m, *n, *k, A, *lda, ptau); } // @@ -592,10 +337,9 @@ dgesv_wrapper(LAPACK_INT n, LAPACK_REAL* b) { LAPACK_INT lda = n; - std::vector ipiv(n); + std::vector ipiv(n); LAPACK_INT ldb = n; - LAPACK_INT info = 0; -// F77NAME(dgesv)(&n,&nrhs,a,&lda,ipiv.data(),b,&ldb,&info); + auto info = lapack::gesv(n, nrhs, a, lda, ipiv.data(), b, ldb); return info; } @@ -608,13 +352,10 @@ zgesv_wrapper(LAPACK_INT n, Cplx* a, Cplx* b) { - auto pa = reinterpret_cast(a); - auto pb = reinterpret_cast(b); LAPACK_INT lda = n; - std::vector ipiv(n); + std::vector ipiv(n); LAPACK_INT ldb = n; - LAPACK_INT info = 0; -// F77NAME(zgesv)(&n,&nrhs,pa,&lda,ipiv.data(),pb,&ldb,&info); + auto info = lapack::gesv(n, nrhs, a, lda, ipiv.data(), b, ldb); return info; } @@ -627,20 +368,7 @@ dlange_wrapper(char norm, LAPACK_INT n, double* a) { - double norma; -//#ifdef PLATFORM_lapacke -// norma = LAPACKE_dlange(LAPACK_COL_MAJOR,norm,m,n,a,m); -//#else -// std::vector work; -// if(norm == 'I' || norm == 'i') work.resize(m); -//#ifdef PLATFORM_acml -// LAPACK_INT norm_len = 1; -// norma = F77NAME(dlange)(&norm,&m,&n,a,&m,work.data(),norm_len); -//#else -// norma = F77NAME(dlange)(&norm,&m,&n,a,&m,work.data()); -//#endif -//#endif - return norma; + return lapack::lange(lapack::char2norm(norm), m, n, a, m); } // @@ -652,22 +380,7 @@ zlange_wrapper(char norm, LAPACK_INT n, Cplx* a) { - LAPACK_REAL norma; -//#ifdef PLATFORM_lapacke -// auto pA = reinterpret_cast(a); -// norma = LAPACKE_zlange(LAPACK_COL_MAJOR,norm,m,n,pa,m); -//#else -// std::vector work; -// if(norm == 'I' || norm == 'i') work.resize(m); -// auto pA = reinterpret_cast(a); -//#ifdef PLATFORM_acml -// LAPACK_INT norm_len = 1; -// norma = F77NAME(zlange)(&norm,&m,&n,pA,&m,work.data(),norm_len); -//#else -// norma = F77NAME(zlange)(&norm,&m,&n,pA,&m,work.data()); -//#endif -//#endif - return norma; + return lapack::lange(lapack::char2norm(norm), m, n, a, m); } // @@ -683,26 +396,8 @@ zheev_wrapper(LAPACK_INT N, //number of cols of A static const LAPACK_INT one = 1; char jobz = 'V'; char uplo = 'U'; - lapack_int info = 0; -//#ifdef PLATFORM_lapacke -// std::vector work(N); -// LAPACKE_zheev(LAPACK_COL_MAJOR,jobz,uplo,N,A,N,w.data()); -//#else -// LAPACK_INT lwork = std::max(one,3*N-1);//max(1, 1+6*N+2*N*N); -// std::vector work(lwork); -// std::vector rwork(lwork); -// LAPACK_INT info = 0; -// static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); -// auto pA = reinterpret_cast(A); -//#ifdef PLATFORM_acml -// LAPACK_INT jobz_len = 1; -// LAPACK_INT uplo_len = 1; -// F77NAME(zheev)(&jobz,&uplo,&N,pA,&N,d,work.data(),&lwork,rwork.data(),&info,jobz_len,uplo_len); -//#else -// F77NAME(zheev)(&jobz,&uplo,&N,pA,&N,d,work.data(),&lwork,rwork.data(),&info); -//#endif -// -//#endif //PLATFORM_lapacke + lapack_int info = lapack::heev(lapack::char2job(jobz), blas::char2uplo(uplo), N, A, N, d); + return info; } @@ -724,18 +419,8 @@ dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs LAPACK_REAL* d, //eigenvalues on return LAPACK_INT* info) //error info { - static const LAPACK_INT one = 1; - std::vector work; LAPACK_INT itype = 1; - LAPACK_INT lwork = std::max(one,3*(*n)-1);//std::max(1, 1+6*N+2*N*N); - work.resize(lwork); -//#ifdef PLATFORM_acml -// LAPACK_INT jobz_len = 1; -// LAPACK_INT uplo_len = 1; -// F77NAME(dsygv)(&itype,jobz,uplo,n,A,n,B,n,d,work.data(),&lwork,info,jobz_len,uplo_len); -//#else -// F77NAME(dsygv)(&itype,jobz,uplo,n,A,n,B,n,d,work.data(),&lwork,info); -//#endif + lapack::sygv(itype, lapack::char2job(*jobz), blas::char2uplo(*uplo), *n, A, *n, B, *n, d); } // @@ -763,37 +448,13 @@ dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' LAPACK_INT nevecl = (jobvl == 'V' ? n : 1); LAPACK_INT nevecr = (jobvr == 'V' ? n : 1); LAPACK_INT info = 0; -//#ifdef PLATFORM_acml -// LAPACK_INT lwork = -1; -// LAPACK_REAL wquery = 0; -// F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,&wquery,&lwork,&info,1,1); -// -// lwork = static_cast(wquery); -// work.resize(lwork); -// F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,work.data(),&lwork,&info,1,1); -//#else -// LAPACK_INT lwork = -1; -// LAPACK_REAL wquery = 0; -// F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,&wquery,&lwork,&info); -// -// lwork = static_cast(wquery); -// work.resize(lwork); -// F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,work.data(),&lwork,&info); -//#endif - //println("jobvl = ",jobvl); - //println("nevecl = ",nevecl); - //println("vl data = "); - //for(auto j = 0; j < n*n; ++j) - // { - // println(*vl); - // ++vl; - // } - //println("vr data = "); - //for(auto j = 0; j < n*n; ++j) - // { - // println(*vr); - // ++vr; - // } + std::vector W; + info = lapack::geev(lapack::char2job(jobvl), lapack::char2job(jobvr), n, cpA.data(), n, W.data(), vl, nevecl, vr, nevecr); + auto v = 0; + for(auto & i : W){ + *(dr + v) = std::real(i); + *(di + v) = std::imag(i); + } return info; } @@ -813,7 +474,7 @@ zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' Cplx * vr) //right eigenvectors on return { static const LAPACK_INT one = 1; - std::vector cpA; + std::vector cpA; std::vector work; std::vector rwork; LAPACK_INT nevecl = (jobvl == 'V' ? n : 1); @@ -825,7 +486,7 @@ zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' //Copy A data into cpA cpA.resize(n*n); - auto pA = reinterpret_cast(A); + auto pA = reinterpret_cast(A); std::copy(pA,pA+n*n,cpA.data()); auto pd = reinterpret_cast(d); @@ -833,6 +494,8 @@ zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' auto pvr = reinterpret_cast(vr); LAPACK_INT info = 0; + std::vector W; + info = lapack::geev(lapack::char2job(jobvl), lapack::char2job(jobvr), n, cpA.data(), n, W.data(), vl, nevecl, vr, nevecr); //#ifdef PLATFORM_acml // F77NAME(zgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,pd,pvl,&nevecl,pvr,&nevecr,work.data(),&lwork,rwork.data(),&info,1,1); //#else From 9f65507b2a1c4d6babc08627bf2211981bf86fdf Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 10 Aug 2023 16:53:36 -0400 Subject: [PATCH 08/51] export include dirs as base itensor source --- cmake/itensor-config.cmake.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/itensor-config.cmake.in b/cmake/itensor-config.cmake.in index 093aec683..ee26973e6 100644 --- a/cmake/itensor-config.cmake.in +++ b/cmake/itensor-config.cmake.in @@ -23,9 +23,9 @@ set(ITENSORS_LIBRARIES itensor) set(ITENSORS_SOURCE_DIR "@ITENSORS_SOURCE_DIR@") set(ITENSORS_BINARY_DIR "@ITENSORS_BINARY_DIR@") -set(ITENSORS_BUILD_INCLUDE_DIRS "${ITENSORS_SOURCE_DIR}/src" "${ITENSORS_BINARY_DIR}/src") +set(ITENSORS_BUILD_INCLUDE_DIRS "${ITENSORS_SOURCE_DIR} "${ITENSORS_BINARY_DIR}") set(ITENSORS_INSTALL_INCLUDE_DIRS "@PACKAGE_ITENSORS_INSTALL_INCLUDEDIR@" - "@PACKAGE_ITENSORS_INSTALL_INCLUDEDIR@/itensors/external") + "@PACKAGE_ITENSORS_INSTALL_INCLUDEDIR@") # define ITENSORS_INCLUDE_DIRS according to where we are compiling: ITensor build tree or outside # external packages should use ITENSORS_BUILD_INCLUDE_DIRS and ITENSORS_INSTALL_INCLUDE_DIRS directly From 9088c35a6740af94a8393bb73db4e2f1690d914b Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 10 Aug 2023 17:39:48 -0400 Subject: [PATCH 09/51] Update linear algebra --- itensor/tensor/lapack/cmake_lapack_wrap.cc | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/itensor/tensor/lapack/cmake_lapack_wrap.cc b/itensor/tensor/lapack/cmake_lapack_wrap.cc index f48c79840..bbeb3b68b 100644 --- a/itensor/tensor/lapack/cmake_lapack_wrap.cc +++ b/itensor/tensor/lapack/cmake_lapack_wrap.cc @@ -448,12 +448,12 @@ dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' LAPACK_INT nevecl = (jobvl == 'V' ? n : 1); LAPACK_INT nevecr = (jobvr == 'V' ? n : 1); LAPACK_INT info = 0; - std::vector W; + std::vector W(n); info = lapack::geev(lapack::char2job(jobvl), lapack::char2job(jobvr), n, cpA.data(), n, W.data(), vl, nevecl, vr, nevecr); - auto v = 0; - for(auto & i : W){ - *(dr + v) = std::real(i); - *(di + v) = std::imag(i); + auto ptr = W.data(); + for(size_t i = 0; i < n; ++i){ + *(dr + i) = std::real(*(ptr + i)); + *(di + i) = std::imag(*(ptr + i)); } return info; } @@ -496,11 +496,7 @@ zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' LAPACK_INT info = 0; std::vector W; info = lapack::geev(lapack::char2job(jobvl), lapack::char2job(jobvr), n, cpA.data(), n, W.data(), vl, nevecl, vr, nevecr); -//#ifdef PLATFORM_acml -// F77NAME(zgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,pd,pvl,&nevecl,pvr,&nevecr,work.data(),&lwork,rwork.data(),&info,1,1); -//#else -// F77NAME(zgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,pd,pvl,&nevecl,pvr,&nevecr,work.data(),&lwork,rwork.data(),&info); -//#endif + return info; } From 2f8d17fb98291297f577e117d4d783cc6af82f0c Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Fri, 11 Aug 2023 16:21:19 -0400 Subject: [PATCH 10/51] Working to fix linear algebra --- itensor/tensor/lapack/cmake_lapack_wrap.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/itensor/tensor/lapack/cmake_lapack_wrap.cc b/itensor/tensor/lapack/cmake_lapack_wrap.cc index bbeb3b68b..cc93a3762 100644 --- a/itensor/tensor/lapack/cmake_lapack_wrap.cc +++ b/itensor/tensor/lapack/cmake_lapack_wrap.cc @@ -96,7 +96,7 @@ gemm_wrapper(bool transa, bt = blas::Op::Trans; ldb = n; } - blas::gemm(blas::Layout::ColMajor, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, m); + blas::gemm(blas::Layout::ColMajor, at,bt,m,n,k,alpha,A,lda,B,ldb,beta,C,m); } // @@ -114,8 +114,8 @@ gemm_wrapper(bool transa, Cplx beta, Cplx* C) { - LAPACK_INT lda = m, - ldb = k; + LAPACK_INT lda = m, + ldb = k; auto at = blas::Op::NoTrans, bt = blas::Op::NoTrans; if(transa) { @@ -125,7 +125,7 @@ gemm_wrapper(bool transa, bt = blas::Op::Trans; ldb = n; } - blas::gemm(blas::Layout::ColMajor, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, m); + blas::gemm(blas::Layout::ColMajor, at,bt,m,n,k,alpha,A,lda,B,ldb,beta,C,m); } void @@ -173,7 +173,8 @@ dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs LAPACK_INT& info) //error info { LAPACK_INT lda = n; - lapack::syev(lapack::char2job(jobz), blas::char2uplo(uplo), n, A, lda, eigs); + info = lapack::syev(lapack::char2job(jobz), blas::char2uplo(uplo), n, A, lda, eigs); + std::vector eigs_up(n); } // From e5cf5bc2c1af95f6528db5f6373cd5f6c3c2f530 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 14 Aug 2023 14:01:23 -0400 Subject: [PATCH 11/51] Fix linear algebra --- itensor/tensor/lapack/cmake_lapack_wrap.cc | 29 ++++++++++++---------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/itensor/tensor/lapack/cmake_lapack_wrap.cc b/itensor/tensor/lapack/cmake_lapack_wrap.cc index cc93a3762..e7bf69dd2 100644 --- a/itensor/tensor/lapack/cmake_lapack_wrap.cc +++ b/itensor/tensor/lapack/cmake_lapack_wrap.cc @@ -174,7 +174,6 @@ dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs { LAPACK_INT lda = n; info = lapack::syev(lapack::char2job(jobz), blas::char2uplo(uplo), n, A, lda, eigs); - std::vector eigs_up(n); } // @@ -200,7 +199,8 @@ zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to comp Cplx *vt, //on return, unitary matrix V transpose LAPACK_INT *info) { - lapack::gesdd(lapack::char2job(*jobz), *m, *n, A, *m, s, u, *m, vt, *n); + LAPACK_INT l = std::min(*m,*n); + *info = lapack::gesdd(lapack::char2job(*jobz), *m, *n, A, *m, s, u, *m, vt, l); } @@ -216,7 +216,8 @@ dgesdd_wrapper(char* jobz, //char* specifying how much of U, V to comp LAPACK_REAL *vt, //on return, unitary matrix V transpose LAPACK_INT *info) { - lapack::gesdd(lapack::char2job(*jobz), *m, *n, A, *m, s, u, *m, vt, *n); + LAPACK_INT l = std::min(*m,*n); + *info = lapack::gesdd(lapack::char2job(*jobz), *m, *n, A, *m, s, u, *m, vt, l); } @@ -232,7 +233,8 @@ zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to comp Cplx *vt, //on return, unitary matrix V transpose LAPACK_INT *info) { - lapack::gesvd(lapack::char2job(*jobz), lapack::char2job(*jobz), *m, *n, A, *m, s, u, *m, vt, *n); + LAPACK_INT l = std::min(*m,*n); + *info = lapack::gesvd(lapack::char2job(*jobz), lapack::char2job(*jobz), *m, *n, A, *m, s, u, *m, vt, l); } @@ -248,7 +250,8 @@ dgesvd_wrapper(char* jobz, //char* specifying how much of U, V to comp LAPACK_REAL *vt, //on return, unitary matrix V transpose LAPACK_INT *info) { - lapack::gesvd(lapack::char2job(*jobz), lapack::char2job(*jobz), *m, *n, A, *m, s, u, *m, vt, *n); + LAPACK_INT l = std::min(*m,*n); + *info = lapack::gesvd(lapack::char2job(*jobz), lapack::char2job(*jobz), *m, *n, A, *m, s, u, *m, vt, l); } // @@ -266,7 +269,7 @@ dgeqrf_wrapper(LAPACK_INT* m, //number of rows of A //length should be min(m,n) LAPACK_INT* info) //error info { - lapack::geqrf(*m, *n, A, *lda, tau); + *info = lapack::geqrf(*m, *n, A, *lda, tau); } // @@ -284,7 +287,7 @@ dorgqr_wrapper(LAPACK_INT* m, //number of rows of A LAPACK_REAL* tau, //scalar factors as returned by dgeqrf LAPACK_INT* info) //error info { - lapack::orgqr(*m, *n, *k, A, *lda, tau); + *info = lapack::orgqr(*m, *n, *k, A, *lda, tau); } @@ -305,7 +308,8 @@ zgeqrf_wrapper(LAPACK_INT* m, //number of rows of A { static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); auto ptau = reinterpret_cast(tau); - lapack::geqrf(*m, *n, A, *lda, ptau); + *info = lapack::geqrf(*m, *n, A, *lda, ptau); + tau = reinterpret_cast(ptau); } // @@ -325,7 +329,8 @@ zungqr_wrapper(LAPACK_INT* m, //number of rows of A { static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); auto ptau = reinterpret_cast(tau); - lapack::ungqr(*m, *n, *k, A, *lda, ptau); + *info = lapack::ungqr(*m, *n, *k, A, *lda, ptau); + tau = reinterpret_cast(ptau); } // @@ -340,8 +345,7 @@ dgesv_wrapper(LAPACK_INT n, LAPACK_INT lda = n; std::vector ipiv(n); LAPACK_INT ldb = n; - auto info = lapack::gesv(n, nrhs, a, lda, ipiv.data(), b, ldb); - return info; + return lapack::gesv(n, nrhs, a, lda, ipiv.data(), b, ldb); } // @@ -394,7 +398,6 @@ zheev_wrapper(LAPACK_INT N, //number of cols of A Cplx * A, //matrix A, on return contains eigenvectors LAPACK_REAL * d) //eigenvalues on return { - static const LAPACK_INT one = 1; char jobz = 'V'; char uplo = 'U'; lapack_int info = lapack::heev(lapack::char2job(jobz), blas::char2uplo(uplo), N, A, N, d); @@ -421,7 +424,7 @@ dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs LAPACK_INT* info) //error info { LAPACK_INT itype = 1; - lapack::sygv(itype, lapack::char2job(*jobz), blas::char2uplo(*uplo), *n, A, *n, B, *n, d); + *info = lapack::sygv(itype, lapack::char2job(*jobz), blas::char2uplo(*uplo), *n, A, *n, B, *n, d); } // From 1df8e6109da0b942c98ed7500422797ba0f8230b Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 14 Aug 2023 14:01:49 -0400 Subject: [PATCH 12/51] Start trying to get HDF5 linked properly --- CMakeLists.txt | 13 +++++++++++-- itensor/CMakeLists.txt | 12 +++++++++++- unittest/CMakeLists.txt | 9 ++++++--- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fbd87446b..3475a4d2b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -136,8 +136,12 @@ set(CMAKE_SKIP_RPATH FALSE) # We use C++17 features ########################## # but insist on strict standard -set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ ISO Standard version") -if (NOT(CMAKE_CXX_STANDARD EQUAL 17 OR CMAKE_CXX_STANDARD EQUAL 20)) +#if(ENABLE_HDF5) + set(CMAKE_CXX_STANDARD 20 CACHE STRING "C++ ISO Standard version") +#else() +# set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ ISO Standard version") +#endif(ENABLE_HDF5) +if (CMAKE_CXX_STANDARD VERSION_LESS 20) message(FATAL_ERROR "C++ 2017 ISO Standard or higher is required to compile ITENSORS") endif() # C++20 is only configurable via compile features with cmake 3.12 and older @@ -274,6 +278,11 @@ if(${CMAKE_BUILD_TYPE} MATCHES Debug) target_compile_definitions(itensor PRIVATE DEBUG=1) target_compile_definitions(itensor_check PRIVATE DEBUG=1) endif(${CMAKE_BUILD_TYPE} MATCHES Debug) + +if(ENABLE_HDF5) + target_compile_definitions(itensor PRIVATE ITENSOR_USE_HDF5=1) + target_compile_definitions(itensor_check PRIVATE ITENSOR_USE_HDF5=1) +endif(ENABLE_HDF5) ########################## # pkg-config variables ########################## diff --git a/itensor/CMakeLists.txt b/itensor/CMakeLists.txt index 6de90a48b..ed4d150e7 100644 --- a/itensor/CMakeLists.txt +++ b/itensor/CMakeLists.txt @@ -94,7 +94,10 @@ if(ENABLE_HDF5) util/h5/h5object.cc util/h5/stl/string.cc util/h5/stl/vector.cc) - list(APPEND ${ITENSOR_SOURCE_FILES} ${H5_SOURCE_FILES}) + + set(ITENSOR_SOURCE_FILES + ${ITENSOR_SOURCE_FILES} + ${H5_SOURCE_FILES}) endif(ENABLE_HDF5) # Create the ITensor library @@ -109,6 +112,13 @@ target_include_directories(${targetname} INTERFACE ) set(ITENSORS_LINK_LIBRARIES ${CMAKE_CXX_FLAGS} blaspp lapackpp ${blaspp_headers} CACHE STRING "List of libraries which ITensor is dependent on") +if(ENABLE_HDF5) + set(ITENSORS_LINK_LIBRARIES + ${ITENSORS_LINK_LIBRARIES} + HDF5::HDF5 + ${HDF5_INCLUDE_DIRS}) +endif(ENABLE_HDF5) +message(STATUS "ITENSORS_LINK_LIBRARIES: ${ITENSORS_LINK_LIBRARIES}") target_link_libraries(${targetname} PUBLIC ${ITENSOR_LINK_LIBRARIES} blaspp lapackpp) # append current CMAKE_CXX_FLAGS diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index 6897c8b14..0fe8388d9 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -43,11 +43,14 @@ set(itensor_test_src_files test.cc ) -if(ITENSOR_USE_HDF5) +if(ENABLE_HDF5) set(h5_test_source_files hdf5_test.cc) - list(APPEND ${itensor_test_src_files} ${h5_test_source_files}) -endif(ITENSOR_USE_HDF5) + set(itensor_test_src_files + ${itensor_test_src_files} + ${h5_test_source_files} + ) +endif(ENABLE_HDF5) add_executable(${executable} EXCLUDE_FROM_ALL ${itensor_test_src_files}) # Add include directories and compiler flags for ta_test From b82846b93aa958c3bf4f75c15f5939fa52958194 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 14 Aug 2023 15:49:24 -0400 Subject: [PATCH 13/51] Add ITENSOR_USE_OMP flag --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3475a4d2b..d0c257d9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -279,6 +279,11 @@ if(${CMAKE_BUILD_TYPE} MATCHES Debug) target_compile_definitions(itensor_check PRIVATE DEBUG=1) endif(${CMAKE_BUILD_TYPE} MATCHES Debug) +if(ENABLE_OMP) + message(STATUS "Setting ITENSOR_USE_OMP") + target_compile_definitions(itensor PRIVATE ITENSOR_USE_OMP=1) + target_compile_definitions(itensor_check PRIVATE ITENSOR_USE_OMP=1) +endif() if(ENABLE_HDF5) target_compile_definitions(itensor PRIVATE ITENSOR_USE_HDF5=1) target_compile_definitions(itensor_check PRIVATE ITENSOR_USE_HDF5=1) From 1f3b9bdda3f60c1b90acb50464396551959de5d9 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 14 Aug 2023 15:49:43 -0400 Subject: [PATCH 14/51] Fix zgeev --- itensor/tensor/lapack/cmake_lapack_wrap.cc | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/itensor/tensor/lapack/cmake_lapack_wrap.cc b/itensor/tensor/lapack/cmake_lapack_wrap.cc index e7bf69dd2..154bd6ed2 100644 --- a/itensor/tensor/lapack/cmake_lapack_wrap.cc +++ b/itensor/tensor/lapack/cmake_lapack_wrap.cc @@ -479,27 +479,17 @@ zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' { static const LAPACK_INT one = 1; std::vector cpA; - std::vector work; - std::vector rwork; LAPACK_INT nevecl = (jobvl == 'V' ? n : 1); LAPACK_INT nevecr = (jobvr == 'V' ? n : 1); - LAPACK_INT lwork = std::max(one,4*n); - work.resize(lwork); - LAPACK_INT lrwork = std::max(one,2*n); - rwork.resize(lrwork); //Copy A data into cpA cpA.resize(n*n); auto pA = reinterpret_cast(A); std::copy(pA,pA+n*n,cpA.data()); - auto pd = reinterpret_cast(d); - auto pvl = reinterpret_cast(vl); - auto pvr = reinterpret_cast(vr); - LAPACK_INT info = 0; - std::vector W; - info = lapack::geev(lapack::char2job(jobvl), lapack::char2job(jobvr), n, cpA.data(), n, W.data(), vl, nevecl, vr, nevecr); + info = lapack::geev(lapack::char2job(jobvl), lapack::char2job(jobvr), n, + cpA.data(), n, d, vl, nevecl, vr, nevecr); return info; } From 65021ffb17af926f062ebb51cabb28f74eab41b3 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 14 Aug 2023 16:12:10 -0400 Subject: [PATCH 15/51] Force CMAKE_STANDARD 20 --- external/cuda.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/cuda.cmake b/external/cuda.cmake index cbadadf4d..c92e821c0 100644 --- a/external/cuda.cmake +++ b/external/cuda.cmake @@ -1,7 +1,7 @@ # cmake 3.17 decouples C++ and CUDA standards, see https://gitlab.kitware.com/cmake/cmake/issues/19123 # cmake 3.18 knows that CUDA 11 provides cuda_std_17 cmake_minimum_required(VERSION 3.18.0) -set(CMAKE_CUDA_STANDARD 17) +set(CMAKE_CUDA_STANDARD 20) set(CMAKE_CUDA_EXTENSIONS OFF) set(CMAKE_CUDA_STANDARD_REQUIRED ON) set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) From 22d92e805fd87b32be931269cec6dcdf067519b0 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 14 Aug 2023 18:09:13 -0400 Subject: [PATCH 16/51] Update HDF5, still not fully functioning missing an hdf5_hl target --- CMakeLists.txt | 22 ++++++++-------------- itensor/CMakeLists.txt | 8 ++------ 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d0c257d9b..6b7c2d734 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -247,21 +247,10 @@ endif(ENABLE_MPI) if(ENABLE_HDF5) MESSAGE(STATUS "Looking for HDF5") - FIND_PACKAGE(HDF5 REQUIRED CXX) - target_link_libraries(ITENSORS INTERFACE ${HDF5_LIBRARIES}) + FIND_PACKAGE(HDF5 REQUIRED) + include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR}) + target_link_libraries(ITENSORS INTERFACE ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES} HDF5::HDF5) endif(ENABLE_HDF5) -# if(ENABLE_BLAS_LAPACK) -# find_package(BLAS) -# find_package(LAPACK) -# if(LAPACK_FOUND AND BLAS_FOUND) -# set(lapackblas_libraries ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) - -# SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${BLAS_LINKER_FLAGS}") -# SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${LAPACK_LINKER_FLAGS}") -# endif() - -# target_link_libraries(ITENSORS INTERFACE ${lapackblas_libraries}) -# endif(ENABLE_BLAS_LAPACK) ########################## @@ -353,6 +342,11 @@ ADD_CUSTOM_TARGET(debug COMMENT "Switch CMAKE_BUILD_TYPE to Debug" ) +ADD_CUSTOM_TARGET(check + COMMAND ${CMAKE_COMMAND} -DCMAKE_BUILD_TYPE=Debug ${CMAKE_CURRENT_SOURCE_DIR} + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_CURRENT_BINARY_DIR} --target itensor_check + COMMENT "Check ITensors, done in Debug mode" + ) ADD_CUSTOM_TARGET(release COMMAND ${CMAKE_COMMAND} -DCMAKE_BUILD_TYPE=Release ${CMAKE_CURRENT_SOURCE_DIR} COMMAND ${CMAKE_COMMAND} --build ${CMAKE_CURRENT_BINARY_DIR} --target all diff --git a/itensor/CMakeLists.txt b/itensor/CMakeLists.txt index ed4d150e7..467725aeb 100644 --- a/itensor/CMakeLists.txt +++ b/itensor/CMakeLists.txt @@ -112,14 +112,10 @@ target_include_directories(${targetname} INTERFACE ) set(ITENSORS_LINK_LIBRARIES ${CMAKE_CXX_FLAGS} blaspp lapackpp ${blaspp_headers} CACHE STRING "List of libraries which ITensor is dependent on") +target_link_libraries(${targetname} PUBLIC ${ITENSOR_LINK_LIBRARIES} blaspp lapackpp) if(ENABLE_HDF5) - set(ITENSORS_LINK_LIBRARIES - ${ITENSORS_LINK_LIBRARIES} - HDF5::HDF5 - ${HDF5_INCLUDE_DIRS}) + target_link_libraries(${targetname} PUBLIC HDF5::HDF5) endif(ENABLE_HDF5) -message(STATUS "ITENSORS_LINK_LIBRARIES: ${ITENSORS_LINK_LIBRARIES}") -target_link_libraries(${targetname} PUBLIC ${ITENSOR_LINK_LIBRARIES} blaspp lapackpp) # append current CMAKE_CXX_FLAGS string(REPLACE " " ";" CMAKE_CXX_FLAG_LIST "${CMAKE_CXX_FLAGS}") From 02ebfb150bea448323d30cf96dddb1462b2cf86d Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 14 Aug 2023 18:32:54 -0400 Subject: [PATCH 17/51] Fix HDF5 library link --- CMakeLists.txt | 4 ++-- itensor/CMakeLists.txt | 2 +- unittest/CMakeLists.txt | 3 +++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b7c2d734..1934f3abf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -249,7 +249,7 @@ if(ENABLE_HDF5) MESSAGE(STATUS "Looking for HDF5") FIND_PACKAGE(HDF5 REQUIRED) include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR}) - target_link_libraries(ITENSORS INTERFACE ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES} HDF5::HDF5) + target_link_libraries(ITENSORS INTERFACE ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES} hdf5 hdf5_hl) endif(ENABLE_HDF5) @@ -354,4 +354,4 @@ ADD_CUSTOM_TARGET(release ) feature_summary(WHAT ALL - DESCRIPTION "=== ITensor Package/Feature Info ===") \ No newline at end of file + DESCRIPTION "=== ITensor Package/Feature Info ===") diff --git a/itensor/CMakeLists.txt b/itensor/CMakeLists.txt index 467725aeb..716f436ac 100644 --- a/itensor/CMakeLists.txt +++ b/itensor/CMakeLists.txt @@ -114,7 +114,7 @@ target_include_directories(${targetname} INTERFACE set(ITENSORS_LINK_LIBRARIES ${CMAKE_CXX_FLAGS} blaspp lapackpp ${blaspp_headers} CACHE STRING "List of libraries which ITensor is dependent on") target_link_libraries(${targetname} PUBLIC ${ITENSOR_LINK_LIBRARIES} blaspp lapackpp) if(ENABLE_HDF5) - target_link_libraries(${targetname} PUBLIC HDF5::HDF5) + target_link_libraries(${targetname} PUBLIC ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES} hdf5 hdf5_hl) endif(ENABLE_HDF5) # append current CMAKE_CXX_FLAGS diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index 0fe8388d9..c5d665434 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -59,6 +59,9 @@ target_include_directories(${executable} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ) target_link_libraries(${executable} itensor) +if(ENABLE_HDF5) + target_link_libraries(${executable} ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES} HDF5::HDF5) +endif(ENABLE_HDF5) # Add test suite add_test(itensor/unit/build "${CMAKE_COMMAND}" --build "${CMAKE_BINARY_DIR}" --target ${executable}) From b1280758d40212eb7f62a701abc15fc16324d169 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Tue, 15 Aug 2023 12:41:27 -0400 Subject: [PATCH 18/51] Try to add cmake check to jenkins --- jenkins/Jenkinsfile | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile index 4cb3bd0e0..e2c4b360d 100644 --- a/jenkins/Jenkinsfile +++ b/jenkins/Jenkinsfile @@ -25,6 +25,20 @@ pipeline { sh 'make -j2 -C sample' sh 'make -C unittest' } + steps{ + sh 'export source_dir=pwd' + sh 'cd ../' + sh 'mkdir test_cmake' + sh 'cd test_cmake' + sh 'cmake ${source_dir} \ + -DCMAKE_INSTALL_PREFIX="../install" \ + -DCMAKE_CXX_FLAGS="-std=c++20" \ + -DENABLE_OMP=ON \ + -DENABLE_MPI=ON \ + ' + sh 'make check -j10' + sh './unittest/itensor_check' + } } stage('linux-openblas') { From 798c4ec197f43dcbe33bfdfe4fc23a50a20c2841 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Tue, 15 Aug 2023 12:43:19 -0400 Subject: [PATCH 19/51] Use only one step --- jenkins/Jenkinsfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile index e2c4b360d..520f6eb57 100644 --- a/jenkins/Jenkinsfile +++ b/jenkins/Jenkinsfile @@ -20,12 +20,10 @@ pipeline { PLATFORM = 'lapack' } steps { - sh '''sed '/^PLATFORM=/,/^$/s/^/#/;/^#PLATFORM='$PLATFORM'$/,/^$/s/^#//' options.mk.sample > options.mk''' sh 'make -j2' + sh '''sed '/^PLATFORM=/,/^$/s/^/#/;/^#PLATFORM='$PLATFORM'$/,/^$/s/^#//' options.mk.sample > options.mk''' sh 'make -j2 -C sample' sh 'make -C unittest' - } - steps{ sh 'export source_dir=pwd' sh 'cd ../' sh 'mkdir test_cmake' From 28dca20395f3e7520fccdd7261d99ab991000aed Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Tue, 15 Aug 2023 12:45:23 -0400 Subject: [PATCH 20/51] Order was swapped by mistake --- jenkins/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile index 520f6eb57..122d7748b 100644 --- a/jenkins/Jenkinsfile +++ b/jenkins/Jenkinsfile @@ -20,8 +20,8 @@ pipeline { PLATFORM = 'lapack' } steps { - sh 'make -j2' sh '''sed '/^PLATFORM=/,/^$/s/^/#/;/^#PLATFORM='$PLATFORM'$/,/^$/s/^#//' options.mk.sample > options.mk''' + sh 'make -j2' sh 'make -j2 -C sample' sh 'make -C unittest' sh 'export source_dir=pwd' From 8c62c908cac1a4e945c0c19c349eab4fbc7dc460 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 15 Aug 2023 14:11:54 -0400 Subject: [PATCH 21/51] install cmake --- jenkins/Dockerfile.ubuntu | 1 + 1 file changed, 1 insertion(+) diff --git a/jenkins/Dockerfile.ubuntu b/jenkins/Dockerfile.ubuntu index 9054a3f81..3d7d0dc8c 100644 --- a/jenkins/Dockerfile.ubuntu +++ b/jenkins/Dockerfile.ubuntu @@ -7,6 +7,7 @@ RUN apt-get update && \ liblapack-dev \ liblapacke-dev \ libopenblas-dev \ + cmake\ && \ apt-get autoremove --purge -y && \ apt-get autoclean -y && \ From 4fcba3ba48267e41f7866296a0214ba6e8ab9044 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 15 Aug 2023 14:21:44 -0400 Subject: [PATCH 22/51] make all of cmake one sh command --- jenkins/Jenkinsfile | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile index 122d7748b..187d5231e 100644 --- a/jenkins/Jenkinsfile +++ b/jenkins/Jenkinsfile @@ -24,18 +24,17 @@ pipeline { sh 'make -j2' sh 'make -j2 -C sample' sh 'make -C unittest' - sh 'export source_dir=pwd' - sh 'cd ../' - sh 'mkdir test_cmake' - sh 'cd test_cmake' - sh 'cmake ${source_dir} \ + sh 'export source_dir=pwd; + cd ../; + mkdir test_cmake; + cd test_cmake; + cmake ${source_dir} \ -DCMAKE_INSTALL_PREFIX="../install" \ -DCMAKE_CXX_FLAGS="-std=c++20" \ -DENABLE_OMP=ON \ - -DENABLE_MPI=ON \ - ' - sh 'make check -j10' - sh './unittest/itensor_check' + -DENABLE_MPI=ON; + make check -j10; + ./unittest/itensor_check' } } From 17f66e91cb1a7a48e9d4e1534ee37aca49b1000f Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 15 Aug 2023 14:35:11 -0400 Subject: [PATCH 23/51] Add missing space --- jenkins/Dockerfile.ubuntu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jenkins/Dockerfile.ubuntu b/jenkins/Dockerfile.ubuntu index 3d7d0dc8c..fc6e725b3 100644 --- a/jenkins/Dockerfile.ubuntu +++ b/jenkins/Dockerfile.ubuntu @@ -7,7 +7,7 @@ RUN apt-get update && \ liblapack-dev \ liblapacke-dev \ libopenblas-dev \ - cmake\ + cmake \ && \ apt-get autoremove --purge -y && \ apt-get autoclean -y && \ From 3cdc1f8c0b7e9fea337a7ec44e425a7e4ddfad8e Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 15 Aug 2023 14:39:02 -0400 Subject: [PATCH 24/51] Adjust spaces --- jenkins/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile index 187d5231e..e867d5fe6 100644 --- a/jenkins/Jenkinsfile +++ b/jenkins/Jenkinsfile @@ -33,8 +33,8 @@ pipeline { -DCMAKE_CXX_FLAGS="-std=c++20" \ -DENABLE_OMP=ON \ -DENABLE_MPI=ON; - make check -j10; - ./unittest/itensor_check' + make check -j10; + ./unittest/itensor_check' } } From 40d94fc592615a80836c2148d0074cc8f400569c Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 15 Aug 2023 15:41:15 -0400 Subject: [PATCH 25/51] Fix formatting in Jenkinsfile --- jenkins/Jenkinsfile | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile index e867d5fe6..f8cc4763d 100644 --- a/jenkins/Jenkinsfile +++ b/jenkins/Jenkinsfile @@ -24,17 +24,19 @@ pipeline { sh 'make -j2' sh 'make -j2 -C sample' sh 'make -C unittest' - sh 'export source_dir=pwd; - cd ../; - mkdir test_cmake; - cd test_cmake; - cmake ${source_dir} \ - -DCMAKE_INSTALL_PREFIX="../install" \ - -DCMAKE_CXX_FLAGS="-std=c++20" \ - -DENABLE_OMP=ON \ - -DENABLE_MPI=ON; - make check -j10; - ./unittest/itensor_check' + sh ''' + export source_dir=pwd; + cd ../; + mkdir test_cmake; + cd test_cmake; + cmake ${source_dir} \ + -DCMAKE_INSTALL_PREFIX="../install" \ + -DCMAKE_CXX_FLAGS="-std=c++20" \ + -DENABLE_OMP=ON \ + -DENABLE_MPI=ON; + make check -j10; + ./unittest/itensor_check + ''' } } From 82cd3ece3c401b6034f6214e632eea03ba12f4dd Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 15 Aug 2023 15:53:26 -0400 Subject: [PATCH 26/51] ITENSORS -> ITENSOR --- CMakeLists.txt | 110 +++++++++++++++++----------------- cmake/itensor-config.cmake.in | 40 ++++++------- external/cuda.cmake | 4 +- external/linalgpp.cmake | 8 +-- itensor/CMakeLists.txt | 4 +- 5 files changed, 83 insertions(+), 83 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1934f3abf..6a22f01e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ # limitations under the License. cmake_minimum_required(VERSION 3.17.0) # decouples C++ and CUDA standards, see https://gitlab.kitware.com/cmake/cmake/issues/19123 -set(ITENSORS_REQUIRED_CMAKE_VERSION 3.17.0 CACHE INTERNAL "") +set(ITENSOR_REQUIRED_CMAKE_VERSION 3.17.0 CACHE INTERNAL "") ############################################################################### # CMake defaults to address key pain points @@ -50,23 +50,23 @@ list(APPEND CMAKE_MODULE_PATH "${vg_cmake_kit_SOURCE_DIR}/modules") # Announce ourselves ############################################################################### # see https://semver.org/ -set (ITENSORS_MAJOR_VERSION 4) -set (ITENSORS_MINOR_VERSION 0) -set (ITENSORS_PATCH_VERSION 0) -set (ITENSORS_PRERELEASE_ID beta.1) -set (ITENSORS_BUILD_ID ) - -set(ITENSORS_VERSION "${ITENSORS_MAJOR_VERSION}.${ITENSORS_MINOR_VERSION}.${ITENSORS_PATCH_VERSION}") -if (ITENSORS_PRERELEASE_ID) - set(ITENSORS_EXT_VERSION "${ITENSORS_VERSION}-${ITENSORS_PRERELEASE_ID}") -else(ITENSORS_PRERELEASE_ID) - set(ITENSORS_EXT_VERSION "${ITENSORS_VERSION}") -endif(ITENSORS_PRERELEASE_ID) -if (ITENSORS_BUILD_ID) - set(ITENSORS_EXT_VERSION "${ITENSORS_EXT_VERSION}+${ITENSORS_BUILD_ID}") -endif(ITENSORS_BUILD_ID) - -# extra cmake files are shipped with ITENSORS +set (ITENSOR_MAJOR_VERSION 4) +set (ITENSOR_MINOR_VERSION 0) +set (ITENSOR_PATCH_VERSION 0) +set (ITENSOR_PRERELEASE_ID beta.1) +set (ITENSOR_BUILD_ID ) + +set(ITENSOR_VERSION "${ITENSOR_MAJOR_VERSION}.${ITENSOR_MINOR_VERSION}.${ITENSOR_PATCH_VERSION}") +if (ITENSOR_PRERELEASE_ID) + set(ITENSOR_EXT_VERSION "${ITENSOR_VERSION}-${ITENSOR_PRERELEASE_ID}") +else(ITENSOR_PRERELEASE_ID) + set(ITENSOR_EXT_VERSION "${ITENSOR_VERSION}") +endif(ITENSOR_PRERELEASE_ID) +if (ITENSOR_BUILD_ID) + set(ITENSOR_EXT_VERSION "${ITENSOR_EXT_VERSION}+${ITENSOR_BUILD_ID}") +endif(ITENSOR_BUILD_ID) + +# extra cmake files are shipped with ITENSOR list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules") include(AppendFlags) @@ -77,15 +77,15 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.git) execute_process( COMMAND ${GIT_EXECUTABLE} rev-parse -q HEAD WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - OUTPUT_VARIABLE ITENSORS_REVISION ) + OUTPUT_VARIABLE ITENSOR_REVISION ) string(REGEX MATCH "[0-9a-f]*" - ITENSORS_REVISION "${ITENSORS_REVISION}") + ITENSOR_REVISION "${ITENSOR_REVISION}") else() - set(ITENSORS_REVISION "unknown") + set(ITENSOR_REVISION "unknown") endif() -project(ITENSORS - VERSION ${ITENSORS_VERSION} +project(ITENSOR + VERSION ${ITENSOR_VERSION} DESCRIPTION "ITensor: High-Performance Tensor Software Inspired By Tensor Diagrams" LANGUAGES CXX HOMEPAGE_URL "https://itensor.org/") @@ -99,20 +99,20 @@ set(TARGET_ARCH "${CMAKE_SYSTEM}-${CMAKE_SYSTEM_PROCESSOR}") ########################## # Standard build variables ########################## -set(ITENSORS_INSTALL_BINDIR "bin" - CACHE PATH "ITENSORS BIN install directory") -set(ITENSORS_INSTALL_INCLUDEDIR "include" - CACHE PATH "ITENSORS INCLUDE install directory") -set(ITENSORS_INSTALL_LIBDIR "lib" - CACHE PATH "ITENSORS LIB install directory") -set(ITENSORS_INSTALL_SHAREDIR "share/ITENSORS/${ITENSORS_MAJOR_VERSION}.${ITENSORS_MINOR_VERSION}.${ITENSORS_MICRO_VERSION}" - CACHE PATH "ITENSORS SHARE install directory") -set(ITENSORS_INSTALL_DATADIR "${ITENSORS_INSTALL_SHAREDIR}/data" - CACHE PATH "ITENSORS DATA install directory") -set(ITENSORS_INSTALL_DOCDIR "${ITENSORS_INSTALL_SHAREDIR}/doc" - CACHE PATH "ITENSORS DOC install directory") -set(ITENSORS_INSTALL_CMAKEDIR "lib/cmake/ITENSORS" - CACHE PATH "ITENSORS CMAKE install directory") +set(ITENSOR_INSTALL_BINDIR "bin" + CACHE PATH "ITENSOR BIN install directory") +set(ITENSOR_INSTALL_INCLUDEDIR "include" + CACHE PATH "ITENSOR INCLUDE install directory") +set(ITENSOR_INSTALL_LIBDIR "lib" + CACHE PATH "ITENSOR LIB install directory") +set(ITENSOR_INSTALL_SHAREDIR "share/ITENSOR/${ITENSOR_MAJOR_VERSION}.${ITENSOR_MINOR_VERSION}.${ITENSOR_MICRO_VERSION}" + CACHE PATH "ITENSOR SHARE install directory") +set(ITENSOR_INSTALL_DATADIR "${ITENSOR_INSTALL_SHAREDIR}/data" + CACHE PATH "ITENSOR DATA install directory") +set(ITENSOR_INSTALL_DOCDIR "${ITENSOR_INSTALL_SHAREDIR}/doc" + CACHE PATH "ITENSOR DOC install directory") +set(ITENSOR_INSTALL_CMAKEDIR "lib/cmake/ITENSOR" + CACHE PATH "ITENSOR CMAKE install directory") # Get standard build variables from the environment if they have not already been set if(NOT CMAKE_C_FLAGS OR NOT DEFINED CMAKE_C_FLAGS) @@ -142,7 +142,7 @@ set(CMAKE_SKIP_RPATH FALSE) # set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ ISO Standard version") #endif(ENABLE_HDF5) if (CMAKE_CXX_STANDARD VERSION_LESS 20) - message(FATAL_ERROR "C++ 2017 ISO Standard or higher is required to compile ITENSORS") + message(FATAL_ERROR "C++ 2017 ISO Standard or higher is required to compile ITENSOR") endif() # C++20 is only configurable via compile features with cmake 3.12 and older if (CMAKE_CXX_STANDARD EQUAL 20 AND CMAKE_VERSION VERSION_LESS 3.12.0) @@ -212,7 +212,7 @@ include_directories(${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR}) ########################## # external dependencies ########################## -add_library(ITENSORS INTERFACE) +add_library(ITENSOR INTERFACE) # required deps: # 1. CUDA first since others may depend on it if(ENABLE_CUDA) @@ -231,7 +231,7 @@ if(ENABLE_OMP) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") endif(OPENMP_FOUND) - target_link_libraries(ITENSORS INTERFACE ${OpenMP_C_LIBRARIES}) + target_link_libraries(ITENSOR INTERFACE ${OpenMP_C_LIBRARIES}) endif(ENABLE_OMP) if(ENABLE_MPI) @@ -242,14 +242,14 @@ if(ENABLE_MPI) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MPI_CXX_FLAGS}") endif(MPI_FOUND) - target_link_libraries(ITENSORS INTERFACE ${MPI_C_LIBRARIES}) + target_link_libraries(ITENSOR INTERFACE ${MPI_C_LIBRARIES}) endif(ENABLE_MPI) if(ENABLE_HDF5) MESSAGE(STATUS "Looking for HDF5") FIND_PACKAGE(HDF5 REQUIRED) include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR}) - target_link_libraries(ITENSORS INTERFACE ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES} hdf5 hdf5_hl) + target_link_libraries(ITENSOR INTERFACE ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES} hdf5 hdf5_hl) endif(ENABLE_HDF5) @@ -280,11 +280,11 @@ endif(ENABLE_HDF5) ########################## # pkg-config variables ########################## -foreach(_inc ${ITENSORS_CONFIG_INCLUDE_DIRS}) - append_flags(ITENSORS_PC_CFLAGS "-I${_inc}") +foreach(_inc ${ITENSOR_CONFIG_INCLUDE_DIRS}) + append_flags(ITENSOR_PC_CFLAGS "-I${_inc}") endforeach() -foreach(_lib ${ITENSORS_CONFIG_LIBRARIES}) - append_flags(ITENSORS_PC_LIBS "${_lib}") +foreach(_lib ${ITENSOR_CONFIG_LIBRARIES}) + append_flags(ITENSOR_PC_LIBS "${_lib}") endforeach() ########################## @@ -307,7 +307,7 @@ install(FILES ${PROJECT_BINARY_DIR}/itensor.pc # Create the version file write_basic_package_version_file(itensor-config-version.cmake - VERSION ${ITENSORS_VERSION} COMPATIBILITY AnyNewerVersion) + VERSION ${ITENSOR_VERSION} COMPATIBILITY AnyNewerVersion) # Create the targets file export(EXPORT itensor @@ -316,22 +316,22 @@ export(EXPORT itensor # Create the configure file configure_package_config_file(cmake/itensor-config.cmake.in "${PROJECT_BINARY_DIR}/itensor-config.cmake" - INSTALL_DESTINATION "${ITENSORS_INSTALL_CMAKEDIR}" - PATH_VARS CMAKE_INSTALL_PREFIX ITENSORS_INSTALL_BINDIR - ITENSORS_INSTALL_INCLUDEDIR ITENSORS_INSTALL_LIBDIR - ITENSORS_INSTALL_DOCDIR ITENSORS_INSTALL_CMAKEDIR) + INSTALL_DESTINATION "${ITENSOR_INSTALL_CMAKEDIR}" + PATH_VARS CMAKE_INSTALL_PREFIX ITENSOR_INSTALL_BINDIR + ITENSOR_INSTALL_INCLUDEDIR ITENSOR_INSTALL_LIBDIR + ITENSOR_INSTALL_DOCDIR ITENSOR_INSTALL_CMAKEDIR) # Install config, version, and target files install(EXPORT itensor FILE "itensor-targets.cmake" - DESTINATION "${ITENSORS_INSTALL_CMAKEDIR}" - COMPONENT ITENSORS) + DESTINATION "${ITENSOR_INSTALL_CMAKEDIR}" + COMPONENT ITENSOR) install(FILES "${PROJECT_BINARY_DIR}/itensor-config.cmake" "${PROJECT_BINARY_DIR}/itensor-config-version.cmake" - DESTINATION "${ITENSORS_INSTALL_CMAKEDIR}" - COMPONENT ITENSORS) + DESTINATION "${ITENSOR_INSTALL_CMAKEDIR}" + COMPONENT ITENSOR) # Add target to allow on-the-fly switching of build type diff --git a/cmake/itensor-config.cmake.in b/cmake/itensor-config.cmake.in index ee26973e6..6fb0936d2 100644 --- a/cmake/itensor-config.cmake.in +++ b/cmake/itensor-config.cmake.in @@ -1,14 +1,14 @@ # - CMAKE Config file for the ITensors package # The following variables are defined: -# ITENSORS_FOUND - System has the ITensors package -# ITENSORS_INCLUDE_DIRS - The ITensors include directory -# ITENSORS_LIBRARIES - The ITensors libraries and their dependencies -# ITENSORS_VERSION - The ITensors (core) version; see semver.org -# ITENSORS_EXT_VERSION - The ITensors version, includes prerelease id; see semver.org +# ITENSOR_FOUND - System has the ITensors package +# ITENSOR_INCLUDE_DIRS - The ITensors include directory +# ITENSOR_LIBRARIES - The ITensors libraries and their dependencies +# ITENSOR_VERSION - The ITensors (core) version; see semver.org +# ITENSOR_EXT_VERSION - The ITensors version, includes prerelease id; see semver.org # Set package version -set(ITENSORS_VERSION "@ITENSORS_VERSION@") -set(ITENSORS_EXT_VERSION "@ITENSORS_EXT_VERSION@") +set(ITENSOR_VERSION "@ITENSOR_VERSION@") +set(ITENSOR_EXT_VERSION "@ITENSOR_EXT_VERSION@") @PACKAGE_INIT@ @@ -18,23 +18,23 @@ if(NOT TARGET itensor) endif() # Set the itensor compiled library target -set(ITENSORS_LIBRARIES itensor) +set(ITENSOR_LIBRARIES itensor) -set(ITENSORS_SOURCE_DIR "@ITENSORS_SOURCE_DIR@") -set(ITENSORS_BINARY_DIR "@ITENSORS_BINARY_DIR@") +set(ITENSOR_SOURCE_DIR "@ITENSOR_SOURCE_DIR@") +set(ITENSOR_BINARY_DIR "@ITENSOR_BINARY_DIR@") -set(ITENSORS_BUILD_INCLUDE_DIRS "${ITENSORS_SOURCE_DIR} "${ITENSORS_BINARY_DIR}") -set(ITENSORS_INSTALL_INCLUDE_DIRS "@PACKAGE_ITENSORS_INSTALL_INCLUDEDIR@" - "@PACKAGE_ITENSORS_INSTALL_INCLUDEDIR@") +set(ITENSOR_BUILD_INCLUDE_DIRS "${ITENSOR_SOURCE_DIR} "${ITENSOR_BINARY_DIR}") +set(ITENSOR_INSTALL_INCLUDE_DIRS "@PACKAGE_ITENSOR_INSTALL_INCLUDEDIR@" + "@PACKAGE_ITENSOR_INSTALL_INCLUDEDIR@") -# define ITENSORS_INCLUDE_DIRS according to where we are compiling: ITensor build tree or outside -# external packages should use ITENSORS_BUILD_INCLUDE_DIRS and ITENSORS_INSTALL_INCLUDE_DIRS directly -if(CMAKE_CURRENT_LIST_DIR EQUAL ITENSORS_BINARY_DIR) - set(ITENSORS_INCLUDE_DIRS "${ITENSORS_BUILD_INCLUDE_DIRS}") +# define ITENSOR_INCLUDE_DIRS according to where we are compiling: ITensor build tree or outside +# external packages should use ITENSOR_BUILD_INCLUDE_DIRS and ITENSOR_INSTALL_INCLUDE_DIRS directly +if(CMAKE_CURRENT_LIST_DIR EQUAL ITENSOR_BINARY_DIR) + set(ITENSOR_INCLUDE_DIRS "${ITENSOR_BUILD_INCLUDE_DIRS}") else() - set(ITENSORS_INCLUDE_DIRS "${ITENSORS_INSTALL_INCLUDE_DIRS}") + set(ITENSOR_INCLUDE_DIRS "${ITENSOR_INSTALL_INCLUDE_DIRS}") endif() -set(ITENSORS_CMAKE_TOOLCHAIN_FILE "@CMAKE_TOOLCHAIN_FILE@") +set(ITENSOR_CMAKE_TOOLCHAIN_FILE "@CMAKE_TOOLCHAIN_FILE@") -set(ITENSORS_FOUND TRUE) \ No newline at end of file +set(ITENSOR_FOUND TRUE) \ No newline at end of file diff --git a/external/cuda.cmake b/external/cuda.cmake index c92e821c0..412aec813 100644 --- a/external/cuda.cmake +++ b/external/cuda.cmake @@ -15,10 +15,10 @@ endif() enable_language(CUDA) set(CUDA_FOUND TRUE) -set(ITENSORS_HAS_CUDA 1 CACHE BOOL "Whether ITensor has CUDA support") +set(ITENSOR_HAS_CUDA 1 CACHE BOOL "Whether ITensor has CUDA support") if(ENABLE_CUDA_ERROR_CHECK) - set (ITENSORS_CHECK_CUDA_ERROR 1) + set (ITENSOR_CHECK_CUDA_ERROR 1) endif(ENABLE_CUDA_ERROR_CHECK) # find CUDA toolkit diff --git a/external/linalgpp.cmake b/external/linalgpp.cmake index 89f8a1843..62fb5ea25 100644 --- a/external/linalgpp.cmake +++ b/external/linalgpp.cmake @@ -5,11 +5,11 @@ if (ENABLE_WFN91_LINALG_DISCOVERY_KIT) include(${vg_cmake_kit_SOURCE_DIR}/modules/FindLinalg.cmake) endif(ENABLE_WFN91_LINALG_DISCOVERY_KIT) include(${vg_cmake_kit_SOURCE_DIR}/modules/FindOrFetchLinalgPP.cmake) -target_link_libraries(ITENSORS INTERFACE blaspp lapackpp) +target_link_libraries(ITENSOR INTERFACE blaspp lapackpp) if (TARGET blaspp_headers) - target_link_libraries(ITENSORS INTERFACE blaspp_headers) + target_link_libraries(ITENSOR INTERFACE blaspp_headers) endif () -target_compile_definitions(ITENSORS INTERFACE -DITENSORS_HAS_BLAS_LAPACK=1 -DLAPACK_COMPLEX_CPP=1) +target_compile_definitions(ITENSOR INTERFACE -DITENSOR_HAS_BLAS_LAPACK=1 -DLAPACK_COMPLEX_CPP=1) if (BLAS_IS_MKL) - target_compile_definitions(ITENSORS INTERFACE -DITENSORS_HAS_INTEL_MKL=1) + target_compile_definitions(ITENSOR INTERFACE -DITENSOR_HAS_INTEL_MKL=1) endif () \ No newline at end of file diff --git a/itensor/CMakeLists.txt b/itensor/CMakeLists.txt index 716f436ac..3a8ebb92b 100644 --- a/itensor/CMakeLists.txt +++ b/itensor/CMakeLists.txt @@ -108,10 +108,10 @@ set(targetname itensor) target_include_directories(${targetname} INTERFACE $ $ - $ + $ ) -set(ITENSORS_LINK_LIBRARIES ${CMAKE_CXX_FLAGS} blaspp lapackpp ${blaspp_headers} CACHE STRING "List of libraries which ITensor is dependent on") +set(ITENSOR_LINK_LIBRARIES blaspp lapackpp ${blaspp_headers} CACHE STRING "List of libraries which ITensor is dependent on") target_link_libraries(${targetname} PUBLIC ${ITENSOR_LINK_LIBRARIES} blaspp lapackpp) if(ENABLE_HDF5) target_link_libraries(${targetname} PUBLIC ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES} hdf5 hdf5_hl) From 7ca63f0da59bea0ee50761319b504fe494205fbb Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 15 Aug 2023 16:04:08 -0400 Subject: [PATCH 27/51] Build in src --- jenkins/Jenkinsfile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile index f8cc4763d..a5807e750 100644 --- a/jenkins/Jenkinsfile +++ b/jenkins/Jenkinsfile @@ -26,11 +26,8 @@ pipeline { sh 'make -C unittest' sh ''' export source_dir=pwd; - cd ../; - mkdir test_cmake; - cd test_cmake; cmake ${source_dir} \ - -DCMAKE_INSTALL_PREFIX="../install" \ + -DCMAKE_INSTALL_PREFIX="." \ -DCMAKE_CXX_FLAGS="-std=c++20" \ -DENABLE_OMP=ON \ -DENABLE_MPI=ON; From 593748899b6ed42c80a809d481150848fa9cb3ab Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 15 Aug 2023 16:18:03 -0400 Subject: [PATCH 28/51] use . instead of pwd --- jenkins/Jenkinsfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile index a5807e750..23d71d6d8 100644 --- a/jenkins/Jenkinsfile +++ b/jenkins/Jenkinsfile @@ -25,8 +25,7 @@ pipeline { sh 'make -j2 -C sample' sh 'make -C unittest' sh ''' - export source_dir=pwd; - cmake ${source_dir} \ + cmake . \ -DCMAKE_INSTALL_PREFIX="." \ -DCMAKE_CXX_FLAGS="-std=c++20" \ -DENABLE_OMP=ON \ From 1d2761b272b810f68bef3ba93dfa47be9c9776a3 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 15 Aug 2023 16:35:35 -0400 Subject: [PATCH 29/51] Make sure cmake is high enough version --- jenkins/Dockerfile.ubuntu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jenkins/Dockerfile.ubuntu b/jenkins/Dockerfile.ubuntu index fc6e725b3..53cd3eaa6 100644 --- a/jenkins/Dockerfile.ubuntu +++ b/jenkins/Dockerfile.ubuntu @@ -7,7 +7,7 @@ RUN apt-get update && \ liblapack-dev \ liblapacke-dev \ libopenblas-dev \ - cmake \ + cmake=3.17.0 \ && \ apt-get autoremove --purge -y && \ apt-get autoclean -y && \ From bf9ac88f7903262e00ed13ad0043d9915fbee507 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 15 Aug 2023 17:06:37 -0400 Subject: [PATCH 30/51] Try to install cmake manually --- jenkins/Dockerfile.ubuntu | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/jenkins/Dockerfile.ubuntu b/jenkins/Dockerfile.ubuntu index 53cd3eaa6..5d12757bd 100644 --- a/jenkins/Dockerfile.ubuntu +++ b/jenkins/Dockerfile.ubuntu @@ -7,8 +7,14 @@ RUN apt-get update && \ liblapack-dev \ liblapacke-dev \ libopenblas-dev \ - cmake=3.17.0 \ + wget \ && \ apt-get autoremove --purge -y && \ apt-get autoclean -y && \ + wget https://github.com/Kitware/CMake/releases/download/v3.20.0/cmake-3.20.0.tar.gz && \ + tar zxvf cmake-3.20.0.tar.gz && \ + cd cmake-3.20.0.tar.gz && \ + ./bootstrap && \ + make && \ + make install && \ rm -rf /var/cache/apt/* /var/lib/apt/lists/* From fd5353862653a7b057b7fd05839515973befe718 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 15 Aug 2023 17:09:49 -0400 Subject: [PATCH 31/51] fix spelling error --- jenkins/Dockerfile.ubuntu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jenkins/Dockerfile.ubuntu b/jenkins/Dockerfile.ubuntu index 5d12757bd..b31e7719c 100644 --- a/jenkins/Dockerfile.ubuntu +++ b/jenkins/Dockerfile.ubuntu @@ -13,7 +13,7 @@ RUN apt-get update && \ apt-get autoclean -y && \ wget https://github.com/Kitware/CMake/releases/download/v3.20.0/cmake-3.20.0.tar.gz && \ tar zxvf cmake-3.20.0.tar.gz && \ - cd cmake-3.20.0.tar.gz && \ + cd cmake-3.20.0 && \ ./bootstrap && \ make && \ make install && \ From b47a7ab6899fa1706de47efba3240f3e7bb9f3ec Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 15 Aug 2023 17:34:19 -0400 Subject: [PATCH 32/51] Add openssl for cmake --- jenkins/Dockerfile.ubuntu | 1 + 1 file changed, 1 insertion(+) diff --git a/jenkins/Dockerfile.ubuntu b/jenkins/Dockerfile.ubuntu index b31e7719c..bf7512c50 100644 --- a/jenkins/Dockerfile.ubuntu +++ b/jenkins/Dockerfile.ubuntu @@ -7,6 +7,7 @@ RUN apt-get update && \ liblapack-dev \ liblapacke-dev \ libopenblas-dev \ + libssl-dev \ wget \ && \ apt-get autoremove --purge -y && \ From a79fe9ded3e548d22d0fd4df16ed1ff5c589a94e Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 15 Aug 2023 17:53:26 -0400 Subject: [PATCH 33/51] Fix CMake flags in include dirs --- CMakeLists.txt | 12 ++++-------- itensor/CMakeLists.txt | 4 ++-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a22f01e9..46489a20f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -261,21 +261,17 @@ set(CMAKE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) add_subdirectory(itensor) add_subdirectory(unittest) -target_compile_definitions(itensor PRIVATE ITENSOR_USE_CMAKE=1) -target_compile_definitions(itensor_check PRIVATE ITENSOR_USE_CMAKE=1) +target_compile_definitions(itensor PUBLIC ITENSOR_USE_CMAKE=1) if(${CMAKE_BUILD_TYPE} MATCHES Debug) - target_compile_definitions(itensor PRIVATE DEBUG=1) - target_compile_definitions(itensor_check PRIVATE DEBUG=1) + target_compile_definitions(itensor PUBLIC DEBUG=1) endif(${CMAKE_BUILD_TYPE} MATCHES Debug) if(ENABLE_OMP) message(STATUS "Setting ITENSOR_USE_OMP") - target_compile_definitions(itensor PRIVATE ITENSOR_USE_OMP=1) - target_compile_definitions(itensor_check PRIVATE ITENSOR_USE_OMP=1) + target_compile_definitions(itensor PUBLIC ITENSOR_USE_OMP=1) endif() if(ENABLE_HDF5) - target_compile_definitions(itensor PRIVATE ITENSOR_USE_HDF5=1) - target_compile_definitions(itensor_check PRIVATE ITENSOR_USE_HDF5=1) + target_compile_definitions(itensor PUBLIC ITENSOR_USE_HDF5=1) endif(ENABLE_HDF5) ########################## # pkg-config variables diff --git a/itensor/CMakeLists.txt b/itensor/CMakeLists.txt index 3a8ebb92b..1f8ce0d48 100644 --- a/itensor/CMakeLists.txt +++ b/itensor/CMakeLists.txt @@ -106,8 +106,8 @@ add_library(itensor ${ITENSOR_SOURCE_FILES} ${ITENSOR_HEADER_FILES}) set(targetname itensor) target_include_directories(${targetname} INTERFACE - $ - $ + $ + $ $ ) From 0e0d20757832301ae94c01b4824399730d04cfde Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 15 Aug 2023 17:53:45 -0400 Subject: [PATCH 34/51] Use pip instead of make --- jenkins/Dockerfile.ubuntu | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/jenkins/Dockerfile.ubuntu b/jenkins/Dockerfile.ubuntu index bf7512c50..ee0b93505 100644 --- a/jenkins/Dockerfile.ubuntu +++ b/jenkins/Dockerfile.ubuntu @@ -7,15 +7,10 @@ RUN apt-get update && \ liblapack-dev \ liblapacke-dev \ libopenblas-dev \ - libssl-dev \ + python3-pip \ wget \ && \ apt-get autoremove --purge -y && \ apt-get autoclean -y && \ - wget https://github.com/Kitware/CMake/releases/download/v3.20.0/cmake-3.20.0.tar.gz && \ - tar zxvf cmake-3.20.0.tar.gz && \ - cd cmake-3.20.0 && \ - ./bootstrap && \ - make && \ - make install && \ + pip install cmake && \ rm -rf /var/cache/apt/* /var/lib/apt/lists/* From bf574d4b6f46bccbd2afbc65363764853d0ab6c8 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Wed, 16 Aug 2023 16:17:44 -0400 Subject: [PATCH 35/51] link to blas/lapack privately and consolidate lapack wrap files --- itensor/CMakeLists.txt | 4 +- .../tensor/{lapack => }/cmake_lapack_wrap.cc | 6 +- itensor/tensor/lapack/cmake_lapack_wrap.h | 795 ---------------- itensor/tensor/lapack/makefile_lapack_wrap.cc | 844 ----------------- itensor/tensor/lapack/makefile_lapack_wrap.h | 787 ---------------- itensor/tensor/lapack_wrap.cc | 849 +++++++++++++++++- itensor/tensor/lapack_wrap.h | 804 ++++++++++++++++- 7 files changed, 1649 insertions(+), 2440 deletions(-) rename itensor/tensor/{lapack => }/cmake_lapack_wrap.cc (99%) delete mode 100644 itensor/tensor/lapack/cmake_lapack_wrap.h delete mode 100644 itensor/tensor/lapack/makefile_lapack_wrap.cc delete mode 100644 itensor/tensor/lapack/makefile_lapack_wrap.h diff --git a/itensor/CMakeLists.txt b/itensor/CMakeLists.txt index 1f8ce0d48..898b18793 100644 --- a/itensor/CMakeLists.txt +++ b/itensor/CMakeLists.txt @@ -57,7 +57,7 @@ set(ITENSOR_SOURCE_FILES util/args.cc util/input.cc util/cputime.cc - tensor/lapack_wrap.cc + tensor/cmake_lapack_wrap.cc tensor/vec.cc tensor/mat.cc tensor/gemm.cc @@ -112,7 +112,7 @@ target_include_directories(${targetname} INTERFACE ) set(ITENSOR_LINK_LIBRARIES blaspp lapackpp ${blaspp_headers} CACHE STRING "List of libraries which ITensor is dependent on") -target_link_libraries(${targetname} PUBLIC ${ITENSOR_LINK_LIBRARIES} blaspp lapackpp) +target_link_libraries(${targetname} LINK_PRIVATE ${ITENSOR_LINK_LIBRARIES} blaspp lapackpp) if(ENABLE_HDF5) target_link_libraries(${targetname} PUBLIC ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES} hdf5 hdf5_hl) endif(ENABLE_HDF5) diff --git a/itensor/tensor/lapack/cmake_lapack_wrap.cc b/itensor/tensor/cmake_lapack_wrap.cc similarity index 99% rename from itensor/tensor/lapack/cmake_lapack_wrap.cc rename to itensor/tensor/cmake_lapack_wrap.cc index 154bd6ed2..473b8eb37 100644 --- a/itensor/tensor/lapack/cmake_lapack_wrap.cc +++ b/itensor/tensor/cmake_lapack_wrap.cc @@ -13,8 +13,10 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "itensor/tensor/lapack/cmake_lapack_wrap.h" -//#include "itensor/tensor/permutecplx.h" +#include // BLASPP +#include // LAPACKPP + +#include "itensor/tensor/lapack_wrap.h" namespace itensor { diff --git a/itensor/tensor/lapack/cmake_lapack_wrap.h b/itensor/tensor/lapack/cmake_lapack_wrap.h deleted file mode 100644 index a1d3bca7f..000000000 --- a/itensor/tensor/lapack/cmake_lapack_wrap.h +++ /dev/null @@ -1,795 +0,0 @@ -// -// Copyright 2018 The Simons Foundation, Inc. - All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef __ITENSOR_LAPACK_WRAP_h -#define __ITENSOR_LAPACK_WRAP_h - -#include -//#include "config.h" -#include "itensor/types.h" -#include "itensor/util/timers.h" - -#include // BLASPP -#include // LAPACKPP - -#define LAPACK_INT lapack_int -#define LAPACK_REAL double -#define LAPACK_COMPLEX lapack_complex_double -#define BLAS_LAYOUT blas::Layout::ColMajor -// -// Headers and typedefs -// - -//// -//// -//// Generic Linux LAPACK -//// -//// -//#ifdef PLATFORM_lapack -// -//#define LAPACK_REQUIRE_EXTERN -// -//namespace itensor { -// using LAPACK_INT = int; -// using LAPACK_REAL = double; -// typedef struct -// { -// LAPACK_REAL real, imag; -// } LAPACK_COMPLEX; -//} -//#elif defined PLATFORM_openblas -// -//#define ITENSOR_USE_CBLAS -// -//#include "cblas.h" -//#include "lapacke.h" -//#undef I //lapacke.h includes complex.h which defined an `I` macro -// //that can cause problems, so best to undefine it -// -//namespace itensor { -//using LAPACK_INT = lapack_int; -//using LAPACK_REAL = double; -//using LAPACK_COMPLEX = lapack_complex_double; -// -//inline LAPACK_REAL& -//realRef(LAPACK_COMPLEX & z) -// { -// auto* p = reinterpret_cast(&z); -// return p[0]; -// } -// -//inline LAPACK_REAL& -//imagRef(LAPACK_COMPLEX & z) -// { -// auto* p = reinterpret_cast(&z); -// return p[1]; -// } -//} -// -//// -//// -//// Apple Accelerate/vecLib -//// -//// -//#elif defined PLATFORM_macos -// -//#define ITENSOR_USE_CBLAS -////#define ITENSOR_USE_ZGEMM -// -//#include -// namespace itensor { -// using LAPACK_INT = __CLPK_integer; -// using LAPACK_REAL = __CLPK_doublereal; -// using LAPACK_COMPLEX = __CLPK_doublecomplex; -// -// inline LAPACK_REAL& -// realRef(LAPACK_COMPLEX & z) { return z.r; } -// -// inline LAPACK_REAL& -// imagRef(LAPACK_COMPLEX & z) { return z.i; } -// } - -//// -//// -//// Intel MKL -//// -//// -//#elif defined PLATFORM_mkl -// -//#define ITENSOR_USE_CBLAS -//#define ITENSOR_USE_ZGEMM -// -//#include "mkl_cblas.h" -//#include "mkl_lapack.h" -// namespace itensor { -// using LAPACK_INT = MKL_INT; -// using LAPACK_REAL = double; -// using LAPACK_COMPLEX = MKL_Complex16; -// -// inline LAPACK_REAL& -// realRef(LAPACK_COMPLEX & z) { return z.real; } -// -// inline LAPACK_REAL& -// imagRef(LAPACK_COMPLEX & z) { return z.imag; } -// } - -//// -//// -//// AMD ACML -//// -//// -//#elif defined PLATFORM_acml -// -//#define LAPACK_REQUIRE_EXTERN -////#include "acml.h" -// namespace itensor { -// using LAPACK_INT = int; -// using LAPACK_REAL = double; -// typedef struct -// { -// LAPACK_REAL real, imag; -// } LAPACK_COMPLEX; -// -// inline LAPACK_REAL& -// realRef(LAPACK_COMPLEX & z) { return z.real; } -// -// inline LAPACK_REAL& -// imagRef(LAPACK_COMPLEX & z) { return z.imag; } -// } -// -//#endif // different PLATFORM types -// -// -// -//#ifdef FORTRAN_NO_TRAILING_UNDERSCORE -//#define F77NAME(x) x -//#else -//#if defined(LAPACK_GLOBAL) || defined(LAPACK_NAME) -//#define F77NAME(x) LAPACK_##x -//#else -//#define F77NAME(x) x##_ -//#endif -//#endif - - -namespace itensor { - -// -// -// Forward declarations of fortran lapack routines -// -// -#ifdef LAPACK_REQUIRE_EXTERN -extern "C" { - -//dnrm2 declaration -#ifdef ITENSOR_USE_CBLAS -LAPACK_REAL cblas_dnrm2(LAPACK_INT N, LAPACK_REAL *X, LAPACK_INT incX); -#else -LAPACK_REAL F77NAME(dnrm2)(LAPACK_INT* N, LAPACK_REAL* X, LAPACK_INT* incx); -#endif - - -//daxpy declaration -#ifdef ITENSOR_USE_CBLAS -void cblas_daxpy(const int n, const double alpha, const double *X, const int incX, double *Y, const int incY); -#else -void F77NAME(daxpy)(LAPACK_INT* n, LAPACK_REAL* alpha, - LAPACK_REAL* X, LAPACK_INT* incx, - LAPACK_REAL* Y, LAPACK_INT* incy); -#endif - -//ddot declaration -#ifdef ITENSOR_USE_CBLAS -LAPACK_REAL -cblas_ddot(const LAPACK_INT N, const LAPACK_REAL *X, const LAPACK_INT incx, const LAPACK_REAL *Y, const LAPACK_INT incy); -#else -LAPACK_REAL F77NAME(ddot)(LAPACK_INT* N, LAPACK_REAL* X, LAPACK_INT* incx, LAPACK_REAL* Y, LAPACK_INT* incy); -#endif - -//zdotc declaration -#ifdef ITENSOR_USE_CBLAS -LAPACK_REAL -cblas_zdotc_sub(const LAPACK_INT N, const void *X, const LAPACK_INT incx, const void *Y, const LAPACK_INT incy, void *res); -#else -LAPACK_COMPLEX F77NAME(zdotc)(LAPACK_INT* N, LAPACK_COMPLEX* X, LAPACK_INT* incx, LAPACK_COMPLEX* Y, LAPACK_INT* incy); -#endif - -//dgemm declaration -#ifdef ITENSOR_USE_CBLAS -void cblas_dgemm(const enum CBLAS_ORDER __Order, - const enum CBLAS_TRANSPOSE __TransA, - const enum CBLAS_TRANSPOSE __TransB, const int __M, const int __N, - const int __K, const double __alpha, const double *__A, - const int __lda, const double *__B, const int __ldb, - const double __beta, double *__C, const int __ldc); -#else -void F77NAME(dgemm)(char*,char*,LAPACK_INT*,LAPACK_INT*,LAPACK_INT*, - LAPACK_REAL*,LAPACK_REAL*,LAPACK_INT*,LAPACK_REAL*, - LAPACK_INT*,LAPACK_REAL*,LAPACK_REAL*,LAPACK_INT*); -#endif - -//zgemm declaration -#ifdef PLATFORM_openblas -void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, - OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, - OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, - OPENBLAS_CONST blasint M, - OPENBLAS_CONST blasint N, - OPENBLAS_CONST blasint K, - OPENBLAS_CONST double *alpha, - OPENBLAS_CONST double *A, - OPENBLAS_CONST blasint lda, - OPENBLAS_CONST double *B, - OPENBLAS_CONST blasint ldb, - OPENBLAS_CONST double *beta, - double *C, - OPENBLAS_CONST blasint ldc); -#else //platform not openblas - -#ifdef ITENSOR_USE_CBLAS -void cblas_zgemm(const enum CBLAS_ORDER __Order, - const enum CBLAS_TRANSPOSE __TransA, - const enum CBLAS_TRANSPOSE __TransB, const int __M, const int __N, - const int __K, const void *__alpha, const void *__A, const int __lda, - const void *__B, const int __ldb, const void *__beta, void *__C, - const int __ldc); -#else -void F77NAME(zgemm)(char* transa,char* transb,LAPACK_INT* m,LAPACK_INT* n,LAPACK_INT* k, - LAPACK_COMPLEX* alpha,LAPACK_COMPLEX* A,LAPACK_INT* LDA,LAPACK_COMPLEX* B, - LAPACK_INT* LDB,LAPACK_COMPLEX* beta,LAPACK_COMPLEX* C,LAPACK_INT* LDC); -#endif - -#endif //zgemm declaration - -//dgemv declaration -#ifdef ITENSOR_USE_CBLAS -void cblas_dgemv(const enum CBLAS_ORDER Order, - const enum CBLAS_TRANSPOSE TransA, const LAPACK_INT M, const LAPACK_INT N, - const LAPACK_REAL alpha, const LAPACK_REAL *A, const LAPACK_INT lda, - const LAPACK_REAL *X, const LAPACK_INT incX, const LAPACK_REAL beta, LAPACK_REAL *Y, - const LAPACK_INT incY); -#else -void F77NAME(dgemv)(char* transa,LAPACK_INT* M,LAPACK_INT* N,LAPACK_REAL* alpha, LAPACK_REAL* A, - LAPACK_INT* LDA, LAPACK_REAL* X, LAPACK_INT* incx, LAPACK_REAL* beta, - LAPACK_REAL* Y, LAPACK_INT* incy); -#endif - -//zgemv declaration -#ifdef PLATFORM_openblas -void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, - OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, - OPENBLAS_CONST blasint m, - OPENBLAS_CONST blasint n, - OPENBLAS_CONST double *alpha, - OPENBLAS_CONST double *a, - OPENBLAS_CONST blasint lda, - OPENBLAS_CONST double *x, - OPENBLAS_CONST blasint incx, - OPENBLAS_CONST double *beta, - double *y, - OPENBLAS_CONST blasint incy); -#else -#ifdef ITENSOR_USE_CBLAS -void cblas_zgemv(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE trans, const LAPACK_INT m, - const LAPACK_INT n, const void *alpha, const void *a, const LAPACK_INT lda, - const void *x, const LAPACK_INT incx, const void *beta, void *y, const LAPACK_INT incy); -#else -void F77NAME(zgemv)(char* transa,LAPACK_INT* M,LAPACK_INT* N,LAPACK_COMPLEX* alpha, LAPACK_COMPLEX* A, - LAPACK_INT* LDA, LAPACK_COMPLEX* X, LAPACK_INT* incx, LAPACK_COMPLEX* beta, - LAPACK_COMPLEX* Y, LAPACK_INT* incy); -#endif -#endif //zgemv declaration - -#ifdef PLATFORM_acml -void F77NAME(dsyev)(char *jobz, char *uplo, int *n, double *a, int *lda, - double *w, double *work, int *lwork, int *info, - int jobz_len, int uplo_len); -#else -void F77NAME(dsyev)(const char* jobz, const char* uplo, const LAPACK_INT* n, double* a, - const LAPACK_INT* lda, double* w, double* work, const LAPACK_INT* lwork, - LAPACK_INT* info ); -#endif - -#ifdef ITENSOR_USE_CBLAS -void cblas_dscal(const LAPACK_INT N, const LAPACK_REAL alpha, LAPACK_REAL* X,const LAPACK_INT incX); -#else -void F77NAME(dscal)(LAPACK_INT* N, LAPACK_REAL* alpha, LAPACK_REAL* X,LAPACK_INT* incX); -#endif - - -#ifdef PLATFORM_acml -void F77NAME(dgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, - double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, - double *work, LAPACK_INT *lwork, LAPACK_INT *iwork, LAPACK_INT *info, int jobz_len); -#else -void F77NAME(dgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, - double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, - double *work, LAPACK_INT *lwork, LAPACK_INT *iwork, LAPACK_INT *info); -#endif - - -#ifdef PLATFORM_acml - void F77NAME(dgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, - double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, - double *work, LAPACK_INT *lwork, LAPACK_INT *info, int jobz_len); -#else - void F77NAME(dgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, - double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, - double *work, LAPACK_INT *lwork, LAPACK_INT *info); -#endif - - - #ifdef PLATFORM_acml - void F77NAME(zgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, LAPACK_REAL *s, - LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_REAL * rwork, LAPACK_INT *info, int jobz_len); -#else - void F77NAME(zgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, LAPACK_REAL *s, - LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_REAL * rwork, LAPACK_INT *info); -#endif - -#ifdef PLATFORM_acml -void F77NAME(zgesdd)(char *jobz, int *m, int *n, LAPACK_COMPLEX *a, int *lda, double *s, - LAPACK_COMPLEX *u, int *ldu, LAPACK_COMPLEX *vt, int *ldvt, - LAPACK_COMPLEX *work, int *lwork, double *rwork, int *iwork, int *info, - int jobz_len); -#else -void F77NAME(zgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, double *s, - LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, LAPACK_INT *iwork, LAPACK_INT *info); -#endif - -void F77NAME(dgeqrf)(LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, - double *tau, double *work, LAPACK_INT *lwork, LAPACK_INT *info); - -void F77NAME(dorgqr)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, double *a, - LAPACK_INT *lda, double *tau, double *work, LAPACK_INT *lwork, - LAPACK_INT *info); - - -void F77NAME(zgeqrf)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, - LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_INT *info); - -#ifdef PLATFORM_lapacke -void LAPACKE_zungqr(int matrix_layout, LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, LAPACK_COMPLEX *a, - LAPACK_INT *lda, LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, - LAPACK_INT *info); -#else -void F77NAME(zungqr)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, LAPACK_COMPLEX *a, - LAPACK_INT *lda, LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, - LAPACK_INT *info); -#endif - -void F77NAME(dgesv)(LAPACK_INT *n, LAPACK_INT *nrhs, LAPACK_REAL *a, LAPACK_INT *lda, - LAPACK_INT *ipiv, LAPACK_REAL *b, LAPACK_INT *ldb, LAPACK_INT *info); - -void F77NAME(zgesv)(LAPACK_INT *n, LAPACK_INT *nrhs, LAPACK_COMPLEX *a, LAPACK_INT * lda, - LAPACK_INT *ipiv, LAPACK_COMPLEX *b, LAPACK_INT *ldb, LAPACK_INT *info); - -#ifdef PLATFORM_lapacke -double LAPACKE_dlange(int matrix_layout, char norm, lapack_int m, lapack_int n, const double* a, lapack_int lda); -#elif defined PLATFORM_acml -double F77NAME(dlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, double* a, LAPACK_INT* lda, double* work, LAPACK_INT norm_len); -#else -double F77NAME(dlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, double* a, LAPACK_INT* lda, double* work); -#endif - -#ifdef PLATFORM_lapacke -lapack_real LAPACKE_zlange(int matrix_layout, char norm, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda); -#elif defined PLATFORM_acml -LAPACK_REAL F77NAME(zlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, LAPACK_COMPLEX* a, LAPACK_INT* lda, double* work, LAPACK_INT norm_len); -#else -LAPACK_REAL F77NAME(zlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, LAPACK_COMPLEX* a, LAPACK_INT* lda, double* work); -#endif - -#ifdef PLATFORM_lapacke -lapack_int LAPACKE_zheev(int matrix_order, char jobz, char uplo, lapack_int n, - lapack_complex_double* a, lapack_int lda, double* w); -#elif defined PLATFORM_acml -void F77NAME(zheev)(char *jobz, char *uplo, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, - double *w, LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, - LAPACK_INT *info, LAPACK_INT jobz_len, LAPACK_INT uplo_len); -#else -void F77NAME(zheev)(char *jobz, char *uplo, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, - double *w, LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, - LAPACK_INT *info); -#endif - - -#ifdef PLATFORM_acml -void F77NAME(dsygv)(LAPACK_INT *itype, char *jobz, char *uplo, LAPACK_INT *n, double *a, - LAPACK_INT *lda, double *b, LAPACK_INT *ldb, double *w, double *work, - LAPACK_INT *lwork, LAPACK_INT *info, LAPACK_INT jobz_len, LAPACK_INT uplo_len); -#else -void F77NAME(dsygv)(LAPACK_INT *itype, char *jobz, char *uplo, LAPACK_INT *n, double *a, - LAPACK_INT *lda, double *b, LAPACK_INT *ldb, double *w, double *work, - LAPACK_INT *lwork, LAPACK_INT *info); -#endif - - -#ifdef PLATFORM_acml -void F77NAME(dgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, double *a, - LAPACK_INT *lda, double *wr, double *wi, double *vl, LAPACK_INT *ldvl, - double *vr, LAPACK_INT *ldvr, double *work, LAPACK_INT *lwork, - LAPACK_INT *info, LAPACK_INT jobvl_len, LAPACK_INT jobvr_len); -#else -void F77NAME(dgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, double *a, - LAPACK_INT *lda, double *wr, double *wi, double *vl, LAPACK_INT *ldvl, - double *vr, LAPACK_INT *ldvr, double *work, LAPACK_INT *lwork, - LAPACK_INT *info); -#endif - - -#ifdef PLATFORM_acml -void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, - LAPACK_INT *lda, LAPACK_COMPLEX *w, LAPACK_COMPLEX *vl, - LAPACK_INT *ldvl, LAPACK_COMPLEX *vr, LAPACK_INT *ldvr, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, - LAPACK_INT *info, LAPACK_INT jobvl_len, LAPACK_INT jobvr_len); -#else -void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, - LAPACK_INT *lda, LAPACK_COMPLEX *w, LAPACK_COMPLEX *vl, - LAPACK_INT *ldvl, LAPACK_COMPLEX *vr, LAPACK_INT *ldvr, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, - LAPACK_INT *info); -#endif - -} //extern "C" -#endif - -// -// daxpy -// Y += alpha*X -// -void -daxpy_wrapper(lapack_int n, //number of elements of X,Y - Real alpha, //scale factor - const LAPACK_REAL* X, //pointer to head of vector X - lapack_int incx, //increment with which to step through X - LAPACK_REAL* Y, //pointer to head of vector Y - lapack_int incy); //increment with which to step through Y - -// -// dnrm2 -// -LAPACK_REAL -dnrm2_wrapper(lapack_int N, - const LAPACK_REAL* X, - lapack_int incx = 1); - -// -// ddot -// -LAPACK_REAL -ddot_wrapper(lapack_int N, - const LAPACK_REAL* X, - lapack_int incx, - const LAPACK_REAL* Y, - lapack_int incy); - -// -// zdotc -// -Cplx -zdotc_wrapper(lapack_int N, - Cplx const* X, - lapack_int incx, - Cplx const* Y, - lapack_int incy); - -// -// dgemm -// -void -gemm_wrapper(bool transa, - bool transb, - lapack_int m, - lapack_int n, - lapack_int k, - LAPACK_REAL alpha, - LAPACK_REAL const* A, - LAPACK_REAL const* B, - LAPACK_REAL beta, - LAPACK_REAL * C); - -// -// zgemm -// -void -gemm_wrapper(bool transa, - bool transb, - lapack_int m, - lapack_int n, - lapack_int k, - Cplx alpha, - Cplx const* A, - Cplx const* B, - Cplx beta, - Cplx * C); - -// -// dgemv - matrix*vector multiply -// -void -gemv_wrapper(bool trans, - LAPACK_REAL alpha, - LAPACK_REAL beta, - lapack_int m, - lapack_int n, - const LAPACK_REAL* A, - const LAPACK_REAL* x, - lapack_int incx, - LAPACK_REAL* y, - lapack_int incy); - -// -// zgemv - matrix*vector multiply -// -void -gemv_wrapper(bool trans, - Cplx alpha, - Cplx beta, - lapack_int m, - lapack_int n, - Cplx const* A, - Cplx const* x, - lapack_int incx, - Cplx* y, - lapack_int incy); - - -// -// dsyev -// -void -dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs - char uplo, //if uplo=='U', read from upper triangle of A - lapack_int n, //number of cols of A - LAPACK_REAL* A, //symmetric matrix A - LAPACK_REAL* eigs, //eigenvalues on return - lapack_int& info); //error info - -// -// dscal -// -void -dscal_wrapper(lapack_int N, - LAPACK_REAL alpha, - LAPACK_REAL* data, - lapack_int inc = 1); - - -void -dgesdd_wrapper(char * jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - lapack_int* m, //number of rows of input matrix *A - lapack_int* n, //number of cols of input matrix *A - LAPACK_REAL *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - LAPACK_REAL *u, //on return, unitary matrix U - LAPACK_REAL *vt, //on return, unitary matrix V transpose - lapack_int *info); - -void -zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - lapack_int *m, //number of rows of input matrix *A - lapack_int *n, //number of cols of input matrix *A - Cplx *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - Cplx *u, //on return, unitary matrix U - Cplx *vt, //on return, unitary matrix V transpose - lapack_int *info); - - - void -dgesvd_wrapper(char * jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - lapack_int* m, //number of rows of input matrix *A - lapack_int* n, //number of cols of input matrix *A - LAPACK_REAL *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - LAPACK_REAL *u, //on return, unitary matrix U - LAPACK_REAL *vt, //on return, unitary matrix V transpose - lapack_int *info); - -void -zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - lapack_int *m, //number of rows of input matrix *A - lapack_int *n, //number of cols of input matrix *A - Cplx *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - Cplx *u, //on return, unitary matrix U - Cplx *vt, //on return, unitary matrix V transpose - lapack_int *info); - - -// -// dgeqrf -// -// QR factorization of a real matrix A -// -void -dgeqrf_wrapper(lapack_int* m, //number of rows of A - lapack_int* n, //number of cols of A - LAPACK_REAL* A, //matrix A - //on return upper triangle contains R - lapack_int* lda, //size of A (usually same as n) - LAPACK_REAL* tau, //scalar factors of elementary reflectors - //length should be min(m,n) - lapack_int* info); //error info - -// -// dorgqr -// -// Generates Q from output of QR factorization routine dgeqrf (see above) -// -void -dorgqr_wrapper(lapack_int* m, //number of rows of A - lapack_int* n, //number of cols of A - lapack_int* k, //number of elementary reflectors, typically min(m,n) - LAPACK_REAL* A, //matrix A, as returned from "A" argument of dgeqrf - //on return contains Q - lapack_int* lda, //size of A (usually same as n) - LAPACK_REAL* tau, //scalar factors as returned by dgeqrf - lapack_int* info); //error info - - - // -// dgeqrf -// -// QR factorization of a complex matrix A -// -void -zgeqrf_wrapper(lapack_int* m, //number of rows of A - lapack_int* n, //number of cols of A - Cplx* A, //matrix A - //on return upper triangle contains R - lapack_int* lda, //size of A (usually same as n) - LAPACK_COMPLEX* tau, //scalar factors of elementary reflectors - //length should be min(m,n) - lapack_int* info); //error info - -// -// dorgqr -// -// Generates Q from output of QR factorization routine zgeqrf (see above) -// -void -zungqr_wrapper(lapack_int* m, //number of rows of A - lapack_int* n, //number of cols of A - lapack_int* k, //number of elementary reflectors, typically min(m,n) - Cplx* A, //matrix A, as returned from "A" argument of dgeqrf - //on return contains Q - lapack_int* lda, //size of A (usually same as n) - LAPACK_COMPLEX* tau, //scalar factors as returned by zgeqrf - lapack_int* info); //error info - -// dgesv -// -// computes the solution to system of linear equations A*X = B -// where A is a general real matrix -// -lapack_int -dgesv_wrapper(lapack_int n, - lapack_int nrhs, - LAPACK_REAL* a, - LAPACK_REAL* b); - -// -// zgesv -// -// computes the solution to system of linear euqations A*X =B -// where A is a general complex matrix -// -lapack_int -zgesv_wrapper(lapack_int n, - lapack_int nrhs, - Cplx* a, - Cplx* b); - -// -// dlange -// -// returns the value of the 1-norm, Frobenius norm, infinity-norm, -// or the largest absolute value of any element of a general rectangular matrix. -// -double -dlange_wrapper(char norm, - lapack_int m, - lapack_int n, - double* a); - -// -// zlange -// -// returns the value of the 1-norm, Frobenius norm, infinity-norm, -// or the largest absolute value of any element of a general rectangular matrix. -// -LAPACK_REAL -zlange_wrapper(char norm, - lapack_int m, - lapack_int n, - Cplx* a); - -// -// zheev -// -// Eigenvalues and eigenvectors of complex Hermitian matrix A -// -lapack_int -zheev_wrapper(lapack_int N, //number of cols of A - Cplx * A, //matrix A, on return contains eigenvectors - LAPACK_REAL * d); //eigenvalues on return - -// -// dsygv -// -// Eigenvalues and eigenvectors of generalized eigenvalue problem -// A*x = lambda*B*x -// A and B must be symmetric -// B must be positive definite -// -void -dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs - //if 'N', only eigenvalues - char* uplo, //if 'U', use upper triangle of A - lapack_int* n, //number of cols of A - LAPACK_REAL* A, //matrix A, on return contains eigenvectors - LAPACK_REAL* B, //matrix B - LAPACK_REAL* d, //eigenvalues on return - lapack_int* info); //error info - -// -// dgeev -// -// Eigenvalues and eigenvectors of real, square matrix A -// A can be a general real matrix, not assumed symmetric -// -// Returns "info" integer -// -lapack_int -dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' - char jobvr, //if 'V', compute right eigenvectors, else 'N' - lapack_int n, //number of rows/cols of A - LAPACK_REAL const* A, //matrix A - LAPACK_REAL* dr, //real parts of eigenvalues - LAPACK_REAL* di, //imaginary parts of eigenvalues - LAPACK_REAL* vl, //left eigenvectors on return - LAPACK_REAL* vr); //right eigenvectors on return - -// -// zgeev -// -// Eigenvalues and eigenvectors of complex, square matrix A -// A can be a general complex matrix, not assumed symmetric -// -// Returns "info" integer -// -lapack_int -zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' - char jobvr, //if 'V', compute right eigenvectors, else 'N' - lapack_int n, //number of rows/cols of A - Cplx const* A, //matrix A - Cplx * d, //eigenvalues - Cplx * vl, //left eigenvectors on return - Cplx * vr); //right eigenvectors on return - -} //namespace itensor - -#endif diff --git a/itensor/tensor/lapack/makefile_lapack_wrap.cc b/itensor/tensor/lapack/makefile_lapack_wrap.cc deleted file mode 100644 index 69e82c642..000000000 --- a/itensor/tensor/lapack/makefile_lapack_wrap.cc +++ /dev/null @@ -1,844 +0,0 @@ -// -// Copyright 2018 The Simons Foundation, Inc. - All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "itensor/tensor/lapack/makefile_lapack_wrap.h" -//#include "itensor/tensor/permutecplx.h" - -namespace itensor { - -// -// daxpy -// Y += alpha*X -// -void -daxpy_wrapper(LAPACK_INT n, //number of elements of X,Y - LAPACK_REAL alpha, //scale factor - const LAPACK_REAL* X, //pointer to head of vector X - LAPACK_INT incx, //increment with which to step through X - LAPACK_REAL* Y, //pointer to head of vector Y - LAPACK_INT incy) //increment with which to step through Y - { -#ifdef ITENSOR_USE_CBLAS - cblas_daxpy(n,alpha,X,incx,Y,incy); -#else - auto Xnc = const_cast(X); - F77NAME(daxpy)(&n,&alpha,Xnc,&incx,Y,&incy); -#endif - } - -// -// dnrm2 -// -LAPACK_REAL -dnrm2_wrapper(LAPACK_INT N, - const LAPACK_REAL* X, - LAPACK_INT incx) - { -#ifdef ITENSOR_USE_CBLAS - return cblas_dnrm2(N,X,incx); -#else - auto *Xnc = const_cast(X); - return F77NAME(dnrm2)(&N,Xnc,&incx); -#endif - return -1; - } - -// -// ddot -// -LAPACK_REAL -ddot_wrapper(LAPACK_INT N, - const LAPACK_REAL* X, - LAPACK_INT incx, - const LAPACK_REAL* Y, - LAPACK_INT incy) - { -#ifdef ITENSOR_USE_CBLAS - return cblas_ddot(N,X,incx,Y,incy); -#else - auto *Xnc = const_cast(X); - auto *Ync = const_cast(Y); - return F77NAME(ddot)(&N,Xnc,&incx,Ync,&incy); -#endif - return -1; - } - -// -// zdotc -// -Cplx -zdotc_wrapper(LAPACK_INT N, - Cplx const* X, - LAPACK_INT incx, - Cplx const* Y, - LAPACK_INT incy) - { -#ifdef ITENSOR_USE_CBLAS - Cplx res; -#if defined PLATFORM_openblas - auto pX = reinterpret_cast(X); - auto pY = reinterpret_cast(Y); - auto pres = reinterpret_cast(&res); -#else - auto pX = reinterpret_cast(X); - auto pY = reinterpret_cast(Y); - auto pres = reinterpret_cast(&res); -#endif - cblas_zdotc_sub(N,pX,incx,pY,incy,pres); - return res; -#else - auto ncX = const_cast(X); - auto ncY = const_cast(Y); - auto pX = reinterpret_cast(ncX); - auto pY = reinterpret_cast(ncY); - auto res = F77NAME(zdotc)(&N,pX,&incx,pY,&incy); - auto cplx_res = reinterpret_cast(&res); - return *cplx_res; -#endif - return Cplx{}; - } - -// -// dgemm -// -void -gemm_wrapper(bool transa, - bool transb, - LAPACK_INT m, - LAPACK_INT n, - LAPACK_INT k, - LAPACK_REAL alpha, - LAPACK_REAL const* A, - LAPACK_REAL const* B, - LAPACK_REAL beta, - LAPACK_REAL * C) - { - LAPACK_INT lda = m, - ldb = k; -#ifdef ITENSOR_USE_CBLAS - auto at = CblasNoTrans, - bt = CblasNoTrans; - if(transa) - { - at = CblasTrans; - lda = k; - } - if(transb) - { - bt = CblasTrans; - ldb = n; - } - cblas_dgemm(CblasColMajor,at,bt,m,n,k,alpha,A,lda,B,ldb,beta,C,m); -#else - auto *pA = const_cast(A); - auto *pB = const_cast(B); - char at = 'N'; - char bt = 'N'; - if(transa) - { - at = 'T'; - lda = k; - } - if(transb) - { - bt = 'T'; - ldb = n; - } - F77NAME(dgemm)(&at,&bt,&m,&n,&k,&alpha,pA,&lda,pB,&ldb,&beta,C,&m); -#endif - } - -// -// zgemm -// -void -gemm_wrapper(bool transa, - bool transb, - LAPACK_INT m, - LAPACK_INT n, - LAPACK_INT k, - Cplx alpha, - const Cplx* A, - const Cplx* B, - Cplx beta, - Cplx* C) - { - LAPACK_INT lda = m, - ldb = k; -#ifdef PLATFORM_openblas - auto at = CblasNoTrans, - bt = CblasNoTrans; - if(transa) - { - at = CblasTrans; - lda = k; - } - if(transb) - { - bt = CblasTrans; - ldb = n; - } - //auto ralpha = realRef(alpha); - //auto ialpha = imagRef(alpha); - //auto rbeta = realRef(beta); - //auto ibeta = imagRef(beta); - //if(ialpha != 0.0 || ibeta != 0.0) - // { - // throw std::runtime_error("Complex alpha, beta not supported in zgemm for PLATFORM=openblas"); - // } - auto* palpha = reinterpret_cast(&alpha); - auto* pbeta = reinterpret_cast(&beta); - auto* pA = reinterpret_cast(A); - auto* pB = reinterpret_cast(B); - auto* pC = reinterpret_cast(C); - cblas_zgemm(CblasColMajor,at,bt,m,n,k,palpha,pA,lda,pB,ldb,pbeta,pC,m); -#else //platform not openblas -#ifdef ITENSOR_USE_CBLAS - auto at = CblasNoTrans, - bt = CblasNoTrans; - if(transa) - { - at = CblasTrans; - lda = k; - } - if(transb) - { - bt = CblasTrans; - ldb = n; - } - auto palpha = (void*)(&alpha); - auto pbeta = (void*)(&beta); - cblas_zgemm(CblasColMajor,at,bt,m,n,k,palpha,(void*)A,lda,(void*)B,ldb,pbeta,(void*)C,m); -#else //use Fortran zgemm - auto *ncA = const_cast(A); - auto *ncB = const_cast(B); - auto *pA = reinterpret_cast(ncA); - auto *pB = reinterpret_cast(ncB); - auto *pC = reinterpret_cast(C); - auto *palpha = reinterpret_cast(&alpha); - auto *pbeta = reinterpret_cast(&beta); - char at = 'N'; - char bt = 'N'; - if(transa) - { - at = 'T'; - lda = k; - } - if(transb) - { - bt = 'T'; - ldb = n; - } - F77NAME(zgemm)(&at,&bt,&m,&n,&k,palpha,pA,&lda,pB,&ldb,pbeta,pC,&m); -#endif -#endif - } - -void -gemv_wrapper(bool trans, - LAPACK_REAL alpha, - LAPACK_REAL beta, - LAPACK_INT m, - LAPACK_INT n, - const LAPACK_REAL* A, - const LAPACK_REAL* x, - LAPACK_INT incx, - LAPACK_REAL* y, - LAPACK_INT incy) - { -#ifdef ITENSOR_USE_CBLAS - auto Tr = trans ? CblasTrans : CblasNoTrans; - cblas_dgemv(CblasColMajor,Tr,m,n,alpha,A,m,x,incx,beta,y,incy); -#else - char Tr = trans ? 'T' : 'N'; - F77NAME(dgemv)(&Tr,&m,&n,&alpha,const_cast(A),&m,const_cast(x),&incx,&beta,y,&incy); -#endif - } - -void -gemv_wrapper(bool trans, - Cplx alpha, - Cplx beta, - LAPACK_INT m, - LAPACK_INT n, - Cplx const* A, - Cplx const* x, - LAPACK_INT incx, - Cplx* y, - LAPACK_INT incy) - { -#ifdef PLATFORM_openblas - auto Tr = trans ? CblasTrans : CblasNoTrans; - //auto ralpha = realRef(alpha); - //auto ialpha = imagRef(alpha); - //auto rbeta = realRef(beta); - //auto ibeta = imagRef(beta); - //if(ialpha != 0.0 || ibeta != 0.0) - // { - // throw std::runtime_error("Complex alpha, beta not supported in zgemm for PLATFORM=openblas"); - // } - auto* palpha = reinterpret_cast(&alpha); - auto* pbeta = reinterpret_cast(&beta); - auto* pA = reinterpret_cast(A); - auto* px = reinterpret_cast(x); - auto* py = reinterpret_cast(y); - cblas_zgemv(CblasColMajor,Tr,m,n,palpha,pA,m,px,incx,pbeta,py,incy); -#else //platform other than openblas -#ifdef ITENSOR_USE_CBLAS - auto Tr = trans ? CblasTrans : CblasNoTrans; - auto palpha = reinterpret_cast(&alpha); - auto pbeta = reinterpret_cast(&beta); - cblas_zgemv(CblasColMajor,Tr,m,n,palpha,(void*)A,m,(void*)x,incx,pbeta,(void*)y,incy); -#else - char Tr = trans ? 'T' : 'N'; - auto ncA = const_cast(A); - auto ncx = const_cast(x); - auto pA = reinterpret_cast(ncA); - auto px = reinterpret_cast(ncx); - auto py = reinterpret_cast(y); - auto palpha = reinterpret_cast(&alpha); - auto pbeta = reinterpret_cast(&beta); - F77NAME(zgemv)(&Tr,&m,&n,palpha,pA,&m,px,&incx,pbeta,py,&incy); -#endif -#endif - } - - -// -// dsyev -// -void -dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs - char uplo, //if uplo=='U', read from upper triangle of A - LAPACK_INT n, //number of cols of A - LAPACK_REAL* A, //symmetric matrix A - LAPACK_REAL* eigs, //eigenvalues on return - LAPACK_INT& info) //error info - { - std::vector work; - LAPACK_INT lda = n; - -#ifdef PLATFORM_acml - static const LAPACK_INT one = 1; - LAPACK_INT lwork = std::max(one,3*n-1); - work.resize(lwork+2); - F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,work.data(),&lwork,&info,1,1); -#else - //Compute optimal workspace size (will be written to wkopt) - LAPACK_INT lwork = -1; //tell dsyev to compute optimal size - LAPACK_REAL wkopt = 0; - F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,&wkopt,&lwork,&info); - lwork = LAPACK_INT(wkopt); - work.resize(lwork+2); - F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,work.data(),&lwork,&info); -#endif - } - -// -// dscal -// -void -dscal_wrapper(LAPACK_INT N, - LAPACK_REAL alpha, - LAPACK_REAL* data, - LAPACK_INT inc) - { -#ifdef ITENSOR_USE_CBLAS - cblas_dscal(N,alpha,data,inc); -#else - F77NAME(dscal)(&N,&alpha,data,&inc); -#endif - } - -void -zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT *m, //number of rows of input matrix *A - LAPACK_INT *n, //number of cols of input matrix *A - Cplx *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - Cplx *u, //on return, unitary matrix U - Cplx *vt, //on return, unitary matrix V transpose - LAPACK_INT *info) - { - std::vector work; - std::vector rwork; - std::vector iwork; - auto pA = reinterpret_cast(A); - auto pU = reinterpret_cast(u); - auto pVt = reinterpret_cast(vt); - LAPACK_INT l = std::min(*m,*n), - g = std::max(*m,*n); - LAPACK_INT lwork = l*l+2*l+g+100; - work.resize(lwork); - rwork.resize(5*l*(1+l)); - iwork.resize(8*l); -#ifdef PLATFORM_acml - LAPACK_INT jobz_len = 1; - F77NAME(zgesdd)(jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),iwork.data(),info,jobz_len); -#else - F77NAME(zgesdd)(jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),iwork.data(),info); -#endif - } - - - - void -dgesdd_wrapper(char* jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT *m, //number of rows of input matrix *A - LAPACK_INT *n, //number of cols of input matrix *A - LAPACK_REAL *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - LAPACK_REAL *u, //on return, unitary matrix U - LAPACK_REAL *vt, //on return, unitary matrix V transpose - LAPACK_INT *info) - { - std::vector work; - std::vector iwork; - LAPACK_INT l = std::min(*m,*n), - g = std::max(*m,*n); - LAPACK_INT lwork = l*(6 + 4*l) + g; - work.resize(lwork); - iwork.resize(8*l); -#ifdef PLATFORM_acml - LAPACK_INT jobz_len = 1; - F77NAME(dgesdd)(jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork,iwork.data(),info,jobz_len); -#else - F77NAME(dgesdd)(jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork,iwork.data(),info); -#endif - } - - - - void -zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT *m, //number of rows of input matrix *A - LAPACK_INT *n, //number of cols of input matrix *A - Cplx *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - Cplx *u, //on return, unitary matrix U - Cplx *vt, //on return, unitary matrix V transpose - LAPACK_INT *info) - { - std::vector work; - std::vector rwork; - std::vector iwork; - auto pA = reinterpret_cast(A); - auto pU = reinterpret_cast(u); - auto pVt = reinterpret_cast(vt); - LAPACK_INT l = std::min(*m,*n), - g = std::max(*m,*n); - LAPACK_INT lwork = l*l+2*l+g+100; - work.resize(lwork); - rwork.resize(5*l*(1+l)); - iwork.resize(8*l); -#ifdef PLATFORM_acml - LAPACK_INT jobz_len = 1; - F77NAME(zgesvd)(jobz,jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),info,jobz_len); -#else - F77NAME(zgesvd)(jobz,jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),info); -#endif - } - - - - void -dgesvd_wrapper(char* jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT *m, //number of rows of input matrix *A - LAPACK_INT *n, //number of cols of input matrix *A - LAPACK_REAL *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - LAPACK_REAL *u, //on return, unitary matrix U - LAPACK_REAL *vt, //on return, unitary matrix V transpose - LAPACK_INT *info) - { - std::vector work; - // std::vector superb; - - std::vector iwork; - LAPACK_INT l = std::min(*m,*n), - g = std::max(*m,*n); - LAPACK_INT lwork = l*(6 + 4*l) + g; - work.resize(lwork); - iwork.resize(8*l); - //superb.resize(l -1); -#ifdef PLATFORM_acml - LAPACK_INT jobz_len = 1; - F77NAME(dgesvd)(jobz,jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork, info, jobz_len); -#else - F77NAME(dgesvd)(jobz,jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork, info); -#endif - } - -// -// dgeqrf -// -// QR factorization of a real matrix A -// -void -dgeqrf_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_REAL* A, //matrix A - //on return upper triangle contains R - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_REAL* tau, //scalar factors of elementary reflectors - //length should be min(m,n) - LAPACK_INT* info) //error info - { - static const LAPACK_INT one = 1; - std::vector work; - LAPACK_INT lwork = std::max(one,4*std::max(*n,*m)); - work.resize(lwork+2); - F77NAME(dgeqrf)(m,n,A,lda,tau,work.data(),&lwork,info); - } - -// -// dorgqr -// -// Generates Q from output of QR factorization routine dgeqrf (see above) -// -void -dorgqr_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) - LAPACK_REAL* A, //matrix A, as returned from "A" argument of dgeqrf - //on return contains Q - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_REAL* tau, //scalar factors as returned by dgeqrf - LAPACK_INT* info) //error info - { - static const LAPACK_INT one = 1; - std::vector work; - auto lwork = std::max(one,4*std::max(*n,*m)); - work.resize(lwork+2); - F77NAME(dorgqr)(m,n,k,A,lda,tau,work.data(),&lwork,info); - } - - - // -// dgeqrf -// -// QR factorization of a complex matrix A -// -void -zgeqrf_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - Cplx* A, //matrix A - //on return upper triangle contains R - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_COMPLEX* tau, //scalar factors of elementary reflectors - //length should be min(m,n) - LAPACK_INT* info) //error info - { - static const LAPACK_INT one = 1; - std::vector work; - LAPACK_INT lwork = std::max(one,4*std::max(*n,*m)); - work.resize(lwork+2); - static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); - auto pA = reinterpret_cast(A); - F77NAME(zgeqrf)(m,n,pA,lda,tau,work.data(),&lwork,info); - } - -// -// dorgqr -// -// Generates Q from output of QR factorization routine zgeqrf (see above) -// -void -zungqr_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) - Cplx* A, //matrix A, as returned from "A" argument of dgeqrf - //on return contains Q - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_COMPLEX* tau, //scalar factors as returned by dgeqrf - LAPACK_INT* info) //error info - { - static const LAPACK_INT one = 1; - std::vector work; - auto lwork = std::max(one,4*std::max(*n,*m)); - work.resize(lwork+2); - static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); - auto pA = reinterpret_cast(A); - #ifdef PLATFORM_lapacke - LAPACKE_zungqr(LAPACK_COL_MAJOR,jobz,uplo,N,A,N,w.data()); - #else - F77NAME(zungqr)(m,n,k,pA,lda,tau,work.data(),&lwork,info); - #endif - } - -// -// dgesv -// -LAPACK_INT -dgesv_wrapper(LAPACK_INT n, - LAPACK_INT nrhs, - LAPACK_REAL* a, - LAPACK_REAL* b) - { - LAPACK_INT lda = n; - std::vector ipiv(n); - LAPACK_INT ldb = n; - LAPACK_INT info = 0; - F77NAME(dgesv)(&n,&nrhs,a,&lda,ipiv.data(),b,&ldb,&info); - return info; - } - -// -// zgesv -// -LAPACK_INT -zgesv_wrapper(LAPACK_INT n, - LAPACK_INT nrhs, - Cplx* a, - Cplx* b) - { - auto pa = reinterpret_cast(a); - auto pb = reinterpret_cast(b); - LAPACK_INT lda = n; - std::vector ipiv(n); - LAPACK_INT ldb = n; - LAPACK_INT info = 0; - F77NAME(zgesv)(&n,&nrhs,pa,&lda,ipiv.data(),pb,&ldb,&info); - return info; - } - -// -// dlange -// -double -dlange_wrapper(char norm, - LAPACK_INT m, - LAPACK_INT n, - double* a) - { - double norma; -#ifdef PLATFORM_lapacke - norma = LAPACKE_dlange(LAPACK_COL_MAJOR,norm,m,n,a,m); -#else - std::vector work; - if(norm == 'I' || norm == 'i') work.resize(m); -#ifdef PLATFORM_acml - LAPACK_INT norm_len = 1; - norma = F77NAME(dlange)(&norm,&m,&n,a,&m,work.data(),norm_len); -#else - norma = F77NAME(dlange)(&norm,&m,&n,a,&m,work.data()); -#endif -#endif - return norma; - } - -// -// zlange -// -LAPACK_REAL -zlange_wrapper(char norm, - LAPACK_INT m, - LAPACK_INT n, - Cplx* a) - { - LAPACK_REAL norma; -#ifdef PLATFORM_lapacke - auto pA = reinterpret_cast(a); - norma = LAPACKE_zlange(LAPACK_COL_MAJOR,norm,m,n,pa,m); -#else - std::vector work; - if(norm == 'I' || norm == 'i') work.resize(m); - auto pA = reinterpret_cast(a); -#ifdef PLATFORM_acml - LAPACK_INT norm_len = 1; - norma = F77NAME(zlange)(&norm,&m,&n,pA,&m,work.data(),norm_len); -#else - norma = F77NAME(zlange)(&norm,&m,&n,pA,&m,work.data()); -#endif -#endif - return norma; - } - -// -// zheev -// -// Eigenvalues and eigenvectors of complex Hermitian matrix A -// -LAPACK_INT -zheev_wrapper(LAPACK_INT N, //number of cols of A - Cplx * A, //matrix A, on return contains eigenvectors - LAPACK_REAL * d) //eigenvalues on return - { - static const LAPACK_INT one = 1; - char jobz = 'V'; - char uplo = 'U'; -#ifdef PLATFORM_lapacke - std::vector work(N); - LAPACKE_zheev(LAPACK_COL_MAJOR,jobz,uplo,N,A,N,w.data()); -#else - LAPACK_INT lwork = std::max(one,3*N-1);//max(1, 1+6*N+2*N*N); - std::vector work(lwork); - std::vector rwork(lwork); - LAPACK_INT info = 0; - static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); - auto pA = reinterpret_cast(A); -#ifdef PLATFORM_acml - LAPACK_INT jobz_len = 1; - LAPACK_INT uplo_len = 1; - F77NAME(zheev)(&jobz,&uplo,&N,pA,&N,d,work.data(),&lwork,rwork.data(),&info,jobz_len,uplo_len); -#else - F77NAME(zheev)(&jobz,&uplo,&N,pA,&N,d,work.data(),&lwork,rwork.data(),&info); -#endif - -#endif //PLATFORM_lapacke - return info; - } - -// -// dsygv -// -// Eigenvalues and eigenvectors of generalized eigenvalue problem -// A*x = lambda*B*x -// A and B must be symmetric -// B must be positive definite -// -void -dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs - //if 'N', only eigenvalues - char* uplo, //if 'U', use upper triangle of A - LAPACK_INT* n, //number of cols of A - LAPACK_REAL* A, //matrix A, on return contains eigenvectors - LAPACK_REAL* B, //matrix B - LAPACK_REAL* d, //eigenvalues on return - LAPACK_INT* info) //error info - { - static const LAPACK_INT one = 1; - std::vector work; - LAPACK_INT itype = 1; - LAPACK_INT lwork = std::max(one,3*(*n)-1);//std::max(1, 1+6*N+2*N*N); - work.resize(lwork); -#ifdef PLATFORM_acml - LAPACK_INT jobz_len = 1; - LAPACK_INT uplo_len = 1; - F77NAME(dsygv)(&itype,jobz,uplo,n,A,n,B,n,d,work.data(),&lwork,info,jobz_len,uplo_len); -#else - F77NAME(dsygv)(&itype,jobz,uplo,n,A,n,B,n,d,work.data(),&lwork,info); -#endif - } - -// -// dgeev -// -// Eigenvalues and eigenvectors of real, square matrix A -// A can be a general real matrix, not assumed symmetric -// -LAPACK_INT -dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' - char jobvr, //if 'V', compute right eigenvectors, else 'N' - LAPACK_INT n, //number of rows/cols of A - LAPACK_REAL const* A, //matrix A - LAPACK_REAL* dr, //real parts of eigenvalues - LAPACK_REAL* di, //imaginary parts of eigenvalues - LAPACK_REAL* vl, //left eigenvectors on return - LAPACK_REAL* vr) //right eigenvectors on return - { - std::vector work; - std::vector cpA; - - cpA.resize(n*n); - std::copy(A,A+n*n,cpA.data()); - - LAPACK_INT nevecl = (jobvl == 'V' ? n : 1); - LAPACK_INT nevecr = (jobvr == 'V' ? n : 1); - LAPACK_INT info = 0; -#ifdef PLATFORM_acml - LAPACK_INT lwork = -1; - LAPACK_REAL wquery = 0; - F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,&wquery,&lwork,&info,1,1); - - lwork = static_cast(wquery); - work.resize(lwork); - F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,work.data(),&lwork,&info,1,1); -#else - LAPACK_INT lwork = -1; - LAPACK_REAL wquery = 0; - F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,&wquery,&lwork,&info); - - lwork = static_cast(wquery); - work.resize(lwork); - F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,work.data(),&lwork,&info); -#endif - //println("jobvl = ",jobvl); - //println("nevecl = ",nevecl); - //println("vl data = "); - //for(auto j = 0; j < n*n; ++j) - // { - // println(*vl); - // ++vl; - // } - //println("vr data = "); - //for(auto j = 0; j < n*n; ++j) - // { - // println(*vr); - // ++vr; - // } - return info; - } - -// -// zgeev -// -// Eigenvalues and eigenvectors of complex, square matrix A -// A can be a general complex matrix, not assumed symmetric -// -LAPACK_INT -zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' - char jobvr, //if 'V', compute right eigenvectors, else 'N' - LAPACK_INT n, //number of rows/cols of A - Cplx const* A, //matrix A - Cplx * d, //eigenvalues - Cplx * vl, //left eigenvectors on return - Cplx * vr) //right eigenvectors on return - { - static const LAPACK_INT one = 1; - std::vector cpA; - std::vector work; - std::vector rwork; - LAPACK_INT nevecl = (jobvl == 'V' ? n : 1); - LAPACK_INT nevecr = (jobvr == 'V' ? n : 1); - LAPACK_INT lwork = std::max(one,4*n); - work.resize(lwork); - LAPACK_INT lrwork = std::max(one,2*n); - rwork.resize(lrwork); - - //Copy A data into cpA - cpA.resize(n*n); - auto pA = reinterpret_cast(A); - std::copy(pA,pA+n*n,cpA.data()); - - auto pd = reinterpret_cast(d); - auto pvl = reinterpret_cast(vl); - auto pvr = reinterpret_cast(vr); - - LAPACK_INT info = 0; -#ifdef PLATFORM_acml - F77NAME(zgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,pd,pvl,&nevecl,pvr,&nevecr,work.data(),&lwork,rwork.data(),&info,1,1); -#else - F77NAME(zgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,pd,pvl,&nevecl,pvr,&nevecr,work.data(),&lwork,rwork.data(),&info); -#endif - return info; - } - -} //namespace itensor - diff --git a/itensor/tensor/lapack/makefile_lapack_wrap.h b/itensor/tensor/lapack/makefile_lapack_wrap.h deleted file mode 100644 index 6cd53baf8..000000000 --- a/itensor/tensor/lapack/makefile_lapack_wrap.h +++ /dev/null @@ -1,787 +0,0 @@ -// -// Copyright 2018 The Simons Foundation, Inc. - All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef __ITENSOR_LAPACK_WRAP_h -#define __ITENSOR_LAPACK_WRAP_h - -#include -#include "itensor/config.h" -#include "itensor/types.h" -#include "itensor/util/timers.h" - -// -// Headers and typedefs -// - -// -// -// Generic Linux LAPACK -// -// -#ifdef PLATFORM_lapack - -#define LAPACK_REQUIRE_EXTERN - -namespace itensor { - using LAPACK_INT = int; - using LAPACK_REAL = double; - typedef struct - { - LAPACK_REAL real, imag; - } LAPACK_COMPLEX; -} -#elif defined PLATFORM_openblas - -#define ITENSOR_USE_CBLAS - -#include "cblas.h" -#include "lapacke.h" -#undef I //lapacke.h includes complex.h which defined an `I` macro - //that can cause problems, so best to undefine it - -namespace itensor { -using LAPACK_INT = lapack_int; -using LAPACK_REAL = double; -using LAPACK_COMPLEX = lapack_complex_double; - -inline LAPACK_REAL& -realRef(LAPACK_COMPLEX & z) - { - auto* p = reinterpret_cast(&z); - return p[0]; - } - -inline LAPACK_REAL& -imagRef(LAPACK_COMPLEX & z) - { - auto* p = reinterpret_cast(&z); - return p[1]; - } -} - -// -// -// Apple Accelerate/vecLib -// -// -#elif defined PLATFORM_macos - -#define ITENSOR_USE_CBLAS -//#define ITENSOR_USE_ZGEMM - -#include - namespace itensor { - using LAPACK_INT = __CLPK_integer; - using LAPACK_REAL = __CLPK_doublereal; - using LAPACK_COMPLEX = __CLPK_doublecomplex; - - inline LAPACK_REAL& - realRef(LAPACK_COMPLEX & z) { return z.r; } - - inline LAPACK_REAL& - imagRef(LAPACK_COMPLEX & z) { return z.i; } - } - -// -// -// Intel MKL -// -// -#elif defined PLATFORM_mkl - -#define ITENSOR_USE_CBLAS -#define ITENSOR_USE_ZGEMM - -#include "mkl_cblas.h" -#include "mkl_lapack.h" - namespace itensor { - using LAPACK_INT = MKL_INT; - using LAPACK_REAL = double; - using LAPACK_COMPLEX = MKL_Complex16; - - inline LAPACK_REAL& - realRef(LAPACK_COMPLEX & z) { return z.real; } - - inline LAPACK_REAL& - imagRef(LAPACK_COMPLEX & z) { return z.imag; } - } - -// -// -// AMD ACML -// -// -#elif defined PLATFORM_acml - -#define LAPACK_REQUIRE_EXTERN -//#include "acml.h" - namespace itensor { - using LAPACK_INT = int; - using LAPACK_REAL = double; - typedef struct - { - LAPACK_REAL real, imag; - } LAPACK_COMPLEX; - - inline LAPACK_REAL& - realRef(LAPACK_COMPLEX & z) { return z.real; } - - inline LAPACK_REAL& - imagRef(LAPACK_COMPLEX & z) { return z.imag; } - } - -#endif // different PLATFORM types - - - -#ifdef FORTRAN_NO_TRAILING_UNDERSCORE -#define F77NAME(x) x -#else -#if defined(LAPACK_GLOBAL) || defined(LAPACK_NAME) -#define F77NAME(x) LAPACK_##x -#else -#define F77NAME(x) x##_ -#endif -#endif - -namespace itensor { - -// -// -// Forward declarations of fortran lapack routines -// -// -#ifdef LAPACK_REQUIRE_EXTERN -extern "C" { - -//dnrm2 declaration -#ifdef ITENSOR_USE_CBLAS -LAPACK_REAL cblas_dnrm2(LAPACK_INT N, LAPACK_REAL *X, LAPACK_INT incX); -#else -LAPACK_REAL F77NAME(dnrm2)(LAPACK_INT* N, LAPACK_REAL* X, LAPACK_INT* incx); -#endif - - -//daxpy declaration -#ifdef ITENSOR_USE_CBLAS -void cblas_daxpy(const int n, const double alpha, const double *X, const int incX, double *Y, const int incY); -#else -void F77NAME(daxpy)(LAPACK_INT* n, LAPACK_REAL* alpha, - LAPACK_REAL* X, LAPACK_INT* incx, - LAPACK_REAL* Y, LAPACK_INT* incy); -#endif - -//ddot declaration -#ifdef ITENSOR_USE_CBLAS -LAPACK_REAL -cblas_ddot(const LAPACK_INT N, const LAPACK_REAL *X, const LAPACK_INT incx, const LAPACK_REAL *Y, const LAPACK_INT incy); -#else -LAPACK_REAL F77NAME(ddot)(LAPACK_INT* N, LAPACK_REAL* X, LAPACK_INT* incx, LAPACK_REAL* Y, LAPACK_INT* incy); -#endif - -//zdotc declaration -#ifdef ITENSOR_USE_CBLAS -LAPACK_REAL -cblas_zdotc_sub(const LAPACK_INT N, const void *X, const LAPACK_INT incx, const void *Y, const LAPACK_INT incy, void *res); -#else -LAPACK_COMPLEX F77NAME(zdotc)(LAPACK_INT* N, LAPACK_COMPLEX* X, LAPACK_INT* incx, LAPACK_COMPLEX* Y, LAPACK_INT* incy); -#endif - -//dgemm declaration -#ifdef ITENSOR_USE_CBLAS -void cblas_dgemm(const enum CBLAS_ORDER __Order, - const enum CBLAS_TRANSPOSE __TransA, - const enum CBLAS_TRANSPOSE __TransB, const int __M, const int __N, - const int __K, const double __alpha, const double *__A, - const int __lda, const double *__B, const int __ldb, - const double __beta, double *__C, const int __ldc); -#else -void F77NAME(dgemm)(char*,char*,LAPACK_INT*,LAPACK_INT*,LAPACK_INT*, - LAPACK_REAL*,LAPACK_REAL*,LAPACK_INT*,LAPACK_REAL*, - LAPACK_INT*,LAPACK_REAL*,LAPACK_REAL*,LAPACK_INT*); -#endif - -//zgemm declaration -#ifdef PLATFORM_openblas -void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, - OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, - OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, - OPENBLAS_CONST blasint M, - OPENBLAS_CONST blasint N, - OPENBLAS_CONST blasint K, - OPENBLAS_CONST double *alpha, - OPENBLAS_CONST double *A, - OPENBLAS_CONST blasint lda, - OPENBLAS_CONST double *B, - OPENBLAS_CONST blasint ldb, - OPENBLAS_CONST double *beta, - double *C, - OPENBLAS_CONST blasint ldc); -#else //platform not openblas - -#ifdef ITENSOR_USE_CBLAS -void cblas_zgemm(const enum CBLAS_ORDER __Order, - const enum CBLAS_TRANSPOSE __TransA, - const enum CBLAS_TRANSPOSE __TransB, const int __M, const int __N, - const int __K, const void *__alpha, const void *__A, const int __lda, - const void *__B, const int __ldb, const void *__beta, void *__C, - const int __ldc); -#else -void F77NAME(zgemm)(char* transa,char* transb,LAPACK_INT* m,LAPACK_INT* n,LAPACK_INT* k, - LAPACK_COMPLEX* alpha,LAPACK_COMPLEX* A,LAPACK_INT* LDA,LAPACK_COMPLEX* B, - LAPACK_INT* LDB,LAPACK_COMPLEX* beta,LAPACK_COMPLEX* C,LAPACK_INT* LDC); -#endif - -#endif //zgemm declaration - -//dgemv declaration -#ifdef ITENSOR_USE_CBLAS -void cblas_dgemv(const enum CBLAS_ORDER Order, - const enum CBLAS_TRANSPOSE TransA, const LAPACK_INT M, const LAPACK_INT N, - const LAPACK_REAL alpha, const LAPACK_REAL *A, const LAPACK_INT lda, - const LAPACK_REAL *X, const LAPACK_INT incX, const LAPACK_REAL beta, LAPACK_REAL *Y, - const LAPACK_INT incY); -#else -void F77NAME(dgemv)(char* transa,LAPACK_INT* M,LAPACK_INT* N,LAPACK_REAL* alpha, LAPACK_REAL* A, - LAPACK_INT* LDA, LAPACK_REAL* X, LAPACK_INT* incx, LAPACK_REAL* beta, - LAPACK_REAL* Y, LAPACK_INT* incy); -#endif - -//zgemv declaration -#ifdef PLATFORM_openblas -void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, - OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, - OPENBLAS_CONST blasint m, - OPENBLAS_CONST blasint n, - OPENBLAS_CONST double *alpha, - OPENBLAS_CONST double *a, - OPENBLAS_CONST blasint lda, - OPENBLAS_CONST double *x, - OPENBLAS_CONST blasint incx, - OPENBLAS_CONST double *beta, - double *y, - OPENBLAS_CONST blasint incy); -#else -#ifdef ITENSOR_USE_CBLAS -void cblas_zgemv(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE trans, const LAPACK_INT m, - const LAPACK_INT n, const void *alpha, const void *a, const LAPACK_INT lda, - const void *x, const LAPACK_INT incx, const void *beta, void *y, const LAPACK_INT incy); -#else -void F77NAME(zgemv)(char* transa,LAPACK_INT* M,LAPACK_INT* N,LAPACK_COMPLEX* alpha, LAPACK_COMPLEX* A, - LAPACK_INT* LDA, LAPACK_COMPLEX* X, LAPACK_INT* incx, LAPACK_COMPLEX* beta, - LAPACK_COMPLEX* Y, LAPACK_INT* incy); -#endif -#endif //zgemv declaration - -#ifdef PLATFORM_acml -void F77NAME(dsyev)(char *jobz, char *uplo, int *n, double *a, int *lda, - double *w, double *work, int *lwork, int *info, - int jobz_len, int uplo_len); -#else -void F77NAME(dsyev)(const char* jobz, const char* uplo, const LAPACK_INT* n, double* a, - const LAPACK_INT* lda, double* w, double* work, const LAPACK_INT* lwork, - LAPACK_INT* info ); -#endif - -#ifdef ITENSOR_USE_CBLAS -void cblas_dscal(const LAPACK_INT N, const LAPACK_REAL alpha, LAPACK_REAL* X,const LAPACK_INT incX); -#else -void F77NAME(dscal)(LAPACK_INT* N, LAPACK_REAL* alpha, LAPACK_REAL* X,LAPACK_INT* incX); -#endif - - -#ifdef PLATFORM_acml -void F77NAME(dgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, - double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, - double *work, LAPACK_INT *lwork, LAPACK_INT *iwork, LAPACK_INT *info, int jobz_len); -#else -void F77NAME(dgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, - double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, - double *work, LAPACK_INT *lwork, LAPACK_INT *iwork, LAPACK_INT *info); -#endif - - -#ifdef PLATFORM_acml - void F77NAME(dgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, - double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, - double *work, LAPACK_INT *lwork, LAPACK_INT *info, int jobz_len); -#else - void F77NAME(dgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, - double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, - double *work, LAPACK_INT *lwork, LAPACK_INT *info); -#endif - - - #ifdef PLATFORM_acml - void F77NAME(zgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, LAPACK_REAL *s, - LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_REAL * rwork, LAPACK_INT *info, int jobz_len); -#else - void F77NAME(zgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, LAPACK_REAL *s, - LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_REAL * rwork, LAPACK_INT *info); -#endif - -#ifdef PLATFORM_acml -void F77NAME(zgesdd)(char *jobz, int *m, int *n, LAPACK_COMPLEX *a, int *lda, double *s, - LAPACK_COMPLEX *u, int *ldu, LAPACK_COMPLEX *vt, int *ldvt, - LAPACK_COMPLEX *work, int *lwork, double *rwork, int *iwork, int *info, - int jobz_len); -#else -void F77NAME(zgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, double *s, - LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, LAPACK_INT *iwork, LAPACK_INT *info); -#endif - -void F77NAME(dgeqrf)(LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, - double *tau, double *work, LAPACK_INT *lwork, LAPACK_INT *info); - -void F77NAME(dorgqr)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, double *a, - LAPACK_INT *lda, double *tau, double *work, LAPACK_INT *lwork, - LAPACK_INT *info); - - -void F77NAME(zgeqrf)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, - LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_INT *info); - -#ifdef PLATFORM_lapacke -void LAPACKE_zungqr(int matrix_layout, LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, LAPACK_COMPLEX *a, - LAPACK_INT *lda, LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, - LAPACK_INT *info); -#else -void F77NAME(zungqr)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, LAPACK_COMPLEX *a, - LAPACK_INT *lda, LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, - LAPACK_INT *info); -#endif - -void F77NAME(dgesv)(LAPACK_INT *n, LAPACK_INT *nrhs, LAPACK_REAL *a, LAPACK_INT *lda, - LAPACK_INT *ipiv, LAPACK_REAL *b, LAPACK_INT *ldb, LAPACK_INT *info); - -void F77NAME(zgesv)(LAPACK_INT *n, LAPACK_INT *nrhs, LAPACK_COMPLEX *a, LAPACK_INT * lda, - LAPACK_INT *ipiv, LAPACK_COMPLEX *b, LAPACK_INT *ldb, LAPACK_INT *info); - -#ifdef PLATFORM_lapacke -double LAPACKE_dlange(int matrix_layout, char norm, lapack_int m, lapack_int n, const double* a, lapack_int lda); -#elif defined PLATFORM_acml -double F77NAME(dlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, double* a, LAPACK_INT* lda, double* work, LAPACK_INT norm_len); -#else -double F77NAME(dlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, double* a, LAPACK_INT* lda, double* work); -#endif - -#ifdef PLATFORM_lapacke -lapack_real LAPACKE_zlange(int matrix_layout, char norm, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda); -#elif defined PLATFORM_acml -LAPACK_REAL F77NAME(zlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, LAPACK_COMPLEX* a, LAPACK_INT* lda, double* work, LAPACK_INT norm_len); -#else -LAPACK_REAL F77NAME(zlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, LAPACK_COMPLEX* a, LAPACK_INT* lda, double* work); -#endif - -#ifdef PLATFORM_lapacke -lapack_int LAPACKE_zheev(int matrix_order, char jobz, char uplo, lapack_int n, - lapack_complex_double* a, lapack_int lda, double* w); -#elif defined PLATFORM_acml -void F77NAME(zheev)(char *jobz, char *uplo, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, - double *w, LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, - LAPACK_INT *info, LAPACK_INT jobz_len, LAPACK_INT uplo_len); -#else -void F77NAME(zheev)(char *jobz, char *uplo, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, - double *w, LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, - LAPACK_INT *info); -#endif - - -#ifdef PLATFORM_acml -void F77NAME(dsygv)(LAPACK_INT *itype, char *jobz, char *uplo, LAPACK_INT *n, double *a, - LAPACK_INT *lda, double *b, LAPACK_INT *ldb, double *w, double *work, - LAPACK_INT *lwork, LAPACK_INT *info, LAPACK_INT jobz_len, LAPACK_INT uplo_len); -#else -void F77NAME(dsygv)(LAPACK_INT *itype, char *jobz, char *uplo, LAPACK_INT *n, double *a, - LAPACK_INT *lda, double *b, LAPACK_INT *ldb, double *w, double *work, - LAPACK_INT *lwork, LAPACK_INT *info); -#endif - - -#ifdef PLATFORM_acml -void F77NAME(dgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, double *a, - LAPACK_INT *lda, double *wr, double *wi, double *vl, LAPACK_INT *ldvl, - double *vr, LAPACK_INT *ldvr, double *work, LAPACK_INT *lwork, - LAPACK_INT *info, LAPACK_INT jobvl_len, LAPACK_INT jobvr_len); -#else -void F77NAME(dgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, double *a, - LAPACK_INT *lda, double *wr, double *wi, double *vl, LAPACK_INT *ldvl, - double *vr, LAPACK_INT *ldvr, double *work, LAPACK_INT *lwork, - LAPACK_INT *info); -#endif - - -#ifdef PLATFORM_acml -void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, - LAPACK_INT *lda, LAPACK_COMPLEX *w, LAPACK_COMPLEX *vl, - LAPACK_INT *ldvl, LAPACK_COMPLEX *vr, LAPACK_INT *ldvr, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, - LAPACK_INT *info, LAPACK_INT jobvl_len, LAPACK_INT jobvr_len); -#else -void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, - LAPACK_INT *lda, LAPACK_COMPLEX *w, LAPACK_COMPLEX *vl, - LAPACK_INT *ldvl, LAPACK_COMPLEX *vr, LAPACK_INT *ldvr, - LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, - LAPACK_INT *info); -#endif - -} //extern "C" -#endif - -// -// daxpy -// Y += alpha*X -// -void -daxpy_wrapper(LAPACK_INT n, //number of elements of X,Y - LAPACK_REAL alpha, //scale factor - const LAPACK_REAL* X, //pointer to head of vector X - LAPACK_INT incx, //increment with which to step through X - LAPACK_REAL* Y, //pointer to head of vector Y - LAPACK_INT incy); //increment with which to step through Y - -// -// dnrm2 -// -LAPACK_REAL -dnrm2_wrapper(LAPACK_INT N, - const LAPACK_REAL* X, - LAPACK_INT incx = 1); - -// -// ddot -// -LAPACK_REAL -ddot_wrapper(LAPACK_INT N, - const LAPACK_REAL* X, - LAPACK_INT incx, - const LAPACK_REAL* Y, - LAPACK_INT incy); - -// -// zdotc -// -Cplx -zdotc_wrapper(LAPACK_INT N, - Cplx const* X, - LAPACK_INT incx, - Cplx const* Y, - LAPACK_INT incy); - -// -// dgemm -// -void -gemm_wrapper(bool transa, - bool transb, - LAPACK_INT m, - LAPACK_INT n, - LAPACK_INT k, - LAPACK_REAL alpha, - LAPACK_REAL const* A, - LAPACK_REAL const* B, - LAPACK_REAL beta, - LAPACK_REAL * C); - -// -// zgemm -// -void -gemm_wrapper(bool transa, - bool transb, - LAPACK_INT m, - LAPACK_INT n, - LAPACK_INT k, - Cplx alpha, - Cplx const* A, - Cplx const* B, - Cplx beta, - Cplx * C); - -// -// dgemv - matrix*vector multiply -// -void -gemv_wrapper(bool trans, - LAPACK_REAL alpha, - LAPACK_REAL beta, - LAPACK_INT m, - LAPACK_INT n, - const LAPACK_REAL* A, - const LAPACK_REAL* x, - LAPACK_INT incx, - LAPACK_REAL* y, - LAPACK_INT incy); - -// -// zgemv - matrix*vector multiply -// -void -gemv_wrapper(bool trans, - Cplx alpha, - Cplx beta, - LAPACK_INT m, - LAPACK_INT n, - Cplx const* A, - Cplx const* x, - LAPACK_INT incx, - Cplx* y, - LAPACK_INT incy); - - -// -// dsyev -// -void -dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs - char uplo, //if uplo=='U', read from upper triangle of A - LAPACK_INT n, //number of cols of A - LAPACK_REAL* A, //symmetric matrix A - LAPACK_REAL* eigs, //eigenvalues on return - LAPACK_INT& info); //error info - -// -// dscal -// -void -dscal_wrapper(LAPACK_INT N, - LAPACK_REAL alpha, - LAPACK_REAL* data, - LAPACK_INT inc = 1); - - -void -dgesdd_wrapper(char * jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT* m, //number of rows of input matrix *A - LAPACK_INT* n, //number of cols of input matrix *A - LAPACK_REAL *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - LAPACK_REAL *u, //on return, unitary matrix U - LAPACK_REAL *vt, //on return, unitary matrix V transpose - LAPACK_INT *info); - -void -zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT *m, //number of rows of input matrix *A - LAPACK_INT *n, //number of cols of input matrix *A - Cplx *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - Cplx *u, //on return, unitary matrix U - Cplx *vt, //on return, unitary matrix V transpose - LAPACK_INT *info); - - - void -dgesvd_wrapper(char * jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT* m, //number of rows of input matrix *A - LAPACK_INT* n, //number of cols of input matrix *A - LAPACK_REAL *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - LAPACK_REAL *u, //on return, unitary matrix U - LAPACK_REAL *vt, //on return, unitary matrix V transpose - LAPACK_INT *info); - -void -zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT *m, //number of rows of input matrix *A - LAPACK_INT *n, //number of cols of input matrix *A - Cplx *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - Cplx *u, //on return, unitary matrix U - Cplx *vt, //on return, unitary matrix V transpose - LAPACK_INT *info); - - -// -// dgeqrf -// -// QR factorization of a real matrix A -// -void -dgeqrf_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_REAL* A, //matrix A - //on return upper triangle contains R - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_REAL* tau, //scalar factors of elementary reflectors - //length should be min(m,n) - LAPACK_INT* info); //error info - -// -// dorgqr -// -// Generates Q from output of QR factorization routine dgeqrf (see above) -// -void -dorgqr_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) - LAPACK_REAL* A, //matrix A, as returned from "A" argument of dgeqrf - //on return contains Q - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_REAL* tau, //scalar factors as returned by dgeqrf - LAPACK_INT* info); //error info - - - // -// dgeqrf -// -// QR factorization of a complex matrix A -// -void -zgeqrf_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - Cplx* A, //matrix A - //on return upper triangle contains R - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_COMPLEX* tau, //scalar factors of elementary reflectors - //length should be min(m,n) - LAPACK_INT* info); //error info - -// -// dorgqr -// -// Generates Q from output of QR factorization routine zgeqrf (see above) -// -void -zungqr_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) - Cplx* A, //matrix A, as returned from "A" argument of dgeqrf - //on return contains Q - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_COMPLEX* tau, //scalar factors as returned by zgeqrf - LAPACK_INT* info); //error info - -// dgesv -// -// computes the solution to system of linear equations A*X = B -// where A is a general real matrix -// -LAPACK_INT -dgesv_wrapper(LAPACK_INT n, - LAPACK_INT nrhs, - LAPACK_REAL* a, - LAPACK_REAL* b); - -// -// zgesv -// -// computes the solution to system of linear euqations A*X =B -// where A is a general complex matrix -// -LAPACK_INT -zgesv_wrapper(LAPACK_INT n, - LAPACK_INT nrhs, - Cplx* a, - Cplx* b); - -// -// dlange -// -// returns the value of the 1-norm, Frobenius norm, infinity-norm, -// or the largest absolute value of any element of a general rectangular matrix. -// -double -dlange_wrapper(char norm, - LAPACK_INT m, - LAPACK_INT n, - double* a); - -// -// zlange -// -// returns the value of the 1-norm, Frobenius norm, infinity-norm, -// or the largest absolute value of any element of a general rectangular matrix. -// -LAPACK_REAL -zlange_wrapper(char norm, - LAPACK_INT m, - LAPACK_INT n, - Cplx* a); - -// -// zheev -// -// Eigenvalues and eigenvectors of complex Hermitian matrix A -// -LAPACK_INT -zheev_wrapper(LAPACK_INT N, //number of cols of A - Cplx * A, //matrix A, on return contains eigenvectors - LAPACK_REAL * d); //eigenvalues on return - -// -// dsygv -// -// Eigenvalues and eigenvectors of generalized eigenvalue problem -// A*x = lambda*B*x -// A and B must be symmetric -// B must be positive definite -// -void -dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs - //if 'N', only eigenvalues - char* uplo, //if 'U', use upper triangle of A - LAPACK_INT* n, //number of cols of A - LAPACK_REAL* A, //matrix A, on return contains eigenvectors - LAPACK_REAL* B, //matrix B - LAPACK_REAL* d, //eigenvalues on return - LAPACK_INT* info); //error info - -// -// dgeev -// -// Eigenvalues and eigenvectors of real, square matrix A -// A can be a general real matrix, not assumed symmetric -// -// Returns "info" integer -// -LAPACK_INT -dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' - char jobvr, //if 'V', compute right eigenvectors, else 'N' - LAPACK_INT n, //number of rows/cols of A - LAPACK_REAL const* A, //matrix A - LAPACK_REAL* dr, //real parts of eigenvalues - LAPACK_REAL* di, //imaginary parts of eigenvalues - LAPACK_REAL* vl, //left eigenvectors on return - LAPACK_REAL* vr); //right eigenvectors on return - -// -// zgeev -// -// Eigenvalues and eigenvectors of complex, square matrix A -// A can be a general complex matrix, not assumed symmetric -// -// Returns "info" integer -// -LAPACK_INT -zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' - char jobvr, //if 'V', compute right eigenvectors, else 'N' - LAPACK_INT n, //number of rows/cols of A - Cplx const* A, //matrix A - Cplx * d, //eigenvalues - Cplx * vl, //left eigenvectors on return - Cplx * vr); //right eigenvectors on return - -} //namespace itensor - -#endif diff --git a/itensor/tensor/lapack_wrap.cc b/itensor/tensor/lapack_wrap.cc index b0419dd05..41d54825a 100644 --- a/itensor/tensor/lapack_wrap.cc +++ b/itensor/tensor/lapack_wrap.cc @@ -1,5 +1,844 @@ -#ifdef ITENSOR_USE_CMAKE -#include -#else //ITENSOR_USE_CMAKE -#include -#endif //ITENSOR_USE_CMAKE \ No newline at end of file +// +// Copyright 2018 The Simons Foundation, Inc. - All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "itensor/tensor/lapack_wrap.h" +//#include "itensor/tensor/permutecplx.h" + +namespace itensor { + +// +// daxpy +// Y += alpha*X +// +void +daxpy_wrapper(LAPACK_INT n, //number of elements of X,Y + LAPACK_REAL alpha, //scale factor + const LAPACK_REAL* X, //pointer to head of vector X + LAPACK_INT incx, //increment with which to step through X + LAPACK_REAL* Y, //pointer to head of vector Y + LAPACK_INT incy) //increment with which to step through Y + { +#ifdef ITENSOR_USE_CBLAS + cblas_daxpy(n,alpha,X,incx,Y,incy); +#else + auto Xnc = const_cast(X); + F77NAME(daxpy)(&n,&alpha,Xnc,&incx,Y,&incy); +#endif + } + +// +// dnrm2 +// +LAPACK_REAL +dnrm2_wrapper(LAPACK_INT N, + const LAPACK_REAL* X, + LAPACK_INT incx) + { +#ifdef ITENSOR_USE_CBLAS + return cblas_dnrm2(N,X,incx); +#else + auto *Xnc = const_cast(X); + return F77NAME(dnrm2)(&N,Xnc,&incx); +#endif + return -1; + } + +// +// ddot +// +LAPACK_REAL +ddot_wrapper(LAPACK_INT N, + const LAPACK_REAL* X, + LAPACK_INT incx, + const LAPACK_REAL* Y, + LAPACK_INT incy) + { +#ifdef ITENSOR_USE_CBLAS + return cblas_ddot(N,X,incx,Y,incy); +#else + auto *Xnc = const_cast(X); + auto *Ync = const_cast(Y); + return F77NAME(ddot)(&N,Xnc,&incx,Ync,&incy); +#endif + return -1; + } + +// +// zdotc +// +Cplx +zdotc_wrapper(LAPACK_INT N, + Cplx const* X, + LAPACK_INT incx, + Cplx const* Y, + LAPACK_INT incy) + { +#ifdef ITENSOR_USE_CBLAS + Cplx res; +#if defined PLATFORM_openblas + auto pX = reinterpret_cast(X); + auto pY = reinterpret_cast(Y); + auto pres = reinterpret_cast(&res); +#else + auto pX = reinterpret_cast(X); + auto pY = reinterpret_cast(Y); + auto pres = reinterpret_cast(&res); +#endif + cblas_zdotc_sub(N,pX,incx,pY,incy,pres); + return res; +#else + auto ncX = const_cast(X); + auto ncY = const_cast(Y); + auto pX = reinterpret_cast(ncX); + auto pY = reinterpret_cast(ncY); + auto res = F77NAME(zdotc)(&N,pX,&incx,pY,&incy); + auto cplx_res = reinterpret_cast(&res); + return *cplx_res; +#endif + return Cplx{}; + } + +// +// dgemm +// +void +gemm_wrapper(bool transa, + bool transb, + LAPACK_INT m, + LAPACK_INT n, + LAPACK_INT k, + LAPACK_REAL alpha, + LAPACK_REAL const* A, + LAPACK_REAL const* B, + LAPACK_REAL beta, + LAPACK_REAL * C) + { + LAPACK_INT lda = m, + ldb = k; +#ifdef ITENSOR_USE_CBLAS + auto at = CblasNoTrans, + bt = CblasNoTrans; + if(transa) + { + at = CblasTrans; + lda = k; + } + if(transb) + { + bt = CblasTrans; + ldb = n; + } + cblas_dgemm(CblasColMajor,at,bt,m,n,k,alpha,A,lda,B,ldb,beta,C,m); +#else + auto *pA = const_cast(A); + auto *pB = const_cast(B); + char at = 'N'; + char bt = 'N'; + if(transa) + { + at = 'T'; + lda = k; + } + if(transb) + { + bt = 'T'; + ldb = n; + } + F77NAME(dgemm)(&at,&bt,&m,&n,&k,&alpha,pA,&lda,pB,&ldb,&beta,C,&m); +#endif + } + +// +// zgemm +// +void +gemm_wrapper(bool transa, + bool transb, + LAPACK_INT m, + LAPACK_INT n, + LAPACK_INT k, + Cplx alpha, + const Cplx* A, + const Cplx* B, + Cplx beta, + Cplx* C) + { + LAPACK_INT lda = m, + ldb = k; +#ifdef PLATFORM_openblas + auto at = CblasNoTrans, + bt = CblasNoTrans; + if(transa) + { + at = CblasTrans; + lda = k; + } + if(transb) + { + bt = CblasTrans; + ldb = n; + } + //auto ralpha = realRef(alpha); + //auto ialpha = imagRef(alpha); + //auto rbeta = realRef(beta); + //auto ibeta = imagRef(beta); + //if(ialpha != 0.0 || ibeta != 0.0) + // { + // throw std::runtime_error("Complex alpha, beta not supported in zgemm for PLATFORM=openblas"); + // } + auto* palpha = reinterpret_cast(&alpha); + auto* pbeta = reinterpret_cast(&beta); + auto* pA = reinterpret_cast(A); + auto* pB = reinterpret_cast(B); + auto* pC = reinterpret_cast(C); + cblas_zgemm(CblasColMajor,at,bt,m,n,k,palpha,pA,lda,pB,ldb,pbeta,pC,m); +#else //platform not openblas +#ifdef ITENSOR_USE_CBLAS + auto at = CblasNoTrans, + bt = CblasNoTrans; + if(transa) + { + at = CblasTrans; + lda = k; + } + if(transb) + { + bt = CblasTrans; + ldb = n; + } + auto palpha = (void*)(&alpha); + auto pbeta = (void*)(&beta); + cblas_zgemm(CblasColMajor,at,bt,m,n,k,palpha,(void*)A,lda,(void*)B,ldb,pbeta,(void*)C,m); +#else //use Fortran zgemm + auto *ncA = const_cast(A); + auto *ncB = const_cast(B); + auto *pA = reinterpret_cast(ncA); + auto *pB = reinterpret_cast(ncB); + auto *pC = reinterpret_cast(C); + auto *palpha = reinterpret_cast(&alpha); + auto *pbeta = reinterpret_cast(&beta); + char at = 'N'; + char bt = 'N'; + if(transa) + { + at = 'T'; + lda = k; + } + if(transb) + { + bt = 'T'; + ldb = n; + } + F77NAME(zgemm)(&at,&bt,&m,&n,&k,palpha,pA,&lda,pB,&ldb,pbeta,pC,&m); +#endif +#endif + } + +void +gemv_wrapper(bool trans, + LAPACK_REAL alpha, + LAPACK_REAL beta, + LAPACK_INT m, + LAPACK_INT n, + const LAPACK_REAL* A, + const LAPACK_REAL* x, + LAPACK_INT incx, + LAPACK_REAL* y, + LAPACK_INT incy) + { +#ifdef ITENSOR_USE_CBLAS + auto Tr = trans ? CblasTrans : CblasNoTrans; + cblas_dgemv(CblasColMajor,Tr,m,n,alpha,A,m,x,incx,beta,y,incy); +#else + char Tr = trans ? 'T' : 'N'; + F77NAME(dgemv)(&Tr,&m,&n,&alpha,const_cast(A),&m,const_cast(x),&incx,&beta,y,&incy); +#endif + } + +void +gemv_wrapper(bool trans, + Cplx alpha, + Cplx beta, + LAPACK_INT m, + LAPACK_INT n, + Cplx const* A, + Cplx const* x, + LAPACK_INT incx, + Cplx* y, + LAPACK_INT incy) + { +#ifdef PLATFORM_openblas + auto Tr = trans ? CblasTrans : CblasNoTrans; + //auto ralpha = realRef(alpha); + //auto ialpha = imagRef(alpha); + //auto rbeta = realRef(beta); + //auto ibeta = imagRef(beta); + //if(ialpha != 0.0 || ibeta != 0.0) + // { + // throw std::runtime_error("Complex alpha, beta not supported in zgemm for PLATFORM=openblas"); + // } + auto* palpha = reinterpret_cast(&alpha); + auto* pbeta = reinterpret_cast(&beta); + auto* pA = reinterpret_cast(A); + auto* px = reinterpret_cast(x); + auto* py = reinterpret_cast(y); + cblas_zgemv(CblasColMajor,Tr,m,n,palpha,pA,m,px,incx,pbeta,py,incy); +#else //platform other than openblas +#ifdef ITENSOR_USE_CBLAS + auto Tr = trans ? CblasTrans : CblasNoTrans; + auto palpha = reinterpret_cast(&alpha); + auto pbeta = reinterpret_cast(&beta); + cblas_zgemv(CblasColMajor,Tr,m,n,palpha,(void*)A,m,(void*)x,incx,pbeta,(void*)y,incy); +#else + char Tr = trans ? 'T' : 'N'; + auto ncA = const_cast(A); + auto ncx = const_cast(x); + auto pA = reinterpret_cast(ncA); + auto px = reinterpret_cast(ncx); + auto py = reinterpret_cast(y); + auto palpha = reinterpret_cast(&alpha); + auto pbeta = reinterpret_cast(&beta); + F77NAME(zgemv)(&Tr,&m,&n,palpha,pA,&m,px,&incx,pbeta,py,&incy); +#endif +#endif + } + + +// +// dsyev +// +void +dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs + char uplo, //if uplo=='U', read from upper triangle of A + LAPACK_INT n, //number of cols of A + LAPACK_REAL* A, //symmetric matrix A + LAPACK_REAL* eigs, //eigenvalues on return + LAPACK_INT& info) //error info + { + std::vector work; + LAPACK_INT lda = n; + +#ifdef PLATFORM_acml + static const LAPACK_INT one = 1; + LAPACK_INT lwork = std::max(one,3*n-1); + work.resize(lwork+2); + F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,work.data(),&lwork,&info,1,1); +#else + //Compute optimal workspace size (will be written to wkopt) + LAPACK_INT lwork = -1; //tell dsyev to compute optimal size + LAPACK_REAL wkopt = 0; + F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,&wkopt,&lwork,&info); + lwork = LAPACK_INT(wkopt); + work.resize(lwork+2); + F77NAME(dsyev)(&jobz,&uplo,&n,A,&lda,eigs,work.data(),&lwork,&info); +#endif + } + +// +// dscal +// +void +dscal_wrapper(LAPACK_INT N, + LAPACK_REAL alpha, + LAPACK_REAL* data, + LAPACK_INT inc) + { +#ifdef ITENSOR_USE_CBLAS + cblas_dscal(N,alpha,data,inc); +#else + F77NAME(dscal)(&N,&alpha,data,&inc); +#endif + } + +void +zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + Cplx *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + Cplx *u, //on return, unitary matrix U + Cplx *vt, //on return, unitary matrix V transpose + LAPACK_INT *info) + { + std::vector work; + std::vector rwork; + std::vector iwork; + auto pA = reinterpret_cast(A); + auto pU = reinterpret_cast(u); + auto pVt = reinterpret_cast(vt); + LAPACK_INT l = std::min(*m,*n), + g = std::max(*m,*n); + LAPACK_INT lwork = l*l+2*l+g+100; + work.resize(lwork); + rwork.resize(5*l*(1+l)); + iwork.resize(8*l); +#ifdef PLATFORM_acml + LAPACK_INT jobz_len = 1; + F77NAME(zgesdd)(jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),iwork.data(),info,jobz_len); +#else + F77NAME(zgesdd)(jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),iwork.data(),info); +#endif + } + + + + void +dgesdd_wrapper(char* jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + LAPACK_REAL *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + LAPACK_REAL *u, //on return, unitary matrix U + LAPACK_REAL *vt, //on return, unitary matrix V transpose + LAPACK_INT *info) + { + std::vector work; + std::vector iwork; + LAPACK_INT l = std::min(*m,*n), + g = std::max(*m,*n); + LAPACK_INT lwork = l*(6 + 4*l) + g; + work.resize(lwork); + iwork.resize(8*l); +#ifdef PLATFORM_acml + LAPACK_INT jobz_len = 1; + F77NAME(dgesdd)(jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork,iwork.data(),info,jobz_len); +#else + F77NAME(dgesdd)(jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork,iwork.data(),info); +#endif + } + + + + void +zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + Cplx *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + Cplx *u, //on return, unitary matrix U + Cplx *vt, //on return, unitary matrix V transpose + LAPACK_INT *info) + { + std::vector work; + std::vector rwork; + std::vector iwork; + auto pA = reinterpret_cast(A); + auto pU = reinterpret_cast(u); + auto pVt = reinterpret_cast(vt); + LAPACK_INT l = std::min(*m,*n), + g = std::max(*m,*n); + LAPACK_INT lwork = l*l+2*l+g+100; + work.resize(lwork); + rwork.resize(5*l*(1+l)); + iwork.resize(8*l); +#ifdef PLATFORM_acml + LAPACK_INT jobz_len = 1; + F77NAME(zgesvd)(jobz,jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),info,jobz_len); +#else + F77NAME(zgesvd)(jobz,jobz,m,n,pA,m,s,pU,m,pVt,&l,work.data(),&lwork,rwork.data(),info); +#endif + } + + + + void +dgesvd_wrapper(char* jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + LAPACK_REAL *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + LAPACK_REAL *u, //on return, unitary matrix U + LAPACK_REAL *vt, //on return, unitary matrix V transpose + LAPACK_INT *info) + { + std::vector work; + // std::vector superb; + + std::vector iwork; + LAPACK_INT l = std::min(*m,*n), + g = std::max(*m,*n); + LAPACK_INT lwork = l*(6 + 4*l) + g; + work.resize(lwork); + iwork.resize(8*l); + //superb.resize(l -1); +#ifdef PLATFORM_acml + LAPACK_INT jobz_len = 1; + F77NAME(dgesvd)(jobz,jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork, info, jobz_len); +#else + F77NAME(dgesvd)(jobz,jobz,m,n,A,m,s,u,m,vt,&l,work.data(),&lwork, info); +#endif + } + +// +// dgeqrf +// +// QR factorization of a real matrix A +// +void +dgeqrf_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_REAL* A, //matrix A + //on return upper triangle contains R + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_REAL* tau, //scalar factors of elementary reflectors + //length should be min(m,n) + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + LAPACK_INT lwork = std::max(one,4*std::max(*n,*m)); + work.resize(lwork+2); + F77NAME(dgeqrf)(m,n,A,lda,tau,work.data(),&lwork,info); + } + +// +// dorgqr +// +// Generates Q from output of QR factorization routine dgeqrf (see above) +// +void +dorgqr_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) + LAPACK_REAL* A, //matrix A, as returned from "A" argument of dgeqrf + //on return contains Q + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_REAL* tau, //scalar factors as returned by dgeqrf + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + auto lwork = std::max(one,4*std::max(*n,*m)); + work.resize(lwork+2); + F77NAME(dorgqr)(m,n,k,A,lda,tau,work.data(),&lwork,info); + } + + + // +// dgeqrf +// +// QR factorization of a complex matrix A +// +void +zgeqrf_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + Cplx* A, //matrix A + //on return upper triangle contains R + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_COMPLEX* tau, //scalar factors of elementary reflectors + //length should be min(m,n) + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + LAPACK_INT lwork = std::max(one,4*std::max(*n,*m)); + work.resize(lwork+2); + static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); + auto pA = reinterpret_cast(A); + F77NAME(zgeqrf)(m,n,pA,lda,tau,work.data(),&lwork,info); + } + +// +// dorgqr +// +// Generates Q from output of QR factorization routine zgeqrf (see above) +// +void +zungqr_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) + Cplx* A, //matrix A, as returned from "A" argument of dgeqrf + //on return contains Q + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_COMPLEX* tau, //scalar factors as returned by dgeqrf + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + auto lwork = std::max(one,4*std::max(*n,*m)); + work.resize(lwork+2); + static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); + auto pA = reinterpret_cast(A); + #ifdef PLATFORM_lapacke + LAPACKE_zungqr(LAPACK_COL_MAJOR,jobz,uplo,N,A,N,w.data()); + #else + F77NAME(zungqr)(m,n,k,pA,lda,tau,work.data(),&lwork,info); + #endif + } + +// +// dgesv +// +LAPACK_INT +dgesv_wrapper(LAPACK_INT n, + LAPACK_INT nrhs, + LAPACK_REAL* a, + LAPACK_REAL* b) + { + LAPACK_INT lda = n; + std::vector ipiv(n); + LAPACK_INT ldb = n; + LAPACK_INT info = 0; + F77NAME(dgesv)(&n,&nrhs,a,&lda,ipiv.data(),b,&ldb,&info); + return info; + } + +// +// zgesv +// +LAPACK_INT +zgesv_wrapper(LAPACK_INT n, + LAPACK_INT nrhs, + Cplx* a, + Cplx* b) + { + auto pa = reinterpret_cast(a); + auto pb = reinterpret_cast(b); + LAPACK_INT lda = n; + std::vector ipiv(n); + LAPACK_INT ldb = n; + LAPACK_INT info = 0; + F77NAME(zgesv)(&n,&nrhs,pa,&lda,ipiv.data(),pb,&ldb,&info); + return info; + } + +// +// dlange +// +double +dlange_wrapper(char norm, + LAPACK_INT m, + LAPACK_INT n, + double* a) + { + double norma; +#ifdef PLATFORM_lapacke + norma = LAPACKE_dlange(LAPACK_COL_MAJOR,norm,m,n,a,m); +#else + std::vector work; + if(norm == 'I' || norm == 'i') work.resize(m); +#ifdef PLATFORM_acml + LAPACK_INT norm_len = 1; + norma = F77NAME(dlange)(&norm,&m,&n,a,&m,work.data(),norm_len); +#else + norma = F77NAME(dlange)(&norm,&m,&n,a,&m,work.data()); +#endif +#endif + return norma; + } + +// +// zlange +// +LAPACK_REAL +zlange_wrapper(char norm, + LAPACK_INT m, + LAPACK_INT n, + Cplx* a) + { + LAPACK_REAL norma; +#ifdef PLATFORM_lapacke + auto pA = reinterpret_cast(a); + norma = LAPACKE_zlange(LAPACK_COL_MAJOR,norm,m,n,pa,m); +#else + std::vector work; + if(norm == 'I' || norm == 'i') work.resize(m); + auto pA = reinterpret_cast(a); +#ifdef PLATFORM_acml + LAPACK_INT norm_len = 1; + norma = F77NAME(zlange)(&norm,&m,&n,pA,&m,work.data(),norm_len); +#else + norma = F77NAME(zlange)(&norm,&m,&n,pA,&m,work.data()); +#endif +#endif + return norma; + } + +// +// zheev +// +// Eigenvalues and eigenvectors of complex Hermitian matrix A +// +LAPACK_INT +zheev_wrapper(LAPACK_INT N, //number of cols of A + Cplx * A, //matrix A, on return contains eigenvectors + LAPACK_REAL * d) //eigenvalues on return + { + static const LAPACK_INT one = 1; + char jobz = 'V'; + char uplo = 'U'; +#ifdef PLATFORM_lapacke + std::vector work(N); + LAPACKE_zheev(LAPACK_COL_MAJOR,jobz,uplo,N,A,N,w.data()); +#else + LAPACK_INT lwork = std::max(one,3*N-1);//max(1, 1+6*N+2*N*N); + std::vector work(lwork); + std::vector rwork(lwork); + LAPACK_INT info = 0; + static_assert(sizeof(LAPACK_COMPLEX)==sizeof(Cplx),"LAPACK_COMPLEX and itensor::Cplx have different size"); + auto pA = reinterpret_cast(A); +#ifdef PLATFORM_acml + LAPACK_INT jobz_len = 1; + LAPACK_INT uplo_len = 1; + F77NAME(zheev)(&jobz,&uplo,&N,pA,&N,d,work.data(),&lwork,rwork.data(),&info,jobz_len,uplo_len); +#else + F77NAME(zheev)(&jobz,&uplo,&N,pA,&N,d,work.data(),&lwork,rwork.data(),&info); +#endif + +#endif //PLATFORM_lapacke + return info; + } + +// +// dsygv +// +// Eigenvalues and eigenvectors of generalized eigenvalue problem +// A*x = lambda*B*x +// A and B must be symmetric +// B must be positive definite +// +void +dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs + //if 'N', only eigenvalues + char* uplo, //if 'U', use upper triangle of A + LAPACK_INT* n, //number of cols of A + LAPACK_REAL* A, //matrix A, on return contains eigenvectors + LAPACK_REAL* B, //matrix B + LAPACK_REAL* d, //eigenvalues on return + LAPACK_INT* info) //error info + { + static const LAPACK_INT one = 1; + std::vector work; + LAPACK_INT itype = 1; + LAPACK_INT lwork = std::max(one,3*(*n)-1);//std::max(1, 1+6*N+2*N*N); + work.resize(lwork); +#ifdef PLATFORM_acml + LAPACK_INT jobz_len = 1; + LAPACK_INT uplo_len = 1; + F77NAME(dsygv)(&itype,jobz,uplo,n,A,n,B,n,d,work.data(),&lwork,info,jobz_len,uplo_len); +#else + F77NAME(dsygv)(&itype,jobz,uplo,n,A,n,B,n,d,work.data(),&lwork,info); +#endif + } + +// +// dgeev +// +// Eigenvalues and eigenvectors of real, square matrix A +// A can be a general real matrix, not assumed symmetric +// +LAPACK_INT +dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' + char jobvr, //if 'V', compute right eigenvectors, else 'N' + LAPACK_INT n, //number of rows/cols of A + LAPACK_REAL const* A, //matrix A + LAPACK_REAL* dr, //real parts of eigenvalues + LAPACK_REAL* di, //imaginary parts of eigenvalues + LAPACK_REAL* vl, //left eigenvectors on return + LAPACK_REAL* vr) //right eigenvectors on return + { + std::vector work; + std::vector cpA; + + cpA.resize(n*n); + std::copy(A,A+n*n,cpA.data()); + + LAPACK_INT nevecl = (jobvl == 'V' ? n : 1); + LAPACK_INT nevecr = (jobvr == 'V' ? n : 1); + LAPACK_INT info = 0; +#ifdef PLATFORM_acml + LAPACK_INT lwork = -1; + LAPACK_REAL wquery = 0; + F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,&wquery,&lwork,&info,1,1); + + lwork = static_cast(wquery); + work.resize(lwork); + F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,work.data(),&lwork,&info,1,1); +#else + LAPACK_INT lwork = -1; + LAPACK_REAL wquery = 0; + F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,&wquery,&lwork,&info); + + lwork = static_cast(wquery); + work.resize(lwork); + F77NAME(dgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,dr,di,vl,&nevecl,vr,&nevecr,work.data(),&lwork,&info); +#endif + //println("jobvl = ",jobvl); + //println("nevecl = ",nevecl); + //println("vl data = "); + //for(auto j = 0; j < n*n; ++j) + // { + // println(*vl); + // ++vl; + // } + //println("vr data = "); + //for(auto j = 0; j < n*n; ++j) + // { + // println(*vr); + // ++vr; + // } + return info; + } + +// +// zgeev +// +// Eigenvalues and eigenvectors of complex, square matrix A +// A can be a general complex matrix, not assumed symmetric +// +LAPACK_INT +zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' + char jobvr, //if 'V', compute right eigenvectors, else 'N' + LAPACK_INT n, //number of rows/cols of A + Cplx const* A, //matrix A + Cplx * d, //eigenvalues + Cplx * vl, //left eigenvectors on return + Cplx * vr) //right eigenvectors on return + { + static const LAPACK_INT one = 1; + std::vector cpA; + std::vector work; + std::vector rwork; + LAPACK_INT nevecl = (jobvl == 'V' ? n : 1); + LAPACK_INT nevecr = (jobvr == 'V' ? n : 1); + LAPACK_INT lwork = std::max(one,4*n); + work.resize(lwork); + LAPACK_INT lrwork = std::max(one,2*n); + rwork.resize(lrwork); + + //Copy A data into cpA + cpA.resize(n*n); + auto pA = reinterpret_cast(A); + std::copy(pA,pA+n*n,cpA.data()); + + auto pd = reinterpret_cast(d); + auto pvl = reinterpret_cast(vl); + auto pvr = reinterpret_cast(vr); + + LAPACK_INT info = 0; +#ifdef PLATFORM_acml + F77NAME(zgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,pd,pvl,&nevecl,pvr,&nevecr,work.data(),&lwork,rwork.data(),&info,1,1); +#else + F77NAME(zgeev)(&jobvl,&jobvr,&n,cpA.data(),&n,pd,pvl,&nevecl,pvr,&nevecr,work.data(),&lwork,rwork.data(),&info); +#endif + return info; + } + +} //namespace itensor + diff --git a/itensor/tensor/lapack_wrap.h b/itensor/tensor/lapack_wrap.h index 01e8831da..776ed5813 100644 --- a/itensor/tensor/lapack_wrap.h +++ b/itensor/tensor/lapack_wrap.h @@ -1,5 +1,799 @@ -#ifdef ITENSOR_USE_CMAKE -#include -#else //ITENSOR_USE_CMAKE -#include -#endif //ITENSOR_USE_CMAKE \ No newline at end of file +// +// Copyright 2018 The Simons Foundation, Inc. - All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef __ITENSOR_LAPACK_WRAP_h +#define __ITENSOR_LAPACK_WRAP_h + +#include +#ifndef ITENSOR_USE_CMAKE +#include "itensor/config.h" +#else +#ifndef LAPACK_INT + #ifdef LAPACK_ILP64 + #define lapack_int int64_t + #else // LAPACK_ILP64 + #define LAPACK_INT int + #endif // LAPACK_ILP64 +#endif // LAPACK_INT +#define LAPACK_REAL double +#define LAPACK_COMPLEX itensor::Cplx +#endif //ITENSOR_USE_CMAKE +#include "itensor/types.h" +#include "itensor/util/timers.h" + +// +// Headers and typedefs +// + +// +// +// Generic Linux LAPACK +// +// +#ifdef PLATFORM_lapack + +#define LAPACK_REQUIRE_EXTERN + +namespace itensor { + using LAPACK_INT = int; + using LAPACK_REAL = double; + typedef struct + { + LAPACK_REAL real, imag; + } LAPACK_COMPLEX; +} +#elif defined PLATFORM_openblas + +#define ITENSOR_USE_CBLAS + +#include "cblas.h" +#include "lapacke.h" +#undef I //lapacke.h includes complex.h which defined an `I` macro + //that can cause problems, so best to undefine it + +namespace itensor { +using LAPACK_INT = lapack_int; +using LAPACK_REAL = double; +using LAPACK_COMPLEX = lapack_complex_double; + +inline LAPACK_REAL& +realRef(LAPACK_COMPLEX & z) + { + auto* p = reinterpret_cast(&z); + return p[0]; + } + +inline LAPACK_REAL& +imagRef(LAPACK_COMPLEX & z) + { + auto* p = reinterpret_cast(&z); + return p[1]; + } +} + +// +// +// Apple Accelerate/vecLib +// +// +#elif defined PLATFORM_macos + +#define ITENSOR_USE_CBLAS +//#define ITENSOR_USE_ZGEMM + +#include + namespace itensor { + using LAPACK_INT = __CLPK_integer; + using LAPACK_REAL = __CLPK_doublereal; + using LAPACK_COMPLEX = __CLPK_doublecomplex; + + inline LAPACK_REAL& + realRef(LAPACK_COMPLEX & z) { return z.r; } + + inline LAPACK_REAL& + imagRef(LAPACK_COMPLEX & z) { return z.i; } + } + +// +// +// Intel MKL +// +// +#elif defined PLATFORM_mkl + +#define ITENSOR_USE_CBLAS +#define ITENSOR_USE_ZGEMM + +#include "mkl_cblas.h" +#include "mkl_lapack.h" + namespace itensor { + using LAPACK_INT = MKL_INT; + using LAPACK_REAL = double; + using LAPACK_COMPLEX = MKL_Complex16; + + inline LAPACK_REAL& + realRef(LAPACK_COMPLEX & z) { return z.real; } + + inline LAPACK_REAL& + imagRef(LAPACK_COMPLEX & z) { return z.imag; } + } + +// +// +// AMD ACML +// +// +#elif defined PLATFORM_acml + +#define LAPACK_REQUIRE_EXTERN +//#include "acml.h" + namespace itensor { + using LAPACK_INT = int; + using LAPACK_REAL = double; + typedef struct + { + LAPACK_REAL real, imag; + } LAPACK_COMPLEX; + + inline LAPACK_REAL& + realRef(LAPACK_COMPLEX & z) { return z.real; } + + inline LAPACK_REAL& + imagRef(LAPACK_COMPLEX & z) { return z.imag; } + } + +#endif // different PLATFORM types + + + +#ifdef FORTRAN_NO_TRAILING_UNDERSCORE +#define F77NAME(x) x +#else +#if defined(LAPACK_GLOBAL) || defined(LAPACK_NAME) +#define F77NAME(x) LAPACK_##x +#else +#define F77NAME(x) x##_ +#endif +#endif + +namespace itensor { + +// +// +// Forward declarations of fortran lapack routines +// +// +#ifdef LAPACK_REQUIRE_EXTERN + extern "C" { + +//dnrm2 declaration +#ifdef ITENSOR_USE_CBLAS +LAPACK_REAL cblas_dnrm2(LAPACK_INT N, LAPACK_REAL *X, LAPACK_INT incX); +#else +LAPACK_REAL F77NAME(dnrm2)(LAPACK_INT* N, LAPACK_REAL* X, LAPACK_INT* incx); +#endif + + +//daxpy declaration +#ifdef ITENSOR_USE_CBLAS +void cblas_daxpy(const int n, const double alpha, const double *X, const int incX, double *Y, const int incY); +#else +void F77NAME(daxpy)(LAPACK_INT* n, LAPACK_REAL* alpha, + LAPACK_REAL* X, LAPACK_INT* incx, + LAPACK_REAL* Y, LAPACK_INT* incy); +#endif + +//ddot declaration +#ifdef ITENSOR_USE_CBLAS +LAPACK_REAL +cblas_ddot(const LAPACK_INT N, const LAPACK_REAL *X, const LAPACK_INT incx, const LAPACK_REAL *Y, const LAPACK_INT incy); +#else +LAPACK_REAL F77NAME(ddot)(LAPACK_INT* N, LAPACK_REAL* X, LAPACK_INT* incx, LAPACK_REAL* Y, LAPACK_INT* incy); +#endif + +//zdotc declaration +#ifdef ITENSOR_USE_CBLAS +LAPACK_REAL +cblas_zdotc_sub(const LAPACK_INT N, const void *X, const LAPACK_INT incx, const void *Y, const LAPACK_INT incy, void *res); +#else +LAPACK_COMPLEX F77NAME(zdotc)(LAPACK_INT* N, LAPACK_COMPLEX* X, LAPACK_INT* incx, LAPACK_COMPLEX* Y, LAPACK_INT* incy); +#endif + +//dgemm declaration +#ifdef ITENSOR_USE_CBLAS +void cblas_dgemm(const enum CBLAS_ORDER __Order, + const enum CBLAS_TRANSPOSE __TransA, + const enum CBLAS_TRANSPOSE __TransB, const int __M, const int __N, + const int __K, const double __alpha, const double *__A, + const int __lda, const double *__B, const int __ldb, + const double __beta, double *__C, const int __ldc); +#else +void F77NAME(dgemm)(char*,char*,LAPACK_INT*,LAPACK_INT*,LAPACK_INT*, + LAPACK_REAL*,LAPACK_REAL*,LAPACK_INT*,LAPACK_REAL*, + LAPACK_INT*,LAPACK_REAL*,LAPACK_REAL*,LAPACK_INT*); +#endif + +//zgemm declaration +#ifdef PLATFORM_openblas +void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, + OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, + OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, + OPENBLAS_CONST blasint M, + OPENBLAS_CONST blasint N, + OPENBLAS_CONST blasint K, + OPENBLAS_CONST double *alpha, + OPENBLAS_CONST double *A, + OPENBLAS_CONST blasint lda, + OPENBLAS_CONST double *B, + OPENBLAS_CONST blasint ldb, + OPENBLAS_CONST double *beta, + double *C, + OPENBLAS_CONST blasint ldc); +#else //platform not openblas + +#ifdef ITENSOR_USE_CBLAS +void cblas_zgemm(const enum CBLAS_ORDER __Order, + const enum CBLAS_TRANSPOSE __TransA, + const enum CBLAS_TRANSPOSE __TransB, const int __M, const int __N, + const int __K, const void *__alpha, const void *__A, const int __lda, + const void *__B, const int __ldb, const void *__beta, void *__C, + const int __ldc); +#else +void F77NAME(zgemm)(char* transa,char* transb,LAPACK_INT* m,LAPACK_INT* n,LAPACK_INT* k, + LAPACK_COMPLEX* alpha,LAPACK_COMPLEX* A,LAPACK_INT* LDA,LAPACK_COMPLEX* B, + LAPACK_INT* LDB,LAPACK_COMPLEX* beta,LAPACK_COMPLEX* C,LAPACK_INT* LDC); +#endif + +#endif //zgemm declaration + +//dgemv declaration +#ifdef ITENSOR_USE_CBLAS +void cblas_dgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const LAPACK_INT M, const LAPACK_INT N, + const LAPACK_REAL alpha, const LAPACK_REAL *A, const LAPACK_INT lda, + const LAPACK_REAL *X, const LAPACK_INT incX, const LAPACK_REAL beta, LAPACK_REAL *Y, + const LAPACK_INT incY); +#else +void F77NAME(dgemv)(char* transa,LAPACK_INT* M,LAPACK_INT* N,LAPACK_REAL* alpha, LAPACK_REAL* A, + LAPACK_INT* LDA, LAPACK_REAL* X, LAPACK_INT* incx, LAPACK_REAL* beta, + LAPACK_REAL* Y, LAPACK_INT* incy); +#endif + +//zgemv declaration +#ifdef PLATFORM_openblas +void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, + OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, + OPENBLAS_CONST blasint m, + OPENBLAS_CONST blasint n, + OPENBLAS_CONST double *alpha, + OPENBLAS_CONST double *a, + OPENBLAS_CONST blasint lda, + OPENBLAS_CONST double *x, + OPENBLAS_CONST blasint incx, + OPENBLAS_CONST double *beta, + double *y, + OPENBLAS_CONST blasint incy); +#else +#ifdef ITENSOR_USE_CBLAS +void cblas_zgemv(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE trans, const LAPACK_INT m, + const LAPACK_INT n, const void *alpha, const void *a, const LAPACK_INT lda, + const void *x, const LAPACK_INT incx, const void *beta, void *y, const LAPACK_INT incy); +#else +void F77NAME(zgemv)(char* transa,LAPACK_INT* M,LAPACK_INT* N,LAPACK_COMPLEX* alpha, LAPACK_COMPLEX* A, + LAPACK_INT* LDA, LAPACK_COMPLEX* X, LAPACK_INT* incx, LAPACK_COMPLEX* beta, + LAPACK_COMPLEX* Y, LAPACK_INT* incy); +#endif +#endif //zgemv declaration + +#ifdef PLATFORM_acml +void F77NAME(dsyev)(char *jobz, char *uplo, int *n, double *a, int *lda, + double *w, double *work, int *lwork, int *info, + int jobz_len, int uplo_len); +#else +void F77NAME(dsyev)(const char* jobz, const char* uplo, const LAPACK_INT* n, double* a, + const LAPACK_INT* lda, double* w, double* work, const LAPACK_INT* lwork, + LAPACK_INT* info ); +#endif + +#ifdef ITENSOR_USE_CBLAS +void cblas_dscal(const LAPACK_INT N, const LAPACK_REAL alpha, LAPACK_REAL* X,const LAPACK_INT incX); +#else +void F77NAME(dscal)(LAPACK_INT* N, LAPACK_REAL* alpha, LAPACK_REAL* X,LAPACK_INT* incX); +#endif + + +#ifdef PLATFORM_acml +void F77NAME(dgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, + double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, + double *work, LAPACK_INT *lwork, LAPACK_INT *iwork, LAPACK_INT *info, int jobz_len); +#else +void F77NAME(dgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, + double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, + double *work, LAPACK_INT *lwork, LAPACK_INT *iwork, LAPACK_INT *info); +#endif + + +#ifdef PLATFORM_acml + void F77NAME(dgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, + double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, + double *work, LAPACK_INT *lwork, LAPACK_INT *info, int jobz_len); +#else + void F77NAME(dgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, double *s, + double *u, LAPACK_INT *ldu, double *vt, LAPACK_INT *ldvt, + double *work, LAPACK_INT *lwork, LAPACK_INT *info); +#endif + + + #ifdef PLATFORM_acml + void F77NAME(zgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, LAPACK_REAL *s, + LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_REAL * rwork, LAPACK_INT *info, int jobz_len); +#else + void F77NAME(zgesvd)(char *jobz, char* jobv, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, LAPACK_REAL *s, + LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_REAL * rwork, LAPACK_INT *info); +#endif + +#ifdef PLATFORM_acml +void F77NAME(zgesdd)(char *jobz, int *m, int *n, LAPACK_COMPLEX *a, int *lda, double *s, + LAPACK_COMPLEX *u, int *ldu, LAPACK_COMPLEX *vt, int *ldvt, + LAPACK_COMPLEX *work, int *lwork, double *rwork, int *iwork, int *info, + int jobz_len); +#else +void F77NAME(zgesdd)(char *jobz, LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, double *s, + LAPACK_COMPLEX *u, LAPACK_INT *ldu, LAPACK_COMPLEX *vt, LAPACK_INT *ldvt, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, LAPACK_INT *iwork, LAPACK_INT *info); +#endif + +void F77NAME(dgeqrf)(LAPACK_INT *m, LAPACK_INT *n, double *a, LAPACK_INT *lda, + double *tau, double *work, LAPACK_INT *lwork, LAPACK_INT *info); + +void F77NAME(dorgqr)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, double *a, + LAPACK_INT *lda, double *tau, double *work, LAPACK_INT *lwork, + LAPACK_INT *info); + + +void F77NAME(zgeqrf)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, + LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, LAPACK_INT *info); + +#ifdef PLATFORM_lapacke +void LAPACKE_zungqr(int matrix_layout, LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, LAPACK_COMPLEX *a, + LAPACK_INT *lda, LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, + LAPACK_INT *info); +#else +void F77NAME(zungqr)(LAPACK_INT *m, LAPACK_INT *n, LAPACK_INT *k, LAPACK_COMPLEX *a, + LAPACK_INT *lda, LAPACK_COMPLEX *tau, LAPACK_COMPLEX *work, LAPACK_INT *lwork, + LAPACK_INT *info); +#endif + +void F77NAME(dgesv)(LAPACK_INT *n, LAPACK_INT *nrhs, LAPACK_REAL *a, LAPACK_INT *lda, + LAPACK_INT *ipiv, LAPACK_REAL *b, LAPACK_INT *ldb, LAPACK_INT *info); + +void F77NAME(zgesv)(LAPACK_INT *n, LAPACK_INT *nrhs, LAPACK_COMPLEX *a, LAPACK_INT * lda, + LAPACK_INT *ipiv, LAPACK_COMPLEX *b, LAPACK_INT *ldb, LAPACK_INT *info); + +#ifdef PLATFORM_lapacke +double LAPACKE_dlange(int matrix_layout, char norm, lapack_int m, lapack_int n, const double* a, lapack_int lda); +#elif defined PLATFORM_acml +double F77NAME(dlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, double* a, LAPACK_INT* lda, double* work, LAPACK_INT norm_len); +#else +double F77NAME(dlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, double* a, LAPACK_INT* lda, double* work); +#endif + +#ifdef PLATFORM_lapacke +lapack_real LAPACKE_zlange(int matrix_layout, char norm, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda); +#elif defined PLATFORM_acml +LAPACK_REAL F77NAME(zlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, LAPACK_COMPLEX* a, LAPACK_INT* lda, double* work, LAPACK_INT norm_len); +#else +LAPACK_REAL F77NAME(zlange)(char* norm, LAPACK_INT* m, LAPACK_INT* n, LAPACK_COMPLEX* a, LAPACK_INT* lda, double* work); +#endif + +#ifdef PLATFORM_lapacke +lapack_int LAPACKE_zheev(int matrix_order, char jobz, char uplo, lapack_int n, + lapack_complex_double* a, lapack_int lda, double* w); +#elif defined PLATFORM_acml +void F77NAME(zheev)(char *jobz, char *uplo, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, + double *w, LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, + LAPACK_INT *info, LAPACK_INT jobz_len, LAPACK_INT uplo_len); +#else +void F77NAME(zheev)(char *jobz, char *uplo, LAPACK_INT *n, LAPACK_COMPLEX *a, LAPACK_INT *lda, + double *w, LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, + LAPACK_INT *info); +#endif + + +#ifdef PLATFORM_acml +void F77NAME(dsygv)(LAPACK_INT *itype, char *jobz, char *uplo, LAPACK_INT *n, double *a, + LAPACK_INT *lda, double *b, LAPACK_INT *ldb, double *w, double *work, + LAPACK_INT *lwork, LAPACK_INT *info, LAPACK_INT jobz_len, LAPACK_INT uplo_len); +#else +void F77NAME(dsygv)(LAPACK_INT *itype, char *jobz, char *uplo, LAPACK_INT *n, double *a, + LAPACK_INT *lda, double *b, LAPACK_INT *ldb, double *w, double *work, + LAPACK_INT *lwork, LAPACK_INT *info); +#endif + + +#ifdef PLATFORM_acml +void F77NAME(dgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, double *a, + LAPACK_INT *lda, double *wr, double *wi, double *vl, LAPACK_INT *ldvl, + double *vr, LAPACK_INT *ldvr, double *work, LAPACK_INT *lwork, + LAPACK_INT *info, LAPACK_INT jobvl_len, LAPACK_INT jobvr_len); +#else +void F77NAME(dgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, double *a, + LAPACK_INT *lda, double *wr, double *wi, double *vl, LAPACK_INT *ldvl, + double *vr, LAPACK_INT *ldvr, double *work, LAPACK_INT *lwork, + LAPACK_INT *info); +#endif + + +#ifdef PLATFORM_acml +void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, + LAPACK_INT *lda, LAPACK_COMPLEX *w, LAPACK_COMPLEX *vl, + LAPACK_INT *ldvl, LAPACK_COMPLEX *vr, LAPACK_INT *ldvr, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, + LAPACK_INT *info, LAPACK_INT jobvl_len, LAPACK_INT jobvr_len); +#else +void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, + LAPACK_INT *lda, LAPACK_COMPLEX *w, LAPACK_COMPLEX *vl, + LAPACK_INT *ldvl, LAPACK_COMPLEX *vr, LAPACK_INT *ldvr, + LAPACK_COMPLEX *work, LAPACK_INT *lwork, double *rwork, + LAPACK_INT *info); +#endif + +} //extern "C" +#endif + +// +// daxpy +// Y += alpha*X +// + void + daxpy_wrapper(LAPACK_INT n, //number of elements of X,Y + LAPACK_REAL alpha, //scale factor + const LAPACK_REAL* X, //pointer to head of vector X + LAPACK_INT incx, //increment with which to step through X + LAPACK_REAL* Y, //pointer to head of vector Y + LAPACK_INT incy); //increment with which to step through Y + +// +// dnrm2 +// + LAPACK_REAL + dnrm2_wrapper(LAPACK_INT N, + const LAPACK_REAL* X, + LAPACK_INT incx = 1); + +// +// ddot +// + LAPACK_REAL + ddot_wrapper(LAPACK_INT N, + const LAPACK_REAL* X, + LAPACK_INT incx, + const LAPACK_REAL* Y, + LAPACK_INT incy); + +// +// zdotc +// + Cplx + zdotc_wrapper(LAPACK_INT N, + Cplx const* X, + LAPACK_INT incx, + Cplx const* Y, + LAPACK_INT incy); + +// +// dgemm +// + void + gemm_wrapper(bool transa, + bool transb, + LAPACK_INT m, + LAPACK_INT n, + LAPACK_INT k, + LAPACK_REAL alpha, + LAPACK_REAL const* A, + LAPACK_REAL const* B, + LAPACK_REAL beta, + LAPACK_REAL * C); + +// +// zgemm +// + void + gemm_wrapper(bool transa, + bool transb, + LAPACK_INT m, + LAPACK_INT n, + LAPACK_INT k, + Cplx alpha, + Cplx const* A, + Cplx const* B, + Cplx beta, + Cplx * C); + +// +// dgemv - matrix*vector multiply +// + void + gemv_wrapper(bool trans, + LAPACK_REAL alpha, + LAPACK_REAL beta, + LAPACK_INT m, + LAPACK_INT n, + const LAPACK_REAL* A, + const LAPACK_REAL* x, + LAPACK_INT incx, + LAPACK_REAL* y, + LAPACK_INT incy); + +// +// zgemv - matrix*vector multiply +// + void + gemv_wrapper(bool trans, + Cplx alpha, + Cplx beta, + LAPACK_INT m, + LAPACK_INT n, + Cplx const* A, + Cplx const* x, + LAPACK_INT incx, + Cplx* y, + LAPACK_INT incy); + + +// +// dsyev +// + void + dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs + char uplo, //if uplo=='U', read from upper triangle of A + LAPACK_INT n, //number of cols of A + LAPACK_REAL* A, //symmetric matrix A + LAPACK_REAL* eigs, //eigenvalues on return + LAPACK_INT& info); //error info + +// +// dscal +// + void + dscal_wrapper(LAPACK_INT N, + LAPACK_REAL alpha, + LAPACK_REAL* data, + LAPACK_INT inc = 1); + + + void + dgesdd_wrapper(char * jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT* m, //number of rows of input matrix *A + LAPACK_INT* n, //number of cols of input matrix *A + LAPACK_REAL *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + LAPACK_REAL *u, //on return, unitary matrix U + LAPACK_REAL *vt, //on return, unitary matrix V transpose + LAPACK_INT *info); + + void + zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + Cplx *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + Cplx *u, //on return, unitary matrix U + Cplx *vt, //on return, unitary matrix V transpose + LAPACK_INT *info); + + + void + dgesvd_wrapper(char * jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT* m, //number of rows of input matrix *A + LAPACK_INT* n, //number of cols of input matrix *A + LAPACK_REAL *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + LAPACK_REAL *u, //on return, unitary matrix U + LAPACK_REAL *vt, //on return, unitary matrix V transpose + LAPACK_INT *info); + + void + zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + Cplx *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + Cplx *u, //on return, unitary matrix U + Cplx *vt, //on return, unitary matrix V transpose + LAPACK_INT *info); + + +// +// dgeqrf +// +// QR factorization of a real matrix A +// + void + dgeqrf_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_REAL* A, //matrix A + //on return upper triangle contains R + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_REAL* tau, //scalar factors of elementary reflectors + //length should be min(m,n) + LAPACK_INT* info); //error info + +// +// dorgqr +// +// Generates Q from output of QR factorization routine dgeqrf (see above) +// + void + dorgqr_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) + LAPACK_REAL* A, //matrix A, as returned from "A" argument of dgeqrf + //on return contains Q + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_REAL* tau, //scalar factors as returned by dgeqrf + LAPACK_INT* info); //error info + + + // +// dgeqrf +// +// QR factorization of a complex matrix A +// + void + zgeqrf_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + Cplx* A, //matrix A + //on return upper triangle contains R + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_COMPLEX* tau, //scalar factors of elementary reflectors + //length should be min(m,n) + LAPACK_INT* info); //error info + +// +// dorgqr +// +// Generates Q from output of QR factorization routine zgeqrf (see above) +// + void + zungqr_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) + Cplx* A, //matrix A, as returned from "A" argument of dgeqrf + //on return contains Q + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_COMPLEX* tau, //scalar factors as returned by zgeqrf + LAPACK_INT* info); //error info + +// dgesv +// +// computes the solution to system of linear equations A*X = B +// where A is a general real matrix +// + LAPACK_INT + dgesv_wrapper(LAPACK_INT n, + LAPACK_INT nrhs, + LAPACK_REAL* a, + LAPACK_REAL* b); + +// +// zgesv +// +// computes the solution to system of linear euqations A*X =B +// where A is a general complex matrix +// + LAPACK_INT + zgesv_wrapper(LAPACK_INT n, + LAPACK_INT nrhs, + Cplx* a, + Cplx* b); + +// +// dlange +// +// returns the value of the 1-norm, Frobenius norm, infinity-norm, +// or the largest absolute value of any element of a general rectangular matrix. +// + double + dlange_wrapper(char norm, + LAPACK_INT m, + LAPACK_INT n, + double* a); + +// +// zlange +// +// returns the value of the 1-norm, Frobenius norm, infinity-norm, +// or the largest absolute value of any element of a general rectangular matrix. +// + LAPACK_REAL + zlange_wrapper(char norm, + LAPACK_INT m, + LAPACK_INT n, + Cplx* a); + +// +// zheev +// +// Eigenvalues and eigenvectors of complex Hermitian matrix A +// + LAPACK_INT + zheev_wrapper(LAPACK_INT N, //number of cols of A + Cplx * A, //matrix A, on return contains eigenvectors + LAPACK_REAL * d); //eigenvalues on return + +// +// dsygv +// +// Eigenvalues and eigenvectors of generalized eigenvalue problem +// A*x = lambda*B*x +// A and B must be symmetric +// B must be positive definite +// + void + dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs + //if 'N', only eigenvalues + char* uplo, //if 'U', use upper triangle of A + LAPACK_INT* n, //number of cols of A + LAPACK_REAL* A, //matrix A, on return contains eigenvectors + LAPACK_REAL* B, //matrix B + LAPACK_REAL* d, //eigenvalues on return + LAPACK_INT* info); //error info + +// +// dgeev +// +// Eigenvalues and eigenvectors of real, square matrix A +// A can be a general real matrix, not assumed symmetric +// +// Returns "info" integer +// + LAPACK_INT + dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' + char jobvr, //if 'V', compute right eigenvectors, else 'N' + LAPACK_INT n, //number of rows/cols of A + LAPACK_REAL const* A, //matrix A + LAPACK_REAL* dr, //real parts of eigenvalues + LAPACK_REAL* di, //imaginary parts of eigenvalues + LAPACK_REAL* vl, //left eigenvectors on return + LAPACK_REAL* vr); //right eigenvectors on return + +// +// zgeev +// +// Eigenvalues and eigenvectors of complex, square matrix A +// A can be a general complex matrix, not assumed symmetric +// +// Returns "info" integer +// + LAPACK_INT + zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' + char jobvr, //if 'V', compute right eigenvectors, else 'N' + LAPACK_INT n, //number of rows/cols of A + Cplx const* A, //matrix A + Cplx * d, //eigenvalues + Cplx * vl, //left eigenvectors on return + Cplx * vr); //right eigenvectors on return + +} //namespace itensor + +#endif From 1241db41232c78395049132c9ec417d0864ffb59 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Wed, 16 Aug 2023 16:18:18 -0400 Subject: [PATCH 36/51] Fix error in util/error.h to use itensor::error --- itensor/util/error.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/itensor/util/error.h b/itensor/util/error.h index b0d3345ce..73541ffa1 100644 --- a/itensor/util/error.h +++ b/itensor/util/error.h @@ -19,18 +19,18 @@ #include #include #include -#ifdef ITENSOR_USE_CMAKE -#include -#include -#endif // ITENSOR_USE_CMAKE +//#ifdef ITENSOR_USE_CMAKE +//#include +//#include +//#endif // ITENSOR_USE_CMAKE namespace itensor{ void error(const std::string& s); void error(const std::string& s, int line,const char* file); -#ifndef ITENSOR_USE_CMAKE -#define Error(exp) error(exp, __LINE__, __FILE__) -#else -#define Error lapack::Error -#endif // ITENSOR_USE_CMAKE +//#ifndef ITENSOR_USE_CMAKE +#define Error error +//#else +//#define Error lapack::Error +//#endif // ITENSOR_USE_CMAKE struct ITError : std::runtime_error { From 70f11e8d75dc2e64cb69fc98dfc53e34bc3b1d66 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Wed, 16 Aug 2023 16:19:17 -0400 Subject: [PATCH 37/51] Add git to jenkins --- jenkins/Dockerfile.ubuntu | 1 + 1 file changed, 1 insertion(+) diff --git a/jenkins/Dockerfile.ubuntu b/jenkins/Dockerfile.ubuntu index ee0b93505..403e14a67 100644 --- a/jenkins/Dockerfile.ubuntu +++ b/jenkins/Dockerfile.ubuntu @@ -9,6 +9,7 @@ RUN apt-get update && \ libopenblas-dev \ python3-pip \ wget \ + git-all \ && \ apt-get autoremove --purge -y && \ apt-get autoclean -y && \ From a18769ca21933571b4a2367e1b61a01a32895630 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Wed, 16 Aug 2023 16:20:27 -0400 Subject: [PATCH 38/51] Remove unecessary comments --- itensor/util/error.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/itensor/util/error.h b/itensor/util/error.h index 73541ffa1..c7c2e6b29 100644 --- a/itensor/util/error.h +++ b/itensor/util/error.h @@ -19,18 +19,10 @@ #include #include #include -//#ifdef ITENSOR_USE_CMAKE -//#include -//#include -//#endif // ITENSOR_USE_CMAKE namespace itensor{ void error(const std::string& s); void error(const std::string& s, int line,const char* file); -//#ifndef ITENSOR_USE_CMAKE #define Error error -//#else -//#define Error lapack::Error -//#endif // ITENSOR_USE_CMAKE struct ITError : std::runtime_error { From 088497e962d463143cc378a98f634e25177c1efb Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Wed, 16 Aug 2023 16:25:53 -0400 Subject: [PATCH 39/51] Revert changes to task_types.h --- itensor/itdata/task_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/itensor/itdata/task_types.h b/itensor/itdata/task_types.h index 7fec4ffec..538e40c35 100644 --- a/itensor/itdata/task_types.h +++ b/itensor/itdata/task_types.h @@ -166,7 +166,8 @@ struct ApplyIT void applyITImpl(stdx::choice<2>,T1, T2 &) { - Error( tinyformat::format("Apply: function doesn't map %s->%s",typeName(),typeName())); + auto msg = tinyformat::format("Apply: function doesn't map %s->%s",typeName(),typeName()); + Error(msg); } template auto From 25639fe1b8e63d58d70f1794e44073208af2a75d Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Wed, 16 Aug 2023 16:26:42 -0400 Subject: [PATCH 40/51] Add back missing space --- itensor/util/error.h | 1 + 1 file changed, 1 insertion(+) diff --git a/itensor/util/error.h b/itensor/util/error.h index c7c2e6b29..d84d5faca 100644 --- a/itensor/util/error.h +++ b/itensor/util/error.h @@ -19,6 +19,7 @@ #include #include #include + namespace itensor{ void error(const std::string& s); void error(const std::string& s, int line,const char* file); From 57e0e25e9a1c9560e1a73e8a7dcc4216a70f4376 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Wed, 16 Aug 2023 16:33:32 -0400 Subject: [PATCH 41/51] If using CMAKE don't allow F77NALE to be defined --- itensor/tensor/lapack_wrap.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/itensor/tensor/lapack_wrap.h b/itensor/tensor/lapack_wrap.h index 776ed5813..bda286588 100644 --- a/itensor/tensor/lapack_wrap.h +++ b/itensor/tensor/lapack_wrap.h @@ -157,16 +157,17 @@ imagRef(LAPACK_COMPLEX & z) #endif // different PLATFORM types - +#ifndef ITENSOR_USE_CMAKE #ifdef FORTRAN_NO_TRAILING_UNDERSCORE #define F77NAME(x) x #else #if defined(LAPACK_GLOBAL) || defined(LAPACK_NAME) #define F77NAME(x) LAPACK_##x -#else +#else // defined(LAPACK_GLOBAL) || defined(LAPACK_NAME) #define F77NAME(x) x##_ -#endif -#endif +#endif // defined(LAPACK_GLOBAL) || defined(LAPACK_NAME) +#endif // FORTRAN_NO_TRAILING_UNDERSCORE +#endif // ITENSOR_USE_CMAKE namespace itensor { From 345dab0cdc102141cfcca22cd581ae1e1d22d091 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Wed, 16 Aug 2023 17:28:00 -0400 Subject: [PATCH 42/51] Use pip3 instead of pip --- jenkins/Dockerfile.ubuntu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jenkins/Dockerfile.ubuntu b/jenkins/Dockerfile.ubuntu index 403e14a67..82658d71d 100644 --- a/jenkins/Dockerfile.ubuntu +++ b/jenkins/Dockerfile.ubuntu @@ -13,5 +13,5 @@ RUN apt-get update && \ && \ apt-get autoremove --purge -y && \ apt-get autoclean -y && \ - pip install cmake && \ + pip3 install cmake && \ rm -rf /var/cache/apt/* /var/lib/apt/lists/* From 3fe2398c6486fd6183a03b82057d9eb44a6c416d Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Thu, 17 Aug 2023 11:33:38 -0400 Subject: [PATCH 43/51] Revert from pip back to manually installing cmake --- jenkins/Dockerfile.ubuntu | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/jenkins/Dockerfile.ubuntu b/jenkins/Dockerfile.ubuntu index 82658d71d..10779acd4 100644 --- a/jenkins/Dockerfile.ubuntu +++ b/jenkins/Dockerfile.ubuntu @@ -7,11 +7,16 @@ RUN apt-get update && \ liblapack-dev \ liblapacke-dev \ libopenblas-dev \ - python3-pip \ + libssl-dev \ wget \ git-all \ && \ apt-get autoremove --purge -y && \ apt-get autoclean -y && \ - pip3 install cmake && \ + wget https://github.com/Kitware/CMake/releases/download/v3.20.0/cmake-3.20.0.tar.gz && \ + tar zxvf cmake-3.20.0.tar.gz && \ + cd cmake-3.20.0 && \ + ./bootstrap && \ + make && \ + make install && \ rm -rf /var/cache/apt/* /var/lib/apt/lists/* From ef176807ea80e92ff32726a92e1dc8cbf7546189 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Thu, 17 Aug 2023 12:07:37 -0400 Subject: [PATCH 44/51] Add build-essential to try and get c++ --- jenkins/Dockerfile.ubuntu | 1 + 1 file changed, 1 insertion(+) diff --git a/jenkins/Dockerfile.ubuntu b/jenkins/Dockerfile.ubuntu index 10779acd4..008b1ba47 100644 --- a/jenkins/Dockerfile.ubuntu +++ b/jenkins/Dockerfile.ubuntu @@ -2,6 +2,7 @@ FROM ubuntu:bionic RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ make \ g++ \ liblapack-dev \ From aed4b80b6b11ab430bd16f547411ae6cfff3b57d Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Wed, 23 Aug 2023 10:53:20 -0400 Subject: [PATCH 45/51] Revert definition of Error since lapack/blas linked privately --- itensor/util/error.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/itensor/util/error.h b/itensor/util/error.h index d84d5faca..df61206ad 100644 --- a/itensor/util/error.h +++ b/itensor/util/error.h @@ -23,7 +23,7 @@ namespace itensor{ void error(const std::string& s); void error(const std::string& s, int line,const char* file); -#define Error error +#define Error(exp) error(exp, __LINE__, __FILE__) struct ITError : std::runtime_error { From 5125ae3c6aea2c1f8d8a21360242c2eeb04deceb Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Wed, 23 Aug 2023 10:53:53 -0400 Subject: [PATCH 46/51] make doesn't target all just itensor --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 46489a20f..b47de3102 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -334,7 +334,7 @@ install(FILES ADD_CUSTOM_TARGET(debug COMMAND ${CMAKE_COMMAND} -DCMAKE_BUILD_TYPE=Debug ${CMAKE_CURRENT_SOURCE_DIR} - COMMAND ${CMAKE_COMMAND} --build ${CMAKE_CURRENT_BINARY_DIR} --target all + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_CURRENT_BINARY_DIR} --target itensor COMMENT "Switch CMAKE_BUILD_TYPE to Debug" ) @@ -345,7 +345,7 @@ ADD_CUSTOM_TARGET(check ) ADD_CUSTOM_TARGET(release COMMAND ${CMAKE_COMMAND} -DCMAKE_BUILD_TYPE=Release ${CMAKE_CURRENT_SOURCE_DIR} - COMMAND ${CMAKE_COMMAND} --build ${CMAKE_CURRENT_BINARY_DIR} --target all + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_CURRENT_BINARY_DIR} --target itensor COMMENT "Switch CMAKE_BUILD_TYPE to Release" ) From 75080370f520460421d7b5b4d4402c1416408fab Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Wed, 23 Aug 2023 11:11:39 -0400 Subject: [PATCH 47/51] revert formatting (remove added tabs) --- itensor/tensor/lapack_wrap.h | 424 +++++++++++++++++------------------ 1 file changed, 212 insertions(+), 212 deletions(-) diff --git a/itensor/tensor/lapack_wrap.h b/itensor/tensor/lapack_wrap.h index bda286588..18231aee1 100644 --- a/itensor/tensor/lapack_wrap.h +++ b/itensor/tensor/lapack_wrap.h @@ -460,168 +460,168 @@ void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, // daxpy // Y += alpha*X // - void - daxpy_wrapper(LAPACK_INT n, //number of elements of X,Y - LAPACK_REAL alpha, //scale factor - const LAPACK_REAL* X, //pointer to head of vector X - LAPACK_INT incx, //increment with which to step through X - LAPACK_REAL* Y, //pointer to head of vector Y - LAPACK_INT incy); //increment with which to step through Y +void +daxpy_wrapper(LAPACK_INT n, //number of elements of X,Y + LAPACK_REAL alpha, //scale factor + const LAPACK_REAL* X, //pointer to head of vector X + LAPACK_INT incx, //increment with which to step through X + LAPACK_REAL* Y, //pointer to head of vector Y + LAPACK_INT incy); //increment with which to step through Y // // dnrm2 // - LAPACK_REAL - dnrm2_wrapper(LAPACK_INT N, - const LAPACK_REAL* X, - LAPACK_INT incx = 1); +LAPACK_REAL +dnrm2_wrapper(LAPACK_INT N, + const LAPACK_REAL* X, + LAPACK_INT incx = 1); // // ddot // - LAPACK_REAL - ddot_wrapper(LAPACK_INT N, - const LAPACK_REAL* X, - LAPACK_INT incx, - const LAPACK_REAL* Y, - LAPACK_INT incy); +LAPACK_REAL +ddot_wrapper(LAPACK_INT N, + const LAPACK_REAL* X, + LAPACK_INT incx, + const LAPACK_REAL* Y, + LAPACK_INT incy); // // zdotc // - Cplx - zdotc_wrapper(LAPACK_INT N, - Cplx const* X, - LAPACK_INT incx, - Cplx const* Y, - LAPACK_INT incy); +Cplx +zdotc_wrapper(LAPACK_INT N, + Cplx const* X, + LAPACK_INT incx, + Cplx const* Y, + LAPACK_INT incy); // // dgemm // - void - gemm_wrapper(bool transa, - bool transb, - LAPACK_INT m, - LAPACK_INT n, - LAPACK_INT k, - LAPACK_REAL alpha, - LAPACK_REAL const* A, - LAPACK_REAL const* B, - LAPACK_REAL beta, - LAPACK_REAL * C); +void +gemm_wrapper(bool transa, + bool transb, + LAPACK_INT m, + LAPACK_INT n, + LAPACK_INT k, + LAPACK_REAL alpha, + LAPACK_REAL const* A, + LAPACK_REAL const* B, + LAPACK_REAL beta, + LAPACK_REAL * C); // // zgemm // - void - gemm_wrapper(bool transa, - bool transb, - LAPACK_INT m, - LAPACK_INT n, - LAPACK_INT k, - Cplx alpha, - Cplx const* A, - Cplx const* B, - Cplx beta, - Cplx * C); +void +gemm_wrapper(bool transa, + bool transb, + LAPACK_INT m, + LAPACK_INT n, + LAPACK_INT k, + Cplx alpha, + Cplx const* A, + Cplx const* B, + Cplx beta, + Cplx * C); // // dgemv - matrix*vector multiply // - void - gemv_wrapper(bool trans, - LAPACK_REAL alpha, - LAPACK_REAL beta, - LAPACK_INT m, - LAPACK_INT n, - const LAPACK_REAL* A, - const LAPACK_REAL* x, - LAPACK_INT incx, - LAPACK_REAL* y, - LAPACK_INT incy); +void +gemv_wrapper(bool trans, + LAPACK_REAL alpha, + LAPACK_REAL beta, + LAPACK_INT m, + LAPACK_INT n, + const LAPACK_REAL* A, + const LAPACK_REAL* x, + LAPACK_INT incx, + LAPACK_REAL* y, + LAPACK_INT incy); // // zgemv - matrix*vector multiply // - void - gemv_wrapper(bool trans, - Cplx alpha, - Cplx beta, - LAPACK_INT m, - LAPACK_INT n, - Cplx const* A, - Cplx const* x, - LAPACK_INT incx, - Cplx* y, - LAPACK_INT incy); +void +gemv_wrapper(bool trans, + Cplx alpha, + Cplx beta, + LAPACK_INT m, + LAPACK_INT n, + Cplx const* A, + Cplx const* x, + LAPACK_INT incx, + Cplx* y, + LAPACK_INT incy); // // dsyev // - void - dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs - char uplo, //if uplo=='U', read from upper triangle of A - LAPACK_INT n, //number of cols of A - LAPACK_REAL* A, //symmetric matrix A - LAPACK_REAL* eigs, //eigenvalues on return - LAPACK_INT& info); //error info +void +dsyev_wrapper(char jobz, //if jobz=='V', compute eigs and evecs + char uplo, //if uplo=='U', read from upper triangle of A + LAPACK_INT n, //number of cols of A + LAPACK_REAL* A, //symmetric matrix A + LAPACK_REAL* eigs, //eigenvalues on return + LAPACK_INT& info); //error info // // dscal // - void - dscal_wrapper(LAPACK_INT N, - LAPACK_REAL alpha, - LAPACK_REAL* data, - LAPACK_INT inc = 1); - - - void - dgesdd_wrapper(char * jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT* m, //number of rows of input matrix *A - LAPACK_INT* n, //number of cols of input matrix *A - LAPACK_REAL *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - LAPACK_REAL *u, //on return, unitary matrix U - LAPACK_REAL *vt, //on return, unitary matrix V transpose - LAPACK_INT *info); - - void - zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT *m, //number of rows of input matrix *A - LAPACK_INT *n, //number of cols of input matrix *A - Cplx *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - Cplx *u, //on return, unitary matrix U - Cplx *vt, //on return, unitary matrix V transpose - LAPACK_INT *info); - - - void - dgesvd_wrapper(char * jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT* m, //number of rows of input matrix *A - LAPACK_INT* n, //number of cols of input matrix *A - LAPACK_REAL *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - LAPACK_REAL *u, //on return, unitary matrix U - LAPACK_REAL *vt, //on return, unitary matrix V transpose - LAPACK_INT *info); - - void - zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V - LAPACK_INT *m, //number of rows of input matrix *A - LAPACK_INT *n, //number of cols of input matrix *A - Cplx *A, //contents of input matrix A - LAPACK_REAL *s, //on return, singular values of A - Cplx *u, //on return, unitary matrix U - Cplx *vt, //on return, unitary matrix V transpose - LAPACK_INT *info); +void +dscal_wrapper(LAPACK_INT N, + LAPACK_REAL alpha, + LAPACK_REAL* data, + LAPACK_INT inc = 1); + + +void +dgesdd_wrapper(char * jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT* m, //number of rows of input matrix *A + LAPACK_INT* n, //number of cols of input matrix *A + LAPACK_REAL *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + LAPACK_REAL *u, //on return, unitary matrix U + LAPACK_REAL *vt, //on return, unitary matrix V transpose + LAPACK_INT *info); + +void +zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + Cplx *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + Cplx *u, //on return, unitary matrix U + Cplx *vt, //on return, unitary matrix V transpose + LAPACK_INT *info); + + +void +dgesvd_wrapper(char * jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT* m, //number of rows of input matrix *A + LAPACK_INT* n, //number of cols of input matrix *A + LAPACK_REAL *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + LAPACK_REAL *u, //on return, unitary matrix U + LAPACK_REAL *vt, //on return, unitary matrix V transpose + LAPACK_INT *info); + +void +zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to compute + //choosing *jobz=='S' computes min(m,n) cols of U, V + LAPACK_INT *m, //number of rows of input matrix *A + LAPACK_INT *n, //number of cols of input matrix *A + Cplx *A, //contents of input matrix A + LAPACK_REAL *s, //on return, singular values of A + Cplx *u, //on return, unitary matrix U + Cplx *vt, //on return, unitary matrix V transpose + LAPACK_INT *info); // @@ -629,72 +629,72 @@ void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, // // QR factorization of a real matrix A // - void - dgeqrf_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_REAL* A, //matrix A - //on return upper triangle contains R - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_REAL* tau, //scalar factors of elementary reflectors - //length should be min(m,n) - LAPACK_INT* info); //error info +void +dgeqrf_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_REAL* A, //matrix A + //on return upper triangle contains R + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_REAL* tau, //scalar factors of elementary reflectors + //length should be min(m,n) + LAPACK_INT* info); //error info // // dorgqr // // Generates Q from output of QR factorization routine dgeqrf (see above) // - void - dorgqr_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) - LAPACK_REAL* A, //matrix A, as returned from "A" argument of dgeqrf - //on return contains Q - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_REAL* tau, //scalar factors as returned by dgeqrf - LAPACK_INT* info); //error info +void +dorgqr_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) + LAPACK_REAL* A, //matrix A, as returned from "A" argument of dgeqrf + //on return contains Q + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_REAL* tau, //scalar factors as returned by dgeqrf + LAPACK_INT* info); //error info - // +// // dgeqrf // // QR factorization of a complex matrix A // - void - zgeqrf_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - Cplx* A, //matrix A - //on return upper triangle contains R - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_COMPLEX* tau, //scalar factors of elementary reflectors - //length should be min(m,n) - LAPACK_INT* info); //error info +void +zgeqrf_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + Cplx* A, //matrix A + //on return upper triangle contains R + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_COMPLEX* tau, //scalar factors of elementary reflectors + //length should be min(m,n) + LAPACK_INT* info); //error info // // dorgqr // // Generates Q from output of QR factorization routine zgeqrf (see above) // - void - zungqr_wrapper(LAPACK_INT* m, //number of rows of A - LAPACK_INT* n, //number of cols of A - LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) - Cplx* A, //matrix A, as returned from "A" argument of dgeqrf - //on return contains Q - LAPACK_INT* lda, //size of A (usually same as n) - LAPACK_COMPLEX* tau, //scalar factors as returned by zgeqrf - LAPACK_INT* info); //error info +void +zungqr_wrapper(LAPACK_INT* m, //number of rows of A + LAPACK_INT* n, //number of cols of A + LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) + Cplx* A, //matrix A, as returned from "A" argument of dgeqrf + //on return contains Q + LAPACK_INT* lda, //size of A (usually same as n) + LAPACK_COMPLEX* tau, //scalar factors as returned by zgeqrf + LAPACK_INT* info); //error info // dgesv // // computes the solution to system of linear equations A*X = B // where A is a general real matrix // - LAPACK_INT - dgesv_wrapper(LAPACK_INT n, - LAPACK_INT nrhs, - LAPACK_REAL* a, - LAPACK_REAL* b); +LAPACK_INT +dgesv_wrapper(LAPACK_INT n, + LAPACK_INT nrhs, + LAPACK_REAL* a, + LAPACK_REAL* b); // // zgesv @@ -702,11 +702,11 @@ void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, // computes the solution to system of linear euqations A*X =B // where A is a general complex matrix // - LAPACK_INT - zgesv_wrapper(LAPACK_INT n, - LAPACK_INT nrhs, - Cplx* a, - Cplx* b); +LAPACK_INT +zgesv_wrapper(LAPACK_INT n, + LAPACK_INT nrhs, + Cplx* a, + Cplx* b); // // dlange @@ -714,11 +714,11 @@ void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, // returns the value of the 1-norm, Frobenius norm, infinity-norm, // or the largest absolute value of any element of a general rectangular matrix. // - double - dlange_wrapper(char norm, - LAPACK_INT m, - LAPACK_INT n, - double* a); +double +dlange_wrapper(char norm, + LAPACK_INT m, + LAPACK_INT n, + double* a); // // zlange @@ -726,21 +726,21 @@ void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, // returns the value of the 1-norm, Frobenius norm, infinity-norm, // or the largest absolute value of any element of a general rectangular matrix. // - LAPACK_REAL - zlange_wrapper(char norm, - LAPACK_INT m, - LAPACK_INT n, - Cplx* a); +LAPACK_REAL +zlange_wrapper(char norm, + LAPACK_INT m, + LAPACK_INT n, + Cplx* a); // // zheev // // Eigenvalues and eigenvectors of complex Hermitian matrix A // - LAPACK_INT - zheev_wrapper(LAPACK_INT N, //number of cols of A - Cplx * A, //matrix A, on return contains eigenvectors - LAPACK_REAL * d); //eigenvalues on return +LAPACK_INT +zheev_wrapper(LAPACK_INT N, //number of cols of A + Cplx * A, //matrix A, on return contains eigenvectors + LAPACK_REAL * d); //eigenvalues on return // // dsygv @@ -750,15 +750,15 @@ void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, // A and B must be symmetric // B must be positive definite // - void - dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs - //if 'N', only eigenvalues - char* uplo, //if 'U', use upper triangle of A - LAPACK_INT* n, //number of cols of A - LAPACK_REAL* A, //matrix A, on return contains eigenvectors - LAPACK_REAL* B, //matrix B - LAPACK_REAL* d, //eigenvalues on return - LAPACK_INT* info); //error info +void +dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs + //if 'N', only eigenvalues + char* uplo, //if 'U', use upper triangle of A + LAPACK_INT* n, //number of cols of A + LAPACK_REAL* A, //matrix A, on return contains eigenvectors + LAPACK_REAL* B, //matrix B + LAPACK_REAL* d, //eigenvalues on return + LAPACK_INT* info); //error info // // dgeev @@ -768,15 +768,15 @@ void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, // // Returns "info" integer // - LAPACK_INT - dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' - char jobvr, //if 'V', compute right eigenvectors, else 'N' - LAPACK_INT n, //number of rows/cols of A - LAPACK_REAL const* A, //matrix A - LAPACK_REAL* dr, //real parts of eigenvalues - LAPACK_REAL* di, //imaginary parts of eigenvalues - LAPACK_REAL* vl, //left eigenvectors on return - LAPACK_REAL* vr); //right eigenvectors on return +LAPACK_INT +dgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' + char jobvr, //if 'V', compute right eigenvectors, else 'N' + LAPACK_INT n, //number of rows/cols of A + LAPACK_REAL const* A, //matrix A + LAPACK_REAL* dr, //real parts of eigenvalues + LAPACK_REAL* di, //imaginary parts of eigenvalues + LAPACK_REAL* vl, //left eigenvectors on return + LAPACK_REAL* vr); //right eigenvectors on return // // zgeev @@ -786,14 +786,14 @@ void F77NAME(zgeev)(char *jobvl, char *jobvr, LAPACK_INT *n, LAPACK_COMPLEX *a, // // Returns "info" integer // - LAPACK_INT - zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' - char jobvr, //if 'V', compute right eigenvectors, else 'N' - LAPACK_INT n, //number of rows/cols of A - Cplx const* A, //matrix A - Cplx * d, //eigenvalues - Cplx * vl, //left eigenvectors on return - Cplx * vr); //right eigenvectors on return +LAPACK_INT +zgeev_wrapper(char jobvl, //if 'V', compute left eigenvectors, else 'N' + char jobvr, //if 'V', compute right eigenvectors, else 'N' + LAPACK_INT n, //number of rows/cols of A + Cplx const* A, //matrix A + Cplx * d, //eigenvalues + Cplx * vl, //left eigenvectors on return + Cplx * vr); //right eigenvectors on return } //namespace itensor From f517e3b62b105f809034991a3a048cbd7419130e Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Wed, 23 Aug 2023 11:15:26 -0400 Subject: [PATCH 48/51] more formatting [no ci] --- itensor/tensor/lapack_wrap.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/itensor/tensor/lapack_wrap.h b/itensor/tensor/lapack_wrap.h index 18231aee1..bce49eb60 100644 --- a/itensor/tensor/lapack_wrap.h +++ b/itensor/tensor/lapack_wrap.h @@ -580,7 +580,7 @@ dscal_wrapper(LAPACK_INT N, void dgesdd_wrapper(char * jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V + //choosing *jobz=='S' computes min(m,n) cols of U, V LAPACK_INT* m, //number of rows of input matrix *A LAPACK_INT* n, //number of cols of input matrix *A LAPACK_REAL *A, //contents of input matrix A @@ -591,7 +591,7 @@ dgesdd_wrapper(char * jobz, //char* specifying how much of U, V to com void zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V + //choosing *jobz=='S' computes min(m,n) cols of U, V LAPACK_INT *m, //number of rows of input matrix *A LAPACK_INT *n, //number of cols of input matrix *A Cplx *A, //contents of input matrix A @@ -603,7 +603,7 @@ zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to comp void dgesvd_wrapper(char * jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V + //choosing *jobz=='S' computes min(m,n) cols of U, V LAPACK_INT* m, //number of rows of input matrix *A LAPACK_INT* n, //number of cols of input matrix *A LAPACK_REAL *A, //contents of input matrix A @@ -614,7 +614,7 @@ dgesvd_wrapper(char * jobz, //char* specifying how much of U, V to com void zgesvd_wrapper(char *jobz, //char* specifying how much of U, V to compute - //choosing *jobz=='S' computes min(m,n) cols of U, V + //choosing *jobz=='S' computes min(m,n) cols of U, V LAPACK_INT *m, //number of rows of input matrix *A LAPACK_INT *n, //number of cols of input matrix *A Cplx *A, //contents of input matrix A @@ -633,10 +633,10 @@ void dgeqrf_wrapper(LAPACK_INT* m, //number of rows of A LAPACK_INT* n, //number of cols of A LAPACK_REAL* A, //matrix A - //on return upper triangle contains R + //on return upper triangle contains R LAPACK_INT* lda, //size of A (usually same as n) LAPACK_REAL* tau, //scalar factors of elementary reflectors - //length should be min(m,n) + //length should be min(m,n) LAPACK_INT* info); //error info // @@ -649,7 +649,7 @@ dorgqr_wrapper(LAPACK_INT* m, //number of rows of A LAPACK_INT* n, //number of cols of A LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) LAPACK_REAL* A, //matrix A, as returned from "A" argument of dgeqrf - //on return contains Q + //on return contains Q LAPACK_INT* lda, //size of A (usually same as n) LAPACK_REAL* tau, //scalar factors as returned by dgeqrf LAPACK_INT* info); //error info @@ -664,10 +664,10 @@ void zgeqrf_wrapper(LAPACK_INT* m, //number of rows of A LAPACK_INT* n, //number of cols of A Cplx* A, //matrix A - //on return upper triangle contains R + //on return upper triangle contains R LAPACK_INT* lda, //size of A (usually same as n) LAPACK_COMPLEX* tau, //scalar factors of elementary reflectors - //length should be min(m,n) + //length should be min(m,n) LAPACK_INT* info); //error info // @@ -680,7 +680,7 @@ zungqr_wrapper(LAPACK_INT* m, //number of rows of A LAPACK_INT* n, //number of cols of A LAPACK_INT* k, //number of elementary reflectors, typically min(m,n) Cplx* A, //matrix A, as returned from "A" argument of dgeqrf - //on return contains Q + //on return contains Q LAPACK_INT* lda, //size of A (usually same as n) LAPACK_COMPLEX* tau, //scalar factors as returned by zgeqrf LAPACK_INT* info); //error info @@ -752,7 +752,7 @@ zheev_wrapper(LAPACK_INT N, //number of cols of A // void dsygv_wrapper(char* jobz, //if 'V', compute both eigs and evecs - //if 'N', only eigenvalues + //if 'N', only eigenvalues char* uplo, //if 'U', use upper triangle of A LAPACK_INT* n, //number of cols of A LAPACK_REAL* A, //matrix A, on return contains eigenvectors From d001c204ee3cc63c358c9363339a4d5e8b4fff66 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Wed, 23 Aug 2023 11:17:57 -0400 Subject: [PATCH 49/51] formatting to match main [no ci] --- itensor/tensor/lapack_wrap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/itensor/tensor/lapack_wrap.h b/itensor/tensor/lapack_wrap.h index bce49eb60..e9b874a4e 100644 --- a/itensor/tensor/lapack_wrap.h +++ b/itensor/tensor/lapack_wrap.h @@ -177,7 +177,7 @@ namespace itensor { // // #ifdef LAPACK_REQUIRE_EXTERN - extern "C" { +extern "C" { //dnrm2 declaration #ifdef ITENSOR_USE_CBLAS @@ -601,7 +601,7 @@ zgesdd_wrapper(char *jobz, //char* specifying how much of U, V to comp LAPACK_INT *info); -void + void dgesvd_wrapper(char * jobz, //char* specifying how much of U, V to compute //choosing *jobz=='S' computes min(m,n) cols of U, V LAPACK_INT* m, //number of rows of input matrix *A @@ -655,7 +655,7 @@ dorgqr_wrapper(LAPACK_INT* m, //number of rows of A LAPACK_INT* info); //error info -// + // // dgeqrf // // QR factorization of a complex matrix A @@ -737,7 +737,7 @@ zlange_wrapper(char norm, // // Eigenvalues and eigenvectors of complex Hermitian matrix A // -LAPACK_INT +LAPACK_INT zheev_wrapper(LAPACK_INT N, //number of cols of A Cplx * A, //matrix A, on return contains eigenvectors LAPACK_REAL * d); //eigenvalues on return From 25c9bf0c6e5dc7eae660844eaa6e82b05eb88f41 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Wed, 23 Aug 2023 11:36:02 -0400 Subject: [PATCH 50/51] Some unit tests rely on the DEBUG build of ITensor. Comment out if undefined --- unittest/itensor_test.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/unittest/itensor_test.cc b/unittest/itensor_test.cc index c6e0f059d..6bfb26079 100644 --- a/unittest/itensor_test.cc +++ b/unittest/itensor_test.cc @@ -1488,12 +1488,14 @@ SECTION("NoprimeTest") } SECTION("Case 2") { +#ifdef DEBUG ITensor T(s1,prime(s1)); //Check that T.noPrime() //throws an exception since it would //lead to duplicate indices CHECK_THROWS_AS(T.noPrime(),ITError); +#endif // DEBUG } } @@ -1693,11 +1695,13 @@ SECTION("Tag functions") SECTION("Check error throws for duplicate indices") { +#ifdef DEBUG auto T2 = ITensor(setTags(l,"Link,0"),setTags(l,"Link,n=2,0")); //Check that remove the tag "2" //throws an exception since it would //lead to duplicate indices CHECK_THROWS_AS(T2.removeTags("n=2"),ITError); +#endif // DEBUG } SECTION("Test contraction") From 4f6db7be458fbb6835dc0220dd6bbce2c2bca624 Mon Sep 17 00:00:00 2001 From: Nils Wentzell Date: Tue, 30 Apr 2024 12:30:31 -0400 Subject: [PATCH 51/51] Remove use of std::shared_ptr::unique() --- itensor/itdata/itdata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/itensor/itdata/itdata.h b/itensor/itdata/itdata.h index 7b8b132da..2981129de 100644 --- a/itensor/itdata/itdata.h +++ b/itensor/itdata/itdata.h @@ -252,7 +252,7 @@ class ManageStore template operator T&() { - if(!(pdata_->unique())) + if(!(pdata_->use_count() == 1)) { auto* olda1 = static_cast(pdata_->get()); *pdata_ = std::make_shared>(*olda1); @@ -318,7 +318,7 @@ T* ManageStore:: modifyData(const T& d) { //if(!pparg1_) Error("Can't modify const data"); - if(!(pparg1_->unique())) + if(!(pparg1_->use_count() == 1)) { auto* olda1 = static_cast*>(pparg1_->get()); *pparg1_ = std::make_shared>(olda1->d);