Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 0 additions & 63 deletions .github/workflows/config/validation_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,41 +75,6 @@
"cuda_distribution": "fedora42"
}
},
{
"platform": "linux/amd64",
"cuda_major": 11,
"operating_systems":
[
"ubuntu:22.04",
"redhat/ubi8:8.10",
"opensuse/leap:15.5",
"fedora:42"
],
"ubuntu:22.04":
{
"libcdev_package": "libc6-dev",
"cudart_version": "11.8",
"cuda_distribution": "ubuntu2204"
},
"redhat/ubi8:8.10":
{
"libcdev_package": "glibc-devel",
"cudart_version": "11.8",
"cuda_distribution": "rhel9"
},
"opensuse/leap:15.5":
{
"libcdev_package": "glibc-devel",
"cudart_version": "11.8",
"cuda_distribution": "opensuse15"
},
"fedora:42":
{
"libcdev_package": "glibc-devel",
"cudart_version": "11.8",
"cuda_distribution": "rhel9"
}
},
{
"platform": "linux/arm64",
"cuda_major": 12,
Expand All @@ -130,34 +95,6 @@
"cudart_version": "12.6",
"cuda_distribution": "rhel9"
}
},
{
"platform": "linux/arm64",
"cuda_major": 11,
"operating_systems":
[
"ubuntu:22.04",
"redhat/ubi8:8.10",
"fedora:42"
],
"ubuntu:22.04":
{
"libcdev_package": "libc6-dev",
"cudart_version": "11.8",
"cuda_distribution": "ubuntu2204"
},
"redhat/ubi8:8.10":
{
"libcdev_package": "glibc-devel",
"cudart_version": "11.8",
"cuda_distribution": "rhel9"
},
"fedora:42":
{
"libcdev_package": "glibc-devel",
"cudart_version": "11.8",
"cuda_distribution": "rhel9"
}
}
]
}
4 changes: 2 additions & 2 deletions docker/build/devdeps.ext.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ ENV UCX_TLS=rc,cuda_copy,cuda_ipc,gdr_copy,sm

# Install CUDA

ARG cuda_packages="cuda-cudart cuda-nvrtc cuda-compiler libcublas libcublas-dev libcurand-dev libcusolver libcusparse-dev libnvjitlink"
ARG cuda_packages="cuda-cudart cuda-nvrtc cuda-compiler libcublas libcublas-dev libcurand-dev libcusolver libcusparse-dev libnvjitlink cuda-nvml-dev"
RUN if [ -n "$cuda_packages" ]; then \
# Filter out libnvjitlink if CUDA version is less than 12
if [ $(echo $CUDA_VERSION | cut -d "." -f1) -lt 12 ]; then \
Expand Down Expand Up @@ -175,7 +175,7 @@ ENV PIP_BREAK_SYSTEM_PACKAGES=1
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 python3-pip && \
apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/* && \
python3 -m pip install --break-system-packages cupy-cuda$(echo $CUDA_VERSION | cut -d . -f1)x==13.4.1 cuquantum-cu$(echo $CUDA_VERSION | cut -d . -f1)==25.06 && \
python3 -m pip install --break-system-packages cupy-cuda$(echo $CUDA_VERSION | cut -d . -f1)x==13.4.1 cuquantum-cu$(echo $CUDA_VERSION | cut -d . -f1)==25.09 && \
if [ "$(python3 --version | grep -o [0-9\.]* | cut -d . -f -2)" != "3.12" ]; then \
echo "expecting Python version 3.12"; \
fi
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ requires-python = ">=3.10"
license = { file="LICENSE" }
dependencies = [
'astpretty ~= 3.0',
'cuquantum-cu12 == 25.06',
'cuquantum-cu12 == 25.09',
'numpy >= 1.24',
'scipy >= 1.10.1',
'requests >= 2.31',
Expand Down
8 changes: 5 additions & 3 deletions runtime/nvqir/cutensornet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,10 @@ set(CUTENSORNET_PATCH ${CMAKE_MATCH_1})

set(CUTENSORNET_VERSION ${CUTENSORNET_MAJOR}.${CUTENSORNET_MINOR}.${CUTENSORNET_PATCH})
message(STATUS "Found cutensornet version: ${CUTENSORNET_VERSION}")
# We need cutensornet v2.7.0+ (cutensornetStateApplyGeneralChannel)
if (${CUTENSORNET_VERSION} VERSION_GREATER_EQUAL "2.7")
# We need cutensornet v2.9.0+
# Using the new flow: define a network with cutensornetCreateNetwork, append inputs via cutensornetNetworkAppendTensor, set output with cutensornetNetworkSetOutputTensor,
# then prepare and run using cutensornetNetworkPrepareContraction and cutensornetNetworkContract.
if (${CUTENSORNET_VERSION} VERSION_GREATER_EQUAL "2.9")
set (BASE_TENSOR_BACKEND_SRS tensornet_utils.cpp)
get_filename_component(CUTENSORNET_INCLUDE_DIR ${CUTENSORNET_INC} DIRECTORY)
get_filename_component(CUTENSORNET_LIB_DIR ${CUTENSORNET_LIB} DIRECTORY)
Expand Down Expand Up @@ -93,5 +95,5 @@ if (${CUTENSORNET_VERSION} VERSION_GREATER_EQUAL "2.7")
target_link_libraries(nvqir-tensornet PRIVATE tensornet-mpi-util)
target_link_libraries(nvqir-tensornet-fp32 PRIVATE tensornet-mpi-util)
else()
message(WARNING "Skipped tensornet backend due to incompatible cutensornet version. Please install cutensornet v2.3.0+.")
message(WARNING "Skipped tensornet backend due to incompatible cutensornet version. Please install cutensornet v2.9.0+.")
endif()
50 changes: 36 additions & 14 deletions runtime/nvqir/cutensornet/mps_simulation_state.inc
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,26 @@ std::complex<double> MPSSimulationState<ScalarType>::computeOverlap(
}

cutensornetNetworkDescriptor_t m_tnDescr;
HANDLE_CUTN_ERROR(cutensornetCreateNetworkDescriptor(
cutnHandle, numTensors, numModes.data(), extentsIn.data(), NULL,
modesIn.data(), tensAttr.data(), 0, NULL, NULL, NULL, dataType,
computeType, &m_tnDescr));
// Set up tensor network
HANDLE_CUTN_ERROR(cutensornetCreateNetwork(cutnHandle, &m_tnDescr));

int64_t tensorIDs[numTensors]; // for input tensors

// attach the input tensors to the network
for (int32_t t = 0; t < numTensors; ++t) {
HANDLE_CUTN_ERROR(cutensornetNetworkAppendTensor(
cutnHandle, m_tnDescr, tensModes[t].size(), tensExtents[t].data(),
tensModes[t].data(), &tensAttr[t], dataType, &tensorIDs[t]));
}

// set the output tensor
HANDLE_CUTN_ERROR(cutensornetNetworkSetOutputTensor(cutnHandle, m_tnDescr, 0,
NULL, dataType));

// set the network compute type
HANDLE_CUTN_ERROR(cutensornetNetworkSetAttribute(
cutnHandle, m_tnDescr, CUTENSORNET_NETWORK_COMPUTE_TYPE, &computeType,
sizeof(computeType)));

cutensornetContractionOptimizerConfig_t m_tnConfig;

Expand Down Expand Up @@ -188,11 +204,10 @@ std::complex<double> MPSSimulationState<ScalarType>::computeOverlap(
cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
CUTENSORNET_WORKSPACE_SCRATCH, scratchPad.d_scratch,
requiredWorkspaceSize));
cutensornetContractionPlan_t m_tnPlan;
{
ScopedTraceWithContext("cutensornetCreateContractionPlan");
HANDLE_CUTN_ERROR(cutensornetCreateContractionPlan(
cutnHandle, m_tnDescr, m_tnPath, workDesc, &m_tnPlan));
ScopedTraceWithContext("cutensornetNetworkPrepareContraction");
HANDLE_CUTN_ERROR(
cutensornetNetworkPrepareContraction(cutnHandle, m_tnDescr, workDesc));
}
// Compute the unnormalized overlap
std::vector<const void *> rawDataIn(numTensors);
Expand All @@ -202,11 +217,19 @@ std::complex<double> MPSSimulationState<ScalarType>::computeOverlap(
}
void *m_dOverlap{nullptr};
HANDLE_CUDA_ERROR(cudaMalloc(&m_dOverlap, overlapSize));

// Set tensor's data buffers and strides
for (int32_t t = 0; t < numTensors; ++t) {
HANDLE_CUTN_ERROR(cutensornetNetworkSetInputTensorMemory(
cutnHandle, m_tnDescr, tensorIDs[t], rawDataIn[t], NULL));
}
HANDLE_CUTN_ERROR(cutensornetNetworkSetOutputTensorMemory(
cutnHandle, m_tnDescr, m_dOverlap, NULL));

{
ScopedTraceWithContext("cutensornetContractSlices");
HANDLE_CUTN_ERROR(cutensornetContractSlices(cutnHandle, m_tnPlan,
rawDataIn.data(), m_dOverlap, 0,
workDesc, NULL, 0x0));
ScopedTraceWithContext("cutensornetNetworkContract");
HANDLE_CUTN_ERROR(cutensornetNetworkContract(cutnHandle, m_tnDescr, 0,
workDesc, NULL, 0x0));
}
// Get the overlap value back to Host
std::complex<double> overlap = 0.0;
Expand All @@ -224,10 +247,9 @@ std::complex<double> MPSSimulationState<ScalarType>::computeOverlap(

// Clean up
HANDLE_CUDA_ERROR(cudaFree(m_dOverlap));
HANDLE_CUTN_ERROR(cutensornetDestroyContractionPlan(m_tnPlan));
HANDLE_CUTN_ERROR(cutensornetDestroyContractionOptimizerInfo(m_tnPath));
HANDLE_CUTN_ERROR(cutensornetDestroyContractionOptimizerConfig(m_tnConfig));
HANDLE_CUTN_ERROR(cutensornetDestroyNetworkDescriptor(m_tnDescr));
HANDLE_CUTN_ERROR(cutensornetDestroyNetwork(m_tnDescr));

return std::abs(overlap);
}
Expand Down
12 changes: 7 additions & 5 deletions scripts/configure_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ if [ "$1" == "install-cuda" ]; then
dnf config-manager --add-repo "${CUDA_DOWNLOAD_URL}/${DISTRIBUTION}/${CUDA_ARCH_FOLDER}/cuda-${DISTRIBUTION}.repo"
dnf install -y --nobest --setopt=install_weak_deps=False \
cuda-toolkit-$(echo ${CUDA_VERSION} | tr . -)
    # custatevec is now linked against `libnvidia-ml.so.1`, which is provided by the NVIDIA driver.
    # For builds on non-GPU systems, we therefore also need to install the driver libraries.
dnf install -y --nobest --setopt=install_weak_deps=False nvidia-driver-libs
# [<CUDAInstall]
fi

Expand Down Expand Up @@ -72,7 +75,7 @@ if [ "$1" == "install-cuquantum" ]; then
CUDA_ARCH_FOLDER=$([ "$(uname -m)" == "aarch64" ] && echo sbsa || echo x86_64)

# [>cuQuantumInstall]
CUQUANTUM_VERSION=25.06.0.10
CUQUANTUM_VERSION=25.09.0.7
CUQUANTUM_DOWNLOAD_URL=https://developer.download.nvidia.com/compute/cuquantum/redist/cuquantum

cuquantum_archive=cuquantum-linux-${CUDA_ARCH_FOLDER}-${CUQUANTUM_VERSION}_cuda$(echo ${CUDA_VERSION} | cut -d . -f1)-archive.tar.xz
Expand All @@ -88,14 +91,13 @@ if [ "$1" == "install-cutensor" ]; then
CUDA_ARCH_FOLDER=$([ "$(uname -m)" == "aarch64" ] && echo sbsa || echo x86_64)

# [>cuTensorInstall]
CUTENSOR_VERSION=2.2.0.0
CUTENSOR_VERSION=2.3.1.0
CUTENSOR_DOWNLOAD_URL=https://developer.download.nvidia.com/compute/cutensor/redist/libcutensor

cutensor_archive=libcutensor-linux-${CUDA_ARCH_FOLDER}-${CUTENSOR_VERSION}-archive.tar.xz
cutensor_archive=libcutensor-linux-${CUDA_ARCH_FOLDER}-${CUTENSOR_VERSION}_cuda$(echo ${CUDA_VERSION} | cut -d . -f1)-archive.tar.xz
wget "${CUTENSOR_DOWNLOAD_URL}/linux-${CUDA_ARCH_FOLDER}/${cutensor_archive}"
mkdir -p "${CUTENSOR_INSTALL_PREFIX}" && tar xf "${cutensor_archive}" --strip-components 1 -C "${CUTENSOR_INSTALL_PREFIX}"
mv "${CUTENSOR_INSTALL_PREFIX}"/lib/$(echo ${CUDA_VERSION} | cut -d . -f1)/* ${CUTENSOR_INSTALL_PREFIX}/lib/
ls -d ${CUTENSOR_INSTALL_PREFIX}/lib/*/ | xargs rm -rf && rm -rf "${cutensor_archive}"
rm -rf "${cutensor_archive}"
# [<cuTensorInstall]
fi

Expand Down
30 changes: 30 additions & 0 deletions unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,36 @@ create_tests_with_backend(dm backends/QPPDMTester.cpp)
create_tests_with_backend(stim "")

if (CUSTATEVEC_ROOT AND CUDA_FOUND)
find_program(NVIDIA_SMI "nvidia-smi")
if(NOT NVIDIA_SMI)
  # libcustatevec.so has linkage to libnvidia-ml.so.1, which is part of the NVIDIA driver.
  # On a build system without NVIDIA GPUs, this lib cannot be resolved;
  # hence, linking these test executables will fail.
  # On these CPU-only build systems, we directly link the nvidia-ml-dev library, which provides
  # stub symbols for the actual driver library, to these test executables.
  # Running these tests will ultimately require a system with GPUs, i.e., the proper nvidia-ml lib
  # will be loaded by the runtime linker/loader.
  find_package(CUDAToolkit REQUIRED)
  # CUDAToolkit does not always define CUDA_TOOLKIT_ROOT_DIR; derive it from the bin dir.
  if(NOT DEFINED CUDA_TOOLKIT_ROOT_DIR)
    set(CUDA_TOOLKIT_ROOT_DIR ${CUDAToolkit_BIN_DIR}/..)
  endif()
  # The stub library lives in an architecture-specific target folder of the toolkit.
  if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
    set(NVIDIA_ML_PATH ${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib/stubs)
  elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
    set(NVIDIA_ML_PATH ${CUDA_TOOLKIT_ROOT_DIR}/targets/sbsa-linux/lib/stubs)
  else()
    message(FATAL_ERROR "Neither x86_64 nor aarch64 was detected.")
  endif()
  # Locate the nvidia-ml stub manually (the stub dir is not on the default search path).
  find_library(NVIDIA_ML NAMES nvidia-ml PATHS ${NVIDIA_ML_PATH})
  message(STATUS "NVIDIA ML lib: ${NVIDIA_ML}")
  # Expose the stub under the soname the linker resolves (libnvidia-ml.so.1) during build,
  # and remove the injected link again at install time.
  file(CREATE_LINK ${NVIDIA_ML} ${CMAKE_CURRENT_BINARY_DIR}/libnvidia-ml.so.1)
  link_directories(${CMAKE_CURRENT_BINARY_DIR})
  install(CODE "file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/libnvidia-ml.so.1)")
endif()

create_tests_with_backend(custatevec-fp32 "")
# Given that the fp32 and fp64 difference is largely inherited
# from a dependency, we omit fp64 tests here and rely on the
Expand Down
Loading